diff --git a/dumppdf.py b/dumppdf.py
index 6e644f5..c86cf3a 100755
--- a/dumppdf.py
+++ b/dumppdf.py
@@ -143,8 +143,8 @@ def main(argv):
outfp = stdout
for (k, v) in opts:
if k == '-d': debug += 1
- elif k == '-i': objids.append(int(v))
- elif k == '-p': pageids.add(int(v))
+ elif k == '-i': objids.extend( int(x) for x in v.split(',') )
+ elif k == '-p': pageids.update( int(x) for x in v.split(',') )
elif k == '-P': password = v
elif k == '-a': dumpall = True
elif k == '-r': codec = 'raw'
diff --git a/pdf2txt.py b/pdf2txt.py
index d4832bb..129108a 100755
--- a/pdf2txt.py
+++ b/pdf2txt.py
@@ -178,7 +178,8 @@ class TextConverter(PDFDevice):
(wmode, x*scale, (offset-y)*scale, item.fontsize*scale))
outfp.write(enc(item.text, codec))
outfp.write('\n')
- outfp.write('<html><body>\n')
+ outfp.write('<html><head><meta http-equiv="Content-Type" content="text/html; charset=%s">\n' % codec)
+ outfp.write('</head><body>\n')
for page in self.pages:
(x0,y0,x1,y1) = page.bbox
offset += y1
diff --git a/pdfinterp.py b/pdfinterp.py
index 15bc330..01fde03 100644
--- a/pdfinterp.py
+++ b/pdfinterp.py
@@ -593,23 +593,22 @@ class PDFPageInterpreter:
def __init__(self):
self.font = None
self.fontsize = 0
- self.reset()
- return
- def __repr__(self):
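- return ('<TextState: font=%r, fontsize=%r, matrix=%r, '
- ' charspace=%r, wordspace=%r, scaling=%r, leading=%r, '
- ' render=%r, rise=%r>' %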
- (self.font, self.fontsize, self.matrix,
- self.charspace, self.wordspace, self.scaling, self.leading,
- self.render, self.rise))
- def reset(self):
self.charspace = 0
self.wordspace = 0
self.scaling = 100
self.leading = 0
self.render = 0
self.rise = 0
- #
+ self.reset()
+ return
+ def __repr__(self):
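+ return ('<TextState: font=%r, fontsize=%r, charspace=%r, wordspace=%r, '
+ ' scaling=%r, leading=%r, render=%r, rise=%r, '
+ ' matrix=%r, linematrix=%r>' %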
+ (self.font, self.fontsize, self.charspace, self.wordspace,
+ self.scaling, self.leading, self.render, self.rise,
+ self.matrix, self.linematrix))
+ def reset(self):
self.matrix = MATRIX_IDENTITY
self.linematrix = (0, 0)
return
@@ -881,15 +880,17 @@ class PDFPageInterpreter:
# text-move
def do_Td(self, tx, ty):
(a,b,c,d,e,f) = self.textstate.matrix
- self.textstate.matrix = (a,b,c,d,e+tx,f+ty)
+ self.textstate.matrix = (a,b,c,d,tx*a+ty*c+e,tx*b+ty*d+f)
self.textstate.linematrix = (0, 0)
+ #print >>stderr, 'Td(%r,%r): %r' % (tx,ty,self.textstate)
return
# text-move
def do_TD(self, tx, ty):
(a,b,c,d,e,f) = self.textstate.matrix
- self.textstate.matrix = (a,b,c,d,e+tx,f+ty)
- self.textstate.leading = -ty
+ self.textstate.matrix = (a,b,c,d,tx*a+ty*c+e,tx*b+ty*d+f)
+ self.textstate.leading = ty
self.textstate.linematrix = (0, 0)
+ #print >>stderr, 'TD(%r,%r): %r' % (tx,ty,self.textstate)
return
# textmatrix
def do_Tm(self, a,b,c,d,e,f):
@@ -899,12 +900,13 @@ class PDFPageInterpreter:
# nextline
def do_T_a(self):
(a,b,c,d,e,f) = self.textstate.matrix
- self.textstate.matrix = (a,b,c,d,e,f+self.textstate.leading)
+ self.textstate.matrix = (a,b,c,d,self.textstate.leading*c+e,self.textstate.leading*d+f)
self.textstate.linematrix = (0, 0)
return
# show-pos
def do_TJ(self, seq):
+ #print >>stderr, 'TJ(%r): %r' % (seq,self.textstate)
textstate = self.textstate
font = textstate.font
(a,b,c,d,e,f) = textstate.matrix