diff --git a/pdf2txt.py b/pdf2txt.py
index efe9949..d4832bb 100755
--- a/pdf2txt.py
+++ b/pdf2txt.py
@@ -9,13 +9,18 @@ from pdfinterp import PDFDevice, PDFResourceManager, \
from cmap import CMapDB
+def enc(x, codec):
+ x = x.replace('&','&amp;').replace('>','&gt;').replace('<','&lt;')
+ return x.encode(codec, 'xmlcharrefreplace')
+
+
## PageItem
##
-class PageItem:
+class PageItem(object):
def __init__(self, id, (x0,y0,x1,y1), rotate=0):
self.id = id
- self.bbox = (x0, y0, x1-x0, y1-y0)
+ self.bbox = (x0, y0, x1, y1)
self.rotate = rotate
self.objs = []
return
@@ -26,15 +31,6 @@ class PageItem:
def add(self, obj):
self.objs.append(obj)
return
-
- def dump(self, outfp, codec):
- bbox = '%.3f,%.3f,%.3f,%.3f' % self.bbox
- outfp.write('\n' %
- (self.id, bbox, self.rotate))
- for obj in self.objs:
- obj.dump(outfp, codec)
- outfp.write('\n')
- return
## FigureItem
@@ -44,18 +40,10 @@ class FigureItem(PageItem):
def __repr__(self):
return ('