Removed the ObjIdRange utility class for terseness; PDFXRefStream now stores its index ranges as plain (start, nobjs) pairs.
parent
2221163b94
commit
557c2c72e6
|
@ -18,7 +18,7 @@ from pdfparser import PDFSyntaxError
|
||||||
from pdfparser import PDFStreamParser
|
from pdfparser import PDFStreamParser
|
||||||
from arcfour import Arcfour
|
from arcfour import Arcfour
|
||||||
from utils import choplist, nunpack
|
from utils import choplist, nunpack
|
||||||
from utils import decode_text, ObjIdRange
|
from utils import decode_text
|
||||||
|
|
||||||
|
|
||||||
## Exceptions
|
## Exceptions
|
||||||
|
@ -154,7 +154,7 @@ class PDFXRefStream(PDFBaseXRef):
|
||||||
self.data = None
|
self.data = None
|
||||||
self.entlen = None
|
self.entlen = None
|
||||||
self.fl1 = self.fl2 = self.fl3 = None
|
self.fl1 = self.fl2 = self.fl3 = None
|
||||||
self.objid_ranges = []
|
self.ranges = []
|
||||||
return
|
return
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
|
@ -171,15 +171,14 @@ class PDFXRefStream(PDFBaseXRef):
|
||||||
index_array = stream.get('Index', (0,size))
|
index_array = stream.get('Index', (0,size))
|
||||||
if len(index_array) % 2 != 0:
|
if len(index_array) % 2 != 0:
|
||||||
raise PDFSyntaxError('Invalid index number')
|
raise PDFSyntaxError('Invalid index number')
|
||||||
self.objid_ranges.extend( ObjIdRange(start, nobjs)
|
self.ranges.extend(choplist(2, index_array))
|
||||||
for (start,nobjs) in choplist(2, index_array) )
|
|
||||||
(self.fl1, self.fl2, self.fl3) = stream['W']
|
(self.fl1, self.fl2, self.fl3) = stream['W']
|
||||||
self.data = stream.get_data()
|
self.data = stream.get_data()
|
||||||
self.entlen = self.fl1+self.fl2+self.fl3
|
self.entlen = self.fl1+self.fl2+self.fl3
|
||||||
self.trailer = stream.attrs
|
self.trailer = stream.attrs
|
||||||
if 1 <= debug:
|
if 1 <= debug:
|
||||||
print >>sys.stderr, ('xref stream: objid=%s, fields=%d,%d,%d' %
|
print >>sys.stderr, ('xref stream: objid=%s, fields=%d,%d,%d' %
|
||||||
(', '.join(map(repr, self.objid_ranges)),
|
(', '.join(map(repr, self.ranges)),
|
||||||
self.fl1, self.fl2, self.fl3))
|
self.fl1, self.fl2, self.fl3))
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@ -187,24 +186,22 @@ class PDFXRefStream(PDFBaseXRef):
|
||||||
return self.trailer
|
return self.trailer
|
||||||
|
|
||||||
def get_objids(self):
|
def get_objids(self):
|
||||||
for objid_range in self.objid_ranges:
|
for (start,nobjs) in self.ranges:
|
||||||
for x in xrange(objid_range.get_start_id(), objid_range.get_end_id()+1):
|
for i in xrange(nobjs):
|
||||||
yield x
|
yield start+i
|
||||||
return
|
return
|
||||||
|
|
||||||
def get_pos(self, objid):
|
def get_pos(self, objid):
|
||||||
offset = 0
|
index = 0
|
||||||
found = False
|
for (start,nobjs) in self.ranges:
|
||||||
for objid_range in self.objid_ranges:
|
if start <= objid and objid < start+nobjs:
|
||||||
if objid >= objid_range.get_start_id() and objid <= objid_range.get_end_id():
|
index += objid - start
|
||||||
offset += objid - objid_range.get_start_id()
|
|
||||||
found = True
|
|
||||||
break
|
|
||||||
else:
|
else:
|
||||||
offset += objid_range.get_nobjs()
|
index += nobjs
|
||||||
if not found: raise KeyError(objid)
|
else:
|
||||||
i = self.entlen * offset
|
raise KeyError(objid)
|
||||||
ent = self.data[i:i+self.entlen]
|
offset = self.entlen * index
|
||||||
|
ent = self.data[offset:offset+self.entlen]
|
||||||
f1 = nunpack(ent[:self.fl1], 1)
|
f1 = nunpack(ent[:self.fl1], 1)
|
||||||
if f1 == 1:
|
if f1 == 1:
|
||||||
pos = nunpack(ent[self.fl1:self.fl1+self.fl2])
|
pos = nunpack(ent[self.fl1:self.fl1+self.fl2])
|
||||||
|
|
|
@ -212,30 +212,6 @@ def matrix2str((a,b,c,d,e,f)):
|
||||||
return '[%.2f,%.2f,%.2f,%.2f, (%.2f,%.2f)]' % (a,b,c,d,e,f)
|
return '[%.2f,%.2f,%.2f,%.2f, (%.2f,%.2f)]' % (a,b,c,d,e,f)
|
||||||
|
|
||||||
|
|
||||||
## ObjIdRange
|
|
||||||
##
|
|
||||||
class ObjIdRange(object):
|
|
||||||
|
|
||||||
"A utility class to represent a range of object IDs."
|
|
||||||
|
|
||||||
def __init__(self, start, nobjs):
|
|
||||||
self.start = start
|
|
||||||
self.nobjs = nobjs
|
|
||||||
return
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return '<ObjIdRange: %d-%d>' % (self.get_start_id(), self.get_end_id())
|
|
||||||
|
|
||||||
def get_start_id(self):
|
|
||||||
return self.start
|
|
||||||
|
|
||||||
def get_end_id(self):
|
|
||||||
return self.start + self.nobjs - 1
|
|
||||||
|
|
||||||
def get_nobjs(self):
|
|
||||||
return self.nobjs
|
|
||||||
|
|
||||||
|
|
||||||
## Plane
|
## Plane
|
||||||
##
|
##
|
||||||
## A set-like data structure for objects placed on a plane.
|
## A set-like data structure for objects placed on a plane.
|
||||||
|
|
Loading…
Reference in New Issue