Removed ObjIdRange for terseness.

pull/1/head
Yusuke Shinyama 2013-10-10 18:34:43 +09:00
parent 2221163b94
commit 557c2c72e6
2 changed files with 16 additions and 43 deletions

View File

@@ -18,7 +18,7 @@ from pdfparser import PDFSyntaxError
from pdfparser import PDFStreamParser from pdfparser import PDFStreamParser
from arcfour import Arcfour from arcfour import Arcfour
from utils import choplist, nunpack from utils import choplist, nunpack
from utils import decode_text, ObjIdRange from utils import decode_text
## Exceptions ## Exceptions
@@ -154,7 +154,7 @@ class PDFXRefStream(PDFBaseXRef):
self.data = None self.data = None
self.entlen = None self.entlen = None
self.fl1 = self.fl2 = self.fl3 = None self.fl1 = self.fl2 = self.fl3 = None
self.objid_ranges = [] self.ranges = []
return return
def __repr__(self): def __repr__(self):
@@ -171,15 +171,14 @@ class PDFXRefStream(PDFBaseXRef):
index_array = stream.get('Index', (0,size)) index_array = stream.get('Index', (0,size))
if len(index_array) % 2 != 0: if len(index_array) % 2 != 0:
raise PDFSyntaxError('Invalid index number') raise PDFSyntaxError('Invalid index number')
self.objid_ranges.extend( ObjIdRange(start, nobjs) self.ranges.extend(choplist(2, index_array))
for (start,nobjs) in choplist(2, index_array) )
(self.fl1, self.fl2, self.fl3) = stream['W'] (self.fl1, self.fl2, self.fl3) = stream['W']
self.data = stream.get_data() self.data = stream.get_data()
self.entlen = self.fl1+self.fl2+self.fl3 self.entlen = self.fl1+self.fl2+self.fl3
self.trailer = stream.attrs self.trailer = stream.attrs
if 1 <= debug: if 1 <= debug:
print >>sys.stderr, ('xref stream: objid=%s, fields=%d,%d,%d' % print >>sys.stderr, ('xref stream: objid=%s, fields=%d,%d,%d' %
(', '.join(map(repr, self.objid_ranges)), (', '.join(map(repr, self.ranges)),
self.fl1, self.fl2, self.fl3)) self.fl1, self.fl2, self.fl3))
return return
@@ -187,24 +186,22 @@ class PDFXRefStream(PDFBaseXRef):
return self.trailer return self.trailer
def get_objids(self): def get_objids(self):
for objid_range in self.objid_ranges: for (start,nobjs) in self.ranges:
for x in xrange(objid_range.get_start_id(), objid_range.get_end_id()+1): for i in xrange(nobjs):
yield x yield start+i
return return
def get_pos(self, objid): def get_pos(self, objid):
offset = 0 index = 0
found = False for (start,nobjs) in self.ranges:
for objid_range in self.objid_ranges: if start <= objid and objid < start+nobjs:
if objid >= objid_range.get_start_id() and objid <= objid_range.get_end_id(): index += objid - start
offset += objid - objid_range.get_start_id()
found = True
break
else: else:
offset += objid_range.get_nobjs() index += nobjs
if not found: raise KeyError(objid) else:
i = self.entlen * offset raise KeyError(objid)
ent = self.data[i:i+self.entlen] offset = self.entlen * index
ent = self.data[offset:offset+self.entlen]
f1 = nunpack(ent[:self.fl1], 1) f1 = nunpack(ent[:self.fl1], 1)
if f1 == 1: if f1 == 1:
pos = nunpack(ent[self.fl1:self.fl1+self.fl2]) pos = nunpack(ent[self.fl1:self.fl1+self.fl2])

View File

@@ -212,30 +212,6 @@ def matrix2str((a,b,c,d,e,f)):
return '[%.2f,%.2f,%.2f,%.2f, (%.2f,%.2f)]' % (a,b,c,d,e,f) return '[%.2f,%.2f,%.2f,%.2f, (%.2f,%.2f)]' % (a,b,c,d,e,f)
## ObjIdRange
##
class ObjIdRange(object):

    """A utility class to represent a range of object IDs.

    The range covers ``nobjs`` consecutive IDs beginning at ``start``,
    i.e. ``start`` through ``start + nobjs - 1`` inclusive.
    """

    def __init__(self, start, nobjs):
        # start: first object ID of the range.
        # nobjs: number of consecutive object IDs the range contains.
        self.start = start
        self.nobjs = nobjs
        return

    def __repr__(self):
        # Shown as an inclusive "first-last" pair of IDs.
        return '<ObjIdRange: %d-%d>' % (self.get_start_id(), self.get_end_id())

    def get_start_id(self):
        """Return the first object ID of the range."""
        return self.start

    def get_end_id(self):
        """Return the last object ID of the range (inclusive).

        Computed as ``start + nobjs - 1``, so a range with ``nobjs == 0``
        yields ``start - 1``.
        """
        return self.start + self.nobjs - 1

    def get_nobjs(self):
        """Return the number of object IDs in the range."""
        return self.nobjs
## Plane ## Plane
## ##
## A set-like data structure for objects placed on a plane. ## A set-like data structure for objects placed on a plane.