From 557c2c72e6c7d6cc61497c991717915fc6d899a5 Mon Sep 17 00:00:00 2001
From: Yusuke Shinyama
Date: Thu, 10 Oct 2013 18:34:43 +0900
Subject: [PATCH] Removed ObjIdRange for terseness.

---
Reviewer notes (this section is commentary; it is not part of the commit
message and is skipped when the patch is applied):

* PDFXRefStream.get_pos(): the `break` in the matching branch is kept (it is
  now an unchanged context line). The rewritten loop ends with a `for ... else`
  clause, which runs whenever the loop finishes without `break`; with the
  `break` removed, get_pos() raised KeyError for every objid, including valid
  ones, and `index` went on accumulating the sizes of later ranges. The hunk
  header and diffstat counts below reflect the kept line; the `index` blob ids
  are left as originally recorded.
* Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch. If context lines do not match the tree exactly, apply
  with `git apply --ignore-whitespace`.
* The removed ObjIdRange.__repr__ line had an empty format string in that
  copy; it is presumed to have been '<ObjIdRange: %d-%d>' -- confirm against
  pdfminer/utils.py before applying.

 pdfminer/pdfdocument.py | 34 ++++++++++++++++------------------
 pdfminer/utils.py       | 24 ------------------------
 2 files changed, 16 insertions(+), 42 deletions(-)

diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py
index c476ad1..cb26802 100644
--- a/pdfminer/pdfdocument.py
+++ b/pdfminer/pdfdocument.py
@@ -18,7 +18,7 @@ from pdfparser import PDFSyntaxError
 from pdfparser import PDFStreamParser
 from arcfour import Arcfour
 from utils import choplist, nunpack
-from utils import decode_text, ObjIdRange
+from utils import decode_text
 
 
 ## Exceptions
@@ -154,7 +154,7 @@ class PDFXRefStream(PDFBaseXRef):
         self.data = None
         self.entlen = None
         self.fl1 = self.fl2 = self.fl3 = None
-        self.objid_ranges = []
+        self.ranges = []
         return
 
     def __repr__(self):
@@ -171,15 +171,14 @@ class PDFXRefStream(PDFBaseXRef):
         index_array = stream.get('Index', (0,size))
         if len(index_array) % 2 != 0:
             raise PDFSyntaxError('Invalid index number')
-        self.objid_ranges.extend( ObjIdRange(start, nobjs)
-                                  for (start,nobjs) in choplist(2, index_array) )
+        self.ranges.extend(choplist(2, index_array))
         (self.fl1, self.fl2, self.fl3) = stream['W']
         self.data = stream.get_data()
         self.entlen = self.fl1+self.fl2+self.fl3
         self.trailer = stream.attrs
         if 1 <= debug:
             print >>sys.stderr, ('xref stream: objid=%s, fields=%d,%d,%d' %
-                                 (', '.join(map(repr, self.objid_ranges)),
+                                 (', '.join(map(repr, self.ranges)),
                                   self.fl1, self.fl2, self.fl3))
         return
 
@@ -187,24 +186,23 @@ class PDFXRefStream(PDFBaseXRef):
         return self.trailer
 
     def get_objids(self):
-        for objid_range in self.objid_ranges:
-            for x in xrange(objid_range.get_start_id(), objid_range.get_end_id()+1):
-                yield x
+        for (start,nobjs) in self.ranges:
+            for i in xrange(nobjs):
+                yield start+i
         return
 
     def get_pos(self, objid):
-        offset = 0
-        found = False
-        for objid_range in self.objid_ranges:
-            if objid >= objid_range.get_start_id() and objid <= objid_range.get_end_id():
-                offset += objid - objid_range.get_start_id()
-                found = True
+        index = 0
+        for (start,nobjs) in self.ranges:
+            if start <= objid and objid < start+nobjs:
+                index += objid - start
                 break
             else:
-                offset += objid_range.get_nobjs()
-        if not found: raise KeyError(objid)
-        i = self.entlen * offset
-        ent = self.data[i:i+self.entlen]
+                index += nobjs
+        else:
+            raise KeyError(objid)
+        offset = self.entlen * index
+        ent = self.data[offset:offset+self.entlen]
         f1 = nunpack(ent[:self.fl1], 1)
         if f1 == 1:
             pos = nunpack(ent[self.fl1:self.fl1+self.fl2])
diff --git a/pdfminer/utils.py b/pdfminer/utils.py
index 9b0fc29..7c28477 100644
--- a/pdfminer/utils.py
+++ b/pdfminer/utils.py
@@ -212,30 +212,6 @@ def matrix2str((a,b,c,d,e,f)):
     return '[%.2f,%.2f,%.2f,%.2f, (%.2f,%.2f)]' % (a,b,c,d,e,f)
 
 
-## ObjIdRange
-##
-class ObjIdRange(object):
-
-    "A utility class to represent a range of object IDs."
-
-    def __init__(self, start, nobjs):
-        self.start = start
-        self.nobjs = nobjs
-        return
-
-    def __repr__(self):
-        return '<ObjIdRange: %d-%d>' % (self.get_start_id(), self.get_end_id())
-
-    def get_start_id(self):
-        return self.start
-
-    def get_end_id(self):
-        return self.start + self.nobjs - 1
-
-    def get_nobjs(self):
-        return self.nobjs
-
-
 ## Plane
 ##
 ## A set-like data structure for objects placed on a plane.