improvement in fallback
git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@238 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
parent
4554705881
commit
b871331659
|
@ -460,8 +460,8 @@ class PDFDocument(object):
|
||||||
(_,objid1) = self._parser.nexttoken() # objid
|
(_,objid1) = self._parser.nexttoken() # objid
|
||||||
(_,genno) = self._parser.nexttoken() # genno
|
(_,genno) = self._parser.nexttoken() # genno
|
||||||
(_,kwd) = self._parser.nexttoken()
|
(_,kwd) = self._parser.nexttoken()
|
||||||
# #### hack around malformed pdf files
|
# #### hack around malformed pdf files
|
||||||
# assert objid1 == objid, (objid, objid1)
|
#assert objid1 == objid, (objid, objid1)
|
||||||
if objid1 != objid:
|
if objid1 != objid:
|
||||||
x = []
|
x = []
|
||||||
while kwd is not self.KEYWORD_OBJ:
|
while kwd is not self.KEYWORD_OBJ:
|
||||||
|
@ -470,12 +470,15 @@ class PDFDocument(object):
|
||||||
if x:
|
if x:
|
||||||
objid1 = x[-2]
|
objid1 = x[-2]
|
||||||
genno = x[-1]
|
genno = x[-1]
|
||||||
# #### end hack around malformed pdf files
|
# #### end hack around malformed pdf files
|
||||||
if kwd is not self.KEYWORD_OBJ:
|
if kwd is not self.KEYWORD_OBJ:
|
||||||
raise PDFSyntaxError('Invalid object spec: offset=%r' % index)
|
raise PDFSyntaxError('Invalid object spec: offset=%r' % index)
|
||||||
(_,obj) = self._parser.nextobject()
|
try:
|
||||||
if isinstance(obj, PDFStream):
|
(_,obj) = self._parser.nextobject()
|
||||||
obj.set_objid(objid, genno)
|
if isinstance(obj, PDFStream):
|
||||||
|
obj.set_objid(objid, genno)
|
||||||
|
except PSEOF:
|
||||||
|
return None
|
||||||
if 2 <= self.debug:
|
if 2 <= self.debug:
|
||||||
print >>stderr, 'register: objid=%r: %r' % (objid, obj)
|
print >>stderr, 'register: objid=%r: %r' % (objid, obj)
|
||||||
self.objs[objid] = obj
|
self.objs[objid] = obj
|
||||||
|
@ -578,6 +581,7 @@ class PDFParser(PSStackParser):
|
||||||
def __init__(self, fp):
|
def __init__(self, fp):
|
||||||
PSStackParser.__init__(self, fp)
|
PSStackParser.__init__(self, fp)
|
||||||
self.doc = None
|
self.doc = None
|
||||||
|
self.fallback = False
|
||||||
return
|
return
|
||||||
|
|
||||||
def set_document(self, doc):
|
def set_document(self, doc):
|
||||||
|
@ -618,12 +622,13 @@ class PDFParser(PSStackParser):
|
||||||
# stream object
|
# stream object
|
||||||
((_,dic),) = self.pop(1)
|
((_,dic),) = self.pop(1)
|
||||||
dic = dict_value(dic)
|
dic = dict_value(dic)
|
||||||
try:
|
objlen = 0
|
||||||
objlen = int_value(dic['Length'])
|
if not self.fallback:
|
||||||
except KeyError:
|
try:
|
||||||
if STRICT:
|
objlen = int_value(dic['Length'])
|
||||||
raise PDFSyntaxError('/Length is undefined: %r' % dic)
|
except KeyError:
|
||||||
objlen = 0
|
if STRICT:
|
||||||
|
raise PDFSyntaxError('/Length is undefined: %r' % dic)
|
||||||
self.seek(pos)
|
self.seek(pos)
|
||||||
try:
|
try:
|
||||||
(_, line) = self.nextline() # 'stream'
|
(_, line) = self.nextline() # 'stream'
|
||||||
|
@ -650,6 +655,7 @@ class PDFParser(PSStackParser):
|
||||||
objlen += len(line)
|
objlen += len(line)
|
||||||
data += line
|
data += line
|
||||||
self.seek(pos+objlen)
|
self.seek(pos+objlen)
|
||||||
|
# XXX limit objlen not to exceed object boundary
|
||||||
if 2 <= self.debug:
|
if 2 <= self.debug:
|
||||||
print >>stderr, 'Stream: pos=%d, objlen=%d, dic=%r, data=%r...' % \
|
print >>stderr, 'Stream: pos=%d, objlen=%d, dic=%r, data=%r...' % \
|
||||||
(pos, objlen, dic, data[:10])
|
(pos, objlen, dic, data[:10])
|
||||||
|
@ -725,6 +731,7 @@ class PDFParser(PSStackParser):
|
||||||
# fallback
|
# fallback
|
||||||
if 1 <= self.debug:
|
if 1 <= self.debug:
|
||||||
print >>stderr, 'no xref, fallback'
|
print >>stderr, 'no xref, fallback'
|
||||||
|
self.fallback = True
|
||||||
xref = PDFXRef()
|
xref = PDFXRef()
|
||||||
xref.load_fallback(self)
|
xref.load_fallback(self)
|
||||||
xrefs.append(xref)
|
xrefs.append(xref)
|
||||||
|
|
Loading…
Reference in New Issue