add non-strict mode.
git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@16 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
parent
80d17eb79b
commit
94859ea428
103
pdfinterp.py
103
pdfinterp.py
|
@ -7,7 +7,7 @@ try:
|
|||
except ImportError:
|
||||
from StringIO import StringIO
|
||||
from psparser import PSException, PSSyntaxError, PSTypeError, \
|
||||
PSStackParser, PSLiteral, PSKeyword, \
|
||||
PSStackParser, PSLiteral, PSKeyword, STRICT, \
|
||||
PSLiteralTable, PSKeywordTable, literal_name, keyword_name
|
||||
from pdfparser import PDFException, PDFStream, PDFObjRef, resolve1, \
|
||||
int_value, float_value, num_value, \
|
||||
|
@ -84,14 +84,14 @@ class PDFFont:
|
|||
def __init__(self, descriptor, widths, default_width=None):
|
||||
self.descriptor = descriptor
|
||||
self.widths = widths
|
||||
self.fontname = descriptor['FontName']
|
||||
self.fontname = descriptor.get('FontName', 'unknown')
|
||||
if isinstance(self.fontname, PSLiteral):
|
||||
self.fontname = literal_name(self.fontname)
|
||||
self.ascent = descriptor['Ascent']
|
||||
self.descent = descriptor['Descent']
|
||||
self.ascent = num_value(descriptor.get('Ascent', 0))
|
||||
self.descent = num_value(descriptor.get('Descent', 0))
|
||||
self.default_width = default_width or descriptor.get('MissingWidth', 0)
|
||||
self.leading = descriptor.get('Leading', 0)
|
||||
self.bbox = list_value(descriptor['FontBBox'])
|
||||
self.leading = num_value(descriptor.get('Leading', 0))
|
||||
self.bbox = list_value(descriptor.get('FontBBox', (0,0,0,0)))
|
||||
return
|
||||
|
||||
def __repr__(self):
|
||||
|
@ -155,20 +155,20 @@ class PDFSimpleFont(PDFFont):
|
|||
class PDFType1Font(PDFSimpleFont):
|
||||
|
||||
def __init__(self, spec):
|
||||
if 'BaseFont' not in spec:
|
||||
raise PDFFontError('BaseFont is missing')
|
||||
self.basefont = literal_name(spec['BaseFont'])
|
||||
try:
|
||||
self.basefont = literal_name(spec['BaseFont'])
|
||||
except KeyError:
|
||||
if STRICT:
|
||||
raise PDFFontError('BaseFont is missing')
|
||||
self.basefont = 'unknown'
|
||||
try:
|
||||
(descriptor, widths) = FontMetricsDB.get_metrics(self.basefont)
|
||||
except KeyError:
|
||||
try:
|
||||
descriptor = dict_value(spec['FontDescriptor'])
|
||||
firstchar = int_value(spec['FirstChar'])
|
||||
lastchar = int_value(spec['LastChar'])
|
||||
widths = dict( (i+firstchar,w) for (i,w)
|
||||
in enumerate(list_value(spec['Widths'])) )
|
||||
except KeyError, k:
|
||||
raise PDFFontError('%s is missing' % k)
|
||||
descriptor = dict_value(spec.get('FontDescriptor', {}))
|
||||
firstchar = int_value(spec.get('FirstChar', 0))
|
||||
lastchar = int_value(spec.get('LastChar', 255))
|
||||
widths = list_value(spec.get('Widths', [0]*256))
|
||||
widths = dict( (i+firstchar,w) for (i,w) in enumerate(widths) )
|
||||
PDFSimpleFont.__init__(self, descriptor, widths, spec)
|
||||
return
|
||||
|
||||
|
@ -179,13 +179,10 @@ class PDFTrueTypeFont(PDFType1Font):
|
|||
# PDFType3Font
|
||||
class PDFType3Font(PDFSimpleFont):
|
||||
def __init__(self, spec):
|
||||
try:
|
||||
firstchar = int_value(spec['FirstChar'])
|
||||
lastchar = int_value(spec['LastChar'])
|
||||
widths = dict( (i+firstchar,w) for (i,w)
|
||||
in enumerate(list_value(spec['Widths'])) )
|
||||
except KeyError, k:
|
||||
raise PDFFontError('%s is missing' % k)
|
||||
firstchar = int_value(spec.get('FirstChar', 0))
|
||||
lastchar = int_value(spec.get('LastChar', 0))
|
||||
widths = list_value(spec.get('Widths', [0]*256))
|
||||
widths = dict( (i+firstchar,w) for (i,w) in enumerate(widths))
|
||||
if 'FontDescriptor' in spec:
|
||||
descriptor = dict_value(spec['FontDescriptor'])
|
||||
else:
|
||||
|
@ -215,7 +212,8 @@ class TrueTypeFont:
|
|||
return
|
||||
|
||||
def create_cmap(self):
|
||||
if 'cmap' not in self.tables: raise TrueTypeFont.CMapNotFound
|
||||
if 'cmap' not in self.tables:
|
||||
raise TrueTypeFont.CMapNotFound
|
||||
(base_offset, length) = self.tables['cmap']
|
||||
fp = self.fp
|
||||
fp.seek(base_offset)
|
||||
|
@ -274,15 +272,15 @@ class TrueTypeFont:
|
|||
class PDFCIDFont(PDFFont):
|
||||
|
||||
def __init__(self, spec):
|
||||
if 'BaseFont' not in spec:
|
||||
raise PDFFontError('BaseFont is missing')
|
||||
try:
|
||||
self.cidsysteminfo = dict_value(spec['CIDSystemInfo'])
|
||||
self.cidcoding = '%s-%s' % (self.cidsysteminfo['Registry'],
|
||||
self.cidsysteminfo['Ordering'])
|
||||
self.basefont = literal_name(spec['BaseFont'])
|
||||
except KeyError:
|
||||
raise PDFFontError('CIDSystemInfo not properly defined.')
|
||||
self.basefont = literal_name(spec['BaseFont'])
|
||||
if STRICT:
|
||||
raise PDFFontError('BaseFont is missing')
|
||||
self.basefont = 'unknown'
|
||||
self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {}))
|
||||
self.cidcoding = '%s-%s' % (self.cidsysteminfo.get('Registry', 'unknown'),
|
||||
self.cidsysteminfo.get('Ordering', 'unknown'))
|
||||
self.cmap = CMapDB.get_cmap(literal_name(spec['Encoding']))
|
||||
descriptor = dict_value(spec['FontDescriptor'])
|
||||
ttf = None
|
||||
|
@ -391,11 +389,16 @@ class PDFResourceManager:
|
|||
if objid and objid in self.fonts:
|
||||
font = self.fonts[objid]
|
||||
else:
|
||||
assert spec['Type'] == LITERAL_FONT
|
||||
if STRICT:
|
||||
if spec['Type'] != LITERAL_FONT:
|
||||
raise PDFFontError('Type is not /Font')
|
||||
# Create a Font object.
|
||||
if 'Subtype' not in spec:
|
||||
raise PDFFontError('Font Subtype is not specified.')
|
||||
subtype = literal_name(spec['Subtype'])
|
||||
if 'Subtype' in spec:
|
||||
subtype = literal_name(spec['Subtype'])
|
||||
else:
|
||||
if STRICT:
|
||||
raise PDFFontError('Font Subtype is not specified.')
|
||||
subtype = 'Type1'
|
||||
if subtype in ('Type1', 'MMType1'):
|
||||
# Type1 Font
|
||||
font = PDFType1Font(spec)
|
||||
|
@ -411,14 +414,16 @@ class PDFResourceManager:
|
|||
elif subtype == 'Type0':
|
||||
# Type0 Font
|
||||
dfonts = list_value(spec['DescendantFonts'])
|
||||
assert len(dfonts) == 1
|
||||
assert dfonts
|
||||
subspec = dict_value(dfonts[0]).copy()
|
||||
for k in ('Encoding', 'ToUnicode'):
|
||||
if k in spec:
|
||||
subspec[k] = resolve1(spec[k])
|
||||
font = self.get_font(None, subspec)
|
||||
else:
|
||||
raise PDFFontError('Invalid Font: %r' % spec)
|
||||
if STRICT:
|
||||
raise PDFFontError('Invalid Font: %r' % spec)
|
||||
font = PDFType1Font(spec) # this is so wrong!
|
||||
if objid:
|
||||
self.fonts[objid] = font
|
||||
return font
|
||||
|
@ -480,14 +485,17 @@ class PDFContentParser(PSStackParser):
|
|||
objs = self.partobj
|
||||
(type0, self.partobj) = self.context.pop()
|
||||
if len(objs) % 2 != 0:
|
||||
raise PSTypeError('invalid dictionary construct: %r' % objs)
|
||||
if STRICT:
|
||||
raise PSTypeError('invalid dictionary construct: %r' % objs)
|
||||
dic = dict( (literal_name(k), v) for (k,v) in choplist(2, objs) )
|
||||
pos += len('ID ')
|
||||
self.fp.seek(pos)
|
||||
data = self.fp.read(8192)
|
||||
# XXX how do we know the real length other than scanning?
|
||||
m = self.EOIPAT.search(data)
|
||||
assert m
|
||||
data = ''
|
||||
while 1:
|
||||
data += self.fp.read(4096)
|
||||
m = self.EOIPAT.search(data)
|
||||
if m: break
|
||||
objlen = m.start(0)
|
||||
obj = PDFStream(dic, data[:objlen])
|
||||
self.push(obj)
|
||||
|
@ -731,7 +739,9 @@ class PDFPageInterpreter:
|
|||
try:
|
||||
self.textstate.font = self.fontmap[literal_name(fontid)]
|
||||
except KeyError:
|
||||
raise PDFInterpreterError('Undefined font id: %r' % fontid)
|
||||
if STRICT:
|
||||
raise PDFInterpreterError('Undefined font id: %r' % fontid)
|
||||
return
|
||||
self.textstate.fontsize = fontsize
|
||||
return
|
||||
# setrendering
|
||||
|
@ -816,7 +826,9 @@ class PDFPageInterpreter:
|
|||
try:
|
||||
xobj = stream_value(self.xobjmap[xobjid])
|
||||
except KeyError:
|
||||
raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
|
||||
if STRICT:
|
||||
raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
|
||||
return
|
||||
if xobj.dic['Subtype'] == LITERAL_FORM:
|
||||
if 1 <= self.debug:
|
||||
print >>stderr, 'Processing xobj: %r' % xobj
|
||||
|
@ -897,7 +909,8 @@ class PDFPageInterpreter:
|
|||
print >>stderr, 'exec: %s' % (obj.name)
|
||||
func()
|
||||
else:
|
||||
raise PDFInterpreterError('unknown operator: %r' % obj.name)
|
||||
if STRICT:
|
||||
raise PDFInterpreterError('unknown operator: %r' % obj.name)
|
||||
else:
|
||||
self.push(obj)
|
||||
return
|
||||
|
|
120
pdfparser.py
120
pdfparser.py
|
@ -24,7 +24,7 @@ from utils import choplist, nunpack
|
|||
from psparser import PSException, PSSyntaxError, PSTypeError, \
|
||||
PSLiteral, PSKeyword, PSLiteralTable, PSKeywordTable, \
|
||||
literal_name, keyword_name, \
|
||||
PSStackParser
|
||||
PSStackParser, STRICT
|
||||
|
||||
|
||||
## PDF Exceptions
|
||||
|
@ -52,7 +52,8 @@ class PDFObjRef:
|
|||
|
||||
def __init__(self, doc, objid, genno):
|
||||
if objid == 0:
|
||||
raise PDFValueError('objid cannot be 0.')
|
||||
if STRICT:
|
||||
raise PDFValueError('objid cannot be 0.')
|
||||
self.doc = doc
|
||||
self.objid = objid
|
||||
#self.genno = genno # Never used.
|
||||
|
@ -94,43 +95,57 @@ def resolveall(x):
|
|||
def int_value(x):
|
||||
x = resolve1(x)
|
||||
if not isinstance(x, int):
|
||||
raise PDFTypeError('integer required: %r' % x)
|
||||
if STRICT:
|
||||
raise PDFTypeError('integer required: %r' % x)
|
||||
return 0
|
||||
return x
|
||||
|
||||
def float_value(x):
|
||||
x = resolve1(x)
|
||||
if not isinstance(x, float):
|
||||
raise PDFTypeError('float required: %r' % x)
|
||||
if STRICT:
|
||||
raise PDFTypeError('float required: %r' % x)
|
||||
return 0.0
|
||||
return x
|
||||
|
||||
def num_value(x):
|
||||
x = resolve1(x)
|
||||
if not (isinstance(x, int) or isinstance(x, float)):
|
||||
raise PDFTypeError('int or float required: %r' % x)
|
||||
if STRICT:
|
||||
raise PDFTypeError('int or float required: %r' % x)
|
||||
return 0
|
||||
return x
|
||||
|
||||
def str_value(x):
|
||||
x = resolve1(x)
|
||||
if not isinstance(x, str):
|
||||
raise PDFTypeError('string required: %r' % x)
|
||||
if STRICT:
|
||||
raise PDFTypeError('string required: %r' % x)
|
||||
return ''
|
||||
return x
|
||||
|
||||
def list_value(x):
|
||||
x = resolve1(x)
|
||||
if not (isinstance(x, list) or isinstance(x, tuple)):
|
||||
raise PDFTypeError('list required: %r' % x)
|
||||
if STRICT:
|
||||
raise PDFTypeError('list required: %r' % x)
|
||||
return []
|
||||
return x
|
||||
|
||||
def dict_value(x):
|
||||
x = resolve1(x)
|
||||
if not isinstance(x, dict):
|
||||
raise PDFTypeError('dict required: %r' % x)
|
||||
if STRICT:
|
||||
raise PDFTypeError('dict required: %r' % x)
|
||||
return {}
|
||||
return x
|
||||
|
||||
def stream_value(x):
|
||||
x = resolve1(x)
|
||||
if not isinstance(x, PDFStream):
|
||||
raise PDFTypeError('stream required: %r' % x)
|
||||
if STRICT:
|
||||
raise PDFTypeError('stream required: %r' % x)
|
||||
return PDFStream({}, '')
|
||||
return x
|
||||
|
||||
|
||||
|
@ -186,7 +201,8 @@ class PDFStream:
|
|||
ent0 = ent1
|
||||
data = buf
|
||||
else:
|
||||
raise PDFValueError('Invalid filter spec: %r' % f)
|
||||
if STRICT:
|
||||
raise PDFValueError('Invalid filter spec: %r' % f)
|
||||
self.data = data
|
||||
self.rawdata = None
|
||||
return
|
||||
|
@ -235,12 +251,15 @@ class PDFXRef:
|
|||
while 1:
|
||||
(_, line) = parser.nextline()
|
||||
if not line:
|
||||
raise PDFSyntaxError('premature eof: %r' % parser)
|
||||
if STRICT:
|
||||
raise PDFSyntaxError('premature eof: %r' % parser)
|
||||
break
|
||||
line = line.strip()
|
||||
f = line.split(' ')
|
||||
if len(f) != 2:
|
||||
if line != 'trailer':
|
||||
raise PDFSyntaxError('trailer not found: %r: line=%r' % (parser, line))
|
||||
if STRICT:
|
||||
raise PDFSyntaxError('trailer not found: %r: line=%r' % (parser, line))
|
||||
break
|
||||
(start, nobjs) = map(long, f)
|
||||
self.objid0 = start
|
||||
|
@ -250,7 +269,9 @@ class PDFXRef:
|
|||
(_, line) = parser.nextline()
|
||||
f = line.strip().split(' ')
|
||||
if len(f) != 3:
|
||||
raise PDFSyntaxError('invalid xref format: %r, line=%r' % (parser, line))
|
||||
if STRICT:
|
||||
raise PDFSyntaxError('invalid xref format: %r, line=%r' % (parser, line))
|
||||
continue
|
||||
(pos, genno, use) = f
|
||||
self.offsets.append((int(genno), long(pos), use))
|
||||
# read trailer
|
||||
|
@ -259,10 +280,11 @@ class PDFXRef:
|
|||
|
||||
def getpos(self, objid):
|
||||
if objid < self.objid0 or self.objid1 <= objid:
|
||||
raise IndexError
|
||||
raise IndexError(objid)
|
||||
(genno, pos, use) = self.offsets[objid-self.objid0]
|
||||
if use != 'n':
|
||||
raise PDFValueError('unused objid=%r' % objid)
|
||||
if STRICT:
|
||||
raise PDFValueError('unused objid=%r' % objid)
|
||||
return (None, pos)
|
||||
|
||||
|
||||
|
@ -272,7 +294,8 @@ class PDFXRefStream:
|
|||
|
||||
def __init__(self, parser):
|
||||
(objid, genno, _, stream) = list_value(parser.parse())
|
||||
assert stream.dic['Type'] == LITERAL_XREF
|
||||
if STRICT:
|
||||
assert stream.dic['Type'] == LITERAL_XREF
|
||||
size = stream.dic['Size']
|
||||
(start, nobjs) = stream.dic.get('Index', (0,size))
|
||||
self.objid0 = start
|
||||
|
@ -285,7 +308,7 @@ class PDFXRefStream:
|
|||
|
||||
def getpos(self, objid):
|
||||
if objid < self.objid0 or self.objid1 <= objid:
|
||||
raise IndexError
|
||||
raise IndexError(objid)
|
||||
i = self.entlen * (objid-self.objid0)
|
||||
ent = self.data[i:i+self.entlen]
|
||||
f1 = nunpack(ent[:self.fl1], 1)
|
||||
|
@ -334,7 +357,7 @@ class PDFDocument:
|
|||
return
|
||||
|
||||
def getobj(self, objid):
|
||||
assert self.xrefs
|
||||
#assert self.xrefs
|
||||
if objid in self.objs:
|
||||
obj = self.objs[objid]
|
||||
else:
|
||||
|
@ -345,13 +368,20 @@ class PDFDocument:
|
|||
except IndexError:
|
||||
pass
|
||||
else:
|
||||
raise PDFValueError('Cannot locate objid=%r' % objid)
|
||||
if STRICT:
|
||||
raise PDFValueError('Cannot locate objid=%r' % objid)
|
||||
return None
|
||||
if strmid:
|
||||
stream = stream_value(self.getobj(strmid))
|
||||
if stream.dic['Type'] != LITERAL_OBJSTM:
|
||||
raise PDFSyntaxError('Not a stream object: %r' % stream)
|
||||
if 'N' not in stream.dic:
|
||||
raise PDFSyntaxError('N is not defined: %r' % stream)
|
||||
if STRICT:
|
||||
raise PDFSyntaxError('Not a stream object: %r' % stream)
|
||||
try:
|
||||
n = stream.dic['N']
|
||||
except KeyError:
|
||||
if STRICT:
|
||||
raise PDFSyntaxError('N is not defined: %r' % stream)
|
||||
n = 0
|
||||
if strmid in self.parsed_objs:
|
||||
objs = self.parsed_objs[stream]
|
||||
else:
|
||||
|
@ -363,8 +393,10 @@ class PDFDocument:
|
|||
else:
|
||||
prevpos = self.parser.seek(index)
|
||||
seq = list_value(self.parser.parse())
|
||||
if not (len(seq) == 4 and seq[0] == objid and seq[2] == KEYWORD_OBJ):
|
||||
raise PDFSyntaxError('invalid stream spec: %r' % seq)
|
||||
if not (4 <= len(seq) and seq[0] == objid and seq[2] == KEYWORD_OBJ):
|
||||
if STRICT:
|
||||
raise PDFSyntaxError('invalid stream spec: %r' % seq)
|
||||
return None
|
||||
obj = seq[3]
|
||||
self.parser.seek(prevpos)
|
||||
if 2 <= self.debug:
|
||||
|
@ -373,7 +405,7 @@ class PDFDocument:
|
|||
return obj
|
||||
|
||||
def get_pages(self, debug=0):
|
||||
assert self.xrefs
|
||||
#assert self.xrefs
|
||||
def search(obj, parent):
|
||||
tree = dict_value(obj).copy()
|
||||
for (k,v) in parent.iteritems():
|
||||
|
@ -397,7 +429,8 @@ class PDFDocument:
|
|||
self.root = root
|
||||
self.catalog = dict_value(self.root)
|
||||
if self.catalog['Type'] != LITERAL_CATALOG:
|
||||
raise PDFValueError('Catalog not found!')
|
||||
if STRICT:
|
||||
raise PDFValueError('Catalog not found!')
|
||||
self.outline = self.catalog.get('Outline')
|
||||
return
|
||||
|
||||
|
@ -437,24 +470,24 @@ class PDFParser(PSStackParser):
|
|||
# stream object
|
||||
(dic,) = self.pop(1)
|
||||
dic = dict_value(dic)
|
||||
if 'Length' not in dic:
|
||||
raise PDFValueError('/Length is undefined: %r' % dic)
|
||||
objlen = int_value(dic['Length'])
|
||||
try:
|
||||
objlen = int_value(dic['Length'])
|
||||
except KeyError:
|
||||
if STRICT:
|
||||
raise PDFValueError('/Length is undefined: %r' % dic)
|
||||
objlen = 0
|
||||
self.seek(pos)
|
||||
(_, line) = self.nextline() # 'stream'
|
||||
self.fp.seek(pos+len(line))
|
||||
pos += len(line)
|
||||
self.fp.seek(pos)
|
||||
data = self.fp.read(objlen)
|
||||
self.seek(pos+len(line)+objlen)
|
||||
self.seek(pos+objlen)
|
||||
while 1:
|
||||
(linepos, line) = self.nextline()
|
||||
if not line:
|
||||
raise PDFSyntaxError('premature eof, need endstream: linepos=%d, line=%r' %
|
||||
(linepos, line))
|
||||
if line.strip():
|
||||
if not line.startswith('endstream'):
|
||||
raise PDFSyntaxError('need endstream: linepos=%d, line=%r' %
|
||||
(linepos, line))
|
||||
if not line or line.startswith('endstream'):
|
||||
break
|
||||
objlen += len(line)
|
||||
data += line
|
||||
if 1 <= self.debug:
|
||||
print >>stderr, 'Stream: pos=%d, objlen=%d, dic=%r, data=%r...' % \
|
||||
(pos, objlen, dic, data[:10])
|
||||
|
@ -477,7 +510,9 @@ class PDFParser(PSStackParser):
|
|||
if line:
|
||||
prev = line
|
||||
else:
|
||||
raise PDFSyntaxError('startxref not found!')
|
||||
if STRICT:
|
||||
raise PDFSyntaxError('startxref not found!')
|
||||
prev = 0
|
||||
if 1 <= self.debug:
|
||||
print >>stderr, 'xref found: pos=%r' % prev
|
||||
self.seek(long(prev))
|
||||
|
@ -495,10 +530,11 @@ class PDFParser(PSStackParser):
|
|||
# XRefStream: PDF-1.5
|
||||
self.seek(linepos)
|
||||
xref = PDFXRefStream(self)
|
||||
elif line.strip() != 'xref':
|
||||
raise PDFSyntaxError('xref not found: linepos=%d, line=%r' %
|
||||
(linepos, line))
|
||||
else:
|
||||
if line.strip() != 'xref':
|
||||
if STRICT:
|
||||
raise PDFSyntaxError('xref not found: linepos=%d, line=%r' %
|
||||
(linepos, line))
|
||||
xref = PDFXRef(self)
|
||||
yield xref
|
||||
trailer = xref.trailer
|
||||
|
|
70
psparser.py
70
psparser.py
|
@ -3,6 +3,8 @@ import sys, re
|
|||
stderr = sys.stderr
|
||||
from utils import choplist
|
||||
|
||||
STRICT = 0
|
||||
|
||||
|
||||
## PS Exceptions
|
||||
##
|
||||
|
@ -73,12 +75,18 @@ PSKeywordTable = PSSymbolTable(PSKeyword)
|
|||
|
||||
def literal_name(x):
|
||||
if not isinstance(x, PSLiteral):
|
||||
raise PSTypeError('literal required: %r' % x)
|
||||
if STRICT:
|
||||
raise PSTypeError('literal required: %r' % x)
|
||||
else:
|
||||
return str(x)
|
||||
return x.name
|
||||
|
||||
def keyword_name(x):
|
||||
if not isinstance(x, PSKeyword):
|
||||
raise PSTypeError('keyword required: %r' % x)
|
||||
if STRICT:
|
||||
raise PSTypeError('keyword required: %r' % x)
|
||||
else:
|
||||
return str(x)
|
||||
return x.name
|
||||
|
||||
|
||||
|
@ -237,23 +245,30 @@ class PSBaseParser:
|
|||
s += s1[-1:]
|
||||
(linepos, line) = self.nextline()
|
||||
if not line:
|
||||
raise PSSyntaxError('end inside string: linepos=%d, line=%r' %
|
||||
(linepos, line))
|
||||
if STRICT:
|
||||
raise PSSyntaxError('end inside string: linepos=%d, line=%r' %
|
||||
(linepos, line))
|
||||
break
|
||||
charpos = 0
|
||||
elif charpos == len(line):
|
||||
s += s1
|
||||
(linepos, line) = self.nextline()
|
||||
if not line:
|
||||
raise PSSyntaxError('end inside string: linepos=%d, line=%r' %
|
||||
(linepos, line))
|
||||
if STRICT:
|
||||
raise PSSyntaxError('end inside string: linepos=%d, line=%r' %
|
||||
(linepos, line))
|
||||
break
|
||||
charpos = 0
|
||||
else:
|
||||
s += s1
|
||||
break
|
||||
if line[charpos] != ')':
|
||||
raise PSSyntaxError('no close paren: linepos=%d, line=%r' %
|
||||
(linepos, line))
|
||||
charpos += 1
|
||||
if line[charpos] == ')':
|
||||
charpos += 1
|
||||
else:
|
||||
if STRICT:
|
||||
raise PSSyntaxError('no close paren: linepos=%d, line=%r' %
|
||||
(linepos, line))
|
||||
pass
|
||||
def convesc(m):
|
||||
x = m.group(0)
|
||||
if x[1:].isdigit():
|
||||
|
@ -271,10 +286,12 @@ class PSBaseParser:
|
|||
# hex string object
|
||||
ms = self.STRING_HEX.match(line, charpos)
|
||||
charpos = ms.end(0)
|
||||
if line[charpos] != '>':
|
||||
raise PSSyntaxError('no close paren: linepos=%d, line=%r' %
|
||||
(linepos, line))
|
||||
charpos += 1
|
||||
if line[charpos] == '>':
|
||||
charpos += 1
|
||||
else:
|
||||
if STRICT:
|
||||
raise PSSyntaxError('no close paren: linepos=%d, line=%r' %
|
||||
(linepos, line))
|
||||
def convhex(m1):
|
||||
return chr(int(m1.group(0), 16))
|
||||
s = self.STRING_HEX_SUB.sub(convhex, ms.group(0))
|
||||
|
@ -341,7 +358,8 @@ class PSStackParser(PSBaseParser):
|
|||
Pop N objects from the stack.
|
||||
'''
|
||||
if len(self.partobj) < n:
|
||||
raise PSSyntaxError('stack too short < %d' % n)
|
||||
if STRICT:
|
||||
raise PSSyntaxError('stack too short < %d' % n)
|
||||
r = self.partobj[-n:]
|
||||
self.partobj = self.partobj[:-n]
|
||||
return r
|
||||
|
@ -366,12 +384,18 @@ class PSStackParser(PSBaseParser):
|
|||
return
|
||||
|
||||
def endobj(type1):
|
||||
assert self.context
|
||||
if not self.context:
|
||||
if STRICT:
|
||||
raise PSTypeError('stack empty.')
|
||||
obj = self.partobj
|
||||
(type0, self.partobj) = self.context.pop()
|
||||
if type0 != type1:
|
||||
raise PSTypeError('type mismatch: %r(%r) != %r(%r)' %
|
||||
(type0, self.partobj, type1, obj))
|
||||
(type0, partobj) = self.context[-1]
|
||||
if type0 == type1:
|
||||
self.partobj = partobj
|
||||
self.context.pop()
|
||||
else:
|
||||
if STRICT:
|
||||
raise PSTypeError('type mismatch: %r(%r) != %r(%r)' %
|
||||
(type0, self.partobj, type1, obj))
|
||||
return obj
|
||||
|
||||
startobj('o')
|
||||
|
@ -407,7 +431,8 @@ class PSStackParser(PSBaseParser):
|
|||
# end dictionary
|
||||
objs = endobj('d')
|
||||
if len(objs) % 2 != 0:
|
||||
raise PSTypeError('invalid dictionary construct: %r' % objs)
|
||||
if STRICT:
|
||||
raise PSTypeError('invalid dictionary construct: %r' % objs)
|
||||
d = dict( (literal_name(k), v) for (k,v) in choplist(2, objs) )
|
||||
if 2 <= self.debug:
|
||||
print >>stderr, 'end dict: %r' % d
|
||||
|
@ -415,4 +440,5 @@ class PSStackParser(PSBaseParser):
|
|||
elif self.do_token(pos, t):
|
||||
break
|
||||
|
||||
return endobj('o')
|
||||
objs = endobj('o')
|
||||
return objs
|
||||
|
|
Loading…
Reference in New Issue