add non-strict mode.

git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@16 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
yusuke.shinyama.dummy 2008-01-20 04:44:16 +00:00
parent 80d17eb79b
commit 94859ea428
3 changed files with 184 additions and 109 deletions

View File

@ -7,7 +7,7 @@ try:
except ImportError: except ImportError:
from StringIO import StringIO from StringIO import StringIO
from psparser import PSException, PSSyntaxError, PSTypeError, \ from psparser import PSException, PSSyntaxError, PSTypeError, \
PSStackParser, PSLiteral, PSKeyword, \ PSStackParser, PSLiteral, PSKeyword, STRICT, \
PSLiteralTable, PSKeywordTable, literal_name, keyword_name PSLiteralTable, PSKeywordTable, literal_name, keyword_name
from pdfparser import PDFException, PDFStream, PDFObjRef, resolve1, \ from pdfparser import PDFException, PDFStream, PDFObjRef, resolve1, \
int_value, float_value, num_value, \ int_value, float_value, num_value, \
@ -84,14 +84,14 @@ class PDFFont:
def __init__(self, descriptor, widths, default_width=None): def __init__(self, descriptor, widths, default_width=None):
self.descriptor = descriptor self.descriptor = descriptor
self.widths = widths self.widths = widths
self.fontname = descriptor['FontName'] self.fontname = descriptor.get('FontName', 'unknown')
if isinstance(self.fontname, PSLiteral): if isinstance(self.fontname, PSLiteral):
self.fontname = literal_name(self.fontname) self.fontname = literal_name(self.fontname)
self.ascent = descriptor['Ascent'] self.ascent = num_value(descriptor.get('Ascent', 0))
self.descent = descriptor['Descent'] self.descent = num_value(descriptor.get('Descent', 0))
self.default_width = default_width or descriptor.get('MissingWidth', 0) self.default_width = default_width or descriptor.get('MissingWidth', 0)
self.leading = descriptor.get('Leading', 0) self.leading = num_value(descriptor.get('Leading', 0))
self.bbox = list_value(descriptor['FontBBox']) self.bbox = list_value(descriptor.get('FontBBox', (0,0,0,0)))
return return
def __repr__(self): def __repr__(self):
@ -155,20 +155,20 @@ class PDFSimpleFont(PDFFont):
class PDFType1Font(PDFSimpleFont): class PDFType1Font(PDFSimpleFont):
def __init__(self, spec): def __init__(self, spec):
if 'BaseFont' not in spec: try:
raise PDFFontError('BaseFont is missing')
self.basefont = literal_name(spec['BaseFont']) self.basefont = literal_name(spec['BaseFont'])
except KeyError:
if STRICT:
raise PDFFontError('BaseFont is missing')
self.basefont = 'unknown'
try: try:
(descriptor, widths) = FontMetricsDB.get_metrics(self.basefont) (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont)
except KeyError: except KeyError:
try: descriptor = dict_value(spec.get('FontDescriptor', {}))
descriptor = dict_value(spec['FontDescriptor']) firstchar = int_value(spec.get('FirstChar', 0))
firstchar = int_value(spec['FirstChar']) lastchar = int_value(spec.get('LastChar', 255))
lastchar = int_value(spec['LastChar']) widths = list_value(spec.get('Widths', [0]*256))
widths = dict( (i+firstchar,w) for (i,w) widths = dict( (i+firstchar,w) for (i,w) in enumerate(widths) )
in enumerate(list_value(spec['Widths'])) )
except KeyError, k:
raise PDFFontError('%s is missing' % k)
PDFSimpleFont.__init__(self, descriptor, widths, spec) PDFSimpleFont.__init__(self, descriptor, widths, spec)
return return
@ -179,13 +179,10 @@ class PDFTrueTypeFont(PDFType1Font):
# PDFType3Font # PDFType3Font
class PDFType3Font(PDFSimpleFont): class PDFType3Font(PDFSimpleFont):
def __init__(self, spec): def __init__(self, spec):
try: firstchar = int_value(spec.get('FirstChar', 0))
firstchar = int_value(spec['FirstChar']) lastchar = int_value(spec.get('LastChar', 0))
lastchar = int_value(spec['LastChar']) widths = list_value(spec.get('Widths', [0]*256))
widths = dict( (i+firstchar,w) for (i,w) widths = dict( (i+firstchar,w) for (i,w) in enumerate(widths))
in enumerate(list_value(spec['Widths'])) )
except KeyError, k:
raise PDFFontError('%s is missing' % k)
if 'FontDescriptor' in spec: if 'FontDescriptor' in spec:
descriptor = dict_value(spec['FontDescriptor']) descriptor = dict_value(spec['FontDescriptor'])
else: else:
@ -215,7 +212,8 @@ class TrueTypeFont:
return return
def create_cmap(self): def create_cmap(self):
if 'cmap' not in self.tables: raise TrueTypeFont.CMapNotFound if 'cmap' not in self.tables:
raise TrueTypeFont.CMapNotFound
(base_offset, length) = self.tables['cmap'] (base_offset, length) = self.tables['cmap']
fp = self.fp fp = self.fp
fp.seek(base_offset) fp.seek(base_offset)
@ -274,15 +272,15 @@ class TrueTypeFont:
class PDFCIDFont(PDFFont): class PDFCIDFont(PDFFont):
def __init__(self, spec): def __init__(self, spec):
if 'BaseFont' not in spec:
raise PDFFontError('BaseFont is missing')
try: try:
self.cidsysteminfo = dict_value(spec['CIDSystemInfo'])
self.cidcoding = '%s-%s' % (self.cidsysteminfo['Registry'],
self.cidsysteminfo['Ordering'])
except KeyError:
raise PDFFontError('CIDSystemInfo not properly defined.')
self.basefont = literal_name(spec['BaseFont']) self.basefont = literal_name(spec['BaseFont'])
except KeyError:
if STRICT:
raise PDFFontError('BaseFont is missing')
self.basefont = 'unknown'
self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {}))
self.cidcoding = '%s-%s' % (self.cidsysteminfo.get('Registry', 'unknown'),
self.cidsysteminfo.get('Ordering', 'unknown'))
self.cmap = CMapDB.get_cmap(literal_name(spec['Encoding'])) self.cmap = CMapDB.get_cmap(literal_name(spec['Encoding']))
descriptor = dict_value(spec['FontDescriptor']) descriptor = dict_value(spec['FontDescriptor'])
ttf = None ttf = None
@ -391,11 +389,16 @@ class PDFResourceManager:
if objid and objid in self.fonts: if objid and objid in self.fonts:
font = self.fonts[objid] font = self.fonts[objid]
else: else:
assert spec['Type'] == LITERAL_FONT if STRICT:
if spec['Type'] != LITERAL_FONT:
raise PDFFontError('Type is not /Font')
# Create a Font object. # Create a Font object.
if 'Subtype' not in spec: if 'Subtype' in spec:
raise PDFFontError('Font Subtype is not specified.')
subtype = literal_name(spec['Subtype']) subtype = literal_name(spec['Subtype'])
else:
if STRICT:
raise PDFFontError('Font Subtype is not specified.')
subtype = 'Type1'
if subtype in ('Type1', 'MMType1'): if subtype in ('Type1', 'MMType1'):
# Type1 Font # Type1 Font
font = PDFType1Font(spec) font = PDFType1Font(spec)
@ -411,14 +414,16 @@ class PDFResourceManager:
elif subtype == 'Type0': elif subtype == 'Type0':
# Type0 Font # Type0 Font
dfonts = list_value(spec['DescendantFonts']) dfonts = list_value(spec['DescendantFonts'])
assert len(dfonts) == 1 assert dfonts
subspec = dict_value(dfonts[0]).copy() subspec = dict_value(dfonts[0]).copy()
for k in ('Encoding', 'ToUnicode'): for k in ('Encoding', 'ToUnicode'):
if k in spec: if k in spec:
subspec[k] = resolve1(spec[k]) subspec[k] = resolve1(spec[k])
font = self.get_font(None, subspec) font = self.get_font(None, subspec)
else: else:
if STRICT:
raise PDFFontError('Invalid Font: %r' % spec) raise PDFFontError('Invalid Font: %r' % spec)
font = PDFType1Font(spec) # this is so wrong!
if objid: if objid:
self.fonts[objid] = font self.fonts[objid] = font
return font return font
@ -480,14 +485,17 @@ class PDFContentParser(PSStackParser):
objs = self.partobj objs = self.partobj
(type0, self.partobj) = self.context.pop() (type0, self.partobj) = self.context.pop()
if len(objs) % 2 != 0: if len(objs) % 2 != 0:
if STRICT:
raise PSTypeError('invalid dictionary construct: %r' % objs) raise PSTypeError('invalid dictionary construct: %r' % objs)
dic = dict( (literal_name(k), v) for (k,v) in choplist(2, objs) ) dic = dict( (literal_name(k), v) for (k,v) in choplist(2, objs) )
pos += len('ID ') pos += len('ID ')
self.fp.seek(pos) self.fp.seek(pos)
data = self.fp.read(8192)
# XXX how do we know the real length other than scanning? # XXX how do we know the real length other than scanning?
data = ''
while 1:
data += self.fp.read(4096)
m = self.EOIPAT.search(data) m = self.EOIPAT.search(data)
assert m if m: break
objlen = m.start(0) objlen = m.start(0)
obj = PDFStream(dic, data[:objlen]) obj = PDFStream(dic, data[:objlen])
self.push(obj) self.push(obj)
@ -731,7 +739,9 @@ class PDFPageInterpreter:
try: try:
self.textstate.font = self.fontmap[literal_name(fontid)] self.textstate.font = self.fontmap[literal_name(fontid)]
except KeyError: except KeyError:
if STRICT:
raise PDFInterpreterError('Undefined font id: %r' % fontid) raise PDFInterpreterError('Undefined font id: %r' % fontid)
return
self.textstate.fontsize = fontsize self.textstate.fontsize = fontsize
return return
# setrendering # setrendering
@ -816,7 +826,9 @@ class PDFPageInterpreter:
try: try:
xobj = stream_value(self.xobjmap[xobjid]) xobj = stream_value(self.xobjmap[xobjid])
except KeyError: except KeyError:
if STRICT:
raise PDFInterpreterError('Undefined xobject id: %r' % xobjid) raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
return
if xobj.dic['Subtype'] == LITERAL_FORM: if xobj.dic['Subtype'] == LITERAL_FORM:
if 1 <= self.debug: if 1 <= self.debug:
print >>stderr, 'Processing xobj: %r' % xobj print >>stderr, 'Processing xobj: %r' % xobj
@ -897,6 +909,7 @@ class PDFPageInterpreter:
print >>stderr, 'exec: %s' % (obj.name) print >>stderr, 'exec: %s' % (obj.name)
func() func()
else: else:
if STRICT:
raise PDFInterpreterError('unknown operator: %r' % obj.name) raise PDFInterpreterError('unknown operator: %r' % obj.name)
else: else:
self.push(obj) self.push(obj)

View File

@ -24,7 +24,7 @@ from utils import choplist, nunpack
from psparser import PSException, PSSyntaxError, PSTypeError, \ from psparser import PSException, PSSyntaxError, PSTypeError, \
PSLiteral, PSKeyword, PSLiteralTable, PSKeywordTable, \ PSLiteral, PSKeyword, PSLiteralTable, PSKeywordTable, \
literal_name, keyword_name, \ literal_name, keyword_name, \
PSStackParser PSStackParser, STRICT
## PDF Exceptions ## PDF Exceptions
@ -52,6 +52,7 @@ class PDFObjRef:
def __init__(self, doc, objid, genno): def __init__(self, doc, objid, genno):
if objid == 0: if objid == 0:
if STRICT:
raise PDFValueError('objid cannot be 0.') raise PDFValueError('objid cannot be 0.')
self.doc = doc self.doc = doc
self.objid = objid self.objid = objid
@ -94,43 +95,57 @@ def resolveall(x):
def int_value(x): def int_value(x):
x = resolve1(x) x = resolve1(x)
if not isinstance(x, int): if not isinstance(x, int):
if STRICT:
raise PDFTypeError('integer required: %r' % x) raise PDFTypeError('integer required: %r' % x)
return 0
return x return x
def float_value(x): def float_value(x):
x = resolve1(x) x = resolve1(x)
if not isinstance(x, float): if not isinstance(x, float):
if STRICT:
raise PDFTypeError('float required: %r' % x) raise PDFTypeError('float required: %r' % x)
return 0.0
return x return x
def num_value(x): def num_value(x):
x = resolve1(x) x = resolve1(x)
if not (isinstance(x, int) or isinstance(x, float)): if not (isinstance(x, int) or isinstance(x, float)):
if STRICT:
raise PDFTypeError('int or float required: %r' % x) raise PDFTypeError('int or float required: %r' % x)
return 0
return x return x
def str_value(x): def str_value(x):
x = resolve1(x) x = resolve1(x)
if not isinstance(x, str): if not isinstance(x, str):
if STRICT:
raise PDFTypeError('string required: %r' % x) raise PDFTypeError('string required: %r' % x)
return ''
return x return x
def list_value(x): def list_value(x):
x = resolve1(x) x = resolve1(x)
if not (isinstance(x, list) or isinstance(x, tuple)): if not (isinstance(x, list) or isinstance(x, tuple)):
if STRICT:
raise PDFTypeError('list required: %r' % x) raise PDFTypeError('list required: %r' % x)
return []
return x return x
def dict_value(x): def dict_value(x):
x = resolve1(x) x = resolve1(x)
if not isinstance(x, dict): if not isinstance(x, dict):
if STRICT:
raise PDFTypeError('dict required: %r' % x) raise PDFTypeError('dict required: %r' % x)
return {}
return x return x
def stream_value(x): def stream_value(x):
x = resolve1(x) x = resolve1(x)
if not isinstance(x, PDFStream): if not isinstance(x, PDFStream):
if STRICT:
raise PDFTypeError('stream required: %r' % x) raise PDFTypeError('stream required: %r' % x)
return PDFStream({}, '')
return x return x
@ -186,6 +201,7 @@ class PDFStream:
ent0 = ent1 ent0 = ent1
data = buf data = buf
else: else:
if STRICT:
raise PDFValueError('Invalid filter spec: %r' % f) raise PDFValueError('Invalid filter spec: %r' % f)
self.data = data self.data = data
self.rawdata = None self.rawdata = None
@ -235,11 +251,14 @@ class PDFXRef:
while 1: while 1:
(_, line) = parser.nextline() (_, line) = parser.nextline()
if not line: if not line:
if STRICT:
raise PDFSyntaxError('premature eof: %r' % parser) raise PDFSyntaxError('premature eof: %r' % parser)
break
line = line.strip() line = line.strip()
f = line.split(' ') f = line.split(' ')
if len(f) != 2: if len(f) != 2:
if line != 'trailer': if line != 'trailer':
if STRICT:
raise PDFSyntaxError('trailer not found: %r: line=%r' % (parser, line)) raise PDFSyntaxError('trailer not found: %r: line=%r' % (parser, line))
break break
(start, nobjs) = map(long, f) (start, nobjs) = map(long, f)
@ -250,7 +269,9 @@ class PDFXRef:
(_, line) = parser.nextline() (_, line) = parser.nextline()
f = line.strip().split(' ') f = line.strip().split(' ')
if len(f) != 3: if len(f) != 3:
if STRICT:
raise PDFSyntaxError('invalid xref format: %r, line=%r' % (parser, line)) raise PDFSyntaxError('invalid xref format: %r, line=%r' % (parser, line))
continue
(pos, genno, use) = f (pos, genno, use) = f
self.offsets.append((int(genno), long(pos), use)) self.offsets.append((int(genno), long(pos), use))
# read trailer # read trailer
@ -259,9 +280,10 @@ class PDFXRef:
def getpos(self, objid): def getpos(self, objid):
if objid < self.objid0 or self.objid1 <= objid: if objid < self.objid0 or self.objid1 <= objid:
raise IndexError raise IndexError(objid)
(genno, pos, use) = self.offsets[objid-self.objid0] (genno, pos, use) = self.offsets[objid-self.objid0]
if use != 'n': if use != 'n':
if STRICT:
raise PDFValueError('unused objid=%r' % objid) raise PDFValueError('unused objid=%r' % objid)
return (None, pos) return (None, pos)
@ -272,6 +294,7 @@ class PDFXRefStream:
def __init__(self, parser): def __init__(self, parser):
(objid, genno, _, stream) = list_value(parser.parse()) (objid, genno, _, stream) = list_value(parser.parse())
if STRICT:
assert stream.dic['Type'] == LITERAL_XREF assert stream.dic['Type'] == LITERAL_XREF
size = stream.dic['Size'] size = stream.dic['Size']
(start, nobjs) = stream.dic.get('Index', (0,size)) (start, nobjs) = stream.dic.get('Index', (0,size))
@ -285,7 +308,7 @@ class PDFXRefStream:
def getpos(self, objid): def getpos(self, objid):
if objid < self.objid0 or self.objid1 <= objid: if objid < self.objid0 or self.objid1 <= objid:
raise IndexError raise IndexError(objid)
i = self.entlen * (objid-self.objid0) i = self.entlen * (objid-self.objid0)
ent = self.data[i:i+self.entlen] ent = self.data[i:i+self.entlen]
f1 = nunpack(ent[:self.fl1], 1) f1 = nunpack(ent[:self.fl1], 1)
@ -334,7 +357,7 @@ class PDFDocument:
return return
def getobj(self, objid): def getobj(self, objid):
assert self.xrefs #assert self.xrefs
if objid in self.objs: if objid in self.objs:
obj = self.objs[objid] obj = self.objs[objid]
else: else:
@ -345,13 +368,20 @@ class PDFDocument:
except IndexError: except IndexError:
pass pass
else: else:
if STRICT:
raise PDFValueError('Cannot locate objid=%r' % objid) raise PDFValueError('Cannot locate objid=%r' % objid)
return None
if strmid: if strmid:
stream = stream_value(self.getobj(strmid)) stream = stream_value(self.getobj(strmid))
if stream.dic['Type'] != LITERAL_OBJSTM: if stream.dic['Type'] != LITERAL_OBJSTM:
if STRICT:
raise PDFSyntaxError('Not a stream object: %r' % stream) raise PDFSyntaxError('Not a stream object: %r' % stream)
if 'N' not in stream.dic: try:
n = stream.dic['N']
except KeyError:
if STRICT:
raise PDFSyntaxError('N is not defined: %r' % stream) raise PDFSyntaxError('N is not defined: %r' % stream)
n = 0
if strmid in self.parsed_objs: if strmid in self.parsed_objs:
objs = self.parsed_objs[stream] objs = self.parsed_objs[stream]
else: else:
@ -363,8 +393,10 @@ class PDFDocument:
else: else:
prevpos = self.parser.seek(index) prevpos = self.parser.seek(index)
seq = list_value(self.parser.parse()) seq = list_value(self.parser.parse())
if not (len(seq) == 4 and seq[0] == objid and seq[2] == KEYWORD_OBJ): if not (4 <= len(seq) and seq[0] == objid and seq[2] == KEYWORD_OBJ):
if STRICT:
raise PDFSyntaxError('invalid stream spec: %r' % seq) raise PDFSyntaxError('invalid stream spec: %r' % seq)
return None
obj = seq[3] obj = seq[3]
self.parser.seek(prevpos) self.parser.seek(prevpos)
if 2 <= self.debug: if 2 <= self.debug:
@ -373,7 +405,7 @@ class PDFDocument:
return obj return obj
def get_pages(self, debug=0): def get_pages(self, debug=0):
assert self.xrefs #assert self.xrefs
def search(obj, parent): def search(obj, parent):
tree = dict_value(obj).copy() tree = dict_value(obj).copy()
for (k,v) in parent.iteritems(): for (k,v) in parent.iteritems():
@ -397,6 +429,7 @@ class PDFDocument:
self.root = root self.root = root
self.catalog = dict_value(self.root) self.catalog = dict_value(self.root)
if self.catalog['Type'] != LITERAL_CATALOG: if self.catalog['Type'] != LITERAL_CATALOG:
if STRICT:
raise PDFValueError('Catalog not found!') raise PDFValueError('Catalog not found!')
self.outline = self.catalog.get('Outline') self.outline = self.catalog.get('Outline')
return return
@ -437,24 +470,24 @@ class PDFParser(PSStackParser):
# stream object # stream object
(dic,) = self.pop(1) (dic,) = self.pop(1)
dic = dict_value(dic) dic = dict_value(dic)
if 'Length' not in dic: try:
raise PDFValueError('/Length is undefined: %r' % dic)
objlen = int_value(dic['Length']) objlen = int_value(dic['Length'])
except KeyError:
if STRICT:
raise PDFValueError('/Length is undefined: %r' % dic)
objlen = 0
self.seek(pos) self.seek(pos)
(_, line) = self.nextline() # 'stream' (_, line) = self.nextline() # 'stream'
self.fp.seek(pos+len(line)) pos += len(line)
self.fp.seek(pos)
data = self.fp.read(objlen) data = self.fp.read(objlen)
self.seek(pos+len(line)+objlen) self.seek(pos+objlen)
while 1: while 1:
(linepos, line) = self.nextline() (linepos, line) = self.nextline()
if not line: if not line or line.startswith('endstream'):
raise PDFSyntaxError('premature eof, need endstream: linepos=%d, line=%r' %
(linepos, line))
if line.strip():
if not line.startswith('endstream'):
raise PDFSyntaxError('need endstream: linepos=%d, line=%r' %
(linepos, line))
break break
objlen += len(line)
data += line
if 1 <= self.debug: if 1 <= self.debug:
print >>stderr, 'Stream: pos=%d, objlen=%d, dic=%r, data=%r...' % \ print >>stderr, 'Stream: pos=%d, objlen=%d, dic=%r, data=%r...' % \
(pos, objlen, dic, data[:10]) (pos, objlen, dic, data[:10])
@ -477,7 +510,9 @@ class PDFParser(PSStackParser):
if line: if line:
prev = line prev = line
else: else:
if STRICT:
raise PDFSyntaxError('startxref not found!') raise PDFSyntaxError('startxref not found!')
prev = 0
if 1 <= self.debug: if 1 <= self.debug:
print >>stderr, 'xref found: pos=%r' % prev print >>stderr, 'xref found: pos=%r' % prev
self.seek(long(prev)) self.seek(long(prev))
@ -495,10 +530,11 @@ class PDFParser(PSStackParser):
# XRefStream: PDF-1.5 # XRefStream: PDF-1.5
self.seek(linepos) self.seek(linepos)
xref = PDFXRefStream(self) xref = PDFXRefStream(self)
elif line.strip() != 'xref': else:
if line.strip() != 'xref':
if STRICT:
raise PDFSyntaxError('xref not found: linepos=%d, line=%r' % raise PDFSyntaxError('xref not found: linepos=%d, line=%r' %
(linepos, line)) (linepos, line))
else:
xref = PDFXRef(self) xref = PDFXRef(self)
yield xref yield xref
trailer = xref.trailer trailer = xref.trailer

View File

@ -3,6 +3,8 @@ import sys, re
stderr = sys.stderr stderr = sys.stderr
from utils import choplist from utils import choplist
STRICT = 0
## PS Exceptions ## PS Exceptions
## ##
@ -73,12 +75,18 @@ PSKeywordTable = PSSymbolTable(PSKeyword)
def literal_name(x): def literal_name(x):
if not isinstance(x, PSLiteral): if not isinstance(x, PSLiteral):
if STRICT:
raise PSTypeError('literal required: %r' % x) raise PSTypeError('literal required: %r' % x)
else:
return str(x)
return x.name return x.name
def keyword_name(x): def keyword_name(x):
if not isinstance(x, PSKeyword): if not isinstance(x, PSKeyword):
if STRICT:
raise PSTypeError('keyword required: %r' % x) raise PSTypeError('keyword required: %r' % x)
else:
return str(x)
return x.name return x.name
@ -237,23 +245,30 @@ class PSBaseParser:
s += s1[-1:] s += s1[-1:]
(linepos, line) = self.nextline() (linepos, line) = self.nextline()
if not line: if not line:
if STRICT:
raise PSSyntaxError('end inside string: linepos=%d, line=%r' % raise PSSyntaxError('end inside string: linepos=%d, line=%r' %
(linepos, line)) (linepos, line))
break
charpos = 0 charpos = 0
elif charpos == len(line): elif charpos == len(line):
s += s1 s += s1
(linepos, line) = self.nextline() (linepos, line) = self.nextline()
if not line: if not line:
if STRICT:
raise PSSyntaxError('end inside string: linepos=%d, line=%r' % raise PSSyntaxError('end inside string: linepos=%d, line=%r' %
(linepos, line)) (linepos, line))
break
charpos = 0 charpos = 0
else: else:
s += s1 s += s1
break break
if line[charpos] != ')': if line[charpos] == ')':
charpos += 1
else:
if STRICT:
raise PSSyntaxError('no close paren: linepos=%d, line=%r' % raise PSSyntaxError('no close paren: linepos=%d, line=%r' %
(linepos, line)) (linepos, line))
charpos += 1 pass
def convesc(m): def convesc(m):
x = m.group(0) x = m.group(0)
if x[1:].isdigit(): if x[1:].isdigit():
@ -271,10 +286,12 @@ class PSBaseParser:
# hex string object # hex string object
ms = self.STRING_HEX.match(line, charpos) ms = self.STRING_HEX.match(line, charpos)
charpos = ms.end(0) charpos = ms.end(0)
if line[charpos] != '>': if line[charpos] == '>':
charpos += 1
else:
if STRICT:
raise PSSyntaxError('no close paren: linepos=%d, line=%r' % raise PSSyntaxError('no close paren: linepos=%d, line=%r' %
(linepos, line)) (linepos, line))
charpos += 1
def convhex(m1): def convhex(m1):
return chr(int(m1.group(0), 16)) return chr(int(m1.group(0), 16))
s = self.STRING_HEX_SUB.sub(convhex, ms.group(0)) s = self.STRING_HEX_SUB.sub(convhex, ms.group(0))
@ -341,6 +358,7 @@ class PSStackParser(PSBaseParser):
Pop N objects from the stack. Pop N objects from the stack.
''' '''
if len(self.partobj) < n: if len(self.partobj) < n:
if STRICT:
raise PSSyntaxError('stack too short < %d' % n) raise PSSyntaxError('stack too short < %d' % n)
r = self.partobj[-n:] r = self.partobj[-n:]
self.partobj = self.partobj[:-n] self.partobj = self.partobj[:-n]
@ -366,10 +384,16 @@ class PSStackParser(PSBaseParser):
return return
def endobj(type1): def endobj(type1):
assert self.context if not self.context:
if STRICT:
raise PSTypeError('stack empty.')
obj = self.partobj obj = self.partobj
(type0, self.partobj) = self.context.pop() (type0, partobj) = self.context[-1]
if type0 != type1: if type0 == type1:
self.partobj = partobj
self.context.pop()
else:
if STRICT:
raise PSTypeError('type mismatch: %r(%r) != %r(%r)' % raise PSTypeError('type mismatch: %r(%r) != %r(%r)' %
(type0, self.partobj, type1, obj)) (type0, self.partobj, type1, obj))
return obj return obj
@ -407,6 +431,7 @@ class PSStackParser(PSBaseParser):
# end dictionary # end dictionary
objs = endobj('d') objs = endobj('d')
if len(objs) % 2 != 0: if len(objs) % 2 != 0:
if STRICT:
raise PSTypeError('invalid dictionary construct: %r' % objs) raise PSTypeError('invalid dictionary construct: %r' % objs)
d = dict( (literal_name(k), v) for (k,v) in choplist(2, objs) ) d = dict( (literal_name(k), v) for (k,v) in choplist(2, objs) )
if 2 <= self.debug: if 2 <= self.debug:
@ -415,4 +440,5 @@ class PSStackParser(PSBaseParser):
elif self.do_token(pos, t): elif self.do_token(pos, t):
break break
return endobj('o') objs = endobj('o')
return objs