2007-12-31 03:41:45 +00:00
|
|
|
#!/usr/bin/env python
|
|
|
|
import sys, re
|
|
|
|
stderr = sys.stderr
|
|
|
|
from utils import choplist
|
|
|
|
|
2008-02-03 11:47:24 +00:00
|
|
|
STRICT = 0
|
2008-01-20 04:44:16 +00:00
|
|
|
|
2007-12-31 03:41:45 +00:00
|
|
|
|
|
|
|
## PS Exceptions
##
class PSException(Exception):
    """Base class for every error raised by the PostScript parser."""
    pass

class PSEOF(PSException):
    """Raised when the underlying stream runs out of data."""
    pass

class PSSyntaxError(PSException):
    """Raised on a malformed PostScript construct."""
    pass

class PSTypeError(PSException):
    """Raised when an object does not have the expected type."""
    pass

class PSValueError(PSException):
    """Raised when an object does not have the expected value."""
    pass
|
|
|
|
|
|
|
|
|
2008-01-07 13:47:52 +00:00
|
|
|
## Basic PostScript Types
##

# PSLiteral
class PSObject(object):
    """Common base class for all PostScript object types (literals, keywords)."""
    pass
|
|
|
|
|
|
|
|
class PSLiteral(PSObject):
    """PS literals (e.g. "/Name").

    Caution: Never create these objects directly.
    Use PSLiteralTable.intern() instead.
    """

    def __init__(self, name):
        # Store the literal's name without the leading slash.
        self.name = name

    def __repr__(self):
        # Render in PostScript notation, e.g. '/Name'.
        return '/' + self.name
|
|
|
|
|
2008-01-07 13:47:52 +00:00
|
|
|
# PSKeyword
class PSKeyword(PSObject):
    """PS keywords (e.g. "showpage").

    Caution: Never create these objects directly.
    Use PSKeywordTable.intern() instead.
    """

    def __init__(self, name):
        # Store the keyword text verbatim.
        self.name = name

    def __repr__(self):
        # A keyword displays as its bare name.
        return self.name
|
|
|
|
|
2008-01-07 13:47:52 +00:00
|
|
|
# PSSymbolTable
class PSSymbolTable(object):
    """Symbol table that stores PSLiteral or PSKeyword.

    Interning the same name twice returns the identical object, so
    interned symbols can be compared by identity.
    """

    def __init__(self, classe):
        # dic maps name -> the unique interned instance.
        self.dic = {}
        # classe is the constructor used for names seen for the first time.
        self.classe = classe

    def intern(self, name):
        """Return the unique symbol for name, creating it on first use."""
        try:
            sym = self.dic[name]
        except KeyError:
            sym = self.classe(name)
            self.dic[name] = sym
        return sym
|
|
|
|
|
|
|
|
# Shared interning tables: two literals (or two keywords) with the same
# name are guaranteed to be the same object, so tokens can be compared
# cheaply by identity/equality.
PSLiteralTable = PSSymbolTable(PSLiteral)
PSKeywordTable = PSSymbolTable(PSKeyword)
# Shorthand constructors used throughout the tokenizer.
LIT = PSLiteralTable.intern
KWD = PSKeywordTable.intern
# Pre-interned keywords for the structural delimiters.
KEYWORD_BRACE_BEGIN = KWD('{')
KEYWORD_BRACE_END = KWD('}')
KEYWORD_ARRAY_BEGIN = KWD('[')
KEYWORD_ARRAY_END = KWD(']')
KEYWORD_DICT_BEGIN = KWD('<<')
KEYWORD_DICT_END = KWD('>>')
|
2007-12-31 03:41:45 +00:00
|
|
|
|
|
|
|
|
|
|
|
def literal_name(x):
    """Return the name of a PSLiteral.

    In strict mode a non-literal raises PSTypeError; otherwise it is
    coerced to a string with str().
    """
    if isinstance(x, PSLiteral):
        return x.name
    if STRICT:
        raise PSTypeError('Literal required: %r' % x)
    return str(x)
|
|
|
|
|
|
|
|
def keyword_name(x):
    """Return the name of a PSKeyword.

    In strict mode a non-keyword raises PSTypeError; otherwise it is
    coerced to a string with str().
    """
    if isinstance(x, PSKeyword):
        return x.name
    if STRICT:
        raise PSTypeError('Keyword required: %r' % x)
    return str(x)
|
|
|
|
|
|
|
|
|
|
|
|
## PSBaseParser
##
EOL = re.compile(r'[\r\n]')                     # an end-of-line character
SPC = re.compile(r'\s')                         # one whitespace character
NONSPC = re.compile(r'\S')                      # one non-whitespace character
HEX = re.compile(r'[0-9a-fA-F]')                # one hexadecimal digit
END_LITERAL = re.compile(r'[#/%\[\]()<>{}\s]')  # character that terminates a /literal
END_HEX_STRING = re.compile(r'[^\s0-9a-fA-F]')  # character that terminates a <...> hex string
HEX_PAIR = re.compile(r'[0-9a-fA-F]{2}|.')      # two hex digits, or any single leftover character
END_NUMBER = re.compile(r'[^0-9]')              # character that terminates a digit run
END_KEYWORD = re.compile(r'[#/%\[\]()<>{}\s]')  # character that terminates a bare keyword
END_STRING = re.compile(r'[()\134]')            # '(', ')' or backslash (\134) inside a (...) string
OCT_STRING = re.compile(r'[0-7]')               # one octal digit (string escapes)
# Named backslash escapes inside (...) strings, mapped to character codes.
ESC_STRING = { 'b':8, 't':9, 'n':10, 'f':12, 'r':13, '(':40, ')':41, '\\':92 }
|
2008-07-09 15:15:32 +00:00
|
|
|
class PSBaseParser(object):

    '''
    Most basic PostScript parser that performs only basic tokenization.

    The tokenizer is a state machine: each parse_* method consumes
    characters from the current buffer and returns a pair
    (next_state_method, next_charpos).  Finished tokens are collected
    in self.tokens as (absolute_position, token) pairs.
    '''

    # Number of bytes fetched from the file per read.
    BUFSIZ = 4096

    def __init__(self, fp, debug=0):
        # fp: a file-like object supporting read/seek/tell.
        self.fp = fp
        self.debug = debug
        self.seek(0)
        return

    def __repr__(self):
        return '<PSBaseParser: %r, bufpos=%d>' % (self.fp, self.bufpos)

    def flush(self):
        # Hook for subclasses; the base parser keeps no pending results.
        return

    def close(self):
        self.flush()
        return

    def tell(self):
        # Absolute file position of the next character to be parsed.
        return self.bufpos+self.charpos

    def poll(self, pos=None, n=80):
        # Debugging aid: dump the next n bytes at pos without disturbing
        # the parser's file position.
        pos0 = self.fp.tell()
        if not pos:
            pos = self.bufpos+self.charpos
        self.fp.seek(pos)
        print >>stderr, 'poll(%d): %r' % (pos, self.fp.read(n))
        self.fp.seek(pos0)
        return

    def seek(self, pos):
        '''
        Seeks the parser to the given position.
        '''
        if 2 <= self.debug:
            print >>stderr, 'seek: %r' % pos
        self.fp.seek(pos)
        # reset the status for nextline()
        self.bufpos = pos
        self.buf = ''
        self.charpos = 0
        # reset the status for nexttoken()
        self.parse1 = self.parse_main
        self.tokens = []
        return

    def fillbuf(self):
        # Refill self.buf once it has been fully consumed; raises PSEOF
        # when the underlying file is exhausted.
        if self.charpos < len(self.buf): return
        # fetch next chunk.
        self.bufpos = self.fp.tell()
        self.buf = self.fp.read(self.BUFSIZ)
        if not self.buf:
            raise PSEOF('Unexpected EOF')
        self.charpos = 0
        return

    def parse_main(self, s, i):
        # Initial state: skip whitespace, then dispatch on the first
        # significant character to the appropriate sub-state.
        m = NONSPC.search(s, i)
        if not m:
            return (self.parse_main, len(s))
        j = m.start(0)
        c = s[j]
        self.tokenstart = self.bufpos+j
        if c == '%':
            self.token = '%'
            return (self.parse_comment, j+1)
        if c == '/':
            self.token = ''
            return (self.parse_literal, j+1)
        if c in '-+' or c.isdigit():
            self.token = c
            return (self.parse_number, j+1)
        if c == '.':
            self.token = c
            return (self.parse_float, j+1)
        if c.isalpha():
            self.token = c
            return (self.parse_keyword, j+1)
        if c == '(':
            self.token = ''
            self.paren = 1
            return (self.parse_string, j+1)
        if c == '<':
            self.token = ''
            return (self.parse_wopen, j+1)
        if c == '>':
            self.token = ''
            return (self.parse_wclose, j+1)
        # Any other character becomes a one-character keyword.
        self.add_token(KWD(c))
        return (self.parse_main, j+1)

    def add_token(self, obj):
        # Record a finished token together with its starting position.
        self.tokens.append((self.tokenstart, obj))
        return

    def parse_comment(self, s, i):
        # Consume characters up to the end of the line.
        m = EOL.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_comment, len(s))
        j = m.start(0)
        self.token += s[i:j]
        # We ignore comments.
        #self.tokens.append(self.token)
        return (self.parse_main, j)

    def parse_literal(self, s, i):
        # Accumulate a /name literal; '#' introduces a two-digit hex escape.
        m = END_LITERAL.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_literal, len(s))
        j = m.start(0)
        self.token += s[i:j]
        c = s[j]
        if c == '#':
            self.hex = ''
            return (self.parse_literal_hex, j+1)
        self.add_token(LIT(self.token))
        return (self.parse_main, j)

    def parse_literal_hex(self, s, i):
        # Collect up to two hex digits after '#' and decode them into
        # one character; a malformed escape (no digits) is dropped.
        c = s[i]
        if HEX.match(c) and len(self.hex) < 2:
            self.hex += c
            return (self.parse_literal_hex, i+1)
        if self.hex:
            self.token += chr(int(self.hex, 16))
        return (self.parse_literal, i)

    def parse_number(self, s, i):
        # Integer part of a number; a '.' switches to float parsing.
        m = END_NUMBER.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_number, len(s))
        j = m.start(0)
        self.token += s[i:j]
        c = s[j]
        if c == '.':
            self.token += c
            return (self.parse_float, j+1)
        try:
            self.add_token(int(self.token))
        except ValueError:
            # e.g. a bare '+' or '-': the malformed number is dropped.
            pass
        return (self.parse_main, j)

    def parse_float(self, s, i):
        # Digits after the decimal point.
        m = END_NUMBER.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_float, len(s))
        j = m.start(0)
        self.token += s[i:j]
        self.add_token(float(self.token))
        return (self.parse_main, j)

    def parse_keyword(self, s, i):
        # Bare keyword; 'true'/'false' become Python booleans.
        m = END_KEYWORD.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_keyword, len(s))
        j = m.start(0)
        self.token += s[i:j]
        if self.token == 'true':
            token = True
        elif self.token == 'false':
            token = False
        else:
            token = KWD(self.token)
        self.add_token(token)
        return (self.parse_main, j)

    def parse_string(self, s, i):
        # (...) string: handles backslash escapes and nested parentheses
        # (self.paren tracks the nesting depth).
        m = END_STRING.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_string, len(s))
        j = m.start(0)
        self.token += s[i:j]
        c = s[j]
        if c == '\\':
            self.oct = ''
            return (self.parse_string_1, j+1)
        if c == '(':
            self.paren += 1
            self.token += c
            return (self.parse_string, j+1)
        if c == ')':
            self.paren -= 1
            if self.paren: # WTF, they said balanced parens need no special treatment.
                self.token += c
                return (self.parse_string, j+1)
        self.add_token(self.token)
        return (self.parse_main, j+1)

    def parse_string_1(self, s, i):
        # Escape sequence inside a string: up to three octal digits, or
        # one of the named escapes in ESC_STRING.  Any other escaped
        # character (e.g. a line break) is simply skipped.
        c = s[i]
        if OCT_STRING.match(c) and len(self.oct) < 3:
            self.oct += c
            return (self.parse_string_1, i+1)
        if self.oct:
            self.token += chr(int(self.oct, 8))
            return (self.parse_string, i)
        if c in ESC_STRING:
            self.token += chr(ESC_STRING[c])
        return (self.parse_string, i+1)

    def parse_wopen(self, s, i):
        # After '<': either the start of a hex string, or a second '<'
        # making the dict-begin keyword.
        c = s[i]
        if c.isspace() or HEX.match(c):
            return (self.parse_hexstring, i)
        if c == '<':
            self.add_token(KEYWORD_DICT_BEGIN)
            i += 1
        return (self.parse_main, i)

    def parse_wclose(self, s, i):
        # After '>': a second '>' makes the dict-end keyword.
        c = s[i]
        if c == '>':
            self.add_token(KEYWORD_DICT_END)
            i += 1
        return (self.parse_main, i)

    def parse_hexstring(self, s, i):
        # <...> hex string: whitespace is removed, then digits are
        # decoded pairwise (HEX_PAIR leaves a trailing odd digit as-is).
        m = END_HEX_STRING.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_hexstring, len(s))
        j = m.start(0)
        self.token += s[i:j]
        token = HEX_PAIR.sub(lambda m: chr(int(m.group(0), 16)),
                             SPC.sub('', self.token))
        self.add_token(token)
        return (self.parse_main, j)

    def nexttoken(self):
        # Drive the state machine until at least one token is available,
        # then return the oldest (position, token) pair.  Raises PSEOF
        # (via fillbuf) at end of input.
        while not self.tokens:
            self.fillbuf()
            (self.parse1, self.charpos) = self.parse1(self.buf, self.charpos)
        token = self.tokens.pop(0)
        if 2 <= self.debug:
            print >>stderr, 'nexttoken: %r' % (token,)
        return token

    def nextline(self):
        '''
        Fetches a next line that ends either with \\r or \\n.
        Returns (position, line); the line terminator is included.
        '''
        linebuf = ''
        linepos = self.bufpos + self.charpos
        eol = False
        while 1:
            self.fillbuf()
            if eol:
                c = self.buf[self.charpos]
                # handle '\r\n'
                if c == '\n':
                    linebuf += c
                    self.charpos += 1
                break
            m = EOL.search(self.buf, self.charpos)
            if m:
                linebuf += self.buf[self.charpos:m.end(0)]
                self.charpos = m.end(0)
                if linebuf[-1] == '\r':
                    # might be the first half of '\r\n': peek at the next
                    # character (possibly in the next buffer) before deciding.
                    eol = True
                else:
                    break
            else:
                # no line break in this buffer; take it all and refill.
                linebuf += self.buf[self.charpos:]
                self.charpos = len(self.buf)
        if 2 <= self.debug:
            print >>stderr, 'nextline: %r' % ((linepos, linebuf),)
        return (linepos, linebuf)

    def revreadlines(self):
        '''
        Fetches a next line backword. This is used to locate
        the trailers at the end of a file.
        '''
        self.fp.seek(0, 2)
        pos = self.fp.tell()
        buf = ''
        while 0 < pos:
            pos = max(0, pos-self.BUFSIZ)
            self.fp.seek(pos)
            s = self.fp.read(self.BUFSIZ)
            if not s: break
            while 1:
                n = max(s.rfind('\r'), s.rfind('\n'))
                if n == -1:
                    # no line break in this chunk: prepend it and read more.
                    buf = s + buf
                    break
                yield buf+s[n:]
                s = s[:n]
                buf = ''
        return
|
|
|
|
|
|
|
|
|
|
|
|
## PSStackParser
##
class PSStackParser(PSBaseParser):

    '''
    Parser that assembles composite objects (arrays and dictionaries)
    from the token stream produced by PSBaseParser.
    '''

    def __init__(self, fp, debug=0):
        PSBaseParser.__init__(self,fp, debug=debug)
        self.reset()
        return

    def reset(self):
        # context: stack of enclosing composite objects being built.
        self.context = []
        # curtype ('a' or 'd') and curstack: the innermost composite
        # object currently under construction.
        self.curtype = None
        self.curstack = []
        # results: completed top-level objects, handed out by nextobject().
        self.results = []
        return

    def seek(self, pos):
        # Seeking discards any partially-built objects.
        PSBaseParser.seek(self, pos)
        self.reset()
        return

    def push(self, *objs):
        # Append (pos, obj) pairs to the current object's stack.
        self.curstack.extend(objs)
        return
    def pop(self, n):
        # Remove and return the last n entries of the current stack.
        objs = self.curstack[-n:]
        self.curstack[-n:] = []
        return objs
    def popall(self):
        # Remove and return every entry of the current stack.
        objs = self.curstack
        self.curstack = []
        return objs
    def add_results(self, *objs):
        # Queue finished objects for nextobject().
        if 2 <= self.debug:
            print >>stderr, 'add_results: %r' % (objs,)
        self.results.extend(objs)
        return

    def start_type(self, pos, type):
        # Open a new composite object: save the current context and
        # begin with an empty stack.
        self.context.append((pos, self.curtype, self.curstack))
        (self.curtype, self.curstack) = (type, [])
        if 2 <= self.debug:
            print >>stderr, 'start_type: pos=%r, type=%r' % (pos, type)
        return
    def end_type(self, type):
        # Close the current composite object and restore the enclosing
        # context; returns (start_pos, contained_objects).
        if self.curtype != type:
            raise PSTypeError('Type mismatch: %r != %r' % (self.curtype, type))
        objs = [ obj for (_,obj) in self.curstack ]
        (pos, self.curtype, self.curstack) = self.context.pop()
        if 2 <= self.debug:
            print >>stderr, 'end_type: pos=%r, type=%r, objs=%r' % (pos, type, objs)
        return (pos, objs)

    def do_keyword(self, pos, token):
        # Hook for subclasses: handle an executable keyword.
        return

    def nextobject(self):
        '''
        Yields a list of objects: keywords, literals, strings,
        numbers, arrays and dictionaries. Arrays and dictionaries
        are represented as Python sequence and dictionaries.
        '''
        while not self.results:
            (pos, token) = self.nexttoken()
            #print (pos,token), (self.curtype, self.curstack)
            if (isinstance(token, int) or
                isinstance(token, float) or
                isinstance(token, bool) or
                isinstance(token, str) or
                isinstance(token, PSLiteral)):
                # normal token
                self.push((pos, token))
            elif token == KEYWORD_ARRAY_BEGIN:
                # begin array
                self.start_type(pos, 'a')
            elif token == KEYWORD_ARRAY_END:
                # end array
                try:
                    self.push(self.end_type('a'))
                except PSTypeError:
                    if STRICT: raise
            elif token == KEYWORD_DICT_BEGIN:
                # begin dictionary
                self.start_type(pos, 'd')
            elif token == KEYWORD_DICT_END:
                # end dictionary
                try:
                    (pos, objs) = self.end_type('d')
                    # dictionary contents come as alternating key/value pairs.
                    if len(objs) % 2 != 0:
                        raise PSSyntaxError('Invalid dictionary construct: %r' % objs)
                    d = dict( (literal_name(k), v) for (k,v) in choplist(2, objs))
                    self.push((pos, d))
                except PSTypeError:
                    if STRICT: raise
            else:
                # executable keyword: delegate to the subclass hook; flush
                # only when no composite object is still open.
                if 2 <= self.debug:
                    print >>stderr, 'do_keyword: pos=%r, token=%r, stack=%r' % \
                          (pos, token, self.curstack)
                self.do_keyword(pos, token)
                if self.context:
                    continue
                else:
                    self.flush()
        obj = self.results.pop(0)
        if 2 <= self.debug:
            print >>stderr, 'nextobject: %r' % (obj,)
        return obj
|
2007-12-31 03:41:45 +00:00
|
|
|
|
2008-02-03 09:36:34 +00:00
|
|
|
|
|
|
|
## Simplistic Test cases
##
import unittest
class TestPSBaseParser(unittest.TestCase):

    # Sample input exercising every token type (comments, literals with
    # hex escapes, numbers, strings with octal/named escapes and nesting,
    # hex strings, keywords, arrays and dictionaries).
    TESTDATA = r'''%!PS
begin end
 "  @ #
/a/BCD /Some_Name /foo#5f#xbaa
0 +1 -2 .5 1.234
(abc) () (abc ( def ) ghi)
(def\040\0\0404ghi) (bach\\slask) (foo\nbaa)
(this % is not a comment.)
(foo
baa)
(foo\
baa)
<20> < 40 4020 >
<abcd00
12345>
func/a/b{(c)do*}def
[ 1 (z) ! ]
<< /foo (bar) >>
'''

    # Expected (position, token) pairs produced by PSBaseParser for TESTDATA.
    TOKENS = [
      (5, KWD('begin')), (11, KWD('end')), (16, KWD('"')), (19, KWD('@')),
      (21, KWD('#')), (23, LIT('a')), (25, LIT('BCD')), (30, LIT('Some_Name')),
      (41, LIT('foo_xbaa')), (54, 0), (56, 1), (59, -2), (62, 0.5),
      (65, 1.234), (71, 'abc'), (77, ''), (80, 'abc ( def ) ghi'),
      (98, 'def \x00 4ghi'), (118, 'bach\\slask'), (132, 'foo\nbaa'),
      (143, 'this % is not a comment.'), (170, 'foo\nbaa'), (180, 'foobaa'),
      (191, ' '), (196, '@@ '), (208, '\xab\xcd\x00\x124\x05'),
      (223, KWD('func')), (227, LIT('a')), (229, LIT('b')),
      (231, KWD('{')), (232, 'c'), (235, KWD('do*')), (238, KWD('}')),
      (239, KWD('def')), (243, KWD('[')), (245, 1), (247, 'z'), (251, KWD('!')),
      (253, KWD(']')), (255, KWD('<<')), (258, LIT('foo')), (263, 'bar'),
      (269, KWD('>>'))
      ]

    # Expected (position, object) pairs produced by PSStackParser; bare
    # keywords are consumed by do_keyword and arrays/dicts are composed.
    OBJS = [
      (23, LIT('a')), (25, LIT('BCD')), (30, LIT('Some_Name')),
      (41, LIT('foo_xbaa')), (54, 0), (56, 1), (59, -2), (62, 0.5),
      (65, 1.234), (71, 'abc'), (77, ''), (80, 'abc ( def ) ghi'),
      (98, 'def \x00 4ghi'), (118, 'bach\\slask'), (132, 'foo\nbaa'),
      (143, 'this % is not a comment.'), (170, 'foo\nbaa'), (180, 'foobaa'),
      (191, ' '), (196, '@@ '), (208, '\xab\xcd\x00\x124\x05'),
      (227, LIT('a')), (229, LIT('b')), (232, 'c'), (243, [1, 'z']),
      (255, {'foo': 'bar'}),
      ]

    def get_tokens(self, s):
        # Tokenize s with PSBaseParser until PSEOF; return all tokens.
        import StringIO
        class MyParser(PSBaseParser):
            def flush(self):
                self.add_results(*self.popall())
        parser = MyParser(StringIO.StringIO(s), debug=1)
        r = []
        try:
            while 1:
                r.append(parser.nexttoken())
        except PSEOF:
            pass
        return r

    def get_objects(self, s):
        # Parse s with PSStackParser until PSEOF; return all objects.
        import StringIO
        class MyParser(PSStackParser):
            def flush(self):
                self.add_results(*self.popall())
        parser = MyParser(StringIO.StringIO(s), debug=1)
        r = []
        try:
            while 1:
                r.append(parser.nextobject())
        except PSEOF:
            pass
        return r

    def test_1(self):
        # Basic tokenizer output matches the expected TOKENS table.
        tokens = self.get_tokens(self.TESTDATA)
        print tokens
        self.assertEqual(tokens, self.TOKENS)
        return

    def test_2(self):
        # Stack parser output matches the expected OBJS table.
        objs = self.get_objects(self.TESTDATA)
        print objs
        self.assertEqual(objs, self.OBJS)
        return

if __name__ == '__main__': unittest.main()
|