2014-09-01 12:16:49 +00:00
|
|
|
#!/usr/bin/python
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2009-11-03 01:27:30 +00:00
|
|
|
import re
|
2014-06-14 03:00:49 +00:00
|
|
|
import logging
|
2014-09-01 12:16:49 +00:00
|
|
|
|
|
|
|
import six # Python 2+3 compatibility
|
|
|
|
|
2014-09-11 21:38:05 +00:00
|
|
|
def bytesindex(s, i, j=None):
    """Implements s[i], s[i:], s[i:j] for Python 2 and Python 3.

    Always returns a slice, so that on Python 3 indexing a bytes object
    yields a one-byte bytes value rather than an int.  A negative j is
    treated as "up to the end of s".
    """
    if i < 0:
        i += len(s)
    if j is None:
        j = i + 1
    if j < 0:
        j = len(s)
    return s[i:j]
|
2014-09-01 12:16:49 +00:00
|
|
|
|
2014-06-26 09:12:39 +00:00
|
|
|
from .utils import choplist
|
2007-12-31 03:41:45 +00:00
|
|
|
|
2014-09-02 13:49:46 +00:00
|
|
|
STRICT = True
|
2008-01-20 04:44:16 +00:00
|
|
|
|
2007-12-31 03:41:45 +00:00
|
|
|
## PS Exceptions
|
|
|
|
##
|
2013-11-07 08:35:04 +00:00
|
|
|
class PSException(Exception):
    """Base class for every exception raised by the PS/PDF parsers."""
    pass
|
|
|
|
|
|
|
|
|
|
|
|
class PSEOF(PSException):
    """Raised when the parser hits end-of-file while expecting more data."""
    pass
|
|
|
|
|
|
|
|
|
|
|
|
class PSSyntaxError(PSException):
    """Raised for structurally invalid PostScript constructs."""
    pass
|
|
|
|
|
|
|
|
|
|
|
|
class PSTypeError(PSException):
    """Raised when an object of an unexpected PS type is encountered."""
    pass
|
|
|
|
|
|
|
|
|
|
|
|
class PSValueError(PSException):
    """Raised when a PS object carries an invalid value."""
    pass
|
2007-12-31 03:41:45 +00:00
|
|
|
|
|
|
|
|
2008-01-07 13:47:52 +00:00
|
|
|
## Basic PostScript Types
|
2007-12-31 03:41:45 +00:00
|
|
|
##
|
2008-01-07 13:47:52 +00:00
|
|
|
|
2009-06-14 08:54:57 +00:00
|
|
|
## PSObject
|
|
|
|
##
|
2009-11-14 11:29:40 +00:00
|
|
|
class PSObject(object):

    """Base class for all PS or PDF-related data types."""

    pass
|
2008-07-09 15:15:32 +00:00
|
|
|
|
2009-06-14 08:54:57 +00:00
|
|
|
|
|
|
|
## PSLiteral
|
|
|
|
##
|
2008-07-09 15:15:32 +00:00
|
|
|
class PSLiteral(PSObject):

    """A PostScript literal (a name object).

    Literals serve as identifiers: variable names, property names and
    dictionary keys.  They are case sensitive and written with a leading
    slash in PostScript source (e.g. "/Name").

    Note: never instantiate PSLiteral directly; obtain instances through
    PSLiteralTable.intern() so identity comparison works.
    """

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return '/%r' % self.name
|
2007-12-31 03:41:45 +00:00
|
|
|
|
2009-06-14 08:54:57 +00:00
|
|
|
|
|
|
|
## PSKeyword
|
|
|
|
##
|
2008-07-09 15:15:32 +00:00
|
|
|
class PSKeyword(PSObject):

    """A PostScript keyword.

    Keywords are the small set of predefined words that express commands
    and directives in PostScript; they also delimit content boundaries.

    Note: never instantiate PSKeyword directly; obtain instances through
    PSKeywordTable.intern() so identity comparison works.
    """

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return '/%r' % self.name
|
2007-12-31 03:41:45 +00:00
|
|
|
|
2009-06-14 08:54:57 +00:00
|
|
|
|
|
|
|
## PSSymbolTable
|
|
|
|
##
|
2008-07-09 15:15:32 +00:00
|
|
|
class PSSymbolTable(object):

    """Caches PSLiteral/PSKeyword objects, one shared instance per name.

    Because intern() always hands back the same object for a given name,
    interned symbols can be compared for identity with the "is" operator.
    """

    def __init__(self, klass):
        # klass: factory called as klass(name) on a cache miss.
        self.dict = {}
        self.klass = klass

    def intern(self, name):
        """Return the canonical object for *name*, creating it on first use."""
        try:
            return self.dict[name]
        except KeyError:
            obj = self.klass(name)
            self.dict[name] = obj
            return obj
|
2007-12-31 03:41:45 +00:00
|
|
|
|
|
|
|
# Global intern tables: every literal/keyword name maps to a single shared
# object, so interned symbols can be compared with "is".
PSLiteralTable = PSSymbolTable(PSLiteral)
PSKeywordTable = PSSymbolTable(PSKeyword)
# Shorthand constructors for interned literals and keywords.
LIT = PSLiteralTable.intern
KWD = PSKeywordTable.intern
# Pre-interned structural keywords recognized by the tokenizer/parser.
KEYWORD_PROC_BEGIN = KWD(b'{')
KEYWORD_PROC_END = KWD(b'}')
KEYWORD_ARRAY_BEGIN = KWD(b'[')
KEYWORD_ARRAY_END = KWD(b']')
KEYWORD_DICT_BEGIN = KWD(b'<<')
KEYWORD_DICT_END = KWD(b'>>')
|
2007-12-31 03:41:45 +00:00
|
|
|
|
|
|
|
|
|
|
|
def literal_name(x):
    """Return the name of a PSLiteral as a native string.

    If *x* is not a PSLiteral, raise PSTypeError when STRICT is set;
    otherwise fall back to using *x* itself as the name (best effort).
    On Python 3, the (typically bytes) name is decoded as UTF-8 when
    possible and returned unchanged when not.
    """
    if not isinstance(x, PSLiteral):
        if STRICT:
            raise PSTypeError('Literal required: %r' % x)
        else:
            name = x
    else:
        name = x.name
    if six.PY3:
        try:
            name = str(name, 'utf-8')
        except (TypeError, UnicodeDecodeError):
            # TypeError: name is already a str (or not bytes at all);
            # UnicodeDecodeError: name contains non-UTF-8 bytes.
            # Either way, keep the original value as a best effort.
            pass
    return name
|
2013-11-07 08:35:04 +00:00
|
|
|
|
2007-12-31 03:41:45 +00:00
|
|
|
def keyword_name(x):
    """Return the name of a PSKeyword as a native string.

    If *x* is not a PSKeyword, raise PSTypeError when STRICT is set;
    otherwise fall back to using *x* itself as the name (best effort).
    On Python 3, the (typically bytes) name is decoded as UTF-8 when
    possible and returned unchanged when not.
    """
    if not isinstance(x, PSKeyword):
        if STRICT:
            raise PSTypeError('Keyword required: %r' % x)
        else:
            name = x
    else:
        name = x.name
    if six.PY3:
        try:
            name = str(name, 'utf-8')
        except (TypeError, UnicodeDecodeError):
            # TypeError: name is already a str (or not bytes at all);
            # UnicodeDecodeError: name contains non-UTF-8 bytes.
            # Either way, keep the original value as a best effort.
            pass
    return name
|
2007-12-31 03:41:45 +00:00
|
|
|
|
|
|
|
|
|
|
|
## PSBaseParser
|
|
|
|
##
|
2014-06-30 10:05:56 +00:00
|
|
|
# Byte-level patterns used by the tokenizer state machine.
EOL = re.compile(br'[\r\n]')                     # end-of-line: CR or LF
SPC = re.compile(br'\s')                         # one whitespace byte
NONSPC = re.compile(br'\S')                      # one non-whitespace byte
HEX = re.compile(br'[0-9a-fA-F]')                # one hexadecimal digit
END_LITERAL = re.compile(br'[#/%\[\]()<>{}\s]')  # delimiter ending a /literal
END_HEX_STRING = re.compile(br'[^\s0-9a-fA-F]')  # first byte invalid in a hex string
HEX_PAIR = re.compile(br'[0-9a-fA-F]{2}|.')      # two hex digits, or any stray byte
END_NUMBER = re.compile(br'[^0-9]')              # first non-digit byte
END_KEYWORD = re.compile(br'[#/%\[\]()<>{}\s]')  # delimiter ending a keyword
END_STRING = re.compile(br'[()\134]')            # special in (...) strings: parens, backslash
OCT_STRING = re.compile(br'[0-7]')               # one octal digit
# Named escape sequences valid after '\' inside (...) strings, mapped to byte values.
ESC_STRING = {b'b': 8, b't': 9, b'n': 10, b'f': 12, b'r': 13, b'(': 40, b')': 41, b'\\': 92}
|
2013-11-07 08:35:04 +00:00
|
|
|
|
|
|
|
|
2008-07-09 15:15:32 +00:00
|
|
|
class PSBaseParser(object):

    """Most basic PostScript parser that performs only tokenization.

    Reads bytes from a seekable binary file object and produces
    (position, token) pairs via nexttoken().  Tokenization is driven by a
    state machine: self._parse1 always points at the method handling the
    current lexical state.
    """
    # Number of bytes fetched from the underlying file per read.
    BUFSIZ = 4096

    def __init__(self, fp):
        # fp: binary file-like object supporting read()/seek()/tell().
        self.fp = fp
        self.seek(0)
        return

    def __repr__(self):
        return '<%s: %r, bufpos=%d>' % (self.__class__.__name__, self.fp, self.bufpos)

    def flush(self):
        # Hook for subclasses; the base parser has nothing to flush.
        return

    def close(self):
        # Flushes parser state; deliberately does NOT close self.fp —
        # the caller owns the underlying file object.
        self.flush()
        return

    def tell(self):
        # Absolute position = start of current buffer + offset inside it.
        return self.bufpos+self.charpos

    def poll(self, pos=None, n=80):
        # Debugging aid: log the next n bytes at pos (default: current
        # parse position) without disturbing the file position.
        pos0 = self.fp.tell()
        if not pos:
            pos = self.bufpos+self.charpos
        self.fp.seek(pos)
        logging.info('poll(%d): %r', pos, self.fp.read(n))
        self.fp.seek(pos0)
        return

    def seek(self, pos):
        """Seeks the parser to the given position.
        """
        logging.debug('seek: %r', pos)
        self.fp.seek(pos)
        # reset the status for nextline()
        self.bufpos = pos
        self.buf = b''
        self.charpos = 0
        # reset the status for nexttoken()
        self._parse1 = self._parse_main  # current tokenizer state (a bound method)
        self._curtoken = b''             # bytes of the token being accumulated
        self._curtokenpos = 0            # absolute position where it started
        self._tokens = []                # finished (pos, token) pairs, FIFO
        return

    def fillbuf(self):
        # Ensure self.buf holds unread bytes; raises PSEOF at end of file.
        if self.charpos < len(self.buf):
            return
        # fetch next chunk.
        self.bufpos = self.fp.tell()
        self.buf = self.fp.read(self.BUFSIZ)
        if not self.buf:
            raise PSEOF('Unexpected EOF')
        self.charpos = 0
        return

    def nextline(self):
        """Fetches a next line that ends either with \\r or \\n.

        Returns a (position, line) pair; the line keeps its EOL bytes.
        """
        linebuf = b''
        linepos = self.bufpos + self.charpos
        eol = False
        while 1:
            self.fillbuf()
            if eol:
                c = bytesindex(self.buf,self.charpos)
                # handle b'\r\n'
                if c == b'\n':
                    linebuf += c
                    self.charpos += 1
                break
            m = EOL.search(self.buf, self.charpos)
            if m:
                linebuf += bytesindex(self.buf,self.charpos,m.end(0))
                self.charpos = m.end(0)
                if bytesindex(linebuf,-1) == b'\r':
                    # a lone CR may be followed by an LF in the next chunk
                    eol = True
                else:
                    break
            else:
                # no EOL in this chunk: take it all and keep reading
                linebuf += bytesindex(self.buf,self.charpos,-1)
                self.charpos = len(self.buf)
        logging.debug('nextline: %r, %r', linepos, linebuf)

        return (linepos, linebuf)

    def revreadlines(self):
        """Fetches a next line backword.

        This is used to locate the trailers at the end of a file.
        """
        self.fp.seek(0, 2)
        pos = self.fp.tell()
        buf = b''
        while 0 < pos:
            prevpos = pos
            pos = max(0, pos-self.BUFSIZ)
            self.fp.seek(pos)
            s = self.fp.read(prevpos-pos)
            if not s:
                break
            while 1:
                n = max(s.rfind(b'\r'), s.rfind(b'\n'))
                if n == -1:
                    # no EOL left in this chunk: prepend it and read further back
                    buf = s + buf
                    break
                # yield everything after the last EOL, then keep scanning left
                yield bytesindex(s,n,-1)+buf
                s = bytesindex(s,0,n)
                buf = b''
        return

    def _parse_main(self, s, i):
        # Start state: skip whitespace, then dispatch on the first
        # significant byte to the appropriate _parse_* state.
        m = NONSPC.search(s, i)
        if not m:
            return len(s)
        j = m.start(0)
        c = bytesindex(s,j)
        self._curtokenpos = self.bufpos+j
        if c == b'%':
            # comment until end of line
            self._curtoken = b'%'
            self._parse1 = self._parse_comment
            return j+1
        elif c == b'/':
            # name literal
            self._curtoken = b''
            self._parse1 = self._parse_literal
            return j+1
        elif c in b'-+' or c.isdigit():
            self._curtoken = c
            self._parse1 = self._parse_number
            return j+1
        elif c == b'.':
            self._curtoken = c
            self._parse1 = self._parse_float
            return j+1
        elif c.isalpha():
            self._curtoken = c
            self._parse1 = self._parse_keyword
            return j+1
        elif c == b'(':
            # literal string; track parenthesis nesting depth
            self._curtoken = b''
            self.paren = 1
            self._parse1 = self._parse_string
            return j+1
        elif c == b'<':
            # either '<<' (dict begin) or a hex string
            self._curtoken = b''
            self._parse1 = self._parse_wopen
            return j+1
        elif c == b'>':
            self._curtoken = b''
            self._parse1 = self._parse_wclose
            return j+1
        else:
            # any other byte becomes a one-byte keyword token
            self._add_token(KWD(c))
            return j+1

    def _add_token(self, obj):
        # Queue a finished token together with the position where it started.
        self._tokens.append((self._curtokenpos, obj))
        return

    def _parse_comment(self, s, i):
        # Consume bytes up to the next EOL; the comment text is discarded.
        m = EOL.search(s, i)
        if not m:
            self._curtoken += bytesindex(s,i,-1)
            # NOTE(review): every other state returns a plain int offset, but
            # this branch returns a tuple — looks like a latent bug; confirm.
            return (self._parse_comment, len(s))
        j = m.start(0)
        self._curtoken += bytesindex(s,i,j)
        self._parse1 = self._parse_main
        # We ignore comments.
        #self._tokens.append(self._curtoken)
        return j

    def _parse_literal(self, s, i):
        # Accumulate a /name literal until a delimiter byte is reached.
        m = END_LITERAL.search(s, i)
        if not m:
            self._curtoken += bytesindex(s,i,-1)
            return len(s)
        j = m.start(0)
        self._curtoken += bytesindex(s,i,j)
        c = bytesindex(s,j)
        if c == b'#':
            # '#xx' hex escape inside a name
            self.hex = b''
            self._parse1 = self._parse_literal_hex
            return j+1
        try:
            # best-effort decode of the name to str on Python 3
            self._curtoken=str(self._curtoken,'utf-8')
        except:
            pass
        self._add_token(LIT(self._curtoken))
        self._parse1 = self._parse_main
        return j

    def _parse_literal_hex(self, s, i):
        # Collect up to two hex digits after '#', then append the decoded byte.
        c = bytesindex(s,i)
        if HEX.match(c) and len(self.hex) < 2:
            self.hex += c
            return i+1
        if self.hex:
            self._curtoken += six.int2byte(int(self.hex, 16))
        self._parse1 = self._parse_literal
        return i

    def _parse_number(self, s, i):
        # Accumulate an integer token; switch to the float state on '.'.
        m = END_NUMBER.search(s, i)
        if not m:
            self._curtoken += bytesindex(s,i,-1)
            return len(s)
        j = m.start(0)
        self._curtoken += bytesindex(s,i,j)
        c = bytesindex(s,j)
        if c == b'.':
            self._curtoken += c
            self._parse1 = self._parse_float
            return j+1
        try:
            self._add_token(int(self._curtoken))
        except ValueError:
            # e.g. a bare '+' or '-': the token is silently dropped
            pass
        self._parse1 = self._parse_main
        return j

    def _parse_float(self, s, i):
        # Accumulate the fractional part and emit a float token.
        m = END_NUMBER.search(s, i)
        if not m:
            self._curtoken += bytesindex(s,i,-1)
            return len(s)
        j = m.start(0)
        self._curtoken += bytesindex(s,i,j)
        try:
            self._add_token(float(self._curtoken))
        except ValueError:
            # malformed number: silently dropped
            pass
        self._parse1 = self._parse_main
        return j

    def _parse_keyword(self, s, i):
        # Accumulate an alphabetic keyword; 'true'/'false' become booleans.
        m = END_KEYWORD.search(s, i)
        if not m:
            self._curtoken += bytesindex(s,i,-1)
            return len(s)
        j = m.start(0)
        self._curtoken += bytesindex(s,i,j)
        if self._curtoken == b'true':
            token = True
        elif self._curtoken == b'false':
            token = False
        else:
            token = KWD(self._curtoken)
        self._add_token(token)
        self._parse1 = self._parse_main
        return j

    def _parse_string(self, s, i):
        # Inside a (...) string: handle backslash escapes and nested parens.
        m = END_STRING.search(s, i)
        if not m:
            self._curtoken += bytesindex(s,i,-1)
            return len(s)
        j = m.start(0)
        self._curtoken += bytesindex(s,i,j)
        c = bytesindex(s,j)
        if c == b'\\':
            self.oct = b''
            self._parse1 = self._parse_string_1
            return j+1
        if c == b'(':
            self.paren += 1
            self._curtoken += c
            return j+1
        if c == b')':
            self.paren -= 1
            if self.paren: # WTF, they said balanced parens need no special treatment.
                self._curtoken += c
                return j+1
        # outermost ')' reached: the string token is complete
        self._add_token(self._curtoken)
        self._parse1 = self._parse_main
        return j+1

    def _parse_string_1(self, s, i):
        # Right after a backslash inside a string: octal or named escape.
        c = bytesindex(s,i)
        if OCT_STRING.match(c) and len(self.oct) < 3:
            # up to three octal digits
            self.oct += c
            return i+1
        if self.oct:
            self._curtoken += six.int2byte(int(self.oct, 8))
            self._parse1 = self._parse_string
            return i
        if c in ESC_STRING:
            self._curtoken += six.int2byte(ESC_STRING[c])
        # unknown escapes fall through: the escaped byte is skipped
        self._parse1 = self._parse_string
        return i+1

    def _parse_wopen(self, s, i):
        # After '<': either '<<' (dict begin) or the start of a hex string.
        c = bytesindex(s,i)
        if c == b'<':
            self._add_token(KEYWORD_DICT_BEGIN)
            self._parse1 = self._parse_main
            i += 1
        else:
            self._parse1 = self._parse_hexstring
        return i

    def _parse_wclose(self, s, i):
        # After '>': expect '>>' (dict end); a lone '>' is silently dropped.
        c = bytesindex(s,i)
        if c == b'>':
            self._add_token(KEYWORD_DICT_END)
            i += 1
        self._parse1 = self._parse_main
        return i

    def _parse_hexstring(self, s, i):
        # Collect hex digits until an invalid byte, then decode digit pairs
        # into bytes (whitespace inside the hex string is ignored).
        m = END_HEX_STRING.search(s, i)
        if not m:
            self._curtoken += bytesindex(s,i,-1)
            return len(s)
        j = m.start(0)
        self._curtoken += bytesindex(s,i,j)
        token = HEX_PAIR.sub(lambda m: six.int2byte(int(m.group(0), 16)),SPC.sub(b'', self._curtoken))
        self._add_token(token)
        self._parse1 = self._parse_main
        return j

    def nexttoken(self):
        # Run the state machine until at least one token is queued, then
        # return (and remove) the oldest (position, token) pair.
        while not self._tokens:
            self.fillbuf()
            self.charpos = self._parse1(self.buf, self.charpos)
        token = self._tokens.pop(0)
        logging.debug('nexttoken: %r', token)
        return token
|
2008-02-03 09:36:34 +00:00
|
|
|
|
2007-12-31 03:41:45 +00:00
|
|
|
|
|
|
|
## PSStackParser
|
|
|
|
##
|
|
|
|
class PSStackParser(PSBaseParser):

    """Tokenizer plus a stack machine that assembles composite objects
    (arrays, dictionaries, procedures) from the token stream.
    """

    def __init__(self, fp):
        PSBaseParser.__init__(self, fp)
        self.reset()
        return

    def reset(self):
        # context: saved (pos, type, stack) frames for open constructs
        self.context = []
        # curtype: type tag of the innermost open construct ('a'/'d'/'p')
        self.curtype = None
        # curstack: (pos, obj) pairs collected for the current construct
        self.curstack = []
        # results: completed top-level objects, consumed by nextobject()
        self.results = []
        return

    def seek(self, pos):
        # Seeking discards any partially-built objects.
        PSBaseParser.seek(self, pos)
        self.reset()
        return

    def push(self, *objs):
        # Append objects to the current construct's stack.
        self.curstack.extend(objs)
        return

    def pop(self, n):
        # Remove and return the topmost n entries of the current stack.
        objs = self.curstack[-n:]
        self.curstack[-n:] = []
        return objs

    def popall(self):
        # Remove and return the whole current stack.
        objs = self.curstack
        self.curstack = []
        return objs

    def add_results(self, *objs):
        # Record finished objects for nextobject() to hand out.
        try:
            logging.debug('add_results: %r', objs)
        except:
            # repr() of some objects can fail; logging must never break parsing
            logging.debug('add_results: (unprintable object)')
        self.results.extend(objs)
        return

    def start_type(self, pos, type):
        # Open a new composite construct: save the current frame and begin
        # collecting into a fresh stack.
        self.context.append((pos, self.curtype, self.curstack))
        (self.curtype, self.curstack) = (type, [])
        logging.debug('start_type: pos=%r, type=%r', pos, type)
        return

    def end_type(self, type):
        # Close the innermost construct; its type must match the expected one.
        # Returns (start position, list of collected objects).
        if self.curtype != type:
            raise PSTypeError('Type mismatch: %r != %r' % (self.curtype, type))
        objs = [obj for (_, obj) in self.curstack]
        (pos, self.curtype, self.curstack) = self.context.pop()
        logging.debug('end_type: pos=%r, type=%r, objs=%r', pos, type, objs)
        return (pos, objs)

    def do_keyword(self, pos, token):
        # Hook for subclasses: handle an executable keyword token.
        return

    def nextobject(self):
        """Yields a list of objects.

        Returns keywords, literals, strings, numbers, arrays and dictionaries.
        Arrays and dictionaries are represented as Python lists and dictionaries.
        """
        while not self.results:
            (pos, token) = self.nexttoken()
            #print (pos,token), (self.curtype, self.curstack)
            if isinstance(token, (six.integer_types, float, bool, six.string_types, six.binary_type, PSLiteral)):
                # normal token
                self.push((pos, token))
            elif token == KEYWORD_ARRAY_BEGIN:
                # begin array
                self.start_type(pos, 'a')
            elif token == KEYWORD_ARRAY_END:
                # end array
                try:
                    self.push(self.end_type('a'))
                except PSTypeError:
                    if STRICT:
                        raise
            elif token == KEYWORD_DICT_BEGIN:
                # begin dictionary
                self.start_type(pos, 'd')
            elif token == KEYWORD_DICT_END:
                # end dictionary
                try:
                    (pos, objs) = self.end_type('d')
                    # objs alternate key, value; an odd count is malformed
                    if len(objs) % 2 != 0:
                        raise PSSyntaxError('Invalid dictionary construct: %r' % objs)
                    # construct a Python dictionary.
                    d = dict((literal_name(k), v) for (k, v) in choplist(2, objs) if v is not None)
                    self.push((pos, d))
                except PSTypeError:
                    if STRICT:
                        raise
            elif token == KEYWORD_PROC_BEGIN:
                # begin proc
                self.start_type(pos, 'p')
            elif token == KEYWORD_PROC_END:
                # end proc
                try:
                    self.push(self.end_type('p'))
                except PSTypeError:
                    if STRICT:
                        raise
            elif isinstance(token,PSKeyword):
                logging.debug('do_keyword: pos=%r, token=%r, stack=%r', pos, token, self.curstack)
                self.do_keyword(pos, token)
            else:
                logging.error('unknown token: pos=%r, token=%r, stack=%r', pos, token, self.curstack)
                self.do_keyword(pos, token)
                # NOTE(review): bare `raise` with no active exception produces a
                # RuntimeError here; presumably a PSException was intended — confirm.
                raise
            if self.context:
                # still inside an open construct: keep consuming tokens
                continue
            else:
                self.flush()
        obj = self.results.pop(0)
        try:
            logging.debug('nextobject: %r', obj)
        except:
            # repr() of some objects can fail; logging must never break parsing
            logging.debug('nextobject: (unprintable object)')
        return obj
|