docstring fix
parent
cabaa10e4f
commit
a8bf9b159e
|
@ -2,7 +2,6 @@
|
||||||
import sys
|
import sys
|
||||||
from utils import apply_matrix_pt, get_bound, INF
|
from utils import apply_matrix_pt, get_bound, INF
|
||||||
from utils import bbox2str, matrix2str, uniq, csort, Plane
|
from utils import bbox2str, matrix2str, uniq, csort, Plane
|
||||||
from pdffont import PDFUnicodeNotDefined
|
|
||||||
|
|
||||||
|
|
||||||
## LAParams
|
## LAParams
|
||||||
|
|
|
@ -124,11 +124,12 @@ class PDFGraphicState(object):
|
||||||
##
|
##
|
||||||
class PDFResourceManager(object):
|
class PDFResourceManager(object):
|
||||||
|
|
||||||
'''
|
"""Repository of shared resources.
|
||||||
|
|
||||||
ResourceManager facilitates reuse of shared resources
|
ResourceManager facilitates reuse of shared resources
|
||||||
such as fonts and images so that large objects are not
|
such as fonts and images so that large objects are not
|
||||||
allocated multiple times.
|
allocated multiple times.
|
||||||
'''
|
"""
|
||||||
debug = 0
|
debug = 0
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
|
|
@ -51,20 +51,21 @@ class PDFObjRef(PDFObject):
|
||||||
|
|
||||||
# resolve
|
# resolve
|
||||||
def resolve1(x):
|
def resolve1(x):
|
||||||
'''
|
"""Resolves an object.
|
||||||
Resolve an object. If this is an array or dictionary,
|
|
||||||
it may still contain some indirect objects inside.
|
If this is an array or dictionary, it may still contain
|
||||||
'''
|
some indirect objects inside.
|
||||||
|
"""
|
||||||
while isinstance(x, PDFObjRef):
|
while isinstance(x, PDFObjRef):
|
||||||
x = x.resolve()
|
x = x.resolve()
|
||||||
return x
|
return x
|
||||||
|
|
||||||
def resolve_all(x):
|
def resolve_all(x):
|
||||||
'''
|
"""Recursively resolves the given object and all the internals.
|
||||||
Recursively resolve X and all the internals.
|
|
||||||
Make sure there is no indirect reference within the nested object.
|
Make sure there is no indirect reference within the nested object.
|
||||||
This procedure might be slow.
|
This procedure might be slow.
|
||||||
'''
|
"""
|
||||||
while isinstance(x, PDFObjRef):
|
while isinstance(x, PDFObjRef):
|
||||||
x = x.resolve()
|
x = x.resolve()
|
||||||
if isinstance(x, list):
|
if isinstance(x, list):
|
||||||
|
@ -75,9 +76,8 @@ def resolve_all(x):
|
||||||
return x
|
return x
|
||||||
|
|
||||||
def decipher_all(decipher, objid, genno, x):
|
def decipher_all(decipher, objid, genno, x):
|
||||||
'''
|
"""Recursively deciphers the given object.
|
||||||
Recursively decipher X.
|
"""
|
||||||
'''
|
|
||||||
if isinstance(x, str):
|
if isinstance(x, str):
|
||||||
return decipher(objid, genno, x)
|
return decipher(objid, genno, x)
|
||||||
if isinstance(x, list):
|
if isinstance(x, list):
|
||||||
|
|
|
@ -140,9 +140,8 @@ OCT_STRING = re.compile(r'[0-7]')
|
||||||
ESC_STRING = { 'b':8, 't':9, 'n':10, 'f':12, 'r':13, '(':40, ')':41, '\\':92 }
|
ESC_STRING = { 'b':8, 't':9, 'n':10, 'f':12, 'r':13, '(':40, ')':41, '\\':92 }
|
||||||
class PSBaseParser(object):
|
class PSBaseParser(object):
|
||||||
|
|
||||||
'''
|
"""Most basic PostScript parser that performs only tokenization.
|
||||||
Most basic PostScript parser that performs only tokenization.
|
"""
|
||||||
'''
|
|
||||||
BUFSIZ = 4096
|
BUFSIZ = 4096
|
||||||
|
|
||||||
debug = 0
|
debug = 0
|
||||||
|
@ -175,9 +174,8 @@ class PSBaseParser(object):
|
||||||
return
|
return
|
||||||
|
|
||||||
def seek(self, pos):
|
def seek(self, pos):
|
||||||
'''
|
"""Seeks the parser to the given position.
|
||||||
Seeks the parser to the given position.
|
"""
|
||||||
'''
|
|
||||||
if 2 <= self.debug:
|
if 2 <= self.debug:
|
||||||
print >>stderr, 'seek: %r' % pos
|
print >>stderr, 'seek: %r' % pos
|
||||||
self.fp.seek(pos)
|
self.fp.seek(pos)
|
||||||
|
@ -203,9 +201,8 @@ class PSBaseParser(object):
|
||||||
return
|
return
|
||||||
|
|
||||||
def nextline(self):
|
def nextline(self):
|
||||||
'''
|
"""Fetches a next line that ends either with \\r or \\n.
|
||||||
Fetches a next line that ends either with \\r or \\n.
|
"""
|
||||||
'''
|
|
||||||
linebuf = ''
|
linebuf = ''
|
||||||
linepos = self.bufpos + self.charpos
|
linepos = self.bufpos + self.charpos
|
||||||
eol = False
|
eol = False
|
||||||
|
@ -234,10 +231,10 @@ class PSBaseParser(object):
|
||||||
return (linepos, linebuf)
|
return (linepos, linebuf)
|
||||||
|
|
||||||
def revreadlines(self):
|
def revreadlines(self):
|
||||||
'''
|
"""Fetches a next line backward.
|
||||||
Fetches a next line backward. This is used to locate
|
|
||||||
the trailers at the end of a file.
|
This is used to locate the trailers at the end of a file.
|
||||||
'''
|
"""
|
||||||
self.fp.seek(0, 2)
|
self.fp.seek(0, 2)
|
||||||
pos = self.fp.tell()
|
pos = self.fp.tell()
|
||||||
buf = ''
|
buf = ''
|
||||||
|
@ -534,11 +531,11 @@ class PSStackParser(PSBaseParser):
|
||||||
return
|
return
|
||||||
|
|
||||||
def nextobject(self):
|
def nextobject(self):
|
||||||
'''
|
"""Yields a list of objects.
|
||||||
Yields a list of objects: keywords, literals, strings,
|
|
||||||
numbers, arrays and dictionaries. Arrays and dictionaries
|
Returns keywords, literals, strings, numbers, arrays and dictionaries.
|
||||||
are represented as Python sequence and dictionaries.
|
Arrays and dictionaries are represented as Python lists and dictionaries.
|
||||||
'''
|
"""
|
||||||
while not self.results:
|
while not self.results:
|
||||||
(pos, token) = self.nexttoken()
|
(pos, token) = self.nexttoken()
|
||||||
#print (pos,token), (self.curtype, self.curstack)
|
#print (pos,token), (self.curtype, self.curstack)
|
||||||
|
|
|
@ -11,21 +11,21 @@ from struct import pack, unpack
|
||||||
MATRIX_IDENTITY = (1, 0, 0, 1, 0, 0)
|
MATRIX_IDENTITY = (1, 0, 0, 1, 0, 0)
|
||||||
|
|
||||||
def mult_matrix((a1,b1,c1,d1,e1,f1), (a0,b0,c0,d0,e0,f0)):
|
def mult_matrix((a1,b1,c1,d1,e1,f1), (a0,b0,c0,d0,e0,f0)):
|
||||||
'''Returns the multiplication of two matrices.'''
|
"""Returns the multiplication of two matrices."""
|
||||||
return (a0*a1+c0*b1, b0*a1+d0*b1,
|
return (a0*a1+c0*b1, b0*a1+d0*b1,
|
||||||
a0*c1+c0*d1, b0*c1+d0*d1,
|
a0*c1+c0*d1, b0*c1+d0*d1,
|
||||||
a0*e1+c0*f1+e0, b0*e1+d0*f1+f0)
|
a0*e1+c0*f1+e0, b0*e1+d0*f1+f0)
|
||||||
|
|
||||||
def translate_matrix((a,b,c,d,e,f), (x,y)):
|
def translate_matrix((a,b,c,d,e,f), (x,y)):
|
||||||
'''Translates a matrix by (x,y).'''
|
"""Translates a matrix by (x,y)."""
|
||||||
return (a,b,c,d,x*a+y*c+e,x*b+y*d+f)
|
return (a,b,c,d,x*a+y*c+e,x*b+y*d+f)
|
||||||
|
|
||||||
def apply_matrix_pt((a,b,c,d,e,f), (x,y)):
|
def apply_matrix_pt((a,b,c,d,e,f), (x,y)):
|
||||||
'''Applies a matrix to a point.'''
|
"""Applies a matrix to a point."""
|
||||||
return (a*x+c*y+e, b*x+d*y+f)
|
return (a*x+c*y+e, b*x+d*y+f)
|
||||||
|
|
||||||
def apply_matrix_norm((a,b,c,d,e,f), (p,q)):
|
def apply_matrix_norm((a,b,c,d,e,f), (p,q)):
|
||||||
'''Equivalent to apply_matrix_pt(M, (p,q)) - apply_matrix_pt(M, (0,0))'''
|
"""Equivalent to apply_matrix_pt(M, (p,q)) - apply_matrix_pt(M, (0,0))"""
|
||||||
return (a*p+c*q, b*p+d*q)
|
return (a*p+c*q, b*p+d*q)
|
||||||
|
|
||||||
|
|
||||||
|
@ -34,7 +34,7 @@ def apply_matrix_norm((a,b,c,d,e,f), (p,q)):
|
||||||
|
|
||||||
# uniq
|
# uniq
|
||||||
def uniq(objs):
|
def uniq(objs):
|
||||||
'''Eliminates duplicated elements.'''
|
"""Eliminates duplicated elements."""
|
||||||
done = set()
|
done = set()
|
||||||
for obj in objs:
|
for obj in objs:
|
||||||
if obj in done: continue
|
if obj in done: continue
|
||||||
|
@ -44,19 +44,19 @@ def uniq(objs):
|
||||||
|
|
||||||
# csort
|
# csort
|
||||||
def csort(objs, key):
|
def csort(objs, key):
|
||||||
'''Order-preserving sorting function.'''
|
"""Order-preserving sorting function."""
|
||||||
idxs = dict( (obj,i) for (i,obj) in enumerate(objs) )
|
idxs = dict( (obj,i) for (i,obj) in enumerate(objs) )
|
||||||
return sorted(objs, key=lambda obj:(key(obj), idxs[obj]))
|
return sorted(objs, key=lambda obj:(key(obj), idxs[obj]))
|
||||||
|
|
||||||
# drange
|
# drange
|
||||||
def drange(v0, v1, d):
|
def drange(v0, v1, d):
|
||||||
'''Returns a discrete range.'''
|
"""Returns a discrete range."""
|
||||||
assert v0 < v1
|
assert v0 < v1
|
||||||
return xrange(int(v0)/d, int(v1+d-1)/d)
|
return xrange(int(v0)/d, int(v1+d-1)/d)
|
||||||
|
|
||||||
# get_bound
|
# get_bound
|
||||||
def get_bound(pts):
|
def get_bound(pts):
|
||||||
'''Compute a minimal rectangle that covers all the points.'''
|
"""Compute a minimal rectangle that covers all the points."""
|
||||||
(x0, y0, x1, y1) = (INF, INF, -INF, -INF)
|
(x0, y0, x1, y1) = (INF, INF, -INF, -INF)
|
||||||
for (x,y) in pts:
|
for (x,y) in pts:
|
||||||
x0 = min(x0, x)
|
x0 = min(x0, x)
|
||||||
|
@ -67,7 +67,7 @@ def get_bound(pts):
|
||||||
|
|
||||||
# pick
|
# pick
|
||||||
def pick(seq, func, maxobj=None):
|
def pick(seq, func, maxobj=None):
|
||||||
'''Picks the object obj where func(obj) has the highest value.'''
|
"""Picks the object obj where func(obj) has the highest value."""
|
||||||
maxscore = None
|
maxscore = None
|
||||||
for obj in seq:
|
for obj in seq:
|
||||||
score = func(obj)
|
score = func(obj)
|
||||||
|
@ -77,7 +77,7 @@ def pick(seq, func, maxobj=None):
|
||||||
|
|
||||||
# choplist
|
# choplist
|
||||||
def choplist(n, seq):
|
def choplist(n, seq):
|
||||||
'''Groups every n elements of the list.'''
|
"""Groups every n elements of the list."""
|
||||||
r = []
|
r = []
|
||||||
for x in seq:
|
for x in seq:
|
||||||
r.append(x)
|
r.append(x)
|
||||||
|
@ -88,7 +88,7 @@ def choplist(n, seq):
|
||||||
|
|
||||||
# nunpack
|
# nunpack
|
||||||
def nunpack(s, default=0):
|
def nunpack(s, default=0):
|
||||||
'''Unpacks 1 to 4 byte integers (big endian).'''
|
"""Unpacks 1 to 4 byte integers (big endian)."""
|
||||||
l = len(s)
|
l = len(s)
|
||||||
if not l:
|
if not l:
|
||||||
return default
|
return default
|
||||||
|
@ -139,7 +139,7 @@ PDFDocEncoding = ''.join( unichr(x) for x in (
|
||||||
0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
|
0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
|
||||||
))
|
))
|
||||||
def decode_text(s):
|
def decode_text(s):
|
||||||
'''Decodes a PDFDocEncoding string to Unicode.'''
|
"""Decodes a PDFDocEncoding string to Unicode."""
|
||||||
if s.startswith('\xfe\xff'):
|
if s.startswith('\xfe\xff'):
|
||||||
return unicode(s[2:], 'utf-16be', 'ignore')
|
return unicode(s[2:], 'utf-16be', 'ignore')
|
||||||
else:
|
else:
|
||||||
|
@ -147,7 +147,7 @@ def decode_text(s):
|
||||||
|
|
||||||
# enc
|
# enc
|
||||||
def enc(x, codec='ascii'):
|
def enc(x, codec='ascii'):
|
||||||
'''Encodes a string for SGML/XML/HTML'''
|
"""Encodes a string for SGML/XML/HTML"""
|
||||||
x = x.replace('&','&amp;').replace('>','&gt;').replace('<','&lt;').replace('"','&quot;')
|
x = x.replace('&','&amp;').replace('>','&gt;').replace('<','&lt;').replace('"','&quot;')
|
||||||
return x.encode(codec, 'xmlcharrefreplace')
|
return x.encode(codec, 'xmlcharrefreplace')
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue