tmp commit
git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@70 1aa58f4a-7d42-0410-adbc-911cccaed67cpull/1/head
parent
2694de9521
commit
13a6603151
|
@ -2,8 +2,8 @@
|
||||||
import sys
|
import sys
|
||||||
stderr = sys.stderr
|
stderr = sys.stderr
|
||||||
from struct import pack, unpack
|
from struct import pack, unpack
|
||||||
from pdflib.utils import choplist, nunpack
|
from utils import choplist, nunpack
|
||||||
from pdflib.psparser import PSException, PSSyntaxError, PSTypeError, PSEOF, \
|
from psparser import PSException, PSSyntaxError, PSTypeError, PSEOF, \
|
||||||
PSLiteral, PSKeyword, literal_name, keyword_name, \
|
PSLiteral, PSKeyword, literal_name, keyword_name, \
|
||||||
PSStackParser
|
PSStackParser
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -2,11 +2,11 @@
|
||||||
import sys
|
import sys
|
||||||
stdout = sys.stdout
|
stdout = sys.stdout
|
||||||
stderr = sys.stderr
|
stderr = sys.stderr
|
||||||
from pdflib.pdfparser import PDFDocument, PDFParser, PDFPasswordIncorrect
|
from pdfparser import PDFDocument, PDFParser, PDFPasswordIncorrect
|
||||||
from pdflib.pdfinterp import PDFResourceManager, PDFPageInterpreter
|
from pdfinterp import PDFResourceManager, PDFPageInterpreter
|
||||||
from pdflib.pdfdevice import PDFDevice, FigureItem, TextItem, PDFPageAggregator
|
from pdfdevice import PDFDevice, FigureItem, TextItem, PDFPageAggregator
|
||||||
from pdflib.pdffont import PDFUnicodeNotDefined
|
from pdffont import PDFUnicodeNotDefined
|
||||||
from pdflib.cmap import CMapDB
|
from cmap import CMapDB
|
||||||
|
|
||||||
|
|
||||||
def enc(x, codec):
|
def enc(x, codec):
|
||||||
|
@ -121,7 +121,7 @@ class TagExtractor(PDFDevice):
|
||||||
def render_image(self, stream, size, matrix):
|
def render_image(self, stream, size, matrix):
|
||||||
return
|
return
|
||||||
|
|
||||||
def render_string(self, textstate, textmatrix, size, seq):
|
def render_string(self, textstate, textmatrix, seq):
|
||||||
font = textstate.font
|
font = textstate.font
|
||||||
text = ''
|
text = ''
|
||||||
for x in seq:
|
for x in seq:
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
import sys
|
import sys
|
||||||
stderr = sys.stderr
|
stderr = sys.stderr
|
||||||
from pdflib.psparser import PSLiteralTable
|
from psparser import PSLiteralTable
|
||||||
|
|
||||||
|
|
||||||
## ColorSpace
|
## ColorSpace
|
||||||
|
|
|
@ -2,8 +2,8 @@
|
||||||
import sys
|
import sys
|
||||||
stdout = sys.stdout
|
stdout = sys.stdout
|
||||||
stderr = sys.stderr
|
stderr = sys.stderr
|
||||||
from pdflib.pdffont import PDFUnicodeNotDefined
|
from pdffont import PDFUnicodeNotDefined
|
||||||
from pdflib.utils import mult_matrix, apply_matrix, apply_matrix_norm, translate_matrix
|
from utils import mult_matrix, apply_matrix, apply_matrix_norm, translate_matrix
|
||||||
|
|
||||||
|
|
||||||
## PDFDevice
|
## PDFDevice
|
||||||
|
|
|
@ -6,13 +6,175 @@ try:
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from StringIO import StringIO
|
from StringIO import StringIO
|
||||||
from pdflib.psparser import PSLiteralTable, PSKeywordTable, PSLiteral, \
|
from psparser import PSLiteralTable, PSKeywordTable, PSLiteral, \
|
||||||
literal_name, keyword_name, STRICT
|
literal_name, keyword_name, STRICT
|
||||||
from pdflib.pdftypes import PDFException, \
|
from pdftypes import PDFException, \
|
||||||
resolve1, int_value, float_value, num_value, \
|
resolve1, int_value, float_value, num_value, \
|
||||||
str_value, list_value, dict_value, stream_value
|
str_value, list_value, dict_value, stream_value
|
||||||
from pdflib.cmap import CMap, CMapDB, CMapParser, FontMetricsDB, EncodingDB
|
from cmap import CMap, CMapDB, CMapParser, FontMetricsDB, EncodingDB
|
||||||
from utils import apply_matrix_norm
|
from utils import apply_matrix_norm, nunpack
|
||||||
|
|
||||||
|
|
||||||
|
NIBBLES = ('0','1','2','3','4','5','6','7','8','9','.','e','e-',None,'-')
|
||||||
|
def getnum(fp):
|
||||||
|
b0 = ord(fp.read(1))
|
||||||
|
if b0 == 30:
|
||||||
|
s = ''
|
||||||
|
loop = True
|
||||||
|
while loop:
|
||||||
|
b = ord(fp.read(1))
|
||||||
|
for n in (b >> 4, b & 15):
|
||||||
|
if n == 15:
|
||||||
|
loop = False
|
||||||
|
else:
|
||||||
|
s += NIBBLES[n]
|
||||||
|
return float(s)
|
||||||
|
if 32 <= b0 and b0 <= 246:
|
||||||
|
return b0-139
|
||||||
|
b1 = ord(fp.read(1))
|
||||||
|
if 247 <= b0 and b0 <= 250:
|
||||||
|
return ((b0-247)<<8)+b1+108
|
||||||
|
if 251 <= b0 and b0 <= 254:
|
||||||
|
return -((b0-251)<<8)-b1-108
|
||||||
|
b2 = ord(fp.read(1))
|
||||||
|
if 128 <= b1: b1 -= 256
|
||||||
|
if b0 == 28:
|
||||||
|
return b1<<8 | b2
|
||||||
|
return b1<<24 | b2<<16 | unpack('>H',fp.read(2))[0]
|
||||||
|
#assert getop(StringIO('\x8b')) == 0
|
||||||
|
#assert getop(StringIO('\xef')) == 100
|
||||||
|
#assert getop(StringIO('\x27')) == -100
|
||||||
|
#assert getop(StringIO('\xfa\x7c')) == 1000
|
||||||
|
#assert getop(StringIO('\xfe\x7c')) == -1000
|
||||||
|
#assert getop(StringIO('\x1c\x27\x10')) == 10000
|
||||||
|
#assert getop(StringIO('\x1c\xd8\xf0')) == -10000
|
||||||
|
#assert getop(StringIO('\x1d\x00\x01\x86\xa0')) == 100000
|
||||||
|
#assert getop(StringIO('\x1d\xff\xfe\x79\x60')) == -100000
|
||||||
|
#assert getop(StringIO('\x1e\xe2\xa2\x5f')) == -2.25
|
||||||
|
#assert getop(StringIO('\x1e\x0a\x14\x05\x41\xc3\xff')) == 0.140541e-3
|
||||||
|
|
||||||
|
|
||||||
|
## CFFFont
|
||||||
|
## (Format specified in Adobe Technical Note: #5176
|
||||||
|
## "The Compact Font Format Specification")
|
||||||
|
##
|
||||||
|
class CFFFont(object):
|
||||||
|
|
||||||
|
class INDEX(object):
|
||||||
|
|
||||||
|
def __init__(self, fp):
|
||||||
|
self.fp = fp
|
||||||
|
self.offsets = []
|
||||||
|
(count, offsize) = unpack('>HB', self.fp.read(3))
|
||||||
|
for i in xrange(count+1):
|
||||||
|
self.offsets.append(nunpack(self.fp.read(offsize)))
|
||||||
|
self.base = self.fp.tell()-1
|
||||||
|
self.fp.seek(self.base+self.offsets[-1])
|
||||||
|
return
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return '<INDEX: size=%d>' % len(self)
|
||||||
|
|
||||||
|
def __len__(self):
|
||||||
|
return len(self.offsets)-1
|
||||||
|
|
||||||
|
def __getitem__(self, i):
|
||||||
|
self.fp.seek(self.base+self.offsets[i])
|
||||||
|
return self.fp.read(self.offsets[i+1]-self.offsets[i])
|
||||||
|
|
||||||
|
def __init__(self, name, fp):
|
||||||
|
self.name = name
|
||||||
|
self.fp = fp
|
||||||
|
# Header
|
||||||
|
(_major,_minor,hdrsize,self.offsize) = unpack('BBBB', fp.read(4))
|
||||||
|
self.fp.read(hdrsize-4)
|
||||||
|
# Name INDEX
|
||||||
|
self.name_index = self.INDEX(self.fp)
|
||||||
|
# Top DICT INDEX
|
||||||
|
self.dict_index = self.INDEX(self.fp)
|
||||||
|
# String INDEX
|
||||||
|
self.string_index = self.INDEX(self.fp)
|
||||||
|
# Global Subr INDEX
|
||||||
|
self.subr_index = self.INDEX(self.fp)
|
||||||
|
# Encodings
|
||||||
|
# Charsets
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## TrueTypeFont
|
||||||
|
##
|
||||||
|
class TrueTypeFont(object):
|
||||||
|
|
||||||
|
class CMapNotFound(Exception): pass
|
||||||
|
|
||||||
|
def __init__(self, name, fp):
|
||||||
|
self.name = name
|
||||||
|
self.fp = fp
|
||||||
|
self.tables = {}
|
||||||
|
fonttype = fp.read(4)
|
||||||
|
(ntables, _1, _2, _3) = unpack('>HHHH', fp.read(8))
|
||||||
|
for i in xrange(ntables):
|
||||||
|
(name, tsum, offset, length) = unpack('>4sLLL', fp.read(16))
|
||||||
|
self.tables[name] = (offset, length)
|
||||||
|
return
|
||||||
|
|
||||||
|
def create_cmap(self):
|
||||||
|
if 'cmap' not in self.tables:
|
||||||
|
raise TrueTypeFont.CMapNotFound
|
||||||
|
(base_offset, length) = self.tables['cmap']
|
||||||
|
fp = self.fp
|
||||||
|
fp.seek(base_offset)
|
||||||
|
(version, nsubtables) = unpack('>HH', fp.read(4))
|
||||||
|
subtables = []
|
||||||
|
for i in xrange(nsubtables):
|
||||||
|
subtables.append(unpack('>HHL', fp.read(8)))
|
||||||
|
char2gid = {}
|
||||||
|
# Only supports subtable type 0, 2 and 4.
|
||||||
|
for (_1, _2, st_offset) in subtables:
|
||||||
|
fp.seek(base_offset+st_offset)
|
||||||
|
(fmttype, fmtlen, fmtlang) = unpack('>HHH', fp.read(6))
|
||||||
|
if fmttype == 0:
|
||||||
|
char2gid.update(enumerate(unpack('>256B', fp.read(256))))
|
||||||
|
elif fmttype == 2:
|
||||||
|
subheaderkeys = unpack('>256H', fp.read(512))
|
||||||
|
firstbytes = [0]*8192
|
||||||
|
for (i,k) in enumerate(subheaderkeys):
|
||||||
|
firstbytes[k/8] = i
|
||||||
|
nhdrs = max(subheaderkeys)/8 + 1
|
||||||
|
hdrs = []
|
||||||
|
for i in xrange(nhdrs):
|
||||||
|
(firstcode,entcount,delta,offset) = unpack('>HHhH', fp.read(8))
|
||||||
|
hdrs.append((i,firstcode,entcount,delta,fp.tell()-2+offset))
|
||||||
|
for (i,firstcode,entcount,delta,pos) in hdrs:
|
||||||
|
if not entcount: continue
|
||||||
|
first = firstcode + (firstbytes[i] << 8)
|
||||||
|
fp.seek(pos)
|
||||||
|
for c in xrange(entcount):
|
||||||
|
gid = unpack('>H', fp.read(2))
|
||||||
|
if gid:
|
||||||
|
gid += delta
|
||||||
|
char2gid[first+c] = gid
|
||||||
|
elif fmttype == 4:
|
||||||
|
(segcount, _1, _2, _3) = unpack('>HHHH', fp.read(8))
|
||||||
|
segcount /= 2
|
||||||
|
ecs = unpack('>%dH' % segcount, fp.read(2*segcount))
|
||||||
|
fp.read(2)
|
||||||
|
scs = unpack('>%dH' % segcount, fp.read(2*segcount))
|
||||||
|
idds = unpack('>%dh' % segcount, fp.read(2*segcount))
|
||||||
|
pos = fp.tell()
|
||||||
|
idrs = unpack('>%dH' % segcount, fp.read(2*segcount))
|
||||||
|
for (ec,sc,idd,idr) in zip(ecs, scs, idds, idrs):
|
||||||
|
if idr:
|
||||||
|
fp.seek(pos+idr)
|
||||||
|
for c in xrange(sc, ec+1):
|
||||||
|
char2gid[c] = (unpack('>H', fp.read(2))[0] + idd) & 0xffff
|
||||||
|
else:
|
||||||
|
for c in xrange(sc, ec+1):
|
||||||
|
char2gid[c] = (c + idd) & 0xffff
|
||||||
|
gid2char = dict( (gid, pack('>H', char))
|
||||||
|
for (char,gid) in char2gid.iteritems() )
|
||||||
|
return CMap().update(char2gid, gid2char)
|
||||||
|
|
||||||
|
|
||||||
## Fonts
|
## Fonts
|
||||||
|
@ -96,17 +258,15 @@ class PDFSimpleFont(PDFFont):
|
||||||
return
|
return
|
||||||
|
|
||||||
def to_unicode(self, cid):
|
def to_unicode(self, cid):
|
||||||
if not self.ucs2_cmap:
|
if self.ucs2_cmap:
|
||||||
try:
|
code = self.ucs2_cmap.tocode(cid)
|
||||||
return self.encoding[cid]
|
if code:
|
||||||
except KeyError:
|
chars = unpack('>%dH' % (len(code)/2), code)
|
||||||
raise PDFUnicodeNotDefined(None, cid)
|
return ''.join( unichr(c) for c in chars )
|
||||||
code = self.ucs2_cmap.tocode(cid)
|
try:
|
||||||
if not code:
|
return self.encoding[cid]
|
||||||
|
except KeyError:
|
||||||
raise PDFUnicodeNotDefined(None, cid)
|
raise PDFUnicodeNotDefined(None, cid)
|
||||||
chars = unpack('>%dH' % (len(code)/2), code)
|
|
||||||
return ''.join( unichr(c) for c in chars )
|
|
||||||
|
|
||||||
|
|
||||||
# PDFType1Font
|
# PDFType1Font
|
||||||
class PDFType1Font(PDFSimpleFont):
|
class PDFType1Font(PDFSimpleFont):
|
||||||
|
@ -171,81 +331,6 @@ class PDFType3Font(PDFSimpleFont):
|
||||||
|
|
||||||
|
|
||||||
# PDFCIDFont
|
# PDFCIDFont
|
||||||
|
|
||||||
## TrueTypeFont
|
|
||||||
##
|
|
||||||
class TrueTypeFont(object):
|
|
||||||
|
|
||||||
class CMapNotFound(Exception): pass
|
|
||||||
|
|
||||||
def __init__(self, name, fp):
|
|
||||||
self.name = name
|
|
||||||
self.fp = fp
|
|
||||||
self.tables = {}
|
|
||||||
fonttype = fp.read(4)
|
|
||||||
(ntables, _1, _2, _3) = unpack('>HHHH', fp.read(8))
|
|
||||||
for i in xrange(ntables):
|
|
||||||
(name, tsum, offset, length) = unpack('>4sLLL', fp.read(16))
|
|
||||||
self.tables[name] = (offset, length)
|
|
||||||
return
|
|
||||||
|
|
||||||
def create_cmap(self):
|
|
||||||
if 'cmap' not in self.tables:
|
|
||||||
raise TrueTypeFont.CMapNotFound
|
|
||||||
(base_offset, length) = self.tables['cmap']
|
|
||||||
fp = self.fp
|
|
||||||
fp.seek(base_offset)
|
|
||||||
(version, nsubtables) = unpack('>HH', fp.read(4))
|
|
||||||
subtables = []
|
|
||||||
for i in xrange(nsubtables):
|
|
||||||
subtables.append(unpack('>HHL', fp.read(8)))
|
|
||||||
char2gid = {}
|
|
||||||
# Only supports subtable type 0, 2 and 4.
|
|
||||||
for (_1, _2, st_offset) in subtables:
|
|
||||||
fp.seek(base_offset+st_offset)
|
|
||||||
(fmttype, fmtlen, fmtlang) = unpack('>HHH', fp.read(6))
|
|
||||||
if fmttype == 0:
|
|
||||||
char2gid.update(enumerate(unpack('>256B', fp.read(256))))
|
|
||||||
elif fmttype == 2:
|
|
||||||
subheaderkeys = unpack('>256H', fp.read(512))
|
|
||||||
firstbytes = [0]*8192
|
|
||||||
for (i,k) in enumerate(subheaderkeys):
|
|
||||||
firstbytes[k/8] = i
|
|
||||||
nhdrs = max(subheaderkeys)/8 + 1
|
|
||||||
hdrs = []
|
|
||||||
for i in xrange(nhdrs):
|
|
||||||
(firstcode,entcount,delta,offset) = unpack('>HHhH', fp.read(8))
|
|
||||||
hdrs.append((i,firstcode,entcount,delta,fp.tell()-2+offset))
|
|
||||||
for (i,firstcode,entcount,delta,pos) in hdrs:
|
|
||||||
if not entcount: continue
|
|
||||||
first = firstcode + (firstbytes[i] << 8)
|
|
||||||
fp.seek(pos)
|
|
||||||
for c in xrange(entcount):
|
|
||||||
gid = unpack('>H', fp.read(2))
|
|
||||||
if gid:
|
|
||||||
gid += delta
|
|
||||||
char2gid[first+c] = gid
|
|
||||||
elif fmttype == 4:
|
|
||||||
(segcount, _1, _2, _3) = unpack('>HHHH', fp.read(8))
|
|
||||||
segcount /= 2
|
|
||||||
ecs = unpack('>%dH' % segcount, fp.read(2*segcount))
|
|
||||||
fp.read(2)
|
|
||||||
scs = unpack('>%dH' % segcount, fp.read(2*segcount))
|
|
||||||
idds = unpack('>%dh' % segcount, fp.read(2*segcount))
|
|
||||||
pos = fp.tell()
|
|
||||||
idrs = unpack('>%dH' % segcount, fp.read(2*segcount))
|
|
||||||
for (ec,sc,idd,idr) in zip(ecs, scs, idds, idrs):
|
|
||||||
if idr:
|
|
||||||
fp.seek(pos+idr)
|
|
||||||
for c in xrange(sc, ec+1):
|
|
||||||
char2gid[c] = (unpack('>H', fp.read(2))[0] + idd) & 0xffff
|
|
||||||
else:
|
|
||||||
for c in xrange(sc, ec+1):
|
|
||||||
char2gid[c] = (c + idd) & 0xffff
|
|
||||||
gid2char = dict( (gid, pack('>H', char))
|
|
||||||
for (char,gid) in char2gid.iteritems() )
|
|
||||||
return CMap().update(char2gid, gid2char)
|
|
||||||
|
|
||||||
class PDFCIDFont(PDFFont):
|
class PDFCIDFont(PDFFont):
|
||||||
|
|
||||||
def __init__(self, rsrc, spec):
|
def __init__(self, rsrc, spec):
|
||||||
|
@ -358,3 +443,13 @@ class PDFCIDFont(PDFFont):
|
||||||
|
|
||||||
def space_width(self):
|
def space_width(self):
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
# main
|
||||||
|
def main(argv):
|
||||||
|
for fname in argv[1:]:
|
||||||
|
fp = file(fname, 'rb')
|
||||||
|
CFFFont(fname, fp)
|
||||||
|
fp.close()
|
||||||
|
return
|
||||||
|
if __name__ == '__main__': sys.exit(main(sys.argv))
|
||||||
|
|
|
@ -6,17 +6,17 @@ try:
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from StringIO import StringIO
|
from StringIO import StringIO
|
||||||
from pdflib.psparser import PSException, PSTypeError, PSEOF, \
|
from psparser import PSException, PSTypeError, PSEOF, \
|
||||||
PSLiteralTable, PSKeywordTable, literal_name, keyword_name, \
|
PSLiteralTable, PSKeywordTable, literal_name, keyword_name, \
|
||||||
PSStackParser, PSKeyword, STRICT
|
PSStackParser, PSKeyword, STRICT
|
||||||
from pdflib.pdftypes import PDFException, PDFStream, PDFObjRef, \
|
from pdftypes import PDFException, PDFStream, PDFObjRef, \
|
||||||
resolve1, int_value, float_value, num_value, \
|
resolve1, int_value, float_value, num_value, \
|
||||||
str_value, list_value, dict_value, stream_value
|
str_value, list_value, dict_value, stream_value
|
||||||
from pdflib.utils import choplist, mult_matrix, translate_matrix, apply_matrix, apply_matrix_norm, MATRIX_IDENTITY
|
from utils import choplist, mult_matrix, translate_matrix, apply_matrix, apply_matrix_norm, MATRIX_IDENTITY
|
||||||
from pdflib.pdffont import PDFFontError, PDFType1Font, PDFTrueTypeFont, PDFType3Font, PDFCIDFont
|
from pdffont import PDFFontError, PDFType1Font, PDFTrueTypeFont, PDFType3Font, PDFCIDFont
|
||||||
from pdflib.pdfcolor import ColorSpace, PREDEFINED_COLORSPACE, \
|
from pdfcolor import ColorSpace, PREDEFINED_COLORSPACE, \
|
||||||
LITERAL_DEVICE_GRAY, LITERAL_DEVICE_RGB, LITERAL_DEVICE_CMYK
|
LITERAL_DEVICE_GRAY, LITERAL_DEVICE_RGB, LITERAL_DEVICE_CMYK
|
||||||
from pdflib.cmap import CMapDB
|
from cmap import CMapDB
|
||||||
|
|
||||||
|
|
||||||
## Exceptions
|
## Exceptions
|
||||||
|
@ -391,27 +391,27 @@ class PDFPageInterpreter(object):
|
||||||
return
|
return
|
||||||
# setgray-stroking
|
# setgray-stroking
|
||||||
def do_G(self, gray):
|
def do_G(self, gray):
|
||||||
self.do_CS(LITERAL_DEVICE_GRAY)
|
#self.do_CS(LITERAL_DEVICE_GRAY)
|
||||||
return
|
return
|
||||||
# setgray-non-stroking
|
# setgray-non-stroking
|
||||||
def do_g(self, gray):
|
def do_g(self, gray):
|
||||||
self.do_cs(LITERAL_DEVICE_GRAY)
|
#self.do_cs(LITERAL_DEVICE_GRAY)
|
||||||
return
|
return
|
||||||
# setrgb-stroking
|
# setrgb-stroking
|
||||||
def do_RG(self, r, g, b):
|
def do_RG(self, r, g, b):
|
||||||
self.do_CS(LITERAL_DEVICE_RGB)
|
#self.do_CS(LITERAL_DEVICE_RGB)
|
||||||
return
|
return
|
||||||
# setrgb-non-stroking
|
# setrgb-non-stroking
|
||||||
def do_rg(self, r, g, b):
|
def do_rg(self, r, g, b):
|
||||||
self.do_cs(LITERAL_DEVICE_RGB)
|
#self.do_cs(LITERAL_DEVICE_RGB)
|
||||||
return
|
return
|
||||||
# setcmyk-stroking
|
# setcmyk-stroking
|
||||||
def do_K(self, c, m, y, k):
|
def do_K(self, c, m, y, k):
|
||||||
self.do_CS(LITERAL_DEVICE_CMYK)
|
#self.do_CS(LITERAL_DEVICE_CMYK)
|
||||||
return
|
return
|
||||||
# setcmyk-non-stroking
|
# setcmyk-non-stroking
|
||||||
def do_k(self, c, m, y, k):
|
def do_k(self, c, m, y, k):
|
||||||
self.do_cs(LITERAL_DEVICE_CMYK)
|
#self.do_cs(LITERAL_DEVICE_CMYK)
|
||||||
return
|
return
|
||||||
|
|
||||||
# setcolor
|
# setcolor
|
||||||
|
|
|
@ -7,12 +7,12 @@
|
||||||
import sys, re
|
import sys, re
|
||||||
import md5, struct
|
import md5, struct
|
||||||
stderr = sys.stderr
|
stderr = sys.stderr
|
||||||
from pdflib.utils import choplist, nunpack, decode_text
|
from utils import choplist, nunpack, decode_text
|
||||||
from pdflib.arcfour import Arcfour
|
from arcfour import Arcfour
|
||||||
from pdflib.psparser import PSStackParser, PSSyntaxError, PSEOF, \
|
from psparser import PSStackParser, PSSyntaxError, PSEOF, \
|
||||||
PSLiteralTable, PSKeywordTable, literal_name, keyword_name, \
|
PSLiteralTable, PSKeywordTable, literal_name, keyword_name, \
|
||||||
STRICT
|
STRICT
|
||||||
from pdflib.pdftypes import PDFException, PDFTypeError, PDFNotImplementedError, \
|
from pdftypes import PDFException, PDFTypeError, PDFNotImplementedError, \
|
||||||
PDFStream, PDFObjRef, resolve1, decipher_all, \
|
PDFStream, PDFObjRef, resolve1, decipher_all, \
|
||||||
int_value, float_value, num_value, str_value, list_value, dict_value, stream_value
|
int_value, float_value, num_value, str_value, list_value, dict_value, stream_value
|
||||||
|
|
||||||
|
@ -34,23 +34,50 @@ LITERAL_CATALOG = PSLiteralTable.intern('Catalog')
|
||||||
|
|
||||||
## XRefs
|
## XRefs
|
||||||
##
|
##
|
||||||
|
class XRefObjRange(object):
|
||||||
|
def __init__(self, start, nobjs):
|
||||||
|
self.start = start
|
||||||
|
self.nobjs = nobjs
|
||||||
|
return
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return '<XRefObjRange: %d-%d>' % (self.get_start_id(), self.get_end_id())
|
||||||
|
|
||||||
|
def get_start_id(self):
|
||||||
|
return self.start
|
||||||
|
|
||||||
|
def get_end_id(self):
|
||||||
|
return self.start + self.nobjs - 1
|
||||||
|
|
||||||
|
def get_nobjs(self):
|
||||||
|
return self.nobjs
|
||||||
|
|
||||||
|
class PDFBaseXRef(object):
|
||||||
|
def __init__(self):
|
||||||
|
self.objid_ranges = None
|
||||||
|
self.objid_list = None
|
||||||
|
return
|
||||||
|
|
||||||
|
def objids(self):
|
||||||
|
for objid_range in self.objid_ranges:
|
||||||
|
for objid in xrange(objid_range.get_start_id(), objid_range.get_end_id() + 1):
|
||||||
|
yield objid
|
||||||
|
return
|
||||||
|
|
||||||
## PDFXRef
|
## PDFXRef
|
||||||
##
|
##
|
||||||
class PDFXRef(object):
|
class PDFXRef(PDFBaseXRef):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
PDFBaseXRef.__init__(self)
|
||||||
self.offsets = None
|
self.offsets = None
|
||||||
return
|
return
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return '<PDFXRef: objs=%d>' % len(self.offsets)
|
return '<PDFXRef: objs=%d>' % len(self.offsets)
|
||||||
|
|
||||||
def objids(self):
|
|
||||||
return self.offsets.iterkeys()
|
|
||||||
|
|
||||||
def load(self, parser, debug=0):
|
def load(self, parser, debug=0):
|
||||||
self.offsets = {}
|
self.offsets = {}
|
||||||
|
self.objid_ranges = []
|
||||||
while 1:
|
while 1:
|
||||||
try:
|
try:
|
||||||
(pos, line) = parser.nextline()
|
(pos, line) = parser.nextline()
|
||||||
|
@ -68,6 +95,8 @@ class PDFXRef(object):
|
||||||
(start, nobjs) = map(long, f)
|
(start, nobjs) = map(long, f)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
raise PDFNoValidXRef('Invalid line: %r: line=%r' % (parser, line))
|
raise PDFNoValidXRef('Invalid line: %r: line=%r' % (parser, line))
|
||||||
|
self.newoffsets = {}
|
||||||
|
self.objid_ranges.append(XRefObjRange(start, nobjs))
|
||||||
for objid in xrange(start, start+nobjs):
|
for objid in xrange(start, start+nobjs):
|
||||||
try:
|
try:
|
||||||
(_, line) = parser.nextline()
|
(_, line) = parser.nextline()
|
||||||
|
@ -108,11 +137,10 @@ class PDFXRef(object):
|
||||||
|
|
||||||
## PDFXRefStream
|
## PDFXRefStream
|
||||||
##
|
##
|
||||||
class PDFXRefStream(object):
|
class PDFXRefStream(PDFBaseXRef):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.objid_first = None
|
PDFBaseXRef.__init__(self)
|
||||||
self.objid_last = None
|
|
||||||
self.data = None
|
self.data = None
|
||||||
self.entlen = None
|
self.entlen = None
|
||||||
self.fl1 = self.fl2 = self.fl3 = None
|
self.fl1 = self.fl2 = self.fl3 = None
|
||||||
|
@ -121,9 +149,6 @@ class PDFXRefStream(object):
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return '<PDFXRef: objid=%d-%d>' % (self.objid_first, self.objid_last)
|
return '<PDFXRef: objid=%d-%d>' % (self.objid_first, self.objid_last)
|
||||||
|
|
||||||
def objids(self):
|
|
||||||
return xrange(self.objid_first, self.objid_last+1)
|
|
||||||
|
|
||||||
def load(self, parser, debug=0):
|
def load(self, parser, debug=0):
|
||||||
(_,objid) = parser.nexttoken() # ignored
|
(_,objid) = parser.nexttoken() # ignored
|
||||||
(_,genno) = parser.nexttoken() # ignored
|
(_,genno) = parser.nexttoken() # ignored
|
||||||
|
@ -132,22 +157,31 @@ class PDFXRefStream(object):
|
||||||
if not isinstance(stream, PDFStream) or stream.dic['Type'] is not LITERAL_XREF:
|
if not isinstance(stream, PDFStream) or stream.dic['Type'] is not LITERAL_XREF:
|
||||||
raise PDFNoValidXRef('Invalid PDF stream spec.')
|
raise PDFNoValidXRef('Invalid PDF stream spec.')
|
||||||
size = stream.dic['Size']
|
size = stream.dic['Size']
|
||||||
(start, nobjs) = stream.dic.get('Index', (0,size))
|
index_array = stream.dic.get('Index', (0,size))
|
||||||
self.objid_first = start
|
if len(index_array) % 2 != 0:
|
||||||
self.objid_last = start+nobjs-1
|
raise PDFSyntaxError('Invalid index number')
|
||||||
|
self.objid_ranges = [ XRefObjRange(start,nobjs) for (start,nobjs) in choplist(2, index_array) ]
|
||||||
(self.fl1, self.fl2, self.fl3) = stream.dic['W']
|
(self.fl1, self.fl2, self.fl3) = stream.dic['W']
|
||||||
self.data = stream.get_data()
|
self.data = stream.get_data()
|
||||||
self.entlen = self.fl1+self.fl2+self.fl3
|
self.entlen = self.fl1+self.fl2+self.fl3
|
||||||
self.trailer = stream.dic
|
self.trailer = stream.dic
|
||||||
if debug:
|
if debug:
|
||||||
print >>stderr, ('xref stream: objid=%d-%d, fields=%d,%d,%d' %
|
print >>stderr, ('xref stream: objid=%s, fields=%d,%d,%d' %
|
||||||
(self.objid_first, self.objid_last, self.fl1, self.fl2, self.fl3))
|
(', '.join(map(repr, self.objid_ranges), self.fl1, self.fl2, self.fl3)))
|
||||||
return
|
return
|
||||||
|
|
||||||
def getpos(self, objid):
|
def getpos(self, objid):
|
||||||
if objid < self.objid_first or self.objid_last < objid:
|
offset = 0
|
||||||
raise KeyError(objid)
|
found = False
|
||||||
i = self.entlen * (objid-self.objid_first)
|
for objid_range in self.objid_ranges:
|
||||||
|
if objid >= objid_range.get_start_id() and objid <= objid_range.get_end_id():
|
||||||
|
offset += objid - objid_range.get_start_id()
|
||||||
|
found = True
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
offset += objid_range.get_nobjs()
|
||||||
|
if not found: raise KeyError(objid)
|
||||||
|
i = self.entlen * offset
|
||||||
ent = self.data[i:i+self.entlen]
|
ent = self.data[i:i+self.entlen]
|
||||||
f1 = nunpack(ent[:self.fl1], 1)
|
f1 = nunpack(ent[:self.fl1], 1)
|
||||||
if f1 == 1:
|
if f1 == 1:
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
import sys, zlib
|
import sys, zlib
|
||||||
stderr = sys.stderr
|
stderr = sys.stderr
|
||||||
from pdflib.lzw import LZWDecoder
|
from lzw import LZWDecoder
|
||||||
from pdflib.psparser import PSException, PSObject, \
|
from psparser import PSException, PSObject, \
|
||||||
PSLiteral, PSKeyword, PSLiteralTable, PSKeywordTable, \
|
PSLiteral, PSKeyword, PSLiteralTable, PSKeywordTable, \
|
||||||
literal_name, keyword_name, STRICT
|
literal_name, keyword_name, STRICT
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
import sys, re
|
import sys, re
|
||||||
stderr = sys.stderr
|
stderr = sys.stderr
|
||||||
|
|
||||||
from pdflib.utils import choplist
|
from utils import choplist
|
||||||
|
|
||||||
STRICT = 0
|
STRICT = 0
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue