simple image handling.
git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@23 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
parent
ecaf68efed
commit
f296ed3999
6
Makefile
6
Makefile
|
@ -1,8 +1,8 @@
|
|||
# Makefile for pdfminer
|
||||
|
||||
PACKAGE=pdfminer
|
||||
VERSION=20080107
|
||||
TAR=tar
|
||||
VERSION=20080427
|
||||
GNUTAR=tar
|
||||
SVN=svn
|
||||
PYTHON=python
|
||||
|
||||
|
@ -24,7 +24,7 @@ clean:
|
|||
pack: clean
|
||||
$(SVN) cleanup
|
||||
$(SVN) export . $(WORKDIR)/$(DISTNAME)
|
||||
$(TAR) c -z -C$(WORKDIR) -f $(WORKDIR)/$(DISTFILE) $(DISTNAME) --dereference --numeric-owner
|
||||
$(GNUTAR) c -z -C$(WORKDIR) -f $(WORKDIR)/$(DISTFILE) $(DISTNAME) --dereference --numeric-owner
|
||||
rm -rf $(WORKDIR)/$(DISTNAME)
|
||||
|
||||
pychecker:
|
||||
|
|
29
dumppdf.py
29
dumppdf.py
|
@ -19,7 +19,7 @@ def esc(s):
|
|||
|
||||
|
||||
# dumpxml
|
||||
def dumpxml(out, obj):
|
||||
def dumpxml(out, obj, codec=None):
|
||||
if isinstance(obj, dict):
|
||||
out.write('<dict size="%d">\n' % len(obj))
|
||||
for (k,v) in obj.iteritems():
|
||||
|
@ -43,15 +43,11 @@ def dumpxml(out, obj):
|
|||
return
|
||||
|
||||
if isinstance(obj, PDFStream):
|
||||
props = obj.dic.copy()
|
||||
if 'Filter' in props:
|
||||
del props['Filter']
|
||||
if 'DecodeParms' in props:
|
||||
del props['DecodeParms']
|
||||
out.write('<stream>\n<props>\n')
|
||||
dumpxml(out, props)
|
||||
data = obj.get_data()
|
||||
dumpxml(out, obj.dic)
|
||||
out.write('\n</props>\n')
|
||||
if codec:
|
||||
data = obj.get_data()
|
||||
out.write('<data size="%d">%s</data>\n' % (len(data), esc(data)))
|
||||
out.write('</stream>')
|
||||
return
|
||||
|
@ -101,17 +97,17 @@ def dumpallobjs(out, doc):
|
|||
|
||||
# dumppdf
|
||||
def dumppdf(outfp, fname, objids, pageids,
|
||||
dumpall=False, binary=False, debug=0):
|
||||
dumpall=False, codec=None, debug=0):
|
||||
doc = PDFDocument(debug=debug)
|
||||
fp = file(fname)
|
||||
parser = PDFParser(doc, fp, debug=debug)
|
||||
if objids:
|
||||
for objid in objids:
|
||||
obj = doc.getobj(objid)
|
||||
if binary and isinstance(obj, PDFStream):
|
||||
if codec == 'binary' and isinstance(obj, PDFStream):
|
||||
outfp.write(obj.get_data())
|
||||
else:
|
||||
dumpxml(outfp, obj)
|
||||
dumpxml(outfp, obj, codec=codec)
|
||||
if pageids:
|
||||
for page in doc.get_pages():
|
||||
if page.pageid in pageids:
|
||||
|
@ -129,17 +125,17 @@ def dumppdf(outfp, fname, objids, pageids,
|
|||
def main(argv):
|
||||
import getopt
|
||||
def usage():
|
||||
print 'usage: %s [-d] [-a] [-b] [-p pageid] [-i objid] file ...' % argv[0]
|
||||
print 'usage: %s [-d] [-a] [-c|-b] [-p pageid] [-i objid] file ...' % argv[0]
|
||||
return 100
|
||||
try:
|
||||
(opts, args) = getopt.getopt(argv[1:], 'dabi:p:')
|
||||
(opts, args) = getopt.getopt(argv[1:], 'dacbi:p:')
|
||||
except getopt.GetoptError:
|
||||
return usage()
|
||||
if not args: return usage()
|
||||
debug = 0
|
||||
objids = []
|
||||
pageids = set()
|
||||
binary = False
|
||||
codec = None
|
||||
dumpall = False
|
||||
outfp = stdout
|
||||
for (k, v) in opts:
|
||||
|
@ -147,12 +143,13 @@ def main(argv):
|
|||
elif k == '-i': objids.append(int(v))
|
||||
elif k == '-p': pageids.add(int(v))
|
||||
elif k == '-a': dumpall = True
|
||||
elif k == '-b': binary = True
|
||||
elif k == '-b': codec = 'binary'
|
||||
elif k == '-c': codec = 'text'
|
||||
elif k == '-o': outfp = file(v, 'w')
|
||||
#
|
||||
for fname in args:
|
||||
dumppdf(outfp, fname, objids, pageids,
|
||||
dumpall=dumpall, binary=binary, debug=debug)
|
||||
dumpall=dumpall, codec=codec, debug=debug)
|
||||
return
|
||||
|
||||
if __name__ == '__main__': sys.exit(main(sys.argv))
|
||||
|
|
|
@ -46,7 +46,8 @@ class FigureItem(PageItem):
|
|||
return ('<figure id=%r bbox="%s">' % (self.id, bbox))
|
||||
|
||||
def dump(self, outfp, codec):
|
||||
outfp.write(repr(self)+'\n')
|
||||
bbox = '%d,%d,%d,%d' % self.bbox
|
||||
outfp.write('<figure id="%s" bbox="%s">\n' % (self.id, bbox))
|
||||
for obj in self.objs:
|
||||
obj.dump(outfp, codec)
|
||||
outfp.write('</figure>\n')
|
||||
|
@ -126,6 +127,9 @@ class TextConverter(PDFDevice):
|
|||
self.context.add(fig)
|
||||
return
|
||||
|
||||
def render_image(self, stream, size, matrix):
|
||||
return
|
||||
|
||||
def handle_undefined_char(self, cidcoding, cid):
|
||||
if self.debug:
|
||||
print >>stderr, 'undefined: %r, %r' % (cidcoding, cid)
|
||||
|
|
16
pdfinterp.py
16
pdfinterp.py
|
@ -41,6 +41,7 @@ LITERAL_PDF = PSLiteralTable.intern('PDF')
|
|||
LITERAL_TEXT = PSLiteralTable.intern('Text')
|
||||
LITERAL_FONT = PSLiteralTable.intern('Font')
|
||||
LITERAL_FORM = PSLiteralTable.intern('Form')
|
||||
LITERAL_IMAGE = PSLiteralTable.intern('Image')
|
||||
LITERAL_STANDARD_ENCODING = PSLiteralTable.intern('StandardEncoding')
|
||||
LITERAL_DEVICE_GRAY = PSLiteralTable.intern('DeviceGray')
|
||||
LITERAL_DEVICE_RGB = PSLiteralTable.intern('DeviceRGB')
|
||||
|
@ -484,6 +485,8 @@ class PDFDevice:
|
|||
|
||||
def render_string(self, textstate, textmatrix, size, seq):
|
||||
raise NotImplementedError
|
||||
def render_image(self, stream, size, matrix):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
## PDFContentParser
|
||||
|
@ -942,9 +945,10 @@ class PDFPageInterpreter:
|
|||
if STRICT:
|
||||
raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
|
||||
return
|
||||
if xobj.dic.get('Subtype') == LITERAL_FORM and 'BBox' in xobj.dic:
|
||||
if 1 <= self.debug:
|
||||
print >>stderr, 'Processing xobj: %r' % xobj
|
||||
subtype = xobj.dic.get('Subtype')
|
||||
if subtype == LITERAL_FORM and 'BBox' in xobj.dic:
|
||||
interpreter = PDFPageInterpreter(self.rsrc, self.device)
|
||||
(x0,y0,x1,y1) = xobj.dic['BBox']
|
||||
ctm = mult_matrix(xobj.dic.get('Matrix', MATRIX_IDENTITY), self.ctm)
|
||||
|
@ -954,6 +958,16 @@ class PDFPageInterpreter:
|
|||
self.device.begin_figure(xobjid, bbox)
|
||||
interpreter.render_contents(xobj.dic.get('Resources'), [xobj], ctm=ctm)
|
||||
self.device.end_figure(xobjid)
|
||||
elif subtype == LITERAL_IMAGE and 'Width' in xobj.dic and 'Height' in xobj.dic:
|
||||
(x0,y0) = apply_matrix(self.ctm, (0,0))
|
||||
(x1,y1) = apply_matrix(self.ctm, (1,1))
|
||||
self.device.begin_figure(xobjid, (x0,y0,x1,y1))
|
||||
(w,h) = (xobj.dic['Width'], xobj.dic['Height'])
|
||||
self.device.render_image(xobj, (w,h), self.ctm)
|
||||
self.device.end_figure(xobjid)
|
||||
else:
|
||||
# unsupported xobject type.
|
||||
pass
|
||||
return
|
||||
|
||||
def process_page(self, page):
|
||||
|
|
Loading…
Reference in New Issue