apply more patches

git-svn-id: https://pdfminer.googlecode.com/svn/trunk/pdfminer@181 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
yusuke.shinyama.dummy 2010-02-13 15:00:43 +00:00
parent 0424fd8dc9
commit 2dee2efad9
6 changed files with 81 additions and 28 deletions

View File

@ -1,7 +1,9 @@
# Makefile for pdfminer # Makefile for pdfminer
RM=rm -f
all: all:
clean: clean:
-rm *.pyc *.pyo -$(RM) *.pyc *.pyo
cd cmap && make clean cd cmap && make clean

View File

@ -69,21 +69,27 @@ class PDFTextDevice(PDFDevice):
scaling = textstate.scaling * .01 scaling = textstate.scaling * .01
charspace = textstate.charspace * scaling charspace = textstate.charspace * scaling
wordspace = textstate.wordspace * scaling wordspace = textstate.wordspace * scaling
if font.is_multibyte():
wordspace = 0
dxscale = .001 * fontsize * scaling dxscale = .001 * fontsize * scaling
if font.is_vertical():
textstate.linematrix = self.render_string_vertical(
seq, matrix, textstate.linematrix, font, fontsize, scaling, charspace, wordspace, dxscale)
else:
textstate.linematrix = self.render_string_horizontal(
seq, matrix, textstate.linematrix, font, fontsize, scaling, charspace, wordspace, dxscale)
return
def render_string_horizontal(self, seq, matrix, (x,y),
font, fontsize, scaling, charspace, wordspace, dxscale):
chars = [] chars = []
needspace = False needspace = False
(x,y) = textstate.linematrix
for obj in seq: for obj in seq:
if isinstance(obj, int) or isinstance(obj, float): if isinstance(obj, int) or isinstance(obj, float):
(dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font, (dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
fontsize, charspace, scaling, chars) fontsize, charspace, scaling, chars)
x += dx x += dx - obj*dxscale
y += dy y += dy
d = -obj*dxscale
if font.is_vertical():
y += d
else:
x += d
chars = [] chars = []
needspace = False needspace = False
else: else:
@ -94,31 +100,58 @@ class PDFTextDevice(PDFDevice):
(cidcoding, cid) = e.args (cidcoding, cid) = e.args
char = self.handle_undefined_char(cidcoding, cid) char = self.handle_undefined_char(cidcoding, cid)
chars.append((char, cid)) chars.append((char, cid))
if cid == 32 and textstate.wordspace and not font.is_multibyte(): if cid == 32 and wordspace:
if needspace: if needspace:
if font.is_vertical():
y += charspace
else:
x += charspace x += charspace
(dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font, (dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
fontsize, charspace, scaling, chars) fontsize, charspace, scaling, chars)
needspace = True needspace = True
x += dx x += dx + wordspace
y += dy y += dy
if font.is_vertical():
y += wordspace
else:
x += wordspace
chars = [] chars = []
if chars: if chars:
if needspace: if needspace:
if font.is_vertical():
y += charspace
else:
x += charspace x += charspace
(dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font, (dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
fontsize, charspace, scaling, chars) fontsize, charspace, scaling, chars)
x += dx x += dx
y += dy y += dy
textstate.linematrix = (x,y) return (x, y)
return
def render_string_vertical(self, seq, matrix, (x,y),
                           font, fontsize, scaling, charspace, wordspace, dxscale):
    # Render the items of seq starting at (x,y) and return the position
    # reached. Adjustments and spacing are applied along the y axis.
    #   seq: iterable mixing numbers (int/float) with objects that
    #        font.decode() turns into character ids (cids).
    #   matrix: combined with the current (x,y) through translate_matrix()
    #        for every render_chars() call.
    #   charspace, wordspace, dxscale: spacing amounts supplied by the caller.
    # Returns the updated (x, y) tuple.
    # NOTE(review): indentation was lost in this view and has been
    # reconstructed here; confirm that the trailing "if chars:" flush sits
    # after the "for" loop rather than inside it.
    chars = []
    needspace = False
    for obj in seq:
        if isinstance(obj, int) or isinstance(obj, float):
            # A number ends the current run: flush the characters collected
            # so far, then shift y by -obj*dxscale on top of the advance
            # reported by render_chars().
            (dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
                                        fontsize, charspace, scaling, chars)
            x += dx
            y += dy - obj*dxscale
            # Start a fresh run; the charspace carry-over is cancelled.
            chars = []
            needspace = False
        else:
            for cid in font.decode(obj):
                try:
                    char = font.to_unichr(cid)
                except PDFUnicodeNotDefined, e:
                    # The exception args carry (cidcoding, cid); cid is
                    # rebound from them before asking for a substitute char.
                    (cidcoding, cid) = e.args
                    char = self.handle_undefined_char(cidcoding, cid)
                chars.append((char, cid))
                # cid 32 (presumably the space character -- confirm) ends the
                # run, but only when wordspace is non-zero.
                if cid == 32 and wordspace:
                    # needspace is True only if an earlier run was already
                    # ended by a cid-32 flush since the last number.
                    if needspace:
                        y += charspace
                    (dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
                                                fontsize, charspace, scaling, chars)
                    needspace = True
                    x += dx
                    y += dy + wordspace
                    chars = []
    # Flush whatever is still pending once the sequence is exhausted.
    if chars:
        if needspace:
            y += charspace
        (dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
                                    fontsize, charspace, scaling, chars)
        x += dx
        y += dy
    return (x, y)

View File

@ -668,6 +668,11 @@ class PDFPageInterpreter(object):
def do_ID(self): # never called def do_ID(self): # never called
return return
def do_EI(self, obj): def do_EI(self, obj):
if 'W' in obj and 'H' in obj:
iobjid = str(id(obj))
self.device.begin_figure(iobjid, (0,0,1,1), MATRIX_IDENTITY)
self.device.render_image(iobjid, obj)
self.device.end_figure(iobjid)
return return
# invoke an XObject # invoke an XObject

View File

@ -169,10 +169,13 @@ class PDFStream(PDFObject):
def __contains__(self, name): def __contains__(self, name):
return name in self.attrs return name in self.attrs
def __getitem__(self, name): def __getitem__(self, name):
return self.attrs[name] return self.attrs[name]
def get(self, name, default=None): def get(self, name, default=None):
return self.attrs.get(name, default) return self.attrs.get(name, default)
def get_any(self, names, default=None): def get_any(self, names, default=None):
for name in names: for name in names:
if name in self.attrs: if name in self.attrs:
@ -216,6 +219,9 @@ class PDFStream(PDFObject):
data = asciihexdecode(data) data = asciihexdecode(data)
elif f in LITERALS_RUNLENGTH_DECODE: elif f in LITERALS_RUNLENGTH_DECODE:
data = rldecode(data) data = rldecode(data)
elif f in LITERALS_CCITTFAX_DECODE:
#data = ccittfaxdecode(data)
raise PDFNotImplementedError('Unsupported filter: %r' % f)
elif f == LITERAL_CRYPT: elif f == LITERAL_CRYPT:
# not yet.. # not yet..
raise PDFNotImplementedError('/Crypt filter is unsupported') raise PDFNotImplementedError('/Crypt filter is unsupported')

View File

@ -1,5 +1,7 @@
# GNUMakefile for test # GNUMakefile for test
RM=rm -f
CMP=cmp
PYTHON=python PYTHON=python
PDF2TXT=PYTHONPATH=.. $(PYTHON) ../tools/pdf2txt.py -p1 PDF2TXT=PYTHONPATH=.. $(PYTHON) ../tools/pdf2txt.py -p1
@ -39,18 +41,21 @@ XMLS= \
all: htmls texts xmls all: htmls texts xmls
clean: clean:
-rm $(HTMLS) -$(RM) $(HTMLS)
-rm $(TEXTS) -$(RM) $(TEXTS)
-rm $(XMLS) -$(RM) $(XMLS)
htmls: $(HTMLS) htmls: $(HTMLS)
texts: $(TEXTS) texts: $(TEXTS)
xmls: $(XMLS) xmls: $(XMLS)
.SUFFIXES: .pdf .html .xml .txt .SUFFIXES: .pdf .html .xml .txt
.pdf.html: .pdf.html:
$(PDF2TXT) -t html $< > $@ $(PDF2TXT) -t html $< > $@
.pdf.xml: .pdf.xml:
$(PDF2TXT) -t xml $< > $@ $(PDF2TXT) -t xml $< > $@
.pdf.txt: .pdf.txt:
$(PDF2TXT) -t text $< > $@ $(PDF2TXT) -t text $< > $@

View File

@ -1,6 +1,8 @@
# Makefile for tools # Makefile for tools
RM=rm -f
all: all:
clean: clean:
-rm *.pyc *.pyo -$(RM) *.pyc *.pyo