apply more patches
git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@181 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
parent
0424fd8dc9
commit
2dee2efad9
|
@ -1,7 +1,9 @@
|
|||
# Makefile for pdfminer
|
||||
|
||||
RM=rm -f
|
||||
|
||||
all:
|
||||
|
||||
clean:
|
||||
-rm *.pyc *.pyo
|
||||
-$(RM) *.pyc *.pyo
|
||||
cd cmap && make clean
|
||||
|
|
|
@ -69,21 +69,27 @@ class PDFTextDevice(PDFDevice):
|
|||
scaling = textstate.scaling * .01
|
||||
charspace = textstate.charspace * scaling
|
||||
wordspace = textstate.wordspace * scaling
|
||||
if font.is_multibyte():
|
||||
wordspace = 0
|
||||
dxscale = .001 * fontsize * scaling
|
||||
if font.is_vertical():
|
||||
textstate.linematrix = self.render_string_vertical(
|
||||
seq, matrix, textstate.linematrix, font, fontsize, scaling, charspace, wordspace, dxscale)
|
||||
else:
|
||||
textstate.linematrix = self.render_string_horizontal(
|
||||
seq, matrix, textstate.linematrix, font, fontsize, scaling, charspace, wordspace, dxscale)
|
||||
return
|
||||
|
||||
def render_string_horizontal(self, seq, matrix, (x,y),
|
||||
font, fontsize, scaling, charspace, wordspace, dxscale):
|
||||
chars = []
|
||||
needspace = False
|
||||
(x,y) = textstate.linematrix
|
||||
for obj in seq:
|
||||
if isinstance(obj, int) or isinstance(obj, float):
|
||||
(dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
|
||||
fontsize, charspace, scaling, chars)
|
||||
x += dx
|
||||
x += dx - obj*dxscale
|
||||
y += dy
|
||||
d = -obj*dxscale
|
||||
if font.is_vertical():
|
||||
y += d
|
||||
else:
|
||||
x += d
|
||||
chars = []
|
||||
needspace = False
|
||||
else:
|
||||
|
@ -94,31 +100,58 @@ class PDFTextDevice(PDFDevice):
|
|||
(cidcoding, cid) = e.args
|
||||
char = self.handle_undefined_char(cidcoding, cid)
|
||||
chars.append((char, cid))
|
||||
if cid == 32 and textstate.wordspace and not font.is_multibyte():
|
||||
if cid == 32 and wordspace:
|
||||
if needspace:
|
||||
if font.is_vertical():
|
||||
y += charspace
|
||||
else:
|
||||
x += charspace
|
||||
(dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
|
||||
fontsize, charspace, scaling, chars)
|
||||
needspace = True
|
||||
x += dx
|
||||
x += dx + wordspace
|
||||
y += dy
|
||||
if font.is_vertical():
|
||||
y += wordspace
|
||||
else:
|
||||
x += wordspace
|
||||
chars = []
|
||||
if chars:
|
||||
if needspace:
|
||||
if font.is_vertical():
|
||||
y += charspace
|
||||
else:
|
||||
x += charspace
|
||||
(dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
|
||||
fontsize, charspace, scaling, chars)
|
||||
x += dx
|
||||
y += dy
|
||||
textstate.linematrix = (x,y)
|
||||
return
|
||||
return (x, y)
|
||||
|
||||
def render_string_vertical(self, seq, matrix, (x,y),
|
||||
font, fontsize, scaling, charspace, wordspace, dxscale):
|
||||
chars = []
|
||||
needspace = False
|
||||
for obj in seq:
|
||||
if isinstance(obj, int) or isinstance(obj, float):
|
||||
(dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
|
||||
fontsize, charspace, scaling, chars)
|
||||
x += dx
|
||||
y += dy - obj*dxscale
|
||||
chars = []
|
||||
needspace = False
|
||||
else:
|
||||
for cid in font.decode(obj):
|
||||
try:
|
||||
char = font.to_unichr(cid)
|
||||
except PDFUnicodeNotDefined, e:
|
||||
(cidcoding, cid) = e.args
|
||||
char = self.handle_undefined_char(cidcoding, cid)
|
||||
chars.append((char, cid))
|
||||
if cid == 32 and wordspace:
|
||||
if needspace:
|
||||
y += charspace
|
||||
(dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
|
||||
fontsize, charspace, scaling, chars)
|
||||
needspace = True
|
||||
x += dx
|
||||
y += dy + wordspace
|
||||
chars = []
|
||||
if chars:
|
||||
if needspace:
|
||||
y += charspace
|
||||
(dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
|
||||
fontsize, charspace, scaling, chars)
|
||||
x += dx
|
||||
y += dy
|
||||
return (x, y)
|
||||
|
|
|
@ -668,6 +668,11 @@ class PDFPageInterpreter(object):
|
|||
def do_ID(self): # never called
    """Placeholder that performs no work and returns None."""
    return None
|
||||
def do_EI(self, obj):
    """Render obj as an image on self.device when it has 'W' and 'H' entries.

    The image is wrapped in its own figure: begin_figure(), render_image()
    and end_figure() are called in that order, all keyed by str(id(obj)).
    When either entry is missing nothing is sent to the device.
    """
    has_size = 'W' in obj and 'H' in obj
    if not has_size:
        return
    figure_id = str(id(obj))
    # Fixed (0,0,1,1) box with the identity matrix, as in the original call.
    self.device.begin_figure(figure_id, (0,0,1,1), MATRIX_IDENTITY)
    self.device.render_image(figure_id, obj)
    self.device.end_figure(figure_id)
    return
|
||||
|
||||
# invoke an XObject
|
||||
|
|
|
@ -169,10 +169,13 @@ class PDFStream(PDFObject):
|
|||
|
||||
def __contains__(self, name):
    """Report whether name is present in self.attrs."""
    present = name in self.attrs
    return present
|
||||
|
||||
def __getitem__(self, name):
    """Return self.attrs[name]; any lookup error from attrs propagates."""
    attrs = self.attrs
    return attrs[name]
|
||||
|
||||
def get(self, name, default=None):
    """Return the self.attrs entry for name, or default when it is absent."""
    attrs = self.attrs
    return attrs.get(name, default)
|
||||
|
||||
def get_any(self, names, default=None):
|
||||
for name in names:
|
||||
if name in self.attrs:
|
||||
|
@ -216,6 +219,9 @@ class PDFStream(PDFObject):
|
|||
data = asciihexdecode(data)
|
||||
elif f in LITERALS_RUNLENGTH_DECODE:
|
||||
data = rldecode(data)
|
||||
elif f in LITERALS_CCITTFAX_DECODE:
|
||||
#data = ccittfaxdecode(data)
|
||||
raise PDFNotImplementedError('Unsupported filter: %r' % f)
|
||||
elif f == LITERAL_CRYPT:
|
||||
# not yet..
|
||||
raise PDFNotImplementedError('/Crypt filter is unsupported')
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
# GNUMakefile for test
|
||||
|
||||
RM=rm -f
|
||||
CMP=cmp
|
||||
PYTHON=python
|
||||
PDF2TXT=PYTHONPATH=.. $(PYTHON) ../tools/pdf2txt.py -p1
|
||||
|
||||
|
@ -39,18 +41,21 @@ XMLS= \
|
|||
all: htmls texts xmls
|
||||
|
||||
clean:
|
||||
-rm $(HTMLS)
|
||||
-rm $(TEXTS)
|
||||
-rm $(XMLS)
|
||||
-$(RM) $(HTMLS)
|
||||
-$(RM) $(TEXTS)
|
||||
-$(RM) $(XMLS)
|
||||
|
||||
htmls: $(HTMLS)
|
||||
texts: $(TEXTS)
|
||||
xmls: $(XMLS)
|
||||
|
||||
.SUFFIXES: .pdf .html .xml .txt
|
||||
|
||||
.pdf.html:
|
||||
$(PDF2TXT) -t html $< > $@
|
||||
|
||||
.pdf.xml:
|
||||
$(PDF2TXT) -t xml $< > $@
|
||||
|
||||
.pdf.txt:
|
||||
$(PDF2TXT) -t text $< > $@
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
# Makefile for tools
|
||||
|
||||
RM=rm -f
|
||||
|
||||
all:
|
||||
|
||||
clean:
|
||||
-rm *.pyc *.pyo
|
||||
-$(RM) *.pyc *.pyo
|
||||
|
|
Loading…
Reference in New Issue