apply more patches
git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@181 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
parent
0424fd8dc9
commit
2dee2efad9
|
@ -1,7 +1,9 @@
|
||||||
# Makefile for pdfminer
|
# Makefile for pdfminer
|
||||||
|
|
||||||
|
RM=rm -f
|
||||||
|
|
||||||
all:
|
all:
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
-rm *.pyc *.pyo
|
-$(RM) *.pyc *.pyo
|
||||||
cd cmap && make clean
|
cd cmap && make clean
|
||||||
|
|
|
@ -69,21 +69,27 @@ class PDFTextDevice(PDFDevice):
|
||||||
scaling = textstate.scaling * .01
|
scaling = textstate.scaling * .01
|
||||||
charspace = textstate.charspace * scaling
|
charspace = textstate.charspace * scaling
|
||||||
wordspace = textstate.wordspace * scaling
|
wordspace = textstate.wordspace * scaling
|
||||||
|
if font.is_multibyte():
|
||||||
|
wordspace = 0
|
||||||
dxscale = .001 * fontsize * scaling
|
dxscale = .001 * fontsize * scaling
|
||||||
|
if font.is_vertical():
|
||||||
|
textstate.linematrix = self.render_string_vertical(
|
||||||
|
seq, matrix, textstate.linematrix, font, fontsize, scaling, charspace, wordspace, dxscale)
|
||||||
|
else:
|
||||||
|
textstate.linematrix = self.render_string_horizontal(
|
||||||
|
seq, matrix, textstate.linematrix, font, fontsize, scaling, charspace, wordspace, dxscale)
|
||||||
|
return
|
||||||
|
|
||||||
|
def render_string_horizontal(self, seq, matrix, (x,y),
|
||||||
|
font, fontsize, scaling, charspace, wordspace, dxscale):
|
||||||
chars = []
|
chars = []
|
||||||
needspace = False
|
needspace = False
|
||||||
(x,y) = textstate.linematrix
|
|
||||||
for obj in seq:
|
for obj in seq:
|
||||||
if isinstance(obj, int) or isinstance(obj, float):
|
if isinstance(obj, int) or isinstance(obj, float):
|
||||||
(dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
|
(dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
|
||||||
fontsize, charspace, scaling, chars)
|
fontsize, charspace, scaling, chars)
|
||||||
x += dx
|
x += dx - obj*dxscale
|
||||||
y += dy
|
y += dy
|
||||||
d = -obj*dxscale
|
|
||||||
if font.is_vertical():
|
|
||||||
y += d
|
|
||||||
else:
|
|
||||||
x += d
|
|
||||||
chars = []
|
chars = []
|
||||||
needspace = False
|
needspace = False
|
||||||
else:
|
else:
|
||||||
|
@ -94,31 +100,58 @@ class PDFTextDevice(PDFDevice):
|
||||||
(cidcoding, cid) = e.args
|
(cidcoding, cid) = e.args
|
||||||
char = self.handle_undefined_char(cidcoding, cid)
|
char = self.handle_undefined_char(cidcoding, cid)
|
||||||
chars.append((char, cid))
|
chars.append((char, cid))
|
||||||
if cid == 32 and textstate.wordspace and not font.is_multibyte():
|
if cid == 32 and wordspace:
|
||||||
if needspace:
|
if needspace:
|
||||||
if font.is_vertical():
|
x += charspace
|
||||||
y += charspace
|
|
||||||
else:
|
|
||||||
x += charspace
|
|
||||||
(dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
|
(dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
|
||||||
fontsize, charspace, scaling, chars)
|
fontsize, charspace, scaling, chars)
|
||||||
needspace = True
|
needspace = True
|
||||||
x += dx
|
x += dx + wordspace
|
||||||
y += dy
|
y += dy
|
||||||
if font.is_vertical():
|
|
||||||
y += wordspace
|
|
||||||
else:
|
|
||||||
x += wordspace
|
|
||||||
chars = []
|
chars = []
|
||||||
if chars:
|
if chars:
|
||||||
if needspace:
|
if needspace:
|
||||||
if font.is_vertical():
|
x += charspace
|
||||||
y += charspace
|
|
||||||
else:
|
|
||||||
x += charspace
|
|
||||||
(dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
|
(dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
|
||||||
fontsize, charspace, scaling, chars)
|
fontsize, charspace, scaling, chars)
|
||||||
x += dx
|
x += dx
|
||||||
y += dy
|
y += dy
|
||||||
textstate.linematrix = (x,y)
|
return (x, y)
|
||||||
return
|
|
||||||
|
def render_string_vertical(self, seq, matrix, (x,y),
|
||||||
|
font, fontsize, scaling, charspace, wordspace, dxscale):
|
||||||
|
chars = []
|
||||||
|
needspace = False
|
||||||
|
for obj in seq:
|
||||||
|
if isinstance(obj, int) or isinstance(obj, float):
|
||||||
|
(dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
|
||||||
|
fontsize, charspace, scaling, chars)
|
||||||
|
x += dx
|
||||||
|
y += dy - obj*dxscale
|
||||||
|
chars = []
|
||||||
|
needspace = False
|
||||||
|
else:
|
||||||
|
for cid in font.decode(obj):
|
||||||
|
try:
|
||||||
|
char = font.to_unichr(cid)
|
||||||
|
except PDFUnicodeNotDefined, e:
|
||||||
|
(cidcoding, cid) = e.args
|
||||||
|
char = self.handle_undefined_char(cidcoding, cid)
|
||||||
|
chars.append((char, cid))
|
||||||
|
if cid == 32 and wordspace:
|
||||||
|
if needspace:
|
||||||
|
y += charspace
|
||||||
|
(dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
|
||||||
|
fontsize, charspace, scaling, chars)
|
||||||
|
needspace = True
|
||||||
|
x += dx
|
||||||
|
y += dy + wordspace
|
||||||
|
chars = []
|
||||||
|
if chars:
|
||||||
|
if needspace:
|
||||||
|
y += charspace
|
||||||
|
(dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
|
||||||
|
fontsize, charspace, scaling, chars)
|
||||||
|
x += dx
|
||||||
|
y += dy
|
||||||
|
return (x, y)
|
||||||
|
|
|
@ -668,6 +668,11 @@ class PDFPageInterpreter(object):
|
||||||
def do_ID(self): # never called
|
def do_ID(self): # never called
|
||||||
return
|
return
|
||||||
def do_EI(self, obj):
|
def do_EI(self, obj):
|
||||||
|
if 'W' in obj and 'H' in obj:
|
||||||
|
iobjid = str(id(obj))
|
||||||
|
self.device.begin_figure(iobjid, (0,0,1,1), MATRIX_IDENTITY)
|
||||||
|
self.device.render_image(iobjid, obj)
|
||||||
|
self.device.end_figure(iobjid)
|
||||||
return
|
return
|
||||||
|
|
||||||
# invoke an XObject
|
# invoke an XObject
|
||||||
|
|
|
@ -169,10 +169,13 @@ class PDFStream(PDFObject):
|
||||||
|
|
||||||
def __contains__(self, name):
|
def __contains__(self, name):
|
||||||
return name in self.attrs
|
return name in self.attrs
|
||||||
|
|
||||||
def __getitem__(self, name):
|
def __getitem__(self, name):
|
||||||
return self.attrs[name]
|
return self.attrs[name]
|
||||||
|
|
||||||
def get(self, name, default=None):
|
def get(self, name, default=None):
|
||||||
return self.attrs.get(name, default)
|
return self.attrs.get(name, default)
|
||||||
|
|
||||||
def get_any(self, names, default=None):
|
def get_any(self, names, default=None):
|
||||||
for name in names:
|
for name in names:
|
||||||
if name in self.attrs:
|
if name in self.attrs:
|
||||||
|
@ -216,6 +219,9 @@ class PDFStream(PDFObject):
|
||||||
data = asciihexdecode(data)
|
data = asciihexdecode(data)
|
||||||
elif f in LITERALS_RUNLENGTH_DECODE:
|
elif f in LITERALS_RUNLENGTH_DECODE:
|
||||||
data = rldecode(data)
|
data = rldecode(data)
|
||||||
|
elif f in LITERALS_CCITTFAX_DECODE:
|
||||||
|
#data = ccittfaxdecode(data)
|
||||||
|
raise PDFNotImplementedError('Unsupported filter: %r' % f)
|
||||||
elif f == LITERAL_CRYPT:
|
elif f == LITERAL_CRYPT:
|
||||||
# not yet..
|
# not yet..
|
||||||
raise PDFNotImplementedError('/Crypt filter is unsupported')
|
raise PDFNotImplementedError('/Crypt filter is unsupported')
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
# GNUMakefile for test
|
# GNUMakefile for test
|
||||||
|
|
||||||
|
RM=rm -f
|
||||||
|
CMP=cmp
|
||||||
PYTHON=python
|
PYTHON=python
|
||||||
PDF2TXT=PYTHONPATH=.. $(PYTHON) ../tools/pdf2txt.py -p1
|
PDF2TXT=PYTHONPATH=.. $(PYTHON) ../tools/pdf2txt.py -p1
|
||||||
|
|
||||||
|
@ -39,18 +41,21 @@ XMLS= \
|
||||||
all: htmls texts xmls
|
all: htmls texts xmls
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
-rm $(HTMLS)
|
-$(RM) $(HTMLS)
|
||||||
-rm $(TEXTS)
|
-$(RM) $(TEXTS)
|
||||||
-rm $(XMLS)
|
-$(RM) $(XMLS)
|
||||||
|
|
||||||
htmls: $(HTMLS)
|
htmls: $(HTMLS)
|
||||||
texts: $(TEXTS)
|
texts: $(TEXTS)
|
||||||
xmls: $(XMLS)
|
xmls: $(XMLS)
|
||||||
|
|
||||||
.SUFFIXES: .pdf .html .xml .txt
|
.SUFFIXES: .pdf .html .xml .txt
|
||||||
|
|
||||||
.pdf.html:
|
.pdf.html:
|
||||||
$(PDF2TXT) -t html $< > $@
|
$(PDF2TXT) -t html $< > $@
|
||||||
|
|
||||||
.pdf.xml:
|
.pdf.xml:
|
||||||
$(PDF2TXT) -t xml $< > $@
|
$(PDF2TXT) -t xml $< > $@
|
||||||
|
|
||||||
.pdf.txt:
|
.pdf.txt:
|
||||||
$(PDF2TXT) -t text $< > $@
|
$(PDF2TXT) -t text $< > $@
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
# Makefile for tools
|
# Makefile for tools
|
||||||
|
|
||||||
|
RM=rm -f
|
||||||
|
|
||||||
all:
|
all:
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
-rm *.pyc *.pyo
|
-$(RM) *.pyc *.pyo
|
||||||
|
|
Loading…
Reference in New Issue