Merge remote-tracking branch 'origin/master'

Conflicts:
	MANIFEST.in
	README.md
	pdfminer/latin_enc.py
	pdfminer/pdfdocument.py
	pdfminer/pdfinterp.py
	pdfminer/pdfpage.py
	pdfminer/pdftypes.py
	pdfminer/psparser.py
	pdfminer/utils.py
	samples/Makefile
	setup.py
pull/55/head
Philippe Guglielmetti 2017-01-19 08:03:16 +01:00
commit 52feb22eeb
6 changed files with 15 additions and 6 deletions

View File

@ -1,7 +1,9 @@
include Makefile include Makefile
include LICENSE include LICENSE
include *.txt
include *.md include *.md
include *.py include *.py
graft cmaprsrc
graft docs graft docs
graft pdfminer graft pdfminer
graft samples graft samples

View File

@ -1,4 +1,4 @@
## Makefile (for maintainance purpose) ## Makefile (for maintenance purpose)
## ##
PACKAGE=pdfminer PACKAGE=pdfminer

View File

@ -5,7 +5,7 @@ to decode text data written in CJK (Chinese, Japanese, Korean) language.
CMap resources are now available freely from Adobe web site: CMap resources are now available freely from Adobe web site:
http://opensource.adobe.com/wiki/display/cmap/CMap+Resources http://opensource.adobe.com/wiki/display/cmap/CMap+Resources
The follwing files were extracted from the downloadable tarballs: The following files were extracted from the downloadable tarballs:
cid2code_Adobe_CNS1.txt: cid2code_Adobe_CNS1.txt:
http://download.macromedia.com/pub/opensource/cmap/cmapresources_cns1-6.tar.z http://download.macromedia.com/pub/opensource/cmap/cmapresources_cns1-6.tar.z

View File

@ -214,7 +214,7 @@ class TextConverter(PDFConverter):
return return
# Some dummy functions to save memory/CPU when all that is wanted # Some dummy functions to save memory/CPU when all that is wanted
# is text. This stops all the image and drawing ouput from being # is text. This stops all the image and drawing output from being
# recorded and taking up RAM. # recorded and taking up RAM.
def render_image(self, name, stream): def render_image(self, name, stream):
if self.imagewriter is None: if self.imagewriter is None:
@ -349,7 +349,7 @@ class HTMLConverter(PDFConverter):
if self._font is not None: if self._font is not None:
self.write('</span>') self.write('</span>')
self.write('<span style="font-family: %s; font-size:%dpx">' % self.write('<span style="font-family: %s; font-size:%dpx">' %
(fontname, fontsize * self.scale * self.fontscale)) (enc(fontname), fontsize * self.scale * self.fontscale))
self._font = font self._font = font
self.write_text(text) self.write_text(text)
return return

View File

@ -682,13 +682,20 @@ class LTLayoutContainer(LTContainer):
for obj in empties: for obj in empties:
obj.analyze(laparams) obj.analyze(laparams)
textboxes = list(self.group_textlines(laparams, textlines)) textboxes = list(self.group_textlines(laparams, textlines))
if textboxes: if -1 <= laparams.boxes_flow and laparams.boxes_flow <= +1 and textboxes:
self.groups = self.group_textboxes(laparams, textboxes) self.groups = self.group_textboxes(laparams, textboxes)
assigner = IndexAssigner() assigner = IndexAssigner()
for group in self.groups: for group in self.groups:
group.analyze(laparams) group.analyze(laparams)
assigner.run(group) assigner.run(group)
textboxes.sort(key=lambda box: box.index) textboxes.sort(key=lambda box: box.index)
else:
def getkey(box):
if isinstance(box, LTTextBoxVertical):
return (0, -box.x1, box.y0)
else:
return (1, box.y0, box.x0)
textboxes.sort(key=getkey)
self._objs = textboxes + otherobjs + empties self._objs = textboxes + otherobjs + empties
return return

View File

@ -197,7 +197,7 @@ class WebApp(object):
convert(item.file, self.outfp, tmppath, pagenos=pagenos, codec=self.codec, convert(item.file, self.outfp, tmppath, pagenos=pagenos, codec=self.codec,
maxpages=self.MAXPAGES, maxfilesize=self.MAXFILESIZE, html=html) maxpages=self.MAXPAGES, maxfilesize=self.MAXFILESIZE, html=html)
except Exception, e: except Exception, e:
self.put('<p>Sorry, an error has occured: %s' % q(repr(e))) self.put('<p>Sorry, an error has occurred: %s' % q(repr(e)))
self.logger.error('convert: %r: path=%r: %s' % (e, traceback.format_exc())) self.logger.error('convert: %r: path=%r: %s' % (e, traceback.format_exc()))
finally: finally:
try: try: