some wordings and documentations
git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@229 1aa58f4a-7d42-0410-adbc-911cccaed67cpull/1/head
parent
a0dd46bd8e
commit
f5aff374fc
|
@ -1,7 +1,7 @@
|
|||
README.txt for cmaprsrc
|
||||
|
||||
This directory contains Adobe CMap resources. CMaps are required
|
||||
to decode text data written in the Chinese, Japanese or Korean languages.
|
||||
to decode text data written in CJK (Chinese, Japanese, Korean) languages.
|
||||
CMap resources are now freely available from the Adobe web site:
|
||||
http://opensource.adobe.com/wiki/display/cmap/CMap+Resources
|
||||
|
||||
|
|
|
@ -559,15 +559,21 @@ class LTAnalyzer(LTContainer):
|
|||
def analyze(self, laparams):
|
||||
"""Perform the layout analysis."""
|
||||
(textobjs, otherobjs) = self.get_textobjs()
|
||||
# textobjs is a list of LTChar objects, i.e.
|
||||
# it has all the individual characters in the page.
|
||||
if not laparams or not textobjs: return
|
||||
if laparams.writing_mode not in ('lr-tb', 'tb-rl'):
|
||||
laparams.writing_mode = guess_wmode(textobjs)
|
||||
if (laparams.writing_mode.startswith('tb-') or
|
||||
laparams.writing_mode.startswith('bt-')):
|
||||
# assemble them into vertical rows of text.
|
||||
textboxes = self.build_textbox_vertical(textobjs, laparams)
|
||||
# turn them into a tree.
|
||||
top = self.group_textbox_tb_rl(textboxes, laparams)
|
||||
else:
|
||||
# assemble them into horizontal rows of text.
|
||||
textboxes = self.build_textbox_horizontal(textobjs, laparams)
|
||||
# turn them into a tree.
|
||||
top = self.group_textbox_lr_tb(textboxes, laparams)
|
||||
def assign_index(obj, i):
|
||||
if isinstance(obj, LTTextBox):
|
||||
|
@ -635,7 +641,7 @@ class LTAnalyzer(LTContainer):
|
|||
# | |
|
||||
# +------+
|
||||
#
|
||||
# |<--->|
|
||||
# |<-->|
|
||||
# (line_overlap)
|
||||
return ((min(obj1.width, obj2.width) * laparams.line_overlap < obj1.hoverlap(obj2)) and
|
||||
(obj1.vdistance(obj2) < min(obj1.height, obj2.height) * laparams.char_margin))
|
||||
|
@ -656,6 +662,17 @@ class LTAnalyzer(LTContainer):
|
|||
|
||||
def group_textbox_lr_tb(self, boxes, laparams):
|
||||
def dist(obj1, obj2):
|
||||
"""A distance function between two TextBoxes.
|
||||
|
||||
Consider the bounding rectangle for obj1 and obj2.
|
||||
Return its area less the areas of obj1 and obj2,
|
||||
shown as 'www' below. This value may be negative.
|
||||
+------+..........+
|
||||
| obj1 |wwwwwwwwww:
|
||||
+------+www+------+
|
||||
:wwwwwwwwww| obj2 |
|
||||
+..........+------+
|
||||
"""
|
||||
return ((max(obj1.x1,obj2.x1) - min(obj1.x0,obj2.x0)) *
|
||||
(max(obj1.y1,obj2.y1) - min(obj1.y0,obj2.y0)) -
|
||||
(obj1.width*obj1.height + obj2.width*obj2.height))
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
This directory contains sample PDF files.
|
||||
|
||||
The files in the nonfree/ subdirectory can be distributed freely
|
||||
but do not come with explicit licensing terms or source files.
|
||||
These files (including the ones in the nonfree/ subdirectory) can be
|
||||
distributed freely but do not come with explicit licensing
|
||||
terms or source files.
|
||||
|
||||
Here are the credits of the original files:
|
||||
|
||||
|
@ -16,7 +17,7 @@ simple2.pdf:
|
|||
jo.pdf:
|
||||
Kenji Miyazawa (1896-1933, copyright expired)
|
||||
Preface of "Haru to Shura"
|
||||
(File generated by LaTeX and dvi2pdfm)
|
||||
(File generated from jo.tex by LaTeX and dvi2pdfm)
|
||||
|
||||
--
|
||||
nonfree/dmca.pdf:
|
||||
|
|
Loading…
Reference in New Issue