test cases updated

git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@282 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
yusuke.shinyama.dummy 2010-12-25 08:41:11 +00:00
parent 84ed94aec0
commit 5d98a27d9c
21 changed files with 5742 additions and 24161 deletions

View File

@ -5,7 +5,7 @@ RM=rm -f
CMP=: CMP=:
PYTHON=python2 PYTHON=python2
PDF2TXT=PYTHONPATH=.. $(PYTHON) ../tools/pdf2txt.py -Dx -p1 PDF2TXT=PYTHONPATH=.. $(PYTHON) ../tools/pdf2txt.py -p1
HTMLS=$(HTMLS_FREE) $(HTMLS_NONFREE) HTMLS=$(HTMLS_FREE) $(HTMLS_NONFREE)
HTMLS_FREE= \ HTMLS_FREE= \
@ -49,8 +49,7 @@ XMLS_NONFREE= \
nonfree/naacl06-shinyama.xml \ nonfree/naacl06-shinyama.xml \
nonfree/nlp2004slides.xml nonfree/nlp2004slides.xml
all: all: test
$(MAKE) test CMP=cmp
test: htmls texts xmls test: htmls texts xmls

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -14,6 +14,16 @@ number of other significant copyright-related issues.
The DMCA is divided into five titles: The DMCA is divided into five titles:
!
!
!
!
!
Title I, the “WIPO Copyright and Performances and Phonograms Title I, the “WIPO Copyright and Performances and Phonograms
Treaties Implementation Act of 1998,” implements the WIPO Treaties Implementation Act of 1998,” implements the WIPO
treaties. treaties.
@ -32,16 +42,6 @@ of rights in motion pictures.
Title V, the “Vessel Hull Design Protection Act,” creates a new form Title V, the “Vessel Hull Design Protection Act,” creates a new form
of protection for the design of vessel hulls. of protection for the design of vessel hulls.
!
!
!
!
!
This memorandum summarizes briefly each title of the DMCA. It provides This memorandum summarizes briefly each title of the DMCA. It provides
merely an overview of the laws provisions; for purposes of length and readability a merely an overview of the laws provisions; for purposes of length and readability a
significant amount of detail has been omitted. A complete understanding of any significant amount of detail has been omitted. A complete understanding of any

View File

@ -568,7 +568,42 @@
</text> </text>
</textline> </textline>
</textbox> </textbox>
<textbox id="5" bbox="180.000,235.452,504.108,464.124"> <textbox id="5" bbox="144.000,450.000,152.004,462.480">
<textline bbox="144.000,450.000,152.004,462.480">
<text font="ELCKGH+WPTypographicSymbols" bbox="144.000,450.000,152.004,462.480" size="12.480">!</text>
<text>
</text>
</textline>
</textbox>
<textbox id="6" bbox="144.000,409.680,152.004,422.160">
<textline bbox="144.000,409.680,152.004,422.160">
<text font="ELCKGH+WPTypographicSymbols" bbox="144.000,409.680,152.004,422.160" size="12.480">!</text>
<text>
</text>
</textline>
</textbox>
<textbox id="7" bbox="144.000,369.360,152.004,381.840">
<textline bbox="144.000,369.360,152.004,381.840">
<text font="ELCKGH+WPTypographicSymbols" bbox="144.000,369.360,152.004,381.840" size="12.480">!</text>
<text>
</text>
</textline>
</textbox>
<textbox id="8" bbox="144.000,329.040,152.004,341.520">
<textline bbox="144.000,329.040,152.004,341.520">
<text font="ELCKGH+WPTypographicSymbols" bbox="144.000,329.040,152.004,341.520" size="12.480">!</text>
<text>
</text>
</textline>
</textbox>
<textbox id="9" bbox="144.000,248.400,152.004,260.880">
<textline bbox="144.000,248.400,152.004,260.880">
<text font="ELCKGH+WPTypographicSymbols" bbox="144.000,248.400,152.004,260.880" size="12.480">!</text>
<text>
</text>
</textline>
</textbox>
<textbox id="10" bbox="180.000,235.452,504.108,464.124">
<textline bbox="180.000,450.492,503.985,464.124"> <textline bbox="180.000,450.492,503.985,464.124">
<text font="Garamond" bbox="180.000,450.492,187.380,464.064" size="13.572">T</text> <text font="Garamond" bbox="180.000,450.492,187.380,464.064" size="13.572">T</text>
<text font="Garamond" bbox="187.440,450.492,190.188,464.064" size="13.572">i</text> <text font="Garamond" bbox="187.440,450.492,190.188,464.064" size="13.572">i</text>
@ -1669,41 +1704,6 @@
</text> </text>
</textline> </textline>
</textbox> </textbox>
<textbox id="6" bbox="144.000,450.000,152.004,462.480">
<textline bbox="144.000,450.000,152.004,462.480">
<text font="ELCKGH+WPTypographicSymbols" bbox="144.000,450.000,152.004,462.480" size="12.480">!</text>
<text>
</text>
</textline>
</textbox>
<textbox id="7" bbox="144.000,409.680,152.004,422.160">
<textline bbox="144.000,409.680,152.004,422.160">
<text font="ELCKGH+WPTypographicSymbols" bbox="144.000,409.680,152.004,422.160" size="12.480">!</text>
<text>
</text>
</textline>
</textbox>
<textbox id="8" bbox="144.000,369.360,152.004,381.840">
<textline bbox="144.000,369.360,152.004,381.840">
<text font="ELCKGH+WPTypographicSymbols" bbox="144.000,369.360,152.004,381.840" size="12.480">!</text>
<text>
</text>
</textline>
</textbox>
<textbox id="9" bbox="144.000,329.040,152.004,341.520">
<textline bbox="144.000,329.040,152.004,341.520">
<text font="ELCKGH+WPTypographicSymbols" bbox="144.000,329.040,152.004,341.520" size="12.480">!</text>
<text>
</text>
</textline>
</textbox>
<textbox id="10" bbox="144.000,248.400,152.004,260.880">
<textline bbox="144.000,248.400,152.004,260.880">
<text font="ELCKGH+WPTypographicSymbols" bbox="144.000,248.400,152.004,260.880" size="12.480">!</text>
<text>
</text>
</textline>
</textbox>
<textbox id="11" bbox="108.000,168.360,504.108,222.144"> <textbox id="11" bbox="108.000,168.360,504.108,222.144">
<textline bbox="144.000,208.572,504.060,222.144"> <textline bbox="144.000,208.572,504.060,222.144">
<text font="Garamond" bbox="144.000,208.572,151.380,222.144" size="13.572">T</text> <text font="Garamond" bbox="144.000,208.572,151.380,222.144" size="13.572">T</text>
@ -2194,20 +2194,20 @@
<textgroup bbox="144.000,235.452,504.108,490.944"> <textgroup bbox="144.000,235.452,504.108,490.944">
<textbox id="4" bbox="144.000,477.372,323.881,490.944" /> <textbox id="4" bbox="144.000,477.372,323.881,490.944" />
<textgroup bbox="144.000,235.452,504.108,464.124"> <textgroup bbox="144.000,235.452,504.108,464.124">
<textbox id="5" bbox="180.000,235.452,504.108,464.124" />
<textgroup bbox="144.000,248.400,152.004,462.480"> <textgroup bbox="144.000,248.400,152.004,462.480">
<textgroup bbox="144.000,329.040,152.004,462.480"> <textgroup bbox="144.000,329.040,152.004,462.480">
<textgroup bbox="144.000,369.360,152.004,462.480"> <textgroup bbox="144.000,369.360,152.004,462.480">
<textgroup bbox="144.000,409.680,152.004,462.480"> <textgroup bbox="144.000,409.680,152.004,462.480">
<textbox id="6" bbox="144.000,450.000,152.004,462.480" /> <textbox id="5" bbox="144.000,450.000,152.004,462.480" />
<textbox id="7" bbox="144.000,409.680,152.004,422.160" /> <textbox id="6" bbox="144.000,409.680,152.004,422.160" />
</textgroup> </textgroup>
<textbox id="8" bbox="144.000,369.360,152.004,381.840" /> <textbox id="7" bbox="144.000,369.360,152.004,381.840" />
</textgroup> </textgroup>
<textbox id="9" bbox="144.000,329.040,152.004,341.520" /> <textbox id="8" bbox="144.000,329.040,152.004,341.520" />
</textgroup> </textgroup>
<textbox id="10" bbox="144.000,248.400,152.004,260.880" /> <textbox id="9" bbox="144.000,248.400,152.004,260.880" />
</textgroup> </textgroup>
<textbox id="10" bbox="180.000,235.452,504.108,464.124" />
</textgroup> </textgroup>
</textgroup> </textgroup>
<textbox id="11" bbox="108.000,168.360,504.108,222.144" /> <textbox id="11" bbox="108.000,168.360,504.108,222.144" />

File diff suppressed because it is too large Load Diff

View File

@ -6,7 +6,7 @@ OMB No. 1545-0074
Identifying number (see page 8) Identifying number (see page 8)
I I
Check if: Check if:
@ -18,11 +18,11 @@ Type of entry visa (see page 8)
7 a 7a
Yourself Yourself
7 b 7b
Spouse Spouse
@ -45,10 +45,8 @@ on 7a and 7b
No. of children on No. of children on
7c who: 7c who:
lived with you ● lived with you
● did not live with
● did not live with
you due to divorce you due to divorce
or separation or separation
@ -60,14 +58,13 @@ on lines above
8 8
9 a 9a
A A
1040NR 1040NR
Form Form
Department of the Treasury Department of the Treasury
beginning beginning
Internal Revenue Service Internal Revenue Service
@ -92,8 +89,8 @@ Country 䊳
Of what country were you a citizen or national during the tax year? 䊳 Of what country were you a citizen or national during the tax year? 䊳
Give address outside the United States to which you want any
Give address in the country where you are a permanent resident. Give address in the country where you are a permanent resident.
Give address outside the United States to which you want any
refund check mailed. If same as above, write “Same.” refund check mailed. If same as above, write “Same.”
If same as above, write “Same.” If same as above, write “Same.”
@ -103,12 +100,12 @@ If same as above, write “Same.”
.epyt ro tnirp esaelP Please print or type.
.dlehhtiw saw xat fi R-9901 )s(mroF hcatta oslA Also attach Form(s) 1099-R if tax was withheld.
.ereh 2-W smroF hcattA Attach Forms W-2 here.
@ -128,6 +125,10 @@ child for child tax
credit (see page 9) credit (see page 9)
Filing Status and Exemptions for Individuals (see page 8) Filing Status and Exemptions for Individuals (see page 8)
@ -160,14 +161,10 @@ Qualifying widow(er) with dependent child (see page 9)
Caution: Do not check box 7a if your parent (or someone else) can claim you as a dependent. Caution: Do not check box 7a if your parent (or someone else) can claim you as a dependent.
Do not check box 7b if your spouse had any U.S. gross income. Do not check box 7b if your spouse had any U.S. gross income.
7 c 7c
Dependents: (see page 9) Dependents: (see page 9)
(1) First name (1) First name
@ -212,25 +209,22 @@ identifying number
10a 10a
11
1 1 12
1 2 13
1 3 14
1 4 15
1 5
16b 16b
17b 17b
18
19
20
21
1 8 23
1 9
2 0
2 1
2 3 34
35
3 4
3 5
d d
@ -245,23 +239,7 @@ d
9b
9 b
8
9 a
b
10a
b
Total number of exemptions claimed Total number of exemptions claimed
@ -273,22 +251,21 @@ Tax-exempt interest. Do not include on line 9a
Ordinary dividends Ordinary dividends
10b 10b
Qualified dividends (see page 11) Qualified dividends (see page 11)
1 1 11
Taxable refunds, credits, or offsets of state and local income taxes (see page 11) Taxable refunds, credits, or offsets of state and local income taxes (see page 11)
1 2 12
Scholarship and fellowship grants. Attach Form(s) 1042-S or required statement (see page 11) Scholarship and fellowship grants. Attach Form(s) 1042-S or required statement (see page 11)
1 3 13
Business income or (loss). Attach Schedule C or C-EZ (Form 1040) Business income or (loss). Attach Schedule C or C-EZ (Form 1040)
1 4 14
Capital gain or (loss). Attach Schedule D (Form 1040) if required. If not required, check here Capital gain or (loss). Attach Schedule D (Form 1040) if required. If not required, check here
1 5 15
Other gains or (losses). Attach Form 4797 Other gains or (losses). Attach Form 4797
16a 16a
16b 16b
@ -296,8 +273,6 @@ Other gains or (losses). Attach Form 4797
Taxable amount (see page 12) Taxable amount (see page 12)
IRA distributions IRA distributions
17a 17a
@ -308,77 +283,88 @@ Pensions and annuities
18
1 8
Rental real estate, royalties, partnerships, trusts, etc. Attach Schedule E (Form 1040) Rental real estate, royalties, partnerships, trusts, etc. Attach Schedule E (Form 1040)
1 9 19
Farm income or (loss). Attach Schedule F (Form 1040) Farm income or (loss). Attach Schedule F (Form 1040)
2 0 20
Unemployment compensation Unemployment compensation
2 1 21
Other income. List type and amount (see page 15) Other income. List type and amount (see page 15)
2 2 22
2 2 22
Total income exempt by a treaty from page 5, Item M Total income exempt by a treaty from page 5, Item M
2 3 23
Add lines 8, 9a, 10a, 1115, 16b, and 17b21. This is your total effectively connected income Add lines 8, 9a, 10a, 1115, 16b, and 17b21. This is your total effectively connected income
2 4
2 4 24
24
Educator expenses (see page 15) Educator expenses (see page 15)
2 5 25
2 5 25
Health savings account deduction. Attach Form 8889 Health savings account deduction. Attach Form 8889
2 6 26
2 6 26
Moving expenses. Attach Form 3903 Moving expenses. Attach Form 3903
2 7 27
2 7 27
Self-employed SEP, SIMPLE, and qualified plans Self-employed SEP, SIMPLE, and qualified plans
2 8 28
2 8 28
Self-employed health insurance deduction (see page 16) Self-employed health insurance deduction (see page 16)
2 9 29
2 9 29
Penalty on early withdrawal of savings Penalty on early withdrawal of savings
3 0 30
3 0 30
Scholarship and fellowship grants excluded Scholarship and fellowship grants excluded
3 1 31
IRA deduction (see page 16) IRA deduction (see page 16)
3 1 31
3 2 32
3 2 32
Student loan interest deduction (see page 16) Student loan interest deduction (see page 16)
3 3 33
3 3 33
Domestic production activities deduction. Attach Form 8903 Domestic production activities deduction. Attach Form 8903
3 4 34
Add lines 24 through 33 Add lines 24 through 33
3 5 35
Subtract line 34 from line 23. Enter here and on line 36. This is your adjusted gross income Subtract line 34 from line 23. Enter here and on line 36. This is your adjusted gross income 䊳
For Disclosure, Privacy Act, and Paperwork Reduction Act Notice, see page 32. For Disclosure, Privacy Act, and Paperwork Reduction Act Notice, see page 32.
Cat. No. 11364D Cat. No. 11364D
ssenisuB/edarT .S.U htiW detcennoC ylevitceffE emocnI Income Effectively Connected With U.S. Trade/Business
Enclose, but do not attach, any payment.
8
9a
b
10a
b
Adjusted Gross Income
.tnemyap yna ,hcatta ton od tub ,esolcnE
emocnI ssorG detsujdA

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,19 +1,11 @@
Leadpct: 0% Pt. size: 9.5 ❏ Draft
Userid: ________ DTD INSTR04
PAGER/SGML PAGER/SGML
Page 1 of 48
________
Leadpct: 0%
DTD INSTR04
Pt. size: 9.5 ❏ Draft
Userid:
(Init. & date) (Init. & date)
Fileid: Fileid:
D:\USERS\8fllb\documents\epicfiles\2007Instructions1040NR.sgm D:\USERS\8fllb\documents\epicfiles\2007Instructions1040NR.sgm
Page 1 of 48 Instructions for Form 1040NR
7:48 - 6-DEC-2007 7:48 - 6-DEC-2007
Instructions for Form 1040NR
❏ Ok to Print ❏ Ok to Print
@ -27,6 +19,122 @@ U.S. Nonresident Alien Income Tax Return
Department of the Treasury Department of the Treasury
Internal Revenue Service Internal Revenue Service
use a different address this year. See
Section references are to the Internal
Where To File on page 4.
Revenue Code unless otherwise noted.
General Instructions deduction. The deduction rate for
Domestic production activities
2007 is increased to 6%.
Whats New for 2007
Unreported social security and
Medicare tax on wages.
If you are
Tax benefits extended. The following
an employee and your employer did not
tax benefits were extended through
withhold social security and Medicare
2007.
• Deduction for educator expenses in
tax, see Form 8919 to figure and report
this tax.
figuring adjusted gross income.
• District of Columbia first-time
Refundable credit for prior-year
minimum tax.
If you have an unused
homebuyer credit.
minimum tax credit carryforward from
Alternative minimum tax (AMT)
2004, see Form 8801 to find if you can
exemption amount decreased. The
take this credit.
AMT exemption amount is decreased to
Health savings account (HSA)
$33,750 ($45,000 if a qualifying
funding distributions. You may be
widow(er); $22,500 if married filing
able to elect to exclude from income a
separately).
distribution made from your IRA to your
At the time these instructions
HSA. See the instructions for lines 16a
!
went to print, Congress was
and 16b beginning on page 12.
considering legislation that
CAUTION
New recordkeeping requirements for
would increase the amounts above. To
contributions of money.
For
find out if this legislation was enacted,
charitable contributions of money,
and for more details, see the
regardless of the amount, you must
Instructions for Form 6251.
maintain as a record of the contribution
IRA deduction expanded.
If you were
a bank record (such as a cancelled
covered by a retirement plan, you may
check) or a written record from the
be able to take an IRA deduction if your
charity. The written record must include
2007 modified adjusted gross income
the name of the charity, date, and
(AGI) is less than $62,000 ($103,000 if
amount of the contribution. See Gifts to
a qualifying widow(er)).
U.S. Charities that begins on page 26.
You may be able to deduct up to an
Exemption for housing a person
additional $3,000 if you were a
displaced by Hurricane Katrina
participant in a 401(k) plan and your
expires. The additional exemption
employer was in bankruptcy in an
amount for housing a person displaced
earlier year.
by Hurricane Katrina does not apply for
2007 or later years.
Standard mileage rates. The 2007
Telephone excise tax credit.
rate for business use of your vehicle is
This
481/2 cents a mile. The 2007 rate for
credit was available only on your 2006
use of your vehicle to move is 20 cents
return. If you filed but did not request it
a mile. The special rate for charitable
on your 2006 return, file Form 1040X
use of your vehicle to provide relief
using a simplified procedure explained
related to Hurricane Katrina has
in its instructions to amend your 2006
expired.
return. If you were not required to file a
2006 return, see the 2006 Form
Elective salary deferrals. The
1040EZ-T.
maximum amount you can defer under
all plans is generally limited to $15,500
Whats New for 2008
($10,500 if you only have SIMPLE
plans; $18,500 for section 403(b) plans
IRA deduction expanded. You may
if you qualify for the 15-year rule). See
be able to deduct up to $5,000 ($6,000
the instructions for line 8 on page 10.
if age 50 or older at the end of the
Mailing your return.
If you are filing
year). You may be able to take an IRA
deduction if you were covered by a
the return for an estate or trust, you will
Cat. No. 11368V
retirement plan and your 2008 modified retirement plan and your 2008 modified
AGI is less than $63,000 ($105,000) if a AGI is less than $63,000 ($105,000) if a
qualifying widow(er)). qualifying widow(er)).
@ -71,8 +179,7 @@ investment income on a parents return
and the special rule for when a child and the special rule for when a child
must file Form 6251 will also apply to must file Form 6251 will also apply to
the children listed above. the children listed above.
Expiring tax benefits. Expiring tax benefits. The following
The following
benefits are scheduled to expire and benefits are scheduled to expire and
will not apply for 2008. will not apply for 2008.
• Deduction for educator expenses in • Deduction for educator expenses in
@ -85,130 +192,4 @@ property.
homebuyer credit (for homes homebuyer credit (for homes
purchased after 2007). purchased after 2007).
Section references are to the Internal
Revenue Code unless otherwise noted.
General Instructions
Whats New for 2007
Tax benefits extended.
The following
tax benefits were extended through
2007.
• Deduction for educator expenses in
figuring adjusted gross income.
• District of Columbia first-time
homebuyer credit.
Alternative minimum tax (AMT)
exemption amount decreased.
The
AMT exemption amount is decreased to
$33,750 ($45,000 if a qualifying
widow(er); $22,500 if married filing
separately).
At the time these instructions
!
went to print, Congress was
considering legislation that
CAUTION
would increase the amounts above. To
find out if this legislation was enacted,
and for more details, see the
Instructions for Form 6251.
IRA deduction expanded.
If you were
covered by a retirement plan, you may
be able to take an IRA deduction if your
2007 modified adjusted gross income
(AGI) is less than $62,000 ($103,000 if
a qualifying widow(er)).
You may be able to deduct up to an
additional $3,000 if you were a
participant in a 401(k) plan and your
employer was in bankruptcy in an
earlier year.
Standard mileage rates.
The 2007
rate for business use of your vehicle is
481/2 cents a mile. The 2007 rate for
use of your vehicle to move is 20 cents
a mile. The special rate for charitable
use of your vehicle to provide relief
related to Hurricane Katrina has
expired.
Elective salary deferrals.
The
maximum amount you can defer under
all plans is generally limited to $15,500
($10,500 if you only have SIMPLE
plans; $18,500 for section 403(b) plans
if you qualify for the 15-year rule). See
the instructions for line 8 on page 10.
Mailing your return.
If you are filing
the return for an estate or trust, you will
use a different address this year. See
Where To File on page 4.
Domestic production activities
deduction.
The deduction rate for
2007 is increased to 6%.
Unreported social security and
Medicare tax on wages.
If you are
an employee and your employer did not
withhold social security and Medicare
tax, see Form 8919 to figure and report
this tax.
Refundable credit for prior-year
minimum tax.
If you have an unused
minimum tax credit carryforward from
2004, see Form 8801 to find if you can
take this credit.
Health savings account (HSA)
funding distributions.
You may be
able to elect to exclude from income a
distribution made from your IRA to your
HSA. See the instructions for lines 16a
and 16b beginning on page 12.
New recordkeeping requirements for
contributions of money.
For
charitable contributions of money,
regardless of the amount, you must
maintain as a record of the contribution
a bank record (such as a cancelled
check) or a written record from the
charity. The written record must include
the name of the charity, date, and
amount of the contribution. See Gifts to
U.S. Charities that begins on page 26.
Exemption for housing a person
displaced by Hurricane Katrina
expires.
The additional exemption
amount for housing a person displaced
by Hurricane Katrina does not apply for
2007 or later years.
Telephone excise tax credit.
This
credit was available only on your 2006
return. If you filed but did not request it
on your 2006 return, file Form 1040X
using a simplified procedure explained
in its instructions to amend your 2006
return. If you were not required to file a
2006 return, see the 2006 Form
1040EZ-T.
Whats New for 2008
IRA deduction expanded.
You may
be able to deduct up to $5,000 ($6,000
if age 50 or older at the end of the
year). You may be able to take an IRA
deduction if you were covered by a
Cat. No. 11368V

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,12 +1,12 @@
第 ›Ÿ˜ž 号 平成 ™— 年 月 ™œ 日 金曜日
平成 ™— 年 月 ™œ 日 金曜日 第 ›Ÿ˜ž 号
政令第百四十九号 政令第百四十九号
道路交通法施行令の一部を改正する政令 道路交通法施行令の一部を改正する政令

View File

@ -1,57 +1,22 @@
<?xml version="1.0" encoding="utf-8" ?> <?xml version="1.0" encoding="utf-8" ?>
<pages> <pages>
<page id="1" bbox="0.000,0.000,595.000,842.000" rotate="0"> <page id="1" bbox="0.000,0.000,595.000,842.000" rotate="0">
<textbox id="0" bbox="392.500,787.804,457.500,800.144"> <textbox id="0" bbox="87.500,787.804,242.500,800.144">
<textline bbox="392.500,787.804,457.500,800.144">
<text font="Ryumin-Light" bbox="392.500,787.804,402.500,800.144" size="12.340">第</text>
<text> </text>
<text font="GMALPM+DFHSMincho-W3G014" bbox="405.000,789.064,415.000,798.234" size="10.000"></text>
<text font="GMALPM+DFHSMincho-W3G014" bbox="415.000,789.064,425.000,798.234" size="10.000">Ÿ</text>
<text font="GMALPM+DFHSMincho-W3G014" bbox="425.000,789.064,435.000,798.234" size="10.000">˜</text>
<text font="GMALPM+DFHSMincho-W3G014" bbox="435.000,789.064,445.000,798.234" size="10.000">ž</text>
<text> </text>
<text font="Ryumin-Light" bbox="447.500,787.804,457.500,800.144" size="12.340">号</text>
<text>
</text>
</textline>
</textbox>
<textbox id="1" bbox="527.500,789.064,537.500,798.234">
<textline bbox="527.500,789.064,537.500,798.234">
<text font="GMALPM+DFHSMincho-W3G014" bbox="527.500,789.064,537.500,798.234" size="10.000"></text>
<text>
</text>
</textline>
</textbox>
<textbox id="2" bbox="267.500,787.572,279.500,802.380">
<textline bbox="267.500,787.572,279.500,802.380">
<text font="Ryumin-Light" bbox="267.500,787.572,279.500,802.380" size="14.808">官</text>
<text>
</text>
</textline>
</textbox>
<textbox id="3" bbox="315.500,787.572,327.500,802.380">
<textline bbox="315.500,787.572,327.500,802.380">
<text font="Ryumin-Light" bbox="315.500,787.572,327.500,802.380" size="14.808">報</text>
<text>
</text>
</textline>
</textbox>
<textbox id="4" bbox="87.500,787.804,242.500,800.144">
<textline bbox="87.500,787.804,242.500,800.144"> <textline bbox="87.500,787.804,242.500,800.144">
<text font="Ryumin-Light" bbox="87.500,787.804,97.500,800.144" size="12.340">平</text> <text font="Ryumin-Light" bbox="87.500,787.804,97.500,800.144" size="12.340">平</text>
<text font="Ryumin-Light" bbox="97.500,787.804,107.500,800.144" size="12.340">成</text> <text font="Ryumin-Light" bbox="97.500,787.804,107.500,800.144" size="12.340">成</text>
<text> </text> <text> </text>
<text font="GMALPM+DFHSMincho-W3G014" bbox="110.000,789.064,120.000,798.234" size="10.000">™</text> <text font="GMALPM+DFHSMincho-W3G014" bbox="110.000,789.064,120.000,798.234" size="9.170">™</text>
<text font="GMALPM+DFHSMincho-W3G014" bbox="120.000,789.064,130.000,798.234" size="10.000">—</text> <text font="GMALPM+DFHSMincho-W3G014" bbox="120.000,789.064,130.000,798.234" size="9.170">—</text>
<text> </text> <text> </text>
<text font="Ryumin-Light" bbox="132.500,787.804,142.500,800.144" size="12.340">年</text> <text font="Ryumin-Light" bbox="132.500,787.804,142.500,800.144" size="12.340">年</text>
<text> </text> <text> </text>
<text font="GMALPM+DFHSMincho-W3G014" bbox="145.000,789.064,155.000,798.234" size="10.000"></text> <text font="GMALPM+DFHSMincho-W3G014" bbox="145.000,789.064,155.000,798.234" size="9.170"></text>
<text> </text> <text> </text>
<text font="Ryumin-Light" bbox="157.500,787.804,167.500,800.144" size="12.340">月</text> <text font="Ryumin-Light" bbox="157.500,787.804,167.500,800.144" size="12.340">月</text>
<text> </text> <text> </text>
<text font="GMALPM+DFHSMincho-W3G014" bbox="170.000,789.064,180.000,798.234" size="10.000">™</text> <text font="GMALPM+DFHSMincho-W3G014" bbox="170.000,789.064,180.000,798.234" size="9.170">™</text>
<text font="GMALPM+DFHSMincho-W3G014" bbox="180.000,789.064,190.000,798.234" size="10.000">œ</text> <text font="GMALPM+DFHSMincho-W3G014" bbox="180.000,789.064,190.000,798.234" size="9.170">œ</text>
<text> </text> <text> </text>
<text font="Ryumin-Light" bbox="192.500,787.804,202.500,800.144" size="12.340">日</text> <text font="Ryumin-Light" bbox="192.500,787.804,202.500,800.144" size="12.340">日</text>
<text> </text> <text> </text>
@ -62,6 +27,41 @@
</text> </text>
</textline> </textline>
</textbox> </textbox>
<textbox id="1" bbox="267.500,787.572,279.500,802.380">
<textline bbox="267.500,787.572,279.500,802.380">
<text font="Ryumin-Light" bbox="267.500,787.572,279.500,802.380" size="14.808">官</text>
<text>
</text>
</textline>
</textbox>
<textbox id="2" bbox="315.500,787.572,327.500,802.380">
<textline bbox="315.500,787.572,327.500,802.380">
<text font="Ryumin-Light" bbox="315.500,787.572,327.500,802.380" size="14.808">報</text>
<text>
</text>
</textline>
</textbox>
<textbox id="3" bbox="392.500,787.804,457.500,800.144">
<textline bbox="392.500,787.804,457.500,800.144">
<text font="Ryumin-Light" bbox="392.500,787.804,402.500,800.144" size="12.340">第</text>
<text> </text>
<text font="GMALPM+DFHSMincho-W3G014" bbox="405.000,789.064,415.000,798.234" size="9.170"></text>
<text font="GMALPM+DFHSMincho-W3G014" bbox="415.000,789.064,425.000,798.234" size="9.170">Ÿ</text>
<text font="GMALPM+DFHSMincho-W3G014" bbox="425.000,789.064,435.000,798.234" size="9.170">˜</text>
<text font="GMALPM+DFHSMincho-W3G014" bbox="435.000,789.064,445.000,798.234" size="9.170">ž</text>
<text> </text>
<text font="Ryumin-Light" bbox="447.500,787.804,457.500,800.144" size="12.340">号</text>
<text>
</text>
</textline>
</textbox>
<textbox id="4" bbox="527.500,789.064,537.500,798.234">
<textline bbox="527.500,789.064,537.500,798.234">
<text font="GMALPM+DFHSMincho-W3G014" bbox="527.500,789.064,537.500,798.234" size="9.170"></text>
<text>
</text>
</textline>
</textbox>
<textbox id="5" bbox="420.474,420.036,548.988,781.908" wmode="vertical"> <textbox id="5" bbox="420.474,420.036,548.988,781.908" wmode="vertical">
<textline bbox="540.988,715.796,548.988,781.492"> <textline bbox="540.988,715.796,548.988,781.492">
<text font="GothicBBB-Medium" bbox="540.988,771.796,548.988,781.492" size="9.696">政</text> <text font="GothicBBB-Medium" bbox="540.988,771.796,548.988,781.492" size="9.696">政</text>
@ -3136,16 +3136,16 @@
<layout> <layout>
<textgroup bbox="45.212,52.036,549.776,802.380"> <textgroup bbox="45.212,52.036,549.776,802.380">
<textgroup bbox="87.500,787.572,537.500,802.380"> <textgroup bbox="87.500,787.572,537.500,802.380">
<textgroup bbox="392.500,787.804,537.500,800.144">
<textbox id="0" bbox="392.500,787.804,457.500,800.144" />
<textbox id="1" bbox="527.500,789.064,537.500,798.234" />
</textgroup>
<textgroup bbox="87.500,787.572,327.500,802.380"> <textgroup bbox="87.500,787.572,327.500,802.380">
<textbox id="0" bbox="87.500,787.804,242.500,800.144" />
<textgroup bbox="267.500,787.572,327.500,802.380"> <textgroup bbox="267.500,787.572,327.500,802.380">
<textbox id="2" bbox="267.500,787.572,279.500,802.380" /> <textbox id="1" bbox="267.500,787.572,279.500,802.380" />
<textbox id="3" bbox="315.500,787.572,327.500,802.380" /> <textbox id="2" bbox="315.500,787.572,327.500,802.380" />
</textgroup> </textgroup>
<textbox id="4" bbox="87.500,787.804,242.500,800.144" /> </textgroup>
<textgroup bbox="392.500,787.804,537.500,800.144">
<textbox id="3" bbox="392.500,787.804,457.500,800.144" />
<textbox id="4" bbox="527.500,789.064,537.500,798.234" />
</textgroup> </textgroup>
</textgroup> </textgroup>
<textgroup bbox="45.212,52.036,549.776,781.960"> <textgroup bbox="45.212,52.036,549.776,781.960">

File diff suppressed because it is too large Load Diff

View File

@ -28,46 +28,37 @@ good results.
1 Background 1 Background
Every day, a large number of news articles are cre- Every day, a large number of news articles are cre-
ated and reported, many of which are unique. ated and reported, many of which are unique. But
But
certain types of events, such as hurricanes or mur- certain types of events, such as hurricanes or mur-
ders, are reported again and again throughout a year. ders, are reported again and again throughout a year.
The goal of Information Extraction, or IE, is to re- The goal of Information Extraction, or IE, is to re-
trieve a certain type of news event from past articles trieve a certain type of news event from past articles
and present the events as a table whose columns are and present the events as a table whose columns are
filled with a name of a person or company, accord- filled with a name of a person or company, accord-
However, existing IE ing to its role in the event. However, existing IE
ing to its role in the event. techniques require a lot of human labor. First, you
techniques require a lot of human labor.
First, you
have to specify the type of information you want and have to specify the type of information you want and
collect articles that include this information. collect articles that include this information. Then,
Then,
you have to analyze the articles and manually craft you have to analyze the articles and manually craft
a set of patterns to capture these events. Most exist- a set of patterns to capture these events. Most exist-
ing IE research focuses on reducing this burden by ing IE research focuses on reducing this burden by
helping people create such patterns. helping people create such patterns. But each time
But each time
you want to extract a different kind of information, you want to extract a different kind of information,
you need to repeat the whole process: specify arti- you need to repeat the whole process: specify arti-
cles and adjust its patterns, either manually or semi- cles and adjust its patterns, either manually or semi-
automatically. automatically. There is a bit of a dangerous pitfall
There is a bit of a dangerous pitfall
here. First, it is hard to estimate how good the sys- here. First, it is hard to estimate how good the sys-
tem can be after months of work. Furthermore, you tem can be after months of work. Furthermore, you
might not know if the task is even doable in the first might not know if the task is even doable in the first
place. place. Knowing what kind of information is easily
Knowing what kind of information is easily
obtained in advance would help reduce this risk. obtained in advance would help reduce this risk.
An IE task can be defined as finding a relation An IE task can be defined as finding a relation
among several entities involved in a certain type of among several entities involved in a certain type of
event. event. For example, in the MUC-6 management
For example, in the MUC-6 management
succession scenario, one seeks a relation between succession scenario, one seeks a relation between
COMPANY, PERSON and POST involved with hir- COMPANY, PERSON and POST involved with hir-
ing/firing events. ing/firing events. For each row of an extracted ta-
For each row of an extracted ta-
ble, you can always read it as “COMPANY hired ble, you can always read it as “COMPANY hired
(or fired) PERSON for POST.” The relation between (or fired) PERSON for POST.” The relation between
these entities is retained throughout the table. There these entities is retained throughout the table. There

View File

@ -871,7 +871,7 @@
<text> <text>
</text> </text>
</textline> </textline>
<textline bbox="72.000,336.427,276.805,349.577"> <textline bbox="72.000,336.427,298.882,349.577">
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="72.000,336.427,76.845,349.577" size="13.150">a</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="72.000,336.427,76.845,349.577" size="13.150">a</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="76.845,336.427,79.879,349.577" size="13.150">t</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="76.845,336.427,79.879,349.577" size="13.150">t</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="79.879,336.427,84.725,349.577" size="13.150">e</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="79.879,336.427,84.725,349.577" size="13.150">e</text>
@ -916,10 +916,7 @@
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="263.775,336.427,269.232,349.577" size="13.150">u</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="263.775,336.427,269.232,349.577" size="13.150">u</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="269.232,336.427,274.077,349.577" size="13.150">e</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="269.232,336.427,274.077,349.577" size="13.150">e</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="274.077,336.427,276.805,349.577" size="13.150">.</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="274.077,336.427,276.805,349.577" size="13.150">.</text>
<text> <text> </text>
</text>
</textline>
<textline bbox="283.113,336.427,298.882,349.577">
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="283.113,336.427,290.392,349.577" size="13.150">B</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="283.113,336.427,290.392,349.577" size="13.150">B</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="290.392,336.427,295.849,349.577" size="13.150">u</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="290.392,336.427,295.849,349.577" size="13.150">u</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="295.849,336.427,298.882,349.577" size="13.150">t</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="295.849,336.427,298.882,349.577" size="13.150">t</text>
@ -1260,31 +1257,7 @@
<text> <text>
</text> </text>
</textline> </textline>
<textline bbox="203.557,241.580,298.882,254.730"> <textline bbox="72.000,241.580,298.882,254.730">
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="203.557,241.580,211.436,254.730" size="13.150">H</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="211.436,241.580,216.893,254.730" size="13.150">o</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="216.631,241.580,224.510,254.730" size="13.150">w</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="224.510,241.580,229.355,254.730" size="13.150">e</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="229.083,241.580,234.539,254.730" size="13.150">v</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="234.375,241.580,239.221,254.730" size="13.150">e</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="239.221,241.580,242.855,254.730" size="13.150">r</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="242.418,241.580,245.147,254.730" size="13.150">,</text>
<text> </text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="249.861,241.580,254.706,254.730" size="13.150">e</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="254.554,241.580,260.010,254.730" size="13.150">x</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="260.010,241.580,263.044,254.730" size="13.150">i</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="263.044,241.580,267.289,254.730" size="13.150">s</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="267.289,241.580,270.323,254.730" size="13.150">t</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="270.323,241.580,273.357,254.730" size="13.150">i</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="273.357,241.580,278.813,254.730" size="13.150">n</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="278.813,241.580,284.270,254.730" size="13.150">g</text>
<text> </text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="288.580,241.580,292.215,254.730" size="13.150">I</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="292.215,241.580,298.882,254.730" size="13.150">E</text>
<text>
</text>
</textline>
<textline bbox="72.000,241.580,195.405,254.730">
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="72.000,241.580,75.034,254.730" size="13.150">i</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="72.000,241.580,75.034,254.730" size="13.150">i</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="75.034,241.580,80.490,254.730" size="13.150">n</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="75.034,241.580,80.490,254.730" size="13.150">n</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="80.490,241.580,85.947,254.730" size="13.150">g</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="80.490,241.580,85.947,254.730" size="13.150">g</text>
@ -1314,10 +1287,31 @@
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="184.186,241.580,189.643,254.730" size="13.150">n</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="184.186,241.580,189.643,254.730" size="13.150">n</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="189.643,241.580,192.677,254.730" size="13.150">t</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="189.643,241.580,192.677,254.730" size="13.150">t</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="192.677,241.580,195.405,254.730" size="13.150">.</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="192.677,241.580,195.405,254.730" size="13.150">.</text>
<text> </text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="203.557,241.580,211.436,254.730" size="13.150">H</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="211.436,241.580,216.893,254.730" size="13.150">o</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="216.631,241.580,224.510,254.730" size="13.150">w</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="224.510,241.580,229.355,254.730" size="13.150">e</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="229.083,241.580,234.539,254.730" size="13.150">v</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="234.375,241.580,239.221,254.730" size="13.150">e</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="239.221,241.580,242.855,254.730" size="13.150">r</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="242.418,241.580,245.147,254.730" size="13.150">,</text>
<text> </text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="249.861,241.580,254.706,254.730" size="13.150">e</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="254.554,241.580,260.010,254.730" size="13.150">x</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="260.010,241.580,263.044,254.730" size="13.150">i</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="263.044,241.580,267.289,254.730" size="13.150">s</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="267.289,241.580,270.323,254.730" size="13.150">t</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="270.323,241.580,273.357,254.730" size="13.150">i</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="273.357,241.580,278.813,254.730" size="13.150">n</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="278.813,241.580,284.270,254.730" size="13.150">g</text>
<text> </text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="288.580,241.580,292.215,254.730" size="13.150">I</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="292.215,241.580,298.882,254.730" size="13.150">E</text>
<text> <text>
</text> </text>
</textline> </textline>
<textline bbox="72.000,228.026,250.352,241.177"> <textline bbox="72.000,228.026,298.882,241.177">
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="72.000,228.026,75.034,241.177" size="13.150">t</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="72.000,228.026,75.034,241.177" size="13.150">t</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="75.034,228.026,79.879,241.177" size="13.150">e</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="75.034,228.026,79.879,241.177" size="13.150">e</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="79.879,228.026,84.725,241.177" size="13.150">c</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="79.879,228.026,84.725,241.177" size="13.150">c</text>
@ -1358,10 +1352,7 @@
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="239.123,228.026,244.579,241.177" size="13.150">o</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="239.123,228.026,244.579,241.177" size="13.150">o</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="244.579,228.026,248.213,241.177" size="13.150">r</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="244.579,228.026,248.213,241.177" size="13.150">r</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="247.624,228.026,250.352,241.177" size="13.150">.</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="247.624,228.026,250.352,241.177" size="13.150">.</text>
<text> <text> </text>
</text>
</textline>
<textline bbox="256.060,228.026,298.882,241.177">
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="256.060,228.026,262.127,241.177" size="13.150">F</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="256.060,228.026,262.127,241.177" size="13.150">F</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="262.127,228.026,265.161,241.177" size="13.150">i</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="262.127,228.026,265.161,241.177" size="13.150">i</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="265.161,228.026,268.795,241.177" size="13.150">r</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="265.161,228.026,268.795,241.177" size="13.150">r</text>
@ -1431,7 +1422,7 @@
<text> <text>
</text> </text>
</textline> </textline>
<textline bbox="72.000,200.929,268.249,214.079"> <textline bbox="72.000,200.929,298.882,214.079">
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="72.000,200.929,76.845,214.079" size="13.150">c</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="72.000,200.929,76.845,214.079" size="13.150">c</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="76.845,200.929,82.302,214.079" size="13.150">o</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="76.845,200.929,82.302,214.079" size="13.150">o</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="82.302,200.929,85.336,214.079" size="13.150">l</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="82.302,200.929,85.336,214.079" size="13.150">l</text>
@ -1479,10 +1470,7 @@
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="254.608,200.929,260.065,214.079" size="13.150">o</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="254.608,200.929,260.065,214.079" size="13.150">o</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="260.065,200.929,265.521,214.079" size="13.150">n</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="260.065,200.929,265.521,214.079" size="13.150">n</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="265.521,200.929,268.249,214.079" size="13.150">.</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="265.521,200.929,268.249,214.079" size="13.150">.</text>
<text> <text> </text>
</text>
</textline>
<textline bbox="273.728,200.929,298.882,214.079">
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="273.728,200.929,280.396,214.079" size="13.150">T</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="273.728,200.929,280.396,214.079" size="13.150">T</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="280.396,200.929,285.852,214.079" size="13.150">h</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="280.396,200.929,285.852,214.079" size="13.150">h</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="285.852,200.929,290.698,214.079" size="13.150">e</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="285.852,200.929,290.698,214.079" size="13.150">e</text>
@ -1658,7 +1646,7 @@
<text> <text>
</text> </text>
</textline> </textline>
<textline bbox="72.000,146.734,231.091,159.884"> <textline bbox="72.000,146.734,298.882,159.884">
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="72.000,146.734,77.457,159.884" size="13.150">h</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="72.000,146.734,77.457,159.884" size="13.150">h</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="77.457,146.734,82.302,159.884" size="13.150">e</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="77.457,146.734,82.302,159.884" size="13.150">e</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="82.302,146.734,85.336,159.884" size="13.150">l</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="82.302,146.734,85.336,159.884" size="13.150">l</text>
@ -1695,10 +1683,7 @@
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="218.661,146.734,224.117,159.884" size="13.150">n</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="218.661,146.734,224.117,159.884" size="13.150">n</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="224.117,146.734,228.362,159.884" size="13.150">s</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="224.117,146.734,228.362,159.884" size="13.150">s</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="228.362,146.734,231.091,159.884" size="13.150">.</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="228.362,146.734,231.091,159.884" size="13.150">.</text>
<text> <text> </text>
</text>
</textline>
<textline bbox="236.743,146.734,298.882,159.884">
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="236.743,146.734,244.022,159.884" size="13.150">B</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="236.743,146.734,244.022,159.884" size="13.150">B</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="244.022,146.734,249.479,159.884" size="13.150">u</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="244.022,146.734,249.479,159.884" size="13.150">u</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="249.479,146.734,252.513,159.884" size="13.150">t</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="249.479,146.734,252.513,159.884" size="13.150">t</text>
@ -1886,7 +1871,7 @@
<text> <text>
</text> </text>
</textline> </textline>
<textline bbox="313.198,581.125,374.638,594.275"> <textline bbox="313.198,581.125,540.091,594.275">
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="313.198,581.125,318.043,594.275" size="13.150">a</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="313.198,581.125,318.043,594.275" size="13.150">a</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="318.043,581.125,323.499,594.275" size="13.150">u</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="318.043,581.125,323.499,594.275" size="13.150">u</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="323.499,581.125,326.533,594.275" size="13.150">t</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="323.499,581.125,326.533,594.275" size="13.150">t</text>
@ -1901,10 +1886,7 @@
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="364.118,581.125,367.152,594.275" size="13.150">l</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="364.118,581.125,367.152,594.275" size="13.150">l</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="367.152,581.125,372.608,594.275" size="13.150">y</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="367.152,581.125,372.608,594.275" size="13.150">y</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="371.910,581.125,374.638,594.275" size="13.150">.</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="371.910,581.125,374.638,594.275" size="13.150">.</text>
<text> <text> </text>
</text>
</textline>
<textline bbox="380.749,581.125,540.091,594.275">
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="380.749,581.125,387.417,594.275" size="13.150">T</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="380.749,581.125,387.417,594.275" size="13.150">T</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="387.417,581.125,392.874,594.275" size="13.150">h</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="387.417,581.125,392.874,594.275" size="13.150">h</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="392.874,581.125,397.719,594.275" size="13.150">e</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="392.874,581.125,397.719,594.275" size="13.150">e</text>
@ -2112,17 +2094,14 @@
<text> <text>
</text> </text>
</textline> </textline>
<textline bbox="313.198,526.930,338.952,540.080"> <textline bbox="313.198,526.930,540.080,540.080">
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="313.198,526.930,318.654,540.080" size="13.150">p</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="313.198,526.930,318.654,540.080" size="13.150">p</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="318.654,526.930,321.688,540.080" size="13.150">l</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="318.654,526.930,321.688,540.080" size="13.150">l</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="321.688,526.930,326.533,540.080" size="13.150">a</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="321.688,526.930,326.533,540.080" size="13.150">a</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="326.533,526.930,331.379,540.080" size="13.150">c</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="326.533,526.930,331.379,540.080" size="13.150">c</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="331.379,526.930,336.224,540.080" size="13.150">e</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="331.379,526.930,336.224,540.080" size="13.150">e</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="336.224,526.930,338.952,540.080" size="13.150">.</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="336.224,526.930,338.952,540.080" size="13.150">.</text>
<text> <text> </text>
</text>
</textline>
<textline bbox="344.682,526.930,540.080,540.080">
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="344.682,526.930,352.561,540.080" size="13.150">K</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="344.682,526.930,352.561,540.080" size="13.150">K</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="352.561,526.930,358.017,540.080" size="13.150">n</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="352.561,526.930,358.017,540.080" size="13.150">n</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="358.017,526.930,363.474,540.080" size="13.150">o</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="358.017,526.930,363.474,540.080" size="13.150">o</text>
@ -2325,17 +2304,14 @@
<text> <text>
</text> </text>
</textline> </textline>
<textline bbox="313.198,472.325,339.138,485.475"> <textline bbox="313.198,472.325,540.080,485.475">
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="313.198,472.325,318.043,485.475" size="13.150">e</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="313.198,472.325,318.043,485.475" size="13.150">e</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="317.781,472.325,323.238,485.475" size="13.150">v</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="317.781,472.325,323.238,485.475" size="13.150">v</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="323.074,472.325,327.919,485.475" size="13.150">e</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="323.074,472.325,327.919,485.475" size="13.150">e</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="327.919,472.325,333.376,485.475" size="13.150">n</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="327.919,472.325,333.376,485.475" size="13.150">n</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="333.376,472.325,336.410,485.475" size="13.150">t</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="333.376,472.325,336.410,485.475" size="13.150">t</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="336.410,472.325,339.138,485.475" size="13.150">.</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="336.410,472.325,339.138,485.475" size="13.150">.</text>
<text> <text> </text>
</text>
</textline>
<textline bbox="349.003,472.325,540.080,485.475">
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="349.003,472.325,355.071,485.475" size="13.150">F</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="349.003,472.325,355.071,485.475" size="13.150">F</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="354.918,472.325,360.375,485.475" size="13.150">o</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="354.918,472.325,360.375,485.475" size="13.150">o</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="360.375,472.325,364.009,485.475" size="13.150">r</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="360.375,472.325,364.009,485.475" size="13.150">r</text>
@ -2475,7 +2451,7 @@
<text> <text>
</text> </text>
</textline> </textline>
<textline bbox="313.198,431.673,387.854,444.824"> <textline bbox="313.198,431.673,540.080,444.824">
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="313.198,431.673,316.231,444.824" size="13.150">i</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="313.198,431.673,316.231,444.824" size="13.150">i</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="316.231,431.673,321.688,444.824" size="13.150">n</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="316.231,431.673,321.688,444.824" size="13.150">n</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="321.688,431.673,327.144,444.824" size="13.150">g</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="321.688,431.673,327.144,444.824" size="13.150">g</text>
@ -2493,10 +2469,7 @@
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="377.846,431.673,380.880,444.824" size="13.150">t</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="377.846,431.673,380.880,444.824" size="13.150">t</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="380.880,431.673,385.125,444.824" size="13.150">s</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="380.880,431.673,385.125,444.824" size="13.150">s</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="385.125,431.673,387.854,444.824" size="13.150">.</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="385.125,431.673,387.854,444.824" size="13.150">.</text>
<text> <text> </text>
</text>
</textline>
<textline bbox="394.576,431.673,540.080,444.824">
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="394.576,431.673,400.644,444.824" size="13.150">F</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="394.576,431.673,400.644,444.824" size="13.150">F</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="400.491,431.673,405.948,444.824" size="13.150">o</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="400.491,431.673,405.948,444.824" size="13.150">o</text>
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="405.948,431.673,409.582,444.824" size="13.150">r</text> <text font="QTLIUY+NimbusRomNo9L-Regu" bbox="405.948,431.673,409.582,444.824" size="13.150">r</text>

View File

@ -3,92 +3,13 @@
</head><body> </head><body>
<span style="position:absolute; border: gray 1px solid; left:0px; top:50px; width:800px; height:600px;"></span> <span style="position:absolute; border: gray 1px solid; left:0px; top:50px; width:800px; height:600px;"></span>
<div style="position:absolute; top:50px;"><a name="1">Page 1</a></div> <div style="position:absolute; top:50px;"><a name="1">Page 1</a></div>
<span style="position:absolute; border: cyan 1px solid; left:62px; top:126px; width:672px; height:157px;"></span> <div style="position:absolute; border: textbox 1px solid; writing-mode:lr-tb; left:62px; top:126px; width:672px; height:157px;"><span style="font-family: DAFPJF+HiraKakuPro-W6; font-size:60px">コンパラブルな新聞記事からの
<span style="position:absolute; border: magenta 1px solid; left:62px; top:126px; width:672px; height:85px;"></span> <br>固有表現の発見
<span style="position:absolute; left:62px; top:126px; font-size:85px;">コ</span> <br></span></div><div style="position:absolute; border: textbox 1px solid; writing-mode:lr-tb; left:263px; top:374px; width:468px; height:212px;"><span style="font-family: DAFPJF+HiraKakuPro-W6; font-size:45px">新山 祐介
<span style="position:absolute; left:110px; top:126px; font-size:85px;">ン</span> <br>関根 聡
<span style="position:absolute; left:158px; top:126px; font-size:85px;">パ</span> <br></span><span style="font-family: DAFPJF+HiraKakuPro-W6; font-size:35px">Computer Science Department
<span style="position:absolute; left:206px; top:126px; font-size:85px;">ラ</span> <br>New York University
<span style="position:absolute; left:254px; top:126px; font-size:85px;">ブ</span> <br></span></div><span style="position:absolute; border: black 1px solid; left:0px; top:50px; width:800px; height:600px;"></span>
<span style="position:absolute; left:302px; top:126px; font-size:85px;">ル</span>
<span style="position:absolute; left:350px; top:126px; font-size:85px;">な</span>
<span style="position:absolute; left:398px; top:126px; font-size:85px;">新</span>
<span style="position:absolute; left:446px; top:126px; font-size:85px;">聞</span>
<span style="position:absolute; left:494px; top:126px; font-size:85px;">記</span>
<span style="position:absolute; left:542px; top:126px; font-size:85px;">事</span>
<span style="position:absolute; left:590px; top:126px; font-size:85px;">か</span>
<span style="position:absolute; left:638px; top:126px; font-size:85px;">ら</span>
<span style="position:absolute; left:686px; top:126px; font-size:85px;">の</span>
<span style="position:absolute; border: magenta 1px solid; left:62px; top:198px; width:336px; height:85px;"></span>
<span style="position:absolute; left:62px; top:198px; font-size:85px;">固</span>
<span style="position:absolute; left:110px; top:198px; font-size:85px;">有</span>
<span style="position:absolute; left:158px; top:198px; font-size:85px;">表</span>
<span style="position:absolute; left:206px; top:198px; font-size:85px;">現</span>
<span style="position:absolute; left:254px; top:198px; font-size:85px;">の</span>
<span style="position:absolute; left:302px; top:198px; font-size:85px;">発</span>
<span style="position:absolute; left:350px; top:198px; font-size:85px;">見</span>
<span style="position:absolute; border: cyan 1px solid; left:263px; top:374px; width:468px; height:212px;"></span>
<span style="position:absolute; border: magenta 1px solid; left:576px; top:374px; width:155px; height:64px;"></span>
<span style="position:absolute; left:576px; top:374px; font-size:64px;">新</span>
<span style="position:absolute; left:612px; top:374px; font-size:64px;">山</span>
<span style="position:absolute; left:648px; top:374px; font-size:64px;"> </span>
<span style="position:absolute; left:660px; top:374px; font-size:64px;">祐</span>
<span style="position:absolute; left:696px; top:374px; font-size:64px;">介</span>
<span style="position:absolute; border: magenta 1px solid; left:612px; top:430px; width:119px; height:64px;"></span>
<span style="position:absolute; left:612px; top:430px; font-size:64px;">関</span>
<span style="position:absolute; left:648px; top:430px; font-size:64px;">根</span>
<span style="position:absolute; left:684px; top:430px; font-size:64px;"> </span>
<span style="position:absolute; left:696px; top:430px; font-size:64px;">聡</span>
<span style="position:absolute; border: magenta 1px solid; left:263px; top:493px; width:468px; height:50px;"></span>
<span style="position:absolute; left:263px; top:493px; font-size:50px;">C</span>
<span style="position:absolute; left:285px; top:493px; font-size:50px;">o</span>
<span style="position:absolute; left:304px; top:493px; font-size:50px;">m</span>
<span style="position:absolute; left:332px; top:493px; font-size:50px;">p</span>
<span style="position:absolute; left:352px; top:493px; font-size:50px;">u</span>
<span style="position:absolute; left:371px; top:493px; font-size:50px;">t</span>
<span style="position:absolute; left:383px; top:493px; font-size:50px;">e</span>
<span style="position:absolute; left:401px; top:493px; font-size:50px;">r</span>
<span style="position:absolute; left:415px; top:493px; font-size:50px;"> </span>
<span style="position:absolute; left:424px; top:493px; font-size:50px;">S</span>
<span style="position:absolute; left:444px; top:493px; font-size:50px;">c</span>
<span style="position:absolute; left:462px; top:493px; font-size:50px;">i</span>
<span style="position:absolute; left:469px; top:493px; font-size:50px;">e</span>
<span style="position:absolute; left:487px; top:493px; font-size:50px;">n</span>
<span style="position:absolute; left:506px; top:493px; font-size:50px;">c</span>
<span style="position:absolute; left:523px; top:493px; font-size:50px;">e</span>
<span style="position:absolute; left:541px; top:493px; font-size:50px;"> </span>
<span style="position:absolute; left:550px; top:493px; font-size:50px;">D</span>
<span style="position:absolute; left:573px; top:493px; font-size:50px;">e</span>
<span style="position:absolute; left:591px; top:493px; font-size:50px;">p</span>
<span style="position:absolute; left:611px; top:493px; font-size:50px;">a</span>
<span style="position:absolute; left:628px; top:493px; font-size:50px;">r</span>
<span style="position:absolute; left:642px; top:493px; font-size:50px;">t</span>
<span style="position:absolute; left:654px; top:493px; font-size:50px;">m</span>
<span style="position:absolute; left:683px; top:493px; font-size:50px;">e</span>
<span style="position:absolute; left:701px; top:493px; font-size:50px;">n</span>
<span style="position:absolute; left:719px; top:493px; font-size:50px;">t</span>
<span style="position:absolute; border: magenta 1px solid; left:424px; top:537px; width:307px; height:50px;"></span>
<span style="position:absolute; left:424px; top:537px; font-size:50px;">N</span>
<span style="position:absolute; left:447px; top:537px; font-size:50px;">e</span>
<span style="position:absolute; left:464px; top:537px; font-size:50px;">w</span>
<span style="position:absolute; left:488px; top:537px; font-size:50px;"> </span>
<span style="position:absolute; left:498px; top:537px; font-size:50px;">Y</span>
<span style="position:absolute; left:519px; top:537px; font-size:50px;">o</span>
<span style="position:absolute; left:538px; top:537px; font-size:50px;">r</span>
<span style="position:absolute; left:551px; top:537px; font-size:50px;">k</span>
<span style="position:absolute; left:570px; top:537px; font-size:50px;"> </span>
<span style="position:absolute; left:579px; top:537px; font-size:50px;">U</span>
<span style="position:absolute; left:602px; top:537px; font-size:50px;">n</span>
<span style="position:absolute; left:621px; top:537px; font-size:50px;">i</span>
<span style="position:absolute; left:629px; top:537px; font-size:50px;">v</span>
<span style="position:absolute; left:646px; top:537px; font-size:50px;">e</span>
<span style="position:absolute; left:664px; top:537px; font-size:50px;">r</span>
<span style="position:absolute; left:678px; top:537px; font-size:50px;">s</span>
<span style="position:absolute; left:694px; top:537px; font-size:50px;">i</span>
<span style="position:absolute; left:702px; top:537px; font-size:50px;">t</span>
<span style="position:absolute; left:714px; top:537px; font-size:50px;">y</span>
<span style="position:absolute; border: black 1px solid; left:0px; top:50px; width:800px; height:600px;"></span>
<span style="position:absolute; border: black 1px solid; left:50px; top:308px; width:510px; height:0px;"></span> <span style="position:absolute; border: black 1px solid; left:50px; top:308px; width:510px; height:0px;"></span>
<span style="position:absolute; border: yellow 1px solid; left:25px; top:587px; width:41px; height:40px;"></span>
<div style="position:absolute; top:0px;">Page: <a href="#1">1</a></div> <div style="position:absolute; top:0px;">Page: <a href="#1">1</a></div>
</body></html> </body></html>

View File

@ -3,65 +3,13 @@
</head><body> </head><body>
<span style="position:absolute; border: gray 1px solid; left:0px; top:50px; width:612px; height:792px;"></span> <span style="position:absolute; border: gray 1px solid; left:0px; top:50px; width:612px; height:792px;"></span>
<div style="position:absolute; top:50px;"><a name="1">Page 1</a></div> <div style="position:absolute; top:50px;"><a name="1">Page 1</a></div>
<span style="position:absolute; border: cyan 1px solid; left:100px; top:119px; width:61px; height:27px;"></span> <div style="position:absolute; border: textbox 1px solid; writing-mode:lr-tb; left:100px; top:119px; width:61px; height:27px;"><span style="font-family: Helvetica; font-size:19px">Hello
<span style="position:absolute; border: magenta 1px solid; left:100px; top:119px; width:61px; height:27px;"></span> <br></span></div><div style="position:absolute; border: textbox 1px solid; writing-mode:lr-tb; left:261px; top:119px; width:62px; height:27px;"><span style="font-family: Helvetica; font-size:19px">World
<span style="position:absolute; left:100px; top:119px; font-size:27px;">H</span> <br></span></div><div style="position:absolute; border: textbox 1px solid; writing-mode:lr-tb; left:100px; top:219px; width:61px; height:27px;"><span style="font-family: Helvetica; font-size:19px">Hello
<span style="position:absolute; left:117px; top:119px; font-size:27px;">e</span> <br></span></div><div style="position:absolute; border: textbox 1px solid; writing-mode:lr-tb; left:261px; top:219px; width:62px; height:27px;"><span style="font-family: Helvetica; font-size:19px">World
<span style="position:absolute; left:130px; top:119px; font-size:27px;">l</span> <br></span></div><div style="position:absolute; border: textbox 1px solid; writing-mode:lr-tb; left:100px; top:319px; width:111px; height:27px;"><span style="font-family: Helvetica; font-size:19px">H e l l o
<span style="position:absolute; left:136px; top:119px; font-size:27px;">l</span> <br></span></div><div style="position:absolute; border: textbox 1px solid; writing-mode:lr-tb; left:321px; top:319px; width:102px; height:27px;"><span style="font-family: Helvetica; font-size:19px">W o r l d
<span style="position:absolute; left:141px; top:119px; font-size:27px;">o</span> <br></span></div><div style="position:absolute; border: textbox 1px solid; writing-mode:lr-tb; left:100px; top:419px; width:111px; height:27px;"><span style="font-family: Helvetica; font-size:19px">H e l l o
<span style="position:absolute; left:154px; top:119px; font-size:27px;"> </span> <br></span></div><div style="position:absolute; border: textbox 1px solid; writing-mode:lr-tb; left:321px; top:419px; width:102px; height:27px;"><span style="font-family: Helvetica; font-size:19px">W o r l d
<span style="position:absolute; border: cyan 1px solid; left:261px; top:119px; width:62px; height:27px;"></span> <br></span></div><div style="position:absolute; top:0px;">Page: <a href="#1">1</a></div>
<span style="position:absolute; border: magenta 1px solid; left:261px; top:119px; width:62px; height:27px;"></span>
<span style="position:absolute; left:261px; top:119px; font-size:27px;">W</span>
<span style="position:absolute; left:283px; top:119px; font-size:27px;">o</span>
<span style="position:absolute; left:297px; top:119px; font-size:27px;">r</span>
<span style="position:absolute; left:305px; top:119px; font-size:27px;">l</span>
<span style="position:absolute; left:310px; top:119px; font-size:27px;">d</span>
<span style="position:absolute; border: cyan 1px solid; left:100px; top:219px; width:61px; height:27px;"></span>
<span style="position:absolute; border: magenta 1px solid; left:100px; top:219px; width:61px; height:27px;"></span>
<span style="position:absolute; left:100px; top:219px; font-size:27px;">H</span>
<span style="position:absolute; left:117px; top:219px; font-size:27px;">e</span>
<span style="position:absolute; left:130px; top:219px; font-size:27px;">l</span>
<span style="position:absolute; left:136px; top:219px; font-size:27px;">l</span>
<span style="position:absolute; left:141px; top:219px; font-size:27px;">o</span>
<span style="position:absolute; left:154px; top:219px; font-size:27px;"> </span>
<span style="position:absolute; border: cyan 1px solid; left:261px; top:219px; width:62px; height:27px;"></span>
<span style="position:absolute; border: magenta 1px solid; left:261px; top:219px; width:62px; height:27px;"></span>
<span style="position:absolute; left:261px; top:219px; font-size:27px;">W</span>
<span style="position:absolute; left:284px; top:219px; font-size:27px;">o</span>
<span style="position:absolute; left:297px; top:219px; font-size:27px;">r</span>
<span style="position:absolute; left:305px; top:219px; font-size:27px;">l</span>
<span style="position:absolute; left:310px; top:219px; font-size:27px;">d</span>
<span style="position:absolute; border: cyan 1px solid; left:100px; top:319px; width:111px; height:27px;"></span>
<span style="position:absolute; border: magenta 1px solid; left:100px; top:319px; width:111px; height:27px;"></span>
<span style="position:absolute; left:100px; top:319px; font-size:27px;">H</span>
<span style="position:absolute; left:127px; top:319px; font-size:27px;">e</span>
<span style="position:absolute; left:150px; top:319px; font-size:27px;">l</span>
<span style="position:absolute; left:166px; top:319px; font-size:27px;">l</span>
<span style="position:absolute; left:181px; top:319px; font-size:27px;">o</span>
<span style="position:absolute; left:204px; top:319px; font-size:27px;"> </span>
<span style="position:absolute; border: cyan 1px solid; left:321px; top:319px; width:102px; height:27px;"></span>
<span style="position:absolute; border: magenta 1px solid; left:321px; top:319px; width:102px; height:27px;"></span>
<span style="position:absolute; left:321px; top:319px; font-size:27px;">W</span>
<span style="position:absolute; left:354px; top:319px; font-size:27px;">o</span>
<span style="position:absolute; left:377px; top:319px; font-size:27px;">r</span>
<span style="position:absolute; left:395px; top:319px; font-size:27px;">l</span>
<span style="position:absolute; left:410px; top:319px; font-size:27px;">d</span>
<span style="position:absolute; border: cyan 1px solid; left:100px; top:419px; width:111px; height:27px;"></span>
<span style="position:absolute; border: magenta 1px solid; left:100px; top:419px; width:111px; height:27px;"></span>
<span style="position:absolute; left:100px; top:419px; font-size:27px;">H</span>
<span style="position:absolute; left:127px; top:419px; font-size:27px;">e</span>
<span style="position:absolute; left:150px; top:419px; font-size:27px;">l</span>
<span style="position:absolute; left:165px; top:419px; font-size:27px;">l</span>
<span style="position:absolute; left:181px; top:419px; font-size:27px;">o</span>
<span style="position:absolute; left:204px; top:419px; font-size:27px;"> </span>
<span style="position:absolute; border: cyan 1px solid; left:321px; top:419px; width:102px; height:27px;"></span>
<span style="position:absolute; border: magenta 1px solid; left:321px; top:419px; width:102px; height:27px;"></span>
<span style="position:absolute; left:321px; top:419px; font-size:27px;">W</span>
<span style="position:absolute; left:353px; top:419px; font-size:27px;">o</span>
<span style="position:absolute; left:377px; top:419px; font-size:27px;">r</span>
<span style="position:absolute; left:395px; top:419px; font-size:27px;">l</span>
<span style="position:absolute; left:410px; top:419px; font-size:27px;">d</span>
<div style="position:absolute; top:0px;">Page: <a href="#1">1</a></div>
</body></html> </body></html>

View File

@ -3,42 +3,9 @@
</head><body> </head><body>
<span style="position:absolute; border: gray 1px solid; left:0px; top:50px; width:612px; height:792px;"></span> <span style="position:absolute; border: gray 1px solid; left:0px; top:50px; width:612px; height:792px;"></span>
<div style="position:absolute; top:50px;"><a name="1">Page 1</a></div> <div style="position:absolute; top:50px;"><a name="1">Page 1</a></div>
<span style="position:absolute; border: cyan 1px solid; left:0px; top:72px; width:218px; height:79px;"></span> <div style="position:absolute; border: textbox 1px solid; writing-mode:lr-tb; left:0px; top:72px; width:218px; height:79px;"><span style="font-family: Helvetica; font-size:38px">HelloHello
<span style="position:absolute; border: magenta 1px solid; left:0px; top:72px; width:218px; height:79px;"></span> <br></span></div><div style="position:absolute; border: textbox 1px solid; writing-mode:tb-rl; left:194px; top:136px; width:48px; height:490px;"><span style="font-family: unknown; font-size:33px">あいうえおあいうえお </span><span style="font-family: Helvetica; font-size:19px">W
<span style="position:absolute; left:0px; top:96px; font-size:55px;">H</span> <br></span></div><div style="position:absolute; border: textbox 1px solid; writing-mode:lr-tb; left:241px; top:575px; width:102px; height:51px;"><span style="font-family: Helvetica; font-size:19px">World
<span style="position:absolute; left:34px; top:96px; font-size:55px;">e</span> <br>orld
<span style="position:absolute; left:61px; top:96px; font-size:55px;">l</span> <br></span></div><div style="position:absolute; top:0px;">Page: <a href="#1">1</a></div>
<span style="position:absolute; left:72px; top:96px; font-size:55px;">l</span>
<span style="position:absolute; left:82px; top:96px; font-size:55px;">o</span>
<span style="position:absolute; left:109px; top:72px; font-size:55px;">H</span>
<span style="position:absolute; left:144px; top:72px; font-size:55px;">e</span>
<span style="position:absolute; left:170px; top:72px; font-size:55px;">l</span>
<span style="position:absolute; left:181px; top:72px; font-size:55px;">l</span>
<span style="position:absolute; left:192px; top:72px; font-size:55px;">o</span>
<span style="position:absolute; border: cyan 1px solid; left:194px; top:136px; width:48px; height:490px;"></span>
<span style="position:absolute; border: magenta 1px solid; left:194px; top:136px; width:48px; height:490px;"></span>
<span style="position:absolute; left:194px; top:136px; font-size:48px;">あ</span>
<span style="position:absolute; left:194px; top:184px; font-size:48px;">い</span>
<span style="position:absolute; left:194px; top:232px; font-size:48px;">う</span>
<span style="position:absolute; left:194px; top:280px; font-size:48px;">え</span>
<span style="position:absolute; left:194px; top:328px; font-size:48px;">お</span>
<span style="position:absolute; left:194px; top:352px; font-size:48px;">あ</span>
<span style="position:absolute; left:194px; top:400px; font-size:48px;">い</span>
<span style="position:absolute; left:194px; top:448px; font-size:48px;">う</span>
<span style="position:absolute; left:194px; top:496px; font-size:48px;">え</span>
<span style="position:absolute; left:194px; top:544px; font-size:48px;">お</span>
<span style="position:absolute; left:218px; top:599px; font-size:27px;">W</span>
<span style="position:absolute; border: cyan 1px solid; left:241px; top:575px; width:102px; height:51px;"></span>
<span style="position:absolute; border: magenta 1px solid; left:281px; top:575px; width:62px; height:27px;"></span>
<span style="position:absolute; left:281px; top:575px; font-size:27px;">W</span>
<span style="position:absolute; left:304px; top:575px; font-size:27px;">o</span>
<span style="position:absolute; left:317px; top:575px; font-size:27px;">r</span>
<span style="position:absolute; left:325px; top:575px; font-size:27px;">l</span>
<span style="position:absolute; left:330px; top:575px; font-size:27px;">d</span>
<span style="position:absolute; border: magenta 1px solid; left:241px; top:599px; width:40px; height:27px;"></span>
<span style="position:absolute; left:241px; top:599px; font-size:27px;">o</span>
<span style="position:absolute; left:254px; top:599px; font-size:27px;">r</span>
<span style="position:absolute; left:262px; top:599px; font-size:27px;">l</span>
<span style="position:absolute; left:268px; top:599px; font-size:27px;">d</span>
<div style="position:absolute; top:0px;">Page: <a href="#1">1</a></div>
</body></html> </body></html>

View File

@ -1,4 +1,4 @@
#!/usr/bin/python2 -O #!/usr/bin/python -O
# #
# pdf2html.cgi - Gateway script for converting PDF into HTML. # pdf2html.cgi - Gateway script for converting PDF into HTML.
# #
@ -42,7 +42,7 @@ def url(base, **kw):
## convert ## convert
## ##
class FileSizeExceeded(ValueError): pass class FileSizeExceeded(ValueError): pass
def convert(outfp, infp, path, codec='utf-8', def convert(infp, outfp, path, codec='utf-8',
maxpages=0, maxfilesize=0, pagenos=None, maxpages=0, maxfilesize=0, pagenos=None,
html=True): html=True):
# save the input file. # save the input file.
@ -76,22 +76,22 @@ def convert(outfp, infp, path, codec='utf-8',
class WebApp(object): class WebApp(object):
TITLE = 'pdf2html demo' TITLE = 'pdf2html demo'
APPPATH = '/' # absolute URL path to this application. MAXFILESIZE = 10000000 # set to zero if unlimited.
MAXFILESIZE = 5000000 # set to zero if unlimited.
MAXPAGES = 10 # set to zero if unlimited. MAXPAGES = 10 # set to zero if unlimited.
def __init__(self, infp=sys.stdin, outfp=sys.stdout, codec='utf-8'): def __init__(self, infp=sys.stdin, outfp=sys.stdout, environ=os.environ,
codec='utf-8', apppath='/'):
self.infp = infp
self.outfp = outfp self.outfp = outfp
self.codec = codec self.codec = codec
self.remote_addr = os.environ.get('REMOTE_ADDR') self.apppath = apppath
self.path_info = os.environ.get('PATH_INFO') self.remote_addr = environ.get('REMOTE_ADDR')
self.method = os.environ.get('REQUEST_METHOD', 'GET').upper() self.path_info = environ.get('PATH_INFO')
self.server = os.environ.get('SERVER_SOFTWARE', '') self.method = environ.get('REQUEST_METHOD', 'GET').upper()
self.logpath = os.environ.get('LOG_PATH', './var/log') self.server = environ.get('SERVER_SOFTWARE', '')
self.tmpdir = os.environ.get('TEMP', './var/') self.tmpdir = environ.get('TEMP', './var/')
self.content_type = 'text/html; charset=%s' % codec self.content_type = 'text/html; charset=%s' % codec
self.cur_time = time.time() self.logger = logging.getLogger()
self.form = cgi.FieldStorage(infp)
return return
def put(self, *args): def put(self, *args):
@ -130,7 +130,7 @@ class WebApp(object):
self.put( self.put(
'<html><head><title>%s</title></head><body>\n' % q(self.TITLE), '<html><head><title>%s</title></head><body>\n' % q(self.TITLE),
'<h1>%s</h1><hr>\n' % q(self.TITLE), '<h1>%s</h1><hr>\n' % q(self.TITLE),
'<form method="POST" action="%s" enctype="multipart/form-data">\n' % q(self.APPPATH), '<form method="POST" action="%s" enctype="multipart/form-data">\n' % q(self.apppath),
'<p>Upload PDF File: <input name="f" type="file" value="">\n', '<p>Upload PDF File: <input name="f" type="file" value="">\n',
'&nbsp; Page numbers (comma-separated):\n', '&nbsp; Page numbers (comma-separated):\n',
'<input name="p" type="text" size="10" value="">\n', '<input name="p" type="text" size="10" value="">\n',
@ -145,49 +145,54 @@ class WebApp(object):
) )
return return
def run(self, argv): def setup(self):
logging.basicConfig(level=logging.INFO, if not os.path.isdir(self.tmpdir):
format='%(asctime)s %(levelname)s %(message)s', self.logger.error('no tmpdir')
filename=self.logpath, filemode='a') status = 304
if self.path_info != self.APPPATH: elif self.path_info != self.apppath:
status = 404
else:
status = 200
self._status = status
return status
def run(self):
form = cgi.FieldStorage(self.infp)
if self._status != 200:
self.http_404() self.http_404()
return return
if not os.path.isdir(self.tmpdir):
logging.error('no tmpdir')
self.bummer('error')
return
if (self.method != 'POST' or if (self.method != 'POST' or
'c' not in self.form or 'c' not in form or
'f' not in self.form): 'f' not in form):
self.coverpage() self.coverpage()
return return
item = self.form['f'] item = form['f']
if not (item.file and item.filename): if not (item.file and item.filename):
self.coverpage() self.coverpage()
return return
cmd = self.form.getvalue('c') cmd = form.getvalue('c')
html = (cmd == 'Convert to HTML') html = (cmd == 'Convert to HTML')
pagenos = [] pagenos = []
if 'p' in self.form: if 'p' in form:
for m in re.finditer(r'\d+', self.form.getvalue('p')): for m in re.finditer(r'\d+', form.getvalue('p')):
try: try:
pagenos.append(int(m.group(0))) pagenos.append(int(m.group(0)))
except ValueError: except ValueError:
pass pass
logging.info('received: host=%s, name=%r, pagenos=%r' % self.logger.info('received: host=%s, name=%r, pagenos=%r' %
(self.remote_addr, item.filename, pagenos)) (self.remote_addr, item.filename, pagenos))
h = abs(hash((random.random(), self.remote_addr, item.filename))) h = abs(hash((random.random(), self.remote_addr, item.filename)))
tmppath = os.path.join(self.tmpdir, '%08x%08x.pdf' % (self.cur_time, h)) tmppath = os.path.join(self.tmpdir, '%08x%08x.pdf' % (time.time(), h))
try: try:
if not html: if not html:
self.content_type = 'text/plain; charset=%s' % self.codec self.content_type = 'text/plain; charset=%s' % self.codec
self.http_200() self.http_200()
try: try:
convert(sys.stdout, item.file, tmppath, pagenos=pagenos, codec=self.codec, convert(item.file, sys.stdout, tmppath, pagenos=pagenos, codec=self.codec,
maxpages=self.MAXPAGES, maxfilesize=self.MAXFILESIZE, html=html) maxpages=self.MAXPAGES, maxfilesize=self.MAXFILESIZE, html=html)
except Exception, e: except Exception, e:
self.put('<p>Sorry, an error has occured: %s' % q(repr(e))) self.put('<p>Sorry, an error has occured: %s' % q(repr(e)))
logging.error('convert: %r: path=%r: %s' % (e, tmppath, traceback.format_exc())) self.logger.error('convert: %r: path=%r: %s' % (e, tmppath, traceback.format_exc()))
finally: finally:
try: try:
os.remove(tmppath) os.remove(tmppath)
@ -197,4 +202,7 @@ class WebApp(object):
# main # main
if __name__ == '__main__': sys.exit(WebApp().run(sys.argv)) if __name__ == '__main__':
app = WebApp()
app.setup()
sys.exit(app.run())