test cases updated
git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@282 1aa58f4a-7d42-0410-adbc-911cccaed67cpull/1/head
parent
84ed94aec0
commit
5d98a27d9c
|
@ -5,7 +5,7 @@ RM=rm -f
|
||||||
CMP=:
|
CMP=:
|
||||||
PYTHON=python2
|
PYTHON=python2
|
||||||
|
|
||||||
PDF2TXT=PYTHONPATH=.. $(PYTHON) ../tools/pdf2txt.py -Dx -p1
|
PDF2TXT=PYTHONPATH=.. $(PYTHON) ../tools/pdf2txt.py -p1
|
||||||
|
|
||||||
HTMLS=$(HTMLS_FREE) $(HTMLS_NONFREE)
|
HTMLS=$(HTMLS_FREE) $(HTMLS_NONFREE)
|
||||||
HTMLS_FREE= \
|
HTMLS_FREE= \
|
||||||
|
@ -49,8 +49,7 @@ XMLS_NONFREE= \
|
||||||
nonfree/naacl06-shinyama.xml \
|
nonfree/naacl06-shinyama.xml \
|
||||||
nonfree/nlp2004slides.xml
|
nonfree/nlp2004slides.xml
|
||||||
|
|
||||||
all:
|
all: test
|
||||||
$(MAKE) test CMP=cmp
|
|
||||||
|
|
||||||
test: htmls texts xmls
|
test: htmls texts xmls
|
||||||
|
|
||||||
|
|
1035
samples/jo.html.ref
1035
samples/jo.html.ref
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -14,6 +14,16 @@ number of other significant copyright-related issues.
|
||||||
|
|
||||||
The DMCA is divided into five titles:
|
The DMCA is divided into five titles:
|
||||||
|
|
||||||
|
!
|
||||||
|
|
||||||
|
!
|
||||||
|
|
||||||
|
!
|
||||||
|
|
||||||
|
!
|
||||||
|
|
||||||
|
!
|
||||||
|
|
||||||
Title I, the “WIPO Copyright and Performances and Phonograms
|
Title I, the “WIPO Copyright and Performances and Phonograms
|
||||||
Treaties Implementation Act of 1998,” implements the WIPO
|
Treaties Implementation Act of 1998,” implements the WIPO
|
||||||
treaties.
|
treaties.
|
||||||
|
@ -32,16 +42,6 @@ of rights in motion pictures.
|
||||||
Title V, the “Vessel Hull Design Protection Act,” creates a new form
|
Title V, the “Vessel Hull Design Protection Act,” creates a new form
|
||||||
of protection for the design of vessel hulls.
|
of protection for the design of vessel hulls.
|
||||||
|
|
||||||
!
|
|
||||||
|
|
||||||
!
|
|
||||||
|
|
||||||
!
|
|
||||||
|
|
||||||
!
|
|
||||||
|
|
||||||
!
|
|
||||||
|
|
||||||
This memorandum summarizes briefly each title of the DMCA. It provides
|
This memorandum summarizes briefly each title of the DMCA. It provides
|
||||||
merely an overview of the law’s provisions; for purposes of length and readability a
|
merely an overview of the law’s provisions; for purposes of length and readability a
|
||||||
significant amount of detail has been omitted. A complete understanding of any
|
significant amount of detail has been omitted. A complete understanding of any
|
||||||
|
|
|
@ -568,7 +568,42 @@
|
||||||
</text>
|
</text>
|
||||||
</textline>
|
</textline>
|
||||||
</textbox>
|
</textbox>
|
||||||
<textbox id="5" bbox="180.000,235.452,504.108,464.124">
|
<textbox id="5" bbox="144.000,450.000,152.004,462.480">
|
||||||
|
<textline bbox="144.000,450.000,152.004,462.480">
|
||||||
|
<text font="ELCKGH+WPTypographicSymbols" bbox="144.000,450.000,152.004,462.480" size="12.480">!</text>
|
||||||
|
<text>
|
||||||
|
</text>
|
||||||
|
</textline>
|
||||||
|
</textbox>
|
||||||
|
<textbox id="6" bbox="144.000,409.680,152.004,422.160">
|
||||||
|
<textline bbox="144.000,409.680,152.004,422.160">
|
||||||
|
<text font="ELCKGH+WPTypographicSymbols" bbox="144.000,409.680,152.004,422.160" size="12.480">!</text>
|
||||||
|
<text>
|
||||||
|
</text>
|
||||||
|
</textline>
|
||||||
|
</textbox>
|
||||||
|
<textbox id="7" bbox="144.000,369.360,152.004,381.840">
|
||||||
|
<textline bbox="144.000,369.360,152.004,381.840">
|
||||||
|
<text font="ELCKGH+WPTypographicSymbols" bbox="144.000,369.360,152.004,381.840" size="12.480">!</text>
|
||||||
|
<text>
|
||||||
|
</text>
|
||||||
|
</textline>
|
||||||
|
</textbox>
|
||||||
|
<textbox id="8" bbox="144.000,329.040,152.004,341.520">
|
||||||
|
<textline bbox="144.000,329.040,152.004,341.520">
|
||||||
|
<text font="ELCKGH+WPTypographicSymbols" bbox="144.000,329.040,152.004,341.520" size="12.480">!</text>
|
||||||
|
<text>
|
||||||
|
</text>
|
||||||
|
</textline>
|
||||||
|
</textbox>
|
||||||
|
<textbox id="9" bbox="144.000,248.400,152.004,260.880">
|
||||||
|
<textline bbox="144.000,248.400,152.004,260.880">
|
||||||
|
<text font="ELCKGH+WPTypographicSymbols" bbox="144.000,248.400,152.004,260.880" size="12.480">!</text>
|
||||||
|
<text>
|
||||||
|
</text>
|
||||||
|
</textline>
|
||||||
|
</textbox>
|
||||||
|
<textbox id="10" bbox="180.000,235.452,504.108,464.124">
|
||||||
<textline bbox="180.000,450.492,503.985,464.124">
|
<textline bbox="180.000,450.492,503.985,464.124">
|
||||||
<text font="Garamond" bbox="180.000,450.492,187.380,464.064" size="13.572">T</text>
|
<text font="Garamond" bbox="180.000,450.492,187.380,464.064" size="13.572">T</text>
|
||||||
<text font="Garamond" bbox="187.440,450.492,190.188,464.064" size="13.572">i</text>
|
<text font="Garamond" bbox="187.440,450.492,190.188,464.064" size="13.572">i</text>
|
||||||
|
@ -1669,41 +1704,6 @@
|
||||||
</text>
|
</text>
|
||||||
</textline>
|
</textline>
|
||||||
</textbox>
|
</textbox>
|
||||||
<textbox id="6" bbox="144.000,450.000,152.004,462.480">
|
|
||||||
<textline bbox="144.000,450.000,152.004,462.480">
|
|
||||||
<text font="ELCKGH+WPTypographicSymbols" bbox="144.000,450.000,152.004,462.480" size="12.480">!</text>
|
|
||||||
<text>
|
|
||||||
</text>
|
|
||||||
</textline>
|
|
||||||
</textbox>
|
|
||||||
<textbox id="7" bbox="144.000,409.680,152.004,422.160">
|
|
||||||
<textline bbox="144.000,409.680,152.004,422.160">
|
|
||||||
<text font="ELCKGH+WPTypographicSymbols" bbox="144.000,409.680,152.004,422.160" size="12.480">!</text>
|
|
||||||
<text>
|
|
||||||
</text>
|
|
||||||
</textline>
|
|
||||||
</textbox>
|
|
||||||
<textbox id="8" bbox="144.000,369.360,152.004,381.840">
|
|
||||||
<textline bbox="144.000,369.360,152.004,381.840">
|
|
||||||
<text font="ELCKGH+WPTypographicSymbols" bbox="144.000,369.360,152.004,381.840" size="12.480">!</text>
|
|
||||||
<text>
|
|
||||||
</text>
|
|
||||||
</textline>
|
|
||||||
</textbox>
|
|
||||||
<textbox id="9" bbox="144.000,329.040,152.004,341.520">
|
|
||||||
<textline bbox="144.000,329.040,152.004,341.520">
|
|
||||||
<text font="ELCKGH+WPTypographicSymbols" bbox="144.000,329.040,152.004,341.520" size="12.480">!</text>
|
|
||||||
<text>
|
|
||||||
</text>
|
|
||||||
</textline>
|
|
||||||
</textbox>
|
|
||||||
<textbox id="10" bbox="144.000,248.400,152.004,260.880">
|
|
||||||
<textline bbox="144.000,248.400,152.004,260.880">
|
|
||||||
<text font="ELCKGH+WPTypographicSymbols" bbox="144.000,248.400,152.004,260.880" size="12.480">!</text>
|
|
||||||
<text>
|
|
||||||
</text>
|
|
||||||
</textline>
|
|
||||||
</textbox>
|
|
||||||
<textbox id="11" bbox="108.000,168.360,504.108,222.144">
|
<textbox id="11" bbox="108.000,168.360,504.108,222.144">
|
||||||
<textline bbox="144.000,208.572,504.060,222.144">
|
<textline bbox="144.000,208.572,504.060,222.144">
|
||||||
<text font="Garamond" bbox="144.000,208.572,151.380,222.144" size="13.572">T</text>
|
<text font="Garamond" bbox="144.000,208.572,151.380,222.144" size="13.572">T</text>
|
||||||
|
@ -2194,20 +2194,20 @@
|
||||||
<textgroup bbox="144.000,235.452,504.108,490.944">
|
<textgroup bbox="144.000,235.452,504.108,490.944">
|
||||||
<textbox id="4" bbox="144.000,477.372,323.881,490.944" />
|
<textbox id="4" bbox="144.000,477.372,323.881,490.944" />
|
||||||
<textgroup bbox="144.000,235.452,504.108,464.124">
|
<textgroup bbox="144.000,235.452,504.108,464.124">
|
||||||
<textbox id="5" bbox="180.000,235.452,504.108,464.124" />
|
|
||||||
<textgroup bbox="144.000,248.400,152.004,462.480">
|
<textgroup bbox="144.000,248.400,152.004,462.480">
|
||||||
<textgroup bbox="144.000,329.040,152.004,462.480">
|
<textgroup bbox="144.000,329.040,152.004,462.480">
|
||||||
<textgroup bbox="144.000,369.360,152.004,462.480">
|
<textgroup bbox="144.000,369.360,152.004,462.480">
|
||||||
<textgroup bbox="144.000,409.680,152.004,462.480">
|
<textgroup bbox="144.000,409.680,152.004,462.480">
|
||||||
<textbox id="6" bbox="144.000,450.000,152.004,462.480" />
|
<textbox id="5" bbox="144.000,450.000,152.004,462.480" />
|
||||||
<textbox id="7" bbox="144.000,409.680,152.004,422.160" />
|
<textbox id="6" bbox="144.000,409.680,152.004,422.160" />
|
||||||
</textgroup>
|
</textgroup>
|
||||||
<textbox id="8" bbox="144.000,369.360,152.004,381.840" />
|
<textbox id="7" bbox="144.000,369.360,152.004,381.840" />
|
||||||
</textgroup>
|
</textgroup>
|
||||||
<textbox id="9" bbox="144.000,329.040,152.004,341.520" />
|
<textbox id="8" bbox="144.000,329.040,152.004,341.520" />
|
||||||
</textgroup>
|
</textgroup>
|
||||||
<textbox id="10" bbox="144.000,248.400,152.004,260.880" />
|
<textbox id="9" bbox="144.000,248.400,152.004,260.880" />
|
||||||
</textgroup>
|
</textgroup>
|
||||||
|
<textbox id="10" bbox="180.000,235.452,504.108,464.124" />
|
||||||
</textgroup>
|
</textgroup>
|
||||||
</textgroup>
|
</textgroup>
|
||||||
<textbox id="11" bbox="108.000,168.360,504.108,222.144" />
|
<textbox id="11" bbox="108.000,168.360,504.108,222.144" />
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -6,7 +6,7 @@ OMB No. 1545-0074
|
||||||
Identifying number (see page 8)
|
Identifying number (see page 8)
|
||||||
|
|
||||||
|
|
||||||
I
|
I
|
||||||
|
|
||||||
Check if:
|
Check if:
|
||||||
|
|
||||||
|
@ -18,11 +18,11 @@ Type of entry visa (see page 8)
|
||||||
|
|
||||||
䊳
|
䊳
|
||||||
|
|
||||||
7 a
|
7a
|
||||||
Yourself
|
Yourself
|
||||||
|
|
||||||
|
|
||||||
7 b
|
7b
|
||||||
Spouse
|
Spouse
|
||||||
|
|
||||||
|
|
||||||
|
@ -45,10 +45,8 @@ on 7a and 7b
|
||||||
No. of children on
|
No. of children on
|
||||||
7c who:
|
7c who:
|
||||||
|
|
||||||
lived with you
|
● lived with you
|
||||||
●
|
● did not live with
|
||||||
|
|
||||||
● did not live with
|
|
||||||
you due to divorce
|
you due to divorce
|
||||||
or separation
|
or separation
|
||||||
|
|
||||||
|
@ -60,14 +58,13 @@ on lines above
|
||||||
|
|
||||||
8
|
8
|
||||||
|
|
||||||
9 a
|
9a
|
||||||
|
|
||||||
A
|
A
|
||||||
|
|
||||||
1040NR
|
1040NR
|
||||||
|
|
||||||
Form
|
Form
|
||||||
|
|
||||||
Department of the Treasury
|
Department of the Treasury
|
||||||
beginning
|
beginning
|
||||||
Internal Revenue Service
|
Internal Revenue Service
|
||||||
|
@ -92,8 +89,8 @@ Country 䊳
|
||||||
Of what country were you a citizen or national during the tax year? 䊳
|
Of what country were you a citizen or national during the tax year? 䊳
|
||||||
|
|
||||||
|
|
||||||
Give address outside the United States to which you want any
|
|
||||||
Give address in the country where you are a permanent resident.
|
Give address in the country where you are a permanent resident.
|
||||||
|
Give address outside the United States to which you want any
|
||||||
refund check mailed. If same as above, write “Same.”
|
refund check mailed. If same as above, write “Same.”
|
||||||
If same as above, write “Same.”
|
If same as above, write “Same.”
|
||||||
|
|
||||||
|
@ -103,12 +100,12 @@ If same as above, write “Same.”
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
.epyt ro tnirp esaelP
|
Please print or type.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
.dlehhtiw saw xat fi R-9901 )s(mroF hcatta oslA
|
Also attach Form(s) 1099-R if tax was withheld.
|
||||||
.ereh 2-W smroF hcattA
|
Attach Forms W-2 here.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -128,6 +125,10 @@ child for child tax
|
||||||
credit (see page 9)
|
credit (see page 9)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
其
|
其
|
||||||
|
|
||||||
Filing Status and Exemptions for Individuals (see page 8)
|
Filing Status and Exemptions for Individuals (see page 8)
|
||||||
|
@ -160,14 +161,10 @@ Qualifying widow(er) with dependent child (see page 9)
|
||||||
Caution: Do not check box 7a if your parent (or someone else) can claim you as a dependent.
|
Caution: Do not check box 7a if your parent (or someone else) can claim you as a dependent.
|
||||||
Do not check box 7b if your spouse had any U.S. gross income.
|
Do not check box 7b if your spouse had any U.S. gross income.
|
||||||
|
|
||||||
7 c
|
7c
|
||||||
Dependents: (see page 9)
|
Dependents: (see page 9)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(1) First name
|
(1) First name
|
||||||
|
|
||||||
|
|
||||||
|
@ -212,25 +209,22 @@ identifying number
|
||||||
|
|
||||||
10a
|
10a
|
||||||
|
|
||||||
|
11
|
||||||
1 1
|
12
|
||||||
1 2
|
13
|
||||||
1 3
|
14
|
||||||
1 4
|
15
|
||||||
1 5
|
|
||||||
16b
|
16b
|
||||||
|
|
||||||
17b
|
17b
|
||||||
|
18
|
||||||
|
19
|
||||||
|
20
|
||||||
|
21
|
||||||
|
|
||||||
1 8
|
23
|
||||||
1 9
|
|
||||||
2 0
|
|
||||||
2 1
|
|
||||||
|
|
||||||
2 3
|
34
|
||||||
|
35
|
||||||
3 4
|
|
||||||
3 5
|
|
||||||
|
|
||||||
d
|
d
|
||||||
|
|
||||||
|
@ -245,23 +239,7 @@ d
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
9b
|
||||||
|
|
||||||
䊳
|
|
||||||
|
|
||||||
9 b
|
|
||||||
|
|
||||||
䊳
|
|
||||||
|
|
||||||
8
|
|
||||||
|
|
||||||
9 a
|
|
||||||
b
|
|
||||||
|
|
||||||
10a
|
|
||||||
|
|
||||||
b
|
|
||||||
|
|
||||||
|
|
||||||
Total number of exemptions claimed
|
Total number of exemptions claimed
|
||||||
|
|
||||||
|
@ -273,22 +251,21 @@ Tax-exempt interest. Do not include on line 9a
|
||||||
Ordinary dividends
|
Ordinary dividends
|
||||||
10b
|
10b
|
||||||
|
|
||||||
|
|
||||||
Qualified dividends (see page 11)
|
Qualified dividends (see page 11)
|
||||||
|
|
||||||
1 1
|
11
|
||||||
Taxable refunds, credits, or offsets of state and local income taxes (see page 11)
|
Taxable refunds, credits, or offsets of state and local income taxes (see page 11)
|
||||||
|
|
||||||
1 2
|
12
|
||||||
Scholarship and fellowship grants. Attach Form(s) 1042-S or required statement (see page 11)
|
Scholarship and fellowship grants. Attach Form(s) 1042-S or required statement (see page 11)
|
||||||
|
|
||||||
1 3
|
13
|
||||||
Business income or (loss). Attach Schedule C or C-EZ (Form 1040)
|
Business income or (loss). Attach Schedule C or C-EZ (Form 1040)
|
||||||
|
|
||||||
1 4
|
14
|
||||||
Capital gain or (loss). Attach Schedule D (Form 1040) if required. If not required, check here
|
Capital gain or (loss). Attach Schedule D (Form 1040) if required. If not required, check here
|
||||||
|
|
||||||
1 5
|
15
|
||||||
Other gains or (losses). Attach Form 4797
|
Other gains or (losses). Attach Form 4797
|
||||||
16a
|
16a
|
||||||
16b
|
16b
|
||||||
|
@ -296,8 +273,6 @@ Other gains or (losses). Attach Form 4797
|
||||||
Taxable amount (see page 12)
|
Taxable amount (see page 12)
|
||||||
IRA distributions
|
IRA distributions
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
17a
|
17a
|
||||||
|
|
||||||
|
|
||||||
|
@ -308,77 +283,88 @@ Pensions and annuities
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
18
|
||||||
|
|
||||||
1 8
|
|
||||||
Rental real estate, royalties, partnerships, trusts, etc. Attach Schedule E (Form 1040)
|
Rental real estate, royalties, partnerships, trusts, etc. Attach Schedule E (Form 1040)
|
||||||
1 9
|
19
|
||||||
Farm income or (loss). Attach Schedule F (Form 1040)
|
Farm income or (loss). Attach Schedule F (Form 1040)
|
||||||
|
|
||||||
2 0
|
20
|
||||||
Unemployment compensation
|
Unemployment compensation
|
||||||
|
|
||||||
2 1
|
21
|
||||||
Other income. List type and amount (see page 15)
|
Other income. List type and amount (see page 15)
|
||||||
2 2
|
22
|
||||||
2 2
|
22
|
||||||
Total income exempt by a treaty from page 5, Item M
|
Total income exempt by a treaty from page 5, Item M
|
||||||
|
|
||||||
2 3
|
23
|
||||||
Add lines 8, 9a, 10a, 11–15, 16b, and 17b–21. This is your total effectively connected income
|
Add lines 8, 9a, 10a, 11–15, 16b, and 17b–21. This is your total effectively connected income 䊳
|
||||||
|
|
||||||
2 4
|
|
||||||
2 4
|
24
|
||||||
|
24
|
||||||
Educator expenses (see page 15)
|
Educator expenses (see page 15)
|
||||||
2 5
|
25
|
||||||
2 5
|
25
|
||||||
Health savings account deduction. Attach Form 8889
|
Health savings account deduction. Attach Form 8889
|
||||||
2 6
|
26
|
||||||
2 6
|
26
|
||||||
Moving expenses. Attach Form 3903
|
Moving expenses. Attach Form 3903
|
||||||
2 7
|
27
|
||||||
|
|
||||||
2 7
|
27
|
||||||
Self-employed SEP, SIMPLE, and qualified plans
|
Self-employed SEP, SIMPLE, and qualified plans
|
||||||
2 8
|
28
|
||||||
|
|
||||||
2 8
|
28
|
||||||
Self-employed health insurance deduction (see page 16)
|
Self-employed health insurance deduction (see page 16)
|
||||||
2 9
|
29
|
||||||
2 9
|
29
|
||||||
Penalty on early withdrawal of savings
|
Penalty on early withdrawal of savings
|
||||||
3 0
|
30
|
||||||
|
|
||||||
3 0
|
30
|
||||||
Scholarship and fellowship grants excluded
|
Scholarship and fellowship grants excluded
|
||||||
3 1
|
31
|
||||||
|
|
||||||
IRA deduction (see page 16)
|
IRA deduction (see page 16)
|
||||||
3 1
|
31
|
||||||
3 2
|
32
|
||||||
|
|
||||||
3 2
|
32
|
||||||
Student loan interest deduction (see page 16)
|
Student loan interest deduction (see page 16)
|
||||||
3 3
|
33
|
||||||
|
|
||||||
3 3
|
33
|
||||||
Domestic production activities deduction. Attach Form 8903
|
Domestic production activities deduction. Attach Form 8903
|
||||||
|
|
||||||
3 4
|
34
|
||||||
Add lines 24 through 33
|
Add lines 24 through 33
|
||||||
3 5
|
35
|
||||||
Subtract line 34 from line 23. Enter here and on line 36. This is your adjusted gross income
|
Subtract line 34 from line 23. Enter here and on line 36. This is your adjusted gross income 䊳
|
||||||
|
|
||||||
|
|
||||||
For Disclosure, Privacy Act, and Paperwork Reduction Act Notice, see page 32.
|
For Disclosure, Privacy Act, and Paperwork Reduction Act Notice, see page 32.
|
||||||
Cat. No. 11364D
|
Cat. No. 11364D
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
ssenisuB/edarT .S.U htiW detcennoC ylevitceffE emocnI
|
Income Effectively Connected With U.S. Trade/Business
|
||||||
|
|
||||||
|
Enclose, but do not attach, any payment.
|
||||||
|
|
||||||
|
8
|
||||||
|
|
||||||
|
9a
|
||||||
|
b
|
||||||
|
|
||||||
|
10a
|
||||||
|
b
|
||||||
|
|
||||||
|
|
||||||
|
Adjusted Gross Income
|
||||||
|
|
||||||
.tnemyap yna ,hcatta ton od tub ,esolcnE
|
|
||||||
|
|
||||||
emocnI ssorG detsujdA
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,19 +1,11 @@
|
||||||
|
Leadpct: 0% Pt. size: 9.5 ❏ Draft
|
||||||
|
Userid: ________ DTD INSTR04
|
||||||
PAGER/SGML
|
PAGER/SGML
|
||||||
Page 1 of 48
|
|
||||||
|
|
||||||
________
|
|
||||||
|
|
||||||
Leadpct: 0%
|
|
||||||
|
|
||||||
DTD INSTR04
|
|
||||||
|
|
||||||
Pt. size: 9.5 ❏ Draft
|
|
||||||
Userid:
|
|
||||||
(Init. & date)
|
(Init. & date)
|
||||||
Fileid:
|
Fileid:
|
||||||
D:\USERS\8fllb\documents\epicfiles\2007Instructions1040NR.sgm
|
D:\USERS\8fllb\documents\epicfiles\2007Instructions1040NR.sgm
|
||||||
|
Page 1 of 48 Instructions for Form 1040NR
|
||||||
7:48 - 6-DEC-2007
|
7:48 - 6-DEC-2007
|
||||||
Instructions for Form 1040NR
|
|
||||||
|
|
||||||
❏ Ok to Print
|
❏ Ok to Print
|
||||||
|
|
||||||
|
@ -27,6 +19,122 @@ U.S. Nonresident Alien Income Tax Return
|
||||||
Department of the Treasury
|
Department of the Treasury
|
||||||
Internal Revenue Service
|
Internal Revenue Service
|
||||||
|
|
||||||
|
use a different address this year. See
|
||||||
|
Section references are to the Internal
|
||||||
|
Where To File on page 4.
|
||||||
|
Revenue Code unless otherwise noted.
|
||||||
|
General Instructions deduction. The deduction rate for
|
||||||
|
Domestic production activities
|
||||||
|
2007 is increased to 6%.
|
||||||
|
What’s New for 2007
|
||||||
|
Unreported social security and
|
||||||
|
Medicare tax on wages.
|
||||||
|
If you are
|
||||||
|
Tax benefits extended. The following
|
||||||
|
an employee and your employer did not
|
||||||
|
tax benefits were extended through
|
||||||
|
withhold social security and Medicare
|
||||||
|
2007.
|
||||||
|
• Deduction for educator expenses in
|
||||||
|
tax, see Form 8919 to figure and report
|
||||||
|
this tax.
|
||||||
|
figuring adjusted gross income.
|
||||||
|
• District of Columbia first-time
|
||||||
|
Refundable credit for prior-year
|
||||||
|
minimum tax.
|
||||||
|
If you have an unused
|
||||||
|
homebuyer credit.
|
||||||
|
minimum tax credit carryforward from
|
||||||
|
Alternative minimum tax (AMT)
|
||||||
|
2004, see Form 8801 to find if you can
|
||||||
|
exemption amount decreased. The
|
||||||
|
take this credit.
|
||||||
|
AMT exemption amount is decreased to
|
||||||
|
Health savings account (HSA)
|
||||||
|
$33,750 ($45,000 if a qualifying
|
||||||
|
funding distributions. You may be
|
||||||
|
widow(er); $22,500 if married filing
|
||||||
|
able to elect to exclude from income a
|
||||||
|
separately).
|
||||||
|
distribution made from your IRA to your
|
||||||
|
At the time these instructions
|
||||||
|
HSA. See the instructions for lines 16a
|
||||||
|
!
|
||||||
|
went to print, Congress was
|
||||||
|
and 16b beginning on page 12.
|
||||||
|
considering legislation that
|
||||||
|
CAUTION
|
||||||
|
New recordkeeping requirements for
|
||||||
|
would increase the amounts above. To
|
||||||
|
contributions of money.
|
||||||
|
For
|
||||||
|
find out if this legislation was enacted,
|
||||||
|
charitable contributions of money,
|
||||||
|
and for more details, see the
|
||||||
|
regardless of the amount, you must
|
||||||
|
Instructions for Form 6251.
|
||||||
|
maintain as a record of the contribution
|
||||||
|
IRA deduction expanded.
|
||||||
|
If you were
|
||||||
|
a bank record (such as a cancelled
|
||||||
|
covered by a retirement plan, you may
|
||||||
|
check) or a written record from the
|
||||||
|
be able to take an IRA deduction if your
|
||||||
|
charity. The written record must include
|
||||||
|
2007 modified adjusted gross income
|
||||||
|
the name of the charity, date, and
|
||||||
|
(AGI) is less than $62,000 ($103,000 if
|
||||||
|
amount of the contribution. See Gifts to
|
||||||
|
a qualifying widow(er)).
|
||||||
|
U.S. Charities that begins on page 26.
|
||||||
|
You may be able to deduct up to an
|
||||||
|
Exemption for housing a person
|
||||||
|
additional $3,000 if you were a
|
||||||
|
displaced by Hurricane Katrina
|
||||||
|
participant in a 401(k) plan and your
|
||||||
|
expires. The additional exemption
|
||||||
|
employer was in bankruptcy in an
|
||||||
|
amount for housing a person displaced
|
||||||
|
earlier year.
|
||||||
|
by Hurricane Katrina does not apply for
|
||||||
|
2007 or later years.
|
||||||
|
Standard mileage rates. The 2007
|
||||||
|
Telephone excise tax credit.
|
||||||
|
rate for business use of your vehicle is
|
||||||
|
This
|
||||||
|
481/2 cents a mile. The 2007 rate for
|
||||||
|
credit was available only on your 2006
|
||||||
|
use of your vehicle to move is 20 cents
|
||||||
|
return. If you filed but did not request it
|
||||||
|
a mile. The special rate for charitable
|
||||||
|
on your 2006 return, file Form 1040X
|
||||||
|
use of your vehicle to provide relief
|
||||||
|
using a simplified procedure explained
|
||||||
|
related to Hurricane Katrina has
|
||||||
|
in its instructions to amend your 2006
|
||||||
|
expired.
|
||||||
|
return. If you were not required to file a
|
||||||
|
2006 return, see the 2006 Form
|
||||||
|
Elective salary deferrals. The
|
||||||
|
1040EZ-T.
|
||||||
|
maximum amount you can defer under
|
||||||
|
all plans is generally limited to $15,500
|
||||||
|
What’s New for 2008
|
||||||
|
($10,500 if you only have SIMPLE
|
||||||
|
plans; $18,500 for section 403(b) plans
|
||||||
|
IRA deduction expanded. You may
|
||||||
|
if you qualify for the 15-year rule). See
|
||||||
|
be able to deduct up to $5,000 ($6,000
|
||||||
|
the instructions for line 8 on page 10.
|
||||||
|
if age 50 or older at the end of the
|
||||||
|
Mailing your return.
|
||||||
|
If you are filing
|
||||||
|
year). You may be able to take an IRA
|
||||||
|
deduction if you were covered by a
|
||||||
|
the return for an estate or trust, you will
|
||||||
|
|
||||||
|
Cat. No. 11368V
|
||||||
|
|
||||||
retirement plan and your 2008 modified
|
retirement plan and your 2008 modified
|
||||||
AGI is less than $63,000 ($105,000) if a
|
AGI is less than $63,000 ($105,000) if a
|
||||||
qualifying widow(er)).
|
qualifying widow(er)).
|
||||||
|
@ -71,8 +179,7 @@ investment income on a parent’s return
|
||||||
and the special rule for when a child
|
and the special rule for when a child
|
||||||
must file Form 6251 will also apply to
|
must file Form 6251 will also apply to
|
||||||
the children listed above.
|
the children listed above.
|
||||||
Expiring tax benefits.
|
Expiring tax benefits. The following
|
||||||
The following
|
|
||||||
benefits are scheduled to expire and
|
benefits are scheduled to expire and
|
||||||
will not apply for 2008.
|
will not apply for 2008.
|
||||||
• Deduction for educator expenses in
|
• Deduction for educator expenses in
|
||||||
|
@ -85,130 +192,4 @@ property.
|
||||||
homebuyer credit (for homes
|
homebuyer credit (for homes
|
||||||
purchased after 2007).
|
purchased after 2007).
|
||||||
|
|
||||||
Section references are to the Internal
|
|
||||||
Revenue Code unless otherwise noted.
|
|
||||||
General Instructions
|
|
||||||
What’s New for 2007
|
|
||||||
Tax benefits extended.
|
|
||||||
The following
|
|
||||||
tax benefits were extended through
|
|
||||||
2007.
|
|
||||||
• Deduction for educator expenses in
|
|
||||||
figuring adjusted gross income.
|
|
||||||
• District of Columbia first-time
|
|
||||||
homebuyer credit.
|
|
||||||
Alternative minimum tax (AMT)
|
|
||||||
exemption amount decreased.
|
|
||||||
The
|
|
||||||
AMT exemption amount is decreased to
|
|
||||||
$33,750 ($45,000 if a qualifying
|
|
||||||
widow(er); $22,500 if married filing
|
|
||||||
separately).
|
|
||||||
At the time these instructions
|
|
||||||
!
|
|
||||||
went to print, Congress was
|
|
||||||
considering legislation that
|
|
||||||
CAUTION
|
|
||||||
would increase the amounts above. To
|
|
||||||
find out if this legislation was enacted,
|
|
||||||
and for more details, see the
|
|
||||||
Instructions for Form 6251.
|
|
||||||
IRA deduction expanded.
|
|
||||||
If you were
|
|
||||||
covered by a retirement plan, you may
|
|
||||||
be able to take an IRA deduction if your
|
|
||||||
2007 modified adjusted gross income
|
|
||||||
(AGI) is less than $62,000 ($103,000 if
|
|
||||||
a qualifying widow(er)).
|
|
||||||
You may be able to deduct up to an
|
|
||||||
additional $3,000 if you were a
|
|
||||||
participant in a 401(k) plan and your
|
|
||||||
employer was in bankruptcy in an
|
|
||||||
earlier year.
|
|
||||||
Standard mileage rates.
|
|
||||||
The 2007
|
|
||||||
rate for business use of your vehicle is
|
|
||||||
481/2 cents a mile. The 2007 rate for
|
|
||||||
use of your vehicle to move is 20 cents
|
|
||||||
a mile. The special rate for charitable
|
|
||||||
use of your vehicle to provide relief
|
|
||||||
related to Hurricane Katrina has
|
|
||||||
expired.
|
|
||||||
Elective salary deferrals.
|
|
||||||
The
|
|
||||||
maximum amount you can defer under
|
|
||||||
all plans is generally limited to $15,500
|
|
||||||
($10,500 if you only have SIMPLE
|
|
||||||
plans; $18,500 for section 403(b) plans
|
|
||||||
if you qualify for the 15-year rule). See
|
|
||||||
the instructions for line 8 on page 10.
|
|
||||||
Mailing your return.
|
|
||||||
If you are filing
|
|
||||||
the return for an estate or trust, you will
|
|
||||||
|
|
||||||
use a different address this year. See
|
|
||||||
Where To File on page 4.
|
|
||||||
Domestic production activities
|
|
||||||
deduction.
|
|
||||||
The deduction rate for
|
|
||||||
2007 is increased to 6%.
|
|
||||||
Unreported social security and
|
|
||||||
Medicare tax on wages.
|
|
||||||
If you are
|
|
||||||
an employee and your employer did not
|
|
||||||
withhold social security and Medicare
|
|
||||||
tax, see Form 8919 to figure and report
|
|
||||||
this tax.
|
|
||||||
Refundable credit for prior-year
|
|
||||||
minimum tax.
|
|
||||||
If you have an unused
|
|
||||||
minimum tax credit carryforward from
|
|
||||||
2004, see Form 8801 to find if you can
|
|
||||||
take this credit.
|
|
||||||
Health savings account (HSA)
|
|
||||||
funding distributions.
|
|
||||||
You may be
|
|
||||||
able to elect to exclude from income a
|
|
||||||
distribution made from your IRA to your
|
|
||||||
HSA. See the instructions for lines 16a
|
|
||||||
and 16b beginning on page 12.
|
|
||||||
New recordkeeping requirements for
|
|
||||||
contributions of money.
|
|
||||||
For
|
|
||||||
charitable contributions of money,
|
|
||||||
regardless of the amount, you must
|
|
||||||
maintain as a record of the contribution
|
|
||||||
a bank record (such as a cancelled
|
|
||||||
check) or a written record from the
|
|
||||||
charity. The written record must include
|
|
||||||
the name of the charity, date, and
|
|
||||||
amount of the contribution. See Gifts to
|
|
||||||
U.S. Charities that begins on page 26.
|
|
||||||
Exemption for housing a person
|
|
||||||
displaced by Hurricane Katrina
|
|
||||||
expires.
|
|
||||||
The additional exemption
|
|
||||||
amount for housing a person displaced
|
|
||||||
by Hurricane Katrina does not apply for
|
|
||||||
2007 or later years.
|
|
||||||
Telephone excise tax credit.
|
|
||||||
This
|
|
||||||
credit was available only on your 2006
|
|
||||||
return. If you filed but did not request it
|
|
||||||
on your 2006 return, file Form 1040X
|
|
||||||
using a simplified procedure explained
|
|
||||||
in its instructions to amend your 2006
|
|
||||||
return. If you were not required to file a
|
|
||||||
2006 return, see the 2006 Form
|
|
||||||
1040EZ-T.
|
|
||||||
What’s New for 2008
|
|
||||||
IRA deduction expanded.
|
|
||||||
You may
|
|
||||||
be able to deduct up to $5,000 ($6,000
|
|
||||||
if age 50 or older at the end of the
|
|
||||||
year). You may be able to take an IRA
|
|
||||||
deduction if you were covered by a
|
|
||||||
|
|
||||||
Cat. No. 11368V
|
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,12 +1,12 @@
|
||||||
第 ›Ÿ˜ 号
|
平成 ™— 年 › 月 ™œ 日 金曜日
|
||||||
|
|
||||||
›
|
|
||||||
|
|
||||||
官
|
官
|
||||||
|
|
||||||
報
|
報
|
||||||
|
|
||||||
平成 ™— 年 › 月 ™œ 日 金曜日
|
第 ›Ÿ˜ 号
|
||||||
|
|
||||||
|
›
|
||||||
|
|
||||||
政令第百四十九号
|
政令第百四十九号
|
||||||
道路交通法施行令の一部を改正する政令
|
道路交通法施行令の一部を改正する政令
|
||||||
|
|
|
@ -1,57 +1,22 @@
|
||||||
<?xml version="1.0" encoding="utf-8" ?>
|
<?xml version="1.0" encoding="utf-8" ?>
|
||||||
<pages>
|
<pages>
|
||||||
<page id="1" bbox="0.000,0.000,595.000,842.000" rotate="0">
|
<page id="1" bbox="0.000,0.000,595.000,842.000" rotate="0">
|
||||||
<textbox id="0" bbox="392.500,787.804,457.500,800.144">
|
<textbox id="0" bbox="87.500,787.804,242.500,800.144">
|
||||||
<textline bbox="392.500,787.804,457.500,800.144">
|
|
||||||
<text font="Ryumin-Light" bbox="392.500,787.804,402.500,800.144" size="12.340">第</text>
|
|
||||||
<text> </text>
|
|
||||||
<text font="GMALPM+DFHSMincho-W3G014" bbox="405.000,789.064,415.000,798.234" size="10.000">›</text>
|
|
||||||
<text font="GMALPM+DFHSMincho-W3G014" bbox="415.000,789.064,425.000,798.234" size="10.000">Ÿ</text>
|
|
||||||
<text font="GMALPM+DFHSMincho-W3G014" bbox="425.000,789.064,435.000,798.234" size="10.000">˜</text>
|
|
||||||
<text font="GMALPM+DFHSMincho-W3G014" bbox="435.000,789.064,445.000,798.234" size="10.000"></text>
|
|
||||||
<text> </text>
|
|
||||||
<text font="Ryumin-Light" bbox="447.500,787.804,457.500,800.144" size="12.340">号</text>
|
|
||||||
<text>
|
|
||||||
</text>
|
|
||||||
</textline>
|
|
||||||
</textbox>
|
|
||||||
<textbox id="1" bbox="527.500,789.064,537.500,798.234">
|
|
||||||
<textline bbox="527.500,789.064,537.500,798.234">
|
|
||||||
<text font="GMALPM+DFHSMincho-W3G014" bbox="527.500,789.064,537.500,798.234" size="10.000">›</text>
|
|
||||||
<text>
|
|
||||||
</text>
|
|
||||||
</textline>
|
|
||||||
</textbox>
|
|
||||||
<textbox id="2" bbox="267.500,787.572,279.500,802.380">
|
|
||||||
<textline bbox="267.500,787.572,279.500,802.380">
|
|
||||||
<text font="Ryumin-Light" bbox="267.500,787.572,279.500,802.380" size="14.808">官</text>
|
|
||||||
<text>
|
|
||||||
</text>
|
|
||||||
</textline>
|
|
||||||
</textbox>
|
|
||||||
<textbox id="3" bbox="315.500,787.572,327.500,802.380">
|
|
||||||
<textline bbox="315.500,787.572,327.500,802.380">
|
|
||||||
<text font="Ryumin-Light" bbox="315.500,787.572,327.500,802.380" size="14.808">報</text>
|
|
||||||
<text>
|
|
||||||
</text>
|
|
||||||
</textline>
|
|
||||||
</textbox>
|
|
||||||
<textbox id="4" bbox="87.500,787.804,242.500,800.144">
|
|
||||||
<textline bbox="87.500,787.804,242.500,800.144">
|
<textline bbox="87.500,787.804,242.500,800.144">
|
||||||
<text font="Ryumin-Light" bbox="87.500,787.804,97.500,800.144" size="12.340">平</text>
|
<text font="Ryumin-Light" bbox="87.500,787.804,97.500,800.144" size="12.340">平</text>
|
||||||
<text font="Ryumin-Light" bbox="97.500,787.804,107.500,800.144" size="12.340">成</text>
|
<text font="Ryumin-Light" bbox="97.500,787.804,107.500,800.144" size="12.340">成</text>
|
||||||
<text> </text>
|
<text> </text>
|
||||||
<text font="GMALPM+DFHSMincho-W3G014" bbox="110.000,789.064,120.000,798.234" size="10.000">™</text>
|
<text font="GMALPM+DFHSMincho-W3G014" bbox="110.000,789.064,120.000,798.234" size="9.170">™</text>
|
||||||
<text font="GMALPM+DFHSMincho-W3G014" bbox="120.000,789.064,130.000,798.234" size="10.000">—</text>
|
<text font="GMALPM+DFHSMincho-W3G014" bbox="120.000,789.064,130.000,798.234" size="9.170">—</text>
|
||||||
<text> </text>
|
<text> </text>
|
||||||
<text font="Ryumin-Light" bbox="132.500,787.804,142.500,800.144" size="12.340">年</text>
|
<text font="Ryumin-Light" bbox="132.500,787.804,142.500,800.144" size="12.340">年</text>
|
||||||
<text> </text>
|
<text> </text>
|
||||||
<text font="GMALPM+DFHSMincho-W3G014" bbox="145.000,789.064,155.000,798.234" size="10.000">›</text>
|
<text font="GMALPM+DFHSMincho-W3G014" bbox="145.000,789.064,155.000,798.234" size="9.170">›</text>
|
||||||
<text> </text>
|
<text> </text>
|
||||||
<text font="Ryumin-Light" bbox="157.500,787.804,167.500,800.144" size="12.340">月</text>
|
<text font="Ryumin-Light" bbox="157.500,787.804,167.500,800.144" size="12.340">月</text>
|
||||||
<text> </text>
|
<text> </text>
|
||||||
<text font="GMALPM+DFHSMincho-W3G014" bbox="170.000,789.064,180.000,798.234" size="10.000">™</text>
|
<text font="GMALPM+DFHSMincho-W3G014" bbox="170.000,789.064,180.000,798.234" size="9.170">™</text>
|
||||||
<text font="GMALPM+DFHSMincho-W3G014" bbox="180.000,789.064,190.000,798.234" size="10.000">œ</text>
|
<text font="GMALPM+DFHSMincho-W3G014" bbox="180.000,789.064,190.000,798.234" size="9.170">œ</text>
|
||||||
<text> </text>
|
<text> </text>
|
||||||
<text font="Ryumin-Light" bbox="192.500,787.804,202.500,800.144" size="12.340">日</text>
|
<text font="Ryumin-Light" bbox="192.500,787.804,202.500,800.144" size="12.340">日</text>
|
||||||
<text> </text>
|
<text> </text>
|
||||||
|
@ -62,6 +27,41 @@
|
||||||
</text>
|
</text>
|
||||||
</textline>
|
</textline>
|
||||||
</textbox>
|
</textbox>
|
||||||
|
<textbox id="1" bbox="267.500,787.572,279.500,802.380">
|
||||||
|
<textline bbox="267.500,787.572,279.500,802.380">
|
||||||
|
<text font="Ryumin-Light" bbox="267.500,787.572,279.500,802.380" size="14.808">官</text>
|
||||||
|
<text>
|
||||||
|
</text>
|
||||||
|
</textline>
|
||||||
|
</textbox>
|
||||||
|
<textbox id="2" bbox="315.500,787.572,327.500,802.380">
|
||||||
|
<textline bbox="315.500,787.572,327.500,802.380">
|
||||||
|
<text font="Ryumin-Light" bbox="315.500,787.572,327.500,802.380" size="14.808">報</text>
|
||||||
|
<text>
|
||||||
|
</text>
|
||||||
|
</textline>
|
||||||
|
</textbox>
|
||||||
|
<textbox id="3" bbox="392.500,787.804,457.500,800.144">
|
||||||
|
<textline bbox="392.500,787.804,457.500,800.144">
|
||||||
|
<text font="Ryumin-Light" bbox="392.500,787.804,402.500,800.144" size="12.340">第</text>
|
||||||
|
<text> </text>
|
||||||
|
<text font="GMALPM+DFHSMincho-W3G014" bbox="405.000,789.064,415.000,798.234" size="9.170">›</text>
|
||||||
|
<text font="GMALPM+DFHSMincho-W3G014" bbox="415.000,789.064,425.000,798.234" size="9.170">Ÿ</text>
|
||||||
|
<text font="GMALPM+DFHSMincho-W3G014" bbox="425.000,789.064,435.000,798.234" size="9.170">˜</text>
|
||||||
|
<text font="GMALPM+DFHSMincho-W3G014" bbox="435.000,789.064,445.000,798.234" size="9.170"></text>
|
||||||
|
<text> </text>
|
||||||
|
<text font="Ryumin-Light" bbox="447.500,787.804,457.500,800.144" size="12.340">号</text>
|
||||||
|
<text>
|
||||||
|
</text>
|
||||||
|
</textline>
|
||||||
|
</textbox>
|
||||||
|
<textbox id="4" bbox="527.500,789.064,537.500,798.234">
|
||||||
|
<textline bbox="527.500,789.064,537.500,798.234">
|
||||||
|
<text font="GMALPM+DFHSMincho-W3G014" bbox="527.500,789.064,537.500,798.234" size="9.170">›</text>
|
||||||
|
<text>
|
||||||
|
</text>
|
||||||
|
</textline>
|
||||||
|
</textbox>
|
||||||
<textbox id="5" bbox="420.474,420.036,548.988,781.908" wmode="vertical">
|
<textbox id="5" bbox="420.474,420.036,548.988,781.908" wmode="vertical">
|
||||||
<textline bbox="540.988,715.796,548.988,781.492">
|
<textline bbox="540.988,715.796,548.988,781.492">
|
||||||
<text font="GothicBBB-Medium" bbox="540.988,771.796,548.988,781.492" size="9.696">政</text>
|
<text font="GothicBBB-Medium" bbox="540.988,771.796,548.988,781.492" size="9.696">政</text>
|
||||||
|
@ -3136,16 +3136,16 @@
|
||||||
<layout>
|
<layout>
|
||||||
<textgroup bbox="45.212,52.036,549.776,802.380">
|
<textgroup bbox="45.212,52.036,549.776,802.380">
|
||||||
<textgroup bbox="87.500,787.572,537.500,802.380">
|
<textgroup bbox="87.500,787.572,537.500,802.380">
|
||||||
<textgroup bbox="392.500,787.804,537.500,800.144">
|
|
||||||
<textbox id="0" bbox="392.500,787.804,457.500,800.144" />
|
|
||||||
<textbox id="1" bbox="527.500,789.064,537.500,798.234" />
|
|
||||||
</textgroup>
|
|
||||||
<textgroup bbox="87.500,787.572,327.500,802.380">
|
<textgroup bbox="87.500,787.572,327.500,802.380">
|
||||||
|
<textbox id="0" bbox="87.500,787.804,242.500,800.144" />
|
||||||
<textgroup bbox="267.500,787.572,327.500,802.380">
|
<textgroup bbox="267.500,787.572,327.500,802.380">
|
||||||
<textbox id="2" bbox="267.500,787.572,279.500,802.380" />
|
<textbox id="1" bbox="267.500,787.572,279.500,802.380" />
|
||||||
<textbox id="3" bbox="315.500,787.572,327.500,802.380" />
|
<textbox id="2" bbox="315.500,787.572,327.500,802.380" />
|
||||||
</textgroup>
|
</textgroup>
|
||||||
<textbox id="4" bbox="87.500,787.804,242.500,800.144" />
|
</textgroup>
|
||||||
|
<textgroup bbox="392.500,787.804,537.500,800.144">
|
||||||
|
<textbox id="3" bbox="392.500,787.804,457.500,800.144" />
|
||||||
|
<textbox id="4" bbox="527.500,789.064,537.500,798.234" />
|
||||||
</textgroup>
|
</textgroup>
|
||||||
</textgroup>
|
</textgroup>
|
||||||
<textgroup bbox="45.212,52.036,549.776,781.960">
|
<textgroup bbox="45.212,52.036,549.776,781.960">
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -28,46 +28,37 @@ good results.
|
||||||
1 Background
|
1 Background
|
||||||
|
|
||||||
Every day, a large number of news articles are cre-
|
Every day, a large number of news articles are cre-
|
||||||
ated and reported, many of which are unique.
|
ated and reported, many of which are unique. But
|
||||||
But
|
|
||||||
certain types of events, such as hurricanes or mur-
|
certain types of events, such as hurricanes or mur-
|
||||||
ders, are reported again and again throughout a year.
|
ders, are reported again and again throughout a year.
|
||||||
The goal of Information Extraction, or IE, is to re-
|
The goal of Information Extraction, or IE, is to re-
|
||||||
trieve a certain type of news event from past articles
|
trieve a certain type of news event from past articles
|
||||||
and present the events as a table whose columns are
|
and present the events as a table whose columns are
|
||||||
filled with a name of a person or company, accord-
|
filled with a name of a person or company, accord-
|
||||||
However, existing IE
|
ing to its role in the event. However, existing IE
|
||||||
ing to its role in the event.
|
techniques require a lot of human labor. First, you
|
||||||
techniques require a lot of human labor.
|
|
||||||
First, you
|
|
||||||
have to specify the type of information you want and
|
have to specify the type of information you want and
|
||||||
collect articles that include this information.
|
collect articles that include this information. Then,
|
||||||
Then,
|
|
||||||
you have to analyze the articles and manually craft
|
you have to analyze the articles and manually craft
|
||||||
a set of patterns to capture these events. Most exist-
|
a set of patterns to capture these events. Most exist-
|
||||||
ing IE research focuses on reducing this burden by
|
ing IE research focuses on reducing this burden by
|
||||||
helping people create such patterns.
|
helping people create such patterns. But each time
|
||||||
But each time
|
|
||||||
you want to extract a different kind of information,
|
you want to extract a different kind of information,
|
||||||
you need to repeat the whole process: specify arti-
|
you need to repeat the whole process: specify arti-
|
||||||
|
|
||||||
cles and adjust its patterns, either manually or semi-
|
cles and adjust its patterns, either manually or semi-
|
||||||
automatically.
|
automatically. There is a bit of a dangerous pitfall
|
||||||
There is a bit of a dangerous pitfall
|
|
||||||
here. First, it is hard to estimate how good the sys-
|
here. First, it is hard to estimate how good the sys-
|
||||||
tem can be after months of work. Furthermore, you
|
tem can be after months of work. Furthermore, you
|
||||||
might not know if the task is even doable in the first
|
might not know if the task is even doable in the first
|
||||||
place.
|
place. Knowing what kind of information is easily
|
||||||
Knowing what kind of information is easily
|
|
||||||
obtained in advance would help reduce this risk.
|
obtained in advance would help reduce this risk.
|
||||||
An IE task can be defined as finding a relation
|
An IE task can be defined as finding a relation
|
||||||
among several entities involved in a certain type of
|
among several entities involved in a certain type of
|
||||||
event.
|
event. For example, in the MUC-6 management
|
||||||
For example, in the MUC-6 management
|
|
||||||
succession scenario, one seeks a relation between
|
succession scenario, one seeks a relation between
|
||||||
COMPANY, PERSON and POST involved with hir-
|
COMPANY, PERSON and POST involved with hir-
|
||||||
ing/firing events.
|
ing/firing events. For each row of an extracted ta-
|
||||||
For each row of an extracted ta-
|
|
||||||
ble, you can always read it as “COMPANY hired
|
ble, you can always read it as “COMPANY hired
|
||||||
(or fired) PERSON for POST.” The relation between
|
(or fired) PERSON for POST.” The relation between
|
||||||
these entities is retained throughout the table. There
|
these entities is retained throughout the table. There
|
||||||
|
|
|
@ -871,7 +871,7 @@
|
||||||
<text>
|
<text>
|
||||||
</text>
|
</text>
|
||||||
</textline>
|
</textline>
|
||||||
<textline bbox="72.000,336.427,276.805,349.577">
|
<textline bbox="72.000,336.427,298.882,349.577">
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="72.000,336.427,76.845,349.577" size="13.150">a</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="72.000,336.427,76.845,349.577" size="13.150">a</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="76.845,336.427,79.879,349.577" size="13.150">t</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="76.845,336.427,79.879,349.577" size="13.150">t</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="79.879,336.427,84.725,349.577" size="13.150">e</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="79.879,336.427,84.725,349.577" size="13.150">e</text>
|
||||||
|
@ -916,10 +916,7 @@
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="263.775,336.427,269.232,349.577" size="13.150">u</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="263.775,336.427,269.232,349.577" size="13.150">u</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="269.232,336.427,274.077,349.577" size="13.150">e</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="269.232,336.427,274.077,349.577" size="13.150">e</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="274.077,336.427,276.805,349.577" size="13.150">.</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="274.077,336.427,276.805,349.577" size="13.150">.</text>
|
||||||
<text>
|
<text> </text>
|
||||||
</text>
|
|
||||||
</textline>
|
|
||||||
<textline bbox="283.113,336.427,298.882,349.577">
|
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="283.113,336.427,290.392,349.577" size="13.150">B</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="283.113,336.427,290.392,349.577" size="13.150">B</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="290.392,336.427,295.849,349.577" size="13.150">u</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="290.392,336.427,295.849,349.577" size="13.150">u</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="295.849,336.427,298.882,349.577" size="13.150">t</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="295.849,336.427,298.882,349.577" size="13.150">t</text>
|
||||||
|
@ -1260,31 +1257,7 @@
|
||||||
<text>
|
<text>
|
||||||
</text>
|
</text>
|
||||||
</textline>
|
</textline>
|
||||||
<textline bbox="203.557,241.580,298.882,254.730">
|
<textline bbox="72.000,241.580,298.882,254.730">
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="203.557,241.580,211.436,254.730" size="13.150">H</text>
|
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="211.436,241.580,216.893,254.730" size="13.150">o</text>
|
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="216.631,241.580,224.510,254.730" size="13.150">w</text>
|
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="224.510,241.580,229.355,254.730" size="13.150">e</text>
|
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="229.083,241.580,234.539,254.730" size="13.150">v</text>
|
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="234.375,241.580,239.221,254.730" size="13.150">e</text>
|
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="239.221,241.580,242.855,254.730" size="13.150">r</text>
|
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="242.418,241.580,245.147,254.730" size="13.150">,</text>
|
|
||||||
<text> </text>
|
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="249.861,241.580,254.706,254.730" size="13.150">e</text>
|
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="254.554,241.580,260.010,254.730" size="13.150">x</text>
|
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="260.010,241.580,263.044,254.730" size="13.150">i</text>
|
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="263.044,241.580,267.289,254.730" size="13.150">s</text>
|
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="267.289,241.580,270.323,254.730" size="13.150">t</text>
|
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="270.323,241.580,273.357,254.730" size="13.150">i</text>
|
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="273.357,241.580,278.813,254.730" size="13.150">n</text>
|
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="278.813,241.580,284.270,254.730" size="13.150">g</text>
|
|
||||||
<text> </text>
|
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="288.580,241.580,292.215,254.730" size="13.150">I</text>
|
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="292.215,241.580,298.882,254.730" size="13.150">E</text>
|
|
||||||
<text>
|
|
||||||
</text>
|
|
||||||
</textline>
|
|
||||||
<textline bbox="72.000,241.580,195.405,254.730">
|
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="72.000,241.580,75.034,254.730" size="13.150">i</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="72.000,241.580,75.034,254.730" size="13.150">i</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="75.034,241.580,80.490,254.730" size="13.150">n</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="75.034,241.580,80.490,254.730" size="13.150">n</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="80.490,241.580,85.947,254.730" size="13.150">g</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="80.490,241.580,85.947,254.730" size="13.150">g</text>
|
||||||
|
@ -1314,10 +1287,31 @@
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="184.186,241.580,189.643,254.730" size="13.150">n</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="184.186,241.580,189.643,254.730" size="13.150">n</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="189.643,241.580,192.677,254.730" size="13.150">t</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="189.643,241.580,192.677,254.730" size="13.150">t</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="192.677,241.580,195.405,254.730" size="13.150">.</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="192.677,241.580,195.405,254.730" size="13.150">.</text>
|
||||||
|
<text> </text>
|
||||||
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="203.557,241.580,211.436,254.730" size="13.150">H</text>
|
||||||
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="211.436,241.580,216.893,254.730" size="13.150">o</text>
|
||||||
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="216.631,241.580,224.510,254.730" size="13.150">w</text>
|
||||||
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="224.510,241.580,229.355,254.730" size="13.150">e</text>
|
||||||
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="229.083,241.580,234.539,254.730" size="13.150">v</text>
|
||||||
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="234.375,241.580,239.221,254.730" size="13.150">e</text>
|
||||||
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="239.221,241.580,242.855,254.730" size="13.150">r</text>
|
||||||
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="242.418,241.580,245.147,254.730" size="13.150">,</text>
|
||||||
|
<text> </text>
|
||||||
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="249.861,241.580,254.706,254.730" size="13.150">e</text>
|
||||||
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="254.554,241.580,260.010,254.730" size="13.150">x</text>
|
||||||
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="260.010,241.580,263.044,254.730" size="13.150">i</text>
|
||||||
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="263.044,241.580,267.289,254.730" size="13.150">s</text>
|
||||||
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="267.289,241.580,270.323,254.730" size="13.150">t</text>
|
||||||
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="270.323,241.580,273.357,254.730" size="13.150">i</text>
|
||||||
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="273.357,241.580,278.813,254.730" size="13.150">n</text>
|
||||||
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="278.813,241.580,284.270,254.730" size="13.150">g</text>
|
||||||
|
<text> </text>
|
||||||
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="288.580,241.580,292.215,254.730" size="13.150">I</text>
|
||||||
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="292.215,241.580,298.882,254.730" size="13.150">E</text>
|
||||||
<text>
|
<text>
|
||||||
</text>
|
</text>
|
||||||
</textline>
|
</textline>
|
||||||
<textline bbox="72.000,228.026,250.352,241.177">
|
<textline bbox="72.000,228.026,298.882,241.177">
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="72.000,228.026,75.034,241.177" size="13.150">t</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="72.000,228.026,75.034,241.177" size="13.150">t</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="75.034,228.026,79.879,241.177" size="13.150">e</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="75.034,228.026,79.879,241.177" size="13.150">e</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="79.879,228.026,84.725,241.177" size="13.150">c</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="79.879,228.026,84.725,241.177" size="13.150">c</text>
|
||||||
|
@ -1358,10 +1352,7 @@
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="239.123,228.026,244.579,241.177" size="13.150">o</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="239.123,228.026,244.579,241.177" size="13.150">o</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="244.579,228.026,248.213,241.177" size="13.150">r</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="244.579,228.026,248.213,241.177" size="13.150">r</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="247.624,228.026,250.352,241.177" size="13.150">.</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="247.624,228.026,250.352,241.177" size="13.150">.</text>
|
||||||
<text>
|
<text> </text>
|
||||||
</text>
|
|
||||||
</textline>
|
|
||||||
<textline bbox="256.060,228.026,298.882,241.177">
|
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="256.060,228.026,262.127,241.177" size="13.150">F</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="256.060,228.026,262.127,241.177" size="13.150">F</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="262.127,228.026,265.161,241.177" size="13.150">i</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="262.127,228.026,265.161,241.177" size="13.150">i</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="265.161,228.026,268.795,241.177" size="13.150">r</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="265.161,228.026,268.795,241.177" size="13.150">r</text>
|
||||||
|
@ -1431,7 +1422,7 @@
|
||||||
<text>
|
<text>
|
||||||
</text>
|
</text>
|
||||||
</textline>
|
</textline>
|
||||||
<textline bbox="72.000,200.929,268.249,214.079">
|
<textline bbox="72.000,200.929,298.882,214.079">
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="72.000,200.929,76.845,214.079" size="13.150">c</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="72.000,200.929,76.845,214.079" size="13.150">c</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="76.845,200.929,82.302,214.079" size="13.150">o</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="76.845,200.929,82.302,214.079" size="13.150">o</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="82.302,200.929,85.336,214.079" size="13.150">l</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="82.302,200.929,85.336,214.079" size="13.150">l</text>
|
||||||
|
@ -1479,10 +1470,7 @@
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="254.608,200.929,260.065,214.079" size="13.150">o</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="254.608,200.929,260.065,214.079" size="13.150">o</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="260.065,200.929,265.521,214.079" size="13.150">n</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="260.065,200.929,265.521,214.079" size="13.150">n</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="265.521,200.929,268.249,214.079" size="13.150">.</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="265.521,200.929,268.249,214.079" size="13.150">.</text>
|
||||||
<text>
|
<text> </text>
|
||||||
</text>
|
|
||||||
</textline>
|
|
||||||
<textline bbox="273.728,200.929,298.882,214.079">
|
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="273.728,200.929,280.396,214.079" size="13.150">T</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="273.728,200.929,280.396,214.079" size="13.150">T</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="280.396,200.929,285.852,214.079" size="13.150">h</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="280.396,200.929,285.852,214.079" size="13.150">h</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="285.852,200.929,290.698,214.079" size="13.150">e</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="285.852,200.929,290.698,214.079" size="13.150">e</text>
|
||||||
|
@ -1658,7 +1646,7 @@
|
||||||
<text>
|
<text>
|
||||||
</text>
|
</text>
|
||||||
</textline>
|
</textline>
|
||||||
<textline bbox="72.000,146.734,231.091,159.884">
|
<textline bbox="72.000,146.734,298.882,159.884">
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="72.000,146.734,77.457,159.884" size="13.150">h</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="72.000,146.734,77.457,159.884" size="13.150">h</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="77.457,146.734,82.302,159.884" size="13.150">e</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="77.457,146.734,82.302,159.884" size="13.150">e</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="82.302,146.734,85.336,159.884" size="13.150">l</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="82.302,146.734,85.336,159.884" size="13.150">l</text>
|
||||||
|
@ -1695,10 +1683,7 @@
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="218.661,146.734,224.117,159.884" size="13.150">n</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="218.661,146.734,224.117,159.884" size="13.150">n</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="224.117,146.734,228.362,159.884" size="13.150">s</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="224.117,146.734,228.362,159.884" size="13.150">s</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="228.362,146.734,231.091,159.884" size="13.150">.</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="228.362,146.734,231.091,159.884" size="13.150">.</text>
|
||||||
<text>
|
<text> </text>
|
||||||
</text>
|
|
||||||
</textline>
|
|
||||||
<textline bbox="236.743,146.734,298.882,159.884">
|
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="236.743,146.734,244.022,159.884" size="13.150">B</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="236.743,146.734,244.022,159.884" size="13.150">B</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="244.022,146.734,249.479,159.884" size="13.150">u</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="244.022,146.734,249.479,159.884" size="13.150">u</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="249.479,146.734,252.513,159.884" size="13.150">t</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="249.479,146.734,252.513,159.884" size="13.150">t</text>
|
||||||
|
@ -1886,7 +1871,7 @@
|
||||||
<text>
|
<text>
|
||||||
</text>
|
</text>
|
||||||
</textline>
|
</textline>
|
||||||
<textline bbox="313.198,581.125,374.638,594.275">
|
<textline bbox="313.198,581.125,540.091,594.275">
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="313.198,581.125,318.043,594.275" size="13.150">a</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="313.198,581.125,318.043,594.275" size="13.150">a</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="318.043,581.125,323.499,594.275" size="13.150">u</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="318.043,581.125,323.499,594.275" size="13.150">u</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="323.499,581.125,326.533,594.275" size="13.150">t</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="323.499,581.125,326.533,594.275" size="13.150">t</text>
|
||||||
|
@ -1901,10 +1886,7 @@
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="364.118,581.125,367.152,594.275" size="13.150">l</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="364.118,581.125,367.152,594.275" size="13.150">l</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="367.152,581.125,372.608,594.275" size="13.150">y</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="367.152,581.125,372.608,594.275" size="13.150">y</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="371.910,581.125,374.638,594.275" size="13.150">.</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="371.910,581.125,374.638,594.275" size="13.150">.</text>
|
||||||
<text>
|
<text> </text>
|
||||||
</text>
|
|
||||||
</textline>
|
|
||||||
<textline bbox="380.749,581.125,540.091,594.275">
|
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="380.749,581.125,387.417,594.275" size="13.150">T</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="380.749,581.125,387.417,594.275" size="13.150">T</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="387.417,581.125,392.874,594.275" size="13.150">h</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="387.417,581.125,392.874,594.275" size="13.150">h</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="392.874,581.125,397.719,594.275" size="13.150">e</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="392.874,581.125,397.719,594.275" size="13.150">e</text>
|
||||||
|
@ -2112,17 +2094,14 @@
|
||||||
<text>
|
<text>
|
||||||
</text>
|
</text>
|
||||||
</textline>
|
</textline>
|
||||||
<textline bbox="313.198,526.930,338.952,540.080">
|
<textline bbox="313.198,526.930,540.080,540.080">
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="313.198,526.930,318.654,540.080" size="13.150">p</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="313.198,526.930,318.654,540.080" size="13.150">p</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="318.654,526.930,321.688,540.080" size="13.150">l</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="318.654,526.930,321.688,540.080" size="13.150">l</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="321.688,526.930,326.533,540.080" size="13.150">a</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="321.688,526.930,326.533,540.080" size="13.150">a</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="326.533,526.930,331.379,540.080" size="13.150">c</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="326.533,526.930,331.379,540.080" size="13.150">c</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="331.379,526.930,336.224,540.080" size="13.150">e</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="331.379,526.930,336.224,540.080" size="13.150">e</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="336.224,526.930,338.952,540.080" size="13.150">.</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="336.224,526.930,338.952,540.080" size="13.150">.</text>
|
||||||
<text>
|
<text> </text>
|
||||||
</text>
|
|
||||||
</textline>
|
|
||||||
<textline bbox="344.682,526.930,540.080,540.080">
|
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="344.682,526.930,352.561,540.080" size="13.150">K</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="344.682,526.930,352.561,540.080" size="13.150">K</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="352.561,526.930,358.017,540.080" size="13.150">n</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="352.561,526.930,358.017,540.080" size="13.150">n</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="358.017,526.930,363.474,540.080" size="13.150">o</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="358.017,526.930,363.474,540.080" size="13.150">o</text>
|
||||||
|
@ -2325,17 +2304,14 @@
|
||||||
<text>
|
<text>
|
||||||
</text>
|
</text>
|
||||||
</textline>
|
</textline>
|
||||||
<textline bbox="313.198,472.325,339.138,485.475">
|
<textline bbox="313.198,472.325,540.080,485.475">
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="313.198,472.325,318.043,485.475" size="13.150">e</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="313.198,472.325,318.043,485.475" size="13.150">e</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="317.781,472.325,323.238,485.475" size="13.150">v</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="317.781,472.325,323.238,485.475" size="13.150">v</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="323.074,472.325,327.919,485.475" size="13.150">e</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="323.074,472.325,327.919,485.475" size="13.150">e</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="327.919,472.325,333.376,485.475" size="13.150">n</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="327.919,472.325,333.376,485.475" size="13.150">n</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="333.376,472.325,336.410,485.475" size="13.150">t</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="333.376,472.325,336.410,485.475" size="13.150">t</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="336.410,472.325,339.138,485.475" size="13.150">.</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="336.410,472.325,339.138,485.475" size="13.150">.</text>
|
||||||
<text>
|
<text> </text>
|
||||||
</text>
|
|
||||||
</textline>
|
|
||||||
<textline bbox="349.003,472.325,540.080,485.475">
|
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="349.003,472.325,355.071,485.475" size="13.150">F</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="349.003,472.325,355.071,485.475" size="13.150">F</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="354.918,472.325,360.375,485.475" size="13.150">o</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="354.918,472.325,360.375,485.475" size="13.150">o</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="360.375,472.325,364.009,485.475" size="13.150">r</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="360.375,472.325,364.009,485.475" size="13.150">r</text>
|
||||||
|
@ -2475,7 +2451,7 @@
|
||||||
<text>
|
<text>
|
||||||
</text>
|
</text>
|
||||||
</textline>
|
</textline>
|
||||||
<textline bbox="313.198,431.673,387.854,444.824">
|
<textline bbox="313.198,431.673,540.080,444.824">
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="313.198,431.673,316.231,444.824" size="13.150">i</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="313.198,431.673,316.231,444.824" size="13.150">i</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="316.231,431.673,321.688,444.824" size="13.150">n</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="316.231,431.673,321.688,444.824" size="13.150">n</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="321.688,431.673,327.144,444.824" size="13.150">g</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="321.688,431.673,327.144,444.824" size="13.150">g</text>
|
||||||
|
@ -2493,10 +2469,7 @@
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="377.846,431.673,380.880,444.824" size="13.150">t</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="377.846,431.673,380.880,444.824" size="13.150">t</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="380.880,431.673,385.125,444.824" size="13.150">s</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="380.880,431.673,385.125,444.824" size="13.150">s</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="385.125,431.673,387.854,444.824" size="13.150">.</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="385.125,431.673,387.854,444.824" size="13.150">.</text>
|
||||||
<text>
|
<text> </text>
|
||||||
</text>
|
|
||||||
</textline>
|
|
||||||
<textline bbox="394.576,431.673,540.080,444.824">
|
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="394.576,431.673,400.644,444.824" size="13.150">F</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="394.576,431.673,400.644,444.824" size="13.150">F</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="400.491,431.673,405.948,444.824" size="13.150">o</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="400.491,431.673,405.948,444.824" size="13.150">o</text>
|
||||||
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="405.948,431.673,409.582,444.824" size="13.150">r</text>
|
<text font="QTLIUY+NimbusRomNo9L-Regu" bbox="405.948,431.673,409.582,444.824" size="13.150">r</text>
|
||||||
|
|
|
@ -3,92 +3,13 @@
|
||||||
</head><body>
|
</head><body>
|
||||||
<span style="position:absolute; border: gray 1px solid; left:0px; top:50px; width:800px; height:600px;"></span>
|
<span style="position:absolute; border: gray 1px solid; left:0px; top:50px; width:800px; height:600px;"></span>
|
||||||
<div style="position:absolute; top:50px;"><a name="1">Page 1</a></div>
|
<div style="position:absolute; top:50px;"><a name="1">Page 1</a></div>
|
||||||
<span style="position:absolute; border: cyan 1px solid; left:62px; top:126px; width:672px; height:157px;"></span>
|
<div style="position:absolute; border: textbox 1px solid; writing-mode:lr-tb; left:62px; top:126px; width:672px; height:157px;"><span style="font-family: DAFPJF+HiraKakuPro-W6; font-size:60px">コンパラブルな新聞記事からの
|
||||||
<span style="position:absolute; border: magenta 1px solid; left:62px; top:126px; width:672px; height:85px;"></span>
|
<br>固有表現の発見
|
||||||
<span style="position:absolute; left:62px; top:126px; font-size:85px;">コ</span>
|
<br></span></div><div style="position:absolute; border: textbox 1px solid; writing-mode:lr-tb; left:263px; top:374px; width:468px; height:212px;"><span style="font-family: DAFPJF+HiraKakuPro-W6; font-size:45px">新山 祐介
|
||||||
<span style="position:absolute; left:110px; top:126px; font-size:85px;">ン</span>
|
<br>関根 聡
|
||||||
<span style="position:absolute; left:158px; top:126px; font-size:85px;">パ</span>
|
<br></span><span style="font-family: DAFPJF+HiraKakuPro-W6; font-size:35px">Computer Science Department
|
||||||
<span style="position:absolute; left:206px; top:126px; font-size:85px;">ラ</span>
|
<br>New York University
|
||||||
<span style="position:absolute; left:254px; top:126px; font-size:85px;">ブ</span>
|
<br></span></div><span style="position:absolute; border: black 1px solid; left:0px; top:50px; width:800px; height:600px;"></span>
|
||||||
<span style="position:absolute; left:302px; top:126px; font-size:85px;">ル</span>
|
|
||||||
<span style="position:absolute; left:350px; top:126px; font-size:85px;">な</span>
|
|
||||||
<span style="position:absolute; left:398px; top:126px; font-size:85px;">新</span>
|
|
||||||
<span style="position:absolute; left:446px; top:126px; font-size:85px;">聞</span>
|
|
||||||
<span style="position:absolute; left:494px; top:126px; font-size:85px;">記</span>
|
|
||||||
<span style="position:absolute; left:542px; top:126px; font-size:85px;">事</span>
|
|
||||||
<span style="position:absolute; left:590px; top:126px; font-size:85px;">か</span>
|
|
||||||
<span style="position:absolute; left:638px; top:126px; font-size:85px;">ら</span>
|
|
||||||
<span style="position:absolute; left:686px; top:126px; font-size:85px;">の</span>
|
|
||||||
<span style="position:absolute; border: magenta 1px solid; left:62px; top:198px; width:336px; height:85px;"></span>
|
|
||||||
<span style="position:absolute; left:62px; top:198px; font-size:85px;">固</span>
|
|
||||||
<span style="position:absolute; left:110px; top:198px; font-size:85px;">有</span>
|
|
||||||
<span style="position:absolute; left:158px; top:198px; font-size:85px;">表</span>
|
|
||||||
<span style="position:absolute; left:206px; top:198px; font-size:85px;">現</span>
|
|
||||||
<span style="position:absolute; left:254px; top:198px; font-size:85px;">の</span>
|
|
||||||
<span style="position:absolute; left:302px; top:198px; font-size:85px;">発</span>
|
|
||||||
<span style="position:absolute; left:350px; top:198px; font-size:85px;">見</span>
|
|
||||||
<span style="position:absolute; border: cyan 1px solid; left:263px; top:374px; width:468px; height:212px;"></span>
|
|
||||||
<span style="position:absolute; border: magenta 1px solid; left:576px; top:374px; width:155px; height:64px;"></span>
|
|
||||||
<span style="position:absolute; left:576px; top:374px; font-size:64px;">新</span>
|
|
||||||
<span style="position:absolute; left:612px; top:374px; font-size:64px;">山</span>
|
|
||||||
<span style="position:absolute; left:648px; top:374px; font-size:64px;"> </span>
|
|
||||||
<span style="position:absolute; left:660px; top:374px; font-size:64px;">祐</span>
|
|
||||||
<span style="position:absolute; left:696px; top:374px; font-size:64px;">介</span>
|
|
||||||
<span style="position:absolute; border: magenta 1px solid; left:612px; top:430px; width:119px; height:64px;"></span>
|
|
||||||
<span style="position:absolute; left:612px; top:430px; font-size:64px;">関</span>
|
|
||||||
<span style="position:absolute; left:648px; top:430px; font-size:64px;">根</span>
|
|
||||||
<span style="position:absolute; left:684px; top:430px; font-size:64px;"> </span>
|
|
||||||
<span style="position:absolute; left:696px; top:430px; font-size:64px;">聡</span>
|
|
||||||
<span style="position:absolute; border: magenta 1px solid; left:263px; top:493px; width:468px; height:50px;"></span>
|
|
||||||
<span style="position:absolute; left:263px; top:493px; font-size:50px;">C</span>
|
|
||||||
<span style="position:absolute; left:285px; top:493px; font-size:50px;">o</span>
|
|
||||||
<span style="position:absolute; left:304px; top:493px; font-size:50px;">m</span>
|
|
||||||
<span style="position:absolute; left:332px; top:493px; font-size:50px;">p</span>
|
|
||||||
<span style="position:absolute; left:352px; top:493px; font-size:50px;">u</span>
|
|
||||||
<span style="position:absolute; left:371px; top:493px; font-size:50px;">t</span>
|
|
||||||
<span style="position:absolute; left:383px; top:493px; font-size:50px;">e</span>
|
|
||||||
<span style="position:absolute; left:401px; top:493px; font-size:50px;">r</span>
|
|
||||||
<span style="position:absolute; left:415px; top:493px; font-size:50px;"> </span>
|
|
||||||
<span style="position:absolute; left:424px; top:493px; font-size:50px;">S</span>
|
|
||||||
<span style="position:absolute; left:444px; top:493px; font-size:50px;">c</span>
|
|
||||||
<span style="position:absolute; left:462px; top:493px; font-size:50px;">i</span>
|
|
||||||
<span style="position:absolute; left:469px; top:493px; font-size:50px;">e</span>
|
|
||||||
<span style="position:absolute; left:487px; top:493px; font-size:50px;">n</span>
|
|
||||||
<span style="position:absolute; left:506px; top:493px; font-size:50px;">c</span>
|
|
||||||
<span style="position:absolute; left:523px; top:493px; font-size:50px;">e</span>
|
|
||||||
<span style="position:absolute; left:541px; top:493px; font-size:50px;"> </span>
|
|
||||||
<span style="position:absolute; left:550px; top:493px; font-size:50px;">D</span>
|
|
||||||
<span style="position:absolute; left:573px; top:493px; font-size:50px;">e</span>
|
|
||||||
<span style="position:absolute; left:591px; top:493px; font-size:50px;">p</span>
|
|
||||||
<span style="position:absolute; left:611px; top:493px; font-size:50px;">a</span>
|
|
||||||
<span style="position:absolute; left:628px; top:493px; font-size:50px;">r</span>
|
|
||||||
<span style="position:absolute; left:642px; top:493px; font-size:50px;">t</span>
|
|
||||||
<span style="position:absolute; left:654px; top:493px; font-size:50px;">m</span>
|
|
||||||
<span style="position:absolute; left:683px; top:493px; font-size:50px;">e</span>
|
|
||||||
<span style="position:absolute; left:701px; top:493px; font-size:50px;">n</span>
|
|
||||||
<span style="position:absolute; left:719px; top:493px; font-size:50px;">t</span>
|
|
||||||
<span style="position:absolute; border: magenta 1px solid; left:424px; top:537px; width:307px; height:50px;"></span>
|
|
||||||
<span style="position:absolute; left:424px; top:537px; font-size:50px;">N</span>
|
|
||||||
<span style="position:absolute; left:447px; top:537px; font-size:50px;">e</span>
|
|
||||||
<span style="position:absolute; left:464px; top:537px; font-size:50px;">w</span>
|
|
||||||
<span style="position:absolute; left:488px; top:537px; font-size:50px;"> </span>
|
|
||||||
<span style="position:absolute; left:498px; top:537px; font-size:50px;">Y</span>
|
|
||||||
<span style="position:absolute; left:519px; top:537px; font-size:50px;">o</span>
|
|
||||||
<span style="position:absolute; left:538px; top:537px; font-size:50px;">r</span>
|
|
||||||
<span style="position:absolute; left:551px; top:537px; font-size:50px;">k</span>
|
|
||||||
<span style="position:absolute; left:570px; top:537px; font-size:50px;"> </span>
|
|
||||||
<span style="position:absolute; left:579px; top:537px; font-size:50px;">U</span>
|
|
||||||
<span style="position:absolute; left:602px; top:537px; font-size:50px;">n</span>
|
|
||||||
<span style="position:absolute; left:621px; top:537px; font-size:50px;">i</span>
|
|
||||||
<span style="position:absolute; left:629px; top:537px; font-size:50px;">v</span>
|
|
||||||
<span style="position:absolute; left:646px; top:537px; font-size:50px;">e</span>
|
|
||||||
<span style="position:absolute; left:664px; top:537px; font-size:50px;">r</span>
|
|
||||||
<span style="position:absolute; left:678px; top:537px; font-size:50px;">s</span>
|
|
||||||
<span style="position:absolute; left:694px; top:537px; font-size:50px;">i</span>
|
|
||||||
<span style="position:absolute; left:702px; top:537px; font-size:50px;">t</span>
|
|
||||||
<span style="position:absolute; left:714px; top:537px; font-size:50px;">y</span>
|
|
||||||
<span style="position:absolute; border: black 1px solid; left:0px; top:50px; width:800px; height:600px;"></span>
|
|
||||||
<span style="position:absolute; border: black 1px solid; left:50px; top:308px; width:510px; height:0px;"></span>
|
<span style="position:absolute; border: black 1px solid; left:50px; top:308px; width:510px; height:0px;"></span>
|
||||||
<span style="position:absolute; border: yellow 1px solid; left:25px; top:587px; width:41px; height:40px;"></span>
|
|
||||||
<div style="position:absolute; top:0px;">Page: <a href="#1">1</a></div>
|
<div style="position:absolute; top:0px;">Page: <a href="#1">1</a></div>
|
||||||
</body></html>
|
</body></html>
|
||||||
|
|
|
@ -3,65 +3,13 @@
|
||||||
</head><body>
|
</head><body>
|
||||||
<span style="position:absolute; border: gray 1px solid; left:0px; top:50px; width:612px; height:792px;"></span>
|
<span style="position:absolute; border: gray 1px solid; left:0px; top:50px; width:612px; height:792px;"></span>
|
||||||
<div style="position:absolute; top:50px;"><a name="1">Page 1</a></div>
|
<div style="position:absolute; top:50px;"><a name="1">Page 1</a></div>
|
||||||
<span style="position:absolute; border: cyan 1px solid; left:100px; top:119px; width:61px; height:27px;"></span>
|
<div style="position:absolute; border: textbox 1px solid; writing-mode:lr-tb; left:100px; top:119px; width:61px; height:27px;"><span style="font-family: Helvetica; font-size:19px">Hello
|
||||||
<span style="position:absolute; border: magenta 1px solid; left:100px; top:119px; width:61px; height:27px;"></span>
|
<br></span></div><div style="position:absolute; border: textbox 1px solid; writing-mode:lr-tb; left:261px; top:119px; width:62px; height:27px;"><span style="font-family: Helvetica; font-size:19px">World
|
||||||
<span style="position:absolute; left:100px; top:119px; font-size:27px;">H</span>
|
<br></span></div><div style="position:absolute; border: textbox 1px solid; writing-mode:lr-tb; left:100px; top:219px; width:61px; height:27px;"><span style="font-family: Helvetica; font-size:19px">Hello
|
||||||
<span style="position:absolute; left:117px; top:119px; font-size:27px;">e</span>
|
<br></span></div><div style="position:absolute; border: textbox 1px solid; writing-mode:lr-tb; left:261px; top:219px; width:62px; height:27px;"><span style="font-family: Helvetica; font-size:19px">World
|
||||||
<span style="position:absolute; left:130px; top:119px; font-size:27px;">l</span>
|
<br></span></div><div style="position:absolute; border: textbox 1px solid; writing-mode:lr-tb; left:100px; top:319px; width:111px; height:27px;"><span style="font-family: Helvetica; font-size:19px">H e l l o
|
||||||
<span style="position:absolute; left:136px; top:119px; font-size:27px;">l</span>
|
<br></span></div><div style="position:absolute; border: textbox 1px solid; writing-mode:lr-tb; left:321px; top:319px; width:102px; height:27px;"><span style="font-family: Helvetica; font-size:19px">W o r l d
|
||||||
<span style="position:absolute; left:141px; top:119px; font-size:27px;">o</span>
|
<br></span></div><div style="position:absolute; border: textbox 1px solid; writing-mode:lr-tb; left:100px; top:419px; width:111px; height:27px;"><span style="font-family: Helvetica; font-size:19px">H e l l o
|
||||||
<span style="position:absolute; left:154px; top:119px; font-size:27px;"> </span>
|
<br></span></div><div style="position:absolute; border: textbox 1px solid; writing-mode:lr-tb; left:321px; top:419px; width:102px; height:27px;"><span style="font-family: Helvetica; font-size:19px">W o r l d
|
||||||
<span style="position:absolute; border: cyan 1px solid; left:261px; top:119px; width:62px; height:27px;"></span>
|
<br></span></div><div style="position:absolute; top:0px;">Page: <a href="#1">1</a></div>
|
||||||
<span style="position:absolute; border: magenta 1px solid; left:261px; top:119px; width:62px; height:27px;"></span>
|
|
||||||
<span style="position:absolute; left:261px; top:119px; font-size:27px;">W</span>
|
|
||||||
<span style="position:absolute; left:283px; top:119px; font-size:27px;">o</span>
|
|
||||||
<span style="position:absolute; left:297px; top:119px; font-size:27px;">r</span>
|
|
||||||
<span style="position:absolute; left:305px; top:119px; font-size:27px;">l</span>
|
|
||||||
<span style="position:absolute; left:310px; top:119px; font-size:27px;">d</span>
|
|
||||||
<span style="position:absolute; border: cyan 1px solid; left:100px; top:219px; width:61px; height:27px;"></span>
|
|
||||||
<span style="position:absolute; border: magenta 1px solid; left:100px; top:219px; width:61px; height:27px;"></span>
|
|
||||||
<span style="position:absolute; left:100px; top:219px; font-size:27px;">H</span>
|
|
||||||
<span style="position:absolute; left:117px; top:219px; font-size:27px;">e</span>
|
|
||||||
<span style="position:absolute; left:130px; top:219px; font-size:27px;">l</span>
|
|
||||||
<span style="position:absolute; left:136px; top:219px; font-size:27px;">l</span>
|
|
||||||
<span style="position:absolute; left:141px; top:219px; font-size:27px;">o</span>
|
|
||||||
<span style="position:absolute; left:154px; top:219px; font-size:27px;"> </span>
|
|
||||||
<span style="position:absolute; border: cyan 1px solid; left:261px; top:219px; width:62px; height:27px;"></span>
|
|
||||||
<span style="position:absolute; border: magenta 1px solid; left:261px; top:219px; width:62px; height:27px;"></span>
|
|
||||||
<span style="position:absolute; left:261px; top:219px; font-size:27px;">W</span>
|
|
||||||
<span style="position:absolute; left:284px; top:219px; font-size:27px;">o</span>
|
|
||||||
<span style="position:absolute; left:297px; top:219px; font-size:27px;">r</span>
|
|
||||||
<span style="position:absolute; left:305px; top:219px; font-size:27px;">l</span>
|
|
||||||
<span style="position:absolute; left:310px; top:219px; font-size:27px;">d</span>
|
|
||||||
<span style="position:absolute; border: cyan 1px solid; left:100px; top:319px; width:111px; height:27px;"></span>
|
|
||||||
<span style="position:absolute; border: magenta 1px solid; left:100px; top:319px; width:111px; height:27px;"></span>
|
|
||||||
<span style="position:absolute; left:100px; top:319px; font-size:27px;">H</span>
|
|
||||||
<span style="position:absolute; left:127px; top:319px; font-size:27px;">e</span>
|
|
||||||
<span style="position:absolute; left:150px; top:319px; font-size:27px;">l</span>
|
|
||||||
<span style="position:absolute; left:166px; top:319px; font-size:27px;">l</span>
|
|
||||||
<span style="position:absolute; left:181px; top:319px; font-size:27px;">o</span>
|
|
||||||
<span style="position:absolute; left:204px; top:319px; font-size:27px;"> </span>
|
|
||||||
<span style="position:absolute; border: cyan 1px solid; left:321px; top:319px; width:102px; height:27px;"></span>
|
|
||||||
<span style="position:absolute; border: magenta 1px solid; left:321px; top:319px; width:102px; height:27px;"></span>
|
|
||||||
<span style="position:absolute; left:321px; top:319px; font-size:27px;">W</span>
|
|
||||||
<span style="position:absolute; left:354px; top:319px; font-size:27px;">o</span>
|
|
||||||
<span style="position:absolute; left:377px; top:319px; font-size:27px;">r</span>
|
|
||||||
<span style="position:absolute; left:395px; top:319px; font-size:27px;">l</span>
|
|
||||||
<span style="position:absolute; left:410px; top:319px; font-size:27px;">d</span>
|
|
||||||
<span style="position:absolute; border: cyan 1px solid; left:100px; top:419px; width:111px; height:27px;"></span>
|
|
||||||
<span style="position:absolute; border: magenta 1px solid; left:100px; top:419px; width:111px; height:27px;"></span>
|
|
||||||
<span style="position:absolute; left:100px; top:419px; font-size:27px;">H</span>
|
|
||||||
<span style="position:absolute; left:127px; top:419px; font-size:27px;">e</span>
|
|
||||||
<span style="position:absolute; left:150px; top:419px; font-size:27px;">l</span>
|
|
||||||
<span style="position:absolute; left:165px; top:419px; font-size:27px;">l</span>
|
|
||||||
<span style="position:absolute; left:181px; top:419px; font-size:27px;">o</span>
|
|
||||||
<span style="position:absolute; left:204px; top:419px; font-size:27px;"> </span>
|
|
||||||
<span style="position:absolute; border: cyan 1px solid; left:321px; top:419px; width:102px; height:27px;"></span>
|
|
||||||
<span style="position:absolute; border: magenta 1px solid; left:321px; top:419px; width:102px; height:27px;"></span>
|
|
||||||
<span style="position:absolute; left:321px; top:419px; font-size:27px;">W</span>
|
|
||||||
<span style="position:absolute; left:353px; top:419px; font-size:27px;">o</span>
|
|
||||||
<span style="position:absolute; left:377px; top:419px; font-size:27px;">r</span>
|
|
||||||
<span style="position:absolute; left:395px; top:419px; font-size:27px;">l</span>
|
|
||||||
<span style="position:absolute; left:410px; top:419px; font-size:27px;">d</span>
|
|
||||||
<div style="position:absolute; top:0px;">Page: <a href="#1">1</a></div>
|
|
||||||
</body></html>
|
</body></html>
|
||||||
|
|
|
@ -3,42 +3,9 @@
|
||||||
</head><body>
|
</head><body>
|
||||||
<span style="position:absolute; border: gray 1px solid; left:0px; top:50px; width:612px; height:792px;"></span>
|
<span style="position:absolute; border: gray 1px solid; left:0px; top:50px; width:612px; height:792px;"></span>
|
||||||
<div style="position:absolute; top:50px;"><a name="1">Page 1</a></div>
|
<div style="position:absolute; top:50px;"><a name="1">Page 1</a></div>
|
||||||
<span style="position:absolute; border: cyan 1px solid; left:0px; top:72px; width:218px; height:79px;"></span>
|
<div style="position:absolute; border: textbox 1px solid; writing-mode:lr-tb; left:0px; top:72px; width:218px; height:79px;"><span style="font-family: Helvetica; font-size:38px">HelloHello
|
||||||
<span style="position:absolute; border: magenta 1px solid; left:0px; top:72px; width:218px; height:79px;"></span>
|
<br></span></div><div style="position:absolute; border: textbox 1px solid; writing-mode:tb-rl; left:194px; top:136px; width:48px; height:490px;"><span style="font-family: unknown; font-size:33px">あいうえおあいうえお </span><span style="font-family: Helvetica; font-size:19px">W
|
||||||
<span style="position:absolute; left:0px; top:96px; font-size:55px;">H</span>
|
<br></span></div><div style="position:absolute; border: textbox 1px solid; writing-mode:lr-tb; left:241px; top:575px; width:102px; height:51px;"><span style="font-family: Helvetica; font-size:19px">World
|
||||||
<span style="position:absolute; left:34px; top:96px; font-size:55px;">e</span>
|
<br>orld
|
||||||
<span style="position:absolute; left:61px; top:96px; font-size:55px;">l</span>
|
<br></span></div><div style="position:absolute; top:0px;">Page: <a href="#1">1</a></div>
|
||||||
<span style="position:absolute; left:72px; top:96px; font-size:55px;">l</span>
|
|
||||||
<span style="position:absolute; left:82px; top:96px; font-size:55px;">o</span>
|
|
||||||
<span style="position:absolute; left:109px; top:72px; font-size:55px;">H</span>
|
|
||||||
<span style="position:absolute; left:144px; top:72px; font-size:55px;">e</span>
|
|
||||||
<span style="position:absolute; left:170px; top:72px; font-size:55px;">l</span>
|
|
||||||
<span style="position:absolute; left:181px; top:72px; font-size:55px;">l</span>
|
|
||||||
<span style="position:absolute; left:192px; top:72px; font-size:55px;">o</span>
|
|
||||||
<span style="position:absolute; border: cyan 1px solid; left:194px; top:136px; width:48px; height:490px;"></span>
|
|
||||||
<span style="position:absolute; border: magenta 1px solid; left:194px; top:136px; width:48px; height:490px;"></span>
|
|
||||||
<span style="position:absolute; left:194px; top:136px; font-size:48px;">あ</span>
|
|
||||||
<span style="position:absolute; left:194px; top:184px; font-size:48px;">い</span>
|
|
||||||
<span style="position:absolute; left:194px; top:232px; font-size:48px;">う</span>
|
|
||||||
<span style="position:absolute; left:194px; top:280px; font-size:48px;">え</span>
|
|
||||||
<span style="position:absolute; left:194px; top:328px; font-size:48px;">お</span>
|
|
||||||
<span style="position:absolute; left:194px; top:352px; font-size:48px;">あ</span>
|
|
||||||
<span style="position:absolute; left:194px; top:400px; font-size:48px;">い</span>
|
|
||||||
<span style="position:absolute; left:194px; top:448px; font-size:48px;">う</span>
|
|
||||||
<span style="position:absolute; left:194px; top:496px; font-size:48px;">え</span>
|
|
||||||
<span style="position:absolute; left:194px; top:544px; font-size:48px;">お</span>
|
|
||||||
<span style="position:absolute; left:218px; top:599px; font-size:27px;">W</span>
|
|
||||||
<span style="position:absolute; border: cyan 1px solid; left:241px; top:575px; width:102px; height:51px;"></span>
|
|
||||||
<span style="position:absolute; border: magenta 1px solid; left:281px; top:575px; width:62px; height:27px;"></span>
|
|
||||||
<span style="position:absolute; left:281px; top:575px; font-size:27px;">W</span>
|
|
||||||
<span style="position:absolute; left:304px; top:575px; font-size:27px;">o</span>
|
|
||||||
<span style="position:absolute; left:317px; top:575px; font-size:27px;">r</span>
|
|
||||||
<span style="position:absolute; left:325px; top:575px; font-size:27px;">l</span>
|
|
||||||
<span style="position:absolute; left:330px; top:575px; font-size:27px;">d</span>
|
|
||||||
<span style="position:absolute; border: magenta 1px solid; left:241px; top:599px; width:40px; height:27px;"></span>
|
|
||||||
<span style="position:absolute; left:241px; top:599px; font-size:27px;">o</span>
|
|
||||||
<span style="position:absolute; left:254px; top:599px; font-size:27px;">r</span>
|
|
||||||
<span style="position:absolute; left:262px; top:599px; font-size:27px;">l</span>
|
|
||||||
<span style="position:absolute; left:268px; top:599px; font-size:27px;">d</span>
|
|
||||||
<div style="position:absolute; top:0px;">Page: <a href="#1">1</a></div>
|
|
||||||
</body></html>
|
</body></html>
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#!/usr/bin/python2 -O
|
#!/usr/bin/python -O
|
||||||
#
|
#
|
||||||
# pdf2html.cgi - Gateway script for converting PDF into HTML.
|
# pdf2html.cgi - Gateway script for converting PDF into HTML.
|
||||||
#
|
#
|
||||||
|
@ -42,7 +42,7 @@ def url(base, **kw):
|
||||||
## convert
|
## convert
|
||||||
##
|
##
|
||||||
class FileSizeExceeded(ValueError): pass
|
class FileSizeExceeded(ValueError): pass
|
||||||
def convert(outfp, infp, path, codec='utf-8',
|
def convert(infp, outfp, path, codec='utf-8',
|
||||||
maxpages=0, maxfilesize=0, pagenos=None,
|
maxpages=0, maxfilesize=0, pagenos=None,
|
||||||
html=True):
|
html=True):
|
||||||
# save the input file.
|
# save the input file.
|
||||||
|
@ -76,22 +76,22 @@ def convert(outfp, infp, path, codec='utf-8',
|
||||||
class WebApp(object):
|
class WebApp(object):
|
||||||
|
|
||||||
TITLE = 'pdf2html demo'
|
TITLE = 'pdf2html demo'
|
||||||
APPPATH = '/' # absolute URL path to this application.
|
MAXFILESIZE = 10000000 # set to zero if unlimited.
|
||||||
MAXFILESIZE = 5000000 # set to zero if unlimited.
|
|
||||||
MAXPAGES = 10 # set to zero if unlimited.
|
MAXPAGES = 10 # set to zero if unlimited.
|
||||||
|
|
||||||
def __init__(self, infp=sys.stdin, outfp=sys.stdout, codec='utf-8'):
|
def __init__(self, infp=sys.stdin, outfp=sys.stdout, environ=os.environ,
|
||||||
|
codec='utf-8', apppath='/'):
|
||||||
|
self.infp = infp
|
||||||
self.outfp = outfp
|
self.outfp = outfp
|
||||||
self.codec = codec
|
self.codec = codec
|
||||||
self.remote_addr = os.environ.get('REMOTE_ADDR')
|
self.apppath = apppath
|
||||||
self.path_info = os.environ.get('PATH_INFO')
|
self.remote_addr = environ.get('REMOTE_ADDR')
|
||||||
self.method = os.environ.get('REQUEST_METHOD', 'GET').upper()
|
self.path_info = environ.get('PATH_INFO')
|
||||||
self.server = os.environ.get('SERVER_SOFTWARE', '')
|
self.method = environ.get('REQUEST_METHOD', 'GET').upper()
|
||||||
self.logpath = os.environ.get('LOG_PATH', './var/log')
|
self.server = environ.get('SERVER_SOFTWARE', '')
|
||||||
self.tmpdir = os.environ.get('TEMP', './var/')
|
self.tmpdir = environ.get('TEMP', './var/')
|
||||||
self.content_type = 'text/html; charset=%s' % codec
|
self.content_type = 'text/html; charset=%s' % codec
|
||||||
self.cur_time = time.time()
|
self.logger = logging.getLogger()
|
||||||
self.form = cgi.FieldStorage(infp)
|
|
||||||
return
|
return
|
||||||
|
|
||||||
def put(self, *args):
|
def put(self, *args):
|
||||||
|
@ -130,7 +130,7 @@ class WebApp(object):
|
||||||
self.put(
|
self.put(
|
||||||
'<html><head><title>%s</title></head><body>\n' % q(self.TITLE),
|
'<html><head><title>%s</title></head><body>\n' % q(self.TITLE),
|
||||||
'<h1>%s</h1><hr>\n' % q(self.TITLE),
|
'<h1>%s</h1><hr>\n' % q(self.TITLE),
|
||||||
'<form method="POST" action="%s" enctype="multipart/form-data">\n' % q(self.APPPATH),
|
'<form method="POST" action="%s" enctype="multipart/form-data">\n' % q(self.apppath),
|
||||||
'<p>Upload PDF File: <input name="f" type="file" value="">\n',
|
'<p>Upload PDF File: <input name="f" type="file" value="">\n',
|
||||||
' Page numbers (comma-separated):\n',
|
' Page numbers (comma-separated):\n',
|
||||||
'<input name="p" type="text" size="10" value="">\n',
|
'<input name="p" type="text" size="10" value="">\n',
|
||||||
|
@ -145,49 +145,54 @@ class WebApp(object):
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
def run(self, argv):
|
def setup(self):
|
||||||
logging.basicConfig(level=logging.INFO,
|
if not os.path.isdir(self.tmpdir):
|
||||||
format='%(asctime)s %(levelname)s %(message)s',
|
self.logger.error('no tmpdir')
|
||||||
filename=self.logpath, filemode='a')
|
status = 304
|
||||||
if self.path_info != self.APPPATH:
|
elif self.path_info != self.apppath:
|
||||||
|
status = 404
|
||||||
|
else:
|
||||||
|
status = 200
|
||||||
|
self._status = status
|
||||||
|
return status
|
||||||
|
|
||||||
|
def run(self):
|
||||||
|
form = cgi.FieldStorage(self.infp)
|
||||||
|
if self._status != 200:
|
||||||
self.http_404()
|
self.http_404()
|
||||||
return
|
return
|
||||||
if not os.path.isdir(self.tmpdir):
|
|
||||||
logging.error('no tmpdir')
|
|
||||||
self.bummer('error')
|
|
||||||
return
|
|
||||||
if (self.method != 'POST' or
|
if (self.method != 'POST' or
|
||||||
'c' not in self.form or
|
'c' not in form or
|
||||||
'f' not in self.form):
|
'f' not in form):
|
||||||
self.coverpage()
|
self.coverpage()
|
||||||
return
|
return
|
||||||
item = self.form['f']
|
item = form['f']
|
||||||
if not (item.file and item.filename):
|
if not (item.file and item.filename):
|
||||||
self.coverpage()
|
self.coverpage()
|
||||||
return
|
return
|
||||||
cmd = self.form.getvalue('c')
|
cmd = form.getvalue('c')
|
||||||
html = (cmd == 'Convert to HTML')
|
html = (cmd == 'Convert to HTML')
|
||||||
pagenos = []
|
pagenos = []
|
||||||
if 'p' in self.form:
|
if 'p' in form:
|
||||||
for m in re.finditer(r'\d+', self.form.getvalue('p')):
|
for m in re.finditer(r'\d+', form.getvalue('p')):
|
||||||
try:
|
try:
|
||||||
pagenos.append(int(m.group(0)))
|
pagenos.append(int(m.group(0)))
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
logging.info('received: host=%s, name=%r, pagenos=%r' %
|
self.logger.info('received: host=%s, name=%r, pagenos=%r' %
|
||||||
(self.remote_addr, item.filename, pagenos))
|
(self.remote_addr, item.filename, pagenos))
|
||||||
h = abs(hash((random.random(), self.remote_addr, item.filename)))
|
h = abs(hash((random.random(), self.remote_addr, item.filename)))
|
||||||
tmppath = os.path.join(self.tmpdir, '%08x%08x.pdf' % (self.cur_time, h))
|
tmppath = os.path.join(self.tmpdir, '%08x%08x.pdf' % (time.time(), h))
|
||||||
try:
|
try:
|
||||||
if not html:
|
if not html:
|
||||||
self.content_type = 'text/plain; charset=%s' % self.codec
|
self.content_type = 'text/plain; charset=%s' % self.codec
|
||||||
self.http_200()
|
self.http_200()
|
||||||
try:
|
try:
|
||||||
convert(sys.stdout, item.file, tmppath, pagenos=pagenos, codec=self.codec,
|
convert(item.file, sys.stdout, tmppath, pagenos=pagenos, codec=self.codec,
|
||||||
maxpages=self.MAXPAGES, maxfilesize=self.MAXFILESIZE, html=html)
|
maxpages=self.MAXPAGES, maxfilesize=self.MAXFILESIZE, html=html)
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
self.put('<p>Sorry, an error has occured: %s' % q(repr(e)))
|
self.put('<p>Sorry, an error has occured: %s' % q(repr(e)))
|
||||||
logging.error('convert: %r: path=%r: %s' % (e, tmppath, traceback.format_exc()))
|
self.logger.error('convert: %r: path=%r: %s' % (e, tmppath, traceback.format_exc()))
|
||||||
finally:
|
finally:
|
||||||
try:
|
try:
|
||||||
os.remove(tmppath)
|
os.remove(tmppath)
|
||||||
|
@ -197,4 +202,7 @@ class WebApp(object):
|
||||||
|
|
||||||
|
|
||||||
# main
|
# main
|
||||||
if __name__ == '__main__': sys.exit(WebApp().run(sys.argv))
|
if __name__ == '__main__':
|
||||||
|
app = WebApp()
|
||||||
|
app.setup()
|
||||||
|
sys.exit(app.run())
|
||||||
|
|
Loading…
Reference in New Issue