diff --git a/.travis.yml b/.travis.yml
index 319aa54..0575f12 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,6 +7,6 @@ python:
- "3.7"
- "3.8"
install:
- - pip install tox-travis
+ - pip install tox-travis flake8
script:
- tox -r
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1838ad3..6e85906 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## [Unreleased]
+### Changed
+- Enforce pep8 coding style by adding flake8 to CI ([#345](https://github.com/pdfminer/pdfminer.six/pull/345))
+
## [20191110] - 2019-11-10
### Fixed
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 8accc02..e56866a 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -14,7 +14,7 @@ Any contribution is appreciated! You might want to:
- If you report a bug in the results for a particular pdf, include that pdf. This allows others to replicate the
issue.
* Fix issues by [creating pull requests](https://help.github.com/en/articles/creating-a-pull-request).
-* Help others by giving your thoughts on open issues and pull requests.
+* Help others by sharing your thoughts in comments on issues and pull requests.
## Guidelines for creating issues
@@ -29,11 +29,15 @@ Any contribution is appreciated! You might want to:
* Pull requests should be merged to develop, not master. This ensures that master always equals the released version.
* Include unit tests when possible. In case of bugs, this will help to prevent the same mistake in the future. In case
of features, this will show that your code works correctly.
-* Code should work for Python 2.7 and Python 3.x (for now), conform to PEP8 code style (with a line-width of 120)
- and properly documented with docstrings.
+* Code should work for Python 2.7 and Python 3.x (for now), conform to PEP8 code style (enforced by
+ [flake8](http://flake8.pycqa.org/en/latest/)) and be properly documented with docstrings.
* Check spelling and grammar.
* Don't forget to update the [CHANGELOG.md](CHANGELOG.md#[Unreleased])
+## Guidelines for posting comments
+
+* [Be cordial and positive](https://www.kennethreitz.org/essays/be-cordial-or-be-on-your-way)
+
## Getting started
1. Clone the repository
diff --git a/docs/source/conf.py b/docs/source/conf.py
index a3a6be7..d61957b 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -12,7 +12,8 @@
import os
import sys
-sys.path.insert(0, os.path.join(os.path.abspath(os.path.dirname(__file__)), '../../'))
+sys.path.insert(0, os.path.join(
+ os.path.abspath(os.path.dirname(__file__)), '../../'))
# -- Project information -----------------------------------------------------
@@ -58,4 +59,4 @@ html_theme = 'alabaster'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
\ No newline at end of file
+html_static_path = ['_static']
diff --git a/pdfminer/__init__.py b/pdfminer/__init__.py
index 0df3c86..9fbee72 100644
--- a/pdfminer/__init__.py
+++ b/pdfminer/__init__.py
@@ -17,8 +17,11 @@ __version__ = '20191110'
if sys.version_info < (3, 0):
- warnings.warn('On January 1st, 2020, pdfminer.six will stop supporting Python 2. Please upgrade to Python 3. For '
- 'more information see https://github.com/pdfminer/pdfminer.six/issues/194')
+ warnings.warn('On January 1st, 2020, '
+ 'pdfminer.six will stop supporting Python 2. '
+ 'Please upgrade to Python 3. '
+ 'For more information see '
+ 'https://github.com/pdfminer/pdfminer.six/issues/194')
if __name__ == '__main__':
print(__version__)
diff --git a/pdfminer/arcfour.py b/pdfminer/arcfour.py
index 5c0e64c..c09a370 100644
--- a/pdfminer/arcfour.py
+++ b/pdfminer/arcfour.py
@@ -6,17 +6,18 @@ This code is in the public domain.
"""
-import six # Python 2+3 compatibility
-## Arcfour
-##
+import six # Python 2+3 compatibility
+
+
class Arcfour(object):
def __init__(self, key):
- s = [i for i in range(256)] #because Py3 range is not indexable
+ # because Py3 range is not indexable
+ s = [i for i in range(256)]
j = 0
klen = len(key)
for i in range(256):
- j = (j + s[i] + six.indexbytes(key,i % klen)) % 256
+ j = (j + s[i] + six.indexbytes(key, i % klen)) % 256
(s[i], s[j]) = (s[j], s[i])
self.s = s
(self.i, self.j) = (0, 0)
@@ -34,7 +35,8 @@ class Arcfour(object):
r += six.int2byte(c ^ k)
(self.i, self.j) = (i, j)
return r
-
+
encrypt = decrypt = process
+
new = Arcfour
diff --git a/pdfminer/ascii85.py b/pdfminer/ascii85.py
index a9f501d..35be786 100644
--- a/pdfminer/ascii85.py
+++ b/pdfminer/ascii85.py
@@ -9,7 +9,7 @@ This code is in the public domain.
import re
import struct
-import six #Python 2+3 compatibility
+import six # Python 2+3 compatibility
# ascii85decode(data)
@@ -27,7 +27,7 @@ def ascii85decode(data):
n = b = 0
out = b''
for i in six.iterbytes(data):
- c=six.int2byte(i)
+ c = six.int2byte(i)
if b'!' <= c and c <= b'u':
n += 1
b = b*85+(ord(c)-33)
@@ -45,9 +45,11 @@ def ascii85decode(data):
break
return out
+
# asciihexdecode(data)
-hex_re = re.compile(b'([a-f\d]{2})', re.IGNORECASE)
-trail_re = re.compile(b'^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$', re.IGNORECASE)
+hex_re = re.compile(b'([a-f0-9]{2})', re.IGNORECASE)
+trail_re = re.compile(b'^(?:[a-f0-9]{2}|[ \t\n\r\f\v])*'
+ b'([a-f0-9])[ \t\n\r\f\v>]*$', re.IGNORECASE)
def asciihexdecode(data):
@@ -61,14 +63,14 @@ def asciihexdecode(data):
will behave as if a 0 followed the last digit.
"""
def decode(x):
- i=int(x,16)
+ i = int(x, 16)
return six.int2byte(i)
- out=b''
+ out = b''
for x in hex_re.findall(data):
- out+=decode(x)
+ out += decode(x)
m = trail_re.search(data)
if m:
- out+=decode(m.group(1)+b'0')
+ out += decode(m.group(1)+b'0')
return out
diff --git a/pdfminer/ccitt.py b/pdfminer/ccitt.py
index 61caed9..b4a4612 100644
--- a/pdfminer/ccitt.py
+++ b/pdfminer/ccitt.py
@@ -3,17 +3,19 @@
#
# Bugs: uncompressed mode untested.
#
-# cf.
-# ITU-T Recommendation T.4
-# "Standardization of Group 3 facsimile terminals for document transmission"
-# ITU-T Recommendation T.6
-# "FACSIMILE CODING SCHEMES AND CODING CONTROL FUNCTIONS FOR GROUP 4 FACSIMILE APPARATUS"
+# cf.
+# ITU-T Recommendation T.4
+# "Standardization of Group 3 facsimile terminals
+# for document transmission"
+# ITU-T Recommendation T.6
+# "FACSIMILE CODING SCHEMES AND CODING CONTROL FUNCTIONS
+# FOR GROUP 4 FACSIMILE APPARATUS"
import sys
import array
-import six #Python 2+3 compatibility
+import six # Python 2+3 compatibility
if six.PY3:
def get_bytes(data):
@@ -25,8 +27,6 @@ else:
yield ord(char)
-## BitParser
-##
class BitParser(object):
def __init__(self):
@@ -34,7 +34,7 @@ class BitParser(object):
return
@classmethod
- def add(klass, root, v, bits):
+ def add(cls, root, v, bits):
p = root
b = None
for i in range(len(bits)):
@@ -68,8 +68,6 @@ class BitParser(object):
return
-## CCITTG4Parser
-##
class CCITTG4Parser(BitParser):
MODE = [None, None]
@@ -93,85 +91,85 @@ class CCITTG4Parser(BitParser):
BitParser.add(MODE, 'e', '000000000001000000000001')
WHITE = [None, None]
- BitParser.add(WHITE, 0 , '00110101')
- BitParser.add(WHITE, 1 , '000111')
- BitParser.add(WHITE, 2 , '0111')
- BitParser.add(WHITE, 3 , '1000')
- BitParser.add(WHITE, 4 , '1011')
- BitParser.add(WHITE, 5 , '1100')
- BitParser.add(WHITE, 6 , '1110')
- BitParser.add(WHITE, 7 , '1111')
- BitParser.add(WHITE, 8 , '10011')
- BitParser.add(WHITE, 9 , '10100')
- BitParser.add(WHITE, 10 , '00111')
- BitParser.add(WHITE, 11 , '01000')
- BitParser.add(WHITE, 12 , '001000')
- BitParser.add(WHITE, 13 , '000011')
- BitParser.add(WHITE, 14 , '110100')
- BitParser.add(WHITE, 15 , '110101')
- BitParser.add(WHITE, 16 , '101010')
- BitParser.add(WHITE, 17 , '101011')
- BitParser.add(WHITE, 18 , '0100111')
- BitParser.add(WHITE, 19 , '0001100')
- BitParser.add(WHITE, 20 , '0001000')
- BitParser.add(WHITE, 21 , '0010111')
- BitParser.add(WHITE, 22 , '0000011')
- BitParser.add(WHITE, 23 , '0000100')
- BitParser.add(WHITE, 24 , '0101000')
- BitParser.add(WHITE, 25 , '0101011')
- BitParser.add(WHITE, 26 , '0010011')
- BitParser.add(WHITE, 27 , '0100100')
- BitParser.add(WHITE, 28 , '0011000')
- BitParser.add(WHITE, 29 , '00000010')
- BitParser.add(WHITE, 30 , '00000011')
- BitParser.add(WHITE, 31 , '00011010')
- BitParser.add(WHITE, 32 , '00011011')
- BitParser.add(WHITE, 33 , '00010010')
- BitParser.add(WHITE, 34 , '00010011')
- BitParser.add(WHITE, 35 , '00010100')
- BitParser.add(WHITE, 36 , '00010101')
- BitParser.add(WHITE, 37 , '00010110')
- BitParser.add(WHITE, 38 , '00010111')
- BitParser.add(WHITE, 39 , '00101000')
- BitParser.add(WHITE, 40 , '00101001')
- BitParser.add(WHITE, 41 , '00101010')
- BitParser.add(WHITE, 42 , '00101011')
- BitParser.add(WHITE, 43 , '00101100')
- BitParser.add(WHITE, 44 , '00101101')
- BitParser.add(WHITE, 45 , '00000100')
- BitParser.add(WHITE, 46 , '00000101')
- BitParser.add(WHITE, 47 , '00001010')
- BitParser.add(WHITE, 48 , '00001011')
- BitParser.add(WHITE, 49 , '01010010')
- BitParser.add(WHITE, 50 , '01010011')
- BitParser.add(WHITE, 51 , '01010100')
- BitParser.add(WHITE, 52 , '01010101')
- BitParser.add(WHITE, 53 , '00100100')
- BitParser.add(WHITE, 54 , '00100101')
- BitParser.add(WHITE, 55 , '01011000')
- BitParser.add(WHITE, 56 , '01011001')
- BitParser.add(WHITE, 57 , '01011010')
- BitParser.add(WHITE, 58 , '01011011')
- BitParser.add(WHITE, 59 , '01001010')
- BitParser.add(WHITE, 60 , '01001011')
- BitParser.add(WHITE, 61 , '00110010')
- BitParser.add(WHITE, 62 , '00110011')
- BitParser.add(WHITE, 63 , '00110100')
- BitParser.add(WHITE, 64 , '11011')
- BitParser.add(WHITE, 128 , '10010')
- BitParser.add(WHITE, 192 , '010111')
- BitParser.add(WHITE, 256 , '0110111')
- BitParser.add(WHITE, 320 , '00110110')
- BitParser.add(WHITE, 384 , '00110111')
- BitParser.add(WHITE, 448 , '01100100')
- BitParser.add(WHITE, 512 , '01100101')
- BitParser.add(WHITE, 576 , '01101000')
- BitParser.add(WHITE, 640 , '01100111')
- BitParser.add(WHITE, 704 , '011001100')
- BitParser.add(WHITE, 768 , '011001101')
- BitParser.add(WHITE, 832 , '011010010')
- BitParser.add(WHITE, 896 , '011010011')
- BitParser.add(WHITE, 960 , '011010100')
+ BitParser.add(WHITE, 0, '00110101')
+ BitParser.add(WHITE, 1, '000111')
+ BitParser.add(WHITE, 2, '0111')
+ BitParser.add(WHITE, 3, '1000')
+ BitParser.add(WHITE, 4, '1011')
+ BitParser.add(WHITE, 5, '1100')
+ BitParser.add(WHITE, 6, '1110')
+ BitParser.add(WHITE, 7, '1111')
+ BitParser.add(WHITE, 8, '10011')
+ BitParser.add(WHITE, 9, '10100')
+ BitParser.add(WHITE, 10, '00111')
+ BitParser.add(WHITE, 11, '01000')
+ BitParser.add(WHITE, 12, '001000')
+ BitParser.add(WHITE, 13, '000011')
+ BitParser.add(WHITE, 14, '110100')
+ BitParser.add(WHITE, 15, '110101')
+ BitParser.add(WHITE, 16, '101010')
+ BitParser.add(WHITE, 17, '101011')
+ BitParser.add(WHITE, 18, '0100111')
+ BitParser.add(WHITE, 19, '0001100')
+ BitParser.add(WHITE, 20, '0001000')
+ BitParser.add(WHITE, 21, '0010111')
+ BitParser.add(WHITE, 22, '0000011')
+ BitParser.add(WHITE, 23, '0000100')
+ BitParser.add(WHITE, 24, '0101000')
+ BitParser.add(WHITE, 25, '0101011')
+ BitParser.add(WHITE, 26, '0010011')
+ BitParser.add(WHITE, 27, '0100100')
+ BitParser.add(WHITE, 28, '0011000')
+ BitParser.add(WHITE, 29, '00000010')
+ BitParser.add(WHITE, 30, '00000011')
+ BitParser.add(WHITE, 31, '00011010')
+ BitParser.add(WHITE, 32, '00011011')
+ BitParser.add(WHITE, 33, '00010010')
+ BitParser.add(WHITE, 34, '00010011')
+ BitParser.add(WHITE, 35, '00010100')
+ BitParser.add(WHITE, 36, '00010101')
+ BitParser.add(WHITE, 37, '00010110')
+ BitParser.add(WHITE, 38, '00010111')
+ BitParser.add(WHITE, 39, '00101000')
+ BitParser.add(WHITE, 40, '00101001')
+ BitParser.add(WHITE, 41, '00101010')
+ BitParser.add(WHITE, 42, '00101011')
+ BitParser.add(WHITE, 43, '00101100')
+ BitParser.add(WHITE, 44, '00101101')
+ BitParser.add(WHITE, 45, '00000100')
+ BitParser.add(WHITE, 46, '00000101')
+ BitParser.add(WHITE, 47, '00001010')
+ BitParser.add(WHITE, 48, '00001011')
+ BitParser.add(WHITE, 49, '01010010')
+ BitParser.add(WHITE, 50, '01010011')
+ BitParser.add(WHITE, 51, '01010100')
+ BitParser.add(WHITE, 52, '01010101')
+ BitParser.add(WHITE, 53, '00100100')
+ BitParser.add(WHITE, 54, '00100101')
+ BitParser.add(WHITE, 55, '01011000')
+ BitParser.add(WHITE, 56, '01011001')
+ BitParser.add(WHITE, 57, '01011010')
+ BitParser.add(WHITE, 58, '01011011')
+ BitParser.add(WHITE, 59, '01001010')
+ BitParser.add(WHITE, 60, '01001011')
+ BitParser.add(WHITE, 61, '00110010')
+ BitParser.add(WHITE, 62, '00110011')
+ BitParser.add(WHITE, 63, '00110100')
+ BitParser.add(WHITE, 64, '11011')
+ BitParser.add(WHITE, 128, '10010')
+ BitParser.add(WHITE, 192, '010111')
+ BitParser.add(WHITE, 256, '0110111')
+ BitParser.add(WHITE, 320, '00110110')
+ BitParser.add(WHITE, 384, '00110111')
+ BitParser.add(WHITE, 448, '01100100')
+ BitParser.add(WHITE, 512, '01100101')
+ BitParser.add(WHITE, 576, '01101000')
+ BitParser.add(WHITE, 640, '01100111')
+ BitParser.add(WHITE, 704, '011001100')
+ BitParser.add(WHITE, 768, '011001101')
+ BitParser.add(WHITE, 832, '011010010')
+ BitParser.add(WHITE, 896, '011010011')
+ BitParser.add(WHITE, 960, '011010100')
BitParser.add(WHITE, 1024, '011010101')
BitParser.add(WHITE, 1088, '011010110')
BitParser.add(WHITE, 1152, '011010111')
@@ -199,85 +197,85 @@ class CCITTG4Parser(BitParser):
BitParser.add(WHITE, 2560, '000000011111')
BLACK = [None, None]
- BitParser.add(BLACK, 0 , '0000110111')
- BitParser.add(BLACK, 1 , '010')
- BitParser.add(BLACK, 2 , '11')
- BitParser.add(BLACK, 3 , '10')
- BitParser.add(BLACK, 4 , '011')
- BitParser.add(BLACK, 5 , '0011')
- BitParser.add(BLACK, 6 , '0010')
- BitParser.add(BLACK, 7 , '00011')
- BitParser.add(BLACK, 8 , '000101')
- BitParser.add(BLACK, 9 , '000100')
- BitParser.add(BLACK, 10 , '0000100')
- BitParser.add(BLACK, 11 , '0000101')
- BitParser.add(BLACK, 12 , '0000111')
- BitParser.add(BLACK, 13 , '00000100')
- BitParser.add(BLACK, 14 , '00000111')
- BitParser.add(BLACK, 15 , '000011000')
- BitParser.add(BLACK, 16 , '0000010111')
- BitParser.add(BLACK, 17 , '0000011000')
- BitParser.add(BLACK, 18 , '0000001000')
- BitParser.add(BLACK, 19 , '00001100111')
- BitParser.add(BLACK, 20 , '00001101000')
- BitParser.add(BLACK, 21 , '00001101100')
- BitParser.add(BLACK, 22 , '00000110111')
- BitParser.add(BLACK, 23 , '00000101000')
- BitParser.add(BLACK, 24 , '00000010111')
- BitParser.add(BLACK, 25 , '00000011000')
- BitParser.add(BLACK, 26 , '000011001010')
- BitParser.add(BLACK, 27 , '000011001011')
- BitParser.add(BLACK, 28 , '000011001100')
- BitParser.add(BLACK, 29 , '000011001101')
- BitParser.add(BLACK, 30 , '000001101000')
- BitParser.add(BLACK, 31 , '000001101001')
- BitParser.add(BLACK, 32 , '000001101010')
- BitParser.add(BLACK, 33 , '000001101011')
- BitParser.add(BLACK, 34 , '000011010010')
- BitParser.add(BLACK, 35 , '000011010011')
- BitParser.add(BLACK, 36 , '000011010100')
- BitParser.add(BLACK, 37 , '000011010101')
- BitParser.add(BLACK, 38 , '000011010110')
- BitParser.add(BLACK, 39 , '000011010111')
- BitParser.add(BLACK, 40 , '000001101100')
- BitParser.add(BLACK, 41 , '000001101101')
- BitParser.add(BLACK, 42 , '000011011010')
- BitParser.add(BLACK, 43 , '000011011011')
- BitParser.add(BLACK, 44 , '000001010100')
- BitParser.add(BLACK, 45 , '000001010101')
- BitParser.add(BLACK, 46 , '000001010110')
- BitParser.add(BLACK, 47 , '000001010111')
- BitParser.add(BLACK, 48 , '000001100100')
- BitParser.add(BLACK, 49 , '000001100101')
- BitParser.add(BLACK, 50 , '000001010010')
- BitParser.add(BLACK, 51 , '000001010011')
- BitParser.add(BLACK, 52 , '000000100100')
- BitParser.add(BLACK, 53 , '000000110111')
- BitParser.add(BLACK, 54 , '000000111000')
- BitParser.add(BLACK, 55 , '000000100111')
- BitParser.add(BLACK, 56 , '000000101000')
- BitParser.add(BLACK, 57 , '000001011000')
- BitParser.add(BLACK, 58 , '000001011001')
- BitParser.add(BLACK, 59 , '000000101011')
- BitParser.add(BLACK, 60 , '000000101100')
- BitParser.add(BLACK, 61 , '000001011010')
- BitParser.add(BLACK, 62 , '000001100110')
- BitParser.add(BLACK, 63 , '000001100111')
- BitParser.add(BLACK, 64 , '0000001111')
- BitParser.add(BLACK, 128 , '000011001000')
- BitParser.add(BLACK, 192 , '000011001001')
- BitParser.add(BLACK, 256 , '000001011011')
- BitParser.add(BLACK, 320 , '000000110011')
- BitParser.add(BLACK, 384 , '000000110100')
- BitParser.add(BLACK, 448 , '000000110101')
- BitParser.add(BLACK, 512 , '0000001101100')
- BitParser.add(BLACK, 576 , '0000001101101')
- BitParser.add(BLACK, 640 , '0000001001010')
- BitParser.add(BLACK, 704 , '0000001001011')
- BitParser.add(BLACK, 768 , '0000001001100')
- BitParser.add(BLACK, 832 , '0000001001101')
- BitParser.add(BLACK, 896 , '0000001110010')
- BitParser.add(BLACK, 960 , '0000001110011')
+ BitParser.add(BLACK, 0, '0000110111')
+ BitParser.add(BLACK, 1, '010')
+ BitParser.add(BLACK, 2, '11')
+ BitParser.add(BLACK, 3, '10')
+ BitParser.add(BLACK, 4, '011')
+ BitParser.add(BLACK, 5, '0011')
+ BitParser.add(BLACK, 6, '0010')
+ BitParser.add(BLACK, 7, '00011')
+ BitParser.add(BLACK, 8, '000101')
+ BitParser.add(BLACK, 9, '000100')
+ BitParser.add(BLACK, 10, '0000100')
+ BitParser.add(BLACK, 11, '0000101')
+ BitParser.add(BLACK, 12, '0000111')
+ BitParser.add(BLACK, 13, '00000100')
+ BitParser.add(BLACK, 14, '00000111')
+ BitParser.add(BLACK, 15, '000011000')
+ BitParser.add(BLACK, 16, '0000010111')
+ BitParser.add(BLACK, 17, '0000011000')
+ BitParser.add(BLACK, 18, '0000001000')
+ BitParser.add(BLACK, 19, '00001100111')
+ BitParser.add(BLACK, 20, '00001101000')
+ BitParser.add(BLACK, 21, '00001101100')
+ BitParser.add(BLACK, 22, '00000110111')
+ BitParser.add(BLACK, 23, '00000101000')
+ BitParser.add(BLACK, 24, '00000010111')
+ BitParser.add(BLACK, 25, '00000011000')
+ BitParser.add(BLACK, 26, '000011001010')
+ BitParser.add(BLACK, 27, '000011001011')
+ BitParser.add(BLACK, 28, '000011001100')
+ BitParser.add(BLACK, 29, '000011001101')
+ BitParser.add(BLACK, 30, '000001101000')
+ BitParser.add(BLACK, 31, '000001101001')
+ BitParser.add(BLACK, 32, '000001101010')
+ BitParser.add(BLACK, 33, '000001101011')
+ BitParser.add(BLACK, 34, '000011010010')
+ BitParser.add(BLACK, 35, '000011010011')
+ BitParser.add(BLACK, 36, '000011010100')
+ BitParser.add(BLACK, 37, '000011010101')
+ BitParser.add(BLACK, 38, '000011010110')
+ BitParser.add(BLACK, 39, '000011010111')
+ BitParser.add(BLACK, 40, '000001101100')
+ BitParser.add(BLACK, 41, '000001101101')
+ BitParser.add(BLACK, 42, '000011011010')
+ BitParser.add(BLACK, 43, '000011011011')
+ BitParser.add(BLACK, 44, '000001010100')
+ BitParser.add(BLACK, 45, '000001010101')
+ BitParser.add(BLACK, 46, '000001010110')
+ BitParser.add(BLACK, 47, '000001010111')
+ BitParser.add(BLACK, 48, '000001100100')
+ BitParser.add(BLACK, 49, '000001100101')
+ BitParser.add(BLACK, 50, '000001010010')
+ BitParser.add(BLACK, 51, '000001010011')
+ BitParser.add(BLACK, 52, '000000100100')
+ BitParser.add(BLACK, 53, '000000110111')
+ BitParser.add(BLACK, 54, '000000111000')
+ BitParser.add(BLACK, 55, '000000100111')
+ BitParser.add(BLACK, 56, '000000101000')
+ BitParser.add(BLACK, 57, '000001011000')
+ BitParser.add(BLACK, 58, '000001011001')
+ BitParser.add(BLACK, 59, '000000101011')
+ BitParser.add(BLACK, 60, '000000101100')
+ BitParser.add(BLACK, 61, '000001011010')
+ BitParser.add(BLACK, 62, '000001100110')
+ BitParser.add(BLACK, 63, '000001100111')
+ BitParser.add(BLACK, 64, '0000001111')
+ BitParser.add(BLACK, 128, '000011001000')
+ BitParser.add(BLACK, 192, '000011001001')
+ BitParser.add(BLACK, 256, '000001011011')
+ BitParser.add(BLACK, 320, '000000110011')
+ BitParser.add(BLACK, 384, '000000110100')
+ BitParser.add(BLACK, 448, '000000110101')
+ BitParser.add(BLACK, 512, '0000001101100')
+ BitParser.add(BLACK, 576, '0000001101101')
+ BitParser.add(BLACK, 640, '0000001001010')
+ BitParser.add(BLACK, 704, '0000001001011')
+ BitParser.add(BLACK, 768, '0000001001100')
+ BitParser.add(BLACK, 832, '0000001001101')
+ BitParser.add(BLACK, 896, '0000001110010')
+ BitParser.add(BLACK, 960, '0000001110011')
BitParser.add(BLACK, 1024, '0000001110100')
BitParser.add(BLACK, 1088, '0000001110101')
BitParser.add(BLACK, 1152, '0000001110110')
@@ -434,7 +432,7 @@ class CCITTG4Parser(BitParser):
return
def output_line(self, y, bits):
- print (y, ''.join(str(b) for b in bits))
+ print(y, ''.join(str(b) for b in bits))
return
def _reset_line(self):
@@ -454,8 +452,6 @@ class CCITTG4Parser(BitParser):
return
def _do_vertical(self, dx):
- #print '* vertical(%d): curpos=%r, color=%r' % (dx, self._curpos, self._color)
- #print ' refline:', self._get_refline(self._curpos+1)
x1 = self._curpos+1
while 1:
if x1 == 0:
@@ -481,8 +477,6 @@ class CCITTG4Parser(BitParser):
return
def _do_pass(self):
- #print '* pass: curpos=%r, color=%r' % (self._curpos, self._color)
- #print ' refline:', self._get_refline(self._curpos+1)
x1 = self._curpos+1
while 1:
if x1 == 0:
@@ -510,7 +504,6 @@ class CCITTG4Parser(BitParser):
return
def _do_horizontal(self, n1, n2):
- #print '* horizontal(%d,%d): curpos=%r, color=%r' % (n1, n2, self._curpos, self._color)
if self._curpos < 0:
self._curpos = 0
x = self._curpos
@@ -528,7 +521,6 @@ class CCITTG4Parser(BitParser):
return
def _do_uncompressed(self, bits):
- #print '* uncompressed(%r): curpos=%r' % (bits, self._curpos)
for c in bits:
self._curline[self._curpos] = int(c)
self._curpos += 1
@@ -536,8 +528,6 @@ class CCITTG4Parser(BitParser):
return
-
-
class CCITTFaxDecoder(CCITTG4Parser):
def __init__(self, width, bytealign=False, reversed=False):
@@ -607,5 +597,6 @@ def main(argv):
fp.close()
return
+
if __name__ == '__main__':
sys.exit(main(sys.argv))
diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py
index 1681a8d..0c246e8 100644
--- a/pdfminer/cmapdb.py
+++ b/pdfminer/cmapdb.py
@@ -29,7 +29,7 @@ from .encodingdb import name2unicode
from .utils import choplist
from .utils import nunpack
-import six #Python 2+3 compatibility
+import six
log = logging.getLogger(__name__)
@@ -156,7 +156,8 @@ class UnicodeMap(CMapBase):
class FileCMap(CMap):
def add_code2cid(self, code, cid):
- assert isinstance(code, str) and isinstance(cid, int), str((type(code), type(cid)))
+ assert isinstance(code, str) and isinstance(cid, int),\
+ str((type(code), type(cid)))
d = self.code2cid
for c in code[:-1]:
c = ord(c)
@@ -219,7 +220,7 @@ class CMapDB(object):
pass
@classmethod
- def _load_data(klass, name):
+ def _load_data(cls, name):
name = name.replace("\0", "")
filename = '%s.pickle.gz' % name
log.info('loading: %r', name)
@@ -237,7 +238,7 @@ class CMapDB(object):
raise CMapDB.CMapNotFound(name)
@classmethod
- def get_cmap(klass, name):
+ def get_cmap(cls, name):
if name == 'Identity-H':
return IdentityCMap(WMode=0)
elif name == 'Identity-V':
@@ -247,22 +248,23 @@ class CMapDB(object):
elif name == 'OneByteIdentityV':
return IdentityCMapByte(WMode=1)
try:
- return klass._cmap_cache[name]
+ return cls._cmap_cache[name]
except KeyError:
pass
- data = klass._load_data(name)
- klass._cmap_cache[name] = cmap = PyCMap(name, data)
+ data = cls._load_data(name)
+ cls._cmap_cache[name] = cmap = PyCMap(name, data)
return cmap
@classmethod
- def get_unicode_map(klass, name, vertical=False):
+ def get_unicode_map(cls, name, vertical=False):
try:
- return klass._umap_cache[name][vertical]
+ return cls._umap_cache[name][vertical]
except KeyError:
pass
- data = klass._load_data('to-unicode-%s' % name)
- klass._umap_cache[name] = umaps = [PyUnicodeMap(name, data, v) for v in (False, True)]
- return umaps[vertical]
+ data = cls._load_data('to-unicode-%s' % name)
+ cls._umap_cache[name] = [PyUnicodeMap(name, data, v)
+ for v in (False, True)]
+ return cls._umap_cache[name][vertical]
class CMapParser(PSStackParser):
@@ -375,7 +377,7 @@ class CMapParser(PSStackParser):
for (s, e, code) in choplist(3, objs):
if (not isinstance(s, bytes) or not isinstance(e, bytes) or
len(s) != len(e)):
- continue
+ continue
s1 = nunpack(s)
e1 = nunpack(e)
if isinstance(code, list):
diff --git a/pdfminer/converter.py b/pdfminer/converter.py
index eaf0520..fb333a2 100644
--- a/pdfminer/converter.py
+++ b/pdfminer/converter.py
@@ -1,4 +1,3 @@
-
# -*- coding: utf-8 -*-
import logging
import re
@@ -24,15 +23,12 @@ from .utils import enc
from .utils import bbox2str
from . import utils
-import six # Python 2+3 compatibility
+import six
log = logging.getLogger(__name__)
-## PDFLayoutAnalyzer
-##
class PDFLayoutAnalyzer(PDFTextDevice):
-
def __init__(self, rsrcmgr, pageno=1, laparams=None):
PDFTextDevice.__init__(self, rsrcmgr)
self.pageno = pageno
@@ -87,7 +83,8 @@ class PDFLayoutAnalyzer(PDFTextDevice):
(x1, y1) = apply_matrix_pt(self.ctm, (x1, y1))
if x0 == x1 or y0 == y1:
self.cur_item.add(LTLine(gstate.linewidth, (x0, y0), (x1, y1),
- stroke, fill, evenodd, gstate.scolor, gstate.ncolor))
+ stroke, fill, evenodd, gstate.scolor,
+ gstate.ncolor))
return
if shape == 'mlllh':
# rectangle
@@ -99,21 +96,23 @@ class PDFLayoutAnalyzer(PDFTextDevice):
(x1, y1) = apply_matrix_pt(self.ctm, (x1, y1))
(x2, y2) = apply_matrix_pt(self.ctm, (x2, y2))
(x3, y3) = apply_matrix_pt(self.ctm, (x3, y3))
- if ((x0 == x1 and y1 == y2 and x2 == x3 and y3 == y0) or
- (y0 == y1 and x1 == x2 and y2 == y3 and x3 == x0)):
+ if (x0 == x1 and y1 == y2 and x2 == x3 and y3 == y0) or \
+ (y0 == y1 and x1 == x2 and y2 == y3 and x3 == x0):
self.cur_item.add(LTRect(gstate.linewidth, (x0, y0, x2, y2),
- stroke, fill, evenodd, gstate.scolor, gstate.ncolor))
+ stroke, fill, evenodd, gstate.scolor,
+ gstate.ncolor))
return
# other shapes
pts = []
for p in path:
for i in range(1, len(p), 2):
pts.append(apply_matrix_pt(self.ctm, (p[i], p[i+1])))
- self.cur_item.add(LTCurve(gstate.linewidth, pts, stroke, fill,
- evenodd, gstate.scolor, gstate.ncolor))
+ self.cur_item.add(LTCurve(gstate.linewidth, pts, stroke, fill, evenodd,
+ gstate.scolor, gstate.ncolor))
return
- def render_char(self, matrix, font, fontsize, scaling, rise, cid, ncs, graphicstate):
+ def render_char(self, matrix, font, fontsize, scaling, rise, cid, ncs,
+ graphicstate):
try:
text = font.to_unichr(cid)
assert isinstance(text, six.text_type), str(type(text))
@@ -121,7 +120,8 @@ class PDFLayoutAnalyzer(PDFTextDevice):
text = self.handle_undefined_char(font, cid)
textwidth = font.char_width(cid)
textdisp = font.char_disp(cid)
- item = LTChar(matrix, font, fontsize, scaling, rise, text, textwidth, textdisp, ncs, graphicstate)
+ item = LTChar(matrix, font, fontsize, scaling, rise, text, textwidth,
+ textdisp, ncs, graphicstate)
self.cur_item.add(item)
return item.adv
@@ -133,12 +133,10 @@ class PDFLayoutAnalyzer(PDFTextDevice):
return
-## PDFPageAggregator
-##
class PDFPageAggregator(PDFLayoutAnalyzer):
-
def __init__(self, rsrcmgr, pageno=1, laparams=None):
- PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno, laparams=laparams)
+ PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno,
+ laparams=laparams)
self.result = None
return
@@ -150,12 +148,11 @@ class PDFPageAggregator(PDFLayoutAnalyzer):
return self.result
-## PDFConverter
-##
class PDFConverter(PDFLayoutAnalyzer):
-
- def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None):
- PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno, laparams=laparams)
+ def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1,
+ laparams=None):
+ PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno,
+ laparams=laparams)
self.outfp = outfp
self.codec = codec
if hasattr(self.outfp, 'mode'):
@@ -178,13 +175,11 @@ class PDFConverter(PDFLayoutAnalyzer):
return
-## TextConverter
-##
class TextConverter(PDFConverter):
-
def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
showpageno=False, imagewriter=None):
- PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno, laparams=laparams)
+ PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno,
+ laparams=laparams)
self.showpageno = showpageno
self.imagewriter = imagewriter
return
@@ -227,12 +222,8 @@ class TextConverter(PDFConverter):
return
-## HTMLConverter
-##
class HTMLConverter(PDFConverter):
-
RECT_COLORS = {
- #'char': 'green',
'figure': 'yellow',
'textline': 'magenta',
'textbox': 'cyan',
@@ -248,10 +239,15 @@ class HTMLConverter(PDFConverter):
def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
scale=1, fontscale=1.0, layoutmode='normal', showpageno=True,
- pagemargin=50, imagewriter=None, debug=0,
- rect_colors={'curve': 'black', 'page': 'gray'},
- text_colors={'char': 'black'}):
- PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno, laparams=laparams)
+ pagemargin=50, imagewriter=None, debug=0, rect_colors=None,
+ text_colors=None):
+ PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno,
+ laparams=laparams)
+ if text_colors is None:
+ text_colors = {'char': 'black'}
+ if rect_colors is None:
+ rect_colors = {'curve': 'black', 'page': 'gray'}
+
self.scale = scale
self.fontscale = fontscale
self.layoutmode = layoutmode
@@ -280,15 +276,20 @@ class HTMLConverter(PDFConverter):
def write_header(self):
self.write('
\n')
if self.codec:
- self.write('\n' % self.codec)
+ s = '\n' % self.codec
else:
- self.write('\n')
+ s = '\n'
+ self.write(s)
self.write('\n')
return
def write_footer(self):
- self.write('Page: %s
\n' %
- ', '.join('%s' % (i, i) for i in range(1, self.pageno)))
+ page_links = ['%s' % (i, i)
+ for i in range(1, self.pageno)]
+ s = 'Page: %s
\n' % \
+ ', '.join(page_links)
+ self.write(s)
self.write('\n')
return
@@ -299,32 +300,39 @@ class HTMLConverter(PDFConverter):
def place_rect(self, color, borderwidth, x, y, w, h):
color = self.rect_colors.get(color)
if color is not None:
- self.write('\n' %
- (color, borderwidth,
- x*self.scale, (self._yoffset-y)*self.scale,
- w*self.scale, h*self.scale))
+ s = '\n' % \
+ (color, borderwidth, x * self.scale,
+ (self._yoffset - y) * self.scale, w * self.scale,
+ h * self.scale)
+ self.write(
+ s)
return
def place_border(self, color, borderwidth, item):
- self.place_rect(color, borderwidth, item.x0, item.y1, item.width, item.height)
+ self.place_rect(color, borderwidth, item.x0, item.y1, item.width,
+ item.height)
return
def place_image(self, item, borderwidth, x, y, w, h):
if self.imagewriter is not None:
name = self.imagewriter.export_image(item)
- self.write('\n' %
- (enc(name, None), borderwidth,
- x*self.scale, (self._yoffset-y)*self.scale,
- w*self.scale, h*self.scale))
+ s = '\n' % \
+ (enc(name, None), borderwidth, x * self.scale,
+ (self._yoffset - y) * self.scale, w * self.scale,
+ h * self.scale)
+ self.write(s)
return
def place_text(self, color, text, x, y, size):
color = self.text_colors.get(color)
if color is not None:
- self.write('' %
- (color, x*self.scale, (self._yoffset-y)*self.scale, size*self.scale*self.fontscale))
+ s = '' % \
+ (color, x * self.scale, (self._yoffset - y) * self.scale,
+ size * self.scale * self.fontscale)
+ self.write(s)
self.write_text(text)
self.write('\n')
return
@@ -332,11 +340,12 @@ class HTMLConverter(PDFConverter):
def begin_div(self, color, borderwidth, x, y, w, h, writing_mode=False):
self._fontstack.append(self._font)
self._font = None
- self.write('' %
- (color, borderwidth, writing_mode,
- x*self.scale, (self._yoffset-y)*self.scale,
- w*self.scale, h*self.scale))
+ s = '
' % \
+ (color, borderwidth, writing_mode, x * self.scale,
+ (self._yoffset - y) * self.scale, w * self.scale, h * self.scale)
+ self.write(s)
return
def end_div(self, color):
@@ -376,7 +385,8 @@ class HTMLConverter(PDFConverter):
if self.showpageno:
self.write('
' %
((self._yoffset-item.y1)*self.scale))
- self.write('
Page %s\n' % (item.pageid, item.pageid))
+ self.write('
Page %s\n' % (
+ item.pageid, item.pageid))
for child in item:
render(child)
if item.groups is not None:
@@ -385,12 +395,14 @@ class HTMLConverter(PDFConverter):
elif isinstance(item, LTCurve):
self.place_border('curve', 1, item)
elif isinstance(item, LTFigure):
- self.begin_div('figure', 1, item.x0, item.y1, item.width, item.height)
+ self.begin_div('figure', 1, item.x0, item.y1, item.width,
+ item.height)
for child in item:
render(child)
self.end_div('figure')
elif isinstance(item, LTImage):
- self.place_image(item, 1, item.x0, item.y1, item.width, item.height)
+ self.place_image(item, 1, item.x0, item.y1, item.width,
+ item.height)
else:
if self.layoutmode == 'exact':
if isinstance(item, LTTextLine):
@@ -399,12 +411,14 @@ class HTMLConverter(PDFConverter):
render(child)
elif isinstance(item, LTTextBox):
self.place_border('textbox', 1, item)
- self.place_text('textbox', str(item.index+1), item.x0, item.y1, 20)
+ self.place_text('textbox', str(item.index+1), item.x0,
+ item.y1, 20)
for child in item:
render(child)
elif isinstance(item, LTChar):
self.place_border('char', 1, item)
- self.place_text('char', item.get_text(), item.x0, item.y1, item.size)
+ self.place_text('char', item.get_text(), item.x0,
+ item.y1, item.size)
else:
if isinstance(item, LTTextLine):
for child in item:
@@ -412,13 +426,15 @@ class HTMLConverter(PDFConverter):
if self.layoutmode != 'loose':
self.put_newline()
elif isinstance(item, LTTextBox):
- self.begin_div('textbox', 1, item.x0, item.y1, item.width, item.height,
+ self.begin_div('textbox', 1, item.x0, item.y1,
+ item.width, item.height,
item.get_writing_mode())
for child in item:
render(child)
self.end_div('textbox')
elif isinstance(item, LTChar):
- self.put_text(item.get_text(), item.fontname, item.size)
+ self.put_text(item.get_text(), item.fontname,
+ item.size)
elif isinstance(item, LTText):
self.write_text(item.get_text())
return
@@ -431,15 +447,14 @@ class HTMLConverter(PDFConverter):
return
-## XMLConverter
-##
class XMLConverter(PDFConverter):
CONTROL = re.compile(u'[\x00-\x08\x0b-\x0c\x0e-\x1f]')
- def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1,
- laparams=None, imagewriter=None, stripcontrol=False):
- PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno, laparams=laparams)
+ def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
+ imagewriter=None, stripcontrol=False):
+ PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno,
+ laparams=laparams)
self.imagewriter = imagewriter
self.stripcontrol = stripcontrol
self.write_header()
@@ -473,7 +488,7 @@ class XMLConverter(PDFConverter):
def show_group(item):
if isinstance(item, LTTextBox):
self.write('
\n' %
- (item.index, bbox2str(item.bbox)))
+ (item.index, bbox2str(item.bbox)))
elif isinstance(item, LTTextGroup):
self.write('
\n' % bbox2str(item.bbox))
for child in item:
@@ -483,8 +498,9 @@ class XMLConverter(PDFConverter):
def render(item):
if isinstance(item, LTPage):
- self.write('\n' %
- (item.pageid, bbox2str(item.bbox), item.rotate))
+ s = '\n' % \
+ (item.pageid, bbox2str(item.bbox), item.rotate)
+ self.write(s)
for child in item:
render(child)
if item.groups is not None:
@@ -494,17 +510,21 @@ class XMLConverter(PDFConverter):
self.write('\n')
self.write('\n')
elif isinstance(item, LTLine):
- self.write('\n' %
- (item.linewidth, bbox2str(item.bbox)))
+ s = '\n' % \
+ (item.linewidth, bbox2str(item.bbox))
+ self.write(s)
elif isinstance(item, LTRect):
- self.write('\n' %
- (item.linewidth, bbox2str(item.bbox)))
+ s = '\n' % \
+ (item.linewidth, bbox2str(item.bbox))
+ self.write(s)
elif isinstance(item, LTCurve):
- self.write('\n' %
- (item.linewidth, bbox2str(item.bbox), item.get_pts()))
+ s = '\n' % \
+ (item.linewidth, bbox2str(item.bbox), item.get_pts())
+ self.write(s)
elif isinstance(item, LTFigure):
- self.write('