diff --git a/Makefile b/Makefile index 7ed7050..2783a0f 100644 --- a/Makefile +++ b/Makefile @@ -61,6 +61,6 @@ test: cmap pdfminer/ascii85.py \ pdfminer/runlength.py \ pdfminer/rijndael.py - $(PYTHON) pdfminer/ccitt.py - $(PYTHON) pdfminer/psparser.py + $(PYTHON) -m pdfminer.ccitt + $(PYTHON) -m pdfminer.psparser cd samples && $(MAKE) test diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py index 742a41b..c53793b 100644 --- a/pdfminer/cmapdb.py +++ b/pdfminer/cmapdb.py @@ -21,12 +21,14 @@ except ImportError: import pickle as pickle import struct import logging -from psparser import PSStackParser -from psparser import PSSyntaxError, PSEOF -from psparser import PSLiteral -from psparser import literal_name -from encodingdb import name2unicode -from utils import choplist, nunpack +from .psparser import PSStackParser +from .psparser import PSSyntaxError +from .psparser import PSEOF +from .psparser import PSLiteral +from .psparser import literal_name +from .encodingdb import name2unicode +from .utils import choplist +from .utils import nunpack class CMapError(Exception): diff --git a/pdfminer/converter.py b/pdfminer/converter.py index e2c24e0..3e515d6 100644 --- a/pdfminer/converter.py +++ b/pdfminer/converter.py @@ -1,14 +1,25 @@ #!/usr/bin/env python -import sys import logging import re -from pdfdevice import PDFTextDevice -from pdffont import PDFUnicodeNotDefined -from layout import LTContainer, LTPage, LTText, LTLine, LTRect, LTCurve -from layout import LTFigure, LTImage, LTChar, LTTextLine -from layout import LTTextBox, LTTextBoxVertical, LTTextGroup -from utils import apply_matrix_pt, mult_matrix -from utils import enc, bbox2str +from .pdfdevice import PDFTextDevice +from .pdffont import PDFUnicodeNotDefined +from .layout import LTContainer +from .layout import LTPage +from .layout import LTText +from .layout import LTLine +from .layout import LTRect +from .layout import LTCurve +from .layout import LTFigure +from .layout import LTImage +from .layout import LTChar +from .layout import LTTextLine +from .layout import LTTextBox +from .layout import LTTextBoxVertical +from .layout import LTTextGroup +from .utils import apply_matrix_pt +from .utils import mult_matrix +from .utils import enc +from .utils import bbox2str ## PDFLayoutAnalyzer diff --git a/pdfminer/encodingdb.py b/pdfminer/encodingdb.py index 61a161e..b3263bd 100644 --- a/pdfminer/encodingdb.py +++ b/pdfminer/encodingdb.py @@ -1,9 +1,8 @@ #!/usr/bin/env python - import re -from psparser import PSLiteral -from glyphlist import glyphname2unicode -from latin_enc import ENCODING +from .psparser import PSLiteral +from .glyphlist import glyphname2unicode +from .latin_enc import ENCODING STRIP_NAME = re.compile(r'[0-9]+') diff --git a/pdfminer/image.py b/pdfminer/image.py index 68b6019..e796e9c 100644 --- a/pdfminer/image.py +++ b/pdfminer/image.py @@ -1,9 +1,12 @@ #!/usr/bin/env python import struct -import os, os.path +import os +import os.path from io import BytesIO -from pdftypes import LITERALS_DCT_DECODE -from pdfcolor import LITERAL_DEVICE_GRAY, LITERAL_DEVICE_RGB, LITERAL_DEVICE_CMYK +from .pdftypes import LITERALS_DCT_DECODE +from .pdfcolor import LITERAL_DEVICE_GRAY +from .pdfcolor import LITERAL_DEVICE_RGB +from .pdfcolor import LITERAL_DEVICE_CMYK def align32(x): diff --git a/pdfminer/layout.py b/pdfminer/layout.py index 2b34a4b..32b706f 100644 --- a/pdfminer/layout.py +++ b/pdfminer/layout.py @@ -1,6 +1,13 @@ #!/usr/bin/env python -from utils import INF, Plane, get_bound, uniq, csort, fsplit -from utils import bbox2str, matrix2str, apply_matrix_pt +from .utils import INF +from .utils import Plane +from .utils import get_bound +from .utils import uniq +from .utils import csort +from .utils import fsplit +from .utils import bbox2str +from .utils import matrix2str +from .utils import apply_matrix_pt ## IndexAssigner diff --git a/pdfminer/lzw.py b/pdfminer/lzw.py index d7320ff..c366cfb 100644 --- a/pdfminer/lzw.py +++ b/pdfminer/lzw.py @@ -1,6 +1,4 @@ #!/usr/bin/env python -import sys -import logging from io import BytesIO diff --git a/pdfminer/pdfcolor.py b/pdfminer/pdfcolor.py index 9cf812d..8384396 100644 --- a/pdfminer/pdfcolor.py +++ b/pdfminer/pdfcolor.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -from psparser import LIT +from .psparser import LIT ## PDFColorSpace diff --git a/pdfminer/pdfdevice.py b/pdfminer/pdfdevice.py index f6506f4..3efee9e 100644 --- a/pdfminer/pdfdevice.py +++ b/pdfminer/pdfdevice.py @@ -1,7 +1,10 @@ #!/usr/bin/env python -from utils import mult_matrix, translate_matrix -from utils import enc, bbox2str, isnumber -from pdffont import PDFUnicodeNotDefined +from .utils import mult_matrix +from .utils import translate_matrix +from .utils import enc +from .utils import bbox2str +from .utils import isnumber +from .pdffont import PDFUnicodeNotDefined ## PDFDevice diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py index 762a9cc..1c28ac2 100644 --- a/pdfminer/pdfdocument.py +++ b/pdfminer/pdfdocument.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -import sys import re import struct import logging @@ -13,20 +12,27 @@ try: from Crypto.Hash import SHA256 except ImportError: AES = SHA256 = None - import arcfour as ARC4 - -from psparser import PSEOF -from psparser import literal_name -from psparser import LIT, KWD, STRICT -from pdftypes import PDFException, PDFTypeError, PDFNotImplementedError -from pdftypes import PDFObjectNotFound, PDFStream -from pdftypes import decipher_all -from pdftypes import int_value -from pdftypes import str_value, list_value, dict_value, stream_value -from pdfparser import PDFSyntaxError -from pdfparser import PDFStreamParser -from utils import choplist, nunpack -from utils import decode_text + from . import arcfour as ARC4 +from .psparser import PSEOF +from .psparser import literal_name +from .psparser import LIT +from .psparser import KWD +from .psparser import STRICT +from .pdftypes import PDFException +from .pdftypes import PDFTypeError +from .pdftypes import PDFStream +from .pdftypes import PDFObjectNotFound +from .pdftypes import decipher_all +from .pdftypes import int_value +from .pdftypes import str_value +from .pdftypes import list_value +from .pdftypes import dict_value +from .pdftypes import stream_value +from .pdfparser import PDFSyntaxError +from .pdfparser import PDFStreamParser +from .utils import choplist +from .utils import nunpack +from .utils import decode_text ## Exceptions diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py index fb1004a..149751c 100644 --- a/pdfminer/pdffont.py +++ b/pdfminer/pdffont.py @@ -2,17 +2,31 @@ import sys import struct from io import BytesIO -from cmapdb import CMapDB, CMapParser, FileUnicodeMap, CMap -from encodingdb import EncodingDB, name2unicode -from psparser import PSStackParser -from psparser import PSEOF -from psparser import LIT, KWD, STRICT -from psparser import PSLiteral, literal_name -from pdftypes import PDFException, resolve1 -from pdftypes import int_value, num_value -from pdftypes import list_value, dict_value, stream_value -from fontmetrics import FONT_METRICS -from utils import apply_matrix_norm, nunpack, choplist, isnumber +from .cmapdb import CMapDB +from .cmapdb import CMapParser +from .cmapdb import FileUnicodeMap +from .cmapdb import CMap +from .encodingdb import EncodingDB +from .encodingdb import name2unicode +from .psparser import PSStackParser +from .psparser import PSEOF +from .psparser import LIT +from .psparser import KWD +from .psparser import STRICT +from .psparser import PSLiteral +from .psparser import literal_name +from .pdftypes import PDFException +from .pdftypes import resolve1 +from .pdftypes import int_value +from .pdftypes import num_value +from .pdftypes import list_value +from .pdftypes import dict_value +from .pdftypes import stream_value +from .fontmetrics import FONT_METRICS +from .utils import apply_matrix_norm +from .utils import nunpack +from .utils import choplist +from .utils import isnumber def get_widths(seq): @@ -566,7 +580,7 @@ class PDFType1Font(PDFSimpleFont): except KeyError: descriptor = dict_value(spec.get('FontDescriptor', {})) firstchar = int_value(spec.get('FirstChar', 0)) - lastchar = int_value(spec.get('LastChar', 255)) + #lastchar = int_value(spec.get('LastChar', 255)) widths = list_value(spec.get('Widths', [0]*256)) widths = dict((i+firstchar, w) for (i, w) in enumerate(widths)) PDFSimpleFont.__init__(self, descriptor, widths, spec) @@ -595,7 +609,7 @@ class PDFType3Font(PDFSimpleFont): def __init__(self, rsrcmgr, spec): firstchar = int_value(spec.get('FirstChar', 0)) - lastchar = int_value(spec.get('LastChar', 0)) + #lastchar = int_value(spec.get('LastChar', 0)) widths = list_value(spec.get('Widths', [0]*256)) widths = dict((i+firstchar, w) for (i, w) in enumerate(widths)) if 'FontDescriptor' in spec: diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py index 727eb9f..edf7c4f 100644 --- a/pdfminer/pdfinterp.py +++ b/pdfminer/pdfinterp.py @@ -1,25 +1,35 @@ #!/usr/bin/env python -import sys import re import logging from io import BytesIO -from cmapdb import CMapDB, CMap -from psparser import PSTypeError, PSEOF -from psparser import PSKeyword, literal_name, keyword_name -from psparser import PSStackParser -from psparser import LIT, KWD, STRICT -from pdftypes import PDFException, PDFStream, PDFObjRef -from pdftypes import resolve1 -from pdftypes import list_value, dict_value, stream_value -from pdffont import PDFFontError -from pdffont import PDFType1Font, PDFTrueTypeFont, PDFType3Font -from pdffont import PDFCIDFont -from pdfcolor import PDFColorSpace -from pdfcolor import PREDEFINED_COLORSPACE -from pdfcolor import LITERAL_DEVICE_GRAY, LITERAL_DEVICE_RGB -from pdfcolor import LITERAL_DEVICE_CMYK -from utils import choplist -from utils import mult_matrix, MATRIX_IDENTITY +from .cmapdb import CMapDB +from .cmapdb import CMap +from .psparser import PSTypeError +from .psparser import PSEOF +from .psparser import PSKeyword +from .psparser import literal_name +from .psparser import keyword_name +from .psparser import PSStackParser +from .psparser import LIT +from .psparser import KWD +from .psparser import STRICT +from .pdftypes import PDFException +from .pdftypes import PDFStream +from .pdftypes import PDFObjRef +from .pdftypes import resolve1 +from .pdftypes import list_value +from .pdftypes import dict_value +from .pdftypes import stream_value +from .pdffont import PDFFontError +from .pdffont import PDFType1Font +from .pdffont import PDFTrueTypeFont +from .pdffont import PDFType3Font +from .pdffont import PDFCIDFont +from .pdfcolor import PDFColorSpace +from .pdfcolor import PREDEFINED_COLORSPACE +from .utils import choplist +from .utils import mult_matrix +from .utils import MATRIX_IDENTITY ## Exceptions diff --git a/pdfminer/pdfpage.py b/pdfminer/pdfpage.py index 807281c..5cf081a 100644 --- a/pdfminer/pdfpage.py +++ b/pdfminer/pdfpage.py @@ -1,14 +1,14 @@ #!/usr/bin/env python -import sys import logging -from psparser import LIT -from pdftypes import PDFObjectNotFound -from pdftypes import resolve1 -from pdftypes import int_value, list_value, dict_value -from pdfparser import PDFParser -from pdfdocument import PDFDocument -from pdfdocument import PDFEncryptionError -from pdfdocument import PDFTextExtractionNotAllowed +from .psparser import LIT +from .pdftypes import PDFObjectNotFound +from .pdftypes import resolve1 +from .pdftypes import int_value +from .pdftypes import list_value +from .pdftypes import dict_value +from .pdfparser import PDFParser +from .pdfdocument import PDFDocument +from .pdfdocument import PDFTextExtractionNotAllowed # some predefined literals and keywords. LITERAL_PAGE = LIT('Page') diff --git a/pdfminer/pdfparser.py b/pdfminer/pdfparser.py index e48d561..f061917 100644 --- a/pdfminer/pdfparser.py +++ b/pdfminer/pdfparser.py @@ -1,14 +1,16 @@ #!/usr/bin/env python -import sys import logging from io import BytesIO -from psparser import PSStackParser -from psparser import PSSyntaxError, PSEOF -from psparser import KWD, STRICT -from pdftypes import PDFException -from pdftypes import PDFStream, PDFObjRef -from pdftypes import int_value -from pdftypes import dict_value +from .psparser import PSStackParser +from .psparser import PSSyntaxError +from .psparser import PSEOF +from .psparser import KWD +from .psparser import STRICT +from .pdftypes import PDFException +from .pdftypes import PDFStream +from .pdftypes import PDFObjRef +from .pdftypes import int_value +from .pdftypes import dict_value ## Exceptions diff --git a/pdfminer/pdftypes.py b/pdfminer/pdftypes.py index 595eff3..05294e7 100644 --- a/pdfminer/pdftypes.py +++ b/pdfminer/pdftypes.py @@ -1,12 +1,17 @@ #!/usr/bin/env python import zlib -from lzw import lzwdecode -from ascii85 import ascii85decode, asciihexdecode -from runlength import rldecode -from ccitt import ccittfaxdecode -from psparser import PSException, PSObject -from psparser import LIT, STRICT -from utils import apply_png_predictor, isnumber +from .lzw import lzwdecode +from .ascii85 import ascii85decode +from .ascii85 import asciihexdecode +from .runlength import rldecode +from .ccitt import ccittfaxdecode +from .psparser import PSException +from .psparser import PSObject +from .psparser import LIT +from .psparser import STRICT +from .utils import apply_png_predictor +from .utils import isnumber + LITERAL_CRYPT = LIT('Crypt') diff --git a/pdfminer/psparser.py b/pdfminer/psparser.py index 8d5b5d2..7c83391 100644 --- a/pdfminer/psparser.py +++ b/pdfminer/psparser.py @@ -1,8 +1,7 @@ #!/usr/bin/env python -import sys import re import logging -from utils import choplist +from .utils import choplist STRICT = 0