diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py index 742a32e..742a41b 100644 --- a/pdfminer/cmapdb.py +++ b/pdfminer/cmapdb.py @@ -15,7 +15,10 @@ import sys import os import os.path import gzip -import cPickle as pickle +try: + import cPickle as pickle +except ImportError: + import pickle as pickle import struct import logging from psparser import PSStackParser diff --git a/pdfminer/image.py b/pdfminer/image.py index 9dfc9a1..68b6019 100644 --- a/pdfminer/image.py +++ b/pdfminer/image.py @@ -1,7 +1,7 @@ #!/usr/bin/env python -import cStringIO import struct import os, os.path +from io import BytesIO from pdftypes import LITERALS_DCT_DECODE from pdfcolor import LITERAL_DEVICE_GRAY, LITERAL_DEVICE_RGB, LITERAL_DEVICE_CMYK @@ -83,7 +83,7 @@ class ImageWriter(object): if LITERAL_DEVICE_CMYK in image.colorspace: from PIL import Image from PIL import ImageChops - ifp = cStringIO.StringIO(raw_data) + ifp = BytesIO(raw_data) i = Image.open(ifp) i = ImageChops.invert(i) i = i.convert('RGB') diff --git a/pdfminer/lzw.py b/pdfminer/lzw.py index 3259c30..d7320ff 100644 --- a/pdfminer/lzw.py +++ b/pdfminer/lzw.py @@ -1,10 +1,7 @@ #!/usr/bin/env python import sys import logging -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO +from io import BytesIO class CorruptDataError(Exception): @@ -103,7 +100,7 @@ def lzwdecode(data): >>> lzwdecode('\x80\x0b\x60\x50\x22\x0c\x0c\x85\x01') '\x2d\x2d\x2d\x2d\x2d\x41\x2d\x2d\x2d\x42' """ - fp = StringIO(data) + fp = BytesIO(data) return ''.join(LZWDecoder(fp).run()) if __name__ == '__main__': diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py index 860a99c..fb1004a 100644 --- a/pdfminer/pdffont.py +++ b/pdfminer/pdffont.py @@ -1,10 +1,7 @@ #!/usr/bin/env python import sys import struct -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO +from io import BytesIO from cmapdb import CMapDB, CMapParser, FileUnicodeMap, CMap from encodingdb import EncodingDB, name2unicode from psparser import PSStackParser @@ -122,7 +119,7 @@ NIBBLES = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.', 'e', 'e-', Non ## def getdict(data): d = {} - fp = StringIO(data) + fp = BytesIO(data) stack = [] while 1: c = fp.read(1) @@ -538,7 +535,7 @@ class PDFSimpleFont(PDFFont): if 'ToUnicode' in spec: strm = stream_value(spec['ToUnicode']) self.unicode_map = FileUnicodeMap() - CMapParser(self.unicode_map, StringIO(strm.get_data())).run() + CMapParser(self.unicode_map, BytesIO(strm.get_data())).run() PDFFont.__init__(self, descriptor, widths) return @@ -578,7 +575,7 @@ class PDFType1Font(PDFSimpleFont): self.fontfile = stream_value(descriptor.get('FontFile')) length1 = int_value(self.fontfile['Length1']) data = self.fontfile.get_data()[:length1] - parser = Type1FontHeaderParser(StringIO(data)) + parser = Type1FontHeaderParser(BytesIO(data)) self.cid2unicode = parser.get_encoding() return @@ -651,12 +648,12 @@ class PDFCIDFont(PDFFont): if 'FontFile2' in descriptor: self.fontfile = stream_value(descriptor.get('FontFile2')) ttf = TrueTypeFont(self.basefont, - StringIO(self.fontfile.get_data())) + BytesIO(self.fontfile.get_data())) self.unicode_map = None if 'ToUnicode' in spec: strm = stream_value(spec['ToUnicode']) self.unicode_map = FileUnicodeMap() - CMapParser(self.unicode_map, StringIO(strm.get_data())).run() + CMapParser(self.unicode_map, BytesIO(strm.get_data())).run() elif self.cidcoding in ('Adobe-Identity', 'Adobe-UCS'): if ttf: try: diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py index c734c68..727eb9f 100644 --- a/pdfminer/pdfinterp.py +++ b/pdfminer/pdfinterp.py @@ -2,10 +2,7 @@ import sys import re import logging -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO +from io import BytesIO from cmapdb import CMapDB, CMap from psparser import PSTypeError, PSEOF from psparser import PSKeyword, literal_name, keyword_name @@ -218,7 +215,7 @@ class PDFContentParser(PSStackParser): self.istream += 1 else: raise PSEOF('Unexpected EOF, file truncated?') - self.fp = StringIO(strm.get_data()) + self.fp = BytesIO(strm.get_data()) return def seek(self, pos): diff --git a/pdfminer/pdfparser.py b/pdfminer/pdfparser.py index 7516f3c..e48d561 100644 --- a/pdfminer/pdfparser.py +++ b/pdfminer/pdfparser.py @@ -1,10 +1,7 @@ #!/usr/bin/env python import sys import logging -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO +from io import BytesIO from psparser import PSStackParser from psparser import PSSyntaxError, PSEOF from psparser import KWD, STRICT @@ -147,7 +144,7 @@ class PDFStreamParser(PDFParser): """ def __init__(self, data): - PDFParser.__init__(self, StringIO(data)) + PDFParser.__init__(self, BytesIO(data)) return def flush(self): diff --git a/pdfminer/psparser.py b/pdfminer/psparser.py index a07b209..8d5b5d2 100644 --- a/pdfminer/psparser.py +++ b/pdfminer/psparser.py @@ -664,12 +664,12 @@ func/a/b{(c)do*}def ] def get_tokens(self, s): - import StringIO + from io import BytesIO class MyParser(PSBaseParser): def flush(self): self.add_results(*self.popall()) - parser = MyParser(StringIO.StringIO(s)) + parser = MyParser(BytesIO(s)) r = [] try: while 1: @@ -679,12 +679,12 @@ func/a/b{(c)do*}def return r def get_objects(self, s): - import StringIO + from io import BytesIO class MyParser(PSStackParser): def flush(self): self.add_results(*self.popall()) - parser = MyParser(StringIO.StringIO(s)) + parser = MyParser(BytesIO(s)) r = [] try: while 1: diff --git a/tools/conv_cmap.py b/tools/conv_cmap.py index 75a9a76..88cab57 100755 --- a/tools/conv_cmap.py +++ b/tools/conv_cmap.py @@ -1,6 +1,9 @@ #!/usr/bin/env python import sys -import cPickle as pickle +try: + import cPickle as pickle +except ImportError: + import pickle as pickle ## CMapConverter