diff --git a/pdfminer/__init__.py b/pdfminer/__init__.py index 43e8acf..d302d53 100644 --- a/pdfminer/__init__.py +++ b/pdfminer/__init__.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- __version__ = '20140915' if __name__ == '__main__': diff --git a/pdfminer/converter.py b/pdfminer/converter.py index b0efc0d..065f21e 100644 --- a/pdfminer/converter.py +++ b/pdfminer/converter.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- import logging import re from .pdfdevice import PDFTextDevice diff --git a/pdfminer/high_level.py b/pdfminer/high_level.py index 5555fcb..b79a1c0 100644 --- a/pdfminer/high_level.py +++ b/pdfminer/high_level.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# -*- coding: utf-8 -*- """ Functions that encapsulate "usual" use-cases for pdfminer, for use making bundled scripts and for using pdfminer as a module for routine tasks. diff --git a/pdfminer/utils.py b/pdfminer/utils.py index 6294e5d..696dd1d 100644 --- a/pdfminer/utils.py +++ b/pdfminer/utils.py @@ -10,6 +10,9 @@ INF = (1<<31) - 1 import six #Python 2+3 compatibility import chardet # For str encoding detection in Py3 +if six.PY3: + unicode = str + def make_compat_bytes(in_str): "In Py2, does nothing. In Py3, converts to bytes, encoding to unicode." assert isinstance(in_str, str) @@ -20,7 +23,7 @@ def make_compat_bytes(in_str): def make_compat_str(in_str): "In Py2, does nothing. In Py3, converts to string, guessing encoding." - assert isinstance(in_str, (bytes, str)) + assert isinstance(in_str, (bytes, str, unicode)) if six.PY3 and isinstance(in_str, bytes): enc = chardet.detect(in_str) in_str = in_str.decode(enc['encoding']) @@ -29,7 +32,7 @@ def make_compat_str(in_str): def compatible_encode_method(bytesorstring, encoding='utf-8', erraction='ignore'): "When Py2 str.encode is called, it often means bytes.encode in Py3. This does either." if six.PY2: - assert isinstance(bytesorstring, str), ("Error: Assumed was calling" + assert isinstance(bytesorstring, (str, unicode)), ("Error: Assumed was calling" " encode() on a string in Py2: {}").format(type(bytesorstring)) return bytesorstring.encode(encoding, erraction) if six.PY3: diff --git a/tests/test_tools_dumppdf.py b/tests/test_tools_dumppdf.py index 4d240e9..ef93848 100644 --- a/tests/test_tools_dumppdf.py +++ b/tests/test_tools_dumppdf.py @@ -1,9 +1,16 @@ #!/usr/bin/python # -*- coding: utf-8 -*- +import six import nose, logging, os -import tools.dumppdf as dumppdf +if six.PY3: + from tools import dumppdf +elif six.PY2: + import os, sys +# raise Exception("{}\n{}".format(sys.path, os.path.abspath(os.path.curdir))) + sys.path.append(os.path.abspath(os.path.curdir)) + import tools.dumppdf as dumppdf path=os.path.dirname(os.path.abspath(__file__))+'/' @@ -43,4 +50,4 @@ class TestDumpPDF(): if __name__ == '__main__': #import logging,sys,os,six #logging.basicConfig(level=logging.DEBUG, filename='%s_%d.%d.log'%(os.path.basename(__file__),sys.version_info[0],sys.version_info[1])) - nose.runmodule() \ No newline at end of file + nose.runmodule()