Merge branch 'develop' into pdfstream-as-cmap
commit
fe38695739
|
@ -0,0 +1,32 @@
|
||||||
|
# Contributing guidelines
|
||||||
|
|
||||||
|
Any contribution is appreciated! You might want to:
|
||||||
|
|
||||||
|
* Fix spelling errors
|
||||||
|
* Improve documentation
|
||||||
|
* Add tests for untested code
|
||||||
|
* Add new features
|
||||||
|
* Fix bugs
|
||||||
|
|
||||||
|
## How can I contribute?
|
||||||
|
|
||||||
|
* Use [issues](https://github.com/pdfminer/pdfminer.six/issues) to report bugs and request features
|
||||||
|
- If you report a bug in the results for a particular pdf, include that pdf. This allows others to replicate the
|
||||||
|
issue.
|
||||||
|
* Fix issues by [creating pull requests](https://help.github.com/en/articles/creating-a-pull-request)
|
||||||
|
* Help others by giving your thoughts on open issues and pull requests.
|
||||||
|
|
||||||
|
## General guidelines for creating issues and pull requests
|
||||||
|
|
||||||
|
* Search previous issues, as yours might be a duplicate.
|
||||||
|
* When creating a new issue for a bug, include a minimal reproducible example.
|
||||||
|
* When creating a new issue for a feature, be sure to describe the context of the problem you are trying to solve. This
|
||||||
|
will help others to see the importance of your feature request.
|
||||||
|
* Link each pull request to a single issue.
|
||||||
|
* Pull requests should be merged to develop, not master. This ensures that master always equals the released version.
|
||||||
|
* Include unit tests when possible. In case of bugs, this will help to prevent the same mistake in the future. In case
|
||||||
|
of features, this will show that your code works correctly.
|
||||||
|
* Code should work for Python 2.7 and Python 3.x (for now).
|
||||||
|
* Code changes should conform to PEP8 coding style (with a line-width of 120). Existing code may stay as it is.
|
||||||
|
* New features should be well documented using docstrings.
|
||||||
|
* Check spelling and grammar.
|
|
@ -35,7 +35,7 @@ Features
|
||||||
How to Install
|
How to Install
|
||||||
--------------
|
--------------
|
||||||
|
|
||||||
* Install Python 2.7 or newer. (Python 3.x is supported in pdfminer.six)
|
* Install Python 2.7 or newer.
|
||||||
* Install
|
* Install
|
||||||
|
|
||||||
`pip install pdfminer.six`
|
`pip install pdfminer.six`
|
||||||
|
@ -81,6 +81,12 @@ TODO
|
||||||
* Performance improvements.
|
* Performance improvements.
|
||||||
|
|
||||||
|
|
||||||
|
Contributing
|
||||||
|
------------
|
||||||
|
|
||||||
|
Be sure to read the [contribution guidelines](https://github.com/pdfminer/pdfminer.six/blob/master/CONTRIBUTING.md).
|
||||||
|
|
||||||
|
|
||||||
Terms and Conditions
|
Terms and Conditions
|
||||||
--------------------
|
--------------------
|
||||||
|
|
||||||
|
|
|
@ -83,7 +83,7 @@ class CMap(CMapBase):
|
||||||
assert isinstance(cmap, CMap), str(type(cmap))
|
assert isinstance(cmap, CMap), str(type(cmap))
|
||||||
|
|
||||||
def copy(dst, src):
|
def copy(dst, src):
|
||||||
for (k, v) in src.iteritems():
|
for (k, v) in six.iteritems(src):
|
||||||
if isinstance(v, dict):
|
if isinstance(v, dict):
|
||||||
d = {}
|
d = {}
|
||||||
dst[k] = d
|
dst[k] = d
|
||||||
|
@ -110,7 +110,7 @@ class CMap(CMapBase):
|
||||||
if code2cid is None:
|
if code2cid is None:
|
||||||
code2cid = self.code2cid
|
code2cid = self.code2cid
|
||||||
code = ()
|
code = ()
|
||||||
for (k, v) in sorted(code2cid.iteritems()):
|
for (k, v) in sorted(six.iteritems(code2cid)):
|
||||||
c = code+(k,)
|
c = code+(k,)
|
||||||
if isinstance(v, int):
|
if isinstance(v, int):
|
||||||
out.write('code %r = cid %d\n' % (c, v))
|
out.write('code %r = cid %d\n' % (c, v))
|
||||||
|
@ -157,7 +157,7 @@ class UnicodeMap(CMapBase):
|
||||||
return self.cid2unichr[cid]
|
return self.cid2unichr[cid]
|
||||||
|
|
||||||
def dump(self, out=sys.stdout):
|
def dump(self, out=sys.stdout):
|
||||||
for (k, v) in sorted(self.cid2unichr.iteritems()):
|
for (k, v) in sorted(six.iteritems(self.cid2unichr)):
|
||||||
out.write('cid %d = unicode %r\n' % (k, v))
|
out.write('cid %d = unicode %r\n' % (k, v))
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
|
@ -1,28 +1,67 @@
|
||||||
|
import logging
|
||||||
import re
|
import re
|
||||||
from .psparser import PSLiteral
|
|
||||||
from .glyphlist import glyphname2unicode
|
|
||||||
from .latin_enc import ENCODING
|
|
||||||
|
|
||||||
import six # Python 2+3 compatibility
|
import six # Python 2+3 compatibility
|
||||||
|
|
||||||
STRIP_NAME = re.compile(r'[0-9]+')
|
from .glyphlist import glyphname2unicode
|
||||||
|
from .latin_enc import ENCODING
|
||||||
|
from .psparser import PSLiteral
|
||||||
|
|
||||||
|
HEXADECIMAL = re.compile(r'[0-9a-fA-F]+')
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
## name2unicode
|
|
||||||
##
|
|
||||||
def name2unicode(name):
|
def name2unicode(name):
|
||||||
"""Converts Adobe glyph names to Unicode numbers."""
|
"""Converts Adobe glyph names to Unicode numbers.
|
||||||
|
|
||||||
|
In contrast to the specification, this raises a KeyError instead of returning an empty string when the key is unknown.
|
||||||
|
This way the caller must explicitly define what to do when there is not a match.
|
||||||
|
|
||||||
|
Reference: https://github.com/adobe-type-tools/agl-specification#2-the-mapping
|
||||||
|
|
||||||
|
:returns unicode character if name resembles something, otherwise a KeyError
|
||||||
|
"""
|
||||||
|
name = name.split('.')[0]
|
||||||
|
components = name.split('_')
|
||||||
|
|
||||||
|
if len(components) > 1:
|
||||||
|
return ''.join(map(name2unicode, components))
|
||||||
|
|
||||||
|
else:
|
||||||
if name in glyphname2unicode:
|
if name in glyphname2unicode:
|
||||||
return glyphname2unicode[name]
|
return glyphname2unicode.get(name)
|
||||||
m = STRIP_NAME.search(name)
|
|
||||||
if not m:
|
elif name.startswith('uni'):
|
||||||
raise KeyError(name)
|
name_without_uni = name.strip('uni')
|
||||||
return six.unichr(int(m.group(0)))
|
|
||||||
|
if HEXADECIMAL.match(name_without_uni) and len(name_without_uni) % 4 == 0:
|
||||||
|
unicode_digits = [int(name_without_uni[i:i + 4], base=16) for i in range(0, len(name_without_uni), 4)]
|
||||||
|
for digit in unicode_digits:
|
||||||
|
raise_key_error_for_invalid_unicode(digit)
|
||||||
|
characters = map(six.unichr, unicode_digits)
|
||||||
|
return ''.join(characters)
|
||||||
|
|
||||||
|
elif name.startswith('u'):
|
||||||
|
name_without_u = name.strip('u')
|
||||||
|
|
||||||
|
if HEXADECIMAL.match(name_without_u) and 4 <= len(name_without_u) <= 6:
|
||||||
|
unicode_digit = int(name_without_u, base=16)
|
||||||
|
raise_key_error_for_invalid_unicode(unicode_digit)
|
||||||
|
return six.unichr(unicode_digit)
|
||||||
|
|
||||||
|
raise KeyError('Could not convert unicode name "%s" to character because it does not match specification' % name)
|
||||||
|
|
||||||
|
|
||||||
|
def raise_key_error_for_invalid_unicode(unicode_digit):
|
||||||
|
"""Unicode values should not be in the range D800 through DFFF because that is used for surrogate pairs in UTF-16
|
||||||
|
|
||||||
|
:raises KeyError if unicode digit is invalid
|
||||||
|
"""
|
||||||
|
if 55295 < unicode_digit < 57344:
|
||||||
|
raise KeyError('Unicode digit %d is invalid because it is in the range D800 through DFFF' % unicode_digit)
|
||||||
|
|
||||||
|
|
||||||
## EncodingDB
|
|
||||||
##
|
|
||||||
class EncodingDB(object):
|
class EncodingDB(object):
|
||||||
|
|
||||||
std2unicode = {}
|
std2unicode = {}
|
||||||
|
@ -59,7 +98,7 @@ class EncodingDB(object):
|
||||||
elif isinstance(x, PSLiteral):
|
elif isinstance(x, PSLiteral):
|
||||||
try:
|
try:
|
||||||
cid2unicode[cid] = name2unicode(x.name)
|
cid2unicode[cid] = name2unicode(x.name)
|
||||||
except KeyError:
|
except KeyError as e:
|
||||||
pass
|
log.debug(str(e))
|
||||||
cid += 1
|
cid += 1
|
||||||
return cid2unicode
|
return cid2unicode
|
||||||
|
|
|
@ -178,7 +178,7 @@ class TagExtractor(PDFDevice):
|
||||||
s = ''
|
s = ''
|
||||||
if isinstance(props, dict):
|
if isinstance(props, dict):
|
||||||
s = ''.join(' %s="%s"' % (utils.enc(k), utils.enc(str(v))) for (k, v)
|
s = ''.join(' %s="%s"' % (utils.enc(k), utils.enc(str(v))) for (k, v)
|
||||||
in sorted(props.iteritems()))
|
in sorted(six.iteritems(props)))
|
||||||
out_s = '<%s%s>' % (utils.enc(tag.name), s)
|
out_s = '<%s%s>' % (utils.enc(tag.name), s)
|
||||||
self.outfp.write(utils.make_compat_bytes(out_s))
|
self.outfp.write(utils.make_compat_bytes(out_s))
|
||||||
self._stack.append(tag)
|
self._stack.append(tag)
|
||||||
|
|
|
@ -1,35 +1,39 @@
|
||||||
|
import logging
|
||||||
import sys
|
|
||||||
import struct
|
import struct
|
||||||
|
import sys
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
|
||||||
|
import six # Python 2+3 compatibility
|
||||||
|
|
||||||
|
from . import settings
|
||||||
|
from .cmapdb import CMap
|
||||||
from .cmapdb import CMapDB
|
from .cmapdb import CMapDB
|
||||||
from .cmapdb import CMapParser
|
from .cmapdb import CMapParser
|
||||||
from .cmapdb import FileUnicodeMap
|
from .cmapdb import FileUnicodeMap
|
||||||
from .cmapdb import CMap
|
|
||||||
from .encodingdb import EncodingDB
|
from .encodingdb import EncodingDB
|
||||||
from .encodingdb import name2unicode
|
from .encodingdb import name2unicode
|
||||||
from .psparser import PSStackParser
|
from .fontmetrics import FONT_METRICS
|
||||||
from .psparser import PSEOF
|
|
||||||
from .psparser import LIT
|
|
||||||
from .psparser import KWD
|
|
||||||
from . import settings
|
|
||||||
from .psparser import PSLiteral
|
|
||||||
from .psparser import literal_name
|
|
||||||
from .pdftypes import PDFException
|
from .pdftypes import PDFException
|
||||||
from .pdftypes import PDFStream
|
from .pdftypes import PDFStream
|
||||||
from .pdftypes import resolve1
|
from .pdftypes import resolve1
|
||||||
from .pdftypes import int_value
|
|
||||||
from .pdftypes import num_value
|
|
||||||
from .pdftypes import list_value
|
|
||||||
from .pdftypes import dict_value
|
from .pdftypes import dict_value
|
||||||
|
from .pdftypes import int_value
|
||||||
|
from .pdftypes import list_value
|
||||||
|
from .pdftypes import num_value
|
||||||
|
from .pdftypes import resolve1
|
||||||
from .pdftypes import stream_value
|
from .pdftypes import stream_value
|
||||||
from .fontmetrics import FONT_METRICS
|
from .psparser import KWD
|
||||||
|
from .psparser import LIT
|
||||||
|
from .psparser import PSEOF
|
||||||
|
from .psparser import PSLiteral
|
||||||
|
from .psparser import PSStackParser
|
||||||
|
from .psparser import literal_name
|
||||||
from .utils import apply_matrix_norm
|
from .utils import apply_matrix_norm
|
||||||
from .utils import nunpack
|
|
||||||
from .utils import choplist
|
from .utils import choplist
|
||||||
from .utils import isnumber
|
from .utils import isnumber
|
||||||
|
from .utils import nunpack
|
||||||
|
|
||||||
import six #Python 2+3 compatibility
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def get_widths(seq):
|
def get_widths(seq):
|
||||||
|
@ -99,7 +103,6 @@ class Type1FontHeaderParser(PSStackParser):
|
||||||
KEYWORD_ARRAY = KWD(b'array')
|
KEYWORD_ARRAY = KWD(b'array')
|
||||||
KEYWORD_READONLY = KWD(b'readonly')
|
KEYWORD_READONLY = KWD(b'readonly')
|
||||||
KEYWORD_FOR = KWD(b'for')
|
KEYWORD_FOR = KWD(b'for')
|
||||||
KEYWORD_FOR = KWD(b'for')
|
|
||||||
|
|
||||||
def __init__(self, data):
|
def __init__(self, data):
|
||||||
PSStackParser.__init__(self, data)
|
PSStackParser.__init__(self, data)
|
||||||
|
@ -107,6 +110,17 @@ class Type1FontHeaderParser(PSStackParser):
|
||||||
return
|
return
|
||||||
|
|
||||||
def get_encoding(self):
|
def get_encoding(self):
|
||||||
|
"""Parse the font encoding
|
||||||
|
|
||||||
|
The Type1 font encoding maps character codes to character names. These character names could either be standard
|
||||||
|
Adobe glyph names, or character names associated with custom CharStrings for this font. A CharString is a
|
||||||
|
sequence of operations that describe how the character should be drawn.
|
||||||
|
Currently, this function returns '' (empty string) for character names that are associated with a CharString.
|
||||||
|
|
||||||
|
References: http://wwwimages.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/T1_SPEC.pdf
|
||||||
|
|
||||||
|
:returns mapping of character identifiers (cid's) to unicode characters
|
||||||
|
"""
|
||||||
while 1:
|
while 1:
|
||||||
try:
|
try:
|
||||||
(cid, name) = self.nextobject()
|
(cid, name) = self.nextobject()
|
||||||
|
@ -114,8 +128,8 @@ class Type1FontHeaderParser(PSStackParser):
|
||||||
break
|
break
|
||||||
try:
|
try:
|
||||||
self._cid2unicode[cid] = name2unicode(name)
|
self._cid2unicode[cid] = name2unicode(name)
|
||||||
except KeyError:
|
except KeyError as e:
|
||||||
pass
|
log.debug(str(e))
|
||||||
return self._cid2unicode
|
return self._cid2unicode
|
||||||
|
|
||||||
def do_keyword(self, pos, token):
|
def do_keyword(self, pos, token):
|
||||||
|
@ -460,7 +474,7 @@ class TrueTypeFont(object):
|
||||||
assert False, str(('Unhandled', fmttype))
|
assert False, str(('Unhandled', fmttype))
|
||||||
# create unicode map
|
# create unicode map
|
||||||
unicode_map = FileUnicodeMap()
|
unicode_map = FileUnicodeMap()
|
||||||
for (char, gid) in char2gid.iteritems():
|
for (char, gid) in six.iteritems(char2gid):
|
||||||
unicode_map.add_cid2unichr(gid, char)
|
unicode_map.add_cid2unichr(gid, char)
|
||||||
return unicode_map
|
return unicode_map
|
||||||
|
|
||||||
|
|
|
@ -96,7 +96,7 @@ def resolve_all(x, default=None):
|
||||||
if isinstance(x, list):
|
if isinstance(x, list):
|
||||||
x = [resolve_all(v, default=default) for v in x]
|
x = [resolve_all(v, default=default) for v in x]
|
||||||
elif isinstance(x, dict):
|
elif isinstance(x, dict):
|
||||||
for (k, v) in x.iteritems():
|
for (k, v) in six.iteritems(x):
|
||||||
x[k] = resolve_all(v, default=default)
|
x[k] = resolve_all(v, default=default)
|
||||||
return x
|
return x
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,121 @@
|
||||||
|
"""
|
||||||
|
Tests based on the Adobe Glyph List Specification (https://github.com/adobe-type-tools/agl-specification#2-the-mapping)
|
||||||
|
|
||||||
|
While not in the specification, lowercase unicode often occurs in pdf's. Therefore lowercase unittest variants are
|
||||||
|
added.
|
||||||
|
"""
|
||||||
|
from nose.tools import assert_raises
|
||||||
|
|
||||||
|
from pdfminer.encodingdb import name2unicode
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_name_in_agl():
|
||||||
|
"""The name "Lcommaaccent" has a single component, which is mapped to the string U+013B by AGL"""
|
||||||
|
assert u'\u013B' == name2unicode('Lcommaaccent')
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_uni():
|
||||||
|
"""The components "Lcommaaccent," "uni013B," and "u013B" all map to the string U+013B"""
|
||||||
|
assert u'\u013B' == name2unicode('uni013B')
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_uni_lowercase():
|
||||||
|
"""The components "Lcommaaccent," "uni013B," and "u013B" all map to the string U+013B"""
|
||||||
|
assert u'\u013B' == name2unicode('uni013b')
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_uni_with_sequence_of_digits():
|
||||||
|
"""The name "uni20AC0308" has a single component, which is mapped to the string U+20AC U+0308"""
|
||||||
|
assert u'\u20AC\u0308' == name2unicode('uni20AC0308')
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_uni_with_sequence_of_digits_lowercase():
|
||||||
|
"""The name "uni20AC0308" has a single component, which is mapped to the string U+20AC U+0308"""
|
||||||
|
assert u'\u20AC\u0308' == name2unicode('uni20ac0308')
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_uni_empty_string():
|
||||||
|
"""The name "uni20ac" has a single component, which is mapped to a euro-sign.
|
||||||
|
|
||||||
|
According to the specification this should be mapped to an empty string, but we also want to support lowercase
|
||||||
|
hexadecimals
|
||||||
|
"""
|
||||||
|
assert u'\u20ac' == name2unicode('uni20ac')
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_uni_empty_string_long():
|
||||||
|
"""The name "uniD801DC0C" has a single component, which is mapped to an empty string
|
||||||
|
|
||||||
|
Neither D801 nor DC0C are in the appropriate set. This form cannot be used to map to the character which is
|
||||||
|
expressed as D801 DC0C in UTF-16, specifically U+1040C. This character can be correctly mapped by using the
|
||||||
|
glyph name "u1040C".
|
||||||
|
"""
|
||||||
|
assert_raises(KeyError, name2unicode, 'uniD801DC0C')
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_uni_empty_string_long_lowercase():
|
||||||
|
"""The name "uniD801DC0C" has a single component, which is mapped to an empty string
|
||||||
|
|
||||||
|
Neither D801 nor DC0C are in the appropriate set. This form cannot be used to map to the character which is
|
||||||
|
expressed as D801 DC0C in UTF-16, specifically U+1040C. This character can be correctly mapped by using the
|
||||||
|
glyph name "u1040C."""
|
||||||
|
assert_raises(KeyError, name2unicode, 'uniD801DC0C')
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_uni_pua():
|
||||||
|
""""Ogoneksmall" and "uniF6FB" both map to the string that corresponds to U+F6FB."""
|
||||||
|
assert u'\uF6FB' == name2unicode('uniF6FB')
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_uni_pua_lowercase():
|
||||||
|
""""Ogoneksmall" and "uniF6FB" both map to the string that corresponds to U+F6FB."""
|
||||||
|
assert u'\uF6FB' == name2unicode('unif6fb')
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_u_with_4_digits():
|
||||||
|
"""The components "Lcommaaccent," "uni013B," and "u013B" all map to the string U+013B"""
|
||||||
|
assert u'\u013B' == name2unicode('u013B')
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_u_with_4_digits_lowercase():
|
||||||
|
"""The components "Lcommaaccent," "uni013B," and "u013B" all map to the string U+013B"""
|
||||||
|
assert u'\u013B' == name2unicode('u013b')
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_u_with_5_digits():
|
||||||
|
"""The name "u1040C" has a single component, which is mapped to the string U+1040C"""
|
||||||
|
assert u'\U0001040C' == name2unicode('u1040C')
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_u_with_5_digits_lowercase():
|
||||||
|
"""The name "u1040C" has a single component, which is mapped to the string U+1040C"""
|
||||||
|
assert u'\U0001040C' == name2unicode('u1040c')
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_multiple_components():
|
||||||
|
"""The name "Lcommaaccent_uni20AC0308_u1040C.alternate" is mapped to the string U+013B U+20AC U+0308 U+1040C"""
|
||||||
|
assert u'\u013B\u20AC\u0308\U0001040C' == name2unicode('Lcommaaccent_uni20AC0308_u1040C.alternate')
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_multiple_components_lowercase():
|
||||||
|
"""The name "Lcommaaccent_uni20AC0308_u1040C.alternate" is mapped to the string U+013B U+20AC U+0308 U+1040C"""
|
||||||
|
assert u'\u013B\u20AC\u0308\U0001040C' == name2unicode('Lcommaaccent_uni20ac0308_u1040c.alternate')
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_foo():
|
||||||
|
"""The name 'foo' maps to an empty string, because 'foo' is not in AGL, and because it does not start with a 'u.'"""
|
||||||
|
assert_raises(KeyError, name2unicode, 'foo')
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_notdef():
|
||||||
|
"""The name ".notdef" is reduced to an empty string (step 1) and mapped to an empty string (step 3)"""
|
||||||
|
assert_raises(KeyError, name2unicode, '.notdef')
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_pua_ogoneksmall():
|
||||||
|
""""Ogoneksmall" and "uniF6FB" both map to the string that corresponds to U+F6FB."""
|
||||||
|
assert u'\uF6FB' == name2unicode('Ogoneksmall')
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_overflow_error():
|
||||||
|
assert_raises(KeyError, name2unicode, '226215240241240240240240')
|
|
@ -3,6 +3,8 @@
|
||||||
import sys
|
import sys
|
||||||
import fileinput
|
import fileinput
|
||||||
|
|
||||||
|
import six #Python 2+3 compatibility
|
||||||
|
|
||||||
def main(argv):
|
def main(argv):
|
||||||
fonts = {}
|
fonts = {}
|
||||||
for line in fileinput.input():
|
for line in fileinput.input():
|
||||||
|
@ -33,7 +35,7 @@ def main(argv):
|
||||||
props[k] = tuple(map(float, f[1:5]))
|
props[k] = tuple(map(float, f[1:5]))
|
||||||
print ('# -*- python -*-')
|
print ('# -*- python -*-')
|
||||||
print ('FONT_METRICS = {')
|
print ('FONT_METRICS = {')
|
||||||
for (fontname,(props,chars)) in fonts.iteritems():
|
for (fontname,(props,chars)) in six.iteritems(fonts):
|
||||||
print (' %r: %r,' % (fontname, (props,chars)))
|
print (' %r: %r,' % (fontname, (props,chars)))
|
||||||
print ('}')
|
print ('}')
|
||||||
return 0
|
return 0
|
||||||
|
|
|
@ -26,6 +26,7 @@ from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
|
||||||
from pdfminer.converter import HTMLConverter, TextConverter
|
from pdfminer.converter import HTMLConverter, TextConverter
|
||||||
from pdfminer.layout import LAParams
|
from pdfminer.layout import LAParams
|
||||||
|
|
||||||
|
import six #Python 2+3 compatibility
|
||||||
|
|
||||||
# quote HTML metacharacters
|
# quote HTML metacharacters
|
||||||
def q(x):
|
def q(x):
|
||||||
|
@ -35,7 +36,7 @@ def q(x):
|
||||||
Q = re.compile(r'[^a-zA-Z0-9_.-=]')
|
Q = re.compile(r'[^a-zA-Z0-9_.-=]')
|
||||||
def url(base, **kw):
|
def url(base, **kw):
|
||||||
r = []
|
r = []
|
||||||
for (k,v) in kw.iteritems():
|
for (k,v) in six.iteritems(kw):
|
||||||
v = Q.sub(lambda m: '%%%02X' % ord(m.group(0)), encoder(q(v), 'replace')[0])
|
v = Q.sub(lambda m: '%%%02X' % ord(m.group(0)), encoder(q(v), 'replace')[0])
|
||||||
r.append('%s=%s' % (k, v))
|
r.append('%s=%s' % (k, v))
|
||||||
return base+'&'.join(r)
|
return base+'&'.join(r)
|
||||||
|
|
Loading…
Reference in New Issue