pdfminer.six/tests/test_pdfminer_psparser.py

151 lines
3.3 KiB
Python
Raw Normal View History

2014-09-03 11:17:41 +00:00
import logging
Enforce pep8 coding-style (#345) * Code Refractor: Use code-style enforcement #312 * Add flake8 to travis-ci * Remove python 2 3 comment on six library. 891 errors > 870 errors. * Remove class and functions comments that consist of just the name. 870 errors > 855 errors. * Fix flake8 errors in pdftypes.py. 855 errors > 833 errors. * Moving flake8 testing from .travis.yml to tox.ini to ensure local testing before commiting * Cleanup pdfinterp.py and add documentation from PDF Reference * Cleanup pdfpage.py * Cleanup pdffont.py * Clean psparser.py * Cleanup high_level.py * Cleanup layout.py * Cleanup pdfparser.py * Cleanup pdfcolor.py * Cleanup rijndael.py * Cleanup converter.py * Rename klass to cls if it is the class variable, to be more consistent with standard practice * Cleanup cmap.py * Cleanup pdfdevice.py * flake8 ignore fontmetrics.py * Cleanup test_pdfminer_psparser.py * Fix flake8 in pdfdocument.py; 339 errors to go * Fix flake8 utils.py; 326 errors togo * pep8 correction for few files in /tools/ 328 > 160 to go (#342) * pep8 correction for few files in /tools/ 328 > 160 to go * pep8 correction: 160 > 5 to go * Fix ascii85.py errors * Fix error in getting index from target that does not exists * Remove commented print lines * Fix flake8 error in pdfinterp.py * Fix python2 specific error by removing argument from print statement * Ignore invalid python2 syntax * Update contributing.md * Added changelog * Remove unused import Co-authored-by: Fakabbir Amin <f4amin@gmail.com>
2019-12-29 20:20:20 +00:00
from pdfminer.psparser import KWD, LIT, PSBaseParser, PSStackParser, PSEOF
Enforce pep8 coding-style (#345) * Code Refractor: Use code-style enforcement #312 * Add flake8 to travis-ci * Remove python 2 3 comment on six library. 891 errors > 870 errors. * Remove class and functions comments that consist of just the name. 870 errors > 855 errors. * Fix flake8 errors in pdftypes.py. 855 errors > 833 errors. * Moving flake8 testing from .travis.yml to tox.ini to ensure local testing before commiting * Cleanup pdfinterp.py and add documentation from PDF Reference * Cleanup pdfpage.py * Cleanup pdffont.py * Clean psparser.py * Cleanup high_level.py * Cleanup layout.py * Cleanup pdfparser.py * Cleanup pdfcolor.py * Cleanup rijndael.py * Cleanup converter.py * Rename klass to cls if it is the class variable, to be more consistent with standard practice * Cleanup cmap.py * Cleanup pdfdevice.py * flake8 ignore fontmetrics.py * Cleanup test_pdfminer_psparser.py * Fix flake8 in pdfdocument.py; 339 errors to go * Fix flake8 utils.py; 326 errors togo * pep8 correction for few files in /tools/ 328 > 160 to go (#342) * pep8 correction for few files in /tools/ 328 > 160 to go * pep8 correction: 160 > 5 to go * Fix ascii85.py errors * Fix error in getting index from target that does not exists * Remove commented print lines * Fix flake8 error in pdfinterp.py * Fix python2 specific error by removing argument from print statement * Ignore invalid python2 syntax * Update contributing.md * Added changelog * Remove unused import Co-authored-by: Fakabbir Amin <f4amin@gmail.com>
2019-12-29 20:20:20 +00:00
# Module-level logger; test_1 and test_2 log their parse results through it.
logger = logging.getLogger(__name__)
2014-09-03 11:17:41 +00:00
2014-09-03 11:17:41 +00:00
class TestPSBaseParser:
    """Simplistic test cases for ``PSBaseParser`` and ``PSStackParser``.

    The same PostScript-like sample (``TESTDATA``) is fed to a token-level
    parser and to an object-level parser; the results are compared against
    the hand-written expectations in ``TOKENS`` and ``OBJS``.
    """

    # Sample input. This is a raw bytes literal, so backslash sequences are
    # passed to the parser verbatim, and every byte (including the leading
    # space and the double space on the third line) is significant: the
    # offsets recorded in TOKENS and OBJS index into this exact byte string.
    TESTDATA = rb"""%!PS
begin end
 "  @ #
/a/BCD /Some_Name /foo#5f#xbaa
0 +1 -2 .5 1.234
(abc) () (abc ( def ) ghi)
(def\040\0\0404ghi) (bach\\slask) (foo\nbaa)
(this % is not a comment.)
(foo
baa)
(foo\
baa)
<> <20> < 40 4020 >
<abcd00
12345>
func/a/b{(c)do*}def
[ 1 (z) ! ]
<< /foo (bar) >>
"""

    # Expected output of repeated nexttoken() calls: (offset, token) pairs,
    # where offset is the byte position in TESTDATA at which the token starts.
    TOKENS = [
        (5, KWD(b"begin")),
        (11, KWD(b"end")),
        (16, KWD(b'"')),
        (19, KWD(b"@")),
        (21, KWD(b"#")),
        (23, LIT("a")),
        (25, LIT("BCD")),
        (30, LIT("Some_Name")),
        (41, LIT("foo_xbaa")),
        (54, 0),
        (56, 1),
        (59, -2),
        (62, 0.5),
        (65, 1.234),
        (71, b"abc"),
        (77, b""),
        (80, b"abc ( def ) ghi"),
        (98, b"def \x00 4ghi"),
        (118, b"bach\\slask"),
        (132, b"foo\nbaa"),
        (143, b"this % is not a comment."),
        (170, b"foo\nbaa"),
        (180, b"foobaa"),
        (191, b""),
        (194, b" "),
        (199, b"@@ "),
        (211, b"\xab\xcd\x00\x124\x05"),
        (226, KWD(b"func")),
        (230, LIT("a")),
        (232, LIT("b")),
        (234, KWD(b"{")),
        (235, b"c"),
        (238, KWD(b"do*")),
        (241, KWD(b"}")),
        (242, KWD(b"def")),
        (246, KWD(b"[")),
        (248, 1),
        (250, b"z"),
        (254, KWD(b"!")),
        (256, KWD(b"]")),
        (258, KWD(b"<<")),
        (261, LIT("foo")),
        (266, b"bar"),
        (272, KWD(b">>")),
    ]

    # Expected output of repeated nextobject() calls. Compared with TOKENS,
    # no keyword entries appear, and the "{...}", "[...]" and "<<...>>"
    # groups each show up as a single list/list/dict entry recorded at the
    # offset of the opening delimiter.
    OBJS = [
        (23, LIT("a")),
        (25, LIT("BCD")),
        (30, LIT("Some_Name")),
        (41, LIT("foo_xbaa")),
        (54, 0),
        (56, 1),
        (59, -2),
        (62, 0.5),
        (65, 1.234),
        (71, b"abc"),
        (77, b""),
        (80, b"abc ( def ) ghi"),
        (98, b"def \x00 4ghi"),
        (118, b"bach\\slask"),
        (132, b"foo\nbaa"),
        (143, b"this % is not a comment."),
        (170, b"foo\nbaa"),
        (180, b"foobaa"),
        (191, b""),
        (194, b" "),
        (199, b"@@ "),
        (211, b"\xab\xcd\x00\x124\x05"),
        (230, LIT("a")),
        (232, LIT("b")),
        (234, [b"c"]),
        (246, [1, b"z"]),
        (258, {"foo": b"bar"}),
    ]

    @staticmethod
    def _drain(next_item):
        """Call ``next_item()`` until it raises ``PSEOF``.

        :param next_item: zero-argument callable producing one result per call.
        :return: list of every result produced before ``PSEOF`` was raised.
        """
        results = []
        try:
            while True:
                results.append(next_item())
        except PSEOF:
            # End of input is the normal way for the loop to terminate.
            pass
        return results

    def get_tokens(self, s):
        """Tokenize the bytes ``s`` and return all tokens as a list.

        :param s: bytes to parse.
        :return: list of whatever ``nexttoken()`` yields until ``PSEOF``.
        """
        from io import BytesIO

        class MyParser(PSBaseParser):
            # NOTE(review): this override mirrors the one in get_objects;
            # whether PSBaseParser ever calls flush() or provides
            # add_results/popall is not visible from this file - confirm.
            def flush(self):
                self.add_results(*self.popall())

        parser = MyParser(BytesIO(s))
        return self._drain(parser.nexttoken)

    def get_objects(self, s):
        """Parse the bytes ``s`` into objects and return them as a list.

        :param s: bytes to parse.
        :return: list of whatever ``nextobject()`` yields until ``PSEOF``.
        """
        from io import BytesIO

        class MyParser(PSStackParser):
            def flush(self):
                # Publish everything currently on the stack as results.
                self.add_results(*self.popall())

        parser = MyParser(BytesIO(s))
        return self._drain(parser.nextobject)

    def test_1(self):
        """Token stream of TESTDATA must equal TOKENS."""
        tokens = self.get_tokens(self.TESTDATA)
        logger.info(tokens)
        assert tokens == self.TOKENS

    def test_2(self):
        """Object stream of TESTDATA must equal OBJS."""
        objs = self.get_objects(self.TESTDATA)
        logger.info(objs)
        assert objs == self.OBJS