Remove obsolete returns (#707)
* Remove obsolete returns
* Update CHANGELOG.md
* Remove empty lines
* Remove more empty lines

pull/680/head
parent
2610ef13af
commit
b19f9e7270
|
@ -20,6 +20,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
|||
### Changed
|
||||
- Replace warnings.warn with logging.Logger.warning in line with [recommended use](https://docs.python.org/3/howto/logging.html#when-to-use-logging) ([#673](https://github.com/pdfminer/pdfminer.six/pull/673))
|
||||
|
||||
### Removed
|
||||
- Unnecessary return statements without argument at the end of functions ([#707](https://github.com/pdfminer/pdfminer.six/pull/707))
|
||||
|
||||
## [20211012]
|
||||
|
||||
### Added
|
||||
|
|
|
@ -20,7 +20,6 @@ class Arcfour:
|
|||
(s[i], s[j]) = (s[j], s[i])
|
||||
self.s = s
|
||||
(self.i, self.j) = (0, 0)
|
||||
return
|
||||
|
||||
def process(self, data: bytes) -> bytes:
|
||||
(i, j) = (self.i, self.j)
|
||||
|
|
|
@ -35,7 +35,6 @@ class BitParser:
|
|||
|
||||
def __init__(self) -> None:
|
||||
self._pos = 0
|
||||
return
|
||||
|
||||
@classmethod
|
||||
def add(cls, root: BitParserState, v: Union[int, str], bits: str) -> None:
|
||||
|
@ -53,13 +52,11 @@ class BitParser:
|
|||
b = 0
|
||||
assert b is not None
|
||||
p[b] = v
|
||||
return
|
||||
|
||||
def feedbytes(self, data: bytes) -> None:
|
||||
for byte in get_bytes(data):
|
||||
for m in (128, 64, 32, 16, 8, 4, 2, 1):
|
||||
self._parse_bit(byte & m)
|
||||
return
|
||||
|
||||
def _parse_bit(self, x: object) -> None:
|
||||
if x:
|
||||
|
@ -72,7 +69,6 @@ class BitParser:
|
|||
else:
|
||||
assert self._accept is not None
|
||||
self._state = self._accept(v)
|
||||
return
|
||||
|
||||
|
||||
class CCITTG4Parser(BitParser):
|
||||
|
|
|
@ -9,27 +9,27 @@ More information is available on the Adobe website:
|
|||
|
||||
"""
|
||||
|
||||
import sys
|
||||
import gzip
|
||||
import logging
|
||||
import os
|
||||
import os.path
|
||||
import gzip
|
||||
import pickle as pickle
|
||||
import struct
|
||||
import logging
|
||||
import sys
|
||||
from typing import (Any, BinaryIO, Dict, Iterable, Iterator, List,
|
||||
MutableMapping, Optional, TextIO, Tuple, Union, cast)
|
||||
from .psparser import PSStackParser
|
||||
from .psparser import PSSyntaxError
|
||||
|
||||
from .encodingdb import name2unicode
|
||||
from .psparser import KWD
|
||||
from .psparser import PSEOF
|
||||
from .psparser import PSKeyword
|
||||
from .psparser import PSLiteral
|
||||
from .psparser import PSStackParser
|
||||
from .psparser import PSSyntaxError
|
||||
from .psparser import literal_name
|
||||
from .psparser import KWD
|
||||
from .encodingdb import name2unicode
|
||||
from .utils import choplist
|
||||
from .utils import nunpack
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
@ -43,24 +43,22 @@ class CMapBase:
|
|||
|
||||
def __init__(self, **kwargs: object) -> None:
|
||||
self.attrs: MutableMapping[str, object] = kwargs.copy()
|
||||
return
|
||||
|
||||
def is_vertical(self) -> bool:
|
||||
return self.attrs.get('WMode', 0) != 0
|
||||
|
||||
def set_attr(self, k: str, v: object) -> None:
|
||||
self.attrs[k] = v
|
||||
return
|
||||
|
||||
def add_code2cid(self, code: str, cid: int) -> None:
|
||||
return
|
||||
pass
|
||||
|
||||
def add_cid2unichr(self, cid: int, code: Union[PSLiteral, bytes, int]
|
||||
) -> None:
|
||||
return
|
||||
pass
|
||||
|
||||
def use_cmap(self, cmap: "CMapBase") -> None:
|
||||
return
|
||||
pass
|
||||
|
||||
def decode(self, code: bytes) -> Iterable[int]:
|
||||
raise NotImplementedError
|
||||
|
@ -71,7 +69,6 @@ class CMap(CMapBase):
|
|||
def __init__(self, **kwargs: Union[str, int]) -> None:
|
||||
CMapBase.__init__(self, **kwargs)
|
||||
self.code2cid: Dict[int, object] = {}
|
||||
return
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return '<CMap: %s>' % self.attrs.get('CMapName')
|
||||
|
@ -88,7 +85,6 @@ class CMap(CMapBase):
|
|||
else:
|
||||
dst[k] = v
|
||||
copy(self.code2cid, cmap.code2cid)
|
||||
return
|
||||
|
||||
def decode(self, code: bytes) -> Iterator[int]:
|
||||
log.debug('decode: %r, %r', self, code)
|
||||
|
@ -103,7 +99,6 @@ class CMap(CMapBase):
|
|||
d = cast(Dict[int, object], x)
|
||||
else:
|
||||
d = self.code2cid
|
||||
return
|
||||
|
||||
def dump(self, out: TextIO = sys.stdout,
|
||||
code2cid: Optional[Dict[int, object]] = None,
|
||||
|
@ -117,7 +112,6 @@ class CMap(CMapBase):
|
|||
out.write('code %r = cid %d\n' % (c, v))
|
||||
else:
|
||||
self.dump(out=out, code2cid=cast(Dict[int, object], v), code=c)
|
||||
return
|
||||
|
||||
|
||||
class IdentityCMap(CMapBase):
|
||||
|
@ -145,7 +139,6 @@ class UnicodeMap(CMapBase):
|
|||
def __init__(self, **kwargs: Union[str, int]) -> None:
|
||||
CMapBase.__init__(self, **kwargs)
|
||||
self.cid2unichr: Dict[int, str] = {}
|
||||
return
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return '<UnicodeMap: %s>' % self.attrs.get('CMapName')
|
||||
|
@ -157,7 +150,6 @@ class UnicodeMap(CMapBase):
|
|||
def dump(self, out: TextIO = sys.stdout) -> None:
|
||||
for (k, v) in sorted(self.cid2unichr.items()):
|
||||
out.write('cid %d = unicode %r\n' % (k, v))
|
||||
return
|
||||
|
||||
|
||||
class IdentityUnicodeMap(UnicodeMap):
|
||||
|
@ -183,7 +175,6 @@ class FileCMap(CMap):
|
|||
d = t
|
||||
ci = ord(code[-1])
|
||||
d[ci] = cid
|
||||
return
|
||||
|
||||
|
||||
class FileUnicodeMap(UnicodeMap):
|
||||
|
@ -202,7 +193,6 @@ class FileUnicodeMap(UnicodeMap):
|
|||
self.cid2unichr[cid] = chr(code)
|
||||
else:
|
||||
raise TypeError(code)
|
||||
return
|
||||
|
||||
|
||||
class PyCMap(CMap):
|
||||
|
@ -212,7 +202,6 @@ class PyCMap(CMap):
|
|||
self.code2cid = module.CODE2CID
|
||||
if module.IS_VERTICAL:
|
||||
self.attrs['WMode'] = 1
|
||||
return
|
||||
|
||||
|
||||
class PyUnicodeMap(UnicodeMap):
|
||||
|
@ -224,7 +213,6 @@ class PyUnicodeMap(UnicodeMap):
|
|||
self.attrs['WMode'] = 1
|
||||
else:
|
||||
self.cid2unichr = module.CID2UNICHR_H
|
||||
return
|
||||
|
||||
|
||||
class CMapDB:
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
import io
|
||||
import logging
|
||||
from pdfminer.pdfcolor import PDFColorSpace
|
||||
import re
|
||||
from typing import (BinaryIO, Dict, Generic, List, Optional, Sequence, TextIO,
|
||||
Tuple, TypeVar, Union, cast)
|
||||
import re
|
||||
|
||||
from pdfminer.pdfcolor import PDFColorSpace
|
||||
from . import utils
|
||||
from .image import ImageWriter
|
||||
from .layout import LAParams, LTComponent, TextGroupElement
|
||||
from .layout import LTChar
|
||||
from .layout import LTContainer
|
||||
|
@ -33,7 +34,6 @@ from .utils import apply_matrix_pt
|
|||
from .utils import bbox2str
|
||||
from .utils import enc
|
||||
from .utils import mult_matrix
|
||||
from .image import ImageWriter
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
@ -52,7 +52,6 @@ class PDFLayoutAnalyzer(PDFTextDevice):
|
|||
self.pageno = pageno
|
||||
self.laparams = laparams
|
||||
self._stack: List[LTLayoutContainer] = []
|
||||
return
|
||||
|
||||
def begin_page(self, page: PDFPage, ctm: Matrix) -> None:
|
||||
(x0, y0, x1, y1) = page.mediabox
|
||||
|
@ -60,7 +59,6 @@ class PDFLayoutAnalyzer(PDFTextDevice):
|
|||
(x1, y1) = apply_matrix_pt(ctm, (x1, y1))
|
||||
mediabox = (0, 0, abs(x0-x1), abs(y0-y1))
|
||||
self.cur_item = LTPage(self.pageno, mediabox)
|
||||
return
|
||||
|
||||
def end_page(self, page: PDFPage) -> None:
|
||||
assert not self._stack, str(len(self._stack))
|
||||
|
@ -69,19 +67,16 @@ class PDFLayoutAnalyzer(PDFTextDevice):
|
|||
self.cur_item.analyze(self.laparams)
|
||||
self.pageno += 1
|
||||
self.receive_layout(self.cur_item)
|
||||
return
|
||||
|
||||
def begin_figure(self, name: str, bbox: Rect, matrix: Matrix) -> None:
|
||||
self._stack.append(self.cur_item)
|
||||
self.cur_item = LTFigure(name, bbox, mult_matrix(matrix, self.ctm))
|
||||
return
|
||||
|
||||
def end_figure(self, _: str) -> None:
|
||||
fig = self.cur_item
|
||||
assert isinstance(self.cur_item, LTFigure), str(type(self.cur_item))
|
||||
self.cur_item = self._stack.pop()
|
||||
self.cur_item.add(fig)
|
||||
return
|
||||
|
||||
def render_image(self, name: str, stream: PDFStream) -> None:
|
||||
assert isinstance(self.cur_item, LTFigure), str(type(self.cur_item))
|
||||
|
@ -89,7 +84,6 @@ class PDFLayoutAnalyzer(PDFTextDevice):
|
|||
(self.cur_item.x0, self.cur_item.y0,
|
||||
self.cur_item.x1, self.cur_item.y1))
|
||||
self.cur_item.add(item)
|
||||
return
|
||||
|
||||
def paint_path(
|
||||
self,
|
||||
|
@ -178,7 +172,7 @@ class PDFLayoutAnalyzer(PDFTextDevice):
|
|||
return '(cid:%d)' % cid
|
||||
|
||||
def receive_layout(self, ltpage: LTPage) -> None:
|
||||
return
|
||||
pass
|
||||
|
||||
|
||||
class PDFPageAggregator(PDFLayoutAnalyzer):
|
||||
|
@ -191,11 +185,9 @@ class PDFPageAggregator(PDFLayoutAnalyzer):
|
|||
PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno,
|
||||
laparams=laparams)
|
||||
self.result: Optional[LTPage] = None
|
||||
return
|
||||
|
||||
def receive_layout(self, ltpage: LTPage) -> None:
|
||||
self.result = ltpage
|
||||
return
|
||||
|
||||
def get_result(self) -> LTPage:
|
||||
assert self.result is not None
|
||||
|
@ -254,7 +246,6 @@ class TextConverter(PDFConverter[AnyIO]):
|
|||
laparams=laparams)
|
||||
self.showpageno = showpageno
|
||||
self.imagewriter = imagewriter
|
||||
return
|
||||
|
||||
def write_text(self, text: str) -> None:
|
||||
text = utils.compatible_encode_method(text, self.codec, 'ignore')
|
||||
|
@ -262,7 +253,6 @@ class TextConverter(PDFConverter[AnyIO]):
|
|||
cast(BinaryIO, self.outfp).write(text.encode())
|
||||
else:
|
||||
cast(TextIO, self.outfp).write(text)
|
||||
return
|
||||
|
||||
def receive_layout(self, ltpage: LTPage) -> None:
|
||||
def render(item: LTItem) -> None:
|
||||
|
@ -280,7 +270,6 @@ class TextConverter(PDFConverter[AnyIO]):
|
|||
self.write_text('Page %s\n' % ltpage.pageid)
|
||||
render(ltpage)
|
||||
self.write_text('\f')
|
||||
return
|
||||
|
||||
# Some dummy functions to save memory/CPU when all that is wanted
|
||||
# is text. This stops all the image and drawing output from being
|
||||
|
|
|
@ -58,12 +58,10 @@ class BMPWriter:
|
|||
self.fp.write(struct.pack('BBBx', i, i, i))
|
||||
self.pos0 = self.fp.tell()
|
||||
self.pos1 = self.pos0 + self.datasize
|
||||
return
|
||||
|
||||
def write_line(self, y: int, data: bytes) -> None:
|
||||
self.fp.seek(self.pos1 - (y+1)*self.linesize)
|
||||
self.fp.write(data)
|
||||
return
|
||||
|
||||
|
||||
class ImageWriter:
|
||||
|
@ -76,7 +74,6 @@ class ImageWriter:
|
|||
self.outdir = outdir
|
||||
if not os.path.exists(self.outdir):
|
||||
os.makedirs(self.outdir)
|
||||
return
|
||||
|
||||
def export_image(self, image: LTImage) -> str:
|
||||
(width, height) = image.srcsize
|
||||
|
|
|
@ -3,6 +3,11 @@ import logging
|
|||
from typing import (Dict, Generic, Iterable, Iterator, List, Optional,
|
||||
Sequence, Set, Tuple, TypeVar, Union, cast)
|
||||
|
||||
from .pdfcolor import PDFColorSpace
|
||||
from .pdffont import PDFFont
|
||||
from .pdfinterp import Color
|
||||
from .pdfinterp import PDFGraphicState
|
||||
from .pdftypes import PDFStream
|
||||
from .utils import INF
|
||||
from .utils import LTComponentT
|
||||
from .utils import Matrix
|
||||
|
@ -15,11 +20,6 @@ from .utils import fsplit
|
|||
from .utils import get_bound
|
||||
from .utils import matrix2str
|
||||
from .utils import uniq
|
||||
from .pdfcolor import PDFColorSpace
|
||||
from .pdftypes import PDFStream
|
||||
from .pdfinterp import Color
|
||||
from .pdfinterp import PDFGraphicState
|
||||
from .pdffont import PDFFont
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -28,7 +28,6 @@ class IndexAssigner:
|
|||
|
||||
def __init__(self, index: int = 0) -> None:
|
||||
self.index = index
|
||||
return
|
||||
|
||||
def run(self, obj: "LTItem") -> None:
|
||||
if isinstance(obj, LTTextBox):
|
||||
|
@ -37,7 +36,6 @@ class IndexAssigner:
|
|||
elif isinstance(obj, LTTextGroup):
|
||||
for x in obj:
|
||||
self.run(x)
|
||||
return
|
||||
|
||||
|
||||
class LAParams:
|
||||
|
@ -87,7 +85,6 @@ class LAParams:
|
|||
self.all_texts = all_texts
|
||||
|
||||
self._validate()
|
||||
return
|
||||
|
||||
def _validate(self) -> None:
|
||||
if self.boxes_flow is not None:
|
||||
|
@ -111,7 +108,7 @@ class LTItem:
|
|||
|
||||
def analyze(self, laparams: LAParams) -> None:
|
||||
"""Perform the layout analysis."""
|
||||
return
|
||||
pass
|
||||
|
||||
|
||||
class LTText:
|
||||
|
@ -132,7 +129,6 @@ class LTComponent(LTItem):
|
|||
def __init__(self, bbox: Rect) -> None:
|
||||
LTItem.__init__(self)
|
||||
self.set_bbox(bbox)
|
||||
return
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return ('<%s %s>' %
|
||||
|
@ -160,7 +156,6 @@ class LTComponent(LTItem):
|
|||
self.width = x1-x0
|
||||
self.height = y1-y0
|
||||
self.bbox = bbox
|
||||
return
|
||||
|
||||
def is_empty(self) -> bool:
|
||||
return self.width <= 0 or self.height <= 0
|
||||
|
@ -223,7 +218,6 @@ class LTCurve(LTComponent):
|
|||
self.evenodd = evenodd
|
||||
self.stroking_color = stroking_color
|
||||
self.non_stroking_color = non_stroking_color
|
||||
return
|
||||
|
||||
def get_pts(self) -> str:
|
||||
return ','.join('%.3f,%.3f' % p for p in self.pts)
|
||||
|
@ -248,7 +242,6 @@ class LTLine(LTCurve):
|
|||
) -> None:
|
||||
LTCurve.__init__(self, linewidth, [p0, p1], stroke, fill, evenodd,
|
||||
stroking_color, non_stroking_color)
|
||||
return
|
||||
|
||||
|
||||
class LTRect(LTCurve):
|
||||
|
@ -271,7 +264,6 @@ class LTRect(LTCurve):
|
|||
LTCurve.__init__(self, linewidth,
|
||||
[(x0, y0), (x1, y0), (x1, y1), (x0, y1)], stroke,
|
||||
fill, evenodd, stroking_color, non_stroking_color)
|
||||
return
|
||||
|
||||
|
||||
class LTImage(LTComponent):
|
||||
|
@ -291,7 +283,6 @@ class LTImage(LTComponent):
|
|||
self.colorspace = stream.get_any(('CS', 'ColorSpace'))
|
||||
if not isinstance(self.colorspace, list):
|
||||
self.colorspace = [self.colorspace]
|
||||
return
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return ('<%s(%s) %s %r>' %
|
||||
|
|
|
@ -1,8 +1,7 @@
|
|||
from io import BytesIO
|
||||
import logging
|
||||
from io import BytesIO
|
||||
from typing import BinaryIO, Iterator, List, Optional, cast
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
@ -20,7 +19,6 @@ class LZWDecoder:
|
|||
# NB: self.table stores None only in indices 256 and 257
|
||||
self.table: Optional[List[Optional[bytes]]] = None
|
||||
self.prevbuf: Optional[bytes] = None
|
||||
return
|
||||
|
||||
def readbits(self, bits: int) -> int:
|
||||
v = 0
|
||||
|
@ -95,7 +93,6 @@ class LZWDecoder:
|
|||
assert self.table is not None
|
||||
logger.debug('nbits=%d, code=%d, output=%r, table=%r'
|
||||
% (self.nbits, code, x, self.table[258:]))
|
||||
return
|
||||
|
||||
|
||||
def lzwdecode(data: bytes) -> bytes:
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import collections
|
||||
from typing import Dict
|
||||
from .psparser import LIT
|
||||
|
||||
from .psparser import LIT
|
||||
|
||||
LITERAL_DEVICE_GRAY = LIT('DeviceGray')
|
||||
LITERAL_DEVICE_RGB = LIT('DeviceRGB')
|
||||
|
@ -13,7 +13,6 @@ class PDFColorSpace:
|
|||
def __init__(self, name: str, ncomponents: int) -> None:
|
||||
self.name = name
|
||||
self.ncomponents = ncomponents
|
||||
return
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return '<PDFColorSpace: %s, ncomponents=%d>' % \
|
||||
|
|
|
@ -1,13 +1,14 @@
|
|||
from pdfminer.psparser import PSLiteral
|
||||
from typing import (BinaryIO, Iterable, List, Optional, Sequence,
|
||||
TYPE_CHECKING, Union, cast)
|
||||
|
||||
from pdfminer.psparser import PSLiteral
|
||||
from . import utils
|
||||
from .utils import Matrix, Point, Rect, PathSegment
|
||||
from .pdfcolor import PDFColorSpace
|
||||
from .pdffont import PDFFont
|
||||
from .pdffont import PDFUnicodeNotDefined
|
||||
from .pdfpage import PDFPage
|
||||
from .pdftypes import PDFStream
|
||||
from .utils import Matrix, Point, Rect, PathSegment
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .pdfinterp import PDFGraphicState
|
||||
|
@ -26,7 +27,6 @@ class PDFDevice:
|
|||
def __init__(self, rsrcmgr: "PDFResourceManager") -> None:
|
||||
self.rsrcmgr = rsrcmgr
|
||||
self.ctm: Optional[Matrix] = None
|
||||
return
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return '<PDFDevice>'
|
||||
|
@ -43,40 +43,39 @@ class PDFDevice:
|
|||
self.close()
|
||||
|
||||
def close(self) -> None:
|
||||
return
|
||||
pass
|
||||
|
||||
def set_ctm(self, ctm: Matrix) -> None:
|
||||
self.ctm = ctm
|
||||
return
|
||||
|
||||
def begin_tag(
|
||||
self,
|
||||
tag: PSLiteral,
|
||||
props: Optional["PDFStackT"] = None
|
||||
) -> None:
|
||||
return
|
||||
pass
|
||||
|
||||
def end_tag(self) -> None:
|
||||
return
|
||||
pass
|
||||
|
||||
def do_tag(
|
||||
self,
|
||||
tag: PSLiteral,
|
||||
props: Optional["PDFStackT"] = None
|
||||
) -> None:
|
||||
return
|
||||
pass
|
||||
|
||||
def begin_page(self, page: PDFPage, ctm: Matrix) -> None:
|
||||
return
|
||||
pass
|
||||
|
||||
def end_page(self, page: PDFPage) -> None:
|
||||
return
|
||||
pass
|
||||
|
||||
def begin_figure(self, name: str, bbox: Rect, matrix: Matrix) -> None:
|
||||
return
|
||||
pass
|
||||
|
||||
def end_figure(self, name: str) -> None:
|
||||
return
|
||||
pass
|
||||
|
||||
def paint_path(
|
||||
self,
|
||||
|
@ -86,10 +85,10 @@ class PDFDevice:
|
|||
evenodd: bool,
|
||||
path: Sequence[PathSegment]
|
||||
) -> None:
|
||||
return
|
||||
pass
|
||||
|
||||
def render_image(self, name: str, stream: PDFStream) -> None:
|
||||
return
|
||||
pass
|
||||
|
||||
def render_string(
|
||||
self,
|
||||
|
@ -98,7 +97,7 @@ class PDFDevice:
|
|||
ncs: PDFColorSpace,
|
||||
graphicstate: "PDFGraphicState"
|
||||
) -> None:
|
||||
return
|
||||
pass
|
||||
|
||||
|
||||
class PDFTextDevice(PDFDevice):
|
||||
|
@ -132,7 +131,6 @@ class PDFTextDevice(PDFDevice):
|
|||
seq, matrix, textstate.linematrix, font, fontsize,
|
||||
scaling, charspace, wordspace, rise, dxscale, ncs,
|
||||
graphicstate)
|
||||
return
|
||||
|
||||
def render_string_horizontal(
|
||||
self,
|
||||
|
@ -227,7 +225,6 @@ class TagExtractor(PDFDevice):
|
|||
self.codec = codec
|
||||
self.pageno = 0
|
||||
self._stack: List[PSLiteral] = []
|
||||
return
|
||||
|
||||
def render_string(
|
||||
self,
|
||||
|
@ -252,7 +249,6 @@ class TagExtractor(PDFDevice):
|
|||
except PDFUnicodeNotDefined:
|
||||
pass
|
||||
self._write(utils.enc(text))
|
||||
return
|
||||
|
||||
def begin_page(self, page: PDFPage, ctm: Matrix) -> None:
|
||||
output = '<page id="%s" bbox="%s" rotate="%d">' %\
|
||||
|
|
|
@ -100,7 +100,6 @@ class PDFXRef(PDFBaseXRef):
|
|||
def __init__(self) -> None:
|
||||
self.offsets: Dict[int, Tuple[Optional[int], int, int]] = {}
|
||||
self.trailer: Dict[str, Any] = {}
|
||||
return
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return '<PDFXRef: offsets=%r>' % (self.offsets.keys())
|
||||
|
@ -145,7 +144,6 @@ class PDFXRef(PDFBaseXRef):
|
|||
self.offsets[objid] = (None, int(pos_b), int(genno_b))
|
||||
log.info('xref objects: %r', self.offsets)
|
||||
self.load_trailer(parser)
|
||||
return
|
||||
|
||||
def load_trailer(self, parser: PDFParser) -> None:
|
||||
try:
|
||||
|
@ -159,7 +157,6 @@ class PDFXRef(PDFBaseXRef):
|
|||
(_, dic) = x[0]
|
||||
self.trailer.update(dict_value(dic))
|
||||
log.debug('trailer=%r', self.trailer)
|
||||
return
|
||||
|
||||
def get_trailer(self) -> Dict[str, Any]:
|
||||
return self.trailer
|
||||
|
@ -225,7 +222,6 @@ class PDFXRefFallback(PDFXRef):
|
|||
for index in range(n):
|
||||
objid1 = objs[index*2]
|
||||
self.offsets[objid1] = (objid, index, 0)
|
||||
return
|
||||
|
||||
|
||||
class PDFXRefStream(PDFBaseXRef):
|
||||
|
@ -237,7 +233,6 @@ class PDFXRefStream(PDFBaseXRef):
|
|||
self.fl2: Optional[int] = None
|
||||
self.fl3: Optional[int] = None
|
||||
self.ranges: List[Tuple[int, int]] = []
|
||||
return
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return '<PDFXRefStream: ranges=%r>' % (self.ranges)
|
||||
|
|
|
@ -1,43 +1,43 @@
|
|||
import re
|
||||
import logging
|
||||
from typing import Dict, List, Mapping, Optional, Sequence, Tuple, Union, cast
|
||||
import re
|
||||
from io import BytesIO
|
||||
from .cmapdb import CMapDB
|
||||
from typing import Dict, List, Mapping, Optional, Sequence, Tuple, Union, cast
|
||||
|
||||
from . import settings
|
||||
from .cmapdb import CMap
|
||||
from .cmapdb import CMapBase
|
||||
from .psparser import PSLiteral, PSTypeError
|
||||
from .psparser import PSStackType
|
||||
from .psparser import PSEOF
|
||||
from .psparser import PSKeyword
|
||||
from .psparser import literal_name
|
||||
from .psparser import keyword_name
|
||||
from .psparser import PSStackParser
|
||||
from .psparser import LIT
|
||||
from .psparser import KWD
|
||||
from . import settings
|
||||
from .pdfdevice import PDFDevice
|
||||
from .pdfdevice import PDFTextSeq
|
||||
from .pdfpage import PDFPage
|
||||
from .pdftypes import PDFException
|
||||
from .pdftypes import PDFStream
|
||||
from .pdftypes import PDFObjRef
|
||||
from .pdftypes import resolve1
|
||||
from .pdftypes import list_value
|
||||
from .pdftypes import dict_value
|
||||
from .pdftypes import stream_value
|
||||
from .pdffont import PDFFont
|
||||
from .pdffont import PDFFontError
|
||||
from .pdffont import PDFType1Font
|
||||
from .pdffont import PDFTrueTypeFont
|
||||
from .pdffont import PDFType3Font
|
||||
from .pdffont import PDFCIDFont
|
||||
from .cmapdb import CMapDB
|
||||
from .pdfcolor import PDFColorSpace
|
||||
from .pdfcolor import PREDEFINED_COLORSPACE
|
||||
from .pdfdevice import PDFDevice
|
||||
from .pdfdevice import PDFTextSeq
|
||||
from .pdffont import PDFCIDFont
|
||||
from .pdffont import PDFFont
|
||||
from .pdffont import PDFFontError
|
||||
from .pdffont import PDFTrueTypeFont
|
||||
from .pdffont import PDFType1Font
|
||||
from .pdffont import PDFType3Font
|
||||
from .pdfpage import PDFPage
|
||||
from .pdftypes import PDFException
|
||||
from .pdftypes import PDFObjRef
|
||||
from .pdftypes import PDFStream
|
||||
from .pdftypes import dict_value
|
||||
from .pdftypes import list_value
|
||||
from .pdftypes import resolve1
|
||||
from .pdftypes import stream_value
|
||||
from .psparser import KWD
|
||||
from .psparser import LIT
|
||||
from .psparser import PSEOF
|
||||
from .psparser import PSKeyword
|
||||
from .psparser import PSLiteral, PSTypeError
|
||||
from .psparser import PSStackParser
|
||||
from .psparser import PSStackType
|
||||
from .psparser import keyword_name
|
||||
from .psparser import literal_name
|
||||
from .utils import MATRIX_IDENTITY
|
||||
from .utils import Matrix, Point, PathSegment, Rect
|
||||
from .utils import choplist
|
||||
from .utils import mult_matrix
|
||||
from .utils import MATRIX_IDENTITY
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
@ -73,7 +73,6 @@ class PDFTextState:
|
|||
self.reset()
|
||||
# self.matrix is set
|
||||
# self.linematrix is set
|
||||
return
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return '<PDFTextState: font=%r, fontsize=%r, charspace=%r, ' \
|
||||
|
@ -100,7 +99,6 @@ class PDFTextState:
|
|||
def reset(self) -> None:
|
||||
self.matrix = MATRIX_IDENTITY
|
||||
self.linematrix = (0, 0)
|
||||
return
|
||||
|
||||
|
||||
Color = Union[
|
||||
|
@ -125,7 +123,6 @@ class PDFGraphicState:
|
|||
|
||||
# non stroking color
|
||||
self.ncolor: Optional[Color] = None
|
||||
return
|
||||
|
||||
def copy(self) -> "PDFGraphicState":
|
||||
obj = PDFGraphicState()
|
||||
|
@ -160,7 +157,6 @@ class PDFResourceManager:
|
|||
def __init__(self, caching: bool = True) -> None:
|
||||
self.caching = caching
|
||||
self._cached_fonts: Dict[object, PDFFont] = {}
|
||||
return
|
||||
|
||||
def get_procset(self, procs: Sequence[object]) -> None:
|
||||
for proc in procs:
|
||||
|
@ -170,7 +166,6 @@ class PDFResourceManager:
|
|||
pass
|
||||
else:
|
||||
pass
|
||||
return
|
||||
|
||||
def get_cmap(self, cmapname: str, strict: bool = False) -> CMapBase:
|
||||
try:
|
||||
|
@ -234,7 +229,6 @@ class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]):
|
|||
# all the methods that would attempt to access self.fp without first
|
||||
# calling self.fillfp().
|
||||
PSStackParser.__init__(self, None) # type: ignore[arg-type]
|
||||
return
|
||||
|
||||
def fillfp(self) -> None:
|
||||
if not self.fp:
|
||||
|
@ -244,12 +238,10 @@ class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]):
|
|||
else:
|
||||
raise PSEOF('Unexpected EOF, file truncated?')
|
||||
self.fp = BytesIO(strm.get_data())
|
||||
return
|
||||
|
||||
def seek(self, pos: int) -> None:
|
||||
self.fillfp()
|
||||
PSStackParser.seek(self, pos)
|
||||
return
|
||||
|
||||
def fillbuf(self) -> None:
|
||||
if self.charpos < len(self.buf):
|
||||
|
@ -262,7 +254,6 @@ class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]):
|
|||
break
|
||||
self.fp = None # type: ignore[assignment]
|
||||
self.charpos = 0
|
||||
return
|
||||
|
||||
def get_inline_data(
|
||||
self,
|
||||
|
@ -300,7 +291,6 @@ class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]):
|
|||
|
||||
def flush(self) -> None:
|
||||
self.add_results(*self.popall())
|
||||
return
|
||||
|
||||
KEYWORD_BI = KWD(b'BI')
|
||||
KEYWORD_ID = KWD(b'ID')
|
||||
|
@ -327,7 +317,6 @@ class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]):
|
|||
raise
|
||||
else:
|
||||
self.push((pos, token))
|
||||
return
|
||||
|
||||
|
||||
PDFStackT = PSStackType[PDFStream]
|
||||
|
|
|
@ -1,16 +1,16 @@
|
|||
import logging
|
||||
from pdfminer.utils import Rect
|
||||
from typing import BinaryIO, Container, Dict, Iterator, List, Optional, Tuple
|
||||
|
||||
from pdfminer.utils import Rect
|
||||
from . import settings
|
||||
from .psparser import LIT
|
||||
from .pdfdocument import PDFDocument, PDFTextExtractionNotAllowed
|
||||
from .pdfparser import PDFParser
|
||||
from .pdftypes import PDFObjectNotFound
|
||||
from .pdftypes import resolve1
|
||||
from .pdftypes import dict_value
|
||||
from .pdftypes import int_value
|
||||
from .pdftypes import list_value
|
||||
from .pdftypes import dict_value
|
||||
from .pdfparser import PDFParser
|
||||
from .pdfdocument import PDFDocument, PDFTextExtractionNotAllowed
|
||||
|
||||
from .pdftypes import resolve1
|
||||
from .psparser import LIT
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
@ -73,7 +73,6 @@ class PDFPage:
|
|||
if not isinstance(contents, list):
|
||||
contents = [contents]
|
||||
self.contents: List[object] = contents
|
||||
return
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return '<PDFPage: Resources={!r}, MediaBox={!r}>'\
|
||||
|
|
|
@ -1,17 +1,18 @@
|
|||
import logging
|
||||
from io import BytesIO
|
||||
from typing import BinaryIO, TYPE_CHECKING, Optional, Union
|
||||
from .psparser import PSStackParser
|
||||
from .psparser import PSKeyword
|
||||
from .psparser import PSSyntaxError
|
||||
from .psparser import PSEOF
|
||||
from .psparser import KWD
|
||||
|
||||
from . import settings
|
||||
from .pdftypes import PDFException
|
||||
from .pdftypes import PDFStream
|
||||
from .pdftypes import PDFObjRef
|
||||
from .pdftypes import int_value
|
||||
from .pdftypes import PDFStream
|
||||
from .pdftypes import dict_value
|
||||
from .pdftypes import int_value
|
||||
from .psparser import KWD
|
||||
from .psparser import PSEOF
|
||||
from .psparser import PSKeyword
|
||||
from .psparser import PSStackParser
|
||||
from .psparser import PSSyntaxError
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .pdfdocument import PDFDocument
|
||||
|
@ -45,12 +46,10 @@ class PDFParser(PSStackParser[Union[PSKeyword, PDFStream, PDFObjRef, None]]):
|
|||
PSStackParser.__init__(self, fp)
|
||||
self.doc: Optional["PDFDocument"] = None
|
||||
self.fallback = False
|
||||
return
|
||||
|
||||
def set_document(self, doc: "PDFDocument") -> None:
|
||||
"""Associates the parser with a PDFDocument object."""
|
||||
self.doc = doc
|
||||
return
|
||||
|
||||
KEYWORD_R = KWD(b'R')
|
||||
KEYWORD_NULL = KWD(b'null')
|
||||
|
@ -134,8 +133,6 @@ class PDFParser(PSStackParser[Union[PSKeyword, PDFStream, PDFObjRef, None]]):
|
|||
# others
|
||||
self.push((pos, token))
|
||||
|
||||
return
|
||||
|
||||
|
||||
class PDFStreamParser(PDFParser):
|
||||
"""
|
||||
|
@ -148,11 +145,9 @@ class PDFStreamParser(PDFParser):
|
|||
|
||||
def __init__(self, data: bytes) -> None:
|
||||
PDFParser.__init__(self, BytesIO(data))
|
||||
return
|
||||
|
||||
def flush(self) -> None:
|
||||
self.add_results(*self.popall())
|
||||
return
|
||||
|
||||
KEYWORD_OBJ = KWD(b'obj')
|
||||
|
||||
|
@ -176,4 +171,3 @@ class PDFStreamParser(PDFParser):
|
|||
return
|
||||
# others
|
||||
self.push((pos, token))
|
||||
return
|
||||
|
|
|
@ -1,25 +1,24 @@
|
|||
import zlib
|
||||
import logging
|
||||
import io
|
||||
import logging
|
||||
import sys
|
||||
import zlib
|
||||
from typing import (TYPE_CHECKING, Any, Dict, Iterable, Optional, Union, List,
|
||||
Tuple, cast)
|
||||
|
||||
from .lzw import lzwdecode
|
||||
from . import settings
|
||||
from .ascii85 import ascii85decode
|
||||
from .ascii85 import asciihexdecode
|
||||
from .runlength import rldecode
|
||||
from .ccitt import ccittfaxdecode
|
||||
from .lzw import lzwdecode
|
||||
from .psparser import LIT
|
||||
from .psparser import PSException
|
||||
from .psparser import PSObject
|
||||
from .psparser import LIT
|
||||
from . import settings
|
||||
from .runlength import rldecode
|
||||
from .utils import apply_png_predictor
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .pdfdocument import PDFDocument
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
LITERAL_CRYPT = LIT('Crypt')
|
||||
|
@ -88,7 +87,6 @@ class PDFObjRef(PDFObject):
|
|||
raise PDFValueError('PDF object id cannot be 0.')
|
||||
self.doc = doc
|
||||
self.objid = objid
|
||||
return
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return '<PDFObjRef:%d>' % (self.objid)
|
||||
|
@ -255,12 +253,10 @@ class PDFStream(PDFObject):
|
|||
self.data: Optional[bytes] = None
|
||||
self.objid: Optional[int] = None
|
||||
self.genno: Optional[int] = None
|
||||
return
|
||||
|
||||
def set_objid(self, objid: int, genno: int) -> None:
|
||||
self.objid = objid
|
||||
self.genno = genno
|
||||
return
|
||||
|
||||
def __repr__(self) -> str:
|
||||
if self.data is None:
|
||||
|
|
|
@ -2,8 +2,8 @@
|
|||
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import re
|
||||
import logging
|
||||
import re
|
||||
from typing import (Any, BinaryIO, Dict, Generic, Iterator, List,
|
||||
Optional, Tuple, Type, TypeVar, Union)
|
||||
|
||||
|
@ -76,7 +76,6 @@ class PSKeyword(PSObject):
|
|||
|
||||
def __init__(self, name: bytes) -> None:
|
||||
self.name = name
|
||||
return
|
||||
|
||||
def __repr__(self) -> str:
|
||||
name = self.name
|
||||
|
@ -95,7 +94,6 @@ class PSSymbolTable(Generic[_SymbolT]):
|
|||
def __init__(self, klass: Type[_SymbolT]) -> None:
|
||||
self.dict: Dict[PSLiteral.NameType, _SymbolT] = {}
|
||||
self.klass: Type[_SymbolT] = klass
|
||||
return
|
||||
|
||||
def intern(self, name: PSLiteral.NameType) -> _SymbolT:
|
||||
if name in self.dict:
|
||||
|
@ -182,7 +180,6 @@ class PSBaseParser:
|
|||
def __init__(self, fp: BinaryIO) -> None:
|
||||
self.fp = fp
|
||||
self.seek(0)
|
||||
return
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return '<%s: %r, bufpos=%d>' % (self.__class__.__name__, self.fp,
|
||||
|
|
|
@ -4,10 +4,10 @@ Miscellaneous Routines.
|
|||
import io
|
||||
import pathlib
|
||||
import struct
|
||||
from html import escape
|
||||
from typing import (Any, BinaryIO, Callable, Dict, Generic, Iterable, Iterator,
|
||||
List, Optional, Set, TextIO, Tuple, TypeVar, Union,
|
||||
TYPE_CHECKING, cast)
|
||||
from html import escape
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .layout import LTComponent
|
||||
|
@ -57,7 +57,6 @@ class open_filename(object):
|
|||
) -> None:
|
||||
if self.closing:
|
||||
self.file_handler.close()
|
||||
return
|
||||
|
||||
|
||||
def make_compat_bytes(in_str: str) -> bytes:
|
||||
|
|
Loading…
Reference in New Issue