Remove obsolete returns (#707)

* Remove obsolete returns

* Update CHANGELOG.md

* Remove empty lines

* Remove more empty lines
pull/680/head
Pieter Marsman 2022-02-01 01:49:46 +01:00 committed by GitHub
parent 2610ef13af
commit b19f9e7270
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 94 additions and 170 deletions

View File

@ -20,6 +20,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
### Changed ### Changed
- Replace warnings.warn with logging.Logger.warning in line with [recommended use](https://docs.python.org/3/howto/logging.html#when-to-use-logging) ([#673](https://github.com/pdfminer/pdfminer.six/pull/673)) - Replace warnings.warn with logging.Logger.warning in line with [recommended use](https://docs.python.org/3/howto/logging.html#when-to-use-logging) ([#673](https://github.com/pdfminer/pdfminer.six/pull/673))
### Removed
- Unnecessary return statements without argument at the end of functions ([#707](https://github.com/pdfminer/pdfminer.six/pull/707))
## [20211012] ## [20211012]
### Added ### Added

View File

@ -20,7 +20,6 @@ class Arcfour:
(s[i], s[j]) = (s[j], s[i]) (s[i], s[j]) = (s[j], s[i])
self.s = s self.s = s
(self.i, self.j) = (0, 0) (self.i, self.j) = (0, 0)
return
def process(self, data: bytes) -> bytes: def process(self, data: bytes) -> bytes:
(i, j) = (self.i, self.j) (i, j) = (self.i, self.j)

View File

@ -35,7 +35,6 @@ class BitParser:
def __init__(self) -> None: def __init__(self) -> None:
self._pos = 0 self._pos = 0
return
@classmethod @classmethod
def add(cls, root: BitParserState, v: Union[int, str], bits: str) -> None: def add(cls, root: BitParserState, v: Union[int, str], bits: str) -> None:
@ -53,13 +52,11 @@ class BitParser:
b = 0 b = 0
assert b is not None assert b is not None
p[b] = v p[b] = v
return
def feedbytes(self, data: bytes) -> None: def feedbytes(self, data: bytes) -> None:
for byte in get_bytes(data): for byte in get_bytes(data):
for m in (128, 64, 32, 16, 8, 4, 2, 1): for m in (128, 64, 32, 16, 8, 4, 2, 1):
self._parse_bit(byte & m) self._parse_bit(byte & m)
return
def _parse_bit(self, x: object) -> None: def _parse_bit(self, x: object) -> None:
if x: if x:
@ -72,7 +69,6 @@ class BitParser:
else: else:
assert self._accept is not None assert self._accept is not None
self._state = self._accept(v) self._state = self._accept(v)
return
class CCITTG4Parser(BitParser): class CCITTG4Parser(BitParser):

View File

@ -9,27 +9,27 @@ More information is available on the Adobe website:
""" """
import sys import gzip
import logging
import os import os
import os.path import os.path
import gzip
import pickle as pickle import pickle as pickle
import struct import struct
import logging import sys
from typing import (Any, BinaryIO, Dict, Iterable, Iterator, List, from typing import (Any, BinaryIO, Dict, Iterable, Iterator, List,
MutableMapping, Optional, TextIO, Tuple, Union, cast) MutableMapping, Optional, TextIO, Tuple, Union, cast)
from .psparser import PSStackParser
from .psparser import PSSyntaxError from .encodingdb import name2unicode
from .psparser import KWD
from .psparser import PSEOF from .psparser import PSEOF
from .psparser import PSKeyword from .psparser import PSKeyword
from .psparser import PSLiteral from .psparser import PSLiteral
from .psparser import PSStackParser
from .psparser import PSSyntaxError
from .psparser import literal_name from .psparser import literal_name
from .psparser import KWD
from .encodingdb import name2unicode
from .utils import choplist from .utils import choplist
from .utils import nunpack from .utils import nunpack
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@ -43,24 +43,22 @@ class CMapBase:
def __init__(self, **kwargs: object) -> None: def __init__(self, **kwargs: object) -> None:
self.attrs: MutableMapping[str, object] = kwargs.copy() self.attrs: MutableMapping[str, object] = kwargs.copy()
return
def is_vertical(self) -> bool: def is_vertical(self) -> bool:
return self.attrs.get('WMode', 0) != 0 return self.attrs.get('WMode', 0) != 0
def set_attr(self, k: str, v: object) -> None: def set_attr(self, k: str, v: object) -> None:
self.attrs[k] = v self.attrs[k] = v
return
def add_code2cid(self, code: str, cid: int) -> None: def add_code2cid(self, code: str, cid: int) -> None:
return pass
def add_cid2unichr(self, cid: int, code: Union[PSLiteral, bytes, int] def add_cid2unichr(self, cid: int, code: Union[PSLiteral, bytes, int]
) -> None: ) -> None:
return pass
def use_cmap(self, cmap: "CMapBase") -> None: def use_cmap(self, cmap: "CMapBase") -> None:
return pass
def decode(self, code: bytes) -> Iterable[int]: def decode(self, code: bytes) -> Iterable[int]:
raise NotImplementedError raise NotImplementedError
@ -71,7 +69,6 @@ class CMap(CMapBase):
def __init__(self, **kwargs: Union[str, int]) -> None: def __init__(self, **kwargs: Union[str, int]) -> None:
CMapBase.__init__(self, **kwargs) CMapBase.__init__(self, **kwargs)
self.code2cid: Dict[int, object] = {} self.code2cid: Dict[int, object] = {}
return
def __repr__(self) -> str: def __repr__(self) -> str:
return '<CMap: %s>' % self.attrs.get('CMapName') return '<CMap: %s>' % self.attrs.get('CMapName')
@ -88,7 +85,6 @@ class CMap(CMapBase):
else: else:
dst[k] = v dst[k] = v
copy(self.code2cid, cmap.code2cid) copy(self.code2cid, cmap.code2cid)
return
def decode(self, code: bytes) -> Iterator[int]: def decode(self, code: bytes) -> Iterator[int]:
log.debug('decode: %r, %r', self, code) log.debug('decode: %r, %r', self, code)
@ -103,7 +99,6 @@ class CMap(CMapBase):
d = cast(Dict[int, object], x) d = cast(Dict[int, object], x)
else: else:
d = self.code2cid d = self.code2cid
return
def dump(self, out: TextIO = sys.stdout, def dump(self, out: TextIO = sys.stdout,
code2cid: Optional[Dict[int, object]] = None, code2cid: Optional[Dict[int, object]] = None,
@ -117,7 +112,6 @@ class CMap(CMapBase):
out.write('code %r = cid %d\n' % (c, v)) out.write('code %r = cid %d\n' % (c, v))
else: else:
self.dump(out=out, code2cid=cast(Dict[int, object], v), code=c) self.dump(out=out, code2cid=cast(Dict[int, object], v), code=c)
return
class IdentityCMap(CMapBase): class IdentityCMap(CMapBase):
@ -145,7 +139,6 @@ class UnicodeMap(CMapBase):
def __init__(self, **kwargs: Union[str, int]) -> None: def __init__(self, **kwargs: Union[str, int]) -> None:
CMapBase.__init__(self, **kwargs) CMapBase.__init__(self, **kwargs)
self.cid2unichr: Dict[int, str] = {} self.cid2unichr: Dict[int, str] = {}
return
def __repr__(self) -> str: def __repr__(self) -> str:
return '<UnicodeMap: %s>' % self.attrs.get('CMapName') return '<UnicodeMap: %s>' % self.attrs.get('CMapName')
@ -157,7 +150,6 @@ class UnicodeMap(CMapBase):
def dump(self, out: TextIO = sys.stdout) -> None: def dump(self, out: TextIO = sys.stdout) -> None:
for (k, v) in sorted(self.cid2unichr.items()): for (k, v) in sorted(self.cid2unichr.items()):
out.write('cid %d = unicode %r\n' % (k, v)) out.write('cid %d = unicode %r\n' % (k, v))
return
class IdentityUnicodeMap(UnicodeMap): class IdentityUnicodeMap(UnicodeMap):
@ -183,7 +175,6 @@ class FileCMap(CMap):
d = t d = t
ci = ord(code[-1]) ci = ord(code[-1])
d[ci] = cid d[ci] = cid
return
class FileUnicodeMap(UnicodeMap): class FileUnicodeMap(UnicodeMap):
@ -202,7 +193,6 @@ class FileUnicodeMap(UnicodeMap):
self.cid2unichr[cid] = chr(code) self.cid2unichr[cid] = chr(code)
else: else:
raise TypeError(code) raise TypeError(code)
return
class PyCMap(CMap): class PyCMap(CMap):
@ -212,7 +202,6 @@ class PyCMap(CMap):
self.code2cid = module.CODE2CID self.code2cid = module.CODE2CID
if module.IS_VERTICAL: if module.IS_VERTICAL:
self.attrs['WMode'] = 1 self.attrs['WMode'] = 1
return
class PyUnicodeMap(UnicodeMap): class PyUnicodeMap(UnicodeMap):
@ -224,7 +213,6 @@ class PyUnicodeMap(UnicodeMap):
self.attrs['WMode'] = 1 self.attrs['WMode'] = 1
else: else:
self.cid2unichr = module.CID2UNICHR_H self.cid2unichr = module.CID2UNICHR_H
return
class CMapDB: class CMapDB:

View File

@ -1,11 +1,12 @@
import io import io
import logging import logging
from pdfminer.pdfcolor import PDFColorSpace import re
from typing import (BinaryIO, Dict, Generic, List, Optional, Sequence, TextIO, from typing import (BinaryIO, Dict, Generic, List, Optional, Sequence, TextIO,
Tuple, TypeVar, Union, cast) Tuple, TypeVar, Union, cast)
import re
from pdfminer.pdfcolor import PDFColorSpace
from . import utils from . import utils
from .image import ImageWriter
from .layout import LAParams, LTComponent, TextGroupElement from .layout import LAParams, LTComponent, TextGroupElement
from .layout import LTChar from .layout import LTChar
from .layout import LTContainer from .layout import LTContainer
@ -33,7 +34,6 @@ from .utils import apply_matrix_pt
from .utils import bbox2str from .utils import bbox2str
from .utils import enc from .utils import enc
from .utils import mult_matrix from .utils import mult_matrix
from .image import ImageWriter
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@ -52,7 +52,6 @@ class PDFLayoutAnalyzer(PDFTextDevice):
self.pageno = pageno self.pageno = pageno
self.laparams = laparams self.laparams = laparams
self._stack: List[LTLayoutContainer] = [] self._stack: List[LTLayoutContainer] = []
return
def begin_page(self, page: PDFPage, ctm: Matrix) -> None: def begin_page(self, page: PDFPage, ctm: Matrix) -> None:
(x0, y0, x1, y1) = page.mediabox (x0, y0, x1, y1) = page.mediabox
@ -60,7 +59,6 @@ class PDFLayoutAnalyzer(PDFTextDevice):
(x1, y1) = apply_matrix_pt(ctm, (x1, y1)) (x1, y1) = apply_matrix_pt(ctm, (x1, y1))
mediabox = (0, 0, abs(x0-x1), abs(y0-y1)) mediabox = (0, 0, abs(x0-x1), abs(y0-y1))
self.cur_item = LTPage(self.pageno, mediabox) self.cur_item = LTPage(self.pageno, mediabox)
return
def end_page(self, page: PDFPage) -> None: def end_page(self, page: PDFPage) -> None:
assert not self._stack, str(len(self._stack)) assert not self._stack, str(len(self._stack))
@ -69,19 +67,16 @@ class PDFLayoutAnalyzer(PDFTextDevice):
self.cur_item.analyze(self.laparams) self.cur_item.analyze(self.laparams)
self.pageno += 1 self.pageno += 1
self.receive_layout(self.cur_item) self.receive_layout(self.cur_item)
return
def begin_figure(self, name: str, bbox: Rect, matrix: Matrix) -> None: def begin_figure(self, name: str, bbox: Rect, matrix: Matrix) -> None:
self._stack.append(self.cur_item) self._stack.append(self.cur_item)
self.cur_item = LTFigure(name, bbox, mult_matrix(matrix, self.ctm)) self.cur_item = LTFigure(name, bbox, mult_matrix(matrix, self.ctm))
return
def end_figure(self, _: str) -> None: def end_figure(self, _: str) -> None:
fig = self.cur_item fig = self.cur_item
assert isinstance(self.cur_item, LTFigure), str(type(self.cur_item)) assert isinstance(self.cur_item, LTFigure), str(type(self.cur_item))
self.cur_item = self._stack.pop() self.cur_item = self._stack.pop()
self.cur_item.add(fig) self.cur_item.add(fig)
return
def render_image(self, name: str, stream: PDFStream) -> None: def render_image(self, name: str, stream: PDFStream) -> None:
assert isinstance(self.cur_item, LTFigure), str(type(self.cur_item)) assert isinstance(self.cur_item, LTFigure), str(type(self.cur_item))
@ -89,7 +84,6 @@ class PDFLayoutAnalyzer(PDFTextDevice):
(self.cur_item.x0, self.cur_item.y0, (self.cur_item.x0, self.cur_item.y0,
self.cur_item.x1, self.cur_item.y1)) self.cur_item.x1, self.cur_item.y1))
self.cur_item.add(item) self.cur_item.add(item)
return
def paint_path( def paint_path(
self, self,
@ -178,7 +172,7 @@ class PDFLayoutAnalyzer(PDFTextDevice):
return '(cid:%d)' % cid return '(cid:%d)' % cid
def receive_layout(self, ltpage: LTPage) -> None: def receive_layout(self, ltpage: LTPage) -> None:
return pass
class PDFPageAggregator(PDFLayoutAnalyzer): class PDFPageAggregator(PDFLayoutAnalyzer):
@ -191,11 +185,9 @@ class PDFPageAggregator(PDFLayoutAnalyzer):
PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno, PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno,
laparams=laparams) laparams=laparams)
self.result: Optional[LTPage] = None self.result: Optional[LTPage] = None
return
def receive_layout(self, ltpage: LTPage) -> None: def receive_layout(self, ltpage: LTPage) -> None:
self.result = ltpage self.result = ltpage
return
def get_result(self) -> LTPage: def get_result(self) -> LTPage:
assert self.result is not None assert self.result is not None
@ -254,7 +246,6 @@ class TextConverter(PDFConverter[AnyIO]):
laparams=laparams) laparams=laparams)
self.showpageno = showpageno self.showpageno = showpageno
self.imagewriter = imagewriter self.imagewriter = imagewriter
return
def write_text(self, text: str) -> None: def write_text(self, text: str) -> None:
text = utils.compatible_encode_method(text, self.codec, 'ignore') text = utils.compatible_encode_method(text, self.codec, 'ignore')
@ -262,7 +253,6 @@ class TextConverter(PDFConverter[AnyIO]):
cast(BinaryIO, self.outfp).write(text.encode()) cast(BinaryIO, self.outfp).write(text.encode())
else: else:
cast(TextIO, self.outfp).write(text) cast(TextIO, self.outfp).write(text)
return
def receive_layout(self, ltpage: LTPage) -> None: def receive_layout(self, ltpage: LTPage) -> None:
def render(item: LTItem) -> None: def render(item: LTItem) -> None:
@ -280,7 +270,6 @@ class TextConverter(PDFConverter[AnyIO]):
self.write_text('Page %s\n' % ltpage.pageid) self.write_text('Page %s\n' % ltpage.pageid)
render(ltpage) render(ltpage)
self.write_text('\f') self.write_text('\f')
return
# Some dummy functions to save memory/CPU when all that is wanted # Some dummy functions to save memory/CPU when all that is wanted
# is text. This stops all the image and drawing output from being # is text. This stops all the image and drawing output from being

View File

@ -58,12 +58,10 @@ class BMPWriter:
self.fp.write(struct.pack('BBBx', i, i, i)) self.fp.write(struct.pack('BBBx', i, i, i))
self.pos0 = self.fp.tell() self.pos0 = self.fp.tell()
self.pos1 = self.pos0 + self.datasize self.pos1 = self.pos0 + self.datasize
return
def write_line(self, y: int, data: bytes) -> None: def write_line(self, y: int, data: bytes) -> None:
self.fp.seek(self.pos1 - (y+1)*self.linesize) self.fp.seek(self.pos1 - (y+1)*self.linesize)
self.fp.write(data) self.fp.write(data)
return
class ImageWriter: class ImageWriter:
@ -76,7 +74,6 @@ class ImageWriter:
self.outdir = outdir self.outdir = outdir
if not os.path.exists(self.outdir): if not os.path.exists(self.outdir):
os.makedirs(self.outdir) os.makedirs(self.outdir)
return
def export_image(self, image: LTImage) -> str: def export_image(self, image: LTImage) -> str:
(width, height) = image.srcsize (width, height) = image.srcsize

View File

@ -3,6 +3,11 @@ import logging
from typing import (Dict, Generic, Iterable, Iterator, List, Optional, from typing import (Dict, Generic, Iterable, Iterator, List, Optional,
Sequence, Set, Tuple, TypeVar, Union, cast) Sequence, Set, Tuple, TypeVar, Union, cast)
from .pdfcolor import PDFColorSpace
from .pdffont import PDFFont
from .pdfinterp import Color
from .pdfinterp import PDFGraphicState
from .pdftypes import PDFStream
from .utils import INF from .utils import INF
from .utils import LTComponentT from .utils import LTComponentT
from .utils import Matrix from .utils import Matrix
@ -15,11 +20,6 @@ from .utils import fsplit
from .utils import get_bound from .utils import get_bound
from .utils import matrix2str from .utils import matrix2str
from .utils import uniq from .utils import uniq
from .pdfcolor import PDFColorSpace
from .pdftypes import PDFStream
from .pdfinterp import Color
from .pdfinterp import PDFGraphicState
from .pdffont import PDFFont
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -28,7 +28,6 @@ class IndexAssigner:
def __init__(self, index: int = 0) -> None: def __init__(self, index: int = 0) -> None:
self.index = index self.index = index
return
def run(self, obj: "LTItem") -> None: def run(self, obj: "LTItem") -> None:
if isinstance(obj, LTTextBox): if isinstance(obj, LTTextBox):
@ -37,7 +36,6 @@ class IndexAssigner:
elif isinstance(obj, LTTextGroup): elif isinstance(obj, LTTextGroup):
for x in obj: for x in obj:
self.run(x) self.run(x)
return
class LAParams: class LAParams:
@ -87,7 +85,6 @@ class LAParams:
self.all_texts = all_texts self.all_texts = all_texts
self._validate() self._validate()
return
def _validate(self) -> None: def _validate(self) -> None:
if self.boxes_flow is not None: if self.boxes_flow is not None:
@ -111,7 +108,7 @@ class LTItem:
def analyze(self, laparams: LAParams) -> None: def analyze(self, laparams: LAParams) -> None:
"""Perform the layout analysis.""" """Perform the layout analysis."""
return pass
class LTText: class LTText:
@ -132,7 +129,6 @@ class LTComponent(LTItem):
def __init__(self, bbox: Rect) -> None: def __init__(self, bbox: Rect) -> None:
LTItem.__init__(self) LTItem.__init__(self)
self.set_bbox(bbox) self.set_bbox(bbox)
return
def __repr__(self) -> str: def __repr__(self) -> str:
return ('<%s %s>' % return ('<%s %s>' %
@ -160,7 +156,6 @@ class LTComponent(LTItem):
self.width = x1-x0 self.width = x1-x0
self.height = y1-y0 self.height = y1-y0
self.bbox = bbox self.bbox = bbox
return
def is_empty(self) -> bool: def is_empty(self) -> bool:
return self.width <= 0 or self.height <= 0 return self.width <= 0 or self.height <= 0
@ -223,7 +218,6 @@ class LTCurve(LTComponent):
self.evenodd = evenodd self.evenodd = evenodd
self.stroking_color = stroking_color self.stroking_color = stroking_color
self.non_stroking_color = non_stroking_color self.non_stroking_color = non_stroking_color
return
def get_pts(self) -> str: def get_pts(self) -> str:
return ','.join('%.3f,%.3f' % p for p in self.pts) return ','.join('%.3f,%.3f' % p for p in self.pts)
@ -248,7 +242,6 @@ class LTLine(LTCurve):
) -> None: ) -> None:
LTCurve.__init__(self, linewidth, [p0, p1], stroke, fill, evenodd, LTCurve.__init__(self, linewidth, [p0, p1], stroke, fill, evenodd,
stroking_color, non_stroking_color) stroking_color, non_stroking_color)
return
class LTRect(LTCurve): class LTRect(LTCurve):
@ -271,7 +264,6 @@ class LTRect(LTCurve):
LTCurve.__init__(self, linewidth, LTCurve.__init__(self, linewidth,
[(x0, y0), (x1, y0), (x1, y1), (x0, y1)], stroke, [(x0, y0), (x1, y0), (x1, y1), (x0, y1)], stroke,
fill, evenodd, stroking_color, non_stroking_color) fill, evenodd, stroking_color, non_stroking_color)
return
class LTImage(LTComponent): class LTImage(LTComponent):
@ -291,7 +283,6 @@ class LTImage(LTComponent):
self.colorspace = stream.get_any(('CS', 'ColorSpace')) self.colorspace = stream.get_any(('CS', 'ColorSpace'))
if not isinstance(self.colorspace, list): if not isinstance(self.colorspace, list):
self.colorspace = [self.colorspace] self.colorspace = [self.colorspace]
return
def __repr__(self) -> str: def __repr__(self) -> str:
return ('<%s(%s) %s %r>' % return ('<%s(%s) %s %r>' %

View File

@ -1,8 +1,7 @@
from io import BytesIO
import logging import logging
from io import BytesIO
from typing import BinaryIO, Iterator, List, Optional, cast from typing import BinaryIO, Iterator, List, Optional, cast
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -20,7 +19,6 @@ class LZWDecoder:
# NB: self.table stores None only in indices 256 and 257 # NB: self.table stores None only in indices 256 and 257
self.table: Optional[List[Optional[bytes]]] = None self.table: Optional[List[Optional[bytes]]] = None
self.prevbuf: Optional[bytes] = None self.prevbuf: Optional[bytes] = None
return
def readbits(self, bits: int) -> int: def readbits(self, bits: int) -> int:
v = 0 v = 0
@ -95,7 +93,6 @@ class LZWDecoder:
assert self.table is not None assert self.table is not None
logger.debug('nbits=%d, code=%d, output=%r, table=%r' logger.debug('nbits=%d, code=%d, output=%r, table=%r'
% (self.nbits, code, x, self.table[258:])) % (self.nbits, code, x, self.table[258:]))
return
def lzwdecode(data: bytes) -> bytes: def lzwdecode(data: bytes) -> bytes:

View File

@ -1,7 +1,7 @@
import collections import collections
from typing import Dict from typing import Dict
from .psparser import LIT
from .psparser import LIT
LITERAL_DEVICE_GRAY = LIT('DeviceGray') LITERAL_DEVICE_GRAY = LIT('DeviceGray')
LITERAL_DEVICE_RGB = LIT('DeviceRGB') LITERAL_DEVICE_RGB = LIT('DeviceRGB')
@ -13,7 +13,6 @@ class PDFColorSpace:
def __init__(self, name: str, ncomponents: int) -> None: def __init__(self, name: str, ncomponents: int) -> None:
self.name = name self.name = name
self.ncomponents = ncomponents self.ncomponents = ncomponents
return
def __repr__(self) -> str: def __repr__(self) -> str:
return '<PDFColorSpace: %s, ncomponents=%d>' % \ return '<PDFColorSpace: %s, ncomponents=%d>' % \

View File

@ -1,13 +1,14 @@
from pdfminer.psparser import PSLiteral
from typing import (BinaryIO, Iterable, List, Optional, Sequence, from typing import (BinaryIO, Iterable, List, Optional, Sequence,
TYPE_CHECKING, Union, cast) TYPE_CHECKING, Union, cast)
from pdfminer.psparser import PSLiteral
from . import utils from . import utils
from .utils import Matrix, Point, Rect, PathSegment
from .pdfcolor import PDFColorSpace from .pdfcolor import PDFColorSpace
from .pdffont import PDFFont from .pdffont import PDFFont
from .pdffont import PDFUnicodeNotDefined from .pdffont import PDFUnicodeNotDefined
from .pdfpage import PDFPage from .pdfpage import PDFPage
from .pdftypes import PDFStream from .pdftypes import PDFStream
from .utils import Matrix, Point, Rect, PathSegment
if TYPE_CHECKING: if TYPE_CHECKING:
from .pdfinterp import PDFGraphicState from .pdfinterp import PDFGraphicState
@ -26,7 +27,6 @@ class PDFDevice:
def __init__(self, rsrcmgr: "PDFResourceManager") -> None: def __init__(self, rsrcmgr: "PDFResourceManager") -> None:
self.rsrcmgr = rsrcmgr self.rsrcmgr = rsrcmgr
self.ctm: Optional[Matrix] = None self.ctm: Optional[Matrix] = None
return
def __repr__(self) -> str: def __repr__(self) -> str:
return '<PDFDevice>' return '<PDFDevice>'
@ -43,40 +43,39 @@ class PDFDevice:
self.close() self.close()
def close(self) -> None: def close(self) -> None:
return pass
def set_ctm(self, ctm: Matrix) -> None: def set_ctm(self, ctm: Matrix) -> None:
self.ctm = ctm self.ctm = ctm
return
def begin_tag( def begin_tag(
self, self,
tag: PSLiteral, tag: PSLiteral,
props: Optional["PDFStackT"] = None props: Optional["PDFStackT"] = None
) -> None: ) -> None:
return pass
def end_tag(self) -> None: def end_tag(self) -> None:
return pass
def do_tag( def do_tag(
self, self,
tag: PSLiteral, tag: PSLiteral,
props: Optional["PDFStackT"] = None props: Optional["PDFStackT"] = None
) -> None: ) -> None:
return pass
def begin_page(self, page: PDFPage, ctm: Matrix) -> None: def begin_page(self, page: PDFPage, ctm: Matrix) -> None:
return pass
def end_page(self, page: PDFPage) -> None: def end_page(self, page: PDFPage) -> None:
return pass
def begin_figure(self, name: str, bbox: Rect, matrix: Matrix) -> None: def begin_figure(self, name: str, bbox: Rect, matrix: Matrix) -> None:
return pass
def end_figure(self, name: str) -> None: def end_figure(self, name: str) -> None:
return pass
def paint_path( def paint_path(
self, self,
@ -86,10 +85,10 @@ class PDFDevice:
evenodd: bool, evenodd: bool,
path: Sequence[PathSegment] path: Sequence[PathSegment]
) -> None: ) -> None:
return pass
def render_image(self, name: str, stream: PDFStream) -> None: def render_image(self, name: str, stream: PDFStream) -> None:
return pass
def render_string( def render_string(
self, self,
@ -98,7 +97,7 @@ class PDFDevice:
ncs: PDFColorSpace, ncs: PDFColorSpace,
graphicstate: "PDFGraphicState" graphicstate: "PDFGraphicState"
) -> None: ) -> None:
return pass
class PDFTextDevice(PDFDevice): class PDFTextDevice(PDFDevice):
@ -132,7 +131,6 @@ class PDFTextDevice(PDFDevice):
seq, matrix, textstate.linematrix, font, fontsize, seq, matrix, textstate.linematrix, font, fontsize,
scaling, charspace, wordspace, rise, dxscale, ncs, scaling, charspace, wordspace, rise, dxscale, ncs,
graphicstate) graphicstate)
return
def render_string_horizontal( def render_string_horizontal(
self, self,
@ -227,7 +225,6 @@ class TagExtractor(PDFDevice):
self.codec = codec self.codec = codec
self.pageno = 0 self.pageno = 0
self._stack: List[PSLiteral] = [] self._stack: List[PSLiteral] = []
return
def render_string( def render_string(
self, self,
@ -252,7 +249,6 @@ class TagExtractor(PDFDevice):
except PDFUnicodeNotDefined: except PDFUnicodeNotDefined:
pass pass
self._write(utils.enc(text)) self._write(utils.enc(text))
return
def begin_page(self, page: PDFPage, ctm: Matrix) -> None: def begin_page(self, page: PDFPage, ctm: Matrix) -> None:
output = '<page id="%s" bbox="%s" rotate="%d">' %\ output = '<page id="%s" bbox="%s" rotate="%d">' %\

View File

@ -100,7 +100,6 @@ class PDFXRef(PDFBaseXRef):
def __init__(self) -> None: def __init__(self) -> None:
self.offsets: Dict[int, Tuple[Optional[int], int, int]] = {} self.offsets: Dict[int, Tuple[Optional[int], int, int]] = {}
self.trailer: Dict[str, Any] = {} self.trailer: Dict[str, Any] = {}
return
def __repr__(self) -> str: def __repr__(self) -> str:
return '<PDFXRef: offsets=%r>' % (self.offsets.keys()) return '<PDFXRef: offsets=%r>' % (self.offsets.keys())
@ -145,7 +144,6 @@ class PDFXRef(PDFBaseXRef):
self.offsets[objid] = (None, int(pos_b), int(genno_b)) self.offsets[objid] = (None, int(pos_b), int(genno_b))
log.info('xref objects: %r', self.offsets) log.info('xref objects: %r', self.offsets)
self.load_trailer(parser) self.load_trailer(parser)
return
def load_trailer(self, parser: PDFParser) -> None: def load_trailer(self, parser: PDFParser) -> None:
try: try:
@ -159,7 +157,6 @@ class PDFXRef(PDFBaseXRef):
(_, dic) = x[0] (_, dic) = x[0]
self.trailer.update(dict_value(dic)) self.trailer.update(dict_value(dic))
log.debug('trailer=%r', self.trailer) log.debug('trailer=%r', self.trailer)
return
def get_trailer(self) -> Dict[str, Any]: def get_trailer(self) -> Dict[str, Any]:
return self.trailer return self.trailer
@ -225,7 +222,6 @@ class PDFXRefFallback(PDFXRef):
for index in range(n): for index in range(n):
objid1 = objs[index*2] objid1 = objs[index*2]
self.offsets[objid1] = (objid, index, 0) self.offsets[objid1] = (objid, index, 0)
return
class PDFXRefStream(PDFBaseXRef): class PDFXRefStream(PDFBaseXRef):
@ -237,7 +233,6 @@ class PDFXRefStream(PDFBaseXRef):
self.fl2: Optional[int] = None self.fl2: Optional[int] = None
self.fl3: Optional[int] = None self.fl3: Optional[int] = None
self.ranges: List[Tuple[int, int]] = [] self.ranges: List[Tuple[int, int]] = []
return
def __repr__(self) -> str: def __repr__(self) -> str:
return '<PDFXRefStream: ranges=%r>' % (self.ranges) return '<PDFXRefStream: ranges=%r>' % (self.ranges)

View File

@ -1,43 +1,43 @@
import re
import logging import logging
from typing import Dict, List, Mapping, Optional, Sequence, Tuple, Union, cast import re
from io import BytesIO from io import BytesIO
from .cmapdb import CMapDB from typing import Dict, List, Mapping, Optional, Sequence, Tuple, Union, cast
from . import settings
from .cmapdb import CMap from .cmapdb import CMap
from .cmapdb import CMapBase from .cmapdb import CMapBase
from .psparser import PSLiteral, PSTypeError from .cmapdb import CMapDB
from .psparser import PSStackType
from .psparser import PSEOF
from .psparser import PSKeyword
from .psparser import literal_name
from .psparser import keyword_name
from .psparser import PSStackParser
from .psparser import LIT
from .psparser import KWD
from . import settings
from .pdfdevice import PDFDevice
from .pdfdevice import PDFTextSeq
from .pdfpage import PDFPage
from .pdftypes import PDFException
from .pdftypes import PDFStream
from .pdftypes import PDFObjRef
from .pdftypes import resolve1
from .pdftypes import list_value
from .pdftypes import dict_value
from .pdftypes import stream_value
from .pdffont import PDFFont
from .pdffont import PDFFontError
from .pdffont import PDFType1Font
from .pdffont import PDFTrueTypeFont
from .pdffont import PDFType3Font
from .pdffont import PDFCIDFont
from .pdfcolor import PDFColorSpace from .pdfcolor import PDFColorSpace
from .pdfcolor import PREDEFINED_COLORSPACE from .pdfcolor import PREDEFINED_COLORSPACE
from .pdfdevice import PDFDevice
from .pdfdevice import PDFTextSeq
from .pdffont import PDFCIDFont
from .pdffont import PDFFont
from .pdffont import PDFFontError
from .pdffont import PDFTrueTypeFont
from .pdffont import PDFType1Font
from .pdffont import PDFType3Font
from .pdfpage import PDFPage
from .pdftypes import PDFException
from .pdftypes import PDFObjRef
from .pdftypes import PDFStream
from .pdftypes import dict_value
from .pdftypes import list_value
from .pdftypes import resolve1
from .pdftypes import stream_value
from .psparser import KWD
from .psparser import LIT
from .psparser import PSEOF
from .psparser import PSKeyword
from .psparser import PSLiteral, PSTypeError
from .psparser import PSStackParser
from .psparser import PSStackType
from .psparser import keyword_name
from .psparser import literal_name
from .utils import MATRIX_IDENTITY
from .utils import Matrix, Point, PathSegment, Rect from .utils import Matrix, Point, PathSegment, Rect
from .utils import choplist from .utils import choplist
from .utils import mult_matrix from .utils import mult_matrix
from .utils import MATRIX_IDENTITY
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@ -73,7 +73,6 @@ class PDFTextState:
self.reset() self.reset()
# self.matrix is set # self.matrix is set
# self.linematrix is set # self.linematrix is set
return
def __repr__(self) -> str: def __repr__(self) -> str:
return '<PDFTextState: font=%r, fontsize=%r, charspace=%r, ' \ return '<PDFTextState: font=%r, fontsize=%r, charspace=%r, ' \
@ -100,7 +99,6 @@ class PDFTextState:
def reset(self) -> None: def reset(self) -> None:
self.matrix = MATRIX_IDENTITY self.matrix = MATRIX_IDENTITY
self.linematrix = (0, 0) self.linematrix = (0, 0)
return
Color = Union[ Color = Union[
@ -125,7 +123,6 @@ class PDFGraphicState:
# non stroking color # non stroking color
self.ncolor: Optional[Color] = None self.ncolor: Optional[Color] = None
return
def copy(self) -> "PDFGraphicState": def copy(self) -> "PDFGraphicState":
obj = PDFGraphicState() obj = PDFGraphicState()
@ -160,7 +157,6 @@ class PDFResourceManager:
def __init__(self, caching: bool = True) -> None: def __init__(self, caching: bool = True) -> None:
self.caching = caching self.caching = caching
self._cached_fonts: Dict[object, PDFFont] = {} self._cached_fonts: Dict[object, PDFFont] = {}
return
def get_procset(self, procs: Sequence[object]) -> None: def get_procset(self, procs: Sequence[object]) -> None:
for proc in procs: for proc in procs:
@ -170,7 +166,6 @@ class PDFResourceManager:
pass pass
else: else:
pass pass
return
def get_cmap(self, cmapname: str, strict: bool = False) -> CMapBase: def get_cmap(self, cmapname: str, strict: bool = False) -> CMapBase:
try: try:
@ -234,7 +229,6 @@ class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]):
# all the methods that would attempt to access self.fp without first # all the methods that would attempt to access self.fp without first
# calling self.fillfp(). # calling self.fillfp().
PSStackParser.__init__(self, None) # type: ignore[arg-type] PSStackParser.__init__(self, None) # type: ignore[arg-type]
return
def fillfp(self) -> None: def fillfp(self) -> None:
if not self.fp: if not self.fp:
@ -244,12 +238,10 @@ class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]):
else: else:
raise PSEOF('Unexpected EOF, file truncated?') raise PSEOF('Unexpected EOF, file truncated?')
self.fp = BytesIO(strm.get_data()) self.fp = BytesIO(strm.get_data())
return
def seek(self, pos: int) -> None: def seek(self, pos: int) -> None:
self.fillfp() self.fillfp()
PSStackParser.seek(self, pos) PSStackParser.seek(self, pos)
return
def fillbuf(self) -> None: def fillbuf(self) -> None:
if self.charpos < len(self.buf): if self.charpos < len(self.buf):
@ -262,7 +254,6 @@ class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]):
break break
self.fp = None # type: ignore[assignment] self.fp = None # type: ignore[assignment]
self.charpos = 0 self.charpos = 0
return
def get_inline_data( def get_inline_data(
self, self,
@ -300,7 +291,6 @@ class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]):
def flush(self) -> None: def flush(self) -> None:
self.add_results(*self.popall()) self.add_results(*self.popall())
return
KEYWORD_BI = KWD(b'BI') KEYWORD_BI = KWD(b'BI')
KEYWORD_ID = KWD(b'ID') KEYWORD_ID = KWD(b'ID')
@ -327,7 +317,6 @@ class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]):
raise raise
else: else:
self.push((pos, token)) self.push((pos, token))
return
PDFStackT = PSStackType[PDFStream] PDFStackT = PSStackType[PDFStream]

View File

@ -1,16 +1,16 @@
import logging import logging
from pdfminer.utils import Rect
from typing import BinaryIO, Container, Dict, Iterator, List, Optional, Tuple from typing import BinaryIO, Container, Dict, Iterator, List, Optional, Tuple
from pdfminer.utils import Rect
from . import settings from . import settings
from .psparser import LIT from .pdfdocument import PDFDocument, PDFTextExtractionNotAllowed
from .pdfparser import PDFParser
from .pdftypes import PDFObjectNotFound from .pdftypes import PDFObjectNotFound
from .pdftypes import resolve1 from .pdftypes import dict_value
from .pdftypes import int_value from .pdftypes import int_value
from .pdftypes import list_value from .pdftypes import list_value
from .pdftypes import dict_value from .pdftypes import resolve1
from .pdfparser import PDFParser from .psparser import LIT
from .pdfdocument import PDFDocument, PDFTextExtractionNotAllowed
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@ -73,7 +73,6 @@ class PDFPage:
if not isinstance(contents, list): if not isinstance(contents, list):
contents = [contents] contents = [contents]
self.contents: List[object] = contents self.contents: List[object] = contents
return
def __repr__(self) -> str: def __repr__(self) -> str:
return '<PDFPage: Resources={!r}, MediaBox={!r}>'\ return '<PDFPage: Resources={!r}, MediaBox={!r}>'\

View File

@ -1,17 +1,18 @@
import logging import logging
from io import BytesIO from io import BytesIO
from typing import BinaryIO, TYPE_CHECKING, Optional, Union from typing import BinaryIO, TYPE_CHECKING, Optional, Union
from .psparser import PSStackParser
from .psparser import PSKeyword
from .psparser import PSSyntaxError
from .psparser import PSEOF
from .psparser import KWD
from . import settings from . import settings
from .pdftypes import PDFException from .pdftypes import PDFException
from .pdftypes import PDFStream
from .pdftypes import PDFObjRef from .pdftypes import PDFObjRef
from .pdftypes import int_value from .pdftypes import PDFStream
from .pdftypes import dict_value from .pdftypes import dict_value
from .pdftypes import int_value
from .psparser import KWD
from .psparser import PSEOF
from .psparser import PSKeyword
from .psparser import PSStackParser
from .psparser import PSSyntaxError
if TYPE_CHECKING: if TYPE_CHECKING:
from .pdfdocument import PDFDocument from .pdfdocument import PDFDocument
@ -45,12 +46,10 @@ class PDFParser(PSStackParser[Union[PSKeyword, PDFStream, PDFObjRef, None]]):
PSStackParser.__init__(self, fp) PSStackParser.__init__(self, fp)
self.doc: Optional["PDFDocument"] = None self.doc: Optional["PDFDocument"] = None
self.fallback = False self.fallback = False
return
def set_document(self, doc: "PDFDocument") -> None: def set_document(self, doc: "PDFDocument") -> None:
"""Associates the parser with a PDFDocument object.""" """Associates the parser with a PDFDocument object."""
self.doc = doc self.doc = doc
return
KEYWORD_R = KWD(b'R') KEYWORD_R = KWD(b'R')
KEYWORD_NULL = KWD(b'null') KEYWORD_NULL = KWD(b'null')
@ -134,8 +133,6 @@ class PDFParser(PSStackParser[Union[PSKeyword, PDFStream, PDFObjRef, None]]):
# others # others
self.push((pos, token)) self.push((pos, token))
return
class PDFStreamParser(PDFParser): class PDFStreamParser(PDFParser):
""" """
@ -148,11 +145,9 @@ class PDFStreamParser(PDFParser):
def __init__(self, data: bytes) -> None: def __init__(self, data: bytes) -> None:
PDFParser.__init__(self, BytesIO(data)) PDFParser.__init__(self, BytesIO(data))
return
def flush(self) -> None: def flush(self) -> None:
self.add_results(*self.popall()) self.add_results(*self.popall())
return
KEYWORD_OBJ = KWD(b'obj') KEYWORD_OBJ = KWD(b'obj')
@ -176,4 +171,3 @@ class PDFStreamParser(PDFParser):
return return
# others # others
self.push((pos, token)) self.push((pos, token))
return

View File

@ -1,25 +1,24 @@
import zlib
import logging
import io import io
import logging
import sys import sys
import zlib
from typing import (TYPE_CHECKING, Any, Dict, Iterable, Optional, Union, List, from typing import (TYPE_CHECKING, Any, Dict, Iterable, Optional, Union, List,
Tuple, cast) Tuple, cast)
from .lzw import lzwdecode from . import settings
from .ascii85 import ascii85decode from .ascii85 import ascii85decode
from .ascii85 import asciihexdecode from .ascii85 import asciihexdecode
from .runlength import rldecode
from .ccitt import ccittfaxdecode from .ccitt import ccittfaxdecode
from .lzw import lzwdecode
from .psparser import LIT
from .psparser import PSException from .psparser import PSException
from .psparser import PSObject from .psparser import PSObject
from .psparser import LIT from .runlength import rldecode
from . import settings
from .utils import apply_png_predictor from .utils import apply_png_predictor
if TYPE_CHECKING: if TYPE_CHECKING:
from .pdfdocument import PDFDocument from .pdfdocument import PDFDocument
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
LITERAL_CRYPT = LIT('Crypt') LITERAL_CRYPT = LIT('Crypt')
@ -88,7 +87,6 @@ class PDFObjRef(PDFObject):
raise PDFValueError('PDF object id cannot be 0.') raise PDFValueError('PDF object id cannot be 0.')
self.doc = doc self.doc = doc
self.objid = objid self.objid = objid
return
def __repr__(self) -> str: def __repr__(self) -> str:
return '<PDFObjRef:%d>' % (self.objid) return '<PDFObjRef:%d>' % (self.objid)
@ -255,12 +253,10 @@ class PDFStream(PDFObject):
self.data: Optional[bytes] = None self.data: Optional[bytes] = None
self.objid: Optional[int] = None self.objid: Optional[int] = None
self.genno: Optional[int] = None self.genno: Optional[int] = None
return
def set_objid(self, objid: int, genno: int) -> None: def set_objid(self, objid: int, genno: int) -> None:
self.objid = objid self.objid = objid
self.genno = genno self.genno = genno
return
def __repr__(self) -> str: def __repr__(self) -> str:
if self.data is None: if self.data is None:

View File

@ -2,8 +2,8 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import re
import logging import logging
import re
from typing import (Any, BinaryIO, Dict, Generic, Iterator, List, from typing import (Any, BinaryIO, Dict, Generic, Iterator, List,
Optional, Tuple, Type, TypeVar, Union) Optional, Tuple, Type, TypeVar, Union)
@ -76,7 +76,6 @@ class PSKeyword(PSObject):
def __init__(self, name: bytes) -> None: def __init__(self, name: bytes) -> None:
self.name = name self.name = name
return
def __repr__(self) -> str: def __repr__(self) -> str:
name = self.name name = self.name
@ -95,7 +94,6 @@ class PSSymbolTable(Generic[_SymbolT]):
def __init__(self, klass: Type[_SymbolT]) -> None: def __init__(self, klass: Type[_SymbolT]) -> None:
self.dict: Dict[PSLiteral.NameType, _SymbolT] = {} self.dict: Dict[PSLiteral.NameType, _SymbolT] = {}
self.klass: Type[_SymbolT] = klass self.klass: Type[_SymbolT] = klass
return
def intern(self, name: PSLiteral.NameType) -> _SymbolT: def intern(self, name: PSLiteral.NameType) -> _SymbolT:
if name in self.dict: if name in self.dict:
@ -182,7 +180,6 @@ class PSBaseParser:
def __init__(self, fp: BinaryIO) -> None: def __init__(self, fp: BinaryIO) -> None:
self.fp = fp self.fp = fp
self.seek(0) self.seek(0)
return
def __repr__(self) -> str: def __repr__(self) -> str:
return '<%s: %r, bufpos=%d>' % (self.__class__.__name__, self.fp, return '<%s: %r, bufpos=%d>' % (self.__class__.__name__, self.fp,

View File

@ -4,10 +4,10 @@ Miscellaneous Routines.
import io import io
import pathlib import pathlib
import struct import struct
from html import escape
from typing import (Any, BinaryIO, Callable, Dict, Generic, Iterable, Iterator, from typing import (Any, BinaryIO, Callable, Dict, Generic, Iterable, Iterator,
List, Optional, Set, TextIO, Tuple, TypeVar, Union, List, Optional, Set, TextIO, Tuple, TypeVar, Union,
TYPE_CHECKING, cast) TYPE_CHECKING, cast)
from html import escape
if TYPE_CHECKING: if TYPE_CHECKING:
from .layout import LTComponent from .layout import LTComponent
@ -57,7 +57,6 @@ class open_filename(object):
) -> None: ) -> None:
if self.closing: if self.closing:
self.file_handler.close() self.file_handler.close()
return
def make_compat_bytes(in_str: str) -> bytes: def make_compat_bytes(in_str: str) -> bytes: