From b19f9e7270571f95ea279deb15808624de3c3b65 Mon Sep 17 00:00:00 2001 From: Pieter Marsman Date: Tue, 1 Feb 2022 01:49:46 +0100 Subject: [PATCH] Remove obsolete returns (#707) * Remove obsolete returns * Update CHANGELOG.md * Remove empty lines * Remove more empty lines --- CHANGELOG.md | 3 ++ pdfminer/arcfour.py | 1 - pdfminer/ccitt.py | 4 --- pdfminer/cmapdb.py | 34 +++++++------------ pdfminer/converter.py | 19 +++-------- pdfminer/image.py | 3 -- pdfminer/layout.py | 21 ++++-------- pdfminer/lzw.py | 5 +-- pdfminer/pdfcolor.py | 3 +- pdfminer/pdfdevice.py | 32 ++++++++---------- pdfminer/pdfdocument.py | 5 --- pdfminer/pdfinterp.py | 73 +++++++++++++++++------------------------ pdfminer/pdfpage.py | 15 ++++----- pdfminer/pdfparser.py | 22 +++++-------- pdfminer/pdftypes.py | 16 ++++----- pdfminer/psparser.py | 5 +-- pdfminer/utils.py | 3 +- 17 files changed, 94 insertions(+), 170 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d9f1e5..21483e3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Changed - Replace warnings.warn with logging.Logger.warning in line with [recommended use](https://docs.python.org/3/howto/logging.html#when-to-use-logging) ([#673](https://github.com/pdfminer/pdfminer.six/pull/673)) +### Removed +- Unnecessary return statements without argument at the end of functions ([#707](https://github.com/pdfminer/pdfminer.six/pull/707)) + ## [20211012] ### Added diff --git a/pdfminer/arcfour.py b/pdfminer/arcfour.py index dd2697c..88e7e2d 100644 --- a/pdfminer/arcfour.py +++ b/pdfminer/arcfour.py @@ -20,7 +20,6 @@ class Arcfour: (s[i], s[j]) = (s[j], s[i]) self.s = s (self.i, self.j) = (0, 0) - return def process(self, data: bytes) -> bytes: (i, j) = (self.i, self.j) diff --git a/pdfminer/ccitt.py b/pdfminer/ccitt.py index 4dadc81..f151ede 100644 --- a/pdfminer/ccitt.py +++ b/pdfminer/ccitt.py @@ -35,7 +35,6 @@ class BitParser: def __init__(self) -> None: self._pos = 0 - return @classmethod def add(cls, root: BitParserState, v: Union[int, str], bits: str) -> None: @@ -53,13 +52,11 @@ class BitParser: b = 0 assert b is not None p[b] = v - return def feedbytes(self, data: bytes) -> None: for byte in get_bytes(data): for m in (128, 64, 32, 16, 8, 4, 2, 1): self._parse_bit(byte & m) - return def _parse_bit(self, x: object) -> None: if x: @@ -72,7 +69,6 @@ class BitParser: else: assert self._accept is not None self._state = self._accept(v) - return class CCITTG4Parser(BitParser): diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py index 6974c1c..f25402c 100644 --- a/pdfminer/cmapdb.py +++ b/pdfminer/cmapdb.py @@ -9,27 +9,27 @@ More information is available on the Adobe website: """ -import sys +import gzip +import logging import os import os.path -import gzip import pickle as pickle import struct -import logging +import sys from typing import (Any, BinaryIO, Dict, Iterable, Iterator, List, MutableMapping, Optional, TextIO, Tuple, Union, cast) -from .psparser import PSStackParser -from .psparser import PSSyntaxError + +from .encodingdb import name2unicode +from .psparser import KWD from .psparser import PSEOF from .psparser import PSKeyword from .psparser import PSLiteral +from .psparser import PSStackParser +from .psparser import PSSyntaxError from .psparser import literal_name -from .psparser import KWD -from .encodingdb import name2unicode from .utils import choplist from .utils import nunpack - log = logging.getLogger(__name__) @@ -43,24 +43,22 @@ class CMapBase: def __init__(self, **kwargs: object) -> None: self.attrs: MutableMapping[str, object] = kwargs.copy() - return def is_vertical(self) -> bool: return self.attrs.get('WMode', 0) != 0 def set_attr(self, k: str, v: object) -> None: self.attrs[k] = v - return def add_code2cid(self, code: str, cid: int) -> None: - return + pass def add_cid2unichr(self, cid: int, code: Union[PSLiteral, bytes, int] ) -> None: - return + pass def use_cmap(self, cmap: "CMapBase") -> None: - return + pass def decode(self, code: bytes) -> Iterable[int]: raise NotImplementedError @@ -71,7 +69,6 @@ class CMap(CMapBase): def __init__(self, **kwargs: Union[str, int]) -> None: CMapBase.__init__(self, **kwargs) self.code2cid: Dict[int, object] = {} - return def __repr__(self) -> str: return '' % self.attrs.get('CMapName') @@ -88,7 +85,6 @@ class CMap(CMapBase): else: dst[k] = v copy(self.code2cid, cmap.code2cid) - return def decode(self, code: bytes) -> Iterator[int]: log.debug('decode: %r, %r', self, code) @@ -103,7 +99,6 @@ class CMap(CMapBase): d = cast(Dict[int, object], x) else: d = self.code2cid - return def dump(self, out: TextIO = sys.stdout, code2cid: Optional[Dict[int, object]] = None, @@ -117,7 +112,6 @@ class CMap(CMapBase): out.write('code %r = cid %d\n' % (c, v)) else: self.dump(out=out, code2cid=cast(Dict[int, object], v), code=c) - return class IdentityCMap(CMapBase): @@ -145,7 +139,6 @@ class UnicodeMap(CMapBase): def __init__(self, **kwargs: Union[str, int]) -> None: CMapBase.__init__(self, **kwargs) self.cid2unichr: Dict[int, str] = {} - return def __repr__(self) -> str: return '' % self.attrs.get('CMapName') @@ -157,7 +150,6 @@ class UnicodeMap(CMapBase): def dump(self, out: TextIO = sys.stdout) -> None: for (k, v) in sorted(self.cid2unichr.items()): out.write('cid %d = unicode %r\n' % (k, v)) - return class IdentityUnicodeMap(UnicodeMap): @@ -183,7 +175,6 @@ class FileCMap(CMap): d = t ci = ord(code[-1]) d[ci] = cid - return class FileUnicodeMap(UnicodeMap): @@ -202,7 +193,6 @@ class FileUnicodeMap(UnicodeMap): self.cid2unichr[cid] = chr(code) else: raise TypeError(code) - return class PyCMap(CMap): @@ -212,7 +202,6 @@ class PyCMap(CMap): self.code2cid = module.CODE2CID if module.IS_VERTICAL: self.attrs['WMode'] = 1 - return class PyUnicodeMap(UnicodeMap): @@ -224,7 +213,6 @@ class PyUnicodeMap(UnicodeMap): self.attrs['WMode'] = 1 else: self.cid2unichr = module.CID2UNICHR_H - return class CMapDB: diff --git a/pdfminer/converter.py b/pdfminer/converter.py index bffbb89..b09024e 100644 --- a/pdfminer/converter.py +++ b/pdfminer/converter.py @@ -1,11 +1,12 @@ import io import logging -from pdfminer.pdfcolor import PDFColorSpace +import re from typing import (BinaryIO, Dict, Generic, List, Optional, Sequence, TextIO, Tuple, TypeVar, Union, cast) -import re +from pdfminer.pdfcolor import PDFColorSpace from . import utils +from .image import ImageWriter from .layout import LAParams, LTComponent, TextGroupElement from .layout import LTChar from .layout import LTContainer @@ -33,7 +34,6 @@ from .utils import apply_matrix_pt from .utils import bbox2str from .utils import enc from .utils import mult_matrix -from .image import ImageWriter log = logging.getLogger(__name__) @@ -52,7 +52,6 @@ class PDFLayoutAnalyzer(PDFTextDevice): self.pageno = pageno self.laparams = laparams self._stack: List[LTLayoutContainer] = [] - return def begin_page(self, page: PDFPage, ctm: Matrix) -> None: (x0, y0, x1, y1) = page.mediabox @@ -60,7 +59,6 @@ class PDFLayoutAnalyzer(PDFTextDevice): (x1, y1) = apply_matrix_pt(ctm, (x1, y1)) mediabox = (0, 0, abs(x0-x1), abs(y0-y1)) self.cur_item = LTPage(self.pageno, mediabox) - return def end_page(self, page: PDFPage) -> None: assert not self._stack, str(len(self._stack)) @@ -69,19 +67,16 @@ class PDFLayoutAnalyzer(PDFTextDevice): self.cur_item.analyze(self.laparams) self.pageno += 1 self.receive_layout(self.cur_item) - return def begin_figure(self, name: str, bbox: Rect, matrix: Matrix) -> None: self._stack.append(self.cur_item) self.cur_item = LTFigure(name, bbox, mult_matrix(matrix, self.ctm)) - return def end_figure(self, _: str) -> None: fig = self.cur_item assert isinstance(self.cur_item, LTFigure), str(type(self.cur_item)) self.cur_item = self._stack.pop() self.cur_item.add(fig) - return def render_image(self, name: str, stream: PDFStream) -> None: assert isinstance(self.cur_item, LTFigure), str(type(self.cur_item)) @@ -89,7 +84,6 @@ class PDFLayoutAnalyzer(PDFTextDevice): (self.cur_item.x0, self.cur_item.y0, self.cur_item.x1, self.cur_item.y1)) self.cur_item.add(item) - return def paint_path( self, @@ -178,7 +172,7 @@ class PDFLayoutAnalyzer(PDFTextDevice): return '(cid:%d)' % cid def receive_layout(self, ltpage: LTPage) -> None: - return + pass class PDFPageAggregator(PDFLayoutAnalyzer): @@ -191,11 +185,9 @@ class PDFPageAggregator(PDFLayoutAnalyzer): PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno, laparams=laparams) self.result: Optional[LTPage] = None - return def receive_layout(self, ltpage: LTPage) -> None: self.result = ltpage - return def get_result(self) -> LTPage: assert self.result is not None @@ -254,7 +246,6 @@ class TextConverter(PDFConverter[AnyIO]): laparams=laparams) self.showpageno = showpageno self.imagewriter = imagewriter - return def write_text(self, text: str) -> None: text = utils.compatible_encode_method(text, self.codec, 'ignore') @@ -262,7 +253,6 @@ class TextConverter(PDFConverter[AnyIO]): cast(BinaryIO, self.outfp).write(text.encode()) else: cast(TextIO, self.outfp).write(text) - return def receive_layout(self, ltpage: LTPage) -> None: def render(item: LTItem) -> None: @@ -280,7 +270,6 @@ class TextConverter(PDFConverter[AnyIO]): self.write_text('Page %s\n' % ltpage.pageid) render(ltpage) self.write_text('\f') - return # Some dummy functions to save memory/CPU when all that is wanted # is text. This stops all the image and drawing output from being diff --git a/pdfminer/image.py b/pdfminer/image.py index cfed324..5c942fb 100644 --- a/pdfminer/image.py +++ b/pdfminer/image.py @@ -58,12 +58,10 @@ class BMPWriter: self.fp.write(struct.pack('BBBx', i, i, i)) self.pos0 = self.fp.tell() self.pos1 = self.pos0 + self.datasize - return def write_line(self, y: int, data: bytes) -> None: self.fp.seek(self.pos1 - (y+1)*self.linesize) self.fp.write(data) - return class ImageWriter: @@ -76,7 +74,6 @@ class ImageWriter: self.outdir = outdir if not os.path.exists(self.outdir): os.makedirs(self.outdir) - return def export_image(self, image: LTImage) -> str: (width, height) = image.srcsize diff --git a/pdfminer/layout.py b/pdfminer/layout.py index eff2acc..778d324 100644 --- a/pdfminer/layout.py +++ b/pdfminer/layout.py @@ -3,6 +3,11 @@ import logging from typing import (Dict, Generic, Iterable, Iterator, List, Optional, Sequence, Set, Tuple, TypeVar, Union, cast) +from .pdfcolor import PDFColorSpace +from .pdffont import PDFFont +from .pdfinterp import Color +from .pdfinterp import PDFGraphicState +from .pdftypes import PDFStream from .utils import INF from .utils import LTComponentT from .utils import Matrix @@ -15,11 +20,6 @@ from .utils import fsplit from .utils import get_bound from .utils import matrix2str from .utils import uniq -from .pdfcolor import PDFColorSpace -from .pdftypes import PDFStream -from .pdfinterp import Color -from .pdfinterp import PDFGraphicState -from .pdffont import PDFFont logger = logging.getLogger(__name__) @@ -28,7 +28,6 @@ class IndexAssigner: def __init__(self, index: int = 0) -> None: self.index = index - return def run(self, obj: "LTItem") -> None: if isinstance(obj, LTTextBox): @@ -37,7 +36,6 @@ class IndexAssigner: elif isinstance(obj, LTTextGroup): for x in obj: self.run(x) - return class LAParams: @@ -87,7 +85,6 @@ class LAParams: self.all_texts = all_texts self._validate() - return def _validate(self) -> None: if self.boxes_flow is not None: @@ -111,7 +108,7 @@ class LTItem: def analyze(self, laparams: LAParams) -> None: """Perform the layout analysis.""" - return + pass class LTText: @@ -132,7 +129,6 @@ class LTComponent(LTItem): def __init__(self, bbox: Rect) -> None: LTItem.__init__(self) self.set_bbox(bbox) - return def __repr__(self) -> str: return ('<%s %s>' % @@ -160,7 +156,6 @@ class LTComponent(LTItem): self.width = x1-x0 self.height = y1-y0 self.bbox = bbox - return def is_empty(self) -> bool: return self.width <= 0 or self.height <= 0 @@ -223,7 +218,6 @@ class LTCurve(LTComponent): self.evenodd = evenodd self.stroking_color = stroking_color self.non_stroking_color = non_stroking_color - return def get_pts(self) -> str: return ','.join('%.3f,%.3f' % p for p in self.pts) @@ -248,7 +242,6 @@ class LTLine(LTCurve): ) -> None: LTCurve.__init__(self, linewidth, [p0, p1], stroke, fill, evenodd, stroking_color, non_stroking_color) - return class LTRect(LTCurve): @@ -271,7 +264,6 @@ class LTRect(LTCurve): LTCurve.__init__(self, linewidth, [(x0, y0), (x1, y0), (x1, y1), (x0, y1)], stroke, fill, evenodd, stroking_color, non_stroking_color) - return class LTImage(LTComponent): @@ -291,7 +283,6 @@ class LTImage(LTComponent): self.colorspace = stream.get_any(('CS', 'ColorSpace')) if not isinstance(self.colorspace, list): self.colorspace = [self.colorspace] - return def __repr__(self) -> str: return ('<%s(%s) %s %r>' % diff --git a/pdfminer/lzw.py b/pdfminer/lzw.py index 31c085e..3e59f6c 100644 --- a/pdfminer/lzw.py +++ b/pdfminer/lzw.py @@ -1,8 +1,7 @@ -from io import BytesIO import logging +from io import BytesIO from typing import BinaryIO, Iterator, List, Optional, cast - logger = logging.getLogger(__name__) @@ -20,7 +19,6 @@ class LZWDecoder: # NB: self.table stores None only in indices 256 and 257 self.table: Optional[List[Optional[bytes]]] = None self.prevbuf: Optional[bytes] = None - return def readbits(self, bits: int) -> int: v = 0 @@ -95,7 +93,6 @@ class LZWDecoder: assert self.table is not None logger.debug('nbits=%d, code=%d, output=%r, table=%r' % (self.nbits, code, x, self.table[258:])) - return def lzwdecode(data: bytes) -> bytes: diff --git a/pdfminer/pdfcolor.py b/pdfminer/pdfcolor.py index df685ed..6059056 100644 --- a/pdfminer/pdfcolor.py +++ b/pdfminer/pdfcolor.py @@ -1,7 +1,7 @@ import collections from typing import Dict -from .psparser import LIT +from .psparser import LIT LITERAL_DEVICE_GRAY = LIT('DeviceGray') LITERAL_DEVICE_RGB = LIT('DeviceRGB') @@ -13,7 +13,6 @@ class PDFColorSpace: def __init__(self, name: str, ncomponents: int) -> None: self.name = name self.ncomponents = ncomponents - return def __repr__(self) -> str: return '' % \ diff --git a/pdfminer/pdfdevice.py b/pdfminer/pdfdevice.py index 0a37063..2053ccc 100644 --- a/pdfminer/pdfdevice.py +++ b/pdfminer/pdfdevice.py @@ -1,13 +1,14 @@ -from pdfminer.psparser import PSLiteral from typing import (BinaryIO, Iterable, List, Optional, Sequence, TYPE_CHECKING, Union, cast) + +from pdfminer.psparser import PSLiteral from . import utils -from .utils import Matrix, Point, Rect, PathSegment from .pdfcolor import PDFColorSpace from .pdffont import PDFFont from .pdffont import PDFUnicodeNotDefined from .pdfpage import PDFPage from .pdftypes import PDFStream +from .utils import Matrix, Point, Rect, PathSegment if TYPE_CHECKING: from .pdfinterp import PDFGraphicState @@ -26,7 +27,6 @@ class PDFDevice: def __init__(self, rsrcmgr: "PDFResourceManager") -> None: self.rsrcmgr = rsrcmgr self.ctm: Optional[Matrix] = None - return def __repr__(self) -> str: return '' @@ -43,40 +43,39 @@ class PDFDevice: self.close() def close(self) -> None: - return + pass def set_ctm(self, ctm: Matrix) -> None: self.ctm = ctm - return def begin_tag( self, tag: PSLiteral, props: Optional["PDFStackT"] = None ) -> None: - return + pass def end_tag(self) -> None: - return + pass def do_tag( self, tag: PSLiteral, props: Optional["PDFStackT"] = None ) -> None: - return + pass def begin_page(self, page: PDFPage, ctm: Matrix) -> None: - return + pass def end_page(self, page: PDFPage) -> None: - return + pass def begin_figure(self, name: str, bbox: Rect, matrix: Matrix) -> None: - return + pass def end_figure(self, name: str) -> None: - return + pass def paint_path( self, @@ -86,10 +85,10 @@ class PDFDevice: evenodd: bool, path: Sequence[PathSegment] ) -> None: - return + pass def render_image(self, name: str, stream: PDFStream) -> None: - return + pass def render_string( self, @@ -98,7 +97,7 @@ class PDFDevice: ncs: PDFColorSpace, graphicstate: "PDFGraphicState" ) -> None: - return + pass class PDFTextDevice(PDFDevice): @@ -132,7 +131,6 @@ class PDFTextDevice(PDFDevice): seq, matrix, textstate.linematrix, font, fontsize, scaling, charspace, wordspace, rise, dxscale, ncs, graphicstate) - return def render_string_horizontal( self, @@ -227,7 +225,6 @@ class TagExtractor(PDFDevice): self.codec = codec self.pageno = 0 self._stack: List[PSLiteral] = [] - return def render_string( self, @@ -252,7 +249,6 @@ class TagExtractor(PDFDevice): except PDFUnicodeNotDefined: pass self._write(utils.enc(text)) - return def begin_page(self, page: PDFPage, ctm: Matrix) -> None: output = '' %\ diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py index f0102ef..ee61937 100644 --- a/pdfminer/pdfdocument.py +++ b/pdfminer/pdfdocument.py @@ -100,7 +100,6 @@ class PDFXRef(PDFBaseXRef): def __init__(self) -> None: self.offsets: Dict[int, Tuple[Optional[int], int, int]] = {} self.trailer: Dict[str, Any] = {} - return def __repr__(self) -> str: return '' % (self.offsets.keys()) @@ -145,7 +144,6 @@ class PDFXRef(PDFBaseXRef): self.offsets[objid] = (None, int(pos_b), int(genno_b)) log.info('xref objects: %r', self.offsets) self.load_trailer(parser) - return def load_trailer(self, parser: PDFParser) -> None: try: @@ -159,7 +157,6 @@ class PDFXRef(PDFBaseXRef): (_, dic) = x[0] self.trailer.update(dict_value(dic)) log.debug('trailer=%r', self.trailer) - return def get_trailer(self) -> Dict[str, Any]: return self.trailer @@ -225,7 +222,6 @@ class PDFXRefFallback(PDFXRef): for index in range(n): objid1 = objs[index*2] self.offsets[objid1] = (objid, index, 0) - return class PDFXRefStream(PDFBaseXRef): @@ -237,7 +233,6 @@ class PDFXRefStream(PDFBaseXRef): self.fl2: Optional[int] = None self.fl3: Optional[int] = None self.ranges: List[Tuple[int, int]] = [] - return def __repr__(self) -> str: return '' % (self.ranges) diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py index 7b03729..2a6d55b 100644 --- a/pdfminer/pdfinterp.py +++ b/pdfminer/pdfinterp.py @@ -1,43 +1,43 @@ -import re import logging -from typing import Dict, List, Mapping, Optional, Sequence, Tuple, Union, cast +import re from io import BytesIO -from .cmapdb import CMapDB +from typing import Dict, List, Mapping, Optional, Sequence, Tuple, Union, cast + +from . import settings from .cmapdb import CMap from .cmapdb import CMapBase -from .psparser import PSLiteral, PSTypeError -from .psparser import PSStackType -from .psparser import PSEOF -from .psparser import PSKeyword -from .psparser import literal_name -from .psparser import keyword_name -from .psparser import PSStackParser -from .psparser import LIT -from .psparser import KWD -from . import settings -from .pdfdevice import PDFDevice -from .pdfdevice import PDFTextSeq -from .pdfpage import PDFPage -from .pdftypes import PDFException -from .pdftypes import PDFStream -from .pdftypes import PDFObjRef -from .pdftypes import resolve1 -from .pdftypes import list_value -from .pdftypes import dict_value -from .pdftypes import stream_value -from .pdffont import PDFFont -from .pdffont import PDFFontError -from .pdffont import PDFType1Font -from .pdffont import PDFTrueTypeFont -from .pdffont import PDFType3Font -from .pdffont import PDFCIDFont +from .cmapdb import CMapDB from .pdfcolor import PDFColorSpace from .pdfcolor import PREDEFINED_COLORSPACE +from .pdfdevice import PDFDevice +from .pdfdevice import PDFTextSeq +from .pdffont import PDFCIDFont +from .pdffont import PDFFont +from .pdffont import PDFFontError +from .pdffont import PDFTrueTypeFont +from .pdffont import PDFType1Font +from .pdffont import PDFType3Font +from .pdfpage import PDFPage +from .pdftypes import PDFException +from .pdftypes import PDFObjRef +from .pdftypes import PDFStream +from .pdftypes import dict_value +from .pdftypes import list_value +from .pdftypes import resolve1 +from .pdftypes import stream_value +from .psparser import KWD +from .psparser import LIT +from .psparser import PSEOF +from .psparser import PSKeyword +from .psparser import PSLiteral, PSTypeError +from .psparser import PSStackParser +from .psparser import PSStackType +from .psparser import keyword_name +from .psparser import literal_name +from .utils import MATRIX_IDENTITY from .utils import Matrix, Point, PathSegment, Rect from .utils import choplist from .utils import mult_matrix -from .utils import MATRIX_IDENTITY - log = logging.getLogger(__name__) @@ -73,7 +73,6 @@ class PDFTextState: self.reset() # self.matrix is set # self.linematrix is set - return def __repr__(self) -> str: return ' None: self.matrix = MATRIX_IDENTITY self.linematrix = (0, 0) - return Color = Union[ @@ -125,7 +123,6 @@ class PDFGraphicState: # non stroking color self.ncolor: Optional[Color] = None - return def copy(self) -> "PDFGraphicState": obj = PDFGraphicState() @@ -160,7 +157,6 @@ class PDFResourceManager: def __init__(self, caching: bool = True) -> None: self.caching = caching self._cached_fonts: Dict[object, PDFFont] = {} - return def get_procset(self, procs: Sequence[object]) -> None: for proc in procs: @@ -170,7 +166,6 @@ class PDFResourceManager: pass else: pass - return def get_cmap(self, cmapname: str, strict: bool = False) -> CMapBase: try: @@ -234,7 +229,6 @@ class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]): # all the methods that would attempt to access self.fp without first # calling self.fillfp(). PSStackParser.__init__(self, None) # type: ignore[arg-type] - return def fillfp(self) -> None: if not self.fp: @@ -244,12 +238,10 @@ class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]): else: raise PSEOF('Unexpected EOF, file truncated?') self.fp = BytesIO(strm.get_data()) - return def seek(self, pos: int) -> None: self.fillfp() PSStackParser.seek(self, pos) - return def fillbuf(self) -> None: if self.charpos < len(self.buf): @@ -262,7 +254,6 @@ class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]): break self.fp = None # type: ignore[assignment] self.charpos = 0 - return def get_inline_data( self, @@ -300,7 +291,6 @@ class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]): def flush(self) -> None: self.add_results(*self.popall()) - return KEYWORD_BI = KWD(b'BI') KEYWORD_ID = KWD(b'ID') @@ -327,7 +317,6 @@ class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]): raise else: self.push((pos, token)) - return PDFStackT = PSStackType[PDFStream] diff --git a/pdfminer/pdfpage.py b/pdfminer/pdfpage.py index 75f77fd..c822083 100644 --- a/pdfminer/pdfpage.py +++ b/pdfminer/pdfpage.py @@ -1,16 +1,16 @@ import logging -from pdfminer.utils import Rect from typing import BinaryIO, Container, Dict, Iterator, List, Optional, Tuple + +from pdfminer.utils import Rect from . import settings -from .psparser import LIT +from .pdfdocument import PDFDocument, PDFTextExtractionNotAllowed +from .pdfparser import PDFParser from .pdftypes import PDFObjectNotFound -from .pdftypes import resolve1 +from .pdftypes import dict_value from .pdftypes import int_value from .pdftypes import list_value -from .pdftypes import dict_value -from .pdfparser import PDFParser -from .pdfdocument import PDFDocument, PDFTextExtractionNotAllowed - +from .pdftypes import resolve1 +from .psparser import LIT log = logging.getLogger(__name__) @@ -73,7 +73,6 @@ class PDFPage: if not isinstance(contents, list): contents = [contents] self.contents: List[object] = contents - return def __repr__(self) -> str: return ''\ diff --git a/pdfminer/pdfparser.py b/pdfminer/pdfparser.py index 18ad9eb..e0a5a61 100644 --- a/pdfminer/pdfparser.py +++ b/pdfminer/pdfparser.py @@ -1,17 +1,18 @@ import logging from io import BytesIO from typing import BinaryIO, TYPE_CHECKING, Optional, Union -from .psparser import PSStackParser -from .psparser import PSKeyword -from .psparser import PSSyntaxError -from .psparser import PSEOF -from .psparser import KWD + from . import settings from .pdftypes import PDFException -from .pdftypes import PDFStream from .pdftypes import PDFObjRef -from .pdftypes import int_value +from .pdftypes import PDFStream from .pdftypes import dict_value +from .pdftypes import int_value +from .psparser import KWD +from .psparser import PSEOF +from .psparser import PSKeyword +from .psparser import PSStackParser +from .psparser import PSSyntaxError if TYPE_CHECKING: from .pdfdocument import PDFDocument @@ -45,12 +46,10 @@ class PDFParser(PSStackParser[Union[PSKeyword, PDFStream, PDFObjRef, None]]): PSStackParser.__init__(self, fp) self.doc: Optional["PDFDocument"] = None self.fallback = False - return def set_document(self, doc: "PDFDocument") -> None: """Associates the parser with a PDFDocument object.""" self.doc = doc - return KEYWORD_R = KWD(b'R') KEYWORD_NULL = KWD(b'null') @@ -134,8 +133,6 @@ class PDFParser(PSStackParser[Union[PSKeyword, PDFStream, PDFObjRef, None]]): # others self.push((pos, token)) - return - class PDFStreamParser(PDFParser): """ @@ -148,11 +145,9 @@ class PDFStreamParser(PDFParser): def __init__(self, data: bytes) -> None: PDFParser.__init__(self, BytesIO(data)) - return def flush(self) -> None: self.add_results(*self.popall()) - return KEYWORD_OBJ = KWD(b'obj') @@ -176,4 +171,3 @@ class PDFStreamParser(PDFParser): return # others self.push((pos, token)) - return diff --git a/pdfminer/pdftypes.py b/pdfminer/pdftypes.py index b0496e8..037f2ce 100644 --- a/pdfminer/pdftypes.py +++ b/pdfminer/pdftypes.py @@ -1,25 +1,24 @@ -import zlib -import logging import io +import logging import sys +import zlib from typing import (TYPE_CHECKING, Any, Dict, Iterable, Optional, Union, List, Tuple, cast) -from .lzw import lzwdecode +from . import settings from .ascii85 import ascii85decode from .ascii85 import asciihexdecode -from .runlength import rldecode from .ccitt import ccittfaxdecode +from .lzw import lzwdecode +from .psparser import LIT from .psparser import PSException from .psparser import PSObject -from .psparser import LIT -from . import settings +from .runlength import rldecode from .utils import apply_png_predictor if TYPE_CHECKING: from .pdfdocument import PDFDocument - logger = logging.getLogger(__name__) LITERAL_CRYPT = LIT('Crypt') @@ -88,7 +87,6 @@ class PDFObjRef(PDFObject): raise PDFValueError('PDF object id cannot be 0.') self.doc = doc self.objid = objid - return def __repr__(self) -> str: return '' % (self.objid) @@ -255,12 +253,10 @@ class PDFStream(PDFObject): self.data: Optional[bytes] = None self.objid: Optional[int] = None self.genno: Optional[int] = None - return def set_objid(self, objid: int, genno: int) -> None: self.objid = objid self.genno = genno - return def __repr__(self) -> str: if self.data is None: diff --git a/pdfminer/psparser.py b/pdfminer/psparser.py index a05009e..43c457d 100644 --- a/pdfminer/psparser.py +++ b/pdfminer/psparser.py @@ -2,8 +2,8 @@ # -*- coding: utf-8 -*- -import re import logging +import re from typing import (Any, BinaryIO, Dict, Generic, Iterator, List, Optional, Tuple, Type, TypeVar, Union) @@ -76,7 +76,6 @@ class PSKeyword(PSObject): def __init__(self, name: bytes) -> None: self.name = name - return def __repr__(self) -> str: name = self.name @@ -95,7 +94,6 @@ class PSSymbolTable(Generic[_SymbolT]): def __init__(self, klass: Type[_SymbolT]) -> None: self.dict: Dict[PSLiteral.NameType, _SymbolT] = {} self.klass: Type[_SymbolT] = klass - return def intern(self, name: PSLiteral.NameType) -> _SymbolT: if name in self.dict: @@ -182,7 +180,6 @@ class PSBaseParser: def __init__(self, fp: BinaryIO) -> None: self.fp = fp self.seek(0) - return def __repr__(self) -> str: return '<%s: %r, bufpos=%d>' % (self.__class__.__name__, self.fp, diff --git a/pdfminer/utils.py b/pdfminer/utils.py index d483a48..01c5901 100644 --- a/pdfminer/utils.py +++ b/pdfminer/utils.py @@ -4,10 +4,10 @@ Miscellaneous Routines. import io import pathlib import struct +from html import escape from typing import (Any, BinaryIO, Callable, Dict, Generic, Iterable, Iterator, List, Optional, Set, TextIO, Tuple, TypeVar, Union, TYPE_CHECKING, cast) -from html import escape if TYPE_CHECKING: from .layout import LTComponent @@ -57,7 +57,6 @@ class open_filename(object): ) -> None: if self.closing: self.file_handler.close() - return def make_compat_bytes(in_str: str) -> bytes: