diff --git a/pdfminer/arcfour.py b/pdfminer/arcfour.py index dd2697c..94fb65b 100644 --- a/pdfminer/arcfour.py +++ b/pdfminer/arcfour.py @@ -20,7 +20,7 @@ class Arcfour: (s[i], s[j]) = (s[j], s[i]) self.s = s (self.i, self.j) = (0, 0) - return + def process(self, data: bytes) -> bytes: (i, j) = (self.i, self.j) diff --git a/pdfminer/ccitt.py b/pdfminer/ccitt.py index 4dadc81..30ea8c6 100644 --- a/pdfminer/ccitt.py +++ b/pdfminer/ccitt.py @@ -35,7 +35,7 @@ class BitParser: def __init__(self) -> None: self._pos = 0 - return + @classmethod def add(cls, root: BitParserState, v: Union[int, str], bits: str) -> None: @@ -53,13 +53,13 @@ class BitParser: b = 0 assert b is not None p[b] = v - return + def feedbytes(self, data: bytes) -> None: for byte in get_bytes(data): for m in (128, 64, 32, 16, 8, 4, 2, 1): self._parse_bit(byte & m) - return + def _parse_bit(self, x: object) -> None: if x: @@ -72,7 +72,7 @@ class BitParser: else: assert self._accept is not None self._state = self._accept(v) - return + class CCITTG4Parser(BitParser): diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py index 6974c1c..1976967 100644 --- a/pdfminer/cmapdb.py +++ b/pdfminer/cmapdb.py @@ -43,24 +43,24 @@ class CMapBase: def __init__(self, **kwargs: object) -> None: self.attrs: MutableMapping[str, object] = kwargs.copy() - return + def is_vertical(self) -> bool: return self.attrs.get('WMode', 0) != 0 def set_attr(self, k: str, v: object) -> None: self.attrs[k] = v - return + def add_code2cid(self, code: str, cid: int) -> None: - return + pass def add_cid2unichr(self, cid: int, code: Union[PSLiteral, bytes, int] ) -> None: - return + pass def use_cmap(self, cmap: "CMapBase") -> None: - return + pass def decode(self, code: bytes) -> Iterable[int]: raise NotImplementedError @@ -71,7 +71,7 @@ class CMap(CMapBase): def __init__(self, **kwargs: Union[str, int]) -> None: CMapBase.__init__(self, **kwargs) self.code2cid: Dict[int, object] = {} - return + def __repr__(self) -> str: return '' % self.attrs.get('CMapName') @@ -88,7 +88,7 @@ class CMap(CMapBase): else: dst[k] = v copy(self.code2cid, cmap.code2cid) - return + def decode(self, code: bytes) -> Iterator[int]: log.debug('decode: %r, %r', self, code) @@ -103,7 +103,7 @@ class CMap(CMapBase): d = cast(Dict[int, object], x) else: d = self.code2cid - return + def dump(self, out: TextIO = sys.stdout, code2cid: Optional[Dict[int, object]] = None, @@ -117,7 +117,7 @@ class CMap(CMapBase): out.write('code %r = cid %d\n' % (c, v)) else: self.dump(out=out, code2cid=cast(Dict[int, object], v), code=c) - return + class IdentityCMap(CMapBase): @@ -145,7 +145,7 @@ class UnicodeMap(CMapBase): def __init__(self, **kwargs: Union[str, int]) -> None: CMapBase.__init__(self, **kwargs) self.cid2unichr: Dict[int, str] = {} - return + def __repr__(self) -> str: return '' % self.attrs.get('CMapName') @@ -157,7 +157,7 @@ class UnicodeMap(CMapBase): def dump(self, out: TextIO = sys.stdout) -> None: for (k, v) in sorted(self.cid2unichr.items()): out.write('cid %d = unicode %r\n' % (k, v)) - return + class IdentityUnicodeMap(UnicodeMap): @@ -183,7 +183,7 @@ class FileCMap(CMap): d = t ci = ord(code[-1]) d[ci] = cid - return + class FileUnicodeMap(UnicodeMap): @@ -202,7 +202,7 @@ class FileUnicodeMap(UnicodeMap): self.cid2unichr[cid] = chr(code) else: raise TypeError(code) - return + class PyCMap(CMap): @@ -212,7 +212,7 @@ class PyCMap(CMap): self.code2cid = module.CODE2CID if module.IS_VERTICAL: self.attrs['WMode'] = 1 - return + class PyUnicodeMap(UnicodeMap): @@ -224,7 +224,7 @@ class PyUnicodeMap(UnicodeMap): self.attrs['WMode'] = 1 else: self.cid2unichr = module.CID2UNICHR_H - return + class CMapDB: diff --git a/pdfminer/converter.py b/pdfminer/converter.py index bffbb89..c42a00b 100644 --- a/pdfminer/converter.py +++ b/pdfminer/converter.py @@ -52,7 +52,7 @@ class PDFLayoutAnalyzer(PDFTextDevice): self.pageno = pageno self.laparams = laparams self._stack: List[LTLayoutContainer] = [] - return + def begin_page(self, page: PDFPage, ctm: Matrix) -> None: (x0, y0, x1, y1) = page.mediabox @@ -60,7 +60,7 @@ class PDFLayoutAnalyzer(PDFTextDevice): (x1, y1) = apply_matrix_pt(ctm, (x1, y1)) mediabox = (0, 0, abs(x0-x1), abs(y0-y1)) self.cur_item = LTPage(self.pageno, mediabox) - return + def end_page(self, page: PDFPage) -> None: assert not self._stack, str(len(self._stack)) @@ -69,19 +69,19 @@ class PDFLayoutAnalyzer(PDFTextDevice): self.cur_item.analyze(self.laparams) self.pageno += 1 self.receive_layout(self.cur_item) - return + def begin_figure(self, name: str, bbox: Rect, matrix: Matrix) -> None: self._stack.append(self.cur_item) self.cur_item = LTFigure(name, bbox, mult_matrix(matrix, self.ctm)) - return + def end_figure(self, _: str) -> None: fig = self.cur_item assert isinstance(self.cur_item, LTFigure), str(type(self.cur_item)) self.cur_item = self._stack.pop() self.cur_item.add(fig) - return + def render_image(self, name: str, stream: PDFStream) -> None: assert isinstance(self.cur_item, LTFigure), str(type(self.cur_item)) @@ -89,7 +89,7 @@ class PDFLayoutAnalyzer(PDFTextDevice): (self.cur_item.x0, self.cur_item.y0, self.cur_item.x1, self.cur_item.y1)) self.cur_item.add(item) - return + def paint_path( self, @@ -178,7 +178,7 @@ class PDFLayoutAnalyzer(PDFTextDevice): return '(cid:%d)' % cid def receive_layout(self, ltpage: LTPage) -> None: - return + pass class PDFPageAggregator(PDFLayoutAnalyzer): @@ -191,11 +191,11 @@ class PDFPageAggregator(PDFLayoutAnalyzer): PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno, laparams=laparams) self.result: Optional[LTPage] = None - return + def receive_layout(self, ltpage: LTPage) -> None: self.result = ltpage - return + def get_result(self) -> LTPage: assert self.result is not None @@ -254,7 +254,7 @@ class TextConverter(PDFConverter[AnyIO]): laparams=laparams) self.showpageno = showpageno self.imagewriter = imagewriter - return + def write_text(self, text: str) -> None: text = utils.compatible_encode_method(text, self.codec, 'ignore') @@ -262,7 +262,7 @@ class TextConverter(PDFConverter[AnyIO]): cast(BinaryIO, self.outfp).write(text.encode()) else: cast(TextIO, self.outfp).write(text) - return + def receive_layout(self, ltpage: LTPage) -> None: def render(item: LTItem) -> None: @@ -280,7 +280,7 @@ class TextConverter(PDFConverter[AnyIO]): self.write_text('Page %s\n' % ltpage.pageid) render(ltpage) self.write_text('\f') - return + # Some dummy functions to save memory/CPU when all that is wanted # is text. This stops all the image and drawing output from being diff --git a/pdfminer/image.py b/pdfminer/image.py index cfed324..aac75f1 100644 --- a/pdfminer/image.py +++ b/pdfminer/image.py @@ -58,12 +58,12 @@ class BMPWriter: self.fp.write(struct.pack('BBBx', i, i, i)) self.pos0 = self.fp.tell() self.pos1 = self.pos0 + self.datasize - return + def write_line(self, y: int, data: bytes) -> None: self.fp.seek(self.pos1 - (y+1)*self.linesize) self.fp.write(data) - return + class ImageWriter: @@ -76,7 +76,7 @@ class ImageWriter: self.outdir = outdir if not os.path.exists(self.outdir): os.makedirs(self.outdir) - return + def export_image(self, image: LTImage) -> str: (width, height) = image.srcsize diff --git a/pdfminer/layout.py b/pdfminer/layout.py index eff2acc..4db3469 100644 --- a/pdfminer/layout.py +++ b/pdfminer/layout.py @@ -28,7 +28,7 @@ class IndexAssigner: def __init__(self, index: int = 0) -> None: self.index = index - return + def run(self, obj: "LTItem") -> None: if isinstance(obj, LTTextBox): @@ -37,7 +37,7 @@ class IndexAssigner: elif isinstance(obj, LTTextGroup): for x in obj: self.run(x) - return + class LAParams: @@ -87,7 +87,7 @@ class LAParams: self.all_texts = all_texts self._validate() - return + def _validate(self) -> None: if self.boxes_flow is not None: @@ -111,7 +111,7 @@ class LTItem: def analyze(self, laparams: LAParams) -> None: """Perform the layout analysis.""" - return + pass class LTText: @@ -132,7 +132,7 @@ class LTComponent(LTItem): def __init__(self, bbox: Rect) -> None: LTItem.__init__(self) self.set_bbox(bbox) - return + def __repr__(self) -> str: return ('<%s %s>' % @@ -160,7 +160,7 @@ class LTComponent(LTItem): self.width = x1-x0 self.height = y1-y0 self.bbox = bbox - return + def is_empty(self) -> bool: return self.width <= 0 or self.height <= 0 @@ -223,7 +223,7 @@ class LTCurve(LTComponent): self.evenodd = evenodd self.stroking_color = stroking_color self.non_stroking_color = non_stroking_color - return + def get_pts(self) -> str: return ','.join('%.3f,%.3f' % p for p in self.pts) @@ -248,7 +248,7 @@ class LTLine(LTCurve): ) -> None: LTCurve.__init__(self, linewidth, [p0, p1], stroke, fill, evenodd, stroking_color, non_stroking_color) - return + class LTRect(LTCurve): @@ -271,7 +271,7 @@ class LTRect(LTCurve): LTCurve.__init__(self, linewidth, [(x0, y0), (x1, y0), (x1, y1), (x0, y1)], stroke, fill, evenodd, stroking_color, non_stroking_color) - return + class LTImage(LTComponent): @@ -291,7 +291,7 @@ class LTImage(LTComponent): self.colorspace = stream.get_any(('CS', 'ColorSpace')) if not isinstance(self.colorspace, list): self.colorspace = [self.colorspace] - return + def __repr__(self) -> str: return ('<%s(%s) %s %r>' % diff --git a/pdfminer/lzw.py b/pdfminer/lzw.py index 31c085e..bf47d41 100644 --- a/pdfminer/lzw.py +++ b/pdfminer/lzw.py @@ -20,7 +20,7 @@ class LZWDecoder: # NB: self.table stores None only in indices 256 and 257 self.table: Optional[List[Optional[bytes]]] = None self.prevbuf: Optional[bytes] = None - return + def readbits(self, bits: int) -> int: v = 0 @@ -95,7 +95,7 @@ class LZWDecoder: assert self.table is not None logger.debug('nbits=%d, code=%d, output=%r, table=%r' % (self.nbits, code, x, self.table[258:])) - return + def lzwdecode(data: bytes) -> bytes: diff --git a/pdfminer/pdfcolor.py b/pdfminer/pdfcolor.py index df685ed..6715635 100644 --- a/pdfminer/pdfcolor.py +++ b/pdfminer/pdfcolor.py @@ -13,7 +13,7 @@ class PDFColorSpace: def __init__(self, name: str, ncomponents: int) -> None: self.name = name self.ncomponents = ncomponents - return + def __repr__(self) -> str: return '' % \ diff --git a/pdfminer/pdfdevice.py b/pdfminer/pdfdevice.py index 0a37063..231ce82 100644 --- a/pdfminer/pdfdevice.py +++ b/pdfminer/pdfdevice.py @@ -26,7 +26,7 @@ class PDFDevice: def __init__(self, rsrcmgr: "PDFResourceManager") -> None: self.rsrcmgr = rsrcmgr self.ctm: Optional[Matrix] = None - return + def __repr__(self) -> str: return '' @@ -43,40 +43,40 @@ class PDFDevice: self.close() def close(self) -> None: - return + pass def set_ctm(self, ctm: Matrix) -> None: self.ctm = ctm - return + def begin_tag( self, tag: PSLiteral, props: Optional["PDFStackT"] = None ) -> None: - return + pass def end_tag(self) -> None: - return + pass def do_tag( self, tag: PSLiteral, props: Optional["PDFStackT"] = None ) -> None: - return + pass def begin_page(self, page: PDFPage, ctm: Matrix) -> None: - return + pass def end_page(self, page: PDFPage) -> None: - return + pass def begin_figure(self, name: str, bbox: Rect, matrix: Matrix) -> None: - return + pass def end_figure(self, name: str) -> None: - return + pass def paint_path( self, @@ -86,10 +86,10 @@ class PDFDevice: evenodd: bool, path: Sequence[PathSegment] ) -> None: - return + pass def render_image(self, name: str, stream: PDFStream) -> None: - return + pass def render_string( self, @@ -98,7 +98,7 @@ class PDFDevice: ncs: PDFColorSpace, graphicstate: "PDFGraphicState" ) -> None: - return + pass class PDFTextDevice(PDFDevice): @@ -132,7 +132,7 @@ class PDFTextDevice(PDFDevice): seq, matrix, textstate.linematrix, font, fontsize, scaling, charspace, wordspace, rise, dxscale, ncs, graphicstate) - return + def render_string_horizontal( self, @@ -227,7 +227,7 @@ class TagExtractor(PDFDevice): self.codec = codec self.pageno = 0 self._stack: List[PSLiteral] = [] - return + def render_string( self, @@ -252,7 +252,7 @@ class TagExtractor(PDFDevice): except PDFUnicodeNotDefined: pass self._write(utils.enc(text)) - return + def begin_page(self, page: PDFPage, ctm: Matrix) -> None: output = '' %\ diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py index f0102ef..582f361 100644 --- a/pdfminer/pdfdocument.py +++ b/pdfminer/pdfdocument.py @@ -100,7 +100,7 @@ class PDFXRef(PDFBaseXRef): def __init__(self) -> None: self.offsets: Dict[int, Tuple[Optional[int], int, int]] = {} self.trailer: Dict[str, Any] = {} - return + def __repr__(self) -> str: return '' % (self.offsets.keys()) @@ -145,7 +145,7 @@ class PDFXRef(PDFBaseXRef): self.offsets[objid] = (None, int(pos_b), int(genno_b)) log.info('xref objects: %r', self.offsets) self.load_trailer(parser) - return + def load_trailer(self, parser: PDFParser) -> None: try: @@ -159,7 +159,7 @@ class PDFXRef(PDFBaseXRef): (_, dic) = x[0] self.trailer.update(dict_value(dic)) log.debug('trailer=%r', self.trailer) - return + def get_trailer(self) -> Dict[str, Any]: return self.trailer @@ -225,7 +225,7 @@ class PDFXRefFallback(PDFXRef): for index in range(n): objid1 = objs[index*2] self.offsets[objid1] = (objid, index, 0) - return + class PDFXRefStream(PDFBaseXRef): @@ -237,7 +237,7 @@ class PDFXRefStream(PDFBaseXRef): self.fl2: Optional[int] = None self.fl3: Optional[int] = None self.ranges: List[Tuple[int, int]] = [] - return + def __repr__(self) -> str: return '' % (self.ranges) diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py index 7b03729..e70e11e 100644 --- a/pdfminer/pdfinterp.py +++ b/pdfminer/pdfinterp.py @@ -73,7 +73,7 @@ class PDFTextState: self.reset() # self.matrix is set # self.linematrix is set - return + def __repr__(self) -> str: return ' None: self.matrix = MATRIX_IDENTITY self.linematrix = (0, 0) - return + Color = Union[ @@ -125,7 +125,7 @@ class PDFGraphicState: # non stroking color self.ncolor: Optional[Color] = None - return + def copy(self) -> "PDFGraphicState": obj = PDFGraphicState() @@ -160,7 +160,7 @@ class PDFResourceManager: def __init__(self, caching: bool = True) -> None: self.caching = caching self._cached_fonts: Dict[object, PDFFont] = {} - return + def get_procset(self, procs: Sequence[object]) -> None: for proc in procs: @@ -170,7 +170,7 @@ class PDFResourceManager: pass else: pass - return + def get_cmap(self, cmapname: str, strict: bool = False) -> CMapBase: try: @@ -234,7 +234,7 @@ class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]): # all the methods that would attempt to access self.fp without first # calling self.fillfp(). PSStackParser.__init__(self, None) # type: ignore[arg-type] - return + def fillfp(self) -> None: if not self.fp: @@ -244,12 +244,12 @@ class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]): else: raise PSEOF('Unexpected EOF, file truncated?') self.fp = BytesIO(strm.get_data()) - return + def seek(self, pos: int) -> None: self.fillfp() PSStackParser.seek(self, pos) - return + def fillbuf(self) -> None: if self.charpos < len(self.buf): @@ -262,7 +262,7 @@ class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]): break self.fp = None # type: ignore[assignment] self.charpos = 0 - return + def get_inline_data( self, @@ -300,7 +300,7 @@ class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]): def flush(self) -> None: self.add_results(*self.popall()) - return + KEYWORD_BI = KWD(b'BI') KEYWORD_ID = KWD(b'ID') @@ -327,7 +327,7 @@ class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]): raise else: self.push((pos, token)) - return + PDFStackT = PSStackType[PDFStream] diff --git a/pdfminer/pdfpage.py b/pdfminer/pdfpage.py index 75f77fd..9a5af39 100644 --- a/pdfminer/pdfpage.py +++ b/pdfminer/pdfpage.py @@ -73,7 +73,7 @@ class PDFPage: if not isinstance(contents, list): contents = [contents] self.contents: List[object] = contents - return + def __repr__(self) -> str: return ''\ diff --git a/pdfminer/pdfparser.py b/pdfminer/pdfparser.py index 18ad9eb..c17ed1c 100644 --- a/pdfminer/pdfparser.py +++ b/pdfminer/pdfparser.py @@ -45,12 +45,12 @@ class PDFParser(PSStackParser[Union[PSKeyword, PDFStream, PDFObjRef, None]]): PSStackParser.__init__(self, fp) self.doc: Optional["PDFDocument"] = None self.fallback = False - return + def set_document(self, doc: "PDFDocument") -> None: """Associates the parser with a PDFDocument object.""" self.doc = doc - return + KEYWORD_R = KWD(b'R') KEYWORD_NULL = KWD(b'null') @@ -134,7 +134,7 @@ class PDFParser(PSStackParser[Union[PSKeyword, PDFStream, PDFObjRef, None]]): # others self.push((pos, token)) - return + class PDFStreamParser(PDFParser): @@ -148,11 +148,11 @@ class PDFStreamParser(PDFParser): def __init__(self, data: bytes) -> None: PDFParser.__init__(self, BytesIO(data)) - return + def flush(self) -> None: self.add_results(*self.popall()) - return + KEYWORD_OBJ = KWD(b'obj') @@ -176,4 +176,4 @@ class PDFStreamParser(PDFParser): return # others self.push((pos, token)) - return + diff --git a/pdfminer/pdftypes.py b/pdfminer/pdftypes.py index b0496e8..ac856bf 100644 --- a/pdfminer/pdftypes.py +++ b/pdfminer/pdftypes.py @@ -88,7 +88,7 @@ class PDFObjRef(PDFObject): raise PDFValueError('PDF object id cannot be 0.') self.doc = doc self.objid = objid - return + def __repr__(self) -> str: return '' % (self.objid) @@ -255,12 +255,12 @@ class PDFStream(PDFObject): self.data: Optional[bytes] = None self.objid: Optional[int] = None self.genno: Optional[int] = None - return + def set_objid(self, objid: int, genno: int) -> None: self.objid = objid self.genno = genno - return + def __repr__(self) -> str: if self.data is None: diff --git a/pdfminer/psparser.py b/pdfminer/psparser.py index a05009e..017457a 100644 --- a/pdfminer/psparser.py +++ b/pdfminer/psparser.py @@ -76,7 +76,7 @@ class PSKeyword(PSObject): def __init__(self, name: bytes) -> None: self.name = name - return + def __repr__(self) -> str: name = self.name @@ -95,7 +95,7 @@ class PSSymbolTable(Generic[_SymbolT]): def __init__(self, klass: Type[_SymbolT]) -> None: self.dict: Dict[PSLiteral.NameType, _SymbolT] = {} self.klass: Type[_SymbolT] = klass - return + def intern(self, name: PSLiteral.NameType) -> _SymbolT: if name in self.dict: @@ -182,7 +182,7 @@ class PSBaseParser: def __init__(self, fp: BinaryIO) -> None: self.fp = fp self.seek(0) - return + def __repr__(self) -> str: return '<%s: %r, bufpos=%d>' % (self.__class__.__name__, self.fp, diff --git a/pdfminer/utils.py b/pdfminer/utils.py index d483a48..f9e9538 100644 --- a/pdfminer/utils.py +++ b/pdfminer/utils.py @@ -57,7 +57,7 @@ class open_filename(object): ) -> None: if self.closing: self.file_handler.close() - return + def make_compat_bytes(in_str: str) -> bytes: