From ad6587c6970c4efeeb7196bf3e038603c1f77e82 Mon Sep 17 00:00:00 2001 From: Jeremy Singer-Vine Date: Thu, 18 Aug 2022 14:38:51 -0400 Subject: [PATCH] Fix to set color space from color convenience ops (#794) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Section 4.5 of the PDF reference says: "Color values are interpreted according to the current color space, another parameter of the graphics state. A PDF content stream first selects a color space by invoking the CS operator (for the stroking color) or the cs operator (for the non-stroking color). It then selects color values within that color space with the SC operator (stroking) or the sc operator (nonstroking). There are also convenience operators—G, g, RG, rg, K, and k—that select both a color space and a color value within it in a single step." Previously, those convenience operators did *not* set the color space. This commit, following on filed issue #779, fixes this. It also adds a test to demonstrate that, at least for the do_rg method, the fix works as intended. --- CHANGELOG.md | 1 + pdfminer/pdfinterp.py | 6 ++++++ tests/test_converter.py | 28 +++++++++++++++++++++++++++- 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f75e6a2..dfe31f1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - `TypeError` when getting default width of font ([#720](https://github.com/pdfminer/pdfminer.six/issues/720)) - Installing typing-extensions on Python 3.6 and 3.7 ([#775](https://github.com/pdfminer/pdfminer.six/pull/775)) - `TypeError` in cmapdb.py when parsing null characters ([#768](https://github.com/pdfminer/pdfminer.six/pull/768)) +- Color "convenience operators" now (per spec) also set color space ([#779](https://github.com/pdfminer/pdfminer.six/issues/779)) ### Deprecated diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py index bc049b3..c1a8597 100644 --- a/pdfminer/pdfinterp.py +++ b/pdfminer/pdfinterp.py @@ -652,21 +652,25 @@ class PDFPageInterpreter: def do_G(self, gray: PDFStackT) -> None: """Set gray level for stroking operations""" self.graphicstate.scolor = cast(float, gray) + self.scs = self.csmap["DeviceGray"] return def do_g(self, gray: PDFStackT) -> None: """Set gray level for nonstroking operations""" self.graphicstate.ncolor = cast(float, gray) + self.ncs = self.csmap["DeviceGray"] return def do_RG(self, r: PDFStackT, g: PDFStackT, b: PDFStackT) -> None: """Set RGB color for stroking operations""" self.graphicstate.scolor = (cast(float, r), cast(float, g), cast(float, b)) + self.scs = self.csmap["DeviceRGB"] return def do_rg(self, r: PDFStackT, g: PDFStackT, b: PDFStackT) -> None: """Set RGB color for nonstroking operations""" self.graphicstate.ncolor = (cast(float, r), cast(float, g), cast(float, b)) + self.ncs = self.csmap["DeviceRGB"] return def do_K(self, c: PDFStackT, m: PDFStackT, y: PDFStackT, k: PDFStackT) -> None: @@ -677,6 +681,7 @@ class PDFPageInterpreter: cast(float, y), cast(float, k), ) + self.scs = self.csmap["DeviceCMYK"] return def do_k(self, c: PDFStackT, m: PDFStackT, y: PDFStackT, k: PDFStackT) -> None: @@ -687,6 +692,7 @@ class PDFPageInterpreter: cast(float, y), cast(float, k), ) + self.ncs = self.csmap["DeviceCMYK"] return def do_SCN(self) -> None: diff --git a/tests/test_converter.py b/tests/test_converter.py index bae442f..80de019 100644 --- a/tests/test_converter.py +++ b/tests/test_converter.py @@ -1,9 +1,10 @@ import io from tempfile import TemporaryFile +from helpers import absolute_sample_path from pdfminer.converter import PDFLayoutAnalyzer, PDFConverter from pdfminer.high_level import extract_pages -from pdfminer.layout import LTContainer, LTRect, LTLine, LTCurve +from pdfminer.layout import LTChar, LTContainer, LTRect, LTLine, LTCurve from pdfminer.pdfinterp import PDFGraphicState @@ -225,6 +226,31 @@ class TestPaintPath: assert len(analyzer.cur_item._objs) == 0 +def get_chars(el): + if isinstance(el, LTContainer): + for item in el: + yield from get_chars(item) + elif isinstance(el, LTChar): + yield el + else: + pass + + +class TestColorSpace: + def test_do_rg(self): + path = absolute_sample_path("contrib/issue-00352-hash-twos-complement.pdf") + for page in extract_pages(path): + for char in get_chars(page): + cs = char.ncs.name + color = char.graphicstate.ncolor + if cs == "DeviceGray": + assert isinstance(color, (float, int)) + elif cs == "DeviceRGB": + assert len(color) == 3 + elif cs == "DeviceCMYK": + assert len(color) == 4 + + class TestBinaryDetector: def test_stringio(self): assert not PDFConverter._is_binary_stream(io.StringIO())