Fix to set color space from color convenience ops (#794)

Section 4.5 of the PDF reference says: "Color values are interpreted
according to the current color space, another parameter of the graphics
state. A PDF content stream first selects a color space by invoking the
CS operator (for the stroking color) or the cs operator (for the
non-stroking color). It then selects color values within that color
space with the SC operator (stroking) or the sc operator (nonstroking).
There are also convenience operators—G, g, RG, rg, K, and k—that select
both a color space and a color value within it in a single step."

Previously, those convenience operators did *not* set the color space.
This commit, following up on issue #779, fixes that. It also adds a
test demonstrating that the fix works as intended, at least for the
do_rg method.
pull/806/merge
Jeremy Singer-Vine 2022-08-18 14:38:51 -04:00 committed by GitHub
parent ca9f75a032
commit ad6587c697
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 34 additions and 1 deletions

View File

@ -18,6 +18,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- `TypeError` when getting default width of font ([#720](https://github.com/pdfminer/pdfminer.six/issues/720)) - `TypeError` when getting default width of font ([#720](https://github.com/pdfminer/pdfminer.six/issues/720))
- Installing typing-extensions on Python 3.6 and 3.7 ([#775](https://github.com/pdfminer/pdfminer.six/pull/775)) - Installing typing-extensions on Python 3.6 and 3.7 ([#775](https://github.com/pdfminer/pdfminer.six/pull/775))
- `TypeError` in cmapdb.py when parsing null characters ([#768](https://github.com/pdfminer/pdfminer.six/pull/768)) - `TypeError` in cmapdb.py when parsing null characters ([#768](https://github.com/pdfminer/pdfminer.six/pull/768))
- Color "convenience operators" now (per spec) also set color space ([#779](https://github.com/pdfminer/pdfminer.six/issues/779))
### Deprecated ### Deprecated

View File

@ -652,21 +652,25 @@ class PDFPageInterpreter:
def do_G(self, gray: PDFStackT) -> None:
    """Set gray level for stroking operations.

    ``G`` is a convenience operator: besides storing the gray level as the
    stroking color, it also switches the stroking color space to
    ``DeviceGray``.

    :param gray: gray level operand taken from the operand stack.
    """
    self.graphicstate.scolor = cast(float, gray)
    # The convenience operator implies the color space, so record it too.
    self.scs = self.csmap["DeviceGray"]
def do_g(self, gray: PDFStackT) -> None:
    """Set gray level for nonstroking operations.

    ``g`` is a convenience operator: besides storing the gray level as the
    non-stroking color, it also switches the non-stroking color space to
    ``DeviceGray``.

    :param gray: gray level operand taken from the operand stack.
    """
    self.graphicstate.ncolor = cast(float, gray)
    # The convenience operator implies the color space, so record it too.
    self.ncs = self.csmap["DeviceGray"]
def do_RG(self, r: PDFStackT, g: PDFStackT, b: PDFStackT) -> None:
    """Set RGB color for stroking operations.

    ``RG`` is a convenience operator: besides storing the ``(r, g, b)``
    triple as the stroking color, it also switches the stroking color space
    to ``DeviceRGB``.

    :param r: red component operand.
    :param g: green component operand.
    :param b: blue component operand.
    """
    self.graphicstate.scolor = (cast(float, r), cast(float, g), cast(float, b))
    # The convenience operator implies the color space, so record it too.
    self.scs = self.csmap["DeviceRGB"]
def do_rg(self, r: PDFStackT, g: PDFStackT, b: PDFStackT) -> None:
    """Set RGB color for nonstroking operations.

    ``rg`` is a convenience operator: besides storing the ``(r, g, b)``
    triple as the non-stroking color, it also switches the non-stroking
    color space to ``DeviceRGB``.

    :param r: red component operand.
    :param g: green component operand.
    :param b: blue component operand.
    """
    self.graphicstate.ncolor = (cast(float, r), cast(float, g), cast(float, b))
    # The convenience operator implies the color space, so record it too.
    self.ncs = self.csmap["DeviceRGB"]
def do_K(self, c: PDFStackT, m: PDFStackT, y: PDFStackT, k: PDFStackT) -> None: def do_K(self, c: PDFStackT, m: PDFStackT, y: PDFStackT, k: PDFStackT) -> None:
@ -677,6 +681,7 @@ class PDFPageInterpreter:
cast(float, y), cast(float, y),
cast(float, k), cast(float, k),
) )
self.scs = self.csmap["DeviceCMYK"]
return return
def do_k(self, c: PDFStackT, m: PDFStackT, y: PDFStackT, k: PDFStackT) -> None: def do_k(self, c: PDFStackT, m: PDFStackT, y: PDFStackT, k: PDFStackT) -> None:
@ -687,6 +692,7 @@ class PDFPageInterpreter:
cast(float, y), cast(float, y),
cast(float, k), cast(float, k),
) )
self.ncs = self.csmap["DeviceCMYK"]
return return
def do_SCN(self) -> None: def do_SCN(self) -> None:

View File

@ -1,9 +1,10 @@
import io import io
from tempfile import TemporaryFile from tempfile import TemporaryFile
from helpers import absolute_sample_path
from pdfminer.converter import PDFLayoutAnalyzer, PDFConverter from pdfminer.converter import PDFLayoutAnalyzer, PDFConverter
from pdfminer.high_level import extract_pages from pdfminer.high_level import extract_pages
from pdfminer.layout import LTContainer, LTRect, LTLine, LTCurve from pdfminer.layout import LTChar, LTContainer, LTRect, LTLine, LTCurve
from pdfminer.pdfinterp import PDFGraphicState from pdfminer.pdfinterp import PDFGraphicState
@ -225,6 +226,31 @@ class TestPaintPath:
assert len(analyzer.cur_item._objs) == 0 assert len(analyzer.cur_item._objs) == 0
def get_chars(el):
    """Yield every ``LTChar`` reachable from *el*, in document order.

    ``LTContainer`` elements are walked recursively; an ``LTChar`` is
    yielded as-is; any other element yields nothing.
    """
    if isinstance(el, LTContainer):
        for item in el:
            yield from get_chars(item)
    elif isinstance(el, LTChar):
        yield el
class TestColorSpace:
    """Checks that a char's non-stroking color agrees with its color space."""

    def test_do_rg(self):
        """Verify the color value's shape matches the recorded color space.

        For every character in the sample PDF, the non-stroking color must be
        a scalar for DeviceGray, a 3-sequence for DeviceRGB and a 4-sequence
        for DeviceCMYK.
        """
        path = absolute_sample_path("contrib/issue-00352-hash-twos-complement.pdf")
        for page in extract_pages(path):
            for char in get_chars(page):
                cs = char.ncs.name
                color = char.graphicstate.ncolor
                # Only the three device color spaces are checked; characters
                # in any other color space are skipped without an assertion.
                if cs == "DeviceGray":
                    assert isinstance(color, (float, int))
                elif cs == "DeviceRGB":
                    assert len(color) == 3
                elif cs == "DeviceCMYK":
                    assert len(color) == 4
class TestBinaryDetector: class TestBinaryDetector:
def test_stringio(self): def test_stringio(self):
assert not PDFConverter._is_binary_stream(io.StringIO()) assert not PDFConverter._is_binary_stream(io.StringIO())