From 5cbdd915c7ea64fa7949518618d9d66eb186ae44 Mon Sep 17 00:00:00 2001 From: speedplane Date: Thu, 11 Dec 2014 00:53:33 -0500 Subject: [PATCH 01/10] Remove the dependency on python2. Also, allow tests to be run on cygwin by checking for it, and converting unix2dos line endings. --- samples/Makefile | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/samples/Makefile b/samples/Makefile index d1b06c4..7984c37 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -1,9 +1,15 @@ # GNUMakefile for test +UNAME_S := $(shell uname -o) +CNVTXT=: +ifeq ($(UNAME_S),Cygwin) + CNVTXT=unix2dos +endif + RM=rm -f CMP=: ECHO=echo -PYTHON=python2 +PYTHON=python PDF2TXT=PYTHONPATH=.. $(PYTHON) ../tools/pdf2txt.py -p1 -V @@ -40,10 +46,13 @@ tests: for i in $(TESTS); do \ $(ECHO) $$i; \ $(PDF2TXT) -t html -o $$i.html $$i.pdf || exit 1; \ + $(CNVTXT) $$i.html || exit 1; \ $(CMP) $$i.html $$i.html.ref || exit 1; \ $(PDF2TXT) -t xml -o $$i.xml $$i.pdf || exit 1; \ + $(CNVTXT) $$i.xml || exit 1; \ $(CMP) $$i.xml $$i.xml.ref || exit 1; \ - $(PDF2TXT) -t text -o $$i.txt $$i.pdf || eixt 1; \ + $(PDF2TXT) -t text -o $$i.txt $$i.pdf || exit 1; \ + $(CNVTXT) $$i.txt || exit 1; \ $(CMP) $$i.txt $$i.txt.ref || exit 1; \ done From 45170e718396502c45d13cb44bfde2b6f95e576f Mon Sep 17 00:00:00 2001 From: speedplane Date: Tue, 11 Nov 2014 23:49:13 -0500 Subject: [PATCH 02/10] There are a number of relatively complex changes here. Comments are in order of where the change appears. 1. When detecting text in a horizontal line, we already add a space between words if separated by more than word_margin apart. However now, we only do it if there is not already an existing space. This prevents multiple spaces being placed between words. 2. Detect a horizontal line if the line is zero width. This improves our detection of horizontal lines when looking for both horizontal and vertical. 3. Don't detect a vertical line if the previous letter is whitespace.
Prevents double spaces being caught as vert lines. 4. Improve upon an unfortunate O(N^2) algorithm which I have seen taking many minutes to execute. Unfortunately, while the "fix" reduces algorithmic complexity, it isn't technically correct, so we only do it when we know things will take a long time. --- pdfminer/layout.py | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/pdfminer/layout.py b/pdfminer/layout.py index 32b706f..b2dde8c 100644 --- a/pdfminer/layout.py +++ b/pdfminer/layout.py @@ -368,9 +368,17 @@ class LTTextLineHorizontal(LTTextLine): def add(self, obj): if isinstance(obj, LTChar) and self.word_margin: + # Add a space between words if separated by more than word_margin + # apart. Use the max of obj width and height so narrow letters + # (i, l) are treated like wider letters, reducing extra spaces margin = self.word_margin * max(obj.width, obj.height) if self._x1 < obj.x0-margin: - LTContainer.add(self, LTAnno(' ')) + # But only do it if there is not already a space. + last_was_alpha = self._objs and \ + isinstance(self._objs[-1], LTChar) and \ + self._objs[-1]._text == ' ' + if not last_was_alpha: + LTContainer.add(self, LTAnno(' ')) self._x1 = obj.x1 LTTextLine.add(self, obj) return @@ -513,7 +521,9 @@ class LTLayoutContainer(LTContainer): (min(obj0.height, obj1.height) * laparams.line_overlap < obj0.voverlap(obj1)) and (obj0.hdistance(obj1) < - max(obj0.width, obj1.width) * laparams.char_margin)) + max(obj0.width, obj1.width) * laparams.char_margin) or + # If the line is zero width, default to horizontal + (max(obj0.width, obj1.width) == 0 and obj1.x0 >= obj0.x0)) # valign: obj0 and obj1 is vertically aligned. 
# @@ -535,7 +545,10 @@ class LTLayoutContainer(LTContainer): (min(obj0.width, obj1.width) * laparams.line_overlap < obj0.hoverlap(obj1)) and (obj0.vdistance(obj1) < - max(obj0.height, obj1.height) * laparams.char_margin)) + max(obj0.height, obj1.height) * laparams.char_margin) and + # Don't start a vertical line if the previous letter is + # whitspace. Prevents double spaces being caught as vert lines. + (line or obj0._text.strip())) if ((halign and isinstance(line, LTTextLineHorizontal)) or (valign and isinstance(line, LTTextLineVertical))): @@ -631,8 +644,19 @@ class LTLayoutContainer(LTContainer): (c,d,_,_) = t return (c,d) - # XXX this still takes O(n^2) :( + # The algorithm below still takes O(n^2) :( + # For now, if we have many boxes, split them into two and perform them + # separately. This will cause bugs, but will prevent hanging. + if len(boxes) > 100: + boxes = sorted(boxes, key=lambda obj: obj.y0) + # Divide in two, then perform grouping + # print "Making Recursive Call %d"%len(boxes) + bot_boxes = self.group_textboxes(laparams, boxes[:len(boxes)/2]) + top_boxes = self.group_textboxes(laparams, boxes[len(boxes)/2:]) + boxes = bot_boxes + top_boxes + dists = [] + # Calculate the distance between each box for i in xrange(len(boxes)): obj1 = boxes[i] for j in xrange(i+1, len(boxes)): @@ -642,16 +666,20 @@ class LTLayoutContainer(LTContainer): dists = csort(dists, key=key_obj) plane = Plane(self.bbox) plane.extend(boxes) + # Start with the two closest objects while dists: (c, d, obj1, obj2) = dists.pop(0) + # If there are any objects in between, then skip these two if c == 0 and isany(obj1, obj2): dists.append((1, d, obj1, obj2)) continue + # Group these two closest objects if (isinstance(obj1, (LTTextBoxVertical, LTTextGroupTBRL)) or isinstance(obj2, (LTTextBoxVertical, LTTextGroupTBRL))): group = LTTextGroupTBRL([obj1, obj2]) else: group = LTTextGroupLRTB([obj1, obj2]) + # Remove the two individual objects plane.remove(obj1) plane.remove(obj2) dists = [ 
(c,d,obj1,obj2) for (c,d,obj1,obj2) in dists From 806ee603ff2b3edec0f7749fa5ee2e4636d10e47 Mon Sep 17 00:00:00 2001 From: speedplane Date: Fri, 12 Dec 2014 00:35:38 -0500 Subject: [PATCH 03/10] More fixes to layout. The compute neighbors function for horizontal lines is only intended to find neighbors on differing lines. However, it's entirely possible that horizontal neighbors could appear. This commit finds horizontal neighbors in a horizontal line and merges them together into a single horizontal line if necessary. This leads to much better text extraction if the PDF was created in a funky way. For example (test case coming), I have seen PDFs which are written almost like vertical columns, but the text is entirely horizontal. --- pdfminer/layout.py | 62 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 55 insertions(+), 7 deletions(-) diff --git a/pdfminer/layout.py b/pdfminer/layout.py index b2dde8c..3d52859 100644 --- a/pdfminer/layout.py +++ b/pdfminer/layout.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +import logging from .utils import INF from .utils import Plane from .utils import get_bound @@ -355,6 +356,9 @@ class LTTextLine(LTTextContainer): LTContainer.add(self, LTAnno('\n')) return + def group_line_neighbors(self, objs, ratio): + return objs + def find_neighbors(self, plane, ratio): raise NotImplementedError @@ -383,14 +387,52 @@ class LTTextLineHorizontal(LTTextLine): LTTextLine.add(self, obj) return + def is_neighbor(self, obj, d, same_line = False): + # Horizontal lines can only connect with horizontal lines + if not isinstance(obj, LTTextLineHorizontal): + return False + # Ensure they are vertically close + if abs(obj.height-self.height) >= d: + return False + # Ensure that they have similar start or stop x positions + if not (abs(obj.x0-self.x0) < d or + abs(obj.x1-self.x1) < d or + # Or that they intersect eachother horizontally.
+ (obj.x0 < self.x0 and obj.x1 > self.x0) or + (obj.x0 > self.x0 and obj.x0 < self.x1)): + return False + if same_line and not ( + # Ensure they have similar + (obj.y0 == self.y0) or (obj.y1 == self.y1) or + (obj.y0 < self.y0 and obj.y0 > self.y1) or + (obj.y0 > self.y0 and obj.y1 < self.y0)): + return False + return True + + def group_line_neighbors(self, objs, ratio): + ''' + Given a set of objects that may or may not be on the same line as this, + add the objects that are on the same line. + + Return the objects that are not on the same line. + ''' + d = ratio*self.height + other_lines = [] + for o in objs: + if o == self: + other_lines.append(o) + elif self.is_neighbor(o, d, same_line=True): + [self.add(oc) for oc in o] + # Clear out the old line + o._objs = [] + else: + other_lines.append(o) + return other_lines + def find_neighbors(self, plane, ratio): d = ratio*self.height objs = plane.find((self.x0, self.y0-d, self.x1, self.y1+d)) - return [obj for obj in objs - if (isinstance(obj, LTTextLineHorizontal) and - abs(obj.height-self.height) < d and - (abs(obj.x0-self.x0) < d or - abs(obj.x1-self.x1) < d))] + return [o for o in objs if self.is_neighbor(o, d)] class LTTextLineVertical(LTTextLine): @@ -577,19 +619,25 @@ class LTLayoutContainer(LTContainer): yield line return - # group_textlines: group neighboring lines to textboxes. + # group_textlines: group neighbouring lines to textboxes. 
def group_textlines(self, laparams, lines): plane = Plane(self.bbox) plane.extend(lines) boxes = {} + # for line in plane: + # print "line", ("".join([s._text for s in line])).encode('ascii', 'ignore') for line in lines: neighbors = line.find_neighbors(plane, laparams.line_margin) - if line not in neighbors: continue + if line not in neighbors: + logging.error("Line cannot find itself: %s"%line) + continue + neighbors = line.group_line_neighbors(neighbors, laparams.line_margin) members = [] for obj1 in neighbors: members.append(obj1) if obj1 in boxes: members.extend(boxes.pop(obj1)) + # print "members: ", ["".join([o._text for o in line]) for line in members] if isinstance(line, LTTextLineHorizontal): box = LTTextBoxHorizontal() else: From 2199c254930c900790512cfbd77cca8a23a9c633 Mon Sep 17 00:00:00 2001 From: speedplane Date: Fri, 12 Dec 2014 00:29:57 -0500 Subject: [PATCH 04/10] Add my own .gitignore. --- .gitignore | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..db4561e --- /dev/null +++ b/.gitignore @@ -0,0 +1,54 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.cache +nosetests.xml +coverage.xml + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ From 69afd3dd30ef6c8c6ace1ad8fe653763bbb002bd Mon Sep 17 00:00:00 2001 From: speedplane Date: Sun, 14 Dec 2014 01:23:44 -0500 Subject: [PATCH 05/10] Use a .gitignore file. --- .gitignore | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/.gitignore b/.gitignore index db4561e..c132931 100644 --- a/.gitignore +++ b/.gitignore @@ -52,3 +52,58 @@ docs/_build/ # PyBuilder target/ +======= +# Intermediate documents +*.xps + +# Password and Key Files +*.pem +*.p12 + +# Compiled source # +################### +*.pyc +*.com +*.class +*.dll +*.exe +*.o +*.so + +# Mecurial Files? +*.i +*.d +*.mo +*.hg/ + +# Python data files # +*.shelf +*.shelve + +# Don't track these files, they are output from scripts + +# Subversion files +*.svn-base +all-wcprops +entries + +# Logs and databases # +###################### +bulkloader-log-* +*.log +*.sql +*.sql3 +*.sql3-journal +*.sqlite + +# OS generated files # +###################### +.DS_Store? +ehthumbs.db +Icon? +Thumbs.db + +# tmp files # +############# +~$*.doc +~WRL*.tmp \ No newline at end of file From 56094183514b9772f67bb1eadd96f539b56cdf4c Mon Sep 17 00:00:00 2001 From: speedplane Date: Sun, 14 Dec 2014 01:29:39 -0500 Subject: [PATCH 06/10] Add gz to gitignore. 
--- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index c132931..c81d6cf 100644 --- a/.gitignore +++ b/.gitignore @@ -106,4 +106,6 @@ Thumbs.db # tmp files # ############# ~$*.doc -~WRL*.tmp \ No newline at end of file +~WRL*.tmp + +*.gz \ No newline at end of file From b0b8818a41652bc54c13d4643649c04f5a353f0e Mon Sep 17 00:00:00 2001 From: speedplane Date: Mon, 13 Jun 2016 23:35:11 -0400 Subject: [PATCH 07/10] Fix a bug with pdfminer which occurs when two or more filters are applied to a stream, even though no parameters are specified. The code would previously drop all of the filters after the first due to misapplication of the zip function. --- pdfminer/pdftypes.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pdfminer/pdftypes.py b/pdfminer/pdftypes.py index 10c0777..af1435b 100644 --- a/pdfminer/pdftypes.py +++ b/pdfminer/pdftypes.py @@ -223,8 +223,13 @@ class PDFStream(PDFObject): return [] if not isinstance(filters, list): filters = [filters] - if not isinstance(params, list): + if not params: + # Make sure the parameters list is the same as filters. + params = [{}]*len(filters) + elif not isinstance(params, list): params = [params] + if STRICT and len(params) != len(filters): + raise PDFException("Parameters len filter mismatch") return zip(filters, params) def decode(self): From 2049462f6ff2f2be1efc4c306fb7947d16c173a1 Mon Sep 17 00:00:00 2001 From: speedplane Date: Mon, 13 Jun 2016 23:42:21 -0400 Subject: [PATCH 08/10] Revert changes unrelated to this branch.
--- .gitignore | 111 --------------------------------------------- pdfminer/layout.py | 100 +++++----------------------------------- 2 files changed, 12 insertions(+), 199 deletions(-) delete mode 100644 .gitignore diff --git a/.gitignore b/.gitignore deleted file mode 100644 index c81d6cf..0000000 --- a/.gitignore +++ /dev/null @@ -1,111 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] - -# C extensions -*.so - -# Distribution / packaging -.Python -env/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -*.egg-info/ -.installed.cfg -*.egg - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.coverage -.cache -nosetests.xml -coverage.xml - -# Translations -*.mo -*.pot - -# Django stuff: -*.log - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ -======= -# Intermediate documents -*.xps - -# Password and Key Files -*.pem -*.p12 - -# Compiled source # -################### -*.pyc -*.com -*.class -*.dll -*.exe -*.o -*.so - -# Mecurial Files? -*.i -*.d -*.mo -*.hg/ - -# Python data files # -*.shelf -*.shelve - -# Don't track these files, they are output from scripts - -# Subversion files -*.svn-base -all-wcprops -entries - -# Logs and databases # -###################### -bulkloader-log-* -*.log -*.sql -*.sql3 -*.sql3-journal -*.sqlite - -# OS generated files # -###################### -.DS_Store? -ehthumbs.db -Icon? 
-Thumbs.db - -# tmp files # -############# -~$*.doc -~WRL*.tmp - -*.gz \ No newline at end of file diff --git a/pdfminer/layout.py b/pdfminer/layout.py index 3d52859..9426ad3 100644 --- a/pdfminer/layout.py +++ b/pdfminer/layout.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -import logging from .utils import INF from .utils import Plane from .utils import get_bound @@ -356,9 +355,6 @@ class LTTextLine(LTTextContainer): LTContainer.add(self, LTAnno('\n')) return - def group_line_neighbors(self, objs, ratio): - return objs - def find_neighbors(self, plane, ratio): raise NotImplementedError @@ -372,67 +368,21 @@ class LTTextLineHorizontal(LTTextLine): def add(self, obj): if isinstance(obj, LTChar) and self.word_margin: - # Add a space between words if separated by more than word_margin - # apart. Use the max of obj width and height so narrow letters - # (i, l) are treated like wider letters, reducing extra spaces margin = self.word_margin * max(obj.width, obj.height) if self._x1 < obj.x0-margin: - # But only do it if there is not already a space. - last_was_alpha = self._objs and \ - isinstance(self._objs[-1], LTChar) and \ - self._objs[-1]._text == ' ' - if not last_was_alpha: - LTContainer.add(self, LTAnno(' ')) + LTContainer.add(self, LTAnno(' ')) self._x1 = obj.x1 LTTextLine.add(self, obj) return - def is_neighbor(self, obj, d, same_line = False): - # Horizontal lines can only connect with horizontal lines - if not isinstance(obj, LTTextLineHorizontal): - return False - # Ensure they are vertically close - if abs(obj.height-self.height) >= d: - return False - # Ensure that they have similar start or stop x positions - if not (abs(obj.x0-self.x0) < d or - abs(obj.x1-self.x1) < d or - # Or that they intersect eachother horizontally. 
- (obj.x0 < self.x0 and obj.x1 > self.x0) or - (obj.x0 > self.x0 and obj.x0 < self.x1)): - return False - if same_line and not ( - # Ensure they have similar - (obj.y0 == self.y0) or (obj.y1 == self.y1) or - (obj.y0 < self.y0 and obj.y0 > self.y1) or - (obj.y0 > self.y0 and obj.y1 < self.y0)): - return False - return True - - def group_line_neighbors(self, objs, ratio): - ''' - Given a set of objects that may or may not be on the same line as this, - add the objects that are on the same line. - - Return the objects that are not on the same line. - ''' - d = ratio*self.height - other_lines = [] - for o in objs: - if o == self: - other_lines.append(o) - elif self.is_neighbor(o, d, same_line=True): - [self.add(oc) for oc in o] - # Clear out the old line - o._objs = [] - else: - other_lines.append(o) - return other_lines - def find_neighbors(self, plane, ratio): d = ratio*self.height objs = plane.find((self.x0, self.y0-d, self.x1, self.y1+d)) - return [o for o in objs if self.is_neighbor(o, d)] + return [obj for obj in objs + if (isinstance(obj, LTTextLineHorizontal) and + abs(obj.height-self.height) < d and + (abs(obj.x0-self.x0) < d or + abs(obj.x1-self.x1) < d))] class LTTextLineVertical(LTTextLine): @@ -563,9 +513,7 @@ class LTLayoutContainer(LTContainer): (min(obj0.height, obj1.height) * laparams.line_overlap < obj0.voverlap(obj1)) and (obj0.hdistance(obj1) < - max(obj0.width, obj1.width) * laparams.char_margin) or - # If the line is zero width, default to horizontal - (max(obj0.width, obj1.width) == 0 and obj1.x0 >= obj0.x0)) + max(obj0.width, obj1.width) * laparams.char_margin)) # valign: obj0 and obj1 is vertically aligned. # @@ -587,10 +535,7 @@ class LTLayoutContainer(LTContainer): (min(obj0.width, obj1.width) * laparams.line_overlap < obj0.hoverlap(obj1)) and (obj0.vdistance(obj1) < - max(obj0.height, obj1.height) * laparams.char_margin) and - # Don't start a vertical line if the previous letter is - # whitspace. 
Prevents double spaces being caught as vert lines. - (line or obj0._text.strip())) + max(obj0.height, obj1.height) * laparams.char_margin)) if ((halign and isinstance(line, LTTextLineHorizontal)) or (valign and isinstance(line, LTTextLineVertical))): @@ -619,25 +564,19 @@ class LTLayoutContainer(LTContainer): yield line return - # group_textlines: group neighbouring lines to textboxes. + # group_textlines: group neighboring lines to textboxes. def group_textlines(self, laparams, lines): plane = Plane(self.bbox) plane.extend(lines) boxes = {} - # for line in plane: - # print "line", ("".join([s._text for s in line])).encode('ascii', 'ignore') for line in lines: neighbors = line.find_neighbors(plane, laparams.line_margin) - if line not in neighbors: - logging.error("Line cannot find itself: %s"%line) - continue - neighbors = line.group_line_neighbors(neighbors, laparams.line_margin) + if line not in neighbors: continue members = [] for obj1 in neighbors: members.append(obj1) if obj1 in boxes: members.extend(boxes.pop(obj1)) - # print "members: ", ["".join([o._text for o in line]) for line in members] if isinstance(line, LTTextLineHorizontal): box = LTTextBoxHorizontal() else: @@ -692,19 +631,8 @@ class LTLayoutContainer(LTContainer): (c,d,_,_) = t return (c,d) - # The algorithm below still takes O(n^2) :( - # For now, if we have many boxes, split them into two and perform them - # separately. This will cause bugs, but will prevent hanging. 
- if len(boxes) > 100: - boxes = sorted(boxes, key=lambda obj: obj.y0) - # Divide in two, then perform grouping - # print "Making Recursive Call %d"%len(boxes) - bot_boxes = self.group_textboxes(laparams, boxes[:len(boxes)/2]) - top_boxes = self.group_textboxes(laparams, boxes[len(boxes)/2:]) - boxes = bot_boxes + top_boxes - + # XXX this still takes O(n^2) :( dists = [] - # Calculate the distance between each box for i in xrange(len(boxes)): obj1 = boxes[i] for j in xrange(i+1, len(boxes)): @@ -714,20 +642,16 @@ class LTLayoutContainer(LTContainer): dists = csort(dists, key=key_obj) plane = Plane(self.bbox) plane.extend(boxes) - # Start with the two closest objects while dists: (c, d, obj1, obj2) = dists.pop(0) - # If there are any objects in between, then skip these two if c == 0 and isany(obj1, obj2): dists.append((1, d, obj1, obj2)) continue - # Group these two closest objects if (isinstance(obj1, (LTTextBoxVertical, LTTextGroupTBRL)) or isinstance(obj2, (LTTextBoxVertical, LTTextGroupTBRL))): group = LTTextGroupTBRL([obj1, obj2]) else: group = LTTextGroupLRTB([obj1, obj2]) - # Remove the two individual objects plane.remove(obj1) plane.remove(obj2) dists = [ (c,d,obj1,obj2) for (c,d,obj1,obj2) in dists @@ -801,4 +725,4 @@ class LTPage(LTLayoutContainer): def __repr__(self): return ('<%s(%r) %s rotate=%r>' % (self.__class__.__name__, self.pageid, - bbox2str(self.bbox), self.rotate)) + bbox2str(self.bbox), self.rotate)) \ No newline at end of file From 549b5607651625d45867d867f33d78a4cbc65352 Mon Sep 17 00:00:00 2001 From: speedplane Date: Mon, 13 Jun 2016 23:44:54 -0400 Subject: [PATCH 09/10] Revert changes unrelated to this feature. 
--- samples/Makefile | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/samples/Makefile b/samples/Makefile index 7984c37..a9d9ae0 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -1,15 +1,9 @@ # GNUMakefile for test -UNAME_S := $(shell uname -o) -CNVTXT=: -ifeq ($(UNAME_S),Cygwin) - CNVTXT=unix2dos -endif - RM=rm -f CMP=: ECHO=echo -PYTHON=python +PYTHON=python2 PDF2TXT=PYTHONPATH=.. $(PYTHON) ../tools/pdf2txt.py -p1 -V @@ -46,13 +40,10 @@ tests: for i in $(TESTS); do \ $(ECHO) $$i; \ $(PDF2TXT) -t html -o $$i.html $$i.pdf || exit 1; \ - $(CNVTXT) $$i.html || exit 1; \ $(CMP) $$i.html $$i.html.ref || exit 1; \ $(PDF2TXT) -t xml -o $$i.xml $$i.pdf || exit 1; \ - $(CNVTXT) $$i.xml || exit 1; \ $(CMP) $$i.xml $$i.xml.ref || exit 1; \ - $(PDF2TXT) -t text -o $$i.txt $$i.pdf || exit 1; \ - $(CNVTXT) $$i.txt || exit 1; \ + $(PDF2TXT) -t text -o $$i.txt $$i.pdf || eixt 1; \ $(CMP) $$i.txt $$i.txt.ref || exit 1; \ done @@ -74,4 +65,4 @@ clean: done -for i in $(CRYPTS); do \ $(RM) $$i.1.xml $$i.2.xml; \ - done + done \ No newline at end of file From dcf07272a175f0c2911c62986e16ac9b4afeb1a6 Mon Sep 17 00:00:00 2001 From: speedplane Date: Mon, 13 Jun 2016 23:46:30 -0400 Subject: [PATCH 10/10] Revert changes unrelated to this feature. --- samples/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/Makefile b/samples/Makefile index a9d9ae0..d1b06c4 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -65,4 +65,4 @@ clean: done -for i in $(CRYPTS); do \ $(RM) $$i.1.xml $$i.2.xml; \ - done \ No newline at end of file + done