From bcb40a5f0440a35a01e3207f3222b710d3a0aef8 Mon Sep 17 00:00:00 2001 From: Symphorien Gibol Date: Tue, 10 Mar 2020 12:00:00 +0000 Subject: [PATCH] pythonPackages.pyocr: 0.5.3 -> 0.7.2 --- .../python-modules/pyocr/default.nix | 44 +-- .../python-modules/pyocr/paths.patch | 290 ++++++++++++++++-- 2 files changed, 272 insertions(+), 62 deletions(-) diff --git a/pkgs/development/python-modules/pyocr/default.nix b/pkgs/development/python-modules/pyocr/default.nix index ca606767f4d..8cfce9ba43f 100644 --- a/pkgs/development/python-modules/pyocr/default.nix +++ b/pkgs/development/python-modules/pyocr/default.nix @@ -1,10 +1,10 @@ -{ lib, fetchFromGitLab, buildPythonPackage, pillow, six -, tesseract, cuneiform, isPy3k, substituteAll, pytest, tox -}: +{ lib, fetchFromGitLab, buildPythonPackage, pillow, setuptools_scm, +setuptools-scm-git-archive , tesseract, cuneiform, isPy3k, substituteAll, +pytest, tox }: buildPythonPackage rec { pname = "pyocr"; - version = "0.5.3"; + version = "0.7.2"; disabled = !isPy3k; # Don't fetch from PYPI because it doesn't contain tests. @@ -14,7 +14,7 @@ buildPythonPackage rec { owner = "OpenPaperwork"; repo = "pyocr"; rev = version; - sha256 = "1nihf0qmbpg3yj3yp11jp6hp5z5dqf39nz6j9lqbvgi1nqbs7x15"; + sha256 = "09ab86bmizpv94w3mdvdqkjyyvk1vafw3jqhkiw5xx7p180xn3il"; }; patches = [ (substituteAll { @@ -23,38 +23,8 @@ buildPythonPackage rec { }) ]; - postPatch = '' - echo 'version = "${version}"' > src/pyocr/_version.py - - # Disable specific tests that are probably failing because of this issue: - # https://github.com/jflesch/pyocr/issues/52 - for test in $disabledTests; do - file="''${test%%:*}" - fun="''${test#*:}" - echo "import pytest" >> "tests/tests_$file.py" - echo "$fun = pytest.mark.skip($fun)" >> "tests/tests_$file.py" - done - ''; - - disabledTests = [ - "cuneiform:TestTxt.test_basic" - "cuneiform:TestTxt.test_european" - "cuneiform:TestTxt.test_french" - "cuneiform:TestWordBox.test_basic" - "cuneiform:TestWordBox.test_european" - "cuneiform:TestWordBox.test_french" - "libtesseract:TestBasicDoc.test_basic" - "libtesseract:TestDigitLineBox.test_digits" - "libtesseract:TestLineBox.test_japanese" - "libtesseract:TestTxt.test_japanese" - "libtesseract:TestWordBox.test_japanese" - "libtesseract:TestTxt.test_multi" - "tesseract:TestTxt.test_multi" - "tesseract:TestDigitLineBox.test_digits" - "tesseract:TestTxt.test_japanese" - ]; - - propagatedBuildInputs = [ pillow six ]; + buildInputs = [ setuptools_scm setuptools-scm-git-archive ]; + propagatedBuildInputs = [ pillow ]; checkInputs = [ pytest tox ]; checkPhase = "pytest"; diff --git a/pkgs/development/python-modules/pyocr/paths.patch b/pkgs/development/python-modules/pyocr/paths.patch index 9350d4050da..55cbf7d48da 100644 --- a/pkgs/development/python-modules/pyocr/paths.patch +++ b/pkgs/development/python-modules/pyocr/paths.patch @@ -1,9 +1,9 @@ -Index: current/src/pyocr/cuneiform.py -=================================================================== ---- current.orig/src/pyocr/cuneiform.py -+++ current/src/pyocr/cuneiform.py -@@ -27,13 +27,9 @@ from . import error - from . import util +diff --git a/src/pyocr/cuneiform.py b/src/pyocr/cuneiform.py +index 2e5b717..35647e2 100644 +--- a/src/pyocr/cuneiform.py ++++ b/src/pyocr/cuneiform.py +@@ -25,13 +25,9 @@ from . import builders + from .error import CuneiformError -# CHANGE THIS IF CUNEIFORM IS NOT IN YOUR PATH, OR IS NAMED DIFFERENTLY @@ -18,25 +18,34 @@ Index: current/src/pyocr/cuneiform.py LANGUAGES_LINE_PREFIX = "Supported languages: " LANGUAGES_SPLIT_RE = re.compile("[^a-z]") -Index: current/src/pyocr/libtesseract/tesseract_raw.py -=================================================================== ---- current.orig/src/pyocr/libtesseract/tesseract_raw.py -+++ current/src/pyocr/libtesseract/tesseract_raw.py -@@ -1,52 +1,13 @@ - import ctypes +diff --git a/src/pyocr/libtesseract/tesseract_raw.py b/src/pyocr/libtesseract/tesseract_raw.py +index a068e73..9ebea5c 100644 +--- a/src/pyocr/libtesseract/tesseract_raw.py ++++ b/src/pyocr/libtesseract/tesseract_raw.py +@@ -2,7 +2,6 @@ import ctypes + import locale import logging import os -import sys from ..error import TesseractError - +@@ -10,48 +9,16 @@ from ..error import TesseractError logger = logging.getLogger(__name__) --TESSDATA_PREFIX = os.getenv('TESSDATA_PREFIX', None) + TESSDATA_PREFIX = os.getenv('TESSDATA_PREFIX', None) -libnames = [] ++if TESSDATA_PREFIX is None: ++ TESSDATA_PREFIX = '@tesseract@/share/tessdata' ++ os.environ['TESSDATA_PREFIX'] = TESSDATA_PREFIX ++ ++ + # 70 is the minimum credible dpi for tesseract and force it to compute an + # estimate of the image dpi + DPI_DEFAULT = 70 + - --if getattr(sys, 'frozen', False): +-if getattr(sys, 'frozen', False): # pragma: no cover - # Pyinstaller integration - libnames += [os.path.join(sys._MEIPASS, "libtesseract-4.dll")] - libnames += [os.path.join(sys._MEIPASS, "libtesseract-3.dll")] @@ -51,7 +60,7 @@ Index: current/src/pyocr/libtesseract/tesseract_raw.py - TESSDATA_PREFIX = tessdata - - --if sys.platform[:3] == "win": +-if sys.platform[:3] == "win": # pragma: no cover - libnames += [ - # Jflesch> Don't they have the equivalent of LD_LIBRARY_PATH on - # Windows ? @@ -76,15 +85,16 @@ Index: current/src/pyocr/libtesseract/tesseract_raw.py g_libtesseract = None -@@ -346,12 +307,11 @@ def init(lang=None): +@@ -364,12 +331,12 @@ def init(lang=None): try: if lang: lang = lang.encode("utf-8") - prefix = None -- if TESSDATA_PREFIX: +- if TESSDATA_PREFIX: # pragma: no cover - prefix = TESSDATA_PREFIX.encode("utf-8") -+ prefix = os.getenv('TESSDATA_PREFIX', '@tesseract@/share/tessdata') -+ os.environ['TESSDATA_PREFIX'] = prefix ++ ++ prefix = TESSDATA_PREFIX ++ g_libtesseract.TessBaseAPIInit3( ctypes.c_void_p(handle), - ctypes.c_char_p(prefix), @@ -92,11 +102,11 @@ Index: current/src/pyocr/libtesseract/tesseract_raw.py ctypes.c_char_p(lang) ) g_libtesseract.TessBaseAPISetVariable( -Index: current/src/pyocr/tesseract.py -=================================================================== ---- current.orig/src/pyocr/tesseract.py -+++ current/src/pyocr/tesseract.py -@@ -31,8 +31,7 @@ from .builders import DigitBuilder # ba +diff --git a/src/pyocr/tesseract.py b/src/pyocr/tesseract.py +index 7c30852..44e8446 100644 +--- a/src/pyocr/tesseract.py ++++ b/src/pyocr/tesseract.py +@@ -28,8 +28,7 @@ from .builders import DigitBuilder # backward compatibility from .error import TesseractError # backward compatibility from .util import digits_only @@ -106,3 +116,233 @@ Index: current/src/pyocr/tesseract.py TESSDATA_EXTENSION = ".traineddata" +diff --git a/tests/tests_cuneiform.py b/tests/tests_cuneiform.py +index 45b7f6a..95f55c6 100644 +--- a/tests/tests_cuneiform.py ++++ b/tests/tests_cuneiform.py +@@ -21,7 +21,7 @@ class TestCuneiform(BaseTest): + # XXX is it useful? + which.return_value = True + self.assertTrue(cuneiform.is_available()) +- which.assert_called_once_with("cuneiform") ++ which.assert_called_once_with("@cuneiform@/bin/cuneiform") + + @patch("subprocess.Popen") + def test_version(self, popen): +@@ -54,7 +54,7 @@ class TestCuneiform(BaseTest): + self.assertIn("eng", langs) + self.assertIn("fra", langs) + popen.assert_called_once_with( +- ["cuneiform", "-l"], ++ ["@cuneiform@/bin/cuneiform", "-l"], + stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ) + +@@ -109,7 +109,7 @@ class TestCuneiformTxt(BaseTest): + output = cuneiform.image_to_string(self.image) + self.assertEqual(output, self._get_file_content("text").strip()) + popen.assert_called_once_with( +- ["cuneiform", "-f", "text", "-o", self.tmp_filename, "-"], ++ ["@cuneiform@/bin/cuneiform", "-f", "text", "-o", self.tmp_filename, "-"], + stdin=subprocess.PIPE, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT + ) +@@ -125,7 +125,7 @@ class TestCuneiformTxt(BaseTest): + builder=self.builder) + self.assertEqual(output, self._get_file_content("text").strip()) + popen.assert_called_once_with( +- ["cuneiform", "-l", "fra", "-f", "text", "-o", self.tmp_filename, ++ ["@cuneiform@/bin/cuneiform", "-l", "fra", "-f", "text", "-o", self.tmp_filename, + "-"], + stdin=subprocess.PIPE, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT +@@ -142,7 +142,7 @@ class TestCuneiformTxt(BaseTest): + builder=self.builder) + self.assertEqual(output, self._get_file_content("text").strip()) + popen.assert_called_once_with( +- ["cuneiform", "-f", "text", "-o", self.tmp_filename, "-"], ++ ["@cuneiform@/bin/cuneiform", "-f", "text", "-o", self.tmp_filename, "-"], + stdin=subprocess.PIPE, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT + ) +@@ -173,7 +173,7 @@ class TestCuneiformTxt(BaseTest): + output = cuneiform.image_to_string(image, builder=self.builder) + self.assertEqual(output, self._get_file_content("text").strip()) + popen.assert_called_once_with( +- ["cuneiform", "-f", "text", "-o", self.tmp_filename, "-"], ++ ["@cuneiform@/bin/cuneiform", "-f", "text", "-o", self.tmp_filename, "-"], + stdin=subprocess.PIPE, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT + ) +@@ -227,7 +227,7 @@ class TestCuneiformWordBox(BaseTest): + output = cuneiform.image_to_string(self.image, + builder=self.builder) + popen.assert_called_once_with( +- ["cuneiform", "-f", "hocr", "-o", self.tmp_filename, "-"], ++ ["@cuneiform@/bin/cuneiform", "-f", "hocr", "-o", self.tmp_filename, "-"], + stdin=subprocess.PIPE, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT + ) +@@ -280,7 +280,7 @@ class TestCuneiformLineBox(BaseTest): + output = cuneiform.image_to_string(self.image, + builder=self.builder) + popen.assert_called_once_with( +- ["cuneiform", "-f", "hocr", "-o", self.tmp_filename, "-"], ++ ["@cuneiform@/bin/cuneiform", "-f", "hocr", "-o", self.tmp_filename, "-"], + stdin=subprocess.PIPE, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT + ) +diff --git a/tests/tests_libtesseract.py b/tests/tests_libtesseract.py +index ad7fdc9..57e7a60 100644 +--- a/tests/tests_libtesseract.py ++++ b/tests/tests_libtesseract.py +@@ -165,7 +165,8 @@ class TestLibTesseractRaw(BaseTest): + args = libtess.TessBaseAPIInit3.call_args[0] + self.assertEqual(len(args), 3) + self.assertEqual(args[0].value, self.handle) +- self.assertEqual(args[1].value, None) ++ # we hardcode tesseract data, so we don't get None ++ #self.assertEqual(args[1].value, None) + self.assertEqual(args[2].value, lang.encode() if lang else None) + + self.assertEqual( +@@ -201,7 +202,8 @@ class TestLibTesseractRaw(BaseTest): + args = libtess.TessBaseAPIInit3.call_args[0] + self.assertEqual(len(args), 3) + self.assertEqual(args[0].value, self.handle) +- self.assertEqual(args[1].value, None) ++ # we hardcode tesseract data, so we don't get None ++ #self.assertEqual(args[1].value, None) + self.assertEqual(args[2].value, lang.encode() if lang else None) + + self.assertEqual( +diff --git a/tests/tests_tesseract.py b/tests/tests_tesseract.py +index 1a55567..a24d96f 100644 +--- a/tests/tests_tesseract.py ++++ b/tests/tests_tesseract.py +@@ -36,7 +36,7 @@ class TestTesseract(BaseTest): + def test_available(self, which): + which.return_value = True + self.assertTrue(tesseract.is_available()) +- which.assert_called_once_with("tesseract") ++ which.assert_called_once_with("@tesseract@/bin/tesseract") + + @patch("subprocess.Popen") + def test_version_error(self, popen): +@@ -156,7 +156,7 @@ class TestTesseract(BaseTest): + for lang in ("eng", "fra", "jpn", "osd"): + self.assertIn(lang, langs) + popen.assert_called_once_with( +- ["tesseract", "--list-langs"], ++ ["@tesseract@/bin/tesseract", "--list-langs"], + startupinfo=None, creationflags=0, + stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ) +@@ -171,7 +171,7 @@ class TestTesseract(BaseTest): + self.assertEqual(te.exception.status, 1) + self.assertEqual("unable to get languages", te.exception.message) + popen.assert_called_once_with( +- ["tesseract", "--list-langs"], ++ ["@tesseract@/bin/tesseract", "--list-langs"], + startupinfo=None, creationflags=0, + stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ) +@@ -248,7 +248,7 @@ class TestTesseract(BaseTest): + self.assertEqual(status, 0) + self.assertEqual(error, message) + popen.assert_called_once_with( +- ["tesseract", "input.bmp", "output"], ++ ["@tesseract@/bin/tesseract", "input.bmp", "output"], + cwd=tmpdir, + startupinfo=None, + creationflags=0, +@@ -271,7 +271,7 @@ class TestTesseract(BaseTest): + self.assertEqual(status, 0) + self.assertEqual(error, message) + popen.assert_called_with( +- ["tesseract", "input2.bmp", "output2", "-l", "fra", "--psm", "3"], ++ ["@tesseract@/bin/tesseract", "input2.bmp", "output2", "-l", "fra", "--psm", "3"], + cwd=tmpdir, + startupinfo=None, + creationflags=0, +@@ -302,7 +302,7 @@ class TestTesseract(BaseTest): + self.assertEqual(result["angle"], 90) + self.assertEqual(result["confidence"], 9.30) + popen.assert_called_once_with( +- ["tesseract", "input.bmp", "stdout", "--psm", "0"], ++ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "--psm", "0"], + stdin=subprocess.PIPE, + shell=False, + startupinfo=None, +@@ -338,7 +338,7 @@ class TestTesseract(BaseTest): + self.assertEqual(result["angle"], 90) + self.assertEqual(result["confidence"], 9.30) + popen.assert_called_once_with( +- ["tesseract", "input.bmp", "stdout", "--psm", "0"], ++ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "--psm", "0"], + stdin=subprocess.PIPE, + shell=False, + startupinfo=None, +@@ -371,7 +371,7 @@ class TestTesseract(BaseTest): + self.assertEqual(result["angle"], 90) + self.assertEqual(result["confidence"], 9.30) + popen.assert_called_once_with( +- ["tesseract", "input.bmp", "stdout", ++ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", + "--psm", "0", "-l", "osd"], + stdin=subprocess.PIPE, + shell=False, +@@ -399,7 +399,7 @@ class TestTesseract(BaseTest): + with self.assertRaises(tesseract.TesseractError) as te: + tesseract.detect_orientation(self.image) + popen.assert_called_once_with( +- ["tesseract", "input.bmp", "stdout", "--psm", "0"], ++ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "--psm", "0"], + stdin=subprocess.PIPE, + shell=False, + startupinfo=None, +@@ -433,7 +433,7 @@ class TestTesseract(BaseTest): + with self.assertRaises(tesseract.TesseractError) as te: + tesseract.detect_orientation(self.image) + popen.assert_called_once_with( +- ["tesseract", "input.bmp", "stdout", "--psm", "0"], ++ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "--psm", "0"], + stdin=subprocess.PIPE, + shell=False, + startupinfo=None, +@@ -467,7 +467,7 @@ class TestTesseract(BaseTest): + self.assertEqual(result["angle"], 90) + self.assertEqual(result["confidence"], 9.30) + popen.assert_called_once_with( +- ["tesseract", "input.bmp", "stdout", "-psm", "0"], ++ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "-psm", "0"], + stdin=subprocess.PIPE, + shell=False, + startupinfo=None, +@@ -500,7 +500,7 @@ class TestTesseract(BaseTest): + self.assertEqual(result["angle"], 90) + self.assertEqual(result["confidence"], 9.30) + popen.assert_called_once_with( +- ["tesseract", "input.bmp", "stdout", "-psm", "0", "-l", "fra"], ++ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "-psm", "0", "-l", "fra"], + stdin=subprocess.PIPE, + shell=False, + startupinfo=None, +@@ -527,7 +527,7 @@ class TestTesseract(BaseTest): + with self.assertRaises(tesseract.TesseractError) as te: + tesseract.detect_orientation(self.image) + popen.assert_called_once_with( +- ["tesseract", "input.bmp", "stdout", "-psm", "0"], ++ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "-psm", "0"], + stdin=subprocess.PIPE, + shell=False, + startupinfo=None, +@@ -561,7 +561,7 @@ class TestTesseract(BaseTest): + with self.assertRaises(tesseract.TesseractError) as te: + tesseract.detect_orientation(self.image) + popen.assert_called_once_with( +- ["tesseract", "input.bmp", "stdout", "-psm", "0"], ++ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "-psm", "0"], + stdin=subprocess.PIPE, + shell=False, + startupinfo=None,