pythonPackages.pyocr: 0.4.7 -> 0.5.3

This commit is contained in:
Symphorien Gibol 2018-09-10 17:27:56 +02:00
parent f4039397f6
commit 5314a74ee6
2 changed files with 48 additions and 55 deletions

View File

@ -1,47 +1,39 @@
{ lib, fetchFromGitHub, buildPythonPackage, pillow, six { lib, fetchFromGitLab, buildPythonPackage, pillow, six
, tesseract, cuneiform, isPy3k , tesseract, cuneiform, isPy3k, substituteAll, pytest, tox
}: }:
buildPythonPackage rec { buildPythonPackage rec {
pname = "pyocr"; pname = "pyocr";
version = "0.4.7"; version = "0.5.3";
name = pname + "-" + version; name = pname + "-" + version;
disabled = !isPy3k; disabled = !isPy3k;
# Don't fetch from PYPI because it doesn't contain tests. # Don't fetch from PYPI because it doesn't contain tests.
src = fetchFromGitHub { src = fetchFromGitLab {
owner = "jflesch"; domain = "gitlab.gnome.org";
group = "World";
owner = "OpenPaperwork";
repo = "pyocr"; repo = "pyocr";
rev = version; rev = version;
sha256 = "1iw73r8yrgjf8g00yzpz62ymqbf89cqhyhl9g430srmsrq7mn2yd"; sha256 = "1nihf0qmbpg3yj3yp11jp6hp5z5dqf39nz6j9lqbvgi1nqbs7x15";
}; };
NIX_CUNEIFORM_CMD = "${cuneiform}/bin/cuneiform"; patches = [ (substituteAll {
NIX_CUNEIFORM_DATA = "${cuneiform}/share/cuneiform"; src = ./paths.patch;
NIX_LIBTESSERACT_PATH = "${tesseract}/lib/libtesseract.so"; inherit cuneiform tesseract;
NIX_TESSDATA_PREFIX = "${tesseract}/share/tessdata"; })
NIX_TESSERACT_CMD = "${tesseract}/bin/tesseract"; ];
patches = [ ./paths.patch ];
postPatch = '' postPatch = ''
substituteInPlace src/pyocr/cuneiform.py \ echo 'version = "${version}"' > src/pyocr/_version.py
--subst-var NIX_CUNEIFORM_CMD \
--subst-var NIX_CUNEIFORM_CMD
substituteInPlace src/pyocr/tesseract.py \
--subst-var NIX_TESSERACT_CMD
substituteInPlace src/pyocr/libtesseract/tesseract_raw.py \
--subst-var NIX_TESSDATA_PREFIX \
--subst-var NIX_LIBTESSERACT_PATH
# Disable specific tests that are probably failing because of this issue: # Disable specific tests that are probably failing because of this issue:
# https://github.com/jflesch/pyocr/issues/52 # https://github.com/jflesch/pyocr/issues/52
for test in $disabledTests; do for test in $disabledTests; do
file="''${test%%:*}" file="''${test%%:*}"
fun="''${test#*:}" fun="''${test#*:}"
echo "$fun = unittest.skip($fun)" >> "tests/tests_$file.py" echo "import pytest" >> "tests/tests_$file.py"
echo "$fun = pytest.mark.skip($fun)" >> "tests/tests_$file.py"
done done
''; '';
@ -57,14 +49,18 @@ buildPythonPackage rec {
"libtesseract:TestLineBox.test_japanese" "libtesseract:TestLineBox.test_japanese"
"libtesseract:TestTxt.test_japanese" "libtesseract:TestTxt.test_japanese"
"libtesseract:TestWordBox.test_japanese" "libtesseract:TestWordBox.test_japanese"
"libtesseract:TestTxt.test_multi"
"tesseract:TestTxt.test_multi"
"tesseract:TestDigitLineBox.test_digits" "tesseract:TestDigitLineBox.test_digits"
"tesseract:TestTxt.test_japanese" "tesseract:TestTxt.test_japanese"
]; ];
propagatedBuildInputs = [ pillow six ]; propagatedBuildInputs = [ pillow six ];
checkInputs = [ pytest tox ];
checkPhase = "pytest";
meta = { meta = {
homepage = "https://github.com/jflesch/pyocr"; inherit (src) homepage;
description = "A Python wrapper for Tesseract and Cuneiform"; description = "A Python wrapper for Tesseract and Cuneiform";
license = lib.licenses.gpl3Plus; license = lib.licenses.gpl3Plus;
}; };

View File

@ -1,28 +1,28 @@
diff --git a/src/pyocr/cuneiform.py b/src/pyocr/cuneiform.py Index: current/src/pyocr/cuneiform.py
index a461d92..1f2b914 100644 ===================================================================
--- a/src/pyocr/cuneiform.py --- current.orig/src/pyocr/cuneiform.py
+++ b/src/pyocr/cuneiform.py +++ current/src/pyocr/cuneiform.py
@@ -27,13 +27,9 @@ from . import error @@ -27,13 +27,9 @@ from . import error
from . import util from . import util
-# CHANGE THIS IF CUNEIFORM IS NOT IN YOUR PATH, OR IS NAMED DIFFERENTLY -# CHANGE THIS IF CUNEIFORM IS NOT IN YOUR PATH, OR IS NAMED DIFFERENTLY
-CUNEIFORM_CMD = 'cuneiform' -CUNEIFORM_CMD = 'cuneiform'
+CUNEIFORM_CMD = '@NIX_CUNEIFORM_CMD@' +CUNEIFORM_CMD = '@cuneiform@/bin/cuneiform'
-CUNEIFORM_DATA_POSSIBLE_PATHS = [ -CUNEIFORM_DATA_POSSIBLE_PATHS = [
- "/usr/local/share/cuneiform", - "/usr/local/share/cuneiform",
- "/usr/share/cuneiform", - "/usr/share/cuneiform",
-] -]
+CUNEIFORM_DATA_POSSIBLE_PATHS = ['@NIX_CUNEIFORM_DATA@'] +CUNEIFORM_DATA_POSSIBLE_PATHS = ['@cuneiform@/share/cuneiform']
LANGUAGES_LINE_PREFIX = "Supported languages: " LANGUAGES_LINE_PREFIX = "Supported languages: "
LANGUAGES_SPLIT_RE = re.compile("[^a-z]") LANGUAGES_SPLIT_RE = re.compile("[^a-z]")
diff --git a/src/pyocr/libtesseract/tesseract_raw.py b/src/pyocr/libtesseract/tesseract_raw.py Index: current/src/pyocr/libtesseract/tesseract_raw.py
index b4e7bda..47505f7 100644 ===================================================================
--- a/src/pyocr/libtesseract/tesseract_raw.py --- current.orig/src/pyocr/libtesseract/tesseract_raw.py
+++ b/src/pyocr/libtesseract/tesseract_raw.py +++ current/src/pyocr/libtesseract/tesseract_raw.py
@@ -1,55 +1,13 @@ @@ -1,52 +1,13 @@
import ctypes import ctypes
import logging import logging
import os import os
@ -56,7 +56,13 @@ index b4e7bda..47505f7 100644
- # Jflesch> Don't they have the equivalent of LD_LIBRARY_PATH on - # Jflesch> Don't they have the equivalent of LD_LIBRARY_PATH on
- # Windows ? - # Windows ?
- "../vs2010/DLL_Release/libtesseract302.dll", - "../vs2010/DLL_Release/libtesseract302.dll",
- # prefer the most recent first
- "libtesseract305.dll",
- "libtesseract304.dll",
- "libtesseract303.dll",
- "libtesseract302.dll", - "libtesseract302.dll",
- "libtesseract400.dll", # Tesseract 4 is still in alpha stage
- "libtesseract.dll",
- "C:\\Program Files (x86)\\Tesseract-OCR\\libtesseract-4.dll", - "C:\\Program Files (x86)\\Tesseract-OCR\\libtesseract-4.dll",
- "C:\\Program Files (x86)\\Tesseract-OCR\\libtesseract-3.dll", - "C:\\Program Files (x86)\\Tesseract-OCR\\libtesseract-3.dll",
- ] - ]
@ -66,27 +72,18 @@ index b4e7bda..47505f7 100644
- "libtesseract.so.3", - "libtesseract.so.3",
- ] - ]
- -
- +libnames = [ "@tesseract@/lib/libtesseract.so" ]
-g_libtesseract = None
-
-for libname in libnames:
- try:
- g_libtesseract = ctypes.cdll.LoadLibrary(libname)
- break
- except OSError:
- pass
+g_libtesseract = ctypes.cdll.LoadLibrary('@NIX_LIBTESSERACT_PATH@')
g_libtesseract = None
class PageSegMode(object): @@ -346,12 +307,11 @@ def init(lang=None):
@@ -326,12 +284,11 @@ def init(lang=None):
try: try:
if lang: if lang:
lang = lang.encode("utf-8") lang = lang.encode("utf-8")
- prefix = None - prefix = None
- if TESSDATA_PREFIX: - if TESSDATA_PREFIX:
- prefix = TESSDATA_PREFIX.encode("utf-8") - prefix = TESSDATA_PREFIX.encode("utf-8")
+ prefix = os.getenv('TESSDATA_PREFIX', '@NIX_TESSDATA_PREFIX@') + prefix = os.getenv('TESSDATA_PREFIX', '@tesseract@/share/tessdata')
+ os.environ['TESSDATA_PREFIX'] = prefix + os.environ['TESSDATA_PREFIX'] = prefix
g_libtesseract.TessBaseAPIInit3( g_libtesseract.TessBaseAPIInit3(
ctypes.c_void_p(handle), ctypes.c_void_p(handle),
@ -95,17 +92,17 @@ index b4e7bda..47505f7 100644
ctypes.c_char_p(lang) ctypes.c_char_p(lang)
) )
g_libtesseract.TessBaseAPISetVariable( g_libtesseract.TessBaseAPISetVariable(
diff --git a/src/pyocr/tesseract.py b/src/pyocr/tesseract.py Index: current/src/pyocr/tesseract.py
index c935881..7139ffe 100755 ===================================================================
--- a/src/pyocr/tesseract.py --- current.orig/src/pyocr/tesseract.py
+++ b/src/pyocr/tesseract.py +++ current/src/pyocr/tesseract.py
@@ -31,8 +31,7 @@ from .builders import DigitBuilder # backward compatibility @@ -31,8 +31,7 @@ from .builders import DigitBuilder # ba
from .error import TesseractError # backward compatibility from .error import TesseractError # backward compatibility
from .util import digits_only from .util import digits_only
-# CHANGE THIS IF TESSERACT IS NOT IN YOUR PATH, OR IS NAMED DIFFERENTLY -# CHANGE THIS IF TESSERACT IS NOT IN YOUR PATH, OR IS NAMED DIFFERENTLY
-TESSERACT_CMD = 'tesseract.exe' if os.name == 'nt' else 'tesseract' -TESSERACT_CMD = 'tesseract.exe' if os.name == 'nt' else 'tesseract'
+TESSERACT_CMD = '@NIX_TESSERACT_CMD@' +TESSERACT_CMD = '@tesseract@/bin/tesseract'
TESSDATA_EXTENSION = ".traineddata" TESSDATA_EXTENSION = ".traineddata"