pythonPackages.pyocr: 0.4.7 -> 0.5.3
This commit is contained in:
parent
f4039397f6
commit
5314a74ee6
@ -1,47 +1,39 @@
|
|||||||
{ lib, fetchFromGitHub, buildPythonPackage, pillow, six
|
{ lib, fetchFromGitLab, buildPythonPackage, pillow, six
|
||||||
, tesseract, cuneiform, isPy3k
|
, tesseract, cuneiform, isPy3k, substituteAll, pytest, tox
|
||||||
}:
|
}:
|
||||||
|
|
||||||
buildPythonPackage rec {
|
buildPythonPackage rec {
|
||||||
pname = "pyocr";
|
pname = "pyocr";
|
||||||
version = "0.4.7";
|
version = "0.5.3";
|
||||||
name = pname + "-" + version;
|
name = pname + "-" + version;
|
||||||
disabled = !isPy3k;
|
disabled = !isPy3k;
|
||||||
|
|
||||||
# Don't fetch from PYPI because it doesn't contain tests.
|
# Don't fetch from PYPI because it doesn't contain tests.
|
||||||
src = fetchFromGitHub {
|
src = fetchFromGitLab {
|
||||||
owner = "jflesch";
|
domain = "gitlab.gnome.org";
|
||||||
|
group = "World";
|
||||||
|
owner = "OpenPaperwork";
|
||||||
repo = "pyocr";
|
repo = "pyocr";
|
||||||
rev = version;
|
rev = version;
|
||||||
sha256 = "1iw73r8yrgjf8g00yzpz62ymqbf89cqhyhl9g430srmsrq7mn2yd";
|
sha256 = "1nihf0qmbpg3yj3yp11jp6hp5z5dqf39nz6j9lqbvgi1nqbs7x15";
|
||||||
};
|
};
|
||||||
|
|
||||||
NIX_CUNEIFORM_CMD = "${cuneiform}/bin/cuneiform";
|
patches = [ (substituteAll {
|
||||||
NIX_CUNEIFORM_DATA = "${cuneiform}/share/cuneiform";
|
src = ./paths.patch;
|
||||||
NIX_LIBTESSERACT_PATH = "${tesseract}/lib/libtesseract.so";
|
inherit cuneiform tesseract;
|
||||||
NIX_TESSDATA_PREFIX = "${tesseract}/share/tessdata";
|
})
|
||||||
NIX_TESSERACT_CMD = "${tesseract}/bin/tesseract";
|
];
|
||||||
|
|
||||||
patches = [ ./paths.patch ];
|
|
||||||
|
|
||||||
postPatch = ''
|
postPatch = ''
|
||||||
substituteInPlace src/pyocr/cuneiform.py \
|
echo 'version = "${version}"' > src/pyocr/_version.py
|
||||||
--subst-var NIX_CUNEIFORM_CMD \
|
|
||||||
--subst-var NIX_CUNEIFORM_CMD
|
|
||||||
|
|
||||||
substituteInPlace src/pyocr/tesseract.py \
|
|
||||||
--subst-var NIX_TESSERACT_CMD
|
|
||||||
|
|
||||||
substituteInPlace src/pyocr/libtesseract/tesseract_raw.py \
|
|
||||||
--subst-var NIX_TESSDATA_PREFIX \
|
|
||||||
--subst-var NIX_LIBTESSERACT_PATH
|
|
||||||
|
|
||||||
# Disable specific tests that are probably failing because of this issue:
|
# Disable specific tests that are probably failing because of this issue:
|
||||||
# https://github.com/jflesch/pyocr/issues/52
|
# https://github.com/jflesch/pyocr/issues/52
|
||||||
for test in $disabledTests; do
|
for test in $disabledTests; do
|
||||||
file="''${test%%:*}"
|
file="''${test%%:*}"
|
||||||
fun="''${test#*:}"
|
fun="''${test#*:}"
|
||||||
echo "$fun = unittest.skip($fun)" >> "tests/tests_$file.py"
|
echo "import pytest" >> "tests/tests_$file.py"
|
||||||
|
echo "$fun = pytest.mark.skip($fun)" >> "tests/tests_$file.py"
|
||||||
done
|
done
|
||||||
'';
|
'';
|
||||||
|
|
||||||
@ -57,14 +49,18 @@ buildPythonPackage rec {
|
|||||||
"libtesseract:TestLineBox.test_japanese"
|
"libtesseract:TestLineBox.test_japanese"
|
||||||
"libtesseract:TestTxt.test_japanese"
|
"libtesseract:TestTxt.test_japanese"
|
||||||
"libtesseract:TestWordBox.test_japanese"
|
"libtesseract:TestWordBox.test_japanese"
|
||||||
|
"libtesseract:TestTxt.test_multi"
|
||||||
|
"tesseract:TestTxt.test_multi"
|
||||||
"tesseract:TestDigitLineBox.test_digits"
|
"tesseract:TestDigitLineBox.test_digits"
|
||||||
"tesseract:TestTxt.test_japanese"
|
"tesseract:TestTxt.test_japanese"
|
||||||
];
|
];
|
||||||
|
|
||||||
propagatedBuildInputs = [ pillow six ];
|
propagatedBuildInputs = [ pillow six ];
|
||||||
|
checkInputs = [ pytest tox ];
|
||||||
|
checkPhase = "pytest";
|
||||||
|
|
||||||
meta = {
|
meta = {
|
||||||
homepage = "https://github.com/jflesch/pyocr";
|
inherit (src) homepage;
|
||||||
description = "A Python wrapper for Tesseract and Cuneiform";
|
description = "A Python wrapper for Tesseract and Cuneiform";
|
||||||
license = lib.licenses.gpl3Plus;
|
license = lib.licenses.gpl3Plus;
|
||||||
};
|
};
|
||||||
|
@ -1,28 +1,28 @@
|
|||||||
diff --git a/src/pyocr/cuneiform.py b/src/pyocr/cuneiform.py
|
Index: current/src/pyocr/cuneiform.py
|
||||||
index a461d92..1f2b914 100644
|
===================================================================
|
||||||
--- a/src/pyocr/cuneiform.py
|
--- current.orig/src/pyocr/cuneiform.py
|
||||||
+++ b/src/pyocr/cuneiform.py
|
+++ current/src/pyocr/cuneiform.py
|
||||||
@@ -27,13 +27,9 @@ from . import error
|
@@ -27,13 +27,9 @@ from . import error
|
||||||
from . import util
|
from . import util
|
||||||
|
|
||||||
|
|
||||||
-# CHANGE THIS IF CUNEIFORM IS NOT IN YOUR PATH, OR IS NAMED DIFFERENTLY
|
-# CHANGE THIS IF CUNEIFORM IS NOT IN YOUR PATH, OR IS NAMED DIFFERENTLY
|
||||||
-CUNEIFORM_CMD = 'cuneiform'
|
-CUNEIFORM_CMD = 'cuneiform'
|
||||||
+CUNEIFORM_CMD = '@NIX_CUNEIFORM_CMD@'
|
+CUNEIFORM_CMD = '@cuneiform@/bin/cuneiform'
|
||||||
|
|
||||||
-CUNEIFORM_DATA_POSSIBLE_PATHS = [
|
-CUNEIFORM_DATA_POSSIBLE_PATHS = [
|
||||||
- "/usr/local/share/cuneiform",
|
- "/usr/local/share/cuneiform",
|
||||||
- "/usr/share/cuneiform",
|
- "/usr/share/cuneiform",
|
||||||
-]
|
-]
|
||||||
+CUNEIFORM_DATA_POSSIBLE_PATHS = ['@NIX_CUNEIFORM_DATA@']
|
+CUNEIFORM_DATA_POSSIBLE_PATHS = ['@cuneiform@/share/cuneiform']
|
||||||
|
|
||||||
LANGUAGES_LINE_PREFIX = "Supported languages: "
|
LANGUAGES_LINE_PREFIX = "Supported languages: "
|
||||||
LANGUAGES_SPLIT_RE = re.compile("[^a-z]")
|
LANGUAGES_SPLIT_RE = re.compile("[^a-z]")
|
||||||
diff --git a/src/pyocr/libtesseract/tesseract_raw.py b/src/pyocr/libtesseract/tesseract_raw.py
|
Index: current/src/pyocr/libtesseract/tesseract_raw.py
|
||||||
index b4e7bda..47505f7 100644
|
===================================================================
|
||||||
--- a/src/pyocr/libtesseract/tesseract_raw.py
|
--- current.orig/src/pyocr/libtesseract/tesseract_raw.py
|
||||||
+++ b/src/pyocr/libtesseract/tesseract_raw.py
|
+++ current/src/pyocr/libtesseract/tesseract_raw.py
|
||||||
@@ -1,55 +1,13 @@
|
@@ -1,52 +1,13 @@
|
||||||
import ctypes
|
import ctypes
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
@ -56,7 +56,13 @@ index b4e7bda..47505f7 100644
|
|||||||
- # Jflesch> Don't they have the equivalent of LD_LIBRARY_PATH on
|
- # Jflesch> Don't they have the equivalent of LD_LIBRARY_PATH on
|
||||||
- # Windows ?
|
- # Windows ?
|
||||||
- "../vs2010/DLL_Release/libtesseract302.dll",
|
- "../vs2010/DLL_Release/libtesseract302.dll",
|
||||||
|
- # prefer the most recent first
|
||||||
|
- "libtesseract305.dll",
|
||||||
|
- "libtesseract304.dll",
|
||||||
|
- "libtesseract303.dll",
|
||||||
- "libtesseract302.dll",
|
- "libtesseract302.dll",
|
||||||
|
- "libtesseract400.dll", # Tesseract 4 is still in alpha stage
|
||||||
|
- "libtesseract.dll",
|
||||||
- "C:\\Program Files (x86)\\Tesseract-OCR\\libtesseract-4.dll",
|
- "C:\\Program Files (x86)\\Tesseract-OCR\\libtesseract-4.dll",
|
||||||
- "C:\\Program Files (x86)\\Tesseract-OCR\\libtesseract-3.dll",
|
- "C:\\Program Files (x86)\\Tesseract-OCR\\libtesseract-3.dll",
|
||||||
- ]
|
- ]
|
||||||
@ -66,27 +72,18 @@ index b4e7bda..47505f7 100644
|
|||||||
- "libtesseract.so.3",
|
- "libtesseract.so.3",
|
||||||
- ]
|
- ]
|
||||||
-
|
-
|
||||||
-
|
+libnames = [ "@tesseract@/lib/libtesseract.so" ]
|
||||||
-g_libtesseract = None
|
|
||||||
-
|
|
||||||
-for libname in libnames:
|
|
||||||
- try:
|
|
||||||
- g_libtesseract = ctypes.cdll.LoadLibrary(libname)
|
|
||||||
- break
|
|
||||||
- except OSError:
|
|
||||||
- pass
|
|
||||||
+g_libtesseract = ctypes.cdll.LoadLibrary('@NIX_LIBTESSERACT_PATH@')
|
|
||||||
|
|
||||||
|
g_libtesseract = None
|
||||||
|
|
||||||
class PageSegMode(object):
|
@@ -346,12 +307,11 @@ def init(lang=None):
|
||||||
@@ -326,12 +284,11 @@ def init(lang=None):
|
|
||||||
try:
|
try:
|
||||||
if lang:
|
if lang:
|
||||||
lang = lang.encode("utf-8")
|
lang = lang.encode("utf-8")
|
||||||
- prefix = None
|
- prefix = None
|
||||||
- if TESSDATA_PREFIX:
|
- if TESSDATA_PREFIX:
|
||||||
- prefix = TESSDATA_PREFIX.encode("utf-8")
|
- prefix = TESSDATA_PREFIX.encode("utf-8")
|
||||||
+ prefix = os.getenv('TESSDATA_PREFIX', '@NIX_TESSDATA_PREFIX@')
|
+ prefix = os.getenv('TESSDATA_PREFIX', '@tesseract@/share/tessdata')
|
||||||
+ os.environ['TESSDATA_PREFIX'] = prefix
|
+ os.environ['TESSDATA_PREFIX'] = prefix
|
||||||
g_libtesseract.TessBaseAPIInit3(
|
g_libtesseract.TessBaseAPIInit3(
|
||||||
ctypes.c_void_p(handle),
|
ctypes.c_void_p(handle),
|
||||||
@ -95,17 +92,17 @@ index b4e7bda..47505f7 100644
|
|||||||
ctypes.c_char_p(lang)
|
ctypes.c_char_p(lang)
|
||||||
)
|
)
|
||||||
g_libtesseract.TessBaseAPISetVariable(
|
g_libtesseract.TessBaseAPISetVariable(
|
||||||
diff --git a/src/pyocr/tesseract.py b/src/pyocr/tesseract.py
|
Index: current/src/pyocr/tesseract.py
|
||||||
index c935881..7139ffe 100755
|
===================================================================
|
||||||
--- a/src/pyocr/tesseract.py
|
--- current.orig/src/pyocr/tesseract.py
|
||||||
+++ b/src/pyocr/tesseract.py
|
+++ current/src/pyocr/tesseract.py
|
||||||
@@ -31,8 +31,7 @@ from .builders import DigitBuilder # backward compatibility
|
@@ -31,8 +31,7 @@ from .builders import DigitBuilder # ba
|
||||||
from .error import TesseractError # backward compatibility
|
from .error import TesseractError # backward compatibility
|
||||||
from .util import digits_only
|
from .util import digits_only
|
||||||
|
|
||||||
-# CHANGE THIS IF TESSERACT IS NOT IN YOUR PATH, OR IS NAMED DIFFERENTLY
|
-# CHANGE THIS IF TESSERACT IS NOT IN YOUR PATH, OR IS NAMED DIFFERENTLY
|
||||||
-TESSERACT_CMD = 'tesseract.exe' if os.name == 'nt' else 'tesseract'
|
-TESSERACT_CMD = 'tesseract.exe' if os.name == 'nt' else 'tesseract'
|
||||||
+TESSERACT_CMD = '@NIX_TESSERACT_CMD@'
|
+TESSERACT_CMD = '@tesseract@/bin/tesseract'
|
||||||
|
|
||||||
TESSDATA_EXTENSION = ".traineddata"
|
TESSDATA_EXTENSION = ".traineddata"
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user