tesseract: Allow specifying a subset of languages.
Especially useful for our OCR-based VM tests, where we only need the English language. By default the argument is null, so all languages are included. If a list of language names is passed, only those languages are enabled. For example, to enable support for only the English and Spanish languages: tesseract.override { enableLanguages = [ "eng" "spa" ]; }; Signed-off-by: aszlig <aszlig@redmoonstudios.org>
This commit is contained in:
parent
8be00dc71d
commit
adb7581459
@ -1,26 +1,31 @@
|
|||||||
{ stdenv, fetchurl, autoconf, automake, libtool, leptonica, libpng, libtiff }:
|
{ stdenv, fetchurl, autoconf, automake, libtool, leptonica, libpng, libtiff
|
||||||
|
, enableLanguages ? null
|
||||||
|
}:
|
||||||
|
|
||||||
|
with stdenv.lib;
|
||||||
|
|
||||||
let
|
let
|
||||||
majVersion = "3.02";
|
majVersion = "3.02";
|
||||||
version = "${majVersion}.02";
|
version = "${majVersion}.02";
|
||||||
|
|
||||||
f = lang : sha256 : let
|
mkLang = lang: sha256: let
|
||||||
src = fetchurl {
|
src = fetchurl {
|
||||||
url = "http://tesseract-ocr.googlecode.com/files/tesseract-ocr-${majVersion}.${lang}.tar.gz";
|
url = "http://tesseract-ocr.googlecode.com/files/tesseract-ocr-${majVersion}.${lang}.tar.gz";
|
||||||
inherit sha256;
|
inherit sha256;
|
||||||
};
|
};
|
||||||
in
|
in "tar xfvz ${src} -C $out/share/ --strip=1";
|
||||||
"tar xfvz ${src} -C $out/share/ --strip=1";
|
|
||||||
|
|
||||||
extraLanguages = ''
|
wantLang = name: const (enableLanguages == null || elem name enableLanguages);
|
||||||
${f "cat" "0d1smiv1b3k9ay2s05sl7q08mb3ln4w5iiiymv2cs8g8333z8jl9"}
|
|
||||||
${f "rus" "059336mkhsj9m3hwfb818xjlxkcdpy7wfgr62qwz65cx914xl709"}
|
extraLanguages = mapAttrsToList mkLang (filterAttrs wantLang {
|
||||||
${f "spa" "1c9iza5mbahd9pa7znnq8yv09v5kz3gbd2sarcgcgc1ps1jc437l"}
|
cat = "0d1smiv1b3k9ay2s05sl7q08mb3ln4w5iiiymv2cs8g8333z8jl9";
|
||||||
${f "nld" "162acxp1yb6gyki2is3ay2msalmfcsnrlsd9wml2ja05k94m6bjy"}
|
rus = "059336mkhsj9m3hwfb818xjlxkcdpy7wfgr62qwz65cx914xl709";
|
||||||
${f "eng" "1y5xf794n832s3lymzlsdm2s9nlrd2v27jjjp0fd9xp7c2ah4461"}
|
spa = "1c9iza5mbahd9pa7znnq8yv09v5kz3gbd2sarcgcgc1ps1jc437l";
|
||||||
${f "slv" "0rqng43435cly32idxm1lvxkcippvc3xpxbfizwq5j0155ym00dr"}
|
nld = "162acxp1yb6gyki2is3ay2msalmfcsnrlsd9wml2ja05k94m6bjy";
|
||||||
${f "jpn" "07v8pymd0iwyzh946lxylybda20gsw7p4fsb09jw147955x49gq9"}
|
eng = "1y5xf794n832s3lymzlsdm2s9nlrd2v27jjjp0fd9xp7c2ah4461";
|
||||||
'';
|
slv = "0rqng43435cly32idxm1lvxkcippvc3xpxbfizwq5j0155ym00dr";
|
||||||
|
jpn = "07v8pymd0iwyzh946lxylybda20gsw7p4fsb09jw147955x49gq9";
|
||||||
|
});
|
||||||
in
|
in
|
||||||
|
|
||||||
stdenv.mkDerivation rec {
|
stdenv.mkDerivation rec {
|
||||||
@ -40,7 +45,7 @@ stdenv.mkDerivation rec {
|
|||||||
'LIBLEPT_HEADERSDIR=${leptonica}/include'
|
'LIBLEPT_HEADERSDIR=${leptonica}/include'
|
||||||
'';
|
'';
|
||||||
|
|
||||||
postInstall = extraLanguages;
|
postInstall = concatStrings extraLanguages;
|
||||||
|
|
||||||
meta = {
|
meta = {
|
||||||
description = "OCR engine";
|
description = "OCR engine";
|
||||||
|
Loading…
x
Reference in New Issue
Block a user