tesseract: Allow specifying a subset of languages.

Especially useful for our OCR-based VM tests, where we only need the
English language. By default the argument is null, so all languages are
included. If a list of language names is passed, only those languages are
enabled, for example:

tesseract.override { enableLanguages = [ "eng" "spa" ]; };

This enables support for only the English and Spanish languages.

Signed-off-by: aszlig <aszlig@redmoonstudios.org>
This commit is contained in:
aszlig 2015-05-22 07:45:59 +02:00
parent 8be00dc71d
commit adb7581459
No known key found for this signature in database
GPG Key ID: D0EBD0EC8C2DC961

View File

@ -1,26 +1,31 @@
{ stdenv, fetchurl, autoconf, automake, libtool, leptonica, libpng, libtiff }: { stdenv, fetchurl, autoconf, automake, libtool, leptonica, libpng, libtiff
, enableLanguages ? null
}:
with stdenv.lib;
let let
majVersion = "3.02"; majVersion = "3.02";
version = "${majVersion}.02"; version = "${majVersion}.02";
f = lang : sha256 : let mkLang = lang: sha256: let
src = fetchurl { src = fetchurl {
url = "http://tesseract-ocr.googlecode.com/files/tesseract-ocr-${majVersion}.${lang}.tar.gz"; url = "http://tesseract-ocr.googlecode.com/files/tesseract-ocr-${majVersion}.${lang}.tar.gz";
inherit sha256; inherit sha256;
}; };
in in "tar xfvz ${src} -C $out/share/ --strip=1";
"tar xfvz ${src} -C $out/share/ --strip=1";
extraLanguages = '' wantLang = name: const (enableLanguages == null || elem name enableLanguages);
${f "cat" "0d1smiv1b3k9ay2s05sl7q08mb3ln4w5iiiymv2cs8g8333z8jl9"}
${f "rus" "059336mkhsj9m3hwfb818xjlxkcdpy7wfgr62qwz65cx914xl709"} extraLanguages = mapAttrsToList mkLang (filterAttrs wantLang {
${f "spa" "1c9iza5mbahd9pa7znnq8yv09v5kz3gbd2sarcgcgc1ps1jc437l"} cat = "0d1smiv1b3k9ay2s05sl7q08mb3ln4w5iiiymv2cs8g8333z8jl9";
${f "nld" "162acxp1yb6gyki2is3ay2msalmfcsnrlsd9wml2ja05k94m6bjy"} rus = "059336mkhsj9m3hwfb818xjlxkcdpy7wfgr62qwz65cx914xl709";
${f "eng" "1y5xf794n832s3lymzlsdm2s9nlrd2v27jjjp0fd9xp7c2ah4461"} spa = "1c9iza5mbahd9pa7znnq8yv09v5kz3gbd2sarcgcgc1ps1jc437l";
${f "slv" "0rqng43435cly32idxm1lvxkcippvc3xpxbfizwq5j0155ym00dr"} nld = "162acxp1yb6gyki2is3ay2msalmfcsnrlsd9wml2ja05k94m6bjy";
${f "jpn" "07v8pymd0iwyzh946lxylybda20gsw7p4fsb09jw147955x49gq9"} eng = "1y5xf794n832s3lymzlsdm2s9nlrd2v27jjjp0fd9xp7c2ah4461";
''; slv = "0rqng43435cly32idxm1lvxkcippvc3xpxbfizwq5j0155ym00dr";
jpn = "07v8pymd0iwyzh946lxylybda20gsw7p4fsb09jw147955x49gq9";
});
in in
stdenv.mkDerivation rec { stdenv.mkDerivation rec {
@ -40,7 +45,7 @@ stdenv.mkDerivation rec {
'LIBLEPT_HEADERSDIR=${leptonica}/include' 'LIBLEPT_HEADERSDIR=${leptonica}/include'
''; '';
postInstall = extraLanguages; postInstall = concatStrings extraLanguages;
meta = { meta = {
description = "OCR engine"; description = "OCR engine";