Add trained data files for some languages to tesseract so that it can perform OCR directly.
svn path=/nixpkgs/trunk/; revision=26956
This commit is contained in:
parent
47c44f9315
commit
9a0a0c92c7
|
@ -1,5 +1,22 @@
|
||||||
{ stdenv, fetchurl, libtiff }:
|
{ stdenv, fetchurl, libtiff }:
|
||||||
|
|
||||||
|
let
|
||||||
|
# f lang sha256: builds the shell command that installs one language's
# trained data.
#   lang   - language code used in the download file name and in the
#            installed file name (e.g. "cat", "rus").
#   sha256 - hash passed to fetchurl for the downloaded archive.
# Returns a string (a single shell command), not a derivation.
f = lang : sha256 : let
|
||||||
|
# Fetch the gzip-compressed trained data file for this language.
src = fetchurl {
|
||||||
|
url = "http://tesseract-ocr.googlecode.com/files/${lang}.traineddata.gz";
|
||||||
|
inherit sha256;
|
||||||
|
};
|
||||||
|
in
|
||||||
|
# Decompress the fetched archive into $out/share/tessdata/<lang>.traineddata.
# The command does not create the target directory itself.
"gunzip -c ${src} > $out/share/tessdata/${lang}.traineddata";
|
||||||
|
|
||||||
|
# Shell script fragment made of one `f` command per extra language
# (cat, rus, spa, nld), each paired with the sha256 of its archive.
# It is assigned to postInstall in the derivation.
# No comments are placed inside the string: they would become part of the
# generated shell script.
extraLanguages = ''
|
||||||
|
${f "cat" "1qndk8qygw9bq7nzn7kzgxkm3jhlq7jgvdqpj5id4rrcaavjvifw"}
|
||||||
|
${f "rus" "0yjzks189bgcmi2vr4v0l0fla11qdrw3cb1nvpxl9mdis8qr9vcc"}
|
||||||
|
${f "spa" "1q1hw3qi95q5ww3l02fbhjqacxm34cp65fkbx10wjdcg0s5p9q2x"}
|
||||||
|
${f "nld" "0cbqfhl2rwb1mg4y1140nw2vhhcilc0nk7bfbnxw6bzj1y5n49i8"}
|
||||||
|
'';
|
||||||
|
in
|
||||||
|
|
||||||
stdenv.mkDerivation {
|
stdenv.mkDerivation {
|
||||||
name = "tesseract-3.0.0";
|
name = "tesseract-3.0.0";
|
||||||
|
|
||||||
|
@ -10,6 +27,8 @@ stdenv.mkDerivation {
|
||||||
|
|
||||||
buildInputs = [ libtiff ];
|
buildInputs = [ libtiff ];
|
||||||
|
|
||||||
|
postInstall = extraLanguages;
|
||||||
|
|
||||||
meta = {
|
meta = {
|
||||||
description = "OCR engine";
|
description = "OCR engine";
|
||||||
homepage = http://code.google.com/p/tesseract-ocr/;
|
homepage = http://code.google.com/p/tesseract-ocr/;
|
||||||
|
|
Loading…
Reference in New Issue