Merge pull request #37784 from ixxie/wordfreq-and-deps

pythonPackages.wordfreq and dependencies
This commit is contained in:
Robert Schütz 2018-04-05 10:38:56 +02:00 committed by GitHub
commit addb985275
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 183 additions and 0 deletions

View File

@ -0,0 +1,33 @@
# Package expression for jieba, built from the upstream GitHub repository.
{ lib, buildPythonPackage, fetchFromGitHub, glibcLocales, python, isPy3k }:

buildPythonPackage rec {
  pname = "jieba";
  version = "0.39";

  # no tests in PyPI tarball
  src = fetchFromGitHub {
    owner = "fxsjy";
    repo = pname;
    rev = "v${version}";
    sha256 = "0hbq0l1jbgcvm58qg4p37im4jl5a9igvq3wlhlk22pmbkbvqqgzs";
  };

  # Locale data for the LC_ALL value exported in checkPhase.
  checkInputs = [ glibcLocales ];

  # Tests are only run on Python 3; otherwise they hit a UnicodeEncodeError.
  doCheck = isPy3k;

  # Citing https://github.com/fxsjy/jieba/issues/384: "testcases is in a mess"
  # So just picking random ones that currently work
  checkPhase = ''
    export LC_ALL=en_US.UTF-8
    ${python.interpreter} test/test.py
    ${python.interpreter} test/test_tokenize.py
  '';

  meta = with lib; {
    description = "Chinese Words Segmentation Utilities";
    homepage = "https://github.com/fxsjy/jieba";
    license = licenses.mit;
  };
}

View File

@ -0,0 +1,34 @@
# Package expression for langcodes, fetched from PyPI.
{ lib
, buildPythonPackage
, marisa-trie
, pythonOlder
, fetchPypi
, nose
}:

buildPythonPackage rec {
  pname = "langcodes";
  version = "1.4.1";

  src = fetchPypi {
    inherit pname version;
    sha256 = "1axdiva2qglsjmnx2ak7i6hm0yhp6kbc4lcsgn8ckwy0nq1z3kr2";
  };

  propagatedBuildInputs = [ marisa-trie ];

  # Not built for Python interpreters older than 3.3.
  disabled = pythonOlder "3.3";

  # The test suite is run with nose.
  checkInputs = [ nose ];
  checkPhase = ''
    nosetests
  '';

  meta = with lib; {
    description = "A toolkit for working with and comparing the standardized codes for languages, such as en for English or es for Spanish";
    # Use the HTTPS form of the project URL.
    homepage = "https://github.com/LuminosoInsight/langcodes";
    license = licenses.mit;
    maintainers = with maintainers; [ ixxie ];
  };
}

View File

@ -0,0 +1,34 @@
# Package expression for the marisa-trie Python bindings, fetched from PyPI.
{ lib
, buildPythonPackage
, fetchPypi
, hypothesis
, pytest
, pytestrunner
}:

buildPythonPackage rec {
  pname = "marisa-trie";
  version = "0.7.4";

  src = fetchPypi {
    inherit pname version;
    sha256 = "1n4pxnaranbh3x2fcqxwh8j1z2918vy7i4q1z4jn75m9rkm5h8ia";
  };

  # Turn the exact hypothesis pin in setup.py into a lower bound.
  postPatch = ''
    substituteInPlace setup.py \
    --replace "hypothesis==" "hypothesis>="
  '';

  nativeBuildInputs = [ pytestrunner ];

  checkInputs = [ pytest hypothesis ];

  meta = {
    description = "Static memory-efficient Trie-like structures for Python (2.x and 3.x) based on marisa-trie C++ library";
    longDescription = "There are official SWIG-based Python bindings included in C++ library distribution; this package provides alternative Cython-based pip-installable Python bindings.";
    homepage = "https://github.com/kmike/marisa-trie";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.ixxie ];
  };
}

View File

@ -0,0 +1,24 @@
# Package expression for mecab-python3, a Python wrapper around mecab,
# fetched from PyPI.
{ lib
, buildPythonPackage
, mecab
, fetchPypi
}:

buildPythonPackage rec {
  pname = "mecab-python3";
  version = "0.7";

  src = fetchPypi {
    inherit pname version;
    sha256 = "007dg4f5fby2yl7cc44x6xwvcrf2w2ifmn0rmk56ss33mhs8l6qy";
  };

  propagatedBuildInputs = [ mecab ];

  meta = with lib; {
    description = "A python wrapper for mecab: Morphological Analysis engine";
    # Previously pointed at the wordfreq repository (copy-paste error);
    # this is the mecab-python3 project itself.
    homepage = "https://github.com/SamuraiT/mecab-python3";
    # NOTE(review): bsd0 (Zero-Clause BSD) looks unlikely for this project;
    # confirm against the license declared upstream.
    license = licenses.bsd0;
    maintainers = with maintainers; [ ixxie ];
  };
}

View File

@ -0,0 +1,48 @@
# Package expression for wordfreq, built from a pinned commit of the
# upstream GitHub repository.
{ lib
, buildPythonPackage
, fetchFromGitHub
, pythonOlder
, regex
, langcodes
, ftfy
, msgpack
, mecab-python3
, jieba
, nose
}:

buildPythonPackage rec {
  pname = "wordfreq";
  version = "2.0";

  # Python 3 only.
  disabled = pythonOlder "3";

  src = fetchFromGitHub {
    owner = "LuminosoInsight";
    repo = "wordfreq";
    rev = "e3a1b470d9f8e0d82e9f179ffc41abba434b823b";
    sha256 = "1wjkhhj7nxfnrghwvmvwc672s30lp4b7yr98gxdxgqcq6wdshxwv";
  };

  # Loosen the exact regex version pin in setup.py to a lower bound so that
  # upgrading regex does not break this package.
  postPatch = ''
    substituteInPlace setup.py --replace "regex ==" "regex >="
  '';

  propagatedBuildInputs = [ regex langcodes ftfy msgpack mecab-python3 jieba ];

  checkInputs = [ nose ];

  # Run the nose test suite, excluding the tests named below.
  checkPhase = ''
    # These languages require additional dictionaries
    nosetests -e test_japanese -e test_korean -e test_languages
  '';

  meta = {
    description = "A library for looking up the frequencies of words in many languages, based on many sources of data";
    homepage = "https://github.com/LuminosoInsight/wordfreq/";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.ixxie ];
  };
}

View File

@ -4988,6 +4988,8 @@ in {
jdcal = callPackage ../development/python-modules/jdcal { };
jieba = callPackage ../development/python-modules/jieba { };
internetarchive = callPackage ../development/python-modules/internetarchive {};
JPype1 = callPackage ../development/python-modules/JPype1 {};
@ -9177,6 +9179,8 @@ in {
marisa = pkgs.marisa;
};
marisa-trie = callPackage ../development/python-modules/marisa-trie { };
markupsafe = buildPythonPackage rec {
name = "markupsafe-${version}";
version = "1.0";
@ -10733,6 +10737,8 @@ in {
};
};
langcodes = callPackage ../development/python-modules/langcodes { };
livestreamer = buildPythonPackage rec {
version = "1.12.2";
name = "livestreamer-${version}";
@ -11320,6 +11326,8 @@ in {
propagatedBuildInputs = with self; [ six requests ];
};
mecab-python3 = callPackage ../development/python-modules/mecab-python3 { };
mox3 = buildPythonPackage rec {
name = "mox3-${version}";
version = "0.23.0";
@ -17098,6 +17106,8 @@ EOF
widgetsnbextension = callPackage ../development/python-modules/widgetsnbextension { };
wordfreq = callPackage ../development/python-modules/wordfreq { };
magic-wormhole = callPackage ../development/python-modules/magic-wormhole { };
magic-wormhole-transit-relay = callPackage ../development/python-modules/magic-wormhole-transit-relay { };