From fb7acacd42658a59717bb20151ffe2b18efe508e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?=
Date: Tue, 18 Aug 2020 19:38:43 +0200
Subject: [PATCH] python3Packages.spacy: add passthru test

I have been using the main example of the spaCy web page for testing
updates of spacy (and its transitive dependencies). Let's convert this
into a proper test to take out manual testing.
---
 .../spacy/annotation-test/annotate.py         | 80 +++++++++++++++++++
 .../spacy/annotation-test/default.nix         | 25 ++++++
 .../python-modules/spacy/default.nix          |  3 +
 3 files changed, 108 insertions(+)
 create mode 100644 pkgs/development/python-modules/spacy/annotation-test/annotate.py
 create mode 100644 pkgs/development/python-modules/spacy/annotation-test/default.nix

diff --git a/pkgs/development/python-modules/spacy/annotation-test/annotate.py b/pkgs/development/python-modules/spacy/annotation-test/annotate.py
new file mode 100644
index 00000000000..822eb8ac074
--- /dev/null
+++ b/pkgs/development/python-modules/spacy/annotation-test/annotate.py
@@ -0,0 +1,80 @@
+"""Smoke test for spaCy on the example text from the spaCy web page.
+
+The expected values appear to be recorded en_core_web_sm output rather
+than gold annotations (note 'Thrun' labelled ORG in test_entities).
+"""
+
+import pytest
+import spacy
+
+en_text = (
+    "When Sebastian Thrun started working on self-driving cars at "
+    "Google in 2007, few people outside of the company took him "
+    "seriously. “I can tell you very senior CEOs of major American "
+    "car companies would shake my hand and turn away because I wasn’t "
+    "worth talking to,” said Thrun, in an interview with Recode earlier "
+    "this week.")
+
+
+# Module scope: load the model and annotate the text once for all tests.
+@pytest.fixture(scope="module")
+def en_core_web_sm():
+    """Return the loaded en_core_web_sm pipeline."""
+    return spacy.load("en_core_web_sm")
+
+
+@pytest.fixture(scope="module")
+def doc_en_core_web_sm(en_core_web_sm):
+    """Return en_text annotated by the en_core_web_sm pipeline."""
+    return en_core_web_sm(en_text)
+
+
+def test_entities(doc_en_core_web_sm):
+    """Check the named entities and their labels."""
+    entities = [(ent.text, ent.label_) for ent in doc_en_core_web_sm.ents]
+
+    assert entities == [
+        ('Sebastian Thrun', 'PERSON'),
+        ('Google', 'ORG'), ('2007', 'DATE'),
+        ('American', 'NORP'),
+        ('Thrun', 'ORG'),
+        ('earlier this week', 'DATE')
+    ]
+
+
+def test_nouns(doc_en_core_web_sm):
+    """Check the text of the noun chunks."""
+    assert [
+        chunk.text for chunk in doc_en_core_web_sm.noun_chunks] == [
+        'Sebastian Thrun',
+        'self-driving cars',
+        'Google',
+        'few people',
+        'the company',
+        'him',
+        'I',
+        'you',
+        'very senior CEOs',
+        'major American car companies',
+        'my hand',
+        'I',
+        'Thrun',
+        'an interview',
+        'Recode']
+
+
+def test_verbs(doc_en_core_web_sm):
+    """Check the lemmas of the tokens tagged VERB."""
+    assert [
+        token.lemma_ for token in doc_en_core_web_sm if token.pos_ == "VERB"] == [
+        'start',
+        'work',
+        'drive',
+        'take',
+        'can',
+        'tell',
+        'would',
+        'shake',
+        'turn',
+        'talk',
+        'say']
diff --git a/pkgs/development/python-modules/spacy/annotation-test/default.nix b/pkgs/development/python-modules/spacy/annotation-test/default.nix
new file mode 100644
index 00000000000..1b066ff3c39
--- /dev/null
+++ b/pkgs/development/python-modules/spacy/annotation-test/default.nix
@@ -0,0 +1,25 @@
+# Runs annotate.py with pytest against the en_core_web_sm model.
+{ stdenv, pytest, spacy_models }:
+
+stdenv.mkDerivation {
+  name = "spacy-annotation-test";
+
+  src = ./.;
+
+  dontConfigure = true;
+  dontBuild = true;
+  doCheck = true;
+
+  checkInputs = [ pytest spacy_models.en_core_web_sm ];
+
+  checkPhase = ''
+    pytest annotate.py
+  '';
+
+  # Nothing is installed; an empty $out marks the test as passed.
+  installPhase = ''
+    touch $out
+  '';
+
+  meta.timeout = 60;
+}
diff --git a/pkgs/development/python-modules/spacy/default.nix b/pkgs/development/python-modules/spacy/default.nix
index 322f10d00ae..077aea4dc9a 100644
--- a/pkgs/development/python-modules/spacy/default.nix
+++ b/pkgs/development/python-modules/spacy/default.nix
@@ -1,5 +1,6 @@
 { lib
 , buildPythonPackage
+, callPackage
 , fetchPypi
 , pythonOlder
 , pytest
@@ -64,6 +65,8 @@ buildPythonPackage rec {
 
   pythonImportsCheck = [ "spacy" ];
 
+  passthru.tests.annotation = callPackage ./annotation-test {};
+
   meta = with lib; {
     description = "Industrial-strength Natural Language Processing (NLP) with Python and Cython";
     homepage = "https://github.com/explosion/spaCy";