Merge pull request #112885 from alyssais/wiktionary
dictdDBs.wiktionary: 20161001 -> 20210201; refactor
This commit is contained in:
commit
5a1a7a359f
|
@ -306,6 +306,11 @@ lib.mapAttrs (n: v: v // { shortName = n; }) {
|
||||||
fullName = "GNU Free Documentation License v1.1 only";
|
fullName = "GNU Free Documentation License v1.1 only";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
fdl11Plus = spdx {
|
||||||
|
spdxId = "GFDL-1.1-or-later";
|
||||||
|
fullName = "GNU Free Documentation License v1.1 or later";
|
||||||
|
};
|
||||||
|
|
||||||
fdl12Only = spdx {
|
fdl12Only = spdx {
|
||||||
spdxId = "GFDL-1.2-only";
|
spdxId = "GFDL-1.2-only";
|
||||||
fullName = "GNU Free Documentation License v1.2 only";
|
fullName = "GNU Free Documentation License v1.2 only";
|
||||||
|
|
|
@ -91,5 +91,5 @@ in rec {
|
||||||
locale = "en_UK";
|
locale = "en_UK";
|
||||||
};
|
};
|
||||||
wordnet = callPackage ./dictd-wordnet.nix {};
|
wordnet = callPackage ./dictd-wordnet.nix {};
|
||||||
wiktionary = callPackage ./dictd-wiktionary.nix {};
|
wiktionary = callPackage ./wiktionary {};
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,31 +0,0 @@
|
||||||
{lib, stdenv, fetchurl, python, dict, glibcLocales, writeScript}:
|
|
||||||
|
|
||||||
stdenv.mkDerivation rec {
|
|
||||||
version = "20161001";
|
|
||||||
pname = "dict-db-wiktionary";
|
|
||||||
data = fetchurl {
|
|
||||||
url = "http://dumps.wikimedia.org/enwiktionary/${version}/enwiktionary-${version}-pages-articles.xml.bz2";
|
|
||||||
sha256 = "0g3k7kxp2nzg0v56i4cz253af3aqvhn1lwkys2fnam51cn3yqm7m";
|
|
||||||
};
|
|
||||||
|
|
||||||
convert = ./wiktionary2dict.py;
|
|
||||||
buildInputs = [python dict glibcLocales];
|
|
||||||
|
|
||||||
builder = writeScript "wiktionary-builder.sh" ''
|
|
||||||
source $stdenv/setup
|
|
||||||
|
|
||||||
mkdir -p $out/share/dictd/
|
|
||||||
cd $out/share/dictd
|
|
||||||
|
|
||||||
python -O ${convert} ${data}
|
|
||||||
dictzip wiktionary-en.dict
|
|
||||||
echo en_US.UTF-8 > locale
|
|
||||||
'';
|
|
||||||
|
|
||||||
meta = {
|
|
||||||
description = "DICT version of English Wiktionary";
|
|
||||||
homepage = "http://en.wiktionary.org/";
|
|
||||||
maintainers = [ ];
|
|
||||||
platforms = lib.platforms.all;
|
|
||||||
};
|
|
||||||
}
|
|
|
@ -0,0 +1,8 @@
|
||||||
|
# Builder script for the dict-db-wiktionary derivation.
# Reads from the environment: $stdenv, $out, $convert (converter script)
# and $src (the Wiktionary dump).
source "$stdenv/setup"

# All output files are created inside the dictd share directory.
mkdir -p "$out/share/dictd/"
cd "$out/share/dictd"

# Run the converter on the dump from within the output directory.
# NOTE(review): the converter presumably writes wiktionary-en.dict (and its
# index) into the current directory -- confirm against wiktionary2dict.py.
python -O "$convert" "$src"

# Compress the dictionary file that dictzip expects to find here.
dictzip wiktionary-en.dict

# Record the locale name in a file called "locale" next to the database.
echo en_US.UTF-8 > locale
|
|
@ -0,0 +1,25 @@
|
||||||
|
# DICT database package built from an English Wiktionary dump.
{ lib, stdenv, fetchurl, python, dict, glibcLocales }:

stdenv.mkDerivation rec {
  version = "20210201";
  pname = "dict-db-wiktionary";

  # `rec` lets the dump URL be derived from `version` above.
  src = fetchurl {
    url = "https://dumps.wikimedia.org/enwiktionary/${version}/enwiktionary-${version}-pages-articles.xml.bz2";
    sha256 = "0dc34cbadsg0f6lhfcyx0np7zjnlg6837piqhlvnn0b45xnzn0cs";
  };

  # The builder script reads this path as "$convert".
  convert = ./wiktionary2dict.py;
  buildInputs = [ python dict glibcLocales ];
  builder = ./builder.sh;

  passthru.updateScript = ./update.sh;

  meta = with lib; {
    description = "DICT version of English Wiktionary";
    # HTTPS, consistent with the dump URL used for `src`.
    homepage = "https://en.wiktionary.org/";
    maintainers = with maintainers; [ qyliss ];
    platforms = platforms.all;
    license = with licenses; [ cc-by-sa-30 fdl11Plus ];
  };
}
|
|
@ -0,0 +1,42 @@
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
from html.parser import HTMLParser
|
||||||
|
from os.path import abspath, dirname
|
||||||
|
from urllib.request import urlopen
|
||||||
|
|
||||||
|
class WiktionaryLatestVersionParser(HTMLParser):
    """Find the newest dump version linked from an HTML index page.

    Feed the page's HTML to the parser; afterwards ``latest_version`` holds
    the greatest version string seen among the page's links, or the
    ``current_version`` given to the constructor if no link names a greater
    one.
    """

    # Versions are eight-digit names such as "20210201". Requiring a fixed
    # length keeps the string comparison below equivalent to numeric order.
    _VERSION_LENGTH = 8
    _DIGITS = frozenset('0123456789')

    def __init__(self, current_version, *args, **kwargs):
        """Seed the search with ``current_version``.

        Remaining arguments are forwarded unchanged to ``HTMLParser``.
        """
        self.latest_version = current_version
        super().__init__(*args, **kwargs)

    def handle_starttag(self, tag, attrs):
        """Treat the ``href`` of every ``<a>`` tag as a candidate version."""
        if tag != 'a':
            return

        # Anchors without an href, or with a valueless one, carry no version.
        href = dict(attrs).get('href')
        if not href:
            return

        # Directory links look like "20210201/"; drop the trailing slash.
        candidate = href.rstrip('/')

        # Skip everything that is not a plain version name ("latest", "..",
        # absolute URLs, ...); such strings could otherwise win max().
        if len(candidate) != self._VERSION_LENGTH:
            return
        if not all(ch in self._DIGITS for ch in candidate):
            return

        self.latest_version = max(self.latest_version, candidate)
|
||||||
|
|
||||||
|
|
||||||
|
def nix_prefetch_url(url, algo='sha256'):
    """Prefetch the content of the given URL with ``nix-prefetch-url``.

    Runs ``nix-prefetch-url --type <algo> <url>`` and returns the command's
    standard output, decoded as UTF-8 with trailing whitespace removed.

    Raises:
        subprocess.CalledProcessError: if the command exits non-zero.
        OSError: if the ``nix-prefetch-url`` executable cannot be started.
    """
    import sys

    # Log to stderr: update.sh captures this script's stdout as the version
    # string, so progress messages must not be written there.
    print(f'nix-prefetch-url {url}', file=sys.stderr)
    out = subprocess.check_output(['nix-prefetch-url', '--type', algo, url])
    return out.decode('utf-8').rstrip()
|
||||||
|
|
||||||
|
|
||||||
|
# Ask Nix for the version attribute of dictdDBs.wiktionary, evaluating the
# expression found four directory levels above this script.
_eval_command = [
    'nix', 'eval', '--raw',
    '-f', dirname(abspath(__file__)) + '/../../../..',
    'dictdDBs.wiktionary.version',
]
current_version = subprocess.check_output(_eval_command).decode('utf-8')

# The parser starts from the current version and only moves to greater ones.
parser = WiktionaryLatestVersionParser(current_version)

# Download the dump index and let the parser scan its links.
with urlopen('https://dumps.wikimedia.org/enwiktionary/') as resp:
    index_html = resp.read().decode('utf-8')
    parser.feed(index_html)

# Print the resulting version on stdout.
print(parser.latest_version)
|
|
@ -0,0 +1,7 @@
|
||||||
|
#! /usr/bin/env nix-shell
#! nix-shell -i bash -p common-updater-scripts python3

# Sets dictdDBs.wiktionary to the version printed by latest_version.py,
# which is looked up in the same directory as this script.

set -ueo pipefail

script_dir="$(dirname "${BASH_SOURCE[0]}")"

# latest_version.py prints the version to use on stdout.
version="$(python "$script_dir"/latest_version.py)"

update-source-version dictdDBs.wiktionary "$version"
|
Loading…
Reference in New Issue