# Summary of this change set (leading text before the first "diff --git" header):
#
# Adds a new package "gpt2tc" under pkgs/tools/text/gpt2tc and registers it in
# pkgs/top-level/all-packages.nix via callPackage.
#
#   0001-add-python-shebang.patch
#       Prepends "#!/usr/bin/env python3" (plus a blank line) to gpt2convert.py.
#       Per the comment in default.nix, this is so the script is recognised as
#       Python instead of being run by the shell.
#
#   0002-fix-download-url.patch
#       In download_model.sh, switches the curl URL from
#       storage.googleapis.com/gpt-2/models/ to
#       openaipublic.blob.core.windows.net/gpt-2/models/.
#
#   default.nix
#       Fetches gpt2tc-${version}.tar.gz from bellard.org, applies the two
#       patches above, skips the build phase (dontBuild = true) and installs:
#         gpt2tc             -> $out/bin/gpt2tc
#         download_model.sh  -> $out/bin/gpt2-download-model
#         gpt2convert.py     -> $out/bin/gpt2-convert
#         readme.txt, gpt2vocab.txt, Changelog -> $out/share/gpt2tc
#       autoPatchelfHook is a native build input; python3 with numpy and
#       tensorflow is a build input. meta marks the package as unfree and
#       restricts it to x86_64-linux.
#
# NOTE(review): download_model.sh invokes curl, but curl is not among the
# inputs visible in default.nix -- presumably it is expected to be on the
# user's PATH at run time; confirm, or wrap the script.
# NOTE(review): the hunk headers (@@ ... @@) and index hashes below are tied to
# the exact payload; do not add or remove lines inside a hunk without
# regenerating them.
# NOTE(review): in this copy the diff's line breaks appear to be collapsed into
# spaces -- confirm against the original commit before trying to apply it.
diff --git a/pkgs/tools/text/gpt2tc/0001-add-python-shebang.patch b/pkgs/tools/text/gpt2tc/0001-add-python-shebang.patch new file mode 100644 index 00000000000..b47b444009a --- /dev/null +++ b/pkgs/tools/text/gpt2tc/0001-add-python-shebang.patch @@ -0,0 +1,10 @@ +diff --git a/gpt2convert.py b/gpt2convert.py +index 34ca909..6e6cac5 100644 +--- a/gpt2convert.py ++++ b/gpt2convert.py +@@ -1,3 +1,5 @@ ++#!/usr/bin/env python3 ++ + import sys + import tensorflow as tf + import numpy as np diff --git a/pkgs/tools/text/gpt2tc/0002-fix-download-url.patch b/pkgs/tools/text/gpt2tc/0002-fix-download-url.patch new file mode 100644 index 00000000000..fd8f93834e5 --- /dev/null +++ b/pkgs/tools/text/gpt2tc/0002-fix-download-url.patch @@ -0,0 +1,11 @@ +diff --git a/download_model.sh b/download_model.sh +index 9cb401f..ad1dc62 100755 +--- a/download_model.sh ++++ b/download_model.sh +@@ -13,5 +13,5 @@ mkdir -p models/$model + for filename in checkpoint encoder.json hparams.json model.ckpt.data-00000-of-00001 model.ckpt.index model.ckpt.meta vocab.bpe; do + fetch=$model/$filename + echo "Fetching $fetch" +- curl --output models/$fetch https://storage.googleapis.com/gpt-2/models/$fetch ++ curl --output models/$fetch https://openaipublic.blob.core.windows.net/gpt-2/models/$fetch + done diff --git a/pkgs/tools/text/gpt2tc/default.nix b/pkgs/tools/text/gpt2tc/default.nix new file mode 100644 index 00000000000..eb79d9009f3 --- /dev/null +++ b/pkgs/tools/text/gpt2tc/default.nix @@ -0,0 +1,47 @@ +{ lib, stdenv, fetchurl, autoPatchelfHook, python3 }: + +stdenv.mkDerivation rec { + pname = "gpt2tc"; + version = "2020-12-30"; + + src = fetchurl { + url = "https://bellard.org/nncp/gpt2tc-${version}.tar.gz"; + hash = "sha256-KFcl7E6iGx50JaJI1jwzKAdkrkbNngPbrEA/ZSyG+uY="; + }; + + patches = [ + # Add a shebang to the python script so that nix detects it as such and + # wraps it properly. Otherwise, it runs in shell and freezes the system. 
+ ./0001-add-python-shebang.patch + + # Update the source URL for the models because the old one is down. + ./0002-fix-download-url.patch + ]; + + nativeBuildInputs = [ autoPatchelfHook ]; + + buildInputs = [ + (python3.withPackages (p: with p; [ numpy tensorflow ])) + ]; + + dontBuild = true; + + installPhase = '' + runHook preInstall + + install -D -m755 -t $out/bin gpt2tc + install -T -m755 download_model.sh $out/bin/gpt2-download-model + install -T -m755 gpt2convert.py $out/bin/gpt2-convert + install -D -m644 -t $out/share/gpt2tc readme.txt gpt2vocab.txt Changelog + + runHook postInstall + ''; + + meta = with lib; { + description = "Text completion and compression using GPT-2"; + homepage = "https://bellard.org/nncp/gpt2tc.html"; + license = licenses.unfree; + platforms = [ "x86_64-linux" ]; + maintainers = with maintainers; [ anna328p ]; + }; +} diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix index 5e0794aa6ed..fa645441fa8 100644 --- a/pkgs/top-level/all-packages.nix +++ b/pkgs/top-level/all-packages.nix @@ -5059,6 +5059,8 @@ in gparted = callPackage ../tools/misc/gparted { }; + gpt2tc = callPackage ../tools/text/gpt2tc { }; + ldmtool = callPackage ../tools/misc/ldmtool { }; gpodder = callPackage ../applications/audio/gpodder { };