From c0cecd0e6007382ff6c64d3d47a947895bc36ff6 Mon Sep 17 00:00:00 2001 From: Chuck Date: Thu, 31 Oct 2019 17:00:56 -0700 Subject: [PATCH] python3Packages.libxml2: Patch to work around python3 + utf-8 itstool crash 1. Gnumeric has unbalanced XML tags in its doc translations. 2. itstool's XML error handler tries to print this error with context. 3. libxml2's context snipper treats the data as bytes, not UTF-8. 4. python3Packages.libxml2 casts the context to a UTF-8 Python string. 5. itstool dereferences a null pointer. This patch intervenes at #4. In https://bugzilla.gnome.org/show_bug.cgi?id=789714#c4 , upstream suggests that intervening at #3 would be better -- that each of the four copies of xmlParserPrintFileContextInternal() has four additional UTF-8 problems, one of which is that the caret indicator ought to count "Unicode characters" not bytes. But to position a caret correctly, a character count is not sufficient -- this would need to use ICU's BiDi logic (with fallback to doing something wrong when libxml2 is configured not to use ICU) -- which makes a 'correct' fix a much larger project than this simple band-aid. 
--- .../development/libraries/libxml2/default.nix | 14 +++++++++ .../libxml2/utf8-xmlErrorFuncHandler.patch | 30 +++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 pkgs/development/libraries/libxml2/utf8-xmlErrorFuncHandler.patch diff --git a/pkgs/development/libraries/libxml2/default.nix b/pkgs/development/libraries/libxml2/default.nix index 2d2fb89d595..7ed50f61224 100644 --- a/pkgs/development/libraries/libxml2/default.nix +++ b/pkgs/development/libraries/libxml2/default.nix @@ -14,6 +14,20 @@ stdenv.mkDerivation rec { url = "http://xmlsoft.org/sources/${pname}-${version}.tar.gz"; sha256 = "0wd881jzvqayx0ihzba29jl80k06xj9ywp16kxacdqs3064p1ywl"; }; + patches = [ + # Upstream bugs: + # https://bugzilla.gnome.org/show_bug.cgi?id=789714 + # https://gitlab.gnome.org/GNOME/libxml2/issues/64 + # Patch from https://bugzilla.opensuse.org/show_bug.cgi?id=1065270 , + # but only the UTF-8 part. + # Can also be mitigated by fixing malformed XML inputs, such as in + # https://gitlab.gnome.org/GNOME/gnumeric/merge_requests/3 . 
+ # Other discussion: + # https://github.com/itstool/itstool/issues/22 + # https://github.com/NixOS/nixpkgs/pull/63174 + # https://github.com/NixOS/nixpkgs/pull/72342 + ./utf8-xmlErrorFuncHandler.patch + ]; outputs = [ "bin" "dev" "out" "man" "doc" ] ++ lib.optional pythonSupport "py" diff --git a/pkgs/development/libraries/libxml2/utf8-xmlErrorFuncHandler.patch b/pkgs/development/libraries/libxml2/utf8-xmlErrorFuncHandler.patch new file mode 100644 index 00000000000..9f4c99b0934 --- /dev/null +++ b/pkgs/development/libraries/libxml2/utf8-xmlErrorFuncHandler.patch @@ -0,0 +1,30 @@ +Index: libxml2-2.9.5/python/libxml.c +=================================================================== +--- libxml2-2.9.5.orig/python/libxml.c ++++ libxml2-2.9.5/python/libxml.c +@@ -1620,6 +1620,7 @@ libxml_xmlErrorFuncHandler(ATTRIBUTE_UNU + PyObject *message; + PyObject *result; + char str[1000]; ++ unsigned char *ptr = (unsigned char *)str; + + #ifdef DEBUG_ERROR + printf("libxml_xmlErrorFuncHandler(%p, %s, ...) called\n", ctx, msg); +@@ -1636,10 +1637,16 @@ libxml_xmlErrorFuncHandler(ATTRIBUTE_UNU + str[999] = 0; + va_end(ap); + ++#if PY_MAJOR_VERSION >= 3 ++ /* Ensure the error string doesn't start at UTF8 continuation. */ ++ while (*ptr && (*ptr & 0xc0) == 0x80) ++ ptr++; ++#endif ++ + list = PyTuple_New(2); + PyTuple_SetItem(list, 0, libxml_xmlPythonErrorFuncCtxt); + Py_XINCREF(libxml_xmlPythonErrorFuncCtxt); +- message = libxml_charPtrConstWrap(str); ++ message = libxml_charPtrConstWrap(ptr); + PyTuple_SetItem(list, 1, message); + result = PyEval_CallObject(libxml_xmlPythonErrorFuncHandler, list); + Py_XDECREF(list);