From 8c2895400fd59058b5fad86308d38dcaace650d4 Mon Sep 17 00:00:00 2001 From: Daniel Fullmer Date: Sat, 9 Sep 2017 01:09:03 -0400 Subject: [PATCH] k2pdfopt: 2.32 -> 2.42 --- pkgs/applications/misc/k2pdfopt/default.nix | 167 ++++++++---------- .../applications/misc/k2pdfopt/k2pdfopt.patch | 152 ++++++++-------- .../misc/k2pdfopt/tesseract.patch | 13 +- 3 files changed, 162 insertions(+), 170 deletions(-) diff --git a/pkgs/applications/misc/k2pdfopt/default.nix b/pkgs/applications/misc/k2pdfopt/default.nix index 7c0d615f366..587e6a588a7 100644 --- a/pkgs/applications/misc/k2pdfopt/default.nix +++ b/pkgs/applications/misc/k2pdfopt/default.nix @@ -1,105 +1,92 @@ -# Build procedure lifted from https://aur.archlinux.org/packages/k2/k2pdfopt/PKGBUILD -{ stdenv, fetchzip, fetchurl, writeScript, libX11, libXext, autoconf, automake, libtool - , leptonica, libpng, libtiff, zlib, openjpeg, freetype, jbig2dec, djvulibre - , openssl }: +{ stdenv, fetchzip, fetchurl, fetchpatch, cmake, pkgconfig +, zlib, libpng +, enableGSL ? true, gsl +, enableGhostScript ? true, ghostscript +, enableMuPDF ? true, jbig2dec, openjpeg, freetype, harfbuzz, mupdf +, enableJPEG2K ? true, jasper +, enableDJVU ? true, djvulibre +, enableGOCR ? false, gocr # Disabled by default due to crashes +, enableTesseract ? 
true, leptonica, tesseract +}: -let - mupdf_src = fetchurl { - url = http://www.mupdf.com/downloads/archive/mupdf-1.6-source.tar.gz; - sha256 = "0qx51rj6alzcagcixm59rvdpm54w6syrwr4184v439jh14ryw4wq"; - }; +with stdenv.lib; - tess_src = fetchurl { - url = http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.02.tar.gz; - sha256 = "0g81m9y4iydp7kgr56mlkvjdwpp3mb01q385yhdnyvra7z5kkk96"; - }; - - gocr_src = fetchurl { - url = http://www-e.uni-magdeburg.de/jschulen/ocr/gocr-0.49.tar.gz; - sha256 = "06hpzp7rkkwfr1fvmc8kcfz9v490i9yir7f7imh13gmka0fr6afc"; - }; - -in stdenv.mkDerivation rec { +stdenv.mkDerivation rec { name = "k2pdfopt-${version}"; - version = "2.32"; + version = "2.42"; + src = fetchzip { url = "http://www.willus.com/k2pdfopt/src/k2pdfopt_v${version}_src.zip"; - sha256 = "1v3cj5bwpjvy7s66sfqcmkxs91f7nxaykjpdjm2wn87vn6q7n19m"; + sha256 = "1zag4jmkr0qrcpqqb5davmvdrabhdyz87q4zz0xpfkl6xw2dn9bk"; }; - buildInputs = [ libX11 libXext autoconf automake libtool leptonica libpng libtiff zlib - openjpeg freetype jbig2dec djvulibre openssl ]; - NIX_LDFLAGS = "-lX11 -lXext"; + patches = [ ./k2pdfopt.patch ]; - hardeningDisable = [ "format" ]; + nativeBuildInputs = [ cmake pkgconfig ]; - k2_pa = ./k2pdfopt.patch; - tess_pa = ./tesseract.patch; + buildInputs = + let + mupdf_modded = mupdf.overrideAttrs (attrs: { + name = "mupdf-1.10a"; + src = fetchurl { + url = "http://mupdf.com/downloads/archive/mupdf-1.10a-source.tar.gz"; + sha256 = "0dm8wcs8i29aibzkqkrn8kcnk4q0kd1v66pg48h5c3qqp4v1zk5a"; + }; + # Excluded the pdf-*.c files, since they mostly just broke the #includes + prePatch = '' + cp ${src}/mupdf_mod/{font,stext-device,string}.c source/fitz/ + cp ${src}/mupdf_mod/font-win32.c source/pdf/ + ''; + # Patches from previous 1.10a version in nixpkgs + patches = [ + # Compatibility with new openjpeg + (fetchpatch { + name = "mupdf-1.9a-openjpeg-2.1.1.patch"; + url = 
"https://git.archlinux.org/svntogit/community.git/plain/mupdf/trunk/0001-mupdf-openjpeg.patch?id=5a28ad0a8999a9234aa7848096041992cc988099"; + sha256 = "1i24qr4xagyapx4bijjfksj4g3bxz8vs5c2mn61nkm29c63knp75"; + }) - builder = writeScript "builder.sh" '' - . ${stdenv}/setup - set -e + (fetchurl { + name = "CVE-2017-5896.patch"; + url = "http://git.ghostscript.com/?p=mupdf.git;a=patch;h=2c4e5867ee699b1081527bc6c6ea0e99a35a5c27"; + sha256 = "14k7x47ifx82sds1c06ibzbmcparfg80719jhgwjk6w1vkh4r693"; + }) + ]; + }); + leptonica_modded = leptonica.overrideAttrs (attrs: { + prePatch = '' + cp ${src}/leptonica_mod/* src/ + ''; + }); + tesseract_modded = tesseract.overrideAttrs (attrs: { + prePatch = '' + cp ${src}/tesseract_mod/{ambigs.cpp,ccutil.h,ccutil.cpp} ccutil/ + cp ${src}/tesseract_mod/dawg.cpp api/ + cp ${src}/tesseract_mod/{imagedata.cpp,tessdatamanager.cpp} ccstruct/ + cp ${src}/tesseract_mod/openclwrapper.h opencl/ + cp ${src}/tesseract_mod/{tessedit.cpp,thresholder.cpp} ccmain/ + cp ${src}/tesseract_mod/tess_lang_mod_edge.h cube/ + cp ${src}/tesseract_mod/tesscapi.cpp api/ + cp ${src}/include_mod/{tesseract.h,leptonica.h} api/ + ''; + patches = [ ./tesseract.patch ]; + }); + in + [ zlib libpng ] ++ + optional enableGSL gsl ++ + optional enableGhostScript ghostscript ++ + optionals enableMuPDF [ jbig2dec openjpeg freetype harfbuzz mupdf_modded ] ++ + optionals enableJPEG2K [ jasper ] ++ + optional enableDJVU djvulibre ++ + optional enableGOCR gocr ++ + optionals enableTesseract [ leptonica_modded tesseract_modded ]; - plibs=`pwd`/patched_libraries + dontUseCmakeBuildDir = true; - tar zxf ${mupdf_src} - cp $src/mupdf_mod/font.c $src/mupdf_mod/string.c mupdf-1.6-source/source/fitz/ - cp $src/mupdf_mod/pdf-* mupdf-1.6-source/source/pdf + cmakeFlags = [ "-DCMAKE_C_FLAGS=-I${src}/include_mod" ]; - tar zxf ${tess_src} - cp $src/tesseract_mod/dawg.cpp tesseract-ocr/dict - cp $src/tesseract_mod/tessdatamanager.cpp tesseract-ocr/ccutil - cp $src/tesseract_mod/tessedit.cpp 
tesseract-ocr/ccmain - cp $src/tesseract_mod/tesscapi.cpp tesseract-ocr/api - cp $src/include_mod/tesseract.h $src/include_mod/leptonica.h tesseract-ocr/api - - cp -a $src k2pdfopt_v2.21 - chmod -R +w k2pdfopt_v2.21 - - patch -p0 -i $tess_pa - patch -p0 -i $k2_pa - - cd tesseract-ocr - ./autogen.sh - substituteInPlace "configure" \ - --replace 'LIBLEPT_HEADERSDIR="/usr/local/include /usr/include"' \ - 'LIBLEPT_HEADERSDIR=${leptonica}/include' - ./configure --prefix=$plibs --disable-shared - make install - - cd .. - tar zxf ${gocr_src} - cd gocr-0.49 - ./configure - cp src/{gocr.h,pnm.h,unicode.h,list.h} $plibs/include - cp include/config.h $plibs/include - make libs - cp src/libPgm2asc.a $plibs/lib - - cd ../mupdf-1.6-source - make prefix=$plibs install - install -Dm644 build/debug/libmujs.a $plibs/lib - - cd ../k2pdfopt_v2.21/k2pdfoptlib - gcc -Ofast -Wall -c *.c -I ../include_mod/ -I $plibs/include \ - -I . -I ../willuslib - ar rcs libk2pdfopt.a *.o - - cd ../willuslib - gcc -Ofast -Wall -c *.c -I ../include_mod/ -I $plibs/include - ar rcs libwillus.a *.o - - cd .. 
- gcc -Wall -Ofast -o k2pdfopt.o -c k2pdfopt.c -I k2pdfoptlib/ -I willuslib/ \ - -I include_mod/ -I $plibs/include - g++ -Ofast k2pdfopt.o -o k2pdfopt -I willuslib/ -I k2pdfoptlib/ -I include_mod/ \ - -I $plibs/include -L $plibs/lib/ \ - -L willuslib/ -L k2pdfoptlib/ -lk2pdfopt -lwillus -ldjvulibre -lz -lmupdf \ - -ljbig2dec -ljpeg -lopenjp2 -lpng -lfreetype -lpthread -lmujs \ - -lPgm2asc -llept -ltesseract -lcrypto - - mkdir -p $out/bin - cp k2pdfopt $out/bin + installPhase = '' + install -D -m 755 k2pdfopt $out/bin/k2pdfopt ''; meta = with stdenv.lib; { @@ -107,7 +94,7 @@ in stdenv.mkDerivation rec { homepage = http://www.willus.com/k2pdfopt; license = licenses.gpl3; platforms = platforms.linux; - maintainers = [ maintainers.bosu ]; + maintainers = with maintainers; [ bosu danielfullmer ]; }; } diff --git a/pkgs/applications/misc/k2pdfopt/k2pdfopt.patch b/pkgs/applications/misc/k2pdfopt/k2pdfopt.patch index 00ac5770ea4..a393e30fc72 100644 --- a/pkgs/applications/misc/k2pdfopt/k2pdfopt.patch +++ b/pkgs/applications/misc/k2pdfopt/k2pdfopt.patch @@ -1,95 +1,99 @@ -diff -aur k2pdfopt_v2.21/willuslib/array.c k2pdfopt_v2.21.new/willuslib/array.c ---- k2pdfopt_v2.21/willuslib/array.c 2014-05-23 16:29:58.000000000 -0300 -+++ k2pdfopt_v2.21.new/willuslib/array.c 2014-07-26 11:35:49.829825567 -0300 -@@ -1055,7 +1055,7 @@ - void arrayf_sort(float *a,int n) +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 4a2378b..502c477 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -52,6 +52,7 @@ endif(JPEG_FOUND) + include(FindJasper) + if(JASPER_FOUND) + set(HAVE_JASPER_LIB 1) ++ set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${JASPER_LIBRARY}) + endif(JASPER_FOUND) - { -- sort(a,(long)n); -+ willus_sort(a,(long)n); - } + # paths from willuslib/wgs.c +@@ -66,8 +67,12 @@ else() + message(STATUS "Could NOT find ghostscript executable") + endif(GHOSTSCRIPT_EXECUTABLE) + +-# willus.h +-# HAVE_GSL_LIB ++pkg_check_modules(GSL gsl) ++if(GSL_FOUND) ++ set(HAVE_GSL_LIB 1) ++ 
include_directories(SYSTEM ${GSL_INCLUDEDIR}) ++ set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${GSL_LDFLAGS}) ++endif(GSL_FOUND) -diff -aur k2pdfopt_v2.21/willuslib/math.c k2pdfopt_v2.21.new/willuslib/math.c ---- k2pdfopt_v2.21/willuslib/math.c 2013-08-15 21:33:50.000000000 -0300 -+++ k2pdfopt_v2.21.new/willuslib/math.c 2014-07-26 11:36:02.853170659 -0300 -@@ -532,7 +532,7 @@ + # libfreetype6 (>= 2.3.9), libjbig2dec0, libjpeg8 (>= 8c), libx11-6, libxext6, zlib1g (>= 1:1.2.0) +@@ -80,7 +85,7 @@ if(MUPDF_FOUND) + include_directories(SYSTEM ${MUPDF_INCLUDEDIR}) + message(STATUS "mupdf libraries: ${MUPDF_LDFLAGS}") + set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${MUPDF_LDFLAGS} +- -lmupdf-js-none -lopenjpeg -ljbig2dec -ljpeg -lfreetype ++ -lopenjp2 -ljbig2dec -ljpeg -lfreetype -lharfbuzz + ) + endif(MUPDF_FOUND) +@@ -91,9 +96,25 @@ if(DJVU_FOUND) + set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${DJVU_LDFLAGS}) + endif(DJVU_FOUND) +-# HAVE_GOCR_LIB +-# HAVE_LEPTONICA_LIB +-# HAVE_TESSERACT_LIB ++find_library(GOCR_LIB NAMES Pgm2asc) ++if(GOCR_LIB) ++ set(HAVE_GOCR_LIB 1) ++ set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${GOCR_LIB}) ++endif(GOCR_LIB) ++ ++pkg_check_modules(LEPTONICA lept) ++if(LEPTONICA_FOUND) ++ set(HAVE_LEPTONICA_LIB 1) ++ include_directories(SYSTEM ${LEPTONICA_INCLUDEDIR}) ++ set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${LEPTONICA_LDFLAGS}) ++endif(LEPTONICA_FOUND) ++ ++pkg_check_modules(TESSERACT tesseract) ++if(TESSERACT_FOUND) ++ set(HAVE_TESSERACT_LIB 1) ++ include_directories(SYSTEM ${TESSERACT_INCLUDEDIR}) ++ set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${TESSERACT_LDFLAGS}) ++endif(TESSERACT_FOUND) --void sort(float *x,int n) -+void willus_sort(float *x,int n) + # ---- Describe project - { - int top,n1; -diff -aur k2pdfopt_v2.21/willuslib/ocrjocr.c k2pdfopt_v2.21.new/willuslib/ocrjocr.c ---- k2pdfopt_v2.21/willuslib/ocrjocr.c 2012-11-12 13:09:42.000000000 -0300 -+++ k2pdfopt_v2.21.new/willuslib/ocrjocr.c 2014-07-26 11:36:46.699837185 -0300 +diff --git a/willuslib/CMakeLists.txt b/willuslib/CMakeLists.txt +index 
463bbc9..8043db5 100644 +--- a/willuslib/CMakeLists.txt ++++ b/willuslib/CMakeLists.txt +@@ -6,7 +6,7 @@ include_directories(..) + set(WILLUSLIB_SRC + ansi.c array.c bmp.c bmpdjvu.c bmpmupdf.c dtcompress.c filelist.c + fontdata.c fontrender.c gslpolyfit.c linux.c math.c mem.c ocr.c +- ocrjocr.c ocrtess.c pdfwrite.c point2d.c render.c strbuf.c string.c ++ ocrgocr.c ocrtess.c pdfwrite.c point2d.c render.c strbuf.c string.c + token.c wfile.c wgs.c wgui.c willusversion.c win.c winbmp.c + wincomdlg.c winmbox.c winshell.c wmupdf.c wmupdfinfo.c wpdf.c wsys.c + wzfile.c wleptonica.c +diff --git a/willuslib/ocrgocr.c b/willuslib/ocrgocr.c +index 6027e9a..fbe10f0 100644 +--- a/willuslib/ocrgocr.c ++++ b/willuslib/ocrgocr.c @@ -29,6 +29,8 @@ #ifdef HAVE_GOCR_LIB #include -+job_t *JOB; ++job_t *OCR_JOB; + /* ** bmp8 must be grayscale ** (x1,y1) and (x2,y2) from top left of bitmap -@@ -66,6 +68,7 @@ +@@ -63,6 +65,7 @@ void gocr_single_word_from_bmp8(char *text,int maxlen,WILLUSBITMAP *bmp8, h=y2-y1+1; dh=h+bw*2; job=&_job; -+ JOB=job; ++ OCR_JOB=job; job_init(job); job_init_image(job); // willus_mem_alloc_warn((void **)&job->src.p.p,w*h,funcname,10); -diff -aur k2pdfopt_v2.21/willuslib/string.c k2pdfopt_v2.21.new/willuslib/string.c ---- k2pdfopt_v2.21/willuslib/string.c 2014-02-03 00:37:44.000000000 -0300 -+++ k2pdfopt_v2.21.new/willuslib/string.c 2014-07-26 11:37:01.766506277 -0300 -@@ -81,7 +81,7 @@ - ** Returns NULL if EOF, otherwise returns pointer to the string. 
- ** - */ --char *get_line(char *buf,int max,FILE *f) -+char *willus_get_line(char *buf,int max,FILE *f) - - { - int i; -diff -aur k2pdfopt_v2.21/willuslib/willus.h k2pdfopt_v2.21.new/willuslib/willus.h ---- k2pdfopt_v2.21/willuslib/willus.h 2014-07-25 15:03:51.000000000 -0300 -+++ k2pdfopt_v2.21.new/willuslib/willus.h 2014-07-26 11:37:56.316506038 -0300 -@@ -214,9 +214,6 @@ - ** CMAKE handles the defines, not this source - ** (Mod from Dirk Thierbach, 31-Dec-2013) - */ --#ifdef USE_CMAKE --#include "config.h" --#else /* USE_CMAKE */ - - #ifndef HAVE_Z_LIB - #define HAVE_Z_LIB -@@ -268,7 +265,6 @@ - #undef HAVE_GSL_LIB - #endif - --#endif /* USE_CMAKE */ - /* - ** Consistency check - */ -@@ -533,7 +529,7 @@ - int *n,FILE *err); - int readxyz_ex (char *filename,double **x,double **y,double **z, - int *n,FILE *err,int ignore_after_semicolon); --void sort (float *x,int n); -+void willus_sort (float *x,int n); - void sortd (double *x,int n); - void sorti (int *x,int n); - void sortxy (float *x,float *y,int n); -@@ -602,7 +598,7 @@ - /* string.c */ - void clean_line (char *buf); - void clean_line_end(char *buf); --char *get_line (char *buf,int max,FILE *f); -+char *willus_get_line (char *buf,int max,FILE *f); - char *get_line_cf (char *buf,int max,FILE *f); - int mem_get_line_cf(char *buf,int maxlen,char *cptr,long *cindex,long csize); - int in_string (char *buffer,char *pattern); diff --git a/pkgs/applications/misc/k2pdfopt/tesseract.patch b/pkgs/applications/misc/k2pdfopt/tesseract.patch index 5cb6e0fe317..4827daa1a90 100644 --- a/pkgs/applications/misc/k2pdfopt/tesseract.patch +++ b/pkgs/applications/misc/k2pdfopt/tesseract.patch @@ -1,12 +1,13 @@ -diff -aur tesseract-ocr/api/Makefile.am tesseract-ocr.new/api/Makefile.am ---- tesseract-ocr/api/Makefile.am 2012-10-09 14:18:39.000000000 -0300 -+++ tesseract-ocr.new/api/Makefile.am 2014-03-20 18:43:13.926030341 -0300 -@@ -36,7 +36,7 @@ +diff --git a/api/Makefile.am b/api/Makefile.am +index d8c1e54..46ead13 100644 +--- 
a/api/Makefile.am ++++ b/api/Makefile.am +@@ -42,7 +42,7 @@ libtesseract_api_la_CPPFLAGS = $(AM_CPPFLAGS) if VISIBILITY libtesseract_api_la_CPPFLAGS += -DTESS_EXPORTS endif --libtesseract_api_la_SOURCES = baseapi.cpp capi.cpp -+libtesseract_api_la_SOURCES = baseapi.cpp capi.cpp tesscapi.cpp +-libtesseract_api_la_SOURCES = baseapi.cpp capi.cpp renderer.cpp pdfrenderer.cpp ++libtesseract_api_la_SOURCES = baseapi.cpp capi.cpp renderer.cpp pdfrenderer.cpp tesscapi.cpp lib_LTLIBRARIES += libtesseract.la libtesseract_la_LDFLAGS =