diff --git a/pkgs/development/libraries/arrow-cpp/default.nix b/pkgs/development/libraries/arrow-cpp/default.nix index 8a9074ccb90..ac53ae3bbd4 100644 --- a/pkgs/development/libraries/arrow-cpp/default.nix +++ b/pkgs/development/libraries/arrow-cpp/default.nix @@ -1,6 +1,7 @@ -{ stdenv, lib, fetchurl, fetchFromGitHub, fetchpatch, fixDarwinDylibNames +{ stdenv, lib, fetchurl, fetchFromGitHub, fixDarwinDylibNames , autoconf, boost, brotli, cmake, flatbuffers, gflags, glog, gtest, lz4 -, perl, python3, rapidjson, re2, snappy, thrift, utf8proc, which, zlib, zstd +, perl, python3, rapidjson, re2, snappy, thrift, utf8proc, which, xsimd +, zlib, zstd , enableShared ? !stdenv.hostPlatform.isStatic }: @@ -15,18 +16,18 @@ let parquet-testing = fetchFromGitHub { owner = "apache"; repo = "parquet-testing"; - rev = "e31fe1a02c9e9f271e4bfb8002d403c52f1ef8eb"; - sha256 = "02f51dvx8w5mw0bx3hn70hkn55mn1m65kzdps1ifvga9hghpy0sh"; + rev = "ddd898958803cb89b7156c6350584d1cda0fe8de"; + sha256 = "0n16xqlpxn2ryp43w8pppxrbwmllx6sk4hv3ycgikfj57nd3ibc0"; }; in stdenv.mkDerivation rec { pname = "arrow-cpp"; - version = "3.0.0"; + version = "4.0.0"; src = fetchurl { url = "mirror://apache/arrow/arrow-${version}/apache-arrow-${version}.tar.gz"; - sha256 = "0yp2b02wrc3s50zd56fmpz4nhhbihp0zw329v4zizaipwlxwrhkk"; + sha256 = "1bj9jr0pgq9f2nyzqiyj3cl0hcx3c83z2ym6rpdkp59ff2zx0caa"; }; sourceRoot = "apache-arrow-${version}/cpp"; @@ -90,6 +91,10 @@ in stdenv.mkDerivation rec { "-DARROW_VERBOSE_THIRDPARTY_BUILD=ON" "-DARROW_DEPENDENCY_SOURCE=SYSTEM" "-DARROW_DEPENDENCY_USE_SHARED=${if enableShared then "ON" else "OFF"}" + "-DARROW_COMPUTE=ON" + "-DARROW_CSV=ON" + "-DARROW_DATASET=ON" + "-DARROW_JSON=ON" "-DARROW_PLASMA=ON" # Disable Python for static mode because openblas is currently broken there. "-DARROW_PYTHON=${if enableShared then "ON" else "OFF"}" @@ -111,6 +116,8 @@ in stdenv.mkDerivation rec { "-DCMAKE_INSTALL_RPATH=@loader_path/../lib" # needed for tools executables ] ++ lib.optional (!stdenv.isx86_64) "-DARROW_USE_SIMD=OFF"; + ARROW_XSIMD_URL = xsimd.src; + doInstallCheck = true; ARROW_TEST_DATA = if doInstallCheck then "${arrow-testing}/data" else null; diff --git a/pkgs/development/libraries/xsimd/default.nix b/pkgs/development/libraries/xsimd/default.nix new file mode 100644 index 00000000000..745ee9ee3fc --- /dev/null +++ b/pkgs/development/libraries/xsimd/default.nix @@ -0,0 +1,56 @@ +{ lib, stdenv, fetchFromGitHub, cmake, gtest }: +let + version = "7.5.0"; + + darwin_src = fetchFromGitHub { + owner = "xtensor-stack"; + repo = "xsimd"; + rev = version; + sha256 = "eGAdRSYhf7rbFdm8g1Tz1ZtSVu44yjH/loewblhv9Vs="; + # Avoid requiring apple_sdk. We're doing this here instead of in the patchPhase + # because this source is directly used in arrow-cpp. + # pyconfig.h defines _GNU_SOURCE to 1, so we need to stamp that out too. + # Upstream PR with a better fix: https://github.com/xtensor-stack/xsimd/pull/463 + postFetch = '' + mkdir $out + tar -xf $downloadedFile --directory=$out --strip-components=1 + substituteInPlace $out/include/xsimd/types/xsimd_scalar.hpp \ + --replace 'defined(__APPLE__)' 0 \ + --replace 'defined(_GNU_SOURCE)' 0 + ''; + }; + + src = fetchFromGitHub { + owner = "xtensor-stack"; + repo = "xsimd"; + rev = version; + sha256 = "0c9pq5vz43j99z83w3b9qylfi66mn749k1afpv5cwfxggbxvy63f"; + }; +in stdenv.mkDerivation { + pname = "xsimd"; + inherit version; + src = if stdenv.hostPlatform.isDarwin then darwin_src else src; + + nativeBuildInputs = [ cmake ]; + + cmakeFlags = [ "-DBUILD_TESTS=ON" ]; + + doCheck = true; + checkInputs = [ gtest ]; + checkTarget = "xtest"; + GTEST_FILTER = let + # Upstream Issue: https://github.com/xtensor-stack/xsimd/issues/456 + filteredTests = lib.optionals stdenv.hostPlatform.isDarwin [ + "error_gamma_test/sse_double.gamma" + "error_gamma_test/avx_double.gamma" + ]; + in "-${builtins.concatStringsSep ":" filteredTests}"; + + meta = with lib; { + description = "C++ wrappers for SIMD intrinsics"; + homepage = "https://github.com/xtensor-stack/xsimd"; + license = licenses.bsd3; + maintainers = with maintainers; [ tobim ]; + platforms = platforms.all; + }; +} diff --git a/pkgs/development/python-modules/pyarrow/default.nix b/pkgs/development/python-modules/pyarrow/default.nix index a38d5df50dd..dabe85b9043 100644 --- a/pkgs/development/python-modules/pyarrow/default.nix +++ b/pkgs/development/python-modules/pyarrow/default.nix @@ -34,12 +34,17 @@ buildPythonPackage rec { export PYARROW_PARALLEL=$NIX_BUILD_CORES ''; - # Deselect a single test because pyarrow prints a 2-line error message where - # only a single line is expected. The additional line of output comes from - # the glog library which is an optional dependency of arrow-cpp that is - # enabled in nixpkgs. - # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11393 - pytestFlagsArray = [ "--deselect=pyarrow/tests/test_memory.py::test_env_var" ]; + pytestFlagsArray = [ + # Deselect a single test because pyarrow prints a 2-line error message where + # only a single line is expected. The additional line of output comes from + # the glog library which is an optional dependency of arrow-cpp that is + # enabled in nixpkgs. + # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11393 + "--deselect=pyarrow/tests/test_memory.py::test_env_var" + # Deselect the parquet dataset write test because it erroneously fails to find the + # pyarrow._dataset module. + "--deselect=pyarrow/tests/parquet/test_dataset.py::test_write_to_dataset_filesystem" + ]; dontUseSetuptoolsCheck = true; preCheck = '' diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix index d54ae4a179f..cd038c72939 100644 --- a/pkgs/top-level/all-packages.nix +++ b/pkgs/top-level/all-packages.nix @@ -18104,6 +18104,8 @@ in xlslib = callPackage ../development/libraries/xlslib { }; + xsimd = callPackage ../development/libraries/xsimd { }; + xvidcore = callPackage ../development/libraries/xvidcore { }; xxHash = callPackage ../development/libraries/xxHash {};