nixpkgs/pkgs/development/python-modules/pyarrow/default.nix
Guillaume Girol 33afbf39f6 treewide: switch to nativeCheckInputs
checkInputs used to be added to nativeBuildInputs. Now we have
nativeCheckInputs to do that instead. Doing this treewide change allows
to keep hashes identical to before the introduction of
nativeCheckInputs.
2023-01-21 12:00:00 +00:00

150 lines
3.8 KiB
Nix

{ lib
, stdenv
, buildPythonPackage
, python
, pythonOlder
, arrow-cpp
, cffi
, cloudpickle
, cmake
, cython
, fsspec
, hypothesis
, numpy
, pandas
, pytestCheckHook
, pytest-lazy-fixture
, pkg-config
, scipy
, setuptools-scm
, six
}:
let
zero_or_one = cond: if cond then 1 else 0;
_arrow-cpp = arrow-cpp.override { python3 = python; };
in
buildPythonPackage rec {
pname = "pyarrow";
inherit (_arrow-cpp) version src;
disabled = pythonOlder "3.7";
sourceRoot = "apache-arrow-${version}/python";
nativeBuildInputs = [
cmake
cython
pkg-config
setuptools-scm
];
propagatedBuildInputs = [
cffi
cloudpickle
fsspec
numpy
scipy
six
];
nativeCheckInputs = [
hypothesis
pandas
pytestCheckHook
pytest-lazy-fixture
];
PYARROW_BUILD_TYPE = "release";
PYARROW_WITH_DATASET = zero_or_one true;
PYARROW_WITH_FLIGHT = zero_or_one _arrow-cpp.enableFlight;
PYARROW_WITH_HDFS = zero_or_one true;
PYARROW_WITH_PARQUET = zero_or_one true;
PYARROW_WITH_PLASMA = zero_or_one (!stdenv.isDarwin);
PYARROW_WITH_S3 = zero_or_one _arrow-cpp.enableS3;
PYARROW_CMAKE_OPTIONS = [
"-DCMAKE_INSTALL_RPATH=${ARROW_HOME}/lib"
];
ARROW_HOME = _arrow-cpp;
PARQUET_HOME = _arrow-cpp;
ARROW_TEST_DATA = lib.optionalString doCheck _arrow-cpp.ARROW_TEST_DATA;
doCheck = true;
dontUseCmakeConfigure = true;
__darwinAllowLocalNetworking = true;
preBuild = ''
export PYARROW_PARALLEL=$NIX_BUILD_CORES
'';
pytestFlagsArray = [
# Deselect a single test because pyarrow prints a 2-line error message where
# only a single line is expected. The additional line of output comes from
# the glog library which is an optional dependency of arrow-cpp that is
# enabled in nixpkgs.
# Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11393
"--deselect=pyarrow/tests/test_memory.py::test_env_var"
# these tests require access to s3 via the internet
"--deselect=pyarrow/tests/test_fs.py::test_resolve_s3_region"
"--deselect=pyarrow/tests/test_fs.py::test_s3_real_aws"
"--deselect=pyarrow/tests/test_fs.py::test_s3_real_aws_region_selection"
"--deselect=pyarrow/tests/test_fs.py::test_s3_options"
# Flaky test
"--deselect=pyarrow/tests/test_flight.py::test_roundtrip_errors"
"--deselect=pyarrow/tests/test_pandas.py::test_threaded_pandas_import"
# Flaky test, works locally but not on Hydra
"--deselect=pyarrow/tests/test_csv.py::TestThreadedCSVTableRead::test_cancellation"
] ++ lib.optionals stdenv.isDarwin [
# Requires loopback networking
"--deselect=pyarrow/tests/test_ipc.py::test_socket_"
"--deselect=pyarrow/tests/test_flight.py::test_never_sends_data"
"--deselect=pyarrow/tests/test_flight.py::test_large_descriptor"
"--deselect=pyarrow/tests/test_flight.py::test_large_metadata_client"
"--deselect=pyarrow/tests/test_flight.py::test_none_action_side_effect"
];
dontUseSetuptoolsCheck = true;
preCheck = ''
shopt -s extglob
rm -r pyarrow/!(conftest.py|tests)
mv pyarrow/conftest.py pyarrow/tests/parent_conftest.py
substituteInPlace pyarrow/tests/conftest.py --replace ..conftest .parent_conftest
'' + lib.optionalString stdenv.isDarwin ''
# OSError: [Errno 24] Too many open files
ulimit -n 1024
'';
pythonImportsCheck = [
"pyarrow"
] ++ map (module: "pyarrow.${module}") ([
"compute"
"csv"
"dataset"
"feather"
"flight"
"fs"
"hdfs"
"json"
"parquet"
] ++ lib.optionals (!stdenv.isDarwin) [
"plasma"
]);
meta = with lib; {
description = "A cross-language development platform for in-memory data";
homepage = "https://arrow.apache.org/";
license = licenses.asl20;
platforms = platforms.unix;
maintainers = with maintainers; [ veprbl cpcloud ];
};
}