# nixpkgs/pkgs/development/python-modules/txtai/default.nix
# Last updated: 2024-10-10 10:06:16 -07:00
# 253 lines, 4.6 KiB, Nix

# Dependencies injected by callPackage from the nixpkgs Python package set.
{
lib,
buildPythonPackage,
pythonOlder,
fetchFromGitHub,
# build-system (PEP 517 backend)
setuptools,
# propagated build inputs (runtime dependencies)
faiss,
torch,
transformers,
huggingface-hub,
numpy,
pyyaml,
regex,
# optional-dependencies (upstream extras)
aiohttp,
fastapi,
uvicorn,
# TODO add apache-libcloud once packaged in nixpkgs
# , apache-libcloud
rich,
duckdb,
pillow,
networkx,
python-louvain,
onnx,
onnxruntime,
soundfile,
scipy,
ttstokenizer,
beautifulsoup4,
nltk,
pandas,
tika,
imagehash,
timm,
fasttext,
sentencepiece,
accelerate,
onnxmltools,
annoy,
hnswlib,
# TODO add pymagnitude-lite once packaged in nixpkgs
#, pymagnitude-lite
scikit-learn,
sentence-transformers,
croniter,
openpyxl,
requests,
xmltodict,
pgvector,
sqlite-vec,
python-multipart,
# native check inputs (test runner)
pytestCheckHook,
# check inputs (test-only dependencies)
httpx,
msgpack,
sqlalchemy,
}:
let
  version = "7.4.0";

  # Upstream extras (see txtai's setup.py extras_require) mapped to Nix
  # package lists. Commented-out entries await packaging in nixpkgs.
  api = [
    aiohttp
    fastapi
    pillow
    python-multipart
    uvicorn
  ];
  ann = [
    annoy
    hnswlib
    pgvector
    sqlalchemy
    sqlite-vec
  ];
  # cloud = [ apache-libcloud ];
  console = [ rich ];
  database = [
    duckdb
    pillow
  ];
  graph = [
    networkx
    python-louvain
  ];
  model = [
    onnx
    onnxruntime
  ];
  pipeline-audio = [
    onnx
    onnxruntime
    soundfile
    scipy
    ttstokenizer
  ];
  pipeline-data = [
    beautifulsoup4
    nltk
    pandas
    tika
  ];
  pipeline-image = [
    imagehash
    pillow
    timm
  ];
  pipeline-text = [
    fasttext
    sentencepiece
  ];
  pipeline-train = [
    accelerate
    onnx
    onnxmltools
    onnxruntime
  ];
  # The "pipeline" extra aggregates every pipeline-* extra.
  pipeline = pipeline-audio ++ pipeline-data ++ pipeline-image ++ pipeline-text ++ pipeline-train;
  similarity = [
    annoy
    fasttext
    hnswlib
    # pymagnitude-lite
    scikit-learn
    sentence-transformers
  ];
  workflow = [
    # apache-libcloud
    croniter
    openpyxl
    pandas
    pillow
    requests
    xmltodict
  ];
  all = api ++ ann ++ console ++ database ++ graph ++ model ++ pipeline ++ similarity ++ workflow;

  # Extras exposed on the derivation; every named extra above must be listed
  # here or it is silently unavailable to consumers.
  optional-dependencies = {
    inherit
      ann
      api
      console
      database
      graph
      model
      pipeline-audio
      # pipeline-data was defined above and folded into `pipeline`, but was
      # missing from this inherit list, hiding the extra from consumers.
      pipeline-data
      pipeline-image
      pipeline-text
      pipeline-train
      pipeline
      similarity
      workflow
      all
      ;
  };
in
buildPythonPackage {
  pname = "txtai";
  inherit version;
  pyproject = true;

  disabled = pythonOlder "3.8";

  src = fetchFromGitHub {
    owner = "neuml";
    repo = "txtai";
    rev = "refs/tags/v${version}";
    hash = "sha256-DQB12mFUMsKJ8cACowI1Vc7k2n1npdTOQknRmHd5EIM=";
  };

  # Was `buildTools`, which is not a buildPythonPackage attribute; with
  # `pyproject = true` the PEP 517 build backend must be in `build-system`,
  # otherwise setuptools is absent from the build and the build fails.
  build-system = [ setuptools ];

  pythonRemoveDeps = [
    # We call it faiss, not faiss-cpu.
    "faiss-cpu"
  ];

  dependencies = [
    faiss
    torch
    transformers
    huggingface-hub
    numpy
    pyyaml
    regex
  ];

  inherit optional-dependencies;

  # The Python imports check runs huggingface-hub which needs a writable directory.
  # `pythonImportsCheck` runs in the installPhase (before checkPhase).
  preInstall = ''
    export HF_HOME=$(mktemp -d)
  '';

  pythonImportsCheck = [ "txtai" ];

  nativeCheckInputs = [
    pytestCheckHook
  ] ++ optional-dependencies.ann ++ optional-dependencies.api ++ optional-dependencies.similarity;

  checkInputs = [
    httpx
    msgpack
    python-multipart
    sqlalchemy
  ];

  # The deselected paths depend on the huggingface hub and should be run as a passthru test
  # disabledTestPaths won't work as the problem is with the classes containing the tests
  # (in other words, it fails on __init__)
  pytestFlagsArray = [
    "test/python/test*.py"
    "--deselect=test/python/testcloud.py"
    "--deselect=test/python/testconsole.py"
    "--deselect=test/python/testembeddings.py"
    "--deselect=test/python/testgraph.py"
    "--deselect=test/python/testapi/testembeddings.py"
    "--deselect=test/python/testapi/testpipelines.py"
    "--deselect=test/python/testapi/testworkflow.py"
    "--deselect=test/python/testdatabase/testclient.py"
    "--deselect=test/python/testdatabase/testduckdb.py"
    "--deselect=test/python/testdatabase/testencoder.py"
    "--deselect=test/python/testworkflow.py"
  ];

  disabledTests = [
    # Hardcoded paths
    "testInvalidTar"
    "testInvalidZip"
    # Downloads from Huggingface
    "testPipeline"
    # Not finding sqlite-vec despite being supplied
    "testSQLite"
    "testSQLiteCustom"
  ];

  meta = {
    description = "Semantic search and workflows powered by language models";
    changelog = "https://github.com/neuml/txtai/releases/tag/v${version}";
    homepage = "https://github.com/neuml/txtai";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ happysalada ];
  };
}