mirror of
https://github.com/NixOS/nixpkgs.git
synced 2024-11-25 00:12:56 +00:00
python3Packages.trafilatura: init at 1.6.3
This commit is contained in:
parent
16d9eff467
commit
8200b0b5a0
67
pkgs/development/python-modules/trafilatura/default.nix
Normal file
67
pkgs/development/python-modules/trafilatura/default.nix
Normal file
@ -0,0 +1,67 @@
|
||||
{ lib
|
||||
, buildPythonPackage
|
||||
, fetchPypi
|
||||
, pytestCheckHook
|
||||
, pythonOlder
|
||||
, certifi
|
||||
, charset-normalizer
|
||||
, courlan
|
||||
, htmldate
|
||||
, justext
|
||||
, lxml
|
||||
, urllib3
|
||||
}:
|
||||
|
||||
buildPythonPackage rec {
|
||||
pname = "trafilatura";
|
||||
version = "1.6.3";
|
||||
format = "setuptools";
|
||||
|
||||
disabled = pythonOlder "3.6";
|
||||
|
||||
src = fetchPypi {
|
||||
inherit pname version;
|
||||
hash = "sha256-Zx3W4AAOEBxLzo1w9ECLy3n8vyJ17iVZHv4z4sihYA0=";
|
||||
};
|
||||
|
||||
propagatedBuildInputs = [
|
||||
certifi
|
||||
charset-normalizer
|
||||
courlan
|
||||
htmldate
|
||||
justext
|
||||
lxml
|
||||
urllib3
|
||||
];
|
||||
|
||||
nativeCheckInputs = [ pytestCheckHook ];
|
||||
|
||||
# disable tests that require an internet connection
|
||||
disabledTests = [
|
||||
"test_download"
|
||||
"test_fetch"
|
||||
"test_redirection"
|
||||
"test_meta_redirections"
|
||||
"test_crawl_page"
|
||||
"test_whole"
|
||||
"test_probing"
|
||||
"test_cli_pipeline"
|
||||
];
|
||||
|
||||
# remove the trafilatura_gui entry point from setup.py because the GUI is not supported in this packaging
|
||||
# make the CLI tests invoke the installed binary at $out/bin/trafilatura instead of relying on PATH
|
||||
postPatch = ''
|
||||
substituteInPlace setup.py --replace '"trafilatura_gui=trafilatura.gui:main",' ""
|
||||
substituteInPlace tests/cli_tests.py --replace "trafilatura_bin = 'trafilatura'" "trafilatura_bin = '$out/bin/trafilatura'"
|
||||
'';
|
||||
|
||||
pythonImportsCheck = [ "trafilatura" ];
|
||||
|
||||
meta = with lib; {
|
||||
description = "Python package and command-line tool designed to gather text on the Web";
|
||||
homepage = "https://trafilatura.readthedocs.io";
|
||||
changelog = "https://github.com/adbar/trafilatura/blob/v${version}/HISTORY.md";
|
||||
license = licenses.gpl3Plus;
|
||||
maintainers = with maintainers; [ jokatzke ];
|
||||
};
|
||||
}
|
@ -14493,6 +14493,8 @@ self: super: with self; {
|
||||
|
||||
trackpy = callPackage ../development/python-modules/trackpy { };
|
||||
|
||||
trafilatura = callPackage ../development/python-modules/trafilatura { };
|
||||
|
||||
trailrunner = callPackage ../development/python-modules/trailrunner {};
|
||||
|
||||
trainer = callPackage ../development/python-modules/trainer {};
|
||||
|
Loading…
Reference in New Issue
Block a user