# nixpkgs/pkgs/development/python-modules/scrapy/default.nix (Nix, 148 lines, 3.5 KiB)

# Package expression for Scrapy, built from the GitHub source with
# buildPythonPackage. Runs the upstream test suite through pytestCheckHook
# (minus the tests listed below) and installs the man page and shell
# completions shipped in the source tree's `extras/` directory.
#
# NOTE(review): `pillow` and `pytest-twisted` are accepted as arguments but are
# not referenced anywhere in this expression. They are kept so that existing
# `.override` callers keep working; confirm whether they belong in
# `checkInputs` or can be dropped.
{ lib
, stdenv
, botocore
, buildPythonPackage
, cryptography
, cssselect
, fetchFromGitHub
, fetchpatch
, glibcLocales
, installShellFiles
, itemadapter
, itemloaders
, jmespath
, lxml
, parsel
, pillow
, protego
, pydispatcher
, pyopenssl
, pytest-twisted
, pytestCheckHook
, pythonOlder
, queuelib
, service-identity
, sybil
, testfixtures
, twisted
, w3lib
, zope_interface
}:

buildPythonPackage rec {
  pname = "scrapy";
  version = "2.5.0";

  # Not built for Python interpreters older than 3.6.
  disabled = pythonOlder "3.6";

  # Owner and repository are both "scrapy"; the fetched revision is the
  # version string itself.
  src = fetchFromGitHub {
    owner = pname;
    repo = pname;
    rev = version;
    sha256 = "09lxnjz1cw37i9bgk8sci2xxknj20gi2lq8l7i0b3xw7q8bxzp7h";
  };

  # installShellFiles provides `installManPage`, used in postInstall.
  nativeBuildInputs = [
    installShellFiles
  ];

  propagatedBuildInputs = [
    cryptography
    cssselect
    itemadapter
    itemloaders
    lxml
    parsel
    protego
    pydispatcher
    pyopenssl
    queuelib
    service-identity
    twisted
    w3lib
    zope_interface
  ];

  checkInputs = [
    botocore
    glibcLocales
    jmespath
    pytestCheckHook
    sybil
    testfixtures
  ];

  patches = [
    # Require setuptools, https://github.com/scrapy/scrapy/pull/5122
    (fetchpatch {
      name = "add-setuptools.patch";
      url = "https://github.com/scrapy/scrapy/commit/4f500342c8ad4674b191e1fab0d1b2ac944d7d3e.patch";
      sha256 = "14030sfv1cf7dy4yww02b49mg39cfcg4bv7ys1iwycfqag3xcjda";
    })
    # Make Twisted[http2] installation optional, https://github.com/scrapy/scrapy/pull/5113
    (fetchpatch {
      name = "remove-h2.patch";
      url = "https://github.com/scrapy/scrapy/commit/c5b1ee810167266fcd259f263dbfc0fe0204761a.patch";
      sha256 = "1gw28wg8qcb0al59rz214hm17smspi6j5kg62nr1r850pykyrsqk";
    })
  ];

  # Build-time locale; presumably backed by glibcLocales from checkInputs.
  LC_ALL = "en_US.UTF-8";

  # Disable doctest plugin because it causes pytest to hang
  preCheck = ''
    substituteInPlace pytest.ini --replace "--doctest-modules" ""
  '';

  disabledTestPaths = [
    "tests/test_proxy_connect.py"
    "tests/test_utils_display.py"
    "tests/test_command_check.py"
    # Don't test the documentation
    "docs"
  ];

  disabledTests = [
    # It's unclear if the failures are related to libxml2, https://github.com/NixOS/nixpkgs/pull/123890
    "test_nested_css"
    "test_nested_xpath"
    "test_flavor_detection"
    # Requires network access
    "FTPFeedStorageTest"
    "FeedExportTest"
    "test_custom_asyncio_loop_enabled_true"
    "test_custom_loop_asyncio"
    "test_custom_loop_asyncio_deferred_signal"
    "FileFeedStoragePreFeedOptionsTest" # https://github.com/scrapy/scrapy/issues/5157
    # Fails with AssertionError
    "test_peek_fifo"
    "test_peek_one_element"
    "test_peek_lifo"
  ] ++ lib.optionals stdenv.isDarwin [
    # Additionally skipped on Darwin only.
    "test_xmliter_encoding"
    "test_download"
  ];

  # Install the man page and the bash/zsh completion scripts from `extras/`.
  postInstall = ''
    installManPage extras/scrapy.1
    install -m 644 -D extras/scrapy_bash_completion $out/share/bash-completion/completions/scrapy
    install -m 644 -D extras/scrapy_zsh_completion $out/share/zsh/site-functions/_scrapy
  '';

  pythonImportsCheck = [ "scrapy" ];

  # NOTE(review): presumably needed so tests can reach local network
  # endpoints inside the Darwin sandbox — confirm.
  __darwinAllowLocalNetworking = true;

  meta = with lib; {
    description = "High-level web crawling and web scraping framework";
    longDescription = ''
      Scrapy is a fast high-level web crawling and web scraping framework, used to crawl
      websites and extract structured data from their pages. It can be used for a wide
      range of purposes, from data mining to monitoring and automated testing.
    '';
    homepage = "https://scrapy.org/";
    license = licenses.bsd3;
    maintainers = with maintainers; [ drewkett marsam ];
    platforms = platforms.unix;
  };
}