mirror of
https://github.com/NixOS/nixpkgs.git
synced 2024-11-30 19:02:57 +00:00
Merge pull request #215176 from jboynyc/spacy-models
python3Packages.spacy_models.*: add Croatian, Korean, Finnish and Ukrainian language models
This commit is contained in:
commit
603d31c59d
41
pkgs/development/python-modules/pymorphy3/default.nix
Normal file
41
pkgs/development/python-modules/pymorphy3/default.nix
Normal file
@ -0,0 +1,41 @@
|
||||
{ lib
|
||||
, fetchFromGitHub
|
||||
, buildPythonPackage
|
||||
, dawg-python
|
||||
, docopt
|
||||
, pytestCheckHook  # only referenced in nativeCheckInputs below
|
||||
, pymorphy3-dicts-ru  # dictionary package, propagated below
|
||||
, pymorphy3-dicts-uk  # dictionary package, propagated below
|
||||
}:
|
||||
|
||||
buildPythonPackage rec {  # packages pymorphy3 from its GitHub source; `rec` lets src refer to pname/version
|
||||
pname = "pymorphy3";
|
||||
version = "1.2.0";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "no-plagiarism";
|
||||
repo = pname;  # evaluates to "pymorphy3"
|
||||
rev = version;  # fetches the revision named exactly like the version string ("1.2.0")
|
||||
hash = "sha256-5MXAYcjZPUrGf5G5e7Yml1SLukrZURA0TCv0GiP56rM=";
|
||||
};
|
||||
|
||||
propagatedBuildInputs = [  # both the Russian and the Ukrainian dictionaries are listed here
|
||||
dawg-python
|
||||
docopt
|
||||
pymorphy3-dicts-ru
|
||||
pymorphy3-dicts-uk
|
||||
];
|
||||
|
||||
nativeCheckInputs = [  # check-phase-only inputs
|
||||
pytestCheckHook
|
||||
];
|
||||
|
||||
pythonImportsCheck = [ "pymorphy3" ];  # module name handed to the import check
|
||||
|
||||
meta = with lib; {  # `with lib` brings `licenses` and `maintainers` into scope
|
||||
description = "Morphological analyzer/inflection engine for Russian and Ukrainian";
|
||||
homepage = "https://github.com/no-plagiarism/pymorphy3";
|
||||
license = licenses.mit;
|
||||
maintainers = with maintainers; [ jboy ];
|
||||
};
|
||||
}
|
26
pkgs/development/python-modules/pymorphy3/dicts-ru.nix
Normal file
26
pkgs/development/python-modules/pymorphy3/dicts-ru.nix
Normal file
@ -0,0 +1,26 @@
|
||||
{ lib
|
||||
, fetchPypi
|
||||
, buildPythonPackage
|
||||
}:
|
||||
|
||||
buildPythonPackage rec {  # packages the Russian dictionary data for pymorphy3 from PyPI
|
||||
pname = "pymorphy3-dicts-ru";
|
||||
version = "2.4.417150.4580142";
|
||||
|
||||
src = fetchPypi {
|
||||
inherit pname version;  # fetchPypi receives the same pname and version as the package itself
|
||||
hash = "sha256-Oas3nUypBbr+1Q9a/Do95vlkNgV3b7yrxNMIjU7TgrA=";
|
||||
};
|
||||
|
||||
# the package has no tests, so the check phase is disabled
|
||||
doCheck = false;
|
||||
|
||||
pythonImportsCheck = [ "pymorphy3_dicts_ru" ];  # import name uses underscores, unlike the hyphenated pname
|
||||
|
||||
meta = with lib; {  # `with lib` brings `licenses` and `maintainers` into scope
|
||||
description = "Russian dictionaries for pymorphy3";
|
||||
homepage = "https://github.com/no-plagiarism/pymorphy3-dicts";
|
||||
license = licenses.mit;
|
||||
maintainers = with maintainers; [ jboy ];
|
||||
};
|
||||
}
|
26
pkgs/development/python-modules/pymorphy3/dicts-uk.nix
Normal file
26
pkgs/development/python-modules/pymorphy3/dicts-uk.nix
Normal file
@ -0,0 +1,26 @@
|
||||
{ lib
|
||||
, fetchPypi
|
||||
, buildPythonPackage
|
||||
}:
|
||||
|
||||
buildPythonPackage rec {  # packages the Ukrainian dictionary data for pymorphy3 from PyPI
|
||||
pname = "pymorphy3-dicts-uk";
|
||||
version = "2.4.1.1.1663094765";
|
||||
|
||||
src = fetchPypi {
|
||||
inherit pname version;  # fetchPypi receives the same pname and version as the package itself
|
||||
hash = "sha256-s5RaNBNuGTgGzeZXuicdiKYHYedRN8E9E4qNFCqNEqw=";
|
||||
};
|
||||
|
||||
# the package has no tests, so the check phase is disabled
|
||||
doCheck = false;
|
||||
|
||||
pythonImportsCheck = [ "pymorphy3_dicts_uk" ];  # import name uses underscores, unlike the hyphenated pname
|
||||
|
||||
meta = with lib; {  # `with lib` brings `licenses` and `maintainers` into scope
|
||||
description = "Ukrainian dictionaries for pymorphy3";
|
||||
homepage = "https://github.com/no-plagiarism/pymorphy3-dicts";
|
||||
license = licenses.mit;
|
||||
maintainers = with maintainers; [ jboy ];
|
||||
};
|
||||
}
|
@ -59,10 +59,11 @@ def test_verbs(doc_en_core_web_trf):
|
||||
assert [
|
||||
token.lemma_ for token in doc_en_core_web_trf if token.pos_ == "VERB"] == [
|
||||
'start',
|
||||
'work',
|
||||
'drive',
|
||||
'take',
|
||||
'tell',
|
||||
'shake',
|
||||
'turn',
|
||||
'be',
|
||||
'talk',
|
||||
'say']
|
||||
|
@ -12,14 +12,14 @@
|
||||
|
||||
buildPythonPackage rec {
|
||||
pname = "spacy-transformers";
|
||||
version = "1.1.9";
|
||||
version = "1.2.2";
|
||||
format = "setuptools";
|
||||
|
||||
disabled = pythonOlder "3.7";
|
||||
|
||||
src = fetchPypi {
|
||||
inherit pname version;
|
||||
hash = "sha256-2uU6y/rsvNSLpeXL6O9IOQ0RMN0AEMH+/IKH6uufusU=";
|
||||
hash = "sha256-Up9ZlLlAM0CDXEYDI95KsLzA0TBz/uZFqEgZLmNIABA=";
|
||||
};
|
||||
|
||||
propagatedBuildInputs = [
|
||||
|
@ -41,6 +41,12 @@
|
||||
"sha256": "0bmbk6vnad3xqhg0jg8dhfhh75vyahsm16mn8ddzchhl7wm8axcc",
|
||||
"license": "cc-by-sa-40"
|
||||
},
|
||||
{
|
||||
"pname": "da_core_news_trf",
|
||||
"version": "3.5.0",
|
||||
"sha256": "0b8mxr1ajyw8ccm0khmcp4n3jcxl4syfrmiy9kzf3cp4hcrnqnxy",
|
||||
"license": "cc-by-sa-40"
|
||||
},
|
||||
{
|
||||
"pname": "de_core_news_lg",
|
||||
"version": "3.5.0",
|
||||
@ -131,6 +137,24 @@
|
||||
"sha256": "1py98kc6dxx5a6v6pc7hpldd6jm5s2a8vwp7l7d2jxadh947ma12",
|
||||
"license": "gpl3"
|
||||
},
|
||||
{
|
||||
"pname": "fi_core_news_lg",
|
||||
"version": "3.5.0",
|
||||
"sha256": "0j3r01a0yqgj8apfjv1wkblhqg86yp2nzxv51nf99pi2nmh81jzx",
|
||||
"license": "cc-by-sa-40"
|
||||
},
|
||||
{
|
||||
"pname": "fi_core_news_md",
|
||||
"version": "3.5.0",
|
||||
"sha256": "09qfzwyw6wfdmw1bgd1kfg1gdbmzal5z1r240djivxygzn6f1ixs",
|
||||
"license": "cc-by-sa-40"
|
||||
},
|
||||
{
|
||||
"pname": "fi_core_news_sm",
|
||||
"version": "3.5.0",
|
||||
"sha256": "1ly71cacy0gr62acvc3vl8dxh2czd6zkm7ijprisdblw17ik9yln",
|
||||
"license": "cc-by-sa-40"
|
||||
},
|
||||
{
|
||||
"pname": "fr_core_news_lg",
|
||||
"version": "3.5.0",
|
||||
@ -155,6 +179,24 @@
|
||||
"sha256": "0ciyilnc5gx0f1qakim57pizj1dknm8l8gd72avmrmzg3z52mgl2",
|
||||
"license": "lgpllr"
|
||||
},
|
||||
{
|
||||
"pname": "hr_core_news_lg",
|
||||
"version": "3.5.0",
|
||||
"sha256": "1fvkzfi539fmp6jy3hjcrwvdxw5k6zc3h351s887xidlw3gs1kr3",
|
||||
"license": "cc-by-sa-40"
|
||||
},
|
||||
{
|
||||
"pname": "hr_core_news_md",
|
||||
"version": "3.5.0",
|
||||
"sha256": "1mi6k9qjxbigrl2fa60blyyz8b54jda5hc1s96vn9rykg4rni8cr",
|
||||
"license": "cc-by-sa-40"
|
||||
},
|
||||
{
|
||||
"pname": "hr_core_news_sm",
|
||||
"version": "3.5.0",
|
||||
"sha256": "1s22mx7y5h135ry5l49az30l7mw7fdrz53s4a9gaxfsp9rzs474g",
|
||||
"license": "cc-by-sa-40"
|
||||
},
|
||||
{
|
||||
"pname": "it_core_news_lg",
|
||||
"version": "3.5.0",
|
||||
@ -173,6 +215,24 @@
|
||||
"sha256": "1fw262m7bl3g31gz0jb6fxrd385p67q82wfrsff6z9daxi3pi6ip",
|
||||
"license": "cc-by-nc-sa-30"
|
||||
},
|
||||
{
|
||||
"pname": "ko_core_news_lg",
|
||||
"version": "3.5.0",
|
||||
"sha256": "1q314wb114ynkf455cm8jd9jsx3yb6y0rrgf820ww31jlk5jzaa9",
|
||||
"license": "cc-by-sa-40"
|
||||
},
|
||||
{
|
||||
"pname": "ko_core_news_md",
|
||||
"version": "3.5.0",
|
||||
"sha256": "0dy7kk4bvjl944vv2m4hcvppar7clwq28y2rk40i3022jbqh2nxq",
|
||||
"license": "cc-by-sa-40"
|
||||
},
|
||||
{
|
||||
"pname": "ko_core_news_sm",
|
||||
"version": "3.5.0",
|
||||
"sha256": "1i5q8dpyfa2sy80hr81r6s9dqpawp36ni8slz035b0wd9sq3i73v",
|
||||
"license": "cc-by-sa-40"
|
||||
},
|
||||
{
|
||||
"pname": "lt_core_news_lg",
|
||||
"version": "3.5.0",
|
||||
@ -335,6 +395,30 @@
|
||||
"sha256": "1c0w85xn8lnx394qmmnv3px68w0pha7fxx0qlqa74r2mfi3sv6s7",
|
||||
"license": "cc-by-sa-40"
|
||||
},
|
||||
{
|
||||
"pname": "uk_core_news_lg",
|
||||
"version": "3.5.0",
|
||||
"sha256": "0hl9xjnxslckc6wvfgkj30r3py8q95yj7mrxdb6m5gvknlq72kp2",
|
||||
"license": "mit"
|
||||
},
|
||||
{
|
||||
"pname": "uk_core_news_md",
|
||||
"version": "3.5.0",
|
||||
"sha256": "05mg719ra5khm61yr7xhfcsh3apl29s3h2wkq0v87gkyqn13812p",
|
||||
"license": "mit"
|
||||
},
|
||||
{
|
||||
"pname": "uk_core_news_sm",
|
||||
"version": "3.5.0",
|
||||
"sha256": "1dkbmjbyhf6vsr7c4m4njgi969sfhbdnp73skl3k206dign5qgnz",
|
||||
"license": "mit"
|
||||
},
|
||||
{
|
||||
"pname": "uk_core_news_trf",
|
||||
"version": "3.5.0",
|
||||
"sha256": "02bhvcivalifrxd3vl118799wvg6hgykj31wwfdsgnq68lwc28fb",
|
||||
"license": "mit"
|
||||
},
|
||||
{
|
||||
"pname": "xx_ent_wiki_sm",
|
||||
"version": "3.5.0",
|
||||
|
@ -1,8 +1,9 @@
|
||||
{ lib
|
||||
, buildPythonPackage
|
||||
, fetchurl
|
||||
, jieba
|
||||
, pymorphy2
|
||||
, protobuf
|
||||
, pymorphy3
|
||||
, pymorphy3-dicts-uk
|
||||
, sentencepiece
|
||||
, spacy
|
||||
, spacy-pkuseg
|
||||
@ -15,8 +16,10 @@
|
||||
}:
|
||||
let
|
||||
buildModelPackage = { pname, version, sha256, license }:
|
||||
|
||||
let
|
||||
lang = builtins.substring 0 2 pname;
|
||||
requires-protobuf = pname == "fr_dep_news_trf" || pname == "uk_core_news_trf";
|
||||
in
|
||||
buildPythonPackage {
|
||||
inherit pname version;
|
||||
@ -27,16 +30,21 @@ let
|
||||
};
|
||||
|
||||
propagatedBuildInputs = [ spacy ]
|
||||
++ lib.optionals (lang == "zh") [ jieba spacy-pkuseg ]
|
||||
++ lib.optionals (lib.hasSuffix "_trf" pname) [ spacy-transformers ]
|
||||
++ lib.optionals (lang == "ru") [ pymorphy2 ]
|
||||
++ lib.optionals (lang == "ru") [ pymorphy3 ]
|
||||
++ lib.optionals (lang == "uk") [ pymorphy3 pymorphy3-dicts-uk ]
|
||||
++ lib.optionals (lang == "zh") [ spacy-pkuseg ]
|
||||
++ lib.optionals (pname == "fr_dep_news_trf") [ sentencepiece ];
|
||||
|
||||
postPatch = lib.optionalString (pname == "fr_dep_news_trf") ''
|
||||
postPatch = lib.optionalString requires-protobuf ''
|
||||
substituteInPlace meta.json \
|
||||
--replace "sentencepiece==0.1.91" "sentencepiece>=0.1.91"
|
||||
--replace "protobuf<3.21.0" "protobuf"
|
||||
'';
|
||||
|
||||
nativeBuildInputs = lib.optionals requires-protobuf [
|
||||
protobuf
|
||||
];
|
||||
|
||||
pythonImportsCheck = [ pname ];
|
||||
|
||||
passthru.updateScript = writeScript "update-spacy-models" ''
|
||||
|
@ -8453,6 +8453,12 @@ self: super: with self; {
|
||||
|
||||
pymorphy2-dicts-ru = callPackage ../development/python-modules/pymorphy2/dicts-ru.nix { };
|
||||
|
||||
pymorphy3 = callPackage ../development/python-modules/pymorphy3 { };
|
||||
|
||||
pymorphy3-dicts-ru = callPackage ../development/python-modules/pymorphy3/dicts-ru.nix { };
|
||||
|
||||
pymorphy3-dicts-uk = callPackage ../development/python-modules/pymorphy3/dicts-uk.nix { };
|
||||
|
||||
pympler = callPackage ../development/python-modules/pympler { };
|
||||
|
||||
pymsgbox = callPackage ../development/python-modules/pymsgbox { };
|
||||
|
Loading…
Reference in New Issue
Block a user