Merge pull request #215176 from jboynyc/spacy-models

python3Packages.spacy_models.*: add Croatian, Korean, Finnish and Ukrainian language models
This commit is contained in:
Weijia Wang 2023-03-02 00:26:53 +02:00 committed by GitHub
commit 603d31c59d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 201 additions and 9 deletions

View File

@ -0,0 +1,41 @@
{ lib
, buildPythonPackage
, fetchFromGitHub
, dawg-python
, docopt
, pymorphy3-dicts-ru
, pymorphy3-dicts-uk
, pytestCheckHook
}:

# pymorphy3: morphological analyzer/inflection engine for Russian and
# Ukrainian, built from the upstream GitHub repository.
buildPythonPackage rec {
  pname = "pymorphy3";
  version = "1.2.0";

  # The upstream revision is named after the bare version string.
  src = fetchFromGitHub {
    owner = "no-plagiarism";
    repo = "pymorphy3";
    rev = version;
    hash = "sha256-5MXAYcjZPUrGf5G5e7Yml1SLukrZURA0TCv0GiP56rM=";
  };

  # Dependencies propagated to consumers; both the Russian and the Ukrainian
  # dictionary packages are pulled in alongside the analyzer itself.
  propagatedBuildInputs = [
    dawg-python
    docopt
    pymorphy3-dicts-ru
    pymorphy3-dicts-uk
  ];

  # The test suite is run through pytest during the check phase.
  nativeCheckInputs = [
    pytestCheckHook
  ];

  # Additionally verify that the top-level module can be imported.
  pythonImportsCheck = [ "pymorphy3" ];

  meta = {
    description = "Morphological analyzer/inflection engine for Russian and Ukrainian";
    homepage = "https://github.com/no-plagiarism/pymorphy3";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.jboy ];
  };
}

View File

@ -0,0 +1,26 @@
{ lib
, buildPythonPackage
, fetchPypi
}:

# Russian dictionary data package consumed by pymorphy3, fetched from PyPI.
buildPythonPackage rec {
  pname = "pymorphy3-dicts-ru";
  version = "2.4.417150.4580142";

  # `rec` lets the fetcher reuse the pname/version declared above.
  src = fetchPypi {
    inherit pname version;
    hash = "sha256-Oas3nUypBbr+1Q9a/Do95vlkNgV3b7yrxNMIjU7TgrA=";
  };

  # The package has no tests, so the check phase is disabled ...
  doCheck = false;

  # ... and an import check is used instead.
  pythonImportsCheck = [ "pymorphy3_dicts_ru" ];

  meta = {
    description = "Russian dictionaries for pymorphy3";
    homepage = "https://github.com/no-plagiarism/pymorphy3-dicts";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.jboy ];
  };
}

View File

@ -0,0 +1,26 @@
{ lib
, buildPythonPackage
, fetchPypi
}:

# Ukrainian dictionary data package consumed by pymorphy3, fetched from PyPI.
buildPythonPackage rec {
  pname = "pymorphy3-dicts-uk";
  version = "2.4.1.1.1663094765";

  # `rec` lets the fetcher reuse the pname/version declared above.
  src = fetchPypi {
    inherit pname version;
    hash = "sha256-s5RaNBNuGTgGzeZXuicdiKYHYedRN8E9E4qNFCqNEqw=";
  };

  # The package has no tests, so the check phase is disabled ...
  doCheck = false;

  # ... and an import check is used instead.
  pythonImportsCheck = [ "pymorphy3_dicts_uk" ];

  meta = {
    description = "Ukrainian dictionaries for pymorphy3";
    homepage = "https://github.com/no-plagiarism/pymorphy3-dicts";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.jboy ];
  };
}

View File

@ -59,10 +59,11 @@ def test_verbs(doc_en_core_web_trf):
assert [
token.lemma_ for token in doc_en_core_web_trf if token.pos_ == "VERB"] == [
'start',
'work',
'drive',
'take',
'tell',
'shake',
'turn',
'be',
'talk',
'say']

View File

@ -12,14 +12,14 @@
buildPythonPackage rec {
pname = "spacy-transformers";
version = "1.1.9";
version = "1.2.2";
format = "setuptools";
disabled = pythonOlder "3.7";
src = fetchPypi {
inherit pname version;
hash = "sha256-2uU6y/rsvNSLpeXL6O9IOQ0RMN0AEMH+/IKH6uufusU=";
hash = "sha256-Up9ZlLlAM0CDXEYDI95KsLzA0TBz/uZFqEgZLmNIABA=";
};
propagatedBuildInputs = [

View File

@ -41,6 +41,12 @@
"sha256": "0bmbk6vnad3xqhg0jg8dhfhh75vyahsm16mn8ddzchhl7wm8axcc",
"license": "cc-by-sa-40"
},
{
"pname": "da_core_news_trf",
"version": "3.5.0",
"sha256": "0b8mxr1ajyw8ccm0khmcp4n3jcxl4syfrmiy9kzf3cp4hcrnqnxy",
"license": "cc-by-sa-40"
},
{
"pname": "de_core_news_lg",
"version": "3.5.0",
@ -131,6 +137,24 @@
"sha256": "1py98kc6dxx5a6v6pc7hpldd6jm5s2a8vwp7l7d2jxadh947ma12",
"license": "gpl3"
},
{
"pname": "fi_core_news_lg",
"version": "3.5.0",
"sha256": "0j3r01a0yqgj8apfjv1wkblhqg86yp2nzxv51nf99pi2nmh81jzx",
"license": "cc-by-sa-40"
},
{
"pname": "fi_core_news_md",
"version": "3.5.0",
"sha256": "09qfzwyw6wfdmw1bgd1kfg1gdbmzal5z1r240djivxygzn6f1ixs",
"license": "cc-by-sa-40"
},
{
"pname": "fi_core_news_sm",
"version": "3.5.0",
"sha256": "1ly71cacy0gr62acvc3vl8dxh2czd6zkm7ijprisdblw17ik9yln",
"license": "cc-by-sa-40"
},
{
"pname": "fr_core_news_lg",
"version": "3.5.0",
@ -155,6 +179,24 @@
"sha256": "0ciyilnc5gx0f1qakim57pizj1dknm8l8gd72avmrmzg3z52mgl2",
"license": "lgpllr"
},
{
"pname": "hr_core_news_lg",
"version": "3.5.0",
"sha256": "1fvkzfi539fmp6jy3hjcrwvdxw5k6zc3h351s887xidlw3gs1kr3",
"license": "cc-by-sa-40"
},
{
"pname": "hr_core_news_md",
"version": "3.5.0",
"sha256": "1mi6k9qjxbigrl2fa60blyyz8b54jda5hc1s96vn9rykg4rni8cr",
"license": "cc-by-sa-40"
},
{
"pname": "hr_core_news_sm",
"version": "3.5.0",
"sha256": "1s22mx7y5h135ry5l49az30l7mw7fdrz53s4a9gaxfsp9rzs474g",
"license": "cc-by-sa-40"
},
{
"pname": "it_core_news_lg",
"version": "3.5.0",
@ -173,6 +215,24 @@
"sha256": "1fw262m7bl3g31gz0jb6fxrd385p67q82wfrsff6z9daxi3pi6ip",
"license": "cc-by-nc-sa-30"
},
{
"pname": "ko_core_news_lg",
"version": "3.5.0",
"sha256": "1q314wb114ynkf455cm8jd9jsx3yb6y0rrgf820ww31jlk5jzaa9",
"license": "cc-by-sa-40"
},
{
"pname": "ko_core_news_md",
"version": "3.5.0",
"sha256": "0dy7kk4bvjl944vv2m4hcvppar7clwq28y2rk40i3022jbqh2nxq",
"license": "cc-by-sa-40"
},
{
"pname": "ko_core_news_sm",
"version": "3.5.0",
"sha256": "1i5q8dpyfa2sy80hr81r6s9dqpawp36ni8slz035b0wd9sq3i73v",
"license": "cc-by-sa-40"
},
{
"pname": "lt_core_news_lg",
"version": "3.5.0",
@ -335,6 +395,30 @@
"sha256": "1c0w85xn8lnx394qmmnv3px68w0pha7fxx0qlqa74r2mfi3sv6s7",
"license": "cc-by-sa-40"
},
{
"pname": "uk_core_news_lg",
"version": "3.5.0",
"sha256": "0hl9xjnxslckc6wvfgkj30r3py8q95yj7mrxdb6m5gvknlq72kp2",
"license": "mit"
},
{
"pname": "uk_core_news_md",
"version": "3.5.0",
"sha256": "05mg719ra5khm61yr7xhfcsh3apl29s3h2wkq0v87gkyqn13812p",
"license": "mit"
},
{
"pname": "uk_core_news_sm",
"version": "3.5.0",
"sha256": "1dkbmjbyhf6vsr7c4m4njgi969sfhbdnp73skl3k206dign5qgnz",
"license": "mit"
},
{
"pname": "uk_core_news_trf",
"version": "3.5.0",
"sha256": "02bhvcivalifrxd3vl118799wvg6hgykj31wwfdsgnq68lwc28fb",
"license": "mit"
},
{
"pname": "xx_ent_wiki_sm",
"version": "3.5.0",

View File

@ -1,8 +1,9 @@
{ lib
, buildPythonPackage
, fetchurl
, jieba
, pymorphy2
, protobuf
, pymorphy3
, pymorphy3-dicts-uk
, sentencepiece
, spacy
, spacy-pkuseg
@ -15,8 +16,10 @@
}:
let
buildModelPackage = { pname, version, sha256, license }:
let
lang = builtins.substring 0 2 pname;
requires-protobuf = pname == "fr_dep_news_trf" || pname == "uk_core_news_trf";
in
buildPythonPackage {
inherit pname version;
@ -27,16 +30,21 @@ let
};
propagatedBuildInputs = [ spacy ]
++ lib.optionals (lang == "zh") [ jieba spacy-pkuseg ]
++ lib.optionals (lib.hasSuffix "_trf" pname) [ spacy-transformers ]
++ lib.optionals (lang == "ru") [ pymorphy2 ]
++ lib.optionals (lang == "ru") [ pymorphy3 ]
++ lib.optionals (lang == "uk") [ pymorphy3 pymorphy3-dicts-uk ]
++ lib.optionals (lang == "zh") [ spacy-pkuseg ]
++ lib.optionals (pname == "fr_dep_news_trf") [ sentencepiece ];
postPatch = lib.optionalString (pname == "fr_dep_news_trf") ''
postPatch = lib.optionalString requires-protobuf ''
substituteInPlace meta.json \
--replace "sentencepiece==0.1.91" "sentencepiece>=0.1.91"
--replace "protobuf<3.21.0" "protobuf"
'';
nativeBuildInputs = lib.optionals requires-protobuf [
protobuf
];
pythonImportsCheck = [ pname ];
passthru.updateScript = writeScript "update-spacy-models" ''

View File

@ -8453,6 +8453,12 @@ self: super: with self; {
pymorphy2-dicts-ru = callPackage ../development/python-modules/pymorphy2/dicts-ru.nix { };
pymorphy3 = callPackage ../development/python-modules/pymorphy3 { };
pymorphy3-dicts-ru = callPackage ../development/python-modules/pymorphy3/dicts-ru.nix { };
pymorphy3-dicts-uk = callPackage ../development/python-modules/pymorphy3/dicts-uk.nix { };
pympler = callPackage ../development/python-modules/pympler { };
pymsgbox = callPackage ../development/python-modules/pymsgbox { };