mirror of
https://github.com/NixOS/nixpkgs.git
synced 2024-11-22 23:13:19 +00:00
Merge pull request #138463 from dotlambda/ocrmypdf-pythonPackages
ocrmypdf: move to python3Packages
This commit is contained in:
commit
b8c97a0d97
@ -5,7 +5,6 @@
|
||||
, ghostscript
|
||||
, imagemagick
|
||||
, jbig2enc
|
||||
, ocrmypdf
|
||||
, optipng
|
||||
, pngquant
|
||||
, qpdf
|
||||
|
@ -1,34 +1,32 @@
|
||||
{ fetchFromGitHub
|
||||
{ lib
|
||||
, buildPythonPackage
|
||||
, cffi
|
||||
, coloredlogs
|
||||
, fetchFromGitHub
|
||||
, ghostscript
|
||||
, img2pdf
|
||||
, importlib-resources
|
||||
, jbig2enc
|
||||
, leptonica
|
||||
, pdfminer
|
||||
, pikepdf
|
||||
, pillow
|
||||
, pluggy
|
||||
, pngquant
|
||||
, python3
|
||||
, python3Packages
|
||||
, qpdf
|
||||
, lib
|
||||
, pytest-xdist
|
||||
, pytestCheckHook
|
||||
, reportlab
|
||||
, setuptools
|
||||
, setuptools-scm
|
||||
, setuptools-scm-git-archive
|
||||
, stdenv
|
||||
, tesseract4
|
||||
, unpaper
|
||||
, substituteAll
|
||||
, tesseract4
|
||||
, tqdm
|
||||
, unpaper
|
||||
}:
|
||||
let
|
||||
inherit (python3Packages) buildPythonApplication;
|
||||
|
||||
runtimeDeps = with python3Packages; [
|
||||
ghostscript
|
||||
jbig2enc
|
||||
leptonica
|
||||
pngquant
|
||||
qpdf
|
||||
tesseract4
|
||||
unpaper
|
||||
pillow
|
||||
];
|
||||
|
||||
in
|
||||
buildPythonApplication rec {
|
||||
buildPythonPackage rec {
|
||||
pname = "ocrmypdf";
|
||||
version = "12.5.0";
|
||||
|
||||
@ -39,51 +37,48 @@ buildPythonApplication rec {
|
||||
sha256 = "sha256-g80WedX+TGHE9EJ/RSgOc53PM17V3WZslUNaHoqKTo0=";
|
||||
};
|
||||
|
||||
nativeBuildInputs = with python3Packages; [
|
||||
setuptools
|
||||
patches = [
|
||||
(substituteAll {
|
||||
src = ./paths.patch;
|
||||
gs = "${lib.getBin ghostscript}/bin/gs";
|
||||
jbig2 = "${lib.getBin jbig2enc}/bin/jbig2";
|
||||
liblept = "${lib.getLib leptonica}/lib/liblept${stdenv.hostPlatform.extensions.sharedLibrary}";
|
||||
pngquant = "${lib.getBin pngquant}/bin/pngquant";
|
||||
tesseract = "${lib.getBin tesseract4}/bin/tesseract";
|
||||
unpaper = "${lib.getBin unpaper}/bin/unpaper";
|
||||
})
|
||||
];
|
||||
|
||||
nativeBuildInputs = [
|
||||
setuptools-scm-git-archive
|
||||
setuptools-scm
|
||||
];
|
||||
|
||||
propagatedBuildInputs = with python3Packages; [
|
||||
propagatedBuildInputs = [
|
||||
cffi
|
||||
coloredlogs
|
||||
img2pdf
|
||||
importlib-resources
|
||||
pdfminer
|
||||
pluggy
|
||||
pikepdf
|
||||
pillow
|
||||
pluggy
|
||||
reportlab
|
||||
setuptools
|
||||
tqdm
|
||||
];
|
||||
|
||||
checkInputs = with python3Packages; [
|
||||
pypdf2
|
||||
pytest
|
||||
pytest-helpers-namespace
|
||||
checkInputs = [
|
||||
pytest-xdist
|
||||
pytest-cov
|
||||
python-xmp-toolkit
|
||||
pytestCheckHook
|
||||
] ++ runtimeDeps;
|
||||
|
||||
patches = [
|
||||
(substituteAll {
|
||||
src = ./liblept.patch;
|
||||
liblept = "${lib.getLib leptonica}/lib/liblept${stdenv.hostPlatform.extensions.sharedLibrary}";
|
||||
})
|
||||
];
|
||||
|
||||
makeWrapperArgs = [ "--prefix PATH : ${lib.makeBinPath [ ghostscript jbig2enc pngquant qpdf tesseract4 unpaper ]}" ];
|
||||
|
||||
meta = with lib; {
|
||||
homepage = "https://github.com/jbarlow83/OCRmyPDF";
|
||||
description = "Adds an OCR text layer to scanned PDF files, allowing them to be searched";
|
||||
license = with licenses; [ mpl20 mit ];
|
||||
platforms = platforms.linux;
|
||||
maintainers = [ maintainers.kiwi ];
|
||||
maintainers = with maintainers; [ kiwi dotlambda ];
|
||||
changelog = "https://github.com/jbarlow83/OCRmyPDF/blob/v${version}/docs/release_notes.rst";
|
||||
};
|
||||
}
|
160
pkgs/development/python-modules/ocrmypdf/paths.patch
Normal file
160
pkgs/development/python-modules/ocrmypdf/paths.patch
Normal file
@ -0,0 +1,160 @@
|
||||
diff --git a/src/ocrmypdf/_exec/ghostscript.py b/src/ocrmypdf/_exec/ghostscript.py
|
||||
index 5c357f1b..f459763a 100644
|
||||
--- a/src/ocrmypdf/_exec/ghostscript.py
|
||||
+++ b/src/ocrmypdf/_exec/ghostscript.py
|
||||
@@ -25,28 +25,7 @@ from ocrmypdf.subprocess import get_version, run, run_polling_stderr
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
-missing_gs_error = """
|
||||
----------------------------------------------------------------------
|
||||
-This error normally occurs when ocrmypdf find can't Ghostscript.
|
||||
-Please ensure Ghostscript is installed and its location is added to
|
||||
-the system PATH environment variable.
|
||||
-
|
||||
-For details see:
|
||||
- https://ocrmypdf.readthedocs.io/en/latest/installation.html
|
||||
----------------------------------------------------------------------
|
||||
-"""
|
||||
-
|
||||
-_gswin = None
|
||||
-if os.name == 'nt':
|
||||
- _gswin = which('gswin64c')
|
||||
- if not _gswin:
|
||||
- _gswin = which('gswin32c')
|
||||
- if not _gswin:
|
||||
- raise MissingDependencyError(missing_gs_error)
|
||||
- _gswin = Path(_gswin).stem
|
||||
-
|
||||
-GS = _gswin if _gswin else 'gs'
|
||||
-del _gswin
|
||||
+GS = '@gs@'
|
||||
|
||||
|
||||
def version():
|
||||
diff --git a/src/ocrmypdf/_exec/jbig2enc.py b/src/ocrmypdf/_exec/jbig2enc.py
|
||||
index 2e8a058b..65a09088 100644
|
||||
--- a/src/ocrmypdf/_exec/jbig2enc.py
|
||||
+++ b/src/ocrmypdf/_exec/jbig2enc.py
|
||||
@@ -14,7 +14,7 @@ from ocrmypdf.subprocess import get_version, run
|
||||
|
||||
|
||||
def version():
|
||||
- return get_version('jbig2', regex=r'jbig2enc (\d+(\.\d+)*).*')
|
||||
+ return get_version('@jbig2@', regex=r'jbig2enc (\d+(\.\d+)*).*')
|
||||
|
||||
|
||||
def available():
|
||||
@@ -27,7 +27,7 @@ def available():
|
||||
|
||||
def convert_group(*, cwd, infiles, out_prefix):
|
||||
args = [
|
||||
- 'jbig2',
|
||||
+ '@jbig2@',
|
||||
'-b',
|
||||
out_prefix,
|
||||
'-s', # symbol mode (lossy)
|
||||
@@ -46,7 +46,7 @@ def convert_group_mp(args):
|
||||
|
||||
|
||||
def convert_single(*, cwd, infile, outfile):
|
||||
- args = ['jbig2', '-p', infile]
|
||||
+ args = ['@jbig2@', '-p', infile]
|
||||
with open(outfile, 'wb') as fstdout:
|
||||
proc = run(args, cwd=cwd, stdout=fstdout, stderr=PIPE)
|
||||
proc.check_returncode()
|
||||
diff --git a/src/ocrmypdf/_exec/pngquant.py b/src/ocrmypdf/_exec/pngquant.py
|
||||
index ca8a4542..d0544174 100644
|
||||
--- a/src/ocrmypdf/_exec/pngquant.py
|
||||
+++ b/src/ocrmypdf/_exec/pngquant.py
|
||||
@@ -19,7 +19,7 @@ from ocrmypdf.subprocess import get_version, run
|
||||
|
||||
|
||||
def version():
|
||||
- return get_version('pngquant', regex=r'(\d+(\.\d+)*).*')
|
||||
+ return get_version('@pngquant@', regex=r'(\d+(\.\d+)*).*')
|
||||
|
||||
|
||||
def available():
|
||||
@@ -46,7 +46,7 @@ def input_as_png(input_file: Path):
|
||||
def quantize(input_file: Path, output_file: Path, quality_min: int, quality_max: int):
|
||||
with input_as_png(input_file) as input_stream:
|
||||
args = [
|
||||
- 'pngquant',
|
||||
+ '@pngquant@',
|
||||
'--force',
|
||||
'--skip-if-larger',
|
||||
'--quality',
|
||||
diff --git a/src/ocrmypdf/_exec/tesseract.py b/src/ocrmypdf/_exec/tesseract.py
|
||||
index 33ead41e..5840f7c1 100644
|
||||
--- a/src/ocrmypdf/_exec/tesseract.py
|
||||
+++ b/src/ocrmypdf/_exec/tesseract.py
|
||||
@@ -78,7 +78,7 @@ class TesseractVersion(StrictVersion):
|
||||
|
||||
|
||||
def version():
|
||||
- return get_version('tesseract', regex=r'tesseract\s(.+)')
|
||||
+ return get_version('@tesseract@', regex=r'tesseract\s(.+)')
|
||||
|
||||
|
||||
def has_user_words():
|
||||
@@ -100,7 +100,7 @@ def get_languages():
|
||||
msg += output
|
||||
return msg
|
||||
|
||||
- args_tess = ['tesseract', '--list-langs']
|
||||
+ args_tess = ['@tesseract@', '--list-langs']
|
||||
try:
|
||||
proc = run(
|
||||
args_tess,
|
||||
@@ -122,7 +122,7 @@ def get_languages():
|
||||
|
||||
|
||||
def tess_base_args(langs: List[str], engine_mode: Optional[int]) -> List[str]:
|
||||
- args = ['tesseract']
|
||||
+ args = ['@tesseract@']
|
||||
if langs:
|
||||
args.extend(['-l', '+'.join(langs)])
|
||||
if engine_mode is not None:
|
||||
diff --git a/src/ocrmypdf/_exec/unpaper.py b/src/ocrmypdf/_exec/unpaper.py
|
||||
index 3c3ae72c..d269966a 100644
|
||||
--- a/src/ocrmypdf/_exec/unpaper.py
|
||||
+++ b/src/ocrmypdf/_exec/unpaper.py
|
||||
@@ -31,7 +31,7 @@ log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def version() -> str:
|
||||
- return get_version('unpaper')
|
||||
+ return get_version('@unpaper@')
|
||||
|
||||
|
||||
def _setup_unpaper_io(tmpdir: Path, input_file: Path) -> Tuple[Path, Path]:
|
||||
@@ -71,7 +71,7 @@ def _setup_unpaper_io(tmpdir: Path, input_file: Path) -> Tuple[Path, Path]:
|
||||
def run(
|
||||
input_file: Path, output_file: Path, *, dpi: DecFloat, mode_args: List[str]
|
||||
) -> None:
|
||||
- args_unpaper = ['unpaper', '-v', '--dpi', str(round(dpi, 6))] + mode_args
|
||||
+ args_unpaper = ['@unpaper@', '-v', '--dpi', str(round(dpi, 6))] + mode_args
|
||||
|
||||
with TemporaryDirectory() as tmpdir:
|
||||
input_pnm, output_pnm = _setup_unpaper_io(Path(tmpdir), input_file)
|
||||
diff --git a/src/ocrmypdf/leptonica.py b/src/ocrmypdf/leptonica.py
|
||||
index e4814f1a..fdaf7ea4 100644
|
||||
--- a/src/ocrmypdf/leptonica.py
|
||||
+++ b/src/ocrmypdf/leptonica.py
|
||||
@@ -33,14 +33,7 @@ from ocrmypdf.lib._leptonica import ffi
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
-if os.name == 'nt':
|
||||
- from ocrmypdf.subprocess._windows import shim_env_path
|
||||
-
|
||||
- libname = 'liblept-5'
|
||||
- os.environ['PATH'] = shim_env_path()
|
||||
-else:
|
||||
- libname = 'lept'
|
||||
-_libpath = find_library(libname)
|
||||
+_libpath = '@liblept@'
|
||||
if not _libpath:
|
||||
raise MissingDependencyError(
|
||||
"""
|
@ -5,13 +5,12 @@
|
||||
, fetchPypi
|
||||
, hypothesis
|
||||
, isPy3k
|
||||
, jbig2dec
|
||||
, lxml
|
||||
, mupdf
|
||||
, pillow
|
||||
, psutil
|
||||
, pybind11
|
||||
, pytest-cov
|
||||
, pytest-helpers-namespace
|
||||
, pytest-timeout
|
||||
, pytest-xdist
|
||||
, pytestCheckHook
|
||||
, python-dateutil
|
||||
@ -20,18 +19,27 @@
|
||||
, setuptools
|
||||
, setuptools-scm
|
||||
, setuptools-scm-git-archive
|
||||
, substituteAll
|
||||
}:
|
||||
|
||||
buildPythonPackage rec {
|
||||
pname = "pikepdf";
|
||||
version = "2.16.1";
|
||||
version = "3.0.0";
|
||||
disabled = ! isPy3k;
|
||||
|
||||
src = fetchPypi {
|
||||
inherit pname version;
|
||||
sha256 = "sha256-4k3/avMfHrcy/LXbRniDXR8xJkOZb9zZ2+uKylK8Dd4=";
|
||||
sha256 = "sha256-PBeTfiMLIq+pdeaRMOid8pEd0eLHu+IAE4aEFU5CiEM=";
|
||||
};
|
||||
|
||||
patches = [
|
||||
(substituteAll {
|
||||
src = ./paths.patch;
|
||||
jbig2dec = "${lib.getBin jbig2dec}/bin/jbig2dec";
|
||||
mudraw = "${lib.getBin mupdf}/bin/mudraw";
|
||||
})
|
||||
];
|
||||
|
||||
buildInputs = [
|
||||
pybind11
|
||||
qpdf
|
||||
@ -45,11 +53,8 @@ buildPythonPackage rec {
|
||||
checkInputs = [
|
||||
attrs
|
||||
hypothesis
|
||||
pytest-helpers-namespace
|
||||
pytest-timeout
|
||||
pytest-xdist
|
||||
psutil
|
||||
pytest-cov
|
||||
pytestCheckHook
|
||||
python-dateutil
|
||||
python-xmp-toolkit
|
||||
@ -62,17 +67,13 @@ buildPythonPackage rec {
|
||||
setuptools
|
||||
];
|
||||
|
||||
preBuild = ''
|
||||
HOME=$TMPDIR
|
||||
'';
|
||||
|
||||
pythonImportsCheck = [ "pikepdf" ];
|
||||
|
||||
meta = with lib; {
|
||||
homepage = "https://github.com/pikepdf/pikepdf";
|
||||
description = "Read and write PDFs with Python, powered by qpdf";
|
||||
license = licenses.mpl20;
|
||||
maintainers = [ maintainers.kiwi ];
|
||||
maintainers = with maintainers; [ kiwi dotlambda ];
|
||||
changelog = "https://github.com/pikepdf/pikepdf/blob/${version}/docs/release_notes.rst";
|
||||
};
|
||||
}
|
||||
|
26
pkgs/development/python-modules/pikepdf/paths.patch
Normal file
26
pkgs/development/python-modules/pikepdf/paths.patch
Normal file
@ -0,0 +1,26 @@
|
||||
diff --git a/src/pikepdf/_methods.py b/src/pikepdf/_methods.py
|
||||
index 70cdc9e..c3a14d0 100644
|
||||
--- a/src/pikepdf/_methods.py
|
||||
+++ b/src/pikepdf/_methods.py
|
||||
@@ -190,7 +190,7 @@ def _mudraw(buffer, fmt) -> bytes:
|
||||
tmp_in.flush()
|
||||
|
||||
proc = run(
|
||||
- ['mudraw', '-F', fmt, '-o', '-', tmp_in.name],
|
||||
+ ['@mudraw@', '-F', fmt, '-o', '-', tmp_in.name],
|
||||
stdout=PIPE,
|
||||
stderr=PIPE,
|
||||
check=True,
|
||||
diff --git a/src/pikepdf/jbig2.py b/src/pikepdf/jbig2.py
|
||||
index 80cc910..64f6d31 100644
|
||||
--- a/src/pikepdf/jbig2.py
|
||||
+++ b/src/pikepdf/jbig2.py
|
||||
@@ -25,7 +25,7 @@ def extract_jbig2(
|
||||
global_path = Path(tmpdir) / "global"
|
||||
output_path = Path(tmpdir) / "outfile"
|
||||
|
||||
- args = ["jbig2dec", "-e", "-o", os.fspath(output_path)]
|
||||
+ args = ["@jbig2dec@", "-e", "-o", os.fspath(output_path)]
|
||||
|
||||
# Get the raw stream, because we can't decode im_obj - that is why we are here
|
||||
# (Strictly speaking we should remove any non-JBIG2 filters if double encoded)
|
@ -1,13 +0,0 @@
|
||||
diff --git a/src/ocrmypdf/leptonica.py b/src/ocrmypdf/leptonica.py
|
||||
index 328b063..b993cc9 100644
|
||||
--- a/src/ocrmypdf/leptonica.py
|
||||
+++ b/src/ocrmypdf/leptonica.py
|
||||
@@ -46,7 +46,7 @@ if os.name == 'nt':
|
||||
os.environ['PATH'] = shim_paths_with_program_files()
|
||||
else:
|
||||
libname = 'lept'
|
||||
-_libpath = find_library(libname)
|
||||
+_libpath = '@liblept@'
|
||||
if not _libpath:
|
||||
raise MissingDependencyError(
|
||||
"""
|
@ -3249,7 +3249,7 @@ with pkgs;
|
||||
|
||||
oci-cli = callPackage ../tools/admin/oci-cli { };
|
||||
|
||||
ocrmypdf = callPackage ../tools/text/ocrmypdf { };
|
||||
ocrmypdf = with python3.pkgs; toPythonApplication ocrmypdf;
|
||||
|
||||
ocrfeeder = callPackage ../applications/graphics/ocrfeeder { };
|
||||
|
||||
|
@ -5061,6 +5061,8 @@ in {
|
||||
|
||||
oci = callPackage ../development/python-modules/oci { };
|
||||
|
||||
ocrmypdf = callPackage ../development/python-modules/ocrmypdf { };
|
||||
|
||||
od = callPackage ../development/python-modules/od { };
|
||||
|
||||
odfpy = callPackage ../development/python-modules/odfpy { };
|
||||
|
Loading…
Reference in New Issue
Block a user