python311Packages.pyocr: fix non-Linux builds

Cuneiform doesn't build on non-Linux platforms, which in turn prevents pyocr from building on those platforms.

The fix is fairly simple: split Cuneiform and Tesseract support into separate patches, and make Cuneiform support conditional on the host system.

pyocr gracefully degrades when it can't find a tool, so this shouldn't result in adverse behavior.
This commit is contained in:
Tomo 2024-02-12 20:15:46 -08:00
parent 3efc6ce7e4
commit 64b6280e45
3 changed files with 114 additions and 104 deletions

View File

@ -1,4 +1,5 @@
{ lib
, stdenv
, fetchFromGitLab
, buildPythonPackage
, pillow
@ -9,6 +10,8 @@
, pytestCheckHook
, setuptools
, setuptools-scm
, withTesseractSupport ? true
, withCuneiformSupport ? stdenv.hostPlatform.isLinux
}:
buildPythonPackage rec {
@ -27,12 +30,14 @@ buildPythonPackage rec {
hash = "sha256-gE0+qbHCwpDdxXFY+4rjVU2FbUSfSVrvrVMcWUk+9FU=";
};
patches = [
(substituteAll {
src = ./paths.patch;
inherit cuneiform tesseract;
})
];
# Each OCR backend has its own path patch so support can be toggled
# independently. Cuneiform does not build on non-Linux platforms, so its
# patch is gated on withCuneiformSupport (which defaults to
# stdenv.hostPlatform.isLinux) instead of on the platform check directly;
# this way overriding the flag actually takes effect.
patches = (lib.optional withTesseractSupport (substituteAll {
  src = ./paths-tesseract.patch;
  inherit tesseract;
  # The shared-library suffix is platform dependent (e.g. .so or .dylib),
  # so the full library path is computed here and substituted as a whole.
  tesseractLibraryLocation = "${tesseract}/lib/libtesseract${stdenv.hostPlatform.extensions.sharedLibrary}";
})) ++ (lib.optional withCuneiformSupport (substituteAll {
  src = ./paths-cuneiform.patch;
  inherit cuneiform;
}));
propagatedBuildInputs = [ pillow ];

View File

@ -0,0 +1,101 @@
commit cfc05af26b571e9ca09e9c709c0fb8934e9e46dd
Author: Guillaume Girol <symphorien+git@xlumurb.eu>
Date: Sat Aug 20 17:48:01 2022 +0200
Fix finding cuneiform
diff --git a/src/pyocr/cuneiform.py b/src/pyocr/cuneiform.py
index 2e5b717..35647e2 100644
--- a/src/pyocr/cuneiform.py
+++ b/src/pyocr/cuneiform.py
@@ -25,13 +25,9 @@ from . import builders
from .error import CuneiformError
-# CHANGE THIS IF CUNEIFORM IS NOT IN YOUR PATH, OR IS NAMED DIFFERENTLY
-CUNEIFORM_CMD = 'cuneiform'
+CUNEIFORM_CMD = '@cuneiform@/bin/cuneiform'
-CUNEIFORM_DATA_POSSIBLE_PATHS = [
- "/usr/local/share/cuneiform",
- "/usr/share/cuneiform",
-]
+CUNEIFORM_DATA_POSSIBLE_PATHS = ['@cuneiform@/share/cuneiform']
LANGUAGES_LINE_PREFIX = "Supported languages: "
LANGUAGES_SPLIT_RE = re.compile("[^a-z]")
diff --git a/tests/test_cuneiform.py b/tests/test_cuneiform.py
index b76e93c..266f6b2 100644
--- a/tests/test_cuneiform.py
+++ b/tests/test_cuneiform.py
@@ -21,7 +21,7 @@ class TestCuneiform(BaseTest):
# XXX is it useful?
which.return_value = True
self.assertTrue(cuneiform.is_available())
- which.assert_called_once_with("cuneiform")
+ which.assert_called_once_with("@cuneiform@/bin/cuneiform")
@patch("subprocess.Popen")
def test_version(self, popen):
@@ -54,7 +54,7 @@ class TestCuneiform(BaseTest):
self.assertIn("eng", langs)
self.assertIn("fra", langs)
popen.assert_called_once_with(
- ["cuneiform", "-l"],
+ ["@cuneiform@/bin/cuneiform", "-l"],
stdout=subprocess.PIPE, stderr=subprocess.STDOUT
)
@@ -110,7 +110,7 @@ class TestCuneiformTxt(BaseTest):
output = cuneiform.image_to_string(self.image)
self.assertEqual(output, self._get_file_content("text").strip())
popen.assert_called_once_with(
- ["cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
+ ["@cuneiform@/bin/cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT
)
@@ -126,7 +126,7 @@ class TestCuneiformTxt(BaseTest):
builder=self.builder)
self.assertEqual(output, self._get_file_content("text").strip())
popen.assert_called_once_with(
- ["cuneiform", "-l", "fra", "-f", "text", "-o", self.tmp_filename,
+ ["@cuneiform@/bin/cuneiform", "-l", "fra", "-f", "text", "-o", self.tmp_filename,
"-"],
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT
@@ -143,7 +143,7 @@ class TestCuneiformTxt(BaseTest):
builder=self.builder)
self.assertEqual(output, self._get_file_content("text").strip())
popen.assert_called_once_with(
- ["cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
+ ["@cuneiform@/bin/cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT
)
@@ -174,7 +174,7 @@ class TestCuneiformTxt(BaseTest):
output = cuneiform.image_to_string(image, builder=self.builder)
self.assertEqual(output, self._get_file_content("text").strip())
popen.assert_called_once_with(
- ["cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
+ ["@cuneiform@/bin/cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT
)
@@ -230,7 +230,7 @@ class TestCuneiformWordBox(BaseTest):
output = cuneiform.image_to_string(self.image,
builder=self.builder)
popen.assert_called_once_with(
- ["cuneiform", "-f", "hocr", "-o", self.tmp_filename, "-"],
+ ["@cuneiform@/bin/cuneiform", "-f", "hocr", "-o", self.tmp_filename, "-"],
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT
)
@@ -284,7 +284,7 @@ class TestCuneiformLineBox(BaseTest):
output = cuneiform.image_to_string(self.image,
builder=self.builder)
popen.assert_called_once_with(
- ["cuneiform", "-f", "hocr", "-o", self.tmp_filename, "-"],
+ ["@cuneiform@/bin/cuneiform", "-f", "hocr", "-o", self.tmp_filename, "-"],
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT

View File

@ -2,28 +2,8 @@ commit cfc05af26b571e9ca09e9c709c0fb8934e9e46dd
Author: Guillaume Girol <symphorien+git@xlumurb.eu>
Date: Sat Aug 20 17:48:01 2022 +0200
Fix finding tesseract and cuneiform
Fix finding tesseract
diff --git a/src/pyocr/cuneiform.py b/src/pyocr/cuneiform.py
index 2e5b717..35647e2 100644
--- a/src/pyocr/cuneiform.py
+++ b/src/pyocr/cuneiform.py
@@ -25,13 +25,9 @@ from . import builders
from .error import CuneiformError
-# CHANGE THIS IF CUNEIFORM IS NOT IN YOUR PATH, OR IS NAMED DIFFERENTLY
-CUNEIFORM_CMD = 'cuneiform'
+CUNEIFORM_CMD = '@cuneiform@/bin/cuneiform'
-CUNEIFORM_DATA_POSSIBLE_PATHS = [
- "/usr/local/share/cuneiform",
- "/usr/share/cuneiform",
-]
+CUNEIFORM_DATA_POSSIBLE_PATHS = ['@cuneiform@/share/cuneiform']
LANGUAGES_LINE_PREFIX = "Supported languages: "
LANGUAGES_SPLIT_RE = re.compile("[^a-z]")
diff --git a/src/pyocr/libtesseract/tesseract_raw.py b/src/pyocr/libtesseract/tesseract_raw.py
index 1edec8c..434a336 100644
--- a/src/pyocr/libtesseract/tesseract_raw.py
@ -90,7 +70,7 @@ index 1edec8c..434a336 100644
- "libtesseract.4.dylib",
- ]
-
+libnames = [ "@tesseract@/lib/libtesseract.so" ]
+libnames = [ "@tesseractLibraryLocation@" ]
g_libtesseract = None
@ -125,82 +105,6 @@ index 0fe0d20..c1fdd27 100644
TESSDATA_EXTENSION = ".traineddata"
diff --git a/tests/test_cuneiform.py b/tests/test_cuneiform.py
index b76e93c..266f6b2 100644
--- a/tests/test_cuneiform.py
+++ b/tests/test_cuneiform.py
@@ -21,7 +21,7 @@ class TestCuneiform(BaseTest):
# XXX is it useful?
which.return_value = True
self.assertTrue(cuneiform.is_available())
- which.assert_called_once_with("cuneiform")
+ which.assert_called_once_with("@cuneiform@/bin/cuneiform")
@patch("subprocess.Popen")
def test_version(self, popen):
@@ -54,7 +54,7 @@ class TestCuneiform(BaseTest):
self.assertIn("eng", langs)
self.assertIn("fra", langs)
popen.assert_called_once_with(
- ["cuneiform", "-l"],
+ ["@cuneiform@/bin/cuneiform", "-l"],
stdout=subprocess.PIPE, stderr=subprocess.STDOUT
)
@@ -110,7 +110,7 @@ class TestCuneiformTxt(BaseTest):
output = cuneiform.image_to_string(self.image)
self.assertEqual(output, self._get_file_content("text").strip())
popen.assert_called_once_with(
- ["cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
+ ["@cuneiform@/bin/cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT
)
@@ -126,7 +126,7 @@ class TestCuneiformTxt(BaseTest):
builder=self.builder)
self.assertEqual(output, self._get_file_content("text").strip())
popen.assert_called_once_with(
- ["cuneiform", "-l", "fra", "-f", "text", "-o", self.tmp_filename,
+ ["@cuneiform@/bin/cuneiform", "-l", "fra", "-f", "text", "-o", self.tmp_filename,
"-"],
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT
@@ -143,7 +143,7 @@ class TestCuneiformTxt(BaseTest):
builder=self.builder)
self.assertEqual(output, self._get_file_content("text").strip())
popen.assert_called_once_with(
- ["cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
+ ["@cuneiform@/bin/cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT
)
@@ -174,7 +174,7 @@ class TestCuneiformTxt(BaseTest):
output = cuneiform.image_to_string(image, builder=self.builder)
self.assertEqual(output, self._get_file_content("text").strip())
popen.assert_called_once_with(
- ["cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
+ ["@cuneiform@/bin/cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT
)
@@ -230,7 +230,7 @@ class TestCuneiformWordBox(BaseTest):
output = cuneiform.image_to_string(self.image,
builder=self.builder)
popen.assert_called_once_with(
- ["cuneiform", "-f", "hocr", "-o", self.tmp_filename, "-"],
+ ["@cuneiform@/bin/cuneiform", "-f", "hocr", "-o", self.tmp_filename, "-"],
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT
)
@@ -284,7 +284,7 @@ class TestCuneiformLineBox(BaseTest):
output = cuneiform.image_to_string(self.image,
builder=self.builder)
popen.assert_called_once_with(
- ["cuneiform", "-f", "hocr", "-o", self.tmp_filename, "-"],
+ ["@cuneiform@/bin/cuneiform", "-f", "hocr", "-o", self.tmp_filename, "-"],
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT
)
diff --git a/tests/test_libtesseract.py b/tests/test_libtesseract.py
index cc31a50..890c02c 100644
--- a/tests/test_libtesseract.py