Skip to content

Instantly share code, notes, and snippets.

@Kiwi

Kiwi/Questions Secret

Last active February 25, 2019 06:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Kiwi/9d69fd15419223b4d97c02881a431d14 to your computer and use it in GitHub Desktop.
Save Kiwi/9d69fd15419223b4d97c02881a431d14 to your computer and use it in GitHub Desktop.
diff --git a/pkgs/development/python-modules/ocrmypdf/default.nix b/pkgs/development/python-modules/ocrmypdf/default.nix
new file mode 100644
index 00000000000..d9cbbca9573
--- /dev/null
+++ b/pkgs/development/python-modules/ocrmypdf/default.nix
@@ -0,0 +1,47 @@
+{ lib, pkgs, fetchPypi, buildPythonPackage, stdenv, setuptools, pytestcov, python-xmp-toolkit, pytest_xdist, pypdf2, ruffus, pikepdf, reportlab, pytestrunner, setuptools_scm, pdfminer-20181108, six, img2pdf, leptonica, ghostscript, qpdf, tesseract4, jbig2enc, pngquant, unpaper }:
+
+buildPythonPackage rec {
+ pname = "ocrmypdf";
+ version = "8.1.0";
+
+ buildInputs = [ setuptools python-xmp-toolkit pypdf2 ruffus pikepdf reportlab pkgs.python37Packages.cffi pytestrunner setuptools_scm pdfminer-20181108 six img2pdf leptonica ghostscript qpdf tesseract4 jbig2enc pngquant unpaper ];
+
+ propagatedBuildInputs = [ setuptools python-xmp-toolkit pypdf2 ruffus pikepdf reportlab pkgs.python37Packages.cffi pytestrunner setuptools_scm pdfminer-20181108 six img2pdf leptonica ghostscript qpdf tesseract4 jbig2enc pngquant unpaper ];
+
+
+ src = fetchPypi {
+ inherit pname version;
+ sha256 = "b90ab8140ef473f07020338dc9390e9aa7c3894c3efc1931926a44b594f9b46f";
+ };
+
+#substituteInPlace "./pytest.ini" --replace "--testmon" ""
+# postPatch = ''
+# substituteInPlace setup.py \
+# --replace "'setuptools_scm_git_archive'" ""
+# '';
+ doCheck = false;
+
+# --replace "'setuptools_scm>=1.15.0'," "" \
+
+ # TODO: not very nice!
+ postPatch =
+ let libname = if stdenv.isDarwin then "liblept.dylib" else "liblept.so";
+ in
+ ''
+ substituteInPlace src/ocrmypdf/leptonica.py \
+ --replace "ffi.dlopen(find_library('lept'))" \
+ 'ffi.dlopen("${lib.makeLibraryPath [leptonica]}/${libname}")'
+
+ substituteInPlace setup.py \
+ --replace "'setuptools_scm_git_archive'" ""
+ '';
+
+
+ meta = with stdenv.lib; {
+ homepage = "https://github.com/jbarlow83/OCRmyPDF";
+ description = "A tool to add an OCR text layer to scanned PDF files, allowing them to be searched";
+ license = licenses.gpl3;
+ platforms = platforms.linux;
+ maintainers = with maintainers; [ ];
+ };
+}
diff --git a/pkgs/development/python-modules/pdfminer_six/20181108.nix b/pkgs/development/python-modules/pdfminer_six/20181108.nix
new file mode 100644
index 00000000000..879cb095dc9
--- /dev/null
+++ b/pkgs/development/python-modules/pdfminer_six/20181108.nix
@@ -0,0 +1,28 @@
+{ stdenv, buildPythonPackage, python, fetchFromGitHub, six, pycryptodome, chardet, nose, pytest, sortedcontainers }:
+
+buildPythonPackage rec {
+ pname = "pdfminer_six";
+ version = "20181108";
+
+ src = fetchFromGitHub {
+ owner = "pdfminer";
+ repo = "pdfminer.six";
+ rev = "${version}";
+ sha256 = "1v8pcx43fgidv1g54s92k85anvcss08blkhm4yi1hn1ybl0mmw6c";
+ };
+
+ propagatedBuildInputs = [ six pycryptodome chardet sortedcontainers ];
+
+ checkInputs = [ nose pytest ];
+ checkPhase = ''
+ ${python.interpreter} -m pytest
+ '';
+
+ meta = with stdenv.lib; {
+ description = "fork of PDFMiner using six for Python 2+3 compatibility";
+ homepage = https://github.com/pdfminer/pdfminer.six;
+ license = licenses.mit;
+ maintainers = with maintainers; [ psyanticy ];
+ };
+}
+
diff --git a/pkgs/development/python-modules/pikepdf/default.nix b/pkgs/development/python-modules/pikepdf/default.nix
new file mode 100644
index 00000000000..4f065cac27d
--- /dev/null
+++ b/pkgs/development/python-modules/pikepdf/default.nix
@@ -0,0 +1,40 @@
+{ stdenv, lib, fetchPypi, buildPythonPackage, qpdf, pybind11, setuptools_scm, setuptools, pytestrunner, defusedxml, lxml, pkgs }:
+#{ stdenv, lib, fetchPypi, buildPythonPackage, qpdf, pybind11, defusedxml }:
+
+buildPythonPackage rec {
+ pname = "pikepdf";
+ version = "1.0.5";
+
+ #nativeBuildInputs = [pybind11];
+
+ buildInputs = [pkgs.pybind11 setuptools_scm pytestrunner qpdf];
+
+ propagatedBuildInputs = [pkgs.pybind11 lxml defusedxml pybind11 setuptools pytestrunner qpdf];
+ src = fetchPypi {
+ inherit pname version;
+ sha256 = "b878dda8618939b8dda61418f193904c720aaa606167906e33d6e21c5cb531e1";
+ };
+
+ doCheck = false;
+ preBuild = ''
+ export HOME=$TMP
+ '';
+
+ postPatch = ''
+ substituteInPlace setup.py \
+ --replace "'setuptools_scm_git_archive'," ""
+ '';
+
+ #buildPhase = ''
+ #export PATH=$PATH:$out/bin
+ #export HOME=$TMP # fix tests failing in sandbox due to "/homeless-shelter"
+ #'';
+ # preConfigure = ''
+ #'';
+
+ meta = with lib; {
+ homepage = "https://github.com/pikepdf/pikepdf";
+ description = "A Python library for reading and writing PDF, powered by qpdf";
+ license = licenses.mpl20;
+ };
+}
diff --git a/pkgs/development/python-modules/python-xmp-toolkit/default.nix b/pkgs/development/python-modules/python-xmp-toolkit/default.nix
new file mode 100644
index 00000000000..d708e0f5076
--- /dev/null
+++ b/pkgs/development/python-modules/python-xmp-toolkit/default.nix
@@ -0,0 +1,38 @@
+{ stdenv, python, lib, fetchPypi, buildPythonPackage, setuptools, pytz, exempi }:
+
+buildPythonPackage rec {
+ pname = "python-xmp-toolkit";
+ version = "2.0.1";
+
+ buildInputs = [setuptools pytz exempi ];
+ #propagatedBuildInputs = [ exempi setuptools ];
+ #nativeBuildInputs = [ exempi pkgconfig ];
+ src = fetchPypi {
+ inherit pname version;
+ sha256 = "f8d912946ff9fd46ed5c7c355aa5d4ea193328b3f200909ef32d9a28a1419a38";
+ };
+
+ # preBuild = ''
+ # export PATH=$PATH:$out/bin
+ # '';
+#buildPhase = ''
+ # Fix up the LD_LIBRARY_PATH so that expat is on it
+# export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${exempi.out}/lib"
+ #python mscript.py configure --prefix="$out" --disable-nls --disable-translations
+ #python mscript.py build
+# '';
+
+ doCheck = false;
+ preBuild = ''
+ export PYTHONPATH="$PYTHONPATH:$out/lib/python${python.pythonVersion}/site-packages"
+ export PATH="$PATH:$out/bin"
+ export LD_LIBRARY_PATH="${stdenv.lib.makeLibraryPath [exempi]}"
+ '';
+
+
+ meta = with lib; {
+ homepage = "https://github.com/python-xmp-toolkit/python-xmp-toolkit";
+ description = "Python XMP Toolkit is a library for working with XMP metadata, as well as reading/writing XMP metadata stored in many different file formats.";
+ license = licenses.bsd3;
+ };
+}
diff --git a/pkgs/development/python-modules/ruffus/default.nix b/pkgs/development/python-modules/ruffus/default.nix
new file mode 100644
index 00000000000..df647a68cda
--- /dev/null
+++ b/pkgs/development/python-modules/ruffus/default.nix
@@ -0,0 +1,19 @@
+{ stdenv, lib, fetchPypi, buildPythonPackage, setuptools, pkgs }:
+
+buildPythonPackage rec {
+ pname = "ruffus";
+ version = "2.8.1";
+
+ buildInputs = [setuptools];
+
+ src = fetchPypi {
+ inherit pname version;
+ sha256 = "90bc1e57ffb95be11e9c4461a406fee63395898beacd35a1dce9dd2c468c2582";
+ };
+
+ meta = with lib; {
+ homepage = "http://www.ruffus.org.uk/";
+ description = "Ruffus is a Computation Pipeline library for python. It is open-sourced, powerful and user-friendly, and widely used in science and bioinformatics.";
+ license = licenses.mit;
+ };
+}
diff --git a/pkgs/development/python-modules/setuptools_scm_git_archive/default.nix b/pkgs/development/python-modules/setuptools_scm_git_archive/default.nix
new file mode 100644
index 00000000000..51da6b26072
--- /dev/null
+++ b/pkgs/development/python-modules/setuptools_scm_git_archive/default.nix
@@ -0,0 +1,24 @@
+{ stdenv, buildPythonPackage, fetchPypi, pip, setuptools_scm, setuptools, wheel }:
+buildPythonPackage rec {
+ pname = "setuptools_scm_git_archive";
+ version = "1.0";
+
+ src = fetchPypi {
+ inherit pname version;
+ sha256 = "52425f905518247c685fc64c5fdba6e1e74443c8562e141c8de56059be0e31da";
+ };
+
+ #nativeBuildInputs = [setuptools_scm];
+ buildInputs = [pip setuptools_scm];
+ #propagatedBuildInputs = [setuptools_scm ];
+ # Seems to fail due to chroot and would cause circular dependency
+ # with pytest
+ #doCheck = false;
+
+ meta = with stdenv.lib; {
+ homepage = "https://github.com/Changaco/setuptools_scm_git_archive";
+ description = "This is a setuptools_scm plugin that adds support for git archives (for example the ones GitHub automatically generates)";
+ license = licenses.mit;
+ maintainers = with maintainers; [ ];
+ };
+}
diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix
index 0f1ab70d617..bfc440299b8 100644
--- a/pkgs/top-level/all-packages.nix
+++ b/pkgs/top-level/all-packages.nix
@@ -7881,7 +7881,11 @@ in
ocropus = callPackage ../applications/misc/ocropus { };
- inherit (callPackages ../development/interpreters/perl {}) perl528 perldevel;
+# ocrmypdf = callPackage ../tools/typesetting/ocrmypdf { };
+
+ ocrmypdf = with python3.pkgs; toPythonApplication ocrmypdf;
+
+ inherit (callPackages ../development/interpreters/perl {}) perl528 perldevel;
pachyderm = callPackage ../applications/networking/cluster/pachyderm { };
diff --git a/pkgs/top-level/python-packages.nix b/pkgs/top-level/python-packages.nix
index 0077482ab17..baebe523e0f 100644
--- a/pkgs/top-level/python-packages.nix
+++ b/pkgs/top-level/python-packages.nix
@@ -542,6 +542,8 @@ in {
pdfminer = callPackage ../development/python-modules/pdfminer_six { };
+ pdfminer-20181108 = callPackage ../development/python-modules/pdfminer_six/20181108.nix { };
+
pdfx = callPackage ../development/python-modules/pdfx { };
perf = callPackage ../development/python-modules/perf { };
@@ -4080,6 +4082,8 @@ in {
setuptools_scm = callPackage ../development/python-modules/setuptools_scm { };
+ setuptools_scm_git_archive = callPackage ../development/python-modules/setuptools_scm_git_archive { };
+
shippai = callPackage ../development/python-modules/shippai {};
simanneal = callPackage ../development/python-modules/simanneal { };
@@ -5287,6 +5291,8 @@ in {
inherit python;
})).python;
+ ruffus = callPackage ../development/python-modules/ruffus { };
+
scour = callPackage ../development/python-modules/scour { };
pymssql = callPackage ../development/python-modules/pymssql { };
@@ -5311,6 +5317,11 @@ in {
casttube = callPackage ../development/python-modules/casttube { };
+ pikepdf = callPackage ../development/python-modules/pikepdf { };
+ ocrmypdf = callPackage ../development/python-modules/ocrmypdf { };
+
+ python-xmp-toolkit = callPackage ../development/python-modules/python-xmp-toolkit { };
+
});
in fix' (extends overrides packages)
new file: pkgs/development/python-modules/ocrmypdf/default.nix
new file: pkgs/development/python-modules/pdfminer_six/20181108.nix
new file: pkgs/development/python-modules/pikepdf/default.nix
new file: pkgs/development/python-modules/python-xmp-toolkit/default.nix
new file: pkgs/development/python-modules/ruffus/default.nix
new file: pkgs/development/python-modules/setuptools_scm_git_archive/default.nix
modified: pkgs/top-level/all-packages.nix
modified: pkgs/top-level/python-packages.nix
# OCRmyPDF 8.1.0, built from the PyPI sdist.
#
# Intended to be instantiated with `callPackage` from the Python package set,
# so every Python dependency (including `cffi`) comes from the same
# interpreter's package set as the package itself.
{ lib, stdenv, fetchPypi, buildPythonPackage
# setup-time / test helpers
, setuptools, setuptools_scm, pytestrunner, pytestcov, pytest_xdist
# Python dependencies
, cffi, img2pdf, pdfminer-20181108, pikepdf, pypdf2, python-xmp-toolkit
, reportlab, ruffus, six
# native libraries and command line tools
, ghostscript, jbig2enc, leptonica, pngquant, qpdf, tesseract4, unpaper
}:

buildPythonPackage rec {
  pname = "ocrmypdf";
  version = "8.1.0";

  src = fetchPypi {
    inherit pname version;
    sha256 = "b90ab8140ef473f07020338dc9390e9aa7c3894c3efc1931926a44b594f9b46f";
  };

  # Setup-time helpers only; they are not propagated to dependents.
  buildInputs = [ pytestrunner setuptools_scm ];

  # Everything else is propagated. `cffi` is taken from the function
  # arguments instead of a hard-coded `pkgs.python37Packages`, so it always
  # matches the interpreter this package is built for.
  propagatedBuildInputs = [
    setuptools
    python-xmp-toolkit
    pypdf2
    ruffus
    pikepdf
    reportlab
    cffi
    pdfminer-20181108
    six
    img2pdf
    leptonica
    ghostscript
    qpdf
    tesseract4
    jbig2enc
    pngquant
    unpaper
  ];

  # TODO: not very nice!
  # 1. Hard-code the store path of the leptonica shared library in place of
  #    the `find_library('lept')` lookup.
  # 2. Drop the 'setuptools_scm_git_archive' entry from setup.py.
  postPatch =
    let
      libname = if stdenv.isDarwin then "liblept.dylib" else "liblept.so";
    in
    ''
      substituteInPlace src/ocrmypdf/leptonica.py \
        --replace "ffi.dlopen(find_library('lept'))" \
        'ffi.dlopen("${lib.makeLibraryPath [leptonica]}/${libname}")'

      substituteInPlace setup.py \
        --replace "'setuptools_scm_git_archive'" ""
    '';

  # The test suite needs further packages that are not available yet.
  doCheck = false;

  meta = with lib; {
    homepage = "https://github.com/jbarlow83/OCRmyPDF";
    description = "A tool to add an OCR text layer to scanned PDF files, allowing them to be searched";
    license = licenses.gpl3;
    platforms = platforms.linux;
    maintainers = with maintainers; [ ];
  };
}
# pdfminer.six pinned to the 20181108 tag, fetched from GitHub.
{ stdenv, buildPythonPackage, python, fetchFromGitHub, six, pycryptodome, chardet, nose, pytest, sortedcontainers }:

buildPythonPackage rec {
  pname = "pdfminer_six";
  version = "20181108";

  src = fetchFromGitHub {
    owner = "pdfminer";
    repo = "pdfminer.six";
    # The tag name is the version itself; no string interpolation needed.
    rev = version;
    sha256 = "1v8pcx43fgidv1g54s92k85anvcss08blkhm4yi1hn1ybl0mmw6c";
  };

  propagatedBuildInputs = [ six pycryptodome chardet sortedcontainers ];

  checkInputs = [ nose pytest ];

  # Run the test suite with pytest under the interpreter being built for.
  checkPhase = ''
    ${python.interpreter} -m pytest
  '';

  meta = with stdenv.lib; {
    description = "fork of PDFMiner using six for Python 2+3 compatibility";
    # Quoted: bare URL literals are deprecated Nix syntax.
    homepage = "https://github.com/pdfminer/pdfminer.six";
    license = licenses.mit;
    maintainers = with maintainers; [ psyanticy ];
  };
}
# pikepdf 1.0.5, built from the PyPI sdist.
{ stdenv, lib, fetchPypi, buildPythonPackage, qpdf, pybind11, setuptools_scm, setuptools, pytestrunner, defusedxml, lxml, pkgs }:

buildPythonPackage rec {
  pname = "pikepdf";
  version = "1.0.5";

  src = fetchPypi {
    inherit pname version;
    sha256 = "b878dda8618939b8dda61418f193904c720aaa606167906e33d6e21c5cb531e1";
  };

  # Needed only to build the package: both pybind11 packages (top-level and
  # Python), the qpdf library and the setup-time helpers. None of these is
  # propagated to dependents any more.
  buildInputs = [ pkgs.pybind11 pybind11 setuptools_scm pytestrunner qpdf ];

  # Python modules that dependents must also have available.
  propagatedBuildInputs = [ lxml defusedxml setuptools ];

  # Drop the 'setuptools_scm_git_archive' entry from setup.py.
  postPatch = ''
    substituteInPlace setup.py \
      --replace "'setuptools_scm_git_archive'," ""
  '';

  # Point HOME at the build's temporary directory. The original recipe noted
  # failures in the sandbox caused by "/homeless-shelter".
  preBuild = ''
    export HOME=$TMP
  '';

  doCheck = false;

  meta = with lib; {
    homepage = "https://github.com/pikepdf/pikepdf";
    description = "A Python library for reading and writing PDF, powered by qpdf";
    license = licenses.mpl20;
  };
}
# python-xmp-toolkit 2.0.1, built from the PyPI sdist.
{ stdenv, python, lib, fetchPypi, buildPythonPackage, setuptools, pytz, exempi }:

buildPythonPackage rec {
  pname = "python-xmp-toolkit";
  version = "2.0.1";

  src = fetchPypi {
    inherit pname version;
    sha256 = "f8d912946ff9fd46ed5c7c355aa5d4ea193328b3f200909ef32d9a28a1419a38";
  };

  buildInputs = [ setuptools exempi ];

  # pytz is a Python dependency of the installed module, so it has to be
  # propagated; as a plain build input it was invisible to dependents.
  propagatedBuildInputs = [ pytz ];

  doCheck = false;

  # Environment for the build phase only.
  # NOTE(review): LD_LIBRARY_PATH set here does not exist at run time. If the
  # module looks up the exempi shared library when it is imported, that lookup
  # needs to be patched to an absolute store path -- confirm.
  # NOTE(review): $out is still empty at this point, so the PYTHONPATH and
  # PATH additions are presumably no-ops; kept unchanged until verified.
  preBuild = ''
    export PYTHONPATH="$PYTHONPATH:$out/lib/python${python.pythonVersion}/site-packages"
    export PATH="$PATH:$out/bin"
    export LD_LIBRARY_PATH="${lib.makeLibraryPath [exempi]}"
  '';

  meta = with lib; {
    homepage = "https://github.com/python-xmp-toolkit/python-xmp-toolkit";
    description = "Python XMP Toolkit is a library for working with XMP metadata, as well as reading/writing XMP metadata stored in many different file formats.";
    license = licenses.bsd3;
  };
}
pdfminer_six already has a version in nixpkgs but it is not compatible with ocrmypdf.
1. Should I update the existing package instead of adding a new version?
2. What about the packages that use the old version: if they break, should I update them too? How do I make sure I have updated everything?
The tests need even more packages to be packaged before they can run, so I didn't get them all working. There is one unit test related to python-xmp-toolkit that fails anyway...
3. What to do about the tests?
4. I tried to package setuptools_scm_git_archive (which is a dependency) but was unable to get it working. Another existing package in nixpkgs solved this by patching the dependency out, which is what I ended up doing, but it feels hackish. I'm not sure how to make it work.
5. General cleanups/style. Obviously there are some comments I need to remove, and will remove, for the finalized version. I left them in for now to help clean up the rest. I'm not sure I did the right thing with python37Packages.cffi etc.
6. I'm not sure how many commits/PRs this should be, since there are so many new files, nor what to use for the commit messages.
# Ruffus 2.8.1, built from the PyPI sdist.
{ stdenv
, lib
, fetchPypi
, buildPythonPackage
, setuptools
, pkgs
}:

buildPythonPackage rec {
  pname = "ruffus";
  version = "2.8.1";

  # Source tarball from PyPI, pinned by hash.
  src = fetchPypi {
    inherit pname version;
    sha256 = "90bc1e57ffb95be11e9c4461a406fee63395898beacd35a1dce9dd2c468c2582";
  };

  buildInputs = [ setuptools ];

  meta = with lib; {
    description = "Ruffus is a Computation Pipeline library for python. It is open-sourced, powerful and user-friendly, and widely used in science and bioinformatics.";
    homepage = "http://www.ruffus.org.uk/";
    license = licenses.mit;
  };
}
# setuptools_scm_git_archive 1.0, built from the PyPI sdist.
#
# `pip`, `setuptools` and `wheel` stay in the argument list only so existing
# callers and overrides keep working; the recipe no longer uses them.
{ stdenv, buildPythonPackage, fetchPypi, pip, setuptools_scm, setuptools, wheel }:

buildPythonPackage rec {
  pname = "setuptools_scm_git_archive";
  version = "1.0";

  src = fetchPypi {
    inherit pname version;
    sha256 = "52425f905518247c685fc64c5fdba6e1e74443c8562e141c8de56059be0e31da";
  };

  # This is a plugin for setuptools_scm, so propagate it to dependents.
  # `pip` is no longer listed as an input: buildPythonPackage performs the
  # installation itself.
  propagatedBuildInputs = [ setuptools_scm ];

  meta = with stdenv.lib; {
    homepage = "https://github.com/Changaco/setuptools_scm_git_archive";
    description = "This is a setuptools_scm plugin that adds support for git archives (for example the ones GitHub automatically generates)";
    license = licenses.mit;
    maintainers = with maintainers; [ ];
  };
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment