summaryrefslogtreecommitdiff
path: root/gnu/packages/machine-learning.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/packages/machine-learning.scm')
-rw-r--r--gnu/packages/machine-learning.scm475
1 files changed, 365 insertions, 110 deletions
diff --git a/gnu/packages/machine-learning.scm b/gnu/packages/machine-learning.scm
index 77f1282a2b..6e94e21f3e 100644
--- a/gnu/packages/machine-learning.scm
+++ b/gnu/packages/machine-learning.scm
@@ -18,10 +18,14 @@
;;; Copyright © 2020, 2021, 2022, 2023, 2024 Vinicius Monego <monego@posteo.net>
;;; Copyright © 2020, 2021, 2022, 2023 Maxim Cournoyer <maxim.cournoyer@gmail.com>
;;; Copyright © 2022, 2023 Nicolas Graves <ngraves@ngraves.fr>
+;;; Copyright © 2022 Kiran Shila <me@kiranshila.com>
;;; Copyright © 2023 zamfofex <zamfofex@twdb.moe>
;;; Copyright © 2023 Navid Afkhami <navid.afkhami@mdc-berlin.de>
;;; Copyright © 2023 Zheng Junjie <873216071@qq.com>
;;; Copyright © 2023 Troy Figiel <troy@troyfigiel.com>
+;;; Copyright © 2024 Sharlatan Hellseher <sharlatanus@gmail.com>
+;;; Copyright © 2024 David Pflug <david@pflug.io>
+;;; Copyright © 2024 Timothee Mathieu <timothee.mathieu@inria.fr>
;;;
;;; This file is part of GNU Guix.
;;;
@@ -71,6 +75,7 @@
#:use-module (gnu packages cran)
#:use-module (gnu packages databases)
#:use-module (gnu packages dejagnu)
+ #:use-module (gnu packages documentation)
#:use-module (gnu packages freedesktop)
#:use-module (gnu packages gcc)
#:use-module (gnu packages gettext)
@@ -104,6 +109,7 @@
#:use-module (gnu packages python-science)
#:use-module (gnu packages python-web)
#:use-module (gnu packages python-xyz)
+ #:use-module (gnu packages rdf)
#:use-module (gnu packages regex)
#:use-module (gnu packages rpc)
#:use-module (gnu packages serialization)
@@ -657,6 +663,53 @@ networks) based on simulation of (stochastic) flow in graphs.")
algorithm.")
(license license:gpl3)))
+(define-public openmm
+  (package
+    (name "openmm")
+    (version "8.1.1")
+    (home-page "https://github.com/openmm/openmm/")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/openmm/openmm")
+             (commit version)))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32 "064vv6zaci30pj38z5lwfqscxssm67jqxkz30hcya9vm4ng831d5"))))
+    (build-system cmake-build-system)
+    (arguments
+     (list
+      ;; Build the shared library together with the C, Fortran and Python
+      ;; wrappers; the CUDA library is switched off.
+      #:configure-flags
+      '(list "-DOPENMM_BUILD_SHARED_LIB=TRUE"
+             "-DOPENMM_BUILD_C_AND_FORTRAN_WRAPPERS=TRUE"
+             "-DOPENMM_BUILD_PYTHON_WRAPPERS=TRUE"
+             "-DOPENMM_BUILD_CUDA_LIB=FALSE")
+      #:phases
+      #~(modify-phases %standard-phases
+          ;; Rewrite the install command of the Python wrappers so that it
+          ;; uses this package's output as its prefix.
+          (add-after 'unpack 'patch-python-build-system
+            (lambda _
+              (substitute* "wrappers/python/CMakeLists.txt"
+                (("install --root=\\\\\\$ENV\\{DESTDIR\\}/")
+                 (string-append
+                  "install --prefix=" #$output
+                  " --root=/ --single-version-externally-managed")))))
+          ;; The Python wrappers have their own make target, which is run
+          ;; once the regular install phase is done.
+          (add-after 'install 'install-python
+            (lambda _
+              (invoke "make" "PythonInstall"))))))
+    (native-inputs
+     (list doxygen gfortran opencl-headers python-cython swig))
+    (inputs
+     (list python-wrapper))
+    (propagated-inputs
+     (list python-numpy))
+    (synopsis "Toolkit for molecular simulation")
+    (description
+     "OpenMM is a toolkit for molecular simulation. It can be used either as
+a stand-alone application for running simulations, or as a library you call
+from your own code.")
+    ;; See https://github.com/openmm/openmm/issues/4278#issuecomment-1772982471
+    (license license:expat)))
+
(define-public randomjungle
(package
(name "randomjungle")
@@ -1629,8 +1682,11 @@ computing environments.")
(list
#:test-flags
'(list "-m" "not network"
- ;; This test tries to access the internet.
- "-k" "not test_load_boston_alternative")
+ "-k" (string-append
+ ;; This test tries to access the internet.
+ "not test_load_boston_alternative"
+ ;; DID NOT RAISE <class 'ValueError'>
+ " and not test_singular_matrix"))
#:phases
'(modify-phases %standard-phases
(add-before 'build 'configure
@@ -1670,52 +1726,61 @@ data analysis.")
(license license:bsd-3)))
(define-public python-scikit-learn-extra
- (package
- (name "python-scikit-learn-extra")
- (version "0.3.0")
- (source
- (origin
- (method git-fetch)
- (uri (git-reference
- (url "https://github.com/scikit-learn-contrib/scikit-learn-extra")
- (commit (string-append "v" version))))
- (file-name (git-file-name name version))
- (sha256
- (base32 "0yy6ka94ss88f3r7b6mpjf1l8lnv7aabhsg844pigfj8lfiv0wvl"))))
- (build-system pyproject-build-system)
- (arguments
- (list #:phases
- #~(modify-phases %standard-phases
- (add-after 'build 'build-ext
- (lambda _
- (invoke "python" "setup.py" "build_ext"
- "--inplace")))
- (replace 'check
- (lambda* (#:key tests? #:allow-other-keys)
- (when tests?
- ;; Restrict OpenBLAS threads to prevent segfaults while testing!
- (setenv "OPENBLAS_NUM_THREADS" "1")
-
- ;; Some tests require write access to $HOME.
- (setenv "HOME" "/tmp")
-
- ;; Step out of the source directory to avoid interference;
- ;; we want to run the installed code with extensions etc.
- (with-directory-excursion "/tmp"
- (invoke "pytest" "-vv" "--pyargs"
- "sklearn_extra"
- ;; ignore tests that require network
- "-k" "not test_build"))))))))
- (propagated-inputs (list python-numpy python-scikit-learn python-scipy))
- (native-inputs (list python-pytest python-pytest-cov python-cython))
- (home-page "https://github.com/scikit-learn-contrib/scikit-learn-extra")
- (synopsis "Set of tools for scikit-learn")
- (description
- "This package provides a Python module for machine learning that extends
+ ;; This commit fixes an incompatibility with newer versions of scikit-learn
+ (let ((commit "0f95d8dda4c69f9de4fb002366041adcb1302f3b")
+ (revision "1"))
+ (package
+ (name "python-scikit-learn-extra")
+ (version (git-version "0.3.0" revision commit))
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/scikit-learn-contrib/scikit-learn-extra")
+ (commit commit)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "0a248sff1psfwzmngj465gzyisq20d83nzpwpq2cspxhih51m6j9"))))
+ (build-system pyproject-build-system)
+ (arguments
+ (list
+ #:test-flags
+ ;; ignore tests that require network
+ '(list "--pyargs" "sklearn_extra"
+ "-k" "not test_build")
+ #:phases
+ #~(modify-phases %standard-phases
+ ;; Build the extension modules in place once the regular build
+ ;; phase has run.
+ (add-after 'build 'build-ext
+ (lambda _
+ (invoke "python" "setup.py" "build_ext"
+ "--inplace")))
+ ;; Custom check phase: prepare the environment, then run pytest
+ ;; from /tmp with the TEST-FLAGS given above.
+ (replace 'check
+ (lambda* (#:key tests? test-flags #:allow-other-keys)
+ (when tests?
+ ;; Restrict OpenBLAS threads to prevent segfaults while testing!
+ (setenv "OPENBLAS_NUM_THREADS" "1")
+
+ ;; Some tests require write access to $HOME.
+ (setenv "HOME" "/tmp")
+
+ ;; Step out of the source directory to avoid interference;
+ ;; we want to run the installed code with extensions etc.
+ (with-directory-excursion "/tmp"
+ (apply invoke "pytest" "-vv" test-flags))))))))
+ (propagated-inputs
+ (list python-numpy
+ python-scikit-learn
+ python-scipy
+ python-packaging))
+ (native-inputs (list python-pytest python-pytest-cov python-cython))
+ (home-page "https://github.com/scikit-learn-contrib/scikit-learn-extra")
+ (synopsis "Set of tools for scikit-learn")
+ (description
+ "This package provides a Python module for machine learning that extends
scikit-learn. It includes algorithms that are useful but do not satisfy the
scikit-learn inclusion criteria, for instance due to their novelty or lower
citation number.")
- (license license:bsd-3)))
+ (license license:bsd-3))))
(define-public python-thinc
(package
@@ -1821,6 +1886,44 @@ techniques commonly used in datasets showing strong between-class imbalance.
It is compatible with @code{scikit-learn}.")
(license license:expat)))
+(define-public python-hdbscan
+  (package
+    (name "python-hdbscan")
+    (version "0.8.33")
+    (home-page "https://github.com/scikit-learn-contrib/hdbscan")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "hdbscan" version))
+       (sha256
+        (base32 "03gr70ys1zrnp15pxzhichvrdj5bj88p6p5k0wj8vx251rgvryjp"))))
+    (build-system pyproject-build-system)
+    (arguments
+     (list
+      #:phases
+      #~(modify-phases %standard-phases
+          ;; Build the extension modules inside the source tree right
+          ;; before the check phase runs.
+          (add-before 'check 'build-extensions
+            (lambda _
+              (invoke "python" "setup.py" "build_ext" "--inplace"))))))
+    (native-inputs
+     (list python-cython
+           python-nose
+           python-pytest
+           python-pandas
+           python-networkx))
+    (propagated-inputs
+     (list python-joblib
+           python-numpy
+           python-scikit-learn
+           python-scipy))
+    (synopsis "High performance implementation of HDBSCAN clustering")
+    (description "HDBSCAN - Hierarchical Density-Based Spatial Clustering of
+Applications with Noise. Performs DBSCAN over varying epsilon values and
+integrates the result to find a clustering that gives the best stability over
+epsilon. This allows HDBSCAN to find clusters of varying densities (unlike
+DBSCAN), and be more robust to parameter selection. HDBSCAN is ideal for
+exploratory data analysis; it's a fast and robust algorithm that you can trust
+to return meaningful clusters (if there are any).")
+    (license license:bsd-3)))
+
(define-public python-pynndescent
(package
(name "python-pynndescent")
@@ -1850,7 +1953,7 @@ for k-neighbor-graph construction and approximate nearest neighbor search.")
(define-public python-opentsne
(package
(name "python-opentsne")
- (version "1.0.0")
+ (version "1.0.1")
(source
(origin
(method git-fetch) ; no tests in PyPI release
@@ -1859,7 +1962,7 @@ for k-neighbor-graph construction and approximate nearest neighbor search.")
(commit (string-append "v" version))))
(file-name (git-file-name name version))
(sha256
- (base32 "05qzpq1zjs42bl0z8girfwcj3nfxs1a99c5525vp3589sglk351g"))))
+ (base32 "0xjp0l4rxk1s685skbx50m3m9hwlj78w74qwgswnkmkk6f7c8dsi"))))
(build-system python-build-system)
(arguments
`(#:phases
@@ -3406,57 +3509,79 @@ in a fast and accurate way.")
(inherit xgboost)
(name "python-xgboost")
(source (package-source xgboost))
- (build-system python-build-system)
+ (build-system pyproject-build-system)
(arguments
- `(#:phases
- (modify-phases %standard-phases
- (add-after 'unpack 'preparations
- (lambda _
- ;; Move python-package content to parent directory to silence
- ;; some warnings about files not being found if we chdir.
- (rename-file "python-package/xgboost" "xgboost")
- (rename-file "python-package/README.rst" "README.rst")
- (rename-file "python-package/setup.cfg" "setup.cfg")
- (rename-file "python-package/setup.py" "setup.py")
- ;; Skip rebuilding libxgboost.so.
- (substitute* "setup.py"
- (("ext_modules=\\[CMakeExtension\\('libxgboost'\\)\\],") "")
- (("'install_lib': InstallLib,") ""))))
- (add-after 'install 'install-version-and-libxgboost
- (lambda* (#:key inputs outputs #:allow-other-keys)
- (let* ((out (assoc-ref outputs "out"))
- (pylib (string-append out "/lib/python"
- ,(version-major+minor
- (package-version python))
- "/site-packages"))
- (xgbdir (string-append pylib "/xgboost"))
- (version-file (string-append xgbdir "/VERSION"))
- (libxgboost (string-append (assoc-ref inputs "xgboost")
- "/lib/libxgboost.so")))
- (with-output-to-file version-file
- (lambda ()
- (display ,(package-version xgboost))))
- (mkdir-p (string-append xgbdir "/lib"))
- (symlink libxgboost (string-append xgbdir "/lib"
- "/libxgboost.so")))))
- (replace 'check
- ;; Python-specific tests are located in tests/python.
- (lambda* (#:key inputs outputs tests? #:allow-other-keys)
- (when tests?
- (add-installed-pythonpath inputs outputs)
- (invoke "pytest" "tests/python"
- ;; FIXME: CLI tests fail with PermissionError.
- "--ignore" "tests/python/test_cli.py" "-k"
- (string-append
- "not test_cli_regression_demo"
- ;; The tests below open a network connection.
- " and not test_model_compatibility"
- " and not test_get_group"
- " and not test_cv_no_shuffle"
- " and not test_cv"
- " and not test_training"
- ;; "'['./runexp.sh']' returned non-zero exit status 1"
- " and not test_cli_binary_classification"))))))))
+ (list
+ #:test-flags
+ '(list "tests/python"
+ ;; FIXME: CLI tests fail with PermissionError.
+ "--ignore" "tests/python/test_cli.py"
+ "-k"
+ (string-append
+ "not test_cli_regression_demo"
+ ;; These tests use the Boston dataset that has been
+ ;; removed from scipy.
+ " and not test_sklearn_demo"
+ " and not test_sklearn_parallel_demo"
+ " and not test_predict_shape"
+ " and not test_num_parallel_tree"
+ " and not test_boston_housing_regression"
+ " and not test_boston_housing_rf_regression"
+ " and not test_parameter_tuning"
+ " and not test_regression_with_custom_objective"
+ " and not test_RFECV"
+ ;; Pandas incompatibility? Says:
+ ;; '_CalibratedClassifier' object has no attribute
+ ;; 'base_estimator'
+ " and not test_pandas_input"
+ ;; Accuracy problems?
+ " and not test_exact"
+ " and not test_approx"
+ " and not test_hist"
+ ;; The tests below open a network connection.
+ " and not test_model_compatibility"
+ " and not test_get_group"
+ " and not test_cv_no_shuffle"
+ " and not test_cv"
+ " and not test_training"
+ ;; "'['./runexp.sh']' returned non-zero exit status 1"
+ " and not test_cli_binary_classification"))
+ #:phases
+ #~(modify-phases %standard-phases
+ (add-after 'unpack 'preparations
+ (lambda _
+ ;; Move python-package content to parent directory to silence
+ ;; some warnings about files not being found if we chdir.
+ (rename-file "python-package/xgboost" "xgboost")
+ (rename-file "python-package/README.rst" "README.rst")
+ (rename-file "python-package/setup.cfg" "setup.cfg")
+ (rename-file "python-package/setup.py" "setup.py")
+ ;; Skip rebuilding libxgboost.so.
+ (substitute* "setup.py"
+ (("ext_modules=\\[CMakeExtension\\('libxgboost'\\)\\],") "")
+ (("'install_lib': InstallLib,") ""))
+ ;; Remove bad dataset. This has been removed in scipy.
+ (substitute* "tests/python/testing.py"
+ (("TestDataset\\('boston', get_boston, 'reg:squarederror', 'rmse'\\),")
+ "")
+ (("datasets.load_boston")
+ "datasets.load_digits"))))
+ (add-after 'install 'install-version-and-libxgboost
+ (lambda* (#:key inputs #:allow-other-keys)
+ (let* ((pylib (string-append #$output "/lib/python"
+ #$(version-major+minor
+ (package-version python))
+ "/site-packages"))
+ (xgbdir (string-append pylib "/xgboost"))
+ (version-file (string-append xgbdir "/VERSION"))
+ (libxgboost (string-append (assoc-ref inputs "xgboost")
+ "/lib/libxgboost.so")))
+ (with-output-to-file version-file
+ (lambda ()
+ (display #$(package-version xgboost))))
+ (mkdir-p (string-append xgbdir "/lib"))
+ (symlink libxgboost (string-append xgbdir "/lib"
+ "/libxgboost.so"))))))))
(native-inputs
(list python-pandas python-pytest python-scikit-learn))
(inputs
@@ -4149,6 +4274,86 @@ Note: currently this package does not provide GPU support.")
(replace "onnx" onnx-for-torch2)
(replace "onnx-optimizer" onnx-optimizer-for-torch2)))))
+(define-public python-pytorch-geometric
+  (package
+    (name "python-pytorch-geometric")
+    (version "2.4.0")
+    (home-page "https://pyg.org")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/pyg-team/pytorch_geometric/")
+             (commit version)))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32 "0hrs579asjsph16hyb4ablkbgfwd5j9y5s6ny7ahn3qrbkl2ji1g"))))
+    (build-system pyproject-build-system)
+    (arguments
+     (list
+      #:test-flags
+      '(list
+        ;; Hangs with AttributeError: 'NoneType' object has no attribute
+        ;; 'rpc_async'
+        "--ignore=test/distributed/test_rpc.py"
+        ;; A message passing jinja template is missing
+        "--ignore=test/nn/conv/test_message_passing.py"
+        "--ignore=test/nn/test_sequential.py"
+        "--ignore=test/nn/models/test_basic_gnn.py"
+        ;; These all fail with a size mismatch error such as
+        ;; RuntimeError: shape '[-1, 2, 1, 1]' is invalid for input of size 3
+        "--ignore=test/explain/algorithm/test_captum_explainer.py"
+        "-k"
+        (string-append
+         ;; Permissions error
+         "not test_packaging"
+         ;; This can fail due to accuracy problems
+         " and not test_gdc"
+         ;; These refuse to be run on CPU and really want a GPU
+         " and not test_add_random_walk_pe"
+         " and not test_asap"
+         " and not test_two_hop"))
+      #:phases
+      '(modify-phases %standard-phases
+         (add-after 'unpack 'delete-top-level-directories
+           (lambda _
+             ;; These top-level directories confuse the pyproject build
+             ;; system, so remove them before building.
+             (for-each delete-file-recursively
+                       '("conda" "docker" "graphgym")))))))
+    (native-inputs
+     (list python-flit-core
+           python-pytest
+           python-pytest-cov))
+    (propagated-inputs
+     (list onnx
+           python-captum
+           python-graphviz
+           python-h5py
+           python-jinja2
+           python-matplotlib
+           python-networkx
+           python-numba
+           python-numpy
+           python-opt-einsum
+           python-pandas
+           python-protobuf
+           python-psutil
+           python-pyparsing
+           python-pytorch-lightning
+           python-rdflib
+           python-requests
+           python-scikit-image
+           python-scikit-learn
+           python-scipy
+           python-statsmodels
+           python-sympy
+           python-tabulate
+           python-torchmetrics
+           python-tqdm))
+    (synopsis "Graph Neural Network library for PyTorch")
+    (description
+     "PyG is a library built upon PyTorch to easily write and train Graph
+Neural Networks for a wide range of applications related to structured data.")
+    (license license:expat)))
+
(define-public python-lightning-cloud
(package
(name "python-lightning-cloud")
@@ -4733,24 +4938,23 @@ and Numpy.")
(file-name (git-file-name name version))
(sha256
(base32 "0n1vsih99pvswcaygdxkc6kq6r48ny130z6ca8pp3281396r2ykw"))))
- (build-system python-build-system)
+ (build-system pyproject-build-system)
(arguments
- `(#:phases
- (modify-phases %standard-phases
- (replace 'check
- (lambda* (#:key tests? #:allow-other-keys)
+ (list
+ #:test-flags
+ '(list "-vv" "--stage=unit"
;; This tests features that are only implemented when non-free
;; software is available (Intel MKL or CUDA).
- (for-each delete-file
- (list "tests/distributions/test_spanning_tree.py"
- "tests/infer/mcmc/test_mcmc_api.py"))
-
+ "--ignore=tests/distributions/test_spanning_tree.py"
+ "--ignore=tests/infer/mcmc/test_mcmc_api.py"
+ ;; This test fails sometimes.
+ "--ignore=tests/optim/test_optim.py"
;; Four test_gamma_elbo tests fail with bad values for unknown
;; reasons.
- (delete-file "tests/distributions/test_rejector.py")
- ;; This test fails sometimes.
- (delete-file "tests/optim/test_optim.py")
- (invoke "pytest" "-vv" "--stage=unit"))))))
+ "--ignore=tests/distributions/test_rejector.py"
+ ;; This looks like a test system failure. All of these fail
+ ;; because x is an array of functions, not an array of numbers.
+ "-k" "not test_sample")))
(propagated-inputs
(list python-numpy
python-opt-einsum
@@ -5163,3 +5367,54 @@ Brian 2 simulator.")
"OneAPI Deep Neural Network Library (oneDNN) is a cross-platform
performance library of basic building blocks for deep learning applications.")
(license license:asl2.0)))
+
+(define-public python-gguf
+  (package
+    (name "python-gguf")
+    (version "0.6.0")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "gguf" version))
+       (sha256
+        (base32 "0rbyc2h3kpqnrvbyjvv8a69l577jv55a31l12jnw21m1lamjxqmj"))))
+    (build-system pyproject-build-system)
+    (arguments
+     ;; NOTE(review): the test suite is disabled without a stated reason;
+     ;; presumably the PyPI archive ships no tests -- confirm and record the
+     ;; actual reason here.
+     (list #:tests? #false))
+    ;; Poetry and pytest are build-time tools, so they are native inputs
+    ;; rather than regular inputs.
+    (native-inputs (list poetry python-pytest))
+    (propagated-inputs (list python-numpy))
+    (home-page "https://ggml.ai")
+    (synopsis "Read and write ML models in GGUF for GGML")
+    (description
+     "This package provides a Python library for reading and writing machine
+learning models in the GGUF and GGML formats.")
+    (license license:expat)))
+
+(define-public python-gymnasium
+  (package
+    (name "python-gymnasium")
+    (version "0.29.1")
+    (home-page "https://gymnasium.farama.org/")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "gymnasium" version))
+       (sha256
+        (base32 "1cab4wsnlsxn2z90qmymv8ppmsq8yq2amiqwid3r0xfbxx92flqs"))))
+    (build-system pyproject-build-system)
+    (arguments
+     (list
+      #:phases
+      #~(modify-phases %standard-phases
+          ;; Write an empty "tests/__init__.py" right after unpacking, so
+          ;; that the tests directory is a Python package.
+          (add-after 'unpack 'create-tests-module
+            (lambda _
+              (with-output-to-file "tests/__init__.py"
+                (lambda ()
+                  (display ""))))))))
+    (native-inputs
+     (list python-pytest python-scipy))
+    (propagated-inputs
+     (list python-cloudpickle
+           python-farama-notifications
+           python-importlib-metadata
+           python-numpy
+           python-typing-extensions))
+    (synopsis
+     "Standard API for reinforcement learning and a set of reference environments")
+    (description
+     "This package provides a standard API for reinforcement learning and a
+diverse set of reference environments (formerly Gym).")
+    (license license:expat)))