diff options
Diffstat (limited to 'gnu/packages/machine-learning.scm')
-rw-r--r-- | gnu/packages/machine-learning.scm | 475 |
1 files changed, 365 insertions, 110 deletions
Patch summary (plain text before the first "diff --git" header is ignored by patch tools):
  * New packages: openmm, python-hdbscan, python-pytorch-geometric, python-gguf, python-gymnasium.
  * python-opentsne: 1.0.0 -> 1.0.1.
  * python-scikit-learn-extra: pinned to a Git commit; pytest arguments moved to #:test-flags.
  * python-xgboost, python-pyro-ppl hunk: python-build-system -> pyproject-build-system with #:test-flags.
  * python-scikit-learn: additionally skips test_singular_matrix.
Review notes: python-gguf lists its build/test tools under native-inputs, and the
python-xgboost comments name scikit-learn (not scipy) as the library that dropped the
Boston dataset; the "index" blob hashes below therefore no longer match the post-image.
Whitespace was reconstructed from a collapsed rendering of the diff.

diff --git a/gnu/packages/machine-learning.scm b/gnu/packages/machine-learning.scm
index 77f1282a2b..6e94e21f3e 100644
--- a/gnu/packages/machine-learning.scm
+++ b/gnu/packages/machine-learning.scm
@@ -18,10 +18,14 @@
 ;;; Copyright © 2020, 2021, 2022, 2023, 2024 Vinicius Monego <monego@posteo.net>
 ;;; Copyright © 2020, 2021, 2022, 2023 Maxim Cournoyer <maxim.cournoyer@gmail.com>
 ;;; Copyright © 2022, 2023 Nicolas Graves <ngraves@ngraves.fr>
+;;; Copyright © 2022 Kiran Shila <me@kiranshila.com>
 ;;; Copyright © 2023 zamfofex <zamfofex@twdb.moe>
 ;;; Copyright © 2023 Navid Afkhami <navid.afkhami@mdc-berlin.de>
 ;;; Copyright © 2023 Zheng Junjie <873216071@qq.com>
 ;;; Copyright © 2023 Troy Figiel <troy@troyfigiel.com>
+;;; Copyright © 2024 Sharlatan Hellseher <sharlatanus@gmail.com>
+;;; Copyright © 2024 David Pflug <david@pflug.io>
+;;; Copyright © 2024 Timothee Mathieu <timothee.mathieu@inria.fr>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -71,6 +75,7 @@
   #:use-module (gnu packages cran)
   #:use-module (gnu packages databases)
   #:use-module (gnu packages dejagnu)
+  #:use-module (gnu packages documentation)
   #:use-module (gnu packages freedesktop)
   #:use-module (gnu packages gcc)
   #:use-module (gnu packages gettext)
@@ -104,6 +109,7 @@
   #:use-module (gnu packages python-science)
   #:use-module (gnu packages python-web)
   #:use-module (gnu packages python-xyz)
+  #:use-module (gnu packages rdf)
   #:use-module (gnu packages regex)
   #:use-module (gnu packages rpc)
   #:use-module (gnu packages serialization)
@@ -657,6 +663,53 @@ networks) based on simulation of (stochastic) flow in graphs.")
 algorithm.")
     (license license:gpl3)))
 
+(define-public openmm
+  (package
+    (name "openmm")
+    (version "8.1.1")
+    (source (origin
+              (method git-fetch)
+              (uri (git-reference
+                    (url "https://github.com/openmm/openmm")
+                    (commit version)))
+              (file-name (git-file-name name version))
+              (sha256
+               (base32
+                "064vv6zaci30pj38z5lwfqscxssm67jqxkz30hcya9vm4ng831d5"))))
+    (build-system cmake-build-system)
+    (arguments
+     (list
+      #:configure-flags
+      '(list "-DOPENMM_BUILD_SHARED_LIB=TRUE"
+             "-DOPENMM_BUILD_C_AND_FORTRAN_WRAPPERS=TRUE"
+             "-DOPENMM_BUILD_PYTHON_WRAPPERS=TRUE"
+             "-DOPENMM_BUILD_CUDA_LIB=FALSE")
+      #:phases
+      #~(modify-phases %standard-phases
+          (add-after 'unpack 'patch-python-build-system
+            (lambda _
+              (substitute* "wrappers/python/CMakeLists.txt"
+                (("install --root=\\\\\\$ENV\\{DESTDIR\\}/")
+                 (string-append "install --prefix=" #$output
+                                " --root=/ --single-version-externally-managed")))))
+          (add-after 'install 'install-python
+            (lambda _
+              (invoke "make" "PythonInstall"))))))
+    (inputs
+     (list python-wrapper))
+    (propagated-inputs
+     (list python-numpy))
+    (native-inputs
+     (list doxygen gfortran opencl-headers python-cython swig))
+    (home-page "https://github.com/openmm/openmm/")
+    (synopsis "Toolkit for molecular simulation")
+    (description
+     "OpenMM is a toolkit for molecular simulation. It can be used either as
+a stand-alone application for running simulations, or as a library you call
+from your own code.")
+    ;; See https://github.com/openmm/openmm/issues/4278#issuecomment-1772982471
+    (license license:expat)))
+
 (define-public randomjungle
   (package
     (name "randomjungle")
@@ -1629,8 +1682,11 @@ computing environments.")
      (list
       #:test-flags
       '(list "-m" "not network"
-             ;; This test tries to access the internet.
-             "-k" "not test_load_boston_alternative")
+             "-k" (string-append
+                   ;; This test tries to access the internet.
+                   "not test_load_boston_alternative"
+                   ;; DID NOT RAISE <class 'ValueError'>
+                   " and not test_singular_matrix"))
       #:phases
       '(modify-phases %standard-phases
          (add-before 'build 'configure
@@ -1670,52 +1726,61 @@ data analysis.")
     (license license:bsd-3)))
 
 (define-public python-scikit-learn-extra
-  (package
-    (name "python-scikit-learn-extra")
-    (version "0.3.0")
-    (source
-     (origin
-       (method git-fetch)
-       (uri (git-reference
-             (url "https://github.com/scikit-learn-contrib/scikit-learn-extra")
-             (commit (string-append "v" version))))
-       (file-name (git-file-name name version))
-       (sha256
-        (base32 "0yy6ka94ss88f3r7b6mpjf1l8lnv7aabhsg844pigfj8lfiv0wvl"))))
-    (build-system pyproject-build-system)
-    (arguments
-     (list #:phases
-           #~(modify-phases %standard-phases
-               (add-after 'build 'build-ext
-                 (lambda _
-                   (invoke "python" "setup.py" "build_ext"
-                           "--inplace")))
-               (replace 'check
-                 (lambda* (#:key tests? #:allow-other-keys)
-                   (when tests?
-                     ;; Restrict OpenBLAS threads to prevent segfaults while testing!
-                     (setenv "OPENBLAS_NUM_THREADS" "1")
-
-                     ;; Some tests require write access to $HOME.
-                     (setenv "HOME" "/tmp")
-
-                     ;; Step out of the source directory to avoid interference;
-                     ;; we want to run the installed code with extensions etc.
-                     (with-directory-excursion "/tmp"
-                       (invoke "pytest" "-vv" "--pyargs"
-                               "sklearn_extra"
-                               ;; ignore tests that require network
-                               "-k" "not test_build"))))))))
-    (propagated-inputs (list python-numpy python-scikit-learn python-scipy))
-    (native-inputs (list python-pytest python-pytest-cov python-cython))
-    (home-page "https://github.com/scikit-learn-contrib/scikit-learn-extra")
-    (synopsis "Set of tools for scikit-learn")
-    (description
-     "This package provides a Python module for machine learning that extends
+  ;; This commit fixes an incompatibility with newer versions of scikit-learn
+  (let ((commit "0f95d8dda4c69f9de4fb002366041adcb1302f3b")
+        (revision "1"))
+    (package
+      (name "python-scikit-learn-extra")
+      (version (git-version "0.3.0" revision commit))
+      (source
+       (origin
+         (method git-fetch)
+         (uri (git-reference
+               (url "https://github.com/scikit-learn-contrib/scikit-learn-extra")
+               (commit commit)))
+         (file-name (git-file-name name version))
+         (sha256
+          (base32 "0a248sff1psfwzmngj465gzyisq20d83nzpwpq2cspxhih51m6j9"))))
+      (build-system pyproject-build-system)
+      (arguments
+       (list
+        #:test-flags
+        ;; ignore tests that require network
+        '(list "--pyargs" "sklearn_extra"
+               "-k" "not test_build")
+        #:phases
+        #~(modify-phases %standard-phases
+            (add-after 'build 'build-ext
+              (lambda _
+                (invoke "python" "setup.py" "build_ext"
+                        "--inplace")))
+            (replace 'check
+              (lambda* (#:key tests? test-flags #:allow-other-keys)
+                (when tests?
+                  ;; Restrict OpenBLAS threads to prevent segfaults while testing!
+                  (setenv "OPENBLAS_NUM_THREADS" "1")
+
+                  ;; Some tests require write access to $HOME.
+                  (setenv "HOME" "/tmp")
+
+                  ;; Step out of the source directory to avoid interference;
+                  ;; we want to run the installed code with extensions etc.
+                  (with-directory-excursion "/tmp"
+                    (apply invoke "pytest" "-vv" test-flags))))))))
+      (propagated-inputs
+       (list python-numpy
+             python-scikit-learn
+             python-scipy
+             python-packaging))
+      (native-inputs (list python-pytest python-pytest-cov python-cython))
+      (home-page "https://github.com/scikit-learn-contrib/scikit-learn-extra")
+      (synopsis "Set of tools for scikit-learn")
+      (description
+       "This package provides a Python module for machine learning that extends
 scikit-learn. It includes algorithms that are useful but do not satisfy the
 scikit-learn inclusion criteria, for instance due to their novelty or lower
 citation number.")
-    (license license:bsd-3)))
+      (license license:bsd-3))))
 
 (define-public python-thinc
   (package
@@ -1821,6 +1886,44 @@ techniques commonly used in datasets showing strong between-class imbalance.
 It is compatible with @code{scikit-learn}.")
     (license license:expat)))
 
+(define-public python-hdbscan
+  (package
+    (name "python-hdbscan")
+    (version "0.8.33")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "hdbscan" version))
+       (sha256
+        (base32 "03gr70ys1zrnp15pxzhichvrdj5bj88p6p5k0wj8vx251rgvryjp"))))
+    (build-system pyproject-build-system)
+    (arguments
+     (list
+      #:phases
+      #~(modify-phases %standard-phases
+          (add-before 'check 'build-extensions
+            (lambda _
+              (invoke "python" "setup.py" "build_ext" "--inplace"))))))
+    (propagated-inputs (list python-joblib
+                             python-numpy
+                             python-scikit-learn
+                             python-scipy))
+    (native-inputs (list python-cython
+                         python-nose
+                         python-pytest
+                         python-pandas
+                         python-networkx))
+    (home-page "https://github.com/scikit-learn-contrib/hdbscan")
+    (synopsis "High performance implementation of HDBSCAN clustering")
+    (description "HDBSCAN - Hierarchical Density-Based Spatial Clustering of
+Applications with Noise. Performs DBSCAN over varying epsilon values and
+integrates the result to find a clustering that gives the best stability over
+epsilon. This allows HDBSCAN to find clusters of varying densities (unlike
+DBSCAN), and be more robust to parameter selection. HDBSCAN is ideal for
+exploratory data analysis; it's a fast and robust algorithm that you can trust
+to return meaningful clusters (if there are any).")
+    (license license:bsd-3)))
+
 (define-public python-pynndescent
   (package
     (name "python-pynndescent")
@@ -1850,7 +1953,7 @@ for k-neighbor-graph construction and approximate nearest neighbor search.")
 (define-public python-opentsne
   (package
     (name "python-opentsne")
-    (version "1.0.0")
+    (version "1.0.1")
     (source
      (origin
        (method git-fetch) ; no tests in PyPI release
@@ -1859,7 +1962,7 @@ for k-neighbor-graph construction and approximate nearest neighbor search.")
              (commit (string-append "v" version))))
        (file-name (git-file-name name version))
        (sha256
-        (base32 "05qzpq1zjs42bl0z8girfwcj3nfxs1a99c5525vp3589sglk351g"))))
+        (base32 "0xjp0l4rxk1s685skbx50m3m9hwlj78w74qwgswnkmkk6f7c8dsi"))))
     (build-system python-build-system)
     (arguments
      `(#:phases
@@ -3406,57 +3509,79 @@ in a fast and accurate way.")
     (inherit xgboost)
     (name "python-xgboost")
     (source (package-source xgboost))
-    (build-system python-build-system)
+    (build-system pyproject-build-system)
     (arguments
-     `(#:phases
-       (modify-phases %standard-phases
-         (add-after 'unpack 'preparations
-           (lambda _
-             ;; Move python-package content to parent directory to silence
-             ;; some warnings about files not being found if we chdir.
-             (rename-file "python-package/xgboost" "xgboost")
-             (rename-file "python-package/README.rst" "README.rst")
-             (rename-file "python-package/setup.cfg" "setup.cfg")
-             (rename-file "python-package/setup.py" "setup.py")
-             ;; Skip rebuilding libxgboost.so.
-             (substitute* "setup.py"
-               (("ext_modules=\\[CMakeExtension\\('libxgboost'\\)\\],") "")
-               (("'install_lib': InstallLib,") ""))))
-         (add-after 'install 'install-version-and-libxgboost
-           (lambda* (#:key inputs outputs #:allow-other-keys)
-             (let* ((out (assoc-ref outputs "out"))
-                    (pylib (string-append out "/lib/python"
-                                          ,(version-major+minor
-                                            (package-version python))
-                                          "/site-packages"))
-                    (xgbdir (string-append pylib "/xgboost"))
-                    (version-file (string-append xgbdir "/VERSION"))
-                    (libxgboost (string-append (assoc-ref inputs "xgboost")
-                                               "/lib/libxgboost.so")))
-               (with-output-to-file version-file
-                 (lambda ()
-                   (display ,(package-version xgboost))))
-               (mkdir-p (string-append xgbdir "/lib"))
-               (symlink libxgboost (string-append xgbdir "/lib"
-                                                  "/libxgboost.so")))))
-         (replace 'check
-           ;; Python-specific tests are located in tests/python.
-           (lambda* (#:key inputs outputs tests? #:allow-other-keys)
-             (when tests?
-               (add-installed-pythonpath inputs outputs)
-               (invoke "pytest" "tests/python"
-                       ;; FIXME: CLI tests fail with PermissionError.
-                       "--ignore" "tests/python/test_cli.py" "-k"
-                       (string-append
-                        "not test_cli_regression_demo"
-                        ;; The tests below open a network connection.
-                        " and not test_model_compatibility"
-                        " and not test_get_group"
-                        " and not test_cv_no_shuffle"
-                        " and not test_cv"
-                        " and not test_training"
-                        ;; "'['./runexp.sh']' returned non-zero exit status 1"
-                        " and not test_cli_binary_classification"))))))))
+     (list
+      #:test-flags
+      '(list "tests/python"
+             ;; FIXME: CLI tests fail with PermissionError.
+             "--ignore" "tests/python/test_cli.py"
+             "-k"
+             (string-append
+              "not test_cli_regression_demo"
+              ;; These tests use the Boston dataset that has been
+              ;; removed from scikit-learn.
+              " and not test_sklearn_demo"
+              " and not test_sklearn_parallel_demo"
+              " and not test_predict_shape"
+              " and not test_num_parallel_tree"
+              " and not test_boston_housing_regression"
+              " and not test_boston_housing_rf_regression"
+              " and not test_parameter_tuning"
+              " and not test_regression_with_custom_objective"
+              " and not test_RFECV"
+              ;; Pandas incompatibility? Says:
+              ;; '_CalibratedClassifier' object has no attribute
+              ;; 'base_estimator'
+              " and not test_pandas_input"
+              ;; Accuracy problems?
+              " and not test_exact"
+              " and not test_approx"
+              " and not test_hist"
+              ;; The tests below open a network connection.
+              " and not test_model_compatibility"
+              " and not test_get_group"
+              " and not test_cv_no_shuffle"
+              " and not test_cv"
+              " and not test_training"
+              ;; "'['./runexp.sh']' returned non-zero exit status 1"
+              " and not test_cli_binary_classification"))
+      #:phases
+      #~(modify-phases %standard-phases
+          (add-after 'unpack 'preparations
+            (lambda _
+              ;; Move python-package content to parent directory to silence
+              ;; some warnings about files not being found if we chdir.
+              (rename-file "python-package/xgboost" "xgboost")
+              (rename-file "python-package/README.rst" "README.rst")
+              (rename-file "python-package/setup.cfg" "setup.cfg")
+              (rename-file "python-package/setup.py" "setup.py")
+              ;; Skip rebuilding libxgboost.so.
+              (substitute* "setup.py"
+                (("ext_modules=\\[CMakeExtension\\('libxgboost'\\)\\],") "")
+                (("'install_lib': InstallLib,") ""))
+              ;; Remove bad dataset. This has been removed in scikit-learn.
+              (substitute* "tests/python/testing.py"
+                (("TestDataset\\('boston', get_boston, 'reg:squarederror', 'rmse'\\),")
+                 "")
+                (("datasets.load_boston")
+                 "datasets.load_digits"))))
+          (add-after 'install 'install-version-and-libxgboost
+            (lambda* (#:key inputs #:allow-other-keys)
+              (let* ((pylib (string-append #$output "/lib/python"
+                                           #$(version-major+minor
+                                              (package-version python))
+                                           "/site-packages"))
+                     (xgbdir (string-append pylib "/xgboost"))
+                     (version-file (string-append xgbdir "/VERSION"))
+                     (libxgboost (string-append (assoc-ref inputs "xgboost")
+                                                "/lib/libxgboost.so")))
+                (with-output-to-file version-file
+                  (lambda ()
+                    (display #$(package-version xgboost))))
+                (mkdir-p (string-append xgbdir "/lib"))
+                (symlink libxgboost (string-append xgbdir "/lib"
+                                                   "/libxgboost.so"))))))))
     (native-inputs
      (list python-pandas python-pytest python-scikit-learn))
     (inputs
@@ -4149,6 +4274,86 @@ Note: currently this package does not provide GPU support.")
          (replace "onnx" onnx-for-torch2)
          (replace "onnx-optimizer" onnx-optimizer-for-torch2)))))
 
+(define-public python-pytorch-geometric
+  (package
+    (name "python-pytorch-geometric")
+    (version "2.4.0")
+    (source (origin
+              (method git-fetch)
+              (uri (git-reference
+                    (url "https://github.com/pyg-team/pytorch_geometric/")
+                    (commit version)))
+              (file-name (git-file-name name version))
+              (sha256
+               (base32
+                "0hrs579asjsph16hyb4ablkbgfwd5j9y5s6ny7ahn3qrbkl2ji1g"))))
+    (build-system pyproject-build-system)
+    (arguments
+     (list
+      #:test-flags
+      ;; Hangs with AttributeError: 'NoneType' object has no attribute 'rpc_async'
+      '(list "--ignore=test/distributed/test_rpc.py"
+             ;; A message passing jinja template is missing
+             "--ignore=test/nn/conv/test_message_passing.py"
+             "--ignore=test/nn/test_sequential.py"
+             "--ignore=test/nn/models/test_basic_gnn.py"
+             ;; These all fail with a size mismatch error such as
+             ;; RuntimeError: shape '[-1, 2, 1, 1]' is invalid for input of size 3
+             "--ignore=test/explain/algorithm/test_captum_explainer.py"
+             "-k" (string-append
+                   ;; Permissions error
+                   "not test_packaging"
+                   ;; This can fail due to accuracy problems
+                   " and not test_gdc"
+                   ;; These refuse to be run on CPU and really want a GPU
+                   " and not test_add_random_walk_pe"
+                   " and not test_asap"
+                   " and not test_two_hop"))
+      #:phases
+      '(modify-phases %standard-phases
+         (add-after 'unpack 'delete-top-level-directories
+           (lambda _
+             ;; The presence of these directories confuses the pyproject build
+             ;; system.
+             (for-each delete-file-recursively
+                       '("conda" "docker" "graphgym")))))))
+    (propagated-inputs
+     (list onnx
+           python-captum
+           python-graphviz
+           python-h5py
+           python-jinja2
+           python-matplotlib
+           python-networkx
+           python-numba
+           python-numpy
+           python-opt-einsum
+           python-pandas
+           python-protobuf
+           python-psutil
+           python-pyparsing
+           python-pytorch-lightning
+           python-rdflib
+           python-requests
+           python-scikit-image
+           python-scikit-learn
+           python-scipy
+           python-statsmodels
+           python-sympy
+           python-tabulate
+           python-torchmetrics
+           python-tqdm))
+    (native-inputs
+     (list python-flit-core
+           python-pytest
+           python-pytest-cov))
+    (home-page "https://pyg.org")
+    (synopsis "Graph Neural Network library for PyTorch")
+    (description
+     "PyG is a library built upon PyTorch to easily write and train Graph
+Neural Networks for a wide range of applications related to structured data.")
+    (license license:expat)))
+
 (define-public python-lightning-cloud
   (package
     (name "python-lightning-cloud")
@@ -4733,24 +4938,23 @@ and Numpy.")
        (file-name (git-file-name name version))
        (sha256
         (base32 "0n1vsih99pvswcaygdxkc6kq6r48ny130z6ca8pp3281396r2ykw"))))
-    (build-system python-build-system)
+    (build-system pyproject-build-system)
     (arguments
-     `(#:phases
-       (modify-phases %standard-phases
-         (replace 'check
-           (lambda* (#:key tests? #:allow-other-keys)
+     (list
+      #:test-flags
+      '(list "-vv" "--stage=unit"
              ;; This tests features that are only implemented when non-free
              ;; software is available (Intel MKL or CUDA).
-             (for-each delete-file
-                       (list "tests/distributions/test_spanning_tree.py"
-                             "tests/infer/mcmc/test_mcmc_api.py"))
-
+             "--ignore=tests/distributions/test_spanning_tree.py"
+             "--ignore=tests/infer/mcmc/test_mcmc_api.py"
+             ;; This test fails sometimes.
+             "--ignore=tests/optim/test_optim.py"
              ;; Four test_gamma_elbo tests fail with bad values for unknown
              ;; reasons.
-             (delete-file "tests/distributions/test_rejector.py")
-             ;; This test fails sometimes.
-             (delete-file "tests/optim/test_optim.py")
-             (invoke "pytest" "-vv" "--stage=unit")))))))
+             "--ignore=tests/distributions/test_rejector.py"
+             ;; This looks like a test system failure. All of these fail
+             ;; because x is an array of functions, not an array of numbers.
+             "-k" "not test_sample")))
     (propagated-inputs
      (list python-numpy
            python-opt-einsum
@@ -5163,3 +5367,54 @@ Brian 2 simulator.")
      "OneAPI Deep Neural Network Library (oneDNN) is a cross-platform
 performance library of basic building blocks for deep learning applications.")
     (license license:asl2.0)))
+
+(define-public python-gguf
+  (package
+    (name "python-gguf")
+    (version "0.6.0")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "gguf" version))
+       (sha256
+        (base32 "0rbyc2h3kpqnrvbyjvv8a69l577jv55a31l12jnw21m1lamjxqmj"))))
+    (build-system pyproject-build-system)
+    (arguments
+     (list #:tests? #false))
+    (native-inputs (list poetry python-pytest))
+    (propagated-inputs (list python-numpy))
+    (home-page "https://ggml.ai")
+    (synopsis "Read and write ML models in GGUF for GGML")
+    (description "A Python library for reading and writing GGUF & GGML format ML models.")
+    (license license:expat)))
+
+(define-public python-gymnasium
+  (package
+    (name "python-gymnasium")
+    (version "0.29.1")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "gymnasium" version))
+       (sha256
+        (base32 "1cab4wsnlsxn2z90qmymv8ppmsq8yq2amiqwid3r0xfbxx92flqs"))))
+    (build-system pyproject-build-system)
+    (propagated-inputs (list python-cloudpickle python-farama-notifications
+                             python-importlib-metadata python-numpy
+                             python-typing-extensions))
+    (native-inputs (list python-pytest python-scipy))
+    (arguments
+     (list
+      #:phases
+      #~(modify-phases %standard-phases
+          (add-after 'unpack 'create-tests-module
+            (lambda _
+              (with-output-to-file "tests/__init__.py"
+                (lambda _ (display ""))))))))
+    (home-page "https://gymnasium.farama.org/")
+    (synopsis
+     "Standard API for reinforcement learning and a set of reference environments")
+    (description
+     "This package provides a standard API for reinforcement learning and a
+diverse set of reference environments (formerly Gym).")
+    (license license:expat)))