summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMaxim Cournoyer <maxim.cournoyer@gmail.com>2019-03-28 00:26:01 -0400
committerMaxim Cournoyer <maxim.cournoyer@gmail.com>2019-07-02 10:07:59 +0900
commit803fb336d62ea65990e263ce58d8552e04c9c038 (patch)
tree302306159a166490b7837135ba766960d5be5490
parentc4797121beea74ae93e3ce17677b9e72b8df920d (diff)
import: pypi: Improve parsing of requirement specifications.
The previous solution was fragile and could leave unwanted characters in a requirement name, such as '[' or ']'. Partially fixes <https://bugs.gnu.org/33047>. * guix/import/pypi.scm (use-modules): Export SPECIFICATION->REQUIREMENT-NAME (%requirement-name-regexp): New variable. (clean-requirement): Rename to... (specification->requirement-name): this, which now uses %requirement-name-regexp to select the requirement name from the requirement specification. (parse-requires.txt): Adapt.
-rw-r--r--guix/import/pypi.scm54
-rw-r--r--tests/pypi.scm12
2 files changed, 52 insertions, 14 deletions
diff --git a/guix/import/pypi.scm b/guix/import/pypi.scm
index d9db876222..6a881bda12 100644
--- a/guix/import/pypi.scm
+++ b/guix/import/pypi.scm
@@ -48,6 +48,7 @@
#:use-module ((guix licenses) #:prefix license:)
#:use-module (guix build-system python)
#:export (parse-requires.txt
+ specification->requirement-name
guix-package->pypi-name
pypi-recursive-import
pypi->guix-package
@@ -118,22 +119,47 @@ package definition."
((package-inputs ...)
`((propagated-inputs (,'quasiquote ,package-inputs))))))
-(define (clean-requirement s)
- ;; Given a requirement LINE, as can be found in a setuptools requires.txt
- ;; file, remove everything other than the actual name of the required
- ;; package, and return it.
- (cond
- ((string-index s (char-set #\space #\> #\= #\<)) => (cut string-take s <>))
- (else s)))
+(define %requirement-name-regexp
+ ;; Regexp to match the requirement name in a requirement specification.
+
+ ;; Some grammar, taken from PEP-0508 (see:
+ ;; https://www.python.org/dev/peps/pep-0508/).
+
+ ;; Using this grammar makes the PEP-0508 regexp easier to understand for
+ ;; humans. The use of a regexp is preferred to more primitive string
+ ;; manipulations because we can more directly match what upstream uses
+ ;; (again, per PEP-0508). The regexp approach is also easier to extend,
+ ;; should we want to implement more completely the grammar of PEP-0508.
+
+ ;; The unified rule can be expressed as:
+ ;; specification = wsp* ( url_req | name_req ) wsp*
+
+ ;; where url_req is:
+ ;; url_req = name wsp* extras? wsp* urlspec wsp+ quoted_marker?
+
+ ;; and where name_req is:
+ ;; name_req = name wsp* extras? wsp* versionspec? wsp* quoted_marker?
+
+ ;; Thus, we only need to match NAME, which is expressed as:
+ ;; identifier_end = letterOrDigit | (('-' | '_' | '.' )* letterOrDigit)
+ ;; identifier = letterOrDigit identifier_end*
+ ;; name = identifier
+ ;; Note: the regexp below is anchored at the start of the string (^) and
+ ;; does not consume the leading wsp* of the grammar, so it only matches
+ ;; when the specification begins directly with the name.
+ (let* ((letter-or-digit "[A-Za-z0-9]")
+ (identifier-end (string-append "(" letter-or-digit "|"
+ "[-_.]*" letter-or-digit ")"))
+ (identifier (string-append "^" letter-or-digit identifier-end "*"))
+ (name identifier))
+ (make-regexp name)))
+
+(define (specification->requirement-name spec)
+ "Given a specification SPEC, return the requirement name."
+ ;; When %requirement-name-regexp fails to match SPEC, raise an error here
+ ;; instead of handing a false value to match:substring.
+ (match:substring
+ (or (regexp-exec %requirement-name-regexp spec)
+ (error (G_ "Could not extract requirement name in spec:") spec))))
(define (parse-requires.txt requires.txt)
"Given REQUIRES.TXT, a Setuptools requires.txt file, return a list of
requirement names."
- ;; This is a very incomplete parser, whose job is to select the non-optional
- ;; dependencies and strip them out of any version information.
- ;; Alternatively, we could implement a PEG parser with the (ice-9 peg)
- ;; library and the requirements grammar defined by PEP-0508
- ;; (https://www.python.org/dev/peps/pep-0508/).
(define (comment? line)
;; Return #t if the given LINE is a comment, #f otherwise.
@@ -156,7 +182,7 @@ requirement names."
((or (string-null? line) (comment? line))
(loop result))
(else
- (loop (cons (clean-requirement line)
+ (loop (cons (specification->requirement-name line)
result))))))))))
(define (guess-requirements source-url wheel-url tarball)
@@ -198,7 +224,7 @@ cannot determine package dependencies"))
(hash-ref (list-ref run_requires 0)
"requires")
'())))
- (map clean-requirement requirements)))))
+ (map specification->requirement-name requirements)))))
(lambda ()
(delete-file json-file)
(rmdir dirname))))))
diff --git a/tests/pypi.scm b/tests/pypi.scm
index 03455ba6be..c40be6c21d 100644
--- a/tests/pypi.scm
+++ b/tests/pypi.scm
@@ -55,6 +55,14 @@
(define test-source-hash
"")
+(define test-specifications
+ '("Fizzy [foo, bar]"
+ "PickyThing<1.6,>1.9,!=1.9.6,<2.0a0,==2.4c1"
+ "SomethingWithMarker[foo]>1.0;python_version<\"2.7\""
+ "requests [security,tests] >= 2.8.1, == 2.8.* ; python_version < \"2.7\""
+ "pip @ https://github.com/pypa/pip/archive/1.3.1.zip#\
+sha1=da9234ee9982d4bbb3c72346a6de940a148ea686"))
+
(define test-requires.txt "\
# A comment
# A comment after a space
@@ -109,6 +117,10 @@ pytest (>=2.5.0)
(uri (list "https://bitheap.org/cram/cram-0.7.tar.gz"
(pypi-uri "cram" "0.7"))))))))
+(test-equal "specification->requirement-name"
+ '("Fizzy" "PickyThing" "SomethingWithMarker" "requests" "pip")
+ (map specification->requirement-name test-specifications))
+
(test-equal "parse-requires.txt, with sections"
'("foo" "bar")
(mock ((ice-9 ports) call-with-input-file