From 11d218004e3a2668985a6d9b2628cb4b3fdc0051 Mon Sep 17 00:00:00 2001
From: Sergey Matveev
Date: Sat, 7 Dec 2019 21:53:48 +0300
Subject: [PATCH] Refactor digest processing, BLAKE2b-256 support, cleanup non-SHA256 digests

---
 gocheese.go   | 181 +++++++++++++++++++++-----------------------
 gocheese.texi |  50 ++++++++------
 integrity.go  |  73 ++++++++++++++++++++
 3 files changed, 177 insertions(+), 127 deletions(-)
 create mode 100644 integrity.go

diff --git a/gocheese.go b/gocheese.go
index 40f172b..d6a1a70 100644
--- a/gocheese.go
+++ b/gocheese.go
@@ -45,6 +45,7 @@ import (
 	"syscall"
 	"time"
 
+	"golang.org/x/crypto/blake2b"
 	"golang.org/x/net/netutil"
 )
 
@@ -60,7 +61,6 @@ const (
 	HTMLElement = "    <a href=\"%s\"%s>%s</a><br/>\n"
 	InternalFlag = ".internal"
 	GPGSigExt = ".asc"
-	GPGSigAttr = " data-gpg-sig=true"
 
 	Warranty = `This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -76,8 +76,19 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.`
 )
 
 var (
-	pkgPyPI = regexp.MustCompile(`^.*<a href="([^"]+)"[^>]*>(.+)</a>.*$`)
-	Version string = "UNKNOWN"
+	pkgPyPI = regexp.MustCompile(`^.*<a href="([^"]+)"[^>]*>(.+)</a>.*$`)
+	normalizationRe = regexp.MustCompilePOSIX("[-_.]+")
+
+	HashAlgoSHA256 = "sha256"
+	HashAlgoBLAKE2b256 = "blake2_256"
+	HashAlgoSHA512 = "sha512"
+	HashAlgoMD5 = "md5"
+	knownHashAlgos []string = []string{
+		HashAlgoSHA256,
+		HashAlgoBLAKE2b256,
+		HashAlgoSHA512,
+		HashAlgoMD5,
+	}
 
 	root = flag.String("root", "./packages", "Path to packages directory")
 	bind = flag.String("bind", "[::]:8080", "Address to bind to")
@@ -94,10 +105,9 @@ var (
 	version = flag.Bool("version", false, "Print version information")
 	warranty = flag.Bool("warranty", false, "Print warranty information")
 
-	killed bool
-
-	pypiURLParsed *url.URL
-	normalizationRe *regexp.Regexp = regexp.MustCompilePOSIX("[-_.]+")
+	Version string = "UNKNOWN"
+	killed bool
+	pypiURLParsed *url.URL
 )
 
 func mkdirForPkg(w http.ResponseWriter, r *http.Request, dir string) bool {
@@ -112,6 +122,14 @@ func mkdirForPkg(w http.ResponseWriter, r *http.Request, dir string) bool {
 	return true
 }
 
+func blake2b256New() hash.Hash {
+	h, err := blake2b.New256(nil)
+	if err != nil {
+		panic(err)
+	}
+	return h
+}
+
 func refreshDir(
 	w http.ResponseWriter,
 	r *http.Request,
@@ -137,20 +155,15 @@ func refreshDir(
 		return false
 	}
 	dirPath := filepath.Join(*root, dir)
-	var submatches []string
-	var uri string
-	var filename string
-	var path string
-	var pkgURL *url.URL
-	var digest []byte
 	for _, lineRaw := range bytes.Split(body, []byte("\n")) {
-		submatches = pkgPyPI.FindStringSubmatch(string(lineRaw))
+		submatches := pkgPyPI.FindStringSubmatch(string(lineRaw))
 		if len(submatches) == 0 {
 			continue
 		}
-		uri = submatches[1]
-		filename = submatches[2]
-		if pkgURL, err = url.Parse(uri); err != nil {
+		uri := submatches[1]
+		filename := submatches[2]
+		pkgURL, err := url.Parse(uri)
+		if err != nil {
 			http.Error(w, err.Error(), http.StatusBadGateway)
 			return false
 		}
@@ -169,31 +182,31 @@ func refreshDir(
 			http.Error(w, "invalid digest provided", http.StatusBadGateway)
 			return false
 		}
-		digest, err = hex.DecodeString(digestInfo[1])
+		digest, err := hex.DecodeString(digestInfo[1])
 		if err != nil {
 			http.Error(w, err.Error(), http.StatusBadGateway)
 			return false
 		}
+		hashAlgo := digestInfo[0]
 		var hasherNew func() hash.Hash
-		var hashExt string
 		var hashSize int
-		switch digestInfo[0] {
-		case "md5":
-			hashExt = ".md5"
+		switch hashAlgo {
+		case HashAlgoMD5:
 			hasherNew = md5.New
 			hashSize = md5.Size
-		case "sha256":
-			hashExt = ".sha256"
+		case HashAlgoSHA256:
 			hasherNew = sha256.New
 			hashSize = sha256.Size
-		case "sha512":
-			hashExt = ".sha512"
+		case HashAlgoSHA512:
 			hasherNew = sha512.New
 			hashSize = sha512.Size
+		case HashAlgoBLAKE2b256:
+			hasherNew = blake2b256New
+			hashSize = blake2b.Size256
 		default:
 			log.Println(
 				r.RemoteAddr, "pypi", filename,
-				"unknown digest algorithm", digestInfo[0],
+				"unknown digest algorithm", hashAlgo,
 			)
 			http.Error(w, "unknown digest algorithm", http.StatusBadGateway)
 			return false
@@ -210,7 +223,8 @@ func refreshDir(
 		} else {
 			uri = pkgURL.String()
 		}
-		path = filepath.Join(dirPath, filename)
+
+		path := filepath.Join(dirPath, filename)
 		if filename == filenameGet {
 			if killed {
 				// Skip heavy remote call, when shutting down
@@ -226,7 +240,7 @@ func refreshDir(
 			}
 			defer resp.Body.Close()
 			hasher := hasherNew()
-			hasherOur := sha256.New()
+			hasherSHA256 := sha256.New()
 			dst, err := TempFile(dirPath)
 			if err != nil {
 				http.Error(w, err.Error(), http.StatusInternalServerError)
 				return false
 			}
 			dstBuf := bufio.NewWriter(dst)
 			wrs := []io.Writer{hasher, dstBuf}
-			if hashExt != ".sha256" {
-				wrs = append(wrs, hasherOur)
+			if hashAlgo != HashAlgoSHA256 {
+				wrs = append(wrs, hasherSHA256)
 			}
 			wr := io.MultiWriter(wrs...)
 			if _, err = io.Copy(wr, resp.Body); err != nil {
@@ -275,9 +289,12 @@ func refreshDir(
 				http.Error(w, err.Error(), http.StatusInternalServerError)
 				return false
 			}
-			if hashExt != ".sha256" {
-				hashExt = ".sha256"
-				digest = hasherOur.Sum(nil)
+			if hashAlgo != HashAlgoSHA256 {
+				hashAlgo = HashAlgoSHA256
+				digest = hasherSHA256.Sum(nil)
+				for _, algo := range knownHashAlgos[1:] {
+					os.Remove(path + "." + algo)
+				}
 			}
 		}
 		if filename == filenameGet || gpgUpdate {
@@ -297,6 +314,10 @@ func refreshDir(
 			if err != nil {
 				goto GPGSigSkip
 			}
+			if !bytes.HasPrefix(sig, []byte("-----BEGIN PGP SIGNATURE-----")) {
+				log.Println(r.RemoteAddr, "pypi non PGP signature", filename)
+				goto GPGSigSkip
+			}
 			if err = WriteFileSync(dirPath, path+GPGSigExt, sig); err != nil {
 				http.Error(w, err.Error(), http.StatusInternalServerError)
 				return false
@@ -304,7 +325,7 @@ func refreshDir(
 			log.Println(r.RemoteAddr, "pypi downloaded signature", filename)
 		}
 	GPGSigSkip:
-		path = path + hashExt
+		path = path + "." + hashAlgo
 		_, err = os.Stat(path)
 		if err == nil {
 			continue
@@ -363,54 +384,44 @@ func listDir(
 		http.Error(w, err.Error(), http.StatusInternalServerError)
 		return
 	}
-	var result bytes.Buffer
-	result.WriteString(fmt.Sprintf(HTMLBegin, dir))
-	var digest []byte
-	var gpgSigAttr string
-	var fnClean string
 	files := make(map[string]struct{}, len(fis)/2)
 	for _, fi := range fis {
 		files[fi.Name()] = struct{}{}
 	}
-	for _, algoExt := range []string{".sha256", ".sha512", ".md5"} {
+	var result bytes.Buffer
+	result.WriteString(fmt.Sprintf(HTMLBegin, dir))
+	for _, algo := range knownHashAlgos {
 		for fn, _ := range files {
 			if killed {
 				// Skip expensive I/O when shutting down
 				http.Error(w, "shutting down", http.StatusInternalServerError)
 				return
 			}
-			if !strings.HasSuffix(fn, algoExt) {
+			if !strings.HasSuffix(fn, "."+algo) {
 				continue
 			}
-			digest, err = ioutil.ReadFile(filepath.Join(dirPath, fn))
+			delete(files, fn)
+			digest, err := ioutil.ReadFile(filepath.Join(dirPath, fn))
 			if err != nil {
 				http.Error(w, err.Error(), http.StatusInternalServerError)
 				return
 			}
-			fnClean = strings.TrimSuffix(fn, algoExt)
-			if _, err = os.Stat(filepath.Join(dirPath, fnClean+GPGSigExt)); os.IsNotExist(err) {
-				gpgSigAttr = ""
-			} else {
-				gpgSigAttr = GPGSigAttr
+			fnClean := strings.TrimSuffix(fn, "."+algo)
+			delete(files, fnClean)
+			gpgSigAttr := ""
+			if _, err = os.Stat(filepath.Join(dirPath, fnClean+GPGSigExt)); err == nil {
+				gpgSigAttr = " data-gpg-sig=true"
+				delete(files, fnClean+GPGSigExt)
 			}
 			result.WriteString(fmt.Sprintf(
 				HTMLElement,
 				strings.Join([]string{
 					*refreshURLPath, dir, "/", fnClean,
-					"#", algoExt[1:], "=", hex.EncodeToString(digest),
+					"#", algo, "=", hex.EncodeToString(digest),
 				}, ""),
 				gpgSigAttr, fnClean,
 			))
-			for _, n := range []string{
-				fnClean,
-				fnClean + GPGSigExt,
-				fnClean + ".sha256",
-				fnClean + ".sha512",
-				fnClean + ".md5",
-			} {
-				delete(files, n)
-			}
 		}
 	}
 	result.WriteString(HTMLEnd)
@@ -454,8 +465,8 @@ func serveUpload(w http.ResponseWriter, r *http.Request) {
 		http.Error(w, "single name is expected in request", http.StatusBadRequest)
 		return
 	}
-	dir := normalizationRe.ReplaceAllString(pkgNames[0], "-")
-	dirPath := filepath.Join(*root, dir)
+	pkgName := normalizationRe.ReplaceAllString(pkgNames[0], "-")
+	dirPath := filepath.Join(*root, pkgName)
 	var digestExpected []byte
 	if digestExpectedHex, exists := r.MultipartForm.Value["sha256_digest"]; exists {
 		digestExpected, err = hex.DecodeString(digestExpectedHex[0])
@@ -468,7 +479,7 @@ func serveUpload(w http.ResponseWriter, r *http.Request) {
 
 	// Checking is it internal package
 	if _, err = os.Stat(filepath.Join(dirPath, InternalFlag)); err != nil {
-		log.Println(r.RemoteAddr, "non-internal package", dir)
+		log.Println(r.RemoteAddr, "non-internal package", pkgName)
 		http.Error(w, "unknown internal package", http.StatusUnauthorized)
 		return
 	}
@@ -483,7 +494,7 @@ func serveUpload(w http.ResponseWriter, r *http.Request) {
 		http.Error(w, "already exists", http.StatusBadRequest)
 		return
 	}
-	if !mkdirForPkg(w, r, dir) {
+	if !mkdirForPkg(w, r, pkgName) {
 		return
 	}
 	src, err := file.Open()
@@ -538,7 +549,7 @@ func serveUpload(w http.ResponseWriter, r *http.Request) {
 		http.Error(w, err.Error(), http.StatusInternalServerError)
 		return
 	}
-	if err = WriteFileSync(dirPath, path+".sha256", digest); err != nil {
+	if err = WriteFileSync(dirPath, path+"."+HashAlgoSHA256, digest); err != nil {
 		http.Error(w, err.Error(), http.StatusInternalServerError)
 		return
 	}
@@ -615,50 +626,6 @@ func handler(w http.ResponseWriter, r *http.Request) {
 	}
 }
 
-func goodIntegrity() bool {
-	dirs, err := ioutil.ReadDir(*root)
-	if err != nil {
-		log.Fatal(err)
-	}
-	hasher := sha256.New()
-	digest := make([]byte, sha256.Size)
-	isGood := true
-	var data []byte
-	var pkgName string
-	for _, dir := range dirs {
-		files, err := ioutil.ReadDir(filepath.Join(*root, dir.Name()))
-		if err != nil {
-			log.Fatal(err)
-		}
-		for _, file := range files {
-			if !strings.HasSuffix(file.Name(), ".sha256") {
-				continue
-			}
-			pkgName = strings.TrimSuffix(file.Name(), ".sha256")
-			data, err = ioutil.ReadFile(filepath.Join(*root, dir.Name(), pkgName))
-			if err != nil {
-				if os.IsNotExist(err) {
-					continue
-				}
-				log.Fatal(err)
-			}
-			hasher.Write(data)
-			data, err = ioutil.ReadFile(filepath.Join(*root, dir.Name(), file.Name()))
-			if err != nil {
-				log.Fatal(err)
-			}
-			if bytes.Compare(hasher.Sum(digest[:0]), data) == 0 {
-				fmt.Println(pkgName, "GOOD")
-			} else {
-				isGood = false
-				fmt.Println(pkgName, "BAD")
-			}
-			hasher.Reset()
-		}
-	}
-	return isGood
-}
-
 func main() {
 	flag.Parse()
 	if *warranty {
diff --git a/gocheese.texi b/gocheese.texi
index 6cff768..8bb6b9c 100644
--- a/gocheese.texi
+++ b/gocheese.texi
@@ -25,13 +25,14 @@ but nearly all the code was rewritten. It has huge differences:
 @itemize
 @item proxying and caching of missing packages, including GPG signatures
 @item @url{https://pythonwheels.com/, Wheel} uploading support
-@item atomic packages store on filesystem
-@item SHA256-checksummed packages: storing checksums, giving them back,
- verifying stored files integrity, verifying checksum of uploaded
- packaged
-@item graceful HTTP-server shutdown
+@item integrity check of proxied packages: MD5, SHA256, SHA512, BLAKE2b-256
+@item SHA256 checksums for stored packages
+@item verification of SHA256 checksums for uploaded packages
+@item storage of uploaded GPG signatures
 @item no YAML configuration, just command-line arguments
 @item no package overwriting ability (as PyPI does too)
+@item atomic packages store on filesystem
+@item graceful HTTP-server shutdown
 @end itemize
 
 Also it contains @file{pyshop2packages.sh} migration script for
@@ -91,7 +92,7 @@ file is checked against it.
 
 Pay attention that you have to manually create corresponding private
 package directory! You are not allowed to upload anything explicitly
-flagged as private.
+flagged as an internal package.
 
 @node Passwords
 @unnumbered Password authentication
@@ -184,31 +185,40 @@ Root directory has the following hierarchy:
 @verbatim
 root
 +-- public-package
 | +- public-package-0.1.tar.gz.md5
-| +- public-package-0.1.1.tar.gz.sha256
+| +- public-package-0.1.tar.gz.blake2_256
+| +- public-package-0.1.1.tar.gz.blake2_256
 | +- public-package-0.2.tar.gz
 | +- public-package-0.2.tar.gz.asc
 | +- public-package-0.2.tar.gz.sha256
 +-- private-package
 | +- .internal
 | +- private-package-0.1.tar.gz
+| +- private-package-0.1.tar.gz.asc
 | +- private-package-0.1.tar.gz.sha256
 |...
 @end verbatim
 
-Each directory is a package name. When you try to list non existent
-directory contents (you are downloading package you have not seen
-before), then GoCheese will download information about package's
-versions with checksums and write them in corresponding @file{.sha256}
-files. However no package package tarball is downloaded.
+Each directory is a normalized package name. When you list a directory
+that does not exist yet (you are downloading a package that has not been
+seen before), GoCheese downloads information about the package's
+versions together with their checksums and writes it to the corresponding
+@file{.sha256}, @file{.blake2_256}, @file{.sha512} or @file{.md5} files.
+However, no package tarball is downloaded at that point.
 
 When you request for particular package version, then its tarball is
-downloaded and verified against the checksum. For example in the root
-directory above we have downloaded only @file{public-package-0.2}.
-If upstream has corresponding @file{.asc} file, then it also will be
-downloaded.
-
-Private packages contain @file{.internal} file, indicating that it must
-not be asked in PyPI if required version is missing. You have to create
-it manually.
+downloaded and verified against the stored checksum. A SHA256 checksum
+is then computed, stored and used from that point on.
+
+For example, @file{public-package} version @code{0.1} was seen long ago,
+when upstream still provided an MD5 checksum. Version @code{0.1.1} was
+seen more recently with a BLAKE2b-256 checksum, which was also stored
+for @code{0.1}. Version @code{0.2} is the only downloaded tarball, so
+its checksum was recalculated and stored as SHA256. Upstream also has a
+corresponding @file{.asc} signature file, which was downloaded too.
+
+@file{private-package} is a private (internal) package, because it
+contains the @file{.internal} file, which you have to create manually.
+It can be uploaded to, and queries for it are never proxied to the
+upstream PyPI. An uploaded GPG signature is stored as well.
 
 @bye
diff --git a/integrity.go b/integrity.go
new file mode 100644
index 0000000..a96d80e
--- /dev/null
+++ b/integrity.go
@@ -0,0 +1,73 @@
+/*
+GoCheese -- Python private package repository and caching proxy
+Copyright (C) 2019 Sergey Matveev
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, version 3 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/ + +package main + +import ( + "bytes" + "crypto/sha256" + "fmt" + "io/ioutil" + "log" + "os" + "path/filepath" + "strings" +) + +func goodIntegrity() bool { + dirs, err := ioutil.ReadDir(*root) + if err != nil { + log.Fatal(err) + } + hasher := sha256.New() + digest := make([]byte, sha256.Size) + isGood := true + var data []byte + var pkgName string + for _, dir := range dirs { + files, err := ioutil.ReadDir(filepath.Join(*root, dir.Name())) + if err != nil { + log.Fatal(err) + } + for _, file := range files { + if !strings.HasSuffix(file.Name(), "."+HashAlgoSHA256) { + continue + } + pkgName = strings.TrimSuffix(file.Name(), "."+HashAlgoSHA256) + data, err = ioutil.ReadFile(filepath.Join(*root, dir.Name(), pkgName)) + if err != nil { + if os.IsNotExist(err) { + continue + } + log.Fatal(err) + } + hasher.Write(data) + data, err = ioutil.ReadFile(filepath.Join(*root, dir.Name(), file.Name())) + if err != nil { + log.Fatal(err) + } + if bytes.Compare(hasher.Sum(digest[:0]), data) == 0 { + fmt.Println(pkgName, "GOOD") + } else { + isGood = false + fmt.Println(pkgName, "BAD") + } + hasher.Reset() + } + } + return isGood +} -- 2.44.0
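
Below the patch, for reference only: a minimal, standalone sketch of the digest handling that the patch introduces. It shows how a PyPI-style "#<algo>=<hexdigest>" URL fragment can be mapped onto a hasher constructor and verified, and why blake2b.New256 needs a wrapper like blake2b256New above: unlike md5.New, sha256.New and sha512.New it returns (hash.Hash, error). The verifyFragment helper and the hashers map are illustrative names assumed for this sketch, not part of GoCheese.

package main

import (
	"bytes"
	"crypto/md5"
	"crypto/sha256"
	"crypto/sha512"
	"encoding/hex"
	"errors"
	"fmt"
	"hash"
	"strings"

	"golang.org/x/crypto/blake2b"
)

// blake2b.New256 returns an error (only possible for over-long keys),
// so it needs a wrapper to fit the func() hash.Hash shape shared by
// md5.New, sha256.New and sha512.New.
func blake2b256New() hash.Hash {
	h, err := blake2b.New256(nil)
	if err != nil {
		panic(err)
	}
	return h
}

// hashers maps the digest names used in PyPI URL fragments to
// constructors, mirroring the switch in refreshDir above.
var hashers = map[string]func() hash.Hash{
	"md5":        md5.New,
	"sha256":     sha256.New,
	"sha512":     sha512.New,
	"blake2_256": blake2b256New,
}

// verifyFragment checks data against a fragment like "sha256=ab12...".
func verifyFragment(fragment string, data []byte) error {
	parts := strings.SplitN(fragment, "=", 2)
	if len(parts) != 2 {
		return errors.New("invalid digest fragment")
	}
	hasherNew, known := hashers[parts[0]]
	if !known {
		return errors.New("unknown digest algorithm: " + parts[0])
	}
	expected, err := hex.DecodeString(parts[1])
	if err != nil {
		return err
	}
	h := hasherNew()
	h.Write(data)
	if !bytes.Equal(h.Sum(nil), expected) {
		return errors.New("digest mismatch")
	}
	return nil
}

func main() {
	data := []byte("example tarball contents")
	sum := blake2b.Sum256(data)
	fragment := "blake2_256=" + hex.EncodeToString(sum[:])
	fmt.Println(verifyFragment(fragment, data)) // <nil>
}

GoCheese itself additionally persists the digest next to the package file and, once a tarball has actually been downloaded, recomputes and keeps only the SHA256 checksum while removing the other known algorithms' files, as the refreshDir hunks above show.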