Cypherpunks.ru repositories - gocheese.git/commitdiff
Refactor digest processing, BLAKE2b-256 support, cleanup non-SHA256 digests
author Sergey Matveev <stargrave@stargrave.org>
Sat, 7 Dec 2019 18:53:48 +0000 (21:53 +0300)
committer Sergey Matveev <stargrave@stargrave.org>
Sat, 7 Dec 2019 19:07:22 +0000 (22:07 +0300)
gocheese.go
gocheese.texi
integrity.go [new file with mode: 0644]

index 40f172bc9eedb50d5c337572d5da5036f760ac15..d6a1a70ba708b63911e02b5bfc55f5d79290aa84 100644 (file)
@@ -45,6 +45,7 @@ import (
        "syscall"
        "time"
 
+       "golang.org/x/crypto/blake2b"
        "golang.org/x/net/netutil"
 )
 
@@ -60,7 +61,6 @@ const (
        HTMLElement  = "    <a href=\"%s\"%s>%s</a><br/>\n"
        InternalFlag = ".internal"
        GPGSigExt    = ".asc"
-       GPGSigAttr   = " data-gpg-sig=true"
 
        Warranty = `This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -76,8 +76,19 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.`
 )
 
 var (
-       pkgPyPI        = regexp.MustCompile(`^.*<a href="([^"]+)"[^>]*>(.+)</a><br/>.*$`)
-       Version string = "UNKNOWN"
+       pkgPyPI         = regexp.MustCompile(`^.*<a href="([^"]+)"[^>]*>(.+)</a><br/>.*$`)
+       normalizationRe = regexp.MustCompilePOSIX("[-_.]+")
+
+       HashAlgoSHA256              = "sha256"
+       HashAlgoBLAKE2b256          = "blake2_256"
+       HashAlgoSHA512              = "sha512"
+       HashAlgoMD5                 = "md5"
+       knownHashAlgos     []string = []string{
+               HashAlgoSHA256,
+               HashAlgoBLAKE2b256,
+               HashAlgoSHA512,
+               HashAlgoMD5,
+       }
 
        root             = flag.String("root", "./packages", "Path to packages directory")
        bind             = flag.String("bind", "[::]:8080", "Address to bind to")
@@ -94,10 +105,9 @@ var (
        version          = flag.Bool("version", false, "Print version information")
        warranty         = flag.Bool("warranty", false, "Print warranty information")
 
-       killed bool
-
-       pypiURLParsed   *url.URL
-       normalizationRe *regexp.Regexp = regexp.MustCompilePOSIX("[-_.]+")
+       Version       string = "UNKNOWN"
+       killed        bool
+       pypiURLParsed *url.URL
 )
 
 func mkdirForPkg(w http.ResponseWriter, r *http.Request, dir string) bool {
@@ -112,6 +122,14 @@ func mkdirForPkg(w http.ResponseWriter, r *http.Request, dir string) bool {
        return true
 }
 
+func blake2b256New() hash.Hash { // BLAKE2b-256 constructor with the same func() hash.Hash shape as md5.New/sha256.New
+       h, err := blake2b.New256(nil) // nil key: plain (unkeyed) hash rather than a MAC
+       if err != nil {
+               panic(err) // NOTE(review): presumably unreachable with a nil key -- confirm against blake2b docs
+       }
+       return h
+}
+
 func refreshDir(
        w http.ResponseWriter,
        r *http.Request,
@@ -137,20 +155,15 @@ func refreshDir(
                return false
        }
        dirPath := filepath.Join(*root, dir)
-       var submatches []string
-       var uri string
-       var filename string
-       var path string
-       var pkgURL *url.URL
-       var digest []byte
        for _, lineRaw := range bytes.Split(body, []byte("\n")) {
-               submatches = pkgPyPI.FindStringSubmatch(string(lineRaw))
+               submatches := pkgPyPI.FindStringSubmatch(string(lineRaw))
                if len(submatches) == 0 {
                        continue
                }
-               uri = submatches[1]
-               filename = submatches[2]
-               if pkgURL, err = url.Parse(uri); err != nil {
+               uri := submatches[1]
+               filename := submatches[2]
+               pkgURL, err := url.Parse(uri)
+               if err != nil {
                        http.Error(w, err.Error(), http.StatusBadGateway)
                        return false
                }
@@ -169,31 +182,31 @@ func refreshDir(
                        http.Error(w, "invalid digest provided", http.StatusBadGateway)
                        return false
                }
-               digest, err = hex.DecodeString(digestInfo[1])
+               digest, err := hex.DecodeString(digestInfo[1])
                if err != nil {
                        http.Error(w, err.Error(), http.StatusBadGateway)
                        return false
                }
+               hashAlgo := digestInfo[0]
                var hasherNew func() hash.Hash
-               var hashExt string
                var hashSize int
-               switch digestInfo[0] {
-               case "md5":
-                       hashExt = ".md5"
+               switch hashAlgo {
+               case HashAlgoMD5:
                        hasherNew = md5.New
                        hashSize = md5.Size
-               case "sha256":
-                       hashExt = ".sha256"
+               case HashAlgoSHA256:
                        hasherNew = sha256.New
                        hashSize = sha256.Size
-               case "sha512":
-                       hashExt = ".sha512"
+               case HashAlgoSHA512:
                        hasherNew = sha512.New
                        hashSize = sha512.Size
+               case HashAlgoBLAKE2b256:
+                       hasherNew = blake2b256New
+                       hashSize = blake2b.Size256
                default:
                        log.Println(
                                r.RemoteAddr, "pypi", filename,
-                               "unknown digest algorithm", digestInfo[0],
+                               "unknown digest algorithm", hashAlgo,
                        )
                        http.Error(w, "unknown digest algorithm", http.StatusBadGateway)
                        return false
@@ -210,7 +223,8 @@ func refreshDir(
                } else {
                        uri = pkgURL.String()
                }
-               path = filepath.Join(dirPath, filename)
+
+               path := filepath.Join(dirPath, filename)
                if filename == filenameGet {
                        if killed {
                                // Skip heavy remote call, when shutting down
@@ -226,7 +240,7 @@ func refreshDir(
                        }
                        defer resp.Body.Close()
                        hasher := hasherNew()
-                       hasherOur := sha256.New()
+                       hasherSHA256 := sha256.New()
                        dst, err := TempFile(dirPath)
                        if err != nil {
                                http.Error(w, err.Error(), http.StatusInternalServerError)
@@ -234,8 +248,8 @@ func refreshDir(
                        }
                        dstBuf := bufio.NewWriter(dst)
                        wrs := []io.Writer{hasher, dstBuf}
-                       if hashExt != ".sha256" {
-                               wrs = append(wrs, hasherOur)
+                       if hashAlgo != HashAlgoSHA256 {
+                               wrs = append(wrs, hasherSHA256)
                        }
                        wr := io.MultiWriter(wrs...)
                        if _, err = io.Copy(wr, resp.Body); err != nil {
@@ -275,9 +289,12 @@ func refreshDir(
                                http.Error(w, err.Error(), http.StatusInternalServerError)
                                return false
                        }
-                       if hashExt != ".sha256" {
-                               hashExt = ".sha256"
-                               digest = hasherOur.Sum(nil)
+                       if hashAlgo != HashAlgoSHA256 {
+                               hashAlgo = HashAlgoSHA256
+                               digest = hasherSHA256.Sum(nil)
+                               for _, algo := range knownHashAlgos[1:] {
+                                       os.Remove(path + "." + algo)
+                               }
                        }
                }
                if filename == filenameGet || gpgUpdate {
@@ -297,6 +314,10 @@ func refreshDir(
                        if err != nil {
                                goto GPGSigSkip
                        }
+                       if !bytes.HasPrefix(sig, []byte("-----BEGIN PGP SIGNATURE-----")) {
+                               log.Println(r.RemoteAddr, "pypi non PGP signature", filename)
+                               goto GPGSigSkip
+                       }
                        if err = WriteFileSync(dirPath, path+GPGSigExt, sig); err != nil {
                                http.Error(w, err.Error(), http.StatusInternalServerError)
                                return false
@@ -304,7 +325,7 @@ func refreshDir(
                        log.Println(r.RemoteAddr, "pypi downloaded signature", filename)
                }
        GPGSigSkip:
-               path = path + hashExt
+               path = path + "." + hashAlgo
                _, err = os.Stat(path)
                if err == nil {
                        continue
@@ -363,54 +384,44 @@ func listDir(
                http.Error(w, err.Error(), http.StatusInternalServerError)
                return
        }
-       var result bytes.Buffer
-       result.WriteString(fmt.Sprintf(HTMLBegin, dir))
-       var digest []byte
-       var gpgSigAttr string
-       var fnClean string
        files := make(map[string]struct{}, len(fis)/2)
        for _, fi := range fis {
                files[fi.Name()] = struct{}{}
        }
-       for _, algoExt := range []string{".sha256", ".sha512", ".md5"} {
+       var result bytes.Buffer
+       result.WriteString(fmt.Sprintf(HTMLBegin, dir))
+       for _, algo := range knownHashAlgos {
                for fn, _ := range files {
                        if killed {
                                // Skip expensive I/O when shutting down
                                http.Error(w, "shutting down", http.StatusInternalServerError)
                                return
                        }
-                       if !strings.HasSuffix(fn, algoExt) {
+                       if !strings.HasSuffix(fn, "."+algo) {
                                continue
                        }
-                       digest, err = ioutil.ReadFile(filepath.Join(dirPath, fn))
+                       delete(files, fn)
+                       digest, err := ioutil.ReadFile(filepath.Join(dirPath, fn))
                        if err != nil {
                                http.Error(w, err.Error(), http.StatusInternalServerError)
                                return
                        }
-                       fnClean = strings.TrimSuffix(fn, algoExt)
-                       if _, err = os.Stat(filepath.Join(dirPath, fnClean+GPGSigExt)); os.IsNotExist(err) {
-                               gpgSigAttr = ""
-                       } else {
-                               gpgSigAttr = GPGSigAttr
+                       fnClean := strings.TrimSuffix(fn, "."+algo)
+                       delete(files, fnClean)
+                       gpgSigAttr := ""
+                       if _, err = os.Stat(filepath.Join(dirPath, fnClean+GPGSigExt)); err == nil {
+                               gpgSigAttr = " data-gpg-sig=true"
+                               delete(files, fnClean+GPGSigExt)
                        }
                        result.WriteString(fmt.Sprintf(
                                HTMLElement,
                                strings.Join([]string{
                                        *refreshURLPath, dir, "/", fnClean,
-                                       "#", algoExt[1:], "=", hex.EncodeToString(digest),
+                                       "#", algo, "=", hex.EncodeToString(digest),
                                }, ""),
                                gpgSigAttr,
                                fnClean,
                        ))
-                       for _, n := range []string{
-                               fnClean,
-                               fnClean + GPGSigExt,
-                               fnClean + ".sha256",
-                               fnClean + ".sha512",
-                               fnClean + ".md5",
-                       } {
-                               delete(files, n)
-                       }
                }
        }
        result.WriteString(HTMLEnd)
@@ -454,8 +465,8 @@ func serveUpload(w http.ResponseWriter, r *http.Request) {
                http.Error(w, "single name is expected in request", http.StatusBadRequest)
                return
        }
-       dir := normalizationRe.ReplaceAllString(pkgNames[0], "-")
-       dirPath := filepath.Join(*root, dir)
+       pkgName := normalizationRe.ReplaceAllString(pkgNames[0], "-")
+       dirPath := filepath.Join(*root, pkgName)
        var digestExpected []byte
        if digestExpectedHex, exists := r.MultipartForm.Value["sha256_digest"]; exists {
                digestExpected, err = hex.DecodeString(digestExpectedHex[0])
@@ -468,7 +479,7 @@ func serveUpload(w http.ResponseWriter, r *http.Request) {
 
        // Checking is it internal package
        if _, err = os.Stat(filepath.Join(dirPath, InternalFlag)); err != nil {
-               log.Println(r.RemoteAddr, "non-internal package", dir)
+               log.Println(r.RemoteAddr, "non-internal package", pkgName)
                http.Error(w, "unknown internal package", http.StatusUnauthorized)
                return
        }
@@ -483,7 +494,7 @@ func serveUpload(w http.ResponseWriter, r *http.Request) {
                        http.Error(w, "already exists", http.StatusBadRequest)
                        return
                }
-               if !mkdirForPkg(w, r, dir) {
+               if !mkdirForPkg(w, r, pkgName) {
                        return
                }
                src, err := file.Open()
@@ -538,7 +549,7 @@ func serveUpload(w http.ResponseWriter, r *http.Request) {
                        http.Error(w, err.Error(), http.StatusInternalServerError)
                        return
                }
-               if err = WriteFileSync(dirPath, path+".sha256", digest); err != nil {
+               if err = WriteFileSync(dirPath, path+"."+HashAlgoSHA256, digest); err != nil {
                        http.Error(w, err.Error(), http.StatusInternalServerError)
                        return
                }
@@ -615,50 +626,6 @@ func handler(w http.ResponseWriter, r *http.Request) {
        }
 }
 
-func goodIntegrity() bool {
-       dirs, err := ioutil.ReadDir(*root)
-       if err != nil {
-               log.Fatal(err)
-       }
-       hasher := sha256.New()
-       digest := make([]byte, sha256.Size)
-       isGood := true
-       var data []byte
-       var pkgName string
-       for _, dir := range dirs {
-               files, err := ioutil.ReadDir(filepath.Join(*root, dir.Name()))
-               if err != nil {
-                       log.Fatal(err)
-               }
-               for _, file := range files {
-                       if !strings.HasSuffix(file.Name(), ".sha256") {
-                               continue
-                       }
-                       pkgName = strings.TrimSuffix(file.Name(), ".sha256")
-                       data, err = ioutil.ReadFile(filepath.Join(*root, dir.Name(), pkgName))
-                       if err != nil {
-                               if os.IsNotExist(err) {
-                                       continue
-                               }
-                               log.Fatal(err)
-                       }
-                       hasher.Write(data)
-                       data, err = ioutil.ReadFile(filepath.Join(*root, dir.Name(), file.Name()))
-                       if err != nil {
-                               log.Fatal(err)
-                       }
-                       if bytes.Compare(hasher.Sum(digest[:0]), data) == 0 {
-                               fmt.Println(pkgName, "GOOD")
-                       } else {
-                               isGood = false
-                               fmt.Println(pkgName, "BAD")
-                       }
-                       hasher.Reset()
-               }
-       }
-       return isGood
-}
-
 func main() {
        flag.Parse()
        if *warranty {
index 6cff768e01837465f7d286933e420789d2c3f759..8bb6b9cdbbcd7a95a8041d45662ca053317460df 100644 (file)
@@ -25,13 +25,14 @@ but nearly all the code was rewritten. It has huge differences:
 @itemize
 @item proxying and caching of missing packages, including GPG signatures
 @item @url{https://pythonwheels.com/, Wheel} uploading support
-@item atomic packages store on filesystem
-@item SHA256-checksummed packages: storing checksums, giving them back,
-    verifying stored files integrity, verifying checksum of uploaded
-    packaged
-@item graceful HTTP-server shutdown
+@item integrity check of proxied packages: MD5, SHA256, SHA512, BLAKE2b-256
+@item SHA256 checksums for stored packages
+@item verifying of SHA256 checksum for uploaded packages
+@item storing of uploaded GPG signatures
 @item no YAML configuration, just command-line arguments
 @item no package overwriting ability (as PyPI does too)
+@item atomic packages store on filesystem
+@item graceful HTTP-server shutdown
 @end itemize
 
 Also it contains @file{pyshop2packages.sh} migration script for
@@ -91,7 +92,7 @@ file is checked against it.
 
 Pay attention that you have to manually create corresponding private
 package directory! You are not allowed to upload anything explicitly
-flagged as private.
+flagged as internal package.
 
 @node Passwords
 @unnumbered Password authentication
@@ -184,31 +185,40 @@ Root directory has the following hierarchy:
 root
   +-- public-package
   |     +- public-package-0.1.tar.gz.md5
-  |     +- public-package-0.1.1.tar.gz.sha256
+  |     +- public-package-0.1.tar.gz.blake2_256
+  |     +- public-package-0.1.1.tar.gz.blake2_256
   |     +- public-package-0.2.tar.gz
   |     +- public-package-0.2.tar.gz.asc
   |     +- public-package-0.2.tar.gz.sha256
   +-- private-package
   |     +- .internal
   |     +- private-package-0.1.tar.gz
+  |     +- private-package-0.1.tar.gz.asc
   |     +- private-package-0.1.tar.gz.sha256
   |...
 @end verbatim
 
-Each directory is a package name. When you try to list non existent
-directory contents (you are downloading package you have not seen
-before), then GoCheese will download information about package's
-versions with checksums and write them in corresponding @file{.sha256}
-files. However no package package tarball is downloaded.
+Each directory is a normalized package name. When you try to list non
+existent directory contents (you are downloading package you have not
+seen before), then GoCheese will download information about package's
+versions with checksums and write them in corresponding
+@file{.sha256}, @file{.blake2_256}, @file{.sha512}, @file{.md5} files.
+However no package tarball is downloaded.
 
 When you request for particular package version, then its tarball is
-downloaded and verified against the checksum. For example in the root
-directory above we have downloaded only @file{public-package-0.2}.
-If upstream has corresponding @file{.asc} file, then it also will be
-downloaded.
-
-Private packages contain @file{.internal} file, indicating that it must
-not be asked in PyPI if required version is missing. You have to create
-it manually.
+downloaded and verified against the stored checksum. But SHA256 is
+forced to be stored and used later.
+
+For example @file{public-package} has @code{0.1} version, downloaded a
+long time ago with MD5 checksum. @code{0.1.1} version is downloaded more
+recently with BLAKE2b-256 checksum, also storing that checksum for
+@code{0.1}. @code{0.2} version has its tarball downloaded, with a
+forcibly recalculated SHA256 checksum. Also upstream has corresponding
+@file{.asc} signature file.
+
+@file{private-package} is private package, because it contains
+@file{.internal} file. It can be uploaded and queries to it are not
+proxied to upstream PyPI. You have to create it manually. If you upload
+GPG signature, then it will be also stored.
 
 @bye
diff --git a/integrity.go b/integrity.go
new file mode 100644 (file)
index 0000000..a96d80e
--- /dev/null
@@ -0,0 +1,73 @@
+/*
+GoCheese -- Python private package repository and caching proxy
+Copyright (C) 2019 Sergey Matveev <stargrave@stargrave.org>
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, version 3 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package main
+
+import (
+       "bytes"
+       "crypto/sha256"
+       "fmt"
+       "io/ioutil"
+       "log"
+       "os"
+       "path/filepath"
+       "strings"
+)
+
+func goodIntegrity() bool { // checks every stored package file against its SHA256 digest file; true only if all match
+       dirs, err := ioutil.ReadDir(*root) // each entry of the root is read as a package directory below
+       if err != nil {
+               log.Fatal(err)
+       }
+       hasher := sha256.New()
+       digest := make([]byte, sha256.Size) // scratch buffer reused by hasher.Sum(digest[:0])
+       isGood := true
+       var data []byte
+       var pkgName string // despite the name: the package file name, i.e. digest file name without ".sha256"
+       for _, dir := range dirs {
+               files, err := ioutil.ReadDir(filepath.Join(*root, dir.Name()))
+               if err != nil {
+                       log.Fatal(err)
+               }
+               for _, file := range files {
+                       if !strings.HasSuffix(file.Name(), "."+HashAlgoSHA256) { // only SHA256 digest files drive the check
+                               continue
+                       }
+                       pkgName = strings.TrimSuffix(file.Name(), "."+HashAlgoSHA256)
+                       data, err = ioutil.ReadFile(filepath.Join(*root, dir.Name(), pkgName)) // whole package file is loaded into memory
+                       if err != nil {
+                               if os.IsNotExist(err) { // digest exists but the package file does not: nothing to verify
+                                       continue
+                               }
+                               log.Fatal(err)
+                       }
+                       hasher.Write(data)
+                       data, err = ioutil.ReadFile(filepath.Join(*root, dir.Name(), file.Name())) // data now holds the stored digest
+                       if err != nil {
+                               log.Fatal(err)
+                       }
+                       if bytes.Compare(hasher.Sum(digest[:0]), data) == 0 { // stored digest is compared as raw bytes, not hex
+                               fmt.Println(pkgName, "GOOD")
+                       } else {
+                               isGood = false // keep scanning so every bad file gets reported
+                               fmt.Println(pkgName, "BAD")
+                       }
+                       hasher.Reset() // clear state before hashing the next file
+               }
+       }
+       return isGood
+}