Cypherpunks.ru repositories - gocheese.git/blobdiff - refresh.go
Download link for 3.0.0 release
[gocheese.git] / refresh.go
index 4a5e962925d7b8ff2a2e7ee3ac0a58fb3ce06658..d9411179ef3ccd79909e01aa1eb1856fc2d547a9 100644 (file)
@@ -1,6 +1,6 @@
 /*
 GoCheese -- Python private package repository and caching proxy
-Copyright (C) 2019 Sergey Matveev <stargrave@stargrave.org>
+Copyright (C) 2019-2021 Sergey Matveev <stargrave@stargrave.org>
 
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -24,6 +24,7 @@ import (
        "crypto/sha256"
        "crypto/sha512"
        "encoding/hex"
+       "encoding/json"
        "hash"
        "io"
        "io/ioutil"
@@ -32,11 +33,35 @@ import (
        "net/url"
        "os"
        "path/filepath"
+       "regexp"
        "strings"
+       "time"
 
+       "go.cypherpunks.ru/recfile"
        "golang.org/x/crypto/blake2b"
 )
 
+const ( // String constants used by refreshDir when naming files next to a cached package file.
+       HashAlgoSHA256     = "sha256" // digest file suffix: refreshDir writes path+"."+HashAlgoSHA256 after a download
+       HashAlgoBLAKE2b256 = "blake2_256" // digest file suffix: refreshDir writes path+"."+HashAlgoBLAKE2b256 after a download
+       HashAlgoSHA512     = "sha512" // listed in KnownHashAlgos; files with this suffix are removed after a download
+       HashAlgoMD5        = "md5" // listed in KnownHashAlgos; files with this suffix are removed after a download
+       GPGSigExt          = ".asc" // appended to both the upstream URI and the local path of a signature
+       InternalFlag       = ".internal" // if this entry exists in the package directory, refreshDir returns true without contacting upstream
+)
+
+var ( // Package-level state read by refreshDir.
+       PkgPyPI           = regexp.MustCompile(`^.*<a href="([^"]+)"[^>]*>(.+)</a>.*$`) // matched against each line of the upstream index page; captures the href value and the link text
+       PyPIURLParsed     *url.URL // base used to resolve links whose URL has an empty host; NOTE(review): set outside this view, confirm it is initialised before use
+       PyPIHTTPTransport http.Transport // transport shared by the http.Client that refreshDir builds for upstream requests
+       KnownHashAlgos    []string = []string{ // order matters: refreshDir removes digest files for every entry after the first two
+               HashAlgoSHA256,
+               HashAlgoBLAKE2b256,
+               HashAlgoSHA512,
+               HashAlgoMD5,
+       }
+)
+
 func blake2b256New() hash.Hash {
        h, err := blake2b.New256(nil)
        if err != nil {
@@ -45,21 +70,213 @@ func blake2b256New() hash.Hash {
        return h
 }
 
+func agentedReq(url string) *http.Request { // agentedReq builds a bodyless GET request for url with the User-Agent header set to UserAgent; the parameter shadows the net/url package inside this function.
+       req, err := http.NewRequest("GET", url, nil)
+       if err != nil {
+               log.Fatalln(err) // NOTE(review): presumably the standard log package, in which case a malformed URL terminates the whole process — confirm this is intended
+       }
+       req.Header.Set("User-Agent", UserAgent) // UserAgent is defined outside this view
+       return req
+}
+
+type RecFieldToValuesMap struct { // RecFieldToValuesMap pairs one metadata field name with the list of values decoded from the JSON metadata.
+       recField   string // metadata field name; refreshDir passes it through metadataFieldToRecField to get the recfile field name
+       jsonFields []string // values for that field; refreshDir writes each one as a separate recfile field
+}
+
 func refreshDir(
        w http.ResponseWriter,
        r *http.Request,
        pkgName, filenameGet string,
        gpgUpdate bool,
 ) bool {
-       if _, err := os.Stat(filepath.Join(*root, pkgName, InternalFlag)); err == nil {
+       if _, err := os.Stat(filepath.Join(Root, pkgName, InternalFlag)); err == nil {
                return true
        }
-       resp, err := http.Get(*pypiURL + pkgName + "/")
+       c := http.Client{Transport: &PyPIHTTPTransport}
+       dirPath := filepath.Join(Root, pkgName)
+       now := time.Now()
+
+       var allReleases map[string][]*PkgReleaseInfo
+       if *JSONURL != "" {
+               resp, err := c.Do(agentedReq(*JSONURL + pkgName + "/json"))
+               if err != nil {
+                       log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
+                       http.Error(w, err.Error(), http.StatusBadGateway)
+                       return false
+               }
+               if resp.StatusCode != http.StatusOK {
+                       resp.Body.Close()
+                       log.Println(
+                               "error", r.RemoteAddr, "refresh-json", pkgName,
+                               "HTTP status:", resp.Status,
+                       )
+                       http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
+                       return false
+               }
+               body, err := ioutil.ReadAll(resp.Body)
+               resp.Body.Close()
+               var buf bytes.Buffer
+               var description string
+               wr := recfile.NewWriter(&buf)
+               var meta PkgMeta
+               err = json.Unmarshal(body, &meta)
+               if err == nil {
+                       for _, m := range [][2]string{
+                               {MetadataFieldName, meta.Info.Name},
+                               {MetadataFieldVersion, meta.Info.Version},
+                               {MetadataFieldSummary, meta.Info.Summary},
+                               {MetadataFieldDescriptionContentType, meta.Info.DescriptionContentType},
+                               {MetadataFieldKeywords, meta.Info.Keywords},
+                               {MetadataFieldHomePage, meta.Info.HomePage},
+                               {MetadataFieldAuthor, meta.Info.Author},
+                               {MetadataFieldAuthorEmail, meta.Info.AuthorEmail},
+                               {MetadataFieldMaintainer, meta.Info.Maintainer},
+                               {MetadataFieldMaintainerEmail, meta.Info.MaintainerEmail},
+                               {MetadataFieldLicense, meta.Info.License},
+                               {MetadataFieldRequiresPython, meta.Info.RequiresPython},
+                       } {
+                               recField, jsonField := m[0], m[1]
+                               if jsonField == "" {
+                                       continue
+                               }
+                               if _, err = wr.WriteFields(recfile.Field{
+                                       Name:  metadataFieldToRecField(recField),
+                                       Value: jsonField,
+                               }); err != nil {
+                                       log.Fatalln(err)
+                               }
+                       }
+                       for _, m := range []RecFieldToValuesMap{
+                               {MetadataFieldClassifier, meta.Info.Classifier},
+                               {MetadataFieldPlatform, meta.Info.Platform},
+                               {MetadataFieldSupportedPlatform, meta.Info.SupportedPlatform},
+                               {MetadataFieldRequiresDist, meta.Info.RequiresDist},
+                               {MetadataFieldRequiresExternal, meta.Info.RequiresExternal},
+                               {MetadataFieldProjectURL, meta.Info.ProjectURL},
+                               {MetadataFieldProvidesExtra, meta.Info.ProvidesExtra},
+                       } {
+                               for _, v := range m.jsonFields {
+                                       if _, err = wr.WriteFields(recfile.Field{
+                                               Name:  metadataFieldToRecField(m.recField),
+                                               Value: v,
+                                       }); err != nil {
+                                               log.Fatalln(err)
+                                       }
+                               }
+                       }
+                       description = meta.Info.Description
+                       allReleases = meta.Releases
+               } else {
+                       var metaStripped PkgMetaStripped
+                       err = json.Unmarshal(body, &metaStripped)
+                       if err != nil {
+                               log.Println(
+                                       "error", r.RemoteAddr, "refresh-json", pkgName,
+                                       "can not parse JSON:", err,
+                               )
+                               http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
+                               return false
+                       }
+                       for _, m := range [][2]string{
+                               {MetadataFieldName, metaStripped.Info.Name},
+                               {MetadataFieldVersion, metaStripped.Info.Version},
+                               {MetadataFieldSummary, metaStripped.Info.Summary},
+                               {MetadataFieldDescriptionContentType, metaStripped.Info.DescriptionContentType},
+                               {MetadataFieldKeywords, metaStripped.Info.Keywords},
+                               {MetadataFieldHomePage, metaStripped.Info.HomePage},
+                               {MetadataFieldAuthor, metaStripped.Info.Author},
+                               {MetadataFieldAuthorEmail, metaStripped.Info.AuthorEmail},
+                               {MetadataFieldMaintainer, metaStripped.Info.Maintainer},
+                               {MetadataFieldMaintainerEmail, metaStripped.Info.MaintainerEmail},
+                               {MetadataFieldLicense, metaStripped.Info.License},
+                               {MetadataFieldRequiresPython, metaStripped.Info.RequiresPython},
+                       } {
+                               recField, jsonField := m[0], m[1]
+                               if jsonField == "" {
+                                       continue
+                               }
+                               if _, err = wr.WriteFields(recfile.Field{
+                                       Name:  metadataFieldToRecField(recField),
+                                       Value: jsonField,
+                               }); err != nil {
+                                       log.Fatalln(err)
+                               }
+                       }
+
+                       for _, m := range []RecFieldToValuesMap{
+                               {MetadataFieldClassifier, metaStripped.Info.Classifier},
+                               {MetadataFieldRequiresDist, metaStripped.Info.RequiresDist},
+                       } {
+                               for _, v := range m.jsonFields {
+                                       if _, err = wr.WriteFields(recfile.Field{
+                                               Name:  metadataFieldToRecField(m.recField),
+                                               Value: v,
+                                       }); err != nil {
+                                               log.Fatalln(err)
+                                       }
+                               }
+                       }
+                       description = metaStripped.Info.Description
+                       allReleases = metaStripped.Releases
+               }
+               lines := strings.Split(description, "\n")
+               if len(lines) > 0 {
+                       if _, err = wr.WriteFieldMultiline(
+                               MetadataFieldDescription, lines,
+                       ); err != nil {
+                               log.Fatalln(err)
+                       }
+               }
+
+               if !mkdirForPkg(w, r, pkgName) {
+                       return false
+               }
+               path := filepath.Join(dirPath, MetadataFile)
+               existing, err := ioutil.ReadFile(path)
+               if err != nil || bytes.Compare(existing, buf.Bytes()) != 0 {
+                       if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil {
+                               log.Println("error", r.RemoteAddr, "refresh-json", path, err)
+                               http.Error(w, err.Error(), http.StatusInternalServerError)
+                               return false
+                       }
+                       log.Println(r.RemoteAddr, "pypi", pkgName+"."+MetadataFile, "touch")
+               }
+       }
+       mtimes := make(map[string]time.Time)
+       for _, releases := range allReleases {
+               for _, rel := range releases {
+                       if rel.Filename == "" || rel.UploadTimeISO8601 == "" {
+                               continue
+                       }
+                       t, err := time.Parse(time.RFC3339Nano, rel.UploadTimeISO8601)
+                       if err != nil {
+                               log.Println(
+                                       "error", r.RemoteAddr, "refresh-json", pkgName,
+                                       "can not parse upload_time:", err,
+                               )
+                               http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
+                               return false
+                       }
+                       mtimes[rel.Filename] = t.Truncate(time.Second)
+               }
+       }
+
+       resp, err := c.Do(agentedReq(*PyPIURL + pkgName + "/"))
        if err != nil {
                log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
                http.Error(w, err.Error(), http.StatusBadGateway)
                return false
        }
+       if resp.StatusCode != http.StatusOK {
+               resp.Body.Close()
+               log.Println(
+                       "error", r.RemoteAddr, "refresh", pkgName,
+                       "HTTP status:", resp.Status,
+               )
+               http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
+               return false
+       }
        body, err := ioutil.ReadAll(resp.Body)
        resp.Body.Close()
        if err != nil {
@@ -70,9 +287,8 @@ func refreshDir(
        if !mkdirForPkg(w, r, pkgName) {
                return false
        }
-       dirPath := filepath.Join(*root, pkgName)
        for _, lineRaw := range bytes.Split(body, []byte("\n")) {
-               submatches := pkgPyPI.FindStringSubmatch(string(lineRaw))
+               submatches := PkgPyPI.FindStringSubmatch(string(lineRaw))
                if len(submatches) == 0 {
                        continue
                }
@@ -134,28 +350,42 @@ func refreshDir(
 
                pkgURL.Fragment = ""
                if pkgURL.Host == "" {
-                       uri = pypiURLParsed.ResolveReference(pkgURL).String()
+                       uri = PyPIURLParsed.ResolveReference(pkgURL).String()
                } else {
                        uri = pkgURL.String()
                }
+               mtime, mtimeExists := mtimes[filename]
+               if !mtimeExists {
+                       mtime = now
+               }
 
                path := filepath.Join(dirPath, filename)
                if filename == filenameGet {
-                       if killed {
+                       if Killed {
                                // Skip heavy remote call, when shutting down
                                http.Error(w, "shutting down", http.StatusInternalServerError)
                                return false
                        }
                        log.Println(r.RemoteAddr, "pypi", filename, "download")
-                       resp, err = http.Get(uri)
+                       resp, err = c.Do(agentedReq(uri))
                        if err != nil {
                                log.Println("error", r.RemoteAddr, "pypi", filename, "download", err)
                                http.Error(w, err.Error(), http.StatusBadGateway)
                                return false
                        }
                        defer resp.Body.Close()
+                       if resp.StatusCode != http.StatusOK {
+                               log.Println(
+                                       "error", r.RemoteAddr,
+                                       "pypi", filename, "download",
+                                       "HTTP status:", resp.Status,
+                               )
+                               http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
+                               return false
+                       }
                        hasher := hasherNew()
                        hasherSHA256 := sha256.New()
+                       hasherBLAKE2b256 := blake2b256New()
                        dst, err := TempFile(dirPath)
                        if err != nil {
                                log.Println("error", r.RemoteAddr, "pypi", filename, err)
@@ -167,6 +397,9 @@ func refreshDir(
                        if hashAlgo != HashAlgoSHA256 {
                                wrs = append(wrs, hasherSHA256)
                        }
+                       if hashAlgo != HashAlgoBLAKE2b256 {
+                               wrs = append(wrs, hasherBLAKE2b256)
+                       }
                        wr := io.MultiWriter(wrs...)
                        if _, err = io.Copy(wr, resp.Body); err != nil {
                                os.Remove(dst.Name())
@@ -189,17 +422,23 @@ func refreshDir(
                                http.Error(w, "digest mismatch", http.StatusBadGateway)
                                return false
                        }
-                       if err = dst.Sync(); err != nil {
-                               os.Remove(dst.Name())
-                               dst.Close()
+                       if !NoSync {
+                               if err = dst.Sync(); err != nil {
+                                       os.Remove(dst.Name())
+                                       dst.Close()
+                                       log.Println("error", r.RemoteAddr, "pypi", filename, err)
+                                       http.Error(w, err.Error(), http.StatusInternalServerError)
+                                       return false
+                               }
+                       }
+                       if err = dst.Close(); err != nil {
                                log.Println("error", r.RemoteAddr, "pypi", filename, err)
                                http.Error(w, err.Error(), http.StatusInternalServerError)
                                return false
                        }
-                       if err = dst.Close(); err != nil {
+                       if err = os.Chtimes(dst.Name(), mtime, mtime); err != nil {
                                log.Println("error", r.RemoteAddr, "pypi", filename, err)
                                http.Error(w, err.Error(), http.StatusInternalServerError)
-                               return false
                        }
                        if err = os.Rename(dst.Name(), path); err != nil {
                                log.Println("error", r.RemoteAddr, "pypi", filename, err)
@@ -211,19 +450,62 @@ func refreshDir(
                                http.Error(w, err.Error(), http.StatusInternalServerError)
                                return false
                        }
-                       if hashAlgo != HashAlgoSHA256 {
-                               hashAlgo = HashAlgoSHA256
-                               digest = hasherSHA256.Sum(nil)
-                               for _, algo := range knownHashAlgos[1:] {
-                                       os.Remove(path + "." + algo)
+
+                       var digestSHA256 []byte
+                       var digestBLAKE2b256 []byte
+                       if hashAlgo == HashAlgoSHA256 {
+                               digestSHA256 = hasher.Sum(nil)
+                       } else {
+                               digestSHA256 = hasherSHA256.Sum(nil)
+                       }
+                       if hashAlgo == HashAlgoBLAKE2b256 {
+                               digestBLAKE2b256 = hasher.Sum(nil)
+                       } else {
+                               digestBLAKE2b256 = hasherBLAKE2b256.Sum(nil)
+                       }
+                       if err = WriteFileSync(
+                               dirPath, path+"."+HashAlgoSHA256,
+                               digestSHA256, mtime,
+                       ); err != nil {
+                               log.Println(
+                                       "error", r.RemoteAddr, "pypi",
+                                       path+"."+HashAlgoSHA256, err,
+                               )
+                               http.Error(w, err.Error(), http.StatusInternalServerError)
+                               return false
+                       }
+                       if err = WriteFileSync(
+                               dirPath, path+"."+HashAlgoBLAKE2b256,
+                               digestBLAKE2b256, mtime,
+                       ); err != nil {
+                               log.Println(
+                                       "error", r.RemoteAddr, "pypi",
+                                       path+"."+HashAlgoBLAKE2b256, err,
+                               )
+                               http.Error(w, err.Error(), http.StatusInternalServerError)
+                               return false
+                       }
+                       for _, algo := range KnownHashAlgos[2:] {
+                               os.Remove(path + "." + algo)
+                       }
+                       digest = nil
+               }
+               if mtimeExists {
+                       stat, err := os.Stat(path)
+                       if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
+                               log.Println(r.RemoteAddr, "pypi", filename, "touch")
+                               if err = os.Chtimes(path, mtime, mtime); err != nil {
+                                       log.Println("error", r.RemoteAddr, "pypi", filename, err)
+                                       http.Error(w, err.Error(), http.StatusInternalServerError)
                                }
                        }
                }
+
                if filename == filenameGet || gpgUpdate {
                        if _, err = os.Stat(path); err != nil {
                                goto GPGSigSkip
                        }
-                       resp, err := http.Get(uri + GPGSigExt)
+                       resp, err := c.Do(agentedReq(uri + GPGSigExt))
                        if err != nil {
                                goto GPGSigSkip
                        }
@@ -240,26 +522,41 @@ func refreshDir(
                                log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "non PGP")
                                goto GPGSigSkip
                        }
-                       if err = WriteFileSync(dirPath, path+GPGSigExt, sig); err != nil {
+                       if err = WriteFileSync(dirPath, path+GPGSigExt, sig, mtime); err != nil {
                                log.Println("error", r.RemoteAddr, "pypi", filename+GPGSigExt, err)
                                http.Error(w, err.Error(), http.StatusInternalServerError)
                                return false
                        }
                        log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "downloaded")
                }
+               if mtimeExists {
+                       stat, err := os.Stat(path + GPGSigExt)
+                       if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
+                               log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "touch")
+                               if err = os.Chtimes(path+GPGSigExt, mtime, mtime); err != nil {
+                                       log.Println("error", r.RemoteAddr, "pypi", filename, err)
+                                       http.Error(w, err.Error(), http.StatusInternalServerError)
+                               }
+                       }
+               }
+
        GPGSigSkip:
+               if digest == nil {
+                       continue
+               }
                path = path + "." + hashAlgo
-               _, err = os.Stat(path)
-               if err == nil {
+               stat, err := os.Stat(path)
+               if err == nil &&
+                       (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime)) {
                        continue
                }
-               if !os.IsNotExist(err) {
+               if err != nil && !os.IsNotExist(err) {
                        log.Println("error", r.RemoteAddr, "pypi", path, err)
                        http.Error(w, err.Error(), http.StatusInternalServerError)
                        return false
                }
                log.Println(r.RemoteAddr, "pypi", filename, "touch")
-               if err = WriteFileSync(dirPath, path, digest); err != nil {
+               if err = WriteFileSync(dirPath, path, digest, mtime); err != nil {
                        log.Println("error", r.RemoteAddr, "pypi", path, err)
                        http.Error(w, err.Error(), http.StatusInternalServerError)
                        return false