X-Git-Url: http://www.git.cypherpunks.ru/?p=gocheese.git;a=blobdiff_plain;f=refresh.go;h=ea3af19c049d4738273a63f9189267577914e5f7;hp=11c3ab7a52a73db2c3b13aa08a0f4723c7cfbc8a;hb=HEAD;hpb=cb0449b3d3da2498e360098694a556c882d454bd diff --git a/refresh.go b/refresh.go index 11c3ab7..3d134a0 100644 --- a/refresh.go +++ b/refresh.go @@ -1,19 +1,17 @@ -/* -GoCheese -- Python private package repository and caching proxy -Copyright (C) 2019-2021 Sergey Matveev - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, version 3 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ +// GoCheese -- Python private package repository and caching proxy +// Copyright (C) 2019-2024 Sergey Matveev +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 3 of the License. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>.
package main @@ -24,19 +22,43 @@ import ( "crypto/sha256" "crypto/sha512" "encoding/hex" + "encoding/json" + "errors" "hash" "io" - "io/ioutil" "log" "net/http" "net/url" "os" "path/filepath" + "regexp" "strings" + "time" + "go.cypherpunks.ru/recfile" "golang.org/x/crypto/blake2b" ) +const ( + HashAlgoBLAKE2b256 = "blake2b_256" + HashAlgoSHA256 = "sha256" + HashAlgoSHA512 = "sha512" + HashAlgoMD5 = "md5" + InternalFlag = ".internal" +) + +var ( + PkgPyPI = regexp.MustCompile(`^.*]*>(.+).*$`) + PyPIURLParsed *url.URL + PyPIHTTPTransport http.Transport + KnownHashAlgos []string = []string{ + HashAlgoBLAKE2b256, + HashAlgoSHA256, + HashAlgoSHA512, + HashAlgoMD5, + } +) + func blake2b256New() hash.Hash { h, err := blake2b.New256(nil) if err != nil { @@ -45,16 +67,240 @@ func blake2b256New() hash.Hash { return h } +func agentedReq(url string) *http.Request { + req, err := http.NewRequest("GET", url, nil) + if err != nil { + log.Fatal(err) + } + req.Header.Set("User-Agent", UserAgent) + return req +} + +type RecFieldToValuesMap struct { + recField string + jsonFields []string +} + func refreshDir( w http.ResponseWriter, r *http.Request, pkgName, filenameGet string, - gpgUpdate bool, ) bool { - if _, err := os.Stat(filepath.Join(*root, pkgName, InternalFlag)); err == nil { + if _, err := os.Stat(filepath.Join(Root, pkgName, InternalFlag)); err == nil { return true } - resp, err := http.Get(*pypiURL + pkgName + "/") + c := http.Client{Transport: &PyPIHTTPTransport} + dirPath := filepath.Join(Root, pkgName) + now := time.Now() + + var allReleases map[string][]*PkgReleaseInfo + if *JSONURL != "" { + resp, err := c.Do(agentedReq(*JSONURL + pkgName + "/json")) + if err != nil { + log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err) + http.Error(w, err.Error(), http.StatusBadGateway) + return false + } + if resp.StatusCode != http.StatusOK { + resp.Body.Close() + log.Println( + "error", r.RemoteAddr, "refresh-json", pkgName, + "HTTP status:", resp.Status, + ) + 
http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway) + return false + } + body, err := io.ReadAll(resp.Body) + if err != nil { + resp.Body.Close() + log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err) + http.Error(w, "can not read body", http.StatusBadGateway) + return false + } + resp.Body.Close() + var buf bytes.Buffer + var description string + wr := recfile.NewWriter(&buf) + var meta PkgMeta + err = json.Unmarshal(body, &meta) + if err == nil { + for _, m := range [][2]string{ + {MDFieldName, meta.Info.Name}, + {MDFieldVersion, meta.Info.Version}, + {MDFieldSummary, meta.Info.Summary}, + {MDFieldDescriptionContentType, meta.Info.DescriptionContentType}, + {MDFieldKeywords, meta.Info.Keywords}, + {MDFieldHomePage, meta.Info.HomePage}, + {MDFieldAuthor, meta.Info.Author}, + {MDFieldAuthorEmail, meta.Info.AuthorEmail}, + {MDFieldMaintainer, meta.Info.Maintainer}, + {MDFieldMaintainerEmail, meta.Info.MaintainerEmail}, + {MDFieldLicense, meta.Info.License}, + {MDFieldRequiresPython, meta.Info.RequiresPython}, + } { + recField, jsonField := m[0], m[1] + if jsonField == "" { + continue + } + if _, err = wr.WriteFields(recfile.Field{ + Name: MDFieldToRecField[recField], + Value: jsonField, + }); err != nil { + log.Fatal(err) + } + } + for _, m := range []RecFieldToValuesMap{ + {MDFieldClassifier, meta.Info.Classifier}, + {MDFieldPlatform, meta.Info.Platform}, + {MDFieldSupportedPlatform, meta.Info.SupportedPlatform}, + {MDFieldRequiresDist, meta.Info.RequiresDist}, + {MDFieldRequiresExternal, meta.Info.RequiresExternal}, + {MDFieldProjectURL, meta.Info.ProjectURL}, + {MDFieldProvidesExtra, meta.Info.ProvidesExtra}, + } { + for _, v := range m.jsonFields { + if _, err = wr.WriteFields(recfile.Field{ + Name: MDFieldToRecField[m.recField], + Value: v, + }); err != nil { + log.Fatal(err) + } + } + } + description = meta.Info.Description + allReleases = meta.Releases + } else { + var metaStripped PkgMetaStripped + err = json.Unmarshal(body, 
&metaStripped) + if err != nil { + log.Println( + "error", r.RemoteAddr, "refresh-json", pkgName, + "can not parse JSON:", err, + ) + http.Error(w, "can not parse metadata JSON", http.StatusBadGateway) + return false + } + for _, m := range [][2]string{ + {MDFieldName, metaStripped.Info.Name}, + {MDFieldVersion, metaStripped.Info.Version}, + {MDFieldSummary, metaStripped.Info.Summary}, + {MDFieldDescriptionContentType, metaStripped.Info.DescriptionContentType}, + {MDFieldKeywords, metaStripped.Info.Keywords}, + {MDFieldHomePage, metaStripped.Info.HomePage}, + {MDFieldAuthor, metaStripped.Info.Author}, + {MDFieldAuthorEmail, metaStripped.Info.AuthorEmail}, + {MDFieldMaintainer, metaStripped.Info.Maintainer}, + {MDFieldMaintainerEmail, metaStripped.Info.MaintainerEmail}, + {MDFieldLicense, metaStripped.Info.License}, + {MDFieldRequiresPython, metaStripped.Info.RequiresPython}, + } { + recField, jsonField := m[0], m[1] + if jsonField == "" { + continue + } + if _, err = wr.WriteFields(recfile.Field{ + Name: MDFieldToRecField[recField], + Value: jsonField, + }); err != nil { + log.Fatal(err) + } + } + + for _, m := range []RecFieldToValuesMap{ + {MDFieldClassifier, metaStripped.Info.Classifier}, + {MDFieldRequiresDist, metaStripped.Info.RequiresDist}, + } { + for _, v := range m.jsonFields { + if _, err = wr.WriteFields(recfile.Field{ + Name: MDFieldToRecField[m.recField], + Value: v, + }); err != nil { + log.Fatal(err) + } + } + } + description = metaStripped.Info.Description + allReleases = metaStripped.Releases + } + lines := strings.Split(description, "\n") + if len(lines) > 0 { + if _, err = wr.WriteFieldMultiline( + MDFieldDescription, lines, + ); err != nil { + log.Fatal(err) + } + } + + if !mkdirForPkg(w, r, pkgName) { + return false + } + path := filepath.Join(dirPath, MDFile) + existing, err := os.ReadFile(path) + if err != nil || !bytes.Equal(existing, buf.Bytes()) { + if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil { + 
log.Println("error", r.RemoteAddr, "refresh-json", path, err) + http.Error(w, err.Error(), http.StatusInternalServerError) + return false + } + log.Println(r.RemoteAddr, "pypi", pkgName+"/"+MDFile, "touch") + } + } + mtimes := make(map[string]time.Time) + digestsBLAKE2b256 := make(map[string][]byte) + digestsSHA256 := make(map[string][]byte) + digestsSHA512 := make(map[string][]byte) + for _, releases := range allReleases { + for _, rel := range releases { + if rel.Filename == "" || rel.UploadTimeISO8601 == "" { + continue + } + t, err := time.Parse(time.RFC3339Nano, rel.UploadTimeISO8601) + if err != nil { + log.Println( + "error", r.RemoteAddr, "refresh-json", pkgName, + "can not parse upload_time:", err, + ) + http.Error(w, "can not parse metadata JSON", http.StatusBadGateway) + return false + } + mtimes[rel.Filename] = t.Truncate(time.Second) + if d := rel.Digests[HashAlgoBLAKE2b256]; d != "" { + digestsBLAKE2b256[rel.Filename], err = hex.DecodeString(d) + if err != nil { + log.Println( + "error", r.RemoteAddr, "refresh-json", pkgName, + "can not decode blake2b_256 digest:", err, + ) + http.Error(w, "can not parse metadata JSON", http.StatusBadGateway) + return false + } + } + if d := rel.Digests[HashAlgoSHA256]; d != "" { + digestsSHA256[rel.Filename], err = hex.DecodeString(d) + if err != nil { + log.Println( + "error", r.RemoteAddr, "refresh-json", pkgName, + "can not decode sha256 digest:", err, + ) + http.Error(w, "can not parse metadata JSON", http.StatusBadGateway) + return false + } + } + if d := rel.Digests[HashAlgoSHA512]; d != "" { + digestsSHA512[rel.Filename], err = hex.DecodeString(d) + if err != nil { + log.Println( + "error", r.RemoteAddr, "refresh-json", pkgName, + "can not decode sha512 digest:", err, + ) + http.Error(w, "can not parse metadata JSON", http.StatusBadGateway) + return false + } + } + } + } + + resp, err := c.Do(agentedReq(*PyPIURL + pkgName + "/")) if err != nil { log.Println("error", r.RemoteAddr, "refresh", pkgName, err) 
http.Error(w, err.Error(), http.StatusBadGateway) @@ -62,11 +308,14 @@ func refreshDir( } if resp.StatusCode != http.StatusOK { resp.Body.Close() - log.Println("error", r.RemoteAddr, "refresh", pkgName, "HTTP status:", resp.Status) + log.Println( + "error", r.RemoteAddr, "refresh", pkgName, + "HTTP status:", resp.Status, + ) http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway) return false } - body, err := ioutil.ReadAll(resp.Body) + body, err := io.ReadAll(resp.Body) resp.Body.Close() if err != nil { log.Println("error", r.RemoteAddr, "refresh", pkgName, err) @@ -76,9 +325,8 @@ func refreshDir( if !mkdirForPkg(w, r, pkgName) { return false } - dirPath := filepath.Join(*root, pkgName) for _, lineRaw := range bytes.Split(body, []byte("\n")) { - submatches := pkgPyPI.FindStringSubmatch(string(lineRaw)) + submatches := PkgPyPI.FindStringSubmatch(string(lineRaw)) if len(submatches) == 0 { continue } @@ -91,69 +339,95 @@ func refreshDir( return false } - if pkgURL.Fragment == "" { - log.Println(r.RemoteAddr, "pypi", filename, "no digest") - http.Error(w, "no digest provided", http.StatusBadGateway) - return false - } - digestInfo := strings.Split(pkgURL.Fragment, "=") - if len(digestInfo) == 1 { - // Ancient non PEP-0503 PyPIs, assume MD5 - digestInfo = []string{"md5", digestInfo[0]} - } else if len(digestInfo) != 2 { - log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest") - http.Error(w, "invalid digest provided", http.StatusBadGateway) - return false - } - digest, err := hex.DecodeString(digestInfo[1]) - if err != nil { - log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest") - http.Error(w, err.Error(), http.StatusBadGateway) - return false - } - hashAlgo := digestInfo[0] + var hashAlgo string var hasherNew func() hash.Hash - var hashSize int - switch hashAlgo { - case HashAlgoMD5: - hasherNew = md5.New - hashSize = md5.Size - case HashAlgoSHA256: + var digest []byte + if d := digestsBLAKE2b256[filename]; d != nil { + 
hasherNew = blake2b256New + hashAlgo = HashAlgoBLAKE2b256 + digest = d + } else if d := digestsSHA256[filename]; d != nil { hasherNew = sha256.New - hashSize = sha256.Size - case HashAlgoSHA512: + hashAlgo = HashAlgoSHA256 + digest = d + } else if d := digestsSHA512[filename]; d != nil { hasherNew = sha512.New - hashSize = sha512.Size - case HashAlgoBLAKE2b256: - hasherNew = blake2b256New - hashSize = blake2b.Size256 - default: - log.Println("error", r.RemoteAddr, "pypi", filename, "unknown digest", hashAlgo) - http.Error(w, "unknown digest algorithm", http.StatusBadGateway) - return false - } - if len(digest) != hashSize { - log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest length") - http.Error(w, "invalid digest length", http.StatusBadGateway) - return false + hashAlgo = HashAlgoSHA512 + digest = d + } else { + if pkgURL.Fragment == "" { + log.Println(r.RemoteAddr, "pypi", filename, "no digest") + http.Error(w, "no digest provided", http.StatusBadGateway) + return false + } + digestInfo := strings.Split(pkgURL.Fragment, "=") + if len(digestInfo) == 1 { + // Ancient non PEP-0503 PyPIs, assume MD5 + digestInfo = []string{"md5", digestInfo[0]} + } else if len(digestInfo) != 2 { + log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest") + http.Error(w, "invalid digest provided", http.StatusBadGateway) + return false + } + var err error + digest, err = hex.DecodeString(digestInfo[1]) + if err != nil { + log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest") + http.Error(w, err.Error(), http.StatusBadGateway) + return false + } + hashAlgo = digestInfo[0] + var hashSize int + switch hashAlgo { + case HashAlgoBLAKE2b256: + hasherNew = blake2b256New + hashSize = blake2b.Size256 + case HashAlgoSHA256: + hasherNew = sha256.New + hashSize = sha256.Size + case HashAlgoSHA512: + hasherNew = sha512.New + hashSize = sha512.Size + case HashAlgoMD5: + hasherNew = md5.New + hashSize = md5.Size + default: + log.Println( + "error", 
r.RemoteAddr, "pypi", + filename, "unknown digest", hashAlgo, + ) + http.Error(w, "unknown digest algorithm", http.StatusBadGateway) + return false + } + if len(digest) != hashSize { + log.Println( + "error", r.RemoteAddr, "pypi", + filename, "invalid digest length") + http.Error(w, "invalid digest length", http.StatusBadGateway) + return false + } } pkgURL.Fragment = "" if pkgURL.Host == "" { - uri = pypiURLParsed.ResolveReference(pkgURL).String() + uri = PyPIURLParsed.ResolveReference(pkgURL).String() } else { uri = pkgURL.String() } + mtime, mtimeExists := mtimes[filename] + if !mtimeExists { + mtime = now + } path := filepath.Join(dirPath, filename) if filename == filenameGet { - if killed { + if Killed { // Skip heavy remote call, when shutting down http.Error(w, "shutting down", http.StatusInternalServerError) return false } log.Println(r.RemoteAddr, "pypi", filename, "download") - resp, err = http.Get(uri) + resp, err = c.Do(agentedReq(uri)) if err != nil { log.Println("error", r.RemoteAddr, "pypi", filename, "download", err) http.Error(w, err.Error(), http.StatusBadGateway) @@ -170,6 +444,7 @@ func refreshDir( return false } hasher := hasherNew() + hasherBLAKE2b256 := blake2b256New() hasherSHA256 := sha256.New() dst, err := TempFile(dirPath) if err != nil { @@ -179,6 +454,9 @@ func refreshDir( } dstBuf := bufio.NewWriter(dst) wrs := []io.Writer{hasher, dstBuf} + if hashAlgo != HashAlgoBLAKE2b256 { + wrs = append(wrs, hasherBLAKE2b256) + } if hashAlgo != HashAlgoSHA256 { wrs = append(wrs, hasherSHA256) } @@ -197,25 +475,40 @@ func refreshDir( http.Error(w, err.Error(), http.StatusInternalServerError) return false } - if bytes.Compare(hasher.Sum(nil), digest) != 0 { + if !bytes.Equal(hasher.Sum(nil), digest) { log.Println(r.RemoteAddr, "pypi", filename, "digest mismatch") os.Remove(dst.Name()) dst.Close() http.Error(w, "digest mismatch", http.StatusBadGateway) return false } - if err = dst.Sync(); err != nil { + if digestStored, err := os.ReadFile(path + "." 
+ hashAlgo); err == nil && + !bytes.Equal(digest, digestStored) { + err = errors.New("stored digest mismatch") + log.Println("error", r.RemoteAddr, "pypi", filename, err) os.Remove(dst.Name()) dst.Close() - log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } + if !NoSync { + if err = dst.Sync(); err != nil { + os.Remove(dst.Name()) + dst.Close() + log.Println("error", r.RemoteAddr, "pypi", filename, err) + http.Error(w, err.Error(), http.StatusInternalServerError) + return false + } + } if err = dst.Close(); err != nil { log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } + if err = os.Chtimes(dst.Name(), mtime, mtime); err != nil { + log.Println("error", r.RemoteAddr, "pypi", filename, err) + http.Error(w, err.Error(), http.StatusInternalServerError) + } if err = os.Rename(dst.Name(), path); err != nil { log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) @@ -226,55 +519,73 @@ func refreshDir( http.Error(w, err.Error(), http.StatusInternalServerError) return false } - if hashAlgo != HashAlgoSHA256 { - hashAlgo = HashAlgoSHA256 - digest = hasherSHA256.Sum(nil) - for _, algo := range knownHashAlgos[1:] { - os.Remove(path + "." 
+ algo) - } - } - } - if filename == filenameGet || gpgUpdate { - if _, err = os.Stat(path); err != nil { - goto GPGSigSkip - } - resp, err := http.Get(uri + GPGSigExt) - if err != nil { - goto GPGSigSkip + + var digestBLAKE2b256 []byte + var digestSHA256 []byte + if hashAlgo == HashAlgoBLAKE2b256 { + digestBLAKE2b256 = hasher.Sum(nil) + } else { + digestBLAKE2b256 = hasherBLAKE2b256.Sum(nil) } - if resp.StatusCode != http.StatusOK { - resp.Body.Close() - goto GPGSigSkip + if hashAlgo == HashAlgoSHA256 { + digestSHA256 = hasher.Sum(nil) + } else { + digestSHA256 = hasherSHA256.Sum(nil) } - sig, err := ioutil.ReadAll(resp.Body) - resp.Body.Close() - if err != nil { - goto GPGSigSkip - } - if !bytes.HasPrefix(sig, []byte("-----BEGIN PGP SIGNATURE-----")) { - log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "non PGP") - goto GPGSigSkip + if err = WriteFileSync( + dirPath, path+"."+HashAlgoBLAKE2b256, + digestBLAKE2b256, mtime, + ); err != nil { + log.Println( + "error", r.RemoteAddr, "pypi", + path+"."+HashAlgoBLAKE2b256, err, + ) + http.Error(w, err.Error(), http.StatusInternalServerError) + return false } - if err = WriteFileSync(dirPath, path+GPGSigExt, sig); err != nil { - log.Println("error", r.RemoteAddr, "pypi", filename+GPGSigExt, err) + if err = WriteFileSync( + dirPath, path+"."+HashAlgoSHA256, + digestSHA256, mtime, + ); err != nil { + log.Println( + "error", r.RemoteAddr, "pypi", + path+"."+HashAlgoSHA256, err, + ) http.Error(w, err.Error(), http.StatusInternalServerError) return false } - log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "downloaded") + for _, algo := range KnownHashAlgos[2:] { + os.Remove(path + "." 
+ algo) + } + digest = nil + } + if mtimeExists { + stat, err := os.Stat(path) + if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) { + log.Println(r.RemoteAddr, "pypi", filename, "touch") + if err = os.Chtimes(path, mtime, mtime); err != nil { + log.Println("error", r.RemoteAddr, "pypi", filename, err) + http.Error(w, err.Error(), http.StatusInternalServerError) + } + } + } + + if digest == nil { + continue } - GPGSigSkip: path = path + "." + hashAlgo - _, err = os.Stat(path) - if err == nil { + stat, err := os.Stat(path) + if err == nil && (!mtimeExists || + (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime))) { continue } - if !os.IsNotExist(err) { + if err != nil && !os.IsNotExist(err) { log.Println("error", r.RemoteAddr, "pypi", path, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } log.Println(r.RemoteAddr, "pypi", filename, "touch") - if err = WriteFileSync(dirPath, path, digest); err != nil { + if err = WriteFileSync(dirPath, path, digest, mtime); err != nil { log.Println("error", r.RemoteAddr, "pypi", path, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false