X-Git-Url: http://www.git.cypherpunks.ru/?p=gocheese.git;a=blobdiff_plain;f=refresh.go;h=ea3af19c049d4738273a63f9189267577914e5f7;hp=092bd0125fa9ae555cb8a18b14c89ffd1440967f;hb=HEAD;hpb=60834a0713d5dcc6a9911511cb8618ce7358c824 diff --git a/refresh.go b/refresh.go index 092bd01..3d134a0 100644 --- a/refresh.go +++ b/refresh.go @@ -1,19 +1,17 @@ -/* -GoCheese -- Python private package repository and caching proxy -Copyright (C) 2019-2021 Sergey Matveev - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, version 3 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . -*/ +// GoCheese -- Python private package repository and caching proxy +// Copyright (C) 2019-2024 Sergey Matveev +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 3 of the License. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . package main @@ -25,9 +23,9 @@ import ( "crypto/sha512" "encoding/hex" "encoding/json" + "errors" "hash" "io" - "io/ioutil" "log" "net/http" "net/url" @@ -42,11 +40,10 @@ import ( ) const ( + HashAlgoBLAKE2b256 = "blake2b_256" HashAlgoSHA256 = "sha256" - HashAlgoBLAKE2b256 = "blake2_256" HashAlgoSHA512 = "sha512" HashAlgoMD5 = "md5" - GPGSigExt = ".asc" InternalFlag = ".internal" ) @@ -55,8 +52,8 @@ var ( PyPIURLParsed *url.URL PyPIHTTPTransport http.Transport KnownHashAlgos []string = []string{ - HashAlgoSHA256, HashAlgoBLAKE2b256, + HashAlgoSHA256, HashAlgoSHA512, HashAlgoMD5, } @@ -73,23 +70,27 @@ func blake2b256New() hash.Hash { func agentedReq(url string) *http.Request { req, err := http.NewRequest("GET", url, nil) if err != nil { - log.Fatalln(err) + log.Fatal(err) } req.Header.Set("User-Agent", UserAgent) return req } +type RecFieldToValuesMap struct { + recField string + jsonFields []string +} + func refreshDir( w http.ResponseWriter, r *http.Request, pkgName, filenameGet string, - gpgUpdate bool, ) bool { - if _, err := os.Stat(filepath.Join(*Root, pkgName, InternalFlag)); err == nil { + if _, err := os.Stat(filepath.Join(Root, pkgName, InternalFlag)); err == nil { return true } c := http.Client{Transport: &PyPIHTTPTransport} - dirPath := filepath.Join(*Root, pkgName) + dirPath := filepath.Join(Root, pkgName) now := time.Now() var allReleases map[string][]*PkgReleaseInfo @@ -109,7 +110,13 @@ func refreshDir( http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway) return false } - body, err := ioutil.ReadAll(resp.Body) + body, err := io.ReadAll(resp.Body) + if err != nil { + resp.Body.Close() + log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err) + http.Error(w, "can not read body", http.StatusBadGateway) + return false + } resp.Body.Close() var buf bytes.Buffer var description string @@ -117,45 +124,46 @@ func refreshDir( var meta PkgMeta err = json.Unmarshal(body, &meta) if err == nil { - for recField, jsonField := range map[string]string{ - MetadataFieldName: meta.Info.Name, - MetadataFieldVersion: meta.Info.Version, - MetadataFieldSummary: meta.Info.Summary, - MetadataFieldDescriptionContentType: meta.Info.DescriptionContentType, - MetadataFieldKeywords: meta.Info.Keywords, - MetadataFieldHomePage: meta.Info.HomePage, - MetadataFieldAuthor: meta.Info.Author, - MetadataFieldAuthorEmail: meta.Info.AuthorEmail, - MetadataFieldMaintainer: meta.Info.Maintainer, - MetadataFieldMaintainerEmail: meta.Info.MaintainerEmail, - MetadataFieldLicense: meta.Info.License, - MetadataFieldRequiresPython: meta.Info.RequiresPython, + for _, m := range [][2]string{ + {MDFieldName, meta.Info.Name}, + {MDFieldVersion, meta.Info.Version}, + {MDFieldSummary, meta.Info.Summary}, + {MDFieldDescriptionContentType, meta.Info.DescriptionContentType}, + {MDFieldKeywords, meta.Info.Keywords}, + {MDFieldHomePage, meta.Info.HomePage}, + {MDFieldAuthor, meta.Info.Author}, + {MDFieldAuthorEmail, meta.Info.AuthorEmail}, + {MDFieldMaintainer, meta.Info.Maintainer}, + {MDFieldMaintainerEmail, meta.Info.MaintainerEmail}, + {MDFieldLicense, meta.Info.License}, + {MDFieldRequiresPython, meta.Info.RequiresPython}, } { + recField, jsonField := m[0], m[1] if jsonField == "" { continue } if _, err = wr.WriteFields(recfile.Field{ - Name: metadataFieldToRecField(recField), + Name: MDFieldToRecField[recField], Value: jsonField, }); err != nil { - log.Fatalln(err) + log.Fatal(err) } } - for recField, jsonFields := range map[string][]string{ - MetadataFieldClassifier: meta.Info.Classifier, - MetadataFieldPlatform: meta.Info.Platform, - MetadataFieldSupportedPlatform: meta.Info.SupportedPlatform, - MetadataFieldRequiresDist: meta.Info.RequiresDist, - MetadataFieldRequiresExternal: meta.Info.RequiresExternal, - MetadataFieldProjectURL: meta.Info.ProjectURL, - MetadataFieldProvidesExtra: meta.Info.ProvidesExtra, + for _, m := range []RecFieldToValuesMap{ + {MDFieldClassifier, meta.Info.Classifier}, + {MDFieldPlatform, meta.Info.Platform}, + {MDFieldSupportedPlatform, meta.Info.SupportedPlatform}, + {MDFieldRequiresDist, meta.Info.RequiresDist}, + {MDFieldRequiresExternal, meta.Info.RequiresExternal}, + {MDFieldProjectURL, meta.Info.ProjectURL}, + {MDFieldProvidesExtra, meta.Info.ProvidesExtra}, } { - for _, v := range jsonFields { + for _, v := range m.jsonFields { if _, err = wr.WriteFields(recfile.Field{ - Name: metadataFieldToRecField(recField), + Name: MDFieldToRecField[m.recField], Value: v, }); err != nil { - log.Fatalln(err) + log.Fatal(err) } } } @@ -172,41 +180,42 @@ func refreshDir( http.Error(w, "can not parse metadata JSON", http.StatusBadGateway) return false } - for recField, jsonField := range map[string]string{ - MetadataFieldName: metaStripped.Info.Name, - MetadataFieldVersion: metaStripped.Info.Version, - MetadataFieldSummary: metaStripped.Info.Summary, - MetadataFieldDescriptionContentType: metaStripped.Info.DescriptionContentType, - MetadataFieldKeywords: metaStripped.Info.Keywords, - MetadataFieldHomePage: metaStripped.Info.HomePage, - MetadataFieldAuthor: metaStripped.Info.Author, - MetadataFieldAuthorEmail: metaStripped.Info.AuthorEmail, - MetadataFieldMaintainer: metaStripped.Info.Maintainer, - MetadataFieldMaintainerEmail: metaStripped.Info.MaintainerEmail, - MetadataFieldLicense: metaStripped.Info.License, - MetadataFieldRequiresPython: metaStripped.Info.RequiresPython, + for _, m := range [][2]string{ + {MDFieldName, metaStripped.Info.Name}, + {MDFieldVersion, metaStripped.Info.Version}, + {MDFieldSummary, metaStripped.Info.Summary}, + {MDFieldDescriptionContentType, metaStripped.Info.DescriptionContentType}, + {MDFieldKeywords, metaStripped.Info.Keywords}, + {MDFieldHomePage, metaStripped.Info.HomePage}, + {MDFieldAuthor, metaStripped.Info.Author}, + {MDFieldAuthorEmail, metaStripped.Info.AuthorEmail}, + {MDFieldMaintainer, metaStripped.Info.Maintainer}, + {MDFieldMaintainerEmail, metaStripped.Info.MaintainerEmail}, + {MDFieldLicense, metaStripped.Info.License}, + {MDFieldRequiresPython, metaStripped.Info.RequiresPython}, } { + recField, jsonField := m[0], m[1] if jsonField == "" { continue } if _, err = wr.WriteFields(recfile.Field{ - Name: metadataFieldToRecField(recField), + Name: MDFieldToRecField[recField], Value: jsonField, }); err != nil { - log.Fatalln(err) + log.Fatal(err) } } - for recField, jsonFields := range map[string][]string{ - MetadataFieldClassifier: metaStripped.Info.Classifier, - MetadataFieldRequiresDist: metaStripped.Info.RequiresDist, + for _, m := range []RecFieldToValuesMap{ + {MDFieldClassifier, metaStripped.Info.Classifier}, + {MDFieldRequiresDist, metaStripped.Info.RequiresDist}, } { - for _, v := range jsonFields { + for _, v := range m.jsonFields { if _, err = wr.WriteFields(recfile.Field{ - Name: metadataFieldToRecField(recField), + Name: MDFieldToRecField[m.recField], Value: v, }); err != nil { - log.Fatalln(err) + log.Fatal(err) } } } @@ -216,27 +225,30 @@ func refreshDir( lines := strings.Split(description, "\n") if len(lines) > 0 { if _, err = wr.WriteFieldMultiline( - MetadataFieldDescription, lines, + MDFieldDescription, lines, ); err != nil { - log.Fatalln(err) + log.Fatal(err) } } if !mkdirForPkg(w, r, pkgName) { return false } - path := filepath.Join(dirPath, MetadataFile) - existing, err := ioutil.ReadFile(path) - if err != nil || bytes.Compare(existing, buf.Bytes()) != 0 { + path := filepath.Join(dirPath, MDFile) + existing, err := os.ReadFile(path) + if err != nil || !bytes.Equal(existing, buf.Bytes()) { if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil { log.Println("error", r.RemoteAddr, "refresh-json", path, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } - log.Println(r.RemoteAddr, "pypi", pkgName+"."+MetadataFile, "touch") + log.Println(r.RemoteAddr, "pypi", pkgName+"/"+MDFile, "touch") } } mtimes := make(map[string]time.Time) + digestsBLAKE2b256 := make(map[string][]byte) + digestsSHA256 := make(map[string][]byte) + digestsSHA512 := make(map[string][]byte) for _, releases := range allReleases { for _, rel := range releases { if rel.Filename == "" || rel.UploadTimeISO8601 == "" { @@ -252,6 +264,39 @@ func refreshDir( return false } mtimes[rel.Filename] = t.Truncate(time.Second) + if d := rel.Digests[HashAlgoBLAKE2b256]; d != "" { + digestsBLAKE2b256[rel.Filename], err = hex.DecodeString(d) + if err != nil { + log.Println( + "error", r.RemoteAddr, "refresh-json", pkgName, + "can not decode blake2b_256 digest:", err, + ) + http.Error(w, "can not parse metadata JSON", http.StatusBadGateway) + return false + } + } + if d := rel.Digests[HashAlgoSHA256]; d != "" { + digestsSHA256[rel.Filename], err = hex.DecodeString(d) + if err != nil { + log.Println( + "error", r.RemoteAddr, "refresh-json", pkgName, + "can not decode sha256 digest:", err, + ) + http.Error(w, "can not parse metadata JSON", http.StatusBadGateway) + return false + } + } + if d := rel.Digests[HashAlgoSHA512]; d != "" { + digestsSHA512[rel.Filename], err = hex.DecodeString(d) + if err != nil { + log.Println( + "error", r.RemoteAddr, "refresh-json", pkgName, + "can not decode sha512 digest:", err, + ) + http.Error(w, "can not parse metadata JSON", http.StatusBadGateway) + return false + } + } } } @@ -270,7 +315,7 @@ func refreshDir( http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway) return false } - body, err := ioutil.ReadAll(resp.Body) + body, err := io.ReadAll(resp.Body) resp.Body.Close() if err != nil { log.Println("error", r.RemoteAddr, "refresh", pkgName, err) @@ -294,51 +339,73 @@ func refreshDir( return false } - if pkgURL.Fragment == "" { - log.Println(r.RemoteAddr, "pypi", filename, "no digest") - http.Error(w, "no digest provided", http.StatusBadGateway) - return false - } - digestInfo := strings.Split(pkgURL.Fragment, "=") - if len(digestInfo) == 1 { - // Ancient non PEP-0503 PyPIs, assume MD5 - digestInfo = []string{"md5", digestInfo[0]} - } else if len(digestInfo) != 2 { - log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest") - http.Error(w, "invalid digest provided", http.StatusBadGateway) - return false - } - digest, err := hex.DecodeString(digestInfo[1]) - if err != nil { - log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest") - http.Error(w, err.Error(), http.StatusBadGateway) - return false - } - hashAlgo := digestInfo[0] + var hashAlgo string var hasherNew func() hash.Hash - var hashSize int - switch hashAlgo { - case HashAlgoMD5: - hasherNew = md5.New - hashSize = md5.Size - case HashAlgoSHA256: + var digest []byte + if d := digestsBLAKE2b256[filename]; d != nil { + hasherNew = blake2b256New + hashAlgo = HashAlgoBLAKE2b256 + digest = d + } else if d := digestsSHA256[filename]; d != nil { hasherNew = sha256.New - hashSize = sha256.Size - case HashAlgoSHA512: + hashAlgo = HashAlgoSHA256 + digest = d + } else if d := digestsSHA512[filename]; d != nil { hasherNew = sha512.New - hashSize = sha512.Size - case HashAlgoBLAKE2b256: - hasherNew = blake2b256New - hashSize = blake2b.Size256 - default: - log.Println("error", r.RemoteAddr, "pypi", filename, "unknown digest", hashAlgo) - http.Error(w, "unknown digest algorithm", http.StatusBadGateway) - return false - } - if len(digest) != hashSize { - log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest length") - http.Error(w, "invalid digest length", http.StatusBadGateway) - return false + hashAlgo = HashAlgoSHA512 + digest = d + } else { + if pkgURL.Fragment == "" { + log.Println(r.RemoteAddr, "pypi", filename, "no digest") + http.Error(w, "no digest provided", http.StatusBadGateway) + return false + } + digestInfo := strings.Split(pkgURL.Fragment, "=") + if len(digestInfo) == 1 { + // Ancient non PEP-0503 PyPIs, assume MD5 + digestInfo = []string{"md5", digestInfo[0]} + } else if len(digestInfo) != 2 { + log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest") + http.Error(w, "invalid digest provided", http.StatusBadGateway) + return false + } + var err error + digest, err = hex.DecodeString(digestInfo[1]) + if err != nil { + log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest") + http.Error(w, err.Error(), http.StatusBadGateway) + return false + } + hashAlgo = digestInfo[0] + var hashSize int + switch hashAlgo { + case HashAlgoBLAKE2b256: + hasherNew = blake2b256New + hashSize = blake2b.Size256 + case HashAlgoSHA256: + hasherNew = sha256.New + hashSize = sha256.Size + case HashAlgoSHA512: + hasherNew = sha512.New + hashSize = sha512.Size + case HashAlgoMD5: + hasherNew = md5.New + hashSize = md5.Size + default: + log.Println( + "error", r.RemoteAddr, "pypi", + filename, "unknown digest", hashAlgo, + ) + http.Error(w, "unknown digest algorithm", http.StatusBadGateway) + return false + } + if len(digest) != hashSize { + log.Println( + "error", r.RemoteAddr, "pypi", + filename, "invalid digest length") + http.Error(w, "invalid digest length", http.StatusBadGateway) + return false + } } pkgURL.Fragment = "" @@ -377,6 +444,7 @@ func refreshDir( return false } hasher := hasherNew() + hasherBLAKE2b256 := blake2b256New() hasherSHA256 := sha256.New() dst, err := TempFile(dirPath) if err != nil { @@ -386,6 +454,9 @@ func refreshDir( } dstBuf := bufio.NewWriter(dst) wrs := []io.Writer{hasher, dstBuf} + if hashAlgo != HashAlgoBLAKE2b256 { + wrs = append(wrs, hasherBLAKE2b256) + } if hashAlgo != HashAlgoSHA256 { wrs = append(wrs, hasherSHA256) } @@ -404,13 +475,22 @@ func refreshDir( http.Error(w, err.Error(), http.StatusInternalServerError) return false } - if bytes.Compare(hasher.Sum(nil), digest) != 0 { + if !bytes.Equal(hasher.Sum(nil), digest) { log.Println(r.RemoteAddr, "pypi", filename, "digest mismatch") os.Remove(dst.Name()) dst.Close() http.Error(w, "digest mismatch", http.StatusBadGateway) return false } + if digestStored, err := os.ReadFile(path + "." + hashAlgo); err == nil && + !bytes.Equal(digest, digestStored) { + err = errors.New("stored digest mismatch") + log.Println("error", r.RemoteAddr, "pypi", filename, err) + os.Remove(dst.Name()) + dst.Close() + http.Error(w, err.Error(), http.StatusInternalServerError) + return false + } if !NoSync { if err = dst.Sync(); err != nil { os.Remove(dst.Name()) @@ -439,69 +519,64 @@ func refreshDir( http.Error(w, err.Error(), http.StatusInternalServerError) return false } - if hashAlgo != HashAlgoSHA256 { - hashAlgo = HashAlgoSHA256 - digest = hasherSHA256.Sum(nil) - for _, algo := range KnownHashAlgos[1:] { - os.Remove(path + "." + algo) - } - } - } - if mtimeExists { - stat, err := os.Stat(path) - if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) { - log.Println(r.RemoteAddr, "pypi", filename, "touch") - if err = os.Chtimes(path, mtime, mtime); err != nil { - log.Println("error", r.RemoteAddr, "pypi", filename, err) - http.Error(w, err.Error(), http.StatusInternalServerError) - } - } - } - if filename == filenameGet || gpgUpdate { - if _, err = os.Stat(path); err != nil { - goto GPGSigSkip - } - resp, err := c.Do(agentedReq(uri + GPGSigExt)) - if err != nil { - goto GPGSigSkip + var digestBLAKE2b256 []byte + var digestSHA256 []byte + if hashAlgo == HashAlgoBLAKE2b256 { + digestBLAKE2b256 = hasher.Sum(nil) + } else { + digestBLAKE2b256 = hasherBLAKE2b256.Sum(nil) } - if resp.StatusCode != http.StatusOK { - resp.Body.Close() - goto GPGSigSkip + if hashAlgo == HashAlgoSHA256 { + digestSHA256 = hasher.Sum(nil) + } else { + digestSHA256 = hasherSHA256.Sum(nil) } - sig, err := ioutil.ReadAll(resp.Body) - resp.Body.Close() - if err != nil { - goto GPGSigSkip - } - if !bytes.HasPrefix(sig, []byte("-----BEGIN PGP SIGNATURE-----")) { - log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "non PGP") - goto GPGSigSkip + if err = WriteFileSync( + dirPath, path+"."+HashAlgoBLAKE2b256, + digestBLAKE2b256, mtime, + ); err != nil { + log.Println( + "error", r.RemoteAddr, "pypi", + path+"."+HashAlgoBLAKE2b256, err, + ) + http.Error(w, err.Error(), http.StatusInternalServerError) + return false } - if err = WriteFileSync(dirPath, path+GPGSigExt, sig, mtime); err != nil { - log.Println("error", r.RemoteAddr, "pypi", filename+GPGSigExt, err) + if err = WriteFileSync( + dirPath, path+"."+HashAlgoSHA256, + digestSHA256, mtime, + ); err != nil { + log.Println( + "error", r.RemoteAddr, "pypi", + path+"."+HashAlgoSHA256, err, + ) http.Error(w, err.Error(), http.StatusInternalServerError) return false } - log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "downloaded") + for _, algo := range KnownHashAlgos[2:] { + os.Remove(path + "." + algo) + } + digest = nil } if mtimeExists { - stat, err := os.Stat(path + GPGSigExt) + stat, err := os.Stat(path) if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) { - log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "touch") - if err = os.Chtimes(path+GPGSigExt, mtime, mtime); err != nil { + log.Println(r.RemoteAddr, "pypi", filename, "touch") + if err = os.Chtimes(path, mtime, mtime); err != nil { log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) } } } - GPGSigSkip: + if digest == nil { + continue + } path = path + "." + hashAlgo stat, err := os.Stat(path) - if err == nil && - (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime)) { + if err == nil && (!mtimeExists || + (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime))) { continue } if err != nil && !os.IsNotExist(err) {