X-Git-Url: http://www.git.cypherpunks.ru/?p=gocheese.git;a=blobdiff_plain;f=refresh.go;h=ea3af19c049d4738273a63f9189267577914e5f7;hp=59c8ceb9f5dd7c2b6288e27eb1d1cc0c9f6574e9;hb=HEAD;hpb=3a658c891e7f4f8966a83cd709b2b7468226457f diff --git a/refresh.go b/refresh.go index 59c8ceb..3d134a0 100644 --- a/refresh.go +++ b/refresh.go @@ -1,19 +1,17 @@ -/* -GoCheese -- Python private package repository and caching proxy -Copyright (C) 2019-2021 Sergey Matveev - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, version 3 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . -*/ +// GoCheese -- Python private package repository and caching proxy +// Copyright (C) 2019-2024 Sergey Matveev +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 3 of the License. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . package main @@ -25,9 +23,9 @@ import ( "crypto/sha512" "encoding/hex" "encoding/json" + "errors" "hash" "io" - "io/ioutil" "log" "net/http" "net/url" @@ -42,11 +40,10 @@ import ( ) const ( + HashAlgoBLAKE2b256 = "blake2b_256" HashAlgoSHA256 = "sha256" - HashAlgoBLAKE2b256 = "blake2_256" HashAlgoSHA512 = "sha512" HashAlgoMD5 = "md5" - GPGSigExt = ".asc" InternalFlag = ".internal" ) @@ -55,8 +52,8 @@ var ( PyPIURLParsed *url.URL PyPIHTTPTransport http.Transport KnownHashAlgos []string = []string{ - HashAlgoSHA256, HashAlgoBLAKE2b256, + HashAlgoSHA256, HashAlgoSHA512, HashAlgoMD5, } @@ -73,7 +70,7 @@ func blake2b256New() hash.Hash { func agentedReq(url string) *http.Request { req, err := http.NewRequest("GET", url, nil) if err != nil { - log.Fatalln(err) + log.Fatal(err) } req.Header.Set("User-Agent", UserAgent) return req @@ -88,7 +85,6 @@ func refreshDir( w http.ResponseWriter, r *http.Request, pkgName, filenameGet string, - gpgUpdate bool, ) bool { if _, err := os.Stat(filepath.Join(Root, pkgName, InternalFlag)); err == nil { return true @@ -114,7 +110,13 @@ func refreshDir( http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway) return false } - body, err := ioutil.ReadAll(resp.Body) + body, err := io.ReadAll(resp.Body) + if err != nil { + resp.Body.Close() + log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err) + http.Error(w, "can not read body", http.StatusBadGateway) + return false + } resp.Body.Close() var buf bytes.Buffer var description string @@ -123,45 +125,45 @@ func refreshDir( err = json.Unmarshal(body, &meta) if err == nil { for _, m := range [][2]string{ - {MetadataFieldName, meta.Info.Name}, - {MetadataFieldVersion, meta.Info.Version}, - {MetadataFieldSummary, meta.Info.Summary}, - {MetadataFieldDescriptionContentType, meta.Info.DescriptionContentType}, - {MetadataFieldKeywords, 
meta.Info.Keywords}, - {MetadataFieldHomePage, meta.Info.HomePage}, - {MetadataFieldAuthor, meta.Info.Author}, - {MetadataFieldAuthorEmail, meta.Info.AuthorEmail}, - {MetadataFieldMaintainer, meta.Info.Maintainer}, - {MetadataFieldMaintainerEmail, meta.Info.MaintainerEmail}, - {MetadataFieldLicense, meta.Info.License}, - {MetadataFieldRequiresPython, meta.Info.RequiresPython}, + {MDFieldName, meta.Info.Name}, + {MDFieldVersion, meta.Info.Version}, + {MDFieldSummary, meta.Info.Summary}, + {MDFieldDescriptionContentType, meta.Info.DescriptionContentType}, + {MDFieldKeywords, meta.Info.Keywords}, + {MDFieldHomePage, meta.Info.HomePage}, + {MDFieldAuthor, meta.Info.Author}, + {MDFieldAuthorEmail, meta.Info.AuthorEmail}, + {MDFieldMaintainer, meta.Info.Maintainer}, + {MDFieldMaintainerEmail, meta.Info.MaintainerEmail}, + {MDFieldLicense, meta.Info.License}, + {MDFieldRequiresPython, meta.Info.RequiresPython}, } { recField, jsonField := m[0], m[1] if jsonField == "" { continue } if _, err = wr.WriteFields(recfile.Field{ - Name: metadataFieldToRecField(recField), + Name: MDFieldToRecField[recField], Value: jsonField, }); err != nil { - log.Fatalln(err) + log.Fatal(err) } } for _, m := range []RecFieldToValuesMap{ - {MetadataFieldClassifier, meta.Info.Classifier}, - {MetadataFieldPlatform, meta.Info.Platform}, - {MetadataFieldSupportedPlatform, meta.Info.SupportedPlatform}, - {MetadataFieldRequiresDist, meta.Info.RequiresDist}, - {MetadataFieldRequiresExternal, meta.Info.RequiresExternal}, - {MetadataFieldProjectURL, meta.Info.ProjectURL}, - {MetadataFieldProvidesExtra, meta.Info.ProvidesExtra}, + {MDFieldClassifier, meta.Info.Classifier}, + {MDFieldPlatform, meta.Info.Platform}, + {MDFieldSupportedPlatform, meta.Info.SupportedPlatform}, + {MDFieldRequiresDist, meta.Info.RequiresDist}, + {MDFieldRequiresExternal, meta.Info.RequiresExternal}, + {MDFieldProjectURL, meta.Info.ProjectURL}, + {MDFieldProvidesExtra, meta.Info.ProvidesExtra}, } { for _, v := range m.jsonFields { if _, err = wr.WriteFields(recfile.Field{ - Name: metadataFieldToRecField(m.recField), + Name: MDFieldToRecField[m.recField], Value: v, }); err != nil { - log.Fatalln(err) + log.Fatal(err) } } } @@ -179,41 +181,41 @@ func refreshDir( return false } for _, m := range [][2]string{ - {MetadataFieldName, metaStripped.Info.Name}, - {MetadataFieldVersion, metaStripped.Info.Version}, - {MetadataFieldSummary, metaStripped.Info.Summary}, - {MetadataFieldDescriptionContentType, metaStripped.Info.DescriptionContentType}, - {MetadataFieldKeywords, metaStripped.Info.Keywords}, - {MetadataFieldHomePage, metaStripped.Info.HomePage}, - {MetadataFieldAuthor, metaStripped.Info.Author}, - {MetadataFieldAuthorEmail, metaStripped.Info.AuthorEmail}, - {MetadataFieldMaintainer, metaStripped.Info.Maintainer}, - {MetadataFieldMaintainerEmail, metaStripped.Info.MaintainerEmail}, - {MetadataFieldLicense, metaStripped.Info.License}, - {MetadataFieldRequiresPython, metaStripped.Info.RequiresPython}, + {MDFieldName, metaStripped.Info.Name}, + {MDFieldVersion, metaStripped.Info.Version}, + {MDFieldSummary, metaStripped.Info.Summary}, + {MDFieldDescriptionContentType, metaStripped.Info.DescriptionContentType}, + {MDFieldKeywords, metaStripped.Info.Keywords}, + {MDFieldHomePage, metaStripped.Info.HomePage}, + {MDFieldAuthor, metaStripped.Info.Author}, + {MDFieldAuthorEmail, metaStripped.Info.AuthorEmail}, + {MDFieldMaintainer, metaStripped.Info.Maintainer}, + {MDFieldMaintainerEmail, metaStripped.Info.MaintainerEmail}, + {MDFieldLicense, 
metaStripped.Info.License}, + {MDFieldRequiresPython, metaStripped.Info.RequiresPython}, } { recField, jsonField := m[0], m[1] if jsonField == "" { continue } if _, err = wr.WriteFields(recfile.Field{ - Name: metadataFieldToRecField(recField), + Name: MDFieldToRecField[recField], Value: jsonField, }); err != nil { - log.Fatalln(err) + log.Fatal(err) } } for _, m := range []RecFieldToValuesMap{ - {MetadataFieldClassifier, metaStripped.Info.Classifier}, - {MetadataFieldRequiresDist, metaStripped.Info.RequiresDist}, + {MDFieldClassifier, metaStripped.Info.Classifier}, + {MDFieldRequiresDist, metaStripped.Info.RequiresDist}, } { for _, v := range m.jsonFields { if _, err = wr.WriteFields(recfile.Field{ - Name: metadataFieldToRecField(m.recField), + Name: MDFieldToRecField[m.recField], Value: v, }); err != nil { - log.Fatalln(err) + log.Fatal(err) } } } @@ -223,27 +225,30 @@ func refreshDir( lines := strings.Split(description, "\n") if len(lines) > 0 { if _, err = wr.WriteFieldMultiline( - MetadataFieldDescription, lines, + MDFieldDescription, lines, ); err != nil { - log.Fatalln(err) + log.Fatal(err) } } if !mkdirForPkg(w, r, pkgName) { return false } - path := filepath.Join(dirPath, MetadataFile) - existing, err := ioutil.ReadFile(path) - if err != nil || bytes.Compare(existing, buf.Bytes()) != 0 { + path := filepath.Join(dirPath, MDFile) + existing, err := os.ReadFile(path) + if err != nil || !bytes.Equal(existing, buf.Bytes()) { if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil { log.Println("error", r.RemoteAddr, "refresh-json", path, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } - log.Println(r.RemoteAddr, "pypi", pkgName+"/"+MetadataFile, "touch") + log.Println(r.RemoteAddr, "pypi", pkgName+"/"+MDFile, "touch") } } mtimes := make(map[string]time.Time) + digestsBLAKE2b256 := make(map[string][]byte) + digestsSHA256 := make(map[string][]byte) + digestsSHA512 := make(map[string][]byte) for _, releases := range allReleases { for _, rel := range releases { if rel.Filename == "" || rel.UploadTimeISO8601 == "" { @@ -259,6 +264,39 @@ func refreshDir( return false } mtimes[rel.Filename] = t.Truncate(time.Second) + if d := rel.Digests[HashAlgoBLAKE2b256]; d != "" { + digestsBLAKE2b256[rel.Filename], err = hex.DecodeString(d) + if err != nil { + log.Println( + "error", r.RemoteAddr, "refresh-json", pkgName, + "can not decode blake2b_256 digest:", err, + ) + http.Error(w, "can not parse metadata JSON", http.StatusBadGateway) + return false + } + } + if d := rel.Digests[HashAlgoSHA256]; d != "" { + digestsSHA256[rel.Filename], err = hex.DecodeString(d) + if err != nil { + log.Println( + "error", r.RemoteAddr, "refresh-json", pkgName, + "can not decode sha256 digest:", err, + ) + http.Error(w, "can not parse metadata JSON", http.StatusBadGateway) + return false + } + } + if d := rel.Digests[HashAlgoSHA512]; d != "" { + digestsSHA512[rel.Filename], err = hex.DecodeString(d) + if err != nil { + log.Println( + "error", r.RemoteAddr, "refresh-json", pkgName, + "can not decode sha512 digest:", err, + ) + http.Error(w, "can not parse metadata JSON", http.StatusBadGateway) + return false + } + } } } @@ -277,7 +315,7 @@ func refreshDir( http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway) return false } - body, err := ioutil.ReadAll(resp.Body) + body, err := io.ReadAll(resp.Body) resp.Body.Close() if err != nil { log.Println("error", r.RemoteAddr, "refresh", pkgName, err) @@ -301,56 +339,73 @@ func refreshDir( return false } - if 
pkgURL.Fragment == "" { - log.Println(r.RemoteAddr, "pypi", filename, "no digest") - http.Error(w, "no digest provided", http.StatusBadGateway) - return false - } - digestInfo := strings.Split(pkgURL.Fragment, "=") - if len(digestInfo) == 1 { - // Ancient non PEP-0503 PyPIs, assume MD5 - digestInfo = []string{"md5", digestInfo[0]} - } else if len(digestInfo) != 2 { - log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest") - http.Error(w, "invalid digest provided", http.StatusBadGateway) - return false - } - digest, err := hex.DecodeString(digestInfo[1]) - if err != nil { - log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest") - http.Error(w, err.Error(), http.StatusBadGateway) - return false - } - hashAlgo := digestInfo[0] + var hashAlgo string var hasherNew func() hash.Hash - var hashSize int - switch hashAlgo { - case HashAlgoMD5: - hasherNew = md5.New - hashSize = md5.Size - case HashAlgoSHA256: + var digest []byte + if d := digestsBLAKE2b256[filename]; d != nil { + hasherNew = blake2b256New + hashAlgo = HashAlgoBLAKE2b256 + digest = d + } else if d := digestsSHA256[filename]; d != nil { hasherNew = sha256.New - hashSize = sha256.Size - case HashAlgoSHA512: + hashAlgo = HashAlgoSHA256 + digest = d + } else if d := digestsSHA512[filename]; d != nil { hasherNew = sha512.New - hashSize = sha512.Size - case HashAlgoBLAKE2b256: - hasherNew = blake2b256New - hashSize = blake2b.Size256 - default: - log.Println( - "error", r.RemoteAddr, "pypi", - filename, "unknown digest", hashAlgo, - ) - http.Error(w, "unknown digest algorithm", http.StatusBadGateway) - return false - } - if len(digest) != hashSize { - log.Println( - "error", r.RemoteAddr, "pypi", - filename, "invalid digest length") - http.Error(w, "invalid digest length", http.StatusBadGateway) - return false + hashAlgo = HashAlgoSHA512 + digest = d + } else { + if pkgURL.Fragment == "" { + log.Println(r.RemoteAddr, "pypi", filename, "no digest") + http.Error(w, "no digest provided", http.StatusBadGateway) + return false + } + digestInfo := strings.Split(pkgURL.Fragment, "=") + if len(digestInfo) == 1 { + // Ancient non PEP-0503 PyPIs, assume MD5 + digestInfo = []string{"md5", digestInfo[0]} + } else if len(digestInfo) != 2 { + log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest") + http.Error(w, "invalid digest provided", http.StatusBadGateway) + return false + } + var err error + digest, err = hex.DecodeString(digestInfo[1]) + if err != nil { + log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest") + http.Error(w, err.Error(), http.StatusBadGateway) + return false + } + hashAlgo = digestInfo[0] + var hashSize int + switch hashAlgo { + case HashAlgoBLAKE2b256: + hasherNew = blake2b256New + hashSize = blake2b.Size256 + case HashAlgoSHA256: + hasherNew = sha256.New + hashSize = sha256.Size + case HashAlgoSHA512: + hasherNew = sha512.New + hashSize = sha512.Size + case HashAlgoMD5: + hasherNew = md5.New + hashSize = md5.Size + default: + log.Println( + "error", r.RemoteAddr, "pypi", + filename, "unknown digest", hashAlgo, + ) + http.Error(w, "unknown digest algorithm", http.StatusBadGateway) + return false + } + if len(digest) != hashSize { + log.Println( + "error", r.RemoteAddr, "pypi", + filename, "invalid digest length") + http.Error(w, "invalid digest length", http.StatusBadGateway) + return false + } } pkgURL.Fragment = "" @@ -389,8 +444,8 @@ func refreshDir( return false } hasher := hasherNew() - hasherSHA256 := sha256.New() hasherBLAKE2b256 := blake2b256New() + 
hasherSHA256 := sha256.New() dst, err := TempFile(dirPath) if err != nil { log.Println("error", r.RemoteAddr, "pypi", filename, err) @@ -399,12 +454,12 @@ func refreshDir( } dstBuf := bufio.NewWriter(dst) wrs := []io.Writer{hasher, dstBuf} - if hashAlgo != HashAlgoSHA256 { - wrs = append(wrs, hasherSHA256) - } if hashAlgo != HashAlgoBLAKE2b256 { wrs = append(wrs, hasherBLAKE2b256) } + if hashAlgo != HashAlgoSHA256 { + wrs = append(wrs, hasherSHA256) + } wr := io.MultiWriter(wrs...) if _, err = io.Copy(wr, resp.Body); err != nil { os.Remove(dst.Name()) @@ -420,13 +475,22 @@ func refreshDir( http.Error(w, err.Error(), http.StatusInternalServerError) return false } - if bytes.Compare(hasher.Sum(nil), digest) != 0 { + if !bytes.Equal(hasher.Sum(nil), digest) { log.Println(r.RemoteAddr, "pypi", filename, "digest mismatch") os.Remove(dst.Name()) dst.Close() http.Error(w, "digest mismatch", http.StatusBadGateway) return false } + if digestStored, err := os.ReadFile(path + "." + hashAlgo); err == nil && + !bytes.Equal(digest, digestStored) { + err = errors.New("stored digest mismatch") + log.Println("error", r.RemoteAddr, "pypi", filename, err) + os.Remove(dst.Name()) + dst.Close() + http.Error(w, err.Error(), http.StatusInternalServerError) + return false + } if !NoSync { if err = dst.Sync(); err != nil { os.Remove(dst.Name()) @@ -456,36 +520,36 @@ func refreshDir( return false } - var digestSHA256 []byte var digestBLAKE2b256 []byte - if hashAlgo == HashAlgoSHA256 { - digestSHA256 = hasher.Sum(nil) - } else { - digestSHA256 = hasherSHA256.Sum(nil) - } + var digestSHA256 []byte if hashAlgo == HashAlgoBLAKE2b256 { digestBLAKE2b256 = hasher.Sum(nil) } else { digestBLAKE2b256 = hasherBLAKE2b256.Sum(nil) } + if hashAlgo == HashAlgoSHA256 { + digestSHA256 = hasher.Sum(nil) + } else { + digestSHA256 = hasherSHA256.Sum(nil) + } if err = WriteFileSync( - dirPath, path+"."+HashAlgoSHA256, - digestSHA256, mtime, + dirPath, path+"."+HashAlgoBLAKE2b256, + digestBLAKE2b256, mtime, ); err != nil { log.Println( "error", r.RemoteAddr, "pypi", - path+"."+HashAlgoSHA256, err, + path+"."+HashAlgoBLAKE2b256, err, ) http.Error(w, err.Error(), http.StatusInternalServerError) return false } if err = WriteFileSync( - dirPath, path+"."+HashAlgoBLAKE2b256, - digestBLAKE2b256, mtime, + dirPath, path+"."+HashAlgoSHA256, + digestSHA256, mtime, ); err != nil { log.Println( "error", r.RemoteAddr, "pypi", - path+"."+HashAlgoBLAKE2b256, err, + path+"."+HashAlgoSHA256, err, ) http.Error(w, err.Error(), http.StatusInternalServerError) return false @@ -506,46 +570,6 @@ func refreshDir( } } - if filename == filenameGet || gpgUpdate { - if _, err = os.Stat(path); err != nil { - goto GPGSigSkip - } - resp, err := c.Do(agentedReq(uri + GPGSigExt)) - if err != nil { - goto GPGSigSkip - } - if resp.StatusCode != http.StatusOK { - resp.Body.Close() - goto GPGSigSkip - } - sig, err := ioutil.ReadAll(resp.Body) - resp.Body.Close() - if err != nil { - goto GPGSigSkip - } - if !bytes.HasPrefix(sig, []byte("-----BEGIN PGP SIGNATURE-----")) { - log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "non PGP") - goto GPGSigSkip - } - if err = WriteFileSync(dirPath, path+GPGSigExt, sig, mtime); err != nil { - log.Println("error", r.RemoteAddr, "pypi", filename+GPGSigExt, err) - http.Error(w, err.Error(), http.StatusInternalServerError) - return false - } - log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "downloaded") - } - if mtimeExists { - stat, err := os.Stat(path + GPGSigExt) - if err == nil && 
!stat.ModTime().Truncate(time.Second).Equal(mtime) { - log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "touch") - if err = os.Chtimes(path+GPGSigExt, mtime, mtime); err != nil { - log.Println("error", r.RemoteAddr, "pypi", filename, err) - http.Error(w, err.Error(), http.StatusInternalServerError) - } - } - } - - GPGSigSkip: if digest == nil { continue }
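
The hunks above replace fragment-based digest discovery with the per-release digests map from PyPI's JSON API, preferring BLAKE2b-256, then SHA-256, then SHA-512, and only falling back to parsing the "#algo=hex" URL fragment when none of those are present. The following standalone sketch is not part of the diff: chooseHasher and its return shape are hypothetical names, and the legacy bare-fragment MD5 case handled in refresh.go is omitted to keep it short. It only illustrates the selection order under those assumptions.

package main

import (
	"crypto/sha256"
	"crypto/sha512"
	"encoding/hex"
	"errors"
	"fmt"
	"hash"
	"strings"

	"golang.org/x/crypto/blake2b"
)

// blake2b256New mirrors the helper in refresh.go: an unkeyed BLAKE2b-256
// hasher. blake2b.New256 only errors on an invalid key, so nil never fails.
func blake2b256New() hash.Hash {
	h, err := blake2b.New256(nil)
	if err != nil {
		panic(err)
	}
	return h
}

// chooseHasher (hypothetical name) picks the digest a downloaded release
// file would be verified against: JSON-provided digests win, in
// blake2b_256 > sha256 > sha512 order; otherwise the PEP 503 style
// URL fragment "algo=hex" is parsed.
func chooseHasher(
	digests map[string]string, fragment string,
) (string, func() hash.Hash, []byte, error) {
	for _, algo := range []struct {
		name string
		new  func() hash.Hash
	}{
		{"blake2b_256", blake2b256New},
		{"sha256", sha256.New},
		{"sha512", sha512.New},
	} {
		if d := digests[algo.name]; d != "" {
			raw, err := hex.DecodeString(d)
			if err != nil {
				return "", nil, nil, err
			}
			return algo.name, algo.new, raw, nil
		}
	}
	// Fall back to the URL fragment, e.g. "#sha256=deadbeef...".
	parts := strings.SplitN(fragment, "=", 2)
	if len(parts) != 2 {
		return "", nil, nil, errors.New("no usable digest")
	}
	raw, err := hex.DecodeString(parts[1])
	if err != nil {
		return "", nil, nil, err
	}
	switch parts[0] {
	case "blake2b_256":
		return parts[0], blake2b256New, raw, nil
	case "sha256":
		return parts[0], sha256.New, raw, nil
	case "sha512":
		return parts[0], sha512.New, raw, nil
	}
	return "", nil, nil, errors.New("unknown digest algorithm")
}

func main() {
	// A release whose JSON metadata only carries a sha256 digest.
	digests := map[string]string{"sha256": strings.Repeat("ab", 32)}
	algo, _, raw, err := chooseHasher(digests, "")
	fmt.Println(algo, len(raw), err) // sha256 32 <nil>
}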