// GoCheese -- Python private package repository and caching proxy // Copyright (C) 2019-2024 Sergey Matveev // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, version 3 of the License. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program. If not, see . package main import ( "bufio" "bytes" "crypto/md5" "crypto/sha256" "crypto/sha512" "encoding/hex" "encoding/json" "errors" "hash" "io" "log" "net/http" "net/url" "os" "path/filepath" "regexp" "strings" "time" "go.cypherpunks.ru/recfile" "golang.org/x/crypto/blake2b" ) const ( HashAlgoBLAKE2b256 = "blake2b_256" HashAlgoSHA256 = "sha256" HashAlgoSHA512 = "sha512" HashAlgoMD5 = "md5" InternalFlag = ".internal" ) var ( PkgPyPI = regexp.MustCompile(`^.*]*>(.+).*$`) PyPIURLParsed *url.URL PyPIHTTPTransport http.Transport KnownHashAlgos []string = []string{ HashAlgoBLAKE2b256, HashAlgoSHA256, HashAlgoSHA512, HashAlgoMD5, } ) func blake2b256New() hash.Hash { h, err := blake2b.New256(nil) if err != nil { panic(err) } return h } func agentedReq(url string) *http.Request { req, err := http.NewRequest("GET", url, nil) if err != nil { log.Fatal(err) } req.Header.Set("User-Agent", UserAgent) return req } type RecFieldToValuesMap struct { recField string jsonFields []string } func refreshDir( w http.ResponseWriter, r *http.Request, pkgName, filenameGet string, ) bool { if _, err := os.Stat(filepath.Join(Root, pkgName, InternalFlag)); err == nil { return true } c := http.Client{Transport: &PyPIHTTPTransport} dirPath := filepath.Join(Root, pkgName) now := time.Now() var allReleases map[string][]*PkgReleaseInfo if *JSONURL != "" { resp, err := c.Do(agentedReq(*JSONURL + pkgName + "/json")) if err != nil { log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err) http.Error(w, err.Error(), http.StatusBadGateway) return false } if resp.StatusCode != http.StatusOK { resp.Body.Close() log.Println( "error", r.RemoteAddr, "refresh-json", pkgName, "HTTP status:", resp.Status, ) http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway) return false } body, err := io.ReadAll(resp.Body) if err != nil { resp.Body.Close() log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err) http.Error(w, "can not read body", http.StatusBadGateway) return false } resp.Body.Close() var buf bytes.Buffer var description string wr := recfile.NewWriter(&buf) var meta PkgMeta err = json.Unmarshal(body, &meta) if err == nil { for _, m := range [][2]string{ {MDFieldName, meta.Info.Name}, {MDFieldVersion, meta.Info.Version}, {MDFieldSummary, meta.Info.Summary}, {MDFieldDescriptionContentType, meta.Info.DescriptionContentType}, {MDFieldKeywords, meta.Info.Keywords}, {MDFieldHomePage, meta.Info.HomePage}, {MDFieldAuthor, meta.Info.Author}, {MDFieldAuthorEmail, meta.Info.AuthorEmail}, {MDFieldMaintainer, meta.Info.Maintainer}, {MDFieldMaintainerEmail, meta.Info.MaintainerEmail}, {MDFieldLicense, meta.Info.License}, {MDFieldRequiresPython, meta.Info.RequiresPython}, } { recField, jsonField := m[0], m[1] if jsonField == "" { continue } if _, err = wr.WriteFields(recfile.Field{ Name: MDFieldToRecField[recField], Value: jsonField, }); err != nil { log.Fatal(err) } } for _, m := range []RecFieldToValuesMap{ {MDFieldClassifier, meta.Info.Classifier}, {MDFieldPlatform, meta.Info.Platform}, {MDFieldSupportedPlatform, meta.Info.SupportedPlatform}, {MDFieldRequiresDist, meta.Info.RequiresDist}, {MDFieldRequiresExternal, meta.Info.RequiresExternal}, {MDFieldProjectURL, meta.Info.ProjectURL}, {MDFieldProvidesExtra, meta.Info.ProvidesExtra}, } { for _, v := range m.jsonFields { if _, err = wr.WriteFields(recfile.Field{ Name: MDFieldToRecField[m.recField], Value: v, }); err != nil { log.Fatal(err) } } } description = meta.Info.Description allReleases = meta.Releases } else { var metaStripped PkgMetaStripped err = json.Unmarshal(body, &metaStripped) if err != nil { log.Println( "error", r.RemoteAddr, "refresh-json", pkgName, "can not parse JSON:", err, ) http.Error(w, "can not parse metadata JSON", http.StatusBadGateway) return false } for _, m := range [][2]string{ {MDFieldName, metaStripped.Info.Name}, {MDFieldVersion, metaStripped.Info.Version}, {MDFieldSummary, metaStripped.Info.Summary}, {MDFieldDescriptionContentType, metaStripped.Info.DescriptionContentType}, {MDFieldKeywords, metaStripped.Info.Keywords}, {MDFieldHomePage, metaStripped.Info.HomePage}, {MDFieldAuthor, metaStripped.Info.Author}, {MDFieldAuthorEmail, metaStripped.Info.AuthorEmail}, {MDFieldMaintainer, metaStripped.Info.Maintainer}, {MDFieldMaintainerEmail, metaStripped.Info.MaintainerEmail}, {MDFieldLicense, metaStripped.Info.License}, {MDFieldRequiresPython, metaStripped.Info.RequiresPython}, } { recField, jsonField := m[0], m[1] if jsonField == "" { continue } if _, err = wr.WriteFields(recfile.Field{ Name: MDFieldToRecField[recField], Value: jsonField, }); err != nil { log.Fatal(err) } } for _, m := range []RecFieldToValuesMap{ {MDFieldClassifier, metaStripped.Info.Classifier}, {MDFieldRequiresDist, metaStripped.Info.RequiresDist}, } { for _, v := range m.jsonFields { if _, err = wr.WriteFields(recfile.Field{ Name: MDFieldToRecField[m.recField], Value: v, }); err != nil { log.Fatal(err) } } } description = metaStripped.Info.Description allReleases = metaStripped.Releases } lines := strings.Split(description, "\n") if len(lines) > 0 { if _, err = wr.WriteFieldMultiline( MDFieldDescription, lines, ); err != nil { log.Fatal(err) } } if !mkdirForPkg(w, r, pkgName) { return false } path := filepath.Join(dirPath, MDFile) existing, err := os.ReadFile(path) if err != nil || !bytes.Equal(existing, buf.Bytes()) { if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil { log.Println("error", r.RemoteAddr, "refresh-json", path, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } log.Println(r.RemoteAddr, "pypi", pkgName+"/"+MDFile, "touch") } } mtimes := make(map[string]time.Time) digestsBLAKE2b256 := make(map[string][]byte) digestsSHA256 := make(map[string][]byte) digestsSHA512 := make(map[string][]byte) for _, releases := range allReleases { for _, rel := range releases { if rel.Filename == "" || rel.UploadTimeISO8601 == "" { continue } t, err := time.Parse(time.RFC3339Nano, rel.UploadTimeISO8601) if err != nil { log.Println( "error", r.RemoteAddr, "refresh-json", pkgName, "can not parse upload_time:", err, ) http.Error(w, "can not parse metadata JSON", http.StatusBadGateway) return false } mtimes[rel.Filename] = t.Truncate(time.Second) if d := rel.Digests[HashAlgoBLAKE2b256]; d != "" { digestsBLAKE2b256[rel.Filename], err = hex.DecodeString(d) if err != nil { log.Println( "error", r.RemoteAddr, "refresh-json", pkgName, "can not decode blake2b_256 digest:", err, ) http.Error(w, "can not parse metadata JSON", http.StatusBadGateway) return false } } if d := rel.Digests[HashAlgoSHA256]; d != "" { digestsSHA256[rel.Filename], err = hex.DecodeString(d) if err != nil { log.Println( "error", r.RemoteAddr, "refresh-json", pkgName, "can not decode sha256 digest:", err, ) http.Error(w, "can not parse metadata JSON", http.StatusBadGateway) return false } } if d := rel.Digests[HashAlgoSHA512]; d != "" { digestsSHA512[rel.Filename], err = hex.DecodeString(d) if err != nil { log.Println( "error", r.RemoteAddr, "refresh-json", pkgName, "can not decode sha512 digest:", err, ) http.Error(w, "can not parse metadata JSON", http.StatusBadGateway) return false } } } } resp, err := c.Do(agentedReq(*PyPIURL + pkgName + "/")) if err != nil { log.Println("error", r.RemoteAddr, "refresh", pkgName, err) http.Error(w, err.Error(), http.StatusBadGateway) return false } if resp.StatusCode != http.StatusOK { resp.Body.Close() log.Println( "error", r.RemoteAddr, "refresh", pkgName, "HTTP status:", resp.Status, ) http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway) return false } body, err := io.ReadAll(resp.Body) resp.Body.Close() if err != nil { log.Println("error", r.RemoteAddr, "refresh", pkgName, err) http.Error(w, err.Error(), http.StatusBadGateway) return false } if !mkdirForPkg(w, r, pkgName) { return false } for _, lineRaw := range bytes.Split(body, []byte("\n")) { submatches := PkgPyPI.FindStringSubmatch(string(lineRaw)) if len(submatches) == 0 { continue } uri := submatches[1] filename := submatches[2] pkgURL, err := url.Parse(uri) if err != nil { log.Println("error", r.RemoteAddr, "refresh", uri, err) http.Error(w, err.Error(), http.StatusBadGateway) return false } var hashAlgo string var hasherNew func() hash.Hash var digest []byte if d := digestsBLAKE2b256[filename]; d != nil { hasherNew = blake2b256New hashAlgo = HashAlgoBLAKE2b256 digest = d } else if d := digestsSHA256[filename]; d != nil { hasherNew = sha256.New hashAlgo = HashAlgoSHA256 digest = d } else if d := digestsSHA512[filename]; d != nil { hasherNew = sha512.New hashAlgo = HashAlgoSHA512 digest = d } else { if pkgURL.Fragment == "" { log.Println(r.RemoteAddr, "pypi", filename, "no digest") http.Error(w, "no digest provided", http.StatusBadGateway) return false } digestInfo := strings.Split(pkgURL.Fragment, "=") if len(digestInfo) == 1 { // Ancient non PEP-0503 PyPIs, assume MD5 digestInfo = []string{"md5", digestInfo[0]} } else if len(digestInfo) != 2 { log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest") http.Error(w, "invalid digest provided", http.StatusBadGateway) return false } var err error digest, err = hex.DecodeString(digestInfo[1]) if err != nil { log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest") http.Error(w, err.Error(), http.StatusBadGateway) return false } hashAlgo = digestInfo[0] var hashSize int switch hashAlgo { case HashAlgoBLAKE2b256: hasherNew = blake2b256New hashSize = blake2b.Size256 case HashAlgoSHA256: hasherNew = sha256.New hashSize = sha256.Size case HashAlgoSHA512: hasherNew = sha512.New hashSize = sha512.Size case HashAlgoMD5: hasherNew = md5.New hashSize = md5.Size default: log.Println( "error", r.RemoteAddr, "pypi", filename, "unknown digest", hashAlgo, ) http.Error(w, "unknown digest algorithm", http.StatusBadGateway) return false } if len(digest) != hashSize { log.Println( "error", r.RemoteAddr, "pypi", filename, "invalid digest length") http.Error(w, "invalid digest length", http.StatusBadGateway) return false } } pkgURL.Fragment = "" if pkgURL.Host == "" { uri = PyPIURLParsed.ResolveReference(pkgURL).String() } else { uri = pkgURL.String() } mtime, mtimeExists := mtimes[filename] if !mtimeExists { mtime = now } path := filepath.Join(dirPath, filename) if filename == filenameGet { if Killed { // Skip heavy remote call, when shutting down http.Error(w, "shutting down", http.StatusInternalServerError) return false } log.Println(r.RemoteAddr, "pypi", filename, "download") resp, err = c.Do(agentedReq(uri)) if err != nil { log.Println("error", r.RemoteAddr, "pypi", filename, "download", err) http.Error(w, err.Error(), http.StatusBadGateway) return false } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { log.Println( "error", r.RemoteAddr, "pypi", filename, "download", "HTTP status:", resp.Status, ) http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway) return false } hasher := hasherNew() hasherBLAKE2b256 := blake2b256New() hasherSHA256 := sha256.New() dst, err := TempFile(dirPath) if err != nil { log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } dstBuf := bufio.NewWriter(dst) wrs := []io.Writer{hasher, dstBuf} if hashAlgo != HashAlgoBLAKE2b256 { wrs = append(wrs, hasherBLAKE2b256) } if hashAlgo != HashAlgoSHA256 { wrs = append(wrs, hasherSHA256) } wr := io.MultiWriter(wrs...) if _, err = io.Copy(wr, resp.Body); err != nil { os.Remove(dst.Name()) dst.Close() log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } if err = dstBuf.Flush(); err != nil { os.Remove(dst.Name()) dst.Close() log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } if !bytes.Equal(hasher.Sum(nil), digest) { log.Println(r.RemoteAddr, "pypi", filename, "digest mismatch") os.Remove(dst.Name()) dst.Close() http.Error(w, "digest mismatch", http.StatusBadGateway) return false } if digestStored, err := os.ReadFile(path + "." + hashAlgo); err == nil && !bytes.Equal(digest, digestStored) { err = errors.New("stored digest mismatch") log.Println("error", r.RemoteAddr, "pypi", filename, err) os.Remove(dst.Name()) dst.Close() http.Error(w, err.Error(), http.StatusInternalServerError) return false } if !NoSync { if err = dst.Sync(); err != nil { os.Remove(dst.Name()) dst.Close() log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } } if err = dst.Close(); err != nil { log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } if err = os.Chtimes(dst.Name(), mtime, mtime); err != nil { log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) } if err = os.Rename(dst.Name(), path); err != nil { log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } if err = DirSync(dirPath); err != nil { log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } var digestBLAKE2b256 []byte var digestSHA256 []byte if hashAlgo == HashAlgoBLAKE2b256 { digestBLAKE2b256 = hasher.Sum(nil) } else { digestBLAKE2b256 = hasherBLAKE2b256.Sum(nil) } if hashAlgo == HashAlgoSHA256 { digestSHA256 = hasher.Sum(nil) } else { digestSHA256 = hasherSHA256.Sum(nil) } if err = WriteFileSync( dirPath, path+"."+HashAlgoBLAKE2b256, digestBLAKE2b256, mtime, ); err != nil { log.Println( "error", r.RemoteAddr, "pypi", path+"."+HashAlgoBLAKE2b256, err, ) http.Error(w, err.Error(), http.StatusInternalServerError) return false } if err = WriteFileSync( dirPath, path+"."+HashAlgoSHA256, digestSHA256, mtime, ); err != nil { log.Println( "error", r.RemoteAddr, "pypi", path+"."+HashAlgoSHA256, err, ) http.Error(w, err.Error(), http.StatusInternalServerError) return false } for _, algo := range KnownHashAlgos[2:] { os.Remove(path + "." + algo) } digest = nil } if mtimeExists { stat, err := os.Stat(path) if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) { log.Println(r.RemoteAddr, "pypi", filename, "touch") if err = os.Chtimes(path, mtime, mtime); err != nil { log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) } } } if digest == nil { continue } path = path + "." + hashAlgo stat, err := os.Stat(path) if err == nil && (!mtimeExists || (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime))) { continue } if err != nil && !os.IsNotExist(err) { log.Println("error", r.RemoteAddr, "pypi", path, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } log.Println(r.RemoteAddr, "pypi", filename, "touch") if err = WriteFileSync(dirPath, path, digest, mtime); err != nil { log.Println("error", r.RemoteAddr, "pypi", path, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } } return true }