/* GoCheese -- Python private package repository and caching proxy Copyright (C) 2019-2021 Sergey Matveev This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 3 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ package main import ( "bufio" "bytes" "crypto/md5" "crypto/sha256" "crypto/sha512" "encoding/hex" "encoding/json" "hash" "io" "io/ioutil" "log" "net/http" "net/url" "os" "path/filepath" "regexp" "strings" "time" "go.cypherpunks.ru/recfile" "golang.org/x/crypto/blake2b" ) const ( HashAlgoSHA256 = "sha256" HashAlgoBLAKE2b256 = "blake2_256" HashAlgoSHA512 = "sha512" HashAlgoMD5 = "md5" GPGSigExt = ".asc" InternalFlag = ".internal" ) var ( PkgPyPI = regexp.MustCompile(`^.*]*>(.+).*$`) PyPIURLParsed *url.URL PyPIHTTPTransport http.Transport KnownHashAlgos []string = []string{ HashAlgoSHA256, HashAlgoBLAKE2b256, HashAlgoSHA512, HashAlgoMD5, } ) func blake2b256New() hash.Hash { h, err := blake2b.New256(nil) if err != nil { panic(err) } return h } func agentedReq(url string) *http.Request { req, err := http.NewRequest("GET", url, nil) if err != nil { log.Fatalln(err) } req.Header.Set("User-Agent", UserAgent) return req } type RecFieldToValuesMap struct { recField string jsonFields []string } func refreshDir( w http.ResponseWriter, r *http.Request, pkgName, filenameGet string, gpgUpdate bool, ) bool { if _, err := os.Stat(filepath.Join(Root, pkgName, InternalFlag)); err == nil { return true } c := http.Client{Transport: &PyPIHTTPTransport} dirPath := filepath.Join(Root, pkgName) now := time.Now() var allReleases map[string][]*PkgReleaseInfo if *JSONURL != "" { resp, err := c.Do(agentedReq(*JSONURL + pkgName + "/json")) if err != nil { log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err) http.Error(w, err.Error(), http.StatusBadGateway) return false } if resp.StatusCode != http.StatusOK { resp.Body.Close() log.Println( "error", r.RemoteAddr, "refresh-json", pkgName, "HTTP status:", resp.Status, ) http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway) return false } body, err := ioutil.ReadAll(resp.Body) resp.Body.Close() var buf bytes.Buffer var description string wr := recfile.NewWriter(&buf) var meta PkgMeta err = json.Unmarshal(body, &meta) if err == nil { for _, m := range [][2]string{ {MetadataFieldName, meta.Info.Name}, {MetadataFieldVersion, meta.Info.Version}, {MetadataFieldSummary, meta.Info.Summary}, {MetadataFieldDescriptionContentType, meta.Info.DescriptionContentType}, {MetadataFieldKeywords, meta.Info.Keywords}, {MetadataFieldHomePage, meta.Info.HomePage}, {MetadataFieldAuthor, meta.Info.Author}, {MetadataFieldAuthorEmail, meta.Info.AuthorEmail}, {MetadataFieldMaintainer, meta.Info.Maintainer}, {MetadataFieldMaintainerEmail, meta.Info.MaintainerEmail}, {MetadataFieldLicense, meta.Info.License}, {MetadataFieldRequiresPython, meta.Info.RequiresPython}, } { recField, jsonField := m[0], m[1] if jsonField == "" { continue } if _, err = wr.WriteFields(recfile.Field{ Name: metadataFieldToRecField(recField), Value: jsonField, }); err != nil { log.Fatalln(err) } } for _, m := range []RecFieldToValuesMap{ {MetadataFieldClassifier, meta.Info.Classifier}, {MetadataFieldPlatform, meta.Info.Platform}, {MetadataFieldSupportedPlatform, meta.Info.SupportedPlatform}, {MetadataFieldRequiresDist, meta.Info.RequiresDist}, {MetadataFieldRequiresExternal, meta.Info.RequiresExternal}, {MetadataFieldProjectURL, meta.Info.ProjectURL}, {MetadataFieldProvidesExtra, meta.Info.ProvidesExtra}, } { for _, v := range m.jsonFields { if _, err = wr.WriteFields(recfile.Field{ Name: metadataFieldToRecField(m.recField), Value: v, }); err != nil { log.Fatalln(err) } } } description = meta.Info.Description allReleases = meta.Releases } else { var metaStripped PkgMetaStripped err = json.Unmarshal(body, &metaStripped) if err != nil { log.Println( "error", r.RemoteAddr, "refresh-json", pkgName, "can not parse JSON:", err, ) http.Error(w, "can not parse metadata JSON", http.StatusBadGateway) return false } for _, m := range [][2]string{ {MetadataFieldName, metaStripped.Info.Name}, {MetadataFieldVersion, metaStripped.Info.Version}, {MetadataFieldSummary, metaStripped.Info.Summary}, {MetadataFieldDescriptionContentType, metaStripped.Info.DescriptionContentType}, {MetadataFieldKeywords, metaStripped.Info.Keywords}, {MetadataFieldHomePage, metaStripped.Info.HomePage}, {MetadataFieldAuthor, metaStripped.Info.Author}, {MetadataFieldAuthorEmail, metaStripped.Info.AuthorEmail}, {MetadataFieldMaintainer, metaStripped.Info.Maintainer}, {MetadataFieldMaintainerEmail, metaStripped.Info.MaintainerEmail}, {MetadataFieldLicense, metaStripped.Info.License}, {MetadataFieldRequiresPython, metaStripped.Info.RequiresPython}, } { recField, jsonField := m[0], m[1] if jsonField == "" { continue } if _, err = wr.WriteFields(recfile.Field{ Name: metadataFieldToRecField(recField), Value: jsonField, }); err != nil { log.Fatalln(err) } } for _, m := range []RecFieldToValuesMap{ {MetadataFieldClassifier, metaStripped.Info.Classifier}, {MetadataFieldRequiresDist, metaStripped.Info.RequiresDist}, } { for _, v := range m.jsonFields { if _, err = wr.WriteFields(recfile.Field{ Name: metadataFieldToRecField(m.recField), Value: v, }); err != nil { log.Fatalln(err) } } } description = metaStripped.Info.Description allReleases = metaStripped.Releases } lines := strings.Split(description, "\n") if len(lines) > 0 { if _, err = wr.WriteFieldMultiline( MetadataFieldDescription, lines, ); err != nil { log.Fatalln(err) } } if !mkdirForPkg(w, r, pkgName) { return false } path := filepath.Join(dirPath, MetadataFile) existing, err := ioutil.ReadFile(path) if err != nil || bytes.Compare(existing, buf.Bytes()) != 0 { if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil { log.Println("error", r.RemoteAddr, "refresh-json", path, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } log.Println(r.RemoteAddr, "pypi", pkgName+"."+MetadataFile, "touch") } } mtimes := make(map[string]time.Time) for _, releases := range allReleases { for _, rel := range releases { if rel.Filename == "" || rel.UploadTimeISO8601 == "" { continue } t, err := time.Parse(time.RFC3339Nano, rel.UploadTimeISO8601) if err != nil { log.Println( "error", r.RemoteAddr, "refresh-json", pkgName, "can not parse upload_time:", err, ) http.Error(w, "can not parse metadata JSON", http.StatusBadGateway) return false } mtimes[rel.Filename] = t.Truncate(time.Second) } } resp, err := c.Do(agentedReq(*PyPIURL + pkgName + "/")) if err != nil { log.Println("error", r.RemoteAddr, "refresh", pkgName, err) http.Error(w, err.Error(), http.StatusBadGateway) return false } if resp.StatusCode != http.StatusOK { resp.Body.Close() log.Println( "error", r.RemoteAddr, "refresh", pkgName, "HTTP status:", resp.Status, ) http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway) return false } body, err := ioutil.ReadAll(resp.Body) resp.Body.Close() if err != nil { log.Println("error", r.RemoteAddr, "refresh", pkgName, err) http.Error(w, err.Error(), http.StatusBadGateway) return false } if !mkdirForPkg(w, r, pkgName) { return false } for _, lineRaw := range bytes.Split(body, []byte("\n")) { submatches := PkgPyPI.FindStringSubmatch(string(lineRaw)) if len(submatches) == 0 { continue } uri := submatches[1] filename := submatches[2] pkgURL, err := url.Parse(uri) if err != nil { log.Println("error", r.RemoteAddr, "refresh", uri, err) http.Error(w, err.Error(), http.StatusBadGateway) return false } if pkgURL.Fragment == "" { log.Println(r.RemoteAddr, "pypi", filename, "no digest") http.Error(w, "no digest provided", http.StatusBadGateway) return false } digestInfo := strings.Split(pkgURL.Fragment, "=") if len(digestInfo) == 1 { // Ancient non PEP-0503 PyPIs, assume MD5 digestInfo = []string{"md5", digestInfo[0]} } else if len(digestInfo) != 2 { log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest") http.Error(w, "invalid digest provided", http.StatusBadGateway) return false } digest, err := hex.DecodeString(digestInfo[1]) if err != nil { log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest") http.Error(w, err.Error(), http.StatusBadGateway) return false } hashAlgo := digestInfo[0] var hasherNew func() hash.Hash var hashSize int switch hashAlgo { case HashAlgoMD5: hasherNew = md5.New hashSize = md5.Size case HashAlgoSHA256: hasherNew = sha256.New hashSize = sha256.Size case HashAlgoSHA512: hasherNew = sha512.New hashSize = sha512.Size case HashAlgoBLAKE2b256: hasherNew = blake2b256New hashSize = blake2b.Size256 default: log.Println("error", r.RemoteAddr, "pypi", filename, "unknown digest", hashAlgo) http.Error(w, "unknown digest algorithm", http.StatusBadGateway) return false } if len(digest) != hashSize { log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest length") http.Error(w, "invalid digest length", http.StatusBadGateway) return false } pkgURL.Fragment = "" if pkgURL.Host == "" { uri = PyPIURLParsed.ResolveReference(pkgURL).String() } else { uri = pkgURL.String() } mtime, mtimeExists := mtimes[filename] if !mtimeExists { mtime = now } path := filepath.Join(dirPath, filename) if filename == filenameGet { if Killed { // Skip heavy remote call, when shutting down http.Error(w, "shutting down", http.StatusInternalServerError) return false } log.Println(r.RemoteAddr, "pypi", filename, "download") resp, err = c.Do(agentedReq(uri)) if err != nil { log.Println("error", r.RemoteAddr, "pypi", filename, "download", err) http.Error(w, err.Error(), http.StatusBadGateway) return false } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { log.Println( "error", r.RemoteAddr, "pypi", filename, "download", "HTTP status:", resp.Status, ) http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway) return false } hasher := hasherNew() hasherSHA256 := sha256.New() hasherBLAKE2b256 := blake2b256New() dst, err := TempFile(dirPath) if err != nil { log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } dstBuf := bufio.NewWriter(dst) wrs := []io.Writer{hasher, dstBuf} if hashAlgo != HashAlgoSHA256 { wrs = append(wrs, hasherSHA256) } if hashAlgo != HashAlgoBLAKE2b256 { wrs = append(wrs, hasherBLAKE2b256) } wr := io.MultiWriter(wrs...) if _, err = io.Copy(wr, resp.Body); err != nil { os.Remove(dst.Name()) dst.Close() log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } if err = dstBuf.Flush(); err != nil { os.Remove(dst.Name()) dst.Close() log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } if bytes.Compare(hasher.Sum(nil), digest) != 0 { log.Println(r.RemoteAddr, "pypi", filename, "digest mismatch") os.Remove(dst.Name()) dst.Close() http.Error(w, "digest mismatch", http.StatusBadGateway) return false } if !NoSync { if err = dst.Sync(); err != nil { os.Remove(dst.Name()) dst.Close() log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } } if err = dst.Close(); err != nil { log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } if err = os.Chtimes(dst.Name(), mtime, mtime); err != nil { log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) } if err = os.Rename(dst.Name(), path); err != nil { log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } if err = DirSync(dirPath); err != nil { log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } var digestSHA256 []byte var digestBLAKE2b256 []byte if hashAlgo == HashAlgoSHA256 { digestSHA256 = hasher.Sum(nil) } else { digestSHA256 = hasherSHA256.Sum(nil) } if hashAlgo == HashAlgoBLAKE2b256 { digestBLAKE2b256 = hasher.Sum(nil) } else { digestBLAKE2b256 = hasherBLAKE2b256.Sum(nil) } if err = WriteFileSync( dirPath, path+"."+HashAlgoSHA256, digestSHA256, mtime, ); err != nil { log.Println( "error", r.RemoteAddr, "pypi", path+"."+HashAlgoSHA256, err, ) http.Error(w, err.Error(), http.StatusInternalServerError) return false } if err = WriteFileSync( dirPath, path+"."+HashAlgoBLAKE2b256, digestBLAKE2b256, mtime, ); err != nil { log.Println( "error", r.RemoteAddr, "pypi", path+"."+HashAlgoBLAKE2b256, err, ) http.Error(w, err.Error(), http.StatusInternalServerError) return false } for _, algo := range KnownHashAlgos[2:] { os.Remove(path + "." + algo) } digest = nil } if mtimeExists { stat, err := os.Stat(path) if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) { log.Println(r.RemoteAddr, "pypi", filename, "touch") if err = os.Chtimes(path, mtime, mtime); err != nil { log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) } } } if filename == filenameGet || gpgUpdate { if _, err = os.Stat(path); err != nil { goto GPGSigSkip } resp, err := c.Do(agentedReq(uri + GPGSigExt)) if err != nil { goto GPGSigSkip } if resp.StatusCode != http.StatusOK { resp.Body.Close() goto GPGSigSkip } sig, err := ioutil.ReadAll(resp.Body) resp.Body.Close() if err != nil { goto GPGSigSkip } if !bytes.HasPrefix(sig, []byte("-----BEGIN PGP SIGNATURE-----")) { log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "non PGP") goto GPGSigSkip } if err = WriteFileSync(dirPath, path+GPGSigExt, sig, mtime); err != nil { log.Println("error", r.RemoteAddr, "pypi", filename+GPGSigExt, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "downloaded") } if mtimeExists { stat, err := os.Stat(path + GPGSigExt) if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) { log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "touch") if err = os.Chtimes(path+GPGSigExt, mtime, mtime); err != nil { log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) } } } GPGSigSkip: if digest == nil { continue } path = path + "." + hashAlgo stat, err := os.Stat(path) if err == nil && (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime)) { continue } if err != nil && !os.IsNotExist(err) { log.Println("error", r.RemoteAddr, "pypi", path, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } log.Println(r.RemoteAddr, "pypi", filename, "touch") if err = WriteFileSync(dirPath, path, digest, mtime); err != nil { log.Println("error", r.RemoteAddr, "pypi", path, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } } return true }