X-Git-Url: http://www.git.cypherpunks.ru/?p=gocheese.git;a=blobdiff_plain;f=refresh.go;h=d9411179ef3ccd79909e01aa1eb1856fc2d547a9;hp=3404a37bc015a53217bdd42d124f6ebdb2872a09;hb=f077359218864862f94290b582cb09a4a8fa1b9c;hpb=b036ee436eb9bd8889734232a22d3f24be5c9ee2 diff --git a/refresh.go b/refresh.go index 3404a37..d941117 100644 --- a/refresh.go +++ b/refresh.go @@ -1,6 +1,6 @@ /* GoCheese -- Python private package repository and caching proxy -Copyright (C) 2019 Sergey Matveev +Copyright (C) 2019-2021 Sergey Matveev This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,6 +24,7 @@ import ( "crypto/sha256" "crypto/sha512" "encoding/hex" + "encoding/json" "hash" "io" "io/ioutil" @@ -32,11 +33,35 @@ import ( "net/url" "os" "path/filepath" + "regexp" "strings" + "time" + "go.cypherpunks.ru/recfile" "golang.org/x/crypto/blake2b" ) +const ( + HashAlgoSHA256 = "sha256" + HashAlgoBLAKE2b256 = "blake2_256" + HashAlgoSHA512 = "sha512" + HashAlgoMD5 = "md5" + GPGSigExt = ".asc" + InternalFlag = ".internal" +) + +var ( + PkgPyPI = regexp.MustCompile(`^.*]*>(.+).*$`) + PyPIURLParsed *url.URL + PyPIHTTPTransport http.Transport + KnownHashAlgos []string = []string{ + HashAlgoSHA256, + HashAlgoBLAKE2b256, + HashAlgoSHA512, + HashAlgoMD5, + } +) + func blake2b256New() hash.Hash { h, err := blake2b.New256(nil) if err != nil { @@ -45,33 +70,225 @@ func blake2b256New() hash.Hash { return h } +func agentedReq(url string) *http.Request { + req, err := http.NewRequest("GET", url, nil) + if err != nil { + log.Fatalln(err) + } + req.Header.Set("User-Agent", UserAgent) + return req +} + +type RecFieldToValuesMap struct { + recField string + jsonFields []string +} + func refreshDir( w http.ResponseWriter, r *http.Request, - dir, - filenameGet string, + pkgName, filenameGet string, gpgUpdate bool, ) bool { - if _, err := os.Stat(filepath.Join(*root, dir, InternalFlag)); err == nil { + if _, err := os.Stat(filepath.Join(Root, pkgName, InternalFlag)); err == nil { return true } - resp, err := http.Get(*pypiURL + dir + "/") + c := http.Client{Transport: &PyPIHTTPTransport} + dirPath := filepath.Join(Root, pkgName) + now := time.Now() + + var allReleases map[string][]*PkgReleaseInfo + if *JSONURL != "" { + resp, err := c.Do(agentedReq(*JSONURL + pkgName + "/json")) + if err != nil { + log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err) + http.Error(w, err.Error(), http.StatusBadGateway) + return false + } + if resp.StatusCode != http.StatusOK { + resp.Body.Close() + log.Println( + "error", r.RemoteAddr, "refresh-json", pkgName, + "HTTP status:", resp.Status, + ) + http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway) + return false + } + body, err := ioutil.ReadAll(resp.Body) + resp.Body.Close() + var buf bytes.Buffer + var description string + wr := recfile.NewWriter(&buf) + var meta PkgMeta + err = json.Unmarshal(body, &meta) + if err == nil { + for _, m := range [][2]string{ + {MetadataFieldName, meta.Info.Name}, + {MetadataFieldVersion, meta.Info.Version}, + {MetadataFieldSummary, meta.Info.Summary}, + {MetadataFieldDescriptionContentType, meta.Info.DescriptionContentType}, + {MetadataFieldKeywords, meta.Info.Keywords}, + {MetadataFieldHomePage, meta.Info.HomePage}, + {MetadataFieldAuthor, meta.Info.Author}, + {MetadataFieldAuthorEmail, meta.Info.AuthorEmail}, + {MetadataFieldMaintainer, meta.Info.Maintainer}, + {MetadataFieldMaintainerEmail, meta.Info.MaintainerEmail}, + {MetadataFieldLicense, meta.Info.License}, + {MetadataFieldRequiresPython, meta.Info.RequiresPython}, + } { + recField, jsonField := m[0], m[1] + if jsonField == "" { + continue + } + if _, err = wr.WriteFields(recfile.Field{ + Name: metadataFieldToRecField(recField), + Value: jsonField, + }); err != nil { + log.Fatalln(err) + } + } + for _, m := range []RecFieldToValuesMap{ + {MetadataFieldClassifier, meta.Info.Classifier}, + {MetadataFieldPlatform, meta.Info.Platform}, + {MetadataFieldSupportedPlatform, meta.Info.SupportedPlatform}, + {MetadataFieldRequiresDist, meta.Info.RequiresDist}, + {MetadataFieldRequiresExternal, meta.Info.RequiresExternal}, + {MetadataFieldProjectURL, meta.Info.ProjectURL}, + {MetadataFieldProvidesExtra, meta.Info.ProvidesExtra}, + } { + for _, v := range m.jsonFields { + if _, err = wr.WriteFields(recfile.Field{ + Name: metadataFieldToRecField(m.recField), + Value: v, + }); err != nil { + log.Fatalln(err) + } + } + } + description = meta.Info.Description + allReleases = meta.Releases + } else { + var metaStripped PkgMetaStripped + err = json.Unmarshal(body, &metaStripped) + if err != nil { + log.Println( + "error", r.RemoteAddr, "refresh-json", pkgName, + "can not parse JSON:", err, + ) + http.Error(w, "can not parse metadata JSON", http.StatusBadGateway) + return false + } + for _, m := range [][2]string{ + {MetadataFieldName, metaStripped.Info.Name}, + {MetadataFieldVersion, metaStripped.Info.Version}, + {MetadataFieldSummary, metaStripped.Info.Summary}, + {MetadataFieldDescriptionContentType, metaStripped.Info.DescriptionContentType}, + {MetadataFieldKeywords, metaStripped.Info.Keywords}, + {MetadataFieldHomePage, metaStripped.Info.HomePage}, + {MetadataFieldAuthor, metaStripped.Info.Author}, + {MetadataFieldAuthorEmail, metaStripped.Info.AuthorEmail}, + {MetadataFieldMaintainer, metaStripped.Info.Maintainer}, + {MetadataFieldMaintainerEmail, metaStripped.Info.MaintainerEmail}, + {MetadataFieldLicense, metaStripped.Info.License}, + {MetadataFieldRequiresPython, metaStripped.Info.RequiresPython}, + } { + recField, jsonField := m[0], m[1] + if jsonField == "" { + continue + } + if _, err = wr.WriteFields(recfile.Field{ + Name: metadataFieldToRecField(recField), + Value: jsonField, + }); err != nil { + log.Fatalln(err) + } + } + + for _, m := range []RecFieldToValuesMap{ + {MetadataFieldClassifier, metaStripped.Info.Classifier}, + {MetadataFieldRequiresDist, metaStripped.Info.RequiresDist}, + } { + for _, v := range m.jsonFields { + if _, err = wr.WriteFields(recfile.Field{ + Name: metadataFieldToRecField(m.recField), + Value: v, + }); err != nil { + log.Fatalln(err) + } + } + } + description = metaStripped.Info.Description + allReleases = metaStripped.Releases + } + lines := strings.Split(description, "\n") + if len(lines) > 0 { + if _, err = wr.WriteFieldMultiline( + MetadataFieldDescription, lines, + ); err != nil { + log.Fatalln(err) + } + } + + if !mkdirForPkg(w, r, pkgName) { + return false + } + path := filepath.Join(dirPath, MetadataFile) + existing, err := ioutil.ReadFile(path) + if err != nil || bytes.Compare(existing, buf.Bytes()) != 0 { + if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil { + log.Println("error", r.RemoteAddr, "refresh-json", path, err) + http.Error(w, err.Error(), http.StatusInternalServerError) + return false + } + log.Println(r.RemoteAddr, "pypi", pkgName+"."+MetadataFile, "touch") + } + } + mtimes := make(map[string]time.Time) + for _, releases := range allReleases { + for _, rel := range releases { + if rel.Filename == "" || rel.UploadTimeISO8601 == "" { + continue + } + t, err := time.Parse(time.RFC3339Nano, rel.UploadTimeISO8601) + if err != nil { + log.Println( + "error", r.RemoteAddr, "refresh-json", pkgName, + "can not parse upload_time:", err, + ) + http.Error(w, "can not parse metadata JSON", http.StatusBadGateway) + return false + } + mtimes[rel.Filename] = t.Truncate(time.Second) + } + } + + resp, err := c.Do(agentedReq(*PyPIURL + pkgName + "/")) if err != nil { + log.Println("error", r.RemoteAddr, "refresh", pkgName, err) http.Error(w, err.Error(), http.StatusBadGateway) return false } + if resp.StatusCode != http.StatusOK { + resp.Body.Close() + log.Println( + "error", r.RemoteAddr, "refresh", pkgName, + "HTTP status:", resp.Status, + ) + http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway) + return false + } body, err := ioutil.ReadAll(resp.Body) resp.Body.Close() if err != nil { + log.Println("error", r.RemoteAddr, "refresh", pkgName, err) http.Error(w, err.Error(), http.StatusBadGateway) return false } - if !mkdirForPkg(w, r, dir) { + if !mkdirForPkg(w, r, pkgName) { return false } - dirPath := filepath.Join(*root, dir) for _, lineRaw := range bytes.Split(body, []byte("\n")) { - submatches := pkgPyPI.FindStringSubmatch(string(lineRaw)) + submatches := PkgPyPI.FindStringSubmatch(string(lineRaw)) if len(submatches) == 0 { continue } @@ -79,12 +296,13 @@ func refreshDir( filename := submatches[2] pkgURL, err := url.Parse(uri) if err != nil { + log.Println("error", r.RemoteAddr, "refresh", uri, err) http.Error(w, err.Error(), http.StatusBadGateway) return false } if pkgURL.Fragment == "" { - log.Println(r.RemoteAddr, "pypi", filename, "no digest provided") + log.Println(r.RemoteAddr, "pypi", filename, "no digest") http.Error(w, "no digest provided", http.StatusBadGateway) return false } @@ -93,12 +311,13 @@ func refreshDir( // Ancient non PEP-0503 PyPIs, assume MD5 digestInfo = []string{"md5", digestInfo[0]} } else if len(digestInfo) != 2 { - log.Println(r.RemoteAddr, "pypi", filename, "invalid digest provided") + log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest") http.Error(w, "invalid digest provided", http.StatusBadGateway) return false } digest, err := hex.DecodeString(digestInfo[1]) if err != nil { + log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest") http.Error(w, err.Error(), http.StatusBadGateway) return false } @@ -119,45 +338,57 @@ func refreshDir( hasherNew = blake2b256New hashSize = blake2b.Size256 default: - log.Println( - r.RemoteAddr, "pypi", filename, - "unknown digest algorithm", hashAlgo, - ) + log.Println("error", r.RemoteAddr, "pypi", filename, "unknown digest", hashAlgo) http.Error(w, "unknown digest algorithm", http.StatusBadGateway) return false } if len(digest) != hashSize { - log.Println(r.RemoteAddr, "pypi", filename, "invalid digest length") + log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest length") http.Error(w, "invalid digest length", http.StatusBadGateway) return false } pkgURL.Fragment = "" if pkgURL.Host == "" { - uri = pypiURLParsed.ResolveReference(pkgURL).String() + uri = PyPIURLParsed.ResolveReference(pkgURL).String() } else { uri = pkgURL.String() } + mtime, mtimeExists := mtimes[filename] + if !mtimeExists { + mtime = now + } path := filepath.Join(dirPath, filename) if filename == filenameGet { - if killed { + if Killed { // Skip heavy remote call, when shutting down http.Error(w, "shutting down", http.StatusInternalServerError) return false } - log.Println(r.RemoteAddr, "pypi download", filename) - resp, err = http.Get(uri) + log.Println(r.RemoteAddr, "pypi", filename, "download") + resp, err = c.Do(agentedReq(uri)) if err != nil { - log.Println(r.RemoteAddr, "pypi download error:", err.Error()) + log.Println("error", r.RemoteAddr, "pypi", filename, "download", err) http.Error(w, err.Error(), http.StatusBadGateway) return false } defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + log.Println( + "error", r.RemoteAddr, + "pypi", filename, "download", + "HTTP status:", resp.Status, + ) + http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway) + return false + } hasher := hasherNew() hasherSHA256 := sha256.New() + hasherBLAKE2b256 := blake2b256New() dst, err := TempFile(dirPath) if err != nil { + log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } @@ -166,16 +397,21 @@ func refreshDir( if hashAlgo != HashAlgoSHA256 { wrs = append(wrs, hasherSHA256) } + if hashAlgo != HashAlgoBLAKE2b256 { + wrs = append(wrs, hasherBLAKE2b256) + } wr := io.MultiWriter(wrs...) if _, err = io.Copy(wr, resp.Body); err != nil { os.Remove(dst.Name()) dst.Close() + log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } if err = dstBuf.Flush(); err != nil { os.Remove(dst.Name()) dst.Close() + log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } @@ -186,37 +422,90 @@ func refreshDir( http.Error(w, "digest mismatch", http.StatusBadGateway) return false } - if err = dst.Sync(); err != nil { - os.Remove(dst.Name()) - dst.Close() - http.Error(w, err.Error(), http.StatusInternalServerError) - return false + if !NoSync { + if err = dst.Sync(); err != nil { + os.Remove(dst.Name()) + dst.Close() + log.Println("error", r.RemoteAddr, "pypi", filename, err) + http.Error(w, err.Error(), http.StatusInternalServerError) + return false + } } if err = dst.Close(); err != nil { + log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } + if err = os.Chtimes(dst.Name(), mtime, mtime); err != nil { + log.Println("error", r.RemoteAddr, "pypi", filename, err) + http.Error(w, err.Error(), http.StatusInternalServerError) + } if err = os.Rename(dst.Name(), path); err != nil { + log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } if err = DirSync(dirPath); err != nil { + log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } - if hashAlgo != HashAlgoSHA256 { - hashAlgo = HashAlgoSHA256 - digest = hasherSHA256.Sum(nil) - for _, algo := range knownHashAlgos[1:] { - os.Remove(path + "." + algo) + + var digestSHA256 []byte + var digestBLAKE2b256 []byte + if hashAlgo == HashAlgoSHA256 { + digestSHA256 = hasher.Sum(nil) + } else { + digestSHA256 = hasherSHA256.Sum(nil) + } + if hashAlgo == HashAlgoBLAKE2b256 { + digestBLAKE2b256 = hasher.Sum(nil) + } else { + digestBLAKE2b256 = hasherBLAKE2b256.Sum(nil) + } + if err = WriteFileSync( + dirPath, path+"."+HashAlgoSHA256, + digestSHA256, mtime, + ); err != nil { + log.Println( + "error", r.RemoteAddr, "pypi", + path+"."+HashAlgoSHA256, err, + ) + http.Error(w, err.Error(), http.StatusInternalServerError) + return false + } + if err = WriteFileSync( + dirPath, path+"."+HashAlgoBLAKE2b256, + digestBLAKE2b256, mtime, + ); err != nil { + log.Println( + "error", r.RemoteAddr, "pypi", + path+"."+HashAlgoBLAKE2b256, err, + ) + http.Error(w, err.Error(), http.StatusInternalServerError) + return false + } + for _, algo := range KnownHashAlgos[2:] { + os.Remove(path + "." + algo) + } + digest = nil + } + if mtimeExists { + stat, err := os.Stat(path) + if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) { + log.Println(r.RemoteAddr, "pypi", filename, "touch") + if err = os.Chtimes(path, mtime, mtime); err != nil { + log.Println("error", r.RemoteAddr, "pypi", filename, err) + http.Error(w, err.Error(), http.StatusInternalServerError) } } } + if filename == filenameGet || gpgUpdate { if _, err = os.Stat(path); err != nil { goto GPGSigSkip } - resp, err := http.Get(uri + GPGSigExt) + resp, err := c.Do(agentedReq(uri + GPGSigExt)) if err != nil { goto GPGSigSkip } @@ -230,27 +519,45 @@ func refreshDir( goto GPGSigSkip } if !bytes.HasPrefix(sig, []byte("-----BEGIN PGP SIGNATURE-----")) { - log.Println(r.RemoteAddr, "pypi non PGP signature", filename) + log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "non PGP") goto GPGSigSkip } - if err = WriteFileSync(dirPath, path+GPGSigExt, sig); err != nil { + if err = WriteFileSync(dirPath, path+GPGSigExt, sig, mtime); err != nil { + log.Println("error", r.RemoteAddr, "pypi", filename+GPGSigExt, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } - log.Println(r.RemoteAddr, "pypi downloaded signature", filename) + log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "downloaded") } + if mtimeExists { + stat, err := os.Stat(path + GPGSigExt) + if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) { + log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "touch") + if err = os.Chtimes(path+GPGSigExt, mtime, mtime); err != nil { + log.Println("error", r.RemoteAddr, "pypi", filename, err) + http.Error(w, err.Error(), http.StatusInternalServerError) + } + } + } + GPGSigSkip: + if digest == nil { + continue + } path = path + "." + hashAlgo - _, err = os.Stat(path) - if err == nil { + stat, err := os.Stat(path) + if err == nil && + (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime)) { continue } - if !os.IsNotExist(err) { + if err != nil && !os.IsNotExist(err) { + log.Println("error", r.RemoteAddr, "pypi", path, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } - log.Println(r.RemoteAddr, "pypi touch", filename) - if err = WriteFileSync(dirPath, path, digest); err != nil { + log.Println(r.RemoteAddr, "pypi", filename, "touch") + if err = WriteFileSync(dirPath, path, digest, mtime); err != nil { + log.Println("error", r.RemoteAddr, "pypi", path, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false }