/*
GoCheese -- Python private package repository and caching proxy
-Copyright (C) 2019-2021 Sergey Matveev <stargrave@stargrave.org>
+Copyright (C) 2019-2023 Sergey Matveev <stargrave@stargrave.org>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
"crypto/sha256"
"crypto/sha512"
"encoding/hex"
+ "encoding/json"
+ "errors"
"hash"
"io"
- "io/ioutil"
"log"
"net/http"
"net/url"
"os"
"path/filepath"
+ "regexp"
"strings"
+ "time"
+ "go.cypherpunks.ru/recfile"
"golang.org/x/crypto/blake2b"
)
+const (
+ HashAlgoBLAKE2b256 = "blake2b_256" // digest name: key into the JSON release digests and suffix of stored "<file>.<algo>" digest files
+ HashAlgoSHA256 = "sha256" // digest name, used the same way as HashAlgoBLAKE2b256
+ HashAlgoSHA512 = "sha512" // digest name, used the same way as HashAlgoBLAKE2b256
+ HashAlgoMD5 = "md5" // digest name; in refreshDir only reachable through the URL-fragment digest
+ InternalFlag = ".internal" // file name inside a package directory; when it exists refreshDir returns true without contacting upstream
+)
+
+var (
+ PkgPyPI = regexp.MustCompile(`^.*<a href="([^"]+)"[^>]*>(.+)</a>.*$`) // matched against each line of the upstream package index; captures the href and the link text
+ PyPIURLParsed *url.URL // base URL used by refreshDir to resolve host-less package links; assigned outside this view
+ PyPIHTTPTransport http.Transport // transport shared by the http.Client that refreshDir builds for upstream requests
+ KnownHashAlgos []string = []string{ // order matters: after a download refreshDir removes digest files for KnownHashAlgos[2:]
+ HashAlgoBLAKE2b256,
+ HashAlgoSHA256,
+ HashAlgoSHA512,
+ HashAlgoMD5,
+ }
+)
+
func blake2b256New() hash.Hash {
h, err := blake2b.New256(nil)
if err != nil {
return h
}
+func agentedReq(url string) *http.Request { // agentedReq builds a body-less GET request for url with the User-Agent header set
+ req, err := http.NewRequest("GET", url, nil)
+ if err != nil {
+ log.Fatalln(err) // NOTE(review): exits the whole process when the request cannot be constructed (e.g. unparsable url)
+ }
+ req.Header.Set("User-Agent", UserAgent) // UserAgent is declared outside this view
+ return req
+}
+
+type RecFieldToValuesMap struct { // RecFieldToValuesMap pairs one metadata field name with the list of values decoded from JSON for it
+ recField string // metadata field name; refreshDir uses it as the key into MDFieldToRecField
+ jsonFields []string // values for that field; refreshDir writes each one as a separate recfile field
+}
+
func refreshDir(
w http.ResponseWriter,
r *http.Request,
pkgName, filenameGet string,
- gpgUpdate bool,
) bool {
- if _, err := os.Stat(filepath.Join(*root, pkgName, InternalFlag)); err == nil {
+ if _, err := os.Stat(filepath.Join(Root, pkgName, InternalFlag)); err == nil {
return true
}
- resp, err := http.Get(*pypiURL + pkgName + "/")
+ c := http.Client{Transport: &PyPIHTTPTransport}
+ dirPath := filepath.Join(Root, pkgName)
+ now := time.Now()
+
+ var allReleases map[string][]*PkgReleaseInfo
+ if *JSONURL != "" {
+ resp, err := c.Do(agentedReq(*JSONURL + pkgName + "/json"))
+ if err != nil {
+ log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
+ http.Error(w, err.Error(), http.StatusBadGateway)
+ return false
+ }
+ if resp.StatusCode != http.StatusOK {
+ resp.Body.Close()
+ log.Println(
+ "error", r.RemoteAddr, "refresh-json", pkgName,
+ "HTTP status:", resp.Status,
+ )
+ http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
+ return false
+ }
+ body, err := io.ReadAll(resp.Body)
+ if err != nil {
+ resp.Body.Close()
+ log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
+ http.Error(w, "can not read body", http.StatusBadGateway)
+ return false
+ }
+ resp.Body.Close()
+ var buf bytes.Buffer
+ var description string
+ wr := recfile.NewWriter(&buf)
+ var meta PkgMeta
+ err = json.Unmarshal(body, &meta)
+ if err == nil {
+ for _, m := range [][2]string{
+ {MDFieldName, meta.Info.Name},
+ {MDFieldVersion, meta.Info.Version},
+ {MDFieldSummary, meta.Info.Summary},
+ {MDFieldDescriptionContentType, meta.Info.DescriptionContentType},
+ {MDFieldKeywords, meta.Info.Keywords},
+ {MDFieldHomePage, meta.Info.HomePage},
+ {MDFieldAuthor, meta.Info.Author},
+ {MDFieldAuthorEmail, meta.Info.AuthorEmail},
+ {MDFieldMaintainer, meta.Info.Maintainer},
+ {MDFieldMaintainerEmail, meta.Info.MaintainerEmail},
+ {MDFieldLicense, meta.Info.License},
+ {MDFieldRequiresPython, meta.Info.RequiresPython},
+ } {
+ recField, jsonField := m[0], m[1]
+ if jsonField == "" {
+ continue
+ }
+ if _, err = wr.WriteFields(recfile.Field{
+ Name: MDFieldToRecField[recField],
+ Value: jsonField,
+ }); err != nil {
+ log.Fatalln(err)
+ }
+ }
+ for _, m := range []RecFieldToValuesMap{
+ {MDFieldClassifier, meta.Info.Classifier},
+ {MDFieldPlatform, meta.Info.Platform},
+ {MDFieldSupportedPlatform, meta.Info.SupportedPlatform},
+ {MDFieldRequiresDist, meta.Info.RequiresDist},
+ {MDFieldRequiresExternal, meta.Info.RequiresExternal},
+ {MDFieldProjectURL, meta.Info.ProjectURL},
+ {MDFieldProvidesExtra, meta.Info.ProvidesExtra},
+ } {
+ for _, v := range m.jsonFields {
+ if _, err = wr.WriteFields(recfile.Field{
+ Name: MDFieldToRecField[m.recField],
+ Value: v,
+ }); err != nil {
+ log.Fatalln(err)
+ }
+ }
+ }
+ description = meta.Info.Description
+ allReleases = meta.Releases
+ } else {
+ var metaStripped PkgMetaStripped
+ err = json.Unmarshal(body, &metaStripped)
+ if err != nil {
+ log.Println(
+ "error", r.RemoteAddr, "refresh-json", pkgName,
+ "can not parse JSON:", err,
+ )
+ http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
+ return false
+ }
+ for _, m := range [][2]string{
+ {MDFieldName, metaStripped.Info.Name},
+ {MDFieldVersion, metaStripped.Info.Version},
+ {MDFieldSummary, metaStripped.Info.Summary},
+ {MDFieldDescriptionContentType, metaStripped.Info.DescriptionContentType},
+ {MDFieldKeywords, metaStripped.Info.Keywords},
+ {MDFieldHomePage, metaStripped.Info.HomePage},
+ {MDFieldAuthor, metaStripped.Info.Author},
+ {MDFieldAuthorEmail, metaStripped.Info.AuthorEmail},
+ {MDFieldMaintainer, metaStripped.Info.Maintainer},
+ {MDFieldMaintainerEmail, metaStripped.Info.MaintainerEmail},
+ {MDFieldLicense, metaStripped.Info.License},
+ {MDFieldRequiresPython, metaStripped.Info.RequiresPython},
+ } {
+ recField, jsonField := m[0], m[1]
+ if jsonField == "" {
+ continue
+ }
+ if _, err = wr.WriteFields(recfile.Field{
+ Name: MDFieldToRecField[recField],
+ Value: jsonField,
+ }); err != nil {
+ log.Fatalln(err)
+ }
+ }
+
+ for _, m := range []RecFieldToValuesMap{
+ {MDFieldClassifier, metaStripped.Info.Classifier},
+ {MDFieldRequiresDist, metaStripped.Info.RequiresDist},
+ } {
+ for _, v := range m.jsonFields {
+ if _, err = wr.WriteFields(recfile.Field{
+ Name: MDFieldToRecField[m.recField],
+ Value: v,
+ }); err != nil {
+ log.Fatalln(err)
+ }
+ }
+ }
+ description = metaStripped.Info.Description
+ allReleases = metaStripped.Releases
+ }
+ lines := strings.Split(description, "\n")
+ if len(lines) > 0 {
+ if _, err = wr.WriteFieldMultiline(
+ MDFieldDescription, lines,
+ ); err != nil {
+ log.Fatalln(err)
+ }
+ }
+
+ if !mkdirForPkg(w, r, pkgName) {
+ return false
+ }
+ path := filepath.Join(dirPath, MDFile)
+ existing, err := os.ReadFile(path)
+ if err != nil || !bytes.Equal(existing, buf.Bytes()) {
+ if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil {
+ log.Println("error", r.RemoteAddr, "refresh-json", path, err)
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ return false
+ }
+ log.Println(r.RemoteAddr, "pypi", pkgName+"/"+MDFile, "touch")
+ }
+ }
+ mtimes := make(map[string]time.Time)
+ digestsBLAKE2b256 := make(map[string][]byte)
+ digestsSHA256 := make(map[string][]byte)
+ digestsSHA512 := make(map[string][]byte)
+ for _, releases := range allReleases {
+ for _, rel := range releases {
+ if rel.Filename == "" || rel.UploadTimeISO8601 == "" {
+ continue
+ }
+ t, err := time.Parse(time.RFC3339Nano, rel.UploadTimeISO8601)
+ if err != nil {
+ log.Println(
+ "error", r.RemoteAddr, "refresh-json", pkgName,
+ "can not parse upload_time:", err,
+ )
+ http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
+ return false
+ }
+ mtimes[rel.Filename] = t.Truncate(time.Second)
+ if d := rel.Digests[HashAlgoBLAKE2b256]; d != "" {
+ digestsBLAKE2b256[rel.Filename], err = hex.DecodeString(d)
+ if err != nil {
+ log.Println(
+ "error", r.RemoteAddr, "refresh-json", pkgName,
+ "can not decode blake2b_256 digest:", err,
+ )
+ http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
+ return false
+ }
+ }
+ if d := rel.Digests[HashAlgoSHA256]; d != "" {
+ digestsSHA256[rel.Filename], err = hex.DecodeString(d)
+ if err != nil {
+ log.Println(
+ "error", r.RemoteAddr, "refresh-json", pkgName,
+ "can not decode sha256 digest:", err,
+ )
+ http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
+ return false
+ }
+ }
+ if d := rel.Digests[HashAlgoSHA512]; d != "" {
+ digestsSHA512[rel.Filename], err = hex.DecodeString(d)
+ if err != nil {
+ log.Println(
+ "error", r.RemoteAddr, "refresh-json", pkgName,
+ "can not decode sha512 digest:", err,
+ )
+ http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
+ return false
+ }
+ }
+ }
+ }
+
+ resp, err := c.Do(agentedReq(*PyPIURL + pkgName + "/"))
if err != nil {
log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
http.Error(w, err.Error(), http.StatusBadGateway)
}
if resp.StatusCode != http.StatusOK {
resp.Body.Close()
- log.Println("error", r.RemoteAddr, "refresh", pkgName, "HTTP status:", resp.Status)
+ log.Println(
+ "error", r.RemoteAddr, "refresh", pkgName,
+ "HTTP status:", resp.Status,
+ )
http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
return false
}
- body, err := ioutil.ReadAll(resp.Body)
+ body, err := io.ReadAll(resp.Body)
resp.Body.Close()
if err != nil {
log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
if !mkdirForPkg(w, r, pkgName) {
return false
}
- dirPath := filepath.Join(*root, pkgName)
for _, lineRaw := range bytes.Split(body, []byte("\n")) {
- submatches := pkgPyPI.FindStringSubmatch(string(lineRaw))
+ submatches := PkgPyPI.FindStringSubmatch(string(lineRaw))
if len(submatches) == 0 {
continue
}
return false
}
- if pkgURL.Fragment == "" {
- log.Println(r.RemoteAddr, "pypi", filename, "no digest")
- http.Error(w, "no digest provided", http.StatusBadGateway)
- return false
- }
- digestInfo := strings.Split(pkgURL.Fragment, "=")
- if len(digestInfo) == 1 {
- // Ancient non PEP-0503 PyPIs, assume MD5
- digestInfo = []string{"md5", digestInfo[0]}
- } else if len(digestInfo) != 2 {
- log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
- http.Error(w, "invalid digest provided", http.StatusBadGateway)
- return false
- }
- digest, err := hex.DecodeString(digestInfo[1])
- if err != nil {
- log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
- http.Error(w, err.Error(), http.StatusBadGateway)
- return false
- }
- hashAlgo := digestInfo[0]
+ var hashAlgo string
var hasherNew func() hash.Hash
- var hashSize int
- switch hashAlgo {
- case HashAlgoMD5:
- hasherNew = md5.New
- hashSize = md5.Size
- case HashAlgoSHA256:
+ var digest []byte
+ if d := digestsBLAKE2b256[filename]; d != nil {
+ hasherNew = blake2b256New
+ hashAlgo = HashAlgoBLAKE2b256
+ digest = d
+ } else if d := digestsSHA256[filename]; d != nil {
hasherNew = sha256.New
- hashSize = sha256.Size
- case HashAlgoSHA512:
+ hashAlgo = HashAlgoSHA256
+ digest = d
+ } else if d := digestsSHA512[filename]; d != nil {
hasherNew = sha512.New
- hashSize = sha512.Size
- case HashAlgoBLAKE2b256:
- hasherNew = blake2b256New
- hashSize = blake2b.Size256
- default:
- log.Println("error", r.RemoteAddr, "pypi", filename, "unknown digest", hashAlgo)
- http.Error(w, "unknown digest algorithm", http.StatusBadGateway)
- return false
- }
- if len(digest) != hashSize {
- log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest length")
- http.Error(w, "invalid digest length", http.StatusBadGateway)
- return false
+ hashAlgo = HashAlgoSHA512
+ digest = d
+ } else {
+ if pkgURL.Fragment == "" {
+ log.Println(r.RemoteAddr, "pypi", filename, "no digest")
+ http.Error(w, "no digest provided", http.StatusBadGateway)
+ return false
+ }
+ digestInfo := strings.Split(pkgURL.Fragment, "=")
+ if len(digestInfo) == 1 {
+ // Ancient non PEP-0503 PyPIs, assume MD5
+ digestInfo = []string{"md5", digestInfo[0]}
+ } else if len(digestInfo) != 2 {
+ log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
+ http.Error(w, "invalid digest provided", http.StatusBadGateway)
+ return false
+ }
+ var err error
+ digest, err = hex.DecodeString(digestInfo[1])
+ if err != nil {
+ log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
+ http.Error(w, err.Error(), http.StatusBadGateway)
+ return false
+ }
+ hashAlgo = digestInfo[0]
+ var hashSize int
+ switch hashAlgo {
+ case HashAlgoBLAKE2b256:
+ hasherNew = blake2b256New
+ hashSize = blake2b.Size256
+ case HashAlgoSHA256:
+ hasherNew = sha256.New
+ hashSize = sha256.Size
+ case HashAlgoSHA512:
+ hasherNew = sha512.New
+ hashSize = sha512.Size
+ case HashAlgoMD5:
+ hasherNew = md5.New
+ hashSize = md5.Size
+ default:
+ log.Println(
+ "error", r.RemoteAddr, "pypi",
+ filename, "unknown digest", hashAlgo,
+ )
+ http.Error(w, "unknown digest algorithm", http.StatusBadGateway)
+ return false
+ }
+ if len(digest) != hashSize {
+ log.Println(
+ "error", r.RemoteAddr, "pypi",
+ filename, "invalid digest length")
+ http.Error(w, "invalid digest length", http.StatusBadGateway)
+ return false
+ }
}
pkgURL.Fragment = ""
if pkgURL.Host == "" {
- uri = pypiURLParsed.ResolveReference(pkgURL).String()
+ uri = PyPIURLParsed.ResolveReference(pkgURL).String()
} else {
uri = pkgURL.String()
}
+ mtime, mtimeExists := mtimes[filename]
+ if !mtimeExists {
+ mtime = now
+ }
path := filepath.Join(dirPath, filename)
if filename == filenameGet {
- if killed {
+ if Killed {
// Skip heavy remote call, when shutting down
http.Error(w, "shutting down", http.StatusInternalServerError)
return false
}
log.Println(r.RemoteAddr, "pypi", filename, "download")
- resp, err = http.Get(uri)
+ resp, err = c.Do(agentedReq(uri))
if err != nil {
log.Println("error", r.RemoteAddr, "pypi", filename, "download", err)
http.Error(w, err.Error(), http.StatusBadGateway)
return false
}
hasher := hasherNew()
+ hasherBLAKE2b256 := blake2b256New()
hasherSHA256 := sha256.New()
dst, err := TempFile(dirPath)
if err != nil {
}
dstBuf := bufio.NewWriter(dst)
wrs := []io.Writer{hasher, dstBuf}
+ if hashAlgo != HashAlgoBLAKE2b256 {
+ wrs = append(wrs, hasherBLAKE2b256)
+ }
if hashAlgo != HashAlgoSHA256 {
wrs = append(wrs, hasherSHA256)
}
http.Error(w, err.Error(), http.StatusInternalServerError)
return false
}
- if bytes.Compare(hasher.Sum(nil), digest) != 0 {
+ if !bytes.Equal(hasher.Sum(nil), digest) {
log.Println(r.RemoteAddr, "pypi", filename, "digest mismatch")
os.Remove(dst.Name())
dst.Close()
http.Error(w, "digest mismatch", http.StatusBadGateway)
return false
}
- if err = dst.Sync(); err != nil {
+ if digestStored, err := os.ReadFile(path + "." + hashAlgo); err == nil &&
+ !bytes.Equal(digest, digestStored) {
+ err = errors.New("stored digest mismatch")
+ log.Println("error", r.RemoteAddr, "pypi", filename, err)
os.Remove(dst.Name())
dst.Close()
- log.Println("error", r.RemoteAddr, "pypi", filename, err)
http.Error(w, err.Error(), http.StatusInternalServerError)
return false
}
+ if !NoSync {
+ if err = dst.Sync(); err != nil {
+ os.Remove(dst.Name())
+ dst.Close()
+ log.Println("error", r.RemoteAddr, "pypi", filename, err)
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ return false
+ }
+ }
if err = dst.Close(); err != nil {
log.Println("error", r.RemoteAddr, "pypi", filename, err)
http.Error(w, err.Error(), http.StatusInternalServerError)
return false
}
+ if err = os.Chtimes(dst.Name(), mtime, mtime); err != nil {
+ log.Println("error", r.RemoteAddr, "pypi", filename, err)
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ }
if err = os.Rename(dst.Name(), path); err != nil {
log.Println("error", r.RemoteAddr, "pypi", filename, err)
http.Error(w, err.Error(), http.StatusInternalServerError)
http.Error(w, err.Error(), http.StatusInternalServerError)
return false
}
- if hashAlgo != HashAlgoSHA256 {
- hashAlgo = HashAlgoSHA256
- digest = hasherSHA256.Sum(nil)
- for _, algo := range knownHashAlgos[1:] {
- os.Remove(path + "." + algo)
- }
- }
- }
- if filename == filenameGet || gpgUpdate {
- if _, err = os.Stat(path); err != nil {
- goto GPGSigSkip
- }
- resp, err := http.Get(uri + GPGSigExt)
- if err != nil {
- goto GPGSigSkip
- }
- if resp.StatusCode != http.StatusOK {
- resp.Body.Close()
- goto GPGSigSkip
+
+ var digestBLAKE2b256 []byte
+ var digestSHA256 []byte
+ if hashAlgo == HashAlgoBLAKE2b256 {
+ digestBLAKE2b256 = hasher.Sum(nil)
+ } else {
+ digestBLAKE2b256 = hasherBLAKE2b256.Sum(nil)
}
- sig, err := ioutil.ReadAll(resp.Body)
- resp.Body.Close()
- if err != nil {
- goto GPGSigSkip
+ if hashAlgo == HashAlgoSHA256 {
+ digestSHA256 = hasher.Sum(nil)
+ } else {
+ digestSHA256 = hasherSHA256.Sum(nil)
}
- if !bytes.HasPrefix(sig, []byte("-----BEGIN PGP SIGNATURE-----")) {
- log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "non PGP")
- goto GPGSigSkip
+ if err = WriteFileSync(
+ dirPath, path+"."+HashAlgoBLAKE2b256,
+ digestBLAKE2b256, mtime,
+ ); err != nil {
+ log.Println(
+ "error", r.RemoteAddr, "pypi",
+ path+"."+HashAlgoBLAKE2b256, err,
+ )
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ return false
}
- if err = WriteFileSync(dirPath, path+GPGSigExt, sig); err != nil {
- log.Println("error", r.RemoteAddr, "pypi", filename+GPGSigExt, err)
+ if err = WriteFileSync(
+ dirPath, path+"."+HashAlgoSHA256,
+ digestSHA256, mtime,
+ ); err != nil {
+ log.Println(
+ "error", r.RemoteAddr, "pypi",
+ path+"."+HashAlgoSHA256, err,
+ )
http.Error(w, err.Error(), http.StatusInternalServerError)
return false
}
- log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "downloaded")
+ for _, algo := range KnownHashAlgos[2:] {
+ os.Remove(path + "." + algo)
+ }
+ digest = nil
+ }
+ if mtimeExists {
+ stat, err := os.Stat(path)
+ if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
+ log.Println(r.RemoteAddr, "pypi", filename, "touch")
+ if err = os.Chtimes(path, mtime, mtime); err != nil {
+ log.Println("error", r.RemoteAddr, "pypi", filename, err)
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ }
+ }
+ }
+
+ if digest == nil {
+ continue
}
- GPGSigSkip:
path = path + "." + hashAlgo
- _, err = os.Stat(path)
- if err == nil {
+ stat, err := os.Stat(path)
+ if err == nil && (!mtimeExists ||
+ (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime))) {
continue
}
- if !os.IsNotExist(err) {
+ if err != nil && !os.IsNotExist(err) {
log.Println("error", r.RemoteAddr, "pypi", path, err)
http.Error(w, err.Error(), http.StatusInternalServerError)
return false
}
log.Println(r.RemoteAddr, "pypi", filename, "touch")
- if err = WriteFileSync(dirPath, path, digest); err != nil {
+ if err = WriteFileSync(dirPath, path, digest, mtime); err != nil {
log.Println("error", r.RemoteAddr, "pypi", path, err)
http.Error(w, err.Error(), http.StatusInternalServerError)
return false