/*
GoCheese -- Python private package repository and caching proxy
-Copyright (C) 2019 Sergey Matveev <stargrave@stargrave.org>
+Copyright (C) 2019-2023 Sergey Matveev <stargrave@stargrave.org>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
"crypto/sha256"
"crypto/sha512"
"encoding/hex"
+ "encoding/json"
+ "errors"
"hash"
"io"
- "io/ioutil"
"log"
"net/http"
"net/url"
"os"
"path/filepath"
+ "regexp"
"strings"
+ "time"
+ "go.cypherpunks.ru/recfile"
"golang.org/x/crypto/blake2b"
)
+// Fixed names and file-name suffixes used by the refresh code.
+const (
+ // Digest algorithm names. refreshDir appends them to a package file
+ // path as "." + name to build the path of the stored digest file.
+ HashAlgoSHA256 = "sha256"
+ HashAlgoBLAKE2b256 = "blake2_256"
+ HashAlgoSHA512 = "sha512"
+ HashAlgoMD5 = "md5"
+ // GPGSigExt is appended both to the upstream URL and to the local
+ // path of a package file to address its detached signature.
+ GPGSigExt = ".asc"
+ // InternalFlag is the name of a file inside a package directory;
+ // when it exists, refreshDir returns true without contacting upstream.
+ InternalFlag = ".internal"
+)
+
+// Package-level state shared by the upstream refresh code.
+var (
+ // PkgPyPI matches one line holding an <a> element: submatch 1 is the
+ // href value, submatch 2 is the link text (refreshDir uses submatch 2
+ // as the filename).
+ PkgPyPI = regexp.MustCompile(`^.*<a href="([^"]+)"[^>]*>(.+)</a>.*$`)
+ // PyPIURLParsed is the base URL that host-less package links are
+ // resolved against. It is not assigned in the visible code.
+ PyPIURLParsed *url.URL
+ // PyPIHTTPTransport is the transport of the http.Client that
+ // refreshDir creates for every upstream request.
+ PyPIHTTPTransport http.Transport
+ // KnownHashAlgos lists the recognised digest algorithm names. The
+ // order is significant: after a download refreshDir writes the first
+ // two digests and removes the digest files of KnownHashAlgos[2:].
+ KnownHashAlgos []string = []string{
+ HashAlgoSHA256,
+ HashAlgoBLAKE2b256,
+ HashAlgoSHA512,
+ HashAlgoMD5,
+ }
+)
+
func blake2b256New() hash.Hash {
h, err := blake2b.New256(nil)
if err != nil {
return h
}
+// agentedReq builds a body-less GET request for url and sets its
+// User-Agent header to UserAgent (declared outside the visible code).
+//
+// If http.NewRequest rejects url, the error is passed to log.Fatalln,
+// which logs it and exits the process.
+// NOTE(review): refreshDir calls this with URLs assembled from pkgName,
+// presumably taken from the client request -- confirm that a malformed
+// name can not reach this point, otherwise it stops the whole server.
+func agentedReq(url string) *http.Request {
+ // The parameter name shadows the imported net/url package inside
+ // this function; the package is not needed here.
+ req, err := http.NewRequest("GET", url, nil)
+ if err != nil {
+ log.Fatalln(err)
+ }
+ req.Header.Set("User-Agent", UserAgent)
+ return req
+}
+
+// RecFieldToValuesMap pairs one metadata field name with the list of
+// values decoded for it from upstream JSON. Despite the name it is a
+// plain struct, not a map.
+type RecFieldToValuesMap struct {
+ // recField is the key refreshDir looks up in MDFieldToRecField to
+ // get the recfile field name.
+ recField string
+ // jsonFields holds the values; refreshDir writes each one as a
+ // separate recfile field under the same name.
+ jsonFields []string
+}
+
func refreshDir(
w http.ResponseWriter,
r *http.Request,
- dir,
- filenameGet string,
+ pkgName, filenameGet string,
gpgUpdate bool,
) bool {
- if _, err := os.Stat(filepath.Join(*root, dir, InternalFlag)); err == nil {
+ if _, err := os.Stat(filepath.Join(Root, pkgName, InternalFlag)); err == nil {
return true
}
- resp, err := http.Get(*pypiURL + dir + "/")
+ c := http.Client{Transport: &PyPIHTTPTransport}
+ dirPath := filepath.Join(Root, pkgName)
+ now := time.Now()
+
+ var allReleases map[string][]*PkgReleaseInfo
+ if *JSONURL != "" {
+ resp, err := c.Do(agentedReq(*JSONURL + pkgName + "/json"))
+ if err != nil {
+ log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
+ http.Error(w, err.Error(), http.StatusBadGateway)
+ return false
+ }
+ if resp.StatusCode != http.StatusOK {
+ resp.Body.Close()
+ log.Println(
+ "error", r.RemoteAddr, "refresh-json", pkgName,
+ "HTTP status:", resp.Status,
+ )
+ http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
+ return false
+ }
+ body, err := io.ReadAll(resp.Body)
+ if err != nil {
+ resp.Body.Close()
+ log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
+ http.Error(w, "can not read body", http.StatusBadGateway)
+ return false
+ }
+ resp.Body.Close()
+ var buf bytes.Buffer
+ var description string
+ wr := recfile.NewWriter(&buf)
+ var meta PkgMeta
+ err = json.Unmarshal(body, &meta)
+ if err == nil {
+ for _, m := range [][2]string{
+ {MDFieldName, meta.Info.Name},
+ {MDFieldVersion, meta.Info.Version},
+ {MDFieldSummary, meta.Info.Summary},
+ {MDFieldDescriptionContentType, meta.Info.DescriptionContentType},
+ {MDFieldKeywords, meta.Info.Keywords},
+ {MDFieldHomePage, meta.Info.HomePage},
+ {MDFieldAuthor, meta.Info.Author},
+ {MDFieldAuthorEmail, meta.Info.AuthorEmail},
+ {MDFieldMaintainer, meta.Info.Maintainer},
+ {MDFieldMaintainerEmail, meta.Info.MaintainerEmail},
+ {MDFieldLicense, meta.Info.License},
+ {MDFieldRequiresPython, meta.Info.RequiresPython},
+ } {
+ recField, jsonField := m[0], m[1]
+ if jsonField == "" {
+ continue
+ }
+ if _, err = wr.WriteFields(recfile.Field{
+ Name: MDFieldToRecField[recField],
+ Value: jsonField,
+ }); err != nil {
+ log.Fatalln(err)
+ }
+ }
+ for _, m := range []RecFieldToValuesMap{
+ {MDFieldClassifier, meta.Info.Classifier},
+ {MDFieldPlatform, meta.Info.Platform},
+ {MDFieldSupportedPlatform, meta.Info.SupportedPlatform},
+ {MDFieldRequiresDist, meta.Info.RequiresDist},
+ {MDFieldRequiresExternal, meta.Info.RequiresExternal},
+ {MDFieldProjectURL, meta.Info.ProjectURL},
+ {MDFieldProvidesExtra, meta.Info.ProvidesExtra},
+ } {
+ for _, v := range m.jsonFields {
+ if _, err = wr.WriteFields(recfile.Field{
+ Name: MDFieldToRecField[m.recField],
+ Value: v,
+ }); err != nil {
+ log.Fatalln(err)
+ }
+ }
+ }
+ description = meta.Info.Description
+ allReleases = meta.Releases
+ } else {
+ var metaStripped PkgMetaStripped
+ err = json.Unmarshal(body, &metaStripped)
+ if err != nil {
+ log.Println(
+ "error", r.RemoteAddr, "refresh-json", pkgName,
+ "can not parse JSON:", err,
+ )
+ http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
+ return false
+ }
+ for _, m := range [][2]string{
+ {MDFieldName, metaStripped.Info.Name},
+ {MDFieldVersion, metaStripped.Info.Version},
+ {MDFieldSummary, metaStripped.Info.Summary},
+ {MDFieldDescriptionContentType, metaStripped.Info.DescriptionContentType},
+ {MDFieldKeywords, metaStripped.Info.Keywords},
+ {MDFieldHomePage, metaStripped.Info.HomePage},
+ {MDFieldAuthor, metaStripped.Info.Author},
+ {MDFieldAuthorEmail, metaStripped.Info.AuthorEmail},
+ {MDFieldMaintainer, metaStripped.Info.Maintainer},
+ {MDFieldMaintainerEmail, metaStripped.Info.MaintainerEmail},
+ {MDFieldLicense, metaStripped.Info.License},
+ {MDFieldRequiresPython, metaStripped.Info.RequiresPython},
+ } {
+ recField, jsonField := m[0], m[1]
+ if jsonField == "" {
+ continue
+ }
+ if _, err = wr.WriteFields(recfile.Field{
+ Name: MDFieldToRecField[recField],
+ Value: jsonField,
+ }); err != nil {
+ log.Fatalln(err)
+ }
+ }
+
+ for _, m := range []RecFieldToValuesMap{
+ {MDFieldClassifier, metaStripped.Info.Classifier},
+ {MDFieldRequiresDist, metaStripped.Info.RequiresDist},
+ } {
+ for _, v := range m.jsonFields {
+ if _, err = wr.WriteFields(recfile.Field{
+ Name: MDFieldToRecField[m.recField],
+ Value: v,
+ }); err != nil {
+ log.Fatalln(err)
+ }
+ }
+ }
+ description = metaStripped.Info.Description
+ allReleases = metaStripped.Releases
+ }
+ lines := strings.Split(description, "\n")
+ if len(lines) > 0 {
+ if _, err = wr.WriteFieldMultiline(
+ MDFieldDescription, lines,
+ ); err != nil {
+ log.Fatalln(err)
+ }
+ }
+
+ if !mkdirForPkg(w, r, pkgName) {
+ return false
+ }
+ path := filepath.Join(dirPath, MDFile)
+ existing, err := os.ReadFile(path)
+ if err != nil || bytes.Compare(existing, buf.Bytes()) != 0 {
+ if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil {
+ log.Println("error", r.RemoteAddr, "refresh-json", path, err)
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ return false
+ }
+ log.Println(r.RemoteAddr, "pypi", pkgName+"/"+MDFile, "touch")
+ }
+ }
+ mtimes := make(map[string]time.Time)
+ for _, releases := range allReleases {
+ for _, rel := range releases {
+ if rel.Filename == "" || rel.UploadTimeISO8601 == "" {
+ continue
+ }
+ t, err := time.Parse(time.RFC3339Nano, rel.UploadTimeISO8601)
+ if err != nil {
+ log.Println(
+ "error", r.RemoteAddr, "refresh-json", pkgName,
+ "can not parse upload_time:", err,
+ )
+ http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
+ return false
+ }
+ mtimes[rel.Filename] = t.Truncate(time.Second)
+ }
+ }
+
+ resp, err := c.Do(agentedReq(*PyPIURL + pkgName + "/"))
if err != nil {
+ log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
http.Error(w, err.Error(), http.StatusBadGateway)
return false
}
- body, err := ioutil.ReadAll(resp.Body)
+ if resp.StatusCode != http.StatusOK {
+ resp.Body.Close()
+ log.Println(
+ "error", r.RemoteAddr, "refresh", pkgName,
+ "HTTP status:", resp.Status,
+ )
+ http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
+ return false
+ }
+ body, err := io.ReadAll(resp.Body)
resp.Body.Close()
if err != nil {
+ log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
http.Error(w, err.Error(), http.StatusBadGateway)
return false
}
- if !mkdirForPkg(w, r, dir) {
+ if !mkdirForPkg(w, r, pkgName) {
return false
}
- dirPath := filepath.Join(*root, dir)
for _, lineRaw := range bytes.Split(body, []byte("\n")) {
- submatches := pkgPyPI.FindStringSubmatch(string(lineRaw))
+ submatches := PkgPyPI.FindStringSubmatch(string(lineRaw))
if len(submatches) == 0 {
continue
}
filename := submatches[2]
pkgURL, err := url.Parse(uri)
if err != nil {
+ log.Println("error", r.RemoteAddr, "refresh", uri, err)
http.Error(w, err.Error(), http.StatusBadGateway)
return false
}
if pkgURL.Fragment == "" {
- log.Println(r.RemoteAddr, "pypi", filename, "no digest provided")
+ log.Println(r.RemoteAddr, "pypi", filename, "no digest")
http.Error(w, "no digest provided", http.StatusBadGateway)
return false
}
// Ancient non PEP-0503 PyPIs, assume MD5
digestInfo = []string{"md5", digestInfo[0]}
} else if len(digestInfo) != 2 {
- log.Println(r.RemoteAddr, "pypi", filename, "invalid digest provided")
+ log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
http.Error(w, "invalid digest provided", http.StatusBadGateway)
return false
}
digest, err := hex.DecodeString(digestInfo[1])
if err != nil {
+ log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
http.Error(w, err.Error(), http.StatusBadGateway)
return false
}
hashSize = blake2b.Size256
default:
log.Println(
- r.RemoteAddr, "pypi", filename,
- "unknown digest algorithm", hashAlgo,
+ "error", r.RemoteAddr, "pypi",
+ filename, "unknown digest", hashAlgo,
)
http.Error(w, "unknown digest algorithm", http.StatusBadGateway)
return false
}
if len(digest) != hashSize {
- log.Println(r.RemoteAddr, "pypi", filename, "invalid digest length")
+ log.Println(
+ "error", r.RemoteAddr, "pypi",
+ filename, "invalid digest length")
http.Error(w, "invalid digest length", http.StatusBadGateway)
return false
}
pkgURL.Fragment = ""
if pkgURL.Host == "" {
- uri = pypiURLParsed.ResolveReference(pkgURL).String()
+ uri = PyPIURLParsed.ResolveReference(pkgURL).String()
} else {
uri = pkgURL.String()
}
+ mtime, mtimeExists := mtimes[filename]
+ if !mtimeExists {
+ mtime = now
+ }
path := filepath.Join(dirPath, filename)
if filename == filenameGet {
- if killed {
+ if Killed {
// Skip heavy remote call, when shutting down
http.Error(w, "shutting down", http.StatusInternalServerError)
return false
}
- log.Println(r.RemoteAddr, "pypi download", filename)
- resp, err = http.Get(uri)
+ log.Println(r.RemoteAddr, "pypi", filename, "download")
+ resp, err = c.Do(agentedReq(uri))
if err != nil {
- log.Println(r.RemoteAddr, "pypi download error:", err.Error())
+ log.Println("error", r.RemoteAddr, "pypi", filename, "download", err)
http.Error(w, err.Error(), http.StatusBadGateway)
return false
}
defer resp.Body.Close()
+ if resp.StatusCode != http.StatusOK {
+ log.Println(
+ "error", r.RemoteAddr,
+ "pypi", filename, "download",
+ "HTTP status:", resp.Status,
+ )
+ http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
+ return false
+ }
hasher := hasherNew()
hasherSHA256 := sha256.New()
+ hasherBLAKE2b256 := blake2b256New()
dst, err := TempFile(dirPath)
if err != nil {
+ log.Println("error", r.RemoteAddr, "pypi", filename, err)
http.Error(w, err.Error(), http.StatusInternalServerError)
return false
}
if hashAlgo != HashAlgoSHA256 {
wrs = append(wrs, hasherSHA256)
}
+ if hashAlgo != HashAlgoBLAKE2b256 {
+ wrs = append(wrs, hasherBLAKE2b256)
+ }
wr := io.MultiWriter(wrs...)
if _, err = io.Copy(wr, resp.Body); err != nil {
os.Remove(dst.Name())
dst.Close()
+ log.Println("error", r.RemoteAddr, "pypi", filename, err)
http.Error(w, err.Error(), http.StatusInternalServerError)
return false
}
if err = dstBuf.Flush(); err != nil {
os.Remove(dst.Name())
dst.Close()
+ log.Println("error", r.RemoteAddr, "pypi", filename, err)
http.Error(w, err.Error(), http.StatusInternalServerError)
return false
}
http.Error(w, "digest mismatch", http.StatusBadGateway)
return false
}
- if err = dst.Sync(); err != nil {
+ if digestStored, err := os.ReadFile(path + "." + hashAlgo); err == nil &&
+ bytes.Compare(digest, digestStored) != 0 {
+ err = errors.New("stored digest mismatch")
+ log.Println("error", r.RemoteAddr, "pypi", filename, err)
os.Remove(dst.Name())
dst.Close()
http.Error(w, err.Error(), http.StatusInternalServerError)
return false
}
+ if !NoSync {
+ if err = dst.Sync(); err != nil {
+ os.Remove(dst.Name())
+ dst.Close()
+ log.Println("error", r.RemoteAddr, "pypi", filename, err)
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ return false
+ }
+ }
if err = dst.Close(); err != nil {
+ log.Println("error", r.RemoteAddr, "pypi", filename, err)
http.Error(w, err.Error(), http.StatusInternalServerError)
return false
}
+ if err = os.Chtimes(dst.Name(), mtime, mtime); err != nil {
+ log.Println("error", r.RemoteAddr, "pypi", filename, err)
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ }
if err = os.Rename(dst.Name(), path); err != nil {
+ log.Println("error", r.RemoteAddr, "pypi", filename, err)
http.Error(w, err.Error(), http.StatusInternalServerError)
return false
}
if err = DirSync(dirPath); err != nil {
+ log.Println("error", r.RemoteAddr, "pypi", filename, err)
http.Error(w, err.Error(), http.StatusInternalServerError)
return false
}
- if hashAlgo != HashAlgoSHA256 {
- hashAlgo = HashAlgoSHA256
- digest = hasherSHA256.Sum(nil)
- for _, algo := range knownHashAlgos[1:] {
- os.Remove(path + "." + algo)
+
+ var digestSHA256 []byte
+ var digestBLAKE2b256 []byte
+ if hashAlgo == HashAlgoSHA256 {
+ digestSHA256 = hasher.Sum(nil)
+ } else {
+ digestSHA256 = hasherSHA256.Sum(nil)
+ }
+ if hashAlgo == HashAlgoBLAKE2b256 {
+ digestBLAKE2b256 = hasher.Sum(nil)
+ } else {
+ digestBLAKE2b256 = hasherBLAKE2b256.Sum(nil)
+ }
+ if err = WriteFileSync(
+ dirPath, path+"."+HashAlgoSHA256,
+ digestSHA256, mtime,
+ ); err != nil {
+ log.Println(
+ "error", r.RemoteAddr, "pypi",
+ path+"."+HashAlgoSHA256, err,
+ )
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ return false
+ }
+ if err = WriteFileSync(
+ dirPath, path+"."+HashAlgoBLAKE2b256,
+ digestBLAKE2b256, mtime,
+ ); err != nil {
+ log.Println(
+ "error", r.RemoteAddr, "pypi",
+ path+"."+HashAlgoBLAKE2b256, err,
+ )
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ return false
+ }
+ for _, algo := range KnownHashAlgos[2:] {
+ os.Remove(path + "." + algo)
+ }
+ digest = nil
+ }
+ if mtimeExists {
+ stat, err := os.Stat(path)
+ if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
+ log.Println(r.RemoteAddr, "pypi", filename, "touch")
+ if err = os.Chtimes(path, mtime, mtime); err != nil {
+ log.Println("error", r.RemoteAddr, "pypi", filename, err)
+ http.Error(w, err.Error(), http.StatusInternalServerError)
}
}
}
+
if filename == filenameGet || gpgUpdate {
- if _, err = os.Stat(path); err != nil {
- goto GPGSigSkip
- }
- resp, err := http.Get(uri + GPGSigExt)
+ resp, err := c.Do(agentedReq(uri + GPGSigExt))
if err != nil {
goto GPGSigSkip
}
resp.Body.Close()
goto GPGSigSkip
}
- sig, err := ioutil.ReadAll(resp.Body)
+ sig, err := io.ReadAll(resp.Body)
resp.Body.Close()
if err != nil {
goto GPGSigSkip
}
if !bytes.HasPrefix(sig, []byte("-----BEGIN PGP SIGNATURE-----")) {
- log.Println(r.RemoteAddr, "pypi non PGP signature", filename)
+ log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "non PGP")
goto GPGSigSkip
}
- if err = WriteFileSync(dirPath, path+GPGSigExt, sig); err != nil {
+ if err = WriteFileSync(dirPath, path+GPGSigExt, sig, mtime); err != nil {
+ log.Println("error", r.RemoteAddr, "pypi", filename+GPGSigExt, err)
http.Error(w, err.Error(), http.StatusInternalServerError)
return false
}
- log.Println(r.RemoteAddr, "pypi downloaded signature", filename)
+ log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "downloaded")
}
+ if mtimeExists {
+ stat, err := os.Stat(path + GPGSigExt)
+ if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
+ log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "touch")
+ if err = os.Chtimes(path+GPGSigExt, mtime, mtime); err != nil {
+ log.Println("error", r.RemoteAddr, "pypi", filename, err)
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ }
+ }
+ }
+
GPGSigSkip:
+ if digest == nil {
+ continue
+ }
path = path + "." + hashAlgo
- _, err = os.Stat(path)
- if err == nil {
+ stat, err := os.Stat(path)
+ if err == nil && (!mtimeExists ||
+ (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime))) {
continue
}
- if !os.IsNotExist(err) {
+ if err != nil && !os.IsNotExist(err) {
+ log.Println("error", r.RemoteAddr, "pypi", path, err)
http.Error(w, err.Error(), http.StatusInternalServerError)
return false
}
- log.Println(r.RemoteAddr, "pypi touch", filename)
- if err = WriteFileSync(dirPath, path, digest); err != nil {
+ log.Println(r.RemoteAddr, "pypi", filename, "touch")
+ if err = WriteFileSync(dirPath, path, digest, mtime); err != nil {
+ log.Println("error", r.RemoteAddr, "pypi", path, err)
http.Error(w, err.Error(), http.StatusInternalServerError)
return false
}