2 GoCheese -- Python private package repository and caching proxy
3 Copyright (C) 2019-2023 Sergey Matveev <stargrave@stargrave.org>
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, version 3 of the License.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
40 "go.cypherpunks.ru/recfile"
41 "golang.org/x/crypto/blake2b"
// Hash algorithm identifiers. In this file they serve both as keys into a
// release's Digests map and as the filename suffix of the stored digest
// files (path + "." + algo).
45 HashAlgoBLAKE2b256 = "blake2b_256"
46 HashAlgoSHA256 = "sha256"
47 HashAlgoSHA512 = "sha512"
// Name of a file looked up inside a package directory (Root/pkgName) before
// any upstream request is made. Presumably it marks a package as internal
// only -- the branch that handles it is not visible here, TODO confirm.
49 InternalFlag = ".internal"
// PkgPyPI matches a single line that contains an HTML anchor and captures
// the href attribute value (group 1) and the link text (group 2). The refresh
// code applies it to each line of the upstream index page and uses group 2
// as the filename.
53 PkgPyPI = regexp.MustCompile(`^.*<a href="([^"]+)"[^>]*>(.+)</a>.*$`)
// PyPIURLParsed is the base against which host-less (relative) package links
// are resolved. It is a nil pointer until assigned elsewhere.
54 PyPIURLParsed *url.URL
// PyPIHTTPTransport is the transport shared by the HTTP clients created in
// the refresh code; its configuration is not visible in this view.
55 PyPIHTTPTransport http.Transport
// KnownHashAlgos lists the digest algorithm names. The elements are not
// visible in this view, but order matters: the refresh code removes digest
// files for KnownHashAlgos[2:] after writing the BLAKE2b-256 and SHA-256
// ones, so the first two entries are presumably those two -- TODO confirm.
56 KnownHashAlgos []string = []string{
// blake2b256New builds an unkeyed BLAKE2b-256 hasher (blake2b.New256 with a
// nil key). Its func() hash.Hash shape lets it be assigned to the same
// constructor variable as sha256.New and sha512.New in the refresh code.
// NOTE(review): the handling of err and the return statement are not visible
// in this view.
64 func blake2b256New() hash.Hash {
65 h, err := blake2b.New256(nil)
// agentedReq builds a body-less GET request for url and sets its User-Agent
// header to the package-level UserAgent value.
// Note that the parameter name url shadows the net/url package inside this
// function.
// NOTE(review): what happens when http.NewRequest fails is not visible in
// this view; callers pass the result straight to Client.Do, so they appear
// to rely on it never being nil -- TODO confirm.
72 func agentedReq(url string) *http.Request {
73 req, err := http.NewRequest("GET", url, nil)
77 req.Header.Set("User-Agent", UserAgent)
// RecFieldToValuesMap pairs one metadata field name with a list of values.
// The field declarations are not visible in this view; the refresh code
// reads them as m.recField (looked up in MDFieldToRecField) and
// m.jsonFields (ranged over, one record field written per value).
81 type RecFieldToValuesMap struct {
// NOTE(review): the func line, several parameters/locals and the closing
// lines of this function are not visible in this view, and the embedded line
// numbers show gaps (most error checks, returns and closing braces are
// missing). The comments below describe only the statements that are present.
//
// Visible outline:
//  1. fetch the package JSON metadata, store it as a recfile (MDFile) and
//     collect per-file upload times and digests;
//  2. fetch the upstream index page and walk its anchor lines;
//  3. for the file named by filenameGet, download it, verify its digest and
//     store it together with digest files; for the listed files, keep mtimes
//     and digest files up to date.
// Every visible failure path logs the problem and answers the client with
// http.Error (502 for upstream problems, 500 for local ones).
87 w http.ResponseWriter,
89 pkgName, filenameGet string,
// Probe for the InternalFlag file inside the package directory. The branch
// taken when it exists is not visible here.
91 if _, err := os.Stat(filepath.Join(Root, pkgName, InternalFlag)); err == nil {
// One client for all upstream requests made below, on the shared transport.
// NOTE(review): no Timeout is set on this client; whether the transport
// bounds request duration cannot be seen from here.
94 c := http.Client{Transport: &PyPIHTTPTransport}
95 dirPath := filepath.Join(Root, pkgName)
// Release information taken from the JSON metadata; stays nil unless one of
// the assignments below runs.
98 var allReleases map[string][]*PkgReleaseInfo
// --- JSON metadata: GET *JSONURL + pkgName + "/json" ---
100 resp, err := c.Do(agentedReq(*JSONURL + pkgName + "/json"))
102 log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
103 http.Error(w, err.Error(), http.StatusBadGateway)
// Anything other than 200 from upstream is reported to the client as 502.
106 if resp.StatusCode != http.StatusOK {
109 "error", r.RemoteAddr, "refresh-json", pkgName,
110 "HTTP status:", resp.Status,
112 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
// The whole metadata document is read into memory because it is decoded
// from the same byte slice more than once below.
115 body, err := io.ReadAll(resp.Body)
118 log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
119 http.Error(w, "can not read body", http.StatusBadGateway)
// The metadata is re-serialised as recfile fields into buf (declared on a
// line not visible here).
124 var description string
125 wr := recfile.NewWriter(&buf)
// First decoding attempt, into meta (declaration not visible).
127 err = json.Unmarshal(body, &meta)
// Single-valued metadata fields: (field name, value) pairs.
129 for _, m := range [][2]string{
130 {MDFieldName, meta.Info.Name},
131 {MDFieldVersion, meta.Info.Version},
132 {MDFieldSummary, meta.Info.Summary},
133 {MDFieldDescriptionContentType, meta.Info.DescriptionContentType},
134 {MDFieldKeywords, meta.Info.Keywords},
135 {MDFieldHomePage, meta.Info.HomePage},
136 {MDFieldAuthor, meta.Info.Author},
137 {MDFieldAuthorEmail, meta.Info.AuthorEmail},
138 {MDFieldMaintainer, meta.Info.Maintainer},
139 {MDFieldMaintainerEmail, meta.Info.MaintainerEmail},
140 {MDFieldLicense, meta.Info.License},
141 {MDFieldRequiresPython, meta.Info.RequiresPython},
143 recField, jsonField := m[0], m[1]
// The record field name is the MDFieldToRecField translation of the
// metadata field name.
147 if _, err = wr.WriteFields(recfile.Field{
148 Name: MDFieldToRecField[recField],
// Multi-valued metadata fields: one record field is written per value.
154 for _, m := range []RecFieldToValuesMap{
155 {MDFieldClassifier, meta.Info.Classifier},
156 {MDFieldPlatform, meta.Info.Platform},
157 {MDFieldSupportedPlatform, meta.Info.SupportedPlatform},
158 {MDFieldRequiresDist, meta.Info.RequiresDist},
159 {MDFieldRequiresExternal, meta.Info.RequiresExternal},
160 {MDFieldProjectURL, meta.Info.ProjectURL},
161 {MDFieldProvidesExtra, meta.Info.ProvidesExtra},
163 for _, v := range m.jsonFields {
164 if _, err = wr.WriteFields(recfile.Field{
165 Name: MDFieldToRecField[m.recField],
172 description = meta.Info.Description
173 allReleases = meta.Releases
// Second decoding of the same body, into PkgMetaStripped. Presumably this is
// the fallback when the first Unmarshal fails -- the branch structure is not
// visible here, TODO confirm. A failure here is reported as 502.
175 var metaStripped PkgMetaStripped
176 err = json.Unmarshal(body, &metaStripped)
179 "error", r.RemoteAddr, "refresh-json", pkgName,
180 "can not parse JSON:", err,
182 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
// Same single-valued field set as above, sourced from metaStripped.
185 for _, m := range [][2]string{
186 {MDFieldName, metaStripped.Info.Name},
187 {MDFieldVersion, metaStripped.Info.Version},
188 {MDFieldSummary, metaStripped.Info.Summary},
189 {MDFieldDescriptionContentType, metaStripped.Info.DescriptionContentType},
190 {MDFieldKeywords, metaStripped.Info.Keywords},
191 {MDFieldHomePage, metaStripped.Info.HomePage},
192 {MDFieldAuthor, metaStripped.Info.Author},
193 {MDFieldAuthorEmail, metaStripped.Info.AuthorEmail},
194 {MDFieldMaintainer, metaStripped.Info.Maintainer},
195 {MDFieldMaintainerEmail, metaStripped.Info.MaintainerEmail},
196 {MDFieldLicense, metaStripped.Info.License},
197 {MDFieldRequiresPython, metaStripped.Info.RequiresPython},
199 recField, jsonField := m[0], m[1]
203 if _, err = wr.WriteFields(recfile.Field{
204 Name: MDFieldToRecField[recField],
// For the stripped form only Classifier and RequiresDist are written as
// multi-valued fields.
211 for _, m := range []RecFieldToValuesMap{
212 {MDFieldClassifier, metaStripped.Info.Classifier},
213 {MDFieldRequiresDist, metaStripped.Info.RequiresDist},
215 for _, v := range m.jsonFields {
216 if _, err = wr.WriteFields(recfile.Field{
217 Name: MDFieldToRecField[m.recField],
224 description = metaStripped.Info.Description
225 allReleases = metaStripped.Releases
// The description goes out as one multiline field, split on newlines.
227 lines := strings.Split(description, "\n")
229 if _, err = wr.WriteFieldMultiline(
230 MDFieldDescription, lines,
// mkdirForPkg receives w and r and returns a bool; its false branch is not
// visible here -- presumably it has already answered the client.
236 if !mkdirForPkg(w, r, pkgName) {
// Rewrite the metadata file only when it cannot be read or its content
// differs from the freshly built buffer.
239 path := filepath.Join(dirPath, MDFile)
240 existing, err := os.ReadFile(path)
241 if err != nil || !bytes.Equal(existing, buf.Bytes()) {
242 if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil {
243 log.Println("error", r.RemoteAddr, "refresh-json", path, err)
244 http.Error(w, err.Error(), http.StatusInternalServerError)
247 log.Println(r.RemoteAddr, "pypi", pkgName+"/"+MDFile, "touch")
// Per-filename lookup tables built from the release list: upload time and
// raw (hex-decoded) digests for each supported algorithm.
250 mtimes := make(map[string]time.Time)
251 digestsBLAKE2b256 := make(map[string][]byte)
252 digestsSHA256 := make(map[string][]byte)
253 digestsSHA512 := make(map[string][]byte)
254 for _, releases := range allReleases {
255 for _, rel := range releases {
// Entries lacking a filename or an upload time get separate treatment; the
// branch body is not visible here -- presumably they are skipped.
256 if rel.Filename == "" || rel.UploadTimeISO8601 == "" {
259 t, err := time.Parse(time.RFC3339Nano, rel.UploadTimeISO8601)
262 "error", r.RemoteAddr, "refresh-json", pkgName,
263 "can not parse upload_time:", err,
265 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
// Upload times are kept at one-second resolution; the file mtime comparisons
// further down truncate to the second as well.
268 mtimes[rel.Filename] = t.Truncate(time.Second)
269 if d := rel.Digests[HashAlgoBLAKE2b256]; d != "" {
270 digestsBLAKE2b256[rel.Filename], err = hex.DecodeString(d)
273 "error", r.RemoteAddr, "refresh-json", pkgName,
274 "can not decode blake2b_256 digest:", err,
276 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
280 if d := rel.Digests[HashAlgoSHA256]; d != "" {
281 digestsSHA256[rel.Filename], err = hex.DecodeString(d)
284 "error", r.RemoteAddr, "refresh-json", pkgName,
285 "can not decode sha256 digest:", err,
287 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
291 if d := rel.Digests[HashAlgoSHA512]; d != "" {
292 digestsSHA512[rel.Filename], err = hex.DecodeString(d)
295 "error", r.RemoteAddr, "refresh-json", pkgName,
296 "can not decode sha512 digest:", err,
298 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
// --- Index page: GET *PyPIURL + pkgName + "/" ---
305 resp, err := c.Do(agentedReq(*PyPIURL + pkgName + "/"))
307 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
308 http.Error(w, err.Error(), http.StatusBadGateway)
311 if resp.StatusCode != http.StatusOK {
314 "error", r.RemoteAddr, "refresh", pkgName,
315 "HTTP status:", resp.Status,
317 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
320 body, err := io.ReadAll(resp.Body)
323 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
324 http.Error(w, err.Error(), http.StatusBadGateway)
327 if !mkdirForPkg(w, r, pkgName) {
// The page is processed line by line; only lines matching PkgPyPI (one
// anchor per line) are considered.
330 for _, lineRaw := range bytes.Split(body, []byte("\n")) {
331 submatches := PkgPyPI.FindStringSubmatch(string(lineRaw))
332 if len(submatches) == 0 {
// Group 2 is the link text, used as the filename. uri is assigned on a line
// not visible here -- presumably from group 1, the href.
336 filename := submatches[2]
337 pkgURL, err := url.Parse(uri)
339 log.Println("error", r.RemoteAddr, "refresh", uri, err)
340 http.Error(w, err.Error(), http.StatusBadGateway)
// Digest selection, first choice: a digest taken from the JSON metadata,
// tried in the order BLAKE2b-256, SHA-256, SHA-512.
345 var hasherNew func() hash.Hash
347 if d := digestsBLAKE2b256[filename]; d != nil {
348 hasherNew = blake2b256New
349 hashAlgo = HashAlgoBLAKE2b256
351 } else if d := digestsSHA256[filename]; d != nil {
352 hasherNew = sha256.New
353 hashAlgo = HashAlgoSHA256
355 } else if d := digestsSHA512[filename]; d != nil {
356 hasherNew = sha512.New
357 hashAlgo = HashAlgoSHA512
// Digest selection from the URL fragment of the link, expected in the form
// algo=hexdigest. A link carrying no fragment at all is rejected with 502.
360 if pkgURL.Fragment == "" {
361 log.Println(r.RemoteAddr, "pypi", filename, "no digest")
362 http.Error(w, "no digest provided", http.StatusBadGateway)
365 digestInfo := strings.Split(pkgURL.Fragment, "=")
366 if len(digestInfo) == 1 {
367 // Ancient non PEP-0503 PyPIs, assume MD5
368 digestInfo = []string{"md5", digestInfo[0]}
369 } else if len(digestInfo) != 2 {
370 log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
371 http.Error(w, "invalid digest provided", http.StatusBadGateway)
375 digest, err = hex.DecodeString(digestInfo[1])
377 log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
378 http.Error(w, err.Error(), http.StatusBadGateway)
// Map the algorithm name to a hasher constructor and the digest size it
// must have. Only part of the switch is visible here.
381 hashAlgo = digestInfo[0]
384 case HashAlgoBLAKE2b256:
385 hasherNew = blake2b256New
386 hashSize = blake2b.Size256
388 hasherNew = sha256.New
389 hashSize = sha256.Size
391 hasherNew = sha512.New
392 hashSize = sha512.Size
398 "error", r.RemoteAddr, "pypi",
399 filename, "unknown digest", hashAlgo,
401 http.Error(w, "unknown digest algorithm", http.StatusBadGateway)
// A decoded digest of the wrong size for its algorithm is rejected before
// any download is attempted.
404 if len(digest) != hashSize {
406 "error", r.RemoteAddr, "pypi",
407 filename, "invalid digest length")
408 http.Error(w, "invalid digest length", http.StatusBadGateway)
// Links lacking a host are relative and are resolved against PyPIURLParsed.
414 if pkgURL.Host == "" {
415 uri = PyPIURLParsed.ResolveReference(pkgURL).String()
417 uri = pkgURL.String()
// mtimeExists is false when the JSON metadata had no upload time for this
// filename; mtime is then the zero time.Time unless a line not visible here
// assigns something else.
419 mtime, mtimeExists := mtimes[filename]
424 path := filepath.Join(dirPath, filename)
// Only the file that was actually requested is downloaded.
425 if filename == filenameGet {
427 // Skip heavy remote call, when shutting down
428 http.Error(w, "shutting down", http.StatusInternalServerError)
431 log.Println(r.RemoteAddr, "pypi", filename, "download")
432 resp, err = c.Do(agentedReq(uri))
434 log.Println("error", r.RemoteAddr, "pypi", filename, "download", err)
435 http.Error(w, err.Error(), http.StatusBadGateway)
// NOTE(review): this defer is inside the per-line loop, so the body is
// closed only when the function returns, not at the end of the iteration.
438 defer resp.Body.Close()
439 if resp.StatusCode != http.StatusOK {
441 "error", r.RemoteAddr,
442 "pypi", filename, "download",
443 "HTTP status:", resp.Status,
445 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
// hasher verifies the advertised digest. BLAKE2b-256 and SHA-256 are also
// wanted for the stored digest files, so they get their own hashers.
448 hasher := hasherNew()
449 hasherBLAKE2b256 := blake2b256New()
450 hasherSHA256 := sha256.New()
// The download goes to a temporary file in the package directory and is
// renamed over the final path only after all checks pass.
451 dst, err := TempFile(dirPath)
453 log.Println("error", r.RemoteAddr, "pypi", filename, err)
454 http.Error(w, err.Error(), http.StatusInternalServerError)
// The body is streamed once into the verifying hasher and the buffered
// file. The two extra hashers are added only when they are not already the
// verifying algorithm, so nothing is hashed twice with the same algorithm.
457 dstBuf := bufio.NewWriter(dst)
458 wrs := []io.Writer{hasher, dstBuf}
459 if hashAlgo != HashAlgoBLAKE2b256 {
460 wrs = append(wrs, hasherBLAKE2b256)
462 if hashAlgo != HashAlgoSHA256 {
463 wrs = append(wrs, hasherSHA256)
465 wr := io.MultiWriter(wrs...)
// From here until the rename, visible failure paths remove the temporary
// file before reporting the error.
466 if _, err = io.Copy(wr, resp.Body); err != nil {
467 os.Remove(dst.Name())
469 log.Println("error", r.RemoteAddr, "pypi", filename, err)
470 http.Error(w, err.Error(), http.StatusInternalServerError)
473 if err = dstBuf.Flush(); err != nil {
474 os.Remove(dst.Name())
476 log.Println("error", r.RemoteAddr, "pypi", filename, err)
477 http.Error(w, err.Error(), http.StatusInternalServerError)
// Check 1: the downloaded bytes must hash to the advertised digest.
480 if !bytes.Equal(hasher.Sum(nil), digest) {
481 log.Println(r.RemoteAddr, "pypi", filename, "digest mismatch")
482 os.Remove(dst.Name())
484 http.Error(w, "digest mismatch", http.StatusBadGateway)
// Check 2: if a digest file for this algorithm already exists on disk, the
// advertised digest must equal it. A missing or unreadable digest file does
// not fail the check.
487 if digestStored, err := os.ReadFile(path + "." + hashAlgo); err == nil &&
488 !bytes.Equal(digest, digestStored) {
489 err = errors.New("stored digest mismatch")
490 log.Println("error", r.RemoteAddr, "pypi", filename, err)
491 os.Remove(dst.Name())
493 http.Error(w, err.Error(), http.StatusInternalServerError)
// Publish sequence: sync the data, close the file, set its timestamps,
// rename it into place, then sync the directory.
497 if err = dst.Sync(); err != nil {
498 os.Remove(dst.Name())
500 log.Println("error", r.RemoteAddr, "pypi", filename, err)
501 http.Error(w, err.Error(), http.StatusInternalServerError)
505 if err = dst.Close(); err != nil {
506 log.Println("error", r.RemoteAddr, "pypi", filename, err)
507 http.Error(w, err.Error(), http.StatusInternalServerError)
510 if err = os.Chtimes(dst.Name(), mtime, mtime); err != nil {
511 log.Println("error", r.RemoteAddr, "pypi", filename, err)
512 http.Error(w, err.Error(), http.StatusInternalServerError)
514 if err = os.Rename(dst.Name(), path); err != nil {
515 log.Println("error", r.RemoteAddr, "pypi", filename, err)
516 http.Error(w, err.Error(), http.StatusInternalServerError)
519 if err = DirSync(dirPath); err != nil {
520 log.Println("error", r.RemoteAddr, "pypi", filename, err)
521 http.Error(w, err.Error(), http.StatusInternalServerError)
// Take each digest from whichever hasher actually computed it: the
// verifying hasher when the algorithm matches, the extra hasher otherwise.
525 var digestBLAKE2b256 []byte
526 var digestSHA256 []byte
527 if hashAlgo == HashAlgoBLAKE2b256 {
528 digestBLAKE2b256 = hasher.Sum(nil)
530 digestBLAKE2b256 = hasherBLAKE2b256.Sum(nil)
532 if hashAlgo == HashAlgoSHA256 {
533 digestSHA256 = hasher.Sum(nil)
535 digestSHA256 = hasherSHA256.Sum(nil)
// Store the BLAKE2b-256 and SHA-256 digest files next to the package file.
537 if err = WriteFileSync(
538 dirPath, path+"."+HashAlgoBLAKE2b256,
539 digestBLAKE2b256, mtime,
542 "error", r.RemoteAddr, "pypi",
543 path+"."+HashAlgoBLAKE2b256, err,
545 http.Error(w, err.Error(), http.StatusInternalServerError)
548 if err = WriteFileSync(
549 dirPath, path+"."+HashAlgoSHA256,
553 "error", r.RemoteAddr, "pypi",
554 path+"."+HashAlgoSHA256, err,
556 http.Error(w, err.Error(), http.StatusInternalServerError)
// Digest files for every algorithm after the first two entries of
// KnownHashAlgos are removed, best effort: the os.Remove error is ignored.
559 for _, algo := range KnownHashAlgos[2:] {
560 os.Remove(path + "." + algo)
// If the package file exists locally and its mtime differs from mtime at
// one-second resolution, reset its timestamps. A failing Stat skips this.
565 stat, err := os.Stat(path)
566 if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
567 log.Println(r.RemoteAddr, "pypi", filename, "touch")
568 if err = os.Chtimes(path, mtime, mtime); err != nil {
569 log.Println("error", r.RemoteAddr, "pypi", filename, err)
570 http.Error(w, err.Error(), http.StatusInternalServerError)
// From here on, path names the digest file for the selected algorithm.
578 path = path + "." + hashAlgo
579 stat, err := os.Stat(path)
// The digest file counts as current when it exists and either no upload
// time is known or its mtime already matches. The branch body is not
// visible here -- presumably it moves on to the next line.
// NOTE(review): the inner mtimeExists test is redundant, because the right
// operand of || is evaluated only when mtimeExists is true.
580 if err == nil && (!mtimeExists ||
581 (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime))) {
// Any Stat failure other than a missing file is a local error (500).
584 if err != nil && !os.IsNotExist(err) {
585 log.Println("error", r.RemoteAddr, "pypi", path, err)
586 http.Error(w, err.Error(), http.StatusInternalServerError)
// Otherwise write (or rewrite) the digest file with the selected digest and
// mtime.
589 log.Println(r.RemoteAddr, "pypi", filename, "touch")
590 if err = WriteFileSync(dirPath, path, digest, mtime); err != nil {
591 log.Println("error", r.RemoteAddr, "pypi", path, err)
592 http.Error(w, err.Error(), http.StatusInternalServerError)