1 // GoCheese -- Python private package repository and caching proxy
2 // Copyright (C) 2019-2024 Sergey Matveev <stargrave@stargrave.org>
4 // This program is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, version 3 of the License.
8 // This program is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 // GNU General Public License for more details.
13 // You should have received a copy of the GNU General Public License
14 // along with this program. If not, see <http://www.gnu.org/licenses/>.
38 "go.cypherpunks.ru/recfile"
39 "golang.org/x/crypto/blake2b"
// Digest algorithm names. In this file they are used both as keys into a
// release's Digests map and as the suffix of the per-file digest files
// (path + "." + algorithm).
43 HashAlgoBLAKE2b256 = "blake2b_256"
44 HashAlgoSHA256 = "sha256"
45 HashAlgoSHA512 = "sha512"
// Name of the marker file looked up (via os.Stat) inside a package
// directory at the start of a refresh.
47 InternalFlag = ".internal"
// PkgPyPI matches a whole line that contains an HTML anchor: submatch 1 is
// the href attribute value, submatch 2 is the text between <a ...> and </a>.
51 PkgPyPI = regexp.MustCompile(`^.*<a href="([^"]+)"[^>]*>(.+)</a>.*$`)
// PyPIURLParsed is the base URL against which host-less package links are
// resolved (see the ResolveReference call in the refresh routine).
52 PyPIURLParsed *url.URL
// PyPIHTTPTransport is the transport handed to the http.Client that talks
// to the upstream index.
53 PyPIHTTPTransport http.Transport
// KnownHashAlgos holds algorithm names used as digest-file suffixes.
// NOTE(review): the list elements are outside the visible lines; the
// refresh routine removes digest files for KnownHashAlgos[2:], so the
// ordering of this slice matters — confirm against the full literal.
54 KnownHashAlgos []string = []string{
// blake2b256New creates an unkeyed (nil key) BLAKE2b-256 hasher through
// blake2b.New256. Its func() hash.Hash shape lets it be assigned to the
// same variable as sha256.New and sha512.New.
// NOTE(review): the handling of err and the return statement are outside
// the visible lines.
62 func blake2b256New() hash.Hash {
63 h, err := blake2b.New256(nil)
// agentedReq builds a body-less GET request for url and sets its
// User-Agent header to UserAgent.
// NOTE(review): the handling of the http.NewRequest error and the return
// statement are outside the visible lines.
70 func agentedReq(url string) *http.Request {
71 req, err := http.NewRequest("GET", url, nil)
75 req.Header.Set("User-Agent", UserAgent)
// RecFieldToValuesMap pairs a metadata field name with the values to be
// written under it.
// NOTE(review): the field declarations are outside the visible lines; the
// refresh routine reads .recField (used as a key into MDFieldToRecField)
// and ranges over .jsonFields — confirm names and types against the full
// definition.
79 type RecFieldToValuesMap struct {
// Parameter list of the refresh routine (its func line and the declaration
// of r are outside the visible lines). w receives the HTTP error replies,
// pkgName selects the package directory under Root, and filenameGet is
// compared against each listed file name before downloading.
85 w http.ResponseWriter,
87 pkgName, filenameGet string,
// Probe for the InternalFlag marker file in the package directory; the
// branch is taken when the file can be stat-ed.
// NOTE(review): the branch body is outside the visible lines.
89 if _, err := os.Stat(filepath.Join(Root, pkgName, InternalFlag)); err == nil {
// One client, using the shared transport, serves every upstream request
// made below.
92 c := http.Client{Transport: &PyPIHTTPTransport}
93 dirPath := filepath.Join(Root, pkgName)
// Release descriptions taken from the JSON metadata.
// NOTE(review): map key is presumably the release version — confirm.
96 var allReleases map[string][]*PkgReleaseInfo
// Fetch the package's JSON metadata from *JSONURL + pkgName + "/json".
// Upstream failures are logged with the "refresh-json" tag and answered
// with 502 Bad Gateway.
98 resp, err := c.Do(agentedReq(*JSONURL + pkgName + "/json"))
100 log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
101 http.Error(w, err.Error(), http.StatusBadGateway)
// Any upstream status other than 200 OK is treated as an error.
104 if resp.StatusCode != http.StatusOK {
107 "error", r.RemoteAddr, "refresh-json", pkgName,
108 "HTTP status:", resp.Status,
110 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
// The whole metadata document is read into memory, because it is decoded
// more than once below.
113 body, err := io.ReadAll(resp.Body)
116 log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
117 http.Error(w, "can not read body", http.StatusBadGateway)
122 var description string
// Metadata is serialised as recfile fields into buf (declared outside the
// visible lines).
123 wr := recfile.NewWriter(&buf)
// Decode into meta (declared outside the visible lines).
// NOTE(review): the condition that chooses between this path and the
// PkgMetaStripped path further down is not visible — presumably the
// stripped decode is a fallback for when this one fails; confirm.
125 err = json.Unmarshal(body, &meta)
// Single-valued metadata: each pair is {metadata field name, value taken
// from the decoded JSON}.
127 for _, m := range [][2]string{
128 {MDFieldName, meta.Info.Name},
129 {MDFieldVersion, meta.Info.Version},
130 {MDFieldSummary, meta.Info.Summary},
131 {MDFieldDescriptionContentType, meta.Info.DescriptionContentType},
132 {MDFieldKeywords, meta.Info.Keywords},
133 {MDFieldHomePage, meta.Info.HomePage},
134 {MDFieldAuthor, meta.Info.Author},
135 {MDFieldAuthorEmail, meta.Info.AuthorEmail},
136 {MDFieldMaintainer, meta.Info.Maintainer},
137 {MDFieldMaintainerEmail, meta.Info.MaintainerEmail},
138 {MDFieldLicense, meta.Info.License},
139 {MDFieldRequiresPython, meta.Info.RequiresPython},
141 recField, jsonField := m[0], m[1]
// The recfile field name is looked up through MDFieldToRecField.
145 if _, err = wr.WriteFields(recfile.Field{
146 Name: MDFieldToRecField[recField],
// Multi-valued metadata: one recfile field is written per value.
152 for _, m := range []RecFieldToValuesMap{
153 {MDFieldClassifier, meta.Info.Classifier},
154 {MDFieldPlatform, meta.Info.Platform},
155 {MDFieldSupportedPlatform, meta.Info.SupportedPlatform},
156 {MDFieldRequiresDist, meta.Info.RequiresDist},
157 {MDFieldRequiresExternal, meta.Info.RequiresExternal},
158 {MDFieldProjectURL, meta.Info.ProjectURL},
159 {MDFieldProvidesExtra, meta.Info.ProvidesExtra},
161 for _, v := range m.jsonFields {
162 if _, err = wr.WriteFields(recfile.Field{
163 Name: MDFieldToRecField[m.recField],
// Keep the long description and the release list for the steps below.
170 description = meta.Info.Description
171 allReleases = meta.Releases
// Second decoding path: the same body is decoded into PkgMetaStripped.
// A failure here is reported to the client as 502.
173 var metaStripped PkgMetaStripped
174 err = json.Unmarshal(body, &metaStripped)
177 "error", r.RemoteAddr, "refresh-json", pkgName,
178 "can not parse JSON:", err,
180 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
// Same single-valued field set as above, sourced from metaStripped.
183 for _, m := range [][2]string{
184 {MDFieldName, metaStripped.Info.Name},
185 {MDFieldVersion, metaStripped.Info.Version},
186 {MDFieldSummary, metaStripped.Info.Summary},
187 {MDFieldDescriptionContentType, metaStripped.Info.DescriptionContentType},
188 {MDFieldKeywords, metaStripped.Info.Keywords},
189 {MDFieldHomePage, metaStripped.Info.HomePage},
190 {MDFieldAuthor, metaStripped.Info.Author},
191 {MDFieldAuthorEmail, metaStripped.Info.AuthorEmail},
192 {MDFieldMaintainer, metaStripped.Info.Maintainer},
193 {MDFieldMaintainerEmail, metaStripped.Info.MaintainerEmail},
194 {MDFieldLicense, metaStripped.Info.License},
195 {MDFieldRequiresPython, metaStripped.Info.RequiresPython},
197 recField, jsonField := m[0], m[1]
201 if _, err = wr.WriteFields(recfile.Field{
202 Name: MDFieldToRecField[recField],
// Only Classifier and RequiresDist are written as multi-valued fields on
// this path.
209 for _, m := range []RecFieldToValuesMap{
210 {MDFieldClassifier, metaStripped.Info.Classifier},
211 {MDFieldRequiresDist, metaStripped.Info.RequiresDist},
213 for _, v := range m.jsonFields {
214 if _, err = wr.WriteFields(recfile.Field{
215 Name: MDFieldToRecField[m.recField],
222 description = metaStripped.Info.Description
223 allReleases = metaStripped.Releases
// The description is written as one multi-line field, split on "\n".
225 lines := strings.Split(description, "\n")
227 if _, err = wr.WriteFieldMultiline(
228 MDFieldDescription, lines,
// NOTE(review): mkdirForPkg is defined elsewhere; it is given w and r and
// returns a bool that is negated here, so it presumably reports success
// and answers the client itself on failure — confirm.
234 if !mkdirForPkg(w, r, pkgName) {
// Rewrite the metadata file only when it cannot be read or its content
// differs from the freshly built buffer.
237 path := filepath.Join(dirPath, MDFile)
238 existing, err := os.ReadFile(path)
239 if err != nil || !bytes.Equal(existing, buf.Bytes()) {
// NOTE(review): now is declared outside the visible lines.
240 if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil {
241 log.Println("error", r.RemoteAddr, "refresh-json", path, err)
242 http.Error(w, err.Error(), http.StatusInternalServerError)
245 log.Println(r.RemoteAddr, "pypi", pkgName+"/"+MDFile, "touch")
// Per-filename lookup tables built from every release entry: the upload
// time and the raw (hex-decoded) digests, one map per algorithm.
248 mtimes := make(map[string]time.Time)
249 digestsBLAKE2b256 := make(map[string][]byte)
250 digestsSHA256 := make(map[string][]byte)
251 digestsSHA512 := make(map[string][]byte)
252 for _, releases := range allReleases {
253 for _, rel := range releases {
// Entries lacking a file name or an upload time are special-cased.
// NOTE(review): branch body not visible — presumably they are skipped.
254 if rel.Filename == "" || rel.UploadTimeISO8601 == "" {
257 t, err := time.Parse(time.RFC3339Nano, rel.UploadTimeISO8601)
260 "error", r.RemoteAddr, "refresh-json", pkgName,
261 "can not parse upload_time:", err,
263 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
// Upload times are stored with one-second granularity; later comparisons
// against file modification times truncate the same way.
266 mtimes[rel.Filename] = t.Truncate(time.Second)
// Each digest is optional; an empty string means upstream did not provide
// it. A malformed hex string is answered with 502.
267 if d := rel.Digests[HashAlgoBLAKE2b256]; d != "" {
268 digestsBLAKE2b256[rel.Filename], err = hex.DecodeString(d)
271 "error", r.RemoteAddr, "refresh-json", pkgName,
272 "can not decode blake2b_256 digest:", err,
274 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
278 if d := rel.Digests[HashAlgoSHA256]; d != "" {
279 digestsSHA256[rel.Filename], err = hex.DecodeString(d)
282 "error", r.RemoteAddr, "refresh-json", pkgName,
283 "can not decode sha256 digest:", err,
285 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
289 if d := rel.Digests[HashAlgoSHA512]; d != "" {
290 digestsSHA512[rel.Filename], err = hex.DecodeString(d)
293 "error", r.RemoteAddr, "refresh-json", pkgName,
294 "can not decode sha512 digest:", err,
296 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
// Fetch the package's index page from *PyPIURL + pkgName + "/". Failures
// in this part are logged with the "refresh" tag and answered with 502.
303 resp, err := c.Do(agentedReq(*PyPIURL + pkgName + "/"))
305 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
306 http.Error(w, err.Error(), http.StatusBadGateway)
309 if resp.StatusCode != http.StatusOK {
312 "error", r.RemoteAddr, "refresh", pkgName,
313 "HTTP status:", resp.Status,
315 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
318 body, err := io.ReadAll(resp.Body)
321 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
322 http.Error(w, err.Error(), http.StatusBadGateway)
325 if !mkdirForPkg(w, r, pkgName) {
// The index page is scanned line by line; only lines matching the PkgPyPI
// anchor pattern describe a file.
328 for _, lineRaw := range bytes.Split(body, []byte("\n")) {
329 submatches := PkgPyPI.FindStringSubmatch(string(lineRaw))
// NOTE(review): body of the no-match branch is not visible — presumably
// the line is skipped.
330 if len(submatches) == 0 {
// Submatch 2 is the anchor text, used as the file name.
// NOTE(review): uri is assigned outside the visible lines, presumably
// from submatch 1 (the href) — confirm.
334 filename := submatches[2]
335 pkgURL, err := url.Parse(uri)
337 log.Println("error", r.RemoteAddr, "refresh", uri, err)
338 http.Error(w, err.Error(), http.StatusBadGateway)
// Pick the verification hash. Digests collected from the JSON metadata are
// tried in the order BLAKE2b-256, SHA-256, SHA-512.
343 var hasherNew func() hash.Hash
345 if d := digestsBLAKE2b256[filename]; d != nil {
346 hasherNew = blake2b256New
347 hashAlgo = HashAlgoBLAKE2b256
349 } else if d := digestsSHA256[filename]; d != nil {
350 hasherNew = sha256.New
351 hashAlgo = HashAlgoSHA256
353 } else if d := digestsSHA512[filename]; d != nil {
354 hasherNew = sha512.New
355 hashAlgo = HashAlgoSHA512
// Digest taken from the link's URL fragment, expected as "algo=hexdigest".
// NOTE(review): the enclosing else is not visible — presumably this is the
// fallback when no JSON digest exists for the file.
358 if pkgURL.Fragment == "" {
359 log.Println(r.RemoteAddr, "pypi", filename, "no digest")
360 http.Error(w, "no digest provided", http.StatusBadGateway)
363 digestInfo := strings.Split(pkgURL.Fragment, "=")
364 if len(digestInfo) == 1 {
365 // Ancient non PEP-0503 PyPIs, assume MD5
366 digestInfo = []string{"md5", digestInfo[0]}
367 } else if len(digestInfo) != 2 {
368 log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
369 http.Error(w, "invalid digest provided", http.StatusBadGateway)
373 digest, err = hex.DecodeString(digestInfo[1])
375 log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
376 http.Error(w, err.Error(), http.StatusBadGateway)
379 hashAlgo = digestInfo[0]
// Map the algorithm name to a hasher constructor and the expected digest
// size in bytes. NOTE(review): some case labels are outside the visible
// lines.
382 case HashAlgoBLAKE2b256:
383 hasherNew = blake2b256New
384 hashSize = blake2b.Size256
386 hasherNew = sha256.New
387 hashSize = sha256.Size
389 hasherNew = sha512.New
390 hashSize = sha512.Size
396 "error", r.RemoteAddr, "pypi",
397 filename, "unknown digest", hashAlgo,
399 http.Error(w, "unknown digest algorithm", http.StatusBadGateway)
// The decoded digest must have exactly the size of the chosen algorithm.
402 if len(digest) != hashSize {
404 "error", r.RemoteAddr, "pypi",
405 filename, "invalid digest length")
406 http.Error(w, "invalid digest length", http.StatusBadGateway)
// Links without a host are resolved against PyPIURLParsed; others are used
// as given.
412 if pkgURL.Host == "" {
413 uri = PyPIURLParsed.ResolveReference(pkgURL).String()
415 uri = pkgURL.String()
// Upload time recorded from the JSON metadata, if any.
417 mtime, mtimeExists := mtimes[filename]
// The download code that follows is guarded by this comparison with
// filenameGet.
422 path := filepath.Join(dirPath, filename)
423 if filename == filenameGet {
425 // Skip heavy remote call, when shutting down
426 http.Error(w, "shutting down", http.StatusInternalServerError)
429 log.Println(r.RemoteAddr, "pypi", filename, "download")
430 resp, err = c.Do(agentedReq(uri))
432 log.Println("error", r.RemoteAddr, "pypi", filename, "download", err)
433 http.Error(w, err.Error(), http.StatusBadGateway)
// NOTE(review): this defer appears to sit inside the per-line loop, so it
// would run when the enclosing function returns rather than at the end of
// the iteration — confirm against the full source.
436 defer resp.Body.Close()
437 if resp.StatusCode != http.StatusOK {
439 "error", r.RemoteAddr,
440 "pypi", filename, "download",
441 "HTTP status:", resp.Status,
443 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
// hasher verifies the download against digest. BLAKE2b-256 and SHA-256 are
// additionally computed, because both digest files are written after the
// download.
446 hasher := hasherNew()
447 hasherBLAKE2b256 := blake2b256New()
448 hasherSHA256 := sha256.New()
// The payload goes to a temporary file created via TempFile(dirPath) and is
// renamed onto path only after verification.
449 dst, err := TempFile(dirPath)
451 log.Println("error", r.RemoteAddr, "pypi", filename, err)
452 http.Error(w, err.Error(), http.StatusInternalServerError)
455 dstBuf := bufio.NewWriter(dst)
// A single pass over the body feeds the file and every needed hasher. The
// extra hashers are added only when they differ from the verification
// algorithm, so no hash is computed twice.
456 wrs := []io.Writer{hasher, dstBuf}
457 if hashAlgo != HashAlgoBLAKE2b256 {
458 wrs = append(wrs, hasherBLAKE2b256)
460 if hashAlgo != HashAlgoSHA256 {
461 wrs = append(wrs, hasherSHA256)
463 wr := io.MultiWriter(wrs...)
// From here on, every failure path removes the temporary file before
// replying.
464 if _, err = io.Copy(wr, resp.Body); err != nil {
465 os.Remove(dst.Name())
467 log.Println("error", r.RemoteAddr, "pypi", filename, err)
468 http.Error(w, err.Error(), http.StatusInternalServerError)
471 if err = dstBuf.Flush(); err != nil {
472 os.Remove(dst.Name())
474 log.Println("error", r.RemoteAddr, "pypi", filename, err)
475 http.Error(w, err.Error(), http.StatusInternalServerError)
// Reject the download when its hash differs from the expected digest.
478 if !bytes.Equal(hasher.Sum(nil), digest) {
479 log.Println(r.RemoteAddr, "pypi", filename, "digest mismatch")
480 os.Remove(dst.Name())
482 http.Error(w, "digest mismatch", http.StatusBadGateway)
// If a digest file for this algorithm already exists locally, it must
// agree with the digest just verified.
485 if digestStored, err := os.ReadFile(path + "." + hashAlgo); err == nil &&
486 !bytes.Equal(digest, digestStored) {
487 err = errors.New("stored digest mismatch")
488 log.Println("error", r.RemoteAddr, "pypi", filename, err)
489 os.Remove(dst.Name())
491 http.Error(w, err.Error(), http.StatusInternalServerError)
// Sync and close the temporary file, set its times, rename it into place,
// then sync the directory.
495 if err = dst.Sync(); err != nil {
496 os.Remove(dst.Name())
498 log.Println("error", r.RemoteAddr, "pypi", filename, err)
499 http.Error(w, err.Error(), http.StatusInternalServerError)
503 if err = dst.Close(); err != nil {
504 log.Println("error", r.RemoteAddr, "pypi", filename, err)
505 http.Error(w, err.Error(), http.StatusInternalServerError)
// NOTE(review): any guard on mtimeExists around this call is not visible.
508 if err = os.Chtimes(dst.Name(), mtime, mtime); err != nil {
509 log.Println("error", r.RemoteAddr, "pypi", filename, err)
510 http.Error(w, err.Error(), http.StatusInternalServerError)
512 if err = os.Rename(dst.Name(), path); err != nil {
513 log.Println("error", r.RemoteAddr, "pypi", filename, err)
514 http.Error(w, err.Error(), http.StatusInternalServerError)
517 if err = DirSync(dirPath); err != nil {
518 log.Println("error", r.RemoteAddr, "pypi", filename, err)
519 http.Error(w, err.Error(), http.StatusInternalServerError)
// Take each digest from hasher when it was the verification algorithm,
// otherwise from the dedicated extra hasher.
523 var digestBLAKE2b256 []byte
524 var digestSHA256 []byte
525 if hashAlgo == HashAlgoBLAKE2b256 {
526 digestBLAKE2b256 = hasher.Sum(nil)
528 digestBLAKE2b256 = hasherBLAKE2b256.Sum(nil)
530 if hashAlgo == HashAlgoSHA256 {
531 digestSHA256 = hasher.Sum(nil)
533 digestSHA256 = hasherSHA256.Sum(nil)
// Write the BLAKE2b-256 and SHA-256 digest files next to the package file.
535 if err = WriteFileSync(
536 dirPath, path+"."+HashAlgoBLAKE2b256,
537 digestBLAKE2b256, mtime,
540 "error", r.RemoteAddr, "pypi",
541 path+"."+HashAlgoBLAKE2b256, err,
543 http.Error(w, err.Error(), http.StatusInternalServerError)
546 if err = WriteFileSync(
547 dirPath, path+"."+HashAlgoSHA256,
551 "error", r.RemoteAddr, "pypi",
552 path+"."+HashAlgoSHA256, err,
554 http.Error(w, err.Error(), http.StatusInternalServerError)
// Best-effort removal (errors ignored) of digest files for every algorithm
// from index 2 of KnownHashAlgos onwards.
557 for _, algo := range KnownHashAlgos[2:] {
558 os.Remove(path + "." + algo)
// If the package file exists locally with a modification time (to the
// second) different from mtime, its times are reset to mtime.
// NOTE(review): the branch structure around this stat is not visible.
563 stat, err := os.Stat(path)
564 if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
565 log.Println(r.RemoteAddr, "pypi", filename, "touch")
566 if err = os.Chtimes(path, mtime, mtime); err != nil {
567 log.Println("error", r.RemoteAddr, "pypi", filename, err)
568 http.Error(w, err.Error(), http.StatusInternalServerError)
// From here on, path refers to the digest file for the chosen algorithm.
576 path = path + "." + hashAlgo
577 stat, err := os.Stat(path)
// The digest file exists and either no upload time is known or its
// modification time already equals it.
// NOTE(review): branch body not visible — presumably nothing is rewritten.
578 if err == nil && (!mtimeExists ||
579 (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime))) {
// Stat errors other than "does not exist" are reported as 500.
582 if err != nil && !os.IsNotExist(err) {
583 log.Println("error", r.RemoteAddr, "pypi", path, err)
584 http.Error(w, err.Error(), http.StatusInternalServerError)
// Otherwise (re)write the digest file with the expected digest and mtime.
587 log.Println(r.RemoteAddr, "pypi", filename, "touch")
588 if err = WriteFileSync(dirPath, path, digest, mtime); err != nil {
589 log.Println("error", r.RemoteAddr, "pypi", path, err)
590 http.Error(w, err.Error(), http.StatusInternalServerError)