2 GoCheese -- Python private package repository and caching proxy
3 Copyright (C) 2019-2023 Sergey Matveev <stargrave@stargrave.org>
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, version 3 of the License.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
40 "go.cypherpunks.ru/recfile"
41 "golang.org/x/crypto/blake2b"
45 HashAlgoSHA256 = "sha256"
46 HashAlgoBLAKE2b256 = "blake2_256"
47 HashAlgoSHA512 = "sha512"
50 InternalFlag = ".internal"
54 PkgPyPI = regexp.MustCompile(`^.*<a href="([^"]+)"[^>]*>(.+)</a>.*$`)
55 PyPIURLParsed *url.URL
56 PyPIHTTPTransport http.Transport
57 KnownHashAlgos []string = []string{
65 func blake2b256New() hash.Hash {
66 h, err := blake2b.New256(nil)
73 func agentedReq(url string) *http.Request {
74 req, err := http.NewRequest("GET", url, nil)
78 req.Header.Set("User-Agent", UserAgent)
82 type RecFieldToValuesMap struct {
88 w http.ResponseWriter,
90 pkgName, filenameGet string,
93 if _, err := os.Stat(filepath.Join(Root, pkgName, InternalFlag)); err == nil {
96 c := http.Client{Transport: &PyPIHTTPTransport}
97 dirPath := filepath.Join(Root, pkgName)
100 var allReleases map[string][]*PkgReleaseInfo
102 resp, err := c.Do(agentedReq(*JSONURL + pkgName + "/json"))
104 log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
105 http.Error(w, err.Error(), http.StatusBadGateway)
108 if resp.StatusCode != http.StatusOK {
111 "error", r.RemoteAddr, "refresh-json", pkgName,
112 "HTTP status:", resp.Status,
114 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
117 body, err := io.ReadAll(resp.Body)
120 log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
121 http.Error(w, "can not read body", http.StatusBadGateway)
126 var description string
127 wr := recfile.NewWriter(&buf)
129 err = json.Unmarshal(body, &meta)
131 for _, m := range [][2]string{
132 {MDFieldName, meta.Info.Name},
133 {MDFieldVersion, meta.Info.Version},
134 {MDFieldSummary, meta.Info.Summary},
135 {MDFieldDescriptionContentType, meta.Info.DescriptionContentType},
136 {MDFieldKeywords, meta.Info.Keywords},
137 {MDFieldHomePage, meta.Info.HomePage},
138 {MDFieldAuthor, meta.Info.Author},
139 {MDFieldAuthorEmail, meta.Info.AuthorEmail},
140 {MDFieldMaintainer, meta.Info.Maintainer},
141 {MDFieldMaintainerEmail, meta.Info.MaintainerEmail},
142 {MDFieldLicense, meta.Info.License},
143 {MDFieldRequiresPython, meta.Info.RequiresPython},
145 recField, jsonField := m[0], m[1]
149 if _, err = wr.WriteFields(recfile.Field{
150 Name: MDFieldToRecField[recField],
156 for _, m := range []RecFieldToValuesMap{
157 {MDFieldClassifier, meta.Info.Classifier},
158 {MDFieldPlatform, meta.Info.Platform},
159 {MDFieldSupportedPlatform, meta.Info.SupportedPlatform},
160 {MDFieldRequiresDist, meta.Info.RequiresDist},
161 {MDFieldRequiresExternal, meta.Info.RequiresExternal},
162 {MDFieldProjectURL, meta.Info.ProjectURL},
163 {MDFieldProvidesExtra, meta.Info.ProvidesExtra},
165 for _, v := range m.jsonFields {
166 if _, err = wr.WriteFields(recfile.Field{
167 Name: MDFieldToRecField[m.recField],
174 description = meta.Info.Description
175 allReleases = meta.Releases
177 var metaStripped PkgMetaStripped
178 err = json.Unmarshal(body, &metaStripped)
181 "error", r.RemoteAddr, "refresh-json", pkgName,
182 "can not parse JSON:", err,
184 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
187 for _, m := range [][2]string{
188 {MDFieldName, metaStripped.Info.Name},
189 {MDFieldVersion, metaStripped.Info.Version},
190 {MDFieldSummary, metaStripped.Info.Summary},
191 {MDFieldDescriptionContentType, metaStripped.Info.DescriptionContentType},
192 {MDFieldKeywords, metaStripped.Info.Keywords},
193 {MDFieldHomePage, metaStripped.Info.HomePage},
194 {MDFieldAuthor, metaStripped.Info.Author},
195 {MDFieldAuthorEmail, metaStripped.Info.AuthorEmail},
196 {MDFieldMaintainer, metaStripped.Info.Maintainer},
197 {MDFieldMaintainerEmail, metaStripped.Info.MaintainerEmail},
198 {MDFieldLicense, metaStripped.Info.License},
199 {MDFieldRequiresPython, metaStripped.Info.RequiresPython},
201 recField, jsonField := m[0], m[1]
205 if _, err = wr.WriteFields(recfile.Field{
206 Name: MDFieldToRecField[recField],
213 for _, m := range []RecFieldToValuesMap{
214 {MDFieldClassifier, metaStripped.Info.Classifier},
215 {MDFieldRequiresDist, metaStripped.Info.RequiresDist},
217 for _, v := range m.jsonFields {
218 if _, err = wr.WriteFields(recfile.Field{
219 Name: MDFieldToRecField[m.recField],
226 description = metaStripped.Info.Description
227 allReleases = metaStripped.Releases
229 lines := strings.Split(description, "\n")
231 if _, err = wr.WriteFieldMultiline(
232 MDFieldDescription, lines,
238 if !mkdirForPkg(w, r, pkgName) {
241 path := filepath.Join(dirPath, MDFile)
242 existing, err := os.ReadFile(path)
243 if err != nil || !bytes.Equal(existing, buf.Bytes()) {
244 if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil {
245 log.Println("error", r.RemoteAddr, "refresh-json", path, err)
246 http.Error(w, err.Error(), http.StatusInternalServerError)
249 log.Println(r.RemoteAddr, "pypi", pkgName+"/"+MDFile, "touch")
252 mtimes := make(map[string]time.Time)
253 for _, releases := range allReleases {
254 for _, rel := range releases {
255 if rel.Filename == "" || rel.UploadTimeISO8601 == "" {
258 t, err := time.Parse(time.RFC3339Nano, rel.UploadTimeISO8601)
261 "error", r.RemoteAddr, "refresh-json", pkgName,
262 "can not parse upload_time:", err,
264 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
267 mtimes[rel.Filename] = t.Truncate(time.Second)
271 resp, err := c.Do(agentedReq(*PyPIURL + pkgName + "/"))
273 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
274 http.Error(w, err.Error(), http.StatusBadGateway)
277 if resp.StatusCode != http.StatusOK {
280 "error", r.RemoteAddr, "refresh", pkgName,
281 "HTTP status:", resp.Status,
283 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
286 body, err := io.ReadAll(resp.Body)
289 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
290 http.Error(w, err.Error(), http.StatusBadGateway)
293 if !mkdirForPkg(w, r, pkgName) {
296 for _, lineRaw := range bytes.Split(body, []byte("\n")) {
297 submatches := PkgPyPI.FindStringSubmatch(string(lineRaw))
298 if len(submatches) == 0 {
302 filename := submatches[2]
303 pkgURL, err := url.Parse(uri)
305 log.Println("error", r.RemoteAddr, "refresh", uri, err)
306 http.Error(w, err.Error(), http.StatusBadGateway)
310 if pkgURL.Fragment == "" {
311 log.Println(r.RemoteAddr, "pypi", filename, "no digest")
312 http.Error(w, "no digest provided", http.StatusBadGateway)
315 digestInfo := strings.Split(pkgURL.Fragment, "=")
316 if len(digestInfo) == 1 {
317 // Ancient non-PEP 503 PyPIs give a bare digest without an algorithm name: assume MD5
318 digestInfo = []string{"md5", digestInfo[0]}
319 } else if len(digestInfo) != 2 {
320 log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
321 http.Error(w, "invalid digest provided", http.StatusBadGateway)
324 digest, err := hex.DecodeString(digestInfo[1])
326 log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
327 http.Error(w, err.Error(), http.StatusBadGateway)
330 hashAlgo := digestInfo[0]
331 var hasherNew func() hash.Hash
338 hasherNew = sha256.New
339 hashSize = sha256.Size
341 hasherNew = sha512.New
342 hashSize = sha512.Size
343 case HashAlgoBLAKE2b256:
344 hasherNew = blake2b256New
345 hashSize = blake2b.Size256
348 "error", r.RemoteAddr, "pypi",
349 filename, "unknown digest", hashAlgo,
351 http.Error(w, "unknown digest algorithm", http.StatusBadGateway)
354 if len(digest) != hashSize {
356 "error", r.RemoteAddr, "pypi",
357 filename, "invalid digest length")
358 http.Error(w, "invalid digest length", http.StatusBadGateway)
363 if pkgURL.Host == "" {
364 uri = PyPIURLParsed.ResolveReference(pkgURL).String()
366 uri = pkgURL.String()
368 mtime, mtimeExists := mtimes[filename]
373 path := filepath.Join(dirPath, filename)
374 if filename == filenameGet {
376 // Skip the heavy remote call when shutting down
377 http.Error(w, "shutting down", http.StatusInternalServerError)
380 log.Println(r.RemoteAddr, "pypi", filename, "download")
381 resp, err = c.Do(agentedReq(uri))
383 log.Println("error", r.RemoteAddr, "pypi", filename, "download", err)
384 http.Error(w, err.Error(), http.StatusBadGateway)
387 defer resp.Body.Close()
388 if resp.StatusCode != http.StatusOK {
390 "error", r.RemoteAddr,
391 "pypi", filename, "download",
392 "HTTP status:", resp.Status,
394 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
397 hasher := hasherNew()
398 hasherSHA256 := sha256.New()
399 hasherBLAKE2b256 := blake2b256New()
400 dst, err := TempFile(dirPath)
402 log.Println("error", r.RemoteAddr, "pypi", filename, err)
403 http.Error(w, err.Error(), http.StatusInternalServerError)
406 dstBuf := bufio.NewWriter(dst)
407 wrs := []io.Writer{hasher, dstBuf}
408 if hashAlgo != HashAlgoSHA256 {
409 wrs = append(wrs, hasherSHA256)
411 if hashAlgo != HashAlgoBLAKE2b256 {
412 wrs = append(wrs, hasherBLAKE2b256)
414 wr := io.MultiWriter(wrs...)
415 if _, err = io.Copy(wr, resp.Body); err != nil {
416 os.Remove(dst.Name())
418 log.Println("error", r.RemoteAddr, "pypi", filename, err)
419 http.Error(w, err.Error(), http.StatusInternalServerError)
422 if err = dstBuf.Flush(); err != nil {
423 os.Remove(dst.Name())
425 log.Println("error", r.RemoteAddr, "pypi", filename, err)
426 http.Error(w, err.Error(), http.StatusInternalServerError)
429 if !bytes.Equal(hasher.Sum(nil), digest) {
430 log.Println(r.RemoteAddr, "pypi", filename, "digest mismatch")
431 os.Remove(dst.Name())
433 http.Error(w, "digest mismatch", http.StatusBadGateway)
436 if digestStored, err := os.ReadFile(path + "." + hashAlgo); err == nil &&
437 !bytes.Equal(digest, digestStored) {
438 err = errors.New("stored digest mismatch")
439 log.Println("error", r.RemoteAddr, "pypi", filename, err)
440 os.Remove(dst.Name())
442 http.Error(w, err.Error(), http.StatusInternalServerError)
446 if err = dst.Sync(); err != nil {
447 os.Remove(dst.Name())
449 log.Println("error", r.RemoteAddr, "pypi", filename, err)
450 http.Error(w, err.Error(), http.StatusInternalServerError)
454 if err = dst.Close(); err != nil {
455 log.Println("error", r.RemoteAddr, "pypi", filename, err)
456 http.Error(w, err.Error(), http.StatusInternalServerError)
459 if err = os.Chtimes(dst.Name(), mtime, mtime); err != nil {
460 log.Println("error", r.RemoteAddr, "pypi", filename, err)
461 http.Error(w, err.Error(), http.StatusInternalServerError)
463 if err = os.Rename(dst.Name(), path); err != nil {
464 log.Println("error", r.RemoteAddr, "pypi", filename, err)
465 http.Error(w, err.Error(), http.StatusInternalServerError)
468 if err = DirSync(dirPath); err != nil {
469 log.Println("error", r.RemoteAddr, "pypi", filename, err)
470 http.Error(w, err.Error(), http.StatusInternalServerError)
474 var digestSHA256 []byte
475 var digestBLAKE2b256 []byte
476 if hashAlgo == HashAlgoSHA256 {
477 digestSHA256 = hasher.Sum(nil)
479 digestSHA256 = hasherSHA256.Sum(nil)
481 if hashAlgo == HashAlgoBLAKE2b256 {
482 digestBLAKE2b256 = hasher.Sum(nil)
484 digestBLAKE2b256 = hasherBLAKE2b256.Sum(nil)
486 if err = WriteFileSync(
487 dirPath, path+"."+HashAlgoSHA256,
491 "error", r.RemoteAddr, "pypi",
492 path+"."+HashAlgoSHA256, err,
494 http.Error(w, err.Error(), http.StatusInternalServerError)
497 if err = WriteFileSync(
498 dirPath, path+"."+HashAlgoBLAKE2b256,
499 digestBLAKE2b256, mtime,
502 "error", r.RemoteAddr, "pypi",
503 path+"."+HashAlgoBLAKE2b256, err,
505 http.Error(w, err.Error(), http.StatusInternalServerError)
508 for _, algo := range KnownHashAlgos[2:] {
509 os.Remove(path + "." + algo)
514 stat, err := os.Stat(path)
515 if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
516 log.Println(r.RemoteAddr, "pypi", filename, "touch")
517 if err = os.Chtimes(path, mtime, mtime); err != nil {
518 log.Println("error", r.RemoteAddr, "pypi", filename, err)
519 http.Error(w, err.Error(), http.StatusInternalServerError)
524 if filename == filenameGet || gpgUpdate {
525 resp, err := c.Do(agentedReq(uri + GPGSigExt))
529 if resp.StatusCode != http.StatusOK {
533 sig, err := io.ReadAll(resp.Body)
538 if !bytes.HasPrefix(sig, []byte("-----BEGIN PGP SIGNATURE-----")) {
539 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "non PGP")
542 if err = WriteFileSync(dirPath, path+GPGSigExt, sig, mtime); err != nil {
543 log.Println("error", r.RemoteAddr, "pypi", filename+GPGSigExt, err)
544 http.Error(w, err.Error(), http.StatusInternalServerError)
547 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "downloaded")
550 stat, err := os.Stat(path + GPGSigExt)
551 if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
552 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "touch")
553 if err = os.Chtimes(path+GPGSigExt, mtime, mtime); err != nil {
554 log.Println("error", r.RemoteAddr, "pypi", filename, err)
555 http.Error(w, err.Error(), http.StatusInternalServerError)
564 path = path + "." + hashAlgo
565 stat, err := os.Stat(path)
566 if err == nil && (!mtimeExists ||
567 (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime))) {
570 if err != nil && !os.IsNotExist(err) {
571 log.Println("error", r.RemoteAddr, "pypi", path, err)
572 http.Error(w, err.Error(), http.StatusInternalServerError)
575 log.Println(r.RemoteAddr, "pypi", filename, "touch")
576 if err = WriteFileSync(dirPath, path, digest, mtime); err != nil {
577 log.Println("error", r.RemoteAddr, "pypi", path, err)
578 http.Error(w, err.Error(), http.StatusInternalServerError)