2 GoCheese -- Python private package repository and caching proxy
3 Copyright (C) 2019-2021 Sergey Matveev <stargrave@stargrave.org>
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, version 3 of the License.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
40 "go.cypherpunks.ru/recfile"
41 "golang.org/x/crypto/blake2b"
// Digest algorithm names. They are compared against the algorithm part of a
// package link's URL fragment and are appended (after a dot) to a package
// file's path to name the file holding its digest.
45 HashAlgoSHA256 = "sha256"
46 HashAlgoBLAKE2b256 = "blake2_256"
47 HashAlgoSHA512 = "sha512"
// InternalFlag is the name of a marker file that is looked up inside the
// package directory before any upstream request is made.
// NOTE(review): the branch taken when the marker exists is not visible in
// this excerpt -- confirm what it does.
50 InternalFlag = ".internal"
// PkgPyPI matches one line of an index page that contains an HTML anchor:
// submatch 1 is the href attribute value, submatch 2 is the link text
// (used as the package filename).
54 PkgPyPI = regexp.MustCompile(`^.*<a href="([^"]+)"[^>]*>(.+)</a>.*$`)
// PyPIURLParsed is the base URL against which package links without a host
// are resolved.
55 PyPIURLParsed *url.URL
// PyPIHTTPTransport is the transport given to the HTTP client that talks to
// the upstream.
56 PyPIHTTPTransport http.Transport
// KnownHashAlgos lists digest algorithm names. After a successful download,
// digest files for every entry from index 2 onwards are removed.
// NOTE(review): the list contents are not visible in this excerpt.
57 KnownHashAlgos []string = []string{
// blake2b256New creates an unkeyed BLAKE2b-256 hasher (nil is passed as the
// key to blake2b.New256). Its func() hash.Hash signature lets it be used
// interchangeably with constructors such as sha256.New.
// NOTE(review): handling of the returned err is not visible in this excerpt.
65 func blake2b256New() hash.Hash {
66 h, err := blake2b.New256(nil)
// agentedReq builds a body-less GET request for url and sets its User-Agent
// header to UserAgent.
// NOTE(review): handling of the http.NewRequest error is not visible in this
// excerpt.
73 func agentedReq(url string) *http.Request {
74 req, err := http.NewRequest("GET", url, nil)
78 req.Header.Set("User-Agent", UserAgent)
// RecFieldToValuesMap pairs a metadata field name (recField) with the list of
// values for it taken from the PyPI JSON (jsonFields). Each value is written
// out as a separate recfile field under the name derived from recField.
// NOTE(review): the field declarations are not visible in this excerpt;
// names are taken from their uses.
82 type RecFieldToValuesMap struct {
88 w http.ResponseWriter,
90 pkgName, filenameGet string,
93 if _, err := os.Stat(filepath.Join(Root, pkgName, InternalFlag)); err == nil {
96 c := http.Client{Transport: &PyPIHTTPTransport}
97 dirPath := filepath.Join(Root, pkgName)
100 var allReleases map[string][]*PkgReleaseInfo
102 resp, err := c.Do(agentedReq(*JSONURL + pkgName + "/json"))
104 log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
105 http.Error(w, err.Error(), http.StatusBadGateway)
108 if resp.StatusCode != http.StatusOK {
111 "error", r.RemoteAddr, "refresh-json", pkgName,
112 "HTTP status:", resp.Status,
114 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
117 body, err := ioutil.ReadAll(resp.Body)
120 var description string
121 wr := recfile.NewWriter(&buf)
123 err = json.Unmarshal(body, &meta)
125 for _, m := range [][2]string{
126 {MetadataFieldName, meta.Info.Name},
127 {MetadataFieldVersion, meta.Info.Version},
128 {MetadataFieldSummary, meta.Info.Summary},
129 {MetadataFieldDescriptionContentType, meta.Info.DescriptionContentType},
130 {MetadataFieldKeywords, meta.Info.Keywords},
131 {MetadataFieldHomePage, meta.Info.HomePage},
132 {MetadataFieldAuthor, meta.Info.Author},
133 {MetadataFieldAuthorEmail, meta.Info.AuthorEmail},
134 {MetadataFieldMaintainer, meta.Info.Maintainer},
135 {MetadataFieldMaintainerEmail, meta.Info.MaintainerEmail},
136 {MetadataFieldLicense, meta.Info.License},
137 {MetadataFieldRequiresPython, meta.Info.RequiresPython},
139 recField, jsonField := m[0], m[1]
143 if _, err = wr.WriteFields(recfile.Field{
144 Name: metadataFieldToRecField(recField),
150 for _, m := range []RecFieldToValuesMap{
151 {MetadataFieldClassifier, meta.Info.Classifier},
152 {MetadataFieldPlatform, meta.Info.Platform},
153 {MetadataFieldSupportedPlatform, meta.Info.SupportedPlatform},
154 {MetadataFieldRequiresDist, meta.Info.RequiresDist},
155 {MetadataFieldRequiresExternal, meta.Info.RequiresExternal},
156 {MetadataFieldProjectURL, meta.Info.ProjectURL},
157 {MetadataFieldProvidesExtra, meta.Info.ProvidesExtra},
159 for _, v := range m.jsonFields {
160 if _, err = wr.WriteFields(recfile.Field{
161 Name: metadataFieldToRecField(m.recField),
168 description = meta.Info.Description
169 allReleases = meta.Releases
171 var metaStripped PkgMetaStripped
172 err = json.Unmarshal(body, &metaStripped)
175 "error", r.RemoteAddr, "refresh-json", pkgName,
176 "can not parse JSON:", err,
178 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
181 for _, m := range [][2]string{
182 {MetadataFieldName, metaStripped.Info.Name},
183 {MetadataFieldVersion, metaStripped.Info.Version},
184 {MetadataFieldSummary, metaStripped.Info.Summary},
185 {MetadataFieldDescriptionContentType, metaStripped.Info.DescriptionContentType},
186 {MetadataFieldKeywords, metaStripped.Info.Keywords},
187 {MetadataFieldHomePage, metaStripped.Info.HomePage},
188 {MetadataFieldAuthor, metaStripped.Info.Author},
189 {MetadataFieldAuthorEmail, metaStripped.Info.AuthorEmail},
190 {MetadataFieldMaintainer, metaStripped.Info.Maintainer},
191 {MetadataFieldMaintainerEmail, metaStripped.Info.MaintainerEmail},
192 {MetadataFieldLicense, metaStripped.Info.License},
193 {MetadataFieldRequiresPython, metaStripped.Info.RequiresPython},
195 recField, jsonField := m[0], m[1]
199 if _, err = wr.WriteFields(recfile.Field{
200 Name: metadataFieldToRecField(recField),
207 for _, m := range []RecFieldToValuesMap{
208 {MetadataFieldClassifier, metaStripped.Info.Classifier},
209 {MetadataFieldRequiresDist, metaStripped.Info.RequiresDist},
211 for _, v := range m.jsonFields {
212 if _, err = wr.WriteFields(recfile.Field{
213 Name: metadataFieldToRecField(m.recField),
220 description = metaStripped.Info.Description
221 allReleases = metaStripped.Releases
223 lines := strings.Split(description, "\n")
225 if _, err = wr.WriteFieldMultiline(
226 MetadataFieldDescription, lines,
232 if !mkdirForPkg(w, r, pkgName) {
235 path := filepath.Join(dirPath, MetadataFile)
236 existing, err := ioutil.ReadFile(path)
237 if err != nil || bytes.Compare(existing, buf.Bytes()) != 0 {
238 if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil {
239 log.Println("error", r.RemoteAddr, "refresh-json", path, err)
240 http.Error(w, err.Error(), http.StatusInternalServerError)
243 log.Println(r.RemoteAddr, "pypi", pkgName+"/"+MetadataFile, "touch")
246 mtimes := make(map[string]time.Time)
247 for _, releases := range allReleases {
248 for _, rel := range releases {
249 if rel.Filename == "" || rel.UploadTimeISO8601 == "" {
252 t, err := time.Parse(time.RFC3339Nano, rel.UploadTimeISO8601)
255 "error", r.RemoteAddr, "refresh-json", pkgName,
256 "can not parse upload_time:", err,
258 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
261 mtimes[rel.Filename] = t.Truncate(time.Second)
265 resp, err := c.Do(agentedReq(*PyPIURL + pkgName + "/"))
267 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
268 http.Error(w, err.Error(), http.StatusBadGateway)
271 if resp.StatusCode != http.StatusOK {
274 "error", r.RemoteAddr, "refresh", pkgName,
275 "HTTP status:", resp.Status,
277 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
280 body, err := ioutil.ReadAll(resp.Body)
283 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
284 http.Error(w, err.Error(), http.StatusBadGateway)
287 if !mkdirForPkg(w, r, pkgName) {
290 for _, lineRaw := range bytes.Split(body, []byte("\n")) {
291 submatches := PkgPyPI.FindStringSubmatch(string(lineRaw))
292 if len(submatches) == 0 {
296 filename := submatches[2]
297 pkgURL, err := url.Parse(uri)
299 log.Println("error", r.RemoteAddr, "refresh", uri, err)
300 http.Error(w, err.Error(), http.StatusBadGateway)
304 if pkgURL.Fragment == "" {
305 log.Println(r.RemoteAddr, "pypi", filename, "no digest")
306 http.Error(w, "no digest provided", http.StatusBadGateway)
309 digestInfo := strings.Split(pkgURL.Fragment, "=")
310 if len(digestInfo) == 1 {
311 // Ancient non PEP-0503 PyPIs, assume MD5
312 digestInfo = []string{"md5", digestInfo[0]}
313 } else if len(digestInfo) != 2 {
314 log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
315 http.Error(w, "invalid digest provided", http.StatusBadGateway)
318 digest, err := hex.DecodeString(digestInfo[1])
320 log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
321 http.Error(w, err.Error(), http.StatusBadGateway)
324 hashAlgo := digestInfo[0]
325 var hasherNew func() hash.Hash
332 hasherNew = sha256.New
333 hashSize = sha256.Size
335 hasherNew = sha512.New
336 hashSize = sha512.Size
337 case HashAlgoBLAKE2b256:
338 hasherNew = blake2b256New
339 hashSize = blake2b.Size256
342 "error", r.RemoteAddr, "pypi",
343 filename, "unknown digest", hashAlgo,
345 http.Error(w, "unknown digest algorithm", http.StatusBadGateway)
348 if len(digest) != hashSize {
350 "error", r.RemoteAddr, "pypi",
351 filename, "invalid digest length")
352 http.Error(w, "invalid digest length", http.StatusBadGateway)
357 if pkgURL.Host == "" {
358 uri = PyPIURLParsed.ResolveReference(pkgURL).String()
360 uri = pkgURL.String()
362 mtime, mtimeExists := mtimes[filename]
367 path := filepath.Join(dirPath, filename)
368 if filename == filenameGet {
370 // Skip heavy remote call, when shutting down
371 http.Error(w, "shutting down", http.StatusInternalServerError)
374 log.Println(r.RemoteAddr, "pypi", filename, "download")
375 resp, err = c.Do(agentedReq(uri))
377 log.Println("error", r.RemoteAddr, "pypi", filename, "download", err)
378 http.Error(w, err.Error(), http.StatusBadGateway)
381 defer resp.Body.Close()
382 if resp.StatusCode != http.StatusOK {
384 "error", r.RemoteAddr,
385 "pypi", filename, "download",
386 "HTTP status:", resp.Status,
388 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
391 hasher := hasherNew()
392 hasherSHA256 := sha256.New()
393 hasherBLAKE2b256 := blake2b256New()
394 dst, err := TempFile(dirPath)
396 log.Println("error", r.RemoteAddr, "pypi", filename, err)
397 http.Error(w, err.Error(), http.StatusInternalServerError)
400 dstBuf := bufio.NewWriter(dst)
401 wrs := []io.Writer{hasher, dstBuf}
402 if hashAlgo != HashAlgoSHA256 {
403 wrs = append(wrs, hasherSHA256)
405 if hashAlgo != HashAlgoBLAKE2b256 {
406 wrs = append(wrs, hasherBLAKE2b256)
408 wr := io.MultiWriter(wrs...)
409 if _, err = io.Copy(wr, resp.Body); err != nil {
410 os.Remove(dst.Name())
412 log.Println("error", r.RemoteAddr, "pypi", filename, err)
413 http.Error(w, err.Error(), http.StatusInternalServerError)
416 if err = dstBuf.Flush(); err != nil {
417 os.Remove(dst.Name())
419 log.Println("error", r.RemoteAddr, "pypi", filename, err)
420 http.Error(w, err.Error(), http.StatusInternalServerError)
423 if bytes.Compare(hasher.Sum(nil), digest) != 0 {
424 log.Println(r.RemoteAddr, "pypi", filename, "digest mismatch")
425 os.Remove(dst.Name())
427 http.Error(w, "digest mismatch", http.StatusBadGateway)
431 if err = dst.Sync(); err != nil {
432 os.Remove(dst.Name())
434 log.Println("error", r.RemoteAddr, "pypi", filename, err)
435 http.Error(w, err.Error(), http.StatusInternalServerError)
439 if err = dst.Close(); err != nil {
440 log.Println("error", r.RemoteAddr, "pypi", filename, err)
441 http.Error(w, err.Error(), http.StatusInternalServerError)
444 if err = os.Chtimes(dst.Name(), mtime, mtime); err != nil {
445 log.Println("error", r.RemoteAddr, "pypi", filename, err)
446 http.Error(w, err.Error(), http.StatusInternalServerError)
448 if err = os.Rename(dst.Name(), path); err != nil {
449 log.Println("error", r.RemoteAddr, "pypi", filename, err)
450 http.Error(w, err.Error(), http.StatusInternalServerError)
453 if err = DirSync(dirPath); err != nil {
454 log.Println("error", r.RemoteAddr, "pypi", filename, err)
455 http.Error(w, err.Error(), http.StatusInternalServerError)
459 var digestSHA256 []byte
460 var digestBLAKE2b256 []byte
461 if hashAlgo == HashAlgoSHA256 {
462 digestSHA256 = hasher.Sum(nil)
464 digestSHA256 = hasherSHA256.Sum(nil)
466 if hashAlgo == HashAlgoBLAKE2b256 {
467 digestBLAKE2b256 = hasher.Sum(nil)
469 digestBLAKE2b256 = hasherBLAKE2b256.Sum(nil)
471 if err = WriteFileSync(
472 dirPath, path+"."+HashAlgoSHA256,
476 "error", r.RemoteAddr, "pypi",
477 path+"."+HashAlgoSHA256, err,
479 http.Error(w, err.Error(), http.StatusInternalServerError)
482 if err = WriteFileSync(
483 dirPath, path+"."+HashAlgoBLAKE2b256,
484 digestBLAKE2b256, mtime,
487 "error", r.RemoteAddr, "pypi",
488 path+"."+HashAlgoBLAKE2b256, err,
490 http.Error(w, err.Error(), http.StatusInternalServerError)
493 for _, algo := range KnownHashAlgos[2:] {
494 os.Remove(path + "." + algo)
499 stat, err := os.Stat(path)
500 if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
501 log.Println(r.RemoteAddr, "pypi", filename, "touch")
502 if err = os.Chtimes(path, mtime, mtime); err != nil {
503 log.Println("error", r.RemoteAddr, "pypi", filename, err)
504 http.Error(w, err.Error(), http.StatusInternalServerError)
509 if filename == filenameGet || gpgUpdate {
510 if _, err = os.Stat(path); err != nil {
513 resp, err := c.Do(agentedReq(uri + GPGSigExt))
517 if resp.StatusCode != http.StatusOK {
521 sig, err := ioutil.ReadAll(resp.Body)
526 if !bytes.HasPrefix(sig, []byte("-----BEGIN PGP SIGNATURE-----")) {
527 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "non PGP")
530 if err = WriteFileSync(dirPath, path+GPGSigExt, sig, mtime); err != nil {
531 log.Println("error", r.RemoteAddr, "pypi", filename+GPGSigExt, err)
532 http.Error(w, err.Error(), http.StatusInternalServerError)
535 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "downloaded")
538 stat, err := os.Stat(path + GPGSigExt)
539 if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
540 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "touch")
541 if err = os.Chtimes(path+GPGSigExt, mtime, mtime); err != nil {
542 log.Println("error", r.RemoteAddr, "pypi", filename, err)
543 http.Error(w, err.Error(), http.StatusInternalServerError)
552 path = path + "." + hashAlgo
553 stat, err := os.Stat(path)
554 if err == nil && (!mtimeExists ||
555 (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime))) {
558 if err != nil && !os.IsNotExist(err) {
559 log.Println("error", r.RemoteAddr, "pypi", path, err)
560 http.Error(w, err.Error(), http.StatusInternalServerError)
563 log.Println(r.RemoteAddr, "pypi", filename, "touch")
564 if err = WriteFileSync(dirPath, path, digest, mtime); err != nil {
565 log.Println("error", r.RemoteAddr, "pypi", path, err)
566 http.Error(w, err.Error(), http.StatusInternalServerError)