2 GoCheese -- Python private package repository and caching proxy
3 Copyright (C) 2019-2021 Sergey Matveev <stargrave@stargrave.org>
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, version 3 of the License.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
41 "go.cypherpunks.ru/recfile"
42 "golang.org/x/crypto/blake2b"
// Digest algorithm identifiers. The refresh code compares them against
// the algorithm name taken from a package link's URL fragment, and
// appends them to a package file's path (path + "." + algo) to name
// the file that stores the corresponding digest.
46 HashAlgoSHA256 = "sha256"
47 HashAlgoBLAKE2b256 = "blake2_256"
48 HashAlgoSHA512 = "sha512"
// InternalFlag is the name of a file looked up with os.Stat inside a
// package's directory under Root before any upstream request is made.
// NOTE(review): presumably its presence marks the package as internal
// and suppresses the upstream refresh -- the branch body is outside
// this excerpt; confirm.
51 InternalFlag = ".internal"
// PkgPyPI matches one whole line containing an HTML anchor and captures
// the href attribute value (group 1) and the link text (group 2). It is
// applied to each line of the upstream package index page; the link
// text is taken as the package file name.
55 PkgPyPI = regexp.MustCompile(`^.*<a href="([^"]+)"[^>]*>(.+)</a>.*$`)
// PyPIURLParsed is the base URL against which package links without a
// host part are resolved (via ResolveReference).
56 PyPIURLParsed *url.URL
// PyPIHTTPTransport is used as the Transport of the http.Client that
// performs the upstream requests.
57 PyPIHTTPTransport http.Transport
// KnownHashAlgos lists digest algorithm names. For every entry past the
// first two, the matching digest file (path + "." + algo) is removed
// after the SHA-256 and BLAKE2b-256 digest files have been written.
// NOTE(review): the elements are outside this excerpt; presumably the
// first two are HashAlgoSHA256 and HashAlgoBLAKE2b256 -- confirm.
58 KnownHashAlgos []string = []string{
// blake2b256New returns a hash.Hash obtained from blake2b.New256 called
// with a nil key. Its func() hash.Hash shape lets it be assigned to the
// same hasher-constructor variable as sha256.New and sha512.New.
// NOTE(review): how the error returned by blake2b.New256 is handled is
// outside this excerpt.
66 func blake2b256New() hash.Hash {
67 h, err := blake2b.New256(nil)
// agentedReq builds a GET request for url with no body and sets its
// User-Agent header to UserAgent, so that every upstream request
// identifies itself the same way.
// NOTE(review): how the error returned by http.NewRequest is handled is
// outside this excerpt.
74 func agentedReq(url string) *http.Request {
75 req, err := http.NewRequest("GET", url, nil)
79 req.Header.Set("User-Agent", UserAgent)
// RecFieldToValuesMap pairs one metadata field name with the list of
// values it carries. The metadata writer reads its recField and
// jsonFields members and emits one recfile field per value.
// NOTE(review): the field declarations are outside this excerpt; their
// names are taken from the m.recField / m.jsonFields uses.
83 type RecFieldToValuesMap struct {
89 w http.ResponseWriter,
91 pkgName, filenameGet string,
94 if _, err := os.Stat(filepath.Join(Root, pkgName, InternalFlag)); err == nil {
97 c := http.Client{Transport: &PyPIHTTPTransport}
98 dirPath := filepath.Join(Root, pkgName)
101 var allReleases map[string][]*PkgReleaseInfo
103 resp, err := c.Do(agentedReq(*JSONURL + pkgName + "/json"))
105 log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
106 http.Error(w, err.Error(), http.StatusBadGateway)
109 if resp.StatusCode != http.StatusOK {
112 "error", r.RemoteAddr, "refresh-json", pkgName,
113 "HTTP status:", resp.Status,
115 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
118 body, err := ioutil.ReadAll(resp.Body)
121 var description string
122 wr := recfile.NewWriter(&buf)
124 err = json.Unmarshal(body, &meta)
126 for _, m := range [][2]string{
127 {MetadataFieldName, meta.Info.Name},
128 {MetadataFieldVersion, meta.Info.Version},
129 {MetadataFieldSummary, meta.Info.Summary},
130 {MetadataFieldDescriptionContentType, meta.Info.DescriptionContentType},
131 {MetadataFieldKeywords, meta.Info.Keywords},
132 {MetadataFieldHomePage, meta.Info.HomePage},
133 {MetadataFieldAuthor, meta.Info.Author},
134 {MetadataFieldAuthorEmail, meta.Info.AuthorEmail},
135 {MetadataFieldMaintainer, meta.Info.Maintainer},
136 {MetadataFieldMaintainerEmail, meta.Info.MaintainerEmail},
137 {MetadataFieldLicense, meta.Info.License},
138 {MetadataFieldRequiresPython, meta.Info.RequiresPython},
140 recField, jsonField := m[0], m[1]
144 if _, err = wr.WriteFields(recfile.Field{
145 Name: metadataFieldToRecField(recField),
151 for _, m := range []RecFieldToValuesMap{
152 {MetadataFieldClassifier, meta.Info.Classifier},
153 {MetadataFieldPlatform, meta.Info.Platform},
154 {MetadataFieldSupportedPlatform, meta.Info.SupportedPlatform},
155 {MetadataFieldRequiresDist, meta.Info.RequiresDist},
156 {MetadataFieldRequiresExternal, meta.Info.RequiresExternal},
157 {MetadataFieldProjectURL, meta.Info.ProjectURL},
158 {MetadataFieldProvidesExtra, meta.Info.ProvidesExtra},
160 for _, v := range m.jsonFields {
161 if _, err = wr.WriteFields(recfile.Field{
162 Name: metadataFieldToRecField(m.recField),
169 description = meta.Info.Description
170 allReleases = meta.Releases
172 var metaStripped PkgMetaStripped
173 err = json.Unmarshal(body, &metaStripped)
176 "error", r.RemoteAddr, "refresh-json", pkgName,
177 "can not parse JSON:", err,
179 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
182 for _, m := range [][2]string{
183 {MetadataFieldName, metaStripped.Info.Name},
184 {MetadataFieldVersion, metaStripped.Info.Version},
185 {MetadataFieldSummary, metaStripped.Info.Summary},
186 {MetadataFieldDescriptionContentType, metaStripped.Info.DescriptionContentType},
187 {MetadataFieldKeywords, metaStripped.Info.Keywords},
188 {MetadataFieldHomePage, metaStripped.Info.HomePage},
189 {MetadataFieldAuthor, metaStripped.Info.Author},
190 {MetadataFieldAuthorEmail, metaStripped.Info.AuthorEmail},
191 {MetadataFieldMaintainer, metaStripped.Info.Maintainer},
192 {MetadataFieldMaintainerEmail, metaStripped.Info.MaintainerEmail},
193 {MetadataFieldLicense, metaStripped.Info.License},
194 {MetadataFieldRequiresPython, metaStripped.Info.RequiresPython},
196 recField, jsonField := m[0], m[1]
200 if _, err = wr.WriteFields(recfile.Field{
201 Name: metadataFieldToRecField(recField),
208 for _, m := range []RecFieldToValuesMap{
209 {MetadataFieldClassifier, metaStripped.Info.Classifier},
210 {MetadataFieldRequiresDist, metaStripped.Info.RequiresDist},
212 for _, v := range m.jsonFields {
213 if _, err = wr.WriteFields(recfile.Field{
214 Name: metadataFieldToRecField(m.recField),
221 description = metaStripped.Info.Description
222 allReleases = metaStripped.Releases
224 lines := strings.Split(description, "\n")
226 if _, err = wr.WriteFieldMultiline(
227 MetadataFieldDescription, lines,
233 if !mkdirForPkg(w, r, pkgName) {
236 path := filepath.Join(dirPath, MetadataFile)
237 existing, err := ioutil.ReadFile(path)
238 if err != nil || bytes.Compare(existing, buf.Bytes()) != 0 {
239 if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil {
240 log.Println("error", r.RemoteAddr, "refresh-json", path, err)
241 http.Error(w, err.Error(), http.StatusInternalServerError)
244 log.Println(r.RemoteAddr, "pypi", pkgName+"/"+MetadataFile, "touch")
247 mtimes := make(map[string]time.Time)
248 for _, releases := range allReleases {
249 for _, rel := range releases {
250 if rel.Filename == "" || rel.UploadTimeISO8601 == "" {
253 t, err := time.Parse(time.RFC3339Nano, rel.UploadTimeISO8601)
256 "error", r.RemoteAddr, "refresh-json", pkgName,
257 "can not parse upload_time:", err,
259 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
262 mtimes[rel.Filename] = t.Truncate(time.Second)
266 resp, err := c.Do(agentedReq(*PyPIURL + pkgName + "/"))
268 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
269 http.Error(w, err.Error(), http.StatusBadGateway)
272 if resp.StatusCode != http.StatusOK {
275 "error", r.RemoteAddr, "refresh", pkgName,
276 "HTTP status:", resp.Status,
278 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
281 body, err := ioutil.ReadAll(resp.Body)
284 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
285 http.Error(w, err.Error(), http.StatusBadGateway)
288 if !mkdirForPkg(w, r, pkgName) {
291 for _, lineRaw := range bytes.Split(body, []byte("\n")) {
292 submatches := PkgPyPI.FindStringSubmatch(string(lineRaw))
293 if len(submatches) == 0 {
297 filename := submatches[2]
298 pkgURL, err := url.Parse(uri)
300 log.Println("error", r.RemoteAddr, "refresh", uri, err)
301 http.Error(w, err.Error(), http.StatusBadGateway)
305 if pkgURL.Fragment == "" {
306 log.Println(r.RemoteAddr, "pypi", filename, "no digest")
307 http.Error(w, "no digest provided", http.StatusBadGateway)
310 digestInfo := strings.Split(pkgURL.Fragment, "=")
311 if len(digestInfo) == 1 {
312 // Ancient non PEP-0503 PyPIs, assume MD5
313 digestInfo = []string{"md5", digestInfo[0]}
314 } else if len(digestInfo) != 2 {
315 log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
316 http.Error(w, "invalid digest provided", http.StatusBadGateway)
319 digest, err := hex.DecodeString(digestInfo[1])
321 log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
322 http.Error(w, err.Error(), http.StatusBadGateway)
325 hashAlgo := digestInfo[0]
326 var hasherNew func() hash.Hash
333 hasherNew = sha256.New
334 hashSize = sha256.Size
336 hasherNew = sha512.New
337 hashSize = sha512.Size
338 case HashAlgoBLAKE2b256:
339 hasherNew = blake2b256New
340 hashSize = blake2b.Size256
343 "error", r.RemoteAddr, "pypi",
344 filename, "unknown digest", hashAlgo,
346 http.Error(w, "unknown digest algorithm", http.StatusBadGateway)
349 if len(digest) != hashSize {
351 "error", r.RemoteAddr, "pypi",
352 filename, "invalid digest length")
353 http.Error(w, "invalid digest length", http.StatusBadGateway)
358 if pkgURL.Host == "" {
359 uri = PyPIURLParsed.ResolveReference(pkgURL).String()
361 uri = pkgURL.String()
363 mtime, mtimeExists := mtimes[filename]
368 path := filepath.Join(dirPath, filename)
369 if filename == filenameGet {
371 // Skip heavy remote call, when shutting down
372 http.Error(w, "shutting down", http.StatusInternalServerError)
375 log.Println(r.RemoteAddr, "pypi", filename, "download")
376 resp, err = c.Do(agentedReq(uri))
378 log.Println("error", r.RemoteAddr, "pypi", filename, "download", err)
379 http.Error(w, err.Error(), http.StatusBadGateway)
382 defer resp.Body.Close()
383 if resp.StatusCode != http.StatusOK {
385 "error", r.RemoteAddr,
386 "pypi", filename, "download",
387 "HTTP status:", resp.Status,
389 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
392 hasher := hasherNew()
393 hasherSHA256 := sha256.New()
394 hasherBLAKE2b256 := blake2b256New()
395 dst, err := TempFile(dirPath)
397 log.Println("error", r.RemoteAddr, "pypi", filename, err)
398 http.Error(w, err.Error(), http.StatusInternalServerError)
401 dstBuf := bufio.NewWriter(dst)
402 wrs := []io.Writer{hasher, dstBuf}
403 if hashAlgo != HashAlgoSHA256 {
404 wrs = append(wrs, hasherSHA256)
406 if hashAlgo != HashAlgoBLAKE2b256 {
407 wrs = append(wrs, hasherBLAKE2b256)
409 wr := io.MultiWriter(wrs...)
410 if _, err = io.Copy(wr, resp.Body); err != nil {
411 os.Remove(dst.Name())
413 log.Println("error", r.RemoteAddr, "pypi", filename, err)
414 http.Error(w, err.Error(), http.StatusInternalServerError)
417 if err = dstBuf.Flush(); err != nil {
418 os.Remove(dst.Name())
420 log.Println("error", r.RemoteAddr, "pypi", filename, err)
421 http.Error(w, err.Error(), http.StatusInternalServerError)
424 if bytes.Compare(hasher.Sum(nil), digest) != 0 {
425 log.Println(r.RemoteAddr, "pypi", filename, "digest mismatch")
426 os.Remove(dst.Name())
428 http.Error(w, "digest mismatch", http.StatusBadGateway)
431 if digestStored, err := ioutil.ReadFile(path + "." + hashAlgo); err == nil &&
432 bytes.Compare(digest, digestStored) != 0 {
433 err = errors.New("stored digest mismatch")
434 log.Println("error", r.RemoteAddr, "pypi", filename, err)
435 os.Remove(dst.Name())
437 http.Error(w, err.Error(), http.StatusInternalServerError)
441 if err = dst.Sync(); err != nil {
442 os.Remove(dst.Name())
444 log.Println("error", r.RemoteAddr, "pypi", filename, err)
445 http.Error(w, err.Error(), http.StatusInternalServerError)
449 if err = dst.Close(); err != nil {
450 log.Println("error", r.RemoteAddr, "pypi", filename, err)
451 http.Error(w, err.Error(), http.StatusInternalServerError)
454 if err = os.Chtimes(dst.Name(), mtime, mtime); err != nil {
455 log.Println("error", r.RemoteAddr, "pypi", filename, err)
456 http.Error(w, err.Error(), http.StatusInternalServerError)
458 if err = os.Rename(dst.Name(), path); err != nil {
459 log.Println("error", r.RemoteAddr, "pypi", filename, err)
460 http.Error(w, err.Error(), http.StatusInternalServerError)
463 if err = DirSync(dirPath); err != nil {
464 log.Println("error", r.RemoteAddr, "pypi", filename, err)
465 http.Error(w, err.Error(), http.StatusInternalServerError)
469 var digestSHA256 []byte
470 var digestBLAKE2b256 []byte
471 if hashAlgo == HashAlgoSHA256 {
472 digestSHA256 = hasher.Sum(nil)
474 digestSHA256 = hasherSHA256.Sum(nil)
476 if hashAlgo == HashAlgoBLAKE2b256 {
477 digestBLAKE2b256 = hasher.Sum(nil)
479 digestBLAKE2b256 = hasherBLAKE2b256.Sum(nil)
481 if err = WriteFileSync(
482 dirPath, path+"."+HashAlgoSHA256,
486 "error", r.RemoteAddr, "pypi",
487 path+"."+HashAlgoSHA256, err,
489 http.Error(w, err.Error(), http.StatusInternalServerError)
492 if err = WriteFileSync(
493 dirPath, path+"."+HashAlgoBLAKE2b256,
494 digestBLAKE2b256, mtime,
497 "error", r.RemoteAddr, "pypi",
498 path+"."+HashAlgoBLAKE2b256, err,
500 http.Error(w, err.Error(), http.StatusInternalServerError)
503 for _, algo := range KnownHashAlgos[2:] {
504 os.Remove(path + "." + algo)
509 stat, err := os.Stat(path)
510 if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
511 log.Println(r.RemoteAddr, "pypi", filename, "touch")
512 if err = os.Chtimes(path, mtime, mtime); err != nil {
513 log.Println("error", r.RemoteAddr, "pypi", filename, err)
514 http.Error(w, err.Error(), http.StatusInternalServerError)
519 if filename == filenameGet || gpgUpdate {
520 if _, err = os.Stat(path); err != nil {
523 resp, err := c.Do(agentedReq(uri + GPGSigExt))
527 if resp.StatusCode != http.StatusOK {
531 sig, err := ioutil.ReadAll(resp.Body)
536 if !bytes.HasPrefix(sig, []byte("-----BEGIN PGP SIGNATURE-----")) {
537 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "non PGP")
540 if err = WriteFileSync(dirPath, path+GPGSigExt, sig, mtime); err != nil {
541 log.Println("error", r.RemoteAddr, "pypi", filename+GPGSigExt, err)
542 http.Error(w, err.Error(), http.StatusInternalServerError)
545 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "downloaded")
548 stat, err := os.Stat(path + GPGSigExt)
549 if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
550 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "touch")
551 if err = os.Chtimes(path+GPGSigExt, mtime, mtime); err != nil {
552 log.Println("error", r.RemoteAddr, "pypi", filename, err)
553 http.Error(w, err.Error(), http.StatusInternalServerError)
562 path = path + "." + hashAlgo
563 stat, err := os.Stat(path)
564 if err == nil && (!mtimeExists ||
565 (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime))) {
568 if err != nil && !os.IsNotExist(err) {
569 log.Println("error", r.RemoteAddr, "pypi", path, err)
570 http.Error(w, err.Error(), http.StatusInternalServerError)
573 log.Println(r.RemoteAddr, "pypi", filename, "touch")
574 if err = WriteFileSync(dirPath, path, digest, mtime); err != nil {
575 log.Println("error", r.RemoteAddr, "pypi", path, err)
576 http.Error(w, err.Error(), http.StatusInternalServerError)