2 GoCheese -- Python private package repository and caching proxy
3 Copyright (C) 2019-2021 Sergey Matveev <stargrave@stargrave.org>
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, version 3 of the License.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
40 "go.cypherpunks.ru/recfile"
41 "golang.org/x/crypto/blake2b"
// Hash algorithm identifiers. They are compared against the algorithm name
// taken from a link's URL fragment and are appended (after a ".") to a package
// file's path to name its digest file.
45 HashAlgoSHA256 = "sha256"
46 HashAlgoBLAKE2b256 = "blake2_256"
47 HashAlgoSHA512 = "sha512"
// InternalFlag is the name of a marker file that is looked up (os.Stat) inside
// a package's directory before any upstream request is made.
50 InternalFlag = ".internal"
// PkgPyPI matches a single line that contains an <a href="...">...</a> link.
// Capture group 1 is the href value, capture group 2 is the link text.
54 PkgPyPI = regexp.MustCompile(`^.*<a href="([^"]+)"[^>]*>(.+)</a>.*$`)
// PyPIURLParsed is the base URL against which host-less links are resolved.
55 PyPIURLParsed *url.URL
// PyPIHTTPTransport is used as the Transport of the HTTP client that talks to
// the upstream index.
56 PyPIHTTPTransport http.Transport
// KnownHashAlgos lists hash algorithm names.
// NOTE(review): the elements are not visible in this view; the download code
// removes digest files for every entry from index 2 onward — confirm the order.
57 KnownHashAlgos []string = []string{
// blake2b256New creates an unkeyed BLAKE2b-256 hash (blake2b.New256 with a nil
// key). Its func() hash.Hash shape lets it be used as a hasher constructor
// alongside sha256.New and sha512.New.
// NOTE(review): handling of the returned error is not visible in this view.
65 func blake2b256New() hash.Hash {
66 h, err := blake2b.New256(nil)
// agentedReq builds a body-less GET request for url and sets its User-Agent
// header to UserAgent.
// NOTE(review): handling of the http.NewRequest error is not visible in this view.
73 func agentedReq(url string) *http.Request {
74 req, err := http.NewRequest("GET", url, nil)
78 req.Header.Set("User-Agent", UserAgent)
// RecFieldToValuesMap pairs a metadata field name (recField) with the list of
// values (jsonFields) to be written for it, one recfile field per value.
// NOTE(review): the field declarations are not visible in this view; the names
// are taken from how the struct is used when multi-valued metadata is written.
82 type RecFieldToValuesMap struct {
// NOTE(review): this view omits the function's signature line and many short
// lines (closing braces, error checks, returns). The comments below describe
// only the calls that are visible; confirm branch structure against the file.
88 w http.ResponseWriter,
90 pkgName, filenameGet string,
// Look for the InternalFlag marker inside the package's directory under Root.
// NOTE(review): what is done when the marker exists is not visible here.
93 if _, err := os.Stat(filepath.Join(Root, pkgName, InternalFlag)); err == nil {
// Client for all upstream requests; it uses the shared PyPIHTTPTransport.
96 c := http.Client{Transport: &PyPIHTTPTransport}
97 dirPath := filepath.Join(Root, pkgName)
// Release lists taken from whichever decoded metadata structure is used below.
100 var allReleases map[string][]*PkgReleaseInfo
// Fetch the package's JSON metadata from *JSONURL + pkgName + "/json".
// Failures and non-200 answers are reported to the client as 502 Bad Gateway.
102 resp, err := c.Do(agentedReq(*JSONURL + pkgName + "/json"))
104 log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
105 http.Error(w, err.Error(), http.StatusBadGateway)
108 if resp.StatusCode != http.StatusOK {
111 "error", r.RemoteAddr, "refresh-json", pkgName,
112 "HTTP status:", resp.Status,
114 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
117 body, err := ioutil.ReadAll(resp.Body)
120 var description string
// The metadata is serialized as recfile fields into buf.
121 wr := recfile.NewWriter(&buf)
// Decode the body into the full metadata structure.
123 err = json.Unmarshal(body, &meta)
// Single-valued fields: each pair is {metadata field name, value from JSON}.
125 for _, m := range [][2]string{
126 {MetadataFieldName, meta.Info.Name},
127 {MetadataFieldVersion, meta.Info.Version},
128 {MetadataFieldSummary, meta.Info.Summary},
129 {MetadataFieldDescriptionContentType, meta.Info.DescriptionContentType},
130 {MetadataFieldKeywords, meta.Info.Keywords},
131 {MetadataFieldHomePage, meta.Info.HomePage},
132 {MetadataFieldAuthor, meta.Info.Author},
133 {MetadataFieldAuthorEmail, meta.Info.AuthorEmail},
134 {MetadataFieldMaintainer, meta.Info.Maintainer},
135 {MetadataFieldMaintainerEmail, meta.Info.MaintainerEmail},
136 {MetadataFieldLicense, meta.Info.License},
137 {MetadataFieldRequiresPython, meta.Info.RequiresPython},
139 recField, jsonField := m[0], m[1]
143 if _, err = wr.WriteFields(recfile.Field{
144 Name: metadataFieldToRecField(recField),
// Multi-valued fields: one recfile field is written per value.
150 for _, m := range []RecFieldToValuesMap{
151 {MetadataFieldClassifier, meta.Info.Classifier},
152 {MetadataFieldPlatform, meta.Info.Platform},
153 {MetadataFieldSupportedPlatform, meta.Info.SupportedPlatform},
154 {MetadataFieldRequiresDist, meta.Info.RequiresDist},
155 {MetadataFieldRequiresExternal, meta.Info.RequiresExternal},
156 {MetadataFieldProjectURL, meta.Info.ProjectURL},
157 {MetadataFieldProvidesExtra, meta.Info.ProvidesExtra},
159 for _, v := range m.jsonFields {
160 if _, err = wr.WriteFields(recfile.Field{
161 Name: metadataFieldToRecField(m.recField),
168 description = meta.Info.Description
169 allReleases = meta.Releases
// Decode the same body into the stripped-down structure and emit the same
// kinds of fields from it (with a shorter multi-valued list).
// NOTE(review): presumably a fallback for when the full decode above fails;
// the branching between the two decodes is not visible here — confirm.
171 var metaStripped PkgMetaStripped
172 err = json.Unmarshal(body, &metaStripped)
175 "error", r.RemoteAddr, "refresh-json", pkgName,
176 "can not parse JSON:", err,
178 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
181 for _, m := range [][2]string{
182 {MetadataFieldName, metaStripped.Info.Name},
183 {MetadataFieldVersion, metaStripped.Info.Version},
184 {MetadataFieldSummary, metaStripped.Info.Summary},
185 {MetadataFieldDescriptionContentType, metaStripped.Info.DescriptionContentType},
186 {MetadataFieldKeywords, metaStripped.Info.Keywords},
187 {MetadataFieldHomePage, metaStripped.Info.HomePage},
188 {MetadataFieldAuthor, metaStripped.Info.Author},
189 {MetadataFieldAuthorEmail, metaStripped.Info.AuthorEmail},
190 {MetadataFieldMaintainer, metaStripped.Info.Maintainer},
191 {MetadataFieldMaintainerEmail, metaStripped.Info.MaintainerEmail},
192 {MetadataFieldLicense, metaStripped.Info.License},
193 {MetadataFieldRequiresPython, metaStripped.Info.RequiresPython},
195 recField, jsonField := m[0], m[1]
199 if _, err = wr.WriteFields(recfile.Field{
200 Name: metadataFieldToRecField(recField),
207 for _, m := range []RecFieldToValuesMap{
208 {MetadataFieldClassifier, metaStripped.Info.Classifier},
209 {MetadataFieldRequiresDist, metaStripped.Info.RequiresDist},
211 for _, v := range m.jsonFields {
212 if _, err = wr.WriteFields(recfile.Field{
213 Name: metadataFieldToRecField(m.recField),
220 description = metaStripped.Info.Description
221 allReleases = metaStripped.Releases
// The description is split on newlines and written as one multiline field.
223 lines := strings.Split(description, "\n")
225 if _, err = wr.WriteFieldMultiline(
226 MetadataFieldDescription, lines,
232 if !mkdirForPkg(w, r, pkgName) {
// (Re)write the metadata file only when it cannot be read or when its current
// contents differ from what was just serialized into buf.
235 path := filepath.Join(dirPath, MetadataFile)
236 existing, err := ioutil.ReadFile(path)
237 if err != nil || bytes.Compare(existing, buf.Bytes()) != 0 {
238 if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil {
239 log.Println("error", r.RemoteAddr, "refresh-json", path, err)
240 http.Error(w, err.Error(), http.StatusInternalServerError)
243 log.Println(r.RemoteAddr, "pypi", pkgName+"."+MetadataFile, "touch")
// Build a filename -> upload time map (RFC 3339 timestamps, truncated to whole
// seconds) from all releases.
246 mtimes := make(map[string]time.Time)
247 for _, releases := range allReleases {
248 for _, rel := range releases {
249 if rel.Filename == "" || rel.UploadTimeISO8601 == "" {
252 t, err := time.Parse(time.RFC3339Nano, rel.UploadTimeISO8601)
255 "error", r.RemoteAddr, "refresh-json", pkgName,
256 "can not parse upload_time:", err,
258 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
261 mtimes[rel.Filename] = t.Truncate(time.Second)
// Fetch the package's index page from *PyPIURL + pkgName + "/".
// Failures and non-200 answers are reported to the client as 502 Bad Gateway.
265 resp, err := c.Do(agentedReq(*PyPIURL + pkgName + "/"))
267 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
268 http.Error(w, err.Error(), http.StatusBadGateway)
271 if resp.StatusCode != http.StatusOK {
274 "error", r.RemoteAddr, "refresh", pkgName,
275 "HTTP status:", resp.Status,
277 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
280 body, err := ioutil.ReadAll(resp.Body)
283 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
284 http.Error(w, err.Error(), http.StatusBadGateway)
287 if !mkdirForPkg(w, r, pkgName) {
// Scan the page line by line; only lines matching PkgPyPI (an <a href> link)
// are of interest.
290 for _, lineRaw := range bytes.Split(body, []byte("\n")) {
291 submatches := PkgPyPI.FindStringSubmatch(string(lineRaw))
292 if len(submatches) == 0 {
// The link text (second capture group) is the package file name.
296 filename := submatches[2]
297 pkgURL, err := url.Parse(uri)
299 log.Println("error", r.RemoteAddr, "refresh", uri, err)
300 http.Error(w, err.Error(), http.StatusBadGateway)
// The digest is expected in the URL fragment; a link without one is rejected.
304 if pkgURL.Fragment == "" {
305 log.Println(r.RemoteAddr, "pypi", filename, "no digest")
306 http.Error(w, "no digest provided", http.StatusBadGateway)
// The fragment is split on "=" into algorithm name and hex digest.
309 digestInfo := strings.Split(pkgURL.Fragment, "=")
310 if len(digestInfo) == 1 {
311 // Ancient non PEP-0503 PyPIs, assume MD5
312 digestInfo = []string{"md5", digestInfo[0]}
313 } else if len(digestInfo) != 2 {
314 log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
315 http.Error(w, "invalid digest provided", http.StatusBadGateway)
318 digest, err := hex.DecodeString(digestInfo[1])
320 log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
321 http.Error(w, err.Error(), http.StatusBadGateway)
// Pick the hasher constructor and the expected digest size for the advertised
// algorithm; an unrecognized algorithm is answered with 502.
324 hashAlgo := digestInfo[0]
325 var hasherNew func() hash.Hash
332 hasherNew = sha256.New
333 hashSize = sha256.Size
335 hasherNew = sha512.New
336 hashSize = sha512.Size
337 case HashAlgoBLAKE2b256:
338 hasherNew = blake2b256New
339 hashSize = blake2b.Size256
341 log.Println("error", r.RemoteAddr, "pypi", filename, "unknown digest", hashAlgo)
342 http.Error(w, "unknown digest algorithm", http.StatusBadGateway)
// The decoded digest must have exactly the size of the chosen algorithm.
345 if len(digest) != hashSize {
346 log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest length")
347 http.Error(w, "invalid digest length", http.StatusBadGateway)
// Host-less links are resolved against PyPIURLParsed; others are used as is.
352 if pkgURL.Host == "" {
353 uri = PyPIURLParsed.ResolveReference(pkgURL).String()
355 uri = pkgURL.String()
// Upload time for this file, if the JSON metadata provided one.
357 mtime, mtimeExists := mtimes[filename]
362 path := filepath.Join(dirPath, filename)
// Only the file that was actually requested (filenameGet) is downloaded.
363 if filename == filenameGet {
365 // Skip heavy remote call, when shutting down
366 http.Error(w, "shutting down", http.StatusInternalServerError)
369 log.Println(r.RemoteAddr, "pypi", filename, "download")
370 resp, err = c.Do(agentedReq(uri))
372 log.Println("error", r.RemoteAddr, "pypi", filename, "download", err)
373 http.Error(w, err.Error(), http.StatusBadGateway)
376 defer resp.Body.Close()
377 if resp.StatusCode != http.StatusOK {
379 "error", r.RemoteAddr,
380 "pypi", filename, "download",
381 "HTTP status:", resp.Status,
383 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
// Stream the body once through a MultiWriter: the advertised-algorithm hasher
// and a buffered temporary file in dirPath, plus dedicated SHA-256 and
// BLAKE2b-256 hashers unless the advertised algorithm already is that one.
386 hasher := hasherNew()
387 hasherSHA256 := sha256.New()
388 hasherBLAKE2b256 := blake2b256New()
389 dst, err := TempFile(dirPath)
391 log.Println("error", r.RemoteAddr, "pypi", filename, err)
392 http.Error(w, err.Error(), http.StatusInternalServerError)
395 dstBuf := bufio.NewWriter(dst)
396 wrs := []io.Writer{hasher, dstBuf}
397 if hashAlgo != HashAlgoSHA256 {
398 wrs = append(wrs, hasherSHA256)
400 if hashAlgo != HashAlgoBLAKE2b256 {
401 wrs = append(wrs, hasherBLAKE2b256)
403 wr := io.MultiWriter(wrs...)
// On a copy or flush failure the temporary file is removed before answering 500.
404 if _, err = io.Copy(wr, resp.Body); err != nil {
405 os.Remove(dst.Name())
407 log.Println("error", r.RemoteAddr, "pypi", filename, err)
408 http.Error(w, err.Error(), http.StatusInternalServerError)
411 if err = dstBuf.Flush(); err != nil {
412 os.Remove(dst.Name())
414 log.Println("error", r.RemoteAddr, "pypi", filename, err)
415 http.Error(w, err.Error(), http.StatusInternalServerError)
// The computed digest must equal the advertised one; otherwise the temporary
// file is discarded and 502 is returned.
418 if bytes.Compare(hasher.Sum(nil), digest) != 0 {
419 log.Println(r.RemoteAddr, "pypi", filename, "digest mismatch")
420 os.Remove(dst.Name())
422 http.Error(w, "digest mismatch", http.StatusBadGateway)
// Persist the download: sync and close the temporary file, stamp it with
// mtime, rename it to its final path, then sync the directory.
426 if err = dst.Sync(); err != nil {
427 os.Remove(dst.Name())
429 log.Println("error", r.RemoteAddr, "pypi", filename, err)
430 http.Error(w, err.Error(), http.StatusInternalServerError)
434 if err = dst.Close(); err != nil {
435 log.Println("error", r.RemoteAddr, "pypi", filename, err)
436 http.Error(w, err.Error(), http.StatusInternalServerError)
439 if err = os.Chtimes(dst.Name(), mtime, mtime); err != nil {
440 log.Println("error", r.RemoteAddr, "pypi", filename, err)
441 http.Error(w, err.Error(), http.StatusInternalServerError)
443 if err = os.Rename(dst.Name(), path); err != nil {
444 log.Println("error", r.RemoteAddr, "pypi", filename, err)
445 http.Error(w, err.Error(), http.StatusInternalServerError)
448 if err = DirSync(dirPath); err != nil {
449 log.Println("error", r.RemoteAddr, "pypi", filename, err)
450 http.Error(w, err.Error(), http.StatusInternalServerError)
// The SHA-256 and BLAKE2b-256 digests come from the primary hasher when it is
// that algorithm, or from the dedicated hasher otherwise.
454 var digestSHA256 []byte
455 var digestBLAKE2b256 []byte
456 if hashAlgo == HashAlgoSHA256 {
457 digestSHA256 = hasher.Sum(nil)
459 digestSHA256 = hasherSHA256.Sum(nil)
461 if hashAlgo == HashAlgoBLAKE2b256 {
462 digestBLAKE2b256 = hasher.Sum(nil)
464 digestBLAKE2b256 = hasherBLAKE2b256.Sum(nil)
// Both digests are stored next to the package file as <file>.<algorithm>.
466 if err = WriteFileSync(
467 dirPath, path+"."+HashAlgoSHA256,
471 "error", r.RemoteAddr, "pypi",
472 path+"."+HashAlgoSHA256, err,
474 http.Error(w, err.Error(), http.StatusInternalServerError)
477 if err = WriteFileSync(
478 dirPath, path+"."+HashAlgoBLAKE2b256,
479 digestBLAKE2b256, mtime,
482 "error", r.RemoteAddr, "pypi",
483 path+"."+HashAlgoBLAKE2b256, err,
485 http.Error(w, err.Error(), http.StatusInternalServerError)
// Digest files of the remaining known algorithms (index 2 onward) are removed;
// the result of os.Remove is ignored.
488 for _, algo := range KnownHashAlgos[2:] {
489 os.Remove(path + "." + algo)
// If the package file exists and its mtime (to the second) differs from the
// upstream one, reset it.
494 stat, err := os.Stat(path)
495 if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
496 log.Println(r.RemoteAddr, "pypi", filename, "touch")
497 if err = os.Chtimes(path, mtime, mtime); err != nil {
498 log.Println("error", r.RemoteAddr, "pypi", filename, err)
499 http.Error(w, err.Error(), http.StatusInternalServerError)
// For the requested file, or when gpgUpdate is set, fetch the signature from
// uri+GPGSigExt and store it as path+GPGSigExt, provided the response starts
// with a PGP signature header.
504 if filename == filenameGet || gpgUpdate {
505 if _, err = os.Stat(path); err != nil {
508 resp, err := c.Do(agentedReq(uri + GPGSigExt))
512 if resp.StatusCode != http.StatusOK {
516 sig, err := ioutil.ReadAll(resp.Body)
521 if !bytes.HasPrefix(sig, []byte("-----BEGIN PGP SIGNATURE-----")) {
522 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "non PGP")
525 if err = WriteFileSync(dirPath, path+GPGSigExt, sig, mtime); err != nil {
526 log.Println("error", r.RemoteAddr, "pypi", filename+GPGSigExt, err)
527 http.Error(w, err.Error(), http.StatusInternalServerError)
530 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "downloaded")
// Same mtime correction as for the package file, applied to the signature file.
533 stat, err := os.Stat(path + GPGSigExt)
534 if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
535 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "touch")
536 if err = os.Chtimes(path+GPGSigExt, mtime, mtime); err != nil {
537 log.Println("error", r.RemoteAddr, "pypi", filename, err)
538 http.Error(w, err.Error(), http.StatusInternalServerError)
// From here on path names the digest file of the advertised algorithm
// (<file>.<hashAlgo>).
547 path = path + "." + hashAlgo
548 stat, err := os.Stat(path)
550 (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime)) {
// A Stat error other than "does not exist" is answered with 500.
553 if err != nil && !os.IsNotExist(err) {
554 log.Println("error", r.RemoteAddr, "pypi", path, err)
555 http.Error(w, err.Error(), http.StatusInternalServerError)
// Write the advertised digest to that file, stamped with mtime.
558 log.Println(r.RemoteAddr, "pypi", filename, "touch")
559 if err = WriteFileSync(dirPath, path, digest, mtime); err != nil {
560 log.Println("error", r.RemoteAddr, "pypi", path, err)
561 http.Error(w, err.Error(), http.StatusInternalServerError)