2 GoCheese -- Python private package repository and caching proxy
3 Copyright (C) 2019-2022 Sergey Matveev <stargrave@stargrave.org>
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, version 3 of the License.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
41 "go.cypherpunks.ru/recfile"
42 "golang.org/x/crypto/blake2b"
// Names of the digest algorithms handled by the refresh code. The same
// strings are used as the filename suffix of stored digest files
// (path + "." + name).
46 HashAlgoSHA256 = "sha256"
47 HashAlgoBLAKE2b256 = "blake2_256"
48 HashAlgoSHA512 = "sha512"
// InternalFlag is the name of a marker file looked up inside a package
// directory (Root/pkgName/InternalFlag) before any upstream request is made.
51 InternalFlag = ".internal"
// PkgPyPI matches a line of the upstream index page that contains an
// <a href="...">...</a> element: submatch 1 is the href value, submatch 2 is
// the link text.
55 PkgPyPI = regexp.MustCompile(`^.*<a href="([^"]+)"[^>]*>(.+)</a>.*$`)
// PyPIURLParsed is the base URL against which host-less links are resolved.
56 PyPIURLParsed *url.URL
// PyPIHTTPTransport is the transport handed to the http.Client that performs
// the upstream requests.
57 PyPIHTTPTransport http.Transport
// KnownHashAlgos lists digest algorithm names (its elements are not visible in
// this listing). After a successful download the digest files belonging to
// the entries from index 2 onward are removed.
58 KnownHashAlgos []string = []string{
// blake2b256New creates an unkeyed (nil key) BLAKE2b-256 hasher. Its
// func() hash.Hash shape lets it be used as a hasher constructor alongside
// sha256.New and sha512.New.
// NOTE(review): the handling of err and the return statement are not visible
// in this listing.
66 func blake2b256New() hash.Hash {
67 h, err := blake2b.New256(nil)
// agentedReq builds a body-less GET request for url and sets its User-Agent
// header to UserAgent.
// NOTE(review): the handling of err from http.NewRequest and the return
// statement are not visible in this listing.
74 func agentedReq(url string) *http.Request {
75 req, err := http.NewRequest("GET", url, nil)
79 req.Header.Set("User-Agent", UserAgent)
// RecFieldToValuesMap pairs a metadata field name (recField) with the list of
// values decoded for it (jsonFields); one record field is written per value.
// NOTE(review): the field declarations are not visible in this listing; the
// names are taken from how the type is used.
83 type RecFieldToValuesMap struct {
// Parameters of the package refresh routine (its func line is not visible in
// this listing): w receives every error response, pkgName is the package to
// refresh and filenameGet is the file the client asked for.
89 w http.ResponseWriter,
91 pkgName, filenameGet string,
// Probe for the InternalFlag marker inside the package directory.
// NOTE(review): the branch taken when the marker exists is not visible here.
94 if _, err := os.Stat(filepath.Join(Root, pkgName, InternalFlag)); err == nil {
// The client built here, on the shared transport, performs the upstream
// requests of this routine.
97 c := http.Client{Transport: &PyPIHTTPTransport}
98 dirPath := filepath.Join(Root, pkgName)
// Releases decoded from the JSON metadata, keyed by a string
// (presumably the version — TODO confirm).
101 var allReleases map[string][]*PkgReleaseInfo
// Fetch <JSONURL><pkgName>/json. A transport error, a non-200 status and a
// body read failure are each answered with 502 Bad Gateway.
103 resp, err := c.Do(agentedReq(*JSONURL + pkgName + "/json"))
105 log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
106 http.Error(w, err.Error(), http.StatusBadGateway)
109 if resp.StatusCode != http.StatusOK {
112 "error", r.RemoteAddr, "refresh-json", pkgName,
113 "HTTP status:", resp.Status,
115 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
118 body, err := ioutil.ReadAll(resp.Body)
121 log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
122 http.Error(w, "can not read body", http.StatusBadGateway)
// The decoded metadata is rendered as recfile fields through wr into buf.
127 var description string
128 wr := recfile.NewWriter(&buf)
// First decoding attempt: the response body is unmarshalled into meta.
130 err = json.Unmarshal(body, &meta)
// Single-valued fields. Each pair is {metadata field name, decoded value};
// the name is translated through MDFieldToRecField when the field is written.
132 for _, m := range [][2]string{
133 {MDFieldName, meta.Info.Name},
134 {MDFieldVersion, meta.Info.Version},
135 {MDFieldSummary, meta.Info.Summary},
136 {MDFieldDescriptionContentType, meta.Info.DescriptionContentType},
137 {MDFieldKeywords, meta.Info.Keywords},
138 {MDFieldHomePage, meta.Info.HomePage},
139 {MDFieldAuthor, meta.Info.Author},
140 {MDFieldAuthorEmail, meta.Info.AuthorEmail},
141 {MDFieldMaintainer, meta.Info.Maintainer},
142 {MDFieldMaintainerEmail, meta.Info.MaintainerEmail},
143 {MDFieldLicense, meta.Info.License},
144 {MDFieldRequiresPython, meta.Info.RequiresPython},
146 recField, jsonField := m[0], m[1]
150 if _, err = wr.WriteFields(recfile.Field{
151 Name: MDFieldToRecField[recField],
// Multi-valued fields: one recfile field is written per element of jsonFields.
157 for _, m := range []RecFieldToValuesMap{
158 {MDFieldClassifier, meta.Info.Classifier},
159 {MDFieldPlatform, meta.Info.Platform},
160 {MDFieldSupportedPlatform, meta.Info.SupportedPlatform},
161 {MDFieldRequiresDist, meta.Info.RequiresDist},
162 {MDFieldRequiresExternal, meta.Info.RequiresExternal},
163 {MDFieldProjectURL, meta.Info.ProjectURL},
164 {MDFieldProvidesExtra, meta.Info.ProvidesExtra},
166 for _, v := range m.jsonFields {
167 if _, err = wr.WriteFields(recfile.Field{
168 Name: MDFieldToRecField[m.recField],
// Description and releases are kept for the code that follows both decoding
// paths.
175 description = meta.Info.Description
176 allReleases = meta.Releases
// Second decoding path: the same body is unmarshalled into PkgMetaStripped;
// a failure here is logged and answered with 502 Bad Gateway.
// NOTE(review): the condition selecting between the two paths is not visible
// in this listing — presumably this is the fallback when the first
// Unmarshal fails; confirm.
178 var metaStripped PkgMetaStripped
179 err = json.Unmarshal(body, &metaStripped)
182 "error", r.RemoteAddr, "refresh-json", pkgName,
183 "can not parse JSON:", err,
185 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
// Same single-valued field list as above, read from metaStripped.
188 for _, m := range [][2]string{
189 {MDFieldName, metaStripped.Info.Name},
190 {MDFieldVersion, metaStripped.Info.Version},
191 {MDFieldSummary, metaStripped.Info.Summary},
192 {MDFieldDescriptionContentType, metaStripped.Info.DescriptionContentType},
193 {MDFieldKeywords, metaStripped.Info.Keywords},
194 {MDFieldHomePage, metaStripped.Info.HomePage},
195 {MDFieldAuthor, metaStripped.Info.Author},
196 {MDFieldAuthorEmail, metaStripped.Info.AuthorEmail},
197 {MDFieldMaintainer, metaStripped.Info.Maintainer},
198 {MDFieldMaintainerEmail, metaStripped.Info.MaintainerEmail},
199 {MDFieldLicense, metaStripped.Info.License},
200 {MDFieldRequiresPython, metaStripped.Info.RequiresPython},
202 recField, jsonField := m[0], m[1]
206 if _, err = wr.WriteFields(recfile.Field{
207 Name: MDFieldToRecField[recField],
// On this path only Classifier and RequiresDist are written as multi-valued
// fields.
214 for _, m := range []RecFieldToValuesMap{
215 {MDFieldClassifier, metaStripped.Info.Classifier},
216 {MDFieldRequiresDist, metaStripped.Info.RequiresDist},
218 for _, v := range m.jsonFields {
219 if _, err = wr.WriteFields(recfile.Field{
220 Name: MDFieldToRecField[m.recField],
227 description = metaStripped.Info.Description
228 allReleases = metaStripped.Releases
// The description is split on newlines and written as one multi-line field.
230 lines := strings.Split(description, "\n")
232 if _, err = wr.WriteFieldMultiline(
233 MDFieldDescription, lines,
// mkdirForPkg is defined elsewhere; presumably it creates the package
// directory and reports its own failure through w — verify.
239 if !mkdirForPkg(w, r, pkgName) {
// The metadata file is rewritten only when it cannot be read or when its
// content differs from the freshly rendered buffer.
242 path := filepath.Join(dirPath, MDFile)
243 existing, err := ioutil.ReadFile(path)
244 if err != nil || bytes.Compare(existing, buf.Bytes()) != 0 {
245 if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil {
246 log.Println("error", r.RemoteAddr, "refresh-json", path, err)
247 http.Error(w, err.Error(), http.StatusInternalServerError)
250 log.Println(r.RemoteAddr, "pypi", pkgName+"/"+MDFile, "touch")
// mtimes maps a release filename to its upload time truncated to whole
// seconds.
253 mtimes := make(map[string]time.Time)
254 for _, releases := range allReleases {
255 for _, rel := range releases {
// NOTE(review): the body of this check is not visible; presumably entries
// without a filename or upload time are skipped.
256 if rel.Filename == "" || rel.UploadTimeISO8601 == "" {
// Upload times are parsed as RFC 3339 with optional fractional seconds; a
// parse failure is answered with 502 Bad Gateway.
259 t, err := time.Parse(time.RFC3339Nano, rel.UploadTimeISO8601)
262 "error", r.RemoteAddr, "refresh-json", pkgName,
263 "can not parse upload_time:", err,
265 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
268 mtimes[rel.Filename] = t.Truncate(time.Second)
// Fetch the package index page <PyPIURL><pkgName>/. A transport error, a
// non-200 status and a body read failure are each answered with 502.
272 resp, err := c.Do(agentedReq(*PyPIURL + pkgName + "/"))
274 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
275 http.Error(w, err.Error(), http.StatusBadGateway)
278 if resp.StatusCode != http.StatusOK {
281 "error", r.RemoteAddr, "refresh", pkgName,
282 "HTTP status:", resp.Status,
284 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
287 body, err := ioutil.ReadAll(resp.Body)
290 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
291 http.Error(w, err.Error(), http.StatusBadGateway)
294 if !mkdirForPkg(w, r, pkgName) {
// The page is scanned line by line; PkgPyPI is applied to each line to find
// a link.
297 for _, lineRaw := range bytes.Split(body, []byte("\n")) {
298 submatches := PkgPyPI.FindStringSubmatch(string(lineRaw))
299 if len(submatches) == 0 {
// filename is the link text. uri is assigned on a line not visible in this
// listing (presumably the href, submatches[1] — confirm).
303 filename := submatches[2]
304 pkgURL, err := url.Parse(uri)
306 log.Println("error", r.RemoteAddr, "refresh", uri, err)
307 http.Error(w, err.Error(), http.StatusBadGateway)
// The URL fragment must carry the file digest; a link without one is
// answered with 502.
311 if pkgURL.Fragment == "" {
312 log.Println(r.RemoteAddr, "pypi", filename, "no digest")
313 http.Error(w, "no digest provided", http.StatusBadGateway)
// Expected fragment form is <algo>=<hex digest>; anything that does not
// split into one or two parts on "=" is rejected with 502.
316 digestInfo := strings.Split(pkgURL.Fragment, "=")
317 if len(digestInfo) == 1 {
318 // Ancient non PEP-0503 PyPIs, assume MD5
319 digestInfo = []string{"md5", digestInfo[0]}
320 } else if len(digestInfo) != 2 {
321 log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
322 http.Error(w, "invalid digest provided", http.StatusBadGateway)
325 digest, err := hex.DecodeString(digestInfo[1])
327 log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
328 http.Error(w, err.Error(), http.StatusBadGateway)
// Select the hasher constructor and the expected digest size for the
// advertised algorithm (some case labels are not visible in this listing).
// An unknown algorithm or a digest of the wrong length is answered with 502.
331 hashAlgo := digestInfo[0]
332 var hasherNew func() hash.Hash
339 hasherNew = sha256.New
340 hashSize = sha256.Size
342 hasherNew = sha512.New
343 hashSize = sha512.Size
344 case HashAlgoBLAKE2b256:
345 hasherNew = blake2b256New
346 hashSize = blake2b.Size256
349 "error", r.RemoteAddr, "pypi",
350 filename, "unknown digest", hashAlgo,
352 http.Error(w, "unknown digest algorithm", http.StatusBadGateway)
355 if len(digest) != hashSize {
357 "error", r.RemoteAddr, "pypi",
358 filename, "invalid digest length")
359 http.Error(w, "invalid digest length", http.StatusBadGateway)
// Host-less links are resolved against PyPIURLParsed; the other assignment
// re-serializes the parsed URL.
364 if pkgURL.Host == "" {
365 uri = PyPIURLParsed.ResolveReference(pkgURL).String()
367 uri = pkgURL.String()
// Upload time recorded for this filename in the JSON metadata, if any.
// NOTE(review): the handling of a missing entry is not visible in this
// listing.
369 mtime, mtimeExists := mtimes[filename]
374 path := filepath.Join(dirPath, filename)
// The download below happens only for the file the client requested.
375 if filename == filenameGet {
377 // Skip heavy remote call, when shutting down
378 http.Error(w, "shutting down", http.StatusInternalServerError)
381 log.Println(r.RemoteAddr, "pypi", filename, "download")
382 resp, err = c.Do(agentedReq(uri))
384 log.Println("error", r.RemoteAddr, "pypi", filename, "download", err)
385 http.Error(w, err.Error(), http.StatusBadGateway)
// NOTE(review): this defer is registered inside the per-link loop, so the
// body is closed only when the enclosing function returns.
388 defer resp.Body.Close()
389 if resp.StatusCode != http.StatusOK {
391 "error", r.RemoteAddr,
392 "pypi", filename, "download",
393 "HTTP status:", resp.Status,
395 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
// hasher uses the algorithm advertised by the index; the SHA-256 and
// BLAKE2b-256 hashers feed the two digest files written after the download.
398 hasher := hasherNew()
399 hasherSHA256 := sha256.New()
400 hasherBLAKE2b256 := blake2b256New()
// The body is streamed into a temporary file created via TempFile(dirPath).
401 dst, err := TempFile(dirPath)
403 log.Println("error", r.RemoteAddr, "pypi", filename, err)
404 http.Error(w, err.Error(), http.StatusInternalServerError)
// One pass over the body feeds the buffered file writer and every hasher.
// The extra SHA-256 / BLAKE2b-256 hasher is added only when the advertised
// algorithm is a different one, so no digest is computed twice.
407 dstBuf := bufio.NewWriter(dst)
408 wrs := []io.Writer{hasher, dstBuf}
409 if hashAlgo != HashAlgoSHA256 {
410 wrs = append(wrs, hasherSHA256)
412 if hashAlgo != HashAlgoBLAKE2b256 {
413 wrs = append(wrs, hasherBLAKE2b256)
415 wr := io.MultiWriter(wrs...)
// On a copy or flush failure the temporary file is removed (the os.Remove
// error is ignored) and 500 is returned.
416 if _, err = io.Copy(wr, resp.Body); err != nil {
417 os.Remove(dst.Name())
419 log.Println("error", r.RemoteAddr, "pypi", filename, err)
420 http.Error(w, err.Error(), http.StatusInternalServerError)
423 if err = dstBuf.Flush(); err != nil {
424 os.Remove(dst.Name())
426 log.Println("error", r.RemoteAddr, "pypi", filename, err)
427 http.Error(w, err.Error(), http.StatusInternalServerError)
// The computed digest must equal the one advertised in the index link.
430 if bytes.Compare(hasher.Sum(nil), digest) != 0 {
431 log.Println(r.RemoteAddr, "pypi", filename, "digest mismatch")
432 os.Remove(dst.Name())
434 http.Error(w, "digest mismatch", http.StatusBadGateway)
// If a digest file for this algorithm is already stored, it must agree with
// the advertised digest; an unreadable or missing digest file is not treated
// as an error.
437 if digestStored, err := ioutil.ReadFile(path + "." + hashAlgo); err == nil &&
438 bytes.Compare(digest, digestStored) != 0 {
439 err = errors.New("stored digest mismatch")
440 log.Println("error", r.RemoteAddr, "pypi", filename, err)
441 os.Remove(dst.Name())
443 http.Error(w, err.Error(), http.StatusInternalServerError)
// Publish sequence: sync and close the temporary file, set its mtime, rename
// it to its final path, then sync the directory.
447 if err = dst.Sync(); err != nil {
448 os.Remove(dst.Name())
450 log.Println("error", r.RemoteAddr, "pypi", filename, err)
451 http.Error(w, err.Error(), http.StatusInternalServerError)
455 if err = dst.Close(); err != nil {
456 log.Println("error", r.RemoteAddr, "pypi", filename, err)
457 http.Error(w, err.Error(), http.StatusInternalServerError)
460 if err = os.Chtimes(dst.Name(), mtime, mtime); err != nil {
461 log.Println("error", r.RemoteAddr, "pypi", filename, err)
462 http.Error(w, err.Error(), http.StatusInternalServerError)
464 if err = os.Rename(dst.Name(), path); err != nil {
465 log.Println("error", r.RemoteAddr, "pypi", filename, err)
466 http.Error(w, err.Error(), http.StatusInternalServerError)
469 if err = DirSync(dirPath); err != nil {
470 log.Println("error", r.RemoteAddr, "pypi", filename, err)
471 http.Error(w, err.Error(), http.StatusInternalServerError)
// Take each digest from whichever hasher computed it: the primary hasher
// when the advertised algorithm matches, the dedicated one otherwise.
475 var digestSHA256 []byte
476 var digestBLAKE2b256 []byte
477 if hashAlgo == HashAlgoSHA256 {
478 digestSHA256 = hasher.Sum(nil)
480 digestSHA256 = hasherSHA256.Sum(nil)
482 if hashAlgo == HashAlgoBLAKE2b256 {
483 digestBLAKE2b256 = hasher.Sum(nil)
485 digestBLAKE2b256 = hasherBLAKE2b256.Sum(nil)
// Store both digests next to the file as <path>.<algo>.
487 if err = WriteFileSync(
488 dirPath, path+"."+HashAlgoSHA256,
492 "error", r.RemoteAddr, "pypi",
493 path+"."+HashAlgoSHA256, err,
495 http.Error(w, err.Error(), http.StatusInternalServerError)
498 if err = WriteFileSync(
499 dirPath, path+"."+HashAlgoBLAKE2b256,
500 digestBLAKE2b256, mtime,
503 "error", r.RemoteAddr, "pypi",
504 path+"."+HashAlgoBLAKE2b256, err,
506 http.Error(w, err.Error(), http.StatusInternalServerError)
// Digest files of the algorithms from index 2 of KnownHashAlgos onward are
// removed; os.Remove errors (including a missing file) are ignored.
509 for _, algo := range KnownHashAlgos[2:] {
510 os.Remove(path + "." + algo)
// If the stored file exists and its mtime (at second precision) differs from
// mtime, reset it.
// NOTE(review): any condition guarding this check is not visible in this
// listing.
515 stat, err := os.Stat(path)
516 if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
517 log.Println(r.RemoteAddr, "pypi", filename, "touch")
518 if err = os.Chtimes(path, mtime, mtime); err != nil {
519 log.Println("error", r.RemoteAddr, "pypi", filename, err)
520 http.Error(w, err.Error(), http.StatusInternalServerError)
// For the requested file, or for every link when gpgUpdate is set, request
// the detached signature at uri + GPGSigExt.
// NOTE(review): what happens on a request error, a non-200 status or a read
// error is not visible in this listing.
525 if filename == filenameGet || gpgUpdate {
526 resp, err := c.Do(agentedReq(uri + GPGSigExt))
530 if resp.StatusCode != http.StatusOK {
534 sig, err := ioutil.ReadAll(resp.Body)
// A response that does not start with the PGP armor header is logged as
// "non PGP".
539 if !bytes.HasPrefix(sig, []byte("-----BEGIN PGP SIGNATURE-----")) {
540 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "non PGP")
543 if err = WriteFileSync(dirPath, path+GPGSigExt, sig, mtime); err != nil {
544 log.Println("error", r.RemoteAddr, "pypi", filename+GPGSigExt, err)
545 http.Error(w, err.Error(), http.StatusInternalServerError)
548 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "downloaded")
// Same mtime correction as above, applied to the signature file.
551 stat, err := os.Stat(path + GPGSigExt)
552 if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
553 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "touch")
554 if err = os.Chtimes(path+GPGSigExt, mtime, mtime); err != nil {
// NOTE(review): this error line logs filename, while the other messages in
// this signature section log filename+GPGSigExt.
555 log.Println("error", r.RemoteAddr, "pypi", filename, err)
556 http.Error(w, err.Error(), http.StatusInternalServerError)
// From here on path names the digest file for the algorithm advertised by
// the index link.
565 path = path + "." + hashAlgo
566 stat, err := os.Stat(path)
// Condition: the digest file exists and either no upload time is known or
// its mtime already equals the upload time.
// NOTE(review): the body of this branch is not visible; presumably the
// entry is skipped as up to date — confirm.
567 if err == nil && (!mtimeExists ||
568 (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime))) {
// Any stat error other than "does not exist" is answered with 500.
571 if err != nil && !os.IsNotExist(err) {
572 log.Println("error", r.RemoteAddr, "pypi", path, err)
573 http.Error(w, err.Error(), http.StatusInternalServerError)
// Write the advertised digest to the digest file, passing mtime.
576 log.Println(r.RemoteAddr, "pypi", filename, "touch")
577 if err = WriteFileSync(dirPath, path, digest, mtime); err != nil {
578 log.Println("error", r.RemoteAddr, "pypi", path, err)
579 http.Error(w, err.Error(), http.StatusInternalServerError)