2 GoCheese -- Python private package repository and caching proxy
3 Copyright (C) 2019-2021 Sergey Matveev <stargrave@stargrave.org>
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, version 3 of the License.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
40 "go.cypherpunks.ru/recfile"
41 "golang.org/x/crypto/blake2b"
// Digest algorithm names. They are compared against the algorithm part of
// a link's URL fragment and appended to a package file's path (after a
// dot) to name the file holding its digest.
45 HashAlgoSHA256 = "sha256"
46 HashAlgoBLAKE2b256 = "blake2_256"
47 HashAlgoSHA512 = "sha512"
// InternalFlag is the name of the file looked up inside a package's
// directory before any upstream request is made for that package.
50 InternalFlag = ".internal"
// PkgPyPI matches a whole line containing an HTML anchor. Submatch 1 is
// the href attribute value, submatch 2 is the text between the tags.
54 PkgPyPI = regexp.MustCompile(`^.*<a href="([^"]+)"[^>]*>(.+)</a>.*$`)
// PyPIURLParsed is the base URL that host-less package links are resolved
// against.
55 PyPIURLParsed *url.URL
// PyPIHTTPTransport is the transport used by the HTTP client that talks
// to the upstream.
56 PyPIHTTPTransport http.Transport
// KnownHashAlgos lists digest algorithm names. Its elements from index 2
// onward are used as suffixes of files removed after a download.
// NOTE(review): the literal's contents are not visible here -- confirm
// that the first two entries are the SHA-256 and BLAKE2b-256 names.
57 KnownHashAlgos []string = []string{
// blake2b256New returns a hash.Hash obtained from blake2b.New256 called
// with a nil key, in a form usable as a func() hash.Hash constructor.
// NOTE(review): how the err returned by New256 is handled is not visible
// here -- confirm.
65 func blake2b256New() hash.Hash {
66 h, err := blake2b.New256(nil)
// agentedReq builds a body-less GET request for url and sets its
// User-Agent header to UserAgent.
// NOTE(review): how the err returned by http.NewRequest is handled is
// not visible here -- confirm.
73 func agentedReq(url string) *http.Request {
74 req, err := http.NewRequest("GET", url, nil)
78 req.Header.Set("User-Agent", UserAgent)
// Visible parameters: the response writer, the package name, and
// filenameGet, the name of the listed file whose download is triggered
// further down.
83 w http.ResponseWriter,
85 pkgName, filenameGet string,
// This branch is taken when the InternalFlag file can be stat'ed inside
// the package's directory under *Root.
88 if _, err := os.Stat(filepath.Join(*Root, pkgName, InternalFlag)); err == nil {
// Every upstream request below goes through this client, which is bound
// to PyPIHTTPTransport.
91 c := http.Client{Transport: &PyPIHTTPTransport}
92 dirPath := filepath.Join(*Root, pkgName)
// Release entries grouped under string keys; assigned from the Releases
// field of the decoded JSON metadata.
95 var allReleases map[string][]*PkgReleaseInfo
// Fetch the package's JSON metadata from *JSONURL + pkgName + "/json".
97 resp, err := c.Do(agentedReq(*JSONURL + pkgName + "/json"))
// Error path: log err and answer 502 Bad Gateway.
99 log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
100 http.Error(w, err.Error(), http.StatusBadGateway)
// Any upstream status other than 200 OK is logged and answered with 502.
103 if resp.StatusCode != http.StatusOK {
106 "error", r.RemoteAddr, "refresh-json", pkgName,
107 "HTTP status:", resp.Status,
109 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
// The whole response body is read into memory for decoding.
112 body, err := ioutil.ReadAll(resp.Body)
115 var description string
// The metadata record is serialized through a recfile writer into buf.
116 wr := recfile.NewWriter(&buf)
// Decode the JSON body into meta.
118 err = json.Unmarshal(body, &meta)
// Single-valued fields: metadata field name -> value taken from meta.Info.
120 for recField, jsonField := range map[string]string{
121 MetadataFieldName: meta.Info.Name,
122 MetadataFieldVersion: meta.Info.Version,
123 MetadataFieldSummary: meta.Info.Summary,
124 MetadataFieldDescriptionContentType: meta.Info.DescriptionContentType,
125 MetadataFieldKeywords: meta.Info.Keywords,
126 MetadataFieldHomePage: meta.Info.HomePage,
127 MetadataFieldAuthor: meta.Info.Author,
128 MetadataFieldAuthorEmail: meta.Info.AuthorEmail,
129 MetadataFieldMaintainer: meta.Info.Maintainer,
130 MetadataFieldMaintainerEmail: meta.Info.MaintainerEmail,
131 MetadataFieldLicense: meta.Info.License,
132 MetadataFieldRequiresPython: meta.Info.RequiresPython,
// Fields are written under the name produced by metadataFieldToRecField.
137 if _, err = wr.WriteFields(recfile.Field{
138 Name: metadataFieldToRecField(recField),
// Multi-valued fields: the inner loop writes one recfile field per list
// element.
144 for recField, jsonFields := range map[string][]string{
145 MetadataFieldClassifier: meta.Info.Classifier,
146 MetadataFieldPlatform: meta.Info.Platform,
147 MetadataFieldSupportedPlatform: meta.Info.SupportedPlatform,
148 MetadataFieldRequiresDist: meta.Info.RequiresDist,
149 MetadataFieldRequiresExternal: meta.Info.RequiresExternal,
150 MetadataFieldProjectURL: meta.Info.ProjectURL,
151 MetadataFieldProvidesExtra: meta.Info.ProvidesExtra,
153 for _, v := range jsonFields {
154 if _, err = wr.WriteFields(recfile.Field{
155 Name: metadataFieldToRecField(recField),
// Keep the description and the release lists for the steps that follow.
162 description = meta.Info.Description
163 allReleases = meta.Releases
// The same body is decoded into the PkgMetaStripped shape.
// NOTE(review): presumably the fallback used when decoding into meta
// fails -- the selecting condition is not visible here, confirm.
165 var metaStripped PkgMetaStripped
166 err = json.Unmarshal(body, &metaStripped)
// Error path: log the parse error and answer 502 Bad Gateway.
169 "error", r.RemoteAddr, "refresh-json", pkgName,
170 "can not parse JSON:", err,
172 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
// Same single-valued field mapping, sourced from metaStripped.Info.
175 for recField, jsonField := range map[string]string{
176 MetadataFieldName: metaStripped.Info.Name,
177 MetadataFieldVersion: metaStripped.Info.Version,
178 MetadataFieldSummary: metaStripped.Info.Summary,
179 MetadataFieldDescriptionContentType: metaStripped.Info.DescriptionContentType,
180 MetadataFieldKeywords: metaStripped.Info.Keywords,
181 MetadataFieldHomePage: metaStripped.Info.HomePage,
182 MetadataFieldAuthor: metaStripped.Info.Author,
183 MetadataFieldAuthorEmail: metaStripped.Info.AuthorEmail,
184 MetadataFieldMaintainer: metaStripped.Info.Maintainer,
185 MetadataFieldMaintainerEmail: metaStripped.Info.MaintainerEmail,
186 MetadataFieldLicense: metaStripped.Info.License,
187 MetadataFieldRequiresPython: metaStripped.Info.RequiresPython,
192 if _, err = wr.WriteFields(recfile.Field{
193 Name: metadataFieldToRecField(recField),
// Of the multi-valued fields only Classifier and RequiresDist are taken
// from the stripped form.
200 for recField, jsonFields := range map[string][]string{
201 MetadataFieldClassifier: metaStripped.Info.Classifier,
202 MetadataFieldRequiresDist: metaStripped.Info.RequiresDist,
204 for _, v := range jsonFields {
205 if _, err = wr.WriteFields(recfile.Field{
206 Name: metadataFieldToRecField(recField),
213 description = metaStripped.Info.Description
214 allReleases = metaStripped.Releases
// The description is split on newlines and written as one multiline field.
216 lines := strings.Split(description, "\n")
218 if _, err = wr.WriteFieldMultiline(
219 MetadataFieldDescription, lines,
225 if !mkdirForPkg(w, r, pkgName) {
// The metadata file is rewritten only when it cannot be read or when its
// current content differs from the freshly built buffer.
228 path := filepath.Join(dirPath, MetadataFile)
229 existing, err := ioutil.ReadFile(path)
230 if err != nil || bytes.Compare(existing, buf.Bytes()) != 0 {
231 if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil {
// A failed write is logged and answered with 500 Internal Server Error.
232 log.Println("error", r.RemoteAddr, "refresh-json", path, err)
233 http.Error(w, err.Error(), http.StatusInternalServerError)
236 log.Println(r.RemoteAddr, "pypi", pkgName+"."+MetadataFile, "touch")
// mtimes maps a release filename to its upload time, truncated to whole
// seconds.
239 mtimes := make(map[string]time.Time)
240 for _, releases := range allReleases {
241 for _, rel := range releases {
// Entries lacking a filename or an upload timestamp are singled out here.
242 if rel.Filename == "" || rel.UploadTimeISO8601 == "" {
// Upload timestamps are parsed with the RFC 3339 (nanosecond) layout.
245 t, err := time.Parse(time.RFC3339Nano, rel.UploadTimeISO8601)
// Error path: log the parse error and answer 502 Bad Gateway.
248 "error", r.RemoteAddr, "refresh-json", pkgName,
249 "can not parse upload_time:", err,
251 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
254 mtimes[rel.Filename] = t.Truncate(time.Second)
// Fetch the package's index page from *PyPIURL + pkgName + "/".
258 resp, err := c.Do(agentedReq(*PyPIURL + pkgName + "/"))
// Error path: log err and answer 502 Bad Gateway.
260 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
261 http.Error(w, err.Error(), http.StatusBadGateway)
// Any upstream status other than 200 OK is logged and answered with 502.
264 if resp.StatusCode != http.StatusOK {
267 "error", r.RemoteAddr, "refresh", pkgName,
268 "HTTP status:", resp.Status,
270 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
273 body, err := ioutil.ReadAll(resp.Body)
276 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
277 http.Error(w, err.Error(), http.StatusBadGateway)
280 if !mkdirForPkg(w, r, pkgName) {
// Walk the page line by line; each line is matched against PkgPyPI.
283 for _, lineRaw := range bytes.Split(body, []byte("\n")) {
284 submatches := PkgPyPI.FindStringSubmatch(string(lineRaw))
285 if len(submatches) == 0 {
// The anchor text (second capture group) is taken as the filename.
289 filename := submatches[2]
290 pkgURL, err := url.Parse(uri)
292 log.Println("error", r.RemoteAddr, "refresh", uri, err)
293 http.Error(w, err.Error(), http.StatusBadGateway)
// The digest travels in the URL fragment; a link without a fragment is
// logged as "no digest" and answered with 502.
297 if pkgURL.Fragment == "" {
298 log.Println(r.RemoteAddr, "pypi", filename, "no digest")
299 http.Error(w, "no digest provided", http.StatusBadGateway)
// Expected fragment form: "<algorithm>=<hex digest>".
302 digestInfo := strings.Split(pkgURL.Fragment, "=")
303 if len(digestInfo) == 1 {
304 // Ancient non PEP-0503 PyPIs, assume MD5
305 digestInfo = []string{"md5", digestInfo[0]}
306 } else if len(digestInfo) != 2 {
307 log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
308 http.Error(w, "invalid digest provided", http.StatusBadGateway)
// The digest value must be valid hexadecimal.
311 digest, err := hex.DecodeString(digestInfo[1])
313 log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
314 http.Error(w, err.Error(), http.StatusBadGateway)
317 hashAlgo := digestInfo[0]
// hasherNew is the constructor of the hash selected for hashAlgo and
// hashSize is the digest length in bytes that goes with it.
318 var hasherNew func() hash.Hash
325 hasherNew = sha256.New
326 hashSize = sha256.Size
328 hasherNew = sha512.New
329 hashSize = sha512.Size
330 case HashAlgoBLAKE2b256:
331 hasherNew = blake2b256New
332 hashSize = blake2b.Size256
// Unrecognized algorithm name: log it and answer 502.
334 log.Println("error", r.RemoteAddr, "pypi", filename, "unknown digest", hashAlgo)
335 http.Error(w, "unknown digest algorithm", http.StatusBadGateway)
// The decoded digest must have exactly the selected hash's length.
338 if len(digest) != hashSize {
339 log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest length")
340 http.Error(w, "invalid digest length", http.StatusBadGateway)
// Links without a host are resolved against PyPIURLParsed; the other
// assignment re-serializes pkgURL itself.
345 if pkgURL.Host == "" {
346 uri = PyPIURLParsed.ResolveReference(pkgURL).String()
348 uri = pkgURL.String()
// Upload time collected in mtimes for this filename, if there is one.
350 mtime, mtimeExists := mtimes[filename]
// Local path of the package file inside the package's directory.
355 path := filepath.Join(dirPath, filename)
// The download below is guarded by the filename matching filenameGet.
356 if filename == filenameGet {
358 // Skip heavy remote call, when shutting down
359 http.Error(w, "shutting down", http.StatusInternalServerError)
362 log.Println(r.RemoteAddr, "pypi", filename, "download")
363 resp, err = c.Do(agentedReq(uri))
365 log.Println("error", r.RemoteAddr, "pypi", filename, "download", err)
366 http.Error(w, err.Error(), http.StatusBadGateway)
// NOTE(review): this defer sits inside the per-line loop, so the body
// stays open until the enclosing function returns -- confirm intended.
369 defer resp.Body.Close()
370 if resp.StatusCode != http.StatusOK {
372 "error", r.RemoteAddr,
373 "pypi", filename, "download",
374 "HTTP status:", resp.Status,
376 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
// hasher checks the advertised digest. SHA-256 and BLAKE2b-256 hashers
// are created as well because both digests are written out afterwards.
379 hasher := hasherNew()
380 hasherSHA256 := sha256.New()
381 hasherBLAKE2b256 := blake2b256New()
// The payload is first written to a file obtained from TempFile(dirPath).
382 dst, err := TempFile(dirPath)
384 log.Println("error", r.RemoteAddr, "pypi", filename, err)
385 http.Error(w, err.Error(), http.StatusInternalServerError)
388 dstBuf := bufio.NewWriter(dst)
// The extra hashers are attached only when they differ from the
// advertised algorithm, which hasher already covers.
389 wrs := []io.Writer{hasher, dstBuf}
390 if hashAlgo != HashAlgoSHA256 {
391 wrs = append(wrs, hasherSHA256)
393 if hashAlgo != HashAlgoBLAKE2b256 {
394 wrs = append(wrs, hasherBLAKE2b256)
// Stream the body once through the file buffer and all attached hashers.
396 wr := io.MultiWriter(wrs...)
// On a copy or flush failure the temporary file is removed and 500 is
// answered.
397 if _, err = io.Copy(wr, resp.Body); err != nil {
398 os.Remove(dst.Name())
400 log.Println("error", r.RemoteAddr, "pypi", filename, err)
401 http.Error(w, err.Error(), http.StatusInternalServerError)
404 if err = dstBuf.Flush(); err != nil {
405 os.Remove(dst.Name())
407 log.Println("error", r.RemoteAddr, "pypi", filename, err)
408 http.Error(w, err.Error(), http.StatusInternalServerError)
// A computed digest that differs from the advertised one discards the
// temporary file and answers 502.
411 if bytes.Compare(hasher.Sum(nil), digest) != 0 {
412 log.Println(r.RemoteAddr, "pypi", filename, "digest mismatch")
413 os.Remove(dst.Name())
415 http.Error(w, "digest mismatch", http.StatusBadGateway)
// Sync the file contents before it is closed and moved into place.
419 if err = dst.Sync(); err != nil {
420 os.Remove(dst.Name())
422 log.Println("error", r.RemoteAddr, "pypi", filename, err)
423 http.Error(w, err.Error(), http.StatusInternalServerError)
427 if err = dst.Close(); err != nil {
428 log.Println("error", r.RemoteAddr, "pypi", filename, err)
429 http.Error(w, err.Error(), http.StatusInternalServerError)
// Set access and modification times to mtime, rename the temporary file
// to its final path, then call DirSync on the package directory.
432 if err = os.Chtimes(dst.Name(), mtime, mtime); err != nil {
433 log.Println("error", r.RemoteAddr, "pypi", filename, err)
434 http.Error(w, err.Error(), http.StatusInternalServerError)
436 if err = os.Rename(dst.Name(), path); err != nil {
437 log.Println("error", r.RemoteAddr, "pypi", filename, err)
438 http.Error(w, err.Error(), http.StatusInternalServerError)
441 if err = DirSync(dirPath); err != nil {
442 log.Println("error", r.RemoteAddr, "pypi", filename, err)
443 http.Error(w, err.Error(), http.StatusInternalServerError)
// hasher's result is reused when the advertised algorithm is itself
// SHA-256 or BLAKE2b-256; otherwise the dedicated hasher supplies it.
447 var digestSHA256 []byte
448 var digestBLAKE2b256 []byte
449 if hashAlgo == HashAlgoSHA256 {
450 digestSHA256 = hasher.Sum(nil)
452 digestSHA256 = hasherSHA256.Sum(nil)
454 if hashAlgo == HashAlgoBLAKE2b256 {
455 digestBLAKE2b256 = hasher.Sum(nil)
457 digestBLAKE2b256 = hasherBLAKE2b256.Sum(nil)
// Digest files are written next to the package file as "<path>.<algo>".
459 if err = WriteFileSync(
460 dirPath, path+"."+HashAlgoSHA256,
464 "error", r.RemoteAddr, "pypi",
465 path+"."+HashAlgoSHA256, err,
467 http.Error(w, err.Error(), http.StatusInternalServerError)
470 if err = WriteFileSync(
471 dirPath, path+"."+HashAlgoBLAKE2b256,
472 digestBLAKE2b256, mtime,
475 "error", r.RemoteAddr, "pypi",
476 path+"."+HashAlgoBLAKE2b256, err,
478 http.Error(w, err.Error(), http.StatusInternalServerError)
// Best-effort removal of digest files for the algorithms listed from
// index 2 onward in KnownHashAlgos; the os.Remove result is ignored.
481 for _, algo := range KnownHashAlgos[2:] {
482 os.Remove(path + "." + algo)
// If the package file exists and its modification time (compared at
// one-second resolution) differs from mtime, reset its times to mtime.
487 stat, err := os.Stat(path)
488 if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
489 log.Println(r.RemoteAddr, "pypi", filename, "touch")
490 if err = os.Chtimes(path, mtime, mtime); err != nil {
491 log.Println("error", r.RemoteAddr, "pypi", filename, err)
492 http.Error(w, err.Error(), http.StatusInternalServerError)
// Signature handling applies to the file named by filenameGet, or to
// every listed file when gpgUpdate is set.
497 if filename == filenameGet || gpgUpdate {
498 if _, err = os.Stat(path); err != nil {
// The signature is requested from the package URL with GPGSigExt appended.
501 resp, err := c.Do(agentedReq(uri + GPGSigExt))
505 if resp.StatusCode != http.StatusOK {
509 sig, err := ioutil.ReadAll(resp.Body)
// A body that does not start with the ASCII-armored PGP signature header
// is logged as "non PGP".
514 if !bytes.HasPrefix(sig, []byte("-----BEGIN PGP SIGNATURE-----")) {
515 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "non PGP")
// Store the signature next to the package file, passing mtime along.
518 if err = WriteFileSync(dirPath, path+GPGSigExt, sig, mtime); err != nil {
519 log.Println("error", r.RemoteAddr, "pypi", filename+GPGSigExt, err)
520 http.Error(w, err.Error(), http.StatusInternalServerError)
523 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "downloaded")
// An existing signature file gets the same mtime alignment as the
// package file above.
526 stat, err := os.Stat(path + GPGSigExt)
527 if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
528 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "touch")
529 if err = os.Chtimes(path+GPGSigExt, mtime, mtime); err != nil {
530 log.Println("error", r.RemoteAddr, "pypi", filename, err)
531 http.Error(w, err.Error(), http.StatusInternalServerError)
// From here on path names the digest file for the advertised algorithm:
// "<package file>.<hashAlgo>".
540 path = path + "." + hashAlgo
541 stat, err := os.Stat(path)
// One visible operand of the check on the digest file: a known upload
// time that equals the file's modification time at one-second resolution.
543 (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime)) {
// Stat failures other than "does not exist" are logged and answered
// with 500.
546 if err != nil && !os.IsNotExist(err) {
547 log.Println("error", r.RemoteAddr, "pypi", path, err)
548 http.Error(w, err.Error(), http.StatusInternalServerError)
// Write the advertised digest to the digest file, passing mtime along.
551 log.Println(r.RemoteAddr, "pypi", filename, "touch")
552 if err = WriteFileSync(dirPath, path, digest, mtime); err != nil {
553 log.Println("error", r.RemoteAddr, "pypi", path, err)
554 http.Error(w, err.Error(), http.StatusInternalServerError)