Cypherpunks.ru repositories - gocheese.git/blob - refresh.go
Unify copyright comment format
[gocheese.git] / refresh.go
1 // GoCheese -- Python private package repository and caching proxy
2 // Copyright (C) 2019-2024 Sergey Matveev <stargrave@stargrave.org>
3 //
4 // This program is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, version 3 of the License.
7 //
8 // This program is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 // GNU General Public License for more details.
12 //
13 // You should have received a copy of the GNU General Public License
14 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
15
16 package main
17
18 import (
19         "bufio"
20         "bytes"
21         "crypto/md5"
22         "crypto/sha256"
23         "crypto/sha512"
24         "encoding/hex"
25         "encoding/json"
26         "errors"
27         "hash"
28         "io"
29         "log"
30         "net/http"
31         "net/url"
32         "os"
33         "path/filepath"
34         "regexp"
35         "strings"
36         "time"
37
38         "go.cypherpunks.ru/recfile"
39         "golang.org/x/crypto/blake2b"
40 )
41
// Names of the digest algorithms recognised in this file. refreshDir uses
// them as keys into the per-release "Digests" map of the upstream JSON,
// as the algorithm part of an "algo=hex" URL fragment, and as the
// extension of the digest files stored next to a package file
// (path + "." + algo).
const (
	HashAlgoBLAKE2b256 = "blake2b_256"
	HashAlgoSHA256     = "sha256"
	HashAlgoSHA512     = "sha512"
	HashAlgoMD5        = "md5"
	// InternalFlag is the name of a marker file inside a package
	// directory: when it exists, refreshDir returns immediately without
	// contacting upstream.
	InternalFlag       = ".internal"
)
49
var (
	// PkgPyPI matches a single line of the upstream index page that
	// contains an <a href="...">...</a> element. Submatch 1 is the href
	// value, submatch 2 is the link text (refreshDir uses it as the
	// filename).
	PkgPyPI           = regexp.MustCompile(`^.*<a href="([^"]+)"[^>]*>(.+)</a>.*$`)
	// PyPIURLParsed is the base URL that refreshDir resolves host-less
	// links against. It is not assigned in this part of the file --
	// presumably initialised at startup; verify against the caller.
	PyPIURLParsed     *url.URL
	// PyPIHTTPTransport is the transport used by the HTTP client that
	// refreshDir creates for upstream requests.
	PyPIHTTPTransport http.Transport
	// KnownHashAlgos lists the supported digest algorithm names. The
	// order is significant: after a successful download refreshDir always
	// writes the first two (BLAKE2b-256, SHA-256) digest files and
	// removes the files for KnownHashAlgos[2:].
	KnownHashAlgos    []string = []string{
		HashAlgoBLAKE2b256,
		HashAlgoSHA256,
		HashAlgoSHA512,
		HashAlgoMD5,
	}
)
61
62 func blake2b256New() hash.Hash {
63         h, err := blake2b.New256(nil)
64         if err != nil {
65                 panic(err)
66         }
67         return h
68 }
69
70 func agentedReq(url string) *http.Request {
71         req, err := http.NewRequest("GET", url, nil)
72         if err != nil {
73                 log.Fatal(err)
74         }
75         req.Header.Set("User-Agent", UserAgent)
76         return req
77 }
78
// RecFieldToValuesMap pairs a metadata field name with the list of values
// found for it in the upstream JSON. refreshDir uses it for multi-valued
// fields, writing one recfile field per value.
type RecFieldToValuesMap struct {
	// recField is the metadata field name; refreshDir looks it up in
	// MDFieldToRecField to get the recfile field name.
	recField   string
	// jsonFields holds the values taken from the JSON metadata.
	jsonFields []string
}
83
84 func refreshDir(
85         w http.ResponseWriter,
86         r *http.Request,
87         pkgName, filenameGet string,
88 ) bool {
89         if _, err := os.Stat(filepath.Join(Root, pkgName, InternalFlag)); err == nil {
90                 return true
91         }
92         c := http.Client{Transport: &PyPIHTTPTransport}
93         dirPath := filepath.Join(Root, pkgName)
94         now := time.Now()
95
96         var allReleases map[string][]*PkgReleaseInfo
97         if *JSONURL != "" {
98                 resp, err := c.Do(agentedReq(*JSONURL + pkgName + "/json"))
99                 if err != nil {
100                         log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
101                         http.Error(w, err.Error(), http.StatusBadGateway)
102                         return false
103                 }
104                 if resp.StatusCode != http.StatusOK {
105                         resp.Body.Close()
106                         log.Println(
107                                 "error", r.RemoteAddr, "refresh-json", pkgName,
108                                 "HTTP status:", resp.Status,
109                         )
110                         http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
111                         return false
112                 }
113                 body, err := io.ReadAll(resp.Body)
114                 if err != nil {
115                         resp.Body.Close()
116                         log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
117                         http.Error(w, "can not read body", http.StatusBadGateway)
118                         return false
119                 }
120                 resp.Body.Close()
121                 var buf bytes.Buffer
122                 var description string
123                 wr := recfile.NewWriter(&buf)
124                 var meta PkgMeta
125                 err = json.Unmarshal(body, &meta)
126                 if err == nil {
127                         for _, m := range [][2]string{
128                                 {MDFieldName, meta.Info.Name},
129                                 {MDFieldVersion, meta.Info.Version},
130                                 {MDFieldSummary, meta.Info.Summary},
131                                 {MDFieldDescriptionContentType, meta.Info.DescriptionContentType},
132                                 {MDFieldKeywords, meta.Info.Keywords},
133                                 {MDFieldHomePage, meta.Info.HomePage},
134                                 {MDFieldAuthor, meta.Info.Author},
135                                 {MDFieldAuthorEmail, meta.Info.AuthorEmail},
136                                 {MDFieldMaintainer, meta.Info.Maintainer},
137                                 {MDFieldMaintainerEmail, meta.Info.MaintainerEmail},
138                                 {MDFieldLicense, meta.Info.License},
139                                 {MDFieldRequiresPython, meta.Info.RequiresPython},
140                         } {
141                                 recField, jsonField := m[0], m[1]
142                                 if jsonField == "" {
143                                         continue
144                                 }
145                                 if _, err = wr.WriteFields(recfile.Field{
146                                         Name:  MDFieldToRecField[recField],
147                                         Value: jsonField,
148                                 }); err != nil {
149                                         log.Fatal(err)
150                                 }
151                         }
152                         for _, m := range []RecFieldToValuesMap{
153                                 {MDFieldClassifier, meta.Info.Classifier},
154                                 {MDFieldPlatform, meta.Info.Platform},
155                                 {MDFieldSupportedPlatform, meta.Info.SupportedPlatform},
156                                 {MDFieldRequiresDist, meta.Info.RequiresDist},
157                                 {MDFieldRequiresExternal, meta.Info.RequiresExternal},
158                                 {MDFieldProjectURL, meta.Info.ProjectURL},
159                                 {MDFieldProvidesExtra, meta.Info.ProvidesExtra},
160                         } {
161                                 for _, v := range m.jsonFields {
162                                         if _, err = wr.WriteFields(recfile.Field{
163                                                 Name:  MDFieldToRecField[m.recField],
164                                                 Value: v,
165                                         }); err != nil {
166                                                 log.Fatal(err)
167                                         }
168                                 }
169                         }
170                         description = meta.Info.Description
171                         allReleases = meta.Releases
172                 } else {
173                         var metaStripped PkgMetaStripped
174                         err = json.Unmarshal(body, &metaStripped)
175                         if err != nil {
176                                 log.Println(
177                                         "error", r.RemoteAddr, "refresh-json", pkgName,
178                                         "can not parse JSON:", err,
179                                 )
180                                 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
181                                 return false
182                         }
183                         for _, m := range [][2]string{
184                                 {MDFieldName, metaStripped.Info.Name},
185                                 {MDFieldVersion, metaStripped.Info.Version},
186                                 {MDFieldSummary, metaStripped.Info.Summary},
187                                 {MDFieldDescriptionContentType, metaStripped.Info.DescriptionContentType},
188                                 {MDFieldKeywords, metaStripped.Info.Keywords},
189                                 {MDFieldHomePage, metaStripped.Info.HomePage},
190                                 {MDFieldAuthor, metaStripped.Info.Author},
191                                 {MDFieldAuthorEmail, metaStripped.Info.AuthorEmail},
192                                 {MDFieldMaintainer, metaStripped.Info.Maintainer},
193                                 {MDFieldMaintainerEmail, metaStripped.Info.MaintainerEmail},
194                                 {MDFieldLicense, metaStripped.Info.License},
195                                 {MDFieldRequiresPython, metaStripped.Info.RequiresPython},
196                         } {
197                                 recField, jsonField := m[0], m[1]
198                                 if jsonField == "" {
199                                         continue
200                                 }
201                                 if _, err = wr.WriteFields(recfile.Field{
202                                         Name:  MDFieldToRecField[recField],
203                                         Value: jsonField,
204                                 }); err != nil {
205                                         log.Fatal(err)
206                                 }
207                         }
208
209                         for _, m := range []RecFieldToValuesMap{
210                                 {MDFieldClassifier, metaStripped.Info.Classifier},
211                                 {MDFieldRequiresDist, metaStripped.Info.RequiresDist},
212                         } {
213                                 for _, v := range m.jsonFields {
214                                         if _, err = wr.WriteFields(recfile.Field{
215                                                 Name:  MDFieldToRecField[m.recField],
216                                                 Value: v,
217                                         }); err != nil {
218                                                 log.Fatal(err)
219                                         }
220                                 }
221                         }
222                         description = metaStripped.Info.Description
223                         allReleases = metaStripped.Releases
224                 }
225                 lines := strings.Split(description, "\n")
226                 if len(lines) > 0 {
227                         if _, err = wr.WriteFieldMultiline(
228                                 MDFieldDescription, lines,
229                         ); err != nil {
230                                 log.Fatal(err)
231                         }
232                 }
233
234                 if !mkdirForPkg(w, r, pkgName) {
235                         return false
236                 }
237                 path := filepath.Join(dirPath, MDFile)
238                 existing, err := os.ReadFile(path)
239                 if err != nil || !bytes.Equal(existing, buf.Bytes()) {
240                         if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil {
241                                 log.Println("error", r.RemoteAddr, "refresh-json", path, err)
242                                 http.Error(w, err.Error(), http.StatusInternalServerError)
243                                 return false
244                         }
245                         log.Println(r.RemoteAddr, "pypi", pkgName+"/"+MDFile, "touch")
246                 }
247         }
248         mtimes := make(map[string]time.Time)
249         digestsBLAKE2b256 := make(map[string][]byte)
250         digestsSHA256 := make(map[string][]byte)
251         digestsSHA512 := make(map[string][]byte)
252         for _, releases := range allReleases {
253                 for _, rel := range releases {
254                         if rel.Filename == "" || rel.UploadTimeISO8601 == "" {
255                                 continue
256                         }
257                         t, err := time.Parse(time.RFC3339Nano, rel.UploadTimeISO8601)
258                         if err != nil {
259                                 log.Println(
260                                         "error", r.RemoteAddr, "refresh-json", pkgName,
261                                         "can not parse upload_time:", err,
262                                 )
263                                 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
264                                 return false
265                         }
266                         mtimes[rel.Filename] = t.Truncate(time.Second)
267                         if d := rel.Digests[HashAlgoBLAKE2b256]; d != "" {
268                                 digestsBLAKE2b256[rel.Filename], err = hex.DecodeString(d)
269                                 if err != nil {
270                                         log.Println(
271                                                 "error", r.RemoteAddr, "refresh-json", pkgName,
272                                                 "can not decode blake2b_256 digest:", err,
273                                         )
274                                         http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
275                                         return false
276                                 }
277                         }
278                         if d := rel.Digests[HashAlgoSHA256]; d != "" {
279                                 digestsSHA256[rel.Filename], err = hex.DecodeString(d)
280                                 if err != nil {
281                                         log.Println(
282                                                 "error", r.RemoteAddr, "refresh-json", pkgName,
283                                                 "can not decode sha256 digest:", err,
284                                         )
285                                         http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
286                                         return false
287                                 }
288                         }
289                         if d := rel.Digests[HashAlgoSHA512]; d != "" {
290                                 digestsSHA512[rel.Filename], err = hex.DecodeString(d)
291                                 if err != nil {
292                                         log.Println(
293                                                 "error", r.RemoteAddr, "refresh-json", pkgName,
294                                                 "can not decode sha512 digest:", err,
295                                         )
296                                         http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
297                                         return false
298                                 }
299                         }
300                 }
301         }
302
303         resp, err := c.Do(agentedReq(*PyPIURL + pkgName + "/"))
304         if err != nil {
305                 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
306                 http.Error(w, err.Error(), http.StatusBadGateway)
307                 return false
308         }
309         if resp.StatusCode != http.StatusOK {
310                 resp.Body.Close()
311                 log.Println(
312                         "error", r.RemoteAddr, "refresh", pkgName,
313                         "HTTP status:", resp.Status,
314                 )
315                 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
316                 return false
317         }
318         body, err := io.ReadAll(resp.Body)
319         resp.Body.Close()
320         if err != nil {
321                 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
322                 http.Error(w, err.Error(), http.StatusBadGateway)
323                 return false
324         }
325         if !mkdirForPkg(w, r, pkgName) {
326                 return false
327         }
328         for _, lineRaw := range bytes.Split(body, []byte("\n")) {
329                 submatches := PkgPyPI.FindStringSubmatch(string(lineRaw))
330                 if len(submatches) == 0 {
331                         continue
332                 }
333                 uri := submatches[1]
334                 filename := submatches[2]
335                 pkgURL, err := url.Parse(uri)
336                 if err != nil {
337                         log.Println("error", r.RemoteAddr, "refresh", uri, err)
338                         http.Error(w, err.Error(), http.StatusBadGateway)
339                         return false
340                 }
341
342                 var hashAlgo string
343                 var hasherNew func() hash.Hash
344                 var digest []byte
345                 if d := digestsBLAKE2b256[filename]; d != nil {
346                         hasherNew = blake2b256New
347                         hashAlgo = HashAlgoBLAKE2b256
348                         digest = d
349                 } else if d := digestsSHA256[filename]; d != nil {
350                         hasherNew = sha256.New
351                         hashAlgo = HashAlgoSHA256
352                         digest = d
353                 } else if d := digestsSHA512[filename]; d != nil {
354                         hasherNew = sha512.New
355                         hashAlgo = HashAlgoSHA512
356                         digest = d
357                 } else {
358                         if pkgURL.Fragment == "" {
359                                 log.Println(r.RemoteAddr, "pypi", filename, "no digest")
360                                 http.Error(w, "no digest provided", http.StatusBadGateway)
361                                 return false
362                         }
363                         digestInfo := strings.Split(pkgURL.Fragment, "=")
364                         if len(digestInfo) == 1 {
365                                 // Ancient non PEP-0503 PyPIs, assume MD5
366                                 digestInfo = []string{"md5", digestInfo[0]}
367                         } else if len(digestInfo) != 2 {
368                                 log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
369                                 http.Error(w, "invalid digest provided", http.StatusBadGateway)
370                                 return false
371                         }
372                         var err error
373                         digest, err = hex.DecodeString(digestInfo[1])
374                         if err != nil {
375                                 log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
376                                 http.Error(w, err.Error(), http.StatusBadGateway)
377                                 return false
378                         }
379                         hashAlgo = digestInfo[0]
380                         var hashSize int
381                         switch hashAlgo {
382                         case HashAlgoBLAKE2b256:
383                                 hasherNew = blake2b256New
384                                 hashSize = blake2b.Size256
385                         case HashAlgoSHA256:
386                                 hasherNew = sha256.New
387                                 hashSize = sha256.Size
388                         case HashAlgoSHA512:
389                                 hasherNew = sha512.New
390                                 hashSize = sha512.Size
391                         case HashAlgoMD5:
392                                 hasherNew = md5.New
393                                 hashSize = md5.Size
394                         default:
395                                 log.Println(
396                                         "error", r.RemoteAddr, "pypi",
397                                         filename, "unknown digest", hashAlgo,
398                                 )
399                                 http.Error(w, "unknown digest algorithm", http.StatusBadGateway)
400                                 return false
401                         }
402                         if len(digest) != hashSize {
403                                 log.Println(
404                                         "error", r.RemoteAddr, "pypi",
405                                         filename, "invalid digest length")
406                                 http.Error(w, "invalid digest length", http.StatusBadGateway)
407                                 return false
408                         }
409                 }
410
411                 pkgURL.Fragment = ""
412                 if pkgURL.Host == "" {
413                         uri = PyPIURLParsed.ResolveReference(pkgURL).String()
414                 } else {
415                         uri = pkgURL.String()
416                 }
417                 mtime, mtimeExists := mtimes[filename]
418                 if !mtimeExists {
419                         mtime = now
420                 }
421
422                 path := filepath.Join(dirPath, filename)
423                 if filename == filenameGet {
424                         if Killed {
425                                 // Skip heavy remote call, when shutting down
426                                 http.Error(w, "shutting down", http.StatusInternalServerError)
427                                 return false
428                         }
429                         log.Println(r.RemoteAddr, "pypi", filename, "download")
430                         resp, err = c.Do(agentedReq(uri))
431                         if err != nil {
432                                 log.Println("error", r.RemoteAddr, "pypi", filename, "download", err)
433                                 http.Error(w, err.Error(), http.StatusBadGateway)
434                                 return false
435                         }
436                         defer resp.Body.Close()
437                         if resp.StatusCode != http.StatusOK {
438                                 log.Println(
439                                         "error", r.RemoteAddr,
440                                         "pypi", filename, "download",
441                                         "HTTP status:", resp.Status,
442                                 )
443                                 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
444                                 return false
445                         }
446                         hasher := hasherNew()
447                         hasherBLAKE2b256 := blake2b256New()
448                         hasherSHA256 := sha256.New()
449                         dst, err := TempFile(dirPath)
450                         if err != nil {
451                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
452                                 http.Error(w, err.Error(), http.StatusInternalServerError)
453                                 return false
454                         }
455                         dstBuf := bufio.NewWriter(dst)
456                         wrs := []io.Writer{hasher, dstBuf}
457                         if hashAlgo != HashAlgoBLAKE2b256 {
458                                 wrs = append(wrs, hasherBLAKE2b256)
459                         }
460                         if hashAlgo != HashAlgoSHA256 {
461                                 wrs = append(wrs, hasherSHA256)
462                         }
463                         wr := io.MultiWriter(wrs...)
464                         if _, err = io.Copy(wr, resp.Body); err != nil {
465                                 os.Remove(dst.Name())
466                                 dst.Close()
467                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
468                                 http.Error(w, err.Error(), http.StatusInternalServerError)
469                                 return false
470                         }
471                         if err = dstBuf.Flush(); err != nil {
472                                 os.Remove(dst.Name())
473                                 dst.Close()
474                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
475                                 http.Error(w, err.Error(), http.StatusInternalServerError)
476                                 return false
477                         }
478                         if !bytes.Equal(hasher.Sum(nil), digest) {
479                                 log.Println(r.RemoteAddr, "pypi", filename, "digest mismatch")
480                                 os.Remove(dst.Name())
481                                 dst.Close()
482                                 http.Error(w, "digest mismatch", http.StatusBadGateway)
483                                 return false
484                         }
485                         if digestStored, err := os.ReadFile(path + "." + hashAlgo); err == nil &&
486                                 !bytes.Equal(digest, digestStored) {
487                                 err = errors.New("stored digest mismatch")
488                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
489                                 os.Remove(dst.Name())
490                                 dst.Close()
491                                 http.Error(w, err.Error(), http.StatusInternalServerError)
492                                 return false
493                         }
494                         if !NoSync {
495                                 if err = dst.Sync(); err != nil {
496                                         os.Remove(dst.Name())
497                                         dst.Close()
498                                         log.Println("error", r.RemoteAddr, "pypi", filename, err)
499                                         http.Error(w, err.Error(), http.StatusInternalServerError)
500                                         return false
501                                 }
502                         }
503                         if err = dst.Close(); err != nil {
504                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
505                                 http.Error(w, err.Error(), http.StatusInternalServerError)
506                                 return false
507                         }
508                         if err = os.Chtimes(dst.Name(), mtime, mtime); err != nil {
509                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
510                                 http.Error(w, err.Error(), http.StatusInternalServerError)
511                         }
512                         if err = os.Rename(dst.Name(), path); err != nil {
513                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
514                                 http.Error(w, err.Error(), http.StatusInternalServerError)
515                                 return false
516                         }
517                         if err = DirSync(dirPath); err != nil {
518                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
519                                 http.Error(w, err.Error(), http.StatusInternalServerError)
520                                 return false
521                         }
522
523                         var digestBLAKE2b256 []byte
524                         var digestSHA256 []byte
525                         if hashAlgo == HashAlgoBLAKE2b256 {
526                                 digestBLAKE2b256 = hasher.Sum(nil)
527                         } else {
528                                 digestBLAKE2b256 = hasherBLAKE2b256.Sum(nil)
529                         }
530                         if hashAlgo == HashAlgoSHA256 {
531                                 digestSHA256 = hasher.Sum(nil)
532                         } else {
533                                 digestSHA256 = hasherSHA256.Sum(nil)
534                         }
535                         if err = WriteFileSync(
536                                 dirPath, path+"."+HashAlgoBLAKE2b256,
537                                 digestBLAKE2b256, mtime,
538                         ); err != nil {
539                                 log.Println(
540                                         "error", r.RemoteAddr, "pypi",
541                                         path+"."+HashAlgoBLAKE2b256, err,
542                                 )
543                                 http.Error(w, err.Error(), http.StatusInternalServerError)
544                                 return false
545                         }
546                         if err = WriteFileSync(
547                                 dirPath, path+"."+HashAlgoSHA256,
548                                 digestSHA256, mtime,
549                         ); err != nil {
550                                 log.Println(
551                                         "error", r.RemoteAddr, "pypi",
552                                         path+"."+HashAlgoSHA256, err,
553                                 )
554                                 http.Error(w, err.Error(), http.StatusInternalServerError)
555                                 return false
556                         }
557                         for _, algo := range KnownHashAlgos[2:] {
558                                 os.Remove(path + "." + algo)
559                         }
560                         digest = nil
561                 }
562                 if mtimeExists {
563                         stat, err := os.Stat(path)
564                         if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
565                                 log.Println(r.RemoteAddr, "pypi", filename, "touch")
566                                 if err = os.Chtimes(path, mtime, mtime); err != nil {
567                                         log.Println("error", r.RemoteAddr, "pypi", filename, err)
568                                         http.Error(w, err.Error(), http.StatusInternalServerError)
569                                 }
570                         }
571                 }
572
573                 if digest == nil {
574                         continue
575                 }
576                 path = path + "." + hashAlgo
577                 stat, err := os.Stat(path)
578                 if err == nil && (!mtimeExists ||
579                         (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime))) {
580                         continue
581                 }
582                 if err != nil && !os.IsNotExist(err) {
583                         log.Println("error", r.RemoteAddr, "pypi", path, err)
584                         http.Error(w, err.Error(), http.StatusInternalServerError)
585                         return false
586                 }
587                 log.Println(r.RemoteAddr, "pypi", filename, "touch")
588                 if err = WriteFileSync(dirPath, path, digest, mtime); err != nil {
589                         log.Println("error", r.RemoteAddr, "pypi", path, err)
590                         http.Error(w, err.Error(), http.StatusInternalServerError)
591                         return false
592                 }
593         }
594         return true
595 }