Cypherpunks.ru repositories - gocheese.git/blob - refresh.go
Store blake2b_256 during refresh
[gocheese.git] / refresh.go
1 /*
2 GoCheese -- Python private package repository and caching proxy
3 Copyright (C) 2019-2023 Sergey Matveev <stargrave@stargrave.org>
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, version 3 of the License.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 package main
19
20 import (
21         "bufio"
22         "bytes"
23         "crypto/md5"
24         "crypto/sha256"
25         "crypto/sha512"
26         "encoding/hex"
27         "encoding/json"
28         "errors"
29         "hash"
30         "io"
31         "log"
32         "net/http"
33         "net/url"
34         "os"
35         "path/filepath"
36         "regexp"
37         "strings"
38         "time"
39
40         "go.cypherpunks.ru/recfile"
41         "golang.org/x/crypto/blake2b"
42 )
43
// Names of the supported digest algorithms. The same strings are used as
// keys of the "digests" map in upstream JSON metadata, as the algorithm
// prefix in download URL fragments ("algo=hexdigest") and as the suffix of
// the per-file digest files kept next to each package file.
const (
	HashAlgoBLAKE2b256 = "blake2b_256"
	HashAlgoSHA256     = "sha256"
	HashAlgoSHA512     = "sha512"
	HashAlgoMD5        = "md5"
)

// InternalFlag is the name of the marker file inside a package directory.
// When it exists, refreshDir leaves the package alone and never contacts
// upstream for it.
const InternalFlag = ".internal"
51
52 var (
53         PkgPyPI           = regexp.MustCompile(`^.*<a href="([^"]+)"[^>]*>(.+)</a>.*$`)
54         PyPIURLParsed     *url.URL
55         PyPIHTTPTransport http.Transport
56         KnownHashAlgos    []string = []string{
57                 HashAlgoBLAKE2b256,
58                 HashAlgoSHA256,
59                 HashAlgoSHA512,
60                 HashAlgoMD5,
61         }
62 )
63
64 func blake2b256New() hash.Hash {
65         h, err := blake2b.New256(nil)
66         if err != nil {
67                 panic(err)
68         }
69         return h
70 }
71
72 func agentedReq(url string) *http.Request {
73         req, err := http.NewRequest("GET", url, nil)
74         if err != nil {
75                 log.Fatalln(err)
76         }
77         req.Header.Set("User-Agent", UserAgent)
78         return req
79 }
80
// RecFieldToValuesMap pairs one metadata field with all of the values
// that upstream JSON supplied for it. refreshDir writes one recfile
// field per value.
type RecFieldToValuesMap struct {
	// recField is the metadata field name; it is used as the key into
	// MDFieldToRecField to obtain the name written to the recfile.
	recField string
	// jsonFields holds the values taken from the JSON document.
	jsonFields []string
}
85
86 func refreshDir(
87         w http.ResponseWriter,
88         r *http.Request,
89         pkgName, filenameGet string,
90 ) bool {
91         if _, err := os.Stat(filepath.Join(Root, pkgName, InternalFlag)); err == nil {
92                 return true
93         }
94         c := http.Client{Transport: &PyPIHTTPTransport}
95         dirPath := filepath.Join(Root, pkgName)
96         now := time.Now()
97
98         var allReleases map[string][]*PkgReleaseInfo
99         if *JSONURL != "" {
100                 resp, err := c.Do(agentedReq(*JSONURL + pkgName + "/json"))
101                 if err != nil {
102                         log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
103                         http.Error(w, err.Error(), http.StatusBadGateway)
104                         return false
105                 }
106                 if resp.StatusCode != http.StatusOK {
107                         resp.Body.Close()
108                         log.Println(
109                                 "error", r.RemoteAddr, "refresh-json", pkgName,
110                                 "HTTP status:", resp.Status,
111                         )
112                         http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
113                         return false
114                 }
115                 body, err := io.ReadAll(resp.Body)
116                 if err != nil {
117                         resp.Body.Close()
118                         log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
119                         http.Error(w, "can not read body", http.StatusBadGateway)
120                         return false
121                 }
122                 resp.Body.Close()
123                 var buf bytes.Buffer
124                 var description string
125                 wr := recfile.NewWriter(&buf)
126                 var meta PkgMeta
127                 err = json.Unmarshal(body, &meta)
128                 if err == nil {
129                         for _, m := range [][2]string{
130                                 {MDFieldName, meta.Info.Name},
131                                 {MDFieldVersion, meta.Info.Version},
132                                 {MDFieldSummary, meta.Info.Summary},
133                                 {MDFieldDescriptionContentType, meta.Info.DescriptionContentType},
134                                 {MDFieldKeywords, meta.Info.Keywords},
135                                 {MDFieldHomePage, meta.Info.HomePage},
136                                 {MDFieldAuthor, meta.Info.Author},
137                                 {MDFieldAuthorEmail, meta.Info.AuthorEmail},
138                                 {MDFieldMaintainer, meta.Info.Maintainer},
139                                 {MDFieldMaintainerEmail, meta.Info.MaintainerEmail},
140                                 {MDFieldLicense, meta.Info.License},
141                                 {MDFieldRequiresPython, meta.Info.RequiresPython},
142                         } {
143                                 recField, jsonField := m[0], m[1]
144                                 if jsonField == "" {
145                                         continue
146                                 }
147                                 if _, err = wr.WriteFields(recfile.Field{
148                                         Name:  MDFieldToRecField[recField],
149                                         Value: jsonField,
150                                 }); err != nil {
151                                         log.Fatalln(err)
152                                 }
153                         }
154                         for _, m := range []RecFieldToValuesMap{
155                                 {MDFieldClassifier, meta.Info.Classifier},
156                                 {MDFieldPlatform, meta.Info.Platform},
157                                 {MDFieldSupportedPlatform, meta.Info.SupportedPlatform},
158                                 {MDFieldRequiresDist, meta.Info.RequiresDist},
159                                 {MDFieldRequiresExternal, meta.Info.RequiresExternal},
160                                 {MDFieldProjectURL, meta.Info.ProjectURL},
161                                 {MDFieldProvidesExtra, meta.Info.ProvidesExtra},
162                         } {
163                                 for _, v := range m.jsonFields {
164                                         if _, err = wr.WriteFields(recfile.Field{
165                                                 Name:  MDFieldToRecField[m.recField],
166                                                 Value: v,
167                                         }); err != nil {
168                                                 log.Fatalln(err)
169                                         }
170                                 }
171                         }
172                         description = meta.Info.Description
173                         allReleases = meta.Releases
174                 } else {
175                         var metaStripped PkgMetaStripped
176                         err = json.Unmarshal(body, &metaStripped)
177                         if err != nil {
178                                 log.Println(
179                                         "error", r.RemoteAddr, "refresh-json", pkgName,
180                                         "can not parse JSON:", err,
181                                 )
182                                 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
183                                 return false
184                         }
185                         for _, m := range [][2]string{
186                                 {MDFieldName, metaStripped.Info.Name},
187                                 {MDFieldVersion, metaStripped.Info.Version},
188                                 {MDFieldSummary, metaStripped.Info.Summary},
189                                 {MDFieldDescriptionContentType, metaStripped.Info.DescriptionContentType},
190                                 {MDFieldKeywords, metaStripped.Info.Keywords},
191                                 {MDFieldHomePage, metaStripped.Info.HomePage},
192                                 {MDFieldAuthor, metaStripped.Info.Author},
193                                 {MDFieldAuthorEmail, metaStripped.Info.AuthorEmail},
194                                 {MDFieldMaintainer, metaStripped.Info.Maintainer},
195                                 {MDFieldMaintainerEmail, metaStripped.Info.MaintainerEmail},
196                                 {MDFieldLicense, metaStripped.Info.License},
197                                 {MDFieldRequiresPython, metaStripped.Info.RequiresPython},
198                         } {
199                                 recField, jsonField := m[0], m[1]
200                                 if jsonField == "" {
201                                         continue
202                                 }
203                                 if _, err = wr.WriteFields(recfile.Field{
204                                         Name:  MDFieldToRecField[recField],
205                                         Value: jsonField,
206                                 }); err != nil {
207                                         log.Fatalln(err)
208                                 }
209                         }
210
211                         for _, m := range []RecFieldToValuesMap{
212                                 {MDFieldClassifier, metaStripped.Info.Classifier},
213                                 {MDFieldRequiresDist, metaStripped.Info.RequiresDist},
214                         } {
215                                 for _, v := range m.jsonFields {
216                                         if _, err = wr.WriteFields(recfile.Field{
217                                                 Name:  MDFieldToRecField[m.recField],
218                                                 Value: v,
219                                         }); err != nil {
220                                                 log.Fatalln(err)
221                                         }
222                                 }
223                         }
224                         description = metaStripped.Info.Description
225                         allReleases = metaStripped.Releases
226                 }
227                 lines := strings.Split(description, "\n")
228                 if len(lines) > 0 {
229                         if _, err = wr.WriteFieldMultiline(
230                                 MDFieldDescription, lines,
231                         ); err != nil {
232                                 log.Fatalln(err)
233                         }
234                 }
235
236                 if !mkdirForPkg(w, r, pkgName) {
237                         return false
238                 }
239                 path := filepath.Join(dirPath, MDFile)
240                 existing, err := os.ReadFile(path)
241                 if err != nil || !bytes.Equal(existing, buf.Bytes()) {
242                         if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil {
243                                 log.Println("error", r.RemoteAddr, "refresh-json", path, err)
244                                 http.Error(w, err.Error(), http.StatusInternalServerError)
245                                 return false
246                         }
247                         log.Println(r.RemoteAddr, "pypi", pkgName+"/"+MDFile, "touch")
248                 }
249         }
250         mtimes := make(map[string]time.Time)
251         digestsBLAKE2b256 := make(map[string][]byte)
252         digestsSHA256 := make(map[string][]byte)
253         digestsSHA512 := make(map[string][]byte)
254         for _, releases := range allReleases {
255                 for _, rel := range releases {
256                         if rel.Filename == "" || rel.UploadTimeISO8601 == "" {
257                                 continue
258                         }
259                         t, err := time.Parse(time.RFC3339Nano, rel.UploadTimeISO8601)
260                         if err != nil {
261                                 log.Println(
262                                         "error", r.RemoteAddr, "refresh-json", pkgName,
263                                         "can not parse upload_time:", err,
264                                 )
265                                 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
266                                 return false
267                         }
268                         mtimes[rel.Filename] = t.Truncate(time.Second)
269                         if d := rel.Digests[HashAlgoBLAKE2b256]; d != "" {
270                                 digestsBLAKE2b256[rel.Filename], err = hex.DecodeString(d)
271                                 if err != nil {
272                                         log.Println(
273                                                 "error", r.RemoteAddr, "refresh-json", pkgName,
274                                                 "can not decode blake2b_256 digest:", err,
275                                         )
276                                         http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
277                                         return false
278                                 }
279                         }
280                         if d := rel.Digests[HashAlgoSHA256]; d != "" {
281                                 digestsSHA256[rel.Filename], err = hex.DecodeString(d)
282                                 if err != nil {
283                                         log.Println(
284                                                 "error", r.RemoteAddr, "refresh-json", pkgName,
285                                                 "can not decode sha256 digest:", err,
286                                         )
287                                         http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
288                                         return false
289                                 }
290                         }
291                         if d := rel.Digests[HashAlgoSHA512]; d != "" {
292                                 digestsSHA512[rel.Filename], err = hex.DecodeString(d)
293                                 if err != nil {
294                                         log.Println(
295                                                 "error", r.RemoteAddr, "refresh-json", pkgName,
296                                                 "can not decode sha512 digest:", err,
297                                         )
298                                         http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
299                                         return false
300                                 }
301                         }
302                 }
303         }
304
305         resp, err := c.Do(agentedReq(*PyPIURL + pkgName + "/"))
306         if err != nil {
307                 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
308                 http.Error(w, err.Error(), http.StatusBadGateway)
309                 return false
310         }
311         if resp.StatusCode != http.StatusOK {
312                 resp.Body.Close()
313                 log.Println(
314                         "error", r.RemoteAddr, "refresh", pkgName,
315                         "HTTP status:", resp.Status,
316                 )
317                 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
318                 return false
319         }
320         body, err := io.ReadAll(resp.Body)
321         resp.Body.Close()
322         if err != nil {
323                 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
324                 http.Error(w, err.Error(), http.StatusBadGateway)
325                 return false
326         }
327         if !mkdirForPkg(w, r, pkgName) {
328                 return false
329         }
330         for _, lineRaw := range bytes.Split(body, []byte("\n")) {
331                 submatches := PkgPyPI.FindStringSubmatch(string(lineRaw))
332                 if len(submatches) == 0 {
333                         continue
334                 }
335                 uri := submatches[1]
336                 filename := submatches[2]
337                 pkgURL, err := url.Parse(uri)
338                 if err != nil {
339                         log.Println("error", r.RemoteAddr, "refresh", uri, err)
340                         http.Error(w, err.Error(), http.StatusBadGateway)
341                         return false
342                 }
343
344                 var hashAlgo string
345                 var hasherNew func() hash.Hash
346                 var digest []byte
347                 if d := digestsBLAKE2b256[filename]; d != nil {
348                         hasherNew = blake2b256New
349                         hashAlgo = HashAlgoBLAKE2b256
350                         digest = d
351                 } else if d := digestsSHA256[filename]; d != nil {
352                         hasherNew = sha256.New
353                         hashAlgo = HashAlgoSHA256
354                         digest = d
355                 } else if d := digestsSHA512[filename]; d != nil {
356                         hasherNew = sha512.New
357                         hashAlgo = HashAlgoSHA512
358                         digest = d
359                 } else {
360                         if pkgURL.Fragment == "" {
361                                 log.Println(r.RemoteAddr, "pypi", filename, "no digest")
362                                 http.Error(w, "no digest provided", http.StatusBadGateway)
363                                 return false
364                         }
365                         digestInfo := strings.Split(pkgURL.Fragment, "=")
366                         if len(digestInfo) == 1 {
367                                 // Ancient non PEP-0503 PyPIs, assume MD5
368                                 digestInfo = []string{"md5", digestInfo[0]}
369                         } else if len(digestInfo) != 2 {
370                                 log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
371                                 http.Error(w, "invalid digest provided", http.StatusBadGateway)
372                                 return false
373                         }
374                         var err error
375                         digest, err = hex.DecodeString(digestInfo[1])
376                         if err != nil {
377                                 log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
378                                 http.Error(w, err.Error(), http.StatusBadGateway)
379                                 return false
380                         }
381                         hashAlgo = digestInfo[0]
382                         var hashSize int
383                         switch hashAlgo {
384                         case HashAlgoBLAKE2b256:
385                                 hasherNew = blake2b256New
386                                 hashSize = blake2b.Size256
387                         case HashAlgoSHA256:
388                                 hasherNew = sha256.New
389                                 hashSize = sha256.Size
390                         case HashAlgoSHA512:
391                                 hasherNew = sha512.New
392                                 hashSize = sha512.Size
393                         case HashAlgoMD5:
394                                 hasherNew = md5.New
395                                 hashSize = md5.Size
396                         default:
397                                 log.Println(
398                                         "error", r.RemoteAddr, "pypi",
399                                         filename, "unknown digest", hashAlgo,
400                                 )
401                                 http.Error(w, "unknown digest algorithm", http.StatusBadGateway)
402                                 return false
403                         }
404                         if len(digest) != hashSize {
405                                 log.Println(
406                                         "error", r.RemoteAddr, "pypi",
407                                         filename, "invalid digest length")
408                                 http.Error(w, "invalid digest length", http.StatusBadGateway)
409                                 return false
410                         }
411                 }
412
413                 pkgURL.Fragment = ""
414                 if pkgURL.Host == "" {
415                         uri = PyPIURLParsed.ResolveReference(pkgURL).String()
416                 } else {
417                         uri = pkgURL.String()
418                 }
419                 mtime, mtimeExists := mtimes[filename]
420                 if !mtimeExists {
421                         mtime = now
422                 }
423
424                 path := filepath.Join(dirPath, filename)
425                 if filename == filenameGet {
426                         if Killed {
427                                 // Skip heavy remote call, when shutting down
428                                 http.Error(w, "shutting down", http.StatusInternalServerError)
429                                 return false
430                         }
431                         log.Println(r.RemoteAddr, "pypi", filename, "download")
432                         resp, err = c.Do(agentedReq(uri))
433                         if err != nil {
434                                 log.Println("error", r.RemoteAddr, "pypi", filename, "download", err)
435                                 http.Error(w, err.Error(), http.StatusBadGateway)
436                                 return false
437                         }
438                         defer resp.Body.Close()
439                         if resp.StatusCode != http.StatusOK {
440                                 log.Println(
441                                         "error", r.RemoteAddr,
442                                         "pypi", filename, "download",
443                                         "HTTP status:", resp.Status,
444                                 )
445                                 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
446                                 return false
447                         }
448                         hasher := hasherNew()
449                         hasherBLAKE2b256 := blake2b256New()
450                         hasherSHA256 := sha256.New()
451                         dst, err := TempFile(dirPath)
452                         if err != nil {
453                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
454                                 http.Error(w, err.Error(), http.StatusInternalServerError)
455                                 return false
456                         }
457                         dstBuf := bufio.NewWriter(dst)
458                         wrs := []io.Writer{hasher, dstBuf}
459                         if hashAlgo != HashAlgoBLAKE2b256 {
460                                 wrs = append(wrs, hasherBLAKE2b256)
461                         }
462                         if hashAlgo != HashAlgoSHA256 {
463                                 wrs = append(wrs, hasherSHA256)
464                         }
465                         wr := io.MultiWriter(wrs...)
466                         if _, err = io.Copy(wr, resp.Body); err != nil {
467                                 os.Remove(dst.Name())
468                                 dst.Close()
469                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
470                                 http.Error(w, err.Error(), http.StatusInternalServerError)
471                                 return false
472                         }
473                         if err = dstBuf.Flush(); err != nil {
474                                 os.Remove(dst.Name())
475                                 dst.Close()
476                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
477                                 http.Error(w, err.Error(), http.StatusInternalServerError)
478                                 return false
479                         }
480                         if !bytes.Equal(hasher.Sum(nil), digest) {
481                                 log.Println(r.RemoteAddr, "pypi", filename, "digest mismatch")
482                                 os.Remove(dst.Name())
483                                 dst.Close()
484                                 http.Error(w, "digest mismatch", http.StatusBadGateway)
485                                 return false
486                         }
487                         if digestStored, err := os.ReadFile(path + "." + hashAlgo); err == nil &&
488                                 !bytes.Equal(digest, digestStored) {
489                                 err = errors.New("stored digest mismatch")
490                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
491                                 os.Remove(dst.Name())
492                                 dst.Close()
493                                 http.Error(w, err.Error(), http.StatusInternalServerError)
494                                 return false
495                         }
496                         if !NoSync {
497                                 if err = dst.Sync(); err != nil {
498                                         os.Remove(dst.Name())
499                                         dst.Close()
500                                         log.Println("error", r.RemoteAddr, "pypi", filename, err)
501                                         http.Error(w, err.Error(), http.StatusInternalServerError)
502                                         return false
503                                 }
504                         }
505                         if err = dst.Close(); err != nil {
506                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
507                                 http.Error(w, err.Error(), http.StatusInternalServerError)
508                                 return false
509                         }
510                         if err = os.Chtimes(dst.Name(), mtime, mtime); err != nil {
511                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
512                                 http.Error(w, err.Error(), http.StatusInternalServerError)
513                         }
514                         if err = os.Rename(dst.Name(), path); err != nil {
515                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
516                                 http.Error(w, err.Error(), http.StatusInternalServerError)
517                                 return false
518                         }
519                         if err = DirSync(dirPath); err != nil {
520                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
521                                 http.Error(w, err.Error(), http.StatusInternalServerError)
522                                 return false
523                         }
524
525                         var digestBLAKE2b256 []byte
526                         var digestSHA256 []byte
527                         if hashAlgo == HashAlgoBLAKE2b256 {
528                                 digestBLAKE2b256 = hasher.Sum(nil)
529                         } else {
530                                 digestBLAKE2b256 = hasherBLAKE2b256.Sum(nil)
531                         }
532                         if hashAlgo == HashAlgoSHA256 {
533                                 digestSHA256 = hasher.Sum(nil)
534                         } else {
535                                 digestSHA256 = hasherSHA256.Sum(nil)
536                         }
537                         if err = WriteFileSync(
538                                 dirPath, path+"."+HashAlgoBLAKE2b256,
539                                 digestBLAKE2b256, mtime,
540                         ); err != nil {
541                                 log.Println(
542                                         "error", r.RemoteAddr, "pypi",
543                                         path+"."+HashAlgoBLAKE2b256, err,
544                                 )
545                                 http.Error(w, err.Error(), http.StatusInternalServerError)
546                                 return false
547                         }
548                         if err = WriteFileSync(
549                                 dirPath, path+"."+HashAlgoSHA256,
550                                 digestSHA256, mtime,
551                         ); err != nil {
552                                 log.Println(
553                                         "error", r.RemoteAddr, "pypi",
554                                         path+"."+HashAlgoSHA256, err,
555                                 )
556                                 http.Error(w, err.Error(), http.StatusInternalServerError)
557                                 return false
558                         }
559                         for _, algo := range KnownHashAlgos[2:] {
560                                 os.Remove(path + "." + algo)
561                         }
562                         digest = nil
563                 }
564                 if mtimeExists {
565                         stat, err := os.Stat(path)
566                         if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
567                                 log.Println(r.RemoteAddr, "pypi", filename, "touch")
568                                 if err = os.Chtimes(path, mtime, mtime); err != nil {
569                                         log.Println("error", r.RemoteAddr, "pypi", filename, err)
570                                         http.Error(w, err.Error(), http.StatusInternalServerError)
571                                 }
572                         }
573                 }
574
575                 if digest == nil {
576                         continue
577                 }
578                 path = path + "." + hashAlgo
579                 stat, err := os.Stat(path)
580                 if err == nil && (!mtimeExists ||
581                         (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime))) {
582                         continue
583                 }
584                 if err != nil && !os.IsNotExist(err) {
585                         log.Println("error", r.RemoteAddr, "pypi", path, err)
586                         http.Error(w, err.Error(), http.StatusInternalServerError)
587                         return false
588                 }
589                 log.Println(r.RemoteAddr, "pypi", filename, "touch")
590                 if err = WriteFileSync(dirPath, path, digest, mtime); err != nil {
591                         log.Println("error", r.RemoteAddr, "pypi", path, err)
592                         http.Error(w, err.Error(), http.StatusInternalServerError)
593                         return false
594                 }
595         }
596         return true
597 }