Cypherpunks.ru repositories - gocheese.git/blob - refresh.go
More convenient trusted-host
[gocheese.git] / refresh.go
1 /*
2 GoCheese -- Python private package repository and caching proxy
3 Copyright (C) 2019-2022 Sergey Matveev <stargrave@stargrave.org>
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, version 3 of the License.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 package main
19
20 import (
21         "bufio"
22         "bytes"
23         "crypto/md5"
24         "crypto/sha256"
25         "crypto/sha512"
26         "encoding/hex"
27         "encoding/json"
28         "errors"
29         "hash"
30         "io"
31         "log"
32         "net/http"
33         "net/url"
34         "os"
35         "path/filepath"
36         "regexp"
37         "strings"
38         "time"
39
40         "go.cypherpunks.ru/recfile"
41         "golang.org/x/crypto/blake2b"
42 )
43
// Names of the digest algorithms understood by refreshDir. The same
// strings are matched against the "algo=hex" URL fragment of upstream
// links and are appended (after a dot) to a package file path to name
// the file holding its digest.
const (
	HashAlgoMD5        = "md5"
	HashAlgoSHA256     = "sha256"
	HashAlgoSHA512     = "sha512"
	HashAlgoBLAKE2b256 = "blake2_256"
)

// Special file name suffixes used inside a package directory.
const (
	// GPGSigExt is appended both to the upstream URI and to the local
	// path of a package file to address its detached signature.
	GPGSigExt = ".asc"

	// InternalFlag is the name of a file whose presence in a package
	// directory makes refreshDir return without contacting upstream.
	InternalFlag = ".internal"
)
52
53 var (
54         PkgPyPI           = regexp.MustCompile(`^.*<a href="([^"]+)"[^>]*>(.+)</a>.*$`)
55         PyPIURLParsed     *url.URL
56         PyPIHTTPTransport http.Transport
57         KnownHashAlgos    []string = []string{
58                 HashAlgoSHA256,
59                 HashAlgoBLAKE2b256,
60                 HashAlgoSHA512,
61                 HashAlgoMD5,
62         }
63 )
64
65 func blake2b256New() hash.Hash {
66         h, err := blake2b.New256(nil)
67         if err != nil {
68                 panic(err)
69         }
70         return h
71 }
72
73 func agentedReq(url string) *http.Request {
74         req, err := http.NewRequest("GET", url, nil)
75         if err != nil {
76                 log.Fatalln(err)
77         }
78         req.Header.Set("User-Agent", UserAgent)
79         return req
80 }
81
// RecFieldToValuesMap pairs a metadata field name with the list of
// values taken from the JSON metadata for it. refreshDir writes one
// recfile field per value, using MDFieldToRecField[recField] as the
// field name.
type RecFieldToValuesMap struct {
        recField   string   // metadata field name, key into MDFieldToRecField
        jsonFields []string // values of that field; each becomes its own record field
}
86
87 func refreshDir(
88         w http.ResponseWriter,
89         r *http.Request,
90         pkgName, filenameGet string,
91         gpgUpdate bool,
92 ) bool {
93         if _, err := os.Stat(filepath.Join(Root, pkgName, InternalFlag)); err == nil {
94                 return true
95         }
96         c := http.Client{Transport: &PyPIHTTPTransport}
97         dirPath := filepath.Join(Root, pkgName)
98         now := time.Now()
99
100         var allReleases map[string][]*PkgReleaseInfo
101         if *JSONURL != "" {
102                 resp, err := c.Do(agentedReq(*JSONURL + pkgName + "/json"))
103                 if err != nil {
104                         log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
105                         http.Error(w, err.Error(), http.StatusBadGateway)
106                         return false
107                 }
108                 if resp.StatusCode != http.StatusOK {
109                         resp.Body.Close()
110                         log.Println(
111                                 "error", r.RemoteAddr, "refresh-json", pkgName,
112                                 "HTTP status:", resp.Status,
113                         )
114                         http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
115                         return false
116                 }
117                 body, err := io.ReadAll(resp.Body)
118                 if err != nil {
119                         resp.Body.Close()
120                         log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
121                         http.Error(w, "can not read body", http.StatusBadGateway)
122                         return false
123                 }
124                 resp.Body.Close()
125                 var buf bytes.Buffer
126                 var description string
127                 wr := recfile.NewWriter(&buf)
128                 var meta PkgMeta
129                 err = json.Unmarshal(body, &meta)
130                 if err == nil {
131                         for _, m := range [][2]string{
132                                 {MDFieldName, meta.Info.Name},
133                                 {MDFieldVersion, meta.Info.Version},
134                                 {MDFieldSummary, meta.Info.Summary},
135                                 {MDFieldDescriptionContentType, meta.Info.DescriptionContentType},
136                                 {MDFieldKeywords, meta.Info.Keywords},
137                                 {MDFieldHomePage, meta.Info.HomePage},
138                                 {MDFieldAuthor, meta.Info.Author},
139                                 {MDFieldAuthorEmail, meta.Info.AuthorEmail},
140                                 {MDFieldMaintainer, meta.Info.Maintainer},
141                                 {MDFieldMaintainerEmail, meta.Info.MaintainerEmail},
142                                 {MDFieldLicense, meta.Info.License},
143                                 {MDFieldRequiresPython, meta.Info.RequiresPython},
144                         } {
145                                 recField, jsonField := m[0], m[1]
146                                 if jsonField == "" {
147                                         continue
148                                 }
149                                 if _, err = wr.WriteFields(recfile.Field{
150                                         Name:  MDFieldToRecField[recField],
151                                         Value: jsonField,
152                                 }); err != nil {
153                                         log.Fatalln(err)
154                                 }
155                         }
156                         for _, m := range []RecFieldToValuesMap{
157                                 {MDFieldClassifier, meta.Info.Classifier},
158                                 {MDFieldPlatform, meta.Info.Platform},
159                                 {MDFieldSupportedPlatform, meta.Info.SupportedPlatform},
160                                 {MDFieldRequiresDist, meta.Info.RequiresDist},
161                                 {MDFieldRequiresExternal, meta.Info.RequiresExternal},
162                                 {MDFieldProjectURL, meta.Info.ProjectURL},
163                                 {MDFieldProvidesExtra, meta.Info.ProvidesExtra},
164                         } {
165                                 for _, v := range m.jsonFields {
166                                         if _, err = wr.WriteFields(recfile.Field{
167                                                 Name:  MDFieldToRecField[m.recField],
168                                                 Value: v,
169                                         }); err != nil {
170                                                 log.Fatalln(err)
171                                         }
172                                 }
173                         }
174                         description = meta.Info.Description
175                         allReleases = meta.Releases
176                 } else {
177                         var metaStripped PkgMetaStripped
178                         err = json.Unmarshal(body, &metaStripped)
179                         if err != nil {
180                                 log.Println(
181                                         "error", r.RemoteAddr, "refresh-json", pkgName,
182                                         "can not parse JSON:", err,
183                                 )
184                                 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
185                                 return false
186                         }
187                         for _, m := range [][2]string{
188                                 {MDFieldName, metaStripped.Info.Name},
189                                 {MDFieldVersion, metaStripped.Info.Version},
190                                 {MDFieldSummary, metaStripped.Info.Summary},
191                                 {MDFieldDescriptionContentType, metaStripped.Info.DescriptionContentType},
192                                 {MDFieldKeywords, metaStripped.Info.Keywords},
193                                 {MDFieldHomePage, metaStripped.Info.HomePage},
194                                 {MDFieldAuthor, metaStripped.Info.Author},
195                                 {MDFieldAuthorEmail, metaStripped.Info.AuthorEmail},
196                                 {MDFieldMaintainer, metaStripped.Info.Maintainer},
197                                 {MDFieldMaintainerEmail, metaStripped.Info.MaintainerEmail},
198                                 {MDFieldLicense, metaStripped.Info.License},
199                                 {MDFieldRequiresPython, metaStripped.Info.RequiresPython},
200                         } {
201                                 recField, jsonField := m[0], m[1]
202                                 if jsonField == "" {
203                                         continue
204                                 }
205                                 if _, err = wr.WriteFields(recfile.Field{
206                                         Name:  MDFieldToRecField[recField],
207                                         Value: jsonField,
208                                 }); err != nil {
209                                         log.Fatalln(err)
210                                 }
211                         }
212
213                         for _, m := range []RecFieldToValuesMap{
214                                 {MDFieldClassifier, metaStripped.Info.Classifier},
215                                 {MDFieldRequiresDist, metaStripped.Info.RequiresDist},
216                         } {
217                                 for _, v := range m.jsonFields {
218                                         if _, err = wr.WriteFields(recfile.Field{
219                                                 Name:  MDFieldToRecField[m.recField],
220                                                 Value: v,
221                                         }); err != nil {
222                                                 log.Fatalln(err)
223                                         }
224                                 }
225                         }
226                         description = metaStripped.Info.Description
227                         allReleases = metaStripped.Releases
228                 }
229                 lines := strings.Split(description, "\n")
230                 if len(lines) > 0 {
231                         if _, err = wr.WriteFieldMultiline(
232                                 MDFieldDescription, lines,
233                         ); err != nil {
234                                 log.Fatalln(err)
235                         }
236                 }
237
238                 if !mkdirForPkg(w, r, pkgName) {
239                         return false
240                 }
241                 path := filepath.Join(dirPath, MDFile)
242                 existing, err := os.ReadFile(path)
243                 if err != nil || bytes.Compare(existing, buf.Bytes()) != 0 {
244                         if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil {
245                                 log.Println("error", r.RemoteAddr, "refresh-json", path, err)
246                                 http.Error(w, err.Error(), http.StatusInternalServerError)
247                                 return false
248                         }
249                         log.Println(r.RemoteAddr, "pypi", pkgName+"/"+MDFile, "touch")
250                 }
251         }
252         mtimes := make(map[string]time.Time)
253         for _, releases := range allReleases {
254                 for _, rel := range releases {
255                         if rel.Filename == "" || rel.UploadTimeISO8601 == "" {
256                                 continue
257                         }
258                         t, err := time.Parse(time.RFC3339Nano, rel.UploadTimeISO8601)
259                         if err != nil {
260                                 log.Println(
261                                         "error", r.RemoteAddr, "refresh-json", pkgName,
262                                         "can not parse upload_time:", err,
263                                 )
264                                 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
265                                 return false
266                         }
267                         mtimes[rel.Filename] = t.Truncate(time.Second)
268                 }
269         }
270
271         resp, err := c.Do(agentedReq(*PyPIURL + pkgName + "/"))
272         if err != nil {
273                 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
274                 http.Error(w, err.Error(), http.StatusBadGateway)
275                 return false
276         }
277         if resp.StatusCode != http.StatusOK {
278                 resp.Body.Close()
279                 log.Println(
280                         "error", r.RemoteAddr, "refresh", pkgName,
281                         "HTTP status:", resp.Status,
282                 )
283                 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
284                 return false
285         }
286         body, err := io.ReadAll(resp.Body)
287         resp.Body.Close()
288         if err != nil {
289                 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
290                 http.Error(w, err.Error(), http.StatusBadGateway)
291                 return false
292         }
293         if !mkdirForPkg(w, r, pkgName) {
294                 return false
295         }
296         for _, lineRaw := range bytes.Split(body, []byte("\n")) {
297                 submatches := PkgPyPI.FindStringSubmatch(string(lineRaw))
298                 if len(submatches) == 0 {
299                         continue
300                 }
301                 uri := submatches[1]
302                 filename := submatches[2]
303                 pkgURL, err := url.Parse(uri)
304                 if err != nil {
305                         log.Println("error", r.RemoteAddr, "refresh", uri, err)
306                         http.Error(w, err.Error(), http.StatusBadGateway)
307                         return false
308                 }
309
310                 if pkgURL.Fragment == "" {
311                         log.Println(r.RemoteAddr, "pypi", filename, "no digest")
312                         http.Error(w, "no digest provided", http.StatusBadGateway)
313                         return false
314                 }
315                 digestInfo := strings.Split(pkgURL.Fragment, "=")
316                 if len(digestInfo) == 1 {
317                         // Ancient non PEP-0503 PyPIs, assume MD5
318                         digestInfo = []string{"md5", digestInfo[0]}
319                 } else if len(digestInfo) != 2 {
320                         log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
321                         http.Error(w, "invalid digest provided", http.StatusBadGateway)
322                         return false
323                 }
324                 digest, err := hex.DecodeString(digestInfo[1])
325                 if err != nil {
326                         log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
327                         http.Error(w, err.Error(), http.StatusBadGateway)
328                         return false
329                 }
330                 hashAlgo := digestInfo[0]
331                 var hasherNew func() hash.Hash
332                 var hashSize int
333                 switch hashAlgo {
334                 case HashAlgoMD5:
335                         hasherNew = md5.New
336                         hashSize = md5.Size
337                 case HashAlgoSHA256:
338                         hasherNew = sha256.New
339                         hashSize = sha256.Size
340                 case HashAlgoSHA512:
341                         hasherNew = sha512.New
342                         hashSize = sha512.Size
343                 case HashAlgoBLAKE2b256:
344                         hasherNew = blake2b256New
345                         hashSize = blake2b.Size256
346                 default:
347                         log.Println(
348                                 "error", r.RemoteAddr, "pypi",
349                                 filename, "unknown digest", hashAlgo,
350                         )
351                         http.Error(w, "unknown digest algorithm", http.StatusBadGateway)
352                         return false
353                 }
354                 if len(digest) != hashSize {
355                         log.Println(
356                                 "error", r.RemoteAddr, "pypi",
357                                 filename, "invalid digest length")
358                         http.Error(w, "invalid digest length", http.StatusBadGateway)
359                         return false
360                 }
361
362                 pkgURL.Fragment = ""
363                 if pkgURL.Host == "" {
364                         uri = PyPIURLParsed.ResolveReference(pkgURL).String()
365                 } else {
366                         uri = pkgURL.String()
367                 }
368                 mtime, mtimeExists := mtimes[filename]
369                 if !mtimeExists {
370                         mtime = now
371                 }
372
373                 path := filepath.Join(dirPath, filename)
374                 if filename == filenameGet {
375                         if Killed {
376                                 // Skip heavy remote call, when shutting down
377                                 http.Error(w, "shutting down", http.StatusInternalServerError)
378                                 return false
379                         }
380                         log.Println(r.RemoteAddr, "pypi", filename, "download")
381                         resp, err = c.Do(agentedReq(uri))
382                         if err != nil {
383                                 log.Println("error", r.RemoteAddr, "pypi", filename, "download", err)
384                                 http.Error(w, err.Error(), http.StatusBadGateway)
385                                 return false
386                         }
387                         defer resp.Body.Close()
388                         if resp.StatusCode != http.StatusOK {
389                                 log.Println(
390                                         "error", r.RemoteAddr,
391                                         "pypi", filename, "download",
392                                         "HTTP status:", resp.Status,
393                                 )
394                                 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
395                                 return false
396                         }
397                         hasher := hasherNew()
398                         hasherSHA256 := sha256.New()
399                         hasherBLAKE2b256 := blake2b256New()
400                         dst, err := TempFile(dirPath)
401                         if err != nil {
402                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
403                                 http.Error(w, err.Error(), http.StatusInternalServerError)
404                                 return false
405                         }
406                         dstBuf := bufio.NewWriter(dst)
407                         wrs := []io.Writer{hasher, dstBuf}
408                         if hashAlgo != HashAlgoSHA256 {
409                                 wrs = append(wrs, hasherSHA256)
410                         }
411                         if hashAlgo != HashAlgoBLAKE2b256 {
412                                 wrs = append(wrs, hasherBLAKE2b256)
413                         }
414                         wr := io.MultiWriter(wrs...)
415                         if _, err = io.Copy(wr, resp.Body); err != nil {
416                                 os.Remove(dst.Name())
417                                 dst.Close()
418                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
419                                 http.Error(w, err.Error(), http.StatusInternalServerError)
420                                 return false
421                         }
422                         if err = dstBuf.Flush(); err != nil {
423                                 os.Remove(dst.Name())
424                                 dst.Close()
425                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
426                                 http.Error(w, err.Error(), http.StatusInternalServerError)
427                                 return false
428                         }
429                         if bytes.Compare(hasher.Sum(nil), digest) != 0 {
430                                 log.Println(r.RemoteAddr, "pypi", filename, "digest mismatch")
431                                 os.Remove(dst.Name())
432                                 dst.Close()
433                                 http.Error(w, "digest mismatch", http.StatusBadGateway)
434                                 return false
435                         }
436                         if digestStored, err := os.ReadFile(path + "." + hashAlgo); err == nil &&
437                                 bytes.Compare(digest, digestStored) != 0 {
438                                 err = errors.New("stored digest mismatch")
439                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
440                                 os.Remove(dst.Name())
441                                 dst.Close()
442                                 http.Error(w, err.Error(), http.StatusInternalServerError)
443                                 return false
444                         }
445                         if !NoSync {
446                                 if err = dst.Sync(); err != nil {
447                                         os.Remove(dst.Name())
448                                         dst.Close()
449                                         log.Println("error", r.RemoteAddr, "pypi", filename, err)
450                                         http.Error(w, err.Error(), http.StatusInternalServerError)
451                                         return false
452                                 }
453                         }
454                         if err = dst.Close(); err != nil {
455                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
456                                 http.Error(w, err.Error(), http.StatusInternalServerError)
457                                 return false
458                         }
459                         if err = os.Chtimes(dst.Name(), mtime, mtime); err != nil {
460                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
461                                 http.Error(w, err.Error(), http.StatusInternalServerError)
462                         }
463                         if err = os.Rename(dst.Name(), path); err != nil {
464                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
465                                 http.Error(w, err.Error(), http.StatusInternalServerError)
466                                 return false
467                         }
468                         if err = DirSync(dirPath); err != nil {
469                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
470                                 http.Error(w, err.Error(), http.StatusInternalServerError)
471                                 return false
472                         }
473
474                         var digestSHA256 []byte
475                         var digestBLAKE2b256 []byte
476                         if hashAlgo == HashAlgoSHA256 {
477                                 digestSHA256 = hasher.Sum(nil)
478                         } else {
479                                 digestSHA256 = hasherSHA256.Sum(nil)
480                         }
481                         if hashAlgo == HashAlgoBLAKE2b256 {
482                                 digestBLAKE2b256 = hasher.Sum(nil)
483                         } else {
484                                 digestBLAKE2b256 = hasherBLAKE2b256.Sum(nil)
485                         }
486                         if err = WriteFileSync(
487                                 dirPath, path+"."+HashAlgoSHA256,
488                                 digestSHA256, mtime,
489                         ); err != nil {
490                                 log.Println(
491                                         "error", r.RemoteAddr, "pypi",
492                                         path+"."+HashAlgoSHA256, err,
493                                 )
494                                 http.Error(w, err.Error(), http.StatusInternalServerError)
495                                 return false
496                         }
497                         if err = WriteFileSync(
498                                 dirPath, path+"."+HashAlgoBLAKE2b256,
499                                 digestBLAKE2b256, mtime,
500                         ); err != nil {
501                                 log.Println(
502                                         "error", r.RemoteAddr, "pypi",
503                                         path+"."+HashAlgoBLAKE2b256, err,
504                                 )
505                                 http.Error(w, err.Error(), http.StatusInternalServerError)
506                                 return false
507                         }
508                         for _, algo := range KnownHashAlgos[2:] {
509                                 os.Remove(path + "." + algo)
510                         }
511                         digest = nil
512                 }
513                 if mtimeExists {
514                         stat, err := os.Stat(path)
515                         if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
516                                 log.Println(r.RemoteAddr, "pypi", filename, "touch")
517                                 if err = os.Chtimes(path, mtime, mtime); err != nil {
518                                         log.Println("error", r.RemoteAddr, "pypi", filename, err)
519                                         http.Error(w, err.Error(), http.StatusInternalServerError)
520                                 }
521                         }
522                 }
523
524                 if filename == filenameGet || gpgUpdate {
525                         resp, err := c.Do(agentedReq(uri + GPGSigExt))
526                         if err != nil {
527                                 goto GPGSigSkip
528                         }
529                         if resp.StatusCode != http.StatusOK {
530                                 resp.Body.Close()
531                                 goto GPGSigSkip
532                         }
533                         sig, err := io.ReadAll(resp.Body)
534                         resp.Body.Close()
535                         if err != nil {
536                                 goto GPGSigSkip
537                         }
538                         if !bytes.HasPrefix(sig, []byte("-----BEGIN PGP SIGNATURE-----")) {
539                                 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "non PGP")
540                                 goto GPGSigSkip
541                         }
542                         if err = WriteFileSync(dirPath, path+GPGSigExt, sig, mtime); err != nil {
543                                 log.Println("error", r.RemoteAddr, "pypi", filename+GPGSigExt, err)
544                                 http.Error(w, err.Error(), http.StatusInternalServerError)
545                                 return false
546                         }
547                         log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "downloaded")
548                 }
549                 if mtimeExists {
550                         stat, err := os.Stat(path + GPGSigExt)
551                         if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
552                                 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "touch")
553                                 if err = os.Chtimes(path+GPGSigExt, mtime, mtime); err != nil {
554                                         log.Println("error", r.RemoteAddr, "pypi", filename, err)
555                                         http.Error(w, err.Error(), http.StatusInternalServerError)
556                                 }
557                         }
558                 }
559
560         GPGSigSkip:
561                 if digest == nil {
562                         continue
563                 }
564                 path = path + "." + hashAlgo
565                 stat, err := os.Stat(path)
566                 if err == nil && (!mtimeExists ||
567                         (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime))) {
568                         continue
569                 }
570                 if err != nil && !os.IsNotExist(err) {
571                         log.Println("error", r.RemoteAddr, "pypi", path, err)
572                         http.Error(w, err.Error(), http.StatusInternalServerError)
573                         return false
574                 }
575                 log.Println(r.RemoteAddr, "pypi", filename, "touch")
576                 if err = WriteFileSync(dirPath, path, digest, mtime); err != nil {
577                         log.Println("error", r.RemoteAddr, "pypi", path, err)
578                         http.Error(w, err.Error(), http.StatusInternalServerError)
579                         return false
580                 }
581         }
582         return true
583 }