Cypherpunks.ru repositories - gocheese.git/blob - refresh.go
Explicitly check stored digest
[gocheese.git] / refresh.go
1 /*
2 GoCheese -- Python private package repository and caching proxy
3 Copyright (C) 2019-2021 Sergey Matveev <stargrave@stargrave.org>
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, version 3 of the License.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 package main
19
20 import (
21         "bufio"
22         "bytes"
23         "crypto/md5"
24         "crypto/sha256"
25         "crypto/sha512"
26         "encoding/hex"
27         "encoding/json"
28         "errors"
29         "hash"
30         "io"
31         "io/ioutil"
32         "log"
33         "net/http"
34         "net/url"
35         "os"
36         "path/filepath"
37         "regexp"
38         "strings"
39         "time"
40
41         "go.cypherpunks.ru/recfile"
42         "golang.org/x/crypto/blake2b"
43 )
44
45 const (
46         HashAlgoSHA256     = "sha256"
47         HashAlgoBLAKE2b256 = "blake2_256"
48         HashAlgoSHA512     = "sha512"
49         HashAlgoMD5        = "md5"
50         GPGSigExt          = ".asc"
51         InternalFlag       = ".internal"
52 )
53
54 var (
55         PkgPyPI           = regexp.MustCompile(`^.*<a href="([^"]+)"[^>]*>(.+)</a>.*$`)
56         PyPIURLParsed     *url.URL
57         PyPIHTTPTransport http.Transport
58         KnownHashAlgos    []string = []string{
59                 HashAlgoSHA256,
60                 HashAlgoBLAKE2b256,
61                 HashAlgoSHA512,
62                 HashAlgoMD5,
63         }
64 )
65
66 func blake2b256New() hash.Hash {
67         h, err := blake2b.New256(nil)
68         if err != nil {
69                 panic(err)
70         }
71         return h
72 }
73
74 func agentedReq(url string) *http.Request {
75         req, err := http.NewRequest("GET", url, nil)
76         if err != nil {
77                 log.Fatalln(err)
78         }
79         req.Header.Set("User-Agent", UserAgent)
80         return req
81 }
82
// RecFieldToValuesMap pairs a metadata field name with all the values
// the upstream JSON holds for it. refreshDir writes one recfile field
// per value.
type RecFieldToValuesMap struct {
	recField   string   // metadata field name, converted with metadataFieldToRecField before writing
	jsonFields []string // values taken from the JSON metadata, in their original order
}
87
88 func refreshDir(
89         w http.ResponseWriter,
90         r *http.Request,
91         pkgName, filenameGet string,
92         gpgUpdate bool,
93 ) bool {
94         if _, err := os.Stat(filepath.Join(Root, pkgName, InternalFlag)); err == nil {
95                 return true
96         }
97         c := http.Client{Transport: &PyPIHTTPTransport}
98         dirPath := filepath.Join(Root, pkgName)
99         now := time.Now()
100
101         var allReleases map[string][]*PkgReleaseInfo
102         if *JSONURL != "" {
103                 resp, err := c.Do(agentedReq(*JSONURL + pkgName + "/json"))
104                 if err != nil {
105                         log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
106                         http.Error(w, err.Error(), http.StatusBadGateway)
107                         return false
108                 }
109                 if resp.StatusCode != http.StatusOK {
110                         resp.Body.Close()
111                         log.Println(
112                                 "error", r.RemoteAddr, "refresh-json", pkgName,
113                                 "HTTP status:", resp.Status,
114                         )
115                         http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
116                         return false
117                 }
118                 body, err := ioutil.ReadAll(resp.Body)
119                 resp.Body.Close()
120                 var buf bytes.Buffer
121                 var description string
122                 wr := recfile.NewWriter(&buf)
123                 var meta PkgMeta
124                 err = json.Unmarshal(body, &meta)
125                 if err == nil {
126                         for _, m := range [][2]string{
127                                 {MetadataFieldName, meta.Info.Name},
128                                 {MetadataFieldVersion, meta.Info.Version},
129                                 {MetadataFieldSummary, meta.Info.Summary},
130                                 {MetadataFieldDescriptionContentType, meta.Info.DescriptionContentType},
131                                 {MetadataFieldKeywords, meta.Info.Keywords},
132                                 {MetadataFieldHomePage, meta.Info.HomePage},
133                                 {MetadataFieldAuthor, meta.Info.Author},
134                                 {MetadataFieldAuthorEmail, meta.Info.AuthorEmail},
135                                 {MetadataFieldMaintainer, meta.Info.Maintainer},
136                                 {MetadataFieldMaintainerEmail, meta.Info.MaintainerEmail},
137                                 {MetadataFieldLicense, meta.Info.License},
138                                 {MetadataFieldRequiresPython, meta.Info.RequiresPython},
139                         } {
140                                 recField, jsonField := m[0], m[1]
141                                 if jsonField == "" {
142                                         continue
143                                 }
144                                 if _, err = wr.WriteFields(recfile.Field{
145                                         Name:  metadataFieldToRecField(recField),
146                                         Value: jsonField,
147                                 }); err != nil {
148                                         log.Fatalln(err)
149                                 }
150                         }
151                         for _, m := range []RecFieldToValuesMap{
152                                 {MetadataFieldClassifier, meta.Info.Classifier},
153                                 {MetadataFieldPlatform, meta.Info.Platform},
154                                 {MetadataFieldSupportedPlatform, meta.Info.SupportedPlatform},
155                                 {MetadataFieldRequiresDist, meta.Info.RequiresDist},
156                                 {MetadataFieldRequiresExternal, meta.Info.RequiresExternal},
157                                 {MetadataFieldProjectURL, meta.Info.ProjectURL},
158                                 {MetadataFieldProvidesExtra, meta.Info.ProvidesExtra},
159                         } {
160                                 for _, v := range m.jsonFields {
161                                         if _, err = wr.WriteFields(recfile.Field{
162                                                 Name:  metadataFieldToRecField(m.recField),
163                                                 Value: v,
164                                         }); err != nil {
165                                                 log.Fatalln(err)
166                                         }
167                                 }
168                         }
169                         description = meta.Info.Description
170                         allReleases = meta.Releases
171                 } else {
172                         var metaStripped PkgMetaStripped
173                         err = json.Unmarshal(body, &metaStripped)
174                         if err != nil {
175                                 log.Println(
176                                         "error", r.RemoteAddr, "refresh-json", pkgName,
177                                         "can not parse JSON:", err,
178                                 )
179                                 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
180                                 return false
181                         }
182                         for _, m := range [][2]string{
183                                 {MetadataFieldName, metaStripped.Info.Name},
184                                 {MetadataFieldVersion, metaStripped.Info.Version},
185                                 {MetadataFieldSummary, metaStripped.Info.Summary},
186                                 {MetadataFieldDescriptionContentType, metaStripped.Info.DescriptionContentType},
187                                 {MetadataFieldKeywords, metaStripped.Info.Keywords},
188                                 {MetadataFieldHomePage, metaStripped.Info.HomePage},
189                                 {MetadataFieldAuthor, metaStripped.Info.Author},
190                                 {MetadataFieldAuthorEmail, metaStripped.Info.AuthorEmail},
191                                 {MetadataFieldMaintainer, metaStripped.Info.Maintainer},
192                                 {MetadataFieldMaintainerEmail, metaStripped.Info.MaintainerEmail},
193                                 {MetadataFieldLicense, metaStripped.Info.License},
194                                 {MetadataFieldRequiresPython, metaStripped.Info.RequiresPython},
195                         } {
196                                 recField, jsonField := m[0], m[1]
197                                 if jsonField == "" {
198                                         continue
199                                 }
200                                 if _, err = wr.WriteFields(recfile.Field{
201                                         Name:  metadataFieldToRecField(recField),
202                                         Value: jsonField,
203                                 }); err != nil {
204                                         log.Fatalln(err)
205                                 }
206                         }
207
208                         for _, m := range []RecFieldToValuesMap{
209                                 {MetadataFieldClassifier, metaStripped.Info.Classifier},
210                                 {MetadataFieldRequiresDist, metaStripped.Info.RequiresDist},
211                         } {
212                                 for _, v := range m.jsonFields {
213                                         if _, err = wr.WriteFields(recfile.Field{
214                                                 Name:  metadataFieldToRecField(m.recField),
215                                                 Value: v,
216                                         }); err != nil {
217                                                 log.Fatalln(err)
218                                         }
219                                 }
220                         }
221                         description = metaStripped.Info.Description
222                         allReleases = metaStripped.Releases
223                 }
224                 lines := strings.Split(description, "\n")
225                 if len(lines) > 0 {
226                         if _, err = wr.WriteFieldMultiline(
227                                 MetadataFieldDescription, lines,
228                         ); err != nil {
229                                 log.Fatalln(err)
230                         }
231                 }
232
233                 if !mkdirForPkg(w, r, pkgName) {
234                         return false
235                 }
236                 path := filepath.Join(dirPath, MetadataFile)
237                 existing, err := ioutil.ReadFile(path)
238                 if err != nil || bytes.Compare(existing, buf.Bytes()) != 0 {
239                         if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil {
240                                 log.Println("error", r.RemoteAddr, "refresh-json", path, err)
241                                 http.Error(w, err.Error(), http.StatusInternalServerError)
242                                 return false
243                         }
244                         log.Println(r.RemoteAddr, "pypi", pkgName+"/"+MetadataFile, "touch")
245                 }
246         }
247         mtimes := make(map[string]time.Time)
248         for _, releases := range allReleases {
249                 for _, rel := range releases {
250                         if rel.Filename == "" || rel.UploadTimeISO8601 == "" {
251                                 continue
252                         }
253                         t, err := time.Parse(time.RFC3339Nano, rel.UploadTimeISO8601)
254                         if err != nil {
255                                 log.Println(
256                                         "error", r.RemoteAddr, "refresh-json", pkgName,
257                                         "can not parse upload_time:", err,
258                                 )
259                                 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
260                                 return false
261                         }
262                         mtimes[rel.Filename] = t.Truncate(time.Second)
263                 }
264         }
265
266         resp, err := c.Do(agentedReq(*PyPIURL + pkgName + "/"))
267         if err != nil {
268                 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
269                 http.Error(w, err.Error(), http.StatusBadGateway)
270                 return false
271         }
272         if resp.StatusCode != http.StatusOK {
273                 resp.Body.Close()
274                 log.Println(
275                         "error", r.RemoteAddr, "refresh", pkgName,
276                         "HTTP status:", resp.Status,
277                 )
278                 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
279                 return false
280         }
281         body, err := ioutil.ReadAll(resp.Body)
282         resp.Body.Close()
283         if err != nil {
284                 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
285                 http.Error(w, err.Error(), http.StatusBadGateway)
286                 return false
287         }
288         if !mkdirForPkg(w, r, pkgName) {
289                 return false
290         }
291         for _, lineRaw := range bytes.Split(body, []byte("\n")) {
292                 submatches := PkgPyPI.FindStringSubmatch(string(lineRaw))
293                 if len(submatches) == 0 {
294                         continue
295                 }
296                 uri := submatches[1]
297                 filename := submatches[2]
298                 pkgURL, err := url.Parse(uri)
299                 if err != nil {
300                         log.Println("error", r.RemoteAddr, "refresh", uri, err)
301                         http.Error(w, err.Error(), http.StatusBadGateway)
302                         return false
303                 }
304
305                 if pkgURL.Fragment == "" {
306                         log.Println(r.RemoteAddr, "pypi", filename, "no digest")
307                         http.Error(w, "no digest provided", http.StatusBadGateway)
308                         return false
309                 }
310                 digestInfo := strings.Split(pkgURL.Fragment, "=")
311                 if len(digestInfo) == 1 {
312                         // Ancient non PEP-0503 PyPIs, assume MD5
313                         digestInfo = []string{"md5", digestInfo[0]}
314                 } else if len(digestInfo) != 2 {
315                         log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
316                         http.Error(w, "invalid digest provided", http.StatusBadGateway)
317                         return false
318                 }
319                 digest, err := hex.DecodeString(digestInfo[1])
320                 if err != nil {
321                         log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
322                         http.Error(w, err.Error(), http.StatusBadGateway)
323                         return false
324                 }
325                 hashAlgo := digestInfo[0]
326                 var hasherNew func() hash.Hash
327                 var hashSize int
328                 switch hashAlgo {
329                 case HashAlgoMD5:
330                         hasherNew = md5.New
331                         hashSize = md5.Size
332                 case HashAlgoSHA256:
333                         hasherNew = sha256.New
334                         hashSize = sha256.Size
335                 case HashAlgoSHA512:
336                         hasherNew = sha512.New
337                         hashSize = sha512.Size
338                 case HashAlgoBLAKE2b256:
339                         hasherNew = blake2b256New
340                         hashSize = blake2b.Size256
341                 default:
342                         log.Println(
343                                 "error", r.RemoteAddr, "pypi",
344                                 filename, "unknown digest", hashAlgo,
345                         )
346                         http.Error(w, "unknown digest algorithm", http.StatusBadGateway)
347                         return false
348                 }
349                 if len(digest) != hashSize {
350                         log.Println(
351                                 "error", r.RemoteAddr, "pypi",
352                                 filename, "invalid digest length")
353                         http.Error(w, "invalid digest length", http.StatusBadGateway)
354                         return false
355                 }
356
357                 pkgURL.Fragment = ""
358                 if pkgURL.Host == "" {
359                         uri = PyPIURLParsed.ResolveReference(pkgURL).String()
360                 } else {
361                         uri = pkgURL.String()
362                 }
363                 mtime, mtimeExists := mtimes[filename]
364                 if !mtimeExists {
365                         mtime = now
366                 }
367
368                 path := filepath.Join(dirPath, filename)
369                 if filename == filenameGet {
370                         if Killed {
371                                 // Skip heavy remote call, when shutting down
372                                 http.Error(w, "shutting down", http.StatusInternalServerError)
373                                 return false
374                         }
375                         log.Println(r.RemoteAddr, "pypi", filename, "download")
376                         resp, err = c.Do(agentedReq(uri))
377                         if err != nil {
378                                 log.Println("error", r.RemoteAddr, "pypi", filename, "download", err)
379                                 http.Error(w, err.Error(), http.StatusBadGateway)
380                                 return false
381                         }
382                         defer resp.Body.Close()
383                         if resp.StatusCode != http.StatusOK {
384                                 log.Println(
385                                         "error", r.RemoteAddr,
386                                         "pypi", filename, "download",
387                                         "HTTP status:", resp.Status,
388                                 )
389                                 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
390                                 return false
391                         }
392                         hasher := hasherNew()
393                         hasherSHA256 := sha256.New()
394                         hasherBLAKE2b256 := blake2b256New()
395                         dst, err := TempFile(dirPath)
396                         if err != nil {
397                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
398                                 http.Error(w, err.Error(), http.StatusInternalServerError)
399                                 return false
400                         }
401                         dstBuf := bufio.NewWriter(dst)
402                         wrs := []io.Writer{hasher, dstBuf}
403                         if hashAlgo != HashAlgoSHA256 {
404                                 wrs = append(wrs, hasherSHA256)
405                         }
406                         if hashAlgo != HashAlgoBLAKE2b256 {
407                                 wrs = append(wrs, hasherBLAKE2b256)
408                         }
409                         wr := io.MultiWriter(wrs...)
410                         if _, err = io.Copy(wr, resp.Body); err != nil {
411                                 os.Remove(dst.Name())
412                                 dst.Close()
413                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
414                                 http.Error(w, err.Error(), http.StatusInternalServerError)
415                                 return false
416                         }
417                         if err = dstBuf.Flush(); err != nil {
418                                 os.Remove(dst.Name())
419                                 dst.Close()
420                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
421                                 http.Error(w, err.Error(), http.StatusInternalServerError)
422                                 return false
423                         }
424                         if bytes.Compare(hasher.Sum(nil), digest) != 0 {
425                                 log.Println(r.RemoteAddr, "pypi", filename, "digest mismatch")
426                                 os.Remove(dst.Name())
427                                 dst.Close()
428                                 http.Error(w, "digest mismatch", http.StatusBadGateway)
429                                 return false
430                         }
431                         if digestStored, err := ioutil.ReadFile(path + "." + hashAlgo); err == nil &&
432                                 bytes.Compare(digest, digestStored) != 0 {
433                                 err = errors.New("stored digest mismatch")
434                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
435                                 os.Remove(dst.Name())
436                                 dst.Close()
437                                 http.Error(w, err.Error(), http.StatusInternalServerError)
438                                 return false
439                         }
440                         if !NoSync {
441                                 if err = dst.Sync(); err != nil {
442                                         os.Remove(dst.Name())
443                                         dst.Close()
444                                         log.Println("error", r.RemoteAddr, "pypi", filename, err)
445                                         http.Error(w, err.Error(), http.StatusInternalServerError)
446                                         return false
447                                 }
448                         }
449                         if err = dst.Close(); err != nil {
450                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
451                                 http.Error(w, err.Error(), http.StatusInternalServerError)
452                                 return false
453                         }
454                         if err = os.Chtimes(dst.Name(), mtime, mtime); err != nil {
455                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
456                                 http.Error(w, err.Error(), http.StatusInternalServerError)
457                         }
458                         if err = os.Rename(dst.Name(), path); err != nil {
459                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
460                                 http.Error(w, err.Error(), http.StatusInternalServerError)
461                                 return false
462                         }
463                         if err = DirSync(dirPath); err != nil {
464                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
465                                 http.Error(w, err.Error(), http.StatusInternalServerError)
466                                 return false
467                         }
468
469                         var digestSHA256 []byte
470                         var digestBLAKE2b256 []byte
471                         if hashAlgo == HashAlgoSHA256 {
472                                 digestSHA256 = hasher.Sum(nil)
473                         } else {
474                                 digestSHA256 = hasherSHA256.Sum(nil)
475                         }
476                         if hashAlgo == HashAlgoBLAKE2b256 {
477                                 digestBLAKE2b256 = hasher.Sum(nil)
478                         } else {
479                                 digestBLAKE2b256 = hasherBLAKE2b256.Sum(nil)
480                         }
481                         if err = WriteFileSync(
482                                 dirPath, path+"."+HashAlgoSHA256,
483                                 digestSHA256, mtime,
484                         ); err != nil {
485                                 log.Println(
486                                         "error", r.RemoteAddr, "pypi",
487                                         path+"."+HashAlgoSHA256, err,
488                                 )
489                                 http.Error(w, err.Error(), http.StatusInternalServerError)
490                                 return false
491                         }
492                         if err = WriteFileSync(
493                                 dirPath, path+"."+HashAlgoBLAKE2b256,
494                                 digestBLAKE2b256, mtime,
495                         ); err != nil {
496                                 log.Println(
497                                         "error", r.RemoteAddr, "pypi",
498                                         path+"."+HashAlgoBLAKE2b256, err,
499                                 )
500                                 http.Error(w, err.Error(), http.StatusInternalServerError)
501                                 return false
502                         }
503                         for _, algo := range KnownHashAlgos[2:] {
504                                 os.Remove(path + "." + algo)
505                         }
506                         digest = nil
507                 }
508                 if mtimeExists {
509                         stat, err := os.Stat(path)
510                         if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
511                                 log.Println(r.RemoteAddr, "pypi", filename, "touch")
512                                 if err = os.Chtimes(path, mtime, mtime); err != nil {
513                                         log.Println("error", r.RemoteAddr, "pypi", filename, err)
514                                         http.Error(w, err.Error(), http.StatusInternalServerError)
515                                 }
516                         }
517                 }
518
519                 if filename == filenameGet || gpgUpdate {
520                         if _, err = os.Stat(path); err != nil {
521                                 goto GPGSigSkip
522                         }
523                         resp, err := c.Do(agentedReq(uri + GPGSigExt))
524                         if err != nil {
525                                 goto GPGSigSkip
526                         }
527                         if resp.StatusCode != http.StatusOK {
528                                 resp.Body.Close()
529                                 goto GPGSigSkip
530                         }
531                         sig, err := ioutil.ReadAll(resp.Body)
532                         resp.Body.Close()
533                         if err != nil {
534                                 goto GPGSigSkip
535                         }
536                         if !bytes.HasPrefix(sig, []byte("-----BEGIN PGP SIGNATURE-----")) {
537                                 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "non PGP")
538                                 goto GPGSigSkip
539                         }
540                         if err = WriteFileSync(dirPath, path+GPGSigExt, sig, mtime); err != nil {
541                                 log.Println("error", r.RemoteAddr, "pypi", filename+GPGSigExt, err)
542                                 http.Error(w, err.Error(), http.StatusInternalServerError)
543                                 return false
544                         }
545                         log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "downloaded")
546                 }
547                 if mtimeExists {
548                         stat, err := os.Stat(path + GPGSigExt)
549                         if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
550                                 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "touch")
551                                 if err = os.Chtimes(path+GPGSigExt, mtime, mtime); err != nil {
552                                         log.Println("error", r.RemoteAddr, "pypi", filename, err)
553                                         http.Error(w, err.Error(), http.StatusInternalServerError)
554                                 }
555                         }
556                 }
557
558         GPGSigSkip:
559                 if digest == nil {
560                         continue
561                 }
562                 path = path + "." + hashAlgo
563                 stat, err := os.Stat(path)
564                 if err == nil && (!mtimeExists ||
565                         (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime))) {
566                         continue
567                 }
568                 if err != nil && !os.IsNotExist(err) {
569                         log.Println("error", r.RemoteAddr, "pypi", path, err)
570                         http.Error(w, err.Error(), http.StatusInternalServerError)
571                         return false
572                 }
573                 log.Println(r.RemoteAddr, "pypi", filename, "touch")
574                 if err = WriteFileSync(dirPath, path, digest, mtime); err != nil {
575                         log.Println("error", r.RemoteAddr, "pypi", path, err)
576                         http.Error(w, err.Error(), http.StatusInternalServerError)
577                         return false
578                 }
579         }
580         return true
581 }