Cypherpunks.ru repositories - gocheese.git/blob - refresh.go
Save BLAKE2b-256 checksum during download
[gocheese.git] / refresh.go
1 /*
2 GoCheese -- Python private package repository and caching proxy
3 Copyright (C) 2019-2021 Sergey Matveev <stargrave@stargrave.org>
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, version 3 of the License.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 package main
19
20 import (
21         "bufio"
22         "bytes"
23         "crypto/md5"
24         "crypto/sha256"
25         "crypto/sha512"
26         "encoding/hex"
27         "encoding/json"
28         "hash"
29         "io"
30         "io/ioutil"
31         "log"
32         "net/http"
33         "net/url"
34         "os"
35         "path/filepath"
36         "regexp"
37         "strings"
38         "time"
39
40         "go.cypherpunks.ru/recfile"
41         "golang.org/x/crypto/blake2b"
42 )
43
// Digest algorithm identifiers. They are compared against the algorithm
// name found in the URL fragment of upstream links and are also used as
// the extension of the checksum files stored next to a package file.
const (
	HashAlgoSHA256     = "sha256"
	HashAlgoBLAKE2b256 = "blake2_256"
	HashAlgoSHA512     = "sha512"
	HashAlgoMD5        = "md5"
)

// File name suffixes used inside a package directory.
const (
	// GPGSigExt is appended to a package file name (and to its upstream
	// URL) to address the detached GPG signature.
	GPGSigExt = ".asc"

	// InternalFlag is the name of the marker file whose presence in a
	// package directory disables refreshing from upstream.
	InternalFlag = ".internal"
)
52
53 var (
54         PkgPyPI           = regexp.MustCompile(`^.*<a href="([^"]+)"[^>]*>(.+)</a>.*$`)
55         PyPIURLParsed     *url.URL
56         PyPIHTTPTransport http.Transport
57         KnownHashAlgos    []string = []string{
58                 HashAlgoSHA256,
59                 HashAlgoBLAKE2b256,
60                 HashAlgoSHA512,
61                 HashAlgoMD5,
62         }
63 )
64
65 func blake2b256New() hash.Hash {
66         h, err := blake2b.New256(nil)
67         if err != nil {
68                 panic(err)
69         }
70         return h
71 }
72
73 func agentedReq(url string) *http.Request {
74         req, err := http.NewRequest("GET", url, nil)
75         if err != nil {
76                 log.Fatalln(err)
77         }
78         req.Header.Set("User-Agent", UserAgent)
79         return req
80 }
81
82 func refreshDir(
83         w http.ResponseWriter,
84         r *http.Request,
85         pkgName, filenameGet string,
86         gpgUpdate bool,
87 ) bool {
88         if _, err := os.Stat(filepath.Join(*Root, pkgName, InternalFlag)); err == nil {
89                 return true
90         }
91         c := http.Client{Transport: &PyPIHTTPTransport}
92         dirPath := filepath.Join(*Root, pkgName)
93         now := time.Now()
94
95         var allReleases map[string][]*PkgReleaseInfo
96         if *JSONURL != "" {
97                 resp, err := c.Do(agentedReq(*JSONURL + pkgName + "/json"))
98                 if err != nil {
99                         log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
100                         http.Error(w, err.Error(), http.StatusBadGateway)
101                         return false
102                 }
103                 if resp.StatusCode != http.StatusOK {
104                         resp.Body.Close()
105                         log.Println(
106                                 "error", r.RemoteAddr, "refresh-json", pkgName,
107                                 "HTTP status:", resp.Status,
108                         )
109                         http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
110                         return false
111                 }
112                 body, err := ioutil.ReadAll(resp.Body)
113                 resp.Body.Close()
114                 var buf bytes.Buffer
115                 var description string
116                 wr := recfile.NewWriter(&buf)
117                 var meta PkgMeta
118                 err = json.Unmarshal(body, &meta)
119                 if err == nil {
120                         for recField, jsonField := range map[string]string{
121                                 MetadataFieldName:                   meta.Info.Name,
122                                 MetadataFieldVersion:                meta.Info.Version,
123                                 MetadataFieldSummary:                meta.Info.Summary,
124                                 MetadataFieldDescriptionContentType: meta.Info.DescriptionContentType,
125                                 MetadataFieldKeywords:               meta.Info.Keywords,
126                                 MetadataFieldHomePage:               meta.Info.HomePage,
127                                 MetadataFieldAuthor:                 meta.Info.Author,
128                                 MetadataFieldAuthorEmail:            meta.Info.AuthorEmail,
129                                 MetadataFieldMaintainer:             meta.Info.Maintainer,
130                                 MetadataFieldMaintainerEmail:        meta.Info.MaintainerEmail,
131                                 MetadataFieldLicense:                meta.Info.License,
132                                 MetadataFieldRequiresPython:         meta.Info.RequiresPython,
133                         } {
134                                 if jsonField == "" {
135                                         continue
136                                 }
137                                 if _, err = wr.WriteFields(recfile.Field{
138                                         Name:  metadataFieldToRecField(recField),
139                                         Value: jsonField,
140                                 }); err != nil {
141                                         log.Fatalln(err)
142                                 }
143                         }
144                         for recField, jsonFields := range map[string][]string{
145                                 MetadataFieldClassifier:        meta.Info.Classifier,
146                                 MetadataFieldPlatform:          meta.Info.Platform,
147                                 MetadataFieldSupportedPlatform: meta.Info.SupportedPlatform,
148                                 MetadataFieldRequiresDist:      meta.Info.RequiresDist,
149                                 MetadataFieldRequiresExternal:  meta.Info.RequiresExternal,
150                                 MetadataFieldProjectURL:        meta.Info.ProjectURL,
151                                 MetadataFieldProvidesExtra:     meta.Info.ProvidesExtra,
152                         } {
153                                 for _, v := range jsonFields {
154                                         if _, err = wr.WriteFields(recfile.Field{
155                                                 Name:  metadataFieldToRecField(recField),
156                                                 Value: v,
157                                         }); err != nil {
158                                                 log.Fatalln(err)
159                                         }
160                                 }
161                         }
162                         description = meta.Info.Description
163                         allReleases = meta.Releases
164                 } else {
165                         var metaStripped PkgMetaStripped
166                         err = json.Unmarshal(body, &metaStripped)
167                         if err != nil {
168                                 log.Println(
169                                         "error", r.RemoteAddr, "refresh-json", pkgName,
170                                         "can not parse JSON:", err,
171                                 )
172                                 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
173                                 return false
174                         }
175                         for recField, jsonField := range map[string]string{
176                                 MetadataFieldName:                   metaStripped.Info.Name,
177                                 MetadataFieldVersion:                metaStripped.Info.Version,
178                                 MetadataFieldSummary:                metaStripped.Info.Summary,
179                                 MetadataFieldDescriptionContentType: metaStripped.Info.DescriptionContentType,
180                                 MetadataFieldKeywords:               metaStripped.Info.Keywords,
181                                 MetadataFieldHomePage:               metaStripped.Info.HomePage,
182                                 MetadataFieldAuthor:                 metaStripped.Info.Author,
183                                 MetadataFieldAuthorEmail:            metaStripped.Info.AuthorEmail,
184                                 MetadataFieldMaintainer:             metaStripped.Info.Maintainer,
185                                 MetadataFieldMaintainerEmail:        metaStripped.Info.MaintainerEmail,
186                                 MetadataFieldLicense:                metaStripped.Info.License,
187                                 MetadataFieldRequiresPython:         metaStripped.Info.RequiresPython,
188                         } {
189                                 if jsonField == "" {
190                                         continue
191                                 }
192                                 if _, err = wr.WriteFields(recfile.Field{
193                                         Name:  metadataFieldToRecField(recField),
194                                         Value: jsonField,
195                                 }); err != nil {
196                                         log.Fatalln(err)
197                                 }
198                         }
199
200                         for recField, jsonFields := range map[string][]string{
201                                 MetadataFieldClassifier:   metaStripped.Info.Classifier,
202                                 MetadataFieldRequiresDist: metaStripped.Info.RequiresDist,
203                         } {
204                                 for _, v := range jsonFields {
205                                         if _, err = wr.WriteFields(recfile.Field{
206                                                 Name:  metadataFieldToRecField(recField),
207                                                 Value: v,
208                                         }); err != nil {
209                                                 log.Fatalln(err)
210                                         }
211                                 }
212                         }
213                         description = metaStripped.Info.Description
214                         allReleases = metaStripped.Releases
215                 }
216                 lines := strings.Split(description, "\n")
217                 if len(lines) > 0 {
218                         if _, err = wr.WriteFieldMultiline(
219                                 MetadataFieldDescription, lines,
220                         ); err != nil {
221                                 log.Fatalln(err)
222                         }
223                 }
224
225                 if !mkdirForPkg(w, r, pkgName) {
226                         return false
227                 }
228                 path := filepath.Join(dirPath, MetadataFile)
229                 existing, err := ioutil.ReadFile(path)
230                 if err != nil || bytes.Compare(existing, buf.Bytes()) != 0 {
231                         if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil {
232                                 log.Println("error", r.RemoteAddr, "refresh-json", path, err)
233                                 http.Error(w, err.Error(), http.StatusInternalServerError)
234                                 return false
235                         }
236                         log.Println(r.RemoteAddr, "pypi", pkgName+"."+MetadataFile, "touch")
237                 }
238         }
239         mtimes := make(map[string]time.Time)
240         for _, releases := range allReleases {
241                 for _, rel := range releases {
242                         if rel.Filename == "" || rel.UploadTimeISO8601 == "" {
243                                 continue
244                         }
245                         t, err := time.Parse(time.RFC3339Nano, rel.UploadTimeISO8601)
246                         if err != nil {
247                                 log.Println(
248                                         "error", r.RemoteAddr, "refresh-json", pkgName,
249                                         "can not parse upload_time:", err,
250                                 )
251                                 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
252                                 return false
253                         }
254                         mtimes[rel.Filename] = t.Truncate(time.Second)
255                 }
256         }
257
258         resp, err := c.Do(agentedReq(*PyPIURL + pkgName + "/"))
259         if err != nil {
260                 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
261                 http.Error(w, err.Error(), http.StatusBadGateway)
262                 return false
263         }
264         if resp.StatusCode != http.StatusOK {
265                 resp.Body.Close()
266                 log.Println(
267                         "error", r.RemoteAddr, "refresh", pkgName,
268                         "HTTP status:", resp.Status,
269                 )
270                 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
271                 return false
272         }
273         body, err := ioutil.ReadAll(resp.Body)
274         resp.Body.Close()
275         if err != nil {
276                 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
277                 http.Error(w, err.Error(), http.StatusBadGateway)
278                 return false
279         }
280         if !mkdirForPkg(w, r, pkgName) {
281                 return false
282         }
283         for _, lineRaw := range bytes.Split(body, []byte("\n")) {
284                 submatches := PkgPyPI.FindStringSubmatch(string(lineRaw))
285                 if len(submatches) == 0 {
286                         continue
287                 }
288                 uri := submatches[1]
289                 filename := submatches[2]
290                 pkgURL, err := url.Parse(uri)
291                 if err != nil {
292                         log.Println("error", r.RemoteAddr, "refresh", uri, err)
293                         http.Error(w, err.Error(), http.StatusBadGateway)
294                         return false
295                 }
296
297                 if pkgURL.Fragment == "" {
298                         log.Println(r.RemoteAddr, "pypi", filename, "no digest")
299                         http.Error(w, "no digest provided", http.StatusBadGateway)
300                         return false
301                 }
302                 digestInfo := strings.Split(pkgURL.Fragment, "=")
303                 if len(digestInfo) == 1 {
304                         // Ancient non PEP-0503 PyPIs, assume MD5
305                         digestInfo = []string{"md5", digestInfo[0]}
306                 } else if len(digestInfo) != 2 {
307                         log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
308                         http.Error(w, "invalid digest provided", http.StatusBadGateway)
309                         return false
310                 }
311                 digest, err := hex.DecodeString(digestInfo[1])
312                 if err != nil {
313                         log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
314                         http.Error(w, err.Error(), http.StatusBadGateway)
315                         return false
316                 }
317                 hashAlgo := digestInfo[0]
318                 var hasherNew func() hash.Hash
319                 var hashSize int
320                 switch hashAlgo {
321                 case HashAlgoMD5:
322                         hasherNew = md5.New
323                         hashSize = md5.Size
324                 case HashAlgoSHA256:
325                         hasherNew = sha256.New
326                         hashSize = sha256.Size
327                 case HashAlgoSHA512:
328                         hasherNew = sha512.New
329                         hashSize = sha512.Size
330                 case HashAlgoBLAKE2b256:
331                         hasherNew = blake2b256New
332                         hashSize = blake2b.Size256
333                 default:
334                         log.Println("error", r.RemoteAddr, "pypi", filename, "unknown digest", hashAlgo)
335                         http.Error(w, "unknown digest algorithm", http.StatusBadGateway)
336                         return false
337                 }
338                 if len(digest) != hashSize {
339                         log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest length")
340                         http.Error(w, "invalid digest length", http.StatusBadGateway)
341                         return false
342                 }
343
344                 pkgURL.Fragment = ""
345                 if pkgURL.Host == "" {
346                         uri = PyPIURLParsed.ResolveReference(pkgURL).String()
347                 } else {
348                         uri = pkgURL.String()
349                 }
350                 mtime, mtimeExists := mtimes[filename]
351                 if !mtimeExists {
352                         mtime = now
353                 }
354
355                 path := filepath.Join(dirPath, filename)
356                 if filename == filenameGet {
357                         if Killed {
358                                 // Skip heavy remote call, when shutting down
359                                 http.Error(w, "shutting down", http.StatusInternalServerError)
360                                 return false
361                         }
362                         log.Println(r.RemoteAddr, "pypi", filename, "download")
363                         resp, err = c.Do(agentedReq(uri))
364                         if err != nil {
365                                 log.Println("error", r.RemoteAddr, "pypi", filename, "download", err)
366                                 http.Error(w, err.Error(), http.StatusBadGateway)
367                                 return false
368                         }
369                         defer resp.Body.Close()
370                         if resp.StatusCode != http.StatusOK {
371                                 log.Println(
372                                         "error", r.RemoteAddr,
373                                         "pypi", filename, "download",
374                                         "HTTP status:", resp.Status,
375                                 )
376                                 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
377                                 return false
378                         }
379                         hasher := hasherNew()
380                         hasherSHA256 := sha256.New()
381                         hasherBLAKE2b256 := blake2b256New()
382                         dst, err := TempFile(dirPath)
383                         if err != nil {
384                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
385                                 http.Error(w, err.Error(), http.StatusInternalServerError)
386                                 return false
387                         }
388                         dstBuf := bufio.NewWriter(dst)
389                         wrs := []io.Writer{hasher, dstBuf}
390                         if hashAlgo != HashAlgoSHA256 {
391                                 wrs = append(wrs, hasherSHA256)
392                         }
393                         if hashAlgo != HashAlgoBLAKE2b256 {
394                                 wrs = append(wrs, hasherBLAKE2b256)
395                         }
396                         wr := io.MultiWriter(wrs...)
397                         if _, err = io.Copy(wr, resp.Body); err != nil {
398                                 os.Remove(dst.Name())
399                                 dst.Close()
400                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
401                                 http.Error(w, err.Error(), http.StatusInternalServerError)
402                                 return false
403                         }
404                         if err = dstBuf.Flush(); err != nil {
405                                 os.Remove(dst.Name())
406                                 dst.Close()
407                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
408                                 http.Error(w, err.Error(), http.StatusInternalServerError)
409                                 return false
410                         }
411                         if bytes.Compare(hasher.Sum(nil), digest) != 0 {
412                                 log.Println(r.RemoteAddr, "pypi", filename, "digest mismatch")
413                                 os.Remove(dst.Name())
414                                 dst.Close()
415                                 http.Error(w, "digest mismatch", http.StatusBadGateway)
416                                 return false
417                         }
418                         if !NoSync {
419                                 if err = dst.Sync(); err != nil {
420                                         os.Remove(dst.Name())
421                                         dst.Close()
422                                         log.Println("error", r.RemoteAddr, "pypi", filename, err)
423                                         http.Error(w, err.Error(), http.StatusInternalServerError)
424                                         return false
425                                 }
426                         }
427                         if err = dst.Close(); err != nil {
428                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
429                                 http.Error(w, err.Error(), http.StatusInternalServerError)
430                                 return false
431                         }
432                         if err = os.Chtimes(dst.Name(), mtime, mtime); err != nil {
433                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
434                                 http.Error(w, err.Error(), http.StatusInternalServerError)
435                         }
436                         if err = os.Rename(dst.Name(), path); err != nil {
437                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
438                                 http.Error(w, err.Error(), http.StatusInternalServerError)
439                                 return false
440                         }
441                         if err = DirSync(dirPath); err != nil {
442                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
443                                 http.Error(w, err.Error(), http.StatusInternalServerError)
444                                 return false
445                         }
446
447                         var digestSHA256 []byte
448                         var digestBLAKE2b256 []byte
449                         if hashAlgo == HashAlgoSHA256 {
450                                 digestSHA256 = hasher.Sum(nil)
451                         } else {
452                                 digestSHA256 = hasherSHA256.Sum(nil)
453                         }
454                         if hashAlgo == HashAlgoBLAKE2b256 {
455                                 digestBLAKE2b256 = hasher.Sum(nil)
456                         } else {
457                                 digestBLAKE2b256 = hasherBLAKE2b256.Sum(nil)
458                         }
459                         if err = WriteFileSync(
460                                 dirPath, path+"."+HashAlgoSHA256,
461                                 digestSHA256, mtime,
462                         ); err != nil {
463                                 log.Println(
464                                         "error", r.RemoteAddr, "pypi",
465                                         path+"."+HashAlgoSHA256, err,
466                                 )
467                                 http.Error(w, err.Error(), http.StatusInternalServerError)
468                                 return false
469                         }
470                         if err = WriteFileSync(
471                                 dirPath, path+"."+HashAlgoBLAKE2b256,
472                                 digestBLAKE2b256, mtime,
473                         ); err != nil {
474                                 log.Println(
475                                         "error", r.RemoteAddr, "pypi",
476                                         path+"."+HashAlgoBLAKE2b256, err,
477                                 )
478                                 http.Error(w, err.Error(), http.StatusInternalServerError)
479                                 return false
480                         }
481                         for _, algo := range KnownHashAlgos[2:] {
482                                 os.Remove(path + "." + algo)
483                         }
484                         digest = nil
485                 }
486                 if mtimeExists {
487                         stat, err := os.Stat(path)
488                         if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
489                                 log.Println(r.RemoteAddr, "pypi", filename, "touch")
490                                 if err = os.Chtimes(path, mtime, mtime); err != nil {
491                                         log.Println("error", r.RemoteAddr, "pypi", filename, err)
492                                         http.Error(w, err.Error(), http.StatusInternalServerError)
493                                 }
494                         }
495                 }
496
497                 if filename == filenameGet || gpgUpdate {
498                         if _, err = os.Stat(path); err != nil {
499                                 goto GPGSigSkip
500                         }
501                         resp, err := c.Do(agentedReq(uri + GPGSigExt))
502                         if err != nil {
503                                 goto GPGSigSkip
504                         }
505                         if resp.StatusCode != http.StatusOK {
506                                 resp.Body.Close()
507                                 goto GPGSigSkip
508                         }
509                         sig, err := ioutil.ReadAll(resp.Body)
510                         resp.Body.Close()
511                         if err != nil {
512                                 goto GPGSigSkip
513                         }
514                         if !bytes.HasPrefix(sig, []byte("-----BEGIN PGP SIGNATURE-----")) {
515                                 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "non PGP")
516                                 goto GPGSigSkip
517                         }
518                         if err = WriteFileSync(dirPath, path+GPGSigExt, sig, mtime); err != nil {
519                                 log.Println("error", r.RemoteAddr, "pypi", filename+GPGSigExt, err)
520                                 http.Error(w, err.Error(), http.StatusInternalServerError)
521                                 return false
522                         }
523                         log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "downloaded")
524                 }
525                 if mtimeExists {
526                         stat, err := os.Stat(path + GPGSigExt)
527                         if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
528                                 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "touch")
529                                 if err = os.Chtimes(path+GPGSigExt, mtime, mtime); err != nil {
530                                         log.Println("error", r.RemoteAddr, "pypi", filename, err)
531                                         http.Error(w, err.Error(), http.StatusInternalServerError)
532                                 }
533                         }
534                 }
535
536         GPGSigSkip:
537                 if digest == nil {
538                         continue
539                 }
540                 path = path + "." + hashAlgo
541                 stat, err := os.Stat(path)
542                 if err == nil &&
543                         (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime)) {
544                         continue
545                 }
546                 if err != nil && !os.IsNotExist(err) {
547                         log.Println("error", r.RemoteAddr, "pypi", path, err)
548                         http.Error(w, err.Error(), http.StatusInternalServerError)
549                         return false
550                 }
551                 log.Println(r.RemoteAddr, "pypi", filename, "touch")
552                 if err = WriteFileSync(dirPath, path, digest, mtime); err != nil {
553                         log.Println("error", r.RemoteAddr, "pypi", path, err)
554                         http.Error(w, err.Error(), http.StatusInternalServerError)
555                         return false
556                 }
557         }
558         return true
559 }