Cypherpunks.ru repositories - gocheese.git/blob - refresh.go
Missing error check
[gocheese.git] / refresh.go
1 /*
2 GoCheese -- Python private package repository and caching proxy
3 Copyright (C) 2019-2021 Sergey Matveev <stargrave@stargrave.org>
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, version 3 of the License.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 package main
19
20 import (
21         "bufio"
22         "bytes"
23         "crypto/md5"
24         "crypto/sha256"
25         "crypto/sha512"
26         "encoding/hex"
27         "encoding/json"
28         "errors"
29         "hash"
30         "io"
31         "io/ioutil"
32         "log"
33         "net/http"
34         "net/url"
35         "os"
36         "path/filepath"
37         "regexp"
38         "strings"
39         "time"
40
41         "go.cypherpunks.ru/recfile"
42         "golang.org/x/crypto/blake2b"
43 )
44
// Digest algorithm names. They are matched against the algorithm part
// of an upstream link fragment ("#<algo>=<hex>") and are used as the
// suffix of the digest files stored next to a release file.
const (
	HashAlgoSHA256     = "sha256"
	HashAlgoBLAKE2b256 = "blake2_256"
	HashAlgoSHA512     = "sha512"
	HashAlgoMD5        = "md5"
)

// File name suffixes used inside a package directory.
const (
	// GPGSigExt is appended both to a release URL and to its local
	// path in order to address the detached signature.
	GPGSigExt = ".asc"
	// InternalFlag names the marker file whose presence makes
	// refreshDir return without contacting the upstream.
	InternalFlag = ".internal"
)

var (
	// PkgPyPI matches a single line of an upstream index page that
	// contains an anchor: submatch 1 is the href value, submatch 2 is
	// the link text (used as the release file name).
	PkgPyPI = regexp.MustCompile(`^.*<a href="([^"]+)"[^>]*>(.+)</a>.*$`)

	// PyPIURLParsed is the base against which host-less links are
	// resolved. It is not initialised in this file.
	PyPIURLParsed *url.URL

	// PyPIHTTPTransport is the transport shared by all upstream requests.
	PyPIHTTPTransport http.Transport

	// KnownHashAlgos lists the supported digests. The order matters:
	// refreshDir treats the entries from index 2 onwards as the ones
	// whose digest files are removed after a successful download.
	KnownHashAlgos = []string{
		HashAlgoSHA256,
		HashAlgoBLAKE2b256,
		HashAlgoSHA512,
		HashAlgoMD5,
	}
)
65
66 func blake2b256New() hash.Hash {
67         h, err := blake2b.New256(nil)
68         if err != nil {
69                 panic(err)
70         }
71         return h
72 }
73
74 func agentedReq(url string) *http.Request {
75         req, err := http.NewRequest("GET", url, nil)
76         if err != nil {
77                 log.Fatalln(err)
78         }
79         req.Header.Set("User-Agent", UserAgent)
80         return req
81 }
82
// RecFieldToValuesMap pairs one metadata field with all of its values.
// recField is the key looked up in MDFieldToRecField to obtain the
// recfile field name; jsonFields holds the values taken from the
// upstream JSON, each of which is written as a separate recfile field.
// Values of this type are built positionally, so the field order must
// not change.
type RecFieldToValuesMap struct {
	recField   string
	jsonFields []string
}
87
88 func refreshDir(
89         w http.ResponseWriter,
90         r *http.Request,
91         pkgName, filenameGet string,
92         gpgUpdate bool,
93 ) bool {
94         if _, err := os.Stat(filepath.Join(Root, pkgName, InternalFlag)); err == nil {
95                 return true
96         }
97         c := http.Client{Transport: &PyPIHTTPTransport}
98         dirPath := filepath.Join(Root, pkgName)
99         now := time.Now()
100
101         var allReleases map[string][]*PkgReleaseInfo
102         if *JSONURL != "" {
103                 resp, err := c.Do(agentedReq(*JSONURL + pkgName + "/json"))
104                 if err != nil {
105                         log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
106                         http.Error(w, err.Error(), http.StatusBadGateway)
107                         return false
108                 }
109                 if resp.StatusCode != http.StatusOK {
110                         resp.Body.Close()
111                         log.Println(
112                                 "error", r.RemoteAddr, "refresh-json", pkgName,
113                                 "HTTP status:", resp.Status,
114                         )
115                         http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
116                         return false
117                 }
118                 body, err := ioutil.ReadAll(resp.Body)
119                 if err != nil {
120                         resp.Body.Close()
121                         log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
122                         http.Error(w, "can not read body", http.StatusBadGateway)
123                         return false
124                 }
125                 resp.Body.Close()
126                 var buf bytes.Buffer
127                 var description string
128                 wr := recfile.NewWriter(&buf)
129                 var meta PkgMeta
130                 err = json.Unmarshal(body, &meta)
131                 if err == nil {
132                         for _, m := range [][2]string{
133                                 {MDFieldName, meta.Info.Name},
134                                 {MDFieldVersion, meta.Info.Version},
135                                 {MDFieldSummary, meta.Info.Summary},
136                                 {MDFieldDescriptionContentType, meta.Info.DescriptionContentType},
137                                 {MDFieldKeywords, meta.Info.Keywords},
138                                 {MDFieldHomePage, meta.Info.HomePage},
139                                 {MDFieldAuthor, meta.Info.Author},
140                                 {MDFieldAuthorEmail, meta.Info.AuthorEmail},
141                                 {MDFieldMaintainer, meta.Info.Maintainer},
142                                 {MDFieldMaintainerEmail, meta.Info.MaintainerEmail},
143                                 {MDFieldLicense, meta.Info.License},
144                                 {MDFieldRequiresPython, meta.Info.RequiresPython},
145                         } {
146                                 recField, jsonField := m[0], m[1]
147                                 if jsonField == "" {
148                                         continue
149                                 }
150                                 if _, err = wr.WriteFields(recfile.Field{
151                                         Name:  MDFieldToRecField[recField],
152                                         Value: jsonField,
153                                 }); err != nil {
154                                         log.Fatalln(err)
155                                 }
156                         }
157                         for _, m := range []RecFieldToValuesMap{
158                                 {MDFieldClassifier, meta.Info.Classifier},
159                                 {MDFieldPlatform, meta.Info.Platform},
160                                 {MDFieldSupportedPlatform, meta.Info.SupportedPlatform},
161                                 {MDFieldRequiresDist, meta.Info.RequiresDist},
162                                 {MDFieldRequiresExternal, meta.Info.RequiresExternal},
163                                 {MDFieldProjectURL, meta.Info.ProjectURL},
164                                 {MDFieldProvidesExtra, meta.Info.ProvidesExtra},
165                         } {
166                                 for _, v := range m.jsonFields {
167                                         if _, err = wr.WriteFields(recfile.Field{
168                                                 Name:  MDFieldToRecField[m.recField],
169                                                 Value: v,
170                                         }); err != nil {
171                                                 log.Fatalln(err)
172                                         }
173                                 }
174                         }
175                         description = meta.Info.Description
176                         allReleases = meta.Releases
177                 } else {
178                         var metaStripped PkgMetaStripped
179                         err = json.Unmarshal(body, &metaStripped)
180                         if err != nil {
181                                 log.Println(
182                                         "error", r.RemoteAddr, "refresh-json", pkgName,
183                                         "can not parse JSON:", err,
184                                 )
185                                 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
186                                 return false
187                         }
188                         for _, m := range [][2]string{
189                                 {MDFieldName, metaStripped.Info.Name},
190                                 {MDFieldVersion, metaStripped.Info.Version},
191                                 {MDFieldSummary, metaStripped.Info.Summary},
192                                 {MDFieldDescriptionContentType, metaStripped.Info.DescriptionContentType},
193                                 {MDFieldKeywords, metaStripped.Info.Keywords},
194                                 {MDFieldHomePage, metaStripped.Info.HomePage},
195                                 {MDFieldAuthor, metaStripped.Info.Author},
196                                 {MDFieldAuthorEmail, metaStripped.Info.AuthorEmail},
197                                 {MDFieldMaintainer, metaStripped.Info.Maintainer},
198                                 {MDFieldMaintainerEmail, metaStripped.Info.MaintainerEmail},
199                                 {MDFieldLicense, metaStripped.Info.License},
200                                 {MDFieldRequiresPython, metaStripped.Info.RequiresPython},
201                         } {
202                                 recField, jsonField := m[0], m[1]
203                                 if jsonField == "" {
204                                         continue
205                                 }
206                                 if _, err = wr.WriteFields(recfile.Field{
207                                         Name:  MDFieldToRecField[recField],
208                                         Value: jsonField,
209                                 }); err != nil {
210                                         log.Fatalln(err)
211                                 }
212                         }
213
214                         for _, m := range []RecFieldToValuesMap{
215                                 {MDFieldClassifier, metaStripped.Info.Classifier},
216                                 {MDFieldRequiresDist, metaStripped.Info.RequiresDist},
217                         } {
218                                 for _, v := range m.jsonFields {
219                                         if _, err = wr.WriteFields(recfile.Field{
220                                                 Name:  MDFieldToRecField[m.recField],
221                                                 Value: v,
222                                         }); err != nil {
223                                                 log.Fatalln(err)
224                                         }
225                                 }
226                         }
227                         description = metaStripped.Info.Description
228                         allReleases = metaStripped.Releases
229                 }
230                 lines := strings.Split(description, "\n")
231                 if len(lines) > 0 {
232                         if _, err = wr.WriteFieldMultiline(
233                                 MDFieldDescription, lines,
234                         ); err != nil {
235                                 log.Fatalln(err)
236                         }
237                 }
238
239                 if !mkdirForPkg(w, r, pkgName) {
240                         return false
241                 }
242                 path := filepath.Join(dirPath, MDFile)
243                 existing, err := ioutil.ReadFile(path)
244                 if err != nil || bytes.Compare(existing, buf.Bytes()) != 0 {
245                         if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil {
246                                 log.Println("error", r.RemoteAddr, "refresh-json", path, err)
247                                 http.Error(w, err.Error(), http.StatusInternalServerError)
248                                 return false
249                         }
250                         log.Println(r.RemoteAddr, "pypi", pkgName+"/"+MDFile, "touch")
251                 }
252         }
253         mtimes := make(map[string]time.Time)
254         for _, releases := range allReleases {
255                 for _, rel := range releases {
256                         if rel.Filename == "" || rel.UploadTimeISO8601 == "" {
257                                 continue
258                         }
259                         t, err := time.Parse(time.RFC3339Nano, rel.UploadTimeISO8601)
260                         if err != nil {
261                                 log.Println(
262                                         "error", r.RemoteAddr, "refresh-json", pkgName,
263                                         "can not parse upload_time:", err,
264                                 )
265                                 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
266                                 return false
267                         }
268                         mtimes[rel.Filename] = t.Truncate(time.Second)
269                 }
270         }
271
272         resp, err := c.Do(agentedReq(*PyPIURL + pkgName + "/"))
273         if err != nil {
274                 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
275                 http.Error(w, err.Error(), http.StatusBadGateway)
276                 return false
277         }
278         if resp.StatusCode != http.StatusOK {
279                 resp.Body.Close()
280                 log.Println(
281                         "error", r.RemoteAddr, "refresh", pkgName,
282                         "HTTP status:", resp.Status,
283                 )
284                 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
285                 return false
286         }
287         body, err := ioutil.ReadAll(resp.Body)
288         resp.Body.Close()
289         if err != nil {
290                 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
291                 http.Error(w, err.Error(), http.StatusBadGateway)
292                 return false
293         }
294         if !mkdirForPkg(w, r, pkgName) {
295                 return false
296         }
297         for _, lineRaw := range bytes.Split(body, []byte("\n")) {
298                 submatches := PkgPyPI.FindStringSubmatch(string(lineRaw))
299                 if len(submatches) == 0 {
300                         continue
301                 }
302                 uri := submatches[1]
303                 filename := submatches[2]
304                 pkgURL, err := url.Parse(uri)
305                 if err != nil {
306                         log.Println("error", r.RemoteAddr, "refresh", uri, err)
307                         http.Error(w, err.Error(), http.StatusBadGateway)
308                         return false
309                 }
310
311                 if pkgURL.Fragment == "" {
312                         log.Println(r.RemoteAddr, "pypi", filename, "no digest")
313                         http.Error(w, "no digest provided", http.StatusBadGateway)
314                         return false
315                 }
316                 digestInfo := strings.Split(pkgURL.Fragment, "=")
317                 if len(digestInfo) == 1 {
318                         // Ancient non PEP-0503 PyPIs, assume MD5
319                         digestInfo = []string{"md5", digestInfo[0]}
320                 } else if len(digestInfo) != 2 {
321                         log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
322                         http.Error(w, "invalid digest provided", http.StatusBadGateway)
323                         return false
324                 }
325                 digest, err := hex.DecodeString(digestInfo[1])
326                 if err != nil {
327                         log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
328                         http.Error(w, err.Error(), http.StatusBadGateway)
329                         return false
330                 }
331                 hashAlgo := digestInfo[0]
332                 var hasherNew func() hash.Hash
333                 var hashSize int
334                 switch hashAlgo {
335                 case HashAlgoMD5:
336                         hasherNew = md5.New
337                         hashSize = md5.Size
338                 case HashAlgoSHA256:
339                         hasherNew = sha256.New
340                         hashSize = sha256.Size
341                 case HashAlgoSHA512:
342                         hasherNew = sha512.New
343                         hashSize = sha512.Size
344                 case HashAlgoBLAKE2b256:
345                         hasherNew = blake2b256New
346                         hashSize = blake2b.Size256
347                 default:
348                         log.Println(
349                                 "error", r.RemoteAddr, "pypi",
350                                 filename, "unknown digest", hashAlgo,
351                         )
352                         http.Error(w, "unknown digest algorithm", http.StatusBadGateway)
353                         return false
354                 }
355                 if len(digest) != hashSize {
356                         log.Println(
357                                 "error", r.RemoteAddr, "pypi",
358                                 filename, "invalid digest length")
359                         http.Error(w, "invalid digest length", http.StatusBadGateway)
360                         return false
361                 }
362
363                 pkgURL.Fragment = ""
364                 if pkgURL.Host == "" {
365                         uri = PyPIURLParsed.ResolveReference(pkgURL).String()
366                 } else {
367                         uri = pkgURL.String()
368                 }
369                 mtime, mtimeExists := mtimes[filename]
370                 if !mtimeExists {
371                         mtime = now
372                 }
373
374                 path := filepath.Join(dirPath, filename)
375                 if filename == filenameGet {
376                         if Killed {
377                                 // Skip heavy remote call, when shutting down
378                                 http.Error(w, "shutting down", http.StatusInternalServerError)
379                                 return false
380                         }
381                         log.Println(r.RemoteAddr, "pypi", filename, "download")
382                         resp, err = c.Do(agentedReq(uri))
383                         if err != nil {
384                                 log.Println("error", r.RemoteAddr, "pypi", filename, "download", err)
385                                 http.Error(w, err.Error(), http.StatusBadGateway)
386                                 return false
387                         }
388                         defer resp.Body.Close()
389                         if resp.StatusCode != http.StatusOK {
390                                 log.Println(
391                                         "error", r.RemoteAddr,
392                                         "pypi", filename, "download",
393                                         "HTTP status:", resp.Status,
394                                 )
395                                 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
396                                 return false
397                         }
398                         hasher := hasherNew()
399                         hasherSHA256 := sha256.New()
400                         hasherBLAKE2b256 := blake2b256New()
401                         dst, err := TempFile(dirPath)
402                         if err != nil {
403                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
404                                 http.Error(w, err.Error(), http.StatusInternalServerError)
405                                 return false
406                         }
407                         dstBuf := bufio.NewWriter(dst)
408                         wrs := []io.Writer{hasher, dstBuf}
409                         if hashAlgo != HashAlgoSHA256 {
410                                 wrs = append(wrs, hasherSHA256)
411                         }
412                         if hashAlgo != HashAlgoBLAKE2b256 {
413                                 wrs = append(wrs, hasherBLAKE2b256)
414                         }
415                         wr := io.MultiWriter(wrs...)
416                         if _, err = io.Copy(wr, resp.Body); err != nil {
417                                 os.Remove(dst.Name())
418                                 dst.Close()
419                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
420                                 http.Error(w, err.Error(), http.StatusInternalServerError)
421                                 return false
422                         }
423                         if err = dstBuf.Flush(); err != nil {
424                                 os.Remove(dst.Name())
425                                 dst.Close()
426                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
427                                 http.Error(w, err.Error(), http.StatusInternalServerError)
428                                 return false
429                         }
430                         if bytes.Compare(hasher.Sum(nil), digest) != 0 {
431                                 log.Println(r.RemoteAddr, "pypi", filename, "digest mismatch")
432                                 os.Remove(dst.Name())
433                                 dst.Close()
434                                 http.Error(w, "digest mismatch", http.StatusBadGateway)
435                                 return false
436                         }
437                         if digestStored, err := ioutil.ReadFile(path + "." + hashAlgo); err == nil &&
438                                 bytes.Compare(digest, digestStored) != 0 {
439                                 err = errors.New("stored digest mismatch")
440                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
441                                 os.Remove(dst.Name())
442                                 dst.Close()
443                                 http.Error(w, err.Error(), http.StatusInternalServerError)
444                                 return false
445                         }
446                         if !NoSync {
447                                 if err = dst.Sync(); err != nil {
448                                         os.Remove(dst.Name())
449                                         dst.Close()
450                                         log.Println("error", r.RemoteAddr, "pypi", filename, err)
451                                         http.Error(w, err.Error(), http.StatusInternalServerError)
452                                         return false
453                                 }
454                         }
455                         if err = dst.Close(); err != nil {
456                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
457                                 http.Error(w, err.Error(), http.StatusInternalServerError)
458                                 return false
459                         }
460                         if err = os.Chtimes(dst.Name(), mtime, mtime); err != nil {
461                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
462                                 http.Error(w, err.Error(), http.StatusInternalServerError)
463                         }
464                         if err = os.Rename(dst.Name(), path); err != nil {
465                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
466                                 http.Error(w, err.Error(), http.StatusInternalServerError)
467                                 return false
468                         }
469                         if err = DirSync(dirPath); err != nil {
470                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
471                                 http.Error(w, err.Error(), http.StatusInternalServerError)
472                                 return false
473                         }
474
475                         var digestSHA256 []byte
476                         var digestBLAKE2b256 []byte
477                         if hashAlgo == HashAlgoSHA256 {
478                                 digestSHA256 = hasher.Sum(nil)
479                         } else {
480                                 digestSHA256 = hasherSHA256.Sum(nil)
481                         }
482                         if hashAlgo == HashAlgoBLAKE2b256 {
483                                 digestBLAKE2b256 = hasher.Sum(nil)
484                         } else {
485                                 digestBLAKE2b256 = hasherBLAKE2b256.Sum(nil)
486                         }
487                         if err = WriteFileSync(
488                                 dirPath, path+"."+HashAlgoSHA256,
489                                 digestSHA256, mtime,
490                         ); err != nil {
491                                 log.Println(
492                                         "error", r.RemoteAddr, "pypi",
493                                         path+"."+HashAlgoSHA256, err,
494                                 )
495                                 http.Error(w, err.Error(), http.StatusInternalServerError)
496                                 return false
497                         }
498                         if err = WriteFileSync(
499                                 dirPath, path+"."+HashAlgoBLAKE2b256,
500                                 digestBLAKE2b256, mtime,
501                         ); err != nil {
502                                 log.Println(
503                                         "error", r.RemoteAddr, "pypi",
504                                         path+"."+HashAlgoBLAKE2b256, err,
505                                 )
506                                 http.Error(w, err.Error(), http.StatusInternalServerError)
507                                 return false
508                         }
509                         for _, algo := range KnownHashAlgos[2:] {
510                                 os.Remove(path + "." + algo)
511                         }
512                         digest = nil
513                 }
514                 if mtimeExists {
515                         stat, err := os.Stat(path)
516                         if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
517                                 log.Println(r.RemoteAddr, "pypi", filename, "touch")
518                                 if err = os.Chtimes(path, mtime, mtime); err != nil {
519                                         log.Println("error", r.RemoteAddr, "pypi", filename, err)
520                                         http.Error(w, err.Error(), http.StatusInternalServerError)
521                                 }
522                         }
523                 }
524
525                 if filename == filenameGet || gpgUpdate {
526                         if _, err = os.Stat(path); err != nil {
527                                 goto GPGSigSkip
528                         }
529                         resp, err := c.Do(agentedReq(uri + GPGSigExt))
530                         if err != nil {
531                                 goto GPGSigSkip
532                         }
533                         if resp.StatusCode != http.StatusOK {
534                                 resp.Body.Close()
535                                 goto GPGSigSkip
536                         }
537                         sig, err := ioutil.ReadAll(resp.Body)
538                         resp.Body.Close()
539                         if err != nil {
540                                 goto GPGSigSkip
541                         }
542                         if !bytes.HasPrefix(sig, []byte("-----BEGIN PGP SIGNATURE-----")) {
543                                 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "non PGP")
544                                 goto GPGSigSkip
545                         }
546                         if err = WriteFileSync(dirPath, path+GPGSigExt, sig, mtime); err != nil {
547                                 log.Println("error", r.RemoteAddr, "pypi", filename+GPGSigExt, err)
548                                 http.Error(w, err.Error(), http.StatusInternalServerError)
549                                 return false
550                         }
551                         log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "downloaded")
552                 }
553                 if mtimeExists {
554                         stat, err := os.Stat(path + GPGSigExt)
555                         if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
556                                 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "touch")
557                                 if err = os.Chtimes(path+GPGSigExt, mtime, mtime); err != nil {
558                                         log.Println("error", r.RemoteAddr, "pypi", filename, err)
559                                         http.Error(w, err.Error(), http.StatusInternalServerError)
560                                 }
561                         }
562                 }
563
564         GPGSigSkip:
565                 if digest == nil {
566                         continue
567                 }
568                 path = path + "." + hashAlgo
569                 stat, err := os.Stat(path)
570                 if err == nil && (!mtimeExists ||
571                         (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime))) {
572                         continue
573                 }
574                 if err != nil && !os.IsNotExist(err) {
575                         log.Println("error", r.RemoteAddr, "pypi", path, err)
576                         http.Error(w, err.Error(), http.StatusInternalServerError)
577                         return false
578                 }
579                 log.Println(r.RemoteAddr, "pypi", filename, "touch")
580                 if err = WriteFileSync(dirPath, path, digest, mtime); err != nil {
581                         log.Println("error", r.RemoteAddr, "pypi", path, err)
582                         http.Error(w, err.Error(), http.StatusInternalServerError)
583                         return false
584                 }
585         }
586         return true
587 }