]> Cypherpunks.ru repositories - gocheese.git/blob - refresh.go
59c8ceb9f5dd7c2b6288e27eb1d1cc0c9f6574e9
[gocheese.git] / refresh.go
1 /*
2 GoCheese -- Python private package repository and caching proxy
3 Copyright (C) 2019-2021 Sergey Matveev <stargrave@stargrave.org>
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, version 3 of the License.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 package main
19
20 import (
21         "bufio"
22         "bytes"
23         "crypto/md5"
24         "crypto/sha256"
25         "crypto/sha512"
26         "encoding/hex"
27         "encoding/json"
28         "hash"
29         "io"
30         "io/ioutil"
31         "log"
32         "net/http"
33         "net/url"
34         "os"
35         "path/filepath"
36         "regexp"
37         "strings"
38         "time"
39
40         "go.cypherpunks.ru/recfile"
41         "golang.org/x/crypto/blake2b"
42 )
43
// Names of the digest algorithms accepted in the fragment of an index
// link. The same strings are appended (after a dot) to a file's path to
// name the file that stores its digest.
const (
	HashAlgoSHA256     = "sha256"
	HashAlgoBLAKE2b256 = "blake2_256"
	HashAlgoSHA512     = "sha512"
	HashAlgoMD5        = "md5"
)

// Special file names and suffixes used inside a package directory.
const (
	// GPGSigExt is appended both to a file's upstream URL and to its
	// local path to address the file's detached signature.
	GPGSigExt = ".asc"
	// InternalFlag is the name of the marker file whose presence in a
	// package directory makes refreshDir skip the upstream refresh.
	InternalFlag = ".internal"
)

// PkgPyPI matches one line of an index page that holds a link. The first
// submatch is the href value, the second one is the link text.
var PkgPyPI = regexp.MustCompile(`^.*<a href="([^"]+)"[^>]*>(.+)</a>.*$`)

var (
	// PyPIURLParsed is the base against which host-less links of the
	// index page are resolved. It is not assigned in this file.
	PyPIURLParsed *url.URL
	// PyPIHTTPTransport is the transport of the HTTP client used for
	// upstream requests.
	PyPIHTTPTransport http.Transport
)

// KnownHashAlgos lists the supported digest algorithms. The order
// matters: after a download refreshDir stores the digests of the first
// two entries and removes the digest files of KnownHashAlgos[2:].
var KnownHashAlgos = []string{
	HashAlgoSHA256,
	HashAlgoBLAKE2b256,
	HashAlgoSHA512,
	HashAlgoMD5,
}
64
65 func blake2b256New() hash.Hash {
66         h, err := blake2b.New256(nil)
67         if err != nil {
68                 panic(err)
69         }
70         return h
71 }
72
73 func agentedReq(url string) *http.Request {
74         req, err := http.NewRequest("GET", url, nil)
75         if err != nil {
76                 log.Fatalln(err)
77         }
78         req.Header.Set("User-Agent", UserAgent)
79         return req
80 }
81
// RecFieldToValuesMap pairs one metadata field name with all the values
// the JSON metadata holds for it. refreshDir writes every value as a
// separate record field of that name. The field order is relied upon by
// positional composite literals.
type RecFieldToValuesMap struct {
	// recField is the metadata field name; it is converted with
	// metadataFieldToRecField before being written.
	recField string
	// jsonFields are the values to write, one record field per element.
	jsonFields []string
}
86
87 func refreshDir(
88         w http.ResponseWriter,
89         r *http.Request,
90         pkgName, filenameGet string,
91         gpgUpdate bool,
92 ) bool {
93         if _, err := os.Stat(filepath.Join(Root, pkgName, InternalFlag)); err == nil {
94                 return true
95         }
96         c := http.Client{Transport: &PyPIHTTPTransport}
97         dirPath := filepath.Join(Root, pkgName)
98         now := time.Now()
99
100         var allReleases map[string][]*PkgReleaseInfo
101         if *JSONURL != "" {
102                 resp, err := c.Do(agentedReq(*JSONURL + pkgName + "/json"))
103                 if err != nil {
104                         log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
105                         http.Error(w, err.Error(), http.StatusBadGateway)
106                         return false
107                 }
108                 if resp.StatusCode != http.StatusOK {
109                         resp.Body.Close()
110                         log.Println(
111                                 "error", r.RemoteAddr, "refresh-json", pkgName,
112                                 "HTTP status:", resp.Status,
113                         )
114                         http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
115                         return false
116                 }
117                 body, err := ioutil.ReadAll(resp.Body)
118                 resp.Body.Close()
119                 var buf bytes.Buffer
120                 var description string
121                 wr := recfile.NewWriter(&buf)
122                 var meta PkgMeta
123                 err = json.Unmarshal(body, &meta)
124                 if err == nil {
125                         for _, m := range [][2]string{
126                                 {MetadataFieldName, meta.Info.Name},
127                                 {MetadataFieldVersion, meta.Info.Version},
128                                 {MetadataFieldSummary, meta.Info.Summary},
129                                 {MetadataFieldDescriptionContentType, meta.Info.DescriptionContentType},
130                                 {MetadataFieldKeywords, meta.Info.Keywords},
131                                 {MetadataFieldHomePage, meta.Info.HomePage},
132                                 {MetadataFieldAuthor, meta.Info.Author},
133                                 {MetadataFieldAuthorEmail, meta.Info.AuthorEmail},
134                                 {MetadataFieldMaintainer, meta.Info.Maintainer},
135                                 {MetadataFieldMaintainerEmail, meta.Info.MaintainerEmail},
136                                 {MetadataFieldLicense, meta.Info.License},
137                                 {MetadataFieldRequiresPython, meta.Info.RequiresPython},
138                         } {
139                                 recField, jsonField := m[0], m[1]
140                                 if jsonField == "" {
141                                         continue
142                                 }
143                                 if _, err = wr.WriteFields(recfile.Field{
144                                         Name:  metadataFieldToRecField(recField),
145                                         Value: jsonField,
146                                 }); err != nil {
147                                         log.Fatalln(err)
148                                 }
149                         }
150                         for _, m := range []RecFieldToValuesMap{
151                                 {MetadataFieldClassifier, meta.Info.Classifier},
152                                 {MetadataFieldPlatform, meta.Info.Platform},
153                                 {MetadataFieldSupportedPlatform, meta.Info.SupportedPlatform},
154                                 {MetadataFieldRequiresDist, meta.Info.RequiresDist},
155                                 {MetadataFieldRequiresExternal, meta.Info.RequiresExternal},
156                                 {MetadataFieldProjectURL, meta.Info.ProjectURL},
157                                 {MetadataFieldProvidesExtra, meta.Info.ProvidesExtra},
158                         } {
159                                 for _, v := range m.jsonFields {
160                                         if _, err = wr.WriteFields(recfile.Field{
161                                                 Name:  metadataFieldToRecField(m.recField),
162                                                 Value: v,
163                                         }); err != nil {
164                                                 log.Fatalln(err)
165                                         }
166                                 }
167                         }
168                         description = meta.Info.Description
169                         allReleases = meta.Releases
170                 } else {
171                         var metaStripped PkgMetaStripped
172                         err = json.Unmarshal(body, &metaStripped)
173                         if err != nil {
174                                 log.Println(
175                                         "error", r.RemoteAddr, "refresh-json", pkgName,
176                                         "can not parse JSON:", err,
177                                 )
178                                 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
179                                 return false
180                         }
181                         for _, m := range [][2]string{
182                                 {MetadataFieldName, metaStripped.Info.Name},
183                                 {MetadataFieldVersion, metaStripped.Info.Version},
184                                 {MetadataFieldSummary, metaStripped.Info.Summary},
185                                 {MetadataFieldDescriptionContentType, metaStripped.Info.DescriptionContentType},
186                                 {MetadataFieldKeywords, metaStripped.Info.Keywords},
187                                 {MetadataFieldHomePage, metaStripped.Info.HomePage},
188                                 {MetadataFieldAuthor, metaStripped.Info.Author},
189                                 {MetadataFieldAuthorEmail, metaStripped.Info.AuthorEmail},
190                                 {MetadataFieldMaintainer, metaStripped.Info.Maintainer},
191                                 {MetadataFieldMaintainerEmail, metaStripped.Info.MaintainerEmail},
192                                 {MetadataFieldLicense, metaStripped.Info.License},
193                                 {MetadataFieldRequiresPython, metaStripped.Info.RequiresPython},
194                         } {
195                                 recField, jsonField := m[0], m[1]
196                                 if jsonField == "" {
197                                         continue
198                                 }
199                                 if _, err = wr.WriteFields(recfile.Field{
200                                         Name:  metadataFieldToRecField(recField),
201                                         Value: jsonField,
202                                 }); err != nil {
203                                         log.Fatalln(err)
204                                 }
205                         }
206
207                         for _, m := range []RecFieldToValuesMap{
208                                 {MetadataFieldClassifier, metaStripped.Info.Classifier},
209                                 {MetadataFieldRequiresDist, metaStripped.Info.RequiresDist},
210                         } {
211                                 for _, v := range m.jsonFields {
212                                         if _, err = wr.WriteFields(recfile.Field{
213                                                 Name:  metadataFieldToRecField(m.recField),
214                                                 Value: v,
215                                         }); err != nil {
216                                                 log.Fatalln(err)
217                                         }
218                                 }
219                         }
220                         description = metaStripped.Info.Description
221                         allReleases = metaStripped.Releases
222                 }
223                 lines := strings.Split(description, "\n")
224                 if len(lines) > 0 {
225                         if _, err = wr.WriteFieldMultiline(
226                                 MetadataFieldDescription, lines,
227                         ); err != nil {
228                                 log.Fatalln(err)
229                         }
230                 }
231
232                 if !mkdirForPkg(w, r, pkgName) {
233                         return false
234                 }
235                 path := filepath.Join(dirPath, MetadataFile)
236                 existing, err := ioutil.ReadFile(path)
237                 if err != nil || bytes.Compare(existing, buf.Bytes()) != 0 {
238                         if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil {
239                                 log.Println("error", r.RemoteAddr, "refresh-json", path, err)
240                                 http.Error(w, err.Error(), http.StatusInternalServerError)
241                                 return false
242                         }
243                         log.Println(r.RemoteAddr, "pypi", pkgName+"/"+MetadataFile, "touch")
244                 }
245         }
246         mtimes := make(map[string]time.Time)
247         for _, releases := range allReleases {
248                 for _, rel := range releases {
249                         if rel.Filename == "" || rel.UploadTimeISO8601 == "" {
250                                 continue
251                         }
252                         t, err := time.Parse(time.RFC3339Nano, rel.UploadTimeISO8601)
253                         if err != nil {
254                                 log.Println(
255                                         "error", r.RemoteAddr, "refresh-json", pkgName,
256                                         "can not parse upload_time:", err,
257                                 )
258                                 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
259                                 return false
260                         }
261                         mtimes[rel.Filename] = t.Truncate(time.Second)
262                 }
263         }
264
265         resp, err := c.Do(agentedReq(*PyPIURL + pkgName + "/"))
266         if err != nil {
267                 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
268                 http.Error(w, err.Error(), http.StatusBadGateway)
269                 return false
270         }
271         if resp.StatusCode != http.StatusOK {
272                 resp.Body.Close()
273                 log.Println(
274                         "error", r.RemoteAddr, "refresh", pkgName,
275                         "HTTP status:", resp.Status,
276                 )
277                 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
278                 return false
279         }
280         body, err := ioutil.ReadAll(resp.Body)
281         resp.Body.Close()
282         if err != nil {
283                 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
284                 http.Error(w, err.Error(), http.StatusBadGateway)
285                 return false
286         }
287         if !mkdirForPkg(w, r, pkgName) {
288                 return false
289         }
290         for _, lineRaw := range bytes.Split(body, []byte("\n")) {
291                 submatches := PkgPyPI.FindStringSubmatch(string(lineRaw))
292                 if len(submatches) == 0 {
293                         continue
294                 }
295                 uri := submatches[1]
296                 filename := submatches[2]
297                 pkgURL, err := url.Parse(uri)
298                 if err != nil {
299                         log.Println("error", r.RemoteAddr, "refresh", uri, err)
300                         http.Error(w, err.Error(), http.StatusBadGateway)
301                         return false
302                 }
303
304                 if pkgURL.Fragment == "" {
305                         log.Println(r.RemoteAddr, "pypi", filename, "no digest")
306                         http.Error(w, "no digest provided", http.StatusBadGateway)
307                         return false
308                 }
309                 digestInfo := strings.Split(pkgURL.Fragment, "=")
310                 if len(digestInfo) == 1 {
311                         // Ancient non PEP-0503 PyPIs, assume MD5
312                         digestInfo = []string{"md5", digestInfo[0]}
313                 } else if len(digestInfo) != 2 {
314                         log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
315                         http.Error(w, "invalid digest provided", http.StatusBadGateway)
316                         return false
317                 }
318                 digest, err := hex.DecodeString(digestInfo[1])
319                 if err != nil {
320                         log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
321                         http.Error(w, err.Error(), http.StatusBadGateway)
322                         return false
323                 }
324                 hashAlgo := digestInfo[0]
325                 var hasherNew func() hash.Hash
326                 var hashSize int
327                 switch hashAlgo {
328                 case HashAlgoMD5:
329                         hasherNew = md5.New
330                         hashSize = md5.Size
331                 case HashAlgoSHA256:
332                         hasherNew = sha256.New
333                         hashSize = sha256.Size
334                 case HashAlgoSHA512:
335                         hasherNew = sha512.New
336                         hashSize = sha512.Size
337                 case HashAlgoBLAKE2b256:
338                         hasherNew = blake2b256New
339                         hashSize = blake2b.Size256
340                 default:
341                         log.Println(
342                                 "error", r.RemoteAddr, "pypi",
343                                 filename, "unknown digest", hashAlgo,
344                         )
345                         http.Error(w, "unknown digest algorithm", http.StatusBadGateway)
346                         return false
347                 }
348                 if len(digest) != hashSize {
349                         log.Println(
350                                 "error", r.RemoteAddr, "pypi",
351                                 filename, "invalid digest length")
352                         http.Error(w, "invalid digest length", http.StatusBadGateway)
353                         return false
354                 }
355
356                 pkgURL.Fragment = ""
357                 if pkgURL.Host == "" {
358                         uri = PyPIURLParsed.ResolveReference(pkgURL).String()
359                 } else {
360                         uri = pkgURL.String()
361                 }
362                 mtime, mtimeExists := mtimes[filename]
363                 if !mtimeExists {
364                         mtime = now
365                 }
366
367                 path := filepath.Join(dirPath, filename)
368                 if filename == filenameGet {
369                         if Killed {
370                                 // Skip heavy remote call, when shutting down
371                                 http.Error(w, "shutting down", http.StatusInternalServerError)
372                                 return false
373                         }
374                         log.Println(r.RemoteAddr, "pypi", filename, "download")
375                         resp, err = c.Do(agentedReq(uri))
376                         if err != nil {
377                                 log.Println("error", r.RemoteAddr, "pypi", filename, "download", err)
378                                 http.Error(w, err.Error(), http.StatusBadGateway)
379                                 return false
380                         }
381                         defer resp.Body.Close()
382                         if resp.StatusCode != http.StatusOK {
383                                 log.Println(
384                                         "error", r.RemoteAddr,
385                                         "pypi", filename, "download",
386                                         "HTTP status:", resp.Status,
387                                 )
388                                 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
389                                 return false
390                         }
391                         hasher := hasherNew()
392                         hasherSHA256 := sha256.New()
393                         hasherBLAKE2b256 := blake2b256New()
394                         dst, err := TempFile(dirPath)
395                         if err != nil {
396                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
397                                 http.Error(w, err.Error(), http.StatusInternalServerError)
398                                 return false
399                         }
400                         dstBuf := bufio.NewWriter(dst)
401                         wrs := []io.Writer{hasher, dstBuf}
402                         if hashAlgo != HashAlgoSHA256 {
403                                 wrs = append(wrs, hasherSHA256)
404                         }
405                         if hashAlgo != HashAlgoBLAKE2b256 {
406                                 wrs = append(wrs, hasherBLAKE2b256)
407                         }
408                         wr := io.MultiWriter(wrs...)
409                         if _, err = io.Copy(wr, resp.Body); err != nil {
410                                 os.Remove(dst.Name())
411                                 dst.Close()
412                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
413                                 http.Error(w, err.Error(), http.StatusInternalServerError)
414                                 return false
415                         }
416                         if err = dstBuf.Flush(); err != nil {
417                                 os.Remove(dst.Name())
418                                 dst.Close()
419                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
420                                 http.Error(w, err.Error(), http.StatusInternalServerError)
421                                 return false
422                         }
423                         if bytes.Compare(hasher.Sum(nil), digest) != 0 {
424                                 log.Println(r.RemoteAddr, "pypi", filename, "digest mismatch")
425                                 os.Remove(dst.Name())
426                                 dst.Close()
427                                 http.Error(w, "digest mismatch", http.StatusBadGateway)
428                                 return false
429                         }
430                         if !NoSync {
431                                 if err = dst.Sync(); err != nil {
432                                         os.Remove(dst.Name())
433                                         dst.Close()
434                                         log.Println("error", r.RemoteAddr, "pypi", filename, err)
435                                         http.Error(w, err.Error(), http.StatusInternalServerError)
436                                         return false
437                                 }
438                         }
439                         if err = dst.Close(); err != nil {
440                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
441                                 http.Error(w, err.Error(), http.StatusInternalServerError)
442                                 return false
443                         }
444                         if err = os.Chtimes(dst.Name(), mtime, mtime); err != nil {
445                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
446                                 http.Error(w, err.Error(), http.StatusInternalServerError)
447                         }
448                         if err = os.Rename(dst.Name(), path); err != nil {
449                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
450                                 http.Error(w, err.Error(), http.StatusInternalServerError)
451                                 return false
452                         }
453                         if err = DirSync(dirPath); err != nil {
454                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
455                                 http.Error(w, err.Error(), http.StatusInternalServerError)
456                                 return false
457                         }
458
459                         var digestSHA256 []byte
460                         var digestBLAKE2b256 []byte
461                         if hashAlgo == HashAlgoSHA256 {
462                                 digestSHA256 = hasher.Sum(nil)
463                         } else {
464                                 digestSHA256 = hasherSHA256.Sum(nil)
465                         }
466                         if hashAlgo == HashAlgoBLAKE2b256 {
467                                 digestBLAKE2b256 = hasher.Sum(nil)
468                         } else {
469                                 digestBLAKE2b256 = hasherBLAKE2b256.Sum(nil)
470                         }
471                         if err = WriteFileSync(
472                                 dirPath, path+"."+HashAlgoSHA256,
473                                 digestSHA256, mtime,
474                         ); err != nil {
475                                 log.Println(
476                                         "error", r.RemoteAddr, "pypi",
477                                         path+"."+HashAlgoSHA256, err,
478                                 )
479                                 http.Error(w, err.Error(), http.StatusInternalServerError)
480                                 return false
481                         }
482                         if err = WriteFileSync(
483                                 dirPath, path+"."+HashAlgoBLAKE2b256,
484                                 digestBLAKE2b256, mtime,
485                         ); err != nil {
486                                 log.Println(
487                                         "error", r.RemoteAddr, "pypi",
488                                         path+"."+HashAlgoBLAKE2b256, err,
489                                 )
490                                 http.Error(w, err.Error(), http.StatusInternalServerError)
491                                 return false
492                         }
493                         for _, algo := range KnownHashAlgos[2:] {
494                                 os.Remove(path + "." + algo)
495                         }
496                         digest = nil
497                 }
498                 if mtimeExists {
499                         stat, err := os.Stat(path)
500                         if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
501                                 log.Println(r.RemoteAddr, "pypi", filename, "touch")
502                                 if err = os.Chtimes(path, mtime, mtime); err != nil {
503                                         log.Println("error", r.RemoteAddr, "pypi", filename, err)
504                                         http.Error(w, err.Error(), http.StatusInternalServerError)
505                                 }
506                         }
507                 }
508
509                 if filename == filenameGet || gpgUpdate {
510                         if _, err = os.Stat(path); err != nil {
511                                 goto GPGSigSkip
512                         }
513                         resp, err := c.Do(agentedReq(uri + GPGSigExt))
514                         if err != nil {
515                                 goto GPGSigSkip
516                         }
517                         if resp.StatusCode != http.StatusOK {
518                                 resp.Body.Close()
519                                 goto GPGSigSkip
520                         }
521                         sig, err := ioutil.ReadAll(resp.Body)
522                         resp.Body.Close()
523                         if err != nil {
524                                 goto GPGSigSkip
525                         }
526                         if !bytes.HasPrefix(sig, []byte("-----BEGIN PGP SIGNATURE-----")) {
527                                 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "non PGP")
528                                 goto GPGSigSkip
529                         }
530                         if err = WriteFileSync(dirPath, path+GPGSigExt, sig, mtime); err != nil {
531                                 log.Println("error", r.RemoteAddr, "pypi", filename+GPGSigExt, err)
532                                 http.Error(w, err.Error(), http.StatusInternalServerError)
533                                 return false
534                         }
535                         log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "downloaded")
536                 }
537                 if mtimeExists {
538                         stat, err := os.Stat(path + GPGSigExt)
539                         if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
540                                 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "touch")
541                                 if err = os.Chtimes(path+GPGSigExt, mtime, mtime); err != nil {
542                                         log.Println("error", r.RemoteAddr, "pypi", filename, err)
543                                         http.Error(w, err.Error(), http.StatusInternalServerError)
544                                 }
545                         }
546                 }
547
548         GPGSigSkip:
549                 if digest == nil {
550                         continue
551                 }
552                 path = path + "." + hashAlgo
553                 stat, err := os.Stat(path)
554                 if err == nil && (!mtimeExists ||
555                         (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime))) {
556                         continue
557                 }
558                 if err != nil && !os.IsNotExist(err) {
559                         log.Println("error", r.RemoteAddr, "pypi", path, err)
560                         http.Error(w, err.Error(), http.StatusInternalServerError)
561                         return false
562                 }
563                 log.Println(r.RemoteAddr, "pypi", filename, "touch")
564                 if err = WriteFileSync(dirPath, path, digest, mtime); err != nil {
565                         log.Println("error", r.RemoteAddr, "pypi", path, err)
566                         http.Error(w, err.Error(), http.StatusInternalServerError)
567                         return false
568                 }
569         }
570         return true
571 }