Cypherpunks.ru repositories - gocheese.git/blob - refresh.go
Download link for 3.0.0 release
[gocheese.git] / refresh.go
1 /*
2 GoCheese -- Python private package repository and caching proxy
3 Copyright (C) 2019-2021 Sergey Matveev <stargrave@stargrave.org>
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, version 3 of the License.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 package main
19
20 import (
21         "bufio"
22         "bytes"
23         "crypto/md5"
24         "crypto/sha256"
25         "crypto/sha512"
26         "encoding/hex"
27         "encoding/json"
28         "hash"
29         "io"
30         "io/ioutil"
31         "log"
32         "net/http"
33         "net/url"
34         "os"
35         "path/filepath"
36         "regexp"
37         "strings"
38         "time"
39
40         "go.cypherpunks.ru/recfile"
41         "golang.org/x/crypto/blake2b"
42 )
43
// Digest algorithm names and special file-name parts used by refreshDir.
//
// The HashAlgo* values are compared against the algorithm part of the
// "#algo=hexdigest" fragment found in upstream index links, and are also
// used as the suffix ("<file>.<algo>") of locally stored digest files.
const (
        HashAlgoSHA256     = "sha256"
        HashAlgoBLAKE2b256 = "blake2_256"
        HashAlgoSHA512     = "sha512"
        HashAlgoMD5        = "md5"
        // GPGSigExt is appended both to the upstream download URL and to the
        // local file path when fetching and storing a signature.
        GPGSigExt          = ".asc"
        // InternalFlag is the name of a file inside a package directory; when
        // it exists, refreshDir returns immediately without contacting upstream.
        InternalFlag       = ".internal"
)
52
var (
        // PkgPyPI matches a single line that contains an <a href="...">text</a>
        // element. Submatch 1 is the href value, submatch 2 is the anchor text
        // (refreshDir treats it as the file name).
        PkgPyPI           = regexp.MustCompile(`^.*<a href="([^"]+)"[^>]*>(.+)</a>.*$`)
        // PyPIURLParsed is the base URL against which host-less hrefs from the
        // index page are resolved. It is not assigned in this part of the file.
        PyPIURLParsed     *url.URL
        // PyPIHTTPTransport is the transport used by the HTTP client that
        // refreshDir creates for every upstream request.
        PyPIHTTPTransport http.Transport
        // KnownHashAlgos lists the supported digest algorithm names. The order
        // matters: after a download refreshDir stores digests for the first two
        // entries and removes digest files of the entries from index 2 onward.
        KnownHashAlgos    []string = []string{
                HashAlgoSHA256,
                HashAlgoBLAKE2b256,
                HashAlgoSHA512,
                HashAlgoMD5,
        }
)
64
65 func blake2b256New() hash.Hash {
66         h, err := blake2b.New256(nil)
67         if err != nil {
68                 panic(err)
69         }
70         return h
71 }
72
73 func agentedReq(url string) *http.Request {
74         req, err := http.NewRequest("GET", url, nil)
75         if err != nil {
76                 log.Fatalln(err)
77         }
78         req.Header.Set("User-Agent", UserAgent)
79         return req
80 }
81
// RecFieldToValuesMap pairs one metadata field name with all the values it
// has in the upstream JSON. refreshDir writes one recfile field per value.
type RecFieldToValuesMap struct {
        // recField is the metadata field name; it is passed through
        // metadataFieldToRecField before being written.
        recField   string
        // jsonFields holds the values taken from the decoded JSON document.
        jsonFields []string
}
86
87 func refreshDir(
88         w http.ResponseWriter,
89         r *http.Request,
90         pkgName, filenameGet string,
91         gpgUpdate bool,
92 ) bool {
93         if _, err := os.Stat(filepath.Join(Root, pkgName, InternalFlag)); err == nil {
94                 return true
95         }
96         c := http.Client{Transport: &PyPIHTTPTransport}
97         dirPath := filepath.Join(Root, pkgName)
98         now := time.Now()
99
100         var allReleases map[string][]*PkgReleaseInfo
101         if *JSONURL != "" {
102                 resp, err := c.Do(agentedReq(*JSONURL + pkgName + "/json"))
103                 if err != nil {
104                         log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
105                         http.Error(w, err.Error(), http.StatusBadGateway)
106                         return false
107                 }
108                 if resp.StatusCode != http.StatusOK {
109                         resp.Body.Close()
110                         log.Println(
111                                 "error", r.RemoteAddr, "refresh-json", pkgName,
112                                 "HTTP status:", resp.Status,
113                         )
114                         http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
115                         return false
116                 }
117                 body, err := ioutil.ReadAll(resp.Body)
118                 resp.Body.Close()
119                 var buf bytes.Buffer
120                 var description string
121                 wr := recfile.NewWriter(&buf)
122                 var meta PkgMeta
123                 err = json.Unmarshal(body, &meta)
124                 if err == nil {
125                         for _, m := range [][2]string{
126                                 {MetadataFieldName, meta.Info.Name},
127                                 {MetadataFieldVersion, meta.Info.Version},
128                                 {MetadataFieldSummary, meta.Info.Summary},
129                                 {MetadataFieldDescriptionContentType, meta.Info.DescriptionContentType},
130                                 {MetadataFieldKeywords, meta.Info.Keywords},
131                                 {MetadataFieldHomePage, meta.Info.HomePage},
132                                 {MetadataFieldAuthor, meta.Info.Author},
133                                 {MetadataFieldAuthorEmail, meta.Info.AuthorEmail},
134                                 {MetadataFieldMaintainer, meta.Info.Maintainer},
135                                 {MetadataFieldMaintainerEmail, meta.Info.MaintainerEmail},
136                                 {MetadataFieldLicense, meta.Info.License},
137                                 {MetadataFieldRequiresPython, meta.Info.RequiresPython},
138                         } {
139                                 recField, jsonField := m[0], m[1]
140                                 if jsonField == "" {
141                                         continue
142                                 }
143                                 if _, err = wr.WriteFields(recfile.Field{
144                                         Name:  metadataFieldToRecField(recField),
145                                         Value: jsonField,
146                                 }); err != nil {
147                                         log.Fatalln(err)
148                                 }
149                         }
150                         for _, m := range []RecFieldToValuesMap{
151                                 {MetadataFieldClassifier, meta.Info.Classifier},
152                                 {MetadataFieldPlatform, meta.Info.Platform},
153                                 {MetadataFieldSupportedPlatform, meta.Info.SupportedPlatform},
154                                 {MetadataFieldRequiresDist, meta.Info.RequiresDist},
155                                 {MetadataFieldRequiresExternal, meta.Info.RequiresExternal},
156                                 {MetadataFieldProjectURL, meta.Info.ProjectURL},
157                                 {MetadataFieldProvidesExtra, meta.Info.ProvidesExtra},
158                         } {
159                                 for _, v := range m.jsonFields {
160                                         if _, err = wr.WriteFields(recfile.Field{
161                                                 Name:  metadataFieldToRecField(m.recField),
162                                                 Value: v,
163                                         }); err != nil {
164                                                 log.Fatalln(err)
165                                         }
166                                 }
167                         }
168                         description = meta.Info.Description
169                         allReleases = meta.Releases
170                 } else {
171                         var metaStripped PkgMetaStripped
172                         err = json.Unmarshal(body, &metaStripped)
173                         if err != nil {
174                                 log.Println(
175                                         "error", r.RemoteAddr, "refresh-json", pkgName,
176                                         "can not parse JSON:", err,
177                                 )
178                                 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
179                                 return false
180                         }
181                         for _, m := range [][2]string{
182                                 {MetadataFieldName, metaStripped.Info.Name},
183                                 {MetadataFieldVersion, metaStripped.Info.Version},
184                                 {MetadataFieldSummary, metaStripped.Info.Summary},
185                                 {MetadataFieldDescriptionContentType, metaStripped.Info.DescriptionContentType},
186                                 {MetadataFieldKeywords, metaStripped.Info.Keywords},
187                                 {MetadataFieldHomePage, metaStripped.Info.HomePage},
188                                 {MetadataFieldAuthor, metaStripped.Info.Author},
189                                 {MetadataFieldAuthorEmail, metaStripped.Info.AuthorEmail},
190                                 {MetadataFieldMaintainer, metaStripped.Info.Maintainer},
191                                 {MetadataFieldMaintainerEmail, metaStripped.Info.MaintainerEmail},
192                                 {MetadataFieldLicense, metaStripped.Info.License},
193                                 {MetadataFieldRequiresPython, metaStripped.Info.RequiresPython},
194                         } {
195                                 recField, jsonField := m[0], m[1]
196                                 if jsonField == "" {
197                                         continue
198                                 }
199                                 if _, err = wr.WriteFields(recfile.Field{
200                                         Name:  metadataFieldToRecField(recField),
201                                         Value: jsonField,
202                                 }); err != nil {
203                                         log.Fatalln(err)
204                                 }
205                         }
206
207                         for _, m := range []RecFieldToValuesMap{
208                                 {MetadataFieldClassifier, metaStripped.Info.Classifier},
209                                 {MetadataFieldRequiresDist, metaStripped.Info.RequiresDist},
210                         } {
211                                 for _, v := range m.jsonFields {
212                                         if _, err = wr.WriteFields(recfile.Field{
213                                                 Name:  metadataFieldToRecField(m.recField),
214                                                 Value: v,
215                                         }); err != nil {
216                                                 log.Fatalln(err)
217                                         }
218                                 }
219                         }
220                         description = metaStripped.Info.Description
221                         allReleases = metaStripped.Releases
222                 }
223                 lines := strings.Split(description, "\n")
224                 if len(lines) > 0 {
225                         if _, err = wr.WriteFieldMultiline(
226                                 MetadataFieldDescription, lines,
227                         ); err != nil {
228                                 log.Fatalln(err)
229                         }
230                 }
231
232                 if !mkdirForPkg(w, r, pkgName) {
233                         return false
234                 }
235                 path := filepath.Join(dirPath, MetadataFile)
236                 existing, err := ioutil.ReadFile(path)
237                 if err != nil || bytes.Compare(existing, buf.Bytes()) != 0 {
238                         if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil {
239                                 log.Println("error", r.RemoteAddr, "refresh-json", path, err)
240                                 http.Error(w, err.Error(), http.StatusInternalServerError)
241                                 return false
242                         }
243                         log.Println(r.RemoteAddr, "pypi", pkgName+"."+MetadataFile, "touch")
244                 }
245         }
246         mtimes := make(map[string]time.Time)
247         for _, releases := range allReleases {
248                 for _, rel := range releases {
249                         if rel.Filename == "" || rel.UploadTimeISO8601 == "" {
250                                 continue
251                         }
252                         t, err := time.Parse(time.RFC3339Nano, rel.UploadTimeISO8601)
253                         if err != nil {
254                                 log.Println(
255                                         "error", r.RemoteAddr, "refresh-json", pkgName,
256                                         "can not parse upload_time:", err,
257                                 )
258                                 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
259                                 return false
260                         }
261                         mtimes[rel.Filename] = t.Truncate(time.Second)
262                 }
263         }
264
265         resp, err := c.Do(agentedReq(*PyPIURL + pkgName + "/"))
266         if err != nil {
267                 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
268                 http.Error(w, err.Error(), http.StatusBadGateway)
269                 return false
270         }
271         if resp.StatusCode != http.StatusOK {
272                 resp.Body.Close()
273                 log.Println(
274                         "error", r.RemoteAddr, "refresh", pkgName,
275                         "HTTP status:", resp.Status,
276                 )
277                 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
278                 return false
279         }
280         body, err := ioutil.ReadAll(resp.Body)
281         resp.Body.Close()
282         if err != nil {
283                 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
284                 http.Error(w, err.Error(), http.StatusBadGateway)
285                 return false
286         }
287         if !mkdirForPkg(w, r, pkgName) {
288                 return false
289         }
290         for _, lineRaw := range bytes.Split(body, []byte("\n")) {
291                 submatches := PkgPyPI.FindStringSubmatch(string(lineRaw))
292                 if len(submatches) == 0 {
293                         continue
294                 }
295                 uri := submatches[1]
296                 filename := submatches[2]
297                 pkgURL, err := url.Parse(uri)
298                 if err != nil {
299                         log.Println("error", r.RemoteAddr, "refresh", uri, err)
300                         http.Error(w, err.Error(), http.StatusBadGateway)
301                         return false
302                 }
303
304                 if pkgURL.Fragment == "" {
305                         log.Println(r.RemoteAddr, "pypi", filename, "no digest")
306                         http.Error(w, "no digest provided", http.StatusBadGateway)
307                         return false
308                 }
309                 digestInfo := strings.Split(pkgURL.Fragment, "=")
310                 if len(digestInfo) == 1 {
311                         // Ancient non PEP-0503 PyPIs, assume MD5
312                         digestInfo = []string{"md5", digestInfo[0]}
313                 } else if len(digestInfo) != 2 {
314                         log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
315                         http.Error(w, "invalid digest provided", http.StatusBadGateway)
316                         return false
317                 }
318                 digest, err := hex.DecodeString(digestInfo[1])
319                 if err != nil {
320                         log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
321                         http.Error(w, err.Error(), http.StatusBadGateway)
322                         return false
323                 }
324                 hashAlgo := digestInfo[0]
325                 var hasherNew func() hash.Hash
326                 var hashSize int
327                 switch hashAlgo {
328                 case HashAlgoMD5:
329                         hasherNew = md5.New
330                         hashSize = md5.Size
331                 case HashAlgoSHA256:
332                         hasherNew = sha256.New
333                         hashSize = sha256.Size
334                 case HashAlgoSHA512:
335                         hasherNew = sha512.New
336                         hashSize = sha512.Size
337                 case HashAlgoBLAKE2b256:
338                         hasherNew = blake2b256New
339                         hashSize = blake2b.Size256
340                 default:
341                         log.Println("error", r.RemoteAddr, "pypi", filename, "unknown digest", hashAlgo)
342                         http.Error(w, "unknown digest algorithm", http.StatusBadGateway)
343                         return false
344                 }
345                 if len(digest) != hashSize {
346                         log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest length")
347                         http.Error(w, "invalid digest length", http.StatusBadGateway)
348                         return false
349                 }
350
351                 pkgURL.Fragment = ""
352                 if pkgURL.Host == "" {
353                         uri = PyPIURLParsed.ResolveReference(pkgURL).String()
354                 } else {
355                         uri = pkgURL.String()
356                 }
357                 mtime, mtimeExists := mtimes[filename]
358                 if !mtimeExists {
359                         mtime = now
360                 }
361
362                 path := filepath.Join(dirPath, filename)
363                 if filename == filenameGet {
364                         if Killed {
365                                 // Skip heavy remote call, when shutting down
366                                 http.Error(w, "shutting down", http.StatusInternalServerError)
367                                 return false
368                         }
369                         log.Println(r.RemoteAddr, "pypi", filename, "download")
370                         resp, err = c.Do(agentedReq(uri))
371                         if err != nil {
372                                 log.Println("error", r.RemoteAddr, "pypi", filename, "download", err)
373                                 http.Error(w, err.Error(), http.StatusBadGateway)
374                                 return false
375                         }
376                         defer resp.Body.Close()
377                         if resp.StatusCode != http.StatusOK {
378                                 log.Println(
379                                         "error", r.RemoteAddr,
380                                         "pypi", filename, "download",
381                                         "HTTP status:", resp.Status,
382                                 )
383                                 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
384                                 return false
385                         }
386                         hasher := hasherNew()
387                         hasherSHA256 := sha256.New()
388                         hasherBLAKE2b256 := blake2b256New()
389                         dst, err := TempFile(dirPath)
390                         if err != nil {
391                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
392                                 http.Error(w, err.Error(), http.StatusInternalServerError)
393                                 return false
394                         }
395                         dstBuf := bufio.NewWriter(dst)
396                         wrs := []io.Writer{hasher, dstBuf}
397                         if hashAlgo != HashAlgoSHA256 {
398                                 wrs = append(wrs, hasherSHA256)
399                         }
400                         if hashAlgo != HashAlgoBLAKE2b256 {
401                                 wrs = append(wrs, hasherBLAKE2b256)
402                         }
403                         wr := io.MultiWriter(wrs...)
404                         if _, err = io.Copy(wr, resp.Body); err != nil {
405                                 os.Remove(dst.Name())
406                                 dst.Close()
407                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
408                                 http.Error(w, err.Error(), http.StatusInternalServerError)
409                                 return false
410                         }
411                         if err = dstBuf.Flush(); err != nil {
412                                 os.Remove(dst.Name())
413                                 dst.Close()
414                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
415                                 http.Error(w, err.Error(), http.StatusInternalServerError)
416                                 return false
417                         }
418                         if bytes.Compare(hasher.Sum(nil), digest) != 0 {
419                                 log.Println(r.RemoteAddr, "pypi", filename, "digest mismatch")
420                                 os.Remove(dst.Name())
421                                 dst.Close()
422                                 http.Error(w, "digest mismatch", http.StatusBadGateway)
423                                 return false
424                         }
425                         if !NoSync {
426                                 if err = dst.Sync(); err != nil {
427                                         os.Remove(dst.Name())
428                                         dst.Close()
429                                         log.Println("error", r.RemoteAddr, "pypi", filename, err)
430                                         http.Error(w, err.Error(), http.StatusInternalServerError)
431                                         return false
432                                 }
433                         }
434                         if err = dst.Close(); err != nil {
435                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
436                                 http.Error(w, err.Error(), http.StatusInternalServerError)
437                                 return false
438                         }
439                         if err = os.Chtimes(dst.Name(), mtime, mtime); err != nil {
440                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
441                                 http.Error(w, err.Error(), http.StatusInternalServerError)
442                         }
443                         if err = os.Rename(dst.Name(), path); err != nil {
444                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
445                                 http.Error(w, err.Error(), http.StatusInternalServerError)
446                                 return false
447                         }
448                         if err = DirSync(dirPath); err != nil {
449                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
450                                 http.Error(w, err.Error(), http.StatusInternalServerError)
451                                 return false
452                         }
453
454                         var digestSHA256 []byte
455                         var digestBLAKE2b256 []byte
456                         if hashAlgo == HashAlgoSHA256 {
457                                 digestSHA256 = hasher.Sum(nil)
458                         } else {
459                                 digestSHA256 = hasherSHA256.Sum(nil)
460                         }
461                         if hashAlgo == HashAlgoBLAKE2b256 {
462                                 digestBLAKE2b256 = hasher.Sum(nil)
463                         } else {
464                                 digestBLAKE2b256 = hasherBLAKE2b256.Sum(nil)
465                         }
466                         if err = WriteFileSync(
467                                 dirPath, path+"."+HashAlgoSHA256,
468                                 digestSHA256, mtime,
469                         ); err != nil {
470                                 log.Println(
471                                         "error", r.RemoteAddr, "pypi",
472                                         path+"."+HashAlgoSHA256, err,
473                                 )
474                                 http.Error(w, err.Error(), http.StatusInternalServerError)
475                                 return false
476                         }
477                         if err = WriteFileSync(
478                                 dirPath, path+"."+HashAlgoBLAKE2b256,
479                                 digestBLAKE2b256, mtime,
480                         ); err != nil {
481                                 log.Println(
482                                         "error", r.RemoteAddr, "pypi",
483                                         path+"."+HashAlgoBLAKE2b256, err,
484                                 )
485                                 http.Error(w, err.Error(), http.StatusInternalServerError)
486                                 return false
487                         }
488                         for _, algo := range KnownHashAlgos[2:] {
489                                 os.Remove(path + "." + algo)
490                         }
491                         digest = nil
492                 }
493                 if mtimeExists {
494                         stat, err := os.Stat(path)
495                         if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
496                                 log.Println(r.RemoteAddr, "pypi", filename, "touch")
497                                 if err = os.Chtimes(path, mtime, mtime); err != nil {
498                                         log.Println("error", r.RemoteAddr, "pypi", filename, err)
499                                         http.Error(w, err.Error(), http.StatusInternalServerError)
500                                 }
501                         }
502                 }
503
504                 if filename == filenameGet || gpgUpdate {
505                         if _, err = os.Stat(path); err != nil {
506                                 goto GPGSigSkip
507                         }
508                         resp, err := c.Do(agentedReq(uri + GPGSigExt))
509                         if err != nil {
510                                 goto GPGSigSkip
511                         }
512                         if resp.StatusCode != http.StatusOK {
513                                 resp.Body.Close()
514                                 goto GPGSigSkip
515                         }
516                         sig, err := ioutil.ReadAll(resp.Body)
517                         resp.Body.Close()
518                         if err != nil {
519                                 goto GPGSigSkip
520                         }
521                         if !bytes.HasPrefix(sig, []byte("-----BEGIN PGP SIGNATURE-----")) {
522                                 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "non PGP")
523                                 goto GPGSigSkip
524                         }
525                         if err = WriteFileSync(dirPath, path+GPGSigExt, sig, mtime); err != nil {
526                                 log.Println("error", r.RemoteAddr, "pypi", filename+GPGSigExt, err)
527                                 http.Error(w, err.Error(), http.StatusInternalServerError)
528                                 return false
529                         }
530                         log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "downloaded")
531                 }
532                 if mtimeExists {
533                         stat, err := os.Stat(path + GPGSigExt)
534                         if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
535                                 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "touch")
536                                 if err = os.Chtimes(path+GPGSigExt, mtime, mtime); err != nil {
537                                         log.Println("error", r.RemoteAddr, "pypi", filename, err)
538                                         http.Error(w, err.Error(), http.StatusInternalServerError)
539                                 }
540                         }
541                 }
542
543         GPGSigSkip:
544                 if digest == nil {
545                         continue
546                 }
547                 path = path + "." + hashAlgo
548                 stat, err := os.Stat(path)
549                 if err == nil &&
550                         (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime)) {
551                         continue
552                 }
553                 if err != nil && !os.IsNotExist(err) {
554                         log.Println("error", r.RemoteAddr, "pypi", path, err)
555                         http.Error(w, err.Error(), http.StatusInternalServerError)
556                         return false
557                 }
558                 log.Println(r.RemoteAddr, "pypi", filename, "touch")
559                 if err = WriteFileSync(dirPath, path, digest, mtime); err != nil {
560                         log.Println("error", r.RemoteAddr, "pypi", path, err)
561                         http.Error(w, err.Error(), http.StatusInternalServerError)
562                         return false
563                 }
564         }
565         return true
566 }