]> Cypherpunks.ru repositories - gocheese.git/blob - refresh.go
Metadata, mtime support. Massive refactoring
[gocheese.git] / refresh.go
1 /*
2 GoCheese -- Python private package repository and caching proxy
3 Copyright (C) 2019-2021 Sergey Matveev <stargrave@stargrave.org>
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, version 3 of the License.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 package main
19
20 import (
21         "bufio"
22         "bytes"
23         "crypto/md5"
24         "crypto/sha256"
25         "crypto/sha512"
26         "encoding/hex"
27         "encoding/json"
28         "hash"
29         "io"
30         "io/ioutil"
31         "log"
32         "net/http"
33         "net/url"
34         "os"
35         "path/filepath"
36         "regexp"
37         "strings"
38         "time"
39
40         "go.cypherpunks.ru/recfile"
41         "golang.org/x/crypto/blake2b"
42 )
43
// Names of the digest algorithms recognised in the "#algo=hexdigest"
// fragment of upstream package links. The same names are appended to a
// package file's path (after a dot) to form the name of its digest file.
const (
	HashAlgoSHA256     = "sha256"
	HashAlgoBLAKE2b256 = "blake2_256"
	HashAlgoSHA512     = "sha512"
	HashAlgoMD5        = "md5"
)

// Special file name suffixes used inside a package directory.
const (
	// GPGSigExt is appended to both the upstream URL and the local path
	// of a package file to address its detached signature.
	GPGSigExt = ".asc"
	// InternalFlag is the name of the marker file whose presence in a
	// package directory makes refreshDir leave that package untouched.
	InternalFlag = ".internal"
)
52
53 var (
54         PkgPyPI           = regexp.MustCompile(`^.*<a href="([^"]+)"[^>]*>(.+)</a>.*$`)
55         PyPIURLParsed     *url.URL
56         PyPIHTTPTransport http.Transport
57         KnownHashAlgos    []string = []string{
58                 HashAlgoSHA256,
59                 HashAlgoBLAKE2b256,
60                 HashAlgoSHA512,
61                 HashAlgoMD5,
62         }
63 )
64
65 func blake2b256New() hash.Hash {
66         h, err := blake2b.New256(nil)
67         if err != nil {
68                 panic(err)
69         }
70         return h
71 }
72
73 func agentedReq(url string) *http.Request {
74         req, err := http.NewRequest("GET", url, nil)
75         if err != nil {
76                 log.Fatalln(err)
77         }
78         req.Header.Set("User-Agent", UserAgent)
79         return req
80 }
81
82 func refreshDir(
83         w http.ResponseWriter,
84         r *http.Request,
85         pkgName, filenameGet string,
86         gpgUpdate bool,
87 ) bool {
88         if _, err := os.Stat(filepath.Join(*Root, pkgName, InternalFlag)); err == nil {
89                 return true
90         }
91         c := http.Client{Transport: &PyPIHTTPTransport}
92         dirPath := filepath.Join(*Root, pkgName)
93         now := time.Now()
94
95         var allReleases map[string][]*PkgReleaseInfo
96         if *JSONURL != "" {
97                 resp, err := c.Do(agentedReq(*JSONURL + pkgName + "/json"))
98                 if err != nil {
99                         log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
100                         http.Error(w, err.Error(), http.StatusBadGateway)
101                         return false
102                 }
103                 if resp.StatusCode != http.StatusOK {
104                         resp.Body.Close()
105                         log.Println(
106                                 "error", r.RemoteAddr, "refresh-json", pkgName,
107                                 "HTTP status:", resp.Status,
108                         )
109                         http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
110                         return false
111                 }
112                 body, err := ioutil.ReadAll(resp.Body)
113                 resp.Body.Close()
114                 var buf bytes.Buffer
115                 var description string
116                 wr := recfile.NewWriter(&buf)
117                 var meta PkgMeta
118                 err = json.Unmarshal(body, &meta)
119                 if err == nil {
120                         for recField, jsonField := range map[string]string{
121                                 MetadataFieldName:                   meta.Info.Name,
122                                 MetadataFieldVersion:                meta.Info.Version,
123                                 MetadataFieldSummary:                meta.Info.Summary,
124                                 MetadataFieldDescriptionContentType: meta.Info.DescriptionContentType,
125                                 MetadataFieldKeywords:               meta.Info.Keywords,
126                                 MetadataFieldHomePage:               meta.Info.HomePage,
127                                 MetadataFieldAuthor:                 meta.Info.Author,
128                                 MetadataFieldAuthorEmail:            meta.Info.AuthorEmail,
129                                 MetadataFieldMaintainer:             meta.Info.Maintainer,
130                                 MetadataFieldMaintainerEmail:        meta.Info.MaintainerEmail,
131                                 MetadataFieldLicense:                meta.Info.License,
132                                 MetadataFieldRequiresPython:         meta.Info.RequiresPython,
133                         } {
134                                 if jsonField == "" {
135                                         continue
136                                 }
137                                 if _, err = wr.WriteFields(recfile.Field{
138                                         Name:  metadataFieldToRecField(recField),
139                                         Value: jsonField,
140                                 }); err != nil {
141                                         log.Fatalln(err)
142                                 }
143                         }
144                         for recField, jsonFields := range map[string][]string{
145                                 MetadataFieldClassifier:        meta.Info.Classifier,
146                                 MetadataFieldPlatform:          meta.Info.Platform,
147                                 MetadataFieldSupportedPlatform: meta.Info.SupportedPlatform,
148                                 MetadataFieldRequiresDist:      meta.Info.RequiresDist,
149                                 MetadataFieldRequiresExternal:  meta.Info.RequiresExternal,
150                                 MetadataFieldProjectURL:        meta.Info.ProjectURL,
151                                 MetadataFieldProvidesExtra:     meta.Info.ProvidesExtra,
152                         } {
153                                 for _, v := range jsonFields {
154                                         if _, err = wr.WriteFields(recfile.Field{
155                                                 Name:  metadataFieldToRecField(recField),
156                                                 Value: v,
157                                         }); err != nil {
158                                                 log.Fatalln(err)
159                                         }
160                                 }
161                         }
162                         description = meta.Info.Description
163                         allReleases = meta.Releases
164                 } else {
165                         var metaStripped PkgMetaStripped
166                         err = json.Unmarshal(body, &metaStripped)
167                         if err != nil {
168                                 log.Println(
169                                         "error", r.RemoteAddr, "refresh-json", pkgName,
170                                         "can not parse JSON:", err,
171                                 )
172                                 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
173                                 return false
174                         }
175                         for recField, jsonField := range map[string]string{
176                                 MetadataFieldName:                   metaStripped.Info.Name,
177                                 MetadataFieldVersion:                metaStripped.Info.Version,
178                                 MetadataFieldSummary:                metaStripped.Info.Summary,
179                                 MetadataFieldDescriptionContentType: metaStripped.Info.DescriptionContentType,
180                                 MetadataFieldKeywords:               metaStripped.Info.Keywords,
181                                 MetadataFieldHomePage:               metaStripped.Info.HomePage,
182                                 MetadataFieldAuthor:                 metaStripped.Info.Author,
183                                 MetadataFieldAuthorEmail:            metaStripped.Info.AuthorEmail,
184                                 MetadataFieldMaintainer:             metaStripped.Info.Maintainer,
185                                 MetadataFieldMaintainerEmail:        metaStripped.Info.MaintainerEmail,
186                                 MetadataFieldLicense:                metaStripped.Info.License,
187                                 MetadataFieldRequiresPython:         metaStripped.Info.RequiresPython,
188                         } {
189                                 if jsonField == "" {
190                                         continue
191                                 }
192                                 if _, err = wr.WriteFields(recfile.Field{
193                                         Name:  metadataFieldToRecField(recField),
194                                         Value: jsonField,
195                                 }); err != nil {
196                                         log.Fatalln(err)
197                                 }
198                         }
199
200                         for recField, jsonFields := range map[string][]string{
201                                 MetadataFieldClassifier:   metaStripped.Info.Classifier,
202                                 MetadataFieldRequiresDist: metaStripped.Info.RequiresDist,
203                         } {
204                                 for _, v := range jsonFields {
205                                         if _, err = wr.WriteFields(recfile.Field{
206                                                 Name:  metadataFieldToRecField(recField),
207                                                 Value: v,
208                                         }); err != nil {
209                                                 log.Fatalln(err)
210                                         }
211                                 }
212                         }
213                         description = metaStripped.Info.Description
214                         allReleases = metaStripped.Releases
215                 }
216                 lines := strings.Split(description, "\n")
217                 if len(lines) > 0 {
218                         if _, err = wr.WriteFieldMultiline(
219                                 MetadataFieldDescription, lines,
220                         ); err != nil {
221                                 log.Fatalln(err)
222                         }
223                 }
224
225                 if !mkdirForPkg(w, r, pkgName) {
226                         return false
227                 }
228                 path := filepath.Join(dirPath, MetadataFile)
229                 existing, err := ioutil.ReadFile(path)
230                 if err != nil || bytes.Compare(existing, buf.Bytes()) != 0 {
231                         if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil {
232                                 log.Println("error", r.RemoteAddr, "refresh-json", path, err)
233                                 http.Error(w, err.Error(), http.StatusInternalServerError)
234                                 return false
235                         }
236                         log.Println(r.RemoteAddr, "pypi", pkgName+"."+MetadataFile, "touch")
237                 }
238         }
239         mtimes := make(map[string]time.Time)
240         for _, releases := range allReleases {
241                 for _, rel := range releases {
242                         if rel.Filename == "" || rel.UploadTimeISO8601 == "" {
243                                 continue
244                         }
245                         t, err := time.Parse(time.RFC3339Nano, rel.UploadTimeISO8601)
246                         if err != nil {
247                                 log.Println(
248                                         "error", r.RemoteAddr, "refresh-json", pkgName,
249                                         "can not parse upload_time:", err,
250                                 )
251                                 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
252                                 return false
253                         }
254                         mtimes[rel.Filename] = t.Truncate(time.Second)
255                 }
256         }
257
258         resp, err := c.Do(agentedReq(*PyPIURL + pkgName + "/"))
259         if err != nil {
260                 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
261                 http.Error(w, err.Error(), http.StatusBadGateway)
262                 return false
263         }
264         if resp.StatusCode != http.StatusOK {
265                 resp.Body.Close()
266                 log.Println(
267                         "error", r.RemoteAddr, "refresh", pkgName,
268                         "HTTP status:", resp.Status,
269                 )
270                 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
271                 return false
272         }
273         body, err := ioutil.ReadAll(resp.Body)
274         resp.Body.Close()
275         if err != nil {
276                 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
277                 http.Error(w, err.Error(), http.StatusBadGateway)
278                 return false
279         }
280         if !mkdirForPkg(w, r, pkgName) {
281                 return false
282         }
283         for _, lineRaw := range bytes.Split(body, []byte("\n")) {
284                 submatches := PkgPyPI.FindStringSubmatch(string(lineRaw))
285                 if len(submatches) == 0 {
286                         continue
287                 }
288                 uri := submatches[1]
289                 filename := submatches[2]
290                 pkgURL, err := url.Parse(uri)
291                 if err != nil {
292                         log.Println("error", r.RemoteAddr, "refresh", uri, err)
293                         http.Error(w, err.Error(), http.StatusBadGateway)
294                         return false
295                 }
296
297                 if pkgURL.Fragment == "" {
298                         log.Println(r.RemoteAddr, "pypi", filename, "no digest")
299                         http.Error(w, "no digest provided", http.StatusBadGateway)
300                         return false
301                 }
302                 digestInfo := strings.Split(pkgURL.Fragment, "=")
303                 if len(digestInfo) == 1 {
304                         // Ancient non PEP-0503 PyPIs, assume MD5
305                         digestInfo = []string{"md5", digestInfo[0]}
306                 } else if len(digestInfo) != 2 {
307                         log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
308                         http.Error(w, "invalid digest provided", http.StatusBadGateway)
309                         return false
310                 }
311                 digest, err := hex.DecodeString(digestInfo[1])
312                 if err != nil {
313                         log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
314                         http.Error(w, err.Error(), http.StatusBadGateway)
315                         return false
316                 }
317                 hashAlgo := digestInfo[0]
318                 var hasherNew func() hash.Hash
319                 var hashSize int
320                 switch hashAlgo {
321                 case HashAlgoMD5:
322                         hasherNew = md5.New
323                         hashSize = md5.Size
324                 case HashAlgoSHA256:
325                         hasherNew = sha256.New
326                         hashSize = sha256.Size
327                 case HashAlgoSHA512:
328                         hasherNew = sha512.New
329                         hashSize = sha512.Size
330                 case HashAlgoBLAKE2b256:
331                         hasherNew = blake2b256New
332                         hashSize = blake2b.Size256
333                 default:
334                         log.Println("error", r.RemoteAddr, "pypi", filename, "unknown digest", hashAlgo)
335                         http.Error(w, "unknown digest algorithm", http.StatusBadGateway)
336                         return false
337                 }
338                 if len(digest) != hashSize {
339                         log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest length")
340                         http.Error(w, "invalid digest length", http.StatusBadGateway)
341                         return false
342                 }
343
344                 pkgURL.Fragment = ""
345                 if pkgURL.Host == "" {
346                         uri = PyPIURLParsed.ResolveReference(pkgURL).String()
347                 } else {
348                         uri = pkgURL.String()
349                 }
350                 mtime, mtimeExists := mtimes[filename]
351                 if !mtimeExists {
352                         mtime = now
353                 }
354
355                 path := filepath.Join(dirPath, filename)
356                 if filename == filenameGet {
357                         if Killed {
358                                 // Skip heavy remote call, when shutting down
359                                 http.Error(w, "shutting down", http.StatusInternalServerError)
360                                 return false
361                         }
362                         log.Println(r.RemoteAddr, "pypi", filename, "download")
363                         resp, err = c.Do(agentedReq(uri))
364                         if err != nil {
365                                 log.Println("error", r.RemoteAddr, "pypi", filename, "download", err)
366                                 http.Error(w, err.Error(), http.StatusBadGateway)
367                                 return false
368                         }
369                         defer resp.Body.Close()
370                         if resp.StatusCode != http.StatusOK {
371                                 log.Println(
372                                         "error", r.RemoteAddr,
373                                         "pypi", filename, "download",
374                                         "HTTP status:", resp.Status,
375                                 )
376                                 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
377                                 return false
378                         }
379                         hasher := hasherNew()
380                         hasherSHA256 := sha256.New()
381                         dst, err := TempFile(dirPath)
382                         if err != nil {
383                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
384                                 http.Error(w, err.Error(), http.StatusInternalServerError)
385                                 return false
386                         }
387                         dstBuf := bufio.NewWriter(dst)
388                         wrs := []io.Writer{hasher, dstBuf}
389                         if hashAlgo != HashAlgoSHA256 {
390                                 wrs = append(wrs, hasherSHA256)
391                         }
392                         wr := io.MultiWriter(wrs...)
393                         if _, err = io.Copy(wr, resp.Body); err != nil {
394                                 os.Remove(dst.Name())
395                                 dst.Close()
396                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
397                                 http.Error(w, err.Error(), http.StatusInternalServerError)
398                                 return false
399                         }
400                         if err = dstBuf.Flush(); err != nil {
401                                 os.Remove(dst.Name())
402                                 dst.Close()
403                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
404                                 http.Error(w, err.Error(), http.StatusInternalServerError)
405                                 return false
406                         }
407                         if bytes.Compare(hasher.Sum(nil), digest) != 0 {
408                                 log.Println(r.RemoteAddr, "pypi", filename, "digest mismatch")
409                                 os.Remove(dst.Name())
410                                 dst.Close()
411                                 http.Error(w, "digest mismatch", http.StatusBadGateway)
412                                 return false
413                         }
414                         if !NoSync {
415                                 if err = dst.Sync(); err != nil {
416                                         os.Remove(dst.Name())
417                                         dst.Close()
418                                         log.Println("error", r.RemoteAddr, "pypi", filename, err)
419                                         http.Error(w, err.Error(), http.StatusInternalServerError)
420                                         return false
421                                 }
422                         }
423                         if err = dst.Close(); err != nil {
424                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
425                                 http.Error(w, err.Error(), http.StatusInternalServerError)
426                                 return false
427                         }
428                         if err = os.Chtimes(dst.Name(), mtime, mtime); err != nil {
429                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
430                                 http.Error(w, err.Error(), http.StatusInternalServerError)
431                         }
432                         if err = os.Rename(dst.Name(), path); err != nil {
433                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
434                                 http.Error(w, err.Error(), http.StatusInternalServerError)
435                                 return false
436                         }
437                         if err = DirSync(dirPath); err != nil {
438                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
439                                 http.Error(w, err.Error(), http.StatusInternalServerError)
440                                 return false
441                         }
442                         if hashAlgo != HashAlgoSHA256 {
443                                 hashAlgo = HashAlgoSHA256
444                                 digest = hasherSHA256.Sum(nil)
445                                 for _, algo := range KnownHashAlgos[1:] {
446                                         os.Remove(path + "." + algo)
447                                 }
448                         }
449                 }
450                 if mtimeExists {
451                         stat, err := os.Stat(path)
452                         if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
453                                 log.Println(r.RemoteAddr, "pypi", filename, "touch")
454                                 if err = os.Chtimes(path, mtime, mtime); err != nil {
455                                         log.Println("error", r.RemoteAddr, "pypi", filename, err)
456                                         http.Error(w, err.Error(), http.StatusInternalServerError)
457                                 }
458                         }
459                 }
460
461                 if filename == filenameGet || gpgUpdate {
462                         if _, err = os.Stat(path); err != nil {
463                                 goto GPGSigSkip
464                         }
465                         resp, err := c.Do(agentedReq(uri + GPGSigExt))
466                         if err != nil {
467                                 goto GPGSigSkip
468                         }
469                         if resp.StatusCode != http.StatusOK {
470                                 resp.Body.Close()
471                                 goto GPGSigSkip
472                         }
473                         sig, err := ioutil.ReadAll(resp.Body)
474                         resp.Body.Close()
475                         if err != nil {
476                                 goto GPGSigSkip
477                         }
478                         if !bytes.HasPrefix(sig, []byte("-----BEGIN PGP SIGNATURE-----")) {
479                                 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "non PGP")
480                                 goto GPGSigSkip
481                         }
482                         if err = WriteFileSync(dirPath, path+GPGSigExt, sig, mtime); err != nil {
483                                 log.Println("error", r.RemoteAddr, "pypi", filename+GPGSigExt, err)
484                                 http.Error(w, err.Error(), http.StatusInternalServerError)
485                                 return false
486                         }
487                         log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "downloaded")
488                 }
489                 if mtimeExists {
490                         stat, err := os.Stat(path + GPGSigExt)
491                         if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
492                                 log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "touch")
493                                 if err = os.Chtimes(path+GPGSigExt, mtime, mtime); err != nil {
494                                         log.Println("error", r.RemoteAddr, "pypi", filename, err)
495                                         http.Error(w, err.Error(), http.StatusInternalServerError)
496                                 }
497                         }
498                 }
499
500         GPGSigSkip:
501                 path = path + "." + hashAlgo
502                 stat, err := os.Stat(path)
503                 if err == nil &&
504                         (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime)) {
505                         continue
506                 }
507                 if err != nil && !os.IsNotExist(err) {
508                         log.Println("error", r.RemoteAddr, "pypi", path, err)
509                         http.Error(w, err.Error(), http.StatusInternalServerError)
510                         return false
511                 }
512                 log.Println(r.RemoteAddr, "pypi", filename, "touch")
513                 if err = WriteFileSync(dirPath, path, digest, mtime); err != nil {
514                         log.Println("error", r.RemoteAddr, "pypi", path, err)
515                         http.Error(w, err.Error(), http.StatusInternalServerError)
516                         return false
517                 }
518         }
519         return true
520 }