]> Cypherpunks.ru repositories - gocheese.git/blob - refresh.go
Warehouse renamed blake2_256 to blake2b_256
[gocheese.git] / refresh.go
1 /*
2 GoCheese -- Python private package repository and caching proxy
3 Copyright (C) 2019-2023 Sergey Matveev <stargrave@stargrave.org>
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, version 3 of the License.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 package main
19
20 import (
21         "bufio"
22         "bytes"
23         "crypto/md5"
24         "crypto/sha256"
25         "crypto/sha512"
26         "encoding/hex"
27         "encoding/json"
28         "errors"
29         "hash"
30         "io"
31         "log"
32         "net/http"
33         "net/url"
34         "os"
35         "path/filepath"
36         "regexp"
37         "strings"
38         "time"
39
40         "go.cypherpunks.ru/recfile"
41         "golang.org/x/crypto/blake2b"
42 )
43
const (
	// Names of the digest algorithms. refreshDir compares them with the
	// algorithm part of the "#algo=hexdigest" URL fragment found in the
	// upstream index and uses them as the suffix of the digest files
	// stored next to each package file ("<filename>.<algo>").
	HashAlgoSHA256     = "sha256"
	HashAlgoBLAKE2b256 = "blake2b_256"
	HashAlgoSHA512     = "sha512"
	HashAlgoMD5        = "md5"
	// InternalFlag is the name of a file inside a package directory; when
	// it exists, refreshDir returns immediately without contacting upstream.
	InternalFlag       = ".internal"
)
51
var (
	// PkgPyPI matches a single line holding an HTML anchor. Submatch 1 is
	// the href attribute value, submatch 2 is the link text (refreshDir
	// treats it as the distribution filename).
	PkgPyPI           = regexp.MustCompile(`^.*<a href="([^"]+)"[^>]*>(.+)</a>.*$`)
	// PyPIURLParsed is the base URL against which refreshDir resolves
	// hrefs that carry no host. It is not initialised in this block.
	PyPIURLParsed     *url.URL
	// PyPIHTTPTransport is the transport used by the HTTP client that
	// refreshDir creates for all upstream requests.
	PyPIHTTPTransport http.Transport
	// KnownHashAlgos lists the supported digest algorithms. The order is
	// significant: after a successful download refreshDir stores the first
	// two digests (SHA-256 and BLAKE2b-256) and removes the digest files of
	// every algorithm from index 2 onwards.
	KnownHashAlgos    []string = []string{
		HashAlgoSHA256,
		HashAlgoBLAKE2b256,
		HashAlgoSHA512,
		HashAlgoMD5,
	}
)
63
64 func blake2b256New() hash.Hash {
65         h, err := blake2b.New256(nil)
66         if err != nil {
67                 panic(err)
68         }
69         return h
70 }
71
72 func agentedReq(url string) *http.Request {
73         req, err := http.NewRequest("GET", url, nil)
74         if err != nil {
75                 log.Fatalln(err)
76         }
77         req.Header.Set("User-Agent", UserAgent)
78         return req
79 }
80
// RecFieldToValuesMap pairs one metadata field name with the list of
// values taken from the upstream JSON for that field. refreshDir builds
// it with positional literals, so the field order must not change.
type RecFieldToValuesMap struct {
	// recField is the metadata field name; refreshDir translates it
	// through MDFieldToRecField before writing.
	recField   string
	// jsonFields holds the values; each one is written as a separate
	// field with the same name.
	jsonFields []string
}
85
86 func refreshDir(
87         w http.ResponseWriter,
88         r *http.Request,
89         pkgName, filenameGet string,
90 ) bool {
91         if _, err := os.Stat(filepath.Join(Root, pkgName, InternalFlag)); err == nil {
92                 return true
93         }
94         c := http.Client{Transport: &PyPIHTTPTransport}
95         dirPath := filepath.Join(Root, pkgName)
96         now := time.Now()
97
98         var allReleases map[string][]*PkgReleaseInfo
99         if *JSONURL != "" {
100                 resp, err := c.Do(agentedReq(*JSONURL + pkgName + "/json"))
101                 if err != nil {
102                         log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
103                         http.Error(w, err.Error(), http.StatusBadGateway)
104                         return false
105                 }
106                 if resp.StatusCode != http.StatusOK {
107                         resp.Body.Close()
108                         log.Println(
109                                 "error", r.RemoteAddr, "refresh-json", pkgName,
110                                 "HTTP status:", resp.Status,
111                         )
112                         http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
113                         return false
114                 }
115                 body, err := io.ReadAll(resp.Body)
116                 if err != nil {
117                         resp.Body.Close()
118                         log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err)
119                         http.Error(w, "can not read body", http.StatusBadGateway)
120                         return false
121                 }
122                 resp.Body.Close()
123                 var buf bytes.Buffer
124                 var description string
125                 wr := recfile.NewWriter(&buf)
126                 var meta PkgMeta
127                 err = json.Unmarshal(body, &meta)
128                 if err == nil {
129                         for _, m := range [][2]string{
130                                 {MDFieldName, meta.Info.Name},
131                                 {MDFieldVersion, meta.Info.Version},
132                                 {MDFieldSummary, meta.Info.Summary},
133                                 {MDFieldDescriptionContentType, meta.Info.DescriptionContentType},
134                                 {MDFieldKeywords, meta.Info.Keywords},
135                                 {MDFieldHomePage, meta.Info.HomePage},
136                                 {MDFieldAuthor, meta.Info.Author},
137                                 {MDFieldAuthorEmail, meta.Info.AuthorEmail},
138                                 {MDFieldMaintainer, meta.Info.Maintainer},
139                                 {MDFieldMaintainerEmail, meta.Info.MaintainerEmail},
140                                 {MDFieldLicense, meta.Info.License},
141                                 {MDFieldRequiresPython, meta.Info.RequiresPython},
142                         } {
143                                 recField, jsonField := m[0], m[1]
144                                 if jsonField == "" {
145                                         continue
146                                 }
147                                 if _, err = wr.WriteFields(recfile.Field{
148                                         Name:  MDFieldToRecField[recField],
149                                         Value: jsonField,
150                                 }); err != nil {
151                                         log.Fatalln(err)
152                                 }
153                         }
154                         for _, m := range []RecFieldToValuesMap{
155                                 {MDFieldClassifier, meta.Info.Classifier},
156                                 {MDFieldPlatform, meta.Info.Platform},
157                                 {MDFieldSupportedPlatform, meta.Info.SupportedPlatform},
158                                 {MDFieldRequiresDist, meta.Info.RequiresDist},
159                                 {MDFieldRequiresExternal, meta.Info.RequiresExternal},
160                                 {MDFieldProjectURL, meta.Info.ProjectURL},
161                                 {MDFieldProvidesExtra, meta.Info.ProvidesExtra},
162                         } {
163                                 for _, v := range m.jsonFields {
164                                         if _, err = wr.WriteFields(recfile.Field{
165                                                 Name:  MDFieldToRecField[m.recField],
166                                                 Value: v,
167                                         }); err != nil {
168                                                 log.Fatalln(err)
169                                         }
170                                 }
171                         }
172                         description = meta.Info.Description
173                         allReleases = meta.Releases
174                 } else {
175                         var metaStripped PkgMetaStripped
176                         err = json.Unmarshal(body, &metaStripped)
177                         if err != nil {
178                                 log.Println(
179                                         "error", r.RemoteAddr, "refresh-json", pkgName,
180                                         "can not parse JSON:", err,
181                                 )
182                                 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
183                                 return false
184                         }
185                         for _, m := range [][2]string{
186                                 {MDFieldName, metaStripped.Info.Name},
187                                 {MDFieldVersion, metaStripped.Info.Version},
188                                 {MDFieldSummary, metaStripped.Info.Summary},
189                                 {MDFieldDescriptionContentType, metaStripped.Info.DescriptionContentType},
190                                 {MDFieldKeywords, metaStripped.Info.Keywords},
191                                 {MDFieldHomePage, metaStripped.Info.HomePage},
192                                 {MDFieldAuthor, metaStripped.Info.Author},
193                                 {MDFieldAuthorEmail, metaStripped.Info.AuthorEmail},
194                                 {MDFieldMaintainer, metaStripped.Info.Maintainer},
195                                 {MDFieldMaintainerEmail, metaStripped.Info.MaintainerEmail},
196                                 {MDFieldLicense, metaStripped.Info.License},
197                                 {MDFieldRequiresPython, metaStripped.Info.RequiresPython},
198                         } {
199                                 recField, jsonField := m[0], m[1]
200                                 if jsonField == "" {
201                                         continue
202                                 }
203                                 if _, err = wr.WriteFields(recfile.Field{
204                                         Name:  MDFieldToRecField[recField],
205                                         Value: jsonField,
206                                 }); err != nil {
207                                         log.Fatalln(err)
208                                 }
209                         }
210
211                         for _, m := range []RecFieldToValuesMap{
212                                 {MDFieldClassifier, metaStripped.Info.Classifier},
213                                 {MDFieldRequiresDist, metaStripped.Info.RequiresDist},
214                         } {
215                                 for _, v := range m.jsonFields {
216                                         if _, err = wr.WriteFields(recfile.Field{
217                                                 Name:  MDFieldToRecField[m.recField],
218                                                 Value: v,
219                                         }); err != nil {
220                                                 log.Fatalln(err)
221                                         }
222                                 }
223                         }
224                         description = metaStripped.Info.Description
225                         allReleases = metaStripped.Releases
226                 }
227                 lines := strings.Split(description, "\n")
228                 if len(lines) > 0 {
229                         if _, err = wr.WriteFieldMultiline(
230                                 MDFieldDescription, lines,
231                         ); err != nil {
232                                 log.Fatalln(err)
233                         }
234                 }
235
236                 if !mkdirForPkg(w, r, pkgName) {
237                         return false
238                 }
239                 path := filepath.Join(dirPath, MDFile)
240                 existing, err := os.ReadFile(path)
241                 if err != nil || !bytes.Equal(existing, buf.Bytes()) {
242                         if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil {
243                                 log.Println("error", r.RemoteAddr, "refresh-json", path, err)
244                                 http.Error(w, err.Error(), http.StatusInternalServerError)
245                                 return false
246                         }
247                         log.Println(r.RemoteAddr, "pypi", pkgName+"/"+MDFile, "touch")
248                 }
249         }
250         mtimes := make(map[string]time.Time)
251         for _, releases := range allReleases {
252                 for _, rel := range releases {
253                         if rel.Filename == "" || rel.UploadTimeISO8601 == "" {
254                                 continue
255                         }
256                         t, err := time.Parse(time.RFC3339Nano, rel.UploadTimeISO8601)
257                         if err != nil {
258                                 log.Println(
259                                         "error", r.RemoteAddr, "refresh-json", pkgName,
260                                         "can not parse upload_time:", err,
261                                 )
262                                 http.Error(w, "can not parse metadata JSON", http.StatusBadGateway)
263                                 return false
264                         }
265                         mtimes[rel.Filename] = t.Truncate(time.Second)
266                 }
267         }
268
269         resp, err := c.Do(agentedReq(*PyPIURL + pkgName + "/"))
270         if err != nil {
271                 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
272                 http.Error(w, err.Error(), http.StatusBadGateway)
273                 return false
274         }
275         if resp.StatusCode != http.StatusOK {
276                 resp.Body.Close()
277                 log.Println(
278                         "error", r.RemoteAddr, "refresh", pkgName,
279                         "HTTP status:", resp.Status,
280                 )
281                 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
282                 return false
283         }
284         body, err := io.ReadAll(resp.Body)
285         resp.Body.Close()
286         if err != nil {
287                 log.Println("error", r.RemoteAddr, "refresh", pkgName, err)
288                 http.Error(w, err.Error(), http.StatusBadGateway)
289                 return false
290         }
291         if !mkdirForPkg(w, r, pkgName) {
292                 return false
293         }
294         for _, lineRaw := range bytes.Split(body, []byte("\n")) {
295                 submatches := PkgPyPI.FindStringSubmatch(string(lineRaw))
296                 if len(submatches) == 0 {
297                         continue
298                 }
299                 uri := submatches[1]
300                 filename := submatches[2]
301                 pkgURL, err := url.Parse(uri)
302                 if err != nil {
303                         log.Println("error", r.RemoteAddr, "refresh", uri, err)
304                         http.Error(w, err.Error(), http.StatusBadGateway)
305                         return false
306                 }
307
308                 if pkgURL.Fragment == "" {
309                         log.Println(r.RemoteAddr, "pypi", filename, "no digest")
310                         http.Error(w, "no digest provided", http.StatusBadGateway)
311                         return false
312                 }
313                 digestInfo := strings.Split(pkgURL.Fragment, "=")
314                 if len(digestInfo) == 1 {
315                         // Ancient non PEP-0503 PyPIs, assume MD5
316                         digestInfo = []string{"md5", digestInfo[0]}
317                 } else if len(digestInfo) != 2 {
318                         log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
319                         http.Error(w, "invalid digest provided", http.StatusBadGateway)
320                         return false
321                 }
322                 digest, err := hex.DecodeString(digestInfo[1])
323                 if err != nil {
324                         log.Println("error", r.RemoteAddr, "pypi", filename, "invalid digest")
325                         http.Error(w, err.Error(), http.StatusBadGateway)
326                         return false
327                 }
328                 hashAlgo := digestInfo[0]
329                 var hasherNew func() hash.Hash
330                 var hashSize int
331                 switch hashAlgo {
332                 case HashAlgoMD5:
333                         hasherNew = md5.New
334                         hashSize = md5.Size
335                 case HashAlgoSHA256:
336                         hasherNew = sha256.New
337                         hashSize = sha256.Size
338                 case HashAlgoSHA512:
339                         hasherNew = sha512.New
340                         hashSize = sha512.Size
341                 case HashAlgoBLAKE2b256:
342                         hasherNew = blake2b256New
343                         hashSize = blake2b.Size256
344                 default:
345                         log.Println(
346                                 "error", r.RemoteAddr, "pypi",
347                                 filename, "unknown digest", hashAlgo,
348                         )
349                         http.Error(w, "unknown digest algorithm", http.StatusBadGateway)
350                         return false
351                 }
352                 if len(digest) != hashSize {
353                         log.Println(
354                                 "error", r.RemoteAddr, "pypi",
355                                 filename, "invalid digest length")
356                         http.Error(w, "invalid digest length", http.StatusBadGateway)
357                         return false
358                 }
359
360                 pkgURL.Fragment = ""
361                 if pkgURL.Host == "" {
362                         uri = PyPIURLParsed.ResolveReference(pkgURL).String()
363                 } else {
364                         uri = pkgURL.String()
365                 }
366                 mtime, mtimeExists := mtimes[filename]
367                 if !mtimeExists {
368                         mtime = now
369                 }
370
371                 path := filepath.Join(dirPath, filename)
372                 if filename == filenameGet {
373                         if Killed {
374                                 // Skip heavy remote call, when shutting down
375                                 http.Error(w, "shutting down", http.StatusInternalServerError)
376                                 return false
377                         }
378                         log.Println(r.RemoteAddr, "pypi", filename, "download")
379                         resp, err = c.Do(agentedReq(uri))
380                         if err != nil {
381                                 log.Println("error", r.RemoteAddr, "pypi", filename, "download", err)
382                                 http.Error(w, err.Error(), http.StatusBadGateway)
383                                 return false
384                         }
385                         defer resp.Body.Close()
386                         if resp.StatusCode != http.StatusOK {
387                                 log.Println(
388                                         "error", r.RemoteAddr,
389                                         "pypi", filename, "download",
390                                         "HTTP status:", resp.Status,
391                                 )
392                                 http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway)
393                                 return false
394                         }
395                         hasher := hasherNew()
396                         hasherSHA256 := sha256.New()
397                         hasherBLAKE2b256 := blake2b256New()
398                         dst, err := TempFile(dirPath)
399                         if err != nil {
400                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
401                                 http.Error(w, err.Error(), http.StatusInternalServerError)
402                                 return false
403                         }
404                         dstBuf := bufio.NewWriter(dst)
405                         wrs := []io.Writer{hasher, dstBuf}
406                         if hashAlgo != HashAlgoSHA256 {
407                                 wrs = append(wrs, hasherSHA256)
408                         }
409                         if hashAlgo != HashAlgoBLAKE2b256 {
410                                 wrs = append(wrs, hasherBLAKE2b256)
411                         }
412                         wr := io.MultiWriter(wrs...)
413                         if _, err = io.Copy(wr, resp.Body); err != nil {
414                                 os.Remove(dst.Name())
415                                 dst.Close()
416                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
417                                 http.Error(w, err.Error(), http.StatusInternalServerError)
418                                 return false
419                         }
420                         if err = dstBuf.Flush(); err != nil {
421                                 os.Remove(dst.Name())
422                                 dst.Close()
423                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
424                                 http.Error(w, err.Error(), http.StatusInternalServerError)
425                                 return false
426                         }
427                         if !bytes.Equal(hasher.Sum(nil), digest) {
428                                 log.Println(r.RemoteAddr, "pypi", filename, "digest mismatch")
429                                 os.Remove(dst.Name())
430                                 dst.Close()
431                                 http.Error(w, "digest mismatch", http.StatusBadGateway)
432                                 return false
433                         }
434                         if digestStored, err := os.ReadFile(path + "." + hashAlgo); err == nil &&
435                                 !bytes.Equal(digest, digestStored) {
436                                 err = errors.New("stored digest mismatch")
437                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
438                                 os.Remove(dst.Name())
439                                 dst.Close()
440                                 http.Error(w, err.Error(), http.StatusInternalServerError)
441                                 return false
442                         }
443                         if !NoSync {
444                                 if err = dst.Sync(); err != nil {
445                                         os.Remove(dst.Name())
446                                         dst.Close()
447                                         log.Println("error", r.RemoteAddr, "pypi", filename, err)
448                                         http.Error(w, err.Error(), http.StatusInternalServerError)
449                                         return false
450                                 }
451                         }
452                         if err = dst.Close(); err != nil {
453                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
454                                 http.Error(w, err.Error(), http.StatusInternalServerError)
455                                 return false
456                         }
457                         if err = os.Chtimes(dst.Name(), mtime, mtime); err != nil {
458                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
459                                 http.Error(w, err.Error(), http.StatusInternalServerError)
460                         }
461                         if err = os.Rename(dst.Name(), path); err != nil {
462                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
463                                 http.Error(w, err.Error(), http.StatusInternalServerError)
464                                 return false
465                         }
466                         if err = DirSync(dirPath); err != nil {
467                                 log.Println("error", r.RemoteAddr, "pypi", filename, err)
468                                 http.Error(w, err.Error(), http.StatusInternalServerError)
469                                 return false
470                         }
471
472                         var digestSHA256 []byte
473                         var digestBLAKE2b256 []byte
474                         if hashAlgo == HashAlgoSHA256 {
475                                 digestSHA256 = hasher.Sum(nil)
476                         } else {
477                                 digestSHA256 = hasherSHA256.Sum(nil)
478                         }
479                         if hashAlgo == HashAlgoBLAKE2b256 {
480                                 digestBLAKE2b256 = hasher.Sum(nil)
481                         } else {
482                                 digestBLAKE2b256 = hasherBLAKE2b256.Sum(nil)
483                         }
484                         if err = WriteFileSync(
485                                 dirPath, path+"."+HashAlgoSHA256,
486                                 digestSHA256, mtime,
487                         ); err != nil {
488                                 log.Println(
489                                         "error", r.RemoteAddr, "pypi",
490                                         path+"."+HashAlgoSHA256, err,
491                                 )
492                                 http.Error(w, err.Error(), http.StatusInternalServerError)
493                                 return false
494                         }
495                         if err = WriteFileSync(
496                                 dirPath, path+"."+HashAlgoBLAKE2b256,
497                                 digestBLAKE2b256, mtime,
498                         ); err != nil {
499                                 log.Println(
500                                         "error", r.RemoteAddr, "pypi",
501                                         path+"."+HashAlgoBLAKE2b256, err,
502                                 )
503                                 http.Error(w, err.Error(), http.StatusInternalServerError)
504                                 return false
505                         }
506                         for _, algo := range KnownHashAlgos[2:] {
507                                 os.Remove(path + "." + algo)
508                         }
509                         digest = nil
510                 }
511                 if mtimeExists {
512                         stat, err := os.Stat(path)
513                         if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) {
514                                 log.Println(r.RemoteAddr, "pypi", filename, "touch")
515                                 if err = os.Chtimes(path, mtime, mtime); err != nil {
516                                         log.Println("error", r.RemoteAddr, "pypi", filename, err)
517                                         http.Error(w, err.Error(), http.StatusInternalServerError)
518                                 }
519                         }
520                 }
521
522                 if digest == nil {
523                         continue
524                 }
525                 path = path + "." + hashAlgo
526                 stat, err := os.Stat(path)
527                 if err == nil && (!mtimeExists ||
528                         (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime))) {
529                         continue
530                 }
531                 if err != nil && !os.IsNotExist(err) {
532                         log.Println("error", r.RemoteAddr, "pypi", path, err)
533                         http.Error(w, err.Error(), http.StatusInternalServerError)
534                         return false
535                 }
536                 log.Println(r.RemoteAddr, "pypi", filename, "touch")
537                 if err = WriteFileSync(dirPath, path, digest, mtime); err != nil {
538                         log.Println("error", r.RemoteAddr, "pypi", path, err)
539                         http.Error(w, err.Error(), http.StatusInternalServerError)
540                         return false
541                 }
542         }
543         return true
544 }