From: Sergey Matveev Date: Sat, 25 Sep 2021 15:04:23 +0000 (+0300) Subject: Metadata, mtime support. Massive refactoring X-Git-Tag: v3.0.0~7 X-Git-Url: http://www.git.cypherpunks.ru/?p=gocheese.git;a=commitdiff_plain;h=60834a0713d5dcc6a9911511cb8618ce7358c824 Metadata, mtime support. Massive refactoring --- diff --git a/all.do b/all.do index 3a7a0f1..0b4598e 100644 --- a/all.do +++ b/all.do @@ -1 +1 @@ -redo-ifchange gocheese gocheese.info +redo-ifchange gocheese doc/gocheese.info diff --git a/clean.do b/clean.do index 07e0d6a..7a1ead6 100644 --- a/clean.do +++ b/clean.do @@ -1 +1,2 @@ -rm -f gocheese doc/clean VERSION +redo-ifchange doc/clean +rm -f gocheese VERSION diff --git a/contrib/pyshop2packages.sh b/contrib/pyshop2packages.sh index efdfbb3..98613f3 100755 --- a/contrib/pyshop2packages.sh +++ b/contrib/pyshop2packages.sh @@ -6,7 +6,7 @@ # with all Pyshop-downloaded/uploaded files, SHA256 checksums, # private package marks. Also it will call GoCheese's /simple/ API # for forcing metainformation update (necessary for storing SHA256 -# checksums missing in Pyshop). +# checksums missing in Pyshop). No metadata conversion is performed. 
pkgname() { perl -ne "s/[-_.]+/-/g ; print lc" @@ -17,7 +17,7 @@ pkgname() { ######################################################################## ctr=0 echo " -SELECT package.name, release_file.filename +SELECT package.name, release_file.filename, release_file.created_at FROM release_file JOIN release ON release.id = release_file.release_id JOIN package ON release.package_id = package.id @@ -27,6 +27,7 @@ ORDER BY package.name [ $(( $ctr % 100 )) -ne 0 ] || echo $ctr $pkginfo pkg=$(echo "$pkginfo" | cut -f1 | pkgname) filename=$(echo "$pkginfo" | cut -f2) + created=$(echo "$pkginfo" | cut -f3 | sed "s/ /T/") [ -n "$pkg" ] [ -n "$filename" ] src=$(echo $pkg | cut -c1)/$filename @@ -35,6 +36,7 @@ ORDER BY package.name [ -r $dst ] && continue || : mkdir -p packages/$pkg ln $src $dst + touch -d "$created" $dst done ######################################################################## @@ -44,6 +46,7 @@ for pkg in $(echo "SELECT name FROM package WHERE local = true" | sqlite3 pyshop cd packages/$(echo $pkg | pkgname) for f in * ; do sha256 < $f | xxd -r -p > $f.sha256 ; done touch .internal + touch -r $f $f.sha256 cd ../.. done diff --git a/doc/index.texi b/doc/index.texi index 93ee48a..0ba5721 100644 --- a/doc/index.texi +++ b/doc/index.texi @@ -17,7 +17,8 @@ It serves two purposes: @item proxying and caching of missing packages from upstream @url{https://pypi.org/, PyPI}, conforming to @url{https://www.python.org/dev/peps/pep-0503/, PEP-0503} - (Simple Repository API) + (Simple Repository API) and + @url{https://warehouse.pypa.io/api-reference/json.html, JSON API} @item hosting of private locally uploaded packages, conforming to @url{https://warehouse.pypa.io/api-reference/legacy/, Warehouse Legacy API} @end itemize @@ -29,30 +30,56 @@ Why could you like it and how it can be better to fit your needs? files per package. 
Package deletion, renaming, making it uploadable (private) is done with simple @command{mkdir}, @command{touch}, etc commands -@item Just single statically compiled Go binary +@item Just single statically compiled Go binary, UCSPI-TCP compatibility, + high performance (including HTTP/2, keepalives and TLS session resumption) @item No configuration file, but several simple command line arguments @item Consistency (because of atomic synced operations) and integrity - (because of SHA256 checksums stored nearby) + (because of cryptographic checksums stored nearby) +@item Package's metadata and created times storing for uploaded and + proxied packages @end itemize Initially it was created as a fork of @url{https://github.com/c4s4/cheeseshop, cheeseshop}, -but nearly all the code was rewritten. It has huge differences: +but nearly all the code was rewritten. It is aimed to be replacement for +@command{PyShop} lacking huge quantity of features, reliability and +consistency guarantees, workability without deprecated XML-RPC API, and +suffering performance. @itemize -@item Proxying and caching of missing packages, including GPG signatures -@item @url{https://pythonwheels.com/, Wheel} uploading support -@item Integrity check of proxied packages: MD5, SHA256, SHA512, BLAKE2b-256 -@item SHA256 checksums for stored packages -@item Verifying of SHA256 checksum for uploaded packages -@item Ability to authenticate upstream PyPI by its X.509 certificate's SPKI hash -@item Storing of uploaded GPG signatures -@item Secure Argon2i (or SHA256) stored passwords hashing -@item No YAML configuration, just command-line arguments -@item No package overwriting ability (as PyPI does too) -@item Graceful HTTP-server shutdown -@item Atomic packages store on filesystem -@item @url{https://cr.yp.to/ucspi-tcp.html, UCSPI-TCP} compatible mode + +@item Supports proxying and caching of non-internal packages from the +upstream PyPI installation. 
+ +@item Supports uploading of internal packages through the standard +Warehouse API, including signatures, metadata and checksums. + +@item Supports @url{https://pythonwheels.com/, wheels}, GPG signatures, +@url{https://packaging.python.org/specifications/core-metadata/, Metadata} +with @url{https://www.python.org/dev/peps/pep-0566/, PEP-0566} compatible +conversion to JSON, multiple (MD5, SHA256, SHA512, BLAKE2b-256) integrity +checksums storing and verifying during uploading and proxying. + +@item Supports Simple and JSON APIs, being able to replace PyPI for all +downstream clients. + +@item Stores package's creation time with solicitude as @code{mtime}, +giving it in HTTP headers and JSON API. + +@item TLS and HTTP/2 capable transport to the upstream, with enabled +keepalives and session resumption TLS tickets. Graceful HTTP server +shutdown. Can work as a @url{https://cr.yp.to/ucspi-tcp.html, UCSPI-TCP} +service. + +@item Atomic and @code{fsync}ed reliable and consistent updates to the +filesystem. + +@item Has ability to authenticate upstream PyPI by its X.509 +certificate's SPKI hash. + +@item User authentication supports passwords either hashed with SHA256 +or strengthened with @url{https://datatracker.ietf.org/doc/html/rfc9106, Argon2i}. + @end itemize Also it contains @file{contrib/pyshop2packages.sh} migration script for @@ -68,19 +95,9 @@ Please send questions, bug reports and patches to @url{gocheese@@cypherpunks.ru} @insertcopying -@menu -* Install:: -* Usage:: -* Password authentication: Passwords. -* UCSPI-TCP:: -* TLS support: TLS. -* Storage format: Storage. 
-@end menu - @include install.texi @include usage.texi @include passwords.texi -@include ucspi.texi @include tls.texi @include storage.texi diff --git a/doc/internal.texi b/doc/internal.texi new file mode 100644 index 0000000..55dfe91 --- /dev/null +++ b/doc/internal.texi @@ -0,0 +1,10 @@ +@node Internal +@section Internal + +Internal packages are never refreshed from the upstream and they can be +@ref{Uploading, updated}. They are manually marked as internal/private: + +@example +$ mkdir packages/myprivatepkg +$ touch packages/myprivatepkg/.internal +@end example diff --git a/doc/proxy.texi b/doc/proxy.texi new file mode 100644 index 0000000..ef8d30e --- /dev/null +++ b/doc/proxy.texi @@ -0,0 +1,43 @@ +@node Proxying +@section Proxying + +By default GoCheese is configured to use PyPI: +@option{-pypi https://pypi.org/simple/}, +@option{-pypi-json https://pypi.org/pypi/} (if empty string, then do not +try to update the metadata). + +It gives several HTTP endpoints to work with: + +@table @asis + +@item @code{/simple/} (@option{-refresh} option) +Simple API entrypoint. Each access to it asks for upstream if any state +refresh is needed. Each refresh updates the package's metadata (if +@option{-pypi-json} is enabled), available releases and their checksums. + +@item @code{/norefresh/} (@option{-norefresh} option) +Same as above, but does not refresh data from the upstream, completely +read only mode. + +@item @code{/gpgupdate/} (@option{-gpgupdate} option) +Refresh the package state from the upstream as above, but additionally +check and download missing GPG signatures. Intended to be used only +manually, for example after database migration. +It is probably useful to set @env{$GOCHEESE_NO_SYNC=1} environment +variable to turn off filesystem synchronization calls. + +@item @code{/pypi/} (@option{-json} option) +Read only (non refreshing) JSON API entrypoint, giving metadata for the +packages and releases. 
+ +@item @code{/} and @code{/hr/*} +Those URLs give human readable packages listing and package information. + +@end table + +To use GoCheese as a proxy, just configure your @file{pip.conf}: + +@example +[install] +index-url = http://gocheese.host:8080/simple/ +@end example diff --git a/doc/storage.texi b/doc/storage.texi index 46e35bc..215db8d 100644 --- a/doc/storage.texi +++ b/doc/storage.texi @@ -6,6 +6,7 @@ Root directory has the following hierarchy: @verbatim root +-- public-package + | +- .metadata.rec | +- public-package-0.1.tar.gz.md5 | +- public-package-0.1.tar.gz.blake2_256 | +- public-package-0.1.1.tar.gz.blake2_256 @@ -14,6 +15,7 @@ root | +- public-package-0.2.tar.gz.sha256 +-- private-package | +- .internal + | +- .metadata.rec | +- private-package-0.1.tar.gz | +- private-package-0.1.tar.gz.asc | +- private-package-0.1.tar.gz.sha256 @@ -27,9 +29,13 @@ versions with checksums and write them in corresponding @file{.sha256}, @file{.blake2_256}, @file{.sha512}, @file{.md5} files. However no package package tarball is downloaded. +If JSON API is enabled, then metadata is also downloaded and stored in +@file{.metadata.rec} @url{https://www.gnu.org/software/recutils/, recfile}. +It fully resembles Core Metadata structure. + When you request for particular package version, then its tarball is -downloaded and verified against the stored checksum. But SHA256 is -forced to be stored and used later. +downloaded and verified against the stored checksum. But SHA256 is then +forcefully used later. For example @file{public-package} has @code{0.1} version, downloaded a long time ago with MD5 checksum. @code{0.1.1} version is downloaded more @@ -42,3 +48,5 @@ SHA256 recalculated checksum. Also upstream has corresponding @file{.internal} file. It can be uploaded and queries to it are not proxied to upstream PyPI. You have to create it manually. If you upload GPG signature, then it will be also stored. 
+ +Each package's release file has @code{mtime} set to its upload time. diff --git a/doc/ucspi.texi b/doc/ucspi.texi index 56fb85f..45d8fe3 100644 --- a/doc/ucspi.texi +++ b/doc/ucspi.texi @@ -1,8 +1,8 @@ @node UCSPI-TCP -@unnumbered UCSPI-TCP +@section UCSPI-TCP You can use GoCheese as UCSPI-TCP service. For example running it also -under @command{daemontools}: +under @url{http://cr.yp.to/daemontools.html, daemontools}: @example # mkdir -p /var/service/.gocheese/log diff --git a/doc/upload.texi b/doc/upload.texi new file mode 100644 index 0000000..75b9848 --- /dev/null +++ b/doc/upload.texi @@ -0,0 +1,24 @@ +@node Uploading +@section Uploading + +Ordinary @url{https://pypi.org/project/twine/, twine} can be easily used: + +@example +$ twine upload \ + --repository-url http://gocheese.host:8080/simple/ \ + --username spam --password foo dist/tarball.tar.gz +@end example + +Also you can permanently configure it: + +@example +[pypi] +repository: https://gocheese.host/simple/ +username: spam +password: foo +@end example + +All metadata information sent by @command{twine} is stored on the disk. +Package creation time will be server's current time. If @command{twine} +sends package checksums, then they are checked against them. GPG signature +file is also saved. diff --git a/doc/usage.texi b/doc/usage.texi index a116bc9..e3dd242 100644 --- a/doc/usage.texi +++ b/doc/usage.texi @@ -1,50 +1,7 @@ @node Usage @unnumbered Usage -To use it for download purposes, just configure your @file{pip.conf}: - -@example -[install] -index-url = http://gocheese.host:8080/simple/ -@end example - -@option{-refresh} URL (@code{/simple/} by default) automatically -refreshes metainformation (available versions and their checksums) -from the upstream, when queried for package directory listing. -@option{-norefresh} prevents upstream queries. - -@option{-gpgupdate} is useful mainly for migrated for Pyshop migrated -repositories. It forces GPG signature files downloading for all existing -package files. 
- -You can upload packages to it with @url{https://pypi.org/project/twine/, twine}: - -@example -twine upload - --repository-url http://gocheese.host:8080/simple/ \ - --username spam \ - --password foo dist/tarball.tar.gz -@end example - -Or you can store it permanently in @file{.pypirc}: - -@example -[pypi] -repository: https://gocheese.host/simple/ -username: spam -password: foo -@end example - -If @command{twine} sends SHA256 checksum in the request, then uploaded -file is checked against it. - -Pay attention that you have to manually create corresponding private -package directory! You are not allowed to upload anything explicitly -flagged as internal package. - -It is advisable to run GoCheese under some kind of -@url{http://cr.yp.to/daemontools.html, daemontools}. - -@env{$GOCHEESE_NO_SYNC=1} environment variable turns off filesystem -synchronization calls, that could be useful when massively updating the -database. +@include proxy.texi +@include upload.texi +@include internal.texi +@include ucspi.texi diff --git a/fileutils.go b/fileutils.go index 03b852e..b37b99c 100644 --- a/fileutils.go +++ b/fileutils.go @@ -18,6 +18,8 @@ along with this program. If not, see . 
package main import ( + "log" + "net/http" "os" "path/filepath" "strconv" @@ -49,7 +51,7 @@ func DirSync(dirPath string) error { return fd.Close() } -func WriteFileSync(dirPath, filePath string, data []byte) error { +func WriteFileSync(dirPath, filePath string, data []byte, mtime time.Time) error { dst, err := TempFile(dirPath) if err != nil { return err @@ -67,8 +69,24 @@ func WriteFileSync(dirPath, filePath string, data []byte) error { } } dst.Close() + if err = os.Chtimes(dst.Name(), mtime, mtime); err != nil { + return err + } if err = os.Rename(dst.Name(), filePath); err != nil { return err } return DirSync(dirPath) } + +func mkdirForPkg(w http.ResponseWriter, r *http.Request, pkgName string) bool { + path := filepath.Join(*Root, pkgName) + if _, err := os.Stat(path); os.IsNotExist(err) { + if err = os.Mkdir(path, os.FileMode(0777)); err != nil { + log.Println("error", r.RemoteAddr, "mkdir", pkgName, err) + http.Error(w, err.Error(), http.StatusInternalServerError) + return false + } + log.Println(r.RemoteAddr, "mkdir", pkgName) + } + return true +} diff --git a/go.mod b/go.mod index a3ad4e2..0669056 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module go.cypherpunks.ru/gocheese/v2 go 1.12 require ( - golang.org/x/crypto v0.0.0-20201221181555-eec23a3978ad + go.cypherpunks.ru/recfile v0.4.3 golang.org/x/crypto v0.0.0-20210921155107-089bfa567519 golang.org/x/net v0.0.0-20210924054057-cf34111cab4d ) diff --git a/go.sum b/go.sum index 88ff880..6b5296b 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,5 @@ -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20201221181555-eec23a3978ad h1:DN0cp81fZ3njFcrLCytUHRSUkqBjfTo4Tx9RJTWs0EY= +go.cypherpunks.ru/recfile v0.4.3 h1:ephokihmV//p0ob6gx2FWXvm28/NBDbWTOJPUNahxO8= +go.cypherpunks.ru/recfile v0.4.3/go.mod h1:sR+KajB+vzofL3SFVFwKt3Fke0FaCcN1g3YPNAhU3qI= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519 
h1:7I4JAnoQBe7ZtJcBaYHi5UtiO8tQHbUSXxL+pnGRANg= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= diff --git a/hr.go b/hr.go new file mode 100644 index 0000000..845a13e --- /dev/null +++ b/hr.go @@ -0,0 +1,184 @@ +/* +GoCheese -- Python private package repository and caching proxy +Copyright (C) 2019-2021 Sergey Matveev + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, version 3 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +*/ + +package main + +import ( + "bytes" + "html/template" + "io/ioutil" + "log" + "net/http" + "os" + "sort" + "strings" +) + +var ( + HRRootTmpl = template.Must(template.New("hr-root").Parse(` + + + {{.Version}}: human readable listing + + +
    {{range .Packages}} +
  • {{.}}
  • +{{- end}} +
+ + +`)) + HRPkgTmpl = template.Must(template.New("hr-pkg").Parse(` + + + {{.Version}}: package {{.PkgName}} + + +
+ {{with .Info.Name}}
Name
{{.}}
{{end}} + {{with .Info.Version}}
Version
{{.}}
{{end}} + + {{with .Info.Platform}}
Platform
    + {{range .}}
  • {{.}}
  • + {{end}}
{{end}} + + {{with .Info.SupportedPlatform}}
SupportedPlatform
    + {{range .}}
  • {{.}}
  • + {{end}}
{{end}} + +
Summary
{{.Info.Summary}}
+
Description
+{{.Info.Description}}
+      
+ + {{with .Info.DescriptionContentType}}
DescriptionContentType
{{.}}
{{end}} + {{with .Info.Keywords}}
Keywords
{{.}}
{{end}} + {{with .Info.HomePage}}
HomePage
{{.}}
{{end}} + {{with .Info.Author}}
Author
{{.}}
{{end}} + {{with .Info.AuthorEmail}}
AuthorEmail
{{.}}
{{end}} + {{with .Info.Maintainer}}
Maintainer
{{.}}
{{end}} + {{with .Info.MaintainerEmail}}
MaintainerEmail
{{.}}
{{end}} + {{with .Info.License}}
License
{{.}}
{{end}} + + {{with .Info.Classifier}}
Classifier
    + {{range .}}
  • {{.}}
  • + {{end}}
{{end}} + + {{with .Info.RequiresDist}}
RequiresDist
    + {{range .}}
  • {{.}}
  • + {{end}}
{{end}} + + {{with .Info.RequiresPython}}
RequiresPython
{{.}}
{{end}} + + {{with .Info.RequiresExternal}}
RequiresExternal
    + {{range .}}
  • {{.}}
  • + {{end}}
{{end}} + + {{with .Info.ProjectURL}}
ProjectURL
    + {{range .}}
  • {{.}}
  • + {{end}}
{{end}} + + {{with .Info.ProvidesExtra}}
ProvidesExtra
    + {{range .}}
  • {{.}}
  • + {{end}}
{{end}} +
+ +

Releases

+ + + + + + + + + {{range .Releases}}{{if .Size}} + + + {{end}}{{end}} +
FilenameVersionUploadedSizeDigests
{{.Filename}} + {{.Version}} + {{.UploadTimeISO8601}} + {{.Size}} +
    {{range $a, $d := .Digests}} +
  • {{$a}}: {{$d}}
  • + {{end}}
+ + +`)) +) + +func serveHRRoot(w http.ResponseWriter, r *http.Request) { + files, err := ioutil.ReadDir(*Root) + if err != nil { + log.Println("error", r.RemoteAddr, "hr-root", err) + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + packages := make([]string, 0, len(files)) + for _, f := range files { + packages = append(packages, f.Name()) + } + sort.Strings(packages) + var buf bytes.Buffer + err = HRRootTmpl.Execute(&buf, struct { + Version string + Packages []string + }{ + Version: UserAgent, + Packages: packages, + }) + if err != nil { + log.Println("error", r.RemoteAddr, "hr-root", err) + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + w.Write(buf.Bytes()) +} + +func serveHRPkg(w http.ResponseWriter, r *http.Request) { + cols := strings.Split(strings.TrimRight(r.URL.Path, "/"), "/") + pkgName := cols[len(cols)-1] + meta, releases, err := getMetadata(pkgName, "") + if err != nil { + if os.IsNotExist(err) { + http.NotFound(w, r) + } else { + log.Println("error", r.RemoteAddr, "json", pkgName, err) + http.Error(w, err.Error(), http.StatusInternalServerError) + } + return + } + var buf bytes.Buffer + err = HRPkgTmpl.Execute(&buf, struct { + Version string + PkgName string + Info PkgInfo + Releases []*PkgReleaseInfo + }{ + Version: UserAgent, + PkgName: pkgName, + Info: meta.Info, + Releases: releases, + }) + if err != nil { + log.Println("error", r.RemoteAddr, "root", err) + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + w.Write(buf.Bytes()) +} diff --git a/integrity.go b/integrity.go index 7ca0a0c..1e0dddd 100644 --- a/integrity.go +++ b/integrity.go @@ -29,7 +29,7 @@ import ( ) func goodIntegrity() bool { - dirs, err := ioutil.ReadDir(*root) + dirs, err := ioutil.ReadDir(*Root) if err != nil { log.Fatal(err) } @@ -39,7 +39,7 @@ func goodIntegrity() bool { var data []byte var pkgName string for _, dir := range dirs { - files, err := ioutil.ReadDir(filepath.Join(*root, dir.Name())) + 
files, err := ioutil.ReadDir(filepath.Join(*Root, dir.Name())) if err != nil { log.Fatal(err) } @@ -48,7 +48,7 @@ func goodIntegrity() bool { continue } pkgName = strings.TrimSuffix(file.Name(), "."+HashAlgoSHA256) - data, err = ioutil.ReadFile(filepath.Join(*root, dir.Name(), pkgName)) + data, err = ioutil.ReadFile(filepath.Join(*Root, dir.Name(), pkgName)) if err != nil { if os.IsNotExist(err) { continue @@ -56,7 +56,7 @@ func goodIntegrity() bool { log.Fatal(err) } hasher.Write(data) - data, err = ioutil.ReadFile(filepath.Join(*root, dir.Name(), file.Name())) + data, err = ioutil.ReadFile(filepath.Join(*Root, dir.Name(), file.Name())) if err != nil { log.Fatal(err) } diff --git a/json.go b/json.go new file mode 100644 index 0000000..7ccb142 --- /dev/null +++ b/json.go @@ -0,0 +1,147 @@ +/* +GoCheese -- Python private package repository and caching proxy +Copyright (C) 2019-2021 Sergey Matveev + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, version 3 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+*/ + +package main + +import ( + "bytes" + "encoding/json" + "io/ioutil" + "log" + "net/http" + "os" + "path/filepath" + "strings" + + "go.cypherpunks.ru/recfile" +) + +func getMetadata(pkgName, version string) (*PkgMeta, []*PkgReleaseInfo, error) { + serial, releases, err := listDir(pkgName, true) + if err != nil { + return nil, nil, err + } + metadata, err := ioutil.ReadFile(filepath.Join(*Root, pkgName, MetadataFile)) + if err != nil { + if !os.IsNotExist(err) { + return nil, nil, err + } + } + info := PkgInfo{Name: pkgName} + if len(metadata) == 0 { + info.Version = releases[len(releases)-1].Version + } else { + m, err := recfile.NewReader(bytes.NewReader(metadata)).NextMapWithSlice() + if err != nil { + return nil, nil, err + } + if v, ok := m[metadataFieldToRecField(MetadataFieldVersion)]; ok { + info.Version = v[0] + } + if v, ok := m[metadataFieldToRecField(MetadataFieldSummary)]; ok { + info.Summary = v[0] + } + if v, ok := m[metadataFieldToRecField(MetadataFieldDescriptionContentType)]; ok { + info.DescriptionContentType = v[0] + } + if v, ok := m[metadataFieldToRecField(MetadataFieldKeywords)]; ok { + info.Keywords = v[0] + } + if v, ok := m[metadataFieldToRecField(MetadataFieldHomePage)]; ok { + info.HomePage = v[0] + } + if v, ok := m[metadataFieldToRecField(MetadataFieldAuthor)]; ok { + info.Author = v[0] + } + if v, ok := m[metadataFieldToRecField(MetadataFieldAuthorEmail)]; ok { + info.AuthorEmail = v[0] + } + if v, ok := m[metadataFieldToRecField(MetadataFieldMaintainer)]; ok { + info.Maintainer = v[0] + } + if v, ok := m[metadataFieldToRecField(MetadataFieldMaintainerEmail)]; ok { + info.MaintainerEmail = v[0] + } + if v, ok := m[metadataFieldToRecField(MetadataFieldLicense)]; ok { + info.License = v[0] + } + if v, ok := m[metadataFieldToRecField(MetadataFieldRequiresPython)]; ok { + info.RequiresPython = v[0] + } + if v, ok := m[metadataFieldToRecField(MetadataFieldDescription)]; ok { + info.Description = v[0] + } + info.Classifier = 
m[metadataFieldToRecField(MetadataFieldClassifier)] + info.Platform = m[metadataFieldToRecField(MetadataFieldPlatform)] + info.SupportedPlatform = m[metadataFieldToRecField(MetadataFieldSupportedPlatform)] + info.RequiresDist = m[metadataFieldToRecField(MetadataFieldRequiresDist)] + info.RequiresExternal = m[metadataFieldToRecField(MetadataFieldRequiresExternal)] + info.ProjectURL = m[metadataFieldToRecField(MetadataFieldProjectURL)] + info.ProvidesExtra = m[metadataFieldToRecField(MetadataFieldProvidesExtra)] + } + meta := PkgMeta{ + Info: info, + LastSerial: serial, + Releases: make(map[string][]*PkgReleaseInfo), + } + var lastVersion string + for _, release := range releases { + meta.Releases[release.Version] = append( + meta.Releases[release.Version], release, + ) + lastVersion = release.Version + } + if version != "" { + lastVersion = version + } + meta.URLs = meta.Releases[lastVersion] + return &meta, releases, nil +} + +// https://warehouse.pypa.io/api-reference/json.html +func serveJSON(w http.ResponseWriter, r *http.Request) { + path := strings.TrimPrefix(r.URL.Path, *JSONURLPath) + parts := strings.Split(strings.TrimSuffix(path, "/"), "/") + if len(parts) < 2 || parts[len(parts)-1] != "json" { + http.Error(w, "invalid JSON API action", http.StatusBadRequest) + return + } + var version string + if len(parts) == 3 { + version = parts[1] + } + pkgName := parts[0] + + meta, _, err := getMetadata(pkgName, version) + if err != nil { + if os.IsNotExist(err) { + http.NotFound(w, r) + } else { + log.Println("error", r.RemoteAddr, "json", pkgName, err) + http.Error(w, err.Error(), http.StatusInternalServerError) + } + return + } + w.Header().Set("Content-Type", "application/json") + buf, err := json.Marshal(&meta) + if err != nil { + log.Println("error", r.RemoteAddr, "json", pkgName, err) + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + w.Write(buf) +} diff --git a/list.go b/list.go new file mode 100644 index 0000000..d8bd073 --- 
/dev/null +++ b/list.go @@ -0,0 +1,262 @@ +/* +GoCheese -- Python private package repository and caching proxy +Copyright (C) 2019-2021 Sergey Matveev + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, version 3 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +*/ + +package main + +import ( + "bytes" + "encoding/hex" + "errors" + "html/template" + "io/fs" + "io/ioutil" + "log" + "net/http" + "os" + "path/filepath" + "sort" + "strings" + "time" +) + +// https://warehouse.pypa.io/api-reference/legacy.html +var ( + HTMLRootTmpl = template.Must(template.New("root").Parse(` + + + + Links for root + + {{$Refresh := .RefreshURLPath}}{{range .Packages}} + {{.}}
+{{- end}} + + +`)) + HTMLReleasesTmpl = template.Must(template.New("list").Parse(` + + + + Links for {{.PkgName}} + + {{$Refresh := .RefreshURLPath}}{{$PkgName := .PkgName}}{{range .Releases}} + {{.Filename}}
+{{- end}} + + +`)) + KnownExts = []string{".tar.bz2", ".tar.gz", ".whl", ".zip", ".egg", + ".exe", ".dmg", ".msi", ".rpm", ".deb", ".tgz"} +) + +func listRoot(w http.ResponseWriter, r *http.Request) { + files, err := ioutil.ReadDir(*Root) + if err != nil { + log.Println("error", r.RemoteAddr, "root", err) + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + packages := make([]string, 0, len(files)) + for _, f := range files { + packages = append(packages, f.Name()) + } + sort.Strings(packages) + var buf bytes.Buffer + err = HTMLRootTmpl.Execute(&buf, struct { + RefreshURLPath string + Packages []string + }{ + RefreshURLPath: *RefreshURLPath, + Packages: packages, + }) + if err != nil { + log.Println("error", r.RemoteAddr, "root", err) + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + w.Write(buf.Bytes()) +} + +type PkgReleaseInfoByName []*PkgReleaseInfo + +func (a PkgReleaseInfoByName) Len() int { + return len(a) +} + +func (a PkgReleaseInfoByName) Swap(i, j int) { + a[i], a[j] = a[j], a[i] +} + +func (a PkgReleaseInfoByName) Less(i, j int) bool { + if a[i].Version == a[j].Version { + return a[i].Filename < a[j].Filename + } + return a[i].Version < a[j].Version +} + +// Version format is too complicated: https://www.python.org/dev/peps/pep-0386/ +// So here is very simple parser working good enough for most packages +func filenameToVersion(fn string) string { + fn = strings.TrimSuffix(fn, GPGSigExt) + var trimmed string + for _, ext := range KnownExts { + trimmed = strings.TrimSuffix(fn, ext) + if trimmed != fn { + fn = trimmed + break + } + } + cols := strings.Split(fn, "-") + for i := 0; i < len(cols); i++ { + if len(cols[i]) == 0 { + continue + } + if ('0' <= cols[i][0]) && (cols[i][0] <= '9') { + return cols[i] + } + } + if len(cols) > 1 { + return cols[1] + } + return cols[0] +} + +func listDir(pkgName string, doSize bool) (int, []*PkgReleaseInfo, error) { + dirPath := filepath.Join(*Root, pkgName) + entries, 
err := os.ReadDir(dirPath) + if err != nil { + return 0, nil, err + } + files := make(map[string]fs.DirEntry, len(entries)) + for _, entry := range entries { + if entry.IsDir() { + continue + } + if entry.Name()[0] == '.' { + continue + } + files[entry.Name()] = entry + } + releaseFiles := make(map[string]*PkgReleaseInfo) + for _, algo := range KnownHashAlgos { + for fn, entry := range files { + if Killed { + return 0, nil, errors.New("killed") + } + if !strings.HasSuffix(fn, "."+algo) { + continue + } + delete(files, fn) + digest, err := ioutil.ReadFile(filepath.Join(dirPath, fn)) + if err != nil { + return 0, nil, err + } + fnClean := strings.TrimSuffix(fn, "."+algo) + release := releaseFiles[fnClean] + if release == nil { + fi, err := entry.Info() + if err != nil { + return 0, nil, err + } + release = &PkgReleaseInfo{ + Filename: fnClean, + Version: filenameToVersion(fnClean), + UploadTimeISO8601: fi.ModTime().UTC().Truncate( + time.Second, + ).Format(time.RFC3339), + Digests: make(map[string]string), + } + releaseFiles[fnClean] = release + if entry, exists := files[fnClean]; exists { + if doSize { + fi, err := entry.Info() + if err != nil { + return 0, nil, err + } + release.Size = fi.Size() + } + delete(files, fnClean) + } + if _, exists := files[fnClean+GPGSigExt]; exists { + release.HasSig = true + delete(files, fnClean+GPGSigExt) + } + } + release.Digests[algo] = hex.EncodeToString(digest) + } + } + releases := make([]*PkgReleaseInfo, 0, len(releaseFiles)) + for _, release := range releaseFiles { + releases = append(releases, release) + } + sort.Sort(PkgReleaseInfoByName(releases)) + return len(entries), releases, nil +} + +func serveListDir( + w http.ResponseWriter, + r *http.Request, + pkgName string, + autorefresh, gpgUpdate bool, +) { + dirPath := filepath.Join(*Root, pkgName) + if autorefresh { + if !refreshDir(w, r, pkgName, "", gpgUpdate) { + return + } + } else if _, err := os.Stat(dirPath); os.IsNotExist(err) && + !refreshDir(w, r, pkgName, "", 
false) { + return + } + _, releases, err := listDir(pkgName, false) + if err != nil { + log.Println("error", r.RemoteAddr, "list", pkgName, err) + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + for _, release := range releases { + singleDigest := make(map[string]string) + if digest, exists := release.Digests[HashAlgoSHA256]; exists { + singleDigest[HashAlgoSHA256] = digest + } else if digest, exists := release.Digests[HashAlgoSHA512]; exists { + singleDigest[HashAlgoSHA512] = digest + } else if digest, exists := release.Digests[HashAlgoBLAKE2b256]; exists { + singleDigest[HashAlgoBLAKE2b256] = digest + } else { + singleDigest = release.Digests + } + release.Digests = singleDigest + } + var buf bytes.Buffer + err = HTMLReleasesTmpl.Execute(&buf, struct { + RefreshURLPath string + PkgName string + Releases []*PkgReleaseInfo + }{ + RefreshURLPath: *RefreshURLPath, + PkgName: pkgName, + Releases: releases, + }) + if err != nil { + log.Println("error", r.RemoteAddr, "list", pkgName, err) + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + w.Write(buf.Bytes()) +} diff --git a/main.go b/main.go index f4a4359..76f4b8a 100644 --- a/main.go +++ b/main.go @@ -28,7 +28,6 @@ import ( "errors" "flag" "fmt" - "io/ioutil" "log" "net" "net/http" @@ -36,7 +35,6 @@ import ( "os" "os/signal" "path/filepath" - "regexp" "runtime" "strings" "syscall" @@ -48,18 +46,6 @@ import ( const ( Version = "3.0.0" UserAgent = "GoCheese/" + Version - HTMLBegin = ` - - - - Links for %s - - -` - HTMLEnd = " \n\n" - HTMLElement = " %s\n" - InternalFlag = ".internal" - GPGSigExt = ".asc" Warranty = `This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -74,155 +60,39 @@ You should have received a copy of the GNU General Public License along with this program. 
If not, see .` ) -const ( - HashAlgoSHA256 = "sha256" - HashAlgoBLAKE2b256 = "blake2_256" - HashAlgoSHA512 = "sha512" - HashAlgoMD5 = "md5" -) - var ( - pkgPyPI = regexp.MustCompile(`^.*]*>(.+)
.*$`) - normalizationRe = regexp.MustCompilePOSIX("[-_.]+") - - knownHashAlgos []string = []string{ - HashAlgoSHA256, - HashAlgoBLAKE2b256, - HashAlgoSHA512, - HashAlgoMD5, - } + Root = flag.String("root", "./packages", "Path to packages directory") + Bind = flag.String("bind", "[::]:8080", "Address to bind to") + MaxClients = flag.Int("maxclients", 128, "Maximal amount of simultaneous clients") + DoUCSPI = flag.Bool("ucspi", false, "Work as UCSPI-TCP service") - root = flag.String("root", "./packages", "Path to packages directory") - bind = flag.String("bind", "[::]:8080", "Address to bind to") - maxClients = flag.Int("maxclients", 128, "Maximal amount of simultaneous clients") - doUCSPI = flag.Bool("ucspi", false, "Work as UCSPI-TCP service") + TLSCert = flag.String("tls-cert", "", "Path to TLS X.509 certificate") + TLSKey = flag.String("tls-key", "", "Path to TLS X.509 private key") - tlsCert = flag.String("tls-cert", "", "Path to TLS X.509 certificate") - tlsKey = flag.String("tls-key", "", "Path to TLS X.509 private key") + NoRefreshURLPath = flag.String("norefresh", "/norefresh/", "Non-refreshing URL path") + RefreshURLPath = flag.String("refresh", "/simple/", "Auto-refreshing URL path") + GPGUpdateURLPath = flag.String("gpgupdate", "/gpgupdate/", "GPG forceful refreshing URL path") + JSONURLPath = flag.String("json", "/pypi/", "JSON API URL path") - norefreshURLPath = flag.String("norefresh", "/norefresh/", "Non-refreshing URL path") - refreshURLPath = flag.String("refresh", "/simple/", "Auto-refreshing URL path") - gpgUpdateURLPath = flag.String("gpgupdate", "/gpgupdate/", "GPG forceful refreshing URL path") + PyPIURL = flag.String("pypi", "https://pypi.org/simple/", "Upstream (PyPI) URL") + PyPICertHash = flag.String("pypi-cert-hash", "", "Authenticate upstream by its X.509 certificate's SPKI SHA256 hash") + JSONURL = flag.String("pypi-json", "https://pypi.org/pypi/", "Enable and use specified JSON API upstream URL") - pypiURL = flag.String("pypi", 
"https://pypi.org/simple/", "Upstream (PyPI) URL") - pypiCertHash = flag.String("pypi-cert-hash", "", "Authenticate upstream by its X.509 certificate's SPKI SHA256 hash") + PasswdPath = flag.String("passwd", "", "Path to FIFO for upload authentication") + PasswdListPath = flag.String("passwd-list", "", "Path to FIFO for login listing") + PasswdCheck = flag.Bool("passwd-check", false, "Run password checker") - passwdPath = flag.String("passwd", "", "Path to FIFO for upload authentication") - passwdListPath = flag.String("passwd-list", "", "Path to FIFO for login listing") - passwdCheck = flag.Bool("passwd-check", false, "Run password checker") + LogTimestamped = flag.Bool("log-timestamped", false, "Prepend timestmap to log messages") + FSCK = flag.Bool("fsck", false, "Check integrity of all packages (errors are in stderr)") + DoVersion = flag.Bool("version", false, "Print version information") + DoWarranty = flag.Bool("warranty", false, "Print warranty information") - logTimestamped = flag.Bool("log-timestamped", false, "Prepend timestmap to log messages") - fsck = flag.Bool("fsck", false, "Check integrity of all packages (errors are in stderr)") - version = flag.Bool("version", false, "Print version information") - warranty = flag.Bool("warranty", false, "Print warranty information") - - killed bool - pypiURLParsed *url.URL + Killed bool ) -func mkdirForPkg(w http.ResponseWriter, r *http.Request, pkgName string) bool { - path := filepath.Join(*root, pkgName) - if _, err := os.Stat(path); os.IsNotExist(err) { - if err = os.Mkdir(path, os.FileMode(0777)); err != nil { - log.Println("error", r.RemoteAddr, "mkdir", pkgName, err) - http.Error(w, err.Error(), http.StatusInternalServerError) - return false - } - log.Println(r.RemoteAddr, "mkdir", pkgName) - } - return true -} - -func listRoot(w http.ResponseWriter, r *http.Request) { - files, err := ioutil.ReadDir(*root) - if err != nil { - log.Println("error", r.RemoteAddr, "root", err) - http.Error(w, err.Error(), 
http.StatusInternalServerError) - return - } - var result bytes.Buffer - result.WriteString(fmt.Sprintf(HTMLBegin, "root")) - for _, file := range files { - if file.Mode().IsDir() { - result.WriteString(fmt.Sprintf( - HTMLElement, - *refreshURLPath+file.Name()+"/", - "", file.Name(), - )) - } - } - result.WriteString(HTMLEnd) - w.Write(result.Bytes()) -} - -func listDir( - w http.ResponseWriter, - r *http.Request, - pkgName string, - autorefresh, gpgUpdate bool, -) { - dirPath := filepath.Join(*root, pkgName) - if autorefresh { - if !refreshDir(w, r, pkgName, "", gpgUpdate) { - return - } - } else if _, err := os.Stat(dirPath); os.IsNotExist(err) && !refreshDir(w, r, pkgName, "", false) { - return - } - fis, err := ioutil.ReadDir(dirPath) - if err != nil { - log.Println("error", r.RemoteAddr, "list", pkgName, err) - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - files := make(map[string]struct{}, len(fis)/2) - for _, fi := range fis { - files[fi.Name()] = struct{}{} - } - var result bytes.Buffer - result.WriteString(fmt.Sprintf(HTMLBegin, pkgName)) - for _, algo := range knownHashAlgos { - for fn := range files { - if killed { - // Skip expensive I/O when shutting down - http.Error(w, "shutting down", http.StatusInternalServerError) - return - } - if !strings.HasSuffix(fn, "."+algo) { - continue - } - delete(files, fn) - digest, err := ioutil.ReadFile(filepath.Join(dirPath, fn)) - if err != nil { - log.Println("error", r.RemoteAddr, "list", fn, err) - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - fnClean := strings.TrimSuffix(fn, "."+algo) - delete(files, fnClean) - gpgSigAttr := "" - if _, err = os.Stat(filepath.Join(dirPath, fnClean+GPGSigExt)); err == nil { - gpgSigAttr = " data-gpg-sig=true" - delete(files, fnClean+GPGSigExt) - } - result.WriteString(fmt.Sprintf( - HTMLElement, - strings.Join([]string{ - *refreshURLPath, pkgName, "/", fnClean, - "#", algo, "=", hex.EncodeToString(digest), - }, ""), - 
gpgSigAttr, - fnClean, - )) - } - } - result.WriteString(HTMLEnd) - w.Write(result.Bytes()) -} - func servePkg(w http.ResponseWriter, r *http.Request, pkgName, filename string) { log.Println(r.RemoteAddr, "get", filename) - path := filepath.Join(*root, pkgName, filename) + path := filepath.Join(*Root, pkgName, filename) if _, err := os.Stat(path); os.IsNotExist(err) { if !refreshDir(w, r, pkgName, filename, false) { return @@ -238,13 +108,13 @@ func handler(w http.ResponseWriter, r *http.Request) { var path string var autorefresh bool var gpgUpdate bool - if strings.HasPrefix(r.URL.Path, *norefreshURLPath) { - path = strings.TrimPrefix(r.URL.Path, *norefreshURLPath) - } else if strings.HasPrefix(r.URL.Path, *refreshURLPath) { - path = strings.TrimPrefix(r.URL.Path, *refreshURLPath) + if strings.HasPrefix(r.URL.Path, *NoRefreshURLPath) { + path = strings.TrimPrefix(r.URL.Path, *NoRefreshURLPath) + } else if strings.HasPrefix(r.URL.Path, *RefreshURLPath) { + path = strings.TrimPrefix(r.URL.Path, *RefreshURLPath) autorefresh = true - } else if strings.HasPrefix(r.URL.Path, *gpgUpdateURLPath) { - path = strings.TrimPrefix(r.URL.Path, *gpgUpdateURLPath) + } else if strings.HasPrefix(r.URL.Path, *GPGUpdateURLPath) { + path = strings.TrimPrefix(r.URL.Path, *GPGUpdateURLPath) autorefresh = true gpgUpdate = true } else { @@ -260,7 +130,7 @@ func handler(w http.ResponseWriter, r *http.Request) { if parts[0] == "" { listRoot(w, r) } else { - listDir(w, r, parts[0], autorefresh, gpgUpdate) + serveListDir(w, r, parts[0], autorefresh, gpgUpdate) } } else { servePkg(w, r, parts[0], parts[1]) @@ -274,32 +144,32 @@ func handler(w http.ResponseWriter, r *http.Request) { func main() { flag.Parse() - if *warranty { + if *DoWarranty { fmt.Println(Warranty) return } - if *version { + if *DoVersion { fmt.Println("GoCheese", Version, "built with", runtime.Version()) return } - if *logTimestamped { + if *LogTimestamped { log.SetFlags(log.Ldate | log.Lmicroseconds | log.Lshortfile) } else { 
log.SetFlags(log.Lshortfile) } - if !*doUCSPI { + if !*DoUCSPI { log.SetOutput(os.Stdout) } - if *fsck { + if *FSCK { if !goodIntegrity() { os.Exit(1) } return } - if *passwdCheck { + if *PasswdCheck { if passwdReader(os.Stdin) { os.Exit(0) } else { @@ -307,11 +177,11 @@ func main() { } } - if *passwdPath != "" { + if *PasswdPath != "" { go func() { for { fd, err := os.OpenFile( - *passwdPath, + *PasswdPath, os.O_RDONLY, os.FileMode(0666), ) @@ -323,11 +193,11 @@ func main() { } }() } - if *passwdListPath != "" { + if *PasswdListPath != "" { go func() { for { fd, err := os.OpenFile( - *passwdListPath, + *PasswdListPath, os.O_WRONLY|os.O_APPEND, os.FileMode(0666), ) @@ -340,12 +210,12 @@ func main() { }() } - if (*tlsCert != "" && *tlsKey == "") || (*tlsCert == "" && *tlsKey != "") { + if (*TLSCert != "" && *TLSKey == "") || (*TLSCert == "" && *TLSKey != "") { log.Fatalln("Both -tls-cert and -tls-key are required") } var err error - pypiURLParsed, err = url.Parse(*pypiURL) + PyPIURLParsed, err = url.Parse(*PyPIURL) if err != nil { log.Fatalln(err) } @@ -353,12 +223,12 @@ func main() { ClientSessionCache: tls.NewLRUClientSessionCache(16), NextProtos: []string{"h2", "http/1.1"}, } - pypiHTTPTransport = http.Transport{ + PyPIHTTPTransport = http.Transport{ ForceAttemptHTTP2: true, TLSClientConfig: &tlsConfig, } - if *pypiCertHash != "" { - ourDgst, err := hex.DecodeString(*pypiCertHash) + if *PyPICertHash != "" { + ourDgst, err := hex.DecodeString(*PyPICertHash) if err != nil { log.Fatalln(err) } @@ -376,13 +246,16 @@ func main() { ReadTimeout: time.Minute, WriteTimeout: time.Minute, } - http.HandleFunc(*norefreshURLPath, handler) - http.HandleFunc(*refreshURLPath, handler) - if *gpgUpdateURLPath != "" { - http.HandleFunc(*gpgUpdateURLPath, handler) + http.HandleFunc("/", serveHRRoot) + http.HandleFunc("/hr/", serveHRPkg) + http.HandleFunc(*JSONURLPath, serveJSON) + http.HandleFunc(*NoRefreshURLPath, handler) + http.HandleFunc(*RefreshURLPath, handler) + if 
*GPGUpdateURLPath != "" { + http.HandleFunc(*GPGUpdateURLPath, handler) } - if *doUCSPI { + if *DoUCSPI { server.SetKeepAlivesEnabled(false) ln := &UCSPI{} server.ConnState = connStater @@ -394,18 +267,18 @@ func main() { return } - ln, err := net.Listen("tcp", *bind) + ln, err := net.Listen("tcp", *Bind) if err != nil { log.Fatal(err) } - ln = netutil.LimitListener(ln, *maxClients) + ln = netutil.LimitListener(ln, *MaxClients) needsShutdown := make(chan os.Signal, 0) exitErr := make(chan error, 0) signal.Notify(needsShutdown, syscall.SIGTERM, syscall.SIGINT) go func(s *http.Server) { <-needsShutdown - killed = true + Killed = true log.Println("shutting down") ctx, cancel := context.WithTimeout(context.TODO(), time.Minute) exitErr <- s.Shutdown(ctx) @@ -413,15 +286,17 @@ func main() { }(server) log.Println( - "GoCheese", Version, "listens:", - "root:", *root, - "bind:", *bind, - "pypi:", *pypiURL, + UserAgent, "ready:", + "root:", *Root, + "bind:", *Bind, + "pypi:", *PyPIURL, + "json:", *JSONURL, + "hr: /", ) - if *tlsCert == "" { + if *TLSCert == "" { err = server.Serve(ln) } else { - err = server.ServeTLS(ln, *tlsCert, *tlsKey) + err = server.ServeTLS(ln, *TLSCert, *TLSKey) } if err != http.ErrServerClosed { log.Fatal(err) diff --git a/makedist.sh b/makedist.sh index 6eb38a7..c7424fd 100755 --- a/makedist.sh +++ b/makedist.sh @@ -13,17 +13,20 @@ redo-ifchange VERSION go mod vendor -cat > download.texi < doc/download.texi < $texi < + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, version 3 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. 
If not, see . +*/ + +package main + +import "strings" + +const ( + MetadataFile = ".metadata.rec" + + // https://packaging.python.org/specifications/core-metadata/ + MetadataVersion = "2.1" + MetadataFieldMetadataVersion = "Metadata-Version" + MetadataFieldName = "Name" + MetadataFieldVersion = "Version" + MetadataFieldDescription = "Description" + MetadataFieldPlatform = "Platform" + MetadataFieldSupportedPlatform = "Supported-Platform" + MetadataFieldSummary = "Summary" + MetadataFieldDescriptionContentType = "Description-Content-Type" + MetadataFieldKeywords = "Keywords" + MetadataFieldHomePage = "Home-page" + MetadataFieldAuthor = "Author" + MetadataFieldAuthorEmail = "Author-email" + MetadataFieldMaintainer = "Maintainer" + MetadataFieldMaintainerEmail = "Maintainer-Email" + MetadataFieldLicense = "License" + MetadataFieldRequiresDist = "Requires-Dist" + MetadataFieldRequiresPython = "Requires-Python" + MetadataFieldRequiresExternal = "Requires-External" + MetadataFieldProjectURL = "Project-URL" + MetadataFieldProvidesExtra = "Provides-Extra" + MetadataFieldClassifier = "Classifier" +) + +func metadataFieldToRecField(f string) string { + return strings.ReplaceAll(f, "-", "") +} + +// It should follow https://www.python.org/dev/peps/pep-0566/ +type PkgInfo struct { + Name string `json:"name"` + Version string `json:"version"` + Platform []string `json:"platform,omitempty"` + SupportedPlatform []string `json:"supported_platform,omitempty"` + Summary string `json:"summary,omitempty"` + Description string `json:"description,omitempty"` + DescriptionContentType string `json:"description_content_type,omitempty"` + Keywords string `json:"keywords,omitempty"` + HomePage string `json:"home_page,omitempty"` + Author string `json:"author,omitempty"` + AuthorEmail string `json:"author_email,omitempty"` + Maintainer string `json:"maintainer,omitempty"` + MaintainerEmail string `json:"maintainer_email,omitempty"` + License string `json:"license,omitempty"` + Classifier 
[]string `json:"classifier,omitempty"` + RequiresDist []string `json:"requires_dist,omitempty"` + RequiresPython string `json:"requires_python,omitempty"` + RequiresExternal []string `json:"requires_external,omitempty"` + ProjectURL []string `json:"project_url,omitempty"` + ProvidesExtra []string `json:"provides_extra,omitempty"` +} + +// But current PyPI does not follow PEP-0566, so just ignore some fields +type PkgInfoStripped struct { + Name string `json:"name"` + Version string `json:"version"` + Summary string `json:"summary,omitempty"` + Description string `json:"description,omitempty"` + DescriptionContentType string `json:"description_content_type,omitempty"` + Keywords string `json:"keywords,omitempty"` + HomePage string `json:"home_page,omitempty"` + Author string `json:"author,omitempty"` + AuthorEmail string `json:"author_email,omitempty"` + Maintainer string `json:"maintainer,omitempty"` + MaintainerEmail string `json:"maintainer_email,omitempty"` + License string `json:"license,omitempty"` + Classifier []string `json:"classifiers,omitempty"` + RequiresPython string `json:"requires_python,omitempty"` + RequiresDist []string `json:"requires_dist,omitempty"` +} + +// Unknown format: no detailed specification available +// https://github.com/cooperlees/peps/blob/warehouse_json_api/pep-9999.rst +type PkgReleaseInfo struct { + Filename string `json:"filename"` + Version string `json:"version"` + UploadTimeISO8601 string `json:"upload_time_iso_8601"` + Size int64 `json:"size"` + HasSig bool `json:"has_sig"` + + Digests map[string]string `json:"digests"` +} + +type PkgMeta struct { + Info PkgInfo `json:"info"` + LastSerial int `json:"last_serial"` + Releases map[string][]*PkgReleaseInfo `json:"releases"` + URLs []*PkgReleaseInfo `json:"urls"` +} + +type PkgMetaStripped struct { + Info PkgInfoStripped `json:"info"` + Releases map[string][]*PkgReleaseInfo `json:"releases"` +} diff --git a/refresh.go b/refresh.go index 0764a03..092bd01 100644 --- a/refresh.go 
+++ b/refresh.go @@ -24,6 +24,7 @@ import ( "crypto/sha256" "crypto/sha512" "encoding/hex" + "encoding/json" "hash" "io" "io/ioutil" @@ -32,12 +33,34 @@ import ( "net/url" "os" "path/filepath" + "regexp" "strings" + "time" + "go.cypherpunks.ru/recfile" "golang.org/x/crypto/blake2b" ) -var pypiHTTPTransport http.Transport +const ( + HashAlgoSHA256 = "sha256" + HashAlgoBLAKE2b256 = "blake2_256" + HashAlgoSHA512 = "sha512" + HashAlgoMD5 = "md5" + GPGSigExt = ".asc" + InternalFlag = ".internal" +) + +var ( + PkgPyPI = regexp.MustCompile(`^.*]*>(.+).*$`) + PyPIURLParsed *url.URL + PyPIHTTPTransport http.Transport + KnownHashAlgos []string = []string{ + HashAlgoSHA256, + HashAlgoBLAKE2b256, + HashAlgoSHA512, + HashAlgoMD5, + } +) func blake2b256New() hash.Hash { h, err := blake2b.New256(nil) @@ -62,11 +85,177 @@ func refreshDir( pkgName, filenameGet string, gpgUpdate bool, ) bool { - if _, err := os.Stat(filepath.Join(*root, pkgName, InternalFlag)); err == nil { + if _, err := os.Stat(filepath.Join(*Root, pkgName, InternalFlag)); err == nil { return true } - c := http.Client{Transport: &pypiHTTPTransport} - resp, err := c.Get(*pypiURL + pkgName + "/") + c := http.Client{Transport: &PyPIHTTPTransport} + dirPath := filepath.Join(*Root, pkgName) + now := time.Now() + + var allReleases map[string][]*PkgReleaseInfo + if *JSONURL != "" { + resp, err := c.Do(agentedReq(*JSONURL + pkgName + "/json")) + if err != nil { + log.Println("error", r.RemoteAddr, "refresh-json", pkgName, err) + http.Error(w, err.Error(), http.StatusBadGateway) + return false + } + if resp.StatusCode != http.StatusOK { + resp.Body.Close() + log.Println( + "error", r.RemoteAddr, "refresh-json", pkgName, + "HTTP status:", resp.Status, + ) + http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway) + return false + } + body, err := ioutil.ReadAll(resp.Body) + resp.Body.Close() + var buf bytes.Buffer + var description string + wr := recfile.NewWriter(&buf) + var meta PkgMeta + err = 
json.Unmarshal(body, &meta) + if err == nil { + for recField, jsonField := range map[string]string{ + MetadataFieldName: meta.Info.Name, + MetadataFieldVersion: meta.Info.Version, + MetadataFieldSummary: meta.Info.Summary, + MetadataFieldDescriptionContentType: meta.Info.DescriptionContentType, + MetadataFieldKeywords: meta.Info.Keywords, + MetadataFieldHomePage: meta.Info.HomePage, + MetadataFieldAuthor: meta.Info.Author, + MetadataFieldAuthorEmail: meta.Info.AuthorEmail, + MetadataFieldMaintainer: meta.Info.Maintainer, + MetadataFieldMaintainerEmail: meta.Info.MaintainerEmail, + MetadataFieldLicense: meta.Info.License, + MetadataFieldRequiresPython: meta.Info.RequiresPython, + } { + if jsonField == "" { + continue + } + if _, err = wr.WriteFields(recfile.Field{ + Name: metadataFieldToRecField(recField), + Value: jsonField, + }); err != nil { + log.Fatalln(err) + } + } + for recField, jsonFields := range map[string][]string{ + MetadataFieldClassifier: meta.Info.Classifier, + MetadataFieldPlatform: meta.Info.Platform, + MetadataFieldSupportedPlatform: meta.Info.SupportedPlatform, + MetadataFieldRequiresDist: meta.Info.RequiresDist, + MetadataFieldRequiresExternal: meta.Info.RequiresExternal, + MetadataFieldProjectURL: meta.Info.ProjectURL, + MetadataFieldProvidesExtra: meta.Info.ProvidesExtra, + } { + for _, v := range jsonFields { + if _, err = wr.WriteFields(recfile.Field{ + Name: metadataFieldToRecField(recField), + Value: v, + }); err != nil { + log.Fatalln(err) + } + } + } + description = meta.Info.Description + allReleases = meta.Releases + } else { + var metaStripped PkgMetaStripped + err = json.Unmarshal(body, &metaStripped) + if err != nil { + log.Println( + "error", r.RemoteAddr, "refresh-json", pkgName, + "can not parse JSON:", err, + ) + http.Error(w, "can not parse metadata JSON", http.StatusBadGateway) + return false + } + for recField, jsonField := range map[string]string{ + MetadataFieldName: metaStripped.Info.Name, + MetadataFieldVersion: 
metaStripped.Info.Version, + MetadataFieldSummary: metaStripped.Info.Summary, + MetadataFieldDescriptionContentType: metaStripped.Info.DescriptionContentType, + MetadataFieldKeywords: metaStripped.Info.Keywords, + MetadataFieldHomePage: metaStripped.Info.HomePage, + MetadataFieldAuthor: metaStripped.Info.Author, + MetadataFieldAuthorEmail: metaStripped.Info.AuthorEmail, + MetadataFieldMaintainer: metaStripped.Info.Maintainer, + MetadataFieldMaintainerEmail: metaStripped.Info.MaintainerEmail, + MetadataFieldLicense: metaStripped.Info.License, + MetadataFieldRequiresPython: metaStripped.Info.RequiresPython, + } { + if jsonField == "" { + continue + } + if _, err = wr.WriteFields(recfile.Field{ + Name: metadataFieldToRecField(recField), + Value: jsonField, + }); err != nil { + log.Fatalln(err) + } + } + + for recField, jsonFields := range map[string][]string{ + MetadataFieldClassifier: metaStripped.Info.Classifier, + MetadataFieldRequiresDist: metaStripped.Info.RequiresDist, + } { + for _, v := range jsonFields { + if _, err = wr.WriteFields(recfile.Field{ + Name: metadataFieldToRecField(recField), + Value: v, + }); err != nil { + log.Fatalln(err) + } + } + } + description = metaStripped.Info.Description + allReleases = metaStripped.Releases + } + lines := strings.Split(description, "\n") + if len(lines) > 0 { + if _, err = wr.WriteFieldMultiline( + MetadataFieldDescription, lines, + ); err != nil { + log.Fatalln(err) + } + } + + if !mkdirForPkg(w, r, pkgName) { + return false + } + path := filepath.Join(dirPath, MetadataFile) + existing, err := ioutil.ReadFile(path) + if err != nil || bytes.Compare(existing, buf.Bytes()) != 0 { + if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil { + log.Println("error", r.RemoteAddr, "refresh-json", path, err) + http.Error(w, err.Error(), http.StatusInternalServerError) + return false + } + log.Println(r.RemoteAddr, "pypi", pkgName+"."+MetadataFile, "touch") + } + } + mtimes := make(map[string]time.Time) + for _, 
releases := range allReleases { + for _, rel := range releases { + if rel.Filename == "" || rel.UploadTimeISO8601 == "" { + continue + } + t, err := time.Parse(time.RFC3339Nano, rel.UploadTimeISO8601) + if err != nil { + log.Println( + "error", r.RemoteAddr, "refresh-json", pkgName, + "can not parse upload_time:", err, + ) + http.Error(w, "can not parse metadata JSON", http.StatusBadGateway) + return false + } + mtimes[rel.Filename] = t.Truncate(time.Second) + } + } + + resp, err := c.Do(agentedReq(*PyPIURL + pkgName + "/")) if err != nil { log.Println("error", r.RemoteAddr, "refresh", pkgName, err) http.Error(w, err.Error(), http.StatusBadGateway) @@ -74,7 +263,10 @@ func refreshDir( } if resp.StatusCode != http.StatusOK { resp.Body.Close() - log.Println("error", r.RemoteAddr, "refresh", pkgName, "HTTP status:", resp.Status) + log.Println( + "error", r.RemoteAddr, "refresh", pkgName, + "HTTP status:", resp.Status, + ) http.Error(w, "PyPI has non 200 status code", http.StatusBadGateway) return false } @@ -88,9 +280,8 @@ func refreshDir( if !mkdirForPkg(w, r, pkgName) { return false } - dirPath := filepath.Join(*root, pkgName) for _, lineRaw := range bytes.Split(body, []byte("\n")) { - submatches := pkgPyPI.FindStringSubmatch(string(lineRaw)) + submatches := PkgPyPI.FindStringSubmatch(string(lineRaw)) if len(submatches) == 0 { continue } @@ -152,14 +343,18 @@ func refreshDir( pkgURL.Fragment = "" if pkgURL.Host == "" { - uri = pypiURLParsed.ResolveReference(pkgURL).String() + uri = PyPIURLParsed.ResolveReference(pkgURL).String() } else { uri = pkgURL.String() } + mtime, mtimeExists := mtimes[filename] + if !mtimeExists { + mtime = now + } path := filepath.Join(dirPath, filename) if filename == filenameGet { - if killed { + if Killed { // Skip heavy remote call, when shutting down http.Error(w, "shutting down", http.StatusInternalServerError) return false @@ -230,6 +425,10 @@ func refreshDir( http.Error(w, err.Error(), http.StatusInternalServerError) return false } + 
if err = os.Chtimes(dst.Name(), mtime, mtime); err != nil { + log.Println("error", r.RemoteAddr, "pypi", filename, err) + http.Error(w, err.Error(), http.StatusInternalServerError) + } if err = os.Rename(dst.Name(), path); err != nil { log.Println("error", r.RemoteAddr, "pypi", filename, err) http.Error(w, err.Error(), http.StatusInternalServerError) @@ -243,11 +442,22 @@ func refreshDir( if hashAlgo != HashAlgoSHA256 { hashAlgo = HashAlgoSHA256 digest = hasherSHA256.Sum(nil) - for _, algo := range knownHashAlgos[1:] { + for _, algo := range KnownHashAlgos[1:] { os.Remove(path + "." + algo) } } } + if mtimeExists { + stat, err := os.Stat(path) + if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) { + log.Println(r.RemoteAddr, "pypi", filename, "touch") + if err = os.Chtimes(path, mtime, mtime); err != nil { + log.Println("error", r.RemoteAddr, "pypi", filename, err) + http.Error(w, err.Error(), http.StatusInternalServerError) + } + } + } + if filename == filenameGet || gpgUpdate { if _, err = os.Stat(path); err != nil { goto GPGSigSkip @@ -269,26 +479,38 @@ func refreshDir( log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "non PGP") goto GPGSigSkip } - if err = WriteFileSync(dirPath, path+GPGSigExt, sig); err != nil { + if err = WriteFileSync(dirPath, path+GPGSigExt, sig, mtime); err != nil { log.Println("error", r.RemoteAddr, "pypi", filename+GPGSigExt, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "downloaded") } + if mtimeExists { + stat, err := os.Stat(path + GPGSigExt) + if err == nil && !stat.ModTime().Truncate(time.Second).Equal(mtime) { + log.Println(r.RemoteAddr, "pypi", filename+GPGSigExt, "touch") + if err = os.Chtimes(path+GPGSigExt, mtime, mtime); err != nil { + log.Println("error", r.RemoteAddr, "pypi", filename, err) + http.Error(w, err.Error(), http.StatusInternalServerError) + } + } + } + GPGSigSkip: path = path + "." 
+ hashAlgo - _, err = os.Stat(path) - if err == nil { + stat, err := os.Stat(path) + if err == nil && + (mtimeExists && stat.ModTime().Truncate(time.Second).Equal(mtime)) { continue } - if !os.IsNotExist(err) { + if err != nil && !os.IsNotExist(err) { log.Println("error", r.RemoteAddr, "pypi", path, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false } log.Println(r.RemoteAddr, "pypi", filename, "touch") - if err = WriteFileSync(dirPath, path, digest); err != nil { + if err = WriteFileSync(dirPath, path, digest, mtime); err != nil { log.Println("error", r.RemoteAddr, "pypi", path, err) http.Error(w, err.Error(), http.StatusInternalServerError) return false diff --git a/ucspi.go b/ucspi.go index 8fce33d..ff15dbd 100644 --- a/ucspi.go +++ b/ucspi.go @@ -27,7 +27,7 @@ import ( ) var ( - aLongTimeAgo = time.Unix(1, 0) + ALongTimeAgo = time.Unix(1, 0) UCSPIJob sync.WaitGroup ) @@ -91,7 +91,7 @@ func (conn *UCSPIConn) SetReadDeadline(t time.Time) error { // An ugly hack to forcefully terminate pending read. // net/http calls SetReadDeadline(aLongTimeAgo), but file // descriptors are not capable to exit immediately that way. 
- if t.Equal(aLongTimeAgo) { + if t.Equal(ALongTimeAgo) { conn.eof <- struct{}{} } return os.Stdin.SetReadDeadline(t) diff --git a/upload.go b/upload.go index cd98df5..a72d779 100644 --- a/upload.go +++ b/upload.go @@ -29,8 +29,15 @@ import ( "net/http" "os" "path/filepath" + "regexp" + "strings" + "time" + + "go.cypherpunks.ru/recfile" ) +var NormalizationRe = regexp.MustCompilePOSIX("[-_.]+") + func serveUpload(w http.ResponseWriter, r *http.Request) { // Authentication username, password, ok := r.BasicAuth() @@ -59,8 +66,8 @@ func serveUpload(w http.ResponseWriter, r *http.Request) { http.Error(w, "single name is expected in request", http.StatusBadRequest) return } - pkgName := normalizationRe.ReplaceAllString(pkgNames[0], "-") - dirPath := filepath.Join(*root, pkgName) + pkgName := strings.ToLower(NormalizationRe.ReplaceAllString(pkgNames[0], "-")) + dirPath := filepath.Join(*Root, pkgName) var digestExpected []byte if digestExpectedHex, exists := r.MultipartForm.Value["sha256_digest"]; exists { digestExpected, err = hex.DecodeString(digestExpectedHex[0]) @@ -70,6 +77,7 @@ func serveUpload(w http.ResponseWriter, r *http.Request) { } } gpgSigsExpected := make(map[string]struct{}) + now := time.Now().UTC() // Checking is it internal package if _, err = os.Stat(filepath.Join(dirPath, InternalFlag)); err != nil { @@ -152,7 +160,7 @@ func serveUpload(w http.ResponseWriter, r *http.Request) { http.Error(w, err.Error(), http.StatusInternalServerError) return } - if err = WriteFileSync(dirPath, path+"."+HashAlgoSHA256, digest); err != nil { + if err = WriteFileSync(dirPath, path+"."+HashAlgoSHA256, digest, now); err != nil { log.Println("error", r.RemoteAddr, path+"."+HashAlgoSHA256, err) http.Error(w, err.Error(), http.StatusInternalServerError) return @@ -186,10 +194,61 @@ func serveUpload(w http.ResponseWriter, r *http.Request) { http.Error(w, err.Error(), http.StatusInternalServerError) return } - if err = WriteFileSync(dirPath, path, sig); err != nil { + if err = 
WriteFileSync(dirPath, path, sig, now); err != nil { log.Println("error", r.RemoteAddr, path, err) http.Error(w, err.Error(), http.StatusInternalServerError) return } } + + var buf bytes.Buffer + wr := recfile.NewWriter(&buf) + for formField, recField := range map[string]string{ + "name": MetadataFieldName, + "version": MetadataFieldVersion, + "platform": MetadataFieldPlatform, + "supported_platform": MetadataFieldSupportedPlatform, + "summary": MetadataFieldSummary, + "description": MetadataFieldDescription, + "description_content_type": MetadataFieldDescriptionContentType, + "keywords": MetadataFieldKeywords, + "home_page": MetadataFieldHomePage, + "author": MetadataFieldAuthor, + "author_email": MetadataFieldAuthorEmail, + "maintainer": MetadataFieldMaintainer, + "maintainer_email": MetadataFieldMaintainerEmail, + "license": MetadataFieldLicense, + "classifiers": MetadataFieldClassifier, + "requires_dist": MetadataFieldRequiresDist, + "requires_python": MetadataFieldRequiresPython, + "requires_external": MetadataFieldRequiresExternal, + "project_url": MetadataFieldProjectURL, + "provides_extra": MetadataFieldProvidesExtra, + } { + if vs, exists := r.MultipartForm.Value[formField]; exists { + for _, v := range vs { + lines := strings.Split(v, "\n") + if len(lines) > 1 { + _, err = wr.WriteFieldMultiline( + metadataFieldToRecField(recField), + lines, + ) + } else { + _, err = wr.WriteFields(recfile.Field{ + Name: metadataFieldToRecField(recField), + Value: lines[0], + }) + } + if err != nil { + log.Fatalln(err) + } + } + } + } + path := filepath.Join(dirPath, MetadataFile) + if err = WriteFileSync(dirPath, path, buf.Bytes(), now); err != nil { + log.Println("error", r.RemoteAddr, path, err) + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } }