/* GoCheese -- Python private package repository and caching proxy Copyright (C) 2019 Sergey Matveev 2019 Elena Balakhonova This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 3 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ // Python private package repository and caching proxy package main import ( "bufio" "bytes" "context" "crypto/md5" "crypto/sha256" "crypto/sha512" "encoding/hex" "flag" "fmt" "hash" "io" "io/ioutil" "log" "net" "net/http" "net/url" "os" "os/signal" "path/filepath" "regexp" "runtime" "strings" "syscall" "time" "golang.org/x/crypto/blake2b" "golang.org/x/net/netutil" ) const ( HTMLBegin = ` Links for %s ` HTMLEnd = " \n\n" HTMLElement = " %s
\n" InternalFlag = ".internal" GPGSigExt = ".asc" Warranty = `This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 3 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see .` ) var ( pkgPyPI = regexp.MustCompile(`^.*]*>(.+)
.*$`) normalizationRe = regexp.MustCompilePOSIX("[-_.]+") HashAlgoSHA256 = "sha256" HashAlgoBLAKE2b256 = "blake2_256" HashAlgoSHA512 = "sha512" HashAlgoMD5 = "md5" knownHashAlgos []string = []string{ HashAlgoSHA256, HashAlgoBLAKE2b256, HashAlgoSHA512, HashAlgoMD5, } root = flag.String("root", "./packages", "Path to packages directory") bind = flag.String("bind", "[::]:8080", "Address to bind to") tlsCert = flag.String("tls-cert", "", "Path to TLS X.509 certificate") tlsKey = flag.String("tls-key", "", "Path to TLS X.509 private key") norefreshURLPath = flag.String("norefresh", "/norefresh/", "Non-refreshing URL path") refreshURLPath = flag.String("refresh", "/simple/", "Auto-refreshing URL path") gpgUpdateURLPath = flag.String("gpgupdate", "/gpgupdate/", "GPG forceful refreshing URL path") pypiURL = flag.String("pypi", "https://pypi.org/simple/", "Upstream PyPI URL") passwdPath = flag.String("passwd", "passwd", "Path to file with authenticators") passwdCheck = flag.Bool("passwd-check", false, "Test the -passwd file for syntax errors and exit") fsck = flag.Bool("fsck", false, "Check integrity of all packages") maxClients = flag.Int("maxclients", 128, "Maximal amount of simultaneous clients") version = flag.Bool("version", false, "Print version information") warranty = flag.Bool("warranty", false, "Print warranty information") Version string = "UNKNOWN" killed bool pypiURLParsed *url.URL ) func mkdirForPkg(w http.ResponseWriter, r *http.Request, dir string) bool { path := filepath.Join(*root, dir) if _, err := os.Stat(path); os.IsNotExist(err) { if err = os.Mkdir(path, os.FileMode(0777)); err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return false } log.Println(r.RemoteAddr, "mkdir", dir) } return true } func blake2b256New() hash.Hash { h, err := blake2b.New256(nil) if err != nil { panic(err) } return h } func refreshDir( w http.ResponseWriter, r *http.Request, dir, filenameGet string, gpgUpdate bool, ) bool { if _, err := os.Stat(filepath.Join(*root, dir, InternalFlag)); err == nil { return true } resp, err := http.Get(*pypiURL + dir + "/") if err != nil { http.Error(w, err.Error(), http.StatusBadGateway) return false } body, err := ioutil.ReadAll(resp.Body) resp.Body.Close() if err != nil { http.Error(w, err.Error(), http.StatusBadGateway) return false } if !mkdirForPkg(w, r, dir) { return false } dirPath := filepath.Join(*root, dir) for _, lineRaw := range bytes.Split(body, []byte("\n")) { submatches := pkgPyPI.FindStringSubmatch(string(lineRaw)) if len(submatches) == 0 { continue } uri := submatches[1] filename := submatches[2] pkgURL, err := url.Parse(uri) if err != nil { http.Error(w, err.Error(), http.StatusBadGateway) return false } if pkgURL.Fragment == "" { log.Println(r.RemoteAddr, "pypi", filename, "no digest provided") http.Error(w, "no digest provided", http.StatusBadGateway) return false } digestInfo := strings.Split(pkgURL.Fragment, "=") if len(digestInfo) == 1 { // Ancient non PEP-0503 PyPIs, assume MD5 digestInfo = []string{"md5", digestInfo[0]} } else if len(digestInfo) != 2 { log.Println(r.RemoteAddr, "pypi", filename, "invalid digest provided") http.Error(w, "invalid digest provided", http.StatusBadGateway) return false } digest, err := hex.DecodeString(digestInfo[1]) if err != nil { http.Error(w, err.Error(), http.StatusBadGateway) return false } hashAlgo := digestInfo[0] var hasherNew func() hash.Hash var hashSize int switch hashAlgo { case HashAlgoMD5: hasherNew = md5.New hashSize = md5.Size case HashAlgoSHA256: hasherNew = sha256.New hashSize = sha256.Size case HashAlgoSHA512: hasherNew = sha512.New hashSize = sha512.Size case HashAlgoBLAKE2b256: hasherNew = blake2b256New hashSize = blake2b.Size256 default: log.Println( r.RemoteAddr, "pypi", filename, "unknown digest algorithm", hashAlgo, ) http.Error(w, "unknown digest algorithm", http.StatusBadGateway) return false } if len(digest) != hashSize { log.Println(r.RemoteAddr, "pypi", filename, "invalid digest length") http.Error(w, "invalid digest length", http.StatusBadGateway) return false } pkgURL.Fragment = "" if pkgURL.Host == "" { uri = pypiURLParsed.ResolveReference(pkgURL).String() } else { uri = pkgURL.String() } path := filepath.Join(dirPath, filename) if filename == filenameGet { if killed { // Skip heavy remote call, when shutting down http.Error(w, "shutting down", http.StatusInternalServerError) return false } log.Println(r.RemoteAddr, "pypi download", filename) resp, err = http.Get(uri) if err != nil { log.Println(r.RemoteAddr, "pypi download error:", err.Error()) http.Error(w, err.Error(), http.StatusBadGateway) return false } defer resp.Body.Close() hasher := hasherNew() hasherSHA256 := sha256.New() dst, err := TempFile(dirPath) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return false } dstBuf := bufio.NewWriter(dst) wrs := []io.Writer{hasher, dstBuf} if hashAlgo != HashAlgoSHA256 { wrs = append(wrs, hasherSHA256) } wr := io.MultiWriter(wrs...) if _, err = io.Copy(wr, resp.Body); err != nil { os.Remove(dst.Name()) dst.Close() http.Error(w, err.Error(), http.StatusInternalServerError) return false } if err = dstBuf.Flush(); err != nil { os.Remove(dst.Name()) dst.Close() http.Error(w, err.Error(), http.StatusInternalServerError) return false } if bytes.Compare(hasher.Sum(nil), digest) != 0 { log.Println(r.RemoteAddr, "pypi", filename, "digest mismatch") os.Remove(dst.Name()) dst.Close() http.Error(w, "digest mismatch", http.StatusBadGateway) return false } if err = dst.Sync(); err != nil { os.Remove(dst.Name()) dst.Close() http.Error(w, err.Error(), http.StatusInternalServerError) return false } if err = dst.Close(); err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return false } if err = os.Rename(dst.Name(), path); err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return false } if err = DirSync(dirPath); err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return false } if hashAlgo != HashAlgoSHA256 { hashAlgo = HashAlgoSHA256 digest = hasherSHA256.Sum(nil) for _, algo := range knownHashAlgos[1:] { os.Remove(path + "." + algo) } } } if filename == filenameGet || gpgUpdate { if _, err = os.Stat(path); err != nil { goto GPGSigSkip } resp, err := http.Get(uri + GPGSigExt) if err != nil { goto GPGSigSkip } if resp.StatusCode != http.StatusOK { resp.Body.Close() goto GPGSigSkip } sig, err := ioutil.ReadAll(resp.Body) resp.Body.Close() if err != nil { goto GPGSigSkip } if !bytes.HasPrefix(sig, []byte("-----BEGIN PGP SIGNATURE-----")) { log.Println(r.RemoteAddr, "pypi non PGP signature", filename) goto GPGSigSkip } if err = WriteFileSync(dirPath, path+GPGSigExt, sig); err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return false } log.Println(r.RemoteAddr, "pypi downloaded signature", filename) } GPGSigSkip: path = path + "." + hashAlgo _, err = os.Stat(path) if err == nil { continue } if !os.IsNotExist(err) { http.Error(w, err.Error(), http.StatusInternalServerError) return false } log.Println(r.RemoteAddr, "pypi touch", filename) if err = WriteFileSync(dirPath, path, digest); err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return false } } return true } func listRoot(w http.ResponseWriter, r *http.Request) { files, err := ioutil.ReadDir(*root) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } var result bytes.Buffer result.WriteString(fmt.Sprintf(HTMLBegin, "root")) for _, file := range files { if file.Mode().IsDir() { result.WriteString(fmt.Sprintf( HTMLElement, *refreshURLPath+file.Name()+"/", file.Name(), )) } } result.WriteString(HTMLEnd) w.Write(result.Bytes()) } func listDir( w http.ResponseWriter, r *http.Request, dir string, autorefresh, gpgUpdate bool, ) { dirPath := filepath.Join(*root, dir) if autorefresh { if !refreshDir(w, r, dir, "", gpgUpdate) { return } } else if _, err := os.Stat(dirPath); os.IsNotExist(err) && !refreshDir(w, r, dir, "", false) { return } fis, err := ioutil.ReadDir(dirPath) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } files := make(map[string]struct{}, len(fis)/2) for _, fi := range fis { files[fi.Name()] = struct{}{} } var result bytes.Buffer result.WriteString(fmt.Sprintf(HTMLBegin, dir)) for _, algo := range knownHashAlgos { for fn, _ := range files { if killed { // Skip expensive I/O when shutting down http.Error(w, "shutting down", http.StatusInternalServerError) return } if !strings.HasSuffix(fn, "."+algo) { continue } delete(files, fn) digest, err := ioutil.ReadFile(filepath.Join(dirPath, fn)) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } fnClean := strings.TrimSuffix(fn, "."+algo) delete(files, fnClean) gpgSigAttr := "" if _, err = os.Stat(filepath.Join(dirPath, fnClean+GPGSigExt)); err == nil { gpgSigAttr = " data-gpg-sig=true" delete(files, fnClean+GPGSigExt) } result.WriteString(fmt.Sprintf( HTMLElement, strings.Join([]string{ *refreshURLPath, dir, "/", fnClean, "#", algo, "=", hex.EncodeToString(digest), }, ""), gpgSigAttr, fnClean, )) } } result.WriteString(HTMLEnd) w.Write(result.Bytes()) } func servePkg(w http.ResponseWriter, r *http.Request, dir, filename string) { log.Println(r.RemoteAddr, "get", filename) path := filepath.Join(*root, dir, filename) if _, err := os.Stat(path); os.IsNotExist(err) { if !refreshDir(w, r, dir, filename, false) { return } } http.ServeFile(w, r, path) } func serveUpload(w http.ResponseWriter, r *http.Request) { // Authentication username, password, ok := r.BasicAuth() if !ok { log.Println(r.RemoteAddr, "unauthenticated", username) http.Error(w, "unauthenticated", http.StatusUnauthorized) return } auther, ok := passwords[username] if !ok || !auther.Auth(password) { log.Println(r.RemoteAddr, "unauthenticated", username) http.Error(w, "unauthenticated", http.StatusUnauthorized) return } // Form parsing var err error if err = r.ParseMultipartForm(1 << 20); err != nil { http.Error(w, err.Error(), http.StatusBadRequest) return } pkgNames, exists := r.MultipartForm.Value["name"] if !exists || len(pkgNames) != 1 { http.Error(w, "single name is expected in request", http.StatusBadRequest) return } pkgName := normalizationRe.ReplaceAllString(pkgNames[0], "-") dirPath := filepath.Join(*root, pkgName) var digestExpected []byte if digestExpectedHex, exists := r.MultipartForm.Value["sha256_digest"]; exists { digestExpected, err = hex.DecodeString(digestExpectedHex[0]) if err != nil { http.Error(w, "bad sha256_digest: "+err.Error(), http.StatusBadRequest) return } } gpgSigsExpected := make(map[string]struct{}) // Checking is it internal package if _, err = os.Stat(filepath.Join(dirPath, InternalFlag)); err != nil { log.Println(r.RemoteAddr, "non-internal package", pkgName) http.Error(w, "unknown internal package", http.StatusUnauthorized) return } for _, file := range r.MultipartForm.File["content"] { filename := file.Filename gpgSigsExpected[filename+GPGSigExt] = struct{}{} log.Println(r.RemoteAddr, "put", filename, "by", username) path := filepath.Join(dirPath, filename) if _, err = os.Stat(path); err == nil { log.Println(r.RemoteAddr, "already exists", filename) http.Error(w, "already exists", http.StatusBadRequest) return } if !mkdirForPkg(w, r, pkgName) { return } src, err := file.Open() defer src.Close() if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } dst, err := TempFile(dirPath) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } dstBuf := bufio.NewWriter(dst) hasher := sha256.New() wr := io.MultiWriter(hasher, dst) if _, err = io.Copy(wr, src); err != nil { os.Remove(dst.Name()) dst.Close() http.Error(w, err.Error(), http.StatusInternalServerError) return } if err = dstBuf.Flush(); err != nil { os.Remove(dst.Name()) dst.Close() http.Error(w, err.Error(), http.StatusInternalServerError) return } if err = dst.Sync(); err != nil { os.Remove(dst.Name()) dst.Close() http.Error(w, err.Error(), http.StatusInternalServerError) return } dst.Close() digest := hasher.Sum(nil) if digestExpected != nil { if bytes.Compare(digestExpected, digest) == 0 { log.Println(r.RemoteAddr, filename, "good checksum received") } else { log.Println(r.RemoteAddr, filename, "bad checksum received") http.Error(w, "bad checksum", http.StatusBadRequest) os.Remove(dst.Name()) return } } if err = os.Rename(dst.Name(), path); err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } if err = DirSync(dirPath); err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } if err = WriteFileSync(dirPath, path+"."+HashAlgoSHA256, digest); err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } } for _, file := range r.MultipartForm.File["gpg_signature"] { filename := file.Filename if _, exists := gpgSigsExpected[filename]; !exists { http.Error(w, "unexpected GPG signature filename", http.StatusBadRequest) return } delete(gpgSigsExpected, filename) log.Println(r.RemoteAddr, "put", filename, "by", username) path := filepath.Join(dirPath, filename) if _, err = os.Stat(path); err == nil { log.Println(r.RemoteAddr, "already exists", filename) http.Error(w, "already exists", http.StatusBadRequest) return } src, err := file.Open() if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } sig, err := ioutil.ReadAll(src) src.Close() if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } if err = WriteFileSync(dirPath, path, sig); err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } } } func handler(w http.ResponseWriter, r *http.Request) { switch r.Method { case "GET": var path string var autorefresh bool var gpgUpdate bool if strings.HasPrefix(r.URL.Path, *norefreshURLPath) { path = strings.TrimPrefix(r.URL.Path, *norefreshURLPath) } else if strings.HasPrefix(r.URL.Path, *refreshURLPath) { path = strings.TrimPrefix(r.URL.Path, *refreshURLPath) autorefresh = true } else if strings.HasPrefix(r.URL.Path, *gpgUpdateURLPath) { path = strings.TrimPrefix(r.URL.Path, *gpgUpdateURLPath) autorefresh = true gpgUpdate = true } else { http.Error(w, "unknown action", http.StatusBadRequest) return } parts := strings.Split(strings.TrimSuffix(path, "/"), "/") if len(parts) > 2 { http.Error(w, "invalid path", http.StatusBadRequest) return } if len(parts) == 1 { if parts[0] == "" { listRoot(w, r) } else { listDir(w, r, parts[0], autorefresh, gpgUpdate) } } else { servePkg(w, r, parts[0], parts[1]) } case "POST": serveUpload(w, r) default: http.Error(w, "unknown action", http.StatusBadRequest) } } func main() { flag.Parse() if *warranty { fmt.Println(Warranty) return } if *version { fmt.Println("GoCheese version " + Version + " built with " + runtime.Version()) return } if *fsck { if !goodIntegrity() { os.Exit(1) } return } if *passwdCheck { refreshPasswd() return } if (*tlsCert != "" && *tlsKey == "") || (*tlsCert == "" && *tlsKey != "") { log.Fatalln("Both -tls-cert and -tls-key are required") } var err error pypiURLParsed, err = url.Parse(*pypiURL) if err != nil { log.Fatalln(err) } refreshPasswd() log.Println("root:", *root, "bind:", *bind) ln, err := net.Listen("tcp", *bind) if err != nil { log.Fatal(err) } ln = netutil.LimitListener(ln, *maxClients) server := &http.Server{ ReadTimeout: time.Minute, WriteTimeout: time.Minute, } http.HandleFunc(*norefreshURLPath, handler) http.HandleFunc(*refreshURLPath, handler) if *gpgUpdateURLPath != "" { http.HandleFunc(*gpgUpdateURLPath, handler) } needsRefreshPasswd := make(chan os.Signal, 0) needsShutdown := make(chan os.Signal, 0) exitErr := make(chan error, 0) signal.Notify(needsRefreshPasswd, syscall.SIGHUP) signal.Notify(needsShutdown, syscall.SIGTERM, syscall.SIGINT) go func() { for range needsRefreshPasswd { log.Println("Refreshing passwords") refreshPasswd() } }() go func(s *http.Server) { <-needsShutdown killed = true log.Println("Shutting down") ctx, cancel := context.WithTimeout(context.TODO(), time.Minute) exitErr <- s.Shutdown(ctx) cancel() }(server) if *tlsCert == "" { err = server.Serve(ln) } else { err = server.ServeTLS(ln, *tlsCert, *tlsKey) } if err != http.ErrServerClosed { log.Fatal(err) } if err := <-exitErr; err != nil { log.Fatal(err) } }