From: Sergey Matveev Date: Wed, 4 Oct 2023 12:31:42 +0000 (+0300) Subject: Storage optimisations for the same often used data X-Git-Tag: v2.0.0~21 X-Git-Url: http://www.git.cypherpunks.ru/?p=goredo.git;a=commitdiff_plain;h=0db0e21921706ba645c229e809fd3668542dcaae Storage optimisations for the same often used data --- diff --git a/dep.go b/dep.go index e587e6a..94daa2a 100644 --- a/dep.go +++ b/dep.go @@ -38,6 +38,8 @@ var ( DepCwd string ErrBadRecFormat = errors.New("invalid format of .rec") + InodeCache = make(map[string][]*Inode) + HashCache = make(map[string][]string) ) func recfileWrite(fdDep io.StringWriter, fields ...recfile.Field) error { @@ -70,23 +72,23 @@ func stamp(fdDep, src *os.File) error { if err != nil { return err } - tracef(CDebug, "stamp: %s <- %s", fdDep.Name(), hex.EncodeToString(hsh)) + tracef(CDebug, "stamp: %s <- %s", fdDep.Name(), hex.EncodeToString([]byte(hsh))) return recfileWrite( fdDep, recfile.Field{Name: "Type", Value: DepTypeStamp}, - recfile.Field{Name: "Hash", Value: hex.EncodeToString(hsh)}, + recfile.Field{Name: "Hash", Value: hex.EncodeToString([]byte(hsh))}, ) } -func fileHash(fd *os.File) ([]byte, error) { +func fileHash(fd *os.File) (string, error) { h := blake3.New(32, nil) if _, err := io.Copy(h, bufio.NewReader(fd)); err != nil { - return nil, err + return "", err } - return h.Sum(nil), nil + return string(h.Sum(nil)), nil } -func depWrite(fdDep *os.File, cwd string, tgt *Tgt, hsh []byte) error { +func depWrite(fdDep *os.File, cwd string, tgt *Tgt, hsh string) error { tracef(CDebug, "ifchange: %s <- %s", fdDep.Name(), tgt) fd, err := os.Open(tgt.a) if err != nil { @@ -100,7 +102,7 @@ func depWrite(fdDep *os.File, cwd string, tgt *Tgt, hsh []byte) error { if isDir { return nil } - if hsh == nil { + if hsh == "" { hsh, err = fileHash(fd) if err != nil { return ErrLine(err) @@ -109,7 +111,7 @@ func depWrite(fdDep *os.File, cwd string, tgt *Tgt, hsh []byte) error { fields := []recfile.Field{ {Name: "Type", Value: DepTypeIfchange}, {Name: "Target", Value: tgt.RelTo(cwd)}, - {Name: "Hash", Value: hex.EncodeToString(hsh)}, + {Name: "Hash", Value: hex.EncodeToString([]byte(hsh))}, } fields = append(fields, inode.RecfileFields()...) return recfileWrite(fdDep, fields...) @@ -129,7 +131,7 @@ func depsWrite(fdDep *os.File, tgts []*Tgt) error { } tgtDir := path.Join(cwd, DirPrefix) if _, errStat := os.Stat(tgt.a); errStat == nil { - err = ErrLine(depWrite(fdDep, tgtDir, tgt, nil)) + err = ErrLine(depWrite(fdDep, tgtDir, tgt, "")) } else { tgtRel := tgt.RelTo(tgtDir) tracef(CDebug, "ifchange: %s <- %s (non-existing)", fdDep.Name(), tgtRel) @@ -151,13 +153,13 @@ func depsWrite(fdDep *os.File, tgts []*Tgt) error { type DepInfoIfchange struct { tgt *Tgt inode *Inode - hash []byte + hash string } type DepInfo struct { build string always bool - stamp []byte + stamp string ifcreates []*Tgt ifchanges []DepInfoIfchange } @@ -207,8 +209,8 @@ func depRead(tgt *Tgt) (*DepInfo, error) { depInfo.ifcreates = append(depInfo.ifcreates, NewTgt(path.Join(tgt.h, dep))) case DepTypeIfchange: - dep := m["Target"] - if dep == "" { + depRaw := m["Target"] + if depRaw == "" { return nil, ErrBadRecFormat } inode, err := inodeFromRec(m) @@ -216,16 +218,42 @@ func depRead(tgt *Tgt) (*DepInfo, error) { log.Print(err) return nil, ErrBadRecFormat } - hsh := mustHexDecode(m["Hash"]) + dep := NewTgt(path.Join(tgt.h, depRaw)) + + cachedFound := false + for _, cachedInode := range InodeCache[dep.a] { + if inode.Equals(cachedInode) { + inode = cachedInode + cachedFound = true + break + } + } + if InodeCache != nil && !cachedFound { + InodeCache[dep.a] = append(InodeCache[dep.a], inode) + } + + hsh := string(mustHexDecode(m["Hash"])) + cachedFound = false + for _, cachedHash := range HashCache[dep.a] { + if hsh == cachedHash { + hsh = cachedHash + cachedFound = true + break + } + } + if HashCache != nil && !cachedFound { + HashCache[dep.a] = append(HashCache[dep.a], hsh) + } + depInfo.ifchanges = append(depInfo.ifchanges, DepInfoIfchange{ - tgt: NewTgt(path.Join(tgt.h, dep)), inode: inode, hash: hsh, + tgt: dep, inode: inode, hash: hsh, }) case DepTypeStamp: hsh := m["Hash"] if hsh == "" { return nil, ErrBadRecFormat } - depInfo.stamp = mustHexDecode(hsh) + depInfo.stamp = string(mustHexDecode(hsh)) default: return nil, ErrBadRecFormat } diff --git a/depfix.go b/depfix.go index bf04879..87ecd88 100644 --- a/depfix.go +++ b/depfix.go @@ -18,7 +18,6 @@ along with this program. If not, see . package main import ( - "bytes" "encoding/hex" "errors" "io" @@ -150,7 +149,7 @@ func depFix(root string) error { if err != nil { return ErrLine(err) } - if !bytes.Equal(hsh, theirHsh) { + if hsh != string(theirHsh) { tracef( CDebug, "depfix: %s/%s -> %s: hash differs", root, entry.Name(), dep, @@ -160,7 +159,7 @@ func depFix(root string) error { fields = []recfile.Field{ {Name: "Type", Value: DepTypeIfchange}, {Name: "Target", Value: dep}, - {Name: "Hash", Value: hex.EncodeToString(hsh)}, + {Name: "Hash", Value: hex.EncodeToString([]byte(hsh))}, } fields = append(fields, inode.RecfileFields()...) fieldses[len(fieldses)-1] = fields diff --git a/ifchange.go b/ifchange.go index 9f7f4b5..c905371 100644 --- a/ifchange.go +++ b/ifchange.go @@ -35,6 +35,7 @@ func collectDeps( if err != nil { return nil } + DepInfoCache[tgt.Dep()] = depInfo seen[tgt.a] = struct{}{} var alwayses []*Tgt returnReady := false @@ -85,7 +86,11 @@ func buildDependants(tgts []*Tgt) map[string]*Tgt { } } } + InodeCache = make(map[string][]*Inode) } + TgtCache = nil + HashCache = nil + InodeCache = nil if len(seen) == 0 { return seen } diff --git a/ood.go b/ood.go index dffaa4e..9a70be4 100644 --- a/ood.go +++ b/ood.go @@ -20,7 +20,6 @@ along with this program. If not, see . package main import ( - "bytes" "errors" "fmt" "io" @@ -49,6 +48,7 @@ var ( OODCache = make(map[string]bool) FileExistsCache = make(map[string]bool) + DepInfoCache = make(map[string]*DepInfo) ErrMissingTarget = errors.New("invalid format of .rec: missing Target") ) @@ -105,22 +105,27 @@ func isOOD(tgt *Tgt, level int, seen map[string]*Tgt) (bool, error) { tracef(CDebug, "ood: %s%s -> cached: %v", indent, tgt, ood) return ood, nil } - depInfo, err := depRead(tgt) - if err != nil { - if errors.Is(err, fs.ErrNotExist) { - if isSrc(tgt) { - ood = false - tracef(CDebug, "ood: %s%s -> is source", indent, tgt) - } else { - ood = true - tracef(CDebug, "ood: %s%s -> no dep: %s", indent, tgt, tgt.Dep()) - } - OODCache[tgt.a] = ood - return ood, nil - } + depInfo := DepInfoCache[tgt.Dep()] + var err error + if depInfo == nil { + depInfo, err = depRead(tgt) if err != nil { - return true, TgtError{tgt, ErrLine(err)} + if errors.Is(err, fs.ErrNotExist) { + if isSrc(tgt) { + ood = false + tracef(CDebug, "ood: %s%s -> is source", indent, tgt) + } else { + ood = true + tracef(CDebug, "ood: %s%s -> no dep: %s", indent, tgt, tgt.Dep()) + } + OODCache[tgt.a] = ood + return ood, nil + } + if err != nil { + return true, TgtError{tgt, ErrLine(err)} + } } + DepInfoCache[tgt.Dep()] = depInfo } if depInfo.build == BuildUUID { @@ -183,7 +188,7 @@ func isOOD(tgt *Tgt, level int, seen map[string]*Tgt) (bool, error) { if err != nil { return ood, TgtError{tgt, ErrLine(err)} } - if !bytes.Equal(dep.hash, hsh) { + if dep.hash != hsh { tracef(CDebug, "ood: %s%s -> %s: hash differs", indent, tgt, dep.tgt) ood = true OODCache[dep.tgt.a] = ood diff --git a/path.go b/path.go index 0c96196..e7f3359 100644 --- a/path.go +++ b/path.go @@ -5,6 +5,8 @@ import ( "path/filepath" ) +var TgtCache = make(map[string]*Tgt) + func mustAbs(pth string) string { pth, err := filepath.Abs(pth) if err != nil { @@ -40,12 +42,21 @@ type Tgt struct { } func NewTgt(tgt string) *Tgt { - t := Tgt{a: mustAbs(tgt)} + a := mustAbs(tgt) + if TgtCache != nil { + if t := TgtCache[a]; t != nil { + return t + } + } + t := Tgt{a: a} t.h, t.t = path.Split(t.a) if len(t.h) > 1 { t.h = t.h[:len(t.h)-1] } t.rel = mustRel(Cwd, t.a) + if TgtCache != nil { + TgtCache[a] = &t + } return &t } diff --git a/run.go b/run.go index 903a063..7a6ac37 100644 --- a/run.go +++ b/run.go @@ -21,7 +21,6 @@ package main import ( "bufio" - "bytes" "crypto/rand" "encoding/hex" "errors" @@ -134,7 +133,7 @@ func mkdirs(pth string) error { } func isModified(depInfo *DepInfo, tgt *Tgt) ( - modified bool, ourInode *Inode, hshPrev []byte, err error, + modified bool, ourInode *Inode, hshPrev string, err error, ) { if depInfo == nil { return @@ -355,7 +354,7 @@ func runScript(tgt *Tgt, errs chan error, forced, traced bool) error { } } - if err = depWrite(fdDep, tgt.h, doFile, nil); err != nil { + if err = depWrite(fdDep, tgt.h, doFile, ""); err != nil { cleanup() return TgtError{tgt, ErrLine(err)} } @@ -722,8 +721,8 @@ func runScript(tgt *Tgt, errs chan error, forced, traced bool) error { goto Finish } } else { - var hsh []byte - if hshPrev != nil { + var hsh string + if hshPrev != "" { _, err = fd.Seek(0, io.SeekStart) if err != nil { err = ErrLine(err) @@ -734,7 +733,7 @@ func runScript(tgt *Tgt, errs chan error, forced, traced bool) error { err = ErrLine(err) goto Finish } - if bytes.Equal(hsh, hshPrev) { + if hsh == hshPrev { tracef(CDebug, "%s has same hash, not renaming", tgt) err = ErrLine(os.Remove(fd.Name())) if err != nil {