From 317438ff2d0e1d2ee397a89bf692350df068a723 Mon Sep 17 00:00:00 2001 From: Sergey Matveev Date: Sat, 30 Sep 2023 15:22:32 +0300 Subject: [PATCH] Optimise memory storage of dependency information --- dep.go | 60 +++++++++++++++++++++--------- depfix.go | 8 ++-- ifchange.go | 22 ++++------- ood.go | 95 ++++++++++++++++++++++------------------------- run.go | 105 +++++++++++++++++++++++----------------------------- sources.go | 17 +++------ 6 files changed, 153 insertions(+), 154 deletions(-) diff --git a/dep.go b/dep.go index 4024519..0add0a7 100644 --- a/dep.go +++ b/dep.go @@ -21,9 +21,11 @@ package main import ( "bufio" + "bytes" "encoding/hex" "errors" "io" + "log" "os" "path" @@ -64,28 +66,27 @@ func always(fdDep *os.File) error { } func stamp(fdDep, src *os.File) error { - var hsh string hsh, err := fileHash(src) if err != nil { return err } - tracef(CDebug, "stamp: %s <- %s", fdDep.Name(), hsh) + tracef(CDebug, "stamp: %s <- %s", fdDep.Name(), hex.EncodeToString(hsh)) return recfileWrite( fdDep, recfile.Field{Name: "Type", Value: DepTypeStamp}, - recfile.Field{Name: "Hash", Value: hsh}, + recfile.Field{Name: "Hash", Value: hex.EncodeToString(hsh)}, ) } -func fileHash(fd *os.File) (string, error) { +func fileHash(fd *os.File) ([]byte, error) { h := blake3.New(32, nil) if _, err := io.Copy(h, bufio.NewReader(fd)); err != nil { - return "", err + return nil, err } - return hex.EncodeToString(h.Sum(nil)), nil + return h.Sum(nil), nil } -func depWrite(fdDep *os.File, cwd, tgt, hsh string) error { +func depWrite(fdDep *os.File, cwd, tgt string, hsh []byte) error { tracef(CDebug, "ifchange: %s <- %s", fdDep.Name(), tgt) fd, err := os.Open(path.Join(cwd, tgt)) if err != nil { @@ -99,7 +100,7 @@ func depWrite(fdDep *os.File, cwd, tgt, hsh string) error { if isDir { return nil } - if hsh == "" { + if hsh == nil { hsh, err = fileHash(fd) if err != nil { return ErrLine(err) @@ -108,7 +109,7 @@ func depWrite(fdDep *os.File, cwd, tgt, hsh string) error { fields := []recfile.Field{ {Name: "Type", Value: DepTypeIfchange}, {Name: "Target", Value: tgt}, - {Name: "Hash", Value: hsh}, + {Name: "Hash", Value: hex.EncodeToString(hsh)}, } fields = append(fields, inode.RecfileFields()...) return recfileWrite(fdDep, fields...) @@ -129,7 +130,7 @@ func depsWrite(fdDep *os.File, tgts []string) error { tgtDir := path.Join(cwd, DirPrefix) tgtRel := mustRel(tgtDir, tgtAbs) if _, errStat := os.Stat(tgt); errStat == nil { - err = ErrLine(depWrite(fdDep, tgtDir, tgtRel, "")) + err = ErrLine(depWrite(fdDep, tgtDir, tgtRel, nil)) } else { tracef(CDebug, "ifchange: %s <- %s (non-existing)", fdDep.Name(), tgtRel) fields := []recfile.Field{ @@ -147,18 +148,36 @@ func depsWrite(fdDep *os.File, tgts []string) error { return nil } +type DepInfoIfchange struct { + tgt string + inode *Inode + hash []byte +} + type DepInfo struct { build string always bool - stamp string + stamp []byte ifcreates []string - ifchanges []map[string]string + ifchanges []DepInfoIfchange +} + +func mustHexDecode(s string) []byte { + b, err := hex.DecodeString(s) + if err != nil { + log.Fatal(err) + } + return b } var missingBuild = errors.New(".rec missing Build:") -func depRead(fdDep io.Reader) (*DepInfo, error) { - r := recfile.NewReader(fdDep) +func depRead(pth string) (*DepInfo, error) { + data, err := os.ReadFile(pth) + if err != nil { + return nil, err + } + r := recfile.NewReader(bytes.NewReader(data)) m, err := r.NextMap() if err != nil { return nil, err @@ -187,14 +206,21 @@ func depRead(fdDep io.Reader) (*DepInfo, error) { } depInfo.ifcreates = append(depInfo.ifcreates, dep) case DepTypeIfchange: - delete(m, "Type") - depInfo.ifchanges = append(depInfo.ifchanges, m) + inode, err := inodeFromRec(m) + if err != nil { + log.Print(err) + return nil, ErrBadRecFormat + } + hsh := mustHexDecode(m["Hash"]) + depInfo.ifchanges = append(depInfo.ifchanges, DepInfoIfchange{ + tgt: m["Target"], inode: inode, hash: hsh, + }) case DepTypeStamp: hsh := m["Hash"] if hsh == "" { return nil, ErrBadRecFormat } - depInfo.stamp = hsh + depInfo.stamp = mustHexDecode(hsh) default: return nil, ErrBadRecFormat } diff --git a/depfix.go b/depfix.go index b045682..bf04879 100644 --- a/depfix.go +++ b/depfix.go @@ -18,6 +18,8 @@ along with this program. If not, see . package main import ( + "bytes" + "encoding/hex" "errors" "io" "io/fs" @@ -110,7 +112,7 @@ func depFix(root string) error { if err != nil { return ErrLine(err) } - theirHsh := m["Hash"] + theirHsh := mustHexDecode(m["Hash"]) fd, err := os.Open(path.Join(root, dep)) if err != nil { if errors.Is(err, fs.ErrNotExist) { @@ -148,7 +150,7 @@ func depFix(root string) error { if err != nil { return ErrLine(err) } - if hsh != theirHsh { + if !bytes.Equal(hsh, theirHsh) { tracef( CDebug, "depfix: %s/%s -> %s: hash differs", root, entry.Name(), dep, @@ -158,7 +160,7 @@ func depFix(root string) error { fields = []recfile.Field{ {Name: "Type", Value: DepTypeIfchange}, {Name: "Target", Value: dep}, - {Name: "Hash", Value: hsh}, + {Name: "Hash", Value: hex.EncodeToString(hsh)}, } fields = append(fields, inode.RecfileFields()...) fieldses[len(fieldses)-1] = fields diff --git a/ifchange.go b/ifchange.go index 0b8d694..84bd260 100644 --- a/ifchange.go +++ b/ifchange.go @@ -18,7 +18,6 @@ along with this program. If not, see . package main import ( - "os" "path" "strings" ) @@ -36,15 +35,11 @@ func collectDeps( return nil } depPath := path.Join(cwd, RedoDir, tgt+DepSuffix) - fdDep, err := os.Open(depPath) - if err != nil { - return nil - } - depInfo, err := depRead(fdDep) - fdDep.Close() + depInfo, err := depRead(depPath) if err != nil { return nil } + // DepInfoCache[depPath] = depInfo seen[tgtFull] = struct{}{} var alwayses []string returnReady := false @@ -62,19 +57,18 @@ func collectDeps( returnReady = true } } - for _, m := range depInfo.ifchanges { - dep := m["Target"] - if dep == "" { + for _, dep := range depInfo.ifchanges { + if dep.tgt == "" { return alwayses } - if dep == tgt { + if dep.tgt == tgt { continue } - if !includeSrc && isSrc(cwd, dep) { + if !includeSrc && isSrc(cwd, dep.tgt) { continue } if !returnReady { - depRel := cwdMustRel(cwd, dep) + depRel := cwdMustRel(cwd, dep.tgt) if m, ok := deps[depRel]; ok { m[tgtRel] = struct{}{} } else { @@ -83,7 +77,7 @@ func collectDeps( deps[depRel] = m } alwayses = append(alwayses, - collectDeps(cwd, dep, level+1, deps, includeSrc, seen)...) + collectDeps(cwd, dep.tgt, level+1, deps, includeSrc, seen)...) } } return alwayses diff --git a/ood.go b/ood.go index 4a284c6..3843fa1 100644 --- a/ood.go +++ b/ood.go @@ -20,6 +20,7 @@ along with this program. If not, see . package main import ( + "bytes" "errors" "fmt" "io" @@ -110,22 +111,22 @@ func isOOD(cwd, tgtOrig string, level int, seen map[string]struct{}) (bool, erro return ood, nil } depPath := path.Join(cwd, RedoDir, tgt+DepSuffix) - fdDep, err := os.Open(depPath) + depInfo, err := depRead(depPath) if err != nil { - if isSrc(cwd, tgt) { - ood = false - tracef(CDebug, "ood: %s%s -> is source", indent, tgtOrig) - } else { - ood = true - tracef(CDebug, "ood: %s%s -> no dep: %s", indent, tgtOrig, depPath) + if errors.Is(err, fs.ErrNotExist) { + if isSrc(cwd, tgt) { + ood = false + tracef(CDebug, "ood: %s%s -> is source", indent, tgtOrig) + } else { + ood = true + tracef(CDebug, "ood: %s%s -> no dep: %s", indent, tgtOrig, depPath) + } + OODCache[path.Join(cwd, tgt)] = ood + return ood, nil + } + if err != nil { + return true, TgtError{tgtOrig, ErrLine(err)} } - OODCache[path.Join(cwd, tgt)] = ood - return ood, nil - } - depInfo, err := depRead(fdDep) - fdDep.Close() - if err != nil { - return true, TgtError{tgtOrig, ErrLine(err)} } if depInfo.build == BuildUUID { @@ -147,48 +148,42 @@ func isOOD(cwd, tgtOrig string, level int, seen map[string]struct{}) (bool, erro } } - for _, m := range depInfo.ifchanges { - dep := m["Target"] - if dep == "" { + for _, dep := range depInfo.ifchanges { + if dep.tgt == "" { return ood, TgtError{tgtOrig, ErrMissingTarget} } - theirInode, err := inodeFromRec(m) - if err != nil { - return ood, TgtError{tgtOrig, fmt.Errorf("invalid format of .rec: %w", err)} - } - theirHsh := m["Hash"] - tracef(CDebug, "ood: %s%s -> %s: checking", indent, tgtOrig, dep) - ood, cached = OODCache[path.Join(cwd, dep)] + tracef(CDebug, "ood: %s%s -> %s: checking", indent, tgtOrig, dep.tgt) + ood, cached = OODCache[path.Join(cwd, dep.tgt)] if cached { - tracef(CDebug, "ood: %s%s -> %s: cached: %v", indent, tgtOrig, dep, ood) + tracef(CDebug, "ood: %s%s -> %s: cached: %v", indent, tgtOrig, dep.tgt, ood) if ood { goto Done } continue } - inode, err := inodeFromFileByPath(path.Join(cwd, dep)) + inode, err := inodeFromFileByPath(path.Join(cwd, dep.tgt)) if err != nil { if errors.Is(err, fs.ErrNotExist) { - tracef(CDebug, "ood: %s%s -> %s: not exists", indent, tgtOrig, dep) + tracef(CDebug, "ood: %s%s -> %s: not exists", indent, tgtOrig, dep.tgt) ood = true - OODCache[path.Join(cwd, dep)] = ood + OODCache[path.Join(cwd, dep.tgt)] = ood goto Done } return ood, TgtError{tgtOrig, ErrLine(err)} } - if inode.Size != theirInode.Size { - tracef(CDebug, "ood: %s%s -> %s: size differs", indent, tgtOrig, dep) + if inode.Size != dep.inode.Size { + tracef(CDebug, "ood: %s%s -> %s: size differs", indent, tgtOrig, dep.tgt) ood = true - OODCache[path.Join(cwd, dep)] = ood + OODCache[path.Join(cwd, dep.tgt)] = ood goto Done } - if InodeTrust != InodeTrustNone && inode.Equals(theirInode) { - tracef(CDebug, "ood: %s%s -> %s: same inode", indent, tgtOrig, dep) + if InodeTrust != InodeTrustNone && inode.Equals(dep.inode) { + tracef(CDebug, "ood: %s%s -> %s: same inode", indent, tgtOrig, dep.tgt) } else { - tracef(CDebug, "ood: %s%s -> %s: inode differs", indent, tgtOrig, dep) - fd, err := os.Open(path.Join(cwd, dep)) + tracef(CDebug, "ood: %s%s -> %s: inode differs", indent, tgtOrig, dep.tgt) + fd, err := os.Open(path.Join(cwd, dep.tgt)) if err != nil { return ood, TgtError{tgtOrig, ErrLine(err)} } @@ -197,41 +192,41 @@ func isOOD(cwd, tgtOrig string, level int, seen map[string]struct{}) (bool, erro if err != nil { return ood, TgtError{tgtOrig, ErrLine(err)} } - if theirHsh != hsh { - tracef(CDebug, "ood: %s%s -> %s: hash differs", indent, tgtOrig, dep) + if !bytes.Equal(dep.hash, hsh) { + tracef(CDebug, "ood: %s%s -> %s: hash differs", indent, tgtOrig, dep.tgt) ood = true - OODCache[path.Join(cwd, dep)] = ood + OODCache[path.Join(cwd, dep.tgt)] = ood goto Done } - tracef(CDebug, "ood: %s%s -> %s: same hash", indent, tgtOrig, dep) + tracef(CDebug, "ood: %s%s -> %s: same hash", indent, tgtOrig, dep.tgt) } - if dep == tgt { - tracef(CDebug, "ood: %s%s -> %s: same target", indent, tgtOrig, dep) + if dep.tgt == tgt { + tracef(CDebug, "ood: %s%s -> %s: same target", indent, tgtOrig, dep.tgt) continue } - if isSrc(cwd, dep) { - tracef(CDebug, "ood: %s%s -> %s: is source", indent, tgtOrig, dep) - OODCache[path.Join(cwd, dep)] = false + if isSrc(cwd, dep.tgt) { + tracef(CDebug, "ood: %s%s -> %s: is source", indent, tgtOrig, dep.tgt) + OODCache[path.Join(cwd, dep.tgt)] = false continue } - if _, ok := seen[cwdMustRel(cwd, dep)]; ok { - tracef(CDebug, "ood: %s%s -> %s: was always built", indent, tgtOrig, dep) - OODCache[path.Join(cwd, dep)] = false + if _, ok := seen[cwdMustRel(cwd, dep.tgt)]; ok { + tracef(CDebug, "ood: %s%s -> %s: was always built", indent, tgtOrig, dep.tgt) + OODCache[path.Join(cwd, dep.tgt)] = false continue } - depOOD, err := isOODWithTrace(cwd, dep, level+1, seen) + depOOD, err := isOODWithTrace(cwd, dep.tgt, level+1, seen) if err != nil { return ood, TgtError{tgtOrig, err} } if depOOD { - tracef(CDebug, "ood: %s%s -> %s: ood", indent, tgtOrig, dep) + tracef(CDebug, "ood: %s%s -> %s: ood", indent, tgtOrig, dep.tgt) ood = true goto Done } - tracef(CDebug, "ood: %s%s -> %s: !ood", indent, tgtOrig, dep) + tracef(CDebug, "ood: %s%s -> %s: !ood", indent, tgtOrig, dep.tgt) } Done: diff --git a/run.go b/run.go index 6bd7029..89640cb 100644 --- a/run.go +++ b/run.go @@ -21,12 +21,14 @@ package main import ( "bufio" + "bytes" "crypto/rand" "encoding/hex" "errors" "flag" "fmt" "io" + "io/fs" "log" "os" "os/exec" @@ -131,46 +133,30 @@ func mkdirs(pth string) error { return os.MkdirAll(pth, os.FileMode(0777)) } -func isModified(cwd, redoDir, tgt string) (bool, *Inode, string, error) { - fdDep, err := os.Open(path.Join(redoDir, tgt+DepSuffix)) - if err != nil { - if os.IsNotExist(err) { - return false, nil, "", nil - } - return false, nil, "", ErrLine(err) +func isModified(depInfo *DepInfo, cwd, tgt string) ( + modified bool, ourInode *Inode, hshPrev []byte, err error, +) { + if depInfo == nil { + return } - defer fdDep.Close() - r := recfile.NewReader(fdDep) - var modified bool - var ourInode *Inode - var hshPrev string - for { - m, err := r.NextMap() - if err != nil { - if errors.Is(err, io.EOF) { - break - } - return false, nil, "", ErrLine(err) - } - if m["Type"] != DepTypeIfchange || m["Target"] != tgt { + for _, dep := range depInfo.ifchanges { + if dep.tgt != tgt { continue } ourInode, err = inodeFromFileByPath(path.Join(cwd, tgt)) if err != nil { if os.IsNotExist(err) { - return false, nil, "", nil + err = nil + return } - return false, nil, "", ErrLine(err) - } - theirInode, err := inodeFromRec(m) - if err != nil { - return false, nil, "", ErrLine(err) + err = ErrLine(err) + return } - hshPrev = m["Hash"] - modified = !ourInode.Equals(theirInode) + hshPrev = dep.hash + modified = !ourInode.Equals(dep.inode) break } - return modified, ourInode, hshPrev, nil + return } func syncDir(dir string) error { @@ -276,17 +262,25 @@ func runScript(tgtOrig string, errs chan error, forced, traced bool) error { return nil } - // Check if it was already built in parallel - if !forced { - if build, err := depReadBuild(path.Join(redoDir, tgt+DepSuffix)); err == nil && build == BuildUUID { + depInfo, err := depRead(path.Join(redoDir, tgt+DepSuffix)) + if err != nil { + if errors.Is(err, fs.ErrNotExist) { + err = nil + } else { lockRelease() - errs <- nil - return nil + return TgtError{tgtOrig, err} } } + // Check if it was already built in parallel + if !forced && depInfo != nil && depInfo.build == BuildUUID { + lockRelease() + errs <- nil + return nil + } + // Check if target is not modified externally - modified, inodePrev, hshPrev, err := isModified(cwd, redoDir, tgt) + modified, inodePrev, hshPrev, err := isModified(depInfo, cwd, tgt) if err != nil { lockRelease() return TgtError{tgtOrig, err} @@ -302,6 +296,7 @@ func runScript(tgtOrig string, errs chan error, forced, traced bool) error { }() return nil } + depInfo = nil // Start preparing .rec fdDep, err := tempfile(redoDir, tgt+DepSuffix) @@ -352,7 +347,7 @@ func runScript(tgtOrig string, errs chan error, forced, traced bool) error { runErr.DoFile = doFileRelPath } - if err = depWrite(fdDep, cwdOrig, doFileRelPath, ""); err != nil { + if err = depWrite(fdDep, cwdOrig, doFileRelPath, nil); err != nil { cleanup() return TgtError{tgtOrig, ErrLine(err)} } @@ -546,14 +541,7 @@ func runScript(tgtOrig string, errs chan error, forced, traced bool) error { } w := bufio.NewWriter(fdStderr) - var depInfo *DepInfo - fdDep, err := os.Open(fdDepPath) - if err != nil { - err = ErrLine(err) - goto Err - } - depInfo, err = depRead(fdDep) - fdDep.Close() + depInfo, err := depRead(fdDepPath) if err != nil { err = ErrLine(err) goto Err @@ -561,7 +549,7 @@ func runScript(tgtOrig string, errs chan error, forced, traced bool) error { for _, dep := range depInfo.ifchanges { fields = append(fields, recfile.Field{ Name: "Ifchange", - Value: dep["Target"], + Value: dep.tgt, }) } _, err = recfile.NewWriter(w).WriteFields(fields...) @@ -728,8 +716,8 @@ func runScript(tgtOrig string, errs chan error, forced, traced bool) error { goto Finish } } else { - var hsh string - if hshPrev != "" { + var hsh []byte + if hshPrev != nil { _, err = fd.Seek(0, io.SeekStart) if err != nil { err = ErrLine(err) @@ -740,7 +728,7 @@ func runScript(tgtOrig string, errs chan error, forced, traced bool) error { err = ErrLine(err) goto Finish } - if hsh == hshPrev { + if bytes.Equal(hsh, hshPrev) { tracef(CDebug, "%s has same hash, not renaming", tgtOrig) err = ErrLine(os.Remove(fd.Name())) if err != nil { @@ -756,7 +744,7 @@ func runScript(tgtOrig string, errs chan error, forced, traced bool) error { goto Finish } } - err = ErrLine(depWrite(fdDep, cwdOrig, tgt, hshPrev)) + err = ErrLine(depWrite(fdDep, cwdOrig, tgt, hsh)) if err != nil { goto Finish } @@ -804,25 +792,24 @@ func runScript(tgtOrig string, errs chan error, forced, traced bool) error { goto Finish } } + fdDep.Close() // Post-commit .rec sanitizing - fdDep.Close() - if fdDepR, err := os.Open(fdDepPath); err == nil { - depInfo, err := depRead(fdDepR) - fdDepR.Close() - if err != nil { - err = ErrLine(err) - goto Finish - } + if depInfo, err := depRead(fdDepPath); err == nil { ifchangeSeen := make(map[string]struct{}, len(depInfo.ifchanges)) for _, dep := range depInfo.ifchanges { - ifchangeSeen[dep["Target"]] = struct{}{} + ifchangeSeen[dep.tgt] = struct{}{} } for _, dep := range depInfo.ifcreates { if _, exists := ifchangeSeen[dep]; exists { tracef(CWarn, "simultaneous ifcreate and ifchange records: %s", tgt) } } + } else if errors.Is(err, fs.ErrNotExist) { + err = nil + } else { + err = ErrLine(err) + goto Finish } Finish: diff --git a/sources.go b/sources.go index 3f375a6..95b8e4e 100644 --- a/sources.go +++ b/sources.go @@ -18,7 +18,8 @@ along with this program. If not, see . package main import ( - "os" + "errors" + "io/fs" "path" ) @@ -36,26 +37,20 @@ func sourcesWalker( continue } seenDeps[depPath] = struct{}{} - fdDep, err := os.Open(depPath) + depInfo, err := depRead(depPath) if err != nil { if errors.Is(err, fs.ErrNotExist) { continue } return ErrLine(err) } - depInfo, err := depRead(fdDep) - fdDep.Close() - if err != nil { - return ErrLine(err) - } - for _, m := range depInfo.ifchanges { - depTgt := m["Target"] - depTgtAbsPath := mustAbs(path.Join(cwd, depTgt)) + for _, dep := range depInfo.ifchanges { + depTgtAbsPath := mustAbs(path.Join(cwd, dep.tgt)) if _, ok := seen[depTgtAbsPath]; ok { continue } seen[depTgtAbsPath] = struct{}{} - if isSrc(cwd, depTgt) { + if isSrc(cwd, dep.tgt) { srcs[cwdMustRel(depTgtAbsPath)] = struct{}{} } else if depTgtAbsPath != tgtAbsPath { if err := sourcesWalker( -- 2.44.0