From 2cea0c64b5a0240bfe6746a98a5b350812edb96d Mon Sep 17 00:00:00 2001 From: Sergey Matveev Date: Sat, 20 Nov 2021 17:41:33 +0300 Subject: [PATCH] Ability to rely on mtime, instead of ctime --- dep.rec | 6 +++-- doc/cmds.texi | 6 ----- doc/faq.texi | 6 ++--- doc/features.texi | 8 +++--- doc/index.texi | 1 + doc/news.texi | 10 ++++++++ doc/ood.texi | 52 +++++++++++++++++++++++++++++++++++++++ inode.go | 62 +++++++++++++++++++++++++++++++++++++++++------ main.go | 17 ++++++++++--- makedist.sh | 1 + ood.go | 2 +- usage.go | 8 +++--- 12 files changed, 150 insertions(+), 29 deletions(-) create mode 100644 doc/ood.texi diff --git a/dep.rec b/dep.rec index 9f41434..56c32ca 100644 --- a/dep.rec +++ b/dep.rec @@ -6,10 +6,12 @@ %rec: Dependency %doc: Dependency information %mandatory: Type -%allowed: Target Size CtimeSec CtimeNsec Hash -%unique: Type Target Size CtimeSec CtimeNsec Hash +%allowed: Target Size CtimeSec CtimeNsec MtimeSec MtimeNsec Hash +%unique: Type Target Size CtimeSec CtimeNsec MtimeSec MtimeNsec Hash %type: Type enum ifcreate ifchange always stamp %type: Size int %type: CtimeSec int %type: CtimeNsec int +%type: MtimeSec int +%type: MtimeNsec int %type: Hash regexp /[0-9a-f]{64}/ diff --git a/doc/cmds.texi b/doc/cmds.texi index 144b118..465ef65 100644 --- a/doc/cmds.texi +++ b/doc/cmds.texi @@ -46,12 +46,6 @@ By default all build commands use @code{fsync} to assure data is reached the disk. You can disable its usage with @env{$REDO_NO_SYNC=1} environment variable, for speeding up the build process. -@command{goredo} determines target is out-of-date by comparing its size, -@code{ctime} and content's hash, if @code{ctime} differs. Depending on -the filesystem you use, probably you can not trust its @code{ctime} -value at all. In that case you can set @env{$REDO_INODE_NO_TRUST=1} to -forcefully verify the hash. - There are other commands that could be found in other implementations too: @table @command diff --git a/doc/faq.texi b/doc/faq.texi index 7039e2a..1e399a0 100644 --- a/doc/faq.texi +++ b/doc/faq.texi @@ -4,9 +4,9 @@ @anchor{Stamping} @section Hashing and stamping -All targets are checksummed if target's @file{ctime} differs from the -previous one, or @env{$REDO_INODE_NO_TRUST} environment variable is set. -@command{apenwarr/redo} gives +All targets are checksummed if target's size, @code{ctime}/@code{mtime} +differs from the previous one (depending on @ref{OOD, @env{$REDO_INODE_TRUST}} +value). @command{apenwarr/redo} gives @url{https://redo.readthedocs.io/en/latest/FAQImpl/#why-not-always-use-checksum-based-dependencies-instead-of-timestamps, many reasons} why every time checksumming is bad, but in my opinion in practice all of them do not apply. diff --git a/doc/features.texi b/doc/features.texi index c1eb41b..13530ae 100644 --- a/doc/features.texi +++ b/doc/features.texi @@ -21,9 +21,11 @@ implementations. codes can be saved for later investigation @item targets, dependency information and their directories are explicitly synced (can be disabled, should work faster) -@item file's change is detected by comparing its size, @code{ctime} (if - @env{$REDO_INODE_NO_TRUST} environment variable is not set) and - @url{https://github.com/BLAKE3-team/BLAKE3, BLAKE3} hash +@item file's change is detected by comparing its size, and + @url{https://github.com/BLAKE3-team/BLAKE3, BLAKE3} hash. Also as an + optimization, by default if file's @code{ctime} is same, then hash + check is skipped. Optionally you can disable that behaviour, or even + enable trust to file's @code{mtime} @item files creation is @code{umask}-friendly (unlike @code{mkstemp()} used in @command{redo-c}) @item parallel build with jobs limit, optionally in infinite mode diff --git a/doc/index.texi b/doc/index.texi index 8b88014..3aa870c 100644 --- a/doc/index.texi +++ b/doc/index.texi @@ -49,6 +49,7 @@ maillist. Announcements also go to this mailing list. @include notes.texi @include rules.texi @include cmds.texi +@include ood.texi @include logs.texi @include news.texi @include install.texi diff --git a/doc/news.texi b/doc/news.texi index d74791e..ec4da6f 100644 --- a/doc/news.texi +++ b/doc/news.texi @@ -1,6 +1,16 @@ @node News @unnumbered News +@anchor{Release 1_21_0} +@section Release 1.21.0 +@itemize +@item + @env{$REDO_INODE_NO_TRUST} is replaced with @env{$REDO_INODE_TRUST} + environment variable, that takes either @code{none}, or @code{ctime} + (the default one), or @code{mtime} (new one). Check documentation's + separate page about that option. +@end itemize + @anchor{Release 1_20_0} @section Release 1.20.0 @itemize diff --git a/doc/ood.texi b/doc/ood.texi new file mode 100644 index 0000000..dec389f --- /dev/null +++ b/doc/ood.texi @@ -0,0 +1,52 @@ +@node OOD +@unnumbered Out-of-date determination + +The main task for build system is deciding if the target is out-of-date +and needs rebuilding. The single most reliable way to do that is to +compare file's content with previously recorded one. But that is too +expensive. + +So direct content storage/comparison can be replaced with +collision-resistant hash function of enough length. @command{goredo} +uses @url{https://github.com/BLAKE3-team/BLAKE3, BLAKE3} with 256-bit +output for that purpose. + +Also it stores file's size. Obviously if size differs, then file's +content too and there is no need to read and hash it. + +But still it could be relatively expensive. So there are additional +possible checks that can skip need of hash checking, based on some trust +to the underlying filesystem and operating system behaviour, controlled +by @env{$REDO_INODE_TRUST} environment variable value: + +@table @env + +@item $REDO_INODE_TRUST=none +Do not trust filesystem at all, except for file's size knowledge. +Most reliable mode. + +@item $REDO_INODE_TRUST=ctime +Trust @code{ctime} value of file's inode. It should change every time +inode is updated. If nothing is touched and @code{ctime} is the same, +then assume that file was not modified and we do not try to read its +content. Unfortunately @code{ctime} also changes if link count is +updated and ownership, that could give false positive decision and force +file's rereading. + +@item $REDO_INODE_TRUST=mtime +Trust @code{mtime} value of file's inode. It should change every time +file's content is updated. But unfortunately there are +@url{https://apenwarr.ca/log/20181113, many reasons} it won't. + +@end table + +Pay attention that although @code{mtime} is considered harmful (link +above), and is hardly acceptable in build system like Make, because it +compares timestamps of two files, redo is satisfied only with the fact +of its changing, so badly jumping clocks are not so devastating. Modern +filesystem and operating systems with micro- and nano-seconds resolution +timestamps should be pretty good choice for @env{$REDO_INODE_TRUST=mtime}. +However GNU/Linux with @code{ext4} filesystem can easily have pretty big +granularity of 10ms. + +@command{goredo} uses @env{$REDO_INODE_TRUST=ctime} by default. diff --git a/inode.go b/inode.go index 8ec6b9e..51c97b3 100644 --- a/inode.go +++ b/inode.go @@ -28,20 +28,45 @@ import ( "golang.org/x/sys/unix" ) -const EnvInodeNoTrust = "REDO_INODE_NO_TRUST" +type InodeTrustType int -var InodeTrust = false +//go:generate stringer -type=InodeTrustType +const ( + EnvInodeTrust = "REDO_INODE_TRUST" + + InodeTrustNone InodeTrustType = iota + InodeTrustCtime + InodeTrustMtime +) + +var InodeTrust InodeTrustType type Inode struct { Size int64 CtimeSec int64 CtimeNsec int64 + MtimeSec int64 + MtimeNsec int64 } func (our *Inode) Equals(their *Inode) bool { - return (our.Size == their.Size) && - (our.CtimeSec == their.CtimeSec) && - (our.CtimeNsec == their.CtimeNsec) + if our.Size != their.Size { + return false + } + switch InodeTrust { + case InodeTrustCtime: + if our.CtimeSec != their.CtimeSec || our.CtimeNsec != their.CtimeNsec { + return false + } + case InodeTrustMtime: + if our.MtimeSec == 0 || our.MtimeNsec == 0 { + return false + } + if our.MtimeSec != their.MtimeSec || our.MtimeNsec != their.MtimeNsec { + return false + } + } + return true } func (inode *Inode) RecfileFields() []recfile.Field { @@ -49,6 +74,8 @@ func (inode *Inode) RecfileFields() []recfile.Field { {Name: "Size", Value: strconv.FormatInt(inode.Size, 10)}, {Name: "CtimeSec", Value: strconv.FormatInt(inode.CtimeSec, 10)}, {Name: "CtimeNsec", Value: strconv.FormatInt(inode.CtimeNsec, 10)}, + {Name: "MtimeSec", Value: strconv.FormatInt(inode.MtimeSec, 10)}, + {Name: "MtimeNsec", Value: strconv.FormatInt(inode.MtimeNsec, 10)}, } } @@ -63,14 +90,22 @@ func inodeFromFile(fd *os.File) (*Inode, error) { if err != nil { return nil, err } - sec, nsec := stat.Ctim.Unix() - return &Inode{Size: fi.Size(), CtimeSec: sec, CtimeNsec: nsec}, nil + ctimeSec, ctimeNsec := stat.Ctim.Unix() + mtimeSec := fi.ModTime().Unix() + mtimeNsec := fi.ModTime().UnixNano() + return &Inode{ + Size: fi.Size(), + CtimeSec: ctimeSec, CtimeNsec: ctimeNsec, + MtimeSec: mtimeSec, MtimeNsec: mtimeNsec, + }, nil } func inodeFromRec(m map[string]string) (*Inode, error) { size := m["Size"] ctimeSec := m["CtimeSec"] ctimeNsec := m["CtimeNsec"] + mtimeSec := m["MtimeSec"] + mtimeNsec := m["MtimeNsec"] if size == "" { return nil, errors.New("Size is missing") } @@ -94,5 +129,18 @@ func inodeFromRec(m map[string]string) (*Inode, error) { if err != nil { return nil, err } + if mtimeSec != "" { + if mtimeNsec == "" { + return nil, errors.New("MtimeNsec is missing") + } + inode.MtimeSec, err = strconv.ParseInt(mtimeSec, 10, 64) + if err != nil { + return nil, err + } + inode.MtimeNsec, err = strconv.ParseInt(mtimeNsec, 10, 64) + if err != nil { + return nil, err + } + } return &inode, nil } diff --git a/main.go b/main.go index 0b893da..732564b 100644 --- a/main.go +++ b/main.go @@ -138,10 +138,6 @@ func main() { log.Fatalln(err) } - NoColor = os.Getenv(EnvNoColor) != "" - NoSync = os.Getenv(EnvNoSync) == "1" - InodeTrust = os.Getenv(EnvInodeNoTrust) == "" - TopDir = os.Getenv(EnvTopDir) if TopDir == "" { TopDir = "/" @@ -198,6 +194,19 @@ func main() { } else if flagTrace != nil { traced = *flagTrace } + NoColor = os.Getenv(EnvNoColor) != "" + NoSync = os.Getenv(EnvNoSync) == "1" + switch s := os.Getenv(EnvInodeTrust); s { + case "none": + InodeTrust = InodeTrustNone + case "", "ctime": + InodeTrust = InodeTrustCtime + case "mtime": + InodeTrust = InodeTrustMtime + default: + log.Fatalln("unknown", EnvInodeTrust, "value") + } + tracef(CDebug, "inode-trust: %s", InodeTrust) // Those are internal envs FdOODTgts, err = ioutil.TempFile("", "ood-tgts") diff --git a/makedist.sh b/makedist.sh index 51f912a..9f81f9f 100755 --- a/makedist.sh +++ b/makedist.sh @@ -8,6 +8,7 @@ release=$1 git clone . $tmp/goredo-$release cd $tmp/goredo-$release git checkout v$release +go generate redo-ifchange VERSION diff --git a/ood.go b/ood.go index fdb0d84..81f0f7b 100644 --- a/ood.go +++ b/ood.go @@ -171,7 +171,7 @@ func isOOD(cwd, tgtOrig string, level int, seen map[string]struct{}) (bool, erro ood = true goto Done } - if InodeTrust && inode.Equals(theirInode) { + if InodeTrust != InodeTrustNone && inode.Equals(theirInode) { tracef(CDebug, "ood: %s%s -> %s: same inode", indent, tgtOrig, dep) } else { tracef(CDebug, "ood: %s%s -> %s: inode differs", indent, tgtOrig, dep) diff --git a/usage.go b/usage.go index 428c9bc..67fded8 100644 --- a/usage.go +++ b/usage.go @@ -24,7 +24,7 @@ import ( ) const ( - Version = "1.20.0" + Version = "1.21.0" Warranty = `Copyright (C) 2020-2021 Sergey Matveev This program is free software: you can redistribute it and/or modify @@ -123,8 +123,10 @@ Additional environment variables: if cmd == CmdNameRedo || cmd == CmdNameRedoIfchange { fmt.Fprintln(os.Stderr, ` REDO_NO_SYNC -- disable files/directories explicit filesystem syncing - REDO_INODE_NO_TRUST -- do not trust inode information (except for size) - and always check file's hash + REDO_INODE_TRUST -- {none,ctime,mtime}, either do not trust inode + information at all (always check size and hash), or + trust its ctime (the default one), or be satisfied + with its mtime REDO_MAKE -- bmake/gmake/none(default) jobserver protocol compatibility`) } } -- 2.44.0