]> Cypherpunks.ru repositories - gostls13.git/commitdiff
cmd/compile,runtime: stack maps only at calls, remove register maps
authorAustin Clements <austin@google.com>
Thu, 16 Apr 2020 12:13:58 +0000 (08:13 -0400)
committerAustin Clements <austin@google.com>
Wed, 29 Apr 2020 21:29:21 +0000 (21:29 +0000)
Currently, we emit stack maps and register maps at almost every
instruction. This was originally intended to support non-cooperative
preemption, but was only ever used for debug call injection. Now debug
call injection also uses conservative frame scanning. As a result,
stack maps are only needed at call sites and register maps aren't
needed at all except that we happen to also encode unsafe-point
information in the register map PCDATA stream.

This CL reduces stack maps to only appear at calls, and replace full
register maps with just safe/unsafe-point information.

This is all protected by the go115ReduceLiveness feature flag, which
is defined in both runtime and cmd/compile.

This CL significantly reduces binary sizes and also speeds up compiles
and links:

name                      old exe-bytes     new exe-bytes     delta
BinGoSize                      15.0MB ± 0%       14.1MB ± 0%   -5.72%

name                      old pcln-bytes    new pcln-bytes    delta
BinGoSize                      3.14MB ± 0%       2.48MB ± 0%  -21.08%

name                      old time/op       new time/op       delta
Template                        178ms ± 7%        172ms ±14%  -3.59%  (p=0.005 n=19+19)
Unicode                        71.0ms ±12%       69.8ms ±10%    ~     (p=0.126 n=18+18)
GoTypes                         655ms ± 8%        615ms ± 8%  -6.11%  (p=0.000 n=19+19)
Compiler                        3.27s ± 6%        3.15s ± 7%  -3.69%  (p=0.001 n=20+20)
SSA                             7.10s ± 5%        6.85s ± 8%  -3.53%  (p=0.001 n=19+20)
Flate                           124ms ±15%        116ms ±22%  -6.57%  (p=0.024 n=18+19)
GoParser                        156ms ±26%        147ms ±34%    ~     (p=0.070 n=19+19)
Reflect                         406ms ± 9%        387ms ±21%  -4.69%  (p=0.028 n=19+20)
Tar                             163ms ±15%        162ms ±27%    ~     (p=0.370 n=19+19)
XML                             223ms ±13%        218ms ±14%    ~     (p=0.157 n=20+20)
LinkCompiler                    503ms ±21%        484ms ±23%    ~     (p=0.072 n=20+20)
ExternalLinkCompiler            1.27s ± 7%        1.22s ± 8%  -3.85%  (p=0.005 n=20+19)
LinkWithoutDebugCompiler        294ms ±17%        273ms ±11%  -7.16%  (p=0.001 n=19+18)

(https://perf.golang.org/search?q=upload:20200428.8)

The binary size improvement is even slightly better when you include
the CLs leading up to this. Relative to the parent of "cmd/compile:
mark PanicBounds/Extend as calls":

name                      old exe-bytes     new exe-bytes     delta
BinGoSize                      15.0MB ± 0%       14.1MB ± 0%   -6.18%

name                      old pcln-bytes    new pcln-bytes    delta
BinGoSize                      3.22MB ± 0%       2.48MB ± 0%  -22.92%

(https://perf.golang.org/search?q=upload:20200428.9)

For #36365.

Change-Id: I69448e714f2a44430067ca97f6b78e08c0abed27
Reviewed-on: https://go-review.googlesource.com/c/go/+/230544
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
src/cmd/compile/internal/gc/gsubr.go
src/cmd/compile/internal/gc/plive.go
src/cmd/internal/obj/link.go
src/cmd/internal/objabi/funcdata.go
src/runtime/debugcall.go
src/runtime/preempt.go
src/runtime/symtab.go

index 5a7d4c9e4dc631b5a9862def65a9fdaf638ac22d..bb1393ae6ae43e7ad21a01ae4fcae81565386bce 100644 (file)
@@ -70,9 +70,13 @@ func newProgs(fn *Node, worker int) *Progs {
 
        pp.pos = fn.Pos
        pp.settext(fn)
-       pp.nextLive = LivenessInvalid
        // PCDATA tables implicitly start with index -1.
        pp.prevLive = LivenessIndex{-1, -1, false}
+       if go115ReduceLiveness {
+               pp.nextLive = pp.prevLive
+       } else {
+               pp.nextLive = LivenessInvalid
+       }
        return pp
 }
 
@@ -117,18 +121,32 @@ func (pp *Progs) Prog(as obj.As) *obj.Prog {
                Addrconst(&p.From, objabi.PCDATA_StackMapIndex)
                Addrconst(&p.To, int64(idx))
        }
-       if pp.nextLive.isUnsafePoint {
-               // Unsafe points are encoded as a special value in the
-               // register map.
-               pp.nextLive.regMapIndex = objabi.PCDATA_RegMapUnsafe
-       }
-       if pp.nextLive.regMapIndex != pp.prevLive.regMapIndex {
-               // Emit register map index change.
-               idx := pp.nextLive.regMapIndex
-               pp.prevLive.regMapIndex = idx
-               p := pp.Prog(obj.APCDATA)
-               Addrconst(&p.From, objabi.PCDATA_RegMapIndex)
-               Addrconst(&p.To, int64(idx))
+       if !go115ReduceLiveness {
+               if pp.nextLive.isUnsafePoint {
+                       // Unsafe points are encoded as a special value in the
+                       // register map.
+                       pp.nextLive.regMapIndex = objabi.PCDATA_RegMapUnsafe
+               }
+               if pp.nextLive.regMapIndex != pp.prevLive.regMapIndex {
+                       // Emit register map index change.
+                       idx := pp.nextLive.regMapIndex
+                       pp.prevLive.regMapIndex = idx
+                       p := pp.Prog(obj.APCDATA)
+                       Addrconst(&p.From, objabi.PCDATA_RegMapIndex)
+                       Addrconst(&p.To, int64(idx))
+               }
+       } else {
+               if pp.nextLive.isUnsafePoint != pp.prevLive.isUnsafePoint {
+                       // Emit unsafe-point marker.
+                       pp.prevLive.isUnsafePoint = pp.nextLive.isUnsafePoint
+                       p := pp.Prog(obj.APCDATA)
+                       Addrconst(&p.From, objabi.PCDATA_UnsafePoint)
+                       if pp.nextLive.isUnsafePoint {
+                               Addrconst(&p.To, objabi.PCDATA_UnsafePointUnsafe)
+                       } else {
+                               Addrconst(&p.To, objabi.PCDATA_UnsafePointSafe)
+                       }
+               }
        }
 
        p := pp.next
index f8ccdd23695086a5e4d207a832a8119b17d708b6..a4c051bda6defa66f74734c35ab9fa371a3d0d87 100644 (file)
@@ -24,6 +24,16 @@ import (
        "strings"
 )
 
+// go115ReduceLiveness disables register maps and only produces stack
+// maps at call sites.
+//
+// In Go 1.15, we changed debug call injection to use conservative
+// scanning instead of precise pointer maps, so these are no longer
+// necessary.
+//
+// Keep in sync with runtime/preempt.go:go115ReduceLiveness.
+const go115ReduceLiveness = true
+
 // OpVarDef is an annotation for the liveness analysis, marking a place
 // where a complete initialization (definition) of a variable begins.
 // Since the liveness analysis can see initialization of single-word
@@ -165,18 +175,27 @@ func (m *LivenessMap) set(v *ssa.Value, i LivenessIndex) {
 }
 
 func (m LivenessMap) Get(v *ssa.Value) LivenessIndex {
-       // All safe-points are in the map, so if v isn't in
-       // the map, it's an unsafe-point.
+       if !go115ReduceLiveness {
+               // All safe-points are in the map, so if v isn't in
+               // the map, it's an unsafe-point.
+               if idx, ok := m.vals[v.ID]; ok {
+                       return idx
+               }
+               return LivenessInvalid
+       }
+
+       // If v isn't in the map, then it's a "don't care" and not an
+       // unsafe-point.
        if idx, ok := m.vals[v.ID]; ok {
                return idx
        }
-       return LivenessInvalid
+       return LivenessIndex{StackMapDontCare, StackMapDontCare, false}
 }
 
 // LivenessIndex stores the liveness map information for a Value.
 type LivenessIndex struct {
        stackMapIndex int
-       regMapIndex   int
+       regMapIndex   int // only for !go115ReduceLiveness
 
        // isUnsafePoint indicates that this is an unsafe-point.
        //
@@ -188,7 +207,7 @@ type LivenessIndex struct {
 }
 
 // LivenessInvalid indicates an unsafe point with no stack map.
-var LivenessInvalid = LivenessIndex{StackMapDontCare, StackMapDontCare, true}
+var LivenessInvalid = LivenessIndex{StackMapDontCare, StackMapDontCare, true} // only for !go115ReduceLiveness
 
 // StackMapDontCare indicates that the stack map index at a Value
 // doesn't matter.
@@ -392,6 +411,9 @@ func affectedNode(v *ssa.Value) (*Node, ssa.SymEffect) {
 
 // regEffects returns the registers affected by v.
 func (lv *Liveness) regEffects(v *ssa.Value) (uevar, kill liveRegMask) {
+       if go115ReduceLiveness {
+               return 0, 0
+       }
        if v.Op == ssa.OpPhi {
                // All phi node arguments must come from the same
                // register and the result must also go to that
@@ -473,7 +495,7 @@ func (lv *Liveness) regEffects(v *ssa.Value) (uevar, kill liveRegMask) {
        return uevar, kill
 }
 
-type liveRegMask uint32
+type liveRegMask uint32 // only if !go115ReduceLiveness
 
 func (m liveRegMask) niceString(config *ssa.Config) string {
        if m == 0 {
@@ -835,7 +857,7 @@ func (lv *Liveness) hasStackMap(v *ssa.Value) bool {
        // The runtime only has safe-points in function prologues, so
        // we only need stack maps at call sites. go:nosplit functions
        // are similar.
-       if compiling_runtime || lv.f.NoSplit {
+       if go115ReduceLiveness || compiling_runtime || lv.f.NoSplit {
                if !v.Op.IsCall() {
                        return false
                }
@@ -1172,13 +1194,15 @@ func (lv *Liveness) epilogue() {
                        lv.f.Fatalf("%v %L recorded as live on entry", lv.fn.Func.Nname, n)
                }
        }
-       // Check that no registers are live at function entry.
-       // The context register, if any, comes from a
-       // LoweredGetClosurePtr operation first thing in the function,
-       // so it doesn't appear live at entry.
-       if regs := lv.regMaps[0]; regs != 0 {
-               lv.printDebug()
-               lv.f.Fatalf("%v register %s recorded as live on entry", lv.fn.Func.Nname, regs.niceString(lv.f.Config))
+       if !go115ReduceLiveness {
+               // Check that no registers are live at function entry.
+               // The context register, if any, comes from a
+               // LoweredGetClosurePtr operation first thing in the function,
+               // so it doesn't appear live at entry.
+               if regs := lv.regMaps[0]; regs != 0 {
+                       lv.printDebug()
+                       lv.f.Fatalf("%v register %s recorded as live on entry", lv.fn.Func.Nname, regs.niceString(lv.f.Config))
+               }
        }
 }
 
@@ -1199,7 +1223,7 @@ func (lv *Liveness) epilogue() {
 // PCDATA tables cost about 100k. So for now we keep using a single index for
 // both bitmap lists.
 func (lv *Liveness) compact(b *ssa.Block) {
-       add := func(live varRegVec, isUnsafePoint bool) LivenessIndex {
+       add := func(live varRegVec, isUnsafePoint bool) LivenessIndex { // only if !go115ReduceLiveness
                // Deduplicate the stack map.
                stackIndex := lv.stackMapSet.add(live.vars)
                // Deduplicate the register map.
@@ -1214,11 +1238,26 @@ func (lv *Liveness) compact(b *ssa.Block) {
        pos := 0
        if b == lv.f.Entry {
                // Handle entry stack map.
-               add(lv.livevars[0], false)
+               if !go115ReduceLiveness {
+                       add(lv.livevars[0], false)
+               } else {
+                       lv.stackMapSet.add(lv.livevars[0].vars)
+               }
                pos++
        }
        for _, v := range b.Values {
-               if lv.hasStackMap(v) {
+               if go115ReduceLiveness {
+                       hasStackMap := lv.hasStackMap(v)
+                       isUnsafePoint := lv.allUnsafe || lv.unsafePoints.Get(int32(v.ID))
+                       idx := LivenessIndex{StackMapDontCare, 0, isUnsafePoint}
+                       if hasStackMap {
+                               idx.stackMapIndex = lv.stackMapSet.add(lv.livevars[pos].vars)
+                               pos++
+                       }
+                       if hasStackMap || isUnsafePoint {
+                               lv.livenessMap.set(v, idx)
+                       }
+               } else if lv.hasStackMap(v) {
                        isUnsafePoint := lv.allUnsafe || lv.unsafePoints.Get(int32(v.ID))
                        lv.livenessMap.set(v, add(lv.livevars[pos], isUnsafePoint))
                        pos++
@@ -1407,7 +1446,7 @@ func (lv *Liveness) printDebug() {
                                                printed = true
                                        }
                                }
-                               if pcdata.RegMapValid() {
+                               if pcdata.RegMapValid() { // only if !go115ReduceLiveness
                                        regLive := lv.regMaps[pcdata.regMapIndex]
                                        if regLive != 0 {
                                                if printed {
@@ -1491,19 +1530,21 @@ func (lv *Liveness) emit() (argsSym, liveSym, regsSym *obj.LSym) {
                loff = dbvec(&liveSymTmp, loff, locals)
        }
 
-       regs := bvalloc(lv.usedRegs())
-       roff := duint32(&regsSymTmp, 0, uint32(len(lv.regMaps))) // number of bitmaps
-       roff = duint32(&regsSymTmp, roff, uint32(regs.n))        // number of bits in each bitmap
-       if regs.n > 32 {
-               // Our uint32 conversion below won't work.
-               Fatalf("GP registers overflow uint32")
-       }
+       if !go115ReduceLiveness {
+               regs := bvalloc(lv.usedRegs())
+               roff := duint32(&regsSymTmp, 0, uint32(len(lv.regMaps))) // number of bitmaps
+               roff = duint32(&regsSymTmp, roff, uint32(regs.n))        // number of bits in each bitmap
+               if regs.n > 32 {
+                       // Our uint32 conversion below won't work.
+                       Fatalf("GP registers overflow uint32")
+               }
 
-       if regs.n > 0 {
-               for _, live := range lv.regMaps {
-                       regs.Clear()
-                       regs.b[0] = uint32(live)
-                       roff = dbvec(&regsSymTmp, roff, regs)
+               if regs.n > 0 {
+                       for _, live := range lv.regMaps {
+                               regs.Clear()
+                               regs.b[0] = uint32(live)
+                               roff = dbvec(&regsSymTmp, roff, regs)
+                       }
                }
        }
 
@@ -1518,7 +1559,11 @@ func (lv *Liveness) emit() (argsSym, liveSym, regsSym *obj.LSym) {
                        lsym.P = tmpSym.P
                })
        }
-       return makeSym(&argsSymTmp), makeSym(&liveSymTmp), makeSym(&regsSymTmp)
+       if !go115ReduceLiveness {
+               return makeSym(&argsSymTmp), makeSym(&liveSymTmp), makeSym(&regsSymTmp)
+       }
+       // TODO(go115ReduceLiveness): Remove regsSym result
+       return makeSym(&argsSymTmp), makeSym(&liveSymTmp), nil
 }
 
 // Entry pointer for liveness analysis. Solves for the liveness of
@@ -1578,11 +1623,13 @@ func liveness(e *ssafn, f *ssa.Func, pp *Progs) LivenessMap {
        p.To.Name = obj.NAME_EXTERN
        p.To.Sym = ls.Func.GCLocals
 
-       p = pp.Prog(obj.AFUNCDATA)
-       Addrconst(&p.From, objabi.FUNCDATA_RegPointerMaps)
-       p.To.Type = obj.TYPE_MEM
-       p.To.Name = obj.NAME_EXTERN
-       p.To.Sym = ls.Func.GCRegs
+       if !go115ReduceLiveness {
+               p = pp.Prog(obj.AFUNCDATA)
+               Addrconst(&p.From, objabi.FUNCDATA_RegPointerMaps)
+               p.To.Type = obj.TYPE_MEM
+               p.To.Name = obj.NAME_EXTERN
+               p.To.Sym = ls.Func.GCRegs
+       }
 
        return lv.livenessMap
 }
index c7bab6a2ca6351906092b67d1c2d3ce7dc290e8f..65e58887e67ab806f34583dc5c30c9ba36f4a690 100644 (file)
@@ -412,7 +412,7 @@ type FuncInfo struct {
 
        GCArgs             *LSym
        GCLocals           *LSym
-       GCRegs             *LSym
+       GCRegs             *LSym // Only if !go115ReduceLiveness
        StackObjects       *LSym
        OpenCodedDeferInfo *LSym
 
index 1c07f011dacd3700abb419e625a39e0b69b51c2e..2a51816cbd75509bcb56dd12c4f317df0ce46353 100644 (file)
@@ -11,13 +11,14 @@ package objabi
 // ../../../runtime/symtab.go.
 
 const (
-       PCDATA_RegMapIndex   = 0
+       PCDATA_RegMapIndex   = 0 // if !go115ReduceLiveness
+       PCDATA_UnsafePoint   = 0 // if go115ReduceLiveness
        PCDATA_StackMapIndex = 1
        PCDATA_InlTreeIndex  = 2
 
        FUNCDATA_ArgsPointerMaps    = 0
        FUNCDATA_LocalsPointerMaps  = 1
-       FUNCDATA_RegPointerMaps     = 2
+       FUNCDATA_RegPointerMaps     = 2 // if !go115ReduceLiveness
        FUNCDATA_StackObjects       = 3
        FUNCDATA_InlTree            = 4
        FUNCDATA_OpenCodedDeferInfo = 5
@@ -32,5 +33,11 @@ const (
 // Special PCDATA values.
 const (
        // PCDATA_RegMapIndex values.
+       //
+       // Only if !go115ReduceLiveness.
        PCDATA_RegMapUnsafe = -2 // Unsafe for async preemption
+
+       // PCDATA_UnsafePoint values.
+       PCDATA_UnsafePointSafe   = -1 // Safe for async preemption
+       PCDATA_UnsafePointUnsafe = -2 // Unsafe for async preemption
 )
index 33c70b8c7fd70d015e35c5652ed28eef4cf7b335..5cbe382ce7aac5e39f5856c3c11f1b151bfa3f65 100644 (file)
@@ -76,20 +76,32 @@ func debugCallCheck(pc uintptr) string {
                        return
                }
 
-               // Look up PC's register map.
-               pcdata := int32(-1)
-               if pc != f.entry {
-                       pc--
-                       pcdata = pcdatavalue(f, _PCDATA_RegMapIndex, pc, nil)
-               }
-               if pcdata == -1 {
-                       pcdata = 0 // in prologue
-               }
-               stkmap := (*stackmap)(funcdata(f, _FUNCDATA_RegPointerMaps))
-               if pcdata == -2 || stkmap == nil {
-                       // Not at a safe point.
-                       ret = debugCallUnsafePoint
-                       return
+               if !go115ReduceLiveness {
+                       // Look up PC's register map.
+                       pcdata := int32(-1)
+                       if pc != f.entry {
+                               pc--
+                               pcdata = pcdatavalue(f, _PCDATA_RegMapIndex, pc, nil)
+                       }
+                       if pcdata == -1 {
+                               pcdata = 0 // in prologue
+                       }
+                       stkmap := (*stackmap)(funcdata(f, _FUNCDATA_RegPointerMaps))
+                       if pcdata == -2 || stkmap == nil {
+                               // Not at a safe point.
+                               ret = debugCallUnsafePoint
+                               return
+                       }
+               } else {
+                       // Check that this isn't an unsafe-point.
+                       if pc != f.entry {
+                               pc--
+                       }
+                       up := pcdatavalue(f, _PCDATA_UnsafePoint, pc, nil)
+                       if up != _PCDATA_UnsafePointSafe {
+                               // Not at a safe point.
+                               ret = debugCallUnsafePoint
+                       }
                }
        })
        return ret
index 420a7f96e089fae0a711f1ed52711ac3a84c17aa..41a32fa6505f9294cfd87706c332124d74d37666 100644 (file)
@@ -58,6 +58,9 @@ import (
        "unsafe"
 )
 
+// Keep in sync with cmd/compile/internal/gc/plive.go:go115ReduceLiveness.
+const go115ReduceLiveness = true
+
 type suspendGState struct {
        g *g
 
@@ -393,12 +396,22 @@ func isAsyncSafePoint(gp *g, pc, sp, lr uintptr) bool {
                // use the LR for unwinding, which will be bad.
                return false
        }
-       smi := pcdatavalue(f, _PCDATA_RegMapIndex, pc, nil)
-       if smi == -2 {
-               // Unsafe-point marked by compiler. This includes
-               // atomic sequences (e.g., write barrier) and nosplit
-               // functions (except at calls).
-               return false
+       if !go115ReduceLiveness {
+               smi := pcdatavalue(f, _PCDATA_RegMapIndex, pc, nil)
+               if smi == -2 {
+                       // Unsafe-point marked by compiler. This includes
+                       // atomic sequences (e.g., write barrier) and nosplit
+                       // functions (except at calls).
+                       return false
+               }
+       } else {
+               up := pcdatavalue(f, _PCDATA_UnsafePoint, pc, nil)
+               if up != _PCDATA_UnsafePointSafe {
+                       // Unsafe-point marked by compiler. This includes
+                       // atomic sequences (e.g., write barrier) and nosplit
+                       // functions (except at calls).
+                       return false
+               }
        }
        if fd := funcdata(f, _FUNCDATA_LocalsPointerMaps); fd == nil || fd == unsafe.Pointer(&no_pointers_stackmap) {
                // This is assembly code. Don't assume it's
index b2147c4cb40f4105fdf1d166ca4860ceb7953974..04aa90e0779a70504f81cfe7a543e4fd2be9f68b 100644 (file)
@@ -268,13 +268,14 @@ func (f *Func) funcInfo() funcInfo {
 //
 // See funcdata.h and ../cmd/internal/objabi/funcdata.go.
 const (
-       _PCDATA_RegMapIndex   = 0
+       _PCDATA_RegMapIndex   = 0 // if !go115ReduceLiveness
+       _PCDATA_UnsafePoint   = 0 // if go115ReduceLiveness
        _PCDATA_StackMapIndex = 1
        _PCDATA_InlTreeIndex  = 2
 
        _FUNCDATA_ArgsPointerMaps    = 0
        _FUNCDATA_LocalsPointerMaps  = 1
-       _FUNCDATA_RegPointerMaps     = 2
+       _FUNCDATA_RegPointerMaps     = 2 // if !go115ReduceLiveness
        _FUNCDATA_StackObjects       = 3
        _FUNCDATA_InlTree            = 4
        _FUNCDATA_OpenCodedDeferInfo = 5
@@ -282,6 +283,12 @@ const (
        _ArgsSizeUnknown = -0x80000000
 )
 
+const (
+       // PCDATA_UnsafePoint values.
+       _PCDATA_UnsafePointSafe   = -1 // Safe for async preemption
+       _PCDATA_UnsafePointUnsafe = -2 // Unsafe for async preemption
+)
+
 // A FuncID identifies particular functions that need to be treated
 // specially by the runtime.
 // Note that in some situations involving plugins, there may be multiple