Performance is hard to quantify exactly.
One big difference between jump tables and the old binary search
scheme is that there's only 1 branch statement instead of O(n) of
them. That can be both a blessing and a curse, and can make evaluating
jump tables very hard to do.
The single branch can become a choke point for the hardware branch
predictor. A branch table jump must fit all of its state in a single
branch predictor entry (technically, a branch target predictor entry).
With binary search that predictor state can be spread among lots of
entries. In cases where the case selection is repetitive and thus
predictable, binary search can perform better.
The big win for a jump table is that it doesn't consume so much of the
branch predictor's resources. But that benefit is essentially never
observed in microbenchmarks, because the branch predictor can easily
keep state for all the binary search branches in a microbenchmark. So
that benefit is really hard to measure.
So predictable switch microbenchmarks are ~useless - they will almost
always favor the binary search scheme. Fully unpredictable switch
microbenchmarks are better, as they aren't lying to us quite so
much. In a perfectly unpredictable situation, a jump table is expected
to incur 1-1/N branch mispredicts, whereas a binary search would incur
lg(N)/2 of them. That puts the crossover point at about N=4. But of
course switches in real programs are seldom fully unpredictable, so
we'll use a higher crossover point.
Beyond the branch predictor, jump tables tend to execute more
instructions per switch but have no additional instructions per case,
which also argues for a larger crossover.
As far as code size goes, with this CL cmd/go has a slightly smaller
code segment and a slightly larger overall size (from the jump tables
themselves which live in the data segment).
This is a case where some FDO (feedback-directed optimization) would
be really nice to have. #28262
Some large-program benchmarks might help make the case for this
CL. Especially if we can turn on branch mispredict counters so we can
see how much using jump tables can free up branch prediction resources
that can be gainfully used elsewhere in the program.
name old time/op new time/op delta
Switch8Predictable 1.89ns ± 2% 1.27ns ± 3% -32.58% (p=0.000 n=9+10)
Switch8Unpredictable 9.33ns ± 1% 7.50ns ± 1% -19.60% (p=0.000 n=10+9)
Switch32Predictable 2.20ns ± 2% 1.64ns ± 1% -25.39% (p=0.000 n=10+9)
Switch32Unpredictable 10.0ns ± 2% 7.6ns ± 2% -24.04% (p=0.000 n=10+10)
Fixes #5496
Update #34381
Change-Id: I3ff56011d02be53f605ca5fd3fb96b905517c34f
Reviewed-on: https://go-review.googlesource.com/c/go/+/357330
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Keith Randall <khr@google.com>
}
}
+ case ssa.BlockAMD64JUMPTABLE:
+ // JMP *(TABLE)(INDEX*8)
+ p := s.Prog(obj.AJMP)
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = b.Controls[1].Reg()
+ p.To.Index = b.Controls[0].Reg()
+ p.To.Scale = 8
+ // Save jump tables for later resolution of the target blocks.
+ s.JumpTables = append(s.JumpTables, b)
+
default:
b.Fatalf("branch not implemented: %s", b.LongString())
}
objw.Global(x, int32(len(x.P)), obj.RODATA|obj.DUPOK)
x.Set(obj.AttrStatic, true)
}
+ for _, jt := range fn.JumpTables {
+ objw.Global(jt.Sym, int32(len(jt.Targets)*base.Ctxt.Arch.PtrSize), obj.RODATA)
+ }
}
}
ORESULT // result of a function call; Xoffset is stack offset
OINLMARK // start of an inlined body, with file/line of caller. Xoffset is an index into the inline tree.
OLINKSYMOFFSET // offset within a name
+ OJUMPTABLE // A jump table structure for implementing dense expression switches
// opcodes for generics
ODYNAMICDOTTYPE // x = i.(T) where T is a type parameter (or derived from a type parameter)
editNodes(n.Targs, edit)
}
+// Format implements fmt.Formatter by handing n to fmtNode.
+func (n *JumpTableStmt) Format(s fmt.State, verb rune) { fmtNode(n, s, verb) }
+// copy returns a shallow copy of n. The init list is duplicated with
+// copyNodes; every other field, including the Cases and Targets slices,
+// is carried over by plain struct assignment.
+func (n *JumpTableStmt) copy() Node {
+ c := *n
+ c.init = copyNodes(c.init)
+ return &c
+}
+// doChildren applies do to n.init (through doNodes) and then to n.Idx
+// when it is non-nil. It returns true as soon as one of those reports
+// true, and false otherwise.
+func (n *JumpTableStmt) doChildren(do func(Node) bool) bool {
+ if doNodes(n.init, do) {
+ return true
+ }
+ if n.Idx != nil && do(n.Idx) {
+ return true
+ }
+ return false
+}
+// editChildren passes n.init to editNodes and, when n.Idx is non-nil,
+// replaces n.Idx with the node returned by edit(n.Idx).
+func (n *JumpTableStmt) editChildren(edit func(Node) Node) {
+ editNodes(n.init, edit)
+ if n.Idx != nil {
+ // The type assertion panics on a nil interface, so edit must
+ // return a non-nil Node here.
+ n.Idx = edit(n.Idx).(Node)
+ }
+}
+
func (n *KeyExpr) Format(s fmt.State, verb rune) { fmtNode(n, s, verb) }
func (n *KeyExpr) copy() Node {
c := *n
_ = x[ORESULT-143]
_ = x[OINLMARK-144]
_ = x[OLINKSYMOFFSET-145]
- _ = x[ODYNAMICDOTTYPE-146]
- _ = x[ODYNAMICDOTTYPE2-147]
- _ = x[ODYNAMICTYPE-148]
- _ = x[OTAILCALL-149]
- _ = x[OGETG-150]
- _ = x[OGETCALLERPC-151]
- _ = x[OGETCALLERSP-152]
- _ = x[OEND-153]
+ _ = x[OJUMPTABLE-146]
+ _ = x[ODYNAMICDOTTYPE-147]
+ _ = x[ODYNAMICDOTTYPE2-148]
+ _ = x[ODYNAMICTYPE-149]
+ _ = x[OTAILCALL-150]
+ _ = x[OGETG-151]
+ _ = x[OGETCALLERPC-152]
+ _ = x[OGETCALLERSP-153]
+ _ = x[OEND-154]
}
-const _Op_name = "XXXNAMENONAMETYPELITERALNILADDSUBORXORADDSTRADDRANDANDAPPENDBYTES2STRBYTES2STRTMPRUNES2STRSTR2BYTESSTR2BYTESTMPSTR2RUNESSLICE2ARRPTRASAS2AS2DOTTYPEAS2FUNCAS2MAPRAS2RECVASOPCALLCALLFUNCCALLMETHCALLINTERCAPCLOSECLOSURECOMPLITMAPLITSTRUCTLITARRAYLITSLICELITPTRLITCONVCONVIFACECONVIDATACONVNOPCOPYDCLDCLFUNCDCLCONSTDCLTYPEDELETEDOTDOTPTRDOTMETHDOTINTERXDOTDOTTYPEDOTTYPE2EQNELTLEGEGTDEREFINDEXINDEXMAPKEYSTRUCTKEYLENMAKEMAKECHANMAKEMAPMAKESLICEMAKESLICECOPYMULDIVMODLSHRSHANDANDNOTNEWNOTBITNOTPLUSNEGORORPANICPRINTPRINTNPARENSENDSLICESLICEARRSLICESTRSLICE3SLICE3ARRSLICEHEADERRECOVERRECOVERFPRECVRUNESTRSELRECV2REALIMAGCOMPLEXALIGNOFOFFSETOFSIZEOFUNSAFEADDUNSAFESLICEMETHEXPRMETHVALUEBLOCKBREAKCASECONTINUEDEFERFALLFORFORUNTILGOTOIFLABELGORANGERETURNSELECTSWITCHTYPESWFUNCINSTTFUNCINLCALLEFACEITABIDATASPTRCFUNCCHECKNILVARDEFVARKILLVARLIVERESULTINLMARKLINKSYMOFFSETDYNAMICDOTTYPEDYNAMICDOTTYPE2DYNAMICTYPETAILCALLGETGGETCALLERPCGETCALLERSPEND"
+const _Op_name = "XXXNAMENONAMETYPELITERALNILADDSUBORXORADDSTRADDRANDANDAPPENDBYTES2STRBYTES2STRTMPRUNES2STRSTR2BYTESSTR2BYTESTMPSTR2RUNESSLICE2ARRPTRASAS2AS2DOTTYPEAS2FUNCAS2MAPRAS2RECVASOPCALLCALLFUNCCALLMETHCALLINTERCAPCLOSECLOSURECOMPLITMAPLITSTRUCTLITARRAYLITSLICELITPTRLITCONVCONVIFACECONVIDATACONVNOPCOPYDCLDCLFUNCDCLCONSTDCLTYPEDELETEDOTDOTPTRDOTMETHDOTINTERXDOTDOTTYPEDOTTYPE2EQNELTLEGEGTDEREFINDEXINDEXMAPKEYSTRUCTKEYLENMAKEMAKECHANMAKEMAPMAKESLICEMAKESLICECOPYMULDIVMODLSHRSHANDANDNOTNEWNOTBITNOTPLUSNEGORORPANICPRINTPRINTNPARENSENDSLICESLICEARRSLICESTRSLICE3SLICE3ARRSLICEHEADERRECOVERRECOVERFPRECVRUNESTRSELRECV2REALIMAGCOMPLEXALIGNOFOFFSETOFSIZEOFUNSAFEADDUNSAFESLICEMETHEXPRMETHVALUEBLOCKBREAKCASECONTINUEDEFERFALLFORFORUNTILGOTOIFLABELGORANGERETURNSELECTSWITCHTYPESWFUNCINSTTFUNCINLCALLEFACEITABIDATASPTRCFUNCCHECKNILVARDEFVARKILLVARLIVERESULTINLMARKLINKSYMOFFSETJUMPTABLEDYNAMICDOTTYPEDYNAMICDOTTYPE2DYNAMICTYPETAILCALLGETGGETCALLERPCGETCALLERSPEND"
-var _Op_index = [...]uint16{0, 3, 7, 13, 17, 24, 27, 30, 33, 35, 38, 44, 48, 54, 60, 69, 81, 90, 99, 111, 120, 132, 134, 137, 147, 154, 161, 168, 172, 176, 184, 192, 201, 204, 209, 216, 223, 229, 238, 246, 254, 260, 264, 273, 282, 289, 293, 296, 303, 311, 318, 324, 327, 333, 340, 348, 352, 359, 367, 369, 371, 373, 375, 377, 379, 384, 389, 397, 400, 409, 412, 416, 424, 431, 440, 453, 456, 459, 462, 465, 468, 471, 477, 480, 483, 489, 493, 496, 500, 505, 510, 516, 521, 525, 530, 538, 546, 552, 561, 572, 579, 588, 592, 599, 607, 611, 615, 622, 629, 637, 643, 652, 663, 671, 680, 685, 690, 694, 702, 707, 711, 714, 722, 726, 728, 733, 735, 740, 746, 752, 758, 764, 772, 777, 784, 789, 793, 798, 802, 807, 815, 821, 828, 835, 841, 848, 861, 875, 890, 901, 909, 913, 924, 935, 938}
+var _Op_index = [...]uint16{0, 3, 7, 13, 17, 24, 27, 30, 33, 35, 38, 44, 48, 54, 60, 69, 81, 90, 99, 111, 120, 132, 134, 137, 147, 154, 161, 168, 172, 176, 184, 192, 201, 204, 209, 216, 223, 229, 238, 246, 254, 260, 264, 273, 282, 289, 293, 296, 303, 311, 318, 324, 327, 333, 340, 348, 352, 359, 367, 369, 371, 373, 375, 377, 379, 384, 389, 397, 400, 409, 412, 416, 424, 431, 440, 453, 456, 459, 462, 465, 468, 471, 477, 480, 483, 489, 493, 496, 500, 505, 510, 516, 521, 525, 530, 538, 546, 552, 561, 572, 579, 588, 592, 599, 607, 611, 615, 622, 629, 637, 643, 652, 663, 671, 680, 685, 690, 694, 702, 707, 711, 714, 722, 726, 728, 733, 735, 740, 746, 752, 758, 764, 772, 777, 784, 789, 793, 798, 802, 807, 815, 821, 828, 835, 841, 848, 861, 870, 884, 899, 910, 918, 922, 933, 944, 947}
func (i Op) String() string {
if i >= Op(len(_Op_index)-1) {
"cmd/compile/internal/base"
"cmd/compile/internal/types"
"cmd/internal/src"
+ "go/constant"
)
// A Decl is a declaration of a const, type, or var. (A declared func is a Func.)
return n
}
+// A JumpTableStmt is used to implement switches. Its semantics are:
+//
+//	tmp := jt.Idx
+//	if tmp == Cases[0] goto Targets[0]
+//	if tmp == Cases[1] goto Targets[1]
+//	...
+//	if tmp == Cases[n] goto Targets[n]
+//
+// If tmp matches none of the Cases, control continues with the
+// statement following the JumpTableStmt.
+//
+// Note that a JumpTableStmt is more like a multiway-goto than
+// a multiway-if. In particular, the case bodies are just
+// labels to jump to, not full Nodes lists.
+type JumpTableStmt struct {
+ miniStmt
+
+ // Value used to index the jump table.
+ // We support only integer types that
+ // are at most the size of a uintptr.
+ Idx Node
+
+ // If Idx is equal to Cases[i], jump to Targets[i].
+ // Cases entries must be distinct and in increasing order.
+ // The length of Cases and Targets must be equal.
+ Cases []constant.Value
+ Targets []*types.Sym
+}
+
+// NewJumpTableStmt returns a new OJUMPTABLE statement positioned at pos
+// whose index expression is idx. Cases and Targets are left empty.
+func NewJumpTableStmt(pos src.XPos, idx Node) *JumpTableStmt {
+	stmt := new(JumpTableStmt)
+	stmt.Idx = idx
+	stmt.op = OJUMPTABLE
+	stmt.pos = pos
+	return stmt
+}
+
// An InlineMarkStmt is a marker placed just before an inlined body.
type InlineMarkStmt struct {
miniStmt
if b.NumControls() != 0 {
f.Fatalf("plain/dead block %s has a control value", b)
}
+ case BlockJumpTable:
+ if b.NumControls() != 1 {
+ f.Fatalf("jumpTable block %s has no control value", b)
+ }
}
if len(b.Succs) != 2 && b.Likely != BranchUnknown {
f.Fatalf("likeliness prediction %d for block %s with %d successors", b.Likely, b, len(b.Succs))
// MyImportPath provides the import name (roughly, the package) for the function being compiled.
MyImportPath() string
+
+ // LSym returns the name of the linker symbol of the function being compiled.
+ LSym() string
}
// NewConfig returns a new configuration object for the given architecture.
func (d TestFrontend) MyImportPath() string {
return "my/import/path"
}
+// LSym returns the fixed string "my/import/path.function" as the name
+// of the function's linker symbol.
+func (d TestFrontend) LSym() string {
+ return "my/import/path.function"
+}
var testTypes Types
(If cond yes no) => (NE (TESTB cond cond) yes no)
+(JumpTable idx) => (JUMPTABLE {makeJumpTableSym(b)} idx (LEAQ <typ.Uintptr> {makeJumpTableSym(b)} (SB)))
+
// Atomic loads. Other than preserving their ordering with respect to other loads, nothing special here.
(AtomicLoad8 ptr mem) => (MOVBatomicload ptr mem)
(AtomicLoad32 ptr mem) => (MOVLatomicload ptr mem)
{name: "NEF", controls: 1},
{name: "ORD", controls: 1}, // FP, ordered comparison (parity zero)
{name: "NAN", controls: 1}, // FP, unordered comparison (parity one)
+
+ // JUMPTABLE implements jump tables.
+ // Aux is the symbol (an *obj.LSym) for the jump table.
+ // control[0] is the index into the jump table.
+ // control[1] is the address of the jump table (the address of the symbol stored in Aux).
+ {name: "JUMPTABLE", controls: 2, aux: "Sym"},
}
archs = append(archs, arch{
// First [] [always, never]
var genericBlocks = []blockData{
- {name: "Plain"}, // a single successor
- {name: "If", controls: 1}, // if Controls[0] goto Succs[0] else goto Succs[1]
- {name: "Defer", controls: 1}, // Succs[0]=defer queued, Succs[1]=defer recovered. Controls[0] is call op (of memory type)
- {name: "Ret", controls: 1}, // no successors, Controls[0] value is memory result
- {name: "RetJmp", controls: 1}, // no successors, Controls[0] value is a tail call
- {name: "Exit", controls: 1}, // no successors, Controls[0] value generates a panic
+ {name: "Plain"}, // a single successor
+ {name: "If", controls: 1}, // if Controls[0] goto Succs[0] else goto Succs[1]
+ {name: "Defer", controls: 1}, // Succs[0]=defer queued, Succs[1]=defer recovered. Controls[0] is call op (of memory type)
+ {name: "Ret", controls: 1}, // no successors, Controls[0] value is memory result
+ {name: "RetJmp", controls: 1}, // no successors, Controls[0] value is a tail call
+ {name: "Exit", controls: 1}, // no successors, Controls[0] value generates a panic
+ {name: "JumpTable", controls: 1}, // multiple successors, the integer Controls[0] selects which one
// transient block state used for dead code removal
{name: "First"}, // 2 successors, always takes the first one (second is dead)
// auxType returns the Go type that this block should store in its aux field.
func (b blockData) auxType() string {
switch b.aux {
+ case "Sym":
+ return "Sym"
case "S390XCCMask", "S390XCCMaskInt8", "S390XCCMaskUint8":
return "s390x.CCMask"
case "S390XRotateParams":
BlockAMD64NEF
BlockAMD64ORD
BlockAMD64NAN
+ BlockAMD64JUMPTABLE
BlockARMEQ
BlockARMNE
BlockRet
BlockRetJmp
BlockExit
+ BlockJumpTable
BlockFirst
)
Block386ORD: "ORD",
Block386NAN: "NAN",
- BlockAMD64EQ: "EQ",
- BlockAMD64NE: "NE",
- BlockAMD64LT: "LT",
- BlockAMD64LE: "LE",
- BlockAMD64GT: "GT",
- BlockAMD64GE: "GE",
- BlockAMD64OS: "OS",
- BlockAMD64OC: "OC",
- BlockAMD64ULT: "ULT",
- BlockAMD64ULE: "ULE",
- BlockAMD64UGT: "UGT",
- BlockAMD64UGE: "UGE",
- BlockAMD64EQF: "EQF",
- BlockAMD64NEF: "NEF",
- BlockAMD64ORD: "ORD",
- BlockAMD64NAN: "NAN",
+ BlockAMD64EQ: "EQ",
+ BlockAMD64NE: "NE",
+ BlockAMD64LT: "LT",
+ BlockAMD64LE: "LE",
+ BlockAMD64GT: "GT",
+ BlockAMD64GE: "GE",
+ BlockAMD64OS: "OS",
+ BlockAMD64OC: "OC",
+ BlockAMD64ULT: "ULT",
+ BlockAMD64ULE: "ULE",
+ BlockAMD64UGT: "UGT",
+ BlockAMD64UGE: "UGE",
+ BlockAMD64EQF: "EQF",
+ BlockAMD64NEF: "NEF",
+ BlockAMD64ORD: "ORD",
+ BlockAMD64NAN: "NAN",
+ BlockAMD64JUMPTABLE: "JUMPTABLE",
BlockARMEQ: "EQ",
BlockARMNE: "NE",
BlockS390XCLIJ: "CLIJ",
BlockS390XCLGIJ: "CLGIJ",
- BlockPlain: "Plain",
- BlockIf: "If",
- BlockDefer: "Defer",
- BlockRet: "Ret",
- BlockRetJmp: "RetJmp",
- BlockExit: "Exit",
- BlockFirst: "First",
+ BlockPlain: "Plain",
+ BlockIf: "If",
+ BlockDefer: "Defer",
+ BlockRet: "Ret",
+ BlockRetJmp: "RetJmp",
+ BlockExit: "Exit",
+ BlockJumpTable: "JumpTable",
+ BlockFirst: "First",
}
func (k BlockKind) String() string { return blockString[k] }
package ssa
import (
+ "cmd/compile/internal/base"
"cmd/compile/internal/logopt"
"cmd/compile/internal/types"
"cmd/internal/obj"
fcb.N = x < 0
return fcb.encode()
}
+
+// makeJumpTableSym returns the linker symbol used for the jump table of
+// block b. The symbol is named "<function symbol name>.jump<block ID>",
+// is obtained through base.Ctxt.Lookup, and has AttrDuplicateOK set.
+func makeJumpTableSym(b *Block) *obj.LSym {
+	name := fmt.Sprintf("%s.jump%d", b.Func.fe.LSym(), b.ID)
+	sym := base.Ctxt.Lookup(name)
+	sym.Set(obj.AttrDuplicateOK, true)
+	return sym
+}
return false
}
func rewriteBlockAMD64(b *Block) bool {
+ typ := &b.Func.Config.Types
switch b.Kind {
case BlockAMD64EQ:
// match: (EQ (TESTL (SHLL (MOVLconst [1]) x) y))
b.resetWithControl(BlockAMD64NE, v0)
return true
}
+ case BlockJumpTable:
+ // match: (JumpTable idx)
+ // result: (JUMPTABLE {makeJumpTableSym(b)} idx (LEAQ <typ.Uintptr> {makeJumpTableSym(b)} (SB)))
+ for {
+ idx := b.Controls[0]
+ v0 := b.NewValue0(b.Pos, OpAMD64LEAQ, typ.Uintptr)
+ v0.Aux = symToAux(makeJumpTableSym(b))
+ v1 := b.NewValue0(b.Pos, OpSB, typ.Uintptr)
+ v0.AddArg(v1)
+ b.resetWithControl2(BlockAMD64JUMPTABLE, idx, v0)
+ b.Aux = symToAux(makeJumpTableSym(b))
+ return true
+ }
case BlockAMD64LE:
// match: (LE (InvertFlags cmp) yes no)
// result: (GE cmp yes no)
}
s.startBlock(bEnd)
+ case ir.OJUMPTABLE:
+ n := n.(*ir.JumpTableStmt)
+
+ // Make blocks we'll need.
+ jt := s.f.NewBlock(ssa.BlockJumpTable)
+ bEnd := s.f.NewBlock(ssa.BlockPlain)
+
+ // The only thing that needs evaluating is the index we're looking up.
+ idx := s.expr(n.Idx)
+ unsigned := idx.Type.IsUnsigned()
+
+ // Extend so we can do everything in uintptr arithmetic.
+ t := types.Types[types.TUINTPTR]
+ idx = s.conv(nil, idx, idx.Type, t)
+
+ // The ending condition for the current block decides whether we'll use
+ // the jump table at all.
+ // We check that min <= idx <= max and jump around the jump table
+ // if that test fails.
+ // We implement min <= idx <= max with 0 <= idx-min <= max-min, because
+ // we'll need idx-min anyway as the control value for the jump table.
+ var min, max uint64
+ if unsigned {
+ min, _ = constant.Uint64Val(n.Cases[0])
+ max, _ = constant.Uint64Val(n.Cases[len(n.Cases)-1])
+ } else {
+ mn, _ := constant.Int64Val(n.Cases[0])
+ mx, _ := constant.Int64Val(n.Cases[len(n.Cases)-1])
+ min = uint64(mn)
+ max = uint64(mx)
+ }
+ // Compare idx-min with max-min, to see if we can use the jump table.
+ idx = s.newValue2(s.ssaOp(ir.OSUB, t), t, idx, s.uintptrConstant(min))
+ width := s.uintptrConstant(max - min)
+ cmp := s.newValue2(s.ssaOp(ir.OLE, t), types.Types[types.TBOOL], idx, width)
+ b := s.endBlock()
+ b.Kind = ssa.BlockIf
+ b.SetControl(cmp)
+ b.AddEdgeTo(jt) // in range - use jump table
+ b.AddEdgeTo(bEnd) // out of range - no case in the jump table will trigger
+ b.Likely = ssa.BranchLikely // TODO: assumes missing the table entirely is unlikely. True?
+
+ // Build jump table block.
+ s.startBlock(jt)
+ jt.Pos = n.Pos()
+ if base.Flag.Cfg.SpectreIndex {
+ idx = s.newValue2(ssa.OpSpectreSliceIndex, t, idx, width)
+ }
+ jt.SetControl(idx)
+
+ // Figure out where we should go for each index in the table.
+ table := make([]*ssa.Block, max-min+1)
+ for i := range table {
+ table[i] = bEnd // default target
+ }
+ for i := range n.Targets {
+ c := n.Cases[i]
+ lab := s.label(n.Targets[i])
+ if lab.target == nil {
+ lab.target = s.f.NewBlock(ssa.BlockPlain)
+ }
+ var val uint64
+ if unsigned {
+ val, _ = constant.Uint64Val(c)
+ } else {
+ vl, _ := constant.Int64Val(c)
+ val = uint64(vl)
+ }
+ // Overwrite the default target.
+ table[val-min] = lab.target
+ }
+ for _, t := range table {
+ jt.AddEdgeTo(t)
+ }
+ s.endBlock()
+
+ s.startBlock(bEnd)
+
case ir.OVARDEF:
n := n.(*ir.UnaryExpr)
if !s.canSSA(n.X) {
return x
}
+// uintptrConstant returns an SSA constant of type uintptr holding v.
+// It emits an OpConst32 when pointers are 4 bytes wide and an OpConst64
+// otherwise.
+func (s *state) uintptrConstant(v uint64) *ssa.Value {
+	t := types.Types[types.TUINTPTR]
+	if s.config.PtrSize == 4 {
+		// Keep only the low 32 bits and sign-extend them. 32-bit SSA
+		// constants store their value in AuxInt as a sign-extended
+		// int32, so a value with bit 31 set (for example a uint32 case
+		// of 1<<31 or above) must not be stored as a positive 64-bit
+		// number. Values that already fit are unchanged by this.
+		return s.newValue0I(ssa.OpConst32, t, int64(int32(v)))
+	}
+	return s.newValue0I(ssa.OpConst64, t, int64(v))
+}
+
func (s *state) conv(n ir.Node, v *ssa.Value, ft, tt *types.Type) *ssa.Value {
if ft.IsBoolean() && tt.IsKind(types.TUINT8) {
// Bool -> uint8 is generated internally when indexing into runtime.staticbyte.
// and where they would like to go.
Branches []Branch
+ // JumpTables remembers all the jump tables we've seen.
+ JumpTables []*ssa.Block
+
// bstart remembers where each block starts (indexed by block ID)
bstart []*obj.Prog
}
+ // Resolve jump table destinations.
+ for _, jt := range s.JumpTables {
+ // Convert from *Block targets to *Prog targets.
+ targets := make([]*obj.Prog, len(jt.Succs))
+ for i, e := range jt.Succs {
+ targets[i] = s.bstart[e.Block().ID]
+ }
+ // Add to list of jump tables to be resolved at assembly time.
+ // The assembler converts from *Prog entries to absolute addresses
+ // once it knows instruction byte offsets.
+ fi := pp.CurFunc.LSym.Func()
+ fi.JumpTables = append(fi.JumpTables, obj.JumpTable{Sym: jt.Aux.(*obj.LSym), Targets: targets})
+ }
+
if e.log { // spew to stdout
filename := ""
for p := pp.Text; p != nil; p = p.Link {
return base.Ctxt.Pkgpath
}
+// LSym returns the name of the linker symbol of the function being
+// compiled (e.curfn).
+func (e *ssafn) LSym() string {
+ return e.curfn.LSym.Name
+}
+
func clobberBase(n ir.Node) ir.Node {
if n.Op() == ir.ODOT {
n := n.(*ir.SelectorExpr)
--- /dev/null
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package test
+
+import (
+ "math/bits"
+ "testing"
+)
+
+// BenchmarkSwitch8Predictable runs benchmarkSwitch8 with predictable set to true.
+func BenchmarkSwitch8Predictable(b *testing.B) {
+ benchmarkSwitch8(b, true)
+}
+// BenchmarkSwitch8Unpredictable runs benchmarkSwitch8 with predictable set to false.
+func BenchmarkSwitch8Unpredictable(b *testing.B) {
+ benchmarkSwitch8(b, false)
+}
+// benchmarkSwitch8 executes, b.N times, a dense 8-way switch (cases 0
+// through 7) on the low three bits of an rng value. predictable is
+// forwarded to rng.next to choose how the selector sequence evolves.
+// The switch statement is the code being measured, so its shape should
+// not be altered.
+func benchmarkSwitch8(b *testing.B, predictable bool) {
+ n := 0
+ rng := newRNG()
+ for i := 0; i < b.N; i++ {
+ rng = rng.next(predictable)
+ switch rng.value() & 7 {
+ case 0:
+ n += 1
+ case 1:
+ n += 2
+ case 2:
+ n += 3
+ case 3:
+ n += 4
+ case 4:
+ n += 5
+ case 5:
+ n += 6
+ case 6:
+ n += 7
+ case 7:
+ n += 8
+ }
+ }
+ // Store the total in sink (declared outside this view); presumably this
+ // keeps the compiler from discarding the loop's work.
+ sink = n
+}
+
+// BenchmarkSwitch32Predictable runs benchmarkSwitch32 with predictable set to true.
+func BenchmarkSwitch32Predictable(b *testing.B) {
+ benchmarkSwitch32(b, true)
+}
+// BenchmarkSwitch32Unpredictable runs benchmarkSwitch32 with predictable set to false.
+func BenchmarkSwitch32Unpredictable(b *testing.B) {
+ benchmarkSwitch32(b, false)
+}
+// benchmarkSwitch32 executes, b.N times, a switch on the low five bits
+// of an rng value. Eight clauses each list three consecutive values;
+// every fourth value (3, 7, ..., 31) is listed nowhere and takes the
+// default clause. predictable is forwarded to rng.next to choose how the
+// selector sequence evolves. The switch statement is the code being
+// measured, so its shape should not be altered.
+func benchmarkSwitch32(b *testing.B, predictable bool) {
+ n := 0
+ rng := newRNG()
+ for i := 0; i < b.N; i++ {
+ rng = rng.next(predictable)
+ switch rng.value() & 31 {
+ case 0, 1, 2:
+ n += 1
+ case 4, 5, 6:
+ n += 2
+ case 8, 9, 10:
+ n += 3
+ case 12, 13, 14:
+ n += 4
+ case 16, 17, 18:
+ n += 5
+ case 20, 21, 22:
+ n += 6
+ case 24, 25, 26:
+ n += 7
+ case 28, 29, 30:
+ n += 8
+ default:
+ n += 9
+ }
+ }
+ // Store the total in sink (declared outside this view); presumably this
+ // keeps the compiler from discarding the loop's work.
+ sink = n
+}
+
+// rng is a small deterministic number generator that lets the switch
+// benchmarks pick between a predictable and an unpredictable sequence
+// of selector values.
+type rng uint64
+
+// newRNG returns the generator's initial state, which is 1.
+func newRNG() rng {
+	return rng(1)
+}
+
+// next returns the state that follows r. When predictable is true the
+// state simply increases by one; otherwise it is rotated left by 13 bits
+// and multiplied by 0x3c374d (wrapping at 64 bits).
+func (r rng) next(predictable bool) rng {
+	if !predictable {
+		return rng(bits.RotateLeft64(uint64(r), 13) * 0x3c374d)
+	}
+	return r + 1
+}
+
+// value returns the current state as a uint64.
+func (r rng) value() uint64 {
+	return uint64(r)
+}
ir.OFALL,
ir.OGOTO,
ir.OLABEL,
+ ir.OJUMPTABLE,
ir.ODCL,
ir.ODCLCONST,
ir.ODCLTYPE,
"cmd/compile/internal/base"
"cmd/compile/internal/ir"
+ "cmd/compile/internal/ssagen"
"cmd/compile/internal/typecheck"
"cmd/compile/internal/types"
"cmd/internal/src"
}
func (s *exprSwitch) search(cc []exprClause, out *ir.Nodes) {
+ if s.tryJumpTable(cc, out) {
+ return
+ }
binarySearch(len(cc), out,
func(i int) ir.Node {
return ir.NewBinaryExpr(base.Pos, ir.OLE, s.exprname, cc[i-1].hi)
)
}
+// tryJumpTable attempts to implement the clauses cc with a single jump
+// table statement appended to out. It reports whether it did so; when it
+// returns false, out has not been modified.
+func (s *exprSwitch) tryJumpTable(cc []exprClause, out *ir.Nodes) bool {
+	const (
+		go119UseJumpTables = true
+		minCases           = 8 // have at least minCases cases in the switch
+		minDensity         = 4 // use at least 1 out of every minDensity entries
+	)
+
+	switch {
+	case !go119UseJumpTables || !ssagen.Arch.LinkArch.CanJumpTable:
+		return false
+	case len(cc) < minCases:
+		return false // not enough cases for it to be worth it
+	case cc[0].lo.Val().Kind() != constant.Int:
+		return false // e.g. float
+	case s.exprname.Type().Size() > int64(types.PtrSize):
+		return false // 64-bit switches on 32-bit archs
+	}
+
+	// The table needs one slot for every value from the lowest case to the
+	// highest case. Give up if that is more than minDensity slots per
+	// clause, i.e. if we would use less than a minimum fraction of the
+	// entries. For switch x {case 0: case 1000: case 2000:} we don't want
+	// to use a jump table.
+	one := constant.MakeInt64(1)
+	lo := cc[0].lo.Val()
+	hi := cc[len(cc)-1].hi.Val()
+	slots := constant.BinaryOp(constant.BinaryOp(hi, token.SUB, lo), token.ADD, one)
+	maxSlots := constant.MakeInt64(int64(len(cc)) * minDensity)
+	if constant.Compare(slots, token.GTR, maxSlots) {
+		return false
+	}
+
+	// Expand each clause's [lo, hi] range into one (case, target) pair per
+	// value, all pointing at that clause's goto label.
+	table := ir.NewJumpTableStmt(base.Pos, s.exprname)
+	for _, clause := range cc {
+		br := clause.jmp.(*ir.BranchStmt)
+		if br.Op() != ir.OGOTO || br.Label == nil {
+			panic("bad switch case body")
+		}
+		for v := clause.lo.Val(); constant.Compare(v, token.LEQ, clause.hi.Val()); v = constant.BinaryOp(v, token.ADD, one) {
+			table.Cases = append(table.Cases, v)
+			table.Targets = append(table.Targets, br.Label)
+		}
+	}
+	out.Append(table)
+	// TODO: handle the size portion of string switches using a jump table.
+	return true
+}
+
func (c *exprClause) test(exprname ir.Node) ir.Node {
// Integer range.
if c.hi != c.lo {
// then cases before i will be tested; otherwise, cases i and later.
//
// leaf(i, nif) should setup nif (an OIF node) to test case i. In
-// particular, it should set nif.Left and nif.Nbody.
+// particular, it should set nif.Cond and nif.Body.
func binarySearch(n int, out *ir.Nodes, less func(i int) ir.Node, leaf func(i int, nif *ir.IfStmt)) {
const binarySearchMin = 4 // minimum number of cases for binary search
ArgInfo *LSym // argument info for traceback
ArgLiveInfo *LSym // argument liveness info for traceback
WrapInfo *LSym // for wrapper, info of wrapped function
+ JumpTables []JumpTable
FuncInfoSym *LSym
}
+// JumpTable represents a table used for implementing multi-way
+// computed branching, used typically for implementing switches.
+// Sym is the table itself, and Targets is a list of target
+// instructions to go to for the computed branch index.
+type JumpTable struct {
+ Sym *LSym // symbol that holds the table's entries
+ Targets []*Prog // Targets[i] is the instruction that table entry i refers to
+}
+
// NewFuncInfo allocates and returns a FuncInfo for LSym.
func (s *LSym) NewFuncInfo() *FuncInfo {
if s.Extra != nil {
}
obj.MarkUnsafePoints(ctxt, s.Func().Text, newprog, useTLS, nil)
}
+
+ // Now that we know byte offsets, we can generate jump table entries.
+ // TODO: could this live in obj instead of obj/$ARCH?
+ for _, jt := range s.Func().JumpTables {
+ for i, p := range jt.Targets {
+ // The ith jumptable entry points to the p.Pc'th
+ // byte in the function symbol s.
+ jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, s, p.Pc)
+ }
+ }
}
func instinit(ctxt *obj.Link) {
// can combine adjacent loads into a single larger, possibly unaligned, load.
// Note that currently the optimizations must be able to handle little endian byte order.
CanMergeLoads bool
+
+ // CanJumpTable reports whether the backend can handle
+ // compiling a jump table.
+ CanJumpTable bool
}
// InFamily reports whether a is a member of any of the specified
MinLC: 1,
Alignment: 1,
CanMergeLoads: true,
+ CanJumpTable: true,
}
var ArchARM = &Arch{