1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
9 // An edgeMemCtr records a backedge, together with the memory and
10 // counter phi functions at the target of the backedge that must
11 // be updated when a rescheduling check replaces the backedge.
12 type edgeMemCtr struct {
// NOTE(review): the backedge field itself (an Edge, referenced as "e" by the
// field comments below and by emc.e users elsewhere) is elided in this view.
14 m *Value // phi for memory at dest of e
15 c *Value // phi for counter at dest of e
18 // A rewriteTarget is a value-argindex pair indicating
19 // where a rewrite is applied. Note that this is for values,
20 // not for block controls, because block controls are not targets
21 // for the rewrites performed in inserting rescheduling checks.
22 type rewriteTarget struct {
// NOTE(review): the fields below belong to the separate "rewrite" type
// (a new definition plus all the use sites to be patched); the lines in
// between, including rewriteTarget's own v/i fields, are elided in this view.
28 before, after *Value // before is the expected value before rewrite, after is the new value installed.
29 rewrites []rewriteTarget // all the targets for this rewrite.
// String returns a human-readable description of the rewrite: the
// before/after value pair followed by every recorded rewrite target.
// Used only for debug output.
32 func (r *rewrite) String() string {
33 s := "\n\tbefore=" + r.before.String() + ", after=" + r.after.String()
34 for _, rw := range r.rewrites {
35 s += ", (i=" + fmt.Sprint(rw.i) + ", v=" + rw.v.LongString() + ")"
41 const initialRescheduleCounterValue = 1021 // Largest 10-bit prime. 97 nSec loop bodies will check every 100 uSec.
43 // insertLoopReschedChecks inserts rescheduling checks on loop backedges.
44 func insertLoopReschedChecks(f *Func) {
45 // TODO: when split information is recorded in export data, insert checks only on backedges that can be reached on a split-call-free path.
47 // Loop reschedule checks decrement a per-function counter
48 // shared by all loops, and when the counter becomes non-positive
49 // a call is made to a rescheduling check in the runtime.
52 // 1. locate backedges.
53 // 2. Record memory definitions at block end so that
54 // the SSA graph for mem can be properly modified.
55 // 3. Define a counter and record its future uses (at backedges)
56 // (Same process as 2, applied to a single definition of the counter.
57 // difference for mem is that there are zero-to-many existing mem
58 // definitions, versus exactly one for the new counter.)
59 // 4. Ensure that phi functions that will-be-needed for mem and counter
60 // are present in the graph, initially with trivial inputs.
61 // 5. Record all to-be-modified uses of mem and counter;
62 // apply modifications (split into two steps to simplify and
63 // avoid nagging order-dependences).
64 // 6. Rewrite backedges to include counter check, reschedule check,
65 // and modify destination phi function appropriately with new
66 // definitions for mem and counter.
68 if f.NoSplit { // nosplit functions don't reschedule.
72 backedges := backedges(f)
73 if len(backedges) == 0 { // no backedges means no rescheduling checks.
77 lastMems := findLastMems(f)
83 fmt.Printf("before %s = %s\n", f.Name, sdom.treestructure(f.Entry))
86 tofixBackedges := []edgeMemCtr{}
88 for _, e := range backedges { // TODO: could filter here by calls in loops, if declared and inferred nosplit are recorded in export data.
89 tofixBackedges = append(tofixBackedges, edgeMemCtr{e, nil, nil})
92 // It's possible that there is no memory state (no global/pointer loads/stores or calls)
93 if lastMems[f.Entry.ID] == nil {
94 lastMems[f.Entry.ID] = f.Entry.NewValue0(f.Entry.Pos, OpInitMem, TypeMem)
97 memDefsAtBlockEnds := make([]*Value, f.NumBlocks()) // For each block, the mem def seen at its bottom. Could be from earlier block.
99 // Propagate last mem definitions forward through successor blocks.
101 for i := len(po) - 1; i >= 0; i-- {
103 mem := lastMems[b.ID]
104 for j := 0; mem == nil; j++ { // if there's no def, then there's no phi, so the visible mem is identical in all predecessors.
105 // loop because there might be backedges that haven't been visited yet.
106 mem = memDefsAtBlockEnds[b.Preds[j].b.ID]
108 memDefsAtBlockEnds[b.ID] = mem
111 // Set up counter. There are no phis etc pre-existing for it.
112 counter0 := f.Entry.NewValue0I(f.Entry.Pos, OpConst32, f.Config.fe.TypeInt32(), initialRescheduleCounterValue)
113 ctrDefsAtBlockEnds := make([]*Value, f.NumBlocks()) // For each block, def visible at its end, if that def will be used.
115 // There's a minor difference between memDefsAtBlockEnds and ctrDefsAtBlockEnds;
116 // because the counter only matters for loops and code that reaches them, it is nil for blocks where the ctr is no
117 // longer live. This will avoid creation of dead phi functions. This optimization is ignored for the mem variable
118 // because it is harder and also less likely to be helpful, though dead code elimination ought to clean this out anyhow.
120 for _, emc := range tofixBackedges {
122 // set initial uses of counter zero (note available-at-bottom and use are the same thing initially.)
123 // each back-edge will be rewritten to include a reschedule check, and that will use the counter.
124 src := e.b.Preds[e.i].b
125 ctrDefsAtBlockEnds[src.ID] = counter0
128 // Push uses towards root
129 for _, b := range f.postorder() {
130 bd := ctrDefsAtBlockEnds[b.ID]
134 for _, e := range b.Preds {
136 if ctrDefsAtBlockEnds[p.ID] == nil {
137 ctrDefsAtBlockEnds[p.ID] = bd
142 // Maps from block to newly-inserted phi function in block.
143 newmemphis := make(map[*Block]rewrite)
144 newctrphis := make(map[*Block]rewrite)
146 // Insert phi functions as necessary for future changes to flow graph.
147 for i, emc := range tofixBackedges {
151 // find the phi function for the memory input at "h", if there is one.
152 var headerMemPhi *Value // look for header mem phi
154 for _, v := range h.Values {
155 if v.Op == OpPhi && v.Type.IsMemory() {
160 if headerMemPhi == nil {
161 // if the header is nil, make a trivial phi from the dominator
162 mem0 := memDefsAtBlockEnds[idom[h.ID].ID]
163 headerMemPhi = newPhiFor(h, mem0)
164 newmemphis[h] = rewrite{before: mem0, after: headerMemPhi}
165 addDFphis(mem0, h, h, f, memDefsAtBlockEnds, newmemphis)
168 tofixBackedges[i].m = headerMemPhi
170 var headerCtrPhi *Value
171 rw, ok := newctrphis[h]
173 headerCtrPhi = newPhiFor(h, counter0)
174 newctrphis[h] = rewrite{before: counter0, after: headerCtrPhi}
175 addDFphis(counter0, h, h, f, ctrDefsAtBlockEnds, newctrphis)
177 headerCtrPhi = rw.after
179 tofixBackedges[i].c = headerCtrPhi
182 rewriteNewPhis(f.Entry, f.Entry, f, memDefsAtBlockEnds, newmemphis)
183 rewriteNewPhis(f.Entry, f.Entry, f, ctrDefsAtBlockEnds, newctrphis)
185 if f.pass.debug > 0 {
186 for b, r := range newmemphis {
187 fmt.Printf("b=%s, rewrite=%s\n", b, r.String())
190 for b, r := range newctrphis {
191 fmt.Printf("b=%s, rewrite=%s\n", b, r.String())
195 // Apply collected rewrites.
196 for _, r := range newmemphis {
197 for _, rw := range r.rewrites {
198 rw.v.SetArg(rw.i, r.after)
202 for _, r := range newctrphis {
203 for _, rw := range r.rewrites {
204 rw.v.SetArg(rw.i, r.after)
208 zero := f.Entry.NewValue0I(f.Entry.Pos, OpConst32, f.Config.fe.TypeInt32(), 0)
209 one := f.Entry.NewValue0I(f.Entry.Pos, OpConst32, f.Config.fe.TypeInt32(), 1)
211 // Rewrite backedges to include reschedule checks.
212 for _, emc := range tofixBackedges {
214 headerMemPhi := emc.m
215 headerCtrPhi := emc.c
220 mem0 := headerMemPhi.Args[i]
221 ctr0 := headerCtrPhi.Args[i]
223 // Because we're going to insert a rare-call, make sure the
224 // looping edge still looks likely.
225 likely := BranchLikely
227 likely = BranchUnlikely
231 // rewrite edge to include reschedule check
234 // bb.Succs[p.i] == Edge{h, i}
235 // h.Preds[i] == p == Edge{bb,p.i}
240 // if ctr1 <= 0 { goto sched }
243 // mem1 := call resched (mem0)
246 // ctr2 := phi(ctr1, counter0) // counter0 is the constant
247 // mem2 := phi(mem0, mem1)
250 // and correct arg i of headerMemPhi and headerCtrPhi
252 // EXCEPT: block containing only phi functions is bad
253 // for the register allocator. Therefore, there is no
254 // join, and instead branches targeting join instead target
255 // the header, and the other phi functions within header are
256 // adjusted for the additional input.
258 test := f.NewBlock(BlockIf)
259 sched := f.NewBlock(BlockPlain)
265 // if ctr1 <= 0 { goto sched }
267 ctr1 := test.NewValue2(bb.Pos, OpSub32, f.Config.fe.TypeInt32(), ctr0, one)
268 cmp := test.NewValue2(bb.Pos, OpLeq32, f.Config.fe.TypeBool(), ctr1, zero)
270 test.AddEdgeTo(sched) // if true
271 // if false -- rewrite edge to header.
272 // do NOT remove+add, because that will perturb all the other phi functions
273 // as well as messing up other edges to the header.
274 test.Succs = append(test.Succs, Edge{h, i})
275 h.Preds[i] = Edge{test, 1}
276 headerMemPhi.SetArg(i, mem0)
277 headerCtrPhi.SetArg(i, ctr1)
279 test.Likely = BranchUnlikely
282 // mem1 := call resched (mem0)
284 resched := f.Config.fe.Syslook("goschedguarded")
285 mem1 := sched.NewValue1A(bb.Pos, OpStaticCall, TypeMem, resched, mem0)
287 headerMemPhi.AddArg(mem1)
288 headerCtrPhi.AddArg(counter0)
290 bb.Succs[p.i] = Edge{test, 0}
291 test.Preds = append(test.Preds, Edge{bb, p.i})
293 // Must correct all the other phi functions in the header for new incoming edge.
294 // Except for mem and counter phis, it will be the same value seen on the original
295 // backedge at index i.
296 for _, v := range h.Values {
297 if v.Op == OpPhi && v != headerMemPhi && v != headerCtrPhi {
305 if f.pass.debug > 2 {
306 sdom = newSparseTree(f, f.Idom())
307 fmt.Printf("after %s = %s\n", f.Name, sdom.treestructure(f.Entry))
313 // newPhiFor inserts a new Phi function into b,
314 // with all inputs set to v.
// The phi is trivial (all arguments identical); callers later rewrite
// individual arguments to reflect the real incoming definitions.
315 func newPhiFor(b *Block, v *Value) *Value {
316 phiV := b.NewValue0(b.Pos, OpPhi, v.Type)
324 // rewriteNewPhis updates newphis[h] to record all places where the new phi function inserted
325 // in block h will replace a previous definition. Block b is the block currently being processed;
326 // if b has its own phi definition then it takes the place of h.
327 // defsForUses provides information about other definitions of the variable that are present
328 // (and if nil, indicates that the variable is no longer live).
// The recursion walks the dominator tree from b; rewrites are only
// recorded here, and applied later by the caller.
329 func rewriteNewPhis(h, b *Block, f *Func, defsForUses []*Value, newphis map[*Block]rewrite) {
330 // If b is a block with a new phi, then a new rewrite applies below it in the dominator tree.
331 if _, ok := newphis[b]; ok {
338 // Apply rewrites to this block
339 if x != nil { // don't waste time on the common case of no definition.
340 p := &change.rewrites
341 for _, v := range b.Values {
342 if v == y { // don't rewrite self -- phi inputs are handled below.
345 for i, w := range v.Args {
349 *p = append(*p, rewriteTarget{v, i})
353 // Rewrite appropriate inputs of phis reached in successors
354 // in dominance frontier, self, and dominated.
355 // If the variable def reaching uses in b is itself defined in b, then the new phi function
356 // does not reach the successors of b. (This assumes a bit about the structure of the
357 // phi use-def graph, but it's true for memory and the inserted counter.)
358 if dfu := defsForUses[b.ID]; dfu != nil && dfu.Block != b {
359 for _, e := range b.Succs {
361 if sphi, ok := newphis[s]; ok { // saves time to find the phi this way.
362 *p = append(*p, rewriteTarget{sphi.after, e.i})
365 for _, v := range s.Values {
366 if v.Op == OpPhi && v.Args[e.i] == x {
367 *p = append(*p, rewriteTarget{v, e.i})
378 for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling {
379 rewriteNewPhis(h, c, f, defsForUses, newphis) // TODO: convert to explicit stack from recursion.
383 // addDFphis creates new trivial phis that are necessary to correctly reflect (within SSA)
384 // a new definition for variable "x" inserted at h (usually but not necessarily a phi).
385 // These new phis can only occur at the dominance frontier of h; block s is in the dominance
386 // frontier of h if h does not strictly dominate s and if s is a successor of a block b where
387 // either b = h or h strictly dominates b.
388 // These newly created phis are themselves new definitions that may require addition of their
389 // own trivial phi functions in their own dominance frontier, and this is handled recursively.
390 func addDFphis(x *Value, h, b *Block, f *Func, defForUses []*Value, newphis map[*Block]rewrite) {
391 oldv := defForUses[b.ID]
392 if oldv != x { // either a new definition replacing x, or nil if it is proven that there are no uses reachable from b
398 for _, e := range b.Succs {
400 // check phi functions in the dominance frontier
401 if sdom.isAncestor(h, s) {
402 continue // h dominates s, successor of b, therefore s is not in the frontier.
404 if _, ok := newphis[s]; ok {
405 continue // successor s of b already has a new phi function, so there is no need to add another.
408 for _, v := range s.Values {
409 if v.Op == OpPhi && v.Args[e.i] == x {
410 continue outer // successor s of b has an old phi function, so there is no need to add another.
415 old := defForUses[idom[s.ID].ID] // new phi function is correct-but-redundant, combining value "old" on all inputs.
416 headerPhi := newPhiFor(s, old)
417 // the new phi will replace "old" in block s and all blocks dominated by s.
418 newphis[s] = rewrite{before: old, after: headerPhi} // record new phi, to have inputs labeled "old" rewritten to "headerPhi"
419 addDFphis(old, s, s, f, defForUses, newphis) // the new definition may also create new phi functions.
421 for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling {
422 addDFphis(x, h, c, f, defForUses, newphis) // TODO: convert to explicit stack from recursion.
426 // findLastMems maps block ids to last memory-output op in a block, if any.
// The returned slice is indexed by Block.ID; entries are nil for blocks
// with no memory-producing values (the caller handles propagation).
427 func findLastMems(f *Func) []*Value {
430 lastMems := make([]*Value, f.NumBlocks())
431 storeUse := f.newSparseSet(f.NumValues())
432 defer f.retSparseSet(storeUse)
433 for _, b := range f.Blocks {
434 // Find all the stores in this block. Categorize their uses:
435 // storeUse contains stores which are used by a subsequent store.
439 for _, v := range b.Values {
441 if v.Type.IsMemory() {
446 if v.Type.IsMemory() {
447 stores = append(stores, v)
448 if v.Op == OpSelect1 {
449 // Use the arg of the tuple-generating op.
452 for _, a := range v.Args {
453 if a.Block == b && a.Type.IsMemory() {
459 if len(stores) == 0 {
460 lastMems[b.ID] = memPhi
464 // find last store in the block
// The last store is the unique one not consumed by any other store
// in this block; more than one such store (or none) indicates a
// malformed memory chain, reported via Fatalf below.
466 for _, v := range stores {
467 if storeUse.contains(v.ID) {
471 b.Fatalf("two final stores - simultaneous live stores %s %s", last, v)
476 b.Fatalf("no last store found - cycle?")
478 lastMems[b.ID] = last
// A backedgesState is a DFS traversal frame used by backedges below.
// NOTE(review): the struct body is elided in this view; usage at the
// call sites shows a block field ("b") and an int successor index ("i").
483 type backedgesState struct {
488 // backedges returns a slice of successor edges that are back
489 // edges. For reducible loops, edge.b is the header.
// Implementation: iterative DFS with an explicit stack. A successor
// already marked notExplored (i.e. currently on the DFS stack) closes
// a cycle, so the edge to it is a backedge.
490 func backedges(f *Func) []Edge {
492 mark := make([]markKind, f.NumBlocks())
493 stack := []backedgesState{}
495 mark[f.Entry.ID] = notExplored
496 stack = append(stack, backedgesState{f.Entry, 0})
501 if x.i < len(x.b.Succs) {
505 if mark[s.ID] == notFound {
506 mark[s.ID] = notExplored
507 stack = append(stack, backedgesState{s, 0})
508 } else if mark[s.ID] == notExplored {
509 edges = append(edges, e)
513 stack = stack[0 : l-1]