// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Register allocation.
//
// We use a version of a linear scan register allocator. We treat the
// whole function as a single long basic block and run through
// it using a greedy register allocator. Then all merge edges
// (those targeting a block with len(Preds)>1) are processed to
// shuffle data into the place that the target of the edge expects.
//
// The greedy allocator moves values into registers just before they
// are used, spills registers only when necessary, and spills the
// value whose next use is farthest in the future.
//
// The register allocator requires that a block is not scheduled until
// at least one of its predecessors has been scheduled. The most recent
// such predecessor provides the starting register state for a block.
//
// It also requires that there are no critical edges (critical =
// comes from a block with >1 successor and goes to a block with >1
// predecessor). This makes it easy to add fixup code on merge edges -
// the source of a merge edge has only one successor, so we can add
// fixup code to the end of that block.
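//
// For example, suppose block p ends in a conditional branch to b1 and b2,
// and b2 also has another predecessor q. The edge p->b2 is critical: fixup
// code appended to p would also run on the p->b1 path. With critical edges
// split beforehand, every merge edge comes from a single-successor block
// that can safely hold the fixup code.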
//
// During the normal course of the allocator, we might throw a still-live
// value out of all registers. When that value is subsequently used, we must
// load it from a slot on the stack. We must also issue an instruction to
// initialize that stack location with a copy of v.
//
// pre-regalloc:
//	(1) v = Op ...
//	(2) x = Op ...
//	(3) ... = Op v ...
//
// post-regalloc:
//	(1) v = Op ... : AX // computes v, store result in AX
//	    s = StoreReg v // spill v to a stack slot
//	(2) x = Op ... : AX // some other op uses AX
//	    c = LoadReg s : CX // restore v from stack slot
//	(3) ... = Op c ... // use the restored value
//
// Allocation occurs normally until we reach (3) and we realize we have
// a use of v and it isn't in any register. At that point, we allocate
// a spill (a StoreReg) for v. We can't determine the correct place for
// the spill at this point, so we allocate the spill as blockless initially.
// The restore is then generated to load v back into a register so it can
// be used. Subsequent uses of v will use the restored value c instead.
//
// What remains is the question of where to schedule the spill.
// During allocation, we keep track of the dominator of all restores of v.
// The spill of v must dominate that block. The spill must also be issued at
// a point where v is still in a register.
//
// To find the right place, start at b, the block which dominates all restores.
//  - If b is v.Block, then issue the spill right after v.
//    It is known to be in a register at that point, and dominates any restores.
//  - Otherwise, if v is in a register at the start of b,
//    put the spill of v at the start of b.
//  - Otherwise, set b = immediate dominator of b, and repeat.
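//
// A rough sketch of that walk (the helper names here are illustrative
// only, not functions defined in this file):
//
//	b := dominatorOfAllRestores(v)
//	for {
//		if b == v.Block {
//			// Issue the spill right after v.
//			break
//		}
//		if inRegisterAtStartOf(v, b) {
//			// Issue the spill at the start of b.
//			break
//		}
//		b = idom(b)
//	}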
//
// Phi values are special, as always. We define two kinds of phis, those
// where the merge happens in a register (a "register" phi) and those where
// the merge happens in a stack location (a "stack" phi).
//
// A register phi must have the phi and all of its inputs allocated to the
// same register. Register phis are spilled similarly to regular ops.
//
// A stack phi must have the phi and all of its inputs allocated to the same
// stack location. Stack phis start out life already spilled - each phi
// input must be a store (using StoreReg) at the end of the corresponding
// predecessor block.
//
//	b1: y = ... : AX        b2: z = ... : BX
//	    y2 = StoreReg y         z2 = StoreReg z
//
//	b3: x = phi(y2, z2)
//
// The stack allocator knows that StoreReg args of stack-allocated phis
// must be allocated to the same stack slot as the phi that uses them.
// x is now a spilled value and a restore must appear before its first use.
//
// TODO
//
// Use an affinity graph to mark two values which should use the
// same register. This affinity graph will be used to prefer certain
// registers for allocation. This affinity helps eliminate moves that
// are required for phi implementations and helps generate allocations
// for 2-register architectures.
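//
// For example, a two-operand instruction like ADDQ on amd64 overwrites
// its first input (resultInArg0 below), so its output and input 0 want
// the same register; an affinity edge between them would bias allocation
// toward an assignment that needs no extra copy.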
//
// Note: regalloc generates a not-quite-SSA output. If we have:
//
//	b1: x = ... : AX
//	    x2 = StoreReg x
//	    ... AX gets reused for something else ...
//	    if ... goto b3 else b4
//
//	b3: x3 = LoadReg x2 : BX    b4: x4 = LoadReg x2 : CX
//	    ... use x3 ...              ... use x4 ...
//
//	b2: ... use x3 ...
//
// If b3 is the primary predecessor of b2, then we use x3 in b2 and
// add an x4:CX->BX copy at the end of b4.
// But the definition of x3 doesn't dominate b2. We should really
// insert an extra phi at the start of b2 (x5=phi(x3,x4):BX) to keep
// SSA form. For now, we ignore this problem as remaining in strict
// SSA form isn't needed after regalloc. We'll just leave the use
// of x3 not dominated by the definition of x3, and the CX->BX copy
// will have no use (so don't run deadcode after regalloc!).
// TODO: maybe we should introduce these extra phis?
117 "cmd/compile/internal/base"
118 "cmd/compile/internal/ir"
119 "cmd/compile/internal/types"
// distance is a measure of how far into the future values are used.
// distance is measured in units of instructions.
const (
	likelyDistance   = 1
	normalDistance   = 10
	unlikelyDistance = 100
)
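// These distances weight spill decisions: for example, a use that is only
// reachable through an unlikely branch counts as roughly unlikelyDistance
// instructions away, so the allocator prefers to evict values whose next
// use is unlikely over values needed soon on the hot path.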
// regalloc performs register allocation on f. It sets f.RegAlloc
// to the resulting allocation.
func regalloc(f *Func) {
	var s regAllocState
	s.init(f)
	s.regalloc(f)
}

const noRegister register = 255
// For bulk initializing
var noRegisters [32]register = [32]register{
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
}
// A regMask encodes a set of machine registers.
// TODO: regMask -> regSet?
func (m regMask) String() string {
	s := ""
	for r := register(0); m != 0; r++ {
		if m>>r&1 == 0 {
			continue
		}
		m &^= regMask(1) << r
		if s != "" {
			s += " "
		}
		s += fmt.Sprintf("r%d", r)
	}
	return s
}
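// For example, a mask with bits 0 and 2 set prints as "r0 r2".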
func (s *regAllocState) RegMaskString(m regMask) string {
	str := ""
	for r := register(0); m != 0; r++ {
		if m>>r&1 == 0 {
			continue
		}
		m &^= regMask(1) << r
		if str != "" {
			str += " "
		}
		str += s.registers[r].String()
	}
	return str
}
// countRegs returns the number of set bits in the register mask.
func countRegs(r regMask) int {
	return bits.OnesCount64(uint64(r))
}
// pickReg picks an arbitrary register from the register mask.
func pickReg(r regMask) register {
	if r == 0 {
		panic("can't pick a register from an empty set")
	}
	// pick the lowest one
	return register(bits.TrailingZeros64(uint64(r)))
}
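// For example, pickReg(0b1100) returns register 2, the lowest set bit.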
type use struct {
	dist int32    // distance from start of the block to a use of a value
	pos  src.XPos // source position of the use
	next *use     // linked list of uses of a value in nondecreasing dist order
}
// A valState records the register allocation state for a (pre-regalloc) value.
type valState struct {
	regs              regMask // the set of registers holding a Value (usually just one)
	uses              *use    // list of uses in this block
	spill             *Value  // spilled copy of the Value (if any)
	restoreMin        int32   // minimum of all restores' blocks' sdom.entry
	restoreMax        int32   // maximum of all restores' blocks' sdom.exit
	needReg           bool    // cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags()
	rematerializeable bool    // cached value of v.rematerializeable()
}
type regState struct {
	v *Value // Original (preregalloc) Value stored in this register.
	c *Value // A Value equal to v which is currently in a register. Might be v or a copy of it.
	// If a register is unused, v==c==nil
}
type regAllocState struct {
	f *Func

	sdom        SparseTree
	registers   []Register
	numRegs     register
	SPReg       register
	SBReg       register
	GReg        register
	allocatable regMask

	// live values at the end of each block. live[b.ID] is a list of value IDs
	// which are live at the end of b, together with a count of how many instructions
	// forward to the next use.
	live [][]liveInfo
	// desired register assignments at the end of each block.
	// Note that this is a static map computed before allocation occurs. Dynamic
	// register desires (from partially completed allocations) will trump
	// this information.
	desired []desiredState

	// current state of each (preregalloc) Value
	values []valState

	// ID of SP, SB values
	sp, sb ID

	// For each Value, map from its value ID back to the
	// preregalloc Value it was derived from.
	orig []*Value

	// current state of each register
	regs []regState

	// registers that contain values which can't be kicked out
	nospill regMask

	// mask of registers currently in use
	used regMask

	// mask of registers used in the current instruction
	tmpused regMask

	// current block we're working on
	curBlock *Block

	// cache of use records
	freeUseRecords *use

	// endRegs[blockid] is the register state at the end of each block.
	// encoded as a set of endReg records.
	endRegs [][]endReg

	// startRegs[blockid] is the register state at the start of merge blocks.
	// saved state does not include the state of phi ops in the block.
	startRegs [][]startReg

	// spillLive[blockid] is the set of live spills at the end of each block
	spillLive [][]ID

	// a set of copies we generated to move things around, and
	// whether it is used in shuffle. Unused copies will be deleted.
	copies map[*Value]bool

	loopnest *loopnest

	// choose a good order in which to visit blocks for allocation purposes.
	visitOrder []*Block

	// blockOrder[b.ID] corresponds to the index of block b in visitOrder.
	blockOrder []int32

	// whether to insert instructions that clobber dead registers at call sites
	doClobber bool
}
type endReg struct {
	r register
	v *Value // pre-regalloc value held in this register (TODO: can we use ID here?)
	c *Value // cached version of the value
}

type startReg struct {
	r   register
	v   *Value   // pre-regalloc value needed in this register
	c   *Value   // cached version of the value
	pos src.XPos // source position of use of this register
}
// freeReg frees up register r. Any current user of r is kicked out.
func (s *regAllocState) freeReg(r register) {
	v := s.regs[r].v
	if v == nil {
		s.f.Fatalf("tried to free an already free register %d\n", r)
	}

	// Mark r as unused.
	if s.f.pass.debug > regDebug {
		fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c)
	}
	s.regs[r] = regState{}
	s.values[v.ID].regs &^= regMask(1) << r
	s.used &^= regMask(1) << r
}
// freeRegs frees up all registers listed in m.
func (s *regAllocState) freeRegs(m regMask) {
	for m&s.used != 0 {
		s.freeReg(pickReg(m & s.used))
	}
}
// clobberRegs inserts instructions that clobber registers listed in m.
func (s *regAllocState) clobberRegs(m regMask) {
	m &= s.allocatable & s.f.Config.gpRegMask // only integer registers can contain pointers, so only clobber them
	for m != 0 {
		r := pickReg(m)
		m &^= 1 << r
		x := s.curBlock.NewValue0(src.NoXPos, OpClobberReg, types.TypeVoid)
		s.f.setHome(x, &s.registers[r])
	}
}
// setOrig records that c's original value is the same as
// v's original value.
func (s *regAllocState) setOrig(c *Value, v *Value) {
	for int(c.ID) >= len(s.orig) {
		s.orig = append(s.orig, nil)
	}
	if s.orig[c.ID] != nil {
		s.f.Fatalf("orig value set twice %s %s", c, v)
	}
	s.orig[c.ID] = s.orig[v.ID]
}
// assignReg assigns register r to hold c, a copy of v.
// r must be unused.
func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
	if s.f.pass.debug > regDebug {
		fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c)
	}
	if s.regs[r].v != nil {
		s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
	}

	s.regs[r] = regState{v, c}
	s.values[v.ID].regs |= regMask(1) << r
	s.used |= regMask(1) << r
	s.f.setHome(c, &s.registers[r])
}
// allocReg chooses a register from the set of registers in mask.
// If there is no unused register, a Value will be kicked out of
// a register to make room.
func (s *regAllocState) allocReg(mask regMask, v *Value) register {
	if v.OnWasmStack {
		return noRegister
	}

	mask &= s.allocatable
	mask &^= s.nospill
	if mask == 0 {
		s.f.Fatalf("no register available for %s", v.LongString())
	}

	// Pick an unused register if one is available.
	if mask&^s.used != 0 {
		return pickReg(mask &^ s.used)
	}

	// Pick a value to spill. Spill the value with the
	// farthest-in-the-future use.
	// TODO: Prefer registers with already spilled Values?
	// TODO: Modify preference using affinity graph.
	// TODO: if a single value is in multiple registers, spill one of them
	// before spilling a value in just a single register.

	// Find a register to spill. We spill the register containing the value
	// whose next use is as far in the future as possible.
	// https://en.wikipedia.org/wiki/Page_replacement_algorithm#The_theoretically_optimal_page_replacement_algorithm
	var r register
	maxuse := int32(-1)
	for t := register(0); t < s.numRegs; t++ {
		if mask>>t&1 == 0 {
			continue
		}
		v := s.regs[t].v
		if n := s.values[v.ID].uses.dist; n > maxuse {
			// v's next use is farther in the future than any value
			// we've seen so far. A new best spill candidate.
			r = t
			maxuse = n
		}
	}
	if maxuse == -1 {
		s.f.Fatalf("couldn't find register to spill")
	}

	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm {
		// TODO(neelance): In theory this should never happen, because all wasm registers are equal.
		// So if there is still a free register, the allocation should have picked that one in the first place instead of
		// trying to kick some other value out. In practice, this case does happen and it breaks the stack optimization.
		s.freeReg(r)
		return r
	}

	// Try to move it around before kicking out, if there is a free register.
	// We generate a Copy and record it. It will be deleted if never used.
	v2 := s.regs[r].v
	m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r)
	if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 {
		r2 := pickReg(m)
		c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c)
		s.copies[c] = false
		if s.f.pass.debug > regDebug {
			fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2])
		}
		s.setOrig(c, v2)
		s.assignReg(r2, v2, c)
	}
	s.freeReg(r)
	return r
}
// makeSpill returns a Value which represents the spilled value of v.
// b is the block in which the spill is used.
func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
	vi := &s.values[v.ID]
	if vi.spill != nil {
		// Final block not known - keep track of subtree where restores reside.
		vi.restoreMin = min32(vi.restoreMin, s.sdom[b.ID].entry)
		vi.restoreMax = max32(vi.restoreMax, s.sdom[b.ID].exit)
		return vi.spill
	}
	// Make a spill for v. We don't know where we want
	// to put it yet, so we leave it blockless for now.
	spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)
	// We also don't know what the spill's arg will be.
	// Leave it argless for now.
	s.setOrig(spill, v)
	vi.spill = spill
	vi.restoreMin = s.sdom[b.ID].entry
	vi.restoreMax = s.sdom[b.ID].exit
	return spill
}
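// Note: sdom entry/exit numbers give each block a nested interval in a
// walk of the dominator tree, so a block c dominates every recorded
// restore exactly when s.sdom[c.ID].entry <= restoreMin and
// s.sdom[c.ID].exit >= restoreMax. placeSpills relies on this below.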
// allocValToReg allocates v to a register selected from regMask and
// returns the register copy of v. Any previous user is kicked out and spilled
// (if necessary). Load code is added at the current pc. If nospill is set the
// allocated register is marked nospill so the assignment cannot be
// undone until the caller allows it by clearing nospill. Returns a
// *Value which is either v or a copy of v allocated to the chosen register.
func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value {
	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm && v.rematerializeable() {
		c := v.copyIntoWithXPos(s.curBlock, pos)
		c.OnWasmStack = true
		s.setOrig(c, v)
		return c
	}
	if v.OnWasmStack {
		return v
	}

	vi := &s.values[v.ID]
	pos = pos.WithNotStmt()
	// Check if v is already in a requested register.
	if mask&vi.regs != 0 {
		r := pickReg(mask & vi.regs)
		if s.regs[r].v != v || s.regs[r].c == nil {
			panic("bad register state")
		}
		if nospill {
			s.nospill |= regMask(1) << r
		}
		return s.regs[r].c
	}

	var r register
	// If nospill is set, the value is used immediately, so it can live on the WebAssembly stack.
	onWasmStack := nospill && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm
	if !onWasmStack {
		// Allocate a register.
		r = s.allocReg(mask, v)
	}

	// Allocate v to the new register.
	var c *Value
	if vi.regs != 0 {
		// Copy from a register that v is already in.
		r2 := pickReg(vi.regs)
		if s.regs[r2].v != v {
			panic("bad register state")
		}
		c = s.curBlock.NewValue1(pos, OpCopy, v.Type, s.regs[r2].c)
	} else if v.rematerializeable() {
		// Rematerialize instead of loading from the spill location.
		c = v.copyIntoWithXPos(s.curBlock, pos)
	} else {
		// Load v from its spill location.
		spill := s.makeSpill(v, s.curBlock)
		if s.f.pass.debug > logSpills {
			s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
		}
		c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
	}

	s.setOrig(c, v)

	if onWasmStack {
		c.OnWasmStack = true
		return c
	}

	s.assignReg(r, v, c)
	if c.Op == OpLoadReg && s.isGReg(r) {
		s.f.Fatalf("allocValToReg.OpLoadReg targeting g: " + c.LongString())
	}
	if nospill {
		s.nospill |= regMask(1) << r
	}
	return c
}
// isLeaf reports whether f is a leaf function, i.e. performs no calls.
func isLeaf(f *Func) bool {
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if opcodeTable[v.Op].call {
				return false
			}
		}
	}
	return true
}
func (s *regAllocState) init(f *Func) {
	s.f = f
	s.f.RegAlloc = s.f.Cache.locs[:0]
	s.registers = f.Config.registers
	if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) {
		s.f.Fatalf("bad number of registers: %d", nr)
	} else {
		s.numRegs = register(nr)
	}
	// Locate SP, SB, and g registers.
	s.SPReg = noRegister
	s.SBReg = noRegister
	s.GReg = noRegister
	for r := register(0); r < s.numRegs; r++ {
		switch s.registers[r].String() {
		case "SP":
			s.SPReg = r
		case "SB":
			s.SBReg = r
		case "g":
			s.GReg = r
		}
	}
	// Make sure we found all required registers.
	switch noRegister {
	case s.SPReg:
		s.f.Fatalf("no SP register found")
	case s.SBReg:
		s.f.Fatalf("no SB register found")
	case s.GReg:
		if f.Config.hasGReg {
			s.f.Fatalf("no g register found")
		}
	}

	// Figure out which registers we're allowed to use.
	s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask
	s.allocatable &^= 1 << s.SPReg
	s.allocatable &^= 1 << s.SBReg
	if s.f.Config.hasGReg {
		s.allocatable &^= 1 << s.GReg
	}
	if buildcfg.FramePointerEnabled && s.f.Config.FPReg >= 0 {
		s.allocatable &^= 1 << uint(s.f.Config.FPReg)
	}
	if s.f.Config.LinkReg != -1 {
		if isLeaf(f) {
			// Leaf functions don't save/restore the link register.
			s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
		}
	}
	if s.f.Config.ctxt.Flag_dynlink {
		switch s.f.Config.arch {
		case "amd64":
			s.allocatable &^= 1 << 15 // R15
		case "arm":
			s.allocatable &^= 1 << 9 // R9
		case "ppc64le": // R2 already reserved.
			// nothing to do
		case "arm64":
			// nothing to do
		case "386":
			// nothing to do.
			// Note that for Flag_shared (position independent code)
			// we do need to be careful, but that carefulness is hidden
			// in the rewrite rules so we always have a free register
			// available for global load/stores. See gen/386.rules (search for Flag_shared).
		case "s390x":
			s.allocatable &^= 1 << 11 // R11
		default:
			s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch)
		}
	}

	// Linear scan register allocation can be influenced by the order in which blocks appear.
	// Decouple the register allocation order from the generated block order.
	// This also creates an opportunity for experiments to find a better order.
	s.visitOrder = layoutRegallocOrder(f)

	// Compute block order. This array allows us to distinguish forward edges
	// from backward edges and compute how far they go.
	s.blockOrder = make([]int32, f.NumBlocks())
	for i, b := range s.visitOrder {
		s.blockOrder[b.ID] = int32(i)
	}
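	// Note: with this ordering, an edge p->b comes from an already-visited
	// predecessor exactly when s.blockOrder[p.ID] < s.blockOrder[b.ID]; the
	// merge-block logic below uses this to skip unvisited predecessors.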
	s.regs = make([]regState, s.numRegs)
	nv := f.NumValues()
	if cap(s.f.Cache.regallocValues) >= nv {
		s.f.Cache.regallocValues = s.f.Cache.regallocValues[:nv]
	} else {
		s.f.Cache.regallocValues = make([]valState, nv)
	}
	s.values = s.f.Cache.regallocValues
	s.orig = make([]*Value, nv)
	s.copies = make(map[*Value]bool)
	for _, b := range s.visitOrder {
		for _, v := range b.Values {
			if !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple() {
				s.values[v.ID].needReg = true
				s.values[v.ID].rematerializeable = v.rematerializeable()
				s.orig[v.ID] = v
			}
			// Note: needReg is false for values returning Tuple types.
			// Instead, we mark the corresponding Selects as needReg.
		}
	}
	s.computeLive()

	s.endRegs = make([][]endReg, f.NumBlocks())
	s.startRegs = make([][]startReg, f.NumBlocks())
	s.spillLive = make([][]ID, f.NumBlocks())
	s.sdom = f.Sdom()

	// wasm: Mark instructions that can be optimized to have their values only on the WebAssembly stack.
	if f.Config.ctxt.Arch.Arch == sys.ArchWasm {
		canLiveOnStack := f.newSparseSet(f.NumValues())
		defer f.retSparseSet(canLiveOnStack)
		for _, b := range f.Blocks {
			// New block. Clear candidate set.
			canLiveOnStack.clear()
			for _, c := range b.ControlValues() {
				if c.Uses == 1 && !opcodeTable[c.Op].generic {
					canLiveOnStack.add(c.ID)
				}
			}
			// Walking backwards.
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				if canLiveOnStack.contains(v.ID) {
					v.OnWasmStack = true
				} else {
					// Value can not live on stack. Values are not allowed to be reordered, so clear candidate set.
					canLiveOnStack.clear()
				}
				for _, arg := range v.Args {
					// Value can live on the stack if:
					// - it is only used once
					// - it is used in the same basic block
					// - it is not a "mem" value
					// - it is a WebAssembly op
					if arg.Uses == 1 && arg.Block == v.Block && !arg.Type.IsMemory() && !opcodeTable[arg.Op].generic {
						canLiveOnStack.add(arg.ID)
					}
				}
			}
		}
	}

	// The clobberdeadreg experiment inserts code to clobber dead registers
	// at call sites.
	// Ignore huge functions to avoid doing too much work.
	if base.Flag.ClobberDeadReg && len(s.f.Blocks) <= 10000 {
		// TODO: honor GOCLOBBERDEADHASH, or maybe GOSSAHASH.
		s.doClobber = true
	}
}
// Adds a use record for id at distance dist from the start of the block.
// All calls to addUse must happen with nonincreasing dist.
func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {
	r := s.freeUseRecords
	if r != nil {
		s.freeUseRecords = r.next
	} else {
		r = &use{}
	}
	r.dist = dist
	r.pos = pos
	r.next = s.values[id].uses
	s.values[id].uses = r
	if r.next != nil && dist > r.next.dist {
		s.f.Fatalf("uses added in wrong order")
	}
}
// advanceUses advances the uses of v's args from the state before v to the state after v.
// Any values which have no more uses are deallocated from registers.
func (s *regAllocState) advanceUses(v *Value) {
	for _, a := range v.Args {
		if !s.values[a.ID].needReg {
			continue
		}
		ai := &s.values[a.ID]
		r := ai.uses
		ai.uses = r.next
		if r.next == nil {
			// Value is dead, free all registers that hold it.
			s.freeRegs(ai.regs)
		}
		r.next = s.freeUseRecords
		s.freeUseRecords = r
	}
}
// liveAfterCurrentInstruction reports whether v is live after
// the current instruction is completed. v must be used by the
// current instruction.
func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
	u := s.values[v.ID].uses
	if u == nil {
		panic(fmt.Errorf("u is nil, v = %s, s.values[v.ID] = %v", v.LongString(), s.values[v.ID]))
	}
	d := u.dist
	for u != nil && u.dist == d {
		u = u.next
	}
	return u != nil && u.dist > d
}
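// Note: an instruction may use the same value more than once (e.g. v+v),
// so all use records at the current distance d are skipped before checking
// whether any later use remains.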
// Sets the state of the registers to that encoded in regs.
func (s *regAllocState) setState(regs []endReg) {
	s.freeRegs(s.used)
	for _, x := range regs {
		s.assignReg(x.r, x.v, x.c)
	}
}
// compatRegs returns the set of registers which can store a type t.
func (s *regAllocState) compatRegs(t *types.Type) regMask {
	var m regMask
	if t.IsTuple() || t.IsFlags() {
		return 0
	}
	if t.IsFloat() || t == types.TypeInt128 {
		if t.Kind() == types.TFLOAT32 && s.f.Config.fp32RegMask != 0 {
			m = s.f.Config.fp32RegMask
		} else if t.Kind() == types.TFLOAT64 && s.f.Config.fp64RegMask != 0 {
			m = s.f.Config.fp64RegMask
		} else {
			m = s.f.Config.fpRegMask
		}
	} else {
		m = s.f.Config.gpRegMask
	}
	return m & s.allocatable
}
// regspec returns the regInfo for operation op.
func (s *regAllocState) regspec(v *Value) regInfo {
	op := v.Op
	if op == OpConvert {
		// OpConvert is a generic op, so it doesn't have a
		// register set in the static table. It can use any
		// allocatable integer register.
		m := s.allocatable & s.f.Config.gpRegMask
		return regInfo{inputs: []inputInfo{{regs: m}}, outputs: []outputInfo{{regs: m}}}
	}
	if op == OpArgIntReg {
		reg := v.Block.Func.Config.intParamRegs[v.AuxInt8()]
		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
	}
	if op == OpArgFloatReg {
		reg := v.Block.Func.Config.floatParamRegs[v.AuxInt8()]
		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
	}
	if op.IsCall() {
		if ac, ok := v.Aux.(*AuxCall); ok && ac.reg != nil {
			return *ac.Reg(&opcodeTable[op].reg, s.f.Config)
		}
	}
	if op == OpMakeResult && s.f.OwnAux.reg != nil {
		return *s.f.OwnAux.ResultReg(s.f.Config)
	}
	return opcodeTable[op].reg
}
// isGReg reports whether r is the g register.
func (s *regAllocState) isGReg(r register) bool {
	return s.f.Config.hasGReg && s.GReg == r
}
func (s *regAllocState) regalloc(f *Func) {
	regValLiveSet := f.newSparseSet(f.NumValues()) // set of values that may be live in register
	defer f.retSparseSet(regValLiveSet)
	var oldSched []*Value
	var phis []*Value
	var phiRegs []register
	var args []*Value

	// Data structure used for computing desired registers.
	var desired desiredState

	// Desired registers for inputs & outputs for each instruction in the block.
	type dentry struct {
		out [4]register    // desired output registers
		in  [3][4]register // desired input registers (for inputs 0,1, and 2)
	}
	var dinfo []dentry

	if f.Entry != f.Blocks[0] {
		f.Fatalf("entry block must be first")
	}
	for _, b := range s.visitOrder {
		if s.f.pass.debug > regDebug {
			fmt.Printf("Begin processing block %v\n", b)
		}
		s.curBlock = b

		// Initialize regValLiveSet and uses fields for this block.
		// Walk backwards through the block doing liveness analysis.
		regValLiveSet.clear()
		for _, e := range s.live[b.ID] {
			s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block
			regValLiveSet.add(e.ID)
		}
		for _, v := range b.ControlValues() {
			if s.values[v.ID].needReg {
				s.addUse(v.ID, int32(len(b.Values)), b.Pos) // pseudo-use by control values
				regValLiveSet.add(v.ID)
			}
		}
		for i := len(b.Values) - 1; i >= 0; i-- {
			v := b.Values[i]
			regValLiveSet.remove(v.ID)
			if v.Op == OpPhi {
				// Remove v from the live set, but don't add
				// any inputs. This is the state the len(b.Preds)>1
				// case below desires; it wants to process phis specially.
				continue
			}
			if opcodeTable[v.Op].call {
				// Function call clobbers all the registers but SP and SB.
				regValLiveSet.clear()
				if s.sp != 0 && s.values[s.sp].uses != nil {
					regValLiveSet.add(s.sp)
				}
				if s.sb != 0 && s.values[s.sb].uses != nil {
					regValLiveSet.add(s.sb)
				}
			}
			for _, a := range v.Args {
				if !s.values[a.ID].needReg {
					continue
				}
				s.addUse(a.ID, int32(i), v.Pos)
				regValLiveSet.add(a.ID)
			}
		}
		if s.f.pass.debug > regDebug {
			fmt.Printf("use distances for %s\n", b)
			for i := range s.values {
				vi := &s.values[i]
				u := vi.uses
				if u == nil {
					continue
				}
				fmt.Printf("  v%d:", i)
				for u != nil {
					fmt.Printf(" %d", u.dist)
					u = u.next
				}
				fmt.Println()
			}
		}
		// Make a copy of the block schedule so we can generate a new one in place.
		// We make a separate copy for phis and regular values.
		nphi := 0
		for _, v := range b.Values {
			if v.Op != OpPhi {
				break
			}
			nphi++
		}
		phis = append(phis[:0], b.Values[:nphi]...)
		oldSched = append(oldSched[:0], b.Values[nphi:]...)
		b.Values = b.Values[:0]

		// Initialize start state of block.
		if b == f.Entry {
			// Regalloc state is empty to start.
			if nphi > 0 {
				f.Fatalf("phis in entry block")
			}
		} else if len(b.Preds) == 1 {
			// Start regalloc state with the end state of the previous block.
			s.setState(s.endRegs[b.Preds[0].b.ID])
			if nphi > 0 {
				f.Fatalf("phis in single-predecessor block")
			}
			// Drop any values which are no longer live.
			// This may happen because at the end of p, a value may be
			// live but only used by some other successor of p.
			for r := register(0); r < s.numRegs; r++ {
				v := s.regs[r].v
				if v != nil && !regValLiveSet.contains(v.ID) {
					s.freeReg(r)
				}
			}
		} else {
			// This is the complicated case. We have more than one predecessor,
			// which means we may have Phi ops.

			// Start with the final register state of the predecessor with least spill values.
			// This is based on the following points:
			// 1. Fewer live spills indicate that the register pressure of this path is smaller,
			//    so the values of this block are more likely to be allocated to registers.
			// 2. Avoid the predecessor that contains the function call, because the predecessor that
			//    contains the function call usually generates a lot of spills and loses the previously
			//    allocated registers.
			// TODO: Improve this part. At least the size of endRegs of the predecessor also has
			// an impact on the code size and compiler speed. But it is not easy to find a simple
			// and efficient method that combines multiple factors.
			idx := -1
			for i, p := range b.Preds {
				// If the predecessor has not been visited yet, skip it because its end state
				// (endRegs and spillLive) has not been computed yet.
				pb := p.b
				if s.blockOrder[pb.ID] >= s.blockOrder[b.ID] {
					continue
				}
				if idx == -1 {
					idx = i
					continue
				}
				pSel := b.Preds[idx].b
				if len(s.spillLive[pb.ID]) < len(s.spillLive[pSel.ID]) {
					idx = i
				} else if len(s.spillLive[pb.ID]) == len(s.spillLive[pSel.ID]) {
					// Use a bit of likely information. After the critical-edge-removal pass,
					// pb and pSel must be plain blocks, so check edge pb->pb.Preds instead of edge pb->b.
					// TODO: improve the prediction of the likely predecessor. The following
					// method is only suitable for the simplest cases. For complex cases,
					// the prediction may be inaccurate, but this does not affect the
					// correctness of the program.
					// According to the layout algorithm, the predecessor with the
					// smaller blockOrder is the true branch, and the test results show
					// that it is better to choose the predecessor with a smaller
					// blockOrder than no choice.
					if pb.likelyBranch() && !pSel.likelyBranch() || s.blockOrder[pb.ID] < s.blockOrder[pSel.ID] {
						idx = i
					}
				}
			}
			if idx < 0 {
				f.Fatalf("bad visitOrder, no predecessor of %s has been visited before it", b)
			}
			p := b.Preds[idx].b
			s.setState(s.endRegs[p.ID])

			if s.f.pass.debug > regDebug {
				fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
				for _, x := range s.endRegs[p.ID] {
					fmt.Printf("  %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c)
				}
			}
			// Decide on registers for phi ops. Use the registers determined
			// by the primary predecessor if we can.
			// TODO: pick best of (already processed) predecessors?
			// Majority vote? Deepest nesting level?
			phiRegs = phiRegs[:0]
			var phiUsed regMask

			for _, v := range phis {
				if !s.values[v.ID].needReg {
					phiRegs = append(phiRegs, noRegister)
					continue
				}
				a := v.Args[idx]
				// Some instructions target not-allocatable registers.
				// They're not suitable for further (phi-function) allocation.
				m := s.values[a.ID].regs &^ phiUsed & s.allocatable
				if m != 0 {
					r := pickReg(m)
					phiUsed |= regMask(1) << r
					phiRegs = append(phiRegs, r)
				} else {
					phiRegs = append(phiRegs, noRegister)
				}
			}
			// Second pass - deallocate all in-register phi inputs.
			for i, v := range phis {
				if !s.values[v.ID].needReg {
					continue
				}
				a := v.Args[idx]
				r := phiRegs[i]
				if r == noRegister {
					continue
				}
				if regValLiveSet.contains(a.ID) {
					// Input value is still live (it is used by something other than Phi).
					// Try to move it around before kicking out, if there is a free register.
					// We generate a Copy in the predecessor block and record it. It will be
					// deleted later if never used.
					//
					// Pick a free register. At this point some registers used in the predecessor
					// block may have been deallocated. Those are the ones used for Phis. Exclude
					// them (and they are not going to be helpful anyway).
					m := s.compatRegs(a.Type) &^ s.used &^ phiUsed
					if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
						r2 := pickReg(m)
						c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c)
						s.copies[c] = false
						if s.f.pass.debug > regDebug {
							fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2])
						}
						s.setOrig(c, a)
						s.assignReg(r2, a, c)
						s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
					}
				}
				s.freeReg(r)
			}

			// Copy phi ops into new schedule.
			b.Values = append(b.Values, phis...)
			// Third pass - pick registers for phis whose input
			// was not in a register in the primary predecessor.
			for i, v := range phis {
				if !s.values[v.ID].needReg {
					continue
				}
				if phiRegs[i] != noRegister {
					continue
				}
				m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
				// If one of the other inputs of v is in a register, and the register is available,
				// select this register, which can save some unnecessary copies.
				for i, pe := range b.Preds {
					if i == idx {
						continue
					}
					ri := noRegister
					for _, er := range s.endRegs[pe.b.ID] {
						if er.v == s.orig[v.Args[i].ID] {
							ri = er.r
							break
						}
					}
					if ri != noRegister && m>>ri&1 != 0 {
						m = regMask(1) << ri
						break
					}
				}
				if m != 0 {
					r := pickReg(m)
					phiRegs[i] = r
					phiUsed |= regMask(1) << r
				}
			}

			// Set registers for phis. Add phi spill code.
			for i, v := range phis {
				if !s.values[v.ID].needReg {
					continue
				}
				r := phiRegs[i]
				if r == noRegister {
					// stack-based phi
					// Spills will be inserted in all the predecessors below.
					s.values[v.ID].spill = v // v starts life spilled
					continue
				}
				// register-based phi
				s.assignReg(r, v, v)
			}
			// Deallocate any values which are no longer live. Phis are excluded.
			for r := register(0); r < s.numRegs; r++ {
				if phiUsed>>r&1 != 0 {
					continue
				}
				v := s.regs[r].v
				if v != nil && !regValLiveSet.contains(v.ID) {
					s.freeReg(r)
				}
			}

			// Save the starting state for use by merge edges.
			// We append to a stack allocated variable that we'll
			// later copy into s.startRegs in one fell swoop, to save
			// on allocations.
			regList := make([]startReg, 0, 32)
			for r := register(0); r < s.numRegs; r++ {
				v := s.regs[r].v
				if v == nil {
					continue
				}
				if phiUsed>>r&1 != 0 {
					// Skip registers that phis used, we'll handle those
					// specially during merge edge processing.
					continue
				}
				regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
			}
			s.startRegs[b.ID] = make([]startReg, len(regList))
			copy(s.startRegs[b.ID], regList)

			if s.f.pass.debug > regDebug {
				fmt.Printf("after phis\n")
				for _, x := range s.startRegs[b.ID] {
					fmt.Printf("  %s: v%d\n", &s.registers[x.r], x.v.ID)
				}
			}
		}
		// Allocate space to record the desired registers for each value.
		if l := len(oldSched); cap(dinfo) < l {
			dinfo = make([]dentry, l)
		} else {
			dinfo = dinfo[:l]
			for i := range dinfo {
				dinfo[i] = dentry{}
			}
		}

		// Load static desired register info at the end of the block.
		desired.copy(&s.desired[b.ID])

		// Check actual assigned registers at the start of the next block(s).
		// Dynamically assigned registers will trump the static
		// desired registers computed during liveness analysis.
		// Note that we do this phase after startRegs is set above, so that
		// we get the right behavior for a block which branches to itself.
		for _, e := range b.Succs {
			succ := e.b
			// TODO: prioritize likely successor?
			for _, x := range s.startRegs[succ.ID] {
				desired.add(x.v.ID, x.r)
			}
			// Process phi ops in succ.
			pidx := e.i
			for _, v := range succ.Values {
				if v.Op != OpPhi {
					break
				}
				if !s.values[v.ID].needReg {
					continue
				}
				rp, ok := s.f.getHome(v.ID).(*Register)
				if !ok {
					// If v is not assigned a register, pick a register assigned to one of v's inputs.
					// Hopefully v will get assigned that register later.
					// If the inputs have allocated register information, add it to desired,
					// which may reduce spill or copy operations when the register is available.
					for _, a := range v.Args {
						rp, ok = s.f.getHome(a.ID).(*Register)
						if ok {
							break
						}
					}
					if !ok {
						continue
					}
				}
				desired.add(v.Args[pidx].ID, register(rp.num))
			}
		}
		// Walk values backwards computing desired register info.
		// See computeLive for more comments.
		for i := len(oldSched) - 1; i >= 0; i-- {
			v := oldSched[i]
			prefs := desired.remove(v.ID)
			regspec := s.regspec(v)
			desired.clobber(regspec.clobbers)
			for _, j := range regspec.inputs {
				if countRegs(j.regs) != 1 {
					continue
				}
				desired.clobber(j.regs)
				desired.add(v.Args[j.idx].ID, pickReg(j.regs))
			}
			if opcodeTable[v.Op].resultInArg0 {
				if opcodeTable[v.Op].commutative {
					desired.addList(v.Args[1].ID, prefs)
				}
				desired.addList(v.Args[0].ID, prefs)
			}
			// Save desired registers for this value.
			dinfo[i].out = prefs
			for j, a := range v.Args {
				if j >= len(dinfo[i].in) {
					break
				}
				dinfo[i].in[j] = desired.get(a.ID)
			}
		}
		// Process all the non-phi values.
		for idx, v := range oldSched {
			if s.f.pass.debug > regDebug {
				fmt.Printf("  processing %s\n", v.LongString())
			}
			regspec := s.regspec(v)
			if v.Op == OpPhi {
				f.Fatalf("phi %s not at start of block", v)
			}
			if v.Op == OpSP {
				s.assignReg(s.SPReg, v, v)
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				s.sp = v.ID
				continue
			}
			if v.Op == OpSB {
				s.assignReg(s.SBReg, v, v)
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				s.sb = v.ID
				continue
			}
			if v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN {
				if s.values[v.ID].needReg {
					if v.Op == OpSelectN {
						s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocResults)[int(v.AuxInt)].(*Register).num), v, v)
					} else {
						var i = 0
						if v.Op == OpSelect1 {
							i = 1
						}
						s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v)
					}
				}
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				continue
			}
			if v.Op == OpGetG && s.f.Config.hasGReg {
				// use hardware g register
				if s.regs[s.GReg].v != nil {
					s.freeReg(s.GReg) // kick out the old value
				}
				s.assignReg(s.GReg, v, v)
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				continue
			}
			if v.Op == OpArg {
				// Args are "pre-spilled" values. We don't allocate
				// any register here. We just set up the spill pointer to
				// point at itself and any later user will restore it to use it.
				s.values[v.ID].spill = v
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				continue
			}
			if v.Op == OpKeepAlive {
				// Make sure the argument to v is still live here.
				s.advanceUses(v)
				a := v.Args[0]
				vi := &s.values[a.ID]
				if vi.regs == 0 && !vi.rematerializeable {
					// Use the spill location.
					// This forces later liveness analysis to make the
					// value live at this point.
					v.SetArg(0, s.makeSpill(a, b))
				} else if _, ok := a.Aux.(*ir.Name); ok && vi.rematerializeable {
					// Rematerializeable value with a gc.Node. This is the address of
					// a stack object (e.g. an LEAQ). Keep the object live.
					// Change it to VarLive, which is what plive expects for locals.
					v.Op = OpVarLive
					v.SetArgs1(v.Args[1])
					v.Aux = a.Aux
				} else {
					// In-register and rematerializeable values are already live.
					// These are typically rematerializeable constants like nil,
					// or values of a variable that were modified since the last call.
					v.Op = OpCopy
					v.SetArgs1(v.Args[1])
				}
				b.Values = append(b.Values, v)
				continue
			}
			if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
				// No register allocation required (or none specified yet)
				if s.doClobber && v.Op.IsCall() {
					s.clobberRegs(regspec.clobbers)
				}
				s.freeRegs(regspec.clobbers)
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				continue
			}
			if s.values[v.ID].rematerializeable {
				// Value is rematerializeable, don't issue it here.
				// It will get issued just before each use (see
				// allocValueToReg).
				for _, a := range v.Args {
					a.Uses--
				}
				s.advanceUses(v)
				continue
			}

			if s.f.pass.debug > regDebug {
				fmt.Printf("value %s\n", v.LongString())
				fmt.Printf("  out:")
				for _, r := range dinfo[idx].out {
					if r != noRegister {
						fmt.Printf(" %s", &s.registers[r])
					}
				}
				fmt.Println()
				for i := 0; i < len(v.Args) && i < 3; i++ {
					fmt.Printf("  in%d:", i)
					for _, r := range dinfo[idx].in[i] {
						if r != noRegister {
							fmt.Printf(" %s", &s.registers[r])
						}
					}
					fmt.Println()
				}
			}
			// Move arguments to registers. Process in an ordering defined
			// by the register specification (most constrained first).
			args = append(args[:0], v.Args...)
			for _, i := range regspec.inputs {
				mask := i.regs
				if mask&s.values[args[i.idx].ID].regs == 0 {
					// Need a new register for the input.
					mask &= s.allocatable
					mask &^= s.nospill
					// Use desired register if available.
					if i.idx < 3 {
						for _, r := range dinfo[idx].in[i.idx] {
							if r != noRegister && (mask&^s.used)>>r&1 != 0 {
								// Desired register is allowed and unused.
								mask = regMask(1) << r
								break
							}
						}
					}
					// Avoid registers we're saving for other values.
					if mask&^desired.avoid != 0 {
						mask &^= desired.avoid
					}
				}
				args[i.idx] = s.allocValToReg(args[i.idx], mask, true, v.Pos)
			}
			// If the output clobbers the input register, make sure we have
			// at least two copies of the input register so we don't
			// have to reload the value from the spill location.
			if opcodeTable[v.Op].resultInArg0 {
				var m regMask
				if !s.liveAfterCurrentInstruction(v.Args[0]) {
					// arg0 is dead. We can clobber its register.
					goto ok
				}
				if opcodeTable[v.Op].commutative && !s.liveAfterCurrentInstruction(v.Args[1]) {
					args[0], args[1] = args[1], args[0]
					goto ok
				}
				if s.values[v.Args[0].ID].rematerializeable {
					// We can rematerialize the input, don't worry about clobbering it.
					goto ok
				}
				if opcodeTable[v.Op].commutative && s.values[v.Args[1].ID].rematerializeable {
					args[0], args[1] = args[1], args[0]
					goto ok
				}
				if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
					// we have at least 2 copies of arg0. We can afford to clobber one.
					goto ok
				}
				if opcodeTable[v.Op].commutative && countRegs(s.values[v.Args[1].ID].regs) >= 2 {
					args[0], args[1] = args[1], args[0]
					goto ok
				}

				// We can't overwrite arg0 (or arg1, if commutative). So we
				// need to make a copy of an input so we have a register we can modify.

				// Possible new registers to copy into.
				m = s.compatRegs(v.Args[0].Type) &^ s.used
				if m == 0 {
					// No free registers. In this case we'll just clobber
					// an input and future uses of that input must use a restore.
					// TODO(khr): We should really do this like allocReg does it,
					// spilling the value with the most distant next use.
					goto ok
				}

				// Try to move an input to the desired output.
				for _, r := range dinfo[idx].out {
					if r != noRegister && m>>r&1 != 0 {
						m = regMask(1) << r
						args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
						// Note: we update args[0] so the instruction will
						// use the register copy we just made.
						goto ok
					}
				}
				// Try to copy input to its desired location & use its old
				// location as the result register.
				for _, r := range dinfo[idx].in[0] {
					if r != noRegister && m>>r&1 != 0 {
						m = regMask(1) << r
						c := s.allocValToReg(v.Args[0], m, true, v.Pos)
						s.copies[c] = false
						// Note: no update to args[0] so the instruction will
						// use the original copy.
						goto ok
					}
				}
				if opcodeTable[v.Op].commutative {
					for _, r := range dinfo[idx].in[1] {
						if r != noRegister && m>>r&1 != 0 {
							m = regMask(1) << r
							c := s.allocValToReg(v.Args[1], m, true, v.Pos)
							s.copies[c] = false
							args[0], args[1] = args[1], args[0]
							goto ok
						}
					}
				}

				// Avoid future fixed uses if we can.
				if m&^desired.avoid != 0 {
					m &^= desired.avoid
				}
				// Save input 0 to a new register so we can clobber it.
				c := s.allocValToReg(v.Args[0], m, true, v.Pos)
				s.copies[c] = false
			}

		ok:
			// Now that all args are in regs, we're ready to issue the value itself.
			// Before we pick a register for the output value, allow input registers
			// to be deallocated. We do this here so that the output can use the
			// same register as a dying input.
			if !opcodeTable[v.Op].resultNotInArgs {
				s.tmpused = s.nospill
				s.nospill = 0
				s.advanceUses(v) // frees any registers holding args that are no longer live
			}

			// Dump any registers which will be clobbered
			if s.doClobber && v.Op.IsCall() {
				// clobber registers that are marked as clobber in regmask, but
				// don't clobber inputs.
				s.clobberRegs(regspec.clobbers &^ s.tmpused &^ s.nospill)
			}
			s.freeRegs(regspec.clobbers)
			s.tmpused |= regspec.clobbers
			// Pick registers for outputs.
			{
				outRegs := noRegisters // TODO if this is costly, hoist and clear incrementally below.
				maxOutIdx := -1
				var used regMask
				for _, out := range regspec.outputs {
					mask := out.regs & s.allocatable &^ used
					if mask == 0 {
						continue
					}
					if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
						if !opcodeTable[v.Op].commutative {
							// Output must use the same register as input 0.
							r := register(s.f.getHome(args[0].ID).(*Register).num)
							mask = regMask(1) << r
						} else {
							// Output must use the same register as input 0 or 1.
							r0 := register(s.f.getHome(args[0].ID).(*Register).num)
							r1 := register(s.f.getHome(args[1].ID).(*Register).num)
							// Check r0 and r1 for desired output register.
							found := false
							for _, r := range dinfo[idx].out {
								if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
									mask = regMask(1) << r
									found = true
									if r == r1 {
										args[0], args[1] = args[1], args[0]
									}
									break
								}
							}
							if !found {
								// Neither are desired, pick r0.
								mask = regMask(1) << r0
							}
						}
					}
					for _, r := range dinfo[idx].out {
						if r != noRegister && (mask&^s.used)>>r&1 != 0 {
							// Desired register is allowed and unused.
							mask = regMask(1) << r
							break
						}
					}
					// Avoid registers we're saving for other values.
					if mask&^desired.avoid&^s.nospill != 0 {
						mask &^= desired.avoid
					}
					r := s.allocReg(mask, v)
					if out.idx > maxOutIdx {
						maxOutIdx = out.idx
					}
					outRegs[out.idx] = r
					used |= regMask(1) << r
					s.tmpused |= regMask(1) << r
				}
				// Record register choices
				if v.Type.IsTuple() {
					var outLocs LocPair
					if r := outRegs[0]; r != noRegister {
						outLocs[0] = &s.registers[r]
					}
					if r := outRegs[1]; r != noRegister {
						outLocs[1] = &s.registers[r]
					}
					s.f.setHome(v, outLocs)
					// Note that subsequent SelectX instructions will do the assignReg calls.
				} else if v.Type.IsResults() {
					// preallocate outLocs to the right size, which is maxOutIdx+1
					outLocs := make(LocResults, maxOutIdx+1, maxOutIdx+1)
					for i := 0; i <= maxOutIdx; i++ {
						if r := outRegs[i]; r != noRegister {
							outLocs[i] = &s.registers[r]
						}
					}
					s.f.setHome(v, outLocs)
				} else {
					if r := outRegs[0]; r != noRegister {
						s.assignReg(r, v, v)
					}
				}
			}
			// deallocate dead args, if we have not done so
			if opcodeTable[v.Op].resultNotInArgs {
				s.nospill = 0
				s.advanceUses(v) // frees any registers holding args that are no longer live
			}
			s.tmpused = 0

			// Issue the Value itself.
			for i, a := range args {
				v.SetArg(i, a) // use register version of arguments
			}
			b.Values = append(b.Values, v)
		}
		// Copy the control values - we need this so we can reduce the
		// uses property of these values later.
		controls := append(make([]*Value, 0, 2), b.ControlValues()...)

		// Load control values into registers.
		for i, v := range b.ControlValues() {
			if !s.values[v.ID].needReg {
				continue
			}
			if s.f.pass.debug > regDebug {
				fmt.Printf("  processing control %s\n", v.LongString())
			}
			// We assume that a control input can be passed in any
			// type-compatible register. If this turns out not to be true,
			// we'll need to introduce a regspec for a block's control value.
			b.ReplaceControl(i, s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos))
		}

		// Reduce the uses of the control values once registers have been loaded.
		// This loop is equivalent to the advanceUses method.
		for _, v := range controls {
			vi := &s.values[v.ID]
			if !vi.needReg {
				continue
			}
			// Remove this use from the uses list.
			u := vi.uses
			vi.uses = u.next
			if u.next == nil {
				s.freeRegs(vi.regs) // value is dead
			}
			u.next = s.freeUseRecords
			s.freeUseRecords = u
		}
		// If we are approaching a merge point and we are the primary
		// predecessor of it, find live values that we use soon after
		// the merge point and promote them to registers now.
		if len(b.Succs) == 1 {
			if s.f.Config.hasGReg && s.regs[s.GReg].v != nil {
				s.freeReg(s.GReg) // Spill value in G register before any merge.
			}
			// For this to be worthwhile, the loop must have no calls in it.
			top := b.Succs[0].b
			loop := s.loopnest.b2l[top.ID]
			if loop == nil || loop.header != top || loop.containsUnavoidableCall {
				goto badloop
			}

			// TODO: sort by distance, pick the closest ones?
			for _, live := range s.live[b.ID] {
				if live.dist >= unlikelyDistance {
					// Don't preload anything live after the loop.
					continue
				}
				vid := live.ID
				vi := &s.values[vid]
				if vi.regs != 0 {
					continue
				}
				if vi.rematerializeable {
					continue
				}
				v := s.orig[vid]
				m := s.compatRegs(v.Type) &^ s.used
				// Use desired register if available.
			outerloop:
				for _, e := range desired.entries {
					if e.ID != v.ID {
						continue
					}
					for _, r := range e.regs {
						if r != noRegister && m>>r&1 != 0 {
							m = regMask(1) << r
							break outerloop
						}
					}
				}
				if m&^desired.avoid != 0 {
					m &^= desired.avoid
				}
				if m != 0 {
					s.allocValToReg(v, m, false, b.Pos)
				}
			}
		}
	badloop:
		;
		// Save end-of-block register state.
		// First count how many; this cuts allocations in half.
		k := 0
		for r := register(0); r < s.numRegs; r++ {
			v := s.regs[r].v
			if v == nil {
				continue
			}
			k++
		}
		regList := make([]endReg, 0, k)
		for r := register(0); r < s.numRegs; r++ {
			v := s.regs[r].v
			if v == nil {
				continue
			}
			regList = append(regList, endReg{r, v, s.regs[r].c})
		}
		s.endRegs[b.ID] = regList

		if checkEnabled {
			regValLiveSet.clear()
			for _, x := range s.live[b.ID] {
				regValLiveSet.add(x.ID)
			}
			for r := register(0); r < s.numRegs; r++ {
				v := s.regs[r].v
				if v == nil {
					continue
				}
				if !regValLiveSet.contains(v.ID) {
					s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
				}
			}
		}
		// If a value is live at the end of the block and
		// isn't in a register, generate a use for the spill location.
		// We need to remember this information so that
		// the liveness analysis in stackalloc is correct.
		for _, e := range s.live[b.ID] {
			vi := &s.values[e.ID]
			if vi.regs != 0 {
				// in a register, we'll use that source for the merge.
				continue
			}
			if vi.rematerializeable {
				// we'll rematerialize during the merge.
				continue
			}
			if s.f.pass.debug > regDebug {
				fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
			}
			spill := s.makeSpill(s.orig[e.ID], b)
			s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
		}
		// Clear any final uses.
		// All that is left should be the pseudo-uses added for values which
		// are live at the end of b.
		for _, e := range s.live[b.ID] {
			u := s.values[e.ID].uses
			if u == nil {
				f.Fatalf("live at end, no uses v%d", e.ID)
			}
			if u.next != nil {
				f.Fatalf("live at end, too many uses v%d", e.ID)
			}
			s.values[e.ID].uses = nil
			u.next = s.freeUseRecords
			s.freeUseRecords = u
		}
	}
	// Decide where the spills we generated will go.
	s.placeSpills()

	// Anything that didn't get a register gets a stack location here.
	// (StoreReg, stack-based phis, inputs, ...)
	stacklive := stackalloc(s.f, s.spillLive)

	// Fix up all merge edges.
	s.shuffle(stacklive)

	// Erase any copies we never used.
	// Also, an unused copy might be the only use of another copy,
	// so continue erasing until we reach a fixed point.
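	// For example, if c1 = Copy v and c2 = Copy c1 and only c2 is unused,
	// deleting c2 drops c1's use count to zero, so another pass is needed.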
	for {
		progress := false
		for c, used := range s.copies {
			if !used && c.Uses == 0 {
				if s.f.pass.debug > regDebug {
					fmt.Printf("delete copied value %s\n", c.LongString())
				}
				c.RemoveArg(0)
				f.freeValue(c)
				delete(s.copies, c)
				progress = true
			}
		}
		if !progress {
			break
		}
	}

	for _, b := range s.visitOrder {
		i := 0
		for _, v := range b.Values {
			if v.Op == OpInvalid {
				continue
			}
			b.Values[i] = v
			i++
		}
		b.Values = b.Values[:i]
	}
}
func (s *regAllocState) placeSpills() {
	f := s.f

	// Precompute some useful info.
	phiRegs := make([]regMask, f.NumBlocks())
	for _, b := range s.visitOrder {
		var m regMask
		for _, v := range b.Values {
			if v.Op != OpPhi {
				break
			}
			if r, ok := f.getHome(v.ID).(*Register); ok {
				m |= regMask(1) << uint(r.num)
			}
		}
		phiRegs[b.ID] = m
	}

	// Start maps block IDs to the list of spills
	// that go at the start of the block (but after any phis).
	start := map[ID][]*Value{}
	// After maps value IDs to the list of spills
	// that go immediately after that value ID.
	after := map[ID][]*Value{}

	for i := range s.values {
		vi := s.values[i]
		spill := vi.spill
		if spill == nil {
			continue
		}
		if spill.Block != nil {
			// Some spills are already fully set up,
			// like OpArgs and stack-based phis.
			continue
		}
		v := s.orig[i]

		// Walk down the dominator tree looking for a good place to
		// put the spill of v. At the start "best" is the best place
		// we have found so far.
		// TODO: find a way to make this O(1) without arbitrary cutoffs.
		if v == nil {
			panic(fmt.Errorf("nil v, s.orig[%d], vi = %v, spill = %s", i, vi, spill.LongString()))
		}
		best := v.Block
		bestArg := v
		var bestDepth int16
		if l := s.loopnest.b2l[best.ID]; l != nil {
			bestDepth = l.depth
		}
		b := best
		const maxSpillSearch = 100
		for i := 0; i < maxSpillSearch; i++ {
			// Find the child of b in the dominator tree which
			// dominates all restores.
			p := b
			b = nil
			for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
				if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
					// c also dominates all restores. Walk down into c.
					b = c
					break
				}
			}
			if b == nil {
				// Ran out of blocks which dominate all restores.
				break
			}

			var depth int16
			if l := s.loopnest.b2l[b.ID]; l != nil {
				depth = l.depth
			}
			if depth > bestDepth {
				// Don't push the spill into a deeper loop.
				break
			}

			// If v is in a register at the start of b, we can
			// place the spill here (after the phis).
			if len(b.Preds) == 1 {
				for _, e := range s.endRegs[b.Preds[0].b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			} else {
				for _, e := range s.startRegs[b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			}
		}

		// Put the spill in the best block we found.
		spill.Block = best
		spill.AddArg(bestArg)
		if best == v.Block && v.Op != OpPhi {
			// Place immediately after v.
			after[v.ID] = append(after[v.ID], spill)
		} else {
			// Place at the start of best block.
			start[best.ID] = append(start[best.ID], spill)
		}
	}
	// Insert spill instructions into the block schedules.
	var oldSched []*Value
	for _, b := range s.visitOrder {
		nphi := 0
		for _, v := range b.Values {
			if v.Op != OpPhi {
				break
			}
			nphi++
		}
		oldSched = append(oldSched[:0], b.Values[nphi:]...)
		b.Values = b.Values[:nphi]
		b.Values = append(b.Values, start[b.ID]...)
		for _, v := range oldSched {
			b.Values = append(b.Values, v)
			b.Values = append(b.Values, after[v.ID]...)
		}
	}
}
// shuffle fixes up all the merge edges (those going into blocks of indegree > 1).
func (s *regAllocState) shuffle(stacklive [][]ID) {
	var e edgeState
	e.s = s
	e.cache = map[ID][]*Value{}
	e.contents = map[Location]contentRecord{}
	if s.f.pass.debug > regDebug {
		fmt.Printf("shuffle %s\n", s.f.Name)
		fmt.Println(s.f.String())
	}

	for _, b := range s.visitOrder {
		if len(b.Preds) <= 1 {
			continue
		}
		e.b = b
		for i, edge := range b.Preds {
			p := edge.b
			e.p = p
			e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
			e.process()
		}
	}

	if s.f.pass.debug > regDebug {
		fmt.Printf("post shuffle %s\n", s.f.Name)
		fmt.Println(s.f.String())
	}
}
type edgeState struct {
	s    *regAllocState
	p, b *Block // edge goes from p->b.

	// for each pre-regalloc value, a list of equivalent cached values
	cache      map[ID][]*Value
	cachedVals []ID // (superset of) keys of the above map, for deterministic iteration

	// map from location to the value it contains
	contents map[Location]contentRecord

	// desired destination locations
	destinations []dstRecord
	extra        []dstRecord

	usedRegs              regMask // registers currently holding something
	uniqueRegs            regMask // registers holding the only copy of a value
	finalRegs             regMask // registers holding final target
	rematerializeableRegs regMask // registers that hold rematerializeable values
}
type contentRecord struct {
	vid   ID       // pre-regalloc value
	c     *Value   // cached value
	final bool     // this is a satisfied destination
	pos   src.XPos // source position of use of the value
}

type dstRecord struct {
	loc    Location // register or stack slot
	vid    ID       // pre-regalloc value it should contain
	splice **Value  // place to store reference to the generating instruction
	pos    src.XPos // source position of use of this location
}
// setup initializes the edge state for shuffling.
func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("edge %s->%s\n", e.p, e.b)
	}

	// Clear state.
	for _, vid := range e.cachedVals {
		delete(e.cache, vid)
	}
	e.cachedVals = e.cachedVals[:0]
	for k := range e.contents {
		delete(e.contents, k)
	}
	e.usedRegs = 0
	e.uniqueRegs = 0
	e.finalRegs = 0
	e.rematerializeableRegs = 0

	// Live registers can be sources.
	for _, x := range srcReg {
		e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos) // don't care the position of the source
	}
	// So can all of the spill locations.
	for _, spillID := range stacklive {
		v := e.s.orig[spillID]
		spill := e.s.values[v.ID].spill
		if !e.s.sdom.IsAncestorEq(spill.Block, e.p) {
			// Spills were placed that only dominate the uses found
			// during the first regalloc pass. The edge fixup code
			// can't use a spill location if the spill doesn't dominate
			// the edge.
			// We are guaranteed that if the spill doesn't dominate this edge,
			// then the value is available in a register (because we called
			// makeSpill for every value not in a register at the start
			// of an edge).
			continue
		}
		e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos) // don't care the position of the source
	}

	// Figure out all the destinations we need.
	dsts := e.destinations[:0]
	for _, x := range dstReg {
		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
	}
	// Phis need their args to end up in a specific location.
	for _, v := range e.b.Values {
		if v.Op != OpPhi {
			break
		}
		loc := e.s.f.getHome(v.ID)
		if loc == nil {
			continue
		}
		dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
	}
	e.destinations = dsts

	if e.s.f.pass.debug > regDebug {
		for _, vid := range e.cachedVals {
			a := e.cache[vid]
			for _, c := range a {
				fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
			}
		}
		for _, d := range e.destinations {
			fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
		}
	}
}
// process generates code to move all the values to the right destination locations.
func (e *edgeState) process() {
	dsts := e.destinations

	// Process the destinations until they are all satisfied.
	for len(dsts) > 0 {
		i := 0
		for _, d := range dsts {
			if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
				// Failed - save for next iteration.
				dsts[i] = d
				i++
			}
		}
		if i < len(dsts) {
			// Made some progress. Go around again.
			dsts = dsts[:i]

			// Append any extra destinations we generated.
			dsts = append(dsts, e.extra...)
			e.extra = e.extra[:0]
			continue
		}

		// We made no progress. That means that any
		// remaining unsatisfied moves are in simple cycles.
		// For example, A -> B -> C -> D -> A.
		//   A ----> B
		//   ^       |
		//   |       |
		//   |       v
		//   D <---- C
		//
		// To break the cycle, we pick an unused register, say R,
		// and put a copy of B there.
		//   A ----> B
		//   ^       |
		//   |       |
		//   |       v
		//   D <---- C <---- R=copyofB
		// When we resume the outer loop, the A->B move can now proceed,
		// and eventually the whole cycle completes.

		// Copy any cycle location to a temp register. This duplicates
		// one of the cycle entries, allowing the just duplicated value
		// to be overwritten and the cycle to proceed.
		d := dsts[0]
		loc := d.loc
		vid := e.contents[loc].vid
		c := e.contents[loc].c
		r := e.findRegFor(c.Type)
		if e.s.f.pass.debug > regDebug {
			fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c)
		}
		e.erase(r)
		pos := d.pos.WithNotStmt()
		if _, isReg := loc.(*Register); isReg {
			c = e.p.NewValue1(pos, OpCopy, c.Type, c)
		} else {
			c = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
		}
		e.set(r, vid, c, false, pos)
		if c.Op == OpLoadReg && e.s.isGReg(register(r.(*Register).num)) {
			e.s.f.Fatalf("process.OpLoadReg targeting g: " + c.LongString())
		}
	}
}

// processDest generates code to put value vid into location loc. Returns true
// if progress was made.
func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool {
	pos = pos.WithNotStmt()
	occupant := e.contents[loc]
	if occupant.vid == vid {
		// Value is already in the correct place.
		e.contents[loc] = contentRecord{vid, occupant.c, true, pos}
		if splice != nil {
			(*splice).Uses--
			*splice = occupant.c
			occupant.c.Uses++
		}
		// Note: if splice==nil then c will appear dead. This is
		// non-SSA-form code, so be careful not to run deadcode
		// elimination after this pass.
		if _, ok := e.s.copies[occupant.c]; ok {
			// The copy at occupant.c was used to avoid a spill.
			e.s.copies[occupant.c] = true
		}
		return true
	}

	// Check if we're allowed to clobber the destination location.
	if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable {
		// We can't overwrite the last copy
		// of a value that needs to survive.
		return false
	}

	// Copy from a source of v, register preferred.
	v := e.s.orig[vid]
	var c *Value
	var src Location
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("moving v%d to %s\n", vid, loc)
		fmt.Printf("sources of v%d:", vid)
	}
	for _, w := range e.cache[vid] {
		h := e.s.f.getHome(w.ID)
		if e.s.f.pass.debug > regDebug {
			fmt.Printf(" %s:%s", h, w)
		}
		_, isreg := h.(*Register)
		if src == nil || isreg {
			c = w
			src = h
		}
	}
	if e.s.f.pass.debug > regDebug {
		if src != nil {
			fmt.Printf(" [use %s]\n", src)
		} else {
			fmt.Printf(" [no source]\n")
		}
	}
	_, dstReg := loc.(*Register)

	// Pre-clobber destination. This avoids the
	// following situation:
	//   - v is currently held in R0 and stacktmp0.
	//   - We want to copy stacktmp1 to stacktmp0.
	//   - We choose R0 as the temporary register.
	// During the copy, both R0 and stacktmp0 are
	// clobbered, losing both copies of v. Oops!
	// Erasing the destination early means R0 will not
	// be chosen as the temp register, as it will then
	// be the last copy of v.
	e.erase(loc)
	var x *Value
	if c == nil || e.s.values[vid].rematerializeable {
		if !e.s.values[vid].rematerializeable {
			e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
		}
		if dstReg {
			x = v.copyIntoWithXPos(e.p, pos)
		} else {
			// Rematerialize into stack slot. Need a free
			// register to accomplish this.
			r := e.findRegFor(v.Type)
			e.erase(r)
			x = v.copyIntoWithXPos(e.p, pos)
			e.set(r, vid, x, false, pos)
			// Make sure we spill with the size of the slot, not the
			// size of x (which might be wider due to our dropping
			// of narrowing conversions).
			x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x)
		}
	} else {
		// Emit move from src to dst.
		_, srcReg := src.(*Register)
		if srcReg {
			if dstReg {
				x = e.p.NewValue1(pos, OpCopy, c.Type, c)
			} else {
				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c)
			}
		} else {
			if dstReg {
				x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
			} else {
				// mem->mem. Use a temp register.
				r := e.findRegFor(c.Type)
				e.erase(r)
				t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
				e.set(r, vid, t, false, pos)
				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
			}
		}
	}
	e.set(loc, vid, x, true, pos)
	if x.Op == OpLoadReg && e.s.isGReg(register(loc.(*Register).num)) {
		e.s.f.Fatalf("processDest.OpLoadReg targeting g: " + x.LongString())
	}
	if splice != nil {
		(*splice).Uses--
		*splice = x
		x.Uses++
	}
	return true
}
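
// The four move shapes emitted above can be summarized in isolation: a move
// is classified by whether its source and destination are registers, and the
// mem->mem case must bounce through a temp register because most ISAs have
// no memory-to-memory move. A hypothetical helper (exampleMoveKind is not
// part of the allocator) showing the decision table:
func exampleMoveKind(srcIsReg, dstIsReg bool) string {
	switch {
	case srcIsReg && dstIsReg:
		return "copy" // cf. OpCopy: register-to-register move
	case srcIsReg && !dstIsReg:
		return "store" // cf. OpStoreReg: spill to a stack slot
	case !srcIsReg && dstIsReg:
		return "load" // cf. OpLoadReg: restore from a stack slot
	default:
		return "load+store via temp" // no mem->mem move; use a register
	}
}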

// set changes the contents of location loc to hold the given value and its cached representative.
func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) {
	e.s.f.setHome(c, loc)
	e.contents[loc] = contentRecord{vid, c, final, pos}
	a := e.cache[vid]
	if len(a) == 0 {
		e.cachedVals = append(e.cachedVals, vid)
	}
	a = append(a, c)
	e.cache[vid] = a
	if r, ok := loc.(*Register); ok {
		if e.usedRegs&(regMask(1)<<uint(r.num)) != 0 {
			e.s.f.Fatalf("%v is already set (v%d/%v)", r, vid, c)
		}
		e.usedRegs |= regMask(1) << uint(r.num)
		if final {
			e.finalRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 1 {
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 2 {
			if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
				e.uniqueRegs &^= regMask(1) << uint(t.num)
			}
		}
		if e.s.values[vid].rematerializeable {
			e.rematerializeableRegs |= regMask(1) << uint(r.num)
		}
	}
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("%s\n", c.LongString())
		fmt.Printf("v%d now available in %s:%s\n", vid, loc, c)
	}
}
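
// The register bookkeeping above is all single-bit mask arithmetic. A tiny
// standalone illustration with an assumed 64-bit mask type (exampleMaskOps
// and exampleMask are hypothetical names, not the real regMask):
func exampleMaskOps() {
	type exampleMask uint64
	var used exampleMask
	const r = 3                 // register number
	used |= exampleMask(1) << r // mark r used (cf. e.usedRegs |= ...)
	inUse := used&(exampleMask(1)<<r) != 0
	used &^= exampleMask(1) << r // clear r using Go's AND NOT operator
	fmt.Println(inUse, used)     // true 0
}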

// erase removes any user of loc.
func (e *edgeState) erase(loc Location) {
	cr := e.contents[loc]
	if cr.c == nil {
		return
	}
	vid := cr.vid

	if cr.final {
		// Add a destination to move this value back into place.
		// Make sure it gets added to the tail of the destination queue
		// so we make progress on other moves first.
		e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos})
	}

	// Remove c from the list of cached values.
	a := e.cache[vid]
	for i, c := range a {
		if e.s.f.getHome(c.ID) == loc {
			if e.s.f.pass.debug > regDebug {
				fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c)
			}
			a[i], a = a[len(a)-1], a[:len(a)-1]
			break
		}
	}
	e.cache[vid] = a

	// Update register masks.
	if r, ok := loc.(*Register); ok {
		e.usedRegs &^= regMask(1) << uint(r.num)
		if cr.final {
			e.finalRegs &^= regMask(1) << uint(r.num)
		}
		e.rematerializeableRegs &^= regMask(1) << uint(r.num)
	}
	if len(a) == 1 {
		if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
	}
}
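
// erase drops the cached copy with the swap-with-last idiom
// (a[i], a = a[len(a)-1], a[:len(a)-1]), which removes element i in O(1) at
// the cost of reordering the slice; the order of cached copies doesn't
// matter here. A standalone illustration (exampleSwapRemove is a
// hypothetical name):
func exampleSwapRemove(a []int, i int) []int {
	a[i], a = a[len(a)-1], a[:len(a)-1]
	return a // e.g. exampleSwapRemove([]int{10, 20, 30}, 0) == []int{30, 20}
}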

// findRegFor finds a register we can use to make a temp copy of type typ.
func (e *edgeState) findRegFor(typ *types.Type) Location {
	// Which registers are possibilities.
	types := &e.s.f.Config.Types
	m := e.s.compatRegs(typ)

	// Pick a register. In priority order:
	// 1) an unused register
	// 2) a non-unique register not holding a final value
	// 3) a non-unique register
	// 4) a register holding a rematerializeable value
	x := m &^ e.usedRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m &^ e.uniqueRegs &^ e.finalRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m &^ e.uniqueRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m & e.rematerializeableRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}

	// No register is available.
	// Pick a register to spill.
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 {
				if !c.rematerializeable() {
					x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c)
					// Allocate a temp location to spill a register to.
					// The type of the slot is immaterial - it will not be live across
					// any safepoint. Just use a type big enough to hold any register.
					t := LocalSlot{N: e.s.f.fe.Auto(c.Pos, types.Int64), Type: types.Int64}
					// TODO: reuse these slots. They'll need to be erased first.
					e.set(t, vid, x, false, c.Pos)
					if e.s.f.pass.debug > regDebug {
						fmt.Printf(" SPILL %s->%s %s\n", r, t, x.LongString())
					}
				}
				// r will now be overwritten by the caller. At some point
				// later, the newly saved value will be moved back to its
				// final destination in processDest.
				return r
			}
		}
	}

	fmt.Printf("m:%d unique:%d final:%d rematerializable:%d\n", m, e.uniqueRegs, e.finalRegs, e.rematerializeableRegs)
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID))
		}
	}
	e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b)
	return nil
}
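
// Each of the four priority tiers above is just a mask expression followed
// by "pick any register in the mask". Assuming pickReg chooses the lowest
// set bit, the selection can be sketched as follows (exampleFindReg and its
// parameters are hypothetical, and the mask type is an assumed uint64):
func exampleFindReg(compat, used, unique, final, remat uint64) (int, bool) {
	lowestBit := func(m uint64) int {
		n := 0
		for m&1 == 0 {
			m >>= 1
			n++
		}
		return n
	}
	for _, m := range []uint64{
		compat &^ used,            // 1) a completely unused register
		compat &^ unique &^ final, // 2) non-unique, not holding a final value
		compat &^ unique,          // 3) non-unique (some other copy survives)
		compat & remat,            // 4) holds a value we can recompute anyway
	} {
		if m != 0 {
			return lowestBit(m), true
		}
	}
	return -1, false // nothing free: the caller must spill something
}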

// rematerializeable reports whether the register allocator should recompute
// a value instead of spilling/restoring it.
func (v *Value) rematerializeable() bool {
	if !opcodeTable[v.Op].rematerializeable {
		return false
	}
	for _, a := range v.Args {
		// SP and SB (generated by OpSP and OpSB) are always available.
		if a.Op != OpSP && a.Op != OpSB {
			return false
		}
	}
	return true
}
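
// Rematerializable values are typically constants and address computations
// whose only inputs are the always-available SP and SB; re-executing such an
// instruction is cheaper than a spill/restore pair. For example, on amd64 a
// constant load (no args) or an address of a global (its only arg is SB) is
// typically marked rematerializeable, while an add of two ordinary values is
// not, since its inputs may be gone from registers. A toy version of the
// check with a mocked-up opcode table (exampleIsRemat and its table are
// hypothetical, not the real opcodeTable):
func exampleIsRemat(op string, argOps []string) bool {
	remat := map[string]bool{"Const64": true, "LEAQ": true} // mock flags
	if !remat[op] {
		return false
	}
	for _, a := range argOps {
		// Only SP- and SB-based inputs are guaranteed to be available
		// at any point where we might recompute the value.
		if a != "SP" && a != "SB" {
			return false
		}
	}
	return true
}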

type liveInfo struct {
	ID   ID       // ID of value
	dist int32    // # of instructions before next use
	pos  src.XPos // source position of next use
}

// computeLive computes a map from block ID to a list of value IDs live at the
// end of that block. Each value ID is paired with the number of instructions
// until that value's next use. The resulting map is stored in s.live.
// computeLive also computes the desired register information at the end of
// each block. This desired register information is stored in s.desired.
// TODO: this could be quadratic if lots of variables are live across lots of
// basic blocks. Figure out a way to make this function (or, more precisely, the user
// of this function) require only linear size & time.
func (s *regAllocState) computeLive() {
	f := s.f
	s.live = make([][]liveInfo, f.NumBlocks())
	s.desired = make([]desiredState, f.NumBlocks())
	var phis []*Value

	live := f.newSparseMap(f.NumValues())
	defer f.retSparseMap(live)
	t := f.newSparseMap(f.NumValues())
	defer f.retSparseMap(t)

	// Keep track of which value we want in each register.
	var desired desiredState

	// Instead of iterating over f.Blocks, iterate over their postordering.
	// Liveness information flows backward, so starting at the end
	// increases the probability that we will stabilize quickly.
	// TODO: Do a better job yet. Here's one possibility:
	// Calculate the dominator tree and locate all strongly connected components.
	// If a value is live in one block of an SCC, it is live in all.
	// Walk the dominator tree from end to beginning, just once, treating SCC
	// components as single blocks, duplicating the calculated liveness
	// information out to all of them.
	po := f.postorder()
	s.loopnest = f.loopnest()
	s.loopnest.calculateDepths()
	for {
		changed := false

		for _, b := range po {
			// Start with known live values at the end of the block.
			// Add len(b.Values) to adjust from end-of-block distance
			// to beginning-of-block distance.
			live.clear()
			for _, e := range s.live[b.ID] {
				live.set(e.ID, e.dist+int32(len(b.Values)), e.pos)
			}

			// Mark control values as live.
			for _, c := range b.ControlValues() {
				if s.values[c.ID].needReg {
					live.set(c.ID, int32(len(b.Values)), b.Pos)
				}
			}

			// Propagate backwards to the start of the block.
			// Assumes Values have been scheduled.
			phis = phis[:0]
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				live.remove(v.ID)
				if v.Op == OpPhi {
					// Save phi ops for later.
					phis = append(phis, v)
					continue
				}
				if opcodeTable[v.Op].call {
					c := live.contents()
					for i := range c {
						c[i].val += unlikelyDistance
					}
				}
				for _, a := range v.Args {
					if s.values[a.ID].needReg {
						live.set(a.ID, int32(i), v.Pos)
					}
				}
			}
			// Propagate desired registers backwards.
			desired.copy(&s.desired[b.ID])
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				prefs := desired.remove(v.ID)
				if v.Op == OpPhi {
					// TODO: if v is a phi, save desired register for phi inputs.
					// For now, we just drop it and don't propagate
					// desired registers back through phi nodes.
					continue
				}
				regspec := s.regspec(v)
				// Cancel desired registers if they get clobbered.
				desired.clobber(regspec.clobbers)
				// Update desired registers if there are any fixed register inputs.
				for _, j := range regspec.inputs {
					if countRegs(j.regs) != 1 {
						continue
					}
					desired.clobber(j.regs)
					desired.add(v.Args[j.idx].ID, pickReg(j.regs))
				}
				// Set the desired register of input 0 if this is a 2-operand instruction.
				if opcodeTable[v.Op].resultInArg0 {
					if opcodeTable[v.Op].commutative {
						desired.addList(v.Args[1].ID, prefs)
					}
					desired.addList(v.Args[0].ID, prefs)
				}
			}

			// For each predecessor of b, expand its list of live-at-end values.
			// invariant: live contains the values live at the start of b (excluding phi inputs)
			for i, e := range b.Preds {
				p := e.b
				// Compute additional distance for the edge.
				// Note: delta must be at least 1 to distinguish the control
				// value use from the first user in a successor block.
				delta := int32(normalDistance)
				if len(p.Succs) == 2 {
					if p.Succs[0].b == b && p.Likely == BranchLikely ||
						p.Succs[1].b == b && p.Likely == BranchUnlikely {
						delta = likelyDistance
					}
					if p.Succs[0].b == b && p.Likely == BranchUnlikely ||
						p.Succs[1].b == b && p.Likely == BranchLikely {
						delta = unlikelyDistance
					}
				}

				// Update any desired registers at the end of p.
				s.desired[p.ID].merge(&desired)

				// Start t off with the previously known live values at the end of p.
				t.clear()
				for _, e := range s.live[p.ID] {
					t.set(e.ID, e.dist, e.pos)
				}
				update := false

				// Add new live values from scanning this block.
				for _, e := range live.contents() {
					d := e.val + delta
					if !t.contains(e.key) || d < t.get(e.key) {
						update = true
						t.set(e.key, d, e.aux)
					}
				}
				// Also add the correct arg from the saved phi values.
				// All phis are at distance delta (we consider them to
				// happen simultaneously at the start of the block).
				for _, v := range phis {
					id := v.Args[i].ID
					if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) {
						update = true
						t.set(id, delta, v.Pos)
					}
				}

				if !update {
					continue
				}
				// The live set has changed, update it.
				l := s.live[p.ID][:0]
				if cap(l) < t.size() {
					l = make([]liveInfo, 0, t.size())
				}
				for _, e := range t.contents() {
					l = append(l, liveInfo{e.key, e.val, e.aux})
				}
				s.live[p.ID] = l
				changed = true
			}
		}

		if !changed {
			break
		}
	}
	if f.pass.debug > regDebug {
		fmt.Println("live values at end of each block")
		for _, b := range f.Blocks {
			fmt.Printf("  %s:", b)
			for _, x := range s.live[b.ID] {
				fmt.Printf(" v%d(%d)", x.ID, x.dist)
				for _, e := range s.desired[b.ID].entries {
					if e.ID != x.ID {
						continue
					}
					fmt.Printf("[")
					first := true
					for _, r := range e.regs {
						if r == noRegister {
							continue
						}
						if !first {
							fmt.Printf(",")
						}
						fmt.Print(&s.registers[r])
						first = false
					}
					fmt.Printf("]")
				}
			}
			if avoid := s.desired[b.ID].avoid; avoid != 0 {
				fmt.Printf(" avoid=%v", s.RegMaskString(avoid))
			}
			fmt.Println()
		}
	}
}

// A desiredState represents desired register assignments.
type desiredState struct {
	// Desired assignments will be small, so we just use a list
	// of valueID+registers entries.
	entries []desiredStateEntry
	// Registers that other values want to be in. This value will
	// contain at least the union of the regs fields of entries, but
	// may contain additional entries for values that were once in
	// this data structure but are no longer.
	avoid regMask
}

type desiredStateEntry struct {
	// (pre-regalloc) value
	ID ID
	// Registers it would like to be in, in priority order.
	// Unused slots are filled with noRegister.
	regs [4]register
}

func (d *desiredState) clear() {
	d.entries = d.entries[:0]
	d.avoid = 0
}

// get returns a list of desired registers for value vid.
func (d *desiredState) get(vid ID) [4]register {
	for _, e := range d.entries {
		if e.ID == vid {
			return e.regs
		}
	}
	return [4]register{noRegister, noRegister, noRegister, noRegister}
}

// add records that we'd like value vid to be in register r.
func (d *desiredState) add(vid ID, r register) {
	d.avoid |= regMask(1) << r
	for i := range d.entries {
		e := &d.entries[i]
		if e.ID != vid {
			continue
		}
		if e.regs[0] == r {
			// Already known and highest priority.
			return
		}
		for j := 1; j < len(e.regs); j++ {
			if e.regs[j] == r {
				// Move from lower priority to top priority.
				copy(e.regs[1:], e.regs[:j])
				e.regs[0] = r
				return
			}
		}
		copy(e.regs[1:], e.regs[:])
		e.regs[0] = r
		return
	}
	d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
}
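
// add keeps each entry's regs array as a fixed-size, priority-ordered list
// padded with noRegister; inserting a register is a move-to-front over at
// most 4 elements, relying on copy's memmove-like handling of overlapping
// slices. The shifting in isolation (exampleMoveToFront is hypothetical,
// with -1 standing in for noRegister):
func exampleMoveToFront(regs [4]int, r int) [4]int {
	if regs[0] == r {
		return regs // already highest priority
	}
	for j := 1; j < len(regs); j++ {
		if regs[j] == r {
			// r was present at lower priority: slide the prefix down.
			copy(regs[1:], regs[:j])
			regs[0] = r
			return regs
		}
	}
	// r was absent: slide everything down, dropping the last entry.
	copy(regs[1:], regs[:])
	regs[0] = r
	return regs // e.g. exampleMoveToFront([4]int{5, -1, -1, -1}, 7) == [4]int{7, 5, -1, -1}
}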

func (d *desiredState) addList(vid ID, regs [4]register) {
	// regs is in priority order, so iterate in reverse order.
	for i := len(regs) - 1; i >= 0; i-- {
		r := regs[i]
		if r != noRegister {
			d.add(vid, r)
		}
	}
}

// clobber erases any desired registers in the set m.
func (d *desiredState) clobber(m regMask) {
	for i := 0; i < len(d.entries); {
		e := &d.entries[i]
		j := 0
		for _, r := range e.regs {
			if r != noRegister && m>>r&1 == 0 {
				e.regs[j] = r
				j++
			}
		}
		if j == 0 {
			// No more desired registers for this value.
			d.entries[i] = d.entries[len(d.entries)-1]
			d.entries = d.entries[:len(d.entries)-1]
			continue
		}
		for ; j < len(e.regs); j++ {
			e.regs[j] = noRegister
		}
		i++
	}
	d.avoid &^= m
}

// copy copies a desired state from another desiredState x.
func (d *desiredState) copy(x *desiredState) {
	d.entries = append(d.entries[:0], x.entries...)
	d.avoid = x.avoid
}

// remove removes the desired registers for vid and returns them.
func (d *desiredState) remove(vid ID) [4]register {
	for i := range d.entries {
		if d.entries[i].ID == vid {
			regs := d.entries[i].regs
			d.entries[i] = d.entries[len(d.entries)-1]
			d.entries = d.entries[:len(d.entries)-1]
			return regs
		}
	}
	return [4]register{noRegister, noRegister, noRegister, noRegister}
}

// merge merges another desired state x into d.
func (d *desiredState) merge(x *desiredState) {
	d.avoid |= x.avoid
	// There should only be a few desired registers, so
	// linear insert is ok.
	for _, e := range x.entries {
		d.addList(e.ID, e.regs)
	}
}

func min32(x, y int32) int32 {
	if x < y {
		return x
	}
	return y
}
func max32(x, y int32) int32 {
	if x < y {
		return y
	}
	return x
}