// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Register allocation.
//
// We use a version of a linear scan register allocator. We treat the
// whole function as a single long basic block and run through
// it using a greedy register allocator. Then all merge edges
// (those targeting a block with len(Preds)>1) are processed to
// shuffle data into the place that the target of the edge expects.
//
// The greedy allocator moves values into registers just before they
// are used, spills registers only when necessary, and spills the
// value whose next use is farthest in the future.
//
// The register allocator requires that a block is not scheduled until
// at least one of its predecessors has been scheduled. The most recent
// such predecessor provides the starting register state for a block.
//
// It also requires that there are no critical edges (critical =
// comes from a block with >1 successor and goes to a block with >1
// predecessor). This makes it easy to add fixup code on merge edges -
// the source of a merge edge has only one successor, so we can add
// fixup code to the end of that block.
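//
// For example, suppose block p ends in a conditional branch to b1 and b2,
// and b2 also has another predecessor q. The edge p->b2 is critical: fixup
// code appended to p would also run on the p->b1 path. With critical edges
// split beforehand, every merge edge comes from a single-successor block
// that can safely hold the fixup code.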
//
// During the normal course of the allocator, we might throw a still-live
// value out of all registers. When that value is subsequently used, we must
// load it from a slot on the stack. We must also issue an instruction to
// initialize that stack location with a copy of v.
//
// pre-regalloc:
//	(1) v = Op ...
//	(2) x = Op ...
//	(3) ... = Op v ...
//
// post-regalloc:
//	(1) v = Op ... : AX // computes v, store result in AX
//	    s = StoreReg v // spill v to a stack slot
//	(2) x = Op ... : AX // some other op uses AX
//	    c = LoadReg s : CX // restore v from stack slot
//	(3) ... = Op c ... // use the restored value
//
// Allocation occurs normally until we reach (3) and we realize we have
// a use of v and it isn't in any register. At that point, we allocate
// a spill (a StoreReg) for v. We can't determine the correct place for
// the spill at this point, so we allocate the spill as blockless initially.
// The restore is then generated to load v back into a register so it can
// be used. Subsequent uses of v will use the restored value c instead.
//
// What remains is the question of where to schedule the spill.
// During allocation, we keep track of the dominator of all restores of v.
// The spill of v must dominate that block. The spill must also be issued at
// a point where v is still in a register.
//
// To find the right place, start at b, the block which dominates all restores.
//  - If b is v.Block, then issue the spill right after v.
//    It is known to be in a register at that point, and dominates any restores.
//  - Otherwise, if v is in a register at the start of b,
//    put the spill of v at the start of b.
//  - Otherwise, set b = immediate dominator of b, and repeat.
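//
// A rough sketch of that walk (the helper names here are illustrative
// only, not functions defined in this file):
//
//	b := dominatorOfAllRestores(v)
//	for {
//		if b == v.Block {
//			// Issue the spill right after v.
//			break
//		}
//		if inRegisterAtStartOf(v, b) {
//			// Issue the spill at the start of b.
//			break
//		}
//		b = idom(b)
//	}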
//
// Phi values are special, as always. We define two kinds of phis, those
// where the merge happens in a register (a "register" phi) and those where
// the merge happens in a stack location (a "stack" phi).
//
// A register phi must have the phi and all of its inputs allocated to the
// same register. Register phis are spilled similarly to regular ops.
//
// A stack phi must have the phi and all of its inputs allocated to the same
// stack location. Stack phis start out life already spilled - each phi
// input must be a store (using StoreReg) at the end of the corresponding
// predecessor block.
//
//	b1: y = ... : AX        b2: z = ... : BX
//	    y2 = StoreReg y         z2 = StoreReg z
//
//	b3: x = phi(y2, z2)
//
// The stack allocator knows that StoreReg args of stack-allocated phis
// must be allocated to the same stack slot as the phi that uses them.
// x is now a spilled value and a restore must appear before its first use.
//
// TODO
//
// Use an affinity graph to mark two values which should use the
// same register. This affinity graph will be used to prefer certain
// registers for allocation. This affinity helps eliminate moves that
// are required for phi implementations and helps generate allocations
// for 2-register architectures.
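//
// For example, a two-operand instruction like ADDQ on amd64 overwrites
// its first input (resultInArg0 below), so its output and input 0 want
// the same register; an affinity edge between them would bias allocation
// toward an assignment that needs no extra copy.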
//
// Note: regalloc generates a not-quite-SSA output. If we have:
//
//	b1: x = ... : AX
//	    x2 = StoreReg x
//	    ... AX gets reused for something else ...
//	    if ... goto b3 else b4
//
//	b3: x3 = LoadReg x2 : BX    b4: x4 = LoadReg x2 : CX
//	    ... use x3 ...              ... use x4 ...
//
//	b2: ... use x3 ...
//
// If b3 is the primary predecessor of b2, then we use x3 in b2 and
// add an x4:CX->BX copy at the end of b4.
// But the definition of x3 doesn't dominate b2. We should really
// insert an extra phi at the start of b2 (x5=phi(x3,x4):BX) to keep
// SSA form. For now, we ignore this problem as remaining in strict
// SSA form isn't needed after regalloc. We'll just leave the use
// of x3 not dominated by the definition of x3, and the CX->BX copy
// will have no use (so don't run deadcode after regalloc!).
// TODO: maybe we should introduce these extra phis?
117 "cmd/compile/internal/base"
118 "cmd/compile/internal/ir"
119 "cmd/compile/internal/types"
// distance is a measure of how far into the future values are used.
// distance is measured in units of instructions.
const (
	likelyDistance   = 1
	normalDistance   = 10
	unlikelyDistance = 100
)
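// These distances weight spill decisions: for example, a use that is only
// reachable through an unlikely branch counts as roughly unlikelyDistance
// instructions away, so the allocator prefers to evict values whose next
// use is unlikely over values needed soon on the hot path.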
// regalloc performs register allocation on f. It sets f.RegAlloc
// to the resulting allocation.
func regalloc(f *Func) {
	var s regAllocState
	s.init(f)
	s.regalloc(f)
}

const noRegister register = 255
// For bulk initializing
var noRegisters [32]register = [32]register{
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
}
// A regMask encodes a set of machine registers.
// TODO: regMask -> regSet?
func (m regMask) String() string {
	s := ""
	for r := register(0); m != 0; r++ {
		if m>>r&1 == 0 {
			continue
		}
		m &^= regMask(1) << r
		if s != "" {
			s += " "
		}
		s += fmt.Sprintf("r%d", r)
	}
	return s
}
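// For example, a mask with bits 0 and 2 set prints as "r0 r2".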
func (s *regAllocState) RegMaskString(m regMask) string {
	str := ""
	for r := register(0); m != 0; r++ {
		if m>>r&1 == 0 {
			continue
		}
		m &^= regMask(1) << r
		if str != "" {
			str += " "
		}
		str += s.registers[r].String()
	}
	return str
}
// countRegs returns the number of set bits in the register mask.
func countRegs(r regMask) int {
	return bits.OnesCount64(uint64(r))
}
// pickReg picks an arbitrary register from the register mask.
func pickReg(r regMask) register {
	if r == 0 {
		panic("can't pick a register from an empty set")
	}
	// pick the lowest one
	return register(bits.TrailingZeros64(uint64(r)))
}
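// For example, pickReg(0b1100) returns register 2, the lowest set bit.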
type use struct {
	dist int32    // distance from start of the block to a use of a value
	pos  src.XPos // source position of the use
	next *use     // linked list of uses of a value in nondecreasing dist order
}
// A valState records the register allocation state for a (pre-regalloc) value.
type valState struct {
	regs              regMask // the set of registers holding a Value (usually just one)
	uses              *use    // list of uses in this block
	spill             *Value  // spilled copy of the Value (if any)
	restoreMin        int32   // minimum of all restores' blocks' sdom.entry
	restoreMax        int32   // maximum of all restores' blocks' sdom.exit
	needReg           bool    // cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags()
	rematerializeable bool    // cached value of v.rematerializeable()
}
type regState struct {
	v *Value // Original (preregalloc) Value stored in this register.
	c *Value // A Value equal to v which is currently in a register. Might be v or a copy of it.
	// If a register is unused, v==c==nil
}
type regAllocState struct {
	f *Func

	sdom        SparseTree
	registers   []Register
	numRegs     register
	SPReg       register
	SBReg       register
	GReg        register
	allocatable regMask

	// live values at the end of each block. live[b.ID] is a list of value IDs
	// which are live at the end of b, together with a count of how many instructions
	// forward to the next use.
	live [][]liveInfo
	// desired register assignments at the end of each block.
	// Note that this is a static map computed before allocation occurs. Dynamic
	// register desires (from partially completed allocations) will trump
	// this information.
	desired []desiredState

	// current state of each (preregalloc) Value
	values []valState

	// ID of SP, SB values
	sp, sb ID

	// For each Value, map from its value ID back to the
	// preregalloc Value it was derived from.
	orig []*Value

	// current state of each register
	regs []regState

	// registers that contain values which can't be kicked out
	nospill regMask

	// mask of registers currently in use
	used regMask

	// mask of registers used in the current instruction
	tmpused regMask

	// current block we're working on
	curBlock *Block

	// cache of use records
	freeUseRecords *use

	// endRegs[blockid] is the register state at the end of each block.
	// encoded as a set of endReg records.
	endRegs [][]endReg

	// startRegs[blockid] is the register state at the start of merge blocks.
	// saved state does not include the state of phi ops in the block.
	startRegs [][]startReg

	// spillLive[blockid] is the set of live spills at the end of each block
	spillLive [][]ID

	// a set of copies we generated to move things around, and
	// whether it is used in shuffle. Unused copies will be deleted.
	copies map[*Value]bool

	loopnest *loopnest

	// choose a good order in which to visit blocks for allocation purposes.
	visitOrder []*Block

	// blockOrder[b.ID] corresponds to the index of block b in visitOrder.
	blockOrder []int32

	// whether to insert instructions that clobber dead registers at call sites
	doClobber bool
}
type endReg struct {
	r register
	v *Value // pre-regalloc value held in this register (TODO: can we use ID here?)
	c *Value // cached version of the value
}

type startReg struct {
	r   register
	v   *Value   // pre-regalloc value needed in this register
	c   *Value   // cached version of the value
	pos src.XPos // source position of use of this register
}
// freeReg frees up register r. Any current user of r is kicked out.
func (s *regAllocState) freeReg(r register) {
	v := s.regs[r].v
	if v == nil {
		s.f.Fatalf("tried to free an already free register %d\n", r)
	}

	// Mark r as unused.
	if s.f.pass.debug > regDebug {
		fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c)
	}
	s.regs[r] = regState{}
	s.values[v.ID].regs &^= regMask(1) << r
	s.used &^= regMask(1) << r
}
// freeRegs frees up all registers listed in m.
func (s *regAllocState) freeRegs(m regMask) {
	for m&s.used != 0 {
		s.freeReg(pickReg(m & s.used))
	}
}
// clobberRegs inserts instructions that clobber registers listed in m.
func (s *regAllocState) clobberRegs(m regMask) {
	m &= s.allocatable & s.f.Config.gpRegMask // only integer registers can contain pointers, so only clobber them
	for m != 0 {
		r := pickReg(m)
		m &^= 1 << r
		x := s.curBlock.NewValue0(src.NoXPos, OpClobberReg, types.TypeVoid)
		s.f.setHome(x, &s.registers[r])
	}
}
// setOrig records that c's original value is the same as
// v's original value.
func (s *regAllocState) setOrig(c *Value, v *Value) {
	for int(c.ID) >= len(s.orig) {
		s.orig = append(s.orig, nil)
	}
	if s.orig[c.ID] != nil {
		s.f.Fatalf("orig value set twice %s %s", c, v)
	}
	s.orig[c.ID] = s.orig[v.ID]
}
// assignReg assigns register r to hold c, a copy of v.
// r must be unused.
func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
	if s.f.pass.debug > regDebug {
		fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c)
	}
	if s.regs[r].v != nil {
		s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
	}

	s.regs[r] = regState{v, c}
	s.values[v.ID].regs |= regMask(1) << r
	s.used |= regMask(1) << r
	s.f.setHome(c, &s.registers[r])
}
// allocReg chooses a register from the set of registers in mask.
// If there is no unused register, a Value will be kicked out of
// a register to make room.
func (s *regAllocState) allocReg(mask regMask, v *Value) register {
	if v.OnWasmStack {
		return noRegister
	}

	mask &= s.allocatable
	mask &^= s.nospill
	if mask == 0 {
		s.f.Fatalf("no register available for %s", v.LongString())
	}

	// Pick an unused register if one is available.
	if mask&^s.used != 0 {
		return pickReg(mask &^ s.used)
	}

	// Pick a value to spill. Spill the value with the
	// farthest-in-the-future use.
	// TODO: Prefer registers with already spilled Values?
	// TODO: Modify preference using affinity graph.
	// TODO: if a single value is in multiple registers, spill one of them
	// before spilling a value in just a single register.

	// Find a register to spill. We spill the register containing the value
	// whose next use is as far in the future as possible.
	// https://en.wikipedia.org/wiki/Page_replacement_algorithm#The_theoretically_optimal_page_replacement_algorithm
	var r register
	maxuse := int32(-1)
	for t := register(0); t < s.numRegs; t++ {
		if mask>>t&1 == 0 {
			continue
		}
		v := s.regs[t].v
		if n := s.values[v.ID].uses.dist; n > maxuse {
			// v's next use is farther in the future than any value
			// we've seen so far. A new best spill candidate.
			r = t
			maxuse = n
		}
	}
	if maxuse == -1 {
		s.f.Fatalf("couldn't find register to spill")
	}

	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm {
		// TODO(neelance): In theory this should never happen, because all wasm registers are equal.
		// So if there is still a free register, the allocation should have picked that one in the first place instead of
		// trying to kick some other value out. In practice, this case does happen and it breaks the stack optimization.
		s.freeReg(r)
		return r
	}

	// Try to move it around before kicking out, if there is a free register.
	// We generate a Copy and record it. It will be deleted if never used.
	v2 := s.regs[r].v
	m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r)
	if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 {
		r2 := pickReg(m)
		c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c)
		s.copies[c] = false
		if s.f.pass.debug > regDebug {
			fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2])
		}
		s.setOrig(c, v2)
		s.assignReg(r2, v2, c)
	}
	s.freeReg(r)
	return r
}
// makeSpill returns a Value which represents the spilled value of v.
// b is the block in which the spill is used.
func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
	vi := &s.values[v.ID]
	if vi.spill != nil {
		// Final block not known - keep track of subtree where restores reside.
		vi.restoreMin = min32(vi.restoreMin, s.sdom[b.ID].entry)
		vi.restoreMax = max32(vi.restoreMax, s.sdom[b.ID].exit)
		return vi.spill
	}
	// Make a spill for v. We don't know where we want
	// to put it yet, so we leave it blockless for now.
	spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)
	// We also don't know what the spill's arg will be.
	// Leave it argless for now.
	s.setOrig(spill, v)
	vi.spill = spill
	vi.restoreMin = s.sdom[b.ID].entry
	vi.restoreMax = s.sdom[b.ID].exit
	return spill
}
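// Note: sdom entry/exit numbers give each block a nested interval in a
// walk of the dominator tree, so a block c dominates every recorded
// restore exactly when s.sdom[c.ID].entry <= restoreMin and
// s.sdom[c.ID].exit >= restoreMax. placeSpills relies on this below.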
// allocValToReg allocates v to a register selected from regMask and
// returns the register copy of v. Any previous user is kicked out and spilled
// (if necessary). Load code is added at the current pc. If nospill is set the
// allocated register is marked nospill so the assignment cannot be
// undone until the caller allows it by clearing nospill. Returns a
// *Value which is either v or a copy of v allocated to the chosen register.
func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value {
	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm && v.rematerializeable() {
		c := v.copyIntoWithXPos(s.curBlock, pos)
		c.OnWasmStack = true
		s.setOrig(c, v)
		return c
	}
	if v.OnWasmStack {
		return v
	}

	vi := &s.values[v.ID]
	pos = pos.WithNotStmt()
	// Check if v is already in a requested register.
	if mask&vi.regs != 0 {
		r := pickReg(mask & vi.regs)
		if s.regs[r].v != v || s.regs[r].c == nil {
			panic("bad register state")
		}
		if nospill {
			s.nospill |= regMask(1) << r
		}
		return s.regs[r].c
	}

	var r register
	// If nospill is set, the value is used immediately, so it can live on the WebAssembly stack.
	onWasmStack := nospill && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm
	if !onWasmStack {
		// Allocate a register.
		r = s.allocReg(mask, v)
	}

	// Allocate v to the new register.
	var c *Value
	if vi.regs != 0 {
		// Copy from a register that v is already in.
		r2 := pickReg(vi.regs)
		if s.regs[r2].v != v {
			panic("bad register state")
		}
		c = s.curBlock.NewValue1(pos, OpCopy, v.Type, s.regs[r2].c)
	} else if v.rematerializeable() {
		// Rematerialize instead of loading from the spill location.
		c = v.copyIntoWithXPos(s.curBlock, pos)
	} else {
		// Load v from its spill location.
		spill := s.makeSpill(v, s.curBlock)
		if s.f.pass.debug > logSpills {
			s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
		}
		c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
	}

	s.setOrig(c, v)

	if onWasmStack {
		c.OnWasmStack = true
		return c
	}

	s.assignReg(r, v, c)
	if c.Op == OpLoadReg && s.isGReg(r) {
		s.f.Fatalf("allocValToReg.OpLoadReg targeting g: " + c.LongString())
	}
	if nospill {
		s.nospill |= regMask(1) << r
	}
	return c
}
// isLeaf reports whether f is a leaf function, i.e. performs no calls.
func isLeaf(f *Func) bool {
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if opcodeTable[v.Op].call {
				return false
			}
		}
	}
	return true
}
func (s *regAllocState) init(f *Func) {
	s.f = f
	s.f.RegAlloc = s.f.Cache.locs[:0]
	s.registers = f.Config.registers
	if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) {
		s.f.Fatalf("bad number of registers: %d", nr)
	} else {
		s.numRegs = register(nr)
	}
	// Locate SP, SB, and g registers.
	s.SPReg = noRegister
	s.SBReg = noRegister
	s.GReg = noRegister
	for r := register(0); r < s.numRegs; r++ {
		switch s.registers[r].String() {
		case "SP":
			s.SPReg = r
		case "SB":
			s.SBReg = r
		case "g":
			s.GReg = r
		}
	}
	// Make sure we found all required registers.
	switch noRegister {
	case s.SPReg:
		s.f.Fatalf("no SP register found")
	case s.SBReg:
		s.f.Fatalf("no SB register found")
	case s.GReg:
		if f.Config.hasGReg {
			s.f.Fatalf("no g register found")
		}
	}

	// Figure out which registers we're allowed to use.
	s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask
	s.allocatable &^= 1 << s.SPReg
	s.allocatable &^= 1 << s.SBReg
	if s.f.Config.hasGReg {
		s.allocatable &^= 1 << s.GReg
	}
	if buildcfg.FramePointerEnabled && s.f.Config.FPReg >= 0 {
		s.allocatable &^= 1 << uint(s.f.Config.FPReg)
	}
	if s.f.Config.LinkReg != -1 {
		if isLeaf(f) {
			// Leaf functions don't save/restore the link register.
			s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
		}
	}
	if s.f.Config.ctxt.Flag_dynlink {
		switch s.f.Config.arch {
		case "amd64":
			s.allocatable &^= 1 << 15 // R15
		case "arm":
			s.allocatable &^= 1 << 9 // R9
		case "ppc64le": // R2 already reserved.
			// nothing to do
		case "arm64":
			// nothing to do
		case "386":
			// nothing to do.
			// Note that for Flag_shared (position independent code)
			// we do need to be careful, but that carefulness is hidden
			// in the rewrite rules so we always have a free register
			// available for global load/stores. See gen/386.rules (search for Flag_shared).
		case "s390x":
			s.allocatable &^= 1 << 11 // R11
		default:
			s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch)
		}
	}

	// Linear scan register allocation can be influenced by the order in which blocks appear.
	// Decouple the register allocation order from the generated block order.
	// This also creates an opportunity for experiments to find a better order.
	s.visitOrder = layoutRegallocOrder(f)

	// Compute block order. This array allows us to distinguish forward edges
	// from backward edges and compute how far they go.
	s.blockOrder = make([]int32, f.NumBlocks())
	for i, b := range s.visitOrder {
		s.blockOrder[b.ID] = int32(i)
	}
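	// Note: with this ordering, an edge p->b comes from an already-visited
	// predecessor exactly when s.blockOrder[p.ID] < s.blockOrder[b.ID]; the
	// merge-block logic below uses this to skip unvisited predecessors.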
	s.regs = make([]regState, s.numRegs)
	nv := f.NumValues()
	if cap(s.f.Cache.regallocValues) >= nv {
		s.f.Cache.regallocValues = s.f.Cache.regallocValues[:nv]
	} else {
		s.f.Cache.regallocValues = make([]valState, nv)
	}
	s.values = s.f.Cache.regallocValues
	s.orig = make([]*Value, nv)
	s.copies = make(map[*Value]bool)
	for _, b := range s.visitOrder {
		for _, v := range b.Values {
			if !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple() {
				s.values[v.ID].needReg = true
				s.values[v.ID].rematerializeable = v.rematerializeable()
				s.orig[v.ID] = v
			}
			// Note: needReg is false for values returning Tuple types.
			// Instead, we mark the corresponding Selects as needReg.
		}
	}
	s.computeLive()

	s.endRegs = make([][]endReg, f.NumBlocks())
	s.startRegs = make([][]startReg, f.NumBlocks())
	s.spillLive = make([][]ID, f.NumBlocks())
	s.sdom = f.Sdom()

	// wasm: Mark instructions that can be optimized to have their values only on the WebAssembly stack.
	if f.Config.ctxt.Arch.Arch == sys.ArchWasm {
		canLiveOnStack := f.newSparseSet(f.NumValues())
		defer f.retSparseSet(canLiveOnStack)
		for _, b := range f.Blocks {
			// New block. Clear candidate set.
			canLiveOnStack.clear()
			for _, c := range b.ControlValues() {
				if c.Uses == 1 && !opcodeTable[c.Op].generic {
					canLiveOnStack.add(c.ID)
				}
			}
			// Walking backwards.
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				if canLiveOnStack.contains(v.ID) {
					v.OnWasmStack = true
				} else {
					// Value can not live on stack. Values are not allowed to be reordered, so clear candidate set.
					canLiveOnStack.clear()
				}
				for _, arg := range v.Args {
					// Value can live on the stack if:
					// - it is only used once
					// - it is used in the same basic block
					// - it is not a "mem" value
					// - it is a WebAssembly op
					if arg.Uses == 1 && arg.Block == v.Block && !arg.Type.IsMemory() && !opcodeTable[arg.Op].generic {
						canLiveOnStack.add(arg.ID)
					}
				}
			}
		}
	}

	// The clobberdeadreg experiment inserts code to clobber dead registers
	// at call sites.
	// Ignore huge functions to avoid doing too much work.
	if base.Flag.ClobberDeadReg && len(s.f.Blocks) <= 10000 {
		// TODO: honor GOCLOBBERDEADHASH, or maybe GOSSAHASH.
		s.doClobber = true
	}
}
// Adds a use record for id at distance dist from the start of the block.
// All calls to addUse must happen with nonincreasing dist.
func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {
	r := s.freeUseRecords
	if r != nil {
		s.freeUseRecords = r.next
	} else {
		r = &use{}
	}
	r.dist = dist
	r.pos = pos
	r.next = s.values[id].uses
	s.values[id].uses = r
	if r.next != nil && dist > r.next.dist {
		s.f.Fatalf("uses added in wrong order")
	}
}
// advanceUses advances the uses of v's args from the state before v to the state after v.
// Any values which have no more uses are deallocated from registers.
func (s *regAllocState) advanceUses(v *Value) {
	for _, a := range v.Args {
		if !s.values[a.ID].needReg {
			continue
		}
		ai := &s.values[a.ID]
		r := ai.uses
		ai.uses = r.next
		if r.next == nil {
			// Value is dead, free all registers that hold it.
			s.freeRegs(ai.regs)
		}
		r.next = s.freeUseRecords
		s.freeUseRecords = r
	}
}
// liveAfterCurrentInstruction reports whether v is live after
// the current instruction is completed. v must be used by the
// current instruction.
func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
	u := s.values[v.ID].uses
	if u == nil {
		panic(fmt.Errorf("u is nil, v = %s, s.values[v.ID] = %v", v.LongString(), s.values[v.ID]))
	}
	d := u.dist
	for u != nil && u.dist == d {
		u = u.next
	}
	return u != nil && u.dist > d
}
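// Note: an instruction may use the same value more than once (e.g. v+v),
// so all use records at the current distance d are skipped before checking
// whether any later use remains.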
// Sets the state of the registers to that encoded in regs.
func (s *regAllocState) setState(regs []endReg) {
	s.freeRegs(s.used)
	for _, x := range regs {
		s.assignReg(x.r, x.v, x.c)
	}
}
// compatRegs returns the set of registers which can store a type t.
func (s *regAllocState) compatRegs(t *types.Type) regMask {
	var m regMask
	if t.IsTuple() || t.IsFlags() {
		return 0
	}
	if t.IsFloat() || t == types.TypeInt128 {
		if t.Kind() == types.TFLOAT32 && s.f.Config.fp32RegMask != 0 {
			m = s.f.Config.fp32RegMask
		} else if t.Kind() == types.TFLOAT64 && s.f.Config.fp64RegMask != 0 {
			m = s.f.Config.fp64RegMask
		} else {
			m = s.f.Config.fpRegMask
		}
	} else {
		m = s.f.Config.gpRegMask
	}
	return m & s.allocatable
}
// regspec returns the regInfo for operation op.
func (s *regAllocState) regspec(v *Value) regInfo {
	op := v.Op
	if op == OpConvert {
		// OpConvert is a generic op, so it doesn't have a
		// register set in the static table. It can use any
		// allocatable integer register.
		m := s.allocatable & s.f.Config.gpRegMask
		return regInfo{inputs: []inputInfo{{regs: m}}, outputs: []outputInfo{{regs: m}}}
	}
	if op == OpArgIntReg {
		reg := v.Block.Func.Config.intParamRegs[v.AuxInt8()]
		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
	}
	if op == OpArgFloatReg {
		reg := v.Block.Func.Config.floatParamRegs[v.AuxInt8()]
		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
	}
	if op.IsCall() {
		if ac, ok := v.Aux.(*AuxCall); ok && ac.reg != nil {
			return *ac.Reg(&opcodeTable[op].reg, s.f.Config)
		}
	}
	if op == OpMakeResult && s.f.OwnAux.reg != nil {
		return *s.f.OwnAux.ResultReg(s.f.Config)
	}
	return opcodeTable[op].reg
}
// isGReg reports whether r is the g register.
func (s *regAllocState) isGReg(r register) bool {
	return s.f.Config.hasGReg && s.GReg == r
}
func (s *regAllocState) regalloc(f *Func) {
	regValLiveSet := f.newSparseSet(f.NumValues()) // set of values that may be live in register
	defer f.retSparseSet(regValLiveSet)
	var oldSched []*Value
	var phis []*Value
	var phiRegs []register
	var args []*Value

	// Data structure used for computing desired registers.
	var desired desiredState

	// Desired registers for inputs & outputs for each instruction in the block.
	type dentry struct {
		out [4]register    // desired output registers
		in  [3][4]register // desired input registers (for inputs 0,1, and 2)
	}
	var dinfo []dentry

	if f.Entry != f.Blocks[0] {
		f.Fatalf("entry block must be first")
	}
	for _, b := range s.visitOrder {
		if s.f.pass.debug > regDebug {
			fmt.Printf("Begin processing block %v\n", b)
		}
		s.curBlock = b

		// Initialize regValLiveSet and uses fields for this block.
		// Walk backwards through the block doing liveness analysis.
		regValLiveSet.clear()
		for _, e := range s.live[b.ID] {
			s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block
			regValLiveSet.add(e.ID)
		}
		for _, v := range b.ControlValues() {
			if s.values[v.ID].needReg {
				s.addUse(v.ID, int32(len(b.Values)), b.Pos) // pseudo-use by control values
				regValLiveSet.add(v.ID)
			}
		}
		for i := len(b.Values) - 1; i >= 0; i-- {
			v := b.Values[i]
			regValLiveSet.remove(v.ID)
			if v.Op == OpPhi {
				// Remove v from the live set, but don't add
				// any inputs. This is the state the len(b.Preds)>1
				// case below desires; it wants to process phis specially.
				continue
			}
			if opcodeTable[v.Op].call {
				// Function call clobbers all the registers but SP and SB.
				regValLiveSet.clear()
				if s.sp != 0 && s.values[s.sp].uses != nil {
					regValLiveSet.add(s.sp)
				}
				if s.sb != 0 && s.values[s.sb].uses != nil {
					regValLiveSet.add(s.sb)
				}
			}
			for _, a := range v.Args {
				if !s.values[a.ID].needReg {
					continue
				}
				s.addUse(a.ID, int32(i), v.Pos)
				regValLiveSet.add(a.ID)
			}
		}
		if s.f.pass.debug > regDebug {
			fmt.Printf("use distances for %s\n", b)
			for i := range s.values {
				vi := &s.values[i]
				u := vi.uses
				if u == nil {
					continue
				}
				fmt.Printf("  v%d:", i)
				for u != nil {
					fmt.Printf(" %d", u.dist)
					u = u.next
				}
				fmt.Println()
			}
		}
		// Make a copy of the block schedule so we can generate a new one in place.
		// We make a separate copy for phis and regular values.
		nphi := 0
		for _, v := range b.Values {
			if v.Op != OpPhi {
				break
			}
			nphi++
		}
		phis = append(phis[:0], b.Values[:nphi]...)
		oldSched = append(oldSched[:0], b.Values[nphi:]...)
		b.Values = b.Values[:0]

		// Initialize start state of block.
		if b == f.Entry {
			// Regalloc state is empty to start.
			if nphi > 0 {
				f.Fatalf("phis in entry block")
			}
		} else if len(b.Preds) == 1 {
			// Start regalloc state with the end state of the previous block.
			s.setState(s.endRegs[b.Preds[0].b.ID])
			if nphi > 0 {
				f.Fatalf("phis in single-predecessor block")
			}
			// Drop any values which are no longer live.
			// This may happen because at the end of p, a value may be
			// live but only used by some other successor of p.
			for r := register(0); r < s.numRegs; r++ {
				v := s.regs[r].v
				if v != nil && !regValLiveSet.contains(v.ID) {
					s.freeReg(r)
				}
			}
		} else {
			// This is the complicated case. We have more than one predecessor,
			// which means we may have Phi ops.

			// Start with the final register state of the predecessor with least spill values.
			// This is based on the following points:
			// 1. Fewer live spills indicate that the register pressure of this path is smaller,
			//    so the values of this block are more likely to be allocated to registers.
			// 2. Avoid the predecessor that contains the function call, because the predecessor that
			//    contains the function call usually generates a lot of spills and loses the previously
			//    allocated registers.
			// TODO: Improve this part. At least the size of endRegs of the predecessor also has
			// an impact on the code size and compiler speed. But it is not easy to find a simple
			// and efficient method that combines multiple factors.
			idx := -1
			for i, p := range b.Preds {
				// If the predecessor has not been visited yet, skip it because its end state
				// (endRegs and spillLive) has not been computed yet.
				pb := p.b
				if s.blockOrder[pb.ID] >= s.blockOrder[b.ID] {
					continue
				}
				if idx == -1 {
					idx = i
					continue
				}
				pSel := b.Preds[idx].b
				if len(s.spillLive[pb.ID]) < len(s.spillLive[pSel.ID]) {
					idx = i
				} else if len(s.spillLive[pb.ID]) == len(s.spillLive[pSel.ID]) {
					// Use a bit of likely information. After the critical-edge-removal pass,
					// pb and pSel must be plain blocks, so check edge pb->pb.Preds instead of edge pb->b.
					// TODO: improve the prediction of the likely predecessor. The following
					// method is only suitable for the simplest cases. For complex cases,
					// the prediction may be inaccurate, but this does not affect the
					// correctness of the program.
					// According to the layout algorithm, the predecessor with the
					// smaller blockOrder is the true branch, and the test results show
					// that it is better to choose the predecessor with a smaller
					// blockOrder than no choice.
					if pb.likelyBranch() && !pSel.likelyBranch() || s.blockOrder[pb.ID] < s.blockOrder[pSel.ID] {
						idx = i
					}
				}
			}
			if idx < 0 {
				f.Fatalf("bad visitOrder, no predecessor of %s has been visited before it", b)
			}
			p := b.Preds[idx].b
			s.setState(s.endRegs[p.ID])

			if s.f.pass.debug > regDebug {
				fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
				for _, x := range s.endRegs[p.ID] {
					fmt.Printf("  %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c)
				}
			}
			// Decide on registers for phi ops. Use the registers determined
			// by the primary predecessor if we can.
			// TODO: pick best of (already processed) predecessors?
			// Majority vote? Deepest nesting level?
			phiRegs = phiRegs[:0]
			var phiUsed regMask

			for _, v := range phis {
				if !s.values[v.ID].needReg {
					phiRegs = append(phiRegs, noRegister)
					continue
				}
				a := v.Args[idx]
				// Some instructions target not-allocatable registers.
				// They're not suitable for further (phi-function) allocation.
				m := s.values[a.ID].regs &^ phiUsed & s.allocatable
				if m != 0 {
					r := pickReg(m)
					phiUsed |= regMask(1) << r
					phiRegs = append(phiRegs, r)
				} else {
					phiRegs = append(phiRegs, noRegister)
				}
			}
			// Second pass - deallocate all in-register phi inputs.
			for i, v := range phis {
				if !s.values[v.ID].needReg {
					continue
				}
				a := v.Args[idx]
				r := phiRegs[i]
				if r == noRegister {
					continue
				}
				if regValLiveSet.contains(a.ID) {
					// Input value is still live (it is used by something other than Phi).
					// Try to move it around before kicking out, if there is a free register.
					// We generate a Copy in the predecessor block and record it. It will be
					// deleted later if never used.
					//
					// Pick a free register. At this point some registers used in the predecessor
					// block may have been deallocated. Those are the ones used for Phis. Exclude
					// them (and they are not going to be helpful anyway).
					m := s.compatRegs(a.Type) &^ s.used &^ phiUsed
					if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
						r2 := pickReg(m)
						c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c)
						s.copies[c] = false
						if s.f.pass.debug > regDebug {
							fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2])
						}
						s.setOrig(c, a)
						s.assignReg(r2, a, c)
						s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
					}
				}
				s.freeReg(r)
			}

			// Copy phi ops into new schedule.
			b.Values = append(b.Values, phis...)
			// Third pass - pick registers for phis whose input
			// was not in a register in the primary predecessor.
			for i, v := range phis {
				if !s.values[v.ID].needReg {
					continue
				}
				if phiRegs[i] != noRegister {
					continue
				}
				m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
				// If one of the other inputs of v is in a register, and the register is available,
				// select this register, which can save some unnecessary copies.
				for i, pe := range b.Preds {
					if i == idx {
						continue
					}
					ri := noRegister
					for _, er := range s.endRegs[pe.b.ID] {
						if er.v == s.orig[v.Args[i].ID] {
							ri = er.r
							break
						}
					}
					if ri != noRegister && m>>ri&1 != 0 {
						m = regMask(1) << ri
						break
					}
				}
				if m != 0 {
					r := pickReg(m)
					phiRegs[i] = r
					phiUsed |= regMask(1) << r
				}
			}

			// Set registers for phis. Add phi spill code.
			for i, v := range phis {
				if !s.values[v.ID].needReg {
					continue
				}
				r := phiRegs[i]
				if r == noRegister {
					// stack-based phi
					// Spills will be inserted in all the predecessors below.
					s.values[v.ID].spill = v // v starts life spilled
					continue
				}
				// register-based phi
				s.assignReg(r, v, v)
			}
			// Deallocate any values which are no longer live. Phis are excluded.
			for r := register(0); r < s.numRegs; r++ {
				if phiUsed>>r&1 != 0 {
					continue
				}
				v := s.regs[r].v
				if v != nil && !regValLiveSet.contains(v.ID) {
					s.freeReg(r)
				}
			}

			// Save the starting state for use by merge edges.
			// We append to a stack allocated variable that we'll
			// later copy into s.startRegs in one fell swoop, to save
			// on allocations.
			regList := make([]startReg, 0, 32)
			for r := register(0); r < s.numRegs; r++ {
				v := s.regs[r].v
				if v == nil {
					continue
				}
				if phiUsed>>r&1 != 0 {
					// Skip registers that phis used, we'll handle those
					// specially during merge edge processing.
					continue
				}
				regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
			}
			s.startRegs[b.ID] = make([]startReg, len(regList))
			copy(s.startRegs[b.ID], regList)

			if s.f.pass.debug > regDebug {
				fmt.Printf("after phis\n")
				for _, x := range s.startRegs[b.ID] {
					fmt.Printf("  %s: v%d\n", &s.registers[x.r], x.v.ID)
				}
			}
		}
		// Allocate space to record the desired registers for each value.
		if l := len(oldSched); cap(dinfo) < l {
			dinfo = make([]dentry, l)
		} else {
			dinfo = dinfo[:l]
			for i := range dinfo {
				dinfo[i] = dentry{}
			}
		}

		// Load static desired register info at the end of the block.
		desired.copy(&s.desired[b.ID])

		// Check actual assigned registers at the start of the next block(s).
		// Dynamically assigned registers will trump the static
		// desired registers computed during liveness analysis.
		// Note that we do this phase after startRegs is set above, so that
		// we get the right behavior for a block which branches to itself.
		for _, e := range b.Succs {
			succ := e.b
			// TODO: prioritize likely successor?
			for _, x := range s.startRegs[succ.ID] {
				desired.add(x.v.ID, x.r)
			}
			// Process phi ops in succ.
			pidx := e.i
			for _, v := range succ.Values {
				if v.Op != OpPhi {
					break
				}
				if !s.values[v.ID].needReg {
					continue
				}
				rp, ok := s.f.getHome(v.ID).(*Register)
				if !ok {
					// If v is not assigned a register, pick a register assigned to one of v's inputs.
					// Hopefully v will get assigned that register later.
					// If the inputs have allocated register information, add it to desired,
					// which may reduce spill or copy operations when the register is available.
					for _, a := range v.Args {
						rp, ok = s.f.getHome(a.ID).(*Register)
						if ok {
							break
						}
					}
					if !ok {
						continue
					}
				}
				desired.add(v.Args[pidx].ID, register(rp.num))
			}
		}
		// Walk values backwards computing desired register info.
		// See computeLive for more comments.
		for i := len(oldSched) - 1; i >= 0; i-- {
			v := oldSched[i]
			prefs := desired.remove(v.ID)
			regspec := s.regspec(v)
			desired.clobber(regspec.clobbers)
			for _, j := range regspec.inputs {
				if countRegs(j.regs) != 1 {
					continue
				}
				desired.clobber(j.regs)
				desired.add(v.Args[j.idx].ID, pickReg(j.regs))
			}
			if opcodeTable[v.Op].resultInArg0 {
				if opcodeTable[v.Op].commutative {
					desired.addList(v.Args[1].ID, prefs)
				}
				desired.addList(v.Args[0].ID, prefs)
			}
			// Save desired registers for this value.
			dinfo[i].out = prefs
			for j, a := range v.Args {
				if j >= len(dinfo[i].in) {
					break
				}
				dinfo[i].in[j] = desired.get(a.ID)
			}
		}
		// Process all the non-phi values.
		for idx, v := range oldSched {
			if s.f.pass.debug > regDebug {
				fmt.Printf("  processing %s\n", v.LongString())
			}
			regspec := s.regspec(v)
			if v.Op == OpPhi {
				f.Fatalf("phi %s not at start of block", v)
			}
			if v.Op == OpSP {
				s.assignReg(s.SPReg, v, v)
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				s.sp = v.ID
				continue
			}
			if v.Op == OpSB {
				s.assignReg(s.SBReg, v, v)
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				s.sb = v.ID
				continue
			}
			if v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN {
				if s.values[v.ID].needReg {
					if v.Op == OpSelectN {
						s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocResults)[int(v.AuxInt)].(*Register).num), v, v)
					} else {
						var i = 0
						if v.Op == OpSelect1 {
							i = 1
						}
						s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v)
					}
				}
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				continue
			}
			if v.Op == OpGetG && s.f.Config.hasGReg {
				// use hardware g register
				if s.regs[s.GReg].v != nil {
					s.freeReg(s.GReg) // kick out the old value
				}
				s.assignReg(s.GReg, v, v)
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				continue
			}
			if v.Op == OpArg {
				// Args are "pre-spilled" values. We don't allocate
				// any register here. We just set up the spill pointer to
				// point at itself and any later user will restore it to use it.
				s.values[v.ID].spill = v
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				continue
			}
			if v.Op == OpKeepAlive {
				// Make sure the argument to v is still live here.
				s.advanceUses(v)
				a := v.Args[0]
				vi := &s.values[a.ID]
				if vi.regs == 0 && !vi.rematerializeable {
					// Use the spill location.
					// This forces later liveness analysis to make the
					// value live at this point.
					v.SetArg(0, s.makeSpill(a, b))
				} else if _, ok := a.Aux.(*ir.Name); ok && vi.rematerializeable {
					// Rematerializeable value with a gc.Node. This is the address of
					// a stack object (e.g. an LEAQ). Keep the object live.
					// Change it to VarLive, which is what plive expects for locals.
					v.Op = OpVarLive
					v.SetArgs1(v.Args[1])
					v.Aux = a.Aux
				} else {
					// In-register and rematerializeable values are already live.
					// These are typically rematerializeable constants like nil,
					// or values of a variable that were modified since the last call.
					v.Op = OpCopy
					v.SetArgs1(v.Args[1])
				}
				b.Values = append(b.Values, v)
				continue
			}
			if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
				// No register allocation required (or none specified yet)
				if s.doClobber && v.Op.IsCall() {
					s.clobberRegs(regspec.clobbers)
				}
				s.freeRegs(regspec.clobbers)
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				continue
			}
			if s.values[v.ID].rematerializeable {
				// Value is rematerializeable, don't issue it here.
				// It will get issued just before each use (see
				// allocValueToReg).
				for _, a := range v.Args {
					a.Uses--
				}
				s.advanceUses(v)
				continue
			}

			if s.f.pass.debug > regDebug {
				fmt.Printf("value %s\n", v.LongString())
				fmt.Printf("  out:")
				for _, r := range dinfo[idx].out {
					if r != noRegister {
						fmt.Printf(" %s", &s.registers[r])
					}
				}
				fmt.Println()
				for i := 0; i < len(v.Args) && i < 3; i++ {
					fmt.Printf("  in%d:", i)
					for _, r := range dinfo[idx].in[i] {
						if r != noRegister {
							fmt.Printf(" %s", &s.registers[r])
						}
					}
					fmt.Println()
				}
			}
			// Move arguments to registers. Process in an ordering defined
			// by the register specification (most constrained first).
			args = append(args[:0], v.Args...)
			for _, i := range regspec.inputs {
				mask := i.regs
				if mask&s.values[args[i.idx].ID].regs == 0 {
					// Need a new register for the input.
					mask &= s.allocatable
					mask &^= s.nospill
					// Use desired register if available.
					if i.idx < 3 {
						for _, r := range dinfo[idx].in[i.idx] {
							if r != noRegister && (mask&^s.used)>>r&1 != 0 {
								// Desired register is allowed and unused.
								mask = regMask(1) << r
								break
							}
						}
					}
					// Avoid registers we're saving for other values.
					if mask&^desired.avoid != 0 {
						mask &^= desired.avoid
					}
				}
				args[i.idx] = s.allocValToReg(args[i.idx], mask, true, v.Pos)
			}
			// If the output clobbers the input register, make sure we have
			// at least two copies of the input register so we don't
			// have to reload the value from the spill location.
			if opcodeTable[v.Op].resultInArg0 {
				var m regMask
				if !s.liveAfterCurrentInstruction(v.Args[0]) {
					// arg0 is dead. We can clobber its register.
					goto ok
				}
				if opcodeTable[v.Op].commutative && !s.liveAfterCurrentInstruction(v.Args[1]) {
					args[0], args[1] = args[1], args[0]
					goto ok
				}
				if s.values[v.Args[0].ID].rematerializeable {
					// We can rematerialize the input, don't worry about clobbering it.
					goto ok
				}
				if opcodeTable[v.Op].commutative && s.values[v.Args[1].ID].rematerializeable {
					args[0], args[1] = args[1], args[0]
					goto ok
				}
				if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
					// we have at least 2 copies of arg0. We can afford to clobber one.
					goto ok
				}
				if opcodeTable[v.Op].commutative && countRegs(s.values[v.Args[1].ID].regs) >= 2 {
					args[0], args[1] = args[1], args[0]
					goto ok
				}

				// We can't overwrite arg0 (or arg1, if commutative). So we
				// need to make a copy of an input so we have a register we can modify.

				// Possible new registers to copy into.
				m = s.compatRegs(v.Args[0].Type) &^ s.used
				if m == 0 {
					// No free registers. In this case we'll just clobber
					// an input and future uses of that input must use a restore.
					// TODO(khr): We should really do this like allocReg does it,
					// spilling the value with the most distant next use.
					goto ok
				}

				// Try to move an input to the desired output.
				for _, r := range dinfo[idx].out {
					if r != noRegister && m>>r&1 != 0 {
						m = regMask(1) << r
						args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
						// Note: we update args[0] so the instruction will
						// use the register copy we just made.
						goto ok
					}
				}
				// Try to copy input to its desired location & use its old
				// location as the result register.
				for _, r := range dinfo[idx].in[0] {
					if r != noRegister && m>>r&1 != 0 {
						m = regMask(1) << r
						c := s.allocValToReg(v.Args[0], m, true, v.Pos)
						s.copies[c] = false
						// Note: no update to args[0] so the instruction will
						// use the original copy.
						goto ok
					}
				}
				if opcodeTable[v.Op].commutative {
					for _, r := range dinfo[idx].in[1] {
						if r != noRegister && m>>r&1 != 0 {
							m = regMask(1) << r
							c := s.allocValToReg(v.Args[1], m, true, v.Pos)
							s.copies[c] = false
							args[0], args[1] = args[1], args[0]
							goto ok
						}
					}
				}

				// Avoid future fixed uses if we can.
				if m&^desired.avoid != 0 {
					m &^= desired.avoid
				}
				// Save input 0 to a new register so we can clobber it.
				c := s.allocValToReg(v.Args[0], m, true, v.Pos)
				s.copies[c] = false
			}

		ok:
			// Now that all args are in regs, we're ready to issue the value itself.
			// Before we pick a register for the output value, allow input registers
			// to be deallocated. We do this here so that the output can use the
			// same register as a dying input.
			if !opcodeTable[v.Op].resultNotInArgs {
				s.tmpused = s.nospill
				s.nospill = 0
				s.advanceUses(v) // frees any registers holding args that are no longer live
			}

			// Dump any registers which will be clobbered
			if s.doClobber && v.Op.IsCall() {
				// clobber registers that are marked as clobber in regmask, but
				// don't clobber inputs.
				s.clobberRegs(regspec.clobbers &^ s.tmpused &^ s.nospill)
			}
			s.freeRegs(regspec.clobbers)
			s.tmpused |= regspec.clobbers
			// Pick registers for outputs.
			{
				outRegs := noRegisters // TODO if this is costly, hoist and clear incrementally below.
				maxOutIdx := -1
				var used regMask
				for _, out := range regspec.outputs {
					mask := out.regs & s.allocatable &^ used
					if mask == 0 {
						continue
					}
					if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
						if !opcodeTable[v.Op].commutative {
							// Output must use the same register as input 0.
							r := register(s.f.getHome(args[0].ID).(*Register).num)
							mask = regMask(1) << r
						} else {
							// Output must use the same register as input 0 or 1.
							r0 := register(s.f.getHome(args[0].ID).(*Register).num)
							r1 := register(s.f.getHome(args[1].ID).(*Register).num)
							// Check r0 and r1 for desired output register.
							found := false
							for _, r := range dinfo[idx].out {
								if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
									mask = regMask(1) << r
									found = true
									if r == r1 {
										args[0], args[1] = args[1], args[0]
									}
									break
								}
							}
							if !found {
								// Neither are desired, pick r0.
								mask = regMask(1) << r0
							}
						}
					}
					for _, r := range dinfo[idx].out {
						if r != noRegister && (mask&^s.used)>>r&1 != 0 {
							// Desired register is allowed and unused.
							mask = regMask(1) << r
							break
						}
					}
					// Avoid registers we're saving for other values.
					if mask&^desired.avoid&^s.nospill != 0 {
						mask &^= desired.avoid
					}
					r := s.allocReg(mask, v)
					if out.idx > maxOutIdx {
						maxOutIdx = out.idx
					}
					outRegs[out.idx] = r
					used |= regMask(1) << r
					s.tmpused |= regMask(1) << r
				}
				// Record register choices
				if v.Type.IsTuple() {
					var outLocs LocPair
					if r := outRegs[0]; r != noRegister {
						outLocs[0] = &s.registers[r]
					}
					if r := outRegs[1]; r != noRegister {
						outLocs[1] = &s.registers[r]
					}
					s.f.setHome(v, outLocs)
					// Note that subsequent SelectX instructions will do the assignReg calls.
				} else if v.Type.IsResults() {
					// preallocate outLocs to the right size, which is maxOutIdx+1
					outLocs := make(LocResults, maxOutIdx+1, maxOutIdx+1)
					for i := 0; i <= maxOutIdx; i++ {
						if r := outRegs[i]; r != noRegister {
							outLocs[i] = &s.registers[r]
						}
					}
					s.f.setHome(v, outLocs)
				} else {
					if r := outRegs[0]; r != noRegister {
						s.assignReg(r, v, v)
					}
				}
			}
			// deallocate dead args, if we have not done so
			if opcodeTable[v.Op].resultNotInArgs {
				s.nospill = 0
				s.advanceUses(v) // frees any registers holding args that are no longer live
			}
			s.tmpused = 0

			// Issue the Value itself.
			for i, a := range args {
				v.SetArg(i, a) // use register version of arguments
			}
			b.Values = append(b.Values, v)
		}
		// Copy the control values - we need this so we can reduce the
		// uses property of these values later.
		controls := append(make([]*Value, 0, 2), b.ControlValues()...)

		// Load control values into registers.
		for i, v := range b.ControlValues() {
			if !s.values[v.ID].needReg {
				continue
			}
			if s.f.pass.debug > regDebug {
				fmt.Printf("  processing control %s\n", v.LongString())
			}
			// We assume that a control input can be passed in any
			// type-compatible register. If this turns out not to be true,
			// we'll need to introduce a regspec for a block's control value.
			b.ReplaceControl(i, s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos))
		}

		// Reduce the uses of the control values once registers have been loaded.
		// This loop is equivalent to the advanceUses method.
		for _, v := range controls {
			vi := &s.values[v.ID]
			if !vi.needReg {
				continue
			}
			// Remove this use from the uses list.
			u := vi.uses
			vi.uses = u.next
			if u.next == nil {
				s.freeRegs(vi.regs) // value is dead
			}
			u.next = s.freeUseRecords
			s.freeUseRecords = u
		}
		// If we are approaching a merge point and we are the primary
		// predecessor of it, find live values that we use soon after
		// the merge point and promote them to registers now.
		if len(b.Succs) == 1 {
			if s.f.Config.hasGReg && s.regs[s.GReg].v != nil {
				s.freeReg(s.GReg) // Spill value in G register before any merge.
			}
			// For this to be worthwhile, the loop must have no calls in it.
			top := b.Succs[0].b
			loop := s.loopnest.b2l[top.ID]
			if loop == nil || loop.header != top || loop.containsUnavoidableCall {
				goto badloop
			}

			// TODO: sort by distance, pick the closest ones?
			for _, live := range s.live[b.ID] {
				if live.dist >= unlikelyDistance {
					// Don't preload anything live after the loop.
					continue
				}
				vid := live.ID
				vi := &s.values[vid]
				if vi.regs != 0 {
					continue
				}
				if vi.rematerializeable {
					continue
				}
				v := s.orig[vid]
				m := s.compatRegs(v.Type) &^ s.used
				// Use desired register if available.
			outerloop:
				for _, e := range desired.entries {
					if e.ID != v.ID {
						continue
					}
					for _, r := range e.regs {
						if r != noRegister && m>>r&1 != 0 {
							m = regMask(1) << r
							break outerloop
						}
					}
				}
				if m&^desired.avoid != 0 {
					m &^= desired.avoid
				}
				if m != 0 {
					s.allocValToReg(v, m, false, b.Pos)
				}
			}
		}
	badloop:
		;
		// Save end-of-block register state.
		// First count how many; this cuts allocations in half.
		k := 0
		for r := register(0); r < s.numRegs; r++ {
			v := s.regs[r].v
			if v == nil {
				continue
			}
			k++
		}
		regList := make([]endReg, 0, k)
		for r := register(0); r < s.numRegs; r++ {
			v := s.regs[r].v
			if v == nil {
				continue
			}
			regList = append(regList, endReg{r, v, s.regs[r].c})
		}
		s.endRegs[b.ID] = regList

		if checkEnabled {
			regValLiveSet.clear()
			for _, x := range s.live[b.ID] {
				regValLiveSet.add(x.ID)
			}
			for r := register(0); r < s.numRegs; r++ {
				v := s.regs[r].v
				if v == nil {
					continue
				}
				if !regValLiveSet.contains(v.ID) {
					s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
				}
			}
		}
		// If a value is live at the end of the block and
		// isn't in a register, generate a use for the spill location.
		// We need to remember this information so that
		// the liveness analysis in stackalloc is correct.
		for _, e := range s.live[b.ID] {
			vi := &s.values[e.ID]
			if vi.regs != 0 {
				// in a register, we'll use that source for the merge.
				continue
			}
			if vi.rematerializeable {
				// we'll rematerialize during the merge.
				continue
			}
			if s.f.pass.debug > regDebug {
				fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
			}
			spill := s.makeSpill(s.orig[e.ID], b)
			s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
		}
		// Clear any final uses.
		// All that is left should be the pseudo-uses added for values which
		// are live at the end of b.
		for _, e := range s.live[b.ID] {
			u := s.values[e.ID].uses
			if u == nil {
				f.Fatalf("live at end, no uses v%d", e.ID)
			}
			if u.next != nil {
				f.Fatalf("live at end, too many uses v%d", e.ID)
			}
			s.values[e.ID].uses = nil
			u.next = s.freeUseRecords
			s.freeUseRecords = u
		}
	}
	// Decide where the spills we generated will go.
	s.placeSpills()

	// Anything that didn't get a register gets a stack location here.
	// (StoreReg, stack-based phis, inputs, ...)
	stacklive := stackalloc(s.f, s.spillLive)

	// Fix up all merge edges.
	s.shuffle(stacklive)

	// Erase any copies we never used.
	// Also, an unused copy might be the only use of another copy,
	// so continue erasing until we reach a fixed point.
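	// For example, if c1 = Copy v and c2 = Copy c1 and only c2 is unused,
	// deleting c2 drops c1's use count to zero, so another pass is needed.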
	for {
		progress := false
		for c, used := range s.copies {
			if !used && c.Uses == 0 {
				if s.f.pass.debug > regDebug {
					fmt.Printf("delete copied value %s\n", c.LongString())
				}
				c.RemoveArg(0)
				f.freeValue(c)
				delete(s.copies, c)
				progress = true
			}
		}
		if !progress {
			break
		}
	}

	for _, b := range s.visitOrder {
		i := 0
		for _, v := range b.Values {
			if v.Op == OpInvalid {
				continue
			}
			b.Values[i] = v
			i++
		}
		b.Values = b.Values[:i]
	}
}
func (s *regAllocState) placeSpills() {
	f := s.f

	// Precompute some useful info.
	phiRegs := make([]regMask, f.NumBlocks())
	for _, b := range s.visitOrder {
		var m regMask
		for _, v := range b.Values {
			if v.Op != OpPhi {
				break
			}
			if r, ok := f.getHome(v.ID).(*Register); ok {
				m |= regMask(1) << uint(r.num)
			}
		}
		phiRegs[b.ID] = m
	}

	// Start maps block IDs to the list of spills
	// that go at the start of the block (but after any phis).
	start := map[ID][]*Value{}
	// After maps value IDs to the list of spills
	// that go immediately after that value ID.
	after := map[ID][]*Value{}

	for i := range s.values {
		vi := s.values[i]
		spill := vi.spill
		if spill == nil {
			continue
		}
		if spill.Block != nil {
			// Some spills are already fully set up,
			// like OpArgs and stack-based phis.
			continue
		}
		v := s.orig[i]

		// Walk down the dominator tree looking for a good place to
		// put the spill of v. At the start "best" is the best place
		// we have found so far.
		// TODO: find a way to make this O(1) without arbitrary cutoffs.
		if v == nil {
			panic(fmt.Errorf("nil v, s.orig[%d], vi = %v, spill = %s", i, vi, spill.LongString()))
		}
		best := v.Block
		bestArg := v
		var bestDepth int16
		if l := s.loopnest.b2l[best.ID]; l != nil {
			bestDepth = l.depth
		}
		b := best
		const maxSpillSearch = 100
		for i := 0; i < maxSpillSearch; i++ {
			// Find the child of b in the dominator tree which
			// dominates all restores.
			p := b
			b = nil
			for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
				if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
					// c also dominates all restores. Walk down into c.
					b = c
					break
				}
			}
			if b == nil {
				// Ran out of blocks which dominate all restores.
				break
			}

			var depth int16
			if l := s.loopnest.b2l[b.ID]; l != nil {
				depth = l.depth
			}
			if depth > bestDepth {
				// Don't push the spill into a deeper loop.
				break
			}

			// If v is in a register at the start of b, we can
			// place the spill here (after the phis).
			if len(b.Preds) == 1 {
				for _, e := range s.endRegs[b.Preds[0].b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			} else {
				for _, e := range s.startRegs[b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			}
		}

		// Put the spill in the best block we found.
		spill.Block = best
		spill.AddArg(bestArg)
		if best == v.Block && v.Op != OpPhi {
			// Place immediately after v.
			after[v.ID] = append(after[v.ID], spill)
		} else {
			// Place at the start of best block.
			start[best.ID] = append(start[best.ID], spill)
		}
	}
	// Insert spill instructions into the block schedules.
	var oldSched []*Value
	for _, b := range s.visitOrder {
		nphi := 0
		for _, v := range b.Values {
			if v.Op != OpPhi {
				break
			}
			nphi++
		}
		oldSched = append(oldSched[:0], b.Values[nphi:]...)
		b.Values = b.Values[:nphi]
		b.Values = append(b.Values, start[b.ID]...)
		for _, v := range oldSched {
			b.Values = append(b.Values, v)
			b.Values = append(b.Values, after[v.ID]...)
		}
	}
}
// shuffle fixes up all the merge edges (those going into blocks of indegree > 1).
func (s *regAllocState) shuffle(stacklive [][]ID) {
	var e edgeState
	e.s = s
	e.cache = map[ID][]*Value{}
	e.contents = map[Location]contentRecord{}
	if s.f.pass.debug > regDebug {
		fmt.Printf("shuffle %s\n", s.f.Name)
		fmt.Println(s.f.String())
	}

	for _, b := range s.visitOrder {
		if len(b.Preds) <= 1 {
			continue
		}
		e.b = b
		for i, edge := range b.Preds {
			p := edge.b
			e.p = p
			e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
			e.process()
		}
	}

	if s.f.pass.debug > regDebug {
		fmt.Printf("post shuffle %s\n", s.f.Name)
		fmt.Println(s.f.String())
	}
}
type edgeState struct {
	s    *regAllocState
	p, b *Block // edge goes from p->b.

	// for each pre-regalloc value, a list of equivalent cached values
	cache      map[ID][]*Value
	cachedVals []ID // (superset of) keys of the above map, for deterministic iteration

	// map from location to the value it contains
	contents map[Location]contentRecord

	// desired destination locations
	destinations []dstRecord
	extra        []dstRecord

	usedRegs              regMask // registers currently holding something
	uniqueRegs            regMask // registers holding the only copy of a value
	finalRegs             regMask // registers holding final target
	rematerializeableRegs regMask // registers that hold rematerializeable values
}
type contentRecord struct {
	vid   ID       // pre-regalloc value
	c     *Value   // cached value
	final bool     // this is a satisfied destination
	pos   src.XPos // source position of use of the value
}

type dstRecord struct {
	loc    Location // register or stack slot
	vid    ID       // pre-regalloc value it should contain
	splice **Value  // place to store reference to the generating instruction
	pos    src.XPos // source position of use of this location
}
// setup initializes the edge state for shuffling.
func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("edge %s->%s\n", e.p, e.b)
	}

	// Clear state.
	for _, vid := range e.cachedVals {
		delete(e.cache, vid)
	}
	e.cachedVals = e.cachedVals[:0]
	for k := range e.contents {
		delete(e.contents, k)
	}
	e.usedRegs = 0
	e.uniqueRegs = 0
	e.finalRegs = 0
	e.rematerializeableRegs = 0

	// Live registers can be sources.
	for _, x := range srcReg {
		e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos) // don't care the position of the source
	}
	// So can all of the spill locations.
	for _, spillID := range stacklive {
		v := e.s.orig[spillID]
		spill := e.s.values[v.ID].spill
		if !e.s.sdom.IsAncestorEq(spill.Block, e.p) {
			// Spills were placed that only dominate the uses found
			// during the first regalloc pass. The edge fixup code
			// can't use a spill location if the spill doesn't dominate
			// the edge.
			// We are guaranteed that if the spill doesn't dominate this edge,
			// then the value is available in a register (because we called
			// makeSpill for every value not in a register at the start
			// of an edge).
			continue
		}
		e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos) // don't care the position of the source
	}

	// Figure out all the destinations we need.
	dsts := e.destinations[:0]
	for _, x := range dstReg {
		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
	}
	// Phis need their args to end up in a specific location.
	for _, v := range e.b.Values {
		if v.Op != OpPhi {
			break
		}
		loc := e.s.f.getHome(v.ID)
		if loc == nil {
			continue
		}
		dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
	}
	e.destinations = dsts

	if e.s.f.pass.debug > regDebug {
		for _, vid := range e.cachedVals {
			a := e.cache[vid]
			for _, c := range a {
				fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
			}
		}
		for _, d := range e.destinations {
			fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
		}
	}
}
// process generates code to move all the values to the right destination locations.
func (e *edgeState) process() {
	dsts := e.destinations

	// Process the destinations until they are all satisfied.
	for len(dsts) > 0 {
		i := 0
		for _, d := range dsts {
			if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
				// Failed - save for next iteration.
				dsts[i] = d
				i++
			}
		}
		if i < len(dsts) {
			// Made some progress. Go around again.
			dsts = dsts[:i]

			// Append any extra destinations we generated.
			dsts = append(dsts, e.extra...)
			e.extra = e.extra[:0]
			continue
		}

		// We made no progress. That means that any
		// remaining unsatisfied moves are in simple cycles.
		// For example, A -> B -> C -> D -> A.
		//   A ----> B
		//   ^       |
		//   |       |
		//   |       v
		//   D <---- C
		//
		// To break the cycle, we pick an unused register, say R,
		// and put a copy of B there.
		//   A ----> B
		//   ^       |
		//   |       |
		//   |       v
		//   D <---- C <---- R=copyofB
		// When we resume the outer loop, the A->B move can now proceed,
		// and eventually the whole cycle completes.

		// Copy any cycle location to a temp register. This duplicates
		// one of the cycle entries, allowing the just duplicated value
		// to be overwritten and the cycle to proceed.
		d := dsts[0]
		loc := d.loc
		vid := e.contents[loc].vid
		c := e.contents[loc].c
		r := e.findRegFor(c.Type)
		if e.s.f.pass.debug > regDebug {
			fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c)
		}
		e.erase(r)
		pos := d.pos.WithNotStmt()
		if _, isReg := loc.(*Register); isReg {
			c = e.p.NewValue1(pos, OpCopy, c.Type, c)
		} else {
			c = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
		}
		e.set(r, vid, c, false, pos)
		if c.Op == OpLoadReg && e.s.isGReg(register(r.(*Register).num)) {
			e.s.f.Fatalf("process.OpLoadReg targeting g: " + c.LongString())
		}
	}
}

// processDest generates code to put value vid into location loc. Returns true
// if progress was made.
func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool {
	pos = pos.WithNotStmt()
	occupant := e.contents[loc]
	if occupant.vid == vid {
		// Value is already in the correct place.
		e.contents[loc] = contentRecord{vid, occupant.c, true, pos}
		if splice != nil {
			(*splice).Uses--
			*splice = occupant.c
			occupant.c.Uses++
		}
		// Note: if splice==nil then c will appear dead. This is
		// non-SSA-form code, so be careful not to run deadcode
		// elimination after this pass.
		if _, ok := e.s.copies[occupant.c]; ok {
			// The copy at occupant.c was used to avoid a spill.
			e.s.copies[occupant.c] = true
		}
		return true
	}

	// Check if we're allowed to clobber the destination location.
	if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable {
		// We can't overwrite the last copy
		// of a value that needs to survive.
		return false
	}

	// Copy from a source of v, register preferred.
	v := e.s.orig[vid]
	var c *Value
	var src Location
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("moving v%d to %s\n", vid, loc)
		fmt.Printf("sources of v%d:", vid)
	}
	for _, w := range e.cache[vid] {
		h := e.s.f.getHome(w.ID)
		if e.s.f.pass.debug > regDebug {
			fmt.Printf(" %s:%s", h, w)
		}
		_, isreg := h.(*Register)
		if src == nil || isreg {
			c = w
			src = h
		}
	}
	if e.s.f.pass.debug > regDebug {
		if src != nil {
			fmt.Printf(" [use %s]\n", src)
		} else {
			fmt.Printf(" [no source]\n")
		}
	}
	_, dstReg := loc.(*Register)

	// Pre-clobber destination. This avoids the
	// following situation:
	//   - v is currently held in R0 and stacktmp0.
	//   - We want to copy stacktmp1 to stacktmp0.
	//   - We choose R0 as the temporary register.
	// During the copy, both R0 and stacktmp0 are
	// clobbered, losing both copies of v. Oops!
	// Erasing the destination early means R0 will not
	// be chosen as the temp register, as it will then
	// be the last copy of v.
	e.erase(loc)
	var x *Value
	if c == nil || e.s.values[vid].rematerializeable {
		if !e.s.values[vid].rematerializeable {
			e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
		}
		if dstReg {
			x = v.copyIntoWithXPos(e.p, pos)
		} else {
			// Rematerialize into stack slot. Need a free
			// register to accomplish this.
			r := e.findRegFor(v.Type)
			e.erase(r)
			x = v.copyIntoWithXPos(e.p, pos)
			e.set(r, vid, x, false, pos)
			// Make sure we spill with the size of the slot, not the
			// size of x (which might be wider due to our dropping
			// of narrowing conversions).
			x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x)
		}
	} else {
		// Emit move from src to dst.
		_, srcReg := src.(*Register)
		if srcReg {
			if dstReg {
				x = e.p.NewValue1(pos, OpCopy, c.Type, c)
			} else {
				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c)
			}
		} else {
			if dstReg {
				x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
			} else {
				// mem->mem. Use a temp register.
				r := e.findRegFor(c.Type)
				e.erase(r)
				t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
				e.set(r, vid, t, false, pos)
				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
			}
		}
	}
	e.set(loc, vid, x, true, pos)
	if x.Op == OpLoadReg && e.s.isGReg(register(loc.(*Register).num)) {
		e.s.f.Fatalf("processDest.OpLoadReg targeting g: " + x.LongString())
	}
	if splice != nil {
		(*splice).Uses--
		*splice = x
		x.Uses++
	}
	return true
}
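
// The four move shapes emitted above can be summarized in isolation: a move
// is classified by whether its source and destination are registers, and the
// mem->mem case must bounce through a temp register because most ISAs have
// no memory-to-memory move. A hypothetical helper (exampleMoveKind is not
// part of the allocator) showing the decision table:
func exampleMoveKind(srcIsReg, dstIsReg bool) string {
	switch {
	case srcIsReg && dstIsReg:
		return "copy" // cf. OpCopy: register-to-register move
	case srcIsReg && !dstIsReg:
		return "store" // cf. OpStoreReg: spill to a stack slot
	case !srcIsReg && dstIsReg:
		return "load" // cf. OpLoadReg: restore from a stack slot
	default:
		return "load+store via temp" // no mem->mem move; use a register
	}
}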

// set changes the contents of location loc to hold the given value and its cached representative.
func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) {
	e.s.f.setHome(c, loc)
	e.contents[loc] = contentRecord{vid, c, final, pos}
	a := e.cache[vid]
	if len(a) == 0 {
		e.cachedVals = append(e.cachedVals, vid)
	}
	a = append(a, c)
	e.cache[vid] = a
	if r, ok := loc.(*Register); ok {
		if e.usedRegs&(regMask(1)<<uint(r.num)) != 0 {
			e.s.f.Fatalf("%v is already set (v%d/%v)", r, vid, c)
		}
		e.usedRegs |= regMask(1) << uint(r.num)
		if final {
			e.finalRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 1 {
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 2 {
			if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
				e.uniqueRegs &^= regMask(1) << uint(t.num)
			}
		}
		if e.s.values[vid].rematerializeable {
			e.rematerializeableRegs |= regMask(1) << uint(r.num)
		}
	}
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("%s\n", c.LongString())
		fmt.Printf("v%d now available in %s:%s\n", vid, loc, c)
	}
}
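
// The register bookkeeping above is all single-bit mask arithmetic. A tiny
// standalone illustration with an assumed 64-bit mask type (exampleMaskOps
// and exampleMask are hypothetical names, not the real regMask):
func exampleMaskOps() {
	type exampleMask uint64
	var used exampleMask
	const r = 3                 // register number
	used |= exampleMask(1) << r // mark r used (cf. e.usedRegs |= ...)
	inUse := used&(exampleMask(1)<<r) != 0
	used &^= exampleMask(1) << r // clear r using Go's AND NOT operator
	fmt.Println(inUse, used)     // true 0
}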

// erase removes any user of loc.
func (e *edgeState) erase(loc Location) {
	cr := e.contents[loc]
	if cr.c == nil {
		return
	}
	vid := cr.vid

	if cr.final {
		// Add a destination to move this value back into place.
		// Make sure it gets added to the tail of the destination queue
		// so we make progress on other moves first.
		e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos})
	}

	// Remove c from the list of cached values.
	a := e.cache[vid]
	for i, c := range a {
		if e.s.f.getHome(c.ID) == loc {
			if e.s.f.pass.debug > regDebug {
				fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c)
			}
			a[i], a = a[len(a)-1], a[:len(a)-1]
			break
		}
	}
	e.cache[vid] = a

	// Update register masks.
	if r, ok := loc.(*Register); ok {
		e.usedRegs &^= regMask(1) << uint(r.num)
		if cr.final {
			e.finalRegs &^= regMask(1) << uint(r.num)
		}
		e.rematerializeableRegs &^= regMask(1) << uint(r.num)
	}
	if len(a) == 1 {
		if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
	}
}
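
// erase drops the cached copy with the swap-with-last idiom
// (a[i], a = a[len(a)-1], a[:len(a)-1]), which removes element i in O(1) at
// the cost of reordering the slice; the order of cached copies doesn't
// matter here. A standalone illustration (exampleSwapRemove is a
// hypothetical name):
func exampleSwapRemove(a []int, i int) []int {
	a[i], a = a[len(a)-1], a[:len(a)-1]
	return a // e.g. exampleSwapRemove([]int{10, 20, 30}, 0) == []int{30, 20}
}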

// findRegFor finds a register we can use to make a temp copy of type typ.
func (e *edgeState) findRegFor(typ *types.Type) Location {
	// Which registers are possibilities.
	types := &e.s.f.Config.Types
	m := e.s.compatRegs(typ)

	// Pick a register. In priority order:
	// 1) an unused register
	// 2) a non-unique register not holding a final value
	// 3) a non-unique register
	// 4) a register holding a rematerializeable value
	x := m &^ e.usedRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m &^ e.uniqueRegs &^ e.finalRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m &^ e.uniqueRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m & e.rematerializeableRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}

	// No register is available.
	// Pick a register to spill.
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 {
				if !c.rematerializeable() {
					x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c)
					// Allocate a temp location to spill a register to.
					// The type of the slot is immaterial - it will not be live across
					// any safepoint. Just use a type big enough to hold any register.
					t := LocalSlot{N: e.s.f.fe.Auto(c.Pos, types.Int64), Type: types.Int64}
					// TODO: reuse these slots. They'll need to be erased first.
					e.set(t, vid, x, false, c.Pos)
					if e.s.f.pass.debug > regDebug {
						fmt.Printf(" SPILL %s->%s %s\n", r, t, x.LongString())
					}
				}
				// r will now be overwritten by the caller. At some point
				// later, the newly saved value will be moved back to its
				// final destination in processDest.
				return r
			}
		}
	}

	fmt.Printf("m:%d unique:%d final:%d rematerializable:%d\n", m, e.uniqueRegs, e.finalRegs, e.rematerializeableRegs)
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID))
		}
	}
	e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b)
	return nil
}
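
// Each of the four priority tiers above is just a mask expression followed
// by "pick any register in the mask". Assuming pickReg chooses the lowest
// set bit, the selection can be sketched as follows (exampleFindReg and its
// parameters are hypothetical, and the mask type is an assumed uint64):
func exampleFindReg(compat, used, unique, final, remat uint64) (int, bool) {
	lowestBit := func(m uint64) int {
		n := 0
		for m&1 == 0 {
			m >>= 1
			n++
		}
		return n
	}
	for _, m := range []uint64{
		compat &^ used,            // 1) a completely unused register
		compat &^ unique &^ final, // 2) non-unique, not holding a final value
		compat &^ unique,          // 3) non-unique (some other copy survives)
		compat & remat,            // 4) holds a value we can recompute anyway
	} {
		if m != 0 {
			return lowestBit(m), true
		}
	}
	return -1, false // nothing free: the caller must spill something
}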

// rematerializeable reports whether the register allocator should recompute
// a value instead of spilling/restoring it.
func (v *Value) rematerializeable() bool {
	if !opcodeTable[v.Op].rematerializeable {
		return false
	}
	for _, a := range v.Args {
		// SP and SB (generated by OpSP and OpSB) are always available.
		if a.Op != OpSP && a.Op != OpSB {
			return false
		}
	}
	return true
}
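
// Rematerializable values are typically constants and address computations
// whose only inputs are the always-available SP and SB; re-executing such an
// instruction is cheaper than a spill/restore pair. For example, on amd64 a
// constant load (no args) or an address of a global (its only arg is SB) is
// typically marked rematerializeable, while an add of two ordinary values is
// not, since its inputs may be gone from registers. A toy version of the
// check with a mocked-up opcode table (exampleIsRemat and its table are
// hypothetical, not the real opcodeTable):
func exampleIsRemat(op string, argOps []string) bool {
	remat := map[string]bool{"Const64": true, "LEAQ": true} // mock flags
	if !remat[op] {
		return false
	}
	for _, a := range argOps {
		// Only SP- and SB-based inputs are guaranteed to be available
		// at any point where we might recompute the value.
		if a != "SP" && a != "SB" {
			return false
		}
	}
	return true
}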

type liveInfo struct {
	ID   ID       // ID of value
	dist int32    // # of instructions before next use
	pos  src.XPos // source position of next use
}

// computeLive computes a map from block ID to a list of value IDs live at the
// end of that block. Each value ID is paired with the number of instructions
// until that value's next use. The resulting map is stored in s.live.
// computeLive also computes the desired register information at the end of
// each block. This desired register information is stored in s.desired.
// TODO: this could be quadratic if lots of variables are live across lots of
// basic blocks. Figure out a way to make this function (or, more precisely, the user
// of this function) require only linear size & time.
func (s *regAllocState) computeLive() {
	f := s.f
	s.live = make([][]liveInfo, f.NumBlocks())
	s.desired = make([]desiredState, f.NumBlocks())
	var phis []*Value

	live := f.newSparseMap(f.NumValues())
	defer f.retSparseMap(live)
	t := f.newSparseMap(f.NumValues())
	defer f.retSparseMap(t)

	// Keep track of which value we want in each register.
	var desired desiredState

	// Instead of iterating over f.Blocks, iterate over their postordering.
	// Liveness information flows backward, so starting at the end
	// increases the probability that we will stabilize quickly.
	// TODO: Do a better job yet. Here's one possibility:
	// Calculate the dominator tree and locate all strongly connected components.
	// If a value is live in one block of an SCC, it is live in all.
	// Walk the dominator tree from end to beginning, just once, treating SCC
	// components as single blocks, duplicating the calculated liveness
	// information out to all of them.
	po := f.postorder()
	s.loopnest = f.loopnest()
	s.loopnest.calculateDepths()
	for {
		changed := false

		for _, b := range po {
			// Start with known live values at the end of the block.
			// Add len(b.Values) to adjust from end-of-block distance
			// to beginning-of-block distance.
			live.clear()
			for _, e := range s.live[b.ID] {
				live.set(e.ID, e.dist+int32(len(b.Values)), e.pos)
			}

			// Mark control values as live.
			for _, c := range b.ControlValues() {
				if s.values[c.ID].needReg {
					live.set(c.ID, int32(len(b.Values)), b.Pos)
				}
			}

			// Propagate backwards to the start of the block.
			// Assumes Values have been scheduled.
			phis = phis[:0]
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				live.remove(v.ID)
				if v.Op == OpPhi {
					// Save phi ops for later.
					phis = append(phis, v)
					continue
				}
				if opcodeTable[v.Op].call {
					c := live.contents()
					for i := range c {
						c[i].val += unlikelyDistance
					}
				}
				for _, a := range v.Args {
					if s.values[a.ID].needReg {
						live.set(a.ID, int32(i), v.Pos)
					}
				}
			}
			// Propagate desired registers backwards.
			desired.copy(&s.desired[b.ID])
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				prefs := desired.remove(v.ID)
				if v.Op == OpPhi {
					// TODO: if v is a phi, save desired register for phi inputs.
					// For now, we just drop it and don't propagate
					// desired registers back through phi nodes.
					continue
				}
				regspec := s.regspec(v)
				// Cancel desired registers if they get clobbered.
				desired.clobber(regspec.clobbers)
				// Update desired registers if there are any fixed register inputs.
				for _, j := range regspec.inputs {
					if countRegs(j.regs) != 1 {
						continue
					}
					desired.clobber(j.regs)
					desired.add(v.Args[j.idx].ID, pickReg(j.regs))
				}
				// Set the desired register of input 0 if this is a 2-operand instruction.
				if opcodeTable[v.Op].resultInArg0 {
					if opcodeTable[v.Op].commutative {
						desired.addList(v.Args[1].ID, prefs)
					}
					desired.addList(v.Args[0].ID, prefs)
				}
			}

			// For each predecessor of b, expand its list of live-at-end values.
			// invariant: live contains the values live at the start of b (excluding phi inputs)
			for i, e := range b.Preds {
				p := e.b
				// Compute additional distance for the edge.
				// Note: delta must be at least 1 to distinguish the control
				// value use from the first user in a successor block.
				delta := int32(normalDistance)
				if len(p.Succs) == 2 {
					if p.Succs[0].b == b && p.Likely == BranchLikely ||
						p.Succs[1].b == b && p.Likely == BranchUnlikely {
						delta = likelyDistance
					}
					if p.Succs[0].b == b && p.Likely == BranchUnlikely ||
						p.Succs[1].b == b && p.Likely == BranchLikely {
						delta = unlikelyDistance
					}
				}

				// Update any desired registers at the end of p.
				s.desired[p.ID].merge(&desired)

				// Start t off with the previously known live values at the end of p.
				t.clear()
				for _, e := range s.live[p.ID] {
					t.set(e.ID, e.dist, e.pos)
				}
				update := false

				// Add new live values from scanning this block.
				for _, e := range live.contents() {
					d := e.val + delta
					if !t.contains(e.key) || d < t.get(e.key) {
						update = true
						t.set(e.key, d, e.aux)
					}
				}
				// Also add the correct arg from the saved phi values.
				// All phis are at distance delta (we consider them to
				// happen simultaneously at the start of the block).
				for _, v := range phis {
					id := v.Args[i].ID
					if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) {
						update = true
						t.set(id, delta, v.Pos)
					}
				}

				if !update {
					continue
				}
				// The live set has changed, update it.
				l := s.live[p.ID][:0]
				if cap(l) < t.size() {
					l = make([]liveInfo, 0, t.size())
				}
				for _, e := range t.contents() {
					l = append(l, liveInfo{e.key, e.val, e.aux})
				}
				s.live[p.ID] = l
				changed = true
			}
		}

		if !changed {
			break
		}
	}
	if f.pass.debug > regDebug {
		fmt.Println("live values at end of each block")
		for _, b := range f.Blocks {
			fmt.Printf("  %s:", b)
			for _, x := range s.live[b.ID] {
				fmt.Printf(" v%d(%d)", x.ID, x.dist)
				for _, e := range s.desired[b.ID].entries {
					if e.ID != x.ID {
						continue
					}
					fmt.Printf("[")
					first := true
					for _, r := range e.regs {
						if r == noRegister {
							continue
						}
						if !first {
							fmt.Printf(",")
						}
						fmt.Print(&s.registers[r])
						first = false
					}
					fmt.Printf("]")
				}
			}
			if avoid := s.desired[b.ID].avoid; avoid != 0 {
				fmt.Printf(" avoid=%v", s.RegMaskString(avoid))
			}
			fmt.Println()
		}
	}
}

// A desiredState represents desired register assignments.
type desiredState struct {
	// Desired assignments will be small, so we just use a list
	// of valueID+registers entries.
	entries []desiredStateEntry
	// Registers that other values want to be in. This value will
	// contain at least the union of the regs fields of entries, but
	// may contain additional entries for values that were once in
	// this data structure but are no longer.
	avoid regMask
}

type desiredStateEntry struct {
	// (pre-regalloc) value
	ID ID
	// Registers it would like to be in, in priority order.
	// Unused slots are filled with noRegister.
	regs [4]register
}

func (d *desiredState) clear() {
	d.entries = d.entries[:0]
	d.avoid = 0
}

// get returns a list of desired registers for value vid.
func (d *desiredState) get(vid ID) [4]register {
	for _, e := range d.entries {
		if e.ID == vid {
			return e.regs
		}
	}
	return [4]register{noRegister, noRegister, noRegister, noRegister}
}

// add records that we'd like value vid to be in register r.
func (d *desiredState) add(vid ID, r register) {
	d.avoid |= regMask(1) << r
	for i := range d.entries {
		e := &d.entries[i]
		if e.ID != vid {
			continue
		}
		if e.regs[0] == r {
			// Already known and highest priority.
			return
		}
		for j := 1; j < len(e.regs); j++ {
			if e.regs[j] == r {
				// Move from lower priority to top priority.
				copy(e.regs[1:], e.regs[:j])
				e.regs[0] = r
				return
			}
		}
		copy(e.regs[1:], e.regs[:])
		e.regs[0] = r
		return
	}
	d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
}
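
// add keeps each entry's regs array as a fixed-size, priority-ordered list
// padded with noRegister; inserting a register is a move-to-front over at
// most 4 elements, relying on copy's memmove-like handling of overlapping
// slices. The shifting in isolation (exampleMoveToFront is hypothetical,
// with -1 standing in for noRegister):
func exampleMoveToFront(regs [4]int, r int) [4]int {
	if regs[0] == r {
		return regs // already highest priority
	}
	for j := 1; j < len(regs); j++ {
		if regs[j] == r {
			// r was present at lower priority: slide the prefix down.
			copy(regs[1:], regs[:j])
			regs[0] = r
			return regs
		}
	}
	// r was absent: slide everything down, dropping the last entry.
	copy(regs[1:], regs[:])
	regs[0] = r
	return regs // e.g. exampleMoveToFront([4]int{5, -1, -1, -1}, 7) == [4]int{7, 5, -1, -1}
}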

func (d *desiredState) addList(vid ID, regs [4]register) {
	// regs is in priority order, so iterate in reverse order.
	for i := len(regs) - 1; i >= 0; i-- {
		r := regs[i]
		if r != noRegister {
			d.add(vid, r)
		}
	}
}

// clobber erases any desired registers in the set m.
func (d *desiredState) clobber(m regMask) {
	for i := 0; i < len(d.entries); {
		e := &d.entries[i]
		j := 0
		for _, r := range e.regs {
			if r != noRegister && m>>r&1 == 0 {
				e.regs[j] = r
				j++
			}
		}
		if j == 0 {
			// No more desired registers for this value.
			d.entries[i] = d.entries[len(d.entries)-1]
			d.entries = d.entries[:len(d.entries)-1]
			continue
		}
		for ; j < len(e.regs); j++ {
			e.regs[j] = noRegister
		}
		i++
	}
	d.avoid &^= m
}

// copy copies a desired state from another desiredState x.
func (d *desiredState) copy(x *desiredState) {
	d.entries = append(d.entries[:0], x.entries...)
	d.avoid = x.avoid
}

// remove removes the desired registers for vid and returns them.
func (d *desiredState) remove(vid ID) [4]register {
	for i := range d.entries {
		if d.entries[i].ID == vid {
			regs := d.entries[i].regs
			d.entries[i] = d.entries[len(d.entries)-1]
			d.entries = d.entries[:len(d.entries)-1]
			return regs
		}
	}
	return [4]register{noRegister, noRegister, noRegister, noRegister}
}

// merge merges another desired state x into d.
func (d *desiredState) merge(x *desiredState) {
	d.avoid |= x.avoid
	// There should only be a few desired registers, so
	// linear insert is ok.
	for _, e := range x.entries {
		d.addList(e.ID, e.regs)
	}
}

func min32(x, y int32) int32 {
	if x < y {
		return x
	}
	return y
}
func max32(x, y int32) int32 {
	if x < y {
		return y
	}
	return x
}