1 // Derived from Inferno utils/6c/reg.c
2 // http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
5 // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
6 // Portions Copyright © 1997-1999 Vita Nuova Limited
7 // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
8 // Portions Copyright © 2004,2006 Bruce Ellis
9 // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
10 // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
11 // Portions Copyright © 2009 The Go Authors. All rights reserved.
13 // Permission is hereby granted, free of charge, to any person obtaining a copy
14 // of this software and associated documentation files (the "Software"), to deal
15 // in the Software without restriction, including without limitation the rights
16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 // copies of the Software, and to permit persons to whom the Software is
18 // furnished to do so, subject to the following conditions:
20 // The above copyright notice and this permission notice shall be included in
21 // all copies or substantial portions of the Software.
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
42 // A Var represents a single variable that may be stored in a register.
43 // That variable may itself correspond to a hardware register,
44 // to represent the use of registers in the unoptimized instruction stream.
// id is the variable's number; Bits sets are indexed by it.
50 id int // index in vars
56 // Bits represents a set of Vars, stored as a bit set of var numbers
57 // (the index in vars, or equivalently v.id).
// Package-level allocator state, reset per function by regopt.
68 vars [NVAR]Var // variables under consideration
69 nvar int // number of vars
71 regbits uint64 // bits for hardware registers
// Classification sets populated by mkvar as variables are discovered.
74 externs Bits // global variables
75 params Bits // function parameters and results
76 ivar Bits // function parameters (inputs)
77 ovar Bits // function results (outputs)
78 consts Bits // constant values
79 addrs Bits // variables with address taken
82 // A Reg is a wrapper around a single Prog (one instruction) that holds
83 // register optimization information while the optimizer runs.
84 // r->prog is the instruction.
// Per-instruction dataflow sets, all indexed by variable id.
86 set Bits // regopt variables written by this instruction.
87 use1 Bits // regopt variables read by prog->from.
88 use2 Bits // regopt variables read by prog->to.
90 // refahead/refbehind are the regopt variables whose current
91 // value may be used in the following/preceding instructions
92 // up to a CALL (or the value is clobbered).
96 // calahead/calbehind are similar, but for variables in
97 // instructions that are reachable after hitting at least one CALL.
104 regu uint64 // register used bitmap
107 // A Rgn represents a single regopt variable over a region of code
108 // where a register could potentially be dedicated to that variable.
109 // The code encompassed by a Rgn is defined by the flow graph,
110 // starting at enter, flood-filling forward while varno is refahead
111 // and backward while varno is refbehind, and following branches.
112 // A single variable may be represented by multiple disjoint Rgns and
113 // each Rgn may choose a different register for that variable.
114 // Registers are allocated to regions greedily in order of descending
123 // The Plan 9 C compilers used a limit of 600 regions,
124 // but the yacc-generated parser in y.go has 3100 regions.
125 // We set MaxRgn large enough to handle that.
126 // There's not a huge cost to having too many regions:
127 // the main processing traces the live area for each variable,
128 // which is limited by the number of variables times the area,
129 // not the raw region count. If there are many regions, they
130 // are almost certainly small and easy to trace.
131 // The only operation that scales with region count is the
132 // sorting by cost, which uses sort.Sort and is therefore
133 // guaranteed n log n.
// rcmp implements sort.Interface for []Rgn, ordering regions by
// descending cost, then descending varno, then descending enter Id,
// so that the cheapest-to-lose regions are allocated registers last.
143 func (x rcmp) Len() int {
147 func (x rcmp) Swap(i, j int) {
148 x[i], x[j] = x[j], x[i]
151 func (x rcmp) Less(i, j int) bool {
// Higher cost sorts first.
154 if p1.cost != p2.cost {
155 return int(p2.cost)-int(p1.cost) < 0
// Tie-break on varno, then on the entry flow node's Id,
// to keep the sort deterministic.
157 if p1.varno != p2.varno {
158 return int(p2.varno)-int(p1.varno) < 0
160 if p1.enter != p2.enter {
161 return int(p2.enter.Id-p1.enter.Id) < 0
// setaddrs marks every variable in bit as address-taken.
// Because several Vars can be pieces of the same Go variable
// (same node and name), all matching pieces are disabled together.
166 func setaddrs(bit Bits) {
173 // convert each bit to a variable
177 n = int(vars[i].name)
180 // disable all pieces of that variable
181 for i = 0; i < nvar; i++ {
183 if v.node == node && int(v.name) == n {
// regnodes caches the pseudo-Nodes created for hardware registers,
// so regopt reuses them across functions instead of reallocating.
190 var regnodes [64]*Node
// walkvardef propagates a VARDEF for n forward along the straight-line
// successor chain (S1), setting the act bit for every Var piece of n,
// and recursing into branch successors (S2). It stops at a VARKILL of n
// or at a CALL, and uses f.Active to avoid revisiting nodes.
192 func walkvardef(n *Node, f *Flow, active int) {
197 for f1 = f; f1 != nil; f1 = f1.S1 {
198 if f1.Active == int32(active) {
201 f1.Active = int32(active)
202 if f1.Prog.As == obj.AVARKILL && f1.Prog.To.Node == n {
// Set act for every Var in n's linked list (see v.nextinnode).
205 for v, _ = n.Opt().(*Var); v != nil; v = v.nextinnode {
207 biset(&(f1.Data.(*Reg)).act, uint(bn))
210 if f1.Prog.As == obj.ACALL {
// Recurse into the branch targets of the nodes just walked.
215 for f2 := f; f2 != f1; f2 = f2.S1 {
217 walkvardef(n, f2.S2, active)
// addmove inserts a move instruction after r, transferring variable bn
// between memory and register rn. Callers pass f=0 for a load into the
// register and f=1 for a store back to memory (see paint3).
224 func addmove(r *Flow, bn int, rn int, f int) {
// Build the memory operand describing the variable.
238 a.Etype = uint8(v.etype)
239 a.Type = obj.TYPE_MEM
242 a.Sym = Linksym(v.node.Sym)
// Leftover commented-out C from the 9g era, kept for reference:
245 if(a->etype == TARRAY)
247 else if(a->sym == nil)
248 a->type = TYPE_CONST;
250 p1.As = Thearch.Optoas(OAS, Types[uint8(v.etype)])
252 // TODO(rsc): Remove special case here.
253 if Thearch.LinkArch.InFamily(sys.MIPS64, sys.ARM, sys.ARM64, sys.PPC64) && v.etype == TBOOL {
254 p1.As = Thearch.Optoas(OAS, Types[TUINT8])
256 p1.From.Type = obj.TYPE_REG
257 p1.From.Reg = int16(rn)
258 p1.From.Name = obj.NAME_NONE
262 a.Type = obj.TYPE_REG
266 if Debug['R'] != 0 && Debug['v'] != 0 {
267 fmt.Printf("%v ===add=== %v\n", p, p1)
// overlap_reg reports whether the byte ranges [o1, o1+w1) and
// [o2, o2+w2) overlap; disjoint ranges (t1 <= o2 or t2 <= o1) do not.
272 func overlap_reg(o1 int64, w1 int, o2 int64, w2 int) bool {
276 if t1 <= o2 || t2 <= o1 {
// mkvar returns the Bits for the variable (or hardware register)
// referenced by address a in the instruction wrapped by f, creating a
// new Var entry on first sight. As a side effect it records register
// usage in f's Reg and classifies the variable into the global sets
// (externs, params, ivar, ovar, addrs).
283 func mkvar(f *Flow, a *obj.Addr) Bits {
284 // mark registers used
285 if a.Type == obj.TYPE_NONE {
290 r.use1.b[0] |= Thearch.Doregbits(int(a.Index)) // TODO: Use RtoB
295 regu := Thearch.Doregbits(int(a.Reg)) | Thearch.RtoB(int(a.Reg)) // TODO: Use RtoB
303 // TODO(rsc): Remove special case here.
306 if Thearch.LinkArch.InFamily(sys.MIPS64, sys.ARM, sys.ARM64, sys.PPC64) {
309 a.Type = obj.TYPE_MEM
312 a.Type = obj.TYPE_ADDR
321 r.use1.b[0] |= Thearch.RtoB(int(a.Reg))
// Leftover commented-out C (ARM pre/post-increment handling):
325 if(r->f.prog->scond & (C_PBIT|C_WBIT))
326 r->set.b[0] |= RtoB(a->reg);
330 // Note: This case handles NAME_EXTERN and NAME_STATIC.
331 // We treat these as requiring eager writes to memory, due to
332 // the possibility of a fault handler looking at them, so there is
333 // not much point in registerizing the loads.
334 // If we later choose the set of candidate variables from a
335 // larger list, these cases could be deprioritized instead of
// Only ONAME nodes with a canonical Orig are trackable variables.
345 node, _ := a.Node.(*Node)
346 if node == nil || node.Op != ONAME || node.Orig == nil {
350 if node.Orig != node {
351 Fatalf("%v: bad node", Ctxt.Dconv(a))
353 if node.Sym == nil || node.Sym.Name[0] == '.' {
360 Fatalf("bad width %d for %v", w, Ctxt.Dconv(a))
// Look for an existing Var with the same node/name.
365 for i := 0; i < nvar; i++ {
367 if v.node == node && int(v.name) == n {
370 if int64(v.width) == w {
371 // TODO(rsc): Remove special case for arm here.
372 if flag == 0 || Thearch.LinkArch.Family != sys.ARM {
379 // if they overlap, disable both
380 if overlap_reg(v.offset, v.width, o, int(w)) {
381 // print("disable overlap %s %d %d %d %d, %E != %E\n", s->name, v->offset, v->width, o, w, v->etype, et);
// Var table full or untrackable: optionally report, then give up.
395 if Debug['w'] > 1 && node != nil {
396 Fatalf("variable not optimized: %v", Nconv(node, FmtSharp))
399 Warn("variable not optimized: %v", Nconv(node, FmtSharp))
402 // If we're not tracking a word in a variable, mark the rest as
403 // having its address taken, so that we keep the whole thing
404 // live at all calls. otherwise we might optimize away part of
405 // a variable but not all of it.
407 for i := 0; i < nvar; i++ {
425 v.addr = int8(flag) // funny punning
428 // node->opt is the head of a linked list
429 // of Vars within the given Node, so that
430 // we can start at a Var and find all the other
431 // Vars in the same Go variable.
432 v.nextinnode, _ = node.Opt().(*Var)
// Classify the new variable into the global Bits sets.
437 if n == obj.NAME_EXTERN || n == obj.NAME_STATIC {
438 for z := 0; z < BITS; z++ {
439 externs.b[z] |= bit.b[z]
442 if n == obj.NAME_PARAM {
443 for z := 0; z < BITS; z++ {
444 params.b[z] |= bit.b[z]
448 if node.Class == PPARAM {
449 for z := 0; z < BITS; z++ {
450 ivar.b[z] |= bit.b[z]
453 if node.Class == PPARAMOUT {
454 for z := 0; z < BITS; z++ {
455 ovar.b[z] |= bit.b[z]
459 // Treat values with their address taken as live at calls,
460 // because the garbage collector's liveness analysis in plive.go does.
461 // These must be consistent or else we will elide stores and the garbage
462 // collector will see uninitialized data.
463 // The typical case where our own analysis is out of sync is when the
464 // node appears to have its address taken but that code doesn't actually
465 // get generated and therefore doesn't show up as an address being
466 // taken when we analyze the instruction stream.
467 // One instance of this case is when a closure uses the same name as
468 // an outer variable for one of its own variables declared with :=.
469 // The parser flags the outer variable as possibly shared, and therefore
470 // sets addrtaken, even though it ends up not being actually shared.
471 // If we were better about _ elision, _ = &x would suffice too.
472 // The broader := in a closure problem is mentioned in a comment in
473 // closure.go:/^typecheckclosure and dcl.go:/^oldname.
478 // Disable registerization for globals, because:
479 // (1) we might panic at any time and we want the recovery code
480 // to see the latest values (issue 1304).
481 // (2) we don't know what pointers might point at them and we want
482 // loads via those pointers to see updated values and vice versa (issue 7995).
484 // Disable registerization for results if using defer, because the deferred func
485 // might recover and return, causing the current values to be used.
486 if node.Class == PEXTERN || (hasdefer && node.Class == PPARAMOUT) {
491 fmt.Printf("bit=%2d et=%v w=%d+%d %v %v flag=%d\n", i, et, o, w, Nconv(node, FmtSharp), Ctxt.Dconv(a), v.addr)
// prop propagates the ref (used-before-set) and cal (live-across-call)
// sets backward through the flow graph starting at f, walking the P1
// predecessor chain directly and recursing into P2 predecessors.
// It stops early once a node's refahead/calahead stop changing.
500 func prop(f *Flow, ref Bits, cal Bits) {
508 for f1 = f; f1 != nil; f1 = f1.P1 {
// Merge into this node's ahead sets; note whether anything changed.
510 for z = 0; z < BITS; z++ {
511 ref.b[z] |= r1.refahead.b[z]
512 if ref.b[z] != r1.refahead.b[z] {
513 r1.refahead.b[z] = ref.b[z]
517 cal.b[z] |= r1.calahead.b[z]
518 if cal.b[z] != r1.calahead.b[z] {
519 r1.calahead.b[z] = cal.b[z]
526 if Noreturn(f1.Prog) {
530 // Mark all input variables (ivar) as used, because that's what the
531 // liveness bitmaps say. The liveness bitmaps say that so that a
532 // panic will not show stale values in the parameter dump.
533 // Mark variables with a recent VARDEF (r1->act) as used,
534 // so that the optimizer flushes initializations to memory,
535 // so that if a garbage collection happens during this CALL,
536 // the collector will see initialized memory. Again this is to
537 // match what the liveness bitmaps say.
538 for z = 0; z < BITS; z++ {
539 cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1.act.b[z]
543 // cal.b is the current approximation of what's live across the call.
544 // Every bit in cal.b is a single stack word. For each such word,
545 // find all the other tracked stack words in the same Go variable
546 // (struct/slice/string/interface) and mark them live too.
547 // This is necessary because the liveness analysis for the garbage
548 // collector works at variable granularity, not at word granularity.
549 // It is fundamental for slice/string/interface: the garbage collector
550 // needs the whole value, not just some of the words, in order to
551 // interpret the other bits correctly. Specifically, slice needs a consistent
552 // ptr and cap, string needs a consistent ptr and len, and interface
553 // needs a consistent type word and data word.
554 for z = 0; z < BITS; z++ {
558 for i = 0; i < 64; i++ {
559 if z*64+i >= nvar || (cal.b[z]>>uint(i))&1 == 0 {
563 if v.node.Opt() == nil { // v represents fixed register, not Go variable
567 // v->node->opt is the head of a linked list of Vars
568 // corresponding to tracked words from the Go variable v->node.
569 // Walk the list and set all the bits.
570 // For a large struct this could end up being quadratic:
571 // after the first setting, the outer loop (for z, i) would see a 1 bit
572 // for all of the remaining words in the struct, and for each such
573 // word would go through and turn on all the bits again.
574 // To avoid the quadratic behavior, we only turn on the bits if
575 // v is the head of the list or if the head's bit is not yet turned on.
576 // This will set the bits at most twice, keeping the overall loop linear.
577 v1, _ = v.node.Opt().(*Var)
579 if v == v1 || !btest(&cal, uint(v1.id)) {
580 for ; v1 != nil; v1 = v1.nextinnode {
581 biset(&cal, uint(v1.id))
588 for z = 0; z < BITS; z++ {
// At a return point, globals and results are considered live.
594 for z = 0; z < BITS; z++ {
595 cal.b[z] = externs.b[z] | ovar.b[z]
// Transfer function for an ordinary instruction:
// kill what it sets, then add what it reads.
600 for z = 0; z < BITS; z++ {
601 ref.b[z] = ref.b[z]&^r1.set.b[z] | r1.use1.b[z] | r1.use2.b[z]
602 cal.b[z] &^= (r1.set.b[z] | r1.use1.b[z] | r1.use2.b[z])
603 r1.refbehind.b[z] = ref.b[z]
604 r1.calbehind.b[z] = cal.b[z]
// Recurse into the other predecessors (P2 chains) of each node walked.
615 for ; f != f1; f = f.P1 {
617 for f2 = f.P2; f2 != nil; f2 = f2.P2link {
618 prop(f2, r.refbehind, r.calbehind)
// synch propagates the register/variable difference set dif forward
// along S1 successors, accumulating each node's set and regdiff bits
// and stopping once regdiff reaches a fixed point.
623 func synch(f *Flow, dif Bits) {
627 for f1 := f; f1 != nil; f1 = f1.S1 {
629 for z = 0; z < BITS; z++ {
630 dif.b[z] = dif.b[z]&^(^r1.refbehind.b[z]&r1.refahead.b[z]) | r1.set.b[z] | r1.regdiff.b[z]
631 if dif.b[z] != r1.regdiff.b[z] {
632 r1.regdiff.b[z] = dif.b[z]
// Variables that become live across a call are no longer in sync.
641 for z = 0; z < BITS; z++ {
642 dif.b[z] &^= (^r1.calbehind.b[z] & r1.calahead.b[z])
// allreg chooses a free hardware register for region r, given b, the
// set of registers already in use. It returns the chosen register's
// bit (integer registers via BtoR/RtoB, floats via BtoF/FtoB), or
// falls through when no register is available or the region has no
// positive cost.
650 func allreg(b uint64, r *Rgn) uint64 {
655 Fatalf("unknown etype %d/%v", Bitno(b), v.etype)
// Integer case: pick a register not in b (hence ^b).
671 i := Thearch.BtoR(^b)
672 if i != 0 && r.cost > 0 {
674 return Thearch.RtoB(i)
677 case TFLOAT32, TFLOAT64:
678 i := Thearch.BtoF(^b)
679 if i != 0 && r.cost > 0 {
681 return Thearch.FtoB(i)
// LOAD returns, for word z, the variables whose values become needed
// at r: referenced ahead but not behind (a load would be required).
688 func LOAD(r *Reg, z int) uint64 {
689 return ^r.refbehind.b[z] & r.refahead.b[z]
// STORE returns, for word z, the variables that become live across a
// call at r: call-live ahead but not behind (a store would be required).
692 func STORE(r *Reg, z int) uint64 {
693 return ^r.calbehind.b[z] & r.calahead.b[z]
// Cost weights used by paint1 to score the benefit of registerizing a
// variable; each is scaled by the enclosing loop count (f.Loop).
698 CLOAD = 5 // cost of load
699 CREF = 5 // cost of reference if not registerized
700 LOOP = 3 // loop execution count (applied in popt.go)
// paint1 flood-fills the region for variable bn starting at f, marking
// visited nodes via the act bit and accumulating into change the net
// benefit of keeping bn in a register: loads and stores avoided count
// against (CLOAD), references satisfied from the register count for
// (CREF), each weighted by the loop nesting depth f.Loop.
703 func paint1(f *Flow, bn int) {
705 bb := uint64(1 << uint(bn%64))
707 if r.act.b[z]&bb != 0 {
// Walk backward while bn is referenced behind.
713 if r.refbehind.b[z]&bb == 0 {
721 if r1.refahead.b[z]&bb == 0 {
724 if r1.act.b[z]&bb != 0 {
// A load would be needed here if bn were not registerized.
731 if LOAD(r, z)&^(r.set.b[z]&^(r.use1.b[z]|r.use2.b[z]))&bb != 0 {
732 change -= CLOAD * int(f.Loop)
738 if f.Prog.As != obj.ANOP { // don't give credit for NOPs
739 if r.use1.b[z]&bb != 0 {
740 change += CREF * int(f.Loop)
742 if (r.use2.b[z]|r.set.b[z])&bb != 0 {
743 change += CREF * int(f.Loop)
// A store would be needed here to resynchronize memory.
747 if STORE(r, z)&r.regdiff.b[z]&bb != 0 {
748 change -= CLOAD * int(f.Loop)
// Recurse into other predecessors and successors of the region.
751 if r.refbehind.b[z]&bb != 0 {
752 for f1 = f.P2; f1 != nil; f1 = f1.P2link {
753 if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
759 if r.refahead.b[z]&bb == 0 {
764 if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
773 if r.act.b[z]&bb != 0 {
776 if r.refbehind.b[z]&bb == 0 {
// paint2 retraces the region for variable bn (previously marked by
// paint1 via the act bits, which it clears) and returns the union of
// hardware register bits (regu) used anywhere in the region, so allreg
// can avoid those registers. depth is for debug indentation only.
782 func paint2(f *Flow, bn int, depth int) uint64 {
784 bb := uint64(1 << uint(bn%64))
787 if r.act.b[z]&bb == 0 {
793 if r.refbehind.b[z]&bb == 0 {
801 if r1.refahead.b[z]&bb == 0 {
804 if r1.act.b[z]&bb == 0 {
812 if Debug['R'] != 0 && Debug['v'] != 0 {
813 fmt.Printf(" paint2 %d %v\n", depth, f.Prog)
// Recurse into the rest of the region, same shape as paint1.
820 if r.refbehind.b[z]&bb != 0 {
821 for f1 = f.P2; f1 != nil; f1 = f1.P2link {
822 if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
823 vreg |= paint2(f1, bn, depth+1)
828 if r.refahead.b[z]&bb == 0 {
833 if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
834 vreg |= paint2(f1, bn, depth+1)
842 if r.act.b[z]&bb == 0 {
845 if r.refbehind.b[z]&bb == 0 {
// paint3 rewrites the region for variable bn to use register rn
// (register-bit set rb): it inserts a load (addmove f=0) where the
// value first becomes needed, replaces uses and sets in each
// instruction's operands with the register, and inserts a store
// (addmove f=1) where memory must be resynchronized. It re-marks the
// act bits that paint2 cleared.
853 func paint3(f *Flow, bn int, rb uint64, rn int) {
855 bb := uint64(1 << uint(bn%64))
857 if r.act.b[z]&bb != 0 {
863 if r.refbehind.b[z]&bb == 0 {
871 if r1.refahead.b[z]&bb == 0 {
874 if r1.act.b[z]&bb != 0 {
// Entry point of the live range: load the variable into rn.
881 if LOAD(r, z)&^(r.set.b[z]&^(r.use1.b[z]|r.use2.b[z]))&bb != 0 {
882 addmove(f, bn, rn, 0)
// Rewrite reads (use1) and writes/reads (use2/set) to the register.
889 if r.use1.b[z]&bb != 0 {
890 if Debug['R'] != 0 && Debug['v'] != 0 {
894 if Debug['R'] != 0 && Debug['v'] != 0 {
895 fmt.Printf(" ===change== %v\n", p)
899 if (r.use2.b[z]|r.set.b[z])&bb != 0 {
900 if Debug['R'] != 0 && Debug['v'] != 0 {
904 if Debug['R'] != 0 && Debug['v'] != 0 {
905 fmt.Printf(" ===change== %v\n", p)
// Exit point of the live range: store the register back to memory.
909 if STORE(r, z)&r.regdiff.b[z]&bb != 0 {
910 addmove(f, bn, rn, 1)
// Recurse into the rest of the region, same shape as paint1/paint2.
914 if r.refbehind.b[z]&bb != 0 {
915 for f1 = f.P2; f1 != nil; f1 = f1.P2link {
916 if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
917 paint3(f1, bn, rb, rn)
922 if r.refahead.b[z]&bb == 0 {
927 if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
928 paint3(f1, bn, rb, rn)
936 if r.act.b[z]&bb != 0 {
939 if r.refbehind.b[z]&bb == 0 {
// addreg rewrites operand a in place to refer to register rn.
945 func addreg(a *obj.Addr, rn int) {
949 a.Type = obj.TYPE_REG
// dumpone prints one flow node's instruction and, when any dataflow
// bits are set, a one-line summary of its Reg sets (debug output for
// Dumpit). isreg selects whether register sets are meaningful.
956 func dumpone(f *Flow, isreg int) {
957 fmt.Printf("%d:%v", f.Loop, f.Prog)
// Union all sets so empty nodes print nothing extra.
961 for z := 0; z < BITS; z++ {
962 bit.b[z] = r.set.b[z] | r.use1.b[z] | r.use2.b[z] | r.refbehind.b[z] | r.refahead.b[z] | r.calbehind.b[z] | r.calahead.b[z] | r.regdiff.b[z] | r.act.b[z] | 0
967 fmt.Printf(" s:%v", &r.set)
970 fmt.Printf(" u1:%v", &r.use1)
973 fmt.Printf(" u2:%v", &r.use2)
975 if bany(&r.refbehind) {
976 fmt.Printf(" rb:%v ", &r.refbehind)
978 if bany(&r.refahead) {
979 fmt.Printf(" ra:%v ", &r.refahead)
981 if bany(&r.calbehind) {
982 fmt.Printf(" cb:%v ", &r.calbehind)
984 if bany(&r.calahead) {
985 fmt.Printf(" ca:%v ", &r.calahead)
987 if bany(&r.regdiff) {
988 fmt.Printf(" d:%v ", &r.regdiff)
991 fmt.Printf(" a:%v ", &r.act)
// Dumpit prints the whole flow graph starting at r0 under the heading
// str, one node per line via dumpone, annotated with predecessor and
// successor instruction PCs when they are not simply the linear order.
999 func Dumpit(str string, r0 *Flow, isreg int) {
1002 fmt.Printf("\n%s\n", str)
1003 for r := r0; r != nil; r = r.Link {
1007 fmt.Printf("\tpred:")
1008 for ; r1 != nil; r1 = r1.P2link {
1009 fmt.Printf(" %.4d", uint(int(r1.Prog.Pc)))
1012 fmt.Printf(" (and %.4d)", uint(int(r.P1.Prog.Pc)))
1014 fmt.Printf(" (only)")
1019 // Print successors if it's not just the next one
1020 if r.S1 != r.Link || r.S2 != nil {
1021 fmt.Printf("\tsucc:")
1023 fmt.Printf(" %.4d", uint(int(r.S1.Prog.Pc)))
1026 fmt.Printf(" %.4d", uint(int(r.S2.Prog.Pc)))
// regopt is the driver for register allocation over the instruction
// list starting at firstp. Its passes: build the flow graph and per-
// instruction use/set sets (via mkvar), find loops, propagate VARDEFs
// (walkvardef), propagate liveness backward (prop) and synchrony
// forward (synch), score candidate regions (paint1), sort them by cost
// (rcmp), then for each region pick a register (paint2/allreg) and
// rewrite the code (paint3). Finally it runs the peephole optimizer
// and strips NOPs.
1033 func regopt(firstp *obj.Prog) {
1036 // control flow is more complicated in generated go code
1037 // than in generated c code. define pseudo-variables for
1038 // registers, so we have complete register usage information.
1040 regnames := Thearch.Regnames(&nreg)
1043 for i := 0; i < nreg; i++ {
1046 for i := 0; i < nreg; i++ {
1047 if regnodes[i] == nil {
1048 regnodes[i] = newname(Lookup(regnames[i]))
1050 vars[i].node = regnodes[i]
1053 regbits = Thearch.Excludedregs()
1062 // build aux data structure
1064 // find use and set of variables
1065 g := Flowstart(firstp, func() interface{} { return new(Reg) })
1067 for i := 0; i < nvar; i++ {
1068 vars[i].node.SetOpt(nil)
// Pass 1: compute use1/use2/set for every instruction.
1075 for f := firstf; f != nil; f = f.Link {
1077 // AVARLIVE must be considered a use, do not skip it.
1078 // Otherwise the variable will be optimized away,
1079 // and the whole point of AVARLIVE is to keep it on the stack.
1080 if p.As == obj.AVARDEF || p.As == obj.AVARKILL {
1084 // Avoid making variables for direct-called functions.
1085 if p.As == obj.ACALL && p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_EXTERN {
1089 // from vs to doesn't matter for registers.
1091 r.use1.b[0] |= p.Info.Reguse | p.Info.Regindex
1092 r.set.b[0] |= p.Info.Regset
1094 bit := mkvar(f, &p.From)
1096 if p.Info.Flags&LeftAddr != 0 {
1099 if p.Info.Flags&LeftRead != 0 {
1100 for z := 0; z < BITS; z++ {
1101 r.use1.b[z] |= bit.b[z]
1104 if p.Info.Flags&LeftWrite != 0 {
1105 for z := 0; z < BITS; z++ {
1106 r.set.b[z] |= bit.b[z]
1111 // Compute used register for reg
1112 if p.Info.Flags&RegRead != 0 {
1113 r.use1.b[0] |= Thearch.RtoB(int(p.Reg))
1116 // Currently we never generate three register forms.
1117 // If we do, this will need to change.
1118 if p.From3Type() != obj.TYPE_NONE && p.From3Type() != obj.TYPE_CONST {
1119 Fatalf("regopt not implemented for from3")
1122 bit = mkvar(f, &p.To)
1124 if p.Info.Flags&RightAddr != 0 {
1127 if p.Info.Flags&RightRead != 0 {
1128 for z := 0; z < BITS; z++ {
1129 r.use2.b[z] |= bit.b[z]
1132 if p.Info.Flags&RightWrite != 0 {
1133 for z := 0; z < BITS; z++ {
1134 r.set.b[z] |= bit.b[z]
// Fold address-taken variables into the addrs set.
1140 for i := 0; i < nvar; i++ {
1143 bit := blsh(uint(i))
1144 for z := 0; z < BITS; z++ {
1145 addrs.b[z] |= bit.b[z]
1149 if Debug['R'] != 0 && Debug['v'] != 0 {
1150 fmt.Printf("bit=%2d addr=%d et=%v w=%-2d s=%v + %d\n", i, v.addr, v.etype, v.width, v.node, v.offset)
1154 if Debug['R'] != 0 && Debug['v'] != 0 {
1155 Dumpit("pass1", firstf, 1)
1159 // find looping structure
1162 if Debug['R'] != 0 && Debug['v'] != 0 {
1163 Dumpit("pass2", firstf, 1)
1167 // iterate propagating fat vardef covering forward
1168 // r->act records vars with a VARDEF since the last CALL.
1169 // (r->act will be reused in pass 5 for something else,
1170 // but we'll be done with it by then.)
1173 for f := firstf; f != nil; f = f.Link {
1179 for f := firstf; f != nil; f = f.Link {
1181 if p.As == obj.AVARDEF && Isfat(((p.To.Node).(*Node)).Type) && ((p.To.Node).(*Node)).Opt() != nil {
1183 walkvardef(p.To.Node.(*Node), f, active)
1188 // iterate propagating usage
1189 // back until flow graph is complete
1196 for f = firstf; f != nil; f = f.Link {
1199 for f = firstf; f != nil; f = f.Link {
1200 if f.Prog.As == obj.ARET {
1201 prop(f, zbits, zbits)
1205 // pick up unreachable code
1209 for f = firstf; f != nil; f = f1 {
1211 if f1 != nil && f1.Active != 0 && f.Active == 0 {
1212 prop(f, zbits, zbits)
1224 if Debug['R'] != 0 && Debug['v'] != 0 {
1225 Dumpit("pass3", firstf, 1)
1229 // iterate propagating register/variable synchrony
1230 // forward until graph is complete
1234 for f = firstf; f != nil; f = f.Link {
1237 synch(firstf, zbits)
1242 if Debug['R'] != 0 && Debug['v'] != 0 {
1243 Dumpit("pass4", firstf, 1)
1247 // move register pseudo-variables into regu.
1248 mask := uint64((1 << uint(nreg)) - 1)
1249 for f := firstf; f != nil; f = f.Link {
1251 r.regu = (r.refbehind.b[0] | r.set.b[0]) & mask
// Strip the register pseudo-variables out of the variable sets.
1253 r.use1.b[0] &^= mask
1254 r.use2.b[0] &^= mask
1255 r.refbehind.b[0] &^= mask
1256 r.refahead.b[0] &^= mask
1257 r.calbehind.b[0] &^= mask
1258 r.calahead.b[0] &^= mask
1259 r.regdiff.b[0] &^= mask
1263 if Debug['R'] != 0 && Debug['v'] != 0 {
1264 Dumpit("pass4.5", firstf, 1)
1269 // calculate costs (paint1)
1271 if f := firstf; f != nil {
1273 for z := 0; z < BITS; z++ {
1274 bit.b[z] = (r.refahead.b[z] | r.calahead.b[z]) &^ (externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z])
1276 if bany(&bit) && !f.Refset {
1277 // should never happen - all variables are preset
1278 if Debug['w'] != 0 {
1279 fmt.Printf("%v: used and not set: %v\n", f.Prog.Line(), &bit)
1285 for f := firstf; f != nil; f = f.Link {
1286 (f.Data.(*Reg)).act = zbits
1290 for f := firstf; f != nil; f = f.Link {
1292 for z := 0; z < BITS; z++ {
1293 bit.b[z] = r.set.b[z] &^ (r.refahead.b[z] | r.calahead.b[z] | addrs.b[z])
1295 if bany(&bit) && !f.Refset {
1296 if Debug['w'] != 0 {
1297 fmt.Printf("%v: set and not used: %v\n", f.Prog.Line(), &bit)
// Collect candidate regions: each variable load point starts one.
1303 for z := 0; z < BITS; z++ {
1304 bit.b[z] = LOAD(r, z) &^ (r.act.b[z] | addrs.b[z])
1310 biclr(&bit, uint(i))
1314 if nregion >= MaxRgn {
1319 region = append(region, Rgn{
1321 cost: int16(change),
1328 if false && Debug['v'] != 0 && strings.Contains(Curfn.Func.Nname.Sym.Name, "Parse") {
1329 Warn("regions: %d\n", nregion)
1331 if nregion >= MaxRgn {
1332 if Debug['v'] != 0 {
1333 Warn("too many regions: %d\n", nregion)
// Best (highest-cost) regions get first pick of registers.
1338 sort.Sort(rcmp(region[:nregion]))
1340 if Debug['R'] != 0 && Debug['v'] != 0 {
1341 Dumpit("pass5", firstf, 1)
1345 // determine used registers (paint2)
1346 // replace code (paint3)
1347 if Debug['R'] != 0 && Debug['v'] != 0 {
1348 fmt.Printf("\nregisterizing\n")
1350 for i := 0; i < nregion; i++ {
1352 if Debug['R'] != 0 && Debug['v'] != 0 {
1353 fmt.Printf("region %d: cost %d varno %d enter %d\n", i, rgp.cost, rgp.varno, rgp.enter.Prog.Pc)
1355 bit = blsh(uint(rgp.varno))
1356 usedreg := paint2(rgp.enter, int(rgp.varno), 0)
1357 vreg := allreg(usedreg, rgp)
1359 if Debug['R'] != 0 && Debug['v'] != 0 {
1360 v := &vars[rgp.varno]
1361 fmt.Printf("registerize %v+%d (bit=%2d et=%v) in %v usedreg=%#x vreg=%#x\n", v.node, v.offset, rgp.varno, v.etype, obj.Rconv(int(rgp.regno)), usedreg, vreg)
1364 paint3(rgp.enter, int(rgp.varno), vreg, int(rgp.regno))
1368 // free aux structures. peep allocates new ones.
1369 for i := 0; i < nvar; i++ {
1370 vars[i].node.SetOpt(nil)
1375 if Debug['R'] != 0 && Debug['v'] != 0 {
1376 // Rebuild flow graph, since we inserted instructions
1377 g := Flowstart(firstp, nil)
1379 Dumpit("pass6", firstf, 0)
1385 // peep-hole on basic block
1386 if Debug['R'] == 0 || Debug['P'] != 0 {
1387 Thearch.Peep(firstp)
// Eliminate NOPs left behind, both inline and as branch targets.
1391 for p := firstp; p != nil; p = p.Link {
1392 for p.Link != nil && p.Link.As == obj.ANOP {
1393 p.Link = p.Link.Link
1395 if p.To.Type == obj.TYPE_BRANCH {
1396 for p.To.Val.(*obj.Prog) != nil && p.To.Val.(*obj.Prog).As == obj.ANOP {
1397 p.To.Val = p.To.Val.(*obj.Prog).Link
1402 if Debug['R'] != 0 {
1403 if Ostats.Ncvtreg != 0 || Ostats.Nspill != 0 || Ostats.Nreload != 0 || Ostats.Ndelmov != 0 || Ostats.Nvar != 0 || Ostats.Naddr != 0 || false {
1404 fmt.Printf("\nstats\n")
1407 if Ostats.Ncvtreg != 0 {
1408 fmt.Printf("\t%4d cvtreg\n", Ostats.Ncvtreg)
1410 if Ostats.Nspill != 0 {
1411 fmt.Printf("\t%4d spill\n", Ostats.Nspill)
1413 if Ostats.Nreload != 0 {
1414 fmt.Printf("\t%4d reload\n", Ostats.Nreload)
1416 if Ostats.Ndelmov != 0 {
1417 fmt.Printf("\t%4d delmov\n", Ostats.Ndelmov)
1419 if Ostats.Nvar != 0 {
1420 fmt.Printf("\t%4d var\n", Ostats.Nvar)
1422 if Ostats.Naddr != 0 {
1423 fmt.Printf("\t%4d addr\n", Ostats.Naddr)
1430 // bany reports whether any bits in a are set.
// It is the emptiness test used by the debug dumps (see dumpone).
1431 func bany(a *Bits) bool {
1432 for _, x := range &a.b { // & to avoid making a copy of a.b
1440 // bnum reports the lowest index of a 1 bit in a.
// It calls Fatalf (via the fallthrough below) if a is empty.
1441 func bnum(a *Bits) int {
1442 for i, x := range &a.b { // & to avoid making a copy of a.b
1444 return 64*i + Bitno(x)
1448 Fatalf("bad in bnum")
1452 // blsh returns a Bits with 1 at index n, 0 elsewhere (1<<n).
// Word n/64, bit n%64 — the same indexing used by btest/biset/biclr.
1453 func blsh(n uint) Bits {
1455 c.b[n/64] = 1 << (n % 64)
1459 // btest reports whether bit n is 1.
// n is a variable number (v.id), as throughout the Bits helpers.
1460 func btest(a *Bits, n uint) bool {
1461 return a.b[n/64]&(1<<(n%64)) != 0
1464 // biset sets bit n to 1.
// In-place counterpart of blsh: word n/64, bit n%64.
1465 func biset(a *Bits, n uint) {
1466 a.b[n/64] |= 1 << (n % 64)
1469 // biclr sets bit n to 0.
// &^= is Go's AND NOT, clearing exactly the selected bit.
1470 func biclr(a *Bits, n uint) {
1471 a.b[n/64] &^= (1 << (n % 64))
1474 // Bitno reports the lowest index of a 1 bit in b.
1475 // It calls Fatalf if there is no 1 bit.
// The body is a binary search: each test checks whether the low
// 32/16/8/4/2 bits are all zero and skips past them if so.
1476 func Bitno(b uint64) int {
1478 Fatalf("bad in bitno")
1481 if b&(1<<32-1) == 0 {
1485 if b&(1<<16-1) == 0 {
1489 if b&(1<<8-1) == 0 {
1493 if b&(1<<4-1) == 0 {
1497 if b&(1<<2-1) == 0 {
1507 // String returns a space-separated list of the variables represented by bits.
1508 func (bits Bits) String() string {
1509 // Note: This method takes a value receiver, both for convenience
1510 // and to make it safe to modify the bits as we process them.
1511 // Even so, most prints above use &bits, because then the value
1512 // being stored in the interface{} is a pointer and does not require
1513 // an allocation and copy to create the interface{}.
1514 var buf bytes.Buffer
1518 buf.WriteString(sep)
// Fixed registers print as $i; named variables as name(i)+offset.
1521 if v.node == nil || v.node.Sym == nil {
1522 fmt.Fprintf(&buf, "$%d", i)
1524 fmt.Fprintf(&buf, "%s(%d)", v.node.Sym.Name, i)
1526 fmt.Fprintf(&buf, "%+d", v.offset)
// Clearing the bit advances the loop to the next set variable.
1529 biclr(&bits, uint(i))