src/cmd/compile/internal/gc/popt.go

   1 // Derived from Inferno utils/6c/gc.h
   2 // http://code.google.com/p/inferno-os/source/browse/utils/6c/gc.h
   3 //
   4 //      Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
   5 //      Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
   6 //      Portions Copyright © 1997-1999 Vita Nuova Limited
   7 //      Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
   8 //      Portions Copyright © 2004,2006 Bruce Ellis
   9 //      Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
  10 //      Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
  11 //      Portions Copyright © 2009 The Go Authors.  All rights reserved.
  12 //
  13 // Permission is hereby granted, free of charge, to any person obtaining a copy
  14 // of this software and associated documentation files (the "Software"), to deal
  15 // in the Software without restriction, including without limitation the rights
  16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  17 // copies of the Software, and to permit persons to whom the Software is
  18 // furnished to do so, subject to the following conditions:
  19 //
  20 // The above copyright notice and this permission notice shall be included in
  21 // all copies or substantial portions of the Software.
  22 //
  23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
  26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  29 // THE SOFTWARE.
  30
  31 // "Portable" optimizations.
  32
  33 package gc
  34
  35 import (
  36         "cmd/internal/obj"
  37         "fmt"
  38         "sort"
  39         "strings"
  40 )
  41
  42 type OptStats struct {
  43         Ncvtreg int32
  44         Nspill  int32
  45         Nreload int32
  46         Ndelmov int32
  47         Nvar    int32
  48         Naddr   int32
  49 }
  50
  51 var Ostats OptStats
  52
  53 var noreturn_symlist [10]*Sym
  54
  55 // p is a call instruction. Does the call fail to return?
  56 func Noreturn(p *obj.Prog) bool {
  57         if noreturn_symlist[0] == nil {
  58                 noreturn_symlist[0] = Pkglookup("panicindex", Runtimepkg)
  59                 noreturn_symlist[1] = Pkglookup("panicslice", Runtimepkg)
  60                 noreturn_symlist[2] = Pkglookup("throwinit", Runtimepkg)
  61                 noreturn_symlist[3] = Pkglookup("gopanic", Runtimepkg)
  62                 noreturn_symlist[4] = Pkglookup("panicwrap", Runtimepkg)
  63                 noreturn_symlist[5] = Pkglookup("throwreturn", Runtimepkg)
  64                 noreturn_symlist[6] = Pkglookup("selectgo", Runtimepkg)
  65                 noreturn_symlist[7] = Pkglookup("block", Runtimepkg)
  66         }
  67
  68         if p.To.Node == nil {
  69                 return false
  70         }
  71         s := ((p.To.Node).(*Node)).Sym
  72         if s == nil {
  73                 return false
  74         }
  75         for i := 0; noreturn_symlist[i] != nil; i++ {
  76                 if s == noreturn_symlist[i] {
  77                         return true
  78                 }
  79         }
  80         return false
  81 }
  82
  83 // JMP chasing and removal.
  84 //
  85 // The code generator depends on being able to write out jump
  86 // instructions that it can jump to now but fill in later.
  87 // the linker will resolve them nicely, but they make the code
  88 // longer and more difficult to follow during debugging.
  89 // Remove them.
  90
  91 // what instruction does a JMP to p eventually land on?
  92 func chasejmp(p *obj.Prog, jmploop *int) *obj.Prog {
  93         n := 0
  94         for p != nil && p.As == obj.AJMP && p.To.Type == obj.TYPE_BRANCH {
  95                 n++
  96                 if n > 10 {
  97                         *jmploop = 1
  98                         break
  99                 }
 100
 101                 p = p.To.Val.(*obj.Prog)
 102         }
 103
 104         return p
 105 }
 106
 107 // reuse reg pointer for mark/sweep state.
 108 // leave reg==nil at end because alive==nil.
 109 var alive interface{} = nil
 110 var dead interface{} = 1
 111
 112 // mark all code reachable from firstp as alive
 113 func mark(firstp *obj.Prog) {
 114         for p := firstp; p != nil; p = p.Link {
 115                 if p.Opt != dead {
 116                         break
 117                 }
 118                 p.Opt = alive
 119                 if p.As != obj.ACALL && p.To.Type == obj.TYPE_BRANCH && p.To.Val.(*obj.Prog) != nil {
 120                         mark(p.To.Val.(*obj.Prog))
 121                 }
 122                 if p.As == obj.AJMP || p.As == obj.ARET || p.As == obj.AUNDEF {
 123                         break
 124                 }
 125         }
 126 }
 127
 128 func fixjmp(firstp *obj.Prog) {
 129         if Debug['R'] != 0 && Debug['v'] != 0 {
 130                 fmt.Printf("\nfixjmp\n")
 131         }
 132
 133         // pass 1: resolve jump to jump, mark all code as dead.
 134         jmploop := 0
 135
 136         for p := firstp; p != nil; p = p.Link {
 137                 if Debug['R'] != 0 && Debug['v'] != 0 {
 138                         fmt.Printf("%v\n", p)
 139                 }
 140                 if p.As != obj.ACALL && p.To.Type == obj.TYPE_BRANCH && p.To.Val.(*obj.Prog) != nil && p.To.Val.(*obj.Prog).As == obj.AJMP {
 141                         if Debug['N'] == 0 {
 142                                 p.To.Val = chasejmp(p.To.Val.(*obj.Prog), &jmploop)
 143                                 if Debug['R'] != 0 && Debug['v'] != 0 {
 144                                         fmt.Printf("->%v\n", p)
 145                                 }
 146                         }
 147                 }
 148
 149                 p.Opt = dead
 150         }
 151         if Debug['R'] != 0 && Debug['v'] != 0 {
 152                 fmt.Printf("\n")
 153         }
 154
 155         // pass 2: mark all reachable code alive
 156         mark(firstp)
 157
 158         // pass 3: delete dead code (mostly JMPs).
 159         var last *obj.Prog
 160
 161         for p := firstp; p != nil; p = p.Link {
 162                 if p.Opt == dead {
 163                         if p.Link == nil && p.As == obj.ARET && last != nil && last.As != obj.ARET {
 164                                 // This is the final ARET, and the code so far doesn't have one.
 165                                 // Let it stay. The register allocator assumes that all live code in
 166                                 // the function can be traversed by starting at all the RET instructions
 167                                 // and following predecessor links. If we remove the final RET,
 168                                 // this assumption will not hold in the case of an infinite loop
 169                                 // at the end of a function.
 170                                 // Keep the RET but mark it dead for the liveness analysis.
 171                                 p.Mode = 1
 172                         } else {
 173                                 if Debug['R'] != 0 && Debug['v'] != 0 {
 174                                         fmt.Printf("del %v\n", p)
 175                                 }
 176                                 continue
 177                         }
 178                 }
 179
 180                 if last != nil {
 181                         last.Link = p
 182                 }
 183                 last = p
 184         }
 185
 186         last.Link = nil
 187
 188         // pass 4: elide JMP to next instruction.
 189         // only safe if there are no jumps to JMPs anymore.
 190         if jmploop == 0 && Debug['N'] == 0 {
 191                 var last *obj.Prog
 192                 for p := firstp; p != nil; p = p.Link {
 193                         if p.As == obj.AJMP && p.To.Type == obj.TYPE_BRANCH && p.To.Val == p.Link {
 194                                 if Debug['R'] != 0 && Debug['v'] != 0 {
 195                                         fmt.Printf("del %v\n", p)
 196                                 }
 197                                 continue
 198                         }
 199
 200                         if last != nil {
 201                                 last.Link = p
 202                         }
 203                         last = p
 204                 }
 205
 206                 last.Link = nil
 207         }
 208
 209         if Debug['R'] != 0 && Debug['v'] != 0 {
 210                 fmt.Printf("\n")
 211                 for p := firstp; p != nil; p = p.Link {
 212                         fmt.Printf("%v\n", p)
 213                 }
 214                 fmt.Printf("\n")
 215         }
 216 }
 217
 218 // Control flow analysis. The Flow structures hold predecessor and successor
 219 // information as well as basic loop analysis.
 220 //
 221 //      graph = Flowstart(firstp, nil)
 222 //      ... use flow graph ...
 223 //      Flowend(graph) // free graph
 224 //
 225 // Typical uses of the flow graph are to iterate over all the flow-relevant instructions:
 226 //
 227 //      for f := graph.Start; f != nil; f = f.Link {}
 228 //
 229 // or, given an instruction f, to iterate over all the predecessors, which is
 230 // f.P1 and this list:
 231 //
 232 //      for f2 := f.P2; f2 != nil; f2 = f2.P2link {}
 233 //
 234 // The second argument (newData) to Flowstart specifies a func to create object
 235 // for every f.Data field, for use by the client.
 236 // If newData is nil, f.Data will be nil.
 237
 238 type Graph struct {
 239         Start *Flow
 240         Num   int
 241
 242         // After calling flowrpo, rpo lists the flow nodes in reverse postorder,
 243         // and each non-dead Flow node f has g->rpo[f->rpo] == f.
 244         Rpo []*Flow
 245 }
 246
 247 type Flow struct {
 248         Prog   *obj.Prog // actual instruction
 249         P1     *Flow     // predecessors of this instruction: p1,
 250         P2     *Flow     // and then p2 linked though p2link.
 251         P2link *Flow
 252         S1     *Flow // successors of this instruction (at most two: s1 and s2).
 253         S2     *Flow
 254         Link   *Flow // next instruction in function code
 255
 256         Active int32 // usable by client
 257
 258         Id     int32  // sequence number in flow graph
 259         Rpo    int32  // reverse post ordering
 260         Loop   uint16 // x5 for every loop
 261         Refset bool   // diagnostic generated
 262
 263         Data interface{} // for use by client
 264 }
 265
 266 var flowmark int
 267
 268 // MaxFlowProg is the maximum size program (counted in instructions)
 269 // for which the flow code will build a graph. Functions larger than this limit
 270 // will not have flow graphs and consequently will not be optimized.
 271 const MaxFlowProg = 50000
 272
 273 var ffcache []Flow // reusable []Flow, to reduce allocation
 274
 275 func growffcache(n int) {
 276         if n > cap(ffcache) {
 277                 n = (n * 5) / 4
 278                 if n > MaxFlowProg {
 279                         n = MaxFlowProg
 280                 }
 281                 ffcache = make([]Flow, n)
 282         }
 283         ffcache = ffcache[:n]
 284 }
 285
 286 func Flowstart(firstp *obj.Prog, newData func() interface{}) *Graph {
 287         // Count and mark instructions to annotate.
 288         nf := 0
 289
 290         for p := firstp; p != nil; p = p.Link {
 291                 p.Opt = nil // should be already, but just in case
 292                 Thearch.Proginfo(p)
 293                 if p.Info.Flags&Skip != 0 {
 294                         continue
 295                 }
 296                 p.Opt = &flowmark
 297                 nf++
 298         }
 299
 300         if nf == 0 {
 301                 return nil
 302         }
 303
 304         if nf >= MaxFlowProg {
 305                 if Debug['v'] != 0 {
 306                         Warn("%v is too big (%d instructions)", Curfn.Func.Nname.Sym, nf)
 307                 }
 308                 return nil
 309         }
 310
 311         // Allocate annotations and assign to instructions.
 312         graph := new(Graph)
 313
 314         growffcache(nf)
 315         ff := ffcache
 316         start := &ff[0]
 317         id := 0
 318         var last *Flow
 319         for p := firstp; p != nil; p = p.Link {
 320                 if p.Opt == nil {
 321                         continue
 322                 }
 323                 f := &ff[0]
 324                 ff = ff[1:]
 325                 p.Opt = f
 326                 f.Prog = p
 327                 if last != nil {
 328                         last.Link = f
 329                 }
 330                 last = f
 331                 if newData != nil {
 332                         f.Data = newData()
 333                 }
 334                 f.Id = int32(id)
 335                 id++
 336         }
 337
 338         // Fill in pred/succ information.
 339         var f1 *Flow
 340         var p *obj.Prog
 341         for f := start; f != nil; f = f.Link {
 342                 p = f.Prog
 343                 if p.Info.Flags&Break == 0 {
 344                         f1 = f.Link
 345                         f.S1 = f1
 346                         f1.P1 = f
 347                 }
 348
 349                 if p.To.Type == obj.TYPE_BRANCH {
 350                         if p.To.Val == nil {
 351                                 Fatalf("pnil %v", p)
 352                         }
 353                         f1 = p.To.Val.(*obj.Prog).Opt.(*Flow)
 354                         if f1 == nil {
 355                                 Fatalf("fnil %v / %v", p, p.To.Val.(*obj.Prog))
 356                         }
 357                         if f1 == f {
 358                                 //fatal("self loop %v", p);
 359                                 continue
 360                         }
 361
 362                         f.S2 = f1
 363                         f.P2link = f1.P2
 364                         f1.P2 = f
 365                 }
 366         }
 367
 368         graph.Start = start
 369         graph.Num = nf
 370         return graph
 371 }
 372
 373 func Flowend(graph *Graph) {
 374         for f := graph.Start; f != nil; f = f.Link {
 375                 f.Prog.Info.Flags = 0 // drop cached proginfo
 376                 f.Prog.Opt = nil
 377         }
 378         clear := ffcache[:graph.Num]
 379         for i := range clear {
 380                 clear[i] = Flow{}
 381         }
 382 }
 383
 384 // find looping structure
 385 //
 386 // 1) find reverse postordering
 387 // 2) find approximate dominators,
 388 //      the actual dominators if the flow graph is reducible
 389 //      otherwise, dominators plus some other non-dominators.
 390 //      See Matthew S. Hecht and Jeffrey D. Ullman,
 391 //      "Analysis of a Simple Algorithm for Global Data Flow Problems",
 392 //      Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
 393 //      Oct. 1-3, 1973, pp.  207-217.
 394 // 3) find all nodes with a predecessor dominated by the current node.
 395 //      such a node is a loop head.
 396 //      recursively, all preds with a greater rpo number are in the loop
 397 func postorder(r *Flow, rpo2r []*Flow, n int32) int32 {
 398         r.Rpo = 1
 399         r1 := r.S1
 400         if r1 != nil && r1.Rpo == 0 {
 401                 n = postorder(r1, rpo2r, n)
 402         }
 403         r1 = r.S2
 404         if r1 != nil && r1.Rpo == 0 {
 405                 n = postorder(r1, rpo2r, n)
 406         }
 407         rpo2r[n] = r
 408         n++
 409         return n
 410 }
 411
 412 func rpolca(idom []int32, rpo1 int32, rpo2 int32) int32 {
 413         if rpo1 == -1 {
 414                 return rpo2
 415         }
 416         var t int32
 417         for rpo1 != rpo2 {
 418                 if rpo1 > rpo2 {
 419                         t = rpo2
 420                         rpo2 = rpo1
 421                         rpo1 = t
 422                 }
 423
 424                 for rpo1 < rpo2 {
 425                         t = idom[rpo2]
 426                         if t >= rpo2 {
 427                                 Fatalf("bad idom")
 428                         }
 429                         rpo2 = t
 430                 }
 431         }
 432
 433         return rpo1
 434 }
 435
 436 func doms(idom []int32, r int32, s int32) bool {
 437         for s > r {
 438                 s = idom[s]
 439         }
 440         return s == r
 441 }
 442
 443 func loophead(idom []int32, r *Flow) bool {
 444         src := r.Rpo
 445         if r.P1 != nil && doms(idom, src, r.P1.Rpo) {
 446                 return true
 447         }
 448         for r = r.P2; r != nil; r = r.P2link {
 449                 if doms(idom, src, r.Rpo) {
 450                         return true
 451                 }
 452         }
 453         return false
 454 }
 455
 456 func loopmark(rpo2r **Flow, head int32, r *Flow) {
 457         if r.Rpo < head || r.Active == head {
 458                 return
 459         }
 460         r.Active = head
 461         r.Loop += LOOP
 462         if r.P1 != nil {
 463                 loopmark(rpo2r, head, r.P1)
 464         }
 465         for r = r.P2; r != nil; r = r.P2link {
 466                 loopmark(rpo2r, head, r)
 467         }
 468 }
 469
 470 func flowrpo(g *Graph) {
 471         g.Rpo = make([]*Flow, g.Num)
 472         idom := make([]int32, g.Num)
 473
 474         for r1 := g.Start; r1 != nil; r1 = r1.Link {
 475                 r1.Active = 0
 476         }
 477
 478         rpo2r := g.Rpo
 479         d := postorder(g.Start, rpo2r, 0)
 480         nr := int32(g.Num)
 481         if d > nr {
 482                 Fatalf("too many reg nodes %d %d", d, nr)
 483         }
 484         nr = d
 485         var r1 *Flow
 486         for i := int32(0); i < nr/2; i++ {
 487                 r1 = rpo2r[i]
 488                 rpo2r[i] = rpo2r[nr-1-i]
 489                 rpo2r[nr-1-i] = r1
 490         }
 491
 492         for i := int32(0); i < nr; i++ {
 493                 rpo2r[i].Rpo = i
 494         }
 495
 496         idom[0] = 0
 497         var me int32
 498         for i := int32(0); i < nr; i++ {
 499                 r1 = rpo2r[i]
 500                 me = r1.Rpo
 501                 d = -1
 502
 503                 // rpo2r[r.Rpo] == r protects against considering dead code,
 504                 // which has r.Rpo == 0.
 505                 if r1.P1 != nil && rpo2r[r1.P1.Rpo] == r1.P1 && r1.P1.Rpo < me {
 506                         d = r1.P1.Rpo
 507                 }
 508                 for r1 = r1.P2; r1 != nil; r1 = r1.P2link {
 509                         if rpo2r[r1.Rpo] == r1 && r1.Rpo < me {
 510                                 d = rpolca(idom, d, r1.Rpo)
 511                         }
 512                 }
 513                 idom[i] = d
 514         }
 515
 516         for i := int32(0); i < nr; i++ {
 517                 r1 = rpo2r[i]
 518                 r1.Loop++
 519                 if r1.P2 != nil && loophead(idom, r1) {
 520                         loopmark(&rpo2r[0], i, r1)
 521                 }
 522         }
 523
 524         for r1 := g.Start; r1 != nil; r1 = r1.Link {
 525                 r1.Active = 0
 526         }
 527 }
 528
 529 func Uniqp(r *Flow) *Flow {
 530         r1 := r.P1
 531         if r1 == nil {
 532                 r1 = r.P2
 533                 if r1 == nil || r1.P2link != nil {
 534                         return nil
 535                 }
 536         } else if r.P2 != nil {
 537                 return nil
 538         }
 539         return r1
 540 }
 541
 542 func Uniqs(r *Flow) *Flow {
 543         r1 := r.S1
 544         if r1 == nil {
 545                 r1 = r.S2
 546                 if r1 == nil {
 547                         return nil
 548                 }
 549         } else if r.S2 != nil {
 550                 return nil
 551         }
 552         return r1
 553 }
 554
 555 // The compilers assume they can generate temporary variables
 556 // as needed to preserve the right semantics or simplify code
 557 // generation and the back end will still generate good code.
 558 // This results in a large number of ephemeral temporary variables.
 559 // Merge temps with non-overlapping lifetimes and equal types using the
 560 // greedy algorithm in Poletto and Sarkar, "Linear Scan Register Allocation",
 561 // ACM TOPLAS 1999.
 562
 563 type TempVar struct {
 564         node    *Node
 565         def     *Flow    // definition of temp var
 566         use     *Flow    // use list, chained through Flow.data
 567         merge   *TempVar // merge var with this one
 568         start   int64    // smallest Prog.pc in live range
 569         end     int64    // largest Prog.pc in live range
 570         addr    bool     // address taken - no accurate end
 571         removed bool     // removed from program
 572 }
 573
 574 // startcmp sorts TempVars by start, then id, then symbol name.
 575 type startcmp []*TempVar
 576
 577 func (x startcmp) Len() int      { return len(x) }
 578 func (x startcmp) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
 579 func (x startcmp) Less(i, j int) bool {
 580         a := x[i]
 581         b := x[j]
 582
 583         if a.start < b.start {
 584                 return true
 585         }
 586         if a.start > b.start {
 587                 return false
 588         }
 589
 590         // Order what's left by id or symbol name,
 591         // just so that sort is forced into a specific ordering,
 592         // so that the result of the sort does not depend on
 593         // the sort implementation.
 594         if a.def != b.def {
 595                 return int(a.def.Id-b.def.Id) < 0
 596         }
 597         if a.node != b.node {
 598                 return a.node.Sym.Name < b.node.Sym.Name
 599         }
 600         return false
 601 }
 602
 603 // Is n available for merging?
 604 func canmerge(n *Node) bool {
 605         return n.Class == PAUTO && strings.HasPrefix(n.Sym.Name, "autotmp")
 606 }
 607
 608 func mergetemp(firstp *obj.Prog) {
 609         const (
 610                 debugmerge = 0
 611         )
 612
 613         g := Flowstart(firstp, nil)
 614         if g == nil {
 615                 return
 616         }
 617
 618         // Build list of all mergeable variables.
 619         var vars []*TempVar
 620         for _, n := range Curfn.Func.Dcl {
 621                 if canmerge(n) {
 622                         v := &TempVar{}
 623                         vars = append(vars, v)
 624                         n.SetOpt(v)
 625                         v.node = n
 626                 }
 627         }
 628
 629         // Build list of uses.
 630         // We assume that the earliest reference to a temporary is its definition.
 631         // This is not true of variables in general but our temporaries are all
 632         // single-use (that's why we have so many!).
 633         for f := g.Start; f != nil; f = f.Link {
 634                 p := f.Prog
 635                 if p.From.Node != nil && ((p.From.Node).(*Node)).Opt() != nil && p.To.Node != nil && ((p.To.Node).(*Node)).Opt() != nil {
 636                         Fatalf("double node %v", p)
 637                 }
 638                 var v *TempVar
 639                 n, _ := p.From.Node.(*Node)
 640                 if n != nil {
 641                         v, _ = n.Opt().(*TempVar)
 642                 }
 643                 if v == nil {
 644                         n, _ = p.To.Node.(*Node)
 645                         if n != nil {
 646                                 v, _ = n.Opt().(*TempVar)
 647                         }
 648                 }
 649                 if v != nil {
 650                         if v.def == nil {
 651                                 v.def = f
 652                         }
 653                         f.Data = v.use
 654                         v.use = f
 655                         if n == p.From.Node && (p.Info.Flags&LeftAddr != 0) {
 656                                 v.addr = true
 657                         }
 658                 }
 659         }
 660
 661         if debugmerge > 1 && Debug['v'] != 0 {
 662                 Dumpit("before", g.Start, 0)
 663         }
 664
 665         nkill := 0
 666
 667         // Special case.
 668         for _, v := range vars {
 669                 if v.addr {
 670                         continue
 671                 }
 672
 673                 // Used in only one instruction, which had better be a write.
 674                 f := v.use
 675                 if f != nil && f.Data.(*Flow) == nil {
 676                         p := f.Prog
 677                         if p.To.Node == v.node && (p.Info.Flags&RightWrite != 0) && p.Info.Flags&RightRead == 0 {
 678                                 p.As = obj.ANOP
 679                                 p.To = obj.Addr{}
 680                                 v.removed = true
 681                                 if debugmerge > 0 && Debug['v'] != 0 {
 682                                         fmt.Printf("drop write-only %v\n", v.node.Sym)
 683                                 }
 684                         } else {
 685                                 Fatalf("temp used and not set: %v", p)
 686                         }
 687                         nkill++
 688                         continue
 689                 }
 690
 691                 // Written in one instruction, read in the next, otherwise unused,
 692                 // no jumps to the next instruction. Happens mainly in 386 compiler.
 693                 f = v.use
 694                 if f != nil && f.Link == f.Data.(*Flow) && (f.Data.(*Flow)).Data.(*Flow) == nil && Uniqp(f.Link) == f {
 695                         p := f.Prog
 696                         p1 := f.Link.Prog
 697                         const (
 698                                 SizeAny = SizeB | SizeW | SizeL | SizeQ | SizeF | SizeD
 699                         )
 700                         if p.From.Node == v.node && p1.To.Node == v.node && (p.Info.Flags&Move != 0) && (p.Info.Flags|p1.Info.Flags)&(LeftAddr|RightAddr) == 0 && p.Info.Flags&SizeAny == p1.Info.Flags&SizeAny {
 701                                 p1.From = p.From
 702                                 Thearch.Excise(f)
 703                                 v.removed = true
 704                                 if debugmerge > 0 && Debug['v'] != 0 {
 705                                         fmt.Printf("drop immediate-use %v\n", v.node.Sym)
 706                                 }
 707                         }
 708
 709                         nkill++
 710                         continue
 711                 }
 712         }
 713
 714         // Traverse live range of each variable to set start, end.
 715         // Each flood uses a new value of gen so that we don't have
 716         // to clear all the r.Active words after each variable.
 717         gen := uint32(0)
 718
 719         for _, v := range vars {
 720                 gen++
 721                 for f := v.use; f != nil; f = f.Data.(*Flow) {
 722                         mergewalk(v, f, gen)
 723                 }
 724                 if v.addr {
 725                         gen++
 726                         for f := v.use; f != nil; f = f.Data.(*Flow) {
 727                                 varkillwalk(v, f, gen)
 728                         }
 729                 }
 730         }
 731
 732         // Sort variables by start.
 733         bystart := make([]*TempVar, len(vars))
 734         copy(bystart, vars)
 735         sort.Sort(startcmp(bystart))
 736
 737         // List of in-use variables, sorted by end, so that the ones that
 738         // will last the longest are the earliest ones in the array.
 739         // The tail inuse[nfree:] holds no-longer-used variables.
 740         // In theory we should use a sorted tree so that insertions are
 741         // guaranteed O(log n) and then the loop is guaranteed O(n log n).
 742         // In practice, it doesn't really matter.
 743         inuse := make([]*TempVar, len(bystart))
 744
 745         ninuse := 0
 746         nfree := len(bystart)
 747         for _, v := range bystart {
 748                 if debugmerge > 0 && Debug['v'] != 0 {
 749                         fmt.Printf("consider %v: removed=%t\n", Nconv(v.node, FmtSharp), v.removed)
 750                 }
 751
 752                 if v.removed {
 753                         continue
 754                 }
 755
 756                 // Expire no longer in use.
 757                 for ninuse > 0 && inuse[ninuse-1].end < v.start {
 758                         ninuse--
 759                         nfree--
 760                         inuse[nfree] = inuse[ninuse]
 761                 }
 762
 763                 if debugmerge > 0 && Debug['v'] != 0 {
 764                         fmt.Printf("consider %v: removed=%t nfree=%d nvar=%d\n", Nconv(v.node, FmtSharp), v.removed, nfree, len(bystart))
 765                 }
 766
 767                 // Find old temp to reuse if possible.
 768                 t := v.node.Type
 769
 770                 for j := nfree; j < len(inuse); j++ {
 771                         v1 := inuse[j]
 772                         if debugmerge > 0 && Debug['v'] != 0 {
 773                                 fmt.Printf("consider %v: maybe %v: type=%v,%v addrtaken=%v,%v\n", Nconv(v.node, FmtSharp), Nconv(v1.node, FmtSharp), t, v1.node.Type, v.node.Addrtaken, v1.node.Addrtaken)
 774                         }
 775
 776                         // Require the types to match but also require the addrtaken bits to match.
 777                         // If a variable's address is taken, that disables registerization for the individual
 778                         // words of the variable (for example, the base,len,cap of a slice).
 779                         // We don't want to merge a non-addressed var with an addressed one and
 780                         // inhibit registerization of the former.
 781                         if Eqtype(t, v1.node.Type) && v.node.Addrtaken == v1.node.Addrtaken {
 782                                 inuse[j] = inuse[nfree]
 783                                 nfree++
 784                                 if v1.merge != nil {
 785                                         v.merge = v1.merge
 786                                 } else {
 787                                         v.merge = v1
 788                                 }
 789                                 nkill++
 790                                 break
 791                         }
 792                 }
 793
 794                 // Sort v into inuse.
 795                 j := ninuse
 796                 ninuse++
 797
 798                 for j > 0 && inuse[j-1].end < v.end {
 799                         inuse[j] = inuse[j-1]
 800                         j--
 801                 }
 802
 803                 inuse[j] = v
 804         }
 805
 806         if debugmerge > 0 && Debug['v'] != 0 {
 807                 fmt.Printf("%v [%d - %d]\n", Curfn.Func.Nname.Sym, len(vars), nkill)
 808                 for _, v := range vars {
 809                         fmt.Printf("var %v %v %d-%d", Nconv(v.node, FmtSharp), v.node.Type, v.start, v.end)
 810                         if v.addr {
 811                                 fmt.Printf(" addr=true")
 812                         }
 813                         if v.removed {
 814                                 fmt.Printf(" removed=true")
 815                         }
 816                         if v.merge != nil {
 817                                 fmt.Printf(" merge %v", Nconv(v.merge.node, FmtSharp))
 818                         }
 819                         if v.start == v.end && v.def != nil {
 820                                 fmt.Printf(" %v", v.def.Prog)
 821                         }
 822                         fmt.Printf("\n")
 823                 }
 824
 825                 if debugmerge > 1 && Debug['v'] != 0 {
 826                         Dumpit("after", g.Start, 0)
 827                 }
 828         }
 829
 830         // Update node references to use merged temporaries.
 831         for f := g.Start; f != nil; f = f.Link {
 832                 p := f.Prog
 833                 n, _ := p.From.Node.(*Node)
 834                 if n != nil {
 835                         v, _ := n.Opt().(*TempVar)
 836                         if v != nil && v.merge != nil {
 837                                 p.From.Node = v.merge.node
 838                         }
 839                 }
 840                 n, _ = p.To.Node.(*Node)
 841                 if n != nil {
 842                         v, _ := n.Opt().(*TempVar)
 843                         if v != nil && v.merge != nil {
 844                                 p.To.Node = v.merge.node
 845                         }
 846                 }
 847         }
 848
 849         // Delete merged nodes from declaration list.
 850         dcl := make([]*Node, 0, len(Curfn.Func.Dcl)-nkill)
 851         for _, n := range Curfn.Func.Dcl {
 852                 v, _ := n.Opt().(*TempVar)
 853                 if v != nil && (v.merge != nil || v.removed) {
 854                         continue
 855                 }
 856                 dcl = append(dcl, n)
 857         }
 858         Curfn.Func.Dcl = dcl
 859
 860         // Clear aux structures.
 861         for _, v := range vars {
 862                 v.node.SetOpt(nil)
 863         }
 864
 865         Flowend(g)
 866 }
 867
 868 func mergewalk(v *TempVar, f0 *Flow, gen uint32) {
 869         var p *obj.Prog
 870         var f1 *Flow
 871
 872         for f1 = f0; f1 != nil; f1 = f1.P1 {
 873                 if uint32(f1.Active) == gen {
 874                         break
 875                 }
 876                 f1.Active = int32(gen)
 877                 p = f1.Prog
 878                 if v.end < p.Pc {
 879                         v.end = p.Pc
 880                 }
 881                 if f1 == v.def {
 882                         v.start = p.Pc
 883                         break
 884                 }
 885         }
 886
 887         var f2 *Flow
 888         for f := f0; f != f1; f = f.P1 {
 889                 for f2 = f.P2; f2 != nil; f2 = f2.P2link {
 890                         mergewalk(v, f2, gen)
 891                 }
 892         }
 893 }
 894
 895 func varkillwalk(v *TempVar, f0 *Flow, gen uint32) {
 896         var p *obj.Prog
 897         var f1 *Flow
 898
 899         for f1 = f0; f1 != nil; f1 = f1.S1 {
 900                 if uint32(f1.Active) == gen {
 901                         break
 902                 }
 903                 f1.Active = int32(gen)
 904                 p = f1.Prog
 905                 if v.end < p.Pc {
 906                         v.end = p.Pc
 907                 }
 908                 if v.start > p.Pc {
 909                         v.start = p.Pc
 910                 }
 911                 if p.As == obj.ARET || (p.As == obj.AVARKILL && p.To.Node == v.node) {
 912                         break
 913                 }
 914         }
 915
 916         for f := f0; f != f1; f = f.S1 {
 917                 varkillwalk(v, f.S2, gen)
 918         }
 919 }
 920
 921 // Eliminate redundant nil pointer checks.
 922 //
 923 // The code generation pass emits a CHECKNIL for every possibly nil pointer.
 924 // This pass removes a CHECKNIL if every predecessor path has already
 925 // checked this value for nil.
 926 //
 927 // Simple backwards flood from check to definition.
 928 // Run prog loop backward from end of program to beginning to avoid quadratic
 929 // behavior removing a run of checks.
 930 //
 931 // Assume that stack variables with address not taken can be loaded multiple times
 932 // from memory without being rechecked. Other variables need to be checked on
 933 // each load.
 934
 935 var killed int // f.Data is either nil or &killed
 936
 937 func nilopt(firstp *obj.Prog) {
 938         g := Flowstart(firstp, nil)
 939         if g == nil {
 940                 return
 941         }
 942
 943         if Debug_checknil > 1 { // || strcmp(curfn->nname->sym->name, "f1") == 0
 944                 Dumpit("nilopt", g.Start, 0)
 945         }
 946
 947         ncheck := 0
 948         nkill := 0
 949         var p *obj.Prog
 950         for f := g.Start; f != nil; f = f.Link {
 951                 p = f.Prog
 952                 if p.As != obj.ACHECKNIL || !Thearch.Regtyp(&p.From) {
 953                         continue
 954                 }
 955                 ncheck++
 956                 if Thearch.Stackaddr(&p.From) {
 957                         if Debug_checknil != 0 && p.Lineno > 1 {
 958                                 Warnl(p.Lineno, "removed nil check of SP address")
 959                         }
 960                         f.Data = &killed
 961                         continue
 962                 }
 963
 964                 nilwalkfwd(f)
 965                 if f.Data != nil {
 966                         if Debug_checknil != 0 && p.Lineno > 1 {
 967                                 Warnl(p.Lineno, "removed nil check before indirect")
 968                         }
 969                         continue
 970                 }
 971
 972                 nilwalkback(f)
 973                 if f.Data != nil {
 974                         if Debug_checknil != 0 && p.Lineno > 1 {
 975                                 Warnl(p.Lineno, "removed repeated nil check")
 976                         }
 977                         continue
 978                 }
 979         }
 980
 981         for f := g.Start; f != nil; f = f.Link {
 982                 if f.Data != nil {
 983                         nkill++
 984                         Thearch.Excise(f)
 985                 }
 986         }
 987
 988         Flowend(g)
 989
 990         if Debug_checknil > 1 {
 991                 fmt.Printf("%v: removed %d of %d nil checks\n", Curfn.Func.Nname.Sym, nkill, ncheck)
 992         }
 993 }
 994
 995 func nilwalkback(fcheck *Flow) {
 996         for f := fcheck; f != nil; f = Uniqp(f) {
 997                 p := f.Prog
 998                 if (p.Info.Flags&RightWrite != 0) && Thearch.Sameaddr(&p.To, &fcheck.Prog.From) {
 999                         // Found initialization of value we're checking for nil.
1000                         // without first finding the check, so this one is unchecked.
1001                         return
1002                 }
1003
1004                 if f != fcheck && p.As == obj.ACHECKNIL && Thearch.Sameaddr(&p.From, &fcheck.Prog.From) {
1005                         fcheck.Data = &killed
1006                         return
1007                 }
1008         }
1009 }
1010
1011 // Here is a more complex version that scans backward across branches.
1012 // It assumes fcheck->kill = 1 has been set on entry, and its job is to find a reason
1013 // to keep the check (setting fcheck->kill = 0).
1014 // It doesn't handle copying of aggregates as well as I would like,
1015 // nor variables with their address taken,
1016 // and it's too subtle to turn on this late in Go 1.2. Perhaps for Go 1.3.
1017 /*
1018 for(f1 = f0; f1 != nil; f1 = f1->p1) {
1019         if(f1->active == gen)
1020                 break;
1021         f1->active = gen;
1022         p = f1->prog;
1023
1024         // If same check, stop this loop but still check
1025         // alternate predecessors up to this point.
1026         if(f1 != fcheck && p->as == ACHECKNIL && thearch.sameaddr(&p->from, &fcheck->prog->from))
1027                 break;
1028
1029         if((p.Info.flags & RightWrite) && thearch.sameaddr(&p->to, &fcheck->prog->from)) {
1030                 // Found initialization of value we're checking for nil.
1031                 // without first finding the check, so this one is unchecked.
1032                 fcheck->kill = 0;
1033                 return;
1034         }
1035
1036         if(f1->p1 == nil && f1->p2 == nil) {
1037                 print("lost pred for %v\n", fcheck->prog);
1038                 for(f1=f0; f1!=nil; f1=f1->p1) {
1039                         thearch.proginfo(&info, f1->prog);
1040                         print("\t%v %d %d %D %D\n", r1->prog, info.flags&RightWrite, thearch.sameaddr(&f1->prog->to, &fcheck->prog->from), &f1->prog->to, &fcheck->prog->from);
1041                 }
1042                 fatal("lost pred trail");
1043         }
1044 }
1045
1046 for(f = f0; f != f1; f = f->p1)
1047         for(f2 = f->p2; f2 != nil; f2 = f2->p2link)
1048                 nilwalkback(fcheck, f2, gen);
1049 */
1050
1051 func nilwalkfwd(fcheck *Flow) {
1052         // If the path down from rcheck dereferences the address
1053         // (possibly with a small offset) before writing to memory
1054         // and before any subsequent checks, it's okay to wait for
1055         // that implicit check. Only consider this basic block to
1056         // avoid problems like:
1057         //      _ = *x // should panic
1058         //      for {} // no writes but infinite loop may be considered visible
1059
1060         var last *Flow
1061         for f := Uniqs(fcheck); f != nil; f = Uniqs(f) {
1062                 p := f.Prog
1063                 if (p.Info.Flags&LeftRead != 0) && Thearch.Smallindir(&p.From, &fcheck.Prog.From) {
1064                         fcheck.Data = &killed
1065                         return
1066                 }
1067
1068                 if (p.Info.Flags&(RightRead|RightWrite) != 0) && Thearch.Smallindir(&p.To, &fcheck.Prog.From) {
1069                         fcheck.Data = &killed
1070                         return
1071                 }
1072
1073                 // Stop if another nil check happens.
1074                 if p.As == obj.ACHECKNIL {
1075                         return
1076                 }
1077
1078                 // Stop if value is lost.
1079                 if (p.Info.Flags&RightWrite != 0) && Thearch.Sameaddr(&p.To, &fcheck.Prog.From) {
1080                         return
1081                 }
1082
1083                 // Stop if memory write.
1084                 if (p.Info.Flags&RightWrite != 0) && !Thearch.Regtyp(&p.To) {
1085                         return
1086                 }
1087
1088                 // Stop if we jump backward.
1089                 if last != nil && f.Id <= last.Id {
1090                         return
1091                 }
1092                 last = f
1093         }
1094 }