1 // Derived from Inferno utils/6c/reg.c
2 // http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
5 // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
6 // Portions Copyright © 1997-1999 Vita Nuova Limited
7 // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
8 // Portions Copyright © 2004,2006 Bruce Ellis
9 // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
10 // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
11 // Portions Copyright © 2009 The Go Authors. All rights reserved.
13 // Permission is hereby granted, free of charge, to any person obtaining a copy
14 // of this software and associated documentation files (the "Software"), to deal
15 // in the Software without restriction, including without limitation the rights
16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 // copies of the Software, and to permit persons to whom the Software is
18 // furnished to do so, subject to the following conditions:
20 // The above copyright notice and this permission notice shall be included in
21 // all copies or substantial portions of the Software.
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
42 // A Var represents a single variable that may be stored in a register.
43 // That variable may itself correspond to a hardware register,
44 // to represent the use of registers in the unoptimized instruction stream.
// id is the variable's number; Bits sets are indexed by it.
50 id int // index in vars
56 // Bits represents a set of Vars, stored as a bit set of var numbers
57 // (the index in vars, or equivalently v.id).
// Package-level allocator state, reset per function by regopt.
68 vars [NVAR]Var // variables under consideration
69 nvar int // number of vars
71 regbits uint64 // bits for hardware registers
// Classification sets populated by mkvar as variables are discovered.
74 externs Bits // global variables
75 params Bits // function parameters and results
76 ivar Bits // function parameters (inputs)
77 ovar Bits // function results (outputs)
78 consts Bits // constant values
79 addrs Bits // variables with address taken
82 // A Reg is a wrapper around a single Prog (one instruction) that holds
83 // register optimization information while the optimizer runs.
84 // r->prog is the instruction.
// Per-instruction dataflow sets, all indexed by variable id.
86 set Bits // regopt variables written by this instruction.
87 use1 Bits // regopt variables read by prog->from.
88 use2 Bits // regopt variables read by prog->to.
90 // refahead/refbehind are the regopt variables whose current
91 // value may be used in the following/preceding instructions
92 // up to a CALL (or the value is clobbered).
96 // calahead/calbehind are similar, but for variables in
97 // instructions that are reachable after hitting at least one CALL.
104 regu uint64 // register used bitmap
107 // A Rgn represents a single regopt variable over a region of code
108 // where a register could potentially be dedicated to that variable.
109 // The code encompassed by a Rgn is defined by the flow graph,
110 // starting at enter, flood-filling forward while varno is refahead
111 // and backward while varno is refbehind, and following branches.
112 // A single variable may be represented by multiple disjoint Rgns and
113 // each Rgn may choose a different register for that variable.
114 // Registers are allocated to regions greedily in order of descending
123 // The Plan 9 C compilers used a limit of 600 regions,
124 // but the yacc-generated parser in y.go has 3100 regions.
125 // We set MaxRgn large enough to handle that.
126 // There's not a huge cost to having too many regions:
127 // the main processing traces the live area for each variable,
128 // which is limited by the number of variables times the area,
129 // not the raw region count. If there are many regions, they
130 // are almost certainly small and easy to trace.
131 // The only operation that scales with region count is the
132 // sorting by cost, which uses sort.Sort and is therefore
133 // guaranteed n log n.
// rcmp implements sort.Interface for []Rgn, ordering regions by
// descending cost, then descending varno, then descending enter Id,
// so that the cheapest-to-lose regions are allocated registers last.
143 func (x rcmp) Len() int {
147 func (x rcmp) Swap(i, j int) {
148 x[i], x[j] = x[j], x[i]
151 func (x rcmp) Less(i, j int) bool {
// Higher cost sorts first.
154 if p1.cost != p2.cost {
155 return int(p2.cost)-int(p1.cost) < 0
// Tie-break on varno, then on the entry flow node's Id,
// to keep the sort deterministic.
157 if p1.varno != p2.varno {
158 return int(p2.varno)-int(p1.varno) < 0
160 if p1.enter != p2.enter {
161 return int(p2.enter.Id-p1.enter.Id) < 0
// setaddrs marks every variable in bit as address-taken.
// Because several Vars can be pieces of the same Go variable
// (same node and name), all matching pieces are disabled together.
166 func setaddrs(bit Bits) {
173 // convert each bit to a variable
177 n = int(vars[i].name)
180 // disable all pieces of that variable
181 for i = 0; i < nvar; i++ {
183 if v.node == node && int(v.name) == n {
// regnodes caches the pseudo-Nodes created for hardware registers,
// so regopt reuses them across functions instead of reallocating.
190 var regnodes [64]*Node
// walkvardef propagates a VARDEF for n forward along the straight-line
// successor chain (S1), setting the act bit for every Var piece of n,
// and recursing into branch successors (S2). It stops at a VARKILL of n
// or at a CALL, and uses f.Active to avoid revisiting nodes.
192 func walkvardef(n *Node, f *Flow, active int) {
197 for f1 = f; f1 != nil; f1 = f1.S1 {
198 if f1.Active == int32(active) {
201 f1.Active = int32(active)
202 if f1.Prog.As == obj.AVARKILL && f1.Prog.To.Node == n {
// Set act for every Var in n's linked list (see v.nextinnode).
205 for v, _ = n.Opt().(*Var); v != nil; v = v.nextinnode {
207 biset(&(f1.Data.(*Reg)).act, uint(bn))
210 if f1.Prog.As == obj.ACALL {
// Recurse into the branch targets of the nodes just walked.
215 for f2 := f; f2 != f1; f2 = f2.S1 {
217 walkvardef(n, f2.S2, active)
// addmove inserts a move instruction after r, transferring variable bn
// between memory and register rn. Callers pass f=0 for a load into the
// register and f=1 for a store back to memory (see paint3).
224 func addmove(r *Flow, bn int, rn int, f int) {
// Build the memory operand describing the variable.
238 a.Etype = uint8(v.etype)
239 a.Type = obj.TYPE_MEM
242 a.Sym = Linksym(v.node.Sym)
// Leftover commented-out C from the 9g era, kept for reference:
245 if(a->etype == TARRAY)
247 else if(a->sym == nil)
248 a->type = TYPE_CONST;
250 p1.As = Thearch.Optoas(OAS, Types[uint8(v.etype)])
252 // TODO(rsc): Remove special case here.
253 if Thearch.LinkArch.InFamily(sys.MIPS64, sys.ARM, sys.ARM64, sys.PPC64) && v.etype == TBOOL {
254 p1.As = Thearch.Optoas(OAS, Types[TUINT8])
256 p1.From.Type = obj.TYPE_REG
257 p1.From.Reg = int16(rn)
258 p1.From.Name = obj.NAME_NONE
262 a.Type = obj.TYPE_REG
266 if Debug['R'] != 0 && Debug['v'] != 0 {
267 fmt.Printf("%v ===add=== %v\n", p, p1)
// overlap_reg reports whether the byte ranges [o1, o1+w1) and
// [o2, o2+w2) overlap; disjoint ranges (t1 <= o2 or t2 <= o1) do not.
272 func overlap_reg(o1 int64, w1 int, o2 int64, w2 int) bool {
276 if t1 <= o2 || t2 <= o1 {
// mkvar returns the Bits for the variable (or hardware register)
// referenced by address a in the instruction wrapped by f, creating a
// new Var entry on first sight. As a side effect it records register
// usage in f's Reg and classifies the variable into the global sets
// (externs, params, ivar, ovar, addrs).
283 func mkvar(f *Flow, a *obj.Addr) Bits {
284 // mark registers used
285 if a.Type == obj.TYPE_NONE {
290 r.use1.b[0] |= Thearch.Doregbits(int(a.Index)) // TODO: Use RtoB
295 regu := Thearch.Doregbits(int(a.Reg)) | Thearch.RtoB(int(a.Reg)) // TODO: Use RtoB
303 // TODO(rsc): Remove special case here.
306 if Thearch.LinkArch.InFamily(sys.MIPS64, sys.ARM, sys.ARM64, sys.PPC64) {
309 a.Type = obj.TYPE_MEM
312 a.Type = obj.TYPE_ADDR
321 r.use1.b[0] |= Thearch.RtoB(int(a.Reg))
// Leftover commented-out C (ARM pre/post-increment handling):
325 if(r->f.prog->scond & (C_PBIT|C_WBIT))
326 r->set.b[0] |= RtoB(a->reg);
330 // Note: This case handles NAME_EXTERN and NAME_STATIC.
331 // We treat these as requiring eager writes to memory, due to
332 // the possibility of a fault handler looking at them, so there is
333 // not much point in registerizing the loads.
334 // If we later choose the set of candidate variables from a
335 // larger list, these cases could be deprioritized instead of
// Only ONAME nodes with a canonical Orig are trackable variables.
345 node, _ := a.Node.(*Node)
346 if node == nil || node.Op != ONAME || node.Orig == nil {
350 if node.Orig != node {
351 Fatalf("%v: bad node", Ctxt.Dconv(a))
353 if node.Sym == nil || node.Sym.Name[0] == '.' {
360 Fatalf("bad width %d for %v", w, Ctxt.Dconv(a))
// Look for an existing Var with the same node/name.
365 for i := 0; i < nvar; i++ {
367 if v.node == node && int(v.name) == n {
370 if int64(v.width) == w {
371 // TODO(rsc): Remove special case for arm here.
372 if flag == 0 || Thearch.LinkArch.Family != sys.ARM {
379 // if they overlap, disable both
380 if overlap_reg(v.offset, v.width, o, int(w)) {
381 // print("disable overlap %s %d %d %d %d, %E != %E\n", s->name, v->offset, v->width, o, w, v->etype, et);
// Var table full or untrackable: optionally report, then give up.
395 if Debug['w'] > 1 && node != nil {
396 Fatalf("variable not optimized: %v", Nconv(node, FmtSharp))
399 Warn("variable not optimized: %v", Nconv(node, FmtSharp))
402 // If we're not tracking a word in a variable, mark the rest as
403 // having its address taken, so that we keep the whole thing
404 // live at all calls. otherwise we might optimize away part of
405 // a variable but not all of it.
407 for i := 0; i < nvar; i++ {
425 v.addr = int8(flag) // funny punning
428 // node->opt is the head of a linked list
429 // of Vars within the given Node, so that
430 // we can start at a Var and find all the other
431 // Vars in the same Go variable.
432 v.nextinnode, _ = node.Opt().(*Var)
// Classify the new variable into the global Bits sets.
437 if n == obj.NAME_EXTERN || n == obj.NAME_STATIC {
438 for z := 0; z < BITS; z++ {
439 externs.b[z] |= bit.b[z]
442 if n == obj.NAME_PARAM {
443 for z := 0; z < BITS; z++ {
444 params.b[z] |= bit.b[z]
448 if node.Class == PPARAM {
449 for z := 0; z < BITS; z++ {
450 ivar.b[z] |= bit.b[z]
453 if node.Class == PPARAMOUT {
454 for z := 0; z < BITS; z++ {
455 ovar.b[z] |= bit.b[z]
459 // Treat values with their address taken as live at calls,
460 // because the garbage collector's liveness analysis in plive.go does.
461 // These must be consistent or else we will elide stores and the garbage
462 // collector will see uninitialized data.
463 // The typical case where our own analysis is out of sync is when the
464 // node appears to have its address taken but that code doesn't actually
465 // get generated and therefore doesn't show up as an address being
466 // taken when we analyze the instruction stream.
467 // One instance of this case is when a closure uses the same name as
468 // an outer variable for one of its own variables declared with :=.
469 // The parser flags the outer variable as possibly shared, and therefore
470 // sets addrtaken, even though it ends up not being actually shared.
471 // If we were better about _ elision, _ = &x would suffice too.
472 // The broader := in a closure problem is mentioned in a comment in
473 // closure.go:/^typecheckclosure and dcl.go:/^oldname.
478 // Disable registerization for globals, because:
479 // (1) we might panic at any time and we want the recovery code
480 // to see the latest values (issue 1304).
481 // (2) we don't know what pointers might point at them and we want
482 // loads via those pointers to see updated values and vice versa (issue 7995).
484 // Disable registerization for results if using defer, because the deferred func
485 // might recover and return, causing the current values to be used.
486 if node.Class == PEXTERN || (hasdefer && node.Class == PPARAMOUT) {
491 fmt.Printf("bit=%2d et=%v w=%d+%d %v %v flag=%d\n", i, et, o, w, Nconv(node, FmtSharp), Ctxt.Dconv(a), v.addr)
// prop propagates the ref (used-before-set) and cal (live-across-call)
// sets backward through the flow graph starting at f, walking the P1
// predecessor chain directly and recursing into P2 predecessors.
// It stops early once a node's refahead/calahead stop changing.
500 func prop(f *Flow, ref Bits, cal Bits) {
508 for f1 = f; f1 != nil; f1 = f1.P1 {
// Merge into this node's ahead sets; note whether anything changed.
510 for z = 0; z < BITS; z++ {
511 ref.b[z] |= r1.refahead.b[z]
512 if ref.b[z] != r1.refahead.b[z] {
513 r1.refahead.b[z] = ref.b[z]
517 cal.b[z] |= r1.calahead.b[z]
518 if cal.b[z] != r1.calahead.b[z] {
519 r1.calahead.b[z] = cal.b[z]
526 if Noreturn(f1.Prog) {
530 // Mark all input variables (ivar) as used, because that's what the
531 // liveness bitmaps say. The liveness bitmaps say that so that a
532 // panic will not show stale values in the parameter dump.
533 // Mark variables with a recent VARDEF (r1->act) as used,
534 // so that the optimizer flushes initializations to memory,
535 // so that if a garbage collection happens during this CALL,
536 // the collector will see initialized memory. Again this is to
537 // match what the liveness bitmaps say.
538 for z = 0; z < BITS; z++ {
539 cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1.act.b[z]
543 // cal.b is the current approximation of what's live across the call.
544 // Every bit in cal.b is a single stack word. For each such word,
545 // find all the other tracked stack words in the same Go variable
546 // (struct/slice/string/interface) and mark them live too.
547 // This is necessary because the liveness analysis for the garbage
548 // collector works at variable granularity, not at word granularity.
549 // It is fundamental for slice/string/interface: the garbage collector
550 // needs the whole value, not just some of the words, in order to
551 // interpret the other bits correctly. Specifically, slice needs a consistent
552 // ptr and cap, string needs a consistent ptr and len, and interface
553 // needs a consistent type word and data word.
554 for z = 0; z < BITS; z++ {
558 for i = 0; i < 64; i++ {
559 if z*64+i >= nvar || (cal.b[z]>>uint(i))&1 == 0 {
563 if v.node.Opt() == nil { // v represents fixed register, not Go variable
567 // v->node->opt is the head of a linked list of Vars
568 // corresponding to tracked words from the Go variable v->node.
569 // Walk the list and set all the bits.
570 // For a large struct this could end up being quadratic:
571 // after the first setting, the outer loop (for z, i) would see a 1 bit
572 // for all of the remaining words in the struct, and for each such
573 // word would go through and turn on all the bits again.
574 // To avoid the quadratic behavior, we only turn on the bits if
575 // v is the head of the list or if the head's bit is not yet turned on.
576 // This will set the bits at most twice, keeping the overall loop linear.
577 v1, _ = v.node.Opt().(*Var)
579 if v == v1 || !btest(&cal, uint(v1.id)) {
580 for ; v1 != nil; v1 = v1.nextinnode {
581 biset(&cal, uint(v1.id))
588 for z = 0; z < BITS; z++ {
// At a return point, globals and results are considered live.
594 for z = 0; z < BITS; z++ {
595 cal.b[z] = externs.b[z] | ovar.b[z]
// Transfer function for an ordinary instruction:
// kill what it sets, then add what it reads.
600 for z = 0; z < BITS; z++ {
601 ref.b[z] = ref.b[z]&^r1.set.b[z] | r1.use1.b[z] | r1.use2.b[z]
602 cal.b[z] &^= (r1.set.b[z] | r1.use1.b[z] | r1.use2.b[z])
603 r1.refbehind.b[z] = ref.b[z]
604 r1.calbehind.b[z] = cal.b[z]
// Recurse into the other predecessors (P2 chains) of each node walked.
615 for ; f != f1; f = f.P1 {
617 for f2 = f.P2; f2 != nil; f2 = f2.P2link {
618 prop(f2, r.refbehind, r.calbehind)
// synch propagates the register/variable difference set dif forward
// along S1 successors, accumulating each node's set and regdiff bits
// and stopping once regdiff reaches a fixed point.
623 func synch(f *Flow, dif Bits) {
627 for f1 := f; f1 != nil; f1 = f1.S1 {
629 for z = 0; z < BITS; z++ {
630 dif.b[z] = dif.b[z]&^(^r1.refbehind.b[z]&r1.refahead.b[z]) | r1.set.b[z] | r1.regdiff.b[z]
631 if dif.b[z] != r1.regdiff.b[z] {
632 r1.regdiff.b[z] = dif.b[z]
// Variables that become live across a call are no longer in sync.
641 for z = 0; z < BITS; z++ {
642 dif.b[z] &^= (^r1.calbehind.b[z] & r1.calahead.b[z])
// allreg chooses a free hardware register for region r, given b, the
// set of registers already in use. It returns the chosen register's
// bit (integer registers via BtoR/RtoB, floats via BtoF/FtoB), or
// falls through when no register is available or the region has no
// positive cost.
650 func allreg(b uint64, r *Rgn) uint64 {
655 Fatalf("unknown etype %d/%v", Bitno(b), v.etype)
// Integer case: pick a register not in b (hence ^b).
671 i := Thearch.BtoR(^b)
672 if i != 0 && r.cost > 0 {
674 return Thearch.RtoB(i)
677 case TFLOAT32, TFLOAT64:
678 i := Thearch.BtoF(^b)
679 if i != 0 && r.cost > 0 {
681 return Thearch.FtoB(i)
// LOAD returns, for word z, the variables whose values become needed
// at r: referenced ahead but not behind (a load would be required).
688 func LOAD(r *Reg, z int) uint64 {
689 return ^r.refbehind.b[z] & r.refahead.b[z]
// STORE returns, for word z, the variables that become live across a
// call at r: call-live ahead but not behind (a store would be required).
692 func STORE(r *Reg, z int) uint64 {
693 return ^r.calbehind.b[z] & r.calahead.b[z]
// Cost weights used by paint1 to score the benefit of registerizing a
// variable; each is scaled by the enclosing loop count (f.Loop).
698 CLOAD = 5 // cost of load
699 CREF = 5 // cost of reference if not registerized
700 LOOP = 3 // loop execution count (applied in popt.go)
// paint1 flood-fills the region for variable bn starting at f, marking
// visited nodes via the act bit and accumulating into change the net
// benefit of keeping bn in a register: loads and stores avoided count
// against (CLOAD), references satisfied from the register count for
// (CREF), each weighted by the loop nesting depth f.Loop.
703 func paint1(f *Flow, bn int) {
705 bb := uint64(1 << uint(bn%64))
707 if r.act.b[z]&bb != 0 {
// Walk backward while bn is referenced behind.
713 if r.refbehind.b[z]&bb == 0 {
721 if r1.refahead.b[z]&bb == 0 {
724 if r1.act.b[z]&bb != 0 {
// A load would be needed here if bn were not registerized.
731 if LOAD(r, z)&^(r.set.b[z]&^(r.use1.b[z]|r.use2.b[z]))&bb != 0 {
732 change -= CLOAD * int(f.Loop)
738 if f.Prog.As != obj.ANOP { // don't give credit for NOPs
739 if r.use1.b[z]&bb != 0 {
740 change += CREF * int(f.Loop)
742 if (r.use2.b[z]|r.set.b[z])&bb != 0 {
743 change += CREF * int(f.Loop)
// A store would be needed here to resynchronize memory.
747 if STORE(r, z)&r.regdiff.b[z]&bb != 0 {
748 change -= CLOAD * int(f.Loop)
// Recurse into other predecessors and successors of the region.
751 if r.refbehind.b[z]&bb != 0 {
752 for f1 = f.P2; f1 != nil; f1 = f1.P2link {
753 if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
759 if r.refahead.b[z]&bb == 0 {
764 if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
773 if r.act.b[z]&bb != 0 {
776 if r.refbehind.b[z]&bb == 0 {
// paint2 retraces the region for variable bn (previously marked by
// paint1 via the act bits, which it clears) and returns the union of
// hardware register bits (regu) used anywhere in the region, so allreg
// can avoid those registers. depth is for debug indentation only.
782 func paint2(f *Flow, bn int, depth int) uint64 {
784 bb := uint64(1 << uint(bn%64))
787 if r.act.b[z]&bb == 0 {
793 if r.refbehind.b[z]&bb == 0 {
801 if r1.refahead.b[z]&bb == 0 {
804 if r1.act.b[z]&bb == 0 {
812 if Debug['R'] != 0 && Debug['v'] != 0 {
813 fmt.Printf(" paint2 %d %v\n", depth, f.Prog)
// Recurse into the rest of the region, same shape as paint1.
820 if r.refbehind.b[z]&bb != 0 {
821 for f1 = f.P2; f1 != nil; f1 = f1.P2link {
822 if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
823 vreg |= paint2(f1, bn, depth+1)
828 if r.refahead.b[z]&bb == 0 {
833 if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
834 vreg |= paint2(f1, bn, depth+1)
842 if r.act.b[z]&bb == 0 {
845 if r.refbehind.b[z]&bb == 0 {
// paint3 rewrites the region for variable bn to use register rn
// (register-bit set rb): it inserts a load (addmove f=0) where the
// value first becomes needed, replaces uses and sets in each
// instruction's operands with the register, and inserts a store
// (addmove f=1) where memory must be resynchronized. It re-marks the
// act bits that paint2 cleared.
853 func paint3(f *Flow, bn int, rb uint64, rn int) {
855 bb := uint64(1 << uint(bn%64))
857 if r.act.b[z]&bb != 0 {
863 if r.refbehind.b[z]&bb == 0 {
871 if r1.refahead.b[z]&bb == 0 {
874 if r1.act.b[z]&bb != 0 {
// Entry point of the live range: load the variable into rn.
881 if LOAD(r, z)&^(r.set.b[z]&^(r.use1.b[z]|r.use2.b[z]))&bb != 0 {
882 addmove(f, bn, rn, 0)
// Rewrite reads (use1) and writes/reads (use2/set) to the register.
889 if r.use1.b[z]&bb != 0 {
890 if Debug['R'] != 0 && Debug['v'] != 0 {
894 if Debug['R'] != 0 && Debug['v'] != 0 {
895 fmt.Printf(" ===change== %v\n", p)
899 if (r.use2.b[z]|r.set.b[z])&bb != 0 {
900 if Debug['R'] != 0 && Debug['v'] != 0 {
904 if Debug['R'] != 0 && Debug['v'] != 0 {
905 fmt.Printf(" ===change== %v\n", p)
// Exit point of the live range: store the register back to memory.
909 if STORE(r, z)&r.regdiff.b[z]&bb != 0 {
910 addmove(f, bn, rn, 1)
// Recurse into the rest of the region, same shape as paint1/paint2.
914 if r.refbehind.b[z]&bb != 0 {
915 for f1 = f.P2; f1 != nil; f1 = f1.P2link {
916 if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
917 paint3(f1, bn, rb, rn)
922 if r.refahead.b[z]&bb == 0 {
927 if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
928 paint3(f1, bn, rb, rn)
936 if r.act.b[z]&bb != 0 {
939 if r.refbehind.b[z]&bb == 0 {
// addreg rewrites operand a in place to refer to register rn.
945 func addreg(a *obj.Addr, rn int) {
949 a.Type = obj.TYPE_REG
// dumpone prints one flow node's instruction and, when any dataflow
// bits are set, a one-line summary of its Reg sets (debug output for
// Dumpit). isreg selects whether register sets are meaningful.
956 func dumpone(f *Flow, isreg int) {
957 fmt.Printf("%d:%v", f.Loop, f.Prog)
// Union all sets so empty nodes print nothing extra.
961 for z := 0; z < BITS; z++ {
962 bit.b[z] = r.set.b[z] | r.use1.b[z] | r.use2.b[z] | r.refbehind.b[z] | r.refahead.b[z] | r.calbehind.b[z] | r.calahead.b[z] | r.regdiff.b[z] | r.act.b[z] | 0
967 fmt.Printf(" s:%v", &r.set)
970 fmt.Printf(" u1:%v", &r.use1)
973 fmt.Printf(" u2:%v", &r.use2)
975 if bany(&r.refbehind) {
976 fmt.Printf(" rb:%v ", &r.refbehind)
978 if bany(&r.refahead) {
979 fmt.Printf(" ra:%v ", &r.refahead)
981 if bany(&r.calbehind) {
982 fmt.Printf(" cb:%v ", &r.calbehind)
984 if bany(&r.calahead) {
985 fmt.Printf(" ca:%v ", &r.calahead)
987 if bany(&r.regdiff) {
988 fmt.Printf(" d:%v ", &r.regdiff)
991 fmt.Printf(" a:%v ", &r.act)
// Dumpit prints the whole flow graph starting at r0 under the heading
// str, one node per line via dumpone, annotated with predecessor and
// successor instruction PCs when they are not simply the linear order.
999 func Dumpit(str string, r0 *Flow, isreg int) {
1002 fmt.Printf("\n%s\n", str)
1003 for r := r0; r != nil; r = r.Link {
1007 fmt.Printf("\tpred:")
1008 for ; r1 != nil; r1 = r1.P2link {
1009 fmt.Printf(" %.4d", uint(int(r1.Prog.Pc)))
1012 fmt.Printf(" (and %.4d)", uint(int(r.P1.Prog.Pc)))
1014 fmt.Printf(" (only)")
1019 // Print successors if it's not just the next one
1020 if r.S1 != r.Link || r.S2 != nil {
1021 fmt.Printf("\tsucc:")
1023 fmt.Printf(" %.4d", uint(int(r.S1.Prog.Pc)))
1026 fmt.Printf(" %.4d", uint(int(r.S2.Prog.Pc)))
// regopt is the driver for register allocation over the instruction
// list starting at firstp. Its passes: build the flow graph and per-
// instruction use/set sets (via mkvar), find loops, propagate VARDEFs
// (walkvardef), propagate liveness backward (prop) and synchrony
// forward (synch), score candidate regions (paint1), sort them by cost
// (rcmp), then for each region pick a register (paint2/allreg) and
// rewrite the code (paint3). Finally it runs the peephole optimizer
// and strips NOPs.
1033 func regopt(firstp *obj.Prog) {
1036 // control flow is more complicated in generated go code
1037 // than in generated c code. define pseudo-variables for
1038 // registers, so we have complete register usage information.
1040 regnames := Thearch.Regnames(&nreg)
1043 for i := 0; i < nreg; i++ {
1046 for i := 0; i < nreg; i++ {
1047 if regnodes[i] == nil {
1048 regnodes[i] = newname(Lookup(regnames[i]))
1050 vars[i].node = regnodes[i]
1053 regbits = Thearch.Excludedregs()
1062 // build aux data structure
1064 // find use and set of variables
1065 g := Flowstart(firstp, func() interface{} { return new(Reg) })
1067 for i := 0; i < nvar; i++ {
1068 vars[i].node.SetOpt(nil)
// Pass 1: compute use1/use2/set for every instruction.
1075 for f := firstf; f != nil; f = f.Link {
1077 // AVARLIVE must be considered a use, do not skip it.
1078 // Otherwise the variable will be optimized away,
1079 // and the whole point of AVARLIVE is to keep it on the stack.
1080 if p.As == obj.AVARDEF || p.As == obj.AVARKILL {
1084 // Avoid making variables for direct-called functions.
1085 if p.As == obj.ACALL && p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_EXTERN {
1089 // from vs to doesn't matter for registers.
1091 r.use1.b[0] |= p.Info.Reguse | p.Info.Regindex
1092 r.set.b[0] |= p.Info.Regset
1094 bit := mkvar(f, &p.From)
1096 if p.Info.Flags&LeftAddr != 0 {
1099 if p.Info.Flags&LeftRead != 0 {
1100 for z := 0; z < BITS; z++ {
1101 r.use1.b[z] |= bit.b[z]
1104 if p.Info.Flags&LeftWrite != 0 {
1105 for z := 0; z < BITS; z++ {
1106 r.set.b[z] |= bit.b[z]
1111 // Compute used register for reg
1112 if p.Info.Flags&RegRead != 0 {
1113 r.use1.b[0] |= Thearch.RtoB(int(p.Reg))
1116 // Currently we never generate three register forms.
1117 // If we do, this will need to change.
1118 if p.From3Type() != obj.TYPE_NONE && p.From3Type() != obj.TYPE_CONST {
1119 Fatalf("regopt not implemented for from3")
1122 bit = mkvar(f, &p.To)
1124 if p.Info.Flags&RightAddr != 0 {
1127 if p.Info.Flags&RightRead != 0 {
1128 for z := 0; z < BITS; z++ {
1129 r.use2.b[z] |= bit.b[z]
1132 if p.Info.Flags&RightWrite != 0 {
1133 for z := 0; z < BITS; z++ {
1134 r.set.b[z] |= bit.b[z]
// Fold address-taken variables into the addrs set.
1140 for i := 0; i < nvar; i++ {
1143 bit := blsh(uint(i))
1144 for z := 0; z < BITS; z++ {
1145 addrs.b[z] |= bit.b[z]
1149 if Debug['R'] != 0 && Debug['v'] != 0 {
1150 fmt.Printf("bit=%2d addr=%d et=%v w=%-2d s=%v + %d\n", i, v.addr, v.etype, v.width, v.node, v.offset)
1154 if Debug['R'] != 0 && Debug['v'] != 0 {
1155 Dumpit("pass1", firstf, 1)
1159 // find looping structure
1162 if Debug['R'] != 0 && Debug['v'] != 0 {
1163 Dumpit("pass2", firstf, 1)
1167 // iterate propagating fat vardef covering forward
1168 // r->act records vars with a VARDEF since the last CALL.
1169 // (r->act will be reused in pass 5 for something else,
1170 // but we'll be done with it by then.)
1173 for f := firstf; f != nil; f = f.Link {
1179 for f := firstf; f != nil; f = f.Link {
1181 if p.As == obj.AVARDEF && Isfat(((p.To.Node).(*Node)).Type) && ((p.To.Node).(*Node)).Opt() != nil {
1183 walkvardef(p.To.Node.(*Node), f, active)
1188 // iterate propagating usage
1189 // back until flow graph is complete
1196 for f = firstf; f != nil; f = f.Link {
1199 for f = firstf; f != nil; f = f.Link {
1200 if f.Prog.As == obj.ARET {
1201 prop(f, zbits, zbits)
1205 // pick up unreachable code
1209 for f = firstf; f != nil; f = f1 {
1211 if f1 != nil && f1.Active != 0 && f.Active == 0 {
1212 prop(f, zbits, zbits)
1224 if Debug['R'] != 0 && Debug['v'] != 0 {
1225 Dumpit("pass3", firstf, 1)
1229 // iterate propagating register/variable synchrony
1230 // forward until graph is complete
1234 for f = firstf; f != nil; f = f.Link {
1237 synch(firstf, zbits)
1242 if Debug['R'] != 0 && Debug['v'] != 0 {
1243 Dumpit("pass4", firstf, 1)
1247 // move register pseudo-variables into regu.
1248 mask := uint64((1 << uint(nreg)) - 1)
1249 for f := firstf; f != nil; f = f.Link {
1251 r.regu = (r.refbehind.b[0] | r.set.b[0]) & mask
// Strip the register pseudo-variables out of the variable sets.
1253 r.use1.b[0] &^= mask
1254 r.use2.b[0] &^= mask
1255 r.refbehind.b[0] &^= mask
1256 r.refahead.b[0] &^= mask
1257 r.calbehind.b[0] &^= mask
1258 r.calahead.b[0] &^= mask
1259 r.regdiff.b[0] &^= mask
1263 if Debug['R'] != 0 && Debug['v'] != 0 {
1264 Dumpit("pass4.5", firstf, 1)
1269 // calculate costs (paint1)
1271 if f := firstf; f != nil {
1273 for z := 0; z < BITS; z++ {
1274 bit.b[z] = (r.refahead.b[z] | r.calahead.b[z]) &^ (externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z])
1276 if bany(&bit) && !f.Refset {
1277 // should never happen - all variables are preset
1278 if Debug['w'] != 0 {
1279 fmt.Printf("%v: used and not set: %v\n", f.Prog.Line(), &bit)
1285 for f := firstf; f != nil; f = f.Link {
1286 (f.Data.(*Reg)).act = zbits
1290 for f := firstf; f != nil; f = f.Link {
1292 for z := 0; z < BITS; z++ {
1293 bit.b[z] = r.set.b[z] &^ (r.refahead.b[z] | r.calahead.b[z] | addrs.b[z])
1295 if bany(&bit) && !f.Refset {
1296 if Debug['w'] != 0 {
1297 fmt.Printf("%v: set and not used: %v\n", f.Prog.Line(), &bit)
// Collect candidate regions: each variable load point starts one.
1303 for z := 0; z < BITS; z++ {
1304 bit.b[z] = LOAD(r, z) &^ (r.act.b[z] | addrs.b[z])
1310 biclr(&bit, uint(i))
1314 if nregion >= MaxRgn {
1319 region = append(region, Rgn{
1321 cost: int16(change),
1328 if false && Debug['v'] != 0 && strings.Contains(Curfn.Func.Nname.Sym.Name, "Parse") {
1329 Warn("regions: %d\n", nregion)
1331 if nregion >= MaxRgn {
1332 if Debug['v'] != 0 {
1333 Warn("too many regions: %d\n", nregion)
// Best (highest-cost) regions get first pick of registers.
1338 sort.Sort(rcmp(region[:nregion]))
1340 if Debug['R'] != 0 && Debug['v'] != 0 {
1341 Dumpit("pass5", firstf, 1)
1345 // determine used registers (paint2)
1346 // replace code (paint3)
1347 if Debug['R'] != 0 && Debug['v'] != 0 {
1348 fmt.Printf("\nregisterizing\n")
1350 for i := 0; i < nregion; i++ {
1352 if Debug['R'] != 0 && Debug['v'] != 0 {
1353 fmt.Printf("region %d: cost %d varno %d enter %d\n", i, rgp.cost, rgp.varno, rgp.enter.Prog.Pc)
1355 bit = blsh(uint(rgp.varno))
1356 usedreg := paint2(rgp.enter, int(rgp.varno), 0)
1357 vreg := allreg(usedreg, rgp)
1359 if Debug['R'] != 0 && Debug['v'] != 0 {
1360 v := &vars[rgp.varno]
1361 fmt.Printf("registerize %v+%d (bit=%2d et=%v) in %v usedreg=%#x vreg=%#x\n", v.node, v.offset, rgp.varno, v.etype, obj.Rconv(int(rgp.regno)), usedreg, vreg)
1364 paint3(rgp.enter, int(rgp.varno), vreg, int(rgp.regno))
1368 // free aux structures. peep allocates new ones.
1369 for i := 0; i < nvar; i++ {
1370 vars[i].node.SetOpt(nil)
1375 if Debug['R'] != 0 && Debug['v'] != 0 {
1376 // Rebuild flow graph, since we inserted instructions
1377 g := Flowstart(firstp, nil)
1379 Dumpit("pass6", firstf, 0)
1385 // peep-hole on basic block
1386 if Debug['R'] == 0 || Debug['P'] != 0 {
1387 Thearch.Peep(firstp)
// Eliminate NOPs left behind, both inline and as branch targets.
1391 for p := firstp; p != nil; p = p.Link {
1392 for p.Link != nil && p.Link.As == obj.ANOP {
1393 p.Link = p.Link.Link
1395 if p.To.Type == obj.TYPE_BRANCH {
1396 for p.To.Val.(*obj.Prog) != nil && p.To.Val.(*obj.Prog).As == obj.ANOP {
1397 p.To.Val = p.To.Val.(*obj.Prog).Link
1402 if Debug['R'] != 0 {
1403 if Ostats.Ncvtreg != 0 || Ostats.Nspill != 0 || Ostats.Nreload != 0 || Ostats.Ndelmov != 0 || Ostats.Nvar != 0 || Ostats.Naddr != 0 || false {
1404 fmt.Printf("\nstats\n")
1407 if Ostats.Ncvtreg != 0 {
1408 fmt.Printf("\t%4d cvtreg\n", Ostats.Ncvtreg)
1410 if Ostats.Nspill != 0 {
1411 fmt.Printf("\t%4d spill\n", Ostats.Nspill)
1413 if Ostats.Nreload != 0 {
1414 fmt.Printf("\t%4d reload\n", Ostats.Nreload)
1416 if Ostats.Ndelmov != 0 {
1417 fmt.Printf("\t%4d delmov\n", Ostats.Ndelmov)
1419 if Ostats.Nvar != 0 {
1420 fmt.Printf("\t%4d var\n", Ostats.Nvar)
1422 if Ostats.Naddr != 0 {
1423 fmt.Printf("\t%4d addr\n", Ostats.Naddr)
1430 // bany reports whether any bits in a are set.
// It is the emptiness test used by the debug dumps (see dumpone).
1431 func bany(a *Bits) bool {
1432 for _, x := range &a.b { // & to avoid making a copy of a.b
1440 // bnum reports the lowest index of a 1 bit in a.
// It calls Fatalf (via the fallthrough below) if a is empty.
1441 func bnum(a *Bits) int {
1442 for i, x := range &a.b { // & to avoid making a copy of a.b
1444 return 64*i + Bitno(x)
1448 Fatalf("bad in bnum")
1452 // blsh returns a Bits with 1 at index n, 0 elsewhere (1<<n).
// Word n/64, bit n%64 — the same indexing used by btest/biset/biclr.
1453 func blsh(n uint) Bits {
1455 c.b[n/64] = 1 << (n % 64)
1459 // btest reports whether bit n is 1.
// n is a variable number (v.id), as throughout the Bits helpers.
1460 func btest(a *Bits, n uint) bool {
1461 return a.b[n/64]&(1<<(n%64)) != 0
1464 // biset sets bit n to 1.
// In-place counterpart of blsh: word n/64, bit n%64.
1465 func biset(a *Bits, n uint) {
1466 a.b[n/64] |= 1 << (n % 64)
1469 // biclr sets bit n to 0.
// &^= is Go's AND NOT, clearing exactly the selected bit.
1470 func biclr(a *Bits, n uint) {
1471 a.b[n/64] &^= (1 << (n % 64))
1474 // Bitno reports the lowest index of a 1 bit in b.
1475 // It calls Fatalf if there is no 1 bit.
// The body is a binary search: each test checks whether the low
// 32/16/8/4/2 bits are all zero and skips past them if so.
1476 func Bitno(b uint64) int {
1478 Fatalf("bad in bitno")
1481 if b&(1<<32-1) == 0 {
1485 if b&(1<<16-1) == 0 {
1489 if b&(1<<8-1) == 0 {
1493 if b&(1<<4-1) == 0 {
1497 if b&(1<<2-1) == 0 {
1507 // String returns a space-separated list of the variables represented by bits.
1508 func (bits Bits) String() string {
1509 // Note: This method takes a value receiver, both for convenience
1510 // and to make it safe to modify the bits as we process them.
1511 // Even so, most prints above use &bits, because then the value
1512 // being stored in the interface{} is a pointer and does not require
1513 // an allocation and copy to create the interface{}.
1514 var buf bytes.Buffer
1518 buf.WriteString(sep)
// Fixed registers print as $i; named variables as name(i)+offset.
1521 if v.node == nil || v.node.Sym == nil {
1522 fmt.Fprintf(&buf, "$%d", i)
1524 fmt.Fprintf(&buf, "%s(%d)", v.node.Sym.Name, i)
1526 fmt.Fprintf(&buf, "%+d", v.offset)
// Clearing the bit advances the loop to the next set variable.
1529 biclr(&bits, uint(i))