1 // Inferno utils/5c/reg.c
2 // http://code.google.com/p/inferno-os/source/browse/utils/5c/reg.c
4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
5 // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
6 // Portions Copyright © 1997-1999 Vita Nuova Limited
7 // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
8 // Portions Copyright © 2004,2006 Bruce Ellis
9 // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
10 // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
11 // Portions Copyright © 2009 The Go Authors. All rights reserved.
13 // Permission is hereby granted, free of charge, to any person obtaining a copy
14 // of this software and associated documentation files (the "Software"), to deal
15 // in the Software without restriction, including without limitation the rights
16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 // copies of the Software, and to permit persons to whom the Software is
18 // furnished to do so, subject to the following conditions:
20 // The above copyright notice and this permission notice shall be included in
21 // all copies or substantial portions of the Software.
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
38 #define REGBITS ((uint64)0xffffffffull)
50 rcmp(const void *a1, const void *a2)
61 return p2->varno - p1->varno;
71 p->scond = zprog.scond;
85 // convert each bit to a variable
91 // disable all pieces of that variable
92 for(i=0; i<nvar; i++) {
94 if(v->node == node && v->name == n)
100 static char* regname[] = {
135 static Node* regnodes[NREGVAR];
137 static void walkvardef(Node *n, Reg *r, int active);
151 fmtinstall('Q', Qconv);
158 * control flow is more complicated in generated go code
159 * than in generated c code. define pseudo-variables for
160 * registers, so we have complete register usage information.
163 memset(var, 0, NREGVAR*sizeof var[0]);
164 for(i=0; i<NREGVAR; i++) {
166 regnodes[i] = newname(lookup(regname[i]));
167 var[i].node = regnodes[i];
170 regbits = RtoB(REGSP)|RtoB(REGLINK)|RtoB(REGPC);
171 for(z=0; z<BITS; z++) {
182 * build aux data structure
184 * find use and set of variables
186 g = flowstart(firstp, sizeof(Reg));
188 for(i=0; i<nvar; i++)
189 var[i].node->opt = nil;
193 firstr = (Reg*)g->start;
195 for(r = firstr; r != R; r = (Reg*)r->f.link) {
197 if(p->as == AVARDEF || p->as == AVARKILL)
201 // Avoid making variables for direct-called functions.
202 if(p->as == ABL && p->to.name == D_EXTERN)
205 bit = mkvar(r, &p->from);
206 if(info.flags & LeftRead)
207 for(z=0; z<BITS; z++)
208 r->use1.b[z] |= bit.b[z];
209 if(info.flags & LeftAddr)
212 if(info.flags & RegRead) {
213 if(p->from.type != D_FREG)
214 r->use1.b[0] |= RtoB(p->reg);
216 r->use1.b[0] |= FtoB(p->reg);
219 if(info.flags & (RightAddr | RightRead | RightWrite)) {
220 bit = mkvar(r, &p->to);
221 if(info.flags & RightAddr)
223 if(info.flags & RightRead)
224 for(z=0; z<BITS; z++)
225 r->use2.b[z] |= bit.b[z];
226 if(info.flags & RightWrite)
227 for(z=0; z<BITS; z++)
228 r->set.b[z] |= bit.b[z];
231 /* the mod/div runtime routines smash R12 */
232 if(p->as == ADIV || p->as == ADIVU || p->as == AMOD || p->as == AMODU)
233 r->set.b[z] |= RtoB(12);
238 for(i=0; i<nvar; i++) {
242 for(z=0; z<BITS; z++)
243 addrs.b[z] |= bit.b[z];
246 if(debug['R'] && debug['v'])
247 print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
248 i, v->addr, v->etype, v->width, v->node, v->offset);
251 if(debug['R'] && debug['v'])
252 dumpit("pass1", &firstr->f, 1);
256 * find looping structure
260 if(debug['R'] && debug['v'])
261 dumpit("pass2", &firstr->f, 1);
265 * iterate propagating fat vardef covering forward
266 * r->act records vars with a VARDEF since the last CALL.
267 * (r->act will be reused in pass 5 for something else,
268 * but we'll be done with it by then.)
271 for(r = firstr; r != R; r = (Reg*)r->f.link) {
275 for(r = firstr; r != R; r = (Reg*)r->f.link) {
277 if(p->as == AVARDEF && isfat(p->to.node->type) && p->to.node->opt != nil) {
279 walkvardef(p->to.node, r, active);
285 * iterate propagating usage
286 * back until flow graph is complete
290 for(r = firstr; r != R; r = (Reg*)r->f.link)
292 for(r = firstr; r != R; r = (Reg*)r->f.link)
293 if(r->f.prog->as == ARET)
294 prop(r, zbits, zbits);
296 /* pick up unreachable code */
298 for(r = firstr; r != R; r = r1) {
299 r1 = (Reg*)r->f.link;
300 if(r1 && r1->f.active && !r->f.active) {
301 prop(r, zbits, zbits);
310 if(debug['R'] && debug['v'])
311 dumpit("pass3", &firstr->f, 1);
316 * iterate propagating register/variable synchrony
317 * forward until graph is complete
321 for(r = firstr; r != R; r = (Reg*)r->f.link)
323 synch(firstr, zbits);
329 if(debug['R'] && debug['v'])
330 dumpit("pass4", &firstr->f, 1);
333 print("\nprop structure:\n");
334 for(r = firstr; r != R; r = (Reg*)r->f.link) {
335 print("%d:%P", r->f.loop, r->f.prog);
336 for(z=0; z<BITS; z++) {
337 bit.b[z] = r->set.b[z] |
338 r->refahead.b[z] | r->calahead.b[z] |
339 r->refbehind.b[z] | r->calbehind.b[z] |
340 r->use1.b[z] | r->use2.b[z];
341 bit.b[z] &= ~addrs.b[z];
347 print(" u1=%Q", r->use1);
349 print(" u2=%Q", r->use2);
351 print(" st=%Q", r->set);
352 if(bany(&r->refahead))
353 print(" ra=%Q", r->refahead);
354 if(bany(&r->calahead))
355 print(" ca=%Q", r->calahead);
356 if(bany(&r->refbehind))
357 print(" rb=%Q", r->refbehind);
358 if(bany(&r->calbehind))
359 print(" cb=%Q", r->calbehind);
367 * move register pseudo-variables into regu.
369 for(r = firstr; r != R; r = (Reg*)r->f.link) {
370 r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
372 r->set.b[0] &= ~REGBITS;
373 r->use1.b[0] &= ~REGBITS;
374 r->use2.b[0] &= ~REGBITS;
375 r->refbehind.b[0] &= ~REGBITS;
376 r->refahead.b[0] &= ~REGBITS;
377 r->calbehind.b[0] &= ~REGBITS;
378 r->calahead.b[0] &= ~REGBITS;
379 r->regdiff.b[0] &= ~REGBITS;
380 r->act.b[0] &= ~REGBITS;
383 if(debug['R'] && debug['v'])
384 dumpit("pass4.5", &firstr->f, 1);
389 * calculate costs (paint1)
393 for(z=0; z<BITS; z++)
394 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
395 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
396 if(bany(&bit) && !r->f.refset) {
397 // should never happen - all variables are preset
399 print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
404 for(r = firstr; r != R; r = (Reg*)r->f.link)
408 for(r = firstr; r != R; r = (Reg*)r->f.link) {
409 for(z=0; z<BITS; z++)
410 bit.b[z] = r->set.b[z] &
411 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
412 if(bany(&bit) && !r->f.refset) {
414 print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
418 for(z=0; z<BITS; z++)
419 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
431 print("%L $%d: %Q\n",
432 r->f.prog->lineno, change, blsh(i));
437 if(nregion >= NRGN) {
439 print("too many regions\n");
446 qsort(region, nregion, sizeof(region[0]), rcmp);
448 if(debug['R'] && debug['v'])
449 dumpit("pass5", &firstr->f, 1);
453 * determine used registers (paint2)
454 * replace code (paint3)
457 if(debug['R'] && debug['v'])
458 print("\nregisterizing\n");
459 for(i=0; i<nregion; i++) {
460 if(debug['R'] && debug['v'])
461 print("region %d: cost %d varno %d enter %d\n", i, rgp->cost, rgp->varno, rgp->enter->f.prog->pc);
462 bit = blsh(rgp->varno);
463 vreg = paint2(rgp->enter, rgp->varno, 0);
464 vreg = allreg(vreg, rgp);
466 if(rgp->regno >= NREG)
467 print("%L $%d F%d: %Q\n",
468 rgp->enter->f.prog->lineno,
473 print("%L $%d R%d: %Q\n",
474 rgp->enter->f.prog->lineno,
480 paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
485 * free aux structures. peep allocates new ones.
487 for(i=0; i<nvar; i++)
488 var[i].node->opt = nil;
492 if(debug['R'] && debug['v']) {
493 // Rebuild flow graph, since we inserted instructions
494 g = flowstart(firstp, sizeof(Reg));
495 firstr = (Reg*)g->start;
496 dumpit("pass6", &firstr->f, 1);
503 * peep-hole on basic block
505 if(!debug['R'] || debug['P']) {
509 if(debug['R'] && debug['v'])
510 dumpit("pass7", &firstr->f, 1);
515 * free aux structures
516 * adjust the stack pointer
517 * MOVW.W R1,-12(R13) <<- start
522 * BL ,runtime.newproc+0(SB)
523 * MOVW &ft+-32(SP),R7 <<- adjust
524 * MOVW &j+-40(SP),R6 <<- adjust
525 * MOVW autotmp_0003+-24(SP),R5 <<- adjust
526 * MOVW $12(R13),R13 <<- finish
529 for(p = firstp; p != P; p = p->link) {
530 while(p->link != P && p->link->as == ANOP)
531 p->link = p->link->link;
532 if(p->to.type == D_BRANCH)
533 while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
534 p->to.u.branch = p->to.u.branch->link;
535 if(p->as == AMOVW && p->to.reg == 13) {
536 if(p->scond & C_WBIT) {
537 vreg = -p->to.offset; // in adjust region
538 // print("%P adjusting %d\n", p, vreg);
541 if(p->from.type == D_CONST && p->to.type == D_REG) {
542 if(p->from.offset != vreg)
543 print("in and out different\n");
544 // print("%P finish %d\n", p, vreg);
545 vreg = 0; // done adjust region
549 // print("%P %d %d from type\n", p, p->from.type, D_CONST);
550 // print("%P %d %d to type\n\n", p, p->to.type, D_REG);
553 if(p->as == AMOVW && vreg != 0) {
554 if(p->from.sym != nil)
555 if(p->from.name == D_AUTO || p->from.name == D_PARAM) {
556 p->from.offset += vreg;
557 // print("%P adjusting from %d %d\n", p, vreg, p->from.type);
560 if(p->to.name == D_AUTO || p->to.name == D_PARAM) {
561 p->to.offset += vreg;
562 // print("%P adjusting to %d %d\n", p, vreg, p->from.type);
569 walkvardef(Node *n, Reg *r, int active)
575 for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) {
576 if(r1->f.active == active)
578 r1->f.active = active;
579 if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n)
581 for(v=n->opt; v!=nil; v=v->nextinnode) {
585 if(r1->f.prog->as == ABL)
589 for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1)
591 walkvardef(n, (Reg*)r2->f.s2, active);
601 for(r = firstr; r != R; r = (Reg*)r->f.link) {
604 if(r->f.prog->as == ABL)
606 if(r->f.prog->as == ADUFFZERO)
608 if(r->f.prog->as == ADUFFCOPY)
610 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) {
613 for(z=0; z<BITS; z++)
614 bit.b[z] = r1->calbehind.b[z] &
615 (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
616 ~(r->calahead.b[z] & addrs.b[z]);
630 addmove(Reg *r, int bn, int rn, int f)
636 p1 = mal(sizeof(*p1));
640 // If there's a stack fixup coming (after BL newproc or BL deferproc),
641 // delay the load until after the fixup.
643 if(p2 && p2->as == AMOVW && p2->from.type == D_CONST && p2->from.reg == REGSP && p2->to.reg == REGSP && p2->to.type == D_REG)
648 p1->lineno = p->lineno;
655 a->sym = linksym(v->node->sym);
656 a->offset = v->offset;
659 if(a->etype == TARRAY || a->sym == nil)
663 fatal("addmove: shouldn't be doing this %A\n", a);
667 print("What is this %E\n", v->etype);
674 //print("movbu %E %d %S\n", v->etype, bn, v->sym);
696 p1->from.type = D_REG;
699 p1->from.type = D_FREG;
700 p1->from.reg = rn-NREG;
711 if(v->etype == TUINT8 || v->etype == TBOOL)
713 if(v->etype == TUINT16)
717 print("%P\t.a%P\n", p, p1);
721 overlap(int32 o1, int w1, int32 o2, int w2)
728 if(!(t1 > o2 && t2 > o1))
735 mkvar(Reg *r, Adr *a)
738 int i, t, n, et, z, w, flag;
743 // mark registers used
749 print("type %d %d %D\n", t, a->name, a);
761 if(a->offset != NREG)
762 bit.b[0] |= RtoB(a->offset);
764 bit.b[0] |= RtoB(a->reg);
772 bit.b[0] = RtoB(a->reg);
779 if(a == &r->f.prog->from)
780 r->use1.b[0] |= RtoB(a->reg);
782 r->use2.b[0] |= RtoB(a->reg);
783 if(r->f.prog->scond & (C_PBIT|C_WBIT))
784 r->set.b[0] |= RtoB(a->reg);
791 bit.b[0] = FtoB(a->reg);
810 if(node == N || node->op != ONAME || node->orig == N)
813 if(node->orig != node)
814 fatal("%D: bad node", a);
815 if(node->sym == S || node->sym->name[0] == '.')
821 fatal("bad width %d for %D", w, a);
823 for(i=0; i<nvar; i++) {
825 if(v->node == node && v->name == n) {
832 // if they overlap, disable both
833 if(overlap(v->offset, v->width, o, w)) {
847 if(debug['w'] > 1 && node)
848 fatal("variable not optimized: %D", a);
850 // If we're not tracking a word in a variable, mark the rest as
851 // having its address taken, so that we keep the whole thing
852 // live at all calls. otherwise we might optimize away part of
853 // a variable but not all of it.
854 for(i=0; i<nvar; i++) {
864 //print("var %d %E %D %S\n", i, et, a, s);
870 v->addr = flag; // funny punning
873 // node->opt is the head of a linked list
874 // of Vars within the given Node, so that
875 // we can start at a Var and find all the other
876 // Vars in the same Go variable.
877 v->nextinnode = node->opt;
881 if(n == D_EXTERN || n == D_STATIC)
882 for(z=0; z<BITS; z++)
883 externs.b[z] |= bit.b[z];
885 for(z=0; z<BITS; z++)
886 params.b[z] |= bit.b[z];
888 if(node->class == PPARAM)
889 for(z=0; z<BITS; z++)
890 ivar.b[z] |= bit.b[z];
891 if(node->class == PPARAMOUT)
892 for(z=0; z<BITS; z++)
893 ovar.b[z] |= bit.b[z];
895 // Treat values with their address taken as live at calls,
896 // because the garbage collector's liveness analysis in ../gc/plive.c does.
897 // These must be consistent or else we will elide stores and the garbage
898 // collector will see uninitialized data.
899 // The typical case where our own analysis is out of sync is when the
900 // node appears to have its address taken but that code doesn't actually
901 // get generated and therefore doesn't show up as an address being
902 // taken when we analyze the instruction stream.
903 // One instance of this case is when a closure uses the same name as
904 // an outer variable for one of its own variables declared with :=.
905 // The parser flags the outer variable as possibly shared, and therefore
906 // sets addrtaken, even though it ends up not being actually shared.
907 // If we were better about _ elision, _ = &x would suffice too.
908 // The broader := in a closure problem is mentioned in a comment in
909 // closure.c:/^typecheckclosure and dcl.c:/^oldname.
913 // Disable registerization for globals, because:
914 // (1) we might panic at any time and we want the recovery code
915 // to see the latest values (issue 1304).
916 // (2) we don't know what pointers might point at them and we want
917 // loads via those pointers to see updated values and vice versa (issue 7995).
919 // Disable registerization for results if using defer, because the deferred func
920 // might recover and return, causing the current values to be used.
921 if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT))
925 print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
934 prop(Reg *r, Bits ref, Bits cal)
940 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
941 for(z=0; z<BITS; z++) {
942 ref.b[z] |= r1->refahead.b[z];
943 if(ref.b[z] != r1->refahead.b[z]) {
944 r1->refahead.b[z] = ref.b[z];
947 cal.b[z] |= r1->calahead.b[z];
948 if(cal.b[z] != r1->calahead.b[z]) {
949 r1->calahead.b[z] = cal.b[z];
953 switch(r1->f.prog->as) {
955 if(noreturn(r1->f.prog))
958 // Mark all input variables (ivar) as used, because that's what the
959 // liveness bitmaps say. The liveness bitmaps say that so that a
960 // panic will not show stale values in the parameter dump.
961 // Mark variables with a recent VARDEF (r1->act) as used,
962 // so that the optimizer flushes initializations to memory,
963 // so that if a garbage collection happens during this CALL,
964 // the collector will see initialized memory. Again this is to
965 // match what the liveness bitmaps say.
966 for(z=0; z<BITS; z++) {
967 cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z];
971 // cal.b is the current approximation of what's live across the call.
972 // Every bit in cal.b is a single stack word. For each such word,
973 // find all the other tracked stack words in the same Go variable
974 // (struct/slice/string/interface) and mark them live too.
975 // This is necessary because the liveness analysis for the garbage
976 // collector works at variable granularity, not at word granularity.
977 // It is fundamental for slice/string/interface: the garbage collector
978 // needs the whole value, not just some of the words, in order to
979 // interpret the other bits correctly. Specifically, slice needs a consistent
980 // ptr and cap, string needs a consistent ptr and len, and interface
981 // needs a consistent type word and data word.
982 for(z=0; z<BITS; z++) {
985 for(i=0; i<64; i++) {
986 if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
989 if(v->node->opt == nil) // v represents fixed register, not Go variable
992 // v->node->opt is the head of a linked list of Vars
993 // corresponding to tracked words from the Go variable v->node.
994 // Walk the list and set all the bits.
995 // For a large struct this could end up being quadratic:
996 // after the first setting, the outer loop (for z, i) would see a 1 bit
997 // for all of the remaining words in the struct, and for each such
998 // word would go through and turn on all the bits again.
999 // To avoid the quadratic behavior, we only turn on the bits if
1000 // v is the head of the list or if the head's bit is not yet turned on.
1001 // This will set the bits at most twice, keeping the overall loop linear.
1004 if(v == v1 || !btest(&cal, j)) {
1005 for(; v1 != nil; v1 = v1->nextinnode) {
1015 for(z=0; z<BITS; z++) {
1022 for(z=0; z<BITS; z++) {
1023 cal.b[z] = externs.b[z] | ovar.b[z];
1028 for(z=0; z<BITS; z++) {
1029 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
1030 r1->use1.b[z] | r1->use2.b[z];
1031 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
1032 r1->refbehind.b[z] = ref.b[z];
1033 r1->calbehind.b[z] = cal.b[z];
1039 for(; r != r1; r = (Reg*)r->f.p1)
1040 for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
1041 prop(r2, r->refbehind, r->calbehind);
1045 synch(Reg *r, Bits dif)
1050 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) {
1051 for(z=0; z<BITS; z++) {
1052 dif.b[z] = (dif.b[z] &
1053 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
1054 r1->set.b[z] | r1->regdiff.b[z];
1055 if(dif.b[z] != r1->regdiff.b[z]) {
1056 r1->regdiff.b[z] = dif.b[z];
1063 for(z=0; z<BITS; z++)
1064 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
1066 synch((Reg*)r1->f.s2, dif);
1071 allreg(uint32 b, Rgn *r)
1081 fatal("unknown etype %d/%E", bitno(b), v->etype);
1096 if(i && r->cost >= 0) {
1105 if(i && r->cost >= 0) {
1123 paint1(Reg *r, int bn)
1132 if(r->act.b[z] & bb)
1135 if(!(r->refbehind.b[z] & bb))
1140 if(!(r1->refahead.b[z] & bb))
1142 if(r1->act.b[z] & bb)
1147 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
1148 change -= CLOAD * r->f.loop;
1150 print("%d%P\td %Q $%d\n", r->f.loop,
1151 r->f.prog, blsh(bn), change);
1158 if(r->f.prog->as != ANOP) { // don't give credit for NOPs
1159 if(r->use1.b[z] & bb) {
1160 change += CREF * r->f.loop;
1162 print("%d%P\tu1 %Q $%d\n", r->f.loop,
1163 p, blsh(bn), change);
1165 if((r->use2.b[z]|r->set.b[z]) & bb) {
1166 change += CREF * r->f.loop;
1168 print("%d%P\tu2 %Q $%d\n", r->f.loop,
1169 p, blsh(bn), change);
1173 if(STORE(r) & r->regdiff.b[z] & bb) {
1174 change -= CLOAD * r->f.loop;
1176 print("%d%P\tst %Q $%d\n", r->f.loop,
1177 p, blsh(bn), change);
1180 if(r->refbehind.b[z] & bb)
1181 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
1182 if(r1->refahead.b[z] & bb)
1185 if(!(r->refahead.b[z] & bb))
1189 if(r1->refbehind.b[z] & bb)
1194 if(r->act.b[z] & bb)
1196 if(!(r->refbehind.b[z] & bb))
1202 paint2(Reg *r, int bn, int depth)
1209 bb = 1LL << (bn%64);
1211 if(!(r->act.b[z] & bb))
1214 if(!(r->refbehind.b[z] & bb))
1219 if(!(r1->refahead.b[z] & bb))
1221 if(!(r1->act.b[z] & bb))
1226 if(debug['R'] && debug['v'])
1227 print(" paint2 %d %P\n", depth, r->f.prog);
1233 if(r->refbehind.b[z] & bb)
1234 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
1235 if(r1->refahead.b[z] & bb)
1236 vreg |= paint2(r1, bn, depth+1);
1238 if(!(r->refahead.b[z] & bb))
1242 if(r1->refbehind.b[z] & bb)
1243 vreg |= paint2(r1, bn, depth+1);
1247 if(!(r->act.b[z] & bb))
1249 if(!(r->refbehind.b[z] & bb))
1256 paint3(Reg *r, int bn, uint32 rb, int rn)
1264 bb = 1LL << (bn%64);
1265 if(r->act.b[z] & bb)
1268 if(!(r->refbehind.b[z] & bb))
1273 if(!(r1->refahead.b[z] & bb))
1275 if(r1->act.b[z] & bb)
1280 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1281 addmove(r, bn, rn, 0);
1287 if(r->use1.b[z] & bb) {
1290 addreg(&p->from, rn);
1292 print("\t.c%P\n", p);
1294 if((r->use2.b[z]|r->set.b[z]) & bb) {
1299 print("\t.c%P\n", p);
1302 if(STORE(r) & r->regdiff.b[z] & bb)
1303 addmove(r, bn, rn, 1);
1306 if(r->refbehind.b[z] & bb)
1307 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
1308 if(r1->refahead.b[z] & bb)
1309 paint3(r1, bn, rb, rn);
1311 if(!(r->refahead.b[z] & bb))
1315 if(r1->refbehind.b[z] & bb)
1316 paint3(r1, bn, rb, rn);
1320 if(r->act.b[z] & bb)
1322 if(!(r->refbehind.b[z] & bb))
1328 addreg(Adr *a, int rn)
1352 if(r >= REGTMP-2 && r != 12) // excluded R9 and R10 for m and g, but not R12
1360 // TODO Allow R0 and R1, but be careful with a 0 return
1361 // TODO Allow R9. Only R10 is reserved now (just g, not m).
1362 b &= 0x11fcL; // excluded R9 and R10 for m and g, but not R12
1379 if(f < 2 || f > NFREG-1)
1381 return 1L << (f + 16);
1391 return bitno(b) - 16;
1395 dumpone(Flow *f, int isreg)
1401 print("%d:%P", f->loop, f->prog);
1404 for(z=0; z<BITS; z++)
1419 print(" s:%Q", r->set);
1421 print(" u1:%Q", r->use1);
1423 print(" u2:%Q", r->use2);
1424 if(bany(&r->refbehind))
1425 print(" rb:%Q ", r->refbehind);
1426 if(bany(&r->refahead))
1427 print(" ra:%Q ", r->refahead);
1428 if(bany(&r->calbehind))
1429 print(" cb:%Q ", r->calbehind);
1430 if(bany(&r->calahead))
1431 print(" ca:%Q ", r->calahead);
1432 if(bany(&r->regdiff))
1433 print(" d:%Q ", r->regdiff);
1435 print(" a:%Q ", r->act);
1442 dumpit(char *str, Flow *r0, int isreg)
1446 print("\n%s\n", str);
1447 for(r = r0; r != nil; r = r->link) {
1452 for(; r1 != nil; r1 = r1->p2link)
1453 print(" %.4ud", (int)r1->prog->pc);
1455 print(" (and %.4ud)", (int)r->p1->prog->pc);
1460 // Print successors if it's not just the next one
1461 if(r->s1 != r->link || r->s2 != nil) {
1464 print(" %.4ud", (int)r->s1->prog->pc);
1466 print(" %.4ud", (int)r->s2->prog->pc);