src/cmd/5g/reg.c

   1 // Inferno utils/5c/reg.c
   2 // http://code.google.com/p/inferno-os/source/browse/utils/5c/reg.c
   3 //
   4 //      Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
   5 //      Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
   6 //      Portions Copyright © 1997-1999 Vita Nuova Limited
   7 //      Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
   8 //      Portions Copyright © 2004,2006 Bruce Ellis
   9 //      Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
  10 //      Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
  11 //      Portions Copyright © 2009 The Go Authors.  All rights reserved.
  12 //
  13 // Permission is hereby granted, free of charge, to any person obtaining a copy
  14 // of this software and associated documentation files (the "Software"), to deal
  15 // in the Software without restriction, including without limitation the rights
  16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  17 // copies of the Software, and to permit persons to whom the Software is
  18 // furnished to do so, subject to the following conditions:
  19 //
  20 // The above copyright notice and this permission notice shall be included in
  21 // all copies or substantial portions of the Software.
  22 //
  23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
  26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  29 // THE SOFTWARE.
  30
  31
  32 #include <u.h>
  33 #include <libc.h>
  34 #include "gg.h"
  35 #include "opt.h"
  36
  37 #define NREGVAR 32
  38 #define REGBITS ((uint64)0xffffffffull)
  39 /*c2go enum {
  40         NREGVAR = 32,
  41         REGBITS = 0xffffffff,
  42 };
  43 */
  44
  45         void    addsplits(void);
  46 static  Reg*    firstr;
  47 static  int     first   = 1;
  48
  49 int
  50 rcmp(const void *a1, const void *a2)
  51 {
  52         Rgn *p1, *p2;
  53         int c1, c2;
  54
  55         p1 = (Rgn*)a1;
  56         p2 = (Rgn*)a2;
  57         c1 = p2->cost;
  58         c2 = p1->cost;
  59         if(c1 -= c2)
  60                 return c1;
  61         return p2->varno - p1->varno;
  62 }
  63
  64 void
  65 excise(Flow *r)
  66 {
  67         Prog *p;
  68
  69         p = r->prog;
  70         p->as = ANOP;
  71         p->scond = zprog.scond;
  72         p->from = zprog.from;
  73         p->to = zprog.to;
  74         p->reg = zprog.reg;
  75 }
  76
  77 static void
  78 setaddrs(Bits bit)
  79 {
  80         int i, n;
  81         Var *v;
  82         Node *node;
  83
  84         while(bany(&bit)) {
  85                 // convert each bit to a variable
  86                 i = bnum(bit);
  87                 node = var[i].node;
  88                 n = var[i].name;
  89                 biclr(&bit, i);
  90
  91                 // disable all pieces of that variable
  92                 for(i=0; i<nvar; i++) {
  93                         v = var+i;
  94                         if(v->node == node && v->name == n)
  95                                 v->addr = 2;
  96                 }
  97         }
  98 }
  99
 100 static char* regname[] = {
 101         ".R0",
 102         ".R1",
 103         ".R2",
 104         ".R3",
 105         ".R4",
 106         ".R5",
 107         ".R6",
 108         ".R7",
 109         ".R8",
 110         ".R9",
 111         ".R10",
 112         ".R11",
 113         ".R12",
 114         ".R13",
 115         ".R14",
 116         ".R15",
 117         ".F0",
 118         ".F1",
 119         ".F2",
 120         ".F3",
 121         ".F4",
 122         ".F5",
 123         ".F6",
 124         ".F7",
 125         ".F8",
 126         ".F9",
 127         ".F10",
 128         ".F11",
 129         ".F12",
 130         ".F13",
 131         ".F14",
 132         ".F15",
 133 };
 134
 135 static Node* regnodes[NREGVAR];
 136
 137 static void walkvardef(Node *n, Reg *r, int active);
 138
 139 void
 140 regopt(Prog *firstp)
 141 {
 142         Reg *r, *r1;
 143         Prog *p;
 144         Graph *g;
 145         int i, z, active;
 146         uint32 vreg;
 147         Bits bit;
 148         ProgInfo info;
 149
 150         if(first) {
 151                 fmtinstall('Q', Qconv);
 152                 first = 0;
 153         }
 154
 155         mergetemp(firstp);
 156
 157         /*
 158          * control flow is more complicated in generated go code
 159          * than in generated c code.  define pseudo-variables for
 160          * registers, so we have complete register usage information.
 161          */
 162         nvar = NREGVAR;
 163         memset(var, 0, NREGVAR*sizeof var[0]);
 164         for(i=0; i<NREGVAR; i++) {
 165                 if(regnodes[i] == N)
 166                         regnodes[i] = newname(lookup(regname[i]));
 167                 var[i].node = regnodes[i];
 168         }
 169
 170         regbits = RtoB(REGSP)|RtoB(REGLINK)|RtoB(REGPC);
 171         for(z=0; z<BITS; z++) {
 172                 externs.b[z] = 0;
 173                 params.b[z] = 0;
 174                 consts.b[z] = 0;
 175                 addrs.b[z] = 0;
 176                 ivar.b[z] = 0;
 177                 ovar.b[z] = 0;
 178         }
 179
 180         /*
 181          * pass 1
 182          * build aux data structure
 183          * allocate pcs
 184          * find use and set of variables
 185          */
 186         g = flowstart(firstp, sizeof(Reg));
 187         if(g == nil) {
 188                 for(i=0; i<nvar; i++)
 189                         var[i].node->opt = nil;
 190                 return;
 191         }
 192
 193         firstr = (Reg*)g->start;
 194
 195         for(r = firstr; r != R; r = (Reg*)r->f.link) {
 196                 p = r->f.prog;
 197                 if(p->as == AVARDEF || p->as == AVARKILL)
 198                         continue;
 199                 proginfo(&info, p);
 200
 201                 // Avoid making variables for direct-called functions.
 202                 if(p->as == ABL && p->to.name == D_EXTERN)
 203                         continue;
 204
 205                 bit = mkvar(r, &p->from);
 206                 if(info.flags & LeftRead)
 207                         for(z=0; z<BITS; z++)
 208                                 r->use1.b[z] |= bit.b[z];
 209                 if(info.flags & LeftAddr)
 210                         setaddrs(bit);
 211
 212                 if(info.flags & RegRead) {
 213                         if(p->from.type != D_FREG)
 214                                 r->use1.b[0] |= RtoB(p->reg);
 215                         else
 216                                 r->use1.b[0] |= FtoB(p->reg);
 217                 }
 218
 219                 if(info.flags & (RightAddr | RightRead | RightWrite)) {
 220                         bit = mkvar(r, &p->to);
 221                         if(info.flags & RightAddr)
 222                                 setaddrs(bit);
 223                         if(info.flags & RightRead)
 224                                 for(z=0; z<BITS; z++)
 225                                         r->use2.b[z] |= bit.b[z];
 226                         if(info.flags & RightWrite)
 227                                 for(z=0; z<BITS; z++)
 228                                         r->set.b[z] |= bit.b[z];
 229                 }
 230
 231                 /* the mod/div runtime routines smash R12 */
 232                 if(p->as == ADIV || p->as == ADIVU || p->as == AMOD || p->as == AMODU)
 233                         r->set.b[z] |= RtoB(12);
 234         }
 235         if(firstr == R)
 236                 return;
 237
 238         for(i=0; i<nvar; i++) {
 239                 Var *v = var+i;
 240                 if(v->addr) {
 241                         bit = blsh(i);
 242                         for(z=0; z<BITS; z++)
 243                                 addrs.b[z] |= bit.b[z];
 244                 }
 245
 246                 if(debug['R'] && debug['v'])
 247                         print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
 248                                 i, v->addr, v->etype, v->width, v->node, v->offset);
 249         }
 250
 251         if(debug['R'] && debug['v'])
 252                 dumpit("pass1", &firstr->f, 1);
 253
 254         /*
 255          * pass 2
 256          * find looping structure
 257          */
 258         flowrpo(g);
 259
 260         if(debug['R'] && debug['v'])
 261                 dumpit("pass2", &firstr->f, 1);
 262
 263         /*
 264          * pass 2.5
 265          * iterate propagating fat vardef covering forward
 266          * r->act records vars with a VARDEF since the last CALL.
 267          * (r->act will be reused in pass 5 for something else,
 268          * but we'll be done with it by then.)
 269          */
 270         active = 0;
 271         for(r = firstr; r != R; r = (Reg*)r->f.link) {
 272                 r->f.active = 0;
 273                 r->act = zbits;
 274         }
 275         for(r = firstr; r != R; r = (Reg*)r->f.link) {
 276                 p = r->f.prog;
 277                 if(p->as == AVARDEF && isfat(p->to.node->type) && p->to.node->opt != nil) {
 278                         active++;
 279                         walkvardef(p->to.node, r, active);
 280                 }
 281         }
 282
 283         /*
 284          * pass 3
 285          * iterate propagating usage
 286          *      back until flow graph is complete
 287          */
 288 loop1:
 289         change = 0;
 290         for(r = firstr; r != R; r = (Reg*)r->f.link)
 291                 r->f.active = 0;
 292         for(r = firstr; r != R; r = (Reg*)r->f.link)
 293                 if(r->f.prog->as == ARET)
 294                         prop(r, zbits, zbits);
 295 loop11:
 296         /* pick up unreachable code */
 297         i = 0;
 298         for(r = firstr; r != R; r = r1) {
 299                 r1 = (Reg*)r->f.link;
 300                 if(r1 && r1->f.active && !r->f.active) {
 301                         prop(r, zbits, zbits);
 302                         i = 1;
 303                 }
 304         }
 305         if(i)
 306                 goto loop11;
 307         if(change)
 308                 goto loop1;
 309
 310         if(debug['R'] && debug['v'])
 311                 dumpit("pass3", &firstr->f, 1);
 312
 313
 314         /*
 315          * pass 4
 316          * iterate propagating register/variable synchrony
 317          *      forward until graph is complete
 318          */
 319 loop2:
 320         change = 0;
 321         for(r = firstr; r != R; r = (Reg*)r->f.link)
 322                 r->f.active = 0;
 323         synch(firstr, zbits);
 324         if(change)
 325                 goto loop2;
 326
 327         addsplits();
 328
 329         if(debug['R'] && debug['v'])
 330                 dumpit("pass4", &firstr->f, 1);
 331
 332         if(debug['R'] > 1) {
 333                 print("\nprop structure:\n");
 334                 for(r = firstr; r != R; r = (Reg*)r->f.link) {
 335                         print("%d:%P", r->f.loop, r->f.prog);
 336                         for(z=0; z<BITS; z++) {
 337                                 bit.b[z] = r->set.b[z] |
 338                                         r->refahead.b[z] | r->calahead.b[z] |
 339                                         r->refbehind.b[z] | r->calbehind.b[z] |
 340                                         r->use1.b[z] | r->use2.b[z];
 341                                 bit.b[z] &= ~addrs.b[z];
 342                         }
 343
 344                         if(bany(&bit)) {
 345                                 print("\t");
 346                                 if(bany(&r->use1))
 347                                         print(" u1=%Q", r->use1);
 348                                 if(bany(&r->use2))
 349                                         print(" u2=%Q", r->use2);
 350                                 if(bany(&r->set))
 351                                         print(" st=%Q", r->set);
 352                                 if(bany(&r->refahead))
 353                                         print(" ra=%Q", r->refahead);
 354                                 if(bany(&r->calahead))
 355                                         print(" ca=%Q", r->calahead);
 356                                 if(bany(&r->refbehind))
 357                                         print(" rb=%Q", r->refbehind);
 358                                 if(bany(&r->calbehind))
 359                                         print(" cb=%Q", r->calbehind);
 360                         }
 361                         print("\n");
 362                 }
 363         }
 364
 365         /*
 366          * pass 4.5
 367          * move register pseudo-variables into regu.
 368          */
 369         for(r = firstr; r != R; r = (Reg*)r->f.link) {
 370                 r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
 371
 372                 r->set.b[0] &= ~REGBITS;
 373                 r->use1.b[0] &= ~REGBITS;
 374                 r->use2.b[0] &= ~REGBITS;
 375                 r->refbehind.b[0] &= ~REGBITS;
 376                 r->refahead.b[0] &= ~REGBITS;
 377                 r->calbehind.b[0] &= ~REGBITS;
 378                 r->calahead.b[0] &= ~REGBITS;
 379                 r->regdiff.b[0] &= ~REGBITS;
 380                 r->act.b[0] &= ~REGBITS;
 381         }
 382
 383         if(debug['R'] && debug['v'])
 384                 dumpit("pass4.5", &firstr->f, 1);
 385
 386         /*
 387          * pass 5
 388          * isolate regions
 389          * calculate costs (paint1)
 390          */
 391         r = firstr;
 392         if(r) {
 393                 for(z=0; z<BITS; z++)
 394                         bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
 395                           ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
 396                 if(bany(&bit) && !r->f.refset) {
 397                         // should never happen - all variables are preset
 398                         if(debug['w'])
 399                                 print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
 400                         r->f.refset = 1;
 401                 }
 402         }
 403
 404         for(r = firstr; r != R; r = (Reg*)r->f.link)
 405                 r->act = zbits;
 406         rgp = region;
 407         nregion = 0;
 408         for(r = firstr; r != R; r = (Reg*)r->f.link) {
 409                 for(z=0; z<BITS; z++)
 410                         bit.b[z] = r->set.b[z] &
 411                           ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
 412                 if(bany(&bit) && !r->f.refset) {
 413                         if(debug['w'])
 414                                 print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
 415                         r->f.refset = 1;
 416                         excise(&r->f);
 417                 }
 418                 for(z=0; z<BITS; z++)
 419                         bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
 420                 while(bany(&bit)) {
 421                         i = bnum(bit);
 422                         rgp->enter = r;
 423                         rgp->varno = i;
 424                         change = 0;
 425                         if(debug['R'] > 1)
 426                                 print("\n");
 427                         paint1(r, i);
 428                         biclr(&bit, i);
 429                         if(change <= 0) {
 430                                 if(debug['R'])
 431                                         print("%L $%d: %Q\n",
 432                                                 r->f.prog->lineno, change, blsh(i));
 433                                 continue;
 434                         }
 435                         rgp->cost = change;
 436                         nregion++;
 437                         if(nregion >= NRGN) {
 438                                 if(debug['R'] > 1)
 439                                         print("too many regions\n");
 440                                 goto brk;
 441                         }
 442                         rgp++;
 443                 }
 444         }
 445 brk:
 446         qsort(region, nregion, sizeof(region[0]), rcmp);
 447
 448         if(debug['R'] && debug['v'])
 449                 dumpit("pass5", &firstr->f, 1);
 450
 451         /*
 452          * pass 6
 453          * determine used registers (paint2)
 454          * replace code (paint3)
 455          */
 456         rgp = region;
 457         if(debug['R'] && debug['v'])
 458                 print("\nregisterizing\n");
 459         for(i=0; i<nregion; i++) {
 460                 if(debug['R'] && debug['v'])
 461                         print("region %d: cost %d varno %d enter %d\n", i, rgp->cost, rgp->varno, rgp->enter->f.prog->pc);
 462                 bit = blsh(rgp->varno);
 463                 vreg = paint2(rgp->enter, rgp->varno, 0);
 464                 vreg = allreg(vreg, rgp);
 465                 if(debug['R']) {
 466                         if(rgp->regno >= NREG)
 467                                 print("%L $%d F%d: %Q\n",
 468                                         rgp->enter->f.prog->lineno,
 469                                         rgp->cost,
 470                                         rgp->regno-NREG,
 471                                         bit);
 472                         else
 473                                 print("%L $%d R%d: %Q\n",
 474                                         rgp->enter->f.prog->lineno,
 475                                         rgp->cost,
 476                                         rgp->regno,
 477                                         bit);
 478                 }
 479                 if(rgp->regno != 0)
 480                         paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
 481                 rgp++;
 482         }
 483
 484         /*
 485          * free aux structures. peep allocates new ones.
 486          */
 487         for(i=0; i<nvar; i++)
 488                 var[i].node->opt = nil;
 489         flowend(g);
 490         firstr = R;
 491
 492         if(debug['R'] && debug['v']) {
 493                 // Rebuild flow graph, since we inserted instructions
 494                 g = flowstart(firstp, sizeof(Reg));
 495                 firstr = (Reg*)g->start;
 496                 dumpit("pass6", &firstr->f, 1);
 497                 flowend(g);
 498                 firstr = R;
 499         }
 500
 501         /*
 502          * pass 7
 503          * peep-hole on basic block
 504          */
 505         if(!debug['R'] || debug['P']) {
 506                 peep(firstp);
 507         }
 508
 509         if(debug['R'] && debug['v'])
 510                 dumpit("pass7", &firstr->f, 1);
 511
 512         /*
 513          * last pass
 514          * eliminate nops
 515          * free aux structures
 516          * adjust the stack pointer
 517          *      MOVW.W  R1,-12(R13)                     <<- start
 518          *      MOVW    R0,R1
 519          *      MOVW    R1,8(R13)
 520          *      MOVW    $0,R1
 521          *      MOVW    R1,4(R13)
 522          *      BL      ,runtime.newproc+0(SB)
 523          *      MOVW    &ft+-32(SP),R7                  <<- adjust
 524          *      MOVW    &j+-40(SP),R6                   <<- adjust
 525          *      MOVW    autotmp_0003+-24(SP),R5         <<- adjust
 526          *      MOVW    $12(R13),R13                    <<- finish
 527          */
 528         vreg = 0;
 529         for(p = firstp; p != P; p = p->link) {
 530                 while(p->link != P && p->link->as == ANOP)
 531                         p->link = p->link->link;
 532                 if(p->to.type == D_BRANCH)
 533                         while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
 534                                 p->to.u.branch = p->to.u.branch->link;
 535                 if(p->as == AMOVW && p->to.reg == 13) {
 536                         if(p->scond & C_WBIT) {
 537                                 vreg = -p->to.offset;           // in adjust region
 538 //                              print("%P adjusting %d\n", p, vreg);
 539                                 continue;
 540                         }
 541                         if(p->from.type == D_CONST && p->to.type == D_REG) {
 542                                 if(p->from.offset != vreg)
 543                                         print("in and out different\n");
 544 //                              print("%P finish %d\n", p, vreg);
 545                                 vreg = 0;       // done adjust region
 546                                 continue;
 547                         }
 548
 549 //                      print("%P %d %d from type\n", p, p->from.type, D_CONST);
 550 //                      print("%P %d %d to type\n\n", p, p->to.type, D_REG);
 551                 }
 552
 553                 if(p->as == AMOVW && vreg != 0) {
 554                         if(p->from.sym != nil)
 555                         if(p->from.name == D_AUTO || p->from.name == D_PARAM) {
 556                                 p->from.offset += vreg;
 557 //                              print("%P adjusting from %d %d\n", p, vreg, p->from.type);
 558                         }
 559                         if(p->to.sym != nil)
 560                         if(p->to.name == D_AUTO || p->to.name == D_PARAM) {
 561                                 p->to.offset += vreg;
 562 //                              print("%P adjusting to %d %d\n", p, vreg, p->from.type);
 563                         }
 564                 }
 565         }
 566 }
 567
 568 static void
 569 walkvardef(Node *n, Reg *r, int active)
 570 {
 571         Reg *r1, *r2;
 572         int bn;
 573         Var *v;
 574
 575         for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) {
 576                 if(r1->f.active == active)
 577                         break;
 578                 r1->f.active = active;
 579                 if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n)
 580                         break;
 581                 for(v=n->opt; v!=nil; v=v->nextinnode) {
 582                         bn = v - var;
 583                         biset(&r1->act, bn);
 584                 }
 585                 if(r1->f.prog->as == ABL)
 586                         break;
 587         }
 588
 589         for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1)
 590                 if(r2->f.s2 != nil)
 591                         walkvardef(n, (Reg*)r2->f.s2, active);
 592 }
 593
 594 void
 595 addsplits(void)
 596 {
 597         Reg *r, *r1;
 598         int z, i;
 599         Bits bit;
 600
 601         for(r = firstr; r != R; r = (Reg*)r->f.link) {
 602                 if(r->f.loop > 1)
 603                         continue;
 604                 if(r->f.prog->as == ABL)
 605                         continue;
 606                 if(r->f.prog->as == ADUFFZERO)
 607                         continue;
 608                 if(r->f.prog->as == ADUFFCOPY)
 609                         continue;
 610                 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) {
 611                         if(r1->f.loop <= 1)
 612                                 continue;
 613                         for(z=0; z<BITS; z++)
 614                                 bit.b[z] = r1->calbehind.b[z] &
 615                                         (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
 616                                         ~(r->calahead.b[z] & addrs.b[z]);
 617                         while(bany(&bit)) {
 618                                 i = bnum(bit);
 619                                 biclr(&bit, i);
 620                         }
 621                 }
 622         }
 623 }
 624
 625 /*
 626  * add mov b,rn
 627  * just after r
 628  */
 629 void
 630 addmove(Reg *r, int bn, int rn, int f)
 631 {
 632         Prog *p, *p1, *p2;
 633         Adr *a;
 634         Var *v;
 635
 636         p1 = mal(sizeof(*p1));
 637         *p1 = zprog;
 638         p = r->f.prog;
 639
 640         // If there's a stack fixup coming (after BL newproc or BL deferproc),
 641         // delay the load until after the fixup.
 642         p2 = p->link;
 643         if(p2 && p2->as == AMOVW && p2->from.type == D_CONST && p2->from.reg == REGSP && p2->to.reg == REGSP && p2->to.type == D_REG)
 644                 p = p2;
 645
 646         p1->link = p->link;
 647         p->link = p1;
 648         p1->lineno = p->lineno;
 649
 650         v = var + bn;
 651
 652         a = &p1->to;
 653         a->name = v->name;
 654         a->node = v->node;
 655         a->sym = linksym(v->node->sym);
 656         a->offset = v->offset;
 657         a->etype = v->etype;
 658         a->type = D_OREG;
 659         if(a->etype == TARRAY || a->sym == nil)
 660                 a->type = D_CONST;
 661
 662         if(v->addr)
 663                 fatal("addmove: shouldn't be doing this %A\n", a);
 664
 665         switch(v->etype) {
 666         default:
 667                 print("What is this %E\n", v->etype);
 668
 669         case TINT8:
 670                 p1->as = AMOVBS;
 671                 break;
 672         case TBOOL:
 673         case TUINT8:
 674 //print("movbu %E %d %S\n", v->etype, bn, v->sym);
 675                 p1->as = AMOVBU;
 676                 break;
 677         case TINT16:
 678                 p1->as = AMOVHS;
 679                 break;
 680         case TUINT16:
 681                 p1->as = AMOVHU;
 682                 break;
 683         case TINT32:
 684         case TUINT32:
 685         case TPTR32:
 686                 p1->as = AMOVW;
 687                 break;
 688         case TFLOAT32:
 689                 p1->as = AMOVF;
 690                 break;
 691         case TFLOAT64:
 692                 p1->as = AMOVD;
 693                 break;
 694         }
 695
 696         p1->from.type = D_REG;
 697         p1->from.reg = rn;
 698         if(rn >= NREG) {
 699                 p1->from.type = D_FREG;
 700                 p1->from.reg = rn-NREG;
 701         }
 702         if(!f) {
 703                 p1->from = *a;
 704                 *a = zprog.from;
 705                 a->type = D_REG;
 706                 a->reg = rn;
 707                 if(rn >= NREG) {
 708                         a->type = D_FREG;
 709                         a->reg = rn-NREG;
 710                 }
 711                 if(v->etype == TUINT8 || v->etype == TBOOL)
 712                         p1->as = AMOVBU;
 713                 if(v->etype == TUINT16)
 714                         p1->as = AMOVHU;
 715         }
 716         if(debug['R'])
 717                 print("%P\t.a%P\n", p, p1);
 718 }
 719
 720 static int
 721 overlap(int32 o1, int w1, int32 o2, int w2)
 722 {
 723         int32 t1, t2;
 724
 725         t1 = o1+w1;
 726         t2 = o2+w2;
 727
 728         if(!(t1 > o2 && t2 > o1))
 729                 return 0;
 730
 731         return 1;
 732 }
 733
 734 Bits
 735 mkvar(Reg *r, Adr *a)
 736 {
 737         Var *v;
 738         int i, t, n, et, z, w, flag;
 739         int32 o;
 740         Bits bit;
 741         Node *node;
 742
 743         // mark registers used
 744         t = a->type;
 745
 746         flag = 0;
 747         switch(t) {
 748         default:
 749                 print("type %d %d %D\n", t, a->name, a);
 750                 goto none;
 751
 752         case D_NONE:
 753         case D_FCONST:
 754         case D_BRANCH:
 755                 break;
 756
 757
 758         case D_REGREG:
 759         case D_REGREG2:
 760                 bit = zbits;
 761                 if(a->offset != NREG)
 762                         bit.b[0] |= RtoB(a->offset);
 763                 if(a->reg != NREG)
 764                         bit.b[0] |= RtoB(a->reg);
 765                 return bit;
 766
 767         case D_CONST:
 768         case D_REG:
 769         case D_SHIFT:
 770                 if(a->reg != NREG) {
 771                         bit = zbits;
 772                         bit.b[0] = RtoB(a->reg);
 773                         return bit;
 774                 }
 775                 break;
 776
 777         case D_OREG:
 778                 if(a->reg != NREG) {
 779                         if(a == &r->f.prog->from)
 780                                 r->use1.b[0] |= RtoB(a->reg);
 781                         else
 782                                 r->use2.b[0] |= RtoB(a->reg);
 783                         if(r->f.prog->scond & (C_PBIT|C_WBIT))
 784                                 r->set.b[0] |= RtoB(a->reg);
 785                 }
 786                 break;
 787
 788         case D_FREG:
 789                 if(a->reg != NREG) {
 790                         bit = zbits;
 791                         bit.b[0] = FtoB(a->reg);
 792                         return bit;
 793                 }
 794                 break;
 795         }
 796
 797         switch(a->name) {
 798         default:
 799                 goto none;
 800
 801         case D_EXTERN:
 802         case D_STATIC:
 803         case D_AUTO:
 804         case D_PARAM:
 805                 n = a->name;
 806                 break;
 807         }
 808
 809         node = a->node;
 810         if(node == N || node->op != ONAME || node->orig == N)
 811                 goto none;
 812         node = node->orig;
 813         if(node->orig != node)
 814                 fatal("%D: bad node", a);
 815         if(node->sym == S || node->sym->name[0] == '.')
 816                 goto none;
 817         et = a->etype;
 818         o = a->offset;
 819         w = a->width;
 820         if(w < 0)
 821                 fatal("bad width %d for %D", w, a);
 822
 823         for(i=0; i<nvar; i++) {
 824                 v = var+i;
 825                 if(v->node == node && v->name == n) {
 826                         if(v->offset == o)
 827                         if(v->etype == et)
 828                         if(v->width == w)
 829                                 if(!flag)
 830                                         return blsh(i);
 831
 832                         // if they overlap, disable both
 833                         if(overlap(v->offset, v->width, o, w)) {
 834                                 v->addr = 1;
 835                                 flag = 1;
 836                         }
 837                 }
 838         }
 839
 840         switch(et) {
 841         case 0:
 842         case TFUNC:
 843                 goto none;
 844         }
 845
 846         if(nvar >= NVAR) {
 847                 if(debug['w'] > 1 && node)
 848                         fatal("variable not optimized: %D", a);
 849
 850                 // If we're not tracking a word in a variable, mark the rest as
 851                 // having its address taken, so that we keep the whole thing
 852                 // live at all calls. otherwise we might optimize away part of
 853                 // a variable but not all of it.
 854                 for(i=0; i<nvar; i++) {
 855                         v = var+i;
 856                         if(v->node == node)
 857                                 v->addr = 1;
 858                 }
 859                 goto none;
 860         }
 861
 862         i = nvar;
 863         nvar++;
 864 //print("var %d %E %D %S\n", i, et, a, s);
 865         v = var+i;
 866         v->offset = o;
 867         v->name = n;
 868         v->etype = et;
 869         v->width = w;
 870         v->addr = flag;         // funny punning
 871         v->node = node;
 872
 873         // node->opt is the head of a linked list
 874         // of Vars within the given Node, so that
 875         // we can start at a Var and find all the other
 876         // Vars in the same Go variable.
 877         v->nextinnode = node->opt;
 878         node->opt = v;
 879
 880         bit = blsh(i);
 881         if(n == D_EXTERN || n == D_STATIC)
 882                 for(z=0; z<BITS; z++)
 883                         externs.b[z] |= bit.b[z];
 884         if(n == D_PARAM)
 885                 for(z=0; z<BITS; z++)
 886                         params.b[z] |= bit.b[z];
 887
 888         if(node->class == PPARAM)
 889                 for(z=0; z<BITS; z++)
 890                         ivar.b[z] |= bit.b[z];
 891         if(node->class == PPARAMOUT)
 892                 for(z=0; z<BITS; z++)
 893                         ovar.b[z] |= bit.b[z];
 894
 895         // Treat values with their address taken as live at calls,
 896         // because the garbage collector's liveness analysis in ../gc/plive.c does.
 897         // These must be consistent or else we will elide stores and the garbage
 898         // collector will see uninitialized data.
 899         // The typical case where our own analysis is out of sync is when the
 900         // node appears to have its address taken but that code doesn't actually
 901         // get generated and therefore doesn't show up as an address being
 902         // taken when we analyze the instruction stream.
 903         // One instance of this case is when a closure uses the same name as
 904         // an outer variable for one of its own variables declared with :=.
 905         // The parser flags the outer variable as possibly shared, and therefore
 906         // sets addrtaken, even though it ends up not being actually shared.
 907         // If we were better about _ elision, _ = &x would suffice too.
 908         // The broader := in a closure problem is mentioned in a comment in
 909         // closure.c:/^typecheckclosure and dcl.c:/^oldname.
 910         if(node->addrtaken)
 911                 v->addr = 1;
 912
 913         // Disable registerization for globals, because:
 914         // (1) we might panic at any time and we want the recovery code
 915         // to see the latest values (issue 1304).
 916         // (2) we don't know what pointers might point at them and we want
 917         // loads via those pointers to see updated values and vice versa (issue 7995).
 918         //
 919         // Disable registerization for results if using defer, because the deferred func
 920         // might recover and return, causing the current values to be used.
 921         if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT))
 922                 v->addr = 1;
 923
 924         if(debug['R'])
 925                 print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
 926
 927         return bit;
 928
 929 none:
 930         return zbits;
 931 }
 932
 933 void
 934 prop(Reg *r, Bits ref, Bits cal)
 935 {
 936         Reg *r1, *r2;
 937         int z, i, j;
 938         Var *v, *v1;
 939
 940         for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
 941                 for(z=0; z<BITS; z++) {
 942                         ref.b[z] |= r1->refahead.b[z];
 943                         if(ref.b[z] != r1->refahead.b[z]) {
 944                                 r1->refahead.b[z] = ref.b[z];
 945                                 change++;
 946                         }
 947                         cal.b[z] |= r1->calahead.b[z];
 948                         if(cal.b[z] != r1->calahead.b[z]) {
 949                                 r1->calahead.b[z] = cal.b[z];
 950                                 change++;
 951                         }
 952                 }
 953                 switch(r1->f.prog->as) {
 954                 case ABL:
 955                         if(noreturn(r1->f.prog))
 956                                 break;
 957
 958                         // Mark all input variables (ivar) as used, because that's what the
 959                         // liveness bitmaps say. The liveness bitmaps say that so that a
 960                         // panic will not show stale values in the parameter dump.
 961                         // Mark variables with a recent VARDEF (r1->act) as used,
 962                         // so that the optimizer flushes initializations to memory,
 963                         // so that if a garbage collection happens during this CALL,
 964                         // the collector will see initialized memory. Again this is to
 965                         // match what the liveness bitmaps say.
 966                         for(z=0; z<BITS; z++) {
 967                                 cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z];
 968                                 ref.b[z] = 0;
 969                         }
 970
 971                         // cal.b is the current approximation of what's live across the call.
 972                         // Every bit in cal.b is a single stack word. For each such word,
 973                         // find all the other tracked stack words in the same Go variable
 974                         // (struct/slice/string/interface) and mark them live too.
 975                         // This is necessary because the liveness analysis for the garbage
 976                         // collector works at variable granularity, not at word granularity.
 977                         // It is fundamental for slice/string/interface: the garbage collector
 978                         // needs the whole value, not just some of the words, in order to
 979                         // interpret the other bits correctly. Specifically, slice needs a consistent
 980                         // ptr and cap, string needs a consistent ptr and len, and interface
 981                         // needs a consistent type word and data word.
 982                         for(z=0; z<BITS; z++) {
 983                                 if(cal.b[z] == 0)
 984                                         continue;
 985                                 for(i=0; i<64; i++) {
 986                                         if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
 987                                                 continue;
 988                                         v = var+z*64+i;
 989                                         if(v->node->opt == nil) // v represents fixed register, not Go variable
 990                                                 continue;
 991
 992                                         // v->node->opt is the head of a linked list of Vars
 993                                         // corresponding to tracked words from the Go variable v->node.
 994                                         // Walk the list and set all the bits.
 995                                         // For a large struct this could end up being quadratic:
 996                                         // after the first setting, the outer loop (for z, i) would see a 1 bit
 997                                         // for all of the remaining words in the struct, and for each such
 998                                         // word would go through and turn on all the bits again.
 999                                         // To avoid the quadratic behavior, we only turn on the bits if
1000                                         // v is the head of the list or if the head's bit is not yet turned on.
1001                                         // This will set the bits at most twice, keeping the overall loop linear.
1002                                         v1 = v->node->opt;
1003                                         j = v1 - var;
1004                                         if(v == v1 || !btest(&cal, j)) {
1005                                                 for(; v1 != nil; v1 = v1->nextinnode) {
1006                                                         j = v1 - var;
1007                                                         biset(&cal, j);
1008                                                 }
1009                                         }
1010                                 }
1011                         }
1012                         break;
1013
1014                 case ATEXT:
1015                         for(z=0; z<BITS; z++) {
1016                                 cal.b[z] = 0;
1017                                 ref.b[z] = 0;
1018                         }
1019                         break;
1020
1021                 case ARET:
1022                         for(z=0; z<BITS; z++) {
1023                                 cal.b[z] = externs.b[z] | ovar.b[z];
1024                                 ref.b[z] = 0;
1025                         }
1026                         break;
1027                 }
1028                 for(z=0; z<BITS; z++) {
1029                         ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
1030                                 r1->use1.b[z] | r1->use2.b[z];
1031                         cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
1032                         r1->refbehind.b[z] = ref.b[z];
1033                         r1->calbehind.b[z] = cal.b[z];
1034                 }
1035                 if(r1->f.active)
1036                         break;
1037                 r1->f.active = 1;
1038         }
1039         for(; r != r1; r = (Reg*)r->f.p1)
1040                 for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
1041                         prop(r2, r->refbehind, r->calbehind);
1042 }
1043
1044 void
1045 synch(Reg *r, Bits dif)
1046 {
1047         Reg *r1;
1048         int z;
1049
1050         for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) {
1051                 for(z=0; z<BITS; z++) {
1052                         dif.b[z] = (dif.b[z] &
1053                                 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
1054                                         r1->set.b[z] | r1->regdiff.b[z];
1055                         if(dif.b[z] != r1->regdiff.b[z]) {
1056                                 r1->regdiff.b[z] = dif.b[z];
1057                                 change++;
1058                         }
1059                 }
1060                 if(r1->f.active)
1061                         break;
1062                 r1->f.active = 1;
1063                 for(z=0; z<BITS; z++)
1064                         dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
1065                 if(r1->f.s2 != nil)
1066                         synch((Reg*)r1->f.s2, dif);
1067         }
1068 }
1069
1070 uint32
1071 allreg(uint32 b, Rgn *r)
1072 {
1073         Var *v;
1074         int i;
1075
1076         v = var + r->varno;
1077         r->regno = 0;
1078         switch(v->etype) {
1079
1080         default:
1081                 fatal("unknown etype %d/%E", bitno(b), v->etype);
1082                 break;
1083
1084         case TINT8:
1085         case TUINT8:
1086         case TINT16:
1087         case TUINT16:
1088         case TINT32:
1089         case TUINT32:
1090         case TINT:
1091         case TUINT:
1092         case TUINTPTR:
1093         case TBOOL:
1094         case TPTR32:
1095                 i = BtoR(~b);
1096                 if(i && r->cost >= 0) {
1097                         r->regno = i;
1098                         return RtoB(i);
1099                 }
1100                 break;
1101
1102         case TFLOAT32:
1103         case TFLOAT64:
1104                 i = BtoF(~b);
1105                 if(i && r->cost >= 0) {
1106                         r->regno = i+NREG;
1107                         return FtoB(i);
1108                 }
1109                 break;
1110
1111         case TINT64:
1112         case TUINT64:
1113         case TPTR64:
1114         case TINTER:
1115         case TSTRUCT:
1116         case TARRAY:
1117                 break;
1118         }
1119         return 0;
1120 }
1121
1122 void
1123 paint1(Reg *r, int bn)
1124 {
1125         Reg *r1;
1126         Prog *p;
1127         int z;
1128         uint64 bb;
1129
1130         z = bn/64;
1131         bb = 1LL<<(bn%64);
1132         if(r->act.b[z] & bb)
1133                 return;
1134         for(;;) {
1135                 if(!(r->refbehind.b[z] & bb))
1136                         break;
1137                 r1 = (Reg*)r->f.p1;
1138                 if(r1 == R)
1139                         break;
1140                 if(!(r1->refahead.b[z] & bb))
1141                         break;
1142                 if(r1->act.b[z] & bb)
1143                         break;
1144                 r = r1;
1145         }
1146
1147         if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
1148                 change -= CLOAD * r->f.loop;
1149                 if(debug['R'] > 1)
1150                         print("%d%P\td %Q $%d\n", r->f.loop,
1151                                 r->f.prog, blsh(bn), change);
1152         }
1153         for(;;) {
1154                 r->act.b[z] |= bb;
1155                 p = r->f.prog;
1156
1157
1158                 if(r->f.prog->as != ANOP) { // don't give credit for NOPs
1159                         if(r->use1.b[z] & bb) {
1160                                 change += CREF * r->f.loop;
1161                                 if(debug['R'] > 1)
1162                                         print("%d%P\tu1 %Q $%d\n", r->f.loop,
1163                                                 p, blsh(bn), change);
1164                         }
1165                         if((r->use2.b[z]|r->set.b[z]) & bb) {
1166                                 change += CREF * r->f.loop;
1167                                 if(debug['R'] > 1)
1168                                         print("%d%P\tu2 %Q $%d\n", r->f.loop,
1169                                                 p, blsh(bn), change);
1170                         }
1171                 }
1172
1173                 if(STORE(r) & r->regdiff.b[z] & bb) {
1174                         change -= CLOAD * r->f.loop;
1175                         if(debug['R'] > 1)
1176                                 print("%d%P\tst %Q $%d\n", r->f.loop,
1177                                         p, blsh(bn), change);
1178                 }
1179
1180                 if(r->refbehind.b[z] & bb)
1181                         for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
1182                                 if(r1->refahead.b[z] & bb)
1183                                         paint1(r1, bn);
1184
1185                 if(!(r->refahead.b[z] & bb))
1186                         break;
1187                 r1 = (Reg*)r->f.s2;
1188                 if(r1 != R)
1189                         if(r1->refbehind.b[z] & bb)
1190                                 paint1(r1, bn);
1191                 r = (Reg*)r->f.s1;
1192                 if(r == R)
1193                         break;
1194                 if(r->act.b[z] & bb)
1195                         break;
1196                 if(!(r->refbehind.b[z] & bb))
1197                         break;
1198         }
1199 }
1200
1201 uint32
1202 paint2(Reg *r, int bn, int depth)
1203 {
1204         Reg *r1;
1205         int z;
1206         uint64 bb, vreg;
1207
1208         z = bn/64;
1209         bb = 1LL << (bn%64);
1210         vreg = regbits;
1211         if(!(r->act.b[z] & bb))
1212                 return vreg;
1213         for(;;) {
1214                 if(!(r->refbehind.b[z] & bb))
1215                         break;
1216                 r1 = (Reg*)r->f.p1;
1217                 if(r1 == R)
1218                         break;
1219                 if(!(r1->refahead.b[z] & bb))
1220                         break;
1221                 if(!(r1->act.b[z] & bb))
1222                         break;
1223                 r = r1;
1224         }
1225         for(;;) {
1226                 if(debug['R'] && debug['v'])
1227                         print("  paint2 %d %P\n", depth, r->f.prog);
1228
1229                 r->act.b[z] &= ~bb;
1230
1231                 vreg |= r->regu;
1232
1233                 if(r->refbehind.b[z] & bb)
1234                         for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
1235                                 if(r1->refahead.b[z] & bb)
1236                                         vreg |= paint2(r1, bn, depth+1);
1237
1238                 if(!(r->refahead.b[z] & bb))
1239                         break;
1240                 r1 = (Reg*)r->f.s2;
1241                 if(r1 != R)
1242                         if(r1->refbehind.b[z] & bb)
1243                                 vreg |= paint2(r1, bn, depth+1);
1244                 r = (Reg*)r->f.s1;
1245                 if(r == R)
1246                         break;
1247                 if(!(r->act.b[z] & bb))
1248                         break;
1249                 if(!(r->refbehind.b[z] & bb))
1250                         break;
1251         }
1252         return vreg;
1253 }
1254
1255 void
1256 paint3(Reg *r, int bn, uint32 rb, int rn)
1257 {
1258         Reg *r1;
1259         Prog *p;
1260         int z;
1261         uint64 bb;
1262
1263         z = bn/64;
1264         bb = 1LL << (bn%64);
1265         if(r->act.b[z] & bb)
1266                 return;
1267         for(;;) {
1268                 if(!(r->refbehind.b[z] & bb))
1269                         break;
1270                 r1 = (Reg*)r->f.p1;
1271                 if(r1 == R)
1272                         break;
1273                 if(!(r1->refahead.b[z] & bb))
1274                         break;
1275                 if(r1->act.b[z] & bb)
1276                         break;
1277                 r = r1;
1278         }
1279
1280         if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1281                 addmove(r, bn, rn, 0);
1282
1283         for(;;) {
1284                 r->act.b[z] |= bb;
1285                 p = r->f.prog;
1286
1287                 if(r->use1.b[z] & bb) {
1288                         if(debug['R'])
1289                                 print("%P", p);
1290                         addreg(&p->from, rn);
1291                         if(debug['R'])
1292                                 print("\t.c%P\n", p);
1293                 }
1294                 if((r->use2.b[z]|r->set.b[z]) & bb) {
1295                         if(debug['R'])
1296                                 print("%P", p);
1297                         addreg(&p->to, rn);
1298                         if(debug['R'])
1299                                 print("\t.c%P\n", p);
1300                 }
1301
1302                 if(STORE(r) & r->regdiff.b[z] & bb)
1303                         addmove(r, bn, rn, 1);
1304                 r->regu |= rb;
1305
1306                 if(r->refbehind.b[z] & bb)
1307                         for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
1308                                 if(r1->refahead.b[z] & bb)
1309                                         paint3(r1, bn, rb, rn);
1310
1311                 if(!(r->refahead.b[z] & bb))
1312                         break;
1313                 r1 = (Reg*)r->f.s2;
1314                 if(r1 != R)
1315                         if(r1->refbehind.b[z] & bb)
1316                                 paint3(r1, bn, rb, rn);
1317                 r = (Reg*)r->f.s1;
1318                 if(r == R)
1319                         break;
1320                 if(r->act.b[z] & bb)
1321                         break;
1322                 if(!(r->refbehind.b[z] & bb))
1323                         break;
1324         }
1325 }
1326
1327 void
1328 addreg(Adr *a, int rn)
1329 {
1330         a->sym = nil;
1331         a->node = nil;
1332         a->name = D_NONE;
1333         a->type = D_REG;
1334         a->reg = rn;
1335         if(rn >= NREG) {
1336                 a->type = D_FREG;
1337                 a->reg = rn-NREG;
1338         }
1339 }
1340
1341 /*
1342  *      bit     reg
1343  *      0       R0
1344  *      1       R1
1345  *      ...     ...
1346  *      10      R10
1347  *      12  R12
1348  */
1349 uint32
1350 RtoB(int r)
1351 {
1352         if(r >= REGTMP-2 && r != 12)    // excluded R9 and R10 for m and g, but not R12
1353                 return 0;
1354         return 1L << r;
1355 }
1356
1357 int
1358 BtoR(uint32 b)
1359 {
1360         // TODO Allow R0 and R1, but be careful with a 0 return
1361         // TODO Allow R9. Only R10 is reserved now (just g, not m).
1362         b &= 0x11fcL;   // excluded R9 and R10 for m and g, but not R12
1363         if(b == 0)
1364                 return 0;
1365         return bitno(b);
1366 }
1367
1368 /*
1369  *      bit     reg
1370  *      18      F2
1371  *      19      F3
1372  *      ...     ...
1373  *      31      F15
1374  */
1375 uint32
1376 FtoB(int f)
1377 {
1378
1379         if(f < 2 || f > NFREG-1)
1380                 return 0;
1381         return 1L << (f + 16);
1382 }
1383
1384 int
1385 BtoF(uint32 b)
1386 {
1387
1388         b &= 0xfffc0000L;
1389         if(b == 0)
1390                 return 0;
1391         return bitno(b) - 16;
1392 }
1393
1394 void
1395 dumpone(Flow *f, int isreg)
1396 {
1397         int z;
1398         Bits bit;
1399         Reg *r;
1400
1401         print("%d:%P", f->loop, f->prog);
1402         if(isreg) {
1403                 r = (Reg*)f;
1404                 for(z=0; z<BITS; z++)
1405                         bit.b[z] =
1406                                 r->set.b[z] |
1407                                 r->use1.b[z] |
1408                                 r->use2.b[z] |
1409                                 r->refbehind.b[z] |
1410                                 r->refahead.b[z] |
1411                                 r->calbehind.b[z] |
1412                                 r->calahead.b[z] |
1413                                 r->regdiff.b[z] |
1414                                 r->act.b[z] |
1415                                         0;
1416                 if(bany(&bit)) {
1417                         print("\t");
1418                         if(bany(&r->set))
1419                                 print(" s:%Q", r->set);
1420                         if(bany(&r->use1))
1421                                 print(" u1:%Q", r->use1);
1422                         if(bany(&r->use2))
1423                                 print(" u2:%Q", r->use2);
1424                         if(bany(&r->refbehind))
1425                                 print(" rb:%Q ", r->refbehind);
1426                         if(bany(&r->refahead))
1427                                 print(" ra:%Q ", r->refahead);
1428                         if(bany(&r->calbehind))
1429                                 print(" cb:%Q ", r->calbehind);
1430                         if(bany(&r->calahead))
1431                                 print(" ca:%Q ", r->calahead);
1432                         if(bany(&r->regdiff))
1433                                 print(" d:%Q ", r->regdiff);
1434                         if(bany(&r->act))
1435                                 print(" a:%Q ", r->act);
1436                 }
1437         }
1438         print("\n");
1439 }
1440
1441 void
1442 dumpit(char *str, Flow *r0, int isreg)
1443 {
1444         Flow *r, *r1;
1445
1446         print("\n%s\n", str);
1447         for(r = r0; r != nil; r = r->link) {
1448                 dumpone(r, isreg);
1449                 r1 = r->p2;
1450                 if(r1 != nil) {
1451                         print(" pred:");
1452                         for(; r1 != nil; r1 = r1->p2link)
1453                                 print(" %.4ud", (int)r1->prog->pc);
1454                         if(r->p1 != nil)
1455                                 print(" (and %.4ud)", (int)r->p1->prog->pc);
1456                         else
1457                                 print(" (only)");
1458                         print("\n");
1459                 }
1460                 // Print successors if it's not just the next one
1461                 if(r->s1 != r->link || r->s2 != nil) {
1462                         print(" succ:");
1463                         if(r->s1 != nil)
1464                                 print(" %.4ud", (int)r->s1->prog->pc);
1465                         if(r->s2 != nil)
1466                                 print(" %.4ud", (int)r->s2->prog->pc);
1467                         print("\n");
1468                 }
1469         }
1470 }