src/cmd/8g/reg.c

   1 // Derived from Inferno utils/6c/reg.c
   2 // http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
   3 //
   4 //      Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
   5 //      Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
   6 //      Portions Copyright © 1997-1999 Vita Nuova Limited
   7 //      Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
   8 //      Portions Copyright © 2004,2006 Bruce Ellis
   9 //      Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
  10 //      Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
  11 //      Portions Copyright © 2009 The Go Authors.  All rights reserved.
  12 //
  13 // Permission is hereby granted, free of charge, to any person obtaining a copy
  14 // of this software and associated documentation files (the "Software"), to deal
  15 // in the Software without restriction, including without limitation the rights
  16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  17 // copies of the Software, and to permit persons to whom the Software is
  18 // furnished to do so, subject to the following conditions:
  19 //
  20 // The above copyright notice and this permission notice shall be included in
  21 // all copies or substantial portions of the Software.
  22 //
  23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
  26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  29 // THE SOFTWARE.
  30
  31 #include <u.h>
  32 #include <libc.h>
  33 #include "gg.h"
  34 #include "opt.h"
  35
  36 #define NREGVAR 16      /* 8 integer + 8 floating */
  37 #define REGBITS ((uint64)0xffffull)
  38 /*c2go enum {
  39         NREGVAR = 16,
  40         REGBITS = (1<<NREGVAR) - 1,
  41 };
  42 */
  43
  44 static  Reg*    firstr;
  45 static  int     first   = 1;
  46
  47 int
  48 rcmp(const void *a1, const void *a2)
  49 {
  50         Rgn *p1, *p2;
  51         int c1, c2;
  52
  53         p1 = (Rgn*)a1;
  54         p2 = (Rgn*)a2;
  55         c1 = p2->cost;
  56         c2 = p1->cost;
  57         if(c1 -= c2)
  58                 return c1;
  59         return p2->varno - p1->varno;
  60 }
  61
  62 static void
  63 setaddrs(Bits bit)
  64 {
  65         int i, n;
  66         Var *v;
  67         Node *node;
  68
  69         while(bany(&bit)) {
  70                 // convert each bit to a variable
  71                 i = bnum(bit);
  72                 node = var[i].node;
  73                 n = var[i].name;
  74                 biclr(&bit, i);
  75
  76                 // disable all pieces of that variable
  77                 for(i=0; i<nvar; i++) {
  78                         v = var+i;
  79                         if(v->node == node && v->name == n)
  80                                 v->addr = 2;
  81                 }
  82         }
  83 }
  84
  85 static char* regname[] = {
  86         ".ax", ".cx", ".dx", ".bx", ".sp", ".bp", ".si", ".di",
  87         ".x0", ".x1", ".x2", ".x3", ".x4", ".x5", ".x6", ".x7",
  88 };
  89
  90 static Node* regnodes[NREGVAR];
  91
  92 static void walkvardef(Node *n, Reg *r, int active);
  93
  94 void
  95 regopt(Prog *firstp)
  96 {
  97         Reg *r, *r1;
  98         Prog *p;
  99         Graph *g;
 100         ProgInfo info;
 101         int i, z, active;
 102         uint32 vreg;
 103         Bits bit;
 104
 105         if(first) {
 106                 fmtinstall('Q', Qconv);
 107                 exregoffset = D_DI;     // no externals
 108                 first = 0;
 109         }
 110
 111         mergetemp(firstp);
 112
 113         /*
 114          * control flow is more complicated in generated go code
 115          * than in generated c code.  define pseudo-variables for
 116          * registers, so we have complete register usage information.
 117          */
 118         nvar = NREGVAR;
 119         memset(var, 0, NREGVAR*sizeof var[0]);
 120         for(i=0; i<NREGVAR; i++) {
 121                 if(regnodes[i] == N)
 122                         regnodes[i] = newname(lookup(regname[i]));
 123                 var[i].node = regnodes[i];
 124         }
 125
 126         regbits = RtoB(D_SP);
 127         for(z=0; z<BITS; z++) {
 128                 externs.b[z] = 0;
 129                 params.b[z] = 0;
 130                 consts.b[z] = 0;
 131                 addrs.b[z] = 0;
 132                 ivar.b[z] = 0;
 133                 ovar.b[z] = 0;
 134         }
 135
 136         /*
 137          * pass 1
 138          * build aux data structure
 139          * allocate pcs
 140          * find use and set of variables
 141          */
 142         g = flowstart(firstp, sizeof(Reg));
 143         if(g == nil) {
 144                 for(i=0; i<nvar; i++)
 145                         var[i].node->opt = nil;
 146                 return;
 147         }
 148
 149         firstr = (Reg*)g->start;
 150
 151         for(r = firstr; r != R; r = (Reg*)r->f.link) {
 152                 p = r->f.prog;
 153                 if(p->as == AVARDEF || p->as == AVARKILL)
 154                         continue;
 155                 proginfo(&info, p);
 156
 157                 // Avoid making variables for direct-called functions.
 158                 if(p->as == ACALL && p->to.type == D_EXTERN)
 159                         continue;
 160
 161                 r->use1.b[0] |= info.reguse | info.regindex;
 162                 r->set.b[0] |= info.regset;
 163
 164                 bit = mkvar(r, &p->from);
 165                 if(bany(&bit)) {
 166                         if(info.flags & LeftAddr)
 167                                 setaddrs(bit);
 168                         if(info.flags & LeftRead)
 169                                 for(z=0; z<BITS; z++)
 170                                         r->use1.b[z] |= bit.b[z];
 171                         if(info.flags & LeftWrite)
 172                                 for(z=0; z<BITS; z++)
 173                                         r->set.b[z] |= bit.b[z];
 174                 }
 175
 176                 bit = mkvar(r, &p->to);
 177                 if(bany(&bit)) {
 178                         if(info.flags & RightAddr)
 179                                 setaddrs(bit);
 180                         if(info.flags & RightRead)
 181                                 for(z=0; z<BITS; z++)
 182                                         r->use2.b[z] |= bit.b[z];
 183                         if(info.flags & RightWrite)
 184                                 for(z=0; z<BITS; z++)
 185                                         r->set.b[z] |= bit.b[z];
 186                 }
 187         }
 188         if(firstr == R)
 189                 return;
 190
 191         for(i=0; i<nvar; i++) {
 192                 Var *v = var+i;
 193                 if(v->addr) {
 194                         bit = blsh(i);
 195                         for(z=0; z<BITS; z++)
 196                                 addrs.b[z] |= bit.b[z];
 197                 }
 198
 199                 if(debug['R'] && debug['v'])
 200                         print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
 201                                 i, v->addr, v->etype, v->width, v->node, v->offset);
 202         }
 203
 204         if(debug['R'] && debug['v'])
 205                 dumpit("pass1", &firstr->f, 1);
 206
 207         /*
 208          * pass 2
 209          * find looping structure
 210          */
 211         flowrpo(g);
 212
 213         if(debug['R'] && debug['v'])
 214                 dumpit("pass2", &firstr->f, 1);
 215
 216         /*
 217          * pass 2.5
 218          * iterate propagating fat vardef covering forward
 219          * r->act records vars with a VARDEF since the last CALL.
 220          * (r->act will be reused in pass 5 for something else,
 221          * but we'll be done with it by then.)
 222          */
 223         active = 0;
 224         for(r = firstr; r != R; r = (Reg*)r->f.link) {
 225                 r->f.active = 0;
 226                 r->act = zbits;
 227         }
 228         for(r = firstr; r != R; r = (Reg*)r->f.link) {
 229                 p = r->f.prog;
 230                 if(p->as == AVARDEF && isfat(p->to.node->type) && p->to.node->opt != nil) {
 231                         active++;
 232                         walkvardef(p->to.node, r, active);
 233                 }
 234         }
 235
 236         /*
 237          * pass 3
 238          * iterate propagating usage
 239          *      back until flow graph is complete
 240          */
 241 loop1:
 242         change = 0;
 243         for(r = firstr; r != R; r = (Reg*)r->f.link)
 244                 r->f.active = 0;
 245         for(r = firstr; r != R; r = (Reg*)r->f.link)
 246                 if(r->f.prog->as == ARET)
 247                         prop(r, zbits, zbits);
 248 loop11:
 249         /* pick up unreachable code */
 250         i = 0;
 251         for(r = firstr; r != R; r = r1) {
 252                 r1 = (Reg*)r->f.link;
 253                 if(r1 && r1->f.active && !r->f.active) {
 254                         prop(r, zbits, zbits);
 255                         i = 1;
 256                 }
 257         }
 258         if(i)
 259                 goto loop11;
 260         if(change)
 261                 goto loop1;
 262
 263         if(debug['R'] && debug['v'])
 264                 dumpit("pass3", &firstr->f, 1);
 265
 266         /*
 267          * pass 4
 268          * iterate propagating register/variable synchrony
 269          *      forward until graph is complete
 270          */
 271 loop2:
 272         change = 0;
 273         for(r = firstr; r != R; r = (Reg*)r->f.link)
 274                 r->f.active = 0;
 275         synch(firstr, zbits);
 276         if(change)
 277                 goto loop2;
 278
 279         if(debug['R'] && debug['v'])
 280                 dumpit("pass4", &firstr->f, 1);
 281
 282         /*
 283          * pass 4.5
 284          * move register pseudo-variables into regu.
 285          */
 286         for(r = firstr; r != R; r = (Reg*)r->f.link) {
 287                 r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
 288
 289                 r->set.b[0] &= ~REGBITS;
 290                 r->use1.b[0] &= ~REGBITS;
 291                 r->use2.b[0] &= ~REGBITS;
 292                 r->refbehind.b[0] &= ~REGBITS;
 293                 r->refahead.b[0] &= ~REGBITS;
 294                 r->calbehind.b[0] &= ~REGBITS;
 295                 r->calahead.b[0] &= ~REGBITS;
 296                 r->regdiff.b[0] &= ~REGBITS;
 297                 r->act.b[0] &= ~REGBITS;
 298         }
 299
 300         /*
 301          * pass 5
 302          * isolate regions
 303          * calculate costs (paint1)
 304          */
 305         r = firstr;
 306         if(r) {
 307                 for(z=0; z<BITS; z++)
 308                         bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
 309                           ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
 310                 if(bany(&bit) && !r->f.refset) {
 311                         // should never happen - all variables are preset
 312                         if(debug['w'])
 313                                 print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
 314                         r->f.refset = 1;
 315                 }
 316         }
 317         for(r = firstr; r != R; r = (Reg*)r->f.link)
 318                 r->act = zbits;
 319         rgp = region;
 320         nregion = 0;
 321         for(r = firstr; r != R; r = (Reg*)r->f.link) {
 322                 for(z=0; z<BITS; z++)
 323                         bit.b[z] = r->set.b[z] &
 324                           ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
 325                 if(bany(&bit) && !r->f.refset) {
 326                         if(debug['w'])
 327                                 print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
 328                         r->f.refset = 1;
 329                         excise(&r->f);
 330                 }
 331                 for(z=0; z<BITS; z++)
 332                         bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
 333                 while(bany(&bit)) {
 334                         i = bnum(bit);
 335                         rgp->enter = r;
 336                         rgp->varno = i;
 337                         change = 0;
 338                         paint1(r, i);
 339                         biclr(&bit, i);
 340                         if(change <= 0)
 341                                 continue;
 342                         rgp->cost = change;
 343                         nregion++;
 344                         if(nregion >= NRGN) {
 345                                 if(debug['R'] && debug['v'])
 346                                         print("too many regions\n");
 347                                 goto brk;
 348                         }
 349                         rgp++;
 350                 }
 351         }
 352 brk:
 353         qsort(region, nregion, sizeof(region[0]), rcmp);
 354
 355         /*
 356          * pass 6
 357          * determine used registers (paint2)
 358          * replace code (paint3)
 359          */
 360         rgp = region;
 361         if(debug['R'] && debug['v'])
 362                 print("\nregisterizing\n");
 363         for(i=0; i<nregion; i++) {
 364                 if(debug['R'] && debug['v'])
 365                         print("region %d: cost %d varno %d enter %d\n", i, rgp->cost, rgp->varno, rgp->enter->f.prog->pc);
 366                 bit = blsh(rgp->varno);
 367                 vreg = paint2(rgp->enter, rgp->varno, 0);
 368                 vreg = allreg(vreg, rgp);
 369                 if(rgp->regno != 0)
 370                         paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
 371                 rgp++;
 372         }
 373
 374         /*
 375          * free aux structures. peep allocates new ones.
 376          */
 377         for(i=0; i<nvar; i++)
 378                 var[i].node->opt = nil;
 379         flowend(g);
 380         firstr = R;
 381
 382         if(debug['R'] && debug['v']) {
 383                 // Rebuild flow graph, since we inserted instructions
 384                 g = flowstart(firstp, sizeof(Reg));
 385                 firstr = (Reg*)g->start;
 386                 dumpit("pass6", &firstr->f, 1);
 387                 flowend(g);
 388                 firstr = R;
 389         }
 390
 391         /*
 392          * pass 7
 393          * peep-hole on basic block
 394          */
 395         if(!debug['R'] || debug['P'])
 396                 peep(firstp);
 397
 398         /*
 399          * eliminate nops
 400          */
 401         for(p=firstp; p!=P; p=p->link) {
 402                 while(p->link != P && p->link->as == ANOP)
 403                         p->link = p->link->link;
 404                 if(p->to.type == D_BRANCH)
 405                         while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
 406                                 p->to.u.branch = p->to.u.branch->link;
 407         }
 408
 409         if(!use_sse)
 410         for(p=firstp; p!=P; p=p->link) {
 411                 if(p->from.type >= D_X0 && p->from.type <= D_X7)
 412                         fatal("invalid use of %R with GO386=387: %P", p->from.type, p);
 413                 if(p->to.type >= D_X0 && p->to.type <= D_X7)
 414                         fatal("invalid use of %R with GO386=387: %P", p->to.type, p);
 415         }
 416
 417         if(debug['R']) {
 418                 if(ostats.ncvtreg ||
 419                    ostats.nspill ||
 420                    ostats.nreload ||
 421                    ostats.ndelmov ||
 422                    ostats.nvar ||
 423                    ostats.naddr ||
 424                    0)
 425                         print("\nstats\n");
 426
 427                 if(ostats.ncvtreg)
 428                         print(" %4d cvtreg\n", ostats.ncvtreg);
 429                 if(ostats.nspill)
 430                         print(" %4d spill\n", ostats.nspill);
 431                 if(ostats.nreload)
 432                         print(" %4d reload\n", ostats.nreload);
 433                 if(ostats.ndelmov)
 434                         print(" %4d delmov\n", ostats.ndelmov);
 435                 if(ostats.nvar)
 436                         print(" %4d var\n", ostats.nvar);
 437                 if(ostats.naddr)
 438                         print(" %4d addr\n", ostats.naddr);
 439
 440                 memset(&ostats, 0, sizeof(ostats));
 441         }
 442 }
 443
 444 static void
 445 walkvardef(Node *n, Reg *r, int active)
 446 {
 447         Reg *r1, *r2;
 448         int bn;
 449         Var *v;
 450
 451         for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) {
 452                 if(r1->f.active == active)
 453                         break;
 454                 r1->f.active = active;
 455                 if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n)
 456                         break;
 457                 for(v=n->opt; v!=nil; v=v->nextinnode) {
 458                         bn = v - var;
 459                         biset(&r1->act, bn);
 460                 }
 461                 if(r1->f.prog->as == ACALL)
 462                         break;
 463         }
 464
 465         for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1)
 466                 if(r2->f.s2 != nil)
 467                         walkvardef(n, (Reg*)r2->f.s2, active);
 468 }
 469
 470 /*
 471  * add mov b,rn
 472  * just after r
 473  */
 474 void
 475 addmove(Reg *r, int bn, int rn, int f)
 476 {
 477         Prog *p, *p1;
 478         Adr *a;
 479         Var *v;
 480
 481         p1 = mal(sizeof(*p1));
 482         clearp(p1);
 483         p1->pc = 9999;
 484
 485         p = r->f.prog;
 486         p1->link = p->link;
 487         p->link = p1;
 488         p1->lineno = p->lineno;
 489
 490         v = var + bn;
 491
 492         a = &p1->to;
 493         a->offset = v->offset;
 494         a->etype = v->etype;
 495         a->type = v->name;
 496         a->node = v->node;
 497         a->sym = linksym(v->node->sym);
 498
 499         // need to clean this up with wptr and
 500         // some of the defaults
 501         p1->as = AMOVL;
 502         switch(v->etype) {
 503         default:
 504                 fatal("unknown type %E", v->etype);
 505         case TINT8:
 506         case TUINT8:
 507         case TBOOL:
 508                 p1->as = AMOVB;
 509                 break;
 510         case TINT16:
 511         case TUINT16:
 512                 p1->as = AMOVW;
 513                 break;
 514         case TFLOAT32:
 515                 p1->as = AMOVSS;
 516                 break;
 517         case TFLOAT64:
 518                 p1->as = AMOVSD;
 519                 break;
 520         case TINT:
 521         case TUINT:
 522         case TINT32:
 523         case TUINT32:
 524         case TPTR32:
 525                 break;
 526         }
 527
 528         p1->from.type = rn;
 529         if(!f) {
 530                 p1->from = *a;
 531                 *a = zprog.from;
 532                 a->type = rn;
 533                 if(v->etype == TUINT8)
 534                         p1->as = AMOVB;
 535                 if(v->etype == TUINT16)
 536                         p1->as = AMOVW;
 537         }
 538         if(debug['R'] && debug['v'])
 539                 print("%P ===add=== %P\n", p, p1);
 540         ostats.nspill++;
 541 }
 542
 543 uint32
 544 doregbits(int r)
 545 {
 546         uint32 b;
 547
 548         b = 0;
 549         if(r >= D_INDIR)
 550                 r -= D_INDIR;
 551         if(r >= D_AX && r <= D_DI)
 552                 b |= RtoB(r);
 553         else
 554         if(r >= D_AL && r <= D_BL)
 555                 b |= RtoB(r-D_AL+D_AX);
 556         else
 557         if(r >= D_AH && r <= D_BH)
 558                 b |= RtoB(r-D_AH+D_AX);
 559         else
 560         if(r >= D_X0 && r <= D_X0+7)
 561                 b |= FtoB(r);
 562         return b;
 563 }
 564
 565 static int
 566 overlap(int32 o1, int w1, int32 o2, int w2)
 567 {
 568         int32 t1, t2;
 569
 570         t1 = o1+w1;
 571         t2 = o2+w2;
 572
 573         if(!(t1 > o2 && t2 > o1))
 574                 return 0;
 575
 576         return 1;
 577 }
 578
 579 Bits
 580 mkvar(Reg *r, Adr *a)
 581 {
 582         Var *v;
 583         int i, t, n, et, z, w, flag, regu;
 584         int32 o;
 585         Bits bit;
 586         Node *node;
 587
 588         /*
 589          * mark registers used
 590          */
 591         t = a->type;
 592         if(t == D_NONE)
 593                 goto none;
 594
 595         if(r != R)
 596                 r->use1.b[0] |= doregbits(a->index);
 597
 598         switch(t) {
 599         default:
 600                 regu = doregbits(t);
 601                 if(regu == 0)
 602                         goto none;
 603                 bit = zbits;
 604                 bit.b[0] = regu;
 605                 return bit;
 606
 607         case D_ADDR:
 608                 a->type = a->index;
 609                 bit = mkvar(r, a);
 610                 setaddrs(bit);
 611                 a->type = t;
 612                 ostats.naddr++;
 613                 goto none;
 614
 615         case D_EXTERN:
 616         case D_STATIC:
 617         case D_PARAM:
 618         case D_AUTO:
 619                 n = t;
 620                 break;
 621         }
 622
 623         node = a->node;
 624         if(node == N || node->op != ONAME || node->orig == N)
 625                 goto none;
 626         node = node->orig;
 627         if(node->orig != node)
 628                 fatal("%D: bad node", a);
 629         if(node->sym == S || node->sym->name[0] == '.')
 630                 goto none;
 631         et = a->etype;
 632         o = a->offset;
 633         w = a->width;
 634         if(w < 0)
 635                 fatal("bad width %d for %D", w, a);
 636
 637         flag = 0;
 638         for(i=0; i<nvar; i++) {
 639                 v = var+i;
 640                 if(v->node == node && v->name == n) {
 641                         if(v->offset == o)
 642                         if(v->etype == et)
 643                         if(v->width == w)
 644                                 return blsh(i);
 645
 646                         // if they overlap, disable both
 647                         if(overlap(v->offset, v->width, o, w)) {
 648                                 if(debug['R'])
 649                                         print("disable %s\n", node->sym->name);
 650                                 v->addr = 1;
 651                                 flag = 1;
 652                         }
 653                 }
 654         }
 655
 656         switch(et) {
 657         case 0:
 658         case TFUNC:
 659                 goto none;
 660         }
 661
 662         if(nvar >= NVAR) {
 663                 if(debug['w'] > 1 && node != N)
 664                         fatal("variable not optimized: %D", a);
 665
 666                 // If we're not tracking a word in a variable, mark the rest as
 667                 // having its address taken, so that we keep the whole thing
 668                 // live at all calls. otherwise we might optimize away part of
 669                 // a variable but not all of it.
 670                 for(i=0; i<nvar; i++) {
 671                         v = var+i;
 672                         if(v->node == node)
 673                                 v->addr = 1;
 674                 }
 675                 goto none;
 676         }
 677
 678         i = nvar;
 679         nvar++;
 680         v = var+i;
 681         v->offset = o;
 682         v->name = n;
 683         v->etype = et;
 684         v->width = w;
 685         v->addr = flag;         // funny punning
 686         v->node = node;
 687
 688         // node->opt is the head of a linked list
 689         // of Vars within the given Node, so that
 690         // we can start at a Var and find all the other
 691         // Vars in the same Go variable.
 692         v->nextinnode = node->opt;
 693         node->opt = v;
 694
 695         bit = blsh(i);
 696         if(n == D_EXTERN || n == D_STATIC)
 697                 for(z=0; z<BITS; z++)
 698                         externs.b[z] |= bit.b[z];
 699         if(n == D_PARAM)
 700                 for(z=0; z<BITS; z++)
 701                         params.b[z] |= bit.b[z];
 702
 703         if(node->class == PPARAM)
 704                 for(z=0; z<BITS; z++)
 705                         ivar.b[z] |= bit.b[z];
 706         if(node->class == PPARAMOUT)
 707                 for(z=0; z<BITS; z++)
 708                         ovar.b[z] |= bit.b[z];
 709
 710         // Treat values with their address taken as live at calls,
 711         // because the garbage collector's liveness analysis in ../gc/plive.c does.
 712         // These must be consistent or else we will elide stores and the garbage
 713         // collector will see uninitialized data.
 714         // The typical case where our own analysis is out of sync is when the
 715         // node appears to have its address taken but that code doesn't actually
 716         // get generated and therefore doesn't show up as an address being
 717         // taken when we analyze the instruction stream.
 718         // One instance of this case is when a closure uses the same name as
 719         // an outer variable for one of its own variables declared with :=.
 720         // The parser flags the outer variable as possibly shared, and therefore
 721         // sets addrtaken, even though it ends up not being actually shared.
 722         // If we were better about _ elision, _ = &x would suffice too.
 723         // The broader := in a closure problem is mentioned in a comment in
 724         // closure.c:/^typecheckclosure and dcl.c:/^oldname.
 725         if(node->addrtaken)
 726                 v->addr = 1;
 727
 728         // Disable registerization for globals, because:
 729         // (1) we might panic at any time and we want the recovery code
 730         // to see the latest values (issue 1304).
 731         // (2) we don't know what pointers might point at them and we want
 732         // loads via those pointers to see updated values and vice versa (issue 7995).
 733         //
 734         // Disable registerization for results if using defer, because the deferred func
 735         // might recover and return, causing the current values to be used.
 736         if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT))
 737                 v->addr = 1;
 738
 739         if(debug['R'])
 740                 print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
 741         ostats.nvar++;
 742
 743         return bit;
 744
 745 none:
 746         return zbits;
 747 }
 748
 749 void
 750 prop(Reg *r, Bits ref, Bits cal)
 751 {
 752         Reg *r1, *r2;
 753         int z, i, j;
 754         Var *v, *v1;
 755
 756         for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
 757                 for(z=0; z<BITS; z++) {
 758                         ref.b[z] |= r1->refahead.b[z];
 759                         if(ref.b[z] != r1->refahead.b[z]) {
 760                                 r1->refahead.b[z] = ref.b[z];
 761                                 change++;
 762                         }
 763                         cal.b[z] |= r1->calahead.b[z];
 764                         if(cal.b[z] != r1->calahead.b[z]) {
 765                                 r1->calahead.b[z] = cal.b[z];
 766                                 change++;
 767                         }
 768                 }
 769                 switch(r1->f.prog->as) {
 770                 case ACALL:
 771                         if(noreturn(r1->f.prog))
 772                                 break;
 773
 774                         // Mark all input variables (ivar) as used, because that's what the
 775                         // liveness bitmaps say. The liveness bitmaps say that so that a
 776                         // panic will not show stale values in the parameter dump.
 777                         // Mark variables with a recent VARDEF (r1->act) as used,
 778                         // so that the optimizer flushes initializations to memory,
 779                         // so that if a garbage collection happens during this CALL,
 780                         // the collector will see initialized memory. Again this is to
 781                         // match what the liveness bitmaps say.
 782                         for(z=0; z<BITS; z++) {
 783                                 cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z];
 784                                 ref.b[z] = 0;
 785                         }
 786
 787                         // cal.b is the current approximation of what's live across the call.
 788                         // Every bit in cal.b is a single stack word. For each such word,
 789                         // find all the other tracked stack words in the same Go variable
 790                         // (struct/slice/string/interface) and mark them live too.
 791                         // This is necessary because the liveness analysis for the garbage
 792                         // collector works at variable granularity, not at word granularity.
 793                         // It is fundamental for slice/string/interface: the garbage collector
 794                         // needs the whole value, not just some of the words, in order to
 795                         // interpret the other bits correctly. Specifically, slice needs a consistent
 796                         // ptr and cap, string needs a consistent ptr and len, and interface
 797                         // needs a consistent type word and data word.
 798                         for(z=0; z<BITS; z++) {
 799                                 if(cal.b[z] == 0)
 800                                         continue;
 801                                 for(i=0; i<64; i++) {
 802                                         if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
 803                                                 continue;
 804                                         v = var+z*64+i;
 805                                         if(v->node->opt == nil) // v represents fixed register, not Go variable
 806                                                 continue;
 807
 808                                         // v->node->opt is the head of a linked list of Vars
 809                                         // corresponding to tracked words from the Go variable v->node.
 810                                         // Walk the list and set all the bits.
 811                                         // For a large struct this could end up being quadratic:
 812                                         // after the first setting, the outer loop (for z, i) would see a 1 bit
 813                                         // for all of the remaining words in the struct, and for each such
 814                                         // word would go through and turn on all the bits again.
 815                                         // To avoid the quadratic behavior, we only turn on the bits if
 816                                         // v is the head of the list or if the head's bit is not yet turned on.
 817                                         // This will set the bits at most twice, keeping the overall loop linear.
 818                                         v1 = v->node->opt;
 819                                         j = v1 - var;
 820                                         if(v == v1 || !btest(&cal, j)) {
 821                                                 for(; v1 != nil; v1 = v1->nextinnode) {
 822                                                         j = v1 - var;
 823                                                         biset(&cal, j);
 824                                                 }
 825                                         }
 826                                 }
 827                         }
 828                         break;
 829
 830                 case ATEXT:
 831                         for(z=0; z<BITS; z++) {
 832                                 cal.b[z] = 0;
 833                                 ref.b[z] = 0;
 834                         }
 835                         break;
 836
 837                 case ARET:
 838                         for(z=0; z<BITS; z++) {
 839                                 cal.b[z] = externs.b[z] | ovar.b[z];
 840                                 ref.b[z] = 0;
 841                         }
 842                         break;
 843                 }
 844                 for(z=0; z<BITS; z++) {
 845                         ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
 846                                 r1->use1.b[z] | r1->use2.b[z];
 847                         cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
 848                         r1->refbehind.b[z] = ref.b[z];
 849                         r1->calbehind.b[z] = cal.b[z];
 850                 }
 851                 if(r1->f.active)
 852                         break;
 853                 r1->f.active = 1;
 854         }
 855         for(; r != r1; r = (Reg*)r->f.p1)
 856                 for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
 857                         prop(r2, r->refbehind, r->calbehind);
 858 }
 859
 860 void
 861 synch(Reg *r, Bits dif)
 862 {
 863         Reg *r1;
 864         int z;
 865
 866         for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) {
 867                 for(z=0; z<BITS; z++) {
 868                         dif.b[z] = (dif.b[z] &
 869                                 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
 870                                         r1->set.b[z] | r1->regdiff.b[z];
 871                         if(dif.b[z] != r1->regdiff.b[z]) {
 872                                 r1->regdiff.b[z] = dif.b[z];
 873                                 change++;
 874                         }
 875                 }
 876                 if(r1->f.active)
 877                         break;
 878                 r1->f.active = 1;
 879                 for(z=0; z<BITS; z++)
 880                         dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
 881                 if((Reg*)r1->f.s2 != R)
 882                         synch((Reg*)r1->f.s2, dif);
 883         }
 884 }
 885
 886 uint32
 887 allreg(uint32 b, Rgn *r)
 888 {
 889         Var *v;
 890         int i;
 891
 892         v = var + r->varno;
 893         r->regno = 0;
 894         switch(v->etype) {
 895
 896         default:
 897                 fatal("unknown etype %d/%E", bitno(b), v->etype);
 898                 break;
 899
 900         case TINT8:
 901         case TUINT8:
 902         case TINT16:
 903         case TUINT16:
 904         case TINT32:
 905         case TUINT32:
 906         case TINT64:
 907         case TINT:
 908         case TUINT:
 909         case TUINTPTR:
 910         case TBOOL:
 911         case TPTR32:
 912                 i = BtoR(~b);
 913                 if(i && r->cost > 0) {
 914                         r->regno = i;
 915                         return RtoB(i);
 916                 }
 917                 break;
 918
 919         case TFLOAT32:
 920         case TFLOAT64:
 921                 if(!use_sse)
 922                         break;
 923                 i = BtoF(~b);
 924                 if(i && r->cost > 0) {
 925                         r->regno = i;
 926                         return FtoB(i);
 927                 }
 928                 break;
 929         }
 930         return 0;
 931 }
 932
 933 void
 934 paint1(Reg *r, int bn)
 935 {
 936         Reg *r1;
 937         Prog *p;
 938         int z;
 939         uint64 bb, rbz;
 940
 941         z = bn/64;
 942         bb = 1LL<<(bn%64);
 943         if(r->act.b[z] & bb)
 944                 return;
 945         for(;;) {
 946                 if(!(r->refbehind.b[z] & bb))
 947                         break;
 948                 r1 = (Reg*)r->f.p1;
 949                 if(r1 == R)
 950                         break;
 951                 if(!(r1->refahead.b[z] & bb))
 952                         break;
 953                 if(r1->act.b[z] & bb)
 954                         break;
 955                 r = r1;
 956         }
 957
 958         rbz = ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z]));
 959         if(LOAD(r) & rbz & bb) {
 960                 change -= CLOAD * r->f.loop;
 961         }
 962         for(;;) {
 963                 r->act.b[z] |= bb;
 964                 p = r->f.prog;
 965
 966                 if(r->f.prog->as != ANOP) { // don't give credit for NOPs
 967                         if(r->use1.b[z] & bb) {
 968                                 change += CREF * r->f.loop;
 969                                 if(p->as == AFMOVL || p->as == AFMOVW)
 970                                         if(BtoR(bb) != D_F0)
 971                                                 change = -CINF;
 972                         }
 973                         if((r->use2.b[z]|r->set.b[z]) & bb) {
 974                                 change += CREF * r->f.loop;
 975                                 if(p->as == AFMOVL || p->as == AFMOVW)
 976                                         if(BtoR(bb) != D_F0)
 977                                                 change = -CINF;
 978                         }
 979                 }
 980
 981                 if(STORE(r) & r->regdiff.b[z] & bb) {
 982                         change -= CLOAD * r->f.loop;
 983                         if(p->as == AFMOVL || p->as == AFMOVW)
 984                                 if(BtoR(bb) != D_F0)
 985                                         change = -CINF;
 986                 }
 987
 988                 if(r->refbehind.b[z] & bb)
 989                         for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
 990                                 if(r1->refahead.b[z] & bb)
 991                                         paint1(r1, bn);
 992
 993                 if(!(r->refahead.b[z] & bb))
 994                         break;
 995                 r1 = (Reg*)r->f.s2;
 996                 if(r1 != R)
 997                         if(r1->refbehind.b[z] & bb)
 998                                 paint1(r1, bn);
 999                 r = (Reg*)r->f.s1;
1000                 if(r == R)
1001                         break;
1002                 if(r->act.b[z] & bb)
1003                         break;
1004                 if(!(r->refbehind.b[z] & bb))
1005                         break;
1006         }
1007 }
1008
1009 uint32
1010 paint2(Reg *r, int bn, int depth)
1011 {
1012         Reg *r1;
1013         int z;
1014         uint64 bb, vreg;
1015
1016         z = bn/64;
1017         bb = 1LL << (bn%64);
1018         vreg = regbits;
1019         if(!(r->act.b[z] & bb))
1020                 return vreg;
1021         for(;;) {
1022                 if(!(r->refbehind.b[z] & bb))
1023                         break;
1024                 r1 = (Reg*)r->f.p1;
1025                 if(r1 == R)
1026                         break;
1027                 if(!(r1->refahead.b[z] & bb))
1028                         break;
1029                 if(!(r1->act.b[z] & bb))
1030                         break;
1031                 r = r1;
1032         }
1033         for(;;) {
1034                 if(debug['R'] && debug['v'])
1035                         print("  paint2 %d %P\n", depth, r->f.prog);
1036
1037                 r->act.b[z] &= ~bb;
1038
1039                 vreg |= r->regu;
1040
1041                 if(r->refbehind.b[z] & bb)
1042                         for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
1043                                 if(r1->refahead.b[z] & bb)
1044                                         vreg |= paint2(r1, bn, depth+1);
1045
1046                 if(!(r->refahead.b[z] & bb))
1047                         break;
1048                 r1 = (Reg*)r->f.s2;
1049                 if(r1 != R)
1050                         if(r1->refbehind.b[z] & bb)
1051                                 vreg |= paint2(r1, bn, depth+1);
1052                 r = (Reg*)r->f.s1;
1053                 if(r == R)
1054                         break;
1055                 if(!(r->act.b[z] & bb))
1056                         break;
1057                 if(!(r->refbehind.b[z] & bb))
1058                         break;
1059         }
1060
1061         return vreg;
1062 }
1063
1064 void
1065 paint3(Reg *r, int bn, uint32 rb, int rn)
1066 {
1067         Reg *r1;
1068         Prog *p;
1069         int z;
1070         uint64 bb, rbz;
1071
1072         z = bn/64;
1073         bb = 1LL << (bn%64);
1074         if(r->act.b[z] & bb)
1075                 return;
1076         for(;;) {
1077                 if(!(r->refbehind.b[z] & bb))
1078                         break;
1079                 r1 = (Reg*)r->f.p1;
1080                 if(r1 == R)
1081                         break;
1082                 if(!(r1->refahead.b[z] & bb))
1083                         break;
1084                 if(r1->act.b[z] & bb)
1085                         break;
1086                 r = r1;
1087         }
1088
1089         rbz = ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z]));
1090         if(LOAD(r) & rbz & bb)
1091                 addmove(r, bn, rn, 0);
1092         for(;;) {
1093                 r->act.b[z] |= bb;
1094                 p = r->f.prog;
1095
1096                 if(r->use1.b[z] & bb) {
1097                         if(debug['R'] && debug['v'])
1098                                 print("%P", p);
1099                         addreg(&p->from, rn);
1100                         if(debug['R'] && debug['v'])
1101                                 print(" ===change== %P\n", p);
1102                 }
1103                 if((r->use2.b[z]|r->set.b[z]) & bb) {
1104                         if(debug['R'] && debug['v'])
1105                                 print("%P", p);
1106                         addreg(&p->to, rn);
1107                         if(debug['R'] && debug['v'])
1108                                 print(" ===change== %P\n", p);
1109                 }
1110
1111                 if(STORE(r) & r->regdiff.b[z] & bb)
1112                         addmove(r, bn, rn, 1);
1113                 r->regu |= rb;
1114
1115                 if(r->refbehind.b[z] & bb)
1116                         for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
1117                                 if(r1->refahead.b[z] & bb)
1118                                         paint3(r1, bn, rb, rn);
1119
1120                 if(!(r->refahead.b[z] & bb))
1121                         break;
1122                 r1 = (Reg*)r->f.s2;
1123                 if(r1 != R)
1124                         if(r1->refbehind.b[z] & bb)
1125                                 paint3(r1, bn, rb, rn);
1126                 r = (Reg*)r->f.s1;
1127                 if(r == R)
1128                         break;
1129                 if(r->act.b[z] & bb)
1130                         break;
1131                 if(!(r->refbehind.b[z] & bb))
1132                         break;
1133         }
1134 }
1135
1136 void
1137 addreg(Adr *a, int rn)
1138 {
1139         a->sym = nil;
1140         a->node = nil;
1141         a->offset = 0;
1142         a->type = rn;
1143
1144         ostats.ncvtreg++;
1145 }
1146
1147 uint32
1148 RtoB(int r)
1149 {
1150
1151         if(r < D_AX || r > D_DI)
1152                 return 0;
1153         return 1L << (r-D_AX);
1154 }
1155
1156 int
1157 BtoR(uint32 b)
1158 {
1159
1160         b &= 0xffL;
1161         if(b == 0)
1162                 return 0;
1163         return bitno(b) + D_AX;
1164 }
1165
1166 uint32
1167 FtoB(int f)
1168 {
1169         if(f < D_X0 || f > D_X7)
1170                 return 0;
1171         return 1L << (f - D_X0 + 8);
1172 }
1173
1174 int
1175 BtoF(uint32 b)
1176 {
1177         b &= 0xFF00L;
1178         if(b == 0)
1179                 return 0;
1180         return bitno(b) - 8 + D_X0;
1181 }
1182
1183 void
1184 dumpone(Flow *f, int isreg)
1185 {
1186         int z;
1187         Bits bit;
1188         Reg *r;
1189
1190         print("%d:%P", f->loop, f->prog);
1191         if(isreg) {
1192                 r = (Reg*)f;
1193                 for(z=0; z<BITS; z++)
1194                         bit.b[z] =
1195                                 r->set.b[z] |
1196                                 r->use1.b[z] |
1197                                 r->use2.b[z] |
1198                                 r->refbehind.b[z] |
1199                                 r->refahead.b[z] |
1200                                 r->calbehind.b[z] |
1201                                 r->calahead.b[z] |
1202                                 r->regdiff.b[z] |
1203                                 r->act.b[z] |
1204                                         0;
1205                 if(bany(&bit)) {
1206                         print("\t");
1207                         if(bany(&r->set))
1208                                 print(" s:%Q", r->set);
1209                         if(bany(&r->use1))
1210                                 print(" u1:%Q", r->use1);
1211                         if(bany(&r->use2))
1212                                 print(" u2:%Q", r->use2);
1213                         if(bany(&r->refbehind))
1214                                 print(" rb:%Q ", r->refbehind);
1215                         if(bany(&r->refahead))
1216                                 print(" ra:%Q ", r->refahead);
1217                         if(bany(&r->calbehind))
1218                                 print(" cb:%Q ", r->calbehind);
1219                         if(bany(&r->calahead))
1220                                 print(" ca:%Q ", r->calahead);
1221                         if(bany(&r->regdiff))
1222                                 print(" d:%Q ", r->regdiff);
1223                         if(bany(&r->act))
1224                                 print(" a:%Q ", r->act);
1225                 }
1226         }
1227         print("\n");
1228 }
1229
1230 void
1231 dumpit(char *str, Flow *r0, int isreg)
1232 {
1233         Flow *r, *r1;
1234
1235         print("\n%s\n", str);
1236         for(r = r0; r != nil; r = r->link) {
1237                 dumpone(r, isreg);
1238                 r1 = r->p2;
1239                 if(r1 != nil) {
1240                         print(" pred:");
1241                         for(; r1 != nil; r1 = r1->p2link)
1242                                 print(" %.4ud", (int)r1->prog->pc);
1243                         print("\n");
1244                 }
1245                 // Print successors if it's not just the next one
1246                 if(r->s1 != r->link || r->s2 != nil) {
1247                         print(" succ:");
1248                         if(r->s1 != nil)
1249                                 print(" %.4ud", (int)r->s1->prog->pc);
1250                         if(r->s2 != nil)
1251                                 print(" %.4ud", (int)r->s2->prog->pc);
1252                         print("\n");
1253                 }
1254         }
1255 }