]> Cypherpunks.ru repositories - gostls13.git/blob - src/cmd/8g/reg.c
[dev.cc] all: merge dev.power64 (f57928630b36) into dev.cc
[gostls13.git] / src / cmd / 8g / reg.c
1 // Derived from Inferno utils/6c/reg.c
2 // http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
3 //
4 //      Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
5 //      Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
6 //      Portions Copyright © 1997-1999 Vita Nuova Limited
7 //      Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
8 //      Portions Copyright © 2004,2006 Bruce Ellis
9 //      Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
10 //      Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
11 //      Portions Copyright © 2009 The Go Authors.  All rights reserved.
12 //
13 // Permission is hereby granted, free of charge, to any person obtaining a copy
14 // of this software and associated documentation files (the "Software"), to deal
15 // in the Software without restriction, including without limitation the rights
16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 // copies of the Software, and to permit persons to whom the Software is
18 // furnished to do so, subject to the following conditions:
19 //
20 // The above copyright notice and this permission notice shall be included in
21 // all copies or substantial portions of the Software.
22 //
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29 // THE SOFTWARE.
30
31 #include <u.h>
32 #include <libc.h>
33 #include "gg.h"
34 #include "opt.h"
35
36 #define NREGVAR 16      /* 8 integer + 8 floating */
37 #define REGBITS ((uint64)0xffffull)
38 /*c2go enum {
39         NREGVAR = 16,
40         REGBITS = (1<<NREGVAR) - 1,
41 };
42 */
43
44 static  Reg*    firstr;
45 static  int     first   = 1;
46
47 int
48 rcmp(const void *a1, const void *a2)
49 {
50         Rgn *p1, *p2;
51         int c1, c2;
52
53         p1 = (Rgn*)a1;
54         p2 = (Rgn*)a2;
55         c1 = p2->cost;
56         c2 = p1->cost;
57         if(c1 -= c2)
58                 return c1;
59         return p2->varno - p1->varno;
60 }
61
62 static void
63 setaddrs(Bits bit)
64 {
65         int i, n;
66         Var *v;
67         Node *node;
68
69         while(bany(&bit)) {
70                 // convert each bit to a variable
71                 i = bnum(bit);
72                 node = var[i].node;
73                 n = var[i].name;
74                 biclr(&bit, i);
75
76                 // disable all pieces of that variable
77                 for(i=0; i<nvar; i++) {
78                         v = var+i;
79                         if(v->node == node && v->name == n)
80                                 v->addr = 2;
81                 }
82         }
83 }
84
85 static char* regname[] = {
86         ".ax", ".cx", ".dx", ".bx", ".sp", ".bp", ".si", ".di",
87         ".x0", ".x1", ".x2", ".x3", ".x4", ".x5", ".x6", ".x7",
88 };
89
90 static Node* regnodes[NREGVAR];
91
92 static void walkvardef(Node *n, Reg *r, int active);
93
94 void
95 regopt(Prog *firstp)
96 {
97         Reg *r, *r1;
98         Prog *p;
99         Graph *g;
100         ProgInfo info;
101         int i, z, active;
102         uint32 vreg;
103         Bits bit;
104
105         if(first) {
106                 fmtinstall('Q', Qconv);
107                 exregoffset = D_DI;     // no externals
108                 first = 0;
109         }
110
111         mergetemp(firstp);
112
113         /*
114          * control flow is more complicated in generated go code
115          * than in generated c code.  define pseudo-variables for
116          * registers, so we have complete register usage information.
117          */
118         nvar = NREGVAR;
119         memset(var, 0, NREGVAR*sizeof var[0]);
120         for(i=0; i<NREGVAR; i++) {
121                 if(regnodes[i] == N)
122                         regnodes[i] = newname(lookup(regname[i]));
123                 var[i].node = regnodes[i];
124         }
125
126         regbits = RtoB(D_SP);
127         for(z=0; z<BITS; z++) {
128                 externs.b[z] = 0;
129                 params.b[z] = 0;
130                 consts.b[z] = 0;
131                 addrs.b[z] = 0;
132                 ivar.b[z] = 0;
133                 ovar.b[z] = 0;
134         }
135
136         /*
137          * pass 1
138          * build aux data structure
139          * allocate pcs
140          * find use and set of variables
141          */
142         g = flowstart(firstp, sizeof(Reg));
143         if(g == nil) {
144                 for(i=0; i<nvar; i++)
145                         var[i].node->opt = nil;
146                 return;
147         }
148
149         firstr = (Reg*)g->start;
150
151         for(r = firstr; r != R; r = (Reg*)r->f.link) {
152                 p = r->f.prog;
153                 if(p->as == AVARDEF || p->as == AVARKILL)
154                         continue;
155                 proginfo(&info, p);
156
157                 // Avoid making variables for direct-called functions.
158                 if(p->as == ACALL && p->to.type == D_EXTERN)
159                         continue;
160
161                 r->use1.b[0] |= info.reguse | info.regindex;
162                 r->set.b[0] |= info.regset;
163
164                 bit = mkvar(r, &p->from);
165                 if(bany(&bit)) {
166                         if(info.flags & LeftAddr)
167                                 setaddrs(bit);
168                         if(info.flags & LeftRead)
169                                 for(z=0; z<BITS; z++)
170                                         r->use1.b[z] |= bit.b[z];
171                         if(info.flags & LeftWrite)
172                                 for(z=0; z<BITS; z++)
173                                         r->set.b[z] |= bit.b[z];
174                 }
175
176                 bit = mkvar(r, &p->to);
177                 if(bany(&bit)) {        
178                         if(info.flags & RightAddr)
179                                 setaddrs(bit);
180                         if(info.flags & RightRead)
181                                 for(z=0; z<BITS; z++)
182                                         r->use2.b[z] |= bit.b[z];
183                         if(info.flags & RightWrite)
184                                 for(z=0; z<BITS; z++)
185                                         r->set.b[z] |= bit.b[z];
186                 }
187         }
188         if(firstr == R)
189                 return;
190
191         for(i=0; i<nvar; i++) {
192                 Var *v = var+i;
193                 if(v->addr) {
194                         bit = blsh(i);
195                         for(z=0; z<BITS; z++)
196                                 addrs.b[z] |= bit.b[z];
197                 }
198
199                 if(debug['R'] && debug['v'])
200                         print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
201                                 i, v->addr, v->etype, v->width, v->node, v->offset);
202         }
203
204         if(debug['R'] && debug['v'])
205                 dumpit("pass1", &firstr->f, 1);
206
207         /*
208          * pass 2
209          * find looping structure
210          */
211         flowrpo(g);
212
213         if(debug['R'] && debug['v'])
214                 dumpit("pass2", &firstr->f, 1);
215
216         /*
217          * pass 2.5
218          * iterate propagating fat vardef covering forward
219          * r->act records vars with a VARDEF since the last CALL.
220          * (r->act will be reused in pass 5 for something else,
221          * but we'll be done with it by then.)
222          */
223         active = 0;
224         for(r = firstr; r != R; r = (Reg*)r->f.link) {
225                 r->f.active = 0;
226                 r->act = zbits;
227         }
228         for(r = firstr; r != R; r = (Reg*)r->f.link) {
229                 p = r->f.prog;
230                 if(p->as == AVARDEF && isfat(p->to.node->type) && p->to.node->opt != nil) {
231                         active++;
232                         walkvardef(p->to.node, r, active);
233                 }
234         }
235
236         /*
237          * pass 3
238          * iterate propagating usage
239          *      back until flow graph is complete
240          */
241 loop1:
242         change = 0;
243         for(r = firstr; r != R; r = (Reg*)r->f.link)
244                 r->f.active = 0;
245         for(r = firstr; r != R; r = (Reg*)r->f.link)
246                 if(r->f.prog->as == ARET)
247                         prop(r, zbits, zbits);
248 loop11:
249         /* pick up unreachable code */
250         i = 0;
251         for(r = firstr; r != R; r = r1) {
252                 r1 = (Reg*)r->f.link;
253                 if(r1 && r1->f.active && !r->f.active) {
254                         prop(r, zbits, zbits);
255                         i = 1;
256                 }
257         }
258         if(i)
259                 goto loop11;
260         if(change)
261                 goto loop1;
262
263         if(debug['R'] && debug['v'])
264                 dumpit("pass3", &firstr->f, 1);
265
266         /*
267          * pass 4
268          * iterate propagating register/variable synchrony
269          *      forward until graph is complete
270          */
271 loop2:
272         change = 0;
273         for(r = firstr; r != R; r = (Reg*)r->f.link)
274                 r->f.active = 0;
275         synch(firstr, zbits);
276         if(change)
277                 goto loop2;
278
279         if(debug['R'] && debug['v'])
280                 dumpit("pass4", &firstr->f, 1);
281
282         /*
283          * pass 4.5
284          * move register pseudo-variables into regu.
285          */
286         for(r = firstr; r != R; r = (Reg*)r->f.link) {
287                 r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
288
289                 r->set.b[0] &= ~REGBITS;
290                 r->use1.b[0] &= ~REGBITS;
291                 r->use2.b[0] &= ~REGBITS;
292                 r->refbehind.b[0] &= ~REGBITS;
293                 r->refahead.b[0] &= ~REGBITS;
294                 r->calbehind.b[0] &= ~REGBITS;
295                 r->calahead.b[0] &= ~REGBITS;
296                 r->regdiff.b[0] &= ~REGBITS;
297                 r->act.b[0] &= ~REGBITS;
298         }
299
300         /*
301          * pass 5
302          * isolate regions
303          * calculate costs (paint1)
304          */
305         r = firstr;
306         if(r) {
307                 for(z=0; z<BITS; z++)
308                         bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
309                           ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
310                 if(bany(&bit) && !r->f.refset) {
311                         // should never happen - all variables are preset
312                         if(debug['w'])
313                                 print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
314                         r->f.refset = 1;
315                 }
316         }
317         for(r = firstr; r != R; r = (Reg*)r->f.link)
318                 r->act = zbits;
319         rgp = region;
320         nregion = 0;
321         for(r = firstr; r != R; r = (Reg*)r->f.link) {
322                 for(z=0; z<BITS; z++)
323                         bit.b[z] = r->set.b[z] &
324                           ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
325                 if(bany(&bit) && !r->f.refset) {
326                         if(debug['w'])
327                                 print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
328                         r->f.refset = 1;
329                         excise(&r->f);
330                 }
331                 for(z=0; z<BITS; z++)
332                         bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
333                 while(bany(&bit)) {
334                         i = bnum(bit);
335                         rgp->enter = r;
336                         rgp->varno = i;
337                         change = 0;
338                         paint1(r, i);
339                         biclr(&bit, i);
340                         if(change <= 0)
341                                 continue;
342                         rgp->cost = change;
343                         nregion++;
344                         if(nregion >= NRGN) {
345                                 if(debug['R'] && debug['v'])
346                                         print("too many regions\n");
347                                 goto brk;
348                         }
349                         rgp++;
350                 }
351         }
352 brk:
353         qsort(region, nregion, sizeof(region[0]), rcmp);
354
355         /*
356          * pass 6
357          * determine used registers (paint2)
358          * replace code (paint3)
359          */
360         rgp = region;
361         if(debug['R'] && debug['v'])
362                 print("\nregisterizing\n");
363         for(i=0; i<nregion; i++) {
364                 if(debug['R'] && debug['v'])
365                         print("region %d: cost %d varno %d enter %d\n", i, rgp->cost, rgp->varno, rgp->enter->f.prog->pc);
366                 bit = blsh(rgp->varno);
367                 vreg = paint2(rgp->enter, rgp->varno, 0);
368                 vreg = allreg(vreg, rgp);
369                 if(rgp->regno != 0)
370                         paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
371                 rgp++;
372         }
373
374         /*
375          * free aux structures. peep allocates new ones.
376          */
377         for(i=0; i<nvar; i++)
378                 var[i].node->opt = nil;
379         flowend(g);
380         firstr = R;
381
382         if(debug['R'] && debug['v']) {
383                 // Rebuild flow graph, since we inserted instructions
384                 g = flowstart(firstp, sizeof(Reg));
385                 firstr = (Reg*)g->start;
386                 dumpit("pass6", &firstr->f, 1);
387                 flowend(g);
388                 firstr = R;
389         }
390
391         /*
392          * pass 7
393          * peep-hole on basic block
394          */
395         if(!debug['R'] || debug['P'])
396                 peep(firstp);
397
398         /*
399          * eliminate nops
400          */
401         for(p=firstp; p!=P; p=p->link) {
402                 while(p->link != P && p->link->as == ANOP)
403                         p->link = p->link->link;
404                 if(p->to.type == D_BRANCH)
405                         while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
406                                 p->to.u.branch = p->to.u.branch->link;
407         }
408
409         if(!use_sse)
410         for(p=firstp; p!=P; p=p->link) {
411                 if(p->from.type >= D_X0 && p->from.type <= D_X7)
412                         fatal("invalid use of %R with GO386=387: %P", p->from.type, p);
413                 if(p->to.type >= D_X0 && p->to.type <= D_X7)
414                         fatal("invalid use of %R with GO386=387: %P", p->to.type, p);
415         }
416
417         if(debug['R']) {
418                 if(ostats.ncvtreg ||
419                    ostats.nspill ||
420                    ostats.nreload ||
421                    ostats.ndelmov ||
422                    ostats.nvar ||
423                    ostats.naddr ||
424                    0)
425                         print("\nstats\n");
426
427                 if(ostats.ncvtreg)
428                         print(" %4d cvtreg\n", ostats.ncvtreg);
429                 if(ostats.nspill)
430                         print(" %4d spill\n", ostats.nspill);
431                 if(ostats.nreload)
432                         print(" %4d reload\n", ostats.nreload);
433                 if(ostats.ndelmov)
434                         print(" %4d delmov\n", ostats.ndelmov);
435                 if(ostats.nvar)
436                         print(" %4d var\n", ostats.nvar);
437                 if(ostats.naddr)
438                         print(" %4d addr\n", ostats.naddr);
439
440                 memset(&ostats, 0, sizeof(ostats));
441         }
442 }
443
444 static void
445 walkvardef(Node *n, Reg *r, int active)
446 {
447         Reg *r1, *r2;
448         int bn;
449         Var *v;
450         
451         for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) {
452                 if(r1->f.active == active)
453                         break;
454                 r1->f.active = active;
455                 if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n)
456                         break;
457                 for(v=n->opt; v!=nil; v=v->nextinnode) {
458                         bn = v - var;
459                         biset(&r1->act, bn);
460                 }
461                 if(r1->f.prog->as == ACALL)
462                         break;
463         }
464
465         for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1)
466                 if(r2->f.s2 != nil)
467                         walkvardef(n, (Reg*)r2->f.s2, active);
468 }
469
470 /*
471  * add mov b,rn
472  * just after r
473  */
474 void
475 addmove(Reg *r, int bn, int rn, int f)
476 {
477         Prog *p, *p1;
478         Adr *a;
479         Var *v;
480
481         p1 = mal(sizeof(*p1));
482         clearp(p1);
483         p1->pc = 9999;
484
485         p = r->f.prog;
486         p1->link = p->link;
487         p->link = p1;
488         p1->lineno = p->lineno;
489
490         v = var + bn;
491
492         a = &p1->to;
493         a->offset = v->offset;
494         a->etype = v->etype;
495         a->type = v->name;
496         a->node = v->node;
497         a->sym = linksym(v->node->sym);
498
499         // need to clean this up with wptr and
500         // some of the defaults
501         p1->as = AMOVL;
502         switch(v->etype) {
503         default:
504                 fatal("unknown type %E", v->etype);
505         case TINT8:
506         case TUINT8:
507         case TBOOL:
508                 p1->as = AMOVB;
509                 break;
510         case TINT16:
511         case TUINT16:
512                 p1->as = AMOVW;
513                 break;
514         case TFLOAT32:
515                 p1->as = AMOVSS;
516                 break;
517         case TFLOAT64:
518                 p1->as = AMOVSD;
519                 break;
520         case TINT:
521         case TUINT:
522         case TINT32:
523         case TUINT32:
524         case TPTR32:
525                 break;
526         }
527
528         p1->from.type = rn;
529         if(!f) {
530                 p1->from = *a;
531                 *a = zprog.from;
532                 a->type = rn;
533                 if(v->etype == TUINT8)
534                         p1->as = AMOVB;
535                 if(v->etype == TUINT16)
536                         p1->as = AMOVW;
537         }
538         if(debug['R'] && debug['v'])
539                 print("%P ===add=== %P\n", p, p1);
540         ostats.nspill++;
541 }
542
543 uint32
544 doregbits(int r)
545 {
546         uint32 b;
547
548         b = 0;
549         if(r >= D_INDIR)
550                 r -= D_INDIR;
551         if(r >= D_AX && r <= D_DI)
552                 b |= RtoB(r);
553         else
554         if(r >= D_AL && r <= D_BL)
555                 b |= RtoB(r-D_AL+D_AX);
556         else
557         if(r >= D_AH && r <= D_BH)
558                 b |= RtoB(r-D_AH+D_AX);
559         else
560         if(r >= D_X0 && r <= D_X0+7)
561                 b |= FtoB(r);
562         return b;
563 }
564
565 static int
566 overlap(int32 o1, int w1, int32 o2, int w2)
567 {
568         int32 t1, t2;
569
570         t1 = o1+w1;
571         t2 = o2+w2;
572
573         if(!(t1 > o2 && t2 > o1))
574                 return 0;
575
576         return 1;
577 }
578
579 Bits
580 mkvar(Reg *r, Adr *a)
581 {
582         Var *v;
583         int i, t, n, et, z, w, flag, regu;
584         int32 o;
585         Bits bit;
586         Node *node;
587
588         /*
589          * mark registers used
590          */
591         t = a->type;
592         if(t == D_NONE)
593                 goto none;
594
595         if(r != R)
596                 r->use1.b[0] |= doregbits(a->index);
597
598         switch(t) {
599         default:
600                 regu = doregbits(t);
601                 if(regu == 0)
602                         goto none;
603                 bit = zbits;
604                 bit.b[0] = regu;
605                 return bit;
606
607         case D_ADDR:
608                 a->type = a->index;
609                 bit = mkvar(r, a);
610                 setaddrs(bit);
611                 a->type = t;
612                 ostats.naddr++;
613                 goto none;
614
615         case D_EXTERN:
616         case D_STATIC:
617         case D_PARAM:
618         case D_AUTO:
619                 n = t;
620                 break;
621         }
622
623         node = a->node;
624         if(node == N || node->op != ONAME || node->orig == N)
625                 goto none;
626         node = node->orig;
627         if(node->orig != node)
628                 fatal("%D: bad node", a);
629         if(node->sym == S || node->sym->name[0] == '.')
630                 goto none;
631         et = a->etype;
632         o = a->offset;
633         w = a->width;
634         if(w < 0)
635                 fatal("bad width %d for %D", w, a);
636
637         flag = 0;
638         for(i=0; i<nvar; i++) {
639                 v = var+i;
640                 if(v->node == node && v->name == n) {
641                         if(v->offset == o)
642                         if(v->etype == et)
643                         if(v->width == w)
644                                 return blsh(i);
645
646                         // if they overlap, disable both
647                         if(overlap(v->offset, v->width, o, w)) {
648                                 if(debug['R'])
649                                         print("disable %s\n", node->sym->name);
650                                 v->addr = 1;
651                                 flag = 1;
652                         }
653                 }
654         }
655
656         switch(et) {
657         case 0:
658         case TFUNC:
659                 goto none;
660         }
661
662         if(nvar >= NVAR) {
663                 if(debug['w'] > 1 && node != N)
664                         fatal("variable not optimized: %D", a);
665                 
666                 // If we're not tracking a word in a variable, mark the rest as
667                 // having its address taken, so that we keep the whole thing
668                 // live at all calls. otherwise we might optimize away part of
669                 // a variable but not all of it.
670                 for(i=0; i<nvar; i++) {
671                         v = var+i;
672                         if(v->node == node)
673                                 v->addr = 1;
674                 }
675                 goto none;
676         }
677
678         i = nvar;
679         nvar++;
680         v = var+i;
681         v->offset = o;
682         v->name = n;
683         v->etype = et;
684         v->width = w;
685         v->addr = flag;         // funny punning
686         v->node = node;
687         
688         // node->opt is the head of a linked list
689         // of Vars within the given Node, so that
690         // we can start at a Var and find all the other
691         // Vars in the same Go variable.
692         v->nextinnode = node->opt;
693         node->opt = v;
694
695         bit = blsh(i);
696         if(n == D_EXTERN || n == D_STATIC)
697                 for(z=0; z<BITS; z++)
698                         externs.b[z] |= bit.b[z];
699         if(n == D_PARAM)
700                 for(z=0; z<BITS; z++)
701                         params.b[z] |= bit.b[z];
702                 
703         if(node->class == PPARAM)
704                 for(z=0; z<BITS; z++)
705                         ivar.b[z] |= bit.b[z];
706         if(node->class == PPARAMOUT)
707                 for(z=0; z<BITS; z++)
708                         ovar.b[z] |= bit.b[z];
709
710         // Treat values with their address taken as live at calls,
711         // because the garbage collector's liveness analysis in ../gc/plive.c does.
712         // These must be consistent or else we will elide stores and the garbage
713         // collector will see uninitialized data.
714         // The typical case where our own analysis is out of sync is when the
715         // node appears to have its address taken but that code doesn't actually
716         // get generated and therefore doesn't show up as an address being
717         // taken when we analyze the instruction stream.
718         // One instance of this case is when a closure uses the same name as
719         // an outer variable for one of its own variables declared with :=.
720         // The parser flags the outer variable as possibly shared, and therefore
721         // sets addrtaken, even though it ends up not being actually shared.
722         // If we were better about _ elision, _ = &x would suffice too.
723         // The broader := in a closure problem is mentioned in a comment in
724         // closure.c:/^typecheckclosure and dcl.c:/^oldname.
725         if(node->addrtaken)
726                 v->addr = 1;
727
728         // Disable registerization for globals, because:
729         // (1) we might panic at any time and we want the recovery code
730         // to see the latest values (issue 1304).
731         // (2) we don't know what pointers might point at them and we want
732         // loads via those pointers to see updated values and vice versa (issue 7995).
733         //
734         // Disable registerization for results if using defer, because the deferred func
735         // might recover and return, causing the current values to be used.
736         if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT))
737                 v->addr = 1;
738
739         if(debug['R'])
740                 print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
741         ostats.nvar++;
742
743         return bit;
744
745 none:
746         return zbits;
747 }
748
749 void
750 prop(Reg *r, Bits ref, Bits cal)
751 {
752         Reg *r1, *r2;
753         int z, i, j;
754         Var *v, *v1;
755
756         for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
757                 for(z=0; z<BITS; z++) {
758                         ref.b[z] |= r1->refahead.b[z];
759                         if(ref.b[z] != r1->refahead.b[z]) {
760                                 r1->refahead.b[z] = ref.b[z];
761                                 change++;
762                         }
763                         cal.b[z] |= r1->calahead.b[z];
764                         if(cal.b[z] != r1->calahead.b[z]) {
765                                 r1->calahead.b[z] = cal.b[z];
766                                 change++;
767                         }
768                 }
769                 switch(r1->f.prog->as) {
770                 case ACALL:
771                         if(noreturn(r1->f.prog))
772                                 break;
773
774                         // Mark all input variables (ivar) as used, because that's what the
775                         // liveness bitmaps say. The liveness bitmaps say that so that a
776                         // panic will not show stale values in the parameter dump.
777                         // Mark variables with a recent VARDEF (r1->act) as used,
778                         // so that the optimizer flushes initializations to memory,
779                         // so that if a garbage collection happens during this CALL,
780                         // the collector will see initialized memory. Again this is to
781                         // match what the liveness bitmaps say.
782                         for(z=0; z<BITS; z++) {
783                                 cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z];
784                                 ref.b[z] = 0;
785                         }
786                         
787                         // cal.b is the current approximation of what's live across the call.
788                         // Every bit in cal.b is a single stack word. For each such word,
789                         // find all the other tracked stack words in the same Go variable
790                         // (struct/slice/string/interface) and mark them live too.
791                         // This is necessary because the liveness analysis for the garbage
792                         // collector works at variable granularity, not at word granularity.
793                         // It is fundamental for slice/string/interface: the garbage collector
794                         // needs the whole value, not just some of the words, in order to
795                         // interpret the other bits correctly. Specifically, slice needs a consistent
796                         // ptr and cap, string needs a consistent ptr and len, and interface
797                         // needs a consistent type word and data word.
798                         for(z=0; z<BITS; z++) {
799                                 if(cal.b[z] == 0)
800                                         continue;
801                                 for(i=0; i<64; i++) {
802                                         if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
803                                                 continue;
804                                         v = var+z*64+i;
805                                         if(v->node->opt == nil) // v represents fixed register, not Go variable
806                                                 continue;
807
808                                         // v->node->opt is the head of a linked list of Vars
809                                         // corresponding to tracked words from the Go variable v->node.
810                                         // Walk the list and set all the bits.
811                                         // For a large struct this could end up being quadratic:
812                                         // after the first setting, the outer loop (for z, i) would see a 1 bit
813                                         // for all of the remaining words in the struct, and for each such
814                                         // word would go through and turn on all the bits again.
815                                         // To avoid the quadratic behavior, we only turn on the bits if
816                                         // v is the head of the list or if the head's bit is not yet turned on.
817                                         // This will set the bits at most twice, keeping the overall loop linear.
818                                         v1 = v->node->opt;
819                                         j = v1 - var;
820                                         if(v == v1 || !btest(&cal, j)) {
821                                                 for(; v1 != nil; v1 = v1->nextinnode) {
822                                                         j = v1 - var;
823                                                         biset(&cal, j);
824                                                 }
825                                         }
826                                 }
827                         }
828                         break;
829
830                 case ATEXT:
831                         for(z=0; z<BITS; z++) {
832                                 cal.b[z] = 0;
833                                 ref.b[z] = 0;
834                         }
835                         break;
836
837                 case ARET:
838                         for(z=0; z<BITS; z++) {
839                                 cal.b[z] = externs.b[z] | ovar.b[z];
840                                 ref.b[z] = 0;
841                         }
842                         break;
843                 }
844                 for(z=0; z<BITS; z++) {
845                         ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
846                                 r1->use1.b[z] | r1->use2.b[z];
847                         cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
848                         r1->refbehind.b[z] = ref.b[z];
849                         r1->calbehind.b[z] = cal.b[z];
850                 }
851                 if(r1->f.active)
852                         break;
853                 r1->f.active = 1;
854         }
855         for(; r != r1; r = (Reg*)r->f.p1)
856                 for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
857                         prop(r2, r->refbehind, r->calbehind);
858 }
859
860 void
861 synch(Reg *r, Bits dif)
862 {
863         Reg *r1;
864         int z;
865
866         for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) {
867                 for(z=0; z<BITS; z++) {
868                         dif.b[z] = (dif.b[z] &
869                                 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
870                                         r1->set.b[z] | r1->regdiff.b[z];
871                         if(dif.b[z] != r1->regdiff.b[z]) {
872                                 r1->regdiff.b[z] = dif.b[z];
873                                 change++;
874                         }
875                 }
876                 if(r1->f.active)
877                         break;
878                 r1->f.active = 1;
879                 for(z=0; z<BITS; z++)
880                         dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
881                 if((Reg*)r1->f.s2 != R)
882                         synch((Reg*)r1->f.s2, dif);
883         }
884 }
885
886 uint32
887 allreg(uint32 b, Rgn *r)
888 {
889         Var *v;
890         int i;
891
892         v = var + r->varno;
893         r->regno = 0;
894         switch(v->etype) {
895
896         default:
897                 fatal("unknown etype %d/%E", bitno(b), v->etype);
898                 break;
899
900         case TINT8:
901         case TUINT8:
902         case TINT16:
903         case TUINT16:
904         case TINT32:
905         case TUINT32:
906         case TINT64:
907         case TINT:
908         case TUINT:
909         case TUINTPTR:
910         case TBOOL:
911         case TPTR32:
912                 i = BtoR(~b);
913                 if(i && r->cost > 0) {
914                         r->regno = i;
915                         return RtoB(i);
916                 }
917                 break;
918
919         case TFLOAT32:
920         case TFLOAT64:
921                 if(!use_sse)
922                         break;
923                 i = BtoF(~b);
924                 if(i && r->cost > 0) {
925                         r->regno = i;
926                         return FtoB(i);
927                 }
928                 break;
929         }
930         return 0;
931 }
932
933 void
934 paint1(Reg *r, int bn)
935 {
936         Reg *r1;
937         Prog *p;
938         int z;
939         uint64 bb, rbz;
940
941         z = bn/64;
942         bb = 1LL<<(bn%64);
943         if(r->act.b[z] & bb)
944                 return;
945         for(;;) {
946                 if(!(r->refbehind.b[z] & bb))
947                         break;
948                 r1 = (Reg*)r->f.p1;
949                 if(r1 == R)
950                         break;
951                 if(!(r1->refahead.b[z] & bb))
952                         break;
953                 if(r1->act.b[z] & bb)
954                         break;
955                 r = r1;
956         }
957
958         rbz = ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z]));
959         if(LOAD(r) & rbz & bb) {
960                 change -= CLOAD * r->f.loop;
961         }
962         for(;;) {
963                 r->act.b[z] |= bb;
964                 p = r->f.prog;
965
966                 if(r->f.prog->as != ANOP) { // don't give credit for NOPs
967                         if(r->use1.b[z] & bb) {
968                                 change += CREF * r->f.loop;
969                                 if(p->as == AFMOVL || p->as == AFMOVW)
970                                         if(BtoR(bb) != D_F0)
971                                                 change = -CINF;
972                         }
973                         if((r->use2.b[z]|r->set.b[z]) & bb) {
974                                 change += CREF * r->f.loop;
975                                 if(p->as == AFMOVL || p->as == AFMOVW)
976                                         if(BtoR(bb) != D_F0)
977                                                 change = -CINF;
978                         }
979                 }
980
981                 if(STORE(r) & r->regdiff.b[z] & bb) {
982                         change -= CLOAD * r->f.loop;
983                         if(p->as == AFMOVL || p->as == AFMOVW)
984                                 if(BtoR(bb) != D_F0)
985                                         change = -CINF;
986                 }
987
988                 if(r->refbehind.b[z] & bb)
989                         for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
990                                 if(r1->refahead.b[z] & bb)
991                                         paint1(r1, bn);
992
993                 if(!(r->refahead.b[z] & bb))
994                         break;
995                 r1 = (Reg*)r->f.s2;
996                 if(r1 != R)
997                         if(r1->refbehind.b[z] & bb)
998                                 paint1(r1, bn);
999                 r = (Reg*)r->f.s1;
1000                 if(r == R)
1001                         break;
1002                 if(r->act.b[z] & bb)
1003                         break;
1004                 if(!(r->refbehind.b[z] & bb))
1005                         break;
1006         }
1007 }
1008
1009 uint32
1010 paint2(Reg *r, int bn, int depth)
1011 {
1012         Reg *r1;
1013         int z;
1014         uint64 bb, vreg;
1015
1016         z = bn/64;
1017         bb = 1LL << (bn%64);
1018         vreg = regbits;
1019         if(!(r->act.b[z] & bb))
1020                 return vreg;
1021         for(;;) {
1022                 if(!(r->refbehind.b[z] & bb))
1023                         break;
1024                 r1 = (Reg*)r->f.p1;
1025                 if(r1 == R)
1026                         break;
1027                 if(!(r1->refahead.b[z] & bb))
1028                         break;
1029                 if(!(r1->act.b[z] & bb))
1030                         break;
1031                 r = r1;
1032         }
1033         for(;;) {
1034                 if(debug['R'] && debug['v'])
1035                         print("  paint2 %d %P\n", depth, r->f.prog);
1036
1037                 r->act.b[z] &= ~bb;
1038
1039                 vreg |= r->regu;
1040
1041                 if(r->refbehind.b[z] & bb)
1042                         for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
1043                                 if(r1->refahead.b[z] & bb)
1044                                         vreg |= paint2(r1, bn, depth+1);
1045
1046                 if(!(r->refahead.b[z] & bb))
1047                         break;
1048                 r1 = (Reg*)r->f.s2;
1049                 if(r1 != R)
1050                         if(r1->refbehind.b[z] & bb)
1051                                 vreg |= paint2(r1, bn, depth+1);
1052                 r = (Reg*)r->f.s1;
1053                 if(r == R)
1054                         break;
1055                 if(!(r->act.b[z] & bb))
1056                         break;
1057                 if(!(r->refbehind.b[z] & bb))
1058                         break;
1059         }
1060
1061         return vreg;
1062 }
1063
1064 void
1065 paint3(Reg *r, int bn, uint32 rb, int rn)
1066 {
1067         Reg *r1;
1068         Prog *p;
1069         int z;
1070         uint64 bb, rbz;
1071
1072         z = bn/64;
1073         bb = 1LL << (bn%64);
1074         if(r->act.b[z] & bb)
1075                 return;
1076         for(;;) {
1077                 if(!(r->refbehind.b[z] & bb))
1078                         break;
1079                 r1 = (Reg*)r->f.p1;
1080                 if(r1 == R)
1081                         break;
1082                 if(!(r1->refahead.b[z] & bb))
1083                         break;
1084                 if(r1->act.b[z] & bb)
1085                         break;
1086                 r = r1;
1087         }
1088
1089         rbz = ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z]));
1090         if(LOAD(r) & rbz & bb)
1091                 addmove(r, bn, rn, 0);
1092         for(;;) {
1093                 r->act.b[z] |= bb;
1094                 p = r->f.prog;
1095
1096                 if(r->use1.b[z] & bb) {
1097                         if(debug['R'] && debug['v'])
1098                                 print("%P", p);
1099                         addreg(&p->from, rn);
1100                         if(debug['R'] && debug['v'])
1101                                 print(" ===change== %P\n", p);
1102                 }
1103                 if((r->use2.b[z]|r->set.b[z]) & bb) {
1104                         if(debug['R'] && debug['v'])
1105                                 print("%P", p);
1106                         addreg(&p->to, rn);
1107                         if(debug['R'] && debug['v'])
1108                                 print(" ===change== %P\n", p);
1109                 }
1110
1111                 if(STORE(r) & r->regdiff.b[z] & bb)
1112                         addmove(r, bn, rn, 1);
1113                 r->regu |= rb;
1114
1115                 if(r->refbehind.b[z] & bb)
1116                         for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
1117                                 if(r1->refahead.b[z] & bb)
1118                                         paint3(r1, bn, rb, rn);
1119
1120                 if(!(r->refahead.b[z] & bb))
1121                         break;
1122                 r1 = (Reg*)r->f.s2;
1123                 if(r1 != R)
1124                         if(r1->refbehind.b[z] & bb)
1125                                 paint3(r1, bn, rb, rn);
1126                 r = (Reg*)r->f.s1;
1127                 if(r == R)
1128                         break;
1129                 if(r->act.b[z] & bb)
1130                         break;
1131                 if(!(r->refbehind.b[z] & bb))
1132                         break;
1133         }
1134 }
1135
1136 void
1137 addreg(Adr *a, int rn)
1138 {
1139         a->sym = nil;
1140         a->node = nil;
1141         a->offset = 0;
1142         a->type = rn;
1143
1144         ostats.ncvtreg++;
1145 }
1146
1147 uint32
1148 RtoB(int r)
1149 {
1150
1151         if(r < D_AX || r > D_DI)
1152                 return 0;
1153         return 1L << (r-D_AX);
1154 }
1155
1156 int
1157 BtoR(uint32 b)
1158 {
1159
1160         b &= 0xffL;
1161         if(b == 0)
1162                 return 0;
1163         return bitno(b) + D_AX;
1164 }
1165
1166 uint32
1167 FtoB(int f)
1168 {
1169         if(f < D_X0 || f > D_X7)
1170                 return 0;
1171         return 1L << (f - D_X0 + 8);
1172 }
1173
1174 int
1175 BtoF(uint32 b)
1176 {
1177         b &= 0xFF00L;
1178         if(b == 0)
1179                 return 0;
1180         return bitno(b) - 8 + D_X0;
1181 }
1182
1183 void
1184 dumpone(Flow *f, int isreg)
1185 {
1186         int z;
1187         Bits bit;
1188         Reg *r;
1189
1190         print("%d:%P", f->loop, f->prog);
1191         if(isreg) {
1192                 r = (Reg*)f;
1193                 for(z=0; z<BITS; z++)
1194                         bit.b[z] =
1195                                 r->set.b[z] |
1196                                 r->use1.b[z] |
1197                                 r->use2.b[z] |
1198                                 r->refbehind.b[z] |
1199                                 r->refahead.b[z] |
1200                                 r->calbehind.b[z] |
1201                                 r->calahead.b[z] |
1202                                 r->regdiff.b[z] |
1203                                 r->act.b[z] |
1204                                         0;
1205                 if(bany(&bit)) {
1206                         print("\t");
1207                         if(bany(&r->set))
1208                                 print(" s:%Q", r->set);
1209                         if(bany(&r->use1))
1210                                 print(" u1:%Q", r->use1);
1211                         if(bany(&r->use2))
1212                                 print(" u2:%Q", r->use2);
1213                         if(bany(&r->refbehind))
1214                                 print(" rb:%Q ", r->refbehind);
1215                         if(bany(&r->refahead))
1216                                 print(" ra:%Q ", r->refahead);
1217                         if(bany(&r->calbehind))
1218                                 print(" cb:%Q ", r->calbehind);
1219                         if(bany(&r->calahead))
1220                                 print(" ca:%Q ", r->calahead);
1221                         if(bany(&r->regdiff))
1222                                 print(" d:%Q ", r->regdiff);
1223                         if(bany(&r->act))
1224                                 print(" a:%Q ", r->act);
1225                 }
1226         }
1227         print("\n");
1228 }
1229
1230 void
1231 dumpit(char *str, Flow *r0, int isreg)
1232 {
1233         Flow *r, *r1;
1234
1235         print("\n%s\n", str);
1236         for(r = r0; r != nil; r = r->link) {
1237                 dumpone(r, isreg);
1238                 r1 = r->p2;
1239                 if(r1 != nil) {
1240                         print(" pred:");
1241                         for(; r1 != nil; r1 = r1->p2link)
1242                                 print(" %.4ud", (int)r1->prog->pc);
1243                         print("\n");
1244                 }
1245                 // Print successors if it's not just the next one
1246                 if(r->s1 != r->link || r->s2 != nil) {
1247                         print(" succ:");
1248                         if(r->s1 != nil)
1249                                 print(" %.4ud", (int)r->s1->prog->pc);
1250                         if(r->s2 != nil)
1251                                 print(" %.4ud", (int)r->s2->prog->pc);
1252                         print("\n");
1253                 }
1254         }
1255 }