gostls13.git: src/cmd/compile/internal/x86/gsubr.go
1 // Derived from Inferno utils/8c/txt.c
2 // http://code.google.com/p/inferno-os/source/browse/utils/8c/txt.c
3 //
4 //      Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
5 //      Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
6 //      Portions Copyright © 1997-1999 Vita Nuova Limited
7 //      Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
8 //      Portions Copyright © 2004,2006 Bruce Ellis
9 //      Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
10 //      Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
11 //      Portions Copyright © 2009 The Go Authors.  All rights reserved.
12 //
13 // Permission is hereby granted, free of charge, to any person obtaining a copy
14 // of this software and associated documentation files (the "Software"), to deal
15 // in the Software without restriction, including without limitation the rights
16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 // copies of the Software, and to permit persons to whom the Software is
18 // furnished to do so, subject to the following conditions:
19 //
20 // The above copyright notice and this permission notice shall be included in
21 // all copies or substantial portions of the Software.
22 //
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29 // THE SOFTWARE.
30
31 package x86
32
33 import (
34         "cmd/compile/internal/big"
35         "cmd/compile/internal/gc"
36         "cmd/internal/obj"
37         "cmd/internal/obj/x86"
38         "fmt"
39 )
40
41 // TODO(rsc): Can make this bigger if we move
42 // the text segment up higher in 8l for all GOOS.
43 // At the same time, can raise StackBig in ../../runtime/stack.h.
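// unmappedzero is the size of the region starting at address 0 that is
// assumed to be unmapped: a load at an offset smaller than this from a nil
// pointer is guaranteed to fault. The rest of the back end uses it when
// deciding whether an explicit nil check is needed for a field access.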
44 var unmappedzero uint32 = 4096
45
46 // foptoas flags
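// Frev selects the reversed-operand form of an x87 instruction (e.g. FSUBR
// instead of FSUB), Fpop selects the form that pops the register stack once
// after the operation (the "P"-suffixed opcodes), and Fpop2 the form that
// pops it twice (FCOMPP).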
47 const (
48         Frev  = 1 << 0
49         Fpop  = 1 << 1
50         Fpop2 = 1 << 2
51 )
52
53 /*
54  * return Axxx for Oxxx on type t.
55  */
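// For example, optoas(gc.OADD, gc.Types[gc.TINT32]) returns x86.AADDL, and
// optoas(gc.OEQ, gc.Types[gc.TFLOAT64]) returns the branch opcode x86.AJEQ.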
56 func optoas(op gc.Op, t *gc.Type) obj.As {
57         if t == nil {
58                 gc.Fatalf("optoas: t is nil")
59         }
60
61         // avoid constant conversions in switches below
62         const (
63                 OMINUS_  = uint32(gc.OMINUS) << 16
64                 OLSH_    = uint32(gc.OLSH) << 16
65                 ORSH_    = uint32(gc.ORSH) << 16
66                 OADD_    = uint32(gc.OADD) << 16
67                 OSUB_    = uint32(gc.OSUB) << 16
68                 OMUL_    = uint32(gc.OMUL) << 16
69                 ODIV_    = uint32(gc.ODIV) << 16
70                 OMOD_    = uint32(gc.OMOD) << 16
71                 OOR_     = uint32(gc.OOR) << 16
72                 OAND_    = uint32(gc.OAND) << 16
73                 OXOR_    = uint32(gc.OXOR) << 16
74                 OEQ_     = uint32(gc.OEQ) << 16
75                 ONE_     = uint32(gc.ONE) << 16
76                 OLT_     = uint32(gc.OLT) << 16
77                 OLE_     = uint32(gc.OLE) << 16
78                 OGE_     = uint32(gc.OGE) << 16
79                 OGT_     = uint32(gc.OGT) << 16
80                 OCMP_    = uint32(gc.OCMP) << 16
81                 OAS_     = uint32(gc.OAS) << 16
82                 OHMUL_   = uint32(gc.OHMUL) << 16
83                 OADDR_   = uint32(gc.OADDR) << 16
84                 OINC_    = uint32(gc.OINC) << 16
85                 ODEC_    = uint32(gc.ODEC) << 16
86                 OLROT_   = uint32(gc.OLROT) << 16
87                 OEXTEND_ = uint32(gc.OEXTEND) << 16
88                 OCOM_    = uint32(gc.OCOM) << 16
89         )
90
91         a := obj.AXXX
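        // The switch key packs the operation into the upper 16 bits and the
        // simplified type (gc.Simtype) into the lower 16 bits.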
92         switch uint32(op)<<16 | uint32(gc.Simtype[t.Etype]) {
93         default:
94                 gc.Fatalf("optoas: no entry %v-%v", op, t)
95
96         case OADDR_ | gc.TPTR32:
97                 a = x86.ALEAL
98
99         case OEQ_ | gc.TBOOL,
100                 OEQ_ | gc.TINT8,
101                 OEQ_ | gc.TUINT8,
102                 OEQ_ | gc.TINT16,
103                 OEQ_ | gc.TUINT16,
104                 OEQ_ | gc.TINT32,
105                 OEQ_ | gc.TUINT32,
106                 OEQ_ | gc.TINT64,
107                 OEQ_ | gc.TUINT64,
108                 OEQ_ | gc.TPTR32,
109                 OEQ_ | gc.TPTR64,
110                 OEQ_ | gc.TFLOAT32,
111                 OEQ_ | gc.TFLOAT64:
112                 a = x86.AJEQ
113
114         case ONE_ | gc.TBOOL,
115                 ONE_ | gc.TINT8,
116                 ONE_ | gc.TUINT8,
117                 ONE_ | gc.TINT16,
118                 ONE_ | gc.TUINT16,
119                 ONE_ | gc.TINT32,
120                 ONE_ | gc.TUINT32,
121                 ONE_ | gc.TINT64,
122                 ONE_ | gc.TUINT64,
123                 ONE_ | gc.TPTR32,
124                 ONE_ | gc.TPTR64,
125                 ONE_ | gc.TFLOAT32,
126                 ONE_ | gc.TFLOAT64:
127                 a = x86.AJNE
128
129         case OLT_ | gc.TINT8,
130                 OLT_ | gc.TINT16,
131                 OLT_ | gc.TINT32,
132                 OLT_ | gc.TINT64:
133                 a = x86.AJLT
134
135         case OLT_ | gc.TUINT8,
136                 OLT_ | gc.TUINT16,
137                 OLT_ | gc.TUINT32,
138                 OLT_ | gc.TUINT64:
139                 a = x86.AJCS
140
141         case OLE_ | gc.TINT8,
142                 OLE_ | gc.TINT16,
143                 OLE_ | gc.TINT32,
144                 OLE_ | gc.TINT64:
145                 a = x86.AJLE
146
147         case OLE_ | gc.TUINT8,
148                 OLE_ | gc.TUINT16,
149                 OLE_ | gc.TUINT32,
150                 OLE_ | gc.TUINT64:
151                 a = x86.AJLS
152
153         case OGT_ | gc.TINT8,
154                 OGT_ | gc.TINT16,
155                 OGT_ | gc.TINT32,
156                 OGT_ | gc.TINT64:
157                 a = x86.AJGT
158
159         case OGT_ | gc.TUINT8,
160                 OGT_ | gc.TUINT16,
161                 OGT_ | gc.TUINT32,
162                 OGT_ | gc.TUINT64,
163                 OLT_ | gc.TFLOAT32,
164                 OLT_ | gc.TFLOAT64:
165                 a = x86.AJHI
166
167         case OGE_ | gc.TINT8,
168                 OGE_ | gc.TINT16,
169                 OGE_ | gc.TINT32,
170                 OGE_ | gc.TINT64:
171                 a = x86.AJGE
172
173         case OGE_ | gc.TUINT8,
174                 OGE_ | gc.TUINT16,
175                 OGE_ | gc.TUINT32,
176                 OGE_ | gc.TUINT64,
177                 OLE_ | gc.TFLOAT32,
178                 OLE_ | gc.TFLOAT64:
179                 a = x86.AJCC
180
181         case OCMP_ | gc.TBOOL,
182                 OCMP_ | gc.TINT8,
183                 OCMP_ | gc.TUINT8:
184                 a = x86.ACMPB
185
186         case OCMP_ | gc.TINT16,
187                 OCMP_ | gc.TUINT16:
188                 a = x86.ACMPW
189
190         case OCMP_ | gc.TINT32,
191                 OCMP_ | gc.TUINT32,
192                 OCMP_ | gc.TPTR32:
193                 a = x86.ACMPL
194
195         case OAS_ | gc.TBOOL,
196                 OAS_ | gc.TINT8,
197                 OAS_ | gc.TUINT8:
198                 a = x86.AMOVB
199
200         case OAS_ | gc.TINT16,
201                 OAS_ | gc.TUINT16:
202                 a = x86.AMOVW
203
204         case OAS_ | gc.TINT32,
205                 OAS_ | gc.TUINT32,
206                 OAS_ | gc.TPTR32:
207                 a = x86.AMOVL
208
209         case OAS_ | gc.TFLOAT32:
210                 a = x86.AMOVSS
211
212         case OAS_ | gc.TFLOAT64:
213                 a = x86.AMOVSD
214
215         case OADD_ | gc.TINT8,
216                 OADD_ | gc.TUINT8:
217                 a = x86.AADDB
218
219         case OADD_ | gc.TINT16,
220                 OADD_ | gc.TUINT16:
221                 a = x86.AADDW
222
223         case OADD_ | gc.TINT32,
224                 OADD_ | gc.TUINT32,
225                 OADD_ | gc.TPTR32:
226                 a = x86.AADDL
227
228         case OSUB_ | gc.TINT8,
229                 OSUB_ | gc.TUINT8:
230                 a = x86.ASUBB
231
232         case OSUB_ | gc.TINT16,
233                 OSUB_ | gc.TUINT16:
234                 a = x86.ASUBW
235
236         case OSUB_ | gc.TINT32,
237                 OSUB_ | gc.TUINT32,
238                 OSUB_ | gc.TPTR32:
239                 a = x86.ASUBL
240
241         case OINC_ | gc.TINT8,
242                 OINC_ | gc.TUINT8:
243                 a = x86.AINCB
244
245         case OINC_ | gc.TINT16,
246                 OINC_ | gc.TUINT16:
247                 a = x86.AINCW
248
249         case OINC_ | gc.TINT32,
250                 OINC_ | gc.TUINT32,
251                 OINC_ | gc.TPTR32:
252                 a = x86.AINCL
253
254         case ODEC_ | gc.TINT8,
255                 ODEC_ | gc.TUINT8:
256                 a = x86.ADECB
257
258         case ODEC_ | gc.TINT16,
259                 ODEC_ | gc.TUINT16:
260                 a = x86.ADECW
261
262         case ODEC_ | gc.TINT32,
263                 ODEC_ | gc.TUINT32,
264                 ODEC_ | gc.TPTR32:
265                 a = x86.ADECL
266
267         case OCOM_ | gc.TINT8,
268                 OCOM_ | gc.TUINT8:
269                 a = x86.ANOTB
270
271         case OCOM_ | gc.TINT16,
272                 OCOM_ | gc.TUINT16:
273                 a = x86.ANOTW
274
275         case OCOM_ | gc.TINT32,
276                 OCOM_ | gc.TUINT32,
277                 OCOM_ | gc.TPTR32:
278                 a = x86.ANOTL
279
280         case OMINUS_ | gc.TINT8,
281                 OMINUS_ | gc.TUINT8:
282                 a = x86.ANEGB
283
284         case OMINUS_ | gc.TINT16,
285                 OMINUS_ | gc.TUINT16:
286                 a = x86.ANEGW
287
288         case OMINUS_ | gc.TINT32,
289                 OMINUS_ | gc.TUINT32,
290                 OMINUS_ | gc.TPTR32:
291                 a = x86.ANEGL
292
293         case OAND_ | gc.TINT8,
294                 OAND_ | gc.TUINT8:
295                 a = x86.AANDB
296
297         case OAND_ | gc.TINT16,
298                 OAND_ | gc.TUINT16:
299                 a = x86.AANDW
300
301         case OAND_ | gc.TINT32,
302                 OAND_ | gc.TUINT32,
303                 OAND_ | gc.TPTR32:
304                 a = x86.AANDL
305
306         case OOR_ | gc.TINT8,
307                 OOR_ | gc.TUINT8:
308                 a = x86.AORB
309
310         case OOR_ | gc.TINT16,
311                 OOR_ | gc.TUINT16:
312                 a = x86.AORW
313
314         case OOR_ | gc.TINT32,
315                 OOR_ | gc.TUINT32,
316                 OOR_ | gc.TPTR32:
317                 a = x86.AORL
318
319         case OXOR_ | gc.TINT8,
320                 OXOR_ | gc.TUINT8:
321                 a = x86.AXORB
322
323         case OXOR_ | gc.TINT16,
324                 OXOR_ | gc.TUINT16:
325                 a = x86.AXORW
326
327         case OXOR_ | gc.TINT32,
328                 OXOR_ | gc.TUINT32,
329                 OXOR_ | gc.TPTR32:
330                 a = x86.AXORL
331
332         case OLROT_ | gc.TINT8,
333                 OLROT_ | gc.TUINT8:
334                 a = x86.AROLB
335
336         case OLROT_ | gc.TINT16,
337                 OLROT_ | gc.TUINT16:
338                 a = x86.AROLW
339
340         case OLROT_ | gc.TINT32,
341                 OLROT_ | gc.TUINT32,
342                 OLROT_ | gc.TPTR32:
343                 a = x86.AROLL
344
345         case OLSH_ | gc.TINT8,
346                 OLSH_ | gc.TUINT8:
347                 a = x86.ASHLB
348
349         case OLSH_ | gc.TINT16,
350                 OLSH_ | gc.TUINT16:
351                 a = x86.ASHLW
352
353         case OLSH_ | gc.TINT32,
354                 OLSH_ | gc.TUINT32,
355                 OLSH_ | gc.TPTR32:
356                 a = x86.ASHLL
357
358         case ORSH_ | gc.TUINT8:
359                 a = x86.ASHRB
360
361         case ORSH_ | gc.TUINT16:
362                 a = x86.ASHRW
363
364         case ORSH_ | gc.TUINT32,
365                 ORSH_ | gc.TPTR32:
366                 a = x86.ASHRL
367
368         case ORSH_ | gc.TINT8:
369                 a = x86.ASARB
370
371         case ORSH_ | gc.TINT16:
372                 a = x86.ASARW
373
374         case ORSH_ | gc.TINT32:
375                 a = x86.ASARL
376
377         case OHMUL_ | gc.TINT8,
378                 OMUL_ | gc.TINT8,
379                 OMUL_ | gc.TUINT8:
380                 a = x86.AIMULB
381
382         case OHMUL_ | gc.TINT16,
383                 OMUL_ | gc.TINT16,
384                 OMUL_ | gc.TUINT16:
385                 a = x86.AIMULW
386
387         case OHMUL_ | gc.TINT32,
388                 OMUL_ | gc.TINT32,
389                 OMUL_ | gc.TUINT32,
390                 OMUL_ | gc.TPTR32:
391                 a = x86.AIMULL
392
393         case OHMUL_ | gc.TUINT8:
394                 a = x86.AMULB
395
396         case OHMUL_ | gc.TUINT16:
397                 a = x86.AMULW
398
399         case OHMUL_ | gc.TUINT32,
400                 OHMUL_ | gc.TPTR32:
401                 a = x86.AMULL
402
403         case ODIV_ | gc.TINT8,
404                 OMOD_ | gc.TINT8:
405                 a = x86.AIDIVB
406
407         case ODIV_ | gc.TUINT8,
408                 OMOD_ | gc.TUINT8:
409                 a = x86.ADIVB
410
411         case ODIV_ | gc.TINT16,
412                 OMOD_ | gc.TINT16:
413                 a = x86.AIDIVW
414
415         case ODIV_ | gc.TUINT16,
416                 OMOD_ | gc.TUINT16:
417                 a = x86.ADIVW
418
419         case ODIV_ | gc.TINT32,
420                 OMOD_ | gc.TINT32:
421                 a = x86.AIDIVL
422
423         case ODIV_ | gc.TUINT32,
424                 ODIV_ | gc.TPTR32,
425                 OMOD_ | gc.TUINT32,
426                 OMOD_ | gc.TPTR32:
427                 a = x86.ADIVL
428
429         case OEXTEND_ | gc.TINT16:
430                 a = x86.ACWD
431
432         case OEXTEND_ | gc.TINT32:
433                 a = x86.ACDQ
434         }
435
436         return a
437 }
438
439 func foptoas(op gc.Op, t *gc.Type, flg int) obj.As {
440         a := obj.AXXX
441         et := gc.Simtype[t.Etype]
442
443         // avoid constant conversions in switches below
444         const (
445                 OCMP_   = uint32(gc.OCMP) << 16
446                 OAS_    = uint32(gc.OAS) << 16
447                 OADD_   = uint32(gc.OADD) << 16
448                 OSUB_   = uint32(gc.OSUB) << 16
449                 OMUL_   = uint32(gc.OMUL) << 16
450                 ODIV_   = uint32(gc.ODIV) << 16
451                 OMINUS_ = uint32(gc.OMINUS) << 16
452         )
453
454         if !gc.Thearch.Use387 {
455                 switch uint32(op)<<16 | uint32(et) {
456                 default:
457                         gc.Fatalf("foptoas-sse: no entry %v-%v", op, t)
458
459                 case OCMP_ | gc.TFLOAT32:
460                         a = x86.AUCOMISS
461
462                 case OCMP_ | gc.TFLOAT64:
463                         a = x86.AUCOMISD
464
465                 case OAS_ | gc.TFLOAT32:
466                         a = x86.AMOVSS
467
468                 case OAS_ | gc.TFLOAT64:
469                         a = x86.AMOVSD
470
471                 case OADD_ | gc.TFLOAT32:
472                         a = x86.AADDSS
473
474                 case OADD_ | gc.TFLOAT64:
475                         a = x86.AADDSD
476
477                 case OSUB_ | gc.TFLOAT32:
478                         a = x86.ASUBSS
479
480                 case OSUB_ | gc.TFLOAT64:
481                         a = x86.ASUBSD
482
483                 case OMUL_ | gc.TFLOAT32:
484                         a = x86.AMULSS
485
486                 case OMUL_ | gc.TFLOAT64:
487                         a = x86.AMULSD
488
489                 case ODIV_ | gc.TFLOAT32:
490                         a = x86.ADIVSS
491
492                 case ODIV_ | gc.TFLOAT64:
493                         a = x86.ADIVSD
494                 }
495
496                 return a
497         }
498
499         // If we need Fpop, it means we're working on
500         // two different floating-point registers, not memory.
501         // In that case, the instruction has only a float64 form.
502         if flg&Fpop != 0 {
503                 et = gc.TFLOAT64
504         }
505
506         // clear Frev if unneeded
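        // (OADD and OMUL are commutative, so the reversed form is never needed.)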
507         switch op {
508         case gc.OADD,
509                 gc.OMUL:
510                 flg &^= Frev
511         }
512
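        // The switch key packs op into the top 16 bits, the simple type into
        // bits 8-15, and the flag bits (Frev/Fpop/Fpop2) into the low byte.
        // For example, on this 387 path foptoas(gc.OADD, gc.Types[gc.TFLOAT64], Fpop)
        // yields x86.AFADDDP.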
513         switch uint32(op)<<16 | (uint32(et)<<8 | uint32(flg)) {
514         case OADD_ | (gc.TFLOAT32<<8 | 0):
515                 return x86.AFADDF
516
517         case OADD_ | (gc.TFLOAT64<<8 | 0):
518                 return x86.AFADDD
519
520         case OADD_ | (gc.TFLOAT64<<8 | Fpop):
521                 return x86.AFADDDP
522
523         case OSUB_ | (gc.TFLOAT32<<8 | 0):
524                 return x86.AFSUBF
525
526         case OSUB_ | (gc.TFLOAT32<<8 | Frev):
527                 return x86.AFSUBRF
528
529         case OSUB_ | (gc.TFLOAT64<<8 | 0):
530                 return x86.AFSUBD
531
532         case OSUB_ | (gc.TFLOAT64<<8 | Frev):
533                 return x86.AFSUBRD
534
535         case OSUB_ | (gc.TFLOAT64<<8 | Fpop):
536                 return x86.AFSUBDP
537
538         case OSUB_ | (gc.TFLOAT64<<8 | (Fpop | Frev)):
539                 return x86.AFSUBRDP
540
541         case OMUL_ | (gc.TFLOAT32<<8 | 0):
542                 return x86.AFMULF
543
544         case OMUL_ | (gc.TFLOAT64<<8 | 0):
545                 return x86.AFMULD
546
547         case OMUL_ | (gc.TFLOAT64<<8 | Fpop):
548                 return x86.AFMULDP
549
550         case ODIV_ | (gc.TFLOAT32<<8 | 0):
551                 return x86.AFDIVF
552
553         case ODIV_ | (gc.TFLOAT32<<8 | Frev):
554                 return x86.AFDIVRF
555
556         case ODIV_ | (gc.TFLOAT64<<8 | 0):
557                 return x86.AFDIVD
558
559         case ODIV_ | (gc.TFLOAT64<<8 | Frev):
560                 return x86.AFDIVRD
561
562         case ODIV_ | (gc.TFLOAT64<<8 | Fpop):
563                 return x86.AFDIVDP
564
565         case ODIV_ | (gc.TFLOAT64<<8 | (Fpop | Frev)):
566                 return x86.AFDIVRDP
567
568         case OCMP_ | (gc.TFLOAT32<<8 | 0):
569                 return x86.AFCOMF
570
571         case OCMP_ | (gc.TFLOAT32<<8 | Fpop):
572                 return x86.AFCOMFP
573
574         case OCMP_ | (gc.TFLOAT64<<8 | 0):
575                 return x86.AFCOMD
576
577         case OCMP_ | (gc.TFLOAT64<<8 | Fpop):
578                 return x86.AFCOMDP
579
580         case OCMP_ | (gc.TFLOAT64<<8 | Fpop2):
581                 return x86.AFCOMDPP
582
583         case OMINUS_ | (gc.TFLOAT32<<8 | 0):
584                 return x86.AFCHS
585
586         case OMINUS_ | (gc.TFLOAT64<<8 | 0):
587                 return x86.AFCHS
588         }
589
590         gc.Fatalf("foptoas %v %v %#x", op, t, flg)
591         return 0
592 }
593
594 var resvd = []int{
595         //      REG_DI, // for movstring
596         //      REG_SI, // for movstring
597
598         x86.REG_AX, // for divide
599         x86.REG_CX, // for shift
600         x86.REG_DX, // for divide, context
601         x86.REG_SP, // for stack
602 }
603
604 /*
605  * generate
606  *      as $c, reg
607  */
608 func gconreg(as obj.As, c int64, reg int) {
609         var n1 gc.Node
610         var n2 gc.Node
611
612         gc.Nodconst(&n1, gc.Types[gc.TINT64], c)
613         gc.Nodreg(&n2, gc.Types[gc.TINT64], reg)
614         gins(as, &n1, &n2)
615 }
616
617 /*
618  * generate
619  *      as $c, n
620  */
621 func ginscon(as obj.As, c int64, n2 *gc.Node) {
622         var n1 gc.Node
623         gc.Nodconst(&n1, gc.Types[gc.TINT32], c)
624         gins(as, &n1, n2)
625 }
626
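// ginscmp generates a comparison of n1 and n2 for type t and returns the
// conditional branch (chosen by optoas(op, t)) for the caller to patch.
// For integer and pointer types, if n1 is a constant (or address constant)
// and n2 is not, the comparison is reversed so that the constant comes second.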
627 func ginscmp(op gc.Op, t *gc.Type, n1, n2 *gc.Node, likely int) *obj.Prog {
628         if t.IsInteger() || t.Etype == gc.Tptr {
629                 if (n1.Op == gc.OLITERAL || n1.Op == gc.OADDR && n1.Left.Op == gc.ONAME) && n2.Op != gc.OLITERAL {
630                         // Reverse comparison to place constant (including address constant) last.
631                         op = gc.Brrev(op)
632                         n1, n2 = n2, n1
633                 }
634         }
635
636         // General case.
637         var r1, r2, g1, g2 gc.Node
638
639         // A special case to make write barriers more efficient.
640         // Comparing the first field of a named struct can be done directly.
641         base := n1
642         if n1.Op == gc.ODOT && n1.Left.Type.IsStruct() && n1.Left.Type.Field(0).Sym == n1.Sym {
643                 base = n1.Left
644         }
645
646         if base.Op == gc.ONAME && base.Class&gc.PHEAP == 0 || n1.Op == gc.OINDREG {
647                 r1 = *n1
648         } else {
649                 gc.Regalloc(&r1, t, n1)
650                 gc.Regalloc(&g1, n1.Type, &r1)
651                 gc.Cgen(n1, &g1)
652                 gmove(&g1, &r1)
653         }
654         if n2.Op == gc.OLITERAL && t.IsInteger() || n2.Op == gc.OADDR && n2.Left.Op == gc.ONAME && n2.Left.Class == gc.PEXTERN {
655                 r2 = *n2
656         } else {
657                 gc.Regalloc(&r2, t, n2)
658                 gc.Regalloc(&g2, n1.Type, &r2)
659                 gc.Cgen(n2, &g2)
660                 gmove(&g2, &r2)
661         }
662         gins(optoas(gc.OCMP, t), &r1, &r2)
663         if r1.Op == gc.OREGISTER {
664                 gc.Regfree(&g1)
665                 gc.Regfree(&r1)
666         }
667         if r2.Op == gc.OREGISTER {
668                 gc.Regfree(&g2)
669                 gc.Regfree(&r2)
670         }
671         return gc.Gbranch(optoas(op, t), nil, likely)
672 }
673
674 /*
675  * swap node contents
676  */
677 func nswap(a *gc.Node, b *gc.Node) {
678         t := *a
679         *a = *b
680         *b = t
681 }
682
683 /*
684  * return constant i node.
685  * overwritten by next call, but useful in calls to gins.
686  */
687
688 var ncon_n gc.Node
689
690 func ncon(i uint32) *gc.Node {
691         if ncon_n.Type == nil {
692                 gc.Nodconst(&ncon_n, gc.Types[gc.TUINT32], 0)
693         }
694         ncon_n.SetInt(int64(i))
695         return &ncon_n
696 }
697
698 var sclean [10]gc.Node
699
700 var nsclean int
701
702 /*
703  * n is a 64-bit value.  fill in lo and hi to refer to its 32-bit halves.
704  */
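// Each call to split64 must be matched by a call to splitclean, which frees
// any register that split64 had to allocate in order to address the value.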
705 func split64(n *gc.Node, lo *gc.Node, hi *gc.Node) {
706         if !gc.Is64(n.Type) {
707                 gc.Fatalf("split64 %v", n.Type)
708         }
709
710         if nsclean >= len(sclean) {
711                 gc.Fatalf("split64 clean")
712         }
713         sclean[nsclean].Op = gc.OEMPTY
714         nsclean++
715         switch n.Op {
716         default:
717                 switch n.Op {
718                 default:
719                         var n1 gc.Node
720                         if !dotaddable(n, &n1) {
721                                 gc.Igen(n, &n1, nil)
722                                 sclean[nsclean-1] = n1
723                         }
724
725                         n = &n1
726
727                 case gc.ONAME:
728                         if n.Class == gc.PPARAMREF {
729                                 var n1 gc.Node
730                                 gc.Cgen(n.Name.Heapaddr, &n1)
731                                 sclean[nsclean-1] = n1
732                                 n = &n1
733                         }
734
735                 case gc.OINDREG:
736                         // nothing to do
737                         break
738                 }
739
740                 *lo = *n
741                 *hi = *n
742                 lo.Type = gc.Types[gc.TUINT32]
743                 if n.Type.Etype == gc.TINT64 {
744                         hi.Type = gc.Types[gc.TINT32]
745                 } else {
746                         hi.Type = gc.Types[gc.TUINT32]
747                 }
748                 hi.Xoffset += 4
749
750         case gc.OLITERAL:
751                 var n1 gc.Node
752                 n.Convconst(&n1, n.Type)
753                 i := n1.Int64()
754                 gc.Nodconst(lo, gc.Types[gc.TUINT32], int64(uint32(i)))
755                 i >>= 32
756                 if n.Type.Etype == gc.TINT64 {
757                         gc.Nodconst(hi, gc.Types[gc.TINT32], int64(int32(i)))
758                 } else {
759                         gc.Nodconst(hi, gc.Types[gc.TUINT32], int64(uint32(i)))
760                 }
761         }
762 }
763
764 func splitclean() {
765         if nsclean <= 0 {
766                 gc.Fatalf("splitclean")
767         }
768         nsclean--
769         if sclean[nsclean].Op != gc.OEMPTY {
770                 gc.Regfree(&sclean[nsclean])
771         }
772 }
773
774 // set up nodes representing fp constants
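// bignodes initializes zerof, two63f and two64f as float64 constants for
// 0, 2**63 and 2**64; floatmove uses them when converting floating-point
// values to uint64.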
775 var (
776         zerof        gc.Node
777         two63f       gc.Node
778         two64f       gc.Node
779         bignodes_did bool
780 )
781
782 func bignodes() {
783         if bignodes_did {
784                 return
785         }
786         bignodes_did = true
787
788         gc.Nodconst(&zerof, gc.Types[gc.TINT64], 0)
789         zerof.Convconst(&zerof, gc.Types[gc.TFLOAT64])
790
791         var i big.Int
792         i.SetInt64(1)
793         i.Lsh(&i, 63)
794         var bigi gc.Node
795
796         gc.Nodconst(&bigi, gc.Types[gc.TUINT64], 0)
797         bigi.SetBigInt(&i)
798         bigi.Convconst(&two63f, gc.Types[gc.TFLOAT64])
799
800         gc.Nodconst(&bigi, gc.Types[gc.TUINT64], 0)
801         i.Lsh(&i, 1)
802         bigi.SetBigInt(&i)
803         bigi.Convconst(&two64f, gc.Types[gc.TFLOAT64])
804 }
805
806 func memname(n *gc.Node, t *gc.Type) {
807         gc.Tempname(n, t)
808         n.Sym = gc.Lookup("." + n.Sym.Name[1:]) // keep optimizer from registerizing
809         n.Orig.Sym = n.Sym
810 }
811
812 func gmove(f *gc.Node, t *gc.Node) {
813         if gc.Debug['M'] != 0 {
814                 fmt.Printf("gmove %v -> %v\n", f, t)
815         }
816
817         ft := gc.Simsimtype(f.Type)
818         tt := gc.Simsimtype(t.Type)
819         cvt := t.Type
820
821         if gc.Iscomplex[ft] || gc.Iscomplex[tt] {
822                 gc.Complexmove(f, t)
823                 return
824         }
825
826         if gc.Isfloat[ft] || gc.Isfloat[tt] {
827                 floatmove(f, t)
828                 return
829         }
830
831         // cannot have two integer memory operands;
832         // except 64-bit, which always copies via registers anyway.
833         var r1 gc.Node
834         var a obj.As
835         if gc.Isint[ft] && gc.Isint[tt] && !gc.Is64(f.Type) && !gc.Is64(t.Type) && gc.Ismem(f) && gc.Ismem(t) {
836                 goto hard
837         }
838
839         // convert constant to desired type
840         if f.Op == gc.OLITERAL {
841                 var con gc.Node
842                 f.Convconst(&con, t.Type)
843                 f = &con
844                 ft = gc.Simsimtype(con.Type)
845         }
846
847         // value -> value copy, only one memory operand.
848         // figure out the instruction to use.
849         // break out of switch for one-instruction gins.
850         // goto rdst for "destination must be register".
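        // goto rsrc for "source must be register".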
851         // goto hard for "convert to cvt type first".
852         // otherwise handle and return.
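        //
        // For example, int8 -> int16 sign-extends with AMOVBWSX into a register
        // destination (rdst), while int8 -> int64 first converts via int32 (hard).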
853
854         switch uint32(ft)<<16 | uint32(tt) {
855         default:
856                 // should not happen
857                 gc.Fatalf("gmove %v -> %v", f, t)
858                 return
859
860                 /*
861                  * integer copy and truncate
862                  */
863         case gc.TINT8<<16 | gc.TINT8, // same size
864                 gc.TINT8<<16 | gc.TUINT8,
865                 gc.TUINT8<<16 | gc.TINT8,
866                 gc.TUINT8<<16 | gc.TUINT8:
867                 a = x86.AMOVB
868
869         case gc.TINT16<<16 | gc.TINT8, // truncate
870                 gc.TUINT16<<16 | gc.TINT8,
871                 gc.TINT32<<16 | gc.TINT8,
872                 gc.TUINT32<<16 | gc.TINT8,
873                 gc.TINT16<<16 | gc.TUINT8,
874                 gc.TUINT16<<16 | gc.TUINT8,
875                 gc.TINT32<<16 | gc.TUINT8,
876                 gc.TUINT32<<16 | gc.TUINT8:
877                 a = x86.AMOVB
878
879                 goto rsrc
880
881         case gc.TINT64<<16 | gc.TINT8, // truncate low word
882                 gc.TUINT64<<16 | gc.TINT8,
883                 gc.TINT64<<16 | gc.TUINT8,
884                 gc.TUINT64<<16 | gc.TUINT8:
885                 var flo gc.Node
886                 var fhi gc.Node
887                 split64(f, &flo, &fhi)
888
889                 var r1 gc.Node
890                 gc.Nodreg(&r1, t.Type, x86.REG_AX)
891                 gmove(&flo, &r1)
892                 gins(x86.AMOVB, &r1, t)
893                 splitclean()
894                 return
895
896         case gc.TINT16<<16 | gc.TINT16, // same size
897                 gc.TINT16<<16 | gc.TUINT16,
898                 gc.TUINT16<<16 | gc.TINT16,
899                 gc.TUINT16<<16 | gc.TUINT16:
900                 a = x86.AMOVW
901
902         case gc.TINT32<<16 | gc.TINT16, // truncate
903                 gc.TUINT32<<16 | gc.TINT16,
904                 gc.TINT32<<16 | gc.TUINT16,
905                 gc.TUINT32<<16 | gc.TUINT16:
906                 a = x86.AMOVW
907
908                 goto rsrc
909
910         case gc.TINT64<<16 | gc.TINT16, // truncate low word
911                 gc.TUINT64<<16 | gc.TINT16,
912                 gc.TINT64<<16 | gc.TUINT16,
913                 gc.TUINT64<<16 | gc.TUINT16:
914                 var flo gc.Node
915                 var fhi gc.Node
916                 split64(f, &flo, &fhi)
917
918                 var r1 gc.Node
919                 gc.Nodreg(&r1, t.Type, x86.REG_AX)
920                 gmove(&flo, &r1)
921                 gins(x86.AMOVW, &r1, t)
922                 splitclean()
923                 return
924
925         case gc.TINT32<<16 | gc.TINT32, // same size
926                 gc.TINT32<<16 | gc.TUINT32,
927                 gc.TUINT32<<16 | gc.TINT32,
928                 gc.TUINT32<<16 | gc.TUINT32:
929                 a = x86.AMOVL
930
931         case gc.TINT64<<16 | gc.TINT32, // truncate
932                 gc.TUINT64<<16 | gc.TINT32,
933                 gc.TINT64<<16 | gc.TUINT32,
934                 gc.TUINT64<<16 | gc.TUINT32:
935                 var fhi gc.Node
936                 var flo gc.Node
937                 split64(f, &flo, &fhi)
938
939                 var r1 gc.Node
940                 gc.Nodreg(&r1, t.Type, x86.REG_AX)
941                 gmove(&flo, &r1)
942                 gins(x86.AMOVL, &r1, t)
943                 splitclean()
944                 return
945
946         case gc.TINT64<<16 | gc.TINT64, // same size
947                 gc.TINT64<<16 | gc.TUINT64,
948                 gc.TUINT64<<16 | gc.TINT64,
949                 gc.TUINT64<<16 | gc.TUINT64:
950                 var fhi gc.Node
951                 var flo gc.Node
952                 split64(f, &flo, &fhi)
953
954                 var tlo gc.Node
955                 var thi gc.Node
956                 split64(t, &tlo, &thi)
957                 if f.Op == gc.OLITERAL {
958                         gins(x86.AMOVL, &flo, &tlo)
959                         gins(x86.AMOVL, &fhi, &thi)
960                 } else {
961                         // Implementation of conversion-free x = y for int64 or uint64 x.
962                         // This is generated by the code that copies small values out of closures,
963                         // and that code has DX live, so avoid DX and just use AX twice.
964                         var r1 gc.Node
965                         gc.Nodreg(&r1, gc.Types[gc.TUINT32], x86.REG_AX)
966                         gins(x86.AMOVL, &flo, &r1)
967                         gins(x86.AMOVL, &r1, &tlo)
968                         gins(x86.AMOVL, &fhi, &r1)
969                         gins(x86.AMOVL, &r1, &thi)
970                 }
971
972                 splitclean()
973                 splitclean()
974                 return
975
976                 /*
977                  * integer up-conversions
978                  */
979         case gc.TINT8<<16 | gc.TINT16, // sign extend int8
980                 gc.TINT8<<16 | gc.TUINT16:
981                 a = x86.AMOVBWSX
982
983                 goto rdst
984
985         case gc.TINT8<<16 | gc.TINT32,
986                 gc.TINT8<<16 | gc.TUINT32:
987                 a = x86.AMOVBLSX
988                 goto rdst
989
990         case gc.TINT8<<16 | gc.TINT64, // convert via int32
991                 gc.TINT8<<16 | gc.TUINT64:
992                 cvt = gc.Types[gc.TINT32]
993
994                 goto hard
995
996         case gc.TUINT8<<16 | gc.TINT16, // zero extend uint8
997                 gc.TUINT8<<16 | gc.TUINT16:
998                 a = x86.AMOVBWZX
999
1000                 goto rdst
1001
1002         case gc.TUINT8<<16 | gc.TINT32,
1003                 gc.TUINT8<<16 | gc.TUINT32:
1004                 a = x86.AMOVBLZX
1005                 goto rdst
1006
1007         case gc.TUINT8<<16 | gc.TINT64, // convert via uint32
1008                 gc.TUINT8<<16 | gc.TUINT64:
1009                 cvt = gc.Types[gc.TUINT32]
1010
1011                 goto hard
1012
1013         case gc.TINT16<<16 | gc.TINT32, // sign extend int16
1014                 gc.TINT16<<16 | gc.TUINT32:
1015                 a = x86.AMOVWLSX
1016
1017                 goto rdst
1018
1019         case gc.TINT16<<16 | gc.TINT64, // convert via int32
1020                 gc.TINT16<<16 | gc.TUINT64:
1021                 cvt = gc.Types[gc.TINT32]
1022
1023                 goto hard
1024
1025         case gc.TUINT16<<16 | gc.TINT32, // zero extend uint16
1026                 gc.TUINT16<<16 | gc.TUINT32:
1027                 a = x86.AMOVWLZX
1028
1029                 goto rdst
1030
1031         case gc.TUINT16<<16 | gc.TINT64, // convert via uint32
1032                 gc.TUINT16<<16 | gc.TUINT64:
1033                 cvt = gc.Types[gc.TUINT32]
1034
1035                 goto hard
1036
1037         case gc.TINT32<<16 | gc.TINT64, // sign extend int32
1038                 gc.TINT32<<16 | gc.TUINT64:
1039                 var thi gc.Node
1040                 var tlo gc.Node
1041                 split64(t, &tlo, &thi)
1042
1043                 var flo gc.Node
1044                 gc.Nodreg(&flo, tlo.Type, x86.REG_AX)
1045                 var fhi gc.Node
1046                 gc.Nodreg(&fhi, thi.Type, x86.REG_DX)
1047                 gmove(f, &flo)
1048                 gins(x86.ACDQ, nil, nil)
1049                 gins(x86.AMOVL, &flo, &tlo)
1050                 gins(x86.AMOVL, &fhi, &thi)
1051                 splitclean()
1052                 return
1053
1054         case gc.TUINT32<<16 | gc.TINT64, // zero extend uint32
1055                 gc.TUINT32<<16 | gc.TUINT64:
1056                 var tlo gc.Node
1057                 var thi gc.Node
1058                 split64(t, &tlo, &thi)
1059
1060                 gmove(f, &tlo)
1061                 gins(x86.AMOVL, ncon(0), &thi)
1062                 splitclean()
1063                 return
1064         }
1065
1066         gins(a, f, t)
1067         return
1068
1069         // requires register source
1070 rsrc:
1071         gc.Regalloc(&r1, f.Type, t)
1072
1073         gmove(f, &r1)
1074         gins(a, &r1, t)
1075         gc.Regfree(&r1)
1076         return
1077
1078         // requires register destination
1079 rdst:
1080         {
1081                 gc.Regalloc(&r1, t.Type, t)
1082
1083                 gins(a, f, &r1)
1084                 gmove(&r1, t)
1085                 gc.Regfree(&r1)
1086                 return
1087         }
1088
1089         // requires register intermediate
1090 hard:
1091         gc.Regalloc(&r1, cvt, t)
1092
1093         gmove(f, &r1)
1094         gmove(&r1, t)
1095         gc.Regfree(&r1)
1096         return
1097 }
1098
1099 func floatmove(f *gc.Node, t *gc.Node) {
1100         var r1 gc.Node
1101
1102         ft := gc.Simsimtype(f.Type)
1103         tt := gc.Simsimtype(t.Type)
1104         cvt := t.Type
1105
1106         // cannot have two floating point memory operands.
1107         if gc.Isfloat[ft] && gc.Isfloat[tt] && gc.Ismem(f) && gc.Ismem(t) {
1108                 goto hard
1109         }
1110
1111         // convert constant to desired type
1112         if f.Op == gc.OLITERAL {
1113                 var con gc.Node
1114                 f.Convconst(&con, t.Type)
1115                 f = &con
1116                 ft = gc.Simsimtype(con.Type)
1117
1118                 // some constants can't move directly to memory.
1119                 if gc.Ismem(t) {
1120                         // float constants come from memory.
1121                         if gc.Isfloat[tt] {
1122                                 goto hard
1123                         }
1124                 }
1125         }
1126
1127         // value -> value copy, only one memory operand.
1128         // figure out the instruction to use.
1129         // break out of switch for one-instruction gins.
1130         // goto rdst for "destination must be register".
1131         // goto hard for "convert to cvt type first".
1132         // otherwise handle and return.
1133
1134         switch uint32(ft)<<16 | uint32(tt) {
1135         default:
1136                 if gc.Thearch.Use387 {
1137                         floatmove_387(f, t)
1138                 } else {
1139                         floatmove_sse(f, t)
1140                 }
1141                 return
1142
1143                 // float to very long integer.
1144         case gc.TFLOAT32<<16 | gc.TINT64,
1145                 gc.TFLOAT64<<16 | gc.TINT64:
1146                 if f.Op == gc.OREGISTER {
1147                         cvt = f.Type
1148                         goto hardmem
1149                 }
1150
1151                 var r1 gc.Node
1152                 gc.Nodreg(&r1, gc.Types[ft], x86.REG_F0)
1153                 if ft == gc.TFLOAT32 {
1154                         gins(x86.AFMOVF, f, &r1)
1155                 } else {
1156                         gins(x86.AFMOVD, f, &r1)
1157                 }
1158
1159                 // set round to zero mode during conversion
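                // (0xf7f masks all x87 exceptions and sets the rounding-control
                // bits of the control word to 11, i.e. truncate toward zero.)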
1160                 var t1 gc.Node
1161                 memname(&t1, gc.Types[gc.TUINT16])
1162
1163                 var t2 gc.Node
1164                 memname(&t2, gc.Types[gc.TUINT16])
1165                 gins(x86.AFSTCW, nil, &t1)
1166                 gins(x86.AMOVW, ncon(0xf7f), &t2)
1167                 gins(x86.AFLDCW, &t2, nil)
1168                 if tt == gc.TINT16 {
1169                         gins(x86.AFMOVWP, &r1, t)
1170                 } else if tt == gc.TINT32 {
1171                         gins(x86.AFMOVLP, &r1, t)
1172                 } else {
1173                         gins(x86.AFMOVVP, &r1, t)
1174                 }
1175                 gins(x86.AFLDCW, &t1, nil)
1176                 return
1177
1178         case gc.TFLOAT32<<16 | gc.TUINT64,
1179                 gc.TFLOAT64<<16 | gc.TUINT64:
1180                 if !gc.Ismem(f) {
1181                         cvt = f.Type
1182                         goto hardmem
1183                 }
1184
1185                 bignodes()
1186                 var f0 gc.Node
1187                 gc.Nodreg(&f0, gc.Types[ft], x86.REG_F0)
1188                 var f1 gc.Node
1189                 gc.Nodreg(&f1, gc.Types[ft], x86.REG_F0+1)
1190                 var ax gc.Node
1191                 gc.Nodreg(&ax, gc.Types[gc.TUINT16], x86.REG_AX)
1192
1193                 if ft == gc.TFLOAT32 {
1194                         gins(x86.AFMOVF, f, &f0)
1195                 } else {
1196                         gins(x86.AFMOVD, f, &f0)
1197                 }
1198
1199                 // if 0 > v { answer = 0 }
1200                 gins(x86.AFMOVD, &zerof, &f0)
1201                 gins(x86.AFUCOMP, &f0, &f1)
1202                 gins(x86.AFSTSW, nil, &ax)
1203                 gins(x86.ASAHF, nil, nil)
1204                 p1 := gc.Gbranch(optoas(gc.OGT, gc.Types[tt]), nil, 0)
1205
1206                 // if 1<<64 <= v { answer = 0 too }
1207                 gins(x86.AFMOVD, &two64f, &f0)
1208
1209                 gins(x86.AFUCOMP, &f0, &f1)
1210                 gins(x86.AFSTSW, nil, &ax)
1211                 gins(x86.ASAHF, nil, nil)
1212                 p2 := gc.Gbranch(optoas(gc.OGT, gc.Types[tt]), nil, 0)
1213                 gc.Patch(p1, gc.Pc)
1214                 gins(x86.AFMOVVP, &f0, t) // don't care about t, but will pop the stack
1215                 var thi gc.Node
1216                 var tlo gc.Node
1217                 split64(t, &tlo, &thi)
1218                 gins(x86.AMOVL, ncon(0), &tlo)
1219                 gins(x86.AMOVL, ncon(0), &thi)
1220                 splitclean()
1221                 p1 = gc.Gbranch(obj.AJMP, nil, 0)
1222                 gc.Patch(p2, gc.Pc)
1223
1224                 // in range; algorithm is:
1225                 //      if small enough, use native float64 -> int64 conversion.
1226                 //      otherwise, subtract 2^63, convert, and add it back.
1227
1228                 // set round to zero mode during conversion
1229                 var t1 gc.Node
1230                 memname(&t1, gc.Types[gc.TUINT16])
1231
1232                 var t2 gc.Node
1233                 memname(&t2, gc.Types[gc.TUINT16])
1234                 gins(x86.AFSTCW, nil, &t1)
1235                 gins(x86.AMOVW, ncon(0xf7f), &t2)
1236                 gins(x86.AFLDCW, &t2, nil)
1237
1238                 // actual work
1239                 gins(x86.AFMOVD, &two63f, &f0)
1240
1241                 gins(x86.AFUCOMP, &f0, &f1)
1242                 gins(x86.AFSTSW, nil, &ax)
1243                 gins(x86.ASAHF, nil, nil)
1244                 p2 = gc.Gbranch(optoas(gc.OLE, gc.Types[tt]), nil, 0)
1245                 gins(x86.AFMOVVP, &f0, t)
1246                 p3 := gc.Gbranch(obj.AJMP, nil, 0)
1247                 gc.Patch(p2, gc.Pc)
1248                 gins(x86.AFMOVD, &two63f, &f0)
1249                 gins(x86.AFSUBDP, &f0, &f1)
1250                 gins(x86.AFMOVVP, &f0, t)
1251                 split64(t, &tlo, &thi)
1252                 gins(x86.AXORL, ncon(0x80000000), &thi) // + 2^63
1253                 gc.Patch(p3, gc.Pc)
1254                 splitclean()
1255
1256                 // restore rounding mode
1257                 gins(x86.AFLDCW, &t1, nil)
1258
1259                 gc.Patch(p1, gc.Pc)
1260                 return
1261
1262                 /*
1263                  * integer to float
1264                  */
1265         case gc.TINT64<<16 | gc.TFLOAT32,
1266                 gc.TINT64<<16 | gc.TFLOAT64:
1267                 if t.Op == gc.OREGISTER {
1268                         goto hardmem
1269                 }
1270                 var f0 gc.Node
1271                 gc.Nodreg(&f0, t.Type, x86.REG_F0)
1272                 gins(x86.AFMOVV, f, &f0)
1273                 if tt == gc.TFLOAT32 {
1274                         gins(x86.AFMOVFP, &f0, t)
1275                 } else {
1276                         gins(x86.AFMOVDP, &f0, t)
1277                 }
1278                 return
1279
1280                 // algorithm is:
1281                 //      if small enough, use native int64 -> float64 conversion.
1282                 //      otherwise, halve (rounding to odd?), convert, and double.
1283         case gc.TUINT64<<16 | gc.TFLOAT32,
1284                 gc.TUINT64<<16 | gc.TFLOAT64:
1285                 var ax gc.Node
1286                 gc.Nodreg(&ax, gc.Types[gc.TUINT32], x86.REG_AX)
1287
1288                 var dx gc.Node
1289                 gc.Nodreg(&dx, gc.Types[gc.TUINT32], x86.REG_DX)
1290                 var cx gc.Node
1291                 gc.Nodreg(&cx, gc.Types[gc.TUINT32], x86.REG_CX)
1292                 var t1 gc.Node
1293                 gc.Tempname(&t1, f.Type)
1294                 var tlo gc.Node
1295                 var thi gc.Node
1296                 split64(&t1, &tlo, &thi)
1297                 gmove(f, &t1)
1298                 gins(x86.ACMPL, &thi, ncon(0))
1299                 p1 := gc.Gbranch(x86.AJLT, nil, 0)
1300
1301                 // native
1302                 var r1 gc.Node
1303                 gc.Nodreg(&r1, gc.Types[tt], x86.REG_F0)
1304
1305                 gins(x86.AFMOVV, &t1, &r1)
1306                 if tt == gc.TFLOAT32 {
1307                         gins(x86.AFMOVFP, &r1, t)
1308                 } else {
1309                         gins(x86.AFMOVDP, &r1, t)
1310                 }
1311                 p2 := gc.Gbranch(obj.AJMP, nil, 0)
1312
1313                 // simulated
1314                 gc.Patch(p1, gc.Pc)
1315
1316                 gmove(&tlo, &ax)
1317                 gmove(&thi, &dx)
1318                 p1 = gins(x86.ASHRL, ncon(1), &ax)
1319                 p1.From.Index = x86.REG_DX // double-width shift DX -> AX
1320                 p1.From.Scale = 0
1321                 gins(x86.AMOVL, ncon(0), &cx)
1322                 gins(x86.ASETCC, nil, &cx)
1323                 gins(x86.AORL, &cx, &ax)
1324                 gins(x86.ASHRL, ncon(1), &dx)
1325                 gmove(&dx, &thi)
1326                 gmove(&ax, &tlo)
1327                 gc.Nodreg(&r1, gc.Types[tt], x86.REG_F0)
1328                 var r2 gc.Node
1329                 gc.Nodreg(&r2, gc.Types[tt], x86.REG_F0+1)
1330                 gins(x86.AFMOVV, &t1, &r1)
1331                 gins(x86.AFMOVD, &r1, &r1)
1332                 gins(x86.AFADDDP, &r1, &r2)
1333                 if tt == gc.TFLOAT32 {
1334                         gins(x86.AFMOVFP, &r1, t)
1335                 } else {
1336                         gins(x86.AFMOVDP, &r1, t)
1337                 }
1338                 gc.Patch(p2, gc.Pc)
1339                 splitclean()
1340                 return
1341         }
1342
1343         // requires register intermediate
1344 hard:
1345         gc.Regalloc(&r1, cvt, t)
1346
1347         gmove(f, &r1)
1348         gmove(&r1, t)
1349         gc.Regfree(&r1)
1350         return
1351
1352         // requires memory intermediate
1353 hardmem:
1354         gc.Tempname(&r1, cvt)
1355
1356         gmove(f, &r1)
1357         gmove(&r1, t)
1358         return
1359 }
1360
1361 func floatmove_387(f *gc.Node, t *gc.Node) {
1362         var r1 gc.Node
1363         var a obj.As
1364
1365         ft := gc.Simsimtype(f.Type)
1366         tt := gc.Simsimtype(t.Type)
1367         cvt := t.Type
1368
1369         switch uint32(ft)<<16 | uint32(tt) {
1370         default:
1371                 goto fatal
1372
1373                 /*
1374                  * float to integer
1375                  */
1376         case gc.TFLOAT32<<16 | gc.TINT16,
1377                 gc.TFLOAT32<<16 | gc.TINT32,
1378                 gc.TFLOAT32<<16 | gc.TINT64,
1379                 gc.TFLOAT64<<16 | gc.TINT16,
1380                 gc.TFLOAT64<<16 | gc.TINT32,
1381                 gc.TFLOAT64<<16 | gc.TINT64:
1382                 if t.Op == gc.OREGISTER {
1383                         goto hardmem
1384                 }
1385                 var r1 gc.Node
1386                 gc.Nodreg(&r1, gc.Types[ft], x86.REG_F0)
1387                 if f.Op != gc.OREGISTER {
1388                         if ft == gc.TFLOAT32 {
1389                                 gins(x86.AFMOVF, f, &r1)
1390                         } else {
1391                                 gins(x86.AFMOVD, f, &r1)
1392                         }
1393                 }
1394
1395                 // set round to zero mode during conversion
1396                 var t1 gc.Node
1397                 memname(&t1, gc.Types[gc.TUINT16])
1398
1399                 var t2 gc.Node
1400                 memname(&t2, gc.Types[gc.TUINT16])
1401                 gins(x86.AFSTCW, nil, &t1)
1402                 gins(x86.AMOVW, ncon(0xf7f), &t2)
1403                 gins(x86.AFLDCW, &t2, nil)
1404                 if tt == gc.TINT16 {
1405                         gins(x86.AFMOVWP, &r1, t)
1406                 } else if tt == gc.TINT32 {
1407                         gins(x86.AFMOVLP, &r1, t)
1408                 } else {
1409                         gins(x86.AFMOVVP, &r1, t)
1410                 }
1411                 gins(x86.AFLDCW, &t1, nil)
1412                 return
1413
1414                 // convert via int32.
1415         case gc.TFLOAT32<<16 | gc.TINT8,
1416                 gc.TFLOAT32<<16 | gc.TUINT16,
1417                 gc.TFLOAT32<<16 | gc.TUINT8,
1418                 gc.TFLOAT64<<16 | gc.TINT8,
1419                 gc.TFLOAT64<<16 | gc.TUINT16,
1420                 gc.TFLOAT64<<16 | gc.TUINT8:
1421                 var t1 gc.Node
1422                 gc.Tempname(&t1, gc.Types[gc.TINT32])
1423
1424                 gmove(f, &t1)
1425                 switch tt {
1426                 default:
1427                         gc.Fatalf("gmove %v", t)
1428
1429                 case gc.TINT8:
1430                         gins(x86.ACMPL, &t1, ncon(-0x80&(1<<32-1)))
1431                         p1 := gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TINT32]), nil, -1)
1432                         gins(x86.ACMPL, &t1, ncon(0x7f))
1433                         p2 := gc.Gbranch(optoas(gc.OGT, gc.Types[gc.TINT32]), nil, -1)
1434                         p3 := gc.Gbranch(obj.AJMP, nil, 0)
1435                         gc.Patch(p1, gc.Pc)
1436                         gc.Patch(p2, gc.Pc)
1437                         gmove(ncon(-0x80&(1<<32-1)), &t1)
1438                         gc.Patch(p3, gc.Pc)
1439                         gmove(&t1, t)
1440
1441                 case gc.TUINT8:
1442                         gins(x86.ATESTL, ncon(0xffffff00), &t1)
1443                         p1 := gc.Gbranch(x86.AJEQ, nil, +1)
1444                         gins(x86.AMOVL, ncon(0), &t1)
1445                         gc.Patch(p1, gc.Pc)
1446                         gmove(&t1, t)
1447
1448                 case gc.TUINT16:
1449                         gins(x86.ATESTL, ncon(0xffff0000), &t1)
1450                         p1 := gc.Gbranch(x86.AJEQ, nil, +1)
1451                         gins(x86.AMOVL, ncon(0), &t1)
1452                         gc.Patch(p1, gc.Pc)
1453                         gmove(&t1, t)
1454                 }
1455
1456                 return
1457
1458                 // convert via int64.
1459         case gc.TFLOAT32<<16 | gc.TUINT32,
1460                 gc.TFLOAT64<<16 | gc.TUINT32:
1461                 cvt = gc.Types[gc.TINT64]
1462
1463                 goto hardmem
1464
1465                 /*
1466                  * integer to float
1467                  */
1468         case gc.TINT16<<16 | gc.TFLOAT32,
1469                 gc.TINT16<<16 | gc.TFLOAT64,
1470                 gc.TINT32<<16 | gc.TFLOAT32,
1471                 gc.TINT32<<16 | gc.TFLOAT64,
1472                 gc.TINT64<<16 | gc.TFLOAT32,
1473                 gc.TINT64<<16 | gc.TFLOAT64:
1474                 if t.Op != gc.OREGISTER {
1475                         goto hard
1476                 }
1477                 if f.Op == gc.OREGISTER {
1478                         cvt = f.Type
1479                         goto hardmem
1480                 }
1481
1482                 switch ft {
1483                 case gc.TINT16:
1484                         a = x86.AFMOVW
1485
1486                 case gc.TINT32:
1487                         a = x86.AFMOVL
1488
1489                 default:
1490                         a = x86.AFMOVV
1491                 }
1492
1493                 // convert via int32 memory
1494         case gc.TINT8<<16 | gc.TFLOAT32,
1495                 gc.TINT8<<16 | gc.TFLOAT64,
1496                 gc.TUINT16<<16 | gc.TFLOAT32,
1497                 gc.TUINT16<<16 | gc.TFLOAT64,
1498                 gc.TUINT8<<16 | gc.TFLOAT32,
1499                 gc.TUINT8<<16 | gc.TFLOAT64:
1500                 cvt = gc.Types[gc.TINT32]
1501
1502                 goto hardmem
1503
1504                 // convert via int64 memory
1505         case gc.TUINT32<<16 | gc.TFLOAT32,
1506                 gc.TUINT32<<16 | gc.TFLOAT64:
1507                 cvt = gc.Types[gc.TINT64]
1508
1509                 goto hardmem
1510
1511                 // The way the code generator uses floating-point
1512                 // registers, a move from F0 to F0 is intended as a no-op.
1513                 // On the x86, it's not: it pushes a second copy of F0
1514                 // on the floating point stack. So toss it away here.
1515                 // Also, F0 is the *only* register we ever evaluate
1516                 // into, so we should only see register/register as F0/F0.
1517                 /*
1518                  * float to float
1519                  */
1520         case gc.TFLOAT32<<16 | gc.TFLOAT32,
1521                 gc.TFLOAT64<<16 | gc.TFLOAT64:
1522                 if gc.Ismem(f) && gc.Ismem(t) {
1523                         goto hard
1524                 }
1525                 if f.Op == gc.OREGISTER && t.Op == gc.OREGISTER {
1526                         if f.Reg != x86.REG_F0 || t.Reg != x86.REG_F0 {
1527                                 goto fatal
1528                         }
1529                         return
1530                 }
1531
1532                 a = x86.AFMOVF
1533                 if ft == gc.TFLOAT64 {
1534                         a = x86.AFMOVD
1535                 }
1536                 if gc.Ismem(t) {
1537                         if f.Op != gc.OREGISTER || f.Reg != x86.REG_F0 {
1538                                 gc.Fatalf("gmove %v", f)
1539                         }
1540                         a = x86.AFMOVFP
1541                         if ft == gc.TFLOAT64 {
1542                                 a = x86.AFMOVDP
1543                         }
1544                 }
1545
1546         case gc.TFLOAT32<<16 | gc.TFLOAT64:
1547                 if gc.Ismem(f) && gc.Ismem(t) {
1548                         goto hard
1549                 }
1550                 if f.Op == gc.OREGISTER && t.Op == gc.OREGISTER {
1551                         if f.Reg != x86.REG_F0 || t.Reg != x86.REG_F0 {
1552                                 goto fatal
1553                         }
1554                         return
1555                 }
1556
1557                 if f.Op == gc.OREGISTER {
1558                         gins(x86.AFMOVDP, f, t)
1559                 } else {
1560                         gins(x86.AFMOVF, f, t)
1561                 }
1562                 return
1563
1564         case gc.TFLOAT64<<16 | gc.TFLOAT32:
1565                 if gc.Ismem(f) && gc.Ismem(t) {
1566                         goto hard
1567                 }
1568                 if f.Op == gc.OREGISTER && t.Op == gc.OREGISTER {
1569                         var r1 gc.Node
1570                         gc.Tempname(&r1, gc.Types[gc.TFLOAT32])
1571                         gins(x86.AFMOVFP, f, &r1)
1572                         gins(x86.AFMOVF, &r1, t)
1573                         return
1574                 }
1575
1576                 if f.Op == gc.OREGISTER {
1577                         gins(x86.AFMOVFP, f, t)
1578                 } else {
1579                         gins(x86.AFMOVD, f, t)
1580                 }
1581                 return
1582         }
1583
1584         gins(a, f, t)
1585         return
1586
1587         // requires register intermediate
1588 hard:
1589         gc.Regalloc(&r1, cvt, t)
1590
1591         gmove(f, &r1)
1592         gmove(&r1, t)
1593         gc.Regfree(&r1)
1594         return
1595
1596         // requires memory intermediate
1597 hardmem:
1598         gc.Tempname(&r1, cvt)
1599
1600         gmove(f, &r1)
1601         gmove(&r1, t)
1602         return
1603
1604         // should not happen
1605 fatal:
1606         gc.Fatalf("gmove %v -> %v", gc.Nconv(f, gc.FmtLong), gc.Nconv(t, gc.FmtLong))
1607
1608         return
1609 }
1610
1611 func floatmove_sse(f *gc.Node, t *gc.Node) {
1612         var r1 gc.Node
1613         var cvt *gc.Type
1614         var a obj.As
1615
1616         ft := gc.Simsimtype(f.Type)
1617         tt := gc.Simsimtype(t.Type)
1618
1619         switch uint32(ft)<<16 | uint32(tt) {
1620         // should not happen
1621         default:
1622                 gc.Fatalf("gmove %v -> %v", f, t)
1623
1624                 return
1625
1626                 /*
1627                  * float to integer
1628                  */
1629                 // convert via int32.
1630         case gc.TFLOAT32<<16 | gc.TINT16,
1631                 gc.TFLOAT32<<16 | gc.TINT8,
1632                 gc.TFLOAT32<<16 | gc.TUINT16,
1633                 gc.TFLOAT32<<16 | gc.TUINT8,
1634                 gc.TFLOAT64<<16 | gc.TINT16,
1635                 gc.TFLOAT64<<16 | gc.TINT8,
1636                 gc.TFLOAT64<<16 | gc.TUINT16,
1637                 gc.TFLOAT64<<16 | gc.TUINT8:
1638                 cvt = gc.Types[gc.TINT32]
1639
1640                 goto hard
1641
1642                 // convert via int64.
1643         case gc.TFLOAT32<<16 | gc.TUINT32,
1644                 gc.TFLOAT64<<16 | gc.TUINT32:
1645                 cvt = gc.Types[gc.TINT64]
1646
1647                 goto hardmem
1648
1649         case gc.TFLOAT32<<16 | gc.TINT32:
1650                 a = x86.ACVTTSS2SL
1651                 goto rdst
1652
1653         case gc.TFLOAT64<<16 | gc.TINT32:
1654                 a = x86.ACVTTSD2SL
1655                 goto rdst
1656
1657                 /*
1658                  * integer to float
1659                  */
1660                 // convert via int32 memory
1661         case gc.TINT8<<16 | gc.TFLOAT32,
1662                 gc.TINT8<<16 | gc.TFLOAT64,
1663                 gc.TINT16<<16 | gc.TFLOAT32,
1664                 gc.TINT16<<16 | gc.TFLOAT64,
1665                 gc.TUINT16<<16 | gc.TFLOAT32,
1666                 gc.TUINT16<<16 | gc.TFLOAT64,
1667                 gc.TUINT8<<16 | gc.TFLOAT32,
1668                 gc.TUINT8<<16 | gc.TFLOAT64:
1669                 cvt = gc.Types[gc.TINT32]
1670
1671                 goto hard
1672
1673                 // convert via int64 memory
1674         case gc.TUINT32<<16 | gc.TFLOAT32,
1675                 gc.TUINT32<<16 | gc.TFLOAT64:
1676                 cvt = gc.Types[gc.TINT64]
1677
1678                 goto hardmem
1679
1680         case gc.TINT32<<16 | gc.TFLOAT32:
1681                 a = x86.ACVTSL2SS
1682                 goto rdst
1683
1684         case gc.TINT32<<16 | gc.TFLOAT64:
1685                 a = x86.ACVTSL2SD
1686                 goto rdst
1687
1688                 /*
1689                  * float to float
1690                  */
1691         case gc.TFLOAT32<<16 | gc.TFLOAT32:
1692                 a = x86.AMOVSS
1693
1694         case gc.TFLOAT64<<16 | gc.TFLOAT64:
1695                 a = x86.AMOVSD
1696
1697         case gc.TFLOAT32<<16 | gc.TFLOAT64:
1698                 a = x86.ACVTSS2SD
1699                 goto rdst
1700
1701         case gc.TFLOAT64<<16 | gc.TFLOAT32:
1702                 a = x86.ACVTSD2SS
1703                 goto rdst
1704         }
1705
1706         gins(a, f, t)
1707         return
1708
1709         // requires register intermediate
1710 hard:
1711         gc.Regalloc(&r1, cvt, t)
1712
1713         gmove(f, &r1)
1714         gmove(&r1, t)
1715         gc.Regfree(&r1)
1716         return
1717
1718         // requires memory intermediate
1719 hardmem:
1720         gc.Tempname(&r1, cvt)
1721
1722         gmove(f, &r1)
1723         gmove(&r1, t)
1724         return
1725
1726         // requires register destination
1727 rdst:
1728         gc.Regalloc(&r1, t.Type, t)
1729
1730         gins(a, f, &r1)
1731         gmove(&r1, t)
1732         gc.Regfree(&r1)
1733         return
1734 }
1735
1736 func samaddr(f *gc.Node, t *gc.Node) bool {
1737         if f.Op != t.Op {
1738                 return false
1739         }
1740
1741         switch f.Op {
1742         case gc.OREGISTER:
1743                 if f.Reg != t.Reg {
1744                         break
1745                 }
1746                 return true
1747         }
1748
1749         return false
1750 }
1751
1752 /*
1753  * generate one instruction:
1754  *      as f, t
1755  */
1756 func gins(as obj.As, f *gc.Node, t *gc.Node) *obj.Prog {
1757         if as == x86.AFMOVF && f != nil && f.Op == gc.OREGISTER && t != nil && t.Op == gc.OREGISTER {
1758                 gc.Fatalf("gins MOVF reg, reg")
1759         }
1760         if as == x86.ACVTSD2SS && f != nil && f.Op == gc.OLITERAL {
1761                 gc.Fatalf("gins CVTSD2SS const")
1762         }
1763         if as == x86.AMOVSD && t != nil && t.Op == gc.OREGISTER && t.Reg == x86.REG_F0 {
1764                 gc.Fatalf("gins MOVSD into F0")
1765         }
1766
1767         if as == x86.AMOVL && f != nil && f.Op == gc.OADDR && f.Left.Op == gc.ONAME && f.Left.Class != gc.PEXTERN && f.Left.Class != gc.PFUNC {
1768                 // Turn MOVL $xxx(FP/SP) into LEAL xxx.
1769                 // These should be equivalent but most of the backend
1770                 // only expects to see LEAL, because that's what we had
1771                 // historically generated. Various hidden assumptions are baked in by now.
1772                 as = x86.ALEAL
1773                 f = f.Left
1774         }
1775
1776         switch as {
1777         case x86.AMOVB,
1778                 x86.AMOVW,
1779                 x86.AMOVL:
1780                 if f != nil && t != nil && samaddr(f, t) {
1781                         return nil
1782                 }
1783
1784         case x86.ALEAL:
1785                 if f != nil && gc.Isconst(f, gc.CTNIL) {
1786                         gc.Fatalf("gins LEAL nil %v", f.Type)
1787                 }
1788         }
1789
1790         p := gc.Prog(as)
1791         gc.Naddr(&p.From, f)
1792         gc.Naddr(&p.To, t)
1793
1794         if gc.Debug['g'] != 0 {
1795                 fmt.Printf("%v\n", p)
1796         }
1797
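        // Sanity check: MOVB/MOVW/MOVL must not be applied to operands wider
        // than the instruction's width.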
1798         w := 0
1799         switch as {
1800         case x86.AMOVB:
1801                 w = 1
1802
1803         case x86.AMOVW:
1804                 w = 2
1805
1806         case x86.AMOVL:
1807                 w = 4
1808         }
1809
1810         if w != 0 && f != nil && (p.From.Width > int64(w) || p.To.Width > int64(w)) {
1811                 gc.Dump("bad width from:", f)
1812                 gc.Dump("bad width to:", t)
1813                 gc.Fatalf("bad width: %v (%d, %d)\n", p, p.From.Width, p.To.Width)
1814         }
1815
1816         if p.To.Type == obj.TYPE_ADDR && w > 0 {
1817                 gc.Fatalf("bad use of addr: %v", p)
1818         }
1819
1820         return p
1821 }
1822
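// ginsnop generates XCHGL AX, AX, which has no architectural effect and
// therefore serves as a no-op instruction.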
1823 func ginsnop() {
1824         var reg gc.Node
1825         gc.Nodreg(&reg, gc.Types[gc.TINT], x86.REG_AX)
1826         gins(x86.AXCHGL, &reg, &reg)
1827 }
1828
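// dotaddable reports whether n is a single-level field selection (ODOT) of a
// directly addressable node; if so, it fills in n1 with that base node, typed
// as the field and offset by the field's position.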
1829 func dotaddable(n *gc.Node, n1 *gc.Node) bool {
1830         if n.Op != gc.ODOT {
1831                 return false
1832         }
1833
1834         var oary [10]int64
1835         var nn *gc.Node
1836         o := gc.Dotoffset(n, oary[:], &nn)
1837         if nn != nil && nn.Addable && o == 1 && oary[0] >= 0 {
1838                 *n1 = *nn
1839                 n1.Type = n.Type
1840                 n1.Xoffset += oary[0]
1841                 return true
1842         }
1843
1844         return false
1845 }
1846
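// sudoclean and sudoaddable are no-ops on 386: sudoaddable never produces an
// addressing mode, so callers always fall back to the general code path, and
// sudoclean has nothing to release.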
1847 func sudoclean() {
1848 }
1849
1850 func sudoaddable(as obj.As, n *gc.Node, a *obj.Addr) bool {
1851         *a = obj.Addr{}
1852         return false
1853 }