]> Cypherpunks.ru repositories - gostls13.git/blob - src/cmd/compile/internal/amd64/ssa.go
d32ea7ec161034be4291c595bc3cc280fa415fe2
[gostls13.git] / src / cmd / compile / internal / amd64 / ssa.go
1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package amd64
6
7 import (
8         "fmt"
9         "internal/buildcfg"
10         "math"
11
12         "cmd/compile/internal/base"
13         "cmd/compile/internal/ir"
14         "cmd/compile/internal/logopt"
15         "cmd/compile/internal/objw"
16         "cmd/compile/internal/ssa"
17         "cmd/compile/internal/ssagen"
18         "cmd/compile/internal/types"
19         "cmd/internal/obj"
20         "cmd/internal/obj/x86"
21 )
22
23 // ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
24 func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
25         flive := b.FlagsLiveAtEnd
26         for _, c := range b.ControlValues() {
27                 flive = c.Type.IsFlags() || flive
28         }
29         for i := len(b.Values) - 1; i >= 0; i-- {
30                 v := b.Values[i]
31                 if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
32                         // The "mark" is any non-nil Aux value.
33                         v.Aux = ssa.AuxMark
34                 }
35                 if v.Type.IsFlags() {
36                         flive = false
37                 }
38                 for _, a := range v.Args {
39                         if a.Type.IsFlags() {
40                                 flive = true
41                         }
42                 }
43         }
44 }
45
46 // loadByType returns the load instruction of the given type.
47 func loadByType(t *types.Type) obj.As {
48         // Avoid partial register write
49         if !t.IsFloat() {
50                 switch t.Size() {
51                 case 1:
52                         return x86.AMOVBLZX
53                 case 2:
54                         return x86.AMOVWLZX
55                 }
56         }
57         // Otherwise, there's no difference between load and store opcodes.
58         return storeByType(t)
59 }
60
61 // storeByType returns the store instruction of the given type.
62 func storeByType(t *types.Type) obj.As {
63         width := t.Size()
64         if t.IsFloat() {
65                 switch width {
66                 case 4:
67                         return x86.AMOVSS
68                 case 8:
69                         return x86.AMOVSD
70                 }
71         } else {
72                 switch width {
73                 case 1:
74                         return x86.AMOVB
75                 case 2:
76                         return x86.AMOVW
77                 case 4:
78                         return x86.AMOVL
79                 case 8:
80                         return x86.AMOVQ
81                 case 16:
82                         return x86.AMOVUPS
83                 }
84         }
85         panic(fmt.Sprintf("bad store type %v", t))
86 }
87
88 // moveByType returns the reg->reg move instruction of the given type.
89 func moveByType(t *types.Type) obj.As {
90         if t.IsFloat() {
91                 // Moving the whole sse2 register is faster
92                 // than moving just the correct low portion of it.
93                 // There is no xmm->xmm move with 1 byte opcode,
94                 // so use movups, which has 2 byte opcode.
95                 return x86.AMOVUPS
96         } else {
97                 switch t.Size() {
98                 case 1:
99                         // Avoids partial register write
100                         return x86.AMOVL
101                 case 2:
102                         return x86.AMOVL
103                 case 4:
104                         return x86.AMOVL
105                 case 8:
106                         return x86.AMOVQ
107                 case 16:
108                         return x86.AMOVUPS // int128s are in SSE registers
109                 default:
110                         panic(fmt.Sprintf("bad int register width %d:%v", t.Size(), t))
111                 }
112         }
113 }
114
115 // opregreg emits instructions for
116 //
117 //      dest := dest(To) op src(From)
118 //
119 // and also returns the created obj.Prog so it
120 // may be further adjusted (offset, scale, etc).
121 func opregreg(s *ssagen.State, op obj.As, dest, src int16) *obj.Prog {
122         p := s.Prog(op)
123         p.From.Type = obj.TYPE_REG
124         p.To.Type = obj.TYPE_REG
125         p.To.Reg = dest
126         p.From.Reg = src
127         return p
128 }
129
130 // memIdx fills out a as an indexed memory reference for v.
131 // It assumes that the base register and the index register
132 // are v.Args[0].Reg() and v.Args[1].Reg(), respectively.
133 // The caller must still use gc.AddAux/gc.AddAux2 to handle v.Aux as necessary.
134 func memIdx(a *obj.Addr, v *ssa.Value) {
135         r, i := v.Args[0].Reg(), v.Args[1].Reg()
136         a.Type = obj.TYPE_MEM
137         a.Scale = v.Op.Scale()
138         if a.Scale == 1 && i == x86.REG_SP {
139                 r, i = i, r
140         }
141         a.Reg = r
142         a.Index = i
143 }
144
145 // DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ,
146 // See runtime/mkduff.go.
147 func duffStart(size int64) int64 {
148         x, _ := duff(size)
149         return x
150 }
151 func duffAdj(size int64) int64 {
152         _, x := duff(size)
153         return x
154 }
155
156 // duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
157 // required to use the duffzero mechanism for a block of the given size.
158 func duff(size int64) (int64, int64) {
159         if size < 32 || size > 1024 || size%dzClearStep != 0 {
160                 panic("bad duffzero size")
161         }
162         steps := size / dzClearStep
163         blocks := steps / dzBlockLen
164         steps %= dzBlockLen
165         off := dzBlockSize * (dzBlocks - blocks)
166         var adj int64
167         if steps != 0 {
168                 off -= dzLeaqSize
169                 off -= dzMovSize * steps
170                 adj -= dzClearStep * (dzBlockLen - steps)
171         }
172         return off, adj
173 }
174
175 func getgFromTLS(s *ssagen.State, r int16) {
176         // See the comments in cmd/internal/obj/x86/obj6.go
177         // near CanUse1InsnTLS for a detailed explanation of these instructions.
178         if x86.CanUse1InsnTLS(base.Ctxt) {
179                 // MOVQ (TLS), r
180                 p := s.Prog(x86.AMOVQ)
181                 p.From.Type = obj.TYPE_MEM
182                 p.From.Reg = x86.REG_TLS
183                 p.To.Type = obj.TYPE_REG
184                 p.To.Reg = r
185         } else {
186                 // MOVQ TLS, r
187                 // MOVQ (r)(TLS*1), r
188                 p := s.Prog(x86.AMOVQ)
189                 p.From.Type = obj.TYPE_REG
190                 p.From.Reg = x86.REG_TLS
191                 p.To.Type = obj.TYPE_REG
192                 p.To.Reg = r
193                 q := s.Prog(x86.AMOVQ)
194                 q.From.Type = obj.TYPE_MEM
195                 q.From.Reg = r
196                 q.From.Index = x86.REG_TLS
197                 q.From.Scale = 1
198                 q.To.Type = obj.TYPE_REG
199                 q.To.Reg = r
200         }
201 }
202
203 func ssaGenValue(s *ssagen.State, v *ssa.Value) {
204         switch v.Op {
205         case ssa.OpAMD64VFMADD231SD:
206                 p := s.Prog(v.Op.Asm())
207                 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[2].Reg()}
208                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
209                 p.AddRestSourceReg(v.Args[1].Reg())
210         case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
211                 r := v.Reg()
212                 r1 := v.Args[0].Reg()
213                 r2 := v.Args[1].Reg()
214                 switch {
215                 case r == r1:
216                         p := s.Prog(v.Op.Asm())
217                         p.From.Type = obj.TYPE_REG
218                         p.From.Reg = r2
219                         p.To.Type = obj.TYPE_REG
220                         p.To.Reg = r
221                 case r == r2:
222                         p := s.Prog(v.Op.Asm())
223                         p.From.Type = obj.TYPE_REG
224                         p.From.Reg = r1
225                         p.To.Type = obj.TYPE_REG
226                         p.To.Reg = r
227                 default:
228                         var asm obj.As
229                         if v.Op == ssa.OpAMD64ADDQ {
230                                 asm = x86.ALEAQ
231                         } else {
232                                 asm = x86.ALEAL
233                         }
234                         p := s.Prog(asm)
235                         p.From.Type = obj.TYPE_MEM
236                         p.From.Reg = r1
237                         p.From.Scale = 1
238                         p.From.Index = r2
239                         p.To.Type = obj.TYPE_REG
240                         p.To.Reg = r
241                 }
242         // 2-address opcode arithmetic
243         case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
244                 ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
245                 ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
246                 ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
247                 ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
248                 ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
249                 ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
250                 ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
251                 ssa.OpAMD64ROLQ, ssa.OpAMD64ROLL, ssa.OpAMD64ROLW, ssa.OpAMD64ROLB,
252                 ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB,
253                 ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
254                 ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
255                 ssa.OpAMD64MINSS, ssa.OpAMD64MINSD,
256                 ssa.OpAMD64POR, ssa.OpAMD64PXOR,
257                 ssa.OpAMD64BTSL, ssa.OpAMD64BTSQ,
258                 ssa.OpAMD64BTCL, ssa.OpAMD64BTCQ,
259                 ssa.OpAMD64BTRL, ssa.OpAMD64BTRQ:
260                 opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
261
262         case ssa.OpAMD64SHRDQ, ssa.OpAMD64SHLDQ:
263                 p := s.Prog(v.Op.Asm())
264                 lo, hi, bits := v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg()
265                 p.From.Type = obj.TYPE_REG
266                 p.From.Reg = bits
267                 p.To.Type = obj.TYPE_REG
268                 p.To.Reg = lo
269                 p.AddRestSourceReg(hi)
270
271         case ssa.OpAMD64BLSIQ, ssa.OpAMD64BLSIL,
272                 ssa.OpAMD64BLSMSKQ, ssa.OpAMD64BLSMSKL,
273                 ssa.OpAMD64BLSRQ, ssa.OpAMD64BLSRL:
274                 p := s.Prog(v.Op.Asm())
275                 p.From.Type = obj.TYPE_REG
276                 p.From.Reg = v.Args[0].Reg()
277                 p.To.Type = obj.TYPE_REG
278                 switch v.Op {
279                 case ssa.OpAMD64BLSRQ, ssa.OpAMD64BLSRL:
280                         p.To.Reg = v.Reg0()
281                 default:
282                         p.To.Reg = v.Reg()
283                 }
284
285         case ssa.OpAMD64ANDNQ, ssa.OpAMD64ANDNL:
286                 p := s.Prog(v.Op.Asm())
287                 p.From.Type = obj.TYPE_REG
288                 p.From.Reg = v.Args[0].Reg()
289                 p.To.Type = obj.TYPE_REG
290                 p.To.Reg = v.Reg()
291                 p.AddRestSourceReg(v.Args[1].Reg())
292
293         case ssa.OpAMD64SARXL, ssa.OpAMD64SARXQ,
294                 ssa.OpAMD64SHLXL, ssa.OpAMD64SHLXQ,
295                 ssa.OpAMD64SHRXL, ssa.OpAMD64SHRXQ:
296                 p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
297                 p.AddRestSourceReg(v.Args[0].Reg())
298
299         case ssa.OpAMD64SHLXLload, ssa.OpAMD64SHLXQload,
300                 ssa.OpAMD64SHRXLload, ssa.OpAMD64SHRXQload,
301                 ssa.OpAMD64SARXLload, ssa.OpAMD64SARXQload:
302                 p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
303                 m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
304                 ssagen.AddAux(&m, v)
305                 p.AddRestSource(m)
306
307         case ssa.OpAMD64SHLXLloadidx1, ssa.OpAMD64SHLXLloadidx4, ssa.OpAMD64SHLXLloadidx8,
308                 ssa.OpAMD64SHRXLloadidx1, ssa.OpAMD64SHRXLloadidx4, ssa.OpAMD64SHRXLloadidx8,
309                 ssa.OpAMD64SARXLloadidx1, ssa.OpAMD64SARXLloadidx4, ssa.OpAMD64SARXLloadidx8,
310                 ssa.OpAMD64SHLXQloadidx1, ssa.OpAMD64SHLXQloadidx8,
311                 ssa.OpAMD64SHRXQloadidx1, ssa.OpAMD64SHRXQloadidx8,
312                 ssa.OpAMD64SARXQloadidx1, ssa.OpAMD64SARXQloadidx8:
313                 p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[2].Reg())
314                 m := obj.Addr{Type: obj.TYPE_MEM}
315                 memIdx(&m, v)
316                 ssagen.AddAux(&m, v)
317                 p.AddRestSource(m)
318
319         case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
320                 // Arg[0] (the dividend) is in AX.
321                 // Arg[1] (the divisor) can be in any other register.
322                 // Result[0] (the quotient) is in AX.
323                 // Result[1] (the remainder) is in DX.
324                 r := v.Args[1].Reg()
325
326                 // Zero extend dividend.
327                 opregreg(s, x86.AXORL, x86.REG_DX, x86.REG_DX)
328
329                 // Issue divide.
330                 p := s.Prog(v.Op.Asm())
331                 p.From.Type = obj.TYPE_REG
332                 p.From.Reg = r
333
334         case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
335                 // Arg[0] (the dividend) is in AX.
336                 // Arg[1] (the divisor) can be in any other register.
337                 // Result[0] (the quotient) is in AX.
338                 // Result[1] (the remainder) is in DX.
339                 r := v.Args[1].Reg()
340
341                 var opCMP, opNEG, opSXD obj.As
342                 switch v.Op {
343                 case ssa.OpAMD64DIVQ:
344                         opCMP, opNEG, opSXD = x86.ACMPQ, x86.ANEGQ, x86.ACQO
345                 case ssa.OpAMD64DIVL:
346                         opCMP, opNEG, opSXD = x86.ACMPL, x86.ANEGL, x86.ACDQ
347                 case ssa.OpAMD64DIVW:
348                         opCMP, opNEG, opSXD = x86.ACMPW, x86.ANEGW, x86.ACWD
349                 }
350
351                 // CPU faults upon signed overflow, which occurs when the most
352                 // negative int is divided by -1. Handle divide by -1 as a special case.
353                 var j1, j2 *obj.Prog
354                 if ssa.DivisionNeedsFixUp(v) {
355                         c := s.Prog(opCMP)
356                         c.From.Type = obj.TYPE_REG
357                         c.From.Reg = r
358                         c.To.Type = obj.TYPE_CONST
359                         c.To.Offset = -1
360
361                         // Divisor is not -1, proceed with normal division.
362                         j1 = s.Prog(x86.AJNE)
363                         j1.To.Type = obj.TYPE_BRANCH
364
365                         // Divisor is -1, manually compute quotient and remainder via fixup code.
366                         // n / -1 = -n
367                         n1 := s.Prog(opNEG)
368                         n1.To.Type = obj.TYPE_REG
369                         n1.To.Reg = x86.REG_AX
370
371                         // n % -1 == 0
372                         opregreg(s, x86.AXORL, x86.REG_DX, x86.REG_DX)
373
374                         // TODO(khr): issue only the -1 fixup code we need.
375                         // For instance, if only the quotient is used, no point in zeroing the remainder.
376
377                         // Skip over normal division.
378                         j2 = s.Prog(obj.AJMP)
379                         j2.To.Type = obj.TYPE_BRANCH
380                 }
381
382                 // Sign extend dividend and perform division.
383                 p := s.Prog(opSXD)
384                 if j1 != nil {
385                         j1.To.SetTarget(p)
386                 }
387                 p = s.Prog(v.Op.Asm())
388                 p.From.Type = obj.TYPE_REG
389                 p.From.Reg = r
390
391                 if j2 != nil {
392                         j2.To.SetTarget(s.Pc())
393                 }
394
395         case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU:
396                 // the frontend rewrites constant division by 8/16/32 bit integers into
397                 // HMUL by a constant
398                 // SSA rewrites generate the 64 bit versions
399
400                 // Arg[0] is already in AX as it's the only register we allow
401                 // and DX is the only output we care about (the high bits)
402                 p := s.Prog(v.Op.Asm())
403                 p.From.Type = obj.TYPE_REG
404                 p.From.Reg = v.Args[1].Reg()
405
406                 // IMULB puts the high portion in AH instead of DL,
407                 // so move it to DL for consistency
408                 if v.Type.Size() == 1 {
409                         m := s.Prog(x86.AMOVB)
410                         m.From.Type = obj.TYPE_REG
411                         m.From.Reg = x86.REG_AH
412                         m.To.Type = obj.TYPE_REG
413                         m.To.Reg = x86.REG_DX
414                 }
415
416         case ssa.OpAMD64MULQU, ssa.OpAMD64MULLU:
417                 // Arg[0] is already in AX as it's the only register we allow
418                 // results lo in AX
419                 p := s.Prog(v.Op.Asm())
420                 p.From.Type = obj.TYPE_REG
421                 p.From.Reg = v.Args[1].Reg()
422
423         case ssa.OpAMD64MULQU2:
424                 // Arg[0] is already in AX as it's the only register we allow
425                 // results hi in DX, lo in AX
426                 p := s.Prog(v.Op.Asm())
427                 p.From.Type = obj.TYPE_REG
428                 p.From.Reg = v.Args[1].Reg()
429
430         case ssa.OpAMD64DIVQU2:
431                 // Arg[0], Arg[1] are already in Dx, AX, as they're the only registers we allow
432                 // results q in AX, r in DX
433                 p := s.Prog(v.Op.Asm())
434                 p.From.Type = obj.TYPE_REG
435                 p.From.Reg = v.Args[2].Reg()
436
437         case ssa.OpAMD64AVGQU:
438                 // compute (x+y)/2 unsigned.
439                 // Do a 64-bit add, the overflow goes into the carry.
440                 // Shift right once and pull the carry back into the 63rd bit.
441                 p := s.Prog(x86.AADDQ)
442                 p.From.Type = obj.TYPE_REG
443                 p.To.Type = obj.TYPE_REG
444                 p.To.Reg = v.Reg()
445                 p.From.Reg = v.Args[1].Reg()
446                 p = s.Prog(x86.ARCRQ)
447                 p.From.Type = obj.TYPE_CONST
448                 p.From.Offset = 1
449                 p.To.Type = obj.TYPE_REG
450                 p.To.Reg = v.Reg()
451
452         case ssa.OpAMD64ADDQcarry, ssa.OpAMD64ADCQ:
453                 r := v.Reg0()
454                 r0 := v.Args[0].Reg()
455                 r1 := v.Args[1].Reg()
456                 switch r {
457                 case r0:
458                         p := s.Prog(v.Op.Asm())
459                         p.From.Type = obj.TYPE_REG
460                         p.From.Reg = r1
461                         p.To.Type = obj.TYPE_REG
462                         p.To.Reg = r
463                 case r1:
464                         p := s.Prog(v.Op.Asm())
465                         p.From.Type = obj.TYPE_REG
466                         p.From.Reg = r0
467                         p.To.Type = obj.TYPE_REG
468                         p.To.Reg = r
469                 default:
470                         v.Fatalf("output not in same register as an input %s", v.LongString())
471                 }
472
473         case ssa.OpAMD64SUBQborrow, ssa.OpAMD64SBBQ:
474                 p := s.Prog(v.Op.Asm())
475                 p.From.Type = obj.TYPE_REG
476                 p.From.Reg = v.Args[1].Reg()
477                 p.To.Type = obj.TYPE_REG
478                 p.To.Reg = v.Reg0()
479
480         case ssa.OpAMD64ADDQconstcarry, ssa.OpAMD64ADCQconst, ssa.OpAMD64SUBQconstborrow, ssa.OpAMD64SBBQconst:
481                 p := s.Prog(v.Op.Asm())
482                 p.From.Type = obj.TYPE_CONST
483                 p.From.Offset = v.AuxInt
484                 p.To.Type = obj.TYPE_REG
485                 p.To.Reg = v.Reg0()
486
487         case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
488                 r := v.Reg()
489                 a := v.Args[0].Reg()
490                 if r == a {
491                         switch v.AuxInt {
492                         case 1:
493                                 var asm obj.As
494                                 // Software optimization manual recommends add $1,reg.
495                                 // But inc/dec is 1 byte smaller. ICC always uses inc
496                                 // Clang/GCC choose depending on flags, but prefer add.
497                                 // Experiments show that inc/dec is both a little faster
498                                 // and make a binary a little smaller.
499                                 if v.Op == ssa.OpAMD64ADDQconst {
500                                         asm = x86.AINCQ
501                                 } else {
502                                         asm = x86.AINCL
503                                 }
504                                 p := s.Prog(asm)
505                                 p.To.Type = obj.TYPE_REG
506                                 p.To.Reg = r
507                                 return
508                         case -1:
509                                 var asm obj.As
510                                 if v.Op == ssa.OpAMD64ADDQconst {
511                                         asm = x86.ADECQ
512                                 } else {
513                                         asm = x86.ADECL
514                                 }
515                                 p := s.Prog(asm)
516                                 p.To.Type = obj.TYPE_REG
517                                 p.To.Reg = r
518                                 return
519                         case 0x80:
520                                 // 'SUBQ $-0x80, r' is shorter to encode than
521                                 // and functionally equivalent to 'ADDQ $0x80, r'.
522                                 asm := x86.ASUBL
523                                 if v.Op == ssa.OpAMD64ADDQconst {
524                                         asm = x86.ASUBQ
525                                 }
526                                 p := s.Prog(asm)
527                                 p.From.Type = obj.TYPE_CONST
528                                 p.From.Offset = -0x80
529                                 p.To.Type = obj.TYPE_REG
530                                 p.To.Reg = r
531                                 return
532
533                         }
534                         p := s.Prog(v.Op.Asm())
535                         p.From.Type = obj.TYPE_CONST
536                         p.From.Offset = v.AuxInt
537                         p.To.Type = obj.TYPE_REG
538                         p.To.Reg = r
539                         return
540                 }
541                 var asm obj.As
542                 if v.Op == ssa.OpAMD64ADDQconst {
543                         asm = x86.ALEAQ
544                 } else {
545                         asm = x86.ALEAL
546                 }
547                 p := s.Prog(asm)
548                 p.From.Type = obj.TYPE_MEM
549                 p.From.Reg = a
550                 p.From.Offset = v.AuxInt
551                 p.To.Type = obj.TYPE_REG
552                 p.To.Reg = r
553
554         case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ, ssa.OpAMD64CMOVWEQ,
555                 ssa.OpAMD64CMOVQLT, ssa.OpAMD64CMOVLLT, ssa.OpAMD64CMOVWLT,
556                 ssa.OpAMD64CMOVQNE, ssa.OpAMD64CMOVLNE, ssa.OpAMD64CMOVWNE,
557                 ssa.OpAMD64CMOVQGT, ssa.OpAMD64CMOVLGT, ssa.OpAMD64CMOVWGT,
558                 ssa.OpAMD64CMOVQLE, ssa.OpAMD64CMOVLLE, ssa.OpAMD64CMOVWLE,
559                 ssa.OpAMD64CMOVQGE, ssa.OpAMD64CMOVLGE, ssa.OpAMD64CMOVWGE,
560                 ssa.OpAMD64CMOVQHI, ssa.OpAMD64CMOVLHI, ssa.OpAMD64CMOVWHI,
561                 ssa.OpAMD64CMOVQLS, ssa.OpAMD64CMOVLLS, ssa.OpAMD64CMOVWLS,
562                 ssa.OpAMD64CMOVQCC, ssa.OpAMD64CMOVLCC, ssa.OpAMD64CMOVWCC,
563                 ssa.OpAMD64CMOVQCS, ssa.OpAMD64CMOVLCS, ssa.OpAMD64CMOVWCS,
564                 ssa.OpAMD64CMOVQGTF, ssa.OpAMD64CMOVLGTF, ssa.OpAMD64CMOVWGTF,
565                 ssa.OpAMD64CMOVQGEF, ssa.OpAMD64CMOVLGEF, ssa.OpAMD64CMOVWGEF:
566                 p := s.Prog(v.Op.Asm())
567                 p.From.Type = obj.TYPE_REG
568                 p.From.Reg = v.Args[1].Reg()
569                 p.To.Type = obj.TYPE_REG
570                 p.To.Reg = v.Reg()
571
572         case ssa.OpAMD64CMOVQNEF, ssa.OpAMD64CMOVLNEF, ssa.OpAMD64CMOVWNEF:
573                 // Flag condition: ^ZERO || PARITY
574                 // Generate:
575                 //   CMOV*NE  SRC,DST
576                 //   CMOV*PS  SRC,DST
577                 p := s.Prog(v.Op.Asm())
578                 p.From.Type = obj.TYPE_REG
579                 p.From.Reg = v.Args[1].Reg()
580                 p.To.Type = obj.TYPE_REG
581                 p.To.Reg = v.Reg()
582                 var q *obj.Prog
583                 if v.Op == ssa.OpAMD64CMOVQNEF {
584                         q = s.Prog(x86.ACMOVQPS)
585                 } else if v.Op == ssa.OpAMD64CMOVLNEF {
586                         q = s.Prog(x86.ACMOVLPS)
587                 } else {
588                         q = s.Prog(x86.ACMOVWPS)
589                 }
590                 q.From.Type = obj.TYPE_REG
591                 q.From.Reg = v.Args[1].Reg()
592                 q.To.Type = obj.TYPE_REG
593                 q.To.Reg = v.Reg()
594
595         case ssa.OpAMD64CMOVQEQF, ssa.OpAMD64CMOVLEQF, ssa.OpAMD64CMOVWEQF:
596                 // Flag condition: ZERO && !PARITY
597                 // Generate:
598                 //   MOV      SRC,TMP
599                 //   CMOV*NE  DST,TMP
600                 //   CMOV*PC  TMP,DST
601                 //
602                 // TODO(rasky): we could generate:
603                 //   CMOV*NE  DST,SRC
604                 //   CMOV*PC  SRC,DST
605                 // But this requires a way for regalloc to know that SRC might be
606                 // clobbered by this instruction.
607                 t := v.RegTmp()
608                 opregreg(s, moveByType(v.Type), t, v.Args[1].Reg())
609
610                 p := s.Prog(v.Op.Asm())
611                 p.From.Type = obj.TYPE_REG
612                 p.From.Reg = v.Reg()
613                 p.To.Type = obj.TYPE_REG
614                 p.To.Reg = t
615                 var q *obj.Prog
616                 if v.Op == ssa.OpAMD64CMOVQEQF {
617                         q = s.Prog(x86.ACMOVQPC)
618                 } else if v.Op == ssa.OpAMD64CMOVLEQF {
619                         q = s.Prog(x86.ACMOVLPC)
620                 } else {
621                         q = s.Prog(x86.ACMOVWPC)
622                 }
623                 q.From.Type = obj.TYPE_REG
624                 q.From.Reg = t
625                 q.To.Type = obj.TYPE_REG
626                 q.To.Reg = v.Reg()
627
628         case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
629                 r := v.Reg()
630                 p := s.Prog(v.Op.Asm())
631                 p.From.Type = obj.TYPE_CONST
632                 p.From.Offset = v.AuxInt
633                 p.To.Type = obj.TYPE_REG
634                 p.To.Reg = r
635                 p.AddRestSourceReg(v.Args[0].Reg())
636
637         case ssa.OpAMD64ANDQconst:
638                 asm := v.Op.Asm()
639                 // If the constant is positive and fits into 32 bits, use ANDL.
640                 // This saves a few bytes of encoding.
641                 if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
642                         asm = x86.AANDL
643                 }
644                 p := s.Prog(asm)
645                 p.From.Type = obj.TYPE_CONST
646                 p.From.Offset = v.AuxInt
647                 p.To.Type = obj.TYPE_REG
648                 p.To.Reg = v.Reg()
649
650         case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
651                 ssa.OpAMD64ANDLconst,
652                 ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
653                 ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
654                 ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
655                 ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
656                 ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
657                 ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
658                 p := s.Prog(v.Op.Asm())
659                 p.From.Type = obj.TYPE_CONST
660                 p.From.Offset = v.AuxInt
661                 p.To.Type = obj.TYPE_REG
662                 p.To.Reg = v.Reg()
663         case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
664                 r := v.Reg()
665                 p := s.Prog(v.Op.Asm())
666                 p.From.Type = obj.TYPE_REG
667                 p.From.Reg = r
668                 p.To.Type = obj.TYPE_REG
669                 p.To.Reg = r
670         case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8,
671                 ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8,
672                 ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
673                 p := s.Prog(v.Op.Asm())
674                 memIdx(&p.From, v)
675                 o := v.Reg()
676                 p.To.Type = obj.TYPE_REG
677                 p.To.Reg = o
678                 if v.AuxInt != 0 && v.Aux == nil {
679                         // Emit an additional LEA to add the displacement instead of creating a slow 3 operand LEA.
680                         switch v.Op {
681                         case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
682                                 p = s.Prog(x86.ALEAQ)
683                         case ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8:
684                                 p = s.Prog(x86.ALEAL)
685                         case ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
686                                 p = s.Prog(x86.ALEAW)
687                         }
688                         p.From.Type = obj.TYPE_MEM
689                         p.From.Reg = o
690                         p.To.Type = obj.TYPE_REG
691                         p.To.Reg = o
692                 }
693                 ssagen.AddAux(&p.From, v)
694         case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL, ssa.OpAMD64LEAW:
695                 p := s.Prog(v.Op.Asm())
696                 p.From.Type = obj.TYPE_MEM
697                 p.From.Reg = v.Args[0].Reg()
698                 ssagen.AddAux(&p.From, v)
699                 p.To.Type = obj.TYPE_REG
700                 p.To.Reg = v.Reg()
701         case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
702                 ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB,
703                 ssa.OpAMD64BTL, ssa.OpAMD64BTQ:
704                 opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
705         case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
706                 // Go assembler has swapped operands for UCOMISx relative to CMP,
707                 // must account for that right here.
708                 opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
709         case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
710                 p := s.Prog(v.Op.Asm())
711                 p.From.Type = obj.TYPE_REG
712                 p.From.Reg = v.Args[0].Reg()
713                 p.To.Type = obj.TYPE_CONST
714                 p.To.Offset = v.AuxInt
715         case ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst,
716                 ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
717                 ssa.OpAMD64BTSLconst, ssa.OpAMD64BTSQconst,
718                 ssa.OpAMD64BTCLconst, ssa.OpAMD64BTCQconst,
719                 ssa.OpAMD64BTRLconst, ssa.OpAMD64BTRQconst:
720                 op := v.Op
721                 if op == ssa.OpAMD64BTQconst && v.AuxInt < 32 {
722                         // Emit 32-bit version because it's shorter
723                         op = ssa.OpAMD64BTLconst
724                 }
725                 p := s.Prog(op.Asm())
726                 p.From.Type = obj.TYPE_CONST
727                 p.From.Offset = v.AuxInt
728                 p.To.Type = obj.TYPE_REG
729                 p.To.Reg = v.Args[0].Reg()
730         case ssa.OpAMD64CMPQload, ssa.OpAMD64CMPLload, ssa.OpAMD64CMPWload, ssa.OpAMD64CMPBload:
731                 p := s.Prog(v.Op.Asm())
732                 p.From.Type = obj.TYPE_MEM
733                 p.From.Reg = v.Args[0].Reg()
734                 ssagen.AddAux(&p.From, v)
735                 p.To.Type = obj.TYPE_REG
736                 p.To.Reg = v.Args[1].Reg()
737         case ssa.OpAMD64CMPQconstload, ssa.OpAMD64CMPLconstload, ssa.OpAMD64CMPWconstload, ssa.OpAMD64CMPBconstload:
738                 sc := v.AuxValAndOff()
739                 p := s.Prog(v.Op.Asm())
740                 p.From.Type = obj.TYPE_MEM
741                 p.From.Reg = v.Args[0].Reg()
742                 ssagen.AddAux2(&p.From, v, sc.Off64())
743                 p.To.Type = obj.TYPE_CONST
744                 p.To.Offset = sc.Val64()
745         case ssa.OpAMD64CMPQloadidx8, ssa.OpAMD64CMPQloadidx1, ssa.OpAMD64CMPLloadidx4, ssa.OpAMD64CMPLloadidx1, ssa.OpAMD64CMPWloadidx2, ssa.OpAMD64CMPWloadidx1, ssa.OpAMD64CMPBloadidx1:
746                 p := s.Prog(v.Op.Asm())
747                 memIdx(&p.From, v)
748                 ssagen.AddAux(&p.From, v)
749                 p.To.Type = obj.TYPE_REG
750                 p.To.Reg = v.Args[2].Reg()
751         case ssa.OpAMD64CMPQconstloadidx8, ssa.OpAMD64CMPQconstloadidx1, ssa.OpAMD64CMPLconstloadidx4, ssa.OpAMD64CMPLconstloadidx1, ssa.OpAMD64CMPWconstloadidx2, ssa.OpAMD64CMPWconstloadidx1, ssa.OpAMD64CMPBconstloadidx1:
752                 sc := v.AuxValAndOff()
753                 p := s.Prog(v.Op.Asm())
754                 memIdx(&p.From, v)
755                 ssagen.AddAux2(&p.From, v, sc.Off64())
756                 p.To.Type = obj.TYPE_CONST
757                 p.To.Offset = sc.Val64()
758         case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
759                 x := v.Reg()
760
761                 // If flags aren't live (indicated by v.Aux == nil),
762                 // then we can rewrite MOV $0, AX into XOR AX, AX.
763                 if v.AuxInt == 0 && v.Aux == nil {
764                         opregreg(s, x86.AXORL, x, x)
765                         break
766                 }
767
768                 asm := v.Op.Asm()
769                 // Use MOVL to move a small constant into a register
770                 // when the constant is positive and fits into 32 bits.
771                 if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
772                         // The upper 32bit are zeroed automatically when using MOVL.
773                         asm = x86.AMOVL
774                 }
775                 p := s.Prog(asm)
776                 p.From.Type = obj.TYPE_CONST
777                 p.From.Offset = v.AuxInt
778                 p.To.Type = obj.TYPE_REG
779                 p.To.Reg = x
780         case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
781                 x := v.Reg()
782                 p := s.Prog(v.Op.Asm())
783                 p.From.Type = obj.TYPE_FCONST
784                 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
785                 p.To.Type = obj.TYPE_REG
786                 p.To.Reg = x
787         case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVOload,
788                 ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload,
789                 ssa.OpAMD64MOVBEQload, ssa.OpAMD64MOVBELload:
790                 p := s.Prog(v.Op.Asm())
791                 p.From.Type = obj.TYPE_MEM
792                 p.From.Reg = v.Args[0].Reg()
793                 ssagen.AddAux(&p.From, v)
794                 p.To.Type = obj.TYPE_REG
795                 p.To.Reg = v.Reg()
796         case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1,
797                 ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8, ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4, ssa.OpAMD64MOVWloadidx2,
798                 ssa.OpAMD64MOVBELloadidx1, ssa.OpAMD64MOVBELloadidx4, ssa.OpAMD64MOVBELloadidx8, ssa.OpAMD64MOVBEQloadidx1, ssa.OpAMD64MOVBEQloadidx8:
799                 p := s.Prog(v.Op.Asm())
800                 memIdx(&p.From, v)
801                 ssagen.AddAux(&p.From, v)
802                 p.To.Type = obj.TYPE_REG
803                 p.To.Reg = v.Reg()
804         case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore,
805                 ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify,
806                 ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify,
807                 ssa.OpAMD64MOVBEQstore, ssa.OpAMD64MOVBELstore, ssa.OpAMD64MOVBEWstore:
808                 p := s.Prog(v.Op.Asm())
809                 p.From.Type = obj.TYPE_REG
810                 p.From.Reg = v.Args[1].Reg()
811                 p.To.Type = obj.TYPE_MEM
812                 p.To.Reg = v.Args[0].Reg()
813                 ssagen.AddAux(&p.To, v)
814         case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1,
815                 ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8, ssa.OpAMD64MOVLstoreidx8, ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4, ssa.OpAMD64MOVWstoreidx2,
816                 ssa.OpAMD64ADDLmodifyidx1, ssa.OpAMD64ADDLmodifyidx4, ssa.OpAMD64ADDLmodifyidx8, ssa.OpAMD64ADDQmodifyidx1, ssa.OpAMD64ADDQmodifyidx8,
817                 ssa.OpAMD64SUBLmodifyidx1, ssa.OpAMD64SUBLmodifyidx4, ssa.OpAMD64SUBLmodifyidx8, ssa.OpAMD64SUBQmodifyidx1, ssa.OpAMD64SUBQmodifyidx8,
818                 ssa.OpAMD64ANDLmodifyidx1, ssa.OpAMD64ANDLmodifyidx4, ssa.OpAMD64ANDLmodifyidx8, ssa.OpAMD64ANDQmodifyidx1, ssa.OpAMD64ANDQmodifyidx8,
819                 ssa.OpAMD64ORLmodifyidx1, ssa.OpAMD64ORLmodifyidx4, ssa.OpAMD64ORLmodifyidx8, ssa.OpAMD64ORQmodifyidx1, ssa.OpAMD64ORQmodifyidx8,
820                 ssa.OpAMD64XORLmodifyidx1, ssa.OpAMD64XORLmodifyidx4, ssa.OpAMD64XORLmodifyidx8, ssa.OpAMD64XORQmodifyidx1, ssa.OpAMD64XORQmodifyidx8,
821                 ssa.OpAMD64MOVBEWstoreidx1, ssa.OpAMD64MOVBEWstoreidx2, ssa.OpAMD64MOVBELstoreidx1, ssa.OpAMD64MOVBELstoreidx4, ssa.OpAMD64MOVBELstoreidx8, ssa.OpAMD64MOVBEQstoreidx1, ssa.OpAMD64MOVBEQstoreidx8:
822                 p := s.Prog(v.Op.Asm())
823                 p.From.Type = obj.TYPE_REG
824                 p.From.Reg = v.Args[2].Reg()
825                 memIdx(&p.To, v)
826                 ssagen.AddAux(&p.To, v)
827         case ssa.OpAMD64ADDQconstmodify, ssa.OpAMD64ADDLconstmodify:
828                 sc := v.AuxValAndOff()
829                 off := sc.Off64()
830                 val := sc.Val()
831                 if val == 1 || val == -1 {
832                         var asm obj.As
833                         if v.Op == ssa.OpAMD64ADDQconstmodify {
834                                 if val == 1 {
835                                         asm = x86.AINCQ
836                                 } else {
837                                         asm = x86.ADECQ
838                                 }
839                         } else {
840                                 if val == 1 {
841                                         asm = x86.AINCL
842                                 } else {
843                                         asm = x86.ADECL
844                                 }
845                         }
846                         p := s.Prog(asm)
847                         p.To.Type = obj.TYPE_MEM
848                         p.To.Reg = v.Args[0].Reg()
849                         ssagen.AddAux2(&p.To, v, off)
850                         break
851                 }
852                 fallthrough
853         case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
854                 ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify:
855                 sc := v.AuxValAndOff()
856                 off := sc.Off64()
857                 val := sc.Val64()
858                 p := s.Prog(v.Op.Asm())
859                 p.From.Type = obj.TYPE_CONST
860                 p.From.Offset = val
861                 p.To.Type = obj.TYPE_MEM
862                 p.To.Reg = v.Args[0].Reg()
863                 ssagen.AddAux2(&p.To, v, off)
864
865         case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
866                 p := s.Prog(v.Op.Asm())
867                 p.From.Type = obj.TYPE_CONST
868                 sc := v.AuxValAndOff()
869                 p.From.Offset = sc.Val64()
870                 p.To.Type = obj.TYPE_MEM
871                 p.To.Reg = v.Args[0].Reg()
872                 ssagen.AddAux2(&p.To, v, sc.Off64())
873         case ssa.OpAMD64MOVOstoreconst:
874                 sc := v.AuxValAndOff()
875                 if sc.Val() != 0 {
876                         v.Fatalf("MOVO for non zero constants not implemented: %s", v.LongString())
877                 }
878
879                 if s.ABI != obj.ABIInternal {
880                         // zero X15 manually
881                         opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
882                 }
883                 p := s.Prog(v.Op.Asm())
884                 p.From.Type = obj.TYPE_REG
885                 p.From.Reg = x86.REG_X15
886                 p.To.Type = obj.TYPE_MEM
887                 p.To.Reg = v.Args[0].Reg()
888                 ssagen.AddAux2(&p.To, v, sc.Off64())
889
890         case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1,
891                 ssa.OpAMD64ADDLconstmodifyidx1, ssa.OpAMD64ADDLconstmodifyidx4, ssa.OpAMD64ADDLconstmodifyidx8, ssa.OpAMD64ADDQconstmodifyidx1, ssa.OpAMD64ADDQconstmodifyidx8,
892                 ssa.OpAMD64ANDLconstmodifyidx1, ssa.OpAMD64ANDLconstmodifyidx4, ssa.OpAMD64ANDLconstmodifyidx8, ssa.OpAMD64ANDQconstmodifyidx1, ssa.OpAMD64ANDQconstmodifyidx8,
893                 ssa.OpAMD64ORLconstmodifyidx1, ssa.OpAMD64ORLconstmodifyidx4, ssa.OpAMD64ORLconstmodifyidx8, ssa.OpAMD64ORQconstmodifyidx1, ssa.OpAMD64ORQconstmodifyidx8,
894                 ssa.OpAMD64XORLconstmodifyidx1, ssa.OpAMD64XORLconstmodifyidx4, ssa.OpAMD64XORLconstmodifyidx8, ssa.OpAMD64XORQconstmodifyidx1, ssa.OpAMD64XORQconstmodifyidx8:
895                 p := s.Prog(v.Op.Asm())
896                 p.From.Type = obj.TYPE_CONST
897                 sc := v.AuxValAndOff()
898                 p.From.Offset = sc.Val64()
899                 switch {
900                 case p.As == x86.AADDQ && p.From.Offset == 1:
901                         p.As = x86.AINCQ
902                         p.From.Type = obj.TYPE_NONE
903                 case p.As == x86.AADDQ && p.From.Offset == -1:
904                         p.As = x86.ADECQ
905                         p.From.Type = obj.TYPE_NONE
906                 case p.As == x86.AADDL && p.From.Offset == 1:
907                         p.As = x86.AINCL
908                         p.From.Type = obj.TYPE_NONE
909                 case p.As == x86.AADDL && p.From.Offset == -1:
910                         p.As = x86.ADECL
911                         p.From.Type = obj.TYPE_NONE
912                 }
913                 memIdx(&p.To, v)
914                 ssagen.AddAux2(&p.To, v, sc.Off64())
915         case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
916                 ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
917                 ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
918                 opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
919         case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS:
920                 r := v.Reg()
921                 // Break false dependency on destination register.
922                 opregreg(s, x86.AXORPS, r, r)
923                 opregreg(s, v.Op.Asm(), r, v.Args[0].Reg())
924         case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i, ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
925                 var p *obj.Prog
926                 switch v.Op {
927                 case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i:
928                         p = s.Prog(x86.AMOVQ)
929                 case ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
930                         p = s.Prog(x86.AMOVL)
931                 }
932                 p.From.Type = obj.TYPE_REG
933                 p.From.Reg = v.Args[0].Reg()
934                 p.To.Type = obj.TYPE_REG
935                 p.To.Reg = v.Reg()
936         case ssa.OpAMD64ADDQload, ssa.OpAMD64ADDLload, ssa.OpAMD64SUBQload, ssa.OpAMD64SUBLload,
937                 ssa.OpAMD64ANDQload, ssa.OpAMD64ANDLload, ssa.OpAMD64ORQload, ssa.OpAMD64ORLload,
938                 ssa.OpAMD64XORQload, ssa.OpAMD64XORLload, ssa.OpAMD64ADDSDload, ssa.OpAMD64ADDSSload,
939                 ssa.OpAMD64SUBSDload, ssa.OpAMD64SUBSSload, ssa.OpAMD64MULSDload, ssa.OpAMD64MULSSload,
940                 ssa.OpAMD64DIVSDload, ssa.OpAMD64DIVSSload:
941                 p := s.Prog(v.Op.Asm())
942                 p.From.Type = obj.TYPE_MEM
943                 p.From.Reg = v.Args[1].Reg()
944                 ssagen.AddAux(&p.From, v)
945                 p.To.Type = obj.TYPE_REG
946                 p.To.Reg = v.Reg()
947         case ssa.OpAMD64ADDLloadidx1, ssa.OpAMD64ADDLloadidx4, ssa.OpAMD64ADDLloadidx8, ssa.OpAMD64ADDQloadidx1, ssa.OpAMD64ADDQloadidx8,
948                 ssa.OpAMD64SUBLloadidx1, ssa.OpAMD64SUBLloadidx4, ssa.OpAMD64SUBLloadidx8, ssa.OpAMD64SUBQloadidx1, ssa.OpAMD64SUBQloadidx8,
949                 ssa.OpAMD64ANDLloadidx1, ssa.OpAMD64ANDLloadidx4, ssa.OpAMD64ANDLloadidx8, ssa.OpAMD64ANDQloadidx1, ssa.OpAMD64ANDQloadidx8,
950                 ssa.OpAMD64ORLloadidx1, ssa.OpAMD64ORLloadidx4, ssa.OpAMD64ORLloadidx8, ssa.OpAMD64ORQloadidx1, ssa.OpAMD64ORQloadidx8,
951                 ssa.OpAMD64XORLloadidx1, ssa.OpAMD64XORLloadidx4, ssa.OpAMD64XORLloadidx8, ssa.OpAMD64XORQloadidx1, ssa.OpAMD64XORQloadidx8,
952                 ssa.OpAMD64ADDSSloadidx1, ssa.OpAMD64ADDSSloadidx4, ssa.OpAMD64ADDSDloadidx1, ssa.OpAMD64ADDSDloadidx8,
953                 ssa.OpAMD64SUBSSloadidx1, ssa.OpAMD64SUBSSloadidx4, ssa.OpAMD64SUBSDloadidx1, ssa.OpAMD64SUBSDloadidx8,
954                 ssa.OpAMD64MULSSloadidx1, ssa.OpAMD64MULSSloadidx4, ssa.OpAMD64MULSDloadidx1, ssa.OpAMD64MULSDloadidx8,
955                 ssa.OpAMD64DIVSSloadidx1, ssa.OpAMD64DIVSSloadidx4, ssa.OpAMD64DIVSDloadidx1, ssa.OpAMD64DIVSDloadidx8:
956                 p := s.Prog(v.Op.Asm())
957
958                 r, i := v.Args[1].Reg(), v.Args[2].Reg()
959                 p.From.Type = obj.TYPE_MEM
960                 p.From.Scale = v.Op.Scale()
961                 if p.From.Scale == 1 && i == x86.REG_SP {
962                         r, i = i, r
963                 }
964                 p.From.Reg = r
965                 p.From.Index = i
966
967                 ssagen.AddAux(&p.From, v)
968                 p.To.Type = obj.TYPE_REG
969                 p.To.Reg = v.Reg()
970         case ssa.OpAMD64DUFFZERO:
971                 if s.ABI != obj.ABIInternal {
972                         // zero X15 manually
973                         opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
974                 }
975                 off := duffStart(v.AuxInt)
976                 adj := duffAdj(v.AuxInt)
977                 var p *obj.Prog
978                 if adj != 0 {
979                         p = s.Prog(x86.ALEAQ)
980                         p.From.Type = obj.TYPE_MEM
981                         p.From.Offset = adj
982                         p.From.Reg = x86.REG_DI
983                         p.To.Type = obj.TYPE_REG
984                         p.To.Reg = x86.REG_DI
985                 }
986                 p = s.Prog(obj.ADUFFZERO)
987                 p.To.Type = obj.TYPE_ADDR
988                 p.To.Sym = ir.Syms.Duffzero
989                 p.To.Offset = off
990         case ssa.OpAMD64DUFFCOPY:
991                 p := s.Prog(obj.ADUFFCOPY)
992                 p.To.Type = obj.TYPE_ADDR
993                 p.To.Sym = ir.Syms.Duffcopy
994                 if v.AuxInt%16 != 0 {
995                         v.Fatalf("bad DUFFCOPY AuxInt %v", v.AuxInt)
996                 }
997                 p.To.Offset = 14 * (64 - v.AuxInt/16)
998                 // 14 and 64 are magic constants.  14 is the number of bytes to encode:
999                 //      MOVUPS  (SI), X0
1000                 //      ADDQ    $16, SI
1001                 //      MOVUPS  X0, (DI)
1002                 //      ADDQ    $16, DI
1003                 // and 64 is the number of such blocks. See src/runtime/duff_amd64.s:duffcopy.
1004
1005         case ssa.OpCopy: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
1006                 if v.Type.IsMemory() {
1007                         return
1008                 }
1009                 x := v.Args[0].Reg()
1010                 y := v.Reg()
1011                 if x != y {
1012                         opregreg(s, moveByType(v.Type), y, x)
1013                 }
1014         case ssa.OpLoadReg:
1015                 if v.Type.IsFlags() {
1016                         v.Fatalf("load flags not implemented: %v", v.LongString())
1017                         return
1018                 }
1019                 p := s.Prog(loadByType(v.Type))
1020                 ssagen.AddrAuto(&p.From, v.Args[0])
1021                 p.To.Type = obj.TYPE_REG
1022                 p.To.Reg = v.Reg()
1023
1024         case ssa.OpStoreReg:
1025                 if v.Type.IsFlags() {
1026                         v.Fatalf("store flags not implemented: %v", v.LongString())
1027                         return
1028                 }
1029                 p := s.Prog(storeByType(v.Type))
1030                 p.From.Type = obj.TYPE_REG
1031                 p.From.Reg = v.Args[0].Reg()
1032                 ssagen.AddrAuto(&p.To, v)
1033         case ssa.OpAMD64LoweredHasCPUFeature:
1034                 p := s.Prog(x86.AMOVBLZX)
1035                 p.From.Type = obj.TYPE_MEM
1036                 ssagen.AddAux(&p.From, v)
1037                 p.To.Type = obj.TYPE_REG
1038                 p.To.Reg = v.Reg()
1039         case ssa.OpArgIntReg, ssa.OpArgFloatReg:
1040                 // The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
1041                 // The loop only runs once.
1042                 for _, ap := range v.Block.Func.RegArgs {
1043                         // Pass the spill/unspill information along to the assembler, offset by size of return PC pushed on stack.
1044                         addr := ssagen.SpillSlotAddr(ap, x86.REG_SP, v.Block.Func.Config.PtrSize)
1045                         s.FuncInfo().AddSpill(
1046                                 obj.RegSpill{Reg: ap.Reg, Addr: addr, Unspill: loadByType(ap.Type), Spill: storeByType(ap.Type)})
1047                 }
1048                 v.Block.Func.RegArgs = nil
1049                 ssagen.CheckArgReg(v)
1050         case ssa.OpAMD64LoweredGetClosurePtr:
1051                 // Closure pointer is DX.
1052                 ssagen.CheckLoweredGetClosurePtr(v)
1053         case ssa.OpAMD64LoweredGetG:
1054                 if s.ABI == obj.ABIInternal {
1055                         v.Fatalf("LoweredGetG should not appear in ABIInternal")
1056                 }
1057                 r := v.Reg()
1058                 getgFromTLS(s, r)
1059         case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLtail:
1060                 if s.ABI == obj.ABI0 && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABIInternal {
1061                         // zeroing X15 when entering ABIInternal from ABI0
1062                         if buildcfg.GOOS != "plan9" { // do not use SSE on Plan 9
1063                                 opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
1064                         }
1065                         // set G register from TLS
1066                         getgFromTLS(s, x86.REG_R14)
1067                 }
1068                 if v.Op == ssa.OpAMD64CALLtail {
1069                         s.TailCall(v)
1070                         break
1071                 }
1072                 s.Call(v)
1073                 if s.ABI == obj.ABIInternal && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABI0 {
1074                         // zeroing X15 when entering ABIInternal from ABI0
1075                         if buildcfg.GOOS != "plan9" { // do not use SSE on Plan 9
1076                                 opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
1077                         }
1078                         // set G register from TLS
1079                         getgFromTLS(s, x86.REG_R14)
1080                 }
1081         case ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
1082                 s.Call(v)
1083
1084         case ssa.OpAMD64LoweredGetCallerPC:
1085                 p := s.Prog(x86.AMOVQ)
1086                 p.From.Type = obj.TYPE_MEM
1087                 p.From.Offset = -8 // PC is stored 8 bytes below first parameter.
1088                 p.From.Name = obj.NAME_PARAM
1089                 p.To.Type = obj.TYPE_REG
1090                 p.To.Reg = v.Reg()
1091
1092         case ssa.OpAMD64LoweredGetCallerSP:
1093                 // caller's SP is the address of the first arg
1094                 mov := x86.AMOVQ
1095                 if types.PtrSize == 4 {
1096                         mov = x86.AMOVL
1097                 }
1098                 p := s.Prog(mov)
1099                 p.From.Type = obj.TYPE_ADDR
1100                 p.From.Offset = -base.Ctxt.Arch.FixedFrameSize // 0 on amd64, just to be consistent with other architectures
1101                 p.From.Name = obj.NAME_PARAM
1102                 p.To.Type = obj.TYPE_REG
1103                 p.To.Reg = v.Reg()
1104
1105         case ssa.OpAMD64LoweredWB:
1106                 p := s.Prog(obj.ACALL)
1107                 p.To.Type = obj.TYPE_MEM
1108                 p.To.Name = obj.NAME_EXTERN
1109                 // AuxInt encodes how many buffer entries we need.
1110                 p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
1111
1112         case ssa.OpAMD64LoweredPanicBoundsA, ssa.OpAMD64LoweredPanicBoundsB, ssa.OpAMD64LoweredPanicBoundsC:
1113                 p := s.Prog(obj.ACALL)
1114                 p.To.Type = obj.TYPE_MEM
1115                 p.To.Name = obj.NAME_EXTERN
1116                 p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
1117                 s.UseArgs(int64(2 * types.PtrSize)) // space used in callee args area by assembly stubs
1118
1119         case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
1120                 ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
1121                 ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
1122                 p := s.Prog(v.Op.Asm())
1123                 p.To.Type = obj.TYPE_REG
1124                 p.To.Reg = v.Reg()
1125
1126         case ssa.OpAMD64NEGLflags:
1127                 p := s.Prog(v.Op.Asm())
1128                 p.To.Type = obj.TYPE_REG
1129                 p.To.Reg = v.Reg0()
1130
1131         case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD, ssa.OpAMD64SQRTSS:
1132                 p := s.Prog(v.Op.Asm())
1133                 p.From.Type = obj.TYPE_REG
1134                 p.From.Reg = v.Args[0].Reg()
1135                 p.To.Type = obj.TYPE_REG
1136                 switch v.Op {
1137                 case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ:
1138                         p.To.Reg = v.Reg0()
1139                 case ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD, ssa.OpAMD64SQRTSS:
1140                         p.To.Reg = v.Reg()
1141                 }
1142         case ssa.OpAMD64ROUNDSD:
1143                 p := s.Prog(v.Op.Asm())
1144                 val := v.AuxInt
1145                 // 0 means math.RoundToEven, 1 Floor, 2 Ceil, 3 Trunc
1146                 if val < 0 || val > 3 {
1147                         v.Fatalf("Invalid rounding mode")
1148                 }
1149                 p.From.Offset = val
1150                 p.From.Type = obj.TYPE_CONST
1151                 p.AddRestSourceReg(v.Args[0].Reg())
1152                 p.To.Type = obj.TYPE_REG
1153                 p.To.Reg = v.Reg()
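                // For example, math.Floor lowers to roughly
                //      ROUNDSD $1, X0, X0
                // (registers illustrative), where the immediate selects the rounding
                // mode listed above.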
1154         case ssa.OpAMD64POPCNTQ, ssa.OpAMD64POPCNTL,
1155                 ssa.OpAMD64TZCNTQ, ssa.OpAMD64TZCNTL,
1156                 ssa.OpAMD64LZCNTQ, ssa.OpAMD64LZCNTL:
1157                 if v.Args[0].Reg() != v.Reg() {
1158                         // POPCNT/TZCNT/LZCNT have a false dependency on the destination register on Intel CPUs.
1159                         // The TZCNT/LZCNT problem affects pre-Skylake models. See discussion at https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62011#c7.
1160                         // Xor register with itself to break the dependency.
1161                         opregreg(s, x86.AXORL, v.Reg(), v.Reg())
1162                 }
1163                 p := s.Prog(v.Op.Asm())
1164                 p.From.Type = obj.TYPE_REG
1165                 p.From.Reg = v.Args[0].Reg()
1166                 p.To.Type = obj.TYPE_REG
1167                 p.To.Reg = v.Reg()
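                // For example, a 64-bit popcount whose input and output registers
                // differ is emitted roughly as
                //      XORL    AX, AX          // break the false output dependency
                //      POPCNTQ BX, AX
                // (AX/BX illustrative; XOR of a register with itself zeroes it
                // without depending on its previous value).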
1168
1169         case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
1170                 ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
1171                 ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
1172                 ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
1173                 ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
1174                 ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
1175                 ssa.OpAMD64SETA, ssa.OpAMD64SETAE,
1176                 ssa.OpAMD64SETO:
1177                 p := s.Prog(v.Op.Asm())
1178                 p.To.Type = obj.TYPE_REG
1179                 p.To.Reg = v.Reg()
1180
1181         case ssa.OpAMD64SETEQstore, ssa.OpAMD64SETNEstore,
1182                 ssa.OpAMD64SETLstore, ssa.OpAMD64SETLEstore,
1183                 ssa.OpAMD64SETGstore, ssa.OpAMD64SETGEstore,
1184                 ssa.OpAMD64SETBstore, ssa.OpAMD64SETBEstore,
1185                 ssa.OpAMD64SETAstore, ssa.OpAMD64SETAEstore:
1186                 p := s.Prog(v.Op.Asm())
1187                 p.To.Type = obj.TYPE_MEM
1188                 p.To.Reg = v.Args[0].Reg()
1189                 ssagen.AddAux(&p.To, v)
1190
1191         case ssa.OpAMD64SETEQstoreidx1, ssa.OpAMD64SETNEstoreidx1,
1192                 ssa.OpAMD64SETLstoreidx1, ssa.OpAMD64SETLEstoreidx1,
1193                 ssa.OpAMD64SETGstoreidx1, ssa.OpAMD64SETGEstoreidx1,
1194                 ssa.OpAMD64SETBstoreidx1, ssa.OpAMD64SETBEstoreidx1,
1195                 ssa.OpAMD64SETAstoreidx1, ssa.OpAMD64SETAEstoreidx1:
1196                 p := s.Prog(v.Op.Asm())
1197                 memIdx(&p.To, v)
1198                 ssagen.AddAux(&p.To, v)
1199
1200         case ssa.OpAMD64SETNEF:
1201                 t := v.RegTmp()
1202                 p := s.Prog(v.Op.Asm())
1203                 p.To.Type = obj.TYPE_REG
1204                 p.To.Reg = v.Reg()
1205                 q := s.Prog(x86.ASETPS)
1206                 q.To.Type = obj.TYPE_REG
1207                 q.To.Reg = t
1208                 // ORL avoids a partial register write and is smaller than ORQ, which the old compiler used
1209                 opregreg(s, x86.AORL, v.Reg(), t)
1210
1211         case ssa.OpAMD64SETEQF:
1212                 t := v.RegTmp()
1213                 p := s.Prog(v.Op.Asm())
1214                 p.To.Type = obj.TYPE_REG
1215                 p.To.Reg = v.Reg()
1216                 q := s.Prog(x86.ASETPC)
1217                 q.To.Type = obj.TYPE_REG
1218                 q.To.Reg = t
1219                 // ANDL avoids a partial register write and is smaller than ANDQ, which the old compiler used
1220                 opregreg(s, x86.AANDL, v.Reg(), t)
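                // Both float comparisons have to fold the parity flag (set on an
                // unordered result, i.e. a NaN operand) into the boolean. The SETEQF
                // case above, for instance, comes out roughly as
                //      SETEQ   AX
                //      SETPC   CX              // 1 only if the comparison was ordered
                //      ANDL    CX, AX          // equal means ZF set *and* not NaN
                // with AX/CX standing in for v.Reg() and the temp register.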
1221
1222         case ssa.OpAMD64InvertFlags:
1223                 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1224         case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
1225                 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1226         case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
1227                 v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
1228         case ssa.OpAMD64REPSTOSQ:
1229                 s.Prog(x86.AREP)
1230                 s.Prog(x86.ASTOSQ)
1231         case ssa.OpAMD64REPMOVSQ:
1232                 s.Prog(x86.AREP)
1233                 s.Prog(x86.AMOVSQ)
1234         case ssa.OpAMD64LoweredNilCheck:
1235                 // Issue a load which will fault if the input is nil.
1236                 // TODO: We currently use the 2-byte instruction TESTB AX, (reg).
1237                 // Should we use the 3-byte TESTB $0, (reg) instead? It is larger
1238                 // but it doesn't have false dependency on AX.
1239                 // Or maybe allocate an output register and use MOVL (reg),reg2 ?
1240                 // That trades clobbering flags for clobbering a register.
1241                 p := s.Prog(x86.ATESTB)
1242                 p.From.Type = obj.TYPE_REG
1243                 p.From.Reg = x86.REG_AX
1244                 p.To.Type = obj.TYPE_MEM
1245                 p.To.Reg = v.Args[0].Reg()
1246                 if logopt.Enabled() {
1247                         logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1248                 }
1249                 if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1250                         base.WarnfAt(v.Pos, "generated nil check")
1251                 }
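                // With the pointer in, say, BX, the emitted check is simply
                //      TESTB   AX, (BX)
                // which reads one byte through the pointer and faults on nil; the
                // result is discarded, only the potential fault matters.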
1252         case ssa.OpAMD64MOVBatomicload, ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
1253                 p := s.Prog(v.Op.Asm())
1254                 p.From.Type = obj.TYPE_MEM
1255                 p.From.Reg = v.Args[0].Reg()
1256                 ssagen.AddAux(&p.From, v)
1257                 p.To.Type = obj.TYPE_REG
1258                 p.To.Reg = v.Reg0()
1259         case ssa.OpAMD64XCHGB, ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
1260                 p := s.Prog(v.Op.Asm())
1261                 p.From.Type = obj.TYPE_REG
1262                 p.From.Reg = v.Reg0()
1263                 p.To.Type = obj.TYPE_MEM
1264                 p.To.Reg = v.Args[1].Reg()
1265                 ssagen.AddAux(&p.To, v)
1266         case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
1267                 s.Prog(x86.ALOCK)
1268                 p := s.Prog(v.Op.Asm())
1269                 p.From.Type = obj.TYPE_REG
1270                 p.From.Reg = v.Reg0()
1271                 p.To.Type = obj.TYPE_MEM
1272                 p.To.Reg = v.Args[1].Reg()
1273                 ssagen.AddAux(&p.To, v)
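                // For example, an atomic 64-bit add comes out roughly as
                //      LOCK
                //      XADDQ   AX, (BX)
                // (registers illustrative), where AX carries the delta in and the
                // old memory value out; that old value is the op's Reg0 result.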
1274         case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
1275                 if v.Args[1].Reg() != x86.REG_AX {
1276                         v.Fatalf("input[1] not in AX %s", v.LongString())
1277                 }
1278                 s.Prog(x86.ALOCK)
1279                 p := s.Prog(v.Op.Asm())
1280                 p.From.Type = obj.TYPE_REG
1281                 p.From.Reg = v.Args[2].Reg()
1282                 p.To.Type = obj.TYPE_MEM
1283                 p.To.Reg = v.Args[0].Reg()
1284                 ssagen.AddAux(&p.To, v)
1285                 p = s.Prog(x86.ASETEQ)
1286                 p.To.Type = obj.TYPE_REG
1287                 p.To.Reg = v.Reg0()
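                // Sketch of the full sequence (registers illustrative, except AX,
                // which the instruction requires for the compare value):
                //      LOCK
                //      CMPXCHGQ        CX, (BX)        // if (BX) == AX, store CX; ZF reports success
                //      SETEQ   DX                      // success flag into v.Reg0()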
1288         case ssa.OpAMD64ANDBlock, ssa.OpAMD64ANDLlock, ssa.OpAMD64ORBlock, ssa.OpAMD64ORLlock:
1289                 s.Prog(x86.ALOCK)
1290                 p := s.Prog(v.Op.Asm())
1291                 p.From.Type = obj.TYPE_REG
1292                 p.From.Reg = v.Args[1].Reg()
1293                 p.To.Type = obj.TYPE_MEM
1294                 p.To.Reg = v.Args[0].Reg()
1295                 ssagen.AddAux(&p.To, v)
1296         case ssa.OpAMD64PrefetchT0, ssa.OpAMD64PrefetchNTA:
1297                 p := s.Prog(v.Op.Asm())
1298                 p.From.Type = obj.TYPE_MEM
1299                 p.From.Reg = v.Args[0].Reg()
1300         case ssa.OpClobber:
1301                 p := s.Prog(x86.AMOVL)
1302                 p.From.Type = obj.TYPE_CONST
1303                 p.From.Offset = 0xdeaddead
1304                 p.To.Type = obj.TYPE_MEM
1305                 p.To.Reg = x86.REG_SP
1306                 ssagen.AddAux(&p.To, v)
1307                 p = s.Prog(x86.AMOVL)
1308                 p.From.Type = obj.TYPE_CONST
1309                 p.From.Offset = 0xdeaddead
1310                 p.To.Type = obj.TYPE_MEM
1311                 p.To.Reg = x86.REG_SP
1312                 ssagen.AddAux(&p.To, v)
1313                 p.To.Offset += 4
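                // Net effect: the 8-byte stack slot addressed via SP plus v's
                // Aux/AuxInt offset is overwritten four bytes at a time, roughly
                //      MOVL    $0xdeaddead, off(SP)
                //      MOVL    $0xdeaddead, off+4(SP)
                // so that stale uses of supposedly dead stack slots become visible
                // (this op appears only in the compiler's clobber-dead debug mode).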
1314         case ssa.OpClobberReg:
1315                 x := uint64(0xdeaddeaddeaddead)
1316                 p := s.Prog(x86.AMOVQ)
1317                 p.From.Type = obj.TYPE_CONST
1318                 p.From.Offset = int64(x)
1319                 p.To.Type = obj.TYPE_REG
1320                 p.To.Reg = v.Reg()
1321         default:
1322                 v.Fatalf("genValue not implemented: %s", v.LongString())
1323         }
1324 }
1325
1326 var blockJump = [...]struct {
1327         asm, invasm obj.As
1328 }{
1329         ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
1330         ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
1331         ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
1332         ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
1333         ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
1334         ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
1335         ssa.BlockAMD64OS:  {x86.AJOS, x86.AJOC},
1336         ssa.BlockAMD64OC:  {x86.AJOC, x86.AJOS},
1337         ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
1338         ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
1339         ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
1340         ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
1341         ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
1342         ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
1343 }
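// For each conditional block kind, asm is the jump used to branch to Succs[0],
// and invasm is the inverted condition used when Succs[0] is the fallthrough
// block. For example, a BlockAMD64LT whose first successor is the next block
// in the layout is emitted as a single
//      JGE     succ1
// rather than a JLT plus an unconditional JMP.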
1344
1345 var eqfJumps = [2][2]ssagen.IndexJump{
1346         {{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
1347         {{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
1348 }
1349 var nefJumps = [2][2]ssagen.IndexJump{
1350         {{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
1351         {{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
1352 }
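// These tables drive the two-instruction jump sequences for floating-point
// equality blocks, where the parity flag distinguishes unordered (NaN)
// results. For example, an EQF block whose "equal" successor is the
// fallthrough block becomes roughly
//      JNE     succ1   // not equal
//      JPS     succ1   // unordered, so also "not equal"
// with execution falling through to succ0 only when the operands compared
// equal and neither was NaN.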
1353
1354 func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
1355         switch b.Kind {
1356         case ssa.BlockPlain:
1357                 if b.Succs[0].Block() != next {
1358                         p := s.Prog(obj.AJMP)
1359                         p.To.Type = obj.TYPE_BRANCH
1360                         s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
1361                 }
1362         case ssa.BlockDefer:
1363                 // defer returns in AX:
1364                 // 0 if we should continue executing
1365                 // 1 if we should jump to deferreturn call
1366                 p := s.Prog(x86.ATESTL)
1367                 p.From.Type = obj.TYPE_REG
1368                 p.From.Reg = x86.REG_AX
1369                 p.To.Type = obj.TYPE_REG
1370                 p.To.Reg = x86.REG_AX
1371                 p = s.Prog(x86.AJNE)
1372                 p.To.Type = obj.TYPE_BRANCH
1373                 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
1374                 if b.Succs[0].Block() != next {
1375                         p := s.Prog(obj.AJMP)
1376                         p.To.Type = obj.TYPE_BRANCH
1377                         s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
1378                 }
1379         case ssa.BlockExit, ssa.BlockRetJmp:
1380         case ssa.BlockRet:
1381                 s.Prog(obj.ARET)
1382
1383         case ssa.BlockAMD64EQF:
1384                 s.CombJump(b, next, &eqfJumps)
1385
1386         case ssa.BlockAMD64NEF:
1387                 s.CombJump(b, next, &nefJumps)
1388
1389         case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
1390                 ssa.BlockAMD64LT, ssa.BlockAMD64GE,
1391                 ssa.BlockAMD64LE, ssa.BlockAMD64GT,
1392                 ssa.BlockAMD64OS, ssa.BlockAMD64OC,
1393                 ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
1394                 ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
1395                 jmp := blockJump[b.Kind]
1396                 switch next {
1397                 case b.Succs[0].Block():
1398                         s.Br(jmp.invasm, b.Succs[1].Block())
1399                 case b.Succs[1].Block():
1400                         s.Br(jmp.asm, b.Succs[0].Block())
1401                 default:
1402                         if b.Likely != ssa.BranchUnlikely {
1403                                 s.Br(jmp.asm, b.Succs[0].Block())
1404                                 s.Br(obj.AJMP, b.Succs[1].Block())
1405                         } else {
1406                                 s.Br(jmp.invasm, b.Succs[1].Block())
1407                                 s.Br(obj.AJMP, b.Succs[0].Block())
1408                         }
1409                 }
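                // For example, when neither successor of a BlockAMD64EQ is the next
                // block in the layout and the first successor is likely, this emits
                //      JEQ     succ0
                //      JMP     succ1
                // so the likely path costs a single taken jump; an unlikely first
                // successor flips the pair to JNE succ1 / JMP succ0 instead.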
1410
1411         case ssa.BlockAMD64JUMPTABLE:
1412                 // JMP      *(TABLE)(INDEX*8)
1413                 p := s.Prog(obj.AJMP)
1414                 p.To.Type = obj.TYPE_MEM
1415                 p.To.Reg = b.Controls[1].Reg()
1416                 p.To.Index = b.Controls[0].Reg()
1417                 p.To.Scale = 8
1418                 // Save jump tables for later resolution of the target blocks.
1419                 s.JumpTables = append(s.JumpTables, b)
1420
1421         default:
1422                 b.Fatalf("branch not implemented: %s", b.LongString())
1423         }
1424 }
1425
1426 func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
1427         p := s.Prog(loadByType(t))
1428         p.From.Type = obj.TYPE_MEM
1429         p.From.Name = obj.NAME_AUTO
1430         p.From.Sym = n.Linksym()
1431         p.From.Offset = n.FrameOffset() + off
1432         p.To.Type = obj.TYPE_REG
1433         p.To.Reg = reg
1434         return p
1435 }
1436
1437 func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
1438         p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
1439         p.To.Name = obj.NAME_PARAM
1440         p.To.Sym = n.Linksym()
1441         p.Pos = p.Pos.WithNotStmt()
1442         return p
1443 }