// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
	"cmd/compile/internal/base"
	"cmd/compile/internal/ir"
	"cmd/compile/internal/logopt"
	"cmd/compile/internal/objw"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/ssagen"
	"cmd/compile/internal/types"
	"cmd/internal/obj/x86"
// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	for _, c := range b.ControlValues() {
		flive = c.Type.IsFlags() || flive
	for i := len(b.Values) - 1; i >= 0; i-- {
		if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
			// The "mark" is any non-nil Aux value.
		for _, a := range v.Args {
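// Why the mark matters: ssaGenValue rewrites "MOVQ $0, reg" into the shorter
// "XORL reg, reg" (see the OpAMD64MOVLconst/MOVQconst case below), but XOR
// clobbers the condition flags. Marking the constant move (any non-nil Aux)
// tells ssaGenValue that flags are live across it, so the plain MOV encoding
// must be kept. Illustrative sequence (not compiler code):
//	CMPQ AX, BX    // sets flags
//	MOVQ $0, CX    // must stay a MOV; XORL CX, CX would clobber the flags
//	JEQ  target    // consumes flags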
// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	// Avoid partial register write
	// Otherwise, there's no difference between load and store opcodes.
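// Note: the zero-extending forms chosen above (e.g. MOVBLZX rather than MOVB
// for a 1-byte load) write the full destination register, which avoids the
// partial-register stalls a narrow MOV would introduce.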
// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	panic(fmt.Sprintf("bad store type %v", t))
// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t *types.Type) obj.As {
		// Moving the whole SSE2 register is faster
		// than moving just the correct low portion of it.
		// There is no xmm->xmm move with a 1-byte opcode,
		// so use MOVUPS, which has a 2-byte opcode.
			// Avoids partial register write
			return x86.AMOVUPS // int128s are in SSE registers
			panic(fmt.Sprintf("bad int register width %d:%v", t.Size(), t))
// opregreg emits instructions for
//	dest := dest(To) op src(From)
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(s *ssagen.State, op obj.As, dest, src int16) *obj.Prog {
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
// memIdx fills out a as an indexed memory reference for v.
// It assumes that the base register and the index register
// are v.Args[0].Reg() and v.Args[1].Reg(), respectively.
// The caller must still use ssagen.AddAux/ssagen.AddAux2 to handle v.Aux as necessary.
func memIdx(a *obj.Addr, v *ssa.Value) {
	r, i := v.Args[0].Reg(), v.Args[1].Reg()
	a.Type = obj.TYPE_MEM
	a.Scale = v.Op.Scale()
	if a.Scale == 1 && i == x86.REG_SP {
// DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ;
// see runtime/mkduff.go.
func duffStart(size int64) int64 {
func duffAdj(size int64) int64 {
// duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
// required to use the duffzero mechanism for a block of the given size.
func duff(size int64) (int64, int64) {
	if size < 32 || size > 1024 || size%dzClearStep != 0 {
		panic("bad duffzero size")
	steps := size / dzClearStep
	blocks := steps / dzBlockLen
	off := dzBlockSize * (dzBlocks - blocks)
		off -= dzMovSize * steps
		adj -= dzClearStep * (dzBlockLen - steps)
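// Worked example (assuming dzClearStep is 16, the bytes cleared by one MOVUPS):
// a 64-byte zero needs 64/16 = 4 clears, i.e. exactly one full block, so duff
// returns an offset selecting the last block of duffzero
// (dzBlockSize*(dzBlocks-1)) and a zero pointer adjustment. Sizes that end
// partway through a block instead enter mid-block and pre-adjust DI for the
// clears that are skipped.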
func getgFromTLS(s *ssagen.State, r int16) {
	// See the comments in cmd/internal/obj/x86/obj6.go
	// near CanUse1InsnTLS for a detailed explanation of these instructions.
	if x86.CanUse1InsnTLS(base.Ctxt) {
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = x86.REG_TLS
		p.To.Type = obj.TYPE_REG
		// MOVQ (r)(TLS*1), r
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_TLS
		p.To.Type = obj.TYPE_REG
		q := s.Prog(x86.AMOVQ)
		q.From.Type = obj.TYPE_MEM
		q.From.Index = x86.REG_TLS
		q.To.Type = obj.TYPE_REG
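// getgFromTLS is used below (see the CALLstatic/CALLtail and LoweredGetG cases)
// to re-establish the G register after code that may not preserve it. The two
// shapes emitted above are, roughly:
//	MOVQ (TLS), r                      // when one instruction suffices
//	MOVQ TLS, r; MOVQ (r)(TLS*1), r    // the two-instruction form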
func ssaGenValue(s *ssagen.State, v *ssa.Value) {
	case ssa.OpAMD64VFMADD231SD:
		p := s.Prog(v.Op.Asm())
		p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[2].Reg()}
		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
		p.AddRestSourceReg(v.Args[1].Reg())
	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.To.Type = obj.TYPE_REG
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.To.Type = obj.TYPE_REG
			if v.Op == ssa.OpAMD64ADDQ {
			p.From.Type = obj.TYPE_MEM
			p.To.Type = obj.TYPE_REG
	// 2-address opcode arithmetic
	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
		ssa.OpAMD64ROLQ, ssa.OpAMD64ROLL, ssa.OpAMD64ROLW, ssa.OpAMD64ROLB,
		ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB,
		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
		ssa.OpAMD64MINSS, ssa.OpAMD64MINSD,
		ssa.OpAMD64POR, ssa.OpAMD64PXOR,
		ssa.OpAMD64BTSL, ssa.OpAMD64BTSQ,
		ssa.OpAMD64BTCL, ssa.OpAMD64BTCQ,
		ssa.OpAMD64BTRL, ssa.OpAMD64BTRQ:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
	case ssa.OpAMD64SHRDQ, ssa.OpAMD64SHLDQ:
		p := s.Prog(v.Op.Asm())
		lo, hi, bits := v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg()
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.AddRestSourceReg(hi)
	case ssa.OpAMD64BLSIQ, ssa.OpAMD64BLSIL,
		ssa.OpAMD64BLSMSKQ, ssa.OpAMD64BLSMSKL,
		ssa.OpAMD64BLSRQ, ssa.OpAMD64BLSRL:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		case ssa.OpAMD64BLSRQ, ssa.OpAMD64BLSRL:
	case ssa.OpAMD64ANDNQ, ssa.OpAMD64ANDNL:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.AddRestSourceReg(v.Args[1].Reg())
	case ssa.OpAMD64SARXL, ssa.OpAMD64SARXQ,
		ssa.OpAMD64SHLXL, ssa.OpAMD64SHLXQ,
		ssa.OpAMD64SHRXL, ssa.OpAMD64SHRXQ:
		p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
		p.AddRestSourceReg(v.Args[0].Reg())
	case ssa.OpAMD64SHLXLload, ssa.OpAMD64SHLXQload,
		ssa.OpAMD64SHRXLload, ssa.OpAMD64SHRXQload,
		ssa.OpAMD64SARXLload, ssa.OpAMD64SARXQload:
		p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
		m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
	case ssa.OpAMD64SHLXLloadidx1, ssa.OpAMD64SHLXLloadidx4, ssa.OpAMD64SHLXLloadidx8,
		ssa.OpAMD64SHRXLloadidx1, ssa.OpAMD64SHRXLloadidx4, ssa.OpAMD64SHRXLloadidx8,
		ssa.OpAMD64SARXLloadidx1, ssa.OpAMD64SARXLloadidx4, ssa.OpAMD64SARXLloadidx8,
		ssa.OpAMD64SHLXQloadidx1, ssa.OpAMD64SHLXQloadidx8,
		ssa.OpAMD64SHRXQloadidx1, ssa.OpAMD64SHRXQloadidx8,
		ssa.OpAMD64SARXQloadidx1, ssa.OpAMD64SARXQloadidx8:
		p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[2].Reg())
		m := obj.Addr{Type: obj.TYPE_MEM}
	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		// Zero-extend dividend.
		opregreg(s, x86.AXORL, x86.REG_DX, x86.REG_DX)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
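		// The sequence emitted here for a 64-bit unsigned divide is simply:
		//	XORL DX, DX    // zero-extend the dividend into DX:AX
		//	DIVQ divisor   // quotient -> AX, remainder -> DX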
	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		var opCMP, opNEG, opSXD obj.As
		case ssa.OpAMD64DIVQ:
			opCMP, opNEG, opSXD = x86.ACMPQ, x86.ANEGQ, x86.ACQO
		case ssa.OpAMD64DIVL:
			opCMP, opNEG, opSXD = x86.ACMPL, x86.ANEGL, x86.ACDQ
		case ssa.OpAMD64DIVW:
			opCMP, opNEG, opSXD = x86.ACMPW, x86.ANEGW, x86.ACWD
		// The CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1. Handle divide by -1 as a special case.
		if ssa.DivisionNeedsFixUp(v) {
			c.From.Type = obj.TYPE_REG
			c.To.Type = obj.TYPE_CONST
			// Divisor is not -1, proceed with normal division.
			j1 = s.Prog(x86.AJNE)
			j1.To.Type = obj.TYPE_BRANCH
			// Divisor is -1, manually compute quotient and remainder via fixup code.
			n1.To.Type = obj.TYPE_REG
			n1.To.Reg = x86.REG_AX
			opregreg(s, x86.AXORL, x86.REG_DX, x86.REG_DX)
			// TODO(khr): issue only the -1 fixup code we need.
			// For instance, if only the quotient is used, there is no point in zeroing the remainder.
			// Skip over normal division.
			j2 = s.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH
		// Sign-extend dividend and perform division.
		p = s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
			j2.To.SetTarget(s.Pc())
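		// Shape of the code emitted above when the -1 fixup is needed (64-bit
		// case; "div" and "done" are illustrative labels, the real branches are
		// patched via SetTarget):
		//	CMPQ divisor, $-1
		//	JNE  div        // common case: do the real division
		//	NEGQ AX         // n / -1 == -n
		//	XORL DX, DX     // n % -1 == 0
		//	JMP  done
		// div:
		//	CQO             // sign-extend AX into DX:AX
		//	IDIVQ divisor
		// done: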
	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU:
		// The frontend rewrites constant division by 8/16/32-bit integers into
		// HMUL by a constant; the SSA rewrite rules generate the 64-bit versions.
		// Arg[0] is already in AX as it's the only register we allow,
		// and DX is the only output we care about (the high bits).
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency.
		if v.Type.Size() == 1 {
			m := s.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
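		// Example of what feeds this op: a constant division such as x/7 is
		// rewritten by the compiler into a multiply by a precomputed "magic"
		// constant; the HMUL result (the high half of the product, left in DX)
		// yields the quotient after a final shift (and sometimes add) fixup,
		// avoiding a much slower DIV instruction.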
	case ssa.OpAMD64MULQU, ssa.OpAMD64MULLU:
		// Arg[0] is already in AX as it's the only register we allow
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
	case ssa.OpAMD64MULQU2:
		// Arg[0] is already in AX as it's the only register we allow
		// results hi in DX, lo in AX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
	case ssa.OpAMD64DIVQU2:
		// Arg[0], Arg[1] are already in DX, AX, as they're the only registers we allow
		// results q in AX, r in DX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
	case ssa.OpAMD64AVGQU:
		// Compute (x+y)/2 unsigned.
		// Do a 64-bit add; the overflow goes into the carry flag.
		// Shift right once and pull the carry back into the 63rd bit.
		p := s.Prog(x86.AADDQ)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p = s.Prog(x86.ARCRQ)
		p.From.Type = obj.TYPE_CONST
		p.To.Type = obj.TYPE_REG
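		// Worked example: with x = y = 1<<63, the ADDQ wraps to 0 and sets the
		// carry flag; RCRQ $1 rotates that carry back in as bit 63, producing
		// the correct average 1<<63, which a plain shift right would have lost.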
	case ssa.OpAMD64ADDQcarry, ssa.OpAMD64ADCQ:
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.To.Type = obj.TYPE_REG
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.To.Type = obj.TYPE_REG
			v.Fatalf("output not in same register as an input %s", v.LongString())
	case ssa.OpAMD64SUBQborrow, ssa.OpAMD64SBBQ:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
	case ssa.OpAMD64ADDQconstcarry, ssa.OpAMD64ADCQconst, ssa.OpAMD64SUBQconstborrow, ssa.OpAMD64SBBQconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
				// The software optimization manual recommends add $1,reg,
				// but inc/dec is 1 byte smaller. ICC always uses inc;
				// Clang/GCC choose depending on flags, but prefer add.
				// Experiments show that inc/dec is both a little faster
				// and makes the binary a little smaller.
				if v.Op == ssa.OpAMD64ADDQconst {
				p.To.Type = obj.TYPE_REG
				if v.Op == ssa.OpAMD64ADDQconst {
				p.To.Type = obj.TYPE_REG
				// 'SUBQ $-0x80, r' is shorter to encode than
				// and functionally equivalent to 'ADDQ $0x80, r'.
				if v.Op == ssa.OpAMD64ADDQconst {
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = -0x80
				p.To.Type = obj.TYPE_REG
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
		if v.Op == ssa.OpAMD64ADDQconst {
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
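		// Rough encoding sizes behind the tricks above (illustrative only;
		// exact byte counts vary by register):
		//	INCQ AX          // 3 bytes, vs. 4 bytes for ADDQ $1, AX
		//	SUBQ $-0x80, AX  // -0x80 fits a sign-extended imm8 (4 bytes), while
		//	                 // ADDQ $0x80, AX needs a full imm32 encoding.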
	case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ, ssa.OpAMD64CMOVWEQ,
		ssa.OpAMD64CMOVQLT, ssa.OpAMD64CMOVLLT, ssa.OpAMD64CMOVWLT,
		ssa.OpAMD64CMOVQNE, ssa.OpAMD64CMOVLNE, ssa.OpAMD64CMOVWNE,
		ssa.OpAMD64CMOVQGT, ssa.OpAMD64CMOVLGT, ssa.OpAMD64CMOVWGT,
		ssa.OpAMD64CMOVQLE, ssa.OpAMD64CMOVLLE, ssa.OpAMD64CMOVWLE,
		ssa.OpAMD64CMOVQGE, ssa.OpAMD64CMOVLGE, ssa.OpAMD64CMOVWGE,
		ssa.OpAMD64CMOVQHI, ssa.OpAMD64CMOVLHI, ssa.OpAMD64CMOVWHI,
		ssa.OpAMD64CMOVQLS, ssa.OpAMD64CMOVLLS, ssa.OpAMD64CMOVWLS,
		ssa.OpAMD64CMOVQCC, ssa.OpAMD64CMOVLCC, ssa.OpAMD64CMOVWCC,
		ssa.OpAMD64CMOVQCS, ssa.OpAMD64CMOVLCS, ssa.OpAMD64CMOVWCS,
		ssa.OpAMD64CMOVQGTF, ssa.OpAMD64CMOVLGTF, ssa.OpAMD64CMOVWGTF,
		ssa.OpAMD64CMOVQGEF, ssa.OpAMD64CMOVLGEF, ssa.OpAMD64CMOVWGEF:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
	case ssa.OpAMD64CMOVQNEF, ssa.OpAMD64CMOVLNEF, ssa.OpAMD64CMOVWNEF:
		// Flag condition: ^ZERO || PARITY
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		if v.Op == ssa.OpAMD64CMOVQNEF {
			q = s.Prog(x86.ACMOVQPS)
		} else if v.Op == ssa.OpAMD64CMOVLNEF {
			q = s.Prog(x86.ACMOVLPS)
			q = s.Prog(x86.ACMOVWPS)
		q.From.Type = obj.TYPE_REG
		q.From.Reg = v.Args[1].Reg()
		q.To.Type = obj.TYPE_REG
	case ssa.OpAMD64CMOVQEQF, ssa.OpAMD64CMOVLEQF, ssa.OpAMD64CMOVWEQF:
		// Flag condition: ZERO && !PARITY
		// TODO(rasky): we could generate:
		// But this requires a way for regalloc to know that SRC might be
		// clobbered by this instruction.
		opregreg(s, moveByType(v.Type), t, v.Args[1].Reg())
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		if v.Op == ssa.OpAMD64CMOVQEQF {
			q = s.Prog(x86.ACMOVQPC)
		} else if v.Op == ssa.OpAMD64CMOVLEQF {
			q = s.Prog(x86.ACMOVLPC)
			q = s.Prog(x86.ACMOVWPC)
		q.From.Type = obj.TYPE_REG
		q.To.Type = obj.TYPE_REG
	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.AddRestSourceReg(v.Args[0].Reg())
	case ssa.OpAMD64ANDQconst:
		// If the constant is positive and fits into 32 bits, use ANDL.
		// This saves a few bytes of encoding.
		if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
		ssa.OpAMD64ANDLconst,
		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8,
		ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8,
		ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		if v.AuxInt != 0 && v.Aux == nil {
			// Emit an additional LEA to add the displacement instead of creating a slow 3-operand LEA.
			case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
				p = s.Prog(x86.ALEAQ)
			case ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8:
				p = s.Prog(x86.ALEAL)
			case ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
				p = s.Prog(x86.ALEAW)
			p.From.Type = obj.TYPE_MEM
			p.To.Type = obj.TYPE_REG
		ssagen.AddAux(&p.From, v)
	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL, ssa.OpAMD64LEAW:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB,
		ssa.OpAMD64BTL, ssa.OpAMD64BTQ:
		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
		// The Go assembler has swapped operands for UCOMISx relative to CMP,
		// so we must account for that here.
		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst,
		ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
		ssa.OpAMD64BTSLconst, ssa.OpAMD64BTSQconst,
		ssa.OpAMD64BTCLconst, ssa.OpAMD64BTCQconst,
		ssa.OpAMD64BTRLconst, ssa.OpAMD64BTRQconst:
		if op == ssa.OpAMD64BTQconst && v.AuxInt < 32 {
			// Emit 32-bit version because it's shorter
			op = ssa.OpAMD64BTLconst
		p := s.Prog(op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.OpAMD64CMPQload, ssa.OpAMD64CMPLload, ssa.OpAMD64CMPWload, ssa.OpAMD64CMPBload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[1].Reg()
	case ssa.OpAMD64CMPQconstload, ssa.OpAMD64CMPLconstload, ssa.OpAMD64CMPWconstload, ssa.OpAMD64CMPBconstload:
		sc := v.AuxValAndOff()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.From, v, sc.Off64())
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = sc.Val64()
	case ssa.OpAMD64CMPQloadidx8, ssa.OpAMD64CMPQloadidx1, ssa.OpAMD64CMPLloadidx4, ssa.OpAMD64CMPLloadidx1, ssa.OpAMD64CMPWloadidx2, ssa.OpAMD64CMPWloadidx1, ssa.OpAMD64CMPBloadidx1:
		p := s.Prog(v.Op.Asm())
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[2].Reg()
	case ssa.OpAMD64CMPQconstloadidx8, ssa.OpAMD64CMPQconstloadidx1, ssa.OpAMD64CMPLconstloadidx4, ssa.OpAMD64CMPLconstloadidx1, ssa.OpAMD64CMPWconstloadidx2, ssa.OpAMD64CMPWconstloadidx1, ssa.OpAMD64CMPBconstloadidx1:
		sc := v.AuxValAndOff()
		p := s.Prog(v.Op.Asm())
		ssagen.AddAux2(&p.From, v, sc.Off64())
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = sc.Val64()
	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
		// If flags aren't live (indicated by v.Aux == nil),
		// then we can rewrite MOV $0, AX into XOR AX, AX.
		if v.AuxInt == 0 && v.Aux == nil {
			opregreg(s, x86.AXORL, x, x)
		// Use MOVL to move a small constant into a register
		// when the constant is positive and fits into 32 bits.
		if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
			// The upper 32 bits are zeroed automatically when using MOVL.
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
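		// For instance, a constant like $0x12345678 headed for a 64-bit register
		// can be emitted as MOVL $0x12345678, AX: the 32-bit write clears the
		// upper half for free and the encoding is shorter than the MOVQ form.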
	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVOload,
		ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload,
		ssa.OpAMD64MOVBEQload, ssa.OpAMD64MOVBELload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1,
		ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8, ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4, ssa.OpAMD64MOVWloadidx2,
		ssa.OpAMD64MOVBELloadidx1, ssa.OpAMD64MOVBELloadidx4, ssa.OpAMD64MOVBELloadidx8, ssa.OpAMD64MOVBEQloadidx1, ssa.OpAMD64MOVBEQloadidx8:
		p := s.Prog(v.Op.Asm())
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore,
		ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify,
		ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify,
		ssa.OpAMD64MOVBEQstore, ssa.OpAMD64MOVBELstore, ssa.OpAMD64MOVBEWstore:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1,
		ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8, ssa.OpAMD64MOVLstoreidx8, ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4, ssa.OpAMD64MOVWstoreidx2,
		ssa.OpAMD64ADDLmodifyidx1, ssa.OpAMD64ADDLmodifyidx4, ssa.OpAMD64ADDLmodifyidx8, ssa.OpAMD64ADDQmodifyidx1, ssa.OpAMD64ADDQmodifyidx8,
		ssa.OpAMD64SUBLmodifyidx1, ssa.OpAMD64SUBLmodifyidx4, ssa.OpAMD64SUBLmodifyidx8, ssa.OpAMD64SUBQmodifyidx1, ssa.OpAMD64SUBQmodifyidx8,
		ssa.OpAMD64ANDLmodifyidx1, ssa.OpAMD64ANDLmodifyidx4, ssa.OpAMD64ANDLmodifyidx8, ssa.OpAMD64ANDQmodifyidx1, ssa.OpAMD64ANDQmodifyidx8,
		ssa.OpAMD64ORLmodifyidx1, ssa.OpAMD64ORLmodifyidx4, ssa.OpAMD64ORLmodifyidx8, ssa.OpAMD64ORQmodifyidx1, ssa.OpAMD64ORQmodifyidx8,
		ssa.OpAMD64XORLmodifyidx1, ssa.OpAMD64XORLmodifyidx4, ssa.OpAMD64XORLmodifyidx8, ssa.OpAMD64XORQmodifyidx1, ssa.OpAMD64XORQmodifyidx8,
		ssa.OpAMD64MOVBEWstoreidx1, ssa.OpAMD64MOVBEWstoreidx2, ssa.OpAMD64MOVBELstoreidx1, ssa.OpAMD64MOVBELstoreidx4, ssa.OpAMD64MOVBELstoreidx8, ssa.OpAMD64MOVBEQstoreidx1, ssa.OpAMD64MOVBEQstoreidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.OpAMD64ADDQconstmodify, ssa.OpAMD64ADDLconstmodify:
		sc := v.AuxValAndOff()
		if val == 1 || val == -1 {
			if v.Op == ssa.OpAMD64ADDQconstmodify {
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			ssagen.AddAux2(&p.To, v, off)
	case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
		ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify:
		sc := v.AuxValAndOff()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.To, v, off)
	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val64()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.To, v, sc.Off64())
	case ssa.OpAMD64MOVOstoreconst:
		sc := v.AuxValAndOff()
			v.Fatalf("MOVO for non-zero constants not implemented: %s", v.LongString())
		if s.ABI != obj.ABIInternal {
			// X15 is only guaranteed to hold zero under ABIInternal; materialize the zero here.
			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_X15
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.To, v, sc.Off64())
	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1,
		ssa.OpAMD64ADDLconstmodifyidx1, ssa.OpAMD64ADDLconstmodifyidx4, ssa.OpAMD64ADDLconstmodifyidx8, ssa.OpAMD64ADDQconstmodifyidx1, ssa.OpAMD64ADDQconstmodifyidx8,
		ssa.OpAMD64ANDLconstmodifyidx1, ssa.OpAMD64ANDLconstmodifyidx4, ssa.OpAMD64ANDLconstmodifyidx8, ssa.OpAMD64ANDQconstmodifyidx1, ssa.OpAMD64ANDQconstmodifyidx8,
		ssa.OpAMD64ORLconstmodifyidx1, ssa.OpAMD64ORLconstmodifyidx4, ssa.OpAMD64ORLconstmodifyidx8, ssa.OpAMD64ORQconstmodifyidx1, ssa.OpAMD64ORQconstmodifyidx8,
		ssa.OpAMD64XORLconstmodifyidx1, ssa.OpAMD64XORLconstmodifyidx4, ssa.OpAMD64XORLconstmodifyidx8, ssa.OpAMD64XORQconstmodifyidx1, ssa.OpAMD64XORQconstmodifyidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val64()
		case p.As == x86.AADDQ && p.From.Offset == 1:
			p.From.Type = obj.TYPE_NONE
		case p.As == x86.AADDQ && p.From.Offset == -1:
			p.From.Type = obj.TYPE_NONE
		case p.As == x86.AADDL && p.From.Offset == 1:
			p.From.Type = obj.TYPE_NONE
		case p.As == x86.AADDL && p.From.Offset == -1:
			p.From.Type = obj.TYPE_NONE
		ssagen.AddAux2(&p.To, v, sc.Off64())
	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
	case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS:
		// Break false dependency on destination register.
		opregreg(s, x86.AXORPS, r, r)
		opregreg(s, v.Op.Asm(), r, v.Args[0].Reg())
	case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i, ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
		case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i:
			p = s.Prog(x86.AMOVQ)
		case ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
			p = s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
	case ssa.OpAMD64ADDQload, ssa.OpAMD64ADDLload, ssa.OpAMD64SUBQload, ssa.OpAMD64SUBLload,
		ssa.OpAMD64ANDQload, ssa.OpAMD64ANDLload, ssa.OpAMD64ORQload, ssa.OpAMD64ORLload,
		ssa.OpAMD64XORQload, ssa.OpAMD64XORLload, ssa.OpAMD64ADDSDload, ssa.OpAMD64ADDSSload,
		ssa.OpAMD64SUBSDload, ssa.OpAMD64SUBSSload, ssa.OpAMD64MULSDload, ssa.OpAMD64MULSSload,
		ssa.OpAMD64DIVSDload, ssa.OpAMD64DIVSSload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[1].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
	case ssa.OpAMD64ADDLloadidx1, ssa.OpAMD64ADDLloadidx4, ssa.OpAMD64ADDLloadidx8, ssa.OpAMD64ADDQloadidx1, ssa.OpAMD64ADDQloadidx8,
		ssa.OpAMD64SUBLloadidx1, ssa.OpAMD64SUBLloadidx4, ssa.OpAMD64SUBLloadidx8, ssa.OpAMD64SUBQloadidx1, ssa.OpAMD64SUBQloadidx8,
		ssa.OpAMD64ANDLloadidx1, ssa.OpAMD64ANDLloadidx4, ssa.OpAMD64ANDLloadidx8, ssa.OpAMD64ANDQloadidx1, ssa.OpAMD64ANDQloadidx8,
		ssa.OpAMD64ORLloadidx1, ssa.OpAMD64ORLloadidx4, ssa.OpAMD64ORLloadidx8, ssa.OpAMD64ORQloadidx1, ssa.OpAMD64ORQloadidx8,
		ssa.OpAMD64XORLloadidx1, ssa.OpAMD64XORLloadidx4, ssa.OpAMD64XORLloadidx8, ssa.OpAMD64XORQloadidx1, ssa.OpAMD64XORQloadidx8,
		ssa.OpAMD64ADDSSloadidx1, ssa.OpAMD64ADDSSloadidx4, ssa.OpAMD64ADDSDloadidx1, ssa.OpAMD64ADDSDloadidx8,
		ssa.OpAMD64SUBSSloadidx1, ssa.OpAMD64SUBSSloadidx4, ssa.OpAMD64SUBSDloadidx1, ssa.OpAMD64SUBSDloadidx8,
		ssa.OpAMD64MULSSloadidx1, ssa.OpAMD64MULSSloadidx4, ssa.OpAMD64MULSDloadidx1, ssa.OpAMD64MULSDloadidx8,
		ssa.OpAMD64DIVSSloadidx1, ssa.OpAMD64DIVSSloadidx4, ssa.OpAMD64DIVSDloadidx1, ssa.OpAMD64DIVSDloadidx8:
		p := s.Prog(v.Op.Asm())
		r, i := v.Args[1].Reg(), v.Args[2].Reg()
		p.From.Type = obj.TYPE_MEM
		p.From.Scale = v.Op.Scale()
		if p.From.Scale == 1 && i == x86.REG_SP {
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
	case ssa.OpAMD64DUFFZERO:
		if s.ABI != obj.ABIInternal {
			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
		off := duffStart(v.AuxInt)
		adj := duffAdj(v.AuxInt)
			p = s.Prog(x86.ALEAQ)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_DI
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x86.REG_DI
		p = s.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = ir.Syms.Duffzero
	case ssa.OpAMD64DUFFCOPY:
		p := s.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = ir.Syms.Duffcopy
		if v.AuxInt%16 != 0 {
			v.Fatalf("bad DUFFCOPY AuxInt %v", v.AuxInt)
		p.To.Offset = 14 * (64 - v.AuxInt/16)
		// 14 and 64 are magic constants. 14 is the number of bytes to encode:
		//	MOVUPS	(SI), X0
		//	ADDQ	$16, SI
		//	MOVUPS	X0, (DI)
		//	ADDQ	$16, DI
		// and 64 is the number of such blocks. See src/runtime/duff_amd64.s:duffcopy.
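		// Worked example: copying 128 bytes needs 128/16 = 8 blocks, so the call
		// targets offset 14*(64-8) = 784 within duffcopy, i.e. only the final 8
		// blocks of the routine are executed.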
	case ssa.OpCopy: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
		x := v.Args[0].Reg()
			opregreg(s, moveByType(v.Type), y, x)
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
		p := s.Prog(loadByType(v.Type))
		ssagen.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
		p := s.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddrAuto(&p.To, v)
	case ssa.OpAMD64LoweredHasCPUFeature:
		p := s.Prog(x86.AMOVBLZX)
		p.From.Type = obj.TYPE_MEM
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
	case ssa.OpArgIntReg, ssa.OpArgFloatReg:
		// The assembler needs to wrap the entry safepoint/stack growth code with
		// spill/unspill of the register arguments. The loop below only runs once.
		for _, ap := range v.Block.Func.RegArgs {
			// Pass the spill/unspill information along to the assembler, offset by size of return PC pushed on stack.
			addr := ssagen.SpillSlotAddr(ap, x86.REG_SP, v.Block.Func.Config.PtrSize)
			s.FuncInfo().AddSpill(
				obj.RegSpill{Reg: ap.Reg, Addr: addr, Unspill: loadByType(ap.Type), Spill: storeByType(ap.Type)})
		v.Block.Func.RegArgs = nil
		ssagen.CheckArgReg(v)
	case ssa.OpAMD64LoweredGetClosurePtr:
		// Closure pointer is DX.
		ssagen.CheckLoweredGetClosurePtr(v)
	case ssa.OpAMD64LoweredGetG:
		if s.ABI == obj.ABIInternal {
			v.Fatalf("LoweredGetG should not appear in ABIInternal")
	case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLtail:
		if s.ABI == obj.ABI0 && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABIInternal {
			// zero X15 when entering ABIInternal from ABI0
			if buildcfg.GOOS != "plan9" { // do not use SSE on Plan 9
				opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
			// set G register from TLS
			getgFromTLS(s, x86.REG_R14)
		if v.Op == ssa.OpAMD64CALLtail {
		if s.ABI == obj.ABIInternal && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABI0 {
			// restore the zeroed X15 and the G register when returning to
			// ABIInternal code after an ABI0 call
			if buildcfg.GOOS != "plan9" { // do not use SSE on Plan 9
				opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
			// set G register from TLS
			getgFromTLS(s, x86.REG_R14)
	case ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
	case ssa.OpAMD64LoweredGetCallerPC:
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = -8 // PC is stored 8 bytes below first parameter.
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
	case ssa.OpAMD64LoweredGetCallerSP:
		// caller's SP is the address of the first arg
		if types.PtrSize == 4 {
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize // 0 on amd64, just to be consistent with other architectures
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
	case ssa.OpAMD64LoweredWB:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		// AuxInt encodes how many buffer entries we need.
		p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
	case ssa.OpAMD64LoweredPanicBoundsA, ssa.OpAMD64LoweredPanicBoundsB, ssa.OpAMD64LoweredPanicBoundsC:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
		s.UseArgs(int64(2 * types.PtrSize)) // space used in callee args area by assembly stubs
	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
	case ssa.OpAMD64NEGLflags:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD, ssa.OpAMD64SQRTSS:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ:
		case ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD, ssa.OpAMD64SQRTSS:
	case ssa.OpAMD64ROUNDSD:
		p := s.Prog(v.Op.Asm())
		// 0 means math.RoundToEven, 1 Floor, 2 Ceil, 3 Trunc
		if val < 0 || val > 3 {
			v.Fatalf("Invalid rounding mode")
		p.From.Type = obj.TYPE_CONST
		p.AddRestSourceReg(v.Args[0].Reg())
		p.To.Type = obj.TYPE_REG
	case ssa.OpAMD64POPCNTQ, ssa.OpAMD64POPCNTL,
		ssa.OpAMD64TZCNTQ, ssa.OpAMD64TZCNTL,
		ssa.OpAMD64LZCNTQ, ssa.OpAMD64LZCNTL:
		if v.Args[0].Reg() != v.Reg() {
			// POPCNT/TZCNT/LZCNT have a false dependency on the destination register on Intel CPUs.
			// The TZCNT/LZCNT problem affects pre-Skylake models; see the discussion at https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62011#c7.
			// XOR the register with itself to break the dependency.
			opregreg(s, x86.AXORL, v.Reg(), v.Reg())
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
		ssa.OpAMD64SETA, ssa.OpAMD64SETAE,
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
	case ssa.OpAMD64SETEQstore, ssa.OpAMD64SETNEstore,
		ssa.OpAMD64SETLstore, ssa.OpAMD64SETLEstore,
		ssa.OpAMD64SETGstore, ssa.OpAMD64SETGEstore,
		ssa.OpAMD64SETBstore, ssa.OpAMD64SETBEstore,
		ssa.OpAMD64SETAstore, ssa.OpAMD64SETAEstore:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.OpAMD64SETEQstoreidx1, ssa.OpAMD64SETNEstoreidx1,
		ssa.OpAMD64SETLstoreidx1, ssa.OpAMD64SETLEstoreidx1,
		ssa.OpAMD64SETGstoreidx1, ssa.OpAMD64SETGEstoreidx1,
		ssa.OpAMD64SETBstoreidx1, ssa.OpAMD64SETBEstoreidx1,
		ssa.OpAMD64SETAstoreidx1, ssa.OpAMD64SETAEstoreidx1:
		p := s.Prog(v.Op.Asm())
		ssagen.AddAux(&p.To, v)
	case ssa.OpAMD64SETNEF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		q := s.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		// ORL avoids a partial register write and is smaller than the ORQ the old compiler used.
		opregreg(s, x86.AORL, v.Reg(), t)
	case ssa.OpAMD64SETEQF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		q := s.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		// ANDL avoids a partial register write and is smaller than the ANDQ the old compiler used.
		opregreg(s, x86.AANDL, v.Reg(), t)
	case ssa.OpAMD64InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
		v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64REPSTOSQ:
	case ssa.OpAMD64REPMOVSQ:
	case ssa.OpAMD64LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
		p := s.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		if logopt.Enabled() {
			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			base.WarnfAt(v.Pos, "generated nil check")
	case ssa.OpAMD64MOVBatomicload, ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
	case ssa.OpAMD64XCHGB, ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Reg0()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Reg0()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
		if v.Args[1].Reg() != x86.REG_AX {
			v.Fatalf("input[1] not in AX %s", v.LongString())
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
		p = s.Prog(x86.ASETEQ)
		p.To.Type = obj.TYPE_REG
	case ssa.OpAMD64ANDBlock, ssa.OpAMD64ANDLlock, ssa.OpAMD64ORBlock, ssa.OpAMD64ORLlock:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.OpAMD64PrefetchT0, ssa.OpAMD64PrefetchNTA:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		ssagen.AddAux(&p.To, v)
		p = s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		ssagen.AddAux(&p.To, v)
	case ssa.OpClobberReg:
		x := uint64(0xdeaddeaddeaddead)
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(x)
		p.To.Type = obj.TYPE_REG
		v.Fatalf("genValue not implemented: %s", v.LongString())
var blockJump = [...]struct {
	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
	ssa.BlockAMD64OS:  {x86.AJOS, x86.AJOC},
	ssa.BlockAMD64OC:  {x86.AJOC, x86.AJOS},
	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
// eqfJumps and nefJumps describe the two-jump sequences emitted for the
// floating-point equality block kinds (see the BlockAMD64EQF/NEF cases in
// ssaGenBlock): EQF is ZERO && !PARITY, NEF is !ZERO || PARITY.
var eqfJumps = [2][2]ssagen.IndexJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
var nefJumps = [2][2]ssagen.IndexJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
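// For example, for an EQF block whose fallthrough successor is Succs[0],
// eqfJumps[0] expands (via s.CombJump in ssaGenBlock) to roughly:
//	JNE other   // not equal -> Succs[1]
//	JPS other   // unordered (NaN) -> Succs[1]
//	            // fall through to Succs[0]: equal and ordered
// where "other" stands for the branch to b.Succs[1].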
func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
	case ssa.BlockDefer:
		// defer returns in AX:
		// 0 if we should continue executing,
		// 1 if we should jump to the deferreturn call.
		p := s.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
	case ssa.BlockExit, ssa.BlockRetJmp:
	case ssa.BlockAMD64EQF:
		s.CombJump(b, next, &eqfJumps)
	case ssa.BlockAMD64NEF:
		s.CombJump(b, next, &nefJumps)
	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
		ssa.BlockAMD64OS, ssa.BlockAMD64OC,
		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
		jmp := blockJump[b.Kind]
		case b.Succs[0].Block():
			s.Br(jmp.invasm, b.Succs[1].Block())
		case b.Succs[1].Block():
			s.Br(jmp.asm, b.Succs[0].Block())
			if b.Likely != ssa.BranchUnlikely {
				s.Br(jmp.asm, b.Succs[0].Block())
				s.Br(obj.AJMP, b.Succs[1].Block())
				s.Br(jmp.invasm, b.Succs[1].Block())
				s.Br(obj.AJMP, b.Succs[0].Block())
	case ssa.BlockAMD64JUMPTABLE:
		// JMP *(TABLE)(INDEX*8)
		p := s.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = b.Controls[1].Reg()
		p.To.Index = b.Controls[0].Reg()
		// Save jump tables for later resolution of the target blocks.
		s.JumpTables = append(s.JumpTables, b)
		b.Fatalf("branch not implemented: %s", b.LongString())
// loadRegResult emits a load of the result variable n (at offset off within its
// stack frame slot) into register reg and returns the generated instruction.
func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
	p := s.Prog(loadByType(t))
	p.From.Type = obj.TYPE_MEM
	p.From.Name = obj.NAME_AUTO
	p.From.Sym = n.Linksym()
	p.From.Offset = n.FrameOffset() + off
	p.To.Type = obj.TYPE_REG
// spillArgReg appends an instruction that stores register reg into the stack
// slot of parameter n (at offset off) and returns the new Prog. The spill is
// marked as not being a statement.
func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
	p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
	p.To.Name = obj.NAME_PARAM
	p.To.Sym = n.Linksym()
	p.Pos = p.Pos.WithNotStmt()