1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/base"
9 "cmd/compile/internal/ir"
10 "cmd/compile/internal/logopt"
11 "cmd/compile/internal/objw"
12 "cmd/compile/internal/ssa"
13 "cmd/compile/internal/ssagen"
14 "cmd/compile/internal/types"
16 "cmd/internal/obj/ppc64"
22 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
23 func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
24 // flive := b.FlagsLiveAtEnd
25 // if b.Control != nil && b.Control.Type.IsFlags() {
28 // for i := len(b.Values) - 1; i >= 0; i-- {
30 // if flive && (v.Op == ssa.OpPPC64MOVDconst) {
31 // // The "mark" is any non-nil Aux value.
34 // if v.Type.IsFlags() {
37 // for _, a := range v.Args {
38 // if a.Type.IsFlags() {
45 // loadByType returns the load instruction of the given type.
46 func loadByType(t *types.Type) obj.As {
78 panic("bad load type")
81 // storeByType returns the store instruction of the given type.
82 func storeByType(t *types.Type) obj.As {
102 panic("bad store type")
105 func ssaGenValue(s *ssagen.State, v *ssa.Value) {
128 case ssa.OpPPC64LoweredMuluhilo:
129 // MULHDU Rarg1, Rarg0, Reg0
130 // MULLD Rarg1, Rarg0, Reg1
131 r0 := v.Args[0].Reg()
132 r1 := v.Args[1].Reg()
133 p := s.Prog(ppc64.AMULHDU)
134 p.From.Type = obj.TYPE_REG
137 p.To.Type = obj.TYPE_REG
139 p1 := s.Prog(ppc64.AMULLD)
140 p1.From.Type = obj.TYPE_REG
143 p1.To.Type = obj.TYPE_REG
146 case ssa.OpPPC64LoweredAdd64Carry:
147 // ADDC Rarg2, -1, Rtmp
148 // ADDE Rarg1, Rarg0, Reg0
150 r0 := v.Args[0].Reg()
151 r1 := v.Args[1].Reg()
152 r2 := v.Args[2].Reg()
153 p := s.Prog(ppc64.AADDC)
154 p.From.Type = obj.TYPE_CONST
157 p.To.Type = obj.TYPE_REG
158 p.To.Reg = ppc64.REGTMP
159 p1 := s.Prog(ppc64.AADDE)
160 p1.From.Type = obj.TYPE_REG
163 p1.To.Type = obj.TYPE_REG
165 p2 := s.Prog(ppc64.AADDZE)
166 p2.From.Type = obj.TYPE_REG
167 p2.From.Reg = ppc64.REGZERO
168 p2.To.Type = obj.TYPE_REG
171 case ssa.OpPPC64LoweredAtomicAnd8,
172 ssa.OpPPC64LoweredAtomicAnd32,
173 ssa.OpPPC64LoweredAtomicOr8,
174 ssa.OpPPC64LoweredAtomicOr32:
176 // LBAR/LWAR (Rarg0), Rtmp
177 // AND/OR Rarg1, Rtmp
178 // STBCCC/STWCCC Rtmp, (Rarg0)
182 if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
186 r0 := v.Args[0].Reg()
187 r1 := v.Args[1].Reg()
188 // LWSYNC - Assuming shared data not write-through-required nor
189 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
190 plwsync := s.Prog(ppc64.ALWSYNC)
191 plwsync.To.Type = obj.TYPE_NONE
194 p.From.Type = obj.TYPE_MEM
196 p.To.Type = obj.TYPE_REG
197 p.To.Reg = ppc64.REGTMP
199 p1 := s.Prog(v.Op.Asm())
200 p1.From.Type = obj.TYPE_REG
202 p1.To.Type = obj.TYPE_REG
203 p1.To.Reg = ppc64.REGTMP
206 p2.From.Type = obj.TYPE_REG
207 p2.From.Reg = ppc64.REGTMP
208 p2.To.Type = obj.TYPE_MEM
210 p2.RegTo2 = ppc64.REGTMP
212 p3 := s.Prog(ppc64.ABNE)
213 p3.To.Type = obj.TYPE_BRANCH
216 case ssa.OpPPC64LoweredAtomicAdd32,
217 ssa.OpPPC64LoweredAtomicAdd64:
219 // LDAR/LWAR (Rarg0), Rout
221 // STDCCC/STWCCC Rout, (Rarg0)
223 // MOVW Rout,Rout (if Add32)
226 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
230 r0 := v.Args[0].Reg()
231 r1 := v.Args[1].Reg()
233 // LWSYNC - Assuming shared data not write-through-required nor
234 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
235 plwsync := s.Prog(ppc64.ALWSYNC)
236 plwsync.To.Type = obj.TYPE_NONE
239 p.From.Type = obj.TYPE_MEM
241 p.To.Type = obj.TYPE_REG
244 p1 := s.Prog(ppc64.AADD)
245 p1.From.Type = obj.TYPE_REG
248 p1.To.Type = obj.TYPE_REG
251 p3.From.Type = obj.TYPE_REG
253 p3.To.Type = obj.TYPE_MEM
256 p4 := s.Prog(ppc64.ABNE)
257 p4.To.Type = obj.TYPE_BRANCH
260 // Ensure a 32 bit result
261 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
262 p5 := s.Prog(ppc64.AMOVWZ)
263 p5.To.Type = obj.TYPE_REG
265 p5.From.Type = obj.TYPE_REG
269 case ssa.OpPPC64LoweredAtomicExchange32,
270 ssa.OpPPC64LoweredAtomicExchange64:
272 // LDAR/LWAR (Rarg0), Rout
273 // STDCCC/STWCCC Rout, (Rarg0)
278 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
282 r0 := v.Args[0].Reg()
283 r1 := v.Args[1].Reg()
285 // LWSYNC - Assuming shared data not write-through-required nor
286 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
287 plwsync := s.Prog(ppc64.ALWSYNC)
288 plwsync.To.Type = obj.TYPE_NONE
291 p.From.Type = obj.TYPE_MEM
293 p.To.Type = obj.TYPE_REG
297 p1.From.Type = obj.TYPE_REG
299 p1.To.Type = obj.TYPE_MEM
302 p2 := s.Prog(ppc64.ABNE)
303 p2.To.Type = obj.TYPE_BRANCH
306 pisync := s.Prog(ppc64.AISYNC)
307 pisync.To.Type = obj.TYPE_NONE
309 case ssa.OpPPC64LoweredAtomicLoad8,
310 ssa.OpPPC64LoweredAtomicLoad32,
311 ssa.OpPPC64LoweredAtomicLoad64,
312 ssa.OpPPC64LoweredAtomicLoadPtr:
314 // MOVB/MOVD/MOVW (Rarg0), Rout
321 case ssa.OpPPC64LoweredAtomicLoad8:
323 case ssa.OpPPC64LoweredAtomicLoad32:
327 arg0 := v.Args[0].Reg()
329 // SYNC when AuxInt == 1; otherwise, load-acquire
331 psync := s.Prog(ppc64.ASYNC)
332 psync.To.Type = obj.TYPE_NONE
336 p.From.Type = obj.TYPE_MEM
338 p.To.Type = obj.TYPE_REG
342 p1.From.Type = obj.TYPE_REG
344 p1.To.Type = obj.TYPE_REG
347 p2 := s.Prog(ppc64.ABNE)
348 p2.To.Type = obj.TYPE_BRANCH
350 pisync := s.Prog(ppc64.AISYNC)
351 pisync.To.Type = obj.TYPE_NONE
352 p2.To.SetTarget(pisync)
354 case ssa.OpPPC64LoweredAtomicStore8,
355 ssa.OpPPC64LoweredAtomicStore32,
356 ssa.OpPPC64LoweredAtomicStore64:
358 // MOVB/MOVW/MOVD arg1,(arg0)
361 case ssa.OpPPC64LoweredAtomicStore8:
363 case ssa.OpPPC64LoweredAtomicStore32:
366 arg0 := v.Args[0].Reg()
367 arg1 := v.Args[1].Reg()
368 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
370 syncOp := ppc64.ASYNC
372 syncOp = ppc64.ALWSYNC
374 psync := s.Prog(syncOp)
375 psync.To.Type = obj.TYPE_NONE
378 p.To.Type = obj.TYPE_MEM
380 p.From.Type = obj.TYPE_REG
383 case ssa.OpPPC64LoweredAtomicCas64,
384 ssa.OpPPC64LoweredAtomicCas32:
387 // LDAR (Rarg0), MutexHint, Rtmp
390 // STDCCC Rarg2, (Rarg0)
392 // LWSYNC // Only for sequential consistency; not required in CasRel.
401 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
406 r0 := v.Args[0].Reg()
407 r1 := v.Args[1].Reg()
408 r2 := v.Args[2].Reg()
410 // LWSYNC - Assuming shared data not write-through-required nor
411 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
412 plwsync1 := s.Prog(ppc64.ALWSYNC)
413 plwsync1.To.Type = obj.TYPE_NONE
416 p.From.Type = obj.TYPE_MEM
418 p.To.Type = obj.TYPE_REG
419 p.To.Reg = ppc64.REGTMP
420 // If it is a Compare-and-Swap-Release operation, set the EH field with
427 p1.From.Type = obj.TYPE_REG
429 p1.To.Reg = ppc64.REGTMP
430 p1.To.Type = obj.TYPE_REG
432 p2 := s.Prog(ppc64.ABNE)
433 p2.To.Type = obj.TYPE_BRANCH
436 p3.From.Type = obj.TYPE_REG
438 p3.To.Type = obj.TYPE_MEM
441 p4 := s.Prog(ppc64.ABNE)
442 p4.To.Type = obj.TYPE_BRANCH
444 // LWSYNC - Assuming shared data not write-through-required nor
445 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
446 // If the operation is a CAS-Release, then synchronization is not necessary.
448 plwsync2 := s.Prog(ppc64.ALWSYNC)
449 plwsync2.To.Type = obj.TYPE_NONE
452 p5 := s.Prog(ppc64.AMOVD)
453 p5.From.Type = obj.TYPE_CONST
455 p5.To.Type = obj.TYPE_REG
458 p6 := s.Prog(obj.AJMP)
459 p6.To.Type = obj.TYPE_BRANCH
461 p7 := s.Prog(ppc64.AMOVD)
462 p7.From.Type = obj.TYPE_CONST
464 p7.To.Type = obj.TYPE_REG
468 p8 := s.Prog(obj.ANOP)
471 case ssa.OpPPC64LoweredGetClosurePtr:
472 // Closure pointer is R11 (already)
473 ssagen.CheckLoweredGetClosurePtr(v)
475 case ssa.OpPPC64LoweredGetCallerSP:
476 // caller's SP is FixedFrameSize below the address of the first arg
477 p := s.Prog(ppc64.AMOVD)
478 p.From.Type = obj.TYPE_ADDR
479 p.From.Offset = -base.Ctxt.FixedFrameSize()
480 p.From.Name = obj.NAME_PARAM
481 p.To.Type = obj.TYPE_REG
484 case ssa.OpPPC64LoweredGetCallerPC:
485 p := s.Prog(obj.AGETCALLERPC)
486 p.To.Type = obj.TYPE_REG
489 case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
490 // input is already rounded
493 loadOp := loadByType(v.Type)
495 ssagen.AddrAuto(&p.From, v.Args[0])
496 p.To.Type = obj.TYPE_REG
500 storeOp := storeByType(v.Type)
502 p.From.Type = obj.TYPE_REG
503 p.From.Reg = v.Args[0].Reg()
504 ssagen.AddrAuto(&p.To, v)
506 case ssa.OpArgIntReg, ssa.OpArgFloatReg:
507 // The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
508 // The loop only runs once.
509 for _, a := range v.Block.Func.RegArgs {
510 // Pass the spill/unspill information along to the assembler, offset by size of
511 // the saved LR slot.
512 addr := ssagen.SpillSlotAddr(a, ppc64.REGSP, base.Ctxt.FixedFrameSize())
513 s.FuncInfo().AddSpill(
514 obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
516 v.Block.Func.RegArgs = nil
518 ssagen.CheckArgReg(v)
520 case ssa.OpPPC64DIVD:
530 r0 := v.Args[0].Reg()
531 r1 := v.Args[1].Reg()
533 p := s.Prog(ppc64.ACMP)
534 p.From.Type = obj.TYPE_REG
536 p.To.Type = obj.TYPE_CONST
539 pbahead := s.Prog(ppc64.ABEQ)
540 pbahead.To.Type = obj.TYPE_BRANCH
542 p = s.Prog(v.Op.Asm())
543 p.From.Type = obj.TYPE_REG
546 p.To.Type = obj.TYPE_REG
549 pbover := s.Prog(obj.AJMP)
550 pbover.To.Type = obj.TYPE_BRANCH
552 p = s.Prog(ppc64.ANEG)
553 p.To.Type = obj.TYPE_REG
555 p.From.Type = obj.TYPE_REG
557 pbahead.To.SetTarget(p)
560 pbover.To.SetTarget(p)
562 case ssa.OpPPC64DIVW:
563 // word-width version of above
565 r0 := v.Args[0].Reg()
566 r1 := v.Args[1].Reg()
568 p := s.Prog(ppc64.ACMPW)
569 p.From.Type = obj.TYPE_REG
571 p.To.Type = obj.TYPE_CONST
574 pbahead := s.Prog(ppc64.ABEQ)
575 pbahead.To.Type = obj.TYPE_BRANCH
577 p = s.Prog(v.Op.Asm())
578 p.From.Type = obj.TYPE_REG
581 p.To.Type = obj.TYPE_REG
584 pbover := s.Prog(obj.AJMP)
585 pbover.To.Type = obj.TYPE_BRANCH
587 p = s.Prog(ppc64.ANEG)
588 p.To.Type = obj.TYPE_REG
590 p.From.Type = obj.TYPE_REG
592 pbahead.To.SetTarget(p)
595 pbover.To.SetTarget(p)
597 case ssa.OpPPC64CLRLSLWI:
599 r1 := v.Args[0].Reg()
601 p := s.Prog(v.Op.Asm())
602 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
603 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
604 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
606 p.To.Type = obj.TYPE_REG
609 case ssa.OpPPC64CLRLSLDI:
611 r1 := v.Args[0].Reg()
613 p := s.Prog(v.Op.Asm())
614 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
615 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
616 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
618 p.To.Type = obj.TYPE_REG
621 // Mask has been set as sh
622 case ssa.OpPPC64RLDICL:
624 r1 := v.Args[0].Reg()
626 p := s.Prog(v.Op.Asm())
627 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}
628 p.SetFrom3Const(ssa.GetPPC64Shiftmb(shifts))
630 p.To.Type = obj.TYPE_REG
633 case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
634 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
635 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
636 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
637 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
638 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
639 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
640 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
642 r1 := v.Args[0].Reg()
643 r2 := v.Args[1].Reg()
644 p := s.Prog(v.Op.Asm())
645 p.From.Type = obj.TYPE_REG
648 p.To.Type = obj.TYPE_REG
651 case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
652 r1 := v.Args[0].Reg()
653 r2 := v.Args[1].Reg()
654 p := s.Prog(v.Op.Asm())
655 p.From.Type = obj.TYPE_REG
658 p.To.Type = obj.TYPE_REG
659 p.To.Reg = ppc64.REGTMP // result is not needed
661 case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
662 p := s.Prog(v.Op.Asm())
663 p.From.Type = obj.TYPE_CONST
664 p.From.Offset = v.AuxInt
665 p.Reg = v.Args[0].Reg()
666 p.To.Type = obj.TYPE_REG
669 // Auxint holds encoded rotate + mask
670 case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
671 rot, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
672 p := s.Prog(v.Op.Asm())
673 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
674 p.Reg = v.Args[0].Reg()
675 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(rot)}
676 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
679 case ssa.OpPPC64RLWNM:
680 _, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
681 p := s.Prog(v.Op.Asm())
682 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
683 p.Reg = v.Args[0].Reg()
684 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
685 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
687 case ssa.OpPPC64MADDLD:
689 r1 := v.Args[0].Reg()
690 r2 := v.Args[1].Reg()
691 r3 := v.Args[2].Reg()
693 p := s.Prog(v.Op.Asm())
694 p.From.Type = obj.TYPE_REG
698 p.To.Type = obj.TYPE_REG
701 case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
703 r1 := v.Args[0].Reg()
704 r2 := v.Args[1].Reg()
705 r3 := v.Args[2].Reg()
707 p := s.Prog(v.Op.Asm())
708 p.From.Type = obj.TYPE_REG
712 p.To.Type = obj.TYPE_REG
715 case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
716 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
717 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
718 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
720 p := s.Prog(v.Op.Asm())
721 p.To.Type = obj.TYPE_REG
723 p.From.Type = obj.TYPE_REG
724 p.From.Reg = v.Args[0].Reg()
726 case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
727 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
728 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
729 p := s.Prog(v.Op.Asm())
730 p.Reg = v.Args[0].Reg()
731 p.From.Type = obj.TYPE_CONST
732 p.From.Offset = v.AuxInt
733 p.To.Type = obj.TYPE_REG
736 case ssa.OpPPC64ADDC, ssa.OpPPC64ADDE, ssa.OpPPC64SUBC, ssa.OpPPC64SUBE:
737 r := v.Reg0() // CA is the first, implied argument.
738 r1 := v.Args[0].Reg()
739 r2 := v.Args[1].Reg()
740 p := s.Prog(v.Op.Asm())
741 p.From.Type = obj.TYPE_REG
744 p.To.Type = obj.TYPE_REG
747 case ssa.OpPPC64ADDZEzero, ssa.OpPPC64SUBZEzero:
748 p := s.Prog(v.Op.Asm())
749 p.From.Type = obj.TYPE_REG
750 p.From.Reg = ppc64.REG_R0
751 p.To.Type = obj.TYPE_REG
754 case ssa.OpPPC64ADDCconst:
755 p := s.Prog(v.Op.Asm())
756 p.Reg = v.Args[0].Reg()
757 p.From.Type = obj.TYPE_CONST
758 p.From.Offset = v.AuxInt
759 p.To.Type = obj.TYPE_REG
760 // Output is a pair, the second is the CA, which is implied.
763 case ssa.OpPPC64SUBCconst:
764 p := s.Prog(v.Op.Asm())
765 p.SetFrom3Const(v.AuxInt)
766 p.From.Type = obj.TYPE_REG
767 p.From.Reg = v.Args[0].Reg()
768 p.To.Type = obj.TYPE_REG
771 case ssa.OpPPC64SUBFCconst:
772 p := s.Prog(v.Op.Asm())
773 p.SetFrom3Const(v.AuxInt)
774 p.From.Type = obj.TYPE_REG
775 p.From.Reg = v.Args[0].Reg()
776 p.To.Type = obj.TYPE_REG
779 case ssa.OpPPC64ANDCCconst:
780 p := s.Prog(v.Op.Asm())
781 p.Reg = v.Args[0].Reg()
782 p.From.Type = obj.TYPE_CONST
783 p.From.Offset = v.AuxInt
784 p.To.Type = obj.TYPE_REG
785 p.To.Reg = ppc64.REGTMP // discard result
787 case ssa.OpPPC64MOVDaddr:
788 switch v.Aux.(type) {
790 v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
792 // If aux offset and aux int are both 0, and the same
793 // input and output regs are used, no instruction
794 // needs to be generated, since it would just be
796 if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
797 p := s.Prog(ppc64.AMOVD)
798 p.From.Type = obj.TYPE_ADDR
799 p.From.Reg = v.Args[0].Reg()
800 p.From.Offset = v.AuxInt
801 p.To.Type = obj.TYPE_REG
805 case *obj.LSym, ir.Node:
806 p := s.Prog(ppc64.AMOVD)
807 p.From.Type = obj.TYPE_ADDR
808 p.From.Reg = v.Args[0].Reg()
809 p.To.Type = obj.TYPE_REG
811 ssagen.AddAux(&p.From, v)
815 case ssa.OpPPC64MOVDconst:
816 p := s.Prog(v.Op.Asm())
817 p.From.Type = obj.TYPE_CONST
818 p.From.Offset = v.AuxInt
819 p.To.Type = obj.TYPE_REG
822 case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
823 p := s.Prog(v.Op.Asm())
824 p.From.Type = obj.TYPE_FCONST
825 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
826 p.To.Type = obj.TYPE_REG
829 case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
830 p := s.Prog(v.Op.Asm())
831 p.From.Type = obj.TYPE_REG
832 p.From.Reg = v.Args[0].Reg()
833 p.To.Type = obj.TYPE_REG
834 p.To.Reg = v.Args[1].Reg()
836 case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
837 p := s.Prog(v.Op.Asm())
838 p.From.Type = obj.TYPE_REG
839 p.From.Reg = v.Args[0].Reg()
840 p.To.Type = obj.TYPE_CONST
841 p.To.Offset = v.AuxInt
843 case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
844 // Shift in register to required size
845 p := s.Prog(v.Op.Asm())
846 p.From.Type = obj.TYPE_REG
847 p.From.Reg = v.Args[0].Reg()
849 p.To.Type = obj.TYPE_REG
851 case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:
853 // MOVDload and MOVWload are DS form instructions that are restricted to
854 // offsets that are a multiple of 4. If the offset is not a multiple of 4,
855 // then the address of the symbol to be loaded is computed (base + offset)
856 // and used as the new base register and the offset field in the instruction
857 // can be set to zero.
859 // This same problem can happen with gostrings since the final offset is not
860 // known yet, but could be unaligned after the relocation is resolved.
861 // So gostrings are handled the same way.
863 // This allows the MOVDload and MOVWload to be generated in more cases and
864 // eliminates some offset and alignment checking in the rules file.
866 fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
867 ssagen.AddAux(&fromAddr, v)
871 switch fromAddr.Name {
872 case obj.NAME_EXTERN, obj.NAME_STATIC:
873 // Special case for a rule combines the bytes of gostring.
874 // The v alignment might seem OK, but we don't want to load it
875 // using an offset because relocation comes later.
876 genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go.string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
878 genAddr = fromAddr.Offset%4 != 0
881 // Load full address into the temp register.
882 p := s.Prog(ppc64.AMOVD)
883 p.From.Type = obj.TYPE_ADDR
884 p.From.Reg = v.Args[0].Reg()
885 ssagen.AddAux(&p.From, v)
886 // Load target using temp as base register
887 // and offset zero. Setting NAME_NONE
888 // prevents any extra offsets from being
890 p.To.Type = obj.TYPE_REG
891 p.To.Reg = ppc64.REGTMP
892 fromAddr.Reg = ppc64.REGTMP
893 // Clear the offset field and other
894 // information that might be used
895 // by the assembler to add to the
896 // final offset value.
898 fromAddr.Name = obj.NAME_NONE
901 p := s.Prog(v.Op.Asm())
903 p.To.Type = obj.TYPE_REG
907 case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
908 p := s.Prog(v.Op.Asm())
909 p.From.Type = obj.TYPE_MEM
910 p.From.Reg = v.Args[0].Reg()
911 ssagen.AddAux(&p.From, v)
912 p.To.Type = obj.TYPE_REG
915 case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
916 p := s.Prog(v.Op.Asm())
917 p.From.Type = obj.TYPE_MEM
918 p.From.Reg = v.Args[0].Reg()
919 p.To.Type = obj.TYPE_REG
922 case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
923 p := s.Prog(v.Op.Asm())
924 p.To.Type = obj.TYPE_MEM
925 p.To.Reg = v.Args[0].Reg()
926 p.From.Type = obj.TYPE_REG
927 p.From.Reg = v.Args[1].Reg()
929 case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
930 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
931 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
932 p := s.Prog(v.Op.Asm())
933 p.From.Type = obj.TYPE_MEM
934 p.From.Reg = v.Args[0].Reg()
935 p.From.Index = v.Args[1].Reg()
936 p.To.Type = obj.TYPE_REG
939 case ssa.OpPPC64DCBT:
940 p := s.Prog(v.Op.Asm())
941 p.From.Type = obj.TYPE_MEM
942 p.From.Reg = v.Args[0].Reg()
943 p.To.Type = obj.TYPE_CONST
944 p.To.Offset = v.AuxInt
946 case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
947 p := s.Prog(v.Op.Asm())
948 p.From.Type = obj.TYPE_REG
949 p.From.Reg = ppc64.REGZERO
950 p.To.Type = obj.TYPE_MEM
951 p.To.Reg = v.Args[0].Reg()
952 ssagen.AddAux(&p.To, v)
954 case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:
956 // MOVDstore and MOVDstorezero become DS form instructions that are restricted
957 // to offset values that are a multiple of 4. If the offset field is not a
958 // multiple of 4, then the full address of the store target is computed (base +
959 // offset) and used as the new base register and the offset in the instruction
962 // This allows the MOVDstore and MOVDstorezero to be generated in more cases,
963 // and prevents checking of the offset value and alignment in the rules.
965 toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
966 ssagen.AddAux(&toAddr, v)
968 if toAddr.Offset%4 != 0 {
969 p := s.Prog(ppc64.AMOVD)
970 p.From.Type = obj.TYPE_ADDR
971 p.From.Reg = v.Args[0].Reg()
972 ssagen.AddAux(&p.From, v)
973 p.To.Type = obj.TYPE_REG
974 p.To.Reg = ppc64.REGTMP
975 toAddr.Reg = ppc64.REGTMP
976 // Clear the offset field and other
977 // information that might be used
978 // by the assembler to add to the
979 // final offset value.
981 toAddr.Name = obj.NAME_NONE
984 p := s.Prog(v.Op.Asm())
986 p.From.Type = obj.TYPE_REG
987 if v.Op == ssa.OpPPC64MOVDstorezero {
988 p.From.Reg = ppc64.REGZERO
990 p.From.Reg = v.Args[1].Reg()
993 case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
994 p := s.Prog(v.Op.Asm())
995 p.From.Type = obj.TYPE_REG
996 p.From.Reg = v.Args[1].Reg()
997 p.To.Type = obj.TYPE_MEM
998 p.To.Reg = v.Args[0].Reg()
999 ssagen.AddAux(&p.To, v)
1001 case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
1002 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
1003 ssa.OpPPC64MOVHBRstoreidx:
1004 p := s.Prog(v.Op.Asm())
1005 p.From.Type = obj.TYPE_REG
1006 p.From.Reg = v.Args[2].Reg()
1007 p.To.Index = v.Args[1].Reg()
1008 p.To.Type = obj.TYPE_MEM
1009 p.To.Reg = v.Args[0].Reg()
1011 case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
1013 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
1014 // ISEL only accepts 0, 1, 2 condition values but the others can be
1015 // achieved by swapping operand order.
1016 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
1017 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
1018 // ISELB is used when a boolean result is needed, returning 0 or 1
1019 p := s.Prog(ppc64.AISEL)
1020 p.To.Type = obj.TYPE_REG
1022 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
1023 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
1024 if v.Op == ssa.OpPPC64ISEL {
1025 r.Reg = v.Args[1].Reg()
1027 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
1030 p.SetFrom3Reg(v.Args[0].Reg())
1032 p.Reg = v.Args[0].Reg()
1035 p.From.Type = obj.TYPE_CONST
1036 p.From.Offset = v.AuxInt & 3
1038 case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
1039 // The LoweredQuad code generation
1040 // generates STXV instructions on
1041 // power9. The Short variation is used
1042 // if no loop is generated.
1044 // sizes >= 64 generate a loop as follows:
1046 // Set up loop counter in CTR, used by BC
1047 // XXLXOR clears VS32
1048 // XXLXOR VS32,VS32,VS32
1049 // MOVD len/64,REG_TMP
1053 // STXV VS32,16(R20)
1054 // STXV VS32,32(R20)
1055 // STXV VS32,48(R20)
1059 // Bytes per iteration
1060 ctr := v.AuxInt / 64
1063 rem := v.AuxInt % 64
1065 // Only generate a loop if there is more
1066 // than 1 iteration.
1068 // Set up VS32 (V0) to hold 0s
1069 p := s.Prog(ppc64.AXXLXOR)
1070 p.From.Type = obj.TYPE_REG
1071 p.From.Reg = ppc64.REG_VS32
1072 p.To.Type = obj.TYPE_REG
1073 p.To.Reg = ppc64.REG_VS32
1074 p.Reg = ppc64.REG_VS32
1076 // Set up CTR loop counter
1077 p = s.Prog(ppc64.AMOVD)
1078 p.From.Type = obj.TYPE_CONST
1080 p.To.Type = obj.TYPE_REG
1081 p.To.Reg = ppc64.REGTMP
1083 p = s.Prog(ppc64.AMOVD)
1084 p.From.Type = obj.TYPE_REG
1085 p.From.Reg = ppc64.REGTMP
1086 p.To.Type = obj.TYPE_REG
1087 p.To.Reg = ppc64.REG_CTR
1089 // Don't generate padding for
1090 // loops with few iterations.
1092 p = s.Prog(obj.APCALIGN)
1093 p.From.Type = obj.TYPE_CONST
1097 // generate 4 STXVs to zero 64 bytes
1100 p = s.Prog(ppc64.ASTXV)
1101 p.From.Type = obj.TYPE_REG
1102 p.From.Reg = ppc64.REG_VS32
1103 p.To.Type = obj.TYPE_MEM
1104 p.To.Reg = v.Args[0].Reg()
1106 // Save the top of loop
1110 p = s.Prog(ppc64.ASTXV)
1111 p.From.Type = obj.TYPE_REG
1112 p.From.Reg = ppc64.REG_VS32
1113 p.To.Type = obj.TYPE_MEM
1114 p.To.Reg = v.Args[0].Reg()
1117 p = s.Prog(ppc64.ASTXV)
1118 p.From.Type = obj.TYPE_REG
1119 p.From.Reg = ppc64.REG_VS32
1120 p.To.Type = obj.TYPE_MEM
1121 p.To.Reg = v.Args[0].Reg()
1124 p = s.Prog(ppc64.ASTXV)
1125 p.From.Type = obj.TYPE_REG
1126 p.From.Reg = ppc64.REG_VS32
1127 p.To.Type = obj.TYPE_MEM
1128 p.To.Reg = v.Args[0].Reg()
1131 // Increment address for the
1132 // 64 bytes just zeroed.
1133 p = s.Prog(ppc64.AADD)
1134 p.Reg = v.Args[0].Reg()
1135 p.From.Type = obj.TYPE_CONST
1137 p.To.Type = obj.TYPE_REG
1138 p.To.Reg = v.Args[0].Reg()
1140 // Branch back to top of loop
1142 // BC with BO_BCTR generates bdnz
1143 p = s.Prog(ppc64.ABC)
1144 p.From.Type = obj.TYPE_CONST
1145 p.From.Offset = ppc64.BO_BCTR
1146 p.Reg = ppc64.REG_R0
1147 p.To.Type = obj.TYPE_BRANCH
1150 // When ctr == 1 the loop was not generated but
1151 // there are at least 64 bytes to clear, so add
1152 // that to the remainder to generate the code
1153 // to clear those doublewords
1158 // Clear the remainder starting at offset zero
1161 if rem >= 16 && ctr <= 1 {
1162 // If the XXLXOR hasn't already been
1163 // generated, do it here to initialize
1165 p := s.Prog(ppc64.AXXLXOR)
1166 p.From.Type = obj.TYPE_REG
1167 p.From.Reg = ppc64.REG_VS32
1168 p.To.Type = obj.TYPE_REG
1169 p.To.Reg = ppc64.REG_VS32
1170 p.Reg = ppc64.REG_VS32
1172 // Generate STXV for 32 or 64
1175 p := s.Prog(ppc64.ASTXV)
1176 p.From.Type = obj.TYPE_REG
1177 p.From.Reg = ppc64.REG_VS32
1178 p.To.Type = obj.TYPE_MEM
1179 p.To.Reg = v.Args[0].Reg()
1180 p.To.Offset = offset
1182 p = s.Prog(ppc64.ASTXV)
1183 p.From.Type = obj.TYPE_REG
1184 p.From.Reg = ppc64.REG_VS32
1185 p.To.Type = obj.TYPE_MEM
1186 p.To.Reg = v.Args[0].Reg()
1187 p.To.Offset = offset + 16
1191 // Generate 16 bytes
1193 p := s.Prog(ppc64.ASTXV)
1194 p.From.Type = obj.TYPE_REG
1195 p.From.Reg = ppc64.REG_VS32
1196 p.To.Type = obj.TYPE_MEM
1197 p.To.Reg = v.Args[0].Reg()
1198 p.To.Offset = offset
1203 // first clear as many doublewords as possible
1204 // then clear remaining sizes as available
1206 op, size := ppc64.AMOVB, int64(1)
1209 op, size = ppc64.AMOVD, 8
1211 op, size = ppc64.AMOVW, 4
1213 op, size = ppc64.AMOVH, 2
1216 p.From.Type = obj.TYPE_REG
1217 p.From.Reg = ppc64.REG_R0
1218 p.To.Type = obj.TYPE_MEM
1219 p.To.Reg = v.Args[0].Reg()
1220 p.To.Offset = offset
1225 case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1227 // Unaligned data doesn't hurt performance
1228 // for these instructions on power8.
1230 // For sizes >= 64 generate a loop as follows:
1232 // Set up loop counter in CTR, used by BC
1233 // XXLXOR VS32,VS32,VS32
1234 // MOVD len/32,REG_TMP
1238 // STXVD2X VS32,(R0)(R20)
1239 // STXVD2X VS32,(R31)(R20)
1243 // any remainder is done as described below
1245 // for sizes < 64 bytes, first clear as many doublewords as possible,
1246 // then handle the remainder
1251 // the remainder bytes are cleared using one or more
1252 // of the following instructions with the appropriate
1253 // offsets depending which instructions are needed
1255 // MOVW R0,n1(R20) 4 bytes
1256 // MOVH R0,n2(R20) 2 bytes
1257 // MOVB R0,n3(R20) 1 byte
1259 // 7 bytes: MOVW, MOVH, MOVB
1260 // 6 bytes: MOVW, MOVH
1261 // 5 bytes: MOVW, MOVB
1262 // 3 bytes: MOVH, MOVB
1264 // each loop iteration does 32 bytes
1265 ctr := v.AuxInt / 32
1268 rem := v.AuxInt % 32
1270 // only generate a loop if there is more
1271 // than 1 iteration.
1273 // Set up VS32 (V0) to hold 0s
1274 p := s.Prog(ppc64.AXXLXOR)
1275 p.From.Type = obj.TYPE_REG
1276 p.From.Reg = ppc64.REG_VS32
1277 p.To.Type = obj.TYPE_REG
1278 p.To.Reg = ppc64.REG_VS32
1279 p.Reg = ppc64.REG_VS32
1281 // Set up CTR loop counter
1282 p = s.Prog(ppc64.AMOVD)
1283 p.From.Type = obj.TYPE_CONST
1285 p.To.Type = obj.TYPE_REG
1286 p.To.Reg = ppc64.REGTMP
1288 p = s.Prog(ppc64.AMOVD)
1289 p.From.Type = obj.TYPE_REG
1290 p.From.Reg = ppc64.REGTMP
1291 p.To.Type = obj.TYPE_REG
1292 p.To.Reg = ppc64.REG_CTR
1294 // Set up R31 to hold index value 16
1295 p = s.Prog(ppc64.AMOVD)
1296 p.From.Type = obj.TYPE_CONST
1298 p.To.Type = obj.TYPE_REG
1299 p.To.Reg = ppc64.REGTMP
1301 // Don't add padding for alignment
1302 // with few loop iterations.
1304 p = s.Prog(obj.APCALIGN)
1305 p.From.Type = obj.TYPE_CONST
1309 // generate 2 STXVD2Xs to store 16 bytes
1310 // when this is a loop then the top must be saved
1312 // This is the top of loop
1314 p = s.Prog(ppc64.ASTXVD2X)
1315 p.From.Type = obj.TYPE_REG
1316 p.From.Reg = ppc64.REG_VS32
1317 p.To.Type = obj.TYPE_MEM
1318 p.To.Reg = v.Args[0].Reg()
1319 p.To.Index = ppc64.REGZERO
1320 // Save the top of loop
1324 p = s.Prog(ppc64.ASTXVD2X)
1325 p.From.Type = obj.TYPE_REG
1326 p.From.Reg = ppc64.REG_VS32
1327 p.To.Type = obj.TYPE_MEM
1328 p.To.Reg = v.Args[0].Reg()
1329 p.To.Index = ppc64.REGTMP
1331 // Increment address for the
1332 // 4 doublewords just zeroed.
1333 p = s.Prog(ppc64.AADD)
1334 p.Reg = v.Args[0].Reg()
1335 p.From.Type = obj.TYPE_CONST
1337 p.To.Type = obj.TYPE_REG
1338 p.To.Reg = v.Args[0].Reg()
1340 // Branch back to top of loop
1342 // BC with BO_BCTR generates bdnz
1343 p = s.Prog(ppc64.ABC)
1344 p.From.Type = obj.TYPE_CONST
1345 p.From.Offset = ppc64.BO_BCTR
1346 p.Reg = ppc64.REG_R0
1347 p.To.Type = obj.TYPE_BRANCH
1351 // when ctr == 1 the loop was not generated but
1352 // there are at least 32 bytes to clear, so add
1353 // that to the remainder to generate the code
1354 // to clear those doublewords
1359 // clear the remainder starting at offset zero
1362 // first clear as many doublewords as possible
1363 // then clear remaining sizes as available
1365 op, size := ppc64.AMOVB, int64(1)
1368 op, size = ppc64.AMOVD, 8
1370 op, size = ppc64.AMOVW, 4
1372 op, size = ppc64.AMOVH, 2
1375 p.From.Type = obj.TYPE_REG
1376 p.From.Reg = ppc64.REG_R0
1377 p.To.Type = obj.TYPE_MEM
1378 p.To.Reg = v.Args[0].Reg()
1379 p.To.Offset = offset
// NOTE(review): elided listing — `for` headers, `if ctr > 1`/`if rem >= 16`
// style conditionals, offset bookkeeping, and some Reg assignments fall in
// the gaps between the visible line numbers. Comments stick to what is shown.
//
// LoweredMove: copy v.AuxInt bytes from Args[1] (src) to Args[0] (dst)
// using a 32-byte-per-iteration LXVD2X/STXVD2X loop for large counts,
// then 16-byte vector and 8/4/2/1-byte scalar moves for the remainder.
1384 case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1386 bytesPerLoop := int64(32)
1387 // This will be used when moving more
1388 // than 8 bytes. Moves start with
1389 // as many 8 byte moves as possible, then
1390 // 4, 2, or 1 byte(s) as remaining. This will
1391 // work and be efficient for power8 or later.
1392 // If there are 64 or more bytes, then a
1393 // loop is generated to move 32 bytes and
1394 // update the src and dst addresses on each
1395 // iteration. When < 64 bytes, the appropriate
1396 // number of moves are generated based on the
1398 // When moving >= 64 bytes a loop is used
1399 // MOVD len/32,REG_TMP
1403 // LXVD2X (R0)(R21),VS32
1404 // LXVD2X (R31)(R21),VS33
1406 // STXVD2X VS32,(R0)(R20)
1407 // STXVD2X VS33,(R31)(R20)
1410 // Bytes not moved by this loop are moved
1411 // with a combination of the following instructions,
1412 // starting with the largest sizes and generating as
1413 // many as needed, using the appropriate offset value.
1423 // Each loop iteration moves 32 bytes
1424 ctr := v.AuxInt / bytesPerLoop
1426 // Remainder after the loop
1427 rem := v.AuxInt % bytesPerLoop
1429 dstReg := v.Args[0].Reg()
1430 srcReg := v.Args[1].Reg()
1432 // The set of registers used here, must match the clobbered reg list
1438 // Only generate looping code when loop counter is > 1 for >= 64 bytes
// Loop prologue: MOVD ctr, REGTMP; MOVD REGTMP, CTR (the const offset
// assignment for ctr sits in an elided line).
1441 p := s.Prog(ppc64.AMOVD)
1442 p.From.Type = obj.TYPE_CONST
1444 p.To.Type = obj.TYPE_REG
1445 p.To.Reg = ppc64.REGTMP
1447 p = s.Prog(ppc64.AMOVD)
1448 p.From.Type = obj.TYPE_REG
1449 p.From.Reg = ppc64.REGTMP
1450 p.To.Type = obj.TYPE_REG
1451 p.To.Reg = ppc64.REG_CTR
1453 // Use REGTMP as index reg
// Load the second-load index (presumably 16 — the constant line is
// elided; confirm against the full source) into REGTMP.
1454 p = s.Prog(ppc64.AMOVD)
1455 p.From.Type = obj.TYPE_CONST
1457 p.To.Type = obj.TYPE_REG
1458 p.To.Reg = ppc64.REGTMP
1460 // Don't adding padding for
1461 // alignment with small iteration
// Align the loop top for branch performance on larger copies.
1464 p = s.Prog(obj.APCALIGN)
1465 p.From.Type = obj.TYPE_CONST
1469 // Generate 16 byte loads and stores.
1470 // Use temp register for index (16)
1471 // on the second one.
// Loop body: two 16-byte loads from (srcReg) and (srcReg)(REGTMP).
1473 p = s.Prog(ppc64.ALXVD2X)
1474 p.From.Type = obj.TYPE_MEM
1476 p.From.Index = ppc64.REGZERO
1477 p.To.Type = obj.TYPE_REG
1478 p.To.Reg = ppc64.REG_VS32
1482 p = s.Prog(ppc64.ALXVD2X)
1483 p.From.Type = obj.TYPE_MEM
1485 p.From.Index = ppc64.REGTMP
1486 p.To.Type = obj.TYPE_REG
1487 p.To.Reg = ppc64.REG_VS33
1489 // increment the src reg for next iteration
1490 p = s.Prog(ppc64.AADD)
1492 p.From.Type = obj.TYPE_CONST
1493 p.From.Offset = bytesPerLoop
1494 p.To.Type = obj.TYPE_REG
1497 // generate 16 byte stores
1498 p = s.Prog(ppc64.ASTXVD2X)
1499 p.From.Type = obj.TYPE_REG
1500 p.From.Reg = ppc64.REG_VS32
1501 p.To.Type = obj.TYPE_MEM
1503 p.To.Index = ppc64.REGZERO
1505 p = s.Prog(ppc64.ASTXVD2X)
1506 p.From.Type = obj.TYPE_REG
1507 p.From.Reg = ppc64.REG_VS33
1508 p.To.Type = obj.TYPE_MEM
1510 p.To.Index = ppc64.REGTMP
1512 // increment the dst reg for next iteration
1513 p = s.Prog(ppc64.AADD)
1515 p.From.Type = obj.TYPE_CONST
1516 p.From.Offset = bytesPerLoop
1517 p.To.Type = obj.TYPE_REG
1520 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1522 p = s.Prog(ppc64.ABC)
1523 p.From.Type = obj.TYPE_CONST
1524 p.From.Offset = ppc64.BO_BCTR
1525 p.Reg = ppc64.REG_R0
1526 p.To.Type = obj.TYPE_BRANCH
1529 // srcReg and dstReg were incremented in the loop, so
1530 // later instructions start with offset 0.
1534 // No loop was generated for one iteration, so
1535 // add 32 bytes to the remainder to move those bytes.
1541 // Generate 16 byte loads and stores.
1542 // Use temp register for index (value 16)
1543 // on the second one.
// Remainder >= 16: one LXVD2X/STXVD2X pair at offset 0.
1544 p := s.Prog(ppc64.ALXVD2X)
1545 p.From.Type = obj.TYPE_MEM
1547 p.From.Index = ppc64.REGZERO
1548 p.To.Type = obj.TYPE_REG
1549 p.To.Reg = ppc64.REG_VS32
1551 p = s.Prog(ppc64.ASTXVD2X)
1552 p.From.Type = obj.TYPE_REG
1553 p.From.Reg = ppc64.REG_VS32
1554 p.To.Type = obj.TYPE_MEM
1556 p.To.Index = ppc64.REGZERO
1562 // Use REGTMP as index reg
// Second remainder chunk: load an index constant into REGTMP, then an
// indexed 16-byte load/store pair (the `if` guarding this is elided).
1563 p := s.Prog(ppc64.AMOVD)
1564 p.From.Type = obj.TYPE_CONST
1566 p.To.Type = obj.TYPE_REG
1567 p.To.Reg = ppc64.REGTMP
1569 p = s.Prog(ppc64.ALXVD2X)
1570 p.From.Type = obj.TYPE_MEM
1572 p.From.Index = ppc64.REGTMP
1573 p.To.Type = obj.TYPE_REG
1574 p.To.Reg = ppc64.REG_VS32
1576 p = s.Prog(ppc64.ASTXVD2X)
1577 p.From.Type = obj.TYPE_REG
1578 p.From.Reg = ppc64.REG_VS32
1579 p.To.Type = obj.TYPE_MEM
1581 p.To.Index = ppc64.REGTMP
1588 // Generate all the remaining load and store pairs, starting with
1589 // as many 8 byte moves as possible, then 4, 2, 1.
// Scalar tail: widest load/store through REGTMP that fits the remainder.
// Note AMOVWZ (zero-extending) here vs AMOVW in the zero case — the
// extension doesn't matter for a pure move; the value is stored right back.
1591 op, size := ppc64.AMOVB, int64(1)
1594 op, size = ppc64.AMOVD, 8
1596 op, size = ppc64.AMOVWZ, 4
1598 op, size = ppc64.AMOVH, 2
// load: (srcReg)+offset -> REGTMP
1602 p.To.Type = obj.TYPE_REG
1603 p.To.Reg = ppc64.REGTMP
1604 p.From.Type = obj.TYPE_MEM
1606 p.From.Offset = offset
// store: REGTMP -> (dstReg)+offset
1610 p.From.Type = obj.TYPE_REG
1611 p.From.Reg = ppc64.REGTMP
1612 p.To.Type = obj.TYPE_MEM
1614 p.To.Offset = offset
// NOTE(review): elided listing — loop/`if` headers, offset updates, and
// some Reg assignments are missing between the visible line numbers.
//
// LoweredQuadMove (power9): like LoweredMove but uses D-form LXV/STXV
// (reg+offset addressing, no index register needed) and a 64-byte
// loop body of four load/store pairs at offsets 0, 16, 32, 48.
1619 case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1620 bytesPerLoop := int64(64)
1621 // This is used when moving more
1622 // than 8 bytes on power9. Moves start with
1623 // as many 8 byte moves as possible, then
1624 // 4, 2, or 1 byte(s) as remaining. This will
1625 // work and be efficient for power8 or later.
1626 // If there are 64 or more bytes, then a
1627 // loop is generated to move 32 bytes and
1628 // update the src and dst addresses on each
1629 // iteration. When < 64 bytes, the appropriate
1630 // number of moves are generated based on the
1632 // When moving >= 64 bytes a loop is used
1633 // MOVD len/32,REG_TMP
1640 // STXV VS33,16(R20)
1643 // Bytes not moved by this loop are moved
1644 // with a combination of the following instructions,
1645 // starting with the largest sizes and generating as
1646 // many as needed, using the appropriate offset value.
1656 // Each loop iteration moves 32 bytes
1657 ctr := v.AuxInt / bytesPerLoop
1659 // Remainder after the loop
1660 rem := v.AuxInt % bytesPerLoop
1662 dstReg := v.Args[0].Reg()
1663 srcReg := v.Args[1].Reg()
1670 // Only generate looping code when loop counter is > 1 for >= 64 bytes
// Loop prologue: MOVD ctr, REGTMP; MOVD REGTMP, CTR.
1673 p := s.Prog(ppc64.AMOVD)
1674 p.From.Type = obj.TYPE_CONST
1676 p.To.Type = obj.TYPE_REG
1677 p.To.Reg = ppc64.REGTMP
1679 p = s.Prog(ppc64.AMOVD)
1680 p.From.Type = obj.TYPE_REG
1681 p.From.Reg = ppc64.REGTMP
1682 p.To.Type = obj.TYPE_REG
1683 p.To.Reg = ppc64.REG_CTR
// Align the loop top (alignment constant is in an elided line).
1685 p = s.Prog(obj.APCALIGN)
1686 p.From.Type = obj.TYPE_CONST
1689 // Generate 16 byte loads and stores.
// First pair: offsets 0 and 16 from srcReg into VS32/VS33.
1690 p = s.Prog(ppc64.ALXV)
1691 p.From.Type = obj.TYPE_MEM
1693 p.From.Offset = offset
1694 p.To.Type = obj.TYPE_REG
1695 p.To.Reg = ppc64.REG_VS32
1699 p = s.Prog(ppc64.ALXV)
1700 p.From.Type = obj.TYPE_MEM
1702 p.From.Offset = offset + 16
1703 p.To.Type = obj.TYPE_REG
1704 p.To.Reg = ppc64.REG_VS33
1706 // generate 16 byte stores
1707 p = s.Prog(ppc64.ASTXV)
1708 p.From.Type = obj.TYPE_REG
1709 p.From.Reg = ppc64.REG_VS32
1710 p.To.Type = obj.TYPE_MEM
1712 p.To.Offset = offset
1714 p = s.Prog(ppc64.ASTXV)
1715 p.From.Type = obj.TYPE_REG
1716 p.From.Reg = ppc64.REG_VS33
1717 p.To.Type = obj.TYPE_MEM
1719 p.To.Offset = offset + 16
1721 // Generate 16 byte loads and stores.
// Second pair: offsets 32 and 48, completing 64 bytes per iteration.
1722 p = s.Prog(ppc64.ALXV)
1723 p.From.Type = obj.TYPE_MEM
1725 p.From.Offset = offset + 32
1726 p.To.Type = obj.TYPE_REG
1727 p.To.Reg = ppc64.REG_VS32
1729 p = s.Prog(ppc64.ALXV)
1730 p.From.Type = obj.TYPE_MEM
1732 p.From.Offset = offset + 48
1733 p.To.Type = obj.TYPE_REG
1734 p.To.Reg = ppc64.REG_VS33
1736 // generate 16 byte stores
1737 p = s.Prog(ppc64.ASTXV)
1738 p.From.Type = obj.TYPE_REG
1739 p.From.Reg = ppc64.REG_VS32
1740 p.To.Type = obj.TYPE_MEM
1742 p.To.Offset = offset + 32
1744 p = s.Prog(ppc64.ASTXV)
1745 p.From.Type = obj.TYPE_REG
1746 p.From.Reg = ppc64.REG_VS33
1747 p.To.Type = obj.TYPE_MEM
1749 p.To.Offset = offset + 48
1751 // increment the src reg for next iteration
1752 p = s.Prog(ppc64.AADD)
1754 p.From.Type = obj.TYPE_CONST
1755 p.From.Offset = bytesPerLoop
1756 p.To.Type = obj.TYPE_REG
1759 // increment the dst reg for next iteration
1760 p = s.Prog(ppc64.AADD)
1762 p.From.Type = obj.TYPE_CONST
1763 p.From.Offset = bytesPerLoop
1764 p.To.Type = obj.TYPE_REG
1767 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1769 p = s.Prog(ppc64.ABC)
1770 p.From.Type = obj.TYPE_CONST
1771 p.From.Offset = ppc64.BO_BCTR
1772 p.Reg = ppc64.REG_R0
1773 p.To.Type = obj.TYPE_BRANCH
1776 // srcReg and dstReg were incremented in the loop, so
1777 // later instructions start with offset 0.
1781 // No loop was generated for one iteration, so
1782 // add 32 bytes to the remainder to move those bytes.
// Remainder >= 32: two LXV/STXV pairs (offsets are in elided lines).
1787 p := s.Prog(ppc64.ALXV)
1788 p.From.Type = obj.TYPE_MEM
1790 p.To.Type = obj.TYPE_REG
1791 p.To.Reg = ppc64.REG_VS32
1793 p = s.Prog(ppc64.ALXV)
1794 p.From.Type = obj.TYPE_MEM
1797 p.To.Type = obj.TYPE_REG
1798 p.To.Reg = ppc64.REG_VS33
1800 p = s.Prog(ppc64.ASTXV)
1801 p.From.Type = obj.TYPE_REG
1802 p.From.Reg = ppc64.REG_VS32
1803 p.To.Type = obj.TYPE_MEM
1806 p = s.Prog(ppc64.ASTXV)
1807 p.From.Type = obj.TYPE_REG
1808 p.From.Reg = ppc64.REG_VS33
1809 p.To.Type = obj.TYPE_MEM
1818 // Generate 16 byte loads and stores.
// Remainder >= 16: single LXV/STXV pair at the current offset.
1819 p := s.Prog(ppc64.ALXV)
1820 p.From.Type = obj.TYPE_MEM
1822 p.From.Offset = offset
1823 p.To.Type = obj.TYPE_REG
1824 p.To.Reg = ppc64.REG_VS32
1826 p = s.Prog(ppc64.ASTXV)
1827 p.From.Type = obj.TYPE_REG
1828 p.From.Reg = ppc64.REG_VS32
1829 p.To.Type = obj.TYPE_MEM
1831 p.To.Offset = offset
// Another single 16-byte pair — presumably a second rem>=16 step after
// the offset advanced; the guard condition is elided.
1837 p := s.Prog(ppc64.ALXV)
1838 p.From.Type = obj.TYPE_MEM
1840 p.From.Offset = offset
1841 p.To.Type = obj.TYPE_REG
1842 p.To.Reg = ppc64.REG_VS32
1844 p = s.Prog(ppc64.ASTXV)
1845 p.From.Type = obj.TYPE_REG
1846 p.From.Reg = ppc64.REG_VS32
1847 p.To.Type = obj.TYPE_MEM
1849 p.To.Offset = offset
1855 // Generate all the remaining load and store pairs, starting with
1856 // as many 8 byte moves as possible, then 4, 2, 1.
// Scalar tail, identical in shape to the LoweredMove case above.
1858 op, size := ppc64.AMOVB, int64(1)
1861 op, size = ppc64.AMOVD, 8
1863 op, size = ppc64.AMOVWZ, 4
1865 op, size = ppc64.AMOVH, 2
// load: (srcReg)+offset -> REGTMP
1869 p.To.Type = obj.TYPE_REG
1870 p.To.Reg = ppc64.REGTMP
1871 p.From.Type = obj.TYPE_MEM
1873 p.From.Offset = offset
// store: REGTMP -> (dstReg)+offset
1877 p.From.Type = obj.TYPE_REG
1878 p.From.Reg = ppc64.REGTMP
1879 p.To.Type = obj.TYPE_MEM
1881 p.To.Offset = offset
// NOTE(review): elided listing — the bodies of the CALLstatic/CALLtail
// cases, several closing braces, and some helper calls (s.Call, obj.AHINT?)
// fall in the gaps. Comments describe only the visible lines.
//
// Remaining ssaGenValue cases: calls, write barrier, bounds-panic stubs,
// nil checks, and pseudo-ops that must never reach codegen.
1886 case ssa.OpPPC64CALLstatic:
1889 case ssa.OpPPC64CALLtail:
1892 case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
// Indirect call: move the target address into LR before calling.
1893 p := s.Prog(ppc64.AMOVD)
1894 p.From.Type = obj.TYPE_REG
1895 p.From.Reg = v.Args[0].Reg()
1896 p.To.Type = obj.TYPE_REG
1897 p.To.Reg = ppc64.REG_LR
// The ppc64 ABI requires the function address in R12 for indirect calls
// (per the Fatalf message below); enforced here under an elided condition.
1899 if v.Args[0].Reg() != ppc64.REG_R12 {
1900 v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1904 pp.To.Reg = ppc64.REG_LR
1906 // Insert a hint this is not a subroutine return.
1909 if base.Ctxt.Flag_shared {
1910 // When compiling Go into PIC, the function we just
1911 // called via pointer might have been implemented in
1912 // a separate module and so overwritten the TOC
1913 // pointer in R2; reload it.
// MOVD <offset>(SP), R2 — the SP offset constant is in an elided line.
1914 q := s.Prog(ppc64.AMOVD)
1915 q.From.Type = obj.TYPE_MEM
1917 q.From.Reg = ppc64.REGSP
1918 q.To.Type = obj.TYPE_REG
1919 q.To.Reg = ppc64.REG_R2
// Write barrier: call the runtime routine named by v.Aux.
1922 case ssa.OpPPC64LoweredWB:
1923 p := s.Prog(obj.ACALL)
1924 p.To.Type = obj.TYPE_MEM
1925 p.To.Name = obj.NAME_EXTERN
1926 p.To.Sym = v.Aux.(*obj.LSym)
// Bounds-check failures: call the assembly stub selected by v.AuxInt.
1928 case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1929 p := s.Prog(obj.ACALL)
1930 p.To.Type = obj.TYPE_MEM
1931 p.To.Name = obj.NAME_EXTERN
1932 p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
1933 s.UseArgs(16) // space used in callee args area by assembly stubs
1935 case ssa.OpPPC64LoweredNilCheck:
// On AIX, reads of address 0 do not fault, so an explicit compare-and-
// conditional-store sequence is used instead of a faulting load.
1936 if buildcfg.GOOS == "aix" {
1940 // NOP (so the BNE has somewhere to land)
// CMP Args[0], R0: is the pointer nil?
1943 p := s.Prog(ppc64.ACMP)
1944 p.From.Type = obj.TYPE_REG
1945 p.From.Reg = v.Args[0].Reg()
1946 p.To.Type = obj.TYPE_REG
1947 p.To.Reg = ppc64.REG_R0
1950 p2 := s.Prog(ppc64.ABNE)
1951 p2.To.Type = obj.TYPE_BRANCH
1954 // Write at 0 is forbidden and will trigger a SIGSEGV
1955 p = s.Prog(ppc64.AMOVW)
1956 p.From.Type = obj.TYPE_REG
1957 p.From.Reg = ppc64.REG_R0
1958 p.To.Type = obj.TYPE_MEM
1959 p.To.Reg = ppc64.REG_R0
1961 // NOP (so the BNE has somewhere to land)
1962 nop := s.Prog(obj.ANOP)
1963 p2.To.SetTarget(nop)
1966 // Issue a load which will fault if arg is nil.
// Non-AIX path: MOVBZ from the pointer into REGTMP; a nil pointer faults.
1967 p := s.Prog(ppc64.AMOVBZ)
1968 p.From.Type = obj.TYPE_MEM
1969 p.From.Reg = v.Args[0].Reg()
1970 ssagen.AddAux(&p.From, v)
1971 p.To.Type = obj.TYPE_REG
1972 p.To.Reg = ppc64.REGTMP
// Optional diagnostics for generated nil checks.
1974 if logopt.Enabled() {
1975 logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1977 if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1978 base.WarnfAt(v.Pos, "generated nil check")
1981 // These should be resolved by rules and not make it here.
1982 case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1983 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1984 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1985 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1986 case ssa.OpPPC64InvertFlags:
1987 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1988 case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1989 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1990 case ssa.OpClobber, ssa.OpClobberReg:
1991 // TODO: implement for clobberdead experiment. Nop is ok for now.
// default: any op not handled above is a compiler bug.
1993 v.Fatalf("genValue not implemented: %s", v.LongString())
// blockJump maps a conditional SSA block kind to its branch instruction
// (asm) and the inverted branch (invasm). For FP comparisons, asmeq marks
// "also branch on EQ" and invasmun "inverted branch must also take the
// unordered (NaN) path" — see the FGE/FLE entries and their comments.
// NOTE(review): the struct field declarations and the map type line are
// elided from this listing.
1997 var blockJump = [...]struct {
1999 asmeq, invasmun bool
2001 ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
2002 ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
2004 ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
2005 ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
2006 ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
2007 ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
2009 // TODO: need to work FP comparisons into block jumps
2010 ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
2011 ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
2012 ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
2013 ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
// ssaGenBlock emits the control-flow instructions that end block b,
// given the block laid out immediately after it (next) — a jump to
// `next` can be elided. NOTE(review): this listing is elided; the
// `switch b.Kind {` header, several case branches, and closing braces
// are not visible here.
2016 func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
2018 case ssa.BlockDefer:
2019 // defer returns in R3:
2020 // 0 if we should continue executing
2021 // 1 if we should jump to deferreturn call
2022 p := s.Prog(ppc64.ACMP)
2023 p.From.Type = obj.TYPE_REG
2024 p.From.Reg = ppc64.REG_R3
2025 p.To.Type = obj.TYPE_REG
2026 p.To.Reg = ppc64.REG_R0
// BNE -> Succs[1] (the deferreturn path); fall through or JMP to Succs[0].
2028 p = s.Prog(ppc64.ABNE)
2029 p.To.Type = obj.TYPE_BRANCH
2030 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
2031 if b.Succs[0].Block() != next {
2032 p := s.Prog(obj.AJMP)
2033 p.To.Type = obj.TYPE_BRANCH
2034 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
2037 case ssa.BlockPlain:
// Unconditional successor: emit a jump only if it isn't the next block.
2038 if b.Succs[0].Block() != next {
2039 p := s.Prog(obj.AJMP)
2040 p.To.Type = obj.TYPE_BRANCH
2041 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
2043 case ssa.BlockExit, ssa.BlockRetJmp:
2047 case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
2048 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
2049 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
2050 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
2051 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
2052 jmp := blockJump[b.Kind]
// Dispatch on which successor (if either) is the fallthrough block;
// the inner `switch next {` header is in an elided line.
2054 case b.Succs[0].Block():
// Succs[0] falls through: emit the inverted branch to Succs[1].
2055 s.Br(jmp.invasm, b.Succs[1].Block())
2057 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
// Extra BVS covers the unordered (NaN) case when invasmun is set.
2058 s.Br(ppc64.ABVS, b.Succs[1].Block())
2060 case b.Succs[1].Block():
// Succs[1] falls through: emit the direct branch to Succs[0].
2061 s.Br(jmp.asm, b.Succs[0].Block())
// Extra BEQ covers "or equal" when asmeq is set (guard elided).
2063 s.Br(ppc64.ABEQ, b.Succs[0].Block())
// Neither successor is next: pick branch direction by likelihood,
// then JMP to the other successor.
2066 if b.Likely != ssa.BranchUnlikely {
2067 s.Br(jmp.asm, b.Succs[0].Block())
2069 s.Br(ppc64.ABEQ, b.Succs[0].Block())
2071 s.Br(obj.AJMP, b.Succs[1].Block())
2073 s.Br(jmp.invasm, b.Succs[1].Block())
2075 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2076 s.Br(ppc64.ABVS, b.Succs[1].Block())
2078 s.Br(obj.AJMP, b.Succs[0].Block())
// default: unhandled block kind is a compiler bug.
2082 b.Fatalf("branch not implemented: %s", b.LongString())
// loadRegResult emits a load of the stack-allocated result n (at frame
// offset FrameOffset()+off) into register reg, using the load opcode
// appropriate for type t. NOTE(review): the trailing lines (presumably
// `p.To.Reg = reg; return p`) are elided from this listing.
2086 func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2087 p := s.Prog(loadByType(t))
2088 p.From.Type = obj.TYPE_MEM
2089 p.From.Name = obj.NAME_AUTO
2090 p.From.Sym = n.Linksym()
2091 p.From.Offset = n.FrameOffset() + off
2092 p.To.Type = obj.TYPE_REG
// spillArgReg appends (after p) a store of argument register reg into
// parameter n's frame slot at FrameOffset()+off, marked as a non-statement
// so it doesn't perturb debug line info. NOTE(review): the `return p` and
// closing brace are elided from this listing.
2097 func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2098 p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
2099 p.To.Name = obj.NAME_PARAM
2100 p.To.Sym = n.Linksym()
2101 p.Pos = p.Pos.WithNotStmt()