1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/base"
9 "cmd/compile/internal/ir"
10 "cmd/compile/internal/logopt"
11 "cmd/compile/internal/objw"
12 "cmd/compile/internal/ssa"
13 "cmd/compile/internal/ssagen"
14 "cmd/compile/internal/types"
16 "cmd/internal/obj/ppc64"
22 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
// NOTE(review): every visible statement in this function's body is commented
// out, so on ppc64 this appears to be a deliberate no-op kept to satisfy the
// ssagen hook — confirm against the full file before relying on that.
23 func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
24 // flive := b.FlagsLiveAtEnd
25 // if b.Control != nil && b.Control.Type.IsFlags() {
28 // for i := len(b.Values) - 1; i >= 0; i-- {
30 // if flive && (v.Op == v.Op == ssa.OpPPC64MOVDconst) {
// NOTE(review): the dead condition above looks garbled ("v.Op == v.Op ==");
// if this code is ever revived it presumably should read
// "v.Op == ssa.OpPPC64MOVDconst" — verify before enabling.
31 // // The "mark" is any non-nil Aux value.
34 // if v.Type.IsFlags() {
37 // for _, a := range v.Args {
38 // if a.Type.IsFlags() {
45 // loadByType returns the load instruction of the given type.
46 func loadByType(t *types.Type) obj.As {
// Reaching this point means t matched none of the supported load
// widths/classes handled above (elided from this view); an unrecognized
// type here is an internal compiler error, hence the panic.
78 panic("bad load type")
81 // storeByType returns the store instruction of the given type.
82 func storeByType(t *types.Type) obj.As {
// As with loadByType: falling through to here means t matched none of the
// supported store widths/classes handled above (elided from this view),
// which is an internal compiler error.
102 panic("bad store type")
105 func ssaGenValue(s *ssagen.State, v *ssa.Value) {
128 case ssa.OpPPC64LoweredMuluhilo:
129 // MULHDU Rarg1, Rarg0, Reg0
130 // MULLD Rarg1, Rarg0, Reg1
131 r0 := v.Args[0].Reg()
132 r1 := v.Args[1].Reg()
133 p := s.Prog(ppc64.AMULHDU)
134 p.From.Type = obj.TYPE_REG
137 p.To.Type = obj.TYPE_REG
139 p1 := s.Prog(ppc64.AMULLD)
140 p1.From.Type = obj.TYPE_REG
143 p1.To.Type = obj.TYPE_REG
146 case ssa.OpPPC64LoweredAdd64Carry:
147 // ADDC Rarg2, -1, Rtmp
148 // ADDE Rarg1, Rarg0, Reg0
150 r0 := v.Args[0].Reg()
151 r1 := v.Args[1].Reg()
152 r2 := v.Args[2].Reg()
153 p := s.Prog(ppc64.AADDC)
154 p.From.Type = obj.TYPE_CONST
157 p.To.Type = obj.TYPE_REG
158 p.To.Reg = ppc64.REGTMP
159 p1 := s.Prog(ppc64.AADDE)
160 p1.From.Type = obj.TYPE_REG
163 p1.To.Type = obj.TYPE_REG
165 p2 := s.Prog(ppc64.AADDZE)
166 p2.From.Type = obj.TYPE_REG
167 p2.From.Reg = ppc64.REGZERO
168 p2.To.Type = obj.TYPE_REG
171 case ssa.OpPPC64LoweredAtomicAnd8,
172 ssa.OpPPC64LoweredAtomicAnd32,
173 ssa.OpPPC64LoweredAtomicOr8,
174 ssa.OpPPC64LoweredAtomicOr32:
176 // LBAR/LWAR (Rarg0), Rtmp
177 // AND/OR Rarg1, Rtmp
178 // STBCCC/STWCCC Rtmp, (Rarg0)
182 if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
186 r0 := v.Args[0].Reg()
187 r1 := v.Args[1].Reg()
188 // LWSYNC - Assuming shared data not write-through-required nor
189 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
190 plwsync := s.Prog(ppc64.ALWSYNC)
191 plwsync.To.Type = obj.TYPE_NONE
194 p.From.Type = obj.TYPE_MEM
196 p.To.Type = obj.TYPE_REG
197 p.To.Reg = ppc64.REGTMP
199 p1 := s.Prog(v.Op.Asm())
200 p1.From.Type = obj.TYPE_REG
202 p1.To.Type = obj.TYPE_REG
203 p1.To.Reg = ppc64.REGTMP
206 p2.From.Type = obj.TYPE_REG
207 p2.From.Reg = ppc64.REGTMP
208 p2.To.Type = obj.TYPE_MEM
210 p2.RegTo2 = ppc64.REGTMP
212 p3 := s.Prog(ppc64.ABNE)
213 p3.To.Type = obj.TYPE_BRANCH
216 case ssa.OpPPC64LoweredAtomicAdd32,
217 ssa.OpPPC64LoweredAtomicAdd64:
219 // LDAR/LWAR (Rarg0), Rout
221 // STDCCC/STWCCC Rout, (Rarg0)
223 // MOVW Rout,Rout (if Add32)
226 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
230 r0 := v.Args[0].Reg()
231 r1 := v.Args[1].Reg()
233 // LWSYNC - Assuming shared data not write-through-required nor
234 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
235 plwsync := s.Prog(ppc64.ALWSYNC)
236 plwsync.To.Type = obj.TYPE_NONE
239 p.From.Type = obj.TYPE_MEM
241 p.To.Type = obj.TYPE_REG
244 p1 := s.Prog(ppc64.AADD)
245 p1.From.Type = obj.TYPE_REG
248 p1.To.Type = obj.TYPE_REG
251 p3.From.Type = obj.TYPE_REG
253 p3.To.Type = obj.TYPE_MEM
256 p4 := s.Prog(ppc64.ABNE)
257 p4.To.Type = obj.TYPE_BRANCH
260 // Ensure a 32 bit result
261 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
262 p5 := s.Prog(ppc64.AMOVWZ)
263 p5.To.Type = obj.TYPE_REG
265 p5.From.Type = obj.TYPE_REG
269 case ssa.OpPPC64LoweredAtomicExchange32,
270 ssa.OpPPC64LoweredAtomicExchange64:
272 // LDAR/LWAR (Rarg0), Rout
273 // STDCCC/STWCCC Rout, (Rarg0)
278 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
282 r0 := v.Args[0].Reg()
283 r1 := v.Args[1].Reg()
285 // LWSYNC - Assuming shared data not write-through-required nor
286 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
287 plwsync := s.Prog(ppc64.ALWSYNC)
288 plwsync.To.Type = obj.TYPE_NONE
291 p.From.Type = obj.TYPE_MEM
293 p.To.Type = obj.TYPE_REG
297 p1.From.Type = obj.TYPE_REG
299 p1.To.Type = obj.TYPE_MEM
302 p2 := s.Prog(ppc64.ABNE)
303 p2.To.Type = obj.TYPE_BRANCH
306 pisync := s.Prog(ppc64.AISYNC)
307 pisync.To.Type = obj.TYPE_NONE
309 case ssa.OpPPC64LoweredAtomicLoad8,
310 ssa.OpPPC64LoweredAtomicLoad32,
311 ssa.OpPPC64LoweredAtomicLoad64,
312 ssa.OpPPC64LoweredAtomicLoadPtr:
314 // MOVB/MOVD/MOVW (Rarg0), Rout
321 case ssa.OpPPC64LoweredAtomicLoad8:
323 case ssa.OpPPC64LoweredAtomicLoad32:
327 arg0 := v.Args[0].Reg()
329 // SYNC when AuxInt == 1; otherwise, load-acquire
331 psync := s.Prog(ppc64.ASYNC)
332 psync.To.Type = obj.TYPE_NONE
336 p.From.Type = obj.TYPE_MEM
338 p.To.Type = obj.TYPE_REG
342 p1.From.Type = obj.TYPE_REG
344 p1.To.Type = obj.TYPE_REG
347 p2 := s.Prog(ppc64.ABNE)
348 p2.To.Type = obj.TYPE_BRANCH
350 pisync := s.Prog(ppc64.AISYNC)
351 pisync.To.Type = obj.TYPE_NONE
352 p2.To.SetTarget(pisync)
354 case ssa.OpPPC64LoweredAtomicStore8,
355 ssa.OpPPC64LoweredAtomicStore32,
356 ssa.OpPPC64LoweredAtomicStore64:
358 // MOVB/MOVW/MOVD arg1,(arg0)
361 case ssa.OpPPC64LoweredAtomicStore8:
363 case ssa.OpPPC64LoweredAtomicStore32:
366 arg0 := v.Args[0].Reg()
367 arg1 := v.Args[1].Reg()
368 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
370 syncOp := ppc64.ASYNC
372 syncOp = ppc64.ALWSYNC
374 psync := s.Prog(syncOp)
375 psync.To.Type = obj.TYPE_NONE
378 p.To.Type = obj.TYPE_MEM
380 p.From.Type = obj.TYPE_REG
383 case ssa.OpPPC64LoweredAtomicCas64,
384 ssa.OpPPC64LoweredAtomicCas32:
387 // LDAR (Rarg0), MutexHint, Rtmp
390 // STDCCC Rarg2, (Rarg0)
392 // LWSYNC // Only for sequential consistency; not required in CasRel.
401 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
406 r0 := v.Args[0].Reg()
407 r1 := v.Args[1].Reg()
408 r2 := v.Args[2].Reg()
410 // LWSYNC - Assuming shared data not write-through-required nor
411 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
412 plwsync1 := s.Prog(ppc64.ALWSYNC)
413 plwsync1.To.Type = obj.TYPE_NONE
416 p.From.Type = obj.TYPE_MEM
418 p.To.Type = obj.TYPE_REG
419 p.To.Reg = ppc64.REGTMP
420 // If it is a Compare-and-Swap-Release operation, set the EH field with
427 p1.From.Type = obj.TYPE_REG
429 p1.To.Reg = ppc64.REGTMP
430 p1.To.Type = obj.TYPE_REG
432 p2 := s.Prog(ppc64.ABNE)
433 p2.To.Type = obj.TYPE_BRANCH
436 p3.From.Type = obj.TYPE_REG
438 p3.To.Type = obj.TYPE_MEM
441 p4 := s.Prog(ppc64.ABNE)
442 p4.To.Type = obj.TYPE_BRANCH
444 // LWSYNC - Assuming shared data not write-through-required nor
445 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
446 // If the operation is a CAS-Release, then synchronization is not necessary.
448 plwsync2 := s.Prog(ppc64.ALWSYNC)
449 plwsync2.To.Type = obj.TYPE_NONE
452 p5 := s.Prog(ppc64.AMOVD)
453 p5.From.Type = obj.TYPE_CONST
455 p5.To.Type = obj.TYPE_REG
458 p6 := s.Prog(obj.AJMP)
459 p6.To.Type = obj.TYPE_BRANCH
461 p7 := s.Prog(ppc64.AMOVD)
462 p7.From.Type = obj.TYPE_CONST
464 p7.To.Type = obj.TYPE_REG
468 p8 := s.Prog(obj.ANOP)
471 case ssa.OpPPC64LoweredPubBarrier:
475 case ssa.OpPPC64LoweredGetClosurePtr:
476 // Closure pointer is R11 (already)
477 ssagen.CheckLoweredGetClosurePtr(v)
479 case ssa.OpPPC64LoweredGetCallerSP:
480 // caller's SP is FixedFrameSize below the address of the first arg
481 p := s.Prog(ppc64.AMOVD)
482 p.From.Type = obj.TYPE_ADDR
483 p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
484 p.From.Name = obj.NAME_PARAM
485 p.To.Type = obj.TYPE_REG
488 case ssa.OpPPC64LoweredGetCallerPC:
489 p := s.Prog(obj.AGETCALLERPC)
490 p.To.Type = obj.TYPE_REG
493 case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
494 // input is already rounded
497 loadOp := loadByType(v.Type)
499 ssagen.AddrAuto(&p.From, v.Args[0])
500 p.To.Type = obj.TYPE_REG
504 storeOp := storeByType(v.Type)
506 p.From.Type = obj.TYPE_REG
507 p.From.Reg = v.Args[0].Reg()
508 ssagen.AddrAuto(&p.To, v)
510 case ssa.OpArgIntReg, ssa.OpArgFloatReg:
511 // The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
512 // The loop only runs once.
513 for _, a := range v.Block.Func.RegArgs {
514 // Pass the spill/unspill information along to the assembler, offset by size of
515 // the saved LR slot.
516 addr := ssagen.SpillSlotAddr(a, ppc64.REGSP, base.Ctxt.Arch.FixedFrameSize)
517 s.FuncInfo().AddSpill(
518 obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
520 v.Block.Func.RegArgs = nil
522 ssagen.CheckArgReg(v)
524 case ssa.OpPPC64DIVD:
534 r0 := v.Args[0].Reg()
535 r1 := v.Args[1].Reg()
537 p := s.Prog(ppc64.ACMP)
538 p.From.Type = obj.TYPE_REG
540 p.To.Type = obj.TYPE_CONST
543 pbahead := s.Prog(ppc64.ABEQ)
544 pbahead.To.Type = obj.TYPE_BRANCH
546 p = s.Prog(v.Op.Asm())
547 p.From.Type = obj.TYPE_REG
550 p.To.Type = obj.TYPE_REG
553 pbover := s.Prog(obj.AJMP)
554 pbover.To.Type = obj.TYPE_BRANCH
556 p = s.Prog(ppc64.ANEG)
557 p.To.Type = obj.TYPE_REG
559 p.From.Type = obj.TYPE_REG
561 pbahead.To.SetTarget(p)
564 pbover.To.SetTarget(p)
566 case ssa.OpPPC64DIVW:
567 // word-width version of above
569 r0 := v.Args[0].Reg()
570 r1 := v.Args[1].Reg()
572 p := s.Prog(ppc64.ACMPW)
573 p.From.Type = obj.TYPE_REG
575 p.To.Type = obj.TYPE_CONST
578 pbahead := s.Prog(ppc64.ABEQ)
579 pbahead.To.Type = obj.TYPE_BRANCH
581 p = s.Prog(v.Op.Asm())
582 p.From.Type = obj.TYPE_REG
585 p.To.Type = obj.TYPE_REG
588 pbover := s.Prog(obj.AJMP)
589 pbover.To.Type = obj.TYPE_BRANCH
591 p = s.Prog(ppc64.ANEG)
592 p.To.Type = obj.TYPE_REG
594 p.From.Type = obj.TYPE_REG
596 pbahead.To.SetTarget(p)
599 pbover.To.SetTarget(p)
601 case ssa.OpPPC64CLRLSLWI:
603 r1 := v.Args[0].Reg()
605 p := s.Prog(v.Op.Asm())
606 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
607 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
608 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
610 p.To.Type = obj.TYPE_REG
613 case ssa.OpPPC64CLRLSLDI:
615 r1 := v.Args[0].Reg()
617 p := s.Prog(v.Op.Asm())
618 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
619 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
620 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
622 p.To.Type = obj.TYPE_REG
625 // Mask has been set as sh
626 case ssa.OpPPC64RLDICL:
628 r1 := v.Args[0].Reg()
630 p := s.Prog(v.Op.Asm())
631 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}
632 p.SetFrom3Const(ssa.GetPPC64Shiftmb(shifts))
634 p.To.Type = obj.TYPE_REG
637 case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
638 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
639 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
640 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
641 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
642 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
643 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
644 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
646 r1 := v.Args[0].Reg()
647 r2 := v.Args[1].Reg()
648 p := s.Prog(v.Op.Asm())
649 p.From.Type = obj.TYPE_REG
652 p.To.Type = obj.TYPE_REG
655 case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
656 r1 := v.Args[0].Reg()
657 r2 := v.Args[1].Reg()
658 p := s.Prog(v.Op.Asm())
659 p.From.Type = obj.TYPE_REG
662 p.To.Type = obj.TYPE_REG
663 p.To.Reg = ppc64.REGTMP // result is not needed
665 case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
666 p := s.Prog(v.Op.Asm())
667 p.From.Type = obj.TYPE_CONST
668 p.From.Offset = v.AuxInt
669 p.Reg = v.Args[0].Reg()
670 p.To.Type = obj.TYPE_REG
673 // Auxint holds encoded rotate + mask
674 case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
675 rot, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
676 p := s.Prog(v.Op.Asm())
677 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
678 p.Reg = v.Args[0].Reg()
679 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(rot)}
680 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
683 case ssa.OpPPC64RLWNM:
684 _, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
685 p := s.Prog(v.Op.Asm())
686 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
687 p.Reg = v.Args[0].Reg()
688 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
689 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
691 case ssa.OpPPC64MADDLD:
693 r1 := v.Args[0].Reg()
694 r2 := v.Args[1].Reg()
695 r3 := v.Args[2].Reg()
697 p := s.Prog(v.Op.Asm())
698 p.From.Type = obj.TYPE_REG
702 p.To.Type = obj.TYPE_REG
705 case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
707 r1 := v.Args[0].Reg()
708 r2 := v.Args[1].Reg()
709 r3 := v.Args[2].Reg()
711 p := s.Prog(v.Op.Asm())
712 p.From.Type = obj.TYPE_REG
716 p.To.Type = obj.TYPE_REG
719 case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
720 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
721 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
722 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
724 p := s.Prog(v.Op.Asm())
725 p.To.Type = obj.TYPE_REG
727 p.From.Type = obj.TYPE_REG
728 p.From.Reg = v.Args[0].Reg()
730 case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
731 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
732 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
733 p := s.Prog(v.Op.Asm())
734 p.Reg = v.Args[0].Reg()
735 p.From.Type = obj.TYPE_CONST
736 p.From.Offset = v.AuxInt
737 p.To.Type = obj.TYPE_REG
740 case ssa.OpPPC64ADDC, ssa.OpPPC64ADDE, ssa.OpPPC64SUBC, ssa.OpPPC64SUBE:
741 r := v.Reg0() // CA is the first, implied argument.
742 r1 := v.Args[0].Reg()
743 r2 := v.Args[1].Reg()
744 p := s.Prog(v.Op.Asm())
745 p.From.Type = obj.TYPE_REG
748 p.To.Type = obj.TYPE_REG
751 case ssa.OpPPC64ADDZEzero, ssa.OpPPC64SUBZEzero:
752 p := s.Prog(v.Op.Asm())
753 p.From.Type = obj.TYPE_REG
754 p.From.Reg = ppc64.REG_R0
755 p.To.Type = obj.TYPE_REG
758 case ssa.OpPPC64ADDCconst:
759 p := s.Prog(v.Op.Asm())
760 p.Reg = v.Args[0].Reg()
761 p.From.Type = obj.TYPE_CONST
762 p.From.Offset = v.AuxInt
763 p.To.Type = obj.TYPE_REG
764 // Output is a pair, the second is the CA, which is implied.
767 case ssa.OpPPC64SUBCconst:
768 p := s.Prog(v.Op.Asm())
769 p.SetFrom3Const(v.AuxInt)
770 p.From.Type = obj.TYPE_REG
771 p.From.Reg = v.Args[0].Reg()
772 p.To.Type = obj.TYPE_REG
775 case ssa.OpPPC64SUBFCconst:
776 p := s.Prog(v.Op.Asm())
777 p.SetFrom3Const(v.AuxInt)
778 p.From.Type = obj.TYPE_REG
779 p.From.Reg = v.Args[0].Reg()
780 p.To.Type = obj.TYPE_REG
783 case ssa.OpPPC64ANDCCconst:
784 p := s.Prog(v.Op.Asm())
785 p.Reg = v.Args[0].Reg()
786 p.From.Type = obj.TYPE_CONST
787 p.From.Offset = v.AuxInt
788 p.To.Type = obj.TYPE_REG
789 p.To.Reg = ppc64.REGTMP // discard result
791 case ssa.OpPPC64MOVDaddr:
792 switch v.Aux.(type) {
794 v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
796 // If aux offset and aux int are both 0, and the same
797 // input and output regs are used, no instruction
798 // needs to be generated, since it would just be
800 if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
801 p := s.Prog(ppc64.AMOVD)
802 p.From.Type = obj.TYPE_ADDR
803 p.From.Reg = v.Args[0].Reg()
804 p.From.Offset = v.AuxInt
805 p.To.Type = obj.TYPE_REG
809 case *obj.LSym, ir.Node:
810 p := s.Prog(ppc64.AMOVD)
811 p.From.Type = obj.TYPE_ADDR
812 p.From.Reg = v.Args[0].Reg()
813 p.To.Type = obj.TYPE_REG
815 ssagen.AddAux(&p.From, v)
819 case ssa.OpPPC64MOVDconst:
820 p := s.Prog(v.Op.Asm())
821 p.From.Type = obj.TYPE_CONST
822 p.From.Offset = v.AuxInt
823 p.To.Type = obj.TYPE_REG
826 case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
827 p := s.Prog(v.Op.Asm())
828 p.From.Type = obj.TYPE_FCONST
829 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
830 p.To.Type = obj.TYPE_REG
833 case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
834 p := s.Prog(v.Op.Asm())
835 p.From.Type = obj.TYPE_REG
836 p.From.Reg = v.Args[0].Reg()
837 p.To.Type = obj.TYPE_REG
838 p.To.Reg = v.Args[1].Reg()
840 case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
841 p := s.Prog(v.Op.Asm())
842 p.From.Type = obj.TYPE_REG
843 p.From.Reg = v.Args[0].Reg()
844 p.To.Type = obj.TYPE_CONST
845 p.To.Offset = v.AuxInt
847 case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
848 // Shift in register to required size
849 p := s.Prog(v.Op.Asm())
850 p.From.Type = obj.TYPE_REG
851 p.From.Reg = v.Args[0].Reg()
853 p.To.Type = obj.TYPE_REG
855 case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:
857 // MOVDload and MOVWload are DS form instructions that are restricted to
858 // offsets that are a multiple of 4. If the offset is not a multple of 4,
859 // then the address of the symbol to be loaded is computed (base + offset)
860 // and used as the new base register and the offset field in the instruction
861 // can be set to zero.
863 // This same problem can happen with gostrings since the final offset is not
864 // known yet, but could be unaligned after the relocation is resolved.
865 // So gostrings are handled the same way.
867 // This allows the MOVDload and MOVWload to be generated in more cases and
868 // eliminates some offset and alignment checking in the rules file.
870 fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
871 ssagen.AddAux(&fromAddr, v)
875 switch fromAddr.Name {
876 case obj.NAME_EXTERN, obj.NAME_STATIC:
877 // Special case for a rule combines the bytes of gostring.
878 // The v alignment might seem OK, but we don't want to load it
879 // using an offset because relocation comes later.
880 genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go.string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
882 genAddr = fromAddr.Offset%4 != 0
885 // Load full address into the temp register.
886 p := s.Prog(ppc64.AMOVD)
887 p.From.Type = obj.TYPE_ADDR
888 p.From.Reg = v.Args[0].Reg()
889 ssagen.AddAux(&p.From, v)
890 // Load target using temp as base register
891 // and offset zero. Setting NAME_NONE
892 // prevents any extra offsets from being
894 p.To.Type = obj.TYPE_REG
895 p.To.Reg = ppc64.REGTMP
896 fromAddr.Reg = ppc64.REGTMP
897 // Clear the offset field and other
898 // information that might be used
899 // by the assembler to add to the
900 // final offset value.
902 fromAddr.Name = obj.NAME_NONE
905 p := s.Prog(v.Op.Asm())
907 p.To.Type = obj.TYPE_REG
911 case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
912 p := s.Prog(v.Op.Asm())
913 p.From.Type = obj.TYPE_MEM
914 p.From.Reg = v.Args[0].Reg()
915 ssagen.AddAux(&p.From, v)
916 p.To.Type = obj.TYPE_REG
919 case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
920 p := s.Prog(v.Op.Asm())
921 p.From.Type = obj.TYPE_MEM
922 p.From.Reg = v.Args[0].Reg()
923 p.To.Type = obj.TYPE_REG
926 case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
927 p := s.Prog(v.Op.Asm())
928 p.To.Type = obj.TYPE_MEM
929 p.To.Reg = v.Args[0].Reg()
930 p.From.Type = obj.TYPE_REG
931 p.From.Reg = v.Args[1].Reg()
933 case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
934 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
935 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
936 p := s.Prog(v.Op.Asm())
937 p.From.Type = obj.TYPE_MEM
938 p.From.Reg = v.Args[0].Reg()
939 p.From.Index = v.Args[1].Reg()
940 p.To.Type = obj.TYPE_REG
943 case ssa.OpPPC64DCBT:
944 p := s.Prog(v.Op.Asm())
945 p.From.Type = obj.TYPE_MEM
946 p.From.Reg = v.Args[0].Reg()
947 p.To.Type = obj.TYPE_CONST
948 p.To.Offset = v.AuxInt
950 case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
951 p := s.Prog(v.Op.Asm())
952 p.From.Type = obj.TYPE_REG
953 p.From.Reg = ppc64.REGZERO
954 p.To.Type = obj.TYPE_MEM
955 p.To.Reg = v.Args[0].Reg()
956 ssagen.AddAux(&p.To, v)
958 case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:
960 // MOVDstore and MOVDstorezero become DS form instructions that are restricted
961 // to offset values that are a multple of 4. If the offset field is not a
962 // multiple of 4, then the full address of the store target is computed (base +
963 // offset) and used as the new base register and the offset in the instruction
966 // This allows the MOVDstore and MOVDstorezero to be generated in more cases,
967 // and prevents checking of the offset value and alignment in the rules.
969 toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
970 ssagen.AddAux(&toAddr, v)
972 if toAddr.Offset%4 != 0 {
973 p := s.Prog(ppc64.AMOVD)
974 p.From.Type = obj.TYPE_ADDR
975 p.From.Reg = v.Args[0].Reg()
976 ssagen.AddAux(&p.From, v)
977 p.To.Type = obj.TYPE_REG
978 p.To.Reg = ppc64.REGTMP
979 toAddr.Reg = ppc64.REGTMP
980 // Clear the offset field and other
981 // information that might be used
982 // by the assembler to add to the
983 // final offset value.
985 toAddr.Name = obj.NAME_NONE
988 p := s.Prog(v.Op.Asm())
990 p.From.Type = obj.TYPE_REG
991 if v.Op == ssa.OpPPC64MOVDstorezero {
992 p.From.Reg = ppc64.REGZERO
994 p.From.Reg = v.Args[1].Reg()
997 case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
998 p := s.Prog(v.Op.Asm())
999 p.From.Type = obj.TYPE_REG
1000 p.From.Reg = v.Args[1].Reg()
1001 p.To.Type = obj.TYPE_MEM
1002 p.To.Reg = v.Args[0].Reg()
1003 ssagen.AddAux(&p.To, v)
1005 case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
1006 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
1007 ssa.OpPPC64MOVHBRstoreidx:
1008 p := s.Prog(v.Op.Asm())
1009 p.From.Type = obj.TYPE_REG
1010 p.From.Reg = v.Args[2].Reg()
1011 p.To.Index = v.Args[1].Reg()
1012 p.To.Type = obj.TYPE_MEM
1013 p.To.Reg = v.Args[0].Reg()
1015 case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
1017 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
1018 // ISEL only accepts 0, 1, 2 condition values but the others can be
1019 // achieved by swapping operand order.
1020 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
1021 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
1022 // ISELB is used when a boolean result is needed, returning 0 or 1
1023 p := s.Prog(ppc64.AISEL)
1024 p.To.Type = obj.TYPE_REG
1026 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
1027 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
1028 if v.Op == ssa.OpPPC64ISEL {
1029 r.Reg = v.Args[1].Reg()
1031 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
1034 p.SetFrom3Reg(v.Args[0].Reg())
1036 p.Reg = v.Args[0].Reg()
1039 p.From.Type = obj.TYPE_CONST
1040 p.From.Offset = v.AuxInt & 3
1042 case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
1043 // The LoweredQuad code generation
1044 // generates STXV instructions on
1045 // power9. The Short variation is used
1046 // if no loop is generated.
1048 // sizes >= 64 generate a loop as follows:
1050 // Set up loop counter in CTR, used by BC
1051 // XXLXOR clears VS32
1052 // XXLXOR VS32,VS32,VS32
1053 // MOVD len/64,REG_TMP
1057 // STXV VS32,16(R20)
1058 // STXV VS32,32(R20)
1059 // STXV VS32,48(R20)
1063 // Bytes per iteration
1064 ctr := v.AuxInt / 64
1067 rem := v.AuxInt % 64
1069 // Only generate a loop if there is more
1070 // than 1 iteration.
1072 // Set up VS32 (V0) to hold 0s
1073 p := s.Prog(ppc64.AXXLXOR)
1074 p.From.Type = obj.TYPE_REG
1075 p.From.Reg = ppc64.REG_VS32
1076 p.To.Type = obj.TYPE_REG
1077 p.To.Reg = ppc64.REG_VS32
1078 p.Reg = ppc64.REG_VS32
1080 // Set up CTR loop counter
1081 p = s.Prog(ppc64.AMOVD)
1082 p.From.Type = obj.TYPE_CONST
1084 p.To.Type = obj.TYPE_REG
1085 p.To.Reg = ppc64.REGTMP
1087 p = s.Prog(ppc64.AMOVD)
1088 p.From.Type = obj.TYPE_REG
1089 p.From.Reg = ppc64.REGTMP
1090 p.To.Type = obj.TYPE_REG
1091 p.To.Reg = ppc64.REG_CTR
1093 // Don't generate padding for
1094 // loops with few iterations.
1096 p = s.Prog(obj.APCALIGN)
1097 p.From.Type = obj.TYPE_CONST
1101 // generate 4 STXVs to zero 64 bytes
1104 p = s.Prog(ppc64.ASTXV)
1105 p.From.Type = obj.TYPE_REG
1106 p.From.Reg = ppc64.REG_VS32
1107 p.To.Type = obj.TYPE_MEM
1108 p.To.Reg = v.Args[0].Reg()
1110 // Save the top of loop
1114 p = s.Prog(ppc64.ASTXV)
1115 p.From.Type = obj.TYPE_REG
1116 p.From.Reg = ppc64.REG_VS32
1117 p.To.Type = obj.TYPE_MEM
1118 p.To.Reg = v.Args[0].Reg()
1121 p = s.Prog(ppc64.ASTXV)
1122 p.From.Type = obj.TYPE_REG
1123 p.From.Reg = ppc64.REG_VS32
1124 p.To.Type = obj.TYPE_MEM
1125 p.To.Reg = v.Args[0].Reg()
1128 p = s.Prog(ppc64.ASTXV)
1129 p.From.Type = obj.TYPE_REG
1130 p.From.Reg = ppc64.REG_VS32
1131 p.To.Type = obj.TYPE_MEM
1132 p.To.Reg = v.Args[0].Reg()
1135 // Increment address for the
1136 // 64 bytes just zeroed.
1137 p = s.Prog(ppc64.AADD)
1138 p.Reg = v.Args[0].Reg()
1139 p.From.Type = obj.TYPE_CONST
1141 p.To.Type = obj.TYPE_REG
1142 p.To.Reg = v.Args[0].Reg()
1144 // Branch back to top of loop
1146 // BC with BO_BCTR generates bdnz
1147 p = s.Prog(ppc64.ABC)
1148 p.From.Type = obj.TYPE_CONST
1149 p.From.Offset = ppc64.BO_BCTR
1150 p.Reg = ppc64.REG_R0
1151 p.To.Type = obj.TYPE_BRANCH
1154 // When ctr == 1 the loop was not generated but
1155 // there are at least 64 bytes to clear, so add
1156 // that to the remainder to generate the code
1157 // to clear those doublewords
1162 // Clear the remainder starting at offset zero
1165 if rem >= 16 && ctr <= 1 {
1166 // If the XXLXOR hasn't already been
1167 // generated, do it here to initialize
1169 p := s.Prog(ppc64.AXXLXOR)
1170 p.From.Type = obj.TYPE_REG
1171 p.From.Reg = ppc64.REG_VS32
1172 p.To.Type = obj.TYPE_REG
1173 p.To.Reg = ppc64.REG_VS32
1174 p.Reg = ppc64.REG_VS32
1176 // Generate STXV for 32 or 64
1179 p := s.Prog(ppc64.ASTXV)
1180 p.From.Type = obj.TYPE_REG
1181 p.From.Reg = ppc64.REG_VS32
1182 p.To.Type = obj.TYPE_MEM
1183 p.To.Reg = v.Args[0].Reg()
1184 p.To.Offset = offset
1186 p = s.Prog(ppc64.ASTXV)
1187 p.From.Type = obj.TYPE_REG
1188 p.From.Reg = ppc64.REG_VS32
1189 p.To.Type = obj.TYPE_MEM
1190 p.To.Reg = v.Args[0].Reg()
1191 p.To.Offset = offset + 16
1195 // Generate 16 bytes
1197 p := s.Prog(ppc64.ASTXV)
1198 p.From.Type = obj.TYPE_REG
1199 p.From.Reg = ppc64.REG_VS32
1200 p.To.Type = obj.TYPE_MEM
1201 p.To.Reg = v.Args[0].Reg()
1202 p.To.Offset = offset
1207 // first clear as many doublewords as possible
1208 // then clear remaining sizes as available
1210 op, size := ppc64.AMOVB, int64(1)
1213 op, size = ppc64.AMOVD, 8
1215 op, size = ppc64.AMOVW, 4
1217 op, size = ppc64.AMOVH, 2
1220 p.From.Type = obj.TYPE_REG
1221 p.From.Reg = ppc64.REG_R0
1222 p.To.Type = obj.TYPE_MEM
1223 p.To.Reg = v.Args[0].Reg()
1224 p.To.Offset = offset
1229 case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1231 // Unaligned data doesn't hurt performance
1232 // for these instructions on power8.
1234 // For sizes >= 64 generate a loop as follows:
1236 // Set up loop counter in CTR, used by BC
1237 // XXLXOR VS32,VS32,VS32
1238 // MOVD len/32,REG_TMP
1242 // STXVD2X VS32,(R0)(R20)
1243 // STXVD2X VS32,(R31)(R20)
1247 // any remainder is done as described below
1249 // for sizes < 64 bytes, first clear as many doublewords as possible,
1250 // then handle the remainder
1255 // the remainder bytes are cleared using one or more
1256 // of the following instructions with the appropriate
1257 // offsets depending which instructions are needed
1259 // MOVW R0,n1(R20) 4 bytes
1260 // MOVH R0,n2(R20) 2 bytes
1261 // MOVB R0,n3(R20) 1 byte
1263 // 7 bytes: MOVW, MOVH, MOVB
1264 // 6 bytes: MOVW, MOVH
1265 // 5 bytes: MOVW, MOVB
1266 // 3 bytes: MOVH, MOVB
1268 // each loop iteration does 32 bytes
1269 ctr := v.AuxInt / 32
1272 rem := v.AuxInt % 32
1274 // only generate a loop if there is more
1275 // than 1 iteration.
1277 // Set up VS32 (V0) to hold 0s
1278 p := s.Prog(ppc64.AXXLXOR)
1279 p.From.Type = obj.TYPE_REG
1280 p.From.Reg = ppc64.REG_VS32
1281 p.To.Type = obj.TYPE_REG
1282 p.To.Reg = ppc64.REG_VS32
1283 p.Reg = ppc64.REG_VS32
1285 // Set up CTR loop counter
1286 p = s.Prog(ppc64.AMOVD)
1287 p.From.Type = obj.TYPE_CONST
1289 p.To.Type = obj.TYPE_REG
1290 p.To.Reg = ppc64.REGTMP
1292 p = s.Prog(ppc64.AMOVD)
1293 p.From.Type = obj.TYPE_REG
1294 p.From.Reg = ppc64.REGTMP
1295 p.To.Type = obj.TYPE_REG
1296 p.To.Reg = ppc64.REG_CTR
1298 // Set up R31 to hold index value 16
1299 p = s.Prog(ppc64.AMOVD)
1300 p.From.Type = obj.TYPE_CONST
1302 p.To.Type = obj.TYPE_REG
1303 p.To.Reg = ppc64.REGTMP
1305 // Don't add padding for alignment
1306 // with few loop iterations.
1308 p = s.Prog(obj.APCALIGN)
1309 p.From.Type = obj.TYPE_CONST
1313 // generate 2 STXVD2Xs to store 16 bytes
1314 // when this is a loop then the top must be saved
1316 // This is the top of loop
1318 p = s.Prog(ppc64.ASTXVD2X)
1319 p.From.Type = obj.TYPE_REG
1320 p.From.Reg = ppc64.REG_VS32
1321 p.To.Type = obj.TYPE_MEM
1322 p.To.Reg = v.Args[0].Reg()
1323 p.To.Index = ppc64.REGZERO
1324 // Save the top of loop
1328 p = s.Prog(ppc64.ASTXVD2X)
1329 p.From.Type = obj.TYPE_REG
1330 p.From.Reg = ppc64.REG_VS32
1331 p.To.Type = obj.TYPE_MEM
1332 p.To.Reg = v.Args[0].Reg()
1333 p.To.Index = ppc64.REGTMP
1335 // Increment address for the
1336 // 4 doublewords just zeroed.
1337 p = s.Prog(ppc64.AADD)
1338 p.Reg = v.Args[0].Reg()
1339 p.From.Type = obj.TYPE_CONST
1341 p.To.Type = obj.TYPE_REG
1342 p.To.Reg = v.Args[0].Reg()
1344 // Branch back to top of loop
1346 // BC with BO_BCTR generates bdnz
1347 p = s.Prog(ppc64.ABC)
1348 p.From.Type = obj.TYPE_CONST
1349 p.From.Offset = ppc64.BO_BCTR
1350 p.Reg = ppc64.REG_R0
1351 p.To.Type = obj.TYPE_BRANCH
1355 // when ctr == 1 the loop was not generated but
1356 // there are at least 32 bytes to clear, so add
1357 // that to the remainder to generate the code
1358 // to clear those doublewords
1363 // clear the remainder starting at offset zero
1366 // first clear as many doublewords as possible
1367 // then clear remaining sizes as available
1369 op, size := ppc64.AMOVB, int64(1)
1372 op, size = ppc64.AMOVD, 8
1374 op, size = ppc64.AMOVW, 4
1376 op, size = ppc64.AMOVH, 2
1379 p.From.Type = obj.TYPE_REG
1380 p.From.Reg = ppc64.REG_R0
1381 p.To.Type = obj.TYPE_MEM
1382 p.To.Reg = v.Args[0].Reg()
1383 p.To.Offset = offset
1388 case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1390 bytesPerLoop := int64(32)
1391 // This will be used when moving more
1392 // than 8 bytes. Moves start with
1393 // as many 8 byte moves as possible, then
1394 // 4, 2, or 1 byte(s) as remaining. This will
1395 // work and be efficient for power8 or later.
1396 // If there are 64 or more bytes, then a
1397 // loop is generated to move 32 bytes and
1398 // update the src and dst addresses on each
1399 // iteration. When < 64 bytes, the appropriate
1400 // number of moves are generated based on the
1402 // When moving >= 64 bytes a loop is used
1403 // MOVD len/32,REG_TMP
1407 // LXVD2X (R0)(R21),VS32
1408 // LXVD2X (R31)(R21),VS33
1410 // STXVD2X VS32,(R0)(R20)
1411 // STXVD2X VS33,(R31)(R20)
1414 // Bytes not moved by this loop are moved
1415 // with a combination of the following instructions,
1416 // starting with the largest sizes and generating as
1417 // many as needed, using the appropriate offset value.
1427 // Each loop iteration moves 32 bytes
1428 ctr := v.AuxInt / bytesPerLoop
1430 // Remainder after the loop
1431 rem := v.AuxInt % bytesPerLoop
1433 dstReg := v.Args[0].Reg()
1434 srcReg := v.Args[1].Reg()
1436 // The set of registers used here, must match the clobbered reg list
1442 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1445 p := s.Prog(ppc64.AMOVD)
1446 p.From.Type = obj.TYPE_CONST
1448 p.To.Type = obj.TYPE_REG
1449 p.To.Reg = ppc64.REGTMP
1451 p = s.Prog(ppc64.AMOVD)
1452 p.From.Type = obj.TYPE_REG
1453 p.From.Reg = ppc64.REGTMP
1454 p.To.Type = obj.TYPE_REG
1455 p.To.Reg = ppc64.REG_CTR
1457 // Use REGTMP as index reg
1458 p = s.Prog(ppc64.AMOVD)
1459 p.From.Type = obj.TYPE_CONST
1461 p.To.Type = obj.TYPE_REG
1462 p.To.Reg = ppc64.REGTMP
1464 // Don't adding padding for
1465 // alignment with small iteration
1468 p = s.Prog(obj.APCALIGN)
1469 p.From.Type = obj.TYPE_CONST
1473 // Generate 16 byte loads and stores.
1474 // Use temp register for index (16)
1475 // on the second one.
1477 p = s.Prog(ppc64.ALXVD2X)
1478 p.From.Type = obj.TYPE_MEM
1480 p.From.Index = ppc64.REGZERO
1481 p.To.Type = obj.TYPE_REG
1482 p.To.Reg = ppc64.REG_VS32
1486 p = s.Prog(ppc64.ALXVD2X)
1487 p.From.Type = obj.TYPE_MEM
1489 p.From.Index = ppc64.REGTMP
1490 p.To.Type = obj.TYPE_REG
1491 p.To.Reg = ppc64.REG_VS33
1493 // increment the src reg for next iteration
1494 p = s.Prog(ppc64.AADD)
1496 p.From.Type = obj.TYPE_CONST
1497 p.From.Offset = bytesPerLoop
1498 p.To.Type = obj.TYPE_REG
1501 // generate 16 byte stores
1502 p = s.Prog(ppc64.ASTXVD2X)
1503 p.From.Type = obj.TYPE_REG
1504 p.From.Reg = ppc64.REG_VS32
1505 p.To.Type = obj.TYPE_MEM
1507 p.To.Index = ppc64.REGZERO
1509 p = s.Prog(ppc64.ASTXVD2X)
1510 p.From.Type = obj.TYPE_REG
1511 p.From.Reg = ppc64.REG_VS33
1512 p.To.Type = obj.TYPE_MEM
1514 p.To.Index = ppc64.REGTMP
1516 // increment the dst reg for next iteration
1517 p = s.Prog(ppc64.AADD)
1519 p.From.Type = obj.TYPE_CONST
1520 p.From.Offset = bytesPerLoop
1521 p.To.Type = obj.TYPE_REG
1524 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1526 p = s.Prog(ppc64.ABC)
1527 p.From.Type = obj.TYPE_CONST
1528 p.From.Offset = ppc64.BO_BCTR
1529 p.Reg = ppc64.REG_R0
1530 p.To.Type = obj.TYPE_BRANCH
1533 // srcReg and dstReg were incremented in the loop, so
1534 // later instructions start with offset 0.
1538 // No loop was generated for one iteration, so
1539 // add 32 bytes to the remainder to move those bytes.
1545 // Generate 16 byte loads and stores.
1546 // Use temp register for index (value 16)
1547 // on the second one.
1548 p := s.Prog(ppc64.ALXVD2X)
1549 p.From.Type = obj.TYPE_MEM
1551 p.From.Index = ppc64.REGZERO
1552 p.To.Type = obj.TYPE_REG
1553 p.To.Reg = ppc64.REG_VS32
1555 p = s.Prog(ppc64.ASTXVD2X)
1556 p.From.Type = obj.TYPE_REG
1557 p.From.Reg = ppc64.REG_VS32
1558 p.To.Type = obj.TYPE_MEM
1560 p.To.Index = ppc64.REGZERO
1566 // Use REGTMP as index reg
1567 p := s.Prog(ppc64.AMOVD)
1568 p.From.Type = obj.TYPE_CONST
1570 p.To.Type = obj.TYPE_REG
1571 p.To.Reg = ppc64.REGTMP
1573 p = s.Prog(ppc64.ALXVD2X)
1574 p.From.Type = obj.TYPE_MEM
1576 p.From.Index = ppc64.REGTMP
1577 p.To.Type = obj.TYPE_REG
1578 p.To.Reg = ppc64.REG_VS32
1580 p = s.Prog(ppc64.ASTXVD2X)
1581 p.From.Type = obj.TYPE_REG
1582 p.From.Reg = ppc64.REG_VS32
1583 p.To.Type = obj.TYPE_MEM
1585 p.To.Index = ppc64.REGTMP
1592 // Generate all the remaining load and store pairs, starting with
1593 // as many 8 byte moves as possible, then 4, 2, 1.
1595 op, size := ppc64.AMOVB, int64(1)
1598 op, size = ppc64.AMOVD, 8
1600 op, size = ppc64.AMOVWZ, 4
1602 op, size = ppc64.AMOVH, 2
1606 p.To.Type = obj.TYPE_REG
1607 p.To.Reg = ppc64.REGTMP
1608 p.From.Type = obj.TYPE_MEM
1610 p.From.Offset = offset
1614 p.From.Type = obj.TYPE_REG
1615 p.From.Reg = ppc64.REGTMP
1616 p.To.Type = obj.TYPE_MEM
1618 p.To.Offset = offset
1623 case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1624 bytesPerLoop := int64(64)
1625 // This is used when moving more
1626 // than 8 bytes on power9. Moves start with
1627 // as many 8 byte moves as possible, then
1628 // 4, 2, or 1 byte(s) as remaining. This will
1629 // work and be efficient for power8 or later.
1630 // If there are 64 or more bytes, then a
1631 // loop is generated to move 32 bytes and
1632 // update the src and dst addresses on each
1633 // iteration. When < 64 bytes, the appropriate
1634 // number of moves are generated based on the
1636 // When moving >= 64 bytes a loop is used
1637 // MOVD len/32,REG_TMP
1644 // STXV VS33,16(R20)
1647 // Bytes not moved by this loop are moved
1648 // with a combination of the following instructions,
1649 // starting with the largest sizes and generating as
1650 // many as needed, using the appropriate offset value.
1660 // Each loop iteration moves 32 bytes
1661 ctr := v.AuxInt / bytesPerLoop
1663 // Remainder after the loop
1664 rem := v.AuxInt % bytesPerLoop
1666 dstReg := v.Args[0].Reg()
1667 srcReg := v.Args[1].Reg()
1674 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1677 p := s.Prog(ppc64.AMOVD)
1678 p.From.Type = obj.TYPE_CONST
1680 p.To.Type = obj.TYPE_REG
1681 p.To.Reg = ppc64.REGTMP
1683 p = s.Prog(ppc64.AMOVD)
1684 p.From.Type = obj.TYPE_REG
1685 p.From.Reg = ppc64.REGTMP
1686 p.To.Type = obj.TYPE_REG
1687 p.To.Reg = ppc64.REG_CTR
1689 p = s.Prog(obj.APCALIGN)
1690 p.From.Type = obj.TYPE_CONST
1693 // Generate 16 byte loads and stores.
1694 p = s.Prog(ppc64.ALXV)
1695 p.From.Type = obj.TYPE_MEM
1697 p.From.Offset = offset
1698 p.To.Type = obj.TYPE_REG
1699 p.To.Reg = ppc64.REG_VS32
1703 p = s.Prog(ppc64.ALXV)
1704 p.From.Type = obj.TYPE_MEM
1706 p.From.Offset = offset + 16
1707 p.To.Type = obj.TYPE_REG
1708 p.To.Reg = ppc64.REG_VS33
1710 // generate 16 byte stores
1711 p = s.Prog(ppc64.ASTXV)
1712 p.From.Type = obj.TYPE_REG
1713 p.From.Reg = ppc64.REG_VS32
1714 p.To.Type = obj.TYPE_MEM
1716 p.To.Offset = offset
1718 p = s.Prog(ppc64.ASTXV)
1719 p.From.Type = obj.TYPE_REG
1720 p.From.Reg = ppc64.REG_VS33
1721 p.To.Type = obj.TYPE_MEM
1723 p.To.Offset = offset + 16
1725 // Generate 16 byte loads and stores.
1726 p = s.Prog(ppc64.ALXV)
1727 p.From.Type = obj.TYPE_MEM
1729 p.From.Offset = offset + 32
1730 p.To.Type = obj.TYPE_REG
1731 p.To.Reg = ppc64.REG_VS32
1733 p = s.Prog(ppc64.ALXV)
1734 p.From.Type = obj.TYPE_MEM
1736 p.From.Offset = offset + 48
1737 p.To.Type = obj.TYPE_REG
1738 p.To.Reg = ppc64.REG_VS33
1740 // generate 16 byte stores
1741 p = s.Prog(ppc64.ASTXV)
1742 p.From.Type = obj.TYPE_REG
1743 p.From.Reg = ppc64.REG_VS32
1744 p.To.Type = obj.TYPE_MEM
1746 p.To.Offset = offset + 32
1748 p = s.Prog(ppc64.ASTXV)
1749 p.From.Type = obj.TYPE_REG
1750 p.From.Reg = ppc64.REG_VS33
1751 p.To.Type = obj.TYPE_MEM
1753 p.To.Offset = offset + 48
1755 // increment the src reg for next iteration
1756 p = s.Prog(ppc64.AADD)
1758 p.From.Type = obj.TYPE_CONST
1759 p.From.Offset = bytesPerLoop
1760 p.To.Type = obj.TYPE_REG
1763 // increment the dst reg for next iteration
1764 p = s.Prog(ppc64.AADD)
1766 p.From.Type = obj.TYPE_CONST
1767 p.From.Offset = bytesPerLoop
1768 p.To.Type = obj.TYPE_REG
1771 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1773 p = s.Prog(ppc64.ABC)
1774 p.From.Type = obj.TYPE_CONST
1775 p.From.Offset = ppc64.BO_BCTR
1776 p.Reg = ppc64.REG_R0
1777 p.To.Type = obj.TYPE_BRANCH
1780 // srcReg and dstReg were incremented in the loop, so
1781 // later instructions start with offset 0.
1785 // No loop was generated for one iteration, so
1786 // add 32 bytes to the remainder to move those bytes.
1791 p := s.Prog(ppc64.ALXV)
1792 p.From.Type = obj.TYPE_MEM
1794 p.To.Type = obj.TYPE_REG
1795 p.To.Reg = ppc64.REG_VS32
1797 p = s.Prog(ppc64.ALXV)
1798 p.From.Type = obj.TYPE_MEM
1801 p.To.Type = obj.TYPE_REG
1802 p.To.Reg = ppc64.REG_VS33
1804 p = s.Prog(ppc64.ASTXV)
1805 p.From.Type = obj.TYPE_REG
1806 p.From.Reg = ppc64.REG_VS32
1807 p.To.Type = obj.TYPE_MEM
1810 p = s.Prog(ppc64.ASTXV)
1811 p.From.Type = obj.TYPE_REG
1812 p.From.Reg = ppc64.REG_VS33
1813 p.To.Type = obj.TYPE_MEM
1822 // Generate 16 byte loads and stores.
1823 p := s.Prog(ppc64.ALXV)
1824 p.From.Type = obj.TYPE_MEM
1826 p.From.Offset = offset
1827 p.To.Type = obj.TYPE_REG
1828 p.To.Reg = ppc64.REG_VS32
1830 p = s.Prog(ppc64.ASTXV)
1831 p.From.Type = obj.TYPE_REG
1832 p.From.Reg = ppc64.REG_VS32
1833 p.To.Type = obj.TYPE_MEM
1835 p.To.Offset = offset
1841 p := s.Prog(ppc64.ALXV)
1842 p.From.Type = obj.TYPE_MEM
1844 p.From.Offset = offset
1845 p.To.Type = obj.TYPE_REG
1846 p.To.Reg = ppc64.REG_VS32
1848 p = s.Prog(ppc64.ASTXV)
1849 p.From.Type = obj.TYPE_REG
1850 p.From.Reg = ppc64.REG_VS32
1851 p.To.Type = obj.TYPE_MEM
1853 p.To.Offset = offset
1859 // Generate all the remaining load and store pairs, starting with
1860 // as many 8 byte moves as possible, then 4, 2, 1.
1862 op, size := ppc64.AMOVB, int64(1)
1865 op, size = ppc64.AMOVD, 8
1867 op, size = ppc64.AMOVWZ, 4
1869 op, size = ppc64.AMOVH, 2
1873 p.To.Type = obj.TYPE_REG
1874 p.To.Reg = ppc64.REGTMP
1875 p.From.Type = obj.TYPE_MEM
1877 p.From.Offset = offset
1881 p.From.Type = obj.TYPE_REG
1882 p.From.Reg = ppc64.REGTMP
1883 p.To.Type = obj.TYPE_MEM
1885 p.To.Offset = offset
1890 case ssa.OpPPC64CALLstatic:
1893 case ssa.OpPPC64CALLtail:
1896 case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1897 p := s.Prog(ppc64.AMOVD)
1898 p.From.Type = obj.TYPE_REG
1899 p.From.Reg = v.Args[0].Reg()
1900 p.To.Type = obj.TYPE_REG
1901 p.To.Reg = ppc64.REG_LR
1903 if v.Args[0].Reg() != ppc64.REG_R12 {
1904 v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1908 pp.To.Reg = ppc64.REG_LR
1910 // Insert a hint this is not a subroutine return.
1913 if base.Ctxt.Flag_shared {
1914 // When compiling Go into PIC, the function we just
1915 // called via pointer might have been implemented in
1916 // a separate module and so overwritten the TOC
1917 // pointer in R2; reload it.
1918 q := s.Prog(ppc64.AMOVD)
1919 q.From.Type = obj.TYPE_MEM
1921 q.From.Reg = ppc64.REGSP
1922 q.To.Type = obj.TYPE_REG
1923 q.To.Reg = ppc64.REG_R2
1926 case ssa.OpPPC64LoweredWB:
1927 p := s.Prog(obj.ACALL)
1928 p.To.Type = obj.TYPE_MEM
1929 p.To.Name = obj.NAME_EXTERN
1930 p.To.Sym = v.Aux.(*obj.LSym)
1932 case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1933 p := s.Prog(obj.ACALL)
1934 p.To.Type = obj.TYPE_MEM
1935 p.To.Name = obj.NAME_EXTERN
1936 p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
1937 s.UseArgs(16) // space used in callee args area by assembly stubs
1939 case ssa.OpPPC64LoweredNilCheck:
1940 if buildcfg.GOOS == "aix" {
1944 // NOP (so the BNE has somewhere to land)
1947 p := s.Prog(ppc64.ACMP)
1948 p.From.Type = obj.TYPE_REG
1949 p.From.Reg = v.Args[0].Reg()
1950 p.To.Type = obj.TYPE_REG
1951 p.To.Reg = ppc64.REG_R0
1954 p2 := s.Prog(ppc64.ABNE)
1955 p2.To.Type = obj.TYPE_BRANCH
1958 // Write at 0 is forbidden and will trigger a SIGSEGV
1959 p = s.Prog(ppc64.AMOVW)
1960 p.From.Type = obj.TYPE_REG
1961 p.From.Reg = ppc64.REG_R0
1962 p.To.Type = obj.TYPE_MEM
1963 p.To.Reg = ppc64.REG_R0
1965 // NOP (so the BNE has somewhere to land)
1966 nop := s.Prog(obj.ANOP)
1967 p2.To.SetTarget(nop)
1970 // Issue a load which will fault if arg is nil.
1971 p := s.Prog(ppc64.AMOVBZ)
1972 p.From.Type = obj.TYPE_MEM
1973 p.From.Reg = v.Args[0].Reg()
1974 ssagen.AddAux(&p.From, v)
1975 p.To.Type = obj.TYPE_REG
1976 p.To.Reg = ppc64.REGTMP
1978 if logopt.Enabled() {
1979 logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1981 if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1982 base.WarnfAt(v.Pos, "generated nil check")
1985 // These should be resolved by rules and not make it here.
1986 case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1987 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1988 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1989 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1990 case ssa.OpPPC64InvertFlags:
1991 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1992 case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1993 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1994 case ssa.OpClobber, ssa.OpClobberReg:
1995 // TODO: implement for clobberdead experiment. Nop is ok for now.
1997 v.Fatalf("genValue not implemented: %s", v.LongString())
// blockJump maps a PPC64 conditional block kind to the branch
// instruction emitted when the branch is taken and the inverted
// branch used when the condition falls through to the other
// successor (see ssaGenBlock, which reads jmp.asm / jmp.invasm).
// The two bool flags request a companion branch for floating-point
// comparisons whose condition spans two CR bits: ssaGenBlock appears
// to emit an extra BEQ next to the taken branch (asmeq) and an extra
// BVS — branch on unordered, i.e. NaN — next to the inverted branch
// (invasmun); see the FGE/FLE rows and their comments below.
// NOTE(review): this listing is sampled — the obj.As-typed columns of
// the struct type and the closing braces are not visible here; confirm
// field names/order against the full source.
2001 var blockJump = [...]struct {
2003 asmeq, invasmun bool
2005 ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
2006 ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
2008 ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
2009 ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
2010 ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
2011 ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
2013 // TODO: need to work FP comparisons into block jumps
2014 ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
2015 ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
2016 ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
2017 ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
// ssaGenBlock emits the control-flow instructions that terminate block
// b, given that next is the block laid out immediately after it (so a
// fall-through needs no jump). Branches whose targets are not yet laid
// out are recorded in s.Branches for later patching.
// NOTE(review): this listing is sampled — switch headers, else arms and
// closing braces are omitted between the visible statements; the
// comments below describe only the statements that are visible.
2020 func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
2022 case ssa.BlockDefer:
2023 // defer returns in R3:
2024 // 0 if we should continue executing
2025 // 1 if we should jump to deferreturn call
// Compare R3 against R0 (always zero), then branch to the
// deferreturn successor (Succs[1]) when R3 != 0.
2026 p := s.Prog(ppc64.ACMP)
2027 p.From.Type = obj.TYPE_REG
2028 p.From.Reg = ppc64.REG_R3
2029 p.To.Type = obj.TYPE_REG
2030 p.To.Reg = ppc64.REG_R0
2032 p = s.Prog(ppc64.ABNE)
2033 p.To.Type = obj.TYPE_BRANCH
2034 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
// Fall through to Succs[0] if it is next; otherwise jump to it.
2035 if b.Succs[0].Block() != next {
2036 p := s.Prog(obj.AJMP)
2037 p.To.Type = obj.TYPE_BRANCH
2038 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
2041 case ssa.BlockPlain:
// Unconditional successor: emit a jump only when it is not the
// fall-through block.
2042 if b.Succs[0].Block() != next {
2043 p := s.Prog(obj.AJMP)
2044 p.To.Type = obj.TYPE_BRANCH
2045 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
2047 case ssa.BlockExit, ssa.BlockRetJmp:
// Conditional blocks: look up the (taken, inverted) branch pair in
// blockJump, then pick a shape depending on which successor, if
// either, is the fall-through block.
2051 case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
2052 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
2053 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
2054 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
2055 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
2056 jmp := blockJump[b.Kind]
// Succs[0] is next: branch on the inverted condition to Succs[1],
// with an extra BVS for the FP-unordered (NaN) case.
2058 case b.Succs[0].Block():
2059 s.Br(jmp.invasm, b.Succs[1].Block())
2061 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2062 s.Br(ppc64.ABVS, b.Succs[1].Block())
// Succs[1] is next: branch on the condition to Succs[0], with an
// extra BEQ for FP conditions that also accept equality.
2064 case b.Succs[1].Block():
2065 s.Br(jmp.asm, b.Succs[0].Block())
2067 s.Br(ppc64.ABEQ, b.Succs[0].Block())
// Neither successor is next: branch toward the likely successor
// and finish with an unconditional jump to the other.
2070 if b.Likely != ssa.BranchUnlikely {
2071 s.Br(jmp.asm, b.Succs[0].Block())
2073 s.Br(ppc64.ABEQ, b.Succs[0].Block())
2075 s.Br(obj.AJMP, b.Succs[1].Block())
2077 s.Br(jmp.invasm, b.Succs[1].Block())
2079 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2080 s.Br(ppc64.ABVS, b.Succs[1].Block())
2082 s.Br(obj.AJMP, b.Succs[0].Block())
// Any block kind not handled above is a code-generation bug.
2086 b.Fatalf("branch not implemented: %s", b.LongString())
// loadRegResult emits an instruction that loads the stack-resident
// result value n, at frame offset n.FrameOffset()+off, into a
// register, using the load opcode selected by loadByType(t). The
// source operand is addressed as a stack auto (NAME_AUTO) relative to
// n's link symbol; per the signature, the generated Prog is returned.
// NOTE(review): this listing is sampled and ends before the
// destination register is assigned from reg and the Prog is returned —
// confirm the tail against the full source.
2090 func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2091 p := s.Prog(loadByType(t))
2092 p.From.Type = obj.TYPE_MEM
2093 p.From.Name = obj.NAME_AUTO
2094 p.From.Sym = n.Linksym()
2095 p.From.Offset = n.FrameOffset() + off
2096 p.To.Type = obj.TYPE_REG
2101 func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2102 p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
2103 p.To.Name = obj.NAME_PARAM
2104 p.To.Sym = n.Linksym()
2105 p.Pos = p.Pos.WithNotStmt()