1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/base"
9 "cmd/compile/internal/ir"
10 "cmd/compile/internal/logopt"
11 "cmd/compile/internal/objw"
12 "cmd/compile/internal/ssa"
13 "cmd/compile/internal/ssagen"
14 "cmd/compile/internal/types"
16 "cmd/internal/obj/ppc64"
22 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
23 func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
24 // flive := b.FlagsLiveAtEnd
25 // if b.Control != nil && b.Control.Type.IsFlags() {
28 // for i := len(b.Values) - 1; i >= 0; i-- {
30 // if flive && (v.Op == ssa.OpPPC64MOVDconst) {
31 // // The "mark" is any non-nil Aux value.
34 // if v.Type.IsFlags() {
37 // for _, a := range v.Args {
38 // if a.Type.IsFlags() {
45 // loadByType returns the load instruction of the given type.
46 func loadByType(t *types.Type) obj.As {
78 panic("bad load type")
81 // storeByType returns the store instruction of the given type.
82 func storeByType(t *types.Type) obj.As {
102 panic("bad store type")
105 func ssaGenValue(s *ssagen.State, v *ssa.Value) {
128 case ssa.OpPPC64LoweredAtomicAnd8,
129 ssa.OpPPC64LoweredAtomicAnd32,
130 ssa.OpPPC64LoweredAtomicOr8,
131 ssa.OpPPC64LoweredAtomicOr32:
133 // LBAR/LWAR (Rarg0), Rtmp
134 // AND/OR Rarg1, Rtmp
135 // STBCCC/STWCCC Rtmp, (Rarg0)
139 if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
143 r0 := v.Args[0].Reg()
144 r1 := v.Args[1].Reg()
145 // LWSYNC - Assuming shared data not write-through-required nor
146 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
147 plwsync := s.Prog(ppc64.ALWSYNC)
148 plwsync.To.Type = obj.TYPE_NONE
151 p.From.Type = obj.TYPE_MEM
153 p.To.Type = obj.TYPE_REG
154 p.To.Reg = ppc64.REGTMP
156 p1 := s.Prog(v.Op.Asm())
157 p1.From.Type = obj.TYPE_REG
159 p1.To.Type = obj.TYPE_REG
160 p1.To.Reg = ppc64.REGTMP
163 p2.From.Type = obj.TYPE_REG
164 p2.From.Reg = ppc64.REGTMP
165 p2.To.Type = obj.TYPE_MEM
167 p2.RegTo2 = ppc64.REGTMP
169 p3 := s.Prog(ppc64.ABNE)
170 p3.To.Type = obj.TYPE_BRANCH
173 case ssa.OpPPC64LoweredAtomicAdd32,
174 ssa.OpPPC64LoweredAtomicAdd64:
176 // LDAR/LWAR (Rarg0), Rout
178 // STDCCC/STWCCC Rout, (Rarg0)
180 // MOVW Rout,Rout (if Add32)
183 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
187 r0 := v.Args[0].Reg()
188 r1 := v.Args[1].Reg()
190 // LWSYNC - Assuming shared data not write-through-required nor
191 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
192 plwsync := s.Prog(ppc64.ALWSYNC)
193 plwsync.To.Type = obj.TYPE_NONE
196 p.From.Type = obj.TYPE_MEM
198 p.To.Type = obj.TYPE_REG
201 p1 := s.Prog(ppc64.AADD)
202 p1.From.Type = obj.TYPE_REG
205 p1.To.Type = obj.TYPE_REG
208 p3.From.Type = obj.TYPE_REG
210 p3.To.Type = obj.TYPE_MEM
213 p4 := s.Prog(ppc64.ABNE)
214 p4.To.Type = obj.TYPE_BRANCH
217 // Ensure a 32 bit result
218 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
219 p5 := s.Prog(ppc64.AMOVWZ)
220 p5.To.Type = obj.TYPE_REG
222 p5.From.Type = obj.TYPE_REG
226 case ssa.OpPPC64LoweredAtomicExchange32,
227 ssa.OpPPC64LoweredAtomicExchange64:
229 // LDAR/LWAR (Rarg0), Rout
230 // STDCCC/STWCCC Rout, (Rarg0)
235 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
239 r0 := v.Args[0].Reg()
240 r1 := v.Args[1].Reg()
242 // LWSYNC - Assuming shared data not write-through-required nor
243 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
244 plwsync := s.Prog(ppc64.ALWSYNC)
245 plwsync.To.Type = obj.TYPE_NONE
248 p.From.Type = obj.TYPE_MEM
250 p.To.Type = obj.TYPE_REG
254 p1.From.Type = obj.TYPE_REG
256 p1.To.Type = obj.TYPE_MEM
259 p2 := s.Prog(ppc64.ABNE)
260 p2.To.Type = obj.TYPE_BRANCH
263 pisync := s.Prog(ppc64.AISYNC)
264 pisync.To.Type = obj.TYPE_NONE
266 case ssa.OpPPC64LoweredAtomicLoad8,
267 ssa.OpPPC64LoweredAtomicLoad32,
268 ssa.OpPPC64LoweredAtomicLoad64,
269 ssa.OpPPC64LoweredAtomicLoadPtr:
271 // MOVB/MOVD/MOVW (Rarg0), Rout
278 case ssa.OpPPC64LoweredAtomicLoad8:
280 case ssa.OpPPC64LoweredAtomicLoad32:
284 arg0 := v.Args[0].Reg()
286 // SYNC when AuxInt == 1; otherwise, load-acquire
288 psync := s.Prog(ppc64.ASYNC)
289 psync.To.Type = obj.TYPE_NONE
293 p.From.Type = obj.TYPE_MEM
295 p.To.Type = obj.TYPE_REG
299 p1.From.Type = obj.TYPE_REG
301 p1.To.Type = obj.TYPE_REG
304 p2 := s.Prog(ppc64.ABNE)
305 p2.To.Type = obj.TYPE_BRANCH
307 pisync := s.Prog(ppc64.AISYNC)
308 pisync.To.Type = obj.TYPE_NONE
309 p2.To.SetTarget(pisync)
311 case ssa.OpPPC64LoweredAtomicStore8,
312 ssa.OpPPC64LoweredAtomicStore32,
313 ssa.OpPPC64LoweredAtomicStore64:
315 // MOVB/MOVW/MOVD arg1,(arg0)
318 case ssa.OpPPC64LoweredAtomicStore8:
320 case ssa.OpPPC64LoweredAtomicStore32:
323 arg0 := v.Args[0].Reg()
324 arg1 := v.Args[1].Reg()
325 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
327 syncOp := ppc64.ASYNC
329 syncOp = ppc64.ALWSYNC
331 psync := s.Prog(syncOp)
332 psync.To.Type = obj.TYPE_NONE
335 p.To.Type = obj.TYPE_MEM
337 p.From.Type = obj.TYPE_REG
340 case ssa.OpPPC64LoweredAtomicCas64,
341 ssa.OpPPC64LoweredAtomicCas32:
345 // LDAR (Rarg0), MutexHint, Rtmp
348 // STDCCC Rarg2, (Rarg0)
350 // LWSYNC // Only for sequential consistency; not required in CasRel.
356 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
361 r0 := v.Args[0].Reg()
362 r1 := v.Args[1].Reg()
363 r2 := v.Args[2].Reg()
365 // Initialize return value to false
366 p := s.Prog(ppc64.AMOVD)
367 p.From.Type = obj.TYPE_CONST
369 p.To.Type = obj.TYPE_REG
371 // LWSYNC - Assuming shared data not write-through-required nor
372 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
373 plwsync1 := s.Prog(ppc64.ALWSYNC)
374 plwsync1.To.Type = obj.TYPE_NONE
377 p0.From.Type = obj.TYPE_MEM
379 p0.To.Type = obj.TYPE_REG
380 p0.To.Reg = ppc64.REGTMP
381 // If it is a Compare-and-Swap-Release operation, set the EH field with
388 p1.From.Type = obj.TYPE_REG
390 p1.To.Reg = ppc64.REGTMP
391 p1.To.Type = obj.TYPE_REG
392 // BNE done with return value = false
393 p2 := s.Prog(ppc64.ABNE)
394 p2.To.Type = obj.TYPE_BRANCH
397 p3.From.Type = obj.TYPE_REG
399 p3.To.Type = obj.TYPE_MEM
402 p4 := s.Prog(ppc64.ABNE)
403 p4.To.Type = obj.TYPE_BRANCH
405 // LWSYNC - Assuming shared data not write-through-required nor
406 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
407 // If the operation is a CAS-Release, then synchronization is not necessary.
409 plwsync2 := s.Prog(ppc64.ALWSYNC)
410 plwsync2.To.Type = obj.TYPE_NONE
413 p5 := s.Prog(ppc64.AMOVD)
414 p5.From.Type = obj.TYPE_CONST
416 p5.To.Type = obj.TYPE_REG
419 p6 := s.Prog(obj.ANOP)
422 case ssa.OpPPC64LoweredPubBarrier:
426 case ssa.OpPPC64LoweredGetClosurePtr:
427 // Closure pointer is R11 (already)
428 ssagen.CheckLoweredGetClosurePtr(v)
430 case ssa.OpPPC64LoweredGetCallerSP:
431 // caller's SP is FixedFrameSize below the address of the first arg
432 p := s.Prog(ppc64.AMOVD)
433 p.From.Type = obj.TYPE_ADDR
434 p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
435 p.From.Name = obj.NAME_PARAM
436 p.To.Type = obj.TYPE_REG
439 case ssa.OpPPC64LoweredGetCallerPC:
440 p := s.Prog(obj.AGETCALLERPC)
441 p.To.Type = obj.TYPE_REG
444 case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
445 // input is already rounded
448 loadOp := loadByType(v.Type)
450 ssagen.AddrAuto(&p.From, v.Args[0])
451 p.To.Type = obj.TYPE_REG
455 storeOp := storeByType(v.Type)
457 p.From.Type = obj.TYPE_REG
458 p.From.Reg = v.Args[0].Reg()
459 ssagen.AddrAuto(&p.To, v)
461 case ssa.OpArgIntReg, ssa.OpArgFloatReg:
462 // The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
463 // The loop only runs once.
464 for _, a := range v.Block.Func.RegArgs {
465 // Pass the spill/unspill information along to the assembler, offset by size of
466 // the saved LR slot.
467 addr := ssagen.SpillSlotAddr(a, ppc64.REGSP, base.Ctxt.Arch.FixedFrameSize)
468 s.FuncInfo().AddSpill(
469 obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
471 v.Block.Func.RegArgs = nil
473 ssagen.CheckArgReg(v)
475 case ssa.OpPPC64DIVD:
485 r0 := v.Args[0].Reg()
486 r1 := v.Args[1].Reg()
488 p := s.Prog(ppc64.ACMP)
489 p.From.Type = obj.TYPE_REG
491 p.To.Type = obj.TYPE_CONST
494 pbahead := s.Prog(ppc64.ABEQ)
495 pbahead.To.Type = obj.TYPE_BRANCH
497 p = s.Prog(v.Op.Asm())
498 p.From.Type = obj.TYPE_REG
501 p.To.Type = obj.TYPE_REG
504 pbover := s.Prog(obj.AJMP)
505 pbover.To.Type = obj.TYPE_BRANCH
507 p = s.Prog(ppc64.ANEG)
508 p.To.Type = obj.TYPE_REG
510 p.From.Type = obj.TYPE_REG
512 pbahead.To.SetTarget(p)
515 pbover.To.SetTarget(p)
517 case ssa.OpPPC64DIVW:
518 // word-width version of above
520 r0 := v.Args[0].Reg()
521 r1 := v.Args[1].Reg()
523 p := s.Prog(ppc64.ACMPW)
524 p.From.Type = obj.TYPE_REG
526 p.To.Type = obj.TYPE_CONST
529 pbahead := s.Prog(ppc64.ABEQ)
530 pbahead.To.Type = obj.TYPE_BRANCH
532 p = s.Prog(v.Op.Asm())
533 p.From.Type = obj.TYPE_REG
536 p.To.Type = obj.TYPE_REG
539 pbover := s.Prog(obj.AJMP)
540 pbover.To.Type = obj.TYPE_BRANCH
542 p = s.Prog(ppc64.ANEG)
543 p.To.Type = obj.TYPE_REG
545 p.From.Type = obj.TYPE_REG
547 pbahead.To.SetTarget(p)
550 pbover.To.SetTarget(p)
552 case ssa.OpPPC64CLRLSLWI:
554 r1 := v.Args[0].Reg()
556 p := s.Prog(v.Op.Asm())
557 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
558 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
559 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
561 p.To.Type = obj.TYPE_REG
564 case ssa.OpPPC64CLRLSLDI:
566 r1 := v.Args[0].Reg()
568 p := s.Prog(v.Op.Asm())
569 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
570 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
571 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
573 p.To.Type = obj.TYPE_REG
576 // Mask has been set as sh
577 case ssa.OpPPC64RLDICL:
579 r1 := v.Args[0].Reg()
581 p := s.Prog(v.Op.Asm())
582 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}
583 p.SetFrom3Const(ssa.GetPPC64Shiftmb(shifts))
585 p.To.Type = obj.TYPE_REG
588 case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
589 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
590 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
591 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
592 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
593 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
594 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
595 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
597 r1 := v.Args[0].Reg()
598 r2 := v.Args[1].Reg()
599 p := s.Prog(v.Op.Asm())
600 p.From.Type = obj.TYPE_REG
603 p.To.Type = obj.TYPE_REG
606 case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
607 r1 := v.Args[0].Reg()
608 r2 := v.Args[1].Reg()
609 p := s.Prog(v.Op.Asm())
610 p.From.Type = obj.TYPE_REG
613 p.To.Type = obj.TYPE_REG
616 case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
617 p := s.Prog(v.Op.Asm())
618 p.From.Type = obj.TYPE_CONST
619 p.From.Offset = v.AuxInt
620 p.Reg = v.Args[0].Reg()
621 p.To.Type = obj.TYPE_REG
624 // Auxint holds encoded rotate + mask
625 case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
626 rot, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
627 p := s.Prog(v.Op.Asm())
628 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
629 p.Reg = v.Args[0].Reg()
630 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(rot)}
631 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
634 case ssa.OpPPC64RLWNM:
635 _, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
636 p := s.Prog(v.Op.Asm())
637 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
638 p.Reg = v.Args[0].Reg()
639 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
640 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
642 case ssa.OpPPC64MADDLD:
644 r1 := v.Args[0].Reg()
645 r2 := v.Args[1].Reg()
646 r3 := v.Args[2].Reg()
648 p := s.Prog(v.Op.Asm())
649 p.From.Type = obj.TYPE_REG
653 p.To.Type = obj.TYPE_REG
656 case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
658 r1 := v.Args[0].Reg()
659 r2 := v.Args[1].Reg()
660 r3 := v.Args[2].Reg()
662 p := s.Prog(v.Op.Asm())
663 p.From.Type = obj.TYPE_REG
667 p.To.Type = obj.TYPE_REG
670 case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
671 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
672 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
673 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
675 p := s.Prog(v.Op.Asm())
676 p.To.Type = obj.TYPE_REG
678 p.From.Type = obj.TYPE_REG
679 p.From.Reg = v.Args[0].Reg()
681 case ssa.OpPPC64ADDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
682 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
683 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
684 p := s.Prog(v.Op.Asm())
685 p.Reg = v.Args[0].Reg()
686 p.From.Type = obj.TYPE_CONST
687 p.From.Offset = v.AuxInt
688 p.To.Type = obj.TYPE_REG
691 case ssa.OpPPC64ADDC, ssa.OpPPC64ADDE, ssa.OpPPC64SUBC, ssa.OpPPC64SUBE:
692 r := v.Reg0() // CA is the first, implied argument.
693 r1 := v.Args[0].Reg()
694 r2 := v.Args[1].Reg()
695 p := s.Prog(v.Op.Asm())
696 p.From.Type = obj.TYPE_REG
699 p.To.Type = obj.TYPE_REG
702 case ssa.OpPPC64ADDZEzero, ssa.OpPPC64SUBZEzero:
703 p := s.Prog(v.Op.Asm())
704 p.From.Type = obj.TYPE_REG
705 p.From.Reg = ppc64.REG_R0
706 p.To.Type = obj.TYPE_REG
709 case ssa.OpPPC64ADDCconst:
710 p := s.Prog(v.Op.Asm())
711 p.Reg = v.Args[0].Reg()
712 p.From.Type = obj.TYPE_CONST
713 p.From.Offset = v.AuxInt
714 p.To.Type = obj.TYPE_REG
715 // Output is a pair, the second is the CA, which is implied.
718 case ssa.OpPPC64SUBCconst:
719 p := s.Prog(v.Op.Asm())
720 p.SetFrom3Const(v.AuxInt)
721 p.From.Type = obj.TYPE_REG
722 p.From.Reg = v.Args[0].Reg()
723 p.To.Type = obj.TYPE_REG
726 case ssa.OpPPC64SUBFCconst:
727 p := s.Prog(v.Op.Asm())
728 p.SetFrom3Const(v.AuxInt)
729 p.From.Type = obj.TYPE_REG
730 p.From.Reg = v.Args[0].Reg()
731 p.To.Type = obj.TYPE_REG
734 case ssa.OpPPC64ANDCCconst:
735 p := s.Prog(v.Op.Asm())
736 p.Reg = v.Args[0].Reg()
737 p.From.Type = obj.TYPE_CONST
738 p.From.Offset = v.AuxInt
739 p.To.Type = obj.TYPE_REG
740 // p.To.Reg = ppc64.REGTMP // discard result
743 case ssa.OpPPC64MOVDaddr:
744 switch v.Aux.(type) {
746 v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
748 // If aux offset and aux int are both 0, and the same
749 // input and output regs are used, no instruction
750 // needs to be generated, since it would just be
752 if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
753 p := s.Prog(ppc64.AMOVD)
754 p.From.Type = obj.TYPE_ADDR
755 p.From.Reg = v.Args[0].Reg()
756 p.From.Offset = v.AuxInt
757 p.To.Type = obj.TYPE_REG
761 case *obj.LSym, ir.Node:
762 p := s.Prog(ppc64.AMOVD)
763 p.From.Type = obj.TYPE_ADDR
764 p.From.Reg = v.Args[0].Reg()
765 p.To.Type = obj.TYPE_REG
767 ssagen.AddAux(&p.From, v)
771 case ssa.OpPPC64MOVDconst:
772 p := s.Prog(v.Op.Asm())
773 p.From.Type = obj.TYPE_CONST
774 p.From.Offset = v.AuxInt
775 p.To.Type = obj.TYPE_REG
778 case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
779 p := s.Prog(v.Op.Asm())
780 p.From.Type = obj.TYPE_FCONST
781 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
782 p.To.Type = obj.TYPE_REG
785 case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
786 p := s.Prog(v.Op.Asm())
787 p.From.Type = obj.TYPE_REG
788 p.From.Reg = v.Args[0].Reg()
789 p.To.Type = obj.TYPE_REG
790 p.To.Reg = v.Args[1].Reg()
792 case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
793 p := s.Prog(v.Op.Asm())
794 p.From.Type = obj.TYPE_REG
795 p.From.Reg = v.Args[0].Reg()
796 p.To.Type = obj.TYPE_CONST
797 p.To.Offset = v.AuxInt
799 case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
800 // Shift in register to required size
801 p := s.Prog(v.Op.Asm())
802 p.From.Type = obj.TYPE_REG
803 p.From.Reg = v.Args[0].Reg()
805 p.To.Type = obj.TYPE_REG
807 case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:
809 // MOVDload and MOVWload are DS form instructions that are restricted to
810 // offsets that are a multiple of 4. If the offset is not a multiple of 4,
811 // then the address of the symbol to be loaded is computed (base + offset)
812 // and used as the new base register and the offset field in the instruction
813 // can be set to zero.
815 // This same problem can happen with gostrings since the final offset is not
816 // known yet, but could be unaligned after the relocation is resolved.
817 // So gostrings are handled the same way.
819 // This allows the MOVDload and MOVWload to be generated in more cases and
820 // eliminates some offset and alignment checking in the rules file.
822 fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
823 ssagen.AddAux(&fromAddr, v)
827 switch fromAddr.Name {
828 case obj.NAME_EXTERN, obj.NAME_STATIC:
829 // Special case for a rule that combines the bytes of a gostring.
830 // The v alignment might seem OK, but we don't want to load it
831 // using an offset because relocation comes later.
832 genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go:string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
834 genAddr = fromAddr.Offset%4 != 0
837 // Load full address into the temp register.
838 p := s.Prog(ppc64.AMOVD)
839 p.From.Type = obj.TYPE_ADDR
840 p.From.Reg = v.Args[0].Reg()
841 ssagen.AddAux(&p.From, v)
842 // Load target using temp as base register
843 // and offset zero. Setting NAME_NONE
844 // prevents any extra offsets from being
846 p.To.Type = obj.TYPE_REG
847 p.To.Reg = ppc64.REGTMP
848 fromAddr.Reg = ppc64.REGTMP
849 // Clear the offset field and other
850 // information that might be used
851 // by the assembler to add to the
852 // final offset value.
854 fromAddr.Name = obj.NAME_NONE
857 p := s.Prog(v.Op.Asm())
859 p.To.Type = obj.TYPE_REG
862 case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
863 p := s.Prog(v.Op.Asm())
864 p.From.Type = obj.TYPE_MEM
865 p.From.Reg = v.Args[0].Reg()
866 ssagen.AddAux(&p.From, v)
867 p.To.Type = obj.TYPE_REG
870 case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
871 p := s.Prog(v.Op.Asm())
872 p.From.Type = obj.TYPE_MEM
873 p.From.Reg = v.Args[0].Reg()
874 p.To.Type = obj.TYPE_REG
877 case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
878 p := s.Prog(v.Op.Asm())
879 p.To.Type = obj.TYPE_MEM
880 p.To.Reg = v.Args[0].Reg()
881 p.From.Type = obj.TYPE_REG
882 p.From.Reg = v.Args[1].Reg()
884 case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
885 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
886 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
887 p := s.Prog(v.Op.Asm())
888 p.From.Type = obj.TYPE_MEM
889 p.From.Reg = v.Args[0].Reg()
890 p.From.Index = v.Args[1].Reg()
891 p.To.Type = obj.TYPE_REG
894 case ssa.OpPPC64DCBT:
895 p := s.Prog(v.Op.Asm())
896 p.From.Type = obj.TYPE_MEM
897 p.From.Reg = v.Args[0].Reg()
898 p.To.Type = obj.TYPE_CONST
899 p.To.Offset = v.AuxInt
901 case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
902 p := s.Prog(v.Op.Asm())
903 p.From.Type = obj.TYPE_REG
904 p.From.Reg = ppc64.REGZERO
905 p.To.Type = obj.TYPE_MEM
906 p.To.Reg = v.Args[0].Reg()
907 ssagen.AddAux(&p.To, v)
909 case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:
911 // MOVDstore and MOVDstorezero become DS form instructions that are restricted
912 // to offset values that are a multiple of 4. If the offset field is not a
913 // multiple of 4, then the full address of the store target is computed (base +
914 // offset) and used as the new base register and the offset in the instruction
917 // This allows the MOVDstore and MOVDstorezero to be generated in more cases,
918 // and prevents checking of the offset value and alignment in the rules.
920 toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
921 ssagen.AddAux(&toAddr, v)
923 if toAddr.Offset%4 != 0 {
924 p := s.Prog(ppc64.AMOVD)
925 p.From.Type = obj.TYPE_ADDR
926 p.From.Reg = v.Args[0].Reg()
927 ssagen.AddAux(&p.From, v)
928 p.To.Type = obj.TYPE_REG
929 p.To.Reg = ppc64.REGTMP
930 toAddr.Reg = ppc64.REGTMP
931 // Clear the offset field and other
932 // information that might be used
933 // by the assembler to add to the
934 // final offset value.
936 toAddr.Name = obj.NAME_NONE
939 p := s.Prog(v.Op.Asm())
941 p.From.Type = obj.TYPE_REG
942 if v.Op == ssa.OpPPC64MOVDstorezero {
943 p.From.Reg = ppc64.REGZERO
945 p.From.Reg = v.Args[1].Reg()
948 case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
949 p := s.Prog(v.Op.Asm())
950 p.From.Type = obj.TYPE_REG
951 p.From.Reg = v.Args[1].Reg()
952 p.To.Type = obj.TYPE_MEM
953 p.To.Reg = v.Args[0].Reg()
954 ssagen.AddAux(&p.To, v)
956 case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
957 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
958 ssa.OpPPC64MOVHBRstoreidx:
959 p := s.Prog(v.Op.Asm())
960 p.From.Type = obj.TYPE_REG
961 p.From.Reg = v.Args[2].Reg()
962 p.To.Index = v.Args[1].Reg()
963 p.To.Type = obj.TYPE_MEM
964 p.To.Reg = v.Args[0].Reg()
966 case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
968 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
969 // ISEL only accepts 0, 1, 2 condition values but the others can be
970 // achieved by swapping operand order.
971 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
972 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
973 // ISELB is used when a boolean result is needed, returning 0 or 1
974 p := s.Prog(ppc64.AISEL)
975 p.To.Type = obj.TYPE_REG
977 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
978 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
979 if v.Op == ssa.OpPPC64ISEL {
980 r.Reg = v.Args[1].Reg()
982 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
985 p.SetFrom3Reg(v.Args[0].Reg())
987 p.Reg = v.Args[0].Reg()
990 p.From.Type = obj.TYPE_CONST
991 p.From.Offset = v.AuxInt & 3
993 case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
994 // The LoweredQuad code generation
995 // generates STXV instructions on
996 // power9. The Short variation is used
997 // if no loop is generated.
999 // sizes >= 64 generate a loop as follows:
1001 // Set up loop counter in CTR, used by BC
1002 // XXLXOR clears VS32
1003 // XXLXOR VS32,VS32,VS32
1004 // MOVD len/64,REG_TMP
1008 // STXV VS32,16(R20)
1009 // STXV VS32,32(R20)
1010 // STXV VS32,48(R20)
1014 // Bytes per iteration
1015 ctr := v.AuxInt / 64
1018 rem := v.AuxInt % 64
1020 // Only generate a loop if there is more
1021 // than 1 iteration.
1023 // Set up VS32 (V0) to hold 0s
1024 p := s.Prog(ppc64.AXXLXOR)
1025 p.From.Type = obj.TYPE_REG
1026 p.From.Reg = ppc64.REG_VS32
1027 p.To.Type = obj.TYPE_REG
1028 p.To.Reg = ppc64.REG_VS32
1029 p.Reg = ppc64.REG_VS32
1031 // Set up CTR loop counter
1032 p = s.Prog(ppc64.AMOVD)
1033 p.From.Type = obj.TYPE_CONST
1035 p.To.Type = obj.TYPE_REG
1036 p.To.Reg = ppc64.REGTMP
1038 p = s.Prog(ppc64.AMOVD)
1039 p.From.Type = obj.TYPE_REG
1040 p.From.Reg = ppc64.REGTMP
1041 p.To.Type = obj.TYPE_REG
1042 p.To.Reg = ppc64.REG_CTR
1044 // Don't generate padding for
1045 // loops with few iterations.
1047 p = s.Prog(obj.APCALIGN)
1048 p.From.Type = obj.TYPE_CONST
1052 // generate 4 STXVs to zero 64 bytes
1055 p = s.Prog(ppc64.ASTXV)
1056 p.From.Type = obj.TYPE_REG
1057 p.From.Reg = ppc64.REG_VS32
1058 p.To.Type = obj.TYPE_MEM
1059 p.To.Reg = v.Args[0].Reg()
1061 // Save the top of loop
1065 p = s.Prog(ppc64.ASTXV)
1066 p.From.Type = obj.TYPE_REG
1067 p.From.Reg = ppc64.REG_VS32
1068 p.To.Type = obj.TYPE_MEM
1069 p.To.Reg = v.Args[0].Reg()
1072 p = s.Prog(ppc64.ASTXV)
1073 p.From.Type = obj.TYPE_REG
1074 p.From.Reg = ppc64.REG_VS32
1075 p.To.Type = obj.TYPE_MEM
1076 p.To.Reg = v.Args[0].Reg()
1079 p = s.Prog(ppc64.ASTXV)
1080 p.From.Type = obj.TYPE_REG
1081 p.From.Reg = ppc64.REG_VS32
1082 p.To.Type = obj.TYPE_MEM
1083 p.To.Reg = v.Args[0].Reg()
1086 // Increment address for the
1087 // 64 bytes just zeroed.
1088 p = s.Prog(ppc64.AADD)
1089 p.Reg = v.Args[0].Reg()
1090 p.From.Type = obj.TYPE_CONST
1092 p.To.Type = obj.TYPE_REG
1093 p.To.Reg = v.Args[0].Reg()
1095 // Branch back to top of loop
1097 // BC with BO_BCTR generates bdnz
1098 p = s.Prog(ppc64.ABC)
1099 p.From.Type = obj.TYPE_CONST
1100 p.From.Offset = ppc64.BO_BCTR
1101 p.Reg = ppc64.REG_CR0LT
1102 p.To.Type = obj.TYPE_BRANCH
1105 // When ctr == 1 the loop was not generated but
1106 // there are at least 64 bytes to clear, so add
1107 // that to the remainder to generate the code
1108 // to clear those doublewords
1113 // Clear the remainder starting at offset zero
1116 if rem >= 16 && ctr <= 1 {
1117 // If the XXLXOR hasn't already been
1118 // generated, do it here to initialize
1120 p := s.Prog(ppc64.AXXLXOR)
1121 p.From.Type = obj.TYPE_REG
1122 p.From.Reg = ppc64.REG_VS32
1123 p.To.Type = obj.TYPE_REG
1124 p.To.Reg = ppc64.REG_VS32
1125 p.Reg = ppc64.REG_VS32
1127 // Generate STXV for 32 or 64
1130 p := s.Prog(ppc64.ASTXV)
1131 p.From.Type = obj.TYPE_REG
1132 p.From.Reg = ppc64.REG_VS32
1133 p.To.Type = obj.TYPE_MEM
1134 p.To.Reg = v.Args[0].Reg()
1135 p.To.Offset = offset
1137 p = s.Prog(ppc64.ASTXV)
1138 p.From.Type = obj.TYPE_REG
1139 p.From.Reg = ppc64.REG_VS32
1140 p.To.Type = obj.TYPE_MEM
1141 p.To.Reg = v.Args[0].Reg()
1142 p.To.Offset = offset + 16
1146 // Generate 16 bytes
1148 p := s.Prog(ppc64.ASTXV)
1149 p.From.Type = obj.TYPE_REG
1150 p.From.Reg = ppc64.REG_VS32
1151 p.To.Type = obj.TYPE_MEM
1152 p.To.Reg = v.Args[0].Reg()
1153 p.To.Offset = offset
1158 // first clear as many doublewords as possible
1159 // then clear remaining sizes as available
1161 op, size := ppc64.AMOVB, int64(1)
1164 op, size = ppc64.AMOVD, 8
1166 op, size = ppc64.AMOVW, 4
1168 op, size = ppc64.AMOVH, 2
1171 p.From.Type = obj.TYPE_REG
1172 p.From.Reg = ppc64.REG_R0
1173 p.To.Type = obj.TYPE_MEM
1174 p.To.Reg = v.Args[0].Reg()
1175 p.To.Offset = offset
1180 case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1182 // Unaligned data doesn't hurt performance
1183 // for these instructions on power8.
1185 // For sizes >= 64 generate a loop as follows:
1187 // Set up loop counter in CTR, used by BC
1188 // XXLXOR VS32,VS32,VS32
1189 // MOVD len/32,REG_TMP
1193 // STXVD2X VS32,(R0)(R20)
1194 // STXVD2X VS32,(R31)(R20)
1198 // any remainder is done as described below
1200 // for sizes < 64 bytes, first clear as many doublewords as possible,
1201 // then handle the remainder
1206 // the remainder bytes are cleared using one or more
1207 // of the following instructions with the appropriate
1208 // offsets depending which instructions are needed
1210 // MOVW R0,n1(R20) 4 bytes
1211 // MOVH R0,n2(R20) 2 bytes
1212 // MOVB R0,n3(R20) 1 byte
1214 // 7 bytes: MOVW, MOVH, MOVB
1215 // 6 bytes: MOVW, MOVH
1216 // 5 bytes: MOVW, MOVB
1217 // 3 bytes: MOVH, MOVB
1219 // each loop iteration does 32 bytes
1220 ctr := v.AuxInt / 32
1223 rem := v.AuxInt % 32
1225 // only generate a loop if there is more
1226 // than 1 iteration.
1228 // Set up VS32 (V0) to hold 0s
1229 p := s.Prog(ppc64.AXXLXOR)
1230 p.From.Type = obj.TYPE_REG
1231 p.From.Reg = ppc64.REG_VS32
1232 p.To.Type = obj.TYPE_REG
1233 p.To.Reg = ppc64.REG_VS32
1234 p.Reg = ppc64.REG_VS32
1236 // Set up CTR loop counter
1237 p = s.Prog(ppc64.AMOVD)
1238 p.From.Type = obj.TYPE_CONST
1240 p.To.Type = obj.TYPE_REG
1241 p.To.Reg = ppc64.REGTMP
1243 p = s.Prog(ppc64.AMOVD)
1244 p.From.Type = obj.TYPE_REG
1245 p.From.Reg = ppc64.REGTMP
1246 p.To.Type = obj.TYPE_REG
1247 p.To.Reg = ppc64.REG_CTR
1249 // Set up R31 to hold index value 16
1250 p = s.Prog(ppc64.AMOVD)
1251 p.From.Type = obj.TYPE_CONST
1253 p.To.Type = obj.TYPE_REG
1254 p.To.Reg = ppc64.REGTMP
1256 // Don't add padding for alignment
1257 // with few loop iterations.
1259 p = s.Prog(obj.APCALIGN)
1260 p.From.Type = obj.TYPE_CONST
1264 // generate 2 STXVD2Xs to store 16 bytes
1265 // when this is a loop then the top must be saved
1267 // This is the top of loop
1269 p = s.Prog(ppc64.ASTXVD2X)
1270 p.From.Type = obj.TYPE_REG
1271 p.From.Reg = ppc64.REG_VS32
1272 p.To.Type = obj.TYPE_MEM
1273 p.To.Reg = v.Args[0].Reg()
1274 p.To.Index = ppc64.REGZERO
1275 // Save the top of loop
1279 p = s.Prog(ppc64.ASTXVD2X)
1280 p.From.Type = obj.TYPE_REG
1281 p.From.Reg = ppc64.REG_VS32
1282 p.To.Type = obj.TYPE_MEM
1283 p.To.Reg = v.Args[0].Reg()
1284 p.To.Index = ppc64.REGTMP
1286 // Increment address for the
1287 // 4 doublewords just zeroed.
1288 p = s.Prog(ppc64.AADD)
1289 p.Reg = v.Args[0].Reg()
1290 p.From.Type = obj.TYPE_CONST
1292 p.To.Type = obj.TYPE_REG
1293 p.To.Reg = v.Args[0].Reg()
1295 // Branch back to top of loop
1297 // BC with BO_BCTR generates bdnz
1298 p = s.Prog(ppc64.ABC)
1299 p.From.Type = obj.TYPE_CONST
1300 p.From.Offset = ppc64.BO_BCTR
1301 p.Reg = ppc64.REG_CR0LT
1302 p.To.Type = obj.TYPE_BRANCH
1306 // when ctr == 1 the loop was not generated but
1307 // there are at least 32 bytes to clear, so add
1308 // that to the remainder to generate the code
1309 // to clear those doublewords
1314 // clear the remainder starting at offset zero
1317 // first clear as many doublewords as possible
1318 // then clear remaining sizes as available
1320 op, size := ppc64.AMOVB, int64(1)
1323 op, size = ppc64.AMOVD, 8
1325 op, size = ppc64.AMOVW, 4
1327 op, size = ppc64.AMOVH, 2
1330 p.From.Type = obj.TYPE_REG
1331 p.From.Reg = ppc64.REG_R0
1332 p.To.Type = obj.TYPE_MEM
1333 p.To.Reg = v.Args[0].Reg()
1334 p.To.Offset = offset
1339 case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1341 bytesPerLoop := int64(32)
1342 // This will be used when moving more
1343 // than 8 bytes. Moves start with
1344 // as many 8 byte moves as possible, then
1345 // 4, 2, or 1 byte(s) as remaining. This will
1346 // work and be efficient for power8 or later.
1347 // If there are 64 or more bytes, then a
1348 // loop is generated to move 32 bytes and
1349 // update the src and dst addresses on each
1350 // iteration. When < 64 bytes, the appropriate
1351 // number of moves are generated based on the
1353 // When moving >= 64 bytes a loop is used
1354 // MOVD len/32,REG_TMP
1358 // LXVD2X (R0)(R21),VS32
1359 // LXVD2X (R31)(R21),VS33
1361 // STXVD2X VS32,(R0)(R20)
1362 // STXVD2X VS33,(R31)(R20)
1365 // Bytes not moved by this loop are moved
1366 // with a combination of the following instructions,
1367 // starting with the largest sizes and generating as
1368 // many as needed, using the appropriate offset value.
1378 // Each loop iteration moves 32 bytes
1379 ctr := v.AuxInt / bytesPerLoop
1381 // Remainder after the loop
1382 rem := v.AuxInt % bytesPerLoop
1384 dstReg := v.Args[0].Reg()
1385 srcReg := v.Args[1].Reg()
1387 // The set of registers used here, must match the clobbered reg list
1393 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1396 p := s.Prog(ppc64.AMOVD)
1397 p.From.Type = obj.TYPE_CONST
1399 p.To.Type = obj.TYPE_REG
1400 p.To.Reg = ppc64.REGTMP
1402 p = s.Prog(ppc64.AMOVD)
1403 p.From.Type = obj.TYPE_REG
1404 p.From.Reg = ppc64.REGTMP
1405 p.To.Type = obj.TYPE_REG
1406 p.To.Reg = ppc64.REG_CTR
1408 // Use REGTMP as index reg
1409 p = s.Prog(ppc64.AMOVD)
1410 p.From.Type = obj.TYPE_CONST
1412 p.To.Type = obj.TYPE_REG
1413 p.To.Reg = ppc64.REGTMP
1415 // Don't add padding for
1416 // alignment with small iteration
1419 p = s.Prog(obj.APCALIGN)
1420 p.From.Type = obj.TYPE_CONST
1424 // Generate 16 byte loads and stores.
1425 // Use temp register for index (16)
1426 // on the second one.
1428 p = s.Prog(ppc64.ALXVD2X)
1429 p.From.Type = obj.TYPE_MEM
1431 p.From.Index = ppc64.REGZERO
1432 p.To.Type = obj.TYPE_REG
1433 p.To.Reg = ppc64.REG_VS32
1437 p = s.Prog(ppc64.ALXVD2X)
1438 p.From.Type = obj.TYPE_MEM
1440 p.From.Index = ppc64.REGTMP
1441 p.To.Type = obj.TYPE_REG
1442 p.To.Reg = ppc64.REG_VS33
1444 // increment the src reg for next iteration
1445 p = s.Prog(ppc64.AADD)
1447 p.From.Type = obj.TYPE_CONST
1448 p.From.Offset = bytesPerLoop
1449 p.To.Type = obj.TYPE_REG
1452 // generate 16 byte stores
1453 p = s.Prog(ppc64.ASTXVD2X)
1454 p.From.Type = obj.TYPE_REG
1455 p.From.Reg = ppc64.REG_VS32
1456 p.To.Type = obj.TYPE_MEM
1458 p.To.Index = ppc64.REGZERO
1460 p = s.Prog(ppc64.ASTXVD2X)
1461 p.From.Type = obj.TYPE_REG
1462 p.From.Reg = ppc64.REG_VS33
1463 p.To.Type = obj.TYPE_MEM
1465 p.To.Index = ppc64.REGTMP
1467 // increment the dst reg for next iteration
1468 p = s.Prog(ppc64.AADD)
1470 p.From.Type = obj.TYPE_CONST
1471 p.From.Offset = bytesPerLoop
1472 p.To.Type = obj.TYPE_REG
1475 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1477 p = s.Prog(ppc64.ABC)
1478 p.From.Type = obj.TYPE_CONST
1479 p.From.Offset = ppc64.BO_BCTR
1480 p.Reg = ppc64.REG_CR0LT
1481 p.To.Type = obj.TYPE_BRANCH
1484 // srcReg and dstReg were incremented in the loop, so
1485 // later instructions start with offset 0.
1489 // No loop was generated for one iteration, so
1490 // add 32 bytes to the remainder to move those bytes.
1496 // Generate 16 byte loads and stores.
1497 // Use temp register for index (value 16)
1498 // on the second one.
1499 p := s.Prog(ppc64.ALXVD2X)
1500 p.From.Type = obj.TYPE_MEM
1502 p.From.Index = ppc64.REGZERO
1503 p.To.Type = obj.TYPE_REG
1504 p.To.Reg = ppc64.REG_VS32
1506 p = s.Prog(ppc64.ASTXVD2X)
1507 p.From.Type = obj.TYPE_REG
1508 p.From.Reg = ppc64.REG_VS32
1509 p.To.Type = obj.TYPE_MEM
1511 p.To.Index = ppc64.REGZERO
1517 // Use REGTMP as index reg
1518 p := s.Prog(ppc64.AMOVD)
1519 p.From.Type = obj.TYPE_CONST
1521 p.To.Type = obj.TYPE_REG
1522 p.To.Reg = ppc64.REGTMP
1524 p = s.Prog(ppc64.ALXVD2X)
1525 p.From.Type = obj.TYPE_MEM
1527 p.From.Index = ppc64.REGTMP
1528 p.To.Type = obj.TYPE_REG
1529 p.To.Reg = ppc64.REG_VS32
1531 p = s.Prog(ppc64.ASTXVD2X)
1532 p.From.Type = obj.TYPE_REG
1533 p.From.Reg = ppc64.REG_VS32
1534 p.To.Type = obj.TYPE_MEM
1536 p.To.Index = ppc64.REGTMP
1543 // Generate all the remaining load and store pairs, starting with
1544 // as many 8 byte moves as possible, then 4, 2, 1.
1546 op, size := ppc64.AMOVB, int64(1)
1549 op, size = ppc64.AMOVD, 8
1551 op, size = ppc64.AMOVWZ, 4
1553 op, size = ppc64.AMOVH, 2
1557 p.To.Type = obj.TYPE_REG
1558 p.To.Reg = ppc64.REGTMP
1559 p.From.Type = obj.TYPE_MEM
1561 p.From.Offset = offset
1565 p.From.Type = obj.TYPE_REG
1566 p.From.Reg = ppc64.REGTMP
1567 p.To.Type = obj.TYPE_MEM
1569 p.To.Offset = offset
1574 case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1575 bytesPerLoop := int64(64)
1576 // This is used when moving more
1577 // than 8 bytes on power9. Moves start with
1578 // as many 8 byte moves as possible, then
1579 // 4, 2, or 1 byte(s) as remaining. This will
1580 // work and be efficient for power8 or later.
1581 // If there are 64 or more bytes, then a
1582 // loop is generated to move 64 bytes and
1583 // update the src and dst addresses on each
1584 // iteration. When < 64 bytes, the appropriate
1585 // number of moves are generated based on the
1587 // When moving >= 64 bytes a loop is used
1588 // MOVD len/64,REG_TMP
1595 // STXV VS33,16(R20)
1598 // Bytes not moved by this loop are moved
1599 // with a combination of the following instructions,
1600 // starting with the largest sizes and generating as
1601 // many as needed, using the appropriate offset value.
1611 // Each loop iteration moves 64 bytes
1612 ctr := v.AuxInt / bytesPerLoop
1614 // Remainder after the loop
1615 rem := v.AuxInt % bytesPerLoop
1617 dstReg := v.Args[0].Reg()
1618 srcReg := v.Args[1].Reg()
1625 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1628 p := s.Prog(ppc64.AMOVD)
1629 p.From.Type = obj.TYPE_CONST
1631 p.To.Type = obj.TYPE_REG
1632 p.To.Reg = ppc64.REGTMP
1634 p = s.Prog(ppc64.AMOVD)
1635 p.From.Type = obj.TYPE_REG
1636 p.From.Reg = ppc64.REGTMP
1637 p.To.Type = obj.TYPE_REG
1638 p.To.Reg = ppc64.REG_CTR
1640 p = s.Prog(obj.APCALIGN)
1641 p.From.Type = obj.TYPE_CONST
1644 // Generate 16 byte loads and stores.
1645 p = s.Prog(ppc64.ALXV)
1646 p.From.Type = obj.TYPE_MEM
1648 p.From.Offset = offset
1649 p.To.Type = obj.TYPE_REG
1650 p.To.Reg = ppc64.REG_VS32
1654 p = s.Prog(ppc64.ALXV)
1655 p.From.Type = obj.TYPE_MEM
1657 p.From.Offset = offset + 16
1658 p.To.Type = obj.TYPE_REG
1659 p.To.Reg = ppc64.REG_VS33
1661 // generate 16 byte stores
1662 p = s.Prog(ppc64.ASTXV)
1663 p.From.Type = obj.TYPE_REG
1664 p.From.Reg = ppc64.REG_VS32
1665 p.To.Type = obj.TYPE_MEM
1667 p.To.Offset = offset
1669 p = s.Prog(ppc64.ASTXV)
1670 p.From.Type = obj.TYPE_REG
1671 p.From.Reg = ppc64.REG_VS33
1672 p.To.Type = obj.TYPE_MEM
1674 p.To.Offset = offset + 16
1676 // Generate 16 byte loads and stores.
1677 p = s.Prog(ppc64.ALXV)
1678 p.From.Type = obj.TYPE_MEM
1680 p.From.Offset = offset + 32
1681 p.To.Type = obj.TYPE_REG
1682 p.To.Reg = ppc64.REG_VS32
1684 p = s.Prog(ppc64.ALXV)
1685 p.From.Type = obj.TYPE_MEM
1687 p.From.Offset = offset + 48
1688 p.To.Type = obj.TYPE_REG
1689 p.To.Reg = ppc64.REG_VS33
1691 // generate 16 byte stores
1692 p = s.Prog(ppc64.ASTXV)
1693 p.From.Type = obj.TYPE_REG
1694 p.From.Reg = ppc64.REG_VS32
1695 p.To.Type = obj.TYPE_MEM
1697 p.To.Offset = offset + 32
1699 p = s.Prog(ppc64.ASTXV)
1700 p.From.Type = obj.TYPE_REG
1701 p.From.Reg = ppc64.REG_VS33
1702 p.To.Type = obj.TYPE_MEM
1704 p.To.Offset = offset + 48
1706 // increment the src reg for next iteration
1707 p = s.Prog(ppc64.AADD)
1709 p.From.Type = obj.TYPE_CONST
1710 p.From.Offset = bytesPerLoop
1711 p.To.Type = obj.TYPE_REG
1714 // increment the dst reg for next iteration
1715 p = s.Prog(ppc64.AADD)
1717 p.From.Type = obj.TYPE_CONST
1718 p.From.Offset = bytesPerLoop
1719 p.To.Type = obj.TYPE_REG
1722 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1724 p = s.Prog(ppc64.ABC)
1725 p.From.Type = obj.TYPE_CONST
1726 p.From.Offset = ppc64.BO_BCTR
1727 p.Reg = ppc64.REG_CR0LT
1728 p.To.Type = obj.TYPE_BRANCH
1731 // srcReg and dstReg were incremented in the loop, so
1732 // later instructions start with offset 0.
1736 // No loop was generated for one iteration, so
1737 // add 64 bytes to the remainder to move those bytes.
1742 p := s.Prog(ppc64.ALXV)
1743 p.From.Type = obj.TYPE_MEM
1745 p.To.Type = obj.TYPE_REG
1746 p.To.Reg = ppc64.REG_VS32
1748 p = s.Prog(ppc64.ALXV)
1749 p.From.Type = obj.TYPE_MEM
1752 p.To.Type = obj.TYPE_REG
1753 p.To.Reg = ppc64.REG_VS33
1755 p = s.Prog(ppc64.ASTXV)
1756 p.From.Type = obj.TYPE_REG
1757 p.From.Reg = ppc64.REG_VS32
1758 p.To.Type = obj.TYPE_MEM
1761 p = s.Prog(ppc64.ASTXV)
1762 p.From.Type = obj.TYPE_REG
1763 p.From.Reg = ppc64.REG_VS33
1764 p.To.Type = obj.TYPE_MEM
1773 // Generate 16 byte loads and stores.
1774 p := s.Prog(ppc64.ALXV)
1775 p.From.Type = obj.TYPE_MEM
1777 p.From.Offset = offset
1778 p.To.Type = obj.TYPE_REG
1779 p.To.Reg = ppc64.REG_VS32
1781 p = s.Prog(ppc64.ASTXV)
1782 p.From.Type = obj.TYPE_REG
1783 p.From.Reg = ppc64.REG_VS32
1784 p.To.Type = obj.TYPE_MEM
1786 p.To.Offset = offset
1792 p := s.Prog(ppc64.ALXV)
1793 p.From.Type = obj.TYPE_MEM
1795 p.From.Offset = offset
1796 p.To.Type = obj.TYPE_REG
1797 p.To.Reg = ppc64.REG_VS32
1799 p = s.Prog(ppc64.ASTXV)
1800 p.From.Type = obj.TYPE_REG
1801 p.From.Reg = ppc64.REG_VS32
1802 p.To.Type = obj.TYPE_MEM
1804 p.To.Offset = offset
1810 // Generate all the remaining load and store pairs, starting with
1811 // as many 8 byte moves as possible, then 4, 2, 1.
1813 op, size := ppc64.AMOVB, int64(1)
1816 op, size = ppc64.AMOVD, 8
1818 op, size = ppc64.AMOVWZ, 4
1820 op, size = ppc64.AMOVH, 2
1824 p.To.Type = obj.TYPE_REG
1825 p.To.Reg = ppc64.REGTMP
1826 p.From.Type = obj.TYPE_MEM
1828 p.From.Offset = offset
1832 p.From.Type = obj.TYPE_REG
1833 p.From.Reg = ppc64.REGTMP
1834 p.To.Type = obj.TYPE_MEM
1836 p.To.Offset = offset
1841 case ssa.OpPPC64CALLstatic:
1844 case ssa.OpPPC64CALLtail:
1847 case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1848 p := s.Prog(ppc64.AMOVD)
1849 p.From.Type = obj.TYPE_REG
1850 p.From.Reg = v.Args[0].Reg()
1851 p.To.Type = obj.TYPE_REG
1852 p.To.Reg = ppc64.REG_LR
1854 if v.Args[0].Reg() != ppc64.REG_R12 {
1855 v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1860 // Convert the call into a blrl with hint this is not a subroutine return.
1861 // The full bclrl opcode must be specified when passing a hint.
1863 pp.From.Type = obj.TYPE_CONST
1864 pp.From.Offset = ppc64.BO_ALWAYS
1865 pp.Reg = ppc64.REG_CR0LT // The preferred value if BI is ignored.
1866 pp.To.Reg = ppc64.REG_LR
1869 if base.Ctxt.Flag_shared {
1870 // When compiling Go into PIC, the function we just
1871 // called via pointer might have been implemented in
1872 // a separate module and so overwritten the TOC
1873 // pointer in R2; reload it.
1874 q := s.Prog(ppc64.AMOVD)
1875 q.From.Type = obj.TYPE_MEM
1877 q.From.Reg = ppc64.REGSP
1878 q.To.Type = obj.TYPE_REG
1879 q.To.Reg = ppc64.REG_R2
1882 case ssa.OpPPC64LoweredWB:
1883 p := s.Prog(obj.ACALL)
1884 p.To.Type = obj.TYPE_MEM
1885 p.To.Name = obj.NAME_EXTERN
1886 p.To.Sym = v.Aux.(*obj.LSym)
1888 case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1889 p := s.Prog(obj.ACALL)
1890 p.To.Type = obj.TYPE_MEM
1891 p.To.Name = obj.NAME_EXTERN
1892 p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
1893 s.UseArgs(16) // space used in callee args area by assembly stubs
1895 case ssa.OpPPC64LoweredNilCheck:
1896 if buildcfg.GOOS == "aix" {
1900 // NOP (so the BNE has somewhere to land)
1903 p := s.Prog(ppc64.ACMP)
1904 p.From.Type = obj.TYPE_REG
1905 p.From.Reg = v.Args[0].Reg()
1906 p.To.Type = obj.TYPE_REG
1907 p.To.Reg = ppc64.REG_R0
1910 p2 := s.Prog(ppc64.ABNE)
1911 p2.To.Type = obj.TYPE_BRANCH
1914 // Write at 0 is forbidden and will trigger a SIGSEGV
1915 p = s.Prog(ppc64.AMOVW)
1916 p.From.Type = obj.TYPE_REG
1917 p.From.Reg = ppc64.REG_R0
1918 p.To.Type = obj.TYPE_MEM
1919 p.To.Reg = ppc64.REG_R0
1921 // NOP (so the BNE has somewhere to land)
1922 nop := s.Prog(obj.ANOP)
1923 p2.To.SetTarget(nop)
1926 // Issue a load which will fault if arg is nil.
1927 p := s.Prog(ppc64.AMOVBZ)
1928 p.From.Type = obj.TYPE_MEM
1929 p.From.Reg = v.Args[0].Reg()
1930 ssagen.AddAux(&p.From, v)
1931 p.To.Type = obj.TYPE_REG
1932 p.To.Reg = ppc64.REGTMP
1934 if logopt.Enabled() {
1935 logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1937 if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1938 base.WarnfAt(v.Pos, "generated nil check")
1941 // These should be resolved by rules and not make it here.
1942 case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1943 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1944 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1945 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1946 case ssa.OpPPC64InvertFlags:
1947 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1948 case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1949 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1950 case ssa.OpClobber, ssa.OpClobberReg:
1951 // TODO: implement for clobberdead experiment. Nop is ok for now.
1953 v.Fatalf("genValue not implemented: %s", v.LongString())
1957 var blockJump = [...]struct {
1959 asmeq, invasmun bool
1961 ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1962 ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1964 ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1965 ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1966 ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1967 ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1969 // TODO: need to work FP comparisons into block jumps
1970 ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1971 ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1972 ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1973 ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1976 func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
1978 case ssa.BlockDefer:
1979 // defer returns in R3:
1980 // 0 if we should continue executing
1981 // 1 if we should jump to deferreturn call
1982 p := s.Prog(ppc64.ACMP)
1983 p.From.Type = obj.TYPE_REG
1984 p.From.Reg = ppc64.REG_R3
1985 p.To.Type = obj.TYPE_REG
1986 p.To.Reg = ppc64.REG_R0
1988 p = s.Prog(ppc64.ABNE)
1989 p.To.Type = obj.TYPE_BRANCH
1990 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
1991 if b.Succs[0].Block() != next {
1992 p := s.Prog(obj.AJMP)
1993 p.To.Type = obj.TYPE_BRANCH
1994 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
1997 case ssa.BlockPlain:
1998 if b.Succs[0].Block() != next {
1999 p := s.Prog(obj.AJMP)
2000 p.To.Type = obj.TYPE_BRANCH
2001 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
2003 case ssa.BlockExit, ssa.BlockRetJmp:
2007 case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
2008 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
2009 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
2010 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
2011 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
2012 jmp := blockJump[b.Kind]
2014 case b.Succs[0].Block():
2015 s.Br(jmp.invasm, b.Succs[1].Block())
2017 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2018 s.Br(ppc64.ABVS, b.Succs[1].Block())
2020 case b.Succs[1].Block():
2021 s.Br(jmp.asm, b.Succs[0].Block())
2023 s.Br(ppc64.ABEQ, b.Succs[0].Block())
2026 if b.Likely != ssa.BranchUnlikely {
2027 s.Br(jmp.asm, b.Succs[0].Block())
2029 s.Br(ppc64.ABEQ, b.Succs[0].Block())
2031 s.Br(obj.AJMP, b.Succs[1].Block())
2033 s.Br(jmp.invasm, b.Succs[1].Block())
2035 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2036 s.Br(ppc64.ABVS, b.Succs[1].Block())
2038 s.Br(obj.AJMP, b.Succs[0].Block())
2042 b.Fatalf("branch not implemented: %s", b.LongString())
2046 func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2047 p := s.Prog(loadByType(t))
2048 p.From.Type = obj.TYPE_MEM
2049 p.From.Name = obj.NAME_AUTO
2050 p.From.Sym = n.Linksym()
2051 p.From.Offset = n.FrameOffset() + off
2052 p.To.Type = obj.TYPE_REG
2057 func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2058 p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
2059 p.To.Name = obj.NAME_PARAM
2060 p.To.Sym = n.Linksym()
2061 p.Pos = p.Pos.WithNotStmt()