1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/base"
9 "cmd/compile/internal/ir"
10 "cmd/compile/internal/logopt"
11 "cmd/compile/internal/objw"
12 "cmd/compile/internal/ssa"
13 "cmd/compile/internal/ssagen"
14 "cmd/compile/internal/types"
16 "cmd/internal/obj/ppc64"
22 // ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
23 func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
24 // flive := b.FlagsLiveAtEnd
25 // if b.Control != nil && b.Control.Type.IsFlags() {
28 // for i := len(b.Values) - 1; i >= 0; i-- {
30 // if flive && (v.Op == ssa.OpPPC64MOVDconst) {
31 // // The "mark" is any non-nil Aux value.
34 // if v.Type.IsFlags() {
37 // for _, a := range v.Args {
38 // if a.Type.IsFlags() {
45 // loadByType returns the load instruction of the given type.
46 func loadByType(t *types.Type) obj.As {
78 panic("bad load type")
81 // storeByType returns the store instruction of the given type.
82 func storeByType(t *types.Type) obj.As {
102 panic("bad store type")
105 func ssaGenValue(s *ssagen.State, v *ssa.Value) {
128 case ssa.OpPPC64LoweredAtomicAnd8,
129 ssa.OpPPC64LoweredAtomicAnd32,
130 ssa.OpPPC64LoweredAtomicOr8,
131 ssa.OpPPC64LoweredAtomicOr32:
133 // LBAR/LWAR (Rarg0), Rtmp
134 // AND/OR Rarg1, Rtmp
135 // STBCCC/STWCCC Rtmp, (Rarg0)
139 if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
143 r0 := v.Args[0].Reg()
144 r1 := v.Args[1].Reg()
145 // LWSYNC - Assuming shared data not write-through-required nor
146 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
147 plwsync := s.Prog(ppc64.ALWSYNC)
148 plwsync.To.Type = obj.TYPE_NONE
151 p.From.Type = obj.TYPE_MEM
153 p.To.Type = obj.TYPE_REG
154 p.To.Reg = ppc64.REGTMP
156 p1 := s.Prog(v.Op.Asm())
157 p1.From.Type = obj.TYPE_REG
159 p1.To.Type = obj.TYPE_REG
160 p1.To.Reg = ppc64.REGTMP
163 p2.From.Type = obj.TYPE_REG
164 p2.From.Reg = ppc64.REGTMP
165 p2.To.Type = obj.TYPE_MEM
167 p2.RegTo2 = ppc64.REGTMP
169 p3 := s.Prog(ppc64.ABNE)
170 p3.To.Type = obj.TYPE_BRANCH
173 case ssa.OpPPC64LoweredAtomicAdd32,
174 ssa.OpPPC64LoweredAtomicAdd64:
176 // LDAR/LWAR (Rarg0), Rout
178 // STDCCC/STWCCC Rout, (Rarg0)
180 // MOVW Rout,Rout (if Add32)
183 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
187 r0 := v.Args[0].Reg()
188 r1 := v.Args[1].Reg()
190 // LWSYNC - Assuming shared data not write-through-required nor
191 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
192 plwsync := s.Prog(ppc64.ALWSYNC)
193 plwsync.To.Type = obj.TYPE_NONE
196 p.From.Type = obj.TYPE_MEM
198 p.To.Type = obj.TYPE_REG
201 p1 := s.Prog(ppc64.AADD)
202 p1.From.Type = obj.TYPE_REG
205 p1.To.Type = obj.TYPE_REG
208 p3.From.Type = obj.TYPE_REG
210 p3.To.Type = obj.TYPE_MEM
213 p4 := s.Prog(ppc64.ABNE)
214 p4.To.Type = obj.TYPE_BRANCH
217 // Ensure a 32 bit result
218 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
219 p5 := s.Prog(ppc64.AMOVWZ)
220 p5.To.Type = obj.TYPE_REG
222 p5.From.Type = obj.TYPE_REG
226 case ssa.OpPPC64LoweredAtomicExchange32,
227 ssa.OpPPC64LoweredAtomicExchange64:
229 // LDAR/LWAR (Rarg0), Rout
230 // STDCCC/STWCCC Rout, (Rarg0)
235 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
239 r0 := v.Args[0].Reg()
240 r1 := v.Args[1].Reg()
242 // LWSYNC - Assuming shared data not write-through-required nor
243 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
244 plwsync := s.Prog(ppc64.ALWSYNC)
245 plwsync.To.Type = obj.TYPE_NONE
248 p.From.Type = obj.TYPE_MEM
250 p.To.Type = obj.TYPE_REG
254 p1.From.Type = obj.TYPE_REG
256 p1.To.Type = obj.TYPE_MEM
259 p2 := s.Prog(ppc64.ABNE)
260 p2.To.Type = obj.TYPE_BRANCH
263 pisync := s.Prog(ppc64.AISYNC)
264 pisync.To.Type = obj.TYPE_NONE
266 case ssa.OpPPC64LoweredAtomicLoad8,
267 ssa.OpPPC64LoweredAtomicLoad32,
268 ssa.OpPPC64LoweredAtomicLoad64,
269 ssa.OpPPC64LoweredAtomicLoadPtr:
271 // MOVB/MOVD/MOVW (Rarg0), Rout
278 case ssa.OpPPC64LoweredAtomicLoad8:
280 case ssa.OpPPC64LoweredAtomicLoad32:
284 arg0 := v.Args[0].Reg()
286 // SYNC when AuxInt == 1; otherwise, load-acquire
288 psync := s.Prog(ppc64.ASYNC)
289 psync.To.Type = obj.TYPE_NONE
293 p.From.Type = obj.TYPE_MEM
295 p.To.Type = obj.TYPE_REG
299 p1.From.Type = obj.TYPE_REG
301 p1.To.Type = obj.TYPE_REG
304 p2 := s.Prog(ppc64.ABNE)
305 p2.To.Type = obj.TYPE_BRANCH
307 pisync := s.Prog(ppc64.AISYNC)
308 pisync.To.Type = obj.TYPE_NONE
309 p2.To.SetTarget(pisync)
311 case ssa.OpPPC64LoweredAtomicStore8,
312 ssa.OpPPC64LoweredAtomicStore32,
313 ssa.OpPPC64LoweredAtomicStore64:
315 // MOVB/MOVW/MOVD arg1,(arg0)
318 case ssa.OpPPC64LoweredAtomicStore8:
320 case ssa.OpPPC64LoweredAtomicStore32:
323 arg0 := v.Args[0].Reg()
324 arg1 := v.Args[1].Reg()
325 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
327 syncOp := ppc64.ASYNC
329 syncOp = ppc64.ALWSYNC
331 psync := s.Prog(syncOp)
332 psync.To.Type = obj.TYPE_NONE
335 p.To.Type = obj.TYPE_MEM
337 p.From.Type = obj.TYPE_REG
340 case ssa.OpPPC64LoweredAtomicCas64,
341 ssa.OpPPC64LoweredAtomicCas32:
345 // LDAR (Rarg0), MutexHint, Rtmp
348 // STDCCC Rarg2, (Rarg0)
350 // LWSYNC // Only for sequential consistency; not required in CasRel.
356 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
361 r0 := v.Args[0].Reg()
362 r1 := v.Args[1].Reg()
363 r2 := v.Args[2].Reg()
365 // Initialize return value to false
366 p := s.Prog(ppc64.AMOVD)
367 p.From.Type = obj.TYPE_CONST
369 p.To.Type = obj.TYPE_REG
371 // LWSYNC - Assuming shared data not write-through-required nor
372 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
373 plwsync1 := s.Prog(ppc64.ALWSYNC)
374 plwsync1.To.Type = obj.TYPE_NONE
377 p0.From.Type = obj.TYPE_MEM
379 p0.To.Type = obj.TYPE_REG
380 p0.To.Reg = ppc64.REGTMP
381 // If it is a Compare-and-Swap-Release operation, set the EH field with
388 p1.From.Type = obj.TYPE_REG
390 p1.To.Reg = ppc64.REGTMP
391 p1.To.Type = obj.TYPE_REG
392 // BNE done with return value = false
393 p2 := s.Prog(ppc64.ABNE)
394 p2.To.Type = obj.TYPE_BRANCH
397 p3.From.Type = obj.TYPE_REG
399 p3.To.Type = obj.TYPE_MEM
402 p4 := s.Prog(ppc64.ABNE)
403 p4.To.Type = obj.TYPE_BRANCH
405 // LWSYNC - Assuming shared data not write-through-required nor
406 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
407 // If the operation is a CAS-Release, then synchronization is not necessary.
409 plwsync2 := s.Prog(ppc64.ALWSYNC)
410 plwsync2.To.Type = obj.TYPE_NONE
413 p5 := s.Prog(ppc64.AMOVD)
414 p5.From.Type = obj.TYPE_CONST
416 p5.To.Type = obj.TYPE_REG
419 p6 := s.Prog(obj.ANOP)
422 case ssa.OpPPC64LoweredPubBarrier:
426 case ssa.OpPPC64LoweredGetClosurePtr:
427 // Closure pointer is R11 (already)
428 ssagen.CheckLoweredGetClosurePtr(v)
430 case ssa.OpPPC64LoweredGetCallerSP:
431 // caller's SP is FixedFrameSize below the address of the first arg
432 p := s.Prog(ppc64.AMOVD)
433 p.From.Type = obj.TYPE_ADDR
434 p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
435 p.From.Name = obj.NAME_PARAM
436 p.To.Type = obj.TYPE_REG
439 case ssa.OpPPC64LoweredGetCallerPC:
440 p := s.Prog(obj.AGETCALLERPC)
441 p.To.Type = obj.TYPE_REG
444 case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
445 // input is already rounded
448 loadOp := loadByType(v.Type)
450 ssagen.AddrAuto(&p.From, v.Args[0])
451 p.To.Type = obj.TYPE_REG
455 storeOp := storeByType(v.Type)
457 p.From.Type = obj.TYPE_REG
458 p.From.Reg = v.Args[0].Reg()
459 ssagen.AddrAuto(&p.To, v)
461 case ssa.OpArgIntReg, ssa.OpArgFloatReg:
462 // The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
463 // The loop only runs once.
464 for _, a := range v.Block.Func.RegArgs {
465 // Pass the spill/unspill information along to the assembler, offset by size of
466 // the saved LR slot.
467 addr := ssagen.SpillSlotAddr(a, ppc64.REGSP, base.Ctxt.Arch.FixedFrameSize)
468 s.FuncInfo().AddSpill(
469 obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
471 v.Block.Func.RegArgs = nil
473 ssagen.CheckArgReg(v)
475 case ssa.OpPPC64DIVD:
485 r0 := v.Args[0].Reg()
486 r1 := v.Args[1].Reg()
488 p := s.Prog(ppc64.ACMP)
489 p.From.Type = obj.TYPE_REG
491 p.To.Type = obj.TYPE_CONST
494 pbahead := s.Prog(ppc64.ABEQ)
495 pbahead.To.Type = obj.TYPE_BRANCH
497 p = s.Prog(v.Op.Asm())
498 p.From.Type = obj.TYPE_REG
501 p.To.Type = obj.TYPE_REG
504 pbover := s.Prog(obj.AJMP)
505 pbover.To.Type = obj.TYPE_BRANCH
507 p = s.Prog(ppc64.ANEG)
508 p.To.Type = obj.TYPE_REG
510 p.From.Type = obj.TYPE_REG
512 pbahead.To.SetTarget(p)
515 pbover.To.SetTarget(p)
517 case ssa.OpPPC64DIVW:
518 // word-width version of above
520 r0 := v.Args[0].Reg()
521 r1 := v.Args[1].Reg()
523 p := s.Prog(ppc64.ACMPW)
524 p.From.Type = obj.TYPE_REG
526 p.To.Type = obj.TYPE_CONST
529 pbahead := s.Prog(ppc64.ABEQ)
530 pbahead.To.Type = obj.TYPE_BRANCH
532 p = s.Prog(v.Op.Asm())
533 p.From.Type = obj.TYPE_REG
536 p.To.Type = obj.TYPE_REG
539 pbover := s.Prog(obj.AJMP)
540 pbover.To.Type = obj.TYPE_BRANCH
542 p = s.Prog(ppc64.ANEG)
543 p.To.Type = obj.TYPE_REG
545 p.From.Type = obj.TYPE_REG
547 pbahead.To.SetTarget(p)
550 pbover.To.SetTarget(p)
552 case ssa.OpPPC64CLRLSLWI:
554 r1 := v.Args[0].Reg()
556 p := s.Prog(v.Op.Asm())
557 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
558 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
559 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
561 p.To.Type = obj.TYPE_REG
564 case ssa.OpPPC64CLRLSLDI:
566 r1 := v.Args[0].Reg()
568 p := s.Prog(v.Op.Asm())
569 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
570 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
571 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
573 p.To.Type = obj.TYPE_REG
576 // Mask has been set as sh
577 case ssa.OpPPC64RLDICL:
579 r1 := v.Args[0].Reg()
581 p := s.Prog(v.Op.Asm())
582 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}
583 p.SetFrom3Const(ssa.GetPPC64Shiftmb(shifts))
585 p.To.Type = obj.TYPE_REG
588 case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
589 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
590 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
591 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
592 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
593 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
594 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
595 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
597 r1 := v.Args[0].Reg()
598 r2 := v.Args[1].Reg()
599 p := s.Prog(v.Op.Asm())
600 p.From.Type = obj.TYPE_REG
603 p.To.Type = obj.TYPE_REG
606 case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
607 r1 := v.Args[0].Reg()
608 r2 := v.Args[1].Reg()
609 p := s.Prog(v.Op.Asm())
610 p.From.Type = obj.TYPE_REG
613 p.To.Type = obj.TYPE_REG
616 case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
617 p := s.Prog(v.Op.Asm())
618 p.From.Type = obj.TYPE_CONST
619 p.From.Offset = v.AuxInt
620 p.Reg = v.Args[0].Reg()
621 p.To.Type = obj.TYPE_REG
624 // Auxint holds encoded rotate + mask
625 case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
626 rot, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
627 p := s.Prog(v.Op.Asm())
628 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
629 p.Reg = v.Args[0].Reg()
630 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(rot)}
631 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
634 case ssa.OpPPC64RLWNM:
635 _, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
636 p := s.Prog(v.Op.Asm())
637 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
638 p.Reg = v.Args[0].Reg()
639 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
640 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
642 case ssa.OpPPC64MADDLD:
644 r1 := v.Args[0].Reg()
645 r2 := v.Args[1].Reg()
646 r3 := v.Args[2].Reg()
648 p := s.Prog(v.Op.Asm())
649 p.From.Type = obj.TYPE_REG
653 p.To.Type = obj.TYPE_REG
656 case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
658 r1 := v.Args[0].Reg()
659 r2 := v.Args[1].Reg()
660 r3 := v.Args[2].Reg()
662 p := s.Prog(v.Op.Asm())
663 p.From.Type = obj.TYPE_REG
667 p.To.Type = obj.TYPE_REG
670 case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
671 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
672 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
673 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
675 p := s.Prog(v.Op.Asm())
676 p.To.Type = obj.TYPE_REG
678 p.From.Type = obj.TYPE_REG
679 p.From.Reg = v.Args[0].Reg()
681 case ssa.OpPPC64ADDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
682 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
683 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
684 p := s.Prog(v.Op.Asm())
685 p.Reg = v.Args[0].Reg()
686 p.From.Type = obj.TYPE_CONST
687 p.From.Offset = v.AuxInt
688 p.To.Type = obj.TYPE_REG
691 case ssa.OpPPC64ADDC, ssa.OpPPC64ADDE, ssa.OpPPC64SUBC, ssa.OpPPC64SUBE:
692 r := v.Reg0() // CA is the first, implied argument.
693 r1 := v.Args[0].Reg()
694 r2 := v.Args[1].Reg()
695 p := s.Prog(v.Op.Asm())
696 p.From.Type = obj.TYPE_REG
699 p.To.Type = obj.TYPE_REG
702 case ssa.OpPPC64ADDZEzero, ssa.OpPPC64SUBZEzero:
703 p := s.Prog(v.Op.Asm())
704 p.From.Type = obj.TYPE_REG
705 p.From.Reg = ppc64.REG_R0
706 p.To.Type = obj.TYPE_REG
709 case ssa.OpPPC64ADDCconst:
710 p := s.Prog(v.Op.Asm())
711 p.Reg = v.Args[0].Reg()
712 p.From.Type = obj.TYPE_CONST
713 p.From.Offset = v.AuxInt
714 p.To.Type = obj.TYPE_REG
715 // Output is a pair, the second is the CA, which is implied.
718 case ssa.OpPPC64SUBCconst:
719 p := s.Prog(v.Op.Asm())
720 p.SetFrom3Const(v.AuxInt)
721 p.From.Type = obj.TYPE_REG
722 p.From.Reg = v.Args[0].Reg()
723 p.To.Type = obj.TYPE_REG
726 case ssa.OpPPC64SUBFCconst:
727 p := s.Prog(v.Op.Asm())
728 p.SetFrom3Const(v.AuxInt)
729 p.From.Type = obj.TYPE_REG
730 p.From.Reg = v.Args[0].Reg()
731 p.To.Type = obj.TYPE_REG
734 case ssa.OpPPC64ANDCCconst:
735 p := s.Prog(v.Op.Asm())
736 p.Reg = v.Args[0].Reg()
737 p.From.Type = obj.TYPE_CONST
738 p.From.Offset = v.AuxInt
739 p.To.Type = obj.TYPE_REG
740 // p.To.Reg = ppc64.REGTMP // discard result
743 case ssa.OpPPC64MOVDaddr:
744 switch v.Aux.(type) {
746 v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
748 // If aux offset and aux int are both 0, and the same
749 // input and output regs are used, no instruction
750 // needs to be generated, since it would just be a no-op.
752 if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
753 p := s.Prog(ppc64.AMOVD)
754 p.From.Type = obj.TYPE_ADDR
755 p.From.Reg = v.Args[0].Reg()
756 p.From.Offset = v.AuxInt
757 p.To.Type = obj.TYPE_REG
761 case *obj.LSym, ir.Node:
762 p := s.Prog(ppc64.AMOVD)
763 p.From.Type = obj.TYPE_ADDR
764 p.From.Reg = v.Args[0].Reg()
765 p.To.Type = obj.TYPE_REG
767 ssagen.AddAux(&p.From, v)
771 case ssa.OpPPC64MOVDconst:
772 p := s.Prog(v.Op.Asm())
773 p.From.Type = obj.TYPE_CONST
774 p.From.Offset = v.AuxInt
775 p.To.Type = obj.TYPE_REG
778 case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
779 p := s.Prog(v.Op.Asm())
780 p.From.Type = obj.TYPE_FCONST
781 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
782 p.To.Type = obj.TYPE_REG
785 case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
786 p := s.Prog(v.Op.Asm())
787 p.From.Type = obj.TYPE_REG
788 p.From.Reg = v.Args[0].Reg()
789 p.To.Type = obj.TYPE_REG
790 p.To.Reg = v.Args[1].Reg()
792 case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
793 p := s.Prog(v.Op.Asm())
794 p.From.Type = obj.TYPE_REG
795 p.From.Reg = v.Args[0].Reg()
796 p.To.Type = obj.TYPE_CONST
797 p.To.Offset = v.AuxInt
799 case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
800 // Shift in register to required size
801 p := s.Prog(v.Op.Asm())
802 p.From.Type = obj.TYPE_REG
803 p.From.Reg = v.Args[0].Reg()
805 p.To.Type = obj.TYPE_REG
807 case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:
809 // MOVDload and MOVWload are DS form instructions that are restricted to
810 // offsets that are a multiple of 4. If the offset is not a multiple of 4,
811 // then the address of the symbol to be loaded is computed (base + offset)
812 // and used as the new base register and the offset field in the instruction
813 // can be set to zero.
815 // This same problem can happen with gostrings since the final offset is not
816 // known yet, but could be unaligned after the relocation is resolved.
817 // So gostrings are handled the same way.
819 // This allows the MOVDload and MOVWload to be generated in more cases and
820 // eliminates some offset and alignment checking in the rules file.
822 fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
823 ssagen.AddAux(&fromAddr, v)
827 switch fromAddr.Name {
828 case obj.NAME_EXTERN, obj.NAME_STATIC:
829 // Special case for a rule that combines the bytes of a gostring.
830 // The v alignment might seem OK, but we don't want to load it
831 // using an offset because relocation comes later.
832 genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go:string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
834 genAddr = fromAddr.Offset%4 != 0
837 // Load full address into the temp register.
838 p := s.Prog(ppc64.AMOVD)
839 p.From.Type = obj.TYPE_ADDR
840 p.From.Reg = v.Args[0].Reg()
841 ssagen.AddAux(&p.From, v)
842 // Load target using temp as base register
843 // and offset zero. Setting NAME_NONE
844 // prevents any extra offsets from being
846 p.To.Type = obj.TYPE_REG
847 p.To.Reg = ppc64.REGTMP
848 fromAddr.Reg = ppc64.REGTMP
849 // Clear the offset field and other
850 // information that might be used
851 // by the assembler to add to the
852 // final offset value.
854 fromAddr.Name = obj.NAME_NONE
857 p := s.Prog(v.Op.Asm())
859 p.To.Type = obj.TYPE_REG
862 case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
863 p := s.Prog(v.Op.Asm())
864 p.From.Type = obj.TYPE_MEM
865 p.From.Reg = v.Args[0].Reg()
866 ssagen.AddAux(&p.From, v)
867 p.To.Type = obj.TYPE_REG
870 case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
871 p := s.Prog(v.Op.Asm())
872 p.From.Type = obj.TYPE_MEM
873 p.From.Reg = v.Args[0].Reg()
874 p.To.Type = obj.TYPE_REG
877 case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
878 p := s.Prog(v.Op.Asm())
879 p.To.Type = obj.TYPE_MEM
880 p.To.Reg = v.Args[0].Reg()
881 p.From.Type = obj.TYPE_REG
882 p.From.Reg = v.Args[1].Reg()
884 case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
885 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
886 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
887 p := s.Prog(v.Op.Asm())
888 p.From.Type = obj.TYPE_MEM
889 p.From.Reg = v.Args[0].Reg()
890 p.From.Index = v.Args[1].Reg()
891 p.To.Type = obj.TYPE_REG
894 case ssa.OpPPC64DCBT:
895 p := s.Prog(v.Op.Asm())
896 p.From.Type = obj.TYPE_MEM
897 p.From.Reg = v.Args[0].Reg()
898 p.To.Type = obj.TYPE_CONST
899 p.To.Offset = v.AuxInt
901 case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
902 p := s.Prog(v.Op.Asm())
903 p.From.Type = obj.TYPE_REG
904 p.From.Reg = ppc64.REGZERO
905 p.To.Type = obj.TYPE_MEM
906 p.To.Reg = v.Args[0].Reg()
907 ssagen.AddAux(&p.To, v)
909 case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:
911 // MOVDstore and MOVDstorezero become DS form instructions that are restricted
912 // to offset values that are a multiple of 4. If the offset field is not a
913 // multiple of 4, then the full address of the store target is computed (base +
914 // offset) and used as the new base register and the offset in the instruction
917 // This allows the MOVDstore and MOVDstorezero to be generated in more cases,
918 // and prevents checking of the offset value and alignment in the rules.
920 toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
921 ssagen.AddAux(&toAddr, v)
923 if toAddr.Offset%4 != 0 {
924 p := s.Prog(ppc64.AMOVD)
925 p.From.Type = obj.TYPE_ADDR
926 p.From.Reg = v.Args[0].Reg()
927 ssagen.AddAux(&p.From, v)
928 p.To.Type = obj.TYPE_REG
929 p.To.Reg = ppc64.REGTMP
930 toAddr.Reg = ppc64.REGTMP
931 // Clear the offset field and other
932 // information that might be used
933 // by the assembler to add to the
934 // final offset value.
936 toAddr.Name = obj.NAME_NONE
939 p := s.Prog(v.Op.Asm())
941 p.From.Type = obj.TYPE_REG
942 if v.Op == ssa.OpPPC64MOVDstorezero {
943 p.From.Reg = ppc64.REGZERO
945 p.From.Reg = v.Args[1].Reg()
948 case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
949 p := s.Prog(v.Op.Asm())
950 p.From.Type = obj.TYPE_REG
951 p.From.Reg = v.Args[1].Reg()
952 p.To.Type = obj.TYPE_MEM
953 p.To.Reg = v.Args[0].Reg()
954 ssagen.AddAux(&p.To, v)
956 case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
957 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
958 ssa.OpPPC64MOVHBRstoreidx:
959 p := s.Prog(v.Op.Asm())
960 p.From.Type = obj.TYPE_REG
961 p.From.Reg = v.Args[2].Reg()
962 p.To.Index = v.Args[1].Reg()
963 p.To.Type = obj.TYPE_MEM
964 p.To.Reg = v.Args[0].Reg()
966 case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB, ssa.OpPPC64ISELZ:
967 // ISEL AuxInt ? arg0 : arg1
968 // ISELB is a special case of ISEL where AuxInt ? $1 (arg0) : $0.
969 // ISELZ is a special case of ISEL where arg1 is implicitly $0.
971 // AuxInt value indicates conditions 0=LT 1=GT 2=EQ 3=SO 4=GE 5=LE 6=NE 7=NSO.
972 // ISEL accepts a CR bit argument, not a condition as expressed by AuxInt.
973 // Convert the condition to a CR bit argument by the following conversion:
975 // AuxInt&3 ? arg0 : arg1 for conditions LT, GT, EQ, SO
976 // AuxInt&3 ? arg1 : arg0 for conditions GE, LE, NE, NSO
977 p := s.Prog(ppc64.AISEL)
978 p.To.Type = obj.TYPE_REG
980 // For ISELB/ISELZ Use R0 for 0 operand to avoid load.
981 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
982 if v.Op == ssa.OpPPC64ISEL {
983 r.Reg = v.Args[1].Reg()
985 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
988 p.SetFrom3Reg(v.Args[0].Reg())
990 p.Reg = v.Args[0].Reg()
993 p.From.Type = obj.TYPE_CONST
994 p.From.Offset = v.AuxInt & 3
996 case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
997 // The LoweredQuad code generation
998 // generates STXV instructions on
999 // power9. The Short variation is used
1000 // if no loop is generated.
1002 // sizes >= 64 generate a loop as follows:
1004 // Set up loop counter in CTR, used by BC
1005 // XXLXOR clears VS32
1006 // XXLXOR VS32,VS32,VS32
1007 // MOVD len/64,REG_TMP
1011 // STXV VS32,16(R20)
1012 // STXV VS32,32(R20)
1013 // STXV VS32,48(R20)
1017 // Bytes per iteration
1018 ctr := v.AuxInt / 64
1021 rem := v.AuxInt % 64
1023 // Only generate a loop if there is more
1024 // than 1 iteration.
1026 // Set up VS32 (V0) to hold 0s
1027 p := s.Prog(ppc64.AXXLXOR)
1028 p.From.Type = obj.TYPE_REG
1029 p.From.Reg = ppc64.REG_VS32
1030 p.To.Type = obj.TYPE_REG
1031 p.To.Reg = ppc64.REG_VS32
1032 p.Reg = ppc64.REG_VS32
1034 // Set up CTR loop counter
1035 p = s.Prog(ppc64.AMOVD)
1036 p.From.Type = obj.TYPE_CONST
1038 p.To.Type = obj.TYPE_REG
1039 p.To.Reg = ppc64.REGTMP
1041 p = s.Prog(ppc64.AMOVD)
1042 p.From.Type = obj.TYPE_REG
1043 p.From.Reg = ppc64.REGTMP
1044 p.To.Type = obj.TYPE_REG
1045 p.To.Reg = ppc64.REG_CTR
1047 // Don't generate padding for
1048 // loops with few iterations.
1050 p = s.Prog(obj.APCALIGN)
1051 p.From.Type = obj.TYPE_CONST
1055 // generate 4 STXVs to zero 64 bytes
1058 p = s.Prog(ppc64.ASTXV)
1059 p.From.Type = obj.TYPE_REG
1060 p.From.Reg = ppc64.REG_VS32
1061 p.To.Type = obj.TYPE_MEM
1062 p.To.Reg = v.Args[0].Reg()
1064 // Save the top of loop
1068 p = s.Prog(ppc64.ASTXV)
1069 p.From.Type = obj.TYPE_REG
1070 p.From.Reg = ppc64.REG_VS32
1071 p.To.Type = obj.TYPE_MEM
1072 p.To.Reg = v.Args[0].Reg()
1075 p = s.Prog(ppc64.ASTXV)
1076 p.From.Type = obj.TYPE_REG
1077 p.From.Reg = ppc64.REG_VS32
1078 p.To.Type = obj.TYPE_MEM
1079 p.To.Reg = v.Args[0].Reg()
1082 p = s.Prog(ppc64.ASTXV)
1083 p.From.Type = obj.TYPE_REG
1084 p.From.Reg = ppc64.REG_VS32
1085 p.To.Type = obj.TYPE_MEM
1086 p.To.Reg = v.Args[0].Reg()
1089 // Increment address for the
1090 // 64 bytes just zeroed.
1091 p = s.Prog(ppc64.AADD)
1092 p.Reg = v.Args[0].Reg()
1093 p.From.Type = obj.TYPE_CONST
1095 p.To.Type = obj.TYPE_REG
1096 p.To.Reg = v.Args[0].Reg()
1098 // Branch back to top of loop
1100 // BC with BO_BCTR generates bdnz
1101 p = s.Prog(ppc64.ABC)
1102 p.From.Type = obj.TYPE_CONST
1103 p.From.Offset = ppc64.BO_BCTR
1104 p.Reg = ppc64.REG_CR0LT
1105 p.To.Type = obj.TYPE_BRANCH
1108 // When ctr == 1 the loop was not generated but
1109 // there are at least 64 bytes to clear, so add
1110 // that to the remainder to generate the code
1111 // to clear those doublewords
1116 // Clear the remainder starting at offset zero
1119 if rem >= 16 && ctr <= 1 {
1120 // If the XXLXOR hasn't already been
1121 // generated, do it here to initialize
1123 p := s.Prog(ppc64.AXXLXOR)
1124 p.From.Type = obj.TYPE_REG
1125 p.From.Reg = ppc64.REG_VS32
1126 p.To.Type = obj.TYPE_REG
1127 p.To.Reg = ppc64.REG_VS32
1128 p.Reg = ppc64.REG_VS32
1130 // Generate STXV for 32 or 64
1133 p := s.Prog(ppc64.ASTXV)
1134 p.From.Type = obj.TYPE_REG
1135 p.From.Reg = ppc64.REG_VS32
1136 p.To.Type = obj.TYPE_MEM
1137 p.To.Reg = v.Args[0].Reg()
1138 p.To.Offset = offset
1140 p = s.Prog(ppc64.ASTXV)
1141 p.From.Type = obj.TYPE_REG
1142 p.From.Reg = ppc64.REG_VS32
1143 p.To.Type = obj.TYPE_MEM
1144 p.To.Reg = v.Args[0].Reg()
1145 p.To.Offset = offset + 16
1149 // Generate 16 bytes
1151 p := s.Prog(ppc64.ASTXV)
1152 p.From.Type = obj.TYPE_REG
1153 p.From.Reg = ppc64.REG_VS32
1154 p.To.Type = obj.TYPE_MEM
1155 p.To.Reg = v.Args[0].Reg()
1156 p.To.Offset = offset
1161 // first clear as many doublewords as possible
1162 // then clear remaining sizes as available
1164 op, size := ppc64.AMOVB, int64(1)
1167 op, size = ppc64.AMOVD, 8
1169 op, size = ppc64.AMOVW, 4
1171 op, size = ppc64.AMOVH, 2
1174 p.From.Type = obj.TYPE_REG
1175 p.From.Reg = ppc64.REG_R0
1176 p.To.Type = obj.TYPE_MEM
1177 p.To.Reg = v.Args[0].Reg()
1178 p.To.Offset = offset
1183 case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1185 // Unaligned data doesn't hurt performance
1186 // for these instructions on power8.
1188 // For sizes >= 64 generate a loop as follows:
1190 // Set up loop counter in CTR, used by BC
1191 // XXLXOR VS32,VS32,VS32
1192 // MOVD len/32,REG_TMP
1196 // STXVD2X VS32,(R0)(R20)
1197 // STXVD2X VS32,(R31)(R20)
1201 // any remainder is done as described below
1203 // for sizes < 64 bytes, first clear as many doublewords as possible,
1204 // then handle the remainder
1209 // the remainder bytes are cleared using one or more
1210 // of the following instructions with the appropriate
1211 // offsets depending which instructions are needed
1213 // MOVW R0,n1(R20) 4 bytes
1214 // MOVH R0,n2(R20) 2 bytes
1215 // MOVB R0,n3(R20) 1 byte
1217 // 7 bytes: MOVW, MOVH, MOVB
1218 // 6 bytes: MOVW, MOVH
1219 // 5 bytes: MOVW, MOVB
1220 // 3 bytes: MOVH, MOVB
1222 // each loop iteration does 32 bytes
1223 ctr := v.AuxInt / 32
1226 rem := v.AuxInt % 32
1228 // only generate a loop if there is more
1229 // than 1 iteration.
1231 // Set up VS32 (V0) to hold 0s
1232 p := s.Prog(ppc64.AXXLXOR)
1233 p.From.Type = obj.TYPE_REG
1234 p.From.Reg = ppc64.REG_VS32
1235 p.To.Type = obj.TYPE_REG
1236 p.To.Reg = ppc64.REG_VS32
1237 p.Reg = ppc64.REG_VS32
1239 // Set up CTR loop counter
1240 p = s.Prog(ppc64.AMOVD)
1241 p.From.Type = obj.TYPE_CONST
1243 p.To.Type = obj.TYPE_REG
1244 p.To.Reg = ppc64.REGTMP
1246 p = s.Prog(ppc64.AMOVD)
1247 p.From.Type = obj.TYPE_REG
1248 p.From.Reg = ppc64.REGTMP
1249 p.To.Type = obj.TYPE_REG
1250 p.To.Reg = ppc64.REG_CTR
1252 // Set up R31 to hold index value 16
1253 p = s.Prog(ppc64.AMOVD)
1254 p.From.Type = obj.TYPE_CONST
1256 p.To.Type = obj.TYPE_REG
1257 p.To.Reg = ppc64.REGTMP
1259 // Don't add padding for alignment
1260 // with few loop iterations.
1262 p = s.Prog(obj.APCALIGN)
1263 p.From.Type = obj.TYPE_CONST
1267 // generate 2 STXVD2Xs to store 16 bytes
1268 // when this is a loop then the top must be saved
1270 // This is the top of loop
1272 p = s.Prog(ppc64.ASTXVD2X)
1273 p.From.Type = obj.TYPE_REG
1274 p.From.Reg = ppc64.REG_VS32
1275 p.To.Type = obj.TYPE_MEM
1276 p.To.Reg = v.Args[0].Reg()
1277 p.To.Index = ppc64.REGZERO
1278 // Save the top of loop
1282 p = s.Prog(ppc64.ASTXVD2X)
1283 p.From.Type = obj.TYPE_REG
1284 p.From.Reg = ppc64.REG_VS32
1285 p.To.Type = obj.TYPE_MEM
1286 p.To.Reg = v.Args[0].Reg()
1287 p.To.Index = ppc64.REGTMP
1289 // Increment address for the
1290 // 4 doublewords just zeroed.
1291 p = s.Prog(ppc64.AADD)
1292 p.Reg = v.Args[0].Reg()
1293 p.From.Type = obj.TYPE_CONST
1295 p.To.Type = obj.TYPE_REG
1296 p.To.Reg = v.Args[0].Reg()
1298 // Branch back to top of loop
1300 // BC with BO_BCTR generates bdnz
1301 p = s.Prog(ppc64.ABC)
1302 p.From.Type = obj.TYPE_CONST
1303 p.From.Offset = ppc64.BO_BCTR
1304 p.Reg = ppc64.REG_CR0LT
1305 p.To.Type = obj.TYPE_BRANCH
1309 // when ctr == 1 the loop was not generated but
1310 // there are at least 32 bytes to clear, so add
1311 // that to the remainder to generate the code
1312 // to clear those doublewords
1317 // clear the remainder starting at offset zero
1320 // first clear as many doublewords as possible
1321 // then clear remaining sizes as available
1323 op, size := ppc64.AMOVB, int64(1)
1326 op, size = ppc64.AMOVD, 8
1328 op, size = ppc64.AMOVW, 4
1330 op, size = ppc64.AMOVH, 2
1333 p.From.Type = obj.TYPE_REG
1334 p.From.Reg = ppc64.REG_R0
1335 p.To.Type = obj.TYPE_MEM
1336 p.To.Reg = v.Args[0].Reg()
1337 p.To.Offset = offset
// NOTE(review): many original lines are elided from this view (see the
// gaps in the embedded line numbers); do not assume the span below is
// contiguous code.
1342 case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
// Pre-power9 bulk copy: a 32-byte-per-iteration loop using the
// VSX indexed load/store pair LXVD2X/STXVD2X, then a remainder
// moved with 8/4/2/1-byte loads and stores.
1344 bytesPerLoop := int64(32)
1345 // This will be used when moving more
1346 // than 8 bytes. Moves start with
1347 // as many 8 byte moves as possible, then
1348 // 4, 2, or 1 byte(s) as remaining. This will
1349 // work and be efficient for power8 or later.
1350 // If there are 64 or more bytes, then a
1351 // loop is generated to move 32 bytes and
1352 // update the src and dst addresses on each
1353 // iteration. When < 64 bytes, the appropriate
1354 // number of moves are generated based on the
1356 // When moving >= 64 bytes a loop is used
1357 // MOVD len/32,REG_TMP
1361 // LXVD2X (R0)(R21),VS32
1362 // LXVD2X (R31)(R21),VS33
1364 // STXVD2X VS32,(R0)(R20)
1365 // STXVD2X VS33,(R31)(R20)
1368 // Bytes not moved by this loop are moved
1369 // with a combination of the following instructions,
1370 // starting with the largest sizes and generating as
1371 // many as needed, using the appropriate offset value.
1381 // Each loop iteration moves 32 bytes
// v.AuxInt is the total byte count to move.
1382 ctr := v.AuxInt / bytesPerLoop
1384 // Remainder after the loop
1385 rem := v.AuxInt % bytesPerLoop
1387 dstReg := v.Args[0].Reg()
1388 srcReg := v.Args[1].Reg()
1390 // The set of registers used here, must match the clobbered reg list
1396 // Only generate looping code when loop counter is > 1 for >= 64 bytes
// Load the iteration count into REGTMP, then move it to CTR so the
// loop can end with a bdnz.
1399 p := s.Prog(ppc64.AMOVD)
1400 p.From.Type = obj.TYPE_CONST
1402 p.To.Type = obj.TYPE_REG
1403 p.To.Reg = ppc64.REGTMP
1405 p = s.Prog(ppc64.AMOVD)
1406 p.From.Type = obj.TYPE_REG
1407 p.From.Reg = ppc64.REGTMP
1408 p.To.Type = obj.TYPE_REG
1409 p.To.Reg = ppc64.REG_CTR
1411 // Use REGTMP as index reg
1412 p = s.Prog(ppc64.AMOVD)
1413 p.From.Type = obj.TYPE_CONST
1415 p.To.Type = obj.TYPE_REG
1416 p.To.Reg = ppc64.REGTMP
1418 // Don't add padding for
1419 // alignment with small iteration
// PCALIGN aligns the loop top for better branch performance.
1422 p = s.Prog(obj.APCALIGN)
1423 p.From.Type = obj.TYPE_CONST
1427 // Generate 16 byte loads and stores.
1428 // Use temp register for index (16)
1429 // on the second one.
// First 16 bytes: indexed load (srcReg)(R0) into VS32.
1431 p = s.Prog(ppc64.ALXVD2X)
1432 p.From.Type = obj.TYPE_MEM
1434 p.From.Index = ppc64.REGZERO
1435 p.To.Type = obj.TYPE_REG
1436 p.To.Reg = ppc64.REG_VS32
// Second 16 bytes: indexed load (srcReg)(REGTMP) into VS33.
1440 p = s.Prog(ppc64.ALXVD2X)
1441 p.From.Type = obj.TYPE_MEM
1443 p.From.Index = ppc64.REGTMP
1444 p.To.Type = obj.TYPE_REG
1445 p.To.Reg = ppc64.REG_VS33
1447 // increment the src reg for next iteration
1448 p = s.Prog(ppc64.AADD)
1450 p.From.Type = obj.TYPE_CONST
1451 p.From.Offset = bytesPerLoop
1452 p.To.Type = obj.TYPE_REG
1455 // generate 16 byte stores
1456 p = s.Prog(ppc64.ASTXVD2X)
1457 p.From.Type = obj.TYPE_REG
1458 p.From.Reg = ppc64.REG_VS32
1459 p.To.Type = obj.TYPE_MEM
1461 p.To.Index = ppc64.REGZERO
1463 p = s.Prog(ppc64.ASTXVD2X)
1464 p.From.Type = obj.TYPE_REG
1465 p.From.Reg = ppc64.REG_VS33
1466 p.To.Type = obj.TYPE_MEM
1468 p.To.Index = ppc64.REGTMP
1470 // increment the dst reg for next iteration
1471 p = s.Prog(ppc64.AADD)
1473 p.From.Type = obj.TYPE_CONST
1474 p.From.Offset = bytesPerLoop
1475 p.To.Type = obj.TYPE_REG
1478 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1480 p = s.Prog(ppc64.ABC)
1481 p.From.Type = obj.TYPE_CONST
1482 p.From.Offset = ppc64.BO_BCTR
1483 p.Reg = ppc64.REG_CR0LT
1484 p.To.Type = obj.TYPE_BRANCH
1487 // srcReg and dstReg were incremented in the loop, so
1488 // later instructions start with offset 0.
1492 // No loop was generated for one iteration, so
1493 // add 32 bytes to the remainder to move those bytes.
1499 // Generate 16 byte loads and stores.
1500 // Use temp register for index (value 16)
1501 // on the second one.
// Straight-line (no loop) 16-byte move of the leading remainder.
1502 p := s.Prog(ppc64.ALXVD2X)
1503 p.From.Type = obj.TYPE_MEM
1505 p.From.Index = ppc64.REGZERO
1506 p.To.Type = obj.TYPE_REG
1507 p.To.Reg = ppc64.REG_VS32
1509 p = s.Prog(ppc64.ASTXVD2X)
1510 p.From.Type = obj.TYPE_REG
1511 p.From.Reg = ppc64.REG_VS32
1512 p.To.Type = obj.TYPE_MEM
1514 p.To.Index = ppc64.REGZERO
1520 // Use REGTMP as index reg
// A second 16-byte move at an index held in REGTMP (the constant
// loaded just below; its value line is elided from this view).
1521 p := s.Prog(ppc64.AMOVD)
1522 p.From.Type = obj.TYPE_CONST
1524 p.To.Type = obj.TYPE_REG
1525 p.To.Reg = ppc64.REGTMP
1527 p = s.Prog(ppc64.ALXVD2X)
1528 p.From.Type = obj.TYPE_MEM
1530 p.From.Index = ppc64.REGTMP
1531 p.To.Type = obj.TYPE_REG
1532 p.To.Reg = ppc64.REG_VS32
1534 p = s.Prog(ppc64.ASTXVD2X)
1535 p.From.Type = obj.TYPE_REG
1536 p.From.Reg = ppc64.REG_VS32
1537 p.To.Type = obj.TYPE_MEM
1539 p.To.Index = ppc64.REGTMP
1546 // Generate all the remaining load and store pairs, starting with
1547 // as many 8 byte moves as possible, then 4, 2, 1.
1549 op, size := ppc64.AMOVB, int64(1)
1552 op, size = ppc64.AMOVD, 8
// MOVWZ (zero-extending) so the 4-byte load does not sign-extend
// into REGTMP before the matching store.
1554 op, size = ppc64.AMOVWZ, 4
1556 op, size = ppc64.AMOVH, 2
// Load remainder bytes from (src)+offset into REGTMP...
1560 p.To.Type = obj.TYPE_REG
1561 p.To.Reg = ppc64.REGTMP
1562 p.From.Type = obj.TYPE_MEM
1564 p.From.Offset = offset
// ...then store them to (dst)+offset.
1568 p.From.Type = obj.TYPE_REG
1569 p.From.Reg = ppc64.REGTMP
1570 p.To.Type = obj.TYPE_MEM
1572 p.To.Offset = offset
// NOTE(review): many original lines are elided from this view (gaps in
// the embedded line numbers); confirm against the full file.
1577 case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
// power9 bulk copy: a 64-byte-per-iteration loop using the
// D-form LXV/STXV (offset addressing, no index register needed),
// then a remainder moved with smaller loads/stores.
1578 bytesPerLoop := int64(64)
1579 // This is used when moving more
1580 // than 8 bytes on power9. Moves start with
1581 // as many 8 byte moves as possible, then
1582 // 4, 2, or 1 byte(s) as remaining. This will
1583 // work and be efficient for power8 or later.
1584 // If there are 64 or more bytes, then a
1585 // loop is generated to move 64 bytes and
1586 // update the src and dst addresses on each
1587 // iteration. When < 64 bytes, the appropriate
1588 // number of moves are generated based on the
1590 // When moving >= 64 bytes a loop is used
// NOTE(review): "len/32" below looks stale — bytesPerLoop is 64 in
// this case; confirm the elided constant actually divides by 64.
1591 // MOVD len/32,REG_TMP
1598 // STXV VS33,16(R20)
1601 // Bytes not moved by this loop are moved
1602 // with a combination of the following instructions,
1603 // starting with the largest sizes and generating as
1604 // many as needed, using the appropriate offset value.
// Each loop iteration moves 64 bytes (four 16-byte LXV/STXV pairs
// at offsets 0, 16, 32, 48 — see the loop body below).
1614 // Each loop iteration moves 32 bytes
1615 ctr := v.AuxInt / bytesPerLoop
1617 // Remainder after the loop
1618 rem := v.AuxInt % bytesPerLoop
1620 dstReg := v.Args[0].Reg()
1621 srcReg := v.Args[1].Reg()
1628 // Only generate looping code when loop counter is > 1 for >= 64 bytes
// Load the iteration count into REGTMP, then CTR, for a bdnz loop.
1631 p := s.Prog(ppc64.AMOVD)
1632 p.From.Type = obj.TYPE_CONST
1634 p.To.Type = obj.TYPE_REG
1635 p.To.Reg = ppc64.REGTMP
1637 p = s.Prog(ppc64.AMOVD)
1638 p.From.Type = obj.TYPE_REG
1639 p.From.Reg = ppc64.REGTMP
1640 p.To.Type = obj.TYPE_REG
1641 p.To.Reg = ppc64.REG_CTR
// Align the loop top for branch performance.
1643 p = s.Prog(obj.APCALIGN)
1644 p.From.Type = obj.TYPE_CONST
1647 // Generate 16 byte loads and stores.
// Bytes 0-31 of this iteration: load into VS32/VS33...
1648 p = s.Prog(ppc64.ALXV)
1649 p.From.Type = obj.TYPE_MEM
1651 p.From.Offset = offset
1652 p.To.Type = obj.TYPE_REG
1653 p.To.Reg = ppc64.REG_VS32
1657 p = s.Prog(ppc64.ALXV)
1658 p.From.Type = obj.TYPE_MEM
1660 p.From.Offset = offset + 16
1661 p.To.Type = obj.TYPE_REG
1662 p.To.Reg = ppc64.REG_VS33
1664 // generate 16 byte stores
// ...and store them to the destination at the same offsets.
1665 p = s.Prog(ppc64.ASTXV)
1666 p.From.Type = obj.TYPE_REG
1667 p.From.Reg = ppc64.REG_VS32
1668 p.To.Type = obj.TYPE_MEM
1670 p.To.Offset = offset
1672 p = s.Prog(ppc64.ASTXV)
1673 p.From.Type = obj.TYPE_REG
1674 p.From.Reg = ppc64.REG_VS33
1675 p.To.Type = obj.TYPE_MEM
1677 p.To.Offset = offset + 16
1679 // Generate 16 byte loads and stores.
// Bytes 32-63 of this iteration, same pattern at offsets +32/+48.
1680 p = s.Prog(ppc64.ALXV)
1681 p.From.Type = obj.TYPE_MEM
1683 p.From.Offset = offset + 32
1684 p.To.Type = obj.TYPE_REG
1685 p.To.Reg = ppc64.REG_VS32
1687 p = s.Prog(ppc64.ALXV)
1688 p.From.Type = obj.TYPE_MEM
1690 p.From.Offset = offset + 48
1691 p.To.Type = obj.TYPE_REG
1692 p.To.Reg = ppc64.REG_VS33
1694 // generate 16 byte stores
1695 p = s.Prog(ppc64.ASTXV)
1696 p.From.Type = obj.TYPE_REG
1697 p.From.Reg = ppc64.REG_VS32
1698 p.To.Type = obj.TYPE_MEM
1700 p.To.Offset = offset + 32
1702 p = s.Prog(ppc64.ASTXV)
1703 p.From.Type = obj.TYPE_REG
1704 p.From.Reg = ppc64.REG_VS33
1705 p.To.Type = obj.TYPE_MEM
1707 p.To.Offset = offset + 48
1709 // increment the src reg for next iteration
1710 p = s.Prog(ppc64.AADD)
1712 p.From.Type = obj.TYPE_CONST
1713 p.From.Offset = bytesPerLoop
1714 p.To.Type = obj.TYPE_REG
1717 // increment the dst reg for next iteration
1718 p = s.Prog(ppc64.AADD)
1720 p.From.Type = obj.TYPE_CONST
1721 p.From.Offset = bytesPerLoop
1722 p.To.Type = obj.TYPE_REG
1725 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1727 p = s.Prog(ppc64.ABC)
1728 p.From.Type = obj.TYPE_CONST
1729 p.From.Offset = ppc64.BO_BCTR
1730 p.Reg = ppc64.REG_CR0LT
1731 p.To.Type = obj.TYPE_BRANCH
1734 // srcReg and dstReg were incremented in the loop, so
1735 // later instructions start with offset 0.
1739 // No loop was generated for one iteration, so
// NOTE(review): "32" below looks stale for this case — bytesPerLoop is
// 64 here; the rem-adjustment statement is elided from this view, so
// confirm the actual amount added in the full file.
1740 // add 32 bytes to the remainder to move those bytes.
// 32-byte remainder chunk: two LXV/STXV pairs (intervening lines
// with the +16 offsets are elided from this view).
1745 p := s.Prog(ppc64.ALXV)
1746 p.From.Type = obj.TYPE_MEM
1748 p.To.Type = obj.TYPE_REG
1749 p.To.Reg = ppc64.REG_VS32
1751 p = s.Prog(ppc64.ALXV)
1752 p.From.Type = obj.TYPE_MEM
1755 p.To.Type = obj.TYPE_REG
1756 p.To.Reg = ppc64.REG_VS33
1758 p = s.Prog(ppc64.ASTXV)
1759 p.From.Type = obj.TYPE_REG
1760 p.From.Reg = ppc64.REG_VS32
1761 p.To.Type = obj.TYPE_MEM
1764 p = s.Prog(ppc64.ASTXV)
1765 p.From.Type = obj.TYPE_REG
1766 p.From.Reg = ppc64.REG_VS33
1767 p.To.Type = obj.TYPE_MEM
1776 // Generate 16 byte loads and stores.
// 16-byte remainder chunk.
1777 p := s.Prog(ppc64.ALXV)
1778 p.From.Type = obj.TYPE_MEM
1780 p.From.Offset = offset
1781 p.To.Type = obj.TYPE_REG
1782 p.To.Reg = ppc64.REG_VS32
1784 p = s.Prog(ppc64.ASTXV)
1785 p.From.Type = obj.TYPE_REG
1786 p.From.Reg = ppc64.REG_VS32
1787 p.To.Type = obj.TYPE_MEM
1789 p.To.Offset = offset
// Another 16-byte load/store pair (guarding condition elided).
1795 p := s.Prog(ppc64.ALXV)
1796 p.From.Type = obj.TYPE_MEM
1798 p.From.Offset = offset
1799 p.To.Type = obj.TYPE_REG
1800 p.To.Reg = ppc64.REG_VS32
1802 p = s.Prog(ppc64.ASTXV)
1803 p.From.Type = obj.TYPE_REG
1804 p.From.Reg = ppc64.REG_VS32
1805 p.To.Type = obj.TYPE_MEM
1807 p.To.Offset = offset
1813 // Generate all the remaining load and store pairs, starting with
1814 // as many 8 byte moves as possible, then 4, 2, 1.
1816 op, size := ppc64.AMOVB, int64(1)
1819 op, size = ppc64.AMOVD, 8
// MOVWZ: zero-extending 4-byte load, matching the LoweredMove case.
1821 op, size = ppc64.AMOVWZ, 4
1823 op, size = ppc64.AMOVH, 2
// Load from (src)+offset into REGTMP, then store to (dst)+offset.
1827 p.To.Type = obj.TYPE_REG
1828 p.To.Reg = ppc64.REGTMP
1829 p.From.Type = obj.TYPE_MEM
1831 p.From.Offset = offset
1835 p.From.Type = obj.TYPE_REG
1836 p.From.Reg = ppc64.REGTMP
1837 p.To.Type = obj.TYPE_MEM
1839 p.To.Offset = offset
// NOTE(review): several original lines are elided from this view (gaps
// in the embedded line numbers), including the call-emission bodies for
// the static/tail call cases.
1844 case ssa.OpPPC64CALLstatic:
1847 case ssa.OpPPC64CALLtail:
1850 case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
// Indirect call: move the target address (expected in R12 per the
// ELFv2 convention checked below) into LR, then branch via LR.
1851 p := s.Prog(ppc64.AMOVD)
1852 p.From.Type = obj.TYPE_REG
1853 p.From.Reg = v.Args[0].Reg()
1854 p.To.Type = obj.TYPE_REG
1855 p.To.Reg = ppc64.REG_LR
1857 if v.Args[0].Reg() != ppc64.REG_R12 {
1858 v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1863 // Convert the call into a blrl with hint this is not a subroutine return.
1864 // The full bclrl opcode must be specified when passing a hint.
1866 pp.From.Type = obj.TYPE_CONST
1867 pp.From.Offset = ppc64.BO_ALWAYS
1868 pp.Reg = ppc64.REG_CR0LT // The preferred value if BI is ignored.
1869 pp.To.Reg = ppc64.REG_LR
1872 if base.Ctxt.Flag_shared {
1873 // When compiling Go into PIC, the function we just
1874 // called via pointer might have been implemented in
1875 // a separate module and so overwritten the TOC
1876 // pointer in R2; reload it.
1877 q := s.Prog(ppc64.AMOVD)
1878 q.From.Type = obj.TYPE_MEM
1880 q.From.Reg = ppc64.REGSP
1881 q.To.Type = obj.TYPE_REG
1882 q.To.Reg = ppc64.REG_R2
// Write barrier: call the runtime's write-barrier function named
// in v.Aux.
1885 case ssa.OpPPC64LoweredWB:
1886 p := s.Prog(obj.ACALL)
1887 p.To.Type = obj.TYPE_MEM
1888 p.To.Name = obj.NAME_EXTERN
1889 p.To.Sym = v.Aux.(*obj.LSym)
// Bounds-check failure: call the appropriate panic stub, selected
// by v.AuxInt.
1891 case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1892 p := s.Prog(obj.ACALL)
1893 p.To.Type = obj.TYPE_MEM
1894 p.To.Name = obj.NAME_EXTERN
1895 p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
1896 s.UseArgs(16) // space used in callee args area by assembly stubs
1898 case ssa.OpPPC64LoweredNilCheck:
// On AIX a load from address 0 does not fault, so the nil check is
// an explicit compare-and-store: CMP arg0,R0; BNE past; MOVW R0,0(R0)
// (a write to address 0, which does trigger SIGSEGV).
1899 if buildcfg.GOOS == "aix" {
1903 // NOP (so the BNE has somewhere to land)
1906 p := s.Prog(ppc64.ACMP)
1907 p.From.Type = obj.TYPE_REG
1908 p.From.Reg = v.Args[0].Reg()
1909 p.To.Type = obj.TYPE_REG
1910 p.To.Reg = ppc64.REG_R0
1913 p2 := s.Prog(ppc64.ABNE)
1914 p2.To.Type = obj.TYPE_BRANCH
1917 // Write at 0 is forbidden and will trigger a SIGSEGV
1918 p = s.Prog(ppc64.AMOVW)
1919 p.From.Type = obj.TYPE_REG
1920 p.From.Reg = ppc64.REG_R0
1921 p.To.Type = obj.TYPE_MEM
1922 p.To.Reg = ppc64.REG_R0
1924 // NOP (so the BNE has somewhere to land)
1925 nop := s.Prog(obj.ANOP)
1926 p2.To.SetTarget(nop)
1929 // Issue a load which will fault if arg is nil.
1930 p := s.Prog(ppc64.AMOVBZ)
1931 p.From.Type = obj.TYPE_MEM
1932 p.From.Reg = v.Args[0].Reg()
1933 ssagen.AddAux(&p.From, v)
1934 p.To.Type = obj.TYPE_REG
1935 p.To.Reg = ppc64.REGTMP
1937 if logopt.Enabled() {
1938 logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1940 if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1941 base.WarnfAt(v.Pos, "generated nil check")
1944 // These should be resolved by rules and not make it here.
1945 case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1946 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1947 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1948 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1949 case ssa.OpPPC64InvertFlags:
1950 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1951 case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1952 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1953 case ssa.OpClobber, ssa.OpClobberReg:
1954 // TODO: implement for clobberdead experiment. Nop is ok for now.
1956 v.Fatalf("genValue not implemented: %s", v.LongString())
// blockJump maps a PPC64 block kind to the branch instruction taken when
// the condition holds and the inverted branch when it does not.
// NOTE(review): the declarations of the first struct fields (original
// line 1961, presumably the asm/invasm obj.As pair) and the table's
// closing lines are elided from this view.
1960 var blockJump = [...]struct {
// asmeq/invasmun: extra-branch flags used for FP comparisons where a
// single PowerPC condition bit cannot express the predicate (EQ must be
// OR'd in, or unordered must be handled on the inverted path).
1962 asmeq, invasmun bool
1964 ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1965 ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1967 ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1968 ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1969 ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1970 ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1972 // TODO: need to work FP comparisons into block jumps
1973 ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1974 ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1975 ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1976 ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
// ssaGenBlock emits the control-flow instructions ending block b, given
// the block that will be laid out next (so fallthrough jumps can be
// omitted). NOTE(review): several lines are elided from this view
// (switch header, some case conditions, closing braces); confirm against
// the full file.
1979 func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
1981 case ssa.BlockDefer:
1982 // defer returns in R3:
1983 // 0 if we should continue executing
1984 // 1 if we should jump to deferreturn call
1985 p := s.Prog(ppc64.ACMP)
1986 p.From.Type = obj.TYPE_REG
1987 p.From.Reg = ppc64.REG_R3
1988 p.To.Type = obj.TYPE_REG
1989 p.To.Reg = ppc64.REG_R0
1991 p = s.Prog(ppc64.ABNE)
1992 p.To.Type = obj.TYPE_BRANCH
1993 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
1994 if b.Succs[0].Block() != next {
1995 p := s.Prog(obj.AJMP)
1996 p.To.Type = obj.TYPE_BRANCH
1997 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
2000 case ssa.BlockPlain:
// Unconditional successor: jump only if it is not the next block.
2001 if b.Succs[0].Block() != next {
2002 p := s.Prog(obj.AJMP)
2003 p.To.Type = obj.TYPE_BRANCH
2004 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
2006 case ssa.BlockExit, ssa.BlockRetJmp:
// Conditional blocks: pick branch forms from the blockJump table,
// choosing the inverted branch when the likely successor is next.
2010 case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
2011 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
2012 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
2013 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
2014 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
2015 jmp := blockJump[b.Kind]
2017 case b.Succs[0].Block():
2018 s.Br(jmp.invasm, b.Succs[1].Block())
2020 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
// BVS: branch if the FP "unordered" (summary-overflow) bit is set,
// routing NaN comparisons to the false successor.
2021 s.Br(ppc64.ABVS, b.Succs[1].Block())
2023 case b.Succs[1].Block():
2024 s.Br(jmp.asm, b.Succs[0].Block())
// Extra BEQ covers the "or equal" half of FP GE/LE (asmeq).
2026 s.Br(ppc64.ABEQ, b.Succs[0].Block())
2029 if b.Likely != ssa.BranchUnlikely {
2030 s.Br(jmp.asm, b.Succs[0].Block())
2032 s.Br(ppc64.ABEQ, b.Succs[0].Block())
2034 s.Br(obj.AJMP, b.Succs[1].Block())
2036 s.Br(jmp.invasm, b.Succs[1].Block())
2038 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2039 s.Br(ppc64.ABVS, b.Succs[1].Block())
2041 s.Br(obj.AJMP, b.Succs[0].Block())
2045 b.Fatalf("branch not implemented: %s", b.LongString())
// loadRegResult emits a load of the stack-allocated result slot for n
// (at its frame offset plus off) into the given register, using the
// type-appropriate load instruction. NOTE(review): the tail of this
// function (setting p.To.Reg and the return) is elided from this view.
2049 func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2050 p := s.Prog(loadByType(t))
2051 p.From.Type = obj.TYPE_MEM
2052 p.From.Name = obj.NAME_AUTO
2053 p.From.Sym = n.Linksym()
2054 p.From.Offset = n.FrameOffset() + off
2055 p.To.Type = obj.TYPE_REG
// spillArgReg appends (after p) a store of the argument register reg
// into n's parameter stack slot at its frame offset plus off, using the
// type-appropriate store instruction. The Prog is marked not-a-statement
// so the spill does not get its own debug line. NOTE(review): the return
// statement and closing brace are elided from this view.
2060 func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2061 p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
2062 p.To.Name = obj.NAME_PARAM
2063 p.To.Sym = n.Linksym()
2064 p.Pos = p.Pos.WithNotStmt()