1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/base"
9 "cmd/compile/internal/ir"
10 "cmd/compile/internal/logopt"
11 "cmd/compile/internal/objw"
12 "cmd/compile/internal/ssa"
13 "cmd/compile/internal/ssagen"
14 "cmd/compile/internal/types"
16 "cmd/internal/obj/ppc64"
22 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
23 func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
24 // flive := b.FlagsLiveAtEnd
25 // if b.Control != nil && b.Control.Type.IsFlags() {
28 // for i := len(b.Values) - 1; i >= 0; i-- {
30 // if flive && (v.Op == ssa.OpPPC64MOVDconst) {
31 // // The "mark" is any non-nil Aux value.
34 // if v.Type.IsFlags() {
37 // for _, a := range v.Args {
38 // if a.Type.IsFlags() {
45 // loadByType returns the load instruction of the given type.
46 func loadByType(t *types.Type) obj.As {
78 panic("bad load type")
81 // storeByType returns the store instruction of the given type.
82 func storeByType(t *types.Type) obj.As {
102 panic("bad store type")
105 func ssaGenValue(s *ssagen.State, v *ssa.Value) {
128 case ssa.OpPPC64LoweredMuluhilo:
129 // MULHDU Rarg1, Rarg0, Reg0
130 // MULLD Rarg1, Rarg0, Reg1
131 r0 := v.Args[0].Reg()
132 r1 := v.Args[1].Reg()
133 p := s.Prog(ppc64.AMULHDU)
134 p.From.Type = obj.TYPE_REG
137 p.To.Type = obj.TYPE_REG
139 p1 := s.Prog(ppc64.AMULLD)
140 p1.From.Type = obj.TYPE_REG
143 p1.To.Type = obj.TYPE_REG
146 case ssa.OpPPC64LoweredAtomicAnd8,
147 ssa.OpPPC64LoweredAtomicAnd32,
148 ssa.OpPPC64LoweredAtomicOr8,
149 ssa.OpPPC64LoweredAtomicOr32:
151 // LBAR/LWAR (Rarg0), Rtmp
152 // AND/OR Rarg1, Rtmp
153 // STBCCC/STWCCC Rtmp, (Rarg0)
157 if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
161 r0 := v.Args[0].Reg()
162 r1 := v.Args[1].Reg()
163 // LWSYNC - Assuming shared data not write-through-required nor
164 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
165 plwsync := s.Prog(ppc64.ALWSYNC)
166 plwsync.To.Type = obj.TYPE_NONE
169 p.From.Type = obj.TYPE_MEM
171 p.To.Type = obj.TYPE_REG
172 p.To.Reg = ppc64.REGTMP
174 p1 := s.Prog(v.Op.Asm())
175 p1.From.Type = obj.TYPE_REG
177 p1.To.Type = obj.TYPE_REG
178 p1.To.Reg = ppc64.REGTMP
181 p2.From.Type = obj.TYPE_REG
182 p2.From.Reg = ppc64.REGTMP
183 p2.To.Type = obj.TYPE_MEM
185 p2.RegTo2 = ppc64.REGTMP
187 p3 := s.Prog(ppc64.ABNE)
188 p3.To.Type = obj.TYPE_BRANCH
191 case ssa.OpPPC64LoweredAtomicAdd32,
192 ssa.OpPPC64LoweredAtomicAdd64:
194 // LDAR/LWAR (Rarg0), Rout
196 // STDCCC/STWCCC Rout, (Rarg0)
198 // MOVW Rout,Rout (if Add32)
201 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
205 r0 := v.Args[0].Reg()
206 r1 := v.Args[1].Reg()
208 // LWSYNC - Assuming shared data not write-through-required nor
209 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
210 plwsync := s.Prog(ppc64.ALWSYNC)
211 plwsync.To.Type = obj.TYPE_NONE
214 p.From.Type = obj.TYPE_MEM
216 p.To.Type = obj.TYPE_REG
219 p1 := s.Prog(ppc64.AADD)
220 p1.From.Type = obj.TYPE_REG
223 p1.To.Type = obj.TYPE_REG
226 p3.From.Type = obj.TYPE_REG
228 p3.To.Type = obj.TYPE_MEM
231 p4 := s.Prog(ppc64.ABNE)
232 p4.To.Type = obj.TYPE_BRANCH
235 // Ensure a 32 bit result
236 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
237 p5 := s.Prog(ppc64.AMOVWZ)
238 p5.To.Type = obj.TYPE_REG
240 p5.From.Type = obj.TYPE_REG
244 case ssa.OpPPC64LoweredAtomicExchange32,
245 ssa.OpPPC64LoweredAtomicExchange64:
247 // LDAR/LWAR (Rarg0), Rout
248 // STDCCC/STWCCC Rout, (Rarg0)
253 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
257 r0 := v.Args[0].Reg()
258 r1 := v.Args[1].Reg()
260 // LWSYNC - Assuming shared data not write-through-required nor
261 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
262 plwsync := s.Prog(ppc64.ALWSYNC)
263 plwsync.To.Type = obj.TYPE_NONE
266 p.From.Type = obj.TYPE_MEM
268 p.To.Type = obj.TYPE_REG
272 p1.From.Type = obj.TYPE_REG
274 p1.To.Type = obj.TYPE_MEM
277 p2 := s.Prog(ppc64.ABNE)
278 p2.To.Type = obj.TYPE_BRANCH
281 pisync := s.Prog(ppc64.AISYNC)
282 pisync.To.Type = obj.TYPE_NONE
284 case ssa.OpPPC64LoweredAtomicLoad8,
285 ssa.OpPPC64LoweredAtomicLoad32,
286 ssa.OpPPC64LoweredAtomicLoad64,
287 ssa.OpPPC64LoweredAtomicLoadPtr:
289 // MOVB/MOVD/MOVW (Rarg0), Rout
296 case ssa.OpPPC64LoweredAtomicLoad8:
298 case ssa.OpPPC64LoweredAtomicLoad32:
302 arg0 := v.Args[0].Reg()
304 // SYNC when AuxInt == 1; otherwise, load-acquire
306 psync := s.Prog(ppc64.ASYNC)
307 psync.To.Type = obj.TYPE_NONE
311 p.From.Type = obj.TYPE_MEM
313 p.To.Type = obj.TYPE_REG
317 p1.From.Type = obj.TYPE_REG
319 p1.To.Type = obj.TYPE_REG
322 p2 := s.Prog(ppc64.ABNE)
323 p2.To.Type = obj.TYPE_BRANCH
325 pisync := s.Prog(ppc64.AISYNC)
326 pisync.To.Type = obj.TYPE_NONE
327 p2.To.SetTarget(pisync)
329 case ssa.OpPPC64LoweredAtomicStore8,
330 ssa.OpPPC64LoweredAtomicStore32,
331 ssa.OpPPC64LoweredAtomicStore64:
333 // MOVB/MOVW/MOVD arg1,(arg0)
336 case ssa.OpPPC64LoweredAtomicStore8:
338 case ssa.OpPPC64LoweredAtomicStore32:
341 arg0 := v.Args[0].Reg()
342 arg1 := v.Args[1].Reg()
343 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
345 syncOp := ppc64.ASYNC
347 syncOp = ppc64.ALWSYNC
349 psync := s.Prog(syncOp)
350 psync.To.Type = obj.TYPE_NONE
353 p.To.Type = obj.TYPE_MEM
355 p.From.Type = obj.TYPE_REG
358 case ssa.OpPPC64LoweredAtomicCas64,
359 ssa.OpPPC64LoweredAtomicCas32:
362 // LDAR (Rarg0), MutexHint, Rtmp
365 // STDCCC Rarg2, (Rarg0)
367 // LWSYNC // Only for sequential consistency; not required in CasRel.
376 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
381 r0 := v.Args[0].Reg()
382 r1 := v.Args[1].Reg()
383 r2 := v.Args[2].Reg()
385 // LWSYNC - Assuming shared data not write-through-required nor
386 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
387 plwsync1 := s.Prog(ppc64.ALWSYNC)
388 plwsync1.To.Type = obj.TYPE_NONE
391 p.From.Type = obj.TYPE_MEM
393 p.To.Type = obj.TYPE_REG
394 p.To.Reg = ppc64.REGTMP
395 // If it is a Compare-and-Swap-Release operation, set the EH field with
402 p1.From.Type = obj.TYPE_REG
404 p1.To.Reg = ppc64.REGTMP
405 p1.To.Type = obj.TYPE_REG
407 p2 := s.Prog(ppc64.ABNE)
408 p2.To.Type = obj.TYPE_BRANCH
411 p3.From.Type = obj.TYPE_REG
413 p3.To.Type = obj.TYPE_MEM
416 p4 := s.Prog(ppc64.ABNE)
417 p4.To.Type = obj.TYPE_BRANCH
419 // LWSYNC - Assuming shared data not write-through-required nor
420 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
421 // If the operation is a CAS-Release, then synchronization is not necessary.
423 plwsync2 := s.Prog(ppc64.ALWSYNC)
424 plwsync2.To.Type = obj.TYPE_NONE
427 p5 := s.Prog(ppc64.AMOVD)
428 p5.From.Type = obj.TYPE_CONST
430 p5.To.Type = obj.TYPE_REG
433 p6 := s.Prog(obj.AJMP)
434 p6.To.Type = obj.TYPE_BRANCH
436 p7 := s.Prog(ppc64.AMOVD)
437 p7.From.Type = obj.TYPE_CONST
439 p7.To.Type = obj.TYPE_REG
443 p8 := s.Prog(obj.ANOP)
446 case ssa.OpPPC64LoweredPubBarrier:
450 case ssa.OpPPC64LoweredGetClosurePtr:
451 // Closure pointer is R11 (already)
452 ssagen.CheckLoweredGetClosurePtr(v)
454 case ssa.OpPPC64LoweredGetCallerSP:
455 // caller's SP is FixedFrameSize below the address of the first arg
456 p := s.Prog(ppc64.AMOVD)
457 p.From.Type = obj.TYPE_ADDR
458 p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
459 p.From.Name = obj.NAME_PARAM
460 p.To.Type = obj.TYPE_REG
463 case ssa.OpPPC64LoweredGetCallerPC:
464 p := s.Prog(obj.AGETCALLERPC)
465 p.To.Type = obj.TYPE_REG
468 case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
469 // input is already rounded
472 loadOp := loadByType(v.Type)
474 ssagen.AddrAuto(&p.From, v.Args[0])
475 p.To.Type = obj.TYPE_REG
479 storeOp := storeByType(v.Type)
481 p.From.Type = obj.TYPE_REG
482 p.From.Reg = v.Args[0].Reg()
483 ssagen.AddrAuto(&p.To, v)
485 case ssa.OpArgIntReg, ssa.OpArgFloatReg:
486 // The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
487 // The loop only runs once.
488 for _, a := range v.Block.Func.RegArgs {
489 // Pass the spill/unspill information along to the assembler, offset by size of
490 // the saved LR slot.
491 addr := ssagen.SpillSlotAddr(a, ppc64.REGSP, base.Ctxt.Arch.FixedFrameSize)
492 s.FuncInfo().AddSpill(
493 obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
495 v.Block.Func.RegArgs = nil
497 ssagen.CheckArgReg(v)
499 case ssa.OpPPC64DIVD:
509 r0 := v.Args[0].Reg()
510 r1 := v.Args[1].Reg()
512 p := s.Prog(ppc64.ACMP)
513 p.From.Type = obj.TYPE_REG
515 p.To.Type = obj.TYPE_CONST
518 pbahead := s.Prog(ppc64.ABEQ)
519 pbahead.To.Type = obj.TYPE_BRANCH
521 p = s.Prog(v.Op.Asm())
522 p.From.Type = obj.TYPE_REG
525 p.To.Type = obj.TYPE_REG
528 pbover := s.Prog(obj.AJMP)
529 pbover.To.Type = obj.TYPE_BRANCH
531 p = s.Prog(ppc64.ANEG)
532 p.To.Type = obj.TYPE_REG
534 p.From.Type = obj.TYPE_REG
536 pbahead.To.SetTarget(p)
539 pbover.To.SetTarget(p)
541 case ssa.OpPPC64DIVW:
542 // word-width version of above
544 r0 := v.Args[0].Reg()
545 r1 := v.Args[1].Reg()
547 p := s.Prog(ppc64.ACMPW)
548 p.From.Type = obj.TYPE_REG
550 p.To.Type = obj.TYPE_CONST
553 pbahead := s.Prog(ppc64.ABEQ)
554 pbahead.To.Type = obj.TYPE_BRANCH
556 p = s.Prog(v.Op.Asm())
557 p.From.Type = obj.TYPE_REG
560 p.To.Type = obj.TYPE_REG
563 pbover := s.Prog(obj.AJMP)
564 pbover.To.Type = obj.TYPE_BRANCH
566 p = s.Prog(ppc64.ANEG)
567 p.To.Type = obj.TYPE_REG
569 p.From.Type = obj.TYPE_REG
571 pbahead.To.SetTarget(p)
574 pbover.To.SetTarget(p)
576 case ssa.OpPPC64CLRLSLWI:
578 r1 := v.Args[0].Reg()
580 p := s.Prog(v.Op.Asm())
581 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
582 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
583 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
585 p.To.Type = obj.TYPE_REG
588 case ssa.OpPPC64CLRLSLDI:
590 r1 := v.Args[0].Reg()
592 p := s.Prog(v.Op.Asm())
593 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
594 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
595 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
597 p.To.Type = obj.TYPE_REG
600 // Mask has been set as sh
601 case ssa.OpPPC64RLDICL:
603 r1 := v.Args[0].Reg()
605 p := s.Prog(v.Op.Asm())
606 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}
607 p.SetFrom3Const(ssa.GetPPC64Shiftmb(shifts))
609 p.To.Type = obj.TYPE_REG
612 case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
613 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
614 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
615 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
616 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
617 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
618 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
619 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
621 r1 := v.Args[0].Reg()
622 r2 := v.Args[1].Reg()
623 p := s.Prog(v.Op.Asm())
624 p.From.Type = obj.TYPE_REG
627 p.To.Type = obj.TYPE_REG
630 case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
631 r1 := v.Args[0].Reg()
632 r2 := v.Args[1].Reg()
633 p := s.Prog(v.Op.Asm())
634 p.From.Type = obj.TYPE_REG
637 p.To.Type = obj.TYPE_REG
638 p.To.Reg = ppc64.REGTMP // result is not needed
640 case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
641 p := s.Prog(v.Op.Asm())
642 p.From.Type = obj.TYPE_CONST
643 p.From.Offset = v.AuxInt
644 p.Reg = v.Args[0].Reg()
645 p.To.Type = obj.TYPE_REG
648 // Auxint holds encoded rotate + mask
649 case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
650 rot, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
651 p := s.Prog(v.Op.Asm())
652 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
653 p.Reg = v.Args[0].Reg()
654 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(rot)}
655 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
658 case ssa.OpPPC64RLWNM:
659 _, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
660 p := s.Prog(v.Op.Asm())
661 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
662 p.Reg = v.Args[0].Reg()
663 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
664 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
666 case ssa.OpPPC64MADDLD:
668 r1 := v.Args[0].Reg()
669 r2 := v.Args[1].Reg()
670 r3 := v.Args[2].Reg()
672 p := s.Prog(v.Op.Asm())
673 p.From.Type = obj.TYPE_REG
677 p.To.Type = obj.TYPE_REG
680 case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
682 r1 := v.Args[0].Reg()
683 r2 := v.Args[1].Reg()
684 r3 := v.Args[2].Reg()
686 p := s.Prog(v.Op.Asm())
687 p.From.Type = obj.TYPE_REG
691 p.To.Type = obj.TYPE_REG
694 case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
695 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
696 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
697 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
699 p := s.Prog(v.Op.Asm())
700 p.To.Type = obj.TYPE_REG
702 p.From.Type = obj.TYPE_REG
703 p.From.Reg = v.Args[0].Reg()
705 case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
706 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
707 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
708 p := s.Prog(v.Op.Asm())
709 p.Reg = v.Args[0].Reg()
710 p.From.Type = obj.TYPE_CONST
711 p.From.Offset = v.AuxInt
712 p.To.Type = obj.TYPE_REG
715 case ssa.OpPPC64ADDC, ssa.OpPPC64ADDE, ssa.OpPPC64SUBC, ssa.OpPPC64SUBE:
716 r := v.Reg0() // CA is the first, implied argument.
717 r1 := v.Args[0].Reg()
718 r2 := v.Args[1].Reg()
719 p := s.Prog(v.Op.Asm())
720 p.From.Type = obj.TYPE_REG
723 p.To.Type = obj.TYPE_REG
726 case ssa.OpPPC64ADDZEzero, ssa.OpPPC64SUBZEzero:
727 p := s.Prog(v.Op.Asm())
728 p.From.Type = obj.TYPE_REG
729 p.From.Reg = ppc64.REG_R0
730 p.To.Type = obj.TYPE_REG
733 case ssa.OpPPC64ADDCconst:
734 p := s.Prog(v.Op.Asm())
735 p.Reg = v.Args[0].Reg()
736 p.From.Type = obj.TYPE_CONST
737 p.From.Offset = v.AuxInt
738 p.To.Type = obj.TYPE_REG
739 // Output is a pair, the second is the CA, which is implied.
742 case ssa.OpPPC64SUBCconst:
743 p := s.Prog(v.Op.Asm())
744 p.SetFrom3Const(v.AuxInt)
745 p.From.Type = obj.TYPE_REG
746 p.From.Reg = v.Args[0].Reg()
747 p.To.Type = obj.TYPE_REG
750 case ssa.OpPPC64SUBFCconst:
751 p := s.Prog(v.Op.Asm())
752 p.SetFrom3Const(v.AuxInt)
753 p.From.Type = obj.TYPE_REG
754 p.From.Reg = v.Args[0].Reg()
755 p.To.Type = obj.TYPE_REG
758 case ssa.OpPPC64ANDCCconst:
759 p := s.Prog(v.Op.Asm())
760 p.Reg = v.Args[0].Reg()
761 p.From.Type = obj.TYPE_CONST
762 p.From.Offset = v.AuxInt
763 p.To.Type = obj.TYPE_REG
764 p.To.Reg = ppc64.REGTMP // discard result
766 case ssa.OpPPC64MOVDaddr:
767 switch v.Aux.(type) {
769 v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
771 // If aux offset and aux int are both 0, and the same
772 // input and output regs are used, no instruction
773 // needs to be generated, since it would just be
775 if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
776 p := s.Prog(ppc64.AMOVD)
777 p.From.Type = obj.TYPE_ADDR
778 p.From.Reg = v.Args[0].Reg()
779 p.From.Offset = v.AuxInt
780 p.To.Type = obj.TYPE_REG
784 case *obj.LSym, ir.Node:
785 p := s.Prog(ppc64.AMOVD)
786 p.From.Type = obj.TYPE_ADDR
787 p.From.Reg = v.Args[0].Reg()
788 p.To.Type = obj.TYPE_REG
790 ssagen.AddAux(&p.From, v)
794 case ssa.OpPPC64MOVDconst:
795 p := s.Prog(v.Op.Asm())
796 p.From.Type = obj.TYPE_CONST
797 p.From.Offset = v.AuxInt
798 p.To.Type = obj.TYPE_REG
801 case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
802 p := s.Prog(v.Op.Asm())
803 p.From.Type = obj.TYPE_FCONST
804 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
805 p.To.Type = obj.TYPE_REG
808 case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
809 p := s.Prog(v.Op.Asm())
810 p.From.Type = obj.TYPE_REG
811 p.From.Reg = v.Args[0].Reg()
812 p.To.Type = obj.TYPE_REG
813 p.To.Reg = v.Args[1].Reg()
815 case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
816 p := s.Prog(v.Op.Asm())
817 p.From.Type = obj.TYPE_REG
818 p.From.Reg = v.Args[0].Reg()
819 p.To.Type = obj.TYPE_CONST
820 p.To.Offset = v.AuxInt
822 case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
823 // Shift in register to required size
824 p := s.Prog(v.Op.Asm())
825 p.From.Type = obj.TYPE_REG
826 p.From.Reg = v.Args[0].Reg()
828 p.To.Type = obj.TYPE_REG
830 case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:
832 // MOVDload and MOVWload are DS form instructions that are restricted to
833 // offsets that are a multiple of 4. If the offset is not a multiple of 4,
834 // then the address of the symbol to be loaded is computed (base + offset)
835 // and used as the new base register and the offset field in the instruction
836 // can be set to zero.
838 // This same problem can happen with gostrings since the final offset is not
839 // known yet, but could be unaligned after the relocation is resolved.
840 // So gostrings are handled the same way.
842 // This allows the MOVDload and MOVWload to be generated in more cases and
843 // eliminates some offset and alignment checking in the rules file.
845 fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
846 ssagen.AddAux(&fromAddr, v)
850 switch fromAddr.Name {
851 case obj.NAME_EXTERN, obj.NAME_STATIC:
852 // Special case for a rule that combines the bytes of gostring.
853 // The v alignment might seem OK, but we don't want to load it
854 // using an offset because relocation comes later.
855 genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go.string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
857 genAddr = fromAddr.Offset%4 != 0
860 // Load full address into the temp register.
861 p := s.Prog(ppc64.AMOVD)
862 p.From.Type = obj.TYPE_ADDR
863 p.From.Reg = v.Args[0].Reg()
864 ssagen.AddAux(&p.From, v)
865 // Load target using temp as base register
866 // and offset zero. Setting NAME_NONE
867 // prevents any extra offsets from being
869 p.To.Type = obj.TYPE_REG
870 p.To.Reg = ppc64.REGTMP
871 fromAddr.Reg = ppc64.REGTMP
872 // Clear the offset field and other
873 // information that might be used
874 // by the assembler to add to the
875 // final offset value.
877 fromAddr.Name = obj.NAME_NONE
880 p := s.Prog(v.Op.Asm())
882 p.To.Type = obj.TYPE_REG
886 case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
887 p := s.Prog(v.Op.Asm())
888 p.From.Type = obj.TYPE_MEM
889 p.From.Reg = v.Args[0].Reg()
890 ssagen.AddAux(&p.From, v)
891 p.To.Type = obj.TYPE_REG
894 case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
895 p := s.Prog(v.Op.Asm())
896 p.From.Type = obj.TYPE_MEM
897 p.From.Reg = v.Args[0].Reg()
898 p.To.Type = obj.TYPE_REG
901 case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
902 p := s.Prog(v.Op.Asm())
903 p.To.Type = obj.TYPE_MEM
904 p.To.Reg = v.Args[0].Reg()
905 p.From.Type = obj.TYPE_REG
906 p.From.Reg = v.Args[1].Reg()
908 case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
909 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
910 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
911 p := s.Prog(v.Op.Asm())
912 p.From.Type = obj.TYPE_MEM
913 p.From.Reg = v.Args[0].Reg()
914 p.From.Index = v.Args[1].Reg()
915 p.To.Type = obj.TYPE_REG
918 case ssa.OpPPC64DCBT:
919 p := s.Prog(v.Op.Asm())
920 p.From.Type = obj.TYPE_MEM
921 p.From.Reg = v.Args[0].Reg()
922 p.To.Type = obj.TYPE_CONST
923 p.To.Offset = v.AuxInt
925 case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
926 p := s.Prog(v.Op.Asm())
927 p.From.Type = obj.TYPE_REG
928 p.From.Reg = ppc64.REGZERO
929 p.To.Type = obj.TYPE_MEM
930 p.To.Reg = v.Args[0].Reg()
931 ssagen.AddAux(&p.To, v)
933 case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:
935 // MOVDstore and MOVDstorezero become DS form instructions that are restricted
936 // to offset values that are a multiple of 4. If the offset field is not a
937 // multiple of 4, then the full address of the store target is computed (base +
938 // offset) and used as the new base register and the offset in the instruction
941 // This allows the MOVDstore and MOVDstorezero to be generated in more cases,
942 // and prevents checking of the offset value and alignment in the rules.
944 toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
945 ssagen.AddAux(&toAddr, v)
947 if toAddr.Offset%4 != 0 {
948 p := s.Prog(ppc64.AMOVD)
949 p.From.Type = obj.TYPE_ADDR
950 p.From.Reg = v.Args[0].Reg()
951 ssagen.AddAux(&p.From, v)
952 p.To.Type = obj.TYPE_REG
953 p.To.Reg = ppc64.REGTMP
954 toAddr.Reg = ppc64.REGTMP
955 // Clear the offset field and other
956 // information that might be used
957 // by the assembler to add to the
958 // final offset value.
960 toAddr.Name = obj.NAME_NONE
963 p := s.Prog(v.Op.Asm())
965 p.From.Type = obj.TYPE_REG
966 if v.Op == ssa.OpPPC64MOVDstorezero {
967 p.From.Reg = ppc64.REGZERO
969 p.From.Reg = v.Args[1].Reg()
972 case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
973 p := s.Prog(v.Op.Asm())
974 p.From.Type = obj.TYPE_REG
975 p.From.Reg = v.Args[1].Reg()
976 p.To.Type = obj.TYPE_MEM
977 p.To.Reg = v.Args[0].Reg()
978 ssagen.AddAux(&p.To, v)
980 case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
981 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
982 ssa.OpPPC64MOVHBRstoreidx:
983 p := s.Prog(v.Op.Asm())
984 p.From.Type = obj.TYPE_REG
985 p.From.Reg = v.Args[2].Reg()
986 p.To.Index = v.Args[1].Reg()
987 p.To.Type = obj.TYPE_MEM
988 p.To.Reg = v.Args[0].Reg()
990 case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
992 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
993 // ISEL only accepts 0, 1, 2 condition values but the others can be
994 // achieved by swapping operand order.
995 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
996 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
997 // ISELB is used when a boolean result is needed, returning 0 or 1
998 p := s.Prog(ppc64.AISEL)
999 p.To.Type = obj.TYPE_REG
1001 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
1002 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
1003 if v.Op == ssa.OpPPC64ISEL {
1004 r.Reg = v.Args[1].Reg()
1006 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
1009 p.SetFrom3Reg(v.Args[0].Reg())
1011 p.Reg = v.Args[0].Reg()
1014 p.From.Type = obj.TYPE_CONST
1015 p.From.Offset = v.AuxInt & 3
1017 case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
1018 // The LoweredQuad code generation
1019 // generates STXV instructions on
1020 // power9. The Short variation is used
1021 // if no loop is generated.
1023 // sizes >= 64 generate a loop as follows:
1025 // Set up loop counter in CTR, used by BC
1026 // XXLXOR clears VS32
1027 // XXLXOR VS32,VS32,VS32
1028 // MOVD len/64,REG_TMP
1032 // STXV VS32,16(R20)
1033 // STXV VS32,32(R20)
1034 // STXV VS32,48(R20)
1038 // Bytes per iteration
1039 ctr := v.AuxInt / 64
1042 rem := v.AuxInt % 64
1044 // Only generate a loop if there is more
1045 // than 1 iteration.
1047 // Set up VS32 (V0) to hold 0s
1048 p := s.Prog(ppc64.AXXLXOR)
1049 p.From.Type = obj.TYPE_REG
1050 p.From.Reg = ppc64.REG_VS32
1051 p.To.Type = obj.TYPE_REG
1052 p.To.Reg = ppc64.REG_VS32
1053 p.Reg = ppc64.REG_VS32
1055 // Set up CTR loop counter
1056 p = s.Prog(ppc64.AMOVD)
1057 p.From.Type = obj.TYPE_CONST
1059 p.To.Type = obj.TYPE_REG
1060 p.To.Reg = ppc64.REGTMP
1062 p = s.Prog(ppc64.AMOVD)
1063 p.From.Type = obj.TYPE_REG
1064 p.From.Reg = ppc64.REGTMP
1065 p.To.Type = obj.TYPE_REG
1066 p.To.Reg = ppc64.REG_CTR
1068 // Don't generate padding for
1069 // loops with few iterations.
1071 p = s.Prog(obj.APCALIGN)
1072 p.From.Type = obj.TYPE_CONST
1076 // generate 4 STXVs to zero 64 bytes
1079 p = s.Prog(ppc64.ASTXV)
1080 p.From.Type = obj.TYPE_REG
1081 p.From.Reg = ppc64.REG_VS32
1082 p.To.Type = obj.TYPE_MEM
1083 p.To.Reg = v.Args[0].Reg()
1085 // Save the top of loop
1089 p = s.Prog(ppc64.ASTXV)
1090 p.From.Type = obj.TYPE_REG
1091 p.From.Reg = ppc64.REG_VS32
1092 p.To.Type = obj.TYPE_MEM
1093 p.To.Reg = v.Args[0].Reg()
1096 p = s.Prog(ppc64.ASTXV)
1097 p.From.Type = obj.TYPE_REG
1098 p.From.Reg = ppc64.REG_VS32
1099 p.To.Type = obj.TYPE_MEM
1100 p.To.Reg = v.Args[0].Reg()
1103 p = s.Prog(ppc64.ASTXV)
1104 p.From.Type = obj.TYPE_REG
1105 p.From.Reg = ppc64.REG_VS32
1106 p.To.Type = obj.TYPE_MEM
1107 p.To.Reg = v.Args[0].Reg()
1110 // Increment address for the
1111 // 64 bytes just zeroed.
1112 p = s.Prog(ppc64.AADD)
1113 p.Reg = v.Args[0].Reg()
1114 p.From.Type = obj.TYPE_CONST
1116 p.To.Type = obj.TYPE_REG
1117 p.To.Reg = v.Args[0].Reg()
1119 // Branch back to top of loop
1121 // BC with BO_BCTR generates bdnz
1122 p = s.Prog(ppc64.ABC)
1123 p.From.Type = obj.TYPE_CONST
1124 p.From.Offset = ppc64.BO_BCTR
1125 p.Reg = ppc64.REG_CR0LT
1126 p.To.Type = obj.TYPE_BRANCH
1129 // When ctr == 1 the loop was not generated but
1130 // there are at least 64 bytes to clear, so add
1131 // that to the remainder to generate the code
1132 // to clear those doublewords
1137 // Clear the remainder starting at offset zero
1140 if rem >= 16 && ctr <= 1 {
1141 // If the XXLXOR hasn't already been
1142 // generated, do it here to initialize
1144 p := s.Prog(ppc64.AXXLXOR)
1145 p.From.Type = obj.TYPE_REG
1146 p.From.Reg = ppc64.REG_VS32
1147 p.To.Type = obj.TYPE_REG
1148 p.To.Reg = ppc64.REG_VS32
1149 p.Reg = ppc64.REG_VS32
1151 // Generate STXV for 32 or 64
1154 p := s.Prog(ppc64.ASTXV)
1155 p.From.Type = obj.TYPE_REG
1156 p.From.Reg = ppc64.REG_VS32
1157 p.To.Type = obj.TYPE_MEM
1158 p.To.Reg = v.Args[0].Reg()
1159 p.To.Offset = offset
1161 p = s.Prog(ppc64.ASTXV)
1162 p.From.Type = obj.TYPE_REG
1163 p.From.Reg = ppc64.REG_VS32
1164 p.To.Type = obj.TYPE_MEM
1165 p.To.Reg = v.Args[0].Reg()
1166 p.To.Offset = offset + 16
1170 // Generate 16 bytes
1172 p := s.Prog(ppc64.ASTXV)
1173 p.From.Type = obj.TYPE_REG
1174 p.From.Reg = ppc64.REG_VS32
1175 p.To.Type = obj.TYPE_MEM
1176 p.To.Reg = v.Args[0].Reg()
1177 p.To.Offset = offset
1182 // first clear as many doublewords as possible
1183 // then clear remaining sizes as available
1185 op, size := ppc64.AMOVB, int64(1)
1188 op, size = ppc64.AMOVD, 8
1190 op, size = ppc64.AMOVW, 4
1192 op, size = ppc64.AMOVH, 2
1195 p.From.Type = obj.TYPE_REG
1196 p.From.Reg = ppc64.REG_R0
1197 p.To.Type = obj.TYPE_MEM
1198 p.To.Reg = v.Args[0].Reg()
1199 p.To.Offset = offset
1204 case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1206 // Unaligned data doesn't hurt performance
1207 // for these instructions on power8.
1209 // For sizes >= 64 generate a loop as follows:
1211 // Set up loop counter in CTR, used by BC
1212 // XXLXOR VS32,VS32,VS32
1213 // MOVD len/32,REG_TMP
1217 // STXVD2X VS32,(R0)(R20)
1218 // STXVD2X VS32,(R31)(R20)
1222 // any remainder is done as described below
1224 // for sizes < 64 bytes, first clear as many doublewords as possible,
1225 // then handle the remainder
1230 // the remainder bytes are cleared using one or more
1231 // of the following instructions with the appropriate
1232 // offsets depending which instructions are needed
1234 // MOVW R0,n1(R20) 4 bytes
1235 // MOVH R0,n2(R20) 2 bytes
1236 // MOVB R0,n3(R20) 1 byte
1238 // 7 bytes: MOVW, MOVH, MOVB
1239 // 6 bytes: MOVW, MOVH
1240 // 5 bytes: MOVW, MOVB
1241 // 3 bytes: MOVH, MOVB
1243 // each loop iteration does 32 bytes
1244 ctr := v.AuxInt / 32
1247 rem := v.AuxInt % 32
1249 // only generate a loop if there is more
1250 // than 1 iteration.
1252 // Set up VS32 (V0) to hold 0s
1253 p := s.Prog(ppc64.AXXLXOR)
1254 p.From.Type = obj.TYPE_REG
1255 p.From.Reg = ppc64.REG_VS32
1256 p.To.Type = obj.TYPE_REG
1257 p.To.Reg = ppc64.REG_VS32
1258 p.Reg = ppc64.REG_VS32
1260 // Set up CTR loop counter
1261 p = s.Prog(ppc64.AMOVD)
1262 p.From.Type = obj.TYPE_CONST
1264 p.To.Type = obj.TYPE_REG
1265 p.To.Reg = ppc64.REGTMP
1267 p = s.Prog(ppc64.AMOVD)
1268 p.From.Type = obj.TYPE_REG
1269 p.From.Reg = ppc64.REGTMP
1270 p.To.Type = obj.TYPE_REG
1271 p.To.Reg = ppc64.REG_CTR
1273 // Set up R31 to hold index value 16
1274 p = s.Prog(ppc64.AMOVD)
1275 p.From.Type = obj.TYPE_CONST
1277 p.To.Type = obj.TYPE_REG
1278 p.To.Reg = ppc64.REGTMP
1280 // Don't add padding for alignment
1281 // with few loop iterations.
1283 p = s.Prog(obj.APCALIGN)
1284 p.From.Type = obj.TYPE_CONST
1288 // generate 2 STXVD2Xs to store 16 bytes
1289 // when this is a loop then the top must be saved
1291 // This is the top of loop
1293 p = s.Prog(ppc64.ASTXVD2X)
1294 p.From.Type = obj.TYPE_REG
1295 p.From.Reg = ppc64.REG_VS32
1296 p.To.Type = obj.TYPE_MEM
1297 p.To.Reg = v.Args[0].Reg()
1298 p.To.Index = ppc64.REGZERO
1299 // Save the top of loop
1303 p = s.Prog(ppc64.ASTXVD2X)
1304 p.From.Type = obj.TYPE_REG
1305 p.From.Reg = ppc64.REG_VS32
1306 p.To.Type = obj.TYPE_MEM
1307 p.To.Reg = v.Args[0].Reg()
1308 p.To.Index = ppc64.REGTMP
1310 // Increment address for the
1311 // 4 doublewords just zeroed.
1312 p = s.Prog(ppc64.AADD)
1313 p.Reg = v.Args[0].Reg()
1314 p.From.Type = obj.TYPE_CONST
1316 p.To.Type = obj.TYPE_REG
1317 p.To.Reg = v.Args[0].Reg()
1319 // Branch back to top of loop
1321 // BC with BO_BCTR generates bdnz
1322 p = s.Prog(ppc64.ABC)
1323 p.From.Type = obj.TYPE_CONST
1324 p.From.Offset = ppc64.BO_BCTR
1325 p.Reg = ppc64.REG_CR0LT
1326 p.To.Type = obj.TYPE_BRANCH
1330 // when ctr == 1 the loop was not generated but
1331 // there are at least 32 bytes to clear, so add
1332 // that to the remainder to generate the code
1333 // to clear those doublewords
1338 // clear the remainder starting at offset zero
1341 // first clear as many doublewords as possible
1342 // then clear remaining sizes as available
1344 op, size := ppc64.AMOVB, int64(1)
1347 op, size = ppc64.AMOVD, 8
1349 op, size = ppc64.AMOVW, 4
1351 op, size = ppc64.AMOVH, 2
1354 p.From.Type = obj.TYPE_REG
1355 p.From.Reg = ppc64.REG_R0
1356 p.To.Type = obj.TYPE_MEM
1357 p.To.Reg = v.Args[0].Reg()
1358 p.To.Offset = offset
1363 case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1365 bytesPerLoop := int64(32)
1366 // This will be used when moving more
1367 // than 8 bytes. Moves start with
1368 // as many 8 byte moves as possible, then
1369 // 4, 2, or 1 byte(s) as remaining. This will
1370 // work and be efficient for power8 or later.
1371 // If there are 64 or more bytes, then a
1372 // loop is generated to move 32 bytes and
1373 // update the src and dst addresses on each
1374 // iteration. When < 64 bytes, the appropriate
1375 // number of moves are generated based on the
1377 // When moving >= 64 bytes a loop is used
1378 // MOVD len/32,REG_TMP
1382 // LXVD2X (R0)(R21),VS32
1383 // LXVD2X (R31)(R21),VS33
1385 // STXVD2X VS32,(R0)(R20)
1386 // STXVD2X VS33,(R31)(R20)
1389 // Bytes not moved by this loop are moved
1390 // with a combination of the following instructions,
1391 // starting with the largest sizes and generating as
1392 // many as needed, using the appropriate offset value.
1402 // Each loop iteration moves 32 bytes
1403 ctr := v.AuxInt / bytesPerLoop
1405 // Remainder after the loop
1406 rem := v.AuxInt % bytesPerLoop
1408 dstReg := v.Args[0].Reg()
1409 srcReg := v.Args[1].Reg()
1411 // The set of registers used here, must match the clobbered reg list
1417 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1420 p := s.Prog(ppc64.AMOVD)
1421 p.From.Type = obj.TYPE_CONST
1423 p.To.Type = obj.TYPE_REG
1424 p.To.Reg = ppc64.REGTMP
1426 p = s.Prog(ppc64.AMOVD)
1427 p.From.Type = obj.TYPE_REG
1428 p.From.Reg = ppc64.REGTMP
1429 p.To.Type = obj.TYPE_REG
1430 p.To.Reg = ppc64.REG_CTR
1432 // Use REGTMP as index reg
1433 p = s.Prog(ppc64.AMOVD)
1434 p.From.Type = obj.TYPE_CONST
1436 p.To.Type = obj.TYPE_REG
1437 p.To.Reg = ppc64.REGTMP
1439 // Don't adding padding for
1440 // alignment with small iteration
1443 p = s.Prog(obj.APCALIGN)
1444 p.From.Type = obj.TYPE_CONST
1448 // Generate 16 byte loads and stores.
1449 // Use temp register for index (16)
1450 // on the second one.
1452 p = s.Prog(ppc64.ALXVD2X)
1453 p.From.Type = obj.TYPE_MEM
1455 p.From.Index = ppc64.REGZERO
1456 p.To.Type = obj.TYPE_REG
1457 p.To.Reg = ppc64.REG_VS32
1461 p = s.Prog(ppc64.ALXVD2X)
1462 p.From.Type = obj.TYPE_MEM
1464 p.From.Index = ppc64.REGTMP
1465 p.To.Type = obj.TYPE_REG
1466 p.To.Reg = ppc64.REG_VS33
1468 // increment the src reg for next iteration
1469 p = s.Prog(ppc64.AADD)
1471 p.From.Type = obj.TYPE_CONST
1472 p.From.Offset = bytesPerLoop
1473 p.To.Type = obj.TYPE_REG
1476 // generate 16 byte stores
1477 p = s.Prog(ppc64.ASTXVD2X)
1478 p.From.Type = obj.TYPE_REG
1479 p.From.Reg = ppc64.REG_VS32
1480 p.To.Type = obj.TYPE_MEM
1482 p.To.Index = ppc64.REGZERO
1484 p = s.Prog(ppc64.ASTXVD2X)
1485 p.From.Type = obj.TYPE_REG
1486 p.From.Reg = ppc64.REG_VS33
1487 p.To.Type = obj.TYPE_MEM
1489 p.To.Index = ppc64.REGTMP
1491 // increment the dst reg for next iteration
1492 p = s.Prog(ppc64.AADD)
1494 p.From.Type = obj.TYPE_CONST
1495 p.From.Offset = bytesPerLoop
1496 p.To.Type = obj.TYPE_REG
1499 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1501 p = s.Prog(ppc64.ABC)
1502 p.From.Type = obj.TYPE_CONST
1503 p.From.Offset = ppc64.BO_BCTR
1504 p.Reg = ppc64.REG_CR0LT
1505 p.To.Type = obj.TYPE_BRANCH
1508 // srcReg and dstReg were incremented in the loop, so
1509 // later instructions start with offset 0.
1513 // No loop was generated for one iteration, so
1514 // add 32 bytes to the remainder to move those bytes.
1520 // Generate 16 byte loads and stores.
1521 // Use temp register for index (value 16)
1522 // on the second one.
1523 p := s.Prog(ppc64.ALXVD2X)
1524 p.From.Type = obj.TYPE_MEM
1526 p.From.Index = ppc64.REGZERO
1527 p.To.Type = obj.TYPE_REG
1528 p.To.Reg = ppc64.REG_VS32
1530 p = s.Prog(ppc64.ASTXVD2X)
1531 p.From.Type = obj.TYPE_REG
1532 p.From.Reg = ppc64.REG_VS32
1533 p.To.Type = obj.TYPE_MEM
1535 p.To.Index = ppc64.REGZERO
1541 // Use REGTMP as index reg
1542 p := s.Prog(ppc64.AMOVD)
1543 p.From.Type = obj.TYPE_CONST
1545 p.To.Type = obj.TYPE_REG
1546 p.To.Reg = ppc64.REGTMP
1548 p = s.Prog(ppc64.ALXVD2X)
1549 p.From.Type = obj.TYPE_MEM
1551 p.From.Index = ppc64.REGTMP
1552 p.To.Type = obj.TYPE_REG
1553 p.To.Reg = ppc64.REG_VS32
1555 p = s.Prog(ppc64.ASTXVD2X)
1556 p.From.Type = obj.TYPE_REG
1557 p.From.Reg = ppc64.REG_VS32
1558 p.To.Type = obj.TYPE_MEM
1560 p.To.Index = ppc64.REGTMP
1567 // Generate all the remaining load and store pairs, starting with
1568 // as many 8 byte moves as possible, then 4, 2, 1.
1570 op, size := ppc64.AMOVB, int64(1)
1573 op, size = ppc64.AMOVD, 8
1575 op, size = ppc64.AMOVWZ, 4
1577 op, size = ppc64.AMOVH, 2
1581 p.To.Type = obj.TYPE_REG
1582 p.To.Reg = ppc64.REGTMP
1583 p.From.Type = obj.TYPE_MEM
1585 p.From.Offset = offset
1589 p.From.Type = obj.TYPE_REG
1590 p.From.Reg = ppc64.REGTMP
1591 p.To.Type = obj.TYPE_MEM
1593 p.To.Offset = offset
1598 case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1599 bytesPerLoop := int64(64)
1600 // This is used when moving more
1601 // than 8 bytes on power9. Moves start with
1602 // as many 8 byte moves as possible, then
1603 // 4, 2, or 1 byte(s) as remaining. This will
1604 // work and be efficient for power8 or later.
1605 // If there are 64 or more bytes, then a
1606 // loop is generated to move 32 bytes and
1607 // update the src and dst addresses on each
1608 // iteration. When < 64 bytes, the appropriate
1609 // number of moves are generated based on the
1611 // When moving >= 64 bytes a loop is used
1612 // MOVD len/32,REG_TMP
1619 // STXV VS33,16(R20)
1622 // Bytes not moved by this loop are moved
1623 // with a combination of the following instructions,
1624 // starting with the largest sizes and generating as
1625 // many as needed, using the appropriate offset value.
1635 // Each loop iteration moves 32 bytes
1636 ctr := v.AuxInt / bytesPerLoop
1638 // Remainder after the loop
1639 rem := v.AuxInt % bytesPerLoop
1641 dstReg := v.Args[0].Reg()
1642 srcReg := v.Args[1].Reg()
1649 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1652 p := s.Prog(ppc64.AMOVD)
1653 p.From.Type = obj.TYPE_CONST
1655 p.To.Type = obj.TYPE_REG
1656 p.To.Reg = ppc64.REGTMP
1658 p = s.Prog(ppc64.AMOVD)
1659 p.From.Type = obj.TYPE_REG
1660 p.From.Reg = ppc64.REGTMP
1661 p.To.Type = obj.TYPE_REG
1662 p.To.Reg = ppc64.REG_CTR
1664 p = s.Prog(obj.APCALIGN)
1665 p.From.Type = obj.TYPE_CONST
1668 // Generate 16 byte loads and stores.
1669 p = s.Prog(ppc64.ALXV)
1670 p.From.Type = obj.TYPE_MEM
1672 p.From.Offset = offset
1673 p.To.Type = obj.TYPE_REG
1674 p.To.Reg = ppc64.REG_VS32
1678 p = s.Prog(ppc64.ALXV)
1679 p.From.Type = obj.TYPE_MEM
1681 p.From.Offset = offset + 16
1682 p.To.Type = obj.TYPE_REG
1683 p.To.Reg = ppc64.REG_VS33
1685 // generate 16 byte stores
1686 p = s.Prog(ppc64.ASTXV)
1687 p.From.Type = obj.TYPE_REG
1688 p.From.Reg = ppc64.REG_VS32
1689 p.To.Type = obj.TYPE_MEM
1691 p.To.Offset = offset
1693 p = s.Prog(ppc64.ASTXV)
1694 p.From.Type = obj.TYPE_REG
1695 p.From.Reg = ppc64.REG_VS33
1696 p.To.Type = obj.TYPE_MEM
1698 p.To.Offset = offset + 16
1700 // Generate 16 byte loads and stores.
1701 p = s.Prog(ppc64.ALXV)
1702 p.From.Type = obj.TYPE_MEM
1704 p.From.Offset = offset + 32
1705 p.To.Type = obj.TYPE_REG
1706 p.To.Reg = ppc64.REG_VS32
1708 p = s.Prog(ppc64.ALXV)
1709 p.From.Type = obj.TYPE_MEM
1711 p.From.Offset = offset + 48
1712 p.To.Type = obj.TYPE_REG
1713 p.To.Reg = ppc64.REG_VS33
1715 // generate 16 byte stores
1716 p = s.Prog(ppc64.ASTXV)
1717 p.From.Type = obj.TYPE_REG
1718 p.From.Reg = ppc64.REG_VS32
1719 p.To.Type = obj.TYPE_MEM
1721 p.To.Offset = offset + 32
1723 p = s.Prog(ppc64.ASTXV)
1724 p.From.Type = obj.TYPE_REG
1725 p.From.Reg = ppc64.REG_VS33
1726 p.To.Type = obj.TYPE_MEM
1728 p.To.Offset = offset + 48
1730 // increment the src reg for next iteration
1731 p = s.Prog(ppc64.AADD)
1733 p.From.Type = obj.TYPE_CONST
1734 p.From.Offset = bytesPerLoop
1735 p.To.Type = obj.TYPE_REG
1738 // increment the dst reg for next iteration
1739 p = s.Prog(ppc64.AADD)
1741 p.From.Type = obj.TYPE_CONST
1742 p.From.Offset = bytesPerLoop
1743 p.To.Type = obj.TYPE_REG
1746 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1748 p = s.Prog(ppc64.ABC)
1749 p.From.Type = obj.TYPE_CONST
1750 p.From.Offset = ppc64.BO_BCTR
1751 p.Reg = ppc64.REG_CR0LT
1752 p.To.Type = obj.TYPE_BRANCH
1755 // srcReg and dstReg were incremented in the loop, so
1756 // later instructions start with offset 0.
1760 // No loop was generated for one iteration, so
1761 // add 32 bytes to the remainder to move those bytes.
1766 p := s.Prog(ppc64.ALXV)
1767 p.From.Type = obj.TYPE_MEM
1769 p.To.Type = obj.TYPE_REG
1770 p.To.Reg = ppc64.REG_VS32
1772 p = s.Prog(ppc64.ALXV)
1773 p.From.Type = obj.TYPE_MEM
1776 p.To.Type = obj.TYPE_REG
1777 p.To.Reg = ppc64.REG_VS33
1779 p = s.Prog(ppc64.ASTXV)
1780 p.From.Type = obj.TYPE_REG
1781 p.From.Reg = ppc64.REG_VS32
1782 p.To.Type = obj.TYPE_MEM
1785 p = s.Prog(ppc64.ASTXV)
1786 p.From.Type = obj.TYPE_REG
1787 p.From.Reg = ppc64.REG_VS33
1788 p.To.Type = obj.TYPE_MEM
1797 // Generate 16 byte loads and stores.
1798 p := s.Prog(ppc64.ALXV)
1799 p.From.Type = obj.TYPE_MEM
1801 p.From.Offset = offset
1802 p.To.Type = obj.TYPE_REG
1803 p.To.Reg = ppc64.REG_VS32
1805 p = s.Prog(ppc64.ASTXV)
1806 p.From.Type = obj.TYPE_REG
1807 p.From.Reg = ppc64.REG_VS32
1808 p.To.Type = obj.TYPE_MEM
1810 p.To.Offset = offset
1816 p := s.Prog(ppc64.ALXV)
1817 p.From.Type = obj.TYPE_MEM
1819 p.From.Offset = offset
1820 p.To.Type = obj.TYPE_REG
1821 p.To.Reg = ppc64.REG_VS32
1823 p = s.Prog(ppc64.ASTXV)
1824 p.From.Type = obj.TYPE_REG
1825 p.From.Reg = ppc64.REG_VS32
1826 p.To.Type = obj.TYPE_MEM
1828 p.To.Offset = offset
1834 // Generate all the remaining load and store pairs, starting with
1835 // as many 8 byte moves as possible, then 4, 2, 1.
1837 op, size := ppc64.AMOVB, int64(1)
1840 op, size = ppc64.AMOVD, 8
1842 op, size = ppc64.AMOVWZ, 4
1844 op, size = ppc64.AMOVH, 2
1848 p.To.Type = obj.TYPE_REG
1849 p.To.Reg = ppc64.REGTMP
1850 p.From.Type = obj.TYPE_MEM
1852 p.From.Offset = offset
1856 p.From.Type = obj.TYPE_REG
1857 p.From.Reg = ppc64.REGTMP
1858 p.To.Type = obj.TYPE_MEM
1860 p.To.Offset = offset
1865 case ssa.OpPPC64CALLstatic:
1868 case ssa.OpPPC64CALLtail:
1871 case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1872 p := s.Prog(ppc64.AMOVD)
1873 p.From.Type = obj.TYPE_REG
1874 p.From.Reg = v.Args[0].Reg()
1875 p.To.Type = obj.TYPE_REG
1876 p.To.Reg = ppc64.REG_LR
1878 if v.Args[0].Reg() != ppc64.REG_R12 {
1879 v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1884 // Convert the call into a blrl with hint this is not a subroutine return.
1885 // The full bclrl opcode must be specified when passing a hint.
1887 pp.From.Type = obj.TYPE_CONST
1888 pp.From.Offset = ppc64.BO_ALWAYS
1889 pp.Reg = ppc64.REG_CR0LT // The preferred value if BI is ignored.
1890 pp.To.Reg = ppc64.REG_LR
1893 if base.Ctxt.Flag_shared {
1894 // When compiling Go into PIC, the function we just
1895 // called via pointer might have been implemented in
1896 // a separate module and so overwritten the TOC
1897 // pointer in R2; reload it.
1898 q := s.Prog(ppc64.AMOVD)
1899 q.From.Type = obj.TYPE_MEM
1901 q.From.Reg = ppc64.REGSP
1902 q.To.Type = obj.TYPE_REG
1903 q.To.Reg = ppc64.REG_R2
1906 case ssa.OpPPC64LoweredWB:
1907 p := s.Prog(obj.ACALL)
1908 p.To.Type = obj.TYPE_MEM
1909 p.To.Name = obj.NAME_EXTERN
1910 p.To.Sym = v.Aux.(*obj.LSym)
1912 case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1913 p := s.Prog(obj.ACALL)
1914 p.To.Type = obj.TYPE_MEM
1915 p.To.Name = obj.NAME_EXTERN
1916 p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
1917 s.UseArgs(16) // space used in callee args area by assembly stubs
1919 case ssa.OpPPC64LoweredNilCheck:
1920 if buildcfg.GOOS == "aix" {
1924 // NOP (so the BNE has somewhere to land)
1927 p := s.Prog(ppc64.ACMP)
1928 p.From.Type = obj.TYPE_REG
1929 p.From.Reg = v.Args[0].Reg()
1930 p.To.Type = obj.TYPE_REG
1931 p.To.Reg = ppc64.REG_R0
1934 p2 := s.Prog(ppc64.ABNE)
1935 p2.To.Type = obj.TYPE_BRANCH
1938 // Write at 0 is forbidden and will trigger a SIGSEGV
1939 p = s.Prog(ppc64.AMOVW)
1940 p.From.Type = obj.TYPE_REG
1941 p.From.Reg = ppc64.REG_R0
1942 p.To.Type = obj.TYPE_MEM
1943 p.To.Reg = ppc64.REG_R0
1945 // NOP (so the BNE has somewhere to land)
1946 nop := s.Prog(obj.ANOP)
1947 p2.To.SetTarget(nop)
1950 // Issue a load which will fault if arg is nil.
1951 p := s.Prog(ppc64.AMOVBZ)
1952 p.From.Type = obj.TYPE_MEM
1953 p.From.Reg = v.Args[0].Reg()
1954 ssagen.AddAux(&p.From, v)
1955 p.To.Type = obj.TYPE_REG
1956 p.To.Reg = ppc64.REGTMP
1958 if logopt.Enabled() {
1959 logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1961 if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1962 base.WarnfAt(v.Pos, "generated nil check")
1965 // These should be resolved by rules and not make it here.
1966 case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1967 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1968 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1969 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1970 case ssa.OpPPC64InvertFlags:
1971 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1972 case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1973 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1974 case ssa.OpClobber, ssa.OpClobberReg:
1975 // TODO: implement for clobberdead experiment. Nop is ok for now.
1977 v.Fatalf("genValue not implemented: %s", v.LongString())
// blockJump maps an ssa conditional block kind to the PPC64 branch opcode
// used when the block's likely successor is taken, plus the inverted opcode
// for branching to the other successor (see ssaGenBlock).
// NOTE(review): this excerpt is elided — the asm/invasm opcode fields and the
// composite-literal braces are on lines not visible here.
1981 var blockJump = [...]struct {
// asmeq: an extra BEQ must accompany the direct branch (e.g. FGE: GE = GT or EQ).
// invasmun: the inverted branch must also branch on unordered (NaN) via BVS —
// see the "FP unordered" handling in ssaGenBlock.
1983 asmeq, invasmun bool
1985 ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1986 ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1988 ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1989 ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1990 ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1991 ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1993 // TODO: need to work FP comparisons into block jumps
1994 ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1995 ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1996 ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1997 ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
// ssaGenBlock emits the terminating control flow for block b. next is the
// block that will be laid out immediately after b, so a branch to next can
// be elided.
// NOTE(review): this excerpt is elided — the enclosing switch statements,
// else branches, and some closing braces are on lines not visible here.
2000 func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
2002 case ssa.BlockDefer:
2003 // defer returns in R3:
2004 // 0 if we should continue executing
2005 // 1 if we should jump to deferreturn call
2006 p := s.Prog(ppc64.ACMP)
2007 p.From.Type = obj.TYPE_REG
2008 p.From.Reg = ppc64.REG_R3
2009 p.To.Type = obj.TYPE_REG
2010 p.To.Reg = ppc64.REG_R0
// Take the deferreturn path (Succs[1]) when R3 != 0.
2012 p = s.Prog(ppc64.ABNE)
2013 p.To.Type = obj.TYPE_BRANCH
2014 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
2015 if b.Succs[0].Block() != next {
2016 p := s.Prog(obj.AJMP)
2017 p.To.Type = obj.TYPE_BRANCH
2018 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
2021 case ssa.BlockPlain:
// Unconditional successor: emit a jump only when it is not the next block.
2022 if b.Succs[0].Block() != next {
2023 p := s.Prog(obj.AJMP)
2024 p.To.Type = obj.TYPE_BRANCH
2025 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
2027 case ssa.BlockExit, ssa.BlockRetJmp:
// No branch needed; these block kinds end control flow themselves.
2031 case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
2032 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
2033 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
2034 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
2035 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
// Conditional blocks: pick branch opcodes from the blockJump table; which
// opcode is used depends on which successor (if either) is the next block.
2036 jmp := blockJump[b.Kind]
// Succs[0] is next: branch to Succs[1] on the inverted condition. The BVS
// below presumably covers the unordered (NaN) case for FP kinds with
// invasmun set — the guard is on an elided line; confirm against upstream.
2038 case b.Succs[0].Block():
2039 s.Br(jmp.invasm, b.Succs[1].Block())
2041 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2042 s.Br(ppc64.ABVS, b.Succs[1].Block())
// Succs[1] is next: branch to Succs[0] on the direct condition; the extra
// BEQ presumably implements the "or equal" half for FP kinds with asmeq set
// (guard elided — confirm against upstream).
2044 case b.Succs[1].Block():
2045 s.Br(jmp.asm, b.Succs[0].Block())
2047 s.Br(ppc64.ABEQ, b.Succs[0].Block())
// Neither successor is next: branch on the likely direction first, then
// unconditionally jump to the other successor.
2050 if b.Likely != ssa.BranchUnlikely {
2051 s.Br(jmp.asm, b.Succs[0].Block())
2053 s.Br(ppc64.ABEQ, b.Succs[0].Block())
2055 s.Br(obj.AJMP, b.Succs[1].Block())
2057 s.Br(jmp.invasm, b.Succs[1].Block())
2059 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2060 s.Br(ppc64.ABVS, b.Succs[1].Block())
2062 s.Br(obj.AJMP, b.Succs[0].Block())
// Unhandled block kind: fail loudly rather than emit wrong code.
2066 b.Fatalf("branch not implemented: %s", b.LongString())
// loadRegResult emits a load of result value n from its stack slot
// (frame offset of n plus off) into register reg, choosing the load
// opcode by the value's type via loadByType.
// NOTE(review): the tail of this function (assigning reg to p.To.Reg and
// returning p) is on lines not visible in this excerpt.
2070 func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2071 p := s.Prog(loadByType(t))
2072 p.From.Type = obj.TYPE_MEM
// NAME_AUTO: address the result relative to the function's frame.
2073 p.From.Name = obj.NAME_AUTO
2074 p.From.Sym = n.Linksym()
2075 p.From.Offset = n.FrameOffset() + off
2076 p.To.Type = obj.TYPE_REG
2081 func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2082 p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
2083 p.To.Name = obj.NAME_PARAM
2084 p.To.Sym = n.Linksym()
2085 p.Pos = p.Pos.WithNotStmt()