1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/base"
9 "cmd/compile/internal/ir"
10 "cmd/compile/internal/logopt"
11 "cmd/compile/internal/objw"
12 "cmd/compile/internal/ssa"
13 "cmd/compile/internal/ssagen"
14 "cmd/compile/internal/types"
16 "cmd/internal/obj/ppc64"
22 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
23 func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
// NOTE(review): the body below is intentionally commented out — on ppc64
// this pass appears to be a no-op (no MOVDconst needs marking to avoid
// clobbering flags). The dead code is kept as documentation of what the
// pass would do on architectures where constant moves touch flags;
// confirm against the amd64/386 versions of ssaMarkMoves before deleting.
// 24 // flive := b.FlagsLiveAtEnd
// 25 // if b.Control != nil && b.Control.Type.IsFlags() {
// walk values backwards, tracking whether flags are live
28 // for i := len(b.Values) - 1; i >= 0; i-- {
30 // if flive && (v.Op == ssa.OpPPC64MOVDconst) {
31 // // The "mark" is any non-nil Aux value.
// a flag-typed result kills flag liveness at this point
34 // if v.Type.IsFlags() {
// any flag-typed argument makes flags live above this value
37 // for _, a := range v.Args {
38 // if a.Type.IsFlags() {
45 // loadByType returns the load instruction of the given type.
46 func loadByType(t *types.Type) obj.As {
// loadByType returns the obj.As load opcode appropriate for t.
// NOTE(review): the selection logic (presumably switching on t's size,
// signedness, and float-ness to pick the matching MOV*/FMOV* load) is
// elided from this view — confirm against storeByType, which must stay
// symmetric with it (both are paired in AddSpill below).
// Falling through to here means t has a size/class with no known load
// instruction; this is a compiler-internal invariant violation.
78 panic("bad load type")
81 // storeByType returns the store instruction of the given type.
82 func storeByType(t *types.Type) obj.As {
// storeByType returns the obj.As store opcode appropriate for t.
// NOTE(review): the selection logic (presumably switching on t's size and
// float-ness to pick the matching MOV*/FMOV* store) is elided from this
// view — it must remain the mirror image of loadByType, since both are
// used together for register-argument spill/unspill (see AddSpill).
// Falling through to here means t has a size/class with no known store
// instruction; this is a compiler-internal invariant violation.
102 panic("bad store type")
105 func ssaGenValue(s *ssagen.State, v *ssa.Value) {
128 case ssa.OpPPC64LoweredAtomicAnd8,
129 ssa.OpPPC64LoweredAtomicAnd32,
130 ssa.OpPPC64LoweredAtomicOr8,
131 ssa.OpPPC64LoweredAtomicOr32:
133 // LBAR/LWAR (Rarg0), Rtmp
134 // AND/OR Rarg1, Rtmp
135 // STBCCC/STWCCC Rtmp, (Rarg0)
139 if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
143 r0 := v.Args[0].Reg()
144 r1 := v.Args[1].Reg()
145 // LWSYNC - Assuming shared data not write-through-required nor
146 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
147 plwsync := s.Prog(ppc64.ALWSYNC)
148 plwsync.To.Type = obj.TYPE_NONE
151 p.From.Type = obj.TYPE_MEM
153 p.To.Type = obj.TYPE_REG
154 p.To.Reg = ppc64.REGTMP
156 p1 := s.Prog(v.Op.Asm())
157 p1.From.Type = obj.TYPE_REG
159 p1.To.Type = obj.TYPE_REG
160 p1.To.Reg = ppc64.REGTMP
163 p2.From.Type = obj.TYPE_REG
164 p2.From.Reg = ppc64.REGTMP
165 p2.To.Type = obj.TYPE_MEM
167 p2.RegTo2 = ppc64.REGTMP
169 p3 := s.Prog(ppc64.ABNE)
170 p3.To.Type = obj.TYPE_BRANCH
173 case ssa.OpPPC64LoweredAtomicAdd32,
174 ssa.OpPPC64LoweredAtomicAdd64:
176 // LDAR/LWAR (Rarg0), Rout
178 // STDCCC/STWCCC Rout, (Rarg0)
180 // MOVW Rout,Rout (if Add32)
183 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
187 r0 := v.Args[0].Reg()
188 r1 := v.Args[1].Reg()
190 // LWSYNC - Assuming shared data not write-through-required nor
191 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
192 plwsync := s.Prog(ppc64.ALWSYNC)
193 plwsync.To.Type = obj.TYPE_NONE
196 p.From.Type = obj.TYPE_MEM
198 p.To.Type = obj.TYPE_REG
201 p1 := s.Prog(ppc64.AADD)
202 p1.From.Type = obj.TYPE_REG
205 p1.To.Type = obj.TYPE_REG
208 p3.From.Type = obj.TYPE_REG
210 p3.To.Type = obj.TYPE_MEM
213 p4 := s.Prog(ppc64.ABNE)
214 p4.To.Type = obj.TYPE_BRANCH
217 // Ensure a 32 bit result
218 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
219 p5 := s.Prog(ppc64.AMOVWZ)
220 p5.To.Type = obj.TYPE_REG
222 p5.From.Type = obj.TYPE_REG
226 case ssa.OpPPC64LoweredAtomicExchange32,
227 ssa.OpPPC64LoweredAtomicExchange64:
229 // LDAR/LWAR (Rarg0), Rout
230 // STDCCC/STWCCC Rout, (Rarg0)
235 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
239 r0 := v.Args[0].Reg()
240 r1 := v.Args[1].Reg()
242 // LWSYNC - Assuming shared data not write-through-required nor
243 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
244 plwsync := s.Prog(ppc64.ALWSYNC)
245 plwsync.To.Type = obj.TYPE_NONE
248 p.From.Type = obj.TYPE_MEM
250 p.To.Type = obj.TYPE_REG
254 p1.From.Type = obj.TYPE_REG
256 p1.To.Type = obj.TYPE_MEM
259 p2 := s.Prog(ppc64.ABNE)
260 p2.To.Type = obj.TYPE_BRANCH
263 pisync := s.Prog(ppc64.AISYNC)
264 pisync.To.Type = obj.TYPE_NONE
266 case ssa.OpPPC64LoweredAtomicLoad8,
267 ssa.OpPPC64LoweredAtomicLoad32,
268 ssa.OpPPC64LoweredAtomicLoad64,
269 ssa.OpPPC64LoweredAtomicLoadPtr:
271 // MOVB/MOVD/MOVW (Rarg0), Rout
278 case ssa.OpPPC64LoweredAtomicLoad8:
280 case ssa.OpPPC64LoweredAtomicLoad32:
284 arg0 := v.Args[0].Reg()
286 // SYNC when AuxInt == 1; otherwise, load-acquire
288 psync := s.Prog(ppc64.ASYNC)
289 psync.To.Type = obj.TYPE_NONE
293 p.From.Type = obj.TYPE_MEM
295 p.To.Type = obj.TYPE_REG
299 p1.From.Type = obj.TYPE_REG
301 p1.To.Type = obj.TYPE_REG
304 p2 := s.Prog(ppc64.ABNE)
305 p2.To.Type = obj.TYPE_BRANCH
307 pisync := s.Prog(ppc64.AISYNC)
308 pisync.To.Type = obj.TYPE_NONE
309 p2.To.SetTarget(pisync)
311 case ssa.OpPPC64LoweredAtomicStore8,
312 ssa.OpPPC64LoweredAtomicStore32,
313 ssa.OpPPC64LoweredAtomicStore64:
315 // MOVB/MOVW/MOVD arg1,(arg0)
318 case ssa.OpPPC64LoweredAtomicStore8:
320 case ssa.OpPPC64LoweredAtomicStore32:
323 arg0 := v.Args[0].Reg()
324 arg1 := v.Args[1].Reg()
325 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
327 syncOp := ppc64.ASYNC
329 syncOp = ppc64.ALWSYNC
331 psync := s.Prog(syncOp)
332 psync.To.Type = obj.TYPE_NONE
335 p.To.Type = obj.TYPE_MEM
337 p.From.Type = obj.TYPE_REG
340 case ssa.OpPPC64LoweredAtomicCas64,
341 ssa.OpPPC64LoweredAtomicCas32:
345 // LDAR (Rarg0), MutexHint, Rtmp
348 // STDCCC Rarg2, (Rarg0)
350 // LWSYNC // Only for sequential consistency; not required in CasRel.
356 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
361 r0 := v.Args[0].Reg()
362 r1 := v.Args[1].Reg()
363 r2 := v.Args[2].Reg()
365 // Initialize return value to false
366 p := s.Prog(ppc64.AMOVD)
367 p.From.Type = obj.TYPE_CONST
369 p.To.Type = obj.TYPE_REG
371 // LWSYNC - Assuming shared data not write-through-required nor
372 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
373 plwsync1 := s.Prog(ppc64.ALWSYNC)
374 plwsync1.To.Type = obj.TYPE_NONE
377 p0.From.Type = obj.TYPE_MEM
379 p0.To.Type = obj.TYPE_REG
380 p0.To.Reg = ppc64.REGTMP
381 // If it is a Compare-and-Swap-Release operation, set the EH field with
388 p1.From.Type = obj.TYPE_REG
390 p1.To.Reg = ppc64.REGTMP
391 p1.To.Type = obj.TYPE_REG
392 // BNE done with return value = false
393 p2 := s.Prog(ppc64.ABNE)
394 p2.To.Type = obj.TYPE_BRANCH
397 p3.From.Type = obj.TYPE_REG
399 p3.To.Type = obj.TYPE_MEM
402 p4 := s.Prog(ppc64.ABNE)
403 p4.To.Type = obj.TYPE_BRANCH
405 // LWSYNC - Assuming shared data not write-through-required nor
406 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
407 // If the operation is a CAS-Release, then synchronization is not necessary.
409 plwsync2 := s.Prog(ppc64.ALWSYNC)
410 plwsync2.To.Type = obj.TYPE_NONE
413 p5 := s.Prog(ppc64.AMOVD)
414 p5.From.Type = obj.TYPE_CONST
416 p5.To.Type = obj.TYPE_REG
419 p6 := s.Prog(obj.ANOP)
422 case ssa.OpPPC64LoweredPubBarrier:
426 case ssa.OpPPC64LoweredGetClosurePtr:
427 // Closure pointer is R11 (already)
428 ssagen.CheckLoweredGetClosurePtr(v)
430 case ssa.OpPPC64LoweredGetCallerSP:
431 // caller's SP is FixedFrameSize below the address of the first arg
432 p := s.Prog(ppc64.AMOVD)
433 p.From.Type = obj.TYPE_ADDR
434 p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
435 p.From.Name = obj.NAME_PARAM
436 p.To.Type = obj.TYPE_REG
439 case ssa.OpPPC64LoweredGetCallerPC:
440 p := s.Prog(obj.AGETCALLERPC)
441 p.To.Type = obj.TYPE_REG
444 case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
445 // input is already rounded
448 loadOp := loadByType(v.Type)
450 ssagen.AddrAuto(&p.From, v.Args[0])
451 p.To.Type = obj.TYPE_REG
455 storeOp := storeByType(v.Type)
457 p.From.Type = obj.TYPE_REG
458 p.From.Reg = v.Args[0].Reg()
459 ssagen.AddrAuto(&p.To, v)
461 case ssa.OpArgIntReg, ssa.OpArgFloatReg:
462 // The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
463 // The loop only runs once.
464 for _, a := range v.Block.Func.RegArgs {
465 // Pass the spill/unspill information along to the assembler, offset by size of
466 // the saved LR slot.
467 addr := ssagen.SpillSlotAddr(a, ppc64.REGSP, base.Ctxt.Arch.FixedFrameSize)
468 s.FuncInfo().AddSpill(
469 obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
471 v.Block.Func.RegArgs = nil
473 ssagen.CheckArgReg(v)
475 case ssa.OpPPC64DIVD:
485 r0 := v.Args[0].Reg()
486 r1 := v.Args[1].Reg()
488 p := s.Prog(ppc64.ACMP)
489 p.From.Type = obj.TYPE_REG
491 p.To.Type = obj.TYPE_CONST
494 pbahead := s.Prog(ppc64.ABEQ)
495 pbahead.To.Type = obj.TYPE_BRANCH
497 p = s.Prog(v.Op.Asm())
498 p.From.Type = obj.TYPE_REG
501 p.To.Type = obj.TYPE_REG
504 pbover := s.Prog(obj.AJMP)
505 pbover.To.Type = obj.TYPE_BRANCH
507 p = s.Prog(ppc64.ANEG)
508 p.To.Type = obj.TYPE_REG
510 p.From.Type = obj.TYPE_REG
512 pbahead.To.SetTarget(p)
515 pbover.To.SetTarget(p)
517 case ssa.OpPPC64DIVW:
518 // word-width version of above
520 r0 := v.Args[0].Reg()
521 r1 := v.Args[1].Reg()
523 p := s.Prog(ppc64.ACMPW)
524 p.From.Type = obj.TYPE_REG
526 p.To.Type = obj.TYPE_CONST
529 pbahead := s.Prog(ppc64.ABEQ)
530 pbahead.To.Type = obj.TYPE_BRANCH
532 p = s.Prog(v.Op.Asm())
533 p.From.Type = obj.TYPE_REG
536 p.To.Type = obj.TYPE_REG
539 pbover := s.Prog(obj.AJMP)
540 pbover.To.Type = obj.TYPE_BRANCH
542 p = s.Prog(ppc64.ANEG)
543 p.To.Type = obj.TYPE_REG
545 p.From.Type = obj.TYPE_REG
547 pbahead.To.SetTarget(p)
550 pbover.To.SetTarget(p)
552 case ssa.OpPPC64CLRLSLWI:
554 r1 := v.Args[0].Reg()
556 p := s.Prog(v.Op.Asm())
557 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
558 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
559 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
561 p.To.Type = obj.TYPE_REG
564 case ssa.OpPPC64CLRLSLDI:
566 r1 := v.Args[0].Reg()
568 p := s.Prog(v.Op.Asm())
569 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
570 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
571 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
573 p.To.Type = obj.TYPE_REG
576 // Mask has been set as sh
577 case ssa.OpPPC64RLDICL:
579 r1 := v.Args[0].Reg()
581 p := s.Prog(v.Op.Asm())
582 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}
583 p.SetFrom3Const(ssa.GetPPC64Shiftmb(shifts))
585 p.To.Type = obj.TYPE_REG
588 case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
589 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
590 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
591 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
592 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
593 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
594 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
595 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
597 r1 := v.Args[0].Reg()
598 r2 := v.Args[1].Reg()
599 p := s.Prog(v.Op.Asm())
600 p.From.Type = obj.TYPE_REG
603 p.To.Type = obj.TYPE_REG
606 case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
607 r1 := v.Args[0].Reg()
608 r2 := v.Args[1].Reg()
609 p := s.Prog(v.Op.Asm())
610 p.From.Type = obj.TYPE_REG
613 p.To.Type = obj.TYPE_REG
614 p.To.Reg = ppc64.REGTMP // result is not needed
616 case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
617 p := s.Prog(v.Op.Asm())
618 p.From.Type = obj.TYPE_CONST
619 p.From.Offset = v.AuxInt
620 p.Reg = v.Args[0].Reg()
621 p.To.Type = obj.TYPE_REG
624 // Auxint holds encoded rotate + mask
625 case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
626 rot, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
627 p := s.Prog(v.Op.Asm())
628 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
629 p.Reg = v.Args[0].Reg()
630 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(rot)}
631 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
634 case ssa.OpPPC64RLWNM:
635 _, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
636 p := s.Prog(v.Op.Asm())
637 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
638 p.Reg = v.Args[0].Reg()
639 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
640 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
642 case ssa.OpPPC64MADDLD:
644 r1 := v.Args[0].Reg()
645 r2 := v.Args[1].Reg()
646 r3 := v.Args[2].Reg()
648 p := s.Prog(v.Op.Asm())
649 p.From.Type = obj.TYPE_REG
653 p.To.Type = obj.TYPE_REG
656 case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
658 r1 := v.Args[0].Reg()
659 r2 := v.Args[1].Reg()
660 r3 := v.Args[2].Reg()
662 p := s.Prog(v.Op.Asm())
663 p.From.Type = obj.TYPE_REG
667 p.To.Type = obj.TYPE_REG
670 case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
671 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
672 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
673 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
675 p := s.Prog(v.Op.Asm())
676 p.To.Type = obj.TYPE_REG
678 p.From.Type = obj.TYPE_REG
679 p.From.Reg = v.Args[0].Reg()
681 case ssa.OpPPC64ADDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
682 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
683 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
684 p := s.Prog(v.Op.Asm())
685 p.Reg = v.Args[0].Reg()
686 p.From.Type = obj.TYPE_CONST
687 p.From.Offset = v.AuxInt
688 p.To.Type = obj.TYPE_REG
691 case ssa.OpPPC64ADDC, ssa.OpPPC64ADDE, ssa.OpPPC64SUBC, ssa.OpPPC64SUBE:
692 r := v.Reg0() // CA is the first, implied argument.
693 r1 := v.Args[0].Reg()
694 r2 := v.Args[1].Reg()
695 p := s.Prog(v.Op.Asm())
696 p.From.Type = obj.TYPE_REG
699 p.To.Type = obj.TYPE_REG
702 case ssa.OpPPC64ADDZEzero, ssa.OpPPC64SUBZEzero:
703 p := s.Prog(v.Op.Asm())
704 p.From.Type = obj.TYPE_REG
705 p.From.Reg = ppc64.REG_R0
706 p.To.Type = obj.TYPE_REG
709 case ssa.OpPPC64ADDCconst:
710 p := s.Prog(v.Op.Asm())
711 p.Reg = v.Args[0].Reg()
712 p.From.Type = obj.TYPE_CONST
713 p.From.Offset = v.AuxInt
714 p.To.Type = obj.TYPE_REG
715 // Output is a pair, the second is the CA, which is implied.
718 case ssa.OpPPC64SUBCconst:
719 p := s.Prog(v.Op.Asm())
720 p.SetFrom3Const(v.AuxInt)
721 p.From.Type = obj.TYPE_REG
722 p.From.Reg = v.Args[0].Reg()
723 p.To.Type = obj.TYPE_REG
726 case ssa.OpPPC64SUBFCconst:
727 p := s.Prog(v.Op.Asm())
728 p.SetFrom3Const(v.AuxInt)
729 p.From.Type = obj.TYPE_REG
730 p.From.Reg = v.Args[0].Reg()
731 p.To.Type = obj.TYPE_REG
734 case ssa.OpPPC64ANDCCconst:
735 p := s.Prog(v.Op.Asm())
736 p.Reg = v.Args[0].Reg()
737 p.From.Type = obj.TYPE_CONST
738 p.From.Offset = v.AuxInt
739 p.To.Type = obj.TYPE_REG
740 // p.To.Reg = ppc64.REGTMP // discard result
743 case ssa.OpPPC64MOVDaddr:
744 switch v.Aux.(type) {
746 v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
748 // If aux offset and aux int are both 0, and the same
749 // input and output regs are used, no instruction
750 // needs to be generated, since it would just be
752 if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
753 p := s.Prog(ppc64.AMOVD)
754 p.From.Type = obj.TYPE_ADDR
755 p.From.Reg = v.Args[0].Reg()
756 p.From.Offset = v.AuxInt
757 p.To.Type = obj.TYPE_REG
761 case *obj.LSym, ir.Node:
762 p := s.Prog(ppc64.AMOVD)
763 p.From.Type = obj.TYPE_ADDR
764 p.From.Reg = v.Args[0].Reg()
765 p.To.Type = obj.TYPE_REG
767 ssagen.AddAux(&p.From, v)
771 case ssa.OpPPC64MOVDconst:
772 p := s.Prog(v.Op.Asm())
773 p.From.Type = obj.TYPE_CONST
774 p.From.Offset = v.AuxInt
775 p.To.Type = obj.TYPE_REG
778 case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
779 p := s.Prog(v.Op.Asm())
780 p.From.Type = obj.TYPE_FCONST
781 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
782 p.To.Type = obj.TYPE_REG
785 case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
786 p := s.Prog(v.Op.Asm())
787 p.From.Type = obj.TYPE_REG
788 p.From.Reg = v.Args[0].Reg()
789 p.To.Type = obj.TYPE_REG
790 p.To.Reg = v.Args[1].Reg()
792 case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
793 p := s.Prog(v.Op.Asm())
794 p.From.Type = obj.TYPE_REG
795 p.From.Reg = v.Args[0].Reg()
796 p.To.Type = obj.TYPE_CONST
797 p.To.Offset = v.AuxInt
799 case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
800 // Shift in register to required size
801 p := s.Prog(v.Op.Asm())
802 p.From.Type = obj.TYPE_REG
803 p.From.Reg = v.Args[0].Reg()
805 p.To.Type = obj.TYPE_REG
807 case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:
809 // MOVDload and MOVWload are DS form instructions that are restricted to
810 // offsets that are a multiple of 4. If the offset is not a multiple of 4,
811 // then the address of the symbol to be loaded is computed (base + offset)
812 // and used as the new base register and the offset field in the instruction
813 // can be set to zero.
815 // This same problem can happen with gostrings since the final offset is not
816 // known yet, but could be unaligned after the relocation is resolved.
817 // So gostrings are handled the same way.
819 // This allows the MOVDload and MOVWload to be generated in more cases and
820 // eliminates some offset and alignment checking in the rules file.
822 fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
823 ssagen.AddAux(&fromAddr, v)
827 switch fromAddr.Name {
828 case obj.NAME_EXTERN, obj.NAME_STATIC:
829 // Special case for a rule combines the bytes of gostring.
830 // The v alignment might seem OK, but we don't want to load it
831 // using an offset because relocation comes later.
832 genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go:string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
834 genAddr = fromAddr.Offset%4 != 0
837 // Load full address into the temp register.
838 p := s.Prog(ppc64.AMOVD)
839 p.From.Type = obj.TYPE_ADDR
840 p.From.Reg = v.Args[0].Reg()
841 ssagen.AddAux(&p.From, v)
842 // Load target using temp as base register
843 // and offset zero. Setting NAME_NONE
844 // prevents any extra offsets from being
846 p.To.Type = obj.TYPE_REG
847 p.To.Reg = ppc64.REGTMP
848 fromAddr.Reg = ppc64.REGTMP
849 // Clear the offset field and other
850 // information that might be used
851 // by the assembler to add to the
852 // final offset value.
854 fromAddr.Name = obj.NAME_NONE
857 p := s.Prog(v.Op.Asm())
859 p.To.Type = obj.TYPE_REG
863 case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
864 p := s.Prog(v.Op.Asm())
865 p.From.Type = obj.TYPE_MEM
866 p.From.Reg = v.Args[0].Reg()
867 ssagen.AddAux(&p.From, v)
868 p.To.Type = obj.TYPE_REG
871 case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
872 p := s.Prog(v.Op.Asm())
873 p.From.Type = obj.TYPE_MEM
874 p.From.Reg = v.Args[0].Reg()
875 p.To.Type = obj.TYPE_REG
878 case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
879 p := s.Prog(v.Op.Asm())
880 p.To.Type = obj.TYPE_MEM
881 p.To.Reg = v.Args[0].Reg()
882 p.From.Type = obj.TYPE_REG
883 p.From.Reg = v.Args[1].Reg()
885 case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
886 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
887 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
888 p := s.Prog(v.Op.Asm())
889 p.From.Type = obj.TYPE_MEM
890 p.From.Reg = v.Args[0].Reg()
891 p.From.Index = v.Args[1].Reg()
892 p.To.Type = obj.TYPE_REG
895 case ssa.OpPPC64DCBT:
896 p := s.Prog(v.Op.Asm())
897 p.From.Type = obj.TYPE_MEM
898 p.From.Reg = v.Args[0].Reg()
899 p.To.Type = obj.TYPE_CONST
900 p.To.Offset = v.AuxInt
902 case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
903 p := s.Prog(v.Op.Asm())
904 p.From.Type = obj.TYPE_REG
905 p.From.Reg = ppc64.REGZERO
906 p.To.Type = obj.TYPE_MEM
907 p.To.Reg = v.Args[0].Reg()
908 ssagen.AddAux(&p.To, v)
910 case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:
912 // MOVDstore and MOVDstorezero become DS form instructions that are restricted
913 // to offset values that are a multiple of 4. If the offset field is not a
914 // multiple of 4, then the full address of the store target is computed (base +
915 // offset) and used as the new base register and the offset in the instruction
918 // This allows the MOVDstore and MOVDstorezero to be generated in more cases,
919 // and prevents checking of the offset value and alignment in the rules.
921 toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
922 ssagen.AddAux(&toAddr, v)
924 if toAddr.Offset%4 != 0 {
925 p := s.Prog(ppc64.AMOVD)
926 p.From.Type = obj.TYPE_ADDR
927 p.From.Reg = v.Args[0].Reg()
928 ssagen.AddAux(&p.From, v)
929 p.To.Type = obj.TYPE_REG
930 p.To.Reg = ppc64.REGTMP
931 toAddr.Reg = ppc64.REGTMP
932 // Clear the offset field and other
933 // information that might be used
934 // by the assembler to add to the
935 // final offset value.
937 toAddr.Name = obj.NAME_NONE
940 p := s.Prog(v.Op.Asm())
942 p.From.Type = obj.TYPE_REG
943 if v.Op == ssa.OpPPC64MOVDstorezero {
944 p.From.Reg = ppc64.REGZERO
946 p.From.Reg = v.Args[1].Reg()
949 case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
950 p := s.Prog(v.Op.Asm())
951 p.From.Type = obj.TYPE_REG
952 p.From.Reg = v.Args[1].Reg()
953 p.To.Type = obj.TYPE_MEM
954 p.To.Reg = v.Args[0].Reg()
955 ssagen.AddAux(&p.To, v)
957 case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
958 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
959 ssa.OpPPC64MOVHBRstoreidx:
960 p := s.Prog(v.Op.Asm())
961 p.From.Type = obj.TYPE_REG
962 p.From.Reg = v.Args[2].Reg()
963 p.To.Index = v.Args[1].Reg()
964 p.To.Type = obj.TYPE_MEM
965 p.To.Reg = v.Args[0].Reg()
967 case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
969 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
970 // ISEL only accepts 0, 1, 2 condition values but the others can be
971 // achieved by swapping operand order.
972 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
973 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
974 // ISELB is used when a boolean result is needed, returning 0 or 1
975 p := s.Prog(ppc64.AISEL)
976 p.To.Type = obj.TYPE_REG
978 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
979 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
980 if v.Op == ssa.OpPPC64ISEL {
981 r.Reg = v.Args[1].Reg()
983 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
986 p.SetFrom3Reg(v.Args[0].Reg())
988 p.Reg = v.Args[0].Reg()
991 p.From.Type = obj.TYPE_CONST
992 p.From.Offset = v.AuxInt & 3
994 case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
995 // The LoweredQuad code generation
996 // generates STXV instructions on
997 // power9. The Short variation is used
998 // if no loop is generated.
1000 // sizes >= 64 generate a loop as follows:
1002 // Set up loop counter in CTR, used by BC
1003 // XXLXOR clears VS32
1004 // XXLXOR VS32,VS32,VS32
1005 // MOVD len/64,REG_TMP
1009 // STXV VS32,16(R20)
1010 // STXV VS32,32(R20)
1011 // STXV VS32,48(R20)
1015 // Bytes per iteration
1016 ctr := v.AuxInt / 64
1019 rem := v.AuxInt % 64
1021 // Only generate a loop if there is more
1022 // than 1 iteration.
1024 // Set up VS32 (V0) to hold 0s
1025 p := s.Prog(ppc64.AXXLXOR)
1026 p.From.Type = obj.TYPE_REG
1027 p.From.Reg = ppc64.REG_VS32
1028 p.To.Type = obj.TYPE_REG
1029 p.To.Reg = ppc64.REG_VS32
1030 p.Reg = ppc64.REG_VS32
1032 // Set up CTR loop counter
1033 p = s.Prog(ppc64.AMOVD)
1034 p.From.Type = obj.TYPE_CONST
1036 p.To.Type = obj.TYPE_REG
1037 p.To.Reg = ppc64.REGTMP
1039 p = s.Prog(ppc64.AMOVD)
1040 p.From.Type = obj.TYPE_REG
1041 p.From.Reg = ppc64.REGTMP
1042 p.To.Type = obj.TYPE_REG
1043 p.To.Reg = ppc64.REG_CTR
1045 // Don't generate padding for
1046 // loops with few iterations.
1048 p = s.Prog(obj.APCALIGN)
1049 p.From.Type = obj.TYPE_CONST
1053 // generate 4 STXVs to zero 64 bytes
1056 p = s.Prog(ppc64.ASTXV)
1057 p.From.Type = obj.TYPE_REG
1058 p.From.Reg = ppc64.REG_VS32
1059 p.To.Type = obj.TYPE_MEM
1060 p.To.Reg = v.Args[0].Reg()
1062 // Save the top of loop
1066 p = s.Prog(ppc64.ASTXV)
1067 p.From.Type = obj.TYPE_REG
1068 p.From.Reg = ppc64.REG_VS32
1069 p.To.Type = obj.TYPE_MEM
1070 p.To.Reg = v.Args[0].Reg()
1073 p = s.Prog(ppc64.ASTXV)
1074 p.From.Type = obj.TYPE_REG
1075 p.From.Reg = ppc64.REG_VS32
1076 p.To.Type = obj.TYPE_MEM
1077 p.To.Reg = v.Args[0].Reg()
1080 p = s.Prog(ppc64.ASTXV)
1081 p.From.Type = obj.TYPE_REG
1082 p.From.Reg = ppc64.REG_VS32
1083 p.To.Type = obj.TYPE_MEM
1084 p.To.Reg = v.Args[0].Reg()
1087 // Increment address for the
1088 // 64 bytes just zeroed.
1089 p = s.Prog(ppc64.AADD)
1090 p.Reg = v.Args[0].Reg()
1091 p.From.Type = obj.TYPE_CONST
1093 p.To.Type = obj.TYPE_REG
1094 p.To.Reg = v.Args[0].Reg()
1096 // Branch back to top of loop
1098 // BC with BO_BCTR generates bdnz
1099 p = s.Prog(ppc64.ABC)
1100 p.From.Type = obj.TYPE_CONST
1101 p.From.Offset = ppc64.BO_BCTR
1102 p.Reg = ppc64.REG_CR0LT
1103 p.To.Type = obj.TYPE_BRANCH
1106 // When ctr == 1 the loop was not generated but
1107 // there are at least 64 bytes to clear, so add
1108 // that to the remainder to generate the code
1109 // to clear those doublewords
1114 // Clear the remainder starting at offset zero
1117 if rem >= 16 && ctr <= 1 {
1118 // If the XXLXOR hasn't already been
1119 // generated, do it here to initialize
1121 p := s.Prog(ppc64.AXXLXOR)
1122 p.From.Type = obj.TYPE_REG
1123 p.From.Reg = ppc64.REG_VS32
1124 p.To.Type = obj.TYPE_REG
1125 p.To.Reg = ppc64.REG_VS32
1126 p.Reg = ppc64.REG_VS32
1128 // Generate STXV for 32 or 64
1131 p := s.Prog(ppc64.ASTXV)
1132 p.From.Type = obj.TYPE_REG
1133 p.From.Reg = ppc64.REG_VS32
1134 p.To.Type = obj.TYPE_MEM
1135 p.To.Reg = v.Args[0].Reg()
1136 p.To.Offset = offset
1138 p = s.Prog(ppc64.ASTXV)
1139 p.From.Type = obj.TYPE_REG
1140 p.From.Reg = ppc64.REG_VS32
1141 p.To.Type = obj.TYPE_MEM
1142 p.To.Reg = v.Args[0].Reg()
1143 p.To.Offset = offset + 16
1147 // Generate 16 bytes
1149 p := s.Prog(ppc64.ASTXV)
1150 p.From.Type = obj.TYPE_REG
1151 p.From.Reg = ppc64.REG_VS32
1152 p.To.Type = obj.TYPE_MEM
1153 p.To.Reg = v.Args[0].Reg()
1154 p.To.Offset = offset
1159 // first clear as many doublewords as possible
1160 // then clear remaining sizes as available
1162 op, size := ppc64.AMOVB, int64(1)
1165 op, size = ppc64.AMOVD, 8
1167 op, size = ppc64.AMOVW, 4
1169 op, size = ppc64.AMOVH, 2
1172 p.From.Type = obj.TYPE_REG
1173 p.From.Reg = ppc64.REG_R0
1174 p.To.Type = obj.TYPE_MEM
1175 p.To.Reg = v.Args[0].Reg()
1176 p.To.Offset = offset
1181 case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1183 // Unaligned data doesn't hurt performance
1184 // for these instructions on power8.
1186 // For sizes >= 64 generate a loop as follows:
1188 // Set up loop counter in CTR, used by BC
1189 // XXLXOR VS32,VS32,VS32
1190 // MOVD len/32,REG_TMP
1194 // STXVD2X VS32,(R0)(R20)
1195 // STXVD2X VS32,(R31)(R20)
1199 // any remainder is done as described below
1201 // for sizes < 64 bytes, first clear as many doublewords as possible,
1202 // then handle the remainder
1207 // the remainder bytes are cleared using one or more
1208 // of the following instructions with the appropriate
1209 // offsets depending which instructions are needed
1211 // MOVW R0,n1(R20) 4 bytes
1212 // MOVH R0,n2(R20) 2 bytes
1213 // MOVB R0,n3(R20) 1 byte
1215 // 7 bytes: MOVW, MOVH, MOVB
1216 // 6 bytes: MOVW, MOVH
1217 // 5 bytes: MOVW, MOVB
1218 // 3 bytes: MOVH, MOVB
1220 // each loop iteration does 32 bytes
1221 ctr := v.AuxInt / 32
1224 rem := v.AuxInt % 32
1226 // only generate a loop if there is more
1227 // than 1 iteration.
1229 // Set up VS32 (V0) to hold 0s
1230 p := s.Prog(ppc64.AXXLXOR)
1231 p.From.Type = obj.TYPE_REG
1232 p.From.Reg = ppc64.REG_VS32
1233 p.To.Type = obj.TYPE_REG
1234 p.To.Reg = ppc64.REG_VS32
1235 p.Reg = ppc64.REG_VS32
1237 // Set up CTR loop counter
1238 p = s.Prog(ppc64.AMOVD)
1239 p.From.Type = obj.TYPE_CONST
1241 p.To.Type = obj.TYPE_REG
1242 p.To.Reg = ppc64.REGTMP
1244 p = s.Prog(ppc64.AMOVD)
1245 p.From.Type = obj.TYPE_REG
1246 p.From.Reg = ppc64.REGTMP
1247 p.To.Type = obj.TYPE_REG
1248 p.To.Reg = ppc64.REG_CTR
1250 // Set up R31 to hold index value 16
1251 p = s.Prog(ppc64.AMOVD)
1252 p.From.Type = obj.TYPE_CONST
1254 p.To.Type = obj.TYPE_REG
1255 p.To.Reg = ppc64.REGTMP
1257 // Don't add padding for alignment
1258 // with few loop iterations.
1260 p = s.Prog(obj.APCALIGN)
1261 p.From.Type = obj.TYPE_CONST
1265 // generate 2 STXVD2Xs to store 16 bytes
1266 // when this is a loop then the top must be saved
1268 // This is the top of loop
1270 p = s.Prog(ppc64.ASTXVD2X)
1271 p.From.Type = obj.TYPE_REG
1272 p.From.Reg = ppc64.REG_VS32
1273 p.To.Type = obj.TYPE_MEM
1274 p.To.Reg = v.Args[0].Reg()
1275 p.To.Index = ppc64.REGZERO
1276 // Save the top of loop
1280 p = s.Prog(ppc64.ASTXVD2X)
1281 p.From.Type = obj.TYPE_REG
1282 p.From.Reg = ppc64.REG_VS32
1283 p.To.Type = obj.TYPE_MEM
1284 p.To.Reg = v.Args[0].Reg()
1285 p.To.Index = ppc64.REGTMP
1287 // Increment address for the
1288 // 4 doublewords just zeroed.
1289 p = s.Prog(ppc64.AADD)
1290 p.Reg = v.Args[0].Reg()
1291 p.From.Type = obj.TYPE_CONST
1293 p.To.Type = obj.TYPE_REG
1294 p.To.Reg = v.Args[0].Reg()
1296 // Branch back to top of loop
1298 // BC with BO_BCTR generates bdnz
1299 p = s.Prog(ppc64.ABC)
1300 p.From.Type = obj.TYPE_CONST
1301 p.From.Offset = ppc64.BO_BCTR
1302 p.Reg = ppc64.REG_CR0LT
1303 p.To.Type = obj.TYPE_BRANCH
1307 // when ctr == 1 the loop was not generated but
1308 // there are at least 32 bytes to clear, so add
1309 // that to the remainder to generate the code
1310 // to clear those doublewords
1315 // clear the remainder starting at offset zero
1318 // first clear as many doublewords as possible
1319 // then clear remaining sizes as available
1321 op, size := ppc64.AMOVB, int64(1)
1324 op, size = ppc64.AMOVD, 8
1326 op, size = ppc64.AMOVW, 4
1328 op, size = ppc64.AMOVH, 2
1331 p.From.Type = obj.TYPE_REG
1332 p.From.Reg = ppc64.REG_R0
1333 p.To.Type = obj.TYPE_MEM
1334 p.To.Reg = v.Args[0].Reg()
1335 p.To.Offset = offset
// LoweredMove copies v.AuxInt bytes from Args[1] (src) to Args[0] (dst)
// using 16-byte vector loads/stores (LXVD2X/STXVD2X) in a CTR-counted
// loop of 32 bytes per iteration, then scalar moves for the remainder.
1340 case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1342 bytesPerLoop := int64(32)
1343 // This will be used when moving more
1344 // than 8 bytes. Moves start with
1345 // as many 8 byte moves as possible, then
1346 // 4, 2, or 1 byte(s) as remaining. This will
1347 // work and be efficient for power8 or later.
1348 // If there are 64 or more bytes, then a
1349 // loop is generated to move 32 bytes and
1350 // update the src and dst addresses on each
1351 // iteration. When < 64 bytes, the appropriate
1352 // number of moves are generated based on the
1354 // When moving >= 64 bytes a loop is used
1355 // MOVD len/32,REG_TMP
1359 // LXVD2X (R0)(R21),VS32
1360 // LXVD2X (R31)(R21),VS33
1362 // STXVD2X VS32,(R0)(R20)
1363 // STXVD2X VS33,(R31)(R20)
1366 // Bytes not moved by this loop are moved
1367 // with a combination of the following instructions,
1368 // starting with the largest sizes and generating as
1369 // many as needed, using the appropriate offset value.
1379 // Each loop iteration moves 32 bytes
1380 ctr := v.AuxInt / bytesPerLoop
1382 // Remainder after the loop
1383 rem := v.AuxInt % bytesPerLoop
1385 dstReg := v.Args[0].Reg()
1386 srcReg := v.Args[1].Reg()
1388 // The set of registers used here, must match the clobbered reg list
1394 // Only generate looping code when loop counter is > 1 for >= 64 bytes
// Load the iteration count into CTR via REGTMP (MOVD const,CTR is not
// directly encodable).
1397 p := s.Prog(ppc64.AMOVD)
1398 p.From.Type = obj.TYPE_CONST
1400 p.To.Type = obj.TYPE_REG
1401 p.To.Reg = ppc64.REGTMP
1403 p = s.Prog(ppc64.AMOVD)
1404 p.From.Type = obj.TYPE_REG
1405 p.From.Reg = ppc64.REGTMP
1406 p.To.Type = obj.TYPE_REG
1407 p.To.Reg = ppc64.REG_CTR
1409 // Use REGTMP as index reg
1410 p = s.Prog(ppc64.AMOVD)
1411 p.From.Type = obj.TYPE_CONST
1413 p.To.Type = obj.TYPE_REG
1414 p.To.Reg = ppc64.REGTMP
1416 // Don't add padding for
1417 // alignment with small iteration
// Align the loop top for branch performance (PCALIGN).
1420 p = s.Prog(obj.APCALIGN)
1421 p.From.Type = obj.TYPE_CONST
1425 // Generate 16 byte loads and stores.
1426 // Use temp register for index (16)
1427 // on the second one.
1429 p = s.Prog(ppc64.ALXVD2X)
1430 p.From.Type = obj.TYPE_MEM
1432 p.From.Index = ppc64.REGZERO
1433 p.To.Type = obj.TYPE_REG
1434 p.To.Reg = ppc64.REG_VS32
1438 p = s.Prog(ppc64.ALXVD2X)
1439 p.From.Type = obj.TYPE_MEM
1441 p.From.Index = ppc64.REGTMP
1442 p.To.Type = obj.TYPE_REG
1443 p.To.Reg = ppc64.REG_VS33
1445 // increment the src reg for next iteration
1446 p = s.Prog(ppc64.AADD)
1448 p.From.Type = obj.TYPE_CONST
1449 p.From.Offset = bytesPerLoop
1450 p.To.Type = obj.TYPE_REG
1453 // generate 16 byte stores
1454 p = s.Prog(ppc64.ASTXVD2X)
1455 p.From.Type = obj.TYPE_REG
1456 p.From.Reg = ppc64.REG_VS32
1457 p.To.Type = obj.TYPE_MEM
1459 p.To.Index = ppc64.REGZERO
1461 p = s.Prog(ppc64.ASTXVD2X)
1462 p.From.Type = obj.TYPE_REG
1463 p.From.Reg = ppc64.REG_VS33
1464 p.To.Type = obj.TYPE_MEM
1466 p.To.Index = ppc64.REGTMP
1468 // increment the dst reg for next iteration
1469 p = s.Prog(ppc64.AADD)
1471 p.From.Type = obj.TYPE_CONST
1472 p.From.Offset = bytesPerLoop
1473 p.To.Type = obj.TYPE_REG
1476 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1478 p = s.Prog(ppc64.ABC)
1479 p.From.Type = obj.TYPE_CONST
1480 p.From.Offset = ppc64.BO_BCTR
1481 p.Reg = ppc64.REG_CR0LT
1482 p.To.Type = obj.TYPE_BRANCH
1485 // srcReg and dstReg were incremented in the loop, so
1486 // later instructions start with offset 0.
1490 // No loop was generated for one iteration, so
1491 // add 32 bytes to the remainder to move those bytes.
1497 // Generate 16 byte loads and stores.
1498 // Use temp register for index (value 16)
1499 // on the second one.
1500 p := s.Prog(ppc64.ALXVD2X)
1501 p.From.Type = obj.TYPE_MEM
1503 p.From.Index = ppc64.REGZERO
1504 p.To.Type = obj.TYPE_REG
1505 p.To.Reg = ppc64.REG_VS32
1507 p = s.Prog(ppc64.ASTXVD2X)
1508 p.From.Type = obj.TYPE_REG
1509 p.From.Reg = ppc64.REG_VS32
1510 p.To.Type = obj.TYPE_MEM
1512 p.To.Index = ppc64.REGZERO
// A second 16-byte copy (still part of the post-loop remainder);
// REGTMP holds the index offset for the indexed load/store pair.
1518 // Use REGTMP as index reg
1519 p := s.Prog(ppc64.AMOVD)
1520 p.From.Type = obj.TYPE_CONST
1522 p.To.Type = obj.TYPE_REG
1523 p.To.Reg = ppc64.REGTMP
1525 p = s.Prog(ppc64.ALXVD2X)
1526 p.From.Type = obj.TYPE_MEM
1528 p.From.Index = ppc64.REGTMP
1529 p.To.Type = obj.TYPE_REG
1530 p.To.Reg = ppc64.REG_VS32
1532 p = s.Prog(ppc64.ASTXVD2X)
1533 p.From.Type = obj.TYPE_REG
1534 p.From.Reg = ppc64.REG_VS32
1535 p.To.Type = obj.TYPE_MEM
1537 p.To.Index = ppc64.REGTMP
1544 // Generate all the remaining load and store pairs, starting with
1545 // as many 8 byte moves as possible, then 4, 2, 1.
// Width selection guards are on elided lines; default is one byte.
1547 op, size := ppc64.AMOVB, int64(1)
1550 op, size = ppc64.AMOVD, 8
1552 op, size = ppc64.AMOVWZ, 4
1554 op, size = ppc64.AMOVH, 2
// Load size bytes from src+offset into REGTMP, then store them to
// dst+offset. REGTMP is free here: the loop (if any) has finished.
1558 p.To.Type = obj.TYPE_REG
1559 p.To.Reg = ppc64.REGTMP
1560 p.From.Type = obj.TYPE_MEM
1562 p.From.Offset = offset
1566 p.From.Type = obj.TYPE_REG
1567 p.From.Reg = ppc64.REGTMP
1568 p.To.Type = obj.TYPE_MEM
1570 p.To.Offset = offset
// LoweredQuadMove is the power9 variant of LoweredMove: it uses the
// D-form LXV/STXV (ISA 3.0) with immediate offsets instead of the
// indexed LXVD2X/STXVD2X, and moves 64 bytes per loop iteration.
1575 case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1576 bytesPerLoop := int64(64)
1577 // This is used when moving more
1578 // than 8 bytes on power9. Moves start with
1579 // as many 8 byte moves as possible, then
1580 // 4, 2, or 1 byte(s) as remaining. This will
1581 // work and be efficient for power9 or later.
1582 // If there are 64 or more bytes, then a
1583 // loop is generated to move 64 bytes and
1584 // update the src and dst addresses on each
1585 // iteration. When < 64 bytes, the appropriate
1586 // number of moves are generated based on the
1588 // When moving >= 64 bytes a loop is used
1589 // MOVD len/64,REG_TMP
1596 // STXV VS33,16(R20)
1599 // Bytes not moved by this loop are moved
1600 // with a combination of the following instructions,
1601 // starting with the largest sizes and generating as
1602 // many as needed, using the appropriate offset value.
1612 // Each loop iteration moves 64 bytes
1613 ctr := v.AuxInt / bytesPerLoop
1615 // Remainder after the loop
1616 rem := v.AuxInt % bytesPerLoop
1618 dstReg := v.Args[0].Reg()
1619 srcReg := v.Args[1].Reg()
1626 // Only generate looping code when loop counter is > 1 for >= 64 bytes
// Load iteration count into CTR via REGTMP.
1629 p := s.Prog(ppc64.AMOVD)
1630 p.From.Type = obj.TYPE_CONST
1632 p.To.Type = obj.TYPE_REG
1633 p.To.Reg = ppc64.REGTMP
1635 p = s.Prog(ppc64.AMOVD)
1636 p.From.Type = obj.TYPE_REG
1637 p.From.Reg = ppc64.REGTMP
1638 p.To.Type = obj.TYPE_REG
1639 p.To.Reg = ppc64.REG_CTR
// Align the loop top (PCALIGN).
1641 p = s.Prog(obj.APCALIGN)
1642 p.From.Type = obj.TYPE_CONST
1645 // Generate 16 byte loads and stores.
1646 p = s.Prog(ppc64.ALXV)
1647 p.From.Type = obj.TYPE_MEM
1649 p.From.Offset = offset
1650 p.To.Type = obj.TYPE_REG
1651 p.To.Reg = ppc64.REG_VS32
1655 p = s.Prog(ppc64.ALXV)
1656 p.From.Type = obj.TYPE_MEM
1658 p.From.Offset = offset + 16
1659 p.To.Type = obj.TYPE_REG
1660 p.To.Reg = ppc64.REG_VS33
1662 // generate 16 byte stores
1663 p = s.Prog(ppc64.ASTXV)
1664 p.From.Type = obj.TYPE_REG
1665 p.From.Reg = ppc64.REG_VS32
1666 p.To.Type = obj.TYPE_MEM
1668 p.To.Offset = offset
1670 p = s.Prog(ppc64.ASTXV)
1671 p.From.Type = obj.TYPE_REG
1672 p.From.Reg = ppc64.REG_VS33
1673 p.To.Type = obj.TYPE_MEM
1675 p.To.Offset = offset + 16
// Second 32 bytes of the 64-byte iteration, at offsets +32 and +48.
1677 // Generate 16 byte loads and stores.
1678 p = s.Prog(ppc64.ALXV)
1679 p.From.Type = obj.TYPE_MEM
1681 p.From.Offset = offset + 32
1682 p.To.Type = obj.TYPE_REG
1683 p.To.Reg = ppc64.REG_VS32
1685 p = s.Prog(ppc64.ALXV)
1686 p.From.Type = obj.TYPE_MEM
1688 p.From.Offset = offset + 48
1689 p.To.Type = obj.TYPE_REG
1690 p.To.Reg = ppc64.REG_VS33
1692 // generate 16 byte stores
1693 p = s.Prog(ppc64.ASTXV)
1694 p.From.Type = obj.TYPE_REG
1695 p.From.Reg = ppc64.REG_VS32
1696 p.To.Type = obj.TYPE_MEM
1698 p.To.Offset = offset + 32
1700 p = s.Prog(ppc64.ASTXV)
1701 p.From.Type = obj.TYPE_REG
1702 p.From.Reg = ppc64.REG_VS33
1703 p.To.Type = obj.TYPE_MEM
1705 p.To.Offset = offset + 48
1707 // increment the src reg for next iteration
1708 p = s.Prog(ppc64.AADD)
1710 p.From.Type = obj.TYPE_CONST
1711 p.From.Offset = bytesPerLoop
1712 p.To.Type = obj.TYPE_REG
1715 // increment the dst reg for next iteration
1716 p = s.Prog(ppc64.AADD)
1718 p.From.Type = obj.TYPE_CONST
1719 p.From.Offset = bytesPerLoop
1720 p.To.Type = obj.TYPE_REG
1723 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1725 p = s.Prog(ppc64.ABC)
1726 p.From.Type = obj.TYPE_CONST
1727 p.From.Offset = ppc64.BO_BCTR
1728 p.Reg = ppc64.REG_CR0LT
1729 p.To.Type = obj.TYPE_BRANCH
1732 // srcReg and dstReg were incremented in the loop, so
1733 // later instructions start with offset 0.
1737 // No loop was generated for one iteration, so
1738 // add 64 bytes to the remainder to move those bytes.
// Post-loop: copy 32 bytes with two LXV/STXV pairs
// (offsets are on elided lines).
1743 p := s.Prog(ppc64.ALXV)
1744 p.From.Type = obj.TYPE_MEM
1746 p.To.Type = obj.TYPE_REG
1747 p.To.Reg = ppc64.REG_VS32
1749 p = s.Prog(ppc64.ALXV)
1750 p.From.Type = obj.TYPE_MEM
1753 p.To.Type = obj.TYPE_REG
1754 p.To.Reg = ppc64.REG_VS33
1756 p = s.Prog(ppc64.ASTXV)
1757 p.From.Type = obj.TYPE_REG
1758 p.From.Reg = ppc64.REG_VS32
1759 p.To.Type = obj.TYPE_MEM
1762 p = s.Prog(ppc64.ASTXV)
1763 p.From.Type = obj.TYPE_REG
1764 p.From.Reg = ppc64.REG_VS33
1765 p.To.Type = obj.TYPE_MEM
1774 // Generate 16 byte loads and stores.
1775 p := s.Prog(ppc64.ALXV)
1776 p.From.Type = obj.TYPE_MEM
1778 p.From.Offset = offset
1779 p.To.Type = obj.TYPE_REG
1780 p.To.Reg = ppc64.REG_VS32
1782 p = s.Prog(ppc64.ASTXV)
1783 p.From.Type = obj.TYPE_REG
1784 p.From.Reg = ppc64.REG_VS32
1785 p.To.Type = obj.TYPE_MEM
1787 p.To.Offset = offset
// Another single 16-byte copy for a remaining 16-byte chunk.
1793 p := s.Prog(ppc64.ALXV)
1794 p.From.Type = obj.TYPE_MEM
1796 p.From.Offset = offset
1797 p.To.Type = obj.TYPE_REG
1798 p.To.Reg = ppc64.REG_VS32
1800 p = s.Prog(ppc64.ASTXV)
1801 p.From.Type = obj.TYPE_REG
1802 p.From.Reg = ppc64.REG_VS32
1803 p.To.Type = obj.TYPE_MEM
1805 p.To.Offset = offset
1811 // Generate all the remaining load and store pairs, starting with
1812 // as many 8 byte moves as possible, then 4, 2, 1.
// Width selection guards are on elided lines; default is one byte.
1814 op, size := ppc64.AMOVB, int64(1)
1817 op, size = ppc64.AMOVD, 8
1819 op, size = ppc64.AMOVWZ, 4
1821 op, size = ppc64.AMOVH, 2
// Load size bytes from src+offset into REGTMP, then store to dst+offset.
1825 p.To.Type = obj.TYPE_REG
1826 p.To.Reg = ppc64.REGTMP
1827 p.From.Type = obj.TYPE_MEM
1829 p.From.Offset = offset
1833 p.From.Type = obj.TYPE_REG
1834 p.From.Reg = ppc64.REGTMP
1835 p.To.Type = obj.TYPE_MEM
1837 p.To.Offset = offset
1842 case ssa.OpPPC64CALLstatic:
1845 case ssa.OpPPC64CALLtail:
// Indirect calls: the target address arrives in Args[0] and must be in
// R12 per the ELFv2 ABI convention used here; it is copied to LR and the
// call is made through LR.
1848 case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1849 p := s.Prog(ppc64.AMOVD)
1850 p.From.Type = obj.TYPE_REG
1851 p.From.Reg = v.Args[0].Reg()
1852 p.To.Type = obj.TYPE_REG
1853 p.To.Reg = ppc64.REG_LR
1855 if v.Args[0].Reg() != ppc64.REG_R12 {
1856 v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1861 // Convert the call into a blrl with hint this is not a subroutine return.
1862 // The full bclrl opcode must be specified when passing a hint.
1864 pp.From.Type = obj.TYPE_CONST
1865 pp.From.Offset = ppc64.BO_ALWAYS
1866 pp.Reg = ppc64.REG_CR0LT // The preferred value if BI is ignored.
1867 pp.To.Reg = ppc64.REG_LR
1870 if base.Ctxt.Flag_shared {
1871 // When compiling Go into PIC, the function we just
1872 // called via pointer might have been implemented in
1873 // a separate module and so overwritten the TOC
1874 // pointer in R2; reload it.
1875 q := s.Prog(ppc64.AMOVD)
1876 q.From.Type = obj.TYPE_MEM
1878 q.From.Reg = ppc64.REGSP
1879 q.To.Type = obj.TYPE_REG
1880 q.To.Reg = ppc64.REG_R2
// Write barrier: call the runtime helper named in v.Aux.
1883 case ssa.OpPPC64LoweredWB:
1884 p := s.Prog(obj.ACALL)
1885 p.To.Type = obj.TYPE_MEM
1886 p.To.Name = obj.NAME_EXTERN
1887 p.To.Sym = v.Aux.(*obj.LSym)
// Bounds-check failures: call the matching runtime panic stub;
// v.AuxInt selects which panic function.
1889 case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1890 p := s.Prog(obj.ACALL)
1891 p.To.Type = obj.TYPE_MEM
1892 p.To.Name = obj.NAME_EXTERN
1893 p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
1894 s.UseArgs(16) // space used in callee args area by assembly stubs
1896 case ssa.OpPPC64LoweredNilCheck:
1897 if buildcfg.GOOS == "aix" {
// On AIX, reading address 0 is valid, so a faulting load cannot be
// used. Instead: compare the pointer against 0 and, if equal, store
// to address 0, which does fault (SIGSEGV).
1901 // NOP (so the BNE has somewhere to land)
1904 p := s.Prog(ppc64.ACMP)
1905 p.From.Type = obj.TYPE_REG
1906 p.From.Reg = v.Args[0].Reg()
1907 p.To.Type = obj.TYPE_REG
1908 p.To.Reg = ppc64.REG_R0
1911 p2 := s.Prog(ppc64.ABNE)
1912 p2.To.Type = obj.TYPE_BRANCH
1915 // Write at 0 is forbidden and will trigger a SIGSEGV
1916 p = s.Prog(ppc64.AMOVW)
1917 p.From.Type = obj.TYPE_REG
1918 p.From.Reg = ppc64.REG_R0
1919 p.To.Type = obj.TYPE_MEM
1920 p.To.Reg = ppc64.REG_R0
1922 // NOP (so the BNE has somewhere to land)
1923 nop := s.Prog(obj.ANOP)
1924 p2.To.SetTarget(nop)
1927 // Issue a load which will fault if arg is nil.
1928 p := s.Prog(ppc64.AMOVBZ)
1929 p.From.Type = obj.TYPE_MEM
1930 p.From.Reg = v.Args[0].Reg()
1931 ssagen.AddAux(&p.From, v)
1932 p.To.Type = obj.TYPE_REG
1933 p.To.Reg = ppc64.REGTMP
1935 if logopt.Enabled() {
1936 logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1938 if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1939 base.WarnfAt(v.Pos, "generated nil check")
1942 // These should be resolved by rules and not make it here.
1943 case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1944 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1945 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1946 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1947 case ssa.OpPPC64InvertFlags:
1948 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1949 case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1950 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1951 case ssa.OpClobber, ssa.OpClobberReg:
1952 // TODO: implement for clobberdead experiment. Nop is ok for now.
1954 v.Fatalf("genValue not implemented: %s", v.LongString())
// blockJump maps a conditional SSA block kind to its branch pair: the
// instruction taken when the condition holds and its inversion (both
// declared on an elided line), plus two FP-only flags. Based on use in
// ssaGenBlock: asmeq appears to mean an extra BEQ is needed alongside
// the direct branch, and invasmun that the inverted branch must be
// paired with BVS to also take the branch on unordered (NaN) — both
// TODO confirm against the elided field declarations.
1958 var blockJump = [...]struct {
1960 asmeq, invasmun bool
1962 ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1963 ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1965 ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1966 ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1967 ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1968 ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1970 // TODO: need to work FP comparisons into block jumps
1971 ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1972 ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1973 ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1974 ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
// ssaGenBlock emits the control-flow instructions that end block b.
// next is the block that will be laid out immediately after b, so a
// jump to it can be omitted.
1977 func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
1979 case ssa.BlockDefer:
1980 // defer returns in R3:
1981 // 0 if we should continue executing
1982 // 1 if we should jump to deferreturn call
1983 p := s.Prog(ppc64.ACMP)
1984 p.From.Type = obj.TYPE_REG
1985 p.From.Reg = ppc64.REG_R3
1986 p.To.Type = obj.TYPE_REG
1987 p.To.Reg = ppc64.REG_R0
1989 p = s.Prog(ppc64.ABNE)
1990 p.To.Type = obj.TYPE_BRANCH
1991 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
1992 if b.Succs[0].Block() != next {
1993 p := s.Prog(obj.AJMP)
1994 p.To.Type = obj.TYPE_BRANCH
1995 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
1998 case ssa.BlockPlain:
// Unconditional fall-through; jump only if the successor is not next.
1999 if b.Succs[0].Block() != next {
2000 p := s.Prog(obj.AJMP)
2001 p.To.Type = obj.TYPE_BRANCH
2002 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
2004 case ssa.BlockExit, ssa.BlockRetJmp:
2008 case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
2009 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
2010 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
2011 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
2012 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
2013 jmp := blockJump[b.Kind]
// Three layouts: succ0 is next (branch on the inverted condition),
// succ1 is next (branch on the condition), or neither (branch plus an
// unconditional jump, ordered by the static likeliness hint). The
// guards selecting among these are on elided lines.
2015 case b.Succs[0].Block():
2016 s.Br(jmp.invasm, b.Succs[1].Block())
2018 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2019 s.Br(ppc64.ABVS, b.Succs[1].Block())
2021 case b.Succs[1].Block():
2022 s.Br(jmp.asm, b.Succs[0].Block())
2024 s.Br(ppc64.ABEQ, b.Succs[0].Block())
2027 if b.Likely != ssa.BranchUnlikely {
2028 s.Br(jmp.asm, b.Succs[0].Block())
2030 s.Br(ppc64.ABEQ, b.Succs[0].Block())
2032 s.Br(obj.AJMP, b.Succs[1].Block())
2034 s.Br(jmp.invasm, b.Succs[1].Block())
2036 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2037 s.Br(ppc64.ABVS, b.Succs[1].Block())
2039 s.Br(obj.AJMP, b.Succs[0].Block())
2043 b.Fatalf("branch not implemented: %s", b.LongString())
// loadRegResult emits a load of the stack-allocated result slot for n
// (a local/auto at n.FrameOffset()+off) into the given register,
// choosing the load width from t. Returns the emitted Prog (the
// reg assignment and return are on elided lines).
2047 func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2048 p := s.Prog(loadByType(t))
2049 p.From.Type = obj.TYPE_MEM
2050 p.From.Name = obj.NAME_AUTO
2051 p.From.Sym = n.Linksym()
2052 p.From.Offset = n.FrameOffset() + off
2053 p.To.Type = obj.TYPE_REG
// spillArgReg appends (after p) a store of argument register reg into
// the parameter stack slot for n at n.FrameOffset()+off, using the
// store width for t. The Prog is marked NotStmt so it does not get a
// statement boundary for debugging.
2058 func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2059 p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
2060 p.To.Name = obj.NAME_PARAM
2061 p.To.Sym = n.Linksym()
2062 p.Pos = p.Pos.WithNotStmt()