1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/gc"
9 "cmd/compile/internal/logopt"
10 "cmd/compile/internal/ssa"
11 "cmd/compile/internal/types"
13 "cmd/internal/obj/ppc64"
19 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
20 func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
// NOTE(review): the body is entirely commented out, so this hook is an
// intentional no-op on PPC64 — presumably constant-move instructions here do
// not clobber the condition registers, so nothing needs marking; the retained
// sketch shows what a flags-aware implementation would look like. Confirm
// against other architectures' ssaMarkMoves before reviving it.
// NOTE(review): the sketch condition "v.Op == v.Op == ssa.OpPPC64MOVDconst"
// is not valid Go; it presumably meant "v.Op == ssa.OpPPC64MOVDconst".
// Harmless while dead text, but fix it if this code is ever uncommented.
21 // flive := b.FlagsLiveAtEnd
22 // if b.Control != nil && b.Control.Type.IsFlags() {
25 // for i := len(b.Values) - 1; i >= 0; i-- {
27 // if flive && (v.Op == v.Op == ssa.OpPPC64MOVDconst) {
28 // // The "mark" is any non-nil Aux value.
31 // if v.Type.IsFlags() {
34 // for _, a := range v.Args {
35 // if a.Type.IsFlags() {
42 // loadByType returns the load instruction of the given type.
43 func loadByType(t *types.Type) obj.As {
// NOTE(review): the selection logic (original lines 44-74) is not visible in
// this view — presumably a switch on t's width/signedness/float-ness choosing
// among the MOVB/MOVH/MOVW/MOVD (and FMOVS/FMOVD) load opcodes; verify
// against the full source. Only the unreachable-type fallthrough is shown:
// any type not matched by the switch is a compiler bug, hence panic rather
// than returning a bogus opcode.
75 panic("bad load type")
78 // storeByType returns the store instruction of the given type.
79 func storeByType(t *types.Type) obj.As {
// NOTE(review): the selection logic (original lines 80-98) is not visible in
// this view — presumably the mirror of loadByType, switching on t's width and
// float-ness to pick the matching store opcode; verify against the full
// source. As with loadByType, an unmatched type indicates a compiler bug, so
// the function panics instead of returning an arbitrary opcode.
99 panic("bad store type")
102 func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
125 case ssa.OpPPC64LoweredMuluhilo:
126 // MULHDU Rarg1, Rarg0, Reg0
127 // MULLD Rarg1, Rarg0, Reg1
128 r0 := v.Args[0].Reg()
129 r1 := v.Args[1].Reg()
130 p := s.Prog(ppc64.AMULHDU)
131 p.From.Type = obj.TYPE_REG
134 p.To.Type = obj.TYPE_REG
136 p1 := s.Prog(ppc64.AMULLD)
137 p1.From.Type = obj.TYPE_REG
140 p1.To.Type = obj.TYPE_REG
143 case ssa.OpPPC64LoweredAdd64Carry:
144 // ADDC Rarg2, -1, Rtmp
145 // ADDE Rarg1, Rarg0, Reg0
147 r0 := v.Args[0].Reg()
148 r1 := v.Args[1].Reg()
149 r2 := v.Args[2].Reg()
150 p := s.Prog(ppc64.AADDC)
151 p.From.Type = obj.TYPE_CONST
154 p.To.Type = obj.TYPE_REG
155 p.To.Reg = ppc64.REGTMP
156 p1 := s.Prog(ppc64.AADDE)
157 p1.From.Type = obj.TYPE_REG
160 p1.To.Type = obj.TYPE_REG
162 p2 := s.Prog(ppc64.AADDZE)
163 p2.From.Type = obj.TYPE_REG
164 p2.From.Reg = ppc64.REGZERO
165 p2.To.Type = obj.TYPE_REG
168 case ssa.OpPPC64LoweredAtomicAnd8,
169 ssa.OpPPC64LoweredAtomicAnd32,
170 ssa.OpPPC64LoweredAtomicOr8,
171 ssa.OpPPC64LoweredAtomicOr32:
173 // LBAR/LWAR (Rarg0), Rtmp
174 // AND/OR Rarg1, Rtmp
175 // STBCCC/STWCCC Rtmp, (Rarg0)
179 if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
183 r0 := v.Args[0].Reg()
184 r1 := v.Args[1].Reg()
185 // LWSYNC - Assuming shared data not write-through-required nor
186 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
187 plwsync := s.Prog(ppc64.ALWSYNC)
188 plwsync.To.Type = obj.TYPE_NONE
191 p.From.Type = obj.TYPE_MEM
193 p.To.Type = obj.TYPE_REG
194 p.To.Reg = ppc64.REGTMP
196 p1 := s.Prog(v.Op.Asm())
197 p1.From.Type = obj.TYPE_REG
199 p1.To.Type = obj.TYPE_REG
200 p1.To.Reg = ppc64.REGTMP
203 p2.From.Type = obj.TYPE_REG
204 p2.From.Reg = ppc64.REGTMP
205 p2.To.Type = obj.TYPE_MEM
207 p2.RegTo2 = ppc64.REGTMP
209 p3 := s.Prog(ppc64.ABNE)
210 p3.To.Type = obj.TYPE_BRANCH
213 case ssa.OpPPC64LoweredAtomicAdd32,
214 ssa.OpPPC64LoweredAtomicAdd64:
216 // LDAR/LWAR (Rarg0), Rout
218 // STDCCC/STWCCC Rout, (Rarg0)
220 // MOVW Rout,Rout (if Add32)
223 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
227 r0 := v.Args[0].Reg()
228 r1 := v.Args[1].Reg()
230 // LWSYNC - Assuming shared data not write-through-required nor
231 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
232 plwsync := s.Prog(ppc64.ALWSYNC)
233 plwsync.To.Type = obj.TYPE_NONE
236 p.From.Type = obj.TYPE_MEM
238 p.To.Type = obj.TYPE_REG
241 p1 := s.Prog(ppc64.AADD)
242 p1.From.Type = obj.TYPE_REG
245 p1.To.Type = obj.TYPE_REG
248 p3.From.Type = obj.TYPE_REG
250 p3.To.Type = obj.TYPE_MEM
253 p4 := s.Prog(ppc64.ABNE)
254 p4.To.Type = obj.TYPE_BRANCH
257 // Ensure a 32 bit result
258 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
259 p5 := s.Prog(ppc64.AMOVWZ)
260 p5.To.Type = obj.TYPE_REG
262 p5.From.Type = obj.TYPE_REG
266 case ssa.OpPPC64LoweredAtomicExchange32,
267 ssa.OpPPC64LoweredAtomicExchange64:
269 // LDAR/LWAR (Rarg0), Rout
270 // STDCCC/STWCCC Rout, (Rarg0)
275 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
279 r0 := v.Args[0].Reg()
280 r1 := v.Args[1].Reg()
282 // LWSYNC - Assuming shared data not write-through-required nor
283 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
284 plwsync := s.Prog(ppc64.ALWSYNC)
285 plwsync.To.Type = obj.TYPE_NONE
288 p.From.Type = obj.TYPE_MEM
290 p.To.Type = obj.TYPE_REG
294 p1.From.Type = obj.TYPE_REG
296 p1.To.Type = obj.TYPE_MEM
299 p2 := s.Prog(ppc64.ABNE)
300 p2.To.Type = obj.TYPE_BRANCH
303 pisync := s.Prog(ppc64.AISYNC)
304 pisync.To.Type = obj.TYPE_NONE
306 case ssa.OpPPC64LoweredAtomicLoad8,
307 ssa.OpPPC64LoweredAtomicLoad32,
308 ssa.OpPPC64LoweredAtomicLoad64,
309 ssa.OpPPC64LoweredAtomicLoadPtr:
311 // MOVB/MOVD/MOVW (Rarg0), Rout
318 case ssa.OpPPC64LoweredAtomicLoad8:
320 case ssa.OpPPC64LoweredAtomicLoad32:
324 arg0 := v.Args[0].Reg()
326 // SYNC when AuxInt == 1; otherwise, load-acquire
328 psync := s.Prog(ppc64.ASYNC)
329 psync.To.Type = obj.TYPE_NONE
333 p.From.Type = obj.TYPE_MEM
335 p.To.Type = obj.TYPE_REG
339 p1.From.Type = obj.TYPE_REG
341 p1.To.Type = obj.TYPE_REG
344 p2 := s.Prog(ppc64.ABNE)
345 p2.To.Type = obj.TYPE_BRANCH
347 pisync := s.Prog(ppc64.AISYNC)
348 pisync.To.Type = obj.TYPE_NONE
351 case ssa.OpPPC64LoweredAtomicStore8,
352 ssa.OpPPC64LoweredAtomicStore32,
353 ssa.OpPPC64LoweredAtomicStore64:
355 // MOVB/MOVW/MOVD arg1,(arg0)
358 case ssa.OpPPC64LoweredAtomicStore8:
360 case ssa.OpPPC64LoweredAtomicStore32:
363 arg0 := v.Args[0].Reg()
364 arg1 := v.Args[1].Reg()
365 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
367 syncOp := ppc64.ASYNC
369 syncOp = ppc64.ALWSYNC
371 psync := s.Prog(syncOp)
372 psync.To.Type = obj.TYPE_NONE
375 p.To.Type = obj.TYPE_MEM
377 p.From.Type = obj.TYPE_REG
380 case ssa.OpPPC64LoweredAtomicCas64,
381 ssa.OpPPC64LoweredAtomicCas32:
384 // LDAR (Rarg0), MutexHint, Rtmp
387 // STDCCC Rarg2, (Rarg0)
389 // LWSYNC // Only for sequential consistency; not required in CasRel.
398 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
403 r0 := v.Args[0].Reg()
404 r1 := v.Args[1].Reg()
405 r2 := v.Args[2].Reg()
407 // LWSYNC - Assuming shared data not write-through-required nor
408 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
409 plwsync1 := s.Prog(ppc64.ALWSYNC)
410 plwsync1.To.Type = obj.TYPE_NONE
413 p.From.Type = obj.TYPE_MEM
415 p.To.Type = obj.TYPE_REG
416 p.To.Reg = ppc64.REGTMP
417 // If it is a Compare-and-Swap-Release operation, set the EH field with
420 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
424 p1.From.Type = obj.TYPE_REG
426 p1.To.Reg = ppc64.REGTMP
427 p1.To.Type = obj.TYPE_REG
429 p2 := s.Prog(ppc64.ABNE)
430 p2.To.Type = obj.TYPE_BRANCH
433 p3.From.Type = obj.TYPE_REG
435 p3.To.Type = obj.TYPE_MEM
438 p4 := s.Prog(ppc64.ABNE)
439 p4.To.Type = obj.TYPE_BRANCH
441 // LWSYNC - Assuming shared data not write-through-required nor
442 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
443 // If the operation is a CAS-Release, then synchronization is not necessary.
445 plwsync2 := s.Prog(ppc64.ALWSYNC)
446 plwsync2.To.Type = obj.TYPE_NONE
449 p5 := s.Prog(ppc64.AMOVD)
450 p5.From.Type = obj.TYPE_CONST
452 p5.To.Type = obj.TYPE_REG
455 p6 := s.Prog(obj.AJMP)
456 p6.To.Type = obj.TYPE_BRANCH
458 p7 := s.Prog(ppc64.AMOVD)
459 p7.From.Type = obj.TYPE_CONST
461 p7.To.Type = obj.TYPE_REG
465 p8 := s.Prog(obj.ANOP)
468 case ssa.OpPPC64LoweredGetClosurePtr:
469 // Closure pointer is R11 (already)
470 gc.CheckLoweredGetClosurePtr(v)
472 case ssa.OpPPC64LoweredGetCallerSP:
473 // caller's SP is FixedFrameSize below the address of the first arg
474 p := s.Prog(ppc64.AMOVD)
475 p.From.Type = obj.TYPE_ADDR
476 p.From.Offset = -gc.Ctxt.FixedFrameSize()
477 p.From.Name = obj.NAME_PARAM
478 p.To.Type = obj.TYPE_REG
481 case ssa.OpPPC64LoweredGetCallerPC:
482 p := s.Prog(obj.AGETCALLERPC)
483 p.To.Type = obj.TYPE_REG
486 case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
487 // input is already rounded
490 loadOp := loadByType(v.Type)
492 gc.AddrAuto(&p.From, v.Args[0])
493 p.To.Type = obj.TYPE_REG
497 storeOp := storeByType(v.Type)
499 p.From.Type = obj.TYPE_REG
500 p.From.Reg = v.Args[0].Reg()
501 gc.AddrAuto(&p.To, v)
503 case ssa.OpPPC64DIVD:
513 r0 := v.Args[0].Reg()
514 r1 := v.Args[1].Reg()
516 p := s.Prog(ppc64.ACMP)
517 p.From.Type = obj.TYPE_REG
519 p.To.Type = obj.TYPE_CONST
522 pbahead := s.Prog(ppc64.ABEQ)
523 pbahead.To.Type = obj.TYPE_BRANCH
525 p = s.Prog(v.Op.Asm())
526 p.From.Type = obj.TYPE_REG
529 p.To.Type = obj.TYPE_REG
532 pbover := s.Prog(obj.AJMP)
533 pbover.To.Type = obj.TYPE_BRANCH
535 p = s.Prog(ppc64.ANEG)
536 p.To.Type = obj.TYPE_REG
538 p.From.Type = obj.TYPE_REG
545 case ssa.OpPPC64DIVW:
546 // word-width version of above
548 r0 := v.Args[0].Reg()
549 r1 := v.Args[1].Reg()
551 p := s.Prog(ppc64.ACMPW)
552 p.From.Type = obj.TYPE_REG
554 p.To.Type = obj.TYPE_CONST
557 pbahead := s.Prog(ppc64.ABEQ)
558 pbahead.To.Type = obj.TYPE_BRANCH
560 p = s.Prog(v.Op.Asm())
561 p.From.Type = obj.TYPE_REG
564 p.To.Type = obj.TYPE_REG
567 pbover := s.Prog(obj.AJMP)
568 pbover.To.Type = obj.TYPE_BRANCH
570 p = s.Prog(ppc64.ANEG)
571 p.To.Type = obj.TYPE_REG
573 p.From.Type = obj.TYPE_REG
580 case ssa.OpPPC64CLRLSLWI:
582 r1 := v.Args[0].Reg()
584 p := s.Prog(v.Op.Asm())
585 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
586 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
587 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)})
589 p.To.Type = obj.TYPE_REG
592 case ssa.OpPPC64CLRLSLDI:
594 r1 := v.Args[0].Reg()
596 p := s.Prog(v.Op.Asm())
597 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
598 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
599 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)})
601 p.To.Type = obj.TYPE_REG
604 // Mask has been set as sh
605 case ssa.OpPPC64RLDICL:
607 r1 := v.Args[0].Reg()
609 p := s.Prog(v.Op.Asm())
610 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}
611 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)})
613 p.To.Type = obj.TYPE_REG
616 case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
617 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
618 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
619 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
620 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
621 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
622 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
623 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
625 r1 := v.Args[0].Reg()
626 r2 := v.Args[1].Reg()
627 p := s.Prog(v.Op.Asm())
628 p.From.Type = obj.TYPE_REG
631 p.To.Type = obj.TYPE_REG
634 case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
635 r1 := v.Args[0].Reg()
636 r2 := v.Args[1].Reg()
637 p := s.Prog(v.Op.Asm())
638 p.From.Type = obj.TYPE_REG
641 p.To.Type = obj.TYPE_REG
642 p.To.Reg = ppc64.REGTMP // result is not needed
644 case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
645 p := s.Prog(v.Op.Asm())
646 p.From.Type = obj.TYPE_CONST
647 p.From.Offset = v.AuxInt
648 p.Reg = v.Args[0].Reg()
649 p.To.Type = obj.TYPE_REG
652 // Auxint holds encoded rotate + mask
653 case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
654 rot, _, _, mask := ssa.DecodePPC64RotateMask(v.AuxInt)
655 p := s.Prog(v.Op.Asm())
656 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
657 p.Reg = v.Args[0].Reg()
658 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(rot)}
659 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: int64(mask)})
662 case ssa.OpPPC64RLWNM:
663 _, _, _, mask := ssa.DecodePPC64RotateMask(v.AuxInt)
664 p := s.Prog(v.Op.Asm())
665 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
666 p.Reg = v.Args[0].Reg()
667 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
668 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: int64(mask)})
670 case ssa.OpPPC64MADDLD:
672 r1 := v.Args[0].Reg()
673 r2 := v.Args[1].Reg()
674 r3 := v.Args[2].Reg()
676 p := s.Prog(v.Op.Asm())
677 p.From.Type = obj.TYPE_REG
680 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r3})
681 p.To.Type = obj.TYPE_REG
684 case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
686 r1 := v.Args[0].Reg()
687 r2 := v.Args[1].Reg()
688 r3 := v.Args[2].Reg()
690 p := s.Prog(v.Op.Asm())
691 p.From.Type = obj.TYPE_REG
694 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
695 p.To.Type = obj.TYPE_REG
698 case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
699 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
700 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
701 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
703 p := s.Prog(v.Op.Asm())
704 p.To.Type = obj.TYPE_REG
706 p.From.Type = obj.TYPE_REG
707 p.From.Reg = v.Args[0].Reg()
709 case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
710 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
711 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
712 p := s.Prog(v.Op.Asm())
713 p.Reg = v.Args[0].Reg()
714 p.From.Type = obj.TYPE_CONST
715 p.From.Offset = v.AuxInt
716 p.To.Type = obj.TYPE_REG
719 case ssa.OpPPC64SUBFCconst:
720 p := s.Prog(v.Op.Asm())
721 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: v.AuxInt})
722 p.From.Type = obj.TYPE_REG
723 p.From.Reg = v.Args[0].Reg()
724 p.To.Type = obj.TYPE_REG
727 case ssa.OpPPC64ANDCCconst:
728 p := s.Prog(v.Op.Asm())
729 p.Reg = v.Args[0].Reg()
730 p.From.Type = obj.TYPE_CONST
731 p.From.Offset = v.AuxInt
732 p.To.Type = obj.TYPE_REG
733 p.To.Reg = ppc64.REGTMP // discard result
735 case ssa.OpPPC64MOVDaddr:
736 switch v.Aux.(type) {
738 v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
740 // If aux offset and aux int are both 0, and the same
741 // input and output regs are used, no instruction
742 // needs to be generated, since it would just be
744 if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
745 p := s.Prog(ppc64.AMOVD)
746 p.From.Type = obj.TYPE_ADDR
747 p.From.Reg = v.Args[0].Reg()
748 p.From.Offset = v.AuxInt
749 p.To.Type = obj.TYPE_REG
753 case *obj.LSym, *gc.Node:
754 p := s.Prog(ppc64.AMOVD)
755 p.From.Type = obj.TYPE_ADDR
756 p.From.Reg = v.Args[0].Reg()
757 p.To.Type = obj.TYPE_REG
759 gc.AddAux(&p.From, v)
763 case ssa.OpPPC64MOVDconst:
764 p := s.Prog(v.Op.Asm())
765 p.From.Type = obj.TYPE_CONST
766 p.From.Offset = v.AuxInt
767 p.To.Type = obj.TYPE_REG
770 case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
771 p := s.Prog(v.Op.Asm())
772 p.From.Type = obj.TYPE_FCONST
773 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
774 p.To.Type = obj.TYPE_REG
777 case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
778 p := s.Prog(v.Op.Asm())
779 p.From.Type = obj.TYPE_REG
780 p.From.Reg = v.Args[0].Reg()
781 p.To.Type = obj.TYPE_REG
782 p.To.Reg = v.Args[1].Reg()
784 case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
785 p := s.Prog(v.Op.Asm())
786 p.From.Type = obj.TYPE_REG
787 p.From.Reg = v.Args[0].Reg()
788 p.To.Type = obj.TYPE_CONST
789 p.To.Offset = v.AuxInt
791 case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
792 // Shift in register to required size
793 p := s.Prog(v.Op.Asm())
794 p.From.Type = obj.TYPE_REG
795 p.From.Reg = v.Args[0].Reg()
797 p.To.Type = obj.TYPE_REG
799 case ssa.OpPPC64MOVDload:
801 // MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
802 // For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
803 // the offset is not known until link time. If the load of a go.string uses relocation for the
804 // offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
805 // To avoid this problem, the full address of the go.string is computed and loaded into the base register,
806 // and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
807 // go.string types because other types will have proper alignment.
810 switch n := v.Aux.(type) {
812 gostring = strings.HasPrefix(n.Name, "go.string.")
815 // Generate full addr of the go.string const
817 p := s.Prog(ppc64.AMOVD)
818 p.From.Type = obj.TYPE_ADDR
819 p.From.Reg = v.Args[0].Reg()
820 gc.AddAux(&p.From, v)
821 p.To.Type = obj.TYPE_REG
823 // Load go.string using 0 offset
824 p = s.Prog(v.Op.Asm())
825 p.From.Type = obj.TYPE_MEM
827 p.To.Type = obj.TYPE_REG
831 // Not a go.string, generate a normal load
834 case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
835 p := s.Prog(v.Op.Asm())
836 p.From.Type = obj.TYPE_MEM
837 p.From.Reg = v.Args[0].Reg()
838 gc.AddAux(&p.From, v)
839 p.To.Type = obj.TYPE_REG
842 case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
843 p := s.Prog(v.Op.Asm())
844 p.From.Type = obj.TYPE_MEM
845 p.From.Reg = v.Args[0].Reg()
846 p.To.Type = obj.TYPE_REG
849 case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
850 p := s.Prog(v.Op.Asm())
851 p.To.Type = obj.TYPE_MEM
852 p.To.Reg = v.Args[0].Reg()
853 p.From.Type = obj.TYPE_REG
854 p.From.Reg = v.Args[1].Reg()
856 case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
857 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
858 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
859 p := s.Prog(v.Op.Asm())
860 p.From.Type = obj.TYPE_MEM
861 p.From.Reg = v.Args[0].Reg()
862 p.From.Index = v.Args[1].Reg()
863 p.To.Type = obj.TYPE_REG
866 case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
867 p := s.Prog(v.Op.Asm())
868 p.From.Type = obj.TYPE_REG
869 p.From.Reg = ppc64.REGZERO
870 p.To.Type = obj.TYPE_MEM
871 p.To.Reg = v.Args[0].Reg()
874 case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
875 p := s.Prog(v.Op.Asm())
876 p.From.Type = obj.TYPE_REG
877 p.From.Reg = v.Args[1].Reg()
878 p.To.Type = obj.TYPE_MEM
879 p.To.Reg = v.Args[0].Reg()
882 case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
883 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
884 ssa.OpPPC64MOVHBRstoreidx:
885 p := s.Prog(v.Op.Asm())
886 p.From.Type = obj.TYPE_REG
887 p.From.Reg = v.Args[2].Reg()
888 p.To.Index = v.Args[1].Reg()
889 p.To.Type = obj.TYPE_MEM
890 p.To.Reg = v.Args[0].Reg()
892 case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
894 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
895 // ISEL only accepts 0, 1, 2 condition values but the others can be
896 // achieved by swapping operand order.
897 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
898 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
899 // ISELB is used when a boolean result is needed, returning 0 or 1
900 p := s.Prog(ppc64.AISEL)
901 p.To.Type = obj.TYPE_REG
903 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
904 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
905 if v.Op == ssa.OpPPC64ISEL {
906 r.Reg = v.Args[1].Reg()
908 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
911 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
913 p.Reg = v.Args[0].Reg()
916 p.From.Type = obj.TYPE_CONST
917 p.From.Offset = v.AuxInt & 3
919 case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
920 // The LoweredQuad code generation
921 // generates STXV instructions on
922 // power9. The Short variation is used
923 // if no loop is generated.
925 // sizes >= 64 generate a loop as follows:
927 // Set up loop counter in CTR, used by BC
928 // XXLXOR clears VS32
929 // XXLXOR VS32,VS32,VS32
930 // MOVD len/64,REG_TMP
940 // Bytes per iteration
946 // Only generate a loop if there is more
949 // Set up VS32 (V0) to hold 0s
950 p := s.Prog(ppc64.AXXLXOR)
951 p.From.Type = obj.TYPE_REG
952 p.From.Reg = ppc64.REG_VS32
953 p.To.Type = obj.TYPE_REG
954 p.To.Reg = ppc64.REG_VS32
955 p.Reg = ppc64.REG_VS32
957 // Set up CTR loop counter
958 p = s.Prog(ppc64.AMOVD)
959 p.From.Type = obj.TYPE_CONST
961 p.To.Type = obj.TYPE_REG
962 p.To.Reg = ppc64.REGTMP
964 p = s.Prog(ppc64.AMOVD)
965 p.From.Type = obj.TYPE_REG
966 p.From.Reg = ppc64.REGTMP
967 p.To.Type = obj.TYPE_REG
968 p.To.Reg = ppc64.REG_CTR
970 // Don't generate padding for
971 // loops with few iterations.
973 p = s.Prog(obj.APCALIGN)
974 p.From.Type = obj.TYPE_CONST
978 // generate 4 STXVs to zero 64 bytes
981 p = s.Prog(ppc64.ASTXV)
982 p.From.Type = obj.TYPE_REG
983 p.From.Reg = ppc64.REG_VS32
984 p.To.Type = obj.TYPE_MEM
985 p.To.Reg = v.Args[0].Reg()
987 // Save the top of loop
991 p = s.Prog(ppc64.ASTXV)
992 p.From.Type = obj.TYPE_REG
993 p.From.Reg = ppc64.REG_VS32
994 p.To.Type = obj.TYPE_MEM
995 p.To.Reg = v.Args[0].Reg()
998 p = s.Prog(ppc64.ASTXV)
999 p.From.Type = obj.TYPE_REG
1000 p.From.Reg = ppc64.REG_VS32
1001 p.To.Type = obj.TYPE_MEM
1002 p.To.Reg = v.Args[0].Reg()
1005 p = s.Prog(ppc64.ASTXV)
1006 p.From.Type = obj.TYPE_REG
1007 p.From.Reg = ppc64.REG_VS32
1008 p.To.Type = obj.TYPE_MEM
1009 p.To.Reg = v.Args[0].Reg()
1012 // Increment address for the
1013 // 64 bytes just zeroed.
1014 p = s.Prog(ppc64.AADD)
1015 p.Reg = v.Args[0].Reg()
1016 p.From.Type = obj.TYPE_CONST
1018 p.To.Type = obj.TYPE_REG
1019 p.To.Reg = v.Args[0].Reg()
1021 // Branch back to top of loop
1023 // BC with BO_BCTR generates bdnz
1024 p = s.Prog(ppc64.ABC)
1025 p.From.Type = obj.TYPE_CONST
1026 p.From.Offset = ppc64.BO_BCTR
1027 p.Reg = ppc64.REG_R0
1028 p.To.Type = obj.TYPE_BRANCH
1031 // When ctr == 1 the loop was not generated but
1032 // there are at least 64 bytes to clear, so add
1033 // that to the remainder to generate the code
1034 // to clear those doublewords
1039 // Clear the remainder starting at offset zero
1042 if rem >= 16 && ctr <= 1 {
1043 // If the XXLXOR hasn't already been
1044 // generated, do it here to initialize
1046 p := s.Prog(ppc64.AXXLXOR)
1047 p.From.Type = obj.TYPE_REG
1048 p.From.Reg = ppc64.REG_VS32
1049 p.To.Type = obj.TYPE_REG
1050 p.To.Reg = ppc64.REG_VS32
1051 p.Reg = ppc64.REG_VS32
1053 // Generate STXV for 32 or 64
1056 p := s.Prog(ppc64.ASTXV)
1057 p.From.Type = obj.TYPE_REG
1058 p.From.Reg = ppc64.REG_VS32
1059 p.To.Type = obj.TYPE_MEM
1060 p.To.Reg = v.Args[0].Reg()
1061 p.To.Offset = offset
1063 p = s.Prog(ppc64.ASTXV)
1064 p.From.Type = obj.TYPE_REG
1065 p.From.Reg = ppc64.REG_VS32
1066 p.To.Type = obj.TYPE_MEM
1067 p.To.Reg = v.Args[0].Reg()
1068 p.To.Offset = offset + 16
1072 // Generate 16 bytes
1074 p := s.Prog(ppc64.ASTXV)
1075 p.From.Type = obj.TYPE_REG
1076 p.From.Reg = ppc64.REG_VS32
1077 p.To.Type = obj.TYPE_MEM
1078 p.To.Reg = v.Args[0].Reg()
1079 p.To.Offset = offset
1084 // first clear as many doublewords as possible
1085 // then clear remaining sizes as available
1087 op, size := ppc64.AMOVB, int64(1)
1090 op, size = ppc64.AMOVD, 8
1092 op, size = ppc64.AMOVW, 4
1094 op, size = ppc64.AMOVH, 2
1097 p.From.Type = obj.TYPE_REG
1098 p.From.Reg = ppc64.REG_R0
1099 p.To.Type = obj.TYPE_MEM
1100 p.To.Reg = v.Args[0].Reg()
1101 p.To.Offset = offset
1106 case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1108 // Unaligned data doesn't hurt performance
1109 // for these instructions on power8.
1111 // For sizes >= 64 generate a loop as follows:
1113 // Set up loop counter in CTR, used by BC
1114 // XXLXOR VS32,VS32,VS32
1115 // MOVD len/32,REG_TMP
1119 // STXVD2X VS32,(R0)(R20)
1120 // STXVD2X VS32,(R31)(R20)
1124 // any remainder is done as described below
1126 // for sizes < 64 bytes, first clear as many doublewords as possible,
1127 // then handle the remainder
1132 // the remainder bytes are cleared using one or more
1133 // of the following instructions with the appropriate
1134 // offsets depending which instructions are needed
1136 // MOVW R0,n1(R20) 4 bytes
1137 // MOVH R0,n2(R20) 2 bytes
1138 // MOVB R0,n3(R20) 1 byte
1140 // 7 bytes: MOVW, MOVH, MOVB
1141 // 6 bytes: MOVW, MOVH
1142 // 5 bytes: MOVW, MOVB
1143 // 3 bytes: MOVH, MOVB
1145 // each loop iteration does 32 bytes
1146 ctr := v.AuxInt / 32
1149 rem := v.AuxInt % 32
1151 // only generate a loop if there is more
1152 // than 1 iteration.
1154 // Set up VS32 (V0) to hold 0s
1155 p := s.Prog(ppc64.AXXLXOR)
1156 p.From.Type = obj.TYPE_REG
1157 p.From.Reg = ppc64.REG_VS32
1158 p.To.Type = obj.TYPE_REG
1159 p.To.Reg = ppc64.REG_VS32
1160 p.Reg = ppc64.REG_VS32
1162 // Set up CTR loop counter
1163 p = s.Prog(ppc64.AMOVD)
1164 p.From.Type = obj.TYPE_CONST
1166 p.To.Type = obj.TYPE_REG
1167 p.To.Reg = ppc64.REGTMP
1169 p = s.Prog(ppc64.AMOVD)
1170 p.From.Type = obj.TYPE_REG
1171 p.From.Reg = ppc64.REGTMP
1172 p.To.Type = obj.TYPE_REG
1173 p.To.Reg = ppc64.REG_CTR
1175 // Set up R31 to hold index value 16
1176 p = s.Prog(ppc64.AMOVD)
1177 p.From.Type = obj.TYPE_CONST
1179 p.To.Type = obj.TYPE_REG
1180 p.To.Reg = ppc64.REGTMP
1182 // Don't add padding for alignment
1183 // with few loop iterations.
1185 p = s.Prog(obj.APCALIGN)
1186 p.From.Type = obj.TYPE_CONST
1190 // generate 2 STXVD2Xs to store 16 bytes
1191 // when this is a loop then the top must be saved
1193 // This is the top of loop
1195 p = s.Prog(ppc64.ASTXVD2X)
1196 p.From.Type = obj.TYPE_REG
1197 p.From.Reg = ppc64.REG_VS32
1198 p.To.Type = obj.TYPE_MEM
1199 p.To.Reg = v.Args[0].Reg()
1200 p.To.Index = ppc64.REGZERO
1201 // Save the top of loop
1205 p = s.Prog(ppc64.ASTXVD2X)
1206 p.From.Type = obj.TYPE_REG
1207 p.From.Reg = ppc64.REG_VS32
1208 p.To.Type = obj.TYPE_MEM
1209 p.To.Reg = v.Args[0].Reg()
1210 p.To.Index = ppc64.REGTMP
1212 // Increment address for the
1213 // 4 doublewords just zeroed.
1214 p = s.Prog(ppc64.AADD)
1215 p.Reg = v.Args[0].Reg()
1216 p.From.Type = obj.TYPE_CONST
1218 p.To.Type = obj.TYPE_REG
1219 p.To.Reg = v.Args[0].Reg()
1221 // Branch back to top of loop
1223 // BC with BO_BCTR generates bdnz
1224 p = s.Prog(ppc64.ABC)
1225 p.From.Type = obj.TYPE_CONST
1226 p.From.Offset = ppc64.BO_BCTR
1227 p.Reg = ppc64.REG_R0
1228 p.To.Type = obj.TYPE_BRANCH
1232 // when ctr == 1 the loop was not generated but
1233 // there are at least 32 bytes to clear, so add
1234 // that to the remainder to generate the code
1235 // to clear those doublewords
1240 // clear the remainder starting at offset zero
1243 // first clear as many doublewords as possible
1244 // then clear remaining sizes as available
1246 op, size := ppc64.AMOVB, int64(1)
1249 op, size = ppc64.AMOVD, 8
1251 op, size = ppc64.AMOVW, 4
1253 op, size = ppc64.AMOVH, 2
1256 p.From.Type = obj.TYPE_REG
1257 p.From.Reg = ppc64.REG_R0
1258 p.To.Type = obj.TYPE_MEM
1259 p.To.Reg = v.Args[0].Reg()
1260 p.To.Offset = offset
1265 case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1267 bytesPerLoop := int64(32)
1268 // This will be used when moving more
1269 // than 8 bytes. Moves start with
1270 // as many 8 byte moves as possible, then
1271 // 4, 2, or 1 byte(s) as remaining. This will
1272 // work and be efficient for power8 or later.
1273 // If there are 64 or more bytes, then a
1274 // loop is generated to move 32 bytes and
1275 // update the src and dst addresses on each
1276 // iteration. When < 64 bytes, the appropriate
1277 // number of moves are generated based on the
1279 // When moving >= 64 bytes a loop is used
1280 // MOVD len/32,REG_TMP
1284 // LXVD2X (R0)(R21),VS32
1285 // LXVD2X (R31)(R21),VS33
1287 // STXVD2X VS32,(R0)(R20)
1288 // STXVD2X VS33,(R31)(R20)
1291 // Bytes not moved by this loop are moved
1292 // with a combination of the following instructions,
1293 // starting with the largest sizes and generating as
1294 // many as needed, using the appropriate offset value.
1304 // Each loop iteration moves 32 bytes
1305 ctr := v.AuxInt / bytesPerLoop
1307 // Remainder after the loop
1308 rem := v.AuxInt % bytesPerLoop
1310 dstReg := v.Args[0].Reg()
1311 srcReg := v.Args[1].Reg()
1313 // The set of registers used here, must match the clobbered reg list
1319 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1322 p := s.Prog(ppc64.AMOVD)
1323 p.From.Type = obj.TYPE_CONST
1325 p.To.Type = obj.TYPE_REG
1326 p.To.Reg = ppc64.REGTMP
1328 p = s.Prog(ppc64.AMOVD)
1329 p.From.Type = obj.TYPE_REG
1330 p.From.Reg = ppc64.REGTMP
1331 p.To.Type = obj.TYPE_REG
1332 p.To.Reg = ppc64.REG_CTR
1334 // Use REGTMP as index reg
1335 p = s.Prog(ppc64.AMOVD)
1336 p.From.Type = obj.TYPE_CONST
1338 p.To.Type = obj.TYPE_REG
1339 p.To.Reg = ppc64.REGTMP
1341 // Don't adding padding for
1342 // alignment with small iteration
1345 p = s.Prog(obj.APCALIGN)
1346 p.From.Type = obj.TYPE_CONST
1350 // Generate 16 byte loads and stores.
1351 // Use temp register for index (16)
1352 // on the second one.
1354 p = s.Prog(ppc64.ALXVD2X)
1355 p.From.Type = obj.TYPE_MEM
1357 p.From.Index = ppc64.REGZERO
1358 p.To.Type = obj.TYPE_REG
1359 p.To.Reg = ppc64.REG_VS32
1363 p = s.Prog(ppc64.ALXVD2X)
1364 p.From.Type = obj.TYPE_MEM
1366 p.From.Index = ppc64.REGTMP
1367 p.To.Type = obj.TYPE_REG
1368 p.To.Reg = ppc64.REG_VS33
1370 // increment the src reg for next iteration
1371 p = s.Prog(ppc64.AADD)
1373 p.From.Type = obj.TYPE_CONST
1374 p.From.Offset = bytesPerLoop
1375 p.To.Type = obj.TYPE_REG
1378 // generate 16 byte stores
1379 p = s.Prog(ppc64.ASTXVD2X)
1380 p.From.Type = obj.TYPE_REG
1381 p.From.Reg = ppc64.REG_VS32
1382 p.To.Type = obj.TYPE_MEM
1384 p.To.Index = ppc64.REGZERO
1386 p = s.Prog(ppc64.ASTXVD2X)
1387 p.From.Type = obj.TYPE_REG
1388 p.From.Reg = ppc64.REG_VS33
1389 p.To.Type = obj.TYPE_MEM
1391 p.To.Index = ppc64.REGTMP
1393 // increment the dst reg for next iteration
1394 p = s.Prog(ppc64.AADD)
1396 p.From.Type = obj.TYPE_CONST
1397 p.From.Offset = bytesPerLoop
1398 p.To.Type = obj.TYPE_REG
1401 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1403 p = s.Prog(ppc64.ABC)
1404 p.From.Type = obj.TYPE_CONST
1405 p.From.Offset = ppc64.BO_BCTR
1406 p.Reg = ppc64.REG_R0
1407 p.To.Type = obj.TYPE_BRANCH
1410 // srcReg and dstReg were incremented in the loop, so
1411 // later instructions start with offset 0.
1415 // No loop was generated for one iteration, so
1416 // add 32 bytes to the remainder to move those bytes.
1422 // Generate 16 byte loads and stores.
1423 // Use temp register for index (value 16)
1424 // on the second one.
1425 p := s.Prog(ppc64.ALXVD2X)
1426 p.From.Type = obj.TYPE_MEM
1428 p.From.Index = ppc64.REGZERO
1429 p.To.Type = obj.TYPE_REG
1430 p.To.Reg = ppc64.REG_VS32
1432 p = s.Prog(ppc64.ASTXVD2X)
1433 p.From.Type = obj.TYPE_REG
1434 p.From.Reg = ppc64.REG_VS32
1435 p.To.Type = obj.TYPE_MEM
1437 p.To.Index = ppc64.REGZERO
1443 // Use REGTMP as index reg
1444 p := s.Prog(ppc64.AMOVD)
1445 p.From.Type = obj.TYPE_CONST
1447 p.To.Type = obj.TYPE_REG
1448 p.To.Reg = ppc64.REGTMP
1450 p = s.Prog(ppc64.ALXVD2X)
1451 p.From.Type = obj.TYPE_MEM
1453 p.From.Index = ppc64.REGTMP
1454 p.To.Type = obj.TYPE_REG
1455 p.To.Reg = ppc64.REG_VS32
1457 p = s.Prog(ppc64.ASTXVD2X)
1458 p.From.Type = obj.TYPE_REG
1459 p.From.Reg = ppc64.REG_VS32
1460 p.To.Type = obj.TYPE_MEM
1462 p.To.Index = ppc64.REGTMP
1469 // Generate all the remaining load and store pairs, starting with
1470 // as many 8 byte moves as possible, then 4, 2, 1.
1472 op, size := ppc64.AMOVB, int64(1)
1475 op, size = ppc64.AMOVD, 8
1477 op, size = ppc64.AMOVW, 4
1479 op, size = ppc64.AMOVH, 2
1483 p.To.Type = obj.TYPE_REG
1484 p.To.Reg = ppc64.REGTMP
1485 p.From.Type = obj.TYPE_MEM
1487 p.From.Offset = offset
1491 p.From.Type = obj.TYPE_REG
1492 p.From.Reg = ppc64.REGTMP
1493 p.To.Type = obj.TYPE_MEM
1495 p.To.Offset = offset
1500 case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1501 bytesPerLoop := int64(64)
1502 // This is used when moving more
1503 // than 8 bytes on power9. Moves start with
1504 // as many 8 byte moves as possible, then
1505 // 4, 2, or 1 byte(s) as remaining. This will
1506 // work and be efficient for power8 or later.
1507 // If there are 64 or more bytes, then a
1508 // loop is generated to move 32 bytes and
1509 // update the src and dst addresses on each
1510 // iteration. When < 64 bytes, the appropriate
1511 // number of moves are generated based on the
1513 // When moving >= 64 bytes a loop is used
1514 // MOVD len/32,REG_TMP
1521 // STXV VS33,16(R20)
1524 // Bytes not moved by this loop are moved
1525 // with a combination of the following instructions,
1526 // starting with the largest sizes and generating as
1527 // many as needed, using the appropriate offset value.
1537 // Each loop iteration moves 32 bytes
1538 ctr := v.AuxInt / bytesPerLoop
1540 // Remainder after the loop
1541 rem := v.AuxInt % bytesPerLoop
1543 dstReg := v.Args[0].Reg()
1544 srcReg := v.Args[1].Reg()
1551 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1554 p := s.Prog(ppc64.AMOVD)
1555 p.From.Type = obj.TYPE_CONST
1557 p.To.Type = obj.TYPE_REG
1558 p.To.Reg = ppc64.REGTMP
1560 p = s.Prog(ppc64.AMOVD)
1561 p.From.Type = obj.TYPE_REG
1562 p.From.Reg = ppc64.REGTMP
1563 p.To.Type = obj.TYPE_REG
1564 p.To.Reg = ppc64.REG_CTR
1566 p = s.Prog(obj.APCALIGN)
1567 p.From.Type = obj.TYPE_CONST
1570 // Generate 16 byte loads and stores.
1571 p = s.Prog(ppc64.ALXV)
1572 p.From.Type = obj.TYPE_MEM
1574 p.From.Offset = offset
1575 p.To.Type = obj.TYPE_REG
1576 p.To.Reg = ppc64.REG_VS32
1580 p = s.Prog(ppc64.ALXV)
1581 p.From.Type = obj.TYPE_MEM
1583 p.From.Offset = offset + 16
1584 p.To.Type = obj.TYPE_REG
1585 p.To.Reg = ppc64.REG_VS33
1587 // generate 16 byte stores
1588 p = s.Prog(ppc64.ASTXV)
1589 p.From.Type = obj.TYPE_REG
1590 p.From.Reg = ppc64.REG_VS32
1591 p.To.Type = obj.TYPE_MEM
1593 p.To.Offset = offset
1595 p = s.Prog(ppc64.ASTXV)
1596 p.From.Type = obj.TYPE_REG
1597 p.From.Reg = ppc64.REG_VS33
1598 p.To.Type = obj.TYPE_MEM
1600 p.To.Offset = offset + 16
1602 // Generate 16 byte loads and stores.
1603 p = s.Prog(ppc64.ALXV)
1604 p.From.Type = obj.TYPE_MEM
1606 p.From.Offset = offset + 32
1607 p.To.Type = obj.TYPE_REG
1608 p.To.Reg = ppc64.REG_VS32
1610 p = s.Prog(ppc64.ALXV)
1611 p.From.Type = obj.TYPE_MEM
1613 p.From.Offset = offset + 48
1614 p.To.Type = obj.TYPE_REG
1615 p.To.Reg = ppc64.REG_VS33
1617 // generate 16 byte stores
1618 p = s.Prog(ppc64.ASTXV)
1619 p.From.Type = obj.TYPE_REG
1620 p.From.Reg = ppc64.REG_VS32
1621 p.To.Type = obj.TYPE_MEM
1623 p.To.Offset = offset + 32
1625 p = s.Prog(ppc64.ASTXV)
1626 p.From.Type = obj.TYPE_REG
1627 p.From.Reg = ppc64.REG_VS33
1628 p.To.Type = obj.TYPE_MEM
1630 p.To.Offset = offset + 48
1632 // increment the src reg for next iteration
1633 p = s.Prog(ppc64.AADD)
1635 p.From.Type = obj.TYPE_CONST
1636 p.From.Offset = bytesPerLoop
1637 p.To.Type = obj.TYPE_REG
1640 // increment the dst reg for next iteration
1641 p = s.Prog(ppc64.AADD)
1643 p.From.Type = obj.TYPE_CONST
1644 p.From.Offset = bytesPerLoop
1645 p.To.Type = obj.TYPE_REG
1648 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1650 p = s.Prog(ppc64.ABC)
1651 p.From.Type = obj.TYPE_CONST
1652 p.From.Offset = ppc64.BO_BCTR
1653 p.Reg = ppc64.REG_R0
1654 p.To.Type = obj.TYPE_BRANCH
1657 // srcReg and dstReg were incremented in the loop, so
1658 // later instructions start with offset 0.
1662 // No loop was generated for one iteration, so
1663 // add 32 bytes to the remainder to move those bytes.
1668 p := s.Prog(ppc64.ALXV)
1669 p.From.Type = obj.TYPE_MEM
1671 p.To.Type = obj.TYPE_REG
1672 p.To.Reg = ppc64.REG_VS32
1674 p = s.Prog(ppc64.ALXV)
1675 p.From.Type = obj.TYPE_MEM
1678 p.To.Type = obj.TYPE_REG
1679 p.To.Reg = ppc64.REG_VS33
1681 p = s.Prog(ppc64.ASTXV)
1682 p.From.Type = obj.TYPE_REG
1683 p.From.Reg = ppc64.REG_VS32
1684 p.To.Type = obj.TYPE_MEM
1687 p = s.Prog(ppc64.ASTXV)
1688 p.From.Type = obj.TYPE_REG
1689 p.From.Reg = ppc64.REG_VS33
1690 p.To.Type = obj.TYPE_MEM
1699 // Generate 16 byte loads and stores.
1700 p := s.Prog(ppc64.ALXV)
1701 p.From.Type = obj.TYPE_MEM
1703 p.From.Offset = offset
1704 p.To.Type = obj.TYPE_REG
1705 p.To.Reg = ppc64.REG_VS32
1707 p = s.Prog(ppc64.ASTXV)
1708 p.From.Type = obj.TYPE_REG
1709 p.From.Reg = ppc64.REG_VS32
1710 p.To.Type = obj.TYPE_MEM
1712 p.To.Offset = offset
1718 p := s.Prog(ppc64.ALXV)
1719 p.From.Type = obj.TYPE_MEM
1721 p.From.Offset = offset
1722 p.To.Type = obj.TYPE_REG
1723 p.To.Reg = ppc64.REG_VS32
1725 p = s.Prog(ppc64.ASTXV)
1726 p.From.Type = obj.TYPE_REG
1727 p.From.Reg = ppc64.REG_VS32
1728 p.To.Type = obj.TYPE_MEM
1730 p.To.Offset = offset
1736 // Generate all the remaining load and store pairs, starting with
1737 // as many 8 byte moves as possible, then 4, 2, 1.
1739 op, size := ppc64.AMOVB, int64(1)
1742 op, size = ppc64.AMOVD, 8
1744 op, size = ppc64.AMOVW, 4
1746 op, size = ppc64.AMOVH, 2
1750 p.To.Type = obj.TYPE_REG
1751 p.To.Reg = ppc64.REGTMP
1752 p.From.Type = obj.TYPE_MEM
1754 p.From.Offset = offset
1758 p.From.Type = obj.TYPE_REG
1759 p.From.Reg = ppc64.REGTMP
1760 p.To.Type = obj.TYPE_MEM
1762 p.To.Offset = offset
1767 case ssa.OpPPC64CALLstatic:
1770 case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1771 p := s.Prog(ppc64.AMOVD)
1772 p.From.Type = obj.TYPE_REG
1773 p.From.Reg = v.Args[0].Reg()
1774 p.To.Type = obj.TYPE_REG
1775 p.To.Reg = ppc64.REG_LR
1777 if v.Args[0].Reg() != ppc64.REG_R12 {
1778 v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1782 pp.To.Reg = ppc64.REG_LR
1784 if gc.Ctxt.Flag_shared {
1785 // When compiling Go into PIC, the function we just
1786 // called via pointer might have been implemented in
1787 // a separate module and so overwritten the TOC
1788 // pointer in R2; reload it.
1789 q := s.Prog(ppc64.AMOVD)
1790 q.From.Type = obj.TYPE_MEM
1792 q.From.Reg = ppc64.REGSP
1793 q.To.Type = obj.TYPE_REG
1794 q.To.Reg = ppc64.REG_R2
1797 case ssa.OpPPC64LoweredWB:
1798 p := s.Prog(obj.ACALL)
1799 p.To.Type = obj.TYPE_MEM
1800 p.To.Name = obj.NAME_EXTERN
1801 p.To.Sym = v.Aux.(*obj.LSym)
1803 case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1804 p := s.Prog(obj.ACALL)
1805 p.To.Type = obj.TYPE_MEM
1806 p.To.Name = obj.NAME_EXTERN
1807 p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
1808 s.UseArgs(16) // space used in callee args area by assembly stubs
1810 case ssa.OpPPC64LoweredNilCheck:
1811 if objabi.GOOS == "aix" {
1815 // NOP (so the BNE has somewhere to land)
1818 p := s.Prog(ppc64.ACMP)
1819 p.From.Type = obj.TYPE_REG
1820 p.From.Reg = v.Args[0].Reg()
1821 p.To.Type = obj.TYPE_REG
1822 p.To.Reg = ppc64.REG_R0
1825 p2 := s.Prog(ppc64.ABNE)
1826 p2.To.Type = obj.TYPE_BRANCH
1829 // Write at 0 is forbidden and will trigger a SIGSEGV
1830 p = s.Prog(ppc64.AMOVW)
1831 p.From.Type = obj.TYPE_REG
1832 p.From.Reg = ppc64.REG_R0
1833 p.To.Type = obj.TYPE_MEM
1834 p.To.Reg = ppc64.REG_R0
1836 // NOP (so the BNE has somewhere to land)
1837 nop := s.Prog(obj.ANOP)
1841 // Issue a load which will fault if arg is nil.
1842 p := s.Prog(ppc64.AMOVBZ)
1843 p.From.Type = obj.TYPE_MEM
1844 p.From.Reg = v.Args[0].Reg()
1845 gc.AddAux(&p.From, v)
1846 p.To.Type = obj.TYPE_REG
1847 p.To.Reg = ppc64.REGTMP
1849 if logopt.Enabled() {
1850 logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1852 if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1853 gc.Warnl(v.Pos, "generated nil check")
1856 // These should be resolved by rules and not make it here.
1857 case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1858 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1859 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1860 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1861 case ssa.OpPPC64InvertFlags:
1862 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1863 case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1864 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1866 // TODO: implement for clobberdead experiment. Nop is ok for now.
1868 v.Fatalf("genValue not implemented: %s", v.LongString())
1872 var blockJump = [...]struct {
1874 asmeq, invasmun bool
1876 ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1877 ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1879 ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1880 ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1881 ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1882 ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1884 // TODO: need to work FP comparisons into block jumps
1885 ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1886 ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1887 ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1888 ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1891 func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
1893 case ssa.BlockDefer:
1894 // defer returns in R3:
1895 // 0 if we should continue executing
1896 // 1 if we should jump to deferreturn call
1897 p := s.Prog(ppc64.ACMP)
1898 p.From.Type = obj.TYPE_REG
1899 p.From.Reg = ppc64.REG_R3
1900 p.To.Type = obj.TYPE_REG
1901 p.To.Reg = ppc64.REG_R0
1903 p = s.Prog(ppc64.ABNE)
1904 p.To.Type = obj.TYPE_BRANCH
1905 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
1906 if b.Succs[0].Block() != next {
1907 p := s.Prog(obj.AJMP)
1908 p.To.Type = obj.TYPE_BRANCH
1909 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1912 case ssa.BlockPlain:
1913 if b.Succs[0].Block() != next {
1914 p := s.Prog(obj.AJMP)
1915 p.To.Type = obj.TYPE_BRANCH
1916 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1921 case ssa.BlockRetJmp:
1922 p := s.Prog(obj.AJMP)
1923 p.To.Type = obj.TYPE_MEM
1924 p.To.Name = obj.NAME_EXTERN
1925 p.To.Sym = b.Aux.(*obj.LSym)
1927 case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
1928 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
1929 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
1930 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
1931 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
1932 jmp := blockJump[b.Kind]
1934 case b.Succs[0].Block():
1935 s.Br(jmp.invasm, b.Succs[1].Block())
1937 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1938 s.Br(ppc64.ABVS, b.Succs[1].Block())
1940 case b.Succs[1].Block():
1941 s.Br(jmp.asm, b.Succs[0].Block())
1943 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1946 if b.Likely != ssa.BranchUnlikely {
1947 s.Br(jmp.asm, b.Succs[0].Block())
1949 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1951 s.Br(obj.AJMP, b.Succs[1].Block())
1953 s.Br(jmp.invasm, b.Succs[1].Block())
1955 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1956 s.Br(ppc64.ABVS, b.Succs[1].Block())
1958 s.Br(obj.AJMP, b.Succs[0].Block())
1962 b.Fatalf("branch not implemented: %s", b.LongString())