1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/base"
9 "cmd/compile/internal/gc"
10 "cmd/compile/internal/logopt"
11 "cmd/compile/internal/ssa"
12 "cmd/compile/internal/types"
14 "cmd/internal/obj/ppc64"
20 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
21 func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
22 // flive := b.FlagsLiveAtEnd
23 // if b.Control != nil && b.Control.Type.IsFlags() {
26 // for i := len(b.Values) - 1; i >= 0; i-- {
28 // if flive && (v.Op == ssa.OpPPC64MOVWconst || v.Op == ssa.OpPPC64MOVDconst) {
29 // // The "mark" is any non-nil Aux value.
32 // if v.Type.IsFlags() {
35 // for _, a := range v.Args {
36 // if a.Type.IsFlags() {
43 // loadByType returns the load instruction of the given type.
44 func loadByType(t *types.Type) obj.As {
76 panic("bad load type")
79 // storeByType returns the store instruction of the given type.
80 func storeByType(t *types.Type) obj.As {
100 panic("bad store type")
103 func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
126 case ssa.OpPPC64LoweredMuluhilo:
127 // MULHDU Rarg1, Rarg0, Reg0
128 // MULLD Rarg1, Rarg0, Reg1
129 r0 := v.Args[0].Reg()
130 r1 := v.Args[1].Reg()
131 p := s.Prog(ppc64.AMULHDU)
132 p.From.Type = obj.TYPE_REG
135 p.To.Type = obj.TYPE_REG
137 p1 := s.Prog(ppc64.AMULLD)
138 p1.From.Type = obj.TYPE_REG
141 p1.To.Type = obj.TYPE_REG
144 case ssa.OpPPC64LoweredAdd64Carry:
145 // ADDC Rarg2, -1, Rtmp
146 // ADDE Rarg1, Rarg0, Reg0
148 r0 := v.Args[0].Reg()
149 r1 := v.Args[1].Reg()
150 r2 := v.Args[2].Reg()
151 p := s.Prog(ppc64.AADDC)
152 p.From.Type = obj.TYPE_CONST
155 p.To.Type = obj.TYPE_REG
156 p.To.Reg = ppc64.REGTMP
157 p1 := s.Prog(ppc64.AADDE)
158 p1.From.Type = obj.TYPE_REG
161 p1.To.Type = obj.TYPE_REG
163 p2 := s.Prog(ppc64.AADDZE)
164 p2.From.Type = obj.TYPE_REG
165 p2.From.Reg = ppc64.REGZERO
166 p2.To.Type = obj.TYPE_REG
169 case ssa.OpPPC64LoweredAtomicAnd8,
170 ssa.OpPPC64LoweredAtomicAnd32,
171 ssa.OpPPC64LoweredAtomicOr8,
172 ssa.OpPPC64LoweredAtomicOr32:
174 // LBAR/LWAR (Rarg0), Rtmp
175 // AND/OR Rarg1, Rtmp
176 // STBCCC/STWCCC Rtmp, (Rarg0)
180 if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
184 r0 := v.Args[0].Reg()
185 r1 := v.Args[1].Reg()
186 // LWSYNC - Assuming shared data not write-through-required nor
187 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
188 plwsync := s.Prog(ppc64.ALWSYNC)
189 plwsync.To.Type = obj.TYPE_NONE
192 p.From.Type = obj.TYPE_MEM
194 p.To.Type = obj.TYPE_REG
195 p.To.Reg = ppc64.REGTMP
197 p1 := s.Prog(v.Op.Asm())
198 p1.From.Type = obj.TYPE_REG
200 p1.To.Type = obj.TYPE_REG
201 p1.To.Reg = ppc64.REGTMP
204 p2.From.Type = obj.TYPE_REG
205 p2.From.Reg = ppc64.REGTMP
206 p2.To.Type = obj.TYPE_MEM
208 p2.RegTo2 = ppc64.REGTMP
210 p3 := s.Prog(ppc64.ABNE)
211 p3.To.Type = obj.TYPE_BRANCH
214 case ssa.OpPPC64LoweredAtomicAdd32,
215 ssa.OpPPC64LoweredAtomicAdd64:
217 // LDAR/LWAR (Rarg0), Rout
219 // STDCCC/STWCCC Rout, (Rarg0)
221 // MOVW Rout,Rout (if Add32)
224 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
228 r0 := v.Args[0].Reg()
229 r1 := v.Args[1].Reg()
231 // LWSYNC - Assuming shared data not write-through-required nor
232 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
233 plwsync := s.Prog(ppc64.ALWSYNC)
234 plwsync.To.Type = obj.TYPE_NONE
237 p.From.Type = obj.TYPE_MEM
239 p.To.Type = obj.TYPE_REG
242 p1 := s.Prog(ppc64.AADD)
243 p1.From.Type = obj.TYPE_REG
246 p1.To.Type = obj.TYPE_REG
249 p3.From.Type = obj.TYPE_REG
251 p3.To.Type = obj.TYPE_MEM
254 p4 := s.Prog(ppc64.ABNE)
255 p4.To.Type = obj.TYPE_BRANCH
258 // Ensure a 32 bit result
259 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
260 p5 := s.Prog(ppc64.AMOVWZ)
261 p5.To.Type = obj.TYPE_REG
263 p5.From.Type = obj.TYPE_REG
267 case ssa.OpPPC64LoweredAtomicExchange32,
268 ssa.OpPPC64LoweredAtomicExchange64:
270 // LDAR/LWAR (Rarg0), Rout
271 // STDCCC/STWCCC Rout, (Rarg0)
276 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
280 r0 := v.Args[0].Reg()
281 r1 := v.Args[1].Reg()
283 // LWSYNC - Assuming shared data not write-through-required nor
284 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
285 plwsync := s.Prog(ppc64.ALWSYNC)
286 plwsync.To.Type = obj.TYPE_NONE
289 p.From.Type = obj.TYPE_MEM
291 p.To.Type = obj.TYPE_REG
295 p1.From.Type = obj.TYPE_REG
297 p1.To.Type = obj.TYPE_MEM
300 p2 := s.Prog(ppc64.ABNE)
301 p2.To.Type = obj.TYPE_BRANCH
304 pisync := s.Prog(ppc64.AISYNC)
305 pisync.To.Type = obj.TYPE_NONE
307 case ssa.OpPPC64LoweredAtomicLoad8,
308 ssa.OpPPC64LoweredAtomicLoad32,
309 ssa.OpPPC64LoweredAtomicLoad64,
310 ssa.OpPPC64LoweredAtomicLoadPtr:
312 // MOVB/MOVD/MOVW (Rarg0), Rout
319 case ssa.OpPPC64LoweredAtomicLoad8:
321 case ssa.OpPPC64LoweredAtomicLoad32:
325 arg0 := v.Args[0].Reg()
327 // SYNC when AuxInt == 1; otherwise, load-acquire
329 psync := s.Prog(ppc64.ASYNC)
330 psync.To.Type = obj.TYPE_NONE
334 p.From.Type = obj.TYPE_MEM
336 p.To.Type = obj.TYPE_REG
340 p1.From.Type = obj.TYPE_REG
342 p1.To.Type = obj.TYPE_REG
345 p2 := s.Prog(ppc64.ABNE)
346 p2.To.Type = obj.TYPE_BRANCH
348 pisync := s.Prog(ppc64.AISYNC)
349 pisync.To.Type = obj.TYPE_NONE
352 case ssa.OpPPC64LoweredAtomicStore8,
353 ssa.OpPPC64LoweredAtomicStore32,
354 ssa.OpPPC64LoweredAtomicStore64:
356 // MOVB/MOVW/MOVD arg1,(arg0)
359 case ssa.OpPPC64LoweredAtomicStore8:
361 case ssa.OpPPC64LoweredAtomicStore32:
364 arg0 := v.Args[0].Reg()
365 arg1 := v.Args[1].Reg()
366 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
368 syncOp := ppc64.ASYNC
370 syncOp = ppc64.ALWSYNC
372 psync := s.Prog(syncOp)
373 psync.To.Type = obj.TYPE_NONE
376 p.To.Type = obj.TYPE_MEM
378 p.From.Type = obj.TYPE_REG
381 case ssa.OpPPC64LoweredAtomicCas64,
382 ssa.OpPPC64LoweredAtomicCas32:
385 // LDAR (Rarg0), MutexHint, Rtmp
388 // STDCCC Rarg2, (Rarg0)
390 // LWSYNC // Only for sequential consistency; not required in CasRel.
399 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
404 r0 := v.Args[0].Reg()
405 r1 := v.Args[1].Reg()
406 r2 := v.Args[2].Reg()
408 // LWSYNC - Assuming shared data not write-through-required nor
409 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
410 plwsync1 := s.Prog(ppc64.ALWSYNC)
411 plwsync1.To.Type = obj.TYPE_NONE
414 p.From.Type = obj.TYPE_MEM
416 p.To.Type = obj.TYPE_REG
417 p.To.Reg = ppc64.REGTMP
418 // If it is a Compare-and-Swap-Release operation, set the EH field with
421 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
425 p1.From.Type = obj.TYPE_REG
427 p1.To.Reg = ppc64.REGTMP
428 p1.To.Type = obj.TYPE_REG
430 p2 := s.Prog(ppc64.ABNE)
431 p2.To.Type = obj.TYPE_BRANCH
434 p3.From.Type = obj.TYPE_REG
436 p3.To.Type = obj.TYPE_MEM
439 p4 := s.Prog(ppc64.ABNE)
440 p4.To.Type = obj.TYPE_BRANCH
442 // LWSYNC - Assuming shared data not write-through-required nor
443 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
444 // If the operation is a CAS-Release, then synchronization is not necessary.
446 plwsync2 := s.Prog(ppc64.ALWSYNC)
447 plwsync2.To.Type = obj.TYPE_NONE
450 p5 := s.Prog(ppc64.AMOVD)
451 p5.From.Type = obj.TYPE_CONST
453 p5.To.Type = obj.TYPE_REG
456 p6 := s.Prog(obj.AJMP)
457 p6.To.Type = obj.TYPE_BRANCH
459 p7 := s.Prog(ppc64.AMOVD)
460 p7.From.Type = obj.TYPE_CONST
462 p7.To.Type = obj.TYPE_REG
466 p8 := s.Prog(obj.ANOP)
469 case ssa.OpPPC64LoweredGetClosurePtr:
470 // Closure pointer is R11 (already)
471 gc.CheckLoweredGetClosurePtr(v)
473 case ssa.OpPPC64LoweredGetCallerSP:
474 // caller's SP is FixedFrameSize below the address of the first arg
475 p := s.Prog(ppc64.AMOVD)
476 p.From.Type = obj.TYPE_ADDR
477 p.From.Offset = -base.Ctxt.FixedFrameSize()
478 p.From.Name = obj.NAME_PARAM
479 p.To.Type = obj.TYPE_REG
482 case ssa.OpPPC64LoweredGetCallerPC:
483 p := s.Prog(obj.AGETCALLERPC)
484 p.To.Type = obj.TYPE_REG
487 case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
488 // input is already rounded
491 loadOp := loadByType(v.Type)
493 gc.AddrAuto(&p.From, v.Args[0])
494 p.To.Type = obj.TYPE_REG
498 storeOp := storeByType(v.Type)
500 p.From.Type = obj.TYPE_REG
501 p.From.Reg = v.Args[0].Reg()
502 gc.AddrAuto(&p.To, v)
504 case ssa.OpPPC64DIVD:
514 r0 := v.Args[0].Reg()
515 r1 := v.Args[1].Reg()
517 p := s.Prog(ppc64.ACMP)
518 p.From.Type = obj.TYPE_REG
520 p.To.Type = obj.TYPE_CONST
523 pbahead := s.Prog(ppc64.ABEQ)
524 pbahead.To.Type = obj.TYPE_BRANCH
526 p = s.Prog(v.Op.Asm())
527 p.From.Type = obj.TYPE_REG
530 p.To.Type = obj.TYPE_REG
533 pbover := s.Prog(obj.AJMP)
534 pbover.To.Type = obj.TYPE_BRANCH
536 p = s.Prog(ppc64.ANEG)
537 p.To.Type = obj.TYPE_REG
539 p.From.Type = obj.TYPE_REG
546 case ssa.OpPPC64DIVW:
547 // word-width version of above
549 r0 := v.Args[0].Reg()
550 r1 := v.Args[1].Reg()
552 p := s.Prog(ppc64.ACMPW)
553 p.From.Type = obj.TYPE_REG
555 p.To.Type = obj.TYPE_CONST
558 pbahead := s.Prog(ppc64.ABEQ)
559 pbahead.To.Type = obj.TYPE_BRANCH
561 p = s.Prog(v.Op.Asm())
562 p.From.Type = obj.TYPE_REG
565 p.To.Type = obj.TYPE_REG
568 pbover := s.Prog(obj.AJMP)
569 pbover.To.Type = obj.TYPE_BRANCH
571 p = s.Prog(ppc64.ANEG)
572 p.To.Type = obj.TYPE_REG
574 p.From.Type = obj.TYPE_REG
581 case ssa.OpPPC64CLRLSLWI:
583 r1 := v.Args[0].Reg()
585 p := s.Prog(v.Op.Asm())
586 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
587 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
588 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)})
590 p.To.Type = obj.TYPE_REG
593 case ssa.OpPPC64CLRLSLDI:
595 r1 := v.Args[0].Reg()
597 p := s.Prog(v.Op.Asm())
598 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
599 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
600 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)})
602 p.To.Type = obj.TYPE_REG
605 // Mask has been set as sh
606 case ssa.OpPPC64RLDICL:
608 r1 := v.Args[0].Reg()
610 p := s.Prog(v.Op.Asm())
611 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}
612 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)})
614 p.To.Type = obj.TYPE_REG
617 case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
618 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
619 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
620 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
621 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
622 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
623 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
624 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
626 r1 := v.Args[0].Reg()
627 r2 := v.Args[1].Reg()
628 p := s.Prog(v.Op.Asm())
629 p.From.Type = obj.TYPE_REG
632 p.To.Type = obj.TYPE_REG
635 case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
636 r1 := v.Args[0].Reg()
637 r2 := v.Args[1].Reg()
638 p := s.Prog(v.Op.Asm())
639 p.From.Type = obj.TYPE_REG
642 p.To.Type = obj.TYPE_REG
643 p.To.Reg = ppc64.REGTMP // result is not needed
645 case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
646 p := s.Prog(v.Op.Asm())
647 p.From.Type = obj.TYPE_CONST
648 p.From.Offset = v.AuxInt
649 p.Reg = v.Args[0].Reg()
650 p.To.Type = obj.TYPE_REG
653 // Auxint holds encoded rotate + mask
654 case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
655 rot, _, _, mask := ssa.DecodePPC64RotateMask(v.AuxInt)
656 p := s.Prog(v.Op.Asm())
657 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
658 p.Reg = v.Args[0].Reg()
659 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(rot)}
660 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: int64(mask)})
663 case ssa.OpPPC64RLWNM:
664 _, _, _, mask := ssa.DecodePPC64RotateMask(v.AuxInt)
665 p := s.Prog(v.Op.Asm())
666 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
667 p.Reg = v.Args[0].Reg()
668 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
669 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: int64(mask)})
671 case ssa.OpPPC64MADDLD:
673 r1 := v.Args[0].Reg()
674 r2 := v.Args[1].Reg()
675 r3 := v.Args[2].Reg()
677 p := s.Prog(v.Op.Asm())
678 p.From.Type = obj.TYPE_REG
681 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r3})
682 p.To.Type = obj.TYPE_REG
685 case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
687 r1 := v.Args[0].Reg()
688 r2 := v.Args[1].Reg()
689 r3 := v.Args[2].Reg()
691 p := s.Prog(v.Op.Asm())
692 p.From.Type = obj.TYPE_REG
695 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
696 p.To.Type = obj.TYPE_REG
699 case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
700 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
701 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
702 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
704 p := s.Prog(v.Op.Asm())
705 p.To.Type = obj.TYPE_REG
707 p.From.Type = obj.TYPE_REG
708 p.From.Reg = v.Args[0].Reg()
710 case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
711 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
712 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
713 p := s.Prog(v.Op.Asm())
714 p.Reg = v.Args[0].Reg()
715 p.From.Type = obj.TYPE_CONST
716 p.From.Offset = v.AuxInt
717 p.To.Type = obj.TYPE_REG
720 case ssa.OpPPC64SUBFCconst:
721 p := s.Prog(v.Op.Asm())
722 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: v.AuxInt})
723 p.From.Type = obj.TYPE_REG
724 p.From.Reg = v.Args[0].Reg()
725 p.To.Type = obj.TYPE_REG
728 case ssa.OpPPC64ANDCCconst:
729 p := s.Prog(v.Op.Asm())
730 p.Reg = v.Args[0].Reg()
731 p.From.Type = obj.TYPE_CONST
732 p.From.Offset = v.AuxInt
733 p.To.Type = obj.TYPE_REG
734 p.To.Reg = ppc64.REGTMP // discard result
736 case ssa.OpPPC64MOVDaddr:
737 switch v.Aux.(type) {
739 v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
741 // If aux offset and aux int are both 0, and the same
742 // input and output regs are used, no instruction
743 // needs to be generated, since it would just be
745 if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
746 p := s.Prog(ppc64.AMOVD)
747 p.From.Type = obj.TYPE_ADDR
748 p.From.Reg = v.Args[0].Reg()
749 p.From.Offset = v.AuxInt
750 p.To.Type = obj.TYPE_REG
754 case *obj.LSym, *gc.Node:
755 p := s.Prog(ppc64.AMOVD)
756 p.From.Type = obj.TYPE_ADDR
757 p.From.Reg = v.Args[0].Reg()
758 p.To.Type = obj.TYPE_REG
760 gc.AddAux(&p.From, v)
764 case ssa.OpPPC64MOVDconst:
765 p := s.Prog(v.Op.Asm())
766 p.From.Type = obj.TYPE_CONST
767 p.From.Offset = v.AuxInt
768 p.To.Type = obj.TYPE_REG
771 case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
772 p := s.Prog(v.Op.Asm())
773 p.From.Type = obj.TYPE_FCONST
774 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
775 p.To.Type = obj.TYPE_REG
778 case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
779 p := s.Prog(v.Op.Asm())
780 p.From.Type = obj.TYPE_REG
781 p.From.Reg = v.Args[0].Reg()
782 p.To.Type = obj.TYPE_REG
783 p.To.Reg = v.Args[1].Reg()
785 case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
786 p := s.Prog(v.Op.Asm())
787 p.From.Type = obj.TYPE_REG
788 p.From.Reg = v.Args[0].Reg()
789 p.To.Type = obj.TYPE_CONST
790 p.To.Offset = v.AuxInt
792 case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
793 // Shift in register to required size
794 p := s.Prog(v.Op.Asm())
795 p.From.Type = obj.TYPE_REG
796 p.From.Reg = v.Args[0].Reg()
798 p.To.Type = obj.TYPE_REG
800 case ssa.OpPPC64MOVDload:
802 // MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
803 // For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
804 // the offset is not known until link time. If the load of a go.string uses relocation for the
805 // offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
806 // To avoid this problem, the full address of the go.string is computed and loaded into the base register,
807 // and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
808 // go.string types because other types will have proper alignment.
811 switch n := v.Aux.(type) {
813 gostring = strings.HasPrefix(n.Name, "go.string.")
816 // Generate full addr of the go.string const
818 p := s.Prog(ppc64.AMOVD)
819 p.From.Type = obj.TYPE_ADDR
820 p.From.Reg = v.Args[0].Reg()
821 gc.AddAux(&p.From, v)
822 p.To.Type = obj.TYPE_REG
824 // Load go.string using 0 offset
825 p = s.Prog(v.Op.Asm())
826 p.From.Type = obj.TYPE_MEM
828 p.To.Type = obj.TYPE_REG
832 // Not a go.string, generate a normal load
835 case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
836 p := s.Prog(v.Op.Asm())
837 p.From.Type = obj.TYPE_MEM
838 p.From.Reg = v.Args[0].Reg()
839 gc.AddAux(&p.From, v)
840 p.To.Type = obj.TYPE_REG
843 case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
844 p := s.Prog(v.Op.Asm())
845 p.From.Type = obj.TYPE_MEM
846 p.From.Reg = v.Args[0].Reg()
847 p.To.Type = obj.TYPE_REG
850 case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
851 p := s.Prog(v.Op.Asm())
852 p.To.Type = obj.TYPE_MEM
853 p.To.Reg = v.Args[0].Reg()
854 p.From.Type = obj.TYPE_REG
855 p.From.Reg = v.Args[1].Reg()
857 case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
858 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
859 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
860 p := s.Prog(v.Op.Asm())
861 p.From.Type = obj.TYPE_MEM
862 p.From.Reg = v.Args[0].Reg()
863 p.From.Index = v.Args[1].Reg()
864 p.To.Type = obj.TYPE_REG
867 case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
868 p := s.Prog(v.Op.Asm())
869 p.From.Type = obj.TYPE_REG
870 p.From.Reg = ppc64.REGZERO
871 p.To.Type = obj.TYPE_MEM
872 p.To.Reg = v.Args[0].Reg()
875 case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
876 p := s.Prog(v.Op.Asm())
877 p.From.Type = obj.TYPE_REG
878 p.From.Reg = v.Args[1].Reg()
879 p.To.Type = obj.TYPE_MEM
880 p.To.Reg = v.Args[0].Reg()
883 case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
884 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
885 ssa.OpPPC64MOVHBRstoreidx:
886 p := s.Prog(v.Op.Asm())
887 p.From.Type = obj.TYPE_REG
888 p.From.Reg = v.Args[2].Reg()
889 p.To.Index = v.Args[1].Reg()
890 p.To.Type = obj.TYPE_MEM
891 p.To.Reg = v.Args[0].Reg()
893 case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
895 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
896 // ISEL only accepts 0, 1, 2 condition values but the others can be
897 // achieved by swapping operand order.
898 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
899 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
900 // ISELB is used when a boolean result is needed, returning 0 or 1
901 p := s.Prog(ppc64.AISEL)
902 p.To.Type = obj.TYPE_REG
904 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
905 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
906 if v.Op == ssa.OpPPC64ISEL {
907 r.Reg = v.Args[1].Reg()
909 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
912 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
914 p.Reg = v.Args[0].Reg()
917 p.From.Type = obj.TYPE_CONST
918 p.From.Offset = v.AuxInt & 3
920 case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
921 // The LoweredQuad code generation
922 // generates STXV instructions on
923 // power9. The Short variation is used
924 // if no loop is generated.
926 // sizes >= 64 generate a loop as follows:
928 // Set up loop counter in CTR, used by BC
929 // XXLXOR clears VS32
930 // XXLXOR VS32,VS32,VS32
931 // MOVD len/64,REG_TMP
941 // Bytes per iteration
947 // Only generate a loop if there is more
950 // Set up VS32 (V0) to hold 0s
951 p := s.Prog(ppc64.AXXLXOR)
952 p.From.Type = obj.TYPE_REG
953 p.From.Reg = ppc64.REG_VS32
954 p.To.Type = obj.TYPE_REG
955 p.To.Reg = ppc64.REG_VS32
956 p.Reg = ppc64.REG_VS32
958 // Set up CTR loop counter
959 p = s.Prog(ppc64.AMOVD)
960 p.From.Type = obj.TYPE_CONST
962 p.To.Type = obj.TYPE_REG
963 p.To.Reg = ppc64.REGTMP
965 p = s.Prog(ppc64.AMOVD)
966 p.From.Type = obj.TYPE_REG
967 p.From.Reg = ppc64.REGTMP
968 p.To.Type = obj.TYPE_REG
969 p.To.Reg = ppc64.REG_CTR
971 // Don't generate padding for
972 // loops with few iterations.
974 p = s.Prog(obj.APCALIGN)
975 p.From.Type = obj.TYPE_CONST
979 // generate 4 STXVs to zero 64 bytes
982 p = s.Prog(ppc64.ASTXV)
983 p.From.Type = obj.TYPE_REG
984 p.From.Reg = ppc64.REG_VS32
985 p.To.Type = obj.TYPE_MEM
986 p.To.Reg = v.Args[0].Reg()
988 // Save the top of loop
992 p = s.Prog(ppc64.ASTXV)
993 p.From.Type = obj.TYPE_REG
994 p.From.Reg = ppc64.REG_VS32
995 p.To.Type = obj.TYPE_MEM
996 p.To.Reg = v.Args[0].Reg()
999 p = s.Prog(ppc64.ASTXV)
1000 p.From.Type = obj.TYPE_REG
1001 p.From.Reg = ppc64.REG_VS32
1002 p.To.Type = obj.TYPE_MEM
1003 p.To.Reg = v.Args[0].Reg()
1006 p = s.Prog(ppc64.ASTXV)
1007 p.From.Type = obj.TYPE_REG
1008 p.From.Reg = ppc64.REG_VS32
1009 p.To.Type = obj.TYPE_MEM
1010 p.To.Reg = v.Args[0].Reg()
1013 // Increment address for the
1014 // 64 bytes just zeroed.
1015 p = s.Prog(ppc64.AADD)
1016 p.Reg = v.Args[0].Reg()
1017 p.From.Type = obj.TYPE_CONST
1019 p.To.Type = obj.TYPE_REG
1020 p.To.Reg = v.Args[0].Reg()
1022 // Branch back to top of loop
1024 // BC with BO_BCTR generates bdnz
1025 p = s.Prog(ppc64.ABC)
1026 p.From.Type = obj.TYPE_CONST
1027 p.From.Offset = ppc64.BO_BCTR
1028 p.Reg = ppc64.REG_R0
1029 p.To.Type = obj.TYPE_BRANCH
1032 // When ctr == 1 the loop was not generated but
1033 // there are at least 64 bytes to clear, so add
1034 // that to the remainder to generate the code
1035 // to clear those doublewords
1040 // Clear the remainder starting at offset zero
1043 if rem >= 16 && ctr <= 1 {
1044 // If the XXLXOR hasn't already been
1045 // generated, do it here to initialize
1047 p := s.Prog(ppc64.AXXLXOR)
1048 p.From.Type = obj.TYPE_REG
1049 p.From.Reg = ppc64.REG_VS32
1050 p.To.Type = obj.TYPE_REG
1051 p.To.Reg = ppc64.REG_VS32
1052 p.Reg = ppc64.REG_VS32
1054 // Generate STXV for 32 or 64
1057 p := s.Prog(ppc64.ASTXV)
1058 p.From.Type = obj.TYPE_REG
1059 p.From.Reg = ppc64.REG_VS32
1060 p.To.Type = obj.TYPE_MEM
1061 p.To.Reg = v.Args[0].Reg()
1062 p.To.Offset = offset
1064 p = s.Prog(ppc64.ASTXV)
1065 p.From.Type = obj.TYPE_REG
1066 p.From.Reg = ppc64.REG_VS32
1067 p.To.Type = obj.TYPE_MEM
1068 p.To.Reg = v.Args[0].Reg()
1069 p.To.Offset = offset + 16
1073 // Generate 16 bytes
1075 p := s.Prog(ppc64.ASTXV)
1076 p.From.Type = obj.TYPE_REG
1077 p.From.Reg = ppc64.REG_VS32
1078 p.To.Type = obj.TYPE_MEM
1079 p.To.Reg = v.Args[0].Reg()
1080 p.To.Offset = offset
1085 // first clear as many doublewords as possible
1086 // then clear remaining sizes as available
1088 op, size := ppc64.AMOVB, int64(1)
1091 op, size = ppc64.AMOVD, 8
1093 op, size = ppc64.AMOVW, 4
1095 op, size = ppc64.AMOVH, 2
1098 p.From.Type = obj.TYPE_REG
1099 p.From.Reg = ppc64.REG_R0
1100 p.To.Type = obj.TYPE_MEM
1101 p.To.Reg = v.Args[0].Reg()
1102 p.To.Offset = offset
1107 case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1109 // Unaligned data doesn't hurt performance
1110 // for these instructions on power8.
1112 // For sizes >= 64 generate a loop as follows:
1114 // Set up loop counter in CTR, used by BC
1115 // XXLXOR VS32,VS32,VS32
1116 // MOVD len/32,REG_TMP
1120 // STXVD2X VS32,(R0)(R20)
1121 // STXVD2X VS32,(R31)(R20)
1125 // any remainder is done as described below
1127 // for sizes < 64 bytes, first clear as many doublewords as possible,
1128 // then handle the remainder
1133 // the remainder bytes are cleared using one or more
1134 // of the following instructions with the appropriate
1135 // offsets depending which instructions are needed
1137 // MOVW R0,n1(R20) 4 bytes
1138 // MOVH R0,n2(R20) 2 bytes
1139 // MOVB R0,n3(R20) 1 byte
1141 // 7 bytes: MOVW, MOVH, MOVB
1142 // 6 bytes: MOVW, MOVH
1143 // 5 bytes: MOVW, MOVB
1144 // 3 bytes: MOVH, MOVB
1146 // each loop iteration does 32 bytes
1147 ctr := v.AuxInt / 32
1150 rem := v.AuxInt % 32
1152 // only generate a loop if there is more
1153 // than 1 iteration.
1155 // Set up VS32 (V0) to hold 0s
1156 p := s.Prog(ppc64.AXXLXOR)
1157 p.From.Type = obj.TYPE_REG
1158 p.From.Reg = ppc64.REG_VS32
1159 p.To.Type = obj.TYPE_REG
1160 p.To.Reg = ppc64.REG_VS32
1161 p.Reg = ppc64.REG_VS32
1163 // Set up CTR loop counter
1164 p = s.Prog(ppc64.AMOVD)
1165 p.From.Type = obj.TYPE_CONST
1167 p.To.Type = obj.TYPE_REG
1168 p.To.Reg = ppc64.REGTMP
1170 p = s.Prog(ppc64.AMOVD)
1171 p.From.Type = obj.TYPE_REG
1172 p.From.Reg = ppc64.REGTMP
1173 p.To.Type = obj.TYPE_REG
1174 p.To.Reg = ppc64.REG_CTR
1176 // Set up R31 to hold index value 16
1177 p = s.Prog(ppc64.AMOVD)
1178 p.From.Type = obj.TYPE_CONST
1180 p.To.Type = obj.TYPE_REG
1181 p.To.Reg = ppc64.REGTMP
1183 // Don't add padding for alignment
1184 // with few loop iterations.
1186 p = s.Prog(obj.APCALIGN)
1187 p.From.Type = obj.TYPE_CONST
1191 // generate 2 STXVD2Xs to store 16 bytes
1192 // when this is a loop then the top must be saved
1194 // This is the top of loop
1196 p = s.Prog(ppc64.ASTXVD2X)
1197 p.From.Type = obj.TYPE_REG
1198 p.From.Reg = ppc64.REG_VS32
1199 p.To.Type = obj.TYPE_MEM
1200 p.To.Reg = v.Args[0].Reg()
1201 p.To.Index = ppc64.REGZERO
1202 // Save the top of loop
1206 p = s.Prog(ppc64.ASTXVD2X)
1207 p.From.Type = obj.TYPE_REG
1208 p.From.Reg = ppc64.REG_VS32
1209 p.To.Type = obj.TYPE_MEM
1210 p.To.Reg = v.Args[0].Reg()
1211 p.To.Index = ppc64.REGTMP
1213 // Increment address for the
1214 // 4 doublewords just zeroed.
1215 p = s.Prog(ppc64.AADD)
1216 p.Reg = v.Args[0].Reg()
1217 p.From.Type = obj.TYPE_CONST
1219 p.To.Type = obj.TYPE_REG
1220 p.To.Reg = v.Args[0].Reg()
1222 // Branch back to top of loop
1224 // BC with BO_BCTR generates bdnz
1225 p = s.Prog(ppc64.ABC)
1226 p.From.Type = obj.TYPE_CONST
1227 p.From.Offset = ppc64.BO_BCTR
1228 p.Reg = ppc64.REG_R0
1229 p.To.Type = obj.TYPE_BRANCH
1233 // when ctr == 1 the loop was not generated but
1234 // there are at least 32 bytes to clear, so add
1235 // that to the remainder to generate the code
1236 // to clear those doublewords
1241 // clear the remainder starting at offset zero
1244 // first clear as many doublewords as possible
1245 // then clear remaining sizes as available
1247 op, size := ppc64.AMOVB, int64(1)
1250 op, size = ppc64.AMOVD, 8
1252 op, size = ppc64.AMOVW, 4
1254 op, size = ppc64.AMOVH, 2
1257 p.From.Type = obj.TYPE_REG
1258 p.From.Reg = ppc64.REG_R0
1259 p.To.Type = obj.TYPE_MEM
1260 p.To.Reg = v.Args[0].Reg()
1261 p.To.Offset = offset
1266 case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1268 bytesPerLoop := int64(32)
1269 // This will be used when moving more
1270 // than 8 bytes. Moves start with
1271 // as many 8 byte moves as possible, then
1272 // 4, 2, or 1 byte(s) as remaining. This will
1273 // work and be efficient for power8 or later.
1274 // If there are 64 or more bytes, then a
1275 // loop is generated to move 32 bytes and
1276 // update the src and dst addresses on each
1277 // iteration. When < 64 bytes, the appropriate
1278 // number of moves are generated based on the
1280 // When moving >= 64 bytes a loop is used
1281 // MOVD len/32,REG_TMP
1285 // LXVD2X (R0)(R21),VS32
1286 // LXVD2X (R31)(R21),VS33
1288 // STXVD2X VS32,(R0)(R20)
1289 // STXVD2X VS33,(R31)(R20)
1292 // Bytes not moved by this loop are moved
1293 // with a combination of the following instructions,
1294 // starting with the largest sizes and generating as
1295 // many as needed, using the appropriate offset value.
1305 // Each loop iteration moves 32 bytes
1306 ctr := v.AuxInt / bytesPerLoop
1308 // Remainder after the loop
1309 rem := v.AuxInt % bytesPerLoop
1311 dstReg := v.Args[0].Reg()
1312 srcReg := v.Args[1].Reg()
1314 // The set of registers used here, must match the clobbered reg list
1320 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1323 p := s.Prog(ppc64.AMOVD)
1324 p.From.Type = obj.TYPE_CONST
1326 p.To.Type = obj.TYPE_REG
1327 p.To.Reg = ppc64.REGTMP
1329 p = s.Prog(ppc64.AMOVD)
1330 p.From.Type = obj.TYPE_REG
1331 p.From.Reg = ppc64.REGTMP
1332 p.To.Type = obj.TYPE_REG
1333 p.To.Reg = ppc64.REG_CTR
1335 // Use REGTMP as index reg
1336 p = s.Prog(ppc64.AMOVD)
1337 p.From.Type = obj.TYPE_CONST
1339 p.To.Type = obj.TYPE_REG
1340 p.To.Reg = ppc64.REGTMP
1342 // Don't add padding for
1343 // alignment with small iteration
1346 p = s.Prog(obj.APCALIGN)
1347 p.From.Type = obj.TYPE_CONST
1351 // Generate 16 byte loads and stores.
1352 // Use temp register for index (16)
1353 // on the second one.
1355 p = s.Prog(ppc64.ALXVD2X)
1356 p.From.Type = obj.TYPE_MEM
1358 p.From.Index = ppc64.REGZERO
1359 p.To.Type = obj.TYPE_REG
1360 p.To.Reg = ppc64.REG_VS32
1364 p = s.Prog(ppc64.ALXVD2X)
1365 p.From.Type = obj.TYPE_MEM
1367 p.From.Index = ppc64.REGTMP
1368 p.To.Type = obj.TYPE_REG
1369 p.To.Reg = ppc64.REG_VS33
1371 // increment the src reg for next iteration
1372 p = s.Prog(ppc64.AADD)
1374 p.From.Type = obj.TYPE_CONST
1375 p.From.Offset = bytesPerLoop
1376 p.To.Type = obj.TYPE_REG
1379 // generate 16 byte stores
1380 p = s.Prog(ppc64.ASTXVD2X)
1381 p.From.Type = obj.TYPE_REG
1382 p.From.Reg = ppc64.REG_VS32
1383 p.To.Type = obj.TYPE_MEM
1385 p.To.Index = ppc64.REGZERO
1387 p = s.Prog(ppc64.ASTXVD2X)
1388 p.From.Type = obj.TYPE_REG
1389 p.From.Reg = ppc64.REG_VS33
1390 p.To.Type = obj.TYPE_MEM
1392 p.To.Index = ppc64.REGTMP
1394 // increment the dst reg for next iteration
1395 p = s.Prog(ppc64.AADD)
1397 p.From.Type = obj.TYPE_CONST
1398 p.From.Offset = bytesPerLoop
1399 p.To.Type = obj.TYPE_REG
1402 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1404 p = s.Prog(ppc64.ABC)
1405 p.From.Type = obj.TYPE_CONST
1406 p.From.Offset = ppc64.BO_BCTR
1407 p.Reg = ppc64.REG_R0
1408 p.To.Type = obj.TYPE_BRANCH
1411 // srcReg and dstReg were incremented in the loop, so
1412 // later instructions start with offset 0.
1416 // No loop was generated for one iteration, so
1417 // add 32 bytes to the remainder to move those bytes.
1423 // Generate 16 byte loads and stores.
1424 // Use temp register for index (value 16)
1425 // on the second one.
1426 p := s.Prog(ppc64.ALXVD2X)
1427 p.From.Type = obj.TYPE_MEM
1429 p.From.Index = ppc64.REGZERO
1430 p.To.Type = obj.TYPE_REG
1431 p.To.Reg = ppc64.REG_VS32
1433 p = s.Prog(ppc64.ASTXVD2X)
1434 p.From.Type = obj.TYPE_REG
1435 p.From.Reg = ppc64.REG_VS32
1436 p.To.Type = obj.TYPE_MEM
1438 p.To.Index = ppc64.REGZERO
1444 // Use REGTMP as index reg
1445 p := s.Prog(ppc64.AMOVD)
1446 p.From.Type = obj.TYPE_CONST
1448 p.To.Type = obj.TYPE_REG
1449 p.To.Reg = ppc64.REGTMP
1451 p = s.Prog(ppc64.ALXVD2X)
1452 p.From.Type = obj.TYPE_MEM
1454 p.From.Index = ppc64.REGTMP
1455 p.To.Type = obj.TYPE_REG
1456 p.To.Reg = ppc64.REG_VS32
1458 p = s.Prog(ppc64.ASTXVD2X)
1459 p.From.Type = obj.TYPE_REG
1460 p.From.Reg = ppc64.REG_VS32
1461 p.To.Type = obj.TYPE_MEM
1463 p.To.Index = ppc64.REGTMP
1470 // Generate all the remaining load and store pairs, starting with
1471 // as many 8 byte moves as possible, then 4, 2, 1.
1473 op, size := ppc64.AMOVB, int64(1)
1476 op, size = ppc64.AMOVD, 8
1478 op, size = ppc64.AMOVW, 4
1480 op, size = ppc64.AMOVH, 2
1484 p.To.Type = obj.TYPE_REG
1485 p.To.Reg = ppc64.REGTMP
1486 p.From.Type = obj.TYPE_MEM
1488 p.From.Offset = offset
1492 p.From.Type = obj.TYPE_REG
1493 p.From.Reg = ppc64.REGTMP
1494 p.To.Type = obj.TYPE_MEM
1496 p.To.Offset = offset
1501 case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1502 bytesPerLoop := int64(64)
1503 // This is used when moving more
1504 // than 8 bytes on power9. Moves start with
1505 // as many 8 byte moves as possible, then
1506 // 4, 2, or 1 byte(s) as remaining. This will
1507 // work and be efficient for power8 or later.
1508 // If there are 64 or more bytes, then a
1509 // loop is generated to move 32 bytes and
1510 // update the src and dst addresses on each
1511 // iteration. When < 64 bytes, the appropriate
1512 // number of moves are generated based on the
1514 // When moving >= 64 bytes a loop is used
1515 // MOVD len/32,REG_TMP
1522 // STXV VS33,16(R20)
1525 // Bytes not moved by this loop are moved
1526 // with a combination of the following instructions,
1527 // starting with the largest sizes and generating as
1528 // many as needed, using the appropriate offset value.
1538 // Each loop iteration moves 32 bytes
1539 ctr := v.AuxInt / bytesPerLoop
1541 // Remainder after the loop
1542 rem := v.AuxInt % bytesPerLoop
1544 dstReg := v.Args[0].Reg()
1545 srcReg := v.Args[1].Reg()
1552 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1555 p := s.Prog(ppc64.AMOVD)
1556 p.From.Type = obj.TYPE_CONST
1558 p.To.Type = obj.TYPE_REG
1559 p.To.Reg = ppc64.REGTMP
1561 p = s.Prog(ppc64.AMOVD)
1562 p.From.Type = obj.TYPE_REG
1563 p.From.Reg = ppc64.REGTMP
1564 p.To.Type = obj.TYPE_REG
1565 p.To.Reg = ppc64.REG_CTR
1567 p = s.Prog(obj.APCALIGN)
1568 p.From.Type = obj.TYPE_CONST
1571 // Generate 16 byte loads and stores.
1572 p = s.Prog(ppc64.ALXV)
1573 p.From.Type = obj.TYPE_MEM
1575 p.From.Offset = offset
1576 p.To.Type = obj.TYPE_REG
1577 p.To.Reg = ppc64.REG_VS32
1581 p = s.Prog(ppc64.ALXV)
1582 p.From.Type = obj.TYPE_MEM
1584 p.From.Offset = offset + 16
1585 p.To.Type = obj.TYPE_REG
1586 p.To.Reg = ppc64.REG_VS33
1588 // generate 16 byte stores
1589 p = s.Prog(ppc64.ASTXV)
1590 p.From.Type = obj.TYPE_REG
1591 p.From.Reg = ppc64.REG_VS32
1592 p.To.Type = obj.TYPE_MEM
1594 p.To.Offset = offset
1596 p = s.Prog(ppc64.ASTXV)
1597 p.From.Type = obj.TYPE_REG
1598 p.From.Reg = ppc64.REG_VS33
1599 p.To.Type = obj.TYPE_MEM
1601 p.To.Offset = offset + 16
1603 // Generate 16 byte loads and stores.
1604 p = s.Prog(ppc64.ALXV)
1605 p.From.Type = obj.TYPE_MEM
1607 p.From.Offset = offset + 32
1608 p.To.Type = obj.TYPE_REG
1609 p.To.Reg = ppc64.REG_VS32
1611 p = s.Prog(ppc64.ALXV)
1612 p.From.Type = obj.TYPE_MEM
1614 p.From.Offset = offset + 48
1615 p.To.Type = obj.TYPE_REG
1616 p.To.Reg = ppc64.REG_VS33
1618 // generate 16 byte stores
1619 p = s.Prog(ppc64.ASTXV)
1620 p.From.Type = obj.TYPE_REG
1621 p.From.Reg = ppc64.REG_VS32
1622 p.To.Type = obj.TYPE_MEM
1624 p.To.Offset = offset + 32
1626 p = s.Prog(ppc64.ASTXV)
1627 p.From.Type = obj.TYPE_REG
1628 p.From.Reg = ppc64.REG_VS33
1629 p.To.Type = obj.TYPE_MEM
1631 p.To.Offset = offset + 48
1633 // increment the src reg for next iteration
1634 p = s.Prog(ppc64.AADD)
1636 p.From.Type = obj.TYPE_CONST
1637 p.From.Offset = bytesPerLoop
1638 p.To.Type = obj.TYPE_REG
1641 // increment the dst reg for next iteration
1642 p = s.Prog(ppc64.AADD)
1644 p.From.Type = obj.TYPE_CONST
1645 p.From.Offset = bytesPerLoop
1646 p.To.Type = obj.TYPE_REG
1649 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1651 p = s.Prog(ppc64.ABC)
1652 p.From.Type = obj.TYPE_CONST
1653 p.From.Offset = ppc64.BO_BCTR
1654 p.Reg = ppc64.REG_R0
1655 p.To.Type = obj.TYPE_BRANCH
1658 // srcReg and dstReg were incremented in the loop, so
1659 // later instructions start with offset 0.
1663 // No loop was generated for one iteration, so
1664 // add 32 bytes to the remainder to move those bytes.
1669 p := s.Prog(ppc64.ALXV)
1670 p.From.Type = obj.TYPE_MEM
1672 p.To.Type = obj.TYPE_REG
1673 p.To.Reg = ppc64.REG_VS32
1675 p = s.Prog(ppc64.ALXV)
1676 p.From.Type = obj.TYPE_MEM
1679 p.To.Type = obj.TYPE_REG
1680 p.To.Reg = ppc64.REG_VS33
1682 p = s.Prog(ppc64.ASTXV)
1683 p.From.Type = obj.TYPE_REG
1684 p.From.Reg = ppc64.REG_VS32
1685 p.To.Type = obj.TYPE_MEM
1688 p = s.Prog(ppc64.ASTXV)
1689 p.From.Type = obj.TYPE_REG
1690 p.From.Reg = ppc64.REG_VS33
1691 p.To.Type = obj.TYPE_MEM
1700 // Generate 16 byte loads and stores.
1701 p := s.Prog(ppc64.ALXV)
1702 p.From.Type = obj.TYPE_MEM
1704 p.From.Offset = offset
1705 p.To.Type = obj.TYPE_REG
1706 p.To.Reg = ppc64.REG_VS32
1708 p = s.Prog(ppc64.ASTXV)
1709 p.From.Type = obj.TYPE_REG
1710 p.From.Reg = ppc64.REG_VS32
1711 p.To.Type = obj.TYPE_MEM
1713 p.To.Offset = offset
1719 p := s.Prog(ppc64.ALXV)
1720 p.From.Type = obj.TYPE_MEM
1722 p.From.Offset = offset
1723 p.To.Type = obj.TYPE_REG
1724 p.To.Reg = ppc64.REG_VS32
1726 p = s.Prog(ppc64.ASTXV)
1727 p.From.Type = obj.TYPE_REG
1728 p.From.Reg = ppc64.REG_VS32
1729 p.To.Type = obj.TYPE_MEM
1731 p.To.Offset = offset
1737 // Generate all the remaining load and store pairs, starting with
1738 // as many 8 byte moves as possible, then 4, 2, 1.
1740 op, size := ppc64.AMOVB, int64(1)
1743 op, size = ppc64.AMOVD, 8
1745 op, size = ppc64.AMOVW, 4
1747 op, size = ppc64.AMOVH, 2
1751 p.To.Type = obj.TYPE_REG
1752 p.To.Reg = ppc64.REGTMP
1753 p.From.Type = obj.TYPE_MEM
1755 p.From.Offset = offset
1759 p.From.Type = obj.TYPE_REG
1760 p.From.Reg = ppc64.REGTMP
1761 p.To.Type = obj.TYPE_MEM
1763 p.To.Offset = offset
1768 case ssa.OpPPC64CALLstatic:
1771 case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1772 p := s.Prog(ppc64.AMOVD)
1773 p.From.Type = obj.TYPE_REG
1774 p.From.Reg = v.Args[0].Reg()
1775 p.To.Type = obj.TYPE_REG
1776 p.To.Reg = ppc64.REG_LR
1778 if v.Args[0].Reg() != ppc64.REG_R12 {
1779 v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1783 pp.To.Reg = ppc64.REG_LR
1785 // Insert a hint this is not a subroutine return.
1786 pp.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 1})
1788 if base.Ctxt.Flag_shared {
1789 // When compiling Go into PIC, the function we just
1790 // called via pointer might have been implemented in
1791 // a separate module and so overwritten the TOC
1792 // pointer in R2; reload it.
1793 q := s.Prog(ppc64.AMOVD)
1794 q.From.Type = obj.TYPE_MEM
1796 q.From.Reg = ppc64.REGSP
1797 q.To.Type = obj.TYPE_REG
1798 q.To.Reg = ppc64.REG_R2
1801 case ssa.OpPPC64LoweredWB:
1802 p := s.Prog(obj.ACALL)
1803 p.To.Type = obj.TYPE_MEM
1804 p.To.Name = obj.NAME_EXTERN
1805 p.To.Sym = v.Aux.(*obj.LSym)
1807 case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1808 p := s.Prog(obj.ACALL)
1809 p.To.Type = obj.TYPE_MEM
1810 p.To.Name = obj.NAME_EXTERN
1811 p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
1812 s.UseArgs(16) // space used in callee args area by assembly stubs
1814 case ssa.OpPPC64LoweredNilCheck:
1815 if objabi.GOOS == "aix" {
1819 // NOP (so the BNE has somewhere to land)
1822 p := s.Prog(ppc64.ACMP)
1823 p.From.Type = obj.TYPE_REG
1824 p.From.Reg = v.Args[0].Reg()
1825 p.To.Type = obj.TYPE_REG
1826 p.To.Reg = ppc64.REG_R0
1829 p2 := s.Prog(ppc64.ABNE)
1830 p2.To.Type = obj.TYPE_BRANCH
1833 // Write at 0 is forbidden and will trigger a SIGSEGV
1834 p = s.Prog(ppc64.AMOVW)
1835 p.From.Type = obj.TYPE_REG
1836 p.From.Reg = ppc64.REG_R0
1837 p.To.Type = obj.TYPE_MEM
1838 p.To.Reg = ppc64.REG_R0
1840 // NOP (so the BNE has somewhere to land)
1841 nop := s.Prog(obj.ANOP)
1845 // Issue a load which will fault if arg is nil.
1846 p := s.Prog(ppc64.AMOVBZ)
1847 p.From.Type = obj.TYPE_MEM
1848 p.From.Reg = v.Args[0].Reg()
1849 gc.AddAux(&p.From, v)
1850 p.To.Type = obj.TYPE_REG
1851 p.To.Reg = ppc64.REGTMP
1853 if logopt.Enabled() {
1854 logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1856 if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1857 base.WarnfAt(v.Pos, "generated nil check")
1860 // These should be resolved by rules and not make it here.
1861 case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1862 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1863 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1864 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1865 case ssa.OpPPC64InvertFlags:
1866 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1867 case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1868 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1870 // TODO: implement for clobberdead experiment. Nop is ok for now.
1872 v.Fatalf("genValue not implemented: %s", v.LongString())
1876 var blockJump = [...]struct {
1878 asmeq, invasmun bool
1880 ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1881 ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1883 ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1884 ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1885 ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1886 ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1888 // TODO: need to work FP comparisons into block jumps
1889 ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1890 ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1891 ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1892 ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1895 func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
1897 case ssa.BlockDefer:
1898 // defer returns in R3:
1899 // 0 if we should continue executing
1900 // 1 if we should jump to deferreturn call
1901 p := s.Prog(ppc64.ACMP)
1902 p.From.Type = obj.TYPE_REG
1903 p.From.Reg = ppc64.REG_R3
1904 p.To.Type = obj.TYPE_REG
1905 p.To.Reg = ppc64.REG_R0
1907 p = s.Prog(ppc64.ABNE)
1908 p.To.Type = obj.TYPE_BRANCH
1909 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
1910 if b.Succs[0].Block() != next {
1911 p := s.Prog(obj.AJMP)
1912 p.To.Type = obj.TYPE_BRANCH
1913 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1916 case ssa.BlockPlain:
1917 if b.Succs[0].Block() != next {
1918 p := s.Prog(obj.AJMP)
1919 p.To.Type = obj.TYPE_BRANCH
1920 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1925 case ssa.BlockRetJmp:
1926 p := s.Prog(obj.AJMP)
1927 p.To.Type = obj.TYPE_MEM
1928 p.To.Name = obj.NAME_EXTERN
1929 p.To.Sym = b.Aux.(*obj.LSym)
1931 case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
1932 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
1933 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
1934 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
1935 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
1936 jmp := blockJump[b.Kind]
1938 case b.Succs[0].Block():
1939 s.Br(jmp.invasm, b.Succs[1].Block())
1941 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1942 s.Br(ppc64.ABVS, b.Succs[1].Block())
1944 case b.Succs[1].Block():
1945 s.Br(jmp.asm, b.Succs[0].Block())
1947 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1950 if b.Likely != ssa.BranchUnlikely {
1951 s.Br(jmp.asm, b.Succs[0].Block())
1953 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1955 s.Br(obj.AJMP, b.Succs[1].Block())
1957 s.Br(jmp.invasm, b.Succs[1].Block())
1959 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1960 s.Br(ppc64.ABVS, b.Succs[1].Block())
1962 s.Br(obj.AJMP, b.Succs[0].Block())
1966 b.Fatalf("branch not implemented: %s", b.LongString())