1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/base"
9 "cmd/compile/internal/ir"
10 "cmd/compile/internal/logopt"
11 "cmd/compile/internal/ssa"
12 "cmd/compile/internal/ssagen"
13 "cmd/compile/internal/types"
15 "cmd/internal/obj/ppc64"
21 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
22 func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
23 // flive := b.FlagsLiveAtEnd
24 // if b.Control != nil && b.Control.Type.IsFlags() {
27 // for i := len(b.Values) - 1; i >= 0; i-- {
29 // if flive && (v.Op == ssa.OpPPC64MOVDconst) {
30 // // The "mark" is any non-nil Aux value.
33 // if v.Type.IsFlags() {
36 // for _, a := range v.Args {
37 // if a.Type.IsFlags() {
44 // loadByType returns the load instruction of the given type.
45 func loadByType(t *types.Type) obj.As {
77 panic("bad load type")
80 // storeByType returns the store instruction of the given type.
81 func storeByType(t *types.Type) obj.As {
101 panic("bad store type")
104 func ssaGenValue(s *ssagen.State, v *ssa.Value) {
127 case ssa.OpPPC64LoweredMuluhilo:
128 // MULHDU Rarg1, Rarg0, Reg0
129 // MULLD Rarg1, Rarg0, Reg1
130 r0 := v.Args[0].Reg()
131 r1 := v.Args[1].Reg()
132 p := s.Prog(ppc64.AMULHDU)
133 p.From.Type = obj.TYPE_REG
136 p.To.Type = obj.TYPE_REG
138 p1 := s.Prog(ppc64.AMULLD)
139 p1.From.Type = obj.TYPE_REG
142 p1.To.Type = obj.TYPE_REG
145 case ssa.OpPPC64LoweredAdd64Carry:
146 // ADDC Rarg2, -1, Rtmp
147 // ADDE Rarg1, Rarg0, Reg0
149 r0 := v.Args[0].Reg()
150 r1 := v.Args[1].Reg()
151 r2 := v.Args[2].Reg()
152 p := s.Prog(ppc64.AADDC)
153 p.From.Type = obj.TYPE_CONST
156 p.To.Type = obj.TYPE_REG
157 p.To.Reg = ppc64.REGTMP
158 p1 := s.Prog(ppc64.AADDE)
159 p1.From.Type = obj.TYPE_REG
162 p1.To.Type = obj.TYPE_REG
164 p2 := s.Prog(ppc64.AADDZE)
165 p2.From.Type = obj.TYPE_REG
166 p2.From.Reg = ppc64.REGZERO
167 p2.To.Type = obj.TYPE_REG
170 case ssa.OpPPC64LoweredAtomicAnd8,
171 ssa.OpPPC64LoweredAtomicAnd32,
172 ssa.OpPPC64LoweredAtomicOr8,
173 ssa.OpPPC64LoweredAtomicOr32:
175 // LBAR/LWAR (Rarg0), Rtmp
176 // AND/OR Rarg1, Rtmp
177 // STBCCC/STWCCC Rtmp, (Rarg0)
181 if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
185 r0 := v.Args[0].Reg()
186 r1 := v.Args[1].Reg()
187 // LWSYNC - Assuming shared data not write-through-required nor
188 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
189 plwsync := s.Prog(ppc64.ALWSYNC)
190 plwsync.To.Type = obj.TYPE_NONE
193 p.From.Type = obj.TYPE_MEM
195 p.To.Type = obj.TYPE_REG
196 p.To.Reg = ppc64.REGTMP
198 p1 := s.Prog(v.Op.Asm())
199 p1.From.Type = obj.TYPE_REG
201 p1.To.Type = obj.TYPE_REG
202 p1.To.Reg = ppc64.REGTMP
205 p2.From.Type = obj.TYPE_REG
206 p2.From.Reg = ppc64.REGTMP
207 p2.To.Type = obj.TYPE_MEM
209 p2.RegTo2 = ppc64.REGTMP
211 p3 := s.Prog(ppc64.ABNE)
212 p3.To.Type = obj.TYPE_BRANCH
215 case ssa.OpPPC64LoweredAtomicAdd32,
216 ssa.OpPPC64LoweredAtomicAdd64:
218 // LDAR/LWAR (Rarg0), Rout
220 // STDCCC/STWCCC Rout, (Rarg0)
222 // MOVW Rout,Rout (if Add32)
225 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
229 r0 := v.Args[0].Reg()
230 r1 := v.Args[1].Reg()
232 // LWSYNC - Assuming shared data not write-through-required nor
233 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
234 plwsync := s.Prog(ppc64.ALWSYNC)
235 plwsync.To.Type = obj.TYPE_NONE
238 p.From.Type = obj.TYPE_MEM
240 p.To.Type = obj.TYPE_REG
243 p1 := s.Prog(ppc64.AADD)
244 p1.From.Type = obj.TYPE_REG
247 p1.To.Type = obj.TYPE_REG
250 p3.From.Type = obj.TYPE_REG
252 p3.To.Type = obj.TYPE_MEM
255 p4 := s.Prog(ppc64.ABNE)
256 p4.To.Type = obj.TYPE_BRANCH
259 // Ensure a 32 bit result
260 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
261 p5 := s.Prog(ppc64.AMOVWZ)
262 p5.To.Type = obj.TYPE_REG
264 p5.From.Type = obj.TYPE_REG
268 case ssa.OpPPC64LoweredAtomicExchange32,
269 ssa.OpPPC64LoweredAtomicExchange64:
271 // LDAR/LWAR (Rarg0), Rout
272 // STDCCC/STWCCC Rout, (Rarg0)
277 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
281 r0 := v.Args[0].Reg()
282 r1 := v.Args[1].Reg()
284 // LWSYNC - Assuming shared data not write-through-required nor
285 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
286 plwsync := s.Prog(ppc64.ALWSYNC)
287 plwsync.To.Type = obj.TYPE_NONE
290 p.From.Type = obj.TYPE_MEM
292 p.To.Type = obj.TYPE_REG
296 p1.From.Type = obj.TYPE_REG
298 p1.To.Type = obj.TYPE_MEM
301 p2 := s.Prog(ppc64.ABNE)
302 p2.To.Type = obj.TYPE_BRANCH
305 pisync := s.Prog(ppc64.AISYNC)
306 pisync.To.Type = obj.TYPE_NONE
308 case ssa.OpPPC64LoweredAtomicLoad8,
309 ssa.OpPPC64LoweredAtomicLoad32,
310 ssa.OpPPC64LoweredAtomicLoad64,
311 ssa.OpPPC64LoweredAtomicLoadPtr:
313 // MOVB/MOVD/MOVW (Rarg0), Rout
320 case ssa.OpPPC64LoweredAtomicLoad8:
322 case ssa.OpPPC64LoweredAtomicLoad32:
326 arg0 := v.Args[0].Reg()
328 // SYNC when AuxInt == 1; otherwise, load-acquire
330 psync := s.Prog(ppc64.ASYNC)
331 psync.To.Type = obj.TYPE_NONE
335 p.From.Type = obj.TYPE_MEM
337 p.To.Type = obj.TYPE_REG
341 p1.From.Type = obj.TYPE_REG
343 p1.To.Type = obj.TYPE_REG
346 p2 := s.Prog(ppc64.ABNE)
347 p2.To.Type = obj.TYPE_BRANCH
349 pisync := s.Prog(ppc64.AISYNC)
350 pisync.To.Type = obj.TYPE_NONE
351 p2.To.SetTarget(pisync)
353 case ssa.OpPPC64LoweredAtomicStore8,
354 ssa.OpPPC64LoweredAtomicStore32,
355 ssa.OpPPC64LoweredAtomicStore64:
357 // MOVB/MOVW/MOVD arg1,(arg0)
360 case ssa.OpPPC64LoweredAtomicStore8:
362 case ssa.OpPPC64LoweredAtomicStore32:
365 arg0 := v.Args[0].Reg()
366 arg1 := v.Args[1].Reg()
367 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
369 syncOp := ppc64.ASYNC
371 syncOp = ppc64.ALWSYNC
373 psync := s.Prog(syncOp)
374 psync.To.Type = obj.TYPE_NONE
377 p.To.Type = obj.TYPE_MEM
379 p.From.Type = obj.TYPE_REG
382 case ssa.OpPPC64LoweredAtomicCas64,
383 ssa.OpPPC64LoweredAtomicCas32:
386 // LDAR (Rarg0), MutexHint, Rtmp
389 // STDCCC Rarg2, (Rarg0)
391 // LWSYNC // Only for sequential consistency; not required in CasRel.
400 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
405 r0 := v.Args[0].Reg()
406 r1 := v.Args[1].Reg()
407 r2 := v.Args[2].Reg()
409 // LWSYNC - Assuming shared data not write-through-required nor
410 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
411 plwsync1 := s.Prog(ppc64.ALWSYNC)
412 plwsync1.To.Type = obj.TYPE_NONE
415 p.From.Type = obj.TYPE_MEM
417 p.To.Type = obj.TYPE_REG
418 p.To.Reg = ppc64.REGTMP
419 // If it is a Compare-and-Swap-Release operation, set the EH field with
426 p1.From.Type = obj.TYPE_REG
428 p1.To.Reg = ppc64.REGTMP
429 p1.To.Type = obj.TYPE_REG
431 p2 := s.Prog(ppc64.ABNE)
432 p2.To.Type = obj.TYPE_BRANCH
435 p3.From.Type = obj.TYPE_REG
437 p3.To.Type = obj.TYPE_MEM
440 p4 := s.Prog(ppc64.ABNE)
441 p4.To.Type = obj.TYPE_BRANCH
443 // LWSYNC - Assuming shared data not write-through-required nor
444 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
445 // If the operation is a CAS-Release, then synchronization is not necessary.
447 plwsync2 := s.Prog(ppc64.ALWSYNC)
448 plwsync2.To.Type = obj.TYPE_NONE
451 p5 := s.Prog(ppc64.AMOVD)
452 p5.From.Type = obj.TYPE_CONST
454 p5.To.Type = obj.TYPE_REG
457 p6 := s.Prog(obj.AJMP)
458 p6.To.Type = obj.TYPE_BRANCH
460 p7 := s.Prog(ppc64.AMOVD)
461 p7.From.Type = obj.TYPE_CONST
463 p7.To.Type = obj.TYPE_REG
467 p8 := s.Prog(obj.ANOP)
470 case ssa.OpPPC64LoweredGetClosurePtr:
471 // Closure pointer is R11 (already)
472 ssagen.CheckLoweredGetClosurePtr(v)
474 case ssa.OpPPC64LoweredGetCallerSP:
475 // caller's SP is FixedFrameSize below the address of the first arg
476 p := s.Prog(ppc64.AMOVD)
477 p.From.Type = obj.TYPE_ADDR
478 p.From.Offset = -base.Ctxt.FixedFrameSize()
479 p.From.Name = obj.NAME_PARAM
480 p.To.Type = obj.TYPE_REG
483 case ssa.OpPPC64LoweredGetCallerPC:
484 p := s.Prog(obj.AGETCALLERPC)
485 p.To.Type = obj.TYPE_REG
488 case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
489 // input is already rounded
492 loadOp := loadByType(v.Type)
494 ssagen.AddrAuto(&p.From, v.Args[0])
495 p.To.Type = obj.TYPE_REG
499 storeOp := storeByType(v.Type)
501 p.From.Type = obj.TYPE_REG
502 p.From.Reg = v.Args[0].Reg()
503 ssagen.AddrAuto(&p.To, v)
505 case ssa.OpArgIntReg, ssa.OpArgFloatReg:
506 ssagen.CheckArgReg(v)
508 case ssa.OpPPC64DIVD:
518 r0 := v.Args[0].Reg()
519 r1 := v.Args[1].Reg()
521 p := s.Prog(ppc64.ACMP)
522 p.From.Type = obj.TYPE_REG
524 p.To.Type = obj.TYPE_CONST
527 pbahead := s.Prog(ppc64.ABEQ)
528 pbahead.To.Type = obj.TYPE_BRANCH
530 p = s.Prog(v.Op.Asm())
531 p.From.Type = obj.TYPE_REG
534 p.To.Type = obj.TYPE_REG
537 pbover := s.Prog(obj.AJMP)
538 pbover.To.Type = obj.TYPE_BRANCH
540 p = s.Prog(ppc64.ANEG)
541 p.To.Type = obj.TYPE_REG
543 p.From.Type = obj.TYPE_REG
545 pbahead.To.SetTarget(p)
548 pbover.To.SetTarget(p)
550 case ssa.OpPPC64DIVW:
551 // word-width version of above
553 r0 := v.Args[0].Reg()
554 r1 := v.Args[1].Reg()
556 p := s.Prog(ppc64.ACMPW)
557 p.From.Type = obj.TYPE_REG
559 p.To.Type = obj.TYPE_CONST
562 pbahead := s.Prog(ppc64.ABEQ)
563 pbahead.To.Type = obj.TYPE_BRANCH
565 p = s.Prog(v.Op.Asm())
566 p.From.Type = obj.TYPE_REG
569 p.To.Type = obj.TYPE_REG
572 pbover := s.Prog(obj.AJMP)
573 pbover.To.Type = obj.TYPE_BRANCH
575 p = s.Prog(ppc64.ANEG)
576 p.To.Type = obj.TYPE_REG
578 p.From.Type = obj.TYPE_REG
580 pbahead.To.SetTarget(p)
583 pbover.To.SetTarget(p)
585 case ssa.OpPPC64CLRLSLWI:
587 r1 := v.Args[0].Reg()
589 p := s.Prog(v.Op.Asm())
590 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
591 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
592 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
594 p.To.Type = obj.TYPE_REG
597 case ssa.OpPPC64CLRLSLDI:
599 r1 := v.Args[0].Reg()
601 p := s.Prog(v.Op.Asm())
602 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
603 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
604 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
606 p.To.Type = obj.TYPE_REG
609 // Mask has been set as sh
610 case ssa.OpPPC64RLDICL:
612 r1 := v.Args[0].Reg()
614 p := s.Prog(v.Op.Asm())
615 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}
616 p.SetFrom3Const(ssa.GetPPC64Shiftmb(shifts))
618 p.To.Type = obj.TYPE_REG
621 case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
622 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
623 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
624 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
625 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
626 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
627 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
628 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
630 r1 := v.Args[0].Reg()
631 r2 := v.Args[1].Reg()
632 p := s.Prog(v.Op.Asm())
633 p.From.Type = obj.TYPE_REG
636 p.To.Type = obj.TYPE_REG
639 case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
640 r1 := v.Args[0].Reg()
641 r2 := v.Args[1].Reg()
642 p := s.Prog(v.Op.Asm())
643 p.From.Type = obj.TYPE_REG
646 p.To.Type = obj.TYPE_REG
647 p.To.Reg = ppc64.REGTMP // result is not needed
649 case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
650 p := s.Prog(v.Op.Asm())
651 p.From.Type = obj.TYPE_CONST
652 p.From.Offset = v.AuxInt
653 p.Reg = v.Args[0].Reg()
654 p.To.Type = obj.TYPE_REG
657 // Auxint holds encoded rotate + mask
658 case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
659 rot, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
660 p := s.Prog(v.Op.Asm())
661 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
662 p.Reg = v.Args[0].Reg()
663 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(rot)}
664 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
667 case ssa.OpPPC64RLWNM:
668 _, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
669 p := s.Prog(v.Op.Asm())
670 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
671 p.Reg = v.Args[0].Reg()
672 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
673 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
675 case ssa.OpPPC64MADDLD:
677 r1 := v.Args[0].Reg()
678 r2 := v.Args[1].Reg()
679 r3 := v.Args[2].Reg()
681 p := s.Prog(v.Op.Asm())
682 p.From.Type = obj.TYPE_REG
686 p.To.Type = obj.TYPE_REG
689 case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
691 r1 := v.Args[0].Reg()
692 r2 := v.Args[1].Reg()
693 r3 := v.Args[2].Reg()
695 p := s.Prog(v.Op.Asm())
696 p.From.Type = obj.TYPE_REG
700 p.To.Type = obj.TYPE_REG
703 case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
704 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
705 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
706 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
708 p := s.Prog(v.Op.Asm())
709 p.To.Type = obj.TYPE_REG
711 p.From.Type = obj.TYPE_REG
712 p.From.Reg = v.Args[0].Reg()
714 case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
715 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
716 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
717 p := s.Prog(v.Op.Asm())
718 p.Reg = v.Args[0].Reg()
719 p.From.Type = obj.TYPE_CONST
720 p.From.Offset = v.AuxInt
721 p.To.Type = obj.TYPE_REG
724 case ssa.OpPPC64SUBFCconst:
725 p := s.Prog(v.Op.Asm())
726 p.SetFrom3Const(v.AuxInt)
727 p.From.Type = obj.TYPE_REG
728 p.From.Reg = v.Args[0].Reg()
729 p.To.Type = obj.TYPE_REG
732 case ssa.OpPPC64ANDCCconst:
733 p := s.Prog(v.Op.Asm())
734 p.Reg = v.Args[0].Reg()
735 p.From.Type = obj.TYPE_CONST
736 p.From.Offset = v.AuxInt
737 p.To.Type = obj.TYPE_REG
738 p.To.Reg = ppc64.REGTMP // discard result
740 case ssa.OpPPC64MOVDaddr:
741 switch v.Aux.(type) {
743 v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
745 // If aux offset and aux int are both 0, and the same
746 // input and output regs are used, no instruction
747 // needs to be generated, since it would just be
749 if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
750 p := s.Prog(ppc64.AMOVD)
751 p.From.Type = obj.TYPE_ADDR
752 p.From.Reg = v.Args[0].Reg()
753 p.From.Offset = v.AuxInt
754 p.To.Type = obj.TYPE_REG
758 case *obj.LSym, ir.Node:
759 p := s.Prog(ppc64.AMOVD)
760 p.From.Type = obj.TYPE_ADDR
761 p.From.Reg = v.Args[0].Reg()
762 p.To.Type = obj.TYPE_REG
764 ssagen.AddAux(&p.From, v)
768 case ssa.OpPPC64MOVDconst:
769 p := s.Prog(v.Op.Asm())
770 p.From.Type = obj.TYPE_CONST
771 p.From.Offset = v.AuxInt
772 p.To.Type = obj.TYPE_REG
775 case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
776 p := s.Prog(v.Op.Asm())
777 p.From.Type = obj.TYPE_FCONST
778 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
779 p.To.Type = obj.TYPE_REG
782 case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
783 p := s.Prog(v.Op.Asm())
784 p.From.Type = obj.TYPE_REG
785 p.From.Reg = v.Args[0].Reg()
786 p.To.Type = obj.TYPE_REG
787 p.To.Reg = v.Args[1].Reg()
789 case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
790 p := s.Prog(v.Op.Asm())
791 p.From.Type = obj.TYPE_REG
792 p.From.Reg = v.Args[0].Reg()
793 p.To.Type = obj.TYPE_CONST
794 p.To.Offset = v.AuxInt
796 case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
797 // Shift in register to required size
798 p := s.Prog(v.Op.Asm())
799 p.From.Type = obj.TYPE_REG
800 p.From.Reg = v.Args[0].Reg()
802 p.To.Type = obj.TYPE_REG
804 case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:
806 // MOVDload and MOVWload are DS form instructions that are restricted to
807 // offsets that are a multiple of 4. If the offset is not a multiple of 4,
808 // then the address of the symbol to be loaded is computed (base + offset)
809 // and used as the new base register and the offset field in the instruction
810 // can be set to zero.
812 // This same problem can happen with gostrings since the final offset is not
813 // known yet, but could be unaligned after the relocation is resolved.
814 // So gostrings are handled the same way.
816 // This allows the MOVDload and MOVWload to be generated in more cases and
817 // eliminates some offset and alignment checking in the rules file.
819 fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
820 ssagen.AddAux(&fromAddr, v)
824 switch fromAddr.Name {
825 case obj.NAME_EXTERN, obj.NAME_STATIC:
826 // Special case for a rule combines the bytes of gostring.
827 // The v alignment might seem OK, but we don't want to load it
828 // using an offset because relocation comes later.
829 genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go.string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
831 genAddr = fromAddr.Offset%4 != 0
834 // Load full address into the temp register.
835 p := s.Prog(ppc64.AMOVD)
836 p.From.Type = obj.TYPE_ADDR
837 p.From.Reg = v.Args[0].Reg()
838 ssagen.AddAux(&p.From, v)
839 // Load target using temp as base register
840 // and offset zero. Setting NAME_NONE
841 // prevents any extra offsets from being
843 p.To.Type = obj.TYPE_REG
844 p.To.Reg = ppc64.REGTMP
845 fromAddr.Reg = ppc64.REGTMP
846 // Clear the offset field and other
847 // information that might be used
848 // by the assembler to add to the
849 // final offset value.
851 fromAddr.Name = obj.NAME_NONE
854 p := s.Prog(v.Op.Asm())
856 p.To.Type = obj.TYPE_REG
860 case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
861 p := s.Prog(v.Op.Asm())
862 p.From.Type = obj.TYPE_MEM
863 p.From.Reg = v.Args[0].Reg()
864 ssagen.AddAux(&p.From, v)
865 p.To.Type = obj.TYPE_REG
868 case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
869 p := s.Prog(v.Op.Asm())
870 p.From.Type = obj.TYPE_MEM
871 p.From.Reg = v.Args[0].Reg()
872 p.To.Type = obj.TYPE_REG
875 case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
876 p := s.Prog(v.Op.Asm())
877 p.To.Type = obj.TYPE_MEM
878 p.To.Reg = v.Args[0].Reg()
879 p.From.Type = obj.TYPE_REG
880 p.From.Reg = v.Args[1].Reg()
882 case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
883 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
884 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
885 p := s.Prog(v.Op.Asm())
886 p.From.Type = obj.TYPE_MEM
887 p.From.Reg = v.Args[0].Reg()
888 p.From.Index = v.Args[1].Reg()
889 p.To.Type = obj.TYPE_REG
892 case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
893 p := s.Prog(v.Op.Asm())
894 p.From.Type = obj.TYPE_REG
895 p.From.Reg = ppc64.REGZERO
896 p.To.Type = obj.TYPE_MEM
897 p.To.Reg = v.Args[0].Reg()
898 ssagen.AddAux(&p.To, v)
900 case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:
902 // MOVDstore and MOVDstorezero become DS form instructions that are restricted
903 // to offset values that are a multiple of 4. If the offset field is not a
904 // multiple of 4, then the full address of the store target is computed (base +
905 // offset) and used as the new base register and the offset in the instruction
908 // This allows the MOVDstore and MOVDstorezero to be generated in more cases,
909 // and prevents checking of the offset value and alignment in the rules.
911 toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
912 ssagen.AddAux(&toAddr, v)
914 if toAddr.Offset%4 != 0 {
915 p := s.Prog(ppc64.AMOVD)
916 p.From.Type = obj.TYPE_ADDR
917 p.From.Reg = v.Args[0].Reg()
918 ssagen.AddAux(&p.From, v)
919 p.To.Type = obj.TYPE_REG
920 p.To.Reg = ppc64.REGTMP
921 toAddr.Reg = ppc64.REGTMP
922 // Clear the offset field and other
923 // information that might be used
924 // by the assembler to add to the
925 // final offset value.
927 toAddr.Name = obj.NAME_NONE
930 p := s.Prog(v.Op.Asm())
932 p.From.Type = obj.TYPE_REG
933 if v.Op == ssa.OpPPC64MOVDstorezero {
934 p.From.Reg = ppc64.REGZERO
936 p.From.Reg = v.Args[1].Reg()
939 case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
940 p := s.Prog(v.Op.Asm())
941 p.From.Type = obj.TYPE_REG
942 p.From.Reg = v.Args[1].Reg()
943 p.To.Type = obj.TYPE_MEM
944 p.To.Reg = v.Args[0].Reg()
945 ssagen.AddAux(&p.To, v)
947 case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
948 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
949 ssa.OpPPC64MOVHBRstoreidx:
950 p := s.Prog(v.Op.Asm())
951 p.From.Type = obj.TYPE_REG
952 p.From.Reg = v.Args[2].Reg()
953 p.To.Index = v.Args[1].Reg()
954 p.To.Type = obj.TYPE_MEM
955 p.To.Reg = v.Args[0].Reg()
957 case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
959 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
960 // ISEL only accepts 0, 1, 2 condition values but the others can be
961 // achieved by swapping operand order.
962 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
963 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
964 // ISELB is used when a boolean result is needed, returning 0 or 1
965 p := s.Prog(ppc64.AISEL)
966 p.To.Type = obj.TYPE_REG
968 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
969 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
970 if v.Op == ssa.OpPPC64ISEL {
971 r.Reg = v.Args[1].Reg()
973 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
976 p.SetFrom3Reg(v.Args[0].Reg())
978 p.Reg = v.Args[0].Reg()
981 p.From.Type = obj.TYPE_CONST
982 p.From.Offset = v.AuxInt & 3
984 case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
985 // The LoweredQuad code generation
986 // generates STXV instructions on
987 // power9. The Short variation is used
988 // if no loop is generated.
990 // sizes >= 64 generate a loop as follows:
992 // Set up loop counter in CTR, used by BC
993 // XXLXOR clears VS32
994 // XXLXOR VS32,VS32,VS32
995 // MOVD len/64,REG_TMP
1000 // STXV VS32,32(R20)
1001 // STXV VS32,48(R20)
1005 // Bytes per iteration
1006 ctr := v.AuxInt / 64
1009 rem := v.AuxInt % 64
1011 // Only generate a loop if there is more
1012 // than 1 iteration.
1014 // Set up VS32 (V0) to hold 0s
1015 p := s.Prog(ppc64.AXXLXOR)
1016 p.From.Type = obj.TYPE_REG
1017 p.From.Reg = ppc64.REG_VS32
1018 p.To.Type = obj.TYPE_REG
1019 p.To.Reg = ppc64.REG_VS32
1020 p.Reg = ppc64.REG_VS32
1022 // Set up CTR loop counter
1023 p = s.Prog(ppc64.AMOVD)
1024 p.From.Type = obj.TYPE_CONST
1026 p.To.Type = obj.TYPE_REG
1027 p.To.Reg = ppc64.REGTMP
1029 p = s.Prog(ppc64.AMOVD)
1030 p.From.Type = obj.TYPE_REG
1031 p.From.Reg = ppc64.REGTMP
1032 p.To.Type = obj.TYPE_REG
1033 p.To.Reg = ppc64.REG_CTR
1035 // Don't generate padding for
1036 // loops with few iterations.
1038 p = s.Prog(obj.APCALIGN)
1039 p.From.Type = obj.TYPE_CONST
1043 // generate 4 STXVs to zero 64 bytes
1046 p = s.Prog(ppc64.ASTXV)
1047 p.From.Type = obj.TYPE_REG
1048 p.From.Reg = ppc64.REG_VS32
1049 p.To.Type = obj.TYPE_MEM
1050 p.To.Reg = v.Args[0].Reg()
1052 // Save the top of loop
1056 p = s.Prog(ppc64.ASTXV)
1057 p.From.Type = obj.TYPE_REG
1058 p.From.Reg = ppc64.REG_VS32
1059 p.To.Type = obj.TYPE_MEM
1060 p.To.Reg = v.Args[0].Reg()
1063 p = s.Prog(ppc64.ASTXV)
1064 p.From.Type = obj.TYPE_REG
1065 p.From.Reg = ppc64.REG_VS32
1066 p.To.Type = obj.TYPE_MEM
1067 p.To.Reg = v.Args[0].Reg()
1070 p = s.Prog(ppc64.ASTXV)
1071 p.From.Type = obj.TYPE_REG
1072 p.From.Reg = ppc64.REG_VS32
1073 p.To.Type = obj.TYPE_MEM
1074 p.To.Reg = v.Args[0].Reg()
1077 // Increment address for the
1078 // 64 bytes just zeroed.
1079 p = s.Prog(ppc64.AADD)
1080 p.Reg = v.Args[0].Reg()
1081 p.From.Type = obj.TYPE_CONST
1083 p.To.Type = obj.TYPE_REG
1084 p.To.Reg = v.Args[0].Reg()
1086 // Branch back to top of loop
1088 // BC with BO_BCTR generates bdnz
1089 p = s.Prog(ppc64.ABC)
1090 p.From.Type = obj.TYPE_CONST
1091 p.From.Offset = ppc64.BO_BCTR
1092 p.Reg = ppc64.REG_R0
1093 p.To.Type = obj.TYPE_BRANCH
1096 // When ctr == 1 the loop was not generated but
1097 // there are at least 64 bytes to clear, so add
1098 // that to the remainder to generate the code
1099 // to clear those doublewords
1104 // Clear the remainder starting at offset zero
1107 if rem >= 16 && ctr <= 1 {
1108 // If the XXLXOR hasn't already been
1109 // generated, do it here to initialize
1111 p := s.Prog(ppc64.AXXLXOR)
1112 p.From.Type = obj.TYPE_REG
1113 p.From.Reg = ppc64.REG_VS32
1114 p.To.Type = obj.TYPE_REG
1115 p.To.Reg = ppc64.REG_VS32
1116 p.Reg = ppc64.REG_VS32
1118 // Generate STXV for 32 or 64
1121 p := s.Prog(ppc64.ASTXV)
1122 p.From.Type = obj.TYPE_REG
1123 p.From.Reg = ppc64.REG_VS32
1124 p.To.Type = obj.TYPE_MEM
1125 p.To.Reg = v.Args[0].Reg()
1126 p.To.Offset = offset
1128 p = s.Prog(ppc64.ASTXV)
1129 p.From.Type = obj.TYPE_REG
1130 p.From.Reg = ppc64.REG_VS32
1131 p.To.Type = obj.TYPE_MEM
1132 p.To.Reg = v.Args[0].Reg()
1133 p.To.Offset = offset + 16
1137 // Generate 16 bytes
1139 p := s.Prog(ppc64.ASTXV)
1140 p.From.Type = obj.TYPE_REG
1141 p.From.Reg = ppc64.REG_VS32
1142 p.To.Type = obj.TYPE_MEM
1143 p.To.Reg = v.Args[0].Reg()
1144 p.To.Offset = offset
1149 // first clear as many doublewords as possible
1150 // then clear remaining sizes as available
1152 op, size := ppc64.AMOVB, int64(1)
1155 op, size = ppc64.AMOVD, 8
1157 op, size = ppc64.AMOVW, 4
1159 op, size = ppc64.AMOVH, 2
1162 p.From.Type = obj.TYPE_REG
1163 p.From.Reg = ppc64.REG_R0
1164 p.To.Type = obj.TYPE_MEM
1165 p.To.Reg = v.Args[0].Reg()
1166 p.To.Offset = offset
1171 case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1173 // Unaligned data doesn't hurt performance
1174 // for these instructions on power8.
1176 // For sizes >= 64 generate a loop as follows:
1178 // Set up loop counter in CTR, used by BC
1179 // XXLXOR VS32,VS32,VS32
1180 // MOVD len/32,REG_TMP
1184 // STXVD2X VS32,(R0)(R20)
1185 // STXVD2X VS32,(R31)(R20)
1189 // any remainder is done as described below
1191 // for sizes < 64 bytes, first clear as many doublewords as possible,
1192 // then handle the remainder
1197 // the remainder bytes are cleared using one or more
1198 // of the following instructions with the appropriate
1199 // offsets depending which instructions are needed
1201 // MOVW R0,n1(R20) 4 bytes
1202 // MOVH R0,n2(R20) 2 bytes
1203 // MOVB R0,n3(R20) 1 byte
1205 // 7 bytes: MOVW, MOVH, MOVB
1206 // 6 bytes: MOVW, MOVH
1207 // 5 bytes: MOVW, MOVB
1208 // 3 bytes: MOVH, MOVB
1210 // each loop iteration does 32 bytes
1211 ctr := v.AuxInt / 32
1214 rem := v.AuxInt % 32
1216 // only generate a loop if there is more
1217 // than 1 iteration.
1219 // Set up VS32 (V0) to hold 0s
1220 p := s.Prog(ppc64.AXXLXOR)
1221 p.From.Type = obj.TYPE_REG
1222 p.From.Reg = ppc64.REG_VS32
1223 p.To.Type = obj.TYPE_REG
1224 p.To.Reg = ppc64.REG_VS32
1225 p.Reg = ppc64.REG_VS32
1227 // Set up CTR loop counter
1228 p = s.Prog(ppc64.AMOVD)
1229 p.From.Type = obj.TYPE_CONST
1231 p.To.Type = obj.TYPE_REG
1232 p.To.Reg = ppc64.REGTMP
1234 p = s.Prog(ppc64.AMOVD)
1235 p.From.Type = obj.TYPE_REG
1236 p.From.Reg = ppc64.REGTMP
1237 p.To.Type = obj.TYPE_REG
1238 p.To.Reg = ppc64.REG_CTR
1240 // Set up R31 to hold index value 16
1241 p = s.Prog(ppc64.AMOVD)
1242 p.From.Type = obj.TYPE_CONST
1244 p.To.Type = obj.TYPE_REG
1245 p.To.Reg = ppc64.REGTMP
1247 // Don't add padding for alignment
1248 // with few loop iterations.
1250 p = s.Prog(obj.APCALIGN)
1251 p.From.Type = obj.TYPE_CONST
1255 // generate 2 STXVD2Xs to store 16 bytes
1256 // when this is a loop then the top must be saved
1258 // This is the top of loop
1260 p = s.Prog(ppc64.ASTXVD2X)
1261 p.From.Type = obj.TYPE_REG
1262 p.From.Reg = ppc64.REG_VS32
1263 p.To.Type = obj.TYPE_MEM
1264 p.To.Reg = v.Args[0].Reg()
1265 p.To.Index = ppc64.REGZERO
1266 // Save the top of loop
1270 p = s.Prog(ppc64.ASTXVD2X)
1271 p.From.Type = obj.TYPE_REG
1272 p.From.Reg = ppc64.REG_VS32
1273 p.To.Type = obj.TYPE_MEM
1274 p.To.Reg = v.Args[0].Reg()
1275 p.To.Index = ppc64.REGTMP
1277 // Increment address for the
1278 // 4 doublewords just zeroed.
1279 p = s.Prog(ppc64.AADD)
1280 p.Reg = v.Args[0].Reg()
1281 p.From.Type = obj.TYPE_CONST
1283 p.To.Type = obj.TYPE_REG
1284 p.To.Reg = v.Args[0].Reg()
1286 // Branch back to top of loop
1288 // BC with BO_BCTR generates bdnz
1289 p = s.Prog(ppc64.ABC)
1290 p.From.Type = obj.TYPE_CONST
1291 p.From.Offset = ppc64.BO_BCTR
1292 p.Reg = ppc64.REG_R0
1293 p.To.Type = obj.TYPE_BRANCH
1297 // when ctr == 1 the loop was not generated but
1298 // there are at least 32 bytes to clear, so add
1299 // that to the remainder to generate the code
1300 // to clear those doublewords
1305 // clear the remainder starting at offset zero
1308 // first clear as many doublewords as possible
1309 // then clear remaining sizes as available
1311 op, size := ppc64.AMOVB, int64(1)
1314 op, size = ppc64.AMOVD, 8
1316 op, size = ppc64.AMOVW, 4
1318 op, size = ppc64.AMOVH, 2
1321 p.From.Type = obj.TYPE_REG
1322 p.From.Reg = ppc64.REG_R0
1323 p.To.Type = obj.TYPE_MEM
1324 p.To.Reg = v.Args[0].Reg()
1325 p.To.Offset = offset
1330 case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1332 bytesPerLoop := int64(32)
1333 // This will be used when moving more
1334 // than 8 bytes. Moves start with
1335 // as many 8 byte moves as possible, then
1336 // 4, 2, or 1 byte(s) as remaining. This will
1337 // work and be efficient for power8 or later.
1338 // If there are 64 or more bytes, then a
1339 // loop is generated to move 32 bytes and
1340 // update the src and dst addresses on each
1341 // iteration. When < 64 bytes, the appropriate
1342 // number of moves are generated based on the
1344 // When moving >= 64 bytes a loop is used
1345 // MOVD len/32,REG_TMP
1349 // LXVD2X (R0)(R21),VS32
1350 // LXVD2X (R31)(R21),VS33
1352 // STXVD2X VS32,(R0)(R20)
1353 // STXVD2X VS33,(R31)(R20)
1356 // Bytes not moved by this loop are moved
1357 // with a combination of the following instructions,
1358 // starting with the largest sizes and generating as
1359 // many as needed, using the appropriate offset value.
1369 // Each loop iteration moves 32 bytes
1370 ctr := v.AuxInt / bytesPerLoop
1372 // Remainder after the loop
1373 rem := v.AuxInt % bytesPerLoop
1375 dstReg := v.Args[0].Reg()
1376 srcReg := v.Args[1].Reg()
1378 // The set of registers used here, must match the clobbered reg list
1384 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1387 p := s.Prog(ppc64.AMOVD)
1388 p.From.Type = obj.TYPE_CONST
1390 p.To.Type = obj.TYPE_REG
1391 p.To.Reg = ppc64.REGTMP
1393 p = s.Prog(ppc64.AMOVD)
1394 p.From.Type = obj.TYPE_REG
1395 p.From.Reg = ppc64.REGTMP
1396 p.To.Type = obj.TYPE_REG
1397 p.To.Reg = ppc64.REG_CTR
1399 // Use REGTMP as index reg
1400 p = s.Prog(ppc64.AMOVD)
1401 p.From.Type = obj.TYPE_CONST
1403 p.To.Type = obj.TYPE_REG
1404 p.To.Reg = ppc64.REGTMP
1406 // Don't add padding for
1407 // alignment with small iteration
1410 p = s.Prog(obj.APCALIGN)
1411 p.From.Type = obj.TYPE_CONST
1415 // Generate 16 byte loads and stores.
1416 // Use temp register for index (16)
1417 // on the second one.
1419 p = s.Prog(ppc64.ALXVD2X)
1420 p.From.Type = obj.TYPE_MEM
1422 p.From.Index = ppc64.REGZERO
1423 p.To.Type = obj.TYPE_REG
1424 p.To.Reg = ppc64.REG_VS32
1428 p = s.Prog(ppc64.ALXVD2X)
1429 p.From.Type = obj.TYPE_MEM
1431 p.From.Index = ppc64.REGTMP
1432 p.To.Type = obj.TYPE_REG
1433 p.To.Reg = ppc64.REG_VS33
1435 // increment the src reg for next iteration
1436 p = s.Prog(ppc64.AADD)
1438 p.From.Type = obj.TYPE_CONST
1439 p.From.Offset = bytesPerLoop
1440 p.To.Type = obj.TYPE_REG
1443 // generate 16 byte stores
1444 p = s.Prog(ppc64.ASTXVD2X)
1445 p.From.Type = obj.TYPE_REG
1446 p.From.Reg = ppc64.REG_VS32
1447 p.To.Type = obj.TYPE_MEM
1449 p.To.Index = ppc64.REGZERO
1451 p = s.Prog(ppc64.ASTXVD2X)
1452 p.From.Type = obj.TYPE_REG
1453 p.From.Reg = ppc64.REG_VS33
1454 p.To.Type = obj.TYPE_MEM
1456 p.To.Index = ppc64.REGTMP
1458 // increment the dst reg for next iteration
1459 p = s.Prog(ppc64.AADD)
1461 p.From.Type = obj.TYPE_CONST
1462 p.From.Offset = bytesPerLoop
1463 p.To.Type = obj.TYPE_REG
1466 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1468 p = s.Prog(ppc64.ABC)
1469 p.From.Type = obj.TYPE_CONST
1470 p.From.Offset = ppc64.BO_BCTR
1471 p.Reg = ppc64.REG_R0
1472 p.To.Type = obj.TYPE_BRANCH
1475 // srcReg and dstReg were incremented in the loop, so
1476 // later instructions start with offset 0.
1480 // No loop was generated for one iteration, so
1481 // add 32 bytes to the remainder to move those bytes.
1487 // Generate 16 byte loads and stores.
1488 // Use temp register for index (value 16)
1489 // on the second one.
1490 p := s.Prog(ppc64.ALXVD2X)
1491 p.From.Type = obj.TYPE_MEM
1493 p.From.Index = ppc64.REGZERO
1494 p.To.Type = obj.TYPE_REG
1495 p.To.Reg = ppc64.REG_VS32
1497 p = s.Prog(ppc64.ASTXVD2X)
1498 p.From.Type = obj.TYPE_REG
1499 p.From.Reg = ppc64.REG_VS32
1500 p.To.Type = obj.TYPE_MEM
1502 p.To.Index = ppc64.REGZERO
1508 // Use REGTMP as index reg
1509 p := s.Prog(ppc64.AMOVD)
1510 p.From.Type = obj.TYPE_CONST
1512 p.To.Type = obj.TYPE_REG
1513 p.To.Reg = ppc64.REGTMP
1515 p = s.Prog(ppc64.ALXVD2X)
1516 p.From.Type = obj.TYPE_MEM
1518 p.From.Index = ppc64.REGTMP
1519 p.To.Type = obj.TYPE_REG
1520 p.To.Reg = ppc64.REG_VS32
1522 p = s.Prog(ppc64.ASTXVD2X)
1523 p.From.Type = obj.TYPE_REG
1524 p.From.Reg = ppc64.REG_VS32
1525 p.To.Type = obj.TYPE_MEM
1527 p.To.Index = ppc64.REGTMP
1534 // Generate all the remaining load and store pairs, starting with
1535 // as many 8 byte moves as possible, then 4, 2, 1.
1537 op, size := ppc64.AMOVB, int64(1)
1540 op, size = ppc64.AMOVD, 8
1542 op, size = ppc64.AMOVWZ, 4
1544 op, size = ppc64.AMOVH, 2
1548 p.To.Type = obj.TYPE_REG
1549 p.To.Reg = ppc64.REGTMP
1550 p.From.Type = obj.TYPE_MEM
1552 p.From.Offset = offset
1556 p.From.Type = obj.TYPE_REG
1557 p.From.Reg = ppc64.REGTMP
1558 p.To.Type = obj.TYPE_MEM
1560 p.To.Offset = offset
1565 case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1566 bytesPerLoop := int64(64)
1567 // This is used when moving more
1568 // than 8 bytes on power9. Moves start with
1569 // as many 8 byte moves as possible, then
1570 // 4, 2, or 1 byte(s) as remaining. This will
1571 // work and be efficient for power8 or later.
1572 // If there are 64 or more bytes, then a
1573 // loop is generated to move 64 bytes and
1574 // update the src and dst addresses on each
1575 // iteration. When < 64 bytes, the appropriate
1576 // number of moves are generated based on the
1578 // When moving >= 64 bytes a loop is used
1579 // MOVD len/64,REG_TMP
1586 // STXV VS33,16(R20)
1589 // Bytes not moved by this loop are moved
1590 // with a combination of the following instructions,
1591 // starting with the largest sizes and generating as
1592 // many as needed, using the appropriate offset value.
1602 // Each loop iteration moves 64 bytes
1603 ctr := v.AuxInt / bytesPerLoop
1605 // Remainder after the loop
1606 rem := v.AuxInt % bytesPerLoop
1608 dstReg := v.Args[0].Reg()
1609 srcReg := v.Args[1].Reg()
1616 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1619 p := s.Prog(ppc64.AMOVD)
1620 p.From.Type = obj.TYPE_CONST
1622 p.To.Type = obj.TYPE_REG
1623 p.To.Reg = ppc64.REGTMP
1625 p = s.Prog(ppc64.AMOVD)
1626 p.From.Type = obj.TYPE_REG
1627 p.From.Reg = ppc64.REGTMP
1628 p.To.Type = obj.TYPE_REG
1629 p.To.Reg = ppc64.REG_CTR
1631 p = s.Prog(obj.APCALIGN)
1632 p.From.Type = obj.TYPE_CONST
1635 // Generate 16 byte loads and stores.
1636 p = s.Prog(ppc64.ALXV)
1637 p.From.Type = obj.TYPE_MEM
1639 p.From.Offset = offset
1640 p.To.Type = obj.TYPE_REG
1641 p.To.Reg = ppc64.REG_VS32
1645 p = s.Prog(ppc64.ALXV)
1646 p.From.Type = obj.TYPE_MEM
1648 p.From.Offset = offset + 16
1649 p.To.Type = obj.TYPE_REG
1650 p.To.Reg = ppc64.REG_VS33
1652 // generate 16 byte stores
1653 p = s.Prog(ppc64.ASTXV)
1654 p.From.Type = obj.TYPE_REG
1655 p.From.Reg = ppc64.REG_VS32
1656 p.To.Type = obj.TYPE_MEM
1658 p.To.Offset = offset
1660 p = s.Prog(ppc64.ASTXV)
1661 p.From.Type = obj.TYPE_REG
1662 p.From.Reg = ppc64.REG_VS33
1663 p.To.Type = obj.TYPE_MEM
1665 p.To.Offset = offset + 16
1667 // Generate 16 byte loads and stores.
1668 p = s.Prog(ppc64.ALXV)
1669 p.From.Type = obj.TYPE_MEM
1671 p.From.Offset = offset + 32
1672 p.To.Type = obj.TYPE_REG
1673 p.To.Reg = ppc64.REG_VS32
1675 p = s.Prog(ppc64.ALXV)
1676 p.From.Type = obj.TYPE_MEM
1678 p.From.Offset = offset + 48
1679 p.To.Type = obj.TYPE_REG
1680 p.To.Reg = ppc64.REG_VS33
1682 // generate 16 byte stores
1683 p = s.Prog(ppc64.ASTXV)
1684 p.From.Type = obj.TYPE_REG
1685 p.From.Reg = ppc64.REG_VS32
1686 p.To.Type = obj.TYPE_MEM
1688 p.To.Offset = offset + 32
1690 p = s.Prog(ppc64.ASTXV)
1691 p.From.Type = obj.TYPE_REG
1692 p.From.Reg = ppc64.REG_VS33
1693 p.To.Type = obj.TYPE_MEM
1695 p.To.Offset = offset + 48
1697 // increment the src reg for next iteration
1698 p = s.Prog(ppc64.AADD)
1700 p.From.Type = obj.TYPE_CONST
1701 p.From.Offset = bytesPerLoop
1702 p.To.Type = obj.TYPE_REG
1705 // increment the dst reg for next iteration
1706 p = s.Prog(ppc64.AADD)
1708 p.From.Type = obj.TYPE_CONST
1709 p.From.Offset = bytesPerLoop
1710 p.To.Type = obj.TYPE_REG
1713 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1715 p = s.Prog(ppc64.ABC)
1716 p.From.Type = obj.TYPE_CONST
1717 p.From.Offset = ppc64.BO_BCTR
1718 p.Reg = ppc64.REG_R0
1719 p.To.Type = obj.TYPE_BRANCH
1722 // srcReg and dstReg were incremented in the loop, so
1723 // later instructions start with offset 0.
1727 // No loop was generated for one iteration, so
1728 // add 64 bytes to the remainder to move those bytes.
1733 p := s.Prog(ppc64.ALXV)
1734 p.From.Type = obj.TYPE_MEM
1736 p.To.Type = obj.TYPE_REG
1737 p.To.Reg = ppc64.REG_VS32
1739 p = s.Prog(ppc64.ALXV)
1740 p.From.Type = obj.TYPE_MEM
1743 p.To.Type = obj.TYPE_REG
1744 p.To.Reg = ppc64.REG_VS33
1746 p = s.Prog(ppc64.ASTXV)
1747 p.From.Type = obj.TYPE_REG
1748 p.From.Reg = ppc64.REG_VS32
1749 p.To.Type = obj.TYPE_MEM
1752 p = s.Prog(ppc64.ASTXV)
1753 p.From.Type = obj.TYPE_REG
1754 p.From.Reg = ppc64.REG_VS33
1755 p.To.Type = obj.TYPE_MEM
1764 // Generate 16 byte loads and stores.
1765 p := s.Prog(ppc64.ALXV)
1766 p.From.Type = obj.TYPE_MEM
1768 p.From.Offset = offset
1769 p.To.Type = obj.TYPE_REG
1770 p.To.Reg = ppc64.REG_VS32
1772 p = s.Prog(ppc64.ASTXV)
1773 p.From.Type = obj.TYPE_REG
1774 p.From.Reg = ppc64.REG_VS32
1775 p.To.Type = obj.TYPE_MEM
1777 p.To.Offset = offset
1783 p := s.Prog(ppc64.ALXV)
1784 p.From.Type = obj.TYPE_MEM
1786 p.From.Offset = offset
1787 p.To.Type = obj.TYPE_REG
1788 p.To.Reg = ppc64.REG_VS32
1790 p = s.Prog(ppc64.ASTXV)
1791 p.From.Type = obj.TYPE_REG
1792 p.From.Reg = ppc64.REG_VS32
1793 p.To.Type = obj.TYPE_MEM
1795 p.To.Offset = offset
1801 // Generate all the remaining load and store pairs, starting with
1802 // as many 8 byte moves as possible, then 4, 2, 1.
1804 op, size := ppc64.AMOVB, int64(1)
1807 op, size = ppc64.AMOVD, 8
1809 op, size = ppc64.AMOVWZ, 4
1811 op, size = ppc64.AMOVH, 2
1815 p.To.Type = obj.TYPE_REG
1816 p.To.Reg = ppc64.REGTMP
1817 p.From.Type = obj.TYPE_MEM
1819 p.From.Offset = offset
1823 p.From.Type = obj.TYPE_REG
1824 p.From.Reg = ppc64.REGTMP
1825 p.To.Type = obj.TYPE_MEM
1827 p.To.Offset = offset
1832 case ssa.OpPPC64CALLstatic:
1835 case ssa.OpPPC64CALLtail:
1838 case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1839 p := s.Prog(ppc64.AMOVD)
1840 p.From.Type = obj.TYPE_REG
1841 p.From.Reg = v.Args[0].Reg()
1842 p.To.Type = obj.TYPE_REG
1843 p.To.Reg = ppc64.REG_LR
1845 if v.Args[0].Reg() != ppc64.REG_R12 {
1846 v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1850 pp.To.Reg = ppc64.REG_LR
1852 // Insert a hint this is not a subroutine return.
1855 if base.Ctxt.Flag_shared {
1856 // When compiling Go into PIC, the function we just
1857 // called via pointer might have been implemented in
1858 // a separate module and so overwritten the TOC
1859 // pointer in R2; reload it.
1860 q := s.Prog(ppc64.AMOVD)
1861 q.From.Type = obj.TYPE_MEM
1863 q.From.Reg = ppc64.REGSP
1864 q.To.Type = obj.TYPE_REG
1865 q.To.Reg = ppc64.REG_R2
1868 case ssa.OpPPC64LoweredWB:
1869 p := s.Prog(obj.ACALL)
1870 p.To.Type = obj.TYPE_MEM
1871 p.To.Name = obj.NAME_EXTERN
1872 p.To.Sym = v.Aux.(*obj.LSym)
1874 case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1875 p := s.Prog(obj.ACALL)
1876 p.To.Type = obj.TYPE_MEM
1877 p.To.Name = obj.NAME_EXTERN
1878 p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
1879 s.UseArgs(16) // space used in callee args area by assembly stubs
1881 case ssa.OpPPC64LoweredNilCheck:
1882 if buildcfg.GOOS == "aix" {
1886 // NOP (so the BNE has somewhere to land)
1889 p := s.Prog(ppc64.ACMP)
1890 p.From.Type = obj.TYPE_REG
1891 p.From.Reg = v.Args[0].Reg()
1892 p.To.Type = obj.TYPE_REG
1893 p.To.Reg = ppc64.REG_R0
1896 p2 := s.Prog(ppc64.ABNE)
1897 p2.To.Type = obj.TYPE_BRANCH
1900 // Write at 0 is forbidden and will trigger a SIGSEGV
1901 p = s.Prog(ppc64.AMOVW)
1902 p.From.Type = obj.TYPE_REG
1903 p.From.Reg = ppc64.REG_R0
1904 p.To.Type = obj.TYPE_MEM
1905 p.To.Reg = ppc64.REG_R0
1907 // NOP (so the BNE has somewhere to land)
1908 nop := s.Prog(obj.ANOP)
1909 p2.To.SetTarget(nop)
1912 // Issue a load which will fault if arg is nil.
1913 p := s.Prog(ppc64.AMOVBZ)
1914 p.From.Type = obj.TYPE_MEM
1915 p.From.Reg = v.Args[0].Reg()
1916 ssagen.AddAux(&p.From, v)
1917 p.To.Type = obj.TYPE_REG
1918 p.To.Reg = ppc64.REGTMP
1920 if logopt.Enabled() {
1921 logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1923 if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1924 base.WarnfAt(v.Pos, "generated nil check")
1927 // These should be resolved by rules and not make it here.
1928 case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1929 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1930 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1931 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1932 case ssa.OpPPC64InvertFlags:
1933 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1934 case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1935 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1936 case ssa.OpClobber, ssa.OpClobberReg:
1937 // TODO: implement for clobberdead experiment. Nop is ok for now.
1939 v.Fatalf("genValue not implemented: %s", v.LongString())
1943 var blockJump = [...]struct {
1945 asmeq, invasmun bool
1947 ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1948 ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1950 ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1951 ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1952 ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1953 ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1955 // TODO: need to work FP comparisons into block jumps
1956 ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1957 ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1958 ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1959 ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1962 func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
1964 case ssa.BlockDefer:
1965 // defer returns in R3:
1966 // 0 if we should continue executing
1967 // 1 if we should jump to deferreturn call
1968 p := s.Prog(ppc64.ACMP)
1969 p.From.Type = obj.TYPE_REG
1970 p.From.Reg = ppc64.REG_R3
1971 p.To.Type = obj.TYPE_REG
1972 p.To.Reg = ppc64.REG_R0
1974 p = s.Prog(ppc64.ABNE)
1975 p.To.Type = obj.TYPE_BRANCH
1976 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
1977 if b.Succs[0].Block() != next {
1978 p := s.Prog(obj.AJMP)
1979 p.To.Type = obj.TYPE_BRANCH
1980 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
1983 case ssa.BlockPlain:
1984 if b.Succs[0].Block() != next {
1985 p := s.Prog(obj.AJMP)
1986 p.To.Type = obj.TYPE_BRANCH
1987 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
1989 case ssa.BlockExit, ssa.BlockRetJmp:
1993 case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
1994 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
1995 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
1996 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
1997 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
1998 jmp := blockJump[b.Kind]
2000 case b.Succs[0].Block():
2001 s.Br(jmp.invasm, b.Succs[1].Block())
2003 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2004 s.Br(ppc64.ABVS, b.Succs[1].Block())
2006 case b.Succs[1].Block():
2007 s.Br(jmp.asm, b.Succs[0].Block())
2009 s.Br(ppc64.ABEQ, b.Succs[0].Block())
2012 if b.Likely != ssa.BranchUnlikely {
2013 s.Br(jmp.asm, b.Succs[0].Block())
2015 s.Br(ppc64.ABEQ, b.Succs[0].Block())
2017 s.Br(obj.AJMP, b.Succs[1].Block())
2019 s.Br(jmp.invasm, b.Succs[1].Block())
2021 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2022 s.Br(ppc64.ABVS, b.Succs[1].Block())
2024 s.Br(obj.AJMP, b.Succs[0].Block())
2028 b.Fatalf("branch not implemented: %s", b.LongString())