1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/base"
9 "cmd/compile/internal/ir"
10 "cmd/compile/internal/logopt"
11 "cmd/compile/internal/ssa"
12 "cmd/compile/internal/ssagen"
13 "cmd/compile/internal/types"
15 "cmd/internal/obj/ppc64"
21 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
22 func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
23 // flive := b.FlagsLiveAtEnd
24 // if b.Control != nil && b.Control.Type.IsFlags() {
27 // for i := len(b.Values) - 1; i >= 0; i-- {
29 // if flive && (v.Op == ssa.OpPPC64MOVDconst) {
30 // // The "mark" is any non-nil Aux value.
33 // if v.Type.IsFlags() {
36 // for _, a := range v.Args {
37 // if a.Type.IsFlags() {
44 // loadByType returns the load instruction of the given type.
45 func loadByType(t *types.Type) obj.As {
77 panic("bad load type")
80 // storeByType returns the store instruction of the given type.
81 func storeByType(t *types.Type) obj.As {
101 panic("bad store type")
104 func ssaGenValue(s *ssagen.State, v *ssa.Value) {
127 case ssa.OpPPC64LoweredMuluhilo:
128 // MULHDU Rarg1, Rarg0, Reg0
129 // MULLD Rarg1, Rarg0, Reg1
130 r0 := v.Args[0].Reg()
131 r1 := v.Args[1].Reg()
132 p := s.Prog(ppc64.AMULHDU)
133 p.From.Type = obj.TYPE_REG
136 p.To.Type = obj.TYPE_REG
138 p1 := s.Prog(ppc64.AMULLD)
139 p1.From.Type = obj.TYPE_REG
142 p1.To.Type = obj.TYPE_REG
145 case ssa.OpPPC64LoweredAdd64Carry:
146 // ADDC Rarg2, -1, Rtmp
147 // ADDE Rarg1, Rarg0, Reg0
149 r0 := v.Args[0].Reg()
150 r1 := v.Args[1].Reg()
151 r2 := v.Args[2].Reg()
152 p := s.Prog(ppc64.AADDC)
153 p.From.Type = obj.TYPE_CONST
156 p.To.Type = obj.TYPE_REG
157 p.To.Reg = ppc64.REGTMP
158 p1 := s.Prog(ppc64.AADDE)
159 p1.From.Type = obj.TYPE_REG
162 p1.To.Type = obj.TYPE_REG
164 p2 := s.Prog(ppc64.AADDZE)
165 p2.From.Type = obj.TYPE_REG
166 p2.From.Reg = ppc64.REGZERO
167 p2.To.Type = obj.TYPE_REG
170 case ssa.OpPPC64LoweredAtomicAnd8,
171 ssa.OpPPC64LoweredAtomicAnd32,
172 ssa.OpPPC64LoweredAtomicOr8,
173 ssa.OpPPC64LoweredAtomicOr32:
175 // LBAR/LWAR (Rarg0), Rtmp
176 // AND/OR Rarg1, Rtmp
177 // STBCCC/STWCCC Rtmp, (Rarg0)
181 if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
185 r0 := v.Args[0].Reg()
186 r1 := v.Args[1].Reg()
187 // LWSYNC - Assuming shared data not write-through-required nor
188 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
189 plwsync := s.Prog(ppc64.ALWSYNC)
190 plwsync.To.Type = obj.TYPE_NONE
193 p.From.Type = obj.TYPE_MEM
195 p.To.Type = obj.TYPE_REG
196 p.To.Reg = ppc64.REGTMP
198 p1 := s.Prog(v.Op.Asm())
199 p1.From.Type = obj.TYPE_REG
201 p1.To.Type = obj.TYPE_REG
202 p1.To.Reg = ppc64.REGTMP
205 p2.From.Type = obj.TYPE_REG
206 p2.From.Reg = ppc64.REGTMP
207 p2.To.Type = obj.TYPE_MEM
209 p2.RegTo2 = ppc64.REGTMP
211 p3 := s.Prog(ppc64.ABNE)
212 p3.To.Type = obj.TYPE_BRANCH
215 case ssa.OpPPC64LoweredAtomicAdd32,
216 ssa.OpPPC64LoweredAtomicAdd64:
218 // LDAR/LWAR (Rarg0), Rout
220 // STDCCC/STWCCC Rout, (Rarg0)
222 // MOVW Rout,Rout (if Add32)
225 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
229 r0 := v.Args[0].Reg()
230 r1 := v.Args[1].Reg()
232 // LWSYNC - Assuming shared data not write-through-required nor
233 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
234 plwsync := s.Prog(ppc64.ALWSYNC)
235 plwsync.To.Type = obj.TYPE_NONE
238 p.From.Type = obj.TYPE_MEM
240 p.To.Type = obj.TYPE_REG
243 p1 := s.Prog(ppc64.AADD)
244 p1.From.Type = obj.TYPE_REG
247 p1.To.Type = obj.TYPE_REG
250 p3.From.Type = obj.TYPE_REG
252 p3.To.Type = obj.TYPE_MEM
255 p4 := s.Prog(ppc64.ABNE)
256 p4.To.Type = obj.TYPE_BRANCH
259 // Ensure a 32 bit result
260 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
261 p5 := s.Prog(ppc64.AMOVWZ)
262 p5.To.Type = obj.TYPE_REG
264 p5.From.Type = obj.TYPE_REG
268 case ssa.OpPPC64LoweredAtomicExchange32,
269 ssa.OpPPC64LoweredAtomicExchange64:
271 // LDAR/LWAR (Rarg0), Rout
272 // STDCCC/STWCCC Rout, (Rarg0)
277 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
281 r0 := v.Args[0].Reg()
282 r1 := v.Args[1].Reg()
284 // LWSYNC - Assuming shared data not write-through-required nor
285 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
286 plwsync := s.Prog(ppc64.ALWSYNC)
287 plwsync.To.Type = obj.TYPE_NONE
290 p.From.Type = obj.TYPE_MEM
292 p.To.Type = obj.TYPE_REG
296 p1.From.Type = obj.TYPE_REG
298 p1.To.Type = obj.TYPE_MEM
301 p2 := s.Prog(ppc64.ABNE)
302 p2.To.Type = obj.TYPE_BRANCH
305 pisync := s.Prog(ppc64.AISYNC)
306 pisync.To.Type = obj.TYPE_NONE
308 case ssa.OpPPC64LoweredAtomicLoad8,
309 ssa.OpPPC64LoweredAtomicLoad32,
310 ssa.OpPPC64LoweredAtomicLoad64,
311 ssa.OpPPC64LoweredAtomicLoadPtr:
313 // MOVB/MOVD/MOVW (Rarg0), Rout
320 case ssa.OpPPC64LoweredAtomicLoad8:
322 case ssa.OpPPC64LoweredAtomicLoad32:
326 arg0 := v.Args[0].Reg()
328 // SYNC when AuxInt == 1; otherwise, load-acquire
330 psync := s.Prog(ppc64.ASYNC)
331 psync.To.Type = obj.TYPE_NONE
335 p.From.Type = obj.TYPE_MEM
337 p.To.Type = obj.TYPE_REG
341 p1.From.Type = obj.TYPE_REG
343 p1.To.Type = obj.TYPE_REG
346 p2 := s.Prog(ppc64.ABNE)
347 p2.To.Type = obj.TYPE_BRANCH
349 pisync := s.Prog(ppc64.AISYNC)
350 pisync.To.Type = obj.TYPE_NONE
351 p2.To.SetTarget(pisync)
353 case ssa.OpPPC64LoweredAtomicStore8,
354 ssa.OpPPC64LoweredAtomicStore32,
355 ssa.OpPPC64LoweredAtomicStore64:
357 // MOVB/MOVW/MOVD arg1,(arg0)
360 case ssa.OpPPC64LoweredAtomicStore8:
362 case ssa.OpPPC64LoweredAtomicStore32:
365 arg0 := v.Args[0].Reg()
366 arg1 := v.Args[1].Reg()
367 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
369 syncOp := ppc64.ASYNC
371 syncOp = ppc64.ALWSYNC
373 psync := s.Prog(syncOp)
374 psync.To.Type = obj.TYPE_NONE
377 p.To.Type = obj.TYPE_MEM
379 p.From.Type = obj.TYPE_REG
382 case ssa.OpPPC64LoweredAtomicCas64,
383 ssa.OpPPC64LoweredAtomicCas32:
386 // LDAR (Rarg0), MutexHint, Rtmp
389 // STDCCC Rarg2, (Rarg0)
391 // LWSYNC // Only for sequential consistency; not required in CasRel.
400 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
405 r0 := v.Args[0].Reg()
406 r1 := v.Args[1].Reg()
407 r2 := v.Args[2].Reg()
409 // LWSYNC - Assuming shared data not write-through-required nor
410 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
411 plwsync1 := s.Prog(ppc64.ALWSYNC)
412 plwsync1.To.Type = obj.TYPE_NONE
415 p.From.Type = obj.TYPE_MEM
417 p.To.Type = obj.TYPE_REG
418 p.To.Reg = ppc64.REGTMP
419 // If it is a Compare-and-Swap-Release operation, set the EH field with
426 p1.From.Type = obj.TYPE_REG
428 p1.To.Reg = ppc64.REGTMP
429 p1.To.Type = obj.TYPE_REG
431 p2 := s.Prog(ppc64.ABNE)
432 p2.To.Type = obj.TYPE_BRANCH
435 p3.From.Type = obj.TYPE_REG
437 p3.To.Type = obj.TYPE_MEM
440 p4 := s.Prog(ppc64.ABNE)
441 p4.To.Type = obj.TYPE_BRANCH
443 // LWSYNC - Assuming shared data not write-through-required nor
444 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
445 // If the operation is a CAS-Release, then synchronization is not necessary.
447 plwsync2 := s.Prog(ppc64.ALWSYNC)
448 plwsync2.To.Type = obj.TYPE_NONE
451 p5 := s.Prog(ppc64.AMOVD)
452 p5.From.Type = obj.TYPE_CONST
454 p5.To.Type = obj.TYPE_REG
457 p6 := s.Prog(obj.AJMP)
458 p6.To.Type = obj.TYPE_BRANCH
460 p7 := s.Prog(ppc64.AMOVD)
461 p7.From.Type = obj.TYPE_CONST
463 p7.To.Type = obj.TYPE_REG
467 p8 := s.Prog(obj.ANOP)
470 case ssa.OpPPC64LoweredGetClosurePtr:
471 // Closure pointer is R11 (already)
472 ssagen.CheckLoweredGetClosurePtr(v)
474 case ssa.OpPPC64LoweredGetCallerSP:
475 // caller's SP is FixedFrameSize below the address of the first arg
476 p := s.Prog(ppc64.AMOVD)
477 p.From.Type = obj.TYPE_ADDR
478 p.From.Offset = -base.Ctxt.FixedFrameSize()
479 p.From.Name = obj.NAME_PARAM
480 p.To.Type = obj.TYPE_REG
483 case ssa.OpPPC64LoweredGetCallerPC:
484 p := s.Prog(obj.AGETCALLERPC)
485 p.To.Type = obj.TYPE_REG
488 case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
489 // input is already rounded
492 loadOp := loadByType(v.Type)
494 ssagen.AddrAuto(&p.From, v.Args[0])
495 p.To.Type = obj.TYPE_REG
499 storeOp := storeByType(v.Type)
501 p.From.Type = obj.TYPE_REG
502 p.From.Reg = v.Args[0].Reg()
503 ssagen.AddrAuto(&p.To, v)
505 case ssa.OpPPC64DIVD:
515 r0 := v.Args[0].Reg()
516 r1 := v.Args[1].Reg()
518 p := s.Prog(ppc64.ACMP)
519 p.From.Type = obj.TYPE_REG
521 p.To.Type = obj.TYPE_CONST
524 pbahead := s.Prog(ppc64.ABEQ)
525 pbahead.To.Type = obj.TYPE_BRANCH
527 p = s.Prog(v.Op.Asm())
528 p.From.Type = obj.TYPE_REG
531 p.To.Type = obj.TYPE_REG
534 pbover := s.Prog(obj.AJMP)
535 pbover.To.Type = obj.TYPE_BRANCH
537 p = s.Prog(ppc64.ANEG)
538 p.To.Type = obj.TYPE_REG
540 p.From.Type = obj.TYPE_REG
542 pbahead.To.SetTarget(p)
545 pbover.To.SetTarget(p)
547 case ssa.OpPPC64DIVW:
548 // word-width version of above
550 r0 := v.Args[0].Reg()
551 r1 := v.Args[1].Reg()
553 p := s.Prog(ppc64.ACMPW)
554 p.From.Type = obj.TYPE_REG
556 p.To.Type = obj.TYPE_CONST
559 pbahead := s.Prog(ppc64.ABEQ)
560 pbahead.To.Type = obj.TYPE_BRANCH
562 p = s.Prog(v.Op.Asm())
563 p.From.Type = obj.TYPE_REG
566 p.To.Type = obj.TYPE_REG
569 pbover := s.Prog(obj.AJMP)
570 pbover.To.Type = obj.TYPE_BRANCH
572 p = s.Prog(ppc64.ANEG)
573 p.To.Type = obj.TYPE_REG
575 p.From.Type = obj.TYPE_REG
577 pbahead.To.SetTarget(p)
580 pbover.To.SetTarget(p)
582 case ssa.OpPPC64CLRLSLWI:
584 r1 := v.Args[0].Reg()
586 p := s.Prog(v.Op.Asm())
587 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
588 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
589 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
591 p.To.Type = obj.TYPE_REG
594 case ssa.OpPPC64CLRLSLDI:
596 r1 := v.Args[0].Reg()
598 p := s.Prog(v.Op.Asm())
599 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
600 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
601 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
603 p.To.Type = obj.TYPE_REG
606 // Mask has been set as sh
607 case ssa.OpPPC64RLDICL:
609 r1 := v.Args[0].Reg()
611 p := s.Prog(v.Op.Asm())
612 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}
613 p.SetFrom3Const(ssa.GetPPC64Shiftmb(shifts))
615 p.To.Type = obj.TYPE_REG
618 case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
619 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
620 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
621 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
622 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
623 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
624 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
625 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
627 r1 := v.Args[0].Reg()
628 r2 := v.Args[1].Reg()
629 p := s.Prog(v.Op.Asm())
630 p.From.Type = obj.TYPE_REG
633 p.To.Type = obj.TYPE_REG
636 case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
637 r1 := v.Args[0].Reg()
638 r2 := v.Args[1].Reg()
639 p := s.Prog(v.Op.Asm())
640 p.From.Type = obj.TYPE_REG
643 p.To.Type = obj.TYPE_REG
644 p.To.Reg = ppc64.REGTMP // result is not needed
646 case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
647 p := s.Prog(v.Op.Asm())
648 p.From.Type = obj.TYPE_CONST
649 p.From.Offset = v.AuxInt
650 p.Reg = v.Args[0].Reg()
651 p.To.Type = obj.TYPE_REG
654 // Auxint holds encoded rotate + mask
655 case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
656 rot, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
657 p := s.Prog(v.Op.Asm())
658 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
659 p.Reg = v.Args[0].Reg()
660 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(rot)}
661 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
664 case ssa.OpPPC64RLWNM:
665 _, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
666 p := s.Prog(v.Op.Asm())
667 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
668 p.Reg = v.Args[0].Reg()
669 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
670 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
672 case ssa.OpPPC64MADDLD:
674 r1 := v.Args[0].Reg()
675 r2 := v.Args[1].Reg()
676 r3 := v.Args[2].Reg()
678 p := s.Prog(v.Op.Asm())
679 p.From.Type = obj.TYPE_REG
683 p.To.Type = obj.TYPE_REG
686 case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
688 r1 := v.Args[0].Reg()
689 r2 := v.Args[1].Reg()
690 r3 := v.Args[2].Reg()
692 p := s.Prog(v.Op.Asm())
693 p.From.Type = obj.TYPE_REG
697 p.To.Type = obj.TYPE_REG
700 case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
701 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
702 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
703 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
705 p := s.Prog(v.Op.Asm())
706 p.To.Type = obj.TYPE_REG
708 p.From.Type = obj.TYPE_REG
709 p.From.Reg = v.Args[0].Reg()
711 case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
712 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
713 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
714 p := s.Prog(v.Op.Asm())
715 p.Reg = v.Args[0].Reg()
716 p.From.Type = obj.TYPE_CONST
717 p.From.Offset = v.AuxInt
718 p.To.Type = obj.TYPE_REG
721 case ssa.OpPPC64SUBFCconst:
722 p := s.Prog(v.Op.Asm())
723 p.SetFrom3Const(v.AuxInt)
724 p.From.Type = obj.TYPE_REG
725 p.From.Reg = v.Args[0].Reg()
726 p.To.Type = obj.TYPE_REG
729 case ssa.OpPPC64ANDCCconst:
730 p := s.Prog(v.Op.Asm())
731 p.Reg = v.Args[0].Reg()
732 p.From.Type = obj.TYPE_CONST
733 p.From.Offset = v.AuxInt
734 p.To.Type = obj.TYPE_REG
735 p.To.Reg = ppc64.REGTMP // discard result
737 case ssa.OpPPC64MOVDaddr:
738 switch v.Aux.(type) {
740 v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
742 // If aux offset and aux int are both 0, and the same
743 // input and output regs are used, no instruction
744 // needs to be generated, since it would just be
746 if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
747 p := s.Prog(ppc64.AMOVD)
748 p.From.Type = obj.TYPE_ADDR
749 p.From.Reg = v.Args[0].Reg()
750 p.From.Offset = v.AuxInt
751 p.To.Type = obj.TYPE_REG
755 case *obj.LSym, ir.Node:
756 p := s.Prog(ppc64.AMOVD)
757 p.From.Type = obj.TYPE_ADDR
758 p.From.Reg = v.Args[0].Reg()
759 p.To.Type = obj.TYPE_REG
761 ssagen.AddAux(&p.From, v)
765 case ssa.OpPPC64MOVDconst:
766 p := s.Prog(v.Op.Asm())
767 p.From.Type = obj.TYPE_CONST
768 p.From.Offset = v.AuxInt
769 p.To.Type = obj.TYPE_REG
772 case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
773 p := s.Prog(v.Op.Asm())
774 p.From.Type = obj.TYPE_FCONST
775 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
776 p.To.Type = obj.TYPE_REG
779 case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
780 p := s.Prog(v.Op.Asm())
781 p.From.Type = obj.TYPE_REG
782 p.From.Reg = v.Args[0].Reg()
783 p.To.Type = obj.TYPE_REG
784 p.To.Reg = v.Args[1].Reg()
786 case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
787 p := s.Prog(v.Op.Asm())
788 p.From.Type = obj.TYPE_REG
789 p.From.Reg = v.Args[0].Reg()
790 p.To.Type = obj.TYPE_CONST
791 p.To.Offset = v.AuxInt
793 case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
794 // Shift in register to required size
795 p := s.Prog(v.Op.Asm())
796 p.From.Type = obj.TYPE_REG
797 p.From.Reg = v.Args[0].Reg()
799 p.To.Type = obj.TYPE_REG
801 case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:
803 // MOVDload and MOVWload are DS form instructions that are restricted to
804 // offsets that are a multiple of 4. If the offset is not a multiple of 4,
805 // then the address of the symbol to be loaded is computed (base + offset)
806 // and used as the new base register and the offset field in the instruction
807 // can be set to zero.
809 // This same problem can happen with gostrings since the final offset is not
810 // known yet, but could be unaligned after the relocation is resolved.
811 // So gostrings are handled the same way.
813 // This allows the MOVDload and MOVWload to be generated in more cases and
814 // eliminates some offset and alignment checking in the rules file.
816 fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
817 ssagen.AddAux(&fromAddr, v)
821 switch fromAddr.Name {
822 case obj.NAME_EXTERN, obj.NAME_STATIC:
823 // Special case for a rule combines the bytes of gostring.
824 // The v alignment might seem OK, but we don't want to load it
825 // using an offset because relocation comes later.
826 genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go.string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
828 genAddr = fromAddr.Offset%4 != 0
831 // Load full address into the temp register.
832 p := s.Prog(ppc64.AMOVD)
833 p.From.Type = obj.TYPE_ADDR
834 p.From.Reg = v.Args[0].Reg()
835 ssagen.AddAux(&p.From, v)
836 // Load target using temp as base register
837 // and offset zero. Setting NAME_NONE
838 // prevents any extra offsets from being
840 p.To.Type = obj.TYPE_REG
841 p.To.Reg = ppc64.REGTMP
842 fromAddr.Reg = ppc64.REGTMP
843 // Clear the offset field and other
844 // information that might be used
845 // by the assembler to add to the
846 // final offset value.
848 fromAddr.Name = obj.NAME_NONE
851 p := s.Prog(v.Op.Asm())
853 p.To.Type = obj.TYPE_REG
857 case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
858 p := s.Prog(v.Op.Asm())
859 p.From.Type = obj.TYPE_MEM
860 p.From.Reg = v.Args[0].Reg()
861 ssagen.AddAux(&p.From, v)
862 p.To.Type = obj.TYPE_REG
865 case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
866 p := s.Prog(v.Op.Asm())
867 p.From.Type = obj.TYPE_MEM
868 p.From.Reg = v.Args[0].Reg()
869 p.To.Type = obj.TYPE_REG
872 case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
873 p := s.Prog(v.Op.Asm())
874 p.To.Type = obj.TYPE_MEM
875 p.To.Reg = v.Args[0].Reg()
876 p.From.Type = obj.TYPE_REG
877 p.From.Reg = v.Args[1].Reg()
879 case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
880 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
881 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
882 p := s.Prog(v.Op.Asm())
883 p.From.Type = obj.TYPE_MEM
884 p.From.Reg = v.Args[0].Reg()
885 p.From.Index = v.Args[1].Reg()
886 p.To.Type = obj.TYPE_REG
889 case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
890 p := s.Prog(v.Op.Asm())
891 p.From.Type = obj.TYPE_REG
892 p.From.Reg = ppc64.REGZERO
893 p.To.Type = obj.TYPE_MEM
894 p.To.Reg = v.Args[0].Reg()
895 ssagen.AddAux(&p.To, v)
897 case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:
899 // MOVDstore and MOVDstorezero become DS form instructions that are restricted
900 // to offset values that are a multiple of 4. If the offset field is not a
901 // multiple of 4, then the full address of the store target is computed (base +
902 // offset) and used as the new base register and the offset in the instruction
905 // This allows the MOVDstore and MOVDstorezero to be generated in more cases,
906 // and prevents checking of the offset value and alignment in the rules.
908 toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
909 ssagen.AddAux(&toAddr, v)
911 if toAddr.Offset%4 != 0 {
912 p := s.Prog(ppc64.AMOVD)
913 p.From.Type = obj.TYPE_ADDR
914 p.From.Reg = v.Args[0].Reg()
915 ssagen.AddAux(&p.From, v)
916 p.To.Type = obj.TYPE_REG
917 p.To.Reg = ppc64.REGTMP
918 toAddr.Reg = ppc64.REGTMP
919 // Clear the offset field and other
920 // information that might be used
921 // by the assembler to add to the
922 // final offset value.
924 toAddr.Name = obj.NAME_NONE
927 p := s.Prog(v.Op.Asm())
929 p.From.Type = obj.TYPE_REG
930 if v.Op == ssa.OpPPC64MOVDstorezero {
931 p.From.Reg = ppc64.REGZERO
933 p.From.Reg = v.Args[1].Reg()
936 case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
937 p := s.Prog(v.Op.Asm())
938 p.From.Type = obj.TYPE_REG
939 p.From.Reg = v.Args[1].Reg()
940 p.To.Type = obj.TYPE_MEM
941 p.To.Reg = v.Args[0].Reg()
942 ssagen.AddAux(&p.To, v)
944 case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
945 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
946 ssa.OpPPC64MOVHBRstoreidx:
947 p := s.Prog(v.Op.Asm())
948 p.From.Type = obj.TYPE_REG
949 p.From.Reg = v.Args[2].Reg()
950 p.To.Index = v.Args[1].Reg()
951 p.To.Type = obj.TYPE_MEM
952 p.To.Reg = v.Args[0].Reg()
954 case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
956 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
957 // ISEL only accepts 0, 1, 2 condition values but the others can be
958 // achieved by swapping operand order.
959 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
960 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
961 // ISELB is used when a boolean result is needed, returning 0 or 1
962 p := s.Prog(ppc64.AISEL)
963 p.To.Type = obj.TYPE_REG
965 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
966 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
967 if v.Op == ssa.OpPPC64ISEL {
968 r.Reg = v.Args[1].Reg()
970 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
973 p.SetFrom3Reg(v.Args[0].Reg())
975 p.Reg = v.Args[0].Reg()
978 p.From.Type = obj.TYPE_CONST
979 p.From.Offset = v.AuxInt & 3
981 case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
982 // The LoweredQuad code generation
983 // generates STXV instructions on
984 // power9. The Short variation is used
985 // if no loop is generated.
987 // sizes >= 64 generate a loop as follows:
989 // Set up loop counter in CTR, used by BC
990 // XXLXOR clears VS32
991 // XXLXOR VS32,VS32,VS32
992 // MOVD len/64,REG_TMP
1002 // Bytes per iteration
1003 ctr := v.AuxInt / 64
1006 rem := v.AuxInt % 64
1008 // Only generate a loop if there is more
1009 // than 1 iteration.
1011 // Set up VS32 (V0) to hold 0s
1012 p := s.Prog(ppc64.AXXLXOR)
1013 p.From.Type = obj.TYPE_REG
1014 p.From.Reg = ppc64.REG_VS32
1015 p.To.Type = obj.TYPE_REG
1016 p.To.Reg = ppc64.REG_VS32
1017 p.Reg = ppc64.REG_VS32
1019 // Set up CTR loop counter
1020 p = s.Prog(ppc64.AMOVD)
1021 p.From.Type = obj.TYPE_CONST
1023 p.To.Type = obj.TYPE_REG
1024 p.To.Reg = ppc64.REGTMP
1026 p = s.Prog(ppc64.AMOVD)
1027 p.From.Type = obj.TYPE_REG
1028 p.From.Reg = ppc64.REGTMP
1029 p.To.Type = obj.TYPE_REG
1030 p.To.Reg = ppc64.REG_CTR
1032 // Don't generate padding for
1033 // loops with few iterations.
1035 p = s.Prog(obj.APCALIGN)
1036 p.From.Type = obj.TYPE_CONST
1040 // generate 4 STXVs to zero 64 bytes
1043 p = s.Prog(ppc64.ASTXV)
1044 p.From.Type = obj.TYPE_REG
1045 p.From.Reg = ppc64.REG_VS32
1046 p.To.Type = obj.TYPE_MEM
1047 p.To.Reg = v.Args[0].Reg()
1049 // Save the top of loop
1053 p = s.Prog(ppc64.ASTXV)
1054 p.From.Type = obj.TYPE_REG
1055 p.From.Reg = ppc64.REG_VS32
1056 p.To.Type = obj.TYPE_MEM
1057 p.To.Reg = v.Args[0].Reg()
1060 p = s.Prog(ppc64.ASTXV)
1061 p.From.Type = obj.TYPE_REG
1062 p.From.Reg = ppc64.REG_VS32
1063 p.To.Type = obj.TYPE_MEM
1064 p.To.Reg = v.Args[0].Reg()
1067 p = s.Prog(ppc64.ASTXV)
1068 p.From.Type = obj.TYPE_REG
1069 p.From.Reg = ppc64.REG_VS32
1070 p.To.Type = obj.TYPE_MEM
1071 p.To.Reg = v.Args[0].Reg()
1074 // Increment address for the
1075 // 64 bytes just zeroed.
1076 p = s.Prog(ppc64.AADD)
1077 p.Reg = v.Args[0].Reg()
1078 p.From.Type = obj.TYPE_CONST
1080 p.To.Type = obj.TYPE_REG
1081 p.To.Reg = v.Args[0].Reg()
1083 // Branch back to top of loop
1085 // BC with BO_BCTR generates bdnz
1086 p = s.Prog(ppc64.ABC)
1087 p.From.Type = obj.TYPE_CONST
1088 p.From.Offset = ppc64.BO_BCTR
1089 p.Reg = ppc64.REG_R0
1090 p.To.Type = obj.TYPE_BRANCH
1093 // When ctr == 1 the loop was not generated but
1094 // there are at least 64 bytes to clear, so add
1095 // that to the remainder to generate the code
1096 // to clear those doublewords
1101 // Clear the remainder starting at offset zero
1104 if rem >= 16 && ctr <= 1 {
1105 // If the XXLXOR hasn't already been
1106 // generated, do it here to initialize
1108 p := s.Prog(ppc64.AXXLXOR)
1109 p.From.Type = obj.TYPE_REG
1110 p.From.Reg = ppc64.REG_VS32
1111 p.To.Type = obj.TYPE_REG
1112 p.To.Reg = ppc64.REG_VS32
1113 p.Reg = ppc64.REG_VS32
1115 // Generate STXV for 32 or 64
1118 p := s.Prog(ppc64.ASTXV)
1119 p.From.Type = obj.TYPE_REG
1120 p.From.Reg = ppc64.REG_VS32
1121 p.To.Type = obj.TYPE_MEM
1122 p.To.Reg = v.Args[0].Reg()
1123 p.To.Offset = offset
1125 p = s.Prog(ppc64.ASTXV)
1126 p.From.Type = obj.TYPE_REG
1127 p.From.Reg = ppc64.REG_VS32
1128 p.To.Type = obj.TYPE_MEM
1129 p.To.Reg = v.Args[0].Reg()
1130 p.To.Offset = offset + 16
1134 // Generate 16 bytes
1136 p := s.Prog(ppc64.ASTXV)
1137 p.From.Type = obj.TYPE_REG
1138 p.From.Reg = ppc64.REG_VS32
1139 p.To.Type = obj.TYPE_MEM
1140 p.To.Reg = v.Args[0].Reg()
1141 p.To.Offset = offset
1146 // first clear as many doublewords as possible
1147 // then clear remaining sizes as available
1149 op, size := ppc64.AMOVB, int64(1)
1152 op, size = ppc64.AMOVD, 8
1154 op, size = ppc64.AMOVW, 4
1156 op, size = ppc64.AMOVH, 2
1159 p.From.Type = obj.TYPE_REG
1160 p.From.Reg = ppc64.REG_R0
1161 p.To.Type = obj.TYPE_MEM
1162 p.To.Reg = v.Args[0].Reg()
1163 p.To.Offset = offset
1168 case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1170 // Unaligned data doesn't hurt performance
1171 // for these instructions on power8.
1173 // For sizes >= 64 generate a loop as follows:
1175 // Set up loop counter in CTR, used by BC
1176 // XXLXOR VS32,VS32,VS32
1177 // MOVD len/32,REG_TMP
1181 // STXVD2X VS32,(R0)(R20)
1182 // STXVD2X VS32,(R31)(R20)
1186 // any remainder is done as described below
1188 // for sizes < 64 bytes, first clear as many doublewords as possible,
1189 // then handle the remainder
1194 // the remainder bytes are cleared using one or more
1195 // of the following instructions with the appropriate
1196 // offsets depending which instructions are needed
1198 // MOVW R0,n1(R20) 4 bytes
1199 // MOVH R0,n2(R20) 2 bytes
1200 // MOVB R0,n3(R20) 1 byte
1202 // 7 bytes: MOVW, MOVH, MOVB
1203 // 6 bytes: MOVW, MOVH
1204 // 5 bytes: MOVW, MOVB
1205 // 3 bytes: MOVH, MOVB
1207 // each loop iteration does 32 bytes
1208 ctr := v.AuxInt / 32
1211 rem := v.AuxInt % 32
1213 // only generate a loop if there is more
1214 // than 1 iteration.
1216 // Set up VS32 (V0) to hold 0s
1217 p := s.Prog(ppc64.AXXLXOR)
1218 p.From.Type = obj.TYPE_REG
1219 p.From.Reg = ppc64.REG_VS32
1220 p.To.Type = obj.TYPE_REG
1221 p.To.Reg = ppc64.REG_VS32
1222 p.Reg = ppc64.REG_VS32
1224 // Set up CTR loop counter
1225 p = s.Prog(ppc64.AMOVD)
1226 p.From.Type = obj.TYPE_CONST
1228 p.To.Type = obj.TYPE_REG
1229 p.To.Reg = ppc64.REGTMP
1231 p = s.Prog(ppc64.AMOVD)
1232 p.From.Type = obj.TYPE_REG
1233 p.From.Reg = ppc64.REGTMP
1234 p.To.Type = obj.TYPE_REG
1235 p.To.Reg = ppc64.REG_CTR
1237 // Set up R31 to hold index value 16
1238 p = s.Prog(ppc64.AMOVD)
1239 p.From.Type = obj.TYPE_CONST
1241 p.To.Type = obj.TYPE_REG
1242 p.To.Reg = ppc64.REGTMP
1244 // Don't add padding for alignment
1245 // with few loop iterations.
1247 p = s.Prog(obj.APCALIGN)
1248 p.From.Type = obj.TYPE_CONST
1252 // generate 2 STXVD2Xs to store 16 bytes
1253 // when this is a loop then the top must be saved
1255 // This is the top of loop
1257 p = s.Prog(ppc64.ASTXVD2X)
1258 p.From.Type = obj.TYPE_REG
1259 p.From.Reg = ppc64.REG_VS32
1260 p.To.Type = obj.TYPE_MEM
1261 p.To.Reg = v.Args[0].Reg()
1262 p.To.Index = ppc64.REGZERO
1263 // Save the top of loop
1267 p = s.Prog(ppc64.ASTXVD2X)
1268 p.From.Type = obj.TYPE_REG
1269 p.From.Reg = ppc64.REG_VS32
1270 p.To.Type = obj.TYPE_MEM
1271 p.To.Reg = v.Args[0].Reg()
1272 p.To.Index = ppc64.REGTMP
1274 // Increment address for the
1275 // 4 doublewords just zeroed.
1276 p = s.Prog(ppc64.AADD)
1277 p.Reg = v.Args[0].Reg()
1278 p.From.Type = obj.TYPE_CONST
1280 p.To.Type = obj.TYPE_REG
1281 p.To.Reg = v.Args[0].Reg()
1283 // Branch back to top of loop
1285 // BC with BO_BCTR generates bdnz
1286 p = s.Prog(ppc64.ABC)
1287 p.From.Type = obj.TYPE_CONST
1288 p.From.Offset = ppc64.BO_BCTR
1289 p.Reg = ppc64.REG_R0
1290 p.To.Type = obj.TYPE_BRANCH
1294 // when ctr == 1 the loop was not generated but
1295 // there are at least 32 bytes to clear, so add
1296 // that to the remainder to generate the code
1297 // to clear those doublewords
1302 // clear the remainder starting at offset zero
1305 // first clear as many doublewords as possible
1306 // then clear remaining sizes as available
1308 op, size := ppc64.AMOVB, int64(1)
1311 op, size = ppc64.AMOVD, 8
1313 op, size = ppc64.AMOVW, 4
1315 op, size = ppc64.AMOVH, 2
1318 p.From.Type = obj.TYPE_REG
1319 p.From.Reg = ppc64.REG_R0
1320 p.To.Type = obj.TYPE_MEM
1321 p.To.Reg = v.Args[0].Reg()
1322 p.To.Offset = offset
1327 case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1329 bytesPerLoop := int64(32)
1330 // This will be used when moving more
1331 // than 8 bytes. Moves start with
1332 // as many 8 byte moves as possible, then
1333 // 4, 2, or 1 byte(s) as remaining. This will
1334 // work and be efficient for power8 or later.
1335 // If there are 64 or more bytes, then a
1336 // loop is generated to move 32 bytes and
1337 // update the src and dst addresses on each
1338 // iteration. When < 64 bytes, the appropriate
1339 // number of moves are generated based on the
1341 // When moving >= 64 bytes a loop is used
1342 // MOVD len/32,REG_TMP
1346 // LXVD2X (R0)(R21),VS32
1347 // LXVD2X (R31)(R21),VS33
1349 // STXVD2X VS32,(R0)(R20)
1350 // STXVD2X VS33,(R31)(R20)
1353 // Bytes not moved by this loop are moved
1354 // with a combination of the following instructions,
1355 // starting with the largest sizes and generating as
1356 // many as needed, using the appropriate offset value.
1366 // Each loop iteration moves 32 bytes
1367 ctr := v.AuxInt / bytesPerLoop
1369 // Remainder after the loop
1370 rem := v.AuxInt % bytesPerLoop
1372 dstReg := v.Args[0].Reg()
1373 srcReg := v.Args[1].Reg()
1375 // The set of registers used here, must match the clobbered reg list
1381 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1384 p := s.Prog(ppc64.AMOVD)
1385 p.From.Type = obj.TYPE_CONST
1387 p.To.Type = obj.TYPE_REG
1388 p.To.Reg = ppc64.REGTMP
1390 p = s.Prog(ppc64.AMOVD)
1391 p.From.Type = obj.TYPE_REG
1392 p.From.Reg = ppc64.REGTMP
1393 p.To.Type = obj.TYPE_REG
1394 p.To.Reg = ppc64.REG_CTR
1396 // Use REGTMP as index reg
1397 p = s.Prog(ppc64.AMOVD)
1398 p.From.Type = obj.TYPE_CONST
1400 p.To.Type = obj.TYPE_REG
1401 p.To.Reg = ppc64.REGTMP
1403 // Don't add padding for
1404 // alignment with small iteration
1407 p = s.Prog(obj.APCALIGN)
1408 p.From.Type = obj.TYPE_CONST
1412 // Generate 16 byte loads and stores.
1413 // Use temp register for index (16)
1414 // on the second one.
1416 p = s.Prog(ppc64.ALXVD2X)
1417 p.From.Type = obj.TYPE_MEM
1419 p.From.Index = ppc64.REGZERO
1420 p.To.Type = obj.TYPE_REG
1421 p.To.Reg = ppc64.REG_VS32
1425 p = s.Prog(ppc64.ALXVD2X)
1426 p.From.Type = obj.TYPE_MEM
1428 p.From.Index = ppc64.REGTMP
1429 p.To.Type = obj.TYPE_REG
1430 p.To.Reg = ppc64.REG_VS33
1432 // increment the src reg for next iteration
1433 p = s.Prog(ppc64.AADD)
1435 p.From.Type = obj.TYPE_CONST
1436 p.From.Offset = bytesPerLoop
1437 p.To.Type = obj.TYPE_REG
1440 // generate 16 byte stores
1441 p = s.Prog(ppc64.ASTXVD2X)
1442 p.From.Type = obj.TYPE_REG
1443 p.From.Reg = ppc64.REG_VS32
1444 p.To.Type = obj.TYPE_MEM
1446 p.To.Index = ppc64.REGZERO
1448 p = s.Prog(ppc64.ASTXVD2X)
1449 p.From.Type = obj.TYPE_REG
1450 p.From.Reg = ppc64.REG_VS33
1451 p.To.Type = obj.TYPE_MEM
1453 p.To.Index = ppc64.REGTMP
1455 // increment the dst reg for next iteration
1456 p = s.Prog(ppc64.AADD)
1458 p.From.Type = obj.TYPE_CONST
1459 p.From.Offset = bytesPerLoop
1460 p.To.Type = obj.TYPE_REG
1463 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1465 p = s.Prog(ppc64.ABC)
1466 p.From.Type = obj.TYPE_CONST
1467 p.From.Offset = ppc64.BO_BCTR
1468 p.Reg = ppc64.REG_R0
1469 p.To.Type = obj.TYPE_BRANCH
1472 // srcReg and dstReg were incremented in the loop, so
1473 // later instructions start with offset 0.
1477 // No loop was generated for one iteration, so
1478 // add 32 bytes to the remainder to move those bytes.
1484 // Generate 16 byte loads and stores.
1485 // Use temp register for index (value 16)
1486 // on the second one.
1487 p := s.Prog(ppc64.ALXVD2X)
1488 p.From.Type = obj.TYPE_MEM
1490 p.From.Index = ppc64.REGZERO
1491 p.To.Type = obj.TYPE_REG
1492 p.To.Reg = ppc64.REG_VS32
1494 p = s.Prog(ppc64.ASTXVD2X)
1495 p.From.Type = obj.TYPE_REG
1496 p.From.Reg = ppc64.REG_VS32
1497 p.To.Type = obj.TYPE_MEM
1499 p.To.Index = ppc64.REGZERO
1505 // Use REGTMP as index reg
1506 p := s.Prog(ppc64.AMOVD)
1507 p.From.Type = obj.TYPE_CONST
1509 p.To.Type = obj.TYPE_REG
1510 p.To.Reg = ppc64.REGTMP
1512 p = s.Prog(ppc64.ALXVD2X)
1513 p.From.Type = obj.TYPE_MEM
1515 p.From.Index = ppc64.REGTMP
1516 p.To.Type = obj.TYPE_REG
1517 p.To.Reg = ppc64.REG_VS32
1519 p = s.Prog(ppc64.ASTXVD2X)
1520 p.From.Type = obj.TYPE_REG
1521 p.From.Reg = ppc64.REG_VS32
1522 p.To.Type = obj.TYPE_MEM
1524 p.To.Index = ppc64.REGTMP
1531 // Generate all the remaining load and store pairs, starting with
1532 // as many 8 byte moves as possible, then 4, 2, 1.
1534 op, size := ppc64.AMOVB, int64(1)
1537 op, size = ppc64.AMOVD, 8
1539 op, size = ppc64.AMOVWZ, 4
1541 op, size = ppc64.AMOVH, 2
1545 p.To.Type = obj.TYPE_REG
1546 p.To.Reg = ppc64.REGTMP
1547 p.From.Type = obj.TYPE_MEM
1549 p.From.Offset = offset
1553 p.From.Type = obj.TYPE_REG
1554 p.From.Reg = ppc64.REGTMP
1555 p.To.Type = obj.TYPE_MEM
1557 p.To.Offset = offset
1562 case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1563 bytesPerLoop := int64(64)
1564 // This is used when moving more
1565 // than 8 bytes on power9. Moves start with
1566 // as many 8 byte moves as possible, then
1567 // 4, 2, or 1 byte(s) as remaining. This will
1568 // work and be efficient for power8 or later.
1569 // If there are 64 or more bytes, then a
1570 // loop is generated to move 32 bytes and
1571 // update the src and dst addresses on each
1572 // iteration. When < 64 bytes, the appropriate
1573 // number of moves are generated based on the
1575 // When moving >= 64 bytes a loop is used
1576 // MOVD len/32,REG_TMP
1583 // STXV VS33,16(R20)
1586 // Bytes not moved by this loop are moved
1587 // with a combination of the following instructions,
1588 // starting with the largest sizes and generating as
1589 // many as needed, using the appropriate offset value.
1599 // Each loop iteration moves 32 bytes
1600 ctr := v.AuxInt / bytesPerLoop
1602 // Remainder after the loop
1603 rem := v.AuxInt % bytesPerLoop
1605 dstReg := v.Args[0].Reg()
1606 srcReg := v.Args[1].Reg()
1613 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1616 p := s.Prog(ppc64.AMOVD)
1617 p.From.Type = obj.TYPE_CONST
1619 p.To.Type = obj.TYPE_REG
1620 p.To.Reg = ppc64.REGTMP
1622 p = s.Prog(ppc64.AMOVD)
1623 p.From.Type = obj.TYPE_REG
1624 p.From.Reg = ppc64.REGTMP
1625 p.To.Type = obj.TYPE_REG
1626 p.To.Reg = ppc64.REG_CTR
1628 p = s.Prog(obj.APCALIGN)
1629 p.From.Type = obj.TYPE_CONST
1632 // Generate 16 byte loads and stores.
1633 p = s.Prog(ppc64.ALXV)
1634 p.From.Type = obj.TYPE_MEM
1636 p.From.Offset = offset
1637 p.To.Type = obj.TYPE_REG
1638 p.To.Reg = ppc64.REG_VS32
1642 p = s.Prog(ppc64.ALXV)
1643 p.From.Type = obj.TYPE_MEM
1645 p.From.Offset = offset + 16
1646 p.To.Type = obj.TYPE_REG
1647 p.To.Reg = ppc64.REG_VS33
1649 // generate 16 byte stores
1650 p = s.Prog(ppc64.ASTXV)
1651 p.From.Type = obj.TYPE_REG
1652 p.From.Reg = ppc64.REG_VS32
1653 p.To.Type = obj.TYPE_MEM
1655 p.To.Offset = offset
1657 p = s.Prog(ppc64.ASTXV)
1658 p.From.Type = obj.TYPE_REG
1659 p.From.Reg = ppc64.REG_VS33
1660 p.To.Type = obj.TYPE_MEM
1662 p.To.Offset = offset + 16
1664 // Generate 16 byte loads and stores.
1665 p = s.Prog(ppc64.ALXV)
1666 p.From.Type = obj.TYPE_MEM
1668 p.From.Offset = offset + 32
1669 p.To.Type = obj.TYPE_REG
1670 p.To.Reg = ppc64.REG_VS32
1672 p = s.Prog(ppc64.ALXV)
1673 p.From.Type = obj.TYPE_MEM
1675 p.From.Offset = offset + 48
1676 p.To.Type = obj.TYPE_REG
1677 p.To.Reg = ppc64.REG_VS33
1679 // generate 16 byte stores
1680 p = s.Prog(ppc64.ASTXV)
1681 p.From.Type = obj.TYPE_REG
1682 p.From.Reg = ppc64.REG_VS32
1683 p.To.Type = obj.TYPE_MEM
1685 p.To.Offset = offset + 32
1687 p = s.Prog(ppc64.ASTXV)
1688 p.From.Type = obj.TYPE_REG
1689 p.From.Reg = ppc64.REG_VS33
1690 p.To.Type = obj.TYPE_MEM
1692 p.To.Offset = offset + 48
1694 // increment the src reg for next iteration
1695 p = s.Prog(ppc64.AADD)
1697 p.From.Type = obj.TYPE_CONST
1698 p.From.Offset = bytesPerLoop
1699 p.To.Type = obj.TYPE_REG
1702 // increment the dst reg for next iteration
1703 p = s.Prog(ppc64.AADD)
1705 p.From.Type = obj.TYPE_CONST
1706 p.From.Offset = bytesPerLoop
1707 p.To.Type = obj.TYPE_REG
1710 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1712 p = s.Prog(ppc64.ABC)
1713 p.From.Type = obj.TYPE_CONST
1714 p.From.Offset = ppc64.BO_BCTR
1715 p.Reg = ppc64.REG_R0
1716 p.To.Type = obj.TYPE_BRANCH
1719 // srcReg and dstReg were incremented in the loop, so
1720 // later instructions start with offset 0.
1724 // No loop was generated for one iteration, so
1725 // add 32 bytes to the remainder to move those bytes.
1730 p := s.Prog(ppc64.ALXV)
1731 p.From.Type = obj.TYPE_MEM
1733 p.To.Type = obj.TYPE_REG
1734 p.To.Reg = ppc64.REG_VS32
1736 p = s.Prog(ppc64.ALXV)
1737 p.From.Type = obj.TYPE_MEM
1740 p.To.Type = obj.TYPE_REG
1741 p.To.Reg = ppc64.REG_VS33
1743 p = s.Prog(ppc64.ASTXV)
1744 p.From.Type = obj.TYPE_REG
1745 p.From.Reg = ppc64.REG_VS32
1746 p.To.Type = obj.TYPE_MEM
1749 p = s.Prog(ppc64.ASTXV)
1750 p.From.Type = obj.TYPE_REG
1751 p.From.Reg = ppc64.REG_VS33
1752 p.To.Type = obj.TYPE_MEM
1761 // Generate 16 byte loads and stores.
1762 p := s.Prog(ppc64.ALXV)
1763 p.From.Type = obj.TYPE_MEM
1765 p.From.Offset = offset
1766 p.To.Type = obj.TYPE_REG
1767 p.To.Reg = ppc64.REG_VS32
1769 p = s.Prog(ppc64.ASTXV)
1770 p.From.Type = obj.TYPE_REG
1771 p.From.Reg = ppc64.REG_VS32
1772 p.To.Type = obj.TYPE_MEM
1774 p.To.Offset = offset
1780 p := s.Prog(ppc64.ALXV)
1781 p.From.Type = obj.TYPE_MEM
1783 p.From.Offset = offset
1784 p.To.Type = obj.TYPE_REG
1785 p.To.Reg = ppc64.REG_VS32
1787 p = s.Prog(ppc64.ASTXV)
1788 p.From.Type = obj.TYPE_REG
1789 p.From.Reg = ppc64.REG_VS32
1790 p.To.Type = obj.TYPE_MEM
1792 p.To.Offset = offset
1798 // Generate all the remaining load and store pairs, starting with
1799 // as many 8 byte moves as possible, then 4, 2, 1.
1801 op, size := ppc64.AMOVB, int64(1)
1804 op, size = ppc64.AMOVD, 8
1806 op, size = ppc64.AMOVWZ, 4
1808 op, size = ppc64.AMOVH, 2
1812 p.To.Type = obj.TYPE_REG
1813 p.To.Reg = ppc64.REGTMP
1814 p.From.Type = obj.TYPE_MEM
1816 p.From.Offset = offset
1820 p.From.Type = obj.TYPE_REG
1821 p.From.Reg = ppc64.REGTMP
1822 p.To.Type = obj.TYPE_MEM
1824 p.To.Offset = offset
1829 case ssa.OpPPC64CALLstatic:
1832 case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1833 p := s.Prog(ppc64.AMOVD)
1834 p.From.Type = obj.TYPE_REG
1835 p.From.Reg = v.Args[0].Reg()
1836 p.To.Type = obj.TYPE_REG
1837 p.To.Reg = ppc64.REG_LR
1839 if v.Args[0].Reg() != ppc64.REG_R12 {
1840 v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1844 pp.To.Reg = ppc64.REG_LR
1846 // Insert a hint this is not a subroutine return.
1849 if base.Ctxt.Flag_shared {
1850 // When compiling Go into PIC, the function we just
1851 // called via pointer might have been implemented in
1852 // a separate module and so overwritten the TOC
1853 // pointer in R2; reload it.
1854 q := s.Prog(ppc64.AMOVD)
1855 q.From.Type = obj.TYPE_MEM
1857 q.From.Reg = ppc64.REGSP
1858 q.To.Type = obj.TYPE_REG
1859 q.To.Reg = ppc64.REG_R2
1862 case ssa.OpPPC64LoweredWB:
1863 p := s.Prog(obj.ACALL)
1864 p.To.Type = obj.TYPE_MEM
1865 p.To.Name = obj.NAME_EXTERN
1866 p.To.Sym = v.Aux.(*obj.LSym)
1868 case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1869 p := s.Prog(obj.ACALL)
1870 p.To.Type = obj.TYPE_MEM
1871 p.To.Name = obj.NAME_EXTERN
1872 p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
1873 s.UseArgs(16) // space used in callee args area by assembly stubs
1875 case ssa.OpPPC64LoweredNilCheck:
1876 if buildcfg.GOOS == "aix" {
1880 // NOP (so the BNE has somewhere to land)
1883 p := s.Prog(ppc64.ACMP)
1884 p.From.Type = obj.TYPE_REG
1885 p.From.Reg = v.Args[0].Reg()
1886 p.To.Type = obj.TYPE_REG
1887 p.To.Reg = ppc64.REG_R0
1890 p2 := s.Prog(ppc64.ABNE)
1891 p2.To.Type = obj.TYPE_BRANCH
1894 // Write at 0 is forbidden and will trigger a SIGSEGV
1895 p = s.Prog(ppc64.AMOVW)
1896 p.From.Type = obj.TYPE_REG
1897 p.From.Reg = ppc64.REG_R0
1898 p.To.Type = obj.TYPE_MEM
1899 p.To.Reg = ppc64.REG_R0
1901 // NOP (so the BNE has somewhere to land)
1902 nop := s.Prog(obj.ANOP)
1903 p2.To.SetTarget(nop)
1906 // Issue a load which will fault if arg is nil.
1907 p := s.Prog(ppc64.AMOVBZ)
1908 p.From.Type = obj.TYPE_MEM
1909 p.From.Reg = v.Args[0].Reg()
1910 ssagen.AddAux(&p.From, v)
1911 p.To.Type = obj.TYPE_REG
1912 p.To.Reg = ppc64.REGTMP
1914 if logopt.Enabled() {
1915 logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1917 if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1918 base.WarnfAt(v.Pos, "generated nil check")
1921 // These should be resolved by rules and not make it here.
1922 case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1923 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1924 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1925 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1926 case ssa.OpPPC64InvertFlags:
1927 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1928 case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1929 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1930 case ssa.OpClobber, ssa.OpClobberReg:
1931 // TODO: implement for clobberdead experiment. Nop is ok for now.
1933 v.Fatalf("genValue not implemented: %s", v.LongString())
1937 var blockJump = [...]struct {
1939 asmeq, invasmun bool
1941 ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1942 ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1944 ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1945 ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1946 ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1947 ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1949 // TODO: need to work FP comparisons into block jumps
1950 ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1951 ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1952 ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1953 ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1956 func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
1958 case ssa.BlockDefer:
1959 // defer returns in R3:
1960 // 0 if we should continue executing
1961 // 1 if we should jump to deferreturn call
1962 p := s.Prog(ppc64.ACMP)
1963 p.From.Type = obj.TYPE_REG
1964 p.From.Reg = ppc64.REG_R3
1965 p.To.Type = obj.TYPE_REG
1966 p.To.Reg = ppc64.REG_R0
1968 p = s.Prog(ppc64.ABNE)
1969 p.To.Type = obj.TYPE_BRANCH
1970 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
1971 if b.Succs[0].Block() != next {
1972 p := s.Prog(obj.AJMP)
1973 p.To.Type = obj.TYPE_BRANCH
1974 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
1977 case ssa.BlockPlain:
1978 if b.Succs[0].Block() != next {
1979 p := s.Prog(obj.AJMP)
1980 p.To.Type = obj.TYPE_BRANCH
1981 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
1986 case ssa.BlockRetJmp:
1987 p := s.Prog(obj.AJMP)
1988 p.To.Type = obj.TYPE_MEM
1989 p.To.Name = obj.NAME_EXTERN
1990 p.To.Sym = b.Aux.(*obj.LSym)
1992 case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
1993 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
1994 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
1995 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
1996 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
1997 jmp := blockJump[b.Kind]
1999 case b.Succs[0].Block():
2000 s.Br(jmp.invasm, b.Succs[1].Block())
2002 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2003 s.Br(ppc64.ABVS, b.Succs[1].Block())
2005 case b.Succs[1].Block():
2006 s.Br(jmp.asm, b.Succs[0].Block())
2008 s.Br(ppc64.ABEQ, b.Succs[0].Block())
2011 if b.Likely != ssa.BranchUnlikely {
2012 s.Br(jmp.asm, b.Succs[0].Block())
2014 s.Br(ppc64.ABEQ, b.Succs[0].Block())
2016 s.Br(obj.AJMP, b.Succs[1].Block())
2018 s.Br(jmp.invasm, b.Succs[1].Block())
2020 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2021 s.Br(ppc64.ABVS, b.Succs[1].Block())
2023 s.Br(obj.AJMP, b.Succs[0].Block())
2027 b.Fatalf("branch not implemented: %s", b.LongString())