1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/base"
9 "cmd/compile/internal/ir"
10 "cmd/compile/internal/logopt"
11 "cmd/compile/internal/ssa"
12 "cmd/compile/internal/ssagen"
13 "cmd/compile/internal/types"
15 "cmd/internal/obj/ppc64"
21 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
22 func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
23 // flive := b.FlagsLiveAtEnd
24 // if b.Control != nil && b.Control.Type.IsFlags() {
27 // for i := len(b.Values) - 1; i >= 0; i-- {
29 // if flive && (v.Op == ssa.OpPPC64MOVDconst) {
30 // // The "mark" is any non-nil Aux value.
33 // if v.Type.IsFlags() {
36 // for _, a := range v.Args {
37 // if a.Type.IsFlags() {
44 // loadByType returns the load instruction of the given type.
45 func loadByType(t *types.Type) obj.As {
77 panic("bad load type")
80 // storeByType returns the store instruction of the given type.
81 func storeByType(t *types.Type) obj.As {
101 panic("bad store type")
104 func ssaGenValue(s *ssagen.State, v *ssa.Value) {
127 case ssa.OpPPC64LoweredMuluhilo:
128 // MULHDU Rarg1, Rarg0, Reg0
129 // MULLD Rarg1, Rarg0, Reg1
130 r0 := v.Args[0].Reg()
131 r1 := v.Args[1].Reg()
132 p := s.Prog(ppc64.AMULHDU)
133 p.From.Type = obj.TYPE_REG
136 p.To.Type = obj.TYPE_REG
138 p1 := s.Prog(ppc64.AMULLD)
139 p1.From.Type = obj.TYPE_REG
142 p1.To.Type = obj.TYPE_REG
145 case ssa.OpPPC64LoweredAdd64Carry:
146 // ADDC Rarg2, -1, Rtmp
147 // ADDE Rarg1, Rarg0, Reg0
149 r0 := v.Args[0].Reg()
150 r1 := v.Args[1].Reg()
151 r2 := v.Args[2].Reg()
152 p := s.Prog(ppc64.AADDC)
153 p.From.Type = obj.TYPE_CONST
156 p.To.Type = obj.TYPE_REG
157 p.To.Reg = ppc64.REGTMP
158 p1 := s.Prog(ppc64.AADDE)
159 p1.From.Type = obj.TYPE_REG
162 p1.To.Type = obj.TYPE_REG
164 p2 := s.Prog(ppc64.AADDZE)
165 p2.From.Type = obj.TYPE_REG
166 p2.From.Reg = ppc64.REGZERO
167 p2.To.Type = obj.TYPE_REG
170 case ssa.OpPPC64LoweredAtomicAnd8,
171 ssa.OpPPC64LoweredAtomicAnd32,
172 ssa.OpPPC64LoweredAtomicOr8,
173 ssa.OpPPC64LoweredAtomicOr32:
175 // LBAR/LWAR (Rarg0), Rtmp
176 // AND/OR Rarg1, Rtmp
177 // STBCCC/STWCCC Rtmp, (Rarg0)
181 if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
185 r0 := v.Args[0].Reg()
186 r1 := v.Args[1].Reg()
187 // LWSYNC - Assuming shared data not write-through-required nor
188 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
189 plwsync := s.Prog(ppc64.ALWSYNC)
190 plwsync.To.Type = obj.TYPE_NONE
193 p.From.Type = obj.TYPE_MEM
195 p.To.Type = obj.TYPE_REG
196 p.To.Reg = ppc64.REGTMP
198 p1 := s.Prog(v.Op.Asm())
199 p1.From.Type = obj.TYPE_REG
201 p1.To.Type = obj.TYPE_REG
202 p1.To.Reg = ppc64.REGTMP
205 p2.From.Type = obj.TYPE_REG
206 p2.From.Reg = ppc64.REGTMP
207 p2.To.Type = obj.TYPE_MEM
209 p2.RegTo2 = ppc64.REGTMP
211 p3 := s.Prog(ppc64.ABNE)
212 p3.To.Type = obj.TYPE_BRANCH
215 case ssa.OpPPC64LoweredAtomicAdd32,
216 ssa.OpPPC64LoweredAtomicAdd64:
218 // LDAR/LWAR (Rarg0), Rout
220 // STDCCC/STWCCC Rout, (Rarg0)
222 // MOVW Rout,Rout (if Add32)
225 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
229 r0 := v.Args[0].Reg()
230 r1 := v.Args[1].Reg()
232 // LWSYNC - Assuming shared data not write-through-required nor
233 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
234 plwsync := s.Prog(ppc64.ALWSYNC)
235 plwsync.To.Type = obj.TYPE_NONE
238 p.From.Type = obj.TYPE_MEM
240 p.To.Type = obj.TYPE_REG
243 p1 := s.Prog(ppc64.AADD)
244 p1.From.Type = obj.TYPE_REG
247 p1.To.Type = obj.TYPE_REG
250 p3.From.Type = obj.TYPE_REG
252 p3.To.Type = obj.TYPE_MEM
255 p4 := s.Prog(ppc64.ABNE)
256 p4.To.Type = obj.TYPE_BRANCH
259 // Ensure a 32 bit result
260 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
261 p5 := s.Prog(ppc64.AMOVWZ)
262 p5.To.Type = obj.TYPE_REG
264 p5.From.Type = obj.TYPE_REG
268 case ssa.OpPPC64LoweredAtomicExchange32,
269 ssa.OpPPC64LoweredAtomicExchange64:
271 // LDAR/LWAR (Rarg0), Rout
272 // STDCCC/STWCCC Rout, (Rarg0)
277 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
281 r0 := v.Args[0].Reg()
282 r1 := v.Args[1].Reg()
284 // LWSYNC - Assuming shared data not write-through-required nor
285 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
286 plwsync := s.Prog(ppc64.ALWSYNC)
287 plwsync.To.Type = obj.TYPE_NONE
290 p.From.Type = obj.TYPE_MEM
292 p.To.Type = obj.TYPE_REG
296 p1.From.Type = obj.TYPE_REG
298 p1.To.Type = obj.TYPE_MEM
301 p2 := s.Prog(ppc64.ABNE)
302 p2.To.Type = obj.TYPE_BRANCH
305 pisync := s.Prog(ppc64.AISYNC)
306 pisync.To.Type = obj.TYPE_NONE
308 case ssa.OpPPC64LoweredAtomicLoad8,
309 ssa.OpPPC64LoweredAtomicLoad32,
310 ssa.OpPPC64LoweredAtomicLoad64,
311 ssa.OpPPC64LoweredAtomicLoadPtr:
313 // MOVB/MOVD/MOVW (Rarg0), Rout
320 case ssa.OpPPC64LoweredAtomicLoad8:
322 case ssa.OpPPC64LoweredAtomicLoad32:
326 arg0 := v.Args[0].Reg()
328 // SYNC when AuxInt == 1; otherwise, load-acquire
330 psync := s.Prog(ppc64.ASYNC)
331 psync.To.Type = obj.TYPE_NONE
335 p.From.Type = obj.TYPE_MEM
337 p.To.Type = obj.TYPE_REG
341 p1.From.Type = obj.TYPE_REG
343 p1.To.Type = obj.TYPE_REG
346 p2 := s.Prog(ppc64.ABNE)
347 p2.To.Type = obj.TYPE_BRANCH
349 pisync := s.Prog(ppc64.AISYNC)
350 pisync.To.Type = obj.TYPE_NONE
351 p2.To.SetTarget(pisync)
353 case ssa.OpPPC64LoweredAtomicStore8,
354 ssa.OpPPC64LoweredAtomicStore32,
355 ssa.OpPPC64LoweredAtomicStore64:
357 // MOVB/MOVW/MOVD arg1,(arg0)
360 case ssa.OpPPC64LoweredAtomicStore8:
362 case ssa.OpPPC64LoweredAtomicStore32:
365 arg0 := v.Args[0].Reg()
366 arg1 := v.Args[1].Reg()
367 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
369 syncOp := ppc64.ASYNC
371 syncOp = ppc64.ALWSYNC
373 psync := s.Prog(syncOp)
374 psync.To.Type = obj.TYPE_NONE
377 p.To.Type = obj.TYPE_MEM
379 p.From.Type = obj.TYPE_REG
382 case ssa.OpPPC64LoweredAtomicCas64,
383 ssa.OpPPC64LoweredAtomicCas32:
386 // LDAR (Rarg0), MutexHint, Rtmp
389 // STDCCC Rarg2, (Rarg0)
391 // LWSYNC // Only for sequential consistency; not required in CasRel.
400 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
405 r0 := v.Args[0].Reg()
406 r1 := v.Args[1].Reg()
407 r2 := v.Args[2].Reg()
409 // LWSYNC - Assuming shared data not write-through-required nor
410 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
411 plwsync1 := s.Prog(ppc64.ALWSYNC)
412 plwsync1.To.Type = obj.TYPE_NONE
415 p.From.Type = obj.TYPE_MEM
417 p.To.Type = obj.TYPE_REG
418 p.To.Reg = ppc64.REGTMP
419 // If it is a Compare-and-Swap-Release operation, set the EH field with
422 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
426 p1.From.Type = obj.TYPE_REG
428 p1.To.Reg = ppc64.REGTMP
429 p1.To.Type = obj.TYPE_REG
431 p2 := s.Prog(ppc64.ABNE)
432 p2.To.Type = obj.TYPE_BRANCH
435 p3.From.Type = obj.TYPE_REG
437 p3.To.Type = obj.TYPE_MEM
440 p4 := s.Prog(ppc64.ABNE)
441 p4.To.Type = obj.TYPE_BRANCH
443 // LWSYNC - Assuming shared data not write-through-required nor
444 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
445 // If the operation is a CAS-Release, then synchronization is not necessary.
447 plwsync2 := s.Prog(ppc64.ALWSYNC)
448 plwsync2.To.Type = obj.TYPE_NONE
451 p5 := s.Prog(ppc64.AMOVD)
452 p5.From.Type = obj.TYPE_CONST
454 p5.To.Type = obj.TYPE_REG
457 p6 := s.Prog(obj.AJMP)
458 p6.To.Type = obj.TYPE_BRANCH
460 p7 := s.Prog(ppc64.AMOVD)
461 p7.From.Type = obj.TYPE_CONST
463 p7.To.Type = obj.TYPE_REG
467 p8 := s.Prog(obj.ANOP)
470 case ssa.OpPPC64LoweredGetClosurePtr:
471 // Closure pointer is R11 (already)
472 ssagen.CheckLoweredGetClosurePtr(v)
474 case ssa.OpPPC64LoweredGetCallerSP:
475 // caller's SP is FixedFrameSize below the address of the first arg
476 p := s.Prog(ppc64.AMOVD)
477 p.From.Type = obj.TYPE_ADDR
478 p.From.Offset = -base.Ctxt.FixedFrameSize()
479 p.From.Name = obj.NAME_PARAM
480 p.To.Type = obj.TYPE_REG
483 case ssa.OpPPC64LoweredGetCallerPC:
484 p := s.Prog(obj.AGETCALLERPC)
485 p.To.Type = obj.TYPE_REG
488 case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
489 // input is already rounded
492 loadOp := loadByType(v.Type)
494 ssagen.AddrAuto(&p.From, v.Args[0])
495 p.To.Type = obj.TYPE_REG
499 storeOp := storeByType(v.Type)
501 p.From.Type = obj.TYPE_REG
502 p.From.Reg = v.Args[0].Reg()
503 ssagen.AddrAuto(&p.To, v)
505 case ssa.OpPPC64DIVD:
515 r0 := v.Args[0].Reg()
516 r1 := v.Args[1].Reg()
518 p := s.Prog(ppc64.ACMP)
519 p.From.Type = obj.TYPE_REG
521 p.To.Type = obj.TYPE_CONST
524 pbahead := s.Prog(ppc64.ABEQ)
525 pbahead.To.Type = obj.TYPE_BRANCH
527 p = s.Prog(v.Op.Asm())
528 p.From.Type = obj.TYPE_REG
531 p.To.Type = obj.TYPE_REG
534 pbover := s.Prog(obj.AJMP)
535 pbover.To.Type = obj.TYPE_BRANCH
537 p = s.Prog(ppc64.ANEG)
538 p.To.Type = obj.TYPE_REG
540 p.From.Type = obj.TYPE_REG
542 pbahead.To.SetTarget(p)
545 pbover.To.SetTarget(p)
547 case ssa.OpPPC64DIVW:
548 // word-width version of above
550 r0 := v.Args[0].Reg()
551 r1 := v.Args[1].Reg()
553 p := s.Prog(ppc64.ACMPW)
554 p.From.Type = obj.TYPE_REG
556 p.To.Type = obj.TYPE_CONST
559 pbahead := s.Prog(ppc64.ABEQ)
560 pbahead.To.Type = obj.TYPE_BRANCH
562 p = s.Prog(v.Op.Asm())
563 p.From.Type = obj.TYPE_REG
566 p.To.Type = obj.TYPE_REG
569 pbover := s.Prog(obj.AJMP)
570 pbover.To.Type = obj.TYPE_BRANCH
572 p = s.Prog(ppc64.ANEG)
573 p.To.Type = obj.TYPE_REG
575 p.From.Type = obj.TYPE_REG
577 pbahead.To.SetTarget(p)
580 pbover.To.SetTarget(p)
582 case ssa.OpPPC64CLRLSLWI:
584 r1 := v.Args[0].Reg()
586 p := s.Prog(v.Op.Asm())
587 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
588 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
589 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)})
591 p.To.Type = obj.TYPE_REG
594 case ssa.OpPPC64CLRLSLDI:
596 r1 := v.Args[0].Reg()
598 p := s.Prog(v.Op.Asm())
599 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
600 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
601 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)})
603 p.To.Type = obj.TYPE_REG
606 // Mask has been set as sh
607 case ssa.OpPPC64RLDICL:
609 r1 := v.Args[0].Reg()
611 p := s.Prog(v.Op.Asm())
612 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}
613 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)})
615 p.To.Type = obj.TYPE_REG
618 case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
619 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
620 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
621 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
622 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
623 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
624 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
625 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
627 r1 := v.Args[0].Reg()
628 r2 := v.Args[1].Reg()
629 p := s.Prog(v.Op.Asm())
630 p.From.Type = obj.TYPE_REG
633 p.To.Type = obj.TYPE_REG
636 case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
637 r1 := v.Args[0].Reg()
638 r2 := v.Args[1].Reg()
639 p := s.Prog(v.Op.Asm())
640 p.From.Type = obj.TYPE_REG
643 p.To.Type = obj.TYPE_REG
644 p.To.Reg = ppc64.REGTMP // result is not needed
646 case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
647 p := s.Prog(v.Op.Asm())
648 p.From.Type = obj.TYPE_CONST
649 p.From.Offset = v.AuxInt
650 p.Reg = v.Args[0].Reg()
651 p.To.Type = obj.TYPE_REG
654 // Auxint holds encoded rotate + mask
655 case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
656 rot, _, _, mask := ssa.DecodePPC64RotateMask(v.AuxInt)
657 p := s.Prog(v.Op.Asm())
658 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
659 p.Reg = v.Args[0].Reg()
660 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(rot)}
661 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: int64(mask)})
664 case ssa.OpPPC64RLWNM:
665 _, _, _, mask := ssa.DecodePPC64RotateMask(v.AuxInt)
666 p := s.Prog(v.Op.Asm())
667 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
668 p.Reg = v.Args[0].Reg()
669 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
670 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: int64(mask)})
672 case ssa.OpPPC64MADDLD:
674 r1 := v.Args[0].Reg()
675 r2 := v.Args[1].Reg()
676 r3 := v.Args[2].Reg()
678 p := s.Prog(v.Op.Asm())
679 p.From.Type = obj.TYPE_REG
682 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r3})
683 p.To.Type = obj.TYPE_REG
686 case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
688 r1 := v.Args[0].Reg()
689 r2 := v.Args[1].Reg()
690 r3 := v.Args[2].Reg()
692 p := s.Prog(v.Op.Asm())
693 p.From.Type = obj.TYPE_REG
696 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
697 p.To.Type = obj.TYPE_REG
700 case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
701 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
702 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
703 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
705 p := s.Prog(v.Op.Asm())
706 p.To.Type = obj.TYPE_REG
708 p.From.Type = obj.TYPE_REG
709 p.From.Reg = v.Args[0].Reg()
711 case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
712 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
713 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
714 p := s.Prog(v.Op.Asm())
715 p.Reg = v.Args[0].Reg()
716 p.From.Type = obj.TYPE_CONST
717 p.From.Offset = v.AuxInt
718 p.To.Type = obj.TYPE_REG
721 case ssa.OpPPC64SUBFCconst:
722 p := s.Prog(v.Op.Asm())
723 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: v.AuxInt})
724 p.From.Type = obj.TYPE_REG
725 p.From.Reg = v.Args[0].Reg()
726 p.To.Type = obj.TYPE_REG
729 case ssa.OpPPC64ANDCCconst:
730 p := s.Prog(v.Op.Asm())
731 p.Reg = v.Args[0].Reg()
732 p.From.Type = obj.TYPE_CONST
733 p.From.Offset = v.AuxInt
734 p.To.Type = obj.TYPE_REG
735 p.To.Reg = ppc64.REGTMP // discard result
737 case ssa.OpPPC64MOVDaddr:
738 switch v.Aux.(type) {
740 v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
742 // If aux offset and aux int are both 0, and the same
743 // input and output regs are used, no instruction
744 // needs to be generated, since it would just be
746 if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
747 p := s.Prog(ppc64.AMOVD)
748 p.From.Type = obj.TYPE_ADDR
749 p.From.Reg = v.Args[0].Reg()
750 p.From.Offset = v.AuxInt
751 p.To.Type = obj.TYPE_REG
755 case *obj.LSym, ir.Node:
756 p := s.Prog(ppc64.AMOVD)
757 p.From.Type = obj.TYPE_ADDR
758 p.From.Reg = v.Args[0].Reg()
759 p.To.Type = obj.TYPE_REG
761 ssagen.AddAux(&p.From, v)
765 case ssa.OpPPC64MOVDconst:
766 p := s.Prog(v.Op.Asm())
767 p.From.Type = obj.TYPE_CONST
768 p.From.Offset = v.AuxInt
769 p.To.Type = obj.TYPE_REG
772 case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
773 p := s.Prog(v.Op.Asm())
774 p.From.Type = obj.TYPE_FCONST
775 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
776 p.To.Type = obj.TYPE_REG
779 case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
780 p := s.Prog(v.Op.Asm())
781 p.From.Type = obj.TYPE_REG
782 p.From.Reg = v.Args[0].Reg()
783 p.To.Type = obj.TYPE_REG
784 p.To.Reg = v.Args[1].Reg()
786 case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
787 p := s.Prog(v.Op.Asm())
788 p.From.Type = obj.TYPE_REG
789 p.From.Reg = v.Args[0].Reg()
790 p.To.Type = obj.TYPE_CONST
791 p.To.Offset = v.AuxInt
793 case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
794 // Shift in register to required size
795 p := s.Prog(v.Op.Asm())
796 p.From.Type = obj.TYPE_REG
797 p.From.Reg = v.Args[0].Reg()
799 p.To.Type = obj.TYPE_REG
801 case ssa.OpPPC64MOVDload:
803 // MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
804 // For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
805 // the offset is not known until link time. If the load of a go.string uses relocation for the
806 // offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
807 // To avoid this problem, the full address of the go.string is computed and loaded into the base register,
808 // and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
809 // go.string types because other types will have proper alignment.
812 switch n := v.Aux.(type) {
814 gostring = strings.HasPrefix(n.Name, "go.string.")
817 // Generate full addr of the go.string const
819 p := s.Prog(ppc64.AMOVD)
820 p.From.Type = obj.TYPE_ADDR
821 p.From.Reg = v.Args[0].Reg()
822 ssagen.AddAux(&p.From, v)
823 p.To.Type = obj.TYPE_REG
825 // Load go.string using 0 offset
826 p = s.Prog(v.Op.Asm())
827 p.From.Type = obj.TYPE_MEM
829 p.To.Type = obj.TYPE_REG
833 // Not a go.string, generate a normal load
836 case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
837 p := s.Prog(v.Op.Asm())
838 p.From.Type = obj.TYPE_MEM
839 p.From.Reg = v.Args[0].Reg()
840 ssagen.AddAux(&p.From, v)
841 p.To.Type = obj.TYPE_REG
844 case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
845 p := s.Prog(v.Op.Asm())
846 p.From.Type = obj.TYPE_MEM
847 p.From.Reg = v.Args[0].Reg()
848 p.To.Type = obj.TYPE_REG
851 case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
852 p := s.Prog(v.Op.Asm())
853 p.To.Type = obj.TYPE_MEM
854 p.To.Reg = v.Args[0].Reg()
855 p.From.Type = obj.TYPE_REG
856 p.From.Reg = v.Args[1].Reg()
858 case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
859 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
860 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
861 p := s.Prog(v.Op.Asm())
862 p.From.Type = obj.TYPE_MEM
863 p.From.Reg = v.Args[0].Reg()
864 p.From.Index = v.Args[1].Reg()
865 p.To.Type = obj.TYPE_REG
868 case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
869 p := s.Prog(v.Op.Asm())
870 p.From.Type = obj.TYPE_REG
871 p.From.Reg = ppc64.REGZERO
872 p.To.Type = obj.TYPE_MEM
873 p.To.Reg = v.Args[0].Reg()
874 ssagen.AddAux(&p.To, v)
876 case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
877 p := s.Prog(v.Op.Asm())
878 p.From.Type = obj.TYPE_REG
879 p.From.Reg = v.Args[1].Reg()
880 p.To.Type = obj.TYPE_MEM
881 p.To.Reg = v.Args[0].Reg()
882 ssagen.AddAux(&p.To, v)
884 case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
885 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
886 ssa.OpPPC64MOVHBRstoreidx:
887 p := s.Prog(v.Op.Asm())
888 p.From.Type = obj.TYPE_REG
889 p.From.Reg = v.Args[2].Reg()
890 p.To.Index = v.Args[1].Reg()
891 p.To.Type = obj.TYPE_MEM
892 p.To.Reg = v.Args[0].Reg()
894 case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
896 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
897 // ISEL only accepts 0, 1, 2 condition values but the others can be
898 // achieved by swapping operand order.
899 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
900 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
901 // ISELB is used when a boolean result is needed, returning 0 or 1
902 p := s.Prog(ppc64.AISEL)
903 p.To.Type = obj.TYPE_REG
905 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
906 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
907 if v.Op == ssa.OpPPC64ISEL {
908 r.Reg = v.Args[1].Reg()
910 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
913 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
915 p.Reg = v.Args[0].Reg()
918 p.From.Type = obj.TYPE_CONST
919 p.From.Offset = v.AuxInt & 3
921 case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
922 // The LoweredQuad code generation
923 // generates STXV instructions on
924 // power9. The Short variation is used
925 // if no loop is generated.
927 // sizes >= 64 generate a loop as follows:
929 // Set up loop counter in CTR, used by BC
930 // XXLXOR clears VS32
931 // XXLXOR VS32,VS32,VS32
932 // MOVD len/64,REG_TMP
942 // Bytes per iteration
948 // Only generate a loop if there is more
951 // Set up VS32 (V0) to hold 0s
952 p := s.Prog(ppc64.AXXLXOR)
953 p.From.Type = obj.TYPE_REG
954 p.From.Reg = ppc64.REG_VS32
955 p.To.Type = obj.TYPE_REG
956 p.To.Reg = ppc64.REG_VS32
957 p.Reg = ppc64.REG_VS32
959 // Set up CTR loop counter
960 p = s.Prog(ppc64.AMOVD)
961 p.From.Type = obj.TYPE_CONST
963 p.To.Type = obj.TYPE_REG
964 p.To.Reg = ppc64.REGTMP
966 p = s.Prog(ppc64.AMOVD)
967 p.From.Type = obj.TYPE_REG
968 p.From.Reg = ppc64.REGTMP
969 p.To.Type = obj.TYPE_REG
970 p.To.Reg = ppc64.REG_CTR
972 // Don't generate padding for
973 // loops with few iterations.
975 p = s.Prog(obj.APCALIGN)
976 p.From.Type = obj.TYPE_CONST
980 // generate 4 STXVs to zero 64 bytes
983 p = s.Prog(ppc64.ASTXV)
984 p.From.Type = obj.TYPE_REG
985 p.From.Reg = ppc64.REG_VS32
986 p.To.Type = obj.TYPE_MEM
987 p.To.Reg = v.Args[0].Reg()
989 // Save the top of loop
993 p = s.Prog(ppc64.ASTXV)
994 p.From.Type = obj.TYPE_REG
995 p.From.Reg = ppc64.REG_VS32
996 p.To.Type = obj.TYPE_MEM
997 p.To.Reg = v.Args[0].Reg()
1000 p = s.Prog(ppc64.ASTXV)
1001 p.From.Type = obj.TYPE_REG
1002 p.From.Reg = ppc64.REG_VS32
1003 p.To.Type = obj.TYPE_MEM
1004 p.To.Reg = v.Args[0].Reg()
1007 p = s.Prog(ppc64.ASTXV)
1008 p.From.Type = obj.TYPE_REG
1009 p.From.Reg = ppc64.REG_VS32
1010 p.To.Type = obj.TYPE_MEM
1011 p.To.Reg = v.Args[0].Reg()
1014 // Increment address for the
1015 // 64 bytes just zeroed.
1016 p = s.Prog(ppc64.AADD)
1017 p.Reg = v.Args[0].Reg()
1018 p.From.Type = obj.TYPE_CONST
1020 p.To.Type = obj.TYPE_REG
1021 p.To.Reg = v.Args[0].Reg()
1023 // Branch back to top of loop
1025 // BC with BO_BCTR generates bdnz
1026 p = s.Prog(ppc64.ABC)
1027 p.From.Type = obj.TYPE_CONST
1028 p.From.Offset = ppc64.BO_BCTR
1029 p.Reg = ppc64.REG_R0
1030 p.To.Type = obj.TYPE_BRANCH
1033 // When ctr == 1 the loop was not generated but
1034 // there are at least 64 bytes to clear, so add
1035 // that to the remainder to generate the code
1036 // to clear those doublewords
1041 // Clear the remainder starting at offset zero
1044 if rem >= 16 && ctr <= 1 {
1045 // If the XXLXOR hasn't already been
1046 // generated, do it here to initialize
1048 p := s.Prog(ppc64.AXXLXOR)
1049 p.From.Type = obj.TYPE_REG
1050 p.From.Reg = ppc64.REG_VS32
1051 p.To.Type = obj.TYPE_REG
1052 p.To.Reg = ppc64.REG_VS32
1053 p.Reg = ppc64.REG_VS32
1055 // Generate STXV for 32 or 64
1058 p := s.Prog(ppc64.ASTXV)
1059 p.From.Type = obj.TYPE_REG
1060 p.From.Reg = ppc64.REG_VS32
1061 p.To.Type = obj.TYPE_MEM
1062 p.To.Reg = v.Args[0].Reg()
1063 p.To.Offset = offset
1065 p = s.Prog(ppc64.ASTXV)
1066 p.From.Type = obj.TYPE_REG
1067 p.From.Reg = ppc64.REG_VS32
1068 p.To.Type = obj.TYPE_MEM
1069 p.To.Reg = v.Args[0].Reg()
1070 p.To.Offset = offset + 16
1074 // Generate 16 bytes
1076 p := s.Prog(ppc64.ASTXV)
1077 p.From.Type = obj.TYPE_REG
1078 p.From.Reg = ppc64.REG_VS32
1079 p.To.Type = obj.TYPE_MEM
1080 p.To.Reg = v.Args[0].Reg()
1081 p.To.Offset = offset
1086 // first clear as many doublewords as possible
1087 // then clear remaining sizes as available
1089 op, size := ppc64.AMOVB, int64(1)
1092 op, size = ppc64.AMOVD, 8
1094 op, size = ppc64.AMOVW, 4
1096 op, size = ppc64.AMOVH, 2
1099 p.From.Type = obj.TYPE_REG
1100 p.From.Reg = ppc64.REG_R0
1101 p.To.Type = obj.TYPE_MEM
1102 p.To.Reg = v.Args[0].Reg()
1103 p.To.Offset = offset
1108 case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1110 // Unaligned data doesn't hurt performance
1111 // for these instructions on power8.
1113 // For sizes >= 64 generate a loop as follows:
1115 // Set up loop counter in CTR, used by BC
1116 // XXLXOR VS32,VS32,VS32
1117 // MOVD len/32,REG_TMP
1121 // STXVD2X VS32,(R0)(R20)
1122 // STXVD2X VS32,(R31)(R20)
1126 // any remainder is done as described below
1128 // for sizes < 64 bytes, first clear as many doublewords as possible,
1129 // then handle the remainder
1134 // the remainder bytes are cleared using one or more
1135 // of the following instructions with the appropriate
1136 // offsets depending which instructions are needed
1138 // MOVW R0,n1(R20) 4 bytes
1139 // MOVH R0,n2(R20) 2 bytes
1140 // MOVB R0,n3(R20) 1 byte
1142 // 7 bytes: MOVW, MOVH, MOVB
1143 // 6 bytes: MOVW, MOVH
1144 // 5 bytes: MOVW, MOVB
1145 // 3 bytes: MOVH, MOVB
1147 // each loop iteration does 32 bytes
1148 ctr := v.AuxInt / 32
1151 rem := v.AuxInt % 32
1153 // only generate a loop if there is more
1154 // than 1 iteration.
1156 // Set up VS32 (V0) to hold 0s
1157 p := s.Prog(ppc64.AXXLXOR)
1158 p.From.Type = obj.TYPE_REG
1159 p.From.Reg = ppc64.REG_VS32
1160 p.To.Type = obj.TYPE_REG
1161 p.To.Reg = ppc64.REG_VS32
1162 p.Reg = ppc64.REG_VS32
1164 // Set up CTR loop counter
1165 p = s.Prog(ppc64.AMOVD)
1166 p.From.Type = obj.TYPE_CONST
1168 p.To.Type = obj.TYPE_REG
1169 p.To.Reg = ppc64.REGTMP
1171 p = s.Prog(ppc64.AMOVD)
1172 p.From.Type = obj.TYPE_REG
1173 p.From.Reg = ppc64.REGTMP
1174 p.To.Type = obj.TYPE_REG
1175 p.To.Reg = ppc64.REG_CTR
1177 // Set up R31 to hold index value 16
1178 p = s.Prog(ppc64.AMOVD)
1179 p.From.Type = obj.TYPE_CONST
1181 p.To.Type = obj.TYPE_REG
1182 p.To.Reg = ppc64.REGTMP
1184 // Don't add padding for alignment
1185 // with few loop iterations.
1187 p = s.Prog(obj.APCALIGN)
1188 p.From.Type = obj.TYPE_CONST
1192 // generate 2 STXVD2Xs to store 16 bytes
1193 // when this is a loop then the top must be saved
1195 // This is the top of loop
1197 p = s.Prog(ppc64.ASTXVD2X)
1198 p.From.Type = obj.TYPE_REG
1199 p.From.Reg = ppc64.REG_VS32
1200 p.To.Type = obj.TYPE_MEM
1201 p.To.Reg = v.Args[0].Reg()
1202 p.To.Index = ppc64.REGZERO
1203 // Save the top of loop
1207 p = s.Prog(ppc64.ASTXVD2X)
1208 p.From.Type = obj.TYPE_REG
1209 p.From.Reg = ppc64.REG_VS32
1210 p.To.Type = obj.TYPE_MEM
1211 p.To.Reg = v.Args[0].Reg()
1212 p.To.Index = ppc64.REGTMP
1214 // Increment address for the
1215 // 4 doublewords just zeroed.
1216 p = s.Prog(ppc64.AADD)
1217 p.Reg = v.Args[0].Reg()
1218 p.From.Type = obj.TYPE_CONST
1220 p.To.Type = obj.TYPE_REG
1221 p.To.Reg = v.Args[0].Reg()
1223 // Branch back to top of loop
1225 // BC with BO_BCTR generates bdnz
1226 p = s.Prog(ppc64.ABC)
1227 p.From.Type = obj.TYPE_CONST
1228 p.From.Offset = ppc64.BO_BCTR
1229 p.Reg = ppc64.REG_R0
1230 p.To.Type = obj.TYPE_BRANCH
1234 // when ctr == 1 the loop was not generated but
1235 // there are at least 32 bytes to clear, so add
1236 // that to the remainder to generate the code
1237 // to clear those doublewords
1242 // clear the remainder starting at offset zero
1245 // first clear as many doublewords as possible
1246 // then clear remaining sizes as available
1248 op, size := ppc64.AMOVB, int64(1)
1251 op, size = ppc64.AMOVD, 8
1253 op, size = ppc64.AMOVW, 4
1255 op, size = ppc64.AMOVH, 2
1258 p.From.Type = obj.TYPE_REG
1259 p.From.Reg = ppc64.REG_R0
1260 p.To.Type = obj.TYPE_MEM
1261 p.To.Reg = v.Args[0].Reg()
1262 p.To.Offset = offset
1267 case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1269 bytesPerLoop := int64(32)
1270 // This will be used when moving more
1271 // than 8 bytes. Moves start with
1272 // as many 8 byte moves as possible, then
1273 // 4, 2, or 1 byte(s) as remaining. This will
1274 // work and be efficient for power8 or later.
1275 // If there are 64 or more bytes, then a
1276 // loop is generated to move 32 bytes and
1277 // update the src and dst addresses on each
1278 // iteration. When < 64 bytes, the appropriate
1279 // number of moves are generated based on the
1281 // When moving >= 64 bytes a loop is used
1282 // MOVD len/32,REG_TMP
1286 // LXVD2X (R0)(R21),VS32
1287 // LXVD2X (R31)(R21),VS33
1289 // STXVD2X VS32,(R0)(R20)
1290 // STXVD2X VS33,(R31)(R20)
1293 // Bytes not moved by this loop are moved
1294 // with a combination of the following instructions,
1295 // starting with the largest sizes and generating as
1296 // many as needed, using the appropriate offset value.
1306 // Each loop iteration moves 32 bytes
1307 ctr := v.AuxInt / bytesPerLoop
1309 // Remainder after the loop
1310 rem := v.AuxInt % bytesPerLoop
1312 dstReg := v.Args[0].Reg()
1313 srcReg := v.Args[1].Reg()
1315 // The set of registers used here, must match the clobbered reg list
1321 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1324 p := s.Prog(ppc64.AMOVD)
1325 p.From.Type = obj.TYPE_CONST
1327 p.To.Type = obj.TYPE_REG
1328 p.To.Reg = ppc64.REGTMP
1330 p = s.Prog(ppc64.AMOVD)
1331 p.From.Type = obj.TYPE_REG
1332 p.From.Reg = ppc64.REGTMP
1333 p.To.Type = obj.TYPE_REG
1334 p.To.Reg = ppc64.REG_CTR
1336 // Use REGTMP as index reg
1337 p = s.Prog(ppc64.AMOVD)
1338 p.From.Type = obj.TYPE_CONST
1340 p.To.Type = obj.TYPE_REG
1341 p.To.Reg = ppc64.REGTMP
1343 // Don't add padding for
1344 // alignment with small iteration
1347 p = s.Prog(obj.APCALIGN)
1348 p.From.Type = obj.TYPE_CONST
1352 // Generate 16 byte loads and stores.
1353 // Use temp register for index (16)
1354 // on the second one.
1356 p = s.Prog(ppc64.ALXVD2X)
1357 p.From.Type = obj.TYPE_MEM
1359 p.From.Index = ppc64.REGZERO
1360 p.To.Type = obj.TYPE_REG
1361 p.To.Reg = ppc64.REG_VS32
1365 p = s.Prog(ppc64.ALXVD2X)
1366 p.From.Type = obj.TYPE_MEM
1368 p.From.Index = ppc64.REGTMP
1369 p.To.Type = obj.TYPE_REG
1370 p.To.Reg = ppc64.REG_VS33
1372 // increment the src reg for next iteration
1373 p = s.Prog(ppc64.AADD)
1375 p.From.Type = obj.TYPE_CONST
1376 p.From.Offset = bytesPerLoop
1377 p.To.Type = obj.TYPE_REG
1380 // generate 16 byte stores
1381 p = s.Prog(ppc64.ASTXVD2X)
1382 p.From.Type = obj.TYPE_REG
1383 p.From.Reg = ppc64.REG_VS32
1384 p.To.Type = obj.TYPE_MEM
1386 p.To.Index = ppc64.REGZERO
1388 p = s.Prog(ppc64.ASTXVD2X)
1389 p.From.Type = obj.TYPE_REG
1390 p.From.Reg = ppc64.REG_VS33
1391 p.To.Type = obj.TYPE_MEM
1393 p.To.Index = ppc64.REGTMP
1395 // increment the dst reg for next iteration
1396 p = s.Prog(ppc64.AADD)
1398 p.From.Type = obj.TYPE_CONST
1399 p.From.Offset = bytesPerLoop
1400 p.To.Type = obj.TYPE_REG
1403 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1405 p = s.Prog(ppc64.ABC)
1406 p.From.Type = obj.TYPE_CONST
1407 p.From.Offset = ppc64.BO_BCTR
1408 p.Reg = ppc64.REG_R0
1409 p.To.Type = obj.TYPE_BRANCH
1412 // srcReg and dstReg were incremented in the loop, so
1413 // later instructions start with offset 0.
1417 // No loop was generated for one iteration, so
1418 // add 32 bytes to the remainder to move those bytes.
1424 // Generate 16 byte loads and stores.
1425 // Use temp register for index (value 16)
1426 // on the second one.
1427 p := s.Prog(ppc64.ALXVD2X)
1428 p.From.Type = obj.TYPE_MEM
1430 p.From.Index = ppc64.REGZERO
1431 p.To.Type = obj.TYPE_REG
1432 p.To.Reg = ppc64.REG_VS32
1434 p = s.Prog(ppc64.ASTXVD2X)
1435 p.From.Type = obj.TYPE_REG
1436 p.From.Reg = ppc64.REG_VS32
1437 p.To.Type = obj.TYPE_MEM
1439 p.To.Index = ppc64.REGZERO
1445 // Use REGTMP as index reg
1446 p := s.Prog(ppc64.AMOVD)
1447 p.From.Type = obj.TYPE_CONST
1449 p.To.Type = obj.TYPE_REG
1450 p.To.Reg = ppc64.REGTMP
1452 p = s.Prog(ppc64.ALXVD2X)
1453 p.From.Type = obj.TYPE_MEM
1455 p.From.Index = ppc64.REGTMP
1456 p.To.Type = obj.TYPE_REG
1457 p.To.Reg = ppc64.REG_VS32
1459 p = s.Prog(ppc64.ASTXVD2X)
1460 p.From.Type = obj.TYPE_REG
1461 p.From.Reg = ppc64.REG_VS32
1462 p.To.Type = obj.TYPE_MEM
1464 p.To.Index = ppc64.REGTMP
1471 // Generate all the remaining load and store pairs, starting with
1472 // as many 8 byte moves as possible, then 4, 2, 1.
1474 op, size := ppc64.AMOVB, int64(1)
1477 op, size = ppc64.AMOVD, 8
1479 op, size = ppc64.AMOVW, 4
1481 op, size = ppc64.AMOVH, 2
1485 p.To.Type = obj.TYPE_REG
1486 p.To.Reg = ppc64.REGTMP
1487 p.From.Type = obj.TYPE_MEM
1489 p.From.Offset = offset
1493 p.From.Type = obj.TYPE_REG
1494 p.From.Reg = ppc64.REGTMP
1495 p.To.Type = obj.TYPE_MEM
1497 p.To.Offset = offset
1502 case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1503 bytesPerLoop := int64(64)
1504 // This is used when moving more
1505 // than 8 bytes on power9. Moves start with
1506 // as many 8 byte moves as possible, then
1507 // 4, 2, or 1 byte(s) as remaining. This will
1508 // work and be efficient for power8 or later.
1509 // If there are 64 or more bytes, then a
1510 // loop is generated to move 64 bytes and
1511 // update the src and dst addresses on each
1512 // iteration. When < 64 bytes, the appropriate
1513 // number of moves are generated based on the
1515 // When moving >= 64 bytes a loop is used
1516 // MOVD len/64,REG_TMP
1523 // STXV VS33,16(R20)
1526 // Bytes not moved by this loop are moved
1527 // with a combination of the following instructions,
1528 // starting with the largest sizes and generating as
1529 // many as needed, using the appropriate offset value.
1539 // Each loop iteration moves 64 bytes
1540 ctr := v.AuxInt / bytesPerLoop
1542 // Remainder after the loop
1543 rem := v.AuxInt % bytesPerLoop
1545 dstReg := v.Args[0].Reg()
1546 srcReg := v.Args[1].Reg()
1553 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1556 p := s.Prog(ppc64.AMOVD)
1557 p.From.Type = obj.TYPE_CONST
1559 p.To.Type = obj.TYPE_REG
1560 p.To.Reg = ppc64.REGTMP
1562 p = s.Prog(ppc64.AMOVD)
1563 p.From.Type = obj.TYPE_REG
1564 p.From.Reg = ppc64.REGTMP
1565 p.To.Type = obj.TYPE_REG
1566 p.To.Reg = ppc64.REG_CTR
1568 p = s.Prog(obj.APCALIGN)
1569 p.From.Type = obj.TYPE_CONST
1572 // Generate 16 byte loads and stores.
1573 p = s.Prog(ppc64.ALXV)
1574 p.From.Type = obj.TYPE_MEM
1576 p.From.Offset = offset
1577 p.To.Type = obj.TYPE_REG
1578 p.To.Reg = ppc64.REG_VS32
1582 p = s.Prog(ppc64.ALXV)
1583 p.From.Type = obj.TYPE_MEM
1585 p.From.Offset = offset + 16
1586 p.To.Type = obj.TYPE_REG
1587 p.To.Reg = ppc64.REG_VS33
1589 // generate 16 byte stores
1590 p = s.Prog(ppc64.ASTXV)
1591 p.From.Type = obj.TYPE_REG
1592 p.From.Reg = ppc64.REG_VS32
1593 p.To.Type = obj.TYPE_MEM
1595 p.To.Offset = offset
1597 p = s.Prog(ppc64.ASTXV)
1598 p.From.Type = obj.TYPE_REG
1599 p.From.Reg = ppc64.REG_VS33
1600 p.To.Type = obj.TYPE_MEM
1602 p.To.Offset = offset + 16
1604 // Generate 16 byte loads and stores.
1605 p = s.Prog(ppc64.ALXV)
1606 p.From.Type = obj.TYPE_MEM
1608 p.From.Offset = offset + 32
1609 p.To.Type = obj.TYPE_REG
1610 p.To.Reg = ppc64.REG_VS32
1612 p = s.Prog(ppc64.ALXV)
1613 p.From.Type = obj.TYPE_MEM
1615 p.From.Offset = offset + 48
1616 p.To.Type = obj.TYPE_REG
1617 p.To.Reg = ppc64.REG_VS33
1619 // generate 16 byte stores
1620 p = s.Prog(ppc64.ASTXV)
1621 p.From.Type = obj.TYPE_REG
1622 p.From.Reg = ppc64.REG_VS32
1623 p.To.Type = obj.TYPE_MEM
1625 p.To.Offset = offset + 32
1627 p = s.Prog(ppc64.ASTXV)
1628 p.From.Type = obj.TYPE_REG
1629 p.From.Reg = ppc64.REG_VS33
1630 p.To.Type = obj.TYPE_MEM
1632 p.To.Offset = offset + 48
1634 // increment the src reg for next iteration
1635 p = s.Prog(ppc64.AADD)
1637 p.From.Type = obj.TYPE_CONST
1638 p.From.Offset = bytesPerLoop
1639 p.To.Type = obj.TYPE_REG
1642 // increment the dst reg for next iteration
1643 p = s.Prog(ppc64.AADD)
1645 p.From.Type = obj.TYPE_CONST
1646 p.From.Offset = bytesPerLoop
1647 p.To.Type = obj.TYPE_REG
1650 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1652 p = s.Prog(ppc64.ABC)
1653 p.From.Type = obj.TYPE_CONST
1654 p.From.Offset = ppc64.BO_BCTR
1655 p.Reg = ppc64.REG_R0
1656 p.To.Type = obj.TYPE_BRANCH
1659 // srcReg and dstReg were incremented in the loop, so
1660 // later instructions start with offset 0.
1664 // No loop was generated for one iteration, so
1665 // add 64 bytes to the remainder to move those bytes.
1670 p := s.Prog(ppc64.ALXV)
1671 p.From.Type = obj.TYPE_MEM
1673 p.To.Type = obj.TYPE_REG
1674 p.To.Reg = ppc64.REG_VS32
1676 p = s.Prog(ppc64.ALXV)
1677 p.From.Type = obj.TYPE_MEM
1680 p.To.Type = obj.TYPE_REG
1681 p.To.Reg = ppc64.REG_VS33
1683 p = s.Prog(ppc64.ASTXV)
1684 p.From.Type = obj.TYPE_REG
1685 p.From.Reg = ppc64.REG_VS32
1686 p.To.Type = obj.TYPE_MEM
1689 p = s.Prog(ppc64.ASTXV)
1690 p.From.Type = obj.TYPE_REG
1691 p.From.Reg = ppc64.REG_VS33
1692 p.To.Type = obj.TYPE_MEM
1701 // Generate 16 byte loads and stores.
1702 p := s.Prog(ppc64.ALXV)
1703 p.From.Type = obj.TYPE_MEM
1705 p.From.Offset = offset
1706 p.To.Type = obj.TYPE_REG
1707 p.To.Reg = ppc64.REG_VS32
1709 p = s.Prog(ppc64.ASTXV)
1710 p.From.Type = obj.TYPE_REG
1711 p.From.Reg = ppc64.REG_VS32
1712 p.To.Type = obj.TYPE_MEM
1714 p.To.Offset = offset
1720 p := s.Prog(ppc64.ALXV)
1721 p.From.Type = obj.TYPE_MEM
1723 p.From.Offset = offset
1724 p.To.Type = obj.TYPE_REG
1725 p.To.Reg = ppc64.REG_VS32
1727 p = s.Prog(ppc64.ASTXV)
1728 p.From.Type = obj.TYPE_REG
1729 p.From.Reg = ppc64.REG_VS32
1730 p.To.Type = obj.TYPE_MEM
1732 p.To.Offset = offset
1738 // Generate all the remaining load and store pairs, starting with
1739 // as many 8 byte moves as possible, then 4, 2, 1.
1741 op, size := ppc64.AMOVB, int64(1)
1744 op, size = ppc64.AMOVD, 8
1746 op, size = ppc64.AMOVW, 4
1748 op, size = ppc64.AMOVH, 2
1752 p.To.Type = obj.TYPE_REG
1753 p.To.Reg = ppc64.REGTMP
1754 p.From.Type = obj.TYPE_MEM
1756 p.From.Offset = offset
1760 p.From.Type = obj.TYPE_REG
1761 p.From.Reg = ppc64.REGTMP
1762 p.To.Type = obj.TYPE_MEM
1764 p.To.Offset = offset
1769 case ssa.OpPPC64CALLstatic:
1772 case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1773 p := s.Prog(ppc64.AMOVD)
1774 p.From.Type = obj.TYPE_REG
1775 p.From.Reg = v.Args[0].Reg()
1776 p.To.Type = obj.TYPE_REG
1777 p.To.Reg = ppc64.REG_LR
1779 if v.Args[0].Reg() != ppc64.REG_R12 {
1780 v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1784 pp.To.Reg = ppc64.REG_LR
1786 // Insert a hint this is not a subroutine return.
1787 pp.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 1})
1789 if base.Ctxt.Flag_shared {
1790 // When compiling Go into PIC, the function we just
1791 // called via pointer might have been implemented in
1792 // a separate module and so overwritten the TOC
1793 // pointer in R2; reload it.
1794 q := s.Prog(ppc64.AMOVD)
1795 q.From.Type = obj.TYPE_MEM
1797 q.From.Reg = ppc64.REGSP
1798 q.To.Type = obj.TYPE_REG
1799 q.To.Reg = ppc64.REG_R2
1802 case ssa.OpPPC64LoweredWB:
1803 p := s.Prog(obj.ACALL)
1804 p.To.Type = obj.TYPE_MEM
1805 p.To.Name = obj.NAME_EXTERN
1806 p.To.Sym = v.Aux.(*obj.LSym)
1808 case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1809 p := s.Prog(obj.ACALL)
1810 p.To.Type = obj.TYPE_MEM
1811 p.To.Name = obj.NAME_EXTERN
1812 p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
1813 s.UseArgs(16) // space used in callee args area by assembly stubs
1815 case ssa.OpPPC64LoweredNilCheck:
1816 if objabi.GOOS == "aix" {
1820 // NOP (so the BNE has somewhere to land)
1823 p := s.Prog(ppc64.ACMP)
1824 p.From.Type = obj.TYPE_REG
1825 p.From.Reg = v.Args[0].Reg()
1826 p.To.Type = obj.TYPE_REG
1827 p.To.Reg = ppc64.REG_R0
1830 p2 := s.Prog(ppc64.ABNE)
1831 p2.To.Type = obj.TYPE_BRANCH
1834 // Write at 0 is forbidden and will trigger a SIGSEGV
1835 p = s.Prog(ppc64.AMOVW)
1836 p.From.Type = obj.TYPE_REG
1837 p.From.Reg = ppc64.REG_R0
1838 p.To.Type = obj.TYPE_MEM
1839 p.To.Reg = ppc64.REG_R0
1841 // NOP (so the BNE has somewhere to land)
1842 nop := s.Prog(obj.ANOP)
1843 p2.To.SetTarget(nop)
1846 // Issue a load which will fault if arg is nil.
1847 p := s.Prog(ppc64.AMOVBZ)
1848 p.From.Type = obj.TYPE_MEM
1849 p.From.Reg = v.Args[0].Reg()
1850 ssagen.AddAux(&p.From, v)
1851 p.To.Type = obj.TYPE_REG
1852 p.To.Reg = ppc64.REGTMP
1854 if logopt.Enabled() {
1855 logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1857 if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1858 base.WarnfAt(v.Pos, "generated nil check")
1861 // These should be resolved by rules and not make it here.
1862 case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1863 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1864 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1865 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1866 case ssa.OpPPC64InvertFlags:
1867 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1868 case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1869 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1871 // TODO: implement for clobberdead experiment. Nop is ok for now.
1873 v.Fatalf("genValue not implemented: %s", v.LongString())
// blockJump maps each conditional PPC64 SSA block kind to the branch
// opcode taken when the condition holds (asm) and the inverted opcode
// used when the code falls through to the true successor (invasm).
// For the floating-point kinds a single CR-bit branch cannot express
// "or equal" / "or unordered": asmeq asks ssaGenBlock to emit an extra
// BEQ after asm, and invasmun asks it to emit an extra BVS (branch on
// unordered/NaN) after invasm — see the ABEQ/ABVS uses in ssaGenBlock.
// NOTE(review): the asm/invasm field declaration and the literal's
// opening/closing lines fall outside this view; confirm field order
// against the full file.
1877 var blockJump = [...]struct {
1879 asmeq, invasmun bool
1881 ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1882 ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1884 ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1885 ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1886 ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1887 ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1889 // TODO: need to work FP comparisons into block jumps
1890 ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1891 ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1892 ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1893 ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1896 func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
1898 case ssa.BlockDefer:
1899 // defer returns in R3:
1900 // 0 if we should continue executing
1901 // 1 if we should jump to deferreturn call
1902 p := s.Prog(ppc64.ACMP)
1903 p.From.Type = obj.TYPE_REG
1904 p.From.Reg = ppc64.REG_R3
1905 p.To.Type = obj.TYPE_REG
1906 p.To.Reg = ppc64.REG_R0
1908 p = s.Prog(ppc64.ABNE)
1909 p.To.Type = obj.TYPE_BRANCH
1910 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
1911 if b.Succs[0].Block() != next {
1912 p := s.Prog(obj.AJMP)
1913 p.To.Type = obj.TYPE_BRANCH
1914 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
1917 case ssa.BlockPlain:
1918 if b.Succs[0].Block() != next {
1919 p := s.Prog(obj.AJMP)
1920 p.To.Type = obj.TYPE_BRANCH
1921 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
1926 case ssa.BlockRetJmp:
1927 p := s.Prog(obj.AJMP)
1928 p.To.Type = obj.TYPE_MEM
1929 p.To.Name = obj.NAME_EXTERN
1930 p.To.Sym = b.Aux.(*obj.LSym)
1932 case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
1933 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
1934 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
1935 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
1936 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
1937 jmp := blockJump[b.Kind]
1939 case b.Succs[0].Block():
1940 s.Br(jmp.invasm, b.Succs[1].Block())
1942 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1943 s.Br(ppc64.ABVS, b.Succs[1].Block())
1945 case b.Succs[1].Block():
1946 s.Br(jmp.asm, b.Succs[0].Block())
1948 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1951 if b.Likely != ssa.BranchUnlikely {
1952 s.Br(jmp.asm, b.Succs[0].Block())
1954 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1956 s.Br(obj.AJMP, b.Succs[1].Block())
1958 s.Br(jmp.invasm, b.Succs[1].Block())
1960 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1961 s.Br(ppc64.ABVS, b.Succs[1].Block())
1963 s.Br(obj.AJMP, b.Succs[0].Block())
1967 b.Fatalf("branch not implemented: %s", b.LongString())