1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/gc"
9 "cmd/compile/internal/logopt"
10 "cmd/compile/internal/ssa"
11 "cmd/compile/internal/types"
13 "cmd/internal/obj/ppc64"
19 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
20 func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
21 // flive := b.FlagsLiveAtEnd
22 // if b.Control != nil && b.Control.Type.IsFlags() {
25 // for i := len(b.Values) - 1; i >= 0; i-- {
27 // if flive && (v.Op == ssa.OpPPC64MOVDconst) {
28 // // The "mark" is any non-nil Aux value.
31 // if v.Type.IsFlags() {
34 // for _, a := range v.Args {
35 // if a.Type.IsFlags() {
42 // loadByType returns the load instruction of the given type.
43 func loadByType(t *types.Type) obj.As {
75 panic("bad load type")
78 // storeByType returns the store instruction of the given type.
79 func storeByType(t *types.Type) obj.As {
99 panic("bad store type")
102 func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
125 case ssa.OpPPC64LoweredMuluhilo:
126 // MULHDU Rarg1, Rarg0, Reg0
127 // MULLD Rarg1, Rarg0, Reg1
128 r0 := v.Args[0].Reg()
129 r1 := v.Args[1].Reg()
130 p := s.Prog(ppc64.AMULHDU)
131 p.From.Type = obj.TYPE_REG
134 p.To.Type = obj.TYPE_REG
136 p1 := s.Prog(ppc64.AMULLD)
137 p1.From.Type = obj.TYPE_REG
140 p1.To.Type = obj.TYPE_REG
143 case ssa.OpPPC64LoweredAdd64Carry:
144 // ADDC Rarg2, -1, Rtmp
145 // ADDE Rarg1, Rarg0, Reg0
147 r0 := v.Args[0].Reg()
148 r1 := v.Args[1].Reg()
149 r2 := v.Args[2].Reg()
150 p := s.Prog(ppc64.AADDC)
151 p.From.Type = obj.TYPE_CONST
154 p.To.Type = obj.TYPE_REG
155 p.To.Reg = ppc64.REGTMP
156 p1 := s.Prog(ppc64.AADDE)
157 p1.From.Type = obj.TYPE_REG
160 p1.To.Type = obj.TYPE_REG
162 p2 := s.Prog(ppc64.AADDZE)
163 p2.From.Type = obj.TYPE_REG
164 p2.From.Reg = ppc64.REGZERO
165 p2.To.Type = obj.TYPE_REG
168 case ssa.OpPPC64LoweredAtomicAnd8,
169 ssa.OpPPC64LoweredAtomicOr8:
171 // LBAR (Rarg0), Rtmp
172 // AND/OR Rarg1, Rtmp
173 // STBCCC Rtmp, (Rarg0)
175 r0 := v.Args[0].Reg()
176 r1 := v.Args[1].Reg()
177 // LWSYNC - Assuming shared data not write-through-required nor
178 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
179 plwsync := s.Prog(ppc64.ALWSYNC)
180 plwsync.To.Type = obj.TYPE_NONE
181 p := s.Prog(ppc64.ALBAR)
182 p.From.Type = obj.TYPE_MEM
184 p.To.Type = obj.TYPE_REG
185 p.To.Reg = ppc64.REGTMP
186 p1 := s.Prog(v.Op.Asm())
187 p1.From.Type = obj.TYPE_REG
189 p1.To.Type = obj.TYPE_REG
190 p1.To.Reg = ppc64.REGTMP
191 p2 := s.Prog(ppc64.ASTBCCC)
192 p2.From.Type = obj.TYPE_REG
193 p2.From.Reg = ppc64.REGTMP
194 p2.To.Type = obj.TYPE_MEM
196 p2.RegTo2 = ppc64.REGTMP
197 p3 := s.Prog(ppc64.ABNE)
198 p3.To.Type = obj.TYPE_BRANCH
201 case ssa.OpPPC64LoweredAtomicAdd32,
202 ssa.OpPPC64LoweredAtomicAdd64:
204 // LDAR/LWAR (Rarg0), Rout
206 // STDCCC/STWCCC Rout, (Rarg0)
208 // MOVW Rout,Rout (if Add32)
211 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
215 r0 := v.Args[0].Reg()
216 r1 := v.Args[1].Reg()
218 // LWSYNC - Assuming shared data not write-through-required nor
219 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
220 plwsync := s.Prog(ppc64.ALWSYNC)
221 plwsync.To.Type = obj.TYPE_NONE
224 p.From.Type = obj.TYPE_MEM
226 p.To.Type = obj.TYPE_REG
229 p1 := s.Prog(ppc64.AADD)
230 p1.From.Type = obj.TYPE_REG
233 p1.To.Type = obj.TYPE_REG
236 p3.From.Type = obj.TYPE_REG
238 p3.To.Type = obj.TYPE_MEM
241 p4 := s.Prog(ppc64.ABNE)
242 p4.To.Type = obj.TYPE_BRANCH
245 // Ensure a 32 bit result
246 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
247 p5 := s.Prog(ppc64.AMOVWZ)
248 p5.To.Type = obj.TYPE_REG
250 p5.From.Type = obj.TYPE_REG
254 case ssa.OpPPC64LoweredAtomicExchange32,
255 ssa.OpPPC64LoweredAtomicExchange64:
257 // LDAR/LWAR (Rarg0), Rout
258 // STDCCC/STWCCC Rout, (Rarg0)
263 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
267 r0 := v.Args[0].Reg()
268 r1 := v.Args[1].Reg()
270 // LWSYNC - Assuming shared data not write-through-required nor
271 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
272 plwsync := s.Prog(ppc64.ALWSYNC)
273 plwsync.To.Type = obj.TYPE_NONE
276 p.From.Type = obj.TYPE_MEM
278 p.To.Type = obj.TYPE_REG
282 p1.From.Type = obj.TYPE_REG
284 p1.To.Type = obj.TYPE_MEM
287 p2 := s.Prog(ppc64.ABNE)
288 p2.To.Type = obj.TYPE_BRANCH
291 pisync := s.Prog(ppc64.AISYNC)
292 pisync.To.Type = obj.TYPE_NONE
294 case ssa.OpPPC64LoweredAtomicLoad8,
295 ssa.OpPPC64LoweredAtomicLoad32,
296 ssa.OpPPC64LoweredAtomicLoad64,
297 ssa.OpPPC64LoweredAtomicLoadPtr:
299 // MOVB/MOVD/MOVW (Rarg0), Rout
306 case ssa.OpPPC64LoweredAtomicLoad8:
308 case ssa.OpPPC64LoweredAtomicLoad32:
312 arg0 := v.Args[0].Reg()
314 // SYNC when AuxInt == 1; otherwise, load-acquire
316 psync := s.Prog(ppc64.ASYNC)
317 psync.To.Type = obj.TYPE_NONE
321 p.From.Type = obj.TYPE_MEM
323 p.To.Type = obj.TYPE_REG
327 p1.From.Type = obj.TYPE_REG
329 p1.To.Type = obj.TYPE_REG
332 p2 := s.Prog(ppc64.ABNE)
333 p2.To.Type = obj.TYPE_BRANCH
335 pisync := s.Prog(ppc64.AISYNC)
336 pisync.To.Type = obj.TYPE_NONE
339 case ssa.OpPPC64LoweredAtomicStore8,
340 ssa.OpPPC64LoweredAtomicStore32,
341 ssa.OpPPC64LoweredAtomicStore64:
343 // MOVB/MOVW/MOVD arg1,(arg0)
346 case ssa.OpPPC64LoweredAtomicStore8:
348 case ssa.OpPPC64LoweredAtomicStore32:
351 arg0 := v.Args[0].Reg()
352 arg1 := v.Args[1].Reg()
353 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
355 syncOp := ppc64.ASYNC
357 syncOp = ppc64.ALWSYNC
359 psync := s.Prog(syncOp)
360 psync.To.Type = obj.TYPE_NONE
363 p.To.Type = obj.TYPE_MEM
365 p.From.Type = obj.TYPE_REG
368 case ssa.OpPPC64LoweredAtomicCas64,
369 ssa.OpPPC64LoweredAtomicCas32:
372 // LDAR (Rarg0), MutexHint, Rtmp
375 // STDCCC Rarg2, (Rarg0)
377 // LWSYNC // Only for sequential consistency; not required in CasRel.
386 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
391 r0 := v.Args[0].Reg()
392 r1 := v.Args[1].Reg()
393 r2 := v.Args[2].Reg()
395 // LWSYNC - Assuming shared data not write-through-required nor
396 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
397 plwsync1 := s.Prog(ppc64.ALWSYNC)
398 plwsync1.To.Type = obj.TYPE_NONE
401 p.From.Type = obj.TYPE_MEM
403 p.To.Type = obj.TYPE_REG
404 p.To.Reg = ppc64.REGTMP
405 // If it is a Compare-and-Swap-Release operation, set the EH field with
408 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
412 p1.From.Type = obj.TYPE_REG
414 p1.To.Reg = ppc64.REGTMP
415 p1.To.Type = obj.TYPE_REG
417 p2 := s.Prog(ppc64.ABNE)
418 p2.To.Type = obj.TYPE_BRANCH
421 p3.From.Type = obj.TYPE_REG
423 p3.To.Type = obj.TYPE_MEM
426 p4 := s.Prog(ppc64.ABNE)
427 p4.To.Type = obj.TYPE_BRANCH
429 // LWSYNC - Assuming shared data not write-through-required nor
430 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
431 // If the operation is a CAS-Release, then synchronization is not necessary.
433 plwsync2 := s.Prog(ppc64.ALWSYNC)
434 plwsync2.To.Type = obj.TYPE_NONE
437 p5 := s.Prog(ppc64.AMOVD)
438 p5.From.Type = obj.TYPE_CONST
440 p5.To.Type = obj.TYPE_REG
443 p6 := s.Prog(obj.AJMP)
444 p6.To.Type = obj.TYPE_BRANCH
446 p7 := s.Prog(ppc64.AMOVD)
447 p7.From.Type = obj.TYPE_CONST
449 p7.To.Type = obj.TYPE_REG
453 p8 := s.Prog(obj.ANOP)
456 case ssa.OpPPC64LoweredGetClosurePtr:
457 // Closure pointer is R11 (already)
458 gc.CheckLoweredGetClosurePtr(v)
460 case ssa.OpPPC64LoweredGetCallerSP:
461 // caller's SP is FixedFrameSize below the address of the first arg
462 p := s.Prog(ppc64.AMOVD)
463 p.From.Type = obj.TYPE_ADDR
464 p.From.Offset = -gc.Ctxt.FixedFrameSize()
465 p.From.Name = obj.NAME_PARAM
466 p.To.Type = obj.TYPE_REG
469 case ssa.OpPPC64LoweredGetCallerPC:
470 p := s.Prog(obj.AGETCALLERPC)
471 p.To.Type = obj.TYPE_REG
474 case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
475 // input is already rounded
478 loadOp := loadByType(v.Type)
480 gc.AddrAuto(&p.From, v.Args[0])
481 p.To.Type = obj.TYPE_REG
485 storeOp := storeByType(v.Type)
487 p.From.Type = obj.TYPE_REG
488 p.From.Reg = v.Args[0].Reg()
489 gc.AddrAuto(&p.To, v)
491 case ssa.OpPPC64DIVD:
501 r0 := v.Args[0].Reg()
502 r1 := v.Args[1].Reg()
504 p := s.Prog(ppc64.ACMP)
505 p.From.Type = obj.TYPE_REG
507 p.To.Type = obj.TYPE_CONST
510 pbahead := s.Prog(ppc64.ABEQ)
511 pbahead.To.Type = obj.TYPE_BRANCH
513 p = s.Prog(v.Op.Asm())
514 p.From.Type = obj.TYPE_REG
517 p.To.Type = obj.TYPE_REG
520 pbover := s.Prog(obj.AJMP)
521 pbover.To.Type = obj.TYPE_BRANCH
523 p = s.Prog(ppc64.ANEG)
524 p.To.Type = obj.TYPE_REG
526 p.From.Type = obj.TYPE_REG
533 case ssa.OpPPC64DIVW:
534 // word-width version of above
536 r0 := v.Args[0].Reg()
537 r1 := v.Args[1].Reg()
539 p := s.Prog(ppc64.ACMPW)
540 p.From.Type = obj.TYPE_REG
542 p.To.Type = obj.TYPE_CONST
545 pbahead := s.Prog(ppc64.ABEQ)
546 pbahead.To.Type = obj.TYPE_BRANCH
548 p = s.Prog(v.Op.Asm())
549 p.From.Type = obj.TYPE_REG
552 p.To.Type = obj.TYPE_REG
555 pbover := s.Prog(obj.AJMP)
556 pbover.To.Type = obj.TYPE_BRANCH
558 p = s.Prog(ppc64.ANEG)
559 p.To.Type = obj.TYPE_REG
561 p.From.Type = obj.TYPE_REG
568 case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
569 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
570 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
571 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
572 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
573 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
574 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
575 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
577 r1 := v.Args[0].Reg()
578 r2 := v.Args[1].Reg()
579 p := s.Prog(v.Op.Asm())
580 p.From.Type = obj.TYPE_REG
583 p.To.Type = obj.TYPE_REG
586 case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
587 r1 := v.Args[0].Reg()
588 r2 := v.Args[1].Reg()
589 p := s.Prog(v.Op.Asm())
590 p.From.Type = obj.TYPE_REG
593 p.To.Type = obj.TYPE_REG
594 p.To.Reg = ppc64.REGTMP // result is not needed
596 case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
597 p := s.Prog(v.Op.Asm())
598 p.From.Type = obj.TYPE_CONST
599 p.From.Offset = v.AuxInt
600 p.Reg = v.Args[0].Reg()
601 p.To.Type = obj.TYPE_REG
604 case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
606 r1 := v.Args[0].Reg()
607 r2 := v.Args[1].Reg()
608 r3 := v.Args[2].Reg()
610 p := s.Prog(v.Op.Asm())
611 p.From.Type = obj.TYPE_REG
614 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
615 p.To.Type = obj.TYPE_REG
618 case ssa.OpPPC64MaskIfNotCarry:
620 p := s.Prog(v.Op.Asm())
621 p.From.Type = obj.TYPE_REG
622 p.From.Reg = ppc64.REGZERO
623 p.To.Type = obj.TYPE_REG
626 case ssa.OpPPC64ADDconstForCarry:
627 r1 := v.Args[0].Reg()
628 p := s.Prog(v.Op.Asm())
630 p.From.Type = obj.TYPE_CONST
631 p.From.Offset = v.AuxInt
632 p.To.Type = obj.TYPE_REG
633 p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
635 case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
636 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
637 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
638 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
640 p := s.Prog(v.Op.Asm())
641 p.To.Type = obj.TYPE_REG
643 p.From.Type = obj.TYPE_REG
644 p.From.Reg = v.Args[0].Reg()
646 case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
647 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst:
648 p := s.Prog(v.Op.Asm())
649 p.Reg = v.Args[0].Reg()
650 p.From.Type = obj.TYPE_CONST
651 p.From.Offset = v.AuxInt
652 p.To.Type = obj.TYPE_REG
655 case ssa.OpPPC64ANDCCconst:
656 p := s.Prog(v.Op.Asm())
657 p.Reg = v.Args[0].Reg()
658 p.From.Type = obj.TYPE_CONST
659 p.From.Offset = v.AuxInt
660 p.To.Type = obj.TYPE_REG
661 p.To.Reg = ppc64.REGTMP // discard result
663 case ssa.OpPPC64MOVDaddr:
664 switch v.Aux.(type) {
666 v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
668 // If aux offset and aux int are both 0, and the same
669 // input and output regs are used, no instruction
670 // needs to be generated, since it would just be
672 if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
673 p := s.Prog(ppc64.AMOVD)
674 p.From.Type = obj.TYPE_ADDR
675 p.From.Reg = v.Args[0].Reg()
676 p.From.Offset = v.AuxInt
677 p.To.Type = obj.TYPE_REG
681 case *obj.LSym, *gc.Node:
682 p := s.Prog(ppc64.AMOVD)
683 p.From.Type = obj.TYPE_ADDR
684 p.From.Reg = v.Args[0].Reg()
685 p.To.Type = obj.TYPE_REG
687 gc.AddAux(&p.From, v)
691 case ssa.OpPPC64MOVDconst:
692 p := s.Prog(v.Op.Asm())
693 p.From.Type = obj.TYPE_CONST
694 p.From.Offset = v.AuxInt
695 p.To.Type = obj.TYPE_REG
698 case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
699 p := s.Prog(v.Op.Asm())
700 p.From.Type = obj.TYPE_FCONST
701 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
702 p.To.Type = obj.TYPE_REG
705 case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
706 p := s.Prog(v.Op.Asm())
707 p.From.Type = obj.TYPE_REG
708 p.From.Reg = v.Args[0].Reg()
709 p.To.Type = obj.TYPE_REG
710 p.To.Reg = v.Args[1].Reg()
712 case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
713 p := s.Prog(v.Op.Asm())
714 p.From.Type = obj.TYPE_REG
715 p.From.Reg = v.Args[0].Reg()
716 p.To.Type = obj.TYPE_CONST
717 p.To.Offset = v.AuxInt
719 case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
720 // Shift in register to required size
721 p := s.Prog(v.Op.Asm())
722 p.From.Type = obj.TYPE_REG
723 p.From.Reg = v.Args[0].Reg()
725 p.To.Type = obj.TYPE_REG
727 case ssa.OpPPC64MOVDload:
729 // MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
730 // For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
731 // the offset is not known until link time. If the load of a go.string uses relocation for the
732 // offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
733 // To avoid this problem, the full address of the go.string is computed and loaded into the base register,
734 // and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
735 // go.string types because other types will have proper alignment.
738 switch n := v.Aux.(type) {
740 gostring = strings.HasPrefix(n.Name, "go.string.")
743 // Generate full addr of the go.string const
745 p := s.Prog(ppc64.AMOVD)
746 p.From.Type = obj.TYPE_ADDR
747 p.From.Reg = v.Args[0].Reg()
748 gc.AddAux(&p.From, v)
749 p.To.Type = obj.TYPE_REG
751 // Load go.string using 0 offset
752 p = s.Prog(v.Op.Asm())
753 p.From.Type = obj.TYPE_MEM
755 p.To.Type = obj.TYPE_REG
759 // Not a go.string, generate a normal load
762 case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
763 p := s.Prog(v.Op.Asm())
764 p.From.Type = obj.TYPE_MEM
765 p.From.Reg = v.Args[0].Reg()
766 gc.AddAux(&p.From, v)
767 p.To.Type = obj.TYPE_REG
770 case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
771 p := s.Prog(v.Op.Asm())
772 p.From.Type = obj.TYPE_MEM
773 p.From.Reg = v.Args[0].Reg()
774 p.To.Type = obj.TYPE_REG
777 case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
778 p := s.Prog(v.Op.Asm())
779 p.To.Type = obj.TYPE_MEM
780 p.To.Reg = v.Args[0].Reg()
781 p.From.Type = obj.TYPE_REG
782 p.From.Reg = v.Args[1].Reg()
784 case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
785 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
786 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
787 p := s.Prog(v.Op.Asm())
788 p.From.Type = obj.TYPE_MEM
789 p.From.Reg = v.Args[0].Reg()
790 p.From.Index = v.Args[1].Reg()
791 p.To.Type = obj.TYPE_REG
794 case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
795 p := s.Prog(v.Op.Asm())
796 p.From.Type = obj.TYPE_REG
797 p.From.Reg = ppc64.REGZERO
798 p.To.Type = obj.TYPE_MEM
799 p.To.Reg = v.Args[0].Reg()
802 case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
803 p := s.Prog(v.Op.Asm())
804 p.From.Type = obj.TYPE_REG
805 p.From.Reg = v.Args[1].Reg()
806 p.To.Type = obj.TYPE_MEM
807 p.To.Reg = v.Args[0].Reg()
810 case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
811 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
812 ssa.OpPPC64MOVHBRstoreidx:
813 p := s.Prog(v.Op.Asm())
814 p.From.Type = obj.TYPE_REG
815 p.From.Reg = v.Args[2].Reg()
816 p.To.Index = v.Args[1].Reg()
817 p.To.Type = obj.TYPE_MEM
818 p.To.Reg = v.Args[0].Reg()
820 case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
822 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
823 // ISEL only accepts 0, 1, 2 condition values but the others can be
824 // achieved by swapping operand order.
825 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
826 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
827 // ISELB is used when a boolean result is needed, returning 0 or 1
828 p := s.Prog(ppc64.AISEL)
829 p.To.Type = obj.TYPE_REG
831 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
832 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
833 if v.Op == ssa.OpPPC64ISEL {
834 r.Reg = v.Args[1].Reg()
836 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
839 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
841 p.Reg = v.Args[0].Reg()
844 p.From.Type = obj.TYPE_CONST
845 p.From.Offset = v.AuxInt & 3
847 case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
848 // The LoweredQuad code generation
849 // generates STXV instructions on
850 // power9. The Short variation is used
851 // if no loop is generated.
853 // sizes >= 64 generate a loop as follows:
855 // Set up loop counter in CTR, used by BC
856 // XXLXOR clears VS32
857 // XXLXOR VS32,VS32,VS32
858 // MOVD len/64,REG_TMP
868 // Bytes per iteration
874 // Only generate a loop if there is more
877 // Set up VS32 (V0) to hold 0s
878 p := s.Prog(ppc64.AXXLXOR)
879 p.From.Type = obj.TYPE_REG
880 p.From.Reg = ppc64.REG_VS32
881 p.To.Type = obj.TYPE_REG
882 p.To.Reg = ppc64.REG_VS32
883 p.Reg = ppc64.REG_VS32
885 // Set up CTR loop counter
886 p = s.Prog(ppc64.AMOVD)
887 p.From.Type = obj.TYPE_CONST
889 p.To.Type = obj.TYPE_REG
890 p.To.Reg = ppc64.REGTMP
892 p = s.Prog(ppc64.AMOVD)
893 p.From.Type = obj.TYPE_REG
894 p.From.Reg = ppc64.REGTMP
895 p.To.Type = obj.TYPE_REG
896 p.To.Reg = ppc64.REG_CTR
898 // Don't generate padding for
899 // loops with few iterations.
901 p = s.Prog(obj.APCALIGN)
902 p.From.Type = obj.TYPE_CONST
906 // generate 4 STXVs to zero 64 bytes
909 p = s.Prog(ppc64.ASTXV)
910 p.From.Type = obj.TYPE_REG
911 p.From.Reg = ppc64.REG_VS32
912 p.To.Type = obj.TYPE_MEM
913 p.To.Reg = v.Args[0].Reg()
915 // Save the top of loop
919 p = s.Prog(ppc64.ASTXV)
920 p.From.Type = obj.TYPE_REG
921 p.From.Reg = ppc64.REG_VS32
922 p.To.Type = obj.TYPE_MEM
923 p.To.Reg = v.Args[0].Reg()
926 p = s.Prog(ppc64.ASTXV)
927 p.From.Type = obj.TYPE_REG
928 p.From.Reg = ppc64.REG_VS32
929 p.To.Type = obj.TYPE_MEM
930 p.To.Reg = v.Args[0].Reg()
933 p = s.Prog(ppc64.ASTXV)
934 p.From.Type = obj.TYPE_REG
935 p.From.Reg = ppc64.REG_VS32
936 p.To.Type = obj.TYPE_MEM
937 p.To.Reg = v.Args[0].Reg()
940 // Increment address for the
941 // 64 bytes just zeroed.
942 p = s.Prog(ppc64.AADD)
943 p.Reg = v.Args[0].Reg()
944 p.From.Type = obj.TYPE_CONST
946 p.To.Type = obj.TYPE_REG
947 p.To.Reg = v.Args[0].Reg()
949 // Branch back to top of loop
951 // BC with BO_BCTR generates bdnz
952 p = s.Prog(ppc64.ABC)
953 p.From.Type = obj.TYPE_CONST
954 p.From.Offset = ppc64.BO_BCTR
956 p.To.Type = obj.TYPE_BRANCH
959 // When ctr == 1 the loop was not generated but
960 // there are at least 64 bytes to clear, so add
961 // that to the remainder to generate the code
962 // to clear those doublewords
967 // Clear the remainder starting at offset zero
970 if rem >= 16 && ctr <= 1 {
971 // If the XXLXOR hasn't already been
972 // generated, do it here to initialize
974 p := s.Prog(ppc64.AXXLXOR)
975 p.From.Type = obj.TYPE_REG
976 p.From.Reg = ppc64.REG_VS32
977 p.To.Type = obj.TYPE_REG
978 p.To.Reg = ppc64.REG_VS32
979 p.Reg = ppc64.REG_VS32
981 // Generate STXV for 32 or 64
984 p := s.Prog(ppc64.ASTXV)
985 p.From.Type = obj.TYPE_REG
986 p.From.Reg = ppc64.REG_VS32
987 p.To.Type = obj.TYPE_MEM
988 p.To.Reg = v.Args[0].Reg()
991 p = s.Prog(ppc64.ASTXV)
992 p.From.Type = obj.TYPE_REG
993 p.From.Reg = ppc64.REG_VS32
994 p.To.Type = obj.TYPE_MEM
995 p.To.Reg = v.Args[0].Reg()
996 p.To.Offset = offset + 16
1000 // Generate 16 bytes
1002 p := s.Prog(ppc64.ASTXV)
1003 p.From.Type = obj.TYPE_REG
1004 p.From.Reg = ppc64.REG_VS32
1005 p.To.Type = obj.TYPE_MEM
1006 p.To.Reg = v.Args[0].Reg()
1007 p.To.Offset = offset
1012 // first clear as many doublewords as possible
1013 // then clear remaining sizes as available
1015 op, size := ppc64.AMOVB, int64(1)
1018 op, size = ppc64.AMOVD, 8
1020 op, size = ppc64.AMOVW, 4
1022 op, size = ppc64.AMOVH, 2
1025 p.From.Type = obj.TYPE_REG
1026 p.From.Reg = ppc64.REG_R0
1027 p.To.Type = obj.TYPE_MEM
1028 p.To.Reg = v.Args[0].Reg()
1029 p.To.Offset = offset
1034 case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1036 // Unaligned data doesn't hurt performance
1037 // for these instructions on power8.
1039 // For sizes >= 64 generate a loop as follows:
1041 // Set up loop counter in CTR, used by BC
1042 // XXLXOR VS32,VS32,VS32
1043 // MOVD len/32,REG_TMP
1047 // STXVD2X VS32,(R0)(R20)
1048 // STXVD2X VS32,(R31)(R20)
1052 // any remainder is done as described below
1054 // for sizes < 64 bytes, first clear as many doublewords as possible,
1055 // then handle the remainder
1060 // the remainder bytes are cleared using one or more
1061 // of the following instructions with the appropriate
1062 // offsets depending which instructions are needed
1064 // MOVW R0,n1(R20) 4 bytes
1065 // MOVH R0,n2(R20) 2 bytes
1066 // MOVB R0,n3(R20) 1 byte
1068 // 7 bytes: MOVW, MOVH, MOVB
1069 // 6 bytes: MOVW, MOVH
1070 // 5 bytes: MOVW, MOVB
1071 // 3 bytes: MOVH, MOVB
1073 // each loop iteration does 32 bytes
1074 ctr := v.AuxInt / 32
1077 rem := v.AuxInt % 32
1079 // only generate a loop if there is more
1080 // than 1 iteration.
1082 // Set up VS32 (V0) to hold 0s
1083 p := s.Prog(ppc64.AXXLXOR)
1084 p.From.Type = obj.TYPE_REG
1085 p.From.Reg = ppc64.REG_VS32
1086 p.To.Type = obj.TYPE_REG
1087 p.To.Reg = ppc64.REG_VS32
1088 p.Reg = ppc64.REG_VS32
1090 // Set up CTR loop counter
1091 p = s.Prog(ppc64.AMOVD)
1092 p.From.Type = obj.TYPE_CONST
1094 p.To.Type = obj.TYPE_REG
1095 p.To.Reg = ppc64.REGTMP
1097 p = s.Prog(ppc64.AMOVD)
1098 p.From.Type = obj.TYPE_REG
1099 p.From.Reg = ppc64.REGTMP
1100 p.To.Type = obj.TYPE_REG
1101 p.To.Reg = ppc64.REG_CTR
1103 // Set up R31 to hold index value 16
1104 p = s.Prog(ppc64.AMOVD)
1105 p.From.Type = obj.TYPE_CONST
1107 p.To.Type = obj.TYPE_REG
1108 p.To.Reg = ppc64.REGTMP
1110 // Don't add padding for alignment
1111 // with few loop iterations.
1113 p = s.Prog(obj.APCALIGN)
1114 p.From.Type = obj.TYPE_CONST
1118 // generate 2 STXVD2Xs to store 16 bytes
1119 // when this is a loop then the top must be saved
1121 // This is the top of loop
1123 p = s.Prog(ppc64.ASTXVD2X)
1124 p.From.Type = obj.TYPE_REG
1125 p.From.Reg = ppc64.REG_VS32
1126 p.To.Type = obj.TYPE_MEM
1127 p.To.Reg = v.Args[0].Reg()
1128 p.To.Index = ppc64.REGZERO
1129 // Save the top of loop
1133 p = s.Prog(ppc64.ASTXVD2X)
1134 p.From.Type = obj.TYPE_REG
1135 p.From.Reg = ppc64.REG_VS32
1136 p.To.Type = obj.TYPE_MEM
1137 p.To.Reg = v.Args[0].Reg()
1138 p.To.Index = ppc64.REGTMP
1140 // Increment address for the
1141 // 4 doublewords just zeroed.
1142 p = s.Prog(ppc64.AADD)
1143 p.Reg = v.Args[0].Reg()
1144 p.From.Type = obj.TYPE_CONST
1146 p.To.Type = obj.TYPE_REG
1147 p.To.Reg = v.Args[0].Reg()
1149 // Branch back to top of loop
1151 // BC with BO_BCTR generates bdnz
1152 p = s.Prog(ppc64.ABC)
1153 p.From.Type = obj.TYPE_CONST
1154 p.From.Offset = ppc64.BO_BCTR
1155 p.Reg = ppc64.REG_R0
1156 p.To.Type = obj.TYPE_BRANCH
1160 // when ctr == 1 the loop was not generated but
1161 // there are at least 32 bytes to clear, so add
1162 // that to the remainder to generate the code
1163 // to clear those doublewords
1168 // clear the remainder starting at offset zero
1171 // first clear as many doublewords as possible
1172 // then clear remaining sizes as available
1174 op, size := ppc64.AMOVB, int64(1)
1177 op, size = ppc64.AMOVD, 8
1179 op, size = ppc64.AMOVW, 4
1181 op, size = ppc64.AMOVH, 2
1184 p.From.Type = obj.TYPE_REG
1185 p.From.Reg = ppc64.REG_R0
1186 p.To.Type = obj.TYPE_MEM
1187 p.To.Reg = v.Args[0].Reg()
1188 p.To.Offset = offset
1193 case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1195 bytesPerLoop := int64(32)
1196 // This will be used when moving more
1197 // than 8 bytes. Moves start with
1198 // as many 8 byte moves as possible, then
1199 // 4, 2, or 1 byte(s) as remaining. This will
1200 // work and be efficient for power8 or later.
1201 // If there are 64 or more bytes, then a
1202 // loop is generated to move 32 bytes and
1203 // update the src and dst addresses on each
1204 // iteration. When < 64 bytes, the appropriate
1205 // number of moves are generated based on the
1207 // When moving >= 64 bytes a loop is used
1208 // MOVD len/32,REG_TMP
1212 // LXVD2X (R0)(R21),VS32
1213 // LXVD2X (R31)(R21),VS33
1215 // STXVD2X VS32,(R0)(R20)
1216 // STXVD2X VS33,(R31)(R20)
1219 // Bytes not moved by this loop are moved
1220 // with a combination of the following instructions,
1221 // starting with the largest sizes and generating as
1222 // many as needed, using the appropriate offset value.
1232 // Each loop iteration moves 32 bytes
1233 ctr := v.AuxInt / bytesPerLoop
1235 // Remainder after the loop
1236 rem := v.AuxInt % bytesPerLoop
1238 dstReg := v.Args[0].Reg()
1239 srcReg := v.Args[1].Reg()
1241 // The set of registers used here, must match the clobbered reg list
1247 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1250 p := s.Prog(ppc64.AMOVD)
1251 p.From.Type = obj.TYPE_CONST
1253 p.To.Type = obj.TYPE_REG
1254 p.To.Reg = ppc64.REGTMP
1256 p = s.Prog(ppc64.AMOVD)
1257 p.From.Type = obj.TYPE_REG
1258 p.From.Reg = ppc64.REGTMP
1259 p.To.Type = obj.TYPE_REG
1260 p.To.Reg = ppc64.REG_CTR
1262 // Use REGTMP as index reg
1263 p = s.Prog(ppc64.AMOVD)
1264 p.From.Type = obj.TYPE_CONST
1266 p.To.Type = obj.TYPE_REG
1267 p.To.Reg = ppc64.REGTMP
1269 // Don't add padding for
1270 // alignment with small iteration
1273 p = s.Prog(obj.APCALIGN)
1274 p.From.Type = obj.TYPE_CONST
1278 // Generate 16 byte loads and stores.
1279 // Use temp register for index (16)
1280 // on the second one.
1282 p = s.Prog(ppc64.ALXVD2X)
1283 p.From.Type = obj.TYPE_MEM
1285 p.From.Index = ppc64.REGZERO
1286 p.To.Type = obj.TYPE_REG
1287 p.To.Reg = ppc64.REG_VS32
1291 p = s.Prog(ppc64.ALXVD2X)
1292 p.From.Type = obj.TYPE_MEM
1294 p.From.Index = ppc64.REGTMP
1295 p.To.Type = obj.TYPE_REG
1296 p.To.Reg = ppc64.REG_VS33
1298 // increment the src reg for next iteration
1299 p = s.Prog(ppc64.AADD)
1301 p.From.Type = obj.TYPE_CONST
1302 p.From.Offset = bytesPerLoop
1303 p.To.Type = obj.TYPE_REG
1306 // generate 16 byte stores
1307 p = s.Prog(ppc64.ASTXVD2X)
1308 p.From.Type = obj.TYPE_REG
1309 p.From.Reg = ppc64.REG_VS32
1310 p.To.Type = obj.TYPE_MEM
1312 p.To.Index = ppc64.REGZERO
1314 p = s.Prog(ppc64.ASTXVD2X)
1315 p.From.Type = obj.TYPE_REG
1316 p.From.Reg = ppc64.REG_VS33
1317 p.To.Type = obj.TYPE_MEM
1319 p.To.Index = ppc64.REGTMP
1321 // increment the dst reg for next iteration
1322 p = s.Prog(ppc64.AADD)
1324 p.From.Type = obj.TYPE_CONST
1325 p.From.Offset = bytesPerLoop
1326 p.To.Type = obj.TYPE_REG
1329 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1331 p = s.Prog(ppc64.ABC)
1332 p.From.Type = obj.TYPE_CONST
1333 p.From.Offset = ppc64.BO_BCTR
1334 p.Reg = ppc64.REG_R0
1335 p.To.Type = obj.TYPE_BRANCH
1338 // srcReg and dstReg were incremented in the loop, so
1339 // later instructions start with offset 0.
1343 // No loop was generated for one iteration, so
1344 // add 32 bytes to the remainder to move those bytes.
1350 // Generate 16 byte loads and stores.
1351 // Use temp register for index (value 16)
1352 // on the second one.
1353 p := s.Prog(ppc64.ALXVD2X)
1354 p.From.Type = obj.TYPE_MEM
1356 p.From.Index = ppc64.REGZERO
1357 p.To.Type = obj.TYPE_REG
1358 p.To.Reg = ppc64.REG_VS32
1360 p = s.Prog(ppc64.ASTXVD2X)
1361 p.From.Type = obj.TYPE_REG
1362 p.From.Reg = ppc64.REG_VS32
1363 p.To.Type = obj.TYPE_MEM
1365 p.To.Index = ppc64.REGZERO
1371 // Use REGTMP as index reg
1372 p := s.Prog(ppc64.AMOVD)
1373 p.From.Type = obj.TYPE_CONST
1375 p.To.Type = obj.TYPE_REG
1376 p.To.Reg = ppc64.REGTMP
1378 p = s.Prog(ppc64.ALXVD2X)
1379 p.From.Type = obj.TYPE_MEM
1381 p.From.Index = ppc64.REGTMP
1382 p.To.Type = obj.TYPE_REG
1383 p.To.Reg = ppc64.REG_VS32
1385 p = s.Prog(ppc64.ASTXVD2X)
1386 p.From.Type = obj.TYPE_REG
1387 p.From.Reg = ppc64.REG_VS32
1388 p.To.Type = obj.TYPE_MEM
1390 p.To.Index = ppc64.REGTMP
1397 // Generate all the remaining load and store pairs, starting with
1398 // as many 8 byte moves as possible, then 4, 2, 1.
1400 op, size := ppc64.AMOVB, int64(1)
1403 op, size = ppc64.AMOVD, 8
1405 op, size = ppc64.AMOVW, 4
1407 op, size = ppc64.AMOVH, 2
1411 p.To.Type = obj.TYPE_REG
1412 p.To.Reg = ppc64.REGTMP
1413 p.From.Type = obj.TYPE_MEM
1415 p.From.Offset = offset
1419 p.From.Type = obj.TYPE_REG
1420 p.From.Reg = ppc64.REGTMP
1421 p.To.Type = obj.TYPE_MEM
1423 p.To.Offset = offset
1428 case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1429 bytesPerLoop := int64(64)
1430 // This is used when moving more
1431 // than 8 bytes on power9. Moves start with
1432 // as many 8 byte moves as possible, then
1433 // 4, 2, or 1 byte(s) as remaining. This will
1434 // work and be efficient for power8 or later.
1435 // If there are 64 or more bytes, then a
1436 // loop is generated to move 32 bytes and
1437 // update the src and dst addresses on each
1438 // iteration. When < 64 bytes, the appropriate
1439 // number of moves are generated based on the
1441 // When moving >= 64 bytes a loop is used
1442 // MOVD len/32,REG_TMP
1449 // STXV VS33,16(R20)
1452 // Bytes not moved by this loop are moved
1453 // with a combination of the following instructions,
1454 // starting with the largest sizes and generating as
1455 // many as needed, using the appropriate offset value.
1465 // Each loop iteration moves 32 bytes
1466 ctr := v.AuxInt / bytesPerLoop
1468 // Remainder after the loop
1469 rem := v.AuxInt % bytesPerLoop
1471 dstReg := v.Args[0].Reg()
1472 srcReg := v.Args[1].Reg()
1479 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1482 p := s.Prog(ppc64.AMOVD)
1483 p.From.Type = obj.TYPE_CONST
1485 p.To.Type = obj.TYPE_REG
1486 p.To.Reg = ppc64.REGTMP
1488 p = s.Prog(ppc64.AMOVD)
1489 p.From.Type = obj.TYPE_REG
1490 p.From.Reg = ppc64.REGTMP
1491 p.To.Type = obj.TYPE_REG
1492 p.To.Reg = ppc64.REG_CTR
1494 p = s.Prog(obj.APCALIGN)
1495 p.From.Type = obj.TYPE_CONST
1498 // Generate 16 byte loads and stores.
1499 p = s.Prog(ppc64.ALXV)
1500 p.From.Type = obj.TYPE_MEM
1502 p.From.Offset = offset
1503 p.To.Type = obj.TYPE_REG
1504 p.To.Reg = ppc64.REG_VS32
1508 p = s.Prog(ppc64.ALXV)
1509 p.From.Type = obj.TYPE_MEM
1511 p.From.Offset = offset + 16
1512 p.To.Type = obj.TYPE_REG
1513 p.To.Reg = ppc64.REG_VS33
1515 // generate 16 byte stores
1516 p = s.Prog(ppc64.ASTXV)
1517 p.From.Type = obj.TYPE_REG
1518 p.From.Reg = ppc64.REG_VS32
1519 p.To.Type = obj.TYPE_MEM
1521 p.To.Offset = offset
1523 p = s.Prog(ppc64.ASTXV)
1524 p.From.Type = obj.TYPE_REG
1525 p.From.Reg = ppc64.REG_VS33
1526 p.To.Type = obj.TYPE_MEM
1528 p.To.Offset = offset + 16
1530 // Generate 16 byte loads and stores.
1531 p = s.Prog(ppc64.ALXV)
1532 p.From.Type = obj.TYPE_MEM
1534 p.From.Offset = offset + 32
1535 p.To.Type = obj.TYPE_REG
1536 p.To.Reg = ppc64.REG_VS32
1538 p = s.Prog(ppc64.ALXV)
1539 p.From.Type = obj.TYPE_MEM
1541 p.From.Offset = offset + 48
1542 p.To.Type = obj.TYPE_REG
1543 p.To.Reg = ppc64.REG_VS33
1545 // generate 16 byte stores
1546 p = s.Prog(ppc64.ASTXV)
1547 p.From.Type = obj.TYPE_REG
1548 p.From.Reg = ppc64.REG_VS32
1549 p.To.Type = obj.TYPE_MEM
1551 p.To.Offset = offset + 32
1553 p = s.Prog(ppc64.ASTXV)
1554 p.From.Type = obj.TYPE_REG
1555 p.From.Reg = ppc64.REG_VS33
1556 p.To.Type = obj.TYPE_MEM
1558 p.To.Offset = offset + 48
1560 // increment the src reg for next iteration
1561 p = s.Prog(ppc64.AADD)
1563 p.From.Type = obj.TYPE_CONST
1564 p.From.Offset = bytesPerLoop
1565 p.To.Type = obj.TYPE_REG
1568 // increment the dst reg for next iteration
1569 p = s.Prog(ppc64.AADD)
1571 p.From.Type = obj.TYPE_CONST
1572 p.From.Offset = bytesPerLoop
1573 p.To.Type = obj.TYPE_REG
1576 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1578 p = s.Prog(ppc64.ABC)
1579 p.From.Type = obj.TYPE_CONST
1580 p.From.Offset = ppc64.BO_BCTR
1581 p.Reg = ppc64.REG_R0
1582 p.To.Type = obj.TYPE_BRANCH
1585 // srcReg and dstReg were incremented in the loop, so
1586 // later instructions start with offset 0.
1590 // No loop was generated for one iteration, so
1591 // add 32 bytes to the remainder to move those bytes.
1596 p := s.Prog(ppc64.ALXV)
1597 p.From.Type = obj.TYPE_MEM
1599 p.To.Type = obj.TYPE_REG
1600 p.To.Reg = ppc64.REG_VS32
1602 p = s.Prog(ppc64.ALXV)
1603 p.From.Type = obj.TYPE_MEM
1606 p.To.Type = obj.TYPE_REG
1607 p.To.Reg = ppc64.REG_VS33
1609 p = s.Prog(ppc64.ASTXV)
1610 p.From.Type = obj.TYPE_REG
1611 p.From.Reg = ppc64.REG_VS32
1612 p.To.Type = obj.TYPE_MEM
1615 p = s.Prog(ppc64.ASTXV)
1616 p.From.Type = obj.TYPE_REG
1617 p.From.Reg = ppc64.REG_VS33
1618 p.To.Type = obj.TYPE_MEM
1627 // Generate 16 byte loads and stores.
1628 p := s.Prog(ppc64.ALXV)
1629 p.From.Type = obj.TYPE_MEM
1631 p.From.Offset = offset
1632 p.To.Type = obj.TYPE_REG
1633 p.To.Reg = ppc64.REG_VS32
1635 p = s.Prog(ppc64.ASTXV)
1636 p.From.Type = obj.TYPE_REG
1637 p.From.Reg = ppc64.REG_VS32
1638 p.To.Type = obj.TYPE_MEM
1640 p.To.Offset = offset
1646 p := s.Prog(ppc64.ALXV)
1647 p.From.Type = obj.TYPE_MEM
1649 p.From.Offset = offset
1650 p.To.Type = obj.TYPE_REG
1651 p.To.Reg = ppc64.REG_VS32
1653 p = s.Prog(ppc64.ASTXV)
1654 p.From.Type = obj.TYPE_REG
1655 p.From.Reg = ppc64.REG_VS32
1656 p.To.Type = obj.TYPE_MEM
1658 p.To.Offset = offset
1664 // Generate all the remaining load and store pairs, starting with
1665 // as many 8 byte moves as possible, then 4, 2, 1.
1667 op, size := ppc64.AMOVB, int64(1)
1670 op, size = ppc64.AMOVD, 8
1672 op, size = ppc64.AMOVW, 4
1674 op, size = ppc64.AMOVH, 2
1678 p.To.Type = obj.TYPE_REG
1679 p.To.Reg = ppc64.REGTMP
1680 p.From.Type = obj.TYPE_MEM
1682 p.From.Offset = offset
1686 p.From.Type = obj.TYPE_REG
1687 p.From.Reg = ppc64.REGTMP
1688 p.To.Type = obj.TYPE_MEM
1690 p.To.Offset = offset
1695 case ssa.OpPPC64CALLstatic:
1698 case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1699 p := s.Prog(ppc64.AMOVD)
1700 p.From.Type = obj.TYPE_REG
1701 p.From.Reg = v.Args[0].Reg()
1702 p.To.Type = obj.TYPE_REG
1703 p.To.Reg = ppc64.REG_LR
1705 if v.Args[0].Reg() != ppc64.REG_R12 {
1706 v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1710 pp.To.Reg = ppc64.REG_LR
1712 if gc.Ctxt.Flag_shared {
1713 // When compiling Go into PIC, the function we just
1714 // called via pointer might have been implemented in
1715 // a separate module and so overwritten the TOC
1716 // pointer in R2; reload it.
1717 q := s.Prog(ppc64.AMOVD)
1718 q.From.Type = obj.TYPE_MEM
1720 q.From.Reg = ppc64.REGSP
1721 q.To.Type = obj.TYPE_REG
1722 q.To.Reg = ppc64.REG_R2
1725 case ssa.OpPPC64LoweredWB:
1726 p := s.Prog(obj.ACALL)
1727 p.To.Type = obj.TYPE_MEM
1728 p.To.Name = obj.NAME_EXTERN
1729 p.To.Sym = v.Aux.(*obj.LSym)
1731 case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1732 p := s.Prog(obj.ACALL)
1733 p.To.Type = obj.TYPE_MEM
1734 p.To.Name = obj.NAME_EXTERN
1735 p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
1736 s.UseArgs(16) // space used in callee args area by assembly stubs
1738 case ssa.OpPPC64LoweredNilCheck:
1739 if objabi.GOOS == "aix" {
1743 // NOP (so the BNE has somewhere to land)
1746 p := s.Prog(ppc64.ACMP)
1747 p.From.Type = obj.TYPE_REG
1748 p.From.Reg = v.Args[0].Reg()
1749 p.To.Type = obj.TYPE_REG
1750 p.To.Reg = ppc64.REG_R0
1753 p2 := s.Prog(ppc64.ABNE)
1754 p2.To.Type = obj.TYPE_BRANCH
1757 // Write at 0 is forbidden and will trigger a SIGSEGV
1758 p = s.Prog(ppc64.AMOVW)
1759 p.From.Type = obj.TYPE_REG
1760 p.From.Reg = ppc64.REG_R0
1761 p.To.Type = obj.TYPE_MEM
1762 p.To.Reg = ppc64.REG_R0
1764 // NOP (so the BNE has somewhere to land)
1765 nop := s.Prog(obj.ANOP)
1769 // Issue a load which will fault if arg is nil.
1770 p := s.Prog(ppc64.AMOVBZ)
1771 p.From.Type = obj.TYPE_MEM
1772 p.From.Reg = v.Args[0].Reg()
1773 gc.AddAux(&p.From, v)
1774 p.To.Type = obj.TYPE_REG
1775 p.To.Reg = ppc64.REGTMP
1777 if logopt.Enabled() {
1778 logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1780 if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1781 gc.Warnl(v.Pos, "generated nil check")
1784 // These should be resolved by rules and not make it here.
1785 case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1786 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1787 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1788 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1789 case ssa.OpPPC64InvertFlags:
1790 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1791 case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT, ssa.OpPPC64FlagCarrySet, ssa.OpPPC64FlagCarryClear:
1792 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1794 // TODO: implement for clobberdead experiment. Nop is ok for now.
1796 v.Fatalf("genValue not implemented: %s", v.LongString())
1800 var blockJump = [...]struct {
// blockJump maps an SSA block kind to the PPC64 branch opcodes used to
// lower it: one opcode for branching when the condition holds (asm) and
// one for the inverted condition (invasm) — field line elided in this view.
// asmeq / invasmun flag FP kinds (FGE, FLE) where a single CR-bit branch
// cannot express the condition and a second branch must be emitted
// (presumably consumed by ssaGenBlock's BEQ/BVS follow-up branches —
// NOTE(review): confirm against the ssaGenBlock switch, partially elided here).
1802 	asmeq, invasmun bool
// Integer comparison blocks: one branch suffices in every case, so both
// bools are false.
1804 	ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1805 	ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1807 	ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1808 	ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1809 	ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1810 	ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1812 	// TODO: need to work FP comparisons into block jumps
// Floating-point comparison blocks. FGE and FLE are composite conditions
// (e.g. GE = GT or EQ), so they set asmeq/invasmun to request the extra
// equality / unordered branch; FLT and FGT map directly to one opcode.
1813 	ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1814 	ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1815 	ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1816 	ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1819 func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
1821 case ssa.BlockDefer:
1822 // defer returns in R3:
1823 // 0 if we should continue executing
1824 // 1 if we should jump to deferreturn call
1825 p := s.Prog(ppc64.ACMP)
1826 p.From.Type = obj.TYPE_REG
1827 p.From.Reg = ppc64.REG_R3
1828 p.To.Type = obj.TYPE_REG
1829 p.To.Reg = ppc64.REG_R0
1831 p = s.Prog(ppc64.ABNE)
1832 p.To.Type = obj.TYPE_BRANCH
1833 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
1834 if b.Succs[0].Block() != next {
1835 p := s.Prog(obj.AJMP)
1836 p.To.Type = obj.TYPE_BRANCH
1837 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1840 case ssa.BlockPlain:
1841 if b.Succs[0].Block() != next {
1842 p := s.Prog(obj.AJMP)
1843 p.To.Type = obj.TYPE_BRANCH
1844 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1849 case ssa.BlockRetJmp:
1850 p := s.Prog(obj.AJMP)
1851 p.To.Type = obj.TYPE_MEM
1852 p.To.Name = obj.NAME_EXTERN
1853 p.To.Sym = b.Aux.(*obj.LSym)
1855 case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
1856 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
1857 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
1858 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
1859 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
1860 jmp := blockJump[b.Kind]
1862 case b.Succs[0].Block():
1863 s.Br(jmp.invasm, b.Succs[1].Block())
1865 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1866 s.Br(ppc64.ABVS, b.Succs[1].Block())
1868 case b.Succs[1].Block():
1869 s.Br(jmp.asm, b.Succs[0].Block())
1871 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1874 if b.Likely != ssa.BranchUnlikely {
1875 s.Br(jmp.asm, b.Succs[0].Block())
1877 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1879 s.Br(obj.AJMP, b.Succs[1].Block())
1881 s.Br(jmp.invasm, b.Succs[1].Block())
1883 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1884 s.Br(ppc64.ABVS, b.Succs[1].Block())
1886 s.Br(obj.AJMP, b.Succs[0].Block())
1890 b.Fatalf("branch not implemented: %s", b.LongString())