1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/gc"
9 "cmd/compile/internal/logopt"
10 "cmd/compile/internal/ssa"
11 "cmd/compile/internal/types"
13 "cmd/internal/obj/ppc64"
19 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
20 func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
21 // flive := b.FlagsLiveAtEnd
22 // if b.Control != nil && b.Control.Type.IsFlags() {
25 // for i := len(b.Values) - 1; i >= 0; i-- {
27 // if flive && (v.Op == ssa.OpPPC64MOVDconst) {
28 // // The "mark" is any non-nil Aux value.
31 // if v.Type.IsFlags() {
34 // for _, a := range v.Args {
35 // if a.Type.IsFlags() {
42 // loadByType returns the load instruction of the given type.
43 func loadByType(t *types.Type) obj.As {
75 panic("bad load type")
78 // storeByType returns the store instruction of the given type.
79 func storeByType(t *types.Type) obj.As {
99 panic("bad store type")
102 func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
125 case ssa.OpPPC64LoweredMuluhilo:
126 // MULHDU Rarg1, Rarg0, Reg0
127 // MULLD Rarg1, Rarg0, Reg1
128 r0 := v.Args[0].Reg()
129 r1 := v.Args[1].Reg()
130 p := s.Prog(ppc64.AMULHDU)
131 p.From.Type = obj.TYPE_REG
134 p.To.Type = obj.TYPE_REG
136 p1 := s.Prog(ppc64.AMULLD)
137 p1.From.Type = obj.TYPE_REG
140 p1.To.Type = obj.TYPE_REG
143 case ssa.OpPPC64LoweredAdd64Carry:
144 // ADDC Rarg2, -1, Rtmp
145 // ADDE Rarg1, Rarg0, Reg0
147 r0 := v.Args[0].Reg()
148 r1 := v.Args[1].Reg()
149 r2 := v.Args[2].Reg()
150 p := s.Prog(ppc64.AADDC)
151 p.From.Type = obj.TYPE_CONST
154 p.To.Type = obj.TYPE_REG
155 p.To.Reg = ppc64.REGTMP
156 p1 := s.Prog(ppc64.AADDE)
157 p1.From.Type = obj.TYPE_REG
160 p1.To.Type = obj.TYPE_REG
162 p2 := s.Prog(ppc64.AADDZE)
163 p2.From.Type = obj.TYPE_REG
164 p2.From.Reg = ppc64.REGZERO
165 p2.To.Type = obj.TYPE_REG
168 case ssa.OpPPC64LoweredAtomicAnd8,
169 ssa.OpPPC64LoweredAtomicOr8:
171 // LBAR (Rarg0), Rtmp
172 // AND/OR Rarg1, Rtmp
173 // STBCCC Rtmp, (Rarg0)
175 r0 := v.Args[0].Reg()
176 r1 := v.Args[1].Reg()
177 // LWSYNC - Assuming shared data not write-through-required nor
178 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
179 plwsync := s.Prog(ppc64.ALWSYNC)
180 plwsync.To.Type = obj.TYPE_NONE
181 p := s.Prog(ppc64.ALBAR)
182 p.From.Type = obj.TYPE_MEM
184 p.To.Type = obj.TYPE_REG
185 p.To.Reg = ppc64.REGTMP
186 p1 := s.Prog(v.Op.Asm())
187 p1.From.Type = obj.TYPE_REG
189 p1.To.Type = obj.TYPE_REG
190 p1.To.Reg = ppc64.REGTMP
191 p2 := s.Prog(ppc64.ASTBCCC)
192 p2.From.Type = obj.TYPE_REG
193 p2.From.Reg = ppc64.REGTMP
194 p2.To.Type = obj.TYPE_MEM
196 p2.RegTo2 = ppc64.REGTMP
197 p3 := s.Prog(ppc64.ABNE)
198 p3.To.Type = obj.TYPE_BRANCH
201 case ssa.OpPPC64LoweredAtomicAdd32,
202 ssa.OpPPC64LoweredAtomicAdd64:
204 // LDAR/LWAR (Rarg0), Rout
206 // STDCCC/STWCCC Rout, (Rarg0)
208 // MOVW Rout,Rout (if Add32)
211 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
215 r0 := v.Args[0].Reg()
216 r1 := v.Args[1].Reg()
218 // LWSYNC - Assuming shared data not write-through-required nor
219 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
220 plwsync := s.Prog(ppc64.ALWSYNC)
221 plwsync.To.Type = obj.TYPE_NONE
224 p.From.Type = obj.TYPE_MEM
226 p.To.Type = obj.TYPE_REG
229 p1 := s.Prog(ppc64.AADD)
230 p1.From.Type = obj.TYPE_REG
233 p1.To.Type = obj.TYPE_REG
236 p3.From.Type = obj.TYPE_REG
238 p3.To.Type = obj.TYPE_MEM
241 p4 := s.Prog(ppc64.ABNE)
242 p4.To.Type = obj.TYPE_BRANCH
245 // Ensure a 32 bit result
246 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
247 p5 := s.Prog(ppc64.AMOVWZ)
248 p5.To.Type = obj.TYPE_REG
250 p5.From.Type = obj.TYPE_REG
254 case ssa.OpPPC64LoweredAtomicExchange32,
255 ssa.OpPPC64LoweredAtomicExchange64:
257 // LDAR/LWAR (Rarg0), Rout
258 // STDCCC/STWCCC Rout, (Rarg0)
263 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
267 r0 := v.Args[0].Reg()
268 r1 := v.Args[1].Reg()
270 // LWSYNC - Assuming shared data not write-through-required nor
271 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
272 plwsync := s.Prog(ppc64.ALWSYNC)
273 plwsync.To.Type = obj.TYPE_NONE
276 p.From.Type = obj.TYPE_MEM
278 p.To.Type = obj.TYPE_REG
282 p1.From.Type = obj.TYPE_REG
284 p1.To.Type = obj.TYPE_MEM
287 p2 := s.Prog(ppc64.ABNE)
288 p2.To.Type = obj.TYPE_BRANCH
291 pisync := s.Prog(ppc64.AISYNC)
292 pisync.To.Type = obj.TYPE_NONE
294 case ssa.OpPPC64LoweredAtomicLoad8,
295 ssa.OpPPC64LoweredAtomicLoad32,
296 ssa.OpPPC64LoweredAtomicLoad64,
297 ssa.OpPPC64LoweredAtomicLoadPtr:
299 // MOVB/MOVD/MOVW (Rarg0), Rout
306 case ssa.OpPPC64LoweredAtomicLoad8:
308 case ssa.OpPPC64LoweredAtomicLoad32:
312 arg0 := v.Args[0].Reg()
314 // SYNC when AuxInt == 1; otherwise, load-acquire
316 psync := s.Prog(ppc64.ASYNC)
317 psync.To.Type = obj.TYPE_NONE
321 p.From.Type = obj.TYPE_MEM
323 p.To.Type = obj.TYPE_REG
327 p1.From.Type = obj.TYPE_REG
329 p1.To.Type = obj.TYPE_REG
332 p2 := s.Prog(ppc64.ABNE)
333 p2.To.Type = obj.TYPE_BRANCH
335 pisync := s.Prog(ppc64.AISYNC)
336 pisync.To.Type = obj.TYPE_NONE
339 case ssa.OpPPC64LoweredAtomicStore8,
340 ssa.OpPPC64LoweredAtomicStore32,
341 ssa.OpPPC64LoweredAtomicStore64:
343 // MOVB/MOVW/MOVD arg1,(arg0)
346 case ssa.OpPPC64LoweredAtomicStore8:
348 case ssa.OpPPC64LoweredAtomicStore32:
351 arg0 := v.Args[0].Reg()
352 arg1 := v.Args[1].Reg()
353 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
355 syncOp := ppc64.ASYNC
357 syncOp = ppc64.ALWSYNC
359 psync := s.Prog(syncOp)
360 psync.To.Type = obj.TYPE_NONE
363 p.To.Type = obj.TYPE_MEM
365 p.From.Type = obj.TYPE_REG
368 case ssa.OpPPC64LoweredAtomicCas64,
369 ssa.OpPPC64LoweredAtomicCas32:
372 // LDAR (Rarg0), MutexHint, Rtmp
375 // STDCCC Rarg2, (Rarg0)
377 // LWSYNC // Only for sequential consistency; not required in CasRel.
386 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
391 r0 := v.Args[0].Reg()
392 r1 := v.Args[1].Reg()
393 r2 := v.Args[2].Reg()
395 // LWSYNC - Assuming shared data not write-through-required nor
396 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
397 plwsync1 := s.Prog(ppc64.ALWSYNC)
398 plwsync1.To.Type = obj.TYPE_NONE
401 p.From.Type = obj.TYPE_MEM
403 p.To.Type = obj.TYPE_REG
404 p.To.Reg = ppc64.REGTMP
405 // If it is a Compare-and-Swap-Release operation, set the EH field with
408 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
412 p1.From.Type = obj.TYPE_REG
414 p1.To.Reg = ppc64.REGTMP
415 p1.To.Type = obj.TYPE_REG
417 p2 := s.Prog(ppc64.ABNE)
418 p2.To.Type = obj.TYPE_BRANCH
421 p3.From.Type = obj.TYPE_REG
423 p3.To.Type = obj.TYPE_MEM
426 p4 := s.Prog(ppc64.ABNE)
427 p4.To.Type = obj.TYPE_BRANCH
429 // LWSYNC - Assuming shared data not write-through-required nor
430 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
431 // If the operation is a CAS-Release, then synchronization is not necessary.
433 plwsync2 := s.Prog(ppc64.ALWSYNC)
434 plwsync2.To.Type = obj.TYPE_NONE
437 p5 := s.Prog(ppc64.AMOVD)
438 p5.From.Type = obj.TYPE_CONST
440 p5.To.Type = obj.TYPE_REG
443 p6 := s.Prog(obj.AJMP)
444 p6.To.Type = obj.TYPE_BRANCH
446 p7 := s.Prog(ppc64.AMOVD)
447 p7.From.Type = obj.TYPE_CONST
449 p7.To.Type = obj.TYPE_REG
453 p8 := s.Prog(obj.ANOP)
456 case ssa.OpPPC64LoweredGetClosurePtr:
457 // Closure pointer is R11 (already)
458 gc.CheckLoweredGetClosurePtr(v)
460 case ssa.OpPPC64LoweredGetCallerSP:
461 // caller's SP is FixedFrameSize below the address of the first arg
462 p := s.Prog(ppc64.AMOVD)
463 p.From.Type = obj.TYPE_ADDR
464 p.From.Offset = -gc.Ctxt.FixedFrameSize()
465 p.From.Name = obj.NAME_PARAM
466 p.To.Type = obj.TYPE_REG
469 case ssa.OpPPC64LoweredGetCallerPC:
470 p := s.Prog(obj.AGETCALLERPC)
471 p.To.Type = obj.TYPE_REG
474 case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
475 // input is already rounded
478 loadOp := loadByType(v.Type)
480 gc.AddrAuto(&p.From, v.Args[0])
481 p.To.Type = obj.TYPE_REG
485 storeOp := storeByType(v.Type)
487 p.From.Type = obj.TYPE_REG
488 p.From.Reg = v.Args[0].Reg()
489 gc.AddrAuto(&p.To, v)
491 case ssa.OpPPC64DIVD:
501 r0 := v.Args[0].Reg()
502 r1 := v.Args[1].Reg()
504 p := s.Prog(ppc64.ACMP)
505 p.From.Type = obj.TYPE_REG
507 p.To.Type = obj.TYPE_CONST
510 pbahead := s.Prog(ppc64.ABEQ)
511 pbahead.To.Type = obj.TYPE_BRANCH
513 p = s.Prog(v.Op.Asm())
514 p.From.Type = obj.TYPE_REG
517 p.To.Type = obj.TYPE_REG
520 pbover := s.Prog(obj.AJMP)
521 pbover.To.Type = obj.TYPE_BRANCH
523 p = s.Prog(ppc64.ANEG)
524 p.To.Type = obj.TYPE_REG
526 p.From.Type = obj.TYPE_REG
533 case ssa.OpPPC64DIVW:
534 // word-width version of above
536 r0 := v.Args[0].Reg()
537 r1 := v.Args[1].Reg()
539 p := s.Prog(ppc64.ACMPW)
540 p.From.Type = obj.TYPE_REG
542 p.To.Type = obj.TYPE_CONST
545 pbahead := s.Prog(ppc64.ABEQ)
546 pbahead.To.Type = obj.TYPE_BRANCH
548 p = s.Prog(v.Op.Asm())
549 p.From.Type = obj.TYPE_REG
552 p.To.Type = obj.TYPE_REG
555 pbover := s.Prog(obj.AJMP)
556 pbover.To.Type = obj.TYPE_BRANCH
558 p = s.Prog(ppc64.ANEG)
559 p.To.Type = obj.TYPE_REG
561 p.From.Type = obj.TYPE_REG
568 case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
569 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
570 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
571 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
572 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
573 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
574 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
575 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
577 r1 := v.Args[0].Reg()
578 r2 := v.Args[1].Reg()
579 p := s.Prog(v.Op.Asm())
580 p.From.Type = obj.TYPE_REG
583 p.To.Type = obj.TYPE_REG
586 case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
587 r1 := v.Args[0].Reg()
588 r2 := v.Args[1].Reg()
589 p := s.Prog(v.Op.Asm())
590 p.From.Type = obj.TYPE_REG
593 p.To.Type = obj.TYPE_REG
594 p.To.Reg = ppc64.REGTMP // result is not needed
596 case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
597 p := s.Prog(v.Op.Asm())
598 p.From.Type = obj.TYPE_CONST
599 p.From.Offset = v.AuxInt
600 p.Reg = v.Args[0].Reg()
601 p.To.Type = obj.TYPE_REG
604 case ssa.OpPPC64MADDLD:
606 r1 := v.Args[0].Reg()
607 r2 := v.Args[1].Reg()
608 r3 := v.Args[2].Reg()
610 p := s.Prog(v.Op.Asm())
611 p.From.Type = obj.TYPE_REG
614 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r3})
615 p.To.Type = obj.TYPE_REG
618 case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
620 r1 := v.Args[0].Reg()
621 r2 := v.Args[1].Reg()
622 r3 := v.Args[2].Reg()
624 p := s.Prog(v.Op.Asm())
625 p.From.Type = obj.TYPE_REG
628 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
629 p.To.Type = obj.TYPE_REG
632 case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
633 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
634 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
635 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
637 p := s.Prog(v.Op.Asm())
638 p.To.Type = obj.TYPE_REG
640 p.From.Type = obj.TYPE_REG
641 p.From.Reg = v.Args[0].Reg()
643 case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
644 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst:
645 p := s.Prog(v.Op.Asm())
646 p.Reg = v.Args[0].Reg()
647 p.From.Type = obj.TYPE_CONST
648 p.From.Offset = v.AuxInt
649 p.To.Type = obj.TYPE_REG
652 case ssa.OpPPC64SUBFCconst:
653 p := s.Prog(v.Op.Asm())
654 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: v.AuxInt})
655 p.From.Type = obj.TYPE_REG
656 p.From.Reg = v.Args[0].Reg()
657 p.To.Type = obj.TYPE_REG
660 case ssa.OpPPC64ANDCCconst:
661 p := s.Prog(v.Op.Asm())
662 p.Reg = v.Args[0].Reg()
663 p.From.Type = obj.TYPE_CONST
664 p.From.Offset = v.AuxInt
665 p.To.Type = obj.TYPE_REG
666 p.To.Reg = ppc64.REGTMP // discard result
668 case ssa.OpPPC64MOVDaddr:
669 switch v.Aux.(type) {
671 v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
673 // If aux offset and aux int are both 0, and the same
674 // input and output regs are used, no instruction
675 // needs to be generated, since it would just be
677 if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
678 p := s.Prog(ppc64.AMOVD)
679 p.From.Type = obj.TYPE_ADDR
680 p.From.Reg = v.Args[0].Reg()
681 p.From.Offset = v.AuxInt
682 p.To.Type = obj.TYPE_REG
686 case *obj.LSym, *gc.Node:
687 p := s.Prog(ppc64.AMOVD)
688 p.From.Type = obj.TYPE_ADDR
689 p.From.Reg = v.Args[0].Reg()
690 p.To.Type = obj.TYPE_REG
692 gc.AddAux(&p.From, v)
696 case ssa.OpPPC64MOVDconst:
697 p := s.Prog(v.Op.Asm())
698 p.From.Type = obj.TYPE_CONST
699 p.From.Offset = v.AuxInt
700 p.To.Type = obj.TYPE_REG
703 case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
704 p := s.Prog(v.Op.Asm())
705 p.From.Type = obj.TYPE_FCONST
706 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
707 p.To.Type = obj.TYPE_REG
710 case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
711 p := s.Prog(v.Op.Asm())
712 p.From.Type = obj.TYPE_REG
713 p.From.Reg = v.Args[0].Reg()
714 p.To.Type = obj.TYPE_REG
715 p.To.Reg = v.Args[1].Reg()
717 case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
718 p := s.Prog(v.Op.Asm())
719 p.From.Type = obj.TYPE_REG
720 p.From.Reg = v.Args[0].Reg()
721 p.To.Type = obj.TYPE_CONST
722 p.To.Offset = v.AuxInt
724 case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
725 // Shift in register to required size
726 p := s.Prog(v.Op.Asm())
727 p.From.Type = obj.TYPE_REG
728 p.From.Reg = v.Args[0].Reg()
730 p.To.Type = obj.TYPE_REG
732 case ssa.OpPPC64MOVDload:
734 // MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
735 // For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
736 // the offset is not known until link time. If the load of a go.string uses relocation for the
737 // offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
738 // To avoid this problem, the full address of the go.string is computed and loaded into the base register,
739 // and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
740 // go.string types because other types will have proper alignment.
743 switch n := v.Aux.(type) {
745 gostring = strings.HasPrefix(n.Name, "go.string.")
748 // Generate full addr of the go.string const
750 p := s.Prog(ppc64.AMOVD)
751 p.From.Type = obj.TYPE_ADDR
752 p.From.Reg = v.Args[0].Reg()
753 gc.AddAux(&p.From, v)
754 p.To.Type = obj.TYPE_REG
756 // Load go.string using 0 offset
757 p = s.Prog(v.Op.Asm())
758 p.From.Type = obj.TYPE_MEM
760 p.To.Type = obj.TYPE_REG
764 // Not a go.string, generate a normal load
767 case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
768 p := s.Prog(v.Op.Asm())
769 p.From.Type = obj.TYPE_MEM
770 p.From.Reg = v.Args[0].Reg()
771 gc.AddAux(&p.From, v)
772 p.To.Type = obj.TYPE_REG
775 case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
776 p := s.Prog(v.Op.Asm())
777 p.From.Type = obj.TYPE_MEM
778 p.From.Reg = v.Args[0].Reg()
779 p.To.Type = obj.TYPE_REG
782 case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
783 p := s.Prog(v.Op.Asm())
784 p.To.Type = obj.TYPE_MEM
785 p.To.Reg = v.Args[0].Reg()
786 p.From.Type = obj.TYPE_REG
787 p.From.Reg = v.Args[1].Reg()
789 case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
790 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
791 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
792 p := s.Prog(v.Op.Asm())
793 p.From.Type = obj.TYPE_MEM
794 p.From.Reg = v.Args[0].Reg()
795 p.From.Index = v.Args[1].Reg()
796 p.To.Type = obj.TYPE_REG
799 case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
800 p := s.Prog(v.Op.Asm())
801 p.From.Type = obj.TYPE_REG
802 p.From.Reg = ppc64.REGZERO
803 p.To.Type = obj.TYPE_MEM
804 p.To.Reg = v.Args[0].Reg()
807 case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
808 p := s.Prog(v.Op.Asm())
809 p.From.Type = obj.TYPE_REG
810 p.From.Reg = v.Args[1].Reg()
811 p.To.Type = obj.TYPE_MEM
812 p.To.Reg = v.Args[0].Reg()
815 case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
816 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
817 ssa.OpPPC64MOVHBRstoreidx:
818 p := s.Prog(v.Op.Asm())
819 p.From.Type = obj.TYPE_REG
820 p.From.Reg = v.Args[2].Reg()
821 p.To.Index = v.Args[1].Reg()
822 p.To.Type = obj.TYPE_MEM
823 p.To.Reg = v.Args[0].Reg()
825 case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
827 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
828 // ISEL only accepts 0, 1, 2 condition values but the others can be
829 // achieved by swapping operand order.
830 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
831 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
832 // ISELB is used when a boolean result is needed, returning 0 or 1
833 p := s.Prog(ppc64.AISEL)
834 p.To.Type = obj.TYPE_REG
836 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
837 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
838 if v.Op == ssa.OpPPC64ISEL {
839 r.Reg = v.Args[1].Reg()
841 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
844 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
846 p.Reg = v.Args[0].Reg()
849 p.From.Type = obj.TYPE_CONST
850 p.From.Offset = v.AuxInt & 3
852 case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
853 // The LoweredQuad code generation
854 // generates STXV instructions on
855 // power9. The Short variation is used
856 // if no loop is generated.
858 // sizes >= 64 generate a loop as follows:
860 // Set up loop counter in CTR, used by BC
861 // XXLXOR clears VS32
862 // XXLXOR VS32,VS32,VS32
863 // MOVD len/64,REG_TMP
873 // Bytes per iteration
879 // Only generate a loop if there is more
882 // Set up VS32 (V0) to hold 0s
883 p := s.Prog(ppc64.AXXLXOR)
884 p.From.Type = obj.TYPE_REG
885 p.From.Reg = ppc64.REG_VS32
886 p.To.Type = obj.TYPE_REG
887 p.To.Reg = ppc64.REG_VS32
888 p.Reg = ppc64.REG_VS32
890 // Set up CTR loop counter
891 p = s.Prog(ppc64.AMOVD)
892 p.From.Type = obj.TYPE_CONST
894 p.To.Type = obj.TYPE_REG
895 p.To.Reg = ppc64.REGTMP
897 p = s.Prog(ppc64.AMOVD)
898 p.From.Type = obj.TYPE_REG
899 p.From.Reg = ppc64.REGTMP
900 p.To.Type = obj.TYPE_REG
901 p.To.Reg = ppc64.REG_CTR
903 // Don't generate padding for
904 // loops with few iterations.
906 p = s.Prog(obj.APCALIGN)
907 p.From.Type = obj.TYPE_CONST
911 // generate 4 STXVs to zero 64 bytes
914 p = s.Prog(ppc64.ASTXV)
915 p.From.Type = obj.TYPE_REG
916 p.From.Reg = ppc64.REG_VS32
917 p.To.Type = obj.TYPE_MEM
918 p.To.Reg = v.Args[0].Reg()
920 // Save the top of loop
924 p = s.Prog(ppc64.ASTXV)
925 p.From.Type = obj.TYPE_REG
926 p.From.Reg = ppc64.REG_VS32
927 p.To.Type = obj.TYPE_MEM
928 p.To.Reg = v.Args[0].Reg()
931 p = s.Prog(ppc64.ASTXV)
932 p.From.Type = obj.TYPE_REG
933 p.From.Reg = ppc64.REG_VS32
934 p.To.Type = obj.TYPE_MEM
935 p.To.Reg = v.Args[0].Reg()
938 p = s.Prog(ppc64.ASTXV)
939 p.From.Type = obj.TYPE_REG
940 p.From.Reg = ppc64.REG_VS32
941 p.To.Type = obj.TYPE_MEM
942 p.To.Reg = v.Args[0].Reg()
945 // Increment address for the
946 // 64 bytes just zeroed.
947 p = s.Prog(ppc64.AADD)
948 p.Reg = v.Args[0].Reg()
949 p.From.Type = obj.TYPE_CONST
951 p.To.Type = obj.TYPE_REG
952 p.To.Reg = v.Args[0].Reg()
954 // Branch back to top of loop
956 // BC with BO_BCTR generates bdnz
957 p = s.Prog(ppc64.ABC)
958 p.From.Type = obj.TYPE_CONST
959 p.From.Offset = ppc64.BO_BCTR
961 p.To.Type = obj.TYPE_BRANCH
964 // When ctr == 1 the loop was not generated but
965 // there are at least 64 bytes to clear, so add
966 // that to the remainder to generate the code
967 // to clear those doublewords
972 // Clear the remainder starting at offset zero
975 if rem >= 16 && ctr <= 1 {
976 // If the XXLXOR hasn't already been
977 // generated, do it here to initialize
979 p := s.Prog(ppc64.AXXLXOR)
980 p.From.Type = obj.TYPE_REG
981 p.From.Reg = ppc64.REG_VS32
982 p.To.Type = obj.TYPE_REG
983 p.To.Reg = ppc64.REG_VS32
984 p.Reg = ppc64.REG_VS32
986 // Generate STXV for 32 or 64
989 p := s.Prog(ppc64.ASTXV)
990 p.From.Type = obj.TYPE_REG
991 p.From.Reg = ppc64.REG_VS32
992 p.To.Type = obj.TYPE_MEM
993 p.To.Reg = v.Args[0].Reg()
996 p = s.Prog(ppc64.ASTXV)
997 p.From.Type = obj.TYPE_REG
998 p.From.Reg = ppc64.REG_VS32
999 p.To.Type = obj.TYPE_MEM
1000 p.To.Reg = v.Args[0].Reg()
1001 p.To.Offset = offset + 16
1005 // Generate 16 bytes
1007 p := s.Prog(ppc64.ASTXV)
1008 p.From.Type = obj.TYPE_REG
1009 p.From.Reg = ppc64.REG_VS32
1010 p.To.Type = obj.TYPE_MEM
1011 p.To.Reg = v.Args[0].Reg()
1012 p.To.Offset = offset
1017 // first clear as many doublewords as possible
1018 // then clear remaining sizes as available
1020 op, size := ppc64.AMOVB, int64(1)
1023 op, size = ppc64.AMOVD, 8
1025 op, size = ppc64.AMOVW, 4
1027 op, size = ppc64.AMOVH, 2
1030 p.From.Type = obj.TYPE_REG
1031 p.From.Reg = ppc64.REG_R0
1032 p.To.Type = obj.TYPE_MEM
1033 p.To.Reg = v.Args[0].Reg()
1034 p.To.Offset = offset
1039 case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1041 // Unaligned data doesn't hurt performance
1042 // for these instructions on power8.
1044 // For sizes >= 64 generate a loop as follows:
1046 // Set up loop counter in CTR, used by BC
1047 // XXLXOR VS32,VS32,VS32
1048 // MOVD len/32,REG_TMP
1052 // STXVD2X VS32,(R0)(R20)
1053 // STXVD2X VS32,(R31)(R20)
1057 // any remainder is done as described below
1059 // for sizes < 64 bytes, first clear as many doublewords as possible,
1060 // then handle the remainder
1065 // the remainder bytes are cleared using one or more
1066 // of the following instructions with the appropriate
1067 // offsets depending which instructions are needed
1069 // MOVW R0,n1(R20) 4 bytes
1070 // MOVH R0,n2(R20) 2 bytes
1071 // MOVB R0,n3(R20) 1 byte
1073 // 7 bytes: MOVW, MOVH, MOVB
1074 // 6 bytes: MOVW, MOVH
1075 // 5 bytes: MOVW, MOVB
1076 // 3 bytes: MOVH, MOVB
1078 // each loop iteration does 32 bytes
1079 ctr := v.AuxInt / 32
1082 rem := v.AuxInt % 32
1084 // only generate a loop if there is more
1085 // than 1 iteration.
1087 // Set up VS32 (V0) to hold 0s
1088 p := s.Prog(ppc64.AXXLXOR)
1089 p.From.Type = obj.TYPE_REG
1090 p.From.Reg = ppc64.REG_VS32
1091 p.To.Type = obj.TYPE_REG
1092 p.To.Reg = ppc64.REG_VS32
1093 p.Reg = ppc64.REG_VS32
1095 // Set up CTR loop counter
1096 p = s.Prog(ppc64.AMOVD)
1097 p.From.Type = obj.TYPE_CONST
1099 p.To.Type = obj.TYPE_REG
1100 p.To.Reg = ppc64.REGTMP
1102 p = s.Prog(ppc64.AMOVD)
1103 p.From.Type = obj.TYPE_REG
1104 p.From.Reg = ppc64.REGTMP
1105 p.To.Type = obj.TYPE_REG
1106 p.To.Reg = ppc64.REG_CTR
1108 // Set up R31 to hold index value 16
1109 p = s.Prog(ppc64.AMOVD)
1110 p.From.Type = obj.TYPE_CONST
1112 p.To.Type = obj.TYPE_REG
1113 p.To.Reg = ppc64.REGTMP
1115 // Don't add padding for alignment
1116 // with few loop iterations.
1118 p = s.Prog(obj.APCALIGN)
1119 p.From.Type = obj.TYPE_CONST
1123 // generate 2 STXVD2Xs to store 16 bytes
1124 // when this is a loop then the top must be saved
1126 // This is the top of loop
1128 p = s.Prog(ppc64.ASTXVD2X)
1129 p.From.Type = obj.TYPE_REG
1130 p.From.Reg = ppc64.REG_VS32
1131 p.To.Type = obj.TYPE_MEM
1132 p.To.Reg = v.Args[0].Reg()
1133 p.To.Index = ppc64.REGZERO
1134 // Save the top of loop
1138 p = s.Prog(ppc64.ASTXVD2X)
1139 p.From.Type = obj.TYPE_REG
1140 p.From.Reg = ppc64.REG_VS32
1141 p.To.Type = obj.TYPE_MEM
1142 p.To.Reg = v.Args[0].Reg()
1143 p.To.Index = ppc64.REGTMP
1145 // Increment address for the
1146 // 4 doublewords just zeroed.
1147 p = s.Prog(ppc64.AADD)
1148 p.Reg = v.Args[0].Reg()
1149 p.From.Type = obj.TYPE_CONST
1151 p.To.Type = obj.TYPE_REG
1152 p.To.Reg = v.Args[0].Reg()
1154 // Branch back to top of loop
1156 // BC with BO_BCTR generates bdnz
1157 p = s.Prog(ppc64.ABC)
1158 p.From.Type = obj.TYPE_CONST
1159 p.From.Offset = ppc64.BO_BCTR
1160 p.Reg = ppc64.REG_R0
1161 p.To.Type = obj.TYPE_BRANCH
1165 // when ctr == 1 the loop was not generated but
1166 // there are at least 32 bytes to clear, so add
1167 // that to the remainder to generate the code
1168 // to clear those doublewords
1173 // clear the remainder starting at offset zero
1176 // first clear as many doublewords as possible
1177 // then clear remaining sizes as available
1179 op, size := ppc64.AMOVB, int64(1)
1182 op, size = ppc64.AMOVD, 8
1184 op, size = ppc64.AMOVW, 4
1186 op, size = ppc64.AMOVH, 2
1189 p.From.Type = obj.TYPE_REG
1190 p.From.Reg = ppc64.REG_R0
1191 p.To.Type = obj.TYPE_MEM
1192 p.To.Reg = v.Args[0].Reg()
1193 p.To.Offset = offset
1198 case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1200 bytesPerLoop := int64(32)
1201 // This will be used when moving more
1202 // than 8 bytes. Moves start with
1203 // as many 8 byte moves as possible, then
1204 // 4, 2, or 1 byte(s) as remaining. This will
1205 // work and be efficient for power8 or later.
1206 // If there are 64 or more bytes, then a
1207 // loop is generated to move 32 bytes and
1208 // update the src and dst addresses on each
1209 // iteration. When < 64 bytes, the appropriate
1210 // number of moves are generated based on the
1212 // When moving >= 64 bytes a loop is used
1213 // MOVD len/32,REG_TMP
1217 // LXVD2X (R0)(R21),VS32
1218 // LXVD2X (R31)(R21),VS33
1220 // STXVD2X VS32,(R0)(R20)
1221 // STXVD2X VS33,(R31)(R20)
1224 // Bytes not moved by this loop are moved
1225 // with a combination of the following instructions,
1226 // starting with the largest sizes and generating as
1227 // many as needed, using the appropriate offset value.
1237 // Each loop iteration moves 32 bytes
1238 ctr := v.AuxInt / bytesPerLoop
1240 // Remainder after the loop
1241 rem := v.AuxInt % bytesPerLoop
1243 dstReg := v.Args[0].Reg()
1244 srcReg := v.Args[1].Reg()
1246 // The set of registers used here, must match the clobbered reg list
1252 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1255 p := s.Prog(ppc64.AMOVD)
1256 p.From.Type = obj.TYPE_CONST
1258 p.To.Type = obj.TYPE_REG
1259 p.To.Reg = ppc64.REGTMP
1261 p = s.Prog(ppc64.AMOVD)
1262 p.From.Type = obj.TYPE_REG
1263 p.From.Reg = ppc64.REGTMP
1264 p.To.Type = obj.TYPE_REG
1265 p.To.Reg = ppc64.REG_CTR
1267 // Use REGTMP as index reg
1268 p = s.Prog(ppc64.AMOVD)
1269 p.From.Type = obj.TYPE_CONST
1271 p.To.Type = obj.TYPE_REG
1272 p.To.Reg = ppc64.REGTMP
1274 // Don't add padding for
1275 // alignment with small iteration
1278 p = s.Prog(obj.APCALIGN)
1279 p.From.Type = obj.TYPE_CONST
1283 // Generate 16 byte loads and stores.
1284 // Use temp register for index (16)
1285 // on the second one.
1287 p = s.Prog(ppc64.ALXVD2X)
1288 p.From.Type = obj.TYPE_MEM
1290 p.From.Index = ppc64.REGZERO
1291 p.To.Type = obj.TYPE_REG
1292 p.To.Reg = ppc64.REG_VS32
1296 p = s.Prog(ppc64.ALXVD2X)
1297 p.From.Type = obj.TYPE_MEM
1299 p.From.Index = ppc64.REGTMP
1300 p.To.Type = obj.TYPE_REG
1301 p.To.Reg = ppc64.REG_VS33
1303 // increment the src reg for next iteration
1304 p = s.Prog(ppc64.AADD)
1306 p.From.Type = obj.TYPE_CONST
1307 p.From.Offset = bytesPerLoop
1308 p.To.Type = obj.TYPE_REG
1311 // generate 16 byte stores
1312 p = s.Prog(ppc64.ASTXVD2X)
1313 p.From.Type = obj.TYPE_REG
1314 p.From.Reg = ppc64.REG_VS32
1315 p.To.Type = obj.TYPE_MEM
1317 p.To.Index = ppc64.REGZERO
1319 p = s.Prog(ppc64.ASTXVD2X)
1320 p.From.Type = obj.TYPE_REG
1321 p.From.Reg = ppc64.REG_VS33
1322 p.To.Type = obj.TYPE_MEM
1324 p.To.Index = ppc64.REGTMP
1326 // increment the dst reg for next iteration
1327 p = s.Prog(ppc64.AADD)
1329 p.From.Type = obj.TYPE_CONST
1330 p.From.Offset = bytesPerLoop
1331 p.To.Type = obj.TYPE_REG
1334 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1336 p = s.Prog(ppc64.ABC)
1337 p.From.Type = obj.TYPE_CONST
1338 p.From.Offset = ppc64.BO_BCTR
1339 p.Reg = ppc64.REG_R0
1340 p.To.Type = obj.TYPE_BRANCH
1343 // srcReg and dstReg were incremented in the loop, so
1344 // later instructions start with offset 0.
1348 // No loop was generated for one iteration, so
1349 // add 32 bytes to the remainder to move those bytes.
1355 // Generate 16 byte loads and stores.
1356 // Use temp register for index (value 16)
1357 // on the second one.
1358 p := s.Prog(ppc64.ALXVD2X)
1359 p.From.Type = obj.TYPE_MEM
1361 p.From.Index = ppc64.REGZERO
1362 p.To.Type = obj.TYPE_REG
1363 p.To.Reg = ppc64.REG_VS32
1365 p = s.Prog(ppc64.ASTXVD2X)
1366 p.From.Type = obj.TYPE_REG
1367 p.From.Reg = ppc64.REG_VS32
1368 p.To.Type = obj.TYPE_MEM
1370 p.To.Index = ppc64.REGZERO
1376 // Use REGTMP as index reg
1377 p := s.Prog(ppc64.AMOVD)
1378 p.From.Type = obj.TYPE_CONST
1380 p.To.Type = obj.TYPE_REG
1381 p.To.Reg = ppc64.REGTMP
1383 p = s.Prog(ppc64.ALXVD2X)
1384 p.From.Type = obj.TYPE_MEM
1386 p.From.Index = ppc64.REGTMP
1387 p.To.Type = obj.TYPE_REG
1388 p.To.Reg = ppc64.REG_VS32
1390 p = s.Prog(ppc64.ASTXVD2X)
1391 p.From.Type = obj.TYPE_REG
1392 p.From.Reg = ppc64.REG_VS32
1393 p.To.Type = obj.TYPE_MEM
1395 p.To.Index = ppc64.REGTMP
1402 // Generate all the remaining load and store pairs, starting with
1403 // as many 8 byte moves as possible, then 4, 2, 1.
1405 op, size := ppc64.AMOVB, int64(1)
1408 op, size = ppc64.AMOVD, 8
1410 op, size = ppc64.AMOVW, 4
1412 op, size = ppc64.AMOVH, 2
1416 p.To.Type = obj.TYPE_REG
1417 p.To.Reg = ppc64.REGTMP
1418 p.From.Type = obj.TYPE_MEM
1420 p.From.Offset = offset
1424 p.From.Type = obj.TYPE_REG
1425 p.From.Reg = ppc64.REGTMP
1426 p.To.Type = obj.TYPE_MEM
1428 p.To.Offset = offset
1433 case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1434 bytesPerLoop := int64(64)
1435 // This is used when moving more
1436 // than 8 bytes on power9. Moves start with
1437 // as many 8 byte moves as possible, then
1438 // 4, 2, or 1 byte(s) as remaining. This will
1439 // work and be efficient for power8 or later.
1440 // If there are 64 or more bytes, then a
1441 // loop is generated to move 64 bytes and
1442 // update the src and dst addresses on each
1443 // iteration. When < 64 bytes, the appropriate
1444 // number of moves are generated based on the
1446 // When moving >= 64 bytes a loop is used
1447 // MOVD len/64,REG_TMP
1454 // STXV VS33,16(R20)
1457 // Bytes not moved by this loop are moved
1458 // with a combination of the following instructions,
1459 // starting with the largest sizes and generating as
1460 // many as needed, using the appropriate offset value.
// Each loop iteration moves bytesPerLoop (64) bytes: four 16-byte LXV/STXV
// pairs below, then src/dst are both advanced by bytesPerLoop.
1470 // Each loop iteration moves 64 bytes
1471 ctr := v.AuxInt / bytesPerLoop
1473 // Remainder after the loop
1474 rem := v.AuxInt % bytesPerLoop
1476 dstReg := v.Args[0].Reg()
1477 srcReg := v.Args[1].Reg()
1484 // Only generate looping code when loop counter is > 1 for >= 64 bytes
// Load the iteration count into REGTMP (the constant is set on an elided
// line), then move it into the CTR register for the bdnz loop below.
1487 p := s.Prog(ppc64.AMOVD)
1488 p.From.Type = obj.TYPE_CONST
1490 p.To.Type = obj.TYPE_REG
1491 p.To.Reg = ppc64.REGTMP
1493 p = s.Prog(ppc64.AMOVD)
1494 p.From.Type = obj.TYPE_REG
1495 p.From.Reg = ppc64.REGTMP
1496 p.To.Type = obj.TYPE_REG
1497 p.To.Reg = ppc64.REG_CTR
// Align the loop top (PCALIGN operand is on an elided line).
1499 p = s.Prog(obj.APCALIGN)
1500 p.From.Type = obj.TYPE_CONST
// First 32 bytes of the iteration: two 16-byte vector loads into VS32/VS33,
// then the matching 16-byte vector stores.
1503 // Generate 16 byte loads and stores.
1504 p = s.Prog(ppc64.ALXV)
1505 p.From.Type = obj.TYPE_MEM
1507 p.From.Offset = offset
1508 p.To.Type = obj.TYPE_REG
1509 p.To.Reg = ppc64.REG_VS32
1513 p = s.Prog(ppc64.ALXV)
1514 p.From.Type = obj.TYPE_MEM
1516 p.From.Offset = offset + 16
1517 p.To.Type = obj.TYPE_REG
1518 p.To.Reg = ppc64.REG_VS33
1520 // generate 16 byte stores
1521 p = s.Prog(ppc64.ASTXV)
1522 p.From.Type = obj.TYPE_REG
1523 p.From.Reg = ppc64.REG_VS32
1524 p.To.Type = obj.TYPE_MEM
1526 p.To.Offset = offset
1528 p = s.Prog(ppc64.ASTXV)
1529 p.From.Type = obj.TYPE_REG
1530 p.From.Reg = ppc64.REG_VS33
1531 p.To.Type = obj.TYPE_MEM
1533 p.To.Offset = offset + 16
// Second 32 bytes of the iteration, at offsets +32 and +48.
1535 // Generate 16 byte loads and stores.
1536 p = s.Prog(ppc64.ALXV)
1537 p.From.Type = obj.TYPE_MEM
1539 p.From.Offset = offset + 32
1540 p.To.Type = obj.TYPE_REG
1541 p.To.Reg = ppc64.REG_VS32
1543 p = s.Prog(ppc64.ALXV)
1544 p.From.Type = obj.TYPE_MEM
1546 p.From.Offset = offset + 48
1547 p.To.Type = obj.TYPE_REG
1548 p.To.Reg = ppc64.REG_VS33
1550 // generate 16 byte stores
1551 p = s.Prog(ppc64.ASTXV)
1552 p.From.Type = obj.TYPE_REG
1553 p.From.Reg = ppc64.REG_VS32
1554 p.To.Type = obj.TYPE_MEM
1556 p.To.Offset = offset + 32
1558 p = s.Prog(ppc64.ASTXV)
1559 p.From.Type = obj.TYPE_REG
1560 p.From.Reg = ppc64.REG_VS33
1561 p.To.Type = obj.TYPE_MEM
1563 p.To.Offset = offset + 48
// Advance both address registers by bytesPerLoop (target registers are set
// on elided lines).
1565 // increment the src reg for next iteration
1566 p = s.Prog(ppc64.AADD)
1568 p.From.Type = obj.TYPE_CONST
1569 p.From.Offset = bytesPerLoop
1570 p.To.Type = obj.TYPE_REG
1573 // increment the dst reg for next iteration
1574 p = s.Prog(ppc64.AADD)
1576 p.From.Type = obj.TYPE_CONST
1577 p.From.Offset = bytesPerLoop
1578 p.To.Type = obj.TYPE_REG
1581 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1583 p = s.Prog(ppc64.ABC)
1584 p.From.Type = obj.TYPE_CONST
1585 p.From.Offset = ppc64.BO_BCTR
1586 p.Reg = ppc64.REG_R0
1587 p.To.Type = obj.TYPE_BRANCH
1590 // srcReg and dstReg were incremented in the loop, so
1591 // later instructions start with offset 0.
1595 // No loop was generated for one iteration, so
// (review: the adjustment presumably adds bytesPerLoop, i.e. 64 bytes,
// on an elided line — confirm against the full source.)
1596 // add 32 bytes to the remainder to move those bytes.
// Peel a 32-byte chunk with two LXV/STXV pairs (offsets on elided lines).
1601 p := s.Prog(ppc64.ALXV)
1602 p.From.Type = obj.TYPE_MEM
1604 p.To.Type = obj.TYPE_REG
1605 p.To.Reg = ppc64.REG_VS32
1607 p = s.Prog(ppc64.ALXV)
1608 p.From.Type = obj.TYPE_MEM
1611 p.To.Type = obj.TYPE_REG
1612 p.To.Reg = ppc64.REG_VS33
1614 p = s.Prog(ppc64.ASTXV)
1615 p.From.Type = obj.TYPE_REG
1616 p.From.Reg = ppc64.REG_VS32
1617 p.To.Type = obj.TYPE_MEM
1620 p = s.Prog(ppc64.ASTXV)
1621 p.From.Type = obj.TYPE_REG
1622 p.From.Reg = ppc64.REG_VS33
1623 p.To.Type = obj.TYPE_MEM
// Remaining 16-byte chunks: single LXV/STXV pair per chunk.
1632 // Generate 16 byte loads and stores.
1633 p := s.Prog(ppc64.ALXV)
1634 p.From.Type = obj.TYPE_MEM
1636 p.From.Offset = offset
1637 p.To.Type = obj.TYPE_REG
1638 p.To.Reg = ppc64.REG_VS32
1640 p = s.Prog(ppc64.ASTXV)
1641 p.From.Type = obj.TYPE_REG
1642 p.From.Reg = ppc64.REG_VS32
1643 p.To.Type = obj.TYPE_MEM
1645 p.To.Offset = offset
1651 p := s.Prog(ppc64.ALXV)
1652 p.From.Type = obj.TYPE_MEM
1654 p.From.Offset = offset
1655 p.To.Type = obj.TYPE_REG
1656 p.To.Reg = ppc64.REG_VS32
1658 p = s.Prog(ppc64.ASTXV)
1659 p.From.Type = obj.TYPE_REG
1660 p.From.Reg = ppc64.REG_VS32
1661 p.To.Type = obj.TYPE_MEM
1663 p.To.Offset = offset
// Final sub-16-byte tail: widest-fitting MOV through REGTMP, as in the
// non-quad path above.
1669 // Generate all the remaining load and store pairs, starting with
1670 // as many 8 byte moves as possible, then 4, 2, 1.
1672 op, size := ppc64.AMOVB, int64(1)
1675 op, size = ppc64.AMOVD, 8
1677 op, size = ppc64.AMOVW, 4
1679 op, size = ppc64.AMOVH, 2
1683 p.To.Type = obj.TYPE_REG
1684 p.To.Reg = ppc64.REGTMP
1685 p.From.Type = obj.TYPE_MEM
1687 p.From.Offset = offset
1691 p.From.Type = obj.TYPE_REG
1692 p.From.Reg = ppc64.REGTMP
1693 p.To.Type = obj.TYPE_MEM
1695 p.To.Offset = offset
// Call ops. The CALLstatic body is on elided lines.
1700 case ssa.OpPPC64CALLstatic:
1703 case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
// Indirect call: move the target address into LR before branching.
1704 p := s.Prog(ppc64.AMOVD)
1705 p.From.Type = obj.TYPE_REG
1706 p.From.Reg = v.Args[0].Reg()
1707 p.To.Type = obj.TYPE_REG
1708 p.To.Reg = ppc64.REG_LR
// The ELFv2 ABI requires the function address in R12 for indirect calls
// (context noted on an elided line); anything else is a compiler bug.
1710 if v.Args[0].Reg() != ppc64.REG_R12 {
1711 v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
// Emit the branch through LR (Prog creation is on an elided line).
1715 pp.To.Reg = ppc64.REG_LR
1717 if gc.Ctxt.Flag_shared {
1718 // When compiling Go into PIC, the function we just
1719 // called via pointer might have been implemented in
1720 // a separate module and so overwritten the TOC
1721 // pointer in R2; reload it.
// Reload R2 from its save slot on the stack (offset on an elided line).
1722 q := s.Prog(ppc64.AMOVD)
1723 q.From.Type = obj.TYPE_MEM
1725 q.From.Reg = ppc64.REGSP
1726 q.To.Type = obj.TYPE_REG
1727 q.To.Reg = ppc64.REG_R2
// Write barrier: call the runtime barrier function carried in v.Aux.
1730 case ssa.OpPPC64LoweredWB:
1731 p := s.Prog(obj.ACALL)
1732 p.To.Type = obj.TYPE_MEM
1733 p.To.Name = obj.NAME_EXTERN
1734 p.To.Sym = v.Aux.(*obj.LSym)
// Bounds-check failure: call the panic stub selected by v.AuxInt.
1736 case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1737 p := s.Prog(obj.ACALL)
1738 p.To.Type = obj.TYPE_MEM
1739 p.To.Name = obj.NAME_EXTERN
1740 p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
1741 s.UseArgs(16) // space used in callee args area by assembly stubs
1743 case ssa.OpPPC64LoweredNilCheck:
// On AIX a load from address 0 does not fault (reads are possible there),
// so nil is detected with an explicit compare-and-store sequence instead.
1744 if objabi.GOOS == "aix" {
1748 // NOP (so the BNE has somewhere to land)
// CMP arg0, R0: test the pointer against zero.
1751 p := s.Prog(ppc64.ACMP)
1752 p.From.Type = obj.TYPE_REG
1753 p.From.Reg = v.Args[0].Reg()
1754 p.To.Type = obj.TYPE_REG
1755 p.To.Reg = ppc64.REG_R0
// Branch over the faulting store when the pointer is non-nil
// (branch target is wired up on elided lines).
1758 p2 := s.Prog(ppc64.ABNE)
1759 p2.To.Type = obj.TYPE_BRANCH
1762 // Write at 0 is forbidden and will trigger a SIGSEGV
// MOVW R0, 0(R0): the deliberate faulting store for the nil case.
1763 p = s.Prog(ppc64.AMOVW)
1764 p.From.Type = obj.TYPE_REG
1765 p.From.Reg = ppc64.REG_R0
1766 p.To.Type = obj.TYPE_MEM
1767 p.To.Reg = ppc64.REG_R0
1769 // NOP (so the BNE has somewhere to land)
1770 nop := s.Prog(obj.ANOP)
// Non-AIX path: a plain byte load from the checked pointer faults on nil.
1774 // Issue a load which will fault if arg is nil.
1775 p := s.Prog(ppc64.AMOVBZ)
1776 p.From.Type = obj.TYPE_MEM
1777 p.From.Reg = v.Args[0].Reg()
1778 gc.AddAux(&p.From, v)
1779 p.To.Type = obj.TYPE_REG
1780 p.To.Reg = ppc64.REGTMP
// Optional diagnostics for generated nil checks.
1782 if logopt.Enabled() {
1783 logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1785 if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1786 gc.Warnl(v.Pos, "generated nil check")
1789 // These should be resolved by rules and not make it here.
1790 case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1791 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1792 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1793 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
// InvertFlags and the Flag* constants are likewise compile-time-only ops
// that lowering rules must eliminate before code generation.
1794 case ssa.OpPPC64InvertFlags:
1795 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1796 case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1797 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1799 // TODO: implement for clobberdead experiment. Nop is ok for now.
// Default arm: any op not handled above is a backend bug.
1801 v.Fatalf("genValue not implemented: %s", v.LongString())
// blockJump maps a conditional SSA block kind to its branch instruction and
// the inverted branch used when the fallthrough successor is taken instead.
// The boolean flags (field names partly elided) request an extra branch for
// floating-point comparisons where equality or the unordered (NaN) result
// needs separate handling, per the FGE/FLE comments below.
1805 var blockJump = [...]struct {
1807 asmeq, invasmun bool
1809 ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1810 ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1812 ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1813 ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1814 ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1815 ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1817 // TODO: need to work FP comparisons into block jumps
1818 ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1819 ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1820 ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1821 ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
// ssaGenBlock emits the control-flow instructions that end block b; next is
// the block laid out immediately after, so a jump to it can be elided.
// NOTE(review): this definition runs past the visible listing and interior
// lines are elided; code is kept byte-identical, comments only.
1824 func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
1826 case ssa.BlockDefer:
1827 // defer returns in R3:
1828 // 0 if we should continue executing
1829 // 1 if we should jump to deferreturn call
1830 p := s.Prog(ppc64.ACMP)
1831 p.From.Type = obj.TYPE_REG
1832 p.From.Reg = ppc64.REG_R3
1833 p.To.Type = obj.TYPE_REG
1834 p.To.Reg = ppc64.REG_R0
// Nonzero R3: branch to the deferreturn successor.
1836 p = s.Prog(ppc64.ABNE)
1837 p.To.Type = obj.TYPE_BRANCH
1838 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
// Fall through to succ0 unless it is not the next laid-out block.
1839 if b.Succs[0].Block() != next {
1840 p := s.Prog(obj.AJMP)
1841 p.To.Type = obj.TYPE_BRANCH
1842 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
// Unconditional block: jump only when the successor is not laid out next.
1845 case ssa.BlockPlain:
1846 if b.Succs[0].Block() != next {
1847 p := s.Prog(obj.AJMP)
1848 p.To.Type = obj.TYPE_BRANCH
1849 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
// Tail call to the function symbol in b.Aux.
1854 case ssa.BlockRetJmp:
1855 p := s.Prog(obj.AJMP)
1856 p.To.Type = obj.TYPE_MEM
1857 p.To.Name = obj.NAME_EXTERN
1858 p.To.Sym = b.Aux.(*obj.LSym)
// Conditional blocks: pick branch/inverted-branch from the blockJump table
// depending on which successor (if either) is the next laid-out block.
1860 case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
1861 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
1862 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
1863 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
1864 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
1865 jmp := blockJump[b.Kind]
// next == succ0: emit only the inverted branch to succ1 (plus BVS for the
// FP-unordered case when the table requests it).
1867 case b.Succs[0].Block():
1868 s.Br(jmp.invasm, b.Succs[1].Block())
1870 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1871 s.Br(ppc64.ABVS, b.Succs[1].Block())
// next == succ1: emit only the direct branch to succ0 (plus BEQ for the
// FP equal-included comparisons when requested).
1873 case b.Succs[1].Block():
1874 s.Br(jmp.asm, b.Succs[0].Block())
1876 s.Br(ppc64.ABEQ, b.Succs[0].Block())
// Neither successor is next: branch toward the likely side, then an
// unconditional jump to the other.
1879 if b.Likely != ssa.BranchUnlikely {
1880 s.Br(jmp.asm, b.Succs[0].Block())
1882 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1884 s.Br(obj.AJMP, b.Succs[1].Block())
1886 s.Br(jmp.invasm, b.Succs[1].Block())
1888 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1889 s.Br(ppc64.ABVS, b.Succs[1].Block())
1891 s.Br(obj.AJMP, b.Succs[0].Block())
// Default arm: unhandled block kinds are a backend bug.
1895 b.Fatalf("branch not implemented: %s", b.LongString())