1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/gc"
9 "cmd/compile/internal/logopt"
10 "cmd/compile/internal/ssa"
11 "cmd/compile/internal/types"
13 "cmd/internal/obj/ppc64"
19 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
20 func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
21 // flive := b.FlagsLiveAtEnd
22 // if b.Control != nil && b.Control.Type.IsFlags() {
25 // for i := len(b.Values) - 1; i >= 0; i-- {
27 // if flive && (v.Op == ssa.OpPPC64MOVDconst) {
28 // // The "mark" is any non-nil Aux value.
31 // if v.Type.IsFlags() {
34 // for _, a := range v.Args {
35 // if a.Type.IsFlags() {
42 // loadByType returns the load instruction of the given type.
43 func loadByType(t *types.Type) obj.As {
75 panic("bad load type")
78 // storeByType returns the store instruction of the given type.
79 func storeByType(t *types.Type) obj.As {
99 panic("bad store type")
102 func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
125 case ssa.OpPPC64LoweredMuluhilo:
126 // MULHDU Rarg1, Rarg0, Reg0
127 // MULLD Rarg1, Rarg0, Reg1
128 r0 := v.Args[0].Reg()
129 r1 := v.Args[1].Reg()
130 p := s.Prog(ppc64.AMULHDU)
131 p.From.Type = obj.TYPE_REG
134 p.To.Type = obj.TYPE_REG
136 p1 := s.Prog(ppc64.AMULLD)
137 p1.From.Type = obj.TYPE_REG
140 p1.To.Type = obj.TYPE_REG
143 case ssa.OpPPC64LoweredAdd64Carry:
144 // ADDC Rarg2, -1, Rtmp
145 // ADDE Rarg1, Rarg0, Reg0
147 r0 := v.Args[0].Reg()
148 r1 := v.Args[1].Reg()
149 r2 := v.Args[2].Reg()
150 p := s.Prog(ppc64.AADDC)
151 p.From.Type = obj.TYPE_CONST
154 p.To.Type = obj.TYPE_REG
155 p.To.Reg = ppc64.REGTMP
156 p1 := s.Prog(ppc64.AADDE)
157 p1.From.Type = obj.TYPE_REG
160 p1.To.Type = obj.TYPE_REG
162 p2 := s.Prog(ppc64.AADDZE)
163 p2.From.Type = obj.TYPE_REG
164 p2.From.Reg = ppc64.REGZERO
165 p2.To.Type = obj.TYPE_REG
168 case ssa.OpPPC64LoweredAtomicAnd8,
169 ssa.OpPPC64LoweredAtomicOr8:
171 // LBAR (Rarg0), Rtmp
172 // AND/OR Rarg1, Rtmp
173 // STBCCC Rtmp, (Rarg0)
175 r0 := v.Args[0].Reg()
176 r1 := v.Args[1].Reg()
177 // LWSYNC - Assuming shared data not write-through-required nor
178 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
179 plwsync := s.Prog(ppc64.ALWSYNC)
180 plwsync.To.Type = obj.TYPE_NONE
181 p := s.Prog(ppc64.ALBAR)
182 p.From.Type = obj.TYPE_MEM
184 p.To.Type = obj.TYPE_REG
185 p.To.Reg = ppc64.REGTMP
186 p1 := s.Prog(v.Op.Asm())
187 p1.From.Type = obj.TYPE_REG
189 p1.To.Type = obj.TYPE_REG
190 p1.To.Reg = ppc64.REGTMP
191 p2 := s.Prog(ppc64.ASTBCCC)
192 p2.From.Type = obj.TYPE_REG
193 p2.From.Reg = ppc64.REGTMP
194 p2.To.Type = obj.TYPE_MEM
196 p2.RegTo2 = ppc64.REGTMP
197 p3 := s.Prog(ppc64.ABNE)
198 p3.To.Type = obj.TYPE_BRANCH
201 case ssa.OpPPC64LoweredAtomicAdd32,
202 ssa.OpPPC64LoweredAtomicAdd64:
204 // LDAR/LWAR (Rarg0), Rout
206 // STDCCC/STWCCC Rout, (Rarg0)
208 // MOVW Rout,Rout (if Add32)
211 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
215 r0 := v.Args[0].Reg()
216 r1 := v.Args[1].Reg()
218 // LWSYNC - Assuming shared data not write-through-required nor
219 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
220 plwsync := s.Prog(ppc64.ALWSYNC)
221 plwsync.To.Type = obj.TYPE_NONE
224 p.From.Type = obj.TYPE_MEM
226 p.To.Type = obj.TYPE_REG
229 p1 := s.Prog(ppc64.AADD)
230 p1.From.Type = obj.TYPE_REG
233 p1.To.Type = obj.TYPE_REG
236 p3.From.Type = obj.TYPE_REG
238 p3.To.Type = obj.TYPE_MEM
241 p4 := s.Prog(ppc64.ABNE)
242 p4.To.Type = obj.TYPE_BRANCH
245 // Ensure a 32 bit result
246 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
247 p5 := s.Prog(ppc64.AMOVWZ)
248 p5.To.Type = obj.TYPE_REG
250 p5.From.Type = obj.TYPE_REG
254 case ssa.OpPPC64LoweredAtomicExchange32,
255 ssa.OpPPC64LoweredAtomicExchange64:
257 // LDAR/LWAR (Rarg0), Rout
258 // STDCCC/STWCCC Rout, (Rarg0)
263 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
267 r0 := v.Args[0].Reg()
268 r1 := v.Args[1].Reg()
270 // LWSYNC - Assuming shared data not write-through-required nor
271 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
272 plwsync := s.Prog(ppc64.ALWSYNC)
273 plwsync.To.Type = obj.TYPE_NONE
276 p.From.Type = obj.TYPE_MEM
278 p.To.Type = obj.TYPE_REG
282 p1.From.Type = obj.TYPE_REG
284 p1.To.Type = obj.TYPE_MEM
287 p2 := s.Prog(ppc64.ABNE)
288 p2.To.Type = obj.TYPE_BRANCH
291 pisync := s.Prog(ppc64.AISYNC)
292 pisync.To.Type = obj.TYPE_NONE
294 case ssa.OpPPC64LoweredAtomicLoad8,
295 ssa.OpPPC64LoweredAtomicLoad32,
296 ssa.OpPPC64LoweredAtomicLoad64,
297 ssa.OpPPC64LoweredAtomicLoadPtr:
299 // MOVB/MOVD/MOVW (Rarg0), Rout
306 case ssa.OpPPC64LoweredAtomicLoad8:
308 case ssa.OpPPC64LoweredAtomicLoad32:
312 arg0 := v.Args[0].Reg()
314 // SYNC when AuxInt == 1; otherwise, load-acquire
316 psync := s.Prog(ppc64.ASYNC)
317 psync.To.Type = obj.TYPE_NONE
321 p.From.Type = obj.TYPE_MEM
323 p.To.Type = obj.TYPE_REG
327 p1.From.Type = obj.TYPE_REG
329 p1.To.Type = obj.TYPE_REG
332 p2 := s.Prog(ppc64.ABNE)
333 p2.To.Type = obj.TYPE_BRANCH
335 pisync := s.Prog(ppc64.AISYNC)
336 pisync.To.Type = obj.TYPE_NONE
339 case ssa.OpPPC64LoweredAtomicStore8,
340 ssa.OpPPC64LoweredAtomicStore32,
341 ssa.OpPPC64LoweredAtomicStore64:
343 // MOVB/MOVW/MOVD arg1,(arg0)
346 case ssa.OpPPC64LoweredAtomicStore8:
348 case ssa.OpPPC64LoweredAtomicStore32:
351 arg0 := v.Args[0].Reg()
352 arg1 := v.Args[1].Reg()
353 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
355 syncOp := ppc64.ASYNC
357 syncOp = ppc64.ALWSYNC
359 psync := s.Prog(syncOp)
360 psync.To.Type = obj.TYPE_NONE
363 p.To.Type = obj.TYPE_MEM
365 p.From.Type = obj.TYPE_REG
368 case ssa.OpPPC64LoweredAtomicCas64,
369 ssa.OpPPC64LoweredAtomicCas32:
372 // LDAR (Rarg0), MutexHint, Rtmp
375 // STDCCC Rarg2, (Rarg0)
377 // LWSYNC // Only for sequential consistency; not required in CasRel.
386 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
391 r0 := v.Args[0].Reg()
392 r1 := v.Args[1].Reg()
393 r2 := v.Args[2].Reg()
395 // LWSYNC - Assuming shared data not write-through-required nor
396 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
397 plwsync1 := s.Prog(ppc64.ALWSYNC)
398 plwsync1.To.Type = obj.TYPE_NONE
401 p.From.Type = obj.TYPE_MEM
403 p.To.Type = obj.TYPE_REG
404 p.To.Reg = ppc64.REGTMP
405 // If it is a Compare-and-Swap-Release operation, set the EH field with
408 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
412 p1.From.Type = obj.TYPE_REG
414 p1.To.Reg = ppc64.REGTMP
415 p1.To.Type = obj.TYPE_REG
417 p2 := s.Prog(ppc64.ABNE)
418 p2.To.Type = obj.TYPE_BRANCH
421 p3.From.Type = obj.TYPE_REG
423 p3.To.Type = obj.TYPE_MEM
426 p4 := s.Prog(ppc64.ABNE)
427 p4.To.Type = obj.TYPE_BRANCH
429 // LWSYNC - Assuming shared data not write-through-required nor
430 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
431 // If the operation is a CAS-Release, then synchronization is not necessary.
433 plwsync2 := s.Prog(ppc64.ALWSYNC)
434 plwsync2.To.Type = obj.TYPE_NONE
437 p5 := s.Prog(ppc64.AMOVD)
438 p5.From.Type = obj.TYPE_CONST
440 p5.To.Type = obj.TYPE_REG
443 p6 := s.Prog(obj.AJMP)
444 p6.To.Type = obj.TYPE_BRANCH
446 p7 := s.Prog(ppc64.AMOVD)
447 p7.From.Type = obj.TYPE_CONST
449 p7.To.Type = obj.TYPE_REG
453 p8 := s.Prog(obj.ANOP)
456 case ssa.OpPPC64LoweredGetClosurePtr:
457 // Closure pointer is R11 (already)
458 gc.CheckLoweredGetClosurePtr(v)
460 case ssa.OpPPC64LoweredGetCallerSP:
461 // caller's SP is FixedFrameSize below the address of the first arg
462 p := s.Prog(ppc64.AMOVD)
463 p.From.Type = obj.TYPE_ADDR
464 p.From.Offset = -gc.Ctxt.FixedFrameSize()
465 p.From.Name = obj.NAME_PARAM
466 p.To.Type = obj.TYPE_REG
469 case ssa.OpPPC64LoweredGetCallerPC:
470 p := s.Prog(obj.AGETCALLERPC)
471 p.To.Type = obj.TYPE_REG
474 case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
475 // input is already rounded
478 loadOp := loadByType(v.Type)
480 gc.AddrAuto(&p.From, v.Args[0])
481 p.To.Type = obj.TYPE_REG
485 storeOp := storeByType(v.Type)
487 p.From.Type = obj.TYPE_REG
488 p.From.Reg = v.Args[0].Reg()
489 gc.AddrAuto(&p.To, v)
491 case ssa.OpPPC64DIVD:
501 r0 := v.Args[0].Reg()
502 r1 := v.Args[1].Reg()
504 p := s.Prog(ppc64.ACMP)
505 p.From.Type = obj.TYPE_REG
507 p.To.Type = obj.TYPE_CONST
510 pbahead := s.Prog(ppc64.ABEQ)
511 pbahead.To.Type = obj.TYPE_BRANCH
513 p = s.Prog(v.Op.Asm())
514 p.From.Type = obj.TYPE_REG
517 p.To.Type = obj.TYPE_REG
520 pbover := s.Prog(obj.AJMP)
521 pbover.To.Type = obj.TYPE_BRANCH
523 p = s.Prog(ppc64.ANEG)
524 p.To.Type = obj.TYPE_REG
526 p.From.Type = obj.TYPE_REG
533 case ssa.OpPPC64DIVW:
534 // word-width version of above
536 r0 := v.Args[0].Reg()
537 r1 := v.Args[1].Reg()
539 p := s.Prog(ppc64.ACMPW)
540 p.From.Type = obj.TYPE_REG
542 p.To.Type = obj.TYPE_CONST
545 pbahead := s.Prog(ppc64.ABEQ)
546 pbahead.To.Type = obj.TYPE_BRANCH
548 p = s.Prog(v.Op.Asm())
549 p.From.Type = obj.TYPE_REG
552 p.To.Type = obj.TYPE_REG
555 pbover := s.Prog(obj.AJMP)
556 pbover.To.Type = obj.TYPE_BRANCH
558 p = s.Prog(ppc64.ANEG)
559 p.To.Type = obj.TYPE_REG
561 p.From.Type = obj.TYPE_REG
568 case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
569 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
570 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
571 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
572 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
573 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
574 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
575 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
577 r1 := v.Args[0].Reg()
578 r2 := v.Args[1].Reg()
579 p := s.Prog(v.Op.Asm())
580 p.From.Type = obj.TYPE_REG
583 p.To.Type = obj.TYPE_REG
586 case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
587 r1 := v.Args[0].Reg()
588 r2 := v.Args[1].Reg()
589 p := s.Prog(v.Op.Asm())
590 p.From.Type = obj.TYPE_REG
593 p.To.Type = obj.TYPE_REG
594 p.To.Reg = ppc64.REGTMP // result is not needed
596 case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
597 p := s.Prog(v.Op.Asm())
598 p.From.Type = obj.TYPE_CONST
599 p.From.Offset = v.AuxInt
600 p.Reg = v.Args[0].Reg()
601 p.To.Type = obj.TYPE_REG
604 case ssa.OpPPC64MADDLD:
606 r1 := v.Args[0].Reg()
607 r2 := v.Args[1].Reg()
608 r3 := v.Args[2].Reg()
610 p := s.Prog(v.Op.Asm())
611 p.From.Type = obj.TYPE_REG
614 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r3})
615 p.To.Type = obj.TYPE_REG
618 case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
620 r1 := v.Args[0].Reg()
621 r2 := v.Args[1].Reg()
622 r3 := v.Args[2].Reg()
624 p := s.Prog(v.Op.Asm())
625 p.From.Type = obj.TYPE_REG
628 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
629 p.To.Type = obj.TYPE_REG
632 case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
633 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
634 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
635 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
637 p := s.Prog(v.Op.Asm())
638 p.To.Type = obj.TYPE_REG
640 p.From.Type = obj.TYPE_REG
641 p.From.Reg = v.Args[0].Reg()
643 case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
644 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst:
645 p := s.Prog(v.Op.Asm())
646 p.Reg = v.Args[0].Reg()
647 p.From.Type = obj.TYPE_CONST
648 p.From.Offset = v.AuxInt
649 p.To.Type = obj.TYPE_REG
652 case ssa.OpPPC64ANDCCconst:
653 p := s.Prog(v.Op.Asm())
654 p.Reg = v.Args[0].Reg()
655 p.From.Type = obj.TYPE_CONST
656 p.From.Offset = v.AuxInt
657 p.To.Type = obj.TYPE_REG
658 p.To.Reg = ppc64.REGTMP // discard result
660 case ssa.OpPPC64MOVDaddr:
661 switch v.Aux.(type) {
663 v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
665 // If aux offset and aux int are both 0, and the same
666 // input and output regs are used, no instruction
667 // needs to be generated, since it would just be
669 if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
670 p := s.Prog(ppc64.AMOVD)
671 p.From.Type = obj.TYPE_ADDR
672 p.From.Reg = v.Args[0].Reg()
673 p.From.Offset = v.AuxInt
674 p.To.Type = obj.TYPE_REG
678 case *obj.LSym, *gc.Node:
679 p := s.Prog(ppc64.AMOVD)
680 p.From.Type = obj.TYPE_ADDR
681 p.From.Reg = v.Args[0].Reg()
682 p.To.Type = obj.TYPE_REG
684 gc.AddAux(&p.From, v)
688 case ssa.OpPPC64MOVDconst:
689 p := s.Prog(v.Op.Asm())
690 p.From.Type = obj.TYPE_CONST
691 p.From.Offset = v.AuxInt
692 p.To.Type = obj.TYPE_REG
695 case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
696 p := s.Prog(v.Op.Asm())
697 p.From.Type = obj.TYPE_FCONST
698 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
699 p.To.Type = obj.TYPE_REG
702 case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
703 p := s.Prog(v.Op.Asm())
704 p.From.Type = obj.TYPE_REG
705 p.From.Reg = v.Args[0].Reg()
706 p.To.Type = obj.TYPE_REG
707 p.To.Reg = v.Args[1].Reg()
709 case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
710 p := s.Prog(v.Op.Asm())
711 p.From.Type = obj.TYPE_REG
712 p.From.Reg = v.Args[0].Reg()
713 p.To.Type = obj.TYPE_CONST
714 p.To.Offset = v.AuxInt
716 case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
717 // Shift in register to required size
718 p := s.Prog(v.Op.Asm())
719 p.From.Type = obj.TYPE_REG
720 p.From.Reg = v.Args[0].Reg()
722 p.To.Type = obj.TYPE_REG
724 case ssa.OpPPC64MOVDload:
726 // MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
727 // For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
728 // the offset is not known until link time. If the load of a go.string uses relocation for the
729 // offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
730 // To avoid this problem, the full address of the go.string is computed and loaded into the base register,
731 // and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
732 // go.string types because other types will have proper alignment.
735 switch n := v.Aux.(type) {
737 gostring = strings.HasPrefix(n.Name, "go.string.")
740 // Generate full addr of the go.string const
742 p := s.Prog(ppc64.AMOVD)
743 p.From.Type = obj.TYPE_ADDR
744 p.From.Reg = v.Args[0].Reg()
745 gc.AddAux(&p.From, v)
746 p.To.Type = obj.TYPE_REG
748 // Load go.string using 0 offset
749 p = s.Prog(v.Op.Asm())
750 p.From.Type = obj.TYPE_MEM
752 p.To.Type = obj.TYPE_REG
756 // Not a go.string, generate a normal load
759 case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
760 p := s.Prog(v.Op.Asm())
761 p.From.Type = obj.TYPE_MEM
762 p.From.Reg = v.Args[0].Reg()
763 gc.AddAux(&p.From, v)
764 p.To.Type = obj.TYPE_REG
767 case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
768 p := s.Prog(v.Op.Asm())
769 p.From.Type = obj.TYPE_MEM
770 p.From.Reg = v.Args[0].Reg()
771 p.To.Type = obj.TYPE_REG
774 case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
775 p := s.Prog(v.Op.Asm())
776 p.To.Type = obj.TYPE_MEM
777 p.To.Reg = v.Args[0].Reg()
778 p.From.Type = obj.TYPE_REG
779 p.From.Reg = v.Args[1].Reg()
781 case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
782 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
783 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
784 p := s.Prog(v.Op.Asm())
785 p.From.Type = obj.TYPE_MEM
786 p.From.Reg = v.Args[0].Reg()
787 p.From.Index = v.Args[1].Reg()
788 p.To.Type = obj.TYPE_REG
791 case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
792 p := s.Prog(v.Op.Asm())
793 p.From.Type = obj.TYPE_REG
794 p.From.Reg = ppc64.REGZERO
795 p.To.Type = obj.TYPE_MEM
796 p.To.Reg = v.Args[0].Reg()
799 case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
800 p := s.Prog(v.Op.Asm())
801 p.From.Type = obj.TYPE_REG
802 p.From.Reg = v.Args[1].Reg()
803 p.To.Type = obj.TYPE_MEM
804 p.To.Reg = v.Args[0].Reg()
807 case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
808 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
809 ssa.OpPPC64MOVHBRstoreidx:
810 p := s.Prog(v.Op.Asm())
811 p.From.Type = obj.TYPE_REG
812 p.From.Reg = v.Args[2].Reg()
813 p.To.Index = v.Args[1].Reg()
814 p.To.Type = obj.TYPE_MEM
815 p.To.Reg = v.Args[0].Reg()
817 case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
819 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
820 // ISEL only accepts 0, 1, 2 condition values but the others can be
821 // achieved by swapping operand order.
822 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
823 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
824 // ISELB is used when a boolean result is needed, returning 0 or 1
825 p := s.Prog(ppc64.AISEL)
826 p.To.Type = obj.TYPE_REG
828 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
829 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
830 if v.Op == ssa.OpPPC64ISEL {
831 r.Reg = v.Args[1].Reg()
833 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
836 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
838 p.Reg = v.Args[0].Reg()
841 p.From.Type = obj.TYPE_CONST
842 p.From.Offset = v.AuxInt & 3
844 case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
845 // The LoweredQuad code generation
846 // generates STXV instructions on
847 // power9. The Short variation is used
848 // if no loop is generated.
850 // sizes >= 64 generate a loop as follows:
852 // Set up loop counter in CTR, used by BC
853 // XXLXOR clears VS32
854 // XXLXOR VS32,VS32,VS32
855 // MOVD len/64,REG_TMP
865 // Bytes per iteration
871 // Only generate a loop if there is more
874 // Set up VS32 (V0) to hold 0s
875 p := s.Prog(ppc64.AXXLXOR)
876 p.From.Type = obj.TYPE_REG
877 p.From.Reg = ppc64.REG_VS32
878 p.To.Type = obj.TYPE_REG
879 p.To.Reg = ppc64.REG_VS32
880 p.Reg = ppc64.REG_VS32
882 // Set up CTR loop counter
883 p = s.Prog(ppc64.AMOVD)
884 p.From.Type = obj.TYPE_CONST
886 p.To.Type = obj.TYPE_REG
887 p.To.Reg = ppc64.REGTMP
889 p = s.Prog(ppc64.AMOVD)
890 p.From.Type = obj.TYPE_REG
891 p.From.Reg = ppc64.REGTMP
892 p.To.Type = obj.TYPE_REG
893 p.To.Reg = ppc64.REG_CTR
895 // Don't generate padding for
896 // loops with few iterations.
898 p = s.Prog(obj.APCALIGN)
899 p.From.Type = obj.TYPE_CONST
903 // generate 4 STXVs to zero 64 bytes
906 p = s.Prog(ppc64.ASTXV)
907 p.From.Type = obj.TYPE_REG
908 p.From.Reg = ppc64.REG_VS32
909 p.To.Type = obj.TYPE_MEM
910 p.To.Reg = v.Args[0].Reg()
912 // Save the top of loop
916 p = s.Prog(ppc64.ASTXV)
917 p.From.Type = obj.TYPE_REG
918 p.From.Reg = ppc64.REG_VS32
919 p.To.Type = obj.TYPE_MEM
920 p.To.Reg = v.Args[0].Reg()
923 p = s.Prog(ppc64.ASTXV)
924 p.From.Type = obj.TYPE_REG
925 p.From.Reg = ppc64.REG_VS32
926 p.To.Type = obj.TYPE_MEM
927 p.To.Reg = v.Args[0].Reg()
930 p = s.Prog(ppc64.ASTXV)
931 p.From.Type = obj.TYPE_REG
932 p.From.Reg = ppc64.REG_VS32
933 p.To.Type = obj.TYPE_MEM
934 p.To.Reg = v.Args[0].Reg()
937 // Increment address for the
938 // 64 bytes just zeroed.
939 p = s.Prog(ppc64.AADD)
940 p.Reg = v.Args[0].Reg()
941 p.From.Type = obj.TYPE_CONST
943 p.To.Type = obj.TYPE_REG
944 p.To.Reg = v.Args[0].Reg()
946 // Branch back to top of loop
948 // BC with BO_BCTR generates bdnz
949 p = s.Prog(ppc64.ABC)
950 p.From.Type = obj.TYPE_CONST
951 p.From.Offset = ppc64.BO_BCTR
953 p.To.Type = obj.TYPE_BRANCH
956 // When ctr == 1 the loop was not generated but
957 // there are at least 64 bytes to clear, so add
958 // that to the remainder to generate the code
959 // to clear those doublewords
964 // Clear the remainder starting at offset zero
967 if rem >= 16 && ctr <= 1 {
968 // If the XXLXOR hasn't already been
969 // generated, do it here to initialize
971 p := s.Prog(ppc64.AXXLXOR)
972 p.From.Type = obj.TYPE_REG
973 p.From.Reg = ppc64.REG_VS32
974 p.To.Type = obj.TYPE_REG
975 p.To.Reg = ppc64.REG_VS32
976 p.Reg = ppc64.REG_VS32
978 // Generate STXV for 32 or 64
981 p := s.Prog(ppc64.ASTXV)
982 p.From.Type = obj.TYPE_REG
983 p.From.Reg = ppc64.REG_VS32
984 p.To.Type = obj.TYPE_MEM
985 p.To.Reg = v.Args[0].Reg()
988 p = s.Prog(ppc64.ASTXV)
989 p.From.Type = obj.TYPE_REG
990 p.From.Reg = ppc64.REG_VS32
991 p.To.Type = obj.TYPE_MEM
992 p.To.Reg = v.Args[0].Reg()
993 p.To.Offset = offset + 16
999 p := s.Prog(ppc64.ASTXV)
1000 p.From.Type = obj.TYPE_REG
1001 p.From.Reg = ppc64.REG_VS32
1002 p.To.Type = obj.TYPE_MEM
1003 p.To.Reg = v.Args[0].Reg()
1004 p.To.Offset = offset
1009 // first clear as many doublewords as possible
1010 // then clear remaining sizes as available
1012 op, size := ppc64.AMOVB, int64(1)
1015 op, size = ppc64.AMOVD, 8
1017 op, size = ppc64.AMOVW, 4
1019 op, size = ppc64.AMOVH, 2
1022 p.From.Type = obj.TYPE_REG
1023 p.From.Reg = ppc64.REG_R0
1024 p.To.Type = obj.TYPE_MEM
1025 p.To.Reg = v.Args[0].Reg()
1026 p.To.Offset = offset
1031 case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1033 // Unaligned data doesn't hurt performance
1034 // for these instructions on power8.
1036 // For sizes >= 64 generate a loop as follows:
1038 // Set up loop counter in CTR, used by BC
1039 // XXLXOR VS32,VS32,VS32
1040 // MOVD len/32,REG_TMP
1044 // STXVD2X VS32,(R0)(R20)
1045 // STXVD2X VS32,(R31)(R20)
1049 // any remainder is done as described below
1051 // for sizes < 64 bytes, first clear as many doublewords as possible,
1052 // then handle the remainder
1057 // the remainder bytes are cleared using one or more
1058 // of the following instructions with the appropriate
1059 // offsets depending which instructions are needed
1061 // MOVW R0,n1(R20) 4 bytes
1062 // MOVH R0,n2(R20) 2 bytes
1063 // MOVB R0,n3(R20) 1 byte
1065 // 7 bytes: MOVW, MOVH, MOVB
1066 // 6 bytes: MOVW, MOVH
1067 // 5 bytes: MOVW, MOVB
1068 // 3 bytes: MOVH, MOVB
1070 // each loop iteration does 32 bytes
1071 ctr := v.AuxInt / 32
1074 rem := v.AuxInt % 32
1076 // only generate a loop if there is more
1077 // than 1 iteration.
1079 // Set up VS32 (V0) to hold 0s
1080 p := s.Prog(ppc64.AXXLXOR)
1081 p.From.Type = obj.TYPE_REG
1082 p.From.Reg = ppc64.REG_VS32
1083 p.To.Type = obj.TYPE_REG
1084 p.To.Reg = ppc64.REG_VS32
1085 p.Reg = ppc64.REG_VS32
1087 // Set up CTR loop counter
1088 p = s.Prog(ppc64.AMOVD)
1089 p.From.Type = obj.TYPE_CONST
1091 p.To.Type = obj.TYPE_REG
1092 p.To.Reg = ppc64.REGTMP
1094 p = s.Prog(ppc64.AMOVD)
1095 p.From.Type = obj.TYPE_REG
1096 p.From.Reg = ppc64.REGTMP
1097 p.To.Type = obj.TYPE_REG
1098 p.To.Reg = ppc64.REG_CTR
1100 // Set up R31 to hold index value 16
1101 p = s.Prog(ppc64.AMOVD)
1102 p.From.Type = obj.TYPE_CONST
1104 p.To.Type = obj.TYPE_REG
1105 p.To.Reg = ppc64.REGTMP
1107 // Don't add padding for alignment
1108 // with few loop iterations.
1110 p = s.Prog(obj.APCALIGN)
1111 p.From.Type = obj.TYPE_CONST
1115 // generate 2 STXVD2Xs to store 16 bytes
1116 // when this is a loop then the top must be saved
1118 // This is the top of loop
1120 p = s.Prog(ppc64.ASTXVD2X)
1121 p.From.Type = obj.TYPE_REG
1122 p.From.Reg = ppc64.REG_VS32
1123 p.To.Type = obj.TYPE_MEM
1124 p.To.Reg = v.Args[0].Reg()
1125 p.To.Index = ppc64.REGZERO
1126 // Save the top of loop
1130 p = s.Prog(ppc64.ASTXVD2X)
1131 p.From.Type = obj.TYPE_REG
1132 p.From.Reg = ppc64.REG_VS32
1133 p.To.Type = obj.TYPE_MEM
1134 p.To.Reg = v.Args[0].Reg()
1135 p.To.Index = ppc64.REGTMP
1137 // Increment address for the
1138 // 4 doublewords just zeroed.
1139 p = s.Prog(ppc64.AADD)
1140 p.Reg = v.Args[0].Reg()
1141 p.From.Type = obj.TYPE_CONST
1143 p.To.Type = obj.TYPE_REG
1144 p.To.Reg = v.Args[0].Reg()
1146 // Branch back to top of loop
1148 // BC with BO_BCTR generates bdnz
1149 p = s.Prog(ppc64.ABC)
1150 p.From.Type = obj.TYPE_CONST
1151 p.From.Offset = ppc64.BO_BCTR
1152 p.Reg = ppc64.REG_R0
1153 p.To.Type = obj.TYPE_BRANCH
1157 // when ctr == 1 the loop was not generated but
1158 // there are at least 32 bytes to clear, so add
1159 // that to the remainder to generate the code
1160 // to clear those doublewords
1165 // clear the remainder starting at offset zero
1168 // first clear as many doublewords as possible
1169 // then clear remaining sizes as available
1171 op, size := ppc64.AMOVB, int64(1)
1174 op, size = ppc64.AMOVD, 8
1176 op, size = ppc64.AMOVW, 4
1178 op, size = ppc64.AMOVH, 2
1181 p.From.Type = obj.TYPE_REG
1182 p.From.Reg = ppc64.REG_R0
1183 p.To.Type = obj.TYPE_MEM
1184 p.To.Reg = v.Args[0].Reg()
1185 p.To.Offset = offset
1190 case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1192 bytesPerLoop := int64(32)
1193 // This will be used when moving more
1194 // than 8 bytes. Moves start with
1195 // as many 8 byte moves as possible, then
1196 // 4, 2, or 1 byte(s) as remaining. This will
1197 // work and be efficient for power8 or later.
1198 // If there are 64 or more bytes, then a
1199 // loop is generated to move 32 bytes and
1200 // update the src and dst addresses on each
1201 // iteration. When < 64 bytes, the appropriate
1202 // number of moves are generated based on the
1204 // When moving >= 64 bytes a loop is used
1205 // MOVD len/32,REG_TMP
1209 // LXVD2X (R0)(R21),VS32
1210 // LXVD2X (R31)(R21),VS33
1212 // STXVD2X VS32,(R0)(R20)
1213 // STXVD2X VS33,(R31)(R20)
1216 // Bytes not moved by this loop are moved
1217 // with a combination of the following instructions,
1218 // starting with the largest sizes and generating as
1219 // many as needed, using the appropriate offset value.
1229 // Each loop iteration moves 32 bytes
1230 ctr := v.AuxInt / bytesPerLoop
1232 // Remainder after the loop
1233 rem := v.AuxInt % bytesPerLoop
1235 dstReg := v.Args[0].Reg()
1236 srcReg := v.Args[1].Reg()
1238 // The set of registers used here, must match the clobbered reg list
1244 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1247 p := s.Prog(ppc64.AMOVD)
1248 p.From.Type = obj.TYPE_CONST
1250 p.To.Type = obj.TYPE_REG
1251 p.To.Reg = ppc64.REGTMP
1253 p = s.Prog(ppc64.AMOVD)
1254 p.From.Type = obj.TYPE_REG
1255 p.From.Reg = ppc64.REGTMP
1256 p.To.Type = obj.TYPE_REG
1257 p.To.Reg = ppc64.REG_CTR
1259 // Use REGTMP as index reg
1260 p = s.Prog(ppc64.AMOVD)
1261 p.From.Type = obj.TYPE_CONST
1263 p.To.Type = obj.TYPE_REG
1264 p.To.Reg = ppc64.REGTMP
1266 // Don't add padding for
1267 // alignment with small iteration
1270 p = s.Prog(obj.APCALIGN)
1271 p.From.Type = obj.TYPE_CONST
1275 // Generate 16 byte loads and stores.
1276 // Use temp register for index (16)
1277 // on the second one.
1279 p = s.Prog(ppc64.ALXVD2X)
1280 p.From.Type = obj.TYPE_MEM
1282 p.From.Index = ppc64.REGZERO
1283 p.To.Type = obj.TYPE_REG
1284 p.To.Reg = ppc64.REG_VS32
1288 p = s.Prog(ppc64.ALXVD2X)
1289 p.From.Type = obj.TYPE_MEM
1291 p.From.Index = ppc64.REGTMP
1292 p.To.Type = obj.TYPE_REG
1293 p.To.Reg = ppc64.REG_VS33
1295 // increment the src reg for next iteration
1296 p = s.Prog(ppc64.AADD)
1298 p.From.Type = obj.TYPE_CONST
1299 p.From.Offset = bytesPerLoop
1300 p.To.Type = obj.TYPE_REG
1303 // generate 16 byte stores
1304 p = s.Prog(ppc64.ASTXVD2X)
1305 p.From.Type = obj.TYPE_REG
1306 p.From.Reg = ppc64.REG_VS32
1307 p.To.Type = obj.TYPE_MEM
1309 p.To.Index = ppc64.REGZERO
1311 p = s.Prog(ppc64.ASTXVD2X)
1312 p.From.Type = obj.TYPE_REG
1313 p.From.Reg = ppc64.REG_VS33
1314 p.To.Type = obj.TYPE_MEM
1316 p.To.Index = ppc64.REGTMP
1318 // increment the dst reg for next iteration
1319 p = s.Prog(ppc64.AADD)
1321 p.From.Type = obj.TYPE_CONST
1322 p.From.Offset = bytesPerLoop
1323 p.To.Type = obj.TYPE_REG
1326 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1328 p = s.Prog(ppc64.ABC)
1329 p.From.Type = obj.TYPE_CONST
1330 p.From.Offset = ppc64.BO_BCTR
1331 p.Reg = ppc64.REG_R0
1332 p.To.Type = obj.TYPE_BRANCH
1335 // srcReg and dstReg were incremented in the loop, so
1336 // later instructions start with offset 0.
1340 // No loop was generated for one iteration, so
1341 // add 32 bytes to the remainder to move those bytes.
1347 // Generate 16 byte loads and stores.
1348 // Use temp register for index (value 16)
1349 // on the second one.
1350 p := s.Prog(ppc64.ALXVD2X)
1351 p.From.Type = obj.TYPE_MEM
1353 p.From.Index = ppc64.REGZERO
1354 p.To.Type = obj.TYPE_REG
1355 p.To.Reg = ppc64.REG_VS32
1357 p = s.Prog(ppc64.ASTXVD2X)
1358 p.From.Type = obj.TYPE_REG
1359 p.From.Reg = ppc64.REG_VS32
1360 p.To.Type = obj.TYPE_MEM
1362 p.To.Index = ppc64.REGZERO
1368 // Use REGTMP as index reg
1369 p := s.Prog(ppc64.AMOVD)
1370 p.From.Type = obj.TYPE_CONST
1372 p.To.Type = obj.TYPE_REG
1373 p.To.Reg = ppc64.REGTMP
1375 p = s.Prog(ppc64.ALXVD2X)
1376 p.From.Type = obj.TYPE_MEM
1378 p.From.Index = ppc64.REGTMP
1379 p.To.Type = obj.TYPE_REG
1380 p.To.Reg = ppc64.REG_VS32
1382 p = s.Prog(ppc64.ASTXVD2X)
1383 p.From.Type = obj.TYPE_REG
1384 p.From.Reg = ppc64.REG_VS32
1385 p.To.Type = obj.TYPE_MEM
1387 p.To.Index = ppc64.REGTMP
1394 // Generate all the remaining load and store pairs, starting with
1395 // as many 8 byte moves as possible, then 4, 2, 1.
1397 op, size := ppc64.AMOVB, int64(1)
1400 op, size = ppc64.AMOVD, 8
1402 op, size = ppc64.AMOVW, 4
1404 op, size = ppc64.AMOVH, 2
1408 p.To.Type = obj.TYPE_REG
1409 p.To.Reg = ppc64.REGTMP
1410 p.From.Type = obj.TYPE_MEM
1412 p.From.Offset = offset
1416 p.From.Type = obj.TYPE_REG
1417 p.From.Reg = ppc64.REGTMP
1418 p.To.Type = obj.TYPE_MEM
1420 p.To.Offset = offset
1425 case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1426 bytesPerLoop := int64(64)
1427 // This is used when moving more
1428 // than 8 bytes on power9. Moves start with
1429 // as many 8 byte moves as possible, then
1430 // 4, 2, or 1 byte(s) as remaining. This will
1431 // work and be efficient for power8 or later.
1432 // If there are 64 or more bytes, then a
1433 // loop is generated to move 64 bytes and
1434 // update the src and dst addresses on each
1435 // iteration. When < 64 bytes, the appropriate
1436 // number of moves are generated based on the
1438 // When moving >= 64 bytes a loop is used
1439 // MOVD len/64,REG_TMP
1446 // STXV VS33,16(R20)
1449 // Bytes not moved by this loop are moved
1450 // with a combination of the following instructions,
1451 // starting with the largest sizes and generating as
1452 // many as needed, using the appropriate offset value.
1462 // Each loop iteration moves 64 bytes
1463 ctr := v.AuxInt / bytesPerLoop
1465 // Remainder after the loop
1466 rem := v.AuxInt % bytesPerLoop
1468 dstReg := v.Args[0].Reg()
1469 srcReg := v.Args[1].Reg()
1476 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1479 p := s.Prog(ppc64.AMOVD)
1480 p.From.Type = obj.TYPE_CONST
1482 p.To.Type = obj.TYPE_REG
1483 p.To.Reg = ppc64.REGTMP
1485 p = s.Prog(ppc64.AMOVD)
1486 p.From.Type = obj.TYPE_REG
1487 p.From.Reg = ppc64.REGTMP
1488 p.To.Type = obj.TYPE_REG
1489 p.To.Reg = ppc64.REG_CTR
1491 p = s.Prog(obj.APCALIGN)
1492 p.From.Type = obj.TYPE_CONST
1495 // Generate 16 byte loads and stores.
1496 p = s.Prog(ppc64.ALXV)
1497 p.From.Type = obj.TYPE_MEM
1499 p.From.Offset = offset
1500 p.To.Type = obj.TYPE_REG
1501 p.To.Reg = ppc64.REG_VS32
1505 p = s.Prog(ppc64.ALXV)
1506 p.From.Type = obj.TYPE_MEM
1508 p.From.Offset = offset + 16
1509 p.To.Type = obj.TYPE_REG
1510 p.To.Reg = ppc64.REG_VS33
1512 // generate 16 byte stores
1513 p = s.Prog(ppc64.ASTXV)
1514 p.From.Type = obj.TYPE_REG
1515 p.From.Reg = ppc64.REG_VS32
1516 p.To.Type = obj.TYPE_MEM
1518 p.To.Offset = offset
1520 p = s.Prog(ppc64.ASTXV)
1521 p.From.Type = obj.TYPE_REG
1522 p.From.Reg = ppc64.REG_VS33
1523 p.To.Type = obj.TYPE_MEM
1525 p.To.Offset = offset + 16
1527 // Generate 16 byte loads and stores.
1528 p = s.Prog(ppc64.ALXV)
1529 p.From.Type = obj.TYPE_MEM
1531 p.From.Offset = offset + 32
1532 p.To.Type = obj.TYPE_REG
1533 p.To.Reg = ppc64.REG_VS32
1535 p = s.Prog(ppc64.ALXV)
1536 p.From.Type = obj.TYPE_MEM
1538 p.From.Offset = offset + 48
1539 p.To.Type = obj.TYPE_REG
1540 p.To.Reg = ppc64.REG_VS33
1542 // generate 16 byte stores
1543 p = s.Prog(ppc64.ASTXV)
1544 p.From.Type = obj.TYPE_REG
1545 p.From.Reg = ppc64.REG_VS32
1546 p.To.Type = obj.TYPE_MEM
1548 p.To.Offset = offset + 32
1550 p = s.Prog(ppc64.ASTXV)
1551 p.From.Type = obj.TYPE_REG
1552 p.From.Reg = ppc64.REG_VS33
1553 p.To.Type = obj.TYPE_MEM
1555 p.To.Offset = offset + 48
1557 // increment the src reg for next iteration
1558 p = s.Prog(ppc64.AADD)
1560 p.From.Type = obj.TYPE_CONST
1561 p.From.Offset = bytesPerLoop
1562 p.To.Type = obj.TYPE_REG
1565 // increment the dst reg for next iteration
1566 p = s.Prog(ppc64.AADD)
1568 p.From.Type = obj.TYPE_CONST
1569 p.From.Offset = bytesPerLoop
1570 p.To.Type = obj.TYPE_REG
1573 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1575 p = s.Prog(ppc64.ABC)
1576 p.From.Type = obj.TYPE_CONST
1577 p.From.Offset = ppc64.BO_BCTR
1578 p.Reg = ppc64.REG_R0
1579 p.To.Type = obj.TYPE_BRANCH
1582 // srcReg and dstReg were incremented in the loop, so
1583 // later instructions start with offset 0.
1587 // No loop was generated for one iteration, so
1588 // add 32 bytes to the remainder to move those bytes.
1593 p := s.Prog(ppc64.ALXV)
1594 p.From.Type = obj.TYPE_MEM
1596 p.To.Type = obj.TYPE_REG
1597 p.To.Reg = ppc64.REG_VS32
1599 p = s.Prog(ppc64.ALXV)
1600 p.From.Type = obj.TYPE_MEM
1603 p.To.Type = obj.TYPE_REG
1604 p.To.Reg = ppc64.REG_VS33
1606 p = s.Prog(ppc64.ASTXV)
1607 p.From.Type = obj.TYPE_REG
1608 p.From.Reg = ppc64.REG_VS32
1609 p.To.Type = obj.TYPE_MEM
1612 p = s.Prog(ppc64.ASTXV)
1613 p.From.Type = obj.TYPE_REG
1614 p.From.Reg = ppc64.REG_VS33
1615 p.To.Type = obj.TYPE_MEM
1624 // Generate 16 byte loads and stores.
1625 p := s.Prog(ppc64.ALXV)
1626 p.From.Type = obj.TYPE_MEM
1628 p.From.Offset = offset
1629 p.To.Type = obj.TYPE_REG
1630 p.To.Reg = ppc64.REG_VS32
1632 p = s.Prog(ppc64.ASTXV)
1633 p.From.Type = obj.TYPE_REG
1634 p.From.Reg = ppc64.REG_VS32
1635 p.To.Type = obj.TYPE_MEM
1637 p.To.Offset = offset
1643 p := s.Prog(ppc64.ALXV)
1644 p.From.Type = obj.TYPE_MEM
1646 p.From.Offset = offset
1647 p.To.Type = obj.TYPE_REG
1648 p.To.Reg = ppc64.REG_VS32
1650 p = s.Prog(ppc64.ASTXV)
1651 p.From.Type = obj.TYPE_REG
1652 p.From.Reg = ppc64.REG_VS32
1653 p.To.Type = obj.TYPE_MEM
1655 p.To.Offset = offset
1661 // Generate all the remaining load and store pairs, starting with
1662 // as many 8 byte moves as possible, then 4, 2, 1.
1664 op, size := ppc64.AMOVB, int64(1)
1667 op, size = ppc64.AMOVD, 8
1669 op, size = ppc64.AMOVW, 4
1671 op, size = ppc64.AMOVH, 2
1675 p.To.Type = obj.TYPE_REG
1676 p.To.Reg = ppc64.REGTMP
1677 p.From.Type = obj.TYPE_MEM
1679 p.From.Offset = offset
1683 p.From.Type = obj.TYPE_REG
1684 p.From.Reg = ppc64.REGTMP
1685 p.To.Type = obj.TYPE_MEM
1687 p.To.Offset = offset
1692 case ssa.OpPPC64CALLstatic:
1695 case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1696 p := s.Prog(ppc64.AMOVD)
1697 p.From.Type = obj.TYPE_REG
1698 p.From.Reg = v.Args[0].Reg()
1699 p.To.Type = obj.TYPE_REG
1700 p.To.Reg = ppc64.REG_LR
1702 if v.Args[0].Reg() != ppc64.REG_R12 {
1703 v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1707 pp.To.Reg = ppc64.REG_LR
1709 if gc.Ctxt.Flag_shared {
1710 // When compiling Go into PIC, the function we just
1711 // called via pointer might have been implemented in
1712 // a separate module and so overwritten the TOC
1713 // pointer in R2; reload it.
1714 q := s.Prog(ppc64.AMOVD)
1715 q.From.Type = obj.TYPE_MEM
1717 q.From.Reg = ppc64.REGSP
1718 q.To.Type = obj.TYPE_REG
1719 q.To.Reg = ppc64.REG_R2
1722 case ssa.OpPPC64LoweredWB:
1723 p := s.Prog(obj.ACALL)
1724 p.To.Type = obj.TYPE_MEM
1725 p.To.Name = obj.NAME_EXTERN
1726 p.To.Sym = v.Aux.(*obj.LSym)
1728 case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1729 p := s.Prog(obj.ACALL)
1730 p.To.Type = obj.TYPE_MEM
1731 p.To.Name = obj.NAME_EXTERN
1732 p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
1733 s.UseArgs(16) // space used in callee args area by assembly stubs
1735 case ssa.OpPPC64LoweredNilCheck:
1736 if objabi.GOOS == "aix" {
1740 // NOP (so the BNE has somewhere to land)
1743 p := s.Prog(ppc64.ACMP)
1744 p.From.Type = obj.TYPE_REG
1745 p.From.Reg = v.Args[0].Reg()
1746 p.To.Type = obj.TYPE_REG
1747 p.To.Reg = ppc64.REG_R0
1750 p2 := s.Prog(ppc64.ABNE)
1751 p2.To.Type = obj.TYPE_BRANCH
1754 // Write at 0 is forbidden and will trigger a SIGSEGV
1755 p = s.Prog(ppc64.AMOVW)
1756 p.From.Type = obj.TYPE_REG
1757 p.From.Reg = ppc64.REG_R0
1758 p.To.Type = obj.TYPE_MEM
1759 p.To.Reg = ppc64.REG_R0
1761 // NOP (so the BNE has somewhere to land)
1762 nop := s.Prog(obj.ANOP)
1766 // Issue a load which will fault if arg is nil.
1767 p := s.Prog(ppc64.AMOVBZ)
1768 p.From.Type = obj.TYPE_MEM
1769 p.From.Reg = v.Args[0].Reg()
1770 gc.AddAux(&p.From, v)
1771 p.To.Type = obj.TYPE_REG
1772 p.To.Reg = ppc64.REGTMP
1774 if logopt.Enabled() {
1775 logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1777 if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1778 gc.Warnl(v.Pos, "generated nil check")
1781 // These should be resolved by rules and not make it here.
1782 case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1783 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1784 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1785 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1786 case ssa.OpPPC64InvertFlags:
1787 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1788 case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1789 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1791 // TODO: implement for clobberdead experiment. Nop is ok for now.
1793 v.Fatalf("genValue not implemented: %s", v.LongString())
1797 var blockJump = [...]struct {
1799 asmeq, invasmun bool
1801 ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1802 ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1804 ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1805 ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1806 ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1807 ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1809 // TODO: need to work FP comparisons into block jumps
1810 ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1811 ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1812 ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1813 ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1816 func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
1818 case ssa.BlockDefer:
1819 // defer returns in R3:
1820 // 0 if we should continue executing
1821 // 1 if we should jump to deferreturn call
1822 p := s.Prog(ppc64.ACMP)
1823 p.From.Type = obj.TYPE_REG
1824 p.From.Reg = ppc64.REG_R3
1825 p.To.Type = obj.TYPE_REG
1826 p.To.Reg = ppc64.REG_R0
1828 p = s.Prog(ppc64.ABNE)
1829 p.To.Type = obj.TYPE_BRANCH
1830 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
1831 if b.Succs[0].Block() != next {
1832 p := s.Prog(obj.AJMP)
1833 p.To.Type = obj.TYPE_BRANCH
1834 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1837 case ssa.BlockPlain:
1838 if b.Succs[0].Block() != next {
1839 p := s.Prog(obj.AJMP)
1840 p.To.Type = obj.TYPE_BRANCH
1841 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1846 case ssa.BlockRetJmp:
1847 p := s.Prog(obj.AJMP)
1848 p.To.Type = obj.TYPE_MEM
1849 p.To.Name = obj.NAME_EXTERN
1850 p.To.Sym = b.Aux.(*obj.LSym)
1852 case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
1853 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
1854 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
1855 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
1856 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
1857 jmp := blockJump[b.Kind]
1859 case b.Succs[0].Block():
1860 s.Br(jmp.invasm, b.Succs[1].Block())
1862 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1863 s.Br(ppc64.ABVS, b.Succs[1].Block())
1865 case b.Succs[1].Block():
1866 s.Br(jmp.asm, b.Succs[0].Block())
1868 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1871 if b.Likely != ssa.BranchUnlikely {
1872 s.Br(jmp.asm, b.Succs[0].Block())
1874 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1876 s.Br(obj.AJMP, b.Succs[1].Block())
1878 s.Br(jmp.invasm, b.Succs[1].Block())
1880 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1881 s.Br(ppc64.ABVS, b.Succs[1].Block())
1883 s.Br(obj.AJMP, b.Succs[0].Block())
1887 b.Fatalf("branch not implemented: %s", b.LongString())