1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/gc"
9 "cmd/compile/internal/logopt"
10 "cmd/compile/internal/ssa"
11 "cmd/compile/internal/types"
13 "cmd/internal/obj/ppc64"
19 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
// On ppc64 this is currently a no-op: the sketch below is entirely commented
// out, so no values are ever marked. It is kept as a reference for how the
// flag-liveness scan would work (compare the amd64 implementation).
20 func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
21 // flive := b.FlagsLiveAtEnd
22 // if b.Control != nil && b.Control.Type.IsFlags() {
25 // for i := len(b.Values) - 1; i >= 0; i-- {
27 // if flive && (v.Op == ssa.OpPPC64MOVDconst) {
28 // // The "mark" is any non-nil Aux value.
31 // if v.Type.IsFlags() {
34 // for _, a := range v.Args {
35 // if a.Type.IsFlags() {
42 // loadByType returns the load instruction of the given type.
// It panics when t is not a type that can be loaded with a single
// PPC64 load instruction.
43 func loadByType(t *types.Type) obj.As {
// No case matched: t has no corresponding load instruction.
75 panic("bad load type")
78 // storeByType returns the store instruction of the given type.
// It panics when t is not a type that can be stored with a single
// PPC64 store instruction.
79 func storeByType(t *types.Type) obj.As {
// No case matched: t has no corresponding store instruction.
99 panic("bad store type")
102 func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
125 case ssa.OpPPC64LoweredMuluhilo:
126 // MULHDU Rarg1, Rarg0, Reg0
127 // MULLD Rarg1, Rarg0, Reg1
128 r0 := v.Args[0].Reg()
129 r1 := v.Args[1].Reg()
130 p := s.Prog(ppc64.AMULHDU)
131 p.From.Type = obj.TYPE_REG
134 p.To.Type = obj.TYPE_REG
136 p1 := s.Prog(ppc64.AMULLD)
137 p1.From.Type = obj.TYPE_REG
140 p1.To.Type = obj.TYPE_REG
143 case ssa.OpPPC64LoweredAdd64Carry:
144 // ADDC Rarg2, -1, Rtmp
145 // ADDE Rarg1, Rarg0, Reg0
147 r0 := v.Args[0].Reg()
148 r1 := v.Args[1].Reg()
149 r2 := v.Args[2].Reg()
150 p := s.Prog(ppc64.AADDC)
151 p.From.Type = obj.TYPE_CONST
154 p.To.Type = obj.TYPE_REG
155 p.To.Reg = ppc64.REGTMP
156 p1 := s.Prog(ppc64.AADDE)
157 p1.From.Type = obj.TYPE_REG
160 p1.To.Type = obj.TYPE_REG
162 p2 := s.Prog(ppc64.AADDZE)
163 p2.From.Type = obj.TYPE_REG
164 p2.From.Reg = ppc64.REGZERO
165 p2.To.Type = obj.TYPE_REG
168 case ssa.OpPPC64LoweredAtomicAnd8,
169 ssa.OpPPC64LoweredAtomicOr8:
171 // LBAR (Rarg0), Rtmp
172 // AND/OR Rarg1, Rtmp
173 // STBCCC Rtmp, (Rarg0)
175 r0 := v.Args[0].Reg()
176 r1 := v.Args[1].Reg()
177 // LWSYNC - Assuming shared data not write-through-required nor
178 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
179 plwsync := s.Prog(ppc64.ALWSYNC)
180 plwsync.To.Type = obj.TYPE_NONE
181 p := s.Prog(ppc64.ALBAR)
182 p.From.Type = obj.TYPE_MEM
184 p.To.Type = obj.TYPE_REG
185 p.To.Reg = ppc64.REGTMP
186 p1 := s.Prog(v.Op.Asm())
187 p1.From.Type = obj.TYPE_REG
189 p1.To.Type = obj.TYPE_REG
190 p1.To.Reg = ppc64.REGTMP
191 p2 := s.Prog(ppc64.ASTBCCC)
192 p2.From.Type = obj.TYPE_REG
193 p2.From.Reg = ppc64.REGTMP
194 p2.To.Type = obj.TYPE_MEM
196 p2.RegTo2 = ppc64.REGTMP
197 p3 := s.Prog(ppc64.ABNE)
198 p3.To.Type = obj.TYPE_BRANCH
201 case ssa.OpPPC64LoweredAtomicAdd32,
202 ssa.OpPPC64LoweredAtomicAdd64:
204 // LDAR/LWAR (Rarg0), Rout
206 // STDCCC/STWCCC Rout, (Rarg0)
208 // MOVW Rout,Rout (if Add32)
211 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
215 r0 := v.Args[0].Reg()
216 r1 := v.Args[1].Reg()
218 // LWSYNC - Assuming shared data not write-through-required nor
219 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
220 plwsync := s.Prog(ppc64.ALWSYNC)
221 plwsync.To.Type = obj.TYPE_NONE
224 p.From.Type = obj.TYPE_MEM
226 p.To.Type = obj.TYPE_REG
229 p1 := s.Prog(ppc64.AADD)
230 p1.From.Type = obj.TYPE_REG
233 p1.To.Type = obj.TYPE_REG
236 p3.From.Type = obj.TYPE_REG
238 p3.To.Type = obj.TYPE_MEM
241 p4 := s.Prog(ppc64.ABNE)
242 p4.To.Type = obj.TYPE_BRANCH
245 // Ensure a 32 bit result
246 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
247 p5 := s.Prog(ppc64.AMOVWZ)
248 p5.To.Type = obj.TYPE_REG
250 p5.From.Type = obj.TYPE_REG
254 case ssa.OpPPC64LoweredAtomicExchange32,
255 ssa.OpPPC64LoweredAtomicExchange64:
257 // LDAR/LWAR (Rarg0), Rout
258 // STDCCC/STWCCC Rout, (Rarg0)
263 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
267 r0 := v.Args[0].Reg()
268 r1 := v.Args[1].Reg()
270 // LWSYNC - Assuming shared data not write-through-required nor
271 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
272 plwsync := s.Prog(ppc64.ALWSYNC)
273 plwsync.To.Type = obj.TYPE_NONE
276 p.From.Type = obj.TYPE_MEM
278 p.To.Type = obj.TYPE_REG
282 p1.From.Type = obj.TYPE_REG
284 p1.To.Type = obj.TYPE_MEM
287 p2 := s.Prog(ppc64.ABNE)
288 p2.To.Type = obj.TYPE_BRANCH
291 pisync := s.Prog(ppc64.AISYNC)
292 pisync.To.Type = obj.TYPE_NONE
294 case ssa.OpPPC64LoweredAtomicLoad8,
295 ssa.OpPPC64LoweredAtomicLoad32,
296 ssa.OpPPC64LoweredAtomicLoad64,
297 ssa.OpPPC64LoweredAtomicLoadPtr:
299 // MOVB/MOVD/MOVW (Rarg0), Rout
306 case ssa.OpPPC64LoweredAtomicLoad8:
308 case ssa.OpPPC64LoweredAtomicLoad32:
312 arg0 := v.Args[0].Reg()
314 // SYNC when AuxInt == 1; otherwise, load-acquire
316 psync := s.Prog(ppc64.ASYNC)
317 psync.To.Type = obj.TYPE_NONE
321 p.From.Type = obj.TYPE_MEM
323 p.To.Type = obj.TYPE_REG
327 p1.From.Type = obj.TYPE_REG
329 p1.To.Type = obj.TYPE_REG
332 p2 := s.Prog(ppc64.ABNE)
333 p2.To.Type = obj.TYPE_BRANCH
335 pisync := s.Prog(ppc64.AISYNC)
336 pisync.To.Type = obj.TYPE_NONE
339 case ssa.OpPPC64LoweredAtomicStore8,
340 ssa.OpPPC64LoweredAtomicStore32,
341 ssa.OpPPC64LoweredAtomicStore64:
343 // MOVB/MOVW/MOVD arg1,(arg0)
346 case ssa.OpPPC64LoweredAtomicStore8:
348 case ssa.OpPPC64LoweredAtomicStore32:
351 arg0 := v.Args[0].Reg()
352 arg1 := v.Args[1].Reg()
353 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
355 syncOp := ppc64.ASYNC
357 syncOp = ppc64.ALWSYNC
359 psync := s.Prog(syncOp)
360 psync.To.Type = obj.TYPE_NONE
363 p.To.Type = obj.TYPE_MEM
365 p.From.Type = obj.TYPE_REG
368 case ssa.OpPPC64LoweredAtomicCas64,
369 ssa.OpPPC64LoweredAtomicCas32:
372 // LDAR (Rarg0), MutexHint, Rtmp
375 // STDCCC Rarg2, (Rarg0)
377 // LWSYNC // Only for sequential consistency; not required in CasRel.
386 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
391 r0 := v.Args[0].Reg()
392 r1 := v.Args[1].Reg()
393 r2 := v.Args[2].Reg()
395 // LWSYNC - Assuming shared data not write-through-required nor
396 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
397 plwsync1 := s.Prog(ppc64.ALWSYNC)
398 plwsync1.To.Type = obj.TYPE_NONE
401 p.From.Type = obj.TYPE_MEM
403 p.To.Type = obj.TYPE_REG
404 p.To.Reg = ppc64.REGTMP
405 // If it is a Compare-and-Swap-Release operation, set the EH field with
408 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
412 p1.From.Type = obj.TYPE_REG
414 p1.To.Reg = ppc64.REGTMP
415 p1.To.Type = obj.TYPE_REG
417 p2 := s.Prog(ppc64.ABNE)
418 p2.To.Type = obj.TYPE_BRANCH
421 p3.From.Type = obj.TYPE_REG
423 p3.To.Type = obj.TYPE_MEM
426 p4 := s.Prog(ppc64.ABNE)
427 p4.To.Type = obj.TYPE_BRANCH
429 // LWSYNC - Assuming shared data not write-through-required nor
430 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
431 // If the operation is a CAS-Release, then synchronization is not necessary.
433 plwsync2 := s.Prog(ppc64.ALWSYNC)
434 plwsync2.To.Type = obj.TYPE_NONE
437 p5 := s.Prog(ppc64.AMOVD)
438 p5.From.Type = obj.TYPE_CONST
440 p5.To.Type = obj.TYPE_REG
443 p6 := s.Prog(obj.AJMP)
444 p6.To.Type = obj.TYPE_BRANCH
446 p7 := s.Prog(ppc64.AMOVD)
447 p7.From.Type = obj.TYPE_CONST
449 p7.To.Type = obj.TYPE_REG
453 p8 := s.Prog(obj.ANOP)
456 case ssa.OpPPC64LoweredGetClosurePtr:
457 // Closure pointer is R11 (already)
458 gc.CheckLoweredGetClosurePtr(v)
460 case ssa.OpPPC64LoweredGetCallerSP:
461 // caller's SP is FixedFrameSize below the address of the first arg
462 p := s.Prog(ppc64.AMOVD)
463 p.From.Type = obj.TYPE_ADDR
464 p.From.Offset = -gc.Ctxt.FixedFrameSize()
465 p.From.Name = obj.NAME_PARAM
466 p.To.Type = obj.TYPE_REG
469 case ssa.OpPPC64LoweredGetCallerPC:
470 p := s.Prog(obj.AGETCALLERPC)
471 p.To.Type = obj.TYPE_REG
474 case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
475 // input is already rounded
478 loadOp := loadByType(v.Type)
480 gc.AddrAuto(&p.From, v.Args[0])
481 p.To.Type = obj.TYPE_REG
485 storeOp := storeByType(v.Type)
487 p.From.Type = obj.TYPE_REG
488 p.From.Reg = v.Args[0].Reg()
489 gc.AddrAuto(&p.To, v)
491 case ssa.OpPPC64DIVD:
501 r0 := v.Args[0].Reg()
502 r1 := v.Args[1].Reg()
504 p := s.Prog(ppc64.ACMP)
505 p.From.Type = obj.TYPE_REG
507 p.To.Type = obj.TYPE_CONST
510 pbahead := s.Prog(ppc64.ABEQ)
511 pbahead.To.Type = obj.TYPE_BRANCH
513 p = s.Prog(v.Op.Asm())
514 p.From.Type = obj.TYPE_REG
517 p.To.Type = obj.TYPE_REG
520 pbover := s.Prog(obj.AJMP)
521 pbover.To.Type = obj.TYPE_BRANCH
523 p = s.Prog(ppc64.ANEG)
524 p.To.Type = obj.TYPE_REG
526 p.From.Type = obj.TYPE_REG
533 case ssa.OpPPC64DIVW:
534 // word-width version of above
536 r0 := v.Args[0].Reg()
537 r1 := v.Args[1].Reg()
539 p := s.Prog(ppc64.ACMPW)
540 p.From.Type = obj.TYPE_REG
542 p.To.Type = obj.TYPE_CONST
545 pbahead := s.Prog(ppc64.ABEQ)
546 pbahead.To.Type = obj.TYPE_BRANCH
548 p = s.Prog(v.Op.Asm())
549 p.From.Type = obj.TYPE_REG
552 p.To.Type = obj.TYPE_REG
555 pbover := s.Prog(obj.AJMP)
556 pbover.To.Type = obj.TYPE_BRANCH
558 p = s.Prog(ppc64.ANEG)
559 p.To.Type = obj.TYPE_REG
561 p.From.Type = obj.TYPE_REG
568 case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
569 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
570 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
571 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
572 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
573 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
574 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV:
576 r1 := v.Args[0].Reg()
577 r2 := v.Args[1].Reg()
578 p := s.Prog(v.Op.Asm())
579 p.From.Type = obj.TYPE_REG
582 p.To.Type = obj.TYPE_REG
585 case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
586 r1 := v.Args[0].Reg()
587 r2 := v.Args[1].Reg()
588 p := s.Prog(v.Op.Asm())
589 p.From.Type = obj.TYPE_REG
592 p.To.Type = obj.TYPE_REG
593 p.To.Reg = ppc64.REGTMP // result is not needed
595 case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
596 p := s.Prog(v.Op.Asm())
597 p.From.Type = obj.TYPE_CONST
598 p.From.Offset = v.AuxInt
599 p.Reg = v.Args[0].Reg()
600 p.To.Type = obj.TYPE_REG
603 case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
605 r1 := v.Args[0].Reg()
606 r2 := v.Args[1].Reg()
607 r3 := v.Args[2].Reg()
609 p := s.Prog(v.Op.Asm())
610 p.From.Type = obj.TYPE_REG
613 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
614 p.To.Type = obj.TYPE_REG
617 case ssa.OpPPC64MaskIfNotCarry:
619 p := s.Prog(v.Op.Asm())
620 p.From.Type = obj.TYPE_REG
621 p.From.Reg = ppc64.REGZERO
622 p.To.Type = obj.TYPE_REG
625 case ssa.OpPPC64ADDconstForCarry:
626 r1 := v.Args[0].Reg()
627 p := s.Prog(v.Op.Asm())
629 p.From.Type = obj.TYPE_CONST
630 p.From.Offset = v.AuxInt
631 p.To.Type = obj.TYPE_REG
632 p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
634 case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
635 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
636 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
637 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
639 p := s.Prog(v.Op.Asm())
640 p.To.Type = obj.TYPE_REG
642 p.From.Type = obj.TYPE_REG
643 p.From.Reg = v.Args[0].Reg()
645 case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
646 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst:
647 p := s.Prog(v.Op.Asm())
648 p.Reg = v.Args[0].Reg()
649 p.From.Type = obj.TYPE_CONST
650 p.From.Offset = v.AuxInt
651 p.To.Type = obj.TYPE_REG
654 case ssa.OpPPC64ANDCCconst:
655 p := s.Prog(v.Op.Asm())
656 p.Reg = v.Args[0].Reg()
659 p.From.Type = obj.TYPE_CONST
660 p.From.Offset = gc.AuxOffset(v)
662 p.From.Type = obj.TYPE_CONST
663 p.From.Offset = v.AuxInt
666 p.To.Type = obj.TYPE_REG
667 p.To.Reg = ppc64.REGTMP // discard result
669 case ssa.OpPPC64MOVDaddr:
670 switch v.Aux.(type) {
672 v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
674 // If aux offset and aux int are both 0, and the same
675 // input and output regs are used, no instruction
676 // needs to be generated, since it would just be
678 if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
679 p := s.Prog(ppc64.AMOVD)
680 p.From.Type = obj.TYPE_ADDR
681 p.From.Reg = v.Args[0].Reg()
682 p.From.Offset = v.AuxInt
683 p.To.Type = obj.TYPE_REG
687 case *obj.LSym, *gc.Node:
688 p := s.Prog(ppc64.AMOVD)
689 p.From.Type = obj.TYPE_ADDR
690 p.From.Reg = v.Args[0].Reg()
691 p.To.Type = obj.TYPE_REG
693 gc.AddAux(&p.From, v)
697 case ssa.OpPPC64MOVDconst:
698 p := s.Prog(v.Op.Asm())
699 p.From.Type = obj.TYPE_CONST
700 p.From.Offset = v.AuxInt
701 p.To.Type = obj.TYPE_REG
704 case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
705 p := s.Prog(v.Op.Asm())
706 p.From.Type = obj.TYPE_FCONST
707 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
708 p.To.Type = obj.TYPE_REG
711 case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
712 p := s.Prog(v.Op.Asm())
713 p.From.Type = obj.TYPE_REG
714 p.From.Reg = v.Args[0].Reg()
715 p.To.Type = obj.TYPE_REG
716 p.To.Reg = v.Args[1].Reg()
718 case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
719 p := s.Prog(v.Op.Asm())
720 p.From.Type = obj.TYPE_REG
721 p.From.Reg = v.Args[0].Reg()
722 p.To.Type = obj.TYPE_CONST
723 p.To.Offset = v.AuxInt
725 case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
726 // Shift in register to required size
727 p := s.Prog(v.Op.Asm())
728 p.From.Type = obj.TYPE_REG
729 p.From.Reg = v.Args[0].Reg()
731 p.To.Type = obj.TYPE_REG
733 case ssa.OpPPC64MOVDload:
735 // MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
736 // For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
737 // the offset is not known until link time. If the load of a go.string uses relocation for the
738 // offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
739 // To avoid this problem, the full address of the go.string is computed and loaded into the base register,
740 // and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
741 // go.string types because other types will have proper alignment.
744 switch n := v.Aux.(type) {
746 gostring = strings.HasPrefix(n.Name, "go.string.")
749 // Generate full addr of the go.string const
751 p := s.Prog(ppc64.AMOVD)
752 p.From.Type = obj.TYPE_ADDR
753 p.From.Reg = v.Args[0].Reg()
754 gc.AddAux(&p.From, v)
755 p.To.Type = obj.TYPE_REG
757 // Load go.string using 0 offset
758 p = s.Prog(v.Op.Asm())
759 p.From.Type = obj.TYPE_MEM
761 p.To.Type = obj.TYPE_REG
765 // Not a go.string, generate a normal load
768 case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
769 p := s.Prog(v.Op.Asm())
770 p.From.Type = obj.TYPE_MEM
771 p.From.Reg = v.Args[0].Reg()
772 gc.AddAux(&p.From, v)
773 p.To.Type = obj.TYPE_REG
776 case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
777 p := s.Prog(v.Op.Asm())
778 p.From.Type = obj.TYPE_MEM
779 p.From.Reg = v.Args[0].Reg()
780 p.To.Type = obj.TYPE_REG
783 case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
784 p := s.Prog(v.Op.Asm())
785 p.To.Type = obj.TYPE_MEM
786 p.To.Reg = v.Args[0].Reg()
787 p.From.Type = obj.TYPE_REG
788 p.From.Reg = v.Args[1].Reg()
790 case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
791 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
792 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
793 p := s.Prog(v.Op.Asm())
794 p.From.Type = obj.TYPE_MEM
795 p.From.Reg = v.Args[0].Reg()
796 p.From.Index = v.Args[1].Reg()
797 p.To.Type = obj.TYPE_REG
800 case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
801 p := s.Prog(v.Op.Asm())
802 p.From.Type = obj.TYPE_REG
803 p.From.Reg = ppc64.REGZERO
804 p.To.Type = obj.TYPE_MEM
805 p.To.Reg = v.Args[0].Reg()
808 case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
809 p := s.Prog(v.Op.Asm())
810 p.From.Type = obj.TYPE_REG
811 p.From.Reg = v.Args[1].Reg()
812 p.To.Type = obj.TYPE_MEM
813 p.To.Reg = v.Args[0].Reg()
816 case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
817 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
818 ssa.OpPPC64MOVHBRstoreidx:
819 p := s.Prog(v.Op.Asm())
820 p.From.Type = obj.TYPE_REG
821 p.From.Reg = v.Args[2].Reg()
822 p.To.Index = v.Args[1].Reg()
823 p.To.Type = obj.TYPE_MEM
824 p.To.Reg = v.Args[0].Reg()
826 case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
828 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
829 // ISEL only accepts 0, 1, 2 condition values but the others can be
830 // achieved by swapping operand order.
831 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
832 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
833 // ISELB is used when a boolean result is needed, returning 0 or 1
834 p := s.Prog(ppc64.AISEL)
835 p.To.Type = obj.TYPE_REG
837 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
838 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
839 if v.Op == ssa.OpPPC64ISEL {
840 r.Reg = v.Args[1].Reg()
842 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
845 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
847 p.Reg = v.Args[0].Reg()
850 p.From.Type = obj.TYPE_CONST
851 p.From.Offset = v.AuxInt & 3
853 case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
854 // The LoweredQuad code generation
855 // generates STXV instructions on
856 // power9. The Short variation is used
857 // if no loop is generated.
859 // sizes >= 64 generate a loop as follows:
861 // Set up loop counter in CTR, used by BC
862 // XXLXOR clears VS32
863 // XXLXOR VS32,VS32,VS32
864 // MOVD len/64,REG_TMP
874 // Bytes per iteration
880 // Only generate a loop if there is more
883 // Set up VS32 (V0) to hold 0s
884 p := s.Prog(ppc64.AXXLXOR)
885 p.From.Type = obj.TYPE_REG
886 p.From.Reg = ppc64.REG_VS32
887 p.To.Type = obj.TYPE_REG
888 p.To.Reg = ppc64.REG_VS32
889 p.Reg = ppc64.REG_VS32
891 // Set up CTR loop counter
892 p = s.Prog(ppc64.AMOVD)
893 p.From.Type = obj.TYPE_CONST
895 p.To.Type = obj.TYPE_REG
896 p.To.Reg = ppc64.REGTMP
898 p = s.Prog(ppc64.AMOVD)
899 p.From.Type = obj.TYPE_REG
900 p.From.Reg = ppc64.REGTMP
901 p.To.Type = obj.TYPE_REG
902 p.To.Reg = ppc64.REG_CTR
904 // Don't generate padding for
905 // loops with few iterations.
907 p = s.Prog(obj.APCALIGN)
908 p.From.Type = obj.TYPE_CONST
912 // generate 4 STXVs to zero 64 bytes
915 p = s.Prog(ppc64.ASTXV)
916 p.From.Type = obj.TYPE_REG
917 p.From.Reg = ppc64.REG_VS32
918 p.To.Type = obj.TYPE_MEM
919 p.To.Reg = v.Args[0].Reg()
921 // Save the top of loop
925 p = s.Prog(ppc64.ASTXV)
926 p.From.Type = obj.TYPE_REG
927 p.From.Reg = ppc64.REG_VS32
928 p.To.Type = obj.TYPE_MEM
929 p.To.Reg = v.Args[0].Reg()
932 p = s.Prog(ppc64.ASTXV)
933 p.From.Type = obj.TYPE_REG
934 p.From.Reg = ppc64.REG_VS32
935 p.To.Type = obj.TYPE_MEM
936 p.To.Reg = v.Args[0].Reg()
939 p = s.Prog(ppc64.ASTXV)
940 p.From.Type = obj.TYPE_REG
941 p.From.Reg = ppc64.REG_VS32
942 p.To.Type = obj.TYPE_MEM
943 p.To.Reg = v.Args[0].Reg()
946 // Increment address for the
947 // 64 bytes just zeroed.
948 p = s.Prog(ppc64.AADD)
949 p.Reg = v.Args[0].Reg()
950 p.From.Type = obj.TYPE_CONST
952 p.To.Type = obj.TYPE_REG
953 p.To.Reg = v.Args[0].Reg()
955 // Branch back to top of loop
957 // BC with BO_BCTR generates bdnz
958 p = s.Prog(ppc64.ABC)
959 p.From.Type = obj.TYPE_CONST
960 p.From.Offset = ppc64.BO_BCTR
962 p.To.Type = obj.TYPE_BRANCH
965 // When ctr == 1 the loop was not generated but
966 // there are at least 64 bytes to clear, so add
967 // that to the remainder to generate the code
968 // to clear those doublewords
973 // Clear the remainder starting at offset zero
976 if rem >= 16 && ctr <= 1 {
977 // If the XXLXOR hasn't already been
978 // generated, do it here to initialize
980 p := s.Prog(ppc64.AXXLXOR)
981 p.From.Type = obj.TYPE_REG
982 p.From.Reg = ppc64.REG_VS32
983 p.To.Type = obj.TYPE_REG
984 p.To.Reg = ppc64.REG_VS32
985 p.Reg = ppc64.REG_VS32
987 // Generate STXV for 32 or 64
990 p := s.Prog(ppc64.ASTXV)
991 p.From.Type = obj.TYPE_REG
992 p.From.Reg = ppc64.REG_VS32
993 p.To.Type = obj.TYPE_MEM
994 p.To.Reg = v.Args[0].Reg()
997 p = s.Prog(ppc64.ASTXV)
998 p.From.Type = obj.TYPE_REG
999 p.From.Reg = ppc64.REG_VS32
1000 p.To.Type = obj.TYPE_MEM
1001 p.To.Reg = v.Args[0].Reg()
1002 p.To.Offset = offset + 16
1006 // Generate 16 bytes
1008 p := s.Prog(ppc64.ASTXV)
1009 p.From.Type = obj.TYPE_REG
1010 p.From.Reg = ppc64.REG_VS32
1011 p.To.Type = obj.TYPE_MEM
1012 p.To.Reg = v.Args[0].Reg()
1013 p.To.Offset = offset
1018 // first clear as many doublewords as possible
1019 // then clear remaining sizes as available
1021 op, size := ppc64.AMOVB, int64(1)
1024 op, size = ppc64.AMOVD, 8
1026 op, size = ppc64.AMOVW, 4
1028 op, size = ppc64.AMOVH, 2
1031 p.From.Type = obj.TYPE_REG
1032 p.From.Reg = ppc64.REG_R0
1033 p.To.Type = obj.TYPE_MEM
1034 p.To.Reg = v.Args[0].Reg()
1035 p.To.Offset = offset
1040 case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1042 // Unaligned data doesn't hurt performance
1043 // for these instructions on power8.
1045 // For sizes >= 64 generate a loop as follows:
1047 // Set up loop counter in CTR, used by BC
1048 // XXLXOR VS32,VS32,VS32
1049 // MOVD len/32,REG_TMP
1053 // STXVD2X VS32,(R0)(R20)
1054 // STXVD2X VS32,(R31)(R20)
1058 // any remainder is done as described below
1060 // for sizes < 64 bytes, first clear as many doublewords as possible,
1061 // then handle the remainder
1066 // the remainder bytes are cleared using one or more
1067 // of the following instructions with the appropriate
1068 // offsets depending which instructions are needed
1070 // MOVW R0,n1(R20) 4 bytes
1071 // MOVH R0,n2(R20) 2 bytes
1072 // MOVB R0,n3(R20) 1 byte
1074 // 7 bytes: MOVW, MOVH, MOVB
1075 // 6 bytes: MOVW, MOVH
1076 // 5 bytes: MOVW, MOVB
1077 // 3 bytes: MOVH, MOVB
1079 // each loop iteration does 32 bytes
1080 ctr := v.AuxInt / 32
1083 rem := v.AuxInt % 32
1085 // only generate a loop if there is more
1086 // than 1 iteration.
1088 // Set up VS32 (V0) to hold 0s
1089 p := s.Prog(ppc64.AXXLXOR)
1090 p.From.Type = obj.TYPE_REG
1091 p.From.Reg = ppc64.REG_VS32
1092 p.To.Type = obj.TYPE_REG
1093 p.To.Reg = ppc64.REG_VS32
1094 p.Reg = ppc64.REG_VS32
1096 // Set up CTR loop counter
1097 p = s.Prog(ppc64.AMOVD)
1098 p.From.Type = obj.TYPE_CONST
1100 p.To.Type = obj.TYPE_REG
1101 p.To.Reg = ppc64.REGTMP
1103 p = s.Prog(ppc64.AMOVD)
1104 p.From.Type = obj.TYPE_REG
1105 p.From.Reg = ppc64.REGTMP
1106 p.To.Type = obj.TYPE_REG
1107 p.To.Reg = ppc64.REG_CTR
1109 // Set up R31 to hold index value 16
1110 p = s.Prog(ppc64.AMOVD)
1111 p.From.Type = obj.TYPE_CONST
1113 p.To.Type = obj.TYPE_REG
1114 p.To.Reg = ppc64.REGTMP
1116 // Don't add padding for alignment
1117 // with few loop iterations.
1119 p = s.Prog(obj.APCALIGN)
1120 p.From.Type = obj.TYPE_CONST
1124 // generate 2 STXVD2Xs to store 16 bytes
1125 // when this is a loop then the top must be saved
1127 // This is the top of loop
1129 p = s.Prog(ppc64.ASTXVD2X)
1130 p.From.Type = obj.TYPE_REG
1131 p.From.Reg = ppc64.REG_VS32
1132 p.To.Type = obj.TYPE_MEM
1133 p.To.Reg = v.Args[0].Reg()
1134 p.To.Index = ppc64.REGZERO
1135 // Save the top of loop
1139 p = s.Prog(ppc64.ASTXVD2X)
1140 p.From.Type = obj.TYPE_REG
1141 p.From.Reg = ppc64.REG_VS32
1142 p.To.Type = obj.TYPE_MEM
1143 p.To.Reg = v.Args[0].Reg()
1144 p.To.Index = ppc64.REGTMP
1146 // Increment address for the
1147 // 4 doublewords just zeroed.
1148 p = s.Prog(ppc64.AADD)
1149 p.Reg = v.Args[0].Reg()
1150 p.From.Type = obj.TYPE_CONST
1152 p.To.Type = obj.TYPE_REG
1153 p.To.Reg = v.Args[0].Reg()
1155 // Branch back to top of loop
1157 // BC with BO_BCTR generates bdnz
1158 p = s.Prog(ppc64.ABC)
1159 p.From.Type = obj.TYPE_CONST
1160 p.From.Offset = ppc64.BO_BCTR
1161 p.Reg = ppc64.REG_R0
1162 p.To.Type = obj.TYPE_BRANCH
1166 // when ctr == 1 the loop was not generated but
1167 // there are at least 32 bytes to clear, so add
1168 // that to the remainder to generate the code
1169 // to clear those doublewords
1174 // clear the remainder starting at offset zero
1177 // first clear as many doublewords as possible
1178 // then clear remaining sizes as available
1180 op, size := ppc64.AMOVB, int64(1)
1183 op, size = ppc64.AMOVD, 8
1185 op, size = ppc64.AMOVW, 4
1187 op, size = ppc64.AMOVH, 2
1190 p.From.Type = obj.TYPE_REG
1191 p.From.Reg = ppc64.REG_R0
1192 p.To.Type = obj.TYPE_MEM
1193 p.To.Reg = v.Args[0].Reg()
1194 p.To.Offset = offset
1199 case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1201 bytesPerLoop := int64(32)
1202 // This will be used when moving more
1203 // than 8 bytes. Moves start with
1204 // as many 8 byte moves as possible, then
1205 // 4, 2, or 1 byte(s) as remaining. This will
1206 // work and be efficient for power8 or later.
1207 // If there are 64 or more bytes, then a
1208 // loop is generated to move 32 bytes and
1209 // update the src and dst addresses on each
1210 // iteration. When < 64 bytes, the appropriate
1211 // number of moves are generated based on the
1213 // When moving >= 64 bytes a loop is used
1214 // MOVD len/32,REG_TMP
1218 // LXVD2X (R0)(R21),VS32
1219 // LXVD2X (R31)(R21),VS33
1221 // STXVD2X VS32,(R0)(R20)
1222 // STXVD2X VS33,(R31)(R20)
1225 // Bytes not moved by this loop are moved
1226 // with a combination of the following instructions,
1227 // starting with the largest sizes and generating as
1228 // many as needed, using the appropriate offset value.
1238 // Each loop iteration moves 32 bytes
1239 ctr := v.AuxInt / bytesPerLoop
1241 // Remainder after the loop
1242 rem := v.AuxInt % bytesPerLoop
1244 dstReg := v.Args[0].Reg()
1245 srcReg := v.Args[1].Reg()
1247 // The set of registers used here, must match the clobbered reg list
1253 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1256 p := s.Prog(ppc64.AMOVD)
1257 p.From.Type = obj.TYPE_CONST
1259 p.To.Type = obj.TYPE_REG
1260 p.To.Reg = ppc64.REGTMP
1262 p = s.Prog(ppc64.AMOVD)
1263 p.From.Type = obj.TYPE_REG
1264 p.From.Reg = ppc64.REGTMP
1265 p.To.Type = obj.TYPE_REG
1266 p.To.Reg = ppc64.REG_CTR
1268 // Use REGTMP as index reg
1269 p = s.Prog(ppc64.AMOVD)
1270 p.From.Type = obj.TYPE_CONST
1272 p.To.Type = obj.TYPE_REG
1273 p.To.Reg = ppc64.REGTMP
1275 // Don't adding padding for
1276 // alignment with small iteration
1279 p = s.Prog(obj.APCALIGN)
1280 p.From.Type = obj.TYPE_CONST
1284 // Generate 16 byte loads and stores.
1285 // Use temp register for index (16)
1286 // on the second one.
1288 p = s.Prog(ppc64.ALXVD2X)
1289 p.From.Type = obj.TYPE_MEM
1291 p.From.Index = ppc64.REGZERO
1292 p.To.Type = obj.TYPE_REG
1293 p.To.Reg = ppc64.REG_VS32
1297 p = s.Prog(ppc64.ALXVD2X)
1298 p.From.Type = obj.TYPE_MEM
1300 p.From.Index = ppc64.REGTMP
1301 p.To.Type = obj.TYPE_REG
1302 p.To.Reg = ppc64.REG_VS33
1304 // increment the src reg for next iteration
1305 p = s.Prog(ppc64.AADD)
1307 p.From.Type = obj.TYPE_CONST
1308 p.From.Offset = bytesPerLoop
1309 p.To.Type = obj.TYPE_REG
1312 // generate 16 byte stores
1313 p = s.Prog(ppc64.ASTXVD2X)
1314 p.From.Type = obj.TYPE_REG
1315 p.From.Reg = ppc64.REG_VS32
1316 p.To.Type = obj.TYPE_MEM
1318 p.To.Index = ppc64.REGZERO
1320 p = s.Prog(ppc64.ASTXVD2X)
1321 p.From.Type = obj.TYPE_REG
1322 p.From.Reg = ppc64.REG_VS33
1323 p.To.Type = obj.TYPE_MEM
1325 p.To.Index = ppc64.REGTMP
1327 // increment the dst reg for next iteration
1328 p = s.Prog(ppc64.AADD)
1330 p.From.Type = obj.TYPE_CONST
1331 p.From.Offset = bytesPerLoop
1332 p.To.Type = obj.TYPE_REG
1335 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1337 p = s.Prog(ppc64.ABC)
1338 p.From.Type = obj.TYPE_CONST
1339 p.From.Offset = ppc64.BO_BCTR
1340 p.Reg = ppc64.REG_R0
1341 p.To.Type = obj.TYPE_BRANCH
1344 // srcReg and dstReg were incremented in the loop, so
1345 // later instructions start with offset 0.
1349 // No loop was generated for one iteration, so
1350 // add 32 bytes to the remainder to move those bytes.
1356 // Generate 16 byte loads and stores.
1357 // Use temp register for index (value 16)
1358 // on the second one.
1359 p := s.Prog(ppc64.ALXVD2X)
1360 p.From.Type = obj.TYPE_MEM
1362 p.From.Index = ppc64.REGZERO
1363 p.To.Type = obj.TYPE_REG
1364 p.To.Reg = ppc64.REG_VS32
1366 p = s.Prog(ppc64.ASTXVD2X)
1367 p.From.Type = obj.TYPE_REG
1368 p.From.Reg = ppc64.REG_VS32
1369 p.To.Type = obj.TYPE_MEM
1371 p.To.Index = ppc64.REGZERO
1377 // Use REGTMP as index reg
1378 p := s.Prog(ppc64.AMOVD)
1379 p.From.Type = obj.TYPE_CONST
1381 p.To.Type = obj.TYPE_REG
1382 p.To.Reg = ppc64.REGTMP
1384 p = s.Prog(ppc64.ALXVD2X)
1385 p.From.Type = obj.TYPE_MEM
1387 p.From.Index = ppc64.REGTMP
1388 p.To.Type = obj.TYPE_REG
1389 p.To.Reg = ppc64.REG_VS32
1391 p = s.Prog(ppc64.ASTXVD2X)
1392 p.From.Type = obj.TYPE_REG
1393 p.From.Reg = ppc64.REG_VS32
1394 p.To.Type = obj.TYPE_MEM
1396 p.To.Index = ppc64.REGTMP
1403 // Generate all the remaining load and store pairs, starting with
1404 // as many 8 byte moves as possible, then 4, 2, 1.
1406 op, size := ppc64.AMOVB, int64(1)
1409 op, size = ppc64.AMOVD, 8
1411 op, size = ppc64.AMOVW, 4
1413 op, size = ppc64.AMOVH, 2
1417 p.To.Type = obj.TYPE_REG
1418 p.To.Reg = ppc64.REGTMP
1419 p.From.Type = obj.TYPE_MEM
1421 p.From.Offset = offset
1425 p.From.Type = obj.TYPE_REG
1426 p.From.Reg = ppc64.REGTMP
1427 p.To.Type = obj.TYPE_MEM
1429 p.To.Offset = offset
1434 case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1435 bytesPerLoop := int64(64)
1436 // This is used when moving more
1437 // than 8 bytes on power9. Moves start with
1438 // as many 8 byte moves as possible, then
1439 // 4, 2, or 1 byte(s) as remaining. This will
1440 // work and be efficient for power8 or later.
1441 // If there are 64 or more bytes, then a
1442 // loop is generated to move 32 bytes and
1443 // update the src and dst addresses on each
1444 // iteration. When < 64 bytes, the appropriate
1445 // number of moves are generated based on the
1447 // When moving >= 64 bytes a loop is used
1448 // MOVD len/32,REG_TMP
1455 // STXV VS33,16(R20)
1458 // Bytes not moved by this loop are moved
1459 // with a combination of the following instructions,
1460 // starting with the largest sizes and generating as
1461 // many as needed, using the appropriate offset value.
1471 // Each loop iteration moves 32 bytes
1472 ctr := v.AuxInt / bytesPerLoop
1474 // Remainder after the loop
1475 rem := v.AuxInt % bytesPerLoop
1477 dstReg := v.Args[0].Reg()
1478 srcReg := v.Args[1].Reg()
1485 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1488 p := s.Prog(ppc64.AMOVD)
1489 p.From.Type = obj.TYPE_CONST
1491 p.To.Type = obj.TYPE_REG
1492 p.To.Reg = ppc64.REGTMP
1494 p = s.Prog(ppc64.AMOVD)
1495 p.From.Type = obj.TYPE_REG
1496 p.From.Reg = ppc64.REGTMP
1497 p.To.Type = obj.TYPE_REG
1498 p.To.Reg = ppc64.REG_CTR
1500 p = s.Prog(obj.APCALIGN)
1501 p.From.Type = obj.TYPE_CONST
1504 // Generate 16 byte loads and stores.
1505 p = s.Prog(ppc64.ALXV)
1506 p.From.Type = obj.TYPE_MEM
1508 p.From.Offset = offset
1509 p.To.Type = obj.TYPE_REG
1510 p.To.Reg = ppc64.REG_VS32
1514 p = s.Prog(ppc64.ALXV)
1515 p.From.Type = obj.TYPE_MEM
1517 p.From.Offset = offset + 16
1518 p.To.Type = obj.TYPE_REG
1519 p.To.Reg = ppc64.REG_VS33
1521 // generate 16 byte stores
1522 p = s.Prog(ppc64.ASTXV)
1523 p.From.Type = obj.TYPE_REG
1524 p.From.Reg = ppc64.REG_VS32
1525 p.To.Type = obj.TYPE_MEM
1527 p.To.Offset = offset
1529 p = s.Prog(ppc64.ASTXV)
1530 p.From.Type = obj.TYPE_REG
1531 p.From.Reg = ppc64.REG_VS33
1532 p.To.Type = obj.TYPE_MEM
1534 p.To.Offset = offset + 16
1536 // Generate 16 byte loads and stores.
1537 p = s.Prog(ppc64.ALXV)
1538 p.From.Type = obj.TYPE_MEM
1540 p.From.Offset = offset + 32
1541 p.To.Type = obj.TYPE_REG
1542 p.To.Reg = ppc64.REG_VS32
1544 p = s.Prog(ppc64.ALXV)
1545 p.From.Type = obj.TYPE_MEM
1547 p.From.Offset = offset + 48
1548 p.To.Type = obj.TYPE_REG
1549 p.To.Reg = ppc64.REG_VS33
1551 // generate 16 byte stores
1552 p = s.Prog(ppc64.ASTXV)
1553 p.From.Type = obj.TYPE_REG
1554 p.From.Reg = ppc64.REG_VS32
1555 p.To.Type = obj.TYPE_MEM
1557 p.To.Offset = offset + 32
1559 p = s.Prog(ppc64.ASTXV)
1560 p.From.Type = obj.TYPE_REG
1561 p.From.Reg = ppc64.REG_VS33
1562 p.To.Type = obj.TYPE_MEM
1564 p.To.Offset = offset + 48
1566 // increment the src reg for next iteration
1567 p = s.Prog(ppc64.AADD)
1569 p.From.Type = obj.TYPE_CONST
1570 p.From.Offset = bytesPerLoop
1571 p.To.Type = obj.TYPE_REG
1574 // increment the dst reg for next iteration
1575 p = s.Prog(ppc64.AADD)
1577 p.From.Type = obj.TYPE_CONST
1578 p.From.Offset = bytesPerLoop
1579 p.To.Type = obj.TYPE_REG
1582 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1584 p = s.Prog(ppc64.ABC)
1585 p.From.Type = obj.TYPE_CONST
1586 p.From.Offset = ppc64.BO_BCTR
1587 p.Reg = ppc64.REG_R0
1588 p.To.Type = obj.TYPE_BRANCH
1591 // srcReg and dstReg were incremented in the loop, so
1592 // later instructions start with offset 0.
1596 // No loop was generated for one iteration, so
1597 // add 32 bytes to the remainder to move those bytes.
1602 p := s.Prog(ppc64.ALXV)
1603 p.From.Type = obj.TYPE_MEM
1605 p.To.Type = obj.TYPE_REG
1606 p.To.Reg = ppc64.REG_VS32
1608 p = s.Prog(ppc64.ALXV)
1609 p.From.Type = obj.TYPE_MEM
1612 p.To.Type = obj.TYPE_REG
1613 p.To.Reg = ppc64.REG_VS33
1615 p = s.Prog(ppc64.ASTXV)
1616 p.From.Type = obj.TYPE_REG
1617 p.From.Reg = ppc64.REG_VS32
1618 p.To.Type = obj.TYPE_MEM
1621 p = s.Prog(ppc64.ASTXV)
1622 p.From.Type = obj.TYPE_REG
1623 p.From.Reg = ppc64.REG_VS33
1624 p.To.Type = obj.TYPE_MEM
1633 // Generate 16 byte loads and stores.
1634 p := s.Prog(ppc64.ALXV)
1635 p.From.Type = obj.TYPE_MEM
1637 p.From.Offset = offset
1638 p.To.Type = obj.TYPE_REG
1639 p.To.Reg = ppc64.REG_VS32
1641 p = s.Prog(ppc64.ASTXV)
1642 p.From.Type = obj.TYPE_REG
1643 p.From.Reg = ppc64.REG_VS32
1644 p.To.Type = obj.TYPE_MEM
1646 p.To.Offset = offset
1652 p := s.Prog(ppc64.ALXV)
1653 p.From.Type = obj.TYPE_MEM
1655 p.From.Offset = offset
1656 p.To.Type = obj.TYPE_REG
1657 p.To.Reg = ppc64.REG_VS32
1659 p = s.Prog(ppc64.ASTXV)
1660 p.From.Type = obj.TYPE_REG
1661 p.From.Reg = ppc64.REG_VS32
1662 p.To.Type = obj.TYPE_MEM
1664 p.To.Offset = offset
1670 // Generate all the remaining load and store pairs, starting with
1671 // as many 8 byte moves as possible, then 4, 2, 1.
1673 op, size := ppc64.AMOVB, int64(1)
1676 op, size = ppc64.AMOVD, 8
1678 op, size = ppc64.AMOVW, 4
1680 op, size = ppc64.AMOVH, 2
1684 p.To.Type = obj.TYPE_REG
1685 p.To.Reg = ppc64.REGTMP
1686 p.From.Type = obj.TYPE_MEM
1688 p.From.Offset = offset
1692 p.From.Type = obj.TYPE_REG
1693 p.From.Reg = ppc64.REGTMP
1694 p.To.Type = obj.TYPE_MEM
1696 p.To.Offset = offset
1701 case ssa.OpPPC64CALLstatic:
1704 case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1705 p := s.Prog(ppc64.AMOVD)
1706 p.From.Type = obj.TYPE_REG
1707 p.From.Reg = v.Args[0].Reg()
1708 p.To.Type = obj.TYPE_REG
1709 p.To.Reg = ppc64.REG_LR
1711 if v.Args[0].Reg() != ppc64.REG_R12 {
1712 v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1716 pp.To.Reg = ppc64.REG_LR
1718 if gc.Ctxt.Flag_shared {
1719 // When compiling Go into PIC, the function we just
1720 // called via pointer might have been implemented in
1721 // a separate module and so overwritten the TOC
1722 // pointer in R2; reload it.
1723 q := s.Prog(ppc64.AMOVD)
1724 q.From.Type = obj.TYPE_MEM
1726 q.From.Reg = ppc64.REGSP
1727 q.To.Type = obj.TYPE_REG
1728 q.To.Reg = ppc64.REG_R2
1731 case ssa.OpPPC64LoweredWB:
1732 p := s.Prog(obj.ACALL)
1733 p.To.Type = obj.TYPE_MEM
1734 p.To.Name = obj.NAME_EXTERN
1735 p.To.Sym = v.Aux.(*obj.LSym)
1737 case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1738 p := s.Prog(obj.ACALL)
1739 p.To.Type = obj.TYPE_MEM
1740 p.To.Name = obj.NAME_EXTERN
1741 p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
1742 s.UseArgs(16) // space used in callee args area by assembly stubs
1744 case ssa.OpPPC64LoweredNilCheck:
1745 if objabi.GOOS == "aix" {
1749 // NOP (so the BNE has somewhere to land)
1752 p := s.Prog(ppc64.ACMP)
1753 p.From.Type = obj.TYPE_REG
1754 p.From.Reg = v.Args[0].Reg()
1755 p.To.Type = obj.TYPE_REG
1756 p.To.Reg = ppc64.REG_R0
1759 p2 := s.Prog(ppc64.ABNE)
1760 p2.To.Type = obj.TYPE_BRANCH
1763 // Write at 0 is forbidden and will trigger a SIGSEGV
1764 p = s.Prog(ppc64.AMOVW)
1765 p.From.Type = obj.TYPE_REG
1766 p.From.Reg = ppc64.REG_R0
1767 p.To.Type = obj.TYPE_MEM
1768 p.To.Reg = ppc64.REG_R0
1770 // NOP (so the BNE has somewhere to land)
1771 nop := s.Prog(obj.ANOP)
1775 // Issue a load which will fault if arg is nil.
1776 p := s.Prog(ppc64.AMOVBZ)
1777 p.From.Type = obj.TYPE_MEM
1778 p.From.Reg = v.Args[0].Reg()
1779 gc.AddAux(&p.From, v)
1780 p.To.Type = obj.TYPE_REG
1781 p.To.Reg = ppc64.REGTMP
1783 if logopt.Enabled() {
1784 logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1786 if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1787 gc.Warnl(v.Pos, "generated nil check")
1790 // These should be resolved by rules and not make it here.
1791 case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1792 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1793 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1794 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1795 case ssa.OpPPC64InvertFlags:
1796 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1797 case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT, ssa.OpPPC64FlagCarrySet, ssa.OpPPC64FlagCarryClear:
1798 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1800 // TODO: implement for clobberdead experiment. Nop is ok for now.
1802 v.Fatalf("genValue not implemented: %s", v.LongString())
1806 var blockJump = [...]struct {
1808 asmeq, invasmun bool
1810 ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1811 ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1813 ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1814 ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1815 ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1816 ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1818 // TODO: need to work FP comparisons into block jumps
1819 ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1820 ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1821 ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1822 ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1825 func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
1827 case ssa.BlockDefer:
1828 // defer returns in R3:
1829 // 0 if we should continue executing
1830 // 1 if we should jump to deferreturn call
1831 p := s.Prog(ppc64.ACMP)
1832 p.From.Type = obj.TYPE_REG
1833 p.From.Reg = ppc64.REG_R3
1834 p.To.Type = obj.TYPE_REG
1835 p.To.Reg = ppc64.REG_R0
1837 p = s.Prog(ppc64.ABNE)
1838 p.To.Type = obj.TYPE_BRANCH
1839 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
1840 if b.Succs[0].Block() != next {
1841 p := s.Prog(obj.AJMP)
1842 p.To.Type = obj.TYPE_BRANCH
1843 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1846 case ssa.BlockPlain:
1847 if b.Succs[0].Block() != next {
1848 p := s.Prog(obj.AJMP)
1849 p.To.Type = obj.TYPE_BRANCH
1850 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1855 case ssa.BlockRetJmp:
1856 p := s.Prog(obj.AJMP)
1857 p.To.Type = obj.TYPE_MEM
1858 p.To.Name = obj.NAME_EXTERN
1859 p.To.Sym = b.Aux.(*obj.LSym)
1861 case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
1862 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
1863 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
1864 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
1865 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
1866 jmp := blockJump[b.Kind]
1868 case b.Succs[0].Block():
1869 s.Br(jmp.invasm, b.Succs[1].Block())
1871 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1872 s.Br(ppc64.ABVS, b.Succs[1].Block())
1874 case b.Succs[1].Block():
1875 s.Br(jmp.asm, b.Succs[0].Block())
1877 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1880 if b.Likely != ssa.BranchUnlikely {
1881 s.Br(jmp.asm, b.Succs[0].Block())
1883 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1885 s.Br(obj.AJMP, b.Succs[1].Block())
1887 s.Br(jmp.invasm, b.Succs[1].Block())
1889 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1890 s.Br(ppc64.ABVS, b.Succs[1].Block())
1892 s.Br(obj.AJMP, b.Succs[0].Block())
1896 b.Fatalf("branch not implemented: %s", b.LongString())