// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ppc64

import (
	"cmd/compile/internal/base"
	"cmd/compile/internal/ir"
	"cmd/compile/internal/logopt"
	"cmd/compile/internal/objw"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/ssagen"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/ppc64"
	"internal/buildcfg"
	"math"
	"strings"
)
// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
	//	flive := b.FlagsLiveAtEnd
	//	if b.Control != nil && b.Control.Type.IsFlags() {
	//	for i := len(b.Values) - 1; i >= 0; i-- {
	//		if flive && v.Op == ssa.OpPPC64MOVDconst {
	//			// The "mark" is any non-nil Aux value.
	//		if v.Type.IsFlags() {
	//		for _, a := range v.Args {
	//			if a.Type.IsFlags() {
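	// NOTE: the body above is deliberately commented out; as written,
	// ssaMarkMoves is a no-op on ppc64 and no MOVDconst is ever marked.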
// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	panic("bad load type")
}

// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	panic("bad store type")
}
func ssaGenValue(s *ssagen.State, v *ssa.Value) {
	case ssa.OpPPC64LoweredAtomicAnd8,
		ssa.OpPPC64LoweredAtomicAnd32,
		ssa.OpPPC64LoweredAtomicOr8,
		ssa.OpPPC64LoweredAtomicOr32:
		// LBAR/LWAR	(Rarg0), Rtmp
		// AND/OR	Rarg1, Rtmp
		// STBCCC/STWCCC Rtmp, (Rarg0)
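		// BNE	back to the LBAR/LWAR (retry when the conditional
		//	store fails; emitted as p3 below)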
		if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync := s.Prog(ppc64.ALWSYNC)
		plwsync.To.Type = obj.TYPE_NONE
		p.From.Type = obj.TYPE_MEM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP
		p1 := s.Prog(v.Op.Asm())
		p1.From.Type = obj.TYPE_REG
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = ppc64.REGTMP
		p2.From.Type = obj.TYPE_REG
		p2.From.Reg = ppc64.REGTMP
		p2.To.Type = obj.TYPE_MEM
		p2.RegTo2 = ppc64.REGTMP
		p3 := s.Prog(ppc64.ABNE)
		p3.To.Type = obj.TYPE_BRANCH
	case ssa.OpPPC64LoweredAtomicAdd32,
		ssa.OpPPC64LoweredAtomicAdd64:
		// LDAR/LWAR	(Rarg0), Rout
		// STDCCC/STWCCC Rout, (Rarg0)
		// MOVW		Rout,Rout (if Add32)
		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync := s.Prog(ppc64.ALWSYNC)
		plwsync.To.Type = obj.TYPE_NONE
		p.From.Type = obj.TYPE_MEM
		p.To.Type = obj.TYPE_REG
		p1 := s.Prog(ppc64.AADD)
		p1.From.Type = obj.TYPE_REG
		p1.To.Type = obj.TYPE_REG
		p3.From.Type = obj.TYPE_REG
		p3.To.Type = obj.TYPE_MEM
		p4 := s.Prog(ppc64.ABNE)
		p4.To.Type = obj.TYPE_BRANCH
		// Ensure a 32-bit result
		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
			p5 := s.Prog(ppc64.AMOVWZ)
			p5.To.Type = obj.TYPE_REG
			p5.From.Type = obj.TYPE_REG
	case ssa.OpPPC64LoweredAtomicExchange32,
		ssa.OpPPC64LoweredAtomicExchange64:
		// LDAR/LWAR	(Rarg0), Rout
		// STDCCC/STWCCC Rout, (Rarg0)
		if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync := s.Prog(ppc64.ALWSYNC)
		plwsync.To.Type = obj.TYPE_NONE
		p.From.Type = obj.TYPE_MEM
		p.To.Type = obj.TYPE_REG
		p1.From.Type = obj.TYPE_REG
		p1.To.Type = obj.TYPE_MEM
		p2 := s.Prog(ppc64.ABNE)
		p2.To.Type = obj.TYPE_BRANCH
		pisync := s.Prog(ppc64.AISYNC)
		pisync.To.Type = obj.TYPE_NONE
	case ssa.OpPPC64LoweredAtomicLoad8,
		ssa.OpPPC64LoweredAtomicLoad32,
		ssa.OpPPC64LoweredAtomicLoad64,
		ssa.OpPPC64LoweredAtomicLoadPtr:
		// MOVB/MOVD/MOVW (Rarg0), Rout
		case ssa.OpPPC64LoweredAtomicLoad8:
		case ssa.OpPPC64LoweredAtomicLoad32:
		arg0 := v.Args[0].Reg()
		// SYNC when AuxInt == 1; otherwise, load-acquire
		psync := s.Prog(ppc64.ASYNC)
		psync.To.Type = obj.TYPE_NONE
		p.From.Type = obj.TYPE_MEM
		p.To.Type = obj.TYPE_REG
		p1.From.Type = obj.TYPE_REG
		p1.To.Type = obj.TYPE_REG
		p2 := s.Prog(ppc64.ABNE)
		p2.To.Type = obj.TYPE_BRANCH
		pisync := s.Prog(ppc64.AISYNC)
		pisync.To.Type = obj.TYPE_NONE
		p2.To.SetTarget(pisync)
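		// The compare/branch/ISYNC tail above (p1, p2, pisync) is the
		// standard Power load-acquire idiom: comparing the loaded value
		// with itself and branching over the ISYNC creates a control
		// dependency, so later memory accesses cannot be reordered ahead
		// of this load.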
	case ssa.OpPPC64LoweredAtomicStore8,
		ssa.OpPPC64LoweredAtomicStore32,
		ssa.OpPPC64LoweredAtomicStore64:
		// MOVB/MOVW/MOVD arg1,(arg0)
		case ssa.OpPPC64LoweredAtomicStore8:
		case ssa.OpPPC64LoweredAtomicStore32:
		arg0 := v.Args[0].Reg()
		arg1 := v.Args[1].Reg()
		// If AuxInt == 0, LWSYNC (Store-Release), else SYNC
		syncOp := ppc64.ASYNC
		syncOp = ppc64.ALWSYNC
		psync := s.Prog(syncOp)
		psync.To.Type = obj.TYPE_NONE
		p.To.Type = obj.TYPE_MEM
		p.From.Type = obj.TYPE_REG
	case ssa.OpPPC64LoweredAtomicCas64,
		ssa.OpPPC64LoweredAtomicCas32:
		// LDAR	(Rarg0), MutexHint, Rtmp
		// STDCCC	Rarg2, (Rarg0)
		// LWSYNC	// Only for sequential consistency; not required in CasRel.
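		// Sketch of the full sequence, matching the code emitted below:
		// Rout is initialized to $0 (failure), the current value is
		// loaded and CMPed against Rarg1, a BNE exits on mismatch, a
		// conditional store of Rarg2 retries via BNE on a lost
		// reservation, and MOVD $1, Rout records success.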
		if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		r2 := v.Args[2].Reg()
		// Initialize return value to false
		p := s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_CONST
		p.To.Type = obj.TYPE_REG
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync1 := s.Prog(ppc64.ALWSYNC)
		plwsync1.To.Type = obj.TYPE_NONE
		p0.From.Type = obj.TYPE_MEM
		p0.To.Type = obj.TYPE_REG
		p0.To.Reg = ppc64.REGTMP
		// If it is a Compare-and-Swap-Release operation, set the EH field with
		// the release hint.
		p0.AddRestSourceConst(0)
		p1.From.Type = obj.TYPE_REG
		p1.To.Reg = ppc64.REGTMP
		p1.To.Type = obj.TYPE_REG
		// BNE done with return value = false
		p2 := s.Prog(ppc64.ABNE)
		p2.To.Type = obj.TYPE_BRANCH
		p3.From.Type = obj.TYPE_REG
		p3.To.Type = obj.TYPE_MEM
		p4 := s.Prog(ppc64.ABNE)
		p4.To.Type = obj.TYPE_BRANCH
		p5 := s.Prog(ppc64.AMOVD)
		p5.From.Type = obj.TYPE_CONST
		p5.To.Type = obj.TYPE_REG
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
		// If the operation is a CAS-Release, then synchronization is not necessary.
		plwsync2 := s.Prog(ppc64.ALWSYNC)
		plwsync2.To.Type = obj.TYPE_NONE
		p2.To.SetTarget(plwsync2)
		p6 := s.Prog(obj.ANOP)
	case ssa.OpPPC64LoweredPubBarrier:

	case ssa.OpPPC64LoweredGetClosurePtr:
		// Closure pointer is R11 (already)
		ssagen.CheckLoweredGetClosurePtr(v)

	case ssa.OpPPC64LoweredGetCallerSP:
		// caller's SP is FixedFrameSize below the address of the first arg
		p := s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG

	case ssa.OpPPC64LoweredGetCallerPC:
		p := s.Prog(obj.AGETCALLERPC)
		p.To.Type = obj.TYPE_REG
	case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
		// input is already rounded

		loadOp := loadByType(v.Type)
		ssagen.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG

		storeOp := storeByType(v.Type)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddrAuto(&p.To, v)

	case ssa.OpArgIntReg, ssa.OpArgFloatReg:
		// The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
		// The loop only runs once.
		for _, a := range v.Block.Func.RegArgs {
			// Pass the spill/unspill information along to the assembler, offset by size of
			// the saved LR slot.
			addr := ssagen.SpillSlotAddr(a, ppc64.REGSP, base.Ctxt.Arch.FixedFrameSize)
			s.FuncInfo().AddSpill(
				obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
		v.Block.Func.RegArgs = nil
		ssagen.CheckArgReg(v)
	case ssa.OpPPC64DIVD:
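		// Division guard (a sketch; the original comment block is elided
		// here): the divisor is compared against -1 and, when equal, the
		// quotient is computed as NEG of the dividend rather than with a
		// divide, sidestepping the INT64_MIN / -1 overflow case. The
		// CMP/BEQ/JMP/NEG code below has exactly that shape.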
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		p := s.Prog(ppc64.ACMP)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_CONST
		pbahead := s.Prog(ppc64.ABEQ)
		pbahead.To.Type = obj.TYPE_BRANCH
		p = s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		pbover := s.Prog(obj.AJMP)
		pbover.To.Type = obj.TYPE_BRANCH
		p = s.Prog(ppc64.ANEG)
		p.To.Type = obj.TYPE_REG
		p.From.Type = obj.TYPE_REG
		pbahead.To.SetTarget(p)
		pbover.To.SetTarget(p)
	case ssa.OpPPC64DIVW:
		// word-width version of above
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		p := s.Prog(ppc64.ACMPW)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_CONST
		pbahead := s.Prog(ppc64.ABEQ)
		pbahead.To.Type = obj.TYPE_BRANCH
		p = s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		pbover := s.Prog(obj.AJMP)
		pbover.To.Type = obj.TYPE_BRANCH
		p = s.Prog(ppc64.ANEG)
		p.To.Type = obj.TYPE_REG
		p.From.Type = obj.TYPE_REG
		pbahead.To.SetTarget(p)
		pbover.To.SetTarget(p)
	case ssa.OpPPC64CLRLSLWI:
		r1 := v.Args[0].Reg()
		p := s.Prog(v.Op.Asm())
		// clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
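		// For example (illustrative values, not taken from this code):
		// with mb=16 and sh=3, clrlslwi ra,rs,16,3 is encoded as
		// rlwinm ra,rs,3,13,28.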
		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
		p.AddRestSourceConst(ssa.GetPPC64Shiftsh(shifts))
		p.To.Type = obj.TYPE_REG

	case ssa.OpPPC64CLRLSLDI:
		r1 := v.Args[0].Reg()
		p := s.Prog(v.Op.Asm())
		// clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
		p.AddRestSourceConst(ssa.GetPPC64Shiftsh(shifts))
		p.To.Type = obj.TYPE_REG
	case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
		ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
		ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
		ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
		ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
		ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
		ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
		ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG

	case ssa.OpPPC64ADDCC, ssa.OpPPC64ANDCC, ssa.OpPPC64SUBCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC, ssa.OpPPC64NORCC,
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG

	case ssa.OpPPC64NEGCC, ssa.OpPPC64CNTLZDCC:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
	case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG

	// AuxInt holds encoded rotate + mask
	case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
		sh, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
		p := s.Prog(v.Op.Asm())
		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
		p.Reg = v.Args[0].Reg()
		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(sh)}
		p.AddRestSourceArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})

	case ssa.OpPPC64RLDICL, ssa.OpPPC64RLDICR:
		sh, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
		p := s.Prog(v.Op.Asm())
		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: sh}
		case ssa.OpPPC64RLDICL:
			p.AddRestSourceConst(mb)
		case ssa.OpPPC64RLDICR:
			p.AddRestSourceConst(me)
		p.Reg = v.Args[0].Reg()
		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}

	case ssa.OpPPC64RLWNM:
		_, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
		p := s.Prog(v.Op.Asm())
		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
		p.Reg = v.Args[0].Reg()
		p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
		p.AddRestSourceArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
	case ssa.OpPPC64MADDLD:
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		r3 := v.Args[2].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.AddRestSourceReg(r3)
		p.To.Type = obj.TYPE_REG

	case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		r3 := v.Args[2].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.AddRestSourceReg(r2)
		p.To.Type = obj.TYPE_REG

	case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
		ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
		ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
		ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD, ssa.OpPPC64BRH, ssa.OpPPC64BRW, ssa.OpPPC64BRD:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
	case ssa.OpPPC64ADDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
		ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
		ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
		p := s.Prog(v.Op.Asm())
		p.Reg = v.Args[0].Reg()
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG

	case ssa.OpPPC64ADDC, ssa.OpPPC64ADDE, ssa.OpPPC64SUBC, ssa.OpPPC64SUBE:
		r := v.Reg0() // CA is the first, implied argument.
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG

	case ssa.OpPPC64ADDZEzero, ssa.OpPPC64SUBZEzero:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_R0
		p.To.Type = obj.TYPE_REG

	case ssa.OpPPC64ADDCconst:
		p := s.Prog(v.Op.Asm())
		p.Reg = v.Args[0].Reg()
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		// Output is a pair, the second is the CA, which is implied.

	case ssa.OpPPC64SUBCconst:
		p := s.Prog(v.Op.Asm())
		p.AddRestSourceConst(v.AuxInt)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG

	case ssa.OpPPC64SUBFCconst:
		p := s.Prog(v.Op.Asm())
		p.AddRestSourceConst(v.AuxInt)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG

	case ssa.OpPPC64ADDCCconst, ssa.OpPPC64ANDCCconst:
		p := s.Prog(v.Op.Asm())
		p.Reg = v.Args[0].Reg()
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
	case ssa.OpPPC64MOVDaddr:
		switch v.Aux.(type) {
			v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
			// If aux offset and aux int are both 0, and the same
			// input and output regs are used, no instruction
			// needs to be generated, since it would just be
			// addi rx, rx, 0 (a no-op).
			if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
				p := s.Prog(ppc64.AMOVD)
				p.From.Type = obj.TYPE_ADDR
				p.From.Reg = v.Args[0].Reg()
				p.From.Offset = v.AuxInt
				p.To.Type = obj.TYPE_REG
		case *obj.LSym, ir.Node:
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_ADDR
			p.From.Reg = v.Args[0].Reg()
			p.To.Type = obj.TYPE_REG
			ssagen.AddAux(&p.From, v)

	case ssa.OpPPC64MOVDconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG

	case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
	case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[1].Reg()

	case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt

	case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
		// Shift in register to required size
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
	case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:

		// MOVDload and MOVWload are DS form instructions that are restricted to
		// offsets that are a multiple of 4. If the offset is not a multiple of 4,
		// the address of the symbol to be loaded is computed (base + offset),
		// used as the new base register, and the offset field in the instruction
		// is set to zero.

		// This same problem can happen with gostrings since the final offset is not
		// known yet, but could be unaligned after the relocation is resolved.
		// So gostrings are handled the same way.

		// This allows MOVDload and MOVWload to be generated in more cases and
		// eliminates some offset and alignment checking in the rules file.
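		// Illustrative example (offsets assumed, not from this code): a
		// MOVD of sym+6 is emitted as MOVD $sym+6(Rbase), REGTMP followed
		// by MOVD 0(REGTMP), Rout, instead of an invalid DS-form offset
		// of 6.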
		fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
		ssagen.AddAux(&fromAddr, v)
		switch fromAddr.Name {
		case obj.NAME_EXTERN, obj.NAME_STATIC:
			// Special case for a rule that combines the bytes of a gostring.
			// The v alignment might seem OK, but we don't want to load it
			// using an offset because relocation comes later.
			genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go:string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
			genAddr = fromAddr.Offset%4 != 0
		// Load full address into the temp register.
		p := s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_ADDR
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		// Load target using temp as base register
		// and offset zero. Setting NAME_NONE
		// prevents any extra offsets from being
		// applied.
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP
		fromAddr.Reg = ppc64.REGTMP
		// Clear the offset field and other
		// information that might be used
		// by the assembler to add to the
		// final offset value.
		fromAddr.Name = obj.NAME_NONE
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
	case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG

	case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG

	case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
		ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
		ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG

	case ssa.OpPPC64DCBT:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt

	case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REGZERO
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:

		// MOVDstore and MOVDstorezero become DS form instructions that are restricted
		// to offset values that are a multiple of 4. If the offset field is not a
		// multiple of 4, then the full address of the store target is computed (base +
		// offset) and used as the new base register and the offset in the instruction
		// is set to zero.

		// This allows the MOVDstore and MOVDstorezero to be generated in more cases,
		// and prevents checking of the offset value and alignment in the rules.
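		// Same trick as MOVDload/MOVWload above: when toAddr.Offset%4 != 0,
		// the full target address is materialized in REGTMP first and the
		// store then uses a zero offset (see the code below).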
		toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
		ssagen.AddAux(&toAddr, v)
		if toAddr.Offset%4 != 0 {
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_ADDR
			p.From.Reg = v.Args[0].Reg()
			ssagen.AddAux(&p.From, v)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP
			toAddr.Reg = ppc64.REGTMP
			// Clear the offset field and other
			// information that might be used
			// by the assembler to add to the
			// final offset value.
			toAddr.Name = obj.NAME_NONE
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		if v.Op == ssa.OpPPC64MOVDstorezero {
			p.From.Reg = ppc64.REGZERO
			p.From.Reg = v.Args[1].Reg()

	case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)

	case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
		ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
		ssa.OpPPC64MOVHBRstoreidx:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
	case ssa.OpPPC64ISEL, ssa.OpPPC64ISELZ:
		// ISEL  AuxInt ? arg0 : arg1
		// ISELZ is a special case of ISEL where arg1 is implicitly $0.
		//
		// AuxInt value indicates conditions 0=LT 1=GT 2=EQ 3=SO 4=GE 5=LE 6=NE 7=NSO.
		// ISEL accepts a CR bit argument, not a condition as expressed by AuxInt.
		// Convert the condition to a CR bit argument by the following conversion:
		//
		// AuxInt&3 ? arg0 : arg1 for conditions LT, GT, EQ, SO
		// AuxInt&3 ? arg1 : arg0 for conditions GE, LE, NE, NSO
		p := s.Prog(v.Op.Asm())
		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
		p.Reg = v.Args[0].Reg()
		if v.Op == ssa.OpPPC64ISEL {
			p.AddRestSourceReg(v.Args[1].Reg())
			p.AddRestSourceReg(ppc64.REG_R0)
		// AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
		p.Reg, p.GetFrom3().Reg = p.GetFrom3().Reg, p.Reg
		p.From.SetConst(v.AuxInt & 3)

	case ssa.OpPPC64SETBC, ssa.OpPPC64SETBCR:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.From.Type = obj.TYPE_REG
		p.From.Reg = int16(ppc64.REG_CR0LT + v.AuxInt)
	case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
		// The LoweredQuad code generation
		// generates STXV instructions on
		// power9. The Short variation is used
		// if no loop is generated.

		// sizes >= 64 generate a loop as follows:

		// Set up loop counter in CTR, used by BC
		// XXLXOR clears VS32
		//	XXLXOR	VS32,VS32,VS32
		//	MOVD	len/64,REG_TMP
		//	STXV	VS32,16(R20)
		//	STXV	VS32,32(R20)
		//	STXV	VS32,48(R20)

		// Bytes per iteration
		ctr := v.AuxInt / 64

		rem := v.AuxInt % 64
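		// For example (an assumed size, for illustration): AuxInt == 200
		// yields ctr == 3 (three 64-byte iterations) and rem == 8, which
		// is cleared by the remainder code further down.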
		// Only generate a loop if there is more
		// than 1 iteration.

		// Set up VS32 (V0) to hold 0s
		p := s.Prog(ppc64.AXXLXOR)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_VS32
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_VS32
		p.Reg = ppc64.REG_VS32

		// Set up CTR loop counter
		p = s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_CONST
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP

		p = s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REGTMP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_CTR

		// Don't generate padding for
		// loops with few iterations.
		p = s.Prog(obj.APCALIGN)
		p.From.Type = obj.TYPE_CONST

		// generate 4 STXVs to zero 64 bytes
		p = s.Prog(ppc64.ASTXV)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_VS32
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()

		// Save the top of loop
		p = s.Prog(ppc64.ASTXV)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_VS32
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()

		p = s.Prog(ppc64.ASTXV)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_VS32
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()

		p = s.Prog(ppc64.ASTXV)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_VS32
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()

		// Increment address for the
		// 64 bytes just zeroed.
		p = s.Prog(ppc64.AADD)
		p.Reg = v.Args[0].Reg()
		p.From.Type = obj.TYPE_CONST
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()

		// Branch back to top of loop
		// BC with BO_BCTR generates bdnz
		p = s.Prog(ppc64.ABC)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = ppc64.BO_BCTR
		p.Reg = ppc64.REG_CR0LT
		p.To.Type = obj.TYPE_BRANCH

		// When ctr == 1 the loop was not generated but
		// there are at least 64 bytes to clear, so add
		// that to the remainder to generate the code
		// to clear those doublewords.

		// Clear the remainder starting at offset zero
		if rem >= 16 && ctr <= 1 {
			// If the XXLXOR hasn't already been
			// generated, do it here to initialize
			// the VS32 register.
			p := s.Prog(ppc64.AXXLXOR)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS32
			p.Reg = ppc64.REG_VS32

			// Generate STXV for 32 or 64 bytes.
			p := s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Offset = offset

			p = s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Offset = offset + 16

			// Generate 16 bytes
			p := s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Offset = offset

		// first clear as many doublewords as possible
		// then clear remaining sizes as available
		op, size := ppc64.AMOVB, int64(1)
		op, size = ppc64.AMOVD, 8
		op, size = ppc64.AMOVW, 4
		op, size = ppc64.AMOVH, 2
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_R0
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Offset = offset
	case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:

		// Unaligned data doesn't hurt performance
		// for these instructions on power8.

		// For sizes >= 64 generate a loop as follows:

		// Set up loop counter in CTR, used by BC
		//	XXLXOR	VS32,VS32,VS32
		//	MOVD	len/32,REG_TMP
		//	STXVD2X VS32,(R0)(R20)
		//	STXVD2X VS32,(R31)(R20)

		// any remainder is done as described below

		// for sizes < 64 bytes, first clear as many doublewords as possible,
		// then handle the remainder

		// the remainder bytes are cleared using one or more
		// of the following instructions with the appropriate
		// offsets depending on which instructions are needed

		//	MOVW	R0,n1(R20)	4 bytes
		//	MOVH	R0,n2(R20)	2 bytes
		//	MOVB	R0,n3(R20)	1 byte
		//
		// 7 bytes: MOVW, MOVH, MOVB
		// 6 bytes: MOVW, MOVH
		// 5 bytes: MOVW, MOVB
		// 3 bytes: MOVH, MOVB

		// each loop iteration does 32 bytes
		ctr := v.AuxInt / 32

		rem := v.AuxInt % 32
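		// For example (an assumed size): AuxInt == 71 yields ctr == 2 and
		// rem == 7; since the loop advances the base register, the tail is
		// MOVW at offset 0, MOVH at 4, and MOVB at 6, matching the 7-byte
		// row in the table above.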
		// only generate a loop if there is more
		// than 1 iteration.

		// Set up VS32 (V0) to hold 0s
		p := s.Prog(ppc64.AXXLXOR)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_VS32
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_VS32
		p.Reg = ppc64.REG_VS32

		// Set up CTR loop counter
		p = s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_CONST
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP

		p = s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REGTMP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_CTR

		// Set up R31 to hold index value 16
		p = s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_CONST
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP

		// Don't add padding for alignment
		// with few loop iterations.
		p = s.Prog(obj.APCALIGN)
		p.From.Type = obj.TYPE_CONST

		// generate 2 STXVD2Xs to store 16 bytes
		// when this is a loop then the top must be saved
		// This is the top of loop
		p = s.Prog(ppc64.ASTXVD2X)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_VS32
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Index = ppc64.REGZERO
		// Save the top of loop

		p = s.Prog(ppc64.ASTXVD2X)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_VS32
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Index = ppc64.REGTMP

		// Increment address for the
		// 4 doublewords just zeroed.
		p = s.Prog(ppc64.AADD)
		p.Reg = v.Args[0].Reg()
		p.From.Type = obj.TYPE_CONST
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()

		// Branch back to top of loop
		// BC with BO_BCTR generates bdnz
		p = s.Prog(ppc64.ABC)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = ppc64.BO_BCTR
		p.Reg = ppc64.REG_CR0LT
		p.To.Type = obj.TYPE_BRANCH

		// when ctr == 1 the loop was not generated but
		// there are at least 32 bytes to clear, so add
		// that to the remainder to generate the code
		// to clear those doublewords

		// clear the remainder starting at offset zero

		// first clear as many doublewords as possible
		// then clear remaining sizes as available
		op, size := ppc64.AMOVB, int64(1)
		op, size = ppc64.AMOVD, 8
		op, size = ppc64.AMOVW, 4
		op, size = ppc64.AMOVH, 2
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_R0
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Offset = offset
	case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:

		bytesPerLoop := int64(32)
		// This will be used when moving more
		// than 8 bytes. Moves start with
		// as many 8 byte moves as possible, then
		// 4, 2, or 1 byte(s) as remaining. This will
		// work and be efficient for power8 or later.
		// If there are 64 or more bytes, then a
		// loop is generated to move 32 bytes and
		// update the src and dst addresses on each
		// iteration. When < 64 bytes, the appropriate
		// number of moves are generated based on the
		// size.

		// When moving >= 64 bytes a loop is used
		//	MOVD len/32,REG_TMP
		//	LXVD2X (R0)(R21),VS32
		//	LXVD2X (R31)(R21),VS33
		//	STXVD2X VS32,(R0)(R20)
		//	STXVD2X VS33,(R31)(R20)

		// Bytes not moved by this loop are moved
		// with a combination of the following instructions,
		// starting with the largest sizes and generating as
		// many as needed, using the appropriate offset value.

		// Each loop iteration moves 32 bytes
		ctr := v.AuxInt / bytesPerLoop

		// Remainder after the loop
		rem := v.AuxInt % bytesPerLoop
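		// For example (an assumed size): AuxInt == 79 yields ctr == 2 (64
		// bytes moved by the loop) and rem == 15, handled afterwards as
		// one MOVD, one MOVWZ, one MOVH, and one MOVB at offsets 0, 8,
		// 12, and 14.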
		dstReg := v.Args[0].Reg()
		srcReg := v.Args[1].Reg()

		// The set of registers used here must match the clobbered reg list.

		// Only generate looping code when loop counter is > 1 for >= 64 bytes
		p := s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_CONST
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP

		p = s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REGTMP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_CTR

		// Use REGTMP as index reg
		p = s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_CONST
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP

		// Don't add padding for
		// alignment with small iteration counts.
		p = s.Prog(obj.APCALIGN)
		p.From.Type = obj.TYPE_CONST

		// Generate 16 byte loads and stores.
		// Use temp register for index (16)
		// on the second one.
		p = s.Prog(ppc64.ALXVD2X)
		p.From.Type = obj.TYPE_MEM
		p.From.Index = ppc64.REGZERO
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_VS32

		p = s.Prog(ppc64.ALXVD2X)
		p.From.Type = obj.TYPE_MEM
		p.From.Index = ppc64.REGTMP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_VS33

		// increment the src reg for next iteration
		p = s.Prog(ppc64.AADD)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = bytesPerLoop
		p.To.Type = obj.TYPE_REG

		// generate 16 byte stores
		p = s.Prog(ppc64.ASTXVD2X)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_VS32
		p.To.Type = obj.TYPE_MEM
		p.To.Index = ppc64.REGZERO

		p = s.Prog(ppc64.ASTXVD2X)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_VS33
		p.To.Type = obj.TYPE_MEM
		p.To.Index = ppc64.REGTMP

		// increment the dst reg for next iteration
		p = s.Prog(ppc64.AADD)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = bytesPerLoop
		p.To.Type = obj.TYPE_REG

		// BC with BO_BCTR generates bdnz to branch on nonzero CTR
		p = s.Prog(ppc64.ABC)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = ppc64.BO_BCTR
		p.Reg = ppc64.REG_CR0LT
		p.To.Type = obj.TYPE_BRANCH

		// srcReg and dstReg were incremented in the loop, so
		// later instructions start with offset 0.

		// No loop was generated for one iteration, so
		// add 32 bytes to the remainder to move those bytes.

		// Generate 16 byte loads and stores.
		// Use temp register for index (value 16)
		// on the second one.
		p := s.Prog(ppc64.ALXVD2X)
		p.From.Type = obj.TYPE_MEM
		p.From.Index = ppc64.REGZERO
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_VS32

		p = s.Prog(ppc64.ASTXVD2X)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_VS32
		p.To.Type = obj.TYPE_MEM
		p.To.Index = ppc64.REGZERO

		// Use REGTMP as index reg
		p := s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_CONST
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP

		p = s.Prog(ppc64.ALXVD2X)
		p.From.Type = obj.TYPE_MEM
		p.From.Index = ppc64.REGTMP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_VS32

		p = s.Prog(ppc64.ASTXVD2X)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_VS32
		p.To.Type = obj.TYPE_MEM
		p.To.Index = ppc64.REGTMP

		// Generate all the remaining load and store pairs, starting with
		// as many 8 byte moves as possible, then 4, 2, 1.
		op, size := ppc64.AMOVB, int64(1)
		op, size = ppc64.AMOVD, 8
		op, size = ppc64.AMOVWZ, 4
		op, size = ppc64.AMOVH, 2
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = offset
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REGTMP
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = offset
	case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
		bytesPerLoop := int64(64)
		// This is used when moving more
		// than 8 bytes on power9. Moves start with
		// as many 8 byte moves as possible, then
		// 4, 2, or 1 byte(s) as remaining. This will
		// work and be efficient for power8 or later.
		// If there are 64 or more bytes, then a
		// loop is generated to move 64 bytes and
		// update the src and dst addresses on each
		// iteration. When < 64 bytes, the appropriate
		// number of moves are generated based on the
		// size.

		// When moving >= 64 bytes a loop is used
		//	MOVD len/64,REG_TMP
		//	STXV VS33,16(R20)

		// Bytes not moved by this loop are moved
		// with a combination of the following instructions,
		// starting with the largest sizes and generating as
		// many as needed, using the appropriate offset value.

		// Each loop iteration moves 64 bytes
		ctr := v.AuxInt / bytesPerLoop

		// Remainder after the loop
		rem := v.AuxInt % bytesPerLoop

		dstReg := v.Args[0].Reg()
		srcReg := v.Args[1].Reg()

		// Only generate looping code when loop counter is > 1 for >= 128 bytes
		p := s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_CONST
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP

		p = s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REGTMP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_CTR

		p = s.Prog(obj.APCALIGN)
		p.From.Type = obj.TYPE_CONST

		// Generate 16 byte loads and stores.
		p = s.Prog(ppc64.ALXV)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = offset
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_VS32

		p = s.Prog(ppc64.ALXV)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = offset + 16
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_VS33

		// generate 16 byte stores
		p = s.Prog(ppc64.ASTXV)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_VS32
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = offset

		p = s.Prog(ppc64.ASTXV)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_VS33
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = offset + 16

		// Generate 16 byte loads and stores.
		p = s.Prog(ppc64.ALXV)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = offset + 32
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_VS32

		p = s.Prog(ppc64.ALXV)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = offset + 48
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_VS33

		// generate 16 byte stores
		p = s.Prog(ppc64.ASTXV)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_VS32
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = offset + 32

		p = s.Prog(ppc64.ASTXV)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_VS33
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = offset + 48

		// increment the src reg for next iteration
		p = s.Prog(ppc64.AADD)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = bytesPerLoop
		p.To.Type = obj.TYPE_REG

		// increment the dst reg for next iteration
		p = s.Prog(ppc64.AADD)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = bytesPerLoop
		p.To.Type = obj.TYPE_REG

		// BC with BO_BCTR generates bdnz to branch on nonzero CTR
		p = s.Prog(ppc64.ABC)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = ppc64.BO_BCTR
		p.Reg = ppc64.REG_CR0LT
		p.To.Type = obj.TYPE_BRANCH

		// srcReg and dstReg were incremented in the loop, so
		// later instructions start with offset 0.

		// No loop was generated for one iteration, so
		// add 64 bytes to the remainder to move those bytes.

		p := s.Prog(ppc64.ALXV)
		p.From.Type = obj.TYPE_MEM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_VS32

		p = s.Prog(ppc64.ALXV)
		p.From.Type = obj.TYPE_MEM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_VS33

		p = s.Prog(ppc64.ASTXV)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_VS32
		p.To.Type = obj.TYPE_MEM

		p = s.Prog(ppc64.ASTXV)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_VS33
		p.To.Type = obj.TYPE_MEM

		// Generate 16 byte loads and stores.
		p := s.Prog(ppc64.ALXV)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = offset
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_VS32

		p = s.Prog(ppc64.ASTXV)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_VS32
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = offset

		p := s.Prog(ppc64.ALXV)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = offset
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_VS32

		p = s.Prog(ppc64.ASTXV)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_VS32
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = offset

		// Generate all the remaining load and store pairs, starting with
		// as many 8 byte moves as possible, then 4, 2, 1.
		op, size := ppc64.AMOVB, int64(1)
		op, size = ppc64.AMOVD, 8
		op, size = ppc64.AMOVWZ, 4
		op, size = ppc64.AMOVH, 2
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = offset
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REGTMP
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = offset
	case ssa.OpPPC64CALLstatic:

	case ssa.OpPPC64CALLtail:

	case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
		p := s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_LR

		if v.Args[0].Reg() != ppc64.REG_R12 {
			v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)

		// Convert the call into a blrl with a hint that this is not a subroutine return.
		// The full bclrl opcode must be specified when passing a hint.
		pp.From.Type = obj.TYPE_CONST
		pp.From.Offset = ppc64.BO_ALWAYS
		pp.Reg = ppc64.REG_CR0LT // The preferred value if BI is ignored.
		pp.To.Reg = ppc64.REG_LR
		pp.AddRestSourceConst(1)

		if ppc64.NeedTOCpointer(base.Ctxt) {
			// When compiling Go into PIC, the function we just
			// called via pointer might have been implemented in
			// a separate module and so overwritten the TOC
			// pointer in R2; reload it.
			q := s.Prog(ppc64.AMOVD)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = ppc64.REGSP
			q.To.Type = obj.TYPE_REG
			q.To.Reg = ppc64.REG_R2
	case ssa.OpPPC64LoweredWB:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		// AuxInt encodes how many buffer entries we need.
		p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]

	case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
		s.UseArgs(16) // space used in callee args area by assembly stubs

	case ssa.OpPPC64LoweredNilCheck:
		if buildcfg.GOOS == "aix" {
			// NOP (so the BNE has somewhere to land)

			p := s.Prog(ppc64.ACMP)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = v.Args[0].Reg()
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_R0

			p2 := s.Prog(ppc64.ABNE)
			p2.To.Type = obj.TYPE_BRANCH

			// Write at 0 is forbidden and will trigger a SIGSEGV
			p = s.Prog(ppc64.AMOVW)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_R0
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = ppc64.REG_R0

			// NOP (so the BNE has somewhere to land)
			nop := s.Prog(obj.ANOP)
			p2.To.SetTarget(nop)

		// Issue a load which will fault if arg is nil.
		p := s.Prog(ppc64.AMOVBZ)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP

		if logopt.Enabled() {
			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			base.WarnfAt(v.Pos, "generated nil check")

	// These should be resolved by rules and not make it here.
	case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
		ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
		ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
		v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
	case ssa.OpPPC64InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.OpClobber, ssa.OpClobberReg:
		// TODO: implement for clobberdead experiment. Nop is ok for now.
		v.Fatalf("genValue not implemented: %s", v.LongString())
var blockJump = [...]struct {
	asm, invasm     obj.As
	asmeq, invasmun bool
}{
	ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
	ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},

	ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
	ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
	ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
	ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},

	// TODO: need to work FP comparisons into block jumps
	ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
	ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
	ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
	ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
}
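// A reading note (inferred from ssaGenBlock below, since the conditionals
// that consult these flags are elided): asmeq marks FP conditions whose
// branch covers only the strict part of >= or <=, so an extra BEQ must
// follow; invasmun marks conditions whose inverted branch must be followed
// by a BVS so that unordered (NaN) results also go to the false successor.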
func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockDefer:
		// defer returns in R3:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := s.Prog(ppc64.ACMP)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_R3
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_R0

		p = s.Prog(ppc64.ABNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})

	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})

	case ssa.BlockExit, ssa.BlockRetJmp:

	case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
		ssa.BlockPPC64LT, ssa.BlockPPC64GE,
		ssa.BlockPPC64LE, ssa.BlockPPC64GT,
		ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
		ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
		jmp := blockJump[b.Kind]
		case b.Succs[0].Block():
			s.Br(jmp.invasm, b.Succs[1].Block())
			// TODO: The second branch is probably predict-not-taken since it is for FP unordered
			s.Br(ppc64.ABVS, b.Succs[1].Block())
		case b.Succs[1].Block():
			s.Br(jmp.asm, b.Succs[0].Block())
			s.Br(ppc64.ABEQ, b.Succs[0].Block())
			if b.Likely != ssa.BranchUnlikely {
				s.Br(jmp.asm, b.Succs[0].Block())
				s.Br(ppc64.ABEQ, b.Succs[0].Block())
				s.Br(obj.AJMP, b.Succs[1].Block())
				s.Br(jmp.invasm, b.Succs[1].Block())
				// TODO: The second branch is probably predict-not-taken since it is for FP unordered
				s.Br(ppc64.ABVS, b.Succs[1].Block())
				s.Br(obj.AJMP, b.Succs[0].Block())

		b.Fatalf("branch not implemented: %s", b.LongString())
func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
	p := s.Prog(loadByType(t))
	p.From.Type = obj.TYPE_MEM
	p.From.Name = obj.NAME_AUTO
	p.From.Sym = n.Linksym()
	p.From.Offset = n.FrameOffset() + off
	p.To.Type = obj.TYPE_REG
	p.To.Reg = reg
	return p
}

func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
	p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
	p.To.Name = obj.NAME_PARAM
	p.To.Sym = n.Linksym()
	p.Pos = p.Pos.WithNotStmt()
	return p
}