1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/base"
9 "cmd/compile/internal/ir"
10 "cmd/compile/internal/logopt"
11 "cmd/compile/internal/objw"
12 "cmd/compile/internal/ssa"
13 "cmd/compile/internal/ssagen"
14 "cmd/compile/internal/types"
16 "cmd/internal/obj/ppc64"
22 // ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
23 func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
24 // flive := b.FlagsLiveAtEnd
25 // if b.Control != nil && b.Control.Type.IsFlags() {
28 // for i := len(b.Values) - 1; i >= 0; i-- {
30 // if flive && (v.Op == ssa.OpPPC64MOVDconst) {
31 // // The "mark" is any non-nil Aux value.
34 // if v.Type.IsFlags() {
37 // for _, a := range v.Args {
38 // if a.Type.IsFlags() {
45 // loadByType returns the load instruction of the given type.
46 func loadByType(t *types.Type) obj.As {
78 panic("bad load type")
81 // storeByType returns the store instruction of the given type.
82 func storeByType(t *types.Type) obj.As {
102 panic("bad store type")
105 func ssaGenValue(s *ssagen.State, v *ssa.Value) {
128 case ssa.OpPPC64LoweredAtomicAnd8,
129 ssa.OpPPC64LoweredAtomicAnd32,
130 ssa.OpPPC64LoweredAtomicOr8,
131 ssa.OpPPC64LoweredAtomicOr32:
133 // LBAR/LWAR (Rarg0), Rtmp
134 // AND/OR Rarg1, Rtmp
135 // STBCCC/STWCCC Rtmp, (Rarg0)
139 if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
143 r0 := v.Args[0].Reg()
144 r1 := v.Args[1].Reg()
145 // LWSYNC - Assuming shared data not write-through-required nor
146 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
147 plwsync := s.Prog(ppc64.ALWSYNC)
148 plwsync.To.Type = obj.TYPE_NONE
151 p.From.Type = obj.TYPE_MEM
153 p.To.Type = obj.TYPE_REG
154 p.To.Reg = ppc64.REGTMP
156 p1 := s.Prog(v.Op.Asm())
157 p1.From.Type = obj.TYPE_REG
159 p1.To.Type = obj.TYPE_REG
160 p1.To.Reg = ppc64.REGTMP
163 p2.From.Type = obj.TYPE_REG
164 p2.From.Reg = ppc64.REGTMP
165 p2.To.Type = obj.TYPE_MEM
167 p2.RegTo2 = ppc64.REGTMP
169 p3 := s.Prog(ppc64.ABNE)
170 p3.To.Type = obj.TYPE_BRANCH
173 case ssa.OpPPC64LoweredAtomicAdd32,
174 ssa.OpPPC64LoweredAtomicAdd64:
176 // LDAR/LWAR (Rarg0), Rout
178 // STDCCC/STWCCC Rout, (Rarg0)
180 // MOVW Rout,Rout (if Add32)
183 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
187 r0 := v.Args[0].Reg()
188 r1 := v.Args[1].Reg()
190 // LWSYNC - Assuming shared data not write-through-required nor
191 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
192 plwsync := s.Prog(ppc64.ALWSYNC)
193 plwsync.To.Type = obj.TYPE_NONE
196 p.From.Type = obj.TYPE_MEM
198 p.To.Type = obj.TYPE_REG
201 p1 := s.Prog(ppc64.AADD)
202 p1.From.Type = obj.TYPE_REG
205 p1.To.Type = obj.TYPE_REG
208 p3.From.Type = obj.TYPE_REG
210 p3.To.Type = obj.TYPE_MEM
213 p4 := s.Prog(ppc64.ABNE)
214 p4.To.Type = obj.TYPE_BRANCH
217 // Ensure a 32 bit result
218 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
219 p5 := s.Prog(ppc64.AMOVWZ)
220 p5.To.Type = obj.TYPE_REG
222 p5.From.Type = obj.TYPE_REG
226 case ssa.OpPPC64LoweredAtomicExchange32,
227 ssa.OpPPC64LoweredAtomicExchange64:
229 // LDAR/LWAR (Rarg0), Rout
230 // STDCCC/STWCCC Rout, (Rarg0)
235 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
239 r0 := v.Args[0].Reg()
240 r1 := v.Args[1].Reg()
242 // LWSYNC - Assuming shared data not write-through-required nor
243 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
244 plwsync := s.Prog(ppc64.ALWSYNC)
245 plwsync.To.Type = obj.TYPE_NONE
248 p.From.Type = obj.TYPE_MEM
250 p.To.Type = obj.TYPE_REG
254 p1.From.Type = obj.TYPE_REG
256 p1.To.Type = obj.TYPE_MEM
259 p2 := s.Prog(ppc64.ABNE)
260 p2.To.Type = obj.TYPE_BRANCH
263 pisync := s.Prog(ppc64.AISYNC)
264 pisync.To.Type = obj.TYPE_NONE
266 case ssa.OpPPC64LoweredAtomicLoad8,
267 ssa.OpPPC64LoweredAtomicLoad32,
268 ssa.OpPPC64LoweredAtomicLoad64,
269 ssa.OpPPC64LoweredAtomicLoadPtr:
271 // MOVB/MOVD/MOVW (Rarg0), Rout
278 case ssa.OpPPC64LoweredAtomicLoad8:
280 case ssa.OpPPC64LoweredAtomicLoad32:
284 arg0 := v.Args[0].Reg()
286 // SYNC when AuxInt == 1; otherwise, load-acquire
288 psync := s.Prog(ppc64.ASYNC)
289 psync.To.Type = obj.TYPE_NONE
293 p.From.Type = obj.TYPE_MEM
295 p.To.Type = obj.TYPE_REG
299 p1.From.Type = obj.TYPE_REG
301 p1.To.Type = obj.TYPE_REG
304 p2 := s.Prog(ppc64.ABNE)
305 p2.To.Type = obj.TYPE_BRANCH
307 pisync := s.Prog(ppc64.AISYNC)
308 pisync.To.Type = obj.TYPE_NONE
309 p2.To.SetTarget(pisync)
311 case ssa.OpPPC64LoweredAtomicStore8,
312 ssa.OpPPC64LoweredAtomicStore32,
313 ssa.OpPPC64LoweredAtomicStore64:
315 // MOVB/MOVW/MOVD arg1,(arg0)
318 case ssa.OpPPC64LoweredAtomicStore8:
320 case ssa.OpPPC64LoweredAtomicStore32:
323 arg0 := v.Args[0].Reg()
324 arg1 := v.Args[1].Reg()
325 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
327 syncOp := ppc64.ASYNC
329 syncOp = ppc64.ALWSYNC
331 psync := s.Prog(syncOp)
332 psync.To.Type = obj.TYPE_NONE
335 p.To.Type = obj.TYPE_MEM
337 p.From.Type = obj.TYPE_REG
340 case ssa.OpPPC64LoweredAtomicCas64,
341 ssa.OpPPC64LoweredAtomicCas32:
345 // LDAR (Rarg0), MutexHint, Rtmp
348 // STDCCC Rarg2, (Rarg0)
352 // LWSYNC // Only for sequential consistency; not required in CasRel.
356 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
361 r0 := v.Args[0].Reg()
362 r1 := v.Args[1].Reg()
363 r2 := v.Args[2].Reg()
365 // Initialize return value to false
366 p := s.Prog(ppc64.AMOVD)
367 p.From.Type = obj.TYPE_CONST
369 p.To.Type = obj.TYPE_REG
371 // LWSYNC - Assuming shared data not write-through-required nor
372 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
373 plwsync1 := s.Prog(ppc64.ALWSYNC)
374 plwsync1.To.Type = obj.TYPE_NONE
377 p0.From.Type = obj.TYPE_MEM
379 p0.To.Type = obj.TYPE_REG
380 p0.To.Reg = ppc64.REGTMP
381 // If it is a Compare-and-Swap-Release operation, set the EH field with
384 p0.AddRestSourceConst(0)
388 p1.From.Type = obj.TYPE_REG
390 p1.To.Reg = ppc64.REGTMP
391 p1.To.Type = obj.TYPE_REG
392 // BNE done with return value = false
393 p2 := s.Prog(ppc64.ABNE)
394 p2.To.Type = obj.TYPE_BRANCH
397 p3.From.Type = obj.TYPE_REG
399 p3.To.Type = obj.TYPE_MEM
402 p4 := s.Prog(ppc64.ABNE)
403 p4.To.Type = obj.TYPE_BRANCH
406 p5 := s.Prog(ppc64.AMOVD)
407 p5.From.Type = obj.TYPE_CONST
409 p5.To.Type = obj.TYPE_REG
411 // LWSYNC - Assuming shared data not write-through-required nor
412 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
413 // If the operation is a CAS-Release, then synchronization is not necessary.
415 plwsync2 := s.Prog(ppc64.ALWSYNC)
416 plwsync2.To.Type = obj.TYPE_NONE
417 p2.To.SetTarget(plwsync2)
420 p6 := s.Prog(obj.ANOP)
424 case ssa.OpPPC64LoweredPubBarrier:
428 case ssa.OpPPC64LoweredGetClosurePtr:
429 // Closure pointer is R11 (already)
430 ssagen.CheckLoweredGetClosurePtr(v)
432 case ssa.OpPPC64LoweredGetCallerSP:
433 // caller's SP is FixedFrameSize below the address of the first arg
434 p := s.Prog(ppc64.AMOVD)
435 p.From.Type = obj.TYPE_ADDR
436 p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
437 p.From.Name = obj.NAME_PARAM
438 p.To.Type = obj.TYPE_REG
441 case ssa.OpPPC64LoweredGetCallerPC:
442 p := s.Prog(obj.AGETCALLERPC)
443 p.To.Type = obj.TYPE_REG
446 case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
447 // input is already rounded
450 loadOp := loadByType(v.Type)
452 ssagen.AddrAuto(&p.From, v.Args[0])
453 p.To.Type = obj.TYPE_REG
457 storeOp := storeByType(v.Type)
459 p.From.Type = obj.TYPE_REG
460 p.From.Reg = v.Args[0].Reg()
461 ssagen.AddrAuto(&p.To, v)
463 case ssa.OpArgIntReg, ssa.OpArgFloatReg:
464 // The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
465 // The loop only runs once.
466 for _, a := range v.Block.Func.RegArgs {
467 // Pass the spill/unspill information along to the assembler, offset by size of
468 // the saved LR slot.
469 addr := ssagen.SpillSlotAddr(a, ppc64.REGSP, base.Ctxt.Arch.FixedFrameSize)
470 s.FuncInfo().AddSpill(
471 obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
473 v.Block.Func.RegArgs = nil
475 ssagen.CheckArgReg(v)
477 case ssa.OpPPC64DIVD:
487 r0 := v.Args[0].Reg()
488 r1 := v.Args[1].Reg()
490 p := s.Prog(ppc64.ACMP)
491 p.From.Type = obj.TYPE_REG
493 p.To.Type = obj.TYPE_CONST
496 pbahead := s.Prog(ppc64.ABEQ)
497 pbahead.To.Type = obj.TYPE_BRANCH
499 p = s.Prog(v.Op.Asm())
500 p.From.Type = obj.TYPE_REG
503 p.To.Type = obj.TYPE_REG
506 pbover := s.Prog(obj.AJMP)
507 pbover.To.Type = obj.TYPE_BRANCH
509 p = s.Prog(ppc64.ANEG)
510 p.To.Type = obj.TYPE_REG
512 p.From.Type = obj.TYPE_REG
514 pbahead.To.SetTarget(p)
517 pbover.To.SetTarget(p)
519 case ssa.OpPPC64DIVW:
520 // word-width version of above
522 r0 := v.Args[0].Reg()
523 r1 := v.Args[1].Reg()
525 p := s.Prog(ppc64.ACMPW)
526 p.From.Type = obj.TYPE_REG
528 p.To.Type = obj.TYPE_CONST
531 pbahead := s.Prog(ppc64.ABEQ)
532 pbahead.To.Type = obj.TYPE_BRANCH
534 p = s.Prog(v.Op.Asm())
535 p.From.Type = obj.TYPE_REG
538 p.To.Type = obj.TYPE_REG
541 pbover := s.Prog(obj.AJMP)
542 pbover.To.Type = obj.TYPE_BRANCH
544 p = s.Prog(ppc64.ANEG)
545 p.To.Type = obj.TYPE_REG
547 p.From.Type = obj.TYPE_REG
549 pbahead.To.SetTarget(p)
552 pbover.To.SetTarget(p)
554 case ssa.OpPPC64CLRLSLWI:
556 r1 := v.Args[0].Reg()
558 p := s.Prog(v.Op.Asm())
559 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
560 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
561 p.AddRestSourceConst(ssa.GetPPC64Shiftsh(shifts))
563 p.To.Type = obj.TYPE_REG
566 case ssa.OpPPC64CLRLSLDI:
568 r1 := v.Args[0].Reg()
570 p := s.Prog(v.Op.Asm())
571 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
572 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
573 p.AddRestSourceConst(ssa.GetPPC64Shiftsh(shifts))
575 p.To.Type = obj.TYPE_REG
578 case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
579 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
580 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
581 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
582 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
583 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
584 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
585 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
587 r1 := v.Args[0].Reg()
588 r2 := v.Args[1].Reg()
589 p := s.Prog(v.Op.Asm())
590 p.From.Type = obj.TYPE_REG
593 p.To.Type = obj.TYPE_REG
596 case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
597 r1 := v.Args[0].Reg()
598 r2 := v.Args[1].Reg()
599 p := s.Prog(v.Op.Asm())
600 p.From.Type = obj.TYPE_REG
603 p.To.Type = obj.TYPE_REG
606 case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
607 p := s.Prog(v.Op.Asm())
608 p.From.Type = obj.TYPE_CONST
609 p.From.Offset = v.AuxInt
610 p.Reg = v.Args[0].Reg()
611 p.To.Type = obj.TYPE_REG
614 // Auxint holds encoded rotate + mask
615 case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
616 sh, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
617 p := s.Prog(v.Op.Asm())
618 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
619 p.Reg = v.Args[0].Reg()
620 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(sh)}
621 p.AddRestSourceArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
624 case ssa.OpPPC64RLDICL, ssa.OpPPC64RLDICR:
625 sh, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
626 p := s.Prog(v.Op.Asm())
627 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: sh}
629 case ssa.OpPPC64RLDICL:
630 p.AddRestSourceConst(mb)
631 case ssa.OpPPC64RLDICR:
632 p.AddRestSourceConst(me)
634 p.Reg = v.Args[0].Reg()
635 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
637 case ssa.OpPPC64RLWNM:
638 _, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
639 p := s.Prog(v.Op.Asm())
640 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
641 p.Reg = v.Args[0].Reg()
642 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
643 p.AddRestSourceArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
645 case ssa.OpPPC64MADDLD:
647 r1 := v.Args[0].Reg()
648 r2 := v.Args[1].Reg()
649 r3 := v.Args[2].Reg()
651 p := s.Prog(v.Op.Asm())
652 p.From.Type = obj.TYPE_REG
655 p.AddRestSourceReg(r3)
656 p.To.Type = obj.TYPE_REG
659 case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
661 r1 := v.Args[0].Reg()
662 r2 := v.Args[1].Reg()
663 r3 := v.Args[2].Reg()
665 p := s.Prog(v.Op.Asm())
666 p.From.Type = obj.TYPE_REG
669 p.AddRestSourceReg(r2)
670 p.To.Type = obj.TYPE_REG
673 case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
674 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
675 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
676 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD, ssa.OpPPC64BRH, ssa.OpPPC64BRW, ssa.OpPPC64BRD:
678 p := s.Prog(v.Op.Asm())
679 p.To.Type = obj.TYPE_REG
681 p.From.Type = obj.TYPE_REG
682 p.From.Reg = v.Args[0].Reg()
684 case ssa.OpPPC64ADDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
685 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
686 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
687 p := s.Prog(v.Op.Asm())
688 p.Reg = v.Args[0].Reg()
689 p.From.Type = obj.TYPE_CONST
690 p.From.Offset = v.AuxInt
691 p.To.Type = obj.TYPE_REG
694 case ssa.OpPPC64ADDC, ssa.OpPPC64ADDE, ssa.OpPPC64SUBC, ssa.OpPPC64SUBE:
695 r := v.Reg0() // CA is the first, implied argument.
696 r1 := v.Args[0].Reg()
697 r2 := v.Args[1].Reg()
698 p := s.Prog(v.Op.Asm())
699 p.From.Type = obj.TYPE_REG
702 p.To.Type = obj.TYPE_REG
705 case ssa.OpPPC64ADDZEzero, ssa.OpPPC64SUBZEzero:
706 p := s.Prog(v.Op.Asm())
707 p.From.Type = obj.TYPE_REG
708 p.From.Reg = ppc64.REG_R0
709 p.To.Type = obj.TYPE_REG
712 case ssa.OpPPC64ADDCconst:
713 p := s.Prog(v.Op.Asm())
714 p.Reg = v.Args[0].Reg()
715 p.From.Type = obj.TYPE_CONST
716 p.From.Offset = v.AuxInt
717 p.To.Type = obj.TYPE_REG
718 // Output is a pair, the second is the CA, which is implied.
721 case ssa.OpPPC64SUBCconst:
722 p := s.Prog(v.Op.Asm())
723 p.AddRestSourceConst(v.AuxInt)
724 p.From.Type = obj.TYPE_REG
725 p.From.Reg = v.Args[0].Reg()
726 p.To.Type = obj.TYPE_REG
729 case ssa.OpPPC64SUBFCconst:
730 p := s.Prog(v.Op.Asm())
731 p.AddRestSourceConst(v.AuxInt)
732 p.From.Type = obj.TYPE_REG
733 p.From.Reg = v.Args[0].Reg()
734 p.To.Type = obj.TYPE_REG
737 case ssa.OpPPC64ANDCCconst:
738 p := s.Prog(v.Op.Asm())
739 p.Reg = v.Args[0].Reg()
740 p.From.Type = obj.TYPE_CONST
741 p.From.Offset = v.AuxInt
742 p.To.Type = obj.TYPE_REG
743 // p.To.Reg = ppc64.REGTMP // discard result
746 case ssa.OpPPC64MOVDaddr:
747 switch v.Aux.(type) {
749 v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
751 // If aux offset and aux int are both 0, and the same
752 // input and output regs are used, no instruction
753 // needs to be generated, since it would just be
755 if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
756 p := s.Prog(ppc64.AMOVD)
757 p.From.Type = obj.TYPE_ADDR
758 p.From.Reg = v.Args[0].Reg()
759 p.From.Offset = v.AuxInt
760 p.To.Type = obj.TYPE_REG
764 case *obj.LSym, ir.Node:
765 p := s.Prog(ppc64.AMOVD)
766 p.From.Type = obj.TYPE_ADDR
767 p.From.Reg = v.Args[0].Reg()
768 p.To.Type = obj.TYPE_REG
770 ssagen.AddAux(&p.From, v)
774 case ssa.OpPPC64MOVDconst:
775 p := s.Prog(v.Op.Asm())
776 p.From.Type = obj.TYPE_CONST
777 p.From.Offset = v.AuxInt
778 p.To.Type = obj.TYPE_REG
781 case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
782 p := s.Prog(v.Op.Asm())
783 p.From.Type = obj.TYPE_FCONST
784 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
785 p.To.Type = obj.TYPE_REG
788 case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
789 p := s.Prog(v.Op.Asm())
790 p.From.Type = obj.TYPE_REG
791 p.From.Reg = v.Args[0].Reg()
792 p.To.Type = obj.TYPE_REG
793 p.To.Reg = v.Args[1].Reg()
795 case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
796 p := s.Prog(v.Op.Asm())
797 p.From.Type = obj.TYPE_REG
798 p.From.Reg = v.Args[0].Reg()
799 p.To.Type = obj.TYPE_CONST
800 p.To.Offset = v.AuxInt
802 case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
803 // Shift in register to required size
804 p := s.Prog(v.Op.Asm())
805 p.From.Type = obj.TYPE_REG
806 p.From.Reg = v.Args[0].Reg()
808 p.To.Type = obj.TYPE_REG
810 case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:
812 // MOVDload and MOVWload are DS form instructions that are restricted to
813 // offsets that are a multiple of 4. If the offset is not a multiple of 4,
814 // then the address of the symbol to be loaded is computed (base + offset)
815 // and used as the new base register and the offset field in the instruction
816 // can be set to zero.
818 // This same problem can happen with gostrings since the final offset is not
819 // known yet, but could be unaligned after the relocation is resolved.
820 // So gostrings are handled the same way.
822 // This allows the MOVDload and MOVWload to be generated in more cases and
823 // eliminates some offset and alignment checking in the rules file.
825 fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
826 ssagen.AddAux(&fromAddr, v)
830 switch fromAddr.Name {
831 case obj.NAME_EXTERN, obj.NAME_STATIC:
832 // Special case for a rule that combines the bytes of a gostring.
833 // The v alignment might seem OK, but we don't want to load it
834 // using an offset because relocation comes later.
835 genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go:string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
837 genAddr = fromAddr.Offset%4 != 0
840 // Load full address into the temp register.
841 p := s.Prog(ppc64.AMOVD)
842 p.From.Type = obj.TYPE_ADDR
843 p.From.Reg = v.Args[0].Reg()
844 ssagen.AddAux(&p.From, v)
845 // Load target using temp as base register
846 // and offset zero. Setting NAME_NONE
847 // prevents any extra offsets from being
849 p.To.Type = obj.TYPE_REG
850 p.To.Reg = ppc64.REGTMP
851 fromAddr.Reg = ppc64.REGTMP
852 // Clear the offset field and other
853 // information that might be used
854 // by the assembler to add to the
855 // final offset value.
857 fromAddr.Name = obj.NAME_NONE
860 p := s.Prog(v.Op.Asm())
862 p.To.Type = obj.TYPE_REG
865 case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
866 p := s.Prog(v.Op.Asm())
867 p.From.Type = obj.TYPE_MEM
868 p.From.Reg = v.Args[0].Reg()
869 ssagen.AddAux(&p.From, v)
870 p.To.Type = obj.TYPE_REG
873 case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
874 p := s.Prog(v.Op.Asm())
875 p.From.Type = obj.TYPE_MEM
876 p.From.Reg = v.Args[0].Reg()
877 p.To.Type = obj.TYPE_REG
880 case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
881 p := s.Prog(v.Op.Asm())
882 p.To.Type = obj.TYPE_MEM
883 p.To.Reg = v.Args[0].Reg()
884 p.From.Type = obj.TYPE_REG
885 p.From.Reg = v.Args[1].Reg()
887 case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
888 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
889 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
890 p := s.Prog(v.Op.Asm())
891 p.From.Type = obj.TYPE_MEM
892 p.From.Reg = v.Args[0].Reg()
893 p.From.Index = v.Args[1].Reg()
894 p.To.Type = obj.TYPE_REG
897 case ssa.OpPPC64DCBT:
898 p := s.Prog(v.Op.Asm())
899 p.From.Type = obj.TYPE_MEM
900 p.From.Reg = v.Args[0].Reg()
901 p.To.Type = obj.TYPE_CONST
902 p.To.Offset = v.AuxInt
904 case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
905 p := s.Prog(v.Op.Asm())
906 p.From.Type = obj.TYPE_REG
907 p.From.Reg = ppc64.REGZERO
908 p.To.Type = obj.TYPE_MEM
909 p.To.Reg = v.Args[0].Reg()
910 ssagen.AddAux(&p.To, v)
912 case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:
914 // MOVDstore and MOVDstorezero become DS form instructions that are restricted
915 // to offset values that are a multiple of 4. If the offset field is not a
916 // multiple of 4, then the full address of the store target is computed (base +
917 // offset) and used as the new base register and the offset in the instruction
920 // This allows the MOVDstore and MOVDstorezero to be generated in more cases,
921 // and prevents checking of the offset value and alignment in the rules.
923 toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
924 ssagen.AddAux(&toAddr, v)
926 if toAddr.Offset%4 != 0 {
927 p := s.Prog(ppc64.AMOVD)
928 p.From.Type = obj.TYPE_ADDR
929 p.From.Reg = v.Args[0].Reg()
930 ssagen.AddAux(&p.From, v)
931 p.To.Type = obj.TYPE_REG
932 p.To.Reg = ppc64.REGTMP
933 toAddr.Reg = ppc64.REGTMP
934 // Clear the offset field and other
935 // information that might be used
936 // by the assembler to add to the
937 // final offset value.
939 toAddr.Name = obj.NAME_NONE
942 p := s.Prog(v.Op.Asm())
944 p.From.Type = obj.TYPE_REG
945 if v.Op == ssa.OpPPC64MOVDstorezero {
946 p.From.Reg = ppc64.REGZERO
948 p.From.Reg = v.Args[1].Reg()
951 case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
952 p := s.Prog(v.Op.Asm())
953 p.From.Type = obj.TYPE_REG
954 p.From.Reg = v.Args[1].Reg()
955 p.To.Type = obj.TYPE_MEM
956 p.To.Reg = v.Args[0].Reg()
957 ssagen.AddAux(&p.To, v)
959 case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
960 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
961 ssa.OpPPC64MOVHBRstoreidx:
962 p := s.Prog(v.Op.Asm())
963 p.From.Type = obj.TYPE_REG
964 p.From.Reg = v.Args[2].Reg()
965 p.To.Index = v.Args[1].Reg()
966 p.To.Type = obj.TYPE_MEM
967 p.To.Reg = v.Args[0].Reg()
969 case ssa.OpPPC64ISEL, ssa.OpPPC64ISELZ:
970 // ISEL AuxInt ? arg0 : arg1
971 // ISELZ is a special case of ISEL where arg1 is implicitly $0.
973 // AuxInt value indicates conditions 0=LT 1=GT 2=EQ 3=SO 4=GE 5=LE 6=NE 7=NSO.
974 // ISEL accepts a CR bit argument, not a condition as expressed by AuxInt.
975 // Convert the condition to a CR bit argument by the following conversion:
977 // AuxInt&3 ? arg0 : arg1 for conditions LT, GT, EQ, SO
978 // AuxInt&3 ? arg1 : arg0 for conditions GE, LE, NE, NSO
979 p := s.Prog(v.Op.Asm())
980 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
981 p.Reg = v.Args[0].Reg()
982 if v.Op == ssa.OpPPC64ISEL {
983 p.AddRestSourceReg(v.Args[1].Reg())
985 p.AddRestSourceReg(ppc64.REG_R0)
987 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
989 p.Reg, p.GetFrom3().Reg = p.GetFrom3().Reg, p.Reg
991 p.From.SetConst(v.AuxInt & 3)
993 case ssa.OpPPC64SETBC, ssa.OpPPC64SETBCR:
994 p := s.Prog(v.Op.Asm())
995 p.To.Type = obj.TYPE_REG
997 p.From.Type = obj.TYPE_REG
998 p.From.Reg = int16(ppc64.REG_CR0LT + v.AuxInt)
1000 case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
1001 // The LoweredQuad code generation
1002 // generates STXV instructions on
1003 // power9. The Short variation is used
1004 // if no loop is generated.
1006 // sizes >= 64 generate a loop as follows:
1008 // Set up loop counter in CTR, used by BC
1009 // XXLXOR clears VS32
1010 // XXLXOR VS32,VS32,VS32
1011 // MOVD len/64,REG_TMP
1015 // STXV VS32,16(R20)
1016 // STXV VS32,32(R20)
1017 // STXV VS32,48(R20)
1021 // Bytes per iteration
1022 ctr := v.AuxInt / 64
1025 rem := v.AuxInt % 64
1027 // Only generate a loop if there is more
1028 // than 1 iteration.
1030 // Set up VS32 (V0) to hold 0s
1031 p := s.Prog(ppc64.AXXLXOR)
1032 p.From.Type = obj.TYPE_REG
1033 p.From.Reg = ppc64.REG_VS32
1034 p.To.Type = obj.TYPE_REG
1035 p.To.Reg = ppc64.REG_VS32
1036 p.Reg = ppc64.REG_VS32
1038 // Set up CTR loop counter
1039 p = s.Prog(ppc64.AMOVD)
1040 p.From.Type = obj.TYPE_CONST
1042 p.To.Type = obj.TYPE_REG
1043 p.To.Reg = ppc64.REGTMP
1045 p = s.Prog(ppc64.AMOVD)
1046 p.From.Type = obj.TYPE_REG
1047 p.From.Reg = ppc64.REGTMP
1048 p.To.Type = obj.TYPE_REG
1049 p.To.Reg = ppc64.REG_CTR
1051 // Don't generate padding for
1052 // loops with few iterations.
1054 p = s.Prog(obj.APCALIGN)
1055 p.From.Type = obj.TYPE_CONST
1059 // generate 4 STXVs to zero 64 bytes
1062 p = s.Prog(ppc64.ASTXV)
1063 p.From.Type = obj.TYPE_REG
1064 p.From.Reg = ppc64.REG_VS32
1065 p.To.Type = obj.TYPE_MEM
1066 p.To.Reg = v.Args[0].Reg()
1068 // Save the top of loop
1072 p = s.Prog(ppc64.ASTXV)
1073 p.From.Type = obj.TYPE_REG
1074 p.From.Reg = ppc64.REG_VS32
1075 p.To.Type = obj.TYPE_MEM
1076 p.To.Reg = v.Args[0].Reg()
1079 p = s.Prog(ppc64.ASTXV)
1080 p.From.Type = obj.TYPE_REG
1081 p.From.Reg = ppc64.REG_VS32
1082 p.To.Type = obj.TYPE_MEM
1083 p.To.Reg = v.Args[0].Reg()
1086 p = s.Prog(ppc64.ASTXV)
1087 p.From.Type = obj.TYPE_REG
1088 p.From.Reg = ppc64.REG_VS32
1089 p.To.Type = obj.TYPE_MEM
1090 p.To.Reg = v.Args[0].Reg()
1093 // Increment address for the
1094 // 64 bytes just zeroed.
1095 p = s.Prog(ppc64.AADD)
1096 p.Reg = v.Args[0].Reg()
1097 p.From.Type = obj.TYPE_CONST
1099 p.To.Type = obj.TYPE_REG
1100 p.To.Reg = v.Args[0].Reg()
1102 // Branch back to top of loop
1104 // BC with BO_BCTR generates bdnz
1105 p = s.Prog(ppc64.ABC)
1106 p.From.Type = obj.TYPE_CONST
1107 p.From.Offset = ppc64.BO_BCTR
1108 p.Reg = ppc64.REG_CR0LT
1109 p.To.Type = obj.TYPE_BRANCH
1112 // When ctr == 1 the loop was not generated but
1113 // there are at least 64 bytes to clear, so add
1114 // that to the remainder to generate the code
1115 // to clear those doublewords
1120 // Clear the remainder starting at offset zero
1123 if rem >= 16 && ctr <= 1 {
1124 // If the XXLXOR hasn't already been
1125 // generated, do it here to initialize
1127 p := s.Prog(ppc64.AXXLXOR)
1128 p.From.Type = obj.TYPE_REG
1129 p.From.Reg = ppc64.REG_VS32
1130 p.To.Type = obj.TYPE_REG
1131 p.To.Reg = ppc64.REG_VS32
1132 p.Reg = ppc64.REG_VS32
1134 // Generate STXV for 32 or 64
1137 p := s.Prog(ppc64.ASTXV)
1138 p.From.Type = obj.TYPE_REG
1139 p.From.Reg = ppc64.REG_VS32
1140 p.To.Type = obj.TYPE_MEM
1141 p.To.Reg = v.Args[0].Reg()
1142 p.To.Offset = offset
1144 p = s.Prog(ppc64.ASTXV)
1145 p.From.Type = obj.TYPE_REG
1146 p.From.Reg = ppc64.REG_VS32
1147 p.To.Type = obj.TYPE_MEM
1148 p.To.Reg = v.Args[0].Reg()
1149 p.To.Offset = offset + 16
1153 // Generate 16 bytes
1155 p := s.Prog(ppc64.ASTXV)
1156 p.From.Type = obj.TYPE_REG
1157 p.From.Reg = ppc64.REG_VS32
1158 p.To.Type = obj.TYPE_MEM
1159 p.To.Reg = v.Args[0].Reg()
1160 p.To.Offset = offset
1165 // first clear as many doublewords as possible
1166 // then clear remaining sizes as available
1168 op, size := ppc64.AMOVB, int64(1)
1171 op, size = ppc64.AMOVD, 8
1173 op, size = ppc64.AMOVW, 4
1175 op, size = ppc64.AMOVH, 2
1178 p.From.Type = obj.TYPE_REG
1179 p.From.Reg = ppc64.REG_R0
1180 p.To.Type = obj.TYPE_MEM
1181 p.To.Reg = v.Args[0].Reg()
1182 p.To.Offset = offset
1187 case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1189 // Unaligned data doesn't hurt performance
1190 // for these instructions on power8.
1192 // For sizes >= 64 generate a loop as follows:
1194 // Set up loop counter in CTR, used by BC
1195 // XXLXOR VS32,VS32,VS32
1196 // MOVD len/32,REG_TMP
1200 // STXVD2X VS32,(R0)(R20)
1201 // STXVD2X VS32,(R31)(R20)
1205 // any remainder is done as described below
1207 // for sizes < 64 bytes, first clear as many doublewords as possible,
1208 // then handle the remainder
1213 // the remainder bytes are cleared using one or more
1214 // of the following instructions with the appropriate
1215 // offsets depending which instructions are needed
1217 // MOVW R0,n1(R20) 4 bytes
1218 // MOVH R0,n2(R20) 2 bytes
1219 // MOVB R0,n3(R20) 1 byte
1221 // 7 bytes: MOVW, MOVH, MOVB
1222 // 6 bytes: MOVW, MOVH
1223 // 5 bytes: MOVW, MOVB
1224 // 3 bytes: MOVH, MOVB
1226 // each loop iteration does 32 bytes
1227 ctr := v.AuxInt / 32
1230 rem := v.AuxInt % 32
1232 // only generate a loop if there is more
1233 // than 1 iteration.
1235 // Set up VS32 (V0) to hold 0s
1236 p := s.Prog(ppc64.AXXLXOR)
1237 p.From.Type = obj.TYPE_REG
1238 p.From.Reg = ppc64.REG_VS32
1239 p.To.Type = obj.TYPE_REG
1240 p.To.Reg = ppc64.REG_VS32
1241 p.Reg = ppc64.REG_VS32
1243 // Set up CTR loop counter
1244 p = s.Prog(ppc64.AMOVD)
1245 p.From.Type = obj.TYPE_CONST
1247 p.To.Type = obj.TYPE_REG
1248 p.To.Reg = ppc64.REGTMP
1250 p = s.Prog(ppc64.AMOVD)
1251 p.From.Type = obj.TYPE_REG
1252 p.From.Reg = ppc64.REGTMP
1253 p.To.Type = obj.TYPE_REG
1254 p.To.Reg = ppc64.REG_CTR
1256 // Set up R31 to hold index value 16
1257 p = s.Prog(ppc64.AMOVD)
1258 p.From.Type = obj.TYPE_CONST
1260 p.To.Type = obj.TYPE_REG
1261 p.To.Reg = ppc64.REGTMP
1263 // Don't add padding for alignment
1264 // with few loop iterations.
1266 p = s.Prog(obj.APCALIGN)
1267 p.From.Type = obj.TYPE_CONST
1271 // generate 2 STXVD2Xs to store 16 bytes
1272 // when this is a loop then the top must be saved
1274 // This is the top of loop
1276 p = s.Prog(ppc64.ASTXVD2X)
1277 p.From.Type = obj.TYPE_REG
1278 p.From.Reg = ppc64.REG_VS32
1279 p.To.Type = obj.TYPE_MEM
1280 p.To.Reg = v.Args[0].Reg()
1281 p.To.Index = ppc64.REGZERO
1282 // Save the top of loop
1286 p = s.Prog(ppc64.ASTXVD2X)
1287 p.From.Type = obj.TYPE_REG
1288 p.From.Reg = ppc64.REG_VS32
1289 p.To.Type = obj.TYPE_MEM
1290 p.To.Reg = v.Args[0].Reg()
1291 p.To.Index = ppc64.REGTMP
1293 // Increment address for the
1294 // 4 doublewords just zeroed.
1295 p = s.Prog(ppc64.AADD)
1296 p.Reg = v.Args[0].Reg()
1297 p.From.Type = obj.TYPE_CONST
1299 p.To.Type = obj.TYPE_REG
1300 p.To.Reg = v.Args[0].Reg()
1302 // Branch back to top of loop
1304 // BC with BO_BCTR generates bdnz
1305 p = s.Prog(ppc64.ABC)
1306 p.From.Type = obj.TYPE_CONST
1307 p.From.Offset = ppc64.BO_BCTR
1308 p.Reg = ppc64.REG_CR0LT
1309 p.To.Type = obj.TYPE_BRANCH
1313 // when ctr == 1 the loop was not generated but
1314 // there are at least 32 bytes to clear, so add
1315 // that to the remainder to generate the code
1316 // to clear those doublewords
1321 // clear the remainder starting at offset zero
1324 // first clear as many doublewords as possible
1325 // then clear remaining sizes as available
1327 op, size := ppc64.AMOVB, int64(1)
1330 op, size = ppc64.AMOVD, 8
1332 op, size = ppc64.AMOVW, 4
1334 op, size = ppc64.AMOVH, 2
1337 p.From.Type = obj.TYPE_REG
1338 p.From.Reg = ppc64.REG_R0
1339 p.To.Type = obj.TYPE_MEM
1340 p.To.Reg = v.Args[0].Reg()
1341 p.To.Offset = offset
1346 case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1348 bytesPerLoop := int64(32)
1349 // This will be used when moving more
1350 // than 8 bytes. Moves start with
1351 // as many 8 byte moves as possible, then
1352 // 4, 2, or 1 byte(s) as remaining. This will
1353 // work and be efficient for power8 or later.
1354 // If there are 64 or more bytes, then a
1355 // loop is generated to move 32 bytes and
1356 // update the src and dst addresses on each
1357 // iteration. When < 64 bytes, the appropriate
1358 // number of moves are generated based on the
1360 // When moving >= 64 bytes a loop is used
1361 // MOVD len/32,REG_TMP
1365 // LXVD2X (R0)(R21),VS32
1366 // LXVD2X (R31)(R21),VS33
1368 // STXVD2X VS32,(R0)(R20)
1369 // STXVD2X VS33,(R31)(R20)
1372 // Bytes not moved by this loop are moved
1373 // with a combination of the following instructions,
1374 // starting with the largest sizes and generating as
1375 // many as needed, using the appropriate offset value.
1385 // Each loop iteration moves 32 bytes
1386 ctr := v.AuxInt / bytesPerLoop
1388 // Remainder after the loop
1389 rem := v.AuxInt % bytesPerLoop
1391 dstReg := v.Args[0].Reg()
1392 srcReg := v.Args[1].Reg()
1394 // The set of registers used here, must match the clobbered reg list
1400 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1403 p := s.Prog(ppc64.AMOVD)
1404 p.From.Type = obj.TYPE_CONST
1406 p.To.Type = obj.TYPE_REG
1407 p.To.Reg = ppc64.REGTMP
1409 p = s.Prog(ppc64.AMOVD)
1410 p.From.Type = obj.TYPE_REG
1411 p.From.Reg = ppc64.REGTMP
1412 p.To.Type = obj.TYPE_REG
1413 p.To.Reg = ppc64.REG_CTR
1415 // Use REGTMP as index reg
1416 p = s.Prog(ppc64.AMOVD)
1417 p.From.Type = obj.TYPE_CONST
1419 p.To.Type = obj.TYPE_REG
1420 p.To.Reg = ppc64.REGTMP
1422 // Don't add padding for
1423 // alignment with small iteration
1426 p = s.Prog(obj.APCALIGN)
1427 p.From.Type = obj.TYPE_CONST
1431 // Generate 16 byte loads and stores.
1432 // Use temp register for index (16)
1433 // on the second one.
1435 p = s.Prog(ppc64.ALXVD2X)
1436 p.From.Type = obj.TYPE_MEM
1438 p.From.Index = ppc64.REGZERO
1439 p.To.Type = obj.TYPE_REG
1440 p.To.Reg = ppc64.REG_VS32
1444 p = s.Prog(ppc64.ALXVD2X)
1445 p.From.Type = obj.TYPE_MEM
1447 p.From.Index = ppc64.REGTMP
1448 p.To.Type = obj.TYPE_REG
1449 p.To.Reg = ppc64.REG_VS33
1451 // increment the src reg for next iteration
1452 p = s.Prog(ppc64.AADD)
1454 p.From.Type = obj.TYPE_CONST
1455 p.From.Offset = bytesPerLoop
1456 p.To.Type = obj.TYPE_REG
1459 // generate 16 byte stores
1460 p = s.Prog(ppc64.ASTXVD2X)
1461 p.From.Type = obj.TYPE_REG
1462 p.From.Reg = ppc64.REG_VS32
1463 p.To.Type = obj.TYPE_MEM
1465 p.To.Index = ppc64.REGZERO
1467 p = s.Prog(ppc64.ASTXVD2X)
1468 p.From.Type = obj.TYPE_REG
1469 p.From.Reg = ppc64.REG_VS33
1470 p.To.Type = obj.TYPE_MEM
1472 p.To.Index = ppc64.REGTMP
1474 // increment the dst reg for next iteration
1475 p = s.Prog(ppc64.AADD)
1477 p.From.Type = obj.TYPE_CONST
1478 p.From.Offset = bytesPerLoop
1479 p.To.Type = obj.TYPE_REG
1482 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1484 p = s.Prog(ppc64.ABC)
1485 p.From.Type = obj.TYPE_CONST
1486 p.From.Offset = ppc64.BO_BCTR
1487 p.Reg = ppc64.REG_CR0LT
1488 p.To.Type = obj.TYPE_BRANCH
1491 // srcReg and dstReg were incremented in the loop, so
1492 // later instructions start with offset 0.
1496 // No loop was generated for one iteration, so
1497 // add 32 bytes to the remainder to move those bytes.
1503 // Generate 16 byte loads and stores.
1504 // Use temp register for index (value 16)
1505 // on the second one.
1506 p := s.Prog(ppc64.ALXVD2X)
1507 p.From.Type = obj.TYPE_MEM
1509 p.From.Index = ppc64.REGZERO
1510 p.To.Type = obj.TYPE_REG
1511 p.To.Reg = ppc64.REG_VS32
1513 p = s.Prog(ppc64.ASTXVD2X)
1514 p.From.Type = obj.TYPE_REG
1515 p.From.Reg = ppc64.REG_VS32
1516 p.To.Type = obj.TYPE_MEM
1518 p.To.Index = ppc64.REGZERO
1524 // Use REGTMP as index reg
1525 p := s.Prog(ppc64.AMOVD)
1526 p.From.Type = obj.TYPE_CONST
1528 p.To.Type = obj.TYPE_REG
1529 p.To.Reg = ppc64.REGTMP
1531 p = s.Prog(ppc64.ALXVD2X)
1532 p.From.Type = obj.TYPE_MEM
1534 p.From.Index = ppc64.REGTMP
1535 p.To.Type = obj.TYPE_REG
1536 p.To.Reg = ppc64.REG_VS32
1538 p = s.Prog(ppc64.ASTXVD2X)
1539 p.From.Type = obj.TYPE_REG
1540 p.From.Reg = ppc64.REG_VS32
1541 p.To.Type = obj.TYPE_MEM
1543 p.To.Index = ppc64.REGTMP
1550 // Generate all the remaining load and store pairs, starting with
1551 // as many 8 byte moves as possible, then 4, 2, 1.
1553 op, size := ppc64.AMOVB, int64(1)
1556 op, size = ppc64.AMOVD, 8
1558 op, size = ppc64.AMOVWZ, 4
1560 op, size = ppc64.AMOVH, 2
1564 p.To.Type = obj.TYPE_REG
1565 p.To.Reg = ppc64.REGTMP
1566 p.From.Type = obj.TYPE_MEM
1568 p.From.Offset = offset
1572 p.From.Type = obj.TYPE_REG
1573 p.From.Reg = ppc64.REGTMP
1574 p.To.Type = obj.TYPE_MEM
1576 p.To.Offset = offset
1581 case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1582 bytesPerLoop := int64(64)
1583 // This is used when moving more
1584 // than 8 bytes on power9. Moves start with
1585 // as many 8 byte moves as possible, then
1586 // 4, 2, or 1 byte(s) as remaining. This will
1587 // work and be efficient for power8 or later.
1588 // If there are 64 or more bytes, then a
1589 // loop is generated to move 64 bytes and
1590 // update the src and dst addresses on each
1591 // iteration. When < 64 bytes, the appropriate
1592 // number of moves are generated based on the
1594 // When moving >= 64 bytes a loop is used
1595 // MOVD len/64,REG_TMP
1602 // STXV VS33,16(R20)
1605 // Bytes not moved by this loop are moved
1606 // with a combination of the following instructions,
1607 // starting with the largest sizes and generating as
1608 // many as needed, using the appropriate offset value.
1618 // Each loop iteration moves 64 bytes
1619 ctr := v.AuxInt / bytesPerLoop
1621 // Remainder after the loop
1622 rem := v.AuxInt % bytesPerLoop
1624 dstReg := v.Args[0].Reg()
1625 srcReg := v.Args[1].Reg()
1632 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1635 p := s.Prog(ppc64.AMOVD)
1636 p.From.Type = obj.TYPE_CONST
1638 p.To.Type = obj.TYPE_REG
1639 p.To.Reg = ppc64.REGTMP
1641 p = s.Prog(ppc64.AMOVD)
1642 p.From.Type = obj.TYPE_REG
1643 p.From.Reg = ppc64.REGTMP
1644 p.To.Type = obj.TYPE_REG
1645 p.To.Reg = ppc64.REG_CTR
1647 p = s.Prog(obj.APCALIGN)
1648 p.From.Type = obj.TYPE_CONST
1651 // Generate 16 byte loads and stores.
1652 p = s.Prog(ppc64.ALXV)
1653 p.From.Type = obj.TYPE_MEM
1655 p.From.Offset = offset
1656 p.To.Type = obj.TYPE_REG
1657 p.To.Reg = ppc64.REG_VS32
1661 p = s.Prog(ppc64.ALXV)
1662 p.From.Type = obj.TYPE_MEM
1664 p.From.Offset = offset + 16
1665 p.To.Type = obj.TYPE_REG
1666 p.To.Reg = ppc64.REG_VS33
1668 // generate 16 byte stores
1669 p = s.Prog(ppc64.ASTXV)
1670 p.From.Type = obj.TYPE_REG
1671 p.From.Reg = ppc64.REG_VS32
1672 p.To.Type = obj.TYPE_MEM
1674 p.To.Offset = offset
1676 p = s.Prog(ppc64.ASTXV)
1677 p.From.Type = obj.TYPE_REG
1678 p.From.Reg = ppc64.REG_VS33
1679 p.To.Type = obj.TYPE_MEM
1681 p.To.Offset = offset + 16
1683 // Generate 16 byte loads and stores.
1684 p = s.Prog(ppc64.ALXV)
1685 p.From.Type = obj.TYPE_MEM
1687 p.From.Offset = offset + 32
1688 p.To.Type = obj.TYPE_REG
1689 p.To.Reg = ppc64.REG_VS32
1691 p = s.Prog(ppc64.ALXV)
1692 p.From.Type = obj.TYPE_MEM
1694 p.From.Offset = offset + 48
1695 p.To.Type = obj.TYPE_REG
1696 p.To.Reg = ppc64.REG_VS33
1698 // generate 16 byte stores
1699 p = s.Prog(ppc64.ASTXV)
1700 p.From.Type = obj.TYPE_REG
1701 p.From.Reg = ppc64.REG_VS32
1702 p.To.Type = obj.TYPE_MEM
1704 p.To.Offset = offset + 32
1706 p = s.Prog(ppc64.ASTXV)
1707 p.From.Type = obj.TYPE_REG
1708 p.From.Reg = ppc64.REG_VS33
1709 p.To.Type = obj.TYPE_MEM
1711 p.To.Offset = offset + 48
1713 // increment the src reg for next iteration
1714 p = s.Prog(ppc64.AADD)
1716 p.From.Type = obj.TYPE_CONST
1717 p.From.Offset = bytesPerLoop
1718 p.To.Type = obj.TYPE_REG
1721 // increment the dst reg for next iteration
1722 p = s.Prog(ppc64.AADD)
1724 p.From.Type = obj.TYPE_CONST
1725 p.From.Offset = bytesPerLoop
1726 p.To.Type = obj.TYPE_REG
1729 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1731 p = s.Prog(ppc64.ABC)
1732 p.From.Type = obj.TYPE_CONST
1733 p.From.Offset = ppc64.BO_BCTR
1734 p.Reg = ppc64.REG_CR0LT
1735 p.To.Type = obj.TYPE_BRANCH
1738 // srcReg and dstReg were incremented in the loop, so
1739 // later instructions start with offset 0.
1743 // No loop was generated for one iteration, so
1744 // add 64 bytes to the remainder to move those bytes.
1749 p := s.Prog(ppc64.ALXV)
1750 p.From.Type = obj.TYPE_MEM
1752 p.To.Type = obj.TYPE_REG
1753 p.To.Reg = ppc64.REG_VS32
1755 p = s.Prog(ppc64.ALXV)
1756 p.From.Type = obj.TYPE_MEM
1759 p.To.Type = obj.TYPE_REG
1760 p.To.Reg = ppc64.REG_VS33
1762 p = s.Prog(ppc64.ASTXV)
1763 p.From.Type = obj.TYPE_REG
1764 p.From.Reg = ppc64.REG_VS32
1765 p.To.Type = obj.TYPE_MEM
1768 p = s.Prog(ppc64.ASTXV)
1769 p.From.Type = obj.TYPE_REG
1770 p.From.Reg = ppc64.REG_VS33
1771 p.To.Type = obj.TYPE_MEM
1780 // Generate 16 byte loads and stores.
1781 p := s.Prog(ppc64.ALXV)
1782 p.From.Type = obj.TYPE_MEM
1784 p.From.Offset = offset
1785 p.To.Type = obj.TYPE_REG
1786 p.To.Reg = ppc64.REG_VS32
1788 p = s.Prog(ppc64.ASTXV)
1789 p.From.Type = obj.TYPE_REG
1790 p.From.Reg = ppc64.REG_VS32
1791 p.To.Type = obj.TYPE_MEM
1793 p.To.Offset = offset
1799 p := s.Prog(ppc64.ALXV)
1800 p.From.Type = obj.TYPE_MEM
1802 p.From.Offset = offset
1803 p.To.Type = obj.TYPE_REG
1804 p.To.Reg = ppc64.REG_VS32
1806 p = s.Prog(ppc64.ASTXV)
1807 p.From.Type = obj.TYPE_REG
1808 p.From.Reg = ppc64.REG_VS32
1809 p.To.Type = obj.TYPE_MEM
1811 p.To.Offset = offset
1817 // Generate all the remaining load and store pairs, starting with
1818 // as many 8 byte moves as possible, then 4, 2, 1.
1820 op, size := ppc64.AMOVB, int64(1)
1823 op, size = ppc64.AMOVD, 8
1825 op, size = ppc64.AMOVWZ, 4
1827 op, size = ppc64.AMOVH, 2
1831 p.To.Type = obj.TYPE_REG
1832 p.To.Reg = ppc64.REGTMP
1833 p.From.Type = obj.TYPE_MEM
1835 p.From.Offset = offset
1839 p.From.Type = obj.TYPE_REG
1840 p.From.Reg = ppc64.REGTMP
1841 p.To.Type = obj.TYPE_MEM
1843 p.To.Offset = offset
1848 case ssa.OpPPC64CALLstatic:
1851 case ssa.OpPPC64CALLtail:
1854 case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1855 p := s.Prog(ppc64.AMOVD)
1856 p.From.Type = obj.TYPE_REG
1857 p.From.Reg = v.Args[0].Reg()
1858 p.To.Type = obj.TYPE_REG
1859 p.To.Reg = ppc64.REG_LR
1861 if v.Args[0].Reg() != ppc64.REG_R12 {
1862 v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1867 // Convert the call into a blrl with hint this is not a subroutine return.
1868 // The full bclrl opcode must be specified when passing a hint.
1870 pp.From.Type = obj.TYPE_CONST
1871 pp.From.Offset = ppc64.BO_ALWAYS
1872 pp.Reg = ppc64.REG_CR0LT // The preferred value if BI is ignored.
1873 pp.To.Reg = ppc64.REG_LR
1874 pp.AddRestSourceConst(1)
1876 if ppc64.NeedTOCpointer(base.Ctxt) {
1877 // When compiling Go into PIC, the function we just
1878 // called via pointer might have been implemented in
1879 // a separate module and so overwritten the TOC
1880 // pointer in R2; reload it.
1881 q := s.Prog(ppc64.AMOVD)
1882 q.From.Type = obj.TYPE_MEM
1884 q.From.Reg = ppc64.REGSP
1885 q.To.Type = obj.TYPE_REG
1886 q.To.Reg = ppc64.REG_R2
1889 case ssa.OpPPC64LoweredWB:
1890 p := s.Prog(obj.ACALL)
1891 p.To.Type = obj.TYPE_MEM
1892 p.To.Name = obj.NAME_EXTERN
1893 // AuxInt encodes how many buffer entries we need.
1894 p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
1896 case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1897 p := s.Prog(obj.ACALL)
1898 p.To.Type = obj.TYPE_MEM
1899 p.To.Name = obj.NAME_EXTERN
1900 p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
1901 s.UseArgs(16) // space used in callee args area by assembly stubs
1903 case ssa.OpPPC64LoweredNilCheck:
1904 if buildcfg.GOOS == "aix" {
1908 // NOP (so the BNE has somewhere to land)
1911 p := s.Prog(ppc64.ACMP)
1912 p.From.Type = obj.TYPE_REG
1913 p.From.Reg = v.Args[0].Reg()
1914 p.To.Type = obj.TYPE_REG
1915 p.To.Reg = ppc64.REG_R0
1918 p2 := s.Prog(ppc64.ABNE)
1919 p2.To.Type = obj.TYPE_BRANCH
1922 // Write at 0 is forbidden and will trigger a SIGSEGV
1923 p = s.Prog(ppc64.AMOVW)
1924 p.From.Type = obj.TYPE_REG
1925 p.From.Reg = ppc64.REG_R0
1926 p.To.Type = obj.TYPE_MEM
1927 p.To.Reg = ppc64.REG_R0
1929 // NOP (so the BNE has somewhere to land)
1930 nop := s.Prog(obj.ANOP)
1931 p2.To.SetTarget(nop)
1934 // Issue a load which will fault if arg is nil.
1935 p := s.Prog(ppc64.AMOVBZ)
1936 p.From.Type = obj.TYPE_MEM
1937 p.From.Reg = v.Args[0].Reg()
1938 ssagen.AddAux(&p.From, v)
1939 p.To.Type = obj.TYPE_REG
1940 p.To.Reg = ppc64.REGTMP
1942 if logopt.Enabled() {
1943 logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1945 if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1946 base.WarnfAt(v.Pos, "generated nil check")
1949 // These should be resolved by rules and not make it here.
1950 case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1951 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1952 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1953 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1954 case ssa.OpPPC64InvertFlags:
1955 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1956 case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1957 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1958 case ssa.OpClobber, ssa.OpClobberReg:
1959 // TODO: implement for clobberdead experiment. Nop is ok for now.
1961 v.Fatalf("genValue not implemented: %s", v.LongString())
1965 var blockJump = [...]struct {
1967 asmeq, invasmun bool
1969 ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1970 ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1972 ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1973 ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1974 ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1975 ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1977 // TODO: need to work FP comparisons into block jumps
1978 ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1979 ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1980 ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1981 ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1984 func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
1986 case ssa.BlockDefer:
1987 // defer returns in R3:
1988 // 0 if we should continue executing
1989 // 1 if we should jump to deferreturn call
1990 p := s.Prog(ppc64.ACMP)
1991 p.From.Type = obj.TYPE_REG
1992 p.From.Reg = ppc64.REG_R3
1993 p.To.Type = obj.TYPE_REG
1994 p.To.Reg = ppc64.REG_R0
1996 p = s.Prog(ppc64.ABNE)
1997 p.To.Type = obj.TYPE_BRANCH
1998 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
1999 if b.Succs[0].Block() != next {
2000 p := s.Prog(obj.AJMP)
2001 p.To.Type = obj.TYPE_BRANCH
2002 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
2005 case ssa.BlockPlain:
2006 if b.Succs[0].Block() != next {
2007 p := s.Prog(obj.AJMP)
2008 p.To.Type = obj.TYPE_BRANCH
2009 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
2011 case ssa.BlockExit, ssa.BlockRetJmp:
2015 case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
2016 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
2017 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
2018 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
2019 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
2020 jmp := blockJump[b.Kind]
2022 case b.Succs[0].Block():
2023 s.Br(jmp.invasm, b.Succs[1].Block())
2025 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2026 s.Br(ppc64.ABVS, b.Succs[1].Block())
2028 case b.Succs[1].Block():
2029 s.Br(jmp.asm, b.Succs[0].Block())
2031 s.Br(ppc64.ABEQ, b.Succs[0].Block())
2034 if b.Likely != ssa.BranchUnlikely {
2035 s.Br(jmp.asm, b.Succs[0].Block())
2037 s.Br(ppc64.ABEQ, b.Succs[0].Block())
2039 s.Br(obj.AJMP, b.Succs[1].Block())
2041 s.Br(jmp.invasm, b.Succs[1].Block())
2043 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2044 s.Br(ppc64.ABVS, b.Succs[1].Block())
2046 s.Br(obj.AJMP, b.Succs[0].Block())
2050 b.Fatalf("branch not implemented: %s", b.LongString())
2054 func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2055 p := s.Prog(loadByType(t))
2056 p.From.Type = obj.TYPE_MEM
2057 p.From.Name = obj.NAME_AUTO
2058 p.From.Sym = n.Linksym()
2059 p.From.Offset = n.FrameOffset() + off
2060 p.To.Type = obj.TYPE_REG
2065 func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2066 p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
2067 p.To.Name = obj.NAME_PARAM
2068 p.To.Sym = n.Linksym()
2069 p.Pos = p.Pos.WithNotStmt()