package ppc64
import (
- "cmd/compile/internal/gc"
+ "cmd/compile/internal/base"
+ "cmd/compile/internal/ir"
"cmd/compile/internal/logopt"
+ "cmd/compile/internal/objw"
"cmd/compile/internal/ssa"
+ "cmd/compile/internal/ssagen"
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/obj/ppc64"
- "cmd/internal/objabi"
+ "internal/buildcfg"
"math"
"strings"
)
-// markMoves marks any MOVXconst ops that need to avoid clobbering flags.
-func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
+// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
+func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
// flive := b.FlagsLiveAtEnd
// if b.Control != nil && b.Control.Type.IsFlags() {
// flive = true
panic("bad store type")
}
-func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
+func ssaGenValue(s *ssagen.State, v *ssa.Value) {
switch v.Op {
case ssa.OpCopy:
t := v.Type
p.To.Reg = y
}
- case ssa.OpPPC64LoweredMuluhilo:
- // MULHDU Rarg1, Rarg0, Reg0
- // MULLD Rarg1, Rarg0, Reg1
- r0 := v.Args[0].Reg()
- r1 := v.Args[1].Reg()
- p := s.Prog(ppc64.AMULHDU)
- p.From.Type = obj.TYPE_REG
- p.From.Reg = r1
- p.Reg = r0
- p.To.Type = obj.TYPE_REG
- p.To.Reg = v.Reg0()
- p1 := s.Prog(ppc64.AMULLD)
- p1.From.Type = obj.TYPE_REG
- p1.From.Reg = r1
- p1.Reg = r0
- p1.To.Type = obj.TYPE_REG
- p1.To.Reg = v.Reg1()
-
- case ssa.OpPPC64LoweredAdd64Carry:
- // ADDC Rarg2, -1, Rtmp
- // ADDE Rarg1, Rarg0, Reg0
- // ADDZE Rzero, Reg1
- r0 := v.Args[0].Reg()
- r1 := v.Args[1].Reg()
- r2 := v.Args[2].Reg()
- p := s.Prog(ppc64.AADDC)
- p.From.Type = obj.TYPE_CONST
- p.From.Offset = -1
- p.Reg = r2
- p.To.Type = obj.TYPE_REG
- p.To.Reg = ppc64.REGTMP
- p1 := s.Prog(ppc64.AADDE)
- p1.From.Type = obj.TYPE_REG
- p1.From.Reg = r1
- p1.Reg = r0
- p1.To.Type = obj.TYPE_REG
- p1.To.Reg = v.Reg0()
- p2 := s.Prog(ppc64.AADDZE)
- p2.From.Type = obj.TYPE_REG
- p2.From.Reg = ppc64.REGZERO
- p2.To.Type = obj.TYPE_REG
- p2.To.Reg = v.Reg1()
-
case ssa.OpPPC64LoweredAtomicAnd8,
- ssa.OpPPC64LoweredAtomicOr8:
+ ssa.OpPPC64LoweredAtomicAnd32,
+ ssa.OpPPC64LoweredAtomicOr8,
+ ssa.OpPPC64LoweredAtomicOr32:
// LWSYNC
- // LBAR (Rarg0), Rtmp
+ // LBAR/LWAR (Rarg0), Rtmp
// AND/OR Rarg1, Rtmp
- // STBCCC Rtmp, (Rarg0)
+ // STBCCC/STWCCC Rtmp, (Rarg0)
// BNE -3(PC)
+ ld := ppc64.ALBAR
+ st := ppc64.ASTBCCC
+ if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
+ ld = ppc64.ALWAR
+ st = ppc64.ASTWCCC
+ }
r0 := v.Args[0].Reg()
r1 := v.Args[1].Reg()
// LWSYNC - Assuming shared data not write-through-required nor
// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
plwsync := s.Prog(ppc64.ALWSYNC)
plwsync.To.Type = obj.TYPE_NONE
- p := s.Prog(ppc64.ALBAR)
+ // LBAR or LWAR
+ p := s.Prog(ld)
p.From.Type = obj.TYPE_MEM
p.From.Reg = r0
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REGTMP
+ // AND/OR reg1,out
p1 := s.Prog(v.Op.Asm())
p1.From.Type = obj.TYPE_REG
p1.From.Reg = r1
p1.To.Type = obj.TYPE_REG
p1.To.Reg = ppc64.REGTMP
- p2 := s.Prog(ppc64.ASTBCCC)
+ // STBCCC or STWCCC
+ p2 := s.Prog(st)
p2.From.Type = obj.TYPE_REG
p2.From.Reg = ppc64.REGTMP
p2.To.Type = obj.TYPE_MEM
p2.To.Reg = r0
p2.RegTo2 = ppc64.REGTMP
+ // BNE retry
p3 := s.Prog(ppc64.ABNE)
p3.To.Type = obj.TYPE_BRANCH
- gc.Patch(p3, p)
+ p3.To.SetTarget(p)
case ssa.OpPPC64LoweredAtomicAdd32,
ssa.OpPPC64LoweredAtomicAdd64:
// BNE retry
p4 := s.Prog(ppc64.ABNE)
p4.To.Type = obj.TYPE_BRANCH
- gc.Patch(p4, p)
+ p4.To.SetTarget(p)
// Ensure a 32 bit result
if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
// BNE retry
p2 := s.Prog(ppc64.ABNE)
p2.To.Type = obj.TYPE_BRANCH
- gc.Patch(p2, p)
+ p2.To.SetTarget(p)
// ISYNC
pisync := s.Prog(ppc64.AISYNC)
pisync.To.Type = obj.TYPE_NONE
- gc.Patch(p2, pisync)
+ p2.To.SetTarget(pisync)
case ssa.OpPPC64LoweredAtomicStore8,
ssa.OpPPC64LoweredAtomicStore32,
case ssa.OpPPC64LoweredAtomicCas64,
ssa.OpPPC64LoweredAtomicCas32:
+ // MOVD $0, Rout
// LWSYNC
// loop:
// LDAR (Rarg0), MutexHint, Rtmp
// CMP Rarg1, Rtmp
- // BNE fail
+ // BNE end
// STDCCC Rarg2, (Rarg0)
// BNE loop
- // LWSYNC // Only for sequential consistency; not required in CasRel.
// MOVD $1, Rout
- // BR end
- // fail:
- // MOVD $0, Rout
// end:
+ // LWSYNC // Only for sequential consistency; not required in CasRel.
ld := ppc64.ALDAR
st := ppc64.ASTDCCC
cmp := ppc64.ACMP
r1 := v.Args[1].Reg()
r2 := v.Args[2].Reg()
out := v.Reg0()
+ // Initialize return value to false
+ p := s.Prog(ppc64.AMOVD)
+ p.From.Type = obj.TYPE_CONST
+ p.From.Offset = 0
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = out
// LWSYNC - Assuming shared data not write-through-required nor
// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
plwsync1 := s.Prog(ppc64.ALWSYNC)
plwsync1.To.Type = obj.TYPE_NONE
// LDAR or LWAR
- p := s.Prog(ld)
- p.From.Type = obj.TYPE_MEM
- p.From.Reg = r0
- p.To.Type = obj.TYPE_REG
- p.To.Reg = ppc64.REGTMP
+ p0 := s.Prog(ld)
+ p0.From.Type = obj.TYPE_MEM
+ p0.From.Reg = r0
+ p0.To.Type = obj.TYPE_REG
+ p0.To.Reg = ppc64.REGTMP
// If it is a Compare-and-Swap-Release operation, set the EH field with
// the release hint.
if v.AuxInt == 0 {
- p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
+ p0.AddRestSourceConst(0)
}
// CMP reg1,reg2
p1 := s.Prog(cmp)
p1.From.Reg = r1
p1.To.Reg = ppc64.REGTMP
p1.To.Type = obj.TYPE_REG
- // BNE cas_fail
+ // BNE done with return value = false
p2 := s.Prog(ppc64.ABNE)
p2.To.Type = obj.TYPE_BRANCH
// STDCCC or STWCCC
// BNE retry
p4 := s.Prog(ppc64.ABNE)
p4.To.Type = obj.TYPE_BRANCH
- gc.Patch(p4, p)
+ p4.To.SetTarget(p0)
+ // return value true
+ p5 := s.Prog(ppc64.AMOVD)
+ p5.From.Type = obj.TYPE_CONST
+ p5.From.Offset = 1
+ p5.To.Type = obj.TYPE_REG
+ p5.To.Reg = out
// LWSYNC - Assuming shared data not write-through-required nor
// caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
// If the operation is a CAS-Release, then synchronization is not necessary.
if v.AuxInt != 0 {
plwsync2 := s.Prog(ppc64.ALWSYNC)
plwsync2.To.Type = obj.TYPE_NONE
+ p2.To.SetTarget(plwsync2)
+ } else {
+ // done (label)
+ p6 := s.Prog(obj.ANOP)
+ p2.To.SetTarget(p6)
}
- // return true
- p5 := s.Prog(ppc64.AMOVD)
- p5.From.Type = obj.TYPE_CONST
- p5.From.Offset = 1
- p5.To.Type = obj.TYPE_REG
- p5.To.Reg = out
- // BR done
- p6 := s.Prog(obj.AJMP)
- p6.To.Type = obj.TYPE_BRANCH
- // return false
- p7 := s.Prog(ppc64.AMOVD)
- p7.From.Type = obj.TYPE_CONST
- p7.From.Offset = 0
- p7.To.Type = obj.TYPE_REG
- p7.To.Reg = out
- gc.Patch(p2, p7)
- // done (label)
- p8 := s.Prog(obj.ANOP)
- gc.Patch(p6, p8)
+
+ case ssa.OpPPC64LoweredPubBarrier:
+ // LWSYNC
+ s.Prog(v.Op.Asm())
case ssa.OpPPC64LoweredGetClosurePtr:
// Closure pointer is R11 (already)
- gc.CheckLoweredGetClosurePtr(v)
+ ssagen.CheckLoweredGetClosurePtr(v)
case ssa.OpPPC64LoweredGetCallerSP:
// caller's SP is FixedFrameSize below the address of the first arg
p := s.Prog(ppc64.AMOVD)
p.From.Type = obj.TYPE_ADDR
- p.From.Offset = -gc.Ctxt.FixedFrameSize()
+ p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
p.From.Name = obj.NAME_PARAM
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpLoadReg:
loadOp := loadByType(v.Type)
p := s.Prog(loadOp)
- gc.AddrAuto(&p.From, v.Args[0])
+ ssagen.AddrAuto(&p.From, v.Args[0])
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
p := s.Prog(storeOp)
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[0].Reg()
- gc.AddrAuto(&p.To, v)
+ ssagen.AddrAuto(&p.To, v)
+
+ case ssa.OpArgIntReg, ssa.OpArgFloatReg:
+ // The assembler needs to wrap the entry safepoint/stack growth code with
+ // spill/unspill of the argument registers. The loop below only runs once.
+ for _, a := range v.Block.Func.RegArgs {
+ // Pass the spill/unspill information along to the assembler, offset by size of
+ // the saved LR slot.
+ addr := ssagen.SpillSlotAddr(a, ppc64.REGSP, base.Ctxt.Arch.FixedFrameSize)
+ s.FuncInfo().AddSpill(
+ obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
+ }
+ v.Block.Func.RegArgs = nil
+
+ ssagen.CheckArgReg(v)
case ssa.OpPPC64DIVD:
// For now,
p.To.Reg = r
p.From.Type = obj.TYPE_REG
p.From.Reg = r0
- gc.Patch(pbahead, p)
+ pbahead.To.SetTarget(p)
p = s.Prog(obj.ANOP)
- gc.Patch(pbover, p)
+ pbover.To.SetTarget(p)
case ssa.OpPPC64DIVW:
// word-width version of above
p.To.Reg = r
p.From.Type = obj.TYPE_REG
p.From.Reg = r0
- gc.Patch(pbahead, p)
+ pbahead.To.SetTarget(p)
p = s.Prog(obj.ANOP)
- gc.Patch(pbover, p)
+ pbover.To.SetTarget(p)
+
+ case ssa.OpPPC64CLRLSLWI:
+ r := v.Reg()
+ r1 := v.Args[0].Reg()
+ shifts := v.AuxInt
+ p := s.Prog(v.Op.Asm())
+ // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
+ p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
+ p.AddRestSourceConst(ssa.GetPPC64Shiftsh(shifts))
+ p.Reg = r1
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = r
+
+ case ssa.OpPPC64CLRLSLDI:
+ r := v.Reg()
+ r1 := v.Args[0].Reg()
+ shifts := v.AuxInt
+ p := s.Prog(v.Op.Asm())
+ // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
+ p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
+ p.AddRestSourceConst(ssa.GetPPC64Shiftsh(shifts))
+ p.Reg = r1
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = r
case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
- ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV:
+ ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
+ ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
r := v.Reg()
r1 := v.Args[0].Reg()
r2 := v.Args[1].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = r
- case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
+ case ssa.OpPPC64ADDCC, ssa.OpPPC64ANDCC, ssa.OpPPC64SUBCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC, ssa.OpPPC64NORCC,
+ ssa.OpPPC64ANDNCC:
r1 := v.Args[0].Reg()
r2 := v.Args[1].Reg()
p := s.Prog(v.Op.Asm())
p.From.Reg = r2
p.Reg = r1
p.To.Type = obj.TYPE_REG
- p.To.Reg = ppc64.REGTMP // result is not needed
+ p.To.Reg = v.Reg0()
+
+ case ssa.OpPPC64NEGCC, ssa.OpPPC64CNTLZDCC:
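+ // Unary operations that also set CR0 (the record, or "dot", forms); the GPR result is Reg0.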
+ p := s.Prog(v.Op.Asm())
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = v.Reg0()
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = v.Args[0].Reg()
case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
p := s.Prog(v.Op.Asm())
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
- case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
+ // Auxint holds encoded rotate + mask
+ case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
+ sh, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
+ p := s.Prog(v.Op.Asm())
+ p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
+ p.Reg = v.Args[0].Reg()
+ p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(sh)}
+ p.AddRestSourceArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
+
+ case ssa.OpPPC64RLDICL, ssa.OpPPC64RLDICR:
+ sh, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
+ p := s.Prog(v.Op.Asm())
+ p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: sh}
+ switch v.Op {
+ case ssa.OpPPC64RLDICL:
+ p.AddRestSourceConst(mb)
+ case ssa.OpPPC64RLDICR:
+ p.AddRestSourceConst(me)
+ }
+ p.Reg = v.Args[0].Reg()
+ p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
+
+ // Auxint holds mask
+ case ssa.OpPPC64RLWNM:
+ _, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
+ p := s.Prog(v.Op.Asm())
+ p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
+ p.Reg = v.Args[0].Reg()
+ p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
+ p.AddRestSourceArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
+
+ case ssa.OpPPC64MADDLD:
r := v.Reg()
r1 := v.Args[0].Reg()
r2 := v.Args[1].Reg()
r3 := v.Args[2].Reg()
// r = r1*r2 + r3
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = r1
- p.Reg = r3
- p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
+ p.Reg = r2
+ p.AddRestSourceReg(r3)
p.To.Type = obj.TYPE_REG
p.To.Reg = r
- case ssa.OpPPC64MaskIfNotCarry:
+ case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
r := v.Reg()
+ r1 := v.Args[0].Reg()
+ r2 := v.Args[1].Reg()
+ r3 := v.Args[2].Reg()
+ // r = r1*r2 ± r3
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
- p.From.Reg = ppc64.REGZERO
+ p.From.Reg = r1
+ p.Reg = r3
+ p.AddRestSourceReg(r2)
p.To.Type = obj.TYPE_REG
p.To.Reg = r
- case ssa.OpPPC64ADDconstForCarry:
- r1 := v.Args[0].Reg()
- p := s.Prog(v.Op.Asm())
- p.Reg = r1
- p.From.Type = obj.TYPE_CONST
- p.From.Offset = v.AuxInt
- p.To.Type = obj.TYPE_REG
- p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
-
case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
- ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
+ ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD, ssa.OpPPC64BRH, ssa.OpPPC64BRW, ssa.OpPPC64BRD:
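+ // Note: BRH, BRW and BRD are the byte-reverse instructions added in ISA 3.1.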
r := v.Reg()
p := s.Prog(v.Op.Asm())
p.To.Type = obj.TYPE_REG
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[0].Reg()
- case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
- ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst:
+ case ssa.OpPPC64ADDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
+ ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
+ ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
p := s.Prog(v.Op.Asm())
p.Reg = v.Args[0].Reg()
p.From.Type = obj.TYPE_CONST
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
- case ssa.OpPPC64ANDCCconst:
+ case ssa.OpPPC64ADDC, ssa.OpPPC64ADDE, ssa.OpPPC64SUBC, ssa.OpPPC64SUBE:
+ r := v.Reg0() // CA is the first, implied argument.
+ r1 := v.Args[0].Reg()
+ r2 := v.Args[1].Reg()
+ p := s.Prog(v.Op.Asm())
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = r2
+ p.Reg = r1
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = r
+
+ case ssa.OpPPC64ADDZEzero, ssa.OpPPC64SUBZEzero:
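+ // ADDZE/SUBFZE with R0 as the source; R0 reads as zero, so the result depends only on the CA bit.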
+ p := s.Prog(v.Op.Asm())
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = ppc64.REG_R0
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = v.Reg()
+
+ case ssa.OpPPC64ADDCconst:
p := s.Prog(v.Op.Asm())
p.Reg = v.Args[0].Reg()
+ p.From.Type = obj.TYPE_CONST
+ p.From.Offset = v.AuxInt
+ p.To.Type = obj.TYPE_REG
+ // Output is a pair, the second is the CA, which is implied.
+ p.To.Reg = v.Reg0()
- if v.Aux != nil {
- p.From.Type = obj.TYPE_CONST
- p.From.Offset = gc.AuxOffset(v)
- } else {
- p.From.Type = obj.TYPE_CONST
- p.From.Offset = v.AuxInt
- }
+ case ssa.OpPPC64SUBCconst:
+ p := s.Prog(v.Op.Asm())
+ p.AddRestSourceConst(v.AuxInt)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = v.Args[0].Reg()
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = v.Reg0()
+
+ case ssa.OpPPC64SUBFCconst:
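+ // Subtract from constant: the result is AuxInt - arg0.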
+ p := s.Prog(v.Op.Asm())
+ p.AddRestSourceConst(v.AuxInt)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = v.Args[0].Reg()
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = v.Reg()
+ case ssa.OpPPC64ADDCCconst, ssa.OpPPC64ANDCCconst:
+ p := s.Prog(v.Op.Asm())
+ p.Reg = v.Args[0].Reg()
+ p.From.Type = obj.TYPE_CONST
+ p.From.Offset = v.AuxInt
p.To.Type = obj.TYPE_REG
- p.To.Reg = ppc64.REGTMP // discard result
+ p.To.Reg = v.Reg0()
case ssa.OpPPC64MOVDaddr:
switch v.Aux.(type) {
p.To.Reg = v.Reg()
}
- case *obj.LSym, *gc.Node:
+ case *obj.LSym, ir.Node:
p := s.Prog(ppc64.AMOVD)
p.From.Type = obj.TYPE_ADDR
p.From.Reg = v.Args[0].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
- gc.AddAux(&p.From, v)
+ ssagen.AddAux(&p.From, v)
}
p.To.Reg = v.Reg()
p.To.Type = obj.TYPE_REG
- case ssa.OpPPC64MOVDload:
+ case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:
- // MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
- // For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
- // the offset is not known until link time. If the load of a go.string uses relocation for the
- // offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
- // To avoid this problem, the full address of the go.string is computed and loaded into the base register,
- // and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
- // go.string types because other types will have proper alignment.
+ // MOVDload and MOVWload are DS form instructions that are restricted to
+ // offsets that are a multiple of 4. If the offset is not a multiple of 4,
+ // then the address of the symbol to be loaded is computed (base + offset),
+ // loaded into the temp register, and used as the new base register; the
+ // offset field in the instruction can then be set to zero.
- gostring := false
- switch n := v.Aux.(type) {
- case *obj.LSym:
- gostring = strings.HasPrefix(n.Name, "go.string.")
+ // This same problem can happen with gostrings since the final offset is not
+ // known yet, but could be unaligned after the relocation is resolved.
+ // So gostrings are handled the same way.
+
+ // This allows the MOVDload and MOVWload to be generated in more cases and
+ // eliminates some offset and alignment checking in the rules file.
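+
+ // For example (sym here is a hypothetical symbol), a load such as
+ // MOVD sym+6(R3), R4 cannot be encoded directly since 6%4 != 0;
+ // instead the compiler emits:
+ // MOVD $sym+6(R3), R31
+ // MOVD 0(R31), R4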
+
+ fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
+ ssagen.AddAux(&fromAddr, v)
+
+ genAddr := false
+
+ switch fromAddr.Name {
+ case obj.NAME_EXTERN, obj.NAME_STATIC:
+ // Special case: a rule combines the bytes of a gostring.
+ // The alignment of v might seem OK, but we don't want to load it
+ // using an offset because the relocation comes later.
+ genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go:string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
+ default:
+ genAddr = fromAddr.Offset%4 != 0
}
- if gostring {
- // Generate full addr of the go.string const
- // including AuxInt
+ if genAddr {
+ // Load full address into the temp register.
p := s.Prog(ppc64.AMOVD)
p.From.Type = obj.TYPE_ADDR
p.From.Reg = v.Args[0].Reg()
- gc.AddAux(&p.From, v)
- p.To.Type = obj.TYPE_REG
- p.To.Reg = v.Reg()
- // Load go.string using 0 offset
- p = s.Prog(v.Op.Asm())
- p.From.Type = obj.TYPE_MEM
- p.From.Reg = v.Reg()
+ ssagen.AddAux(&p.From, v)
+ // Load target using temp as base register
+ // and offset zero. Setting NAME_NONE
+ // prevents any extra offsets from being
+ // added.
p.To.Type = obj.TYPE_REG
- p.To.Reg = v.Reg()
- break
+ p.To.Reg = ppc64.REGTMP
+ fromAddr.Reg = ppc64.REGTMP
+ // Clear the offset field and other
+ // information that might be used
+ // by the assembler to add to the
+ // final offset value.
+ fromAddr.Offset = 0
+ fromAddr.Name = obj.NAME_NONE
+ fromAddr.Sym = nil
}
- // Not a go.string, generate a normal load
- fallthrough
+ p := s.Prog(v.Op.Asm())
+ p.From = fromAddr
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = v.Reg()
- case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
+ case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_MEM
p.From.Reg = v.Args[0].Reg()
- gc.AddAux(&p.From, v)
+ ssagen.AddAux(&p.From, v)
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
p.From.Type = obj.TYPE_MEM
p.From.Reg = v.Args[0].Reg()
p.From.Index = v.Args[1].Reg()
- gc.AddAux(&p.From, v)
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
- case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
+ case ssa.OpPPC64DCBT:
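+ // DCBT: data cache block touch, a prefetch hint. AuxInt holds the TH (touch hint) field.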
+ p := s.Prog(v.Op.Asm())
+ p.From.Type = obj.TYPE_MEM
+ p.From.Reg = v.Args[0].Reg()
+ p.To.Type = obj.TYPE_CONST
+ p.To.Offset = v.AuxInt
+
+ case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REGZERO
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
- gc.AddAux(&p.To, v)
+ ssagen.AddAux(&p.To, v)
+
+ case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:
+
+ // MOVDstore and MOVDstorezero become DS form instructions that are restricted
+ // to offset values that are a multiple of 4. If the offset field is not a
+ // multiple of 4, then the full address of the store target is computed (base +
+ // offset) and used as the new base register and the offset in the instruction
+ // is set to 0.
+
+ // This allows the MOVDstore and MOVDstorezero to be generated in more cases,
+ // and prevents checking of the offset value and alignment in the rules.
+
+ toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
+ ssagen.AddAux(&toAddr, v)
+
+ if toAddr.Offset%4 != 0 {
+ p := s.Prog(ppc64.AMOVD)
+ p.From.Type = obj.TYPE_ADDR
+ p.From.Reg = v.Args[0].Reg()
+ ssagen.AddAux(&p.From, v)
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = ppc64.REGTMP
+ toAddr.Reg = ppc64.REGTMP
+ // Clear the offset field and other
+ // information that might be used
+ // by the assembler to add to the
+ // final offset value.
+ toAddr.Offset = 0
+ toAddr.Name = obj.NAME_NONE
+ toAddr.Sym = nil
+ }
+ p := s.Prog(v.Op.Asm())
+ p.To = toAddr
+ p.From.Type = obj.TYPE_REG
+ if v.Op == ssa.OpPPC64MOVDstorezero {
+ p.From.Reg = ppc64.REGZERO
+ } else {
+ p.From.Reg = v.Args[1].Reg()
+ }
- case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
+ case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[1].Reg()
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
- gc.AddAux(&p.To, v)
+ ssagen.AddAux(&p.To, v)
case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
p.To.Index = v.Args[1].Reg()
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
- gc.AddAux(&p.To, v)
-
- case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
- // ISEL, ISELB
- // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
- // ISEL only accepts 0, 1, 2 condition values but the others can be
- // achieved by swapping operand order.
- // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
- // arg0 ? arg2 : arg1 for conditions GE, LE, NE
- // ISELB is used when a boolean result is needed, returning 0 or 1
- p := s.Prog(ppc64.AISEL)
- p.To.Type = obj.TYPE_REG
- p.To.Reg = v.Reg()
- // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
- r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
+
+ case ssa.OpPPC64ISEL, ssa.OpPPC64ISELZ:
+ // ISEL AuxInt ? arg0 : arg1
+ // ISELZ is a special case of ISEL where arg1 is implicitly $0.
+ //
+ // AuxInt value indicates conditions 0=LT 1=GT 2=EQ 3=SO 4=GE 5=LE 6=NE 7=NSO.
+ // ISEL accepts a CR bit argument, not a condition as expressed by AuxInt.
+ // Convert the condition to a CR bit argument by the following conversion:
+ //
+ // AuxInt&3 ? arg0 : arg1 for conditions LT, GT, EQ, SO
+ // AuxInt&3 ? arg1 : arg0 for conditions GE, LE, NE, NSO
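+ //
+ // For example, AuxInt=5 (LE) uses CR bit 1 (GT) with the operands swapped,
+ // since (LE ? arg0 : arg1) == (GT ? arg1 : arg0).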
+ p := s.Prog(v.Op.Asm())
+ p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
+ p.Reg = v.Args[0].Reg()
if v.Op == ssa.OpPPC64ISEL {
- r.Reg = v.Args[1].Reg()
+ p.AddRestSourceReg(v.Args[1].Reg())
+ } else {
+ p.AddRestSourceReg(ppc64.REG_R0)
}
// AuxInt values 4,5,6,7 are implemented with reverse operand order from 0,1,2,3
if v.AuxInt > 3 {
- p.Reg = r.Reg
- p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
- } else {
- p.Reg = v.Args[0].Reg()
- p.SetFrom3(r)
+ p.Reg, p.GetFrom3().Reg = p.GetFrom3().Reg, p.Reg
}
- p.From.Type = obj.TYPE_CONST
- p.From.Offset = v.AuxInt & 3
+ p.From.SetConst(v.AuxInt & 3)
- case ssa.OpPPC64LoweredZero:
+ case ssa.OpPPC64SETBC, ssa.OpPPC64SETBCR:
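+ // SETBC sets the result to 1 if the CR bit selected by AuxInt is set, else 0; SETBCR reverses the sense.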
+ p := s.Prog(v.Op.Asm())
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = v.Reg()
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = int16(ppc64.REG_CR0LT + v.AuxInt)
- // unaligned data doesn't hurt performance
- // for these instructions on power8 or later
+ case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
+ // The LoweredQuad code generation
+ // generates STXV instructions on
+ // power9. The Short variation is used
+ // if no loop is generated.
- // for sizes >= 64 generate a loop as follows:
+ // sizes >= 64 generate a loop as follows:
- // set up loop counter in CTR, used by BC
+ // Set up loop counter in CTR, used by BC
+ // XXLXOR clears VS32
// XXLXOR VS32,VS32,VS32
- // MOVD len/32,REG_TMP
- // MOVD REG_TMP,CTR
- // MOVD $16,REG_TMP
- // loop:
- // STXVD2X VS32,(R0)(R3)
- // STXVD2X VS32,(R31)(R3)
- // ADD $32,R3
- // BC 16, 0, loop
- //
- // any remainder is done as described below
-
- // for sizes < 64 bytes, first clear as many doublewords as possible,
- // then handle the remainder
- // MOVD R0,(R3)
- // MOVD R0,8(R3)
- // .... etc.
- //
- // the remainder bytes are cleared using one or more
- // of the following instructions with the appropriate
- // offsets depending which instructions are needed
- //
- // MOVW R0,n1(R3) 4 bytes
- // MOVH R0,n2(R3) 2 bytes
- // MOVB R0,n3(R3) 1 byte
- //
- // 7 bytes: MOVW, MOVH, MOVB
- // 6 bytes: MOVW, MOVH
- // 5 bytes: MOVW, MOVB
- // 3 bytes: MOVH, MOVB
-
- // each loop iteration does 32 bytes
- ctr := v.AuxInt / 32
-
- // remainder bytes
- rem := v.AuxInt % 32
-
- // only generate a loop if there is more
+ // MOVD len/64,REG_TMP
+ // MOVD REG_TMP,CTR
+ // loop:
+ // STXV VS32,0(R20)
+ // STXV VS32,16(R20)
+ // STXV VS32,32(R20)
+ // STXV VS32,48(R20)
+ // ADD $64,R20
+ // BC 16, 0, loop
+
+ // Number of 64 byte iterations
+ ctr := v.AuxInt / 64
+
+ // Remainder bytes
+ rem := v.AuxInt % 64
+
+ // Only generate a loop if there is more
// than 1 iteration.
if ctr > 1 {
// Set up VS32 (V0) to hold 0s
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REG_CTR
- // Set up R31 to hold index value 16
- p = s.Prog(ppc64.AMOVD)
- p.From.Type = obj.TYPE_CONST
- p.From.Offset = 16
- p.To.Type = obj.TYPE_REG
- p.To.Reg = ppc64.REGTMP
+ // Don't generate padding for
+ // loops with few iterations.
+ if ctr > 3 {
+ p = s.Prog(obj.APCALIGN)
+ p.From.Type = obj.TYPE_CONST
+ p.From.Offset = 16
+ }
- // generate 2 STXVD2Xs to store 16 bytes
- // when this is a loop then the top must be saved
+ // generate 4 STXVs to zero 64 bytes
var top *obj.Prog
- // This is the top of loop
- p = s.Prog(ppc64.ASTXVD2X)
+
+ p = s.Prog(ppc64.ASTXV)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS32
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
- p.To.Index = ppc64.REGZERO
- // Save the top of loop
+
+ // Save the top of loop
if top == nil {
top = p
}
+ p = s.Prog(ppc64.ASTXV)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = ppc64.REG_VS32
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = v.Args[0].Reg()
+ p.To.Offset = 16
- p = s.Prog(ppc64.ASTXVD2X)
+ p = s.Prog(ppc64.ASTXV)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS32
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
- p.To.Index = ppc64.REGTMP
+ p.To.Offset = 32
+
+ p = s.Prog(ppc64.ASTXV)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = ppc64.REG_VS32
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = v.Args[0].Reg()
+ p.To.Offset = 48
// Increment address for the
- // 4 doublewords just zeroed.
+ // 64 bytes just zeroed.
p = s.Prog(ppc64.AADD)
p.Reg = v.Args[0].Reg()
p.From.Type = obj.TYPE_CONST
- p.From.Offset = 32
+ p.From.Offset = 64
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Args[0].Reg()
p = s.Prog(ppc64.ABC)
p.From.Type = obj.TYPE_CONST
p.From.Offset = ppc64.BO_BCTR
- p.Reg = ppc64.REG_R0
+ p.Reg = ppc64.REG_CR0LT
p.To.Type = obj.TYPE_BRANCH
- gc.Patch(p, top)
+ p.To.SetTarget(top)
}
-
- // when ctr == 1 the loop was not generated but
- // there are at least 32 bytes to clear, so add
+ // When ctr == 1 the loop was not generated but
+ // there are at least 64 bytes to clear, so add
// that to the remainder to generate the code
// to clear those bytes
if ctr == 1 {
- rem += 32
+ rem += 64
+ }
+
+ // Clear the remainder starting at offset zero
+ offset := int64(0)
+
+ if rem >= 16 && ctr <= 1 {
+ // If the XXLXOR hasn't already been
+ // generated, do it here to initialize
+ // VS32 (V0) to 0.
+ p := s.Prog(ppc64.AXXLXOR)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = ppc64.REG_VS32
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = ppc64.REG_VS32
+ p.Reg = ppc64.REG_VS32
+ }
+ // Generate STXV for 32 or 64
+ // bytes.
+ for rem >= 32 {
+ p := s.Prog(ppc64.ASTXV)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = ppc64.REG_VS32
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = v.Args[0].Reg()
+ p.To.Offset = offset
+
+ p = s.Prog(ppc64.ASTXV)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = ppc64.REG_VS32
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = v.Args[0].Reg()
+ p.To.Offset = offset + 16
+ offset += 32
+ rem -= 32
+ }
+ // Generate 16 bytes
+ if rem >= 16 {
+ p := s.Prog(ppc64.ASTXV)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = ppc64.REG_VS32
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = v.Args[0].Reg()
+ p.To.Offset = offset
+ offset += 16
+ rem -= 16
+ }
+
+ // first clear as many doublewords as possible
+ // then clear remaining sizes as available
+ for rem > 0 {
+ op, size := ppc64.AMOVB, int64(1)
+ switch {
+ case rem >= 8:
+ op, size = ppc64.AMOVD, 8
+ case rem >= 4:
+ op, size = ppc64.AMOVW, 4
+ case rem >= 2:
+ op, size = ppc64.AMOVH, 2
+ }
+ p := s.Prog(op)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = ppc64.REG_R0
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = v.Args[0].Reg()
+ p.To.Offset = offset
+ rem -= size
+ offset += size
+ }
+
+ case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
+
+ // Unaligned data doesn't hurt performance
+ // for these instructions on power8.
+
+ // For sizes >= 64 generate a loop as follows:
+
+ // Set up loop counter in CTR, used by BC
+ // XXLXOR VS32,VS32,VS32
+ // MOVD len/32,REG_TMP
+ // MOVD REG_TMP,CTR
+ // MOVD $16,REG_TMP
+ // loop:
+ // STXVD2X VS32,(R0)(R20)
+ // STXVD2X VS32,(R31)(R20)
+ // ADD $32,R20
+ // BC 16, 0, loop
+ //
+ // any remainder is done as described below
+
+ // for sizes < 64 bytes, first clear as many doublewords as possible,
+ // then handle the remainder
+ // MOVD R0,(R20)
+ // MOVD R0,8(R20)
+ // .... etc.
+ //
+ // the remainder bytes are cleared using one or more
+ // of the following instructions with the appropriate
+ // offsets depending which instructions are needed
+ //
+ // MOVW R0,n1(R20) 4 bytes
+ // MOVH R0,n2(R20) 2 bytes
+ // MOVB R0,n3(R20) 1 byte
+ //
+ // 7 bytes: MOVW, MOVH, MOVB
+ // 6 bytes: MOVW, MOVH
+ // 5 bytes: MOVW, MOVB
+ // 3 bytes: MOVH, MOVB
+
+ // each loop iteration does 32 bytes
+ ctr := v.AuxInt / 32
+
+ // remainder bytes
+ rem := v.AuxInt % 32
+
+ // only generate a loop if there is more
+ // than 1 iteration.
+ if ctr > 1 {
+ // Set up VS32 (V0) to hold 0s
+ p := s.Prog(ppc64.AXXLXOR)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = ppc64.REG_VS32
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = ppc64.REG_VS32
+ p.Reg = ppc64.REG_VS32
+
+ // Set up CTR loop counter
+ p = s.Prog(ppc64.AMOVD)
+ p.From.Type = obj.TYPE_CONST
+ p.From.Offset = ctr
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = ppc64.REGTMP
+
+ p = s.Prog(ppc64.AMOVD)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = ppc64.REGTMP
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = ppc64.REG_CTR
+
+ // Set up R31 to hold index value 16
+ p = s.Prog(ppc64.AMOVD)
+ p.From.Type = obj.TYPE_CONST
+ p.From.Offset = 16
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = ppc64.REGTMP
+
+ // Don't add padding for alignment
+ // with few loop iterations.
+ if ctr > 3 {
+ p = s.Prog(obj.APCALIGN)
+ p.From.Type = obj.TYPE_CONST
+ p.From.Offset = 16
+ }
+
+ // generate 2 STXVD2Xs, each storing 16 bytes;
+ // when this is a loop, the top must be saved
+ var top *obj.Prog
+ // This is the top of loop
+
+ p = s.Prog(ppc64.ASTXVD2X)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = ppc64.REG_VS32
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = v.Args[0].Reg()
+ p.To.Index = ppc64.REGZERO
+ // Save the top of loop
+ if top == nil {
+ top = p
+ }
+ p = s.Prog(ppc64.ASTXVD2X)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = ppc64.REG_VS32
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = v.Args[0].Reg()
+ p.To.Index = ppc64.REGTMP
+
+ // Increment address for the
+ // 4 doublewords just zeroed.
+ p = s.Prog(ppc64.AADD)
+ p.Reg = v.Args[0].Reg()
+ p.From.Type = obj.TYPE_CONST
+ p.From.Offset = 32
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = v.Args[0].Reg()
+
+ // Branch back to top of loop
+ // based on CTR
+ // BC with BO_BCTR generates bdnz
+ p = s.Prog(ppc64.ABC)
+ p.From.Type = obj.TYPE_CONST
+ p.From.Offset = ppc64.BO_BCTR
+ p.Reg = ppc64.REG_CR0LT
+ p.To.Type = obj.TYPE_BRANCH
+ p.To.SetTarget(top)
+ }
+
+ // when ctr == 1 the loop was not generated but
+ // there are at least 32 bytes to clear, so add
+ // that to the remainder to generate the code
+ // to clear those doublewords
+ if ctr == 1 {
+ rem += 32
}
// clear the remainder starting at offset zero
offset += size
}
- case ssa.OpPPC64LoweredMove:
+ case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
+ bytesPerLoop := int64(32)
// This will be used when moving more
// than 8 bytes. Moves start with
// as many 8 byte moves as possible, then
// MOVD REG_TMP,CTR
// MOVD $16,REG_TMP
// top:
- // LXVD2X (R0)(R4),VS32
- // LXVD2X (R31)(R4),VS33
- // ADD $32,R4
- // STXVD2X VS32,(R0)(R3)
- // STXVD2X VS33,(R31)(R4)
- // ADD $32,R3
+ // LXVD2X (R0)(R21),VS32
+ // LXVD2X (R31)(R21),VS33
+ // ADD $32,R21
+ // STXVD2X VS32,(R0)(R20)
+ // STXVD2X VS33,(R31)(R20)
+ // ADD $32,R20
// BC 16,0,top
// Bytes not moved by this loop are moved
// with a combination of the following instructions,
// starting with the largest sizes and generating as
// many as needed, using the appropriate offset value.
- // MOVD n(R4),R14
- // MOVD R14,n(R3)
- // MOVW n1(R4),R14
- // MOVW R14,n1(R3)
- // MOVH n2(R4),R14
- // MOVH R14,n2(R3)
- // MOVB n3(R4),R14
- // MOVB R14,n3(R3)
+ // MOVD n(R21),R31
+ // MOVD R31,n(R20)
+ // MOVW n1(R21),R31
+ // MOVW R31,n1(R20)
+ // MOVH n2(R21),R31
+ // MOVH R31,n2(R20)
+ // MOVB n3(R21),R31
+ // MOVB R31,n3(R20)
// Each loop iteration moves 32 bytes
- ctr := v.AuxInt / 32
+ ctr := v.AuxInt / bytesPerLoop
// Remainder after the loop
- rem := v.AuxInt % 32
+ rem := v.AuxInt % bytesPerLoop
- dst_reg := v.Args[0].Reg()
- src_reg := v.Args[1].Reg()
+ dstReg := v.Args[0].Reg()
+ srcReg := v.Args[1].Reg()
// The set of registers used here must match the clobbered reg list
// in PPC64Ops.go.
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REGTMP
+ // Don't add padding for
+ // alignment with small iteration
+ // counts.
+ if ctr > 3 {
+ p = s.Prog(obj.APCALIGN)
+ p.From.Type = obj.TYPE_CONST
+ p.From.Offset = 16
+ }
+
// Generate 16 byte loads and stores.
// Use temp register for index (16)
// on the second one.
+
p = s.Prog(ppc64.ALXVD2X)
p.From.Type = obj.TYPE_MEM
- p.From.Reg = src_reg
+ p.From.Reg = srcReg
p.From.Index = ppc64.REGZERO
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REG_VS32
-
if top == nil {
top = p
}
-
p = s.Prog(ppc64.ALXVD2X)
p.From.Type = obj.TYPE_MEM
- p.From.Reg = src_reg
+ p.From.Reg = srcReg
p.From.Index = ppc64.REGTMP
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REG_VS33
// increment the src reg for next iteration
p = s.Prog(ppc64.AADD)
- p.Reg = src_reg
+ p.Reg = srcReg
p.From.Type = obj.TYPE_CONST
- p.From.Offset = 32
+ p.From.Offset = bytesPerLoop
p.To.Type = obj.TYPE_REG
- p.To.Reg = src_reg
+ p.To.Reg = srcReg
// generate 16 byte stores
p = s.Prog(ppc64.ASTXVD2X)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS32
p.To.Type = obj.TYPE_MEM
- p.To.Reg = dst_reg
+ p.To.Reg = dstReg
p.To.Index = ppc64.REGZERO
p = s.Prog(ppc64.ASTXVD2X)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS33
p.To.Type = obj.TYPE_MEM
- p.To.Reg = dst_reg
+ p.To.Reg = dstReg
p.To.Index = ppc64.REGTMP
// increment the dst reg for next iteration
p = s.Prog(ppc64.AADD)
- p.Reg = dst_reg
+ p.Reg = dstReg
p.From.Type = obj.TYPE_CONST
- p.From.Offset = 32
+ p.From.Offset = bytesPerLoop
p.To.Type = obj.TYPE_REG
- p.To.Reg = dst_reg
+ p.To.Reg = dstReg
// BC with BO_BCTR generates bdnz to branch on nonzero CTR
// to loop top.
p = s.Prog(ppc64.ABC)
p.From.Type = obj.TYPE_CONST
p.From.Offset = ppc64.BO_BCTR
- p.Reg = ppc64.REG_R0
+ p.Reg = ppc64.REG_CR0LT
p.To.Type = obj.TYPE_BRANCH
- gc.Patch(p, top)
+ p.To.SetTarget(top)
- // src_reg and dst_reg were incremented in the loop, so
+ // srcReg and dstReg were incremented in the loop, so
// later instructions start with offset 0.
offset = int64(0)
}
// No loop was generated for one iteration, so
// add 32 bytes to the remainder to move those bytes.
if ctr == 1 {
- rem += 32
+ rem += bytesPerLoop
}
if rem >= 16 {
// on the second one.
p := s.Prog(ppc64.ALXVD2X)
p.From.Type = obj.TYPE_MEM
- p.From.Reg = src_reg
+ p.From.Reg = srcReg
p.From.Index = ppc64.REGZERO
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REG_VS32
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS32
p.To.Type = obj.TYPE_MEM
- p.To.Reg = dst_reg
+ p.To.Reg = dstReg
p.To.Index = ppc64.REGZERO
offset = 16
if rem >= 16 {
// Use REGTMP as index reg
- p = s.Prog(ppc64.AMOVD)
+ p := s.Prog(ppc64.AMOVD)
p.From.Type = obj.TYPE_CONST
p.From.Offset = 16
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REGTMP
- // Generate 16 byte loads and stores.
- // Use temp register for index (16)
- // on the second one.
p = s.Prog(ppc64.ALXVD2X)
p.From.Type = obj.TYPE_MEM
- p.From.Reg = src_reg
+ p.From.Reg = srcReg
p.From.Index = ppc64.REGTMP
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REG_VS32
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS32
p.To.Type = obj.TYPE_MEM
- p.To.Reg = dst_reg
+ p.To.Reg = dstReg
p.To.Index = ppc64.REGTMP
offset = 32
case rem >= 8:
op, size = ppc64.AMOVD, 8
case rem >= 4:
- op, size = ppc64.AMOVW, 4
+ op, size = ppc64.AMOVWZ, 4
+ case rem >= 2:
+ op, size = ppc64.AMOVH, 2
+ }
+ // Load
+ p := s.Prog(op)
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = ppc64.REGTMP
+ p.From.Type = obj.TYPE_MEM
+ p.From.Reg = srcReg
+ p.From.Offset = offset
+
+ // Store
+ p = s.Prog(op)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = ppc64.REGTMP
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = dstReg
+ p.To.Offset = offset
+ rem -= size
+ offset += size
+ }
+
+ case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
+ bytesPerLoop := int64(64)
+ // This is used when moving more
+ // than 8 bytes on power9. Moves start with
+ // as many 8 byte moves as possible, then
+ // 4, 2, or 1 byte(s) as remaining. This will
+ // work and be efficient for power8 or later.
+ // If there are 64 or more bytes, then a
+ // loop is generated to move 64 bytes and
+ // update the src and dst addresses on each
+ // iteration. When < 64 bytes, the appropriate
+ // number of moves are generated based on the
+ // size.
+ // When moving >= 64 bytes a loop is used
+ // MOVD len/64,REG_TMP
+ // MOVD REG_TMP,CTR
+ // top:
+ // LXV 0(R21),VS32
+ // LXV 16(R21),VS33
+ // STXV VS32,0(R20)
+ // STXV VS33,16(R20)
+ // LXV 32(R21),VS32
+ // LXV 48(R21),VS33
+ // STXV VS32,32(R20)
+ // STXV VS33,48(R20)
+ // ADD $64,R21
+ // ADD $64,R20
+ // BC 16,0,top
+ // Bytes not moved by this loop are moved
+ // with a combination of the following instructions,
+ // starting with the largest sizes and generating as
+ // many as needed, using the appropriate offset value.
+ // MOVD n(R21),R31
+ // MOVD R31,n(R20)
+ // MOVW n1(R21),R31
+ // MOVW R31,n1(R20)
+ // MOVH n2(R21),R31
+ // MOVH R31,n2(R20)
+ // MOVB n3(R21),R31
+ // MOVB R31,n3(R20)
+
+ // Each loop iteration moves 64 bytes
+ ctr := v.AuxInt / bytesPerLoop
+
+ // Remainder after the loop
+ rem := v.AuxInt % bytesPerLoop
+
+ dstReg := v.Args[0].Reg()
+ srcReg := v.Args[1].Reg()
+
+ offset := int64(0)
+
+ // top of the loop
+ var top *obj.Prog
+
+ // Only generate looping code when the loop counter is > 1 (two or more 64 byte iterations)
+ if ctr > 1 {
+ // Set up the CTR
+ p := s.Prog(ppc64.AMOVD)
+ p.From.Type = obj.TYPE_CONST
+ p.From.Offset = ctr
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = ppc64.REGTMP
+
+ p = s.Prog(ppc64.AMOVD)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = ppc64.REGTMP
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = ppc64.REG_CTR
+
+ p = s.Prog(obj.APCALIGN)
+ p.From.Type = obj.TYPE_CONST
+ p.From.Offset = 16
+
+ // Generate 16 byte loads and stores.
+ p = s.Prog(ppc64.ALXV)
+ p.From.Type = obj.TYPE_MEM
+ p.From.Reg = srcReg
+ p.From.Offset = offset
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = ppc64.REG_VS32
+ if top == nil {
+ top = p
+ }
+ p = s.Prog(ppc64.ALXV)
+ p.From.Type = obj.TYPE_MEM
+ p.From.Reg = srcReg
+ p.From.Offset = offset + 16
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = ppc64.REG_VS33
+
+ // generate 16 byte stores
+ p = s.Prog(ppc64.ASTXV)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = ppc64.REG_VS32
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = dstReg
+ p.To.Offset = offset
+
+ p = s.Prog(ppc64.ASTXV)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = ppc64.REG_VS33
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = dstReg
+ p.To.Offset = offset + 16
+
+ // Generate 16 byte loads and stores.
+ p = s.Prog(ppc64.ALXV)
+ p.From.Type = obj.TYPE_MEM
+ p.From.Reg = srcReg
+ p.From.Offset = offset + 32
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = ppc64.REG_VS32
+
+ p = s.Prog(ppc64.ALXV)
+ p.From.Type = obj.TYPE_MEM
+ p.From.Reg = srcReg
+ p.From.Offset = offset + 48
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = ppc64.REG_VS33
+
+ // generate 16 byte stores
+ p = s.Prog(ppc64.ASTXV)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = ppc64.REG_VS32
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = dstReg
+ p.To.Offset = offset + 32
+
+ p = s.Prog(ppc64.ASTXV)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = ppc64.REG_VS33
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = dstReg
+ p.To.Offset = offset + 48
+
+ // increment the src reg for next iteration
+ p = s.Prog(ppc64.AADD)
+ p.Reg = srcReg
+ p.From.Type = obj.TYPE_CONST
+ p.From.Offset = bytesPerLoop
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = srcReg
+
+ // increment the dst reg for next iteration
+ p = s.Prog(ppc64.AADD)
+ p.Reg = dstReg
+ p.From.Type = obj.TYPE_CONST
+ p.From.Offset = bytesPerLoop
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = dstReg
+
+ // BC with BO_BCTR generates bdnz to branch on nonzero CTR
+ // to loop top.
+ p = s.Prog(ppc64.ABC)
+ p.From.Type = obj.TYPE_CONST
+ p.From.Offset = ppc64.BO_BCTR
+ p.Reg = ppc64.REG_CR0LT
+ p.To.Type = obj.TYPE_BRANCH
+ p.To.SetTarget(top)
+
+ // srcReg and dstReg were incremented in the loop, so
+ // later instructions start with offset 0.
+ offset = int64(0)
+ }
+
+ // No loop was generated for one iteration, so
+ // add 64 bytes to the remainder to move those bytes.
+ if ctr == 1 {
+ rem += bytesPerLoop
+ }
+ if rem >= 32 {
+ p := s.Prog(ppc64.ALXV)
+ p.From.Type = obj.TYPE_MEM
+ p.From.Reg = srcReg
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = ppc64.REG_VS32
+
+ p = s.Prog(ppc64.ALXV)
+ p.From.Type = obj.TYPE_MEM
+ p.From.Reg = srcReg
+ p.From.Offset = 16
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = ppc64.REG_VS33
+
+ p = s.Prog(ppc64.ASTXV)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = ppc64.REG_VS32
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = dstReg
+
+ p = s.Prog(ppc64.ASTXV)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = ppc64.REG_VS33
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = dstReg
+ p.To.Offset = 16
+
+ offset = 32
+ rem -= 32
+ }
+
+ if rem >= 16 {
+ // Generate 16 byte loads and stores.
+ p := s.Prog(ppc64.ALXV)
+ p.From.Type = obj.TYPE_MEM
+ p.From.Reg = srcReg
+ p.From.Offset = offset
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = ppc64.REG_VS32
+
+ p = s.Prog(ppc64.ASTXV)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = ppc64.REG_VS32
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = dstReg
+ p.To.Offset = offset
+
+ offset += 16
+ rem -= 16
+
+ if rem >= 16 {
+ p := s.Prog(ppc64.ALXV)
+ p.From.Type = obj.TYPE_MEM
+ p.From.Reg = srcReg
+ p.From.Offset = offset
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = ppc64.REG_VS32
+
+ p = s.Prog(ppc64.ASTXV)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = ppc64.REG_VS32
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = dstReg
+ p.To.Offset = offset
+
+ offset += 16
+ rem -= 16
+ }
+ }
+ // Generate all the remaining load and store pairs, starting with
+ // as many 8 byte moves as possible, then 4, 2, 1.
+ for rem > 0 {
+ op, size := ppc64.AMOVB, int64(1)
+ switch {
+ case rem >= 8:
+ op, size = ppc64.AMOVD, 8
+ case rem >= 4:
+ op, size = ppc64.AMOVWZ, 4
case rem >= 2:
op, size = ppc64.AMOVH, 2
}
// Load
p := s.Prog(op)
p.To.Type = obj.TYPE_REG
- p.To.Reg = ppc64.REG_R14
+ p.To.Reg = ppc64.REGTMP
p.From.Type = obj.TYPE_MEM
- p.From.Reg = src_reg
+ p.From.Reg = srcReg
p.From.Offset = offset
// Store
p = s.Prog(op)
p.From.Type = obj.TYPE_REG
- p.From.Reg = ppc64.REG_R14
+ p.From.Reg = ppc64.REGTMP
p.To.Type = obj.TYPE_MEM
- p.To.Reg = dst_reg
+ p.To.Reg = dstReg
p.To.Offset = offset
rem -= size
offset += size
case ssa.OpPPC64CALLstatic:
s.Call(v)
+ case ssa.OpPPC64CALLtail:
+ s.TailCall(v)
+
case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
p := s.Prog(ppc64.AMOVD)
p.From.Type = obj.TYPE_REG
}
pp := s.Call(v)
+
+ // Convert the call into a blrl with a hint that this is not a subroutine return.
+ // The full bclrl opcode must be specified when passing a hint.
+ pp.As = ppc64.ABCL
+ pp.From.Type = obj.TYPE_CONST
+ pp.From.Offset = ppc64.BO_ALWAYS
+ pp.Reg = ppc64.REG_CR0LT // The preferred value if BI is ignored.
pp.To.Reg = ppc64.REG_LR
+ pp.AddRestSourceConst(1)
- if gc.Ctxt.Flag_shared {
+ if ppc64.NeedTOCpointer(base.Ctxt) {
// When compiling Go into PIC, the function we just
// called via pointer might have been implemented in
// a separate module and so overwritten the TOC
p := s.Prog(obj.ACALL)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
- p.To.Sym = v.Aux.(*obj.LSym)
+ // AuxInt encodes how many buffer entries we need.
+ p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
p := s.Prog(obj.ACALL)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
- p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
+ p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
s.UseArgs(16) // space used in callee args area by assembly stubs
case ssa.OpPPC64LoweredNilCheck:
- if objabi.GOOS == "aix" {
+ if buildcfg.GOOS == "aix" {
// CMP Rarg0, R0
// BNE 2(PC)
// STW R0, 0(R0)
// NOP (so the BNE has somewhere to land)
nop := s.Prog(obj.ANOP)
- gc.Patch(p2, nop)
+ p2.To.SetTarget(nop)
} else {
// Issue a load which will fault if arg is nil.
p := s.Prog(ppc64.AMOVBZ)
p.From.Type = obj.TYPE_MEM
p.From.Reg = v.Args[0].Reg()
- gc.AddAux(&p.From, v)
+ ssagen.AddAux(&p.From, v)
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REGTMP
}
if logopt.Enabled() {
logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
}
- if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
- gc.Warnl(v.Pos, "generated nil check")
+ if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
+ base.WarnfAt(v.Pos, "generated nil check")
}
// These should be resolved by rules and not make it here.
v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
- case ssa.OpClobber:
+ case ssa.OpClobber, ssa.OpClobberReg:
// TODO: implement for clobberdead experiment. Nop is ok for now.
default:
v.Fatalf("genValue not implemented: %s", v.LongString())
ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
}
-func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
+func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
switch b.Kind {
case ssa.BlockDefer:
// defer returns in R3:
p = s.Prog(ppc64.ABNE)
p.To.Type = obj.TYPE_BRANCH
- s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
+ s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
if b.Succs[0].Block() != next {
p := s.Prog(obj.AJMP)
p.To.Type = obj.TYPE_BRANCH
- s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
+ s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
}
case ssa.BlockPlain:
if b.Succs[0].Block() != next {
p := s.Prog(obj.AJMP)
p.To.Type = obj.TYPE_BRANCH
- s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
+ s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
}
- case ssa.BlockExit:
+ case ssa.BlockExit, ssa.BlockRetJmp:
case ssa.BlockRet:
s.Prog(obj.ARET)
- case ssa.BlockRetJmp:
- p := s.Prog(obj.AJMP)
- p.To.Type = obj.TYPE_MEM
- p.To.Name = obj.NAME_EXTERN
- p.To.Sym = b.Aux.(*obj.LSym)
case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
ssa.BlockPPC64LT, ssa.BlockPPC64GE,
b.Fatalf("branch not implemented: %s", b.LongString())
}
}
+
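+ // loadRegResult generates a load of the result value n, at offset off
+ // within its stack slot, into register reg.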
+func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
+ p := s.Prog(loadByType(t))
+ p.From.Type = obj.TYPE_MEM
+ p.From.Name = obj.NAME_AUTO
+ p.From.Sym = n.Linksym()
+ p.From.Offset = n.FrameOffset() + off
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = reg
+ return p
+}
+
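+ // spillArgReg appends an instruction that spills argument register reg
+ // to the stack slot of parameter n at offset off.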
+func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
+ p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
+ p.To.Name = obj.NAME_PARAM
+ p.To.Sym = n.Linksym()
+ p.Pos = p.Pos.WithNotStmt()
+ return p
+}