From b72bbaebf9035c59806fd8073f2582e2d07764d5 Mon Sep 17 00:00:00 2001
From: David Chase
Date: Tue, 9 May 2023 10:34:52 -0400
Subject: [PATCH] cmd/compile: expand calls cleanup

Convert expand calls into a smaller number of focused recursive
rewrites, and rely on an enhanced version of "decompose" to clean up
afterwards.

Debugging information seems to emerge intact.

Change-Id: Ic46da4207e3a4da5c8e2c47b637b0e35abbe56bb
Reviewed-on: https://go-review.googlesource.com/c/go/+/507295
Run-TryBot: David Chase
Reviewed-by: Cherry Mui
Reviewed-by: Keith Randall
TryBot-Result: Gopher Robot
---
 src/cmd/compile/internal/ssa/_gen/dec.rules  |  106 +
 src/cmd/compile/internal/ssa/compile.go      |    4 +-
 src/cmd/compile/internal/ssa/expand_calls.go | 2425 ++++++------------
 src/cmd/compile/internal/ssa/op.go           |    2 +-
 src/cmd/compile/internal/ssa/rewritedec.go   |  654 +++++
 src/cmd/compile/internal/ssagen/ssa.go       |    2 +-
 test/abi/more_intstar_input.go               |    8 +-
 test/abi/reg_not_ssa.go                      |   40 +
 test/codegen/condmove.go                     |    2 +-
 test/codegen/spectre.go                      |    4 +-
 10 files changed, 1639 insertions(+), 1608 deletions(-)
 create mode 100644 test/abi/reg_not_ssa.go

diff --git a/src/cmd/compile/internal/ssa/_gen/dec.rules b/src/cmd/compile/internal/ssa/_gen/dec.rules
index b19489870d..4484cd7e28 100644
--- a/src/cmd/compile/internal/ssa/_gen/dec.rules
+++ b/src/cmd/compile/internal/ssa/_gen/dec.rules
@@ -91,3 +91,109 @@
     (OffPtr [config.PtrSize] dst)
     data
     (Store {typ.Uintptr} dst itab mem))
+
+// Helpers for expand calls
+// Some of these are copied from generic.rules
+
+(IMake _typ (StructMake1 val)) => (IMake _typ val)
+(StructSelect [0] (IData x)) => (IData x)
+
+(StructSelect (StructMake1 x)) => x
+(StructSelect [0] (StructMake2 x _)) => x
+(StructSelect [1] (StructMake2 _ x)) => x
+(StructSelect [0] (StructMake3 x _ _)) => x
+(StructSelect [1] (StructMake3 _ x _)) => x
+(StructSelect [2] (StructMake3 _ _ x)) => x
+(StructSelect [0] (StructMake4 x _ _ _)) => x
+(StructSelect [1] (StructMake4 _ x _ _)) => x
+(StructSelect [2] (StructMake4 _ _ x _)) => x
+(StructSelect [3] (StructMake4 _ _ _ x)) => x
+
+// Special case coming from immediate interface rewriting
+// Typical case: (StructSelect [0] (IData (IMake typ dat)) rewrites to (StructSelect [0] dat)
+// but because the interface is immediate, the type of "IData" is a one-element struct containing
+// a pointer that is not the pointer type of dat (can be a *uint8).
+// More annoying case: (ArraySelect[0] (StructSelect[0] isAPtr))
+// There, result of the StructSelect is an Array (not a pointer) and
+// the pre-rewrite input to the ArraySelect is a struct, not a pointer.
+(StructSelect [0] x) && x.Type.IsPtr() => x
+(ArraySelect [0] x) && x.Type.IsPtr() => x
+
+// These, too. Bits is bits.
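// A minimal standalone Go sketch (illustrative names only, not part of this CL)
// of the immediate-interface shape that the pointer-shaped special cases above
// target: a one-pointer struct lives directly in the interface data word, so the
// IData of such an interface is a one-field struct whose element is already a
// pointer, just not necessarily dat's exact pointer type.

package p

type wrap struct{ p *int } // pointer-shaped: stored directly in the data word

func get(i interface{}) *int {
	// i.(wrap) lowers, roughly, to a StructSelect [0] of the IData;
	// (StructSelect [0] x) && x.Type.IsPtr() => x lets that select collapse.
	return i.(wrap).p
}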
+(ArrayMake1 x) && x.Type.IsPtr() => x +(StructMake1 x) && x.Type.IsPtr() => x + +(Store dst (StructMake1 f0) mem) => + (Store {t.FieldType(0)} (OffPtr [0] dst) f0 mem) +(Store dst (StructMake2 f0 f1) mem) => + (Store {t.FieldType(1)} + (OffPtr [t.FieldOff(1)] dst) + f1 + (Store {t.FieldType(0)} + (OffPtr [0] dst) + f0 mem)) +(Store dst (StructMake3 f0 f1 f2) mem) => + (Store {t.FieldType(2)} + (OffPtr [t.FieldOff(2)] dst) + f2 + (Store {t.FieldType(1)} + (OffPtr [t.FieldOff(1)] dst) + f1 + (Store {t.FieldType(0)} + (OffPtr [0] dst) + f0 mem))) +(Store dst (StructMake4 f0 f1 f2 f3) mem) => + (Store {t.FieldType(3)} + (OffPtr [t.FieldOff(3)] dst) + f3 + (Store {t.FieldType(2)} + (OffPtr [t.FieldOff(2)] dst) + f2 + (Store {t.FieldType(1)} + (OffPtr [t.FieldOff(1)] dst) + f1 + (Store {t.FieldType(0)} + (OffPtr [0] dst) + f0 mem)))) + +(ArraySelect (ArrayMake1 x)) => x +(ArraySelect [0] (IData x)) => (IData x) + +(Store dst (ArrayMake1 e) mem) => (Store {e.Type} dst e mem) + +// NOTE removed must-not-be-SSA condition. +(ArraySelect [i] x:(Load ptr mem)) => + @x.Block (Load (OffPtr [t.Elem().Size()*i] ptr) mem) + +(StringPtr x:(Load ptr mem)) && t.IsString() => @x.Block (Load ptr mem) +(StringLen x:(Load ptr mem)) && t.IsString() => @x.Block (Load + (OffPtr [config.PtrSize] ptr) + mem) + +// NOTE removed must-not-be-SSA condition. +(StructSelect [i] x:(Load ptr mem)) => + @x.Block (Load (OffPtr [t.FieldOff(int(i))] ptr) mem) + +(ITab x:(Load ptr mem)) && t.IsInterface() => @x.Block (Load ptr mem) + +(IData x:(Load ptr mem)) && t.IsInterface() => @x.Block (Load + (OffPtr [config.PtrSize] ptr) + mem) + +(SlicePtr x:(Load ptr mem)) && t.IsSlice() => @x.Block (Load ptr mem) +(SliceLen x:(Load ptr mem)) && t.IsSlice() => @x.Block (Load + (OffPtr [config.PtrSize] ptr) + mem) +(SliceCap x:(Load ptr mem)) && t.IsSlice() => @x.Block (Load + (OffPtr [2*config.PtrSize] ptr) + mem) + +(ComplexReal x:(Load ptr mem)) && t.IsComplex() && t.Size() == 8 => @x.Block (Load ptr mem) +(ComplexImag x:(Load ptr mem)) && t.IsComplex() && t.Size() == 8 => @x.Block (Load + (OffPtr [4] ptr) + mem) + +(ComplexReal x:(Load ptr mem)) && t.IsComplex() && t.Size() == 16 => @x.Block (Load ptr mem) +(ComplexImag x:(Load ptr mem)) && t.IsComplex() && t.Size() == 16 => @x.Block (Load + (OffPtr [8] ptr) + mem) diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go index 625c98bb1f..d125891f88 100644 --- a/src/cmd/compile/internal/ssa/compile.go +++ b/src/cmd/compile/internal/ssa/compile.go @@ -472,8 +472,8 @@ var passes = [...]pass{ {name: "nilcheckelim", fn: nilcheckelim}, {name: "prove", fn: prove}, {name: "early fuse", fn: fuseEarly}, - {name: "decompose builtin", fn: decomposeBuiltIn, required: true}, {name: "expand calls", fn: expandCalls, required: true}, + {name: "decompose builtin", fn: postExpandCallsDecompose, required: true}, {name: "softfloat", fn: softfloat, required: true}, {name: "late opt", fn: opt, required: true}, // TODO: split required rules and optimizing rules {name: "dead auto elim", fn: elimDeadAutosGeneric}, @@ -547,6 +547,8 @@ var passOrder = [...]constraint{ {"generic cse", "tighten"}, // checkbce needs the values removed {"generic deadcode", "check bce"}, + // decompose builtin now also cleans up after expand calls + {"expand calls", "decompose builtin"}, // don't run optimization pass until we've decomposed builtin objects {"decompose builtin", "late opt"}, // decompose builtin is the last pass that may introduce new float ops, so run softfloat after it diff 
--git a/src/cmd/compile/internal/ssa/expand_calls.go b/src/cmd/compile/internal/ssa/expand_calls.go index e6f7306fa8..29c180be34 100644 --- a/src/cmd/compile/internal/ssa/expand_calls.go +++ b/src/cmd/compile/internal/ssa/expand_calls.go @@ -11,1788 +11,1021 @@ import ( "cmd/compile/internal/types" "cmd/internal/src" "fmt" - "sort" ) -type selKey struct { - from *Value // what is selected from - offsetOrIndex int64 // whatever is appropriate for the selector - size int64 - typ *types.Type +func postExpandCallsDecompose(f *Func) { + decomposeUser(f) // redo user decompose to cleanup after expand calls + decomposeBuiltIn(f) // handles both regular decomposition and cleanup. } -type Abi1RO uint8 // An offset within a parameter's slice of register indices, for abi1. +func expandCalls(f *Func) { + // Convert each aggregate arg to a call into "dismantle aggregate, store/pass parts" + // Convert each aggregate result from a call into "assemble aggregate from parts" + // Convert each multivalue exit into "dismantle aggregate, store/return parts" + // Convert incoming aggregate arg into assembly of parts. + // Feed modified AST to decompose. -func isBlockMultiValueExit(b *Block) bool { - return (b.Kind == BlockRet || b.Kind == BlockRetJmp) && b.Controls[0] != nil && b.Controls[0].Op == OpMakeResult -} + sp, _ := f.spSb() -func badVal(s string, v *Value) error { - return fmt.Errorf("%s %s", s, v.LongString()) -} + x := &expandState{ + f: f, + debug: f.pass.debug, + regSize: f.Config.RegSize, + sp: sp, + typs: &f.Config.Types, + wideSelects: make(map[*Value]*Value), + commonArgs: make(map[selKey]*Value), + commonSelectors: make(map[selKey]*Value), + memForCall: make(map[ID]*Value), + } -// removeTrivialWrapperTypes unwraps layers of -// struct { singleField SomeType } and [1]SomeType -// until a non-wrapper type is reached. This is useful -// for working with assignments to/from interface data -// fields (either second operand to OpIMake or OpIData) -// where the wrapping or type conversion can be elided -// because of type conversions/assertions in source code -// that do not appear in SSA. -func removeTrivialWrapperTypes(t *types.Type) *types.Type { - for { - if t.IsStruct() && t.NumFields() == 1 { - t = t.Field(0).Type - continue - } - if t.IsArray() && t.NumElem() == 1 { - t = t.Elem() - continue - } - break + // For 32-bit, need to deal with decomposition of 64-bit integers, which depends on endianness. + if f.Config.BigEndian { + x.firstOp = OpInt64Hi + x.secondOp = OpInt64Lo + x.firstType = x.typs.Int32 + x.secondType = x.typs.UInt32 + } else { + x.firstOp = OpInt64Lo + x.secondOp = OpInt64Hi + x.firstType = x.typs.UInt32 + x.secondType = x.typs.Int32 } - return t -} -// A registerCursor tracks which register is used for an Arg or regValues, or a piece of such. -type registerCursor struct { - // TODO(register args) convert this to a generalized target cursor. - storeDest *Value // if there are no register targets, then this is the base of the store. - regsLen int // the number of registers available for this Arg/result (which is all in registers or not at all) - nextSlice Abi1RO // the next register/register-slice offset - config *abi.ABIConfig - regValues *[]*Value // values assigned to registers accumulate here -} + // Defer select processing until after all calls and selects are seen. 
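// A standalone source-level sketch (hypothetical names, not part of this change)
// of what this pass rewrites: the struct argument below is dismantled into
// register/stack parts at the call site, and the two-value result is reassembled
// from the call's parts when the recorded selects are processed below.

package p

type pair struct{ x, y int }

//go:noinline
func callee(p pair) (int, int) { return p.x, p.y }

func caller() int {
	a, b := callee(pair{1, 2}) // aggregate argument and multi-value result
	return a + b
}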
+ var selects []*Value + var calls []*Value + var args []*Value + var exitBlocks []*Block -func (c *registerCursor) String() string { - dest := "" - if c.storeDest != nil { - dest = c.storeDest.String() - } - regs := "" - if c.regValues != nil { - regs = "" - for i, x := range *c.regValues { - if i > 0 { - regs = regs + "; " + var m0 *Value + + // Accumulate lists of calls, args, selects, and exit blocks to process, + // note "wide" selects consumed by stores, + // rewrite mem for each call, + // rewrite each OpSelectNAddr. + for _, b := range f.Blocks { + for _, v := range b.Values { + switch v.Op { + case OpInitMem: + m0 = v + + case OpClosureLECall, OpInterLECall, OpStaticLECall, OpTailLECall: + calls = append(calls, v) + + case OpArg: + args = append(args, v) + + case OpStore: + if a := v.Args[1]; a.Op == OpSelectN && !CanSSA(a.Type) { + if a.Uses > 1 { + panic(fmt.Errorf("Saw double use of wide SelectN %s operand of Store %s", + a.LongString(), v.LongString())) + } + x.wideSelects[a] = v + } + + case OpSelectN: + if v.Type == types.TypeMem { + // rewrite the mem selector in place + call := v.Args[0] + aux := call.Aux.(*AuxCall) + mem := x.memForCall[call.ID] + if mem == nil { + v.AuxInt = int64(aux.abiInfo.OutRegistersUsed()) + x.memForCall[call.ID] = v + } else { + panic(fmt.Errorf("Saw two memories for call %v, %v and %v", call, mem, v)) + } + } else { + selects = append(selects, v) + } + + case OpSelectNAddr: + call := v.Args[0] + which := v.AuxInt + aux := call.Aux.(*AuxCall) + pt := v.Type + off := x.offsetFrom(x.f.Entry, x.sp, aux.OffsetOfResult(which), pt) + v.copyOf(off) } - regs = regs + x.LongString() } - } - // not printing the config because that has not been useful - return fmt.Sprintf("RCSR{storeDest=%v, regsLen=%d, nextSlice=%d, regValues=[%s]}", dest, c.regsLen, c.nextSlice, regs) -} -// next effectively post-increments the register cursor; the receiver is advanced, -// the old value is returned. -func (c *registerCursor) next(t *types.Type) registerCursor { - rc := *c - if int(c.nextSlice) < c.regsLen { - w := c.config.NumParamRegs(t) - c.nextSlice += Abi1RO(w) + // rewrite function results from an exit block + // values returned by function need to be split out into registers. + if isBlockMultiValueExit(b) { + exitBlocks = append(exitBlocks, b) + } } - return rc -} -// plus returns a register cursor offset from the original, without modifying the original. -func (c *registerCursor) plus(regWidth Abi1RO) registerCursor { - rc := *c - rc.nextSlice += regWidth - return rc -} + // Convert each aggregate arg into Make of its parts (and so on, to primitive types) + for _, v := range args { + var rc registerCursor + a := x.prAssignForArg(v) + aux := x.f.OwnAux + regs := a.Registers + var offset int64 + if len(regs) == 0 { + offset = a.FrameOffset(aux.abiInfo) + } + auxBase := x.offsetFrom(x.f.Entry, x.sp, offset, types.NewPtr(v.Type)) + rc.init(regs, aux.abiInfo, nil, auxBase, 0) + x.rewriteSelectOrArg(f.Entry.Pos, f.Entry, v, v, m0, v.Type, rc) + } -const ( - // Register offsets for fields of built-in aggregate types; the ones not listed are zero. 
- RO_complex_imag = 1 - RO_string_len = 1 - RO_slice_len = 1 - RO_slice_cap = 2 - RO_iface_data = 1 -) + // Rewrite selects of results (which may be aggregates) into make-aggregates of register/memory-targeted selects + for _, v := range selects { + if v.Op == OpInvalid { + continue + } -func (x *expandState) regWidth(t *types.Type) Abi1RO { - return Abi1RO(x.abi1.NumParamRegs(t)) -} + call := v.Args[0] + aux := call.Aux.(*AuxCall) + mem := x.memForCall[call.ID] + + i := v.AuxInt + regs := aux.RegsOfResult(i) + + // If this select cannot fit into SSA and is stored, either disaggregate to register stores, or mem-mem move. + if store := x.wideSelects[v]; store != nil { + // Use the mem that comes from the store operation. + storeAddr := store.Args[0] + mem := store.Args[2] + if len(regs) > 0 { + // Cannot do a rewrite that builds up a result from pieces; instead, copy pieces to the store operation. + var rc registerCursor + rc.init(regs, aux.abiInfo, nil, storeAddr, 0) + mem = x.rewriteWideSelectToStores(call.Pos, call.Block, v, mem, v.Type, rc) + store.copyOf(mem) + } else { + // Move directly from AuxBase to store target; rewrite the store instruction. + offset := aux.OffsetOfResult(i) + auxBase := x.offsetFrom(x.f.Entry, x.sp, offset, types.NewPtr(v.Type)) + // was Store dst, v, mem + // now Move dst, auxBase, mem + move := store.Block.NewValue3A(store.Pos, OpMove, types.TypeMem, v.Type, storeAddr, auxBase, mem) + move.AuxInt = v.Type.Size() + store.copyOf(move) + } + continue + } -// regOffset returns the register offset of the i'th element of type t -func (x *expandState) regOffset(t *types.Type, i int) Abi1RO { - // TODO maybe cache this in a map if profiling recommends. - if i == 0 { - return 0 - } - if t.IsArray() { - return Abi1RO(i) * x.regWidth(t.Elem()) - } - if t.IsStruct() { - k := Abi1RO(0) - for j := 0; j < i; j++ { - k += x.regWidth(t.FieldType(j)) + var auxBase *Value + if len(regs) == 0 { + offset := aux.OffsetOfResult(i) + auxBase = x.offsetFrom(x.f.Entry, x.sp, offset, types.NewPtr(v.Type)) } - return k + var rc registerCursor + rc.init(regs, aux.abiInfo, nil, auxBase, 0) + x.rewriteSelectOrArg(call.Pos, call.Block, v, v, mem, v.Type, rc) } - panic("Haven't implemented this case yet, do I need to?") -} -// at returns the register cursor for component i of t, where the first -// component is numbered 0. -func (c *registerCursor) at(t *types.Type, i int) registerCursor { - rc := *c - if i == 0 || c.regsLen == 0 { - return rc - } - if t.IsArray() { - w := c.config.NumParamRegs(t.Elem()) - rc.nextSlice += Abi1RO(i * w) - return rc + rewriteCall := func(v *Value, newOp Op, argStart int) { + // Break aggregate args passed to call into smaller pieces. 
+ x.rewriteCallArgs(v, argStart) + v.Op = newOp + rts := abi.RegisterTypes(v.Aux.(*AuxCall).abiInfo.OutParams()) + v.Type = types.NewResults(append(rts, types.TypeMem)) } - if t.IsStruct() { - for j := 0; j < i; j++ { - rc.next(t.FieldType(j)) + + // Rewrite calls + for _, v := range calls { + switch v.Op { + case OpStaticLECall: + rewriteCall(v, OpStaticCall, 0) + case OpTailLECall: + rewriteCall(v, OpTailCall, 0) + case OpClosureLECall: + rewriteCall(v, OpClosureCall, 2) + case OpInterLECall: + rewriteCall(v, OpInterCall, 1) } - return rc } - panic("Haven't implemented this case yet, do I need to?") -} -func (c *registerCursor) init(regs []abi.RegIndex, info *abi.ABIParamResultInfo, result *[]*Value, storeDest *Value) { - c.regsLen = len(regs) - c.nextSlice = 0 - if len(regs) == 0 { - c.storeDest = storeDest // only save this if there are no registers, will explode if misused. - return + // Rewrite results from exit blocks + for _, b := range exitBlocks { + v := b.Controls[0] + x.rewriteFuncResults(v, b, f.OwnAux) + b.SetControl(v) } - c.config = info.Config() - c.regValues = result -} -func (c *registerCursor) addArg(v *Value) { - *c.regValues = append(*c.regValues, v) -} - -func (c *registerCursor) hasRegs() bool { - return c.regsLen > 0 } -type expandState struct { - f *Func - abi1 *abi.ABIConfig - debug int // odd values log lost statement markers, so likely settings are 1 (stmts), 2 (expansion), and 3 (both) - regSize int64 - sp *Value - typs *Types - ptrSize int64 - hiOffset int64 - lowOffset int64 - hiRo Abi1RO - loRo Abi1RO - namedSelects map[*Value][]namedVal - sdom SparseTree - commonSelectors map[selKey]*Value // used to de-dupe selectors - commonArgs map[selKey]*Value // used to de-dupe OpArg/OpArgIntReg/OpArgFloatReg - memForCall map[ID]*Value // For a call, need to know the unique selector that gets the mem. - transformedSelects map[ID]bool // OpSelectN after rewriting, either created or renumbered. - indentLevel int // Indentation for debugging recursion -} +func (x *expandState) rewriteFuncResults(v *Value, b *Block, aux *AuxCall) { + // This is very similar to rewriteCallArgs + // differences: + // firstArg + preArgs + // sp vs auxBase -// intPairTypes returns the pair of 32-bit int types needed to encode a 64-bit integer type on a target -// that has no 64-bit integer registers. -func (x *expandState) intPairTypes(et types.Kind) (tHi, tLo *types.Type) { - tHi = x.typs.UInt32 - if et == types.TINT64 { - tHi = x.typs.Int32 - } - tLo = x.typs.UInt32 - return -} + m0 := v.MemoryArg() + mem := m0 -// isAlreadyExpandedAggregateType returns whether a type is an SSA-able "aggregate" (multiple register) type -// that was expanded in an earlier phase (currently, expand_calls is intended to run after decomposeBuiltin, -// so this is all aggregate types -- small struct and array, complex, interface, string, slice, and 64-bit -// integer on 32-bit). -func (x *expandState) isAlreadyExpandedAggregateType(t *types.Type) bool { - if !CanSSA(t) { - return false - } - return t.IsStruct() || t.IsArray() || t.IsComplex() || t.IsInterface() || t.IsString() || t.IsSlice() || - (t.Size() > x.regSize && (t.IsInteger() || (x.f.Config.SoftFloat && t.IsFloat()))) -} + allResults := []*Value{} + var oldArgs []*Value + argsWithoutMem := v.Args[:len(v.Args)-1] -// offsetFrom creates an offset from a pointer, simplifying chained offsets and offsets from SP -// TODO should also optimize offsets from SB? 
-func (x *expandState) offsetFrom(b *Block, from *Value, offset int64, pt *types.Type) *Value { - ft := from.Type - if offset == 0 { - if ft == pt { - return from - } - // This captures common, (apparently) safe cases. The unsafe cases involve ft == uintptr - if (ft.IsPtr() || ft.IsUnsafePtr()) && pt.IsPtr() { - return from + for j, a := range argsWithoutMem { + oldArgs = append(oldArgs, a) + i := int64(j) + auxType := aux.TypeOfResult(i) + auxBase := b.NewValue2A(v.Pos, OpLocalAddr, types.NewPtr(auxType), aux.NameOfResult(i), x.sp, mem) + auxOffset := int64(0) + aRegs := aux.RegsOfResult(int64(j)) + if a.Op == OpDereference { + a.Op = OpLoad + } + var rc registerCursor + var result *[]*Value + if len(aRegs) > 0 { + result = &allResults + } else { + if a.Op == OpLoad && a.Args[0].Op == OpLocalAddr { + addr := a.Args[0] + if addr.MemoryArg() == a.MemoryArg() && addr.Aux == aux.NameOfResult(i) { + continue // Self move to output parameter + } + } } + rc.init(aRegs, aux.abiInfo, result, auxBase, auxOffset) + mem = x.decomposeAsNecessary(v.Pos, b, a, mem, rc) } - // Simplify, canonicalize - for from.Op == OpOffPtr { - offset += from.AuxInt - from = from.Args[0] - } - if from == x.sp { - return x.f.ConstOffPtrSP(pt, offset, x.sp) + v.resetArgs() + v.AddArgs(allResults...) + v.AddArg(mem) + for _, a := range oldArgs { + if a.Uses == 0 { + if x.debug > 1 { + x.Printf("...marking %v unused\n", a.LongString()) + } + x.invalidateRecursively(a) + } } - return b.NewValue1I(from.Pos.WithNotStmt(), OpOffPtr, pt, offset, from) + v.Type = types.NewResults(append(abi.RegisterTypes(aux.abiInfo.OutParams()), types.TypeMem)) + return } -// splitSlots splits one "field" (specified by sfx, offset, and ty) out of the LocalSlots in ls and returns the new LocalSlots this generates. -func (x *expandState) splitSlots(ls []*LocalSlot, sfx string, offset int64, ty *types.Type) []*LocalSlot { - var locs []*LocalSlot - for i := range ls { - locs = append(locs, x.f.SplitSlot(ls[i], sfx, offset, ty)) +func (x *expandState) rewriteCallArgs(v *Value, firstArg int) { + if x.debug > 1 { + x.indent(3) + defer x.indent(-3) + x.Printf("rewriteCallArgs(%s; %d)\n", v.LongString(), firstArg) } - return locs -} + // Thread the stores on the memory arg + aux := v.Aux.(*AuxCall) + m0 := v.MemoryArg() + mem := m0 + allResults := []*Value{} + oldArgs := []*Value{} + argsWithoutMem := v.Args[firstArg : len(v.Args)-1] // Also strip closure/interface Op-specific args -// prAssignForArg returns the ABIParamAssignment for v, assumed to be an OpArg. -func (x *expandState) prAssignForArg(v *Value) *abi.ABIParamAssignment { - if v.Op != OpArg { - panic(badVal("Wanted OpArg, instead saw", v)) + sp := x.sp + if v.Op == OpTailLECall { + // For tail call, we unwind the frame before the call so we'll use the caller's + // SP. + sp = x.f.Entry.NewValue1(src.NoXPos, OpGetCallerSP, x.typs.Uintptr, mem) } - return ParamAssignmentForArgName(x.f, v.Aux.(*ir.Name)) -} -// ParamAssignmentForArgName returns the ABIParamAssignment for f's arg with matching name. -func ParamAssignmentForArgName(f *Func, name *ir.Name) *abi.ABIParamAssignment { - abiInfo := f.OwnAux.abiInfo - ip := abiInfo.InParams() - for i, a := range ip { - if a.Name == name { - return &ip[i] + for i, a := range argsWithoutMem { // skip leading non-parameter SSA Args and trailing mem SSA Arg. 
+ oldArgs = append(oldArgs, a) + auxI := int64(i) + aRegs := aux.RegsOfArg(auxI) + aType := aux.TypeOfArg(auxI) + + if a.Op == OpDereference { + a.Op = OpLoad + } + var rc registerCursor + var result *[]*Value + var aOffset int64 + if len(aRegs) > 0 { + result = &allResults + } else { + aOffset = aux.OffsetOfArg(auxI) + } + if v.Op == OpTailLECall && a.Op == OpArg && a.AuxInt == 0 { + // It's common for a tail call passing the same arguments (e.g. method wrapper), + // so this would be a self copy. Detect this and optimize it out. + n := a.Aux.(*ir.Name) + if n.Class == ir.PPARAM && n.FrameOffset()+x.f.Config.ctxt.Arch.FixedFrameSize == aOffset { + continue + } + } + if x.debug > 1 { + x.Printf("...storeArg %s, %v, %d\n", a.LongString(), aType, aOffset) + } + + rc.init(aRegs, aux.abiInfo, result, sp, aOffset) + mem = x.decomposeAsNecessary(v.Pos, v.Block, a, mem, rc) + } + var preArgStore [2]*Value + preArgs := append(preArgStore[:0], v.Args[0:firstArg]...) + v.resetArgs() + v.AddArgs(preArgs...) + v.AddArgs(allResults...) + v.AddArg(mem) + for _, a := range oldArgs { + if a.Uses == 0 { + x.invalidateRecursively(a) } } - panic(fmt.Errorf("Did not match param %v in prInfo %+v", name, abiInfo.InParams())) -} -// indent increments (or decrements) the indentation. -func (x *expandState) indent(n int) { - x.indentLevel += n + return } -// Printf does an indented fmt.Printf on the format and args. -func (x *expandState) Printf(format string, a ...interface{}) (n int, err error) { - if x.indentLevel > 0 { - fmt.Printf("%[1]*s", x.indentLevel, "") - } - return fmt.Printf(format, a...) +func (x *expandState) decomposePair(pos src.XPos, b *Block, a, mem *Value, t0, t1 *types.Type, o0, o1 Op, rc *registerCursor) *Value { + e := b.NewValue1(pos, o0, t0, a) + pos = pos.WithNotStmt() + mem = x.decomposeAsNecessary(pos, b, e, mem, rc.next(t0)) + e = b.NewValue1(pos, o1, t1, a) + mem = x.decomposeAsNecessary(pos, b, e, mem, rc.next(t1)) + return mem } -// Calls that need lowering have some number of inputs, including a memory input, -// and produce a tuple of (value1, value2, ..., mem) where valueK may or may not be SSA-able. - -// With the current ABI those inputs need to be converted into stores to memory, -// rethreading the call's memory input to the first, and the new call now receiving the last. - -// With the current ABI, the outputs need to be converted to loads, which will all use the call's -// memory output as their input. +func (x *expandState) decomposeOne(pos src.XPos, b *Block, a, mem *Value, t0 *types.Type, o0 Op, rc *registerCursor) *Value { + e := b.NewValue1(pos, o0, t0, a) + pos = pos.WithNotStmt() + mem = x.decomposeAsNecessary(pos, b, e, mem, rc.next(t0)) + return mem +} -// rewriteSelect recursively walks from leaf selector to a root (OpSelectN, OpLoad, OpArg) -// through a chain of Struct/Array/builtin Select operations. If the chain of selectors does not -// end in an expected root, it does nothing (this can happen depending on compiler phase ordering). -// The "leaf" provides the type, the root supplies the container, and the leaf-to-root path -// accumulates the offset. -// It emits the code necessary to implement the leaf select operation that leads to the root. -// -// TODO when registers really arrive, must also decompose anything split across two registers or registers and memory. 
-func (x *expandState) rewriteSelect(leaf *Value, selector *Value, offset int64, regOffset Abi1RO) []*LocalSlot { +// decomposeAsNecessary converts a value (perhaps an aggregate) passed to a call or returned by a function, +// into the appropriate sequence of stores and register assignments to transmit that value in a given ABI, and +// returns the current memory after this convert/rewrite (it may be the input memory, perhaps stores were needed.) +// 'pos' is the source position all this is tied to +// 'b' is the enclosing block +// 'a' is the value to decompose +// 'm0' is the input memory arg used for the first store (or returned if there are no stores) +// 'rc' is a registerCursor which identifies the register/memory destination for the value +func (x *expandState) decomposeAsNecessary(pos src.XPos, b *Block, a, m0 *Value, rc registerCursor) *Value { if x.debug > 1 { x.indent(3) defer x.indent(-3) - x.Printf("rewriteSelect(%s; %s; memOff=%d; regOff=%d)\n", leaf.LongString(), selector.LongString(), offset, regOffset) } - var locs []*LocalSlot - leafType := leaf.Type - if len(selector.Args) > 0 { - w := selector.Args[0] - if w.Op == OpCopy { - for w.Op == OpCopy { - w = w.Args[0] - } - selector.SetArg(0, w) - } + at := a.Type + if at.Size() == 0 { + return m0 } - switch selector.Op { - case OpArgIntReg, OpArgFloatReg: - if leafType == selector.Type { // OpIData leads us here, sometimes. - leaf.copyOf(selector) - } else { - x.f.Fatalf("Unexpected %s type, selector=%s, leaf=%s\n", selector.Op.String(), selector.LongString(), leaf.LongString()) - } + if a.Op == OpDereference { + a.Op = OpLoad // For purposes of parameter passing expansion, a Dereference is a Load. + } + + if !rc.hasRegs() && !CanSSA(at) { + dst := x.offsetFrom(b, rc.storeDest, rc.storeOffset, types.NewPtr(at)) if x.debug > 1 { - x.Printf("---%s, break\n", selector.Op.String()) + x.Printf("...recur store %s at %s\n", a.LongString(), dst.LongString()) } - case OpArg: - if !x.isAlreadyExpandedAggregateType(selector.Type) { - if leafType == selector.Type { // OpIData leads us here, sometimes. - x.newArgToMemOrRegs(selector, leaf, offset, regOffset, leafType, leaf.Pos) - } else { - x.f.Fatalf("Unexpected OpArg type, selector=%s, leaf=%s\n", selector.LongString(), leaf.LongString()) - } - if x.debug > 1 { - x.Printf("---OpArg, break\n") - } - break - } - switch leaf.Op { - case OpIData, OpStructSelect, OpArraySelect: - leafType = removeTrivialWrapperTypes(leaf.Type) + if a.Op == OpLoad { + m0 = b.NewValue3A(pos, OpMove, types.TypeMem, at, dst, a.Args[0], m0) + m0.AuxInt = at.Size() + return m0 + } else { + panic(fmt.Errorf("Store of not a load")) } - x.newArgToMemOrRegs(selector, leaf, offset, regOffset, leafType, leaf.Pos) + } - for _, s := range x.namedSelects[selector] { - locs = append(locs, x.f.Names[s.locIndex]) + mem := m0 + switch at.Kind() { + case types.TARRAY: + et := at.Elem() + for i := int64(0); i < at.NumElem(); i++ { + e := b.NewValue1I(pos, OpArraySelect, et, i, a) + pos = pos.WithNotStmt() + mem = x.decomposeAsNecessary(pos, b, e, mem, rc.next(et)) } + return mem - case OpLoad: // We end up here because of IData of immediate structures. - // Failure case: - // (note the failure case is very rare; w/o this case, make.bash and run.bash both pass, as well as - // the hard cases of building {syscall,math,math/cmplx,math/bits,go/constant} on ppc64le and mips-softfloat). 
- // - // GOSSAFUNC='(*dumper).dump' go build -gcflags=-l -tags=math_big_pure_go cmd/compile/internal/gc - // cmd/compile/internal/gc/dump.go:136:14: internal compiler error: '(*dumper).dump': not lowered: v827, StructSelect PTR PTR - // b2: ← b1 - // v20 (+142) = StaticLECall {AuxCall{reflect.Value.Interface([reflect.Value,0])[interface {},24]}} [40] v8 v1 - // v21 (142) = SelectN [1] v20 - // v22 (142) = SelectN [0] v20 - // b15: ← b8 - // v71 (+143) = IData v22 (v[Nodes]) - // v73 (+146) = StaticLECall <[]*Node,mem> {AuxCall{"".Nodes.Slice([Nodes,0])[[]*Node,8]}} [32] v71 v21 - // - // translates (w/o the "case OpLoad:" above) to: - // - // b2: ← b1 - // v20 (+142) = StaticCall {AuxCall{reflect.Value.Interface([reflect.Value,0])[interface {},24]}} [40] v715 - // v23 (142) = Load <*uintptr> v19 v20 - // v823 (142) = IsNonNil v23 - // v67 (+143) = Load <*[]*Node> v880 v20 - // b15: ← b8 - // v827 (146) = StructSelect <*[]*Node> [0] v67 - // v846 (146) = Store {*[]*Node} v769 v827 v20 - // v73 (+146) = StaticCall {AuxCall{"".Nodes.Slice([Nodes,0])[[]*Node,8]}} [32] v846 - // i.e., the struct select is generated and remains in because it is not applied to an actual structure. - // The OpLoad was created to load the single field of the IData - // This case removes that StructSelect. - if leafType != selector.Type { - if x.f.Config.SoftFloat && selector.Type.IsFloat() { - if x.debug > 1 { - x.Printf("---OpLoad, break\n") - } - break // softfloat pass will take care of that + case types.TSTRUCT: + for i := 0; i < at.NumFields(); i++ { + et := at.Field(i).Type // might need to read offsets from the fields + e := b.NewValue1I(pos, OpStructSelect, et, int64(i), a) + pos = pos.WithNotStmt() + if x.debug > 1 { + x.Printf("...recur decompose %s, %v\n", e.LongString(), et) } - x.f.Fatalf("Unexpected Load as selector, leaf=%s, selector=%s\n", leaf.LongString(), selector.LongString()) - } - leaf.copyOf(selector) - for _, s := range x.namedSelects[selector] { - locs = append(locs, x.f.Names[s.locIndex]) + mem = x.decomposeAsNecessary(pos, b, e, mem, rc.next(et)) } + return mem - case OpSelectN: - // TODO(register args) result case - // if applied to Op-mumble-call, the Aux tells us which result, regOffset specifies offset within result. If a register, should rewrite to OpSelectN for new call. - // TODO these may be duplicated. Should memoize. Intermediate selectors will go dead, no worries there. - call := selector.Args[0] - call0 := call - aux := call.Aux.(*AuxCall) - which := selector.AuxInt - if x.transformedSelects[selector.ID] { - // This is a minor hack. Either this select has had its operand adjusted (mem) or - // it is some other intermediate node that was rewritten to reference a register (not a generic arg). - // This can occur with chains of selection/indexing from single field/element aggregates. - leaf.copyOf(selector) - break - } - if which == aux.NResults() { // mem is after the results. - // rewrite v as a Copy of call -- the replacement call will produce a mem. 
- if leaf != selector { - panic(fmt.Errorf("Unexpected selector of memory, selector=%s, call=%s, leaf=%s", selector.LongString(), call.LongString(), leaf.LongString())) - } - if aux.abiInfo == nil { - panic(badVal("aux.abiInfo nil for call", call)) - } - if existing := x.memForCall[call.ID]; existing == nil { - selector.AuxInt = int64(aux.abiInfo.OutRegistersUsed()) - x.memForCall[call.ID] = selector - x.transformedSelects[selector.ID] = true // operand adjusted - } else { - selector.copyOf(existing) - } + case types.TSLICE: + mem = x.decomposeOne(pos, b, a, mem, x.typs.BytePtr, OpSlicePtr, &rc) + pos = pos.WithNotStmt() + mem = x.decomposeOne(pos, b, a, mem, x.typs.Int, OpSliceLen, &rc) + return x.decomposeOne(pos, b, a, mem, x.typs.Int, OpSliceCap, &rc) - } else { - leafType := removeTrivialWrapperTypes(leaf.Type) - if CanSSA(leafType) { - pt := types.NewPtr(leafType) - // Any selection right out of the arg area/registers has to be same Block as call, use call as mem input. - // Create a "mem" for any loads that need to occur. - if mem := x.memForCall[call.ID]; mem != nil { - if mem.Block != call.Block { - panic(fmt.Errorf("selector and call need to be in same block, selector=%s; call=%s", selector.LongString(), call.LongString())) - } - call = mem - } else { - mem = call.Block.NewValue1I(call.Pos.WithNotStmt(), OpSelectN, types.TypeMem, int64(aux.abiInfo.OutRegistersUsed()), call) - x.transformedSelects[mem.ID] = true // select uses post-expansion indexing - x.memForCall[call.ID] = mem - call = mem - } - outParam := aux.abiInfo.OutParam(int(which)) - if len(outParam.Registers) > 0 { - firstReg := uint32(0) - for i := 0; i < int(which); i++ { - firstReg += uint32(len(aux.abiInfo.OutParam(i).Registers)) - } - reg := int64(regOffset + Abi1RO(firstReg)) - if leaf.Block == call.Block { - leaf.reset(OpSelectN) - leaf.SetArgs1(call0) - leaf.Type = leafType - leaf.AuxInt = reg - x.transformedSelects[leaf.ID] = true // leaf, rewritten to use post-expansion indexing. - } else { - w := call.Block.NewValue1I(leaf.Pos, OpSelectN, leafType, reg, call0) - x.transformedSelects[w.ID] = true // select, using post-expansion indexing. - leaf.copyOf(w) - } - } else { - off := x.offsetFrom(x.f.Entry, x.sp, offset+aux.OffsetOfResult(which), pt) - if leaf.Block == call.Block { - leaf.reset(OpLoad) - leaf.SetArgs2(off, call) - leaf.Type = leafType - } else { - w := call.Block.NewValue2(leaf.Pos, OpLoad, leafType, off, call) - leaf.copyOf(w) - if x.debug > 1 { - x.Printf("---new %s\n", w.LongString()) - } - } - } - for _, s := range x.namedSelects[selector] { - locs = append(locs, x.f.Names[s.locIndex]) - } - } else { - x.f.Fatalf("Should not have non-SSA-able OpSelectN, selector=%s", selector.LongString()) - } - } + case types.TSTRING: + return x.decomposePair(pos, b, a, mem, x.typs.BytePtr, x.typs.Int, OpStringPtr, OpStringLen, &rc) - case OpStructSelect: - w := selector.Args[0] - var ls []*LocalSlot - if w.Type.Kind() != types.TSTRUCT { // IData artifact - ls = x.rewriteSelect(leaf, w, offset, regOffset) - } else { - fldi := int(selector.AuxInt) - ls = x.rewriteSelect(leaf, w, offset+w.Type.FieldOff(fldi), regOffset+x.regOffset(w.Type, fldi)) - if w.Op != OpIData { - for _, l := range ls { - locs = append(locs, x.f.SplitStruct(l, int(selector.AuxInt))) - } + case types.TINTER: + mem = x.decomposeOne(pos, b, a, mem, x.typs.Uintptr, OpITab, &rc) + pos = pos.WithNotStmt() + // Immediate interfaces cause so many headaches. 
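// A standalone sketch (hypothetical names, not part of this change) of the
// OpIMake special case handled just below: passing a pointer-shaped struct as an
// interface argument builds, roughly, IMake(itab, StructMake1(ptr)); the loop
// that follows peels StructMake1/ArrayMake1 wrappers so the data word handed to
// the call is the bare pointer.

package p

type box struct{ p *int } // pointer-shaped: stored directly in the data word

//go:noinline
func sink(i interface{}) { _ = i }

func pass(b box) {
	sink(b) // interface argument whose data word is just b.p
}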
+ if a.Op == OpIMake { + data := a.Args[1] + for data.Op == OpStructMake1 || data.Op == OpArrayMake1 { + data = data.Args[0] } + return x.decomposeAsNecessary(pos, b, data, mem, rc.next(data.Type)) } + return x.decomposeOne(pos, b, a, mem, x.typs.BytePtr, OpIData, &rc) - case OpArraySelect: - w := selector.Args[0] - index := selector.AuxInt - x.rewriteSelect(leaf, w, offset+selector.Type.Size()*index, regOffset+x.regOffset(w.Type, int(index))) - - case OpInt64Hi: - w := selector.Args[0] - ls := x.rewriteSelect(leaf, w, offset+x.hiOffset, regOffset+x.hiRo) - locs = x.splitSlots(ls, ".hi", x.hiOffset, leafType) - - case OpInt64Lo: - w := selector.Args[0] - ls := x.rewriteSelect(leaf, w, offset+x.lowOffset, regOffset+x.loRo) - locs = x.splitSlots(ls, ".lo", x.lowOffset, leafType) - - case OpStringPtr: - ls := x.rewriteSelect(leaf, selector.Args[0], offset, regOffset) - locs = x.splitSlots(ls, ".ptr", 0, x.typs.BytePtr) - - case OpSlicePtr, OpSlicePtrUnchecked: - w := selector.Args[0] - ls := x.rewriteSelect(leaf, w, offset, regOffset) - locs = x.splitSlots(ls, ".ptr", 0, types.NewPtr(w.Type.Elem())) - - case OpITab: - w := selector.Args[0] - ls := x.rewriteSelect(leaf, w, offset, regOffset) - sfx := ".itab" - if w.Type.IsEmptyInterface() { - sfx = ".type" - } - locs = x.splitSlots(ls, sfx, 0, x.typs.Uintptr) - - case OpComplexReal: - ls := x.rewriteSelect(leaf, selector.Args[0], offset, regOffset) - locs = x.splitSlots(ls, ".real", 0, selector.Type) - - case OpComplexImag: - ls := x.rewriteSelect(leaf, selector.Args[0], offset+selector.Type.Size(), regOffset+RO_complex_imag) // result is FloatNN, width of result is offset of imaginary part. - locs = x.splitSlots(ls, ".imag", selector.Type.Size(), selector.Type) - - case OpStringLen, OpSliceLen: - ls := x.rewriteSelect(leaf, selector.Args[0], offset+x.ptrSize, regOffset+RO_slice_len) - locs = x.splitSlots(ls, ".len", x.ptrSize, leafType) - - case OpIData: - ls := x.rewriteSelect(leaf, selector.Args[0], offset+x.ptrSize, regOffset+RO_iface_data) - locs = x.splitSlots(ls, ".data", x.ptrSize, leafType) + case types.TCOMPLEX64: + return x.decomposePair(pos, b, a, mem, x.typs.Float32, x.typs.Float32, OpComplexReal, OpComplexImag, &rc) - case OpSliceCap: - ls := x.rewriteSelect(leaf, selector.Args[0], offset+2*x.ptrSize, regOffset+RO_slice_cap) - locs = x.splitSlots(ls, ".cap", 2*x.ptrSize, leafType) + case types.TCOMPLEX128: + return x.decomposePair(pos, b, a, mem, x.typs.Float64, x.typs.Float64, OpComplexReal, OpComplexImag, &rc) - case OpCopy: // If it's an intermediate result, recurse - locs = x.rewriteSelect(leaf, selector.Args[0], offset, regOffset) - for _, s := range x.namedSelects[selector] { - // this copy may have had its own name, preserve that, too. - locs = append(locs, x.f.Names[s.locIndex]) + case types.TINT64: + if at.Size() > x.regSize { + return x.decomposePair(pos, b, a, mem, x.firstType, x.secondType, x.firstOp, x.secondOp, &rc) + } + case types.TUINT64: + if at.Size() > x.regSize { + return x.decomposePair(pos, b, a, mem, x.typs.UInt32, x.typs.UInt32, x.firstOp, x.secondOp, &rc) } - - default: - // Ignore dead ends. These can occur if this phase is run before decompose builtin (which is not intended, but allowed). } - return locs -} + // An atomic type, either record the register or store it and update the memory. 
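// A standalone sketch (hypothetical functions, amd64 register ABI assumed: nine
// integer argument registers) of when the two branches below apply to a scalar
// leaf at a call site: the first nine integer arguments of the call in caller10
// are recorded via addArg, while the tenth runs out of registers and takes the
// store path to the outgoing-arguments area.

package p

//go:noinline
func callee10(a, b, c, d, e, f, g, h, i, j int) int { return a + j }

func caller10() int {
	return callee10(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) // the final argument is stored, not registered
}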
-func (x *expandState) rewriteDereference(b *Block, base, a, mem *Value, offset, size int64, typ *types.Type, pos src.XPos) *Value { - source := a.Args[0] - dst := x.offsetFrom(b, base, offset, source.Type) - if a.Uses == 1 && a.Block == b { - a.reset(OpMove) - a.Pos = pos - a.Type = types.TypeMem - a.Aux = typ - a.AuxInt = size - a.SetArgs3(dst, source, mem) - mem = a + if rc.hasRegs() { + if x.debug > 1 { + x.Printf("...recur addArg %s\n", a.LongString()) + } + rc.addArg(a) } else { - mem = b.NewValue3A(pos, OpMove, types.TypeMem, typ, dst, source, mem) - mem.AuxInt = size + dst := x.offsetFrom(b, rc.storeDest, rc.storeOffset, types.NewPtr(at)) + if x.debug > 1 { + x.Printf("...recur store %s at %s\n", a.LongString(), dst.LongString()) + } + mem = b.NewValue3A(pos, OpStore, types.TypeMem, at, dst, a, mem) } + return mem } -var indexNames [1]string = [1]string{"[0]"} - -// pathTo returns the selection path to the leaf type at offset within container. -// e.g. len(thing.field[0]) => ".field[0].len" -// this is for purposes of generating names ultimately fed to a debugger. -func (x *expandState) pathTo(container, leaf *types.Type, offset int64) string { - if container == leaf || offset == 0 && container.Size() == leaf.Size() { - return "" - } - path := "" -outer: - for { - switch container.Kind() { - case types.TARRAY: - container = container.Elem() - if container.Size() == 0 { - return path - } - i := offset / container.Size() - offset = offset % container.Size() - // If a future compiler/ABI supports larger SSA/Arg-able arrays, expand indexNames. - path = path + indexNames[i] - continue - case types.TSTRUCT: - for i := 0; i < container.NumFields(); i++ { - fld := container.Field(i) - if fld.Offset+fld.Type.Size() > offset { - offset -= fld.Offset - path += "." + fld.Sym.Name - container = fld.Type - continue outer - } - } - return path - case types.TINT64, types.TUINT64: - if container.Size() == x.regSize { - return path - } - if offset == x.hiOffset { - return path + ".hi" - } - return path + ".lo" - case types.TINTER: - if offset != 0 { - return path + ".data" - } - if container.IsEmptyInterface() { - return path + ".type" - } - return path + ".itab" - - case types.TSLICE: - if offset == 2*x.regSize { - return path + ".cap" - } - fallthrough - case types.TSTRING: - if offset == 0 { - return path + ".ptr" - } - return path + ".len" - case types.TCOMPLEX64, types.TCOMPLEX128: - if offset == 0 { - return path + ".real" - } - return path + ".imag" +// Convert scalar OpArg into the proper OpWhateverArg instruction +// Convert scalar OpSelectN into perhaps-differently-indexed OpSelectN +// Convert aggregate OpArg into Make of its parts (which are eventually scalars) +// Convert aggregate OpSelectN into Make of its parts (which are eventually scalars) +// Returns the converted value. +// +// - "pos" the position for any generated instructions +// - "b" the block for any generated instructions +// - "container" the outermost OpArg/OpSelectN +// - "a" the instruction to overwrite, if any (only the outermost caller) +// - "m0" the memory arg for any loads that are necessary +// - "at" the type of the Arg/part +// - "rc" the register/memory cursor locating the various parts of the Arg. 
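// A standalone sketch (hypothetical name, register ABI assumed) of what the
// rewrite described above produces for a simple aggregate parameter: the string
// below arrives as two scalar parts (data pointer and length), its OpArg is
// rebuilt as a StringMake of two register-arg leaves, and later StringPtr and
// StringLen selects land on those leaves.

package p

func firstByte(s string) byte {
	if len(s) == 0 { // reads the length leaf
		return 0
	}
	return s[0] // reads through the data-pointer leaf
}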
+func (x *expandState) rewriteSelectOrArg(pos src.XPos, b *Block, container, a, m0 *Value, at *types.Type, rc registerCursor) *Value { + + if at == types.TypeMem { + a.copyOf(m0) + return a + } + + makeOf := func(a *Value, op Op, args []*Value) *Value { + if a == nil { + a = b.NewValue0(pos, op, at) + a.AddArgs(args...) + } else { + a.resetArgs() + a.Aux, a.AuxInt = nil, 0 + a.Pos, a.Op, a.Type = pos, op, at + a.AddArgs(args...) } - return path + return a } -} -// decomposeArg is a helper for storeArgOrLoad. -// It decomposes a Load or an Arg into smaller parts and returns the new mem. -// If the type does not match one of the expected aggregate types, it returns nil instead. -// Parameters: -// -// pos -- the location of any generated code. -// b -- the block into which any generated code should normally be placed -// source -- the value, possibly an aggregate, to be stored. -// mem -- the mem flowing into this decomposition (loads depend on it, stores updated it) -// t -- the type of the value to be stored -// storeOffset -- if the value is stored in memory, it is stored at base (see storeRc) + storeOffset -// loadRegOffset -- regarding source as a value in registers, the register offset in ABI1. Meaningful only if source is OpArg. -// storeRc -- storeRC; if the value is stored in registers, this specifies the registers. -// StoreRc also identifies whether the target is registers or memory, and has the base for the store operation. -func (x *expandState) decomposeArg(pos src.XPos, b *Block, source, mem *Value, t *types.Type, storeOffset int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value { - - pa := x.prAssignForArg(source) - var locs []*LocalSlot - for _, s := range x.namedSelects[source] { - locs = append(locs, x.f.Names[s.locIndex]) + if at.Size() == 0 { + // For consistency, create these values even though they'll ultimately be unused + if at.IsArray() { + return makeOf(a, OpArrayMake0, nil) + } + if at.IsStruct() { + return makeOf(a, OpStructMake0, nil) + } + return a } - if len(pa.Registers) > 0 { - // Handle the in-registers case directly - rts, offs := pa.RegisterTypesAndOffsets() - last := loadRegOffset + x.regWidth(t) - if offs[loadRegOffset] != 0 { - // Document the problem before panicking. - for i := 0; i < len(rts); i++ { - rt := rts[i] - off := offs[i] - fmt.Printf("rt=%s, off=%d, rt.Width=%d, rt.Align=%d\n", rt.String(), off, rt.Size(), uint8(rt.Alignment())) - } - panic(fmt.Errorf("offset %d of requested register %d should be zero, source=%s", offs[loadRegOffset], loadRegOffset, source.LongString())) + sk := selKey{from: container, size: 0, offsetOrIndex: rc.storeOffset, typ: at} + dupe := x.commonSelectors[sk] + if dupe != nil { + if a == nil { + return dupe } + a.copyOf(dupe) + return a + } - if x.debug > 1 { - x.Printf("decompose arg %s has %d locs\n", source.LongString(), len(locs)) - } + var argStore [10]*Value + args := argStore[:0] - for i := loadRegOffset; i < last; i++ { - rt := rts[i] - off := offs[i] - w := x.commonArgs[selKey{source, off, rt.Size(), rt}] - if w == nil { - w = x.newArgToMemOrRegs(source, w, off, i, rt, pos) - suffix := x.pathTo(source.Type, rt, off) - if suffix != "" { - x.splitSlotsIntoNames(locs, suffix, off, rt, w) - } + addArg := func(a0 *Value) { + if a0 == nil { + as := "" + if a != nil { + as = a.LongString() } - if t.IsPtrShaped() { - // Preserve the original store type. This ensures pointer type - // properties aren't discarded (e.g, notinheap). 
- if rt.Size() != t.Size() || len(pa.Registers) != 1 || i != loadRegOffset { - b.Func.Fatalf("incompatible store type %v and %v, i=%d", t, rt, i) - } - rt = t - } - mem = x.storeArgOrLoad(pos, b, w, mem, rt, storeOffset+off, i, storeRc.next(rt)) + panic(fmt.Errorf("a0 should not be nil, a=%v, container=%v, at=%v", as, container.LongString(), at)) } - return mem + args = append(args, a0) } - u := source.Type - switch u.Kind() { + switch at.Kind() { case types.TARRAY: - elem := u.Elem() - elemRO := x.regWidth(elem) - for i := int64(0); i < u.NumElem(); i++ { - elemOff := i * elem.Size() - mem = storeOneArg(x, pos, b, locs, indexNames[i], source, mem, elem, elemOff, storeOffset+elemOff, loadRegOffset, storeRc.next(elem)) - loadRegOffset += elemRO - pos = pos.WithNotStmt() + et := at.Elem() + for i := int64(0); i < at.NumElem(); i++ { + e := x.rewriteSelectOrArg(pos, b, container, nil, m0, et, rc.next(et)) + addArg(e) } - return mem + a = makeOf(a, OpArrayMake1, args) + x.commonSelectors[sk] = a + return a + case types.TSTRUCT: - for i := 0; i < u.NumFields(); i++ { - fld := u.Field(i) - mem = storeOneArg(x, pos, b, locs, "."+fld.Sym.Name, source, mem, fld.Type, fld.Offset, storeOffset+fld.Offset, loadRegOffset, storeRc.next(fld.Type)) - loadRegOffset += x.regWidth(fld.Type) + // Assume ssagen/ssa.go (in buildssa) spills large aggregates so they won't appear here. + for i := 0; i < at.NumFields(); i++ { + et := at.Field(i).Type + e := x.rewriteSelectOrArg(pos, b, container, nil, m0, et, rc.next(et)) + if e == nil { + panic(fmt.Errorf("nil e, et=%v, et.Size()=%d, i=%d", et, et.Size(), i)) + } + addArg(e) pos = pos.WithNotStmt() } - return mem - case types.TINT64, types.TUINT64: - if t.Size() == x.regSize { - break + if at.NumFields() > 4 { + panic(fmt.Errorf("Too many fields (%d, %d bytes), container=%s", at.NumFields(), at.Size(), container.LongString())) } - tHi, tLo := x.intPairTypes(t.Kind()) - mem = storeOneArg(x, pos, b, locs, ".hi", source, mem, tHi, x.hiOffset, storeOffset+x.hiOffset, loadRegOffset+x.hiRo, storeRc.plus(x.hiRo)) + a = makeOf(a, StructMakeOp(at.NumFields()), args) + x.commonSelectors[sk] = a + return a + + case types.TSLICE: + addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.typs.BytePtr, rc.next(x.typs.BytePtr))) pos = pos.WithNotStmt() - return storeOneArg(x, pos, b, locs, ".lo", source, mem, tLo, x.lowOffset, storeOffset+x.lowOffset, loadRegOffset+x.loRo, storeRc.plus(x.loRo)) - case types.TINTER: - sfx := ".itab" - if u.IsEmptyInterface() { - sfx = ".type" - } - return storeTwoArg(x, pos, b, locs, sfx, ".idata", source, mem, x.typs.Uintptr, x.typs.BytePtr, 0, storeOffset, loadRegOffset, storeRc) + addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.typs.Int, rc.next(x.typs.Int))) + addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.typs.Int, rc.next(x.typs.Int))) + a = makeOf(a, OpSliceMake, args) + x.commonSelectors[sk] = a + return a + case types.TSTRING: - return storeTwoArg(x, pos, b, locs, ".ptr", ".len", source, mem, x.typs.BytePtr, x.typs.Int, 0, storeOffset, loadRegOffset, storeRc) - case types.TCOMPLEX64: - return storeTwoArg(x, pos, b, locs, ".real", ".imag", source, mem, x.typs.Float32, x.typs.Float32, 0, storeOffset, loadRegOffset, storeRc) - case types.TCOMPLEX128: - return storeTwoArg(x, pos, b, locs, ".real", ".imag", source, mem, x.typs.Float64, x.typs.Float64, 0, storeOffset, loadRegOffset, storeRc) - case types.TSLICE: - mem = storeOneArg(x, pos, b, locs, ".ptr", source, mem, x.typs.BytePtr, 0, storeOffset, loadRegOffset, 
storeRc.next(x.typs.BytePtr)) - return storeTwoArg(x, pos, b, locs, ".len", ".cap", source, mem, x.typs.Int, x.typs.Int, x.ptrSize, storeOffset+x.ptrSize, loadRegOffset+RO_slice_len, storeRc) - } - return nil -} + addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.typs.BytePtr, rc.next(x.typs.BytePtr))) + pos = pos.WithNotStmt() + addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.typs.Int, rc.next(x.typs.Int))) + a = makeOf(a, OpStringMake, args) + x.commonSelectors[sk] = a + return a -func (x *expandState) splitSlotsIntoNames(locs []*LocalSlot, suffix string, off int64, rt *types.Type, w *Value) { - wlocs := x.splitSlots(locs, suffix, off, rt) - for _, l := range wlocs { - old, ok := x.f.NamedValues[*l] - x.f.NamedValues[*l] = append(old, w) - if !ok { - x.f.Names = append(x.f.Names, l) - } - } -} + case types.TINTER: + addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.typs.Uintptr, rc.next(x.typs.Uintptr))) + pos = pos.WithNotStmt() + addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.typs.BytePtr, rc.next(x.typs.BytePtr))) + a = makeOf(a, OpIMake, args) + x.commonSelectors[sk] = a + return a -// decomposeLoad is a helper for storeArgOrLoad. -// It decomposes a Load into smaller parts and returns the new mem. -// If the type does not match one of the expected aggregate types, it returns nil instead. -// Parameters: -// -// pos -- the location of any generated code. -// b -- the block into which any generated code should normally be placed -// source -- the value, possibly an aggregate, to be stored. -// mem -- the mem flowing into this decomposition (loads depend on it, stores updated it) -// t -- the type of the value to be stored -// storeOffset -- if the value is stored in memory, it is stored at base (see storeRc) + offset -// loadRegOffset -- regarding source as a value in registers, the register offset in ABI1. Meaningful only if source is OpArg. -// storeRc -- storeRC; if the value is stored in registers, this specifies the registers. -// StoreRc also identifies whether the target is registers or memory, and has the base for the store operation. -// -// TODO -- this needs cleanup; it just works for SSA-able aggregates, and won't fully generalize to register-args aggregates. 
-func (x *expandState) decomposeLoad(pos src.XPos, b *Block, source, mem *Value, t *types.Type, storeOffset int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value { - u := source.Type - switch u.Kind() { - case types.TARRAY: - elem := u.Elem() - elemRO := x.regWidth(elem) - for i := int64(0); i < u.NumElem(); i++ { - elemOff := i * elem.Size() - mem = storeOneLoad(x, pos, b, source, mem, elem, elemOff, storeOffset+elemOff, loadRegOffset, storeRc.next(elem)) - loadRegOffset += elemRO + case types.TCOMPLEX64: + addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.typs.Float32, rc.next(x.typs.Float32))) + pos = pos.WithNotStmt() + addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.typs.Float32, rc.next(x.typs.Float32))) + a = makeOf(a, OpComplexMake, args) + x.commonSelectors[sk] = a + return a + + case types.TCOMPLEX128: + addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.typs.Float64, rc.next(x.typs.Float64))) + pos = pos.WithNotStmt() + addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.typs.Float64, rc.next(x.typs.Float64))) + a = makeOf(a, OpComplexMake, args) + x.commonSelectors[sk] = a + return a + + case types.TINT64: + if at.Size() > x.regSize { + addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.firstType, rc.next(x.firstType))) pos = pos.WithNotStmt() + addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.secondType, rc.next(x.secondType))) + if !x.f.Config.BigEndian { + // Int64Make args are big, little + args[0], args[1] = args[1], args[0] + } + a = makeOf(a, OpInt64Make, args) + x.commonSelectors[sk] = a + return a } - return mem - case types.TSTRUCT: - for i := 0; i < u.NumFields(); i++ { - fld := u.Field(i) - mem = storeOneLoad(x, pos, b, source, mem, fld.Type, fld.Offset, storeOffset+fld.Offset, loadRegOffset, storeRc.next(fld.Type)) - loadRegOffset += x.regWidth(fld.Type) + case types.TUINT64: + if at.Size() > x.regSize { + addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.typs.UInt32, rc.next(x.typs.UInt32))) pos = pos.WithNotStmt() + addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.typs.UInt32, rc.next(x.typs.UInt32))) + if !x.f.Config.BigEndian { + // Int64Make args are big, little + args[0], args[1] = args[1], args[0] + } + a = makeOf(a, OpInt64Make, args) + x.commonSelectors[sk] = a + return a } - return mem - case types.TINT64, types.TUINT64: - if t.Size() == x.regSize { - break - } - tHi, tLo := x.intPairTypes(t.Kind()) - mem = storeOneLoad(x, pos, b, source, mem, tHi, x.hiOffset, storeOffset+x.hiOffset, loadRegOffset+x.hiRo, storeRc.plus(x.hiRo)) - pos = pos.WithNotStmt() - return storeOneLoad(x, pos, b, source, mem, tLo, x.lowOffset, storeOffset+x.lowOffset, loadRegOffset+x.loRo, storeRc.plus(x.loRo)) - case types.TINTER: - return storeTwoLoad(x, pos, b, source, mem, x.typs.Uintptr, x.typs.BytePtr, 0, storeOffset, loadRegOffset, storeRc) - case types.TSTRING: - return storeTwoLoad(x, pos, b, source, mem, x.typs.BytePtr, x.typs.Int, 0, storeOffset, loadRegOffset, storeRc) - case types.TCOMPLEX64: - return storeTwoLoad(x, pos, b, source, mem, x.typs.Float32, x.typs.Float32, 0, storeOffset, loadRegOffset, storeRc) - case types.TCOMPLEX128: - return storeTwoLoad(x, pos, b, source, mem, x.typs.Float64, x.typs.Float64, 0, storeOffset, loadRegOffset, storeRc) - case types.TSLICE: - mem = storeOneLoad(x, pos, b, source, mem, x.typs.BytePtr, 0, storeOffset, loadRegOffset, storeRc.next(x.typs.BytePtr)) - return storeTwoLoad(x, pos, b, source, mem, x.typs.Int, x.typs.Int, x.ptrSize, storeOffset+x.ptrSize, 
loadRegOffset+RO_slice_len, storeRc) - } - return nil -} - -// storeOneArg creates a decomposed (one step) arg that is then stored. -// pos and b locate the store instruction, source is the "base" of the value input, -// mem is the input mem, t is the type in question, and offArg and offStore are the offsets from the respective bases. -func storeOneArg(x *expandState, pos src.XPos, b *Block, locs []*LocalSlot, suffix string, source, mem *Value, t *types.Type, argOffset, storeOffset int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value { - if x.debug > 1 { - x.indent(3) - defer x.indent(-3) - x.Printf("storeOneArg(%s; %s; %s; aO=%d; sO=%d; lrO=%d; %s)\n", source.LongString(), mem.String(), t.String(), argOffset, storeOffset, loadRegOffset, storeRc.String()) } - w := x.commonArgs[selKey{source, argOffset, t.Size(), t}] - if w == nil { - w = x.newArgToMemOrRegs(source, w, argOffset, loadRegOffset, t, pos) - x.splitSlotsIntoNames(locs, suffix, argOffset, t, w) - } - return x.storeArgOrLoad(pos, b, w, mem, t, storeOffset, loadRegOffset, storeRc) -} + // An atomic type, either record the register or store it and update the memory. -// storeOneLoad creates a decomposed (one step) load that is then stored. -func storeOneLoad(x *expandState, pos src.XPos, b *Block, source, mem *Value, t *types.Type, offArg, offStore int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value { - from := x.offsetFrom(source.Block, source.Args[0], offArg, types.NewPtr(t)) - w := b.NewValue2(source.Pos, OpLoad, t, from, mem) - return x.storeArgOrLoad(pos, b, w, mem, t, offStore, loadRegOffset, storeRc) -} + // Depending on the container Op, the leaves are either OpSelectN or OpArg{Int,Float}Reg -func storeTwoArg(x *expandState, pos src.XPos, b *Block, locs []*LocalSlot, suffix1 string, suffix2 string, source, mem *Value, t1, t2 *types.Type, offArg, offStore int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value { - mem = storeOneArg(x, pos, b, locs, suffix1, source, mem, t1, offArg, offStore, loadRegOffset, storeRc.next(t1)) - pos = pos.WithNotStmt() - t1Size := t1.Size() - return storeOneArg(x, pos, b, locs, suffix2, source, mem, t2, offArg+t1Size, offStore+t1Size, loadRegOffset+1, storeRc) -} + if container.Op == OpArg { + if rc.hasRegs() { + op, i := rc.ArgOpAndRegisterFor() + name := container.Aux.(*ir.Name) + a = makeOf(a, op, nil) + a.AuxInt = i + a.Aux = &AuxNameOffset{name, rc.storeOffset} + } else { + key := selKey{container, rc.storeOffset, at.Size(), at} + w := x.commonArgs[key] + if w != nil && w.Uses != 0 { + if a == nil { + a = w + } else { + a.copyOf(w) + } + } else { + if a == nil { + aux := container.Aux + auxInt := container.AuxInt + rc.storeOffset + a = container.Block.NewValue0IA(container.Pos, OpArg, at, auxInt, aux) + } else { + // do nothing, the original should be okay. + } + x.commonArgs[key] = a + } + } + } else if container.Op == OpSelectN { + call := container.Args[0] + aux := call.Aux.(*AuxCall) + which := container.AuxInt -// storeTwoLoad creates a pair of decomposed (one step) loads that are then stored. -// the elements of the pair must not require any additional alignment. 
-func storeTwoLoad(x *expandState, pos src.XPos, b *Block, source, mem *Value, t1, t2 *types.Type, offArg, offStore int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value { - mem = storeOneLoad(x, pos, b, source, mem, t1, offArg, offStore, loadRegOffset, storeRc.next(t1)) - pos = pos.WithNotStmt() - t1Size := t1.Size() - return storeOneLoad(x, pos, b, source, mem, t2, offArg+t1Size, offStore+t1Size, loadRegOffset+1, storeRc) -} + if at == types.TypeMem { + if a != m0 || a != x.memForCall[call.ID] { + panic(fmt.Errorf("Memories %s, %s, and %s should all be equal after %s", a.LongString(), m0.LongString(), x.memForCall[call.ID], call.LongString())) + } + } else if rc.hasRegs() { + firstReg := uint32(0) + for i := 0; i < int(which); i++ { + firstReg += uint32(len(aux.abiInfo.OutParam(i).Registers)) + } + reg := int64(rc.nextSlice + Abi1RO(firstReg)) + a = makeOf(a, OpSelectN, []*Value{call}) + a.AuxInt = reg + } else { + off := x.offsetFrom(x.f.Entry, x.sp, rc.storeOffset+aux.OffsetOfResult(which), types.NewPtr(at)) + a = makeOf(a, OpLoad, []*Value{off, m0}) + } -// storeArgOrLoad converts stores of SSA-able potentially aggregatable arguments (passed to a call) into a series of primitive-typed -// stores of non-aggregate types. It recursively walks up a chain of selectors until it reaches a Load or an Arg. -// If it does not reach a Load or an Arg, nothing happens; this allows a little freedom in phase ordering. -func (x *expandState) storeArgOrLoad(pos src.XPos, b *Block, source, mem *Value, t *types.Type, storeOffset int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value { - if x.debug > 1 { - x.indent(3) - defer x.indent(-3) - x.Printf("storeArgOrLoad(%s; %s; %s; %d; %s)\n", source.LongString(), mem.String(), t.String(), storeOffset, storeRc.String()) + } else { + panic(fmt.Errorf("Expected container OpArg or OpSelectN, saw %v instead", container.LongString())) } - // Start with Opcodes that can be disassembled - switch source.Op { - case OpCopy: - return x.storeArgOrLoad(pos, b, source.Args[0], mem, t, storeOffset, loadRegOffset, storeRc) + x.commonSelectors[sk] = a + return a +} - case OpLoad, OpDereference: - ret := x.decomposeLoad(pos, b, source, mem, t, storeOffset, loadRegOffset, storeRc) - if ret != nil { - return ret - } +// rewriteWideSelectToStores handles the case of a SelectN'd result from a function call that is too large for SSA, +// but is transferred in registers. In this case the register cursor tracks both operands; the register sources and +// the memory destinations. +// This returns the memory flowing out of the last store +func (x *expandState) rewriteWideSelectToStores(pos src.XPos, b *Block, container, m0 *Value, at *types.Type, rc registerCursor) *Value { - case OpArg: - ret := x.decomposeArg(pos, b, source, mem, t, storeOffset, loadRegOffset, storeRc) - if ret != nil { - return ret - } + if at.Size() == 0 { + return m0 + } - case OpArrayMake0, OpStructMake0: - // TODO(register args) is this correct for registers? 
- return mem + switch at.Kind() { + case types.TARRAY: + et := at.Elem() + for i := int64(0); i < at.NumElem(); i++ { + m0 = x.rewriteWideSelectToStores(pos, b, container, m0, et, rc.next(et)) + } + return m0 - case OpStructMake1, OpStructMake2, OpStructMake3, OpStructMake4: - for i := 0; i < t.NumFields(); i++ { - fld := t.Field(i) - mem = x.storeArgOrLoad(pos, b, source.Args[i], mem, fld.Type, storeOffset+fld.Offset, 0, storeRc.next(fld.Type)) + case types.TSTRUCT: + // Assume ssagen/ssa.go (in buildssa) spills large aggregates so they won't appear here. + for i := 0; i < at.NumFields(); i++ { + et := at.Field(i).Type + m0 = x.rewriteWideSelectToStores(pos, b, container, m0, et, rc.next(et)) pos = pos.WithNotStmt() } - return mem - - case OpArrayMake1: - return x.storeArgOrLoad(pos, b, source.Args[0], mem, t.Elem(), storeOffset, 0, storeRc.at(t, 0)) + return m0 - case OpInt64Make: - tHi, tLo := x.intPairTypes(t.Kind()) - mem = x.storeArgOrLoad(pos, b, source.Args[0], mem, tHi, storeOffset+x.hiOffset, 0, storeRc.next(tHi)) + case types.TSLICE: + m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.typs.BytePtr, rc.next(x.typs.BytePtr)) pos = pos.WithNotStmt() - return x.storeArgOrLoad(pos, b, source.Args[1], mem, tLo, storeOffset+x.lowOffset, 0, storeRc) + m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.typs.Int, rc.next(x.typs.Int)) + m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.typs.Int, rc.next(x.typs.Int)) + return m0 - case OpComplexMake: - tPart := x.typs.Float32 - wPart := t.Size() / 2 - if wPart == 8 { - tPart = x.typs.Float64 - } - mem = x.storeArgOrLoad(pos, b, source.Args[0], mem, tPart, storeOffset, 0, storeRc.next(tPart)) + case types.TSTRING: + m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.typs.BytePtr, rc.next(x.typs.BytePtr)) pos = pos.WithNotStmt() - return x.storeArgOrLoad(pos, b, source.Args[1], mem, tPart, storeOffset+wPart, 0, storeRc) + m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.typs.Int, rc.next(x.typs.Int)) + return m0 - case OpIMake: - mem = x.storeArgOrLoad(pos, b, source.Args[0], mem, x.typs.Uintptr, storeOffset, 0, storeRc.next(x.typs.Uintptr)) + case types.TINTER: + m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.typs.Uintptr, rc.next(x.typs.Uintptr)) pos = pos.WithNotStmt() - return x.storeArgOrLoad(pos, b, source.Args[1], mem, x.typs.BytePtr, storeOffset+x.ptrSize, 0, storeRc) + m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.typs.BytePtr, rc.next(x.typs.BytePtr)) + return m0 - case OpStringMake: - mem = x.storeArgOrLoad(pos, b, source.Args[0], mem, x.typs.BytePtr, storeOffset, 0, storeRc.next(x.typs.BytePtr)) + case types.TCOMPLEX64: + m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.typs.Float32, rc.next(x.typs.Float32)) pos = pos.WithNotStmt() - return x.storeArgOrLoad(pos, b, source.Args[1], mem, x.typs.Int, storeOffset+x.ptrSize, 0, storeRc) + m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.typs.Float32, rc.next(x.typs.Float32)) + return m0 - case OpSliceMake: - mem = x.storeArgOrLoad(pos, b, source.Args[0], mem, x.typs.BytePtr, storeOffset, 0, storeRc.next(x.typs.BytePtr)) + case types.TCOMPLEX128: + m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.typs.Float64, rc.next(x.typs.Float64)) pos = pos.WithNotStmt() - mem = x.storeArgOrLoad(pos, b, source.Args[1], mem, x.typs.Int, storeOffset+x.ptrSize, 0, storeRc.next(x.typs.Int)) - return x.storeArgOrLoad(pos, b, source.Args[2], mem, x.typs.Int, storeOffset+2*x.ptrSize, 0, storeRc) - } + m0 = 
x.rewriteWideSelectToStores(pos, b, container, m0, x.typs.Float64, rc.next(x.typs.Float64)) + return m0 - // For nodes that cannot be taken apart -- OpSelectN, other structure selectors. - switch t.Kind() { - case types.TARRAY: - elt := t.Elem() - if source.Type != t && t.NumElem() == 1 && elt.Size() == t.Size() && t.Size() == x.regSize { - t = removeTrivialWrapperTypes(t) - // it could be a leaf type, but the "leaf" could be complex64 (for example) - return x.storeArgOrLoad(pos, b, source, mem, t, storeOffset, loadRegOffset, storeRc) + case types.TINT64: + if at.Size() > x.regSize { + m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.firstType, rc.next(x.firstType)) + pos = pos.WithNotStmt() + m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.secondType, rc.next(x.secondType)) + return m0 } - eltRO := x.regWidth(elt) - source.Type = t - for i := int64(0); i < t.NumElem(); i++ { - sel := b.NewValue1I(pos, OpArraySelect, elt, i, source) - mem = x.storeArgOrLoad(pos, b, sel, mem, elt, storeOffset+i*elt.Size(), loadRegOffset, storeRc.at(t, 0)) - loadRegOffset += eltRO + case types.TUINT64: + if at.Size() > x.regSize { + m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.typs.UInt32, rc.next(x.typs.UInt32)) pos = pos.WithNotStmt() + m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.typs.UInt32, rc.next(x.typs.UInt32)) + return m0 } - return mem + } - case types.TSTRUCT: - if source.Type != t && t.NumFields() == 1 && t.Field(0).Type.Size() == t.Size() && t.Size() == x.regSize { - // This peculiar test deals with accesses to immediate interface data. - // It works okay because everything is the same size. - // Example code that triggers this can be found in go/constant/value.go, function ToComplex - // v119 (+881) = IData v6 - // v121 (+882) = StaticLECall {AuxCall{"".itof([intVal,0])[floatVal,8]}} [16] v119 v1 - // This corresponds to the generic rewrite rule "(StructSelect [0] (IData x)) => (IData x)" - // Guard against "struct{struct{*foo}}" - // Other rewriting phases create minor glitches when they transform IData, for instance the - // interface-typed Arg "x" of ToFloat in go/constant/value.go - // v6 (858) = Arg {x} (x[Value], x[Value]) - // is rewritten by decomposeArgs into - // v141 (858) = Arg {x} - // v139 (858) = Arg <*uint8> {x} [8] - // because of a type case clause on line 862 of go/constant/value.go - // case intVal: - // return itof(x) - // v139 is later stored as an intVal == struct{val *big.Int} which naively requires the fields of - // of a *uint8, which does not succeed. - t = removeTrivialWrapperTypes(t) - // it could be a leaf type, but the "leaf" could be complex64 (for example) - return x.storeArgOrLoad(pos, b, source, mem, t, storeOffset, loadRegOffset, storeRc) - } + // TODO could change treatment of too-large OpArg, would deal with it here. 
+ if container.Op == OpSelectN { + call := container.Args[0] + aux := call.Aux.(*AuxCall) + which := container.AuxInt - source.Type = t - for i := 0; i < t.NumFields(); i++ { - fld := t.Field(i) - sel := b.NewValue1I(pos, OpStructSelect, fld.Type, int64(i), source) - mem = x.storeArgOrLoad(pos, b, sel, mem, fld.Type, storeOffset+fld.Offset, loadRegOffset, storeRc.next(fld.Type)) - loadRegOffset += x.regWidth(fld.Type) - pos = pos.WithNotStmt() + if rc.hasRegs() { + firstReg := uint32(0) + for i := 0; i < int(which); i++ { + firstReg += uint32(len(aux.abiInfo.OutParam(i).Registers)) + } + reg := int64(rc.nextSlice + Abi1RO(firstReg)) + a := b.NewValue1I(pos, OpSelectN, at, reg, call) + dst := x.offsetFrom(b, rc.storeDest, rc.storeOffset, types.NewPtr(at)) + m0 = b.NewValue3A(pos, OpStore, types.TypeMem, at, dst, a, m0) + } else { + panic(fmt.Errorf("Expected rc to have registers")) } - return mem + } else { + panic(fmt.Errorf("Expected container OpSelectN, saw %v instead", container.LongString())) + } + return m0 +} - case types.TINT64, types.TUINT64: - if t.Size() == x.regSize { - break - } - tHi, tLo := x.intPairTypes(t.Kind()) - sel := b.NewValue1(pos, OpInt64Hi, tHi, source) - mem = x.storeArgOrLoad(pos, b, sel, mem, tHi, storeOffset+x.hiOffset, loadRegOffset+x.hiRo, storeRc.plus(x.hiRo)) - pos = pos.WithNotStmt() - sel = b.NewValue1(pos, OpInt64Lo, tLo, source) - return x.storeArgOrLoad(pos, b, sel, mem, tLo, storeOffset+x.lowOffset, loadRegOffset+x.loRo, storeRc.plus(x.hiRo)) +func isBlockMultiValueExit(b *Block) bool { + return (b.Kind == BlockRet || b.Kind == BlockRetJmp) && b.Controls[0] != nil && b.Controls[0].Op == OpMakeResult +} - case types.TINTER: - sel := b.NewValue1(pos, OpITab, x.typs.BytePtr, source) - mem = x.storeArgOrLoad(pos, b, sel, mem, x.typs.BytePtr, storeOffset, loadRegOffset, storeRc.next(x.typs.BytePtr)) - pos = pos.WithNotStmt() - sel = b.NewValue1(pos, OpIData, x.typs.BytePtr, source) - return x.storeArgOrLoad(pos, b, sel, mem, x.typs.BytePtr, storeOffset+x.ptrSize, loadRegOffset+RO_iface_data, storeRc) +type Abi1RO uint8 // An offset within a parameter's slice of register indices, for abi1. - case types.TSTRING: - sel := b.NewValue1(pos, OpStringPtr, x.typs.BytePtr, source) - mem = x.storeArgOrLoad(pos, b, sel, mem, x.typs.BytePtr, storeOffset, loadRegOffset, storeRc.next(x.typs.BytePtr)) - pos = pos.WithNotStmt() - sel = b.NewValue1(pos, OpStringLen, x.typs.Int, source) - return x.storeArgOrLoad(pos, b, sel, mem, x.typs.Int, storeOffset+x.ptrSize, loadRegOffset+RO_string_len, storeRc) +// A registerCursor tracks which register is used for an Arg or regValues, or a piece of such. +type registerCursor struct { + storeDest *Value // if there are no register targets, then this is the base of the store. 
+ storeOffset int64 + regs []abi.RegIndex // the registers available for this Arg/result (which is all in registers or not at all) + nextSlice Abi1RO // the next register/register-slice offset + config *abi.ABIConfig + regValues *[]*Value // values assigned to registers accumulate here +} - case types.TSLICE: - et := types.NewPtr(t.Elem()) - sel := b.NewValue1(pos, OpSlicePtr, et, source) - mem = x.storeArgOrLoad(pos, b, sel, mem, et, storeOffset, loadRegOffset, storeRc.next(et)) - pos = pos.WithNotStmt() - sel = b.NewValue1(pos, OpSliceLen, x.typs.Int, source) - mem = x.storeArgOrLoad(pos, b, sel, mem, x.typs.Int, storeOffset+x.ptrSize, loadRegOffset+RO_slice_len, storeRc.next(x.typs.Int)) - sel = b.NewValue1(pos, OpSliceCap, x.typs.Int, source) - return x.storeArgOrLoad(pos, b, sel, mem, x.typs.Int, storeOffset+2*x.ptrSize, loadRegOffset+RO_slice_cap, storeRc) +func (c *registerCursor) String() string { + dest := "" + if c.storeDest != nil { + dest = fmt.Sprintf("%s+%d", c.storeDest.String(), c.storeOffset) + } + regs := "" + if c.regValues != nil { + regs = "" + for i, x := range *c.regValues { + if i > 0 { + regs = regs + "; " + } + regs = regs + x.LongString() + } + } - case types.TCOMPLEX64: - sel := b.NewValue1(pos, OpComplexReal, x.typs.Float32, source) - mem = x.storeArgOrLoad(pos, b, sel, mem, x.typs.Float32, storeOffset, loadRegOffset, storeRc.next(x.typs.Float32)) - pos = pos.WithNotStmt() - sel = b.NewValue1(pos, OpComplexImag, x.typs.Float32, source) - return x.storeArgOrLoad(pos, b, sel, mem, x.typs.Float32, storeOffset+4, loadRegOffset+RO_complex_imag, storeRc) + // not printing the config because that has not been useful + return fmt.Sprintf("RCSR{storeDest=%v, regsLen=%d, nextSlice=%d, regValues=[%s]}", dest, len(c.regs), c.nextSlice, regs) +} - case types.TCOMPLEX128: - sel := b.NewValue1(pos, OpComplexReal, x.typs.Float64, source) - mem = x.storeArgOrLoad(pos, b, sel, mem, x.typs.Float64, storeOffset, loadRegOffset, storeRc.next(x.typs.Float64)) - pos = pos.WithNotStmt() - sel = b.NewValue1(pos, OpComplexImag, x.typs.Float64, source) - return x.storeArgOrLoad(pos, b, sel, mem, x.typs.Float64, storeOffset+8, loadRegOffset+RO_complex_imag, storeRc) +// next effectively post-increments the register cursor; the receiver is advanced, +// the (aligned) old value is returned. +func (c *registerCursor) next(t *types.Type) registerCursor { + c.storeOffset = types.RoundUp(c.storeOffset, t.Alignment()) + rc := *c + c.storeOffset = types.RoundUp(c.storeOffset+t.Size(), t.Alignment()) + if int(c.nextSlice) < len(c.regs) { + w := c.config.NumParamRegs(t) + c.nextSlice += Abi1RO(w) } + return rc +} + +// plus returns a register cursor offset from the original, without modifying the original. +func (c *registerCursor) plus(regWidth Abi1RO) registerCursor { + rc := *c + rc.nextSlice += regWidth + return rc +} - s := mem - if source.Op == OpDereference { - source.Op = OpLoad // For purposes of parameter passing expansion, a Dereference is a Load. +// at returns the register cursor for component i of t, where the first +// component is numbered 0. 
+func (c *registerCursor) at(t *types.Type, i int) registerCursor { + rc := *c + if i == 0 || len(c.regs) == 0 { + return rc } - if storeRc.hasRegs() { - storeRc.addArg(source) - } else { - dst := x.offsetFrom(b, storeRc.storeDest, storeOffset, types.NewPtr(t)) - s = b.NewValue3A(pos, OpStore, types.TypeMem, t, dst, source, mem) + if t.IsArray() { + w := c.config.NumParamRegs(t.Elem()) + rc.nextSlice += Abi1RO(i * w) + return rc } - if x.debug > 1 { - x.Printf("-->storeArg returns %s, storeRc=%s\n", s.LongString(), storeRc.String()) + if t.IsStruct() { + for j := 0; j < i; j++ { + rc.next(t.FieldType(j)) + } + return rc } - return s + panic("Haven't implemented this case yet, do I need to?") } -// rewriteArgs replaces all the call-parameter Args to a call with their register translation (if any). -// Preceding parameters (code pointers, closure pointer) are preserved, and the memory input is modified -// to account for any parameter stores required. -// Any of the old Args that have their use count fall to zero are marked OpInvalid. -func (x *expandState) rewriteArgs(v *Value, firstArg int) { - if x.debug > 1 { - x.indent(3) - defer x.indent(-3) - x.Printf("rewriteArgs(%s; %d)\n", v.LongString(), firstArg) - } - // Thread the stores on the memory arg - aux := v.Aux.(*AuxCall) - m0 := v.MemoryArg() - mem := m0 - newArgs := []*Value{} - oldArgs := []*Value{} - sp := x.sp - if v.Op == OpTailLECall { - // For tail call, we unwind the frame before the call so we'll use the caller's - // SP. - sp = x.f.Entry.NewValue1(src.NoXPos, OpGetCallerSP, x.typs.Uintptr, mem) - } - for i, a := range v.Args[firstArg : len(v.Args)-1] { // skip leading non-parameter SSA Args and trailing mem SSA Arg. - oldArgs = append(oldArgs, a) - auxI := int64(i) - aRegs := aux.RegsOfArg(auxI) - aType := aux.TypeOfArg(auxI) - if len(aRegs) == 0 && a.Op == OpDereference { - aOffset := aux.OffsetOfArg(auxI) - if a.MemoryArg() != m0 { - x.f.Fatalf("Op...LECall and OpDereference have mismatched mem, %s and %s", v.LongString(), a.LongString()) - } - if v.Op == OpTailLECall { - // It's common for a tail call passing the same arguments (e.g. method wrapper), - // so this would be a self copy. Detect this and optimize it out. - a0 := a.Args[0] - if a0.Op == OpLocalAddr { - n := a0.Aux.(*ir.Name) - if n.Class == ir.PPARAM && n.FrameOffset()+x.f.Config.ctxt.Arch.FixedFrameSize == aOffset { - continue - } - } - } - if x.debug > 1 { - x.Printf("...storeArg %s, %v, %d\n", a.LongString(), aType, aOffset) - } - // "Dereference" of addressed (probably not-SSA-eligible) value becomes Move - // TODO(register args) this will be more complicated with registers in the picture. - mem = x.rewriteDereference(v.Block, sp, a, mem, aOffset, aux.SizeOfArg(auxI), aType, v.Pos) - } else { - var rc registerCursor - var result *[]*Value - var aOffset int64 - if len(aRegs) > 0 { - result = &newArgs - } else { - aOffset = aux.OffsetOfArg(auxI) - } - if v.Op == OpTailLECall && a.Op == OpArg && a.AuxInt == 0 { - // It's common for a tail call passing the same arguments (e.g. method wrapper), - // so this would be a self copy. Detect this and optimize it out. 
- n := a.Aux.(*ir.Name) - if n.Class == ir.PPARAM && n.FrameOffset()+x.f.Config.ctxt.Arch.FixedFrameSize == aOffset { - continue - } - } - if x.debug > 1 { - x.Printf("...storeArg %s, %v, %d\n", a.LongString(), aType, aOffset) - } - rc.init(aRegs, aux.abiInfo, result, sp) - mem = x.storeArgOrLoad(v.Pos, v.Block, a, mem, aType, aOffset, 0, rc) - } - } - var preArgStore [2]*Value - preArgs := append(preArgStore[:0], v.Args[0:firstArg]...) - v.resetArgs() - v.AddArgs(preArgs...) - v.AddArgs(newArgs...) - v.AddArg(mem) - for _, a := range oldArgs { - if a.Uses == 0 { - x.invalidateRecursively(a) - } - } - - return +func (c *registerCursor) init(regs []abi.RegIndex, info *abi.ABIParamResultInfo, result *[]*Value, storeDest *Value, storeOffset int64) { + c.regs = regs + c.nextSlice = 0 + c.storeOffset = storeOffset + c.storeDest = storeDest + c.config = info.Config() + c.regValues = result } -func (x *expandState) invalidateRecursively(a *Value) { - var s string - if x.debug > 0 { - plus := " " - if a.Pos.IsStmt() == src.PosIsStmt { - plus = " +" - } - s = a.String() + plus + a.Pos.LineNumber() + " " + a.LongString() - if x.debug > 1 { - x.Printf("...marking %v unused\n", s) - } - } - lost := a.invalidateRecursively() - if x.debug&1 != 0 && lost { // For odd values of x.debug, do this. - x.Printf("Lost statement marker in %s on former %s\n", base.Ctxt.Pkgpath+"."+x.f.Name, s) - } +func (c *registerCursor) addArg(v *Value) { + *c.regValues = append(*c.regValues, v) } -// expandCalls converts LE (Late Expansion) calls that act like they receive value args into a lower-level form -// that is more oriented to a platform's ABI. The SelectN operations that extract results are rewritten into -// more appropriate forms, and any StructMake or ArrayMake inputs are decomposed until non-struct values are -// reached. On the callee side, OpArg nodes are not decomposed until this phase is run. -// TODO results should not be lowered until this phase. -func expandCalls(f *Func) { - // Calls that need lowering have some number of inputs, including a memory input, - // and produce a tuple of (value1, value2, ..., mem) where valueK may or may not be SSA-able. - - // With the current ABI those inputs need to be converted into stores to memory, - // rethreading the call's memory input to the first, and the new call now receiving the last. - - // With the current ABI, the outputs need to be converted to loads, which will all use the call's - // memory output as their input. - sp, _ := f.spSb() - x := &expandState{ - f: f, - abi1: f.ABI1, - debug: f.pass.debug, - regSize: f.Config.RegSize, - sp: sp, - typs: &f.Config.Types, - ptrSize: f.Config.PtrSize, - namedSelects: make(map[*Value][]namedVal), - sdom: f.Sdom(), - commonArgs: make(map[selKey]*Value), - memForCall: make(map[ID]*Value), - transformedSelects: make(map[ID]bool), - } - - // For 32-bit, need to deal with decomposition of 64-bit integers, which depends on endianness. 
- if f.Config.BigEndian { - x.lowOffset, x.hiOffset = 4, 0 - x.loRo, x.hiRo = 1, 0 - } else { - x.lowOffset, x.hiOffset = 0, 4 - x.loRo, x.hiRo = 0, 1 - } - - if x.debug > 1 { - x.Printf("\nexpandsCalls(%s)\n", f.Name) - } - - for i, name := range f.Names { - t := name.Type - if x.isAlreadyExpandedAggregateType(t) { - for j, v := range f.NamedValues[*name] { - if v.Op == OpSelectN || v.Op == OpArg && x.isAlreadyExpandedAggregateType(v.Type) { - ns := x.namedSelects[v] - x.namedSelects[v] = append(ns, namedVal{locIndex: i, valIndex: j}) - } - } - } - } - - // TODO if too slow, whole program iteration can be replaced w/ slices of appropriate values, accumulated in first loop here. +func (c *registerCursor) hasRegs() bool { + return len(c.regs) > 0 +} - // Step 0: rewrite the calls to convert args to calls into stores/register movement. - for _, b := range f.Blocks { - for _, v := range b.Values { - firstArg := 0 - switch v.Op { - case OpStaticLECall, OpTailLECall: - case OpInterLECall: - firstArg = 1 - case OpClosureLECall: - firstArg = 2 - default: - continue - } - x.rewriteArgs(v, firstArg) - } - if isBlockMultiValueExit(b) { - x.indent(3) - // Very similar to code in rewriteArgs, but results instead of args. - v := b.Controls[0] - m0 := v.MemoryArg() - mem := m0 - aux := f.OwnAux - allResults := []*Value{} - if x.debug > 1 { - x.Printf("multiValueExit rewriting %s\n", v.LongString()) - } - var oldArgs []*Value - for j, a := range v.Args[:len(v.Args)-1] { - oldArgs = append(oldArgs, a) - i := int64(j) - auxType := aux.TypeOfResult(i) - auxBase := b.NewValue2A(v.Pos, OpLocalAddr, types.NewPtr(auxType), aux.NameOfResult(i), x.sp, mem) - auxOffset := int64(0) - auxSize := aux.SizeOfResult(i) - aRegs := aux.RegsOfResult(int64(j)) - if len(aRegs) == 0 && a.Op == OpDereference { - // Avoid a self-move, and if one is detected try to remove the already-inserted VarDef for the assignment that won't happen. - if dAddr, dMem := a.Args[0], a.Args[1]; dAddr.Op == OpLocalAddr && dAddr.Args[0].Op == OpSP && - dAddr.Args[1] == dMem && dAddr.Aux == aux.NameOfResult(i) { - if dMem.Op == OpVarDef && dMem.Aux == dAddr.Aux { - dMem.copyOf(dMem.MemoryArg()) // elide the VarDef - } - continue - } - mem = x.rewriteDereference(v.Block, auxBase, a, mem, auxOffset, auxSize, auxType, a.Pos) - } else { - if a.Op == OpLoad && a.Args[0].Op == OpLocalAddr { - addr := a.Args[0] // This is a self-move. // TODO(register args) do what here for registers? - if addr.MemoryArg() == a.MemoryArg() && addr.Aux == aux.NameOfResult(i) { - continue - } - } - var rc registerCursor - var result *[]*Value - if len(aRegs) > 0 { - result = &allResults - } - rc.init(aRegs, aux.abiInfo, result, auxBase) - mem = x.storeArgOrLoad(v.Pos, b, a, mem, aux.TypeOfResult(i), auxOffset, 0, rc) - } - } - v.resetArgs() - v.AddArgs(allResults...) - v.AddArg(mem) - v.Type = types.NewResults(append(abi.RegisterTypes(aux.abiInfo.OutParams()), types.TypeMem)) - b.SetControl(v) - for _, a := range oldArgs { - if a.Uses == 0 { - if x.debug > 1 { - x.Printf("...marking %v unused\n", a.LongString()) - } - x.invalidateRecursively(a) - } - } - if x.debug > 1 { - x.Printf("...multiValueExit new result %s\n", v.LongString()) - } - x.indent(-3) - } - } +func (c *registerCursor) ArgOpAndRegisterFor() (Op, int64) { + r := c.regs[c.nextSlice] + return ArgOpAndRegisterFor(r, c.config) +} - // Step 1: any stores of aggregates remaining are believed to be sourced from call results or args. 
- // Decompose those stores into a series of smaller stores, adding selection ops as necessary. - for _, b := range f.Blocks { - for _, v := range b.Values { - if v.Op == OpStore { - t := v.Aux.(*types.Type) - source := v.Args[1] - tSrc := source.Type - iAEATt := x.isAlreadyExpandedAggregateType(t) - - if !iAEATt { - // guarding against store immediate struct into interface data field -- store type is *uint8 - // TODO can this happen recursively? - iAEATt = x.isAlreadyExpandedAggregateType(tSrc) - if iAEATt { - t = tSrc - } - } - dst, mem := v.Args[0], v.Args[2] - mem = x.storeArgOrLoad(v.Pos, b, source, mem, t, 0, 0, registerCursor{storeDest: dst}) - v.copyOf(mem) - } - } +// ArgOpAndRegisterFor converts an abi register index into an ssa Op and corresponding +// arg register index. +func ArgOpAndRegisterFor(r abi.RegIndex, abiConfig *abi.ABIConfig) (Op, int64) { + i := abiConfig.FloatIndexFor(r) + if i >= 0 { // float PR + return OpArgFloatReg, i } + return OpArgIntReg, int64(r) +} - val2Preds := make(map[*Value]int32) // Used to accumulate dependency graph of selection operations for topological ordering. - - // Step 2: transform or accumulate selection operations for rewrite in topological order. - // - // Aggregate types that have already (in earlier phases) been transformed must be lowered comprehensively to finish - // the transformation (user-defined structs and arrays, slices, strings, interfaces, complex, 64-bit on 32-bit architectures), - // - // Any select-for-addressing applied to call results can be transformed directly. - for _, b := range f.Blocks { - for _, v := range b.Values { - // Accumulate chains of selectors for processing in topological order - switch v.Op { - case OpStructSelect, OpArraySelect, - OpIData, OpITab, - OpStringPtr, OpStringLen, - OpSlicePtr, OpSliceLen, OpSliceCap, OpSlicePtrUnchecked, - OpComplexReal, OpComplexImag, - OpInt64Hi, OpInt64Lo: - w := v.Args[0] - switch w.Op { - case OpStructSelect, OpArraySelect, OpSelectN, OpArg: - val2Preds[w] += 1 - if x.debug > 1 { - x.Printf("v2p[%s] = %d\n", w.LongString(), val2Preds[w]) - } - } - fallthrough - - case OpSelectN: - if _, ok := val2Preds[v]; !ok { - val2Preds[v] = 0 - if x.debug > 1 { - x.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v]) - } - } +type selKey struct { + from *Value // what is selected from + offsetOrIndex int64 // whatever is appropriate for the selector + size int64 + typ *types.Type +} - case OpArg: - if !x.isAlreadyExpandedAggregateType(v.Type) { - continue - } - if _, ok := val2Preds[v]; !ok { - val2Preds[v] = 0 - if x.debug > 1 { - x.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v]) - } - } +type expandState struct { + f *Func + debug int // odd values log lost statement markers, so likely settings are 1 (stmts), 2 (expansion), and 3 (both) + regSize int64 + sp *Value + typs *Types + + firstOp Op // for 64-bit integers on 32-bit machines, first word in memory + secondOp Op // for 64-bit integers on 32-bit machines, second word in memory + firstType *types.Type // first half type, for Int64 + secondType *types.Type // second half type, for Int64 + + wideSelects map[*Value]*Value // Selects that are not SSA-able, mapped to consuming stores. + commonSelectors map[selKey]*Value // used to de-dupe selectors + commonArgs map[selKey]*Value // used to de-dupe OpArg/OpArgIntReg/OpArgFloatReg + memForCall map[ID]*Value // For a call, need to know the unique selector that gets the mem. 
+ indentLevel int // Indentation for debugging recursion +} - case OpSelectNAddr: - // Do these directly, there are no chains of selectors. - call := v.Args[0] - which := v.AuxInt - aux := call.Aux.(*AuxCall) - pt := v.Type - off := x.offsetFrom(x.f.Entry, x.sp, aux.OffsetOfResult(which), pt) - v.copyOf(off) - } - } +// intPairTypes returns the pair of 32-bit int types needed to encode a 64-bit integer type on a target +// that has no 64-bit integer registers. +func (x *expandState) intPairTypes(et types.Kind) (tHi, tLo *types.Type) { + tHi = x.typs.UInt32 + if et == types.TINT64 { + tHi = x.typs.Int32 } + tLo = x.typs.UInt32 + return +} - // Step 3: Compute topological order of selectors, - // then process it in reverse to eliminate duplicates, - // then forwards to rewrite selectors. - // - // All chains of selectors end up in same block as the call. - - // Compilation must be deterministic, so sort after extracting first zeroes from map. - // Sorting allows dominators-last order within each batch, - // so that the backwards scan for duplicates will most often find copies from dominating blocks (it is best-effort). - var toProcess []*Value - less := func(i, j int) bool { - vi, vj := toProcess[i], toProcess[j] - bi, bj := vi.Block, vj.Block - if bi == bj { - return vi.ID < vj.ID +// offsetFrom creates an offset from a pointer, simplifying chained offsets and offsets from SP +func (x *expandState) offsetFrom(b *Block, from *Value, offset int64, pt *types.Type) *Value { + ft := from.Type + if offset == 0 { + if ft == pt { + return from } - return x.sdom.domorder(bi) > x.sdom.domorder(bj) // reverse the order to put dominators last. - } - - // Accumulate order in allOrdered - var allOrdered []*Value - for v, n := range val2Preds { - if n == 0 { - allOrdered = append(allOrdered, v) + // This captures common, (apparently) safe cases. The unsafe cases involve ft == uintptr + if (ft.IsPtr() || ft.IsUnsafePtr()) && pt.IsPtr() { + return from } } - last := 0 // allOrdered[0:last] has been top-sorted and processed - for len(val2Preds) > 0 { - toProcess = allOrdered[last:] - last = len(allOrdered) - sort.SliceStable(toProcess, less) - for _, v := range toProcess { - delete(val2Preds, v) - if v.Op == OpArg { - continue // no Args[0], hence done. - } - w := v.Args[0] - n, ok := val2Preds[w] - if !ok { - continue - } - if n == 1 { - allOrdered = append(allOrdered, w) - delete(val2Preds, w) - continue - } - val2Preds[w] = n - 1 - } + // Simplify, canonicalize + for from.Op == OpOffPtr { + offset += from.AuxInt + from = from.Args[0] } - - x.commonSelectors = make(map[selKey]*Value) - // Rewrite duplicate selectors as copies where possible. - for i := len(allOrdered) - 1; i >= 0; i-- { - v := allOrdered[i] - if v.Op == OpArg { - continue - } - w := v.Args[0] - if w.Op == OpCopy { - for w.Op == OpCopy { - w = w.Args[0] - } - v.SetArg(0, w) - } - typ := v.Type - if typ.IsMemory() { - continue // handled elsewhere, not an indexable result - } - size := typ.Size() - offset := int64(0) - switch v.Op { - case OpStructSelect: - if w.Type.Kind() == types.TSTRUCT { - offset = w.Type.FieldOff(int(v.AuxInt)) - } else { // Immediate interface data artifact, offset is zero. - f.Fatalf("Expand calls interface data problem, func %s, v=%s, w=%s\n", f.Name, v.LongString(), w.LongString()) - } - case OpArraySelect: - offset = size * v.AuxInt - case OpSelectN: - offset = v.AuxInt // offset is just a key, really. 
- case OpInt64Hi: - offset = x.hiOffset - case OpInt64Lo: - offset = x.lowOffset - case OpStringLen, OpSliceLen, OpIData: - offset = x.ptrSize - case OpSliceCap: - offset = 2 * x.ptrSize - case OpComplexImag: - offset = size - } - sk := selKey{from: w, size: size, offsetOrIndex: offset, typ: typ} - dupe := x.commonSelectors[sk] - if dupe == nil { - x.commonSelectors[sk] = v - } else if x.sdom.IsAncestorEq(dupe.Block, v.Block) { - if x.debug > 1 { - x.Printf("Duplicate, make %s copy of %s\n", v, dupe) - } - v.copyOf(dupe) - } else { - // Because values are processed in dominator order, the old common[s] will never dominate after a miss is seen. - // Installing the new value might match some future values. - x.commonSelectors[sk] = v - } + if from == x.sp { + return x.f.ConstOffPtrSP(pt, offset, x.sp) } + return b.NewValue1I(from.Pos.WithNotStmt(), OpOffPtr, pt, offset, from) +} - // Indices of entries in f.Names that need to be deleted. - var toDelete []namedVal - - // Rewrite selectors. - for i, v := range allOrdered { - if x.debug > 1 { - b := v.Block - x.Printf("allOrdered[%d] = b%d, %s, uses=%d\n", i, b.ID, v.LongString(), v.Uses) - } - if v.Uses == 0 { - x.invalidateRecursively(v) - continue - } - if v.Op == OpCopy { - continue - } - locs := x.rewriteSelect(v, v, 0, 0) - // Install new names. - if v.Type.IsMemory() { - continue - } - // Leaf types may have debug locations - if !x.isAlreadyExpandedAggregateType(v.Type) { - for _, l := range locs { - if _, ok := f.NamedValues[*l]; !ok { - f.Names = append(f.Names, l) - } - f.NamedValues[*l] = append(f.NamedValues[*l], v) - } - continue - } - if ns, ok := x.namedSelects[v]; ok { - // Not-leaf types that had debug locations need to lose them. +func (x *expandState) regWidth(t *types.Type) Abi1RO { + return Abi1RO(x.f.ABI1.NumParamRegs(t)) +} - toDelete = append(toDelete, ns...) - } +// regOffset returns the register offset of the i'th element of type t +func (x *expandState) regOffset(t *types.Type, i int) Abi1RO { + // TODO maybe cache this in a map if profiling recommends. + if i == 0 { + return 0 } - - deleteNamedVals(f, toDelete) - - // Step 4: rewrite the calls themselves, correcting the type. - for _, b := range f.Blocks { - for _, v := range b.Values { - switch v.Op { - case OpArg: - x.rewriteArgToMemOrRegs(v) - case OpStaticLECall: - v.Op = OpStaticCall - rts := abi.RegisterTypes(v.Aux.(*AuxCall).abiInfo.OutParams()) - v.Type = types.NewResults(append(rts, types.TypeMem)) - case OpTailLECall: - v.Op = OpTailCall - rts := abi.RegisterTypes(v.Aux.(*AuxCall).abiInfo.OutParams()) - v.Type = types.NewResults(append(rts, types.TypeMem)) - case OpClosureLECall: - v.Op = OpClosureCall - rts := abi.RegisterTypes(v.Aux.(*AuxCall).abiInfo.OutParams()) - v.Type = types.NewResults(append(rts, types.TypeMem)) - case OpInterLECall: - v.Op = OpInterCall - rts := abi.RegisterTypes(v.Aux.(*AuxCall).abiInfo.OutParams()) - v.Type = types.NewResults(append(rts, types.TypeMem)) - } - } + if t.IsArray() { + return Abi1RO(i) * x.regWidth(t.Elem()) } - - // Step 5: dedup OpArgXXXReg values. Mostly it is already dedup'd by commonArgs, - // but there are cases that we have same OpArgXXXReg values with different types. - // E.g. string is sometimes decomposed as { *int8, int }, sometimes as { unsafe.Pointer, uintptr }. - // (Can we avoid that?) 
- var IArg, FArg [32]*Value - for _, v := range f.Entry.Values { - switch v.Op { - case OpArgIntReg: - i := v.AuxInt - if w := IArg[i]; w != nil { - if w.Type.Size() != v.Type.Size() { - f.Fatalf("incompatible OpArgIntReg [%d]: %s and %s", i, v.LongString(), w.LongString()) - } - if w.Type.IsUnsafePtr() && !v.Type.IsUnsafePtr() { - // Update unsafe.Pointer type if we know the actual pointer type. - w.Type = v.Type - } - // TODO: don't dedup pointer and scalar? Rewrite to OpConvert? Can it happen? - v.copyOf(w) - } else { - IArg[i] = v - } - case OpArgFloatReg: - i := v.AuxInt - if w := FArg[i]; w != nil { - if w.Type.Size() != v.Type.Size() { - f.Fatalf("incompatible OpArgFloatReg [%d]: %v and %v", i, v, w) - } - v.copyOf(w) - } else { - FArg[i] = v - } + if t.IsStruct() { + k := Abi1RO(0) + for j := 0; j < i; j++ { + k += x.regWidth(t.FieldType(j)) } + return k } + panic("Haven't implemented this case yet, do I need to?") +} - // Step 6: elide any copies introduced. - // Update named values. - for _, name := range f.Names { - values := f.NamedValues[*name] - for i, v := range values { - if v.Op == OpCopy { - a := v.Args[0] - for a.Op == OpCopy { - a = a.Args[0] - } - values[i] = a - } - } - } - for _, b := range f.Blocks { - for _, v := range b.Values { - for i, a := range v.Args { - if a.Op != OpCopy { - continue - } - aa := copySource(a) - v.SetArg(i, aa) - for a.Uses == 0 { - b := a.Args[0] - x.invalidateRecursively(a) - a = b - } - } - } +// prAssignForArg returns the ABIParamAssignment for v, assumed to be an OpArg. +func (x *expandState) prAssignForArg(v *Value) *abi.ABIParamAssignment { + if v.Op != OpArg { + panic(fmt.Errorf("Wanted OpArg, instead saw %s", v.LongString())) } + return ParamAssignmentForArgName(x.f, v.Aux.(*ir.Name)) +} - // Rewriting can attach lines to values that are unlikely to survive code generation, so move them to a use. - for _, b := range f.Blocks { - for _, v := range b.Values { - for _, a := range v.Args { - if a.Pos.IsStmt() != src.PosIsStmt { - continue - } - if a.Type.IsMemory() { - continue - } - if a.Pos.Line() != v.Pos.Line() { - continue - } - if !a.Pos.SameFile(v.Pos) { - continue - } - switch a.Op { - case OpArgIntReg, OpArgFloatReg, OpSelectN: - v.Pos = v.Pos.WithIsStmt() - a.Pos = a.Pos.WithDefaultStmt() - } - } +// ParamAssignmentForArgName returns the ABIParamAssignment for f's arg with matching name. +func ParamAssignmentForArgName(f *Func, name *ir.Name) *abi.ABIParamAssignment { + abiInfo := f.OwnAux.abiInfo + ip := abiInfo.InParams() + for i, a := range ip { + if a.Name == name { + return &ip[i] } } + panic(fmt.Errorf("Did not match param %v in prInfo %+v", name, abiInfo.InParams())) } -// rewriteArgToMemOrRegs converts OpArg v in-place into the register version of v, -// if that is appropriate. 
-func (x *expandState) rewriteArgToMemOrRegs(v *Value) *Value { - if x.debug > 1 { - x.indent(3) - defer x.indent(-3) - x.Printf("rewriteArgToMemOrRegs(%s)\n", v.LongString()) - } - pa := x.prAssignForArg(v) - switch len(pa.Registers) { - case 0: - frameOff := v.Aux.(*ir.Name).FrameOffset() - if pa.Offset() != int32(frameOff+x.f.ABISelf.LocalsOffset()) { - panic(fmt.Errorf("Parameter assignment %d and OpArg.Aux frameOffset %d disagree, op=%s", - pa.Offset(), frameOff, v.LongString())) - } - case 1: - t := v.Type - key := selKey{v, 0, t.Size(), t} - w := x.commonArgs[key] - if w != nil && w.Uses != 0 { // do not reuse dead value - v.copyOf(w) - break - } - r := pa.Registers[0] - var i int64 - v.Op, i = ArgOpAndRegisterFor(r, x.f.ABISelf) - v.Aux = &AuxNameOffset{v.Aux.(*ir.Name), 0} - v.AuxInt = i - x.commonArgs[key] = v - - default: - panic(badVal("Saw unexpanded OpArg", v)) - } - if x.debug > 1 { - x.Printf("-->%s\n", v.LongString()) - } - return v +// indent increments (or decrements) the indentation. +func (x *expandState) indent(n int) { + x.indentLevel += n } -// newArgToMemOrRegs either rewrites toReplace into an OpArg referencing memory or into an OpArgXXXReg to a register, -// or rewrites it into a copy of the appropriate OpArgXXX. The actual OpArgXXX is determined by combining baseArg (an OpArg) -// with offset, regOffset, and t to determine which portion of it to reference (either all or a part, in memory or in registers). -func (x *expandState) newArgToMemOrRegs(baseArg, toReplace *Value, offset int64, regOffset Abi1RO, t *types.Type, pos src.XPos) *Value { - if x.debug > 1 { - x.indent(3) - defer x.indent(-3) - x.Printf("newArgToMemOrRegs(base=%s; toReplace=%s; t=%s; memOff=%d; regOff=%d)\n", baseArg.String(), toReplace.LongString(), t.String(), offset, regOffset) - } - key := selKey{baseArg, offset, t.Size(), t} - w := x.commonArgs[key] - if w != nil && w.Uses != 0 { // do not reuse dead value - if toReplace != nil { - toReplace.copyOf(w) - if x.debug > 1 { - x.Printf("...replace %s\n", toReplace.LongString()) - } - } - if x.debug > 1 { - x.Printf("-->%s\n", w.LongString()) - } - return w +// Printf does an indented fmt.Printf on the format and args. +func (x *expandState) Printf(format string, a ...interface{}) (n int, err error) { + if x.indentLevel > 0 { + fmt.Printf("%[1]*s", x.indentLevel, "") } + return fmt.Printf(format, a...) 
+} - pa := x.prAssignForArg(baseArg) - if len(pa.Registers) == 0 { // Arg is on stack - frameOff := baseArg.Aux.(*ir.Name).FrameOffset() - if pa.Offset() != int32(frameOff+x.f.ABISelf.LocalsOffset()) { - panic(fmt.Errorf("Parameter assignment %d and OpArg.Aux frameOffset %d disagree, op=%s", - pa.Offset(), frameOff, baseArg.LongString())) - } - aux := baseArg.Aux - auxInt := baseArg.AuxInt + offset - if toReplace != nil && toReplace.Block == baseArg.Block { - toReplace.reset(OpArg) - toReplace.Aux = aux - toReplace.AuxInt = auxInt - toReplace.Type = t - w = toReplace - } else { - w = baseArg.Block.NewValue0IA(baseArg.Pos, OpArg, t, auxInt, aux) - } - x.commonArgs[key] = w - if toReplace != nil { - toReplace.copyOf(w) +func (x *expandState) invalidateRecursively(a *Value) { + var s string + if x.debug > 0 { + plus := " " + if a.Pos.IsStmt() == src.PosIsStmt { + plus = " +" } + s = a.String() + plus + a.Pos.LineNumber() + " " + a.LongString() if x.debug > 1 { - x.Printf("-->%s\n", w.LongString()) + x.Printf("...marking %v unused\n", s) } - return w - } - // Arg is in registers - r := pa.Registers[regOffset] - op, auxInt := ArgOpAndRegisterFor(r, x.f.ABISelf) - if op == OpArgIntReg && t.IsFloat() || op == OpArgFloatReg && t.IsInteger() { - fmt.Printf("pa=%v\nx.f.OwnAux.abiInfo=%s\n", - pa.ToString(x.f.ABISelf, true), - x.f.OwnAux.abiInfo.String()) - panic(fmt.Errorf("Op/Type mismatch, op=%s, type=%s", op.String(), t.String())) - } - if baseArg.AuxInt != 0 { - base.Fatalf("BaseArg %s bound to registers has non-zero AuxInt", baseArg.LongString()) - } - aux := &AuxNameOffset{baseArg.Aux.(*ir.Name), offset} - if toReplace != nil && toReplace.Block == baseArg.Block { - toReplace.reset(op) - toReplace.Aux = aux - toReplace.AuxInt = auxInt - toReplace.Type = t - w = toReplace - } else { - w = baseArg.Block.NewValue0IA(baseArg.Pos, op, t, auxInt, aux) - } - x.commonArgs[key] = w - if toReplace != nil { - toReplace.copyOf(w) } - if x.debug > 1 { - x.Printf("-->%s\n", w.LongString()) - } - return w - -} - -// ArgOpAndRegisterFor converts an abi register index into an ssa Op and corresponding -// arg register index. -func ArgOpAndRegisterFor(r abi.RegIndex, abiConfig *abi.ABIConfig) (Op, int64) { - i := abiConfig.FloatIndexFor(r) - if i >= 0 { // float PR - return OpArgFloatReg, i + lost := a.invalidateRecursively() + if x.debug&1 != 0 && lost { // For odd values of x.debug, do this. + x.Printf("Lost statement marker in %s on former %s\n", base.Ctxt.Pkgpath+"."+x.f.Name, s) } - return OpArgIntReg, int64(r) } diff --git a/src/cmd/compile/internal/ssa/op.go b/src/cmd/compile/internal/ssa/op.go index 0fe9a9125f..cb151b2f6c 100644 --- a/src/cmd/compile/internal/ssa/op.go +++ b/src/cmd/compile/internal/ssa/op.go @@ -238,7 +238,7 @@ func (a *AuxCall) RegsOfArg(which int64) []abi.RegIndex { return a.abiInfo.InParam(int(which)).Registers } -// NameOfResult returns the type of result which (indexed 0, 1, etc). +// NameOfResult returns the ir.Name of result which (indexed 0, 1, etc). 
func (a *AuxCall) NameOfResult(which int64) *ir.Name { return a.abiInfo.OutParam(int(which)).Name } diff --git a/src/cmd/compile/internal/ssa/rewritedec.go b/src/cmd/compile/internal/ssa/rewritedec.go index 5c04708b27..fbfe15c0c5 100644 --- a/src/cmd/compile/internal/ssa/rewritedec.go +++ b/src/cmd/compile/internal/ssa/rewritedec.go @@ -6,12 +6,18 @@ import "cmd/compile/internal/types" func rewriteValuedec(v *Value) bool { switch v.Op { + case OpArrayMake1: + return rewriteValuedec_OpArrayMake1(v) + case OpArraySelect: + return rewriteValuedec_OpArraySelect(v) case OpComplexImag: return rewriteValuedec_OpComplexImag(v) case OpComplexReal: return rewriteValuedec_OpComplexReal(v) case OpIData: return rewriteValuedec_OpIData(v) + case OpIMake: + return rewriteValuedec_OpIMake(v) case OpITab: return rewriteValuedec_OpITab(v) case OpLoad: @@ -30,11 +36,92 @@ func rewriteValuedec(v *Value) bool { return rewriteValuedec_OpStringLen(v) case OpStringPtr: return rewriteValuedec_OpStringPtr(v) + case OpStructMake1: + return rewriteValuedec_OpStructMake1(v) + case OpStructSelect: + return rewriteValuedec_OpStructSelect(v) + } + return false +} +func rewriteValuedec_OpArrayMake1(v *Value) bool { + v_0 := v.Args[0] + // match: (ArrayMake1 x) + // cond: x.Type.IsPtr() + // result: x + for { + x := v_0 + if !(x.Type.IsPtr()) { + break + } + v.copyOf(x) + return true + } + return false +} +func rewriteValuedec_OpArraySelect(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (ArraySelect [0] x) + // cond: x.Type.IsPtr() + // result: x + for { + if auxIntToInt64(v.AuxInt) != 0 { + break + } + x := v_0 + if !(x.Type.IsPtr()) { + break + } + v.copyOf(x) + return true + } + // match: (ArraySelect (ArrayMake1 x)) + // result: x + for { + if v_0.Op != OpArrayMake1 { + break + } + x := v_0.Args[0] + v.copyOf(x) + return true + } + // match: (ArraySelect [0] (IData x)) + // result: (IData x) + for { + if auxIntToInt64(v.AuxInt) != 0 || v_0.Op != OpIData { + break + } + x := v_0.Args[0] + v.reset(OpIData) + v.AddArg(x) + return true + } + // match: (ArraySelect [i] x:(Load ptr mem)) + // result: @x.Block (Load (OffPtr [t.Elem().Size()*i] ptr) mem) + for { + i := auxIntToInt64(v.AuxInt) + x := v_0 + if x.Op != OpLoad { + break + } + t := x.Type + mem := x.Args[1] + ptr := x.Args[0] + b = x.Block + v0 := b.NewValue0(v.Pos, OpLoad, v.Type) + v.copyOf(v0) + v1 := b.NewValue0(v.Pos, OpOffPtr, v.Type.PtrTo()) + v1.AuxInt = int64ToAuxInt(t.Elem().Size() * i) + v1.AddArg(ptr) + v0.AddArg2(v1, mem) + return true } return false } func rewriteValuedec_OpComplexImag(v *Value) bool { v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types // match: (ComplexImag (ComplexMake _ imag )) // result: imag for { @@ -45,10 +132,58 @@ func rewriteValuedec_OpComplexImag(v *Value) bool { v.copyOf(imag) return true } + // match: (ComplexImag x:(Load ptr mem)) + // cond: t.IsComplex() && t.Size() == 8 + // result: @x.Block (Load (OffPtr [4] ptr) mem) + for { + x := v_0 + if x.Op != OpLoad { + break + } + t := x.Type + mem := x.Args[1] + ptr := x.Args[0] + if !(t.IsComplex() && t.Size() == 8) { + break + } + b = x.Block + v0 := b.NewValue0(v.Pos, OpLoad, typ.Float32) + v.copyOf(v0) + v1 := b.NewValue0(v.Pos, OpOffPtr, typ.Float32Ptr) + v1.AuxInt = int64ToAuxInt(4) + v1.AddArg(ptr) + v0.AddArg2(v1, mem) + return true + } + // match: (ComplexImag x:(Load ptr mem)) + // cond: t.IsComplex() && t.Size() == 16 + // result: @x.Block (Load (OffPtr [8] ptr) mem) + for { + x := v_0 + if x.Op != OpLoad { + break + } + t := x.Type + mem 
:= x.Args[1] + ptr := x.Args[0] + if !(t.IsComplex() && t.Size() == 16) { + break + } + b = x.Block + v0 := b.NewValue0(v.Pos, OpLoad, typ.Float64) + v.copyOf(v0) + v1 := b.NewValue0(v.Pos, OpOffPtr, typ.Float64Ptr) + v1.AuxInt = int64ToAuxInt(8) + v1.AddArg(ptr) + v0.AddArg2(v1, mem) + return true + } return false } func rewriteValuedec_OpComplexReal(v *Value) bool { v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types // match: (ComplexReal (ComplexMake real _ )) // result: real for { @@ -59,10 +194,53 @@ func rewriteValuedec_OpComplexReal(v *Value) bool { v.copyOf(real) return true } + // match: (ComplexReal x:(Load ptr mem)) + // cond: t.IsComplex() && t.Size() == 8 + // result: @x.Block (Load ptr mem) + for { + x := v_0 + if x.Op != OpLoad { + break + } + t := x.Type + mem := x.Args[1] + ptr := x.Args[0] + if !(t.IsComplex() && t.Size() == 8) { + break + } + b = x.Block + v0 := b.NewValue0(v.Pos, OpLoad, typ.Float32) + v.copyOf(v0) + v0.AddArg2(ptr, mem) + return true + } + // match: (ComplexReal x:(Load ptr mem)) + // cond: t.IsComplex() && t.Size() == 16 + // result: @x.Block (Load ptr mem) + for { + x := v_0 + if x.Op != OpLoad { + break + } + t := x.Type + mem := x.Args[1] + ptr := x.Args[0] + if !(t.IsComplex() && t.Size() == 16) { + break + } + b = x.Block + v0 := b.NewValue0(v.Pos, OpLoad, typ.Float64) + v.copyOf(v0) + v0.AddArg2(ptr, mem) + return true + } return false } func rewriteValuedec_OpIData(v *Value) bool { v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + typ := &b.Func.Config.Types // match: (IData (IMake _ data)) // result: data for { @@ -73,10 +251,52 @@ func rewriteValuedec_OpIData(v *Value) bool { v.copyOf(data) return true } + // match: (IData x:(Load ptr mem)) + // cond: t.IsInterface() + // result: @x.Block (Load (OffPtr [config.PtrSize] ptr) mem) + for { + x := v_0 + if x.Op != OpLoad { + break + } + t := x.Type + mem := x.Args[1] + ptr := x.Args[0] + if !(t.IsInterface()) { + break + } + b = x.Block + v0 := b.NewValue0(v.Pos, OpLoad, typ.BytePtr) + v.copyOf(v0) + v1 := b.NewValue0(v.Pos, OpOffPtr, typ.BytePtrPtr) + v1.AuxInt = int64ToAuxInt(config.PtrSize) + v1.AddArg(ptr) + v0.AddArg2(v1, mem) + return true + } + return false +} +func rewriteValuedec_OpIMake(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (IMake _typ (StructMake1 val)) + // result: (IMake _typ val) + for { + _typ := v_0 + if v_1.Op != OpStructMake1 { + break + } + val := v_1.Args[0] + v.reset(OpIMake) + v.AddArg2(_typ, val) + return true + } return false } func rewriteValuedec_OpITab(v *Value) bool { v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types // match: (ITab (IMake itab _)) // result: itab for { @@ -87,6 +307,26 @@ func rewriteValuedec_OpITab(v *Value) bool { v.copyOf(itab) return true } + // match: (ITab x:(Load ptr mem)) + // cond: t.IsInterface() + // result: @x.Block (Load ptr mem) + for { + x := v_0 + if x.Op != OpLoad { + break + } + t := x.Type + mem := x.Args[1] + ptr := x.Args[0] + if !(t.IsInterface()) { + break + } + b = x.Block + v0 := b.NewValue0(v.Pos, OpLoad, typ.Uintptr) + v.copyOf(v0) + v0.AddArg2(ptr, mem) + return true + } return false } func rewriteValuedec_OpLoad(v *Value) bool { @@ -209,6 +449,9 @@ func rewriteValuedec_OpLoad(v *Value) bool { } func rewriteValuedec_OpSliceCap(v *Value) bool { v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + typ := &b.Func.Config.Types // match: (SliceCap (SliceMake _ _ cap)) // result: cap for { @@ -219,10 +462,36 @@ func rewriteValuedec_OpSliceCap(v *Value) bool { 
v.copyOf(cap) return true } + // match: (SliceCap x:(Load ptr mem)) + // cond: t.IsSlice() + // result: @x.Block (Load (OffPtr [2*config.PtrSize] ptr) mem) + for { + x := v_0 + if x.Op != OpLoad { + break + } + t := x.Type + mem := x.Args[1] + ptr := x.Args[0] + if !(t.IsSlice()) { + break + } + b = x.Block + v0 := b.NewValue0(v.Pos, OpLoad, typ.Int) + v.copyOf(v0) + v1 := b.NewValue0(v.Pos, OpOffPtr, typ.IntPtr) + v1.AuxInt = int64ToAuxInt(2 * config.PtrSize) + v1.AddArg(ptr) + v0.AddArg2(v1, mem) + return true + } return false } func rewriteValuedec_OpSliceLen(v *Value) bool { v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + typ := &b.Func.Config.Types // match: (SliceLen (SliceMake _ len _)) // result: len for { @@ -233,10 +502,34 @@ func rewriteValuedec_OpSliceLen(v *Value) bool { v.copyOf(len) return true } + // match: (SliceLen x:(Load ptr mem)) + // cond: t.IsSlice() + // result: @x.Block (Load (OffPtr [config.PtrSize] ptr) mem) + for { + x := v_0 + if x.Op != OpLoad { + break + } + t := x.Type + mem := x.Args[1] + ptr := x.Args[0] + if !(t.IsSlice()) { + break + } + b = x.Block + v0 := b.NewValue0(v.Pos, OpLoad, typ.Int) + v.copyOf(v0) + v1 := b.NewValue0(v.Pos, OpOffPtr, typ.IntPtr) + v1.AuxInt = int64ToAuxInt(config.PtrSize) + v1.AddArg(ptr) + v0.AddArg2(v1, mem) + return true + } return false } func rewriteValuedec_OpSlicePtr(v *Value) bool { v_0 := v.Args[0] + b := v.Block // match: (SlicePtr (SliceMake ptr _ _ )) // result: ptr for { @@ -247,6 +540,26 @@ func rewriteValuedec_OpSlicePtr(v *Value) bool { v.copyOf(ptr) return true } + // match: (SlicePtr x:(Load ptr mem)) + // cond: t.IsSlice() + // result: @x.Block (Load ptr mem) + for { + x := v_0 + if x.Op != OpLoad { + break + } + t := x.Type + mem := x.Args[1] + ptr := x.Args[0] + if !(t.IsSlice()) { + break + } + b = x.Block + v0 := b.NewValue0(v.Pos, OpLoad, t.Elem().PtrTo()) + v.copyOf(v0) + v0.AddArg2(ptr, mem) + return true + } return false } func rewriteValuedec_OpSlicePtrUnchecked(v *Value) bool { @@ -393,10 +706,141 @@ func rewriteValuedec_OpStore(v *Value) bool { v.AddArg3(v0, data, v1) return true } + // match: (Store dst (StructMake1 f0) mem) + // result: (Store {t.FieldType(0)} (OffPtr [0] dst) f0 mem) + for { + dst := v_0 + if v_1.Op != OpStructMake1 { + break + } + t := v_1.Type + f0 := v_1.Args[0] + mem := v_2 + v.reset(OpStore) + v.Aux = typeToAux(t.FieldType(0)) + v0 := b.NewValue0(v.Pos, OpOffPtr, t.FieldType(0).PtrTo()) + v0.AuxInt = int64ToAuxInt(0) + v0.AddArg(dst) + v.AddArg3(v0, f0, mem) + return true + } + // match: (Store dst (StructMake2 f0 f1) mem) + // result: (Store {t.FieldType(1)} (OffPtr [t.FieldOff(1)] dst) f1 (Store {t.FieldType(0)} (OffPtr [0] dst) f0 mem)) + for { + dst := v_0 + if v_1.Op != OpStructMake2 { + break + } + t := v_1.Type + f1 := v_1.Args[1] + f0 := v_1.Args[0] + mem := v_2 + v.reset(OpStore) + v.Aux = typeToAux(t.FieldType(1)) + v0 := b.NewValue0(v.Pos, OpOffPtr, t.FieldType(1).PtrTo()) + v0.AuxInt = int64ToAuxInt(t.FieldOff(1)) + v0.AddArg(dst) + v1 := b.NewValue0(v.Pos, OpStore, types.TypeMem) + v1.Aux = typeToAux(t.FieldType(0)) + v2 := b.NewValue0(v.Pos, OpOffPtr, t.FieldType(0).PtrTo()) + v2.AuxInt = int64ToAuxInt(0) + v2.AddArg(dst) + v1.AddArg3(v2, f0, mem) + v.AddArg3(v0, f1, v1) + return true + } + // match: (Store dst (StructMake3 f0 f1 f2) mem) + // result: (Store {t.FieldType(2)} (OffPtr [t.FieldOff(2)] dst) f2 (Store {t.FieldType(1)} (OffPtr [t.FieldOff(1)] dst) f1 (Store {t.FieldType(0)} (OffPtr [0] dst) f0 mem))) + for { + dst := v_0 + if v_1.Op != 
OpStructMake3 {
+			break
+		}
+		t := v_1.Type
+		f2 := v_1.Args[2]
+		f0 := v_1.Args[0]
+		f1 := v_1.Args[1]
+		mem := v_2
+		v.reset(OpStore)
+		v.Aux = typeToAux(t.FieldType(2))
+		v0 := b.NewValue0(v.Pos, OpOffPtr, t.FieldType(2).PtrTo())
+		v0.AuxInt = int64ToAuxInt(t.FieldOff(2))
+		v0.AddArg(dst)
+		v1 := b.NewValue0(v.Pos, OpStore, types.TypeMem)
+		v1.Aux = typeToAux(t.FieldType(1))
+		v2 := b.NewValue0(v.Pos, OpOffPtr, t.FieldType(1).PtrTo())
+		v2.AuxInt = int64ToAuxInt(t.FieldOff(1))
+		v2.AddArg(dst)
+		v3 := b.NewValue0(v.Pos, OpStore, types.TypeMem)
+		v3.Aux = typeToAux(t.FieldType(0))
+		v4 := b.NewValue0(v.Pos, OpOffPtr, t.FieldType(0).PtrTo())
+		v4.AuxInt = int64ToAuxInt(0)
+		v4.AddArg(dst)
+		v3.AddArg3(v4, f0, mem)
+		v1.AddArg3(v2, f1, v3)
+		v.AddArg3(v0, f2, v1)
+		return true
+	}
+	// match: (Store dst (StructMake4 f0 f1 f2 f3) mem)
+	// result: (Store {t.FieldType(3)} (OffPtr [t.FieldOff(3)] dst) f3 (Store {t.FieldType(2)} (OffPtr [t.FieldOff(2)] dst) f2 (Store {t.FieldType(1)} (OffPtr [t.FieldOff(1)] dst) f1 (Store {t.FieldType(0)} (OffPtr [0] dst) f0 mem))))
+	for {
+		dst := v_0
+		if v_1.Op != OpStructMake4 {
+			break
+		}
+		t := v_1.Type
+		f3 := v_1.Args[3]
+		f0 := v_1.Args[0]
+		f1 := v_1.Args[1]
+		f2 := v_1.Args[2]
+		mem := v_2
+		v.reset(OpStore)
+		v.Aux = typeToAux(t.FieldType(3))
+		v0 := b.NewValue0(v.Pos, OpOffPtr, t.FieldType(3).PtrTo())
+		v0.AuxInt = int64ToAuxInt(t.FieldOff(3))
+		v0.AddArg(dst)
+		v1 := b.NewValue0(v.Pos, OpStore, types.TypeMem)
+		v1.Aux = typeToAux(t.FieldType(2))
+		v2 := b.NewValue0(v.Pos, OpOffPtr, t.FieldType(2).PtrTo())
+		v2.AuxInt = int64ToAuxInt(t.FieldOff(2))
+		v2.AddArg(dst)
+		v3 := b.NewValue0(v.Pos, OpStore, types.TypeMem)
+		v3.Aux = typeToAux(t.FieldType(1))
+		v4 := b.NewValue0(v.Pos, OpOffPtr, t.FieldType(1).PtrTo())
+		v4.AuxInt = int64ToAuxInt(t.FieldOff(1))
+		v4.AddArg(dst)
+		v5 := b.NewValue0(v.Pos, OpStore, types.TypeMem)
+		v5.Aux = typeToAux(t.FieldType(0))
+		v6 := b.NewValue0(v.Pos, OpOffPtr, t.FieldType(0).PtrTo())
+		v6.AuxInt = int64ToAuxInt(0)
+		v6.AddArg(dst)
+		v5.AddArg3(v6, f0, mem)
+		v3.AddArg3(v4, f1, v5)
+		v1.AddArg3(v2, f2, v3)
+		v.AddArg3(v0, f3, v1)
+		return true
+	}
+	// match: (Store dst (ArrayMake1 e) mem)
+	// result: (Store {e.Type} dst e mem)
+	for {
+		dst := v_0
+		if v_1.Op != OpArrayMake1 {
+			break
+		}
+		e := v_1.Args[0]
+		mem := v_2
+		v.reset(OpStore)
+		v.Aux = typeToAux(e.Type)
+		v.AddArg3(dst, e, mem)
+		return true
+	}
 	return false
 }
 func rewriteValuedec_OpStringLen(v *Value) bool {
 	v_0 := v.Args[0]
+	b := v.Block
+	config := b.Func.Config
+	typ := &b.Func.Config.Types
 	// match: (StringLen (StringMake _ len))
 	// result: len
 	for {
@@ -407,10 +851,35 @@ func rewriteValuedec_OpStringLen(v *Value) bool {
 		v.copyOf(len)
 		return true
 	}
+	// match: (StringLen x:(Load ptr mem))
+	// cond: t.IsString()
+	// result: @x.Block (Load (OffPtr [config.PtrSize] ptr) mem)
+	for {
+		x := v_0
+		if x.Op != OpLoad {
+			break
+		}
+		t := x.Type
+		mem := x.Args[1]
+		ptr := x.Args[0]
+		if !(t.IsString()) {
+			break
+		}
+		b = x.Block
+		v0 := b.NewValue0(v.Pos, OpLoad, typ.Int)
+		v.copyOf(v0)
+		v1 := b.NewValue0(v.Pos, OpOffPtr, typ.IntPtr)
+		v1.AuxInt = int64ToAuxInt(config.PtrSize)
+		v1.AddArg(ptr)
+		v0.AddArg2(v1, mem)
+		return true
+	}
 	return false
 }
 func rewriteValuedec_OpStringPtr(v *Value) bool {
 	v_0 := v.Args[0]
+	b := v.Block
+	typ := &b.Func.Config.Types
 	// match: (StringPtr (StringMake ptr _))
 	// result: ptr
 	for {
@@ -421,6 +890,191 @@ func rewriteValuedec_OpStringPtr(v *Value) bool {
 		v.copyOf(ptr)
 		return true
 	}
+	// match: (StringPtr x:(Load ptr mem))
+	// cond: t.IsString()
+	// result: @x.Block (Load ptr mem)
+	for {
+		x := v_0
+		if x.Op != OpLoad {
+			break
+		}
+		t := x.Type
+		mem := x.Args[1]
+		ptr := x.Args[0]
+		if !(t.IsString()) {
+			break
+		}
+		b = x.Block
+		v0 := b.NewValue0(v.Pos, OpLoad, typ.BytePtr)
+		v.copyOf(v0)
+		v0.AddArg2(ptr, mem)
+		return true
+	}
+	return false
+}
+func rewriteValuedec_OpStructMake1(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (StructMake1 x)
+	// cond: x.Type.IsPtr()
+	// result: x
+	for {
+		x := v_0
+		if !(x.Type.IsPtr()) {
+			break
+		}
+		v.copyOf(x)
+		return true
+	}
+	return false
+}
+func rewriteValuedec_OpStructSelect(v *Value) bool {
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (StructSelect [0] (IData x))
+	// result: (IData x)
+	for {
+		if auxIntToInt64(v.AuxInt) != 0 || v_0.Op != OpIData {
+			break
+		}
+		x := v_0.Args[0]
+		v.reset(OpIData)
+		v.AddArg(x)
+		return true
+	}
+	// match: (StructSelect (StructMake1 x))
+	// result: x
+	for {
+		if v_0.Op != OpStructMake1 {
+			break
+		}
+		x := v_0.Args[0]
+		v.copyOf(x)
+		return true
+	}
+	// match: (StructSelect [0] (StructMake2 x _))
+	// result: x
+	for {
+		if auxIntToInt64(v.AuxInt) != 0 || v_0.Op != OpStructMake2 {
+			break
+		}
+		x := v_0.Args[0]
+		v.copyOf(x)
+		return true
+	}
+	// match: (StructSelect [1] (StructMake2 _ x))
+	// result: x
+	for {
+		if auxIntToInt64(v.AuxInt) != 1 || v_0.Op != OpStructMake2 {
+			break
+		}
+		x := v_0.Args[1]
+		v.copyOf(x)
+		return true
+	}
+	// match: (StructSelect [0] (StructMake3 x _ _))
+	// result: x
+	for {
+		if auxIntToInt64(v.AuxInt) != 0 || v_0.Op != OpStructMake3 {
+			break
+		}
+		x := v_0.Args[0]
+		v.copyOf(x)
+		return true
+	}
+	// match: (StructSelect [1] (StructMake3 _ x _))
+	// result: x
+	for {
+		if auxIntToInt64(v.AuxInt) != 1 || v_0.Op != OpStructMake3 {
+			break
+		}
+		x := v_0.Args[1]
+		v.copyOf(x)
+		return true
+	}
+	// match: (StructSelect [2] (StructMake3 _ _ x))
+	// result: x
+	for {
+		if auxIntToInt64(v.AuxInt) != 2 || v_0.Op != OpStructMake3 {
+			break
+		}
+		x := v_0.Args[2]
+		v.copyOf(x)
+		return true
+	}
+	// match: (StructSelect [0] (StructMake4 x _ _ _))
+	// result: x
+	for {
+		if auxIntToInt64(v.AuxInt) != 0 || v_0.Op != OpStructMake4 {
+			break
+		}
+		x := v_0.Args[0]
+		v.copyOf(x)
+		return true
+	}
+	// match: (StructSelect [1] (StructMake4 _ x _ _))
+	// result: x
+	for {
+		if auxIntToInt64(v.AuxInt) != 1 || v_0.Op != OpStructMake4 {
+			break
+		}
+		x := v_0.Args[1]
+		v.copyOf(x)
+		return true
+	}
+	// match: (StructSelect [2] (StructMake4 _ _ x _))
+	// result: x
+	for {
+		if auxIntToInt64(v.AuxInt) != 2 || v_0.Op != OpStructMake4 {
+			break
+		}
+		x := v_0.Args[2]
+		v.copyOf(x)
+		return true
+	}
+	// match: (StructSelect [3] (StructMake4 _ _ _ x))
+	// result: x
+	for {
+		if auxIntToInt64(v.AuxInt) != 3 || v_0.Op != OpStructMake4 {
+			break
+		}
+		x := v_0.Args[3]
+		v.copyOf(x)
+		return true
+	}
+	// match: (StructSelect [0] x)
+	// cond: x.Type.IsPtr()
+	// result: x
+	for {
+		if auxIntToInt64(v.AuxInt) != 0 {
+			break
+		}
+		x := v_0
+		if !(x.Type.IsPtr()) {
+			break
+		}
+		v.copyOf(x)
+		return true
+	}
+	// match: (StructSelect [i] x:(Load ptr mem))
+	// result: @x.Block (Load (OffPtr [t.FieldOff(int(i))] ptr) mem)
+	for {
+		i := auxIntToInt64(v.AuxInt)
+		x := v_0
+		if x.Op != OpLoad {
+			break
+		}
+		t := x.Type
+		mem := x.Args[1]
+		ptr := x.Args[0]
+		b = x.Block
+		v0 := b.NewValue0(v.Pos, OpLoad, v.Type)
+		v.copyOf(v0)
+		v1 := b.NewValue0(v.Pos, OpOffPtr, v.Type.PtrTo())
+		v1.AuxInt = int64ToAuxInt(t.FieldOff(int(i)))
+		v1.AddArg(ptr)
+		v0.AddArg2(v1, mem)
+		return true
+	}
 	return false
 }
 func rewriteBlockdec(b *Block) bool {
diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go
index 93643af294..af2e0e477e 100644
--- a/src/cmd/compile/internal/ssagen/ssa.go
+++ b/src/cmd/compile/internal/ssagen/ssa.go
@@ -526,7 +526,7 @@ func buildssa(fn *ir.Func, worker int) *ssa.Func {
 						s.store(n.Type(), s.decladdrs[n], v)
 					} else { // Too big for SSA.
 						// Brute force, and early, do a bunch of stores from registers
-						// TODO fix the nasty storeArgOrLoad recursion in ssa/expand_calls.go so this Just Works with store of a big Arg.
+						// Note that expand calls knows about this and doesn't trouble itself with larger-than-SSA-able Args in registers.
 						s.storeParameterRegsToStack(s.f.ABISelf, paramAssignment, n, s.decladdrs[n], false)
 					}
 				}
diff --git a/test/abi/more_intstar_input.go b/test/abi/more_intstar_input.go
index f0a48fbdc2..3eb0fbcc3d 100644
--- a/test/abi/more_intstar_input.go
+++ b/test/abi/more_intstar_input.go
@@ -12,10 +12,6 @@
 
 package main
 
-import (
-	"fmt"
-)
-
 var sink int
 
 //go:registerparams
@@ -33,12 +29,12 @@ func G(a, b, c, d, e, f, g, h, i, j, k, l, m *int) {
 	var scratch [1000 * 100]int
 	I := *c - *e - *l // zero.
 	scratch[I] = *d
-	fmt.Println("Got this far!")
+	println("Got this far!")
 	sink += scratch[0]
 }
 
 func main() {
 	a, b, c, d, e, f, g, h, i, j, k, l, m := 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13
 	F(&a, &b, &c, &d, &e, &f, &g, &h, &i, &j, &k, &l, &m)
-	fmt.Printf("Sink = %d\n", sink-7)
+	println("Sink =", sink-7)
 }
diff --git a/test/abi/reg_not_ssa.go b/test/abi/reg_not_ssa.go
new file mode 100644
index 0000000000..5bd4b51a6b
--- /dev/null
+++ b/test/abi/reg_not_ssa.go
@@ -0,0 +1,40 @@
+// run
+
+//go:build !wasm
+// +build !wasm
+
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+// small enough for registers, too large for SSA
+type T struct {
+	a, b, c, d, e int
+}
+
+//go:noinline
+func F() {
+	a, b := g(), g()
+	h(b, b)
+	h(a, g())
+	if a.a == 1 {
+		a = g()
+	}
+	h(a, a)
+}
+
+//go:noinline
+func g() T {
+	return T{1, 2, 3, 4, 5}
+}
+
+//go:noinline
+func h(s, t T) {
+	if s != t {
+		println("NEQ")
+	}
+}
+
+func main() { F() }
diff --git a/test/codegen/condmove.go b/test/codegen/condmove.go
index 6c08116b2c..1058910307 100644
--- a/test/codegen/condmove.go
+++ b/test/codegen/condmove.go
@@ -57,7 +57,7 @@ func cmov16bit(x, y uint16) uint16 {
 	}
 	// amd64:"CMOVW(HI|CS)"
 	// arm64:"CSNEG\t(LS|HS)"
-	// ppc64x:"ISEL\t[$]0"
+	// ppc64x:"ISEL\t[$][01]"
 	// wasm:"Select"
 	return x
 }
diff --git a/test/codegen/spectre.go b/test/codegen/spectre.go
index d845da35ce..edc8b28028 100644
--- a/test/codegen/spectre.go
+++ b/test/codegen/spectre.go
@@ -13,12 +13,12 @@ func IndexArray(x *[10]int, i int) int {
 }
 
 func IndexString(x string, i int) byte {
-	// amd64:`CMOVQLS`
+	// amd64:`CMOVQ(LS|CC)`
 	return x[i]
 }
 
 func IndexSlice(x []float64, i int) float64 {
-	// amd64:`CMOVQLS`
+	// amd64:`CMOVQ(LS|CC)`
 	return x[i]
 }
 
-- 
2.44.0