Cypherpunks.ru repositories - gostls13.git/commitdiff
cmd/compile: expand calls cleanup
author     David Chase <drchase@google.com>
           Tue, 9 May 2023 14:34:52 +0000 (10:34 -0400)
committer  David Chase <drchase@google.com>
           Fri, 6 Oct 2023 20:57:33 +0000 (20:57 +0000)
Convert expand calls into a smaller number of focused
recursive rewrites, and rely on an enhanced version of
"decompose" to clean up afterwards.

Debugging information seems to emerge intact.
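
A rough illustration (editor's sketch; SSA value names invented): a call
callee(s), where s is a string, previously carried s as a single aggregate
value,

    v1 = StaticLECall {callee} s mem

and after expand calls (plus the decompose cleanup) the parts travel
separately, assigned to registers or stored to the args area per the ABI:

    p  = StringPtr s
    n  = StringLen s
    v1 = StaticCall {callee} p n mem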

Change-Id: Ic46da4207e3a4da5c8e2c47b637b0e35abbe56bb
Reviewed-on: https://go-review.googlesource.com/c/go/+/507295
Run-TryBot: David Chase <drchase@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>

src/cmd/compile/internal/ssa/_gen/dec.rules
src/cmd/compile/internal/ssa/compile.go
src/cmd/compile/internal/ssa/expand_calls.go
src/cmd/compile/internal/ssa/op.go
src/cmd/compile/internal/ssa/rewritedec.go
src/cmd/compile/internal/ssagen/ssa.go
test/abi/more_intstar_input.go
test/abi/reg_not_ssa.go [new file with mode: 0644]
test/codegen/condmove.go
test/codegen/spectre.go

diff --git a/src/cmd/compile/internal/ssa/_gen/dec.rules b/src/cmd/compile/internal/ssa/_gen/dec.rules
index b19489870dde044f2e93753b6cb2a502937ad340..4484cd7e28005040ce024e21daefca914a07956f 100644 (file)
     (OffPtr <typ.BytePtrPtr> [config.PtrSize] dst)
     data
     (Store {typ.Uintptr} dst itab mem))
+
+// Helpers for expand calls
+// Some of these are copied from generic.rules
+
+(IMake _typ (StructMake1 val)) => (IMake _typ val)
+(StructSelect [0] (IData x)) => (IData x)
+
+(StructSelect (StructMake1 x)) => x
+(StructSelect [0] (StructMake2 x _)) => x
+(StructSelect [1] (StructMake2 _ x)) => x
+(StructSelect [0] (StructMake3 x _ _)) => x
+(StructSelect [1] (StructMake3 _ x _)) => x
+(StructSelect [2] (StructMake3 _ _ x)) => x
+(StructSelect [0] (StructMake4 x _ _ _)) => x
+(StructSelect [1] (StructMake4 _ x _ _)) => x
+(StructSelect [2] (StructMake4 _ _ x _)) => x
+(StructSelect [3] (StructMake4 _ _ _ x)) => x
+
+// Special case coming from immediate interface rewriting
+// Typical case: (StructSelect [0] (IData (IMake typ dat))) rewrites to (StructSelect [0] dat),
+// but because the interface is immediate, the type of "IData" is a one-element struct containing
+// a pointer that is not the pointer type of dat (can be a *uint8).
+// More annoying case: (ArraySelect[0] (StructSelect[0] isAPtr))
+// There, result of the StructSelect is an Array (not a pointer) and
+// the pre-rewrite input to the ArraySelect is a struct, not a pointer.
+(StructSelect [0] x) && x.Type.IsPtr()  => x
+(ArraySelect [0] x) && x.Type.IsPtr()  => x
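+// (Illustration, with an invented type: for "type W struct{ p *int }", an
+// interface holding a W is immediate -- its data word is p itself -- so the
+// value reaching StructSelect/ArraySelect can already have pointer type,
+// which is what the x.Type.IsPtr() conditions above detect.)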
+
+// These, too.  Bits is bits.
+(ArrayMake1 x) && x.Type.IsPtr() => x
+(StructMake1 x) && x.Type.IsPtr() => x
+
+(Store dst (StructMake1 <t> f0) mem) =>
+  (Store {t.FieldType(0)} (OffPtr <t.FieldType(0).PtrTo()> [0] dst) f0 mem)
+(Store dst (StructMake2 <t> f0 f1) mem) =>
+  (Store {t.FieldType(1)}
+    (OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] dst)
+    f1
+    (Store {t.FieldType(0)}
+      (OffPtr <t.FieldType(0).PtrTo()> [0] dst)
+        f0 mem))
+(Store dst (StructMake3 <t> f0 f1 f2) mem) =>
+  (Store {t.FieldType(2)}
+    (OffPtr <t.FieldType(2).PtrTo()> [t.FieldOff(2)] dst)
+    f2
+    (Store {t.FieldType(1)}
+      (OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] dst)
+      f1
+      (Store {t.FieldType(0)}
+        (OffPtr <t.FieldType(0).PtrTo()> [0] dst)
+          f0 mem)))
+(Store dst (StructMake4 <t> f0 f1 f2 f3) mem) =>
+  (Store {t.FieldType(3)}
+    (OffPtr <t.FieldType(3).PtrTo()> [t.FieldOff(3)] dst)
+    f3
+    (Store {t.FieldType(2)}
+      (OffPtr <t.FieldType(2).PtrTo()> [t.FieldOff(2)] dst)
+      f2
+      (Store {t.FieldType(1)}
+        (OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] dst)
+        f1
+        (Store {t.FieldType(0)}
+          (OffPtr <t.FieldType(0).PtrTo()> [0] dst)
+            f0 mem))))
+
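+// Note how the memory argument is threaded in the Store rules above: the
+// innermost Store (field 0) takes the incoming mem, and each successive
+// field's Store uses the previous Store as its memory; e.g. for a two-field
+// struct, roughly Store(dst+FieldOff(1), f1, Store(dst+0, f0, mem)).
+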
+(ArraySelect (ArrayMake1 x)) => x
+(ArraySelect [0] (IData x)) => (IData x)
+
+(Store dst (ArrayMake1 e) mem) => (Store {e.Type} dst e mem)
+
+// NOTE removed must-not-be-SSA condition.
+(ArraySelect [i] x:(Load <t> ptr mem)) =>
+  @x.Block (Load <v.Type> (OffPtr <v.Type.PtrTo()> [t.Elem().Size()*i] ptr) mem)
+
+(StringPtr x:(Load <t> ptr mem)) && t.IsString() => @x.Block (Load <typ.BytePtr> ptr mem)
+(StringLen x:(Load <t> ptr mem)) && t.IsString() => @x.Block (Load <typ.Int>
+      (OffPtr <typ.IntPtr> [config.PtrSize] ptr)
+      mem)
+
+// NOTE removed must-not-be-SSA condition.
+(StructSelect [i] x:(Load <t> ptr mem)) =>
+  @x.Block (Load <v.Type> (OffPtr <v.Type.PtrTo()> [t.FieldOff(int(i))] ptr) mem)
+
+(ITab x:(Load <t> ptr mem)) && t.IsInterface() => @x.Block (Load <typ.Uintptr> ptr mem)
+
+(IData x:(Load <t> ptr mem)) && t.IsInterface() => @x.Block (Load <typ.BytePtr>
+      (OffPtr <typ.BytePtrPtr> [config.PtrSize] ptr)
+      mem)
+
+(SlicePtr x:(Load <t> ptr mem)) && t.IsSlice() => @x.Block (Load <t.Elem().PtrTo()> ptr mem)
+(SliceLen x:(Load <t> ptr mem)) && t.IsSlice() => @x.Block (Load <typ.Int>
+      (OffPtr <typ.IntPtr> [config.PtrSize] ptr)
+      mem)
+(SliceCap x:(Load <t> ptr mem)) && t.IsSlice() => @x.Block (Load <typ.Int>
+      (OffPtr <typ.IntPtr> [2*config.PtrSize] ptr)
+      mem)
+
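+// (A slice header is {ptr, len, cap} words -- len lives at PtrSize and cap at
+// 2*PtrSize, matching the OffPtr offsets in the slice rules above; a string
+// header is {ptr, len}.)
+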
+(ComplexReal x:(Load <t> ptr mem)) && t.IsComplex() && t.Size() == 8 => @x.Block (Load <typ.Float32> ptr mem)
+(ComplexImag x:(Load <t> ptr mem)) && t.IsComplex() && t.Size() == 8 => @x.Block (Load <typ.Float32>
+      (OffPtr <typ.Float32Ptr> [4] ptr)
+      mem)
+
+(ComplexReal x:(Load <t> ptr mem)) && t.IsComplex() && t.Size() == 16 => @x.Block (Load <typ.Float64> ptr mem)
+(ComplexImag x:(Load <t> ptr mem)) && t.IsComplex() && t.Size() == 16 => @x.Block (Load <typ.Float64>
+      (OffPtr <typ.Float64Ptr> [8] ptr)
+      mem)
diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go
index 625c98bb1f8fa58bdf02ac6503529fdd776ec2c2..d125891f88c58f798e9cd490ba999b0b2ef88ba8 100644 (file)
@@ -472,8 +472,8 @@ var passes = [...]pass{
        {name: "nilcheckelim", fn: nilcheckelim},
        {name: "prove", fn: prove},
        {name: "early fuse", fn: fuseEarly},
-       {name: "decompose builtin", fn: decomposeBuiltIn, required: true},
        {name: "expand calls", fn: expandCalls, required: true},
+       {name: "decompose builtin", fn: postExpandCallsDecompose, required: true},
        {name: "softfloat", fn: softfloat, required: true},
        {name: "late opt", fn: opt, required: true}, // TODO: split required rules and optimizing rules
        {name: "dead auto elim", fn: elimDeadAutosGeneric},
@@ -547,6 +547,8 @@ var passOrder = [...]constraint{
        {"generic cse", "tighten"},
        // checkbce needs the values removed
        {"generic deadcode", "check bce"},
+       // decompose builtin now also cleans up after expand calls
+       {"expand calls", "decompose builtin"},
        // don't run optimization pass until we've decomposed builtin objects
        {"decompose builtin", "late opt"},
        // decompose builtin is the last pass that may introduce new float ops, so run softfloat after it
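
(Editor's note on the mechanism: a passOrder entry is a simple before/after
pair -- the constraint type in this file reads approximately

    type constraint struct {
            a, b string // a must come before b
    }

so the added entry pins "expand calls" ahead of the relocated "decompose
builtin" pass.)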
diff --git a/src/cmd/compile/internal/ssa/expand_calls.go b/src/cmd/compile/internal/ssa/expand_calls.go
index e6f7306fa89a375cc3c45c26d57a0bfe899a1cf4..29c180be34ef6cb605652346e85ef3a940e7fbd8 100644 (file)
@@ -11,1788 +11,1021 @@ import (
        "cmd/compile/internal/types"
        "cmd/internal/src"
        "fmt"
-       "sort"
 )
 
-type selKey struct {
-       from          *Value // what is selected from
-       offsetOrIndex int64  // whatever is appropriate for the selector
-       size          int64
-       typ           *types.Type
+func postExpandCallsDecompose(f *Func) {
+       decomposeUser(f)    // redo user decompose to cleanup after expand calls
+       decomposeBuiltIn(f) // handles both regular decomposition and cleanup.
 }
 
-type Abi1RO uint8 // An offset within a parameter's slice of register indices, for abi1.
+func expandCalls(f *Func) {
+       // Convert each aggregate arg to a call into "dismantle aggregate, store/pass parts"
+       // Convert each aggregate result from a call into "assemble aggregate from parts"
+       // Convert each multivalue exit into "dismantle aggregate, store/return parts"
+       // Convert incoming aggregate arg into assembly of parts.
+       // Feed the modified SSA to decompose.
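+       //
+       // E.g. (illustrative): a returned string becomes its StringPtr and
+       // StringLen parts, each assigned to a register or stored to the
+       // results area as the ABI dictates.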
 
-func isBlockMultiValueExit(b *Block) bool {
-       return (b.Kind == BlockRet || b.Kind == BlockRetJmp) && b.Controls[0] != nil && b.Controls[0].Op == OpMakeResult
-}
+       sp, _ := f.spSb()
 
-func badVal(s string, v *Value) error {
-       return fmt.Errorf("%s %s", s, v.LongString())
-}
+       x := &expandState{
+               f:               f,
+               debug:           f.pass.debug,
+               regSize:         f.Config.RegSize,
+               sp:              sp,
+               typs:            &f.Config.Types,
+               wideSelects:     make(map[*Value]*Value),
+               commonArgs:      make(map[selKey]*Value),
+               commonSelectors: make(map[selKey]*Value),
+               memForCall:      make(map[ID]*Value),
+       }
 
-// removeTrivialWrapperTypes unwraps layers of
-// struct { singleField SomeType } and [1]SomeType
-// until a non-wrapper type is reached.  This is useful
-// for working with assignments to/from interface data
-// fields (either second operand to OpIMake or OpIData)
-// where the wrapping or type conversion can be elided
-// because of type conversions/assertions in source code
-// that do not appear in SSA.
-func removeTrivialWrapperTypes(t *types.Type) *types.Type {
-       for {
-               if t.IsStruct() && t.NumFields() == 1 {
-                       t = t.Field(0).Type
-                       continue
-               }
-               if t.IsArray() && t.NumElem() == 1 {
-                       t = t.Elem()
-                       continue
-               }
-               break
+       // For 32-bit, need to deal with decomposition of 64-bit integers, which depends on endianness.
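+       // (On big-endian targets the high 32-bit word of an int64 sits at the
+       // lower address, so it is selected first; on little-endian targets the
+       // low word comes first.)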
+       if f.Config.BigEndian {
+               x.firstOp = OpInt64Hi
+               x.secondOp = OpInt64Lo
+               x.firstType = x.typs.Int32
+               x.secondType = x.typs.UInt32
+       } else {
+               x.firstOp = OpInt64Lo
+               x.secondOp = OpInt64Hi
+               x.firstType = x.typs.UInt32
+               x.secondType = x.typs.Int32
        }
-       return t
-}
 
-// A registerCursor tracks which register is used for an Arg or regValues, or a piece of such.
-type registerCursor struct {
-       // TODO(register args) convert this to a generalized target cursor.
-       storeDest *Value // if there are no register targets, then this is the base of the store.
-       regsLen   int    // the number of registers available for this Arg/result (which is all in registers or not at all)
-       nextSlice Abi1RO // the next register/register-slice offset
-       config    *abi.ABIConfig
-       regValues *[]*Value // values assigned to registers accumulate here
-}
+       // Defer select processing until after all calls and selects are seen.
+       var selects []*Value
+       var calls []*Value
+       var args []*Value
+       var exitBlocks []*Block
 
-func (c *registerCursor) String() string {
-       dest := "<none>"
-       if c.storeDest != nil {
-               dest = c.storeDest.String()
-       }
-       regs := "<none>"
-       if c.regValues != nil {
-               regs = ""
-               for i, x := range *c.regValues {
-                       if i > 0 {
-                               regs = regs + "; "
+       var m0 *Value
+
+       // Accumulate lists of calls, args, selects, and exit blocks to process,
+       // note "wide" selects consumed by stores,
+       // rewrite mem for each call,
+       // rewrite each OpSelectNAddr.
+       for _, b := range f.Blocks {
+               for _, v := range b.Values {
+                       switch v.Op {
+                       case OpInitMem:
+                               m0 = v
+
+                       case OpClosureLECall, OpInterLECall, OpStaticLECall, OpTailLECall:
+                               calls = append(calls, v)
+
+                       case OpArg:
+                               args = append(args, v)
+
+                       case OpStore:
+                               if a := v.Args[1]; a.Op == OpSelectN && !CanSSA(a.Type) {
+                                       if a.Uses > 1 {
+                                               panic(fmt.Errorf("Saw double use of wide SelectN %s operand of Store %s",
+                                                       a.LongString(), v.LongString()))
+                                       }
+                                       x.wideSelects[a] = v
+                               }
+
+                       case OpSelectN:
+                               if v.Type == types.TypeMem {
+                                       // rewrite the mem selector in place
+                                       call := v.Args[0]
+                                       aux := call.Aux.(*AuxCall)
+                                       mem := x.memForCall[call.ID]
+                                       if mem == nil {
+                                               v.AuxInt = int64(aux.abiInfo.OutRegistersUsed())
+                                               x.memForCall[call.ID] = v
+                                       } else {
+                                               panic(fmt.Errorf("Saw two memories for call %v, %v and %v", call, mem, v))
+                                       }
+                               } else {
+                                       selects = append(selects, v)
+                               }
+
+                       case OpSelectNAddr:
+                               call := v.Args[0]
+                               which := v.AuxInt
+                               aux := call.Aux.(*AuxCall)
+                               pt := v.Type
+                               off := x.offsetFrom(x.f.Entry, x.sp, aux.OffsetOfResult(which), pt)
+                               v.copyOf(off)
                        }
-                       regs = regs + x.LongString()
                }
-       }
-       // not printing the config because that has not been useful
-       return fmt.Sprintf("RCSR{storeDest=%v, regsLen=%d, nextSlice=%d, regValues=[%s]}", dest, c.regsLen, c.nextSlice, regs)
-}
 
-// next effectively post-increments the register cursor; the receiver is advanced,
-// the old value is returned.
-func (c *registerCursor) next(t *types.Type) registerCursor {
-       rc := *c
-       if int(c.nextSlice) < c.regsLen {
-               w := c.config.NumParamRegs(t)
-               c.nextSlice += Abi1RO(w)
+               // Rewrite function results from an exit block:
+               // values returned by the function need to be split out into registers.
+               if isBlockMultiValueExit(b) {
+                       exitBlocks = append(exitBlocks, b)
+               }
        }
-       return rc
-}
 
-// plus returns a register cursor offset from the original, without modifying the original.
-func (c *registerCursor) plus(regWidth Abi1RO) registerCursor {
-       rc := *c
-       rc.nextSlice += regWidth
-       return rc
-}
+       // Convert each aggregate arg into Make of its parts (and so on, to primitive types)
+       for _, v := range args {
+               var rc registerCursor
+               a := x.prAssignForArg(v)
+               aux := x.f.OwnAux
+               regs := a.Registers
+               var offset int64
+               if len(regs) == 0 {
+                       offset = a.FrameOffset(aux.abiInfo)
+               }
+               auxBase := x.offsetFrom(x.f.Entry, x.sp, offset, types.NewPtr(v.Type))
+               rc.init(regs, aux.abiInfo, nil, auxBase, 0)
+               x.rewriteSelectOrArg(f.Entry.Pos, f.Entry, v, v, m0, v.Type, rc)
+       }
 
-const (
-       // Register offsets for fields of built-in aggregate types; the ones not listed are zero.
-       RO_complex_imag = 1
-       RO_string_len   = 1
-       RO_slice_len    = 1
-       RO_slice_cap    = 2
-       RO_iface_data   = 1
-)
+       // Rewrite selects of results (which may be aggregates) into make-aggregates of register/memory-targeted selects
+       for _, v := range selects {
+               if v.Op == OpInvalid {
+                       continue
+               }
 
-func (x *expandState) regWidth(t *types.Type) Abi1RO {
-       return Abi1RO(x.abi1.NumParamRegs(t))
-}
+               call := v.Args[0]
+               aux := call.Aux.(*AuxCall)
+               mem := x.memForCall[call.ID]
+
+               i := v.AuxInt
+               regs := aux.RegsOfResult(i)
+
+               // If this select cannot fit into SSA and is stored, either disaggregate to register stores, or mem-mem move.
+               if store := x.wideSelects[v]; store != nil {
+                       // Use the mem that comes from the store operation.
+                       storeAddr := store.Args[0]
+                       mem := store.Args[2]
+                       if len(regs) > 0 {
+                               // Cannot do a rewrite that builds up a result from pieces; instead, copy pieces to the store operation.
+                               var rc registerCursor
+                               rc.init(regs, aux.abiInfo, nil, storeAddr, 0)
+                               mem = x.rewriteWideSelectToStores(call.Pos, call.Block, v, mem, v.Type, rc)
+                               store.copyOf(mem)
+                       } else {
+                               // Move directly from AuxBase to store target; rewrite the store instruction.
+                               offset := aux.OffsetOfResult(i)
+                               auxBase := x.offsetFrom(x.f.Entry, x.sp, offset, types.NewPtr(v.Type))
+                               // was Store dst, v, mem
+                               // now Move dst, auxBase, mem
+                               move := store.Block.NewValue3A(store.Pos, OpMove, types.TypeMem, v.Type, storeAddr, auxBase, mem)
+                               move.AuxInt = v.Type.Size()
+                               store.copyOf(move)
+                       }
+                       continue
+               }
 
-// regOffset returns the register offset of the i'th element of type t
-func (x *expandState) regOffset(t *types.Type, i int) Abi1RO {
-       // TODO maybe cache this in a map if profiling recommends.
-       if i == 0 {
-               return 0
-       }
-       if t.IsArray() {
-               return Abi1RO(i) * x.regWidth(t.Elem())
-       }
-       if t.IsStruct() {
-               k := Abi1RO(0)
-               for j := 0; j < i; j++ {
-                       k += x.regWidth(t.FieldType(j))
+               var auxBase *Value
+               if len(regs) == 0 {
+                       offset := aux.OffsetOfResult(i)
+                       auxBase = x.offsetFrom(x.f.Entry, x.sp, offset, types.NewPtr(v.Type))
                }
-               return k
+               var rc registerCursor
+               rc.init(regs, aux.abiInfo, nil, auxBase, 0)
+               x.rewriteSelectOrArg(call.Pos, call.Block, v, v, mem, v.Type, rc)
        }
-       panic("Haven't implemented this case yet, do I need to?")
-}
 
-// at returns the register cursor for component i of t, where the first
-// component is numbered 0.
-func (c *registerCursor) at(t *types.Type, i int) registerCursor {
-       rc := *c
-       if i == 0 || c.regsLen == 0 {
-               return rc
-       }
-       if t.IsArray() {
-               w := c.config.NumParamRegs(t.Elem())
-               rc.nextSlice += Abi1RO(i * w)
-               return rc
+       rewriteCall := func(v *Value, newOp Op, argStart int) {
+               // Break aggregate args passed to call into smaller pieces.
+               x.rewriteCallArgs(v, argStart)
+               v.Op = newOp
+               rts := abi.RegisterTypes(v.Aux.(*AuxCall).abiInfo.OutParams())
+               v.Type = types.NewResults(append(rts, types.TypeMem))
        }
-       if t.IsStruct() {
-               for j := 0; j < i; j++ {
-                       rc.next(t.FieldType(j))
+
+       // Rewrite calls
+       for _, v := range calls {
+               switch v.Op {
+               case OpStaticLECall:
+                       rewriteCall(v, OpStaticCall, 0)
+               case OpTailLECall:
+                       rewriteCall(v, OpTailCall, 0)
+               case OpClosureLECall:
+                       rewriteCall(v, OpClosureCall, 2)
+               case OpInterLECall:
+                       rewriteCall(v, OpInterCall, 1)
                }
-               return rc
        }
-       panic("Haven't implemented this case yet, do I need to?")
-}
 
-func (c *registerCursor) init(regs []abi.RegIndex, info *abi.ABIParamResultInfo, result *[]*Value, storeDest *Value) {
-       c.regsLen = len(regs)
-       c.nextSlice = 0
-       if len(regs) == 0 {
-               c.storeDest = storeDest // only save this if there are no registers, will explode if misused.
-               return
+       // Rewrite results from exit blocks
+       for _, b := range exitBlocks {
+               v := b.Controls[0]
+               x.rewriteFuncResults(v, b, f.OwnAux)
+               b.SetControl(v)
        }
-       c.config = info.Config()
-       c.regValues = result
-}
 
-func (c *registerCursor) addArg(v *Value) {
-       *c.regValues = append(*c.regValues, v)
-}
-
-func (c *registerCursor) hasRegs() bool {
-       return c.regsLen > 0
 }
 
-type expandState struct {
-       f                  *Func
-       abi1               *abi.ABIConfig
-       debug              int // odd values log lost statement markers, so likely settings are 1 (stmts), 2 (expansion), and 3 (both)
-       regSize            int64
-       sp                 *Value
-       typs               *Types
-       ptrSize            int64
-       hiOffset           int64
-       lowOffset          int64
-       hiRo               Abi1RO
-       loRo               Abi1RO
-       namedSelects       map[*Value][]namedVal
-       sdom               SparseTree
-       commonSelectors    map[selKey]*Value // used to de-dupe selectors
-       commonArgs         map[selKey]*Value // used to de-dupe OpArg/OpArgIntReg/OpArgFloatReg
-       memForCall         map[ID]*Value     // For a call, need to know the unique selector that gets the mem.
-       transformedSelects map[ID]bool       // OpSelectN after rewriting, either created or renumbered.
-       indentLevel        int               // Indentation for debugging recursion
-}
+func (x *expandState) rewriteFuncResults(v *Value, b *Block, aux *AuxCall) {
+       // This is very similar to rewriteCallArgs
+       // differences:
+       // firstArg + preArgs
+       // sp vs auxBase
 
-// intPairTypes returns the pair of 32-bit int types needed to encode a 64-bit integer type on a target
-// that has no 64-bit integer registers.
-func (x *expandState) intPairTypes(et types.Kind) (tHi, tLo *types.Type) {
-       tHi = x.typs.UInt32
-       if et == types.TINT64 {
-               tHi = x.typs.Int32
-       }
-       tLo = x.typs.UInt32
-       return
-}
+       m0 := v.MemoryArg()
+       mem := m0
 
-// isAlreadyExpandedAggregateType returns whether a type is an SSA-able "aggregate" (multiple register) type
-// that was expanded in an earlier phase (currently, expand_calls is intended to run after decomposeBuiltin,
-// so this is all aggregate types -- small struct and array, complex, interface, string, slice, and 64-bit
-// integer on 32-bit).
-func (x *expandState) isAlreadyExpandedAggregateType(t *types.Type) bool {
-       if !CanSSA(t) {
-               return false
-       }
-       return t.IsStruct() || t.IsArray() || t.IsComplex() || t.IsInterface() || t.IsString() || t.IsSlice() ||
-               (t.Size() > x.regSize && (t.IsInteger() || (x.f.Config.SoftFloat && t.IsFloat())))
-}
+       allResults := []*Value{}
+       var oldArgs []*Value
+       argsWithoutMem := v.Args[:len(v.Args)-1]
 
-// offsetFrom creates an offset from a pointer, simplifying chained offsets and offsets from SP
-// TODO should also optimize offsets from SB?
-func (x *expandState) offsetFrom(b *Block, from *Value, offset int64, pt *types.Type) *Value {
-       ft := from.Type
-       if offset == 0 {
-               if ft == pt {
-                       return from
-               }
-               // This captures common, (apparently) safe cases.  The unsafe cases involve ft == uintptr
-               if (ft.IsPtr() || ft.IsUnsafePtr()) && pt.IsPtr() {
-                       return from
+       for j, a := range argsWithoutMem {
+               oldArgs = append(oldArgs, a)
+               i := int64(j)
+               auxType := aux.TypeOfResult(i)
+               auxBase := b.NewValue2A(v.Pos, OpLocalAddr, types.NewPtr(auxType), aux.NameOfResult(i), x.sp, mem)
+               auxOffset := int64(0)
+               aRegs := aux.RegsOfResult(int64(j))
+               if a.Op == OpDereference {
+                       a.Op = OpLoad
+               }
+               var rc registerCursor
+               var result *[]*Value
+               if len(aRegs) > 0 {
+                       result = &allResults
+               } else {
+                       if a.Op == OpLoad && a.Args[0].Op == OpLocalAddr {
+                               addr := a.Args[0]
+                               if addr.MemoryArg() == a.MemoryArg() && addr.Aux == aux.NameOfResult(i) {
+                                       continue // Self move to output parameter
+                               }
+                       }
                }
+               rc.init(aRegs, aux.abiInfo, result, auxBase, auxOffset)
+               mem = x.decomposeAsNecessary(v.Pos, b, a, mem, rc)
        }
-       // Simplify, canonicalize
-       for from.Op == OpOffPtr {
-               offset += from.AuxInt
-               from = from.Args[0]
-       }
-       if from == x.sp {
-               return x.f.ConstOffPtrSP(pt, offset, x.sp)
+       v.resetArgs()
+       v.AddArgs(allResults...)
+       v.AddArg(mem)
+       for _, a := range oldArgs {
+               if a.Uses == 0 {
+                       if x.debug > 1 {
+                               x.Printf("...marking %v unused\n", a.LongString())
+                       }
+                       x.invalidateRecursively(a)
+               }
        }
-       return b.NewValue1I(from.Pos.WithNotStmt(), OpOffPtr, pt, offset, from)
+       v.Type = types.NewResults(append(abi.RegisterTypes(aux.abiInfo.OutParams()), types.TypeMem))
+       return
 }
 
-// splitSlots splits one "field" (specified by sfx, offset, and ty) out of the LocalSlots in ls and returns the new LocalSlots this generates.
-func (x *expandState) splitSlots(ls []*LocalSlot, sfx string, offset int64, ty *types.Type) []*LocalSlot {
-       var locs []*LocalSlot
-       for i := range ls {
-               locs = append(locs, x.f.SplitSlot(ls[i], sfx, offset, ty))
+func (x *expandState) rewriteCallArgs(v *Value, firstArg int) {
+       if x.debug > 1 {
+               x.indent(3)
+               defer x.indent(-3)
+               x.Printf("rewriteCallArgs(%s; %d)\n", v.LongString(), firstArg)
        }
-       return locs
-}
+       // Thread the stores on the memory arg
+       aux := v.Aux.(*AuxCall)
+       m0 := v.MemoryArg()
+       mem := m0
+       allResults := []*Value{}
+       oldArgs := []*Value{}
+       argsWithoutMem := v.Args[firstArg : len(v.Args)-1] // Also strip closure/interface Op-specific args
 
-// prAssignForArg returns the ABIParamAssignment for v, assumed to be an OpArg.
-func (x *expandState) prAssignForArg(v *Value) *abi.ABIParamAssignment {
-       if v.Op != OpArg {
-               panic(badVal("Wanted OpArg, instead saw", v))
+       sp := x.sp
+       if v.Op == OpTailLECall {
+               // For tail call, we unwind the frame before the call so we'll use the caller's
+               // SP.
+               sp = x.f.Entry.NewValue1(src.NoXPos, OpGetCallerSP, x.typs.Uintptr, mem)
        }
-       return ParamAssignmentForArgName(x.f, v.Aux.(*ir.Name))
-}
 
-// ParamAssignmentForArgName returns the ABIParamAssignment for f's arg with matching name.
-func ParamAssignmentForArgName(f *Func, name *ir.Name) *abi.ABIParamAssignment {
-       abiInfo := f.OwnAux.abiInfo
-       ip := abiInfo.InParams()
-       for i, a := range ip {
-               if a.Name == name {
-                       return &ip[i]
+       for i, a := range argsWithoutMem { // skip leading non-parameter SSA Args and trailing mem SSA Arg.
+               oldArgs = append(oldArgs, a)
+               auxI := int64(i)
+               aRegs := aux.RegsOfArg(auxI)
+               aType := aux.TypeOfArg(auxI)
+
+               if a.Op == OpDereference {
+                       a.Op = OpLoad
+               }
+               var rc registerCursor
+               var result *[]*Value
+               var aOffset int64
+               if len(aRegs) > 0 {
+                       result = &allResults
+               } else {
+                       aOffset = aux.OffsetOfArg(auxI)
+               }
+               if v.Op == OpTailLECall && a.Op == OpArg && a.AuxInt == 0 {
+                       // It's common for a tail call to pass along the same arguments it
+                       // received (e.g. in a method wrapper), in which case this is a self
+                       // copy. Detect this and optimize it out.
+                       n := a.Aux.(*ir.Name)
+                       if n.Class == ir.PPARAM && n.FrameOffset()+x.f.Config.ctxt.Arch.FixedFrameSize == aOffset {
+                               continue
+                       }
+               }
+               if x.debug > 1 {
+                       x.Printf("...storeArg %s, %v, %d\n", a.LongString(), aType, aOffset)
+               }
+
+               rc.init(aRegs, aux.abiInfo, result, sp, aOffset)
+               mem = x.decomposeAsNecessary(v.Pos, v.Block, a, mem, rc)
+       }
+       var preArgStore [2]*Value
+       preArgs := append(preArgStore[:0], v.Args[0:firstArg]...)
+       v.resetArgs()
+       v.AddArgs(preArgs...)
+       v.AddArgs(allResults...)
+       v.AddArg(mem)
+       for _, a := range oldArgs {
+               if a.Uses == 0 {
+                       x.invalidateRecursively(a)
                }
        }
-       panic(fmt.Errorf("Did not match param %v in prInfo %+v", name, abiInfo.InParams()))
-}
 
-// indent increments (or decrements) the indentation.
-func (x *expandState) indent(n int) {
-       x.indentLevel += n
+       return
 }
 
-// Printf does an indented fmt.Printf on the format and args.
-func (x *expandState) Printf(format string, a ...interface{}) (n int, err error) {
-       if x.indentLevel > 0 {
-               fmt.Printf("%[1]*s", x.indentLevel, "")
-       }
-       return fmt.Printf(format, a...)
+func (x *expandState) decomposePair(pos src.XPos, b *Block, a, mem *Value, t0, t1 *types.Type, o0, o1 Op, rc *registerCursor) *Value {
+       e := b.NewValue1(pos, o0, t0, a)
+       pos = pos.WithNotStmt()
+       mem = x.decomposeAsNecessary(pos, b, e, mem, rc.next(t0))
+       e = b.NewValue1(pos, o1, t1, a)
+       mem = x.decomposeAsNecessary(pos, b, e, mem, rc.next(t1))
+       return mem
 }
 
-// Calls that need lowering have some number of inputs, including a memory input,
-// and produce a tuple of (value1, value2, ..., mem) where valueK may or may not be SSA-able.
-
-// With the current ABI those inputs need to be converted into stores to memory,
-// rethreading the call's memory input to the first, and the new call now receiving the last.
-
-// With the current ABI, the outputs need to be converted to loads, which will all use the call's
-// memory output as their input.
+func (x *expandState) decomposeOne(pos src.XPos, b *Block, a, mem *Value, t0 *types.Type, o0 Op, rc *registerCursor) *Value {
+       e := b.NewValue1(pos, o0, t0, a)
+       pos = pos.WithNotStmt()
+       mem = x.decomposeAsNecessary(pos, b, e, mem, rc.next(t0))
+       return mem
+}
 
-// rewriteSelect recursively walks from leaf selector to a root (OpSelectN, OpLoad, OpArg)
-// through a chain of Struct/Array/builtin Select operations.  If the chain of selectors does not
-// end in an expected root, it does nothing (this can happen depending on compiler phase ordering).
-// The "leaf" provides the type, the root supplies the container, and the leaf-to-root path
-// accumulates the offset.
-// It emits the code necessary to implement the leaf select operation that leads to the root.
-//
-// TODO when registers really arrive, must also decompose anything split across two registers or registers and memory.
-func (x *expandState) rewriteSelect(leaf *Value, selector *Value, offset int64, regOffset Abi1RO) []*LocalSlot {
+// decomposeAsNecessary converts a value (perhaps an aggregate) passed to a call or returned by a function,
+// into the appropriate sequence of stores and register assignments to transmit that value in a given ABI, and
+// returns the current memory after this convert/rewrite (it may be the unchanged input memory, or new memory if stores were needed).
+// 'pos' is the source position all this is tied to
+// 'b' is the enclosing block
+// 'a' is the value to decompose
+// 'm0' is the input memory arg used for the first store (or returned if there are no stores)
+// 'rc' is a registerCursor which identifies the register/memory destination for the value
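+//
+// E.g. (illustrative): a struct{ a, b int64 } value bound to two registers
+// yields two recursive calls that addArg each field; the same struct passed
+// in memory yields two Stores threaded through mem.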
+func (x *expandState) decomposeAsNecessary(pos src.XPos, b *Block, a, m0 *Value, rc registerCursor) *Value {
        if x.debug > 1 {
                x.indent(3)
                defer x.indent(-3)
-               x.Printf("rewriteSelect(%s; %s; memOff=%d; regOff=%d)\n", leaf.LongString(), selector.LongString(), offset, regOffset)
        }
-       var locs []*LocalSlot
-       leafType := leaf.Type
-       if len(selector.Args) > 0 {
-               w := selector.Args[0]
-               if w.Op == OpCopy {
-                       for w.Op == OpCopy {
-                               w = w.Args[0]
-                       }
-                       selector.SetArg(0, w)
-               }
+       at := a.Type
+       if at.Size() == 0 {
+               return m0
        }
-       switch selector.Op {
-       case OpArgIntReg, OpArgFloatReg:
-               if leafType == selector.Type { // OpIData leads us here, sometimes.
-                       leaf.copyOf(selector)
-               } else {
-                       x.f.Fatalf("Unexpected %s type, selector=%s, leaf=%s\n", selector.Op.String(), selector.LongString(), leaf.LongString())
-               }
+       if a.Op == OpDereference {
+               a.Op = OpLoad // For purposes of parameter passing expansion, a Dereference is a Load.
+       }
+
+       if !rc.hasRegs() && !CanSSA(at) {
+               dst := x.offsetFrom(b, rc.storeDest, rc.storeOffset, types.NewPtr(at))
                if x.debug > 1 {
-                       x.Printf("---%s, break\n", selector.Op.String())
+                       x.Printf("...recur store %s at %s\n", a.LongString(), dst.LongString())
                }
-       case OpArg:
-               if !x.isAlreadyExpandedAggregateType(selector.Type) {
-                       if leafType == selector.Type { // OpIData leads us here, sometimes.
-                               x.newArgToMemOrRegs(selector, leaf, offset, regOffset, leafType, leaf.Pos)
-                       } else {
-                               x.f.Fatalf("Unexpected OpArg type, selector=%s, leaf=%s\n", selector.LongString(), leaf.LongString())
-                       }
-                       if x.debug > 1 {
-                               x.Printf("---OpArg, break\n")
-                       }
-                       break
-               }
-               switch leaf.Op {
-               case OpIData, OpStructSelect, OpArraySelect:
-                       leafType = removeTrivialWrapperTypes(leaf.Type)
+               if a.Op == OpLoad {
+                       m0 = b.NewValue3A(pos, OpMove, types.TypeMem, at, dst, a.Args[0], m0)
+                       m0.AuxInt = at.Size()
+                       return m0
+               } else {
+                       panic(fmt.Errorf("Store of not a load"))
                }
-               x.newArgToMemOrRegs(selector, leaf, offset, regOffset, leafType, leaf.Pos)
+       }
 
-               for _, s := range x.namedSelects[selector] {
-                       locs = append(locs, x.f.Names[s.locIndex])
+       mem := m0
+       switch at.Kind() {
+       case types.TARRAY:
+               et := at.Elem()
+               for i := int64(0); i < at.NumElem(); i++ {
+                       e := b.NewValue1I(pos, OpArraySelect, et, i, a)
+                       pos = pos.WithNotStmt()
+                       mem = x.decomposeAsNecessary(pos, b, e, mem, rc.next(et))
                }
+               return mem
 
-       case OpLoad: // We end up here because of IData of immediate structures.
-               // Failure case:
-               // (note the failure case is very rare; w/o this case, make.bash and run.bash both pass, as well as
-               // the hard cases of building {syscall,math,math/cmplx,math/bits,go/constant} on ppc64le and mips-softfloat).
-               //
-               // GOSSAFUNC='(*dumper).dump' go build -gcflags=-l -tags=math_big_pure_go cmd/compile/internal/gc
-               // cmd/compile/internal/gc/dump.go:136:14: internal compiler error: '(*dumper).dump': not lowered: v827, StructSelect PTR PTR
-               // b2: ← b1
-               // v20 (+142) = StaticLECall <interface {},mem> {AuxCall{reflect.Value.Interface([reflect.Value,0])[interface {},24]}} [40] v8 v1
-               // v21 (142) = SelectN <mem> [1] v20
-               // v22 (142) = SelectN <interface {}> [0] v20
-               // b15: ← b8
-               // v71 (+143) = IData <Nodes> v22 (v[Nodes])
-               // v73 (+146) = StaticLECall <[]*Node,mem> {AuxCall{"".Nodes.Slice([Nodes,0])[[]*Node,8]}} [32] v71 v21
-               //
-               // translates (w/o the "case OpLoad:" above) to:
-               //
-               // b2: ← b1
-               // v20 (+142) = StaticCall <mem> {AuxCall{reflect.Value.Interface([reflect.Value,0])[interface {},24]}} [40] v715
-               // v23 (142) = Load <*uintptr> v19 v20
-               // v823 (142) = IsNonNil <bool> v23
-               // v67 (+143) = Load <*[]*Node> v880 v20
-               // b15: ← b8
-               // v827 (146) = StructSelect <*[]*Node> [0] v67
-               // v846 (146) = Store <mem> {*[]*Node} v769 v827 v20
-               // v73 (+146) = StaticCall <mem> {AuxCall{"".Nodes.Slice([Nodes,0])[[]*Node,8]}} [32] v846
-               // i.e., the struct select is generated and remains in because it is not applied to an actual structure.
-               // The OpLoad was created to load the single field of the IData
-               // This case removes that StructSelect.
-               if leafType != selector.Type {
-                       if x.f.Config.SoftFloat && selector.Type.IsFloat() {
-                               if x.debug > 1 {
-                                       x.Printf("---OpLoad, break\n")
-                               }
-                               break // softfloat pass will take care of that
+       case types.TSTRUCT:
+               for i := 0; i < at.NumFields(); i++ {
+                       et := at.Field(i).Type // might need to read offsets from the fields
+                       e := b.NewValue1I(pos, OpStructSelect, et, int64(i), a)
+                       pos = pos.WithNotStmt()
+                       if x.debug > 1 {
+                               x.Printf("...recur decompose %s, %v\n", e.LongString(), et)
                        }
-                       x.f.Fatalf("Unexpected Load as selector, leaf=%s, selector=%s\n", leaf.LongString(), selector.LongString())
-               }
-               leaf.copyOf(selector)
-               for _, s := range x.namedSelects[selector] {
-                       locs = append(locs, x.f.Names[s.locIndex])
+                       mem = x.decomposeAsNecessary(pos, b, e, mem, rc.next(et))
                }
+               return mem
 
-       case OpSelectN:
-               // TODO(register args) result case
-               // if applied to Op-mumble-call, the Aux tells us which result, regOffset specifies offset within result.  If a register, should rewrite to OpSelectN for new call.
-               // TODO these may be duplicated. Should memoize. Intermediate selectors will go dead, no worries there.
-               call := selector.Args[0]
-               call0 := call
-               aux := call.Aux.(*AuxCall)
-               which := selector.AuxInt
-               if x.transformedSelects[selector.ID] {
-                       // This is a minor hack.  Either this select has had its operand adjusted (mem) or
-                       // it is some other intermediate node that was rewritten to reference a register (not a generic arg).
-                       // This can occur with chains of selection/indexing from single field/element aggregates.
-                       leaf.copyOf(selector)
-                       break
-               }
-               if which == aux.NResults() { // mem is after the results.
-                       // rewrite v as a Copy of call -- the replacement call will produce a mem.
-                       if leaf != selector {
-                               panic(fmt.Errorf("Unexpected selector of memory, selector=%s, call=%s, leaf=%s", selector.LongString(), call.LongString(), leaf.LongString()))
-                       }
-                       if aux.abiInfo == nil {
-                               panic(badVal("aux.abiInfo nil for call", call))
-                       }
-                       if existing := x.memForCall[call.ID]; existing == nil {
-                               selector.AuxInt = int64(aux.abiInfo.OutRegistersUsed())
-                               x.memForCall[call.ID] = selector
-                               x.transformedSelects[selector.ID] = true // operand adjusted
-                       } else {
-                               selector.copyOf(existing)
-                       }
+       case types.TSLICE:
+               mem = x.decomposeOne(pos, b, a, mem, x.typs.BytePtr, OpSlicePtr, &rc)
+               pos = pos.WithNotStmt()
+               mem = x.decomposeOne(pos, b, a, mem, x.typs.Int, OpSliceLen, &rc)
+               return x.decomposeOne(pos, b, a, mem, x.typs.Int, OpSliceCap, &rc)
 
-               } else {
-                       leafType := removeTrivialWrapperTypes(leaf.Type)
-                       if CanSSA(leafType) {
-                               pt := types.NewPtr(leafType)
-                               // Any selection right out of the arg area/registers has to be same Block as call, use call as mem input.
-                               // Create a "mem" for any loads that need to occur.
-                               if mem := x.memForCall[call.ID]; mem != nil {
-                                       if mem.Block != call.Block {
-                                               panic(fmt.Errorf("selector and call need to be in same block, selector=%s; call=%s", selector.LongString(), call.LongString()))
-                                       }
-                                       call = mem
-                               } else {
-                                       mem = call.Block.NewValue1I(call.Pos.WithNotStmt(), OpSelectN, types.TypeMem, int64(aux.abiInfo.OutRegistersUsed()), call)
-                                       x.transformedSelects[mem.ID] = true // select uses post-expansion indexing
-                                       x.memForCall[call.ID] = mem
-                                       call = mem
-                               }
-                               outParam := aux.abiInfo.OutParam(int(which))
-                               if len(outParam.Registers) > 0 {
-                                       firstReg := uint32(0)
-                                       for i := 0; i < int(which); i++ {
-                                               firstReg += uint32(len(aux.abiInfo.OutParam(i).Registers))
-                                       }
-                                       reg := int64(regOffset + Abi1RO(firstReg))
-                                       if leaf.Block == call.Block {
-                                               leaf.reset(OpSelectN)
-                                               leaf.SetArgs1(call0)
-                                               leaf.Type = leafType
-                                               leaf.AuxInt = reg
-                                               x.transformedSelects[leaf.ID] = true // leaf, rewritten to use post-expansion indexing.
-                                       } else {
-                                               w := call.Block.NewValue1I(leaf.Pos, OpSelectN, leafType, reg, call0)
-                                               x.transformedSelects[w.ID] = true // select, using post-expansion indexing.
-                                               leaf.copyOf(w)
-                                       }
-                               } else {
-                                       off := x.offsetFrom(x.f.Entry, x.sp, offset+aux.OffsetOfResult(which), pt)
-                                       if leaf.Block == call.Block {
-                                               leaf.reset(OpLoad)
-                                               leaf.SetArgs2(off, call)
-                                               leaf.Type = leafType
-                                       } else {
-                                               w := call.Block.NewValue2(leaf.Pos, OpLoad, leafType, off, call)
-                                               leaf.copyOf(w)
-                                               if x.debug > 1 {
-                                                       x.Printf("---new %s\n", w.LongString())
-                                               }
-                                       }
-                               }
-                               for _, s := range x.namedSelects[selector] {
-                                       locs = append(locs, x.f.Names[s.locIndex])
-                               }
-                       } else {
-                               x.f.Fatalf("Should not have non-SSA-able OpSelectN, selector=%s", selector.LongString())
-                       }
-               }
+       case types.TSTRING:
+               return x.decomposePair(pos, b, a, mem, x.typs.BytePtr, x.typs.Int, OpStringPtr, OpStringLen, &rc)
 
-       case OpStructSelect:
-               w := selector.Args[0]
-               var ls []*LocalSlot
-               if w.Type.Kind() != types.TSTRUCT { // IData artifact
-                       ls = x.rewriteSelect(leaf, w, offset, regOffset)
-               } else {
-                       fldi := int(selector.AuxInt)
-                       ls = x.rewriteSelect(leaf, w, offset+w.Type.FieldOff(fldi), regOffset+x.regOffset(w.Type, fldi))
-                       if w.Op != OpIData {
-                               for _, l := range ls {
-                                       locs = append(locs, x.f.SplitStruct(l, int(selector.AuxInt)))
-                               }
+       case types.TINTER:
+               mem = x.decomposeOne(pos, b, a, mem, x.typs.Uintptr, OpITab, &rc)
+               pos = pos.WithNotStmt()
+               // Immediate interfaces cause so many headaches.
+               if a.Op == OpIMake {
+                       data := a.Args[1]
+                       for data.Op == OpStructMake1 || data.Op == OpArrayMake1 {
+                               data = data.Args[0]
                        }
+                       return x.decomposeAsNecessary(pos, b, data, mem, rc.next(data.Type))
                }
+               return x.decomposeOne(pos, b, a, mem, x.typs.BytePtr, OpIData, &rc)
 
-       case OpArraySelect:
-               w := selector.Args[0]
-               index := selector.AuxInt
-               x.rewriteSelect(leaf, w, offset+selector.Type.Size()*index, regOffset+x.regOffset(w.Type, int(index)))
-
-       case OpInt64Hi:
-               w := selector.Args[0]
-               ls := x.rewriteSelect(leaf, w, offset+x.hiOffset, regOffset+x.hiRo)
-               locs = x.splitSlots(ls, ".hi", x.hiOffset, leafType)
-
-       case OpInt64Lo:
-               w := selector.Args[0]
-               ls := x.rewriteSelect(leaf, w, offset+x.lowOffset, regOffset+x.loRo)
-               locs = x.splitSlots(ls, ".lo", x.lowOffset, leafType)
-
-       case OpStringPtr:
-               ls := x.rewriteSelect(leaf, selector.Args[0], offset, regOffset)
-               locs = x.splitSlots(ls, ".ptr", 0, x.typs.BytePtr)
-
-       case OpSlicePtr, OpSlicePtrUnchecked:
-               w := selector.Args[0]
-               ls := x.rewriteSelect(leaf, w, offset, regOffset)
-               locs = x.splitSlots(ls, ".ptr", 0, types.NewPtr(w.Type.Elem()))
-
-       case OpITab:
-               w := selector.Args[0]
-               ls := x.rewriteSelect(leaf, w, offset, regOffset)
-               sfx := ".itab"
-               if w.Type.IsEmptyInterface() {
-                       sfx = ".type"
-               }
-               locs = x.splitSlots(ls, sfx, 0, x.typs.Uintptr)
-
-       case OpComplexReal:
-               ls := x.rewriteSelect(leaf, selector.Args[0], offset, regOffset)
-               locs = x.splitSlots(ls, ".real", 0, selector.Type)
-
-       case OpComplexImag:
-               ls := x.rewriteSelect(leaf, selector.Args[0], offset+selector.Type.Size(), regOffset+RO_complex_imag) // result is FloatNN, width of result is offset of imaginary part.
-               locs = x.splitSlots(ls, ".imag", selector.Type.Size(), selector.Type)
-
-       case OpStringLen, OpSliceLen:
-               ls := x.rewriteSelect(leaf, selector.Args[0], offset+x.ptrSize, regOffset+RO_slice_len)
-               locs = x.splitSlots(ls, ".len", x.ptrSize, leafType)
-
-       case OpIData:
-               ls := x.rewriteSelect(leaf, selector.Args[0], offset+x.ptrSize, regOffset+RO_iface_data)
-               locs = x.splitSlots(ls, ".data", x.ptrSize, leafType)
+       case types.TCOMPLEX64:
+               return x.decomposePair(pos, b, a, mem, x.typs.Float32, x.typs.Float32, OpComplexReal, OpComplexImag, &rc)
 
-       case OpSliceCap:
-               ls := x.rewriteSelect(leaf, selector.Args[0], offset+2*x.ptrSize, regOffset+RO_slice_cap)
-               locs = x.splitSlots(ls, ".cap", 2*x.ptrSize, leafType)
+       case types.TCOMPLEX128:
+               return x.decomposePair(pos, b, a, mem, x.typs.Float64, x.typs.Float64, OpComplexReal, OpComplexImag, &rc)
 
-       case OpCopy: // If it's an intermediate result, recurse
-               locs = x.rewriteSelect(leaf, selector.Args[0], offset, regOffset)
-               for _, s := range x.namedSelects[selector] {
-                       // this copy may have had its own name, preserve that, too.
-                       locs = append(locs, x.f.Names[s.locIndex])
+       case types.TINT64:
+               if at.Size() > x.regSize {
+                       return x.decomposePair(pos, b, a, mem, x.firstType, x.secondType, x.firstOp, x.secondOp, &rc)
+               }
+       case types.TUINT64:
+               if at.Size() > x.regSize {
+                       return x.decomposePair(pos, b, a, mem, x.typs.UInt32, x.typs.UInt32, x.firstOp, x.secondOp, &rc)
                }
-
-       default:
-               // Ignore dead ends. These can occur if this phase is run before decompose builtin (which is not intended, but allowed).
        }
 
-       return locs
-}
+       // An atomic type, either record the register or store it and update the memory.
 
-func (x *expandState) rewriteDereference(b *Block, base, a, mem *Value, offset, size int64, typ *types.Type, pos src.XPos) *Value {
-       source := a.Args[0]
-       dst := x.offsetFrom(b, base, offset, source.Type)
-       if a.Uses == 1 && a.Block == b {
-               a.reset(OpMove)
-               a.Pos = pos
-               a.Type = types.TypeMem
-               a.Aux = typ
-               a.AuxInt = size
-               a.SetArgs3(dst, source, mem)
-               mem = a
+       if rc.hasRegs() {
+               if x.debug > 1 {
+                       x.Printf("...recur addArg %s\n", a.LongString())
+               }
+               rc.addArg(a)
        } else {
-               mem = b.NewValue3A(pos, OpMove, types.TypeMem, typ, dst, source, mem)
-               mem.AuxInt = size
+               dst := x.offsetFrom(b, rc.storeDest, rc.storeOffset, types.NewPtr(at))
+               if x.debug > 1 {
+                       x.Printf("...recur store %s at %s\n", a.LongString(), dst.LongString())
+               }
+               mem = b.NewValue3A(pos, OpStore, types.TypeMem, at, dst, a, mem)
        }
+
        return mem
 }
 
-var indexNames [1]string = [1]string{"[0]"}
-
-// pathTo returns the selection path to the leaf type at offset within container.
-// e.g. len(thing.field[0]) => ".field[0].len"
-// this is for purposes of generating names ultimately fed to a debugger.
-func (x *expandState) pathTo(container, leaf *types.Type, offset int64) string {
-       if container == leaf || offset == 0 && container.Size() == leaf.Size() {
-               return ""
-       }
-       path := ""
-outer:
-       for {
-               switch container.Kind() {
-               case types.TARRAY:
-                       container = container.Elem()
-                       if container.Size() == 0 {
-                               return path
-                       }
-                       i := offset / container.Size()
-                       offset = offset % container.Size()
-                       // If a future compiler/ABI supports larger SSA/Arg-able arrays, expand indexNames.
-                       path = path + indexNames[i]
-                       continue
-               case types.TSTRUCT:
-                       for i := 0; i < container.NumFields(); i++ {
-                               fld := container.Field(i)
-                               if fld.Offset+fld.Type.Size() > offset {
-                                       offset -= fld.Offset
-                                       path += "." + fld.Sym.Name
-                                       container = fld.Type
-                                       continue outer
-                               }
-                       }
-                       return path
-               case types.TINT64, types.TUINT64:
-                       if container.Size() == x.regSize {
-                               return path
-                       }
-                       if offset == x.hiOffset {
-                               return path + ".hi"
-                       }
-                       return path + ".lo"
-               case types.TINTER:
-                       if offset != 0 {
-                               return path + ".data"
-                       }
-                       if container.IsEmptyInterface() {
-                               return path + ".type"
-                       }
-                       return path + ".itab"
-
-               case types.TSLICE:
-                       if offset == 2*x.regSize {
-                               return path + ".cap"
-                       }
-                       fallthrough
-               case types.TSTRING:
-                       if offset == 0 {
-                               return path + ".ptr"
-                       }
-                       return path + ".len"
-               case types.TCOMPLEX64, types.TCOMPLEX128:
-                       if offset == 0 {
-                               return path + ".real"
-                       }
-                       return path + ".imag"
+// Convert scalar OpArg into the proper OpWhateverArg instruction
+// Convert scalar OpSelectN into perhaps-differently-indexed OpSelectN
+// Convert aggregate OpArg into Make of its parts (which are eventually scalars)
+// Convert aggregate OpSelectN into Make of its parts (which are eventually scalars)
+// Returns the converted value.
+//
+//   - "pos" the position for any generated instructions
+//   - "b" the block for any generated instructions
+//   - "container" the outermost OpArg/OpSelectN
+//   - "a" the instruction to overwrite, if any (only the outermost caller)
+//   - "m0" the memory arg for any loads that are necessary
+//   - "at" the type of the Arg/part
+//   - "rc" the register/memory cursor locating the various parts of the Arg.
+func (x *expandState) rewriteSelectOrArg(pos src.XPos, b *Block, container, a, m0 *Value, at *types.Type, rc registerCursor) *Value {
+
+       if at == types.TypeMem {
+               a.copyOf(m0)
+               return a
+       }
+
+       makeOf := func(a *Value, op Op, args []*Value) *Value {
+               if a == nil {
+                       a = b.NewValue0(pos, op, at)
+                       a.AddArgs(args...)
+               } else {
+                       a.resetArgs()
+                       a.Aux, a.AuxInt = nil, 0
+                       a.Pos, a.Op, a.Type = pos, op, at
+                       a.AddArgs(args...)
                }
-               return path
+               return a
        }
-}
 
-// decomposeArg is a helper for storeArgOrLoad.
-// It decomposes a Load or an Arg into smaller parts and returns the new mem.
-// If the type does not match one of the expected aggregate types, it returns nil instead.
-// Parameters:
-//
-//     pos           -- the location of any generated code.
-//     b             -- the block into which any generated code should normally be placed
-//     source        -- the value, possibly an aggregate, to be stored.
-//     mem           -- the mem flowing into this decomposition (loads depend on it, stores updated it)
-//     t             -- the type of the value to be stored
-//     storeOffset   -- if the value is stored in memory, it is stored at base (see storeRc) + storeOffset
-//     loadRegOffset -- regarding source as a value in registers, the register offset in ABI1.  Meaningful only if source is OpArg.
-//     storeRc       -- storeRC; if the value is stored in registers, this specifies the registers.
-//                      StoreRc also identifies whether the target is registers or memory, and has the base for the store operation.
-func (x *expandState) decomposeArg(pos src.XPos, b *Block, source, mem *Value, t *types.Type, storeOffset int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value {
-
-       pa := x.prAssignForArg(source)
-       var locs []*LocalSlot
-       for _, s := range x.namedSelects[source] {
-               locs = append(locs, x.f.Names[s.locIndex])
+       if at.Size() == 0 {
+               // For consistency, create these values even though they'll ultimately be unused
+               if at.IsArray() {
+                       return makeOf(a, OpArrayMake0, nil)
+               }
+               if at.IsStruct() {
+                       return makeOf(a, OpStructMake0, nil)
+               }
+               return a
        }
 
-       if len(pa.Registers) > 0 {
-               // Handle the in-registers case directly
-               rts, offs := pa.RegisterTypesAndOffsets()
-               last := loadRegOffset + x.regWidth(t)
-               if offs[loadRegOffset] != 0 {
-                       // Document the problem before panicking.
-                       for i := 0; i < len(rts); i++ {
-                               rt := rts[i]
-                               off := offs[i]
-                               fmt.Printf("rt=%s, off=%d, rt.Width=%d, rt.Align=%d\n", rt.String(), off, rt.Size(), uint8(rt.Alignment()))
-                       }
-                       panic(fmt.Errorf("offset %d of requested register %d should be zero, source=%s", offs[loadRegOffset], loadRegOffset, source.LongString()))
+       sk := selKey{from: container, size: 0, offsetOrIndex: rc.storeOffset, typ: at}
+       dupe := x.commonSelectors[sk]
+       if dupe != nil {
+               if a == nil {
+                       return dupe
                }
+               a.copyOf(dupe)
+               return a
+       }
 
-               if x.debug > 1 {
-                       x.Printf("decompose arg %s has %d locs\n", source.LongString(), len(locs))
-               }
+       var argStore [10]*Value
+       args := argStore[:0]
 
-               for i := loadRegOffset; i < last; i++ {
-                       rt := rts[i]
-                       off := offs[i]
-                       w := x.commonArgs[selKey{source, off, rt.Size(), rt}]
-                       if w == nil {
-                               w = x.newArgToMemOrRegs(source, w, off, i, rt, pos)
-                               suffix := x.pathTo(source.Type, rt, off)
-                               if suffix != "" {
-                                       x.splitSlotsIntoNames(locs, suffix, off, rt, w)
-                               }
+       addArg := func(a0 *Value) {
+               if a0 == nil {
+                       as := "<nil>"
+                       if a != nil {
+                               as = a.LongString()
                        }
-                       if t.IsPtrShaped() {
-                               // Preserve the original store type. This ensures pointer type
-                               // properties aren't discarded (e.g, notinheap).
-                               if rt.Size() != t.Size() || len(pa.Registers) != 1 || i != loadRegOffset {
-                                       b.Func.Fatalf("incompatible store type %v and %v, i=%d", t, rt, i)
-                               }
-                               rt = t
-                       }
-                       mem = x.storeArgOrLoad(pos, b, w, mem, rt, storeOffset+off, i, storeRc.next(rt))
+                       panic(fmt.Errorf("a0 should not be nil, a=%v, container=%v, at=%v", as, container.LongString(), at))
                }
-               return mem
+               args = append(args, a0)
        }
 
-       u := source.Type
-       switch u.Kind() {
+       switch at.Kind() {
        case types.TARRAY:
-               elem := u.Elem()
-               elemRO := x.regWidth(elem)
-               for i := int64(0); i < u.NumElem(); i++ {
-                       elemOff := i * elem.Size()
-                       mem = storeOneArg(x, pos, b, locs, indexNames[i], source, mem, elem, elemOff, storeOffset+elemOff, loadRegOffset, storeRc.next(elem))
-                       loadRegOffset += elemRO
-                       pos = pos.WithNotStmt()
+               et := at.Elem()
+               for i := int64(0); i < at.NumElem(); i++ {
+                       e := x.rewriteSelectOrArg(pos, b, container, nil, m0, et, rc.next(et))
+                       addArg(e)
                }
-               return mem
+               a = makeOf(a, OpArrayMake1, args)
+               x.commonSelectors[sk] = a
+               return a
+
        case types.TSTRUCT:
-               for i := 0; i < u.NumFields(); i++ {
-                       fld := u.Field(i)
-                       mem = storeOneArg(x, pos, b, locs, "."+fld.Sym.Name, source, mem, fld.Type, fld.Offset, storeOffset+fld.Offset, loadRegOffset, storeRc.next(fld.Type))
-                       loadRegOffset += x.regWidth(fld.Type)
+               // Assume ssagen/ssa.go (in buildssa) spills large aggregates so they won't appear here.
+               for i := 0; i < at.NumFields(); i++ {
+                       et := at.Field(i).Type
+                       e := x.rewriteSelectOrArg(pos, b, container, nil, m0, et, rc.next(et))
+                       if e == nil {
+                               panic(fmt.Errorf("nil e, et=%v, et.Size()=%d, i=%d", et, et.Size(), i))
+                       }
+                       addArg(e)
                        pos = pos.WithNotStmt()
                }
-               return mem
-       case types.TINT64, types.TUINT64:
-               if t.Size() == x.regSize {
-                       break
+               if at.NumFields() > 4 {
+                       panic(fmt.Errorf("Too many fields (%d, %d bytes), container=%s", at.NumFields(), at.Size(), container.LongString()))
                }
-               tHi, tLo := x.intPairTypes(t.Kind())
-               mem = storeOneArg(x, pos, b, locs, ".hi", source, mem, tHi, x.hiOffset, storeOffset+x.hiOffset, loadRegOffset+x.hiRo, storeRc.plus(x.hiRo))
+               a = makeOf(a, StructMakeOp(at.NumFields()), args)
+               x.commonSelectors[sk] = a
+               return a
+
+       case types.TSLICE:
+               addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.typs.BytePtr, rc.next(x.typs.BytePtr)))
                pos = pos.WithNotStmt()
-               return storeOneArg(x, pos, b, locs, ".lo", source, mem, tLo, x.lowOffset, storeOffset+x.lowOffset, loadRegOffset+x.loRo, storeRc.plus(x.loRo))
-       case types.TINTER:
-               sfx := ".itab"
-               if u.IsEmptyInterface() {
-                       sfx = ".type"
-               }
-               return storeTwoArg(x, pos, b, locs, sfx, ".idata", source, mem, x.typs.Uintptr, x.typs.BytePtr, 0, storeOffset, loadRegOffset, storeRc)
+               addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.typs.Int, rc.next(x.typs.Int)))
+               addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.typs.Int, rc.next(x.typs.Int)))
+               a = makeOf(a, OpSliceMake, args)
+               x.commonSelectors[sk] = a
+               return a
+
        case types.TSTRING:
-               return storeTwoArg(x, pos, b, locs, ".ptr", ".len", source, mem, x.typs.BytePtr, x.typs.Int, 0, storeOffset, loadRegOffset, storeRc)
-       case types.TCOMPLEX64:
-               return storeTwoArg(x, pos, b, locs, ".real", ".imag", source, mem, x.typs.Float32, x.typs.Float32, 0, storeOffset, loadRegOffset, storeRc)
-       case types.TCOMPLEX128:
-               return storeTwoArg(x, pos, b, locs, ".real", ".imag", source, mem, x.typs.Float64, x.typs.Float64, 0, storeOffset, loadRegOffset, storeRc)
-       case types.TSLICE:
-               mem = storeOneArg(x, pos, b, locs, ".ptr", source, mem, x.typs.BytePtr, 0, storeOffset, loadRegOffset, storeRc.next(x.typs.BytePtr))
-               return storeTwoArg(x, pos, b, locs, ".len", ".cap", source, mem, x.typs.Int, x.typs.Int, x.ptrSize, storeOffset+x.ptrSize, loadRegOffset+RO_slice_len, storeRc)
-       }
-       return nil
-}
+               addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.typs.BytePtr, rc.next(x.typs.BytePtr)))
+               pos = pos.WithNotStmt()
+               addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.typs.Int, rc.next(x.typs.Int)))
+               a = makeOf(a, OpStringMake, args)
+               x.commonSelectors[sk] = a
+               return a
 
-func (x *expandState) splitSlotsIntoNames(locs []*LocalSlot, suffix string, off int64, rt *types.Type, w *Value) {
-       wlocs := x.splitSlots(locs, suffix, off, rt)
-       for _, l := range wlocs {
-               old, ok := x.f.NamedValues[*l]
-               x.f.NamedValues[*l] = append(old, w)
-               if !ok {
-                       x.f.Names = append(x.f.Names, l)
-               }
-       }
-}
+       case types.TINTER:
+               addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.typs.Uintptr, rc.next(x.typs.Uintptr)))
+               pos = pos.WithNotStmt()
+               addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.typs.BytePtr, rc.next(x.typs.BytePtr)))
+               a = makeOf(a, OpIMake, args)
+               x.commonSelectors[sk] = a
+               return a
 
-// decomposeLoad is a helper for storeArgOrLoad.
-// It decomposes a Load  into smaller parts and returns the new mem.
-// If the type does not match one of the expected aggregate types, it returns nil instead.
-// Parameters:
-//
-//     pos           -- the location of any generated code.
-//     b             -- the block into which any generated code should normally be placed
-//     source        -- the value, possibly an aggregate, to be stored.
-//     mem           -- the mem flowing into this decomposition (loads depend on it, stores updated it)
-//     t             -- the type of the value to be stored
-//     storeOffset   -- if the value is stored in memory, it is stored at base (see storeRc) + offset
-//     loadRegOffset -- regarding source as a value in registers, the register offset in ABI1.  Meaningful only if source is OpArg.
-//     storeRc       -- storeRC; if the value is stored in registers, this specifies the registers.
-//                      StoreRc also identifies whether the target is registers or memory, and has the base for the store operation.
-//
-// TODO -- this needs cleanup; it just works for SSA-able aggregates, and won't fully generalize to register-args aggregates.
-func (x *expandState) decomposeLoad(pos src.XPos, b *Block, source, mem *Value, t *types.Type, storeOffset int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value {
-       u := source.Type
-       switch u.Kind() {
-       case types.TARRAY:
-               elem := u.Elem()
-               elemRO := x.regWidth(elem)
-               for i := int64(0); i < u.NumElem(); i++ {
-                       elemOff := i * elem.Size()
-                       mem = storeOneLoad(x, pos, b, source, mem, elem, elemOff, storeOffset+elemOff, loadRegOffset, storeRc.next(elem))
-                       loadRegOffset += elemRO
+       case types.TCOMPLEX64:
+               addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.typs.Float32, rc.next(x.typs.Float32)))
+               pos = pos.WithNotStmt()
+               addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.typs.Float32, rc.next(x.typs.Float32)))
+               a = makeOf(a, OpComplexMake, args)
+               x.commonSelectors[sk] = a
+               return a
+
+       case types.TCOMPLEX128:
+               addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.typs.Float64, rc.next(x.typs.Float64)))
+               pos = pos.WithNotStmt()
+               addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.typs.Float64, rc.next(x.typs.Float64)))
+               a = makeOf(a, OpComplexMake, args)
+               x.commonSelectors[sk] = a
+               return a
+
+       case types.TINT64:
+               if at.Size() > x.regSize {
+                       addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.firstType, rc.next(x.firstType)))
                        pos = pos.WithNotStmt()
+                       addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.secondType, rc.next(x.secondType)))
+                       if !x.f.Config.BigEndian {
+                               // Int64Make takes (hi, lo); on little-endian the words were loaded (lo, hi), so swap.
+                               args[0], args[1] = args[1], args[0]
+                       }
+                       a = makeOf(a, OpInt64Make, args)
+                       x.commonSelectors[sk] = a
+                       return a
                }
-               return mem
-       case types.TSTRUCT:
-               for i := 0; i < u.NumFields(); i++ {
-                       fld := u.Field(i)
-                       mem = storeOneLoad(x, pos, b, source, mem, fld.Type, fld.Offset, storeOffset+fld.Offset, loadRegOffset, storeRc.next(fld.Type))
-                       loadRegOffset += x.regWidth(fld.Type)
+       case types.TUINT64:
+               if at.Size() > x.regSize {
+                       addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.typs.UInt32, rc.next(x.typs.UInt32)))
                        pos = pos.WithNotStmt()
+                       addArg(x.rewriteSelectOrArg(pos, b, container, nil, m0, x.typs.UInt32, rc.next(x.typs.UInt32)))
+                       if !x.f.Config.BigEndian {
+                               // Int64Make takes (hi, lo); on little-endian the words were loaded (lo, hi), so swap.
+                               args[0], args[1] = args[1], args[0]
+                       }
+                       a = makeOf(a, OpInt64Make, args)
+                       x.commonSelectors[sk] = a
+                       return a
                }
-               return mem
-       case types.TINT64, types.TUINT64:
-               if t.Size() == x.regSize {
-                       break
-               }
-               tHi, tLo := x.intPairTypes(t.Kind())
-               mem = storeOneLoad(x, pos, b, source, mem, tHi, x.hiOffset, storeOffset+x.hiOffset, loadRegOffset+x.hiRo, storeRc.plus(x.hiRo))
-               pos = pos.WithNotStmt()
-               return storeOneLoad(x, pos, b, source, mem, tLo, x.lowOffset, storeOffset+x.lowOffset, loadRegOffset+x.loRo, storeRc.plus(x.loRo))
-       case types.TINTER:
-               return storeTwoLoad(x, pos, b, source, mem, x.typs.Uintptr, x.typs.BytePtr, 0, storeOffset, loadRegOffset, storeRc)
-       case types.TSTRING:
-               return storeTwoLoad(x, pos, b, source, mem, x.typs.BytePtr, x.typs.Int, 0, storeOffset, loadRegOffset, storeRc)
-       case types.TCOMPLEX64:
-               return storeTwoLoad(x, pos, b, source, mem, x.typs.Float32, x.typs.Float32, 0, storeOffset, loadRegOffset, storeRc)
-       case types.TCOMPLEX128:
-               return storeTwoLoad(x, pos, b, source, mem, x.typs.Float64, x.typs.Float64, 0, storeOffset, loadRegOffset, storeRc)
-       case types.TSLICE:
-               mem = storeOneLoad(x, pos, b, source, mem, x.typs.BytePtr, 0, storeOffset, loadRegOffset, storeRc.next(x.typs.BytePtr))
-               return storeTwoLoad(x, pos, b, source, mem, x.typs.Int, x.typs.Int, x.ptrSize, storeOffset+x.ptrSize, loadRegOffset+RO_slice_len, storeRc)
-       }
-       return nil
-}
-
-// storeOneArg creates a decomposed (one step) arg that is then stored.
-// pos and b locate the store instruction, source is the "base" of the value input,
-// mem is the input mem, t is the type in question, and offArg and offStore are the offsets from the respective bases.
-func storeOneArg(x *expandState, pos src.XPos, b *Block, locs []*LocalSlot, suffix string, source, mem *Value, t *types.Type, argOffset, storeOffset int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value {
-       if x.debug > 1 {
-               x.indent(3)
-               defer x.indent(-3)
-               x.Printf("storeOneArg(%s;  %s;  %s; aO=%d; sO=%d; lrO=%d; %s)\n", source.LongString(), mem.String(), t.String(), argOffset, storeOffset, loadRegOffset, storeRc.String())
        }
 
-       w := x.commonArgs[selKey{source, argOffset, t.Size(), t}]
-       if w == nil {
-               w = x.newArgToMemOrRegs(source, w, argOffset, loadRegOffset, t, pos)
-               x.splitSlotsIntoNames(locs, suffix, argOffset, t, w)
-       }
-       return x.storeArgOrLoad(pos, b, w, mem, t, storeOffset, loadRegOffset, storeRc)
-}
+       // An atomic type: either record the register, or store the value and update the memory.
 
-// storeOneLoad creates a decomposed (one step) load that is then stored.
-func storeOneLoad(x *expandState, pos src.XPos, b *Block, source, mem *Value, t *types.Type, offArg, offStore int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value {
-       from := x.offsetFrom(source.Block, source.Args[0], offArg, types.NewPtr(t))
-       w := b.NewValue2(source.Pos, OpLoad, t, from, mem)
-       return x.storeArgOrLoad(pos, b, w, mem, t, offStore, loadRegOffset, storeRc)
-}
+       // Depending on the container Op, the leaves are either OpSelectN or OpArg{Int,Float}Reg.
 
-func storeTwoArg(x *expandState, pos src.XPos, b *Block, locs []*LocalSlot, suffix1 string, suffix2 string, source, mem *Value, t1, t2 *types.Type, offArg, offStore int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value {
-       mem = storeOneArg(x, pos, b, locs, suffix1, source, mem, t1, offArg, offStore, loadRegOffset, storeRc.next(t1))
-       pos = pos.WithNotStmt()
-       t1Size := t1.Size()
-       return storeOneArg(x, pos, b, locs, suffix2, source, mem, t2, offArg+t1Size, offStore+t1Size, loadRegOffset+1, storeRc)
-}
+       if container.Op == OpArg {
+               if rc.hasRegs() {
+                       op, i := rc.ArgOpAndRegisterFor()
+                       name := container.Aux.(*ir.Name)
+                       a = makeOf(a, op, nil)
+                       a.AuxInt = i
+                       a.Aux = &AuxNameOffset{name, rc.storeOffset}
+               } else {
+                       key := selKey{container, rc.storeOffset, at.Size(), at}
+                       w := x.commonArgs[key]
+                       if w != nil && w.Uses != 0 {
+                               if a == nil {
+                                       a = w
+                               } else {
+                                       a.copyOf(w)
+                               }
+                       } else {
+                               if a == nil {
+                                       aux := container.Aux
+                                       auxInt := container.AuxInt + rc.storeOffset
+                                       a = container.Block.NewValue0IA(container.Pos, OpArg, at, auxInt, aux)
+                               } else {
+                                       // Do nothing; the original should be okay.
+                               }
+                               x.commonArgs[key] = a
+                       }
+               }
+       } else if container.Op == OpSelectN {
+               call := container.Args[0]
+               aux := call.Aux.(*AuxCall)
+               which := container.AuxInt
 
-// storeTwoLoad creates a pair of decomposed (one step) loads that are then stored.
-// the elements of the pair must not require any additional alignment.
-func storeTwoLoad(x *expandState, pos src.XPos, b *Block, source, mem *Value, t1, t2 *types.Type, offArg, offStore int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value {
-       mem = storeOneLoad(x, pos, b, source, mem, t1, offArg, offStore, loadRegOffset, storeRc.next(t1))
-       pos = pos.WithNotStmt()
-       t1Size := t1.Size()
-       return storeOneLoad(x, pos, b, source, mem, t2, offArg+t1Size, offStore+t1Size, loadRegOffset+1, storeRc)
-}
+               if at == types.TypeMem {
+                       if a != m0 || a != x.memForCall[call.ID] {
+                               panic(fmt.Errorf("Memories %s, %s, and %s should all be equal after %s", a.LongString(), m0.LongString(), x.memForCall[call.ID], call.LongString()))
+                       }
+               } else if rc.hasRegs() {
+                       firstReg := uint32(0)
+                       for i := 0; i < int(which); i++ {
+                               firstReg += uint32(len(aux.abiInfo.OutParam(i).Registers))
+                       }
+                       reg := int64(rc.nextSlice + Abi1RO(firstReg))
+                       a = makeOf(a, OpSelectN, []*Value{call})
+                       a.AuxInt = reg
+               } else {
+                       off := x.offsetFrom(x.f.Entry, x.sp, rc.storeOffset+aux.OffsetOfResult(which), types.NewPtr(at))
+                       a = makeOf(a, OpLoad, []*Value{off, m0})
+               }
 
-// storeArgOrLoad converts stores of SSA-able potentially aggregatable arguments (passed to a call) into a series of primitive-typed
-// stores of non-aggregate types.  It recursively walks up a chain of selectors until it reaches a Load or an Arg.
-// If it does not reach a Load or an Arg, nothing happens; this allows a little freedom in phase ordering.
-func (x *expandState) storeArgOrLoad(pos src.XPos, b *Block, source, mem *Value, t *types.Type, storeOffset int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value {
-       if x.debug > 1 {
-               x.indent(3)
-               defer x.indent(-3)
-               x.Printf("storeArgOrLoad(%s;  %s;  %s; %d; %s)\n", source.LongString(), mem.String(), t.String(), storeOffset, storeRc.String())
+       } else {
+               panic(fmt.Errorf("Expected container OpArg or OpSelectN, saw %v instead", container.LongString()))
        }
 
-       // Start with Opcodes that can be disassembled
-       switch source.Op {
-       case OpCopy:
-               return x.storeArgOrLoad(pos, b, source.Args[0], mem, t, storeOffset, loadRegOffset, storeRc)
+       x.commonSelectors[sk] = a
+       return a
+}
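
For orientation, a minimal sketch (ordinary Go, not compiler internals; all names are hypothetical) of the recursion pattern above: aggregates are taken apart per-field and per-element until only scalar leaves remain, and the real pass then rebuilds each aggregate with StructMake/ArrayMake/etc. of the converted parts.

package main

import "fmt"

type kind int

const (
	kScalar kind = iota
	kStruct
	kArray
)

type typ struct {
	k      kind
	name   string // leaf label, for scalars
	fields []*typ // for kStruct
	elem   *typ   // for kArray
	n      int    // element count, for kArray
}

// flatten visits leaves in layout order, mirroring the TSTRUCT/TARRAY cases
// of rewriteSelectOrArg, appending one entry per scalar part.
func flatten(t *typ, out *[]string) {
	switch t.k {
	case kScalar:
		*out = append(*out, t.name)
	case kStruct:
		for _, f := range t.fields {
			flatten(f, out)
		}
	case kArray:
		for i := 0; i < t.n; i++ {
			flatten(t.elem, out)
		}
	}
}

func main() {
	// A string header decomposes into its pointer and length words.
	str := &typ{k: kStruct, fields: []*typ{
		{k: kScalar, name: "ptr"},
		{k: kScalar, name: "len"},
	}}
	var leaves []string
	flatten(str, &leaves)
	fmt.Println(leaves) // [ptr len]
}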
 
-       case OpLoad, OpDereference:
-               ret := x.decomposeLoad(pos, b, source, mem, t, storeOffset, loadRegOffset, storeRc)
-               if ret != nil {
-                       return ret
-               }
+// rewriteWideSelectToStores handles the case of a SelectN'd result from a function call that is too large for SSA,
+// but is transferred in registers.  In this case the register cursor tracks both operands: the register sources and
+// the memory destinations.
+// It returns the memory flowing out of the last store.
+func (x *expandState) rewriteWideSelectToStores(pos src.XPos, b *Block, container, m0 *Value, at *types.Type, rc registerCursor) *Value {
 
-       case OpArg:
-               ret := x.decomposeArg(pos, b, source, mem, t, storeOffset, loadRegOffset, storeRc)
-               if ret != nil {
-                       return ret
-               }
+       if at.Size() == 0 {
+               return m0
+       }
 
-       case OpArrayMake0, OpStructMake0:
-               // TODO(register args) is this correct for registers?
-               return mem
+       switch at.Kind() {
+       case types.TARRAY:
+               et := at.Elem()
+               for i := int64(0); i < at.NumElem(); i++ {
+                       m0 = x.rewriteWideSelectToStores(pos, b, container, m0, et, rc.next(et))
+               }
+               return m0
 
-       case OpStructMake1, OpStructMake2, OpStructMake3, OpStructMake4:
-               for i := 0; i < t.NumFields(); i++ {
-                       fld := t.Field(i)
-                       mem = x.storeArgOrLoad(pos, b, source.Args[i], mem, fld.Type, storeOffset+fld.Offset, 0, storeRc.next(fld.Type))
+       case types.TSTRUCT:
+               // Assume ssagen/ssa.go (in buildssa) spills large aggregates so they won't appear here.
+               for i := 0; i < at.NumFields(); i++ {
+                       et := at.Field(i).Type
+                       m0 = x.rewriteWideSelectToStores(pos, b, container, m0, et, rc.next(et))
                        pos = pos.WithNotStmt()
                }
-               return mem
-
-       case OpArrayMake1:
-               return x.storeArgOrLoad(pos, b, source.Args[0], mem, t.Elem(), storeOffset, 0, storeRc.at(t, 0))
+               return m0
 
-       case OpInt64Make:
-               tHi, tLo := x.intPairTypes(t.Kind())
-               mem = x.storeArgOrLoad(pos, b, source.Args[0], mem, tHi, storeOffset+x.hiOffset, 0, storeRc.next(tHi))
+       case types.TSLICE:
+               m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.typs.BytePtr, rc.next(x.typs.BytePtr))
                pos = pos.WithNotStmt()
-               return x.storeArgOrLoad(pos, b, source.Args[1], mem, tLo, storeOffset+x.lowOffset, 0, storeRc)
+               m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.typs.Int, rc.next(x.typs.Int))
+               m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.typs.Int, rc.next(x.typs.Int))
+               return m0
 
-       case OpComplexMake:
-               tPart := x.typs.Float32
-               wPart := t.Size() / 2
-               if wPart == 8 {
-                       tPart = x.typs.Float64
-               }
-               mem = x.storeArgOrLoad(pos, b, source.Args[0], mem, tPart, storeOffset, 0, storeRc.next(tPart))
+       case types.TSTRING:
+               m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.typs.BytePtr, rc.next(x.typs.BytePtr))
                pos = pos.WithNotStmt()
-               return x.storeArgOrLoad(pos, b, source.Args[1], mem, tPart, storeOffset+wPart, 0, storeRc)
+               m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.typs.Int, rc.next(x.typs.Int))
+               return m0
 
-       case OpIMake:
-               mem = x.storeArgOrLoad(pos, b, source.Args[0], mem, x.typs.Uintptr, storeOffset, 0, storeRc.next(x.typs.Uintptr))
+       case types.TINTER:
+               m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.typs.Uintptr, rc.next(x.typs.Uintptr))
                pos = pos.WithNotStmt()
-               return x.storeArgOrLoad(pos, b, source.Args[1], mem, x.typs.BytePtr, storeOffset+x.ptrSize, 0, storeRc)
+               m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.typs.BytePtr, rc.next(x.typs.BytePtr))
+               return m0
 
-       case OpStringMake:
-               mem = x.storeArgOrLoad(pos, b, source.Args[0], mem, x.typs.BytePtr, storeOffset, 0, storeRc.next(x.typs.BytePtr))
+       case types.TCOMPLEX64:
+               m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.typs.Float32, rc.next(x.typs.Float32))
                pos = pos.WithNotStmt()
-               return x.storeArgOrLoad(pos, b, source.Args[1], mem, x.typs.Int, storeOffset+x.ptrSize, 0, storeRc)
+               m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.typs.Float32, rc.next(x.typs.Float32))
+               return m0
 
-       case OpSliceMake:
-               mem = x.storeArgOrLoad(pos, b, source.Args[0], mem, x.typs.BytePtr, storeOffset, 0, storeRc.next(x.typs.BytePtr))
+       case types.TCOMPLEX128:
+               m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.typs.Float64, rc.next(x.typs.Float64))
                pos = pos.WithNotStmt()
-               mem = x.storeArgOrLoad(pos, b, source.Args[1], mem, x.typs.Int, storeOffset+x.ptrSize, 0, storeRc.next(x.typs.Int))
-               return x.storeArgOrLoad(pos, b, source.Args[2], mem, x.typs.Int, storeOffset+2*x.ptrSize, 0, storeRc)
-       }
+               m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.typs.Float64, rc.next(x.typs.Float64))
+               return m0
 
-       // For nodes that cannot be taken apart -- OpSelectN, other structure selectors.
-       switch t.Kind() {
-       case types.TARRAY:
-               elt := t.Elem()
-               if source.Type != t && t.NumElem() == 1 && elt.Size() == t.Size() && t.Size() == x.regSize {
-                       t = removeTrivialWrapperTypes(t)
-                       // it could be a leaf type, but the "leaf" could be complex64 (for example)
-                       return x.storeArgOrLoad(pos, b, source, mem, t, storeOffset, loadRegOffset, storeRc)
+       case types.TINT64:
+               if at.Size() > x.regSize {
+                       m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.firstType, rc.next(x.firstType))
+                       pos = pos.WithNotStmt()
+                       m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.secondType, rc.next(x.secondType))
+                       return m0
                }
-               eltRO := x.regWidth(elt)
-               source.Type = t
-               for i := int64(0); i < t.NumElem(); i++ {
-                       sel := b.NewValue1I(pos, OpArraySelect, elt, i, source)
-                       mem = x.storeArgOrLoad(pos, b, sel, mem, elt, storeOffset+i*elt.Size(), loadRegOffset, storeRc.at(t, 0))
-                       loadRegOffset += eltRO
+       case types.TUINT64:
+               if at.Size() > x.regSize {
+                       m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.typs.UInt32, rc.next(x.typs.UInt32))
                        pos = pos.WithNotStmt()
+                       m0 = x.rewriteWideSelectToStores(pos, b, container, m0, x.typs.UInt32, rc.next(x.typs.UInt32))
+                       return m0
                }
-               return mem
+       }
 
-       case types.TSTRUCT:
-               if source.Type != t && t.NumFields() == 1 && t.Field(0).Type.Size() == t.Size() && t.Size() == x.regSize {
-                       // This peculiar test deals with accesses to immediate interface data.
-                       // It works okay because everything is the same size.
-                       // Example code that triggers this can be found in go/constant/value.go, function ToComplex
-                       // v119 (+881) = IData <intVal> v6
-                       // v121 (+882) = StaticLECall <floatVal,mem> {AuxCall{"".itof([intVal,0])[floatVal,8]}} [16] v119 v1
-                       // This corresponds to the generic rewrite rule "(StructSelect [0] (IData x)) => (IData x)"
-                       // Guard against "struct{struct{*foo}}"
-                       // Other rewriting phases create minor glitches when they transform IData, for instance the
-                       // interface-typed Arg "x" of ToFloat in go/constant/value.go
-                       //   v6 (858) = Arg <Value> {x} (x[Value], x[Value])
-                       // is rewritten by decomposeArgs into
-                       //   v141 (858) = Arg <uintptr> {x}
-                       //   v139 (858) = Arg <*uint8> {x} [8]
-                       // because of a type case clause on line 862 of go/constant/value.go
-                       //      case intVal:
-                       //                 return itof(x)
-                       // v139 is later stored as an intVal == struct{val *big.Int} which naively requires the fields of
-                       // of a *uint8, which does not succeed.
-                       t = removeTrivialWrapperTypes(t)
-                       // it could be a leaf type, but the "leaf" could be complex64 (for example)
-                       return x.storeArgOrLoad(pos, b, source, mem, t, storeOffset, loadRegOffset, storeRc)
-               }
+       // TODO could change treatment of too-large OpArg, would deal with it here.
+       if container.Op == OpSelectN {
+               call := container.Args[0]
+               aux := call.Aux.(*AuxCall)
+               which := container.AuxInt
 
-               source.Type = t
-               for i := 0; i < t.NumFields(); i++ {
-                       fld := t.Field(i)
-                       sel := b.NewValue1I(pos, OpStructSelect, fld.Type, int64(i), source)
-                       mem = x.storeArgOrLoad(pos, b, sel, mem, fld.Type, storeOffset+fld.Offset, loadRegOffset, storeRc.next(fld.Type))
-                       loadRegOffset += x.regWidth(fld.Type)
-                       pos = pos.WithNotStmt()
+               if rc.hasRegs() {
+                       firstReg := uint32(0)
+                       for i := 0; i < int(which); i++ {
+                               firstReg += uint32(len(aux.abiInfo.OutParam(i).Registers))
+                       }
+                       reg := int64(rc.nextSlice + Abi1RO(firstReg))
+                       a := b.NewValue1I(pos, OpSelectN, at, reg, call)
+                       dst := x.offsetFrom(b, rc.storeDest, rc.storeOffset, types.NewPtr(at))
+                       m0 = b.NewValue3A(pos, OpStore, types.TypeMem, at, dst, a, m0)
+               } else {
+                       panic(fmt.Errorf("Expected rc to have registers"))
                }
-               return mem
+       } else {
+               panic(fmt.Errorf("Expected container OpSelectN, saw %v instead", container.LongString()))
+       }
+       return m0
+}
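
A hedged illustration (hypothetical helper, not compiler code) of the piecewise spill this performs: each register-sized piece is stored at its cursor offset, and in the SSA form each OpStore consumes the memory value produced by the previous store, which enforces the ordering.

package main

import (
	"encoding/binary"
	"fmt"
)

// spill writes each "register" word at consecutive offsets in frame,
// returning the offset past the last store (standing in for the final mem).
func spill(frame []byte, off int, regs []uint64) int {
	for _, r := range regs {
		binary.LittleEndian.PutUint64(frame[off:], r)
		off += 8 // the real code advances a registerCursor with alignment
	}
	return off
}

func main() {
	frame := make([]byte, 32)
	end := spill(frame, 8, []uint64{0xdeadbeef, 42}) // e.g. ptr word, len word
	fmt.Println(end, frame[8:24])
}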
 
-       case types.TINT64, types.TUINT64:
-               if t.Size() == x.regSize {
-                       break
-               }
-               tHi, tLo := x.intPairTypes(t.Kind())
-               sel := b.NewValue1(pos, OpInt64Hi, tHi, source)
-               mem = x.storeArgOrLoad(pos, b, sel, mem, tHi, storeOffset+x.hiOffset, loadRegOffset+x.hiRo, storeRc.plus(x.hiRo))
-               pos = pos.WithNotStmt()
-               sel = b.NewValue1(pos, OpInt64Lo, tLo, source)
-               return x.storeArgOrLoad(pos, b, sel, mem, tLo, storeOffset+x.lowOffset, loadRegOffset+x.loRo, storeRc.plus(x.hiRo))
+func isBlockMultiValueExit(b *Block) bool {
+       return (b.Kind == BlockRet || b.Kind == BlockRetJmp) && b.Controls[0] != nil && b.Controls[0].Op == OpMakeResult
+}
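
For reference, a multi-value exit arises from ordinary Go source like the following sketch: a function with results compiles to a ret block whose control is an OpMakeResult carrying the result values plus memory, which is what isBlockMultiValueExit detects.

package main

import "fmt"

// pair returns two results; its return compiles to a BlockRet controlled by
// an OpMakeResult carrying both values and the memory.
func pair() (int, string) {
	return 1, "x"
}

func main() {
	a, b := pair()
	fmt.Println(a, b)
}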
 
-       case types.TINTER:
-               sel := b.NewValue1(pos, OpITab, x.typs.BytePtr, source)
-               mem = x.storeArgOrLoad(pos, b, sel, mem, x.typs.BytePtr, storeOffset, loadRegOffset, storeRc.next(x.typs.BytePtr))
-               pos = pos.WithNotStmt()
-               sel = b.NewValue1(pos, OpIData, x.typs.BytePtr, source)
-               return x.storeArgOrLoad(pos, b, sel, mem, x.typs.BytePtr, storeOffset+x.ptrSize, loadRegOffset+RO_iface_data, storeRc)
+type Abi1RO uint8 // An offset within a parameter's slice of register indices, for abi1.
 
-       case types.TSTRING:
-               sel := b.NewValue1(pos, OpStringPtr, x.typs.BytePtr, source)
-               mem = x.storeArgOrLoad(pos, b, sel, mem, x.typs.BytePtr, storeOffset, loadRegOffset, storeRc.next(x.typs.BytePtr))
-               pos = pos.WithNotStmt()
-               sel = b.NewValue1(pos, OpStringLen, x.typs.Int, source)
-               return x.storeArgOrLoad(pos, b, sel, mem, x.typs.Int, storeOffset+x.ptrSize, loadRegOffset+RO_string_len, storeRc)
+// A registerCursor tracks which register is used for an Arg or for regValues, or for a piece of such a value.
+type registerCursor struct {
+       storeDest   *Value // if there are no register targets, then this is the base of the store.
+       storeOffset int64
+       regs        []abi.RegIndex // the registers available for this Arg/result (which is all in registers or not at all)
+       nextSlice   Abi1RO         // the next register/register-slice offset
+       config      *abi.ABIConfig
+       regValues   *[]*Value // values assigned to registers accumulate here
+}
 
-       case types.TSLICE:
-               et := types.NewPtr(t.Elem())
-               sel := b.NewValue1(pos, OpSlicePtr, et, source)
-               mem = x.storeArgOrLoad(pos, b, sel, mem, et, storeOffset, loadRegOffset, storeRc.next(et))
-               pos = pos.WithNotStmt()
-               sel = b.NewValue1(pos, OpSliceLen, x.typs.Int, source)
-               mem = x.storeArgOrLoad(pos, b, sel, mem, x.typs.Int, storeOffset+x.ptrSize, loadRegOffset+RO_slice_len, storeRc.next(x.typs.Int))
-               sel = b.NewValue1(pos, OpSliceCap, x.typs.Int, source)
-               return x.storeArgOrLoad(pos, b, sel, mem, x.typs.Int, storeOffset+2*x.ptrSize, loadRegOffset+RO_slice_cap, storeRc)
+func (c *registerCursor) String() string {
+       dest := "<none>"
+       if c.storeDest != nil {
+               dest = fmt.Sprintf("%s+%d", c.storeDest.String(), c.storeOffset)
+       }
+       regs := "<none>"
+       if c.regValues != nil {
+               regs = ""
+               for i, x := range *c.regValues {
+                       if i > 0 {
+                               regs = regs + "; "
+                       }
+                       regs = regs + x.LongString()
+               }
+       }
 
-       case types.TCOMPLEX64:
-               sel := b.NewValue1(pos, OpComplexReal, x.typs.Float32, source)
-               mem = x.storeArgOrLoad(pos, b, sel, mem, x.typs.Float32, storeOffset, loadRegOffset, storeRc.next(x.typs.Float32))
-               pos = pos.WithNotStmt()
-               sel = b.NewValue1(pos, OpComplexImag, x.typs.Float32, source)
-               return x.storeArgOrLoad(pos, b, sel, mem, x.typs.Float32, storeOffset+4, loadRegOffset+RO_complex_imag, storeRc)
+       // not printing the config because that has not been useful
+       return fmt.Sprintf("RCSR{storeDest=%v, regsLen=%d, nextSlice=%d, regValues=[%s]}", dest, len(c.regs), c.nextSlice, regs)
+}
 
-       case types.TCOMPLEX128:
-               sel := b.NewValue1(pos, OpComplexReal, x.typs.Float64, source)
-               mem = x.storeArgOrLoad(pos, b, sel, mem, x.typs.Float64, storeOffset, loadRegOffset, storeRc.next(x.typs.Float64))
-               pos = pos.WithNotStmt()
-               sel = b.NewValue1(pos, OpComplexImag, x.typs.Float64, source)
-               return x.storeArgOrLoad(pos, b, sel, mem, x.typs.Float64, storeOffset+8, loadRegOffset+RO_complex_imag, storeRc)
+// next effectively post-increments the register cursor; the receiver is advanced
+// and the (aligned) old value is returned.
+func (c *registerCursor) next(t *types.Type) registerCursor {
+       c.storeOffset = types.RoundUp(c.storeOffset, t.Alignment())
+       rc := *c
+       c.storeOffset = types.RoundUp(c.storeOffset+t.Size(), t.Alignment())
+       if int(c.nextSlice) < len(c.regs) {
+               w := c.config.NumParamRegs(t)
+               c.nextSlice += Abi1RO(w)
        }
+       return rc
+}
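
A worked example of the two roundings in next (roundUp below is an assumed stand-in for types.RoundUp): an int8 followed by an int64 on a 64-bit target leaves a 7-byte alignment hole before the second field.

package main

import "fmt"

func roundUp(x, a int64) int64 { return (x + a - 1) &^ (a - 1) }

func main() {
	off := int64(0)

	// next(int8): align to 1, snapshot 0, then advance past 1 byte.
	cur := roundUp(off, 1)
	off = roundUp(cur+1, 1) // off = 1

	// next(int64): align to 8 first, so the snapshot jumps from 1 to 8.
	cur = roundUp(off, 8)
	off = roundUp(cur+8, 8)

	fmt.Println(cur, off) // 8 16
}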
+
+// plus returns a register cursor offset from the original, without modifying the original.
+func (c *registerCursor) plus(regWidth Abi1RO) registerCursor {
+       rc := *c
+       rc.nextSlice += regWidth
+       return rc
+}
 
-       s := mem
-       if source.Op == OpDereference {
-               source.Op = OpLoad // For purposes of parameter passing expansion, a Dereference is a Load.
+// at returns the register cursor for component i of t, where the first
+// component is numbered 0.
+func (c *registerCursor) at(t *types.Type, i int) registerCursor {
+       rc := *c
+       if i == 0 || len(c.regs) == 0 {
+               return rc
        }
-       if storeRc.hasRegs() {
-               storeRc.addArg(source)
-       } else {
-               dst := x.offsetFrom(b, storeRc.storeDest, storeOffset, types.NewPtr(t))
-               s = b.NewValue3A(pos, OpStore, types.TypeMem, t, dst, source, mem)
+       if t.IsArray() {
+               w := c.config.NumParamRegs(t.Elem())
+               rc.nextSlice += Abi1RO(i * w)
+               return rc
        }
-       if x.debug > 1 {
-               x.Printf("-->storeArg returns %s, storeRc=%s\n", s.LongString(), storeRc.String())
+       if t.IsStruct() {
+               for j := 0; j < i; j++ {
+                       rc.next(t.FieldType(j))
+               }
+               return rc
        }
-       return s
+       panic("Haven't implemented this case yet, do I need to?")
 }
 
-// rewriteArgs replaces all the call-parameter Args to a call with their register translation (if any).
-// Preceding parameters (code pointers, closure pointer) are preserved, and the memory input is modified
-// to account for any parameter stores required.
-// Any of the old Args that have their use count fall to zero are marked OpInvalid.
-func (x *expandState) rewriteArgs(v *Value, firstArg int) {
-       if x.debug > 1 {
-               x.indent(3)
-               defer x.indent(-3)
-               x.Printf("rewriteArgs(%s; %d)\n", v.LongString(), firstArg)
-       }
-       // Thread the stores on the memory arg
-       aux := v.Aux.(*AuxCall)
-       m0 := v.MemoryArg()
-       mem := m0
-       newArgs := []*Value{}
-       oldArgs := []*Value{}
-       sp := x.sp
-       if v.Op == OpTailLECall {
-               // For tail call, we unwind the frame before the call so we'll use the caller's
-               // SP.
-               sp = x.f.Entry.NewValue1(src.NoXPos, OpGetCallerSP, x.typs.Uintptr, mem)
-       }
-       for i, a := range v.Args[firstArg : len(v.Args)-1] { // skip leading non-parameter SSA Args and trailing mem SSA Arg.
-               oldArgs = append(oldArgs, a)
-               auxI := int64(i)
-               aRegs := aux.RegsOfArg(auxI)
-               aType := aux.TypeOfArg(auxI)
-               if len(aRegs) == 0 && a.Op == OpDereference {
-                       aOffset := aux.OffsetOfArg(auxI)
-                       if a.MemoryArg() != m0 {
-                               x.f.Fatalf("Op...LECall and OpDereference have mismatched mem, %s and %s", v.LongString(), a.LongString())
-                       }
-                       if v.Op == OpTailLECall {
-                               // It's common for a tail call passing the same arguments (e.g. method wrapper),
-                               // so this would be a self copy. Detect this and optimize it out.
-                               a0 := a.Args[0]
-                               if a0.Op == OpLocalAddr {
-                                       n := a0.Aux.(*ir.Name)
-                                       if n.Class == ir.PPARAM && n.FrameOffset()+x.f.Config.ctxt.Arch.FixedFrameSize == aOffset {
-                                               continue
-                                       }
-                               }
-                       }
-                       if x.debug > 1 {
-                               x.Printf("...storeArg %s, %v, %d\n", a.LongString(), aType, aOffset)
-                       }
-                       // "Dereference" of addressed (probably not-SSA-eligible) value becomes Move
-                       // TODO(register args) this will be more complicated with registers in the picture.
-                       mem = x.rewriteDereference(v.Block, sp, a, mem, aOffset, aux.SizeOfArg(auxI), aType, v.Pos)
-               } else {
-                       var rc registerCursor
-                       var result *[]*Value
-                       var aOffset int64
-                       if len(aRegs) > 0 {
-                               result = &newArgs
-                       } else {
-                               aOffset = aux.OffsetOfArg(auxI)
-                       }
-                       if v.Op == OpTailLECall && a.Op == OpArg && a.AuxInt == 0 {
-                               // It's common for a tail call passing the same arguments (e.g. method wrapper),
-                               // so this would be a self copy. Detect this and optimize it out.
-                               n := a.Aux.(*ir.Name)
-                               if n.Class == ir.PPARAM && n.FrameOffset()+x.f.Config.ctxt.Arch.FixedFrameSize == aOffset {
-                                       continue
-                               }
-                       }
-                       if x.debug > 1 {
-                               x.Printf("...storeArg %s, %v, %d\n", a.LongString(), aType, aOffset)
-                       }
-                       rc.init(aRegs, aux.abiInfo, result, sp)
-                       mem = x.storeArgOrLoad(v.Pos, v.Block, a, mem, aType, aOffset, 0, rc)
-               }
-       }
-       var preArgStore [2]*Value
-       preArgs := append(preArgStore[:0], v.Args[0:firstArg]...)
-       v.resetArgs()
-       v.AddArgs(preArgs...)
-       v.AddArgs(newArgs...)
-       v.AddArg(mem)
-       for _, a := range oldArgs {
-               if a.Uses == 0 {
-                       x.invalidateRecursively(a)
-               }
-       }
-
-       return
+func (c *registerCursor) init(regs []abi.RegIndex, info *abi.ABIParamResultInfo, result *[]*Value, storeDest *Value, storeOffset int64) {
+       c.regs = regs
+       c.nextSlice = 0
+       c.storeOffset = storeOffset
+       c.storeDest = storeDest
+       c.config = info.Config()
+       c.regValues = result
 }
 
-func (x *expandState) invalidateRecursively(a *Value) {
-       var s string
-       if x.debug > 0 {
-               plus := " "
-               if a.Pos.IsStmt() == src.PosIsStmt {
-                       plus = " +"
-               }
-               s = a.String() + plus + a.Pos.LineNumber() + " " + a.LongString()
-               if x.debug > 1 {
-                       x.Printf("...marking %v unused\n", s)
-               }
-       }
-       lost := a.invalidateRecursively()
-       if x.debug&1 != 0 && lost { // For odd values of x.debug, do this.
-               x.Printf("Lost statement marker in %s on former %s\n", base.Ctxt.Pkgpath+"."+x.f.Name, s)
-       }
+func (c *registerCursor) addArg(v *Value) {
+       *c.regValues = append(*c.regValues, v)
 }
 
-// expandCalls converts LE (Late Expansion) calls that act like they receive value args into a lower-level form
-// that is more oriented to a platform's ABI.  The SelectN operations that extract results are rewritten into
-// more appropriate forms, and any StructMake or ArrayMake inputs are decomposed until non-struct values are
-// reached.  On the callee side, OpArg nodes are not decomposed until this phase is run.
-// TODO results should not be lowered until this phase.
-func expandCalls(f *Func) {
-       // Calls that need lowering have some number of inputs, including a memory input,
-       // and produce a tuple of (value1, value2, ..., mem) where valueK may or may not be SSA-able.
-
-       // With the current ABI those inputs need to be converted into stores to memory,
-       // rethreading the call's memory input to the first, and the new call now receiving the last.
-
-       // With the current ABI, the outputs need to be converted to loads, which will all use the call's
-       // memory output as their input.
-       sp, _ := f.spSb()
-       x := &expandState{
-               f:                  f,
-               abi1:               f.ABI1,
-               debug:              f.pass.debug,
-               regSize:            f.Config.RegSize,
-               sp:                 sp,
-               typs:               &f.Config.Types,
-               ptrSize:            f.Config.PtrSize,
-               namedSelects:       make(map[*Value][]namedVal),
-               sdom:               f.Sdom(),
-               commonArgs:         make(map[selKey]*Value),
-               memForCall:         make(map[ID]*Value),
-               transformedSelects: make(map[ID]bool),
-       }
-
-       // For 32-bit, need to deal with decomposition of 64-bit integers, which depends on endianness.
-       if f.Config.BigEndian {
-               x.lowOffset, x.hiOffset = 4, 0
-               x.loRo, x.hiRo = 1, 0
-       } else {
-               x.lowOffset, x.hiOffset = 0, 4
-               x.loRo, x.hiRo = 0, 1
-       }
-
-       if x.debug > 1 {
-               x.Printf("\nexpandsCalls(%s)\n", f.Name)
-       }
-
-       for i, name := range f.Names {
-               t := name.Type
-               if x.isAlreadyExpandedAggregateType(t) {
-                       for j, v := range f.NamedValues[*name] {
-                               if v.Op == OpSelectN || v.Op == OpArg && x.isAlreadyExpandedAggregateType(v.Type) {
-                                       ns := x.namedSelects[v]
-                                       x.namedSelects[v] = append(ns, namedVal{locIndex: i, valIndex: j})
-                               }
-                       }
-               }
-       }
-
-       // TODO if too slow, whole program iteration can be replaced w/ slices of appropriate values, accumulated in first loop here.
+func (c *registerCursor) hasRegs() bool {
+       return len(c.regs) > 0
+}
 
-       // Step 0: rewrite the calls to convert args to calls into stores/register movement.
-       for _, b := range f.Blocks {
-               for _, v := range b.Values {
-                       firstArg := 0
-                       switch v.Op {
-                       case OpStaticLECall, OpTailLECall:
-                       case OpInterLECall:
-                               firstArg = 1
-                       case OpClosureLECall:
-                               firstArg = 2
-                       default:
-                               continue
-                       }
-                       x.rewriteArgs(v, firstArg)
-               }
-               if isBlockMultiValueExit(b) {
-                       x.indent(3)
-                       // Very similar to code in rewriteArgs, but results instead of args.
-                       v := b.Controls[0]
-                       m0 := v.MemoryArg()
-                       mem := m0
-                       aux := f.OwnAux
-                       allResults := []*Value{}
-                       if x.debug > 1 {
-                               x.Printf("multiValueExit rewriting %s\n", v.LongString())
-                       }
-                       var oldArgs []*Value
-                       for j, a := range v.Args[:len(v.Args)-1] {
-                               oldArgs = append(oldArgs, a)
-                               i := int64(j)
-                               auxType := aux.TypeOfResult(i)
-                               auxBase := b.NewValue2A(v.Pos, OpLocalAddr, types.NewPtr(auxType), aux.NameOfResult(i), x.sp, mem)
-                               auxOffset := int64(0)
-                               auxSize := aux.SizeOfResult(i)
-                               aRegs := aux.RegsOfResult(int64(j))
-                               if len(aRegs) == 0 && a.Op == OpDereference {
-                                       // Avoid a self-move, and if one is detected try to remove the already-inserted VarDef for the assignment that won't happen.
-                                       if dAddr, dMem := a.Args[0], a.Args[1]; dAddr.Op == OpLocalAddr && dAddr.Args[0].Op == OpSP &&
-                                               dAddr.Args[1] == dMem && dAddr.Aux == aux.NameOfResult(i) {
-                                               if dMem.Op == OpVarDef && dMem.Aux == dAddr.Aux {
-                                                       dMem.copyOf(dMem.MemoryArg()) // elide the VarDef
-                                               }
-                                               continue
-                                       }
-                                       mem = x.rewriteDereference(v.Block, auxBase, a, mem, auxOffset, auxSize, auxType, a.Pos)
-                               } else {
-                                       if a.Op == OpLoad && a.Args[0].Op == OpLocalAddr {
-                                               addr := a.Args[0] // This is a self-move. // TODO(register args) do what here for registers?
-                                               if addr.MemoryArg() == a.MemoryArg() && addr.Aux == aux.NameOfResult(i) {
-                                                       continue
-                                               }
-                                       }
-                                       var rc registerCursor
-                                       var result *[]*Value
-                                       if len(aRegs) > 0 {
-                                               result = &allResults
-                                       }
-                                       rc.init(aRegs, aux.abiInfo, result, auxBase)
-                                       mem = x.storeArgOrLoad(v.Pos, b, a, mem, aux.TypeOfResult(i), auxOffset, 0, rc)
-                               }
-                       }
-                       v.resetArgs()
-                       v.AddArgs(allResults...)
-                       v.AddArg(mem)
-                       v.Type = types.NewResults(append(abi.RegisterTypes(aux.abiInfo.OutParams()), types.TypeMem))
-                       b.SetControl(v)
-                       for _, a := range oldArgs {
-                               if a.Uses == 0 {
-                                       if x.debug > 1 {
-                                               x.Printf("...marking %v unused\n", a.LongString())
-                                       }
-                                       x.invalidateRecursively(a)
-                               }
-                       }
-                       if x.debug > 1 {
-                               x.Printf("...multiValueExit new result %s\n", v.LongString())
-                       }
-                       x.indent(-3)
-               }
-       }
+func (c *registerCursor) ArgOpAndRegisterFor() (Op, int64) {
+       r := c.regs[c.nextSlice]
+       return ArgOpAndRegisterFor(r, c.config)
+}
 
-       // Step 1: any stores of aggregates remaining are believed to be sourced from call results or args.
-       // Decompose those stores into a series of smaller stores, adding selection ops as necessary.
-       for _, b := range f.Blocks {
-               for _, v := range b.Values {
-                       if v.Op == OpStore {
-                               t := v.Aux.(*types.Type)
-                               source := v.Args[1]
-                               tSrc := source.Type
-                               iAEATt := x.isAlreadyExpandedAggregateType(t)
-
-                               if !iAEATt {
-                                       // guarding against store immediate struct into interface data field -- store type is *uint8
-                                       // TODO can this happen recursively?
-                                       iAEATt = x.isAlreadyExpandedAggregateType(tSrc)
-                                       if iAEATt {
-                                               t = tSrc
-                                       }
-                               }
-                               dst, mem := v.Args[0], v.Args[2]
-                               mem = x.storeArgOrLoad(v.Pos, b, source, mem, t, 0, 0, registerCursor{storeDest: dst})
-                               v.copyOf(mem)
-                       }
-               }
+// ArgOpAndRegisterFor converts an abi register index into an ssa Op and corresponding
+// arg register index.
+func ArgOpAndRegisterFor(r abi.RegIndex, abiConfig *abi.ABIConfig) (Op, int64) {
+       i := abiConfig.FloatIndexFor(r)
+       if i >= 0 { // float parameter register
+               return OpArgFloatReg, i
        }
+       return OpArgIntReg, int64(r)
+}
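
A hedged sketch of the register-index split this relies on: the abi package numbers integer registers first and float registers after, so a float index is recovered by subtracting the integer-register count (numInt below is a hypothetical stand-in for the ABI configuration's count).

package main

import "fmt"

func argOpAndRegisterFor(r, numInt int) (string, int) {
	if r >= numInt {
		return "OpArgFloatReg", r - numInt // like abiConfig.FloatIndexFor(r)
	}
	return "OpArgIntReg", r
}

func main() {
	fmt.Println(argOpAndRegisterFor(2, 16))  // OpArgIntReg 2
	fmt.Println(argOpAndRegisterFor(17, 16)) // OpArgFloatReg 1
}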
 
-       val2Preds := make(map[*Value]int32) // Used to accumulate dependency graph of selection operations for topological ordering.
-
-       // Step 2: transform or accumulate selection operations for rewrite in topological order.
-       //
-       // Aggregate types that have already (in earlier phases) been transformed must be lowered comprehensively to finish
-       // the transformation (user-defined structs and arrays, slices, strings, interfaces, complex, 64-bit on 32-bit architectures),
-       //
-       // Any select-for-addressing applied to call results can be transformed directly.
-       for _, b := range f.Blocks {
-               for _, v := range b.Values {
-                       // Accumulate chains of selectors for processing in topological order
-                       switch v.Op {
-                       case OpStructSelect, OpArraySelect,
-                               OpIData, OpITab,
-                               OpStringPtr, OpStringLen,
-                               OpSlicePtr, OpSliceLen, OpSliceCap, OpSlicePtrUnchecked,
-                               OpComplexReal, OpComplexImag,
-                               OpInt64Hi, OpInt64Lo:
-                               w := v.Args[0]
-                               switch w.Op {
-                               case OpStructSelect, OpArraySelect, OpSelectN, OpArg:
-                                       val2Preds[w] += 1
-                                       if x.debug > 1 {
-                                               x.Printf("v2p[%s] = %d\n", w.LongString(), val2Preds[w])
-                                       }
-                               }
-                               fallthrough
-
-                       case OpSelectN:
-                               if _, ok := val2Preds[v]; !ok {
-                                       val2Preds[v] = 0
-                                       if x.debug > 1 {
-                                               x.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v])
-                                       }
-                               }
+type selKey struct {
+       from          *Value // what is selected from
+       offsetOrIndex int64  // whatever is appropriate for the selector
+       size          int64
+       typ           *types.Type
+}
 
-                       case OpArg:
-                               if !x.isAlreadyExpandedAggregateType(v.Type) {
-                                       continue
-                               }
-                               if _, ok := val2Preds[v]; !ok {
-                                       val2Preds[v] = 0
-                                       if x.debug > 1 {
-                                               x.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v])
-                                       }
-                               }
+type expandState struct {
+       f       *Func
+       debug   int // odd values log lost statement markers, so likely settings are 1 (stmts), 2 (expansion), and 3 (both)
+       regSize int64
+       sp      *Value
+       typs    *Types
+
+       firstOp    Op          // for 64-bit integers on 32-bit machines, first word in memory
+       secondOp   Op          // for 64-bit integers on 32-bit machines, second word in memory
+       firstType  *types.Type // first half type, for Int64
+       secondType *types.Type // second half type, for Int64
+
+       wideSelects     map[*Value]*Value // Selects that are not SSA-able, mapped to consuming stores.
+       commonSelectors map[selKey]*Value // used to de-dupe selectors
+       commonArgs      map[selKey]*Value // used to de-dupe OpArg/OpArgIntReg/OpArgFloatReg
+       memForCall      map[ID]*Value     // For a call, need to know the unique selector that gets the mem.
+       indentLevel     int               // Indentation for debugging recursion
+}
 
-                       case OpSelectNAddr:
-                               // Do these directly, there are no chains of selectors.
-                               call := v.Args[0]
-                               which := v.AuxInt
-                               aux := call.Aux.(*AuxCall)
-                               pt := v.Type
-                               off := x.offsetFrom(x.f.Entry, x.sp, aux.OffsetOfResult(which), pt)
-                               v.copyOf(off)
-                       }
-               }
+// intPairTypes returns the pair of 32-bit int types needed to encode a 64-bit integer type on a target
+// that has no 64-bit integer registers.
+func (x *expandState) intPairTypes(et types.Kind) (tHi, tLo *types.Type) {
+       tHi = x.typs.UInt32
+       if et == types.TINT64 {
+               tHi = x.typs.Int32
        }
+       tLo = x.typs.UInt32
+       return
+}
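
The asymmetry in intPairTypes is deliberate: when a 64-bit integer is split across two 32-bit words, only the high word carries the sign, so the low word is always unsigned. A tiny sketch of the rule, with strings standing in for *types.Type:

package main

import "fmt"

// pairTypes mirrors intPairTypes: the sign lives in the high word only.
func pairTypes(signed bool) (tHi, tLo string) {
	tHi = "uint32"
	if signed {
		tHi = "int32"
	}
	return tHi, "uint32"
}

func main() {
	fmt.Println(pairTypes(true))  // int32 uint32  (int64)
	fmt.Println(pairTypes(false)) // uint32 uint32 (uint64)
}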
 
-       // Step 3: Compute topological order of selectors,
-       // then process it in reverse to eliminate duplicates,
-       // then forwards to rewrite selectors.
-       //
-       // All chains of selectors end up in same block as the call.
-
-       // Compilation must be deterministic, so sort after extracting first zeroes from map.
-       // Sorting allows dominators-last order within each batch,
-       // so that the backwards scan for duplicates will most often find copies from dominating blocks (it is best-effort).
-       var toProcess []*Value
-       less := func(i, j int) bool {
-               vi, vj := toProcess[i], toProcess[j]
-               bi, bj := vi.Block, vj.Block
-               if bi == bj {
-                       return vi.ID < vj.ID
+// offsetFrom creates a pointer that is offset bytes past from, simplifying chained offsets and offsets from SP.
+func (x *expandState) offsetFrom(b *Block, from *Value, offset int64, pt *types.Type) *Value {
+       ft := from.Type
+       if offset == 0 {
+               if ft == pt {
+                       return from
                }
-               return x.sdom.domorder(bi) > x.sdom.domorder(bj) // reverse the order to put dominators last.
-       }
-
-       // Accumulate order in allOrdered
-       var allOrdered []*Value
-       for v, n := range val2Preds {
-               if n == 0 {
-                       allOrdered = append(allOrdered, v)
+               // This captures common, (apparently) safe cases.  The unsafe cases involve ft == uintptr
+               if (ft.IsPtr() || ft.IsUnsafePtr()) && pt.IsPtr() {
+                       return from
                }
        }
-       last := 0 // allOrdered[0:last] has been top-sorted and processed
-       for len(val2Preds) > 0 {
-               toProcess = allOrdered[last:]
-               last = len(allOrdered)
-               sort.SliceStable(toProcess, less)
-               for _, v := range toProcess {
-                       delete(val2Preds, v)
-                       if v.Op == OpArg {
-                               continue // no Args[0], hence done.
-                       }
-                       w := v.Args[0]
-                       n, ok := val2Preds[w]
-                       if !ok {
-                               continue
-                       }
-                       if n == 1 {
-                               allOrdered = append(allOrdered, w)
-                               delete(val2Preds, w)
-                               continue
-                       }
-                       val2Preds[w] = n - 1
-               }
+       // Simplify, canonicalize
+       for from.Op == OpOffPtr {
+               offset += from.AuxInt
+               from = from.Args[0]
        }
-
-       x.commonSelectors = make(map[selKey]*Value)
-       // Rewrite duplicate selectors as copies where possible.
-       for i := len(allOrdered) - 1; i >= 0; i-- {
-               v := allOrdered[i]
-               if v.Op == OpArg {
-                       continue
-               }
-               w := v.Args[0]
-               if w.Op == OpCopy {
-                       for w.Op == OpCopy {
-                               w = w.Args[0]
-                       }
-                       v.SetArg(0, w)
-               }
-               typ := v.Type
-               if typ.IsMemory() {
-                       continue // handled elsewhere, not an indexable result
-               }
-               size := typ.Size()
-               offset := int64(0)
-               switch v.Op {
-               case OpStructSelect:
-                       if w.Type.Kind() == types.TSTRUCT {
-                               offset = w.Type.FieldOff(int(v.AuxInt))
-                       } else { // Immediate interface data artifact, offset is zero.
-                               f.Fatalf("Expand calls interface data problem, func %s, v=%s, w=%s\n", f.Name, v.LongString(), w.LongString())
-                       }
-               case OpArraySelect:
-                       offset = size * v.AuxInt
-               case OpSelectN:
-                       offset = v.AuxInt // offset is just a key, really.
-               case OpInt64Hi:
-                       offset = x.hiOffset
-               case OpInt64Lo:
-                       offset = x.lowOffset
-               case OpStringLen, OpSliceLen, OpIData:
-                       offset = x.ptrSize
-               case OpSliceCap:
-                       offset = 2 * x.ptrSize
-               case OpComplexImag:
-                       offset = size
-               }
-               sk := selKey{from: w, size: size, offsetOrIndex: offset, typ: typ}
-               dupe := x.commonSelectors[sk]
-               if dupe == nil {
-                       x.commonSelectors[sk] = v
-               } else if x.sdom.IsAncestorEq(dupe.Block, v.Block) {
-                       if x.debug > 1 {
-                               x.Printf("Duplicate, make %s copy of %s\n", v, dupe)
-                       }
-                       v.copyOf(dupe)
-               } else {
-                       // Because values are processed in dominator order, the old common[s] will never dominate after a miss is seen.
-                       // Installing the new value might match some future values.
-                       x.commonSelectors[sk] = v
-               }
+       if from == x.sp {
+               return x.f.ConstOffPtrSP(pt, offset, x.sp)
        }
+       return b.NewValue1I(from.Pos.WithNotStmt(), OpOffPtr, pt, offset, from)
+}
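
The loop over OpOffPtr is the canonicalization step: a chain of offsets collapses into a single offset from the chain's base, and anything rooted at SP is then routed through the cached ConstOffPtrSP form. A sketch of just the folding, assuming a pointer value is reduced to a (base, offset) pair:

package main

import "fmt"

type offPtr struct {
	base   string // stand-in for the underlying base *Value
	offset int64
}

// fold models the loop: (OffPtr [a] (OffPtr [b] p)) becomes (OffPtr [a+b] p).
func fold(from offPtr, offset int64) offPtr {
	return offPtr{from.base, from.offset + offset}
}

func main() {
	p := offPtr{"SP", 8}
	fmt.Println(fold(fold(p, 16), 4)) // {SP 28}: one OffPtr instead of three
}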
 
-       // Indices of entries in f.Names that need to be deleted.
-       var toDelete []namedVal
-
-       // Rewrite selectors.
-       for i, v := range allOrdered {
-               if x.debug > 1 {
-                       b := v.Block
-                       x.Printf("allOrdered[%d] = b%d, %s, uses=%d\n", i, b.ID, v.LongString(), v.Uses)
-               }
-               if v.Uses == 0 {
-                       x.invalidateRecursively(v)
-                       continue
-               }
-               if v.Op == OpCopy {
-                       continue
-               }
-               locs := x.rewriteSelect(v, v, 0, 0)
-               // Install new names.
-               if v.Type.IsMemory() {
-                       continue
-               }
-               // Leaf types may have debug locations
-               if !x.isAlreadyExpandedAggregateType(v.Type) {
-                       for _, l := range locs {
-                               if _, ok := f.NamedValues[*l]; !ok {
-                                       f.Names = append(f.Names, l)
-                               }
-                               f.NamedValues[*l] = append(f.NamedValues[*l], v)
-                       }
-                       continue
-               }
-               if ns, ok := x.namedSelects[v]; ok {
-                       // Not-leaf types that had debug locations need to lose them.
+func (x *expandState) regWidth(t *types.Type) Abi1RO {
+       return Abi1RO(x.f.ABI1.NumParamRegs(t))
+}
 
-                       toDelete = append(toDelete, ns...)
-               }
+// regOffset returns the register offset of the i'th element of type t
+func (x *expandState) regOffset(t *types.Type, i int) Abi1RO {
+       // TODO maybe cache this in a map if profiling recommends.
+       if i == 0 {
+               return 0
        }
-
-       deleteNamedVals(f, toDelete)
-
-       // Step 4: rewrite the calls themselves, correcting the type.
-       for _, b := range f.Blocks {
-               for _, v := range b.Values {
-                       switch v.Op {
-                       case OpArg:
-                               x.rewriteArgToMemOrRegs(v)
-                       case OpStaticLECall:
-                               v.Op = OpStaticCall
-                               rts := abi.RegisterTypes(v.Aux.(*AuxCall).abiInfo.OutParams())
-                               v.Type = types.NewResults(append(rts, types.TypeMem))
-                       case OpTailLECall:
-                               v.Op = OpTailCall
-                               rts := abi.RegisterTypes(v.Aux.(*AuxCall).abiInfo.OutParams())
-                               v.Type = types.NewResults(append(rts, types.TypeMem))
-                       case OpClosureLECall:
-                               v.Op = OpClosureCall
-                               rts := abi.RegisterTypes(v.Aux.(*AuxCall).abiInfo.OutParams())
-                               v.Type = types.NewResults(append(rts, types.TypeMem))
-                       case OpInterLECall:
-                               v.Op = OpInterCall
-                               rts := abi.RegisterTypes(v.Aux.(*AuxCall).abiInfo.OutParams())
-                               v.Type = types.NewResults(append(rts, types.TypeMem))
-                       }
-               }
+       if t.IsArray() {
+               return Abi1RO(i) * x.regWidth(t.Elem())
        }
-
-       // Step 5: dedup OpArgXXXReg values. Mostly it is already dedup'd by commonArgs,
-       // but there are cases that we have same OpArgXXXReg values with different types.
-       // E.g. string is sometimes decomposed as { *int8, int }, sometimes as { unsafe.Pointer, uintptr }.
-       // (Can we avoid that?)
-       var IArg, FArg [32]*Value
-       for _, v := range f.Entry.Values {
-               switch v.Op {
-               case OpArgIntReg:
-                       i := v.AuxInt
-                       if w := IArg[i]; w != nil {
-                               if w.Type.Size() != v.Type.Size() {
-                                       f.Fatalf("incompatible OpArgIntReg [%d]: %s and %s", i, v.LongString(), w.LongString())
-                               }
-                               if w.Type.IsUnsafePtr() && !v.Type.IsUnsafePtr() {
-                                       // Update unsafe.Pointer type if we know the actual pointer type.
-                                       w.Type = v.Type
-                               }
-                               // TODO: don't dedup pointer and scalar? Rewrite to OpConvert? Can it happen?
-                               v.copyOf(w)
-                       } else {
-                               IArg[i] = v
-                       }
-               case OpArgFloatReg:
-                       i := v.AuxInt
-                       if w := FArg[i]; w != nil {
-                               if w.Type.Size() != v.Type.Size() {
-                                       f.Fatalf("incompatible OpArgFloatReg [%d]: %v and %v", i, v, w)
-                               }
-                               v.copyOf(w)
-                       } else {
-                               FArg[i] = v
-                       }
+       if t.IsStruct() {
+               k := Abi1RO(0)
+               for j := 0; j < i; j++ {
+                       k += x.regWidth(t.FieldType(j))
                }
+               return k
        }
+       panic("Haven't implemented this case yet, do I need to?")
+}
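
regOffset's arithmetic in words: array elements all have the same register width, so element i starts at i times that width; struct field i starts after the sum of the widths of the fields before it. A standalone sketch with made-up widths (the real numbers come from ABI1.NumParamRegs):

package main

import "fmt"

// structRegOffset sums illustrative per-field register widths, as regOffset
// does for the struct case.
func structRegOffset(widths []int, i int) int {
	k := 0
	for j := 0; j < i; j++ {
		k += widths[j] // registers consumed by earlier fields
	}
	return k
}

func main() {
	// e.g. struct { a complex128; b int; c string } might take widths 2, 1, 2.
	fmt.Println(structRegOffset([]int{2, 1, 2}, 2)) // field c starts at register 3
}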
 
-       // Step 6: elide any copies introduced.
-       // Update named values.
-       for _, name := range f.Names {
-               values := f.NamedValues[*name]
-               for i, v := range values {
-                       if v.Op == OpCopy {
-                               a := v.Args[0]
-                               for a.Op == OpCopy {
-                                       a = a.Args[0]
-                               }
-                               values[i] = a
-                       }
-               }
-       }
-       for _, b := range f.Blocks {
-               for _, v := range b.Values {
-                       for i, a := range v.Args {
-                               if a.Op != OpCopy {
-                                       continue
-                               }
-                               aa := copySource(a)
-                               v.SetArg(i, aa)
-                               for a.Uses == 0 {
-                                       b := a.Args[0]
-                                       x.invalidateRecursively(a)
-                                       a = b
-                               }
-                       }
-               }
+// prAssignForArg returns the ABIParamAssignment for v, assumed to be an OpArg.
+func (x *expandState) prAssignForArg(v *Value) *abi.ABIParamAssignment {
+       if v.Op != OpArg {
+               panic(fmt.Errorf("Wanted OpArg, instead saw %s", v.LongString()))
        }
+       return ParamAssignmentForArgName(x.f, v.Aux.(*ir.Name))
+}
 
-       // Rewriting can attach lines to values that are unlikely to survive code generation, so move them to a use.
-       for _, b := range f.Blocks {
-               for _, v := range b.Values {
-                       for _, a := range v.Args {
-                               if a.Pos.IsStmt() != src.PosIsStmt {
-                                       continue
-                               }
-                               if a.Type.IsMemory() {
-                                       continue
-                               }
-                               if a.Pos.Line() != v.Pos.Line() {
-                                       continue
-                               }
-                               if !a.Pos.SameFile(v.Pos) {
-                                       continue
-                               }
-                               switch a.Op {
-                               case OpArgIntReg, OpArgFloatReg, OpSelectN:
-                                       v.Pos = v.Pos.WithIsStmt()
-                                       a.Pos = a.Pos.WithDefaultStmt()
-                               }
-                       }
+// ParamAssignmentForArgName returns the ABIParamAssignment for f's arg with matching name.
+func ParamAssignmentForArgName(f *Func, name *ir.Name) *abi.ABIParamAssignment {
+       abiInfo := f.OwnAux.abiInfo
+       ip := abiInfo.InParams()
+       for i, a := range ip {
+               if a.Name == name {
+                       return &ip[i]
                }
        }
+       panic(fmt.Errorf("Did not match param %v in prInfo %+v", name, abiInfo.InParams()))
 }
 
-// rewriteArgToMemOrRegs converts OpArg v in-place into the register version of v,
-// if that is appropriate.
-func (x *expandState) rewriteArgToMemOrRegs(v *Value) *Value {
-       if x.debug > 1 {
-               x.indent(3)
-               defer x.indent(-3)
-               x.Printf("rewriteArgToMemOrRegs(%s)\n", v.LongString())
-       }
-       pa := x.prAssignForArg(v)
-       switch len(pa.Registers) {
-       case 0:
-               frameOff := v.Aux.(*ir.Name).FrameOffset()
-               if pa.Offset() != int32(frameOff+x.f.ABISelf.LocalsOffset()) {
-                       panic(fmt.Errorf("Parameter assignment %d and OpArg.Aux frameOffset %d disagree, op=%s",
-                               pa.Offset(), frameOff, v.LongString()))
-               }
-       case 1:
-               t := v.Type
-               key := selKey{v, 0, t.Size(), t}
-               w := x.commonArgs[key]
-               if w != nil && w.Uses != 0 { // do not reuse dead value
-                       v.copyOf(w)
-                       break
-               }
-               r := pa.Registers[0]
-               var i int64
-               v.Op, i = ArgOpAndRegisterFor(r, x.f.ABISelf)
-               v.Aux = &AuxNameOffset{v.Aux.(*ir.Name), 0}
-               v.AuxInt = i
-               x.commonArgs[key] = v
-
-       default:
-               panic(badVal("Saw unexpanded OpArg", v))
-       }
-       if x.debug > 1 {
-               x.Printf("-->%s\n", v.LongString())
-       }
-       return v
+// indent increments (or decrements) the indentation.
+func (x *expandState) indent(n int) {
+       x.indentLevel += n
 }
 
-// newArgToMemOrRegs either rewrites toReplace into an OpArg referencing memory or into an OpArgXXXReg to a register,
-// or rewrites it into a copy of the appropriate OpArgXXX.  The actual OpArgXXX is determined by combining baseArg (an OpArg)
-// with offset, regOffset, and t to determine which portion of it to reference (either all or a part, in memory or in registers).
-func (x *expandState) newArgToMemOrRegs(baseArg, toReplace *Value, offset int64, regOffset Abi1RO, t *types.Type, pos src.XPos) *Value {
-       if x.debug > 1 {
-               x.indent(3)
-               defer x.indent(-3)
-               x.Printf("newArgToMemOrRegs(base=%s; toReplace=%s; t=%s; memOff=%d; regOff=%d)\n", baseArg.String(), toReplace.LongString(), t.String(), offset, regOffset)
-       }
-       key := selKey{baseArg, offset, t.Size(), t}
-       w := x.commonArgs[key]
-       if w != nil && w.Uses != 0 { // do not reuse dead value
-               if toReplace != nil {
-                       toReplace.copyOf(w)
-                       if x.debug > 1 {
-                               x.Printf("...replace %s\n", toReplace.LongString())
-                       }
-               }
-               if x.debug > 1 {
-                       x.Printf("-->%s\n", w.LongString())
-               }
-               return w
+// Printf does an indented fmt.Printf on the format and args.
+func (x *expandState) Printf(format string, a ...interface{}) (n int, err error) {
+       if x.indentLevel > 0 {
+               fmt.Printf("%[1]*s", x.indentLevel, "")
        }
+       return fmt.Printf(format, a...)
+}
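
The "%[1]*s" format is a compact way to print n spaces: the explicit argument index makes the * width consume x.indentLevel, and the s verb then prints the empty string right-aligned in that width. A one-line demonstration:

package main

import "fmt"

func main() {
	// Argument 1 (the 4) becomes the field width; padding the empty
	// string to that width yields exactly four spaces before the bar.
	fmt.Printf("%[1]*s|\n", 4, "") // prints "    |"
}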
 
-       pa := x.prAssignForArg(baseArg)
-       if len(pa.Registers) == 0 { // Arg is on stack
-               frameOff := baseArg.Aux.(*ir.Name).FrameOffset()
-               if pa.Offset() != int32(frameOff+x.f.ABISelf.LocalsOffset()) {
-                       panic(fmt.Errorf("Parameter assignment %d and OpArg.Aux frameOffset %d disagree, op=%s",
-                               pa.Offset(), frameOff, baseArg.LongString()))
-               }
-               aux := baseArg.Aux
-               auxInt := baseArg.AuxInt + offset
-               if toReplace != nil && toReplace.Block == baseArg.Block {
-                       toReplace.reset(OpArg)
-                       toReplace.Aux = aux
-                       toReplace.AuxInt = auxInt
-                       toReplace.Type = t
-                       w = toReplace
-               } else {
-                       w = baseArg.Block.NewValue0IA(baseArg.Pos, OpArg, t, auxInt, aux)
-               }
-               x.commonArgs[key] = w
-               if toReplace != nil {
-                       toReplace.copyOf(w)
+func (x *expandState) invalidateRecursively(a *Value) {
+       var s string
+       if x.debug > 0 {
+               plus := " "
+               if a.Pos.IsStmt() == src.PosIsStmt {
+                       plus = " +"
                }
+               s = a.String() + plus + a.Pos.LineNumber() + " " + a.LongString()
                if x.debug > 1 {
-                       x.Printf("-->%s\n", w.LongString())
+                       x.Printf("...marking %v unused\n", s)
                }
-               return w
-       }
-       // Arg is in registers
-       r := pa.Registers[regOffset]
-       op, auxInt := ArgOpAndRegisterFor(r, x.f.ABISelf)
-       if op == OpArgIntReg && t.IsFloat() || op == OpArgFloatReg && t.IsInteger() {
-               fmt.Printf("pa=%v\nx.f.OwnAux.abiInfo=%s\n",
-                       pa.ToString(x.f.ABISelf, true),
-                       x.f.OwnAux.abiInfo.String())
-               panic(fmt.Errorf("Op/Type mismatch, op=%s, type=%s", op.String(), t.String()))
-       }
-       if baseArg.AuxInt != 0 {
-               base.Fatalf("BaseArg %s bound to registers has non-zero AuxInt", baseArg.LongString())
-       }
-       aux := &AuxNameOffset{baseArg.Aux.(*ir.Name), offset}
-       if toReplace != nil && toReplace.Block == baseArg.Block {
-               toReplace.reset(op)
-               toReplace.Aux = aux
-               toReplace.AuxInt = auxInt
-               toReplace.Type = t
-               w = toReplace
-       } else {
-               w = baseArg.Block.NewValue0IA(baseArg.Pos, op, t, auxInt, aux)
-       }
-       x.commonArgs[key] = w
-       if toReplace != nil {
-               toReplace.copyOf(w)
        }
-       if x.debug > 1 {
-               x.Printf("-->%s\n", w.LongString())
-       }
-       return w
-
-}
-
-// ArgOpAndRegisterFor converts an abi register index into an ssa Op and corresponding
-// arg register index.
-func ArgOpAndRegisterFor(r abi.RegIndex, abiConfig *abi.ABIConfig) (Op, int64) {
-       i := abiConfig.FloatIndexFor(r)
-       if i >= 0 { // float PR
-               return OpArgFloatReg, i
+       lost := a.invalidateRecursively()
+       if x.debug&1 != 0 && lost { // For odd values of x.debug, do this.
+               x.Printf("Lost statement marker in %s on former %s\n", base.Ctxt.Pkgpath+"."+x.f.Name, s)
        }
-       return OpArgIntReg, int64(r)
 }
index 0fe9a9125f9eb2ba0cab18f602ab469f012cd870..cb151b2f6c6ad6f1ec95b5af4115d010254356b6 100644 (file)
@@ -238,7 +238,7 @@ func (a *AuxCall) RegsOfArg(which int64) []abi.RegIndex {
        return a.abiInfo.InParam(int(which)).Registers
 }
 
-// NameOfResult returns the type of result which (indexed 0, 1, etc).
+// NameOfResult returns the ir.Name of result which (indexed 0, 1, etc).
 func (a *AuxCall) NameOfResult(which int64) *ir.Name {
        return a.abiInfo.OutParam(int(which)).Name
 }
index 5c04708b277643860a82790cd9e755e348c5504f..fbfe15c0c58965f9160b1130107199e7f019a007 100644 (file)
@@ -6,12 +6,18 @@ import "cmd/compile/internal/types"
 
 func rewriteValuedec(v *Value) bool {
        switch v.Op {
+       case OpArrayMake1:
+               return rewriteValuedec_OpArrayMake1(v)
+       case OpArraySelect:
+               return rewriteValuedec_OpArraySelect(v)
        case OpComplexImag:
                return rewriteValuedec_OpComplexImag(v)
        case OpComplexReal:
                return rewriteValuedec_OpComplexReal(v)
        case OpIData:
                return rewriteValuedec_OpIData(v)
+       case OpIMake:
+               return rewriteValuedec_OpIMake(v)
        case OpITab:
                return rewriteValuedec_OpITab(v)
        case OpLoad:
@@ -30,11 +36,92 @@ func rewriteValuedec(v *Value) bool {
                return rewriteValuedec_OpStringLen(v)
        case OpStringPtr:
                return rewriteValuedec_OpStringPtr(v)
+       case OpStructMake1:
+               return rewriteValuedec_OpStructMake1(v)
+       case OpStructSelect:
+               return rewriteValuedec_OpStructSelect(v)
+       }
+       return false
+}
+func rewriteValuedec_OpArrayMake1(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (ArrayMake1 x)
+       // cond: x.Type.IsPtr()
+       // result: x
+       for {
+               x := v_0
+               if !(x.Type.IsPtr()) {
+                       break
+               }
+               v.copyOf(x)
+               return true
+       }
+       return false
+}
+func rewriteValuedec_OpArraySelect(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (ArraySelect [0] x)
+       // cond: x.Type.IsPtr()
+       // result: x
+       for {
+               if auxIntToInt64(v.AuxInt) != 0 {
+                       break
+               }
+               x := v_0
+               if !(x.Type.IsPtr()) {
+                       break
+               }
+               v.copyOf(x)
+               return true
+       }
+       // match: (ArraySelect (ArrayMake1 x))
+       // result: x
+       for {
+               if v_0.Op != OpArrayMake1 {
+                       break
+               }
+               x := v_0.Args[0]
+               v.copyOf(x)
+               return true
+       }
+       // match: (ArraySelect [0] (IData x))
+       // result: (IData x)
+       for {
+               if auxIntToInt64(v.AuxInt) != 0 || v_0.Op != OpIData {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpIData)
+               v.AddArg(x)
+               return true
+       }
+       // match: (ArraySelect [i] x:(Load <t> ptr mem))
+       // result: @x.Block (Load <v.Type> (OffPtr <v.Type.PtrTo()> [t.Elem().Size()*i] ptr) mem)
+       for {
+               i := auxIntToInt64(v.AuxInt)
+               x := v_0
+               if x.Op != OpLoad {
+                       break
+               }
+               t := x.Type
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpLoad, v.Type)
+               v.copyOf(v0)
+               v1 := b.NewValue0(v.Pos, OpOffPtr, v.Type.PtrTo())
+               v1.AuxInt = int64ToAuxInt(t.Elem().Size() * i)
+               v1.AddArg(ptr)
+               v0.AddArg2(v1, mem)
+               return true
        }
        return false
 }
 func rewriteValuedec_OpComplexImag(v *Value) bool {
        v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
        // match: (ComplexImag (ComplexMake _ imag ))
        // result: imag
        for {
@@ -45,10 +132,58 @@ func rewriteValuedec_OpComplexImag(v *Value) bool {
                v.copyOf(imag)
                return true
        }
+       // match: (ComplexImag x:(Load <t> ptr mem))
+       // cond: t.IsComplex() && t.Size() == 8
+       // result: @x.Block (Load <typ.Float32> (OffPtr <typ.Float32Ptr> [4] ptr) mem)
+       for {
+               x := v_0
+               if x.Op != OpLoad {
+                       break
+               }
+               t := x.Type
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(t.IsComplex() && t.Size() == 8) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpLoad, typ.Float32)
+               v.copyOf(v0)
+               v1 := b.NewValue0(v.Pos, OpOffPtr, typ.Float32Ptr)
+               v1.AuxInt = int64ToAuxInt(4)
+               v1.AddArg(ptr)
+               v0.AddArg2(v1, mem)
+               return true
+       }
+       // match: (ComplexImag x:(Load <t> ptr mem))
+       // cond: t.IsComplex() && t.Size() == 16
+       // result: @x.Block (Load <typ.Float64> (OffPtr <typ.Float64Ptr> [8] ptr) mem)
+       for {
+               x := v_0
+               if x.Op != OpLoad {
+                       break
+               }
+               t := x.Type
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(t.IsComplex() && t.Size() == 16) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpLoad, typ.Float64)
+               v.copyOf(v0)
+               v1 := b.NewValue0(v.Pos, OpOffPtr, typ.Float64Ptr)
+               v1.AuxInt = int64ToAuxInt(8)
+               v1.AddArg(ptr)
+               v0.AddArg2(v1, mem)
+               return true
+       }
        return false
 }
 func rewriteValuedec_OpComplexReal(v *Value) bool {
        v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
        // match: (ComplexReal (ComplexMake real _ ))
        // result: real
        for {
@@ -59,10 +194,53 @@ func rewriteValuedec_OpComplexReal(v *Value) bool {
                v.copyOf(real)
                return true
        }
+       // match: (ComplexReal x:(Load <t> ptr mem))
+       // cond: t.IsComplex() && t.Size() == 8
+       // result: @x.Block (Load <typ.Float32> ptr mem)
+       for {
+               x := v_0
+               if x.Op != OpLoad {
+                       break
+               }
+               t := x.Type
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(t.IsComplex() && t.Size() == 8) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpLoad, typ.Float32)
+               v.copyOf(v0)
+               v0.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (ComplexReal x:(Load <t> ptr mem))
+       // cond: t.IsComplex() && t.Size() == 16
+       // result: @x.Block (Load <typ.Float64> ptr mem)
+       for {
+               x := v_0
+               if x.Op != OpLoad {
+                       break
+               }
+               t := x.Type
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(t.IsComplex() && t.Size() == 16) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpLoad, typ.Float64)
+               v.copyOf(v0)
+               v0.AddArg2(ptr, mem)
+               return true
+       }
        return false
 }
 func rewriteValuedec_OpIData(v *Value) bool {
        v_0 := v.Args[0]
+       b := v.Block
+       config := b.Func.Config
+       typ := &b.Func.Config.Types
        // match: (IData (IMake _ data))
        // result: data
        for {
@@ -73,10 +251,52 @@ func rewriteValuedec_OpIData(v *Value) bool {
                v.copyOf(data)
                return true
        }
+       // match: (IData x:(Load <t> ptr mem))
+       // cond: t.IsInterface()
+       // result: @x.Block (Load <typ.BytePtr> (OffPtr <typ.BytePtrPtr> [config.PtrSize] ptr) mem)
+       for {
+               x := v_0
+               if x.Op != OpLoad {
+                       break
+               }
+               t := x.Type
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(t.IsInterface()) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpLoad, typ.BytePtr)
+               v.copyOf(v0)
+               v1 := b.NewValue0(v.Pos, OpOffPtr, typ.BytePtrPtr)
+               v1.AuxInt = int64ToAuxInt(config.PtrSize)
+               v1.AddArg(ptr)
+               v0.AddArg2(v1, mem)
+               return true
+       }
+       return false
+}
+func rewriteValuedec_OpIMake(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (IMake _typ (StructMake1 val))
+       // result: (IMake _typ val)
+       for {
+               _typ := v_0
+               if v_1.Op != OpStructMake1 {
+                       break
+               }
+               val := v_1.Args[0]
+               v.reset(OpIMake)
+               v.AddArg2(_typ, val)
+               return true
+       }
        return false
 }
 func rewriteValuedec_OpITab(v *Value) bool {
        v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
        // match: (ITab (IMake itab _))
        // result: itab
        for {
@@ -87,6 +307,26 @@ func rewriteValuedec_OpITab(v *Value) bool {
                v.copyOf(itab)
                return true
        }
+       // match: (ITab x:(Load <t> ptr mem))
+       // cond: t.IsInterface()
+       // result: @x.Block (Load <typ.Uintptr> ptr mem)
+       for {
+               x := v_0
+               if x.Op != OpLoad {
+                       break
+               }
+               t := x.Type
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(t.IsInterface()) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpLoad, typ.Uintptr)
+               v.copyOf(v0)
+               v0.AddArg2(ptr, mem)
+               return true
+       }
        return false
 }
 func rewriteValuedec_OpLoad(v *Value) bool {
@@ -209,6 +449,9 @@ func rewriteValuedec_OpLoad(v *Value) bool {
 }
 func rewriteValuedec_OpSliceCap(v *Value) bool {
        v_0 := v.Args[0]
+       b := v.Block
+       config := b.Func.Config
+       typ := &b.Func.Config.Types
        // match: (SliceCap (SliceMake _ _ cap))
        // result: cap
        for {
@@ -219,10 +462,36 @@ func rewriteValuedec_OpSliceCap(v *Value) bool {
                v.copyOf(cap)
                return true
        }
+       // match: (SliceCap x:(Load <t> ptr mem))
+       // cond: t.IsSlice()
+       // result: @x.Block (Load <typ.Int> (OffPtr <typ.IntPtr> [2*config.PtrSize] ptr) mem)
+       for {
+               x := v_0
+               if x.Op != OpLoad {
+                       break
+               }
+               t := x.Type
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(t.IsSlice()) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpLoad, typ.Int)
+               v.copyOf(v0)
+               v1 := b.NewValue0(v.Pos, OpOffPtr, typ.IntPtr)
+               v1.AuxInt = int64ToAuxInt(2 * config.PtrSize)
+               v1.AddArg(ptr)
+               v0.AddArg2(v1, mem)
+               return true
+       }
        return false
 }
 func rewriteValuedec_OpSliceLen(v *Value) bool {
        v_0 := v.Args[0]
+       b := v.Block
+       config := b.Func.Config
+       typ := &b.Func.Config.Types
        // match: (SliceLen (SliceMake _ len _))
        // result: len
        for {
@@ -233,10 +502,34 @@ func rewriteValuedec_OpSliceLen(v *Value) bool {
                v.copyOf(len)
                return true
        }
+       // match: (SliceLen x:(Load <t> ptr mem))
+       // cond: t.IsSlice()
+       // result: @x.Block (Load <typ.Int> (OffPtr <typ.IntPtr> [config.PtrSize] ptr) mem)
+       for {
+               x := v_0
+               if x.Op != OpLoad {
+                       break
+               }
+               t := x.Type
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(t.IsSlice()) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpLoad, typ.Int)
+               v.copyOf(v0)
+               v1 := b.NewValue0(v.Pos, OpOffPtr, typ.IntPtr)
+               v1.AuxInt = int64ToAuxInt(config.PtrSize)
+               v1.AddArg(ptr)
+               v0.AddArg2(v1, mem)
+               return true
+       }
        return false
 }
 func rewriteValuedec_OpSlicePtr(v *Value) bool {
        v_0 := v.Args[0]
+       b := v.Block
        // match: (SlicePtr (SliceMake ptr _ _ ))
        // result: ptr
        for {
@@ -247,6 +540,26 @@ func rewriteValuedec_OpSlicePtr(v *Value) bool {
                v.copyOf(ptr)
                return true
        }
+       // match: (SlicePtr x:(Load <t> ptr mem))
+       // cond: t.IsSlice()
+       // result: @x.Block (Load <t.Elem().PtrTo()> ptr mem)
+       for {
+               x := v_0
+               if x.Op != OpLoad {
+                       break
+               }
+               t := x.Type
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(t.IsSlice()) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpLoad, t.Elem().PtrTo())
+               v.copyOf(v0)
+               v0.AddArg2(ptr, mem)
+               return true
+       }
        return false
 }
 func rewriteValuedec_OpSlicePtrUnchecked(v *Value) bool {
@@ -393,10 +706,141 @@ func rewriteValuedec_OpStore(v *Value) bool {
                v.AddArg3(v0, data, v1)
                return true
        }
+       // match: (Store dst (StructMake1 <t> f0) mem)
+       // result: (Store {t.FieldType(0)} (OffPtr <t.FieldType(0).PtrTo()> [0] dst) f0 mem)
+       for {
+               dst := v_0
+               if v_1.Op != OpStructMake1 {
+                       break
+               }
+               t := v_1.Type
+               f0 := v_1.Args[0]
+               mem := v_2
+               v.reset(OpStore)
+               v.Aux = typeToAux(t.FieldType(0))
+               v0 := b.NewValue0(v.Pos, OpOffPtr, t.FieldType(0).PtrTo())
+               v0.AuxInt = int64ToAuxInt(0)
+               v0.AddArg(dst)
+               v.AddArg3(v0, f0, mem)
+               return true
+       }
+       // match: (Store dst (StructMake2 <t> f0 f1) mem)
+       // result: (Store {t.FieldType(1)} (OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] dst) f1 (Store {t.FieldType(0)} (OffPtr <t.FieldType(0).PtrTo()> [0] dst) f0 mem))
+       for {
+               dst := v_0
+               if v_1.Op != OpStructMake2 {
+                       break
+               }
+               t := v_1.Type
+               f1 := v_1.Args[1]
+               f0 := v_1.Args[0]
+               mem := v_2
+               v.reset(OpStore)
+               v.Aux = typeToAux(t.FieldType(1))
+               v0 := b.NewValue0(v.Pos, OpOffPtr, t.FieldType(1).PtrTo())
+               v0.AuxInt = int64ToAuxInt(t.FieldOff(1))
+               v0.AddArg(dst)
+               v1 := b.NewValue0(v.Pos, OpStore, types.TypeMem)
+               v1.Aux = typeToAux(t.FieldType(0))
+               v2 := b.NewValue0(v.Pos, OpOffPtr, t.FieldType(0).PtrTo())
+               v2.AuxInt = int64ToAuxInt(0)
+               v2.AddArg(dst)
+               v1.AddArg3(v2, f0, mem)
+               v.AddArg3(v0, f1, v1)
+               return true
+       }
+       // match: (Store dst (StructMake3 <t> f0 f1 f2) mem)
+       // result: (Store {t.FieldType(2)} (OffPtr <t.FieldType(2).PtrTo()> [t.FieldOff(2)] dst) f2 (Store {t.FieldType(1)} (OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] dst) f1 (Store {t.FieldType(0)} (OffPtr <t.FieldType(0).PtrTo()> [0] dst) f0 mem)))
+       for {
+               dst := v_0
+               if v_1.Op != OpStructMake3 {
+                       break
+               }
+               t := v_1.Type
+               f2 := v_1.Args[2]
+               f0 := v_1.Args[0]
+               f1 := v_1.Args[1]
+               mem := v_2
+               v.reset(OpStore)
+               v.Aux = typeToAux(t.FieldType(2))
+               v0 := b.NewValue0(v.Pos, OpOffPtr, t.FieldType(2).PtrTo())
+               v0.AuxInt = int64ToAuxInt(t.FieldOff(2))
+               v0.AddArg(dst)
+               v1 := b.NewValue0(v.Pos, OpStore, types.TypeMem)
+               v1.Aux = typeToAux(t.FieldType(1))
+               v2 := b.NewValue0(v.Pos, OpOffPtr, t.FieldType(1).PtrTo())
+               v2.AuxInt = int64ToAuxInt(t.FieldOff(1))
+               v2.AddArg(dst)
+               v3 := b.NewValue0(v.Pos, OpStore, types.TypeMem)
+               v3.Aux = typeToAux(t.FieldType(0))
+               v4 := b.NewValue0(v.Pos, OpOffPtr, t.FieldType(0).PtrTo())
+               v4.AuxInt = int64ToAuxInt(0)
+               v4.AddArg(dst)
+               v3.AddArg3(v4, f0, mem)
+               v1.AddArg3(v2, f1, v3)
+               v.AddArg3(v0, f2, v1)
+               return true
+       }
+       // match: (Store dst (StructMake4 <t> f0 f1 f2 f3) mem)
+       // result: (Store {t.FieldType(3)} (OffPtr <t.FieldType(3).PtrTo()> [t.FieldOff(3)] dst) f3 (Store {t.FieldType(2)} (OffPtr <t.FieldType(2).PtrTo()> [t.FieldOff(2)] dst) f2 (Store {t.FieldType(1)} (OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] dst) f1 (Store {t.FieldType(0)} (OffPtr <t.FieldType(0).PtrTo()> [0] dst) f0 mem))))
+       for {
+               dst := v_0
+               if v_1.Op != OpStructMake4 {
+                       break
+               }
+               t := v_1.Type
+               f3 := v_1.Args[3]
+               f0 := v_1.Args[0]
+               f1 := v_1.Args[1]
+               f2 := v_1.Args[2]
+               mem := v_2
+               v.reset(OpStore)
+               v.Aux = typeToAux(t.FieldType(3))
+               v0 := b.NewValue0(v.Pos, OpOffPtr, t.FieldType(3).PtrTo())
+               v0.AuxInt = int64ToAuxInt(t.FieldOff(3))
+               v0.AddArg(dst)
+               v1 := b.NewValue0(v.Pos, OpStore, types.TypeMem)
+               v1.Aux = typeToAux(t.FieldType(2))
+               v2 := b.NewValue0(v.Pos, OpOffPtr, t.FieldType(2).PtrTo())
+               v2.AuxInt = int64ToAuxInt(t.FieldOff(2))
+               v2.AddArg(dst)
+               v3 := b.NewValue0(v.Pos, OpStore, types.TypeMem)
+               v3.Aux = typeToAux(t.FieldType(1))
+               v4 := b.NewValue0(v.Pos, OpOffPtr, t.FieldType(1).PtrTo())
+               v4.AuxInt = int64ToAuxInt(t.FieldOff(1))
+               v4.AddArg(dst)
+               v5 := b.NewValue0(v.Pos, OpStore, types.TypeMem)
+               v5.Aux = typeToAux(t.FieldType(0))
+               v6 := b.NewValue0(v.Pos, OpOffPtr, t.FieldType(0).PtrTo())
+               v6.AuxInt = int64ToAuxInt(0)
+               v6.AddArg(dst)
+               v5.AddArg3(v6, f0, mem)
+               v3.AddArg3(v4, f1, v5)
+               v1.AddArg3(v2, f2, v3)
+               v.AddArg3(v0, f3, v1)
+               return true
+       }
+       // match: (Store dst (ArrayMake1 e) mem)
+       // result: (Store {e.Type} dst e mem)
+       for {
+               dst := v_0
+               if v_1.Op != OpArrayMake1 {
+                       break
+               }
+               e := v_1.Args[0]
+               mem := v_2
+               v.reset(OpStore)
+               v.Aux = typeToAux(e.Type)
+               v.AddArg3(dst, e, mem)
+               return true
+       }
        return false
 }
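
In source terms, the StructMakeN store rules above replace one aggregate store with one scalar store per field, each through a pointer at that field's offset, with field 0 innermost in the memory chain. A hand-written picture of the two-field case (the struct and its layout are hypothetical):

package main

import "unsafe"

type pair struct {
	p *byte
	n int
}

// storePair spells out what (Store dst (StructMake2 <pair> f0 f1) mem)
// lowers to: a store of f0 at offset 0, then a store of f1 at n's offset.
func storePair(dst *pair, f0 *byte, f1 int) {
	*(**byte)(unsafe.Pointer(dst)) = f0
	*(*int)(unsafe.Add(unsafe.Pointer(dst), unsafe.Offsetof(dst.n))) = f1
}

func main() {
	var d pair
	b := byte(1)
	storePair(&d, &b, 42)
	println(d.p == &b && d.n == 42) // true
}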
 func rewriteValuedec_OpStringLen(v *Value) bool {
        v_0 := v.Args[0]
+       b := v.Block
+       config := b.Func.Config
+       typ := &b.Func.Config.Types
        // match: (StringLen (StringMake _ len))
        // result: len
        for {
@@ -407,10 +851,35 @@ func rewriteValuedec_OpStringLen(v *Value) bool {
                v.copyOf(len)
                return true
        }
+       // match: (StringLen x:(Load <t> ptr mem))
+       // cond: t.IsString()
+       // result: @x.Block (Load <typ.Int> (OffPtr <typ.IntPtr> [config.PtrSize] ptr) mem)
+       for {
+               x := v_0
+               if x.Op != OpLoad {
+                       break
+               }
+               t := x.Type
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(t.IsString()) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpLoad, typ.Int)
+               v.copyOf(v0)
+               v1 := b.NewValue0(v.Pos, OpOffPtr, typ.IntPtr)
+               v1.AuxInt = int64ToAuxInt(config.PtrSize)
+               v1.AddArg(ptr)
+               v0.AddArg2(v1, mem)
+               return true
+       }
        return false
 }
 func rewriteValuedec_OpStringPtr(v *Value) bool {
        v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
        // match: (StringPtr (StringMake ptr _))
        // result: ptr
        for {
@@ -421,6 +890,191 @@ func rewriteValuedec_OpStringPtr(v *Value) bool {
                v.copyOf(ptr)
                return true
        }
+       // match: (StringPtr x:(Load <t> ptr mem))
+       // cond: t.IsString()
+       // result: @x.Block (Load <typ.BytePtr> ptr mem)
+       for {
+               x := v_0
+               if x.Op != OpLoad {
+                       break
+               }
+               t := x.Type
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(t.IsString()) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpLoad, typ.BytePtr)
+               v.copyOf(v0)
+               v0.AddArg2(ptr, mem)
+               return true
+       }
+       return false
+}
+func rewriteValuedec_OpStructMake1(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (StructMake1 x)
+       // cond: x.Type.IsPtr()
+       // result: x
+       for {
+               x := v_0
+               if !(x.Type.IsPtr()) {
+                       break
+               }
+               v.copyOf(x)
+               return true
+       }
+       return false
+}
+func rewriteValuedec_OpStructSelect(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (StructSelect [0] (IData x))
+       // result: (IData x)
+       for {
+               if auxIntToInt64(v.AuxInt) != 0 || v_0.Op != OpIData {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpIData)
+               v.AddArg(x)
+               return true
+       }
+       // match: (StructSelect (StructMake1 x))
+       // result: x
+       for {
+               if v_0.Op != OpStructMake1 {
+                       break
+               }
+               x := v_0.Args[0]
+               v.copyOf(x)
+               return true
+       }
+       // match: (StructSelect [0] (StructMake2 x _))
+       // result: x
+       for {
+               if auxIntToInt64(v.AuxInt) != 0 || v_0.Op != OpStructMake2 {
+                       break
+               }
+               x := v_0.Args[0]
+               v.copyOf(x)
+               return true
+       }
+       // match: (StructSelect [1] (StructMake2 _ x))
+       // result: x
+       for {
+               if auxIntToInt64(v.AuxInt) != 1 || v_0.Op != OpStructMake2 {
+                       break
+               }
+               x := v_0.Args[1]
+               v.copyOf(x)
+               return true
+       }
+       // match: (StructSelect [0] (StructMake3 x _ _))
+       // result: x
+       for {
+               if auxIntToInt64(v.AuxInt) != 0 || v_0.Op != OpStructMake3 {
+                       break
+               }
+               x := v_0.Args[0]
+               v.copyOf(x)
+               return true
+       }
+       // match: (StructSelect [1] (StructMake3 _ x _))
+       // result: x
+       for {
+               if auxIntToInt64(v.AuxInt) != 1 || v_0.Op != OpStructMake3 {
+                       break
+               }
+               x := v_0.Args[1]
+               v.copyOf(x)
+               return true
+       }
+       // match: (StructSelect [2] (StructMake3 _ _ x))
+       // result: x
+       for {
+               if auxIntToInt64(v.AuxInt) != 2 || v_0.Op != OpStructMake3 {
+                       break
+               }
+               x := v_0.Args[2]
+               v.copyOf(x)
+               return true
+       }
+       // match: (StructSelect [0] (StructMake4 x _ _ _))
+       // result: x
+       for {
+               if auxIntToInt64(v.AuxInt) != 0 || v_0.Op != OpStructMake4 {
+                       break
+               }
+               x := v_0.Args[0]
+               v.copyOf(x)
+               return true
+       }
+       // match: (StructSelect [1] (StructMake4 _ x _ _))
+       // result: x
+       for {
+               if auxIntToInt64(v.AuxInt) != 1 || v_0.Op != OpStructMake4 {
+                       break
+               }
+               x := v_0.Args[1]
+               v.copyOf(x)
+               return true
+       }
+       // match: (StructSelect [2] (StructMake4 _ _ x _))
+       // result: x
+       for {
+               if auxIntToInt64(v.AuxInt) != 2 || v_0.Op != OpStructMake4 {
+                       break
+               }
+               x := v_0.Args[2]
+               v.copyOf(x)
+               return true
+       }
+       // match: (StructSelect [3] (StructMake4 _ _ _ x))
+       // result: x
+       for {
+               if auxIntToInt64(v.AuxInt) != 3 || v_0.Op != OpStructMake4 {
+                       break
+               }
+               x := v_0.Args[3]
+               v.copyOf(x)
+               return true
+       }
+       // match: (StructSelect [0] x)
+       // cond: x.Type.IsPtr()
+       // result: x
+       for {
+               if auxIntToInt64(v.AuxInt) != 0 {
+                       break
+               }
+               x := v_0
+               if !(x.Type.IsPtr()) {
+                       break
+               }
+               v.copyOf(x)
+               return true
+       }
+       // match: (StructSelect [i] x:(Load <t> ptr mem))
+       // result: @x.Block (Load <v.Type> (OffPtr <v.Type.PtrTo()> [t.FieldOff(int(i))] ptr) mem)
+       for {
+               i := auxIntToInt64(v.AuxInt)
+               x := v_0
+               if x.Op != OpLoad {
+                       break
+               }
+               t := x.Type
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpLoad, v.Type)
+               v.copyOf(v0)
+               v1 := b.NewValue0(v.Pos, OpOffPtr, v.Type.PtrTo())
+               v1.AuxInt = int64ToAuxInt(t.FieldOff(int(i)))
+               v1.AddArg(ptr)
+               v0.AddArg2(v1, mem)
+               return true
+       }
        return false
 }
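
The final Load rule in rewriteValuedec_OpStructSelect is the workhorse: selecting field i out of a struct that was just loaded becomes a narrower load of only that field, at the field's offset. The source-level effect is roughly the difference between these two ways of reading one field:

package main

type hdr struct {
	ptr *byte
	n   int
}

//go:noinline
func length(p *hdr) int {
	// Naive SSA: (StructSelect [1] (Load <hdr> p mem)).
	// After the rule: (Load <int> (OffPtr [offset of n] p) mem),
	// i.e. only the one field is read from memory.
	return p.n
}

func main() {
	h := hdr{ptr: nil, n: 7}
	println(length(&h)) // 7
}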
 func rewriteBlockdec(b *Block) bool {
index 93643af2943f904773e685a5f4d9d1c513ef0630..af2e0e477e27ee763de8d4124ac0d2f99d811918 100644 (file)
@@ -526,7 +526,7 @@ func buildssa(fn *ir.Func, worker int) *ssa.Func {
                                                s.store(n.Type(), s.decladdrs[n], v)
                                        } else { // Too big for SSA.
                                                // Brute force, and early, do a bunch of stores from registers
-                                               // TODO fix the nasty storeArgOrLoad recursion in ssa/expand_calls.go so this Just Works with store of a big Arg.
+                                               // Note that expand calls knows about this and doesn't trouble itself with larger-than-SSA-able Args in registers.
                                                s.storeParameterRegsToStack(s.f.ABISelf, paramAssignment, n, s.decladdrs[n], false)
                                        }
                                }
index f0a48fbdc24f0ac457254368abb258b2de68417b..3eb0fbcc3d818dbd3ddeba00c733957ca3619d89 100644 (file)
 
 package main
 
-import (
-       "fmt"
-)
-
 var sink int
 
 //go:registerparams
@@ -33,12 +29,12 @@ func G(a, b, c, d, e, f, g, h, i, j, k, l, m *int) {
        var scratch [1000 * 100]int
        I := *c - *e - *l // zero.
        scratch[I] = *d
-       fmt.Println("Got this far!")
+       println("Got this far!")
        sink += scratch[0]
 }
 
 func main() {
        a, b, c, d, e, f, g, h, i, j, k, l, m := 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13
        F(&a, &b, &c, &d, &e, &f, &g, &h, &i, &j, &k, &l, &m)
-       fmt.Printf("Sink = %d\n", sink-7)
+       println("Sink =", sink-7)
 }
diff --git a/test/abi/reg_not_ssa.go b/test/abi/reg_not_ssa.go
new file mode 100644 (file)
index 0000000..5bd4b51
--- /dev/null
@@ -0,0 +1,40 @@
+// run
+
+//go:build !wasm
+// +build !wasm
+
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+// T is small enough to be passed in registers, but too large to be SSA-able.
+type T struct {
+       a, b, c, d, e int
+}
+
+//go:noinline
+func F() {
+       a, b := g(), g()
+       h(b, b)
+       h(a, g())
+       if a.a == 1 {
+               a = g()
+       }
+       h(a, a)
+}
+
+//go:noinline
+func g() T {
+       return T{1, 2, 3, 4, 5}
+}
+
+//go:noinline
+func h(s, t T) {
+       if s != t {
+               println("NEQ")
+       }
+}
+
+func main() { F() }
index 6c08116b2c47b0b0121c56a93320af806e32d846..1058910307ce4b44b61d09600ed725c30f436322 100644 (file)
@@ -57,7 +57,7 @@ func cmov16bit(x, y uint16) uint16 {
        }
        // amd64:"CMOVW(HI|CS)"
        // arm64:"CSNEG\t(LS|HS)"
-       // ppc64x:"ISEL\t[$]0"
+       // ppc64x:"ISEL\t[$][01]"
        // wasm:"Select"
        return x
 }
index d845da35ced9274b49a5f8d0df0ea41d218ac056..edc8b28028066d926367fe983287d10fae61a3d6 100644 (file)
@@ -13,12 +13,12 @@ func IndexArray(x *[10]int, i int) int {
 }
 
 func IndexString(x string, i int) byte {
-       // amd64:`CMOVQLS`
+       // amd64:`CMOVQ(LS|CC)`
        return x[i]
 }
 
 func IndexSlice(x []float64, i int) float64 {
-       // amd64:`CMOVQLS`
+       // amd64:`CMOVQ(LS|CC)`
        return x[i]
 }