package ssa
import (
+ "cmd/compile/internal/base"
"cmd/compile/internal/logopt"
+ "cmd/compile/internal/reflectdata"
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/obj/s390x"
"cmd/internal/src"
"encoding/binary"
"fmt"
+ "internal/buildcfg"
"io"
"math"
"math/bits"
"os"
"path/filepath"
+ "strings"
)
type deadValueChoice bool

const (
leaveDeadValues deadValueChoice = false
removeDeadValues = true
)
-// deadcode indicates that rewrite should try to remove any values that become dead.
+// deadcode indicates whether rewrite should try to remove any values that become dead.
func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
// repeat rewrites until we find no more rewrites
pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
if debug > 1 {
fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
}
+ var iters int
+ var states map[string]bool
for {
change := false
+ deadChange := false
for _, b := range f.Blocks {
var b0 *Block
if debug > 1 {
// Not quite a deadcode pass, because it does not handle cycles.
// But it should help Uses==1 rules to fire.
v.reset(OpInvalid)
- change = true
+ deadChange = true
}
// No point rewriting values which aren't used.
continue
}
}
}
- if !change {
+ if !change && !deadChange {
break
}
+ iters++
+ if (iters > 1000 || debug >= 2) && change {
+ // We've done a suspiciously large number of rewrites (or we're in debug mode).
+ // As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
+ // and the maximum value encountered during make.bash is 12.
+ // Start checking for cycles. (This is too expensive to do routinely.)
+ // Note: we avoid this path for deadChange-only iterations, to fix #51639.
+ if states == nil {
+ states = make(map[string]bool)
+ }
+ h := f.rewriteHash()
+ if _, ok := states[h]; ok {
+ // We've found a cycle.
+ // To diagnose it, set debug to 2 and start again,
+ // so that we'll print all rules applied until we complete another cycle.
+ // If debug is already >= 2, we've already done that, so it's time to crash.
+ if debug < 2 {
+ debug = 2
+ states = make(map[string]bool)
+ } else {
+ f.Fatalf("rewrite cycle detected")
+ }
+ }
+ states[h] = true
+ }
}
// remove clobbered values
for _, b := range f.Blocks {
f.freeValue(v)
continue
}
- if v.Pos.IsStmt() != src.PosNotStmt && pendingLines.get(vl) == int32(b.ID) {
+ if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
pendingLines.remove(vl)
v.Pos = v.Pos.WithIsStmt()
}
return t.IsPtrShaped()
}
-func isSigned(t *types.Type) bool {
- return t.IsSigned()
-}
-
// mergeSym merges two symbolic offsets. There is no real merging of
// offsets; we just pick the non-nil one.
func mergeSym(x, y Sym) Sym {
return true
}
-// isSameCall reports whether sym is the same as the given named symbol
+// isSameCall reports whether sym is the same as the given named symbol.
func isSameCall(sym interface{}, name string) bool {
fn := sym.(*AuxCall).Fn
return fn != nil && fn.String() == name
}
-// nlz returns the number of leading zeros.
+// canLoadUnaligned reports whether the architecture supports unaligned load operations.
+func canLoadUnaligned(c *Config) bool {
+ return c.ctxt.Arch.Alignment == 1
+}
+
+// nlzX returns the number of leading zeros.
func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
return int64(bits.Len32(uint32(n))) - 1
}
-// isPowerOfTwo functions report whether n is a power of 2.
+// isPowerOfTwoX functions report whether n is a power of 2.
func isPowerOfTwo8(n int8) bool {
return n > 0 && n&(n-1) == 0
}
CanBeAnSSAAux()
}
+// auxMark is an Aux marker type; for now it is only used to mark moves that need to avoid clobbering flags.
+type auxMark bool
+
+func (auxMark) CanBeAnSSAAux() {}
+
+var AuxMark auxMark
+
// stringAux wraps string values for use in Aux.
type stringAux string
return uint64(a)+uint64(b) < uint64(a)
}
-// de-virtualize an InterCall
-// 'sym' is the symbol for the itab
-func devirt(v *Value, aux Aux, sym Sym, offset int64) *AuxCall {
- f := v.Block.Func
- n, ok := sym.(*obj.LSym)
- if !ok {
- return nil
- }
- lsym := f.fe.DerefItab(n, offset)
- if f.pass.debug > 0 {
- if lsym != nil {
- f.Warnl(v.Pos, "de-virtualizing call")
- } else {
- f.Warnl(v.Pos, "couldn't de-virtualize call")
- }
- }
- if lsym == nil {
+// loadLSymOffset simulates reading a word at an offset into a
+// read-only symbol's runtime memory. If it would read a pointer to
+// another symbol, that symbol is returned. Otherwise, it returns nil.
+func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
+ if lsym.Type != objabi.SRODATA {
return nil
}
- va := aux.(*AuxCall)
- return StaticAuxCall(lsym, va.args, va.results)
-}
-// de-virtualize an InterLECall
-// 'sym' is the symbol for the itab
-func devirtLESym(v *Value, aux Aux, sym Sym, offset int64) *obj.LSym {
- n, ok := sym.(*obj.LSym)
- if !ok {
- return nil
- }
-
- f := v.Block.Func
- lsym := f.fe.DerefItab(n, offset)
- if f.pass.debug > 0 {
- if lsym != nil {
- f.Warnl(v.Pos, "de-virtualizing call")
- } else {
- f.Warnl(v.Pos, "couldn't de-virtualize call")
+ for _, r := range lsym.R {
+ if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
+ return r.Sym
}
}
- if lsym == nil {
- return nil
- }
- return lsym
+
+ return nil
}
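+// devirtLECall converts an InterLECall to a StaticLECall to sym,
+// dropping the now-unneeded code pointer argument.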
func devirtLECall(v *Value, sym *obj.LSym) *Value {
v.Op = OpStaticLECall
- v.Aux.(*AuxCall).Fn = sym
- v.RemoveArg(0)
+ auxcall := v.Aux.(*AuxCall)
+ auxcall.Fn = sym
+ // Remove first arg
+ v.Args[0].Uses--
+ copy(v.Args[0:], v.Args[1:])
+ v.Args[len(v.Args)-1] = nil // aid GC
+ v.Args = v.Args[:len(v.Args)-1]
+ if f := v.Block.Func; f.pass.debug > 0 {
+ f.Warnl(v.Pos, "de-virtualizing call")
+ }
return v
}
case OpOffPtr:
return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
case OpAddr, OpLocalAddr:
- // OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op.
- // Checking for value equality only works after [z]cse has run.
- return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op
+ return p1.Aux == p2.Aux
case OpAddPtr:
return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
}
offset += base.AuxInt
base = base.Args[0]
}
+ if opcodeTable[base.Op].nilCheck {
+ base = base.Args[0]
+ }
return base, offset
}
p1, off1 := baseAndOffset(p1)
if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
return true
}
- return p2.Op == OpArg && p1.Args[0].Op == OpSP
- case OpArg:
+ return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
+ case OpArg, OpArgIntReg:
if p2.Op == OpSP || p2.Op == OpLocalAddr {
return true
}
case OpSP:
- return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpSP
+ return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
}
return false
}
-// moveSize returns the number of bytes an aligned MOV instruction moves
+// moveSize returns the number of bytes an aligned MOV instruction moves.
func moveSize(align int64, c *Config) int64 {
switch {
case align%8 == 0 && c.PtrSize == 8:
// clobber invalidates values. Returns true.
// clobber is used by rewrite rules to:
-// A) make sure the values are really dead and never used again.
-// B) decrement use counts of the values' args.
+//
+// A) make sure the values are really dead and never used again.
+// B) decrement use counts of the values' args.
func clobber(vv ...*Value) bool {
for _, v := range vv {
v.reset(OpInvalid)
// noteRule is an easy way to track if a rule is matched when writing
// new ones. Make the rule of interest also conditional on
-// noteRule("note to self: rule of interest matched")
+//
+// noteRule("note to self: rule of interest matched")
+//
// and that message will print when the rule matches.
func noteRule(s string) bool {
fmt.Println(s)
return true
}
-// for a pseudo-op like (LessThan x), extract x
+// for a pseudo-op like (LessThan x), extract x.
func flagArg(v *Value) *Value {
if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
return nil
}
// logRule logs the use of the rule s. This will only be enabled if
-// rewrite rules were generated with the -log option, see gen/rulegen.go.
+// rewrite rules were generated with the -log option, see _gen/rulegen.go.
func logRule(s string) {
if ruleFile == nil {
// Open a log file to write log to. We open in append
}
return y
}
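+// max returns the larger of x and y.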
+func max(x, y int64) int64 {
+ if x > y {
+ return x
+ }
+ return y
+}
func isConstZero(v *Value) bool {
switch v.Op {
}
}
-// check if an immediate can be directly encoded into an ARM's instruction
+// check if an immediate can be directly encoded into an ARM instruction.
func isARMImmRot(v uint32) bool {
for i := 0; i < 16; i++ {
if v&^0xff == 0 {
OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
OpAMD64SHLL, OpAMD64SHLLconst:
return true
+ case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
+ OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
+ OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
+ return true
case OpArg:
- return x.Type.Width == 4
+ return x.Type.Size() == 4
case OpPhi, OpSelect0, OpSelect1:
// Phis can use each other as arguments; instead of tracking
// visited values, just limit recursion depth.
return false
}
-// zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits
+// zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
func zeroUpper48Bits(x *Value, depth int) bool {
switch x.Op {
case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
return true
case OpArg:
- return x.Type.Width == 2
+ return x.Type.Size() == 2
case OpPhi, OpSelect0, OpSelect1:
// Phis can use each other as arguments; instead of tracking
// visited values, just limit recursion depth.
return false
}
-// zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits
+// zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
func zeroUpper56Bits(x *Value, depth int) bool {
switch x.Op {
case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
return true
case OpArg:
- return x.Type.Width == 1
+ return x.Type.Size() == 1
case OpPhi, OpSelect0, OpSelect1:
// Phis can use each other as arguments; instead of tracking
// visited values, just limit recursion depth.
return false
}
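+// isInlinableMemclr reports whether the given arch performs OpZero
+// faster than memclrNoHeapPointers.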
+func isInlinableMemclr(c *Config, sz int64) bool {
+ if sz < 0 {
+ return false
+ }
+ // TODO: expand this check to allow other architectures
+ // see CL 454255 and issue 56997
+ switch c.arch {
+ case "amd64", "arm64":
+ return true
+ case "ppc64le", "ppc64":
+ return sz < 512
+ }
+ return false
+}
+
// isInlinableMemmove reports whether the given arch performs a Move of the given size
// faster than memmove. It will only return true if replacing the memmove with a Move is
-// safe, either because Move is small or because the arguments are disjoint.
+// safe, either because Move will do all of its loads before any of its stores, or
+// because the arguments are known to be disjoint.
// This is used as a check for replacing memmove with Move ops.
func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
// It is always safe to convert memmove into Move when its arguments are disjoint.
return sz <= 8
case "s390x", "ppc64", "ppc64le":
return sz <= 8 || disjoint(dst, sz, src, sz)
- case "arm", "mips", "mips64", "mipsle", "mips64le":
+ case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
return sz <= 4
}
return false
}
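+// IsInlinableMemmove is an exported wrapper around isInlinableMemmove.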
+func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
+ return isInlinableMemmove(dst, src, sz, c)
+}
// logLargeCopy logs the occurrence of a large copy.
// The best place to do this is in the rewrite rules where the size of the move is easy to find.
}
return true
}
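+// LogLargeCopy logs the occurrence of a large copy (at least 128 bytes),
+// given the enclosing function's name and the copy's position and size.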
+func LogLargeCopy(funcName string, pos src.XPos, s int64) {
+ if s < 128 {
+ return
+ }
+ if logopt.Enabled() {
+ logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
+ }
+}
// hasSmallRotate reports whether the architecture has rotate instructions
// for sizes < 32-bit. This is used to decide whether to promote some rotations.
}
}
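+// supportsPPC64PCRel reports whether the target supports PC-relative addressing.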
+func supportsPPC64PCRel() bool {
+ // PCRel is currently supported for >= power10, linux only
+ // Internal and external linking support this on ppc64le; only internal linking on ppc64.
+ return buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
+}
+
func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
if sh < 0 || sh >= sz {
panic("PPC64 shift arg sh out of range")
return (v&vp == 0 || vn&vpn == 0) && v != 0
}
-// Compress mask and and shift into single value of the form
+// Compress mask and shift into a single value of the form
// me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
// be used to regenerate the input mask.
func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
// Determine boundaries and then decode them
if mask == 0 || ^mask == 0 || rotate >= nbits {
- panic("Invalid PPC64 rotate mask")
+ panic(fmt.Sprintf("invalid PPC64 rotate mask: %x %d %d", uint64(mask), rotate, nbits))
} else if nbits == 32 {
mb = bits.LeadingZeros32(uint32(mask))
me = 32 - bits.TrailingZeros32(uint32(mask))
return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
}
-// The inverse operation of encodePPC64RotateMask. The values returned as
+// Merge (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x)
+// SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
+// RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
+// operations can be combined. This function assumes the two opcodes can
+// be merged, and returns an encoded rotate+mask value of the combined RLDICL.
+func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
+ mb := s
+ r := 64 - s
+ // A larger mb is a smaller mask.
+ if (encoded>>8)&0xFF < mb {
+ encoded = (encoded &^ 0xFF00) | mb<<8
+ }
+ // The rotate is expected to be 0.
+ if (encoded & 0xFF0000) != 0 {
+ panic("non-zero rotate")
+ }
+ return encoded | r<<16
+}
+
+// DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask. The values returned as
// mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
auxint := uint64(sauxint)
if !isPPC64WordRotateMask(mask) {
return 0
}
- return encodePPC64RotateMask(32-s, mask, 32)
+ return encodePPC64RotateMask((32-s)&31, mask, 32)
}
// Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
// Return the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
mask_1 := uint64(0xFFFFFFFF >> uint(srw))
- // for CLRLSLDI, it's more convient to think of it as a mask left bits then rotate left.
+ // for CLRLSLDI, it's more convenient to think of it as masking the leftmost bits, then rotating left.
mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
// Rewrite mask to apply after the final left shift.
// the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
- // for CLRLSLDI, it's more convient to think of it as a mask left bits then rotate left.
+ // for CLRLSLDI, it's more convenient to think of it as masking the leftmost bits, then rotating left.
mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
// combine the masks, and adjust for the final left shift.
return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
}
+// Convert a PPC64 opcode from the Op to OpCC form. This converts (op x y)
+// to (Select0 (opCC x y)) without having to explicitly fixup every user
+// of op.
+//
+// E.g. consider the case:
+// a = (ADD x y)
+// b = (CMPconst [0] a)
+// c = (OR a z)
+//
+// A rule like (CMPconst [0] (ADD x y)) => (CMPconst [0] (Select0 (ADDCC x y)))
+// would produce:
+// a = (ADD x y)
+// a' = (ADDCC x y)
+// a'' = (Select0 a')
+// b = (CMPconst [0] a'')
+// c = (OR a z)
+//
+// which makes it impossible to rewrite the second user. Instead the result
+// of this conversion is:
+// a' = (ADDCC x y)
+// a = (Select0 a')
+// b = (CMPconst [0] a)
+// c = (OR a z)
+//
+// Which makes it trivial to rewrite b using a lowering rule.
+func convertPPC64OpToOpCC(op *Value) *Value {
+ ccOpMap := map[Op]Op{
+ OpPPC64ADD: OpPPC64ADDCC,
+ OpPPC64ADDconst: OpPPC64ADDCCconst,
+ OpPPC64AND: OpPPC64ANDCC,
+ OpPPC64ANDN: OpPPC64ANDNCC,
+ OpPPC64CNTLZD: OpPPC64CNTLZDCC,
+ OpPPC64OR: OpPPC64ORCC,
+ OpPPC64SUB: OpPPC64SUBCC,
+ OpPPC64NEG: OpPPC64NEGCC,
+ OpPPC64NOR: OpPPC64NORCC,
+ OpPPC64XOR: OpPPC64XORCC,
+ }
+ b := op.Block
+ opCC := b.NewValue0I(op.Pos, ccOpMap[op.Op], types.NewTuple(op.Type, types.TypeFlags), op.AuxInt)
+ opCC.AddArgs(op.Args...)
+ op.reset(OpSelect0)
+ op.AddArgs(opCC)
+ return op
+}
+
// Convenience function to rotate a 32 bit constant value by another constant.
func rotateLeft32(v, rotate int64) int64 {
return int64(bits.RotateLeft32(uint32(v), int(rotate)))
}
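+// rotateRight64 is a convenience function to rotate a 64 bit constant value right by another constant.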
+func rotateRight64(v, rotate int64) int64 {
+ return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
+}
+
// encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
func armBFAuxInt(lsb, width int64) arm64BitField {
if lsb < 0 || lsb > 63 {
panic("ARM(64) bit field lsb constant out of range")
}
- if width < 1 || width > 64 {
+ if width < 1 || lsb+width > 64 {
panic("ARM(64) bit field width constant out of range")
}
return arm64BitField(width | lsb<<8)
return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
}
-// returns the bitfield width of mask >> rshift for arm64 bitfield ops
+// returns the bitfield width of mask >> rshift for arm64 bitfield ops.
func arm64BFWidth(mask, rshift int64) int64 {
shiftedMask := int64(uint64(mask) >> uint64(rshift))
if shiftedMask == 0 {
// a register. It assumes float64 values will always fit into registers
// even if that isn't strictly true.
func registerizable(b *Block, typ *types.Type) bool {
- if typ.IsPtrShaped() || typ.IsFloat() {
+ if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
return true
}
if typ.IsInteger() {
if !f.Config.Race {
return false
}
- if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncenterfp") && !isSameCall(sym, "runtime.racefuncexit") {
+ if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
return false
}
for _, b := range f.Blocks {
for _, v := range b.Values {
switch v.Op {
- case OpStaticCall:
- // Check for racefuncenter/racefuncenterfp will encounter racefuncexit and vice versa.
+ case OpStaticCall, OpStaticLECall:
+ // Scanning for racefuncenter will also encounter racefuncexit, and vice versa.
// Allow calls to panic*
s := v.Aux.(*AuxCall).Fn.String()
switch s {
- case "runtime.racefuncenter", "runtime.racefuncenterfp", "runtime.racefuncexit",
+ case "runtime.racefuncenter", "runtime.racefuncexit",
"runtime.panicdivide", "runtime.panicwrap",
"runtime.panicshift":
continue
return false
case OpPanicBounds, OpPanicExtend:
// Note: these are panic generators that are ok (like the static calls above).
- case OpClosureCall, OpInterCall:
+ case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
// We must keep the race functions if there are any other call types.
return false
}
}
}
if isSameCall(sym, "runtime.racefuncenter") {
+ // TODO REGISTER ABI this needs to be cleaned up.
// If we're removing racefuncenter, remove its argument as well.
if v.Args[0].Op != OpStore {
+ if v.Op == OpStaticLECall {
+ // there is no store, yet.
+ return true
+ }
return false
}
mem := v.Args[0].Args[2]
return true
}
+// isFixed32 returns true if the int32 at offset off in symbol sym
+// is known and constant.
+func isFixed32(c *Config, sym Sym, off int64) bool {
+ return isFixed(c, sym, off, 4)
+}
+
+// isFixed returns true if the range [off,off+size] of the symbol sym
+// is known and constant.
+func isFixed(c *Config, sym Sym, off, size int64) bool {
+ lsym := sym.(*obj.LSym)
+ if lsym.Extra == nil {
+ return false
+ }
+ if _, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
+ if off == 2*c.PtrSize && size == 4 {
+ return true // type hash field
+ }
+ }
+ return false
+}
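+
+// fixed32 returns the int32 at offset off in symbol sym;
+// isFixed32 must have reported true.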
+func fixed32(c *Config, sym Sym, off int64) int32 {
+ lsym := sym.(*obj.LSym)
+ if ti, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
+ if off == 2*c.PtrSize {
+ return int32(types.TypeHash(ti.Type.(*types.Type)))
+ }
+ }
+ base.Fatalf("fixed32 data not known for %s:%d", sym, off)
+ return 0
+}
+
+// isFixedSym reports whether the contents of sym at the given offset
+// are known and are the constant address of another symbol.
+func isFixedSym(sym Sym, off int64) bool {
+ lsym := sym.(*obj.LSym)
+ switch {
+ case lsym.Type == objabi.SRODATA:
+ // itabs, dictionaries
+ default:
+ return false
+ }
+ for _, r := range lsym.R {
+ if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
+ return true
+ }
+ }
+ return false
+}
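+
+// fixedSym returns the symbol whose address is stored at offset off in sym;
+// isFixedSym must have reported true.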
+func fixedSym(f *Func, sym Sym, off int64) Sym {
+ lsym := sym.(*obj.LSym)
+ for _, r := range lsym.R {
+ if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off {
+ if strings.HasPrefix(r.Sym.Name, "type:") {
+ // In case we're loading a type out of a dictionary, we need to record
+ // that the containing function might put that type in an interface.
+ // That information is currently recorded in relocations in the dictionary,
+ // but if we perform this load at compile time then the dictionary
+ // might be dead.
+ reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
+ } else if strings.HasPrefix(r.Sym.Name, "go:itab") {
+ // Same, but if we're using an itab we need to record that the
+ // itab._type might be put in an interface.
+ reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
+ }
+ return r.Sym
+ }
+ }
+ base.Fatalf("fixedSym data not known for %s:%d", sym, off)
+ return nil
+}
+
// read8 reads one byte from the read-only global sym at offset off.
func read8(sym interface{}, off int64) uint8 {
lsym := sym.(*obj.LSym)
// sequentialAddresses reports true if it can prove that x + n == y
func sequentialAddresses(x, y *Value, n int64) bool {
+ if x == y && n == 0 {
+ return true
+ }
if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
// We happen to match the semantics to those of arm/arm64.
// Note that these semantics differ from x86: the carry flag has the opposite
// sense on a subtraction!
-// On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
-// On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
-// (because it does x + ^y + C).
+//
+// On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
+// On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
+// (because it does x + ^y + C).
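+// For example, computing 0 - 1 sets C=1 on amd64 (a borrow occurred) but C=0 on arm64.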
+//
// See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
type flagConstant uint8
fcb.N = x < 0
return fcb.encode()
}
+
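+// makeJumpTableSym returns the symbol used to hold the jump table for block b.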
+func makeJumpTableSym(b *Block) *obj.LSym {
+ s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
+ s.Set(obj.AttrDuplicateOK, true)
+ s.Set(obj.AttrLocal, true)
+ return s
+}
+
+// canRotate reports whether the architecture supports
+// rotates of integer registers with the given number of bits.
+func canRotate(c *Config, bits int64) bool {
+ if bits > c.PtrSize*8 {
+ // Don't rewrite to rotates bigger than the machine word.
+ return false
+ }
+ switch c.arch {
+ case "386", "amd64", "arm64":
+ return true
+ case "arm", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
+ return bits >= 32
+ default:
+ return false
+ }
+}
+
+// isARM64bitcon reports whether a constant can be encoded into a logical instruction.
+func isARM64bitcon(x uint64) bool {
+ if x == 1<<64-1 || x == 0 {
+ return false
+ }
+ // determine the period and sign-extend a unit to 64 bits
+ switch {
+ case x != x>>32|x<<32:
+ // period is 64
+ // nothing to do
+ case x != x>>16|x<<48:
+ // period is 32
+ x = uint64(int64(int32(x)))
+ case x != x>>8|x<<56:
+ // period is 16
+ x = uint64(int64(int16(x)))
+ case x != x>>4|x<<60:
+ // period is 8
+ x = uint64(int64(int8(x)))
+ default:
+ // period is 4 or 2, always true
+ // 0001, 0010, 0100, 1000 -- 0001 rotate
+ // 0011, 0110, 1100, 1001 -- 0011 rotate
+ // 0111, 1011, 1101, 1110 -- 0111 rotate
+ // 0101, 1010 -- 01 rotate, repeat
+ return true
+ }
+ return sequenceOfOnes(x) || sequenceOfOnes(^x)
+}
+
+// sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
+func sequenceOfOnes(x uint64) bool {
+ y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
+ y += x
+ return (y-1)&y == 0
+}
+
+// isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction.
+func isARM64addcon(v int64) bool {
+ /* uimm12 or uimm24? */
+ if v < 0 {
+ return false
+ }
+ if (v & 0xFFF) == 0 {
+ v >>= 12
+ }
+ return v <= 0xFFF
+}
+
+// setPos sets the position of v to pos, then returns true.
+// Useful for setting the result of a rewrite's position to
+// something other than the default.
+func setPos(v *Value, pos src.XPos) bool {
+ v.Pos = pos
+ return true
+}