src/cmd/compile/internal/ssa/rewrite.go

   1 // Copyright 2015 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package ssa
   6
   7 import (
   8         "cmd/compile/internal/base"
   9         "cmd/compile/internal/logopt"
  10         "cmd/compile/internal/reflectdata"
  11         "cmd/compile/internal/types"
  12         "cmd/internal/obj"
  13         "cmd/internal/obj/s390x"
  14         "cmd/internal/objabi"
  15         "cmd/internal/src"
  16         "encoding/binary"
  17         "fmt"
  18         "internal/buildcfg"
  19         "io"
  20         "math"
  21         "math/bits"
  22         "os"
  23         "path/filepath"
  24         "strings"
  25 )
  26
  27 type deadValueChoice bool
  28
  29 const (
  30         leaveDeadValues  deadValueChoice = false
  31         removeDeadValues                 = true
  32 )
  33
  34 // deadcode indicates whether rewrite should try to remove any values that become dead.
  35 func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
  36         // repeat rewrites until we find no more rewrites
  37         pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
  38         pendingLines.clear()
  39         debug := f.pass.debug
  40         if debug > 1 {
  41                 fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
  42         }
  43         var iters int
  44         var states map[string]bool
  45         for {
  46                 change := false
  47                 deadChange := false
  48                 for _, b := range f.Blocks {
  49                         var b0 *Block
  50                         if debug > 1 {
  51                                 b0 = new(Block)
  52                                 *b0 = *b
  53                                 b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
  54                         }
  55                         for i, c := range b.ControlValues() {
  56                                 for c.Op == OpCopy {
  57                                         c = c.Args[0]
  58                                         b.ReplaceControl(i, c)
  59                                 }
  60                         }
  61                         if rb(b) {
  62                                 change = true
  63                                 if debug > 1 {
  64                                         fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
  65                                 }
  66                         }
  67                         for j, v := range b.Values {
  68                                 var v0 *Value
  69                                 if debug > 1 {
  70                                         v0 = new(Value)
  71                                         *v0 = *v
  72                                         v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
  73                                 }
  74                                 if v.Uses == 0 && v.removeable() {
  75                                         if v.Op != OpInvalid && deadcode == removeDeadValues {
  76                                                 // Reset any values that are now unused, so that we decrement
  77                                                 // the use count of all of its arguments.
  78                                                 // Not quite a deadcode pass, because it does not handle cycles.
  79                                                 // But it should help Uses==1 rules to fire.
  80                                                 v.reset(OpInvalid)
  81                                                 deadChange = true
  82                                         }
  83                                         // No point rewriting values which aren't used.
  84                                         continue
  85                                 }
  86
  87                                 vchange := phielimValue(v)
  88                                 if vchange && debug > 1 {
  89                                         fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
  90                                 }
  91
  92                                 // Eliminate copy inputs.
  93                                 // If any copy input becomes unused, mark it
  94                                 // as invalid and discard its argument. Repeat
  95                                 // recursively on the discarded argument.
  96                                 // This phase helps remove phantom "dead copy" uses
  97                                 // of a value so that a x.Uses==1 rule condition
  98                                 // fires reliably.
  99                                 for i, a := range v.Args {
 100                                         if a.Op != OpCopy {
 101                                                 continue
 102                                         }
 103                                         aa := copySource(a)
 104                                         v.SetArg(i, aa)
 105                                         // If a, a copy, has a line boundary indicator, attempt to find a new value
 106                                         // to hold it.  The first candidate is the value that will replace a (aa),
 107                                         // if it shares the same block and line and is eligible.
 108                                         // The second option is v, which has a as an input.  Because aa is earlier in
 109                                         // the data flow, it is the better choice.
 110                                         if a.Pos.IsStmt() == src.PosIsStmt {
 111                                                 if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
 112                                                         aa.Pos = aa.Pos.WithIsStmt()
 113                                                 } else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
 114                                                         v.Pos = v.Pos.WithIsStmt()
 115                                                 } else {
 116                                                         // Record the lost line and look for a new home after all rewrites are complete.
 117                                                         // TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
 118                                                         // line to appear in more than one block, but only one block is stored, so if both end
 119                                                         // up here, then one will be lost.
 120                                                         pendingLines.set(a.Pos, int32(a.Block.ID))
 121                                                 }
 122                                                 a.Pos = a.Pos.WithNotStmt()
 123                                         }
 124                                         vchange = true
 125                                         for a.Uses == 0 {
 126                                                 b := a.Args[0]
 127                                                 a.reset(OpInvalid)
 128                                                 a = b
 129                                         }
 130                                 }
 131                                 if vchange && debug > 1 {
 132                                         fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
 133                                 }
 134
 135                                 // apply rewrite function
 136                                 if rv(v) {
 137                                         vchange = true
 138                                         // If value changed to a poor choice for a statement boundary, move the boundary
 139                                         if v.Pos.IsStmt() == src.PosIsStmt {
 140                                                 if k := nextGoodStatementIndex(v, j, b); k != j {
 141                                                         v.Pos = v.Pos.WithNotStmt()
 142                                                         b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
 143                                                 }
 144                                         }
 145                                 }
 146
 147                                 change = change || vchange
 148                                 if vchange && debug > 1 {
 149                                         fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
 150                                 }
 151                         }
 152                 }
 153                 if !change && !deadChange {
 154                         break
 155                 }
 156                 iters++
 157                 if (iters > 1000 || debug >= 2) && change {
 158                         // We've done a suspiciously large number of rewrites (or we're in debug mode).
 159                         // As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
 160                         // and the maximum value encountered during make.bash is 12.
 161                         // Start checking for cycles. (This is too expensive to do routinely.)
 162                         // Note: we avoid this path for deadChange-only iterations, to fix #51639.
 163                         if states == nil {
 164                                 states = make(map[string]bool)
 165                         }
 166                         h := f.rewriteHash()
 167                         if _, ok := states[h]; ok {
 168                                 // We've found a cycle.
 169                                 // To diagnose it, set debug to 2 and start again,
 170                                 // so that we'll print all rules applied until we complete another cycle.
 171                                 // If debug is already >= 2, we've already done that, so it's time to crash.
 172                                 if debug < 2 {
 173                                         debug = 2
 174                                         states = make(map[string]bool)
 175                                 } else {
 176                                         f.Fatalf("rewrite cycle detected")
 177                                 }
 178                         }
 179                         states[h] = true
 180                 }
 181         }
 182         // remove clobbered values
 183         for _, b := range f.Blocks {
 184                 j := 0
 185                 for i, v := range b.Values {
 186                         vl := v.Pos
 187                         if v.Op == OpInvalid {
 188                                 if v.Pos.IsStmt() == src.PosIsStmt {
 189                                         pendingLines.set(vl, int32(b.ID))
 190                                 }
 191                                 f.freeValue(v)
 192                                 continue
 193                         }
 194                         if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
 195                                 pendingLines.remove(vl)
 196                                 v.Pos = v.Pos.WithIsStmt()
 197                         }
 198                         if i != j {
 199                                 b.Values[j] = v
 200                         }
 201                         j++
 202                 }
 203                 if pendingLines.get(b.Pos) == int32(b.ID) {
 204                         b.Pos = b.Pos.WithIsStmt()
 205                         pendingLines.remove(b.Pos)
 206                 }
 207                 b.truncateValues(j)
 208         }
 209 }
 210
 211 // Common functions called from rewriting rules
 212
 213 func is64BitFloat(t *types.Type) bool {
 214         return t.Size() == 8 && t.IsFloat()
 215 }
 216
 217 func is32BitFloat(t *types.Type) bool {
 218         return t.Size() == 4 && t.IsFloat()
 219 }
 220
 221 func is64BitInt(t *types.Type) bool {
 222         return t.Size() == 8 && t.IsInteger()
 223 }
 224
 225 func is32BitInt(t *types.Type) bool {
 226         return t.Size() == 4 && t.IsInteger()
 227 }
 228
 229 func is16BitInt(t *types.Type) bool {
 230         return t.Size() == 2 && t.IsInteger()
 231 }
 232
 233 func is8BitInt(t *types.Type) bool {
 234         return t.Size() == 1 && t.IsInteger()
 235 }
 236
 237 func isPtr(t *types.Type) bool {
 238         return t.IsPtrShaped()
 239 }
 240
 241 // mergeSym merges two symbolic offsets. There is no real merging of
 242 // offsets, we just pick the non-nil one.
 243 func mergeSym(x, y Sym) Sym {
 244         if x == nil {
 245                 return y
 246         }
 247         if y == nil {
 248                 return x
 249         }
 250         panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
 251 }
 252
 253 func canMergeSym(x, y Sym) bool {
 254         return x == nil || y == nil
 255 }
 256
 257 // canMergeLoadClobber reports whether the load can be merged into target without
 258 // invalidating the schedule.
 259 // It also checks that the other non-load argument x is something we
 260 // are ok with clobbering.
 261 func canMergeLoadClobber(target, load, x *Value) bool {
 262         // The register containing x is going to get clobbered.
 263         // Don't merge if we still need the value of x.
 264         // We don't have liveness information here, but we can
 265         // approximate x dying with:
 266         //  1) target is x's only use.
 267         //  2) target is not in a deeper loop than x.
 268         if x.Uses != 1 {
 269                 return false
 270         }
 271         loopnest := x.Block.Func.loopnest()
 272         loopnest.calculateDepths()
 273         if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
 274                 return false
 275         }
 276         return canMergeLoad(target, load)
 277 }
 278
 279 // canMergeLoad reports whether the load can be merged into target without
 280 // invalidating the schedule.
 281 func canMergeLoad(target, load *Value) bool {
 282         if target.Block.ID != load.Block.ID {
 283                 // If the load is in a different block do not merge it.
 284                 return false
 285         }
 286
 287         // We can't merge the load into the target if the load
 288         // has more than one use.
 289         if load.Uses != 1 {
 290                 return false
 291         }
 292
 293         mem := load.MemoryArg()
 294
 295         // We need the load's memory arg to still be alive at target. That
 296         // can't be the case if one of target's args depends on a memory
 297         // state that is a successor of load's memory arg.
 298         //
 299         // For example, it would be invalid to merge load into target in
 300         // the following situation because newmem has killed oldmem
 301         // before target is reached:
 302         //     load = read ... oldmem
 303         //   newmem = write ... oldmem
 304         //     arg0 = read ... newmem
 305         //   target = add arg0 load
 306         //
 307         // If the argument comes from a different block then we can exclude
 308         // it immediately because it must dominate load (which is in the
 309         // same block as target).
 310         var args []*Value
 311         for _, a := range target.Args {
 312                 if a != load && a.Block.ID == target.Block.ID {
 313                         args = append(args, a)
 314                 }
 315         }
 316
 317         // memPreds contains memory states known to be predecessors of load's
 318         // memory state. It is lazily initialized.
 319         var memPreds map[*Value]bool
 320         for i := 0; len(args) > 0; i++ {
 321                 const limit = 100
 322                 if i >= limit {
 323                         // Give up if we have done a lot of iterations.
 324                         return false
 325                 }
 326                 v := args[len(args)-1]
 327                 args = args[:len(args)-1]
 328                 if target.Block.ID != v.Block.ID {
 329                         // Since target and load are in the same block
 330                         // we can stop searching when we leave the block.
 331                         continue
 332                 }
 333                 if v.Op == OpPhi {
 334                         // A Phi implies we have reached the top of the block.
 335                         // The memory phi, if it exists, is always
 336                         // the first logical store in the block.
 337                         continue
 338                 }
 339                 if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
 340                         // We could handle this situation however it is likely
 341                         // to be very rare.
 342                         return false
 343                 }
 344                 if v.Op.SymEffect()&SymAddr != 0 {
 345                         // This case prevents an operation that calculates the
 346                         // address of a local variable from being forced to schedule
 347                         // before its corresponding VarDef.
 348                         // See issue 28445.
 349                         //   v1 = LOAD ...
 350                         //   v2 = VARDEF
 351                         //   v3 = LEAQ
 352                         //   v4 = CMPQ v1 v3
 353                         // We don't want to combine the CMPQ with the load, because
 354                         // that would force the CMPQ to schedule before the VARDEF, which
 355                         // in turn requires the LEAQ to schedule before the VARDEF.
 356                         return false
 357                 }
 358                 if v.Type.IsMemory() {
 359                         if memPreds == nil {
 360                                 // Initialise a map containing memory states
 361                                 // known to be predecessors of load's memory
 362                                 // state.
 363                                 memPreds = make(map[*Value]bool)
 364                                 m := mem
 365                                 const limit = 50
 366                                 for i := 0; i < limit; i++ {
 367                                         if m.Op == OpPhi {
 368                                                 // The memory phi, if it exists, is always
 369                                                 // the first logical store in the block.
 370                                                 break
 371                                         }
 372                                         if m.Block.ID != target.Block.ID {
 373                                                 break
 374                                         }
 375                                         if !m.Type.IsMemory() {
 376                                                 break
 377                                         }
 378                                         memPreds[m] = true
 379                                         if len(m.Args) == 0 {
 380                                                 break
 381                                         }
 382                                         m = m.MemoryArg()
 383                                 }
 384                         }
 385
 386                         // We can merge if v is a predecessor of mem.
 387                         //
 388                         // For example, we can merge load into target in the
 389                         // following scenario:
 390                         //      x = read ... v
 391                         //    mem = write ... v
 392                         //   load = read ... mem
 393                         // target = add x load
 394                         if memPreds[v] {
 395                                 continue
 396                         }
 397                         return false
 398                 }
 399                 if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
 400                         // If v takes mem as an input then we know mem
 401                         // is valid at this point.
 402                         continue
 403                 }
 404                 for _, a := range v.Args {
 405                         if target.Block.ID == a.Block.ID {
 406                                 args = append(args, a)
 407                         }
 408                 }
 409         }
 410
 411         return true
 412 }
 413
 414 // isSameCall reports whether sym is the same as the given named symbol.
 415 func isSameCall(sym interface{}, name string) bool {
 416         fn := sym.(*AuxCall).Fn
 417         return fn != nil && fn.String() == name
 418 }
 419
 420 // canLoadUnaligned reports if the architecture supports unaligned load operations.
 421 func canLoadUnaligned(c *Config) bool {
 422         return c.ctxt.Arch.Alignment == 1
 423 }
 424
 425 // nlzX returns the number of leading zeros.
 426 func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
 427 func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
 428 func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
 429 func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
 430
 431 // ntzX returns the number of trailing zeros.
 432 func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
 433 func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
 434 func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
 435 func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
 436
 437 func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
 438 func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
 439 func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
 440 func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
 441 func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
 442
 443 // nto returns the number of trailing ones.
 444 func nto(x int64) int64 {
 445         return int64(ntz64(^x))
 446 }
 447
 448 // logX returns logarithm of n base 2.
 449 // n must be a positive power of 2 (isPowerOfTwoX returns true).
 450 func log8(n int8) int64 {
 451         return int64(bits.Len8(uint8(n))) - 1
 452 }
 453 func log16(n int16) int64 {
 454         return int64(bits.Len16(uint16(n))) - 1
 455 }
 456 func log32(n int32) int64 {
 457         return int64(bits.Len32(uint32(n))) - 1
 458 }
 459 func log64(n int64) int64 {
 460         return int64(bits.Len64(uint64(n))) - 1
 461 }
 462
 463 // log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
 464 // Rounds down.
 465 func log2uint32(n int64) int64 {
 466         return int64(bits.Len32(uint32(n))) - 1
 467 }
 468
 469 // isPowerOfTwoX functions report whether n is a power of 2.
 470 func isPowerOfTwo8(n int8) bool {
 471         return n > 0 && n&(n-1) == 0
 472 }
 473 func isPowerOfTwo16(n int16) bool {
 474         return n > 0 && n&(n-1) == 0
 475 }
 476 func isPowerOfTwo32(n int32) bool {
 477         return n > 0 && n&(n-1) == 0
 478 }
 479 func isPowerOfTwo64(n int64) bool {
 480         return n > 0 && n&(n-1) == 0
 481 }
 482
 483 // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
 484 func isUint64PowerOfTwo(in int64) bool {
 485         n := uint64(in)
 486         return n > 0 && n&(n-1) == 0
 487 }
 488
 489 // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
 490 func isUint32PowerOfTwo(in int64) bool {
 491         n := uint64(uint32(in))
 492         return n > 0 && n&(n-1) == 0
 493 }
 494
 495 // is32Bit reports whether n can be represented as a signed 32 bit integer.
 496 func is32Bit(n int64) bool {
 497         return n == int64(int32(n))
 498 }
 499
 500 // is16Bit reports whether n can be represented as a signed 16 bit integer.
 501 func is16Bit(n int64) bool {
 502         return n == int64(int16(n))
 503 }
 504
 505 // is8Bit reports whether n can be represented as a signed 8 bit integer.
 506 func is8Bit(n int64) bool {
 507         return n == int64(int8(n))
 508 }
 509
 510 // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
 511 func isU8Bit(n int64) bool {
 512         return n == int64(uint8(n))
 513 }
 514
 515 // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
 516 func isU12Bit(n int64) bool {
 517         return 0 <= n && n < (1<<12)
 518 }
 519
 520 // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
 521 func isU16Bit(n int64) bool {
 522         return n == int64(uint16(n))
 523 }
 524
 525 // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
 526 func isU32Bit(n int64) bool {
 527         return n == int64(uint32(n))
 528 }
 529
 530 // is20Bit reports whether n can be represented as a signed 20 bit integer.
 531 func is20Bit(n int64) bool {
 532         return -(1<<19) <= n && n < (1<<19)
 533 }
 534
 535 // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
 536 func b2i(b bool) int64 {
 537         if b {
 538                 return 1
 539         }
 540         return 0
 541 }
 542
 543 // b2i32 translates a boolean value to 0 or 1.
 544 func b2i32(b bool) int32 {
 545         if b {
 546                 return 1
 547         }
 548         return 0
 549 }
 550
 551 // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
 552 // A shift is bounded if it is shifting by less than the width of the shifted value.
 553 func shiftIsBounded(v *Value) bool {
 554         return v.AuxInt != 0
 555 }
 556
 557 // canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
 558 // generated code as much as possible.
 559 func canonLessThan(x, y *Value) bool {
 560         if x.Op != y.Op {
 561                 return x.Op < y.Op
 562         }
 563         if !x.Pos.SameFileAndLine(y.Pos) {
 564                 return x.Pos.Before(y.Pos)
 565         }
 566         return x.ID < y.ID
 567 }
 568
 569 // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
 570 // of the mantissa. It will panic if the truncation results in lost information.
 571 func truncate64Fto32F(f float64) float32 {
 572         if !isExactFloat32(f) {
 573                 panic("truncate64Fto32F: truncation is not exact")
 574         }
 575         if !math.IsNaN(f) {
 576                 return float32(f)
 577         }
 578         // NaN bit patterns aren't necessarily preserved across conversion
 579         // instructions so we need to do the conversion manually.
 580         b := math.Float64bits(f)
 581         m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
 582         //          | sign                  | exponent   | mantissa       |
 583         r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
 584         return math.Float32frombits(r)
 585 }
 586
 587 // extend32Fto64F converts a float32 value to a float64 value preserving the bit
 588 // pattern of the mantissa.
 589 func extend32Fto64F(f float32) float64 {
 590         if !math.IsNaN(float64(f)) {
 591                 return float64(f)
 592         }
 593         // NaN bit patterns aren't necessarily preserved across conversion
 594         // instructions so we need to do the conversion manually.
 595         b := uint64(math.Float32bits(f))
 596         //   | sign                  | exponent      | mantissa                    |
 597         r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
 598         return math.Float64frombits(r)
 599 }
 600
 601 // DivisionNeedsFixUp reports whether the division needs fix-up code.
 602 func DivisionNeedsFixUp(v *Value) bool {
 603         return v.AuxInt == 0
 604 }
 605
 606 // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
 607 func auxFrom64F(f float64) int64 {
 608         if f != f {
 609                 panic("can't encode a NaN in AuxInt field")
 610         }
 611         return int64(math.Float64bits(f))
 612 }
 613
 614 // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
 615 func auxFrom32F(f float32) int64 {
 616         if f != f {
 617                 panic("can't encode a NaN in AuxInt field")
 618         }
 619         return int64(math.Float64bits(extend32Fto64F(f)))
 620 }
 621
 622 // auxTo32F decodes a float32 from the AuxInt value provided.
 623 func auxTo32F(i int64) float32 {
 624         return truncate64Fto32F(math.Float64frombits(uint64(i)))
 625 }
 626
 627 // auxTo64F decodes a float64 from the AuxInt value provided.
 628 func auxTo64F(i int64) float64 {
 629         return math.Float64frombits(uint64(i))
 630 }
 631
 632 func auxIntToBool(i int64) bool {
 633         if i == 0 {
 634                 return false
 635         }
 636         return true
 637 }
 638 func auxIntToInt8(i int64) int8 {
 639         return int8(i)
 640 }
 641 func auxIntToInt16(i int64) int16 {
 642         return int16(i)
 643 }
 644 func auxIntToInt32(i int64) int32 {
 645         return int32(i)
 646 }
 647 func auxIntToInt64(i int64) int64 {
 648         return i
 649 }
 650 func auxIntToUint8(i int64) uint8 {
 651         return uint8(i)
 652 }
 653 func auxIntToFloat32(i int64) float32 {
 654         return float32(math.Float64frombits(uint64(i)))
 655 }
 656 func auxIntToFloat64(i int64) float64 {
 657         return math.Float64frombits(uint64(i))
 658 }
 659 func auxIntToValAndOff(i int64) ValAndOff {
 660         return ValAndOff(i)
 661 }
 662 func auxIntToArm64BitField(i int64) arm64BitField {
 663         return arm64BitField(i)
 664 }
 665 func auxIntToInt128(x int64) int128 {
 666         if x != 0 {
 667                 panic("nonzero int128 not allowed")
 668         }
 669         return 0
 670 }
 671 func auxIntToFlagConstant(x int64) flagConstant {
 672         return flagConstant(x)
 673 }
 674
 675 func auxIntToOp(cc int64) Op {
 676         return Op(cc)
 677 }
 678
 679 func boolToAuxInt(b bool) int64 {
 680         if b {
 681                 return 1
 682         }
 683         return 0
 684 }
 685 func int8ToAuxInt(i int8) int64 {
 686         return int64(i)
 687 }
 688 func int16ToAuxInt(i int16) int64 {
 689         return int64(i)
 690 }
 691 func int32ToAuxInt(i int32) int64 {
 692         return int64(i)
 693 }
 694 func int64ToAuxInt(i int64) int64 {
 695         return int64(i)
 696 }
 697 func uint8ToAuxInt(i uint8) int64 {
 698         return int64(int8(i))
 699 }
 700 func float32ToAuxInt(f float32) int64 {
 701         return int64(math.Float64bits(float64(f)))
 702 }
 703 func float64ToAuxInt(f float64) int64 {
 704         return int64(math.Float64bits(f))
 705 }
 706 func valAndOffToAuxInt(v ValAndOff) int64 {
 707         return int64(v)
 708 }
 709 func arm64BitFieldToAuxInt(v arm64BitField) int64 {
 710         return int64(v)
 711 }
 712 func int128ToAuxInt(x int128) int64 {
 713         if x != 0 {
 714                 panic("nonzero int128 not allowed")
 715         }
 716         return 0
 717 }
 718 func flagConstantToAuxInt(x flagConstant) int64 {
 719         return int64(x)
 720 }
 721
 722 func opToAuxInt(o Op) int64 {
 723         return int64(o)
 724 }
 725
 726 // Aux is an interface to hold miscellaneous data in Blocks and Values.
 727 type Aux interface {
 728         CanBeAnSSAAux()
 729 }
 730
 731 // for now only used to mark moves that need to avoid clobbering flags
 732 type auxMark bool
 733
 734 func (auxMark) CanBeAnSSAAux() {}
 735
 736 var AuxMark auxMark
 737
 738 // stringAux wraps string values for use in Aux.
 739 type stringAux string
 740
 741 func (stringAux) CanBeAnSSAAux() {}
 742
 743 func auxToString(i Aux) string {
 744         return string(i.(stringAux))
 745 }
 746 func auxToSym(i Aux) Sym {
 747         // TODO: kind of a hack - allows nil interface through
 748         s, _ := i.(Sym)
 749         return s
 750 }
 751 func auxToType(i Aux) *types.Type {
 752         return i.(*types.Type)
 753 }
 754 func auxToCall(i Aux) *AuxCall {
 755         return i.(*AuxCall)
 756 }
 757 func auxToS390xCCMask(i Aux) s390x.CCMask {
 758         return i.(s390x.CCMask)
 759 }
 760 func auxToS390xRotateParams(i Aux) s390x.RotateParams {
 761         return i.(s390x.RotateParams)
 762 }
 763
 764 func StringToAux(s string) Aux {
 765         return stringAux(s)
 766 }
 767 func symToAux(s Sym) Aux {
 768         return s
 769 }
 770 func callToAux(s *AuxCall) Aux {
 771         return s
 772 }
 773 func typeToAux(t *types.Type) Aux {
 774         return t
 775 }
 776 func s390xCCMaskToAux(c s390x.CCMask) Aux {
 777         return c
 778 }
 779 func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
 780         return r
 781 }
 782
 783 // uaddOvf reports whether unsigned a+b would overflow.
 784 func uaddOvf(a, b int64) bool {
 785         return uint64(a)+uint64(b) < uint64(a)
 786 }
 787
 788 // loadLSymOffset simulates reading a word at an offset into a
 789 // read-only symbol's runtime memory. If it would read a pointer to
 790 // another symbol, that symbol is returned. Otherwise, it returns nil.
 791 func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
 792         if lsym.Type != objabi.SRODATA {
 793                 return nil
 794         }
 795
 796         for _, r := range lsym.R {
 797                 if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
 798                         return r.Sym
 799                 }
 800         }
 801
 802         return nil
 803 }
 804
 805 func devirtLECall(v *Value, sym *obj.LSym) *Value {
 806         v.Op = OpStaticLECall
 807         auxcall := v.Aux.(*AuxCall)
 808         auxcall.Fn = sym
 809         // Remove first arg
 810         v.Args[0].Uses--
 811         copy(v.Args[0:], v.Args[1:])
 812         v.Args[len(v.Args)-1] = nil // aid GC
 813         v.Args = v.Args[:len(v.Args)-1]
 814         if f := v.Block.Func; f.pass.debug > 0 {
 815                 f.Warnl(v.Pos, "de-virtualizing call")
 816         }
 817         return v
 818 }
 819
 820 // isSamePtr reports whether p1 and p2 point to the same address.
 821 func isSamePtr(p1, p2 *Value) bool {
 822         if p1 == p2 {
 823                 return true
 824         }
 825         if p1.Op != p2.Op {
 826                 return false
 827         }
 828         switch p1.Op {
 829         case OpOffPtr:
 830                 return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
 831         case OpAddr, OpLocalAddr:
 832                 return p1.Aux == p2.Aux
 833         case OpAddPtr:
 834                 return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
 835         }
 836         return false
 837 }
 838
 839 func isStackPtr(v *Value) bool {
 840         for v.Op == OpOffPtr || v.Op == OpAddPtr {
 841                 v = v.Args[0]
 842         }
 843         return v.Op == OpSP || v.Op == OpLocalAddr
 844 }
 845
 846 // disjoint reports whether the memory region specified by [p1:p1+n1)
 847 // does not overlap with [p2:p2+n2).
 848 // A return value of false does not imply the regions overlap.
 849 func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
 850         if n1 == 0 || n2 == 0 {
 851                 return true
 852         }
 853         if p1 == p2 {
 854                 return false
 855         }
 856         baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
 857                 base, offset = ptr, 0
 858                 for base.Op == OpOffPtr {
 859                         offset += base.AuxInt
 860                         base = base.Args[0]
 861                 }
 862                 return base, offset
 863         }
 864         p1, off1 := baseAndOffset(p1)
 865         p2, off2 := baseAndOffset(p2)
 866         if isSamePtr(p1, p2) {
 867                 return !overlap(off1, n1, off2, n2)
 868         }
 869         // p1 and p2 are not the same, so if they are both OpAddrs then
 870         // they point to different variables.
 871         // If one pointer is on the stack and the other is an argument
 872         // then they can't overlap.
 873         switch p1.Op {
 874         case OpAddr, OpLocalAddr:
 875                 if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
 876                         return true
 877                 }
 878                 return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
 879         case OpArg, OpArgIntReg:
 880                 if p2.Op == OpSP || p2.Op == OpLocalAddr {
 881                         return true
 882                 }
 883         case OpSP:
 884                 return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
 885         }
 886         return false
 887 }
 888
 889 // moveSize returns the number of bytes an aligned MOV instruction moves.
 890 func moveSize(align int64, c *Config) int64 {
 891         switch {
 892         case align%8 == 0 && c.PtrSize == 8:
 893                 return 8
 894         case align%4 == 0:
 895                 return 4
 896         case align%2 == 0:
 897                 return 2
 898         }
 899         return 1
 900 }
 901
 902 // mergePoint finds a block among a's blocks which dominates b and is itself
 903 // dominated by all of a's blocks. Returns nil if it can't find one.
 904 // Might return nil even if one does exist.
 905 func mergePoint(b *Block, a ...*Value) *Block {
 906         // Walk backward from b looking for one of the a's blocks.
 907
 908         // Max distance
 909         d := 100
 910
 911         for d > 0 {
 912                 for _, x := range a {
 913                         if b == x.Block {
 914                                 goto found
 915                         }
 916                 }
 917                 if len(b.Preds) > 1 {
 918                         // Don't know which way to go back. Abort.
 919                         return nil
 920                 }
 921                 b = b.Preds[0].b
 922                 d--
 923         }
 924         return nil // too far away
 925 found:
 926         // At this point, r is the first value in a that we find by walking backwards.
 927         // if we return anything, r will be it.
 928         r := b
 929
 930         // Keep going, counting the other a's that we find. They must all dominate r.
 931         na := 0
 932         for d > 0 {
 933                 for _, x := range a {
 934                         if b == x.Block {
 935                                 na++
 936                         }
 937                 }
 938                 if na == len(a) {
 939                         // Found all of a in a backwards walk. We can return r.
 940                         return r
 941                 }
 942                 if len(b.Preds) > 1 {
 943                         return nil
 944                 }
 945                 b = b.Preds[0].b
 946                 d--
 947
 948         }
 949         return nil // too far away
 950 }
 951
 952 // clobber invalidates values. Returns true.
 953 // clobber is used by rewrite rules to:
 954 //
 955 //      A) make sure the values are really dead and never used again.
 956 //      B) decrement use counts of the values' args.
 957 func clobber(vv ...*Value) bool {
 958         for _, v := range vv {
 959                 v.reset(OpInvalid)
 960                 // Note: leave v.Block intact.  The Block field is used after clobber.
 961         }
 962         return true
 963 }
 964
 965 // clobberIfDead resets v when use count is 1. Returns true.
 966 // clobberIfDead is used by rewrite rules to decrement
 967 // use counts of v's args when v is dead and never used.
 968 func clobberIfDead(v *Value) bool {
 969         if v.Uses == 1 {
 970                 v.reset(OpInvalid)
 971         }
 972         // Note: leave v.Block intact.  The Block field is used after clobberIfDead.
 973         return true
 974 }
 975
 976 // noteRule is an easy way to track if a rule is matched when writing
 977 // new ones.  Make the rule of interest also conditional on
 978 //
 979 //      noteRule("note to self: rule of interest matched")
 980 //
 981 // and that message will print when the rule matches.
 982 func noteRule(s string) bool {
 983         fmt.Println(s)
 984         return true
 985 }
 986
 987 // countRule increments Func.ruleMatches[key].
 988 // If Func.ruleMatches is non-nil at the end
 989 // of compilation, it will be printed to stdout.
 990 // This is intended to make it easier to find which functions
 991 // which contain lots of rules matches when developing new rules.
 992 func countRule(v *Value, key string) bool {
 993         f := v.Block.Func
 994         if f.ruleMatches == nil {
 995                 f.ruleMatches = make(map[string]int)
 996         }
 997         f.ruleMatches[key]++
 998         return true
 999 }
1000
1001 // warnRule generates compiler debug output with string s when
1002 // v is not in autogenerated code, cond is true and the rule has fired.
1003 func warnRule(cond bool, v *Value, s string) bool {
1004         if pos := v.Pos; pos.Line() > 1 && cond {
1005                 v.Block.Func.Warnl(pos, s)
1006         }
1007         return true
1008 }
1009
1010 // for a pseudo-op like (LessThan x), extract x.
1011 func flagArg(v *Value) *Value {
1012         if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
1013                 return nil
1014         }
1015         return v.Args[0]
1016 }
1017
1018 // arm64Negate finds the complement to an ARM64 condition code,
1019 // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
1020 //
1021 // For floating point, it's more subtle because NaN is unordered. We do
1022 // !LessThanF -> NotLessThanF, the latter takes care of NaNs.
1023 func arm64Negate(op Op) Op {
1024         switch op {
1025         case OpARM64LessThan:
1026                 return OpARM64GreaterEqual
1027         case OpARM64LessThanU:
1028                 return OpARM64GreaterEqualU
1029         case OpARM64GreaterThan:
1030                 return OpARM64LessEqual
1031         case OpARM64GreaterThanU:
1032                 return OpARM64LessEqualU
1033         case OpARM64LessEqual:
1034                 return OpARM64GreaterThan
1035         case OpARM64LessEqualU:
1036                 return OpARM64GreaterThanU
1037         case OpARM64GreaterEqual:
1038                 return OpARM64LessThan
1039         case OpARM64GreaterEqualU:
1040                 return OpARM64LessThanU
1041         case OpARM64Equal:
1042                 return OpARM64NotEqual
1043         case OpARM64NotEqual:
1044                 return OpARM64Equal
1045         case OpARM64LessThanF:
1046                 return OpARM64NotLessThanF
1047         case OpARM64NotLessThanF:
1048                 return OpARM64LessThanF
1049         case OpARM64LessEqualF:
1050                 return OpARM64NotLessEqualF
1051         case OpARM64NotLessEqualF:
1052                 return OpARM64LessEqualF
1053         case OpARM64GreaterThanF:
1054                 return OpARM64NotGreaterThanF
1055         case OpARM64NotGreaterThanF:
1056                 return OpARM64GreaterThanF
1057         case OpARM64GreaterEqualF:
1058                 return OpARM64NotGreaterEqualF
1059         case OpARM64NotGreaterEqualF:
1060                 return OpARM64GreaterEqualF
1061         default:
1062                 panic("unreachable")
1063         }
1064 }
1065
1066 // arm64Invert evaluates (InvertFlags op), which
1067 // is the same as altering the condition codes such
1068 // that the same result would be produced if the arguments
1069 // to the flag-generating instruction were reversed, e.g.
1070 // (InvertFlags (CMP x y)) -> (CMP y x)
1071 func arm64Invert(op Op) Op {
1072         switch op {
1073         case OpARM64LessThan:
1074                 return OpARM64GreaterThan
1075         case OpARM64LessThanU:
1076                 return OpARM64GreaterThanU
1077         case OpARM64GreaterThan:
1078                 return OpARM64LessThan
1079         case OpARM64GreaterThanU:
1080                 return OpARM64LessThanU
1081         case OpARM64LessEqual:
1082                 return OpARM64GreaterEqual
1083         case OpARM64LessEqualU:
1084                 return OpARM64GreaterEqualU
1085         case OpARM64GreaterEqual:
1086                 return OpARM64LessEqual
1087         case OpARM64GreaterEqualU:
1088                 return OpARM64LessEqualU
1089         case OpARM64Equal, OpARM64NotEqual:
1090                 return op
1091         case OpARM64LessThanF:
1092                 return OpARM64GreaterThanF
1093         case OpARM64GreaterThanF:
1094                 return OpARM64LessThanF
1095         case OpARM64LessEqualF:
1096                 return OpARM64GreaterEqualF
1097         case OpARM64GreaterEqualF:
1098                 return OpARM64LessEqualF
1099         case OpARM64NotLessThanF:
1100                 return OpARM64NotGreaterThanF
1101         case OpARM64NotGreaterThanF:
1102                 return OpARM64NotLessThanF
1103         case OpARM64NotLessEqualF:
1104                 return OpARM64NotGreaterEqualF
1105         case OpARM64NotGreaterEqualF:
1106                 return OpARM64NotLessEqualF
1107         default:
1108                 panic("unreachable")
1109         }
1110 }
1111
1112 // evaluate an ARM64 op against a flags value
1113 // that is potentially constant; return 1 for true,
1114 // -1 for false, and 0 for not constant.
1115 func ccARM64Eval(op Op, flags *Value) int {
1116         fop := flags.Op
1117         if fop == OpARM64InvertFlags {
1118                 return -ccARM64Eval(op, flags.Args[0])
1119         }
1120         if fop != OpARM64FlagConstant {
1121                 return 0
1122         }
1123         fc := flagConstant(flags.AuxInt)
1124         b2i := func(b bool) int {
1125                 if b {
1126                         return 1
1127                 }
1128                 return -1
1129         }
1130         switch op {
1131         case OpARM64Equal:
1132                 return b2i(fc.eq())
1133         case OpARM64NotEqual:
1134                 return b2i(fc.ne())
1135         case OpARM64LessThan:
1136                 return b2i(fc.lt())
1137         case OpARM64LessThanU:
1138                 return b2i(fc.ult())
1139         case OpARM64GreaterThan:
1140                 return b2i(fc.gt())
1141         case OpARM64GreaterThanU:
1142                 return b2i(fc.ugt())
1143         case OpARM64LessEqual:
1144                 return b2i(fc.le())
1145         case OpARM64LessEqualU:
1146                 return b2i(fc.ule())
1147         case OpARM64GreaterEqual:
1148                 return b2i(fc.ge())
1149         case OpARM64GreaterEqualU:
1150                 return b2i(fc.uge())
1151         }
1152         return 0
1153 }
1154
1155 // logRule logs the use of the rule s. This will only be enabled if
1156 // rewrite rules were generated with the -log option, see _gen/rulegen.go.
1157 func logRule(s string) {
1158         if ruleFile == nil {
1159                 // Open a log file to write log to. We open in append
1160                 // mode because all.bash runs the compiler lots of times,
1161                 // and we want the concatenation of all of those logs.
1162                 // This means, of course, that users need to rm the old log
1163                 // to get fresh data.
1164                 // TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
1165                 w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
1166                         os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
1167                 if err != nil {
1168                         panic(err)
1169                 }
1170                 ruleFile = w
1171         }
1172         _, err := fmt.Fprintln(ruleFile, s)
1173         if err != nil {
1174                 panic(err)
1175         }
1176 }
1177
1178 var ruleFile io.Writer
1179
1180 func min(x, y int64) int64 {
1181         if x < y {
1182                 return x
1183         }
1184         return y
1185 }
1186 func max(x, y int64) int64 {
1187         if x > y {
1188                 return x
1189         }
1190         return y
1191 }
1192
1193 func isConstZero(v *Value) bool {
1194         switch v.Op {
1195         case OpConstNil:
1196                 return true
1197         case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
1198                 return v.AuxInt == 0
1199         }
1200         return false
1201 }
1202
1203 // reciprocalExact64 reports whether 1/c is exactly representable.
1204 func reciprocalExact64(c float64) bool {
1205         b := math.Float64bits(c)
1206         man := b & (1<<52 - 1)
1207         if man != 0 {
1208                 return false // not a power of 2, denormal, or NaN
1209         }
1210         exp := b >> 52 & (1<<11 - 1)
1211         // exponent bias is 0x3ff.  So taking the reciprocal of a number
1212         // changes the exponent to 0x7fe-exp.
1213         switch exp {
1214         case 0:
1215                 return false // ±0
1216         case 0x7ff:
1217                 return false // ±inf
1218         case 0x7fe:
1219                 return false // exponent is not representable
1220         default:
1221                 return true
1222         }
1223 }
1224
1225 // reciprocalExact32 reports whether 1/c is exactly representable.
1226 func reciprocalExact32(c float32) bool {
1227         b := math.Float32bits(c)
1228         man := b & (1<<23 - 1)
1229         if man != 0 {
1230                 return false // not a power of 2, denormal, or NaN
1231         }
1232         exp := b >> 23 & (1<<8 - 1)
1233         // exponent bias is 0x7f.  So taking the reciprocal of a number
1234         // changes the exponent to 0xfe-exp.
1235         switch exp {
1236         case 0:
1237                 return false // ±0
1238         case 0xff:
1239                 return false // ±inf
1240         case 0xfe:
1241                 return false // exponent is not representable
1242         default:
1243                 return true
1244         }
1245 }
1246
1247 // check if an immediate can be directly encoded into an ARM's instruction.
1248 func isARMImmRot(v uint32) bool {
1249         for i := 0; i < 16; i++ {
1250                 if v&^0xff == 0 {
1251                         return true
1252                 }
1253                 v = v<<2 | v>>30
1254         }
1255
1256         return false
1257 }
1258
1259 // overlap reports whether the ranges given by the given offset and
1260 // size pairs overlap.
1261 func overlap(offset1, size1, offset2, size2 int64) bool {
1262         if offset1 >= offset2 && offset2+size2 > offset1 {
1263                 return true
1264         }
1265         if offset2 >= offset1 && offset1+size1 > offset2 {
1266                 return true
1267         }
1268         return false
1269 }
1270
1271 func areAdjacentOffsets(off1, off2, size int64) bool {
1272         return off1+size == off2 || off1 == off2+size
1273 }
1274
1275 // check if value zeroes out upper 32-bit of 64-bit register.
1276 // depth limits recursion depth. In AMD64.rules 3 is used as limit,
1277 // because it catches same amount of cases as 4.
1278 func zeroUpper32Bits(x *Value, depth int) bool {
1279         switch x.Op {
1280         case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
1281                 OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
1282                 OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
1283                 OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
1284                 OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
1285                 OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
1286                 OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
1287                 OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
1288                 OpAMD64SHLL, OpAMD64SHLLconst:
1289                 return true
1290         case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
1291                 OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
1292                 OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
1293                 return true
1294         case OpArg:
1295                 return x.Type.Size() == 4
1296         case OpPhi, OpSelect0, OpSelect1:
1297                 // Phis can use each-other as an arguments, instead of tracking visited values,
1298                 // just limit recursion depth.
1299                 if depth <= 0 {
1300                         return false
1301                 }
1302                 for i := range x.Args {
1303                         if !zeroUpper32Bits(x.Args[i], depth-1) {
1304                                 return false
1305                         }
1306                 }
1307                 return true
1308
1309         }
1310         return false
1311 }
1312
1313 // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
1314 func zeroUpper48Bits(x *Value, depth int) bool {
1315         switch x.Op {
1316         case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
1317                 return true
1318         case OpArg:
1319                 return x.Type.Size() == 2
1320         case OpPhi, OpSelect0, OpSelect1:
1321                 // Phis can use each-other as an arguments, instead of tracking visited values,
1322                 // just limit recursion depth.
1323                 if depth <= 0 {
1324                         return false
1325                 }
1326                 for i := range x.Args {
1327                         if !zeroUpper48Bits(x.Args[i], depth-1) {
1328                                 return false
1329                         }
1330                 }
1331                 return true
1332
1333         }
1334         return false
1335 }
1336
1337 // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
1338 func zeroUpper56Bits(x *Value, depth int) bool {
1339         switch x.Op {
1340         case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
1341                 return true
1342         case OpArg:
1343                 return x.Type.Size() == 1
1344         case OpPhi, OpSelect0, OpSelect1:
1345                 // Phis can use each-other as an arguments, instead of tracking visited values,
1346                 // just limit recursion depth.
1347                 if depth <= 0 {
1348                         return false
1349                 }
1350                 for i := range x.Args {
1351                         if !zeroUpper56Bits(x.Args[i], depth-1) {
1352                                 return false
1353                         }
1354                 }
1355                 return true
1356
1357         }
1358         return false
1359 }
1360
1361 func isInlinableMemclr(c *Config, sz int64) bool {
1362         if sz < 0 {
1363                 return false
1364         }
1365         // TODO: expand this check to allow other architectures
1366         // see CL 454255 and issue 56997
1367         switch c.arch {
1368         case "amd64", "arm64":
1369                 return true
1370         case "ppc64le", "ppc64":
1371                 return sz < 512
1372         }
1373         return false
1374 }
1375
1376 // isInlinableMemmove reports whether the given arch performs a Move of the given size
1377 // faster than memmove. It will only return true if replacing the memmove with a Move is
1378 // safe, either because Move will do all of its loads before any of its stores, or
1379 // because the arguments are known to be disjoint.
1380 // This is used as a check for replacing memmove with Move ops.
1381 func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1382         // It is always safe to convert memmove into Move when its arguments are disjoint.
1383         // Move ops may or may not be faster for large sizes depending on how the platform
1384         // lowers them, so we only perform this optimization on platforms that we know to
1385         // have fast Move ops.
1386         switch c.arch {
1387         case "amd64":
1388                 return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
1389         case "386", "arm64":
1390                 return sz <= 8
1391         case "s390x", "ppc64", "ppc64le":
1392                 return sz <= 8 || disjoint(dst, sz, src, sz)
1393         case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
1394                 return sz <= 4
1395         }
1396         return false
1397 }
1398 func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1399         return isInlinableMemmove(dst, src, sz, c)
1400 }
1401
1402 // logLargeCopy logs the occurrence of a large copy.
1403 // The best place to do this is in the rewrite rules where the size of the move is easy to find.
1404 // "Large" is arbitrarily chosen to be 128 bytes; this may change.
1405 func logLargeCopy(v *Value, s int64) bool {
1406         if s < 128 {
1407                 return true
1408         }
1409         if logopt.Enabled() {
1410                 logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
1411         }
1412         return true
1413 }
1414 func LogLargeCopy(funcName string, pos src.XPos, s int64) {
1415         if s < 128 {
1416                 return
1417         }
1418         if logopt.Enabled() {
1419                 logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
1420         }
1421 }
1422
1423 // hasSmallRotate reports whether the architecture has rotate instructions
1424 // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
1425 func hasSmallRotate(c *Config) bool {
1426         switch c.arch {
1427         case "amd64", "386":
1428                 return true
1429         default:
1430                 return false
1431         }
1432 }
1433
1434 func supportsPPC64PCRel() bool {
1435         // PCRel is currently supported for >= power10, linux only
1436         // Internal and external linking supports this on ppc64le; internal linking on ppc64.
1437         return buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
1438 }
1439
1440 func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
1441         if sh < 0 || sh >= sz {
1442                 panic("PPC64 shift arg sh out of range")
1443         }
1444         if mb < 0 || mb >= sz {
1445                 panic("PPC64 shift arg mb out of range")
1446         }
1447         if me < 0 || me >= sz {
1448                 panic("PPC64 shift arg me out of range")
1449         }
1450         return int32(sh<<16 | mb<<8 | me)
1451 }
1452
1453 func GetPPC64Shiftsh(auxint int64) int64 {
1454         return int64(int8(auxint >> 16))
1455 }
1456
1457 func GetPPC64Shiftmb(auxint int64) int64 {
1458         return int64(int8(auxint >> 8))
1459 }
1460
1461 func GetPPC64Shiftme(auxint int64) int64 {
1462         return int64(int8(auxint))
1463 }
1464
1465 // Test if this value can encoded as a mask for a rlwinm like
1466 // operation.  Masks can also extend from the msb and wrap to
1467 // the lsb too.  That is, the valid masks are 32 bit strings
1468 // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
1469 func isPPC64WordRotateMask(v64 int64) bool {
1470         // Isolate rightmost 1 (if none 0) and add.
1471         v := uint32(v64)
1472         vp := (v & -v) + v
1473         // Likewise, for the wrapping case.
1474         vn := ^v
1475         vpn := (vn & -vn) + vn
1476         return (v&vp == 0 || vn&vpn == 0) && v != 0
1477 }
1478
1479 // Compress mask and shift into single value of the form
1480 // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
1481 // be used to regenerate the input mask.
1482 func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
1483         var mb, me, mbn, men int
1484
1485         // Determine boundaries and then decode them
1486         if mask == 0 || ^mask == 0 || rotate >= nbits {
1487                 panic(fmt.Sprintf("invalid PPC64 rotate mask: %x %d %d", uint64(mask), rotate, nbits))
1488         } else if nbits == 32 {
1489                 mb = bits.LeadingZeros32(uint32(mask))
1490                 me = 32 - bits.TrailingZeros32(uint32(mask))
1491                 mbn = bits.LeadingZeros32(^uint32(mask))
1492                 men = 32 - bits.TrailingZeros32(^uint32(mask))
1493         } else {
1494                 mb = bits.LeadingZeros64(uint64(mask))
1495                 me = 64 - bits.TrailingZeros64(uint64(mask))
1496                 mbn = bits.LeadingZeros64(^uint64(mask))
1497                 men = 64 - bits.TrailingZeros64(^uint64(mask))
1498         }
1499         // Check for a wrapping mask (e.g bits at 0 and 63)
1500         if mb == 0 && me == int(nbits) {
1501                 // swap the inverted values
1502                 mb, me = men, mbn
1503         }
1504
1505         return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
1506 }
1507
1508 // Merge (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x)
1509 // SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
1510 // RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
1511 // operations can be combined. This functions assumes the two opcodes can
1512 // be merged, and returns an encoded rotate+mask value of the combined RLDICL.
1513 func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
1514         mb := s
1515         r := 64 - s
1516         // A larger mb is a smaller mask.
1517         if (encoded>>8)&0xFF < mb {
1518                 encoded = (encoded &^ 0xFF00) | mb<<8
1519         }
1520         // The rotate is expected to be 0.
1521         if (encoded & 0xFF0000) != 0 {
1522                 panic("non-zero rotate")
1523         }
1524         return encoded | r<<16
1525 }
1526
1527 // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask.  The values returned as
1528 // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
1529 func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
1530         auxint := uint64(sauxint)
1531         rotate = int64((auxint >> 16) & 0xFF)
1532         mb = int64((auxint >> 8) & 0xFF)
1533         me = int64((auxint >> 0) & 0xFF)
1534         nbits := int64((auxint >> 24) & 0xFF)
1535         mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
1536         if mb > me {
1537                 mask = ^mask
1538         }
1539         if nbits == 32 {
1540                 mask = uint64(uint32(mask))
1541         }
1542
1543         // Fixup ME to match ISA definition.  The second argument to MASK(..,me)
1544         // is inclusive.
1545         me = (me - 1) & (nbits - 1)
1546         return
1547 }
1548
1549 // This verifies that the mask is a set of
1550 // consecutive bits including the least
1551 // significant bit.
1552 func isPPC64ValidShiftMask(v int64) bool {
1553         if (v != 0) && ((v+1)&v) == 0 {
1554                 return true
1555         }
1556         return false
1557 }
1558
1559 func getPPC64ShiftMaskLength(v int64) int64 {
1560         return int64(bits.Len64(uint64(v)))
1561 }
1562
1563 // Decompose a shift right into an equivalent rotate/mask,
1564 // and return mask & m.
1565 func mergePPC64RShiftMask(m, s, nbits int64) int64 {
1566         smask := uint64((1<<uint(nbits))-1) >> uint(s)
1567         return m & int64(smask)
1568 }
1569
1570 // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
1571 func mergePPC64AndSrwi(m, s int64) int64 {
1572         mask := mergePPC64RShiftMask(m, s, 32)
1573         if !isPPC64WordRotateMask(mask) {
1574                 return 0
1575         }
1576         return encodePPC64RotateMask((32-s)&31, mask, 32)
1577 }
1578
1579 // Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
1580 // Return the encoded RLWINM constant, or 0 if they cannot be merged.
1581 func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
1582         mask_1 := uint64(0xFFFFFFFF >> uint(srw))
1583         // for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
1584         mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1585
1586         // Rewrite mask to apply after the final left shift.
1587         mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
1588
1589         r_1 := 32 - srw
1590         r_2 := GetPPC64Shiftsh(sld)
1591         r_3 := (r_1 + r_2) & 31 // This can wrap.
1592
1593         if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
1594                 return 0
1595         }
1596         return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
1597 }
1598
1599 // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
1600 // the encoded RLWINM constant, or 0 if they cannot be merged.
1601 func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
1602         r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
1603         // for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
1604         mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1605
1606         // combine the masks, and adjust for the final left shift.
1607         mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
1608         r_2 := GetPPC64Shiftsh(int64(sld))
1609         r_3 := (r_1 + r_2) & 31 // This can wrap.
1610
1611         // Verify the result is still a valid bitmask of <= 32 bits.
1612         if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
1613                 return 0
1614         }
1615         return encodePPC64RotateMask(r_3, int64(mask_3), 32)
1616 }
1617
1618 // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
1619 // or return 0 if they cannot be combined.
1620 func mergePPC64SldiSrw(sld, srw int64) int64 {
1621         if sld > srw || srw >= 32 {
1622                 return 0
1623         }
1624         mask_r := uint32(0xFFFFFFFF) >> uint(srw)
1625         mask_l := uint32(0xFFFFFFFF) >> uint(sld)
1626         mask := (mask_r & mask_l) << uint(sld)
1627         return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
1628 }
1629
1630 // Convenience function to rotate a 32 bit constant value by another constant.
1631 func rotateLeft32(v, rotate int64) int64 {
1632         return int64(bits.RotateLeft32(uint32(v), int(rotate)))
1633 }
1634
1635 func rotateRight64(v, rotate int64) int64 {
1636         return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
1637 }
1638
1639 // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
1640 func armBFAuxInt(lsb, width int64) arm64BitField {
1641         if lsb < 0 || lsb > 63 {
1642                 panic("ARM(64) bit field lsb constant out of range")
1643         }
1644         if width < 1 || lsb+width > 64 {
1645                 panic("ARM(64) bit field width constant out of range")
1646         }
1647         return arm64BitField(width | lsb<<8)
1648 }
1649
1650 // returns the lsb part of the auxInt field of arm64 bitfield ops.
1651 func (bfc arm64BitField) getARM64BFlsb() int64 {
1652         return int64(uint64(bfc) >> 8)
1653 }
1654
1655 // returns the width part of the auxInt field of arm64 bitfield ops.
1656 func (bfc arm64BitField) getARM64BFwidth() int64 {
1657         return int64(bfc) & 0xff
1658 }
1659
1660 // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
1661 func isARM64BFMask(lsb, mask, rshift int64) bool {
1662         shiftedMask := int64(uint64(mask) >> uint64(rshift))
1663         return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
1664 }
1665
1666 // returns the bitfield width of mask >> rshift for arm64 bitfield ops.
1667 func arm64BFWidth(mask, rshift int64) int64 {
1668         shiftedMask := int64(uint64(mask) >> uint64(rshift))
1669         if shiftedMask == 0 {
1670                 panic("ARM64 BF mask is zero")
1671         }
1672         return nto(shiftedMask)
1673 }
1674
1675 // sizeof returns the size of t in bytes.
1676 // It will panic if t is not a *types.Type.
1677 func sizeof(t interface{}) int64 {
1678         return t.(*types.Type).Size()
1679 }
1680
1681 // registerizable reports whether t is a primitive type that fits in
1682 // a register. It assumes float64 values will always fit into registers
1683 // even if that isn't strictly true.
1684 func registerizable(b *Block, typ *types.Type) bool {
1685         if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
1686                 return true
1687         }
1688         if typ.IsInteger() {
1689                 return typ.Size() <= b.Func.Config.RegSize
1690         }
1691         return false
1692 }
1693
1694 // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
1695 func needRaceCleanup(sym *AuxCall, v *Value) bool {
1696         f := v.Block.Func
1697         if !f.Config.Race {
1698                 return false
1699         }
1700         if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
1701                 return false
1702         }
1703         for _, b := range f.Blocks {
1704                 for _, v := range b.Values {
1705                         switch v.Op {
1706                         case OpStaticCall, OpStaticLECall:
1707                                 // Check for racefuncenter will encounter racefuncexit and vice versa.
1708                                 // Allow calls to panic*
1709                                 s := v.Aux.(*AuxCall).Fn.String()
1710                                 switch s {
1711                                 case "runtime.racefuncenter", "runtime.racefuncexit",
1712                                         "runtime.panicdivide", "runtime.panicwrap",
1713                                         "runtime.panicshift":
1714                                         continue
1715                                 }
1716                                 // If we encountered any call, we need to keep racefunc*,
1717                                 // for accurate stacktraces.
1718                                 return false
1719                         case OpPanicBounds, OpPanicExtend:
1720                                 // Note: these are panic generators that are ok (like the static calls above).
1721                         case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
1722                                 // We must keep the race functions if there are any other call types.
1723                                 return false
1724                         }
1725                 }
1726         }
1727         if isSameCall(sym, "runtime.racefuncenter") {
1728                 // TODO REGISTER ABI this needs to be cleaned up.
1729                 // If we're removing racefuncenter, remove its argument as well.
1730                 if v.Args[0].Op != OpStore {
1731                         if v.Op == OpStaticLECall {
1732                                 // there is no store, yet.
1733                                 return true
1734                         }
1735                         return false
1736                 }
1737                 mem := v.Args[0].Args[2]
1738                 v.Args[0].reset(OpCopy)
1739                 v.Args[0].AddArg(mem)
1740         }
1741         return true
1742 }
1743
1744 // symIsRO reports whether sym is a read-only global.
1745 func symIsRO(sym interface{}) bool {
1746         lsym := sym.(*obj.LSym)
1747         return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
1748 }
1749
1750 // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
1751 func symIsROZero(sym Sym) bool {
1752         lsym := sym.(*obj.LSym)
1753         if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
1754                 return false
1755         }
1756         for _, b := range lsym.P {
1757                 if b != 0 {
1758                         return false
1759                 }
1760         }
1761         return true
1762 }
1763
1764 // isFixed32 returns true if the int32 at offset off in symbol sym
1765 // is known and constant.
1766 func isFixed32(c *Config, sym Sym, off int64) bool {
1767         return isFixed(c, sym, off, 4)
1768 }
1769
1770 // isFixed returns true if the range [off,off+size] of the symbol sym
1771 // is known and constant.
1772 func isFixed(c *Config, sym Sym, off, size int64) bool {
1773         lsym := sym.(*obj.LSym)
1774         if lsym.Extra == nil {
1775                 return false
1776         }
1777         if _, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
1778                 if off == 2*c.PtrSize && size == 4 {
1779                         return true // type hash field
1780                 }
1781         }
1782         return false
1783 }
1784 func fixed32(c *Config, sym Sym, off int64) int32 {
1785         lsym := sym.(*obj.LSym)
1786         if ti, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
1787                 if off == 2*c.PtrSize {
1788                         return int32(types.TypeHash(ti.Type.(*types.Type)))
1789                 }
1790         }
1791         base.Fatalf("fixed32 data not known for %s:%d", sym, off)
1792         return 0
1793 }
1794
1795 // isFixedSym returns true if the contents of sym at the given offset
1796 // is known and is the constant address of another symbol.
1797 func isFixedSym(sym Sym, off int64) bool {
1798         lsym := sym.(*obj.LSym)
1799         switch {
1800         case lsym.Type == objabi.SRODATA:
1801                 // itabs, dictionaries
1802         default:
1803                 return false
1804         }
1805         for _, r := range lsym.R {
1806                 if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
1807                         return true
1808                 }
1809         }
1810         return false
1811 }
1812 func fixedSym(f *Func, sym Sym, off int64) Sym {
1813         lsym := sym.(*obj.LSym)
1814         for _, r := range lsym.R {
1815                 if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off {
1816                         if strings.HasPrefix(r.Sym.Name, "type:") {
1817                                 // In case we're loading a type out of a dictionary, we need to record
1818                                 // that the containing function might put that type in an interface.
1819                                 // That information is currently recorded in relocations in the dictionary,
1820                                 // but if we perform this load at compile time then the dictionary
1821                                 // might be dead.
1822                                 reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
1823                         } else if strings.HasPrefix(r.Sym.Name, "go:itab") {
1824                                 // Same, but if we're using an itab we need to record that the
1825                                 // itab._type might be put in an interface.
1826                                 reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
1827                         }
1828                         return r.Sym
1829                 }
1830         }
1831         base.Fatalf("fixedSym data not known for %s:%d", sym, off)
1832         return nil
1833 }
1834
1835 // read8 reads one byte from the read-only global sym at offset off.
1836 func read8(sym interface{}, off int64) uint8 {
1837         lsym := sym.(*obj.LSym)
1838         if off >= int64(len(lsym.P)) || off < 0 {
1839                 // Invalid index into the global sym.
1840                 // This can happen in dead code, so we don't want to panic.
1841                 // Just return any value, it will eventually get ignored.
1842                 // See issue 29215.
1843                 return 0
1844         }
1845         return lsym.P[off]
1846 }
1847
1848 // read16 reads two bytes from the read-only global sym at offset off.
1849 func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
1850         lsym := sym.(*obj.LSym)
1851         // lsym.P is written lazily.
1852         // Bytes requested after the end of lsym.P are 0.
1853         var src []byte
1854         if 0 <= off && off < int64(len(lsym.P)) {
1855                 src = lsym.P[off:]
1856         }
1857         buf := make([]byte, 2)
1858         copy(buf, src)
1859         return byteorder.Uint16(buf)
1860 }
1861
1862 // read32 reads four bytes from the read-only global sym at offset off.
1863 func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
1864         lsym := sym.(*obj.LSym)
1865         var src []byte
1866         if 0 <= off && off < int64(len(lsym.P)) {
1867                 src = lsym.P[off:]
1868         }
1869         buf := make([]byte, 4)
1870         copy(buf, src)
1871         return byteorder.Uint32(buf)
1872 }
1873
1874 // read64 reads eight bytes from the read-only global sym at offset off.
1875 func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
1876         lsym := sym.(*obj.LSym)
1877         var src []byte
1878         if 0 <= off && off < int64(len(lsym.P)) {
1879                 src = lsym.P[off:]
1880         }
1881         buf := make([]byte, 8)
1882         copy(buf, src)
1883         return byteorder.Uint64(buf)
1884 }
1885
1886 // sequentialAddresses reports true if it can prove that x + n == y
1887 func sequentialAddresses(x, y *Value, n int64) bool {
1888         if x == y && n == 0 {
1889                 return true
1890         }
1891         if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
1892                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1893                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1894                 return true
1895         }
1896         if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1897                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1898                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1899                 return true
1900         }
1901         if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
1902                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1903                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1904                 return true
1905         }
1906         if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1907                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1908                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1909                 return true
1910         }
1911         return false
1912 }
1913
1914 // flagConstant represents the result of a compile-time comparison.
1915 // The sense of these flags does not necessarily represent the hardware's notion
1916 // of a flags register - these are just a compile-time construct.
1917 // We happen to match the semantics to those of arm/arm64.
1918 // Note that these semantics differ from x86: the carry flag has the opposite
1919 // sense on a subtraction!
1920 //
1921 //      On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
1922 //      On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
1923 //       (because it does x + ^y + C).
1924 //
1925 // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
1926 type flagConstant uint8
1927
1928 // N reports whether the result of an operation is negative (high bit set).
1929 func (fc flagConstant) N() bool {
1930         return fc&1 != 0
1931 }
1932
1933 // Z reports whether the result of an operation is 0.
1934 func (fc flagConstant) Z() bool {
1935         return fc&2 != 0
1936 }
1937
1938 // C reports whether an unsigned add overflowed (carry), or an
1939 // unsigned subtract did not underflow (borrow).
1940 func (fc flagConstant) C() bool {
1941         return fc&4 != 0
1942 }
1943
1944 // V reports whether a signed operation overflowed or underflowed.
1945 func (fc flagConstant) V() bool {
1946         return fc&8 != 0
1947 }
1948
1949 func (fc flagConstant) eq() bool {
1950         return fc.Z()
1951 }
1952 func (fc flagConstant) ne() bool {
1953         return !fc.Z()
1954 }
1955 func (fc flagConstant) lt() bool {
1956         return fc.N() != fc.V()
1957 }
1958 func (fc flagConstant) le() bool {
1959         return fc.Z() || fc.lt()
1960 }
1961 func (fc flagConstant) gt() bool {
1962         return !fc.Z() && fc.ge()
1963 }
1964 func (fc flagConstant) ge() bool {
1965         return fc.N() == fc.V()
1966 }
1967 func (fc flagConstant) ult() bool {
1968         return !fc.C()
1969 }
1970 func (fc flagConstant) ule() bool {
1971         return fc.Z() || fc.ult()
1972 }
1973 func (fc flagConstant) ugt() bool {
1974         return !fc.Z() && fc.uge()
1975 }
1976 func (fc flagConstant) uge() bool {
1977         return fc.C()
1978 }
1979
1980 func (fc flagConstant) ltNoov() bool {
1981         return fc.lt() && !fc.V()
1982 }
1983 func (fc flagConstant) leNoov() bool {
1984         return fc.le() && !fc.V()
1985 }
1986 func (fc flagConstant) gtNoov() bool {
1987         return fc.gt() && !fc.V()
1988 }
1989 func (fc flagConstant) geNoov() bool {
1990         return fc.ge() && !fc.V()
1991 }
1992
1993 func (fc flagConstant) String() string {
1994         return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
1995 }
1996
1997 type flagConstantBuilder struct {
1998         N bool
1999         Z bool
2000         C bool
2001         V bool
2002 }
2003
2004 func (fcs flagConstantBuilder) encode() flagConstant {
2005         var fc flagConstant
2006         if fcs.N {
2007                 fc |= 1
2008         }
2009         if fcs.Z {
2010                 fc |= 2
2011         }
2012         if fcs.C {
2013                 fc |= 4
2014         }
2015         if fcs.V {
2016                 fc |= 8
2017         }
2018         return fc
2019 }
2020
2021 // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
2022 //  - the results of the C flag are different
2023 //  - the results of the V flag when y==minint are different
2024
2025 // addFlags64 returns the flags that would be set from computing x+y.
2026 func addFlags64(x, y int64) flagConstant {
2027         var fcb flagConstantBuilder
2028         fcb.Z = x+y == 0
2029         fcb.N = x+y < 0
2030         fcb.C = uint64(x+y) < uint64(x)
2031         fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
2032         return fcb.encode()
2033 }
2034
2035 // subFlags64 returns the flags that would be set from computing x-y.
2036 func subFlags64(x, y int64) flagConstant {
2037         var fcb flagConstantBuilder
2038         fcb.Z = x-y == 0
2039         fcb.N = x-y < 0
2040         fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
2041         fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
2042         return fcb.encode()
2043 }
2044
2045 // addFlags32 returns the flags that would be set from computing x+y.
2046 func addFlags32(x, y int32) flagConstant {
2047         var fcb flagConstantBuilder
2048         fcb.Z = x+y == 0
2049         fcb.N = x+y < 0
2050         fcb.C = uint32(x+y) < uint32(x)
2051         fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
2052         return fcb.encode()
2053 }
2054
2055 // subFlags32 returns the flags that would be set from computing x-y.
2056 func subFlags32(x, y int32) flagConstant {
2057         var fcb flagConstantBuilder
2058         fcb.Z = x-y == 0
2059         fcb.N = x-y < 0
2060         fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
2061         fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
2062         return fcb.encode()
2063 }
2064
2065 // logicFlags64 returns flags set to the sign/zeroness of x.
2066 // C and V are set to false.
2067 func logicFlags64(x int64) flagConstant {
2068         var fcb flagConstantBuilder
2069         fcb.Z = x == 0
2070         fcb.N = x < 0
2071         return fcb.encode()
2072 }
2073
2074 // logicFlags32 returns flags set to the sign/zeroness of x.
2075 // C and V are set to false.
2076 func logicFlags32(x int32) flagConstant {
2077         var fcb flagConstantBuilder
2078         fcb.Z = x == 0
2079         fcb.N = x < 0
2080         return fcb.encode()
2081 }
2082
2083 func makeJumpTableSym(b *Block) *obj.LSym {
2084         s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
2085         s.Set(obj.AttrDuplicateOK, true)
2086         s.Set(obj.AttrLocal, true)
2087         return s
2088 }
2089
2090 // canRotate reports whether the architecture supports
2091 // rotates of integer registers with the given number of bits.
2092 func canRotate(c *Config, bits int64) bool {
2093         if bits > c.PtrSize*8 {
2094                 // Don't rewrite to rotates bigger than the machine word.
2095                 return false
2096         }
2097         switch c.arch {
2098         case "386", "amd64", "arm64":
2099                 return true
2100         case "arm", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
2101                 return bits >= 32
2102         default:
2103                 return false
2104         }
2105 }
2106
2107 // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
2108 func isARM64bitcon(x uint64) bool {
2109         if x == 1<<64-1 || x == 0 {
2110                 return false
2111         }
2112         // determine the period and sign-extend a unit to 64 bits
2113         switch {
2114         case x != x>>32|x<<32:
2115                 // period is 64
2116                 // nothing to do
2117         case x != x>>16|x<<48:
2118                 // period is 32
2119                 x = uint64(int64(int32(x)))
2120         case x != x>>8|x<<56:
2121                 // period is 16
2122                 x = uint64(int64(int16(x)))
2123         case x != x>>4|x<<60:
2124                 // period is 8
2125                 x = uint64(int64(int8(x)))
2126         default:
2127                 // period is 4 or 2, always true
2128                 // 0001, 0010, 0100, 1000 -- 0001 rotate
2129                 // 0011, 0110, 1100, 1001 -- 0011 rotate
2130                 // 0111, 1011, 1101, 1110 -- 0111 rotate
2131                 // 0101, 1010             -- 01   rotate, repeat
2132                 return true
2133         }
2134         return sequenceOfOnes(x) || sequenceOfOnes(^x)
2135 }
2136
2137 // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
2138 func sequenceOfOnes(x uint64) bool {
2139         y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
2140         y += x
2141         return (y-1)&y == 0
2142 }
2143
2144 // isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction.
2145 func isARM64addcon(v int64) bool {
2146         /* uimm12 or uimm24? */
2147         if v < 0 {
2148                 return false
2149         }
2150         if (v & 0xFFF) == 0 {
2151                 v >>= 12
2152         }
2153         return v <= 0xFFF
2154 }
2155
2156 // setPos sets the position of v to pos, then returns true.
2157 // Useful for setting the result of a rewrite's position to
2158 // something other than the default.
2159 func setPos(v *Value, pos src.XPos) bool {
2160         v.Pos = pos
2161         return true
2162 }