src/cmd/compile/internal/ssa/rewrite.go

   1 // Copyright 2015 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package ssa
   6
   7 import (
   8         "cmd/compile/internal/base"
   9         "cmd/compile/internal/logopt"
  10         "cmd/compile/internal/types"
  11         "cmd/internal/obj"
  12         "cmd/internal/obj/s390x"
  13         "cmd/internal/objabi"
  14         "cmd/internal/src"
  15         "encoding/binary"
  16         "fmt"
  17         "io"
  18         "math"
  19         "math/bits"
  20         "os"
  21         "path/filepath"
  22 )
  23
  24 type deadValueChoice bool
  25
  26 const (
  27         leaveDeadValues  deadValueChoice = false
  28         removeDeadValues                 = true
  29 )
  30
  31 // deadcode indicates whether rewrite should try to remove any values that become dead.
  32 func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
  33         // repeat rewrites until we find no more rewrites
  34         pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
  35         pendingLines.clear()
  36         debug := f.pass.debug
  37         if debug > 1 {
  38                 fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
  39         }
  40         var iters int
  41         var states map[string]bool
  42         for {
  43                 change := false
  44                 deadChange := false
  45                 for _, b := range f.Blocks {
  46                         var b0 *Block
  47                         if debug > 1 {
  48                                 b0 = new(Block)
  49                                 *b0 = *b
  50                                 b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
  51                         }
  52                         for i, c := range b.ControlValues() {
  53                                 for c.Op == OpCopy {
  54                                         c = c.Args[0]
  55                                         b.ReplaceControl(i, c)
  56                                 }
  57                         }
  58                         if rb(b) {
  59                                 change = true
  60                                 if debug > 1 {
  61                                         fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
  62                                 }
  63                         }
  64                         for j, v := range b.Values {
  65                                 var v0 *Value
  66                                 if debug > 1 {
  67                                         v0 = new(Value)
  68                                         *v0 = *v
  69                                         v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
  70                                 }
  71                                 if v.Uses == 0 && v.removeable() {
  72                                         if v.Op != OpInvalid && deadcode == removeDeadValues {
  73                                                 // Reset any values that are now unused, so that we decrement
  74                                                 // the use count of all of its arguments.
  75                                                 // Not quite a deadcode pass, because it does not handle cycles.
  76                                                 // But it should help Uses==1 rules to fire.
  77                                                 v.reset(OpInvalid)
  78                                                 deadChange = true
  79                                         }
  80                                         // No point rewriting values which aren't used.
  81                                         continue
  82                                 }
  83
  84                                 vchange := phielimValue(v)
  85                                 if vchange && debug > 1 {
  86                                         fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
  87                                 }
  88
  89                                 // Eliminate copy inputs.
  90                                 // If any copy input becomes unused, mark it
  91                                 // as invalid and discard its argument. Repeat
  92                                 // recursively on the discarded argument.
  93                                 // This phase helps remove phantom "dead copy" uses
  94                                 // of a value so that a x.Uses==1 rule condition
  95                                 // fires reliably.
  96                                 for i, a := range v.Args {
  97                                         if a.Op != OpCopy {
  98                                                 continue
  99                                         }
 100                                         aa := copySource(a)
 101                                         v.SetArg(i, aa)
 102                                         // If a, a copy, has a line boundary indicator, attempt to find a new value
 103                                         // to hold it.  The first candidate is the value that will replace a (aa),
 104                                         // if it shares the same block and line and is eligible.
 105                                         // The second option is v, which has a as an input.  Because aa is earlier in
 106                                         // the data flow, it is the better choice.
 107                                         if a.Pos.IsStmt() == src.PosIsStmt {
 108                                                 if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
 109                                                         aa.Pos = aa.Pos.WithIsStmt()
 110                                                 } else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
 111                                                         v.Pos = v.Pos.WithIsStmt()
 112                                                 } else {
 113                                                         // Record the lost line and look for a new home after all rewrites are complete.
 114                                                         // TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
 115                                                         // line to appear in more than one block, but only one block is stored, so if both end
 116                                                         // up here, then one will be lost.
 117                                                         pendingLines.set(a.Pos, int32(a.Block.ID))
 118                                                 }
 119                                                 a.Pos = a.Pos.WithNotStmt()
 120                                         }
 121                                         vchange = true
 122                                         for a.Uses == 0 {
 123                                                 b := a.Args[0]
 124                                                 a.reset(OpInvalid)
 125                                                 a = b
 126                                         }
 127                                 }
 128                                 if vchange && debug > 1 {
 129                                         fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
 130                                 }
 131
 132                                 // apply rewrite function
 133                                 if rv(v) {
 134                                         vchange = true
 135                                         // If value changed to a poor choice for a statement boundary, move the boundary
 136                                         if v.Pos.IsStmt() == src.PosIsStmt {
 137                                                 if k := nextGoodStatementIndex(v, j, b); k != j {
 138                                                         v.Pos = v.Pos.WithNotStmt()
 139                                                         b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
 140                                                 }
 141                                         }
 142                                 }
 143
 144                                 change = change || vchange
 145                                 if vchange && debug > 1 {
 146                                         fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
 147                                 }
 148                         }
 149                 }
 150                 if !change && !deadChange {
 151                         break
 152                 }
 153                 iters++
 154                 if (iters > 1000 || debug >= 2) && change {
 155                         // We've done a suspiciously large number of rewrites (or we're in debug mode).
 156                         // As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
 157                         // and the maximum value encountered during make.bash is 12.
 158                         // Start checking for cycles. (This is too expensive to do routinely.)
 159                         // Note: we avoid this path for deadChange-only iterations, to fix #51639.
 160                         if states == nil {
 161                                 states = make(map[string]bool)
 162                         }
 163                         h := f.rewriteHash()
 164                         if _, ok := states[h]; ok {
 165                                 // We've found a cycle.
 166                                 // To diagnose it, set debug to 2 and start again,
 167                                 // so that we'll print all rules applied until we complete another cycle.
 168                                 // If debug is already >= 2, we've already done that, so it's time to crash.
 169                                 if debug < 2 {
 170                                         debug = 2
 171                                         states = make(map[string]bool)
 172                                 } else {
 173                                         f.Fatalf("rewrite cycle detected")
 174                                 }
 175                         }
 176                         states[h] = true
 177                 }
 178         }
 179         // remove clobbered values
 180         for _, b := range f.Blocks {
 181                 j := 0
 182                 for i, v := range b.Values {
 183                         vl := v.Pos
 184                         if v.Op == OpInvalid {
 185                                 if v.Pos.IsStmt() == src.PosIsStmt {
 186                                         pendingLines.set(vl, int32(b.ID))
 187                                 }
 188                                 f.freeValue(v)
 189                                 continue
 190                         }
 191                         if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
 192                                 pendingLines.remove(vl)
 193                                 v.Pos = v.Pos.WithIsStmt()
 194                         }
 195                         if i != j {
 196                                 b.Values[j] = v
 197                         }
 198                         j++
 199                 }
 200                 if pendingLines.get(b.Pos) == int32(b.ID) {
 201                         b.Pos = b.Pos.WithIsStmt()
 202                         pendingLines.remove(b.Pos)
 203                 }
 204                 b.truncateValues(j)
 205         }
 206 }
 207
 208 // Common functions called from rewriting rules
 209
 210 func is64BitFloat(t *types.Type) bool {
 211         return t.Size() == 8 && t.IsFloat()
 212 }
 213
 214 func is32BitFloat(t *types.Type) bool {
 215         return t.Size() == 4 && t.IsFloat()
 216 }
 217
 218 func is64BitInt(t *types.Type) bool {
 219         return t.Size() == 8 && t.IsInteger()
 220 }
 221
 222 func is32BitInt(t *types.Type) bool {
 223         return t.Size() == 4 && t.IsInteger()
 224 }
 225
 226 func is16BitInt(t *types.Type) bool {
 227         return t.Size() == 2 && t.IsInteger()
 228 }
 229
 230 func is8BitInt(t *types.Type) bool {
 231         return t.Size() == 1 && t.IsInteger()
 232 }
 233
 234 func isPtr(t *types.Type) bool {
 235         return t.IsPtrShaped()
 236 }
 237
 238 func isSigned(t *types.Type) bool {
 239         return t.IsSigned()
 240 }
 241
 242 // mergeSym merges two symbolic offsets. There is no real merging of
 243 // offsets, we just pick the non-nil one.
 244 func mergeSym(x, y Sym) Sym {
 245         if x == nil {
 246                 return y
 247         }
 248         if y == nil {
 249                 return x
 250         }
 251         panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
 252 }
 253
 254 func canMergeSym(x, y Sym) bool {
 255         return x == nil || y == nil
 256 }
 257
 258 // canMergeLoadClobber reports whether the load can be merged into target without
 259 // invalidating the schedule.
 260 // It also checks that the other non-load argument x is something we
 261 // are ok with clobbering.
 262 func canMergeLoadClobber(target, load, x *Value) bool {
 263         // The register containing x is going to get clobbered.
 264         // Don't merge if we still need the value of x.
 265         // We don't have liveness information here, but we can
 266         // approximate x dying with:
 267         //  1) target is x's only use.
 268         //  2) target is not in a deeper loop than x.
 269         if x.Uses != 1 {
 270                 return false
 271         }
 272         loopnest := x.Block.Func.loopnest()
 273         loopnest.calculateDepths()
 274         if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
 275                 return false
 276         }
 277         return canMergeLoad(target, load)
 278 }
 279
 280 // canMergeLoad reports whether the load can be merged into target without
 281 // invalidating the schedule.
 282 func canMergeLoad(target, load *Value) bool {
 283         if target.Block.ID != load.Block.ID {
 284                 // If the load is in a different block do not merge it.
 285                 return false
 286         }
 287
 288         // We can't merge the load into the target if the load
 289         // has more than one use.
 290         if load.Uses != 1 {
 291                 return false
 292         }
 293
 294         mem := load.MemoryArg()
 295
 296         // We need the load's memory arg to still be alive at target. That
 297         // can't be the case if one of target's args depends on a memory
 298         // state that is a successor of load's memory arg.
 299         //
 300         // For example, it would be invalid to merge load into target in
 301         // the following situation because newmem has killed oldmem
 302         // before target is reached:
 303         //     load = read ... oldmem
 304         //   newmem = write ... oldmem
 305         //     arg0 = read ... newmem
 306         //   target = add arg0 load
 307         //
 308         // If the argument comes from a different block then we can exclude
 309         // it immediately because it must dominate load (which is in the
 310         // same block as target).
 311         var args []*Value
 312         for _, a := range target.Args {
 313                 if a != load && a.Block.ID == target.Block.ID {
 314                         args = append(args, a)
 315                 }
 316         }
 317
 318         // memPreds contains memory states known to be predecessors of load's
 319         // memory state. It is lazily initialized.
 320         var memPreds map[*Value]bool
 321         for i := 0; len(args) > 0; i++ {
 322                 const limit = 100
 323                 if i >= limit {
 324                         // Give up if we have done a lot of iterations.
 325                         return false
 326                 }
 327                 v := args[len(args)-1]
 328                 args = args[:len(args)-1]
 329                 if target.Block.ID != v.Block.ID {
 330                         // Since target and load are in the same block
 331                         // we can stop searching when we leave the block.
 332                         continue
 333                 }
 334                 if v.Op == OpPhi {
 335                         // A Phi implies we have reached the top of the block.
 336                         // The memory phi, if it exists, is always
 337                         // the first logical store in the block.
 338                         continue
 339                 }
 340                 if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
 341                         // We could handle this situation however it is likely
 342                         // to be very rare.
 343                         return false
 344                 }
 345                 if v.Op.SymEffect()&SymAddr != 0 {
 346                         // This case prevents an operation that calculates the
 347                         // address of a local variable from being forced to schedule
 348                         // before its corresponding VarDef.
 349                         // See issue 28445.
 350                         //   v1 = LOAD ...
 351                         //   v2 = VARDEF
 352                         //   v3 = LEAQ
 353                         //   v4 = CMPQ v1 v3
 354                         // We don't want to combine the CMPQ with the load, because
 355                         // that would force the CMPQ to schedule before the VARDEF, which
 356                         // in turn requires the LEAQ to schedule before the VARDEF.
 357                         return false
 358                 }
 359                 if v.Type.IsMemory() {
 360                         if memPreds == nil {
 361                                 // Initialise a map containing memory states
 362                                 // known to be predecessors of load's memory
 363                                 // state.
 364                                 memPreds = make(map[*Value]bool)
 365                                 m := mem
 366                                 const limit = 50
 367                                 for i := 0; i < limit; i++ {
 368                                         if m.Op == OpPhi {
 369                                                 // The memory phi, if it exists, is always
 370                                                 // the first logical store in the block.
 371                                                 break
 372                                         }
 373                                         if m.Block.ID != target.Block.ID {
 374                                                 break
 375                                         }
 376                                         if !m.Type.IsMemory() {
 377                                                 break
 378                                         }
 379                                         memPreds[m] = true
 380                                         if len(m.Args) == 0 {
 381                                                 break
 382                                         }
 383                                         m = m.MemoryArg()
 384                                 }
 385                         }
 386
 387                         // We can merge if v is a predecessor of mem.
 388                         //
 389                         // For example, we can merge load into target in the
 390                         // following scenario:
 391                         //      x = read ... v
 392                         //    mem = write ... v
 393                         //   load = read ... mem
 394                         // target = add x load
 395                         if memPreds[v] {
 396                                 continue
 397                         }
 398                         return false
 399                 }
 400                 if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
 401                         // If v takes mem as an input then we know mem
 402                         // is valid at this point.
 403                         continue
 404                 }
 405                 for _, a := range v.Args {
 406                         if target.Block.ID == a.Block.ID {
 407                                 args = append(args, a)
 408                         }
 409                 }
 410         }
 411
 412         return true
 413 }
 414
 415 // isSameCall reports whether sym is the same as the given named symbol.
 416 func isSameCall(sym interface{}, name string) bool {
 417         fn := sym.(*AuxCall).Fn
 418         return fn != nil && fn.String() == name
 419 }
 420
 421 // canLoadUnaligned reports if the architecture supports unaligned load operations.
 422 func canLoadUnaligned(c *Config) bool {
 423         return c.ctxt.Arch.Alignment == 1
 424 }
 425
 426 // nlzX returns the number of leading zeros.
 427 func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
 428 func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
 429 func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
 430 func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
 431
 432 // ntzX returns the number of trailing zeros.
 433 func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
 434 func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
 435 func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
 436 func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
 437
 438 func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
 439 func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
 440 func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
 441 func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
 442 func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
 443
 444 // nto returns the number of trailing ones.
 445 func nto(x int64) int64 {
 446         return int64(ntz64(^x))
 447 }
 448
 449 // logX returns logarithm of n base 2.
 450 // n must be a positive power of 2 (isPowerOfTwoX returns true).
 451 func log8(n int8) int64 {
 452         return int64(bits.Len8(uint8(n))) - 1
 453 }
 454 func log16(n int16) int64 {
 455         return int64(bits.Len16(uint16(n))) - 1
 456 }
 457 func log32(n int32) int64 {
 458         return int64(bits.Len32(uint32(n))) - 1
 459 }
 460 func log64(n int64) int64 {
 461         return int64(bits.Len64(uint64(n))) - 1
 462 }
 463
 464 // log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
 465 // Rounds down.
 466 func log2uint32(n int64) int64 {
 467         return int64(bits.Len32(uint32(n))) - 1
 468 }
 469
 470 // isPowerOfTwoX functions report whether n is a power of 2.
 471 func isPowerOfTwo8(n int8) bool {
 472         return n > 0 && n&(n-1) == 0
 473 }
 474 func isPowerOfTwo16(n int16) bool {
 475         return n > 0 && n&(n-1) == 0
 476 }
 477 func isPowerOfTwo32(n int32) bool {
 478         return n > 0 && n&(n-1) == 0
 479 }
 480 func isPowerOfTwo64(n int64) bool {
 481         return n > 0 && n&(n-1) == 0
 482 }
 483
 484 // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
 485 func isUint64PowerOfTwo(in int64) bool {
 486         n := uint64(in)
 487         return n > 0 && n&(n-1) == 0
 488 }
 489
 490 // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
 491 func isUint32PowerOfTwo(in int64) bool {
 492         n := uint64(uint32(in))
 493         return n > 0 && n&(n-1) == 0
 494 }
 495
 496 // is32Bit reports whether n can be represented as a signed 32 bit integer.
 497 func is32Bit(n int64) bool {
 498         return n == int64(int32(n))
 499 }
 500
 501 // is16Bit reports whether n can be represented as a signed 16 bit integer.
 502 func is16Bit(n int64) bool {
 503         return n == int64(int16(n))
 504 }
 505
 506 // is8Bit reports whether n can be represented as a signed 8 bit integer.
 507 func is8Bit(n int64) bool {
 508         return n == int64(int8(n))
 509 }
 510
 511 // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
 512 func isU8Bit(n int64) bool {
 513         return n == int64(uint8(n))
 514 }
 515
 516 // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
 517 func isU12Bit(n int64) bool {
 518         return 0 <= n && n < (1<<12)
 519 }
 520
 521 // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
 522 func isU16Bit(n int64) bool {
 523         return n == int64(uint16(n))
 524 }
 525
 526 // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
 527 func isU32Bit(n int64) bool {
 528         return n == int64(uint32(n))
 529 }
 530
 531 // is20Bit reports whether n can be represented as a signed 20 bit integer.
 532 func is20Bit(n int64) bool {
 533         return -(1<<19) <= n && n < (1<<19)
 534 }
 535
 536 // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
 537 func b2i(b bool) int64 {
 538         if b {
 539                 return 1
 540         }
 541         return 0
 542 }
 543
 544 // b2i32 translates a boolean value to 0 or 1.
 545 func b2i32(b bool) int32 {
 546         if b {
 547                 return 1
 548         }
 549         return 0
 550 }
 551
 552 // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
 553 // A shift is bounded if it is shifting by less than the width of the shifted value.
 554 func shiftIsBounded(v *Value) bool {
 555         return v.AuxInt != 0
 556 }
 557
 558 // canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
 559 // generated code as much as possible.
 560 func canonLessThan(x, y *Value) bool {
 561         if x.Op != y.Op {
 562                 return x.Op < y.Op
 563         }
 564         if !x.Pos.SameFileAndLine(y.Pos) {
 565                 return x.Pos.Before(y.Pos)
 566         }
 567         return x.ID < y.ID
 568 }
 569
 570 // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
 571 // of the mantissa. It will panic if the truncation results in lost information.
 572 func truncate64Fto32F(f float64) float32 {
 573         if !isExactFloat32(f) {
 574                 panic("truncate64Fto32F: truncation is not exact")
 575         }
 576         if !math.IsNaN(f) {
 577                 return float32(f)
 578         }
 579         // NaN bit patterns aren't necessarily preserved across conversion
 580         // instructions so we need to do the conversion manually.
 581         b := math.Float64bits(f)
 582         m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
 583         //          | sign                  | exponent   | mantissa       |
 584         r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
 585         return math.Float32frombits(r)
 586 }
 587
 588 // extend32Fto64F converts a float32 value to a float64 value preserving the bit
 589 // pattern of the mantissa.
 590 func extend32Fto64F(f float32) float64 {
 591         if !math.IsNaN(float64(f)) {
 592                 return float64(f)
 593         }
 594         // NaN bit patterns aren't necessarily preserved across conversion
 595         // instructions so we need to do the conversion manually.
 596         b := uint64(math.Float32bits(f))
 597         //   | sign                  | exponent      | mantissa                    |
 598         r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
 599         return math.Float64frombits(r)
 600 }
 601
 602 // DivisionNeedsFixUp reports whether the division needs fix-up code.
 603 func DivisionNeedsFixUp(v *Value) bool {
 604         return v.AuxInt == 0
 605 }
 606
 607 // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
 608 func auxFrom64F(f float64) int64 {
 609         if f != f {
 610                 panic("can't encode a NaN in AuxInt field")
 611         }
 612         return int64(math.Float64bits(f))
 613 }
 614
 615 // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
 616 func auxFrom32F(f float32) int64 {
 617         if f != f {
 618                 panic("can't encode a NaN in AuxInt field")
 619         }
 620         return int64(math.Float64bits(extend32Fto64F(f)))
 621 }
 622
 623 // auxTo32F decodes a float32 from the AuxInt value provided.
 624 func auxTo32F(i int64) float32 {
 625         return truncate64Fto32F(math.Float64frombits(uint64(i)))
 626 }
 627
 628 // auxTo64F decodes a float64 from the AuxInt value provided.
 629 func auxTo64F(i int64) float64 {
 630         return math.Float64frombits(uint64(i))
 631 }
 632
 633 func auxIntToBool(i int64) bool {
 634         if i == 0 {
 635                 return false
 636         }
 637         return true
 638 }
 639 func auxIntToInt8(i int64) int8 {
 640         return int8(i)
 641 }
 642 func auxIntToInt16(i int64) int16 {
 643         return int16(i)
 644 }
 645 func auxIntToInt32(i int64) int32 {
 646         return int32(i)
 647 }
 648 func auxIntToInt64(i int64) int64 {
 649         return i
 650 }
 651 func auxIntToUint8(i int64) uint8 {
 652         return uint8(i)
 653 }
 654 func auxIntToFloat32(i int64) float32 {
 655         return float32(math.Float64frombits(uint64(i)))
 656 }
 657 func auxIntToFloat64(i int64) float64 {
 658         return math.Float64frombits(uint64(i))
 659 }
 660 func auxIntToValAndOff(i int64) ValAndOff {
 661         return ValAndOff(i)
 662 }
 663 func auxIntToArm64BitField(i int64) arm64BitField {
 664         return arm64BitField(i)
 665 }
 666 func auxIntToInt128(x int64) int128 {
 667         if x != 0 {
 668                 panic("nonzero int128 not allowed")
 669         }
 670         return 0
 671 }
 672 func auxIntToFlagConstant(x int64) flagConstant {
 673         return flagConstant(x)
 674 }
 675
 676 func auxIntToOp(cc int64) Op {
 677         return Op(cc)
 678 }
 679
 680 func boolToAuxInt(b bool) int64 {
 681         if b {
 682                 return 1
 683         }
 684         return 0
 685 }
 686 func int8ToAuxInt(i int8) int64 {
 687         return int64(i)
 688 }
 689 func int16ToAuxInt(i int16) int64 {
 690         return int64(i)
 691 }
 692 func int32ToAuxInt(i int32) int64 {
 693         return int64(i)
 694 }
 695 func int64ToAuxInt(i int64) int64 {
 696         return int64(i)
 697 }
 698 func uint8ToAuxInt(i uint8) int64 {
 699         return int64(int8(i))
 700 }
 701 func float32ToAuxInt(f float32) int64 {
 702         return int64(math.Float64bits(float64(f)))
 703 }
 704 func float64ToAuxInt(f float64) int64 {
 705         return int64(math.Float64bits(f))
 706 }
 707 func valAndOffToAuxInt(v ValAndOff) int64 {
 708         return int64(v)
 709 }
 710 func arm64BitFieldToAuxInt(v arm64BitField) int64 {
 711         return int64(v)
 712 }
 713 func int128ToAuxInt(x int128) int64 {
 714         if x != 0 {
 715                 panic("nonzero int128 not allowed")
 716         }
 717         return 0
 718 }
 719 func flagConstantToAuxInt(x flagConstant) int64 {
 720         return int64(x)
 721 }
 722
 723 func opToAuxInt(o Op) int64 {
 724         return int64(o)
 725 }
 726
 727 // Aux is an interface to hold miscellaneous data in Blocks and Values.
 728 type Aux interface {
 729         CanBeAnSSAAux()
 730 }
 731
 732 // stringAux wraps string values for use in Aux.
 733 type stringAux string
 734
 735 func (stringAux) CanBeAnSSAAux() {}
 736
 737 func auxToString(i Aux) string {
 738         return string(i.(stringAux))
 739 }
 740 func auxToSym(i Aux) Sym {
 741         // TODO: kind of a hack - allows nil interface through
 742         s, _ := i.(Sym)
 743         return s
 744 }
 745 func auxToType(i Aux) *types.Type {
 746         return i.(*types.Type)
 747 }
 748 func auxToCall(i Aux) *AuxCall {
 749         return i.(*AuxCall)
 750 }
 751 func auxToS390xCCMask(i Aux) s390x.CCMask {
 752         return i.(s390x.CCMask)
 753 }
 754 func auxToS390xRotateParams(i Aux) s390x.RotateParams {
 755         return i.(s390x.RotateParams)
 756 }
 757
 758 func StringToAux(s string) Aux {
 759         return stringAux(s)
 760 }
 761 func symToAux(s Sym) Aux {
 762         return s
 763 }
 764 func callToAux(s *AuxCall) Aux {
 765         return s
 766 }
 767 func typeToAux(t *types.Type) Aux {
 768         return t
 769 }
 770 func s390xCCMaskToAux(c s390x.CCMask) Aux {
 771         return c
 772 }
 773 func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
 774         return r
 775 }
 776
 777 // uaddOvf reports whether unsigned a+b would overflow.
 778 func uaddOvf(a, b int64) bool {
 779         return uint64(a)+uint64(b) < uint64(a)
 780 }
 781
 782 // loadLSymOffset simulates reading a word at an offset into a
 783 // read-only symbol's runtime memory. If it would read a pointer to
 784 // another symbol, that symbol is returned. Otherwise, it returns nil.
 785 func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
 786         if lsym.Type != objabi.SRODATA {
 787                 return nil
 788         }
 789
 790         for _, r := range lsym.R {
 791                 if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
 792                         return r.Sym
 793                 }
 794         }
 795
 796         return nil
 797 }
 798
 799 // de-virtualize an InterLECall
 800 // 'sym' is the symbol for the itab.
 801 func devirtLESym(v *Value, aux Aux, sym Sym, offset int64) *obj.LSym {
 802         n, ok := sym.(*obj.LSym)
 803         if !ok {
 804                 return nil
 805         }
 806
 807         lsym := loadLSymOffset(n, offset)
 808         if f := v.Block.Func; f.pass.debug > 0 {
 809                 if lsym != nil {
 810                         f.Warnl(v.Pos, "de-virtualizing call")
 811                 } else {
 812                         f.Warnl(v.Pos, "couldn't de-virtualize call")
 813                 }
 814         }
 815         return lsym
 816 }
 817
 818 func devirtLECall(v *Value, sym *obj.LSym) *Value {
 819         v.Op = OpStaticLECall
 820         auxcall := v.Aux.(*AuxCall)
 821         auxcall.Fn = sym
 822         // Remove first arg
 823         v.Args[0].Uses--
 824         copy(v.Args[0:], v.Args[1:])
 825         v.Args[len(v.Args)-1] = nil // aid GC
 826         v.Args = v.Args[:len(v.Args)-1]
 827         return v
 828 }
 829
 830 // isSamePtr reports whether p1 and p2 point to the same address.
 831 func isSamePtr(p1, p2 *Value) bool {
 832         if p1 == p2 {
 833                 return true
 834         }
 835         if p1.Op != p2.Op {
 836                 return false
 837         }
 838         switch p1.Op {
 839         case OpOffPtr:
 840                 return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
 841         case OpAddr, OpLocalAddr:
 842                 return p1.Aux == p2.Aux
 843         case OpAddPtr:
 844                 return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
 845         }
 846         return false
 847 }
 848
 849 func isStackPtr(v *Value) bool {
 850         for v.Op == OpOffPtr || v.Op == OpAddPtr {
 851                 v = v.Args[0]
 852         }
 853         return v.Op == OpSP || v.Op == OpLocalAddr
 854 }
 855
 856 // disjoint reports whether the memory region specified by [p1:p1+n1)
 857 // does not overlap with [p2:p2+n2).
 858 // A return value of false does not imply the regions overlap.
 859 func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
 860         if n1 == 0 || n2 == 0 {
 861                 return true
 862         }
 863         if p1 == p2 {
 864                 return false
 865         }
 866         baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
 867                 base, offset = ptr, 0
 868                 for base.Op == OpOffPtr {
 869                         offset += base.AuxInt
 870                         base = base.Args[0]
 871                 }
 872                 return base, offset
 873         }
 874         p1, off1 := baseAndOffset(p1)
 875         p2, off2 := baseAndOffset(p2)
 876         if isSamePtr(p1, p2) {
 877                 return !overlap(off1, n1, off2, n2)
 878         }
 879         // p1 and p2 are not the same, so if they are both OpAddrs then
 880         // they point to different variables.
 881         // If one pointer is on the stack and the other is an argument
 882         // then they can't overlap.
 883         switch p1.Op {
 884         case OpAddr, OpLocalAddr:
 885                 if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
 886                         return true
 887                 }
 888                 return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
 889         case OpArg, OpArgIntReg:
 890                 if p2.Op == OpSP || p2.Op == OpLocalAddr {
 891                         return true
 892                 }
 893         case OpSP:
 894                 return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
 895         }
 896         return false
 897 }
 898
 899 // moveSize returns the number of bytes an aligned MOV instruction moves.
 900 func moveSize(align int64, c *Config) int64 {
 901         switch {
 902         case align%8 == 0 && c.PtrSize == 8:
 903                 return 8
 904         case align%4 == 0:
 905                 return 4
 906         case align%2 == 0:
 907                 return 2
 908         }
 909         return 1
 910 }
 911
 912 // mergePoint finds a block among a's blocks which dominates b and is itself
 913 // dominated by all of a's blocks. Returns nil if it can't find one.
 914 // Might return nil even if one does exist.
 915 func mergePoint(b *Block, a ...*Value) *Block {
 916         // Walk backward from b looking for one of the a's blocks.
 917
 918         // Max distance
 919         d := 100
 920
 921         for d > 0 {
 922                 for _, x := range a {
 923                         if b == x.Block {
 924                                 goto found
 925                         }
 926                 }
 927                 if len(b.Preds) > 1 {
 928                         // Don't know which way to go back. Abort.
 929                         return nil
 930                 }
 931                 b = b.Preds[0].b
 932                 d--
 933         }
 934         return nil // too far away
 935 found:
 936         // At this point, r is the first value in a that we find by walking backwards.
 937         // if we return anything, r will be it.
 938         r := b
 939
 940         // Keep going, counting the other a's that we find. They must all dominate r.
 941         na := 0
 942         for d > 0 {
 943                 for _, x := range a {
 944                         if b == x.Block {
 945                                 na++
 946                         }
 947                 }
 948                 if na == len(a) {
 949                         // Found all of a in a backwards walk. We can return r.
 950                         return r
 951                 }
 952                 if len(b.Preds) > 1 {
 953                         return nil
 954                 }
 955                 b = b.Preds[0].b
 956                 d--
 957
 958         }
 959         return nil // too far away
 960 }
 961
 962 // clobber invalidates values. Returns true.
 963 // clobber is used by rewrite rules to:
 964 //
 965 //      A) make sure the values are really dead and never used again.
 966 //      B) decrement use counts of the values' args.
 967 func clobber(vv ...*Value) bool {
 968         for _, v := range vv {
 969                 v.reset(OpInvalid)
 970                 // Note: leave v.Block intact.  The Block field is used after clobber.
 971         }
 972         return true
 973 }
 974
 975 // clobberIfDead resets v when use count is 1. Returns true.
 976 // clobberIfDead is used by rewrite rules to decrement
 977 // use counts of v's args when v is dead and never used.
 978 func clobberIfDead(v *Value) bool {
 979         if v.Uses == 1 {
 980                 v.reset(OpInvalid)
 981         }
 982         // Note: leave v.Block intact.  The Block field is used after clobberIfDead.
 983         return true
 984 }
 985
 986 // noteRule is an easy way to track if a rule is matched when writing
 987 // new ones.  Make the rule of interest also conditional on
 988 //
 989 //      noteRule("note to self: rule of interest matched")
 990 //
 991 // and that message will print when the rule matches.
 992 func noteRule(s string) bool {
 993         fmt.Println(s)
 994         return true
 995 }
 996
 997 // countRule increments Func.ruleMatches[key].
 998 // If Func.ruleMatches is non-nil at the end
 999 // of compilation, it will be printed to stdout.
1000 // This is intended to make it easier to find which functions
1001 // which contain lots of rules matches when developing new rules.
1002 func countRule(v *Value, key string) bool {
1003         f := v.Block.Func
1004         if f.ruleMatches == nil {
1005                 f.ruleMatches = make(map[string]int)
1006         }
1007         f.ruleMatches[key]++
1008         return true
1009 }
1010
1011 // warnRule generates compiler debug output with string s when
1012 // v is not in autogenerated code, cond is true and the rule has fired.
1013 func warnRule(cond bool, v *Value, s string) bool {
1014         if pos := v.Pos; pos.Line() > 1 && cond {
1015                 v.Block.Func.Warnl(pos, s)
1016         }
1017         return true
1018 }
1019
1020 // for a pseudo-op like (LessThan x), extract x.
1021 func flagArg(v *Value) *Value {
1022         if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
1023                 return nil
1024         }
1025         return v.Args[0]
1026 }
1027
1028 // arm64Negate finds the complement to an ARM64 condition code,
1029 // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
1030 //
1031 // For floating point, it's more subtle because NaN is unordered. We do
1032 // !LessThanF -> NotLessThanF, the latter takes care of NaNs.
1033 func arm64Negate(op Op) Op {
1034         switch op {
1035         case OpARM64LessThan:
1036                 return OpARM64GreaterEqual
1037         case OpARM64LessThanU:
1038                 return OpARM64GreaterEqualU
1039         case OpARM64GreaterThan:
1040                 return OpARM64LessEqual
1041         case OpARM64GreaterThanU:
1042                 return OpARM64LessEqualU
1043         case OpARM64LessEqual:
1044                 return OpARM64GreaterThan
1045         case OpARM64LessEqualU:
1046                 return OpARM64GreaterThanU
1047         case OpARM64GreaterEqual:
1048                 return OpARM64LessThan
1049         case OpARM64GreaterEqualU:
1050                 return OpARM64LessThanU
1051         case OpARM64Equal:
1052                 return OpARM64NotEqual
1053         case OpARM64NotEqual:
1054                 return OpARM64Equal
1055         case OpARM64LessThanF:
1056                 return OpARM64NotLessThanF
1057         case OpARM64NotLessThanF:
1058                 return OpARM64LessThanF
1059         case OpARM64LessEqualF:
1060                 return OpARM64NotLessEqualF
1061         case OpARM64NotLessEqualF:
1062                 return OpARM64LessEqualF
1063         case OpARM64GreaterThanF:
1064                 return OpARM64NotGreaterThanF
1065         case OpARM64NotGreaterThanF:
1066                 return OpARM64GreaterThanF
1067         case OpARM64GreaterEqualF:
1068                 return OpARM64NotGreaterEqualF
1069         case OpARM64NotGreaterEqualF:
1070                 return OpARM64GreaterEqualF
1071         default:
1072                 panic("unreachable")
1073         }
1074 }
1075
1076 // arm64Invert evaluates (InvertFlags op), which
1077 // is the same as altering the condition codes such
1078 // that the same result would be produced if the arguments
1079 // to the flag-generating instruction were reversed, e.g.
1080 // (InvertFlags (CMP x y)) -> (CMP y x)
1081 func arm64Invert(op Op) Op {
1082         switch op {
1083         case OpARM64LessThan:
1084                 return OpARM64GreaterThan
1085         case OpARM64LessThanU:
1086                 return OpARM64GreaterThanU
1087         case OpARM64GreaterThan:
1088                 return OpARM64LessThan
1089         case OpARM64GreaterThanU:
1090                 return OpARM64LessThanU
1091         case OpARM64LessEqual:
1092                 return OpARM64GreaterEqual
1093         case OpARM64LessEqualU:
1094                 return OpARM64GreaterEqualU
1095         case OpARM64GreaterEqual:
1096                 return OpARM64LessEqual
1097         case OpARM64GreaterEqualU:
1098                 return OpARM64LessEqualU
1099         case OpARM64Equal, OpARM64NotEqual:
1100                 return op
1101         case OpARM64LessThanF:
1102                 return OpARM64GreaterThanF
1103         case OpARM64GreaterThanF:
1104                 return OpARM64LessThanF
1105         case OpARM64LessEqualF:
1106                 return OpARM64GreaterEqualF
1107         case OpARM64GreaterEqualF:
1108                 return OpARM64LessEqualF
1109         case OpARM64NotLessThanF:
1110                 return OpARM64NotGreaterThanF
1111         case OpARM64NotGreaterThanF:
1112                 return OpARM64NotLessThanF
1113         case OpARM64NotLessEqualF:
1114                 return OpARM64NotGreaterEqualF
1115         case OpARM64NotGreaterEqualF:
1116                 return OpARM64NotLessEqualF
1117         default:
1118                 panic("unreachable")
1119         }
1120 }
1121
1122 // evaluate an ARM64 op against a flags value
1123 // that is potentially constant; return 1 for true,
1124 // -1 for false, and 0 for not constant.
1125 func ccARM64Eval(op Op, flags *Value) int {
1126         fop := flags.Op
1127         if fop == OpARM64InvertFlags {
1128                 return -ccARM64Eval(op, flags.Args[0])
1129         }
1130         if fop != OpARM64FlagConstant {
1131                 return 0
1132         }
1133         fc := flagConstant(flags.AuxInt)
1134         b2i := func(b bool) int {
1135                 if b {
1136                         return 1
1137                 }
1138                 return -1
1139         }
1140         switch op {
1141         case OpARM64Equal:
1142                 return b2i(fc.eq())
1143         case OpARM64NotEqual:
1144                 return b2i(fc.ne())
1145         case OpARM64LessThan:
1146                 return b2i(fc.lt())
1147         case OpARM64LessThanU:
1148                 return b2i(fc.ult())
1149         case OpARM64GreaterThan:
1150                 return b2i(fc.gt())
1151         case OpARM64GreaterThanU:
1152                 return b2i(fc.ugt())
1153         case OpARM64LessEqual:
1154                 return b2i(fc.le())
1155         case OpARM64LessEqualU:
1156                 return b2i(fc.ule())
1157         case OpARM64GreaterEqual:
1158                 return b2i(fc.ge())
1159         case OpARM64GreaterEqualU:
1160                 return b2i(fc.uge())
1161         }
1162         return 0
1163 }
1164
1165 // logRule logs the use of the rule s. This will only be enabled if
1166 // rewrite rules were generated with the -log option, see _gen/rulegen.go.
1167 func logRule(s string) {
1168         if ruleFile == nil {
1169                 // Open a log file to write log to. We open in append
1170                 // mode because all.bash runs the compiler lots of times,
1171                 // and we want the concatenation of all of those logs.
1172                 // This means, of course, that users need to rm the old log
1173                 // to get fresh data.
1174                 // TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
1175                 w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
1176                         os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
1177                 if err != nil {
1178                         panic(err)
1179                 }
1180                 ruleFile = w
1181         }
1182         _, err := fmt.Fprintln(ruleFile, s)
1183         if err != nil {
1184                 panic(err)
1185         }
1186 }
1187
1188 var ruleFile io.Writer
1189
1190 func min(x, y int64) int64 {
1191         if x < y {
1192                 return x
1193         }
1194         return y
1195 }
1196
1197 func isConstZero(v *Value) bool {
1198         switch v.Op {
1199         case OpConstNil:
1200                 return true
1201         case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
1202                 return v.AuxInt == 0
1203         }
1204         return false
1205 }
1206
1207 // reciprocalExact64 reports whether 1/c is exactly representable.
1208 func reciprocalExact64(c float64) bool {
1209         b := math.Float64bits(c)
1210         man := b & (1<<52 - 1)
1211         if man != 0 {
1212                 return false // not a power of 2, denormal, or NaN
1213         }
1214         exp := b >> 52 & (1<<11 - 1)
1215         // exponent bias is 0x3ff.  So taking the reciprocal of a number
1216         // changes the exponent to 0x7fe-exp.
1217         switch exp {
1218         case 0:
1219                 return false // ±0
1220         case 0x7ff:
1221                 return false // ±inf
1222         case 0x7fe:
1223                 return false // exponent is not representable
1224         default:
1225                 return true
1226         }
1227 }
1228
1229 // reciprocalExact32 reports whether 1/c is exactly representable.
1230 func reciprocalExact32(c float32) bool {
1231         b := math.Float32bits(c)
1232         man := b & (1<<23 - 1)
1233         if man != 0 {
1234                 return false // not a power of 2, denormal, or NaN
1235         }
1236         exp := b >> 23 & (1<<8 - 1)
1237         // exponent bias is 0x7f.  So taking the reciprocal of a number
1238         // changes the exponent to 0xfe-exp.
1239         switch exp {
1240         case 0:
1241                 return false // ±0
1242         case 0xff:
1243                 return false // ±inf
1244         case 0xfe:
1245                 return false // exponent is not representable
1246         default:
1247                 return true
1248         }
1249 }
1250
1251 // check if an immediate can be directly encoded into an ARM's instruction.
1252 func isARMImmRot(v uint32) bool {
1253         for i := 0; i < 16; i++ {
1254                 if v&^0xff == 0 {
1255                         return true
1256                 }
1257                 v = v<<2 | v>>30
1258         }
1259
1260         return false
1261 }
1262
1263 // overlap reports whether the ranges given by the given offset and
1264 // size pairs overlap.
1265 func overlap(offset1, size1, offset2, size2 int64) bool {
1266         if offset1 >= offset2 && offset2+size2 > offset1 {
1267                 return true
1268         }
1269         if offset2 >= offset1 && offset1+size1 > offset2 {
1270                 return true
1271         }
1272         return false
1273 }
1274
1275 func areAdjacentOffsets(off1, off2, size int64) bool {
1276         return off1+size == off2 || off1 == off2+size
1277 }
1278
1279 // check if value zeroes out upper 32-bit of 64-bit register.
1280 // depth limits recursion depth. In AMD64.rules 3 is used as limit,
1281 // because it catches same amount of cases as 4.
1282 func zeroUpper32Bits(x *Value, depth int) bool {
1283         switch x.Op {
1284         case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
1285                 OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
1286                 OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
1287                 OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
1288                 OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
1289                 OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
1290                 OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
1291                 OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
1292                 OpAMD64SHLL, OpAMD64SHLLconst:
1293                 return true
1294         case OpArg:
1295                 return x.Type.Size() == 4
1296         case OpPhi, OpSelect0, OpSelect1:
1297                 // Phis can use each-other as an arguments, instead of tracking visited values,
1298                 // just limit recursion depth.
1299                 if depth <= 0 {
1300                         return false
1301                 }
1302                 for i := range x.Args {
1303                         if !zeroUpper32Bits(x.Args[i], depth-1) {
1304                                 return false
1305                         }
1306                 }
1307                 return true
1308
1309         }
1310         return false
1311 }
1312
1313 // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
1314 func zeroUpper48Bits(x *Value, depth int) bool {
1315         switch x.Op {
1316         case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
1317                 return true
1318         case OpArg:
1319                 return x.Type.Size() == 2
1320         case OpPhi, OpSelect0, OpSelect1:
1321                 // Phis can use each-other as an arguments, instead of tracking visited values,
1322                 // just limit recursion depth.
1323                 if depth <= 0 {
1324                         return false
1325                 }
1326                 for i := range x.Args {
1327                         if !zeroUpper48Bits(x.Args[i], depth-1) {
1328                                 return false
1329                         }
1330                 }
1331                 return true
1332
1333         }
1334         return false
1335 }
1336
1337 // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
1338 func zeroUpper56Bits(x *Value, depth int) bool {
1339         switch x.Op {
1340         case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
1341                 return true
1342         case OpArg:
1343                 return x.Type.Size() == 1
1344         case OpPhi, OpSelect0, OpSelect1:
1345                 // Phis can use each-other as an arguments, instead of tracking visited values,
1346                 // just limit recursion depth.
1347                 if depth <= 0 {
1348                         return false
1349                 }
1350                 for i := range x.Args {
1351                         if !zeroUpper56Bits(x.Args[i], depth-1) {
1352                                 return false
1353                         }
1354                 }
1355                 return true
1356
1357         }
1358         return false
1359 }
1360
1361 // isInlinableMemmove reports whether the given arch performs a Move of the given size
1362 // faster than memmove. It will only return true if replacing the memmove with a Move is
1363 // safe, either because Move will do all of its loads before any of its stores, or
1364 // because the arguments are known to be disjoint.
1365 // This is used as a check for replacing memmove with Move ops.
1366 func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1367         // It is always safe to convert memmove into Move when its arguments are disjoint.
1368         // Move ops may or may not be faster for large sizes depending on how the platform
1369         // lowers them, so we only perform this optimization on platforms that we know to
1370         // have fast Move ops.
1371         switch c.arch {
1372         case "amd64":
1373                 return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
1374         case "386", "arm64":
1375                 return sz <= 8
1376         case "s390x", "ppc64", "ppc64le":
1377                 return sz <= 8 || disjoint(dst, sz, src, sz)
1378         case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
1379                 return sz <= 4
1380         }
1381         return false
1382 }
1383 func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1384         return isInlinableMemmove(dst, src, sz, c)
1385 }
1386
1387 // logLargeCopy logs the occurrence of a large copy.
1388 // The best place to do this is in the rewrite rules where the size of the move is easy to find.
1389 // "Large" is arbitrarily chosen to be 128 bytes; this may change.
1390 func logLargeCopy(v *Value, s int64) bool {
1391         if s < 128 {
1392                 return true
1393         }
1394         if logopt.Enabled() {
1395                 logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
1396         }
1397         return true
1398 }
1399 func LogLargeCopy(funcName string, pos src.XPos, s int64) {
1400         if s < 128 {
1401                 return
1402         }
1403         if logopt.Enabled() {
1404                 logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
1405         }
1406 }
1407
1408 // hasSmallRotate reports whether the architecture has rotate instructions
1409 // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
1410 func hasSmallRotate(c *Config) bool {
1411         switch c.arch {
1412         case "amd64", "386":
1413                 return true
1414         default:
1415                 return false
1416         }
1417 }
1418
1419 func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
1420         if sh < 0 || sh >= sz {
1421                 panic("PPC64 shift arg sh out of range")
1422         }
1423         if mb < 0 || mb >= sz {
1424                 panic("PPC64 shift arg mb out of range")
1425         }
1426         if me < 0 || me >= sz {
1427                 panic("PPC64 shift arg me out of range")
1428         }
1429         return int32(sh<<16 | mb<<8 | me)
1430 }
1431
1432 func GetPPC64Shiftsh(auxint int64) int64 {
1433         return int64(int8(auxint >> 16))
1434 }
1435
1436 func GetPPC64Shiftmb(auxint int64) int64 {
1437         return int64(int8(auxint >> 8))
1438 }
1439
1440 func GetPPC64Shiftme(auxint int64) int64 {
1441         return int64(int8(auxint))
1442 }
1443
1444 // Test if this value can encoded as a mask for a rlwinm like
1445 // operation.  Masks can also extend from the msb and wrap to
1446 // the lsb too.  That is, the valid masks are 32 bit strings
1447 // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
1448 func isPPC64WordRotateMask(v64 int64) bool {
1449         // Isolate rightmost 1 (if none 0) and add.
1450         v := uint32(v64)
1451         vp := (v & -v) + v
1452         // Likewise, for the wrapping case.
1453         vn := ^v
1454         vpn := (vn & -vn) + vn
1455         return (v&vp == 0 || vn&vpn == 0) && v != 0
1456 }
1457
1458 // Compress mask and shift into single value of the form
1459 // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
1460 // be used to regenerate the input mask.
1461 func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
1462         var mb, me, mbn, men int
1463
1464         // Determine boundaries and then decode them
1465         if mask == 0 || ^mask == 0 || rotate >= nbits {
1466                 panic("Invalid PPC64 rotate mask")
1467         } else if nbits == 32 {
1468                 mb = bits.LeadingZeros32(uint32(mask))
1469                 me = 32 - bits.TrailingZeros32(uint32(mask))
1470                 mbn = bits.LeadingZeros32(^uint32(mask))
1471                 men = 32 - bits.TrailingZeros32(^uint32(mask))
1472         } else {
1473                 mb = bits.LeadingZeros64(uint64(mask))
1474                 me = 64 - bits.TrailingZeros64(uint64(mask))
1475                 mbn = bits.LeadingZeros64(^uint64(mask))
1476                 men = 64 - bits.TrailingZeros64(^uint64(mask))
1477         }
1478         // Check for a wrapping mask (e.g bits at 0 and 63)
1479         if mb == 0 && me == int(nbits) {
1480                 // swap the inverted values
1481                 mb, me = men, mbn
1482         }
1483
1484         return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
1485 }
1486
1487 // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask.  The values returned as
1488 // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
1489 func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
1490         auxint := uint64(sauxint)
1491         rotate = int64((auxint >> 16) & 0xFF)
1492         mb = int64((auxint >> 8) & 0xFF)
1493         me = int64((auxint >> 0) & 0xFF)
1494         nbits := int64((auxint >> 24) & 0xFF)
1495         mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
1496         if mb > me {
1497                 mask = ^mask
1498         }
1499         if nbits == 32 {
1500                 mask = uint64(uint32(mask))
1501         }
1502
1503         // Fixup ME to match ISA definition.  The second argument to MASK(..,me)
1504         // is inclusive.
1505         me = (me - 1) & (nbits - 1)
1506         return
1507 }
1508
1509 // This verifies that the mask is a set of
1510 // consecutive bits including the least
1511 // significant bit.
1512 func isPPC64ValidShiftMask(v int64) bool {
1513         if (v != 0) && ((v+1)&v) == 0 {
1514                 return true
1515         }
1516         return false
1517 }
1518
1519 func getPPC64ShiftMaskLength(v int64) int64 {
1520         return int64(bits.Len64(uint64(v)))
1521 }
1522
1523 // Decompose a shift right into an equivalent rotate/mask,
1524 // and return mask & m.
1525 func mergePPC64RShiftMask(m, s, nbits int64) int64 {
1526         smask := uint64((1<<uint(nbits))-1) >> uint(s)
1527         return m & int64(smask)
1528 }
1529
1530 // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
1531 func mergePPC64AndSrwi(m, s int64) int64 {
1532         mask := mergePPC64RShiftMask(m, s, 32)
1533         if !isPPC64WordRotateMask(mask) {
1534                 return 0
1535         }
1536         return encodePPC64RotateMask((32-s)&31, mask, 32)
1537 }
1538
1539 // Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
1540 // Return the encoded RLWINM constant, or 0 if they cannot be merged.
1541 func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
1542         mask_1 := uint64(0xFFFFFFFF >> uint(srw))
1543         // for CLRLSLDI, it's more convient to think of it as a mask left bits then rotate left.
1544         mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1545
1546         // Rewrite mask to apply after the final left shift.
1547         mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
1548
1549         r_1 := 32 - srw
1550         r_2 := GetPPC64Shiftsh(sld)
1551         r_3 := (r_1 + r_2) & 31 // This can wrap.
1552
1553         if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
1554                 return 0
1555         }
1556         return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
1557 }
1558
1559 // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
1560 // the encoded RLWINM constant, or 0 if they cannot be merged.
1561 func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
1562         r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
1563         // for CLRLSLDI, it's more convient to think of it as a mask left bits then rotate left.
1564         mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1565
1566         // combine the masks, and adjust for the final left shift.
1567         mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
1568         r_2 := GetPPC64Shiftsh(int64(sld))
1569         r_3 := (r_1 + r_2) & 31 // This can wrap.
1570
1571         // Verify the result is still a valid bitmask of <= 32 bits.
1572         if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
1573                 return 0
1574         }
1575         return encodePPC64RotateMask(r_3, int64(mask_3), 32)
1576 }
1577
1578 // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
1579 // or return 0 if they cannot be combined.
1580 func mergePPC64SldiSrw(sld, srw int64) int64 {
1581         if sld > srw || srw >= 32 {
1582                 return 0
1583         }
1584         mask_r := uint32(0xFFFFFFFF) >> uint(srw)
1585         mask_l := uint32(0xFFFFFFFF) >> uint(sld)
1586         mask := (mask_r & mask_l) << uint(sld)
1587         return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
1588 }
1589
1590 // Convenience function to rotate a 32 bit constant value by another constant.
1591 func rotateLeft32(v, rotate int64) int64 {
1592         return int64(bits.RotateLeft32(uint32(v), int(rotate)))
1593 }
1594
1595 func rotateRight64(v, rotate int64) int64 {
1596         return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
1597 }
1598
1599 // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
1600 func armBFAuxInt(lsb, width int64) arm64BitField {
1601         if lsb < 0 || lsb > 63 {
1602                 panic("ARM(64) bit field lsb constant out of range")
1603         }
1604         if width < 1 || lsb+width > 64 {
1605                 panic("ARM(64) bit field width constant out of range")
1606         }
1607         return arm64BitField(width | lsb<<8)
1608 }
1609
1610 // returns the lsb part of the auxInt field of arm64 bitfield ops.
1611 func (bfc arm64BitField) getARM64BFlsb() int64 {
1612         return int64(uint64(bfc) >> 8)
1613 }
1614
1615 // returns the width part of the auxInt field of arm64 bitfield ops.
1616 func (bfc arm64BitField) getARM64BFwidth() int64 {
1617         return int64(bfc) & 0xff
1618 }
1619
1620 // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
1621 func isARM64BFMask(lsb, mask, rshift int64) bool {
1622         shiftedMask := int64(uint64(mask) >> uint64(rshift))
1623         return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
1624 }
1625
1626 // returns the bitfield width of mask >> rshift for arm64 bitfield ops.
1627 func arm64BFWidth(mask, rshift int64) int64 {
1628         shiftedMask := int64(uint64(mask) >> uint64(rshift))
1629         if shiftedMask == 0 {
1630                 panic("ARM64 BF mask is zero")
1631         }
1632         return nto(shiftedMask)
1633 }
1634
1635 // sizeof returns the size of t in bytes.
1636 // It will panic if t is not a *types.Type.
1637 func sizeof(t interface{}) int64 {
1638         return t.(*types.Type).Size()
1639 }
1640
1641 // registerizable reports whether t is a primitive type that fits in
1642 // a register. It assumes float64 values will always fit into registers
1643 // even if that isn't strictly true.
1644 func registerizable(b *Block, typ *types.Type) bool {
1645         if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
1646                 return true
1647         }
1648         if typ.IsInteger() {
1649                 return typ.Size() <= b.Func.Config.RegSize
1650         }
1651         return false
1652 }
1653
1654 // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
1655 func needRaceCleanup(sym *AuxCall, v *Value) bool {
1656         f := v.Block.Func
1657         if !f.Config.Race {
1658                 return false
1659         }
1660         if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
1661                 return false
1662         }
1663         for _, b := range f.Blocks {
1664                 for _, v := range b.Values {
1665                         switch v.Op {
1666                         case OpStaticCall, OpStaticLECall:
1667                                 // Check for racefuncenter will encounter racefuncexit and vice versa.
1668                                 // Allow calls to panic*
1669                                 s := v.Aux.(*AuxCall).Fn.String()
1670                                 switch s {
1671                                 case "runtime.racefuncenter", "runtime.racefuncexit",
1672                                         "runtime.panicdivide", "runtime.panicwrap",
1673                                         "runtime.panicshift":
1674                                         continue
1675                                 }
1676                                 // If we encountered any call, we need to keep racefunc*,
1677                                 // for accurate stacktraces.
1678                                 return false
1679                         case OpPanicBounds, OpPanicExtend:
1680                                 // Note: these are panic generators that are ok (like the static calls above).
1681                         case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
1682                                 // We must keep the race functions if there are any other call types.
1683                                 return false
1684                         }
1685                 }
1686         }
1687         if isSameCall(sym, "runtime.racefuncenter") {
1688                 // TODO REGISTER ABI this needs to be cleaned up.
1689                 // If we're removing racefuncenter, remove its argument as well.
1690                 if v.Args[0].Op != OpStore {
1691                         if v.Op == OpStaticLECall {
1692                                 // there is no store, yet.
1693                                 return true
1694                         }
1695                         return false
1696                 }
1697                 mem := v.Args[0].Args[2]
1698                 v.Args[0].reset(OpCopy)
1699                 v.Args[0].AddArg(mem)
1700         }
1701         return true
1702 }
1703
1704 // symIsRO reports whether sym is a read-only global.
1705 func symIsRO(sym interface{}) bool {
1706         lsym := sym.(*obj.LSym)
1707         return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
1708 }
1709
1710 // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
1711 func symIsROZero(sym Sym) bool {
1712         lsym := sym.(*obj.LSym)
1713         if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
1714                 return false
1715         }
1716         for _, b := range lsym.P {
1717                 if b != 0 {
1718                         return false
1719                 }
1720         }
1721         return true
1722 }
1723
1724 // read8 reads one byte from the read-only global sym at offset off.
1725 func read8(sym interface{}, off int64) uint8 {
1726         lsym := sym.(*obj.LSym)
1727         if off >= int64(len(lsym.P)) || off < 0 {
1728                 // Invalid index into the global sym.
1729                 // This can happen in dead code, so we don't want to panic.
1730                 // Just return any value, it will eventually get ignored.
1731                 // See issue 29215.
1732                 return 0
1733         }
1734         return lsym.P[off]
1735 }
1736
1737 // read16 reads two bytes from the read-only global sym at offset off.
1738 func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
1739         lsym := sym.(*obj.LSym)
1740         // lsym.P is written lazily.
1741         // Bytes requested after the end of lsym.P are 0.
1742         var src []byte
1743         if 0 <= off && off < int64(len(lsym.P)) {
1744                 src = lsym.P[off:]
1745         }
1746         buf := make([]byte, 2)
1747         copy(buf, src)
1748         return byteorder.Uint16(buf)
1749 }
1750
1751 // read32 reads four bytes from the read-only global sym at offset off.
1752 func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
1753         lsym := sym.(*obj.LSym)
1754         var src []byte
1755         if 0 <= off && off < int64(len(lsym.P)) {
1756                 src = lsym.P[off:]
1757         }
1758         buf := make([]byte, 4)
1759         copy(buf, src)
1760         return byteorder.Uint32(buf)
1761 }
1762
1763 // read64 reads eight bytes from the read-only global sym at offset off.
1764 func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
1765         lsym := sym.(*obj.LSym)
1766         var src []byte
1767         if 0 <= off && off < int64(len(lsym.P)) {
1768                 src = lsym.P[off:]
1769         }
1770         buf := make([]byte, 8)
1771         copy(buf, src)
1772         return byteorder.Uint64(buf)
1773 }
1774
1775 // sequentialAddresses reports true if it can prove that x + n == y
1776 func sequentialAddresses(x, y *Value, n int64) bool {
1777         if x == y && n == 0 {
1778                 return true
1779         }
1780         if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
1781                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1782                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1783                 return true
1784         }
1785         if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1786                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1787                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1788                 return true
1789         }
1790         if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
1791                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1792                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1793                 return true
1794         }
1795         if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1796                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1797                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1798                 return true
1799         }
1800         return false
1801 }
1802
1803 // flagConstant represents the result of a compile-time comparison.
1804 // The sense of these flags does not necessarily represent the hardware's notion
1805 // of a flags register - these are just a compile-time construct.
1806 // We happen to match the semantics to those of arm/arm64.
1807 // Note that these semantics differ from x86: the carry flag has the opposite
1808 // sense on a subtraction!
1809 //
1810 //      On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
1811 //      On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
1812 //       (because it does x + ^y + C).
1813 //
1814 // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
1815 type flagConstant uint8
1816
1817 // N reports whether the result of an operation is negative (high bit set).
1818 func (fc flagConstant) N() bool {
1819         return fc&1 != 0
1820 }
1821
1822 // Z reports whether the result of an operation is 0.
1823 func (fc flagConstant) Z() bool {
1824         return fc&2 != 0
1825 }
1826
1827 // C reports whether an unsigned add overflowed (carry), or an
1828 // unsigned subtract did not underflow (borrow).
1829 func (fc flagConstant) C() bool {
1830         return fc&4 != 0
1831 }
1832
1833 // V reports whether a signed operation overflowed or underflowed.
1834 func (fc flagConstant) V() bool {
1835         return fc&8 != 0
1836 }
1837
1838 func (fc flagConstant) eq() bool {
1839         return fc.Z()
1840 }
1841 func (fc flagConstant) ne() bool {
1842         return !fc.Z()
1843 }
1844 func (fc flagConstant) lt() bool {
1845         return fc.N() != fc.V()
1846 }
1847 func (fc flagConstant) le() bool {
1848         return fc.Z() || fc.lt()
1849 }
1850 func (fc flagConstant) gt() bool {
1851         return !fc.Z() && fc.ge()
1852 }
1853 func (fc flagConstant) ge() bool {
1854         return fc.N() == fc.V()
1855 }
1856 func (fc flagConstant) ult() bool {
1857         return !fc.C()
1858 }
1859 func (fc flagConstant) ule() bool {
1860         return fc.Z() || fc.ult()
1861 }
1862 func (fc flagConstant) ugt() bool {
1863         return !fc.Z() && fc.uge()
1864 }
1865 func (fc flagConstant) uge() bool {
1866         return fc.C()
1867 }
1868
1869 func (fc flagConstant) ltNoov() bool {
1870         return fc.lt() && !fc.V()
1871 }
1872 func (fc flagConstant) leNoov() bool {
1873         return fc.le() && !fc.V()
1874 }
1875 func (fc flagConstant) gtNoov() bool {
1876         return fc.gt() && !fc.V()
1877 }
1878 func (fc flagConstant) geNoov() bool {
1879         return fc.ge() && !fc.V()
1880 }
1881
1882 func (fc flagConstant) String() string {
1883         return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
1884 }
1885
1886 type flagConstantBuilder struct {
1887         N bool
1888         Z bool
1889         C bool
1890         V bool
1891 }
1892
1893 func (fcs flagConstantBuilder) encode() flagConstant {
1894         var fc flagConstant
1895         if fcs.N {
1896                 fc |= 1
1897         }
1898         if fcs.Z {
1899                 fc |= 2
1900         }
1901         if fcs.C {
1902                 fc |= 4
1903         }
1904         if fcs.V {
1905                 fc |= 8
1906         }
1907         return fc
1908 }
1909
1910 // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
1911 //  - the results of the C flag are different
1912 //  - the results of the V flag when y==minint are different
1913
1914 // addFlags64 returns the flags that would be set from computing x+y.
1915 func addFlags64(x, y int64) flagConstant {
1916         var fcb flagConstantBuilder
1917         fcb.Z = x+y == 0
1918         fcb.N = x+y < 0
1919         fcb.C = uint64(x+y) < uint64(x)
1920         fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1921         return fcb.encode()
1922 }
1923
1924 // subFlags64 returns the flags that would be set from computing x-y.
1925 func subFlags64(x, y int64) flagConstant {
1926         var fcb flagConstantBuilder
1927         fcb.Z = x-y == 0
1928         fcb.N = x-y < 0
1929         fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
1930         fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
1931         return fcb.encode()
1932 }
1933
1934 // addFlags32 returns the flags that would be set from computing x+y.
1935 func addFlags32(x, y int32) flagConstant {
1936         var fcb flagConstantBuilder
1937         fcb.Z = x+y == 0
1938         fcb.N = x+y < 0
1939         fcb.C = uint32(x+y) < uint32(x)
1940         fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1941         return fcb.encode()
1942 }
1943
1944 // subFlags32 returns the flags that would be set from computing x-y.
1945 func subFlags32(x, y int32) flagConstant {
1946         var fcb flagConstantBuilder
1947         fcb.Z = x-y == 0
1948         fcb.N = x-y < 0
1949         fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
1950         fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
1951         return fcb.encode()
1952 }
1953
1954 // logicFlags64 returns flags set to the sign/zeroness of x.
1955 // C and V are set to false.
1956 func logicFlags64(x int64) flagConstant {
1957         var fcb flagConstantBuilder
1958         fcb.Z = x == 0
1959         fcb.N = x < 0
1960         return fcb.encode()
1961 }
1962
1963 // logicFlags32 returns flags set to the sign/zeroness of x.
1964 // C and V are set to false.
1965 func logicFlags32(x int32) flagConstant {
1966         var fcb flagConstantBuilder
1967         fcb.Z = x == 0
1968         fcb.N = x < 0
1969         return fcb.encode()
1970 }
1971
1972 func makeJumpTableSym(b *Block) *obj.LSym {
1973         s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.LSym(), b.ID))
1974         s.Set(obj.AttrDuplicateOK, true)
1975         s.Set(obj.AttrLocal, true)
1976         return s
1977 }
1978
1979 // canRotate reports whether the architecture supports
1980 // rotates of integer registers with the given number of bits.
1981 func canRotate(c *Config, bits int64) bool {
1982         if bits > c.PtrSize*8 {
1983                 // Don't rewrite to rotates bigger than the machine word.
1984                 return false
1985         }
1986         switch c.arch {
1987         case "386", "amd64", "arm64":
1988                 return true
1989         case "arm", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
1990                 return bits >= 32
1991         default:
1992                 return false
1993         }
1994 }
1995
1996 // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
1997 func isARM64bitcon(x uint64) bool {
1998         if x == 1<<64-1 || x == 0 {
1999                 return false
2000         }
2001         // determine the period and sign-extend a unit to 64 bits
2002         switch {
2003         case x != x>>32|x<<32:
2004                 // period is 64
2005                 // nothing to do
2006         case x != x>>16|x<<48:
2007                 // period is 32
2008                 x = uint64(int64(int32(x)))
2009         case x != x>>8|x<<56:
2010                 // period is 16
2011                 x = uint64(int64(int16(x)))
2012         case x != x>>4|x<<60:
2013                 // period is 8
2014                 x = uint64(int64(int8(x)))
2015         default:
2016                 // period is 4 or 2, always true
2017                 // 0001, 0010, 0100, 1000 -- 0001 rotate
2018                 // 0011, 0110, 1100, 1001 -- 0011 rotate
2019                 // 0111, 1011, 1101, 1110 -- 0111 rotate
2020                 // 0101, 1010             -- 01   rotate, repeat
2021                 return true
2022         }
2023         return sequenceOfOnes(x) || sequenceOfOnes(^x)
2024 }
2025
2026 // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
2027 func sequenceOfOnes(x uint64) bool {
2028         y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
2029         y += x
2030         return (y-1)&y == 0
2031 }
2032
2033 // isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction.
2034 func isARM64addcon(v int64) bool {
2035         /* uimm12 or uimm24? */
2036         if v < 0 {
2037                 return false
2038         }
2039         if (v & 0xFFF) == 0 {
2040                 v >>= 12
2041         }
2042         return v <= 0xFFF
2043 }