1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/base"
9 "cmd/compile/internal/logopt"
10 "cmd/compile/internal/types"
12 "cmd/internal/obj/s390x"
25 type deadValueChoice bool
28 leaveDeadValues deadValueChoice = false
29 removeDeadValues = true
32 // deadcode indicates whether rewrite should try to remove any values that become dead.
33 func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
34 // repeat rewrites until we find no more rewrites
35 pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
39 fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
42 var states map[string]bool
46 for _, b := range f.Blocks {
51 b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
53 for i, c := range b.ControlValues() {
56 b.ReplaceControl(i, c)
62 fmt.Printf("rewriting %s -> %s\n", b0.LongString(), b.LongString())
65 for j, v := range b.Values {
70 v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
72 if v.Uses == 0 && v.removeable() {
73 if v.Op != OpInvalid && deadcode == removeDeadValues {
// Reset any values that are now unused, so that we decrement
// the use counts of all of their arguments.
76 // Not quite a deadcode pass, because it does not handle cycles.
77 // But it should help Uses==1 rules to fire.
81 // No point rewriting values which aren't used.
85 vchange := phielimValue(v)
86 if vchange && debug > 1 {
87 fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
90 // Eliminate copy inputs.
91 // If any copy input becomes unused, mark it
92 // as invalid and discard its argument. Repeat
93 // recursively on the discarded argument.
94 // This phase helps remove phantom "dead copy" uses
// of a value so that an x.Uses==1 rule condition
// fires reliably.
97 for i, a := range v.Args {
103 // If a, a copy, has a line boundary indicator, attempt to find a new value
104 // to hold it. The first candidate is the value that will replace a (aa),
105 // if it shares the same block and line and is eligible.
106 // The second option is v, which has a as an input. Because aa is earlier in
107 // the data flow, it is the better choice.
108 if a.Pos.IsStmt() == src.PosIsStmt {
109 if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
110 aa.Pos = aa.Pos.WithIsStmt()
111 } else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
112 v.Pos = v.Pos.WithIsStmt()
114 // Record the lost line and look for a new home after all rewrites are complete.
115 // TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
116 // line to appear in more than one block, but only one block is stored, so if both end
117 // up here, then one will be lost.
118 pendingLines.set(a.Pos, int32(a.Block.ID))
120 a.Pos = a.Pos.WithNotStmt()
129 if vchange && debug > 1 {
130 fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
133 // apply rewrite function
136 // If value changed to a poor choice for a statement boundary, move the boundary
137 if v.Pos.IsStmt() == src.PosIsStmt {
138 if k := nextGoodStatementIndex(v, j, b); k != j {
139 v.Pos = v.Pos.WithNotStmt()
140 b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
145 change = change || vchange
146 if vchange && debug > 1 {
147 fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
151 if !change && !deadChange {
155 if (iters > 1000 || debug >= 2) && change {
156 // We've done a suspiciously large number of rewrites (or we're in debug mode).
157 // As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
158 // and the maximum value encountered during make.bash is 12.
159 // Start checking for cycles. (This is too expensive to do routinely.)
160 // Note: we avoid this path for deadChange-only iterations, to fix #51639.
162 states = make(map[string]bool)
165 if _, ok := states[h]; ok {
166 // We've found a cycle.
167 // To diagnose it, set debug to 2 and start again,
168 // so that we'll print all rules applied until we complete another cycle.
169 // If debug is already >= 2, we've already done that, so it's time to crash.
172 states = make(map[string]bool)
174 f.Fatalf("rewrite cycle detected")
180 // remove clobbered values
181 for _, b := range f.Blocks {
183 for i, v := range b.Values {
185 if v.Op == OpInvalid {
186 if v.Pos.IsStmt() == src.PosIsStmt {
187 pendingLines.set(vl, int32(b.ID))
192 if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
193 pendingLines.remove(vl)
194 v.Pos = v.Pos.WithIsStmt()
201 if pendingLines.get(b.Pos) == int32(b.ID) {
202 b.Pos = b.Pos.WithIsStmt()
203 pendingLines.remove(b.Pos)
209 // Common functions called from rewriting rules
211 func is64BitFloat(t *types.Type) bool {
212 return t.Size() == 8 && t.IsFloat()
215 func is32BitFloat(t *types.Type) bool {
216 return t.Size() == 4 && t.IsFloat()
219 func is64BitInt(t *types.Type) bool {
220 return t.Size() == 8 && t.IsInteger()
223 func is32BitInt(t *types.Type) bool {
224 return t.Size() == 4 && t.IsInteger()
227 func is16BitInt(t *types.Type) bool {
228 return t.Size() == 2 && t.IsInteger()
231 func is8BitInt(t *types.Type) bool {
232 return t.Size() == 1 && t.IsInteger()
235 func isPtr(t *types.Type) bool {
236 return t.IsPtrShaped()
// mergeSym merges two symbolic offsets. There is no real merging of
// offsets; we just pick the non-nil one.
241 func mergeSym(x, y Sym) Sym {
248 panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
251 func canMergeSym(x, y Sym) bool {
252 return x == nil || y == nil
255 // canMergeLoadClobber reports whether the load can be merged into target without
256 // invalidating the schedule.
257 // It also checks that the other non-load argument x is something we
258 // are ok with clobbering.
259 func canMergeLoadClobber(target, load, x *Value) bool {
260 // The register containing x is going to get clobbered.
261 // Don't merge if we still need the value of x.
262 // We don't have liveness information here, but we can
263 // approximate x dying with:
264 // 1) target is x's only use.
265 // 2) target is not in a deeper loop than x.
269 loopnest := x.Block.Func.loopnest()
270 loopnest.calculateDepths()
271 if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
274 return canMergeLoad(target, load)
277 // canMergeLoad reports whether the load can be merged into target without
278 // invalidating the schedule.
279 func canMergeLoad(target, load *Value) bool {
280 if target.Block.ID != load.Block.ID {
281 // If the load is in a different block do not merge it.
285 // We can't merge the load into the target if the load
286 // has more than one use.
291 mem := load.MemoryArg()
293 // We need the load's memory arg to still be alive at target. That
294 // can't be the case if one of target's args depends on a memory
295 // state that is a successor of load's memory arg.
297 // For example, it would be invalid to merge load into target in
298 // the following situation because newmem has killed oldmem
299 // before target is reached:
300 // load = read ... oldmem
301 // newmem = write ... oldmem
302 // arg0 = read ... newmem
303 // target = add arg0 load
// If the argument comes from a different block, then we can exclude
306 // it immediately because it must dominate load (which is in the
307 // same block as target).
309 for _, a := range target.Args {
310 if a != load && a.Block.ID == target.Block.ID {
311 args = append(args, a)
315 // memPreds contains memory states known to be predecessors of load's
316 // memory state. It is lazily initialized.
317 var memPreds map[*Value]bool
318 for i := 0; len(args) > 0; i++ {
321 // Give up if we have done a lot of iterations.
324 v := args[len(args)-1]
325 args = args[:len(args)-1]
326 if target.Block.ID != v.Block.ID {
327 // Since target and load are in the same block
328 // we can stop searching when we leave the block.
332 // A Phi implies we have reached the top of the block.
333 // The memory phi, if it exists, is always
334 // the first logical store in the block.
337 if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
// We could handle this situation, however it is likely
// to be very rare.
342 if v.Op.SymEffect()&SymAddr != 0 {
343 // This case prevents an operation that calculates the
344 // address of a local variable from being forced to schedule
345 // before its corresponding VarDef.
351 // We don't want to combine the CMPQ with the load, because
352 // that would force the CMPQ to schedule before the VARDEF, which
353 // in turn requires the LEAQ to schedule before the VARDEF.
356 if v.Type.IsMemory() {
// Initialise a map containing memory states
// known to be predecessors of load's memory
// state.
361 memPreds = make(map[*Value]bool)
364 for i := 0; i < limit; i++ {
366 // The memory phi, if it exists, is always
367 // the first logical store in the block.
370 if m.Block.ID != target.Block.ID {
373 if !m.Type.IsMemory() {
377 if len(m.Args) == 0 {
384 // We can merge if v is a predecessor of mem.
386 // For example, we can merge load into target in the
387 // following scenario:
390 // load = read ... mem
391 // target = add x load
397 if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
398 // If v takes mem as an input then we know mem
399 // is valid at this point.
402 for _, a := range v.Args {
403 if target.Block.ID == a.Block.ID {
404 args = append(args, a)
412 // isSameCall reports whether sym is the same as the given named symbol.
413 func isSameCall(sym interface{}, name string) bool {
414 fn := sym.(*AuxCall).Fn
415 return fn != nil && fn.String() == name
// canLoadUnaligned reports whether the architecture supports unaligned load operations.
419 func canLoadUnaligned(c *Config) bool {
420 return c.ctxt.Arch.Alignment == 1
423 // nlzX returns the number of leading zeros.
424 func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
425 func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
426 func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
427 func nlz8(x int8) int { return bits.LeadingZeros8(uint8(x)) }
429 // ntzX returns the number of trailing zeros.
430 func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
431 func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
432 func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
433 func ntz8(x int8) int { return bits.TrailingZeros8(uint8(x)) }
// oneBit reports whether x has exactly one bit set; the sized variants below do the same for narrower types.
func oneBit(x int64) bool { return x&(x-1) == 0 && x != 0 }
436 func oneBit8(x int8) bool { return x&(x-1) == 0 && x != 0 }
437 func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
438 func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
439 func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
441 // nto returns the number of trailing ones.
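// For illustration (not from the original source):
//
//	nto(0b0111) == 3
//	nto(0)      == 0
//	nto(-1)     == 64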
442 func nto(x int64) int64 {
443 return int64(ntz64(^x))
// logX returns the base-2 logarithm of n.
447 // n must be a positive power of 2 (isPowerOfTwoX returns true).
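// For illustration:
//
//	log8(8)    == 3
//	log64(1)   == 0
//	log64(256) == 8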
448 func log8(n int8) int64 {
449 return int64(bits.Len8(uint8(n))) - 1
451 func log16(n int16) int64 {
452 return int64(bits.Len16(uint16(n))) - 1
454 func log32(n int32) int64 {
455 return int64(bits.Len32(uint32(n))) - 1
457 func log64(n int64) int64 {
458 return int64(bits.Len64(uint64(n))) - 1
// log2uint32 returns the base-2 logarithm of uint32(n), with log2(0) = -1.
463 func log2uint32(n int64) int64 {
464 return int64(bits.Len32(uint32(n))) - 1
467 // isPowerOfTwoX functions report whether n is a power of 2.
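// For illustration, isPowerOfTwo8(4) is true, while isPowerOfTwo8(0) and
// isPowerOfTwo8(-8) are false: only strictly positive values qualify.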
468 func isPowerOfTwo8(n int8) bool {
469 return n > 0 && n&(n-1) == 0
471 func isPowerOfTwo16(n int16) bool {
472 return n > 0 && n&(n-1) == 0
474 func isPowerOfTwo32(n int32) bool {
475 return n > 0 && n&(n-1) == 0
477 func isPowerOfTwo64(n int64) bool {
478 return n > 0 && n&(n-1) == 0
481 // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
func isUint64PowerOfTwo(in int64) bool {
n := uint64(in)
return n > 0 && n&(n-1) == 0
487 // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
488 func isUint32PowerOfTwo(in int64) bool {
489 n := uint64(uint32(in))
490 return n > 0 && n&(n-1) == 0
493 // is32Bit reports whether n can be represented as a signed 32 bit integer.
494 func is32Bit(n int64) bool {
495 return n == int64(int32(n))
498 // is16Bit reports whether n can be represented as a signed 16 bit integer.
499 func is16Bit(n int64) bool {
500 return n == int64(int16(n))
503 // is8Bit reports whether n can be represented as a signed 8 bit integer.
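// For illustration, is8Bit(-128) is true but is8Bit(128) is false.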
504 func is8Bit(n int64) bool {
505 return n == int64(int8(n))
508 // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
509 func isU8Bit(n int64) bool {
510 return n == int64(uint8(n))
513 // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
514 func isU12Bit(n int64) bool {
515 return 0 <= n && n < (1<<12)
518 // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
519 func isU16Bit(n int64) bool {
520 return n == int64(uint16(n))
523 // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
524 func isU32Bit(n int64) bool {
525 return n == int64(uint32(n))
528 // is20Bit reports whether n can be represented as a signed 20 bit integer.
529 func is20Bit(n int64) bool {
530 return -(1<<19) <= n && n < (1<<19)
533 // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
534 func b2i(b bool) int64 {
541 // b2i32 translates a boolean value to 0 or 1.
542 func b2i32(b bool) int32 {
549 // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
550 // A shift is bounded if it is shifting by less than the width of the shifted value.
551 func shiftIsBounded(v *Value) bool {
555 // canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
556 // generated code as much as possible.
557 func canonLessThan(x, y *Value) bool {
561 if !x.Pos.SameFileAndLine(y.Pos) {
562 return x.Pos.Before(y.Pos)
567 // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
568 // of the mantissa. It will panic if the truncation results in lost information.
569 func truncate64Fto32F(f float64) float32 {
570 if !isExactFloat32(f) {
571 panic("truncate64Fto32F: truncation is not exact")
576 // NaN bit patterns aren't necessarily preserved across conversion
// instructions, so we need to do the conversion manually.
578 b := math.Float64bits(f)
579 m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
580 // | sign | exponent | mantissa |
581 r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
582 return math.Float32frombits(r)
585 // extend32Fto64F converts a float32 value to a float64 value preserving the bit
586 // pattern of the mantissa.
587 func extend32Fto64F(f float32) float64 {
588 if !math.IsNaN(float64(f)) {
591 // NaN bit patterns aren't necessarily preserved across conversion
// instructions, so we need to do the conversion manually.
593 b := uint64(math.Float32bits(f))
594 // | sign | exponent | mantissa |
595 r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
596 return math.Float64frombits(r)
599 // DivisionNeedsFixUp reports whether the division needs fix-up code.
600 func DivisionNeedsFixUp(v *Value) bool {
604 // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
605 func auxFrom64F(f float64) int64 {
607 panic("can't encode a NaN in AuxInt field")
609 return int64(math.Float64bits(f))
612 // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
613 func auxFrom32F(f float32) int64 {
615 panic("can't encode a NaN in AuxInt field")
617 return int64(math.Float64bits(extend32Fto64F(f)))
620 // auxTo32F decodes a float32 from the AuxInt value provided.
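// For illustration, an exactly representable value round-trips:
// auxTo32F(auxFrom32F(3.5)) == 3.5.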
621 func auxTo32F(i int64) float32 {
622 return truncate64Fto32F(math.Float64frombits(uint64(i)))
625 // auxTo64F decodes a float64 from the AuxInt value provided.
626 func auxTo64F(i int64) float64 {
627 return math.Float64frombits(uint64(i))
630 func auxIntToBool(i int64) bool {
636 func auxIntToInt8(i int64) int8 {
639 func auxIntToInt16(i int64) int16 {
642 func auxIntToInt32(i int64) int32 {
645 func auxIntToInt64(i int64) int64 {
648 func auxIntToUint8(i int64) uint8 {
651 func auxIntToFloat32(i int64) float32 {
652 return float32(math.Float64frombits(uint64(i)))
654 func auxIntToFloat64(i int64) float64 {
655 return math.Float64frombits(uint64(i))
657 func auxIntToValAndOff(i int64) ValAndOff {
660 func auxIntToArm64BitField(i int64) arm64BitField {
661 return arm64BitField(i)
663 func auxIntToInt128(x int64) int128 {
665 panic("nonzero int128 not allowed")
669 func auxIntToFlagConstant(x int64) flagConstant {
670 return flagConstant(x)
673 func auxIntToOp(cc int64) Op {
677 func boolToAuxInt(b bool) int64 {
683 func int8ToAuxInt(i int8) int64 {
686 func int16ToAuxInt(i int16) int64 {
689 func int32ToAuxInt(i int32) int64 {
692 func int64ToAuxInt(i int64) int64 {
695 func uint8ToAuxInt(i uint8) int64 {
696 return int64(int8(i))
698 func float32ToAuxInt(f float32) int64 {
699 return int64(math.Float64bits(float64(f)))
701 func float64ToAuxInt(f float64) int64 {
702 return int64(math.Float64bits(f))
704 func valAndOffToAuxInt(v ValAndOff) int64 {
707 func arm64BitFieldToAuxInt(v arm64BitField) int64 {
710 func int128ToAuxInt(x int128) int64 {
712 panic("nonzero int128 not allowed")
716 func flagConstantToAuxInt(x flagConstant) int64 {
720 func opToAuxInt(o Op) int64 {
724 // Aux is an interface to hold miscellaneous data in Blocks and Values.
// For now, this is only used to mark moves that need to avoid clobbering flags.
732 func (auxMark) CanBeAnSSAAux() {}
736 // stringAux wraps string values for use in Aux.
737 type stringAux string
739 func (stringAux) CanBeAnSSAAux() {}
741 func auxToString(i Aux) string {
742 return string(i.(stringAux))
744 func auxToSym(i Aux) Sym {
745 // TODO: kind of a hack - allows nil interface through
749 func auxToType(i Aux) *types.Type {
750 return i.(*types.Type)
752 func auxToCall(i Aux) *AuxCall {
755 func auxToS390xCCMask(i Aux) s390x.CCMask {
756 return i.(s390x.CCMask)
758 func auxToS390xRotateParams(i Aux) s390x.RotateParams {
759 return i.(s390x.RotateParams)
762 func StringToAux(s string) Aux {
765 func symToAux(s Sym) Aux {
768 func callToAux(s *AuxCall) Aux {
771 func typeToAux(t *types.Type) Aux {
774 func s390xCCMaskToAux(c s390x.CCMask) Aux {
777 func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
781 // uaddOvf reports whether unsigned a+b would overflow.
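// For illustration, uaddOvf(-1, 1) is true: uint64(-1)+1 wraps to 0,
// which is less than uint64(-1).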
782 func uaddOvf(a, b int64) bool {
783 return uint64(a)+uint64(b) < uint64(a)
786 // loadLSymOffset simulates reading a word at an offset into a
787 // read-only symbol's runtime memory. If it would read a pointer to
788 // another symbol, that symbol is returned. Otherwise, it returns nil.
789 func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
790 if lsym.Type != objabi.SRODATA {
794 for _, r := range lsym.R {
795 if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
// devirtLESym de-virtualizes an InterLECall.
// 'sym' is the symbol for the itab.
805 func devirtLESym(v *Value, aux Aux, sym Sym, offset int64) *obj.LSym {
806 n, ok := sym.(*obj.LSym)
811 lsym := loadLSymOffset(n, offset)
812 if f := v.Block.Func; f.pass.debug > 0 {
814 f.Warnl(v.Pos, "de-virtualizing call")
816 f.Warnl(v.Pos, "couldn't de-virtualize call")
822 func devirtLECall(v *Value, sym *obj.LSym) *Value {
823 v.Op = OpStaticLECall
824 auxcall := v.Aux.(*AuxCall)
828 copy(v.Args[0:], v.Args[1:])
829 v.Args[len(v.Args)-1] = nil // aid GC
830 v.Args = v.Args[:len(v.Args)-1]
834 // isSamePtr reports whether p1 and p2 point to the same address.
835 func isSamePtr(p1, p2 *Value) bool {
844 return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
845 case OpAddr, OpLocalAddr:
846 return p1.Aux == p2.Aux
848 return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
853 func isStackPtr(v *Value) bool {
854 for v.Op == OpOffPtr || v.Op == OpAddPtr {
857 return v.Op == OpSP || v.Op == OpLocalAddr
860 // disjoint reports whether the memory region specified by [p1:p1+n1)
861 // does not overlap with [p2:p2+n2).
862 // A return value of false does not imply the regions overlap.
863 func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
864 if n1 == 0 || n2 == 0 {
870 baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
871 base, offset = ptr, 0
872 for base.Op == OpOffPtr {
873 offset += base.AuxInt
878 p1, off1 := baseAndOffset(p1)
879 p2, off2 := baseAndOffset(p2)
880 if isSamePtr(p1, p2) {
881 return !overlap(off1, n1, off2, n2)
883 // p1 and p2 are not the same, so if they are both OpAddrs then
884 // they point to different variables.
885 // If one pointer is on the stack and the other is an argument
886 // then they can't overlap.
888 case OpAddr, OpLocalAddr:
889 if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
892 return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
893 case OpArg, OpArgIntReg:
894 if p2.Op == OpSP || p2.Op == OpLocalAddr {
898 return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
903 // moveSize returns the number of bytes an aligned MOV instruction moves.
904 func moveSize(align int64, c *Config) int64 {
906 case align%8 == 0 && c.PtrSize == 8:
916 // mergePoint finds a block among a's blocks which dominates b and is itself
917 // dominated by all of a's blocks. Returns nil if it can't find one.
918 // Might return nil even if one does exist.
919 func mergePoint(b *Block, a ...*Value) *Block {
920 // Walk backward from b looking for one of the a's blocks.
926 for _, x := range a {
931 if len(b.Preds) > 1 {
932 // Don't know which way to go back. Abort.
938 return nil // too far away
940 // At this point, r is the first value in a that we find by walking backwards.
// If we return anything, r will be it.
944 // Keep going, counting the other a's that we find. They must all dominate r.
947 for _, x := range a {
953 // Found all of a in a backwards walk. We can return r.
956 if len(b.Preds) > 1 {
963 return nil // too far away
966 // clobber invalidates values. Returns true.
967 // clobber is used by rewrite rules to:
969 // A) make sure the values are really dead and never used again.
970 // B) decrement use counts of the values' args.
971 func clobber(vv ...*Value) bool {
972 for _, v := range vv {
974 // Note: leave v.Block intact. The Block field is used after clobber.
979 // clobberIfDead resets v when use count is 1. Returns true.
980 // clobberIfDead is used by rewrite rules to decrement
981 // use counts of v's args when v is dead and never used.
982 func clobberIfDead(v *Value) bool {
986 // Note: leave v.Block intact. The Block field is used after clobberIfDead.
990 // noteRule is an easy way to track if a rule is matched when writing
991 // new ones. Make the rule of interest also conditional on
993 // noteRule("note to self: rule of interest matched")
995 // and that message will print when the rule matches.
996 func noteRule(s string) bool {
1001 // countRule increments Func.ruleMatches[key].
1002 // If Func.ruleMatches is non-nil at the end
1003 // of compilation, it will be printed to stdout.
// This is intended to make it easier to find which functions
// contain lots of rule matches when developing new rules.
1006 func countRule(v *Value, key string) bool {
1008 if f.ruleMatches == nil {
1009 f.ruleMatches = make(map[string]int)
1011 f.ruleMatches[key]++
1015 // warnRule generates compiler debug output with string s when
1016 // v is not in autogenerated code, cond is true and the rule has fired.
1017 func warnRule(cond bool, v *Value, s string) bool {
1018 if pos := v.Pos; pos.Line() > 1 && cond {
1019 v.Block.Func.Warnl(pos, s)
// flagArg extracts x from a pseudo-op like (LessThan x).
1025 func flagArg(v *Value) *Value {
1026 if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
1032 // arm64Negate finds the complement to an ARM64 condition code,
1033 // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
1035 // For floating point, it's more subtle because NaN is unordered. We do
1036 // !LessThanF -> NotLessThanF, the latter takes care of NaNs.
1037 func arm64Negate(op Op) Op {
1039 case OpARM64LessThan:
1040 return OpARM64GreaterEqual
1041 case OpARM64LessThanU:
1042 return OpARM64GreaterEqualU
1043 case OpARM64GreaterThan:
1044 return OpARM64LessEqual
1045 case OpARM64GreaterThanU:
1046 return OpARM64LessEqualU
1047 case OpARM64LessEqual:
1048 return OpARM64GreaterThan
1049 case OpARM64LessEqualU:
1050 return OpARM64GreaterThanU
1051 case OpARM64GreaterEqual:
1052 return OpARM64LessThan
1053 case OpARM64GreaterEqualU:
1054 return OpARM64LessThanU
1056 return OpARM64NotEqual
1057 case OpARM64NotEqual:
1059 case OpARM64LessThanF:
1060 return OpARM64NotLessThanF
1061 case OpARM64NotLessThanF:
1062 return OpARM64LessThanF
1063 case OpARM64LessEqualF:
1064 return OpARM64NotLessEqualF
1065 case OpARM64NotLessEqualF:
1066 return OpARM64LessEqualF
1067 case OpARM64GreaterThanF:
1068 return OpARM64NotGreaterThanF
1069 case OpARM64NotGreaterThanF:
1070 return OpARM64GreaterThanF
1071 case OpARM64GreaterEqualF:
1072 return OpARM64NotGreaterEqualF
1073 case OpARM64NotGreaterEqualF:
1074 return OpARM64GreaterEqualF
1076 panic("unreachable")
1080 // arm64Invert evaluates (InvertFlags op), which
1081 // is the same as altering the condition codes such
1082 // that the same result would be produced if the arguments
1083 // to the flag-generating instruction were reversed, e.g.
1084 // (InvertFlags (CMP x y)) -> (CMP y x)
1085 func arm64Invert(op Op) Op {
1087 case OpARM64LessThan:
1088 return OpARM64GreaterThan
1089 case OpARM64LessThanU:
1090 return OpARM64GreaterThanU
1091 case OpARM64GreaterThan:
1092 return OpARM64LessThan
1093 case OpARM64GreaterThanU:
1094 return OpARM64LessThanU
1095 case OpARM64LessEqual:
1096 return OpARM64GreaterEqual
1097 case OpARM64LessEqualU:
1098 return OpARM64GreaterEqualU
1099 case OpARM64GreaterEqual:
1100 return OpARM64LessEqual
1101 case OpARM64GreaterEqualU:
1102 return OpARM64LessEqualU
1103 case OpARM64Equal, OpARM64NotEqual:
1105 case OpARM64LessThanF:
1106 return OpARM64GreaterThanF
1107 case OpARM64GreaterThanF:
1108 return OpARM64LessThanF
1109 case OpARM64LessEqualF:
1110 return OpARM64GreaterEqualF
1111 case OpARM64GreaterEqualF:
1112 return OpARM64LessEqualF
1113 case OpARM64NotLessThanF:
1114 return OpARM64NotGreaterThanF
1115 case OpARM64NotGreaterThanF:
1116 return OpARM64NotLessThanF
1117 case OpARM64NotLessEqualF:
1118 return OpARM64NotGreaterEqualF
1119 case OpARM64NotGreaterEqualF:
1120 return OpARM64NotLessEqualF
1122 panic("unreachable")
// ccARM64Eval evaluates an ARM64 op against a flags value
// that is potentially constant; it returns 1 for true,
// -1 for false, and 0 for not constant.
1129 func ccARM64Eval(op Op, flags *Value) int {
1131 if fop == OpARM64InvertFlags {
1132 return -ccARM64Eval(op, flags.Args[0])
1134 if fop != OpARM64FlagConstant {
1137 fc := flagConstant(flags.AuxInt)
1138 b2i := func(b bool) int {
1147 case OpARM64NotEqual:
1149 case OpARM64LessThan:
1151 case OpARM64LessThanU:
1152 return b2i(fc.ult())
1153 case OpARM64GreaterThan:
1155 case OpARM64GreaterThanU:
1156 return b2i(fc.ugt())
1157 case OpARM64LessEqual:
1159 case OpARM64LessEqualU:
1160 return b2i(fc.ule())
1161 case OpARM64GreaterEqual:
1163 case OpARM64GreaterEqualU:
1164 return b2i(fc.uge())
1169 // logRule logs the use of the rule s. This will only be enabled if
1170 // rewrite rules were generated with the -log option, see _gen/rulegen.go.
1171 func logRule(s string) {
1172 if ruleFile == nil {
1173 // Open a log file to write log to. We open in append
1174 // mode because all.bash runs the compiler lots of times,
1175 // and we want the concatenation of all of those logs.
1176 // This means, of course, that users need to rm the old log
1177 // to get fresh data.
1178 // TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
1179 w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
1180 os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
1186 _, err := fmt.Fprintln(ruleFile, s)
1192 var ruleFile io.Writer
1194 func min(x, y int64) int64 {
1201 func isConstZero(v *Value) bool {
1205 case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
1206 return v.AuxInt == 0
1211 // reciprocalExact64 reports whether 1/c is exactly representable.
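// For illustration, reciprocalExact64(4) is true (1/4 == 0.25 exactly),
// while reciprocalExact64(3) is false.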
1212 func reciprocalExact64(c float64) bool {
1213 b := math.Float64bits(c)
1214 man := b & (1<<52 - 1)
1216 return false // not a power of 2, denormal, or NaN
1218 exp := b >> 52 & (1<<11 - 1)
1219 // exponent bias is 0x3ff. So taking the reciprocal of a number
1220 // changes the exponent to 0x7fe-exp.
1225 return false // ±inf
1227 return false // exponent is not representable
1233 // reciprocalExact32 reports whether 1/c is exactly representable.
1234 func reciprocalExact32(c float32) bool {
1235 b := math.Float32bits(c)
1236 man := b & (1<<23 - 1)
1238 return false // not a power of 2, denormal, or NaN
1240 exp := b >> 23 & (1<<8 - 1)
1241 // exponent bias is 0x7f. So taking the reciprocal of a number
1242 // changes the exponent to 0xfe-exp.
1247 return false // ±inf
1249 return false // exponent is not representable
// isARMImmRot reports whether an immediate can be directly encoded into an ARM instruction.
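// For illustration (assuming the usual 8-bit-value/even-rotation encoding),
// 0xFF and 0xFF00 are encodable, but 0x101 is not.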
1256 func isARMImmRot(v uint32) bool {
1257 for i := 0; i < 16; i++ {
1267 // overlap reports whether the ranges given by the given offset and
1268 // size pairs overlap.
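// For illustration, overlap(0, 8, 4, 4) is true, while
// overlap(0, 4, 4, 4) is false (the regions merely touch).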
1269 func overlap(offset1, size1, offset2, size2 int64) bool {
1270 if offset1 >= offset2 && offset2+size2 > offset1 {
1273 if offset2 >= offset1 && offset1+size1 > offset2 {
// areAdjacentOffsets reports whether the two size-byte regions at off1 and off2 are adjacent.
func areAdjacentOffsets(off1, off2, size int64) bool {
1280 return off1+size == off2 || off1 == off2+size
// zeroUpper32Bits reports whether x zeroes out the upper 32 bits of a 64-bit register.
// depth limits the recursion depth. AMD64.rules uses 3 as the limit,
// because it catches the same number of cases as 4 does.
1286 func zeroUpper32Bits(x *Value, depth int) bool {
1288 case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
1289 OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
1290 OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
1291 OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
1292 OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
1293 OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
1294 OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
1295 OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
1296 OpAMD64SHLL, OpAMD64SHLLconst:
1298 case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
1299 OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
1300 OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
1303 return x.Type.Size() == 4
1304 case OpPhi, OpSelect0, OpSelect1:
// Phis can use each other as arguments; instead of tracking
// visited values, just limit the recursion depth.
1310 for i := range x.Args {
1311 if !zeroUpper32Bits(x.Args[i], depth-1) {
1321 // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
1322 func zeroUpper48Bits(x *Value, depth int) bool {
1324 case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
1327 return x.Type.Size() == 2
1328 case OpPhi, OpSelect0, OpSelect1:
// Phis can use each other as arguments; instead of tracking
// visited values, just limit the recursion depth.
1334 for i := range x.Args {
1335 if !zeroUpper48Bits(x.Args[i], depth-1) {
1345 // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
1346 func zeroUpper56Bits(x *Value, depth int) bool {
1348 case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
1351 return x.Type.Size() == 1
1352 case OpPhi, OpSelect0, OpSelect1:
// Phis can use each other as arguments; instead of tracking
// visited values, just limit the recursion depth.
1358 for i := range x.Args {
1359 if !zeroUpper56Bits(x.Args[i], depth-1) {
1369 func isInlinableMemclr(c *Config, sz int64) bool {
1373 // TODO: expand this check to allow other architectures
1374 // see CL 454255 and issue 56997
1376 case "amd64", "arm64":
1378 case "ppc64le", "ppc64":
1384 // isInlinableMemmove reports whether the given arch performs a Move of the given size
1385 // faster than memmove. It will only return true if replacing the memmove with a Move is
1386 // safe, either because Move will do all of its loads before any of its stores, or
1387 // because the arguments are known to be disjoint.
1388 // This is used as a check for replacing memmove with Move ops.
1389 func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1390 // It is always safe to convert memmove into Move when its arguments are disjoint.
1391 // Move ops may or may not be faster for large sizes depending on how the platform
1392 // lowers them, so we only perform this optimization on platforms that we know to
1393 // have fast Move ops.
1396 return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
1397 case "386", "arm64":
1399 case "s390x", "ppc64", "ppc64le":
1400 return sz <= 8 || disjoint(dst, sz, src, sz)
1401 case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
1406 func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1407 return isInlinableMemmove(dst, src, sz, c)
1410 // logLargeCopy logs the occurrence of a large copy.
1411 // The best place to do this is in the rewrite rules where the size of the move is easy to find.
1412 // "Large" is arbitrarily chosen to be 128 bytes; this may change.
1413 func logLargeCopy(v *Value, s int64) bool {
1417 if logopt.Enabled() {
1418 logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
1422 func LogLargeCopy(funcName string, pos src.XPos, s int64) {
1426 if logopt.Enabled() {
1427 logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
1431 // hasSmallRotate reports whether the architecture has rotate instructions
1432 // for sizes < 32-bit. This is used to decide whether to promote some rotations.
1433 func hasSmallRotate(c *Config) bool {
1435 case "amd64", "386":
1442 func supportsPPC64PCRel() bool {
// PCRel is currently supported for >= power10, linux only.
// Internal and external linking support this on ppc64le; only internal linking does on ppc64.
1445 return buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
1448 func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
1449 if sh < 0 || sh >= sz {
1450 panic("PPC64 shift arg sh out of range")
1452 if mb < 0 || mb >= sz {
1453 panic("PPC64 shift arg mb out of range")
1455 if me < 0 || me >= sz {
1456 panic("PPC64 shift arg me out of range")
1458 return int32(sh<<16 | mb<<8 | me)
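// For illustration, newPPC64ShiftAuxInt(4, 0, 31, 32) packs its arguments as
// 4<<16 | 0<<8 | 31, and GetPPC64Shiftsh below recovers the sh field (4).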
1461 func GetPPC64Shiftsh(auxint int64) int64 {
1462 return int64(int8(auxint >> 16))
1465 func GetPPC64Shiftmb(auxint int64) int64 {
1466 return int64(int8(auxint >> 8))
1469 func GetPPC64Shiftme(auxint int64) int64 {
1470 return int64(int8(auxint))
// isPPC64WordRotateMask reports whether this value can be encoded as a mask for
// an rlwinm-like operation. Masks may also extend from the msb and wrap to
// the lsb. That is, the valid masks are 32 bit strings
// of the form: 0..01..10..0 or 1..10..01..1 or 1...1
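// For illustration, 0x000000F0 is a valid single-run mask; a wrapping mask
// such as the sign-extended form of 0xF000000F is also accepted.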
1477 func isPPC64WordRotateMask(v64 int64) bool {
// Isolate rightmost 1 (if none 0) and add.
v := uint64(v64)
vp := (v & -v) + v
// Likewise, for the wrapping case.
vn := ^v
vpn := (vn & -vn) + vn
1484 return (v&vp == 0 || vn&vpn == 0) && v != 0
// encodePPC64RotateMask compresses a mask and shift into a single value of the
// form me | mb<<8 | rotate<<16 | nbits<<24, where me and mb can
// be used to regenerate the input mask.
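// For illustration, encodePPC64RotateMask(8, 0x00FFFF00, 32) determines
// mb = 8 (the mask's leading zeros) and me = 24 (32 minus its trailing
// zeros) and packs them with the rotate and width.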
1490 func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
1491 var mb, me, mbn, men int
1493 // Determine boundaries and then decode them
1494 if mask == 0 || ^mask == 0 || rotate >= nbits {
1495 panic("Invalid PPC64 rotate mask")
1496 } else if nbits == 32 {
1497 mb = bits.LeadingZeros32(uint32(mask))
1498 me = 32 - bits.TrailingZeros32(uint32(mask))
1499 mbn = bits.LeadingZeros32(^uint32(mask))
1500 men = 32 - bits.TrailingZeros32(^uint32(mask))
1502 mb = bits.LeadingZeros64(uint64(mask))
1503 me = 64 - bits.TrailingZeros64(uint64(mask))
1504 mbn = bits.LeadingZeros64(^uint64(mask))
1505 men = 64 - bits.TrailingZeros64(^uint64(mask))
// Check for a wrapping mask (e.g. bits at 0 and 63).
1508 if mb == 0 && me == int(nbits) {
1509 // swap the inverted values
1513 return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
1516 // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask. The values returned as
1517 // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
1518 func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
1519 auxint := uint64(sauxint)
1520 rotate = int64((auxint >> 16) & 0xFF)
1521 mb = int64((auxint >> 8) & 0xFF)
1522 me = int64((auxint >> 0) & 0xFF)
1523 nbits := int64((auxint >> 24) & 0xFF)
1524 mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
1529 mask = uint64(uint32(mask))
// Fixup ME to match ISA definition. The second argument to MASK(..,me)
// is inclusive.
1534 me = (me - 1) & (nbits - 1)
// isPPC64ValidShiftMask verifies that the mask is a set of
// consecutive bits including the least significant bit.
1541 func isPPC64ValidShiftMask(v int64) bool {
1542 if (v != 0) && ((v+1)&v) == 0 {
// getPPC64ShiftMaskLength returns the number of bits in the (contiguous) mask v.
func getPPC64ShiftMaskLength(v int64) int64 {
1549 return int64(bits.Len64(uint64(v)))
// mergePPC64RShiftMask decomposes a shift right into an equivalent rotate/mask,
// and returns mask & m.
1554 func mergePPC64RShiftMask(m, s, nbits int64) int64 {
1555 smask := uint64((1<<uint(nbits))-1) >> uint(s)
1556 return m & int64(smask)
1559 // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
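// For illustration, m = 0xFF with s = 8 leaves mask 0xFF after the shift,
// which encodes as a left-rotate of 24 with mask 0xFF.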
1560 func mergePPC64AndSrwi(m, s int64) int64 {
1561 mask := mergePPC64RShiftMask(m, s, 32)
1562 if !isPPC64WordRotateMask(mask) {
1565 return encodePPC64RotateMask((32-s)&31, mask, 32)
1568 // Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
1569 // Return the encoded RLWINM constant, or 0 if they cannot be merged.
1570 func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
1571 mask_1 := uint64(0xFFFFFFFF >> uint(srw))
// For CLRLSLDI, it's more convenient to think of it as masking the left bits, then rotating left.
1573 mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1575 // Rewrite mask to apply after the final left shift.
1576 mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
1579 r_2 := GetPPC64Shiftsh(sld)
1580 r_3 := (r_1 + r_2) & 31 // This can wrap.
1582 if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
1585 return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
1588 // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM. Return
1589 // the encoded RLWINM constant, or 0 if they cannot be merged.
1590 func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
1591 r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
// For CLRLSLDI, it's more convenient to think of it as masking the left bits, then rotating left.
1593 mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1595 // combine the masks, and adjust for the final left shift.
1596 mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
1597 r_2 := GetPPC64Shiftsh(int64(sld))
1598 r_3 := (r_1 + r_2) & 31 // This can wrap.
1600 // Verify the result is still a valid bitmask of <= 32 bits.
1601 if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
1604 return encodePPC64RotateMask(r_3, int64(mask_3), 32)
1607 // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
1608 // or return 0 if they cannot be combined.
1609 func mergePPC64SldiSrw(sld, srw int64) int64 {
1610 if sld > srw || srw >= 32 {
1613 mask_r := uint32(0xFFFFFFFF) >> uint(srw)
1614 mask_l := uint32(0xFFFFFFFF) >> uint(sld)
1615 mask := (mask_r & mask_l) << uint(sld)
1616 return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
1619 // Convenience function to rotate a 32 bit constant value by another constant.
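// For illustration, rotateLeft32(0x80000001, 1) == 0x00000003.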
1620 func rotateLeft32(v, rotate int64) int64 {
1621 return int64(bits.RotateLeft32(uint32(v), int(rotate)))
// rotateRight64 rotates a 64 bit constant value right by another constant.
func rotateRight64(v, rotate int64) int64 {
1625 return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
// armBFAuxInt encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
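// For illustration, armBFAuxInt(8, 16) encodes width 16 and lsb 8 as
// 16 | 8<<8 == 0x810; getARM64BFlsb and getARM64BFwidth invert this.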
1629 func armBFAuxInt(lsb, width int64) arm64BitField {
1630 if lsb < 0 || lsb > 63 {
1631 panic("ARM(64) bit field lsb constant out of range")
1633 if width < 1 || lsb+width > 64 {
1634 panic("ARM(64) bit field width constant out of range")
1636 return arm64BitField(width | lsb<<8)
1639 // returns the lsb part of the auxInt field of arm64 bitfield ops.
1640 func (bfc arm64BitField) getARM64BFlsb() int64 {
1641 return int64(uint64(bfc) >> 8)
1644 // returns the width part of the auxInt field of arm64 bitfield ops.
1645 func (bfc arm64BitField) getARM64BFwidth() int64 {
1646 return int64(bfc) & 0xff
1649 // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
1650 func isARM64BFMask(lsb, mask, rshift int64) bool {
1651 shiftedMask := int64(uint64(mask) >> uint64(rshift))
1652 return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
1655 // returns the bitfield width of mask >> rshift for arm64 bitfield ops.
1656 func arm64BFWidth(mask, rshift int64) int64 {
1657 shiftedMask := int64(uint64(mask) >> uint64(rshift))
1658 if shiftedMask == 0 {
1659 panic("ARM64 BF mask is zero")
1661 return nto(shiftedMask)
1664 // sizeof returns the size of t in bytes.
1665 // It will panic if t is not a *types.Type.
1666 func sizeof(t interface{}) int64 {
1667 return t.(*types.Type).Size()
1670 // registerizable reports whether t is a primitive type that fits in
1671 // a register. It assumes float64 values will always fit into registers
1672 // even if that isn't strictly true.
1673 func registerizable(b *Block, typ *types.Type) bool {
1674 if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
1677 if typ.IsInteger() {
1678 return typ.Size() <= b.Func.Config.RegSize
1683 // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
1684 func needRaceCleanup(sym *AuxCall, v *Value) bool {
1689 if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
1692 for _, b := range f.Blocks {
1693 for _, v := range b.Values {
1695 case OpStaticCall, OpStaticLECall:
// The check for racefuncenter will also encounter racefuncexit, and vice versa.
// Allow calls to panic*.
1698 s := v.Aux.(*AuxCall).Fn.String()
1700 case "runtime.racefuncenter", "runtime.racefuncexit",
1701 "runtime.panicdivide", "runtime.panicwrap",
1702 "runtime.panicshift":
1705 // If we encountered any call, we need to keep racefunc*,
1706 // for accurate stacktraces.
1708 case OpPanicBounds, OpPanicExtend:
1709 // Note: these are panic generators that are ok (like the static calls above).
1710 case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
1711 // We must keep the race functions if there are any other call types.
1716 if isSameCall(sym, "runtime.racefuncenter") {
1717 // TODO REGISTER ABI this needs to be cleaned up.
1718 // If we're removing racefuncenter, remove its argument as well.
1719 if v.Args[0].Op != OpStore {
1720 if v.Op == OpStaticLECall {
1721 // there is no store, yet.
1726 mem := v.Args[0].Args[2]
1727 v.Args[0].reset(OpCopy)
1728 v.Args[0].AddArg(mem)
1733 // symIsRO reports whether sym is a read-only global.
1734 func symIsRO(sym interface{}) bool {
1735 lsym := sym.(*obj.LSym)
1736 return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
1739 // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
1740 func symIsROZero(sym Sym) bool {
1741 lsym := sym.(*obj.LSym)
1742 if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
1745 for _, b := range lsym.P {
1753 // read8 reads one byte from the read-only global sym at offset off.
1754 func read8(sym interface{}, off int64) uint8 {
1755 lsym := sym.(*obj.LSym)
1756 if off >= int64(len(lsym.P)) || off < 0 {
1757 // Invalid index into the global sym.
1758 // This can happen in dead code, so we don't want to panic.
1759 // Just return any value, it will eventually get ignored.
1766 // read16 reads two bytes from the read-only global sym at offset off.
1767 func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
1768 lsym := sym.(*obj.LSym)
1769 // lsym.P is written lazily.
1770 // Bytes requested after the end of lsym.P are 0.
1772 if 0 <= off && off < int64(len(lsym.P)) {
1775 buf := make([]byte, 2)
1777 return byteorder.Uint16(buf)
1780 // read32 reads four bytes from the read-only global sym at offset off.
1781 func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
1782 lsym := sym.(*obj.LSym)
1784 if 0 <= off && off < int64(len(lsym.P)) {
1787 buf := make([]byte, 4)
1789 return byteorder.Uint32(buf)
1792 // read64 reads eight bytes from the read-only global sym at offset off.
1793 func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
1794 lsym := sym.(*obj.LSym)
1796 if 0 <= off && off < int64(len(lsym.P)) {
1799 buf := make([]byte, 8)
1801 return byteorder.Uint64(buf)
// sequentialAddresses reports whether it can prove that x + n == y.
1805 func sequentialAddresses(x, y *Value, n int64) bool {
1806 if x == y && n == 0 {
1809 if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
1810 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1811 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1814 if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1815 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1816 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1819 if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
1820 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1821 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1824 if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1825 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1826 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1832 // flagConstant represents the result of a compile-time comparison.
1833 // The sense of these flags does not necessarily represent the hardware's notion
1834 // of a flags register - these are just a compile-time construct.
1835 // We happen to match the semantics to those of arm/arm64.
1836 // Note that these semantics differ from x86: the carry flag has the opposite
1837 // sense on a subtraction!
1839 // On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
1840 // On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
1841 // (because it does x + ^y + C).
1843 // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
1844 type flagConstant uint8
1846 // N reports whether the result of an operation is negative (high bit set).
1847 func (fc flagConstant) N() bool {
1851 // Z reports whether the result of an operation is 0.
1852 func (fc flagConstant) Z() bool {
1856 // C reports whether an unsigned add overflowed (carry), or an
1857 // unsigned subtract did not underflow (borrow).
1858 func (fc flagConstant) C() bool {
1862 // V reports whether a signed operation overflowed or underflowed.
1863 func (fc flagConstant) V() bool {
1867 func (fc flagConstant) eq() bool {
1870 func (fc flagConstant) ne() bool {
1873 func (fc flagConstant) lt() bool {
1874 return fc.N() != fc.V()
1876 func (fc flagConstant) le() bool {
1877 return fc.Z() || fc.lt()
1879 func (fc flagConstant) gt() bool {
1880 return !fc.Z() && fc.ge()
1882 func (fc flagConstant) ge() bool {
1883 return fc.N() == fc.V()
1885 func (fc flagConstant) ult() bool {
1888 func (fc flagConstant) ule() bool {
1889 return fc.Z() || fc.ult()
1891 func (fc flagConstant) ugt() bool {
1892 return !fc.Z() && fc.uge()
1894 func (fc flagConstant) uge() bool {
1898 func (fc flagConstant) ltNoov() bool {
1899 return fc.lt() && !fc.V()
1901 func (fc flagConstant) leNoov() bool {
1902 return fc.le() && !fc.V()
1904 func (fc flagConstant) gtNoov() bool {
1905 return fc.gt() && !fc.V()
1907 func (fc flagConstant) geNoov() bool {
1908 return fc.ge() && !fc.V()
1911 func (fc flagConstant) String() string {
1912 return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
1915 type flagConstantBuilder struct {
1922 func (fcs flagConstantBuilder) encode() flagConstant {
1939 // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
1940 // - the results of the C flag are different
1941 // - the results of the V flag when y==minint are different
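// For illustration, subFlags64(5, 0) sets C (no borrow in the arm model),
// while addFlags64(5, 0) leaves C clear (no carry out), even though both
// compute the value 5.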
1943 // addFlags64 returns the flags that would be set from computing x+y.
1944 func addFlags64(x, y int64) flagConstant {
1945 var fcb flagConstantBuilder
1948 fcb.C = uint64(x+y) < uint64(x)
1949 fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1953 // subFlags64 returns the flags that would be set from computing x-y.
1954 func subFlags64(x, y int64) flagConstant {
1955 var fcb flagConstantBuilder
1958 fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
1959 fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
1963 // addFlags32 returns the flags that would be set from computing x+y.
1964 func addFlags32(x, y int32) flagConstant {
1965 var fcb flagConstantBuilder
1968 fcb.C = uint32(x+y) < uint32(x)
1969 fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1973 // subFlags32 returns the flags that would be set from computing x-y.
1974 func subFlags32(x, y int32) flagConstant {
1975 var fcb flagConstantBuilder
1978 fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
1979 fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
1983 // logicFlags64 returns flags set to the sign/zeroness of x.
1984 // C and V are set to false.
1985 func logicFlags64(x int64) flagConstant {
1986 var fcb flagConstantBuilder
1992 // logicFlags32 returns flags set to the sign/zeroness of x.
1993 // C and V are set to false.
1994 func logicFlags32(x int32) flagConstant {
1995 var fcb flagConstantBuilder
2001 func makeJumpTableSym(b *Block) *obj.LSym {
2002 s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
2003 s.Set(obj.AttrDuplicateOK, true)
2004 s.Set(obj.AttrLocal, true)
2008 // canRotate reports whether the architecture supports
2009 // rotates of integer registers with the given number of bits.
2010 func canRotate(c *Config, bits int64) bool {
2011 if bits > c.PtrSize*8 {
2012 // Don't rewrite to rotates bigger than the machine word.
2016 case "386", "amd64", "arm64":
2018 case "arm", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
2025 // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
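// For illustration, 0x00FF00FF00FF00FF (the 16-bit unit 0x00FF repeated) is
// encodable, while 0 and all-ones are explicitly rejected.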
2026 func isARM64bitcon(x uint64) bool {
2027 if x == 1<<64-1 || x == 0 {
2030 // determine the period and sign-extend a unit to 64 bits
2032 case x != x>>32|x<<32:
2035 case x != x>>16|x<<48:
2037 x = uint64(int64(int32(x)))
2038 case x != x>>8|x<<56:
2040 x = uint64(int64(int16(x)))
2041 case x != x>>4|x<<60:
2043 x = uint64(int64(int8(x)))
2045 // period is 4 or 2, always true
2046 // 0001, 0010, 0100, 1000 -- 0001 rotate
2047 // 0011, 0110, 1100, 1001 -- 0011 rotate
2048 // 0111, 1011, 1101, 1110 -- 0111 rotate
2049 // 0101, 1010 -- 01 rotate, repeat
2052 return sequenceOfOnes(x) || sequenceOfOnes(^x)
2055 // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
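// For illustration, 0b0011_1000 is a sequence of ones, while 0b0101 is not.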
2056 func sequenceOfOnes(x uint64) bool {
2057 y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
// isARM64addcon reports whether v can be encoded as the immediate value in an ADD or SUB instruction.
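// For illustration, 0xFFF (a 12-bit immediate) and 0xFFF000 (the same
// immediate shifted left by 12) are both encodable.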
2063 func isARM64addcon(v int64) bool {
2064 /* uimm12 or uimm24? */
2068 if (v & 0xFFF) == 0 {