src/cmd/compile/internal/ssa/rewrite.go

   1 // Copyright 2015 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package ssa
   6
   7 import (
   8         "cmd/compile/internal/base"
   9         "cmd/compile/internal/logopt"
  10         "cmd/compile/internal/reflectdata"
  11         "cmd/compile/internal/types"
  12         "cmd/internal/obj"
  13         "cmd/internal/obj/s390x"
  14         "cmd/internal/objabi"
  15         "cmd/internal/src"
  16         "encoding/binary"
  17         "fmt"
  18         "io"
  19         "math"
  20         "math/bits"
  21         "os"
  22         "path/filepath"
  23         "strings"
  24 )
  25
  26 type deadValueChoice bool
  27
  28 const (
  29         leaveDeadValues  deadValueChoice = false
  30         removeDeadValues                 = true
  31 )
  32
  33 // deadcode indicates whether rewrite should try to remove any values that become dead.
  34 func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
  35         // repeat rewrites until we find no more rewrites
  36         pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
  37         pendingLines.clear()
  38         debug := f.pass.debug
  39         if debug > 1 {
  40                 fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
  41         }
  42         var iters int
  43         var states map[string]bool
  44         for {
  45                 change := false
  46                 deadChange := false
  47                 for _, b := range f.Blocks {
  48                         var b0 *Block
  49                         if debug > 1 {
  50                                 b0 = new(Block)
  51                                 *b0 = *b
  52                                 b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
  53                         }
  54                         for i, c := range b.ControlValues() {
  55                                 for c.Op == OpCopy {
  56                                         c = c.Args[0]
  57                                         b.ReplaceControl(i, c)
  58                                 }
  59                         }
  60                         if rb(b) {
  61                                 change = true
  62                                 if debug > 1 {
  63                                         fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
  64                                 }
  65                         }
  66                         for j, v := range b.Values {
  67                                 var v0 *Value
  68                                 if debug > 1 {
  69                                         v0 = new(Value)
  70                                         *v0 = *v
  71                                         v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
  72                                 }
  73                                 if v.Uses == 0 && v.removeable() {
  74                                         if v.Op != OpInvalid && deadcode == removeDeadValues {
  75                                                 // Reset any values that are now unused, so that we decrement
  76                                                 // the use count of all of its arguments.
  77                                                 // Not quite a deadcode pass, because it does not handle cycles.
  78                                                 // But it should help Uses==1 rules to fire.
  79                                                 v.reset(OpInvalid)
  80                                                 deadChange = true
  81                                         }
  82                                         // No point rewriting values which aren't used.
  83                                         continue
  84                                 }
  85
  86                                 vchange := phielimValue(v)
  87                                 if vchange && debug > 1 {
  88                                         fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
  89                                 }
  90
  91                                 // Eliminate copy inputs.
  92                                 // If any copy input becomes unused, mark it
  93                                 // as invalid and discard its argument. Repeat
  94                                 // recursively on the discarded argument.
  95                                 // This phase helps remove phantom "dead copy" uses
  96                                 // of a value so that a x.Uses==1 rule condition
  97                                 // fires reliably.
  98                                 for i, a := range v.Args {
  99                                         if a.Op != OpCopy {
 100                                                 continue
 101                                         }
 102                                         aa := copySource(a)
 103                                         v.SetArg(i, aa)
 104                                         // If a, a copy, has a line boundary indicator, attempt to find a new value
 105                                         // to hold it.  The first candidate is the value that will replace a (aa),
 106                                         // if it shares the same block and line and is eligible.
 107                                         // The second option is v, which has a as an input.  Because aa is earlier in
 108                                         // the data flow, it is the better choice.
 109                                         if a.Pos.IsStmt() == src.PosIsStmt {
 110                                                 if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
 111                                                         aa.Pos = aa.Pos.WithIsStmt()
 112                                                 } else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
 113                                                         v.Pos = v.Pos.WithIsStmt()
 114                                                 } else {
 115                                                         // Record the lost line and look for a new home after all rewrites are complete.
 116                                                         // TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
 117                                                         // line to appear in more than one block, but only one block is stored, so if both end
 118                                                         // up here, then one will be lost.
 119                                                         pendingLines.set(a.Pos, int32(a.Block.ID))
 120                                                 }
 121                                                 a.Pos = a.Pos.WithNotStmt()
 122                                         }
 123                                         vchange = true
 124                                         for a.Uses == 0 {
 125                                                 b := a.Args[0]
 126                                                 a.reset(OpInvalid)
 127                                                 a = b
 128                                         }
 129                                 }
 130                                 if vchange && debug > 1 {
 131                                         fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
 132                                 }
 133
 134                                 // apply rewrite function
 135                                 if rv(v) {
 136                                         vchange = true
 137                                         // If value changed to a poor choice for a statement boundary, move the boundary
 138                                         if v.Pos.IsStmt() == src.PosIsStmt {
 139                                                 if k := nextGoodStatementIndex(v, j, b); k != j {
 140                                                         v.Pos = v.Pos.WithNotStmt()
 141                                                         b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
 142                                                 }
 143                                         }
 144                                 }
 145
 146                                 change = change || vchange
 147                                 if vchange && debug > 1 {
 148                                         fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
 149                                 }
 150                         }
 151                 }
 152                 if !change && !deadChange {
 153                         break
 154                 }
 155                 iters++
 156                 if (iters > 1000 || debug >= 2) && change {
 157                         // We've done a suspiciously large number of rewrites (or we're in debug mode).
 158                         // As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
 159                         // and the maximum value encountered during make.bash is 12.
 160                         // Start checking for cycles. (This is too expensive to do routinely.)
 161                         // Note: we avoid this path for deadChange-only iterations, to fix #51639.
 162                         if states == nil {
 163                                 states = make(map[string]bool)
 164                         }
 165                         h := f.rewriteHash()
 166                         if _, ok := states[h]; ok {
 167                                 // We've found a cycle.
 168                                 // To diagnose it, set debug to 2 and start again,
 169                                 // so that we'll print all rules applied until we complete another cycle.
 170                                 // If debug is already >= 2, we've already done that, so it's time to crash.
 171                                 if debug < 2 {
 172                                         debug = 2
 173                                         states = make(map[string]bool)
 174                                 } else {
 175                                         f.Fatalf("rewrite cycle detected")
 176                                 }
 177                         }
 178                         states[h] = true
 179                 }
 180         }
 181         // remove clobbered values
 182         for _, b := range f.Blocks {
 183                 j := 0
 184                 for i, v := range b.Values {
 185                         vl := v.Pos
 186                         if v.Op == OpInvalid {
 187                                 if v.Pos.IsStmt() == src.PosIsStmt {
 188                                         pendingLines.set(vl, int32(b.ID))
 189                                 }
 190                                 f.freeValue(v)
 191                                 continue
 192                         }
 193                         if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
 194                                 pendingLines.remove(vl)
 195                                 v.Pos = v.Pos.WithIsStmt()
 196                         }
 197                         if i != j {
 198                                 b.Values[j] = v
 199                         }
 200                         j++
 201                 }
 202                 if pendingLines.get(b.Pos) == int32(b.ID) {
 203                         b.Pos = b.Pos.WithIsStmt()
 204                         pendingLines.remove(b.Pos)
 205                 }
 206                 b.truncateValues(j)
 207         }
 208 }
 209
 210 // Common functions called from rewriting rules
 211
 212 func is64BitFloat(t *types.Type) bool {
 213         return t.Size() == 8 && t.IsFloat()
 214 }
 215
 216 func is32BitFloat(t *types.Type) bool {
 217         return t.Size() == 4 && t.IsFloat()
 218 }
 219
 220 func is64BitInt(t *types.Type) bool {
 221         return t.Size() == 8 && t.IsInteger()
 222 }
 223
 224 func is32BitInt(t *types.Type) bool {
 225         return t.Size() == 4 && t.IsInteger()
 226 }
 227
 228 func is16BitInt(t *types.Type) bool {
 229         return t.Size() == 2 && t.IsInteger()
 230 }
 231
 232 func is8BitInt(t *types.Type) bool {
 233         return t.Size() == 1 && t.IsInteger()
 234 }
 235
 236 func isPtr(t *types.Type) bool {
 237         return t.IsPtrShaped()
 238 }
 239
 240 // mergeSym merges two symbolic offsets. There is no real merging of
 241 // offsets, we just pick the non-nil one.
 242 func mergeSym(x, y Sym) Sym {
 243         if x == nil {
 244                 return y
 245         }
 246         if y == nil {
 247                 return x
 248         }
 249         panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
 250 }
 251
 252 func canMergeSym(x, y Sym) bool {
 253         return x == nil || y == nil
 254 }
 255
 256 // canMergeLoadClobber reports whether the load can be merged into target without
 257 // invalidating the schedule.
 258 // It also checks that the other non-load argument x is something we
 259 // are ok with clobbering.
 260 func canMergeLoadClobber(target, load, x *Value) bool {
 261         // The register containing x is going to get clobbered.
 262         // Don't merge if we still need the value of x.
 263         // We don't have liveness information here, but we can
 264         // approximate x dying with:
 265         //  1) target is x's only use.
 266         //  2) target is not in a deeper loop than x.
 267         if x.Uses != 1 {
 268                 return false
 269         }
 270         loopnest := x.Block.Func.loopnest()
 271         loopnest.calculateDepths()
 272         if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
 273                 return false
 274         }
 275         return canMergeLoad(target, load)
 276 }
 277
 278 // canMergeLoad reports whether the load can be merged into target without
 279 // invalidating the schedule.
 280 func canMergeLoad(target, load *Value) bool {
 281         if target.Block.ID != load.Block.ID {
 282                 // If the load is in a different block do not merge it.
 283                 return false
 284         }
 285
 286         // We can't merge the load into the target if the load
 287         // has more than one use.
 288         if load.Uses != 1 {
 289                 return false
 290         }
 291
 292         mem := load.MemoryArg()
 293
 294         // We need the load's memory arg to still be alive at target. That
 295         // can't be the case if one of target's args depends on a memory
 296         // state that is a successor of load's memory arg.
 297         //
 298         // For example, it would be invalid to merge load into target in
 299         // the following situation because newmem has killed oldmem
 300         // before target is reached:
 301         //     load = read ... oldmem
 302         //   newmem = write ... oldmem
 303         //     arg0 = read ... newmem
 304         //   target = add arg0 load
 305         //
 306         // If the argument comes from a different block then we can exclude
 307         // it immediately because it must dominate load (which is in the
 308         // same block as target).
 309         var args []*Value
 310         for _, a := range target.Args {
 311                 if a != load && a.Block.ID == target.Block.ID {
 312                         args = append(args, a)
 313                 }
 314         }
 315
 316         // memPreds contains memory states known to be predecessors of load's
 317         // memory state. It is lazily initialized.
 318         var memPreds map[*Value]bool
 319         for i := 0; len(args) > 0; i++ {
 320                 const limit = 100
 321                 if i >= limit {
 322                         // Give up if we have done a lot of iterations.
 323                         return false
 324                 }
 325                 v := args[len(args)-1]
 326                 args = args[:len(args)-1]
 327                 if target.Block.ID != v.Block.ID {
 328                         // Since target and load are in the same block
 329                         // we can stop searching when we leave the block.
 330                         continue
 331                 }
 332                 if v.Op == OpPhi {
 333                         // A Phi implies we have reached the top of the block.
 334                         // The memory phi, if it exists, is always
 335                         // the first logical store in the block.
 336                         continue
 337                 }
 338                 if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
 339                         // We could handle this situation however it is likely
 340                         // to be very rare.
 341                         return false
 342                 }
 343                 if v.Op.SymEffect()&SymAddr != 0 {
 344                         // This case prevents an operation that calculates the
 345                         // address of a local variable from being forced to schedule
 346                         // before its corresponding VarDef.
 347                         // See issue 28445.
 348                         //   v1 = LOAD ...
 349                         //   v2 = VARDEF
 350                         //   v3 = LEAQ
 351                         //   v4 = CMPQ v1 v3
 352                         // We don't want to combine the CMPQ with the load, because
 353                         // that would force the CMPQ to schedule before the VARDEF, which
 354                         // in turn requires the LEAQ to schedule before the VARDEF.
 355                         return false
 356                 }
 357                 if v.Type.IsMemory() {
 358                         if memPreds == nil {
 359                                 // Initialise a map containing memory states
 360                                 // known to be predecessors of load's memory
 361                                 // state.
 362                                 memPreds = make(map[*Value]bool)
 363                                 m := mem
 364                                 const limit = 50
 365                                 for i := 0; i < limit; i++ {
 366                                         if m.Op == OpPhi {
 367                                                 // The memory phi, if it exists, is always
 368                                                 // the first logical store in the block.
 369                                                 break
 370                                         }
 371                                         if m.Block.ID != target.Block.ID {
 372                                                 break
 373                                         }
 374                                         if !m.Type.IsMemory() {
 375                                                 break
 376                                         }
 377                                         memPreds[m] = true
 378                                         if len(m.Args) == 0 {
 379                                                 break
 380                                         }
 381                                         m = m.MemoryArg()
 382                                 }
 383                         }
 384
 385                         // We can merge if v is a predecessor of mem.
 386                         //
 387                         // For example, we can merge load into target in the
 388                         // following scenario:
 389                         //      x = read ... v
 390                         //    mem = write ... v
 391                         //   load = read ... mem
 392                         // target = add x load
 393                         if memPreds[v] {
 394                                 continue
 395                         }
 396                         return false
 397                 }
 398                 if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
 399                         // If v takes mem as an input then we know mem
 400                         // is valid at this point.
 401                         continue
 402                 }
 403                 for _, a := range v.Args {
 404                         if target.Block.ID == a.Block.ID {
 405                                 args = append(args, a)
 406                         }
 407                 }
 408         }
 409
 410         return true
 411 }
 412
 413 // isSameCall reports whether sym is the same as the given named symbol.
 414 func isSameCall(sym interface{}, name string) bool {
 415         fn := sym.(*AuxCall).Fn
 416         return fn != nil && fn.String() == name
 417 }
 418
 419 // canLoadUnaligned reports if the architecture supports unaligned load operations.
 420 func canLoadUnaligned(c *Config) bool {
 421         return c.ctxt.Arch.Alignment == 1
 422 }
 423
 424 // nlzX returns the number of leading zeros.
 425 func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
 426 func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
 427 func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
 428 func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
 429
 430 // ntzX returns the number of trailing zeros.
 431 func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
 432 func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
 433 func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
 434 func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
 435
 436 func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
 437 func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
 438 func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
 439 func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
 440 func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
 441
 442 // nto returns the number of trailing ones.
 443 func nto(x int64) int64 {
 444         return int64(ntz64(^x))
 445 }
 446
 447 // logX returns logarithm of n base 2.
 448 // n must be a positive power of 2 (isPowerOfTwoX returns true).
 449 func log8(n int8) int64 {
 450         return int64(bits.Len8(uint8(n))) - 1
 451 }
 452 func log16(n int16) int64 {
 453         return int64(bits.Len16(uint16(n))) - 1
 454 }
 455 func log32(n int32) int64 {
 456         return int64(bits.Len32(uint32(n))) - 1
 457 }
 458 func log64(n int64) int64 {
 459         return int64(bits.Len64(uint64(n))) - 1
 460 }
 461
 462 // log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
 463 // Rounds down.
 464 func log2uint32(n int64) int64 {
 465         return int64(bits.Len32(uint32(n))) - 1
 466 }
 467
 468 // isPowerOfTwoX functions report whether n is a power of 2.
 469 func isPowerOfTwo8(n int8) bool {
 470         return n > 0 && n&(n-1) == 0
 471 }
 472 func isPowerOfTwo16(n int16) bool {
 473         return n > 0 && n&(n-1) == 0
 474 }
 475 func isPowerOfTwo32(n int32) bool {
 476         return n > 0 && n&(n-1) == 0
 477 }
 478 func isPowerOfTwo64(n int64) bool {
 479         return n > 0 && n&(n-1) == 0
 480 }
 481
 482 // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
 483 func isUint64PowerOfTwo(in int64) bool {
 484         n := uint64(in)
 485         return n > 0 && n&(n-1) == 0
 486 }
 487
 488 // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
 489 func isUint32PowerOfTwo(in int64) bool {
 490         n := uint64(uint32(in))
 491         return n > 0 && n&(n-1) == 0
 492 }
 493
 494 // is32Bit reports whether n can be represented as a signed 32 bit integer.
 495 func is32Bit(n int64) bool {
 496         return n == int64(int32(n))
 497 }
 498
 499 // is16Bit reports whether n can be represented as a signed 16 bit integer.
 500 func is16Bit(n int64) bool {
 501         return n == int64(int16(n))
 502 }
 503
 504 // is8Bit reports whether n can be represented as a signed 8 bit integer.
 505 func is8Bit(n int64) bool {
 506         return n == int64(int8(n))
 507 }
 508
 509 // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
 510 func isU8Bit(n int64) bool {
 511         return n == int64(uint8(n))
 512 }
 513
 514 // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
 515 func isU12Bit(n int64) bool {
 516         return 0 <= n && n < (1<<12)
 517 }
 518
 519 // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
 520 func isU16Bit(n int64) bool {
 521         return n == int64(uint16(n))
 522 }
 523
 524 // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
 525 func isU32Bit(n int64) bool {
 526         return n == int64(uint32(n))
 527 }
 528
 529 // is20Bit reports whether n can be represented as a signed 20 bit integer.
 530 func is20Bit(n int64) bool {
 531         return -(1<<19) <= n && n < (1<<19)
 532 }
 533
 534 // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
 535 func b2i(b bool) int64 {
 536         if b {
 537                 return 1
 538         }
 539         return 0
 540 }
 541
 542 // b2i32 translates a boolean value to 0 or 1.
 543 func b2i32(b bool) int32 {
 544         if b {
 545                 return 1
 546         }
 547         return 0
 548 }
 549
 550 // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
 551 // A shift is bounded if it is shifting by less than the width of the shifted value.
 552 func shiftIsBounded(v *Value) bool {
 553         return v.AuxInt != 0
 554 }
 555
 556 // canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
 557 // generated code as much as possible.
 558 func canonLessThan(x, y *Value) bool {
 559         if x.Op != y.Op {
 560                 return x.Op < y.Op
 561         }
 562         if !x.Pos.SameFileAndLine(y.Pos) {
 563                 return x.Pos.Before(y.Pos)
 564         }
 565         return x.ID < y.ID
 566 }
 567
 568 // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
 569 // of the mantissa. It will panic if the truncation results in lost information.
 570 func truncate64Fto32F(f float64) float32 {
 571         if !isExactFloat32(f) {
 572                 panic("truncate64Fto32F: truncation is not exact")
 573         }
 574         if !math.IsNaN(f) {
 575                 return float32(f)
 576         }
 577         // NaN bit patterns aren't necessarily preserved across conversion
 578         // instructions so we need to do the conversion manually.
 579         b := math.Float64bits(f)
 580         m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
 581         //          | sign                  | exponent   | mantissa       |
 582         r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
 583         return math.Float32frombits(r)
 584 }
 585
 586 // extend32Fto64F converts a float32 value to a float64 value preserving the bit
 587 // pattern of the mantissa.
 588 func extend32Fto64F(f float32) float64 {
 589         if !math.IsNaN(float64(f)) {
 590                 return float64(f)
 591         }
 592         // NaN bit patterns aren't necessarily preserved across conversion
 593         // instructions so we need to do the conversion manually.
 594         b := uint64(math.Float32bits(f))
 595         //   | sign                  | exponent      | mantissa                    |
 596         r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
 597         return math.Float64frombits(r)
 598 }
 599
 600 // DivisionNeedsFixUp reports whether the division needs fix-up code.
 601 func DivisionNeedsFixUp(v *Value) bool {
 602         return v.AuxInt == 0
 603 }
 604
 605 // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
 606 func auxFrom64F(f float64) int64 {
 607         if f != f {
 608                 panic("can't encode a NaN in AuxInt field")
 609         }
 610         return int64(math.Float64bits(f))
 611 }
 612
 613 // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
 614 func auxFrom32F(f float32) int64 {
 615         if f != f {
 616                 panic("can't encode a NaN in AuxInt field")
 617         }
 618         return int64(math.Float64bits(extend32Fto64F(f)))
 619 }
 620
 621 // auxTo32F decodes a float32 from the AuxInt value provided.
 622 func auxTo32F(i int64) float32 {
 623         return truncate64Fto32F(math.Float64frombits(uint64(i)))
 624 }
 625
 626 // auxTo64F decodes a float64 from the AuxInt value provided.
 627 func auxTo64F(i int64) float64 {
 628         return math.Float64frombits(uint64(i))
 629 }
 630
 631 func auxIntToBool(i int64) bool {
 632         if i == 0 {
 633                 return false
 634         }
 635         return true
 636 }
 637 func auxIntToInt8(i int64) int8 {
 638         return int8(i)
 639 }
 640 func auxIntToInt16(i int64) int16 {
 641         return int16(i)
 642 }
 643 func auxIntToInt32(i int64) int32 {
 644         return int32(i)
 645 }
 646 func auxIntToInt64(i int64) int64 {
 647         return i
 648 }
 649 func auxIntToUint8(i int64) uint8 {
 650         return uint8(i)
 651 }
 652 func auxIntToFloat32(i int64) float32 {
 653         return float32(math.Float64frombits(uint64(i)))
 654 }
 655 func auxIntToFloat64(i int64) float64 {
 656         return math.Float64frombits(uint64(i))
 657 }
 658 func auxIntToValAndOff(i int64) ValAndOff {
 659         return ValAndOff(i)
 660 }
 661 func auxIntToArm64BitField(i int64) arm64BitField {
 662         return arm64BitField(i)
 663 }
 664 func auxIntToInt128(x int64) int128 {
 665         if x != 0 {
 666                 panic("nonzero int128 not allowed")
 667         }
 668         return 0
 669 }
 670 func auxIntToFlagConstant(x int64) flagConstant {
 671         return flagConstant(x)
 672 }
 673
 674 func auxIntToOp(cc int64) Op {
 675         return Op(cc)
 676 }
 677
 678 func boolToAuxInt(b bool) int64 {
 679         if b {
 680                 return 1
 681         }
 682         return 0
 683 }
 684 func int8ToAuxInt(i int8) int64 {
 685         return int64(i)
 686 }
 687 func int16ToAuxInt(i int16) int64 {
 688         return int64(i)
 689 }
 690 func int32ToAuxInt(i int32) int64 {
 691         return int64(i)
 692 }
 693 func int64ToAuxInt(i int64) int64 {
 694         return int64(i)
 695 }
 696 func uint8ToAuxInt(i uint8) int64 {
 697         return int64(int8(i))
 698 }
 699 func float32ToAuxInt(f float32) int64 {
 700         return int64(math.Float64bits(float64(f)))
 701 }
 702 func float64ToAuxInt(f float64) int64 {
 703         return int64(math.Float64bits(f))
 704 }
 705 func valAndOffToAuxInt(v ValAndOff) int64 {
 706         return int64(v)
 707 }
 708 func arm64BitFieldToAuxInt(v arm64BitField) int64 {
 709         return int64(v)
 710 }
 711 func int128ToAuxInt(x int128) int64 {
 712         if x != 0 {
 713                 panic("nonzero int128 not allowed")
 714         }
 715         return 0
 716 }
 717 func flagConstantToAuxInt(x flagConstant) int64 {
 718         return int64(x)
 719 }
 720
 721 func opToAuxInt(o Op) int64 {
 722         return int64(o)
 723 }
 724
 725 // Aux is an interface to hold miscellaneous data in Blocks and Values.
 726 type Aux interface {
 727         CanBeAnSSAAux()
 728 }
 729
 730 // for now only used to mark moves that need to avoid clobbering flags
 731 type auxMark bool
 732
 733 func (auxMark) CanBeAnSSAAux() {}
 734
 735 var AuxMark auxMark
 736
 737 // stringAux wraps string values for use in Aux.
 738 type stringAux string
 739
 740 func (stringAux) CanBeAnSSAAux() {}
 741
 742 func auxToString(i Aux) string {
 743         return string(i.(stringAux))
 744 }
 745 func auxToSym(i Aux) Sym {
 746         // TODO: kind of a hack - allows nil interface through
 747         s, _ := i.(Sym)
 748         return s
 749 }
 750 func auxToType(i Aux) *types.Type {
 751         return i.(*types.Type)
 752 }
 753 func auxToCall(i Aux) *AuxCall {
 754         return i.(*AuxCall)
 755 }
 756 func auxToS390xCCMask(i Aux) s390x.CCMask {
 757         return i.(s390x.CCMask)
 758 }
 759 func auxToS390xRotateParams(i Aux) s390x.RotateParams {
 760         return i.(s390x.RotateParams)
 761 }
 762
 763 func StringToAux(s string) Aux {
 764         return stringAux(s)
 765 }
 766 func symToAux(s Sym) Aux {
 767         return s
 768 }
 769 func callToAux(s *AuxCall) Aux {
 770         return s
 771 }
 772 func typeToAux(t *types.Type) Aux {
 773         return t
 774 }
 775 func s390xCCMaskToAux(c s390x.CCMask) Aux {
 776         return c
 777 }
 778 func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
 779         return r
 780 }
 781
 782 // uaddOvf reports whether unsigned a+b would overflow.
 783 func uaddOvf(a, b int64) bool {
 784         return uint64(a)+uint64(b) < uint64(a)
 785 }
 786
 787 // loadLSymOffset simulates reading a word at an offset into a
 788 // read-only symbol's runtime memory. If it would read a pointer to
 789 // another symbol, that symbol is returned. Otherwise, it returns nil.
 790 func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
 791         if lsym.Type != objabi.SRODATA {
 792                 return nil
 793         }
 794
 795         for _, r := range lsym.R {
 796                 if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
 797                         return r.Sym
 798                 }
 799         }
 800
 801         return nil
 802 }
 803
 804 func devirtLECall(v *Value, sym *obj.LSym) *Value {
 805         v.Op = OpStaticLECall
 806         auxcall := v.Aux.(*AuxCall)
 807         auxcall.Fn = sym
 808         // Remove first arg
 809         v.Args[0].Uses--
 810         copy(v.Args[0:], v.Args[1:])
 811         v.Args[len(v.Args)-1] = nil // aid GC
 812         v.Args = v.Args[:len(v.Args)-1]
 813         if f := v.Block.Func; f.pass.debug > 0 {
 814                 f.Warnl(v.Pos, "de-virtualizing call")
 815         }
 816         return v
 817 }
 818
 819 // isSamePtr reports whether p1 and p2 point to the same address.
 820 func isSamePtr(p1, p2 *Value) bool {
 821         if p1 == p2 {
 822                 return true
 823         }
 824         if p1.Op != p2.Op {
 825                 return false
 826         }
 827         switch p1.Op {
 828         case OpOffPtr:
 829                 return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
 830         case OpAddr, OpLocalAddr:
 831                 return p1.Aux == p2.Aux
 832         case OpAddPtr:
 833                 return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
 834         }
 835         return false
 836 }
 837
 838 func isStackPtr(v *Value) bool {
 839         for v.Op == OpOffPtr || v.Op == OpAddPtr {
 840                 v = v.Args[0]
 841         }
 842         return v.Op == OpSP || v.Op == OpLocalAddr
 843 }
 844
 845 // disjoint reports whether the memory region specified by [p1:p1+n1)
 846 // does not overlap with [p2:p2+n2).
 847 // A return value of false does not imply the regions overlap.
 848 func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
 849         if n1 == 0 || n2 == 0 {
 850                 return true
 851         }
 852         if p1 == p2 {
 853                 return false
 854         }
 855         baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
 856                 base, offset = ptr, 0
 857                 for base.Op == OpOffPtr {
 858                         offset += base.AuxInt
 859                         base = base.Args[0]
 860                 }
 861                 return base, offset
 862         }
 863         p1, off1 := baseAndOffset(p1)
 864         p2, off2 := baseAndOffset(p2)
 865         if isSamePtr(p1, p2) {
 866                 return !overlap(off1, n1, off2, n2)
 867         }
 868         // p1 and p2 are not the same, so if they are both OpAddrs then
 869         // they point to different variables.
 870         // If one pointer is on the stack and the other is an argument
 871         // then they can't overlap.
 872         switch p1.Op {
 873         case OpAddr, OpLocalAddr:
 874                 if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
 875                         return true
 876                 }
 877                 return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
 878         case OpArg, OpArgIntReg:
 879                 if p2.Op == OpSP || p2.Op == OpLocalAddr {
 880                         return true
 881                 }
 882         case OpSP:
 883                 return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
 884         }
 885         return false
 886 }
 887
 888 // moveSize returns the number of bytes an aligned MOV instruction moves.
 889 func moveSize(align int64, c *Config) int64 {
 890         switch {
 891         case align%8 == 0 && c.PtrSize == 8:
 892                 return 8
 893         case align%4 == 0:
 894                 return 4
 895         case align%2 == 0:
 896                 return 2
 897         }
 898         return 1
 899 }
 900
 901 // mergePoint finds a block among a's blocks which dominates b and is itself
 902 // dominated by all of a's blocks. Returns nil if it can't find one.
 903 // Might return nil even if one does exist.
 904 func mergePoint(b *Block, a ...*Value) *Block {
 905         // Walk backward from b looking for one of the a's blocks.
 906
 907         // Max distance
 908         d := 100
 909
 910         for d > 0 {
 911                 for _, x := range a {
 912                         if b == x.Block {
 913                                 goto found
 914                         }
 915                 }
 916                 if len(b.Preds) > 1 {
 917                         // Don't know which way to go back. Abort.
 918                         return nil
 919                 }
 920                 b = b.Preds[0].b
 921                 d--
 922         }
 923         return nil // too far away
 924 found:
 925         // At this point, r is the first value in a that we find by walking backwards.
 926         // if we return anything, r will be it.
 927         r := b
 928
 929         // Keep going, counting the other a's that we find. They must all dominate r.
 930         na := 0
 931         for d > 0 {
 932                 for _, x := range a {
 933                         if b == x.Block {
 934                                 na++
 935                         }
 936                 }
 937                 if na == len(a) {
 938                         // Found all of a in a backwards walk. We can return r.
 939                         return r
 940                 }
 941                 if len(b.Preds) > 1 {
 942                         return nil
 943                 }
 944                 b = b.Preds[0].b
 945                 d--
 946
 947         }
 948         return nil // too far away
 949 }
 950
 951 // clobber invalidates values. Returns true.
 952 // clobber is used by rewrite rules to:
 953 //
 954 //      A) make sure the values are really dead and never used again.
 955 //      B) decrement use counts of the values' args.
 956 func clobber(vv ...*Value) bool {
 957         for _, v := range vv {
 958                 v.reset(OpInvalid)
 959                 // Note: leave v.Block intact.  The Block field is used after clobber.
 960         }
 961         return true
 962 }
 963
 964 // clobberIfDead resets v when use count is 1. Returns true.
 965 // clobberIfDead is used by rewrite rules to decrement
 966 // use counts of v's args when v is dead and never used.
 967 func clobberIfDead(v *Value) bool {
 968         if v.Uses == 1 {
 969                 v.reset(OpInvalid)
 970         }
 971         // Note: leave v.Block intact.  The Block field is used after clobberIfDead.
 972         return true
 973 }
 974
 975 // noteRule is an easy way to track if a rule is matched when writing
 976 // new ones.  Make the rule of interest also conditional on
 977 //
 978 //      noteRule("note to self: rule of interest matched")
 979 //
 980 // and that message will print when the rule matches.
 981 func noteRule(s string) bool {
 982         fmt.Println(s)
 983         return true
 984 }
 985
 986 // countRule increments Func.ruleMatches[key].
 987 // If Func.ruleMatches is non-nil at the end
 988 // of compilation, it will be printed to stdout.
 989 // This is intended to make it easier to find which functions
 990 // which contain lots of rules matches when developing new rules.
 991 func countRule(v *Value, key string) bool {
 992         f := v.Block.Func
 993         if f.ruleMatches == nil {
 994                 f.ruleMatches = make(map[string]int)
 995         }
 996         f.ruleMatches[key]++
 997         return true
 998 }
 999
1000 // warnRule generates compiler debug output with string s when
1001 // v is not in autogenerated code, cond is true and the rule has fired.
1002 func warnRule(cond bool, v *Value, s string) bool {
1003         if pos := v.Pos; pos.Line() > 1 && cond {
1004                 v.Block.Func.Warnl(pos, s)
1005         }
1006         return true
1007 }
1008
1009 // for a pseudo-op like (LessThan x), extract x.
1010 func flagArg(v *Value) *Value {
1011         if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
1012                 return nil
1013         }
1014         return v.Args[0]
1015 }
1016
1017 // arm64Negate finds the complement to an ARM64 condition code,
1018 // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
1019 //
1020 // For floating point, it's more subtle because NaN is unordered. We do
1021 // !LessThanF -> NotLessThanF, the latter takes care of NaNs.
1022 func arm64Negate(op Op) Op {
1023         switch op {
1024         case OpARM64LessThan:
1025                 return OpARM64GreaterEqual
1026         case OpARM64LessThanU:
1027                 return OpARM64GreaterEqualU
1028         case OpARM64GreaterThan:
1029                 return OpARM64LessEqual
1030         case OpARM64GreaterThanU:
1031                 return OpARM64LessEqualU
1032         case OpARM64LessEqual:
1033                 return OpARM64GreaterThan
1034         case OpARM64LessEqualU:
1035                 return OpARM64GreaterThanU
1036         case OpARM64GreaterEqual:
1037                 return OpARM64LessThan
1038         case OpARM64GreaterEqualU:
1039                 return OpARM64LessThanU
1040         case OpARM64Equal:
1041                 return OpARM64NotEqual
1042         case OpARM64NotEqual:
1043                 return OpARM64Equal
1044         case OpARM64LessThanF:
1045                 return OpARM64NotLessThanF
1046         case OpARM64NotLessThanF:
1047                 return OpARM64LessThanF
1048         case OpARM64LessEqualF:
1049                 return OpARM64NotLessEqualF
1050         case OpARM64NotLessEqualF:
1051                 return OpARM64LessEqualF
1052         case OpARM64GreaterThanF:
1053                 return OpARM64NotGreaterThanF
1054         case OpARM64NotGreaterThanF:
1055                 return OpARM64GreaterThanF
1056         case OpARM64GreaterEqualF:
1057                 return OpARM64NotGreaterEqualF
1058         case OpARM64NotGreaterEqualF:
1059                 return OpARM64GreaterEqualF
1060         default:
1061                 panic("unreachable")
1062         }
1063 }
1064
1065 // arm64Invert evaluates (InvertFlags op), which
1066 // is the same as altering the condition codes such
1067 // that the same result would be produced if the arguments
1068 // to the flag-generating instruction were reversed, e.g.
1069 // (InvertFlags (CMP x y)) -> (CMP y x)
1070 func arm64Invert(op Op) Op {
1071         switch op {
1072         case OpARM64LessThan:
1073                 return OpARM64GreaterThan
1074         case OpARM64LessThanU:
1075                 return OpARM64GreaterThanU
1076         case OpARM64GreaterThan:
1077                 return OpARM64LessThan
1078         case OpARM64GreaterThanU:
1079                 return OpARM64LessThanU
1080         case OpARM64LessEqual:
1081                 return OpARM64GreaterEqual
1082         case OpARM64LessEqualU:
1083                 return OpARM64GreaterEqualU
1084         case OpARM64GreaterEqual:
1085                 return OpARM64LessEqual
1086         case OpARM64GreaterEqualU:
1087                 return OpARM64LessEqualU
1088         case OpARM64Equal, OpARM64NotEqual:
1089                 return op
1090         case OpARM64LessThanF:
1091                 return OpARM64GreaterThanF
1092         case OpARM64GreaterThanF:
1093                 return OpARM64LessThanF
1094         case OpARM64LessEqualF:
1095                 return OpARM64GreaterEqualF
1096         case OpARM64GreaterEqualF:
1097                 return OpARM64LessEqualF
1098         case OpARM64NotLessThanF:
1099                 return OpARM64NotGreaterThanF
1100         case OpARM64NotGreaterThanF:
1101                 return OpARM64NotLessThanF
1102         case OpARM64NotLessEqualF:
1103                 return OpARM64NotGreaterEqualF
1104         case OpARM64NotGreaterEqualF:
1105                 return OpARM64NotLessEqualF
1106         default:
1107                 panic("unreachable")
1108         }
1109 }
1110
1111 // evaluate an ARM64 op against a flags value
1112 // that is potentially constant; return 1 for true,
1113 // -1 for false, and 0 for not constant.
1114 func ccARM64Eval(op Op, flags *Value) int {
1115         fop := flags.Op
1116         if fop == OpARM64InvertFlags {
1117                 return -ccARM64Eval(op, flags.Args[0])
1118         }
1119         if fop != OpARM64FlagConstant {
1120                 return 0
1121         }
1122         fc := flagConstant(flags.AuxInt)
1123         b2i := func(b bool) int {
1124                 if b {
1125                         return 1
1126                 }
1127                 return -1
1128         }
1129         switch op {
1130         case OpARM64Equal:
1131                 return b2i(fc.eq())
1132         case OpARM64NotEqual:
1133                 return b2i(fc.ne())
1134         case OpARM64LessThan:
1135                 return b2i(fc.lt())
1136         case OpARM64LessThanU:
1137                 return b2i(fc.ult())
1138         case OpARM64GreaterThan:
1139                 return b2i(fc.gt())
1140         case OpARM64GreaterThanU:
1141                 return b2i(fc.ugt())
1142         case OpARM64LessEqual:
1143                 return b2i(fc.le())
1144         case OpARM64LessEqualU:
1145                 return b2i(fc.ule())
1146         case OpARM64GreaterEqual:
1147                 return b2i(fc.ge())
1148         case OpARM64GreaterEqualU:
1149                 return b2i(fc.uge())
1150         }
1151         return 0
1152 }
1153
1154 // logRule logs the use of the rule s. This will only be enabled if
1155 // rewrite rules were generated with the -log option, see _gen/rulegen.go.
1156 func logRule(s string) {
1157         if ruleFile == nil {
1158                 // Open a log file to write log to. We open in append
1159                 // mode because all.bash runs the compiler lots of times,
1160                 // and we want the concatenation of all of those logs.
1161                 // This means, of course, that users need to rm the old log
1162                 // to get fresh data.
1163                 // TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
1164                 w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
1165                         os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
1166                 if err != nil {
1167                         panic(err)
1168                 }
1169                 ruleFile = w
1170         }
1171         _, err := fmt.Fprintln(ruleFile, s)
1172         if err != nil {
1173                 panic(err)
1174         }
1175 }
1176
1177 var ruleFile io.Writer
1178
1179 func min(x, y int64) int64 {
1180         if x < y {
1181                 return x
1182         }
1183         return y
1184 }
1185
1186 func isConstZero(v *Value) bool {
1187         switch v.Op {
1188         case OpConstNil:
1189                 return true
1190         case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
1191                 return v.AuxInt == 0
1192         }
1193         return false
1194 }
1195
1196 // reciprocalExact64 reports whether 1/c is exactly representable.
1197 func reciprocalExact64(c float64) bool {
1198         b := math.Float64bits(c)
1199         man := b & (1<<52 - 1)
1200         if man != 0 {
1201                 return false // not a power of 2, denormal, or NaN
1202         }
1203         exp := b >> 52 & (1<<11 - 1)
1204         // exponent bias is 0x3ff.  So taking the reciprocal of a number
1205         // changes the exponent to 0x7fe-exp.
1206         switch exp {
1207         case 0:
1208                 return false // ±0
1209         case 0x7ff:
1210                 return false // ±inf
1211         case 0x7fe:
1212                 return false // exponent is not representable
1213         default:
1214                 return true
1215         }
1216 }
1217
1218 // reciprocalExact32 reports whether 1/c is exactly representable.
1219 func reciprocalExact32(c float32) bool {
1220         b := math.Float32bits(c)
1221         man := b & (1<<23 - 1)
1222         if man != 0 {
1223                 return false // not a power of 2, denormal, or NaN
1224         }
1225         exp := b >> 23 & (1<<8 - 1)
1226         // exponent bias is 0x7f.  So taking the reciprocal of a number
1227         // changes the exponent to 0xfe-exp.
1228         switch exp {
1229         case 0:
1230                 return false // ±0
1231         case 0xff:
1232                 return false // ±inf
1233         case 0xfe:
1234                 return false // exponent is not representable
1235         default:
1236                 return true
1237         }
1238 }
1239
1240 // check if an immediate can be directly encoded into an ARM's instruction.
1241 func isARMImmRot(v uint32) bool {
1242         for i := 0; i < 16; i++ {
1243                 if v&^0xff == 0 {
1244                         return true
1245                 }
1246                 v = v<<2 | v>>30
1247         }
1248
1249         return false
1250 }
1251
1252 // overlap reports whether the ranges given by the given offset and
1253 // size pairs overlap.
1254 func overlap(offset1, size1, offset2, size2 int64) bool {
1255         if offset1 >= offset2 && offset2+size2 > offset1 {
1256                 return true
1257         }
1258         if offset2 >= offset1 && offset1+size1 > offset2 {
1259                 return true
1260         }
1261         return false
1262 }
1263
1264 func areAdjacentOffsets(off1, off2, size int64) bool {
1265         return off1+size == off2 || off1 == off2+size
1266 }
1267
1268 // check if value zeroes out upper 32-bit of 64-bit register.
1269 // depth limits recursion depth. In AMD64.rules 3 is used as limit,
1270 // because it catches same amount of cases as 4.
1271 func zeroUpper32Bits(x *Value, depth int) bool {
1272         switch x.Op {
1273         case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
1274                 OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
1275                 OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
1276                 OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
1277                 OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
1278                 OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
1279                 OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
1280                 OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
1281                 OpAMD64SHLL, OpAMD64SHLLconst:
1282                 return true
1283         case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
1284                 OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
1285                 OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
1286                 return true
1287         case OpArg:
1288                 return x.Type.Size() == 4
1289         case OpPhi, OpSelect0, OpSelect1:
1290                 // Phis can use each-other as an arguments, instead of tracking visited values,
1291                 // just limit recursion depth.
1292                 if depth <= 0 {
1293                         return false
1294                 }
1295                 for i := range x.Args {
1296                         if !zeroUpper32Bits(x.Args[i], depth-1) {
1297                                 return false
1298                         }
1299                 }
1300                 return true
1301
1302         }
1303         return false
1304 }
1305
1306 // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
1307 func zeroUpper48Bits(x *Value, depth int) bool {
1308         switch x.Op {
1309         case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
1310                 return true
1311         case OpArg:
1312                 return x.Type.Size() == 2
1313         case OpPhi, OpSelect0, OpSelect1:
1314                 // Phis can use each-other as an arguments, instead of tracking visited values,
1315                 // just limit recursion depth.
1316                 if depth <= 0 {
1317                         return false
1318                 }
1319                 for i := range x.Args {
1320                         if !zeroUpper48Bits(x.Args[i], depth-1) {
1321                                 return false
1322                         }
1323                 }
1324                 return true
1325
1326         }
1327         return false
1328 }
1329
1330 // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
1331 func zeroUpper56Bits(x *Value, depth int) bool {
1332         switch x.Op {
1333         case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
1334                 return true
1335         case OpArg:
1336                 return x.Type.Size() == 1
1337         case OpPhi, OpSelect0, OpSelect1:
1338                 // Phis can use each-other as an arguments, instead of tracking visited values,
1339                 // just limit recursion depth.
1340                 if depth <= 0 {
1341                         return false
1342                 }
1343                 for i := range x.Args {
1344                         if !zeroUpper56Bits(x.Args[i], depth-1) {
1345                                 return false
1346                         }
1347                 }
1348                 return true
1349
1350         }
1351         return false
1352 }
1353
1354 func isInlinableMemclr(c *Config, sz int64) bool {
1355         if sz < 0 {
1356                 return false
1357         }
1358         // TODO: expand this check to allow other architectures
1359         // see CL 454255 and issue 56997
1360         switch c.arch {
1361         case "amd64", "arm64":
1362                 return true
1363         case "ppc64le", "ppc64":
1364                 return sz < 512
1365         }
1366         return false
1367 }
1368
1369 // isInlinableMemmove reports whether the given arch performs a Move of the given size
1370 // faster than memmove. It will only return true if replacing the memmove with a Move is
1371 // safe, either because Move will do all of its loads before any of its stores, or
1372 // because the arguments are known to be disjoint.
1373 // This is used as a check for replacing memmove with Move ops.
1374 func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1375         // It is always safe to convert memmove into Move when its arguments are disjoint.
1376         // Move ops may or may not be faster for large sizes depending on how the platform
1377         // lowers them, so we only perform this optimization on platforms that we know to
1378         // have fast Move ops.
1379         switch c.arch {
1380         case "amd64":
1381                 return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
1382         case "386", "arm64":
1383                 return sz <= 8
1384         case "s390x", "ppc64", "ppc64le":
1385                 return sz <= 8 || disjoint(dst, sz, src, sz)
1386         case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
1387                 return sz <= 4
1388         }
1389         return false
1390 }
1391 func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1392         return isInlinableMemmove(dst, src, sz, c)
1393 }
1394
1395 // logLargeCopy logs the occurrence of a large copy.
1396 // The best place to do this is in the rewrite rules where the size of the move is easy to find.
1397 // "Large" is arbitrarily chosen to be 128 bytes; this may change.
1398 func logLargeCopy(v *Value, s int64) bool {
1399         if s < 128 {
1400                 return true
1401         }
1402         if logopt.Enabled() {
1403                 logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
1404         }
1405         return true
1406 }
1407 func LogLargeCopy(funcName string, pos src.XPos, s int64) {
1408         if s < 128 {
1409                 return
1410         }
1411         if logopt.Enabled() {
1412                 logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
1413         }
1414 }
1415
1416 // hasSmallRotate reports whether the architecture has rotate instructions
1417 // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
1418 func hasSmallRotate(c *Config) bool {
1419         switch c.arch {
1420         case "amd64", "386":
1421                 return true
1422         default:
1423                 return false
1424         }
1425 }
1426
1427 func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
1428         if sh < 0 || sh >= sz {
1429                 panic("PPC64 shift arg sh out of range")
1430         }
1431         if mb < 0 || mb >= sz {
1432                 panic("PPC64 shift arg mb out of range")
1433         }
1434         if me < 0 || me >= sz {
1435                 panic("PPC64 shift arg me out of range")
1436         }
1437         return int32(sh<<16 | mb<<8 | me)
1438 }
1439
1440 func GetPPC64Shiftsh(auxint int64) int64 {
1441         return int64(int8(auxint >> 16))
1442 }
1443
1444 func GetPPC64Shiftmb(auxint int64) int64 {
1445         return int64(int8(auxint >> 8))
1446 }
1447
1448 func GetPPC64Shiftme(auxint int64) int64 {
1449         return int64(int8(auxint))
1450 }
1451
1452 // Test if this value can encoded as a mask for a rlwinm like
1453 // operation.  Masks can also extend from the msb and wrap to
1454 // the lsb too.  That is, the valid masks are 32 bit strings
1455 // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
1456 func isPPC64WordRotateMask(v64 int64) bool {
1457         // Isolate rightmost 1 (if none 0) and add.
1458         v := uint32(v64)
1459         vp := (v & -v) + v
1460         // Likewise, for the wrapping case.
1461         vn := ^v
1462         vpn := (vn & -vn) + vn
1463         return (v&vp == 0 || vn&vpn == 0) && v != 0
1464 }
1465
1466 // Compress mask and shift into single value of the form
1467 // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
1468 // be used to regenerate the input mask.
1469 func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
1470         var mb, me, mbn, men int
1471
1472         // Determine boundaries and then decode them
1473         if mask == 0 || ^mask == 0 || rotate >= nbits {
1474                 panic("Invalid PPC64 rotate mask")
1475         } else if nbits == 32 {
1476                 mb = bits.LeadingZeros32(uint32(mask))
1477                 me = 32 - bits.TrailingZeros32(uint32(mask))
1478                 mbn = bits.LeadingZeros32(^uint32(mask))
1479                 men = 32 - bits.TrailingZeros32(^uint32(mask))
1480         } else {
1481                 mb = bits.LeadingZeros64(uint64(mask))
1482                 me = 64 - bits.TrailingZeros64(uint64(mask))
1483                 mbn = bits.LeadingZeros64(^uint64(mask))
1484                 men = 64 - bits.TrailingZeros64(^uint64(mask))
1485         }
1486         // Check for a wrapping mask (e.g bits at 0 and 63)
1487         if mb == 0 && me == int(nbits) {
1488                 // swap the inverted values
1489                 mb, me = men, mbn
1490         }
1491
1492         return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
1493 }
1494
1495 // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask.  The values returned as
1496 // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
1497 func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
1498         auxint := uint64(sauxint)
1499         rotate = int64((auxint >> 16) & 0xFF)
1500         mb = int64((auxint >> 8) & 0xFF)
1501         me = int64((auxint >> 0) & 0xFF)
1502         nbits := int64((auxint >> 24) & 0xFF)
1503         mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
1504         if mb > me {
1505                 mask = ^mask
1506         }
1507         if nbits == 32 {
1508                 mask = uint64(uint32(mask))
1509         }
1510
1511         // Fixup ME to match ISA definition.  The second argument to MASK(..,me)
1512         // is inclusive.
1513         me = (me - 1) & (nbits - 1)
1514         return
1515 }
1516
1517 // This verifies that the mask is a set of
1518 // consecutive bits including the least
1519 // significant bit.
1520 func isPPC64ValidShiftMask(v int64) bool {
1521         if (v != 0) && ((v+1)&v) == 0 {
1522                 return true
1523         }
1524         return false
1525 }
1526
1527 func getPPC64ShiftMaskLength(v int64) int64 {
1528         return int64(bits.Len64(uint64(v)))
1529 }
1530
1531 // Decompose a shift right into an equivalent rotate/mask,
1532 // and return mask & m.
1533 func mergePPC64RShiftMask(m, s, nbits int64) int64 {
1534         smask := uint64((1<<uint(nbits))-1) >> uint(s)
1535         return m & int64(smask)
1536 }
1537
1538 // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
1539 func mergePPC64AndSrwi(m, s int64) int64 {
1540         mask := mergePPC64RShiftMask(m, s, 32)
1541         if !isPPC64WordRotateMask(mask) {
1542                 return 0
1543         }
1544         return encodePPC64RotateMask((32-s)&31, mask, 32)
1545 }
1546
1547 // Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
1548 // Return the encoded RLWINM constant, or 0 if they cannot be merged.
1549 func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
1550         mask_1 := uint64(0xFFFFFFFF >> uint(srw))
1551         // for CLRLSLDI, it's more convient to think of it as a mask left bits then rotate left.
1552         mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1553
1554         // Rewrite mask to apply after the final left shift.
1555         mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
1556
1557         r_1 := 32 - srw
1558         r_2 := GetPPC64Shiftsh(sld)
1559         r_3 := (r_1 + r_2) & 31 // This can wrap.
1560
1561         if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
1562                 return 0
1563         }
1564         return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
1565 }
1566
1567 // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
1568 // the encoded RLWINM constant, or 0 if they cannot be merged.
1569 func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
1570         r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
1571         // for CLRLSLDI, it's more convient to think of it as a mask left bits then rotate left.
1572         mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1573
1574         // combine the masks, and adjust for the final left shift.
1575         mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
1576         r_2 := GetPPC64Shiftsh(int64(sld))
1577         r_3 := (r_1 + r_2) & 31 // This can wrap.
1578
1579         // Verify the result is still a valid bitmask of <= 32 bits.
1580         if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
1581                 return 0
1582         }
1583         return encodePPC64RotateMask(r_3, int64(mask_3), 32)
1584 }
1585
1586 // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
1587 // or return 0 if they cannot be combined.
1588 func mergePPC64SldiSrw(sld, srw int64) int64 {
1589         if sld > srw || srw >= 32 {
1590                 return 0
1591         }
1592         mask_r := uint32(0xFFFFFFFF) >> uint(srw)
1593         mask_l := uint32(0xFFFFFFFF) >> uint(sld)
1594         mask := (mask_r & mask_l) << uint(sld)
1595         return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
1596 }
1597
1598 // Convenience function to rotate a 32 bit constant value by another constant.
1599 func rotateLeft32(v, rotate int64) int64 {
1600         return int64(bits.RotateLeft32(uint32(v), int(rotate)))
1601 }
1602
1603 func rotateRight64(v, rotate int64) int64 {
1604         return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
1605 }
1606
1607 // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
1608 func armBFAuxInt(lsb, width int64) arm64BitField {
1609         if lsb < 0 || lsb > 63 {
1610                 panic("ARM(64) bit field lsb constant out of range")
1611         }
1612         if width < 1 || lsb+width > 64 {
1613                 panic("ARM(64) bit field width constant out of range")
1614         }
1615         return arm64BitField(width | lsb<<8)
1616 }
1617
1618 // returns the lsb part of the auxInt field of arm64 bitfield ops.
1619 func (bfc arm64BitField) getARM64BFlsb() int64 {
1620         return int64(uint64(bfc) >> 8)
1621 }
1622
1623 // returns the width part of the auxInt field of arm64 bitfield ops.
1624 func (bfc arm64BitField) getARM64BFwidth() int64 {
1625         return int64(bfc) & 0xff
1626 }
1627
1628 // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
1629 func isARM64BFMask(lsb, mask, rshift int64) bool {
1630         shiftedMask := int64(uint64(mask) >> uint64(rshift))
1631         return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
1632 }
1633
1634 // returns the bitfield width of mask >> rshift for arm64 bitfield ops.
1635 func arm64BFWidth(mask, rshift int64) int64 {
1636         shiftedMask := int64(uint64(mask) >> uint64(rshift))
1637         if shiftedMask == 0 {
1638                 panic("ARM64 BF mask is zero")
1639         }
1640         return nto(shiftedMask)
1641 }
1642
1643 // sizeof returns the size of t in bytes.
1644 // It will panic if t is not a *types.Type.
1645 func sizeof(t interface{}) int64 {
1646         return t.(*types.Type).Size()
1647 }
1648
1649 // registerizable reports whether t is a primitive type that fits in
1650 // a register. It assumes float64 values will always fit into registers
1651 // even if that isn't strictly true.
1652 func registerizable(b *Block, typ *types.Type) bool {
1653         if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
1654                 return true
1655         }
1656         if typ.IsInteger() {
1657                 return typ.Size() <= b.Func.Config.RegSize
1658         }
1659         return false
1660 }
1661
1662 // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
1663 func needRaceCleanup(sym *AuxCall, v *Value) bool {
1664         f := v.Block.Func
1665         if !f.Config.Race {
1666                 return false
1667         }
1668         if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
1669                 return false
1670         }
1671         for _, b := range f.Blocks {
1672                 for _, v := range b.Values {
1673                         switch v.Op {
1674                         case OpStaticCall, OpStaticLECall:
1675                                 // Check for racefuncenter will encounter racefuncexit and vice versa.
1676                                 // Allow calls to panic*
1677                                 s := v.Aux.(*AuxCall).Fn.String()
1678                                 switch s {
1679                                 case "runtime.racefuncenter", "runtime.racefuncexit",
1680                                         "runtime.panicdivide", "runtime.panicwrap",
1681                                         "runtime.panicshift":
1682                                         continue
1683                                 }
1684                                 // If we encountered any call, we need to keep racefunc*,
1685                                 // for accurate stacktraces.
1686                                 return false
1687                         case OpPanicBounds, OpPanicExtend:
1688                                 // Note: these are panic generators that are ok (like the static calls above).
1689                         case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
1690                                 // We must keep the race functions if there are any other call types.
1691                                 return false
1692                         }
1693                 }
1694         }
1695         if isSameCall(sym, "runtime.racefuncenter") {
1696                 // TODO REGISTER ABI this needs to be cleaned up.
1697                 // If we're removing racefuncenter, remove its argument as well.
1698                 if v.Args[0].Op != OpStore {
1699                         if v.Op == OpStaticLECall {
1700                                 // there is no store, yet.
1701                                 return true
1702                         }
1703                         return false
1704                 }
1705                 mem := v.Args[0].Args[2]
1706                 v.Args[0].reset(OpCopy)
1707                 v.Args[0].AddArg(mem)
1708         }
1709         return true
1710 }
1711
1712 // symIsRO reports whether sym is a read-only global.
1713 func symIsRO(sym interface{}) bool {
1714         lsym := sym.(*obj.LSym)
1715         return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
1716 }
1717
1718 // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
1719 func symIsROZero(sym Sym) bool {
1720         lsym := sym.(*obj.LSym)
1721         if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
1722                 return false
1723         }
1724         for _, b := range lsym.P {
1725                 if b != 0 {
1726                         return false
1727                 }
1728         }
1729         return true
1730 }
1731
1732 // isFixed32 returns true if the int32 at offset off in symbol sym
1733 // is known and constant.
1734 func isFixed32(c *Config, sym Sym, off int64) bool {
1735         return isFixed(c, sym, off, 4)
1736 }
1737
1738 // isFixed returns true if the range [off,off+size] of the symbol sym
1739 // is known and constant.
1740 func isFixed(c *Config, sym Sym, off, size int64) bool {
1741         lsym := sym.(*obj.LSym)
1742         if lsym.Extra == nil {
1743                 return false
1744         }
1745         if _, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
1746                 if off == 2*c.PtrSize && size == 4 {
1747                         return true // type hash field
1748                 }
1749         }
1750         return false
1751 }
1752 func fixed32(c *Config, sym Sym, off int64) int32 {
1753         lsym := sym.(*obj.LSym)
1754         if ti, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
1755                 if off == 2*c.PtrSize {
1756                         return int32(types.TypeHash(ti.Type.(*types.Type)))
1757                 }
1758         }
1759         base.Fatalf("fixed32 data not known for %s:%d", sym, off)
1760         return 0
1761 }
1762
1763 // isFixedSym returns true if the contents of sym at the given offset
1764 // is known and is the constant address of another symbol.
1765 func isFixedSym(sym Sym, off int64) bool {
1766         lsym := sym.(*obj.LSym)
1767         switch {
1768         case lsym.Type == objabi.SRODATA:
1769                 // itabs, dictionaries
1770         default:
1771                 return false
1772         }
1773         for _, r := range lsym.R {
1774                 if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
1775                         return true
1776                 }
1777         }
1778         return false
1779 }
1780 func fixedSym(f *Func, sym Sym, off int64) Sym {
1781         lsym := sym.(*obj.LSym)
1782         for _, r := range lsym.R {
1783                 if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off {
1784                         if strings.HasPrefix(r.Sym.Name, "type:") {
1785                                 // In case we're loading a type out of a dictionary, we need to record
1786                                 // that the containing function might put that type in an interface.
1787                                 // That information is currently recorded in relocations in the dictionary,
1788                                 // but if we perform this load at compile time then the dictionary
1789                                 // might be dead.
1790                                 reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
1791                         }
1792                         return r.Sym
1793                 }
1794         }
1795         base.Fatalf("fixedSym data not known for %s:%d", sym, off)
1796         return nil
1797 }
1798
1799 // read8 reads one byte from the read-only global sym at offset off.
1800 func read8(sym interface{}, off int64) uint8 {
1801         lsym := sym.(*obj.LSym)
1802         if off >= int64(len(lsym.P)) || off < 0 {
1803                 // Invalid index into the global sym.
1804                 // This can happen in dead code, so we don't want to panic.
1805                 // Just return any value, it will eventually get ignored.
1806                 // See issue 29215.
1807                 return 0
1808         }
1809         return lsym.P[off]
1810 }
1811
1812 // read16 reads two bytes from the read-only global sym at offset off.
1813 func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
1814         lsym := sym.(*obj.LSym)
1815         // lsym.P is written lazily.
1816         // Bytes requested after the end of lsym.P are 0.
1817         var src []byte
1818         if 0 <= off && off < int64(len(lsym.P)) {
1819                 src = lsym.P[off:]
1820         }
1821         buf := make([]byte, 2)
1822         copy(buf, src)
1823         return byteorder.Uint16(buf)
1824 }
1825
1826 // read32 reads four bytes from the read-only global sym at offset off.
1827 func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
1828         lsym := sym.(*obj.LSym)
1829         var src []byte
1830         if 0 <= off && off < int64(len(lsym.P)) {
1831                 src = lsym.P[off:]
1832         }
1833         buf := make([]byte, 4)
1834         copy(buf, src)
1835         return byteorder.Uint32(buf)
1836 }
1837
1838 // read64 reads eight bytes from the read-only global sym at offset off.
1839 func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
1840         lsym := sym.(*obj.LSym)
1841         var src []byte
1842         if 0 <= off && off < int64(len(lsym.P)) {
1843                 src = lsym.P[off:]
1844         }
1845         buf := make([]byte, 8)
1846         copy(buf, src)
1847         return byteorder.Uint64(buf)
1848 }
1849
1850 // sequentialAddresses reports true if it can prove that x + n == y
1851 func sequentialAddresses(x, y *Value, n int64) bool {
1852         if x == y && n == 0 {
1853                 return true
1854         }
1855         if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
1856                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1857                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1858                 return true
1859         }
1860         if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1861                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1862                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1863                 return true
1864         }
1865         if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
1866                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1867                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1868                 return true
1869         }
1870         if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1871                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1872                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1873                 return true
1874         }
1875         return false
1876 }
1877
1878 // flagConstant represents the result of a compile-time comparison.
1879 // The sense of these flags does not necessarily represent the hardware's notion
1880 // of a flags register - these are just a compile-time construct.
1881 // We happen to match the semantics to those of arm/arm64.
1882 // Note that these semantics differ from x86: the carry flag has the opposite
1883 // sense on a subtraction!
1884 //
1885 //      On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
1886 //      On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
1887 //       (because it does x + ^y + C).
1888 //
1889 // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
1890 type flagConstant uint8
1891
1892 // N reports whether the result of an operation is negative (high bit set).
1893 func (fc flagConstant) N() bool {
1894         return fc&1 != 0
1895 }
1896
1897 // Z reports whether the result of an operation is 0.
1898 func (fc flagConstant) Z() bool {
1899         return fc&2 != 0
1900 }
1901
1902 // C reports whether an unsigned add overflowed (carry), or an
1903 // unsigned subtract did not underflow (borrow).
1904 func (fc flagConstant) C() bool {
1905         return fc&4 != 0
1906 }
1907
1908 // V reports whether a signed operation overflowed or underflowed.
1909 func (fc flagConstant) V() bool {
1910         return fc&8 != 0
1911 }
1912
1913 func (fc flagConstant) eq() bool {
1914         return fc.Z()
1915 }
1916 func (fc flagConstant) ne() bool {
1917         return !fc.Z()
1918 }
1919 func (fc flagConstant) lt() bool {
1920         return fc.N() != fc.V()
1921 }
1922 func (fc flagConstant) le() bool {
1923         return fc.Z() || fc.lt()
1924 }
1925 func (fc flagConstant) gt() bool {
1926         return !fc.Z() && fc.ge()
1927 }
1928 func (fc flagConstant) ge() bool {
1929         return fc.N() == fc.V()
1930 }
1931 func (fc flagConstant) ult() bool {
1932         return !fc.C()
1933 }
1934 func (fc flagConstant) ule() bool {
1935         return fc.Z() || fc.ult()
1936 }
1937 func (fc flagConstant) ugt() bool {
1938         return !fc.Z() && fc.uge()
1939 }
1940 func (fc flagConstant) uge() bool {
1941         return fc.C()
1942 }
1943
1944 func (fc flagConstant) ltNoov() bool {
1945         return fc.lt() && !fc.V()
1946 }
1947 func (fc flagConstant) leNoov() bool {
1948         return fc.le() && !fc.V()
1949 }
1950 func (fc flagConstant) gtNoov() bool {
1951         return fc.gt() && !fc.V()
1952 }
1953 func (fc flagConstant) geNoov() bool {
1954         return fc.ge() && !fc.V()
1955 }
1956
1957 func (fc flagConstant) String() string {
1958         return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
1959 }
1960
1961 type flagConstantBuilder struct {
1962         N bool
1963         Z bool
1964         C bool
1965         V bool
1966 }
1967
1968 func (fcs flagConstantBuilder) encode() flagConstant {
1969         var fc flagConstant
1970         if fcs.N {
1971                 fc |= 1
1972         }
1973         if fcs.Z {
1974                 fc |= 2
1975         }
1976         if fcs.C {
1977                 fc |= 4
1978         }
1979         if fcs.V {
1980                 fc |= 8
1981         }
1982         return fc
1983 }
1984
1985 // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
1986 //  - the results of the C flag are different
1987 //  - the results of the V flag when y==minint are different
1988
1989 // addFlags64 returns the flags that would be set from computing x+y.
1990 func addFlags64(x, y int64) flagConstant {
1991         var fcb flagConstantBuilder
1992         fcb.Z = x+y == 0
1993         fcb.N = x+y < 0
1994         fcb.C = uint64(x+y) < uint64(x)
1995         fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1996         return fcb.encode()
1997 }
1998
1999 // subFlags64 returns the flags that would be set from computing x-y.
2000 func subFlags64(x, y int64) flagConstant {
2001         var fcb flagConstantBuilder
2002         fcb.Z = x-y == 0
2003         fcb.N = x-y < 0
2004         fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
2005         fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
2006         return fcb.encode()
2007 }
2008
2009 // addFlags32 returns the flags that would be set from computing x+y.
2010 func addFlags32(x, y int32) flagConstant {
2011         var fcb flagConstantBuilder
2012         fcb.Z = x+y == 0
2013         fcb.N = x+y < 0
2014         fcb.C = uint32(x+y) < uint32(x)
2015         fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
2016         return fcb.encode()
2017 }
2018
2019 // subFlags32 returns the flags that would be set from computing x-y.
2020 func subFlags32(x, y int32) flagConstant {
2021         var fcb flagConstantBuilder
2022         fcb.Z = x-y == 0
2023         fcb.N = x-y < 0
2024         fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
2025         fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
2026         return fcb.encode()
2027 }
2028
2029 // logicFlags64 returns flags set to the sign/zeroness of x.
2030 // C and V are set to false.
2031 func logicFlags64(x int64) flagConstant {
2032         var fcb flagConstantBuilder
2033         fcb.Z = x == 0
2034         fcb.N = x < 0
2035         return fcb.encode()
2036 }
2037
2038 // logicFlags32 returns flags set to the sign/zeroness of x.
2039 // C and V are set to false.
2040 func logicFlags32(x int32) flagConstant {
2041         var fcb flagConstantBuilder
2042         fcb.Z = x == 0
2043         fcb.N = x < 0
2044         return fcb.encode()
2045 }
2046
2047 func makeJumpTableSym(b *Block) *obj.LSym {
2048         s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
2049         s.Set(obj.AttrDuplicateOK, true)
2050         s.Set(obj.AttrLocal, true)
2051         return s
2052 }
2053
2054 // canRotate reports whether the architecture supports
2055 // rotates of integer registers with the given number of bits.
2056 func canRotate(c *Config, bits int64) bool {
2057         if bits > c.PtrSize*8 {
2058                 // Don't rewrite to rotates bigger than the machine word.
2059                 return false
2060         }
2061         switch c.arch {
2062         case "386", "amd64", "arm64":
2063                 return true
2064         case "arm", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
2065                 return bits >= 32
2066         default:
2067                 return false
2068         }
2069 }
2070
2071 // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
2072 func isARM64bitcon(x uint64) bool {
2073         if x == 1<<64-1 || x == 0 {
2074                 return false
2075         }
2076         // determine the period and sign-extend a unit to 64 bits
2077         switch {
2078         case x != x>>32|x<<32:
2079                 // period is 64
2080                 // nothing to do
2081         case x != x>>16|x<<48:
2082                 // period is 32
2083                 x = uint64(int64(int32(x)))
2084         case x != x>>8|x<<56:
2085                 // period is 16
2086                 x = uint64(int64(int16(x)))
2087         case x != x>>4|x<<60:
2088                 // period is 8
2089                 x = uint64(int64(int8(x)))
2090         default:
2091                 // period is 4 or 2, always true
2092                 // 0001, 0010, 0100, 1000 -- 0001 rotate
2093                 // 0011, 0110, 1100, 1001 -- 0011 rotate
2094                 // 0111, 1011, 1101, 1110 -- 0111 rotate
2095                 // 0101, 1010             -- 01   rotate, repeat
2096                 return true
2097         }
2098         return sequenceOfOnes(x) || sequenceOfOnes(^x)
2099 }
2100
2101 // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
2102 func sequenceOfOnes(x uint64) bool {
2103         y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
2104         y += x
2105         return (y-1)&y == 0
2106 }
2107
2108 // isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction.
2109 func isARM64addcon(v int64) bool {
2110         /* uimm12 or uimm24? */
2111         if v < 0 {
2112                 return false
2113         }
2114         if (v & 0xFFF) == 0 {
2115                 v >>= 12
2116         }
2117         return v <= 0xFFF
2118 }