// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"cmd/compile/internal/base"
	"cmd/compile/internal/logopt"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/s390x"
	"cmd/internal/objabi"
	"cmd/internal/src"
	"encoding/binary"
	"fmt"
	"io"
	"math"
	"math/bits"
	"os"
	"path/filepath"
)

type deadValueChoice bool

const (
	leaveDeadValues  deadValueChoice = false
	removeDeadValues                 = true
)

// deadcode indicates whether rewrite should try to remove any values that become dead.
func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
	// repeat rewrites until we find no more rewrites
	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
	pendingLines.clear()
	debug := f.pass.debug
	if debug > 1 {
		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
	}
	var iters int
	var states map[string]bool
	for {
		change := false
		deadChange := false
		for _, b := range f.Blocks {
			var b0 *Block
			if debug > 1 {
				b0 = new(Block)
				*b0 = *b
				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
			}
			for i, c := range b.ControlValues() {
				for c.Op == OpCopy {
					c = c.Args[0]
					b.ReplaceControl(i, c)
				}
			}
			if rb(b) {
				change = true
				if debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", b0.LongString(), b.LongString())
				}
			}
			for j, v := range b.Values {
				var v0 *Value
				if debug > 1 {
					v0 = new(Value)
					*v0 = *v
					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
				}
				if v.Uses == 0 && v.removeable() {
					if v.Op != OpInvalid && deadcode == removeDeadValues {
						// Reset any values that are now unused, so that we decrement
						// the use count of all of its arguments.
						// Not quite a deadcode pass, because it does not handle cycles.
						// But it should help Uses==1 rules to fire.
						v.reset(OpInvalid)
						deadChange = true
					}
					// No point rewriting values which aren't used.
					continue
				}

				vchange := phielimValue(v)
				if vchange && debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
				}

				// Eliminate copy inputs.
				// If any copy input becomes unused, mark it
				// as invalid and discard its argument. Repeat
				// recursively on the discarded argument.
				// This phase helps remove phantom "dead copy" uses
				// of a value so that a x.Uses==1 rule condition
				// fires reliably.
				for i, a := range v.Args {
					if a.Op != OpCopy {
						continue
					}
					aa := copySource(a)
					v.SetArg(i, aa)
					// If a, a copy, has a line boundary indicator, attempt to find a new value
					// to hold it. The first candidate is the value that will replace a (aa),
					// if it shares the same block and line and is eligible.
					// The second option is v, which has a as an input. Because aa is earlier in
					// the data flow, it is the better choice.
					if a.Pos.IsStmt() == src.PosIsStmt {
						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
							aa.Pos = aa.Pos.WithIsStmt()
						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
							v.Pos = v.Pos.WithIsStmt()
						} else {
							// Record the lost line and look for a new home after all rewrites are complete.
							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
							// line to appear in more than one block, but only one block is stored, so if both end
							// up here, then one will be lost.
							pendingLines.set(a.Pos, int32(a.Block.ID))
						}
						a.Pos = a.Pos.WithNotStmt()
					}
					vchange = true
					for a.Uses == 0 {
						b := a.Args[0]
						a.reset(OpInvalid)
						a = b
					}
				}
				if vchange && debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
				}

				// apply rewrite function
				if rv(v) {
					vchange = true
					// If value changed to a poor choice for a statement boundary, move the boundary
					if v.Pos.IsStmt() == src.PosIsStmt {
						if k := nextGoodStatementIndex(v, j, b); k != j {
							v.Pos = v.Pos.WithNotStmt()
							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
						}
					}
				}

				change = change || vchange
				if vchange && debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
				}
			}
		}
		if !change && !deadChange {
			break
		}
		iters++
		if (iters > 1000 || debug >= 2) && change {
			// We've done a suspiciously large number of rewrites (or we're in debug mode).
			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
			// and the maximum value encountered during make.bash is 12.
			// Start checking for cycles. (This is too expensive to do routinely.)
			// Note: we avoid this path for deadChange-only iterations, to fix #51639.
			if states == nil {
				states = make(map[string]bool)
			}
			h := f.rewriteHash()
			if _, ok := states[h]; ok {
				// We've found a cycle.
				// To diagnose it, set debug to 2 and start again,
				// so that we'll print all rules applied until we complete another cycle.
				// If debug is already >= 2, we've already done that, so it's time to crash.
				if debug < 2 {
					debug = 2
					states = make(map[string]bool)
				} else {
					f.Fatalf("rewrite cycle detected")
				}
			}
			states[h] = true
		}
	}
	// remove clobbered values
	for _, b := range f.Blocks {
		j := 0
		for i, v := range b.Values {
			vl := v.Pos
			if v.Op == OpInvalid {
				if v.Pos.IsStmt() == src.PosIsStmt {
					pendingLines.set(vl, int32(b.ID))
				}
				f.freeValue(v)
				continue
			}
			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
				pendingLines.remove(vl)
				v.Pos = v.Pos.WithIsStmt()
			}
			if i != j {
				b.Values[j] = v
			}
			j++
		}
		if pendingLines.get(b.Pos) == int32(b.ID) {
			b.Pos = b.Pos.WithIsStmt()
			pendingLines.remove(b.Pos)
		}
		b.truncateValues(j)
	}
}

// Common functions called from rewriting rules

func is64BitFloat(t *types.Type) bool {
	return t.Size() == 8 && t.IsFloat()
}

func is32BitFloat(t *types.Type) bool {
	return t.Size() == 4 && t.IsFloat()
}

func is64BitInt(t *types.Type) bool {
	return t.Size() == 8 && t.IsInteger()
}

func is32BitInt(t *types.Type) bool {
	return t.Size() == 4 && t.IsInteger()
}

func is16BitInt(t *types.Type) bool {
	return t.Size() == 2 && t.IsInteger()
}

func is8BitInt(t *types.Type) bool {
	return t.Size() == 1 && t.IsInteger()
}

func isPtr(t *types.Type) bool {
	return t.IsPtrShaped()
}

func isSigned(t *types.Type) bool {
	return t.IsSigned()
}

// mergeSym merges two symbolic offsets. There is no real merging of
// offsets; we just pick the non-nil one.
func mergeSym(x, y Sym) Sym {
	if x == nil {
		return y
	}
	if y == nil {
		return x
	}
	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
}

func canMergeSym(x, y Sym) bool {
	return x == nil || y == nil
}

// canMergeLoadClobber reports whether the load can be merged into target without
// invalidating the schedule.
// It also checks that the other non-load argument x is something we
// are ok with clobbering.
func canMergeLoadClobber(target, load, x *Value) bool {
	// The register containing x is going to get clobbered.
	// Don't merge if we still need the value of x.
	// We don't have liveness information here, but we can
	// approximate x dying with:
	//  1) target is x's only use.
	//  2) target is not in a deeper loop than x.
	if x.Uses != 1 {
		return false
	}
	loopnest := x.Block.Func.loopnest()
	loopnest.calculateDepths()
	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
		return false
	}
	return canMergeLoad(target, load)
}

// canMergeLoad reports whether the load can be merged into target without
// invalidating the schedule.
func canMergeLoad(target, load *Value) bool {
	if target.Block.ID != load.Block.ID {
		// If the load is in a different block do not merge it.
		return false
	}

	// We can't merge the load into the target if the load
	// has more than one use.
	if load.Uses != 1 {
		return false
	}

	mem := load.MemoryArg()

	// We need the load's memory arg to still be alive at target. That
	// can't be the case if one of target's args depends on a memory
	// state that is a successor of load's memory arg.
	//
	// For example, it would be invalid to merge load into target in
	// the following situation because newmem has killed oldmem
	// before target is reached:
	//     load = read ... oldmem
	//   newmem = write ... oldmem
	//     arg0 = read ... newmem
	//   target = add arg0 load
	//
	// If the argument comes from a different block then we can exclude
	// it immediately because it must dominate load (which is in the
	// same block as target).
	var args []*Value
	for _, a := range target.Args {
		if a != load && a.Block.ID == target.Block.ID {
			args = append(args, a)
		}
	}

	// memPreds contains memory states known to be predecessors of load's
	// memory state. It is lazily initialized.
	var memPreds map[*Value]bool
	for i := 0; len(args) > 0; i++ {
		const limit = 100
		if i >= limit {
			// Give up if we have done a lot of iterations.
			return false
		}
		v := args[len(args)-1]
		args = args[:len(args)-1]
		if target.Block.ID != v.Block.ID {
			// Since target and load are in the same block
			// we can stop searching when we leave the block.
			continue
		}
		if v.Op == OpPhi {
			// A Phi implies we have reached the top of the block.
			// The memory phi, if it exists, is always
			// the first logical store in the block.
			continue
		}
		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
			// We could handle this situation however it is likely
			// to be very rare.
			return false
		}
		if v.Op.SymEffect()&SymAddr != 0 {
			// This case prevents an operation that calculates the
			// address of a local variable from being forced to schedule
			// before its corresponding VarDef.
			// See issue 28445.
			//   v1 = LOAD ...
			//   v2 = VARDEF
			//   v3 = LEAQ_ON_STACK
			//   v4 = CMPQ v1 v3
			// We don't want to combine the CMPQ with the load, because
			// that would force the CMPQ to schedule before the VARDEF, which
			// in turn requires the LEAQ to schedule before the VARDEF.
			return false
		}
		if v.Type.IsMemory() {
			if memPreds == nil {
				// Initialise a map containing memory states
				// known to be predecessors of load's memory
				// state.
				memPreds = make(map[*Value]bool)
				m := mem
				const limit = 50
				for i := 0; i < limit; i++ {
					if m.Op == OpPhi {
						// The memory phi, if it exists, is always
						// the first logical store in the block.
						break
					}
					if m.Block.ID != target.Block.ID {
						break
					}
					if !m.Type.IsMemory() {
						break
					}
					memPreds[m] = true
					if len(m.Args) == 0 {
						break
					}
					m = m.MemoryArg()
				}
			}

			// We can merge if v is a predecessor of mem.
			//
			// For example, we can merge load into target in the
			// following scenario:
			//      x = read ... v
			//    mem = write ... v
			//   load = read ... mem
			// target = add x load
			if memPreds[v] {
				continue
			}
			return false
		}
		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
			// If v takes mem as an input then we know mem
			// is valid at this point.
			continue
		}
		for _, a := range v.Args {
			if target.Block.ID == a.Block.ID {
				args = append(args, a)
			}
		}
	}

	return true
}

// isSameCall reports whether sym is the same as the given named symbol.
func isSameCall(sym interface{}, name string) bool {
	fn := sym.(*AuxCall).Fn
	return fn != nil && fn.String() == name
}

// canLoadUnaligned reports whether the architecture supports unaligned load operations.
func canLoadUnaligned(c *Config) bool {
	return c.ctxt.Arch.Alignment == 1
}

// nlzX returns the number of leading zeros.
func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }

// ntzX returns the number of trailing zeros.
func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }

func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }

// nto returns the number of trailing ones.
func nto(x int64) int64 {
	return int64(ntz64(^x))
}
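
// For example, nto(0b0111) == 3 and nto(0) == 0: ^x has exactly as many
// trailing zeros as x has trailing ones.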

// logX returns the base-2 logarithm of n.
// n must be a positive power of 2 (isPowerOfTwoX returns true).
func log8(n int8) int64 {
	return int64(bits.Len8(uint8(n))) - 1
}
func log16(n int16) int64 {
	return int64(bits.Len16(uint16(n))) - 1
}
func log32(n int32) int64 {
	return int64(bits.Len32(uint32(n))) - 1
}
func log64(n int64) int64 {
	return int64(bits.Len64(uint64(n))) - 1
}

// log2uint32 returns the base-2 logarithm of uint32(n), with log2(0) = -1.
// Rounds down.
func log2uint32(n int64) int64 {
	return int64(bits.Len32(uint32(n))) - 1
}

// isPowerOfTwoX functions report whether n is a power of 2.
func isPowerOfTwo8(n int8) bool {
	return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo16(n int16) bool {
	return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo32(n int32) bool {
	return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo64(n int64) bool {
	return n > 0 && n&(n-1) == 0
}

// isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
func isUint64PowerOfTwo(in int64) bool {
	n := uint64(in)
	return n > 0 && n&(n-1) == 0
}

// isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
func isUint32PowerOfTwo(in int64) bool {
	n := uint64(uint32(in))
	return n > 0 && n&(n-1) == 0
}

// is32Bit reports whether n can be represented as a signed 32 bit integer.
func is32Bit(n int64) bool {
	return n == int64(int32(n))
}

// is16Bit reports whether n can be represented as a signed 16 bit integer.
func is16Bit(n int64) bool {
	return n == int64(int16(n))
}

// is8Bit reports whether n can be represented as a signed 8 bit integer.
func is8Bit(n int64) bool {
	return n == int64(int8(n))
}

// isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
func isU8Bit(n int64) bool {
	return n == int64(uint8(n))
}

// isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
func isU12Bit(n int64) bool {
	return 0 <= n && n < (1<<12)
}

// isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
func isU16Bit(n int64) bool {
	return n == int64(uint16(n))
}

// isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
func isU32Bit(n int64) bool {
	return n == int64(uint32(n))
}

// is20Bit reports whether n can be represented as a signed 20 bit integer.
func is20Bit(n int64) bool {
	return -(1<<19) <= n && n < (1<<19)
}

// b2i translates a boolean value to 0 or 1 for assigning to auxInt.
func b2i(b bool) int64 {
	if b {
		return 1
	}
	return 0
}

// b2i32 translates a boolean value to 0 or 1.
func b2i32(b bool) int32 {
	if b {
		return 1
	}
	return 0
}

// shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
// A shift is bounded if it is shifting by less than the width of the shifted value.
func shiftIsBounded(v *Value) bool {
	return v.AuxInt != 0
}

// canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
// generated code as much as possible.
func canonLessThan(x, y *Value) bool {
	if x.Op != y.Op {
		return x.Op < y.Op
	}
	if !x.Pos.SameFileAndLine(y.Pos) {
		return x.Pos.Before(y.Pos)
	}
	return x.ID < y.ID
}

// truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
// of the mantissa. It will panic if the truncation results in lost information.
func truncate64Fto32F(f float64) float32 {
	if !isExactFloat32(f) {
		panic("truncate64Fto32F: truncation is not exact")
	}
	if !math.IsNaN(f) {
		return float32(f)
	}
	// NaN bit patterns aren't necessarily preserved across conversion
	// instructions so we need to do the conversion manually.
	b := math.Float64bits(f)
	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
	//          | sign                  | exponent   | mantissa       |
	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
	return math.Float32frombits(r)
}

// extend32Fto64F converts a float32 value to a float64 value preserving the bit
// pattern of the mantissa.
func extend32Fto64F(f float32) float64 {
	if !math.IsNaN(float64(f)) {
		return float64(f)
	}
	// NaN bit patterns aren't necessarily preserved across conversion
	// instructions so we need to do the conversion manually.
	b := uint64(math.Float32bits(f))
	//   | sign                  | exponent      | mantissa                    |
	r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
	return math.Float64frombits(r)
}

// DivisionNeedsFixUp reports whether the division needs fix-up code.
func DivisionNeedsFixUp(v *Value) bool {
	return v.AuxInt == 0
}

// auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
func auxFrom64F(f float64) int64 {
	if math.IsNaN(f) {
		panic("can't encode a NaN in AuxInt field")
	}
	return int64(math.Float64bits(f))
}

// auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
func auxFrom32F(f float32) int64 {
	if math.IsNaN(float64(f)) {
		panic("can't encode a NaN in AuxInt field")
	}
	return int64(math.Float64bits(extend32Fto64F(f)))
}

// auxTo32F decodes a float32 from the AuxInt value provided.
func auxTo32F(i int64) float32 {
	return truncate64Fto32F(math.Float64frombits(uint64(i)))
}
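
// For example, auxFrom32F(1.5) stores math.Float64bits(1.5) in the AuxInt,
// and auxTo32F recovers exactly 1.5: a float32 round-trips through its
// exact float64 extension.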

// auxTo64F decodes a float64 from the AuxInt value provided.
func auxTo64F(i int64) float64 {
	return math.Float64frombits(uint64(i))
}

func auxIntToBool(i int64) bool {
	if i == 0 {
		return false
	}
	return true
}
func auxIntToInt8(i int64) int8 {
	return int8(i)
}
func auxIntToInt16(i int64) int16 {
	return int16(i)
}
func auxIntToInt32(i int64) int32 {
	return int32(i)
}
func auxIntToInt64(i int64) int64 {
	return i
}
func auxIntToUint8(i int64) uint8 {
	return uint8(i)
}
func auxIntToFloat32(i int64) float32 {
	return float32(math.Float64frombits(uint64(i)))
}
func auxIntToFloat64(i int64) float64 {
	return math.Float64frombits(uint64(i))
}
func auxIntToValAndOff(i int64) ValAndOff {
	return ValAndOff(i)
}
func auxIntToArm64BitField(i int64) arm64BitField {
	return arm64BitField(i)
}
func auxIntToInt128(x int64) int128 {
	if x != 0 {
		panic("nonzero int128 not allowed")
	}
	return 0
}
func auxIntToFlagConstant(x int64) flagConstant {
	return flagConstant(x)
}
func auxIntToOp(cc int64) Op {
	return Op(cc)
}

func boolToAuxInt(b bool) int64 {
	if b {
		return 1
	}
	return 0
}
func int8ToAuxInt(i int8) int64 {
	return int64(i)
}
func int16ToAuxInt(i int16) int64 {
	return int64(i)
}
func int32ToAuxInt(i int32) int64 {
	return int64(i)
}
func int64ToAuxInt(i int64) int64 {
	return int64(i)
}
func uint8ToAuxInt(i uint8) int64 {
	return int64(int8(i))
}
func float32ToAuxInt(f float32) int64 {
	return int64(math.Float64bits(float64(f)))
}
func float64ToAuxInt(f float64) int64 {
	return int64(math.Float64bits(f))
}
func valAndOffToAuxInt(v ValAndOff) int64 {
	return int64(v)
}
func arm64BitFieldToAuxInt(v arm64BitField) int64 {
	return int64(v)
}
func int128ToAuxInt(x int128) int64 {
	if x != 0 {
		panic("nonzero int128 not allowed")
	}
	return 0
}
func flagConstantToAuxInt(x flagConstant) int64 {
	return int64(x)
}
func opToAuxInt(o Op) int64 {
	return int64(o)
}

// Aux is an interface to hold miscellaneous data in Blocks and Values.
type Aux interface {
	CanBeAnSSAAux()
}

// for now only used to mark moves that need to avoid clobbering flags
type auxMark bool

func (auxMark) CanBeAnSSAAux() {}

var AuxMark auxMark

// stringAux wraps string values for use in Aux.
type stringAux string

func (stringAux) CanBeAnSSAAux() {}

func auxToString(i Aux) string {
	return string(i.(stringAux))
}
func auxToSym(i Aux) Sym {
	// TODO: kind of a hack - allows nil interface through
	s, _ := i.(Sym)
	return s
}
func auxToType(i Aux) *types.Type {
	return i.(*types.Type)
}
func auxToCall(i Aux) *AuxCall {
	return i.(*AuxCall)
}
func auxToS390xCCMask(i Aux) s390x.CCMask {
	return i.(s390x.CCMask)
}
func auxToS390xRotateParams(i Aux) s390x.RotateParams {
	return i.(s390x.RotateParams)
}

func StringToAux(s string) Aux {
	return stringAux(s)
}
func symToAux(s Sym) Aux {
	return s
}
func callToAux(s *AuxCall) Aux {
	return s
}
func typeToAux(t *types.Type) Aux {
	return t
}
func s390xCCMaskToAux(c s390x.CCMask) Aux {
	return c
}
func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
	return r
}

// uaddOvf reports whether unsigned a+b would overflow.
func uaddOvf(a, b int64) bool {
	return uint64(a)+uint64(b) < uint64(a)
}

// loadLSymOffset simulates reading a word at an offset into a
// read-only symbol's runtime memory. If it would read a pointer to
// another symbol, that symbol is returned. Otherwise, it returns nil.
func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
	if lsym.Type != objabi.SRODATA {
		return nil
	}

	for _, r := range lsym.R {
		if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
			return r.Sym
		}
	}

	return nil
}

// de-virtualize an InterLECall
// 'sym' is the symbol for the itab.
func devirtLESym(v *Value, aux Aux, sym Sym, offset int64) *obj.LSym {
	n, ok := sym.(*obj.LSym)
	if !ok {
		return nil
	}

	lsym := loadLSymOffset(n, offset)
	if f := v.Block.Func; f.pass.debug > 0 {
		if lsym != nil {
			f.Warnl(v.Pos, "de-virtualizing call")
		} else {
			f.Warnl(v.Pos, "couldn't de-virtualize call")
		}
	}
	return lsym
}

func devirtLECall(v *Value, sym *obj.LSym) *Value {
	v.Op = OpStaticLECall
	auxcall := v.Aux.(*AuxCall)
	auxcall.Fn = sym
	// Remove first arg
	v.Args[0].Uses--
	copy(v.Args[0:], v.Args[1:])
	v.Args[len(v.Args)-1] = nil // aid GC
	v.Args = v.Args[:len(v.Args)-1]
	if f := v.Block.Func; f.pass.debug > 0 {
		f.Warnl(v.Pos, "de-virtualizing call")
	}
	return v
}

// isSamePtr reports whether p1 and p2 point to the same address.
func isSamePtr(p1, p2 *Value) bool {
	if p1 == p2 {
		return true
	}
	if p1.Op != p2.Op {
		return false
	}
	switch p1.Op {
	case OpOffPtr:
		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
	case OpAddr, OpLocalAddr:
		return p1.Aux == p2.Aux
	case OpAddPtr:
		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
	}
	return false
}

func isStackPtr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr {
		v = v.Args[0]
	}
	return v.Op == OpSP || v.Op == OpLocalAddr
}

// disjoint reports whether the memory region specified by [p1:p1+n1)
// does not overlap with [p2:p2+n2).
// A return value of false does not imply the regions overlap.
func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
	if n1 == 0 || n2 == 0 {
		return true
	}
	if p1 == p2 {
		return false
	}
	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
		base, offset = ptr, 0
		for base.Op == OpOffPtr {
			offset += base.AuxInt
			base = base.Args[0]
		}
		return base, offset
	}
	p1, off1 := baseAndOffset(p1)
	p2, off2 := baseAndOffset(p2)
	if isSamePtr(p1, p2) {
		return !overlap(off1, n1, off2, n2)
	}
	// p1 and p2 are not the same, so if they are both OpAddrs then
	// they point to different variables.
	// If one pointer is on the stack and the other is an argument
	// then they can't overlap.
	switch p1.Op {
	case OpAddr, OpLocalAddr:
		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
			return true
		}
		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
	case OpArg, OpArgIntReg:
		if p2.Op == OpSP || p2.Op == OpLocalAddr {
			return true
		}
	case OpSP:
		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
	}
	return false
}

// moveSize returns the number of bytes an aligned MOV instruction moves.
func moveSize(align int64, c *Config) int64 {
	switch {
	case align%8 == 0 && c.PtrSize == 8:
		return 8
	case align%4 == 0:
		return 4
	case align%2 == 0:
		return 2
	}
	return 1
}
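
// For example, an 8-byte-aligned move on a 64-bit target proceeds in
// 8-byte chunks (moveSize returns 8), while byte-aligned data falls
// through to single-byte moves.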

// mergePoint finds a block among a's blocks which dominates b and is itself
// dominated by all of a's blocks. Returns nil if it can't find one.
// Might return nil even if one does exist.
func mergePoint(b *Block, a ...*Value) *Block {
	// Walk backward from b looking for one of the a's blocks.
	d := 100 // max distance
	for d > 0 {
		for _, x := range a {
			if b == x.Block {
				goto found
			}
		}
		if len(b.Preds) > 1 {
			// Don't know which way to go back. Abort.
			return nil
		}
		b = b.Preds[0].b
		d--
	}
	return nil // too far away
found:
	// At this point, r is the first value in a that we find by walking backwards.
	// if we return anything, r will be it.
	r := b

	// Keep going, counting the other a's that we find. They must all dominate r.
	na := 0
	d = 100
	for d > 0 {
		for _, x := range a {
			if b == x.Block {
				na++
			}
		}
		if na == len(a) {
			// Found all of a in a backwards walk. We can return r.
			return r
		}
		if len(b.Preds) > 1 {
			return nil
		}
		b = b.Preds[0].b
		d--
	}
	return nil // too far away
}

// clobber invalidates values. Returns true.
// clobber is used by rewrite rules to:
//
//	A) make sure the values are really dead and never used again.
//	B) decrement use counts of the values' args.
func clobber(vv ...*Value) bool {
	for _, v := range vv {
		v.reset(OpInvalid)
		// Note: leave v.Block intact. The Block field is used after clobber.
	}
	return true
}

// clobberIfDead resets v when use count is 1. Returns true.
// clobberIfDead is used by rewrite rules to decrement
// use counts of v's args when v is dead and never used.
func clobberIfDead(v *Value) bool {
	if v.Uses == 1 {
		v.reset(OpInvalid)
	}
	// Note: leave v.Block intact. The Block field is used after clobberIfDead.
	return true
}

// noteRule is an easy way to track if a rule is matched when writing
// new ones. Make the rule of interest also conditional on
//
//	noteRule("note to self: rule of interest matched")
//
// and that message will print when the rule matches.
func noteRule(s string) bool {
	fmt.Println(s)
	return true
}

// countRule increments Func.ruleMatches[key].
// If Func.ruleMatches is non-nil at the end
// of compilation, it will be printed to stdout.
// This is intended to make it easier to find functions
// that contain lots of rule matches when developing new rules.
func countRule(v *Value, key string) bool {
	f := v.Block.Func
	if f.ruleMatches == nil {
		f.ruleMatches = make(map[string]int)
	}
	f.ruleMatches[key]++
	return true
}

// warnRule generates compiler debug output with string s when
// v is not in autogenerated code, cond is true and the rule has fired.
func warnRule(cond bool, v *Value, s string) bool {
	if pos := v.Pos; pos.Line() > 1 && cond {
		v.Block.Func.Warnl(pos, s)
	}
	return true
}

// for a pseudo-op like (LessThan x), extract x.
func flagArg(v *Value) *Value {
	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
		return nil
	}
	return v.Args[0]
}

// arm64Negate finds the complement to an ARM64 condition code,
// for example !Equal -> NotEqual or !LessThan -> GreaterEqual
//
// For floating point, it's more subtle because NaN is unordered. We do
// !LessThanF -> NotLessThanF, the latter takes care of NaNs.
func arm64Negate(op Op) Op {
	switch op {
	case OpARM64LessThan:
		return OpARM64GreaterEqual
	case OpARM64LessThanU:
		return OpARM64GreaterEqualU
	case OpARM64GreaterThan:
		return OpARM64LessEqual
	case OpARM64GreaterThanU:
		return OpARM64LessEqualU
	case OpARM64LessEqual:
		return OpARM64GreaterThan
	case OpARM64LessEqualU:
		return OpARM64GreaterThanU
	case OpARM64GreaterEqual:
		return OpARM64LessThan
	case OpARM64GreaterEqualU:
		return OpARM64LessThanU
	case OpARM64Equal:
		return OpARM64NotEqual
	case OpARM64NotEqual:
		return OpARM64Equal
	case OpARM64LessThanF:
		return OpARM64NotLessThanF
	case OpARM64NotLessThanF:
		return OpARM64LessThanF
	case OpARM64LessEqualF:
		return OpARM64NotLessEqualF
	case OpARM64NotLessEqualF:
		return OpARM64LessEqualF
	case OpARM64GreaterThanF:
		return OpARM64NotGreaterThanF
	case OpARM64NotGreaterThanF:
		return OpARM64GreaterThanF
	case OpARM64GreaterEqualF:
		return OpARM64NotGreaterEqualF
	case OpARM64NotGreaterEqualF:
		return OpARM64GreaterEqualF
	default:
		panic("unreachable")
	}
}

// arm64Invert evaluates (InvertFlags op), which
// is the same as altering the condition codes such
// that the same result would be produced if the arguments
// to the flag-generating instruction were reversed, e.g.
// (InvertFlags (CMP x y)) -> (CMP y x)
func arm64Invert(op Op) Op {
	switch op {
	case OpARM64LessThan:
		return OpARM64GreaterThan
	case OpARM64LessThanU:
		return OpARM64GreaterThanU
	case OpARM64GreaterThan:
		return OpARM64LessThan
	case OpARM64GreaterThanU:
		return OpARM64LessThanU
	case OpARM64LessEqual:
		return OpARM64GreaterEqual
	case OpARM64LessEqualU:
		return OpARM64GreaterEqualU
	case OpARM64GreaterEqual:
		return OpARM64LessEqual
	case OpARM64GreaterEqualU:
		return OpARM64LessEqualU
	case OpARM64Equal, OpARM64NotEqual:
		return op
	case OpARM64LessThanF:
		return OpARM64GreaterThanF
	case OpARM64GreaterThanF:
		return OpARM64LessThanF
	case OpARM64LessEqualF:
		return OpARM64GreaterEqualF
	case OpARM64GreaterEqualF:
		return OpARM64LessEqualF
	case OpARM64NotLessThanF:
		return OpARM64NotGreaterThanF
	case OpARM64NotGreaterThanF:
		return OpARM64NotLessThanF
	case OpARM64NotLessEqualF:
		return OpARM64NotGreaterEqualF
	case OpARM64NotGreaterEqualF:
		return OpARM64NotLessEqualF
	default:
		panic("unreachable")
	}
}

// evaluate an ARM64 op against a flags value
// that is potentially constant; return 1 for true,
// -1 for false, and 0 for not constant.
func ccARM64Eval(op Op, flags *Value) int {
	fop := flags.Op
	if fop == OpARM64InvertFlags {
		return -ccARM64Eval(op, flags.Args[0])
	}
	if fop != OpARM64FlagConstant {
		return 0
	}
	fc := flagConstant(flags.AuxInt)
	b2i := func(b bool) int {
		if b {
			return 1
		}
		return -1
	}
	switch op {
	case OpARM64Equal:
		return b2i(fc.eq())
	case OpARM64NotEqual:
		return b2i(fc.ne())
	case OpARM64LessThan:
		return b2i(fc.lt())
	case OpARM64LessThanU:
		return b2i(fc.ult())
	case OpARM64GreaterThan:
		return b2i(fc.gt())
	case OpARM64GreaterThanU:
		return b2i(fc.ugt())
	case OpARM64LessEqual:
		return b2i(fc.le())
	case OpARM64LessEqualU:
		return b2i(fc.ule())
	case OpARM64GreaterEqual:
		return b2i(fc.ge())
	case OpARM64GreaterEqualU:
		return b2i(fc.uge())
	}
	return 0
}

// logRule logs the use of the rule s. This will only be enabled if
// rewrite rules were generated with the -log option, see _gen/rulegen.go.
func logRule(s string) {
	if ruleFile == nil {
		// Open a log file to write log to. We open in append
		// mode because all.bash runs the compiler lots of times,
		// and we want the concatenation of all of those logs.
		// This means, of course, that users need to rm the old log
		// to get fresh data.
		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
		if err != nil {
			panic(err)
		}
		ruleFile = w
	}
	_, err := fmt.Fprintln(ruleFile, s)
	if err != nil {
		panic(err)
	}
}

var ruleFile io.Writer

func min(x, y int64) int64 {
	if x < y {
		return x
	}
	return y
}

func isConstZero(v *Value) bool {
	switch v.Op {
	case OpConstNil:
		return true
	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
		return v.AuxInt == 0
	}
	return false
}

// reciprocalExact64 reports whether 1/c is exactly representable.
func reciprocalExact64(c float64) bool {
	b := math.Float64bits(c)
	man := b & (1<<52 - 1)
	if man != 0 {
		return false // not a power of 2, denormal, or NaN
	}
	exp := b >> 52 & (1<<11 - 1)
	// exponent bias is 0x3ff. So taking the reciprocal of a number
	// changes the exponent to 0x7fe-exp.
	switch exp {
	case 0:
		return false // ±0
	case 0x7ff:
		return false // ±inf
	case 0x7fe:
		return false // exponent is not representable
	default:
		return true
	}
}

// reciprocalExact32 reports whether 1/c is exactly representable.
func reciprocalExact32(c float32) bool {
	b := math.Float32bits(c)
	man := b & (1<<23 - 1)
	if man != 0 {
		return false // not a power of 2, denormal, or NaN
	}
	exp := b >> 23 & (1<<8 - 1)
	// exponent bias is 0x7f. So taking the reciprocal of a number
	// changes the exponent to 0xfe-exp.
	switch exp {
	case 0:
		return false // ±0
	case 0xff:
		return false // ±inf
	case 0xfe:
		return false // exponent is not representable
	default:
		return true
	}
}

// check if an immediate can be directly encoded into an ARM instruction.
func isARMImmRot(v uint32) bool {
	for i := 0; i < 16; i++ {
		if v&^0xff == 0 {
			return true
		}
		v = v<<2 | v>>30
	}

	return false
}

// overlap reports whether the ranges given by the given offset and
// size pairs overlap.
func overlap(offset1, size1, offset2, size2 int64) bool {
	if offset1 >= offset2 && offset2+size2 > offset1 {
		return true
	}
	if offset2 >= offset1 && offset1+size1 > offset2 {
		return true
	}
	return false
}
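
// For example, overlap(0, 8, 4, 4) is true (bytes 4 through 7 are shared),
// while overlap(0, 4, 4, 4) is false: the ranges merely touch.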

func areAdjacentOffsets(off1, off2, size int64) bool {
	return off1+size == off2 || off1 == off2+size
}

// check if value zeroes out the upper 32 bits of a 64-bit register.
// depth limits recursion depth. In AMD64.rules 3 is used as the limit,
// because it catches the same number of cases as 4.
func zeroUpper32Bits(x *Value, depth int) bool {
	switch x.Op {
	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
		OpAMD64SHLL, OpAMD64SHLLconst:
		return true
	case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
		OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
		OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
		return true
	case OpArg:
		return x.Type.Size() == 4
	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit recursion depth.
		if depth <= 0 {
			return false
		}
		for i := range x.Args {
			if !zeroUpper32Bits(x.Args[i], depth-1) {
				return false
			}
		}
		return true
	}
	return false
}

// zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
func zeroUpper48Bits(x *Value, depth int) bool {
	switch x.Op {
	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
		return true
	case OpArg:
		return x.Type.Size() == 2
	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit recursion depth.
		if depth <= 0 {
			return false
		}
		for i := range x.Args {
			if !zeroUpper48Bits(x.Args[i], depth-1) {
				return false
			}
		}
		return true
	}
	return false
}

// zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
func zeroUpper56Bits(x *Value, depth int) bool {
	switch x.Op {
	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
		return true
	case OpArg:
		return x.Type.Size() == 1
	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit recursion depth.
		if depth <= 0 {
			return false
		}
		for i := range x.Args {
			if !zeroUpper56Bits(x.Args[i], depth-1) {
				return false
			}
		}
		return true
	}
	return false
}

func isInlinableMemclr(c *Config, sz int64) bool {
	// TODO: expand this check to allow other architectures
	// see CL 454255 and issue 56997
	switch c.arch {
	case "amd64", "arm64":
		return true
	case "ppc64le", "ppc64":
		return sz < 512
	}
	return false
}

// isInlinableMemmove reports whether the given arch performs a Move of the given size
// faster than memmove. It will only return true if replacing the memmove with a Move is
// safe, either because Move will do all of its loads before any of its stores, or
// because the arguments are known to be disjoint.
// This is used as a check for replacing memmove with Move ops.
func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
	// It is always safe to convert memmove into Move when its arguments are disjoint.
	// Move ops may or may not be faster for large sizes depending on how the platform
	// lowers them, so we only perform this optimization on platforms that we know to
	// have fast Move ops.
	switch c.arch {
	case "amd64":
		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
	case "386", "arm64":
		return sz <= 8
	case "s390x", "ppc64", "ppc64le":
		return sz <= 8 || disjoint(dst, sz, src, sz)
	case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
		return sz <= 4
	}
	return false
}

func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
	return isInlinableMemmove(dst, src, sz, c)
}

// logLargeCopy logs the occurrence of a large copy.
// The best place to do this is in the rewrite rules where the size of the move is easy to find.
// "Large" is arbitrarily chosen to be 128 bytes; this may change.
func logLargeCopy(v *Value, s int64) bool {
	if s < 128 {
		return true
	}
	if logopt.Enabled() {
		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
	}
	return true
}
func LogLargeCopy(funcName string, pos src.XPos, s int64) {
	if s < 128 {
		return
	}
	if logopt.Enabled() {
		logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
	}
}

// hasSmallRotate reports whether the architecture has rotate instructions
// for sizes < 32-bit. This is used to decide whether to promote some rotations.
func hasSmallRotate(c *Config) bool {
	switch c.arch {
	case "amd64", "386":
		return true
	default:
		return false
	}
}

func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
	if sh < 0 || sh >= sz {
		panic("PPC64 shift arg sh out of range")
	}
	if mb < 0 || mb >= sz {
		panic("PPC64 shift arg mb out of range")
	}
	if me < 0 || me >= sz {
		panic("PPC64 shift arg me out of range")
	}
	return int32(sh<<16 | mb<<8 | me)
}

func GetPPC64Shiftsh(auxint int64) int64 {
	return int64(int8(auxint >> 16))
}

func GetPPC64Shiftmb(auxint int64) int64 {
	return int64(int8(auxint >> 8))
}

func GetPPC64Shiftme(auxint int64) int64 {
	return int64(int8(auxint))
}

// Test if this value can be encoded as a mask for a rlwinm like
// operation. Masks can also extend from the msb and wrap to
// the lsb too. That is, the valid masks are 32 bit strings
// of the form: 0..01..10..0 or 1..10..01..1 or 1...1
func isPPC64WordRotateMask(v64 int64) bool {
	// Isolate rightmost 1 (if none 0) and add.
	v := uint32(v64)
	vp := (v & -v) + v
	// Likewise, for the wrapping case.
	vn := ^v
	vpn := (vn & -vn) + vn
	return (v&vp == 0 || vn&vpn == 0) && v != 0
}

// Compress mask and shift into single value of the form
// me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
// be used to regenerate the input mask.
func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
	var mb, me, mbn, men int

	// Determine boundaries and then decode them
	if mask == 0 || ^mask == 0 || rotate >= nbits {
		panic("Invalid PPC64 rotate mask")
	} else if nbits == 32 {
		mb = bits.LeadingZeros32(uint32(mask))
		me = 32 - bits.TrailingZeros32(uint32(mask))
		mbn = bits.LeadingZeros32(^uint32(mask))
		men = 32 - bits.TrailingZeros32(^uint32(mask))
	} else {
		mb = bits.LeadingZeros64(uint64(mask))
		me = 64 - bits.TrailingZeros64(uint64(mask))
		mbn = bits.LeadingZeros64(^uint64(mask))
		men = 64 - bits.TrailingZeros64(^uint64(mask))
	}
	// Check for a wrapping mask (e.g. bits at 0 and 63)
	if mb == 0 && me == int(nbits) {
		// swap the inverted values
		mb, me = men, mbn
	}

	return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
}

// DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask. The values returned as
// mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
	auxint := uint64(sauxint)
	rotate = int64((auxint >> 16) & 0xFF)
	mb = int64((auxint >> 8) & 0xFF)
	me = int64((auxint >> 0) & 0xFF)
	nbits := int64((auxint >> 24) & 0xFF)
	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
	if mb > me {
		mask = ^mask
	}
	if nbits == 32 {
		mask = uint64(uint32(mask))
	}

	// Fixup ME to match ISA definition. The second argument to MASK(..,me)
	// is inclusive.
	me = (me - 1) & (nbits - 1)
	return
}
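
// For example, encodePPC64RotateMask(0, 0xFF, 32) records mb=24 and an
// exclusive me of 32; DecodePPC64RotateMask then returns rotate=0, mb=24,
// me=31 and mask=0xFF, matching the ISA's inclusive MASK(24,31).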

// This verifies that the mask is a set of
// consecutive bits including the least
// significant bit.
func isPPC64ValidShiftMask(v int64) bool {
	if (v != 0) && ((v+1)&v) == 0 {
		return true
	}
	return false
}

func getPPC64ShiftMaskLength(v int64) int64 {
	return int64(bits.Len64(uint64(v)))
}

// Decompose a shift right into an equivalent rotate/mask,
// and return mask & m.
func mergePPC64RShiftMask(m, s, nbits int64) int64 {
	smask := uint64((1<<uint(nbits))-1) >> uint(s)
	return m & int64(smask)
}
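
// For example, mergePPC64RShiftMask(0xFF, 4, 32) intersects m = 0xFF with
// 0x0FFFFFFF, the set of bits a 4-bit right shift of a 32-bit value can
// produce, yielding 0xFF.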

// Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
func mergePPC64AndSrwi(m, s int64) int64 {
	mask := mergePPC64RShiftMask(m, s, 32)
	if !isPPC64WordRotateMask(mask) {
		return 0
	}
	return encodePPC64RotateMask((32-s)&31, mask, 32)
}

// Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
// Return the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
	// for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))

	// Rewrite mask to apply after the final left shift.
	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))

	r_1 := 32 - srw
	r_2 := GetPPC64Shiftsh(sld)
	r_3 := (r_1 + r_2) & 31 // This can wrap.

	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
		return 0
	}
	return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
}

// Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM. Return
// the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
	// for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))

	// combine the masks, and adjust for the final left shift.
	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
	r_2 := GetPPC64Shiftsh(int64(sld))
	r_3 := (r_1 + r_2) & 31 // This can wrap.

	// Verify the result is still a valid bitmask of <= 32 bits.
	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
		return 0
	}
	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
}

// Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
// or return 0 if they cannot be combined.
func mergePPC64SldiSrw(sld, srw int64) int64 {
	if sld > srw || srw >= 32 {
		return 0
	}
	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
	mask := (mask_r & mask_l) << uint(sld)
	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
}

// Convenience function to rotate a 32 bit constant value by another constant.
func rotateLeft32(v, rotate int64) int64 {
	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
}

func rotateRight64(v, rotate int64) int64 {
	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
}

// encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
func armBFAuxInt(lsb, width int64) arm64BitField {
	if lsb < 0 || lsb > 63 {
		panic("ARM(64) bit field lsb constant out of range")
	}
	if width < 1 || lsb+width > 64 {
		panic("ARM(64) bit field width constant out of range")
	}
	return arm64BitField(width | lsb<<8)
}

// returns the lsb part of the auxInt field of arm64 bitfield ops.
func (bfc arm64BitField) getARM64BFlsb() int64 {
	return int64(uint64(bfc) >> 8)
}

// returns the width part of the auxInt field of arm64 bitfield ops.
func (bfc arm64BitField) getARM64BFwidth() int64 {
	return int64(bfc) & 0xff
}

// checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
func isARM64BFMask(lsb, mask, rshift int64) bool {
	shiftedMask := int64(uint64(mask) >> uint64(rshift))
	return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
}
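
// For example, isARM64BFMask(0, 0xF0, 4) is true: 0xF0>>4 = 0xF is a
// contiguous run of ones starting at bit 0, and the field still fits
// below bit 64 when placed at lsb 0.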

// returns the bitfield width of mask >> rshift for arm64 bitfield ops.
func arm64BFWidth(mask, rshift int64) int64 {
	shiftedMask := int64(uint64(mask) >> uint64(rshift))
	if shiftedMask == 0 {
		panic("ARM64 BF mask is zero")
	}
	return nto(shiftedMask)
}

// sizeof returns the size of t in bytes.
// It will panic if t is not a *types.Type.
func sizeof(t interface{}) int64 {
	return t.(*types.Type).Size()
}

// registerizable reports whether t is a primitive type that fits in
// a register. It assumes float64 values will always fit into registers
// even if that isn't strictly true.
func registerizable(b *Block, typ *types.Type) bool {
	if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
		return true
	}
	if typ.IsInteger() {
		return typ.Size() <= b.Func.Config.RegSize
	}
	return false
}

// needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
func needRaceCleanup(sym *AuxCall, v *Value) bool {
	f := v.Block.Func
	if !f.Config.Race {
		return false
	}
	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
		return false
	}
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			switch v.Op {
			case OpStaticCall, OpStaticLECall:
				// Check for racefuncenter will encounter racefuncexit and vice versa.
				// Allow calls to panic*
				s := v.Aux.(*AuxCall).Fn.String()
				switch s {
				case "runtime.racefuncenter", "runtime.racefuncexit",
					"runtime.panicdivide", "runtime.panicwrap",
					"runtime.panicshift":
					continue
				}
				// If we encountered any call, we need to keep racefunc*,
				// for accurate stacktraces.
				return false
			case OpPanicBounds, OpPanicExtend:
				// Note: these are panic generators that are ok (like the static calls above).
			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
				// We must keep the race functions if there are any other call types.
				return false
			}
		}
	}
	if isSameCall(sym, "runtime.racefuncenter") {
		// TODO REGISTER ABI this needs to be cleaned up.
		// If we're removing racefuncenter, remove its argument as well.
		if v.Args[0].Op != OpStore {
			if v.Op == OpStaticLECall {
				// there is no store, yet.
				return true
			}
			return false
		}
		mem := v.Args[0].Args[2]
		v.Args[0].reset(OpCopy)
		v.Args[0].AddArg(mem)
	}
	return true
}

// symIsRO reports whether sym is a read-only global.
func symIsRO(sym interface{}) bool {
	lsym := sym.(*obj.LSym)
	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
}

// symIsROZero reports whether sym is a read-only global whose data contains all zeros.
func symIsROZero(sym Sym) bool {
	lsym := sym.(*obj.LSym)
	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
		return false
	}
	for _, b := range lsym.P {
		if b != 0 {
			return false
		}
	}
	return true
}

// read8 reads one byte from the read-only global sym at offset off.
func read8(sym interface{}, off int64) uint8 {
	lsym := sym.(*obj.LSym)
	if off >= int64(len(lsym.P)) || off < 0 {
		// Invalid index into the global sym.
		// This can happen in dead code, so we don't want to panic.
		// Just return any value, it will eventually get ignored.
		// See issue 29215.
		return 0
	}
	return lsym.P[off]
}

// read16 reads two bytes from the read-only global sym at offset off.
func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
	lsym := sym.(*obj.LSym)
	// lsym.P is written lazily.
	// Bytes requested after the end of lsym.P are 0.
	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 2)
	copy(buf, src)
	return byteorder.Uint16(buf)
}

// read32 reads four bytes from the read-only global sym at offset off.
func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
	lsym := sym.(*obj.LSym)
	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 4)
	copy(buf, src)
	return byteorder.Uint32(buf)
}

// read64 reads eight bytes from the read-only global sym at offset off.
func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
	lsym := sym.(*obj.LSym)
	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 8)
	copy(buf, src)
	return byteorder.Uint64(buf)
}

// sequentialAddresses reports true if it can prove that x + n == y
func sequentialAddresses(x, y *Value, n int64) bool {
	if x == y && n == 0 {
		return true
	}
	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	return false
}

// flagConstant represents the result of a compile-time comparison.
// The sense of these flags does not necessarily represent the hardware's notion
// of a flags register - these are just a compile-time construct.
// We happen to match the semantics to those of arm/arm64.
// Note that these semantics differ from x86: the carry flag has the opposite
// sense on a subtraction!
//
//	On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
//	On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
//	(because it does x + ^y + C).
//
// See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
type flagConstant uint8

// N reports whether the result of an operation is negative (high bit set).
func (fc flagConstant) N() bool {
	return fc&1 != 0
}

// Z reports whether the result of an operation is 0.
func (fc flagConstant) Z() bool {
	return fc&2 != 0
}

// C reports whether an unsigned add overflowed (carry), or an
// unsigned subtract did not underflow (borrow).
func (fc flagConstant) C() bool {
	return fc&4 != 0
}

// V reports whether a signed operation overflowed or underflowed.
func (fc flagConstant) V() bool {
	return fc&8 != 0
}

func (fc flagConstant) eq() bool {
	return fc.Z()
}
func (fc flagConstant) ne() bool {
	return !fc.Z()
}
func (fc flagConstant) lt() bool {
	return fc.N() != fc.V()
}
func (fc flagConstant) le() bool {
	return fc.Z() || fc.lt()
}
func (fc flagConstant) gt() bool {
	return !fc.Z() && fc.ge()
}
func (fc flagConstant) ge() bool {
	return fc.N() == fc.V()
}
func (fc flagConstant) ult() bool {
	return !fc.C()
}
func (fc flagConstant) ule() bool {
	return fc.Z() || fc.ult()
}
func (fc flagConstant) ugt() bool {
	return !fc.Z() && fc.uge()
}
func (fc flagConstant) uge() bool {
	return fc.C()
}

func (fc flagConstant) ltNoov() bool {
	return fc.lt() && !fc.V()
}
func (fc flagConstant) leNoov() bool {
	return fc.le() && !fc.V()
}
func (fc flagConstant) gtNoov() bool {
	return fc.gt() && !fc.V()
}
func (fc flagConstant) geNoov() bool {
	return fc.ge() && !fc.V()
}

func (fc flagConstant) String() string {
	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
}

type flagConstantBuilder struct {
	N bool
	Z bool
	C bool
	V bool
}

func (fcs flagConstantBuilder) encode() flagConstant {
	var fc flagConstant
	if fcs.N {
		fc |= 1
	}
	if fcs.Z {
		fc |= 2
	}
	if fcs.C {
		fc |= 4
	}
	if fcs.V {
		fc |= 8
	}
	return fc
}

// Note: addFlags(x,y) != subFlags(x,-y) in some situations:
//  - the results of the C flag are different
//  - the results of the V flag when y==minint are different
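
// For example, addFlags64(1, -1) sets C (the unsigned add wraps), and
// subFlags64(1, 1) also sets C (no borrow, in the arm sense); x86 would
// report the same subtraction with CF clear.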

// addFlags64 returns the flags that would be set from computing x+y.
func addFlags64(x, y int64) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x+y == 0
	fcb.N = x+y < 0
	fcb.C = uint64(x+y) < uint64(x)
	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
	return fcb.encode()
}

// subFlags64 returns the flags that would be set from computing x-y.
func subFlags64(x, y int64) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x-y == 0
	fcb.N = x-y < 0
	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
	return fcb.encode()
}

// addFlags32 returns the flags that would be set from computing x+y.
func addFlags32(x, y int32) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x+y == 0
	fcb.N = x+y < 0
	fcb.C = uint32(x+y) < uint32(x)
	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
	return fcb.encode()
}

// subFlags32 returns the flags that would be set from computing x-y.
func subFlags32(x, y int32) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x-y == 0
	fcb.N = x-y < 0
	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
	return fcb.encode()
}

// logicFlags64 returns flags set to the sign/zeroness of x.
// C and V are set to false.
func logicFlags64(x int64) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x == 0
	fcb.N = x < 0
	return fcb.encode()
}

// logicFlags32 returns flags set to the sign/zeroness of x.
// C and V are set to false.
func logicFlags32(x int32) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x == 0
	fcb.N = x < 0
	return fcb.encode()
}

func makeJumpTableSym(b *Block) *obj.LSym {
	s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.LSym(), b.ID))
	s.Set(obj.AttrDuplicateOK, true)
	s.Set(obj.AttrLocal, true)
	return s
}

// canRotate reports whether the architecture supports
// rotates of integer registers with the given number of bits.
func canRotate(c *Config, bits int64) bool {
	if bits > c.PtrSize*8 {
		// Don't rewrite to rotates bigger than the machine word.
		return false
	}
	switch c.arch {
	case "386", "amd64", "arm64":
		return true
	case "arm", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
		return bits >= 32
	default:
		return false
	}
}

// isARM64bitcon reports whether a constant can be encoded into a logical instruction.
func isARM64bitcon(x uint64) bool {
	if x == 1<<64-1 || x == 0 {
		return false
	}
	// determine the period and sign-extend a unit to 64 bits
	switch {
	case x != x>>32|x<<32:
		// period is 64
		// nothing to do
	case x != x>>16|x<<48:
		// period is 32
		x = uint64(int64(int32(x)))
	case x != x>>8|x<<56:
		// period is 16
		x = uint64(int64(int16(x)))
	case x != x>>4|x<<60:
		// period is 8
		x = uint64(int64(int8(x)))
	default:
		// period is 4 or 2, always true
		// 0001, 0010, 0100, 1000 -- 0001 rotate
		// 0011, 0110, 1100, 1001 -- 0011 rotate
		// 0111, 1011, 1101, 1110 -- 0111 rotate
		// 0101, 1010             -- 01   rotate, repeat
		return true
	}
	return sequenceOfOnes(x) || sequenceOfOnes(^x)
}
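
// For example, 0x00FF00FF00FF00FF is encodable: its period is 16 and each
// unit is a single run of ones, while 0x12345678 is rejected because
// neither it nor its complement is a single run of ones.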

// sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
func sequenceOfOnes(x uint64) bool {
	y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
	y += x
	return (y-1)&y == 0
}
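
// For example, sequenceOfOnes(0x0ff0) is true (0x0ff0 + 0x0010 = 0x1000,
// a power of 2), while sequenceOfOnes(0x0f0f) is false.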

// isARM64addcon reports whether v can be encoded as the immediate value in an ADD or SUB instruction.
func isARM64addcon(v int64) bool {
	/* uimm12 or uimm24? */
	if v < 0 {
		return false
	}
	if (v & 0xFFF) == 0 {
		v >>= 12
	}
	return v <= 0xFFF
}