src/cmd/compile/internal/ssa/rewrite.go

   1 // Copyright 2015 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package ssa
   6
   7 import (
   8         "cmd/compile/internal/base"
   9         "cmd/compile/internal/logopt"
  10         "cmd/compile/internal/types"
  11         "cmd/internal/obj"
  12         "cmd/internal/obj/s390x"
  13         "cmd/internal/objabi"
  14         "cmd/internal/src"
  15         "encoding/binary"
  16         "fmt"
  17         "io"
  18         "math"
  19         "math/bits"
  20         "os"
  21         "path/filepath"
  22 )
  23
  24 type deadValueChoice bool
  25
  26 const (
  27         leaveDeadValues  deadValueChoice = false
  28         removeDeadValues                 = true
  29 )
  30
  31 // deadcode indicates whether rewrite should try to remove any values that become dead.
  32 func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
  33         // repeat rewrites until we find no more rewrites
  34         pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
  35         pendingLines.clear()
  36         debug := f.pass.debug
  37         if debug > 1 {
  38                 fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
  39         }
  40         var iters int
  41         var states map[string]bool
  42         for {
  43                 change := false
  44                 deadChange := false
  45                 for _, b := range f.Blocks {
  46                         var b0 *Block
  47                         if debug > 1 {
  48                                 b0 = new(Block)
  49                                 *b0 = *b
  50                                 b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
  51                         }
  52                         for i, c := range b.ControlValues() {
  53                                 for c.Op == OpCopy {
  54                                         c = c.Args[0]
  55                                         b.ReplaceControl(i, c)
  56                                 }
  57                         }
  58                         if rb(b) {
  59                                 change = true
  60                                 if debug > 1 {
  61                                         fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
  62                                 }
  63                         }
  64                         for j, v := range b.Values {
  65                                 var v0 *Value
  66                                 if debug > 1 {
  67                                         v0 = new(Value)
  68                                         *v0 = *v
  69                                         v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
  70                                 }
  71                                 if v.Uses == 0 && v.removeable() {
  72                                         if v.Op != OpInvalid && deadcode == removeDeadValues {
  73                                                 // Reset any values that are now unused, so that we decrement
  74                                                 // the use count of all of its arguments.
  75                                                 // Not quite a deadcode pass, because it does not handle cycles.
  76                                                 // But it should help Uses==1 rules to fire.
  77                                                 v.reset(OpInvalid)
  78                                                 deadChange = true
  79                                         }
  80                                         // No point rewriting values which aren't used.
  81                                         continue
  82                                 }
  83
  84                                 vchange := phielimValue(v)
  85                                 if vchange && debug > 1 {
  86                                         fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
  87                                 }
  88
  89                                 // Eliminate copy inputs.
  90                                 // If any copy input becomes unused, mark it
  91                                 // as invalid and discard its argument. Repeat
  92                                 // recursively on the discarded argument.
  93                                 // This phase helps remove phantom "dead copy" uses
  94                                 // of a value so that a x.Uses==1 rule condition
  95                                 // fires reliably.
  96                                 for i, a := range v.Args {
  97                                         if a.Op != OpCopy {
  98                                                 continue
  99                                         }
 100                                         aa := copySource(a)
 101                                         v.SetArg(i, aa)
 102                                         // If a, a copy, has a line boundary indicator, attempt to find a new value
 103                                         // to hold it.  The first candidate is the value that will replace a (aa),
 104                                         // if it shares the same block and line and is eligible.
 105                                         // The second option is v, which has a as an input.  Because aa is earlier in
 106                                         // the data flow, it is the better choice.
 107                                         if a.Pos.IsStmt() == src.PosIsStmt {
 108                                                 if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
 109                                                         aa.Pos = aa.Pos.WithIsStmt()
 110                                                 } else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
 111                                                         v.Pos = v.Pos.WithIsStmt()
 112                                                 } else {
 113                                                         // Record the lost line and look for a new home after all rewrites are complete.
 114                                                         // TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
 115                                                         // line to appear in more than one block, but only one block is stored, so if both end
 116                                                         // up here, then one will be lost.
 117                                                         pendingLines.set(a.Pos, int32(a.Block.ID))
 118                                                 }
 119                                                 a.Pos = a.Pos.WithNotStmt()
 120                                         }
 121                                         vchange = true
 122                                         for a.Uses == 0 {
 123                                                 b := a.Args[0]
 124                                                 a.reset(OpInvalid)
 125                                                 a = b
 126                                         }
 127                                 }
 128                                 if vchange && debug > 1 {
 129                                         fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
 130                                 }
 131
 132                                 // apply rewrite function
 133                                 if rv(v) {
 134                                         vchange = true
 135                                         // If value changed to a poor choice for a statement boundary, move the boundary
 136                                         if v.Pos.IsStmt() == src.PosIsStmt {
 137                                                 if k := nextGoodStatementIndex(v, j, b); k != j {
 138                                                         v.Pos = v.Pos.WithNotStmt()
 139                                                         b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
 140                                                 }
 141                                         }
 142                                 }
 143
 144                                 change = change || vchange
 145                                 if vchange && debug > 1 {
 146                                         fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
 147                                 }
 148                         }
 149                 }
 150                 if !change && !deadChange {
 151                         break
 152                 }
 153                 iters++
 154                 if (iters > 1000 || debug >= 2) && change {
 155                         // We've done a suspiciously large number of rewrites (or we're in debug mode).
 156                         // As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
 157                         // and the maximum value encountered during make.bash is 12.
 158                         // Start checking for cycles. (This is too expensive to do routinely.)
 159                         // Note: we avoid this path for deadChange-only iterations, to fix #51639.
 160                         if states == nil {
 161                                 states = make(map[string]bool)
 162                         }
 163                         h := f.rewriteHash()
 164                         if _, ok := states[h]; ok {
 165                                 // We've found a cycle.
 166                                 // To diagnose it, set debug to 2 and start again,
 167                                 // so that we'll print all rules applied until we complete another cycle.
 168                                 // If debug is already >= 2, we've already done that, so it's time to crash.
 169                                 if debug < 2 {
 170                                         debug = 2
 171                                         states = make(map[string]bool)
 172                                 } else {
 173                                         f.Fatalf("rewrite cycle detected")
 174                                 }
 175                         }
 176                         states[h] = true
 177                 }
 178         }
 179         // remove clobbered values
 180         for _, b := range f.Blocks {
 181                 j := 0
 182                 for i, v := range b.Values {
 183                         vl := v.Pos
 184                         if v.Op == OpInvalid {
 185                                 if v.Pos.IsStmt() == src.PosIsStmt {
 186                                         pendingLines.set(vl, int32(b.ID))
 187                                 }
 188                                 f.freeValue(v)
 189                                 continue
 190                         }
 191                         if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
 192                                 pendingLines.remove(vl)
 193                                 v.Pos = v.Pos.WithIsStmt()
 194                         }
 195                         if i != j {
 196                                 b.Values[j] = v
 197                         }
 198                         j++
 199                 }
 200                 if pendingLines.get(b.Pos) == int32(b.ID) {
 201                         b.Pos = b.Pos.WithIsStmt()
 202                         pendingLines.remove(b.Pos)
 203                 }
 204                 b.truncateValues(j)
 205         }
 206 }
 207
 208 // Common functions called from rewriting rules
 209
 210 func is64BitFloat(t *types.Type) bool {
 211         return t.Size() == 8 && t.IsFloat()
 212 }
 213
 214 func is32BitFloat(t *types.Type) bool {
 215         return t.Size() == 4 && t.IsFloat()
 216 }
 217
 218 func is64BitInt(t *types.Type) bool {
 219         return t.Size() == 8 && t.IsInteger()
 220 }
 221
 222 func is32BitInt(t *types.Type) bool {
 223         return t.Size() == 4 && t.IsInteger()
 224 }
 225
 226 func is16BitInt(t *types.Type) bool {
 227         return t.Size() == 2 && t.IsInteger()
 228 }
 229
 230 func is8BitInt(t *types.Type) bool {
 231         return t.Size() == 1 && t.IsInteger()
 232 }
 233
 234 func isPtr(t *types.Type) bool {
 235         return t.IsPtrShaped()
 236 }
 237
 238 func isSigned(t *types.Type) bool {
 239         return t.IsSigned()
 240 }
 241
 242 // mergeSym merges two symbolic offsets. There is no real merging of
 243 // offsets, we just pick the non-nil one.
 244 func mergeSym(x, y Sym) Sym {
 245         if x == nil {
 246                 return y
 247         }
 248         if y == nil {
 249                 return x
 250         }
 251         panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
 252 }
 253
 254 func canMergeSym(x, y Sym) bool {
 255         return x == nil || y == nil
 256 }
 257
 258 // canMergeLoadClobber reports whether the load can be merged into target without
 259 // invalidating the schedule.
 260 // It also checks that the other non-load argument x is something we
 261 // are ok with clobbering.
 262 func canMergeLoadClobber(target, load, x *Value) bool {
 263         // The register containing x is going to get clobbered.
 264         // Don't merge if we still need the value of x.
 265         // We don't have liveness information here, but we can
 266         // approximate x dying with:
 267         //  1) target is x's only use.
 268         //  2) target is not in a deeper loop than x.
 269         if x.Uses != 1 {
 270                 return false
 271         }
 272         loopnest := x.Block.Func.loopnest()
 273         loopnest.calculateDepths()
 274         if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
 275                 return false
 276         }
 277         return canMergeLoad(target, load)
 278 }
 279
 280 // canMergeLoad reports whether the load can be merged into target without
 281 // invalidating the schedule.
 282 func canMergeLoad(target, load *Value) bool {
 283         if target.Block.ID != load.Block.ID {
 284                 // If the load is in a different block do not merge it.
 285                 return false
 286         }
 287
 288         // We can't merge the load into the target if the load
 289         // has more than one use.
 290         if load.Uses != 1 {
 291                 return false
 292         }
 293
 294         mem := load.MemoryArg()
 295
 296         // We need the load's memory arg to still be alive at target. That
 297         // can't be the case if one of target's args depends on a memory
 298         // state that is a successor of load's memory arg.
 299         //
 300         // For example, it would be invalid to merge load into target in
 301         // the following situation because newmem has killed oldmem
 302         // before target is reached:
 303         //     load = read ... oldmem
 304         //   newmem = write ... oldmem
 305         //     arg0 = read ... newmem
 306         //   target = add arg0 load
 307         //
 308         // If the argument comes from a different block then we can exclude
 309         // it immediately because it must dominate load (which is in the
 310         // same block as target).
 311         var args []*Value
 312         for _, a := range target.Args {
 313                 if a != load && a.Block.ID == target.Block.ID {
 314                         args = append(args, a)
 315                 }
 316         }
 317
 318         // memPreds contains memory states known to be predecessors of load's
 319         // memory state. It is lazily initialized.
 320         var memPreds map[*Value]bool
 321         for i := 0; len(args) > 0; i++ {
 322                 const limit = 100
 323                 if i >= limit {
 324                         // Give up if we have done a lot of iterations.
 325                         return false
 326                 }
 327                 v := args[len(args)-1]
 328                 args = args[:len(args)-1]
 329                 if target.Block.ID != v.Block.ID {
 330                         // Since target and load are in the same block
 331                         // we can stop searching when we leave the block.
 332                         continue
 333                 }
 334                 if v.Op == OpPhi {
 335                         // A Phi implies we have reached the top of the block.
 336                         // The memory phi, if it exists, is always
 337                         // the first logical store in the block.
 338                         continue
 339                 }
 340                 if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
 341                         // We could handle this situation however it is likely
 342                         // to be very rare.
 343                         return false
 344                 }
 345                 if v.Op.SymEffect()&SymAddr != 0 {
 346                         // This case prevents an operation that calculates the
 347                         // address of a local variable from being forced to schedule
 348                         // before its corresponding VarDef.
 349                         // See issue 28445.
 350                         //   v1 = LOAD ...
 351                         //   v2 = VARDEF
 352                         //   v3 = LEAQ
 353                         //   v4 = CMPQ v1 v3
 354                         // We don't want to combine the CMPQ with the load, because
 355                         // that would force the CMPQ to schedule before the VARDEF, which
 356                         // in turn requires the LEAQ to schedule before the VARDEF.
 357                         return false
 358                 }
 359                 if v.Type.IsMemory() {
 360                         if memPreds == nil {
 361                                 // Initialise a map containing memory states
 362                                 // known to be predecessors of load's memory
 363                                 // state.
 364                                 memPreds = make(map[*Value]bool)
 365                                 m := mem
 366                                 const limit = 50
 367                                 for i := 0; i < limit; i++ {
 368                                         if m.Op == OpPhi {
 369                                                 // The memory phi, if it exists, is always
 370                                                 // the first logical store in the block.
 371                                                 break
 372                                         }
 373                                         if m.Block.ID != target.Block.ID {
 374                                                 break
 375                                         }
 376                                         if !m.Type.IsMemory() {
 377                                                 break
 378                                         }
 379                                         memPreds[m] = true
 380                                         if len(m.Args) == 0 {
 381                                                 break
 382                                         }
 383                                         m = m.MemoryArg()
 384                                 }
 385                         }
 386
 387                         // We can merge if v is a predecessor of mem.
 388                         //
 389                         // For example, we can merge load into target in the
 390                         // following scenario:
 391                         //      x = read ... v
 392                         //    mem = write ... v
 393                         //   load = read ... mem
 394                         // target = add x load
 395                         if memPreds[v] {
 396                                 continue
 397                         }
 398                         return false
 399                 }
 400                 if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
 401                         // If v takes mem as an input then we know mem
 402                         // is valid at this point.
 403                         continue
 404                 }
 405                 for _, a := range v.Args {
 406                         if target.Block.ID == a.Block.ID {
 407                                 args = append(args, a)
 408                         }
 409                 }
 410         }
 411
 412         return true
 413 }
 414
 415 // isSameCall reports whether sym is the same as the given named symbol.
 416 func isSameCall(sym interface{}, name string) bool {
 417         fn := sym.(*AuxCall).Fn
 418         return fn != nil && fn.String() == name
 419 }
 420
 421 // canLoadUnaligned reports if the architecture supports unaligned load operations.
 422 func canLoadUnaligned(c *Config) bool {
 423         return c.ctxt.Arch.Alignment == 1
 424 }
 425
 426 // nlz returns the number of leading zeros.
 427 func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
 428 func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
 429 func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
 430 func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
 431
 432 // ntzX returns the number of trailing zeros.
 433 func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
 434 func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
 435 func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
 436 func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
 437
 438 func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
 439 func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
 440 func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
 441 func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
 442 func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
 443
 444 // nto returns the number of trailing ones.
 445 func nto(x int64) int64 {
 446         return int64(ntz64(^x))
 447 }
 448
 449 // logX returns logarithm of n base 2.
 450 // n must be a positive power of 2 (isPowerOfTwoX returns true).
 451 func log8(n int8) int64 {
 452         return int64(bits.Len8(uint8(n))) - 1
 453 }
 454 func log16(n int16) int64 {
 455         return int64(bits.Len16(uint16(n))) - 1
 456 }
 457 func log32(n int32) int64 {
 458         return int64(bits.Len32(uint32(n))) - 1
 459 }
 460 func log64(n int64) int64 {
 461         return int64(bits.Len64(uint64(n))) - 1
 462 }
 463
 464 // log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
 465 // Rounds down.
 466 func log2uint32(n int64) int64 {
 467         return int64(bits.Len32(uint32(n))) - 1
 468 }
 469
 470 // isPowerOfTwo functions report whether n is a power of 2.
 471 func isPowerOfTwo8(n int8) bool {
 472         return n > 0 && n&(n-1) == 0
 473 }
 474 func isPowerOfTwo16(n int16) bool {
 475         return n > 0 && n&(n-1) == 0
 476 }
 477 func isPowerOfTwo32(n int32) bool {
 478         return n > 0 && n&(n-1) == 0
 479 }
 480 func isPowerOfTwo64(n int64) bool {
 481         return n > 0 && n&(n-1) == 0
 482 }
 483
 484 // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
 485 func isUint64PowerOfTwo(in int64) bool {
 486         n := uint64(in)
 487         return n > 0 && n&(n-1) == 0
 488 }
 489
 490 // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
 491 func isUint32PowerOfTwo(in int64) bool {
 492         n := uint64(uint32(in))
 493         return n > 0 && n&(n-1) == 0
 494 }
 495
 496 // is32Bit reports whether n can be represented as a signed 32 bit integer.
 497 func is32Bit(n int64) bool {
 498         return n == int64(int32(n))
 499 }
 500
 501 // is16Bit reports whether n can be represented as a signed 16 bit integer.
 502 func is16Bit(n int64) bool {
 503         return n == int64(int16(n))
 504 }
 505
 506 // is8Bit reports whether n can be represented as a signed 8 bit integer.
 507 func is8Bit(n int64) bool {
 508         return n == int64(int8(n))
 509 }
 510
 511 // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
 512 func isU8Bit(n int64) bool {
 513         return n == int64(uint8(n))
 514 }
 515
 516 // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
 517 func isU12Bit(n int64) bool {
 518         return 0 <= n && n < (1<<12)
 519 }
 520
 521 // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
 522 func isU16Bit(n int64) bool {
 523         return n == int64(uint16(n))
 524 }
 525
 526 // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
 527 func isU32Bit(n int64) bool {
 528         return n == int64(uint32(n))
 529 }
 530
 531 // is20Bit reports whether n can be represented as a signed 20 bit integer.
 532 func is20Bit(n int64) bool {
 533         return -(1<<19) <= n && n < (1<<19)
 534 }
 535
 536 // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
 537 func b2i(b bool) int64 {
 538         if b {
 539                 return 1
 540         }
 541         return 0
 542 }
 543
 544 // b2i32 translates a boolean value to 0 or 1.
 545 func b2i32(b bool) int32 {
 546         if b {
 547                 return 1
 548         }
 549         return 0
 550 }
 551
 552 // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
 553 // A shift is bounded if it is shifting by less than the width of the shifted value.
 554 func shiftIsBounded(v *Value) bool {
 555         return v.AuxInt != 0
 556 }
 557
 558 // canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
 559 // generated code as much as possible.
 560 func canonLessThan(x, y *Value) bool {
 561         if x.Op != y.Op {
 562                 return x.Op < y.Op
 563         }
 564         if !x.Pos.SameFileAndLine(y.Pos) {
 565                 return x.Pos.Before(y.Pos)
 566         }
 567         return x.ID < y.ID
 568 }
 569
 570 // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
 571 // of the mantissa. It will panic if the truncation results in lost information.
 572 func truncate64Fto32F(f float64) float32 {
 573         if !isExactFloat32(f) {
 574                 panic("truncate64Fto32F: truncation is not exact")
 575         }
 576         if !math.IsNaN(f) {
 577                 return float32(f)
 578         }
 579         // NaN bit patterns aren't necessarily preserved across conversion
 580         // instructions so we need to do the conversion manually.
 581         b := math.Float64bits(f)
 582         m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
 583         //          | sign                  | exponent   | mantissa       |
 584         r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
 585         return math.Float32frombits(r)
 586 }
 587
 588 // extend32Fto64F converts a float32 value to a float64 value preserving the bit
 589 // pattern of the mantissa.
 590 func extend32Fto64F(f float32) float64 {
 591         if !math.IsNaN(float64(f)) {
 592                 return float64(f)
 593         }
 594         // NaN bit patterns aren't necessarily preserved across conversion
 595         // instructions so we need to do the conversion manually.
 596         b := uint64(math.Float32bits(f))
 597         //   | sign                  | exponent      | mantissa                    |
 598         r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
 599         return math.Float64frombits(r)
 600 }
 601
 602 // DivisionNeedsFixUp reports whether the division needs fix-up code.
 603 func DivisionNeedsFixUp(v *Value) bool {
 604         return v.AuxInt == 0
 605 }
 606
 607 // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
 608 func auxFrom64F(f float64) int64 {
 609         if f != f {
 610                 panic("can't encode a NaN in AuxInt field")
 611         }
 612         return int64(math.Float64bits(f))
 613 }
 614
 615 // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
 616 func auxFrom32F(f float32) int64 {
 617         if f != f {
 618                 panic("can't encode a NaN in AuxInt field")
 619         }
 620         return int64(math.Float64bits(extend32Fto64F(f)))
 621 }
 622
 623 // auxTo32F decodes a float32 from the AuxInt value provided.
 624 func auxTo32F(i int64) float32 {
 625         return truncate64Fto32F(math.Float64frombits(uint64(i)))
 626 }
 627
 628 // auxTo64F decodes a float64 from the AuxInt value provided.
 629 func auxTo64F(i int64) float64 {
 630         return math.Float64frombits(uint64(i))
 631 }
 632
 633 func auxIntToBool(i int64) bool {
 634         if i == 0 {
 635                 return false
 636         }
 637         return true
 638 }
 639 func auxIntToInt8(i int64) int8 {
 640         return int8(i)
 641 }
 642 func auxIntToInt16(i int64) int16 {
 643         return int16(i)
 644 }
 645 func auxIntToInt32(i int64) int32 {
 646         return int32(i)
 647 }
 648 func auxIntToInt64(i int64) int64 {
 649         return i
 650 }
 651 func auxIntToUint8(i int64) uint8 {
 652         return uint8(i)
 653 }
 654 func auxIntToFloat32(i int64) float32 {
 655         return float32(math.Float64frombits(uint64(i)))
 656 }
 657 func auxIntToFloat64(i int64) float64 {
 658         return math.Float64frombits(uint64(i))
 659 }
 660 func auxIntToValAndOff(i int64) ValAndOff {
 661         return ValAndOff(i)
 662 }
 663 func auxIntToArm64BitField(i int64) arm64BitField {
 664         return arm64BitField(i)
 665 }
 666 func auxIntToInt128(x int64) int128 {
 667         if x != 0 {
 668                 panic("nonzero int128 not allowed")
 669         }
 670         return 0
 671 }
 672 func auxIntToFlagConstant(x int64) flagConstant {
 673         return flagConstant(x)
 674 }
 675
 676 func auxIntToOp(cc int64) Op {
 677         return Op(cc)
 678 }
 679
 680 func boolToAuxInt(b bool) int64 {
 681         if b {
 682                 return 1
 683         }
 684         return 0
 685 }
 686 func int8ToAuxInt(i int8) int64 {
 687         return int64(i)
 688 }
 689 func int16ToAuxInt(i int16) int64 {
 690         return int64(i)
 691 }
 692 func int32ToAuxInt(i int32) int64 {
 693         return int64(i)
 694 }
 695 func int64ToAuxInt(i int64) int64 {
 696         return int64(i)
 697 }
 698 func uint8ToAuxInt(i uint8) int64 {
 699         return int64(int8(i))
 700 }
 701 func float32ToAuxInt(f float32) int64 {
 702         return int64(math.Float64bits(float64(f)))
 703 }
 704 func float64ToAuxInt(f float64) int64 {
 705         return int64(math.Float64bits(f))
 706 }
 707 func valAndOffToAuxInt(v ValAndOff) int64 {
 708         return int64(v)
 709 }
 710 func arm64BitFieldToAuxInt(v arm64BitField) int64 {
 711         return int64(v)
 712 }
 713 func int128ToAuxInt(x int128) int64 {
 714         if x != 0 {
 715                 panic("nonzero int128 not allowed")
 716         }
 717         return 0
 718 }
 719 func flagConstantToAuxInt(x flagConstant) int64 {
 720         return int64(x)
 721 }
 722
 723 func opToAuxInt(o Op) int64 {
 724         return int64(o)
 725 }
 726
 727 // Aux is an interface to hold miscellaneous data in Blocks and Values.
 728 type Aux interface {
 729         CanBeAnSSAAux()
 730 }
 731
 732 // stringAux wraps string values for use in Aux.
 733 type stringAux string
 734
 735 func (stringAux) CanBeAnSSAAux() {}
 736
 737 func auxToString(i Aux) string {
 738         return string(i.(stringAux))
 739 }
 740 func auxToSym(i Aux) Sym {
 741         // TODO: kind of a hack - allows nil interface through
 742         s, _ := i.(Sym)
 743         return s
 744 }
 745 func auxToType(i Aux) *types.Type {
 746         return i.(*types.Type)
 747 }
 748 func auxToCall(i Aux) *AuxCall {
 749         return i.(*AuxCall)
 750 }
 751 func auxToS390xCCMask(i Aux) s390x.CCMask {
 752         return i.(s390x.CCMask)
 753 }
 754 func auxToS390xRotateParams(i Aux) s390x.RotateParams {
 755         return i.(s390x.RotateParams)
 756 }
 757
 758 func StringToAux(s string) Aux {
 759         return stringAux(s)
 760 }
 761 func symToAux(s Sym) Aux {
 762         return s
 763 }
 764 func callToAux(s *AuxCall) Aux {
 765         return s
 766 }
 767 func typeToAux(t *types.Type) Aux {
 768         return t
 769 }
 770 func s390xCCMaskToAux(c s390x.CCMask) Aux {
 771         return c
 772 }
 773 func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
 774         return r
 775 }
 776
 777 // uaddOvf reports whether unsigned a+b would overflow.
 778 func uaddOvf(a, b int64) bool {
 779         return uint64(a)+uint64(b) < uint64(a)
 780 }
 781
 782 // loadLSymOffset simulates reading a word at an offset into a
 783 // read-only symbol's runtime memory. If it would read a pointer to
 784 // another symbol, that symbol is returned. Otherwise, it returns nil.
 785 func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
 786         if lsym.Type != objabi.SRODATA {
 787                 return nil
 788         }
 789
 790         for _, r := range lsym.R {
 791                 if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
 792                         return r.Sym
 793                 }
 794         }
 795
 796         return nil
 797 }
 798
 799 // de-virtualize an InterLECall
 800 // 'sym' is the symbol for the itab.
 801 func devirtLESym(v *Value, aux Aux, sym Sym, offset int64) *obj.LSym {
 802         n, ok := sym.(*obj.LSym)
 803         if !ok {
 804                 return nil
 805         }
 806
 807         lsym := loadLSymOffset(n, offset)
 808         if f := v.Block.Func; f.pass.debug > 0 {
 809                 if lsym != nil {
 810                         f.Warnl(v.Pos, "de-virtualizing call")
 811                 } else {
 812                         f.Warnl(v.Pos, "couldn't de-virtualize call")
 813                 }
 814         }
 815         return lsym
 816 }
 817
 818 func devirtLECall(v *Value, sym *obj.LSym) *Value {
 819         v.Op = OpStaticLECall
 820         auxcall := v.Aux.(*AuxCall)
 821         auxcall.Fn = sym
 822         // Remove first arg
 823         v.Args[0].Uses--
 824         copy(v.Args[0:], v.Args[1:])
 825         v.Args[len(v.Args)-1] = nil // aid GC
 826         v.Args = v.Args[:len(v.Args)-1]
 827         return v
 828 }
 829
 830 // isSamePtr reports whether p1 and p2 point to the same address.
 831 func isSamePtr(p1, p2 *Value) bool {
 832         if p1 == p2 {
 833                 return true
 834         }
 835         if p1.Op != p2.Op {
 836                 return false
 837         }
 838         switch p1.Op {
 839         case OpOffPtr:
 840                 return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
 841         case OpAddr, OpLocalAddr:
 842                 // OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op.
 843                 // Checking for value equality only works after [z]cse has run.
 844                 return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op
 845         case OpAddPtr:
 846                 return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
 847         }
 848         return false
 849 }
 850
 851 func isStackPtr(v *Value) bool {
 852         for v.Op == OpOffPtr || v.Op == OpAddPtr {
 853                 v = v.Args[0]
 854         }
 855         return v.Op == OpSP || v.Op == OpLocalAddr
 856 }
 857
 858 // disjoint reports whether the memory region specified by [p1:p1+n1)
 859 // does not overlap with [p2:p2+n2).
 860 // A return value of false does not imply the regions overlap.
 861 func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
 862         if n1 == 0 || n2 == 0 {
 863                 return true
 864         }
 865         if p1 == p2 {
 866                 return false
 867         }
 868         baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
 869                 base, offset = ptr, 0
 870                 for base.Op == OpOffPtr {
 871                         offset += base.AuxInt
 872                         base = base.Args[0]
 873                 }
 874                 return base, offset
 875         }
 876         p1, off1 := baseAndOffset(p1)
 877         p2, off2 := baseAndOffset(p2)
 878         if isSamePtr(p1, p2) {
 879                 return !overlap(off1, n1, off2, n2)
 880         }
 881         // p1 and p2 are not the same, so if they are both OpAddrs then
 882         // they point to different variables.
 883         // If one pointer is on the stack and the other is an argument
 884         // then they can't overlap.
 885         switch p1.Op {
 886         case OpAddr, OpLocalAddr:
 887                 if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
 888                         return true
 889                 }
 890                 return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
 891         case OpArg, OpArgIntReg:
 892                 if p2.Op == OpSP || p2.Op == OpLocalAddr {
 893                         return true
 894                 }
 895         case OpSP:
 896                 return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
 897         }
 898         return false
 899 }
 900
 901 // moveSize returns the number of bytes an aligned MOV instruction moves.
 902 func moveSize(align int64, c *Config) int64 {
 903         switch {
 904         case align%8 == 0 && c.PtrSize == 8:
 905                 return 8
 906         case align%4 == 0:
 907                 return 4
 908         case align%2 == 0:
 909                 return 2
 910         }
 911         return 1
 912 }
 913
 914 // mergePoint finds a block among a's blocks which dominates b and is itself
 915 // dominated by all of a's blocks. Returns nil if it can't find one.
 916 // Might return nil even if one does exist.
 917 func mergePoint(b *Block, a ...*Value) *Block {
 918         // Walk backward from b looking for one of the a's blocks.
 919
 920         // Max distance
 921         d := 100
 922
 923         for d > 0 {
 924                 for _, x := range a {
 925                         if b == x.Block {
 926                                 goto found
 927                         }
 928                 }
 929                 if len(b.Preds) > 1 {
 930                         // Don't know which way to go back. Abort.
 931                         return nil
 932                 }
 933                 b = b.Preds[0].b
 934                 d--
 935         }
 936         return nil // too far away
 937 found:
 938         // At this point, r is the first value in a that we find by walking backwards.
 939         // if we return anything, r will be it.
 940         r := b
 941
 942         // Keep going, counting the other a's that we find. They must all dominate r.
 943         na := 0
 944         for d > 0 {
 945                 for _, x := range a {
 946                         if b == x.Block {
 947                                 na++
 948                         }
 949                 }
 950                 if na == len(a) {
 951                         // Found all of a in a backwards walk. We can return r.
 952                         return r
 953                 }
 954                 if len(b.Preds) > 1 {
 955                         return nil
 956                 }
 957                 b = b.Preds[0].b
 958                 d--
 959
 960         }
 961         return nil // too far away
 962 }
 963
 964 // clobber invalidates values. Returns true.
 965 // clobber is used by rewrite rules to:
 966 //
 967 //      A) make sure the values are really dead and never used again.
 968 //      B) decrement use counts of the values' args.
 969 func clobber(vv ...*Value) bool {
 970         for _, v := range vv {
 971                 v.reset(OpInvalid)
 972                 // Note: leave v.Block intact.  The Block field is used after clobber.
 973         }
 974         return true
 975 }
 976
 977 // clobberIfDead resets v when use count is 1. Returns true.
 978 // clobberIfDead is used by rewrite rules to decrement
 979 // use counts of v's args when v is dead and never used.
 980 func clobberIfDead(v *Value) bool {
 981         if v.Uses == 1 {
 982                 v.reset(OpInvalid)
 983         }
 984         // Note: leave v.Block intact.  The Block field is used after clobberIfDead.
 985         return true
 986 }
 987
 988 // noteRule is an easy way to track if a rule is matched when writing
 989 // new ones.  Make the rule of interest also conditional on
 990 //
 991 //      noteRule("note to self: rule of interest matched")
 992 //
 993 // and that message will print when the rule matches.
 994 func noteRule(s string) bool {
 995         fmt.Println(s)
 996         return true
 997 }
 998
 999 // countRule increments Func.ruleMatches[key].
1000 // If Func.ruleMatches is non-nil at the end
1001 // of compilation, it will be printed to stdout.
1002 // This is intended to make it easier to find which functions
1003 // which contain lots of rules matches when developing new rules.
1004 func countRule(v *Value, key string) bool {
1005         f := v.Block.Func
1006         if f.ruleMatches == nil {
1007                 f.ruleMatches = make(map[string]int)
1008         }
1009         f.ruleMatches[key]++
1010         return true
1011 }
1012
1013 // warnRule generates compiler debug output with string s when
1014 // v is not in autogenerated code, cond is true and the rule has fired.
1015 func warnRule(cond bool, v *Value, s string) bool {
1016         if pos := v.Pos; pos.Line() > 1 && cond {
1017                 v.Block.Func.Warnl(pos, s)
1018         }
1019         return true
1020 }
1021
1022 // for a pseudo-op like (LessThan x), extract x.
1023 func flagArg(v *Value) *Value {
1024         if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
1025                 return nil
1026         }
1027         return v.Args[0]
1028 }
1029
1030 // arm64Negate finds the complement to an ARM64 condition code,
1031 // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
1032 //
1033 // For floating point, it's more subtle because NaN is unordered. We do
1034 // !LessThanF -> NotLessThanF, the latter takes care of NaNs.
1035 func arm64Negate(op Op) Op {
1036         switch op {
1037         case OpARM64LessThan:
1038                 return OpARM64GreaterEqual
1039         case OpARM64LessThanU:
1040                 return OpARM64GreaterEqualU
1041         case OpARM64GreaterThan:
1042                 return OpARM64LessEqual
1043         case OpARM64GreaterThanU:
1044                 return OpARM64LessEqualU
1045         case OpARM64LessEqual:
1046                 return OpARM64GreaterThan
1047         case OpARM64LessEqualU:
1048                 return OpARM64GreaterThanU
1049         case OpARM64GreaterEqual:
1050                 return OpARM64LessThan
1051         case OpARM64GreaterEqualU:
1052                 return OpARM64LessThanU
1053         case OpARM64Equal:
1054                 return OpARM64NotEqual
1055         case OpARM64NotEqual:
1056                 return OpARM64Equal
1057         case OpARM64LessThanF:
1058                 return OpARM64NotLessThanF
1059         case OpARM64NotLessThanF:
1060                 return OpARM64LessThanF
1061         case OpARM64LessEqualF:
1062                 return OpARM64NotLessEqualF
1063         case OpARM64NotLessEqualF:
1064                 return OpARM64LessEqualF
1065         case OpARM64GreaterThanF:
1066                 return OpARM64NotGreaterThanF
1067         case OpARM64NotGreaterThanF:
1068                 return OpARM64GreaterThanF
1069         case OpARM64GreaterEqualF:
1070                 return OpARM64NotGreaterEqualF
1071         case OpARM64NotGreaterEqualF:
1072                 return OpARM64GreaterEqualF
1073         default:
1074                 panic("unreachable")
1075         }
1076 }
1077
1078 // arm64Invert evaluates (InvertFlags op), which
1079 // is the same as altering the condition codes such
1080 // that the same result would be produced if the arguments
1081 // to the flag-generating instruction were reversed, e.g.
1082 // (InvertFlags (CMP x y)) -> (CMP y x)
1083 func arm64Invert(op Op) Op {
1084         switch op {
1085         case OpARM64LessThan:
1086                 return OpARM64GreaterThan
1087         case OpARM64LessThanU:
1088                 return OpARM64GreaterThanU
1089         case OpARM64GreaterThan:
1090                 return OpARM64LessThan
1091         case OpARM64GreaterThanU:
1092                 return OpARM64LessThanU
1093         case OpARM64LessEqual:
1094                 return OpARM64GreaterEqual
1095         case OpARM64LessEqualU:
1096                 return OpARM64GreaterEqualU
1097         case OpARM64GreaterEqual:
1098                 return OpARM64LessEqual
1099         case OpARM64GreaterEqualU:
1100                 return OpARM64LessEqualU
1101         case OpARM64Equal, OpARM64NotEqual:
1102                 return op
1103         case OpARM64LessThanF:
1104                 return OpARM64GreaterThanF
1105         case OpARM64GreaterThanF:
1106                 return OpARM64LessThanF
1107         case OpARM64LessEqualF:
1108                 return OpARM64GreaterEqualF
1109         case OpARM64GreaterEqualF:
1110                 return OpARM64LessEqualF
1111         case OpARM64NotLessThanF:
1112                 return OpARM64NotGreaterThanF
1113         case OpARM64NotGreaterThanF:
1114                 return OpARM64NotLessThanF
1115         case OpARM64NotLessEqualF:
1116                 return OpARM64NotGreaterEqualF
1117         case OpARM64NotGreaterEqualF:
1118                 return OpARM64NotLessEqualF
1119         default:
1120                 panic("unreachable")
1121         }
1122 }
1123
1124 // evaluate an ARM64 op against a flags value
1125 // that is potentially constant; return 1 for true,
1126 // -1 for false, and 0 for not constant.
1127 func ccARM64Eval(op Op, flags *Value) int {
1128         fop := flags.Op
1129         if fop == OpARM64InvertFlags {
1130                 return -ccARM64Eval(op, flags.Args[0])
1131         }
1132         if fop != OpARM64FlagConstant {
1133                 return 0
1134         }
1135         fc := flagConstant(flags.AuxInt)
1136         b2i := func(b bool) int {
1137                 if b {
1138                         return 1
1139                 }
1140                 return -1
1141         }
1142         switch op {
1143         case OpARM64Equal:
1144                 return b2i(fc.eq())
1145         case OpARM64NotEqual:
1146                 return b2i(fc.ne())
1147         case OpARM64LessThan:
1148                 return b2i(fc.lt())
1149         case OpARM64LessThanU:
1150                 return b2i(fc.ult())
1151         case OpARM64GreaterThan:
1152                 return b2i(fc.gt())
1153         case OpARM64GreaterThanU:
1154                 return b2i(fc.ugt())
1155         case OpARM64LessEqual:
1156                 return b2i(fc.le())
1157         case OpARM64LessEqualU:
1158                 return b2i(fc.ule())
1159         case OpARM64GreaterEqual:
1160                 return b2i(fc.ge())
1161         case OpARM64GreaterEqualU:
1162                 return b2i(fc.uge())
1163         }
1164         return 0
1165 }
1166
1167 // logRule logs the use of the rule s. This will only be enabled if
1168 // rewrite rules were generated with the -log option, see _gen/rulegen.go.
1169 func logRule(s string) {
1170         if ruleFile == nil {
1171                 // Open a log file to write log to. We open in append
1172                 // mode because all.bash runs the compiler lots of times,
1173                 // and we want the concatenation of all of those logs.
1174                 // This means, of course, that users need to rm the old log
1175                 // to get fresh data.
1176                 // TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
1177                 w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
1178                         os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
1179                 if err != nil {
1180                         panic(err)
1181                 }
1182                 ruleFile = w
1183         }
1184         _, err := fmt.Fprintln(ruleFile, s)
1185         if err != nil {
1186                 panic(err)
1187         }
1188 }
1189
1190 var ruleFile io.Writer
1191
1192 func min(x, y int64) int64 {
1193         if x < y {
1194                 return x
1195         }
1196         return y
1197 }
1198
1199 func isConstZero(v *Value) bool {
1200         switch v.Op {
1201         case OpConstNil:
1202                 return true
1203         case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
1204                 return v.AuxInt == 0
1205         }
1206         return false
1207 }
1208
1209 // reciprocalExact64 reports whether 1/c is exactly representable.
1210 func reciprocalExact64(c float64) bool {
1211         b := math.Float64bits(c)
1212         man := b & (1<<52 - 1)
1213         if man != 0 {
1214                 return false // not a power of 2, denormal, or NaN
1215         }
1216         exp := b >> 52 & (1<<11 - 1)
1217         // exponent bias is 0x3ff.  So taking the reciprocal of a number
1218         // changes the exponent to 0x7fe-exp.
1219         switch exp {
1220         case 0:
1221                 return false // ±0
1222         case 0x7ff:
1223                 return false // ±inf
1224         case 0x7fe:
1225                 return false // exponent is not representable
1226         default:
1227                 return true
1228         }
1229 }
1230
1231 // reciprocalExact32 reports whether 1/c is exactly representable.
1232 func reciprocalExact32(c float32) bool {
1233         b := math.Float32bits(c)
1234         man := b & (1<<23 - 1)
1235         if man != 0 {
1236                 return false // not a power of 2, denormal, or NaN
1237         }
1238         exp := b >> 23 & (1<<8 - 1)
1239         // exponent bias is 0x7f.  So taking the reciprocal of a number
1240         // changes the exponent to 0xfe-exp.
1241         switch exp {
1242         case 0:
1243                 return false // ±0
1244         case 0xff:
1245                 return false // ±inf
1246         case 0xfe:
1247                 return false // exponent is not representable
1248         default:
1249                 return true
1250         }
1251 }
1252
1253 // check if an immediate can be directly encoded into an ARM's instruction.
1254 func isARMImmRot(v uint32) bool {
1255         for i := 0; i < 16; i++ {
1256                 if v&^0xff == 0 {
1257                         return true
1258                 }
1259                 v = v<<2 | v>>30
1260         }
1261
1262         return false
1263 }
1264
1265 // overlap reports whether the ranges given by the given offset and
1266 // size pairs overlap.
1267 func overlap(offset1, size1, offset2, size2 int64) bool {
1268         if offset1 >= offset2 && offset2+size2 > offset1 {
1269                 return true
1270         }
1271         if offset2 >= offset1 && offset1+size1 > offset2 {
1272                 return true
1273         }
1274         return false
1275 }
1276
1277 func areAdjacentOffsets(off1, off2, size int64) bool {
1278         return off1+size == off2 || off1 == off2+size
1279 }
1280
1281 // check if value zeroes out upper 32-bit of 64-bit register.
1282 // depth limits recursion depth. In AMD64.rules 3 is used as limit,
1283 // because it catches same amount of cases as 4.
1284 func zeroUpper32Bits(x *Value, depth int) bool {
1285         switch x.Op {
1286         case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
1287                 OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
1288                 OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
1289                 OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
1290                 OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
1291                 OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
1292                 OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
1293                 OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
1294                 OpAMD64SHLL, OpAMD64SHLLconst:
1295                 return true
1296         case OpArg:
1297                 return x.Type.Size() == 4
1298         case OpPhi, OpSelect0, OpSelect1:
1299                 // Phis can use each-other as an arguments, instead of tracking visited values,
1300                 // just limit recursion depth.
1301                 if depth <= 0 {
1302                         return false
1303                 }
1304                 for i := range x.Args {
1305                         if !zeroUpper32Bits(x.Args[i], depth-1) {
1306                                 return false
1307                         }
1308                 }
1309                 return true
1310
1311         }
1312         return false
1313 }
1314
1315 // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
1316 func zeroUpper48Bits(x *Value, depth int) bool {
1317         switch x.Op {
1318         case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
1319                 return true
1320         case OpArg:
1321                 return x.Type.Size() == 2
1322         case OpPhi, OpSelect0, OpSelect1:
1323                 // Phis can use each-other as an arguments, instead of tracking visited values,
1324                 // just limit recursion depth.
1325                 if depth <= 0 {
1326                         return false
1327                 }
1328                 for i := range x.Args {
1329                         if !zeroUpper48Bits(x.Args[i], depth-1) {
1330                                 return false
1331                         }
1332                 }
1333                 return true
1334
1335         }
1336         return false
1337 }
1338
1339 // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
1340 func zeroUpper56Bits(x *Value, depth int) bool {
1341         switch x.Op {
1342         case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
1343                 return true
1344         case OpArg:
1345                 return x.Type.Size() == 1
1346         case OpPhi, OpSelect0, OpSelect1:
1347                 // Phis can use each-other as an arguments, instead of tracking visited values,
1348                 // just limit recursion depth.
1349                 if depth <= 0 {
1350                         return false
1351                 }
1352                 for i := range x.Args {
1353                         if !zeroUpper56Bits(x.Args[i], depth-1) {
1354                                 return false
1355                         }
1356                 }
1357                 return true
1358
1359         }
1360         return false
1361 }
1362
1363 // isInlinableMemmove reports whether the given arch performs a Move of the given size
1364 // faster than memmove. It will only return true if replacing the memmove with a Move is
1365 // safe, either because Move will do all of its loads before any of its stores, or
1366 // because the arguments are known to be disjoint.
1367 // This is used as a check for replacing memmove with Move ops.
1368 func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1369         // It is always safe to convert memmove into Move when its arguments are disjoint.
1370         // Move ops may or may not be faster for large sizes depending on how the platform
1371         // lowers them, so we only perform this optimization on platforms that we know to
1372         // have fast Move ops.
1373         switch c.arch {
1374         case "amd64":
1375                 return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
1376         case "386", "arm64":
1377                 return sz <= 8
1378         case "s390x", "ppc64", "ppc64le":
1379                 return sz <= 8 || disjoint(dst, sz, src, sz)
1380         case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
1381                 return sz <= 4
1382         }
1383         return false
1384 }
1385 func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1386         return isInlinableMemmove(dst, src, sz, c)
1387 }
1388
1389 // logLargeCopy logs the occurrence of a large copy.
1390 // The best place to do this is in the rewrite rules where the size of the move is easy to find.
1391 // "Large" is arbitrarily chosen to be 128 bytes; this may change.
1392 func logLargeCopy(v *Value, s int64) bool {
1393         if s < 128 {
1394                 return true
1395         }
1396         if logopt.Enabled() {
1397                 logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
1398         }
1399         return true
1400 }
1401 func LogLargeCopy(funcName string, pos src.XPos, s int64) {
1402         if s < 128 {
1403                 return
1404         }
1405         if logopt.Enabled() {
1406                 logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
1407         }
1408 }
1409
1410 // hasSmallRotate reports whether the architecture has rotate instructions
1411 // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
1412 func hasSmallRotate(c *Config) bool {
1413         switch c.arch {
1414         case "amd64", "386":
1415                 return true
1416         default:
1417                 return false
1418         }
1419 }
1420
1421 func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
1422         if sh < 0 || sh >= sz {
1423                 panic("PPC64 shift arg sh out of range")
1424         }
1425         if mb < 0 || mb >= sz {
1426                 panic("PPC64 shift arg mb out of range")
1427         }
1428         if me < 0 || me >= sz {
1429                 panic("PPC64 shift arg me out of range")
1430         }
1431         return int32(sh<<16 | mb<<8 | me)
1432 }
1433
1434 func GetPPC64Shiftsh(auxint int64) int64 {
1435         return int64(int8(auxint >> 16))
1436 }
1437
1438 func GetPPC64Shiftmb(auxint int64) int64 {
1439         return int64(int8(auxint >> 8))
1440 }
1441
1442 func GetPPC64Shiftme(auxint int64) int64 {
1443         return int64(int8(auxint))
1444 }
1445
1446 // Test if this value can encoded as a mask for a rlwinm like
1447 // operation.  Masks can also extend from the msb and wrap to
1448 // the lsb too.  That is, the valid masks are 32 bit strings
1449 // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
1450 func isPPC64WordRotateMask(v64 int64) bool {
1451         // Isolate rightmost 1 (if none 0) and add.
1452         v := uint32(v64)
1453         vp := (v & -v) + v
1454         // Likewise, for the wrapping case.
1455         vn := ^v
1456         vpn := (vn & -vn) + vn
1457         return (v&vp == 0 || vn&vpn == 0) && v != 0
1458 }
1459
1460 // Compress mask and shift into single value of the form
1461 // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
1462 // be used to regenerate the input mask.
1463 func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
1464         var mb, me, mbn, men int
1465
1466         // Determine boundaries and then decode them
1467         if mask == 0 || ^mask == 0 || rotate >= nbits {
1468                 panic("Invalid PPC64 rotate mask")
1469         } else if nbits == 32 {
1470                 mb = bits.LeadingZeros32(uint32(mask))
1471                 me = 32 - bits.TrailingZeros32(uint32(mask))
1472                 mbn = bits.LeadingZeros32(^uint32(mask))
1473                 men = 32 - bits.TrailingZeros32(^uint32(mask))
1474         } else {
1475                 mb = bits.LeadingZeros64(uint64(mask))
1476                 me = 64 - bits.TrailingZeros64(uint64(mask))
1477                 mbn = bits.LeadingZeros64(^uint64(mask))
1478                 men = 64 - bits.TrailingZeros64(^uint64(mask))
1479         }
1480         // Check for a wrapping mask (e.g bits at 0 and 63)
1481         if mb == 0 && me == int(nbits) {
1482                 // swap the inverted values
1483                 mb, me = men, mbn
1484         }
1485
1486         return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
1487 }
1488
1489 // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask.  The values returned as
1490 // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
1491 func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
1492         auxint := uint64(sauxint)
1493         rotate = int64((auxint >> 16) & 0xFF)
1494         mb = int64((auxint >> 8) & 0xFF)
1495         me = int64((auxint >> 0) & 0xFF)
1496         nbits := int64((auxint >> 24) & 0xFF)
1497         mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
1498         if mb > me {
1499                 mask = ^mask
1500         }
1501         if nbits == 32 {
1502                 mask = uint64(uint32(mask))
1503         }
1504
1505         // Fixup ME to match ISA definition.  The second argument to MASK(..,me)
1506         // is inclusive.
1507         me = (me - 1) & (nbits - 1)
1508         return
1509 }
1510
1511 // This verifies that the mask is a set of
1512 // consecutive bits including the least
1513 // significant bit.
1514 func isPPC64ValidShiftMask(v int64) bool {
1515         if (v != 0) && ((v+1)&v) == 0 {
1516                 return true
1517         }
1518         return false
1519 }
1520
1521 func getPPC64ShiftMaskLength(v int64) int64 {
1522         return int64(bits.Len64(uint64(v)))
1523 }
1524
1525 // Decompose a shift right into an equivalent rotate/mask,
1526 // and return mask & m.
1527 func mergePPC64RShiftMask(m, s, nbits int64) int64 {
1528         smask := uint64((1<<uint(nbits))-1) >> uint(s)
1529         return m & int64(smask)
1530 }
1531
1532 // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
1533 func mergePPC64AndSrwi(m, s int64) int64 {
1534         mask := mergePPC64RShiftMask(m, s, 32)
1535         if !isPPC64WordRotateMask(mask) {
1536                 return 0
1537         }
1538         return encodePPC64RotateMask((32-s)&31, mask, 32)
1539 }
1540
1541 // Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
1542 // Return the encoded RLWINM constant, or 0 if they cannot be merged.
1543 func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
1544         mask_1 := uint64(0xFFFFFFFF >> uint(srw))
1545         // for CLRLSLDI, it's more convient to think of it as a mask left bits then rotate left.
1546         mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1547
1548         // Rewrite mask to apply after the final left shift.
1549         mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
1550
1551         r_1 := 32 - srw
1552         r_2 := GetPPC64Shiftsh(sld)
1553         r_3 := (r_1 + r_2) & 31 // This can wrap.
1554
1555         if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
1556                 return 0
1557         }
1558         return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
1559 }
1560
1561 // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
1562 // the encoded RLWINM constant, or 0 if they cannot be merged.
1563 func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
1564         r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
1565         // for CLRLSLDI, it's more convient to think of it as a mask left bits then rotate left.
1566         mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1567
1568         // combine the masks, and adjust for the final left shift.
1569         mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
1570         r_2 := GetPPC64Shiftsh(int64(sld))
1571         r_3 := (r_1 + r_2) & 31 // This can wrap.
1572
1573         // Verify the result is still a valid bitmask of <= 32 bits.
1574         if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
1575                 return 0
1576         }
1577         return encodePPC64RotateMask(r_3, int64(mask_3), 32)
1578 }
1579
1580 // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
1581 // or return 0 if they cannot be combined.
1582 func mergePPC64SldiSrw(sld, srw int64) int64 {
1583         if sld > srw || srw >= 32 {
1584                 return 0
1585         }
1586         mask_r := uint32(0xFFFFFFFF) >> uint(srw)
1587         mask_l := uint32(0xFFFFFFFF) >> uint(sld)
1588         mask := (mask_r & mask_l) << uint(sld)
1589         return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
1590 }
1591
1592 // Convenience function to rotate a 32 bit constant value by another constant.
1593 func rotateLeft32(v, rotate int64) int64 {
1594         return int64(bits.RotateLeft32(uint32(v), int(rotate)))
1595 }
1596
1597 func rotateRight64(v, rotate int64) int64 {
1598         return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
1599 }
1600
1601 // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
1602 func armBFAuxInt(lsb, width int64) arm64BitField {
1603         if lsb < 0 || lsb > 63 {
1604                 panic("ARM(64) bit field lsb constant out of range")
1605         }
1606         if width < 1 || lsb+width > 64 {
1607                 panic("ARM(64) bit field width constant out of range")
1608         }
1609         return arm64BitField(width | lsb<<8)
1610 }
1611
1612 // returns the lsb part of the auxInt field of arm64 bitfield ops.
1613 func (bfc arm64BitField) getARM64BFlsb() int64 {
1614         return int64(uint64(bfc) >> 8)
1615 }
1616
1617 // returns the width part of the auxInt field of arm64 bitfield ops.
1618 func (bfc arm64BitField) getARM64BFwidth() int64 {
1619         return int64(bfc) & 0xff
1620 }
1621
1622 // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
1623 func isARM64BFMask(lsb, mask, rshift int64) bool {
1624         shiftedMask := int64(uint64(mask) >> uint64(rshift))
1625         return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
1626 }
1627
1628 // returns the bitfield width of mask >> rshift for arm64 bitfield ops.
1629 func arm64BFWidth(mask, rshift int64) int64 {
1630         shiftedMask := int64(uint64(mask) >> uint64(rshift))
1631         if shiftedMask == 0 {
1632                 panic("ARM64 BF mask is zero")
1633         }
1634         return nto(shiftedMask)
1635 }
1636
1637 // sizeof returns the size of t in bytes.
1638 // It will panic if t is not a *types.Type.
1639 func sizeof(t interface{}) int64 {
1640         return t.(*types.Type).Size()
1641 }
1642
1643 // registerizable reports whether t is a primitive type that fits in
1644 // a register. It assumes float64 values will always fit into registers
1645 // even if that isn't strictly true.
1646 func registerizable(b *Block, typ *types.Type) bool {
1647         if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
1648                 return true
1649         }
1650         if typ.IsInteger() {
1651                 return typ.Size() <= b.Func.Config.RegSize
1652         }
1653         return false
1654 }
1655
1656 // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
1657 func needRaceCleanup(sym *AuxCall, v *Value) bool {
1658         f := v.Block.Func
1659         if !f.Config.Race {
1660                 return false
1661         }
1662         if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
1663                 return false
1664         }
1665         for _, b := range f.Blocks {
1666                 for _, v := range b.Values {
1667                         switch v.Op {
1668                         case OpStaticCall, OpStaticLECall:
1669                                 // Check for racefuncenter will encounter racefuncexit and vice versa.
1670                                 // Allow calls to panic*
1671                                 s := v.Aux.(*AuxCall).Fn.String()
1672                                 switch s {
1673                                 case "runtime.racefuncenter", "runtime.racefuncexit",
1674                                         "runtime.panicdivide", "runtime.panicwrap",
1675                                         "runtime.panicshift":
1676                                         continue
1677                                 }
1678                                 // If we encountered any call, we need to keep racefunc*,
1679                                 // for accurate stacktraces.
1680                                 return false
1681                         case OpPanicBounds, OpPanicExtend:
1682                                 // Note: these are panic generators that are ok (like the static calls above).
1683                         case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
1684                                 // We must keep the race functions if there are any other call types.
1685                                 return false
1686                         }
1687                 }
1688         }
1689         if isSameCall(sym, "runtime.racefuncenter") {
1690                 // TODO REGISTER ABI this needs to be cleaned up.
1691                 // If we're removing racefuncenter, remove its argument as well.
1692                 if v.Args[0].Op != OpStore {
1693                         if v.Op == OpStaticLECall {
1694                                 // there is no store, yet.
1695                                 return true
1696                         }
1697                         return false
1698                 }
1699                 mem := v.Args[0].Args[2]
1700                 v.Args[0].reset(OpCopy)
1701                 v.Args[0].AddArg(mem)
1702         }
1703         return true
1704 }
1705
1706 // symIsRO reports whether sym is a read-only global.
1707 func symIsRO(sym interface{}) bool {
1708         lsym := sym.(*obj.LSym)
1709         return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
1710 }
1711
1712 // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
1713 func symIsROZero(sym Sym) bool {
1714         lsym := sym.(*obj.LSym)
1715         if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
1716                 return false
1717         }
1718         for _, b := range lsym.P {
1719                 if b != 0 {
1720                         return false
1721                 }
1722         }
1723         return true
1724 }
1725
1726 // read8 reads one byte from the read-only global sym at offset off.
1727 func read8(sym interface{}, off int64) uint8 {
1728         lsym := sym.(*obj.LSym)
1729         if off >= int64(len(lsym.P)) || off < 0 {
1730                 // Invalid index into the global sym.
1731                 // This can happen in dead code, so we don't want to panic.
1732                 // Just return any value, it will eventually get ignored.
1733                 // See issue 29215.
1734                 return 0
1735         }
1736         return lsym.P[off]
1737 }
1738
1739 // read16 reads two bytes from the read-only global sym at offset off.
1740 func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
1741         lsym := sym.(*obj.LSym)
1742         // lsym.P is written lazily.
1743         // Bytes requested after the end of lsym.P are 0.
1744         var src []byte
1745         if 0 <= off && off < int64(len(lsym.P)) {
1746                 src = lsym.P[off:]
1747         }
1748         buf := make([]byte, 2)
1749         copy(buf, src)
1750         return byteorder.Uint16(buf)
1751 }
1752
1753 // read32 reads four bytes from the read-only global sym at offset off.
1754 func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
1755         lsym := sym.(*obj.LSym)
1756         var src []byte
1757         if 0 <= off && off < int64(len(lsym.P)) {
1758                 src = lsym.P[off:]
1759         }
1760         buf := make([]byte, 4)
1761         copy(buf, src)
1762         return byteorder.Uint32(buf)
1763 }
1764
1765 // read64 reads eight bytes from the read-only global sym at offset off.
1766 func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
1767         lsym := sym.(*obj.LSym)
1768         var src []byte
1769         if 0 <= off && off < int64(len(lsym.P)) {
1770                 src = lsym.P[off:]
1771         }
1772         buf := make([]byte, 8)
1773         copy(buf, src)
1774         return byteorder.Uint64(buf)
1775 }
1776
1777 // sequentialAddresses reports true if it can prove that x + n == y
1778 func sequentialAddresses(x, y *Value, n int64) bool {
1779         if x == y && n == 0 {
1780                 return true
1781         }
1782         if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
1783                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1784                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1785                 return true
1786         }
1787         if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1788                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1789                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1790                 return true
1791         }
1792         if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
1793                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1794                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1795                 return true
1796         }
1797         if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1798                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1799                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1800                 return true
1801         }
1802         return false
1803 }
1804
1805 // flagConstant represents the result of a compile-time comparison.
1806 // The sense of these flags does not necessarily represent the hardware's notion
1807 // of a flags register - these are just a compile-time construct.
1808 // We happen to match the semantics to those of arm/arm64.
1809 // Note that these semantics differ from x86: the carry flag has the opposite
1810 // sense on a subtraction!
1811 //
1812 //      On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
1813 //      On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
1814 //       (because it does x + ^y + C).
1815 //
1816 // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
1817 type flagConstant uint8
1818
1819 // N reports whether the result of an operation is negative (high bit set).
1820 func (fc flagConstant) N() bool {
1821         return fc&1 != 0
1822 }
1823
1824 // Z reports whether the result of an operation is 0.
1825 func (fc flagConstant) Z() bool {
1826         return fc&2 != 0
1827 }
1828
1829 // C reports whether an unsigned add overflowed (carry), or an
1830 // unsigned subtract did not underflow (borrow).
1831 func (fc flagConstant) C() bool {
1832         return fc&4 != 0
1833 }
1834
1835 // V reports whether a signed operation overflowed or underflowed.
1836 func (fc flagConstant) V() bool {
1837         return fc&8 != 0
1838 }
1839
1840 func (fc flagConstant) eq() bool {
1841         return fc.Z()
1842 }
1843 func (fc flagConstant) ne() bool {
1844         return !fc.Z()
1845 }
1846 func (fc flagConstant) lt() bool {
1847         return fc.N() != fc.V()
1848 }
1849 func (fc flagConstant) le() bool {
1850         return fc.Z() || fc.lt()
1851 }
1852 func (fc flagConstant) gt() bool {
1853         return !fc.Z() && fc.ge()
1854 }
1855 func (fc flagConstant) ge() bool {
1856         return fc.N() == fc.V()
1857 }
1858 func (fc flagConstant) ult() bool {
1859         return !fc.C()
1860 }
1861 func (fc flagConstant) ule() bool {
1862         return fc.Z() || fc.ult()
1863 }
1864 func (fc flagConstant) ugt() bool {
1865         return !fc.Z() && fc.uge()
1866 }
1867 func (fc flagConstant) uge() bool {
1868         return fc.C()
1869 }
1870
1871 func (fc flagConstant) ltNoov() bool {
1872         return fc.lt() && !fc.V()
1873 }
1874 func (fc flagConstant) leNoov() bool {
1875         return fc.le() && !fc.V()
1876 }
1877 func (fc flagConstant) gtNoov() bool {
1878         return fc.gt() && !fc.V()
1879 }
1880 func (fc flagConstant) geNoov() bool {
1881         return fc.ge() && !fc.V()
1882 }
1883
1884 func (fc flagConstant) String() string {
1885         return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
1886 }
1887
1888 type flagConstantBuilder struct {
1889         N bool
1890         Z bool
1891         C bool
1892         V bool
1893 }
1894
1895 func (fcs flagConstantBuilder) encode() flagConstant {
1896         var fc flagConstant
1897         if fcs.N {
1898                 fc |= 1
1899         }
1900         if fcs.Z {
1901                 fc |= 2
1902         }
1903         if fcs.C {
1904                 fc |= 4
1905         }
1906         if fcs.V {
1907                 fc |= 8
1908         }
1909         return fc
1910 }
1911
1912 // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
1913 //  - the results of the C flag are different
1914 //  - the results of the V flag when y==minint are different
1915
1916 // addFlags64 returns the flags that would be set from computing x+y.
1917 func addFlags64(x, y int64) flagConstant {
1918         var fcb flagConstantBuilder
1919         fcb.Z = x+y == 0
1920         fcb.N = x+y < 0
1921         fcb.C = uint64(x+y) < uint64(x)
1922         fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1923         return fcb.encode()
1924 }
1925
1926 // subFlags64 returns the flags that would be set from computing x-y.
1927 func subFlags64(x, y int64) flagConstant {
1928         var fcb flagConstantBuilder
1929         fcb.Z = x-y == 0
1930         fcb.N = x-y < 0
1931         fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
1932         fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
1933         return fcb.encode()
1934 }
1935
1936 // addFlags32 returns the flags that would be set from computing x+y.
1937 func addFlags32(x, y int32) flagConstant {
1938         var fcb flagConstantBuilder
1939         fcb.Z = x+y == 0
1940         fcb.N = x+y < 0
1941         fcb.C = uint32(x+y) < uint32(x)
1942         fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1943         return fcb.encode()
1944 }
1945
1946 // subFlags32 returns the flags that would be set from computing x-y.
1947 func subFlags32(x, y int32) flagConstant {
1948         var fcb flagConstantBuilder
1949         fcb.Z = x-y == 0
1950         fcb.N = x-y < 0
1951         fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
1952         fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
1953         return fcb.encode()
1954 }
1955
1956 // logicFlags64 returns flags set to the sign/zeroness of x.
1957 // C and V are set to false.
1958 func logicFlags64(x int64) flagConstant {
1959         var fcb flagConstantBuilder
1960         fcb.Z = x == 0
1961         fcb.N = x < 0
1962         return fcb.encode()
1963 }
1964
1965 // logicFlags32 returns flags set to the sign/zeroness of x.
1966 // C and V are set to false.
1967 func logicFlags32(x int32) flagConstant {
1968         var fcb flagConstantBuilder
1969         fcb.Z = x == 0
1970         fcb.N = x < 0
1971         return fcb.encode()
1972 }
1973
1974 func makeJumpTableSym(b *Block) *obj.LSym {
1975         s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.LSym(), b.ID))
1976         s.Set(obj.AttrDuplicateOK, true)
1977         s.Set(obj.AttrLocal, true)
1978         return s
1979 }
1980
1981 // canRotate reports whether the architecture supports
1982 // rotates of integer registers with the given number of bits.
1983 func canRotate(c *Config, bits int64) bool {
1984         if bits > c.PtrSize*8 {
1985                 // Don't rewrite to rotates bigger than the machine word.
1986                 return false
1987         }
1988         switch c.arch {
1989         case "386", "amd64", "arm64":
1990                 return true
1991         case "arm", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
1992                 return bits >= 32
1993         default:
1994                 return false
1995         }
1996 }
1997
1998 // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
1999 func isARM64bitcon(x uint64) bool {
2000         if x == 1<<64-1 || x == 0 {
2001                 return false
2002         }
2003         // determine the period and sign-extend a unit to 64 bits
2004         switch {
2005         case x != x>>32|x<<32:
2006                 // period is 64
2007                 // nothing to do
2008         case x != x>>16|x<<48:
2009                 // period is 32
2010                 x = uint64(int64(int32(x)))
2011         case x != x>>8|x<<56:
2012                 // period is 16
2013                 x = uint64(int64(int16(x)))
2014         case x != x>>4|x<<60:
2015                 // period is 8
2016                 x = uint64(int64(int8(x)))
2017         default:
2018                 // period is 4 or 2, always true
2019                 // 0001, 0010, 0100, 1000 -- 0001 rotate
2020                 // 0011, 0110, 1100, 1001 -- 0011 rotate
2021                 // 0111, 1011, 1101, 1110 -- 0111 rotate
2022                 // 0101, 1010             -- 01   rotate, repeat
2023                 return true
2024         }
2025         return sequenceOfOnes(x) || sequenceOfOnes(^x)
2026 }
2027
2028 // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
2029 func sequenceOfOnes(x uint64) bool {
2030         y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
2031         y += x
2032         return (y-1)&y == 0
2033 }
2034
2035 // isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction.
2036 func isARM64addcon(v int64) bool {
2037         /* uimm12 or uimm24? */
2038         if v < 0 {
2039                 return false
2040         }
2041         if (v & 0xFFF) == 0 {
2042                 v >>= 12
2043         }
2044         return v <= 0xFFF
2045 }