src/cmd/compile/internal/ssa/rewrite.go

   1 // Copyright 2015 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package ssa
   6
   7 import (
   8         "cmd/compile/internal/base"
   9         "cmd/compile/internal/logopt"
  10         "cmd/compile/internal/types"
  11         "cmd/internal/obj"
  12         "cmd/internal/obj/s390x"
  13         "cmd/internal/objabi"
  14         "cmd/internal/src"
  15         "encoding/binary"
  16         "fmt"
  17         "io"
  18         "math"
  19         "math/bits"
  20         "os"
  21         "path/filepath"
  22 )
  23
  24 type deadValueChoice bool
  25
  26 const (
  27         leaveDeadValues  deadValueChoice = false
  28         removeDeadValues                 = true
  29 )
  30
// applyRewrite repeatedly applies the block rewriter rb and the value
// rewriter rv to every block and value of f until a complete pass over the
// function changes nothing. It then frees every value that was reset to
// OpInvalid and compacts each block's value list.
//
// deadcode indicates whether rewrite should try to remove any values that become dead.
//
// While rewriting, statement-boundary marks (Pos.IsStmt) on copies that are
// bypassed are moved to a neighbouring value where possible; otherwise they
// are recorded in pendingLines and re-homed by the final cleanup loop.
func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
	// repeat rewrites until we find no more rewrites
	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
	pendingLines.clear()
	debug := f.pass.debug
	if debug > 1 {
		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
	}
	var iters int
	// states holds the rewrite hashes seen so far; it is only allocated once
	// cycle detection is switched on below.
	var states map[string]bool
	for {
		change := false     // some block or value was rewritten in this pass
		deadChange := false // some unused value was reset in this pass
		for _, b := range f.Blocks {
			// b0 is a snapshot of b taken before rewriting, used only for debug output.
			var b0 *Block
			if debug > 1 {
				b0 = new(Block)
				*b0 = *b
				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
			}
			// Make each control value skip over any chain of copies.
			for i, c := range b.ControlValues() {
				for c.Op == OpCopy {
					c = c.Args[0]
					b.ReplaceControl(i, c)
				}
			}
			if rb(b) {
				change = true
				if debug > 1 {
					fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
				}
			}
			for j, v := range b.Values {
				// v0 is a snapshot of v taken before rewriting, used only for debug output.
				var v0 *Value
				if debug > 1 {
					v0 = new(Value)
					*v0 = *v
					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
				}
				if v.Uses == 0 && v.removeable() {
					if v.Op != OpInvalid && deadcode == removeDeadValues {
						// Reset any values that are now unused, so that we decrement
						// the use count of all of its arguments.
						// Not quite a deadcode pass, because it does not handle cycles.
						// But it should help Uses==1 rules to fire.
						v.reset(OpInvalid)
						deadChange = true
					}
					// No point rewriting values which aren't used.
					continue
				}

				vchange := phielimValue(v)
				if vchange && debug > 1 {
					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
				}

				// Eliminate copy inputs.
				// If any copy input becomes unused, mark it
				// as invalid and discard its argument. Repeat
				// recursively on the discarded argument.
				// This phase helps remove phantom "dead copy" uses
				// of a value so that a x.Uses==1 rule condition
				// fires reliably.
				for i, a := range v.Args {
					if a.Op != OpCopy {
						continue
					}
					aa := copySource(a)
					v.SetArg(i, aa)
					// If a, a copy, has a line boundary indicator, attempt to find a new value
					// to hold it.  The first candidate is the value that will replace a (aa),
					// if it shares the same block and line and is eligible.
					// The second option is v, which has a as an input.  Because aa is earlier in
					// the data flow, it is the better choice.
					if a.Pos.IsStmt() == src.PosIsStmt {
						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
							aa.Pos = aa.Pos.WithIsStmt()
						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
							v.Pos = v.Pos.WithIsStmt()
						} else {
							// Record the lost line and look for a new home after all rewrites are complete.
							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
							// line to appear in more than one block, but only one block is stored, so if both end
							// up here, then one will be lost.
							pendingLines.set(a.Pos, int32(a.Block.ID))
						}
						a.Pos = a.Pos.WithNotStmt()
					}
					vchange = true
					// Reset a if it is now unused, then continue with its first
					// argument, for as long as the value reached has no uses.
					// Note that b here shadows the enclosing block variable.
					for a.Uses == 0 {
						b := a.Args[0]
						a.reset(OpInvalid)
						a = b
					}
				}
				if vchange && debug > 1 {
					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
				}

				// apply rewrite function
				if rv(v) {
					vchange = true
					// If value changed to a poor choice for a statement boundary, move the boundary
					if v.Pos.IsStmt() == src.PosIsStmt {
						if k := nextGoodStatementIndex(v, j, b); k != j {
							v.Pos = v.Pos.WithNotStmt()
							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
						}
					}
				}

				change = change || vchange
				if vchange && debug > 1 {
					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
				}
			}
		}
		// Stop once a whole pass neither rewrote nor reset anything.
		if !change && !deadChange {
			break
		}
		iters++
		if (iters > 1000 || debug >= 2) && change {
			// We've done a suspiciously large number of rewrites (or we're in debug mode).
			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
			// and the maximum value encountered during make.bash is 12.
			// Start checking for cycles. (This is too expensive to do routinely.)
			// Note: we avoid this path for deadChange-only iterations, to fix #51639.
			if states == nil {
				states = make(map[string]bool)
			}
			h := f.rewriteHash()
			if _, ok := states[h]; ok {
				// We've found a cycle.
				// To diagnose it, set debug to 2 and start again,
				// so that we'll print all rules applied until we complete another cycle.
				// If debug is already >= 2, we've already done that, so it's time to crash.
				if debug < 2 {
					debug = 2
					states = make(map[string]bool)
				} else {
					f.Fatalf("rewrite cycle detected")
				}
			}
			states[h] = true
		}
	}
	// remove clobbered values
	for _, b := range f.Blocks {
		// j is the number of values kept so far; survivors are moved down to index j.
		j := 0
		for i, v := range b.Values {
			vl := v.Pos
			if v.Op == OpInvalid {
				// A freed value that carried a statement mark leaves it pending for this block.
				if v.Pos.IsStmt() == src.PosIsStmt {
					pendingLines.set(vl, int32(b.ID))
				}
				f.freeValue(v)
				continue
			}
			// Give a pending statement mark recorded for this block to the
			// first eligible surviving value with the same position.
			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
				pendingLines.remove(vl)
				v.Pos = v.Pos.WithIsStmt()
			}
			if i != j {
				b.Values[j] = v
			}
			j++
		}
		// A mark still pending at the block's own position goes to the block itself.
		if pendingLines.get(b.Pos) == int32(b.ID) {
			b.Pos = b.Pos.WithIsStmt()
			pendingLines.remove(b.Pos)
		}
		b.truncateValues(j)
	}
}
 207
 208 // Common functions called from rewriting rules
 209
 210 func is64BitFloat(t *types.Type) bool {
 211         return t.Size() == 8 && t.IsFloat()
 212 }
 213
 214 func is32BitFloat(t *types.Type) bool {
 215         return t.Size() == 4 && t.IsFloat()
 216 }
 217
 218 func is64BitInt(t *types.Type) bool {
 219         return t.Size() == 8 && t.IsInteger()
 220 }
 221
 222 func is32BitInt(t *types.Type) bool {
 223         return t.Size() == 4 && t.IsInteger()
 224 }
 225
 226 func is16BitInt(t *types.Type) bool {
 227         return t.Size() == 2 && t.IsInteger()
 228 }
 229
 230 func is8BitInt(t *types.Type) bool {
 231         return t.Size() == 1 && t.IsInteger()
 232 }
 233
// isPtr reports whether t is pointer-shaped, as decided by t.IsPtrShaped.
func isPtr(t *types.Type) bool {
	return t.IsPtrShaped()
}

// isSigned reports whether t is signed, as decided by t.IsSigned.
func isSigned(t *types.Type) bool {
	return t.IsSigned()
}
 241
 242 // mergeSym merges two symbolic offsets. There is no real merging of
 243 // offsets, we just pick the non-nil one.
 244 func mergeSym(x, y Sym) Sym {
 245         if x == nil {
 246                 return y
 247         }
 248         if y == nil {
 249                 return x
 250         }
 251         panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
 252 }
 253
 254 func canMergeSym(x, y Sym) bool {
 255         return x == nil || y == nil
 256 }
 257
 258 // canMergeLoadClobber reports whether the load can be merged into target without
 259 // invalidating the schedule.
 260 // It also checks that the other non-load argument x is something we
 261 // are ok with clobbering.
 262 func canMergeLoadClobber(target, load, x *Value) bool {
 263         // The register containing x is going to get clobbered.
 264         // Don't merge if we still need the value of x.
 265         // We don't have liveness information here, but we can
 266         // approximate x dying with:
 267         //  1) target is x's only use.
 268         //  2) target is not in a deeper loop than x.
 269         if x.Uses != 1 {
 270                 return false
 271         }
 272         loopnest := x.Block.Func.loopnest()
 273         loopnest.calculateDepths()
 274         if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
 275                 return false
 276         }
 277         return canMergeLoad(target, load)
 278 }
 279
// canMergeLoad reports whether the load can be merged into target without
// invalidating the schedule.
//
// It requires target and load to be in the same block and load to have
// exactly one use. It then walks backwards, within that block, through the
// values that target's other arguments depend on, and rejects the merge if
// it meets a memory state that is not known to precede load's memory
// argument. The walk is bounded and conservatively answers false when a
// bound is hit.
func canMergeLoad(target, load *Value) bool {
	if target.Block.ID != load.Block.ID {
		// If the load is in a different block do not merge it.
		return false
	}

	// We can't merge the load into the target if the load
	// has more than one use.
	if load.Uses != 1 {
		return false
	}

	mem := load.MemoryArg()

	// We need the load's memory arg to still be alive at target. That
	// can't be the case if one of target's args depends on a memory
	// state that is a successor of load's memory arg.
	//
	// For example, it would be invalid to merge load into target in
	// the following situation because newmem has killed oldmem
	// before target is reached:
	//     load = read ... oldmem
	//   newmem = write ... oldmem
	//     arg0 = read ... newmem
	//   target = add arg0 load
	//
	// If the argument comes from a different block then we can exclude
	// it immediately because it must dominate load (which is in the
	// same block as target).
	// args is the worklist of same-block values still to be examined.
	var args []*Value
	for _, a := range target.Args {
		if a != load && a.Block.ID == target.Block.ID {
			args = append(args, a)
		}
	}

	// memPreds contains memory states known to be predecessors of load's
	// memory state. It is lazily initialized.
	var memPreds map[*Value]bool
	for i := 0; len(args) > 0; i++ {
		const limit = 100
		if i >= limit {
			// Give up if we have done a lot of iterations.
			return false
		}
		// Pop the most recently added value off the worklist.
		v := args[len(args)-1]
		args = args[:len(args)-1]
		if target.Block.ID != v.Block.ID {
			// Since target and load are in the same block
			// we can stop searching when we leave the block.
			continue
		}
		if v.Op == OpPhi {
			// A Phi implies we have reached the top of the block.
			// The memory phi, if it exists, is always
			// the first logical store in the block.
			continue
		}
		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
			// We could handle this situation however it is likely
			// to be very rare.
			return false
		}
		if v.Op.SymEffect()&SymAddr != 0 {
			// This case prevents an operation that calculates the
			// address of a local variable from being forced to schedule
			// before its corresponding VarDef.
			// See issue 28445.
			//   v1 = LOAD ...
			//   v2 = VARDEF
			//   v3 = LEAQ
			//   v4 = CMPQ v1 v3
			// We don't want to combine the CMPQ with the load, because
			// that would force the CMPQ to schedule before the VARDEF, which
			// in turn requires the LEAQ to schedule before the VARDEF.
			return false
		}
		if v.Type.IsMemory() {
			if memPreds == nil {
				// Initialise a map containing memory states
				// known to be predecessors of load's memory
				// state.
				memPreds = make(map[*Value]bool)
				m := mem
				// Follow at most 50 links of the memory chain starting at mem.
				// This limit and i shadow the ones of the enclosing loop.
				const limit = 50
				for i := 0; i < limit; i++ {
					if m.Op == OpPhi {
						// The memory phi, if it exists, is always
						// the first logical store in the block.
						break
					}
					if m.Block.ID != target.Block.ID {
						break
					}
					if !m.Type.IsMemory() {
						break
					}
					memPreds[m] = true
					if len(m.Args) == 0 {
						break
					}
					m = m.MemoryArg()
				}
			}

			// We can merge if v is a predecessor of mem.
			//
			// For example, we can merge load into target in the
			// following scenario:
			//      x = read ... v
			//    mem = write ... v
			//   load = read ... mem
			// target = add x load
			if memPreds[v] {
				continue
			}
			return false
		}
		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
			// If v takes mem as an input then we know mem
			// is valid at this point.
			continue
		}
		// Otherwise keep searching through v's arguments in this block.
		for _, a := range v.Args {
			if target.Block.ID == a.Block.ID {
				args = append(args, a)
			}
		}
	}

	return true
}
 414
 415 // isSameCall reports whether sym is the same as the given named symbol.
 416 func isSameCall(sym interface{}, name string) bool {
 417         fn := sym.(*AuxCall).Fn
 418         return fn != nil && fn.String() == name
 419 }
 420
// canLoadUnaligned reports if the architecture supports unaligned load operations.
// It is true exactly when the Alignment of c's target architecture is 1.
func canLoadUnaligned(c *Config) bool {
	return c.ctxt.Arch.Alignment == 1
}
 425
 426 // nlzX returns the number of leading zeros.
 427 func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
 428 func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
 429 func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
 430 func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
 431
 432 // ntzX returns the number of trailing zeros.
 433 func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
 434 func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
 435 func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
 436 func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
 437
 438 func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
 439 func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
 440 func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
 441 func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
 442 func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
 443
 444 // nto returns the number of trailing ones.
 445 func nto(x int64) int64 {
 446         return int64(ntz64(^x))
 447 }
 448
 449 // logX returns logarithm of n base 2.
 450 // n must be a positive power of 2 (isPowerOfTwoX returns true).
 451 func log8(n int8) int64 {
 452         return int64(bits.Len8(uint8(n))) - 1
 453 }
 454 func log16(n int16) int64 {
 455         return int64(bits.Len16(uint16(n))) - 1
 456 }
 457 func log32(n int32) int64 {
 458         return int64(bits.Len32(uint32(n))) - 1
 459 }
 460 func log64(n int64) int64 {
 461         return int64(bits.Len64(uint64(n))) - 1
 462 }
 463
 464 // log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
 465 // Rounds down.
 466 func log2uint32(n int64) int64 {
 467         return int64(bits.Len32(uint32(n))) - 1
 468 }
 469
 470 // isPowerOfTwoX functions report whether n is a power of 2.
 471 func isPowerOfTwo8(n int8) bool {
 472         return n > 0 && n&(n-1) == 0
 473 }
 474 func isPowerOfTwo16(n int16) bool {
 475         return n > 0 && n&(n-1) == 0
 476 }
 477 func isPowerOfTwo32(n int32) bool {
 478         return n > 0 && n&(n-1) == 0
 479 }
 480 func isPowerOfTwo64(n int64) bool {
 481         return n > 0 && n&(n-1) == 0
 482 }
 483
 484 // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
 485 func isUint64PowerOfTwo(in int64) bool {
 486         n := uint64(in)
 487         return n > 0 && n&(n-1) == 0
 488 }
 489
 490 // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
 491 func isUint32PowerOfTwo(in int64) bool {
 492         n := uint64(uint32(in))
 493         return n > 0 && n&(n-1) == 0
 494 }
 495
 496 // is32Bit reports whether n can be represented as a signed 32 bit integer.
 497 func is32Bit(n int64) bool {
 498         return n == int64(int32(n))
 499 }
 500
 501 // is16Bit reports whether n can be represented as a signed 16 bit integer.
 502 func is16Bit(n int64) bool {
 503         return n == int64(int16(n))
 504 }
 505
 506 // is8Bit reports whether n can be represented as a signed 8 bit integer.
 507 func is8Bit(n int64) bool {
 508         return n == int64(int8(n))
 509 }
 510
 511 // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
 512 func isU8Bit(n int64) bool {
 513         return n == int64(uint8(n))
 514 }
 515
 516 // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
 517 func isU12Bit(n int64) bool {
 518         return 0 <= n && n < (1<<12)
 519 }
 520
 521 // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
 522 func isU16Bit(n int64) bool {
 523         return n == int64(uint16(n))
 524 }
 525
 526 // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
 527 func isU32Bit(n int64) bool {
 528         return n == int64(uint32(n))
 529 }
 530
 531 // is20Bit reports whether n can be represented as a signed 20 bit integer.
 532 func is20Bit(n int64) bool {
 533         return -(1<<19) <= n && n < (1<<19)
 534 }
 535
 536 // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
 537 func b2i(b bool) int64 {
 538         if b {
 539                 return 1
 540         }
 541         return 0
 542 }
 543
 544 // b2i32 translates a boolean value to 0 or 1.
 545 func b2i32(b bool) int32 {
 546         if b {
 547                 return 1
 548         }
 549         return 0
 550 }
 551
// shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
// A shift is bounded if it is shifting by less than the width of the shifted value.
// The answer is read from v.AuxInt: any nonzero value means bounded.
func shiftIsBounded(v *Value) bool {
	return v.AuxInt != 0
}
 557
 558 // canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
 559 // generated code as much as possible.
 560 func canonLessThan(x, y *Value) bool {
 561         if x.Op != y.Op {
 562                 return x.Op < y.Op
 563         }
 564         if !x.Pos.SameFileAndLine(y.Pos) {
 565                 return x.Pos.Before(y.Pos)
 566         }
 567         return x.ID < y.ID
 568 }
 569
 570 // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
 571 // of the mantissa. It will panic if the truncation results in lost information.
 572 func truncate64Fto32F(f float64) float32 {
 573         if !isExactFloat32(f) {
 574                 panic("truncate64Fto32F: truncation is not exact")
 575         }
 576         if !math.IsNaN(f) {
 577                 return float32(f)
 578         }
 579         // NaN bit patterns aren't necessarily preserved across conversion
 580         // instructions so we need to do the conversion manually.
 581         b := math.Float64bits(f)
 582         m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
 583         //          | sign                  | exponent   | mantissa       |
 584         r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
 585         return math.Float32frombits(r)
 586 }
 587
 588 // extend32Fto64F converts a float32 value to a float64 value preserving the bit
 589 // pattern of the mantissa.
 590 func extend32Fto64F(f float32) float64 {
 591         if !math.IsNaN(float64(f)) {
 592                 return float64(f)
 593         }
 594         // NaN bit patterns aren't necessarily preserved across conversion
 595         // instructions so we need to do the conversion manually.
 596         b := uint64(math.Float32bits(f))
 597         //   | sign                  | exponent      | mantissa                    |
 598         r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
 599         return math.Float64frombits(r)
 600 }
 601
// DivisionNeedsFixUp reports whether the division needs fix-up code.
// The answer is read from v.AuxInt: zero means fix-up code is needed.
func DivisionNeedsFixUp(v *Value) bool {
	return v.AuxInt == 0
}
 606
 607 // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
 608 func auxFrom64F(f float64) int64 {
 609         if f != f {
 610                 panic("can't encode a NaN in AuxInt field")
 611         }
 612         return int64(math.Float64bits(f))
 613 }
 614
 615 // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
 616 func auxFrom32F(f float32) int64 {
 617         if f != f {
 618                 panic("can't encode a NaN in AuxInt field")
 619         }
 620         return int64(math.Float64bits(extend32Fto64F(f)))
 621 }
 622
// auxTo32F decodes a float32 from the AuxInt value provided.
// The bits of i are reinterpreted as a float64, which is then narrowed with
// truncate64Fto32F; that helper panics when the narrowing is not exact.
func auxTo32F(i int64) float32 {
	return truncate64Fto32F(math.Float64frombits(uint64(i)))
}
 627
// auxTo64F decodes a float64 from the AuxInt value provided.
// The bits of i are reinterpreted as an IEEE 754 float64.
func auxTo64F(i int64) float64 {
	return math.Float64frombits(uint64(i))
}
 632
 633 func auxIntToBool(i int64) bool {
 634         if i == 0 {
 635                 return false
 636         }
 637         return true
 638 }
 639 func auxIntToInt8(i int64) int8 {
 640         return int8(i)
 641 }
 642 func auxIntToInt16(i int64) int16 {
 643         return int16(i)
 644 }
 645 func auxIntToInt32(i int64) int32 {
 646         return int32(i)
 647 }
 648 func auxIntToInt64(i int64) int64 {
 649         return i
 650 }
 651 func auxIntToUint8(i int64) uint8 {
 652         return uint8(i)
 653 }
 654 func auxIntToFloat32(i int64) float32 {
 655         return float32(math.Float64frombits(uint64(i)))
 656 }
 657 func auxIntToFloat64(i int64) float64 {
 658         return math.Float64frombits(uint64(i))
 659 }
// auxIntToValAndOff converts an AuxInt to a ValAndOff with a plain type conversion.
func auxIntToValAndOff(i int64) ValAndOff {
	return ValAndOff(i)
}

// auxIntToArm64BitField converts an AuxInt to an arm64BitField with a plain type conversion.
func auxIntToArm64BitField(i int64) arm64BitField {
	return arm64BitField(i)
}

// auxIntToInt128 converts an AuxInt to an int128. Only zero is supported:
// it returns 0 for x == 0 and panics for any other value.
func auxIntToInt128(x int64) int128 {
	if x != 0 {
		panic("nonzero int128 not allowed")
	}
	return 0
}

// auxIntToFlagConstant converts an AuxInt to a flagConstant with a plain type conversion.
func auxIntToFlagConstant(x int64) flagConstant {
	return flagConstant(x)
}

// auxIntToOp converts an AuxInt to an Op with a plain type conversion.
func auxIntToOp(cc int64) Op {
	return Op(cc)
}
 679
 680 func boolToAuxInt(b bool) int64 {
 681         if b {
 682                 return 1
 683         }
 684         return 0
 685 }
 686 func int8ToAuxInt(i int8) int64 {
 687         return int64(i)
 688 }
 689 func int16ToAuxInt(i int16) int64 {
 690         return int64(i)
 691 }
 692 func int32ToAuxInt(i int32) int64 {
 693         return int64(i)
 694 }
 695 func int64ToAuxInt(i int64) int64 {
 696         return int64(i)
 697 }
 698 func uint8ToAuxInt(i uint8) int64 {
 699         return int64(int8(i))
 700 }
 701 func float32ToAuxInt(f float32) int64 {
 702         return int64(math.Float64bits(float64(f)))
 703 }
 704 func float64ToAuxInt(f float64) int64 {
 705         return int64(math.Float64bits(f))
 706 }
// valAndOffToAuxInt converts a ValAndOff to an AuxInt with a plain type conversion.
func valAndOffToAuxInt(v ValAndOff) int64 {
	return int64(v)
}

// arm64BitFieldToAuxInt converts an arm64BitField to an AuxInt with a plain type conversion.
func arm64BitFieldToAuxInt(v arm64BitField) int64 {
	return int64(v)
}

// int128ToAuxInt converts an int128 to an AuxInt. Only zero is supported:
// it returns 0 for x == 0 and panics for any other value.
func int128ToAuxInt(x int128) int64 {
	if x != 0 {
		panic("nonzero int128 not allowed")
	}
	return 0
}

// flagConstantToAuxInt converts a flagConstant to an AuxInt with a plain type conversion.
func flagConstantToAuxInt(x flagConstant) int64 {
	return int64(x)
}

// opToAuxInt converts an Op to an AuxInt with a plain type conversion.
func opToAuxInt(o Op) int64 {
	return int64(o)
}
 726
// Aux is an interface to hold miscellaneous data in Blocks and Values.
// A type satisfies Aux by declaring the CanBeAnSSAAux method.
type Aux interface {
	// CanBeAnSSAAux takes no arguments and returns nothing.
	CanBeAnSSAAux()
}
 731
// auxMark is a bool-based Aux value.
// for now only used to mark moves that need to avoid clobbering flags
type auxMark bool

// CanBeAnSSAAux has an empty body; declaring it makes auxMark satisfy Aux.
func (auxMark) CanBeAnSSAAux() {}

// AuxMark is a package-level auxMark. It is declared without an
// initializer, so it starts as the zero value (false).
var AuxMark auxMark
 738
// stringAux wraps string values for use in Aux.
type stringAux string

// CanBeAnSSAAux has an empty body; declaring it makes stringAux satisfy Aux.
func (stringAux) CanBeAnSSAAux() {}
 743
 744 func auxToString(i Aux) string {
 745         return string(i.(stringAux))
 746 }
 747 func auxToSym(i Aux) Sym {
 748         // TODO: kind of a hack - allows nil interface through
 749         s, _ := i.(Sym)
 750         return s
 751 }
 752 func auxToType(i Aux) *types.Type {
 753         return i.(*types.Type)
 754 }
 755 func auxToCall(i Aux) *AuxCall {
 756         return i.(*AuxCall)
 757 }
 758 func auxToS390xCCMask(i Aux) s390x.CCMask {
 759         return i.(s390x.CCMask)
 760 }
 761 func auxToS390xRotateParams(i Aux) s390x.RotateParams {
 762         return i.(s390x.RotateParams)
 763 }
 764
 765 func StringToAux(s string) Aux {
 766         return stringAux(s)
 767 }
 768 func symToAux(s Sym) Aux {
 769         return s
 770 }
 771 func callToAux(s *AuxCall) Aux {
 772         return s
 773 }
 774 func typeToAux(t *types.Type) Aux {
 775         return t
 776 }
 777 func s390xCCMaskToAux(c s390x.CCMask) Aux {
 778         return c
 779 }
 780 func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
 781         return r
 782 }
 783
 784 // uaddOvf reports whether unsigned a+b would overflow.
 785 func uaddOvf(a, b int64) bool {
 786         return uint64(a)+uint64(b) < uint64(a)
 787 }
 788
 789 // loadLSymOffset simulates reading a word at an offset into a
 790 // read-only symbol's runtime memory. If it would read a pointer to
 791 // another symbol, that symbol is returned. Otherwise, it returns nil.
 792 func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
 793         if lsym.Type != objabi.SRODATA {
 794                 return nil
 795         }
 796
 797         for _, r := range lsym.R {
 798                 if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
 799                         return r.Sym
 800                 }
 801         }
 802
 803         return nil
 804 }
 805
 806 // de-virtualize an InterLECall
 807 // 'sym' is the symbol for the itab.
 808 func devirtLESym(v *Value, aux Aux, sym Sym, offset int64) *obj.LSym {
 809         n, ok := sym.(*obj.LSym)
 810         if !ok {
 811                 return nil
 812         }
 813
 814         lsym := loadLSymOffset(n, offset)
 815         if f := v.Block.Func; f.pass.debug > 0 {
 816                 if lsym != nil {
 817                         f.Warnl(v.Pos, "de-virtualizing call")
 818                 } else {
 819                         f.Warnl(v.Pos, "couldn't de-virtualize call")
 820                 }
 821         }
 822         return lsym
 823 }
 824
 825 func devirtLECall(v *Value, sym *obj.LSym) *Value {
 826         v.Op = OpStaticLECall
 827         auxcall := v.Aux.(*AuxCall)
 828         auxcall.Fn = sym
 829         // Remove first arg
 830         v.Args[0].Uses--
 831         copy(v.Args[0:], v.Args[1:])
 832         v.Args[len(v.Args)-1] = nil // aid GC
 833         v.Args = v.Args[:len(v.Args)-1]
 834         return v
 835 }
 836
 837 // isSamePtr reports whether p1 and p2 point to the same address.
 838 func isSamePtr(p1, p2 *Value) bool {
 839         if p1 == p2 {
 840                 return true
 841         }
 842         if p1.Op != p2.Op {
 843                 return false
 844         }
 845         switch p1.Op {
 846         case OpOffPtr:
 847                 return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
 848         case OpAddr, OpLocalAddr:
 849                 return p1.Aux == p2.Aux
 850         case OpAddPtr:
 851                 return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
 852         }
 853         return false
 854 }
 855
 856 func isStackPtr(v *Value) bool {
 857         for v.Op == OpOffPtr || v.Op == OpAddPtr {
 858                 v = v.Args[0]
 859         }
 860         return v.Op == OpSP || v.Op == OpLocalAddr
 861 }
 862
 863 // disjoint reports whether the memory region specified by [p1:p1+n1)
 864 // does not overlap with [p2:p2+n2).
 865 // A return value of false does not imply the regions overlap.
 866 func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
 867         if n1 == 0 || n2 == 0 {
 868                 return true
 869         }
 870         if p1 == p2 {
 871                 return false
 872         }
 873         baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
 874                 base, offset = ptr, 0
 875                 for base.Op == OpOffPtr {
 876                         offset += base.AuxInt
 877                         base = base.Args[0]
 878                 }
 879                 return base, offset
 880         }
 881         p1, off1 := baseAndOffset(p1)
 882         p2, off2 := baseAndOffset(p2)
 883         if isSamePtr(p1, p2) {
 884                 return !overlap(off1, n1, off2, n2)
 885         }
 886         // p1 and p2 are not the same, so if they are both OpAddrs then
 887         // they point to different variables.
 888         // If one pointer is on the stack and the other is an argument
 889         // then they can't overlap.
 890         switch p1.Op {
 891         case OpAddr, OpLocalAddr:
 892                 if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
 893                         return true
 894                 }
 895                 return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
 896         case OpArg, OpArgIntReg:
 897                 if p2.Op == OpSP || p2.Op == OpLocalAddr {
 898                         return true
 899                 }
 900         case OpSP:
 901                 return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
 902         }
 903         return false
 904 }
 905
 906 // moveSize returns the number of bytes an aligned MOV instruction moves.
 907 func moveSize(align int64, c *Config) int64 {
 908         switch {
 909         case align%8 == 0 && c.PtrSize == 8:
 910                 return 8
 911         case align%4 == 0:
 912                 return 4
 913         case align%2 == 0:
 914                 return 2
 915         }
 916         return 1
 917 }
 918
 919 // mergePoint finds a block among a's blocks which dominates b and is itself
 920 // dominated by all of a's blocks. Returns nil if it can't find one.
 921 // Might return nil even if one does exist.
 922 func mergePoint(b *Block, a ...*Value) *Block {
 923         // Walk backward from b looking for one of the a's blocks.
 924
 925         // Max distance
 926         d := 100
 927
 928         for d > 0 {
 929                 for _, x := range a {
 930                         if b == x.Block {
 931                                 goto found
 932                         }
 933                 }
 934                 if len(b.Preds) > 1 {
 935                         // Don't know which way to go back. Abort.
 936                         return nil
 937                 }
 938                 b = b.Preds[0].b
 939                 d--
 940         }
 941         return nil // too far away
 942 found:
 943         // At this point, r is the first value in a that we find by walking backwards.
 944         // if we return anything, r will be it.
 945         r := b
 946
 947         // Keep going, counting the other a's that we find. They must all dominate r.
 948         na := 0
 949         for d > 0 {
 950                 for _, x := range a {
 951                         if b == x.Block {
 952                                 na++
 953                         }
 954                 }
 955                 if na == len(a) {
 956                         // Found all of a in a backwards walk. We can return r.
 957                         return r
 958                 }
 959                 if len(b.Preds) > 1 {
 960                         return nil
 961                 }
 962                 b = b.Preds[0].b
 963                 d--
 964
 965         }
 966         return nil // too far away
 967 }
 968
 969 // clobber invalidates values. Returns true.
 970 // clobber is used by rewrite rules to:
 971 //
 972 //      A) make sure the values are really dead and never used again.
 973 //      B) decrement use counts of the values' args.
 974 func clobber(vv ...*Value) bool {
 975         for _, v := range vv {
 976                 v.reset(OpInvalid)
 977                 // Note: leave v.Block intact.  The Block field is used after clobber.
 978         }
 979         return true
 980 }
 981
 982 // clobberIfDead resets v when use count is 1. Returns true.
 983 // clobberIfDead is used by rewrite rules to decrement
 984 // use counts of v's args when v is dead and never used.
 985 func clobberIfDead(v *Value) bool {
 986         if v.Uses == 1 {
 987                 v.reset(OpInvalid)
 988         }
 989         // Note: leave v.Block intact.  The Block field is used after clobberIfDead.
 990         return true
 991 }
 992
 993 // noteRule is an easy way to track if a rule is matched when writing
 994 // new ones.  Make the rule of interest also conditional on
 995 //
 996 //      noteRule("note to self: rule of interest matched")
 997 //
 998 // and that message will print when the rule matches.
 999 func noteRule(s string) bool {
1000         fmt.Println(s)
1001         return true
1002 }
1003
1004 // countRule increments Func.ruleMatches[key].
1005 // If Func.ruleMatches is non-nil at the end
1006 // of compilation, it will be printed to stdout.
1007 // This is intended to make it easier to find which functions
1008 // which contain lots of rules matches when developing new rules.
1009 func countRule(v *Value, key string) bool {
1010         f := v.Block.Func
1011         if f.ruleMatches == nil {
1012                 f.ruleMatches = make(map[string]int)
1013         }
1014         f.ruleMatches[key]++
1015         return true
1016 }
1017
1018 // warnRule generates compiler debug output with string s when
1019 // v is not in autogenerated code, cond is true and the rule has fired.
1020 func warnRule(cond bool, v *Value, s string) bool {
1021         if pos := v.Pos; pos.Line() > 1 && cond {
1022                 v.Block.Func.Warnl(pos, s)
1023         }
1024         return true
1025 }
1026
1027 // for a pseudo-op like (LessThan x), extract x.
1028 func flagArg(v *Value) *Value {
1029         if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
1030                 return nil
1031         }
1032         return v.Args[0]
1033 }
1034
1035 // arm64Negate finds the complement to an ARM64 condition code,
1036 // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
1037 //
1038 // For floating point, it's more subtle because NaN is unordered. We do
1039 // !LessThanF -> NotLessThanF, the latter takes care of NaNs.
1040 func arm64Negate(op Op) Op {
1041         switch op {
1042         case OpARM64LessThan:
1043                 return OpARM64GreaterEqual
1044         case OpARM64LessThanU:
1045                 return OpARM64GreaterEqualU
1046         case OpARM64GreaterThan:
1047                 return OpARM64LessEqual
1048         case OpARM64GreaterThanU:
1049                 return OpARM64LessEqualU
1050         case OpARM64LessEqual:
1051                 return OpARM64GreaterThan
1052         case OpARM64LessEqualU:
1053                 return OpARM64GreaterThanU
1054         case OpARM64GreaterEqual:
1055                 return OpARM64LessThan
1056         case OpARM64GreaterEqualU:
1057                 return OpARM64LessThanU
1058         case OpARM64Equal:
1059                 return OpARM64NotEqual
1060         case OpARM64NotEqual:
1061                 return OpARM64Equal
1062         case OpARM64LessThanF:
1063                 return OpARM64NotLessThanF
1064         case OpARM64NotLessThanF:
1065                 return OpARM64LessThanF
1066         case OpARM64LessEqualF:
1067                 return OpARM64NotLessEqualF
1068         case OpARM64NotLessEqualF:
1069                 return OpARM64LessEqualF
1070         case OpARM64GreaterThanF:
1071                 return OpARM64NotGreaterThanF
1072         case OpARM64NotGreaterThanF:
1073                 return OpARM64GreaterThanF
1074         case OpARM64GreaterEqualF:
1075                 return OpARM64NotGreaterEqualF
1076         case OpARM64NotGreaterEqualF:
1077                 return OpARM64GreaterEqualF
1078         default:
1079                 panic("unreachable")
1080         }
1081 }
1082
1083 // arm64Invert evaluates (InvertFlags op), which
1084 // is the same as altering the condition codes such
1085 // that the same result would be produced if the arguments
1086 // to the flag-generating instruction were reversed, e.g.
1087 // (InvertFlags (CMP x y)) -> (CMP y x)
1088 func arm64Invert(op Op) Op {
1089         switch op {
1090         case OpARM64LessThan:
1091                 return OpARM64GreaterThan
1092         case OpARM64LessThanU:
1093                 return OpARM64GreaterThanU
1094         case OpARM64GreaterThan:
1095                 return OpARM64LessThan
1096         case OpARM64GreaterThanU:
1097                 return OpARM64LessThanU
1098         case OpARM64LessEqual:
1099                 return OpARM64GreaterEqual
1100         case OpARM64LessEqualU:
1101                 return OpARM64GreaterEqualU
1102         case OpARM64GreaterEqual:
1103                 return OpARM64LessEqual
1104         case OpARM64GreaterEqualU:
1105                 return OpARM64LessEqualU
1106         case OpARM64Equal, OpARM64NotEqual:
1107                 return op
1108         case OpARM64LessThanF:
1109                 return OpARM64GreaterThanF
1110         case OpARM64GreaterThanF:
1111                 return OpARM64LessThanF
1112         case OpARM64LessEqualF:
1113                 return OpARM64GreaterEqualF
1114         case OpARM64GreaterEqualF:
1115                 return OpARM64LessEqualF
1116         case OpARM64NotLessThanF:
1117                 return OpARM64NotGreaterThanF
1118         case OpARM64NotGreaterThanF:
1119                 return OpARM64NotLessThanF
1120         case OpARM64NotLessEqualF:
1121                 return OpARM64NotGreaterEqualF
1122         case OpARM64NotGreaterEqualF:
1123                 return OpARM64NotLessEqualF
1124         default:
1125                 panic("unreachable")
1126         }
1127 }
1128
1129 // evaluate an ARM64 op against a flags value
1130 // that is potentially constant; return 1 for true,
1131 // -1 for false, and 0 for not constant.
1132 func ccARM64Eval(op Op, flags *Value) int {
1133         fop := flags.Op
1134         if fop == OpARM64InvertFlags {
1135                 return -ccARM64Eval(op, flags.Args[0])
1136         }
1137         if fop != OpARM64FlagConstant {
1138                 return 0
1139         }
1140         fc := flagConstant(flags.AuxInt)
1141         b2i := func(b bool) int {
1142                 if b {
1143                         return 1
1144                 }
1145                 return -1
1146         }
1147         switch op {
1148         case OpARM64Equal:
1149                 return b2i(fc.eq())
1150         case OpARM64NotEqual:
1151                 return b2i(fc.ne())
1152         case OpARM64LessThan:
1153                 return b2i(fc.lt())
1154         case OpARM64LessThanU:
1155                 return b2i(fc.ult())
1156         case OpARM64GreaterThan:
1157                 return b2i(fc.gt())
1158         case OpARM64GreaterThanU:
1159                 return b2i(fc.ugt())
1160         case OpARM64LessEqual:
1161                 return b2i(fc.le())
1162         case OpARM64LessEqualU:
1163                 return b2i(fc.ule())
1164         case OpARM64GreaterEqual:
1165                 return b2i(fc.ge())
1166         case OpARM64GreaterEqualU:
1167                 return b2i(fc.uge())
1168         }
1169         return 0
1170 }
1171
1172 // logRule logs the use of the rule s. This will only be enabled if
1173 // rewrite rules were generated with the -log option, see _gen/rulegen.go.
1174 func logRule(s string) {
1175         if ruleFile == nil {
1176                 // Open a log file to write log to. We open in append
1177                 // mode because all.bash runs the compiler lots of times,
1178                 // and we want the concatenation of all of those logs.
1179                 // This means, of course, that users need to rm the old log
1180                 // to get fresh data.
1181                 // TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
1182                 w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
1183                         os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
1184                 if err != nil {
1185                         panic(err)
1186                 }
1187                 ruleFile = w
1188         }
1189         _, err := fmt.Fprintln(ruleFile, s)
1190         if err != nil {
1191                 panic(err)
1192         }
1193 }
1194
// ruleFile is the destination logRule writes to. It starts out nil and is
// set by logRule the first time it is called.
var ruleFile io.Writer
1196
// min returns the smaller of x and y.
func min(x, y int64) int64 {
	if y <= x {
		return y
	}
	return x
}
1203
1204 func isConstZero(v *Value) bool {
1205         switch v.Op {
1206         case OpConstNil:
1207                 return true
1208         case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
1209                 return v.AuxInt == 0
1210         }
1211         return false
1212 }
1213
// reciprocalExact64 reports whether 1/c is exactly representable.
// That is the case only when c is a (possibly negative) power of two
// whose reciprocal still has a normal exponent.
func reciprocalExact64(c float64) bool {
	const (
		mantissaMask = 1<<52 - 1
		exponentMask = 1<<11 - 1
	)
	b := math.Float64bits(c)
	if b&mantissaMask != 0 {
		return false // not a power of 2, denormal, or NaN
	}
	// exponent bias is 0x3ff.  So taking the reciprocal of a number
	// changes the exponent to 0x7fe-exp.
	exp := (b >> 52) & exponentMask
	isZero := exp == 0           // ±0
	isInf := exp == 0x7ff        // ±inf
	unrepresentable := exp == 0x7fe // exponent is not representable
	return !isZero && !isInf && !unrepresentable
}
1235
// reciprocalExact32 reports whether 1/c is exactly representable.
// That is the case only when c is a (possibly negative) power of two
// whose reciprocal still has a normal exponent.
func reciprocalExact32(c float32) bool {
	const (
		mantissaMask = 1<<23 - 1
		exponentMask = 1<<8 - 1
	)
	b := math.Float32bits(c)
	if b&mantissaMask != 0 {
		return false // not a power of 2, denormal, or NaN
	}
	// exponent bias is 0x7f.  So taking the reciprocal of a number
	// changes the exponent to 0xfe-exp.
	exp := (b >> 23) & exponentMask
	isZero := exp == 0            // ±0
	isInf := exp == 0xff          // ±inf
	unrepresentable := exp == 0xfe // exponent is not representable
	return !isZero && !isInf && !unrepresentable
}
1257
// isARMImmRot checks whether an immediate can be directly encoded into an
// ARM instruction, i.e. whether some rotation of v by an even number of
// bits (0, 2, ..., 30) fits entirely in the low 8 bits.
func isARMImmRot(v uint32) bool {
	for rot := uint(0); rot < 32; rot += 2 {
		// Rotate v left by rot bits; for rot == 0 the right shift by 32
		// contributes nothing.
		rotated := v<<rot | v>>(32-rot)
		if rotated&^0xff == 0 {
			return true
		}
	}

	return false
}
1269
// overlap reports whether the ranges given by the given offset and
// size pairs overlap.
func overlap(offset1, size1, offset2, size2 int64) bool {
	// Range 1 starts at or after range 2 and before range 2 ends.
	firstStartsInSecond := offset1 >= offset2 && offset2+size2 > offset1
	// Range 2 starts at or after range 1 and before range 1 ends.
	secondStartsInFirst := offset2 >= offset1 && offset1+size1 > offset2
	return firstStartsInSecond || secondStartsInFirst
}
1281
// areAdjacentOffsets reports whether two regions of the given size that
// start at off1 and off2 sit directly next to each other, in either
// order.
func areAdjacentOffsets(off1, off2, size int64) bool {
	if off1+size == off2 {
		return true
	}
	return off2+size == off1
}
1285
1286 // check if value zeroes out upper 32-bit of 64-bit register.
1287 // depth limits recursion depth. In AMD64.rules 3 is used as limit,
1288 // because it catches same amount of cases as 4.
1289 func zeroUpper32Bits(x *Value, depth int) bool {
1290         switch x.Op {
1291         case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
1292                 OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
1293                 OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
1294                 OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
1295                 OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
1296                 OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
1297                 OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
1298                 OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
1299                 OpAMD64SHLL, OpAMD64SHLLconst:
1300                 return true
1301         case OpArg:
1302                 return x.Type.Size() == 4
1303         case OpPhi, OpSelect0, OpSelect1:
1304                 // Phis can use each-other as an arguments, instead of tracking visited values,
1305                 // just limit recursion depth.
1306                 if depth <= 0 {
1307                         return false
1308                 }
1309                 for i := range x.Args {
1310                         if !zeroUpper32Bits(x.Args[i], depth-1) {
1311                                 return false
1312                         }
1313                 }
1314                 return true
1315
1316         }
1317         return false
1318 }
1319
1320 // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
1321 func zeroUpper48Bits(x *Value, depth int) bool {
1322         switch x.Op {
1323         case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
1324                 return true
1325         case OpArg:
1326                 return x.Type.Size() == 2
1327         case OpPhi, OpSelect0, OpSelect1:
1328                 // Phis can use each-other as an arguments, instead of tracking visited values,
1329                 // just limit recursion depth.
1330                 if depth <= 0 {
1331                         return false
1332                 }
1333                 for i := range x.Args {
1334                         if !zeroUpper48Bits(x.Args[i], depth-1) {
1335                                 return false
1336                         }
1337                 }
1338                 return true
1339
1340         }
1341         return false
1342 }
1343
1344 // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
1345 func zeroUpper56Bits(x *Value, depth int) bool {
1346         switch x.Op {
1347         case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
1348                 return true
1349         case OpArg:
1350                 return x.Type.Size() == 1
1351         case OpPhi, OpSelect0, OpSelect1:
1352                 // Phis can use each-other as an arguments, instead of tracking visited values,
1353                 // just limit recursion depth.
1354                 if depth <= 0 {
1355                         return false
1356                 }
1357                 for i := range x.Args {
1358                         if !zeroUpper56Bits(x.Args[i], depth-1) {
1359                                 return false
1360                         }
1361                 }
1362                 return true
1363
1364         }
1365         return false
1366 }
1367
1368 func isInlinableMemclr(c *Config) bool {
1369         // TODO: expand this check to allow other architectures
1370         // see CL 454255 and issue 56997
1371         return c.arch == "amd64" || c.arch == "arm64"
1372 }
1373
1374 // isInlinableMemmove reports whether the given arch performs a Move of the given size
1375 // faster than memmove. It will only return true if replacing the memmove with a Move is
1376 // safe, either because Move will do all of its loads before any of its stores, or
1377 // because the arguments are known to be disjoint.
1378 // This is used as a check for replacing memmove with Move ops.
1379 func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1380         // It is always safe to convert memmove into Move when its arguments are disjoint.
1381         // Move ops may or may not be faster for large sizes depending on how the platform
1382         // lowers them, so we only perform this optimization on platforms that we know to
1383         // have fast Move ops.
1384         switch c.arch {
1385         case "amd64":
1386                 return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
1387         case "386", "arm64":
1388                 return sz <= 8
1389         case "s390x", "ppc64", "ppc64le":
1390                 return sz <= 8 || disjoint(dst, sz, src, sz)
1391         case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
1392                 return sz <= 4
1393         }
1394         return false
1395 }
1396 func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1397         return isInlinableMemmove(dst, src, sz, c)
1398 }
1399
1400 // logLargeCopy logs the occurrence of a large copy.
1401 // The best place to do this is in the rewrite rules where the size of the move is easy to find.
1402 // "Large" is arbitrarily chosen to be 128 bytes; this may change.
1403 func logLargeCopy(v *Value, s int64) bool {
1404         if s < 128 {
1405                 return true
1406         }
1407         if logopt.Enabled() {
1408                 logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
1409         }
1410         return true
1411 }
1412 func LogLargeCopy(funcName string, pos src.XPos, s int64) {
1413         if s < 128 {
1414                 return
1415         }
1416         if logopt.Enabled() {
1417                 logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
1418         }
1419 }
1420
1421 // hasSmallRotate reports whether the architecture has rotate instructions
1422 // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
1423 func hasSmallRotate(c *Config) bool {
1424         switch c.arch {
1425         case "amd64", "386":
1426                 return true
1427         default:
1428                 return false
1429         }
1430 }
1431
// newPPC64ShiftAuxInt packs sh, mb and me into a single int32 laid out as
// sh<<16 | mb<<8 | me. Each of the three values must lie in [0, sz);
// otherwise the function panics.
func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
	switch {
	case sh < 0 || sh >= sz:
		panic("PPC64 shift arg sh out of range")
	case mb < 0 || mb >= sz:
		panic("PPC64 shift arg mb out of range")
	case me < 0 || me >= sz:
		panic("PPC64 shift arg me out of range")
	}
	packed := sh<<16 | mb<<8 | me
	return int32(packed)
}
1444
// GetPPC64Shiftsh extracts the sh field, bits 16-23 of auxint. The byte
// is interpreted as a signed 8-bit value and sign-extended.
func GetPPC64Shiftsh(auxint int64) int64 {
	sh := int8(auxint >> 16)
	return int64(sh)
}
1448
// GetPPC64Shiftmb extracts the mb field, bits 8-15 of auxint. The byte is
// interpreted as a signed 8-bit value and sign-extended.
func GetPPC64Shiftmb(auxint int64) int64 {
	mb := int8(auxint >> 8)
	return int64(mb)
}
1452
// GetPPC64Shiftme extracts the me field, the low 8 bits of auxint. The
// byte is interpreted as a signed 8-bit value and sign-extended.
func GetPPC64Shiftme(auxint int64) int64 {
	me := int8(auxint)
	return int64(me)
}
1456
// isPPC64WordRotateMask tests if this value can be encoded as a mask for
// a rlwinm like operation.  Masks can also extend from the msb and wrap to
// the lsb too.  That is, the valid masks are 32 bit strings
// of the form: 0..01..10..0 or 1..10..01..1 or 1...1
// Only the low 32 bits of v64 are examined.
func isPPC64WordRotateMask(v64 int64) bool {
	v := uint32(v64)
	if v == 0 {
		return false
	}
	// singleRun reports whether the set bits of m form one contiguous
	// run: adding the rightmost set bit to m carries through the lowest
	// run of ones, so the sum shares no bits with m only when that run
	// was the only one.
	singleRun := func(m uint32) bool {
		lowest := m & -m
		return m&(m+lowest) == 0
	}
	// Either the mask itself, or (for the wrapping case) its complement,
	// must be a single run.
	return singleRun(v) || singleRun(^v)
}
1470
// encodePPC64RotateMask compresses mask and shift into a single value of
// the form me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
// be used to regenerate the input mask.
//
// It panics if mask is all zeros or all ones, or if rotate >= nbits.
// An nbits of 32 selects 32-bit bit counting; any other value is treated
// as 64-bit.
func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
	if mask == 0 || ^mask == 0 || rotate >= nbits {
		panic("Invalid PPC64 rotate mask")
	}

	// Determine the boundaries of the mask and of its complement.
	var mb, me, mbn, men int
	if nbits == 32 {
		m := uint32(mask)
		mb, me = bits.LeadingZeros32(m), 32-bits.TrailingZeros32(m)
		mbn, men = bits.LeadingZeros32(^m), 32-bits.TrailingZeros32(^m)
	} else {
		m := uint64(mask)
		mb, me = bits.LeadingZeros64(m), 64-bits.TrailingZeros64(m)
		mbn, men = bits.LeadingZeros64(^m), 64-bits.TrailingZeros64(^m)
	}

	// Check for a wrapping mask (e.g bits at 0 and 63)
	if mb == 0 && me == int(nbits) {
		// swap the inverted values
		mb, me = men, mbn
	}

	return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
}
1499
// DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask.  The values returned as
// mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
	// byteAt extracts the 8-bit field that starts at the given bit.
	byteAt := func(shift uint) int64 {
		return int64((uint64(sauxint) >> shift) & 0xFF)
	}
	me, mb, rotate = byteAt(0), byteAt(8), byteAt(16)
	nbits := byteAt(24)

	// Rebuild the mask from its two boundaries. A wrapping mask is stored
	// with mb > me and is the complement of the non-wrapping one.
	fromMB := uint64(1)<<uint(nbits-mb) - 1
	fromME := uint64(1)<<uint(nbits-me) - 1
	mask = fromMB ^ fromME
	if mb > me {
		mask = ^mask
	}
	if nbits == 32 {
		mask = uint64(uint32(mask))
	}

	// Fixup ME to match ISA definition.  The second argument to MASK(..,me)
	// is inclusive.
	return rotate, mb, (me - 1) & (nbits - 1), mask
}
1521
// isPPC64ValidShiftMask verifies that the mask is a set of
// consecutive bits including the least
// significant bit.
func isPPC64ValidShiftMask(v int64) bool {
	// For a run of ones anchored at bit 0, v+1 is the single bit just
	// above the run and therefore shares no bits with v.
	return v != 0 && (v+1)&v == 0
}
1531
// getPPC64ShiftMaskLength returns the number of bits needed to represent
// v when it is viewed as an unsigned 64-bit value (0 for v == 0).
func getPPC64ShiftMaskLength(v int64) int64 {
	n := bits.Len64(uint64(v))
	return int64(n)
}
1535
// mergePPC64RShiftMask decomposes a shift right into an equivalent
// rotate/mask, and returns mask & m. Here mask is the nbits-wide all-ones
// value shifted right by s.
func mergePPC64RShiftMask(m, s, nbits int64) int64 {
	allOnes := uint64(1)<<uint(nbits) - 1
	shifted := allOnes >> uint(s)
	return m & int64(shifted)
}
1542
1543 // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
1544 func mergePPC64AndSrwi(m, s int64) int64 {
1545         mask := mergePPC64RShiftMask(m, s, 32)
1546         if !isPPC64WordRotateMask(mask) {
1547                 return 0
1548         }
1549         return encodePPC64RotateMask((32-s)&31, mask, 32)
1550 }
1551
1552 // Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
1553 // Return the encoded RLWINM constant, or 0 if they cannot be merged.
1554 func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
1555         mask_1 := uint64(0xFFFFFFFF >> uint(srw))
1556         // for CLRLSLDI, it's more convient to think of it as a mask left bits then rotate left.
1557         mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1558
1559         // Rewrite mask to apply after the final left shift.
1560         mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
1561
1562         r_1 := 32 - srw
1563         r_2 := GetPPC64Shiftsh(sld)
1564         r_3 := (r_1 + r_2) & 31 // This can wrap.
1565
1566         if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
1567                 return 0
1568         }
1569         return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
1570 }
1571
1572 // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
1573 // the encoded RLWINM constant, or 0 if they cannot be merged.
1574 func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
1575         r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
1576         // for CLRLSLDI, it's more convient to think of it as a mask left bits then rotate left.
1577         mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1578
1579         // combine the masks, and adjust for the final left shift.
1580         mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
1581         r_2 := GetPPC64Shiftsh(int64(sld))
1582         r_3 := (r_1 + r_2) & 31 // This can wrap.
1583
1584         // Verify the result is still a valid bitmask of <= 32 bits.
1585         if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
1586                 return 0
1587         }
1588         return encodePPC64RotateMask(r_3, int64(mask_3), 32)
1589 }
1590
1591 // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
1592 // or return 0 if they cannot be combined.
1593 func mergePPC64SldiSrw(sld, srw int64) int64 {
1594         if sld > srw || srw >= 32 {
1595                 return 0
1596         }
1597         mask_r := uint32(0xFFFFFFFF) >> uint(srw)
1598         mask_l := uint32(0xFFFFFFFF) >> uint(sld)
1599         mask := (mask_r & mask_l) << uint(sld)
1600         return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
1601 }
1602
// rotateLeft32 rotates the low 32 bits of v left by rotate bits and returns
// the 32-bit result zero-extended to an int64.
func rotateLeft32(v, rotate int64) int64 {
	word := uint32(v)
	rotated := bits.RotateLeft32(word, int(rotate))
	return int64(rotated)
}
1607
// rotateRight64 rotates the 64-bit value v right by rotate bits.
func rotateRight64(v, rotate int64) int64 {
	// A right rotation is a left rotation by the negated amount.
	left := int(-rotate)
	return int64(bits.RotateLeft64(uint64(v), left))
}
1611
1612 // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
1613 func armBFAuxInt(lsb, width int64) arm64BitField {
1614         if lsb < 0 || lsb > 63 {
1615                 panic("ARM(64) bit field lsb constant out of range")
1616         }
1617         if width < 1 || lsb+width > 64 {
1618                 panic("ARM(64) bit field width constant out of range")
1619         }
1620         return arm64BitField(width | lsb<<8)
1621 }
1622
1623 // returns the lsb part of the auxInt field of arm64 bitfield ops.
1624 func (bfc arm64BitField) getARM64BFlsb() int64 {
1625         return int64(uint64(bfc) >> 8)
1626 }
1627
1628 // returns the width part of the auxInt field of arm64 bitfield ops.
1629 func (bfc arm64BitField) getARM64BFwidth() int64 {
1630         return int64(bfc) & 0xff
1631 }
1632
1633 // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
1634 func isARM64BFMask(lsb, mask, rshift int64) bool {
1635         shiftedMask := int64(uint64(mask) >> uint64(rshift))
1636         return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
1637 }
1638
1639 // returns the bitfield width of mask >> rshift for arm64 bitfield ops.
1640 func arm64BFWidth(mask, rshift int64) int64 {
1641         shiftedMask := int64(uint64(mask) >> uint64(rshift))
1642         if shiftedMask == 0 {
1643                 panic("ARM64 BF mask is zero")
1644         }
1645         return nto(shiftedMask)
1646 }
1647
1648 // sizeof returns the size of t in bytes.
1649 // It will panic if t is not a *types.Type.
1650 func sizeof(t interface{}) int64 {
1651         return t.(*types.Type).Size()
1652 }
1653
1654 // registerizable reports whether t is a primitive type that fits in
1655 // a register. It assumes float64 values will always fit into registers
1656 // even if that isn't strictly true.
1657 func registerizable(b *Block, typ *types.Type) bool {
1658         if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
1659                 return true
1660         }
1661         if typ.IsInteger() {
1662                 return typ.Size() <= b.Func.Config.RegSize
1663         }
1664         return false
1665 }
1666
1667 // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
1668 func needRaceCleanup(sym *AuxCall, v *Value) bool {
1669         f := v.Block.Func
1670         if !f.Config.Race {
1671                 return false
1672         }
1673         if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
1674                 return false
1675         }
1676         for _, b := range f.Blocks {
1677                 for _, v := range b.Values {
1678                         switch v.Op {
1679                         case OpStaticCall, OpStaticLECall:
1680                                 // Check for racefuncenter will encounter racefuncexit and vice versa.
1681                                 // Allow calls to panic*
1682                                 s := v.Aux.(*AuxCall).Fn.String()
1683                                 switch s {
1684                                 case "runtime.racefuncenter", "runtime.racefuncexit",
1685                                         "runtime.panicdivide", "runtime.panicwrap",
1686                                         "runtime.panicshift":
1687                                         continue
1688                                 }
1689                                 // If we encountered any call, we need to keep racefunc*,
1690                                 // for accurate stacktraces.
1691                                 return false
1692                         case OpPanicBounds, OpPanicExtend:
1693                                 // Note: these are panic generators that are ok (like the static calls above).
1694                         case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
1695                                 // We must keep the race functions if there are any other call types.
1696                                 return false
1697                         }
1698                 }
1699         }
1700         if isSameCall(sym, "runtime.racefuncenter") {
1701                 // TODO REGISTER ABI this needs to be cleaned up.
1702                 // If we're removing racefuncenter, remove its argument as well.
1703                 if v.Args[0].Op != OpStore {
1704                         if v.Op == OpStaticLECall {
1705                                 // there is no store, yet.
1706                                 return true
1707                         }
1708                         return false
1709                 }
1710                 mem := v.Args[0].Args[2]
1711                 v.Args[0].reset(OpCopy)
1712                 v.Args[0].AddArg(mem)
1713         }
1714         return true
1715 }
1716
1717 // symIsRO reports whether sym is a read-only global.
1718 func symIsRO(sym interface{}) bool {
1719         lsym := sym.(*obj.LSym)
1720         return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
1721 }
1722
1723 // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
1724 func symIsROZero(sym Sym) bool {
1725         lsym := sym.(*obj.LSym)
1726         if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
1727                 return false
1728         }
1729         for _, b := range lsym.P {
1730                 if b != 0 {
1731                         return false
1732                 }
1733         }
1734         return true
1735 }
1736
1737 // read8 reads one byte from the read-only global sym at offset off.
1738 func read8(sym interface{}, off int64) uint8 {
1739         lsym := sym.(*obj.LSym)
1740         if off >= int64(len(lsym.P)) || off < 0 {
1741                 // Invalid index into the global sym.
1742                 // This can happen in dead code, so we don't want to panic.
1743                 // Just return any value, it will eventually get ignored.
1744                 // See issue 29215.
1745                 return 0
1746         }
1747         return lsym.P[off]
1748 }
1749
1750 // read16 reads two bytes from the read-only global sym at offset off.
1751 func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
1752         lsym := sym.(*obj.LSym)
1753         // lsym.P is written lazily.
1754         // Bytes requested after the end of lsym.P are 0.
1755         var src []byte
1756         if 0 <= off && off < int64(len(lsym.P)) {
1757                 src = lsym.P[off:]
1758         }
1759         buf := make([]byte, 2)
1760         copy(buf, src)
1761         return byteorder.Uint16(buf)
1762 }
1763
1764 // read32 reads four bytes from the read-only global sym at offset off.
1765 func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
1766         lsym := sym.(*obj.LSym)
1767         var src []byte
1768         if 0 <= off && off < int64(len(lsym.P)) {
1769                 src = lsym.P[off:]
1770         }
1771         buf := make([]byte, 4)
1772         copy(buf, src)
1773         return byteorder.Uint32(buf)
1774 }
1775
1776 // read64 reads eight bytes from the read-only global sym at offset off.
1777 func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
1778         lsym := sym.(*obj.LSym)
1779         var src []byte
1780         if 0 <= off && off < int64(len(lsym.P)) {
1781                 src = lsym.P[off:]
1782         }
1783         buf := make([]byte, 8)
1784         copy(buf, src)
1785         return byteorder.Uint64(buf)
1786 }
1787
1788 // sequentialAddresses reports true if it can prove that x + n == y
1789 func sequentialAddresses(x, y *Value, n int64) bool {
1790         if x == y && n == 0 {
1791                 return true
1792         }
1793         if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
1794                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1795                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1796                 return true
1797         }
1798         if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1799                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1800                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1801                 return true
1802         }
1803         if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
1804                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1805                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1806                 return true
1807         }
1808         if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1809                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1810                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1811                 return true
1812         }
1813         return false
1814 }
1815
// flagConstant is the outcome of a comparison evaluated at compile time.
// It is purely a compile-time construct and does not necessarily mirror any
// hardware flags register, although its semantics are chosen to match those
// of arm/arm64. Note that this differs from x86: the carry flag has the
// opposite sense on a subtraction!
//
//	On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
//	On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
//	 (because it does x + ^y + C).
//
// The flags are stored one per bit: N in bit 0, Z in bit 1, C in bit 2 and
// V in bit 3.
//
// See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
type flagConstant uint8

// N reports whether the result of an operation is negative (high bit set).
func (fc flagConstant) N() bool {
	const nBit flagConstant = 1 << 0
	return fc&nBit == nBit
}

// Z reports whether the result of an operation is 0.
func (fc flagConstant) Z() bool {
	const zBit flagConstant = 1 << 1
	return fc&zBit == zBit
}

// C reports whether an unsigned add overflowed (carry), or an
// unsigned subtract did not underflow (borrow).
func (fc flagConstant) C() bool {
	const cBit flagConstant = 1 << 2
	return fc&cBit == cBit
}

// V reports whether a signed operation overflowed or underflowed.
func (fc flagConstant) V() bool {
	const vBit flagConstant = 1 << 3
	return fc&vBit == vBit
}

// Signed and equality predicates derived from the flags.

func (fc flagConstant) eq() bool { return fc.Z() }
func (fc flagConstant) ne() bool { return !fc.eq() }
func (fc flagConstant) lt() bool { return fc.N() != fc.V() }
func (fc flagConstant) le() bool { return fc.lt() || fc.Z() }
func (fc flagConstant) gt() bool { return !fc.le() }
func (fc flagConstant) ge() bool { return !fc.lt() }

// Unsigned predicates; the carry flag is clear when a subtract borrowed.

func (fc flagConstant) ult() bool { return !fc.C() }
func (fc flagConstant) ule() bool { return fc.ult() || fc.Z() }
func (fc flagConstant) ugt() bool { return !fc.ule() }
func (fc flagConstant) uge() bool { return fc.C() }

// The Noov variants additionally require that V is clear.

func (fc flagConstant) ltNoov() bool { return !fc.V() && fc.lt() }
func (fc flagConstant) leNoov() bool { return !fc.V() && fc.le() }
func (fc flagConstant) gtNoov() bool { return !fc.V() && fc.gt() }
func (fc flagConstant) geNoov() bool { return !fc.V() && fc.ge() }

// String returns the four flags in the form "N=..,Z=..,C=..,V=..".
func (fc flagConstant) String() string {
	n, z, c, v := fc.N(), fc.Z(), fc.C(), fc.V()
	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", n, z, c, v)
}
1898
1899 type flagConstantBuilder struct {
1900         N bool
1901         Z bool
1902         C bool
1903         V bool
1904 }
1905
1906 func (fcs flagConstantBuilder) encode() flagConstant {
1907         var fc flagConstant
1908         if fcs.N {
1909                 fc |= 1
1910         }
1911         if fcs.Z {
1912                 fc |= 2
1913         }
1914         if fcs.C {
1915                 fc |= 4
1916         }
1917         if fcs.V {
1918                 fc |= 8
1919         }
1920         return fc
1921 }
1922
1923 // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
1924 //  - the results of the C flag are different
1925 //  - the results of the V flag when y==minint are different
1926
1927 // addFlags64 returns the flags that would be set from computing x+y.
1928 func addFlags64(x, y int64) flagConstant {
1929         var fcb flagConstantBuilder
1930         fcb.Z = x+y == 0
1931         fcb.N = x+y < 0
1932         fcb.C = uint64(x+y) < uint64(x)
1933         fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1934         return fcb.encode()
1935 }
1936
1937 // subFlags64 returns the flags that would be set from computing x-y.
1938 func subFlags64(x, y int64) flagConstant {
1939         var fcb flagConstantBuilder
1940         fcb.Z = x-y == 0
1941         fcb.N = x-y < 0
1942         fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
1943         fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
1944         return fcb.encode()
1945 }
1946
1947 // addFlags32 returns the flags that would be set from computing x+y.
1948 func addFlags32(x, y int32) flagConstant {
1949         var fcb flagConstantBuilder
1950         fcb.Z = x+y == 0
1951         fcb.N = x+y < 0
1952         fcb.C = uint32(x+y) < uint32(x)
1953         fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1954         return fcb.encode()
1955 }
1956
1957 // subFlags32 returns the flags that would be set from computing x-y.
1958 func subFlags32(x, y int32) flagConstant {
1959         var fcb flagConstantBuilder
1960         fcb.Z = x-y == 0
1961         fcb.N = x-y < 0
1962         fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
1963         fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
1964         return fcb.encode()
1965 }
1966
1967 // logicFlags64 returns flags set to the sign/zeroness of x.
1968 // C and V are set to false.
1969 func logicFlags64(x int64) flagConstant {
1970         var fcb flagConstantBuilder
1971         fcb.Z = x == 0
1972         fcb.N = x < 0
1973         return fcb.encode()
1974 }
1975
1976 // logicFlags32 returns flags set to the sign/zeroness of x.
1977 // C and V are set to false.
1978 func logicFlags32(x int32) flagConstant {
1979         var fcb flagConstantBuilder
1980         fcb.Z = x == 0
1981         fcb.N = x < 0
1982         return fcb.encode()
1983 }
1984
1985 func makeJumpTableSym(b *Block) *obj.LSym {
1986         s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.LSym(), b.ID))
1987         s.Set(obj.AttrDuplicateOK, true)
1988         s.Set(obj.AttrLocal, true)
1989         return s
1990 }
1991
1992 // canRotate reports whether the architecture supports
1993 // rotates of integer registers with the given number of bits.
1994 func canRotate(c *Config, bits int64) bool {
1995         if bits > c.PtrSize*8 {
1996                 // Don't rewrite to rotates bigger than the machine word.
1997                 return false
1998         }
1999         switch c.arch {
2000         case "386", "amd64", "arm64":
2001                 return true
2002         case "arm", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
2003                 return bits >= 32
2004         default:
2005                 return false
2006         }
2007 }
2008
2009 // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
2010 func isARM64bitcon(x uint64) bool {
2011         if x == 1<<64-1 || x == 0 {
2012                 return false
2013         }
2014         // determine the period and sign-extend a unit to 64 bits
2015         switch {
2016         case x != x>>32|x<<32:
2017                 // period is 64
2018                 // nothing to do
2019         case x != x>>16|x<<48:
2020                 // period is 32
2021                 x = uint64(int64(int32(x)))
2022         case x != x>>8|x<<56:
2023                 // period is 16
2024                 x = uint64(int64(int16(x)))
2025         case x != x>>4|x<<60:
2026                 // period is 8
2027                 x = uint64(int64(int8(x)))
2028         default:
2029                 // period is 4 or 2, always true
2030                 // 0001, 0010, 0100, 1000 -- 0001 rotate
2031                 // 0011, 0110, 1100, 1001 -- 0011 rotate
2032                 // 0111, 1011, 1101, 1110 -- 0111 rotate
2033                 // 0101, 1010             -- 01   rotate, repeat
2034                 return true
2035         }
2036         return sequenceOfOnes(x) || sequenceOfOnes(^x)
2037 }
2038
// sequenceOfOnes tests whether a constant is a sequence of ones in binary,
// with leading and trailing zeros. Note that it also returns true for 0.
func sequenceOfOnes(x uint64) bool {
	// Adding the lowest set bit of x to x carries through the lowest run of
	// ones; x is good iff what remains has at most one bit set.
	lowest := x & -x
	sum := x + lowest
	return sum&(sum-1) == 0
}
2045
// isARM64addcon reports whether v can be encoded as the immediate value in
// an ADD or SUB instruction: a non-negative value that fits in 12 bits,
// either as is or after dropping 12 low zero bits.
func isARM64addcon(v int64) bool {
	const imm12Max = 0xFFF
	switch {
	case v < 0:
		return false
	case v&imm12Max == 0:
		// Low 12 bits are clear: try the shifted-by-12 form (uimm24).
		return v>>12 <= imm12Max
	default:
		// Plain uimm12.
		return v <= imm12Max
	}
}