1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/base"
9 "cmd/compile/internal/logopt"
10 "cmd/compile/internal/types"
12 "cmd/internal/obj/s390x"
24 type deadValueChoice bool
27 leaveDeadValues deadValueChoice = false
28 removeDeadValues = true
31 // deadcode indicates whether rewrite should try to remove any values that become dead.
32 func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
33 // repeat rewrites until we find no more rewrites
34 pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
38 fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
41 var states map[string]bool
45 for _, b := range f.Blocks {
46 var b0 *Block
47 if debug > 1 {
48 b0 = new(Block)
49 *b0 = *b
50 b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
52 for i, c := range b.ControlValues() {
55 b.ReplaceControl(i, c)
61 fmt.Printf("rewriting %s -> %s\n", b0.LongString(), b.LongString())
64 for j, v := range b.Values {
65 var v0 *Value
66 if debug > 1 {
67 v0 = new(Value)
68 *v0 = *v
69 v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
71 if v.Uses == 0 && v.removeable() {
72 if v.Op != OpInvalid && deadcode == removeDeadValues {
73 // Reset any values that are now unused, so that we decrement
74 // the use count of all of its arguments.
75 // Not quite a deadcode pass, because it does not handle cycles.
76 // But it should help Uses==1 rules to fire.
77 v.reset(OpInvalid)
78 deadChange = true
79 }
80 // No point rewriting values which aren't used.
84 vchange := phielimValue(v)
85 if vchange && debug > 1 {
86 fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
89 // Eliminate copy inputs.
90 // If any copy input becomes unused, mark it
91 // as invalid and discard its argument. Repeat
92 // recursively on the discarded argument.
93 // This phase helps remove phantom "dead copy" uses
94 // of a value so that an x.Uses==1 rule condition
95 // fires reliably.
96 for i, a := range v.Args {
102 // If a, a copy, has a line boundary indicator, attempt to find a new value
103 // to hold it. The first candidate is the value that will replace a (aa),
104 // if it shares the same block and line and is eligible.
105 // The second option is v, which has a as an input. Because aa is earlier in
106 // the data flow, it is the better choice.
107 if a.Pos.IsStmt() == src.PosIsStmt {
108 if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
109 aa.Pos = aa.Pos.WithIsStmt()
110 } else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
111 v.Pos = v.Pos.WithIsStmt()
112 } else {
113 // Record the lost line and look for a new home after all rewrites are complete.
114 // TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
115 // line to appear in more than one block, but only one block is stored, so if both end
116 // up here, then one will be lost.
117 pendingLines.set(a.Pos, int32(a.Block.ID))
119 a.Pos = a.Pos.WithNotStmt()
128 if vchange && debug > 1 {
129 fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
132 // apply rewrite function
135 // If value changed to a poor choice for a statement boundary, move the boundary
136 if v.Pos.IsStmt() == src.PosIsStmt {
137 if k := nextGoodStatementIndex(v, j, b); k != j {
138 v.Pos = v.Pos.WithNotStmt()
139 b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
144 change = change || vchange
145 if vchange && debug > 1 {
146 fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
150 if !change && !deadChange {
151 break
152 }
153 iters++
154 if (iters > 1000 || debug >= 2) && change {
155 // We've done a suspiciously large number of rewrites (or we're in debug mode).
156 // As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
157 // and the maximum value encountered during make.bash is 12.
158 // Start checking for cycles. (This is too expensive to do routinely.)
159 // Note: we avoid this path for deadChange-only iterations, to fix #51639.
161 states = make(map[string]bool)
162 }
163 h := f.rewriteHash()
164 if _, ok := states[h]; ok {
165 // We've found a cycle.
166 // To diagnose it, set debug to 2 and start again,
167 // so that we'll print all rules applied until we complete another cycle.
168 // If debug is already >= 2, we've already done that, so it's time to crash.
169 if debug < 2 {
170 debug = 2
171 states = make(map[string]bool)
172 } else {
173 f.Fatalf("rewrite cycle detected")
179 // remove clobbered values
180 for _, b := range f.Blocks {
182 for i, v := range b.Values {
183 vl := v.Pos
184 if v.Op == OpInvalid {
185 if v.Pos.IsStmt() == src.PosIsStmt {
186 pendingLines.set(vl, int32(b.ID))
191 if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
192 pendingLines.remove(vl)
193 v.Pos = v.Pos.WithIsStmt()
200 if pendingLines.get(b.Pos) == int32(b.ID) {
201 b.Pos = b.Pos.WithIsStmt()
202 pendingLines.remove(b.Pos)
208 // Common functions called from rewriting rules
210 func is64BitFloat(t *types.Type) bool {
211 return t.Size() == 8 && t.IsFloat()
214 func is32BitFloat(t *types.Type) bool {
215 return t.Size() == 4 && t.IsFloat()
218 func is64BitInt(t *types.Type) bool {
219 return t.Size() == 8 && t.IsInteger()
222 func is32BitInt(t *types.Type) bool {
223 return t.Size() == 4 && t.IsInteger()
226 func is16BitInt(t *types.Type) bool {
227 return t.Size() == 2 && t.IsInteger()
230 func is8BitInt(t *types.Type) bool {
231 return t.Size() == 1 && t.IsInteger()
234 func isPtr(t *types.Type) bool {
235 return t.IsPtrShaped()
238 // mergeSym merges two symbolic offsets. There is no real merging of
239 // offsets; we just pick the non-nil one.
240 func mergeSym(x, y Sym) Sym {
247 panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
250 func canMergeSym(x, y Sym) bool {
251 return x == nil || y == nil
254 // canMergeLoadClobber reports whether the load can be merged into target without
255 // invalidating the schedule.
256 // It also checks that the other non-load argument x is something we
257 // are ok with clobbering.
258 func canMergeLoadClobber(target, load, x *Value) bool {
259 // The register containing x is going to get clobbered.
260 // Don't merge if we still need the value of x.
261 // We don't have liveness information here, but we can
262 // approximate x dying with:
263 // 1) target is x's only use.
264 // 2) target is not in a deeper loop than x.
268 loopnest := x.Block.Func.loopnest()
269 loopnest.calculateDepths()
270 if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
273 return canMergeLoad(target, load)
276 // canMergeLoad reports whether the load can be merged into target without
277 // invalidating the schedule.
278 func canMergeLoad(target, load *Value) bool {
279 if target.Block.ID != load.Block.ID {
280 // If the load is in a different block do not merge it.
284 // We can't merge the load into the target if the load
285 // has more than one use.
290 mem := load.MemoryArg()
292 // We need the load's memory arg to still be alive at target. That
293 // can't be the case if one of target's args depends on a memory
294 // state that is a successor of load's memory arg.
296 // For example, it would be invalid to merge load into target in
297 // the following situation because newmem has killed oldmem
298 // before target is reached:
299 // load = read ... oldmem
300 // newmem = write ... oldmem
301 // arg0 = read ... newmem
302 // target = add arg0 load
304 // If the argument comes from a different block then we can exclude
305 // it immediately because it must dominate load (which is in the
306 // same block as target).
307 var args []*Value
308 for _, a := range target.Args {
309 if a != load && a.Block.ID == target.Block.ID {
310 args = append(args, a)
314 // memPreds contains memory states known to be predecessors of load's
315 // memory state. It is lazily initialized.
316 var memPreds map[*Value]bool
317 for i := 0; len(args) > 0; i++ {
318 const limit = 100
319 if i >= limit {
320 // Give up if we have done a lot of iterations.
321 return false
322 }
323 v := args[len(args)-1]
324 args = args[:len(args)-1]
325 if target.Block.ID != v.Block.ID {
326 // Since target and load are in the same block
327 // we can stop searching when we leave the block.
331 // A Phi implies we have reached the top of the block.
332 // The memory phi, if it exists, is always
333 // the first logical store in the block.
336 if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
337 // We could handle this situation, but it is likely
338 // to be very rare.
339 return false
340 }
341 if v.Op.SymEffect()&SymAddr != 0 {
342 // This case prevents an operation that calculates the
343 // address of a local variable from being forced to schedule
344 // before its corresponding VarDef.
345 // See issue 28445.
346 //   v1 = LOAD ...
347 //   v2 = VARDEF
348 //   v3 = LEAQ_ON_STACK (checking v3 will be forced before v2)
349 //   v4 = CMPQ v3 [c] mem calculated by v1
350 // We don't want to combine the CMPQ with the load, because
351 // that would force the CMPQ to schedule before the VARDEF, which
352 // in turn requires the LEAQ to schedule before the VARDEF.
355 if v.Type.IsMemory() {
356 if memPreds == nil {
357 // Initialise a map containing memory states
358 // known to be predecessors of load's memory
359 // state.
360 memPreds = make(map[*Value]bool)
361 m := mem
362 const limit = 50
363 for i := 0; i < limit; i++ {
364 if m.Op == OpPhi {
365 // The memory phi, if it exists, is always
366 // the first logical store in the block.
367 break
368 }
369 if m.Block.ID != target.Block.ID {
370 break
371 }
372 if !m.Type.IsMemory() {
373 break
374 }
375 memPreds[m] = true
376 if len(m.Args) == 0 {
383 // We can merge if v is a predecessor of mem.
385 // For example, we can merge load into target in the
386 // following scenario:
387 // x = read ... v
388 // mem = write ... v
389 // load = read ... mem
390 // target = add x load
396 if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
397 // If v takes mem as an input then we know mem
398 // is valid at this point.
401 for _, a := range v.Args {
402 if target.Block.ID == a.Block.ID {
403 args = append(args, a)
411 // isSameCall reports whether sym is the same as the given named symbol.
412 func isSameCall(sym interface{}, name string) bool {
413 fn := sym.(*AuxCall).Fn
414 return fn != nil && fn.String() == name
417 // canLoadUnaligned reports whether the architecture supports unaligned load operations.
418 func canLoadUnaligned(c *Config) bool {
419 return c.ctxt.Arch.Alignment == 1
422 // nlzX returns the number of leading zeros.
423 func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
424 func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
425 func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
426 func nlz8(x int8) int { return bits.LeadingZeros8(uint8(x)) }
428 // ntzX returns the number of trailing zeros.
429 func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
430 func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
431 func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
432 func ntz8(x int8) int { return bits.TrailingZeros8(uint8(x)) }
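// Illustrative, hand-checked values (not from the original source):
//
//	nlz64(1) == 63 // only the lowest bit is set
//	nlz8(-1) == 0  // all eight bits are set
//	ntz64(8) == 3  // 0b1000 has three trailing zeros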
434 func oneBit(x int64) bool { return x&(x-1) == 0 && x != 0 }
435 func oneBit8(x int8) bool { return x&(x-1) == 0 && x != 0 }
436 func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
437 func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
438 func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
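// A quick sanity check of the oneBit helpers (hand-checked values):
//
//	oneBit(8) == true  // exactly one bit set
//	oneBit(6) == false // 0b110 has two bits set
//	oneBit(0) == false // the x != 0 clause rejects zero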
440 // nto returns the number of trailing ones.
441 func nto(x int64) int64 {
442 return int64(ntz64(^x))
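// For example (hand-checked): nto(0b0111) == 3, since ^0b0111 ends in three
// zero bits, and nto(-1) == 64 because ^(-1) == 0 and ntz64(0) == 64.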
445 // logX returns the base-2 logarithm of n.
446 // n must be a positive power of 2 (isPowerOfTwoX returns true).
447 func log8(n int8) int64 {
448 return int64(bits.Len8(uint8(n))) - 1
450 func log16(n int16) int64 {
451 return int64(bits.Len16(uint16(n))) - 1
453 func log32(n int32) int64 {
454 return int64(bits.Len32(uint32(n))) - 1
456 func log64(n int64) int64 {
457 return int64(bits.Len64(uint64(n))) - 1
460 // log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
462 func log2uint32(n int64) int64 {
463 return int64(bits.Len32(uint32(n))) - 1
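// Hand-checked examples: log64(1) == 0, log64(8) == 3, log2uint32(8) == 3,
// and log2uint32(0) == -1 since bits.Len32(0) == 0.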
466 // isPowerOfTwoX functions report whether n is a power of 2.
467 func isPowerOfTwo8(n int8) bool {
468 return n > 0 && n&(n-1) == 0
470 func isPowerOfTwo16(n int16) bool {
471 return n > 0 && n&(n-1) == 0
473 func isPowerOfTwo32(n int32) bool {
474 return n > 0 && n&(n-1) == 0
476 func isPowerOfTwo64(n int64) bool {
477 return n > 0 && n&(n-1) == 0
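// For example (hand-checked): isPowerOfTwo64(8) == true, while
// isPowerOfTwo64(0) and isPowerOfTwo64(-8) are false because of the n > 0 test.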
480 // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
481 func isUint64PowerOfTwo(in int64) bool {
482 n := uint64(in)
483 return n > 0 && n&(n-1) == 0
486 // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
487 func isUint32PowerOfTwo(in int64) bool {
488 n := uint64(uint32(in))
489 return n > 0 && n&(n-1) == 0
492 // is32Bit reports whether n can be represented as a signed 32 bit integer.
493 func is32Bit(n int64) bool {
494 return n == int64(int32(n))
497 // is16Bit reports whether n can be represented as a signed 16 bit integer.
498 func is16Bit(n int64) bool {
499 return n == int64(int16(n))
502 // is8Bit reports whether n can be represented as a signed 8 bit integer.
503 func is8Bit(n int64) bool {
504 return n == int64(int8(n))
507 // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
508 func isU8Bit(n int64) bool {
509 return n == int64(uint8(n))
512 // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
513 func isU12Bit(n int64) bool {
514 return 0 <= n && n < (1<<12)
517 // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
518 func isU16Bit(n int64) bool {
519 return n == int64(uint16(n))
522 // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
523 func isU32Bit(n int64) bool {
524 return n == int64(uint32(n))
527 // is20Bit reports whether n can be represented as a signed 20 bit integer.
528 func is20Bit(n int64) bool {
529 return -(1<<19) <= n && n < (1<<19)
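// A few hand-checked cases of the range helpers:
//
//	is32Bit(1<<31 - 1) == true  // math.MaxInt32
//	is32Bit(1 << 31)   == false // one past math.MaxInt32
//	isU12Bit(4095)     == true
//	isU12Bit(4096)     == false
//	is20Bit(-1 << 19)  == true  // smallest signed 20-bit value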
532 // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
533 func b2i(b bool) int64 {
540 // b2i32 translates a boolean value to 0 or 1.
541 func b2i32(b bool) int32 {
548 // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
549 // A shift is bounded if it is shifting by less than the width of the shifted value.
550 func shiftIsBounded(v *Value) bool {
554 // canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
555 // generated code as much as possible.
556 func canonLessThan(x, y *Value) bool {
560 if !x.Pos.SameFileAndLine(y.Pos) {
561 return x.Pos.Before(y.Pos)
566 // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
567 // of the mantissa. It will panic if the truncation results in lost information.
568 func truncate64Fto32F(f float64) float32 {
569 if !isExactFloat32(f) {
570 panic("truncate64Fto32F: truncation is not exact")
575 // NaN bit patterns aren't necessarily preserved across conversion
576 // instructions so we need to do the conversion manually.
577 b := math.Float64bits(f)
578 m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
579 // | sign | exponent | mantissa |
580 r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
581 return math.Float32frombits(r)
584 // extend32Fto64F converts a float32 value to a float64 value preserving the bit
585 // pattern of the mantissa.
586 func extend32Fto64F(f float32) float64 {
587 if !math.IsNaN(float64(f)) {
588 return float64(f)
589 }
590 // NaN bit patterns aren't necessarily preserved across conversion
591 // instructions so we need to do the conversion manually.
592 b := uint64(math.Float32bits(f))
593 // | sign | exponent | mantissa |
594 r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
595 return math.Float64frombits(r)
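// Worked example (hand-computed, not from the original source): the quiet
// float32 NaN 0x7FC00001 extends to 0x7FF8000020000000; the 23-bit mantissa
// payload lands in the top bits of the float64's 52-bit mantissa.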
598 // DivisionNeedsFixUp reports whether the division needs fix-up code.
599 func DivisionNeedsFixUp(v *Value) bool {
603 // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
604 func auxFrom64F(f float64) int64 {
606 panic("can't encode a NaN in AuxInt field")
608 return int64(math.Float64bits(f))
611 // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
612 func auxFrom32F(f float32) int64 {
614 panic("can't encode a NaN in AuxInt field")
616 return int64(math.Float64bits(extend32Fto64F(f)))
619 // auxTo32F decodes a float32 from the AuxInt value provided.
620 func auxTo32F(i int64) float32 {
621 return truncate64Fto32F(math.Float64frombits(uint64(i)))
624 // auxTo64F decodes a float64 from the AuxInt value provided.
625 func auxTo64F(i int64) float64 {
626 return math.Float64frombits(uint64(i))
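// Round-trip sketch (assumed behavior): a float32 constant is widened and
// stored as float64 bits, so auxTo32F(auxFrom32F(1.5)) == 1.5 and
// auxTo64F(auxFrom64F(2.5)) == 2.5. NaNs are rejected by the panics above.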
629 func auxIntToBool(i int64) bool {
635 func auxIntToInt8(i int64) int8 {
638 func auxIntToInt16(i int64) int16 {
641 func auxIntToInt32(i int64) int32 {
644 func auxIntToInt64(i int64) int64 {
647 func auxIntToUint8(i int64) uint8 {
650 func auxIntToFloat32(i int64) float32 {
651 return float32(math.Float64frombits(uint64(i)))
653 func auxIntToFloat64(i int64) float64 {
654 return math.Float64frombits(uint64(i))
656 func auxIntToValAndOff(i int64) ValAndOff {
659 func auxIntToArm64BitField(i int64) arm64BitField {
660 return arm64BitField(i)
662 func auxIntToInt128(x int64) int128 {
664 panic("nonzero int128 not allowed")
668 func auxIntToFlagConstant(x int64) flagConstant {
669 return flagConstant(x)
672 func auxIntToOp(cc int64) Op {
676 func boolToAuxInt(b bool) int64 {
682 func int8ToAuxInt(i int8) int64 {
685 func int16ToAuxInt(i int16) int64 {
688 func int32ToAuxInt(i int32) int64 {
691 func int64ToAuxInt(i int64) int64 {
694 func uint8ToAuxInt(i uint8) int64 {
695 return int64(int8(i))
697 func float32ToAuxInt(f float32) int64 {
698 return int64(math.Float64bits(float64(f)))
700 func float64ToAuxInt(f float64) int64 {
701 return int64(math.Float64bits(f))
703 func valAndOffToAuxInt(v ValAndOff) int64 {
706 func arm64BitFieldToAuxInt(v arm64BitField) int64 {
709 func int128ToAuxInt(x int128) int64 {
711 panic("nonzero int128 not allowed")
715 func flagConstantToAuxInt(x flagConstant) int64 {
719 func opToAuxInt(o Op) int64 {
723 // Aux is an interface to hold miscellaneous data in Blocks and Values.
728 // for now only used to mark moves that need to avoid clobbering flags
729 type auxMark bool
731 func (auxMark) CanBeAnSSAAux() {}
735 // stringAux wraps string values for use in Aux.
736 type stringAux string
738 func (stringAux) CanBeAnSSAAux() {}
740 func auxToString(i Aux) string {
741 return string(i.(stringAux))
743 func auxToSym(i Aux) Sym {
744 // TODO: kind of a hack - allows nil interface through
748 func auxToType(i Aux) *types.Type {
749 return i.(*types.Type)
751 func auxToCall(i Aux) *AuxCall {
754 func auxToS390xCCMask(i Aux) s390x.CCMask {
755 return i.(s390x.CCMask)
757 func auxToS390xRotateParams(i Aux) s390x.RotateParams {
758 return i.(s390x.RotateParams)
761 func StringToAux(s string) Aux {
764 func symToAux(s Sym) Aux {
767 func callToAux(s *AuxCall) Aux {
770 func typeToAux(t *types.Type) Aux {
773 func s390xCCMaskToAux(c s390x.CCMask) Aux {
776 func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
780 // uaddOvf reports whether unsigned a+b would overflow.
781 func uaddOvf(a, b int64) bool {
782 return uint64(a)+uint64(b) < uint64(a)
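// Hand-checked examples: uaddOvf(1, 2) == false, while uaddOvf(-1, 1) == true
// because uint64(-1)+1 wraps to 0, which is less than uint64(-1).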
785 // loadLSymOffset simulates reading a word at an offset into a
786 // read-only symbol's runtime memory. If it would read a pointer to
787 // another symbol, that symbol is returned. Otherwise, it returns nil.
788 func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
789 if lsym.Type != objabi.SRODATA {
793 for _, r := range lsym.R {
794 if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
802 // de-virtualize an InterLECall
803 // 'sym' is the symbol for the itab.
804 func devirtLESym(v *Value, aux Aux, sym Sym, offset int64) *obj.LSym {
805 n, ok := sym.(*obj.LSym)
810 lsym := loadLSymOffset(n, offset)
811 if f := v.Block.Func; f.pass.debug > 0 {
813 f.Warnl(v.Pos, "de-virtualizing call")
815 f.Warnl(v.Pos, "couldn't de-virtualize call")
821 func devirtLECall(v *Value, sym *obj.LSym) *Value {
822 v.Op = OpStaticLECall
823 auxcall := v.Aux.(*AuxCall)
824 auxcall.Fn = sym
825 // Remove first arg
826 v.Args[0].Uses--
827 copy(v.Args[0:], v.Args[1:])
828 v.Args[len(v.Args)-1] = nil // aid GC
829 v.Args = v.Args[:len(v.Args)-1]
833 // isSamePtr reports whether p1 and p2 point to the same address.
834 func isSamePtr(p1, p2 *Value) bool {
835 if p1 == p2 {
836 return true
837 }
838 if p1.Op != p2.Op {
839 return false
840 }
841 switch p1.Op {
842 case OpOffPtr:
843 return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
844 case OpAddr, OpLocalAddr:
845 return p1.Aux == p2.Aux
846 case OpAddPtr:
847 return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
852 func isStackPtr(v *Value) bool {
853 for v.Op == OpOffPtr || v.Op == OpAddPtr {
854 v = v.Args[0]
855 }
856 return v.Op == OpSP || v.Op == OpLocalAddr
859 // disjoint reports whether the memory region specified by [p1:p1+n1)
860 // does not overlap with [p2:p2+n2).
861 // A return value of false does not imply the regions overlap.
862 func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
863 if n1 == 0 || n2 == 0 {
869 baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
870 base, offset = ptr, 0
871 for base.Op == OpOffPtr {
872 offset += base.AuxInt
873 base = base.Args[0]
877 p1, off1 := baseAndOffset(p1)
878 p2, off2 := baseAndOffset(p2)
879 if isSamePtr(p1, p2) {
880 return !overlap(off1, n1, off2, n2)
882 // p1 and p2 are not the same, so if they are both OpAddrs then
883 // they point to different variables.
884 // If one pointer is on the stack and the other is an argument
885 // then they can't overlap.
887 case OpAddr, OpLocalAddr:
888 if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
889 return true
890 }
891 return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
892 case OpArg, OpArgIntReg:
893 if p2.Op == OpSP || p2.Op == OpLocalAddr {
894 return true
895 }
896 case OpSP:
897 return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
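// For example (assumed values): two OpOffPtr addresses with the same base,
// offsets 0 and 8, and sizes 8 and 8 are disjoint, since [0,8) and [8,16)
// do not overlap.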
902 // moveSize returns the number of bytes an aligned MOV instruction moves.
903 func moveSize(align int64, c *Config) int64 {
904 switch {
905 case align%8 == 0 && c.PtrSize == 8:
915 // mergePoint finds a block among a's blocks which dominates b and is itself
916 // dominated by all of a's blocks. Returns nil if it can't find one.
917 // Might return nil even if one does exist.
918 func mergePoint(b *Block, a ...*Value) *Block {
919 // Walk backward from b looking for one of the a's blocks.
925 for _, x := range a {
930 if len(b.Preds) > 1 {
931 // Don't know which way to go back. Abort.
937 return nil // too far away
939 // At this point, r is the first value in a that we find by walking backwards.
940 // If we return anything, r will be it.
943 // Keep going, counting the other a's that we find. They must all dominate r.
946 for _, x := range a {
952 // Found all of a in a backwards walk. We can return r.
955 if len(b.Preds) > 1 {
962 return nil // too far away
965 // clobber invalidates values. Returns true.
966 // clobber is used by rewrite rules to:
968 // A) make sure the values are really dead and never used again.
969 // B) decrement use counts of the values' args.
970 func clobber(vv ...*Value) bool {
971 for _, v := range vv {
972 v.reset(OpInvalid)
973 // Note: leave v.Block intact. The Block field is used after clobber.
978 // clobberIfDead resets v when use count is 1. Returns true.
979 // clobberIfDead is used by rewrite rules to decrement
980 // use counts of v's args when v is dead and never used.
981 func clobberIfDead(v *Value) bool {
982 if v.Uses == 1 {
983 v.reset(OpInvalid)
984 }
985 // Note: leave v.Block intact. The Block field is used after clobberIfDead.
989 // noteRule is an easy way to track if a rule is matched when writing
990 // new ones. Make the rule of interest also conditional on
992 // noteRule("note to self: rule of interest matched")
994 // and that message will print when the rule matches.
995 func noteRule(s string) bool {
1000 // countRule increments Func.ruleMatches[key].
1001 // If Func.ruleMatches is non-nil at the end
1002 // of compilation, it will be printed to stdout.
1003 // This is intended to make it easier to find functions
1004 // that contain lots of rule matches when developing new rules.
1005 func countRule(v *Value, key string) bool {
1007 if f.ruleMatches == nil {
1008 f.ruleMatches = make(map[string]int)
1010 f.ruleMatches[key]++
1014 // warnRule generates compiler debug output with string s when
1015 // v is not in autogenerated code, cond is true and the rule has fired.
1016 func warnRule(cond bool, v *Value, s string) bool {
1017 if pos := v.Pos; pos.Line() > 1 && cond {
1018 v.Block.Func.Warnl(pos, s)
1023 // for a pseudo-op like (LessThan x), extract x.
1024 func flagArg(v *Value) *Value {
1025 if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
1031 // arm64Negate finds the complement to an ARM64 condition code,
1032 // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
1034 // For floating point, it's more subtle because NaN is unordered. We do
1035 // !LessThanF -> NotLessThanF; the latter takes care of NaNs.
1036 func arm64Negate(op Op) Op {
1038 case OpARM64LessThan:
1039 return OpARM64GreaterEqual
1040 case OpARM64LessThanU:
1041 return OpARM64GreaterEqualU
1042 case OpARM64GreaterThan:
1043 return OpARM64LessEqual
1044 case OpARM64GreaterThanU:
1045 return OpARM64LessEqualU
1046 case OpARM64LessEqual:
1047 return OpARM64GreaterThan
1048 case OpARM64LessEqualU:
1049 return OpARM64GreaterThanU
1050 case OpARM64GreaterEqual:
1051 return OpARM64LessThan
1052 case OpARM64GreaterEqualU:
1053 return OpARM64LessThanU
1054 case OpARM64Equal:
1055 return OpARM64NotEqual
1056 case OpARM64NotEqual:
1057 return OpARM64Equal
1058 case OpARM64LessThanF:
1059 return OpARM64NotLessThanF
1060 case OpARM64NotLessThanF:
1061 return OpARM64LessThanF
1062 case OpARM64LessEqualF:
1063 return OpARM64NotLessEqualF
1064 case OpARM64NotLessEqualF:
1065 return OpARM64LessEqualF
1066 case OpARM64GreaterThanF:
1067 return OpARM64NotGreaterThanF
1068 case OpARM64NotGreaterThanF:
1069 return OpARM64GreaterThanF
1070 case OpARM64GreaterEqualF:
1071 return OpARM64NotGreaterEqualF
1072 case OpARM64NotGreaterEqualF:
1073 return OpARM64GreaterEqualF
1075 panic("unreachable")
1079 // arm64Invert evaluates (InvertFlags op), which
1080 // is the same as altering the condition codes such
1081 // that the same result would be produced if the arguments
1082 // to the flag-generating instruction were reversed, e.g.
1083 // (InvertFlags (CMP x y)) -> (CMP y x)
1084 func arm64Invert(op Op) Op {
1086 case OpARM64LessThan:
1087 return OpARM64GreaterThan
1088 case OpARM64LessThanU:
1089 return OpARM64GreaterThanU
1090 case OpARM64GreaterThan:
1091 return OpARM64LessThan
1092 case OpARM64GreaterThanU:
1093 return OpARM64LessThanU
1094 case OpARM64LessEqual:
1095 return OpARM64GreaterEqual
1096 case OpARM64LessEqualU:
1097 return OpARM64GreaterEqualU
1098 case OpARM64GreaterEqual:
1099 return OpARM64LessEqual
1100 case OpARM64GreaterEqualU:
1101 return OpARM64LessEqualU
1102 case OpARM64Equal, OpARM64NotEqual:
1103 return op
1104 case OpARM64LessThanF:
1105 return OpARM64GreaterThanF
1106 case OpARM64GreaterThanF:
1107 return OpARM64LessThanF
1108 case OpARM64LessEqualF:
1109 return OpARM64GreaterEqualF
1110 case OpARM64GreaterEqualF:
1111 return OpARM64LessEqualF
1112 case OpARM64NotLessThanF:
1113 return OpARM64NotGreaterThanF
1114 case OpARM64NotGreaterThanF:
1115 return OpARM64NotLessThanF
1116 case OpARM64NotLessEqualF:
1117 return OpARM64NotGreaterEqualF
1118 case OpARM64NotGreaterEqualF:
1119 return OpARM64NotLessEqualF
1121 panic("unreachable")
1125 // evaluate an ARM64 op against a flags value
1126 // that is potentially constant; return 1 for true,
1127 // -1 for false, and 0 for not constant.
1128 func ccARM64Eval(op Op, flags *Value) int {
1129 fop := flags.Op
1130 if fop == OpARM64InvertFlags {
1131 return -ccARM64Eval(op, flags.Args[0])
1133 if fop != OpARM64FlagConstant {
1136 fc := flagConstant(flags.AuxInt)
1137 b2i := func(b bool) int {
1146 case OpARM64NotEqual:
1148 case OpARM64LessThan:
1150 case OpARM64LessThanU:
1151 return b2i(fc.ult())
1152 case OpARM64GreaterThan:
1154 case OpARM64GreaterThanU:
1155 return b2i(fc.ugt())
1156 case OpARM64LessEqual:
1158 case OpARM64LessEqualU:
1159 return b2i(fc.ule())
1160 case OpARM64GreaterEqual:
1162 case OpARM64GreaterEqualU:
1163 return b2i(fc.uge())
1168 // logRule logs the use of the rule s. This will only be enabled if
1169 // rewrite rules were generated with the -log option; see _gen/rulegen.go.
1170 func logRule(s string) {
1171 if ruleFile == nil {
1172 // Open a log file to write log to. We open in append
1173 // mode because all.bash runs the compiler lots of times,
1174 // and we want the concatenation of all of those logs.
1175 // This means, of course, that users need to rm the old log
1176 // to get fresh data.
1177 // TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
1178 w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
1179 os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
1185 _, err := fmt.Fprintln(ruleFile, s)
1191 var ruleFile io.Writer
1193 func min(x, y int64) int64 {
1200 func isConstZero(v *Value) bool {
1201 switch v.Op {
1202 case OpConstNil:
1203 return true
1204 case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
1205 return v.AuxInt == 0
1210 // reciprocalExact64 reports whether 1/c is exactly representable.
1211 func reciprocalExact64(c float64) bool {
1212 b := math.Float64bits(c)
1213 man := b & (1<<52 - 1)
1214 if man != 0 {
1215 return false // not a power of 2, denormal, or NaN
1216 }
1217 exp := b >> 52 & (1<<11 - 1)
1218 // exponent bias is 0x3ff. So taking the reciprocal of a number
1219 // changes the exponent to 0x7fe-exp.
1224 return false // ±inf
1226 return false // exponent is not representable
1232 // reciprocalExact32 reports whether 1/c is exactly representable.
1233 func reciprocalExact32(c float32) bool {
1234 b := math.Float32bits(c)
1235 man := b & (1<<23 - 1)
1236 if man != 0 {
1237 return false // not a power of 2, denormal, or NaN
1238 }
1239 exp := b >> 23 & (1<<8 - 1)
1240 // exponent bias is 0x7f. So taking the reciprocal of a number
1241 // changes the exponent to 0xfe-exp.
1246 return false // ±inf
1248 return false // exponent is not representable
1254 // check if an immediate can be directly encoded into an ARM instruction.
1255 func isARMImmRot(v uint32) bool {
1256 for i := 0; i < 16; i++ {
1266 // overlap reports whether the ranges given by the offset and
1267 // size pairs overlap.
1268 func overlap(offset1, size1, offset2, size2 int64) bool {
1269 if offset1 >= offset2 && offset2+size2 > offset1 {
1270 return true
1271 }
1272 if offset2 >= offset1 && offset1+size1 > offset2 {
1278 func areAdjacentOffsets(off1, off2, size int64) bool {
1279 return off1+size == off2 || off1 == off2+size
1282 // check if value zeroes out the upper 32 bits of a 64-bit register.
1283 // depth limits the recursion depth. In AMD64.rules 3 is used as the limit,
1284 // because it catches the same number of cases as 4.
1285 func zeroUpper32Bits(x *Value, depth int) bool {
1287 case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
1288 OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
1289 OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
1290 OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
1291 OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
1292 OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
1293 OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
1294 OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
1295 OpAMD64SHLL, OpAMD64SHLLconst:
1297 case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
1298 OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
1299 OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
1300 return true
1301 case OpArg:
1302 return x.Type.Size() == 4
1303 case OpPhi, OpSelect0, OpSelect1:
1304 // Phis can use each other as arguments; instead of tracking visited values,
1305 // just limit the recursion depth.
1306 if depth <= 0 {
1307 return false
1308 }
1309 for i := range x.Args {
1310 if !zeroUpper32Bits(x.Args[i], depth-1) {
1320 // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
1321 func zeroUpper48Bits(x *Value, depth int) bool {
1323 case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
1324 return true
1325 case OpArg:
1326 return x.Type.Size() == 2
1327 case OpPhi, OpSelect0, OpSelect1:
1328 // Phis can use each other as arguments; instead of tracking visited values,
1329 // just limit the recursion depth.
1330 if depth <= 0 {
1331 return false
1332 }
1333 for i := range x.Args {
1334 if !zeroUpper48Bits(x.Args[i], depth-1) {
1344 // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
1345 func zeroUpper56Bits(x *Value, depth int) bool {
1347 case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
1348 return true
1349 case OpArg:
1350 return x.Type.Size() == 1
1351 case OpPhi, OpSelect0, OpSelect1:
1352 // Phis can use each other as arguments; instead of tracking visited values,
1353 // just limit the recursion depth.
1354 if depth <= 0 {
1355 return false
1356 }
1357 for i := range x.Args {
1358 if !zeroUpper56Bits(x.Args[i], depth-1) {
1368 func isInlinableMemclr(c *Config, sz int64) bool {
1372 // TODO: expand this check to allow other architectures
1373 // see CL 454255 and issue 56997
1375 case "amd64", "arm64":
1377 case "ppc64le", "ppc64":
1383 // isInlinableMemmove reports whether the given arch performs a Move of the given size
1384 // faster than memmove. It will only return true if replacing the memmove with a Move is
1385 // safe, either because Move will do all of its loads before any of its stores, or
1386 // because the arguments are known to be disjoint.
1387 // This is used as a check for replacing memmove with Move ops.
1388 func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1389 // It is always safe to convert memmove into Move when its arguments are disjoint.
1390 // Move ops may or may not be faster for large sizes depending on how the platform
1391 // lowers them, so we only perform this optimization on platforms that we know to
1392 // have fast Move ops.
1393 switch c.arch {
1394 case "amd64":
1395 return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
1396 case "386", "arm64":
1398 case "s390x", "ppc64", "ppc64le":
1399 return sz <= 8 || disjoint(dst, sz, src, sz)
1400 case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
1405 func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1406 return isInlinableMemmove(dst, src, sz, c)
1409 // logLargeCopy logs the occurrence of a large copy.
1410 // The best place to do this is in the rewrite rules where the size of the move is easy to find.
1411 // "Large" is arbitrarily chosen to be 128 bytes; this may change.
1412 func logLargeCopy(v *Value, s int64) bool {
1413 if s < 128 {
1414 return true
1415 }
1416 if logopt.Enabled() {
1417 logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
1421 func LogLargeCopy(funcName string, pos src.XPos, s int64) {
1422 if s < 128 {
1423 return
1424 }
1425 if logopt.Enabled() {
1426 logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
1430 // hasSmallRotate reports whether the architecture has rotate instructions
1431 // for sizes < 32-bit. This is used to decide whether to promote some rotations.
1432 func hasSmallRotate(c *Config) bool {
1434 case "amd64", "386":
1441 func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
1442 if sh < 0 || sh >= sz {
1443 panic("PPC64 shift arg sh out of range")
1445 if mb < 0 || mb >= sz {
1446 panic("PPC64 shift arg mb out of range")
1448 if me < 0 || me >= sz {
1449 panic("PPC64 shift arg me out of range")
1451 return int32(sh<<16 | mb<<8 | me)
1454 func GetPPC64Shiftsh(auxint int64) int64 {
1455 return int64(int8(auxint >> 16))
1458 func GetPPC64Shiftmb(auxint int64) int64 {
1459 return int64(int8(auxint >> 8))
1462 func GetPPC64Shiftme(auxint int64) int64 {
1463 return int64(int8(auxint))
1466 // Test if this value can be encoded as a mask for an rlwinm-like
1467 // operation. Masks can also extend from the msb and wrap to
1468 // the lsb. That is, the valid masks are 32 bit strings
1469 // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
1470 func isPPC64WordRotateMask(v64 int64) bool {
1471 // Isolate rightmost 1 (if none 0) and add.
1472 v := uint64(v64)
1473 vp := (v & -v) + v
1474 // Likewise, for the wrapping case.
1475 vn := ^v
1476 vpn := (vn & -vn) + vn
1477 return (v&vp == 0 || vn&vpn == 0) && v != 0
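// Hand-checked examples: isPPC64WordRotateMask(0x00FF0000) == true (a single
// contiguous run of ones), while isPPC64WordRotateMask(0x0F0F0F0F) == false
// (multiple runs). Wrapping masks of the form 1..10..01..1 pass via vn.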
1480 // Compress mask and shift into single value of the form
1481 // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
1482 // be used to regenerate the input mask.
1483 func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
1484 var mb, me, mbn, men int
1486 // Determine boundaries and then decode them
1487 if mask == 0 || ^mask == 0 || rotate >= nbits {
1488 panic("Invalid PPC64 rotate mask")
1489 } else if nbits == 32 {
1490 mb = bits.LeadingZeros32(uint32(mask))
1491 me = 32 - bits.TrailingZeros32(uint32(mask))
1492 mbn = bits.LeadingZeros32(^uint32(mask))
1493 men = 32 - bits.TrailingZeros32(^uint32(mask))
1495 mb = bits.LeadingZeros64(uint64(mask))
1496 me = 64 - bits.TrailingZeros64(uint64(mask))
1497 mbn = bits.LeadingZeros64(^uint64(mask))
1498 men = 64 - bits.TrailingZeros64(^uint64(mask))
1500 // Check for a wrapping mask (e.g. bits at 0 and 63).
1501 if mb == 0 && me == int(nbits) {
1502 // swap the inverted values
1503 mb, me = men, mbn
1506 return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
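// Worked example (hand-computed): for rotate == 2 and the 32-bit mask
// 0x00FFFF00, mb == 8 and me == 24, so the encoding is
// 24 | 8<<8 | 2<<16 | 32<<24 == 0x20020818.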
1509 // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask. The values returned as
1510 // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
1511 func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
1512 auxint := uint64(sauxint)
1513 rotate = int64((auxint >> 16) & 0xFF)
1514 mb = int64((auxint >> 8) & 0xFF)
1515 me = int64((auxint >> 0) & 0xFF)
1516 nbits := int64((auxint >> 24) & 0xFF)
1517 mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
1522 mask = uint64(uint32(mask))
1525 // Fixup ME to match ISA definition. The second argument to MASK(..,me)
1526 // is inclusive.
1527 me = (me - 1) & (nbits - 1)
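// Continuing the example above (hand-computed): decoding 0x20020818 recovers
// rotate == 2, nbits == 32, mask == ((1<<24)-1)^((1<<8)-1) == 0x00FFFF00,
// with mb == 8 and me == 23 after the inclusive-ME fixup.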
1531 // This verifies that the mask is a set of
1532 // consecutive bits including the least
1533 // significant bit.
1534 func isPPC64ValidShiftMask(v int64) bool {
1535 if (v != 0) && ((v+1)&v) == 0 {
1541 func getPPC64ShiftMaskLength(v int64) int64 {
1542 return int64(bits.Len64(uint64(v)))
1545 // Decompose a shift right into an equivalent rotate/mask,
1546 // and return mask & m.
1547 func mergePPC64RShiftMask(m, s, nbits int64) int64 {
1548 smask := uint64((1<<uint(nbits))-1) >> uint(s)
1549 return m & int64(smask)
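// For example (hand-checked): mergePPC64RShiftMask(0xFF, 28, 32) == 0xF,
// since only the low four mask bits survive a 28-bit right shift.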
1552 // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
1553 func mergePPC64AndSrwi(m, s int64) int64 {
1554 mask := mergePPC64RShiftMask(m, s, 32)
1555 if !isPPC64WordRotateMask(mask) {
1558 return encodePPC64RotateMask((32-s)&31, mask, 32)
1561 // Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
1562 // Return the encoded RLWINM constant, or 0 if they cannot be merged.
1563 func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
1564 mask_1 := uint64(0xFFFFFFFF >> uint(srw))
1565 // for CLRLSLDI, it's more convenient to think of it as masking left bits, then rotating left.
1566 mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1568 // Rewrite mask to apply after the final left shift.
1569 mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
1571 r_1 := 32 - srw
1572 r_2 := GetPPC64Shiftsh(sld)
1573 r_3 := (r_1 + r_2) & 31 // This can wrap.
1575 if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
1578 return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
1581 // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM. Return
1582 // the encoded RLWINM constant, or 0 if they cannot be merged.
1583 func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
1584 r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
1585 // for CLRLSLDI, it's more convenient to think of it as masking left bits, then rotating left.
1586 mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1588 // combine the masks, and adjust for the final left shift.
1589 mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
1590 r_2 := GetPPC64Shiftsh(int64(sld))
1591 r_3 := (r_1 + r_2) & 31 // This can wrap.
1593 // Verify the result is still a valid bitmask of <= 32 bits.
1594 if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
1597 return encodePPC64RotateMask(r_3, int64(mask_3), 32)
1600 // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
1601 // or return 0 if they cannot be combined.
1602 func mergePPC64SldiSrw(sld, srw int64) int64 {
1603 if sld > srw || srw >= 32 {
1606 mask_r := uint32(0xFFFFFFFF) >> uint(srw)
1607 mask_l := uint32(0xFFFFFFFF) >> uint(sld)
1608 mask := (mask_r & mask_l) << uint(sld)
1609 return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
1612 // Convenience function to rotate a 32 bit constant value by another constant.
1613 func rotateLeft32(v, rotate int64) int64 {
1614 return int64(bits.RotateLeft32(uint32(v), int(rotate)))
1617 func rotateRight64(v, rotate int64) int64 {
1618 return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
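// Hand-checked examples: rotateLeft32(0x80000001, 1) == 3, and
// rotateRight64(1, 1) == -1 << 63 (the sign bit).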
1621 // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
1622 func armBFAuxInt(lsb, width int64) arm64BitField {
1623 if lsb < 0 || lsb > 63 {
1624 panic("ARM(64) bit field lsb constant out of range")
1626 if width < 1 || lsb+width > 64 {
1627 panic("ARM(64) bit field width constant out of range")
1629 return arm64BitField(width | lsb<<8)
1632 // returns the lsb part of the auxInt field of arm64 bitfield ops.
1633 func (bfc arm64BitField) getARM64BFlsb() int64 {
1634 return int64(uint64(bfc) >> 8)
1637 // returns the width part of the auxInt field of arm64 bitfield ops.
1638 func (bfc arm64BitField) getARM64BFwidth() int64 {
1639 return int64(bfc) & 0xff
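// Round-trip example (hand-checked): armBFAuxInt(8, 8) encodes to 0x808, and
// getARM64BFlsb/getARM64BFwidth recover lsb == 8 and width == 8.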
1642 // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
1643 func isARM64BFMask(lsb, mask, rshift int64) bool {
1644 shiftedMask := int64(uint64(mask) >> uint64(rshift))
1645 return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
1648 // returns the bitfield width of mask >> rshift for arm64 bitfield ops.
1649 func arm64BFWidth(mask, rshift int64) int64 {
1650 shiftedMask := int64(uint64(mask) >> uint64(rshift))
1651 if shiftedMask == 0 {
1652 panic("ARM64 BF mask is zero")
1654 return nto(shiftedMask)
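// For example (hand-checked): arm64BFWidth(0xFF, 0) == 8, since the shifted
// mask 0xFF has eight trailing ones.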
1657 // sizeof returns the size of t in bytes.
1658 // It will panic if t is not a *types.Type.
1659 func sizeof(t interface{}) int64 {
1660 return t.(*types.Type).Size()
1663 // registerizable reports whether t is a primitive type that fits in
1664 // a register. It assumes float64 values will always fit into registers
1665 // even if that isn't strictly true.
1666 func registerizable(b *Block, typ *types.Type) bool {
1667 if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
1670 if typ.IsInteger() {
1671 return typ.Size() <= b.Func.Config.RegSize
1676 // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
1677 func needRaceCleanup(sym *AuxCall, v *Value) bool {
1682 if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
1685 for _, b := range f.Blocks {
1686 for _, v := range b.Values {
1688 case OpStaticCall, OpStaticLECall:
1689 // A check for racefuncenter will encounter racefuncexit and vice versa.
1690 // Allow calls to panic*
1691 s := v.Aux.(*AuxCall).Fn.String()
1693 case "runtime.racefuncenter", "runtime.racefuncexit",
1694 "runtime.panicdivide", "runtime.panicwrap",
1695 "runtime.panicshift":
1698 // If we encountered any call, we need to keep racefunc*,
1699 // for accurate stacktraces.
1701 case OpPanicBounds, OpPanicExtend:
1702 // Note: these are panic generators that are ok (like the static calls above).
1703 case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
1704 // We must keep the race functions if there are any other call types.
1709 if isSameCall(sym, "runtime.racefuncenter") {
1710 // TODO REGISTER ABI this needs to be cleaned up.
1711 // If we're removing racefuncenter, remove its argument as well.
1712 if v.Args[0].Op != OpStore {
1713 if v.Op == OpStaticLECall {
1714 // there is no store, yet.
1719 mem := v.Args[0].Args[2]
1720 v.Args[0].reset(OpCopy)
1721 v.Args[0].AddArg(mem)
1726 // symIsRO reports whether sym is a read-only global.
1727 func symIsRO(sym interface{}) bool {
1728 lsym := sym.(*obj.LSym)
1729 return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
1732 // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
1733 func symIsROZero(sym Sym) bool {
1734 lsym := sym.(*obj.LSym)
1735 if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
1738 for _, b := range lsym.P {
1746 // read8 reads one byte from the read-only global sym at offset off.
1747 func read8(sym interface{}, off int64) uint8 {
1748 lsym := sym.(*obj.LSym)
1749 if off >= int64(len(lsym.P)) || off < 0 {
1750 // Invalid index into the global sym.
1751 // This can happen in dead code, so we don't want to panic.
1752 // Just return any value; it will eventually get ignored.
1759 // read16 reads two bytes from the read-only global sym at offset off.
1760 func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
1761 lsym := sym.(*obj.LSym)
1762 // lsym.P is written lazily.
1763 // Bytes requested after the end of lsym.P are 0.
1765 if 0 <= off && off < int64(len(lsym.P)) {
1768 buf := make([]byte, 2)
1770 return byteorder.Uint16(buf)
1773 // read32 reads four bytes from the read-only global sym at offset off.
1774 func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
1775 lsym := sym.(*obj.LSym)
1777 if 0 <= off && off < int64(len(lsym.P)) {
1780 buf := make([]byte, 4)
1782 return byteorder.Uint32(buf)
1785 // read64 reads eight bytes from the read-only global sym at offset off.
1786 func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
1787 lsym := sym.(*obj.LSym)
1789 if 0 <= off && off < int64(len(lsym.P)) {
1792 buf := make([]byte, 8)
1794 return byteorder.Uint64(buf)
1797 // sequentialAddresses reports whether it can prove that x + n == y.
1798 func sequentialAddresses(x, y *Value, n int64) bool {
1799 if x == y && n == 0 {
1802 if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
1803 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1804 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1807 if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1808 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1809 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1812 if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
1813 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1814 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1817 if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1818 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1819 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1825 // flagConstant represents the result of a compile-time comparison.
1826 // The sense of these flags does not necessarily represent the hardware's notion
1827 // of a flags register - these are just a compile-time construct.
1828 // We happen to match the semantics to those of arm/arm64.
1829 // Note that these semantics differ from x86: the carry flag has the opposite
1830 // sense on a subtraction!
1832 // On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
1833 // On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
1834 // (because it does x + ^y + C).
1836 // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
1837 type flagConstant uint8
1839 // N reports whether the result of an operation is negative (high bit set).
1840 func (fc flagConstant) N() bool {
1844 // Z reports whether the result of an operation is 0.
1845 func (fc flagConstant) Z() bool {
1849 // C reports whether an unsigned add overflowed (carry), or an
1850 // unsigned subtract did not underflow (borrow).
1851 func (fc flagConstant) C() bool {
1855 // V reports whether a signed operation overflowed or underflowed.
1856 func (fc flagConstant) V() bool {
1860 func (fc flagConstant) eq() bool {
1863 func (fc flagConstant) ne() bool {
1866 func (fc flagConstant) lt() bool {
1867 return fc.N() != fc.V()
1869 func (fc flagConstant) le() bool {
1870 return fc.Z() || fc.lt()
1872 func (fc flagConstant) gt() bool {
1873 return !fc.Z() && fc.ge()
1875 func (fc flagConstant) ge() bool {
1876 return fc.N() == fc.V()
1878 func (fc flagConstant) ult() bool {
1881 func (fc flagConstant) ule() bool {
1882 return fc.Z() || fc.ult()
1884 func (fc flagConstant) ugt() bool {
1885 return !fc.Z() && fc.uge()
1887 func (fc flagConstant) uge() bool {
1891 func (fc flagConstant) ltNoov() bool {
1892 return fc.lt() && !fc.V()
1894 func (fc flagConstant) leNoov() bool {
1895 return fc.le() && !fc.V()
1897 func (fc flagConstant) gtNoov() bool {
1898 return fc.gt() && !fc.V()
1900 func (fc flagConstant) geNoov() bool {
1901 return fc.ge() && !fc.V()
1904 func (fc flagConstant) String() string {
1905 return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
1908 type flagConstantBuilder struct {
1909 N bool
1910 Z bool
1911 C bool
1912 V bool
1913 }
1915 func (fcs flagConstantBuilder) encode() flagConstant {
1932 // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
1933 // - the results of the C flag are different
1934 // - the results of the V flag when y==minint are different
1936 // addFlags64 returns the flags that would be set from computing x+y.
1937 func addFlags64(x, y int64) flagConstant {
1938 var fcb flagConstantBuilder
1939 fcb.Z = x+y == 0
1940 fcb.N = x+y < 0
1941 fcb.C = uint64(x+y) < uint64(x)
1942 fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1946 // subFlags64 returns the flags that would be set from computing x-y.
1947 func subFlags64(x, y int64) flagConstant {
1948 var fcb flagConstantBuilder
1949 fcb.Z = x-y == 0
1950 fcb.N = x-y < 0
1951 fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
1952 fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
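// Worked example (hand-checked): subFlags64(2, 3) computes 2-3 == -1, so
// N == true, Z == false, V == false, and C == false (a borrow occurred, in
// the arm sense). Hence lt() == (N != V) == true, as expected for 2 < 3.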
1956 // addFlags32 returns the flags that would be set from computing x+y.
1957 func addFlags32(x, y int32) flagConstant {
1958 var fcb flagConstantBuilder
1959 fcb.Z = x+y == 0
1960 fcb.N = x+y < 0
1961 fcb.C = uint32(x+y) < uint32(x)
1962 fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1966 // subFlags32 returns the flags that would be set from computing x-y.
1967 func subFlags32(x, y int32) flagConstant {
1968 var fcb flagConstantBuilder
1969 fcb.Z = x-y == 0
1970 fcb.N = x-y < 0
1971 fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
1972 fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
1976 // logicFlags64 returns flags set to the sign/zeroness of x.
1977 // C and V are set to false.
1978 func logicFlags64(x int64) flagConstant {
1979 var fcb flagConstantBuilder
1980 fcb.Z = x == 0
1981 fcb.N = x < 0
1982 return fcb.encode()
1983 }
1985 // logicFlags32 returns flags set to the sign/zeroness of x.
1986 // C and V are set to false.
1987 func logicFlags32(x int32) flagConstant {
1988 var fcb flagConstantBuilder
1989 fcb.Z = x == 0
1990 fcb.N = x < 0
1991 return fcb.encode()
1992 }
1994 func makeJumpTableSym(b *Block) *obj.LSym {
1995 s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.LSym(), b.ID))
1996 s.Set(obj.AttrDuplicateOK, true)
1997 s.Set(obj.AttrLocal, true)
2001 // canRotate reports whether the architecture supports
2002 // rotates of integer registers with the given number of bits.
2003 func canRotate(c *Config, bits int64) bool {
2004 if bits > c.PtrSize*8 {
2005 // Don't rewrite to rotates bigger than the machine word.
2009 case "386", "amd64", "arm64":
2011 case "arm", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
2018 // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
2019 func isARM64bitcon(x uint64) bool {
2020 if x == 1<<64-1 || x == 0 {
2021 return false
2022 }
2023 // determine the period and sign-extend a unit to 64 bits
2024 switch {
2025 case x != x>>32|x<<32:
2028 case x != x>>16|x<<48:
2030 x = uint64(int64(int32(x)))
2031 case x != x>>8|x<<56:
2033 x = uint64(int64(int16(x)))
2034 case x != x>>4|x<<60:
2036 x = uint64(int64(int8(x)))
2038 // period is 4 or 2, always true
2039 // 0001, 0010, 0100, 1000 -- 0001 rotate
2040 // 0011, 0110, 1100, 1001 -- 0011 rotate
2041 // 0111, 1011, 1101, 1110 -- 0111 rotate
2042 // 0101, 1010 -- 01 rotate, repeat
2045 return sequenceOfOnes(x) || sequenceOfOnes(^x)
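// Hand-checked examples: isARM64bitcon(0x00FF00FF00FF00FF) == true (the unit
// 0x00FF repeats with period 16), while isARM64bitcon(5) == false (0b101 is
// not a rotated run of ones at period 64).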
2048 // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
2049 func sequenceOfOnes(x uint64) bool {
2050 y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
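// Example from the comment above: for x == 0b0001110, y == 0b0000010 and
// x+y == 0b0010000, a power of two, so x is a sequence of ones.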
2055 // isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction.
2056 func isARM64addcon(v int64) bool {
2057 /* uimm12 or uimm24? */
2058 if v < 0 {
2059 return false
2060 }
2061 if (v & 0xFFF) == 0 {