src/cmd/compile/internal/ssa/rewrite.go

   1 // Copyright 2015 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package ssa
   6
   7 import (
   8         "cmd/compile/internal/logopt"
   9         "cmd/compile/internal/types"
  10         "cmd/internal/obj"
  11         "cmd/internal/obj/s390x"
  12         "cmd/internal/objabi"
  13         "cmd/internal/src"
  14         "encoding/binary"
  15         "fmt"
  16         "io"
  17         "math"
  18         "math/bits"
  19         "os"
  20         "path/filepath"
  21 )
  22
  23 type deadValueChoice bool
  24
  25 const (
  26         leaveDeadValues  deadValueChoice = false
  27         removeDeadValues                 = true
  28 )
  29
  30 // deadcode indicates that rewrite should try to remove any values that become dead.
  31 func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
  32         // repeat rewrites until we find no more rewrites
  33         pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
  34         pendingLines.clear()
  35         debug := f.pass.debug
  36         if debug > 1 {
  37                 fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
  38         }
  39         for {
  40                 change := false
  41                 for _, b := range f.Blocks {
  42                         var b0 *Block
  43                         if debug > 1 {
  44                                 b0 = new(Block)
  45                                 *b0 = *b
  46                                 b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
  47                         }
  48                         for i, c := range b.ControlValues() {
  49                                 for c.Op == OpCopy {
  50                                         c = c.Args[0]
  51                                         b.ReplaceControl(i, c)
  52                                 }
  53                         }
  54                         if rb(b) {
  55                                 change = true
  56                                 if debug > 1 {
  57                                         fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
  58                                 }
  59                         }
  60                         for j, v := range b.Values {
  61                                 var v0 *Value
  62                                 if debug > 1 {
  63                                         v0 = new(Value)
  64                                         *v0 = *v
  65                                         v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
  66                                 }
  67                                 if v.Uses == 0 && v.removeable() {
  68                                         if v.Op != OpInvalid && deadcode == removeDeadValues {
  69                                                 // Reset any values that are now unused, so that we decrement
  70                                                 // the use count of all of its arguments.
  71                                                 // Not quite a deadcode pass, because it does not handle cycles.
  72                                                 // But it should help Uses==1 rules to fire.
  73                                                 v.reset(OpInvalid)
  74                                                 change = true
  75                                         }
  76                                         // No point rewriting values which aren't used.
  77                                         continue
  78                                 }
  79
  80                                 vchange := phielimValue(v)
  81                                 if vchange && debug > 1 {
  82                                         fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
  83                                 }
  84
  85                                 // Eliminate copy inputs.
  86                                 // If any copy input becomes unused, mark it
  87                                 // as invalid and discard its argument. Repeat
  88                                 // recursively on the discarded argument.
  89                                 // This phase helps remove phantom "dead copy" uses
  90                                 // of a value so that a x.Uses==1 rule condition
  91                                 // fires reliably.
  92                                 for i, a := range v.Args {
  93                                         if a.Op != OpCopy {
  94                                                 continue
  95                                         }
  96                                         aa := copySource(a)
  97                                         v.SetArg(i, aa)
  98                                         // If a, a copy, has a line boundary indicator, attempt to find a new value
  99                                         // to hold it.  The first candidate is the value that will replace a (aa),
 100                                         // if it shares the same block and line and is eligible.
 101                                         // The second option is v, which has a as an input.  Because aa is earlier in
 102                                         // the data flow, it is the better choice.
 103                                         if a.Pos.IsStmt() == src.PosIsStmt {
 104                                                 if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
 105                                                         aa.Pos = aa.Pos.WithIsStmt()
 106                                                 } else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
 107                                                         v.Pos = v.Pos.WithIsStmt()
 108                                                 } else {
 109                                                         // Record the lost line and look for a new home after all rewrites are complete.
 110                                                         // TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
 111                                                         // line to appear in more than one block, but only one block is stored, so if both end
 112                                                         // up here, then one will be lost.
 113                                                         pendingLines.set(a.Pos, int32(a.Block.ID))
 114                                                 }
 115                                                 a.Pos = a.Pos.WithNotStmt()
 116                                         }
 117                                         vchange = true
 118                                         for a.Uses == 0 {
 119                                                 b := a.Args[0]
 120                                                 a.reset(OpInvalid)
 121                                                 a = b
 122                                         }
 123                                 }
 124                                 if vchange && debug > 1 {
 125                                         fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
 126                                 }
 127
 128                                 // apply rewrite function
 129                                 if rv(v) {
 130                                         vchange = true
 131                                         // If value changed to a poor choice for a statement boundary, move the boundary
 132                                         if v.Pos.IsStmt() == src.PosIsStmt {
 133                                                 if k := nextGoodStatementIndex(v, j, b); k != j {
 134                                                         v.Pos = v.Pos.WithNotStmt()
 135                                                         b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
 136                                                 }
 137                                         }
 138                                 }
 139
 140                                 change = change || vchange
 141                                 if vchange && debug > 1 {
 142                                         fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
 143                                 }
 144                         }
 145                 }
 146                 if !change {
 147                         break
 148                 }
 149         }
 150         // remove clobbered values
 151         for _, b := range f.Blocks {
 152                 j := 0
 153                 for i, v := range b.Values {
 154                         vl := v.Pos
 155                         if v.Op == OpInvalid {
 156                                 if v.Pos.IsStmt() == src.PosIsStmt {
 157                                         pendingLines.set(vl, int32(b.ID))
 158                                 }
 159                                 f.freeValue(v)
 160                                 continue
 161                         }
 162                         if v.Pos.IsStmt() != src.PosNotStmt && pendingLines.get(vl) == int32(b.ID) {
 163                                 pendingLines.remove(vl)
 164                                 v.Pos = v.Pos.WithIsStmt()
 165                         }
 166                         if i != j {
 167                                 b.Values[j] = v
 168                         }
 169                         j++
 170                 }
 171                 if pendingLines.get(b.Pos) == int32(b.ID) {
 172                         b.Pos = b.Pos.WithIsStmt()
 173                         pendingLines.remove(b.Pos)
 174                 }
 175                 b.truncateValues(j)
 176         }
 177 }
 178
 179 // Common functions called from rewriting rules
 180
 181 func is64BitFloat(t *types.Type) bool {
 182         return t.Size() == 8 && t.IsFloat()
 183 }
 184
 185 func is32BitFloat(t *types.Type) bool {
 186         return t.Size() == 4 && t.IsFloat()
 187 }
 188
 189 func is64BitInt(t *types.Type) bool {
 190         return t.Size() == 8 && t.IsInteger()
 191 }
 192
 193 func is32BitInt(t *types.Type) bool {
 194         return t.Size() == 4 && t.IsInteger()
 195 }
 196
 197 func is16BitInt(t *types.Type) bool {
 198         return t.Size() == 2 && t.IsInteger()
 199 }
 200
 201 func is8BitInt(t *types.Type) bool {
 202         return t.Size() == 1 && t.IsInteger()
 203 }
 204
 205 func isPtr(t *types.Type) bool {
 206         return t.IsPtrShaped()
 207 }
 208
 209 func isSigned(t *types.Type) bool {
 210         return t.IsSigned()
 211 }
 212
 213 // mergeSym merges two symbolic offsets. There is no real merging of
 214 // offsets, we just pick the non-nil one.
 215 func mergeSym(x, y interface{}) interface{} {
 216         if x == nil {
 217                 return y
 218         }
 219         if y == nil {
 220                 return x
 221         }
 222         panic(fmt.Sprintf("mergeSym with two non-nil syms %s %s", x, y))
 223 }
 224
 225 func canMergeSym(x, y interface{}) bool {
 226         return x == nil || y == nil
 227 }
 228
 229 func mergeSymTyped(x, y Sym) Sym {
 230         if x == nil {
 231                 return y
 232         }
 233         if y == nil {
 234                 return x
 235         }
 236         panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
 237 }
 238
 239 // canMergeLoadClobber reports whether the load can be merged into target without
 240 // invalidating the schedule.
 241 // It also checks that the other non-load argument x is something we
 242 // are ok with clobbering.
 243 func canMergeLoadClobber(target, load, x *Value) bool {
 244         // The register containing x is going to get clobbered.
 245         // Don't merge if we still need the value of x.
 246         // We don't have liveness information here, but we can
 247         // approximate x dying with:
 248         //  1) target is x's only use.
 249         //  2) target is not in a deeper loop than x.
 250         if x.Uses != 1 {
 251                 return false
 252         }
 253         loopnest := x.Block.Func.loopnest()
 254         loopnest.calculateDepths()
 255         if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
 256                 return false
 257         }
 258         return canMergeLoad(target, load)
 259 }
 260
 261 // canMergeLoad reports whether the load can be merged into target without
 262 // invalidating the schedule.
 263 func canMergeLoad(target, load *Value) bool {
 264         if target.Block.ID != load.Block.ID {
 265                 // If the load is in a different block do not merge it.
 266                 return false
 267         }
 268
 269         // We can't merge the load into the target if the load
 270         // has more than one use.
 271         if load.Uses != 1 {
 272                 return false
 273         }
 274
 275         mem := load.MemoryArg()
 276
 277         // We need the load's memory arg to still be alive at target. That
 278         // can't be the case if one of target's args depends on a memory
 279         // state that is a successor of load's memory arg.
 280         //
 281         // For example, it would be invalid to merge load into target in
 282         // the following situation because newmem has killed oldmem
 283         // before target is reached:
 284         //     load = read ... oldmem
 285         //   newmem = write ... oldmem
 286         //     arg0 = read ... newmem
 287         //   target = add arg0 load
 288         //
 289         // If the argument comes from a different block then we can exclude
 290         // it immediately because it must dominate load (which is in the
 291         // same block as target).
 292         var args []*Value
 293         for _, a := range target.Args {
 294                 if a != load && a.Block.ID == target.Block.ID {
 295                         args = append(args, a)
 296                 }
 297         }
 298
 299         // memPreds contains memory states known to be predecessors of load's
 300         // memory state. It is lazily initialized.
 301         var memPreds map[*Value]bool
 302         for i := 0; len(args) > 0; i++ {
 303                 const limit = 100
 304                 if i >= limit {
 305                         // Give up if we have done a lot of iterations.
 306                         return false
 307                 }
 308                 v := args[len(args)-1]
 309                 args = args[:len(args)-1]
 310                 if target.Block.ID != v.Block.ID {
 311                         // Since target and load are in the same block
 312                         // we can stop searching when we leave the block.
 313                         continue
 314                 }
 315                 if v.Op == OpPhi {
 316                         // A Phi implies we have reached the top of the block.
 317                         // The memory phi, if it exists, is always
 318                         // the first logical store in the block.
 319                         continue
 320                 }
 321                 if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
 322                         // We could handle this situation however it is likely
 323                         // to be very rare.
 324                         return false
 325                 }
 326                 if v.Op.SymEffect()&SymAddr != 0 {
 327                         // This case prevents an operation that calculates the
 328                         // address of a local variable from being forced to schedule
 329                         // before its corresponding VarDef.
 330                         // See issue 28445.
 331                         //   v1 = LOAD ...
 332                         //   v2 = VARDEF
 333                         //   v3 = LEAQ
 334                         //   v4 = CMPQ v1 v3
 335                         // We don't want to combine the CMPQ with the load, because
 336                         // that would force the CMPQ to schedule before the VARDEF, which
 337                         // in turn requires the LEAQ to schedule before the VARDEF.
 338                         return false
 339                 }
 340                 if v.Type.IsMemory() {
 341                         if memPreds == nil {
 342                                 // Initialise a map containing memory states
 343                                 // known to be predecessors of load's memory
 344                                 // state.
 345                                 memPreds = make(map[*Value]bool)
 346                                 m := mem
 347                                 const limit = 50
 348                                 for i := 0; i < limit; i++ {
 349                                         if m.Op == OpPhi {
 350                                                 // The memory phi, if it exists, is always
 351                                                 // the first logical store in the block.
 352                                                 break
 353                                         }
 354                                         if m.Block.ID != target.Block.ID {
 355                                                 break
 356                                         }
 357                                         if !m.Type.IsMemory() {
 358                                                 break
 359                                         }
 360                                         memPreds[m] = true
 361                                         if len(m.Args) == 0 {
 362                                                 break
 363                                         }
 364                                         m = m.MemoryArg()
 365                                 }
 366                         }
 367
 368                         // We can merge if v is a predecessor of mem.
 369                         //
 370                         // For example, we can merge load into target in the
 371                         // following scenario:
 372                         //      x = read ... v
 373                         //    mem = write ... v
 374                         //   load = read ... mem
 375                         // target = add x load
 376                         if memPreds[v] {
 377                                 continue
 378                         }
 379                         return false
 380                 }
 381                 if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
 382                         // If v takes mem as an input then we know mem
 383                         // is valid at this point.
 384                         continue
 385                 }
 386                 for _, a := range v.Args {
 387                         if target.Block.ID == a.Block.ID {
 388                                 args = append(args, a)
 389                         }
 390                 }
 391         }
 392
 393         return true
 394 }
 395
 396 // isSameCall reports whether sym is the same as the given named symbol
 397 func isSameCall(sym interface{}, name string) bool {
 398         fn := sym.(*AuxCall).Fn
 399         return fn != nil && fn.String() == name
 400 }
 401
 402 // nlz returns the number of leading zeros.
 403 func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
 404 func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
 405 func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
 406 func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
 407
 408 // ntzX returns the number of trailing zeros.
 409 func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
 410 func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
 411 func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
 412 func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
 413
 414 func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
 415 func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
 416 func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
 417 func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
 418 func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
 419
 420 // nto returns the number of trailing ones.
 421 func nto(x int64) int64 {
 422         return int64(ntz64(^x))
 423 }
 424
 425 // log2 returns logarithm in base 2 of uint64(n), with log2(0) = -1.
 426 // Rounds down.
 427 func log2(n int64) int64 {
 428         return int64(bits.Len64(uint64(n))) - 1
 429 }
 430
 431 // logX returns logarithm of n base 2.
 432 // n must be a positive power of 2 (isPowerOfTwoX returns true).
 433 func log8(n int8) int64 {
 434         return int64(bits.Len8(uint8(n))) - 1
 435 }
 436 func log16(n int16) int64 {
 437         return int64(bits.Len16(uint16(n))) - 1
 438 }
 439 func log32(n int32) int64 {
 440         return int64(bits.Len32(uint32(n))) - 1
 441 }
 442 func log64(n int64) int64 {
 443         return int64(bits.Len64(uint64(n))) - 1
 444 }
 445
 446 // log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
 447 // Rounds down.
 448 func log2uint32(n int64) int64 {
 449         return int64(bits.Len32(uint32(n))) - 1
 450 }
 451
 452 // isPowerOfTwo reports whether n is a power of 2.
 453 func isPowerOfTwo(n int64) bool {
 454         return n > 0 && n&(n-1) == 0
 455 }
 456 func isPowerOfTwo8(n int8) bool {
 457         return n > 0 && n&(n-1) == 0
 458 }
 459 func isPowerOfTwo16(n int16) bool {
 460         return n > 0 && n&(n-1) == 0
 461 }
 462 func isPowerOfTwo32(n int32) bool {
 463         return n > 0 && n&(n-1) == 0
 464 }
 465 func isPowerOfTwo64(n int64) bool {
 466         return n > 0 && n&(n-1) == 0
 467 }
 468
 469 // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
 470 func isUint64PowerOfTwo(in int64) bool {
 471         n := uint64(in)
 472         return n > 0 && n&(n-1) == 0
 473 }
 474
 475 // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
 476 func isUint32PowerOfTwo(in int64) bool {
 477         n := uint64(uint32(in))
 478         return n > 0 && n&(n-1) == 0
 479 }
 480
 481 // is32Bit reports whether n can be represented as a signed 32 bit integer.
 482 func is32Bit(n int64) bool {
 483         return n == int64(int32(n))
 484 }
 485
 486 // is16Bit reports whether n can be represented as a signed 16 bit integer.
 487 func is16Bit(n int64) bool {
 488         return n == int64(int16(n))
 489 }
 490
 491 // is8Bit reports whether n can be represented as a signed 8 bit integer.
 492 func is8Bit(n int64) bool {
 493         return n == int64(int8(n))
 494 }
 495
 496 // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
 497 func isU8Bit(n int64) bool {
 498         return n == int64(uint8(n))
 499 }
 500
 501 // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
 502 func isU12Bit(n int64) bool {
 503         return 0 <= n && n < (1<<12)
 504 }
 505
 506 // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
 507 func isU16Bit(n int64) bool {
 508         return n == int64(uint16(n))
 509 }
 510
 511 // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
 512 func isU32Bit(n int64) bool {
 513         return n == int64(uint32(n))
 514 }
 515
 516 // is20Bit reports whether n can be represented as a signed 20 bit integer.
 517 func is20Bit(n int64) bool {
 518         return -(1<<19) <= n && n < (1<<19)
 519 }
 520
 521 // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
 522 func b2i(b bool) int64 {
 523         if b {
 524                 return 1
 525         }
 526         return 0
 527 }
 528
 529 // b2i32 translates a boolean value to 0 or 1.
 530 func b2i32(b bool) int32 {
 531         if b {
 532                 return 1
 533         }
 534         return 0
 535 }
 536
 537 // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
 538 // A shift is bounded if it is shifting by less than the width of the shifted value.
 539 func shiftIsBounded(v *Value) bool {
 540         return v.AuxInt != 0
 541 }
 542
 543 // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
 544 // of the mantissa. It will panic if the truncation results in lost information.
 545 func truncate64Fto32F(f float64) float32 {
 546         if !isExactFloat32(f) {
 547                 panic("truncate64Fto32F: truncation is not exact")
 548         }
 549         if !math.IsNaN(f) {
 550                 return float32(f)
 551         }
 552         // NaN bit patterns aren't necessarily preserved across conversion
 553         // instructions so we need to do the conversion manually.
 554         b := math.Float64bits(f)
 555         m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
 556         //          | sign                  | exponent   | mantissa       |
 557         r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
 558         return math.Float32frombits(r)
 559 }
 560
 561 // extend32Fto64F converts a float32 value to a float64 value preserving the bit
 562 // pattern of the mantissa.
 563 func extend32Fto64F(f float32) float64 {
 564         if !math.IsNaN(float64(f)) {
 565                 return float64(f)
 566         }
 567         // NaN bit patterns aren't necessarily preserved across conversion
 568         // instructions so we need to do the conversion manually.
 569         b := uint64(math.Float32bits(f))
 570         //   | sign                  | exponent      | mantissa                    |
 571         r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
 572         return math.Float64frombits(r)
 573 }
 574
 575 // DivisionNeedsFixUp reports whether the division needs fix-up code.
 576 func DivisionNeedsFixUp(v *Value) bool {
 577         return v.AuxInt == 0
 578 }
 579
 580 // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
 581 func auxFrom64F(f float64) int64 {
 582         if f != f {
 583                 panic("can't encode a NaN in AuxInt field")
 584         }
 585         return int64(math.Float64bits(f))
 586 }
 587
 588 // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
 589 func auxFrom32F(f float32) int64 {
 590         if f != f {
 591                 panic("can't encode a NaN in AuxInt field")
 592         }
 593         return int64(math.Float64bits(extend32Fto64F(f)))
 594 }
 595
 596 // auxTo32F decodes a float32 from the AuxInt value provided.
 597 func auxTo32F(i int64) float32 {
 598         return truncate64Fto32F(math.Float64frombits(uint64(i)))
 599 }
 600
 601 // auxTo64F decodes a float64 from the AuxInt value provided.
 602 func auxTo64F(i int64) float64 {
 603         return math.Float64frombits(uint64(i))
 604 }
 605
 606 func auxIntToBool(i int64) bool {
 607         if i == 0 {
 608                 return false
 609         }
 610         return true
 611 }
 612 func auxIntToInt8(i int64) int8 {
 613         return int8(i)
 614 }
 615 func auxIntToInt16(i int64) int16 {
 616         return int16(i)
 617 }
 618 func auxIntToInt32(i int64) int32 {
 619         return int32(i)
 620 }
 621 func auxIntToInt64(i int64) int64 {
 622         return i
 623 }
 624 func auxIntToUint8(i int64) uint8 {
 625         return uint8(i)
 626 }
 627 func auxIntToFloat32(i int64) float32 {
 628         return float32(math.Float64frombits(uint64(i)))
 629 }
 630 func auxIntToFloat64(i int64) float64 {
 631         return math.Float64frombits(uint64(i))
 632 }
 633 func auxIntToValAndOff(i int64) ValAndOff {
 634         return ValAndOff(i)
 635 }
 636 func auxIntToArm64BitField(i int64) arm64BitField {
 637         return arm64BitField(i)
 638 }
 639 func auxIntToInt128(x int64) int128 {
 640         if x != 0 {
 641                 panic("nonzero int128 not allowed")
 642         }
 643         return 0
 644 }
 645 func auxIntToFlagConstant(x int64) flagConstant {
 646         return flagConstant(x)
 647 }
 648
 649 func auxIntToOp(cc int64) Op {
 650         return Op(cc)
 651 }
 652
 653 func boolToAuxInt(b bool) int64 {
 654         if b {
 655                 return 1
 656         }
 657         return 0
 658 }
 659 func int8ToAuxInt(i int8) int64 {
 660         return int64(i)
 661 }
 662 func int16ToAuxInt(i int16) int64 {
 663         return int64(i)
 664 }
 665 func int32ToAuxInt(i int32) int64 {
 666         return int64(i)
 667 }
 668 func int64ToAuxInt(i int64) int64 {
 669         return int64(i)
 670 }
 671 func uint8ToAuxInt(i uint8) int64 {
 672         return int64(int8(i))
 673 }
 674 func float32ToAuxInt(f float32) int64 {
 675         return int64(math.Float64bits(float64(f)))
 676 }
 677 func float64ToAuxInt(f float64) int64 {
 678         return int64(math.Float64bits(f))
 679 }
 680 func valAndOffToAuxInt(v ValAndOff) int64 {
 681         return int64(v)
 682 }
 683 func arm64BitFieldToAuxInt(v arm64BitField) int64 {
 684         return int64(v)
 685 }
 686 func int128ToAuxInt(x int128) int64 {
 687         if x != 0 {
 688                 panic("nonzero int128 not allowed")
 689         }
 690         return 0
 691 }
 692 func flagConstantToAuxInt(x flagConstant) int64 {
 693         return int64(x)
 694 }
 695
 696 func opToAuxInt(o Op) int64 {
 697         return int64(o)
 698 }
 699
 700 func auxToString(i interface{}) string {
 701         return i.(string)
 702 }
 703 func auxToSym(i interface{}) Sym {
 704         // TODO: kind of a hack - allows nil interface through
 705         s, _ := i.(Sym)
 706         return s
 707 }
 708 func auxToType(i interface{}) *types.Type {
 709         return i.(*types.Type)
 710 }
 711 func auxToCall(i interface{}) *AuxCall {
 712         return i.(*AuxCall)
 713 }
 714 func auxToS390xCCMask(i interface{}) s390x.CCMask {
 715         return i.(s390x.CCMask)
 716 }
 717 func auxToS390xRotateParams(i interface{}) s390x.RotateParams {
 718         return i.(s390x.RotateParams)
 719 }
 720
 721 func stringToAux(s string) interface{} {
 722         return s
 723 }
 724 func symToAux(s Sym) interface{} {
 725         return s
 726 }
 727 func callToAux(s *AuxCall) interface{} {
 728         return s
 729 }
 730 func typeToAux(t *types.Type) interface{} {
 731         return t
 732 }
 733 func s390xCCMaskToAux(c s390x.CCMask) interface{} {
 734         return c
 735 }
 736 func s390xRotateParamsToAux(r s390x.RotateParams) interface{} {
 737         return r
 738 }
 739
 740 // uaddOvf reports whether unsigned a+b would overflow.
 741 func uaddOvf(a, b int64) bool {
 742         return uint64(a)+uint64(b) < uint64(a)
 743 }
 744
 745 // de-virtualize an InterCall
 746 // 'sym' is the symbol for the itab
 747 func devirt(v *Value, aux interface{}, sym Sym, offset int64) *AuxCall {
 748         f := v.Block.Func
 749         n, ok := sym.(*obj.LSym)
 750         if !ok {
 751                 return nil
 752         }
 753         lsym := f.fe.DerefItab(n, offset)
 754         if f.pass.debug > 0 {
 755                 if lsym != nil {
 756                         f.Warnl(v.Pos, "de-virtualizing call")
 757                 } else {
 758                         f.Warnl(v.Pos, "couldn't de-virtualize call")
 759                 }
 760         }
 761         if lsym == nil {
 762                 return nil
 763         }
 764         va := aux.(*AuxCall)
 765         return StaticAuxCall(lsym, va.args, va.results)
 766 }
 767
 768 // de-virtualize an InterLECall
 769 // 'sym' is the symbol for the itab
 770 func devirtLESym(v *Value, aux interface{}, sym Sym, offset int64) *obj.LSym {
 771         n, ok := sym.(*obj.LSym)
 772         if !ok {
 773                 return nil
 774         }
 775
 776         f := v.Block.Func
 777         lsym := f.fe.DerefItab(n, offset)
 778         if f.pass.debug > 0 {
 779                 if lsym != nil {
 780                         f.Warnl(v.Pos, "de-virtualizing call")
 781                 } else {
 782                         f.Warnl(v.Pos, "couldn't de-virtualize call")
 783                 }
 784         }
 785         if lsym == nil {
 786                 return nil
 787         }
 788         return lsym
 789 }
 790
 791 func devirtLECall(v *Value, sym *obj.LSym) *Value {
 792         v.Op = OpStaticLECall
 793         v.Aux.(*AuxCall).Fn = sym
 794         v.RemoveArg(0)
 795         return v
 796 }
 797
 798 // isSamePtr reports whether p1 and p2 point to the same address.
 799 func isSamePtr(p1, p2 *Value) bool {
 800         if p1 == p2 {
 801                 return true
 802         }
 803         if p1.Op != p2.Op {
 804                 return false
 805         }
 806         switch p1.Op {
 807         case OpOffPtr:
 808                 return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
 809         case OpAddr, OpLocalAddr:
 810                 // OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op.
 811                 // Checking for value equality only works after [z]cse has run.
 812                 return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op
 813         case OpAddPtr:
 814                 return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
 815         }
 816         return false
 817 }
 818
 819 func isStackPtr(v *Value) bool {
 820         for v.Op == OpOffPtr || v.Op == OpAddPtr {
 821                 v = v.Args[0]
 822         }
 823         return v.Op == OpSP || v.Op == OpLocalAddr
 824 }
 825
 826 // disjoint reports whether the memory region specified by [p1:p1+n1)
 827 // does not overlap with [p2:p2+n2).
 828 // A return value of false does not imply the regions overlap.
 829 func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
 830         if n1 == 0 || n2 == 0 {
 831                 return true
 832         }
 833         if p1 == p2 {
 834                 return false
 835         }
 836         baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
 837                 base, offset = ptr, 0
 838                 for base.Op == OpOffPtr {
 839                         offset += base.AuxInt
 840                         base = base.Args[0]
 841                 }
 842                 return base, offset
 843         }
 844         p1, off1 := baseAndOffset(p1)
 845         p2, off2 := baseAndOffset(p2)
 846         if isSamePtr(p1, p2) {
 847                 return !overlap(off1, n1, off2, n2)
 848         }
 849         // p1 and p2 are not the same, so if they are both OpAddrs then
 850         // they point to different variables.
 851         // If one pointer is on the stack and the other is an argument
 852         // then they can't overlap.
 853         switch p1.Op {
 854         case OpAddr, OpLocalAddr:
 855                 if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
 856                         return true
 857                 }
 858                 return p2.Op == OpArg && p1.Args[0].Op == OpSP
 859         case OpArg:
 860                 if p2.Op == OpSP || p2.Op == OpLocalAddr {
 861                         return true
 862                 }
 863         case OpSP:
 864                 return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpSP
 865         }
 866         return false
 867 }
 868
 869 // moveSize returns the number of bytes an aligned MOV instruction moves
 870 func moveSize(align int64, c *Config) int64 {
 871         switch {
 872         case align%8 == 0 && c.PtrSize == 8:
 873                 return 8
 874         case align%4 == 0:
 875                 return 4
 876         case align%2 == 0:
 877                 return 2
 878         }
 879         return 1
 880 }
 881
 882 // mergePoint finds a block among a's blocks which dominates b and is itself
 883 // dominated by all of a's blocks. Returns nil if it can't find one.
 884 // Might return nil even if one does exist.
 885 func mergePoint(b *Block, a ...*Value) *Block {
 886         // Walk backward from b looking for one of the a's blocks.
 887
 888         // Max distance
 889         d := 100
 890
 891         for d > 0 {
 892                 for _, x := range a {
 893                         if b == x.Block {
 894                                 goto found
 895                         }
 896                 }
 897                 if len(b.Preds) > 1 {
 898                         // Don't know which way to go back. Abort.
 899                         return nil
 900                 }
 901                 b = b.Preds[0].b
 902                 d--
 903         }
 904         return nil // too far away
 905 found:
 906         // At this point, r is the first value in a that we find by walking backwards.
 907         // if we return anything, r will be it.
 908         r := b
 909
 910         // Keep going, counting the other a's that we find. They must all dominate r.
 911         na := 0
 912         for d > 0 {
 913                 for _, x := range a {
 914                         if b == x.Block {
 915                                 na++
 916                         }
 917                 }
 918                 if na == len(a) {
 919                         // Found all of a in a backwards walk. We can return r.
 920                         return r
 921                 }
 922                 if len(b.Preds) > 1 {
 923                         return nil
 924                 }
 925                 b = b.Preds[0].b
 926                 d--
 927
 928         }
 929         return nil // too far away
 930 }
 931
 932 // clobber invalidates values. Returns true.
 933 // clobber is used by rewrite rules to:
 934 //   A) make sure the values are really dead and never used again.
 935 //   B) decrement use counts of the values' args.
 936 func clobber(vv ...*Value) bool {
 937         for _, v := range vv {
 938                 v.reset(OpInvalid)
 939                 // Note: leave v.Block intact.  The Block field is used after clobber.
 940         }
 941         return true
 942 }
 943
 944 // clobberIfDead resets v when use count is 1. Returns true.
 945 // clobberIfDead is used by rewrite rules to decrement
 946 // use counts of v's args when v is dead and never used.
 947 func clobberIfDead(v *Value) bool {
 948         if v.Uses == 1 {
 949                 v.reset(OpInvalid)
 950         }
 951         // Note: leave v.Block intact.  The Block field is used after clobberIfDead.
 952         return true
 953 }
 954
 955 // noteRule is an easy way to track if a rule is matched when writing
 956 // new ones.  Make the rule of interest also conditional on
 957 //     noteRule("note to self: rule of interest matched")
 958 // and that message will print when the rule matches.
 959 func noteRule(s string) bool {
 960         fmt.Println(s)
 961         return true
 962 }
 963
 964 // countRule increments Func.ruleMatches[key].
 965 // If Func.ruleMatches is non-nil at the end
 966 // of compilation, it will be printed to stdout.
 967 // This is intended to make it easier to find which functions
 968 // which contain lots of rules matches when developing new rules.
 969 func countRule(v *Value, key string) bool {
 970         f := v.Block.Func
 971         if f.ruleMatches == nil {
 972                 f.ruleMatches = make(map[string]int)
 973         }
 974         f.ruleMatches[key]++
 975         return true
 976 }
 977
 978 // warnRule generates compiler debug output with string s when
 979 // v is not in autogenerated code, cond is true and the rule has fired.
 980 func warnRule(cond bool, v *Value, s string) bool {
 981         if pos := v.Pos; pos.Line() > 1 && cond {
 982                 v.Block.Func.Warnl(pos, s)
 983         }
 984         return true
 985 }
 986
 987 // for a pseudo-op like (LessThan x), extract x
 988 func flagArg(v *Value) *Value {
 989         if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
 990                 return nil
 991         }
 992         return v.Args[0]
 993 }
 994
 995 // arm64Negate finds the complement to an ARM64 condition code,
 996 // for example Equal -> NotEqual or LessThan -> GreaterEqual
 997 //
 998 // TODO: add floating-point conditions
 999 func arm64Negate(op Op) Op {
1000         switch op {
1001         case OpARM64LessThan:
1002                 return OpARM64GreaterEqual
1003         case OpARM64LessThanU:
1004                 return OpARM64GreaterEqualU
1005         case OpARM64GreaterThan:
1006                 return OpARM64LessEqual
1007         case OpARM64GreaterThanU:
1008                 return OpARM64LessEqualU
1009         case OpARM64LessEqual:
1010                 return OpARM64GreaterThan
1011         case OpARM64LessEqualU:
1012                 return OpARM64GreaterThanU
1013         case OpARM64GreaterEqual:
1014                 return OpARM64LessThan
1015         case OpARM64GreaterEqualU:
1016                 return OpARM64LessThanU
1017         case OpARM64Equal:
1018                 return OpARM64NotEqual
1019         case OpARM64NotEqual:
1020                 return OpARM64Equal
1021         case OpARM64LessThanF:
1022                 return OpARM64GreaterEqualF
1023         case OpARM64GreaterThanF:
1024                 return OpARM64LessEqualF
1025         case OpARM64LessEqualF:
1026                 return OpARM64GreaterThanF
1027         case OpARM64GreaterEqualF:
1028                 return OpARM64LessThanF
1029         default:
1030                 panic("unreachable")
1031         }
1032 }
1033
1034 // arm64Invert evaluates (InvertFlags op), which
1035 // is the same as altering the condition codes such
1036 // that the same result would be produced if the arguments
1037 // to the flag-generating instruction were reversed, e.g.
1038 // (InvertFlags (CMP x y)) -> (CMP y x)
1039 //
1040 // TODO: add floating-point conditions
1041 func arm64Invert(op Op) Op {
1042         switch op {
1043         case OpARM64LessThan:
1044                 return OpARM64GreaterThan
1045         case OpARM64LessThanU:
1046                 return OpARM64GreaterThanU
1047         case OpARM64GreaterThan:
1048                 return OpARM64LessThan
1049         case OpARM64GreaterThanU:
1050                 return OpARM64LessThanU
1051         case OpARM64LessEqual:
1052                 return OpARM64GreaterEqual
1053         case OpARM64LessEqualU:
1054                 return OpARM64GreaterEqualU
1055         case OpARM64GreaterEqual:
1056                 return OpARM64LessEqual
1057         case OpARM64GreaterEqualU:
1058                 return OpARM64LessEqualU
1059         case OpARM64Equal, OpARM64NotEqual:
1060                 return op
1061         case OpARM64LessThanF:
1062                 return OpARM64GreaterThanF
1063         case OpARM64GreaterThanF:
1064                 return OpARM64LessThanF
1065         case OpARM64LessEqualF:
1066                 return OpARM64GreaterEqualF
1067         case OpARM64GreaterEqualF:
1068                 return OpARM64LessEqualF
1069         default:
1070                 panic("unreachable")
1071         }
1072 }
1073
1074 // evaluate an ARM64 op against a flags value
1075 // that is potentially constant; return 1 for true,
1076 // -1 for false, and 0 for not constant.
1077 func ccARM64Eval(op Op, flags *Value) int {
1078         fop := flags.Op
1079         if fop == OpARM64InvertFlags {
1080                 return -ccARM64Eval(op, flags.Args[0])
1081         }
1082         if fop != OpARM64FlagConstant {
1083                 return 0
1084         }
1085         fc := flagConstant(flags.AuxInt)
1086         b2i := func(b bool) int {
1087                 if b {
1088                         return 1
1089                 }
1090                 return -1
1091         }
1092         switch op {
1093         case OpARM64Equal:
1094                 return b2i(fc.eq())
1095         case OpARM64NotEqual:
1096                 return b2i(fc.ne())
1097         case OpARM64LessThan:
1098                 return b2i(fc.lt())
1099         case OpARM64LessThanU:
1100                 return b2i(fc.ult())
1101         case OpARM64GreaterThan:
1102                 return b2i(fc.gt())
1103         case OpARM64GreaterThanU:
1104                 return b2i(fc.ugt())
1105         case OpARM64LessEqual:
1106                 return b2i(fc.le())
1107         case OpARM64LessEqualU:
1108                 return b2i(fc.ule())
1109         case OpARM64GreaterEqual:
1110                 return b2i(fc.ge())
1111         case OpARM64GreaterEqualU:
1112                 return b2i(fc.uge())
1113         }
1114         return 0
1115 }
1116
1117 // logRule logs the use of the rule s. This will only be enabled if
1118 // rewrite rules were generated with the -log option, see gen/rulegen.go.
1119 func logRule(s string) {
1120         if ruleFile == nil {
1121                 // Open a log file to write log to. We open in append
1122                 // mode because all.bash runs the compiler lots of times,
1123                 // and we want the concatenation of all of those logs.
1124                 // This means, of course, that users need to rm the old log
1125                 // to get fresh data.
1126                 // TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
1127                 w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
1128                         os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
1129                 if err != nil {
1130                         panic(err)
1131                 }
1132                 ruleFile = w
1133         }
1134         _, err := fmt.Fprintln(ruleFile, s)
1135         if err != nil {
1136                 panic(err)
1137         }
1138 }
1139
1140 var ruleFile io.Writer
1141
1142 func min(x, y int64) int64 {
1143         if x < y {
1144                 return x
1145         }
1146         return y
1147 }
1148
1149 func isConstZero(v *Value) bool {
1150         switch v.Op {
1151         case OpConstNil:
1152                 return true
1153         case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
1154                 return v.AuxInt == 0
1155         }
1156         return false
1157 }
1158
1159 // reciprocalExact64 reports whether 1/c is exactly representable.
1160 func reciprocalExact64(c float64) bool {
1161         b := math.Float64bits(c)
1162         man := b & (1<<52 - 1)
1163         if man != 0 {
1164                 return false // not a power of 2, denormal, or NaN
1165         }
1166         exp := b >> 52 & (1<<11 - 1)
1167         // exponent bias is 0x3ff.  So taking the reciprocal of a number
1168         // changes the exponent to 0x7fe-exp.
1169         switch exp {
1170         case 0:
1171                 return false // ±0
1172         case 0x7ff:
1173                 return false // ±inf
1174         case 0x7fe:
1175                 return false // exponent is not representable
1176         default:
1177                 return true
1178         }
1179 }
1180
1181 // reciprocalExact32 reports whether 1/c is exactly representable.
1182 func reciprocalExact32(c float32) bool {
1183         b := math.Float32bits(c)
1184         man := b & (1<<23 - 1)
1185         if man != 0 {
1186                 return false // not a power of 2, denormal, or NaN
1187         }
1188         exp := b >> 23 & (1<<8 - 1)
1189         // exponent bias is 0x7f.  So taking the reciprocal of a number
1190         // changes the exponent to 0xfe-exp.
1191         switch exp {
1192         case 0:
1193                 return false // ±0
1194         case 0xff:
1195                 return false // ±inf
1196         case 0xfe:
1197                 return false // exponent is not representable
1198         default:
1199                 return true
1200         }
1201 }
1202
1203 // check if an immediate can be directly encoded into an ARM's instruction
1204 func isARMImmRot(v uint32) bool {
1205         for i := 0; i < 16; i++ {
1206                 if v&^0xff == 0 {
1207                         return true
1208                 }
1209                 v = v<<2 | v>>30
1210         }
1211
1212         return false
1213 }
1214
1215 // overlap reports whether the ranges given by the given offset and
1216 // size pairs overlap.
1217 func overlap(offset1, size1, offset2, size2 int64) bool {
1218         if offset1 >= offset2 && offset2+size2 > offset1 {
1219                 return true
1220         }
1221         if offset2 >= offset1 && offset1+size1 > offset2 {
1222                 return true
1223         }
1224         return false
1225 }
1226
1227 func areAdjacentOffsets(off1, off2, size int64) bool {
1228         return off1+size == off2 || off1 == off2+size
1229 }
1230
1231 // check if value zeroes out upper 32-bit of 64-bit register.
1232 // depth limits recursion depth. In AMD64.rules 3 is used as limit,
1233 // because it catches same amount of cases as 4.
1234 func zeroUpper32Bits(x *Value, depth int) bool {
1235         switch x.Op {
1236         case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
1237                 OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
1238                 OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
1239                 OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
1240                 OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
1241                 OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
1242                 OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
1243                 OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
1244                 OpAMD64SHLL, OpAMD64SHLLconst:
1245                 return true
1246         case OpArg:
1247                 return x.Type.Width == 4
1248         case OpPhi, OpSelect0, OpSelect1:
1249                 // Phis can use each-other as an arguments, instead of tracking visited values,
1250                 // just limit recursion depth.
1251                 if depth <= 0 {
1252                         return false
1253                 }
1254                 for i := range x.Args {
1255                         if !zeroUpper32Bits(x.Args[i], depth-1) {
1256                                 return false
1257                         }
1258                 }
1259                 return true
1260
1261         }
1262         return false
1263 }
1264
1265 // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits
1266 func zeroUpper48Bits(x *Value, depth int) bool {
1267         switch x.Op {
1268         case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
1269                 return true
1270         case OpArg:
1271                 return x.Type.Width == 2
1272         case OpPhi, OpSelect0, OpSelect1:
1273                 // Phis can use each-other as an arguments, instead of tracking visited values,
1274                 // just limit recursion depth.
1275                 if depth <= 0 {
1276                         return false
1277                 }
1278                 for i := range x.Args {
1279                         if !zeroUpper48Bits(x.Args[i], depth-1) {
1280                                 return false
1281                         }
1282                 }
1283                 return true
1284
1285         }
1286         return false
1287 }
1288
1289 // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits
1290 func zeroUpper56Bits(x *Value, depth int) bool {
1291         switch x.Op {
1292         case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
1293                 return true
1294         case OpArg:
1295                 return x.Type.Width == 1
1296         case OpPhi, OpSelect0, OpSelect1:
1297                 // Phis can use each-other as an arguments, instead of tracking visited values,
1298                 // just limit recursion depth.
1299                 if depth <= 0 {
1300                         return false
1301                 }
1302                 for i := range x.Args {
1303                         if !zeroUpper56Bits(x.Args[i], depth-1) {
1304                                 return false
1305                         }
1306                 }
1307                 return true
1308
1309         }
1310         return false
1311 }
1312
1313 // isInlinableMemmove reports whether the given arch performs a Move of the given size
1314 // faster than memmove. It will only return true if replacing the memmove with a Move is
1315 // safe, either because Move is small or because the arguments are disjoint.
1316 // This is used as a check for replacing memmove with Move ops.
1317 func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1318         // It is always safe to convert memmove into Move when its arguments are disjoint.
1319         // Move ops may or may not be faster for large sizes depending on how the platform
1320         // lowers them, so we only perform this optimization on platforms that we know to
1321         // have fast Move ops.
1322         switch c.arch {
1323         case "amd64":
1324                 return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
1325         case "386", "arm64":
1326                 return sz <= 8
1327         case "s390x", "ppc64", "ppc64le":
1328                 return sz <= 8 || disjoint(dst, sz, src, sz)
1329         case "arm", "mips", "mips64", "mipsle", "mips64le":
1330                 return sz <= 4
1331         }
1332         return false
1333 }
1334
1335 // logLargeCopy logs the occurrence of a large copy.
1336 // The best place to do this is in the rewrite rules where the size of the move is easy to find.
1337 // "Large" is arbitrarily chosen to be 128 bytes; this may change.
1338 func logLargeCopy(v *Value, s int64) bool {
1339         if s < 128 {
1340                 return true
1341         }
1342         if logopt.Enabled() {
1343                 logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
1344         }
1345         return true
1346 }
1347
1348 // hasSmallRotate reports whether the architecture has rotate instructions
1349 // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
1350 func hasSmallRotate(c *Config) bool {
1351         switch c.arch {
1352         case "amd64", "386":
1353                 return true
1354         default:
1355                 return false
1356         }
1357 }
1358
1359 func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
1360         if sh < 0 || sh >= sz {
1361                 panic("PPC64 shift arg sh out of range")
1362         }
1363         if mb < 0 || mb >= sz {
1364                 panic("PPC64 shift arg mb out of range")
1365         }
1366         if me < 0 || me >= sz {
1367                 panic("PPC64 shift arg me out of range")
1368         }
1369         return int32(sh<<16 | mb<<8 | me)
1370 }
1371
1372 func GetPPC64Shiftsh(auxint int64) int64 {
1373         return int64(int8(auxint >> 16))
1374 }
1375
1376 func GetPPC64Shiftmb(auxint int64) int64 {
1377         return int64(int8(auxint >> 8))
1378 }
1379
1380 func GetPPC64Shiftme(auxint int64) int64 {
1381         return int64(int8(auxint))
1382 }
1383
1384 // This verifies that the mask occupies the
1385 // rightmost bits.
1386 func isPPC64ValidShiftMask(v int64) bool {
1387         if ((v + 1) & v) == 0 {
1388                 return true
1389         }
1390         return false
1391 }
1392
1393 func getPPC64ShiftMaskLength(v int64) int64 {
1394         return int64(bits.Len64(uint64(v)))
1395 }
1396
1397 // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
1398 func armBFAuxInt(lsb, width int64) arm64BitField {
1399         if lsb < 0 || lsb > 63 {
1400                 panic("ARM(64) bit field lsb constant out of range")
1401         }
1402         if width < 1 || width > 64 {
1403                 panic("ARM(64) bit field width constant out of range")
1404         }
1405         return arm64BitField(width | lsb<<8)
1406 }
1407
1408 // returns the lsb part of the auxInt field of arm64 bitfield ops.
1409 func (bfc arm64BitField) getARM64BFlsb() int64 {
1410         return int64(uint64(bfc) >> 8)
1411 }
1412
1413 // returns the width part of the auxInt field of arm64 bitfield ops.
1414 func (bfc arm64BitField) getARM64BFwidth() int64 {
1415         return int64(bfc) & 0xff
1416 }
1417
1418 // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
1419 func isARM64BFMask(lsb, mask, rshift int64) bool {
1420         shiftedMask := int64(uint64(mask) >> uint64(rshift))
1421         return shiftedMask != 0 && isPowerOfTwo(shiftedMask+1) && nto(shiftedMask)+lsb < 64
1422 }
1423
1424 // returns the bitfield width of mask >> rshift for arm64 bitfield ops
1425 func arm64BFWidth(mask, rshift int64) int64 {
1426         shiftedMask := int64(uint64(mask) >> uint64(rshift))
1427         if shiftedMask == 0 {
1428                 panic("ARM64 BF mask is zero")
1429         }
1430         return nto(shiftedMask)
1431 }
1432
1433 // sizeof returns the size of t in bytes.
1434 // It will panic if t is not a *types.Type.
1435 func sizeof(t interface{}) int64 {
1436         return t.(*types.Type).Size()
1437 }
1438
1439 // registerizable reports whether t is a primitive type that fits in
1440 // a register. It assumes float64 values will always fit into registers
1441 // even if that isn't strictly true.
1442 func registerizable(b *Block, typ *types.Type) bool {
1443         if typ.IsPtrShaped() || typ.IsFloat() {
1444                 return true
1445         }
1446         if typ.IsInteger() {
1447                 return typ.Size() <= b.Func.Config.RegSize
1448         }
1449         return false
1450 }
1451
1452 // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
1453 func needRaceCleanup(sym *AuxCall, v *Value) bool {
1454         f := v.Block.Func
1455         if !f.Config.Race {
1456                 return false
1457         }
1458         if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
1459                 return false
1460         }
1461         for _, b := range f.Blocks {
1462                 for _, v := range b.Values {
1463                         switch v.Op {
1464                         case OpStaticCall:
1465                                 // Check for racefuncenter will encounter racefuncexit and vice versa.
1466                                 // Allow calls to panic*
1467                                 s := v.Aux.(*AuxCall).Fn.String()
1468                                 switch s {
1469                                 case "runtime.racefuncenter", "runtime.racefuncexit",
1470                                         "runtime.panicdivide", "runtime.panicwrap",
1471                                         "runtime.panicshift":
1472                                         continue
1473                                 }
1474                                 // If we encountered any call, we need to keep racefunc*,
1475                                 // for accurate stacktraces.
1476                                 return false
1477                         case OpPanicBounds, OpPanicExtend:
1478                                 // Note: these are panic generators that are ok (like the static calls above).
1479                         case OpClosureCall, OpInterCall:
1480                                 // We must keep the race functions if there are any other call types.
1481                                 return false
1482                         }
1483                 }
1484         }
1485         if isSameCall(sym, "runtime.racefuncenter") {
1486                 // If we're removing racefuncenter, remove its argument as well.
1487                 if v.Args[0].Op != OpStore {
1488                         return false
1489                 }
1490                 mem := v.Args[0].Args[2]
1491                 v.Args[0].reset(OpCopy)
1492                 v.Args[0].AddArg(mem)
1493         }
1494         return true
1495 }
1496
1497 // symIsRO reports whether sym is a read-only global.
1498 func symIsRO(sym interface{}) bool {
1499         lsym := sym.(*obj.LSym)
1500         return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
1501 }
1502
1503 // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
1504 func symIsROZero(sym Sym) bool {
1505         lsym := sym.(*obj.LSym)
1506         if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
1507                 return false
1508         }
1509         for _, b := range lsym.P {
1510                 if b != 0 {
1511                         return false
1512                 }
1513         }
1514         return true
1515 }
1516
1517 // read8 reads one byte from the read-only global sym at offset off.
1518 func read8(sym interface{}, off int64) uint8 {
1519         lsym := sym.(*obj.LSym)
1520         if off >= int64(len(lsym.P)) || off < 0 {
1521                 // Invalid index into the global sym.
1522                 // This can happen in dead code, so we don't want to panic.
1523                 // Just return any value, it will eventually get ignored.
1524                 // See issue 29215.
1525                 return 0
1526         }
1527         return lsym.P[off]
1528 }
1529
1530 // read16 reads two bytes from the read-only global sym at offset off.
1531 func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
1532         lsym := sym.(*obj.LSym)
1533         // lsym.P is written lazily.
1534         // Bytes requested after the end of lsym.P are 0.
1535         var src []byte
1536         if 0 <= off && off < int64(len(lsym.P)) {
1537                 src = lsym.P[off:]
1538         }
1539         buf := make([]byte, 2)
1540         copy(buf, src)
1541         return byteorder.Uint16(buf)
1542 }
1543
1544 // read32 reads four bytes from the read-only global sym at offset off.
1545 func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
1546         lsym := sym.(*obj.LSym)
1547         var src []byte
1548         if 0 <= off && off < int64(len(lsym.P)) {
1549                 src = lsym.P[off:]
1550         }
1551         buf := make([]byte, 4)
1552         copy(buf, src)
1553         return byteorder.Uint32(buf)
1554 }
1555
1556 // read64 reads eight bytes from the read-only global sym at offset off.
1557 func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
1558         lsym := sym.(*obj.LSym)
1559         var src []byte
1560         if 0 <= off && off < int64(len(lsym.P)) {
1561                 src = lsym.P[off:]
1562         }
1563         buf := make([]byte, 8)
1564         copy(buf, src)
1565         return byteorder.Uint64(buf)
1566 }
1567
1568 // sequentialAddresses reports true if it can prove that x + n == y
1569 func sequentialAddresses(x, y *Value, n int64) bool {
1570         if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
1571                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1572                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1573                 return true
1574         }
1575         if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1576                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1577                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1578                 return true
1579         }
1580         if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
1581                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1582                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1583                 return true
1584         }
1585         if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1586                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1587                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1588                 return true
1589         }
1590         return false
1591 }
1592
1593 // flagConstant represents the result of a compile-time comparison.
1594 // The sense of these flags does not necessarily represent the hardware's notion
1595 // of a flags register - these are just a compile-time construct.
1596 // We happen to match the semantics to those of arm/arm64.
1597 // Note that these semantics differ from x86: the carry flag has the opposite
1598 // sense on a subtraction!
1599 //   On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
1600 //   On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
1601 //    (because it does x + ^y + C).
1602 // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
1603 type flagConstant uint8
1604
1605 // N reports whether the result of an operation is negative (high bit set).
1606 func (fc flagConstant) N() bool {
1607         return fc&1 != 0
1608 }
1609
1610 // Z reports whether the result of an operation is 0.
1611 func (fc flagConstant) Z() bool {
1612         return fc&2 != 0
1613 }
1614
1615 // C reports whether an unsigned add overflowed (carry), or an
1616 // unsigned subtract did not underflow (borrow).
1617 func (fc flagConstant) C() bool {
1618         return fc&4 != 0
1619 }
1620
1621 // V reports whether a signed operation overflowed or underflowed.
1622 func (fc flagConstant) V() bool {
1623         return fc&8 != 0
1624 }
1625
1626 func (fc flagConstant) eq() bool {
1627         return fc.Z()
1628 }
1629 func (fc flagConstant) ne() bool {
1630         return !fc.Z()
1631 }
1632 func (fc flagConstant) lt() bool {
1633         return fc.N() != fc.V()
1634 }
1635 func (fc flagConstant) le() bool {
1636         return fc.Z() || fc.lt()
1637 }
1638 func (fc flagConstant) gt() bool {
1639         return !fc.Z() && fc.ge()
1640 }
1641 func (fc flagConstant) ge() bool {
1642         return fc.N() == fc.V()
1643 }
1644 func (fc flagConstant) ult() bool {
1645         return !fc.C()
1646 }
1647 func (fc flagConstant) ule() bool {
1648         return fc.Z() || fc.ult()
1649 }
1650 func (fc flagConstant) ugt() bool {
1651         return !fc.Z() && fc.uge()
1652 }
1653 func (fc flagConstant) uge() bool {
1654         return fc.C()
1655 }
1656
1657 func (fc flagConstant) ltNoov() bool {
1658         return fc.lt() && !fc.V()
1659 }
1660 func (fc flagConstant) leNoov() bool {
1661         return fc.le() && !fc.V()
1662 }
1663 func (fc flagConstant) gtNoov() bool {
1664         return fc.gt() && !fc.V()
1665 }
1666 func (fc flagConstant) geNoov() bool {
1667         return fc.ge() && !fc.V()
1668 }
1669
1670 func (fc flagConstant) String() string {
1671         return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
1672 }
1673
1674 type flagConstantBuilder struct {
1675         N bool
1676         Z bool
1677         C bool
1678         V bool
1679 }
1680
1681 func (fcs flagConstantBuilder) encode() flagConstant {
1682         var fc flagConstant
1683         if fcs.N {
1684                 fc |= 1
1685         }
1686         if fcs.Z {
1687                 fc |= 2
1688         }
1689         if fcs.C {
1690                 fc |= 4
1691         }
1692         if fcs.V {
1693                 fc |= 8
1694         }
1695         return fc
1696 }
1697
1698 // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
1699 //  - the results of the C flag are different
1700 //  - the results of the V flag when y==minint are different
1701
1702 // addFlags64 returns the flags that would be set from computing x+y.
1703 func addFlags64(x, y int64) flagConstant {
1704         var fcb flagConstantBuilder
1705         fcb.Z = x+y == 0
1706         fcb.N = x+y < 0
1707         fcb.C = uint64(x+y) < uint64(x)
1708         fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1709         return fcb.encode()
1710 }
1711
1712 // subFlags64 returns the flags that would be set from computing x-y.
1713 func subFlags64(x, y int64) flagConstant {
1714         var fcb flagConstantBuilder
1715         fcb.Z = x-y == 0
1716         fcb.N = x-y < 0
1717         fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
1718         fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
1719         return fcb.encode()
1720 }
1721
1722 // addFlags32 returns the flags that would be set from computing x+y.
1723 func addFlags32(x, y int32) flagConstant {
1724         var fcb flagConstantBuilder
1725         fcb.Z = x+y == 0
1726         fcb.N = x+y < 0
1727         fcb.C = uint32(x+y) < uint32(x)
1728         fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1729         return fcb.encode()
1730 }
1731
1732 // subFlags32 returns the flags that would be set from computing x-y.
1733 func subFlags32(x, y int32) flagConstant {
1734         var fcb flagConstantBuilder
1735         fcb.Z = x-y == 0
1736         fcb.N = x-y < 0
1737         fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
1738         fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
1739         return fcb.encode()
1740 }
1741
1742 // logicFlags64 returns flags set to the sign/zeroness of x.
1743 // C and V are set to false.
1744 func logicFlags64(x int64) flagConstant {
1745         var fcb flagConstantBuilder
1746         fcb.Z = x == 0
1747         fcb.N = x < 0
1748         return fcb.encode()
1749 }
1750
1751 // logicFlags32 returns flags set to the sign/zeroness of x.
1752 // C and V are set to false.
1753 func logicFlags32(x int32) flagConstant {
1754         var fcb flagConstantBuilder
1755         fcb.Z = x == 0
1756         fcb.N = x < 0
1757         return fcb.encode()
1758 }