src/cmd/compile/internal/ssa/rewrite.go

   1 // Copyright 2015 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package ssa
   6
   7 import (
   8         "cmd/compile/internal/logopt"
   9         "cmd/compile/internal/types"
  10         "cmd/internal/obj"
  11         "cmd/internal/obj/s390x"
  12         "cmd/internal/objabi"
  13         "cmd/internal/src"
  14         "encoding/binary"
  15         "fmt"
  16         "io"
  17         "math"
  18         "math/bits"
  19         "os"
  20         "path/filepath"
  21 )
  22
  23 type deadValueChoice bool
  24
  25 const (
  26         leaveDeadValues  deadValueChoice = false
  27         removeDeadValues                 = true
  28 )
  29
  30 // deadcode indicates that rewrite should try to remove any values that become dead.
  31 func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
  32         // repeat rewrites until we find no more rewrites
  33         pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
  34         pendingLines.clear()
  35         debug := f.pass.debug
  36         if debug > 1 {
  37                 fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
  38         }
  39         for {
  40                 change := false
  41                 for _, b := range f.Blocks {
  42                         var b0 *Block
  43                         if debug > 1 {
  44                                 b0 = new(Block)
  45                                 *b0 = *b
  46                                 b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
  47                         }
  48                         for i, c := range b.ControlValues() {
  49                                 for c.Op == OpCopy {
  50                                         c = c.Args[0]
  51                                         b.ReplaceControl(i, c)
  52                                 }
  53                         }
  54                         if rb(b) {
  55                                 change = true
  56                                 if debug > 1 {
  57                                         fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
  58                                 }
  59                         }
  60                         for j, v := range b.Values {
  61                                 var v0 *Value
  62                                 if debug > 1 {
  63                                         v0 = new(Value)
  64                                         *v0 = *v
  65                                         v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
  66                                 }
  67                                 if v.Uses == 0 && v.removeable() {
  68                                         if v.Op != OpInvalid && deadcode == removeDeadValues {
  69                                                 // Reset any values that are now unused, so that we decrement
  70                                                 // the use count of all of its arguments.
  71                                                 // Not quite a deadcode pass, because it does not handle cycles.
  72                                                 // But it should help Uses==1 rules to fire.
  73                                                 v.reset(OpInvalid)
  74                                                 change = true
  75                                         }
  76                                         // No point rewriting values which aren't used.
  77                                         continue
  78                                 }
  79
  80                                 vchange := phielimValue(v)
  81                                 if vchange && debug > 1 {
  82                                         fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
  83                                 }
  84
  85                                 // Eliminate copy inputs.
  86                                 // If any copy input becomes unused, mark it
  87                                 // as invalid and discard its argument. Repeat
  88                                 // recursively on the discarded argument.
  89                                 // This phase helps remove phantom "dead copy" uses
  90                                 // of a value so that a x.Uses==1 rule condition
  91                                 // fires reliably.
  92                                 for i, a := range v.Args {
  93                                         if a.Op != OpCopy {
  94                                                 continue
  95                                         }
  96                                         aa := copySource(a)
  97                                         v.SetArg(i, aa)
  98                                         // If a, a copy, has a line boundary indicator, attempt to find a new value
  99                                         // to hold it.  The first candidate is the value that will replace a (aa),
 100                                         // if it shares the same block and line and is eligible.
 101                                         // The second option is v, which has a as an input.  Because aa is earlier in
 102                                         // the data flow, it is the better choice.
 103                                         if a.Pos.IsStmt() == src.PosIsStmt {
 104                                                 if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
 105                                                         aa.Pos = aa.Pos.WithIsStmt()
 106                                                 } else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
 107                                                         v.Pos = v.Pos.WithIsStmt()
 108                                                 } else {
 109                                                         // Record the lost line and look for a new home after all rewrites are complete.
 110                                                         // TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
 111                                                         // line to appear in more than one block, but only one block is stored, so if both end
 112                                                         // up here, then one will be lost.
 113                                                         pendingLines.set(a.Pos, int32(a.Block.ID))
 114                                                 }
 115                                                 a.Pos = a.Pos.WithNotStmt()
 116                                         }
 117                                         vchange = true
 118                                         for a.Uses == 0 {
 119                                                 b := a.Args[0]
 120                                                 a.reset(OpInvalid)
 121                                                 a = b
 122                                         }
 123                                 }
 124                                 if vchange && debug > 1 {
 125                                         fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
 126                                 }
 127
 128                                 // apply rewrite function
 129                                 if rv(v) {
 130                                         vchange = true
 131                                         // If value changed to a poor choice for a statement boundary, move the boundary
 132                                         if v.Pos.IsStmt() == src.PosIsStmt {
 133                                                 if k := nextGoodStatementIndex(v, j, b); k != j {
 134                                                         v.Pos = v.Pos.WithNotStmt()
 135                                                         b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
 136                                                 }
 137                                         }
 138                                 }
 139
 140                                 change = change || vchange
 141                                 if vchange && debug > 1 {
 142                                         fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
 143                                 }
 144                         }
 145                 }
 146                 if !change {
 147                         break
 148                 }
 149         }
 150         // remove clobbered values
 151         for _, b := range f.Blocks {
 152                 j := 0
 153                 for i, v := range b.Values {
 154                         vl := v.Pos
 155                         if v.Op == OpInvalid {
 156                                 if v.Pos.IsStmt() == src.PosIsStmt {
 157                                         pendingLines.set(vl, int32(b.ID))
 158                                 }
 159                                 f.freeValue(v)
 160                                 continue
 161                         }
 162                         if v.Pos.IsStmt() != src.PosNotStmt && pendingLines.get(vl) == int32(b.ID) {
 163                                 pendingLines.remove(vl)
 164                                 v.Pos = v.Pos.WithIsStmt()
 165                         }
 166                         if i != j {
 167                                 b.Values[j] = v
 168                         }
 169                         j++
 170                 }
 171                 if pendingLines.get(b.Pos) == int32(b.ID) {
 172                         b.Pos = b.Pos.WithIsStmt()
 173                         pendingLines.remove(b.Pos)
 174                 }
 175                 b.truncateValues(j)
 176         }
 177 }
 178
 179 // Common functions called from rewriting rules
 180
 181 func is64BitFloat(t *types.Type) bool {
 182         return t.Size() == 8 && t.IsFloat()
 183 }
 184
 185 func is32BitFloat(t *types.Type) bool {
 186         return t.Size() == 4 && t.IsFloat()
 187 }
 188
 189 func is64BitInt(t *types.Type) bool {
 190         return t.Size() == 8 && t.IsInteger()
 191 }
 192
 193 func is32BitInt(t *types.Type) bool {
 194         return t.Size() == 4 && t.IsInteger()
 195 }
 196
 197 func is16BitInt(t *types.Type) bool {
 198         return t.Size() == 2 && t.IsInteger()
 199 }
 200
 201 func is8BitInt(t *types.Type) bool {
 202         return t.Size() == 1 && t.IsInteger()
 203 }
 204
 205 func isPtr(t *types.Type) bool {
 206         return t.IsPtrShaped()
 207 }
 208
 209 func isSigned(t *types.Type) bool {
 210         return t.IsSigned()
 211 }
 212
 213 // mergeSym merges two symbolic offsets. There is no real merging of
 214 // offsets, we just pick the non-nil one.
 215 func mergeSym(x, y Sym) Sym {
 216         if x == nil {
 217                 return y
 218         }
 219         if y == nil {
 220                 return x
 221         }
 222         panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
 223 }
 224
 225 func canMergeSym(x, y Sym) bool {
 226         return x == nil || y == nil
 227 }
 228
 229 // canMergeLoadClobber reports whether the load can be merged into target without
 230 // invalidating the schedule.
 231 // It also checks that the other non-load argument x is something we
 232 // are ok with clobbering.
 233 func canMergeLoadClobber(target, load, x *Value) bool {
 234         // The register containing x is going to get clobbered.
 235         // Don't merge if we still need the value of x.
 236         // We don't have liveness information here, but we can
 237         // approximate x dying with:
 238         //  1) target is x's only use.
 239         //  2) target is not in a deeper loop than x.
 240         if x.Uses != 1 {
 241                 return false
 242         }
 243         loopnest := x.Block.Func.loopnest()
 244         loopnest.calculateDepths()
 245         if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
 246                 return false
 247         }
 248         return canMergeLoad(target, load)
 249 }
 250
 251 // canMergeLoad reports whether the load can be merged into target without
 252 // invalidating the schedule.
 253 func canMergeLoad(target, load *Value) bool {
 254         if target.Block.ID != load.Block.ID {
 255                 // If the load is in a different block do not merge it.
 256                 return false
 257         }
 258
 259         // We can't merge the load into the target if the load
 260         // has more than one use.
 261         if load.Uses != 1 {
 262                 return false
 263         }
 264
 265         mem := load.MemoryArg()
 266
 267         // We need the load's memory arg to still be alive at target. That
 268         // can't be the case if one of target's args depends on a memory
 269         // state that is a successor of load's memory arg.
 270         //
 271         // For example, it would be invalid to merge load into target in
 272         // the following situation because newmem has killed oldmem
 273         // before target is reached:
 274         //     load = read ... oldmem
 275         //   newmem = write ... oldmem
 276         //     arg0 = read ... newmem
 277         //   target = add arg0 load
 278         //
 279         // If the argument comes from a different block then we can exclude
 280         // it immediately because it must dominate load (which is in the
 281         // same block as target).
 282         var args []*Value
 283         for _, a := range target.Args {
 284                 if a != load && a.Block.ID == target.Block.ID {
 285                         args = append(args, a)
 286                 }
 287         }
 288
 289         // memPreds contains memory states known to be predecessors of load's
 290         // memory state. It is lazily initialized.
 291         var memPreds map[*Value]bool
 292         for i := 0; len(args) > 0; i++ {
 293                 const limit = 100
 294                 if i >= limit {
 295                         // Give up if we have done a lot of iterations.
 296                         return false
 297                 }
 298                 v := args[len(args)-1]
 299                 args = args[:len(args)-1]
 300                 if target.Block.ID != v.Block.ID {
 301                         // Since target and load are in the same block
 302                         // we can stop searching when we leave the block.
 303                         continue
 304                 }
 305                 if v.Op == OpPhi {
 306                         // A Phi implies we have reached the top of the block.
 307                         // The memory phi, if it exists, is always
 308                         // the first logical store in the block.
 309                         continue
 310                 }
 311                 if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
 312                         // We could handle this situation however it is likely
 313                         // to be very rare.
 314                         return false
 315                 }
 316                 if v.Op.SymEffect()&SymAddr != 0 {
 317                         // This case prevents an operation that calculates the
 318                         // address of a local variable from being forced to schedule
 319                         // before its corresponding VarDef.
 320                         // See issue 28445.
 321                         //   v1 = LOAD ...
 322                         //   v2 = VARDEF
 323                         //   v3 = LEAQ
 324                         //   v4 = CMPQ v1 v3
 325                         // We don't want to combine the CMPQ with the load, because
 326                         // that would force the CMPQ to schedule before the VARDEF, which
 327                         // in turn requires the LEAQ to schedule before the VARDEF.
 328                         return false
 329                 }
 330                 if v.Type.IsMemory() {
 331                         if memPreds == nil {
 332                                 // Initialise a map containing memory states
 333                                 // known to be predecessors of load's memory
 334                                 // state.
 335                                 memPreds = make(map[*Value]bool)
 336                                 m := mem
 337                                 const limit = 50
 338                                 for i := 0; i < limit; i++ {
 339                                         if m.Op == OpPhi {
 340                                                 // The memory phi, if it exists, is always
 341                                                 // the first logical store in the block.
 342                                                 break
 343                                         }
 344                                         if m.Block.ID != target.Block.ID {
 345                                                 break
 346                                         }
 347                                         if !m.Type.IsMemory() {
 348                                                 break
 349                                         }
 350                                         memPreds[m] = true
 351                                         if len(m.Args) == 0 {
 352                                                 break
 353                                         }
 354                                         m = m.MemoryArg()
 355                                 }
 356                         }
 357
 358                         // We can merge if v is a predecessor of mem.
 359                         //
 360                         // For example, we can merge load into target in the
 361                         // following scenario:
 362                         //      x = read ... v
 363                         //    mem = write ... v
 364                         //   load = read ... mem
 365                         // target = add x load
 366                         if memPreds[v] {
 367                                 continue
 368                         }
 369                         return false
 370                 }
 371                 if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
 372                         // If v takes mem as an input then we know mem
 373                         // is valid at this point.
 374                         continue
 375                 }
 376                 for _, a := range v.Args {
 377                         if target.Block.ID == a.Block.ID {
 378                                 args = append(args, a)
 379                         }
 380                 }
 381         }
 382
 383         return true
 384 }
 385
 386 // isSameCall reports whether sym is the same as the given named symbol
 387 func isSameCall(sym interface{}, name string) bool {
 388         fn := sym.(*AuxCall).Fn
 389         return fn != nil && fn.String() == name
 390 }
 391
 392 // nlz returns the number of leading zeros.
 393 func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
 394 func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
 395 func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
 396 func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
 397
 398 // ntzX returns the number of trailing zeros.
 399 func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
 400 func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
 401 func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
 402 func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
 403
 404 func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
 405 func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
 406 func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
 407 func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
 408 func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
 409
 410 // nto returns the number of trailing ones.
 411 func nto(x int64) int64 {
 412         return int64(ntz64(^x))
 413 }
 414
 415 // logX returns logarithm of n base 2.
 416 // n must be a positive power of 2 (isPowerOfTwoX returns true).
 417 func log8(n int8) int64 {
 418         return int64(bits.Len8(uint8(n))) - 1
 419 }
 420 func log16(n int16) int64 {
 421         return int64(bits.Len16(uint16(n))) - 1
 422 }
 423 func log32(n int32) int64 {
 424         return int64(bits.Len32(uint32(n))) - 1
 425 }
 426 func log64(n int64) int64 {
 427         return int64(bits.Len64(uint64(n))) - 1
 428 }
 429
 430 // log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
 431 // Rounds down.
 432 func log2uint32(n int64) int64 {
 433         return int64(bits.Len32(uint32(n))) - 1
 434 }
 435
 436 // isPowerOfTwo functions report whether n is a power of 2.
 437 func isPowerOfTwo8(n int8) bool {
 438         return n > 0 && n&(n-1) == 0
 439 }
 440 func isPowerOfTwo16(n int16) bool {
 441         return n > 0 && n&(n-1) == 0
 442 }
 443 func isPowerOfTwo32(n int32) bool {
 444         return n > 0 && n&(n-1) == 0
 445 }
 446 func isPowerOfTwo64(n int64) bool {
 447         return n > 0 && n&(n-1) == 0
 448 }
 449
 450 // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
 451 func isUint64PowerOfTwo(in int64) bool {
 452         n := uint64(in)
 453         return n > 0 && n&(n-1) == 0
 454 }
 455
 456 // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
 457 func isUint32PowerOfTwo(in int64) bool {
 458         n := uint64(uint32(in))
 459         return n > 0 && n&(n-1) == 0
 460 }
 461
 462 // is32Bit reports whether n can be represented as a signed 32 bit integer.
 463 func is32Bit(n int64) bool {
 464         return n == int64(int32(n))
 465 }
 466
 467 // is16Bit reports whether n can be represented as a signed 16 bit integer.
 468 func is16Bit(n int64) bool {
 469         return n == int64(int16(n))
 470 }
 471
 472 // is8Bit reports whether n can be represented as a signed 8 bit integer.
 473 func is8Bit(n int64) bool {
 474         return n == int64(int8(n))
 475 }
 476
 477 // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
 478 func isU8Bit(n int64) bool {
 479         return n == int64(uint8(n))
 480 }
 481
 482 // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
 483 func isU12Bit(n int64) bool {
 484         return 0 <= n && n < (1<<12)
 485 }
 486
 487 // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
 488 func isU16Bit(n int64) bool {
 489         return n == int64(uint16(n))
 490 }
 491
 492 // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
 493 func isU32Bit(n int64) bool {
 494         return n == int64(uint32(n))
 495 }
 496
 497 // is20Bit reports whether n can be represented as a signed 20 bit integer.
 498 func is20Bit(n int64) bool {
 499         return -(1<<19) <= n && n < (1<<19)
 500 }
 501
 502 // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
 503 func b2i(b bool) int64 {
 504         if b {
 505                 return 1
 506         }
 507         return 0
 508 }
 509
 510 // b2i32 translates a boolean value to 0 or 1.
 511 func b2i32(b bool) int32 {
 512         if b {
 513                 return 1
 514         }
 515         return 0
 516 }
 517
 518 // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
 519 // A shift is bounded if it is shifting by less than the width of the shifted value.
 520 func shiftIsBounded(v *Value) bool {
 521         return v.AuxInt != 0
 522 }
 523
 524 // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
 525 // of the mantissa. It will panic if the truncation results in lost information.
 526 func truncate64Fto32F(f float64) float32 {
 527         if !isExactFloat32(f) {
 528                 panic("truncate64Fto32F: truncation is not exact")
 529         }
 530         if !math.IsNaN(f) {
 531                 return float32(f)
 532         }
 533         // NaN bit patterns aren't necessarily preserved across conversion
 534         // instructions so we need to do the conversion manually.
 535         b := math.Float64bits(f)
 536         m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
 537         //          | sign                  | exponent   | mantissa       |
 538         r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
 539         return math.Float32frombits(r)
 540 }
 541
 542 // extend32Fto64F converts a float32 value to a float64 value preserving the bit
 543 // pattern of the mantissa.
 544 func extend32Fto64F(f float32) float64 {
 545         if !math.IsNaN(float64(f)) {
 546                 return float64(f)
 547         }
 548         // NaN bit patterns aren't necessarily preserved across conversion
 549         // instructions so we need to do the conversion manually.
 550         b := uint64(math.Float32bits(f))
 551         //   | sign                  | exponent      | mantissa                    |
 552         r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
 553         return math.Float64frombits(r)
 554 }
 555
 556 // DivisionNeedsFixUp reports whether the division needs fix-up code.
 557 func DivisionNeedsFixUp(v *Value) bool {
 558         return v.AuxInt == 0
 559 }
 560
 561 // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
 562 func auxFrom64F(f float64) int64 {
 563         if f != f {
 564                 panic("can't encode a NaN in AuxInt field")
 565         }
 566         return int64(math.Float64bits(f))
 567 }
 568
 569 // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
 570 func auxFrom32F(f float32) int64 {
 571         if f != f {
 572                 panic("can't encode a NaN in AuxInt field")
 573         }
 574         return int64(math.Float64bits(extend32Fto64F(f)))
 575 }
 576
 577 // auxTo32F decodes a float32 from the AuxInt value provided.
 578 func auxTo32F(i int64) float32 {
 579         return truncate64Fto32F(math.Float64frombits(uint64(i)))
 580 }
 581
 582 // auxTo64F decodes a float64 from the AuxInt value provided.
 583 func auxTo64F(i int64) float64 {
 584         return math.Float64frombits(uint64(i))
 585 }
 586
 587 func auxIntToBool(i int64) bool {
 588         if i == 0 {
 589                 return false
 590         }
 591         return true
 592 }
 593 func auxIntToInt8(i int64) int8 {
 594         return int8(i)
 595 }
 596 func auxIntToInt16(i int64) int16 {
 597         return int16(i)
 598 }
 599 func auxIntToInt32(i int64) int32 {
 600         return int32(i)
 601 }
 602 func auxIntToInt64(i int64) int64 {
 603         return i
 604 }
 605 func auxIntToUint8(i int64) uint8 {
 606         return uint8(i)
 607 }
 608 func auxIntToFloat32(i int64) float32 {
 609         return float32(math.Float64frombits(uint64(i)))
 610 }
 611 func auxIntToFloat64(i int64) float64 {
 612         return math.Float64frombits(uint64(i))
 613 }
 614 func auxIntToValAndOff(i int64) ValAndOff {
 615         return ValAndOff(i)
 616 }
 617 func auxIntToArm64BitField(i int64) arm64BitField {
 618         return arm64BitField(i)
 619 }
 620 func auxIntToInt128(x int64) int128 {
 621         if x != 0 {
 622                 panic("nonzero int128 not allowed")
 623         }
 624         return 0
 625 }
 626 func auxIntToFlagConstant(x int64) flagConstant {
 627         return flagConstant(x)
 628 }
 629
 630 func auxIntToOp(cc int64) Op {
 631         return Op(cc)
 632 }
 633
 634 func boolToAuxInt(b bool) int64 {
 635         if b {
 636                 return 1
 637         }
 638         return 0
 639 }
 640 func int8ToAuxInt(i int8) int64 {
 641         return int64(i)
 642 }
 643 func int16ToAuxInt(i int16) int64 {
 644         return int64(i)
 645 }
 646 func int32ToAuxInt(i int32) int64 {
 647         return int64(i)
 648 }
 649 func int64ToAuxInt(i int64) int64 {
 650         return int64(i)
 651 }
 652 func uint8ToAuxInt(i uint8) int64 {
 653         return int64(int8(i))
 654 }
 655 func float32ToAuxInt(f float32) int64 {
 656         return int64(math.Float64bits(float64(f)))
 657 }
 658 func float64ToAuxInt(f float64) int64 {
 659         return int64(math.Float64bits(f))
 660 }
 661 func valAndOffToAuxInt(v ValAndOff) int64 {
 662         return int64(v)
 663 }
 664 func arm64BitFieldToAuxInt(v arm64BitField) int64 {
 665         return int64(v)
 666 }
 667 func int128ToAuxInt(x int128) int64 {
 668         if x != 0 {
 669                 panic("nonzero int128 not allowed")
 670         }
 671         return 0
 672 }
 673 func flagConstantToAuxInt(x flagConstant) int64 {
 674         return int64(x)
 675 }
 676
 677 func opToAuxInt(o Op) int64 {
 678         return int64(o)
 679 }
 680
 681 func auxToString(i interface{}) string {
 682         return i.(string)
 683 }
 684 func auxToSym(i interface{}) Sym {
 685         // TODO: kind of a hack - allows nil interface through
 686         s, _ := i.(Sym)
 687         return s
 688 }
 689 func auxToType(i interface{}) *types.Type {
 690         return i.(*types.Type)
 691 }
 692 func auxToCall(i interface{}) *AuxCall {
 693         return i.(*AuxCall)
 694 }
 695 func auxToS390xCCMask(i interface{}) s390x.CCMask {
 696         return i.(s390x.CCMask)
 697 }
 698 func auxToS390xRotateParams(i interface{}) s390x.RotateParams {
 699         return i.(s390x.RotateParams)
 700 }
 701
 702 func stringToAux(s string) interface{} {
 703         return s
 704 }
 705 func symToAux(s Sym) interface{} {
 706         return s
 707 }
 708 func callToAux(s *AuxCall) interface{} {
 709         return s
 710 }
 711 func typeToAux(t *types.Type) interface{} {
 712         return t
 713 }
 714 func s390xCCMaskToAux(c s390x.CCMask) interface{} {
 715         return c
 716 }
 717 func s390xRotateParamsToAux(r s390x.RotateParams) interface{} {
 718         return r
 719 }
 720
 721 // uaddOvf reports whether unsigned a+b would overflow.
 722 func uaddOvf(a, b int64) bool {
 723         return uint64(a)+uint64(b) < uint64(a)
 724 }
 725
 726 // de-virtualize an InterCall
 727 // 'sym' is the symbol for the itab
 728 func devirt(v *Value, aux interface{}, sym Sym, offset int64) *AuxCall {
 729         f := v.Block.Func
 730         n, ok := sym.(*obj.LSym)
 731         if !ok {
 732                 return nil
 733         }
 734         lsym := f.fe.DerefItab(n, offset)
 735         if f.pass.debug > 0 {
 736                 if lsym != nil {
 737                         f.Warnl(v.Pos, "de-virtualizing call")
 738                 } else {
 739                         f.Warnl(v.Pos, "couldn't de-virtualize call")
 740                 }
 741         }
 742         if lsym == nil {
 743                 return nil
 744         }
 745         va := aux.(*AuxCall)
 746         return StaticAuxCall(lsym, va.args, va.results)
 747 }
 748
 749 // de-virtualize an InterLECall
 750 // 'sym' is the symbol for the itab
 751 func devirtLESym(v *Value, aux interface{}, sym Sym, offset int64) *obj.LSym {
 752         n, ok := sym.(*obj.LSym)
 753         if !ok {
 754                 return nil
 755         }
 756
 757         f := v.Block.Func
 758         lsym := f.fe.DerefItab(n, offset)
 759         if f.pass.debug > 0 {
 760                 if lsym != nil {
 761                         f.Warnl(v.Pos, "de-virtualizing call")
 762                 } else {
 763                         f.Warnl(v.Pos, "couldn't de-virtualize call")
 764                 }
 765         }
 766         if lsym == nil {
 767                 return nil
 768         }
 769         return lsym
 770 }
 771
 772 func devirtLECall(v *Value, sym *obj.LSym) *Value {
 773         v.Op = OpStaticLECall
 774         v.Aux.(*AuxCall).Fn = sym
 775         v.RemoveArg(0)
 776         return v
 777 }
 778
 779 // isSamePtr reports whether p1 and p2 point to the same address.
 780 func isSamePtr(p1, p2 *Value) bool {
 781         if p1 == p2 {
 782                 return true
 783         }
 784         if p1.Op != p2.Op {
 785                 return false
 786         }
 787         switch p1.Op {
 788         case OpOffPtr:
 789                 return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
 790         case OpAddr, OpLocalAddr:
 791                 // OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op.
 792                 // Checking for value equality only works after [z]cse has run.
 793                 return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op
 794         case OpAddPtr:
 795                 return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
 796         }
 797         return false
 798 }
 799
 800 func isStackPtr(v *Value) bool {
 801         for v.Op == OpOffPtr || v.Op == OpAddPtr {
 802                 v = v.Args[0]
 803         }
 804         return v.Op == OpSP || v.Op == OpLocalAddr
 805 }
 806
 807 // disjoint reports whether the memory region specified by [p1:p1+n1)
 808 // does not overlap with [p2:p2+n2).
 809 // A return value of false does not imply the regions overlap.
 810 func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
 811         if n1 == 0 || n2 == 0 {
 812                 return true
 813         }
 814         if p1 == p2 {
 815                 return false
 816         }
 817         baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
 818                 base, offset = ptr, 0
 819                 for base.Op == OpOffPtr {
 820                         offset += base.AuxInt
 821                         base = base.Args[0]
 822                 }
 823                 return base, offset
 824         }
 825         p1, off1 := baseAndOffset(p1)
 826         p2, off2 := baseAndOffset(p2)
 827         if isSamePtr(p1, p2) {
 828                 return !overlap(off1, n1, off2, n2)
 829         }
 830         // p1 and p2 are not the same, so if they are both OpAddrs then
 831         // they point to different variables.
 832         // If one pointer is on the stack and the other is an argument
 833         // then they can't overlap.
 834         switch p1.Op {
 835         case OpAddr, OpLocalAddr:
 836                 if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
 837                         return true
 838                 }
 839                 return p2.Op == OpArg && p1.Args[0].Op == OpSP
 840         case OpArg:
 841                 if p2.Op == OpSP || p2.Op == OpLocalAddr {
 842                         return true
 843                 }
 844         case OpSP:
 845                 return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpSP
 846         }
 847         return false
 848 }
 849
 850 // moveSize returns the number of bytes an aligned MOV instruction moves
 851 func moveSize(align int64, c *Config) int64 {
 852         switch {
 853         case align%8 == 0 && c.PtrSize == 8:
 854                 return 8
 855         case align%4 == 0:
 856                 return 4
 857         case align%2 == 0:
 858                 return 2
 859         }
 860         return 1
 861 }
 862
 863 // mergePoint finds a block among a's blocks which dominates b and is itself
 864 // dominated by all of a's blocks. Returns nil if it can't find one.
 865 // Might return nil even if one does exist.
 866 func mergePoint(b *Block, a ...*Value) *Block {
 867         // Walk backward from b looking for one of the a's blocks.
 868
 869         // Max distance
 870         d := 100
 871
 872         for d > 0 {
 873                 for _, x := range a {
 874                         if b == x.Block {
 875                                 goto found
 876                         }
 877                 }
 878                 if len(b.Preds) > 1 {
 879                         // Don't know which way to go back. Abort.
 880                         return nil
 881                 }
 882                 b = b.Preds[0].b
 883                 d--
 884         }
 885         return nil // too far away
 886 found:
 887         // At this point, r is the first value in a that we find by walking backwards.
 888         // if we return anything, r will be it.
 889         r := b
 890
 891         // Keep going, counting the other a's that we find. They must all dominate r.
 892         na := 0
 893         for d > 0 {
 894                 for _, x := range a {
 895                         if b == x.Block {
 896                                 na++
 897                         }
 898                 }
 899                 if na == len(a) {
 900                         // Found all of a in a backwards walk. We can return r.
 901                         return r
 902                 }
 903                 if len(b.Preds) > 1 {
 904                         return nil
 905                 }
 906                 b = b.Preds[0].b
 907                 d--
 908
 909         }
 910         return nil // too far away
 911 }
 912
 913 // clobber invalidates values. Returns true.
 914 // clobber is used by rewrite rules to:
 915 //   A) make sure the values are really dead and never used again.
 916 //   B) decrement use counts of the values' args.
 917 func clobber(vv ...*Value) bool {
 918         for _, v := range vv {
 919                 v.reset(OpInvalid)
 920                 // Note: leave v.Block intact.  The Block field is used after clobber.
 921         }
 922         return true
 923 }
 924
 925 // clobberIfDead resets v when use count is 1. Returns true.
 926 // clobberIfDead is used by rewrite rules to decrement
 927 // use counts of v's args when v is dead and never used.
 928 func clobberIfDead(v *Value) bool {
 929         if v.Uses == 1 {
 930                 v.reset(OpInvalid)
 931         }
 932         // Note: leave v.Block intact.  The Block field is used after clobberIfDead.
 933         return true
 934 }
 935
 936 // noteRule is an easy way to track if a rule is matched when writing
 937 // new ones.  Make the rule of interest also conditional on
 938 //     noteRule("note to self: rule of interest matched")
 939 // and that message will print when the rule matches.
 940 func noteRule(s string) bool {
 941         fmt.Println(s)
 942         return true
 943 }
 944
 945 // countRule increments Func.ruleMatches[key].
 946 // If Func.ruleMatches is non-nil at the end
 947 // of compilation, it will be printed to stdout.
 948 // This is intended to make it easier to find which functions
 949 // which contain lots of rules matches when developing new rules.
 950 func countRule(v *Value, key string) bool {
 951         f := v.Block.Func
 952         if f.ruleMatches == nil {
 953                 f.ruleMatches = make(map[string]int)
 954         }
 955         f.ruleMatches[key]++
 956         return true
 957 }
 958
 959 // warnRule generates compiler debug output with string s when
 960 // v is not in autogenerated code, cond is true and the rule has fired.
 961 func warnRule(cond bool, v *Value, s string) bool {
 962         if pos := v.Pos; pos.Line() > 1 && cond {
 963                 v.Block.Func.Warnl(pos, s)
 964         }
 965         return true
 966 }
 967
 968 // for a pseudo-op like (LessThan x), extract x
 969 func flagArg(v *Value) *Value {
 970         if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
 971                 return nil
 972         }
 973         return v.Args[0]
 974 }
 975
 976 // arm64Negate finds the complement to an ARM64 condition code,
 977 // for example Equal -> NotEqual or LessThan -> GreaterEqual
 978 //
 979 // TODO: add floating-point conditions
 980 func arm64Negate(op Op) Op {
 981         switch op {
 982         case OpARM64LessThan:
 983                 return OpARM64GreaterEqual
 984         case OpARM64LessThanU:
 985                 return OpARM64GreaterEqualU
 986         case OpARM64GreaterThan:
 987                 return OpARM64LessEqual
 988         case OpARM64GreaterThanU:
 989                 return OpARM64LessEqualU
 990         case OpARM64LessEqual:
 991                 return OpARM64GreaterThan
 992         case OpARM64LessEqualU:
 993                 return OpARM64GreaterThanU
 994         case OpARM64GreaterEqual:
 995                 return OpARM64LessThan
 996         case OpARM64GreaterEqualU:
 997                 return OpARM64LessThanU
 998         case OpARM64Equal:
 999                 return OpARM64NotEqual
1000         case OpARM64NotEqual:
1001                 return OpARM64Equal
1002         case OpARM64LessThanF:
1003                 return OpARM64GreaterEqualF
1004         case OpARM64GreaterThanF:
1005                 return OpARM64LessEqualF
1006         case OpARM64LessEqualF:
1007                 return OpARM64GreaterThanF
1008         case OpARM64GreaterEqualF:
1009                 return OpARM64LessThanF
1010         default:
1011                 panic("unreachable")
1012         }
1013 }
1014
1015 // arm64Invert evaluates (InvertFlags op), which
1016 // is the same as altering the condition codes such
1017 // that the same result would be produced if the arguments
1018 // to the flag-generating instruction were reversed, e.g.
1019 // (InvertFlags (CMP x y)) -> (CMP y x)
1020 //
1021 // TODO: add floating-point conditions
1022 func arm64Invert(op Op) Op {
1023         switch op {
1024         case OpARM64LessThan:
1025                 return OpARM64GreaterThan
1026         case OpARM64LessThanU:
1027                 return OpARM64GreaterThanU
1028         case OpARM64GreaterThan:
1029                 return OpARM64LessThan
1030         case OpARM64GreaterThanU:
1031                 return OpARM64LessThanU
1032         case OpARM64LessEqual:
1033                 return OpARM64GreaterEqual
1034         case OpARM64LessEqualU:
1035                 return OpARM64GreaterEqualU
1036         case OpARM64GreaterEqual:
1037                 return OpARM64LessEqual
1038         case OpARM64GreaterEqualU:
1039                 return OpARM64LessEqualU
1040         case OpARM64Equal, OpARM64NotEqual:
1041                 return op
1042         case OpARM64LessThanF:
1043                 return OpARM64GreaterThanF
1044         case OpARM64GreaterThanF:
1045                 return OpARM64LessThanF
1046         case OpARM64LessEqualF:
1047                 return OpARM64GreaterEqualF
1048         case OpARM64GreaterEqualF:
1049                 return OpARM64LessEqualF
1050         default:
1051                 panic("unreachable")
1052         }
1053 }
1054
1055 // evaluate an ARM64 op against a flags value
1056 // that is potentially constant; return 1 for true,
1057 // -1 for false, and 0 for not constant.
1058 func ccARM64Eval(op Op, flags *Value) int {
1059         fop := flags.Op
1060         if fop == OpARM64InvertFlags {
1061                 return -ccARM64Eval(op, flags.Args[0])
1062         }
1063         if fop != OpARM64FlagConstant {
1064                 return 0
1065         }
1066         fc := flagConstant(flags.AuxInt)
1067         b2i := func(b bool) int {
1068                 if b {
1069                         return 1
1070                 }
1071                 return -1
1072         }
1073         switch op {
1074         case OpARM64Equal:
1075                 return b2i(fc.eq())
1076         case OpARM64NotEqual:
1077                 return b2i(fc.ne())
1078         case OpARM64LessThan:
1079                 return b2i(fc.lt())
1080         case OpARM64LessThanU:
1081                 return b2i(fc.ult())
1082         case OpARM64GreaterThan:
1083                 return b2i(fc.gt())
1084         case OpARM64GreaterThanU:
1085                 return b2i(fc.ugt())
1086         case OpARM64LessEqual:
1087                 return b2i(fc.le())
1088         case OpARM64LessEqualU:
1089                 return b2i(fc.ule())
1090         case OpARM64GreaterEqual:
1091                 return b2i(fc.ge())
1092         case OpARM64GreaterEqualU:
1093                 return b2i(fc.uge())
1094         }
1095         return 0
1096 }
1097
1098 // logRule logs the use of the rule s. This will only be enabled if
1099 // rewrite rules were generated with the -log option, see gen/rulegen.go.
1100 func logRule(s string) {
1101         if ruleFile == nil {
1102                 // Open a log file to write log to. We open in append
1103                 // mode because all.bash runs the compiler lots of times,
1104                 // and we want the concatenation of all of those logs.
1105                 // This means, of course, that users need to rm the old log
1106                 // to get fresh data.
1107                 // TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
1108                 w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
1109                         os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
1110                 if err != nil {
1111                         panic(err)
1112                 }
1113                 ruleFile = w
1114         }
1115         _, err := fmt.Fprintln(ruleFile, s)
1116         if err != nil {
1117                 panic(err)
1118         }
1119 }
1120
1121 var ruleFile io.Writer
1122
1123 func min(x, y int64) int64 {
1124         if x < y {
1125                 return x
1126         }
1127         return y
1128 }
1129
1130 func isConstZero(v *Value) bool {
1131         switch v.Op {
1132         case OpConstNil:
1133                 return true
1134         case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
1135                 return v.AuxInt == 0
1136         }
1137         return false
1138 }
1139
1140 // reciprocalExact64 reports whether 1/c is exactly representable.
1141 func reciprocalExact64(c float64) bool {
1142         b := math.Float64bits(c)
1143         man := b & (1<<52 - 1)
1144         if man != 0 {
1145                 return false // not a power of 2, denormal, or NaN
1146         }
1147         exp := b >> 52 & (1<<11 - 1)
1148         // exponent bias is 0x3ff.  So taking the reciprocal of a number
1149         // changes the exponent to 0x7fe-exp.
1150         switch exp {
1151         case 0:
1152                 return false // ±0
1153         case 0x7ff:
1154                 return false // ±inf
1155         case 0x7fe:
1156                 return false // exponent is not representable
1157         default:
1158                 return true
1159         }
1160 }
1161
1162 // reciprocalExact32 reports whether 1/c is exactly representable.
1163 func reciprocalExact32(c float32) bool {
1164         b := math.Float32bits(c)
1165         man := b & (1<<23 - 1)
1166         if man != 0 {
1167                 return false // not a power of 2, denormal, or NaN
1168         }
1169         exp := b >> 23 & (1<<8 - 1)
1170         // exponent bias is 0x7f.  So taking the reciprocal of a number
1171         // changes the exponent to 0xfe-exp.
1172         switch exp {
1173         case 0:
1174                 return false // ±0
1175         case 0xff:
1176                 return false // ±inf
1177         case 0xfe:
1178                 return false // exponent is not representable
1179         default:
1180                 return true
1181         }
1182 }
1183
1184 // check if an immediate can be directly encoded into an ARM's instruction
1185 func isARMImmRot(v uint32) bool {
1186         for i := 0; i < 16; i++ {
1187                 if v&^0xff == 0 {
1188                         return true
1189                 }
1190                 v = v<<2 | v>>30
1191         }
1192
1193         return false
1194 }
1195
1196 // overlap reports whether the ranges given by the given offset and
1197 // size pairs overlap.
1198 func overlap(offset1, size1, offset2, size2 int64) bool {
1199         if offset1 >= offset2 && offset2+size2 > offset1 {
1200                 return true
1201         }
1202         if offset2 >= offset1 && offset1+size1 > offset2 {
1203                 return true
1204         }
1205         return false
1206 }
1207
1208 func areAdjacentOffsets(off1, off2, size int64) bool {
1209         return off1+size == off2 || off1 == off2+size
1210 }
1211
1212 // check if value zeroes out upper 32-bit of 64-bit register.
1213 // depth limits recursion depth. In AMD64.rules 3 is used as limit,
1214 // because it catches same amount of cases as 4.
1215 func zeroUpper32Bits(x *Value, depth int) bool {
1216         switch x.Op {
1217         case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
1218                 OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
1219                 OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
1220                 OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
1221                 OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
1222                 OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
1223                 OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
1224                 OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
1225                 OpAMD64SHLL, OpAMD64SHLLconst:
1226                 return true
1227         case OpArg:
1228                 return x.Type.Width == 4
1229         case OpPhi, OpSelect0, OpSelect1:
1230                 // Phis can use each-other as an arguments, instead of tracking visited values,
1231                 // just limit recursion depth.
1232                 if depth <= 0 {
1233                         return false
1234                 }
1235                 for i := range x.Args {
1236                         if !zeroUpper32Bits(x.Args[i], depth-1) {
1237                                 return false
1238                         }
1239                 }
1240                 return true
1241
1242         }
1243         return false
1244 }
1245
1246 // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits
1247 func zeroUpper48Bits(x *Value, depth int) bool {
1248         switch x.Op {
1249         case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
1250                 return true
1251         case OpArg:
1252                 return x.Type.Width == 2
1253         case OpPhi, OpSelect0, OpSelect1:
1254                 // Phis can use each-other as an arguments, instead of tracking visited values,
1255                 // just limit recursion depth.
1256                 if depth <= 0 {
1257                         return false
1258                 }
1259                 for i := range x.Args {
1260                         if !zeroUpper48Bits(x.Args[i], depth-1) {
1261                                 return false
1262                         }
1263                 }
1264                 return true
1265
1266         }
1267         return false
1268 }
1269
1270 // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits
1271 func zeroUpper56Bits(x *Value, depth int) bool {
1272         switch x.Op {
1273         case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
1274                 return true
1275         case OpArg:
1276                 return x.Type.Width == 1
1277         case OpPhi, OpSelect0, OpSelect1:
1278                 // Phis can use each-other as an arguments, instead of tracking visited values,
1279                 // just limit recursion depth.
1280                 if depth <= 0 {
1281                         return false
1282                 }
1283                 for i := range x.Args {
1284                         if !zeroUpper56Bits(x.Args[i], depth-1) {
1285                                 return false
1286                         }
1287                 }
1288                 return true
1289
1290         }
1291         return false
1292 }
1293
1294 // isInlinableMemmove reports whether the given arch performs a Move of the given size
1295 // faster than memmove. It will only return true if replacing the memmove with a Move is
1296 // safe, either because Move is small or because the arguments are disjoint.
1297 // This is used as a check for replacing memmove with Move ops.
1298 func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1299         // It is always safe to convert memmove into Move when its arguments are disjoint.
1300         // Move ops may or may not be faster for large sizes depending on how the platform
1301         // lowers them, so we only perform this optimization on platforms that we know to
1302         // have fast Move ops.
1303         switch c.arch {
1304         case "amd64":
1305                 return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
1306         case "386", "arm64":
1307                 return sz <= 8
1308         case "s390x", "ppc64", "ppc64le":
1309                 return sz <= 8 || disjoint(dst, sz, src, sz)
1310         case "arm", "mips", "mips64", "mipsle", "mips64le":
1311                 return sz <= 4
1312         }
1313         return false
1314 }
1315
1316 // logLargeCopy logs the occurrence of a large copy.
1317 // The best place to do this is in the rewrite rules where the size of the move is easy to find.
1318 // "Large" is arbitrarily chosen to be 128 bytes; this may change.
1319 func logLargeCopy(v *Value, s int64) bool {
1320         if s < 128 {
1321                 return true
1322         }
1323         if logopt.Enabled() {
1324                 logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
1325         }
1326         return true
1327 }
1328
1329 // hasSmallRotate reports whether the architecture has rotate instructions
1330 // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
1331 func hasSmallRotate(c *Config) bool {
1332         switch c.arch {
1333         case "amd64", "386":
1334                 return true
1335         default:
1336                 return false
1337         }
1338 }
1339
1340 func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
1341         if sh < 0 || sh >= sz {
1342                 panic("PPC64 shift arg sh out of range")
1343         }
1344         if mb < 0 || mb >= sz {
1345                 panic("PPC64 shift arg mb out of range")
1346         }
1347         if me < 0 || me >= sz {
1348                 panic("PPC64 shift arg me out of range")
1349         }
1350         return int32(sh<<16 | mb<<8 | me)
1351 }
1352
1353 func GetPPC64Shiftsh(auxint int64) int64 {
1354         return int64(int8(auxint >> 16))
1355 }
1356
1357 func GetPPC64Shiftmb(auxint int64) int64 {
1358         return int64(int8(auxint >> 8))
1359 }
1360
1361 func GetPPC64Shiftme(auxint int64) int64 {
1362         return int64(int8(auxint))
1363 }
1364
1365 // Test if this value can encoded as a mask for a rlwinm like
1366 // operation.  Masks can also extend from the msb and wrap to
1367 // the lsb too.  That is, the valid masks are 32 bit strings
1368 // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
1369 func isPPC64WordRotateMask(v64 int64) bool {
1370         // Isolate rightmost 1 (if none 0) and add.
1371         v := uint32(v64)
1372         vp := (v & -v) + v
1373         // Likewise, for the wrapping case.
1374         vn := ^v
1375         vpn := (vn & -vn) + vn
1376         return (v&vp == 0 || vn&vpn == 0) && v != 0
1377 }
1378
1379 // Compress mask and and shift into single value of the form
1380 // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
1381 // be used to regenerate the input mask.
1382 func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
1383         var mb, me, mbn, men int
1384
1385         // Determine boundaries and then decode them
1386         if mask == 0 || ^mask == 0 || rotate >= nbits {
1387                 panic("Invalid PPC64 rotate mask")
1388         } else if nbits == 32 {
1389                 mb = bits.LeadingZeros32(uint32(mask))
1390                 me = 32 - bits.TrailingZeros32(uint32(mask))
1391                 mbn = bits.LeadingZeros32(^uint32(mask))
1392                 men = 32 - bits.TrailingZeros32(^uint32(mask))
1393         } else {
1394                 mb = bits.LeadingZeros64(uint64(mask))
1395                 me = 64 - bits.TrailingZeros64(uint64(mask))
1396                 mbn = bits.LeadingZeros64(^uint64(mask))
1397                 men = 64 - bits.TrailingZeros64(^uint64(mask))
1398         }
1399         // Check for a wrapping mask (e.g bits at 0 and 63)
1400         if mb == 0 && me == int(nbits) {
1401                 // swap the inverted values
1402                 mb, me = men, mbn
1403         }
1404
1405         return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
1406 }
1407
1408 // The inverse operation of encodePPC64RotateMask.  The values returned as
1409 // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
1410 func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
1411         auxint := uint64(sauxint)
1412         rotate = int64((auxint >> 16) & 0xFF)
1413         mb = int64((auxint >> 8) & 0xFF)
1414         me = int64((auxint >> 0) & 0xFF)
1415         nbits := int64((auxint >> 24) & 0xFF)
1416         mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
1417         if mb > me {
1418                 mask = ^mask
1419         }
1420         if nbits == 32 {
1421                 mask = uint64(uint32(mask))
1422         }
1423
1424         // Fixup ME to match ISA definition.  The second argument to MASK(..,me)
1425         // is inclusive.
1426         me = (me - 1) & (nbits - 1)
1427         return
1428 }
1429
1430 // This verifies that the mask is a set of
1431 // consecutive bits including the least
1432 // significant bit.
1433 func isPPC64ValidShiftMask(v int64) bool {
1434         if (v != 0) && ((v+1)&v) == 0 {
1435                 return true
1436         }
1437         return false
1438 }
1439
1440 func getPPC64ShiftMaskLength(v int64) int64 {
1441         return int64(bits.Len64(uint64(v)))
1442 }
1443
1444 // Decompose a shift right into an equivalent rotate/mask,
1445 // and return mask & m.
1446 func mergePPC64RShiftMask(m, s, nbits int64) int64 {
1447         smask := uint64((1<<uint(nbits))-1) >> uint(s)
1448         return m & int64(smask)
1449 }
1450
1451 // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
1452 func mergePPC64AndSrwi(m, s int64) int64 {
1453         mask := mergePPC64RShiftMask(m, s, 32)
1454         if !isPPC64WordRotateMask(mask) {
1455                 return 0
1456         }
1457         return encodePPC64RotateMask(32-s, mask, 32)
1458 }
1459
1460 // Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
1461 // Return the encoded RLWINM constant, or 0 if they cannot be merged.
1462 func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
1463         mask_1 := uint64(0xFFFFFFFF >> uint(srw))
1464         // for CLRLSLDI, it's more convient to think of it as a mask left bits then rotate left.
1465         mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1466
1467         // Rewrite mask to apply after the final left shift.
1468         mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
1469
1470         r_1 := 32 - srw
1471         r_2 := GetPPC64Shiftsh(sld)
1472         r_3 := (r_1 + r_2) & 31 // This can wrap.
1473
1474         if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
1475                 return 0
1476         }
1477         return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
1478 }
1479
1480 // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
1481 // the encoded RLWINM constant, or 0 if they cannot be merged.
1482 func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
1483         r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
1484         // for CLRLSLDI, it's more convient to think of it as a mask left bits then rotate left.
1485         mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1486
1487         // combine the masks, and adjust for the final left shift.
1488         mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
1489         r_2 := GetPPC64Shiftsh(int64(sld))
1490         r_3 := (r_1 + r_2) & 31 // This can wrap.
1491
1492         // Verify the result is still a valid bitmask of <= 32 bits.
1493         if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
1494                 return 0
1495         }
1496         return encodePPC64RotateMask(r_3, int64(mask_3), 32)
1497 }
1498
1499 // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
1500 // or return 0 if they cannot be combined.
1501 func mergePPC64SldiSrw(sld, srw int64) int64 {
1502         if sld > srw || srw >= 32 {
1503                 return 0
1504         }
1505         mask_r := uint32(0xFFFFFFFF) >> uint(srw)
1506         mask_l := uint32(0xFFFFFFFF) >> uint(sld)
1507         mask := (mask_r & mask_l) << uint(sld)
1508         return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
1509 }
1510
1511 // Convenience function to rotate a 32 bit constant value by another constant.
1512 func rotateLeft32(v, rotate int64) int64 {
1513         return int64(bits.RotateLeft32(uint32(v), int(rotate)))
1514 }
1515
1516 // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
1517 func armBFAuxInt(lsb, width int64) arm64BitField {
1518         if lsb < 0 || lsb > 63 {
1519                 panic("ARM(64) bit field lsb constant out of range")
1520         }
1521         if width < 1 || width > 64 {
1522                 panic("ARM(64) bit field width constant out of range")
1523         }
1524         return arm64BitField(width | lsb<<8)
1525 }
1526
1527 // returns the lsb part of the auxInt field of arm64 bitfield ops.
1528 func (bfc arm64BitField) getARM64BFlsb() int64 {
1529         return int64(uint64(bfc) >> 8)
1530 }
1531
1532 // returns the width part of the auxInt field of arm64 bitfield ops.
1533 func (bfc arm64BitField) getARM64BFwidth() int64 {
1534         return int64(bfc) & 0xff
1535 }
1536
1537 // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
1538 func isARM64BFMask(lsb, mask, rshift int64) bool {
1539         shiftedMask := int64(uint64(mask) >> uint64(rshift))
1540         return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
1541 }
1542
1543 // returns the bitfield width of mask >> rshift for arm64 bitfield ops
1544 func arm64BFWidth(mask, rshift int64) int64 {
1545         shiftedMask := int64(uint64(mask) >> uint64(rshift))
1546         if shiftedMask == 0 {
1547                 panic("ARM64 BF mask is zero")
1548         }
1549         return nto(shiftedMask)
1550 }
1551
1552 // sizeof returns the size of t in bytes.
1553 // It will panic if t is not a *types.Type.
1554 func sizeof(t interface{}) int64 {
1555         return t.(*types.Type).Size()
1556 }
1557
1558 // registerizable reports whether t is a primitive type that fits in
1559 // a register. It assumes float64 values will always fit into registers
1560 // even if that isn't strictly true.
1561 func registerizable(b *Block, typ *types.Type) bool {
1562         if typ.IsPtrShaped() || typ.IsFloat() {
1563                 return true
1564         }
1565         if typ.IsInteger() {
1566                 return typ.Size() <= b.Func.Config.RegSize
1567         }
1568         return false
1569 }
1570
1571 // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
1572 func needRaceCleanup(sym *AuxCall, v *Value) bool {
1573         f := v.Block.Func
1574         if !f.Config.Race {
1575                 return false
1576         }
1577         if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncenterfp") && !isSameCall(sym, "runtime.racefuncexit") {
1578                 return false
1579         }
1580         for _, b := range f.Blocks {
1581                 for _, v := range b.Values {
1582                         switch v.Op {
1583                         case OpStaticCall:
1584                                 // Check for racefuncenter/racefuncenterfp will encounter racefuncexit and vice versa.
1585                                 // Allow calls to panic*
1586                                 s := v.Aux.(*AuxCall).Fn.String()
1587                                 switch s {
1588                                 case "runtime.racefuncenter", "runtime.racefuncenterfp", "runtime.racefuncexit",
1589                                         "runtime.panicdivide", "runtime.panicwrap",
1590                                         "runtime.panicshift":
1591                                         continue
1592                                 }
1593                                 // If we encountered any call, we need to keep racefunc*,
1594                                 // for accurate stacktraces.
1595                                 return false
1596                         case OpPanicBounds, OpPanicExtend:
1597                                 // Note: these are panic generators that are ok (like the static calls above).
1598                         case OpClosureCall, OpInterCall:
1599                                 // We must keep the race functions if there are any other call types.
1600                                 return false
1601                         }
1602                 }
1603         }
1604         if isSameCall(sym, "runtime.racefuncenter") {
1605                 // If we're removing racefuncenter, remove its argument as well.
1606                 if v.Args[0].Op != OpStore {
1607                         return false
1608                 }
1609                 mem := v.Args[0].Args[2]
1610                 v.Args[0].reset(OpCopy)
1611                 v.Args[0].AddArg(mem)
1612         }
1613         return true
1614 }
1615
1616 // symIsRO reports whether sym is a read-only global.
1617 func symIsRO(sym interface{}) bool {
1618         lsym := sym.(*obj.LSym)
1619         return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
1620 }
1621
1622 // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
1623 func symIsROZero(sym Sym) bool {
1624         lsym := sym.(*obj.LSym)
1625         if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
1626                 return false
1627         }
1628         for _, b := range lsym.P {
1629                 if b != 0 {
1630                         return false
1631                 }
1632         }
1633         return true
1634 }
1635
1636 // read8 reads one byte from the read-only global sym at offset off.
1637 func read8(sym interface{}, off int64) uint8 {
1638         lsym := sym.(*obj.LSym)
1639         if off >= int64(len(lsym.P)) || off < 0 {
1640                 // Invalid index into the global sym.
1641                 // This can happen in dead code, so we don't want to panic.
1642                 // Just return any value, it will eventually get ignored.
1643                 // See issue 29215.
1644                 return 0
1645         }
1646         return lsym.P[off]
1647 }
1648
1649 // read16 reads two bytes from the read-only global sym at offset off.
1650 func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
1651         lsym := sym.(*obj.LSym)
1652         // lsym.P is written lazily.
1653         // Bytes requested after the end of lsym.P are 0.
1654         var src []byte
1655         if 0 <= off && off < int64(len(lsym.P)) {
1656                 src = lsym.P[off:]
1657         }
1658         buf := make([]byte, 2)
1659         copy(buf, src)
1660         return byteorder.Uint16(buf)
1661 }
1662
1663 // read32 reads four bytes from the read-only global sym at offset off.
1664 func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
1665         lsym := sym.(*obj.LSym)
1666         var src []byte
1667         if 0 <= off && off < int64(len(lsym.P)) {
1668                 src = lsym.P[off:]
1669         }
1670         buf := make([]byte, 4)
1671         copy(buf, src)
1672         return byteorder.Uint32(buf)
1673 }
1674
1675 // read64 reads eight bytes from the read-only global sym at offset off.
1676 func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
1677         lsym := sym.(*obj.LSym)
1678         var src []byte
1679         if 0 <= off && off < int64(len(lsym.P)) {
1680                 src = lsym.P[off:]
1681         }
1682         buf := make([]byte, 8)
1683         copy(buf, src)
1684         return byteorder.Uint64(buf)
1685 }
1686
1687 // sequentialAddresses reports true if it can prove that x + n == y
1688 func sequentialAddresses(x, y *Value, n int64) bool {
1689         if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
1690                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1691                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1692                 return true
1693         }
1694         if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1695                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1696                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1697                 return true
1698         }
1699         if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
1700                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1701                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1702                 return true
1703         }
1704         if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1705                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1706                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1707                 return true
1708         }
1709         return false
1710 }
1711
1712 // flagConstant represents the result of a compile-time comparison.
1713 // The sense of these flags does not necessarily represent the hardware's notion
1714 // of a flags register - these are just a compile-time construct.
1715 // We happen to match the semantics to those of arm/arm64.
1716 // Note that these semantics differ from x86: the carry flag has the opposite
1717 // sense on a subtraction!
1718 //   On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
1719 //   On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
1720 //    (because it does x + ^y + C).
1721 // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
1722 type flagConstant uint8
1723
1724 // N reports whether the result of an operation is negative (high bit set).
1725 func (fc flagConstant) N() bool {
1726         return fc&1 != 0
1727 }
1728
1729 // Z reports whether the result of an operation is 0.
1730 func (fc flagConstant) Z() bool {
1731         return fc&2 != 0
1732 }
1733
1734 // C reports whether an unsigned add overflowed (carry), or an
1735 // unsigned subtract did not underflow (borrow).
1736 func (fc flagConstant) C() bool {
1737         return fc&4 != 0
1738 }
1739
1740 // V reports whether a signed operation overflowed or underflowed.
1741 func (fc flagConstant) V() bool {
1742         return fc&8 != 0
1743 }
1744
1745 func (fc flagConstant) eq() bool {
1746         return fc.Z()
1747 }
1748 func (fc flagConstant) ne() bool {
1749         return !fc.Z()
1750 }
1751 func (fc flagConstant) lt() bool {
1752         return fc.N() != fc.V()
1753 }
1754 func (fc flagConstant) le() bool {
1755         return fc.Z() || fc.lt()
1756 }
1757 func (fc flagConstant) gt() bool {
1758         return !fc.Z() && fc.ge()
1759 }
1760 func (fc flagConstant) ge() bool {
1761         return fc.N() == fc.V()
1762 }
1763 func (fc flagConstant) ult() bool {
1764         return !fc.C()
1765 }
1766 func (fc flagConstant) ule() bool {
1767         return fc.Z() || fc.ult()
1768 }
1769 func (fc flagConstant) ugt() bool {
1770         return !fc.Z() && fc.uge()
1771 }
1772 func (fc flagConstant) uge() bool {
1773         return fc.C()
1774 }
1775
1776 func (fc flagConstant) ltNoov() bool {
1777         return fc.lt() && !fc.V()
1778 }
1779 func (fc flagConstant) leNoov() bool {
1780         return fc.le() && !fc.V()
1781 }
1782 func (fc flagConstant) gtNoov() bool {
1783         return fc.gt() && !fc.V()
1784 }
1785 func (fc flagConstant) geNoov() bool {
1786         return fc.ge() && !fc.V()
1787 }
1788
1789 func (fc flagConstant) String() string {
1790         return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
1791 }
1792
1793 type flagConstantBuilder struct {
1794         N bool
1795         Z bool
1796         C bool
1797         V bool
1798 }
1799
1800 func (fcs flagConstantBuilder) encode() flagConstant {
1801         var fc flagConstant
1802         if fcs.N {
1803                 fc |= 1
1804         }
1805         if fcs.Z {
1806                 fc |= 2
1807         }
1808         if fcs.C {
1809                 fc |= 4
1810         }
1811         if fcs.V {
1812                 fc |= 8
1813         }
1814         return fc
1815 }
1816
1817 // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
1818 //  - the results of the C flag are different
1819 //  - the results of the V flag when y==minint are different
1820
1821 // addFlags64 returns the flags that would be set from computing x+y.
1822 func addFlags64(x, y int64) flagConstant {
1823         var fcb flagConstantBuilder
1824         fcb.Z = x+y == 0
1825         fcb.N = x+y < 0
1826         fcb.C = uint64(x+y) < uint64(x)
1827         fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1828         return fcb.encode()
1829 }
1830
1831 // subFlags64 returns the flags that would be set from computing x-y.
1832 func subFlags64(x, y int64) flagConstant {
1833         var fcb flagConstantBuilder
1834         fcb.Z = x-y == 0
1835         fcb.N = x-y < 0
1836         fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
1837         fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
1838         return fcb.encode()
1839 }
1840
1841 // addFlags32 returns the flags that would be set from computing x+y.
1842 func addFlags32(x, y int32) flagConstant {
1843         var fcb flagConstantBuilder
1844         fcb.Z = x+y == 0
1845         fcb.N = x+y < 0
1846         fcb.C = uint32(x+y) < uint32(x)
1847         fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1848         return fcb.encode()
1849 }
1850
1851 // subFlags32 returns the flags that would be set from computing x-y.
1852 func subFlags32(x, y int32) flagConstant {
1853         var fcb flagConstantBuilder
1854         fcb.Z = x-y == 0
1855         fcb.N = x-y < 0
1856         fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
1857         fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
1858         return fcb.encode()
1859 }
1860
1861 // logicFlags64 returns flags set to the sign/zeroness of x.
1862 // C and V are set to false.
1863 func logicFlags64(x int64) flagConstant {
1864         var fcb flagConstantBuilder
1865         fcb.Z = x == 0
1866         fcb.N = x < 0
1867         return fcb.encode()
1868 }
1869
1870 // logicFlags32 returns flags set to the sign/zeroness of x.
1871 // C and V are set to false.
1872 func logicFlags32(x int32) flagConstant {
1873         var fcb flagConstantBuilder
1874         fcb.Z = x == 0
1875         fcb.N = x < 0
1876         return fcb.encode()
1877 }