src/cmd/compile/internal/ssa/rewrite.go

   1 // Copyright 2015 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package ssa
   6
   7 import (
   8         "cmd/compile/internal/logopt"
   9         "cmd/compile/internal/types"
  10         "cmd/internal/obj"
  11         "cmd/internal/obj/s390x"
  12         "cmd/internal/objabi"
  13         "cmd/internal/src"
  14         "encoding/binary"
  15         "fmt"
  16         "io"
  17         "math"
  18         "math/bits"
  19         "os"
  20         "path/filepath"
  21 )
  22
  23 type deadValueChoice bool
  24
  25 const (
  26         leaveDeadValues  deadValueChoice = false
  27         removeDeadValues                 = true
  28 )
  29
  30 // deadcode indicates that rewrite should try to remove any values that become dead.
  31 func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
  32         // repeat rewrites until we find no more rewrites
  33         pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
  34         pendingLines.clear()
  35         debug := f.pass.debug
  36         if debug > 1 {
  37                 fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
  38         }
  39         for {
  40                 change := false
  41                 for _, b := range f.Blocks {
  42                         var b0 *Block
  43                         if debug > 1 {
  44                                 b0 = new(Block)
  45                                 *b0 = *b
  46                                 b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
  47                         }
  48                         for i, c := range b.ControlValues() {
  49                                 for c.Op == OpCopy {
  50                                         c = c.Args[0]
  51                                         b.ReplaceControl(i, c)
  52                                 }
  53                         }
  54                         if rb(b) {
  55                                 change = true
  56                                 if debug > 1 {
  57                                         fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
  58                                 }
  59                         }
  60                         for j, v := range b.Values {
  61                                 var v0 *Value
  62                                 if debug > 1 {
  63                                         v0 = new(Value)
  64                                         *v0 = *v
  65                                         v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
  66                                 }
  67                                 if v.Uses == 0 && v.removeable() {
  68                                         if v.Op != OpInvalid && deadcode == removeDeadValues {
  69                                                 // Reset any values that are now unused, so that we decrement
  70                                                 // the use count of all of its arguments.
  71                                                 // Not quite a deadcode pass, because it does not handle cycles.
  72                                                 // But it should help Uses==1 rules to fire.
  73                                                 v.reset(OpInvalid)
  74                                                 change = true
  75                                         }
  76                                         // No point rewriting values which aren't used.
  77                                         continue
  78                                 }
  79
  80                                 vchange := phielimValue(v)
  81                                 if vchange && debug > 1 {
  82                                         fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
  83                                 }
  84
  85                                 // Eliminate copy inputs.
  86                                 // If any copy input becomes unused, mark it
  87                                 // as invalid and discard its argument. Repeat
  88                                 // recursively on the discarded argument.
  89                                 // This phase helps remove phantom "dead copy" uses
  90                                 // of a value so that a x.Uses==1 rule condition
  91                                 // fires reliably.
  92                                 for i, a := range v.Args {
  93                                         if a.Op != OpCopy {
  94                                                 continue
  95                                         }
  96                                         aa := copySource(a)
  97                                         v.SetArg(i, aa)
  98                                         // If a, a copy, has a line boundary indicator, attempt to find a new value
  99                                         // to hold it.  The first candidate is the value that will replace a (aa),
 100                                         // if it shares the same block and line and is eligible.
 101                                         // The second option is v, which has a as an input.  Because aa is earlier in
 102                                         // the data flow, it is the better choice.
 103                                         if a.Pos.IsStmt() == src.PosIsStmt {
 104                                                 if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
 105                                                         aa.Pos = aa.Pos.WithIsStmt()
 106                                                 } else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
 107                                                         v.Pos = v.Pos.WithIsStmt()
 108                                                 } else {
 109                                                         // Record the lost line and look for a new home after all rewrites are complete.
 110                                                         // TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
 111                                                         // line to appear in more than one block, but only one block is stored, so if both end
 112                                                         // up here, then one will be lost.
 113                                                         pendingLines.set(a.Pos, int32(a.Block.ID))
 114                                                 }
 115                                                 a.Pos = a.Pos.WithNotStmt()
 116                                         }
 117                                         vchange = true
 118                                         for a.Uses == 0 {
 119                                                 b := a.Args[0]
 120                                                 a.reset(OpInvalid)
 121                                                 a = b
 122                                         }
 123                                 }
 124                                 if vchange && debug > 1 {
 125                                         fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
 126                                 }
 127
 128                                 // apply rewrite function
 129                                 if rv(v) {
 130                                         vchange = true
 131                                         // If value changed to a poor choice for a statement boundary, move the boundary
 132                                         if v.Pos.IsStmt() == src.PosIsStmt {
 133                                                 if k := nextGoodStatementIndex(v, j, b); k != j {
 134                                                         v.Pos = v.Pos.WithNotStmt()
 135                                                         b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
 136                                                 }
 137                                         }
 138                                 }
 139
 140                                 change = change || vchange
 141                                 if vchange && debug > 1 {
 142                                         fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
 143                                 }
 144                         }
 145                 }
 146                 if !change {
 147                         break
 148                 }
 149         }
 150         // remove clobbered values
 151         for _, b := range f.Blocks {
 152                 j := 0
 153                 for i, v := range b.Values {
 154                         vl := v.Pos
 155                         if v.Op == OpInvalid {
 156                                 if v.Pos.IsStmt() == src.PosIsStmt {
 157                                         pendingLines.set(vl, int32(b.ID))
 158                                 }
 159                                 f.freeValue(v)
 160                                 continue
 161                         }
 162                         if v.Pos.IsStmt() != src.PosNotStmt && pendingLines.get(vl) == int32(b.ID) {
 163                                 pendingLines.remove(vl)
 164                                 v.Pos = v.Pos.WithIsStmt()
 165                         }
 166                         if i != j {
 167                                 b.Values[j] = v
 168                         }
 169                         j++
 170                 }
 171                 if pendingLines.get(b.Pos) == int32(b.ID) {
 172                         b.Pos = b.Pos.WithIsStmt()
 173                         pendingLines.remove(b.Pos)
 174                 }
 175                 b.truncateValues(j)
 176         }
 177 }
 178
 179 // Common functions called from rewriting rules
 180
 181 func is64BitFloat(t *types.Type) bool {
 182         return t.Size() == 8 && t.IsFloat()
 183 }
 184
 185 func is32BitFloat(t *types.Type) bool {
 186         return t.Size() == 4 && t.IsFloat()
 187 }
 188
 189 func is64BitInt(t *types.Type) bool {
 190         return t.Size() == 8 && t.IsInteger()
 191 }
 192
 193 func is32BitInt(t *types.Type) bool {
 194         return t.Size() == 4 && t.IsInteger()
 195 }
 196
 197 func is16BitInt(t *types.Type) bool {
 198         return t.Size() == 2 && t.IsInteger()
 199 }
 200
 201 func is8BitInt(t *types.Type) bool {
 202         return t.Size() == 1 && t.IsInteger()
 203 }
 204
 205 func isPtr(t *types.Type) bool {
 206         return t.IsPtrShaped()
 207 }
 208
 209 func isSigned(t *types.Type) bool {
 210         return t.IsSigned()
 211 }
 212
 213 // mergeSym merges two symbolic offsets. There is no real merging of
 214 // offsets, we just pick the non-nil one.
 215 func mergeSym(x, y Sym) Sym {
 216         if x == nil {
 217                 return y
 218         }
 219         if y == nil {
 220                 return x
 221         }
 222         panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
 223 }
 224
 225 func canMergeSym(x, y Sym) bool {
 226         return x == nil || y == nil
 227 }
 228
 229 // canMergeLoadClobber reports whether the load can be merged into target without
 230 // invalidating the schedule.
 231 // It also checks that the other non-load argument x is something we
 232 // are ok with clobbering.
 233 func canMergeLoadClobber(target, load, x *Value) bool {
 234         // The register containing x is going to get clobbered.
 235         // Don't merge if we still need the value of x.
 236         // We don't have liveness information here, but we can
 237         // approximate x dying with:
 238         //  1) target is x's only use.
 239         //  2) target is not in a deeper loop than x.
 240         if x.Uses != 1 {
 241                 return false
 242         }
 243         loopnest := x.Block.Func.loopnest()
 244         loopnest.calculateDepths()
 245         if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
 246                 return false
 247         }
 248         return canMergeLoad(target, load)
 249 }
 250
 251 // canMergeLoad reports whether the load can be merged into target without
 252 // invalidating the schedule.
 253 func canMergeLoad(target, load *Value) bool {
 254         if target.Block.ID != load.Block.ID {
 255                 // If the load is in a different block do not merge it.
 256                 return false
 257         }
 258
 259         // We can't merge the load into the target if the load
 260         // has more than one use.
 261         if load.Uses != 1 {
 262                 return false
 263         }
 264
 265         mem := load.MemoryArg()
 266
 267         // We need the load's memory arg to still be alive at target. That
 268         // can't be the case if one of target's args depends on a memory
 269         // state that is a successor of load's memory arg.
 270         //
 271         // For example, it would be invalid to merge load into target in
 272         // the following situation because newmem has killed oldmem
 273         // before target is reached:
 274         //     load = read ... oldmem
 275         //   newmem = write ... oldmem
 276         //     arg0 = read ... newmem
 277         //   target = add arg0 load
 278         //
 279         // If the argument comes from a different block then we can exclude
 280         // it immediately because it must dominate load (which is in the
 281         // same block as target).
 282         var args []*Value
 283         for _, a := range target.Args {
 284                 if a != load && a.Block.ID == target.Block.ID {
 285                         args = append(args, a)
 286                 }
 287         }
 288
 289         // memPreds contains memory states known to be predecessors of load's
 290         // memory state. It is lazily initialized.
 291         var memPreds map[*Value]bool
 292         for i := 0; len(args) > 0; i++ {
 293                 const limit = 100
 294                 if i >= limit {
 295                         // Give up if we have done a lot of iterations.
 296                         return false
 297                 }
 298                 v := args[len(args)-1]
 299                 args = args[:len(args)-1]
 300                 if target.Block.ID != v.Block.ID {
 301                         // Since target and load are in the same block
 302                         // we can stop searching when we leave the block.
 303                         continue
 304                 }
 305                 if v.Op == OpPhi {
 306                         // A Phi implies we have reached the top of the block.
 307                         // The memory phi, if it exists, is always
 308                         // the first logical store in the block.
 309                         continue
 310                 }
 311                 if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
 312                         // We could handle this situation however it is likely
 313                         // to be very rare.
 314                         return false
 315                 }
 316                 if v.Op.SymEffect()&SymAddr != 0 {
 317                         // This case prevents an operation that calculates the
 318                         // address of a local variable from being forced to schedule
 319                         // before its corresponding VarDef.
 320                         // See issue 28445.
 321                         //   v1 = LOAD ...
 322                         //   v2 = VARDEF
 323                         //   v3 = LEAQ
 324                         //   v4 = CMPQ v1 v3
 325                         // We don't want to combine the CMPQ with the load, because
 326                         // that would force the CMPQ to schedule before the VARDEF, which
 327                         // in turn requires the LEAQ to schedule before the VARDEF.
 328                         return false
 329                 }
 330                 if v.Type.IsMemory() {
 331                         if memPreds == nil {
 332                                 // Initialise a map containing memory states
 333                                 // known to be predecessors of load's memory
 334                                 // state.
 335                                 memPreds = make(map[*Value]bool)
 336                                 m := mem
 337                                 const limit = 50
 338                                 for i := 0; i < limit; i++ {
 339                                         if m.Op == OpPhi {
 340                                                 // The memory phi, if it exists, is always
 341                                                 // the first logical store in the block.
 342                                                 break
 343                                         }
 344                                         if m.Block.ID != target.Block.ID {
 345                                                 break
 346                                         }
 347                                         if !m.Type.IsMemory() {
 348                                                 break
 349                                         }
 350                                         memPreds[m] = true
 351                                         if len(m.Args) == 0 {
 352                                                 break
 353                                         }
 354                                         m = m.MemoryArg()
 355                                 }
 356                         }
 357
 358                         // We can merge if v is a predecessor of mem.
 359                         //
 360                         // For example, we can merge load into target in the
 361                         // following scenario:
 362                         //      x = read ... v
 363                         //    mem = write ... v
 364                         //   load = read ... mem
 365                         // target = add x load
 366                         if memPreds[v] {
 367                                 continue
 368                         }
 369                         return false
 370                 }
 371                 if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
 372                         // If v takes mem as an input then we know mem
 373                         // is valid at this point.
 374                         continue
 375                 }
 376                 for _, a := range v.Args {
 377                         if target.Block.ID == a.Block.ID {
 378                                 args = append(args, a)
 379                         }
 380                 }
 381         }
 382
 383         return true
 384 }
 385
 386 // isSameCall reports whether sym is the same as the given named symbol
 387 func isSameCall(sym interface{}, name string) bool {
 388         fn := sym.(*AuxCall).Fn
 389         return fn != nil && fn.String() == name
 390 }
 391
 392 // nlz returns the number of leading zeros.
 393 func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
 394 func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
 395 func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
 396 func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
 397
 398 // ntzX returns the number of trailing zeros.
 399 func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
 400 func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
 401 func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
 402 func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
 403
 404 func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
 405 func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
 406 func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
 407 func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
 408 func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
 409
 410 // nto returns the number of trailing ones.
 411 func nto(x int64) int64 {
 412         return int64(ntz64(^x))
 413 }
 414
 415 // logX returns logarithm of n base 2.
 416 // n must be a positive power of 2 (isPowerOfTwoX returns true).
 417 func log8(n int8) int64 {
 418         return int64(bits.Len8(uint8(n))) - 1
 419 }
 420 func log16(n int16) int64 {
 421         return int64(bits.Len16(uint16(n))) - 1
 422 }
 423 func log32(n int32) int64 {
 424         return int64(bits.Len32(uint32(n))) - 1
 425 }
 426 func log64(n int64) int64 {
 427         return int64(bits.Len64(uint64(n))) - 1
 428 }
 429
 430 // log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
 431 // Rounds down.
 432 func log2uint32(n int64) int64 {
 433         return int64(bits.Len32(uint32(n))) - 1
 434 }
 435
 436 // isPowerOfTwo functions report whether n is a power of 2.
 437 func isPowerOfTwo8(n int8) bool {
 438         return n > 0 && n&(n-1) == 0
 439 }
 440 func isPowerOfTwo16(n int16) bool {
 441         return n > 0 && n&(n-1) == 0
 442 }
 443 func isPowerOfTwo32(n int32) bool {
 444         return n > 0 && n&(n-1) == 0
 445 }
 446 func isPowerOfTwo64(n int64) bool {
 447         return n > 0 && n&(n-1) == 0
 448 }
 449
 450 // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
 451 func isUint64PowerOfTwo(in int64) bool {
 452         n := uint64(in)
 453         return n > 0 && n&(n-1) == 0
 454 }
 455
 456 // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
 457 func isUint32PowerOfTwo(in int64) bool {
 458         n := uint64(uint32(in))
 459         return n > 0 && n&(n-1) == 0
 460 }
 461
 462 // is32Bit reports whether n can be represented as a signed 32 bit integer.
 463 func is32Bit(n int64) bool {
 464         return n == int64(int32(n))
 465 }
 466
 467 // is16Bit reports whether n can be represented as a signed 16 bit integer.
 468 func is16Bit(n int64) bool {
 469         return n == int64(int16(n))
 470 }
 471
 472 // is8Bit reports whether n can be represented as a signed 8 bit integer.
 473 func is8Bit(n int64) bool {
 474         return n == int64(int8(n))
 475 }
 476
 477 // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
 478 func isU8Bit(n int64) bool {
 479         return n == int64(uint8(n))
 480 }
 481
 482 // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
 483 func isU12Bit(n int64) bool {
 484         return 0 <= n && n < (1<<12)
 485 }
 486
 487 // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
 488 func isU16Bit(n int64) bool {
 489         return n == int64(uint16(n))
 490 }
 491
 492 // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
 493 func isU32Bit(n int64) bool {
 494         return n == int64(uint32(n))
 495 }
 496
 497 // is20Bit reports whether n can be represented as a signed 20 bit integer.
 498 func is20Bit(n int64) bool {
 499         return -(1<<19) <= n && n < (1<<19)
 500 }
 501
 502 // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
 503 func b2i(b bool) int64 {
 504         if b {
 505                 return 1
 506         }
 507         return 0
 508 }
 509
 510 // b2i32 translates a boolean value to 0 or 1.
 511 func b2i32(b bool) int32 {
 512         if b {
 513                 return 1
 514         }
 515         return 0
 516 }
 517
 518 // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
 519 // A shift is bounded if it is shifting by less than the width of the shifted value.
 520 func shiftIsBounded(v *Value) bool {
 521         return v.AuxInt != 0
 522 }
 523
 524 // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
 525 // of the mantissa. It will panic if the truncation results in lost information.
 526 func truncate64Fto32F(f float64) float32 {
 527         if !isExactFloat32(f) {
 528                 panic("truncate64Fto32F: truncation is not exact")
 529         }
 530         if !math.IsNaN(f) {
 531                 return float32(f)
 532         }
 533         // NaN bit patterns aren't necessarily preserved across conversion
 534         // instructions so we need to do the conversion manually.
 535         b := math.Float64bits(f)
 536         m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
 537         //          | sign                  | exponent   | mantissa       |
 538         r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
 539         return math.Float32frombits(r)
 540 }
 541
 542 // extend32Fto64F converts a float32 value to a float64 value preserving the bit
 543 // pattern of the mantissa.
 544 func extend32Fto64F(f float32) float64 {
 545         if !math.IsNaN(float64(f)) {
 546                 return float64(f)
 547         }
 548         // NaN bit patterns aren't necessarily preserved across conversion
 549         // instructions so we need to do the conversion manually.
 550         b := uint64(math.Float32bits(f))
 551         //   | sign                  | exponent      | mantissa                    |
 552         r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
 553         return math.Float64frombits(r)
 554 }
 555
 556 // DivisionNeedsFixUp reports whether the division needs fix-up code.
 557 func DivisionNeedsFixUp(v *Value) bool {
 558         return v.AuxInt == 0
 559 }
 560
 561 // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
 562 func auxFrom64F(f float64) int64 {
 563         if f != f {
 564                 panic("can't encode a NaN in AuxInt field")
 565         }
 566         return int64(math.Float64bits(f))
 567 }
 568
 569 // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
 570 func auxFrom32F(f float32) int64 {
 571         if f != f {
 572                 panic("can't encode a NaN in AuxInt field")
 573         }
 574         return int64(math.Float64bits(extend32Fto64F(f)))
 575 }
 576
 577 // auxTo32F decodes a float32 from the AuxInt value provided.
 578 func auxTo32F(i int64) float32 {
 579         return truncate64Fto32F(math.Float64frombits(uint64(i)))
 580 }
 581
 582 // auxTo64F decodes a float64 from the AuxInt value provided.
 583 func auxTo64F(i int64) float64 {
 584         return math.Float64frombits(uint64(i))
 585 }
 586
 587 func auxIntToBool(i int64) bool {
 588         if i == 0 {
 589                 return false
 590         }
 591         return true
 592 }
 593 func auxIntToInt8(i int64) int8 {
 594         return int8(i)
 595 }
 596 func auxIntToInt16(i int64) int16 {
 597         return int16(i)
 598 }
 599 func auxIntToInt32(i int64) int32 {
 600         return int32(i)
 601 }
 602 func auxIntToInt64(i int64) int64 {
 603         return i
 604 }
 605 func auxIntToUint8(i int64) uint8 {
 606         return uint8(i)
 607 }
 608 func auxIntToFloat32(i int64) float32 {
 609         return float32(math.Float64frombits(uint64(i)))
 610 }
 611 func auxIntToFloat64(i int64) float64 {
 612         return math.Float64frombits(uint64(i))
 613 }
 614 func auxIntToValAndOff(i int64) ValAndOff {
 615         return ValAndOff(i)
 616 }
 617 func auxIntToArm64BitField(i int64) arm64BitField {
 618         return arm64BitField(i)
 619 }
 620 func auxIntToInt128(x int64) int128 {
 621         if x != 0 {
 622                 panic("nonzero int128 not allowed")
 623         }
 624         return 0
 625 }
 626 func auxIntToFlagConstant(x int64) flagConstant {
 627         return flagConstant(x)
 628 }
 629
 630 func auxIntToOp(cc int64) Op {
 631         return Op(cc)
 632 }
 633
 634 func boolToAuxInt(b bool) int64 {
 635         if b {
 636                 return 1
 637         }
 638         return 0
 639 }
 640 func int8ToAuxInt(i int8) int64 {
 641         return int64(i)
 642 }
 643 func int16ToAuxInt(i int16) int64 {
 644         return int64(i)
 645 }
 646 func int32ToAuxInt(i int32) int64 {
 647         return int64(i)
 648 }
 649 func int64ToAuxInt(i int64) int64 {
 650         return int64(i)
 651 }
 652 func uint8ToAuxInt(i uint8) int64 {
 653         return int64(int8(i))
 654 }
 655 func float32ToAuxInt(f float32) int64 {
 656         return int64(math.Float64bits(float64(f)))
 657 }
 658 func float64ToAuxInt(f float64) int64 {
 659         return int64(math.Float64bits(f))
 660 }
 661 func valAndOffToAuxInt(v ValAndOff) int64 {
 662         return int64(v)
 663 }
 664 func arm64BitFieldToAuxInt(v arm64BitField) int64 {
 665         return int64(v)
 666 }
 667 func int128ToAuxInt(x int128) int64 {
 668         if x != 0 {
 669                 panic("nonzero int128 not allowed")
 670         }
 671         return 0
 672 }
 673 func flagConstantToAuxInt(x flagConstant) int64 {
 674         return int64(x)
 675 }
 676
 677 func opToAuxInt(o Op) int64 {
 678         return int64(o)
 679 }
 680
 681 // Aux is an interface to hold miscellaneous data in Blocks and Values.
 682 type Aux interface {
 683         CanBeAnSSAAux()
 684 }
 685
 686 // stringAux wraps string values for use in Aux.
 687 type stringAux string
 688
 689 func (stringAux) CanBeAnSSAAux() {}
 690
 691 func auxToString(i Aux) string {
 692         return string(i.(stringAux))
 693 }
 694 func auxToSym(i Aux) Sym {
 695         // TODO: kind of a hack - allows nil interface through
 696         s, _ := i.(Sym)
 697         return s
 698 }
 699 func auxToType(i Aux) *types.Type {
 700         return i.(*types.Type)
 701 }
 702 func auxToCall(i Aux) *AuxCall {
 703         return i.(*AuxCall)
 704 }
 705 func auxToS390xCCMask(i Aux) s390x.CCMask {
 706         return i.(s390x.CCMask)
 707 }
 708 func auxToS390xRotateParams(i Aux) s390x.RotateParams {
 709         return i.(s390x.RotateParams)
 710 }
 711
 712 func StringToAux(s string) Aux {
 713         return stringAux(s)
 714 }
 715 func symToAux(s Sym) Aux {
 716         return s
 717 }
 718 func callToAux(s *AuxCall) Aux {
 719         return s
 720 }
 721 func typeToAux(t *types.Type) Aux {
 722         return t
 723 }
 724 func s390xCCMaskToAux(c s390x.CCMask) Aux {
 725         return c
 726 }
 727 func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
 728         return r
 729 }
 730
 731 // uaddOvf reports whether unsigned a+b would overflow.
 732 func uaddOvf(a, b int64) bool {
 733         return uint64(a)+uint64(b) < uint64(a)
 734 }
 735
 736 // de-virtualize an InterCall
 737 // 'sym' is the symbol for the itab
 738 func devirt(v *Value, aux Aux, sym Sym, offset int64) *AuxCall {
 739         f := v.Block.Func
 740         n, ok := sym.(*obj.LSym)
 741         if !ok {
 742                 return nil
 743         }
 744         lsym := f.fe.DerefItab(n, offset)
 745         if f.pass.debug > 0 {
 746                 if lsym != nil {
 747                         f.Warnl(v.Pos, "de-virtualizing call")
 748                 } else {
 749                         f.Warnl(v.Pos, "couldn't de-virtualize call")
 750                 }
 751         }
 752         if lsym == nil {
 753                 return nil
 754         }
 755         va := aux.(*AuxCall)
 756         return StaticAuxCall(lsym, va.args, va.results)
 757 }
 758
 759 // de-virtualize an InterLECall
 760 // 'sym' is the symbol for the itab
 761 func devirtLESym(v *Value, aux Aux, sym Sym, offset int64) *obj.LSym {
 762         n, ok := sym.(*obj.LSym)
 763         if !ok {
 764                 return nil
 765         }
 766
 767         f := v.Block.Func
 768         lsym := f.fe.DerefItab(n, offset)
 769         if f.pass.debug > 0 {
 770                 if lsym != nil {
 771                         f.Warnl(v.Pos, "de-virtualizing call")
 772                 } else {
 773                         f.Warnl(v.Pos, "couldn't de-virtualize call")
 774                 }
 775         }
 776         if lsym == nil {
 777                 return nil
 778         }
 779         return lsym
 780 }
 781
 782 func devirtLECall(v *Value, sym *obj.LSym) *Value {
 783         v.Op = OpStaticLECall
 784         v.Aux.(*AuxCall).Fn = sym
 785         v.RemoveArg(0)
 786         return v
 787 }
 788
 789 // isSamePtr reports whether p1 and p2 point to the same address.
 790 func isSamePtr(p1, p2 *Value) bool {
 791         if p1 == p2 {
 792                 return true
 793         }
 794         if p1.Op != p2.Op {
 795                 return false
 796         }
 797         switch p1.Op {
 798         case OpOffPtr:
 799                 return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
 800         case OpAddr, OpLocalAddr:
 801                 // OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op.
 802                 // Checking for value equality only works after [z]cse has run.
 803                 return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op
 804         case OpAddPtr:
 805                 return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
 806         }
 807         return false
 808 }
 809
 810 func isStackPtr(v *Value) bool {
 811         for v.Op == OpOffPtr || v.Op == OpAddPtr {
 812                 v = v.Args[0]
 813         }
 814         return v.Op == OpSP || v.Op == OpLocalAddr
 815 }
 816
 817 // disjoint reports whether the memory region specified by [p1:p1+n1)
 818 // does not overlap with [p2:p2+n2).
 819 // A return value of false does not imply the regions overlap.
 820 func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
 821         if n1 == 0 || n2 == 0 {
 822                 return true
 823         }
 824         if p1 == p2 {
 825                 return false
 826         }
 827         baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
 828                 base, offset = ptr, 0
 829                 for base.Op == OpOffPtr {
 830                         offset += base.AuxInt
 831                         base = base.Args[0]
 832                 }
 833                 return base, offset
 834         }
 835         p1, off1 := baseAndOffset(p1)
 836         p2, off2 := baseAndOffset(p2)
 837         if isSamePtr(p1, p2) {
 838                 return !overlap(off1, n1, off2, n2)
 839         }
 840         // p1 and p2 are not the same, so if they are both OpAddrs then
 841         // they point to different variables.
 842         // If one pointer is on the stack and the other is an argument
 843         // then they can't overlap.
 844         switch p1.Op {
 845         case OpAddr, OpLocalAddr:
 846                 if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
 847                         return true
 848                 }
 849                 return p2.Op == OpArg && p1.Args[0].Op == OpSP
 850         case OpArg:
 851                 if p2.Op == OpSP || p2.Op == OpLocalAddr {
 852                         return true
 853                 }
 854         case OpSP:
 855                 return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpSP
 856         }
 857         return false
 858 }
 859
 860 // moveSize returns the number of bytes an aligned MOV instruction moves
 861 func moveSize(align int64, c *Config) int64 {
 862         switch {
 863         case align%8 == 0 && c.PtrSize == 8:
 864                 return 8
 865         case align%4 == 0:
 866                 return 4
 867         case align%2 == 0:
 868                 return 2
 869         }
 870         return 1
 871 }
 872
 873 // mergePoint finds a block among a's blocks which dominates b and is itself
 874 // dominated by all of a's blocks. Returns nil if it can't find one.
 875 // Might return nil even if one does exist.
 876 func mergePoint(b *Block, a ...*Value) *Block {
 877         // Walk backward from b looking for one of the a's blocks.
 878
 879         // Max distance
 880         d := 100
 881
 882         for d > 0 {
 883                 for _, x := range a {
 884                         if b == x.Block {
 885                                 goto found
 886                         }
 887                 }
 888                 if len(b.Preds) > 1 {
 889                         // Don't know which way to go back. Abort.
 890                         return nil
 891                 }
 892                 b = b.Preds[0].b
 893                 d--
 894         }
 895         return nil // too far away
 896 found:
 897         // At this point, r is the first value in a that we find by walking backwards.
 898         // if we return anything, r will be it.
 899         r := b
 900
 901         // Keep going, counting the other a's that we find. They must all dominate r.
 902         na := 0
 903         for d > 0 {
 904                 for _, x := range a {
 905                         if b == x.Block {
 906                                 na++
 907                         }
 908                 }
 909                 if na == len(a) {
 910                         // Found all of a in a backwards walk. We can return r.
 911                         return r
 912                 }
 913                 if len(b.Preds) > 1 {
 914                         return nil
 915                 }
 916                 b = b.Preds[0].b
 917                 d--
 918
 919         }
 920         return nil // too far away
 921 }
 922
 923 // clobber invalidates values. Returns true.
 924 // clobber is used by rewrite rules to:
 925 //   A) make sure the values are really dead and never used again.
 926 //   B) decrement use counts of the values' args.
 927 func clobber(vv ...*Value) bool {
 928         for _, v := range vv {
 929                 v.reset(OpInvalid)
 930                 // Note: leave v.Block intact.  The Block field is used after clobber.
 931         }
 932         return true
 933 }
 934
 935 // clobberIfDead resets v when use count is 1. Returns true.
 936 // clobberIfDead is used by rewrite rules to decrement
 937 // use counts of v's args when v is dead and never used.
 938 func clobberIfDead(v *Value) bool {
 939         if v.Uses == 1 {
 940                 v.reset(OpInvalid)
 941         }
 942         // Note: leave v.Block intact.  The Block field is used after clobberIfDead.
 943         return true
 944 }
 945
 946 // noteRule is an easy way to track if a rule is matched when writing
 947 // new ones.  Make the rule of interest also conditional on
 948 //     noteRule("note to self: rule of interest matched")
 949 // and that message will print when the rule matches.
 950 func noteRule(s string) bool {
 951         fmt.Println(s)
 952         return true
 953 }
 954
 955 // countRule increments Func.ruleMatches[key].
 956 // If Func.ruleMatches is non-nil at the end
 957 // of compilation, it will be printed to stdout.
 958 // This is intended to make it easier to find which functions
 959 // which contain lots of rules matches when developing new rules.
 960 func countRule(v *Value, key string) bool {
 961         f := v.Block.Func
 962         if f.ruleMatches == nil {
 963                 f.ruleMatches = make(map[string]int)
 964         }
 965         f.ruleMatches[key]++
 966         return true
 967 }
 968
 969 // warnRule generates compiler debug output with string s when
 970 // v is not in autogenerated code, cond is true and the rule has fired.
 971 func warnRule(cond bool, v *Value, s string) bool {
 972         if pos := v.Pos; pos.Line() > 1 && cond {
 973                 v.Block.Func.Warnl(pos, s)
 974         }
 975         return true
 976 }
 977
 978 // for a pseudo-op like (LessThan x), extract x
 979 func flagArg(v *Value) *Value {
 980         if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
 981                 return nil
 982         }
 983         return v.Args[0]
 984 }
 985
 986 // arm64Negate finds the complement to an ARM64 condition code,
 987 // for example Equal -> NotEqual or LessThan -> GreaterEqual
 988 //
 989 // TODO: add floating-point conditions
 990 func arm64Negate(op Op) Op {
 991         switch op {
 992         case OpARM64LessThan:
 993                 return OpARM64GreaterEqual
 994         case OpARM64LessThanU:
 995                 return OpARM64GreaterEqualU
 996         case OpARM64GreaterThan:
 997                 return OpARM64LessEqual
 998         case OpARM64GreaterThanU:
 999                 return OpARM64LessEqualU
1000         case OpARM64LessEqual:
1001                 return OpARM64GreaterThan
1002         case OpARM64LessEqualU:
1003                 return OpARM64GreaterThanU
1004         case OpARM64GreaterEqual:
1005                 return OpARM64LessThan
1006         case OpARM64GreaterEqualU:
1007                 return OpARM64LessThanU
1008         case OpARM64Equal:
1009                 return OpARM64NotEqual
1010         case OpARM64NotEqual:
1011                 return OpARM64Equal
1012         case OpARM64LessThanF:
1013                 return OpARM64GreaterEqualF
1014         case OpARM64GreaterThanF:
1015                 return OpARM64LessEqualF
1016         case OpARM64LessEqualF:
1017                 return OpARM64GreaterThanF
1018         case OpARM64GreaterEqualF:
1019                 return OpARM64LessThanF
1020         default:
1021                 panic("unreachable")
1022         }
1023 }
1024
1025 // arm64Invert evaluates (InvertFlags op), which
1026 // is the same as altering the condition codes such
1027 // that the same result would be produced if the arguments
1028 // to the flag-generating instruction were reversed, e.g.
1029 // (InvertFlags (CMP x y)) -> (CMP y x)
1030 //
1031 // TODO: add floating-point conditions
1032 func arm64Invert(op Op) Op {
1033         switch op {
1034         case OpARM64LessThan:
1035                 return OpARM64GreaterThan
1036         case OpARM64LessThanU:
1037                 return OpARM64GreaterThanU
1038         case OpARM64GreaterThan:
1039                 return OpARM64LessThan
1040         case OpARM64GreaterThanU:
1041                 return OpARM64LessThanU
1042         case OpARM64LessEqual:
1043                 return OpARM64GreaterEqual
1044         case OpARM64LessEqualU:
1045                 return OpARM64GreaterEqualU
1046         case OpARM64GreaterEqual:
1047                 return OpARM64LessEqual
1048         case OpARM64GreaterEqualU:
1049                 return OpARM64LessEqualU
1050         case OpARM64Equal, OpARM64NotEqual:
1051                 return op
1052         case OpARM64LessThanF:
1053                 return OpARM64GreaterThanF
1054         case OpARM64GreaterThanF:
1055                 return OpARM64LessThanF
1056         case OpARM64LessEqualF:
1057                 return OpARM64GreaterEqualF
1058         case OpARM64GreaterEqualF:
1059                 return OpARM64LessEqualF
1060         default:
1061                 panic("unreachable")
1062         }
1063 }
1064
1065 // evaluate an ARM64 op against a flags value
1066 // that is potentially constant; return 1 for true,
1067 // -1 for false, and 0 for not constant.
1068 func ccARM64Eval(op Op, flags *Value) int {
1069         fop := flags.Op
1070         if fop == OpARM64InvertFlags {
1071                 return -ccARM64Eval(op, flags.Args[0])
1072         }
1073         if fop != OpARM64FlagConstant {
1074                 return 0
1075         }
1076         fc := flagConstant(flags.AuxInt)
1077         b2i := func(b bool) int {
1078                 if b {
1079                         return 1
1080                 }
1081                 return -1
1082         }
1083         switch op {
1084         case OpARM64Equal:
1085                 return b2i(fc.eq())
1086         case OpARM64NotEqual:
1087                 return b2i(fc.ne())
1088         case OpARM64LessThan:
1089                 return b2i(fc.lt())
1090         case OpARM64LessThanU:
1091                 return b2i(fc.ult())
1092         case OpARM64GreaterThan:
1093                 return b2i(fc.gt())
1094         case OpARM64GreaterThanU:
1095                 return b2i(fc.ugt())
1096         case OpARM64LessEqual:
1097                 return b2i(fc.le())
1098         case OpARM64LessEqualU:
1099                 return b2i(fc.ule())
1100         case OpARM64GreaterEqual:
1101                 return b2i(fc.ge())
1102         case OpARM64GreaterEqualU:
1103                 return b2i(fc.uge())
1104         }
1105         return 0
1106 }
1107
1108 // logRule logs the use of the rule s. This will only be enabled if
1109 // rewrite rules were generated with the -log option, see gen/rulegen.go.
1110 func logRule(s string) {
1111         if ruleFile == nil {
1112                 // Open a log file to write log to. We open in append
1113                 // mode because all.bash runs the compiler lots of times,
1114                 // and we want the concatenation of all of those logs.
1115                 // This means, of course, that users need to rm the old log
1116                 // to get fresh data.
1117                 // TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
1118                 w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
1119                         os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
1120                 if err != nil {
1121                         panic(err)
1122                 }
1123                 ruleFile = w
1124         }
1125         _, err := fmt.Fprintln(ruleFile, s)
1126         if err != nil {
1127                 panic(err)
1128         }
1129 }
1130
1131 var ruleFile io.Writer
1132
1133 func min(x, y int64) int64 {
1134         if x < y {
1135                 return x
1136         }
1137         return y
1138 }
1139
1140 func isConstZero(v *Value) bool {
1141         switch v.Op {
1142         case OpConstNil:
1143                 return true
1144         case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
1145                 return v.AuxInt == 0
1146         }
1147         return false
1148 }
1149
1150 // reciprocalExact64 reports whether 1/c is exactly representable.
1151 func reciprocalExact64(c float64) bool {
1152         b := math.Float64bits(c)
1153         man := b & (1<<52 - 1)
1154         if man != 0 {
1155                 return false // not a power of 2, denormal, or NaN
1156         }
1157         exp := b >> 52 & (1<<11 - 1)
1158         // exponent bias is 0x3ff.  So taking the reciprocal of a number
1159         // changes the exponent to 0x7fe-exp.
1160         switch exp {
1161         case 0:
1162                 return false // ±0
1163         case 0x7ff:
1164                 return false // ±inf
1165         case 0x7fe:
1166                 return false // exponent is not representable
1167         default:
1168                 return true
1169         }
1170 }
1171
1172 // reciprocalExact32 reports whether 1/c is exactly representable.
1173 func reciprocalExact32(c float32) bool {
1174         b := math.Float32bits(c)
1175         man := b & (1<<23 - 1)
1176         if man != 0 {
1177                 return false // not a power of 2, denormal, or NaN
1178         }
1179         exp := b >> 23 & (1<<8 - 1)
1180         // exponent bias is 0x7f.  So taking the reciprocal of a number
1181         // changes the exponent to 0xfe-exp.
1182         switch exp {
1183         case 0:
1184                 return false // ±0
1185         case 0xff:
1186                 return false // ±inf
1187         case 0xfe:
1188                 return false // exponent is not representable
1189         default:
1190                 return true
1191         }
1192 }
1193
1194 // check if an immediate can be directly encoded into an ARM's instruction
1195 func isARMImmRot(v uint32) bool {
1196         for i := 0; i < 16; i++ {
1197                 if v&^0xff == 0 {
1198                         return true
1199                 }
1200                 v = v<<2 | v>>30
1201         }
1202
1203         return false
1204 }
1205
1206 // overlap reports whether the ranges given by the given offset and
1207 // size pairs overlap.
1208 func overlap(offset1, size1, offset2, size2 int64) bool {
1209         if offset1 >= offset2 && offset2+size2 > offset1 {
1210                 return true
1211         }
1212         if offset2 >= offset1 && offset1+size1 > offset2 {
1213                 return true
1214         }
1215         return false
1216 }
1217
1218 func areAdjacentOffsets(off1, off2, size int64) bool {
1219         return off1+size == off2 || off1 == off2+size
1220 }
1221
1222 // check if value zeroes out upper 32-bit of 64-bit register.
1223 // depth limits recursion depth. In AMD64.rules 3 is used as limit,
1224 // because it catches same amount of cases as 4.
1225 func zeroUpper32Bits(x *Value, depth int) bool {
1226         switch x.Op {
1227         case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
1228                 OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
1229                 OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
1230                 OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
1231                 OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
1232                 OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
1233                 OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
1234                 OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
1235                 OpAMD64SHLL, OpAMD64SHLLconst:
1236                 return true
1237         case OpArg:
1238                 return x.Type.Width == 4
1239         case OpPhi, OpSelect0, OpSelect1:
1240                 // Phis can use each-other as an arguments, instead of tracking visited values,
1241                 // just limit recursion depth.
1242                 if depth <= 0 {
1243                         return false
1244                 }
1245                 for i := range x.Args {
1246                         if !zeroUpper32Bits(x.Args[i], depth-1) {
1247                                 return false
1248                         }
1249                 }
1250                 return true
1251
1252         }
1253         return false
1254 }
1255
1256 // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits
1257 func zeroUpper48Bits(x *Value, depth int) bool {
1258         switch x.Op {
1259         case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
1260                 return true
1261         case OpArg:
1262                 return x.Type.Width == 2
1263         case OpPhi, OpSelect0, OpSelect1:
1264                 // Phis can use each-other as an arguments, instead of tracking visited values,
1265                 // just limit recursion depth.
1266                 if depth <= 0 {
1267                         return false
1268                 }
1269                 for i := range x.Args {
1270                         if !zeroUpper48Bits(x.Args[i], depth-1) {
1271                                 return false
1272                         }
1273                 }
1274                 return true
1275
1276         }
1277         return false
1278 }
1279
1280 // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits
1281 func zeroUpper56Bits(x *Value, depth int) bool {
1282         switch x.Op {
1283         case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
1284                 return true
1285         case OpArg:
1286                 return x.Type.Width == 1
1287         case OpPhi, OpSelect0, OpSelect1:
1288                 // Phis can use each-other as an arguments, instead of tracking visited values,
1289                 // just limit recursion depth.
1290                 if depth <= 0 {
1291                         return false
1292                 }
1293                 for i := range x.Args {
1294                         if !zeroUpper56Bits(x.Args[i], depth-1) {
1295                                 return false
1296                         }
1297                 }
1298                 return true
1299
1300         }
1301         return false
1302 }
1303
1304 // isInlinableMemmove reports whether the given arch performs a Move of the given size
1305 // faster than memmove. It will only return true if replacing the memmove with a Move is
1306 // safe, either because Move is small or because the arguments are disjoint.
1307 // This is used as a check for replacing memmove with Move ops.
1308 func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1309         // It is always safe to convert memmove into Move when its arguments are disjoint.
1310         // Move ops may or may not be faster for large sizes depending on how the platform
1311         // lowers them, so we only perform this optimization on platforms that we know to
1312         // have fast Move ops.
1313         switch c.arch {
1314         case "amd64":
1315                 return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
1316         case "386", "arm64":
1317                 return sz <= 8
1318         case "s390x", "ppc64", "ppc64le":
1319                 return sz <= 8 || disjoint(dst, sz, src, sz)
1320         case "arm", "mips", "mips64", "mipsle", "mips64le":
1321                 return sz <= 4
1322         }
1323         return false
1324 }
1325
1326 // logLargeCopy logs the occurrence of a large copy.
1327 // The best place to do this is in the rewrite rules where the size of the move is easy to find.
1328 // "Large" is arbitrarily chosen to be 128 bytes; this may change.
1329 func logLargeCopy(v *Value, s int64) bool {
1330         if s < 128 {
1331                 return true
1332         }
1333         if logopt.Enabled() {
1334                 logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
1335         }
1336         return true
1337 }
1338
1339 // hasSmallRotate reports whether the architecture has rotate instructions
1340 // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
1341 func hasSmallRotate(c *Config) bool {
1342         switch c.arch {
1343         case "amd64", "386":
1344                 return true
1345         default:
1346                 return false
1347         }
1348 }
1349
1350 func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
1351         if sh < 0 || sh >= sz {
1352                 panic("PPC64 shift arg sh out of range")
1353         }
1354         if mb < 0 || mb >= sz {
1355                 panic("PPC64 shift arg mb out of range")
1356         }
1357         if me < 0 || me >= sz {
1358                 panic("PPC64 shift arg me out of range")
1359         }
1360         return int32(sh<<16 | mb<<8 | me)
1361 }
1362
1363 func GetPPC64Shiftsh(auxint int64) int64 {
1364         return int64(int8(auxint >> 16))
1365 }
1366
1367 func GetPPC64Shiftmb(auxint int64) int64 {
1368         return int64(int8(auxint >> 8))
1369 }
1370
1371 func GetPPC64Shiftme(auxint int64) int64 {
1372         return int64(int8(auxint))
1373 }
1374
1375 // Test if this value can encoded as a mask for a rlwinm like
1376 // operation.  Masks can also extend from the msb and wrap to
1377 // the lsb too.  That is, the valid masks are 32 bit strings
1378 // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
1379 func isPPC64WordRotateMask(v64 int64) bool {
1380         // Isolate rightmost 1 (if none 0) and add.
1381         v := uint32(v64)
1382         vp := (v & -v) + v
1383         // Likewise, for the wrapping case.
1384         vn := ^v
1385         vpn := (vn & -vn) + vn
1386         return (v&vp == 0 || vn&vpn == 0) && v != 0
1387 }
1388
1389 // Compress mask and and shift into single value of the form
1390 // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
1391 // be used to regenerate the input mask.
1392 func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
1393         var mb, me, mbn, men int
1394
1395         // Determine boundaries and then decode them
1396         if mask == 0 || ^mask == 0 || rotate >= nbits {
1397                 panic("Invalid PPC64 rotate mask")
1398         } else if nbits == 32 {
1399                 mb = bits.LeadingZeros32(uint32(mask))
1400                 me = 32 - bits.TrailingZeros32(uint32(mask))
1401                 mbn = bits.LeadingZeros32(^uint32(mask))
1402                 men = 32 - bits.TrailingZeros32(^uint32(mask))
1403         } else {
1404                 mb = bits.LeadingZeros64(uint64(mask))
1405                 me = 64 - bits.TrailingZeros64(uint64(mask))
1406                 mbn = bits.LeadingZeros64(^uint64(mask))
1407                 men = 64 - bits.TrailingZeros64(^uint64(mask))
1408         }
1409         // Check for a wrapping mask (e.g bits at 0 and 63)
1410         if mb == 0 && me == int(nbits) {
1411                 // swap the inverted values
1412                 mb, me = men, mbn
1413         }
1414
1415         return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
1416 }
1417
1418 // The inverse operation of encodePPC64RotateMask.  The values returned as
1419 // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
1420 func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
1421         auxint := uint64(sauxint)
1422         rotate = int64((auxint >> 16) & 0xFF)
1423         mb = int64((auxint >> 8) & 0xFF)
1424         me = int64((auxint >> 0) & 0xFF)
1425         nbits := int64((auxint >> 24) & 0xFF)
1426         mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
1427         if mb > me {
1428                 mask = ^mask
1429         }
1430         if nbits == 32 {
1431                 mask = uint64(uint32(mask))
1432         }
1433
1434         // Fixup ME to match ISA definition.  The second argument to MASK(..,me)
1435         // is inclusive.
1436         me = (me - 1) & (nbits - 1)
1437         return
1438 }
1439
1440 // This verifies that the mask is a set of
1441 // consecutive bits including the least
1442 // significant bit.
1443 func isPPC64ValidShiftMask(v int64) bool {
1444         if (v != 0) && ((v+1)&v) == 0 {
1445                 return true
1446         }
1447         return false
1448 }
1449
1450 func getPPC64ShiftMaskLength(v int64) int64 {
1451         return int64(bits.Len64(uint64(v)))
1452 }
1453
1454 // Decompose a shift right into an equivalent rotate/mask,
1455 // and return mask & m.
1456 func mergePPC64RShiftMask(m, s, nbits int64) int64 {
1457         smask := uint64((1<<uint(nbits))-1) >> uint(s)
1458         return m & int64(smask)
1459 }
1460
1461 // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
1462 func mergePPC64AndSrwi(m, s int64) int64 {
1463         mask := mergePPC64RShiftMask(m, s, 32)
1464         if !isPPC64WordRotateMask(mask) {
1465                 return 0
1466         }
1467         return encodePPC64RotateMask(32-s, mask, 32)
1468 }
1469
1470 // Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
1471 // Return the encoded RLWINM constant, or 0 if they cannot be merged.
1472 func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
1473         mask_1 := uint64(0xFFFFFFFF >> uint(srw))
1474         // for CLRLSLDI, it's more convient to think of it as a mask left bits then rotate left.
1475         mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1476
1477         // Rewrite mask to apply after the final left shift.
1478         mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
1479
1480         r_1 := 32 - srw
1481         r_2 := GetPPC64Shiftsh(sld)
1482         r_3 := (r_1 + r_2) & 31 // This can wrap.
1483
1484         if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
1485                 return 0
1486         }
1487         return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
1488 }
1489
1490 // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
1491 // the encoded RLWINM constant, or 0 if they cannot be merged.
1492 func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
1493         r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
1494         // for CLRLSLDI, it's more convient to think of it as a mask left bits then rotate left.
1495         mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1496
1497         // combine the masks, and adjust for the final left shift.
1498         mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
1499         r_2 := GetPPC64Shiftsh(int64(sld))
1500         r_3 := (r_1 + r_2) & 31 // This can wrap.
1501
1502         // Verify the result is still a valid bitmask of <= 32 bits.
1503         if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
1504                 return 0
1505         }
1506         return encodePPC64RotateMask(r_3, int64(mask_3), 32)
1507 }
1508
1509 // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
1510 // or return 0 if they cannot be combined.
1511 func mergePPC64SldiSrw(sld, srw int64) int64 {
1512         if sld > srw || srw >= 32 {
1513                 return 0
1514         }
1515         mask_r := uint32(0xFFFFFFFF) >> uint(srw)
1516         mask_l := uint32(0xFFFFFFFF) >> uint(sld)
1517         mask := (mask_r & mask_l) << uint(sld)
1518         return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
1519 }
1520
1521 // Convenience function to rotate a 32 bit constant value by another constant.
1522 func rotateLeft32(v, rotate int64) int64 {
1523         return int64(bits.RotateLeft32(uint32(v), int(rotate)))
1524 }
1525
1526 // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
1527 func armBFAuxInt(lsb, width int64) arm64BitField {
1528         if lsb < 0 || lsb > 63 {
1529                 panic("ARM(64) bit field lsb constant out of range")
1530         }
1531         if width < 1 || width > 64 {
1532                 panic("ARM(64) bit field width constant out of range")
1533         }
1534         return arm64BitField(width | lsb<<8)
1535 }
1536
1537 // returns the lsb part of the auxInt field of arm64 bitfield ops.
1538 func (bfc arm64BitField) getARM64BFlsb() int64 {
1539         return int64(uint64(bfc) >> 8)
1540 }
1541
1542 // returns the width part of the auxInt field of arm64 bitfield ops.
1543 func (bfc arm64BitField) getARM64BFwidth() int64 {
1544         return int64(bfc) & 0xff
1545 }
1546
1547 // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
1548 func isARM64BFMask(lsb, mask, rshift int64) bool {
1549         shiftedMask := int64(uint64(mask) >> uint64(rshift))
1550         return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
1551 }
1552
1553 // returns the bitfield width of mask >> rshift for arm64 bitfield ops
1554 func arm64BFWidth(mask, rshift int64) int64 {
1555         shiftedMask := int64(uint64(mask) >> uint64(rshift))
1556         if shiftedMask == 0 {
1557                 panic("ARM64 BF mask is zero")
1558         }
1559         return nto(shiftedMask)
1560 }
1561
1562 // sizeof returns the size of t in bytes.
1563 // It will panic if t is not a *types.Type.
1564 func sizeof(t interface{}) int64 {
1565         return t.(*types.Type).Size()
1566 }
1567
1568 // registerizable reports whether t is a primitive type that fits in
1569 // a register. It assumes float64 values will always fit into registers
1570 // even if that isn't strictly true.
1571 func registerizable(b *Block, typ *types.Type) bool {
1572         if typ.IsPtrShaped() || typ.IsFloat() {
1573                 return true
1574         }
1575         if typ.IsInteger() {
1576                 return typ.Size() <= b.Func.Config.RegSize
1577         }
1578         return false
1579 }
1580
1581 // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
1582 func needRaceCleanup(sym *AuxCall, v *Value) bool {
1583         f := v.Block.Func
1584         if !f.Config.Race {
1585                 return false
1586         }
1587         if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncenterfp") && !isSameCall(sym, "runtime.racefuncexit") {
1588                 return false
1589         }
1590         for _, b := range f.Blocks {
1591                 for _, v := range b.Values {
1592                         switch v.Op {
1593                         case OpStaticCall:
1594                                 // Check for racefuncenter/racefuncenterfp will encounter racefuncexit and vice versa.
1595                                 // Allow calls to panic*
1596                                 s := v.Aux.(*AuxCall).Fn.String()
1597                                 switch s {
1598                                 case "runtime.racefuncenter", "runtime.racefuncenterfp", "runtime.racefuncexit",
1599                                         "runtime.panicdivide", "runtime.panicwrap",
1600                                         "runtime.panicshift":
1601                                         continue
1602                                 }
1603                                 // If we encountered any call, we need to keep racefunc*,
1604                                 // for accurate stacktraces.
1605                                 return false
1606                         case OpPanicBounds, OpPanicExtend:
1607                                 // Note: these are panic generators that are ok (like the static calls above).
1608                         case OpClosureCall, OpInterCall:
1609                                 // We must keep the race functions if there are any other call types.
1610                                 return false
1611                         }
1612                 }
1613         }
1614         if isSameCall(sym, "runtime.racefuncenter") {
1615                 // If we're removing racefuncenter, remove its argument as well.
1616                 if v.Args[0].Op != OpStore {
1617                         return false
1618                 }
1619                 mem := v.Args[0].Args[2]
1620                 v.Args[0].reset(OpCopy)
1621                 v.Args[0].AddArg(mem)
1622         }
1623         return true
1624 }
1625
1626 // symIsRO reports whether sym is a read-only global.
1627 func symIsRO(sym interface{}) bool {
1628         lsym := sym.(*obj.LSym)
1629         return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
1630 }
1631
1632 // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
1633 func symIsROZero(sym Sym) bool {
1634         lsym := sym.(*obj.LSym)
1635         if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
1636                 return false
1637         }
1638         for _, b := range lsym.P {
1639                 if b != 0 {
1640                         return false
1641                 }
1642         }
1643         return true
1644 }
1645
1646 // read8 reads one byte from the read-only global sym at offset off.
1647 func read8(sym interface{}, off int64) uint8 {
1648         lsym := sym.(*obj.LSym)
1649         if off >= int64(len(lsym.P)) || off < 0 {
1650                 // Invalid index into the global sym.
1651                 // This can happen in dead code, so we don't want to panic.
1652                 // Just return any value, it will eventually get ignored.
1653                 // See issue 29215.
1654                 return 0
1655         }
1656         return lsym.P[off]
1657 }
1658
1659 // read16 reads two bytes from the read-only global sym at offset off.
1660 func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
1661         lsym := sym.(*obj.LSym)
1662         // lsym.P is written lazily.
1663         // Bytes requested after the end of lsym.P are 0.
1664         var src []byte
1665         if 0 <= off && off < int64(len(lsym.P)) {
1666                 src = lsym.P[off:]
1667         }
1668         buf := make([]byte, 2)
1669         copy(buf, src)
1670         return byteorder.Uint16(buf)
1671 }
1672
1673 // read32 reads four bytes from the read-only global sym at offset off.
1674 func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
1675         lsym := sym.(*obj.LSym)
1676         var src []byte
1677         if 0 <= off && off < int64(len(lsym.P)) {
1678                 src = lsym.P[off:]
1679         }
1680         buf := make([]byte, 4)
1681         copy(buf, src)
1682         return byteorder.Uint32(buf)
1683 }
1684
1685 // read64 reads eight bytes from the read-only global sym at offset off.
1686 func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
1687         lsym := sym.(*obj.LSym)
1688         var src []byte
1689         if 0 <= off && off < int64(len(lsym.P)) {
1690                 src = lsym.P[off:]
1691         }
1692         buf := make([]byte, 8)
1693         copy(buf, src)
1694         return byteorder.Uint64(buf)
1695 }
1696
1697 // sequentialAddresses reports true if it can prove that x + n == y
1698 func sequentialAddresses(x, y *Value, n int64) bool {
1699         if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
1700                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1701                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1702                 return true
1703         }
1704         if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1705                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1706                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1707                 return true
1708         }
1709         if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
1710                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1711                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1712                 return true
1713         }
1714         if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1715                 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1716                         x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1717                 return true
1718         }
1719         return false
1720 }
1721
1722 // flagConstant represents the result of a compile-time comparison.
1723 // The sense of these flags does not necessarily represent the hardware's notion
1724 // of a flags register - these are just a compile-time construct.
1725 // We happen to match the semantics to those of arm/arm64.
1726 // Note that these semantics differ from x86: the carry flag has the opposite
1727 // sense on a subtraction!
1728 //   On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
1729 //   On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
1730 //    (because it does x + ^y + C).
1731 // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
1732 type flagConstant uint8
1733
1734 // N reports whether the result of an operation is negative (high bit set).
1735 func (fc flagConstant) N() bool {
1736         return fc&1 != 0
1737 }
1738
1739 // Z reports whether the result of an operation is 0.
1740 func (fc flagConstant) Z() bool {
1741         return fc&2 != 0
1742 }
1743
1744 // C reports whether an unsigned add overflowed (carry), or an
1745 // unsigned subtract did not underflow (borrow).
1746 func (fc flagConstant) C() bool {
1747         return fc&4 != 0
1748 }
1749
1750 // V reports whether a signed operation overflowed or underflowed.
1751 func (fc flagConstant) V() bool {
1752         return fc&8 != 0
1753 }
1754
1755 func (fc flagConstant) eq() bool {
1756         return fc.Z()
1757 }
1758 func (fc flagConstant) ne() bool {
1759         return !fc.Z()
1760 }
1761 func (fc flagConstant) lt() bool {
1762         return fc.N() != fc.V()
1763 }
1764 func (fc flagConstant) le() bool {
1765         return fc.Z() || fc.lt()
1766 }
1767 func (fc flagConstant) gt() bool {
1768         return !fc.Z() && fc.ge()
1769 }
1770 func (fc flagConstant) ge() bool {
1771         return fc.N() == fc.V()
1772 }
1773 func (fc flagConstant) ult() bool {
1774         return !fc.C()
1775 }
1776 func (fc flagConstant) ule() bool {
1777         return fc.Z() || fc.ult()
1778 }
1779 func (fc flagConstant) ugt() bool {
1780         return !fc.Z() && fc.uge()
1781 }
1782 func (fc flagConstant) uge() bool {
1783         return fc.C()
1784 }
1785
1786 func (fc flagConstant) ltNoov() bool {
1787         return fc.lt() && !fc.V()
1788 }
1789 func (fc flagConstant) leNoov() bool {
1790         return fc.le() && !fc.V()
1791 }
1792 func (fc flagConstant) gtNoov() bool {
1793         return fc.gt() && !fc.V()
1794 }
1795 func (fc flagConstant) geNoov() bool {
1796         return fc.ge() && !fc.V()
1797 }
1798
1799 func (fc flagConstant) String() string {
1800         return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
1801 }
1802
1803 type flagConstantBuilder struct {
1804         N bool
1805         Z bool
1806         C bool
1807         V bool
1808 }
1809
1810 func (fcs flagConstantBuilder) encode() flagConstant {
1811         var fc flagConstant
1812         if fcs.N {
1813                 fc |= 1
1814         }
1815         if fcs.Z {
1816                 fc |= 2
1817         }
1818         if fcs.C {
1819                 fc |= 4
1820         }
1821         if fcs.V {
1822                 fc |= 8
1823         }
1824         return fc
1825 }
1826
1827 // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
1828 //  - the results of the C flag are different
1829 //  - the results of the V flag when y==minint are different
1830
1831 // addFlags64 returns the flags that would be set from computing x+y.
1832 func addFlags64(x, y int64) flagConstant {
1833         var fcb flagConstantBuilder
1834         fcb.Z = x+y == 0
1835         fcb.N = x+y < 0
1836         fcb.C = uint64(x+y) < uint64(x)
1837         fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1838         return fcb.encode()
1839 }
1840
1841 // subFlags64 returns the flags that would be set from computing x-y.
1842 func subFlags64(x, y int64) flagConstant {
1843         var fcb flagConstantBuilder
1844         fcb.Z = x-y == 0
1845         fcb.N = x-y < 0
1846         fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
1847         fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
1848         return fcb.encode()
1849 }
1850
1851 // addFlags32 returns the flags that would be set from computing x+y.
1852 func addFlags32(x, y int32) flagConstant {
1853         var fcb flagConstantBuilder
1854         fcb.Z = x+y == 0
1855         fcb.N = x+y < 0
1856         fcb.C = uint32(x+y) < uint32(x)
1857         fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1858         return fcb.encode()
1859 }
1860
1861 // subFlags32 returns the flags that would be set from computing x-y.
1862 func subFlags32(x, y int32) flagConstant {
1863         var fcb flagConstantBuilder
1864         fcb.Z = x-y == 0
1865         fcb.N = x-y < 0
1866         fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
1867         fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
1868         return fcb.encode()
1869 }
1870
1871 // logicFlags64 returns flags set to the sign/zeroness of x.
1872 // C and V are set to false.
1873 func logicFlags64(x int64) flagConstant {
1874         var fcb flagConstantBuilder
1875         fcb.Z = x == 0
1876         fcb.N = x < 0
1877         return fcb.encode()
1878 }
1879
1880 // logicFlags32 returns flags set to the sign/zeroness of x.
1881 // C and V are set to false.
1882 func logicFlags32(x int32) flagConstant {
1883         var fcb flagConstantBuilder
1884         fcb.Z = x == 0
1885         fcb.N = x < 0
1886         return fcb.encode()
1887 }