// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"cmd/compile/internal/logopt"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/s390x"
	"cmd/internal/objabi"
	"cmd/internal/src"
	"encoding/binary"
	"fmt"
	"io"
	"math"
	"math/bits"
	"os"
	"path/filepath"
)

type deadValueChoice bool

const (
	leaveDeadValues  deadValueChoice = false
	removeDeadValues                 = true
)

// deadcode indicates whether rewrite should try to remove any values that become dead.
func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
	// repeat rewrites until we find no more rewrites
	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
	pendingLines.clear()
	debug := f.pass.debug
	if debug > 1 {
		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
	}
	for {
		change := false
		for _, b := range f.Blocks {
			var b0 *Block
			if debug > 1 {
				b0 = new(Block)
				*b0 = *b
				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
			}
			for i, c := range b.ControlValues() {
				for c.Op == OpCopy {
					c = c.Args[0]
					b.ReplaceControl(i, c)
				}
			}
			if rb(b) {
				change = true
				if debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", b0.LongString(), b.LongString())
				}
			}
			for j, v := range b.Values {
				var v0 *Value
				if debug > 1 {
					v0 = new(Value)
					*v0 = *v
					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
				}
				if v.Uses == 0 && v.removeable() {
					if v.Op != OpInvalid && deadcode == removeDeadValues {
						// Reset any values that are now unused, so that we decrement
						// the use count of all of their arguments.
						// Not quite a deadcode pass, because it does not handle cycles.
						// But it should help Uses==1 rules to fire.
						v.reset(OpInvalid)
						change = true
					}
					// No point rewriting values which aren't used.
					continue
				}

				vchange := phielimValue(v)
				if vchange && debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
				}

				// Eliminate copy inputs.
				// If any copy input becomes unused, mark it
				// as invalid and discard its argument. Repeat
				// recursively on the discarded argument.
				// This phase helps remove phantom "dead copy" uses
				// of a value so that a x.Uses==1 rule condition
				// fires reliably.
				for i, a := range v.Args {
					if a.Op != OpCopy {
						continue
					}
					aa := copySource(a)
					v.SetArg(i, aa)
					// If a, a copy, has a line boundary indicator, attempt to find a new value
					// to hold it. The first candidate is the value that will replace a (aa),
					// if it shares the same block and line and is eligible.
					// The second option is v, which has a as an input. Because aa is earlier in
					// the data flow, it is the better choice.
					if a.Pos.IsStmt() == src.PosIsStmt {
						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
							aa.Pos = aa.Pos.WithIsStmt()
						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
							v.Pos = v.Pos.WithIsStmt()
						} else {
							// Record the lost line and look for a new home after all rewrites are complete.
							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
							// line to appear in more than one block, but only one block is stored, so if both end
							// up here, then one will be lost.
							pendingLines.set(a.Pos, int32(a.Block.ID))
						}
						a.Pos = a.Pos.WithNotStmt()
					}
					vchange = true
					for a.Uses == 0 {
						b := a.Args[0]
						a.reset(OpInvalid)
						a = b
					}
				}
				if vchange && debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
				}

				// apply rewrite function
				if rv(v) {
					vchange = true
					// If value changed to a poor choice for a statement boundary, move the boundary
					if v.Pos.IsStmt() == src.PosIsStmt {
						if k := nextGoodStatementIndex(v, j, b); k != j {
							v.Pos = v.Pos.WithNotStmt()
							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
						}
					}
				}

				change = change || vchange
				if vchange && debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
				}
			}
		}
		if !change {
			break
		}
	}
	// remove clobbered values
	for _, b := range f.Blocks {
		j := 0
		for i, v := range b.Values {
			vl := v.Pos
			if v.Op == OpInvalid {
				if v.Pos.IsStmt() == src.PosIsStmt {
					pendingLines.set(vl, int32(b.ID))
				}
				f.freeValue(v)
				continue
			}
			if v.Pos.IsStmt() != src.PosNotStmt && pendingLines.get(vl) == int32(b.ID) {
				pendingLines.remove(vl)
				v.Pos = v.Pos.WithIsStmt()
			}
			if i != j {
				b.Values[j] = v
			}
			j++
		}
		if pendingLines.get(b.Pos) == int32(b.ID) {
			b.Pos = b.Pos.WithIsStmt()
			pendingLines.remove(b.Pos)
		}
		b.truncateValues(j)
	}
}

// Common functions called from rewriting rules

func is64BitFloat(t *types.Type) bool {
	return t.Size() == 8 && t.IsFloat()
}

func is32BitFloat(t *types.Type) bool {
	return t.Size() == 4 && t.IsFloat()
}

func is64BitInt(t *types.Type) bool {
	return t.Size() == 8 && t.IsInteger()
}

func is32BitInt(t *types.Type) bool {
	return t.Size() == 4 && t.IsInteger()
}

func is16BitInt(t *types.Type) bool {
	return t.Size() == 2 && t.IsInteger()
}

func is8BitInt(t *types.Type) bool {
	return t.Size() == 1 && t.IsInteger()
}

func isPtr(t *types.Type) bool {
	return t.IsPtrShaped()
}

func isSigned(t *types.Type) bool {
	return t.IsSigned()
}

// mergeSym merges two symbolic offsets. There is no real merging of
// offsets; we just pick the non-nil one.
func mergeSym(x, y interface{}) interface{} {
	if x == nil {
		return y
	}
	if y == nil {
		return x
	}
	panic(fmt.Sprintf("mergeSym with two non-nil syms %s %s", x, y))
}

func canMergeSym(x, y interface{}) bool {
	return x == nil || y == nil
}

func mergeSymTyped(x, y Sym) Sym {
	if x == nil {
		return y
	}
	if y == nil {
		return x
	}
	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
}

// canMergeLoadClobber reports whether the load can be merged into target without
// invalidating the schedule.
// It also checks that the other non-load argument x is something we
// are ok with clobbering.
func canMergeLoadClobber(target, load, x *Value) bool {
	// The register containing x is going to get clobbered.
	// Don't merge if we still need the value of x.
	// We don't have liveness information here, but we can
	// approximate x dying with:
	//  1) target is x's only use.
	//  2) target is not in a deeper loop than x.
	if x.Uses != 1 {
		return false
	}
	loopnest := x.Block.Func.loopnest()
	loopnest.calculateDepths()
	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
		return false
	}
	return canMergeLoad(target, load)
}

// canMergeLoad reports whether the load can be merged into target without
// invalidating the schedule.
func canMergeLoad(target, load *Value) bool {
	if target.Block.ID != load.Block.ID {
		// If the load is in a different block do not merge it.
		return false
	}

	// We can't merge the load into the target if the load
	// has more than one use.
	if load.Uses != 1 {
		return false
	}

	mem := load.MemoryArg()

	// We need the load's memory arg to still be alive at target. That
	// can't be the case if one of target's args depends on a memory
	// state that is a successor of load's memory arg.
	//
	// For example, it would be invalid to merge load into target in
	// the following situation because newmem has killed oldmem
	// before target is reached:
	//     load = read ... oldmem
	//   newmem = write ... oldmem
	//     arg0 = read ... newmem
	//   target = add arg0 load
	//
	// If the argument comes from a different block then we can exclude
	// it immediately because it must dominate load (which is in the
	// same block as target).
	var args []*Value
	for _, a := range target.Args {
		if a != load && a.Block.ID == target.Block.ID {
			args = append(args, a)
		}
	}

	// memPreds contains memory states known to be predecessors of load's
	// memory state. It is lazily initialized.
	var memPreds map[*Value]bool
	for i := 0; len(args) > 0; i++ {
		const limit = 100
		if i >= limit {
			// Give up if we have done a lot of iterations.
			return false
		}
		v := args[len(args)-1]
		args = args[:len(args)-1]
		if target.Block.ID != v.Block.ID {
			// Since target and load are in the same block
			// we can stop searching when we leave the block.
			continue
		}
		if v.Op == OpPhi {
			// A Phi implies we have reached the top of the block.
			// The memory phi, if it exists, is always
			// the first logical store in the block.
			continue
		}
		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
			// We could handle this situation however it is likely
			// to be very rare.
			return false
		}
		if v.Op.SymEffect()&SymAddr != 0 {
			// This case prevents an operation that calculates the
			// address of a local variable from being forced to schedule
			// before its corresponding VarDef.
			// See issue 28445.
			//   v1 = LOAD ...
			//   v2 = VARDEF
			//   v3 = LEAQ ...
			//   v4 = CMPQ v1 v3
			// We don't want to combine the CMPQ with the load, because
			// that would force the CMPQ to schedule before the VARDEF, which
			// in turn requires the LEAQ to schedule before the VARDEF.
			return false
		}
		if v.Type.IsMemory() {
			if memPreds == nil {
				// Initialise a map containing memory states
				// known to be predecessors of load's memory
				// state.
				memPreds = make(map[*Value]bool)
				m := mem
				const limit = 50
				for i := 0; i < limit; i++ {
					if m.Op == OpPhi {
						// The memory phi, if it exists, is always
						// the first logical store in the block.
						break
					}
					if m.Block.ID != target.Block.ID {
						break
					}
					if !m.Type.IsMemory() {
						break
					}
					memPreds[m] = true
					if len(m.Args) == 0 {
						break
					}
					m = m.MemoryArg()
				}
			}

			// We can merge if v is a predecessor of mem.
			//
			// For example, we can merge load into target in the
			// following scenario:
			//      x = read ... v
			//    mem = write ... v
			//   load = read ... mem
			// target = add x load
			if memPreds[v] {
				continue
			}
			return false
		}
		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
			// If v takes mem as an input then we know mem
			// is valid at this point.
			continue
		}
		for _, a := range v.Args {
			if target.Block.ID == a.Block.ID {
				args = append(args, a)
			}
		}
	}

	return true
}

// isSameCall reports whether sym is the same as the given named symbol.
func isSameCall(sym interface{}, name string) bool {
	return sym.(*AuxCall).Fn.String() == name
}

// nlzX returns the number of leading zeros.
func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }

// ntzX returns the number of trailing zeros.
func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }

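// Illustrative examples (added; not in the original source):
//   nlz8(1) == 7   // 0b00000001: seven leading zeros
//   ntz8(8) == 3   // 0b00001000: three trailing zeros
//   ntz64(0) == 64 // all 64 bits count as trailing zeros
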
// oneBitX reports whether exactly one bit of x is set.
func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }

// nto returns the number of trailing ones.
func nto(x int64) int64 {
	return int64(ntz64(^x))
}

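// Illustrative examples (added; not in the original source):
//   nto(0b0111) == 3
//   nto(0) == 0
//   nto(-1) == 64 // every bit of -1 is a trailing one
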
// log2 returns logarithm in base 2 of uint64(n), with log2(0) = -1.
// Rounds down.
func log2(n int64) int64 {
	return int64(bits.Len64(uint64(n))) - 1
}

// logX returns the base-2 logarithm of n.
// n must be a positive power of 2 (isPowerOfTwoX returns true).
func log8(n int8) int64 {
	return int64(bits.Len8(uint8(n))) - 1
}
func log16(n int16) int64 {
	return int64(bits.Len16(uint16(n))) - 1
}
func log32(n int32) int64 {
	return int64(bits.Len32(uint32(n))) - 1
}
func log64(n int64) int64 {
	return int64(bits.Len64(uint64(n))) - 1
}

// log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
// Rounds down.
func log2uint32(n int64) int64 {
	return int64(bits.Len32(uint32(n))) - 1
}

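// Illustrative examples (added; not in the original source):
//   log2(8) == 3
//   log2(10) == 3 // rounds down
//   log2(0) == -1
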
// isPowerOfTwo reports whether n is a power of 2.
func isPowerOfTwo(n int64) bool {
	return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo8(n int8) bool {
	return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo16(n int16) bool {
	return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo32(n int32) bool {
	return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo64(n int64) bool {
	return n > 0 && n&(n-1) == 0
}

// isUint64PowerOfTwo reports whether uint64(in) is a power of 2.
func isUint64PowerOfTwo(in int64) bool {
	n := uint64(in)
	return n > 0 && n&(n-1) == 0
}

// isUint32PowerOfTwo reports whether uint32(in) is a power of 2.
func isUint32PowerOfTwo(in int64) bool {
	n := uint64(uint32(in))
	return n > 0 && n&(n-1) == 0
}

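// Illustrative example of the signed/unsigned distinction (added; not in the
// original source): for in = -1 << 63, isPowerOfTwo(in) is false because the
// signed value is negative, but isUint64PowerOfTwo(in) is true because
// uint64(in) == 1<<63.
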
// is32Bit reports whether n can be represented as a signed 32 bit integer.
func is32Bit(n int64) bool {
	return n == int64(int32(n))
}

// is16Bit reports whether n can be represented as a signed 16 bit integer.
func is16Bit(n int64) bool {
	return n == int64(int16(n))
}

// is8Bit reports whether n can be represented as a signed 8 bit integer.
func is8Bit(n int64) bool {
	return n == int64(int8(n))
}

// isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
func isU8Bit(n int64) bool {
	return n == int64(uint8(n))
}

// isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
func isU12Bit(n int64) bool {
	return 0 <= n && n < (1<<12)
}

// isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
func isU16Bit(n int64) bool {
	return n == int64(uint16(n))
}

// isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
func isU32Bit(n int64) bool {
	return n == int64(uint32(n))
}

// is20Bit reports whether n can be represented as a signed 20 bit integer.
func is20Bit(n int64) bool {
	return -(1 << 19) <= n && n < (1<<19)
}

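// Illustrative examples (added; not in the original source):
//   is32Bit(1<<31 - 1) == true
//   is32Bit(1 << 31) == false // needs 33 bits as a signed value
//   isU8Bit(255) == true
//   isU8Bit(-1) == false
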
// b2i translates a boolean value to 0 or 1 for assigning to auxInt.
func b2i(b bool) int64 {
	if b {
		return 1
	}
	return 0
}

// b2i32 translates a boolean value to 0 or 1.
func b2i32(b bool) int32 {
	if b {
		return 1
	}
	return 0
}

// shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
// A shift is bounded if it is shifting by less than the width of the shifted value.
func shiftIsBounded(v *Value) bool {
	return v.AuxInt != 0
}

// truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
// of the mantissa. It will panic if the truncation results in lost information.
func truncate64Fto32F(f float64) float32 {
	if !isExactFloat32(f) {
		panic("truncate64Fto32F: truncation is not exact")
	}
	if !math.IsNaN(f) {
		return float32(f)
	}
	// NaN bit patterns aren't necessarily preserved across conversion
	// instructions so we need to do the conversion manually.
	b := math.Float64bits(f)
	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
	//          | sign                  | exponent   | mantissa       |
	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
	return math.Float32frombits(r)
}

// extend32Fto64F converts a float32 value to a float64 value preserving the bit
// pattern of the mantissa.
func extend32Fto64F(f float32) float64 {
	if !math.IsNaN(float64(f)) {
		return float64(f)
	}
	// NaN bit patterns aren't necessarily preserved across conversion
	// instructions so we need to do the conversion manually.
	b := uint64(math.Float32bits(f))
	//   | sign                  | exponent      | mantissa                    |
	r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
	return math.Float64frombits(r)
}

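// Illustrative round trips (added; not in the original source):
//   extend32Fto64F(float32(1.5)) == 1.5 // exactly representable either way
//   truncate64Fto32F(extend32Fto64F(float32(0.1))) == float32(0.1)
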
// DivisionNeedsFixUp reports whether the division needs fix-up code.
func DivisionNeedsFixUp(v *Value) bool {
	return v.AuxInt == 0
}

// auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
func auxFrom64F(f float64) int64 {
	if f != f {
		panic("can't encode a NaN in AuxInt field")
	}
	return int64(math.Float64bits(f))
}

// auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
func auxFrom32F(f float32) int64 {
	if f != f {
		panic("can't encode a NaN in AuxInt field")
	}
	return int64(math.Float64bits(extend32Fto64F(f)))
}

// auxTo32F decodes a float32 from the AuxInt value provided.
func auxTo32F(i int64) float32 {
	return truncate64Fto32F(math.Float64frombits(uint64(i)))
}

// auxTo64F decodes a float64 from the AuxInt value provided.
func auxTo64F(i int64) float64 {
	return math.Float64frombits(uint64(i))
}

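// Illustrative round trips (added; not in the original source):
//   auxTo64F(auxFrom64F(2.5)) == 2.5
//   auxTo32F(auxFrom32F(float32(2.5))) == float32(2.5)
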
func auxIntToBool(i int64) bool {
	if i == 0 {
		return false
	}
	return true
}
func auxIntToInt8(i int64) int8 {
	return int8(i)
}
func auxIntToInt16(i int64) int16 {
	return int16(i)
}
func auxIntToInt32(i int64) int32 {
	return int32(i)
}
func auxIntToInt64(i int64) int64 {
	return i
}
func auxIntToUint8(i int64) uint8 {
	return uint8(i)
}
func auxIntToFloat32(i int64) float32 {
	return float32(math.Float64frombits(uint64(i)))
}
func auxIntToFloat64(i int64) float64 {
	return math.Float64frombits(uint64(i))
}
func auxIntToValAndOff(i int64) ValAndOff {
	return ValAndOff(i)
}
func auxIntToArm64BitField(i int64) arm64BitField {
	return arm64BitField(i)
}
func auxIntToInt128(x int64) int128 {
	if x != 0 {
		panic("nonzero int128 not allowed")
	}
	return 0
}
func auxIntToFlagConstant(x int64) flagConstant {
	return flagConstant(x)
}
func auxIntToOp(cc int64) Op {
	return Op(cc)
}

func boolToAuxInt(b bool) int64 {
	if b {
		return 1
	}
	return 0
}
func int8ToAuxInt(i int8) int64 {
	return int64(i)
}
func int16ToAuxInt(i int16) int64 {
	return int64(i)
}
func int32ToAuxInt(i int32) int64 {
	return int64(i)
}
func int64ToAuxInt(i int64) int64 {
	return int64(i)
}
func uint8ToAuxInt(i uint8) int64 {
	return int64(int8(i))
}
func float32ToAuxInt(f float32) int64 {
	return int64(math.Float64bits(float64(f)))
}
func float64ToAuxInt(f float64) int64 {
	return int64(math.Float64bits(f))
}
func valAndOffToAuxInt(v ValAndOff) int64 {
	return int64(v)
}
func arm64BitFieldToAuxInt(v arm64BitField) int64 {
	return int64(v)
}
func int128ToAuxInt(x int128) int64 {
	if x != 0 {
		panic("nonzero int128 not allowed")
	}
	return 0
}
func flagConstantToAuxInt(x flagConstant) int64 {
	return int64(x)
}
func opToAuxInt(o Op) int64 {
	return int64(o)
}

func auxToString(i interface{}) string {
	return i.(string)
}
func auxToSym(i interface{}) Sym {
	// TODO: kind of a hack - allows nil interface through
	s, _ := i.(Sym)
	return s
}
func auxToType(i interface{}) *types.Type {
	return i.(*types.Type)
}
func auxToCall(i interface{}) *AuxCall {
	return i.(*AuxCall)
}
func auxToS390xCCMask(i interface{}) s390x.CCMask {
	return i.(s390x.CCMask)
}
func auxToS390xRotateParams(i interface{}) s390x.RotateParams {
	return i.(s390x.RotateParams)
}

func stringToAux(s string) interface{} {
	return s
}
func symToAux(s Sym) interface{} {
	return s
}
func callToAux(s *AuxCall) interface{} {
	return s
}
func typeToAux(t *types.Type) interface{} {
	return t
}
func s390xCCMaskToAux(c s390x.CCMask) interface{} {
	return c
}
func s390xRotateParamsToAux(r s390x.RotateParams) interface{} {
	return r
}

// uaddOvf reports whether unsigned a+b would overflow.
func uaddOvf(a, b int64) bool {
	return uint64(a)+uint64(b) < uint64(a)
}

// devirt de-virtualizes an InterCall.
// 'sym' is the symbol for the itab.
func devirt(v *Value, aux interface{}, sym Sym, offset int64) *AuxCall {
	f := v.Block.Func
	n, ok := sym.(*obj.LSym)
	if !ok {
		return nil
	}
	lsym := f.fe.DerefItab(n, offset)
	if f.pass.debug > 0 {
		if lsym != nil {
			f.Warnl(v.Pos, "de-virtualizing call")
		} else {
			f.Warnl(v.Pos, "couldn't de-virtualize call")
		}
	}
	if lsym == nil {
		return nil
	}
	va := aux.(*AuxCall)
	return StaticAuxCall(lsym, va.args, va.results)
}

// isSamePtr reports whether p1 and p2 point to the same address.
func isSamePtr(p1, p2 *Value) bool {
	if p1 == p2 {
		return true
	}
	if p1.Op != p2.Op {
		return false
	}
	switch p1.Op {
	case OpOffPtr:
		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
	case OpAddr, OpLocalAddr:
		// OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op.
		// Checking for value equality only works after [z]cse has run.
		return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op
	case OpAddPtr:
		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
	}
	return false
}

// isStackPtr reports whether v is a pointer into the stack.
func isStackPtr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr {
		v = v.Args[0]
	}
	return v.Op == OpSP || v.Op == OpLocalAddr
}

// disjoint reports whether the memory region specified by [p1:p1+n1)
// does not overlap with [p2:p2+n2).
// A return value of false does not imply the regions overlap.
func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
	if n1 == 0 || n2 == 0 {
		return true
	}
	if p1 == p2 {
		return false
	}
	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
		base, offset = ptr, 0
		for base.Op == OpOffPtr {
			offset += base.AuxInt
			base = base.Args[0]
		}
		return base, offset
	}
	p1, off1 := baseAndOffset(p1)
	p2, off2 := baseAndOffset(p2)
	if isSamePtr(p1, p2) {
		return !overlap(off1, n1, off2, n2)
	}
	// p1 and p2 are not the same, so if they are both OpAddrs then
	// they point to different variables.
	// If one pointer is on the stack and the other is an argument
	// then they can't overlap.
	switch p1.Op {
	case OpAddr, OpLocalAddr:
		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
			return true
		}
		return p2.Op == OpArg && p1.Args[0].Op == OpSP
	case OpArg:
		if p2.Op == OpSP || p2.Op == OpLocalAddr {
			return true
		}
	case OpSP:
		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpSP
	}
	return false
}

// moveSize returns the number of bytes an aligned MOV instruction moves.
func moveSize(align int64, c *Config) int64 {
	switch {
	case align%8 == 0 && c.PtrSize == 8:
		return 8
	case align%4 == 0:
		return 4
	case align%2 == 0:
		return 2
	}
	return 1
}

// mergePoint finds a block among a's blocks which dominates b and is itself
// dominated by all of a's blocks. Returns nil if it can't find one.
// Might return nil even if one does exist.
func mergePoint(b *Block, a ...*Value) *Block {
	// Walk backward from b looking for one of the a's blocks.

	// Max distance
	d := 100

	for d > 0 {
		for _, x := range a {
			if b == x.Block {
				goto found
			}
		}
		if len(b.Preds) > 1 {
			// Don't know which way to go back. Abort.
			return nil
		}
		b = b.Preds[0].b
		d--
	}
	return nil // too far away
found:
	// At this point, r is the first value in a that we find by walking backwards.
	// if we return anything, r will be it.
	r := b

	// Keep going, counting the other a's that we find. They must all dominate r.
	na := 0
	for d > 0 {
		for _, x := range a {
			if b == x.Block {
				na++
			}
		}
		if na == len(a) {
			// Found all of a in a backwards walk. We can return r.
			return r
		}
		if len(b.Preds) > 1 {
			return nil
		}
		b = b.Preds[0].b
		d--
	}
	return nil // too far away
}

// clobber invalidates values. Returns true.
// clobber is used by rewrite rules to:
//   A) make sure the values are really dead and never used again.
//   B) decrement use counts of the values' args.
func clobber(vv ...*Value) bool {
	for _, v := range vv {
		v.reset(OpInvalid)
		// Note: leave v.Block intact. The Block field is used after clobber.
	}
	return true
}

// clobberIfDead resets v when use count is 1. Returns true.
// clobberIfDead is used by rewrite rules to decrement
// use counts of v's args when v is dead and never used.
func clobberIfDead(v *Value) bool {
	if v.Uses == 1 {
		v.reset(OpInvalid)
	}
	// Note: leave v.Block intact. The Block field is used after clobberIfDead.
	return true
}

// noteRule is an easy way to track if a rule is matched when writing
// new ones. Make the rule of interest also conditional on
//     noteRule("note to self: rule of interest matched")
// and that message will print when the rule matches.
func noteRule(s string) bool {
	fmt.Println(s)
	return true
}

// countRule increments Func.ruleMatches[key].
// If Func.ruleMatches is non-nil at the end
// of compilation, it will be printed to stdout.
// This is intended to make it easier to find which functions
// contain lots of rule matches when developing new rules.
func countRule(v *Value, key string) bool {
	f := v.Block.Func
	if f.ruleMatches == nil {
		f.ruleMatches = make(map[string]int)
	}
	f.ruleMatches[key]++
	return true
}

// warnRule generates compiler debug output with string s when
// v is not in autogenerated code, cond is true, and the rule has fired.
func warnRule(cond bool, v *Value, s string) bool {
	if pos := v.Pos; pos.Line() > 1 && cond {
		v.Block.Func.Warnl(pos, s)
	}
	return true
}

// for a pseudo-op like (LessThan x), extract x.
func flagArg(v *Value) *Value {
	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
		return nil
	}
	return v.Args[0]
}

// arm64Negate finds the complement to an ARM64 condition code,
// for example Equal -> NotEqual or LessThan -> GreaterEqual
//
// TODO: add floating-point conditions
func arm64Negate(op Op) Op {
	switch op {
	case OpARM64LessThan:
		return OpARM64GreaterEqual
	case OpARM64LessThanU:
		return OpARM64GreaterEqualU
	case OpARM64GreaterThan:
		return OpARM64LessEqual
	case OpARM64GreaterThanU:
		return OpARM64LessEqualU
	case OpARM64LessEqual:
		return OpARM64GreaterThan
	case OpARM64LessEqualU:
		return OpARM64GreaterThanU
	case OpARM64GreaterEqual:
		return OpARM64LessThan
	case OpARM64GreaterEqualU:
		return OpARM64LessThanU
	case OpARM64Equal:
		return OpARM64NotEqual
	case OpARM64NotEqual:
		return OpARM64Equal
	case OpARM64LessThanF:
		return OpARM64GreaterEqualF
	case OpARM64GreaterThanF:
		return OpARM64LessEqualF
	case OpARM64LessEqualF:
		return OpARM64GreaterThanF
	case OpARM64GreaterEqualF:
		return OpARM64LessThanF
	default:
		panic("unreachable")
	}
}

// arm64Invert evaluates (InvertFlags op), which
// is the same as altering the condition codes such
// that the same result would be produced if the arguments
// to the flag-generating instruction were reversed, e.g.
// (InvertFlags (CMP x y)) -> (CMP y x)
//
// TODO: add floating-point conditions
func arm64Invert(op Op) Op {
	switch op {
	case OpARM64LessThan:
		return OpARM64GreaterThan
	case OpARM64LessThanU:
		return OpARM64GreaterThanU
	case OpARM64GreaterThan:
		return OpARM64LessThan
	case OpARM64GreaterThanU:
		return OpARM64LessThanU
	case OpARM64LessEqual:
		return OpARM64GreaterEqual
	case OpARM64LessEqualU:
		return OpARM64GreaterEqualU
	case OpARM64GreaterEqual:
		return OpARM64LessEqual
	case OpARM64GreaterEqualU:
		return OpARM64LessEqualU
	case OpARM64Equal, OpARM64NotEqual:
		return op
	case OpARM64LessThanF:
		return OpARM64GreaterThanF
	case OpARM64GreaterThanF:
		return OpARM64LessThanF
	case OpARM64LessEqualF:
		return OpARM64GreaterEqualF
	case OpARM64GreaterEqualF:
		return OpARM64LessEqualF
	default:
		panic("unreachable")
	}
}

// evaluate an ARM64 op against a flags value
// that is potentially constant; return 1 for true,
// -1 for false, and 0 for not constant.
func ccARM64Eval(op Op, flags *Value) int {
	fop := flags.Op
	if fop == OpARM64InvertFlags {
		return -ccARM64Eval(op, flags.Args[0])
	}
	if fop != OpARM64FlagConstant {
		return 0
	}
	fc := flagConstant(flags.AuxInt)
	b2i := func(b bool) int {
		if b {
			return 1
		}
		return -1
	}
	switch op {
	case OpARM64Equal:
		return b2i(fc.eq())
	case OpARM64NotEqual:
		return b2i(fc.ne())
	case OpARM64LessThan:
		return b2i(fc.lt())
	case OpARM64LessThanU:
		return b2i(fc.ult())
	case OpARM64GreaterThan:
		return b2i(fc.gt())
	case OpARM64GreaterThanU:
		return b2i(fc.ugt())
	case OpARM64LessEqual:
		return b2i(fc.le())
	case OpARM64LessEqualU:
		return b2i(fc.ule())
	case OpARM64GreaterEqual:
		return b2i(fc.ge())
	case OpARM64GreaterEqualU:
		return b2i(fc.uge())
	}
	return 0
}

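// Illustrative use (added; not in the original source): given a flags value
// whose Op is OpARM64FlagConstant with AuxInt int64(subFlags64(1, 2)),
// ccARM64Eval(OpARM64LessThan, flags) reports 1 (true), and wrapping the
// flags in OpARM64InvertFlags flips the answer to -1.
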
// logRule logs the use of the rule s. This will only be enabled if
// rewrite rules were generated with the -log option, see gen/rulegen.go.
func logRule(s string) {
	if ruleFile == nil {
		// Open a log file to write log to. We open in append
		// mode because all.bash runs the compiler lots of times,
		// and we want the concatenation of all of those logs.
		// This means, of course, that users need to rm the old log
		// to get fresh data.
		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
		if err != nil {
			panic(err)
		}
		ruleFile = w
	}
	_, err := fmt.Fprintln(ruleFile, s)
	if err != nil {
		panic(err)
	}
}

var ruleFile io.Writer

func min(x, y int64) int64 {
	if x < y {
		return x
	}
	return y
}

func isConstZero(v *Value) bool {
	switch v.Op {
	case OpConstNil:
		return true
	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
		return v.AuxInt == 0
	}
	return false
}

// reciprocalExact64 reports whether 1/c is exactly representable.
func reciprocalExact64(c float64) bool {
	b := math.Float64bits(c)
	man := b & (1<<52 - 1)
	if man != 0 {
		return false // not a power of 2, denormal, or NaN
	}
	exp := b >> 52 & (1<<11 - 1)
	// exponent bias is 0x3ff. So taking the reciprocal of a number
	// changes the exponent to 0x7fe-exp.
	switch {
	case exp == 0:
		return false // ±0
	case exp == 0x7ff:
		return false // ±inf
	case exp == 0x7fe:
		return false // exponent is not representable
	default:
		return true
	}
}

// reciprocalExact32 reports whether 1/c is exactly representable.
func reciprocalExact32(c float32) bool {
	b := math.Float32bits(c)
	man := b & (1<<23 - 1)
	if man != 0 {
		return false // not a power of 2, denormal, or NaN
	}
	exp := b >> 23 & (1<<8 - 1)
	// exponent bias is 0x7f. So taking the reciprocal of a number
	// changes the exponent to 0xfe-exp.
	switch {
	case exp == 0:
		return false // ±0
	case exp == 0xff:
		return false // ±inf
	case exp == 0xfe:
		return false // exponent is not representable
	default:
		return true
	}
}

// check if an immediate can be directly encoded into an ARM instruction.
func isARMImmRot(v uint32) bool {
	for i := 0; i < 16; i++ {
		if v&^0xff == 0 {
			return true
		}
		v = v<<2 | v>>30
	}

	return false
}

// overlap reports whether the ranges specified by the offset and
// size pairs overlap.
func overlap(offset1, size1, offset2, size2 int64) bool {
	if offset1 >= offset2 && offset2+size2 > offset1 {
		return true
	}
	if offset2 >= offset1 && offset1+size1 > offset2 {
		return true
	}
	return false
}

// areAdjacentOffsets reports whether off1 and off2 are exactly size bytes
// apart, in either order.
func areAdjacentOffsets(off1, off2, size int64) bool {
	return off1+size == off2 || off1 == off2+size
}

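// Illustrative examples (added; not in the original source):
//   overlap(0, 8, 4, 8) == true   // [0,8) and [4,12) share [4,8)
//   overlap(0, 4, 4, 4) == false  // [0,4) and [4,8) merely touch
//   areAdjacentOffsets(0, 4, 4) == true
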
// check if value zeroes out upper 32-bit of 64-bit register.
// depth limits recursion depth. In AMD64.rules 3 is used as the limit,
// because it catches the same number of cases as 4.
func zeroUpper32Bits(x *Value, depth int) bool {
	switch x.Op {
	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
		OpAMD64SHLL, OpAMD64SHLLconst:
		return true
	case OpArg:
		return x.Type.Width == 4
	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit recursion depth.
		if depth <= 0 {
			return false
		}
		for i := range x.Args {
			if !zeroUpper32Bits(x.Args[i], depth-1) {
				return false
			}
		}
		return true
	}
	return false
}

// zeroUpper48Bits is similar to zeroUpper32Bits, but for the upper 48 bits.
func zeroUpper48Bits(x *Value, depth int) bool {
	switch x.Op {
	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
		return true
	case OpArg:
		return x.Type.Width == 2
	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit recursion depth.
		if depth <= 0 {
			return false
		}
		for i := range x.Args {
			if !zeroUpper48Bits(x.Args[i], depth-1) {
				return false
			}
		}
		return true
	}
	return false
}

// zeroUpper56Bits is similar to zeroUpper32Bits, but for the upper 56 bits.
func zeroUpper56Bits(x *Value, depth int) bool {
	switch x.Op {
	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
		return true
	case OpArg:
		return x.Type.Width == 1
	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit recursion depth.
		if depth <= 0 {
			return false
		}
		for i := range x.Args {
			if !zeroUpper56Bits(x.Args[i], depth-1) {
				return false
			}
		}
		return true
	}
	return false
}

// isInlinableMemmove reports whether the given arch performs a Move of the given size
// faster than memmove. It will only return true if replacing the memmove with a Move is
// safe, either because Move is small or because the arguments are disjoint.
// This is used as a check for replacing memmove with Move ops.
func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
	// It is always safe to convert memmove into Move when its arguments are disjoint.
	// Move ops may or may not be faster for large sizes depending on how the platform
	// lowers them, so we only perform this optimization on platforms that we know to
	// have fast Move ops.
	switch c.arch {
	case "amd64":
		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
	case "386", "arm64":
		return sz <= 8
	case "s390x", "ppc64", "ppc64le":
		return sz <= 8 || disjoint(dst, sz, src, sz)
	case "arm", "mips", "mips64", "mipsle", "mips64le":
		return sz <= 4
	}
	return false
}

// logLargeCopy logs the occurrence of a large copy.
// The best place to do this is in the rewrite rules where the size of the move is easy to find.
// "Large" is arbitrarily chosen to be 128 bytes; this may change.
func logLargeCopy(v *Value, s int64) bool {
	if s < 128 {
		return true
	}
	if logopt.Enabled() {
		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
	}
	return true
}

// hasSmallRotate reports whether the architecture has rotate instructions
// for sizes < 32-bit. This is used to decide whether to promote some rotations.
func hasSmallRotate(c *Config) bool {
	switch c.arch {
	case "amd64", "386":
		return true
	default:
		return false
	}
}

func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
	if sh < 0 || sh >= sz {
		panic("PPC64 shift arg sh out of range")
	}
	if mb < 0 || mb >= sz {
		panic("PPC64 shift arg mb out of range")
	}
	if me < 0 || me >= sz {
		panic("PPC64 shift arg me out of range")
	}
	return int32(sh<<16 | mb<<8 | me)
}

func GetPPC64Shiftsh(auxint int64) int64 {
	return int64(int8(auxint >> 16))
}

func GetPPC64Shiftmb(auxint int64) int64 {
	return int64(int8(auxint >> 8))
}

func GetPPC64Shiftme(auxint int64) int64 {
	return int64(int8(auxint))
}

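// Worked example of the packed encoding (added for illustration; not in the
// original source):
//   a := newPPC64ShiftAuxInt(4, 0, 27, 32) // 4<<16 | 0<<8 | 27
//   GetPPC64Shiftsh(int64(a)) == 4
//   GetPPC64Shiftmb(int64(a)) == 0
//   GetPPC64Shiftme(int64(a)) == 27
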
// isPPC64ValidShiftMask reports whether v is a mask of contiguous one bits
// starting at bit 0. Catch the simple ones first.
// TODO: Later catch more cases
func isPPC64ValidShiftMask(v int64) bool {
	if ((v + 1) & v) == 0 {
		return true
	}
	return false
}

// getPPC64ShiftMaskLength returns the length in bits of the mask v.
func getPPC64ShiftMaskLength(v int64) int64 {
	return int64(bits.Len64(uint64(v)))
}

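// Illustrative examples (added; not in the original source):
//   isPPC64ValidShiftMask(0x00ff) == true  // contiguous ones from bit 0
//   isPPC64ValidShiftMask(0x0ff0) == false // ones not anchored at bit 0
//   getPPC64ShiftMaskLength(0x00ff) == 8
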
// encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
func armBFAuxInt(lsb, width int64) arm64BitField {
	if lsb < 0 || lsb > 63 {
		panic("ARM(64) bit field lsb constant out of range")
	}
	if width < 1 || width > 64 {
		panic("ARM(64) bit field width constant out of range")
	}
	return arm64BitField(width | lsb<<8)
}

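// Worked example of the width|lsb<<8 encoding (added for illustration; not in
// the original source):
//   bfc := armBFAuxInt(8, 16) // 16 | 8<<8 == 0x810
//   bfc.getARM64BFlsb() == 8
//   bfc.getARM64BFwidth() == 16
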
// returns the lsb part of the auxInt field of arm64 bitfield ops.
func (bfc arm64BitField) getARM64BFlsb() int64 {
	return int64(uint64(bfc) >> 8)
}

// returns the width part of the auxInt field of arm64 bitfield ops.
func (bfc arm64BitField) getARM64BFwidth() int64 {
	return int64(bfc) & 0xff
}

// checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
func isARM64BFMask(lsb, mask, rshift int64) bool {
	shiftedMask := int64(uint64(mask) >> uint64(rshift))
	return shiftedMask != 0 && isPowerOfTwo(shiftedMask+1) && nto(shiftedMask)+lsb < 64
}

// returns the bitfield width of mask >> rshift for arm64 bitfield ops.
func arm64BFWidth(mask, rshift int64) int64 {
	shiftedMask := int64(uint64(mask) >> uint64(rshift))
	if shiftedMask == 0 {
		panic("ARM64 BF mask is zero")
	}
	return nto(shiftedMask)
}

// sizeof returns the size of t in bytes.
// It will panic if t is not a *types.Type.
func sizeof(t interface{}) int64 {
	return t.(*types.Type).Size()
}

// registerizable reports whether t is a primitive type that fits in
// a register. It assumes float64 values will always fit into registers
// even if that isn't strictly true.
func registerizable(b *Block, typ *types.Type) bool {
	if typ.IsPtrShaped() || typ.IsFloat() {
		return true
	}
	if typ.IsInteger() {
		return typ.Size() <= b.Func.Config.RegSize
	}
	return false
}

// needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
func needRaceCleanup(sym *AuxCall, v *Value) bool {
	f := v.Block.Func
	if !f.Config.Race {
		return false
	}
	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
		return false
	}
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			switch v.Op {
			case OpStaticCall:
				// A check for racefuncenter will encounter racefuncexit and vice versa.
				// Allow calls to panic*
				s := v.Aux.(*AuxCall).Fn.String()
				switch s {
				case "runtime.racefuncenter", "runtime.racefuncexit",
					"runtime.panicdivide", "runtime.panicwrap",
					"runtime.panicshift":
					continue
				}
				// If we encountered any call, we need to keep racefunc*,
				// for accurate stacktraces.
				return false
			case OpPanicBounds, OpPanicExtend:
				// Note: these are panic generators that are ok (like the static calls above).
			case OpClosureCall, OpInterCall:
				// We must keep the race functions if there are any other call types.
				return false
			}
		}
	}
	if isSameCall(sym, "runtime.racefuncenter") {
		// If we're removing racefuncenter, remove its argument as well.
		if v.Args[0].Op != OpStore {
			return false
		}
		mem := v.Args[0].Args[2]
		v.Args[0].reset(OpCopy)
		v.Args[0].AddArg(mem)
	}
	return true
}

// symIsRO reports whether sym is a read-only global.
func symIsRO(sym interface{}) bool {
	lsym := sym.(*obj.LSym)
	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
}

// symIsROZero reports whether sym is a read-only global whose data contains all zeros.
func symIsROZero(sym Sym) bool {
	lsym := sym.(*obj.LSym)
	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
		return false
	}
	for _, b := range lsym.P {
		if b != 0 {
			return false
		}
	}
	return true
}

// read8 reads one byte from the read-only global sym at offset off.
func read8(sym interface{}, off int64) uint8 {
	lsym := sym.(*obj.LSym)
	if off >= int64(len(lsym.P)) || off < 0 {
		// Invalid index into the global sym.
		// This can happen in dead code, so we don't want to panic.
		// Just return any value, it will eventually get ignored.
		return 0
	}
	return lsym.P[off]
}

// read16 reads two bytes from the read-only global sym at offset off.
func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
	lsym := sym.(*obj.LSym)
	// lsym.P is written lazily.
	// Bytes requested after the end of lsym.P are 0.
	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 2)
	copy(buf, src)
	return byteorder.Uint16(buf)
}

// read32 reads four bytes from the read-only global sym at offset off.
func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
	lsym := sym.(*obj.LSym)
	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 4)
	copy(buf, src)
	return byteorder.Uint32(buf)
}

// read64 reads eight bytes from the read-only global sym at offset off.
func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
	lsym := sym.(*obj.LSym)
	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 8)
	copy(buf, src)
	return byteorder.Uint64(buf)
}

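// Worked example (added for illustration; not in the original source): if
// lsym.P is []byte{0x34, 0x12}, then read16(sym, 0, binary.LittleEndian)
// yields 0x1234 and read16(sym, 0, binary.BigEndian) yields 0x3412; reads
// past len(lsym.P) are zero-filled.
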
// sequentialAddresses reports whether it can prove that x + n == y.
func sequentialAddresses(x, y *Value, n int64) bool {
	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	return false
}

// flagConstant represents the result of a compile-time comparison.
// The sense of these flags does not necessarily represent the hardware's notion
// of a flags register - these are just a compile-time construct.
// We happen to match the semantics to those of arm/arm64.
// Note that these semantics differ from x86: the carry flag has the opposite
// sense on a subtraction!
//   On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
//   On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
//   (because it does x + ^y + C).
// See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
type flagConstant uint8

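// Worked example of the carry-sense difference (added for illustration; not
// in the original source): computing 0 - 1 borrows, so under the arm-style
// model used here subFlags64(0, 1).C() is false (C=0 signals the borrow),
// whereas an x86 SUB of the same operands would set CF=1.
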
// N reports whether the result of an operation is negative (high bit set).
func (fc flagConstant) N() bool {
	return fc&1 != 0
}

// Z reports whether the result of an operation is 0.
func (fc flagConstant) Z() bool {
	return fc&2 != 0
}

// C reports whether an unsigned add overflowed (carry), or an
// unsigned subtract did not underflow (borrow).
func (fc flagConstant) C() bool {
	return fc&4 != 0
}

// V reports whether a signed operation overflowed or underflowed.
func (fc flagConstant) V() bool {
	return fc&8 != 0
}

func (fc flagConstant) eq() bool {
	return fc.Z()
}
func (fc flagConstant) ne() bool {
	return !fc.Z()
}
func (fc flagConstant) lt() bool {
	return fc.N() != fc.V()
}
func (fc flagConstant) le() bool {
	return fc.Z() || fc.lt()
}
func (fc flagConstant) gt() bool {
	return !fc.Z() && fc.ge()
}
func (fc flagConstant) ge() bool {
	return fc.N() == fc.V()
}
func (fc flagConstant) ult() bool {
	return !fc.C()
}
func (fc flagConstant) ule() bool {
	return fc.Z() || fc.ult()
}
func (fc flagConstant) ugt() bool {
	return !fc.Z() && fc.uge()
}
func (fc flagConstant) uge() bool {
	return fc.C()
}

func (fc flagConstant) ltNoov() bool {
	return fc.lt() && !fc.V()
}
func (fc flagConstant) leNoov() bool {
	return fc.le() && !fc.V()
}
func (fc flagConstant) gtNoov() bool {
	return fc.gt() && !fc.V()
}
func (fc flagConstant) geNoov() bool {
	return fc.ge() && !fc.V()
}

func (fc flagConstant) String() string {
	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
}

type flagConstantBuilder struct {
	N bool
	Z bool
	C bool
	V bool
}

func (fcs flagConstantBuilder) encode() flagConstant {
	var fc flagConstant
	if fcs.N {
		fc |= 1
	}
	if fcs.Z {
		fc |= 2
	}
	if fcs.C {
		fc |= 4
	}
	if fcs.V {
		fc |= 8
	}
	return fc
}

// Note: addFlags(x,y) != subFlags(x,-y) in some situations:
//  - the results of the C flag are different
//  - the results of the V flag when y==minint are different

// addFlags64 returns the flags that would be set from computing x+y.
func addFlags64(x, y int64) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x+y == 0
	fcb.N = x+y < 0
	fcb.C = uint64(x+y) < uint64(x)
	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
	return fcb.encode()
}

// subFlags64 returns the flags that would be set from computing x-y.
func subFlags64(x, y int64) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x-y == 0
	fcb.N = x-y < 0
	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
	return fcb.encode()
}

// addFlags32 returns the flags that would be set from computing x+y.
func addFlags32(x, y int32) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x+y == 0
	fcb.N = x+y < 0
	fcb.C = uint32(x+y) < uint32(x)
	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
	return fcb.encode()
}

// subFlags32 returns the flags that would be set from computing x-y.
func subFlags32(x, y int32) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x-y == 0
	fcb.N = x-y < 0
	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
	return fcb.encode()
}

// logicFlags64 returns flags set to the sign/zeroness of x.
// C and V are set to false.
func logicFlags64(x int64) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x == 0
	fcb.N = x < 0
	return fcb.encode()
}

// logicFlags32 returns flags set to the sign/zeroness of x.
// C and V are set to false.
func logicFlags32(x int32) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x == 0
	fcb.N = x < 0
	return fcb.encode()
}