1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/logopt"
9 "cmd/compile/internal/types"
11 "cmd/internal/obj/s390x"
23 type deadValueChoice bool
26 leaveDeadValues deadValueChoice = false
27 removeDeadValues = true
30 // deadcode indicates that rewrite should try to remove any values that become dead.
31 func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
32 // repeat rewrites until we find no more rewrites
33 pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
37 fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
41 for _, b := range f.Blocks {
46 b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
48 for i, c := range b.ControlValues() {
51 b.ReplaceControl(i, c)
57 fmt.Printf("rewriting %s -> %s\n", b0.LongString(), b.LongString())
60 for j, v := range b.Values {
65 v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
67 if v.Uses == 0 && v.removeable() {
68 if v.Op != OpInvalid && deadcode == removeDeadValues {
69 // Reset any value that is now unused, so that we decrement
70 // the use counts of all of its arguments.
71 // Not quite a deadcode pass, because it does not handle cycles.
72 // But it should help Uses==1 rules to fire.
76 // No point rewriting values which aren't used.
80 vchange := phielimValue(v)
81 if vchange && debug > 1 {
82 fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
85 // Eliminate copy inputs.
86 // If any copy input becomes unused, mark it
87 // as invalid and discard its argument. Repeat
88 // recursively on the discarded argument.
89 // This phase helps remove phantom "dead copy" uses
90 // of a value so that a x.Uses==1 rule condition
92 for i, a := range v.Args {
98 // If a, a copy, has a line boundary indicator, attempt to find a new value
99 // to hold it. The first candidate is the value that will replace a (aa),
100 // if it shares the same block and line and is eligible.
101 // The second option is v, which has a as an input. Because aa is earlier in
102 // the data flow, it is the better choice.
103 if a.Pos.IsStmt() == src.PosIsStmt {
104 if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
105 aa.Pos = aa.Pos.WithIsStmt()
106 } else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
107 v.Pos = v.Pos.WithIsStmt()
109 // Record the lost line and look for a new home after all rewrites are complete.
110 // TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
111 // line to appear in more than one block, but only one block is stored, so if both end
112 // up here, then one will be lost.
113 pendingLines.set(a.Pos, int32(a.Block.ID))
115 a.Pos = a.Pos.WithNotStmt()
124 if vchange && debug > 1 {
125 fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
128 // apply rewrite function
131 // If value changed to a poor choice for a statement boundary, move the boundary
132 if v.Pos.IsStmt() == src.PosIsStmt {
133 if k := nextGoodStatementIndex(v, j, b); k != j {
134 v.Pos = v.Pos.WithNotStmt()
135 b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
140 change = change || vchange
141 if vchange && debug > 1 {
142 fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
150 // remove clobbered values
151 for _, b := range f.Blocks {
153 for i, v := range b.Values {
155 if v.Op == OpInvalid {
156 if v.Pos.IsStmt() == src.PosIsStmt {
157 pendingLines.set(vl, int32(b.ID))
162 if v.Pos.IsStmt() != src.PosNotStmt && pendingLines.get(vl) == int32(b.ID) {
163 pendingLines.remove(vl)
164 v.Pos = v.Pos.WithIsStmt()
171 if pendingLines.get(b.Pos) == int32(b.ID) {
172 b.Pos = b.Pos.WithIsStmt()
173 pendingLines.remove(b.Pos)
179 // Common functions called from rewriting rules
181 func is64BitFloat(t *types.Type) bool {
182 return t.Size() == 8 && t.IsFloat()
185 func is32BitFloat(t *types.Type) bool {
186 return t.Size() == 4 && t.IsFloat()
189 func is64BitInt(t *types.Type) bool {
190 return t.Size() == 8 && t.IsInteger()
193 func is32BitInt(t *types.Type) bool {
194 return t.Size() == 4 && t.IsInteger()
197 func is16BitInt(t *types.Type) bool {
198 return t.Size() == 2 && t.IsInteger()
201 func is8BitInt(t *types.Type) bool {
202 return t.Size() == 1 && t.IsInteger()
205 func isPtr(t *types.Type) bool {
206 return t.IsPtrShaped()
209 func isSigned(t *types.Type) bool {
213 // mergeSym merges two symbolic offsets. There is no real merging of
214 // offsets; we just pick the non-nil one.
215 func mergeSym(x, y interface{}) interface{} {
222 panic(fmt.Sprintf("mergeSym with two non-nil syms %s %s", x, y))
225 func canMergeSym(x, y interface{}) bool {
226 return x == nil || y == nil
229 func mergeSymTyped(x, y Sym) Sym {
236 panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
239 // canMergeLoadClobber reports whether the load can be merged into target without
240 // invalidating the schedule.
241 // It also checks that the other non-load argument x is something we
242 // are ok with clobbering.
243 func canMergeLoadClobber(target, load, x *Value) bool {
244 // The register containing x is going to get clobbered.
245 // Don't merge if we still need the value of x.
246 // We don't have liveness information here, but we can
247 // approximate x dying with:
248 // 1) target is x's only use.
249 // 2) target is not in a deeper loop than x.
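// Illustrative sketch (hypothetical values, not from any rules file): with
//   x = ...; load = MOVQload ptr mem; target = ADDQ x load; w = SUBQ x y
// x has two uses, so folding the load into target (which clobbers x's register)
// is rejected; the merge is only attempted when both conditions above hold.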
253 loopnest := x.Block.Func.loopnest()
254 loopnest.calculateDepths()
255 if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
258 return canMergeLoad(target, load)
261 // canMergeLoad reports whether the load can be merged into target without
262 // invalidating the schedule.
263 func canMergeLoad(target, load *Value) bool {
264 if target.Block.ID != load.Block.ID {
265 // If the load is in a different block do not merge it.
269 // We can't merge the load into the target if the load
270 // has more than one use.
275 mem := load.MemoryArg()
277 // We need the load's memory arg to still be alive at target. That
278 // can't be the case if one of target's args depends on a memory
279 // state that is a successor of load's memory arg.
281 // For example, it would be invalid to merge load into target in
282 // the following situation because newmem has killed oldmem
283 // before target is reached:
284 // load = read ... oldmem
285 // newmem = write ... oldmem
286 // arg0 = read ... newmem
287 // target = add arg0 load
289 // If the argument comes from a different block then we can exclude
290 // it immediately because it must dominate load (which is in the
291 // same block as target).
293 for _, a := range target.Args {
294 if a != load && a.Block.ID == target.Block.ID {
295 args = append(args, a)
299 // memPreds contains memory states known to be predecessors of load's
300 // memory state. It is lazily initialized.
301 var memPreds map[*Value]bool
302 for i := 0; len(args) > 0; i++ {
305 // Give up if we have done a lot of iterations.
308 v := args[len(args)-1]
309 args = args[:len(args)-1]
310 if target.Block.ID != v.Block.ID {
311 // Since target and load are in the same block
312 // we can stop searching when we leave the block.
316 // A Phi implies we have reached the top of the block.
317 // The memory phi, if it exists, is always
318 // the first logical store in the block.
321 if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
322 // We could handle this situation; however, it is likely
326 if v.Op.SymEffect()&SymAddr != 0 {
327 // This case prevents an operation that calculates the
328 // address of a local variable from being forced to schedule
329 // before its corresponding VarDef.
335 // We don't want to combine the CMPQ with the load, because
336 // that would force the CMPQ to schedule before the VARDEF, which
337 // in turn requires the LEAQ to schedule before the VARDEF.
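// A rough sketch of the shape being avoided (hypothetical value numbers):
//   v1 = MOVQload ... mem
//   v2 = VarDef {x} mem
//   v3 = LEAQ {x} v2
//   v4 = CMPQ v1 v3
// Folding v1 into v4 would pin v4 at v1's position, before the VarDef v2,
// dragging the address calculation v3 with it.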
340 if v.Type.IsMemory() {
342 // Initialise a map containing memory states
343 // known to be predecessors of load's memory
345 memPreds = make(map[*Value]bool)
348 for i := 0; i < limit; i++ {
350 // The memory phi, if it exists, is always
351 // the first logical store in the block.
354 if m.Block.ID != target.Block.ID {
357 if !m.Type.IsMemory() {
361 if len(m.Args) == 0 {
368 // We can merge if v is a predecessor of mem.
370 // For example, we can merge load into target in the
371 // following scenario:
374 // load = read ... mem
375 // target = add x load
381 if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
382 // If v takes mem as an input then we know mem
383 // is valid at this point.
386 for _, a := range v.Args {
387 if target.Block.ID == a.Block.ID {
388 args = append(args, a)
396 // isSameCall reports whether sym is the same as the given named symbol
397 func isSameCall(sym interface{}, name string) bool {
398 fn := sym.(*AuxCall).Fn
399 return fn != nil && fn.String() == name
402 // nlzX returns the number of leading zeros.
403 func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
404 func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
405 func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
406 func nlz8(x int8) int { return bits.LeadingZeros8(uint8(x)) }
408 // ntzX returns the number of trailing zeros.
409 func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
410 func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
411 func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
412 func ntz8(x int8) int { return bits.TrailingZeros8(uint8(x)) }
414 func oneBit(x int64) bool { return x&(x-1) == 0 && x != 0 }
415 func oneBit8(x int8) bool { return x&(x-1) == 0 && x != 0 }
416 func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
417 func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
418 func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
420 // nto returns the number of trailing ones.
421 func nto(x int64) int64 {
422 return int64(ntz64(^x))
425 // log2 returns logarithm in base 2 of uint64(n), with log2(0) = -1.
427 func log2(n int64) int64 {
428 return int64(bits.Len64(uint64(n))) - 1
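// Illustrative values: log2(1) == 0, log2(8) == 3, log2(6) == 2 (rounded down), log2(0) == -1.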
431 // logX returns the logarithm of n, base 2.
432 // n must be a positive power of 2 (isPowerOfTwoX returns true).
433 func log8(n int8) int64 {
434 return int64(bits.Len8(uint8(n))) - 1
436 func log16(n int16) int64 {
437 return int64(bits.Len16(uint16(n))) - 1
439 func log32(n int32) int64 {
440 return int64(bits.Len32(uint32(n))) - 1
442 func log64(n int64) int64 {
443 return int64(bits.Len64(uint64(n))) - 1
446 // log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
448 func log2uint32(n int64) int64 {
449 return int64(bits.Len32(uint32(n))) - 1
452 // isPowerOfTwo reports whether n is a power of 2.
453 func isPowerOfTwo(n int64) bool {
454 return n > 0 && n&(n-1) == 0
456 func isPowerOfTwo8(n int8) bool {
457 return n > 0 && n&(n-1) == 0
459 func isPowerOfTwo16(n int16) bool {
460 return n > 0 && n&(n-1) == 0
462 func isPowerOfTwo32(n int32) bool {
463 return n > 0 && n&(n-1) == 0
465 func isPowerOfTwo64(n int64) bool {
466 return n > 0 && n&(n-1) == 0
469 // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
470 func isUint64PowerOfTwo(in int64) bool {
472 return n > 0 && n&(n-1) == 0
475 // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
476 func isUint32PowerOfTwo(in int64) bool {
477 n := uint64(uint32(in))
478 return n > 0 && n&(n-1) == 0
481 // is32Bit reports whether n can be represented as a signed 32 bit integer.
482 func is32Bit(n int64) bool {
483 return n == int64(int32(n))
486 // is16Bit reports whether n can be represented as a signed 16 bit integer.
487 func is16Bit(n int64) bool {
488 return n == int64(int16(n))
491 // is8Bit reports whether n can be represented as a signed 8 bit integer.
492 func is8Bit(n int64) bool {
493 return n == int64(int8(n))
496 // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
497 func isU8Bit(n int64) bool {
498 return n == int64(uint8(n))
501 // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
502 func isU12Bit(n int64) bool {
503 return 0 <= n && n < (1<<12)
506 // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
507 func isU16Bit(n int64) bool {
508 return n == int64(uint16(n))
511 // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
512 func isU32Bit(n int64) bool {
513 return n == int64(uint32(n))
516 // is20Bit reports whether n can be represented as a signed 20 bit integer.
517 func is20Bit(n int64) bool {
518 return -(1<<19) <= n && n < (1<<19)
521 // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
522 func b2i(b bool) int64 {
529 // b2i32 translates a boolean value to 0 or 1.
530 func b2i32(b bool) int32 {
537 // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
538 // A shift is bounded if it is shifting by less than the width of the shifted value.
539 func shiftIsBounded(v *Value) bool {
543 // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
544 // of the mantissa. It will panic if the truncation results in lost information.
545 func truncate64Fto32F(f float64) float32 {
546 if !isExactFloat32(f) {
547 panic("truncate64Fto32F: truncation is not exact")
552 // NaN bit patterns aren't necessarily preserved across conversion
553 // instructions so we need to do the conversion manually.
554 b := math.Float64bits(f)
555 m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
556 // | sign | exponent | mantissa |
557 r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
558 return math.Float32frombits(r)
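// Worked example (illustrative): the canonical quiet NaN 0x7ff8000000000000 has m == 1<<51,
// so m>>29 == 0x400000 and r == 0x7fc00000, the standard float32 quiet NaN.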
561 // extend32Fto64F converts a float32 value to a float64 value preserving the bit
562 // pattern of the mantissa.
563 func extend32Fto64F(f float32) float64 {
564 if !math.IsNaN(float64(f)) {
567 // NaN bit patterns aren't necessarily preserved across conversion
568 // instructions so we need to do the conversion manually.
569 b := uint64(math.Float32bits(f))
570 // | sign | exponent | mantissa |
571 r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
572 return math.Float64frombits(r)
575 // DivisionNeedsFixUp reports whether the division needs fix-up code.
576 func DivisionNeedsFixUp(v *Value) bool {
580 // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
581 func auxFrom64F(f float64) int64 {
583 panic("can't encode a NaN in AuxInt field")
585 return int64(math.Float64bits(f))
588 // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
589 func auxFrom32F(f float32) int64 {
591 panic("can't encode a NaN in AuxInt field")
593 return int64(math.Float64bits(extend32Fto64F(f)))
596 // auxTo32F decodes a float32 from the AuxInt value provided.
597 func auxTo32F(i int64) float32 {
598 return truncate64Fto32F(math.Float64frombits(uint64(i)))
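// Round-trip note: for any non-NaN x, auxTo32F(auxFrom32F(x)) == x, since every
// float32 value converts to float64 and back without loss.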
601 // auxTo64F decodes a float64 from the AuxInt value provided.
602 func auxTo64F(i int64) float64 {
603 return math.Float64frombits(uint64(i))
606 func auxIntToBool(i int64) bool {
612 func auxIntToInt8(i int64) int8 {
615 func auxIntToInt16(i int64) int16 {
618 func auxIntToInt32(i int64) int32 {
621 func auxIntToInt64(i int64) int64 {
624 func auxIntToUint8(i int64) uint8 {
627 func auxIntToFloat32(i int64) float32 {
628 return float32(math.Float64frombits(uint64(i)))
630 func auxIntToFloat64(i int64) float64 {
631 return math.Float64frombits(uint64(i))
633 func auxIntToValAndOff(i int64) ValAndOff {
636 func auxIntToArm64BitField(i int64) arm64BitField {
637 return arm64BitField(i)
639 func auxIntToInt128(x int64) int128 {
641 panic("nonzero int128 not allowed")
645 func auxIntToFlagConstant(x int64) flagConstant {
646 return flagConstant(x)
649 func auxIntToOp(cc int64) Op {
653 func boolToAuxInt(b bool) int64 {
659 func int8ToAuxInt(i int8) int64 {
662 func int16ToAuxInt(i int16) int64 {
665 func int32ToAuxInt(i int32) int64 {
668 func int64ToAuxInt(i int64) int64 {
671 func uint8ToAuxInt(i uint8) int64 {
672 return int64(int8(i))
674 func float32ToAuxInt(f float32) int64 {
675 return int64(math.Float64bits(float64(f)))
677 func float64ToAuxInt(f float64) int64 {
678 return int64(math.Float64bits(f))
680 func valAndOffToAuxInt(v ValAndOff) int64 {
683 func arm64BitFieldToAuxInt(v arm64BitField) int64 {
686 func int128ToAuxInt(x int128) int64 {
688 panic("nonzero int128 not allowed")
692 func flagConstantToAuxInt(x flagConstant) int64 {
696 func opToAuxInt(o Op) int64 {
700 func auxToString(i interface{}) string {
703 func auxToSym(i interface{}) Sym {
704 // TODO: kind of a hack - allows nil interface through
708 func auxToType(i interface{}) *types.Type {
709 return i.(*types.Type)
711 func auxToCall(i interface{}) *AuxCall {
714 func auxToS390xCCMask(i interface{}) s390x.CCMask {
715 return i.(s390x.CCMask)
717 func auxToS390xRotateParams(i interface{}) s390x.RotateParams {
718 return i.(s390x.RotateParams)
721 func stringToAux(s string) interface{} {
724 func symToAux(s Sym) interface{} {
727 func callToAux(s *AuxCall) interface{} {
730 func typeToAux(t *types.Type) interface{} {
733 func s390xCCMaskToAux(c s390x.CCMask) interface{} {
736 func s390xRotateParamsToAux(r s390x.RotateParams) interface{} {
740 // uaddOvf reports whether unsigned a+b would overflow.
741 func uaddOvf(a, b int64) bool {
742 return uint64(a)+uint64(b) < uint64(a)
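// For example, uaddOvf(-1, 1) is true: uint64(-1) is the maximum uint64, so adding 1 wraps to 0.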
745 // devirt de-virtualizes an InterCall.
746 // 'sym' is the symbol for the itab.
747 func devirt(v *Value, aux interface{}, sym Sym, offset int64) *AuxCall {
749 n, ok := sym.(*obj.LSym)
753 lsym := f.fe.DerefItab(n, offset)
754 if f.pass.debug > 0 {
756 f.Warnl(v.Pos, "de-virtualizing call")
758 f.Warnl(v.Pos, "couldn't de-virtualize call")
765 return StaticAuxCall(lsym, va.args, va.results)
768 // devirtLESym de-virtualizes an InterLECall.
769 // 'sym' is the symbol for the itab.
770 func devirtLESym(v *Value, aux interface{}, sym Sym, offset int64) *obj.LSym {
771 n, ok := sym.(*obj.LSym)
777 lsym := f.fe.DerefItab(n, offset)
778 if f.pass.debug > 0 {
780 f.Warnl(v.Pos, "de-virtualizing call")
782 f.Warnl(v.Pos, "couldn't de-virtualize call")
791 func devirtLECall(v *Value, sym *obj.LSym) *Value {
792 v.Op = OpStaticLECall
793 v.Aux.(*AuxCall).Fn = sym
798 // isSamePtr reports whether p1 and p2 point to the same address.
799 func isSamePtr(p1, p2 *Value) bool {
808 return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
809 case OpAddr, OpLocalAddr:
810 // OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op.
811 // Checking for value equality only works after [z]cse has run.
812 return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op
814 return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
819 func isStackPtr(v *Value) bool {
820 for v.Op == OpOffPtr || v.Op == OpAddPtr {
823 return v.Op == OpSP || v.Op == OpLocalAddr
826 // disjoint reports whether the memory region specified by [p1:p1+n1)
827 // does not overlap with [p2:p2+n2).
828 // A return value of false does not imply the regions overlap.
829 func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
830 if n1 == 0 || n2 == 0 {
836 baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
837 base, offset = ptr, 0
838 for base.Op == OpOffPtr {
839 offset += base.AuxInt
844 p1, off1 := baseAndOffset(p1)
845 p2, off2 := baseAndOffset(p2)
846 if isSamePtr(p1, p2) {
847 return !overlap(off1, n1, off2, n2)
849 // p1 and p2 are not the same, so if they are both OpAddrs then
850 // they point to different variables.
851 // If one pointer is on the stack and the other is an argument
852 // then they can't overlap.
854 case OpAddr, OpLocalAddr:
855 if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
858 return p2.Op == OpArg && p1.Args[0].Op == OpSP
860 if p2.Op == OpSP || p2.Op == OpLocalAddr {
864 return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpSP
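// Illustrative results: two OpOffPtr values off the same base with offsets 0 and 8
// and sizes 8 each are disjoint, and an OpAddr (global) paired with an OpLocalAddr
// (stack slot) is always disjoint.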
869 // moveSize returns the number of bytes an aligned MOV instruction moves
870 func moveSize(align int64, c *Config) int64 {
872 case align%8 == 0 && c.PtrSize == 8:
882 // mergePoint finds a block among a's blocks which dominates b and is itself
883 // dominated by all of a's blocks. Returns nil if it can't find one.
884 // Might return nil even if one does exist.
885 func mergePoint(b *Block, a ...*Value) *Block {
886 // Walk backward from b looking for one of the a's blocks.
892 for _, x := range a {
897 if len(b.Preds) > 1 {
898 // Don't know which way to go back. Abort.
904 return nil // too far away
906 // At this point, r is the first value in a that we find by walking backwards.
907 // If we return anything, r will be it.
910 // Keep going, counting the other a's that we find. They must all dominate r.
913 for _, x := range a {
919 // Found all of a in a backwards walk. We can return r.
922 if len(b.Preds) > 1 {
929 return nil // too far away
932 // clobber invalidates values. Returns true.
933 // clobber is used by rewrite rules to:
934 // A) make sure the values are really dead and never used again.
935 // B) decrement use counts of the values' args.
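// A typical use in a rewrite rule looks roughly like (illustrative, not quoted verbatim):
//   (ADDQ x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoadClobber(l, v, x) && clobber(l)
//     => (ADDQload x [off] {sym} ptr mem)
// where clobber(l) marks the folded load as dead and drops the uses of its arguments.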
936 func clobber(vv ...*Value) bool {
937 for _, v := range vv {
939 // Note: leave v.Block intact. The Block field is used after clobber.
944 // clobberIfDead resets v when use count is 1. Returns true.
945 // clobberIfDead is used by rewrite rules to decrement
946 // use counts of v's args when v is dead and never used.
947 func clobberIfDead(v *Value) bool {
951 // Note: leave v.Block intact. The Block field is used after clobberIfDead.
955 // noteRule is an easy way to track if a rule is matched when writing
956 // new ones. Make the rule of interest also conditional on
957 // noteRule("note to self: rule of interest matched")
958 // and that message will print when the rule matches.
959 func noteRule(s string) bool {
964 // countRule increments Func.ruleMatches[key].
965 // If Func.ruleMatches is non-nil at the end
966 // of compilation, it will be printed to stdout.
967 // This is intended to make it easier to find functions
968 // that contain lots of rule matches when developing new rules.
969 func countRule(v *Value, key string) bool {
971 if f.ruleMatches == nil {
972 f.ruleMatches = make(map[string]int)
978 // warnRule generates compiler debug output with string s when
979 // v is not in autogenerated code, cond is true and the rule has fired.
980 func warnRule(cond bool, v *Value, s string) bool {
981 if pos := v.Pos; pos.Line() > 1 && cond {
982 v.Block.Func.Warnl(pos, s)
987 // for a pseudo-op like (LessThan x), extract x
988 func flagArg(v *Value) *Value {
989 if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
995 // arm64Negate finds the complement to an ARM64 condition code,
996 // for example Equal -> NotEqual or LessThan -> GreaterEqual
998 // TODO: add floating-point conditions
999 func arm64Negate(op Op) Op {
1001 case OpARM64LessThan:
1002 return OpARM64GreaterEqual
1003 case OpARM64LessThanU:
1004 return OpARM64GreaterEqualU
1005 case OpARM64GreaterThan:
1006 return OpARM64LessEqual
1007 case OpARM64GreaterThanU:
1008 return OpARM64LessEqualU
1009 case OpARM64LessEqual:
1010 return OpARM64GreaterThan
1011 case OpARM64LessEqualU:
1012 return OpARM64GreaterThanU
1013 case OpARM64GreaterEqual:
1014 return OpARM64LessThan
1015 case OpARM64GreaterEqualU:
1016 return OpARM64LessThanU
1018 return OpARM64NotEqual
1019 case OpARM64NotEqual:
1021 case OpARM64LessThanF:
1022 return OpARM64GreaterEqualF
1023 case OpARM64GreaterThanF:
1024 return OpARM64LessEqualF
1025 case OpARM64LessEqualF:
1026 return OpARM64GreaterThanF
1027 case OpARM64GreaterEqualF:
1028 return OpARM64LessThanF
1030 panic("unreachable")
1034 // arm64Invert evaluates (InvertFlags op), which
1035 // is the same as altering the condition codes such
1036 // that the same result would be produced if the arguments
1037 // to the flag-generating instruction were reversed, e.g.
1038 // (InvertFlags (CMP x y)) -> (CMP y x)
1040 // TODO: add floating-point conditions
1041 func arm64Invert(op Op) Op {
1043 case OpARM64LessThan:
1044 return OpARM64GreaterThan
1045 case OpARM64LessThanU:
1046 return OpARM64GreaterThanU
1047 case OpARM64GreaterThan:
1048 return OpARM64LessThan
1049 case OpARM64GreaterThanU:
1050 return OpARM64LessThanU
1051 case OpARM64LessEqual:
1052 return OpARM64GreaterEqual
1053 case OpARM64LessEqualU:
1054 return OpARM64GreaterEqualU
1055 case OpARM64GreaterEqual:
1056 return OpARM64LessEqual
1057 case OpARM64GreaterEqualU:
1058 return OpARM64LessEqualU
1059 case OpARM64Equal, OpARM64NotEqual:
1061 case OpARM64LessThanF:
1062 return OpARM64GreaterThanF
1063 case OpARM64GreaterThanF:
1064 return OpARM64LessThanF
1065 case OpARM64LessEqualF:
1066 return OpARM64GreaterEqualF
1067 case OpARM64GreaterEqualF:
1068 return OpARM64LessEqualF
1070 panic("unreachable")
1074 // ccARM64Eval evaluates an ARM64 op against a flags value
1075 // that is potentially constant; it returns 1 for true,
1076 // -1 for false, and 0 for not constant.
1077 func ccARM64Eval(op Op, flags *Value) int {
1079 if fop == OpARM64InvertFlags {
1080 return -ccARM64Eval(op, flags.Args[0])
1082 if fop != OpARM64FlagConstant {
1085 fc := flagConstant(flags.AuxInt)
1086 b2i := func(b bool) int {
1095 case OpARM64NotEqual:
1097 case OpARM64LessThan:
1099 case OpARM64LessThanU:
1100 return b2i(fc.ult())
1101 case OpARM64GreaterThan:
1103 case OpARM64GreaterThanU:
1104 return b2i(fc.ugt())
1105 case OpARM64LessEqual:
1107 case OpARM64LessEqualU:
1108 return b2i(fc.ule())
1109 case OpARM64GreaterEqual:
1111 case OpARM64GreaterEqualU:
1112 return b2i(fc.uge())
1117 // logRule logs the use of the rule s. This will only be enabled if
1118 // rewrite rules were generated with the -log option, see gen/rulegen.go.
1119 func logRule(s string) {
1120 if ruleFile == nil {
1121 // Open a log file to write log to. We open in append
1122 // mode because all.bash runs the compiler lots of times,
1123 // and we want the concatenation of all of those logs.
1124 // This means, of course, that users need to rm the old log
1125 // to get fresh data.
1126 // TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
1127 w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
1128 os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
1134 _, err := fmt.Fprintln(ruleFile, s)
1140 var ruleFile io.Writer
1142 func min(x, y int64) int64 {
1149 func isConstZero(v *Value) bool {
1153 case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
1154 return v.AuxInt == 0
1159 // reciprocalExact64 reports whether 1/c is exactly representable.
1160 func reciprocalExact64(c float64) bool {
1161 b := math.Float64bits(c)
1162 man := b & (1<<52 - 1)
1164 return false // not a power of 2, denormal, or NaN
1166 exp := b >> 52 & (1<<11 - 1)
1167 // exponent bias is 0x3ff. So taking the reciprocal of a number
1168 // changes the exponent to 0x7fe-exp.
1173 return false // ±inf
1175 return false // exponent is not representable
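// Illustrative checks: reciprocalExact64(4) is true (1/4 == 0.25 exactly), while
// reciprocalExact64(3) is false because 3 is not a power of two (nonzero mantissa).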
1181 // reciprocalExact32 reports whether 1/c is exactly representable.
1182 func reciprocalExact32(c float32) bool {
1183 b := math.Float32bits(c)
1184 man := b & (1<<23 - 1)
1186 return false // not a power of 2, denormal, or NaN
1188 exp := b >> 23 & (1<<8 - 1)
1189 // exponent bias is 0x7f. So taking the reciprocal of a number
1190 // changes the exponent to 0xfe-exp.
1195 return false // ±inf
1197 return false // exponent is not representable
1203 // check if an immediate can be directly encoded into an ARM instruction
1204 func isARMImmRot(v uint32) bool {
1205 for i := 0; i < 16; i++ {
1215 // overlap reports whether the ranges given by the given offset and
1216 // size pairs overlap.
1217 func overlap(offset1, size1, offset2, size2 int64) bool {
1218 if offset1 >= offset2 && offset2+size2 > offset1 {
1221 if offset2 >= offset1 && offset1+size1 > offset2 {
1227 func areAdjacentOffsets(off1, off2, size int64) bool {
1228 return off1+size == off2 || off1 == off2+size
1231 // zeroUpper32Bits reports whether the value zeroes out the upper 32 bits of a 64-bit register.
1232 // depth limits the recursion depth. In AMD64.rules, 3 is used as the limit
1233 // because it catches the same number of cases as 4.
1234 func zeroUpper32Bits(x *Value, depth int) bool {
1236 case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
1237 OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
1238 OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
1239 OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
1240 OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
1241 OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
1242 OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
1243 OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
1244 OpAMD64SHLL, OpAMD64SHLLconst:
1247 return x.Type.Width == 4
1248 case OpPhi, OpSelect0, OpSelect1:
1249 // Phis can use each other as arguments; instead of tracking visited values,
1250 // just limit the recursion depth.
1254 for i := range x.Args {
1255 if !zeroUpper32Bits(x.Args[i], depth-1) {
1265 // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits
1266 func zeroUpper48Bits(x *Value, depth int) bool {
1268 case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
1271 return x.Type.Width == 2
1272 case OpPhi, OpSelect0, OpSelect1:
1273 // Phis can use each other as arguments; instead of tracking visited values,
1274 // just limit the recursion depth.
1278 for i := range x.Args {
1279 if !zeroUpper48Bits(x.Args[i], depth-1) {
1289 // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits
1290 func zeroUpper56Bits(x *Value, depth int) bool {
1292 case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
1295 return x.Type.Width == 1
1296 case OpPhi, OpSelect0, OpSelect1:
1297 // Phis can use each other as arguments; instead of tracking visited values,
1298 // just limit the recursion depth.
1302 for i := range x.Args {
1303 if !zeroUpper56Bits(x.Args[i], depth-1) {
1313 // isInlinableMemmove reports whether the given arch performs a Move of the given size
1314 // faster than memmove. It will only return true if replacing the memmove with a Move is
1315 // safe, either because Move is small or because the arguments are disjoint.
1316 // This is used as a check for replacing memmove with Move ops.
1317 func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1318 // It is always safe to convert memmove into Move when its arguments are disjoint.
1319 // Move ops may or may not be faster for large sizes depending on how the platform
1320 // lowers them, so we only perform this optimization on platforms that we know to
1321 // have fast Move ops.
1324 return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
1325 case "386", "arm64":
1327 case "s390x", "ppc64", "ppc64le":
1328 return sz <= 8 || disjoint(dst, sz, src, sz)
1329 case "arm", "mips", "mips64", "mipsle", "mips64le":
1335 // logLargeCopy logs the occurrence of a large copy.
1336 // The best place to do this is in the rewrite rules where the size of the move is easy to find.
1337 // "Large" is arbitrarily chosen to be 128 bytes; this may change.
1338 func logLargeCopy(v *Value, s int64) bool {
1342 if logopt.Enabled() {
1343 logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
1348 // hasSmallRotate reports whether the architecture has rotate instructions
1349 // for sizes < 32-bit. This is used to decide whether to promote some rotations.
1350 func hasSmallRotate(c *Config) bool {
1352 case "amd64", "386":
1359 func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
1360 if sh < 0 || sh >= sz {
1361 panic("PPC64 shift arg sh out of range")
1363 if mb < 0 || mb >= sz {
1364 panic("PPC64 shift arg mb out of range")
1366 if me < 0 || me >= sz {
1367 panic("PPC64 shift arg me out of range")
1369 return int32(sh<<16 | mb<<8 | me)
1372 func GetPPC64Shiftsh(auxint int64) int64 {
1373 return int64(int8(auxint >> 16))
1376 func GetPPC64Shiftmb(auxint int64) int64 {
1377 return int64(int8(auxint >> 8))
1380 func GetPPC64Shiftme(auxint int64) int64 {
1381 return int64(int8(auxint))
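// Illustrative round trip: newPPC64ShiftAuxInt(3, 0, 28, 32) encodes to 0x03001c, and
// GetPPC64Shiftsh, GetPPC64Shiftmb and GetPPC64Shiftme recover 3, 0 and 28 from it.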
1384 // This verifies that the mask occupies the rightmost (low-order) bits.
1386 func isPPC64ValidShiftMask(v int64) bool {
1387 if ((v + 1) & v) == 0 {
1393 func getPPC64ShiftMaskLength(v int64) int64 {
1394 return int64(bits.Len64(uint64(v)))
1397 // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
1398 func armBFAuxInt(lsb, width int64) arm64BitField {
1399 if lsb < 0 || lsb > 63 {
1400 panic("ARM(64) bit field lsb constant out of range")
1402 if width < 1 || width > 64 {
1403 panic("ARM(64) bit field width constant out of range")
1405 return arm64BitField(width | lsb<<8)
1408 // returns the lsb part of the auxInt field of arm64 bitfield ops.
1409 func (bfc arm64BitField) getARM64BFlsb() int64 {
1410 return int64(uint64(bfc) >> 8)
1413 // returns the width part of the auxInt field of arm64 bitfield ops.
1414 func (bfc arm64BitField) getARM64BFwidth() int64 {
1415 return int64(bfc) & 0xff
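// Illustrative round trip: armBFAuxInt(8, 16) encodes to 16|8<<8 == 0x810;
// getARM64BFlsb then returns 8 and getARM64BFwidth returns 16.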
1418 // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
1419 func isARM64BFMask(lsb, mask, rshift int64) bool {
1420 shiftedMask := int64(uint64(mask) >> uint64(rshift))
1421 return shiftedMask != 0 && isPowerOfTwo(shiftedMask+1) && nto(shiftedMask)+lsb < 64
1424 // returns the bitfield width of mask >> rshift for arm64 bitfield ops
1425 func arm64BFWidth(mask, rshift int64) int64 {
1426 shiftedMask := int64(uint64(mask) >> uint64(rshift))
1427 if shiftedMask == 0 {
1428 panic("ARM64 BF mask is zero")
1430 return nto(shiftedMask)
1433 // sizeof returns the size of t in bytes.
1434 // It will panic if t is not a *types.Type.
1435 func sizeof(t interface{}) int64 {
1436 return t.(*types.Type).Size()
1439 // registerizable reports whether t is a primitive type that fits in
1440 // a register. It assumes float64 values will always fit into registers
1441 // even if that isn't strictly true.
1442 func registerizable(b *Block, typ *types.Type) bool {
1443 if typ.IsPtrShaped() || typ.IsFloat() {
1446 if typ.IsInteger() {
1447 return typ.Size() <= b.Func.Config.RegSize
1452 // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
1453 func needRaceCleanup(sym *AuxCall, v *Value) bool {
1458 if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
1461 for _, b := range f.Blocks {
1462 for _, v := range b.Values {
1465 // A check for racefuncenter will encounter racefuncexit and vice versa.
1466 // Allow calls to panic*
1467 s := v.Aux.(*AuxCall).Fn.String()
1469 case "runtime.racefuncenter", "runtime.racefuncexit",
1470 "runtime.panicdivide", "runtime.panicwrap",
1471 "runtime.panicshift":
1474 // If we encountered any call, we need to keep racefunc*,
1475 // for accurate stacktraces.
1477 case OpPanicBounds, OpPanicExtend:
1478 // Note: these are panic generators that are ok (like the static calls above).
1479 case OpClosureCall, OpInterCall:
1480 // We must keep the race functions if there are any other call types.
1485 if isSameCall(sym, "runtime.racefuncenter") {
1486 // If we're removing racefuncenter, remove its argument as well.
1487 if v.Args[0].Op != OpStore {
1490 mem := v.Args[0].Args[2]
1491 v.Args[0].reset(OpCopy)
1492 v.Args[0].AddArg(mem)
1497 // symIsRO reports whether sym is a read-only global.
1498 func symIsRO(sym interface{}) bool {
1499 lsym := sym.(*obj.LSym)
1500 return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
1503 // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
1504 func symIsROZero(sym Sym) bool {
1505 lsym := sym.(*obj.LSym)
1506 if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
1509 for _, b := range lsym.P {
1517 // read8 reads one byte from the read-only global sym at offset off.
1518 func read8(sym interface{}, off int64) uint8 {
1519 lsym := sym.(*obj.LSym)
1520 if off >= int64(len(lsym.P)) || off < 0 {
1521 // Invalid index into the global sym.
1522 // This can happen in dead code, so we don't want to panic.
1523 // Just return any value, it will eventually get ignored.
1530 // read16 reads two bytes from the read-only global sym at offset off.
1531 func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
1532 lsym := sym.(*obj.LSym)
1533 // lsym.P is written lazily.
1534 // Bytes requested after the end of lsym.P are 0.
1536 if 0 <= off && off < int64(len(lsym.P)) {
1539 buf := make([]byte, 2)
1541 return byteorder.Uint16(buf)
1544 // read32 reads four bytes from the read-only global sym at offset off.
1545 func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
1546 lsym := sym.(*obj.LSym)
1548 if 0 <= off && off < int64(len(lsym.P)) {
1551 buf := make([]byte, 4)
1553 return byteorder.Uint32(buf)
1556 // read64 reads eight bytes from the read-only global sym at offset off.
1557 func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
1558 lsym := sym.(*obj.LSym)
1560 if 0 <= off && off < int64(len(lsym.P)) {
1563 buf := make([]byte, 8)
1565 return byteorder.Uint64(buf)
1568 // sequentialAddresses reports whether it can prove that x + n == y.
1569 func sequentialAddresses(x, y *Value, n int64) bool {
1570 if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
1571 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1572 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1575 if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1576 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1577 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1580 if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
1581 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1582 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1585 if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1586 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1587 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1593 // flagConstant represents the result of a compile-time comparison.
1594 // The sense of these flags does not necessarily represent the hardware's notion
1595 // of a flags register - these are just a compile-time construct.
1596 // We happen to match the semantics to those of arm/arm64.
1597 // Note that these semantics differ from x86: the carry flag has the opposite
1598 // sense on a subtraction!
1599 // On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
1600 // On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
1601 // (because it does x + ^y + C).
1602 // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
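// For example, computing 5 - 7 produces a borrow: under the arm-style convention used
// here C is 0, whereas the same subtraction on amd64 would set CF to 1.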
1603 type flagConstant uint8
1605 // N reports whether the result of an operation is negative (high bit set).
1606 func (fc flagConstant) N() bool {
1610 // Z reports whether the result of an operation is 0.
1611 func (fc flagConstant) Z() bool {
1615 // C reports whether an unsigned add overflowed (carry), or an
1616 // unsigned subtract did not underflow (borrow).
1617 func (fc flagConstant) C() bool {
1621 // V reports whether a signed operation overflowed or underflowed.
1622 func (fc flagConstant) V() bool {
1626 func (fc flagConstant) eq() bool {
1629 func (fc flagConstant) ne() bool {
1632 func (fc flagConstant) lt() bool {
1633 return fc.N() != fc.V()
1635 func (fc flagConstant) le() bool {
1636 return fc.Z() || fc.lt()
1638 func (fc flagConstant) gt() bool {
1639 return !fc.Z() && fc.ge()
1641 func (fc flagConstant) ge() bool {
1642 return fc.N() == fc.V()
1644 func (fc flagConstant) ult() bool {
1647 func (fc flagConstant) ule() bool {
1648 return fc.Z() || fc.ult()
1650 func (fc flagConstant) ugt() bool {
1651 return !fc.Z() && fc.uge()
1653 func (fc flagConstant) uge() bool {
1657 func (fc flagConstant) ltNoov() bool {
1658 return fc.lt() && !fc.V()
1660 func (fc flagConstant) leNoov() bool {
1661 return fc.le() && !fc.V()
1663 func (fc flagConstant) gtNoov() bool {
1664 return fc.gt() && !fc.V()
1666 func (fc flagConstant) geNoov() bool {
1667 return fc.ge() && !fc.V()
1670 func (fc flagConstant) String() string {
1671 return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
1674 type flagConstantBuilder struct {
1681 func (fcs flagConstantBuilder) encode() flagConstant {
1698 // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
1699 // - the results of the C flag are different
1700 // - the results of the V flag when y==minint are different
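// For instance, with x=1 and y=0: subFlags64(1, 0) has C set (no borrow occurred),
// but addFlags64(1, -0) == addFlags64(1, 0) has C clear, since adding zero cannot carry.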
1702 // addFlags64 returns the flags that would be set from computing x+y.
1703 func addFlags64(x, y int64) flagConstant {
1704 var fcb flagConstantBuilder
1707 fcb.C = uint64(x+y) < uint64(x)
1708 fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1712 // subFlags64 returns the flags that would be set from computing x-y.
1713 func subFlags64(x, y int64) flagConstant {
1714 var fcb flagConstantBuilder
1717 fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
1718 fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
1722 // addFlags32 returns the flags that would be set from computing x+y.
1723 func addFlags32(x, y int32) flagConstant {
1724 var fcb flagConstantBuilder
1727 fcb.C = uint32(x+y) < uint32(x)
1728 fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1732 // subFlags32 returns the flags that would be set from computing x-y.
1733 func subFlags32(x, y int32) flagConstant {
1734 var fcb flagConstantBuilder
1737 fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
1738 fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
1742 // logicFlags64 returns flags set to the sign/zeroness of x.
1743 // C and V are set to false.
1744 func logicFlags64(x int64) flagConstant {
1745 var fcb flagConstantBuilder
1751 // logicFlags32 returns flags set to the sign/zeroness of x.
1752 // C and V are set to false.
1753 func logicFlags32(x int32) flagConstant {
1754 var fcb flagConstantBuilder