// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"cmd/compile/internal/logopt"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/s390x"
	"cmd/internal/objabi"
	"cmd/internal/src"
	"encoding/binary"
	"fmt"
	"io"
	"math"
	"math/bits"
	"os"
	"path/filepath"
)

type deadValueChoice bool

const (
	leaveDeadValues  deadValueChoice = false
	removeDeadValues                 = true
)

// deadcode indicates whether rewrite should try to remove any values that become dead.
func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
	// repeat rewrites until we find no more rewrites
	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
	pendingLines.clear()
	debug := f.pass.debug
	if debug > 1 {
		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
	}
	for {
		change := false
		for _, b := range f.Blocks {
			var b0 *Block
			if debug > 1 {
				b0 = new(Block)
				*b0 = *b
				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
			}
			for i, c := range b.ControlValues() {
				for c.Op == OpCopy {
					c = c.Args[0]
					b.ReplaceControl(i, c)
				}
			}
			if rb(b) {
				change = true
				if debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", b0.LongString(), b.LongString())
				}
			}
			for j, v := range b.Values {
				var v0 *Value
				if debug > 1 {
					v0 = new(Value)
					*v0 = *v
					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
				}
				if v.Uses == 0 && v.removeable() {
					if v.Op != OpInvalid && deadcode == removeDeadValues {
						// Reset any values that are now unused, so that we decrement
						// the use count of all of their arguments.
						// Not quite a deadcode pass, because it does not handle cycles.
						// But it should help Uses==1 rules to fire.
						v.reset(OpInvalid)
						change = true
					}
					// No point rewriting values which aren't used.
					continue
				}

				vchange := phielimValue(v)
				if vchange && debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
				}

				// Eliminate copy inputs.
				// If any copy input becomes unused, mark it
				// as invalid and discard its argument. Repeat
				// recursively on the discarded argument.
				// This phase helps remove phantom "dead copy" uses
				// of a value so that a x.Uses==1 rule condition
				// fires reliably.
				for i, a := range v.Args {
					if a.Op != OpCopy {
						continue
					}
					aa := copySource(a)
					v.SetArg(i, aa)
					// If a, a copy, has a line boundary indicator, attempt to find a new value
					// to hold it. The first candidate is the value that will replace a (aa),
					// if it shares the same block and line and is eligible.
					// The second option is v, which has a as an input. Because aa is earlier in
					// the data flow, it is the better choice.
					if a.Pos.IsStmt() == src.PosIsStmt {
						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
							aa.Pos = aa.Pos.WithIsStmt()
						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
							v.Pos = v.Pos.WithIsStmt()
						} else {
							// Record the lost line and look for a new home after all rewrites are complete.
							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
							// line to appear in more than one block, but only one block is stored, so if both end
							// up here, then one will be lost.
							pendingLines.set(a.Pos, int32(a.Block.ID))
						}
						a.Pos = a.Pos.WithNotStmt()
					}
					vchange = true
					for a.Uses == 0 {
						b := a.Args[0]
						a.reset(OpInvalid)
						a = b
					}
				}
				if vchange && debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
				}

				// apply rewrite function
				if rv(v) {
					vchange = true
					// If value changed to a poor choice for a statement boundary, move the boundary
					if v.Pos.IsStmt() == src.PosIsStmt {
						if k := nextGoodStatementIndex(v, j, b); k != j {
							v.Pos = v.Pos.WithNotStmt()
							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
						}
					}
				}

				change = change || vchange
				if vchange && debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
				}
			}
		}
		if !change {
			break
		}
	}
	// remove clobbered values
	for _, b := range f.Blocks {
		j := 0
		for i, v := range b.Values {
			vl := v.Pos
			if v.Op == OpInvalid {
				if v.Pos.IsStmt() == src.PosIsStmt {
					pendingLines.set(vl, int32(b.ID))
				}
				f.freeValue(v)
				continue
			}
			if v.Pos.IsStmt() != src.PosNotStmt && pendingLines.get(vl) == int32(b.ID) {
				pendingLines.remove(vl)
				v.Pos = v.Pos.WithIsStmt()
			}
			if i != j {
				b.Values[j] = v
			}
			j++
		}
		if pendingLines.get(b.Pos) == int32(b.ID) {
			b.Pos = b.Pos.WithIsStmt()
			pendingLines.remove(b.Pos)
		}
		b.truncateValues(j)
	}
}

// Common functions called from rewriting rules

func is64BitFloat(t *types.Type) bool {
	return t.Size() == 8 && t.IsFloat()
}

func is32BitFloat(t *types.Type) bool {
	return t.Size() == 4 && t.IsFloat()
}

func is64BitInt(t *types.Type) bool {
	return t.Size() == 8 && t.IsInteger()
}

func is32BitInt(t *types.Type) bool {
	return t.Size() == 4 && t.IsInteger()
}

func is16BitInt(t *types.Type) bool {
	return t.Size() == 2 && t.IsInteger()
}

func is8BitInt(t *types.Type) bool {
	return t.Size() == 1 && t.IsInteger()
}

func isPtr(t *types.Type) bool {
	return t.IsPtrShaped()
}

func isSigned(t *types.Type) bool {
	return t.IsSigned()
}

// mergeSym merges two symbolic offsets. There is no real merging of
// offsets; we just pick the non-nil one.
func mergeSym(x, y interface{}) interface{} {
	if x == nil {
		return y
	}
	if y == nil {
		return x
	}
	panic(fmt.Sprintf("mergeSym with two non-nil syms %s %s", x, y))
}

func canMergeSym(x, y interface{}) bool {
	return x == nil || y == nil
}

func mergeSymTyped(x, y Sym) Sym {
	if x == nil {
		return y
	}
	if y == nil {
		return x
	}
	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
}

// canMergeLoadClobber reports whether the load can be merged into target without
// invalidating the schedule.
// It also checks that the other non-load argument x is something we
// are ok with clobbering.
func canMergeLoadClobber(target, load, x *Value) bool {
	// The register containing x is going to get clobbered.
	// Don't merge if we still need the value of x.
	// We don't have liveness information here, but we can
	// approximate x dying with:
	//  1) target is x's only use.
	//  2) target is not in a deeper loop than x.
	if x.Uses != 1 {
		return false
	}
	loopnest := x.Block.Func.loopnest()
	loopnest.calculateDepths()
	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
		return false
	}
	return canMergeLoad(target, load)
}

// canMergeLoad reports whether the load can be merged into target without
// invalidating the schedule.
func canMergeLoad(target, load *Value) bool {
	if target.Block.ID != load.Block.ID {
		// If the load is in a different block do not merge it.
		return false
	}

	// We can't merge the load into the target if the load
	// has more than one use.
	if load.Uses != 1 {
		return false
	}

	mem := load.MemoryArg()

	// We need the load's memory arg to still be alive at target. That
	// can't be the case if one of target's args depends on a memory
	// state that is a successor of load's memory arg.
	//
	// For example, it would be invalid to merge load into target in
	// the following situation because newmem has killed oldmem
	// before target is reached:
	//     load = read ... oldmem
	//   newmem = write ... oldmem
	//     arg0 = read ... newmem
	//   target = add arg0 load
	//
	// If the argument comes from a different block then we can exclude
	// it immediately because it must dominate load (which is in the
	// same block as target).
	var args []*Value
	for _, a := range target.Args {
		if a != load && a.Block.ID == target.Block.ID {
			args = append(args, a)
		}
	}

	// memPreds contains memory states known to be predecessors of load's
	// memory state. It is lazily initialized.
	var memPreds map[*Value]bool
	for i := 0; len(args) > 0; i++ {
		const limit = 100
		if i >= limit {
			// Give up if we have done a lot of iterations.
			return false
		}
		v := args[len(args)-1]
		args = args[:len(args)-1]
		if target.Block.ID != v.Block.ID {
			// Since target and load are in the same block
			// we can stop searching when we leave the block.
			continue
		}
		if v.Op == OpPhi {
			// A Phi implies we have reached the top of the block.
			// The memory phi, if it exists, is always
			// the first logical store in the block.
			continue
		}
		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
			// We could handle this situation however it is likely
			// to be very rare.
			return false
		}
		if v.Op.SymEffect()&SymAddr != 0 {
			// This case prevents an operation that calculates the
			// address of a local variable from being forced to schedule
			// before its corresponding VarDef.
			// See issue 28445.
			//   v1 = LOAD ...
			//   v2 = VARDEF
			//   v3 = LEAQ ...
			//   v4 = CMPQ v1 v3
			// We don't want to combine the CMPQ with the load, because
			// that would force the CMPQ to schedule before the VARDEF, which
			// in turn requires the LEAQ to schedule before the VARDEF.
			return false
		}
		if v.Type.IsMemory() {
			if memPreds == nil {
				// Initialise a map containing memory states
				// known to be predecessors of load's memory
				// state.
				memPreds = make(map[*Value]bool)
				m := mem
				const limit = 50
				for i := 0; i < limit; i++ {
					if m.Op == OpPhi {
						// The memory phi, if it exists, is always
						// the first logical store in the block.
						break
					}
					if m.Block.ID != target.Block.ID {
						break
					}
					if !m.Type.IsMemory() {
						break
					}
					memPreds[m] = true
					if len(m.Args) == 0 {
						break
					}
					m = m.MemoryArg()
				}
			}

			// We can merge if v is a predecessor of mem.
			//
			// For example, we can merge load into target in the
			// following scenario:
			//      x = read ... v
			//    mem = write ... v
			//   load = read ... mem
			// target = add x load
			if memPreds[v] {
				continue
			}
			return false
		}
		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
			// If v takes mem as an input then we know mem
			// is valid at this point.
			continue
		}
		for _, a := range v.Args {
			if target.Block.ID == a.Block.ID {
				args = append(args, a)
			}
		}
	}

	return true
}

// isSameCall reports whether sym is the same as the given named symbol.
func isSameCall(sym interface{}, name string) bool {
	return sym.(*AuxCall).Fn.String() == name
}

// nlzX returns the number of leading zeros.
func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }

// ntzX returns the number of trailing zeros.
func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }

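// Illustrative examples (added; not in the original source):
//   nlz8(1) == 7   // 0b00000001: seven leading zeros
//   ntz8(8) == 3   // 0b00001000: three trailing zeros
//   ntz64(0) == 64 // all 64 bits count as trailing zeros
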
// oneBitX reports whether exactly one bit of x is set.
func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }

// nto returns the number of trailing ones.
func nto(x int64) int64 {
	return int64(ntz64(^x))
}

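// Illustrative examples (added; not in the original source):
//   nto(0b0111) == 3
//   nto(0) == 0
//   nto(-1) == 64 // every bit of -1 is a trailing one
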
// log2 returns logarithm in base 2 of uint64(n), with log2(0) = -1.
// Rounds down.
func log2(n int64) int64 {
	return int64(bits.Len64(uint64(n))) - 1
}

// logX returns the base-2 logarithm of n.
// n must be a positive power of 2 (isPowerOfTwoX returns true).
func log8(n int8) int64 {
	return int64(bits.Len8(uint8(n))) - 1
}
func log16(n int16) int64 {
	return int64(bits.Len16(uint16(n))) - 1
}
func log32(n int32) int64 {
	return int64(bits.Len32(uint32(n))) - 1
}
func log64(n int64) int64 {
	return int64(bits.Len64(uint64(n))) - 1
}

// log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
// Rounds down.
func log2uint32(n int64) int64 {
	return int64(bits.Len32(uint32(n))) - 1
}

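// Illustrative examples (added; not in the original source):
//   log2(8) == 3
//   log2(10) == 3 // rounds down
//   log2(0) == -1
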
// isPowerOfTwo reports whether n is a power of 2.
func isPowerOfTwo(n int64) bool {
	return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo8(n int8) bool {
	return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo16(n int16) bool {
	return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo32(n int32) bool {
	return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo64(n int64) bool {
	return n > 0 && n&(n-1) == 0
}

// isUint64PowerOfTwo reports whether uint64(in) is a power of 2.
func isUint64PowerOfTwo(in int64) bool {
	n := uint64(in)
	return n > 0 && n&(n-1) == 0
}

// isUint32PowerOfTwo reports whether uint32(in) is a power of 2.
func isUint32PowerOfTwo(in int64) bool {
	n := uint64(uint32(in))
	return n > 0 && n&(n-1) == 0
}

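// Illustrative example of the signed/unsigned distinction (added; not in the
// original source): for in = -1 << 63, isPowerOfTwo(in) is false because the
// signed value is negative, but isUint64PowerOfTwo(in) is true because
// uint64(in) == 1<<63.
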
// is32Bit reports whether n can be represented as a signed 32 bit integer.
func is32Bit(n int64) bool {
	return n == int64(int32(n))
}

// is16Bit reports whether n can be represented as a signed 16 bit integer.
func is16Bit(n int64) bool {
	return n == int64(int16(n))
}

// is8Bit reports whether n can be represented as a signed 8 bit integer.
func is8Bit(n int64) bool {
	return n == int64(int8(n))
}

// isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
func isU8Bit(n int64) bool {
	return n == int64(uint8(n))
}

// isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
func isU12Bit(n int64) bool {
	return 0 <= n && n < (1<<12)
}

// isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
func isU16Bit(n int64) bool {
	return n == int64(uint16(n))
}

// isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
func isU32Bit(n int64) bool {
	return n == int64(uint32(n))
}

// is20Bit reports whether n can be represented as a signed 20 bit integer.
func is20Bit(n int64) bool {
	return -(1 << 19) <= n && n < (1<<19)
}

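// Illustrative examples (added; not in the original source):
//   is32Bit(1<<31 - 1) == true
//   is32Bit(1 << 31) == false // needs 33 bits as a signed value
//   isU8Bit(255) == true
//   isU8Bit(-1) == false
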
// b2i translates a boolean value to 0 or 1 for assigning to auxInt.
func b2i(b bool) int64 {
	if b {
		return 1
	}
	return 0
}

// b2i32 translates a boolean value to 0 or 1.
func b2i32(b bool) int32 {
	if b {
		return 1
	}
	return 0
}

// shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
// A shift is bounded if it is shifting by less than the width of the shifted value.
func shiftIsBounded(v *Value) bool {
	return v.AuxInt != 0
}

// truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
// of the mantissa. It will panic if the truncation results in lost information.
func truncate64Fto32F(f float64) float32 {
	if !isExactFloat32(f) {
		panic("truncate64Fto32F: truncation is not exact")
	}
	if !math.IsNaN(f) {
		return float32(f)
	}
	// NaN bit patterns aren't necessarily preserved across conversion
	// instructions so we need to do the conversion manually.
	b := math.Float64bits(f)
	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
	//          | sign                  | exponent   | mantissa       |
	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
	return math.Float32frombits(r)
}

// extend32Fto64F converts a float32 value to a float64 value preserving the bit
// pattern of the mantissa.
func extend32Fto64F(f float32) float64 {
	if !math.IsNaN(float64(f)) {
		return float64(f)
	}
	// NaN bit patterns aren't necessarily preserved across conversion
	// instructions so we need to do the conversion manually.
	b := uint64(math.Float32bits(f))
	//   | sign                  | exponent      | mantissa                    |
	r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
	return math.Float64frombits(r)
}

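// Illustrative round trips (added; not in the original source):
//   extend32Fto64F(float32(1.5)) == 1.5 // exactly representable either way
//   truncate64Fto32F(extend32Fto64F(float32(0.1))) == float32(0.1)
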
// DivisionNeedsFixUp reports whether the division needs fix-up code.
func DivisionNeedsFixUp(v *Value) bool {
	return v.AuxInt == 0
}

// auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
func auxFrom64F(f float64) int64 {
	if f != f {
		panic("can't encode a NaN in AuxInt field")
	}
	return int64(math.Float64bits(f))
}

// auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
func auxFrom32F(f float32) int64 {
	if f != f {
		panic("can't encode a NaN in AuxInt field")
	}
	return int64(math.Float64bits(extend32Fto64F(f)))
}

// auxTo32F decodes a float32 from the AuxInt value provided.
func auxTo32F(i int64) float32 {
	return truncate64Fto32F(math.Float64frombits(uint64(i)))
}

// auxTo64F decodes a float64 from the AuxInt value provided.
func auxTo64F(i int64) float64 {
	return math.Float64frombits(uint64(i))
}

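// Illustrative round trips (added; not in the original source):
//   auxTo64F(auxFrom64F(2.5)) == 2.5
//   auxTo32F(auxFrom32F(float32(2.5))) == float32(2.5)
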
func auxIntToBool(i int64) bool {
	if i == 0 {
		return false
	}
	return true
}
func auxIntToInt8(i int64) int8 {
	return int8(i)
}
func auxIntToInt16(i int64) int16 {
	return int16(i)
}
func auxIntToInt32(i int64) int32 {
	return int32(i)
}
func auxIntToInt64(i int64) int64 {
	return i
}
func auxIntToUint8(i int64) uint8 {
	return uint8(i)
}
func auxIntToFloat32(i int64) float32 {
	return float32(math.Float64frombits(uint64(i)))
}
func auxIntToFloat64(i int64) float64 {
	return math.Float64frombits(uint64(i))
}
func auxIntToValAndOff(i int64) ValAndOff {
	return ValAndOff(i)
}
func auxIntToArm64BitField(i int64) arm64BitField {
	return arm64BitField(i)
}
func auxIntToInt128(x int64) int128 {
	if x != 0 {
		panic("nonzero int128 not allowed")
	}
	return 0
}
func auxIntToFlagConstant(x int64) flagConstant {
	return flagConstant(x)
}
func auxIntToOp(cc int64) Op {
	return Op(cc)
}

func boolToAuxInt(b bool) int64 {
	if b {
		return 1
	}
	return 0
}
func int8ToAuxInt(i int8) int64 {
	return int64(i)
}
func int16ToAuxInt(i int16) int64 {
	return int64(i)
}
func int32ToAuxInt(i int32) int64 {
	return int64(i)
}
func int64ToAuxInt(i int64) int64 {
	return int64(i)
}
func uint8ToAuxInt(i uint8) int64 {
	return int64(int8(i))
}
func float32ToAuxInt(f float32) int64 {
	return int64(math.Float64bits(float64(f)))
}
func float64ToAuxInt(f float64) int64 {
	return int64(math.Float64bits(f))
}
func valAndOffToAuxInt(v ValAndOff) int64 {
	return int64(v)
}
func arm64BitFieldToAuxInt(v arm64BitField) int64 {
	return int64(v)
}
func int128ToAuxInt(x int128) int64 {
	if x != 0 {
		panic("nonzero int128 not allowed")
	}
	return 0
}
func flagConstantToAuxInt(x flagConstant) int64 {
	return int64(x)
}
func opToAuxInt(o Op) int64 {
	return int64(o)
}

func auxToString(i interface{}) string {
	return i.(string)
}
func auxToSym(i interface{}) Sym {
	// TODO: kind of a hack - allows nil interface through
	s, _ := i.(Sym)
	return s
}
func auxToType(i interface{}) *types.Type {
	return i.(*types.Type)
}
func auxToCall(i interface{}) *AuxCall {
	return i.(*AuxCall)
}
func auxToS390xCCMask(i interface{}) s390x.CCMask {
	return i.(s390x.CCMask)
}
func auxToS390xRotateParams(i interface{}) s390x.RotateParams {
	return i.(s390x.RotateParams)
}

func stringToAux(s string) interface{} {
	return s
}
func symToAux(s Sym) interface{} {
	return s
}
func callToAux(s *AuxCall) interface{} {
	return s
}
func typeToAux(t *types.Type) interface{} {
	return t
}
func s390xCCMaskToAux(c s390x.CCMask) interface{} {
	return c
}
func s390xRotateParamsToAux(r s390x.RotateParams) interface{} {
	return r
}

// uaddOvf reports whether unsigned a+b would overflow.
func uaddOvf(a, b int64) bool {
	return uint64(a)+uint64(b) < uint64(a)
}

// devirt de-virtualizes an InterCall.
// 'sym' is the symbol for the itab.
func devirt(v *Value, aux interface{}, sym Sym, offset int64) *AuxCall {
	f := v.Block.Func
	n, ok := sym.(*obj.LSym)
	if !ok {
		return nil
	}
	lsym := f.fe.DerefItab(n, offset)
	if f.pass.debug > 0 {
		if lsym != nil {
			f.Warnl(v.Pos, "de-virtualizing call")
		} else {
			f.Warnl(v.Pos, "couldn't de-virtualize call")
		}
	}
	if lsym == nil {
		return nil
	}
	va := aux.(*AuxCall)
	return StaticAuxCall(lsym, va.args, va.results)
}

// isSamePtr reports whether p1 and p2 point to the same address.
func isSamePtr(p1, p2 *Value) bool {
	if p1 == p2 {
		return true
	}
	if p1.Op != p2.Op {
		return false
	}
	switch p1.Op {
	case OpOffPtr:
		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
	case OpAddr, OpLocalAddr:
		// OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op.
		// Checking for value equality only works after [z]cse has run.
		return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op
	case OpAddPtr:
		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
	}
	return false
}

// isStackPtr reports whether v is a pointer into the stack.
func isStackPtr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr {
		v = v.Args[0]
	}
	return v.Op == OpSP || v.Op == OpLocalAddr
}

// disjoint reports whether the memory region specified by [p1:p1+n1)
// does not overlap with [p2:p2+n2).
// A return value of false does not imply the regions overlap.
func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
	if n1 == 0 || n2 == 0 {
		return true
	}
	if p1 == p2 {
		return false
	}
	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
		base, offset = ptr, 0
		for base.Op == OpOffPtr {
			offset += base.AuxInt
			base = base.Args[0]
		}
		return base, offset
	}
	p1, off1 := baseAndOffset(p1)
	p2, off2 := baseAndOffset(p2)
	if isSamePtr(p1, p2) {
		return !overlap(off1, n1, off2, n2)
	}
	// p1 and p2 are not the same, so if they are both OpAddrs then
	// they point to different variables.
	// If one pointer is on the stack and the other is an argument
	// then they can't overlap.
	switch p1.Op {
	case OpAddr, OpLocalAddr:
		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
			return true
		}
		return p2.Op == OpArg && p1.Args[0].Op == OpSP
	case OpArg:
		if p2.Op == OpSP || p2.Op == OpLocalAddr {
			return true
		}
	case OpSP:
		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpSP
	}
	return false
}

// moveSize returns the number of bytes an aligned MOV instruction moves.
func moveSize(align int64, c *Config) int64 {
	switch {
	case align%8 == 0 && c.PtrSize == 8:
		return 8
	case align%4 == 0:
		return 4
	case align%2 == 0:
		return 2
	}
	return 1
}

// mergePoint finds a block among a's blocks which dominates b and is itself
// dominated by all of a's blocks. Returns nil if it can't find one.
// Might return nil even if one does exist.
func mergePoint(b *Block, a ...*Value) *Block {
	// Walk backward from b looking for one of the a's blocks.

	// Max distance
	d := 100

	for d > 0 {
		for _, x := range a {
			if b == x.Block {
				goto found
			}
		}
		if len(b.Preds) > 1 {
			// Don't know which way to go back. Abort.
			return nil
		}
		b = b.Preds[0].b
		d--
	}
	return nil // too far away
found:
	// At this point, r is the first value in a that we find by walking backwards.
	// if we return anything, r will be it.
	r := b

	// Keep going, counting the other a's that we find. They must all dominate r.
	na := 0
	for d > 0 {
		for _, x := range a {
			if b == x.Block {
				na++
			}
		}
		if na == len(a) {
			// Found all of a in a backwards walk. We can return r.
			return r
		}
		if len(b.Preds) > 1 {
			return nil
		}
		b = b.Preds[0].b
		d--
	}
	return nil // too far away
}

// clobber invalidates values. Returns true.
// clobber is used by rewrite rules to:
//   A) make sure the values are really dead and never used again.
//   B) decrement use counts of the values' args.
func clobber(vv ...*Value) bool {
	for _, v := range vv {
		v.reset(OpInvalid)
		// Note: leave v.Block intact. The Block field is used after clobber.
	}
	return true
}

// clobberIfDead resets v when use count is 1. Returns true.
// clobberIfDead is used by rewrite rules to decrement
// use counts of v's args when v is dead and never used.
func clobberIfDead(v *Value) bool {
	if v.Uses == 1 {
		v.reset(OpInvalid)
	}
	// Note: leave v.Block intact. The Block field is used after clobberIfDead.
	return true
}

// noteRule is an easy way to track if a rule is matched when writing
// new ones. Make the rule of interest also conditional on
//     noteRule("note to self: rule of interest matched")
// and that message will print when the rule matches.
func noteRule(s string) bool {
	fmt.Println(s)
	return true
}

// countRule increments Func.ruleMatches[key].
// If Func.ruleMatches is non-nil at the end
// of compilation, it will be printed to stdout.
// This is intended to make it easier to find which functions
// contain lots of rule matches when developing new rules.
func countRule(v *Value, key string) bool {
	f := v.Block.Func
	if f.ruleMatches == nil {
		f.ruleMatches = make(map[string]int)
	}
	f.ruleMatches[key]++
	return true
}

// warnRule generates compiler debug output with string s when
// v is not in autogenerated code, cond is true, and the rule has fired.
func warnRule(cond bool, v *Value, s string) bool {
	if pos := v.Pos; pos.Line() > 1 && cond {
		v.Block.Func.Warnl(pos, s)
	}
	return true
}

// for a pseudo-op like (LessThan x), extract x.
func flagArg(v *Value) *Value {
	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
		return nil
	}
	return v.Args[0]
}

// arm64Negate finds the complement to an ARM64 condition code,
// for example Equal -> NotEqual or LessThan -> GreaterEqual
//
// TODO: add floating-point conditions
func arm64Negate(op Op) Op {
	switch op {
	case OpARM64LessThan:
		return OpARM64GreaterEqual
	case OpARM64LessThanU:
		return OpARM64GreaterEqualU
	case OpARM64GreaterThan:
		return OpARM64LessEqual
	case OpARM64GreaterThanU:
		return OpARM64LessEqualU
	case OpARM64LessEqual:
		return OpARM64GreaterThan
	case OpARM64LessEqualU:
		return OpARM64GreaterThanU
	case OpARM64GreaterEqual:
		return OpARM64LessThan
	case OpARM64GreaterEqualU:
		return OpARM64LessThanU
	case OpARM64Equal:
		return OpARM64NotEqual
	case OpARM64NotEqual:
		return OpARM64Equal
	case OpARM64LessThanF:
		return OpARM64GreaterEqualF
	case OpARM64GreaterThanF:
		return OpARM64LessEqualF
	case OpARM64LessEqualF:
		return OpARM64GreaterThanF
	case OpARM64GreaterEqualF:
		return OpARM64LessThanF
	default:
		panic("unreachable")
	}
}

// arm64Invert evaluates (InvertFlags op), which
// is the same as altering the condition codes such
// that the same result would be produced if the arguments
// to the flag-generating instruction were reversed, e.g.
// (InvertFlags (CMP x y)) -> (CMP y x)
//
// TODO: add floating-point conditions
func arm64Invert(op Op) Op {
	switch op {
	case OpARM64LessThan:
		return OpARM64GreaterThan
	case OpARM64LessThanU:
		return OpARM64GreaterThanU
	case OpARM64GreaterThan:
		return OpARM64LessThan
	case OpARM64GreaterThanU:
		return OpARM64LessThanU
	case OpARM64LessEqual:
		return OpARM64GreaterEqual
	case OpARM64LessEqualU:
		return OpARM64GreaterEqualU
	case OpARM64GreaterEqual:
		return OpARM64LessEqual
	case OpARM64GreaterEqualU:
		return OpARM64LessEqualU
	case OpARM64Equal, OpARM64NotEqual:
		return op
	case OpARM64LessThanF:
		return OpARM64GreaterThanF
	case OpARM64GreaterThanF:
		return OpARM64LessThanF
	case OpARM64LessEqualF:
		return OpARM64GreaterEqualF
	case OpARM64GreaterEqualF:
		return OpARM64LessEqualF
	default:
		panic("unreachable")
	}
}

// evaluate an ARM64 op against a flags value
// that is potentially constant; return 1 for true,
// -1 for false, and 0 for not constant.
func ccARM64Eval(op Op, flags *Value) int {
	fop := flags.Op
	if fop == OpARM64InvertFlags {
		return -ccARM64Eval(op, flags.Args[0])
	}
	if fop != OpARM64FlagConstant {
		return 0
	}
	fc := flagConstant(flags.AuxInt)
	b2i := func(b bool) int {
		if b {
			return 1
		}
		return -1
	}
	switch op {
	case OpARM64Equal:
		return b2i(fc.eq())
	case OpARM64NotEqual:
		return b2i(fc.ne())
	case OpARM64LessThan:
		return b2i(fc.lt())
	case OpARM64LessThanU:
		return b2i(fc.ult())
	case OpARM64GreaterThan:
		return b2i(fc.gt())
	case OpARM64GreaterThanU:
		return b2i(fc.ugt())
	case OpARM64LessEqual:
		return b2i(fc.le())
	case OpARM64LessEqualU:
		return b2i(fc.ule())
	case OpARM64GreaterEqual:
		return b2i(fc.ge())
	case OpARM64GreaterEqualU:
		return b2i(fc.uge())
	}
	return 0
}

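// Illustrative use (added; not in the original source): given a flags value
// whose Op is OpARM64FlagConstant with AuxInt int64(subFlags64(1, 2)),
// ccARM64Eval(OpARM64LessThan, flags) reports 1 (true), and wrapping the
// flags in OpARM64InvertFlags flips the answer to -1.
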
// logRule logs the use of the rule s. This will only be enabled if
// rewrite rules were generated with the -log option, see gen/rulegen.go.
func logRule(s string) {
	if ruleFile == nil {
		// Open a log file to write log to. We open in append
		// mode because all.bash runs the compiler lots of times,
		// and we want the concatenation of all of those logs.
		// This means, of course, that users need to rm the old log
		// to get fresh data.
		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
		if err != nil {
			panic(err)
		}
		ruleFile = w
	}
	_, err := fmt.Fprintln(ruleFile, s)
	if err != nil {
		panic(err)
	}
}

var ruleFile io.Writer

func min(x, y int64) int64 {
	if x < y {
		return x
	}
	return y
}

func isConstZero(v *Value) bool {
	switch v.Op {
	case OpConstNil:
		return true
	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
		return v.AuxInt == 0
	}
	return false
}

// reciprocalExact64 reports whether 1/c is exactly representable.
func reciprocalExact64(c float64) bool {
	b := math.Float64bits(c)
	man := b & (1<<52 - 1)
	if man != 0 {
		return false // not a power of 2, denormal, or NaN
	}
	exp := b >> 52 & (1<<11 - 1)
	// exponent bias is 0x3ff. So taking the reciprocal of a number
	// changes the exponent to 0x7fe-exp.
	switch {
	case exp == 0:
		return false // ±0
	case exp == 0x7ff:
		return false // ±inf
	case exp == 0x7fe:
		return false // exponent is not representable
	default:
		return true
	}
}

// reciprocalExact32 reports whether 1/c is exactly representable.
func reciprocalExact32(c float32) bool {
	b := math.Float32bits(c)
	man := b & (1<<23 - 1)
	if man != 0 {
		return false // not a power of 2, denormal, or NaN
	}
	exp := b >> 23 & (1<<8 - 1)
	// exponent bias is 0x7f. So taking the reciprocal of a number
	// changes the exponent to 0xfe-exp.
	switch {
	case exp == 0:
		return false // ±0
	case exp == 0xff:
		return false // ±inf
	case exp == 0xfe:
		return false // exponent is not representable
	default:
		return true
	}
}

// check if an immediate can be directly encoded into an ARM instruction.
func isARMImmRot(v uint32) bool {
	for i := 0; i < 16; i++ {
		if v&^0xff == 0 {
			return true
		}
		v = v<<2 | v>>30
	}

	return false
}

// overlap reports whether the ranges specified by the offset and
// size pairs overlap.
func overlap(offset1, size1, offset2, size2 int64) bool {
	if offset1 >= offset2 && offset2+size2 > offset1 {
		return true
	}
	if offset2 >= offset1 && offset1+size1 > offset2 {
		return true
	}
	return false
}

// areAdjacentOffsets reports whether off1 and off2 are exactly size bytes
// apart, in either order.
func areAdjacentOffsets(off1, off2, size int64) bool {
	return off1+size == off2 || off1 == off2+size
}

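// Illustrative examples (added; not in the original source):
//   overlap(0, 8, 4, 8) == true   // [0,8) and [4,12) share [4,8)
//   overlap(0, 4, 4, 4) == false  // [0,4) and [4,8) merely touch
//   areAdjacentOffsets(0, 4, 4) == true
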
// check if value zeroes out upper 32-bit of 64-bit register.
// depth limits recursion depth. In AMD64.rules 3 is used as the limit,
// because it catches the same number of cases as 4.
func zeroUpper32Bits(x *Value, depth int) bool {
	switch x.Op {
	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
		OpAMD64SHLL, OpAMD64SHLLconst:
		return true
	case OpArg:
		return x.Type.Width == 4
	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit recursion depth.
		if depth <= 0 {
			return false
		}
		for i := range x.Args {
			if !zeroUpper32Bits(x.Args[i], depth-1) {
				return false
			}
		}
		return true
	}
	return false
}

// zeroUpper48Bits is similar to zeroUpper32Bits, but for the upper 48 bits.
func zeroUpper48Bits(x *Value, depth int) bool {
	switch x.Op {
	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
		return true
	case OpArg:
		return x.Type.Width == 2
	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit recursion depth.
		if depth <= 0 {
			return false
		}
		for i := range x.Args {
			if !zeroUpper48Bits(x.Args[i], depth-1) {
				return false
			}
		}
		return true
	}
	return false
}

// zeroUpper56Bits is similar to zeroUpper32Bits, but for the upper 56 bits.
func zeroUpper56Bits(x *Value, depth int) bool {
	switch x.Op {
	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
		return true
	case OpArg:
		return x.Type.Width == 1
	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit recursion depth.
		if depth <= 0 {
			return false
		}
		for i := range x.Args {
			if !zeroUpper56Bits(x.Args[i], depth-1) {
				return false
			}
		}
		return true
	}
	return false
}

// isInlinableMemmove reports whether the given arch performs a Move of the given size
// faster than memmove. It will only return true if replacing the memmove with a Move is
// safe, either because Move is small or because the arguments are disjoint.
// This is used as a check for replacing memmove with Move ops.
func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
	// It is always safe to convert memmove into Move when its arguments are disjoint.
	// Move ops may or may not be faster for large sizes depending on how the platform
	// lowers them, so we only perform this optimization on platforms that we know to
	// have fast Move ops.
	switch c.arch {
	case "amd64":
		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
	case "386", "arm64":
		return sz <= 8
	case "s390x", "ppc64", "ppc64le":
		return sz <= 8 || disjoint(dst, sz, src, sz)
	case "arm", "mips", "mips64", "mipsle", "mips64le":
		return sz <= 4
	}
	return false
}

// logLargeCopy logs the occurrence of a large copy.
// The best place to do this is in the rewrite rules where the size of the move is easy to find.
// "Large" is arbitrarily chosen to be 128 bytes; this may change.
func logLargeCopy(v *Value, s int64) bool {
	if s < 128 {
		return true
	}
	if logopt.Enabled() {
		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
	}
	return true
}

// hasSmallRotate reports whether the architecture has rotate instructions
// for sizes < 32-bit. This is used to decide whether to promote some rotations.
func hasSmallRotate(c *Config) bool {
	switch c.arch {
	case "amd64", "386":
		return true
	default:
		return false
	}
}

func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
	if sh < 0 || sh >= sz {
		panic("PPC64 shift arg sh out of range")
	}
	if mb < 0 || mb >= sz {
		panic("PPC64 shift arg mb out of range")
	}
	if me < 0 || me >= sz {
		panic("PPC64 shift arg me out of range")
	}
	return int32(sh<<16 | mb<<8 | me)
}

func GetPPC64Shiftsh(auxint int64) int64 {
	return int64(int8(auxint >> 16))
}

func GetPPC64Shiftmb(auxint int64) int64 {
	return int64(int8(auxint >> 8))
}

func GetPPC64Shiftme(auxint int64) int64 {
	return int64(int8(auxint))
}

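// Worked example of the packed encoding (added for illustration; not in the
// original source):
//   a := newPPC64ShiftAuxInt(4, 0, 27, 32) // 4<<16 | 0<<8 | 27
//   GetPPC64Shiftsh(int64(a)) == 4
//   GetPPC64Shiftmb(int64(a)) == 0
//   GetPPC64Shiftme(int64(a)) == 27
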
// isPPC64ValidShiftMask reports whether v is a mask of contiguous one bits
// starting at bit 0. Catch the simple ones first.
// TODO: Later catch more cases
func isPPC64ValidShiftMask(v int64) bool {
	if ((v + 1) & v) == 0 {
		return true
	}
	return false
}

// getPPC64ShiftMaskLength returns the length in bits of the mask v.
func getPPC64ShiftMaskLength(v int64) int64 {
	return int64(bits.Len64(uint64(v)))
}

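// Illustrative examples (added; not in the original source):
//   isPPC64ValidShiftMask(0x00ff) == true  // contiguous ones from bit 0
//   isPPC64ValidShiftMask(0x0ff0) == false // ones not anchored at bit 0
//   getPPC64ShiftMaskLength(0x00ff) == 8
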
// encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
func armBFAuxInt(lsb, width int64) arm64BitField {
	if lsb < 0 || lsb > 63 {
		panic("ARM(64) bit field lsb constant out of range")
	}
	if width < 1 || width > 64 {
		panic("ARM(64) bit field width constant out of range")
	}
	return arm64BitField(width | lsb<<8)
}

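// Worked example of the width|lsb<<8 encoding (added for illustration; not in
// the original source):
//   bfc := armBFAuxInt(8, 16) // 16 | 8<<8 == 0x810
//   bfc.getARM64BFlsb() == 8
//   bfc.getARM64BFwidth() == 16
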
// returns the lsb part of the auxInt field of arm64 bitfield ops.
func (bfc arm64BitField) getARM64BFlsb() int64 {
	return int64(uint64(bfc) >> 8)
}

// returns the width part of the auxInt field of arm64 bitfield ops.
func (bfc arm64BitField) getARM64BFwidth() int64 {
	return int64(bfc) & 0xff
}

// checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
func isARM64BFMask(lsb, mask, rshift int64) bool {
	shiftedMask := int64(uint64(mask) >> uint64(rshift))
	return shiftedMask != 0 && isPowerOfTwo(shiftedMask+1) && nto(shiftedMask)+lsb < 64
}

// returns the bitfield width of mask >> rshift for arm64 bitfield ops.
func arm64BFWidth(mask, rshift int64) int64 {
	shiftedMask := int64(uint64(mask) >> uint64(rshift))
	if shiftedMask == 0 {
		panic("ARM64 BF mask is zero")
	}
	return nto(shiftedMask)
}

// sizeof returns the size of t in bytes.
// It will panic if t is not a *types.Type.
func sizeof(t interface{}) int64 {
	return t.(*types.Type).Size()
}

// registerizable reports whether t is a primitive type that fits in
// a register. It assumes float64 values will always fit into registers
// even if that isn't strictly true.
func registerizable(b *Block, typ *types.Type) bool {
	if typ.IsPtrShaped() || typ.IsFloat() {
		return true
	}
	if typ.IsInteger() {
		return typ.Size() <= b.Func.Config.RegSize
	}
	return false
}

// needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
func needRaceCleanup(sym *AuxCall, v *Value) bool {
	f := v.Block.Func
	if !f.Config.Race {
		return false
	}
	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
		return false
	}
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			switch v.Op {
			case OpStaticCall:
				// A check for racefuncenter will encounter racefuncexit and vice versa.
				// Allow calls to panic*
				s := v.Aux.(*AuxCall).Fn.String()
				switch s {
				case "runtime.racefuncenter", "runtime.racefuncexit",
					"runtime.panicdivide", "runtime.panicwrap",
					"runtime.panicshift":
					continue
				}
				// If we encountered any call, we need to keep racefunc*,
				// for accurate stacktraces.
				return false
			case OpPanicBounds, OpPanicExtend:
				// Note: these are panic generators that are ok (like the static calls above).
			case OpClosureCall, OpInterCall:
				// We must keep the race functions if there are any other call types.
				return false
			}
		}
	}
	if isSameCall(sym, "runtime.racefuncenter") {
		// If we're removing racefuncenter, remove its argument as well.
		if v.Args[0].Op != OpStore {
			return false
		}
		mem := v.Args[0].Args[2]
		v.Args[0].reset(OpCopy)
		v.Args[0].AddArg(mem)
	}
	return true
}

// symIsRO reports whether sym is a read-only global.
func symIsRO(sym interface{}) bool {
	lsym := sym.(*obj.LSym)
	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
}

// symIsROZero reports whether sym is a read-only global whose data contains all zeros.
func symIsROZero(sym Sym) bool {
	lsym := sym.(*obj.LSym)
	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
		return false
	}
	for _, b := range lsym.P {
		if b != 0 {
			return false
		}
	}
	return true
}

// read8 reads one byte from the read-only global sym at offset off.
func read8(sym interface{}, off int64) uint8 {
	lsym := sym.(*obj.LSym)
	if off >= int64(len(lsym.P)) || off < 0 {
		// Invalid index into the global sym.
		// This can happen in dead code, so we don't want to panic.
		// Just return any value, it will eventually get ignored.
		return 0
	}
	return lsym.P[off]
}

// read16 reads two bytes from the read-only global sym at offset off.
func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
	lsym := sym.(*obj.LSym)
	// lsym.P is written lazily.
	// Bytes requested after the end of lsym.P are 0.
	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 2)
	copy(buf, src)
	return byteorder.Uint16(buf)
}

// read32 reads four bytes from the read-only global sym at offset off.
func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
	lsym := sym.(*obj.LSym)
	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 4)
	copy(buf, src)
	return byteorder.Uint32(buf)
}

// read64 reads eight bytes from the read-only global sym at offset off.
func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
	lsym := sym.(*obj.LSym)
	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 8)
	copy(buf, src)
	return byteorder.Uint64(buf)
}

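// Worked example (added for illustration; not in the original source): if
// lsym.P is []byte{0x34, 0x12}, then read16(sym, 0, binary.LittleEndian)
// yields 0x1234 and read16(sym, 0, binary.BigEndian) yields 0x3412; reads
// past len(lsym.P) are zero-filled.
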
// sequentialAddresses reports whether it can prove that x + n == y.
func sequentialAddresses(x, y *Value, n int64) bool {
	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	return false
}

// flagConstant represents the result of a compile-time comparison.
// The sense of these flags does not necessarily represent the hardware's notion
// of a flags register - these are just a compile-time construct.
// We happen to match the semantics to those of arm/arm64.
// Note that these semantics differ from x86: the carry flag has the opposite
// sense on a subtraction!
//   On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
//   On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
//   (because it does x + ^y + C).
// See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
type flagConstant uint8

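// Worked example of the carry-sense difference (added for illustration; not
// in the original source): computing 0 - 1 borrows, so under the arm-style
// model used here subFlags64(0, 1).C() is false (C=0 signals the borrow),
// whereas an x86 SUB of the same operands would set CF=1.
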
// N reports whether the result of an operation is negative (high bit set).
func (fc flagConstant) N() bool {
	return fc&1 != 0
}

// Z reports whether the result of an operation is 0.
func (fc flagConstant) Z() bool {
	return fc&2 != 0
}

// C reports whether an unsigned add overflowed (carry), or an
// unsigned subtract did not underflow (borrow).
func (fc flagConstant) C() bool {
	return fc&4 != 0
}

// V reports whether a signed operation overflowed or underflowed.
func (fc flagConstant) V() bool {
	return fc&8 != 0
}

func (fc flagConstant) eq() bool {
	return fc.Z()
}
func (fc flagConstant) ne() bool {
	return !fc.Z()
}
func (fc flagConstant) lt() bool {
	return fc.N() != fc.V()
}
func (fc flagConstant) le() bool {
	return fc.Z() || fc.lt()
}
func (fc flagConstant) gt() bool {
	return !fc.Z() && fc.ge()
}
func (fc flagConstant) ge() bool {
	return fc.N() == fc.V()
}
func (fc flagConstant) ult() bool {
	return !fc.C()
}
func (fc flagConstant) ule() bool {
	return fc.Z() || fc.ult()
}
func (fc flagConstant) ugt() bool {
	return !fc.Z() && fc.uge()
}
func (fc flagConstant) uge() bool {
	return fc.C()
}

func (fc flagConstant) ltNoov() bool {
	return fc.lt() && !fc.V()
}
func (fc flagConstant) leNoov() bool {
	return fc.le() && !fc.V()
}
func (fc flagConstant) gtNoov() bool {
	return fc.gt() && !fc.V()
}
func (fc flagConstant) geNoov() bool {
	return fc.ge() && !fc.V()
}

func (fc flagConstant) String() string {
	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
}

type flagConstantBuilder struct {
	N bool
	Z bool
	C bool
	V bool
}

func (fcs flagConstantBuilder) encode() flagConstant {
	var fc flagConstant
	if fcs.N {
		fc |= 1
	}
	if fcs.Z {
		fc |= 2
	}
	if fcs.C {
		fc |= 4
	}
	if fcs.V {
		fc |= 8
	}
	return fc
}

// Note: addFlags(x,y) != subFlags(x,-y) in some situations:
//  - the results of the C flag are different
//  - the results of the V flag when y==minint are different

// addFlags64 returns the flags that would be set from computing x+y.
func addFlags64(x, y int64) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x+y == 0
	fcb.N = x+y < 0
	fcb.C = uint64(x+y) < uint64(x)
	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
	return fcb.encode()
}

// subFlags64 returns the flags that would be set from computing x-y.
func subFlags64(x, y int64) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x-y == 0
	fcb.N = x-y < 0
	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
	return fcb.encode()
}

// addFlags32 returns the flags that would be set from computing x+y.
func addFlags32(x, y int32) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x+y == 0
	fcb.N = x+y < 0
	fcb.C = uint32(x+y) < uint32(x)
	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
	return fcb.encode()
}

// subFlags32 returns the flags that would be set from computing x-y.
func subFlags32(x, y int32) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x-y == 0
	fcb.N = x-y < 0
	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
	return fcb.encode()
}

// logicFlags64 returns flags set to the sign/zeroness of x.
// C and V are set to false.
func logicFlags64(x int64) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x == 0
	fcb.N = x < 0
	return fcb.encode()
}

// logicFlags32 returns flags set to the sign/zeroness of x.
// C and V are set to false.
func logicFlags32(x int32) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x == 0
	fcb.N = x < 0
	return fcb.encode()
}