// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"cmd/compile/internal/base"
	"cmd/compile/internal/logopt"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/s390x"
	"cmd/internal/objabi"
	"cmd/internal/src"
	"encoding/binary"
	"fmt"
	"io"
	"math"
	"math/bits"
	"os"
	"path/filepath"
)

type deadValueChoice bool

const (
	leaveDeadValues  deadValueChoice = false
	removeDeadValues                 = true
)

// deadcode indicates whether rewrite should try to remove any values that become dead.
func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
	// repeat rewrites until we find no more rewrites
	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
	pendingLines.clear()
	debug := f.pass.debug
	if debug > 1 {
		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
	}
	var iters int
	var states map[string]bool
	for {
		change := false
		deadChange := false
		for _, b := range f.Blocks {
			var b0 *Block
			if debug > 1 {
				b0 = new(Block)
				*b0 = *b
				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
			}
			for i, c := range b.ControlValues() {
				for c.Op == OpCopy {
					c = c.Args[0]
					b.ReplaceControl(i, c)
				}
			}
			if rb(b) {
				change = true
				if debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", b0.LongString(), b.LongString())
				}
			}
			for j, v := range b.Values {
				var v0 *Value
				if debug > 1 {
					v0 = new(Value)
					*v0 = *v
					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
				}
				if v.Uses == 0 && v.removeable() {
					if v.Op != OpInvalid && deadcode == removeDeadValues {
						// Reset any values that are now unused, so that we decrement
						// the use count of all of its arguments.
						// Not quite a deadcode pass, because it does not handle cycles.
						// But it should help Uses==1 rules to fire.
						v.reset(OpInvalid)
						deadChange = true
					}
					// No point rewriting values which aren't used.
					continue
				}

				vchange := phielimValue(v)
				if vchange && debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
				}

				// Eliminate copy inputs.
				// If any copy input becomes unused, mark it
				// as invalid and discard its argument. Repeat
				// recursively on the discarded argument.
				// This phase helps remove phantom "dead copy" uses
				// of a value so that a x.Uses==1 rule condition
				// fires reliably.
				for i, a := range v.Args {
					if a.Op != OpCopy {
						continue
					}
					aa := copySource(a)
					v.SetArg(i, aa)
					// If a, a copy, has a line boundary indicator, attempt to find a new value
					// to hold it. The first candidate is the value that will replace a (aa),
					// if it shares the same block and line and is eligible.
					// The second option is v, which has a as an input. Because aa is earlier in
					// the data flow, it is the better choice.
					if a.Pos.IsStmt() == src.PosIsStmt {
						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
							aa.Pos = aa.Pos.WithIsStmt()
						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
							v.Pos = v.Pos.WithIsStmt()
						} else {
							// Record the lost line and look for a new home after all rewrites are complete.
							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
							// line to appear in more than one block, but only one block is stored, so if both end
							// up here, then one will be lost.
							pendingLines.set(a.Pos, int32(a.Block.ID))
						}
						a.Pos = a.Pos.WithNotStmt()
					}
					vchange = true
					for a.Uses == 0 {
						b := a.Args[0]
						a.reset(OpInvalid)
						a = b
					}
				}
				if vchange && debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
				}

				// apply rewrite function
				if rv(v) {
					vchange = true
					// If value changed to a poor choice for a statement boundary, move the boundary
					if v.Pos.IsStmt() == src.PosIsStmt {
						if k := nextGoodStatementIndex(v, j, b); k != j {
							v.Pos = v.Pos.WithNotStmt()
							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
						}
					}
				}

				change = change || vchange
				if vchange && debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
				}
			}
		}
		if !change && !deadChange {
			break
		}
		iters++
		if (iters > 1000 || debug >= 2) && change {
			// We've done a suspiciously large number of rewrites (or we're in debug mode).
			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
			// and the maximum value encountered during make.bash is 12.
			// Start checking for cycles. (This is too expensive to do routinely.)
			// Note: we avoid this path for deadChange-only iterations, to fix #51639.
			if states == nil {
				states = make(map[string]bool)
			}
			h := f.rewriteHash()
			if _, ok := states[h]; ok {
				// We've found a cycle.
				// To diagnose it, set debug to 2 and start again,
				// so that we'll print all rules applied until we complete another cycle.
				// If debug is already >= 2, we've already done that, so it's time to crash.
				if debug < 2 {
					debug = 2
					states = make(map[string]bool)
				} else {
					f.Fatalf("rewrite cycle detected")
				}
			}
			states[h] = true
		}
	}
	// remove clobbered values
	for _, b := range f.Blocks {
		j := 0
		for i, v := range b.Values {
			vl := v.Pos
			if v.Op == OpInvalid {
				if v.Pos.IsStmt() == src.PosIsStmt {
					pendingLines.set(vl, int32(b.ID))
				}
				f.freeValue(v)
				continue
			}
			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
				pendingLines.remove(vl)
				v.Pos = v.Pos.WithIsStmt()
			}
			if i != j {
				b.Values[j] = v
			}
			j++
		}
		if pendingLines.get(b.Pos) == int32(b.ID) {
			b.Pos = b.Pos.WithIsStmt()
			pendingLines.remove(b.Pos)
		}
		b.truncateValues(j)
	}
}

// Common functions called from rewriting rules

func is64BitFloat(t *types.Type) bool {
	return t.Size() == 8 && t.IsFloat()
}

func is32BitFloat(t *types.Type) bool {
	return t.Size() == 4 && t.IsFloat()
}

func is64BitInt(t *types.Type) bool {
	return t.Size() == 8 && t.IsInteger()
}

func is32BitInt(t *types.Type) bool {
	return t.Size() == 4 && t.IsInteger()
}

func is16BitInt(t *types.Type) bool {
	return t.Size() == 2 && t.IsInteger()
}

func is8BitInt(t *types.Type) bool {
	return t.Size() == 1 && t.IsInteger()
}

func isPtr(t *types.Type) bool {
	return t.IsPtrShaped()
}

func isSigned(t *types.Type) bool {
	return t.IsSigned()
}

// mergeSym merges two symbolic offsets. There is no real merging of
// offsets; we just pick the non-nil one.
func mergeSym(x, y Sym) Sym {
	if x == nil {
		return y
	}
	if y == nil {
		return x
	}
	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
}

func canMergeSym(x, y Sym) bool {
	return x == nil || y == nil
}

// canMergeLoadClobber reports whether the load can be merged into target without
// invalidating the schedule.
// It also checks that the other non-load argument x is something we
// are ok with clobbering.
func canMergeLoadClobber(target, load, x *Value) bool {
	// The register containing x is going to get clobbered.
	// Don't merge if we still need the value of x.
	// We don't have liveness information here, but we can
	// approximate x dying with:
	//  1) target is x's only use.
	//  2) target is not in a deeper loop than x.
	if x.Uses != 1 {
		return false
	}
	loopnest := x.Block.Func.loopnest()
	loopnest.calculateDepths()
	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
		return false
	}
	return canMergeLoad(target, load)
}

// canMergeLoad reports whether the load can be merged into target without
// invalidating the schedule.
func canMergeLoad(target, load *Value) bool {
	if target.Block.ID != load.Block.ID {
		// If the load is in a different block do not merge it.
		return false
	}

	// We can't merge the load into the target if the load
	// has more than one use.
	if load.Uses != 1 {
		return false
	}

	mem := load.MemoryArg()

	// We need the load's memory arg to still be alive at target. That
	// can't be the case if one of target's args depends on a memory
	// state that is a successor of load's memory arg.
	//
	// For example, it would be invalid to merge load into target in
	// the following situation because newmem has killed oldmem
	// before target is reached:
	//     load = read ... oldmem
	//   newmem = write ... oldmem
	//     arg0 = read ... newmem
	//   target = add arg0 load
	//
	// If the argument comes from a different block then we can exclude
	// it immediately because it must dominate load (which is in the
	// same block as target).
	var args []*Value
	for _, a := range target.Args {
		if a != load && a.Block.ID == target.Block.ID {
			args = append(args, a)
		}
	}

	// memPreds contains memory states known to be predecessors of load's
	// memory state. It is lazily initialized.
	var memPreds map[*Value]bool
	for i := 0; len(args) > 0; i++ {
		const limit = 100
		if i >= limit {
			// Give up if we have done a lot of iterations.
			return false
		}
		v := args[len(args)-1]
		args = args[:len(args)-1]
		if target.Block.ID != v.Block.ID {
			// Since target and load are in the same block
			// we can stop searching when we leave the block.
			continue
		}
		if v.Op == OpPhi {
			// A Phi implies we have reached the top of the block.
			// The memory phi, if it exists, is always
			// the first logical store in the block.
			continue
		}
		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
			// We could handle this situation however it is likely
			// to be very rare.
			return false
		}
		if v.Op.SymEffect()&SymAddr != 0 {
			// This case prevents an operation that calculates the
			// address of a local variable from being forced to schedule
			// before its corresponding VarDef.
			// See issue 28445.
			//   v1 = LOAD ...
			//   v2 = VARDEF
			//   v3 = LEAQ_ON_STACK
			//   v4 = CMPQ v1 v3
			// We don't want to combine the CMPQ with the load, because
			// that would force the CMPQ to schedule before the VARDEF, which
			// in turn requires the LEAQ to schedule before the VARDEF.
			return false
		}
		if v.Type.IsMemory() {
			if memPreds == nil {
				// Initialise a map containing memory states
				// known to be predecessors of load's memory
				// state.
				memPreds = make(map[*Value]bool)
				m := mem
				const limit = 50
				for i := 0; i < limit; i++ {
					if m.Op == OpPhi {
						// The memory phi, if it exists, is always
						// the first logical store in the block.
						break
					}
					if m.Block.ID != target.Block.ID {
						break
					}
					if !m.Type.IsMemory() {
						break
					}
					memPreds[m] = true
					if len(m.Args) == 0 {
						break
					}
					m = m.MemoryArg()
				}
			}

			// We can merge if v is a predecessor of mem.
			//
			// For example, we can merge load into target in the
			// following scenario:
			//      x = read ... v
			//    mem = write ... v
			//   load = read ... mem
			// target = add x load
			if memPreds[v] {
				continue
			}
			return false
		}
		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
			// If v takes mem as an input then we know mem
			// is valid at this point.
			continue
		}
		for _, a := range v.Args {
			if target.Block.ID == a.Block.ID {
				args = append(args, a)
			}
		}
	}

	return true
}

// isSameCall reports whether sym is the same as the given named symbol.
func isSameCall(sym interface{}, name string) bool {
	fn := sym.(*AuxCall).Fn
	return fn != nil && fn.String() == name
}

// canLoadUnaligned reports whether the architecture supports unaligned load operations.
func canLoadUnaligned(c *Config) bool {
	return c.ctxt.Arch.Alignment == 1
}

// nlzX returns the number of leading zeros.
func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }

// ntzX returns the number of trailing zeros.
func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }

func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }

// nto returns the number of trailing ones.
func nto(x int64) int64 {
	return int64(ntz64(^x))
}
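
// For example, nto(0b0111) == 3 and nto(0) == 0: ^x has exactly as many
// trailing zeros as x has trailing ones.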

// logX returns the base-2 logarithm of n.
// n must be a positive power of 2 (isPowerOfTwoX returns true).
func log8(n int8) int64 {
	return int64(bits.Len8(uint8(n))) - 1
}
func log16(n int16) int64 {
	return int64(bits.Len16(uint16(n))) - 1
}
func log32(n int32) int64 {
	return int64(bits.Len32(uint32(n))) - 1
}
func log64(n int64) int64 {
	return int64(bits.Len64(uint64(n))) - 1
}

// log2uint32 returns the base-2 logarithm of uint32(n), with log2(0) = -1.
// Rounds down.
func log2uint32(n int64) int64 {
	return int64(bits.Len32(uint32(n))) - 1
}

// isPowerOfTwoX functions report whether n is a power of 2.
func isPowerOfTwo8(n int8) bool {
	return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo16(n int16) bool {
	return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo32(n int32) bool {
	return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo64(n int64) bool {
	return n > 0 && n&(n-1) == 0
}

// isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
func isUint64PowerOfTwo(in int64) bool {
	n := uint64(in)
	return n > 0 && n&(n-1) == 0
}

// isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
func isUint32PowerOfTwo(in int64) bool {
	n := uint64(uint32(in))
	return n > 0 && n&(n-1) == 0
}

// is32Bit reports whether n can be represented as a signed 32 bit integer.
func is32Bit(n int64) bool {
	return n == int64(int32(n))
}

// is16Bit reports whether n can be represented as a signed 16 bit integer.
func is16Bit(n int64) bool {
	return n == int64(int16(n))
}

// is8Bit reports whether n can be represented as a signed 8 bit integer.
func is8Bit(n int64) bool {
	return n == int64(int8(n))
}

// isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
func isU8Bit(n int64) bool {
	return n == int64(uint8(n))
}

// isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
func isU12Bit(n int64) bool {
	return 0 <= n && n < (1<<12)
}

// isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
func isU16Bit(n int64) bool {
	return n == int64(uint16(n))
}

// isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
func isU32Bit(n int64) bool {
	return n == int64(uint32(n))
}

// is20Bit reports whether n can be represented as a signed 20 bit integer.
func is20Bit(n int64) bool {
	return -(1<<19) <= n && n < (1<<19)
}

// b2i translates a boolean value to 0 or 1 for assigning to auxInt.
func b2i(b bool) int64 {
	if b {
		return 1
	}
	return 0
}

// b2i32 translates a boolean value to 0 or 1.
func b2i32(b bool) int32 {
	if b {
		return 1
	}
	return 0
}

// shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
// A shift is bounded if it is shifting by less than the width of the shifted value.
func shiftIsBounded(v *Value) bool {
	return v.AuxInt != 0
}

// canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
// generated code as much as possible.
func canonLessThan(x, y *Value) bool {
	if x.Op != y.Op {
		return x.Op < y.Op
	}
	if !x.Pos.SameFileAndLine(y.Pos) {
		return x.Pos.Before(y.Pos)
	}
	return x.ID < y.ID
}

// truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
// of the mantissa. It will panic if the truncation results in lost information.
func truncate64Fto32F(f float64) float32 {
	if !isExactFloat32(f) {
		panic("truncate64Fto32F: truncation is not exact")
	}
	if !math.IsNaN(f) {
		return float32(f)
	}
	// NaN bit patterns aren't necessarily preserved across conversion
	// instructions so we need to do the conversion manually.
	b := math.Float64bits(f)
	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
	//          | sign                  | exponent   | mantissa       |
	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
	return math.Float32frombits(r)
}

// extend32Fto64F converts a float32 value to a float64 value preserving the bit
// pattern of the mantissa.
func extend32Fto64F(f float32) float64 {
	if !math.IsNaN(float64(f)) {
		return float64(f)
	}
	// NaN bit patterns aren't necessarily preserved across conversion
	// instructions so we need to do the conversion manually.
	b := uint64(math.Float32bits(f))
	//   | sign                  | exponent      | mantissa                    |
	r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
	return math.Float64frombits(r)
}

// DivisionNeedsFixUp reports whether the division needs fix-up code.
func DivisionNeedsFixUp(v *Value) bool {
	return v.AuxInt == 0
}

// auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
func auxFrom64F(f float64) int64 {
	if math.IsNaN(f) {
		panic("can't encode a NaN in AuxInt field")
	}
	return int64(math.Float64bits(f))
}

// auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
func auxFrom32F(f float32) int64 {
	if math.IsNaN(float64(f)) {
		panic("can't encode a NaN in AuxInt field")
	}
	return int64(math.Float64bits(extend32Fto64F(f)))
}

// auxTo32F decodes a float32 from the AuxInt value provided.
func auxTo32F(i int64) float32 {
	return truncate64Fto32F(math.Float64frombits(uint64(i)))
}
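
// For example, auxFrom32F(1.5) stores math.Float64bits(1.5) in the AuxInt,
// and auxTo32F recovers exactly 1.5: a float32 round-trips through its
// exact float64 extension.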

// auxTo64F decodes a float64 from the AuxInt value provided.
func auxTo64F(i int64) float64 {
	return math.Float64frombits(uint64(i))
}

func auxIntToBool(i int64) bool {
	if i == 0 {
		return false
	}
	return true
}
func auxIntToInt8(i int64) int8 {
	return int8(i)
}
func auxIntToInt16(i int64) int16 {
	return int16(i)
}
func auxIntToInt32(i int64) int32 {
	return int32(i)
}
func auxIntToInt64(i int64) int64 {
	return i
}
func auxIntToUint8(i int64) uint8 {
	return uint8(i)
}
func auxIntToFloat32(i int64) float32 {
	return float32(math.Float64frombits(uint64(i)))
}
func auxIntToFloat64(i int64) float64 {
	return math.Float64frombits(uint64(i))
}
func auxIntToValAndOff(i int64) ValAndOff {
	return ValAndOff(i)
}
func auxIntToArm64BitField(i int64) arm64BitField {
	return arm64BitField(i)
}
func auxIntToInt128(x int64) int128 {
	if x != 0 {
		panic("nonzero int128 not allowed")
	}
	return 0
}
func auxIntToFlagConstant(x int64) flagConstant {
	return flagConstant(x)
}
func auxIntToOp(cc int64) Op {
	return Op(cc)
}

func boolToAuxInt(b bool) int64 {
	if b {
		return 1
	}
	return 0
}
func int8ToAuxInt(i int8) int64 {
	return int64(i)
}
func int16ToAuxInt(i int16) int64 {
	return int64(i)
}
func int32ToAuxInt(i int32) int64 {
	return int64(i)
}
func int64ToAuxInt(i int64) int64 {
	return int64(i)
}
func uint8ToAuxInt(i uint8) int64 {
	return int64(int8(i))
}
func float32ToAuxInt(f float32) int64 {
	return int64(math.Float64bits(float64(f)))
}
func float64ToAuxInt(f float64) int64 {
	return int64(math.Float64bits(f))
}
func valAndOffToAuxInt(v ValAndOff) int64 {
	return int64(v)
}
func arm64BitFieldToAuxInt(v arm64BitField) int64 {
	return int64(v)
}
func int128ToAuxInt(x int128) int64 {
	if x != 0 {
		panic("nonzero int128 not allowed")
	}
	return 0
}
func flagConstantToAuxInt(x flagConstant) int64 {
	return int64(x)
}
func opToAuxInt(o Op) int64 {
	return int64(o)
}

// Aux is an interface to hold miscellaneous data in Blocks and Values.
type Aux interface {
	CanBeAnSSAAux()
}

// for now only used to mark moves that need to avoid clobbering flags
type auxMark bool

func (auxMark) CanBeAnSSAAux() {}

var AuxMark auxMark

// stringAux wraps string values for use in Aux.
type stringAux string

func (stringAux) CanBeAnSSAAux() {}

func auxToString(i Aux) string {
	return string(i.(stringAux))
}
func auxToSym(i Aux) Sym {
	// TODO: kind of a hack - allows nil interface through
	s, _ := i.(Sym)
	return s
}
func auxToType(i Aux) *types.Type {
	return i.(*types.Type)
}
func auxToCall(i Aux) *AuxCall {
	return i.(*AuxCall)
}
func auxToS390xCCMask(i Aux) s390x.CCMask {
	return i.(s390x.CCMask)
}
func auxToS390xRotateParams(i Aux) s390x.RotateParams {
	return i.(s390x.RotateParams)
}

func StringToAux(s string) Aux {
	return stringAux(s)
}
func symToAux(s Sym) Aux {
	return s
}
func callToAux(s *AuxCall) Aux {
	return s
}
func typeToAux(t *types.Type) Aux {
	return t
}
func s390xCCMaskToAux(c s390x.CCMask) Aux {
	return c
}
func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
	return r
}

// uaddOvf reports whether unsigned a+b would overflow.
func uaddOvf(a, b int64) bool {
	return uint64(a)+uint64(b) < uint64(a)
}

// loadLSymOffset simulates reading a word at an offset into a
// read-only symbol's runtime memory. If it would read a pointer to
// another symbol, that symbol is returned. Otherwise, it returns nil.
func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
	if lsym.Type != objabi.SRODATA {
		return nil
	}

	for _, r := range lsym.R {
		if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
			return r.Sym
		}
	}

	return nil
}

// de-virtualize an InterLECall
// 'sym' is the symbol for the itab.
func devirtLESym(v *Value, aux Aux, sym Sym, offset int64) *obj.LSym {
	n, ok := sym.(*obj.LSym)
	if !ok {
		return nil
	}

	lsym := loadLSymOffset(n, offset)
	if f := v.Block.Func; f.pass.debug > 0 {
		if lsym != nil {
			f.Warnl(v.Pos, "de-virtualizing call")
		} else {
			f.Warnl(v.Pos, "couldn't de-virtualize call")
		}
	}
	return lsym
}

func devirtLECall(v *Value, sym *obj.LSym) *Value {
	v.Op = OpStaticLECall
	auxcall := v.Aux.(*AuxCall)
	auxcall.Fn = sym
	// Remove first arg
	v.Args[0].Uses--
	copy(v.Args[0:], v.Args[1:])
	v.Args[len(v.Args)-1] = nil // aid GC
	v.Args = v.Args[:len(v.Args)-1]
	if f := v.Block.Func; f.pass.debug > 0 {
		f.Warnl(v.Pos, "de-virtualizing call")
	}
	return v
}

// isSamePtr reports whether p1 and p2 point to the same address.
func isSamePtr(p1, p2 *Value) bool {
	if p1 == p2 {
		return true
	}
	if p1.Op != p2.Op {
		return false
	}
	switch p1.Op {
	case OpOffPtr:
		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
	case OpAddr, OpLocalAddr:
		return p1.Aux == p2.Aux
	case OpAddPtr:
		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
	}
	return false
}

func isStackPtr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr {
		v = v.Args[0]
	}
	return v.Op == OpSP || v.Op == OpLocalAddr
}

// disjoint reports whether the memory region specified by [p1:p1+n1)
// does not overlap with [p2:p2+n2).
// A return value of false does not imply the regions overlap.
func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
	if n1 == 0 || n2 == 0 {
		return true
	}
	if p1 == p2 {
		return false
	}
	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
		base, offset = ptr, 0
		for base.Op == OpOffPtr {
			offset += base.AuxInt
			base = base.Args[0]
		}
		return base, offset
	}
	p1, off1 := baseAndOffset(p1)
	p2, off2 := baseAndOffset(p2)
	if isSamePtr(p1, p2) {
		return !overlap(off1, n1, off2, n2)
	}
	// p1 and p2 are not the same, so if they are both OpAddrs then
	// they point to different variables.
	// If one pointer is on the stack and the other is an argument
	// then they can't overlap.
	switch p1.Op {
	case OpAddr, OpLocalAddr:
		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
			return true
		}
		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
	case OpArg, OpArgIntReg:
		if p2.Op == OpSP || p2.Op == OpLocalAddr {
			return true
		}
	case OpSP:
		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
	}
	return false
}

// moveSize returns the number of bytes an aligned MOV instruction moves.
func moveSize(align int64, c *Config) int64 {
	switch {
	case align%8 == 0 && c.PtrSize == 8:
		return 8
	case align%4 == 0:
		return 4
	case align%2 == 0:
		return 2
	}
	return 1
}
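
// For example, an 8-byte-aligned move on a 64-bit target proceeds in
// 8-byte chunks (moveSize returns 8), while byte-aligned data falls
// through to single-byte moves.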

// mergePoint finds a block among a's blocks which dominates b and is itself
// dominated by all of a's blocks. Returns nil if it can't find one.
// Might return nil even if one does exist.
func mergePoint(b *Block, a ...*Value) *Block {
	// Walk backward from b looking for one of the a's blocks.
	d := 100 // max distance
	for d > 0 {
		for _, x := range a {
			if b == x.Block {
				goto found
			}
		}
		if len(b.Preds) > 1 {
			// Don't know which way to go back. Abort.
			return nil
		}
		b = b.Preds[0].b
		d--
	}
	return nil // too far away
found:
	// At this point, r is the first value in a that we find by walking backwards.
	// if we return anything, r will be it.
	r := b

	// Keep going, counting the other a's that we find. They must all dominate r.
	na := 0
	d = 100
	for d > 0 {
		for _, x := range a {
			if b == x.Block {
				na++
			}
		}
		if na == len(a) {
			// Found all of a in a backwards walk. We can return r.
			return r
		}
		if len(b.Preds) > 1 {
			return nil
		}
		b = b.Preds[0].b
		d--
	}
	return nil // too far away
}

// clobber invalidates values. Returns true.
// clobber is used by rewrite rules to:
//
//	A) make sure the values are really dead and never used again.
//	B) decrement use counts of the values' args.
func clobber(vv ...*Value) bool {
	for _, v := range vv {
		v.reset(OpInvalid)
		// Note: leave v.Block intact. The Block field is used after clobber.
	}
	return true
}

// clobberIfDead resets v when use count is 1. Returns true.
// clobberIfDead is used by rewrite rules to decrement
// use counts of v's args when v is dead and never used.
func clobberIfDead(v *Value) bool {
	if v.Uses == 1 {
		v.reset(OpInvalid)
	}
	// Note: leave v.Block intact. The Block field is used after clobberIfDead.
	return true
}

// noteRule is an easy way to track if a rule is matched when writing
// new ones. Make the rule of interest also conditional on
//
//	noteRule("note to self: rule of interest matched")
//
// and that message will print when the rule matches.
func noteRule(s string) bool {
	fmt.Println(s)
	return true
}

// countRule increments Func.ruleMatches[key].
// If Func.ruleMatches is non-nil at the end
// of compilation, it will be printed to stdout.
// This is intended to make it easier to find functions
// that contain lots of rule matches when developing new rules.
func countRule(v *Value, key string) bool {
	f := v.Block.Func
	if f.ruleMatches == nil {
		f.ruleMatches = make(map[string]int)
	}
	f.ruleMatches[key]++
	return true
}

// warnRule generates compiler debug output with string s when
// v is not in autogenerated code, cond is true and the rule has fired.
func warnRule(cond bool, v *Value, s string) bool {
	if pos := v.Pos; pos.Line() > 1 && cond {
		v.Block.Func.Warnl(pos, s)
	}
	return true
}

// for a pseudo-op like (LessThan x), extract x.
func flagArg(v *Value) *Value {
	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
		return nil
	}
	return v.Args[0]
}

// arm64Negate finds the complement to an ARM64 condition code,
// for example !Equal -> NotEqual or !LessThan -> GreaterEqual
//
// For floating point, it's more subtle because NaN is unordered. We do
// !LessThanF -> NotLessThanF, the latter takes care of NaNs.
func arm64Negate(op Op) Op {
	switch op {
	case OpARM64LessThan:
		return OpARM64GreaterEqual
	case OpARM64LessThanU:
		return OpARM64GreaterEqualU
	case OpARM64GreaterThan:
		return OpARM64LessEqual
	case OpARM64GreaterThanU:
		return OpARM64LessEqualU
	case OpARM64LessEqual:
		return OpARM64GreaterThan
	case OpARM64LessEqualU:
		return OpARM64GreaterThanU
	case OpARM64GreaterEqual:
		return OpARM64LessThan
	case OpARM64GreaterEqualU:
		return OpARM64LessThanU
	case OpARM64Equal:
		return OpARM64NotEqual
	case OpARM64NotEqual:
		return OpARM64Equal
	case OpARM64LessThanF:
		return OpARM64NotLessThanF
	case OpARM64NotLessThanF:
		return OpARM64LessThanF
	case OpARM64LessEqualF:
		return OpARM64NotLessEqualF
	case OpARM64NotLessEqualF:
		return OpARM64LessEqualF
	case OpARM64GreaterThanF:
		return OpARM64NotGreaterThanF
	case OpARM64NotGreaterThanF:
		return OpARM64GreaterThanF
	case OpARM64GreaterEqualF:
		return OpARM64NotGreaterEqualF
	case OpARM64NotGreaterEqualF:
		return OpARM64GreaterEqualF
	default:
		panic("unreachable")
	}
}

// arm64Invert evaluates (InvertFlags op), which
// is the same as altering the condition codes such
// that the same result would be produced if the arguments
// to the flag-generating instruction were reversed, e.g.
// (InvertFlags (CMP x y)) -> (CMP y x)
func arm64Invert(op Op) Op {
	switch op {
	case OpARM64LessThan:
		return OpARM64GreaterThan
	case OpARM64LessThanU:
		return OpARM64GreaterThanU
	case OpARM64GreaterThan:
		return OpARM64LessThan
	case OpARM64GreaterThanU:
		return OpARM64LessThanU
	case OpARM64LessEqual:
		return OpARM64GreaterEqual
	case OpARM64LessEqualU:
		return OpARM64GreaterEqualU
	case OpARM64GreaterEqual:
		return OpARM64LessEqual
	case OpARM64GreaterEqualU:
		return OpARM64LessEqualU
	case OpARM64Equal, OpARM64NotEqual:
		return op
	case OpARM64LessThanF:
		return OpARM64GreaterThanF
	case OpARM64GreaterThanF:
		return OpARM64LessThanF
	case OpARM64LessEqualF:
		return OpARM64GreaterEqualF
	case OpARM64GreaterEqualF:
		return OpARM64LessEqualF
	case OpARM64NotLessThanF:
		return OpARM64NotGreaterThanF
	case OpARM64NotGreaterThanF:
		return OpARM64NotLessThanF
	case OpARM64NotLessEqualF:
		return OpARM64NotGreaterEqualF
	case OpARM64NotGreaterEqualF:
		return OpARM64NotLessEqualF
	default:
		panic("unreachable")
	}
}

// evaluate an ARM64 op against a flags value
// that is potentially constant; return 1 for true,
// -1 for false, and 0 for not constant.
func ccARM64Eval(op Op, flags *Value) int {
	fop := flags.Op
	if fop == OpARM64InvertFlags {
		return -ccARM64Eval(op, flags.Args[0])
	}
	if fop != OpARM64FlagConstant {
		return 0
	}
	fc := flagConstant(flags.AuxInt)
	b2i := func(b bool) int {
		if b {
			return 1
		}
		return -1
	}
	switch op {
	case OpARM64Equal:
		return b2i(fc.eq())
	case OpARM64NotEqual:
		return b2i(fc.ne())
	case OpARM64LessThan:
		return b2i(fc.lt())
	case OpARM64LessThanU:
		return b2i(fc.ult())
	case OpARM64GreaterThan:
		return b2i(fc.gt())
	case OpARM64GreaterThanU:
		return b2i(fc.ugt())
	case OpARM64LessEqual:
		return b2i(fc.le())
	case OpARM64LessEqualU:
		return b2i(fc.ule())
	case OpARM64GreaterEqual:
		return b2i(fc.ge())
	case OpARM64GreaterEqualU:
		return b2i(fc.uge())
	}
	return 0
}

// logRule logs the use of the rule s. This will only be enabled if
// rewrite rules were generated with the -log option, see _gen/rulegen.go.
func logRule(s string) {
	if ruleFile == nil {
		// Open a log file to write log to. We open in append
		// mode because all.bash runs the compiler lots of times,
		// and we want the concatenation of all of those logs.
		// This means, of course, that users need to rm the old log
		// to get fresh data.
		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
		if err != nil {
			panic(err)
		}
		ruleFile = w
	}
	_, err := fmt.Fprintln(ruleFile, s)
	if err != nil {
		panic(err)
	}
}

var ruleFile io.Writer

func min(x, y int64) int64 {
	if x < y {
		return x
	}
	return y
}

func isConstZero(v *Value) bool {
	switch v.Op {
	case OpConstNil:
		return true
	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
		return v.AuxInt == 0
	}
	return false
}

// reciprocalExact64 reports whether 1/c is exactly representable.
func reciprocalExact64(c float64) bool {
	b := math.Float64bits(c)
	man := b & (1<<52 - 1)
	if man != 0 {
		return false // not a power of 2, denormal, or NaN
	}
	exp := b >> 52 & (1<<11 - 1)
	// exponent bias is 0x3ff. So taking the reciprocal of a number
	// changes the exponent to 0x7fe-exp.
	switch exp {
	case 0:
		return false // ±0
	case 0x7ff:
		return false // ±inf
	case 0x7fe:
		return false // exponent is not representable
	default:
		return true
	}
}

// reciprocalExact32 reports whether 1/c is exactly representable.
func reciprocalExact32(c float32) bool {
	b := math.Float32bits(c)
	man := b & (1<<23 - 1)
	if man != 0 {
		return false // not a power of 2, denormal, or NaN
	}
	exp := b >> 23 & (1<<8 - 1)
	// exponent bias is 0x7f. So taking the reciprocal of a number
	// changes the exponent to 0xfe-exp.
	switch exp {
	case 0:
		return false // ±0
	case 0xff:
		return false // ±inf
	case 0xfe:
		return false // exponent is not representable
	default:
		return true
	}
}

// check if an immediate can be directly encoded into an ARM instruction.
func isARMImmRot(v uint32) bool {
	for i := 0; i < 16; i++ {
		if v&^0xff == 0 {
			return true
		}
		v = v<<2 | v>>30
	}

	return false
}

// overlap reports whether the ranges given by the given offset and
// size pairs overlap.
func overlap(offset1, size1, offset2, size2 int64) bool {
	if offset1 >= offset2 && offset2+size2 > offset1 {
		return true
	}
	if offset2 >= offset1 && offset1+size1 > offset2 {
		return true
	}
	return false
}
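
// For example, overlap(0, 8, 4, 4) is true (bytes 4 through 7 are shared),
// while overlap(0, 4, 4, 4) is false: the ranges merely touch.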

func areAdjacentOffsets(off1, off2, size int64) bool {
	return off1+size == off2 || off1 == off2+size
}

// check if value zeroes out the upper 32 bits of a 64-bit register.
// depth limits recursion depth. In AMD64.rules 3 is used as the limit,
// because it catches the same number of cases as 4.
func zeroUpper32Bits(x *Value, depth int) bool {
	switch x.Op {
	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
		OpAMD64SHLL, OpAMD64SHLLconst:
		return true
	case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
		OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
		OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
		return true
	case OpArg:
		return x.Type.Size() == 4
	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit recursion depth.
		if depth <= 0 {
			return false
		}
		for i := range x.Args {
			if !zeroUpper32Bits(x.Args[i], depth-1) {
				return false
			}
		}
		return true
	}
	return false
}

// zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
func zeroUpper48Bits(x *Value, depth int) bool {
	switch x.Op {
	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
		return true
	case OpArg:
		return x.Type.Size() == 2
	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit recursion depth.
		if depth <= 0 {
			return false
		}
		for i := range x.Args {
			if !zeroUpper48Bits(x.Args[i], depth-1) {
				return false
			}
		}
		return true
	}
	return false
}

// zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
func zeroUpper56Bits(x *Value, depth int) bool {
	switch x.Op {
	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
		return true
	case OpArg:
		return x.Type.Size() == 1
	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit recursion depth.
		if depth <= 0 {
			return false
		}
		for i := range x.Args {
			if !zeroUpper56Bits(x.Args[i], depth-1) {
				return false
			}
		}
		return true
	}
	return false
}

func isInlinableMemclr(c *Config, sz int64) bool {
	// TODO: expand this check to allow other architectures
	// see CL 454255 and issue 56997
	switch c.arch {
	case "amd64", "arm64":
		return true
	case "ppc64le", "ppc64":
		return sz < 512
	}
	return false
}

// isInlinableMemmove reports whether the given arch performs a Move of the given size
// faster than memmove. It will only return true if replacing the memmove with a Move is
// safe, either because Move will do all of its loads before any of its stores, or
// because the arguments are known to be disjoint.
// This is used as a check for replacing memmove with Move ops.
func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
	// It is always safe to convert memmove into Move when its arguments are disjoint.
	// Move ops may or may not be faster for large sizes depending on how the platform
	// lowers them, so we only perform this optimization on platforms that we know to
	// have fast Move ops.
	switch c.arch {
	case "amd64":
		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
	case "386", "arm64":
		return sz <= 8
	case "s390x", "ppc64", "ppc64le":
		return sz <= 8 || disjoint(dst, sz, src, sz)
	case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
		return sz <= 4
	}
	return false
}

func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
	return isInlinableMemmove(dst, src, sz, c)
}

// logLargeCopy logs the occurrence of a large copy.
// The best place to do this is in the rewrite rules where the size of the move is easy to find.
// "Large" is arbitrarily chosen to be 128 bytes; this may change.
func logLargeCopy(v *Value, s int64) bool {
	if s < 128 {
		return true
	}
	if logopt.Enabled() {
		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
	}
	return true
}
func LogLargeCopy(funcName string, pos src.XPos, s int64) {
	if s < 128 {
		return
	}
	if logopt.Enabled() {
		logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
	}
}

// hasSmallRotate reports whether the architecture has rotate instructions
// for sizes < 32-bit. This is used to decide whether to promote some rotations.
func hasSmallRotate(c *Config) bool {
	switch c.arch {
	case "amd64", "386":
		return true
	default:
		return false
	}
}

func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
	if sh < 0 || sh >= sz {
		panic("PPC64 shift arg sh out of range")
	}
	if mb < 0 || mb >= sz {
		panic("PPC64 shift arg mb out of range")
	}
	if me < 0 || me >= sz {
		panic("PPC64 shift arg me out of range")
	}
	return int32(sh<<16 | mb<<8 | me)
}

func GetPPC64Shiftsh(auxint int64) int64 {
	return int64(int8(auxint >> 16))
}

func GetPPC64Shiftmb(auxint int64) int64 {
	return int64(int8(auxint >> 8))
}

func GetPPC64Shiftme(auxint int64) int64 {
	return int64(int8(auxint))
}

// Test if this value can be encoded as a mask for a rlwinm like
// operation. Masks can also extend from the msb and wrap to
// the lsb too. That is, the valid masks are 32 bit strings
// of the form: 0..01..10..0 or 1..10..01..1 or 1...1
func isPPC64WordRotateMask(v64 int64) bool {
	// Isolate rightmost 1 (if none 0) and add.
	v := uint32(v64)
	vp := (v & -v) + v
	// Likewise, for the wrapping case.
	vn := ^v
	vpn := (vn & -vn) + vn
	return (v&vp == 0 || vn&vpn == 0) && v != 0
}

// Compress mask and shift into single value of the form
// me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
// be used to regenerate the input mask.
func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
	var mb, me, mbn, men int

	// Determine boundaries and then decode them
	if mask == 0 || ^mask == 0 || rotate >= nbits {
		panic("Invalid PPC64 rotate mask")
	} else if nbits == 32 {
		mb = bits.LeadingZeros32(uint32(mask))
		me = 32 - bits.TrailingZeros32(uint32(mask))
		mbn = bits.LeadingZeros32(^uint32(mask))
		men = 32 - bits.TrailingZeros32(^uint32(mask))
	} else {
		mb = bits.LeadingZeros64(uint64(mask))
		me = 64 - bits.TrailingZeros64(uint64(mask))
		mbn = bits.LeadingZeros64(^uint64(mask))
		men = 64 - bits.TrailingZeros64(^uint64(mask))
	}
	// Check for a wrapping mask (e.g. bits at 0 and 63)
	if mb == 0 && me == int(nbits) {
		// swap the inverted values
		mb, me = men, mbn
	}

	return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
}

// DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask. The values returned as
// mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
	auxint := uint64(sauxint)
	rotate = int64((auxint >> 16) & 0xFF)
	mb = int64((auxint >> 8) & 0xFF)
	me = int64((auxint >> 0) & 0xFF)
	nbits := int64((auxint >> 24) & 0xFF)
	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
	if mb > me {
		mask = ^mask
	}
	if nbits == 32 {
		mask = uint64(uint32(mask))
	}

	// Fixup ME to match ISA definition. The second argument to MASK(..,me)
	// is inclusive.
	me = (me - 1) & (nbits - 1)
	return
}
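
// For example, encodePPC64RotateMask(0, 0xFF, 32) records mb=24 and an
// exclusive me of 32; DecodePPC64RotateMask then returns rotate=0, mb=24,
// me=31 and mask=0xFF, matching the ISA's inclusive MASK(24,31).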

// This verifies that the mask is a set of
// consecutive bits including the least
// significant bit.
func isPPC64ValidShiftMask(v int64) bool {
	if (v != 0) && ((v+1)&v) == 0 {
		return true
	}
	return false
}

func getPPC64ShiftMaskLength(v int64) int64 {
	return int64(bits.Len64(uint64(v)))
}

// Decompose a shift right into an equivalent rotate/mask,
// and return mask & m.
func mergePPC64RShiftMask(m, s, nbits int64) int64 {
	smask := uint64((1<<uint(nbits))-1) >> uint(s)
	return m & int64(smask)
}
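
// For example, mergePPC64RShiftMask(0xFF, 4, 32) intersects m = 0xFF with
// 0x0FFFFFFF, the set of bits a 4-bit right shift of a 32-bit value can
// produce, yielding 0xFF.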

// Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
func mergePPC64AndSrwi(m, s int64) int64 {
	mask := mergePPC64RShiftMask(m, s, 32)
	if !isPPC64WordRotateMask(mask) {
		return 0
	}
	return encodePPC64RotateMask((32-s)&31, mask, 32)
}

// Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
// Return the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
	// for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))

	// Rewrite mask to apply after the final left shift.
	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))

	r_1 := 32 - srw
	r_2 := GetPPC64Shiftsh(sld)
	r_3 := (r_1 + r_2) & 31 // This can wrap.

	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
		return 0
	}
	return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
}

// Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM. Return
// the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
	// for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))

	// combine the masks, and adjust for the final left shift.
	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
	r_2 := GetPPC64Shiftsh(int64(sld))
	r_3 := (r_1 + r_2) & 31 // This can wrap.

	// Verify the result is still a valid bitmask of <= 32 bits.
	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
		return 0
	}
	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
}

// Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
// or return 0 if they cannot be combined.
func mergePPC64SldiSrw(sld, srw int64) int64 {
	if sld > srw || srw >= 32 {
		return 0
	}
	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
	mask := (mask_r & mask_l) << uint(sld)
	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
}

// Convenience function to rotate a 32 bit constant value by another constant.
func rotateLeft32(v, rotate int64) int64 {
	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
}

func rotateRight64(v, rotate int64) int64 {
	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
}

// encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
func armBFAuxInt(lsb, width int64) arm64BitField {
	if lsb < 0 || lsb > 63 {
		panic("ARM(64) bit field lsb constant out of range")
	}
	if width < 1 || lsb+width > 64 {
		panic("ARM(64) bit field width constant out of range")
	}
	return arm64BitField(width | lsb<<8)
}

// returns the lsb part of the auxInt field of arm64 bitfield ops.
func (bfc arm64BitField) getARM64BFlsb() int64 {
	return int64(uint64(bfc) >> 8)
}

// returns the width part of the auxInt field of arm64 bitfield ops.
func (bfc arm64BitField) getARM64BFwidth() int64 {
	return int64(bfc) & 0xff
}

// checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
func isARM64BFMask(lsb, mask, rshift int64) bool {
	shiftedMask := int64(uint64(mask) >> uint64(rshift))
	return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
}
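
// For example, isARM64BFMask(0, 0xF0, 4) is true: 0xF0>>4 = 0xF is a
// contiguous run of ones starting at bit 0, and the field still fits
// below bit 64 when placed at lsb 0.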

// returns the bitfield width of mask >> rshift for arm64 bitfield ops.
func arm64BFWidth(mask, rshift int64) int64 {
	shiftedMask := int64(uint64(mask) >> uint64(rshift))
	if shiftedMask == 0 {
		panic("ARM64 BF mask is zero")
	}
	return nto(shiftedMask)
}

// sizeof returns the size of t in bytes.
// It will panic if t is not a *types.Type.
func sizeof(t interface{}) int64 {
	return t.(*types.Type).Size()
}

// registerizable reports whether t is a primitive type that fits in
// a register. It assumes float64 values will always fit into registers
// even if that isn't strictly true.
func registerizable(b *Block, typ *types.Type) bool {
	if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
		return true
	}
	if typ.IsInteger() {
		return typ.Size() <= b.Func.Config.RegSize
	}
	return false
}

// needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
func needRaceCleanup(sym *AuxCall, v *Value) bool {
	f := v.Block.Func
	if !f.Config.Race {
		return false
	}
	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
		return false
	}
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			switch v.Op {
			case OpStaticCall, OpStaticLECall:
				// Check for racefuncenter will encounter racefuncexit and vice versa.
				// Allow calls to panic*
				s := v.Aux.(*AuxCall).Fn.String()
				switch s {
				case "runtime.racefuncenter", "runtime.racefuncexit",
					"runtime.panicdivide", "runtime.panicwrap",
					"runtime.panicshift":
					continue
				}
				// If we encountered any call, we need to keep racefunc*,
				// for accurate stacktraces.
				return false
			case OpPanicBounds, OpPanicExtend:
				// Note: these are panic generators that are ok (like the static calls above).
			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
				// We must keep the race functions if there are any other call types.
				return false
			}
		}
	}
	if isSameCall(sym, "runtime.racefuncenter") {
		// TODO REGISTER ABI this needs to be cleaned up.
		// If we're removing racefuncenter, remove its argument as well.
		if v.Args[0].Op != OpStore {
			if v.Op == OpStaticLECall {
				// there is no store, yet.
				return true
			}
			return false
		}
		mem := v.Args[0].Args[2]
		v.Args[0].reset(OpCopy)
		v.Args[0].AddArg(mem)
	}
	return true
}

// symIsRO reports whether sym is a read-only global.
func symIsRO(sym interface{}) bool {
	lsym := sym.(*obj.LSym)
	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
}

// symIsROZero reports whether sym is a read-only global whose data contains all zeros.
func symIsROZero(sym Sym) bool {
	lsym := sym.(*obj.LSym)
	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
		return false
	}
	for _, b := range lsym.P {
		if b != 0 {
			return false
		}
	}
	return true
}

// read8 reads one byte from the read-only global sym at offset off.
func read8(sym interface{}, off int64) uint8 {
	lsym := sym.(*obj.LSym)
	if off >= int64(len(lsym.P)) || off < 0 {
		// Invalid index into the global sym.
		// This can happen in dead code, so we don't want to panic.
		// Just return any value, it will eventually get ignored.
		// See issue 29215.
		return 0
	}
	return lsym.P[off]
}

// read16 reads two bytes from the read-only global sym at offset off.
func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
	lsym := sym.(*obj.LSym)
	// lsym.P is written lazily.
	// Bytes requested after the end of lsym.P are 0.
	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 2)
	copy(buf, src)
	return byteorder.Uint16(buf)
}

// read32 reads four bytes from the read-only global sym at offset off.
func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
	lsym := sym.(*obj.LSym)
	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 4)
	copy(buf, src)
	return byteorder.Uint32(buf)
}

// read64 reads eight bytes from the read-only global sym at offset off.
func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
	lsym := sym.(*obj.LSym)
	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 8)
	copy(buf, src)
	return byteorder.Uint64(buf)
}

// sequentialAddresses reports true if it can prove that x + n == y
func sequentialAddresses(x, y *Value, n int64) bool {
	if x == y && n == 0 {
		return true
	}
	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	return false
}

// flagConstant represents the result of a compile-time comparison.
// The sense of these flags does not necessarily represent the hardware's notion
// of a flags register - these are just a compile-time construct.
// We happen to match the semantics to those of arm/arm64.
// Note that these semantics differ from x86: the carry flag has the opposite
// sense on a subtraction!
//
//	On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
//	On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
//	(because it does x + ^y + C).
//
// See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
type flagConstant uint8

// N reports whether the result of an operation is negative (high bit set).
func (fc flagConstant) N() bool {
	return fc&1 != 0
}

// Z reports whether the result of an operation is 0.
func (fc flagConstant) Z() bool {
	return fc&2 != 0
}

// C reports whether an unsigned add overflowed (carry), or an
// unsigned subtract did not underflow (borrow).
func (fc flagConstant) C() bool {
	return fc&4 != 0
}

// V reports whether a signed operation overflowed or underflowed.
func (fc flagConstant) V() bool {
	return fc&8 != 0
}

func (fc flagConstant) eq() bool {
	return fc.Z()
}
func (fc flagConstant) ne() bool {
	return !fc.Z()
}
func (fc flagConstant) lt() bool {
	return fc.N() != fc.V()
}
func (fc flagConstant) le() bool {
	return fc.Z() || fc.lt()
}
func (fc flagConstant) gt() bool {
	return !fc.Z() && fc.ge()
}
func (fc flagConstant) ge() bool {
	return fc.N() == fc.V()
}
func (fc flagConstant) ult() bool {
	return !fc.C()
}
func (fc flagConstant) ule() bool {
	return fc.Z() || fc.ult()
}
func (fc flagConstant) ugt() bool {
	return !fc.Z() && fc.uge()
}
func (fc flagConstant) uge() bool {
	return fc.C()
}

func (fc flagConstant) ltNoov() bool {
	return fc.lt() && !fc.V()
}
func (fc flagConstant) leNoov() bool {
	return fc.le() && !fc.V()
}
func (fc flagConstant) gtNoov() bool {
	return fc.gt() && !fc.V()
}
func (fc flagConstant) geNoov() bool {
	return fc.ge() && !fc.V()
}

func (fc flagConstant) String() string {
	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
}

type flagConstantBuilder struct {
	N bool
	Z bool
	C bool
	V bool
}

func (fcs flagConstantBuilder) encode() flagConstant {
	var fc flagConstant
	if fcs.N {
		fc |= 1
	}
	if fcs.Z {
		fc |= 2
	}
	if fcs.C {
		fc |= 4
	}
	if fcs.V {
		fc |= 8
	}
	return fc
}

// Note: addFlags(x,y) != subFlags(x,-y) in some situations:
//  - the results of the C flag are different
//  - the results of the V flag when y==minint are different
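
// For example, addFlags64(1, -1) sets C (the unsigned add wraps), and
// subFlags64(1, 1) also sets C (no borrow, in the arm sense); x86 would
// report the same subtraction with CF clear.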

// addFlags64 returns the flags that would be set from computing x+y.
func addFlags64(x, y int64) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x+y == 0
	fcb.N = x+y < 0
	fcb.C = uint64(x+y) < uint64(x)
	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
	return fcb.encode()
}

// subFlags64 returns the flags that would be set from computing x-y.
func subFlags64(x, y int64) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x-y == 0
	fcb.N = x-y < 0
	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
	return fcb.encode()
}

// addFlags32 returns the flags that would be set from computing x+y.
func addFlags32(x, y int32) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x+y == 0
	fcb.N = x+y < 0
	fcb.C = uint32(x+y) < uint32(x)
	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
	return fcb.encode()
}

// subFlags32 returns the flags that would be set from computing x-y.
func subFlags32(x, y int32) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x-y == 0
	fcb.N = x-y < 0
	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
	return fcb.encode()
}

// logicFlags64 returns flags set to the sign/zeroness of x.
// C and V are set to false.
func logicFlags64(x int64) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x == 0
	fcb.N = x < 0
	return fcb.encode()
}

// logicFlags32 returns flags set to the sign/zeroness of x.
// C and V are set to false.
func logicFlags32(x int32) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x == 0
	fcb.N = x < 0
	return fcb.encode()
}

func makeJumpTableSym(b *Block) *obj.LSym {
	s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.LSym(), b.ID))
	s.Set(obj.AttrDuplicateOK, true)
	s.Set(obj.AttrLocal, true)
	return s
}

// canRotate reports whether the architecture supports
// rotates of integer registers with the given number of bits.
func canRotate(c *Config, bits int64) bool {
	if bits > c.PtrSize*8 {
		// Don't rewrite to rotates bigger than the machine word.
		return false
	}
	switch c.arch {
	case "386", "amd64", "arm64":
		return true
	case "arm", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
		return bits >= 32
	default:
		return false
	}
}

// isARM64bitcon reports whether a constant can be encoded into a logical instruction.
func isARM64bitcon(x uint64) bool {
	if x == 1<<64-1 || x == 0 {
		return false
	}
	// determine the period and sign-extend a unit to 64 bits
	switch {
	case x != x>>32|x<<32:
		// period is 64
		// nothing to do
	case x != x>>16|x<<48:
		// period is 32
		x = uint64(int64(int32(x)))
	case x != x>>8|x<<56:
		// period is 16
		x = uint64(int64(int16(x)))
	case x != x>>4|x<<60:
		// period is 8
		x = uint64(int64(int8(x)))
	default:
		// period is 4 or 2, always true
		// 0001, 0010, 0100, 1000 -- 0001 rotate
		// 0011, 0110, 1100, 1001 -- 0011 rotate
		// 0111, 1011, 1101, 1110 -- 0111 rotate
		// 0101, 1010             -- 01   rotate, repeat
		return true
	}
	return sequenceOfOnes(x) || sequenceOfOnes(^x)
}
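
// For example, 0x00FF00FF00FF00FF is encodable: its period is 16 and each
// unit is a single run of ones, while 0x12345678 is rejected because
// neither it nor its complement is a single run of ones.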

// sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
func sequenceOfOnes(x uint64) bool {
	y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
	y += x
	return (y-1)&y == 0
}
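
// For example, sequenceOfOnes(0x0ff0) is true (0x0ff0 + 0x0010 = 0x1000,
// a power of 2), while sequenceOfOnes(0x0f0f) is false.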

// isARM64addcon reports whether v can be encoded as the immediate value in an ADD or SUB instruction.
func isARM64addcon(v int64) bool {
	/* uimm12 or uimm24? */
	if v < 0 {
		return false
	}
	if (v & 0xFFF) == 0 {
		v >>= 12
	}
	return v <= 0xFFF
}