1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/base"
9 "cmd/compile/internal/logopt"
10 "cmd/compile/internal/types"
12 "cmd/internal/obj/s390x"
24 type deadValueChoice bool
27 leaveDeadValues deadValueChoice = false
28 removeDeadValues = true
31 // deadcode indicates whether rewrite should try to remove any values that become dead.
32 func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
33 // repeat rewrites until we find no more rewrites
34 pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
38 fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
41 var states map[string]bool
45 for _, b := range f.Blocks {
46 var b0 *Block
47 if debug > 1 {
48 b0 = new(Block)
49 *b0 = *b
50 b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
52 for i, c := range b.ControlValues() {
55 b.ReplaceControl(i, c)
61 fmt.Printf("rewriting %s -> %s\n", b0.LongString(), b.LongString())
64 for j, v := range b.Values {
65 var v0 *Value
66 if debug > 1 {
67 v0 = new(Value)
68 *v0 = *v
69 v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
71 if v.Uses == 0 && v.removeable() {
72 if v.Op != OpInvalid && deadcode == removeDeadValues {
73 // Reset any values that are now unused, so that we decrement
74 // the use count of all of its arguments.
75 // Not quite a deadcode pass, because it does not handle cycles.
76 // But it should help Uses==1 rules to fire.
77 v.reset(OpInvalid)
78 deadChange = true
79 }
80 // No point rewriting values which aren't used.
84 vchange := phielimValue(v)
85 if vchange && debug > 1 {
86 fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
89 // Eliminate copy inputs.
90 // If any copy input becomes unused, mark it
91 // as invalid and discard its argument. Repeat
92 // recursively on the discarded argument.
93 // This phase helps remove phantom "dead copy" uses
94 // of a value so that an x.Uses==1 rule condition
95 // fires reliably.
96 for i, a := range v.Args {
102 // If a, a copy, has a line boundary indicator, attempt to find a new value
103 // to hold it. The first candidate is the value that will replace a (aa),
104 // if it shares the same block and line and is eligible.
105 // The second option is v, which has a as an input. Because aa is earlier in
106 // the data flow, it is the better choice.
107 if a.Pos.IsStmt() == src.PosIsStmt {
108 if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
109 aa.Pos = aa.Pos.WithIsStmt()
110 } else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
111 v.Pos = v.Pos.WithIsStmt()
112 } else {
113 // Record the lost line and look for a new home after all rewrites are complete.
114 // TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
115 // line to appear in more than one block, but only one block is stored, so if both end
116 // up here, then one will be lost.
117 pendingLines.set(a.Pos, int32(a.Block.ID))
119 a.Pos = a.Pos.WithNotStmt()
128 if vchange && debug > 1 {
129 fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
132 // apply rewrite function
135 // If value changed to a poor choice for a statement boundary, move the boundary
136 if v.Pos.IsStmt() == src.PosIsStmt {
137 if k := nextGoodStatementIndex(v, j, b); k != j {
138 v.Pos = v.Pos.WithNotStmt()
139 b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
144 change = change || vchange
145 if vchange && debug > 1 {
146 fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
150 if !change && !deadChange {
151 break
152 }
153 iters++
154 if (iters > 1000 || debug >= 2) && change {
155 // We've done a suspiciously large number of rewrites (or we're in debug mode).
156 // As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
157 // and the maximum value encountered during make.bash is 12.
158 // Start checking for cycles. (This is too expensive to do routinely.)
159 // Note: we avoid this path for deadChange-only iterations, to fix #51639.
161 states = make(map[string]bool)
162 }
163 h := f.rewriteHash()
164 if _, ok := states[h]; ok {
165 // We've found a cycle.
166 // To diagnose it, set debug to 2 and start again,
167 // so that we'll print all rules applied until we complete another cycle.
168 // If debug is already >= 2, we've already done that, so it's time to crash.
169 if debug < 2 {
170 debug = 2
171 states = make(map[string]bool)
172 } else {
173 f.Fatalf("rewrite cycle detected")
179 // remove clobbered values
180 for _, b := range f.Blocks {
182 for i, v := range b.Values {
183 vl := v.Pos
184 if v.Op == OpInvalid {
185 if v.Pos.IsStmt() == src.PosIsStmt {
186 pendingLines.set(vl, int32(b.ID))
191 if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
192 pendingLines.remove(vl)
193 v.Pos = v.Pos.WithIsStmt()
200 if pendingLines.get(b.Pos) == int32(b.ID) {
201 b.Pos = b.Pos.WithIsStmt()
202 pendingLines.remove(b.Pos)
208 // Common functions called from rewriting rules
210 func is64BitFloat(t *types.Type) bool {
211 return t.Size() == 8 && t.IsFloat()
214 func is32BitFloat(t *types.Type) bool {
215 return t.Size() == 4 && t.IsFloat()
218 func is64BitInt(t *types.Type) bool {
219 return t.Size() == 8 && t.IsInteger()
222 func is32BitInt(t *types.Type) bool {
223 return t.Size() == 4 && t.IsInteger()
226 func is16BitInt(t *types.Type) bool {
227 return t.Size() == 2 && t.IsInteger()
230 func is8BitInt(t *types.Type) bool {
231 return t.Size() == 1 && t.IsInteger()
234 func isPtr(t *types.Type) bool {
235 return t.IsPtrShaped()
238 // mergeSym merges two symbolic offsets. There is no real merging of
239 // offsets; we just pick the non-nil one.
240 func mergeSym(x, y Sym) Sym {
247 panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
250 func canMergeSym(x, y Sym) bool {
251 return x == nil || y == nil
254 // canMergeLoadClobber reports whether the load can be merged into target without
255 // invalidating the schedule.
256 // It also checks that the other non-load argument x is something we
257 // are ok with clobbering.
258 func canMergeLoadClobber(target, load, x *Value) bool {
259 // The register containing x is going to get clobbered.
260 // Don't merge if we still need the value of x.
261 // We don't have liveness information here, but we can
262 // approximate x dying with:
263 // 1) target is x's only use.
264 // 2) target is not in a deeper loop than x.
268 loopnest := x.Block.Func.loopnest()
269 loopnest.calculateDepths()
270 if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
273 return canMergeLoad(target, load)
276 // canMergeLoad reports whether the load can be merged into target without
277 // invalidating the schedule.
278 func canMergeLoad(target, load *Value) bool {
279 if target.Block.ID != load.Block.ID {
280 // If the load is in a different block do not merge it.
284 // We can't merge the load into the target if the load
285 // has more than one use.
290 mem := load.MemoryArg()
292 // We need the load's memory arg to still be alive at target. That
293 // can't be the case if one of target's args depends on a memory
294 // state that is a successor of load's memory arg.
296 // For example, it would be invalid to merge load into target in
297 // the following situation because newmem has killed oldmem
298 // before target is reached:
299 // load = read ... oldmem
300 // newmem = write ... oldmem
301 // arg0 = read ... newmem
302 // target = add arg0 load
304 // If the argument comes from a different block then we can exclude
305 // it immediately because it must dominate load (which is in the
306 // same block as target).
307 var args []*Value
308 for _, a := range target.Args {
309 if a != load && a.Block.ID == target.Block.ID {
310 args = append(args, a)
314 // memPreds contains memory states known to be predecessors of load's
315 // memory state. It is lazily initialized.
316 var memPreds map[*Value]bool
317 for i := 0; len(args) > 0; i++ {
318 const limit = 100
319 if i >= limit {
320 // Give up if we have done a lot of iterations.
321 return false
322 }
323 v := args[len(args)-1]
324 args = args[:len(args)-1]
325 if target.Block.ID != v.Block.ID {
326 // Since target and load are in the same block
327 // we can stop searching when we leave the block.
331 // A Phi implies we have reached the top of the block.
332 // The memory phi, if it exists, is always
333 // the first logical store in the block.
336 if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
337 // We could handle this situation, but it is likely
338 // to be very rare.
339 return false
340 }
341 if v.Op.SymEffect()&SymAddr != 0 {
342 // This case prevents an operation that calculates the
343 // address of a local variable from being forced to schedule
344 // before its corresponding VarDef.
345 // See issue 28445.
346 //   v1 = LOAD ...
347 //   v2 = VARDEF
348 //   v3 = LEAQ_ON_STACK (checking v3 will be forced before v2)
349 //   v4 = CMPQ v3 [c] mem calculated by v1
350 // We don't want to combine the CMPQ with the load, because
351 // that would force the CMPQ to schedule before the VARDEF, which
352 // in turn requires the LEAQ to schedule before the VARDEF.
355 if v.Type.IsMemory() {
356 if memPreds == nil {
357 // Initialise a map containing memory states
358 // known to be predecessors of load's memory
359 // state.
360 memPreds = make(map[*Value]bool)
361 m := mem
362 const limit = 50
363 for i := 0; i < limit; i++ {
364 if m.Op == OpPhi {
365 // The memory phi, if it exists, is always
366 // the first logical store in the block.
367 break
368 }
369 if m.Block.ID != target.Block.ID {
370 break
371 }
372 if !m.Type.IsMemory() {
373 break
374 }
375 memPreds[m] = true
376 if len(m.Args) == 0 {
383 // We can merge if v is a predecessor of mem.
385 // For example, we can merge load into target in the
386 // following scenario:
387 // x = read ... v
388 // mem = write ... v
389 // load = read ... mem
390 // target = add x load
396 if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
397 // If v takes mem as an input then we know mem
398 // is valid at this point.
401 for _, a := range v.Args {
402 if target.Block.ID == a.Block.ID {
403 args = append(args, a)
411 // isSameCall reports whether sym is the same as the given named symbol.
412 func isSameCall(sym interface{}, name string) bool {
413 fn := sym.(*AuxCall).Fn
414 return fn != nil && fn.String() == name
417 // canLoadUnaligned reports whether the architecture supports unaligned load operations.
418 func canLoadUnaligned(c *Config) bool {
419 return c.ctxt.Arch.Alignment == 1
422 // nlzX returns the number of leading zeros.
423 func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
424 func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
425 func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
426 func nlz8(x int8) int { return bits.LeadingZeros8(uint8(x)) }
428 // ntzX returns the number of trailing zeros.
429 func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
430 func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
431 func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
432 func ntz8(x int8) int { return bits.TrailingZeros8(uint8(x)) }
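// Illustrative, hand-checked values (not from the original source):
//
//	nlz64(1) == 63 // only the lowest bit is set
//	nlz8(-1) == 0  // all eight bits are set
//	ntz64(8) == 3  // 0b1000 has three trailing zeros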
434 func oneBit(x int64) bool { return x&(x-1) == 0 && x != 0 }
435 func oneBit8(x int8) bool { return x&(x-1) == 0 && x != 0 }
436 func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
437 func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
438 func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
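// A quick sanity check of the oneBit helpers (hand-checked values):
//
//	oneBit(8) == true  // exactly one bit set
//	oneBit(6) == false // 0b110 has two bits set
//	oneBit(0) == false // the x != 0 clause rejects zero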
440 // nto returns the number of trailing ones.
441 func nto(x int64) int64 {
442 return int64(ntz64(^x))
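// For example (hand-checked): nto(0b0111) == 3, since ^0b0111 ends in three
// zero bits, and nto(-1) == 64 because ^(-1) == 0 and ntz64(0) == 64.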
445 // logX returns the base-2 logarithm of n.
446 // n must be a positive power of 2 (isPowerOfTwoX returns true).
447 func log8(n int8) int64 {
448 return int64(bits.Len8(uint8(n))) - 1
450 func log16(n int16) int64 {
451 return int64(bits.Len16(uint16(n))) - 1
453 func log32(n int32) int64 {
454 return int64(bits.Len32(uint32(n))) - 1
456 func log64(n int64) int64 {
457 return int64(bits.Len64(uint64(n))) - 1
460 // log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
462 func log2uint32(n int64) int64 {
463 return int64(bits.Len32(uint32(n))) - 1
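// Hand-checked examples: log64(1) == 0, log64(8) == 3, log2uint32(8) == 3,
// and log2uint32(0) == -1 since bits.Len32(0) == 0.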
466 // isPowerOfTwoX functions report whether n is a power of 2.
467 func isPowerOfTwo8(n int8) bool {
468 return n > 0 && n&(n-1) == 0
470 func isPowerOfTwo16(n int16) bool {
471 return n > 0 && n&(n-1) == 0
473 func isPowerOfTwo32(n int32) bool {
474 return n > 0 && n&(n-1) == 0
476 func isPowerOfTwo64(n int64) bool {
477 return n > 0 && n&(n-1) == 0
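// For example (hand-checked): isPowerOfTwo64(8) == true, while
// isPowerOfTwo64(0) and isPowerOfTwo64(-8) are false because of the n > 0 test.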
480 // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
481 func isUint64PowerOfTwo(in int64) bool {
482 n := uint64(in)
483 return n > 0 && n&(n-1) == 0
486 // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
487 func isUint32PowerOfTwo(in int64) bool {
488 n := uint64(uint32(in))
489 return n > 0 && n&(n-1) == 0
492 // is32Bit reports whether n can be represented as a signed 32 bit integer.
493 func is32Bit(n int64) bool {
494 return n == int64(int32(n))
497 // is16Bit reports whether n can be represented as a signed 16 bit integer.
498 func is16Bit(n int64) bool {
499 return n == int64(int16(n))
502 // is8Bit reports whether n can be represented as a signed 8 bit integer.
503 func is8Bit(n int64) bool {
504 return n == int64(int8(n))
507 // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
508 func isU8Bit(n int64) bool {
509 return n == int64(uint8(n))
512 // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
513 func isU12Bit(n int64) bool {
514 return 0 <= n && n < (1<<12)
517 // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
518 func isU16Bit(n int64) bool {
519 return n == int64(uint16(n))
522 // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
523 func isU32Bit(n int64) bool {
524 return n == int64(uint32(n))
527 // is20Bit reports whether n can be represented as a signed 20 bit integer.
528 func is20Bit(n int64) bool {
529 return -(1<<19) <= n && n < (1<<19)
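// A few hand-checked cases of the range helpers:
//
//	is32Bit(1<<31 - 1) == true  // math.MaxInt32
//	is32Bit(1 << 31)   == false // one past math.MaxInt32
//	isU12Bit(4095)     == true
//	isU12Bit(4096)     == false
//	is20Bit(-1 << 19)  == true  // smallest signed 20-bit value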
532 // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
533 func b2i(b bool) int64 {
540 // b2i32 translates a boolean value to 0 or 1.
541 func b2i32(b bool) int32 {
548 // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
549 // A shift is bounded if it is shifting by less than the width of the shifted value.
550 func shiftIsBounded(v *Value) bool {
554 // canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
555 // generated code as much as possible.
556 func canonLessThan(x, y *Value) bool {
560 if !x.Pos.SameFileAndLine(y.Pos) {
561 return x.Pos.Before(y.Pos)
566 // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
567 // of the mantissa. It will panic if the truncation results in lost information.
568 func truncate64Fto32F(f float64) float32 {
569 if !isExactFloat32(f) {
570 panic("truncate64Fto32F: truncation is not exact")
575 // NaN bit patterns aren't necessarily preserved across conversion
576 // instructions so we need to do the conversion manually.
577 b := math.Float64bits(f)
578 m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
579 // | sign | exponent | mantissa |
580 r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
581 return math.Float32frombits(r)
584 // extend32Fto64F converts a float32 value to a float64 value preserving the bit
585 // pattern of the mantissa.
586 func extend32Fto64F(f float32) float64 {
587 if !math.IsNaN(float64(f)) {
588 return float64(f)
589 }
590 // NaN bit patterns aren't necessarily preserved across conversion
591 // instructions so we need to do the conversion manually.
592 b := uint64(math.Float32bits(f))
593 // | sign | exponent | mantissa |
594 r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
595 return math.Float64frombits(r)
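// Worked example (hand-computed, not from the original source): the quiet
// float32 NaN 0x7FC00001 extends to 0x7FF8000020000000; the 23-bit mantissa
// payload lands in the top bits of the float64's 52-bit mantissa.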
598 // DivisionNeedsFixUp reports whether the division needs fix-up code.
599 func DivisionNeedsFixUp(v *Value) bool {
603 // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
604 func auxFrom64F(f float64) int64 {
606 panic("can't encode a NaN in AuxInt field")
608 return int64(math.Float64bits(f))
611 // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
612 func auxFrom32F(f float32) int64 {
614 panic("can't encode a NaN in AuxInt field")
616 return int64(math.Float64bits(extend32Fto64F(f)))
619 // auxTo32F decodes a float32 from the AuxInt value provided.
620 func auxTo32F(i int64) float32 {
621 return truncate64Fto32F(math.Float64frombits(uint64(i)))
624 // auxTo64F decodes a float64 from the AuxInt value provided.
625 func auxTo64F(i int64) float64 {
626 return math.Float64frombits(uint64(i))
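// Round-trip sketch (assumed behavior): a float32 constant is widened and
// stored as float64 bits, so auxTo32F(auxFrom32F(1.5)) == 1.5 and
// auxTo64F(auxFrom64F(2.5)) == 2.5. NaNs are rejected by the panics above.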
629 func auxIntToBool(i int64) bool {
635 func auxIntToInt8(i int64) int8 {
638 func auxIntToInt16(i int64) int16 {
641 func auxIntToInt32(i int64) int32 {
644 func auxIntToInt64(i int64) int64 {
647 func auxIntToUint8(i int64) uint8 {
650 func auxIntToFloat32(i int64) float32 {
651 return float32(math.Float64frombits(uint64(i)))
653 func auxIntToFloat64(i int64) float64 {
654 return math.Float64frombits(uint64(i))
656 func auxIntToValAndOff(i int64) ValAndOff {
659 func auxIntToArm64BitField(i int64) arm64BitField {
660 return arm64BitField(i)
662 func auxIntToInt128(x int64) int128 {
664 panic("nonzero int128 not allowed")
668 func auxIntToFlagConstant(x int64) flagConstant {
669 return flagConstant(x)
672 func auxIntToOp(cc int64) Op {
676 func boolToAuxInt(b bool) int64 {
682 func int8ToAuxInt(i int8) int64 {
685 func int16ToAuxInt(i int16) int64 {
688 func int32ToAuxInt(i int32) int64 {
691 func int64ToAuxInt(i int64) int64 {
694 func uint8ToAuxInt(i uint8) int64 {
695 return int64(int8(i))
697 func float32ToAuxInt(f float32) int64 {
698 return int64(math.Float64bits(float64(f)))
700 func float64ToAuxInt(f float64) int64 {
701 return int64(math.Float64bits(f))
703 func valAndOffToAuxInt(v ValAndOff) int64 {
706 func arm64BitFieldToAuxInt(v arm64BitField) int64 {
709 func int128ToAuxInt(x int128) int64 {
711 panic("nonzero int128 not allowed")
715 func flagConstantToAuxInt(x flagConstant) int64 {
719 func opToAuxInt(o Op) int64 {
723 // Aux is an interface to hold miscellaneous data in Blocks and Values.
728 // for now only used to mark moves that need to avoid clobbering flags
729 type auxMark bool
731 func (auxMark) CanBeAnSSAAux() {}
735 // stringAux wraps string values for use in Aux.
736 type stringAux string
738 func (stringAux) CanBeAnSSAAux() {}
740 func auxToString(i Aux) string {
741 return string(i.(stringAux))
743 func auxToSym(i Aux) Sym {
744 // TODO: kind of a hack - allows nil interface through
748 func auxToType(i Aux) *types.Type {
749 return i.(*types.Type)
751 func auxToCall(i Aux) *AuxCall {
754 func auxToS390xCCMask(i Aux) s390x.CCMask {
755 return i.(s390x.CCMask)
757 func auxToS390xRotateParams(i Aux) s390x.RotateParams {
758 return i.(s390x.RotateParams)
761 func StringToAux(s string) Aux {
764 func symToAux(s Sym) Aux {
767 func callToAux(s *AuxCall) Aux {
770 func typeToAux(t *types.Type) Aux {
773 func s390xCCMaskToAux(c s390x.CCMask) Aux {
776 func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
780 // uaddOvf reports whether unsigned a+b would overflow.
781 func uaddOvf(a, b int64) bool {
782 return uint64(a)+uint64(b) < uint64(a)
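// Hand-checked examples: uaddOvf(1, 2) == false, while uaddOvf(-1, 1) == true
// because uint64(-1)+1 wraps to 0, which is less than uint64(-1).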
785 // loadLSymOffset simulates reading a word at an offset into a
786 // read-only symbol's runtime memory. If it would read a pointer to
787 // another symbol, that symbol is returned. Otherwise, it returns nil.
788 func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
789 if lsym.Type != objabi.SRODATA {
793 for _, r := range lsym.R {
794 if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
802 // de-virtualize an InterLECall
803 // 'sym' is the symbol for the itab.
804 func devirtLESym(v *Value, aux Aux, sym Sym, offset int64) *obj.LSym {
805 n, ok := sym.(*obj.LSym)
810 lsym := loadLSymOffset(n, offset)
811 if f := v.Block.Func; f.pass.debug > 0 {
813 f.Warnl(v.Pos, "de-virtualizing call")
815 f.Warnl(v.Pos, "couldn't de-virtualize call")
821 func devirtLECall(v *Value, sym *obj.LSym) *Value {
822 v.Op = OpStaticLECall
823 auxcall := v.Aux.(*AuxCall)
824 auxcall.Fn = sym
825 // Remove first arg
826 v.Args[0].Uses--
827 copy(v.Args[0:], v.Args[1:])
828 v.Args[len(v.Args)-1] = nil // aid GC
829 v.Args = v.Args[:len(v.Args)-1]
833 // isSamePtr reports whether p1 and p2 point to the same address.
834 func isSamePtr(p1, p2 *Value) bool {
835 if p1 == p2 {
836 return true
837 }
838 if p1.Op != p2.Op {
839 return false
840 }
841 switch p1.Op {
842 case OpOffPtr:
843 return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
844 case OpAddr, OpLocalAddr:
845 return p1.Aux == p2.Aux
846 case OpAddPtr:
847 return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
852 func isStackPtr(v *Value) bool {
853 for v.Op == OpOffPtr || v.Op == OpAddPtr {
854 v = v.Args[0]
855 }
856 return v.Op == OpSP || v.Op == OpLocalAddr
859 // disjoint reports whether the memory region specified by [p1:p1+n1)
860 // does not overlap with [p2:p2+n2).
861 // A return value of false does not imply the regions overlap.
862 func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
863 if n1 == 0 || n2 == 0 {
869 baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
870 base, offset = ptr, 0
871 for base.Op == OpOffPtr {
872 offset += base.AuxInt
873 base = base.Args[0]
877 p1, off1 := baseAndOffset(p1)
878 p2, off2 := baseAndOffset(p2)
879 if isSamePtr(p1, p2) {
880 return !overlap(off1, n1, off2, n2)
882 // p1 and p2 are not the same, so if they are both OpAddrs then
883 // they point to different variables.
884 // If one pointer is on the stack and the other is an argument
885 // then they can't overlap.
887 case OpAddr, OpLocalAddr:
888 if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
889 return true
890 }
891 return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
892 case OpArg, OpArgIntReg:
893 if p2.Op == OpSP || p2.Op == OpLocalAddr {
894 return true
895 }
896 case OpSP:
897 return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
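// For example (assumed values): two OpOffPtr addresses with the same base,
// offsets 0 and 8, and sizes 8 and 8 are disjoint, since [0,8) and [8,16)
// do not overlap.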
902 // moveSize returns the number of bytes an aligned MOV instruction moves.
903 func moveSize(align int64, c *Config) int64 {
904 switch {
905 case align%8 == 0 && c.PtrSize == 8:
915 // mergePoint finds a block among a's blocks which dominates b and is itself
916 // dominated by all of a's blocks. Returns nil if it can't find one.
917 // Might return nil even if one does exist.
918 func mergePoint(b *Block, a ...*Value) *Block {
919 // Walk backward from b looking for one of the a's blocks.
925 for _, x := range a {
930 if len(b.Preds) > 1 {
931 // Don't know which way to go back. Abort.
937 return nil // too far away
939 // At this point, r is the first value in a that we find by walking backwards.
940 // If we return anything, r will be it.
943 // Keep going, counting the other a's that we find. They must all dominate r.
946 for _, x := range a {
952 // Found all of a in a backwards walk. We can return r.
955 if len(b.Preds) > 1 {
962 return nil // too far away
965 // clobber invalidates values. Returns true.
966 // clobber is used by rewrite rules to:
968 // A) make sure the values are really dead and never used again.
969 // B) decrement use counts of the values' args.
970 func clobber(vv ...*Value) bool {
971 for _, v := range vv {
972 v.reset(OpInvalid)
973 // Note: leave v.Block intact. The Block field is used after clobber.
978 // clobberIfDead resets v when use count is 1. Returns true.
979 // clobberIfDead is used by rewrite rules to decrement
980 // use counts of v's args when v is dead and never used.
981 func clobberIfDead(v *Value) bool {
982 if v.Uses == 1 {
983 v.reset(OpInvalid)
984 }
985 // Note: leave v.Block intact. The Block field is used after clobberIfDead.
989 // noteRule is an easy way to track if a rule is matched when writing
990 // new ones. Make the rule of interest also conditional on
992 // noteRule("note to self: rule of interest matched")
994 // and that message will print when the rule matches.
995 func noteRule(s string) bool {
1000 // countRule increments Func.ruleMatches[key].
1001 // If Func.ruleMatches is non-nil at the end
1002 // of compilation, it will be printed to stdout.
1003 // This is intended to make it easier to find functions
1004 // that contain lots of rule matches when developing new rules.
1005 func countRule(v *Value, key string) bool {
1007 if f.ruleMatches == nil {
1008 f.ruleMatches = make(map[string]int)
1010 f.ruleMatches[key]++
1014 // warnRule generates compiler debug output with string s when
1015 // v is not in autogenerated code, cond is true and the rule has fired.
1016 func warnRule(cond bool, v *Value, s string) bool {
1017 if pos := v.Pos; pos.Line() > 1 && cond {
1018 v.Block.Func.Warnl(pos, s)
1023 // for a pseudo-op like (LessThan x), extract x.
1024 func flagArg(v *Value) *Value {
1025 if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
1031 // arm64Negate finds the complement to an ARM64 condition code,
1032 // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
1034 // For floating point, it's more subtle because NaN is unordered. We do
1035 // !LessThanF -> NotLessThanF; the latter takes care of NaNs.
1036 func arm64Negate(op Op) Op {
1038 case OpARM64LessThan:
1039 return OpARM64GreaterEqual
1040 case OpARM64LessThanU:
1041 return OpARM64GreaterEqualU
1042 case OpARM64GreaterThan:
1043 return OpARM64LessEqual
1044 case OpARM64GreaterThanU:
1045 return OpARM64LessEqualU
1046 case OpARM64LessEqual:
1047 return OpARM64GreaterThan
1048 case OpARM64LessEqualU:
1049 return OpARM64GreaterThanU
1050 case OpARM64GreaterEqual:
1051 return OpARM64LessThan
1052 case OpARM64GreaterEqualU:
1053 return OpARM64LessThanU
1054 case OpARM64Equal:
1055 return OpARM64NotEqual
1056 case OpARM64NotEqual:
1057 return OpARM64Equal
1058 case OpARM64LessThanF:
1059 return OpARM64NotLessThanF
1060 case OpARM64NotLessThanF:
1061 return OpARM64LessThanF
1062 case OpARM64LessEqualF:
1063 return OpARM64NotLessEqualF
1064 case OpARM64NotLessEqualF:
1065 return OpARM64LessEqualF
1066 case OpARM64GreaterThanF:
1067 return OpARM64NotGreaterThanF
1068 case OpARM64NotGreaterThanF:
1069 return OpARM64GreaterThanF
1070 case OpARM64GreaterEqualF:
1071 return OpARM64NotGreaterEqualF
1072 case OpARM64NotGreaterEqualF:
1073 return OpARM64GreaterEqualF
1075 panic("unreachable")
1079 // arm64Invert evaluates (InvertFlags op), which
1080 // is the same as altering the condition codes such
1081 // that the same result would be produced if the arguments
1082 // to the flag-generating instruction were reversed, e.g.
1083 // (InvertFlags (CMP x y)) -> (CMP y x)
1084 func arm64Invert(op Op) Op {
1086 case OpARM64LessThan:
1087 return OpARM64GreaterThan
1088 case OpARM64LessThanU:
1089 return OpARM64GreaterThanU
1090 case OpARM64GreaterThan:
1091 return OpARM64LessThan
1092 case OpARM64GreaterThanU:
1093 return OpARM64LessThanU
1094 case OpARM64LessEqual:
1095 return OpARM64GreaterEqual
1096 case OpARM64LessEqualU:
1097 return OpARM64GreaterEqualU
1098 case OpARM64GreaterEqual:
1099 return OpARM64LessEqual
1100 case OpARM64GreaterEqualU:
1101 return OpARM64LessEqualU
1102 case OpARM64Equal, OpARM64NotEqual:
1103 return op
1104 case OpARM64LessThanF:
1105 return OpARM64GreaterThanF
1106 case OpARM64GreaterThanF:
1107 return OpARM64LessThanF
1108 case OpARM64LessEqualF:
1109 return OpARM64GreaterEqualF
1110 case OpARM64GreaterEqualF:
1111 return OpARM64LessEqualF
1112 case OpARM64NotLessThanF:
1113 return OpARM64NotGreaterThanF
1114 case OpARM64NotGreaterThanF:
1115 return OpARM64NotLessThanF
1116 case OpARM64NotLessEqualF:
1117 return OpARM64NotGreaterEqualF
1118 case OpARM64NotGreaterEqualF:
1119 return OpARM64NotLessEqualF
1121 panic("unreachable")
1125 // evaluate an ARM64 op against a flags value
1126 // that is potentially constant; return 1 for true,
1127 // -1 for false, and 0 for not constant.
1128 func ccARM64Eval(op Op, flags *Value) int {
1129 fop := flags.Op
1130 if fop == OpARM64InvertFlags {
1131 return -ccARM64Eval(op, flags.Args[0])
1133 if fop != OpARM64FlagConstant {
1136 fc := flagConstant(flags.AuxInt)
1137 b2i := func(b bool) int {
1146 case OpARM64NotEqual:
1148 case OpARM64LessThan:
1150 case OpARM64LessThanU:
1151 return b2i(fc.ult())
1152 case OpARM64GreaterThan:
1154 case OpARM64GreaterThanU:
1155 return b2i(fc.ugt())
1156 case OpARM64LessEqual:
1158 case OpARM64LessEqualU:
1159 return b2i(fc.ule())
1160 case OpARM64GreaterEqual:
1162 case OpARM64GreaterEqualU:
1163 return b2i(fc.uge())
1168 // logRule logs the use of the rule s. This will only be enabled if
1169 // rewrite rules were generated with the -log option; see _gen/rulegen.go.
1170 func logRule(s string) {
1171 if ruleFile == nil {
1172 // Open a log file to write log to. We open in append
1173 // mode because all.bash runs the compiler lots of times,
1174 // and we want the concatenation of all of those logs.
1175 // This means, of course, that users need to rm the old log
1176 // to get fresh data.
1177 // TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
1178 w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
1179 os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
1185 _, err := fmt.Fprintln(ruleFile, s)
1191 var ruleFile io.Writer
1193 func min(x, y int64) int64 {
1200 func isConstZero(v *Value) bool {
1201 switch v.Op {
1202 case OpConstNil:
1203 return true
1204 case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
1205 return v.AuxInt == 0
1210 // reciprocalExact64 reports whether 1/c is exactly representable.
1211 func reciprocalExact64(c float64) bool {
1212 b := math.Float64bits(c)
1213 man := b & (1<<52 - 1)
1214 if man != 0 {
1215 return false // not a power of 2, denormal, or NaN
1216 }
1217 exp := b >> 52 & (1<<11 - 1)
1218 // exponent bias is 0x3ff. So taking the reciprocal of a number
1219 // changes the exponent to 0x7fe-exp.
1224 return false // ±inf
1226 return false // exponent is not representable
1232 // reciprocalExact32 reports whether 1/c is exactly representable.
1233 func reciprocalExact32(c float32) bool {
1234 b := math.Float32bits(c)
1235 man := b & (1<<23 - 1)
1236 if man != 0 {
1237 return false // not a power of 2, denormal, or NaN
1238 }
1239 exp := b >> 23 & (1<<8 - 1)
1240 // exponent bias is 0x7f. So taking the reciprocal of a number
1241 // changes the exponent to 0xfe-exp.
1246 return false // ±inf
1248 return false // exponent is not representable
1254 // check if an immediate can be directly encoded into an ARM instruction.
1255 func isARMImmRot(v uint32) bool {
1256 for i := 0; i < 16; i++ {
1266 // overlap reports whether the ranges given by the offset and
1267 // size pairs overlap.
1268 func overlap(offset1, size1, offset2, size2 int64) bool {
1269 if offset1 >= offset2 && offset2+size2 > offset1 {
1270 return true
1271 }
1272 if offset2 >= offset1 && offset1+size1 > offset2 {
1278 func areAdjacentOffsets(off1, off2, size int64) bool {
1279 return off1+size == off2 || off1 == off2+size
1282 // check if value zeroes out the upper 32 bits of a 64-bit register.
1283 // depth limits the recursion depth. In AMD64.rules 3 is used as the limit,
1284 // because it catches the same number of cases as 4.
1285 func zeroUpper32Bits(x *Value, depth int) bool {
1287 case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
1288 OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
1289 OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
1290 OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
1291 OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
1292 OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
1293 OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
1294 OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
1295 OpAMD64SHLL, OpAMD64SHLLconst:
1297 case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
1298 OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
1299 OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
1300 return true
1301 case OpArg:
1302 return x.Type.Size() == 4
1303 case OpPhi, OpSelect0, OpSelect1:
1304 // Phis can use each other as arguments; instead of tracking visited values,
1305 // just limit the recursion depth.
1306 if depth <= 0 {
1307 return false
1308 }
1309 for i := range x.Args {
1310 if !zeroUpper32Bits(x.Args[i], depth-1) {
1320 // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
1321 func zeroUpper48Bits(x *Value, depth int) bool {
1323 case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
1324 return true
1325 case OpArg:
1326 return x.Type.Size() == 2
1327 case OpPhi, OpSelect0, OpSelect1:
1328 // Phis can use each other as arguments; instead of tracking visited values,
1329 // just limit the recursion depth.
1330 if depth <= 0 {
1331 return false
1332 }
1333 for i := range x.Args {
1334 if !zeroUpper48Bits(x.Args[i], depth-1) {
1344 // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
1345 func zeroUpper56Bits(x *Value, depth int) bool {
1347 case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
1348 return true
1349 case OpArg:
1350 return x.Type.Size() == 1
1351 case OpPhi, OpSelect0, OpSelect1:
1352 // Phis can use each other as arguments; instead of tracking visited values,
1353 // just limit the recursion depth.
1354 if depth <= 0 {
1355 return false
1356 }
1357 for i := range x.Args {
1358 if !zeroUpper56Bits(x.Args[i], depth-1) {
1368 func isInlinableMemclr(c *Config, sz int64) bool {
1372 // TODO: expand this check to allow other architectures
1373 // see CL 454255 and issue 56997
1375 case "amd64", "arm64":
1377 case "ppc64le", "ppc64":
1383 // isInlinableMemmove reports whether the given arch performs a Move of the given size
1384 // faster than memmove. It will only return true if replacing the memmove with a Move is
1385 // safe, either because Move will do all of its loads before any of its stores, or
1386 // because the arguments are known to be disjoint.
1387 // This is used as a check for replacing memmove with Move ops.
1388 func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1389 // It is always safe to convert memmove into Move when its arguments are disjoint.
1390 // Move ops may or may not be faster for large sizes depending on how the platform
1391 // lowers them, so we only perform this optimization on platforms that we know to
1392 // have fast Move ops.
1393 switch c.arch {
1394 case "amd64":
1395 return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
1396 case "386", "arm64":
1398 case "s390x", "ppc64", "ppc64le":
1399 return sz <= 8 || disjoint(dst, sz, src, sz)
1400 case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
1405 func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1406 return isInlinableMemmove(dst, src, sz, c)
1409 // logLargeCopy logs the occurrence of a large copy.
1410 // The best place to do this is in the rewrite rules where the size of the move is easy to find.
1411 // "Large" is arbitrarily chosen to be 128 bytes; this may change.
1412 func logLargeCopy(v *Value, s int64) bool {
1413 if s < 128 {
1414 return true
1415 }
1416 if logopt.Enabled() {
1417 logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
1421 func LogLargeCopy(funcName string, pos src.XPos, s int64) {
1422 if s < 128 {
1423 return
1424 }
1425 if logopt.Enabled() {
1426 logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
1430 // hasSmallRotate reports whether the architecture has rotate instructions
1431 // for sizes < 32-bit. This is used to decide whether to promote some rotations.
1432 func hasSmallRotate(c *Config) bool {
1434 case "amd64", "386":
1441 func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
1442 if sh < 0 || sh >= sz {
1443 panic("PPC64 shift arg sh out of range")
1445 if mb < 0 || mb >= sz {
1446 panic("PPC64 shift arg mb out of range")
1448 if me < 0 || me >= sz {
1449 panic("PPC64 shift arg me out of range")
1451 return int32(sh<<16 | mb<<8 | me)
1454 func GetPPC64Shiftsh(auxint int64) int64 {
1455 return int64(int8(auxint >> 16))
1458 func GetPPC64Shiftmb(auxint int64) int64 {
1459 return int64(int8(auxint >> 8))
1462 func GetPPC64Shiftme(auxint int64) int64 {
1463 return int64(int8(auxint))
1466 // Test if this value can be encoded as a mask for an rlwinm-like
1467 // operation. Masks can also extend from the msb and wrap to
1468 // the lsb. That is, the valid masks are 32 bit strings
1469 // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
1470 func isPPC64WordRotateMask(v64 int64) bool {
1471 // Isolate rightmost 1 (if none 0) and add.
1472 v := uint64(v64)
1473 vp := (v & -v) + v
1474 // Likewise, for the wrapping case.
1475 vn := ^v
1476 vpn := (vn & -vn) + vn
1477 return (v&vp == 0 || vn&vpn == 0) && v != 0
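// Hand-checked examples: isPPC64WordRotateMask(0x00FF0000) == true (a single
// contiguous run of ones), while isPPC64WordRotateMask(0x0F0F0F0F) == false
// (multiple runs). Wrapping masks of the form 1..10..01..1 pass via vn.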
1480 // Compress mask and shift into single value of the form
1481 // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
1482 // be used to regenerate the input mask.
1483 func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
1484 var mb, me, mbn, men int
1486 // Determine boundaries and then decode them
1487 if mask == 0 || ^mask == 0 || rotate >= nbits {
1488 panic("Invalid PPC64 rotate mask")
1489 } else if nbits == 32 {
1490 mb = bits.LeadingZeros32(uint32(mask))
1491 me = 32 - bits.TrailingZeros32(uint32(mask))
1492 mbn = bits.LeadingZeros32(^uint32(mask))
1493 men = 32 - bits.TrailingZeros32(^uint32(mask))
1495 mb = bits.LeadingZeros64(uint64(mask))
1496 me = 64 - bits.TrailingZeros64(uint64(mask))
1497 mbn = bits.LeadingZeros64(^uint64(mask))
1498 men = 64 - bits.TrailingZeros64(^uint64(mask))
1500 // Check for a wrapping mask (e.g. bits at 0 and 63).
1501 if mb == 0 && me == int(nbits) {
1502 // swap the inverted values
1503 mb, me = men, mbn
1506 return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
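// Worked example (hand-computed): for rotate == 2 and the 32-bit mask
// 0x00FFFF00, mb == 8 and me == 24, so the encoding is
// 24 | 8<<8 | 2<<16 | 32<<24 == 0x20020818.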
1509 // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask. The values returned as
1510 // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
1511 func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
1512 auxint := uint64(sauxint)
1513 rotate = int64((auxint >> 16) & 0xFF)
1514 mb = int64((auxint >> 8) & 0xFF)
1515 me = int64((auxint >> 0) & 0xFF)
1516 nbits := int64((auxint >> 24) & 0xFF)
1517 mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
1522 mask = uint64(uint32(mask))
1525 // Fixup ME to match ISA definition. The second argument to MASK(..,me)
1526 // is inclusive.
1527 me = (me - 1) & (nbits - 1)
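// Continuing the example above (hand-computed): decoding 0x20020818 recovers
// rotate == 2, nbits == 32, mask == ((1<<24)-1)^((1<<8)-1) == 0x00FFFF00,
// with mb == 8 and me == 23 after the inclusive-ME fixup.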
1531 // This verifies that the mask is a set of
1532 // consecutive bits including the least
1533 // significant bit.
1534 func isPPC64ValidShiftMask(v int64) bool {
1535 if (v != 0) && ((v+1)&v) == 0 {
1541 func getPPC64ShiftMaskLength(v int64) int64 {
1542 return int64(bits.Len64(uint64(v)))
1545 // Decompose a shift right into an equivalent rotate/mask,
1546 // and return mask & m.
1547 func mergePPC64RShiftMask(m, s, nbits int64) int64 {
1548 smask := uint64((1<<uint(nbits))-1) >> uint(s)
1549 return m & int64(smask)
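// For example (hand-checked): mergePPC64RShiftMask(0xFF, 28, 32) == 0xF,
// since only the low four mask bits survive a 28-bit right shift.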
1552 // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
1553 func mergePPC64AndSrwi(m, s int64) int64 {
1554 mask := mergePPC64RShiftMask(m, s, 32)
1555 if !isPPC64WordRotateMask(mask) {
1558 return encodePPC64RotateMask((32-s)&31, mask, 32)
1561 // Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
1562 // Return the encoded RLWINM constant, or 0 if they cannot be merged.
1563 func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
1564 mask_1 := uint64(0xFFFFFFFF >> uint(srw))
1565 // for CLRLSLDI, it's more convenient to think of it as masking left bits, then rotating left.
1566 mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1568 // Rewrite mask to apply after the final left shift.
1569 mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
1571 r_1 := 32 - srw
1572 r_2 := GetPPC64Shiftsh(sld)
1573 r_3 := (r_1 + r_2) & 31 // This can wrap.
1575 if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
1578 return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
1581 // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM. Return
1582 // the encoded RLWINM constant, or 0 if they cannot be merged.
1583 func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
1584 r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
1585 // for CLRLSLDI, it's more convenient to think of it as masking left bits, then rotating left.
1586 mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1588 // combine the masks, and adjust for the final left shift.
1589 mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
1590 r_2 := GetPPC64Shiftsh(int64(sld))
1591 r_3 := (r_1 + r_2) & 31 // This can wrap.
1593 // Verify the result is still a valid bitmask of <= 32 bits.
1594 if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
1597 return encodePPC64RotateMask(r_3, int64(mask_3), 32)
1600 // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
1601 // or return 0 if they cannot be combined.
1602 func mergePPC64SldiSrw(sld, srw int64) int64 {
1603 if sld > srw || srw >= 32 {
1606 mask_r := uint32(0xFFFFFFFF) >> uint(srw)
1607 mask_l := uint32(0xFFFFFFFF) >> uint(sld)
1608 mask := (mask_r & mask_l) << uint(sld)
1609 return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
1612 // Convenience function to rotate a 32 bit constant value by another constant.
1613 func rotateLeft32(v, rotate int64) int64 {
1614 return int64(bits.RotateLeft32(uint32(v), int(rotate)))
1617 func rotateRight64(v, rotate int64) int64 {
1618 return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
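// Hand-checked examples: rotateLeft32(0x80000001, 1) == 3, and
// rotateRight64(1, 1) == -1 << 63 (the sign bit).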
1621 // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
1622 func armBFAuxInt(lsb, width int64) arm64BitField {
1623 if lsb < 0 || lsb > 63 {
1624 panic("ARM(64) bit field lsb constant out of range")
1626 if width < 1 || lsb+width > 64 {
1627 panic("ARM(64) bit field width constant out of range")
1629 return arm64BitField(width | lsb<<8)
1632 // returns the lsb part of the auxInt field of arm64 bitfield ops.
1633 func (bfc arm64BitField) getARM64BFlsb() int64 {
1634 return int64(uint64(bfc) >> 8)
1637 // returns the width part of the auxInt field of arm64 bitfield ops.
1638 func (bfc arm64BitField) getARM64BFwidth() int64 {
1639 return int64(bfc) & 0xff
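// Round-trip example (hand-checked): armBFAuxInt(8, 8) encodes to 0x808, and
// getARM64BFlsb/getARM64BFwidth recover lsb == 8 and width == 8.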
1642 // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
1643 func isARM64BFMask(lsb, mask, rshift int64) bool {
1644 shiftedMask := int64(uint64(mask) >> uint64(rshift))
1645 return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
1648 // returns the bitfield width of mask >> rshift for arm64 bitfield ops.
1649 func arm64BFWidth(mask, rshift int64) int64 {
1650 shiftedMask := int64(uint64(mask) >> uint64(rshift))
1651 if shiftedMask == 0 {
1652 panic("ARM64 BF mask is zero")
1654 return nto(shiftedMask)
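// For example (hand-checked): arm64BFWidth(0xFF, 0) == 8, since the shifted
// mask 0xFF has eight trailing ones.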
1657 // sizeof returns the size of t in bytes.
1658 // It will panic if t is not a *types.Type.
1659 func sizeof(t interface{}) int64 {
1660 return t.(*types.Type).Size()
1663 // registerizable reports whether t is a primitive type that fits in
1664 // a register. It assumes float64 values will always fit into registers
1665 // even if that isn't strictly true.
1666 func registerizable(b *Block, typ *types.Type) bool {
1667 if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
1670 if typ.IsInteger() {
1671 return typ.Size() <= b.Func.Config.RegSize
1676 // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
1677 func needRaceCleanup(sym *AuxCall, v *Value) bool {
1682 if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
1685 for _, b := range f.Blocks {
1686 for _, v := range b.Values {
1688 case OpStaticCall, OpStaticLECall:
1689 // A check for racefuncenter will encounter racefuncexit and vice versa.
1690 // Allow calls to panic*
1691 s := v.Aux.(*AuxCall).Fn.String()
1693 case "runtime.racefuncenter", "runtime.racefuncexit",
1694 "runtime.panicdivide", "runtime.panicwrap",
1695 "runtime.panicshift":
1698 // If we encountered any call, we need to keep racefunc*,
1699 // for accurate stacktraces.
1701 case OpPanicBounds, OpPanicExtend:
1702 // Note: these are panic generators that are ok (like the static calls above).
1703 case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
1704 // We must keep the race functions if there are any other call types.
1709 if isSameCall(sym, "runtime.racefuncenter") {
1710 // TODO REGISTER ABI this needs to be cleaned up.
1711 // If we're removing racefuncenter, remove its argument as well.
1712 if v.Args[0].Op != OpStore {
1713 if v.Op == OpStaticLECall {
1714 // there is no store, yet.
1719 mem := v.Args[0].Args[2]
1720 v.Args[0].reset(OpCopy)
1721 v.Args[0].AddArg(mem)
1726 // symIsRO reports whether sym is a read-only global.
1727 func symIsRO(sym interface{}) bool {
1728 lsym := sym.(*obj.LSym)
1729 return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
1732 // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
1733 func symIsROZero(sym Sym) bool {
1734 lsym := sym.(*obj.LSym)
1735 if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
1738 for _, b := range lsym.P {
1746 // read8 reads one byte from the read-only global sym at offset off.
1747 func read8(sym interface{}, off int64) uint8 {
1748 lsym := sym.(*obj.LSym)
1749 if off >= int64(len(lsym.P)) || off < 0 {
1750 // Invalid index into the global sym.
1751 // This can happen in dead code, so we don't want to panic.
1752 // Just return any value; it will eventually get ignored.
1759 // read16 reads two bytes from the read-only global sym at offset off.
1760 func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
1761 lsym := sym.(*obj.LSym)
1762 // lsym.P is written lazily.
1763 // Bytes requested after the end of lsym.P are 0.
1765 if 0 <= off && off < int64(len(lsym.P)) {
1768 buf := make([]byte, 2)
1770 return byteorder.Uint16(buf)
1773 // read32 reads four bytes from the read-only global sym at offset off.
1774 func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
1775 lsym := sym.(*obj.LSym)
1777 if 0 <= off && off < int64(len(lsym.P)) {
1780 buf := make([]byte, 4)
1782 return byteorder.Uint32(buf)
1785 // read64 reads eight bytes from the read-only global sym at offset off.
1786 func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
1787 lsym := sym.(*obj.LSym)
1789 if 0 <= off && off < int64(len(lsym.P)) {
1792 buf := make([]byte, 8)
1794 return byteorder.Uint64(buf)
1797 // sequentialAddresses reports whether it can prove that x + n == y.
1798 func sequentialAddresses(x, y *Value, n int64) bool {
1799 if x == y && n == 0 {
1802 if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
1803 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1804 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1807 if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1808 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1809 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1812 if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
1813 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1814 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1817 if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1818 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1819 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1825 // flagConstant represents the result of a compile-time comparison.
1826 // The sense of these flags does not necessarily represent the hardware's notion
1827 // of a flags register - these are just a compile-time construct.
1828 // We happen to match the semantics to those of arm/arm64.
1829 // Note that these semantics differ from x86: the carry flag has the opposite
1830 // sense on a subtraction!
1832 // On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
1833 // On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
1834 // (because it does x + ^y + C).
1836 // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
1837 type flagConstant uint8
1839 // N reports whether the result of an operation is negative (high bit set).
1840 func (fc flagConstant) N() bool {
1844 // Z reports whether the result of an operation is 0.
1845 func (fc flagConstant) Z() bool {
1849 // C reports whether an unsigned add overflowed (carry), or an
1850 // unsigned subtract did not underflow (borrow).
1851 func (fc flagConstant) C() bool {
1855 // V reports whether a signed operation overflowed or underflowed.
1856 func (fc flagConstant) V() bool {
1860 func (fc flagConstant) eq() bool {
1863 func (fc flagConstant) ne() bool {
1866 func (fc flagConstant) lt() bool {
1867 return fc.N() != fc.V()
1869 func (fc flagConstant) le() bool {
1870 return fc.Z() || fc.lt()
1872 func (fc flagConstant) gt() bool {
1873 return !fc.Z() && fc.ge()
1875 func (fc flagConstant) ge() bool {
1876 return fc.N() == fc.V()
1878 func (fc flagConstant) ult() bool {
1881 func (fc flagConstant) ule() bool {
1882 return fc.Z() || fc.ult()
1884 func (fc flagConstant) ugt() bool {
1885 return !fc.Z() && fc.uge()
1887 func (fc flagConstant) uge() bool {
1891 func (fc flagConstant) ltNoov() bool {
1892 return fc.lt() && !fc.V()
1894 func (fc flagConstant) leNoov() bool {
1895 return fc.le() && !fc.V()
1897 func (fc flagConstant) gtNoov() bool {
1898 return fc.gt() && !fc.V()
1900 func (fc flagConstant) geNoov() bool {
1901 return fc.ge() && !fc.V()
1904 func (fc flagConstant) String() string {
1905 return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
1908 type flagConstantBuilder struct {
1909 N bool
1910 Z bool
1911 C bool
1912 V bool
1913 }
1915 func (fcs flagConstantBuilder) encode() flagConstant {
1932 // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
1933 // - the results of the C flag are different
1934 // - the results of the V flag when y==minint are different
1936 // addFlags64 returns the flags that would be set from computing x+y.
1937 func addFlags64(x, y int64) flagConstant {
1938 var fcb flagConstantBuilder
1939 fcb.Z = x+y == 0
1940 fcb.N = x+y < 0
1941 fcb.C = uint64(x+y) < uint64(x)
1942 fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1946 // subFlags64 returns the flags that would be set from computing x-y.
1947 func subFlags64(x, y int64) flagConstant {
1948 var fcb flagConstantBuilder
1949 fcb.Z = x-y == 0
1950 fcb.N = x-y < 0
1951 fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
1952 fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
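// Worked example (hand-checked): subFlags64(2, 3) computes 2-3 == -1, so
// N == true, Z == false, V == false, and C == false (a borrow occurred, in
// the arm sense). Hence lt() == (N != V) == true, as expected for 2 < 3.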
1956 // addFlags32 returns the flags that would be set from computing x+y.
1957 func addFlags32(x, y int32) flagConstant {
1958 var fcb flagConstantBuilder
1959 fcb.Z = x+y == 0
1960 fcb.N = x+y < 0
1961 fcb.C = uint32(x+y) < uint32(x)
1962 fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1966 // subFlags32 returns the flags that would be set from computing x-y.
1967 func subFlags32(x, y int32) flagConstant {
1968 var fcb flagConstantBuilder
1969 fcb.Z = x-y == 0
1970 fcb.N = x-y < 0
1971 fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
1972 fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
1976 // logicFlags64 returns flags set to the sign/zeroness of x.
1977 // C and V are set to false.
1978 func logicFlags64(x int64) flagConstant {
1979 var fcb flagConstantBuilder
1980 fcb.Z = x == 0
1981 fcb.N = x < 0
1982 return fcb.encode()
1983 }
1985 // logicFlags32 returns flags set to the sign/zeroness of x.
1986 // C and V are set to false.
1987 func logicFlags32(x int32) flagConstant {
1988 var fcb flagConstantBuilder
1989 fcb.Z = x == 0
1990 fcb.N = x < 0
1991 return fcb.encode()
1992 }
1994 func makeJumpTableSym(b *Block) *obj.LSym {
1995 s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.LSym(), b.ID))
1996 s.Set(obj.AttrDuplicateOK, true)
1997 s.Set(obj.AttrLocal, true)
2001 // canRotate reports whether the architecture supports
2002 // rotates of integer registers with the given number of bits.
2003 func canRotate(c *Config, bits int64) bool {
2004 if bits > c.PtrSize*8 {
2005 // Don't rewrite to rotates bigger than the machine word.
2009 case "386", "amd64", "arm64":
2011 case "arm", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
2018 // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
2019 func isARM64bitcon(x uint64) bool {
2020 if x == 1<<64-1 || x == 0 {
2021 return false
2022 }
2023 // determine the period and sign-extend a unit to 64 bits
2024 switch {
2025 case x != x>>32|x<<32:
2028 case x != x>>16|x<<48:
2030 x = uint64(int64(int32(x)))
2031 case x != x>>8|x<<56:
2033 x = uint64(int64(int16(x)))
2034 case x != x>>4|x<<60:
2036 x = uint64(int64(int8(x)))
2038 // period is 4 or 2, always true
2039 // 0001, 0010, 0100, 1000 -- 0001 rotate
2040 // 0011, 0110, 1100, 1001 -- 0011 rotate
2041 // 0111, 1011, 1101, 1110 -- 0111 rotate
2042 // 0101, 1010 -- 01 rotate, repeat
2045 return sequenceOfOnes(x) || sequenceOfOnes(^x)
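// Hand-checked examples: isARM64bitcon(0x00FF00FF00FF00FF) == true (the unit
// 0x00FF repeats with period 16), while isARM64bitcon(5) == false (0b101 is
// not a rotated run of ones at period 64).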
2048 // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
2049 func sequenceOfOnes(x uint64) bool {
2050 y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
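// Example from the comment above: for x == 0b0001110, y == 0b0000010 and
// x+y == 0b0010000, a power of two, so x is a sequence of ones.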
2055 // isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction.
2056 func isARM64addcon(v int64) bool {
2057 /* uimm12 or uimm24? */
2058 if v < 0 {
2059 return false
2060 }
2061 if (v & 0xFFF) == 0 {