1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/base"
9 "cmd/compile/internal/logopt"
10 "cmd/compile/internal/reflectdata"
11 "cmd/compile/internal/types"
13 "cmd/internal/obj/s390x"
26 type deadValueChoice bool
29 leaveDeadValues deadValueChoice = false
30 removeDeadValues = true
33 // deadcode indicates whether rewrite should try to remove any values that become dead.
34 func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
35 // repeat rewrites until we find no more rewrites
36 pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
40 fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
43 var states map[string]bool
47 for _, b := range f.Blocks {
52 b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
54 for i, c := range b.ControlValues() {
57 b.ReplaceControl(i, c)
63 fmt.Printf("rewriting %s -> %s\n", b0.LongString(), b.LongString())
66 for j, v := range b.Values {
71 v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
73 if v.Uses == 0 && v.removeable() {
74 if v.Op != OpInvalid && deadcode == removeDeadValues {
75 // Reset any values that are now unused, so that we decrement
76 // the use count of all of its arguments.
77 // Not quite a deadcode pass, because it does not handle cycles.
78 // But it should help Uses==1 rules to fire.
82 // No point rewriting values which aren't used.
86 vchange := phielimValue(v)
87 if vchange && debug > 1 {
88 fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
91 // Eliminate copy inputs.
92 // If any copy input becomes unused, mark it
93 // as invalid and discard its argument. Repeat
94 // recursively on the discarded argument.
95 // This phase helps remove phantom "dead copy" uses
96 // of a value so that an x.Uses==1 rule condition
98 for i, a := range v.Args {
104 // If a, a copy, has a line boundary indicator, attempt to find a new value
105 // to hold it. The first candidate is the value that will replace a (aa),
106 // if it shares the same block and line and is eligible.
107 // The second option is v, which has a as an input. Because aa is earlier in
108 // the data flow, it is the better choice.
109 if a.Pos.IsStmt() == src.PosIsStmt {
110 if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
111 aa.Pos = aa.Pos.WithIsStmt()
112 } else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
113 v.Pos = v.Pos.WithIsStmt()
115 // Record the lost line and look for a new home after all rewrites are complete.
116 // TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
117 // line to appear in more than one block, but only one block is stored, so if both end
118 // up here, then one will be lost.
119 pendingLines.set(a.Pos, int32(a.Block.ID))
121 a.Pos = a.Pos.WithNotStmt()
130 if vchange && debug > 1 {
131 fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
134 // apply rewrite function
137 // If value changed to a poor choice for a statement boundary, move the boundary
138 if v.Pos.IsStmt() == src.PosIsStmt {
139 if k := nextGoodStatementIndex(v, j, b); k != j {
140 v.Pos = v.Pos.WithNotStmt()
141 b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
146 change = change || vchange
147 if vchange && debug > 1 {
148 fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
152 if !change && !deadChange {
156 if (iters > 1000 || debug >= 2) && change {
157 // We've done a suspiciously large number of rewrites (or we're in debug mode).
158 // As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
159 // and the maximum value encountered during make.bash is 12.
160 // Start checking for cycles. (This is too expensive to do routinely.)
161 // Note: we avoid this path for deadChange-only iterations, to fix #51639.
163 states = make(map[string]bool)
166 if _, ok := states[h]; ok {
167 // We've found a cycle.
168 // To diagnose it, set debug to 2 and start again,
169 // so that we'll print all rules applied until we complete another cycle.
170 // If debug is already >= 2, we've already done that, so it's time to crash.
173 states = make(map[string]bool)
175 f.Fatalf("rewrite cycle detected")
181 // remove clobbered values
182 for _, b := range f.Blocks {
184 for i, v := range b.Values {
186 if v.Op == OpInvalid {
187 if v.Pos.IsStmt() == src.PosIsStmt {
188 pendingLines.set(vl, int32(b.ID))
193 if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
194 pendingLines.remove(vl)
195 v.Pos = v.Pos.WithIsStmt()
202 if pendingLines.get(b.Pos) == int32(b.ID) {
203 b.Pos = b.Pos.WithIsStmt()
204 pendingLines.remove(b.Pos)
210 // Common functions called from rewriting rules
212 func is64BitFloat(t *types.Type) bool {
213 return t.Size() == 8 && t.IsFloat()
216 func is32BitFloat(t *types.Type) bool {
217 return t.Size() == 4 && t.IsFloat()
220 func is64BitInt(t *types.Type) bool {
221 return t.Size() == 8 && t.IsInteger()
224 func is32BitInt(t *types.Type) bool {
225 return t.Size() == 4 && t.IsInteger()
228 func is16BitInt(t *types.Type) bool {
229 return t.Size() == 2 && t.IsInteger()
232 func is8BitInt(t *types.Type) bool {
233 return t.Size() == 1 && t.IsInteger()
236 func isPtr(t *types.Type) bool {
237 return t.IsPtrShaped()
240 // mergeSym merges two symbolic offsets. There is no real merging of
241 // offsets; we just pick the non-nil one.
242 func mergeSym(x, y Sym) Sym {
249 panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
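// For example (illustrative of the doc comment above):
//	mergeSym(nil, s) == s
//	mergeSym(s, nil) == s
//	mergeSym(s1, s2) // panics when both are non-nil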
252 func canMergeSym(x, y Sym) bool {
253 return x == nil || y == nil
256 // canMergeLoadClobber reports whether the load can be merged into target without
257 // invalidating the schedule.
258 // It also checks that the other non-load argument x is something we
259 // are ok with clobbering.
260 func canMergeLoadClobber(target, load, x *Value) bool {
261 // The register containing x is going to get clobbered.
262 // Don't merge if we still need the value of x.
263 // We don't have liveness information here, but we can
264 // approximate x dying with:
265 // 1) target is x's only use.
266 // 2) target is not in a deeper loop than x.
270 loopnest := x.Block.Func.loopnest()
271 loopnest.calculateDepths()
272 if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
275 return canMergeLoad(target, load)
278 // canMergeLoad reports whether the load can be merged into target without
279 // invalidating the schedule.
280 func canMergeLoad(target, load *Value) bool {
281 if target.Block.ID != load.Block.ID {
282 // If the load is in a different block do not merge it.
286 // We can't merge the load into the target if the load
287 // has more than one use.
292 mem := load.MemoryArg()
294 // We need the load's memory arg to still be alive at target. That
295 // can't be the case if one of target's args depends on a memory
296 // state that is a successor of load's memory arg.
298 // For example, it would be invalid to merge load into target in
299 // the following situation because newmem has killed oldmem
300 // before target is reached:
301 // load = read ... oldmem
302 // newmem = write ... oldmem
303 // arg0 = read ... newmem
304 // target = add arg0 load
306 // If the argument comes from a different block then we can exclude
307 // it immediately because it must dominate load (which is in the
308 // same block as target).
310 for _, a := range target.Args {
311 if a != load && a.Block.ID == target.Block.ID {
312 args = append(args, a)
316 // memPreds contains memory states known to be predecessors of load's
317 // memory state. It is lazily initialized.
318 var memPreds map[*Value]bool
319 for i := 0; len(args) > 0; i++ {
322 // Give up if we have done a lot of iterations.
325 v := args[len(args)-1]
326 args = args[:len(args)-1]
327 if target.Block.ID != v.Block.ID {
328 // Since target and load are in the same block
329 // we can stop searching when we leave the block.
333 // A Phi implies we have reached the top of the block.
334 // The memory phi, if it exists, is always
335 // the first logical store in the block.
338 if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
339 // We could handle this situation; however, it is likely
343 if v.Op.SymEffect()&SymAddr != 0 {
344 // This case prevents an operation that calculates the
345 // address of a local variable from being forced to schedule
346 // before its corresponding VarDef.
352 // We don't want to combine the CMPQ with the load, because
353 // that would force the CMPQ to schedule before the VARDEF, which
354 // in turn requires the LEAQ to schedule before the VARDEF.
357 if v.Type.IsMemory() {
359 // Initialize a map containing memory states
360 // known to be predecessors of load's memory
362 memPreds = make(map[*Value]bool)
365 for i := 0; i < limit; i++ {
367 // The memory phi, if it exists, is always
368 // the first logical store in the block.
371 if m.Block.ID != target.Block.ID {
374 if !m.Type.IsMemory() {
378 if len(m.Args) == 0 {
385 // We can merge if v is a predecessor of mem.
387 // For example, we can merge load into target in the
388 // following scenario:
391 // load = read ... mem
392 // target = add x load
398 if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
399 // If v takes mem as an input then we know mem
400 // is valid at this point.
403 for _, a := range v.Args {
404 if target.Block.ID == a.Block.ID {
405 args = append(args, a)
413 // isSameCall reports whether sym is the same as the given named symbol.
414 func isSameCall(sym interface{}, name string) bool {
415 fn := sym.(*AuxCall).Fn
416 return fn != nil && fn.String() == name
419 // canLoadUnaligned reports whether the architecture supports unaligned load operations.
420 func canLoadUnaligned(c *Config) bool {
421 return c.ctxt.Arch.Alignment == 1
424 // nlzX returns the number of leading zeros.
425 func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
426 func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
427 func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
428 func nlz8(x int8) int { return bits.LeadingZeros8(uint8(x)) }
430 // ntzX returns the number of trailing zeros.
431 func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
432 func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
433 func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
434 func ntz8(x int8) int { return bits.TrailingZeros8(uint8(x)) }
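// Worked examples for the nlzX/ntzX helpers:
//	nlz64(1) == 63 // only bit 0 is set
//	nlz32(1) == 31
//	ntz64(8) == 3  // 0b1000 has three trailing zeros
//	ntz16(0) == 16 // zero has no set bits at all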
436 func oneBit(x int64) bool { return x&(x-1) == 0 && x != 0 }
437 func oneBit8(x int8) bool { return x&(x-1) == 0 && x != 0 }
438 func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
439 func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
440 func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
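// For example:
//	oneBit64(8) == true  // 0b1000
//	oneBit64(6) == false // 0b0110
//	oneBit64(0) == false
// Unlike isPowerOfTwoX below, oneBit8(-128) is true: 0x80 has exactly one
// bit set even though it is negative.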
442 // nto returns the number of trailing ones.
443 func nto(x int64) int64 {
444 return int64(ntz64(^x))
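// For example:
//	nto(0b0111) == 3
//	nto(0) == 0
//	nto(-1) == 64 // all 64 bits are trailing ones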
447 // logX returns the base-2 logarithm of n.
448 // n must be a positive power of 2 (isPowerOfTwoX returns true).
449 func log8(n int8) int64 {
450 return int64(bits.Len8(uint8(n))) - 1
452 func log16(n int16) int64 {
453 return int64(bits.Len16(uint16(n))) - 1
455 func log32(n int32) int64 {
456 return int64(bits.Len32(uint32(n))) - 1
458 func log64(n int64) int64 {
459 return int64(bits.Len64(uint64(n))) - 1
462 // log2uint32 returns the base-2 logarithm of uint32(n), with log2(0) = -1.
464 func log2uint32(n int64) int64 {
465 return int64(bits.Len32(uint32(n))) - 1
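// Worked examples:
//	log8(8) == 3
//	log64(1) == 0
//	log2uint32(8) == 3
//	log2uint32(0) == -1 // bits.Len32(0) == 0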
468 // isPowerOfTwoX functions report whether n is a power of 2.
469 func isPowerOfTwo8(n int8) bool {
470 return n > 0 && n&(n-1) == 0
472 func isPowerOfTwo16(n int16) bool {
473 return n > 0 && n&(n-1) == 0
475 func isPowerOfTwo32(n int32) bool {
476 return n > 0 && n&(n-1) == 0
478 func isPowerOfTwo64(n int64) bool {
479 return n > 0 && n&(n-1) == 0
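// For example:
//	isPowerOfTwo64(8) == true
//	isPowerOfTwo64(0) == false   // excluded by n > 0
//	isPowerOfTwo8(-128) == false // negative values never qualify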
482 // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
483 func isUint64PowerOfTwo(in int64) bool {
485 return n > 0 && n&(n-1) == 0
488 // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
489 func isUint32PowerOfTwo(in int64) bool {
490 n := uint64(uint32(in))
491 return n > 0 && n&(n-1) == 0
494 // is32Bit reports whether n can be represented as a signed 32 bit integer.
495 func is32Bit(n int64) bool {
496 return n == int64(int32(n))
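// For example:
//	is32Bit(1<<31 - 1) == true // math.MaxInt32
//	is32Bit(1 << 31) == false  // one past math.MaxInt32
//	is32Bit(-1 << 31) == true  // math.MinInt32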
499 // is16Bit reports whether n can be represented as a signed 16 bit integer.
500 func is16Bit(n int64) bool {
501 return n == int64(int16(n))
504 // is8Bit reports whether n can be represented as a signed 8 bit integer.
505 func is8Bit(n int64) bool {
506 return n == int64(int8(n))
509 // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
510 func isU8Bit(n int64) bool {
511 return n == int64(uint8(n))
514 // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
515 func isU12Bit(n int64) bool {
516 return 0 <= n && n < (1<<12)
519 // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
520 func isU16Bit(n int64) bool {
521 return n == int64(uint16(n))
524 // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
525 func isU32Bit(n int64) bool {
526 return n == int64(uint32(n))
529 // is20Bit reports whether n can be represented as a signed 20 bit integer.
530 func is20Bit(n int64) bool {
531 return -(1<<19) <= n && n < (1<<19)
534 // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
535 func b2i(b bool) int64 {
542 // b2i32 translates a boolean value to 0 or 1.
543 func b2i32(b bool) int32 {
550 // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
551 // A shift is bounded if it is shifting by less than the width of the shifted value.
552 func shiftIsBounded(v *Value) bool {
556 // canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
557 // generated code as much as possible.
558 func canonLessThan(x, y *Value) bool {
562 if !x.Pos.SameFileAndLine(y.Pos) {
563 return x.Pos.Before(y.Pos)
568 // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
569 // of the mantissa. It will panic if the truncation results in lost information.
570 func truncate64Fto32F(f float64) float32 {
571 if !isExactFloat32(f) {
572 panic("truncate64Fto32F: truncation is not exact")
577 // NaN bit patterns aren't necessarily preserved across conversion
578 // instructions so we need to do the conversion manually.
579 b := math.Float64bits(f)
580 m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
581 // | sign | exponent | mantissa |
582 r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
583 return math.Float32frombits(r)
586 // extend32Fto64F converts a float32 value to a float64 value preserving the bit
587 // pattern of the mantissa.
588 func extend32Fto64F(f float32) float64 {
589 if !math.IsNaN(float64(f)) {
592 // NaN bit patterns aren't necessarily preserved across conversion
593 // instructions so we need to do the conversion manually.
594 b := uint64(math.Float32bits(f))
595 // | sign | exponent | mantissa |
596 r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
597 return math.Float64frombits(r)
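// For example, the float32 quiet-NaN bit (mantissa bit 22) lands on float64
// mantissa bit 51, so quiet NaNs stay quiet:
//	(b & 0x7fffff) << (52 - 23) // moves the 23 mantissa bits to the top of the 52-bit field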
600 // DivisionNeedsFixUp reports whether the division needs fix-up code.
601 func DivisionNeedsFixUp(v *Value) bool {
605 // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
606 func auxFrom64F(f float64) int64 {
608 panic("can't encode a NaN in AuxInt field")
610 return int64(math.Float64bits(f))
613 // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
614 func auxFrom32F(f float32) int64 {
616 panic("can't encode a NaN in AuxInt field")
618 return int64(math.Float64bits(extend32Fto64F(f)))
621 // auxTo32F decodes a float32 from the AuxInt value provided.
622 func auxTo32F(i int64) float32 {
623 return truncate64Fto32F(math.Float64frombits(uint64(i)))
626 // auxTo64F decodes a float64 from the AuxInt value provided.
627 func auxTo64F(i int64) float64 {
628 return math.Float64frombits(uint64(i))
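// The encodings round-trip; for any non-NaN float32 f:
//	auxTo32F(auxFrom32F(f)) == f
// (auxFrom32F and auxFrom64F panic on NaN, as shown above.)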
631 func auxIntToBool(i int64) bool {
637 func auxIntToInt8(i int64) int8 {
640 func auxIntToInt16(i int64) int16 {
643 func auxIntToInt32(i int64) int32 {
646 func auxIntToInt64(i int64) int64 {
649 func auxIntToUint8(i int64) uint8 {
652 func auxIntToFloat32(i int64) float32 {
653 return float32(math.Float64frombits(uint64(i)))
655 func auxIntToFloat64(i int64) float64 {
656 return math.Float64frombits(uint64(i))
658 func auxIntToValAndOff(i int64) ValAndOff {
661 func auxIntToArm64BitField(i int64) arm64BitField {
662 return arm64BitField(i)
664 func auxIntToInt128(x int64) int128 {
666 panic("nonzero int128 not allowed")
670 func auxIntToFlagConstant(x int64) flagConstant {
671 return flagConstant(x)
674 func auxIntToOp(cc int64) Op {
678 func boolToAuxInt(b bool) int64 {
684 func int8ToAuxInt(i int8) int64 {
687 func int16ToAuxInt(i int16) int64 {
690 func int32ToAuxInt(i int32) int64 {
693 func int64ToAuxInt(i int64) int64 {
696 func uint8ToAuxInt(i uint8) int64 {
697 return int64(int8(i))
699 func float32ToAuxInt(f float32) int64 {
700 return int64(math.Float64bits(float64(f)))
702 func float64ToAuxInt(f float64) int64 {
703 return int64(math.Float64bits(f))
705 func valAndOffToAuxInt(v ValAndOff) int64 {
708 func arm64BitFieldToAuxInt(v arm64BitField) int64 {
711 func int128ToAuxInt(x int128) int64 {
713 panic("nonzero int128 not allowed")
717 func flagConstantToAuxInt(x flagConstant) int64 {
721 func opToAuxInt(o Op) int64 {
725 // Aux is an interface to hold miscellaneous data in Blocks and Values.
730 // For now, auxMark is only used to mark moves that need to avoid clobbering flags.
733 func (auxMark) CanBeAnSSAAux() {}
737 // stringAux wraps string values for use in Aux.
738 type stringAux string
740 func (stringAux) CanBeAnSSAAux() {}
742 func auxToString(i Aux) string {
743 return string(i.(stringAux))
745 func auxToSym(i Aux) Sym {
746 // TODO: kind of a hack - allows nil interface through
750 func auxToType(i Aux) *types.Type {
751 return i.(*types.Type)
753 func auxToCall(i Aux) *AuxCall {
756 func auxToS390xCCMask(i Aux) s390x.CCMask {
757 return i.(s390x.CCMask)
759 func auxToS390xRotateParams(i Aux) s390x.RotateParams {
760 return i.(s390x.RotateParams)
763 func StringToAux(s string) Aux {
766 func symToAux(s Sym) Aux {
769 func callToAux(s *AuxCall) Aux {
772 func typeToAux(t *types.Type) Aux {
775 func s390xCCMaskToAux(c s390x.CCMask) Aux {
778 func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
782 // uaddOvf reports whether unsigned a+b would overflow.
783 func uaddOvf(a, b int64) bool {
784 return uint64(a)+uint64(b) < uint64(a)
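// For example:
//	uaddOvf(-1, 1) == true // uint64(-1)+1 wraps to 0
//	uaddOvf(1, 2) == false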
787 // loadLSymOffset simulates reading a word at an offset into a
788 // read-only symbol's runtime memory. If it would read a pointer to
789 // another symbol, that symbol is returned. Otherwise, it returns nil.
790 func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
791 if lsym.Type != objabi.SRODATA {
795 for _, r := range lsym.R {
796 if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
804 func devirtLECall(v *Value, sym *obj.LSym) *Value {
805 v.Op = OpStaticLECall
806 auxcall := v.Aux.(*AuxCall)
810 copy(v.Args[0:], v.Args[1:])
811 v.Args[len(v.Args)-1] = nil // aid GC
812 v.Args = v.Args[:len(v.Args)-1]
813 if f := v.Block.Func; f.pass.debug > 0 {
814 f.Warnl(v.Pos, "de-virtualizing call")
819 // isSamePtr reports whether p1 and p2 point to the same address.
820 func isSamePtr(p1, p2 *Value) bool {
829 return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
830 case OpAddr, OpLocalAddr:
831 return p1.Aux == p2.Aux
833 return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
838 func isStackPtr(v *Value) bool {
839 for v.Op == OpOffPtr || v.Op == OpAddPtr {
842 return v.Op == OpSP || v.Op == OpLocalAddr
845 // disjoint reports whether the memory region specified by [p1:p1+n1)
846 // does not overlap with [p2:p2+n2).
847 // A return value of false does not imply the regions overlap.
848 func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
849 if n1 == 0 || n2 == 0 {
855 baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
856 base, offset = ptr, 0
857 for base.Op == OpOffPtr {
858 offset += base.AuxInt
863 p1, off1 := baseAndOffset(p1)
864 p2, off2 := baseAndOffset(p2)
865 if isSamePtr(p1, p2) {
866 return !overlap(off1, n1, off2, n2)
868 // p1 and p2 are not the same, so if they are both OpAddrs then
869 // they point to different variables.
870 // If one pointer is on the stack and the other is an argument
871 // then they can't overlap.
873 case OpAddr, OpLocalAddr:
874 if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
877 return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
878 case OpArg, OpArgIntReg:
879 if p2.Op == OpSP || p2.Op == OpLocalAddr {
883 return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
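// For example, two OffPtrs off the same base are provably disjoint when their
// byte ranges do not overlap (using rewrite-rule notation for the pointers):
//	disjoint(OffPtr[0](p), 8, OffPtr[8](p), 8) == true  // [0,8) vs [8,16)
//	disjoint(OffPtr[0](p), 8, OffPtr[4](p), 8) == false // [0,8) vs [4,12) overlap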
888 // moveSize returns the number of bytes an aligned MOV instruction moves.
889 func moveSize(align int64, c *Config) int64 {
891 case align%8 == 0 && c.PtrSize == 8:
901 // mergePoint finds a block among a's blocks which dominates b and is itself
902 // dominated by all of a's blocks. Returns nil if it can't find one.
903 // Might return nil even if one does exist.
904 func mergePoint(b *Block, a ...*Value) *Block {
905 // Walk backward from b looking for one of the a's blocks.
911 for _, x := range a {
916 if len(b.Preds) > 1 {
917 // Don't know which way to go back. Abort.
923 return nil // too far away
925 // At this point, r is the first value in a that we find by walking backwards.
926 // If we return anything, r will be it.
929 // Keep going, counting the other a's that we find. They must all dominate r.
932 for _, x := range a {
938 // Found all of a in a backwards walk. We can return r.
941 if len(b.Preds) > 1 {
948 return nil // too far away
951 // clobber invalidates values. Returns true.
952 // clobber is used by rewrite rules to:
954 // A) make sure the values are really dead and never used again.
955 // B) decrement use counts of the values' args.
956 func clobber(vv ...*Value) bool {
957 for _, v := range vv {
959 // Note: leave v.Block intact. The Block field is used after clobber.
964 // clobberIfDead resets v when use count is 1. Returns true.
965 // clobberIfDead is used by rewrite rules to decrement
966 // use counts of v's args when v is dead and never used.
967 func clobberIfDead(v *Value) bool {
971 // Note: leave v.Block intact. The Block field is used after clobberIfDead.
975 // noteRule is an easy way to track if a rule is matched when writing
976 // new ones. Make the rule of interest also conditional on
978 // noteRule("note to self: rule of interest matched")
980 // and that message will print when the rule matches.
981 func noteRule(s string) bool {
986 // countRule increments Func.ruleMatches[key].
987 // If Func.ruleMatches is non-nil at the end
988 // of compilation, it will be printed to stdout.
989 // This is intended to make it easier to find functions
990 // that contain lots of rule matches when developing new rules.
991 func countRule(v *Value, key string) bool {
993 if f.ruleMatches == nil {
994 f.ruleMatches = make(map[string]int)
1000 // warnRule generates compiler debug output with string s when
1001 // v is not in autogenerated code, cond is true and the rule has fired.
1002 func warnRule(cond bool, v *Value, s string) bool {
1003 if pos := v.Pos; pos.Line() > 1 && cond {
1004 v.Block.Func.Warnl(pos, s)
1009 // for a pseudo-op like (LessThan x), extract x.
1010 func flagArg(v *Value) *Value {
1011 if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
1017 // arm64Negate finds the complement to an ARM64 condition code,
1018 // for example !Equal -> NotEqual or !LessThan -> GreaterEqual.
1020 // For floating point, it's more subtle because NaN is unordered. We do
1021 // !LessThanF -> NotLessThanF; the latter takes care of NaNs.
1022 func arm64Negate(op Op) Op {
1024 case OpARM64LessThan:
1025 return OpARM64GreaterEqual
1026 case OpARM64LessThanU:
1027 return OpARM64GreaterEqualU
1028 case OpARM64GreaterThan:
1029 return OpARM64LessEqual
1030 case OpARM64GreaterThanU:
1031 return OpARM64LessEqualU
1032 case OpARM64LessEqual:
1033 return OpARM64GreaterThan
1034 case OpARM64LessEqualU:
1035 return OpARM64GreaterThanU
1036 case OpARM64GreaterEqual:
1037 return OpARM64LessThan
1038 case OpARM64GreaterEqualU:
1039 return OpARM64LessThanU
1041 return OpARM64NotEqual
1042 case OpARM64NotEqual:
1044 case OpARM64LessThanF:
1045 return OpARM64NotLessThanF
1046 case OpARM64NotLessThanF:
1047 return OpARM64LessThanF
1048 case OpARM64LessEqualF:
1049 return OpARM64NotLessEqualF
1050 case OpARM64NotLessEqualF:
1051 return OpARM64LessEqualF
1052 case OpARM64GreaterThanF:
1053 return OpARM64NotGreaterThanF
1054 case OpARM64NotGreaterThanF:
1055 return OpARM64GreaterThanF
1056 case OpARM64GreaterEqualF:
1057 return OpARM64NotGreaterEqualF
1058 case OpARM64NotGreaterEqualF:
1059 return OpARM64GreaterEqualF
1061 panic("unreachable")
1065 // arm64Invert evaluates (InvertFlags op), which
1066 // is the same as altering the condition codes such
1067 // that the same result would be produced if the arguments
1068 // to the flag-generating instruction were reversed, e.g.
1069 // (InvertFlags (CMP x y)) -> (CMP y x)
1070 func arm64Invert(op Op) Op {
1072 case OpARM64LessThan:
1073 return OpARM64GreaterThan
1074 case OpARM64LessThanU:
1075 return OpARM64GreaterThanU
1076 case OpARM64GreaterThan:
1077 return OpARM64LessThan
1078 case OpARM64GreaterThanU:
1079 return OpARM64LessThanU
1080 case OpARM64LessEqual:
1081 return OpARM64GreaterEqual
1082 case OpARM64LessEqualU:
1083 return OpARM64GreaterEqualU
1084 case OpARM64GreaterEqual:
1085 return OpARM64LessEqual
1086 case OpARM64GreaterEqualU:
1087 return OpARM64LessEqualU
1088 case OpARM64Equal, OpARM64NotEqual:
1090 case OpARM64LessThanF:
1091 return OpARM64GreaterThanF
1092 case OpARM64GreaterThanF:
1093 return OpARM64LessThanF
1094 case OpARM64LessEqualF:
1095 return OpARM64GreaterEqualF
1096 case OpARM64GreaterEqualF:
1097 return OpARM64LessEqualF
1098 case OpARM64NotLessThanF:
1099 return OpARM64NotGreaterThanF
1100 case OpARM64NotGreaterThanF:
1101 return OpARM64NotLessThanF
1102 case OpARM64NotLessEqualF:
1103 return OpARM64NotGreaterEqualF
1104 case OpARM64NotGreaterEqualF:
1105 return OpARM64NotLessEqualF
1107 panic("unreachable")
1111 // ccARM64Eval evaluates an ARM64 op against a flags value
1112 // that is potentially constant; return 1 for true,
1113 // -1 for false, and 0 for not constant.
1114 func ccARM64Eval(op Op, flags *Value) int {
1116 if fop == OpARM64InvertFlags {
1117 return -ccARM64Eval(op, flags.Args[0])
1119 if fop != OpARM64FlagConstant {
1122 fc := flagConstant(flags.AuxInt)
1123 b2i := func(b bool) int {
1132 case OpARM64NotEqual:
1134 case OpARM64LessThan:
1136 case OpARM64LessThanU:
1137 return b2i(fc.ult())
1138 case OpARM64GreaterThan:
1140 case OpARM64GreaterThanU:
1141 return b2i(fc.ugt())
1142 case OpARM64LessEqual:
1144 case OpARM64LessEqualU:
1145 return b2i(fc.ule())
1146 case OpARM64GreaterEqual:
1148 case OpARM64GreaterEqualU:
1149 return b2i(fc.uge())
1154 // logRule logs the use of the rule s. This will only be enabled if
1155 // rewrite rules were generated with the -log option, see _gen/rulegen.go.
1156 func logRule(s string) {
1157 if ruleFile == nil {
1158 // Open a log file to write log to. We open in append
1159 // mode because all.bash runs the compiler lots of times,
1160 // and we want the concatenation of all of those logs.
1161 // This means, of course, that users need to rm the old log
1162 // to get fresh data.
1163 // TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
1164 w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
1165 os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
1171 _, err := fmt.Fprintln(ruleFile, s)
1177 var ruleFile io.Writer
1179 func min(x, y int64) int64 {
1186 func isConstZero(v *Value) bool {
1190 case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
1191 return v.AuxInt == 0
1196 // reciprocalExact64 reports whether 1/c is exactly representable.
1197 func reciprocalExact64(c float64) bool {
1198 b := math.Float64bits(c)
1199 man := b & (1<<52 - 1)
1201 return false // not a power of 2, denormal, or NaN
1203 exp := b >> 52 & (1<<11 - 1)
1204 // exponent bias is 0x3ff. So taking the reciprocal of a number
1205 // changes the exponent to 0x7fe-exp.
1210 return false // ±inf
1212 return false // exponent is not representable
1218 // reciprocalExact32 reports whether 1/c is exactly representable.
1219 func reciprocalExact32(c float32) bool {
1220 b := math.Float32bits(c)
1221 man := b & (1<<23 - 1)
1223 return false // not a power of 2, denormal, or NaN
1225 exp := b >> 23 & (1<<8 - 1)
1226 // exponent bias is 0x7f. So taking the reciprocal of a number
1227 // changes the exponent to 0xfe-exp.
1232 return false // ±inf
1234 return false // exponent is not representable
1240 // isARMImmRot reports whether v can be directly encoded as an immediate in an ARM instruction.
1241 func isARMImmRot(v uint32) bool {
1242 for i := 0; i < 16; i++ {
1252 // overlap reports whether the ranges given by the given offset and
1253 // size pairs overlap.
1254 func overlap(offset1, size1, offset2, size2 int64) bool {
1255 if offset1 >= offset2 && offset2+size2 > offset1 {
1258 if offset2 >= offset1 && offset1+size1 > offset2 {
1264 func areAdjacentOffsets(off1, off2, size int64) bool {
1265 return off1+size == off2 || off1 == off2+size
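// For example:
//	overlap(0, 8, 4, 4) == true  // [0,8) contains [4,8)
//	overlap(0, 4, 4, 4) == false // [0,4) and [4,8) merely touch
//	areAdjacentOffsets(0, 4, 4) == true // [0,4) is immediately followed by [4,8)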
1268 // zeroUpper32Bits reports whether x zeroes out the upper 32 bits of a 64-bit register.
1269 // depth limits the recursion depth. In AMD64.rules 3 is used as the limit,
1270 // because it catches the same number of cases as 4.
1271 func zeroUpper32Bits(x *Value, depth int) bool {
1273 case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
1274 OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
1275 OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
1276 OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
1277 OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
1278 OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
1279 OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
1280 OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
1281 OpAMD64SHLL, OpAMD64SHLLconst:
1283 case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
1284 OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
1285 OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
1288 return x.Type.Size() == 4
1289 case OpPhi, OpSelect0, OpSelect1:
1290 // Phis can use each other as arguments; instead of tracking visited values,
1291 // just limit the recursion depth.
1295 for i := range x.Args {
1296 if !zeroUpper32Bits(x.Args[i], depth-1) {
1306 // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
1307 func zeroUpper48Bits(x *Value, depth int) bool {
1309 case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
1312 return x.Type.Size() == 2
1313 case OpPhi, OpSelect0, OpSelect1:
1314 // Phis can use each other as arguments; instead of tracking visited values,
1315 // just limit the recursion depth.
1319 for i := range x.Args {
1320 if !zeroUpper48Bits(x.Args[i], depth-1) {
1330 // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
1331 func zeroUpper56Bits(x *Value, depth int) bool {
1333 case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
1336 return x.Type.Size() == 1
1337 case OpPhi, OpSelect0, OpSelect1:
1338 // Phis can use each other as arguments; instead of tracking visited values,
1339 // just limit the recursion depth.
1343 for i := range x.Args {
1344 if !zeroUpper56Bits(x.Args[i], depth-1) {
1354 func isInlinableMemclr(c *Config, sz int64) bool {
1358 // TODO: expand this check to allow other architectures
1359 // see CL 454255 and issue 56997
1361 case "amd64", "arm64":
1363 case "ppc64le", "ppc64":
1369 // isInlinableMemmove reports whether the given arch performs a Move of the given size
1370 // faster than memmove. It will only return true if replacing the memmove with a Move is
1371 // safe, either because Move will do all of its loads before any of its stores, or
1372 // because the arguments are known to be disjoint.
1373 // This is used as a check for replacing memmove with Move ops.
1374 func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1375 // It is always safe to convert memmove into Move when its arguments are disjoint.
1376 // Move ops may or may not be faster for large sizes depending on how the platform
1377 // lowers them, so we only perform this optimization on platforms that we know to
1378 // have fast Move ops.
1381 return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
1382 case "386", "arm64":
1384 case "s390x", "ppc64", "ppc64le":
1385 return sz <= 8 || disjoint(dst, sz, src, sz)
1386 case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
1391 func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1392 return isInlinableMemmove(dst, src, sz, c)
1395 // logLargeCopy logs the occurrence of a large copy.
1396 // The best place to do this is in the rewrite rules where the size of the move is easy to find.
1397 // "Large" is arbitrarily chosen to be 128 bytes; this may change.
1398 func logLargeCopy(v *Value, s int64) bool {
1402 if logopt.Enabled() {
1403 logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
1407 func LogLargeCopy(funcName string, pos src.XPos, s int64) {
1411 if logopt.Enabled() {
1412 logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
1416 // hasSmallRotate reports whether the architecture has rotate instructions
1417 // for sizes < 32-bit. This is used to decide whether to promote some rotations.
1418 func hasSmallRotate(c *Config) bool {
1420 case "amd64", "386":
1427 func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
1428 if sh < 0 || sh >= sz {
1429 panic("PPC64 shift arg sh out of range")
1431 if mb < 0 || mb >= sz {
1432 panic("PPC64 shift arg mb out of range")
1434 if me < 0 || me >= sz {
1435 panic("PPC64 shift arg me out of range")
1437 return int32(sh<<16 | mb<<8 | me)
1440 func GetPPC64Shiftsh(auxint int64) int64 {
1441 return int64(int8(auxint >> 16))
1444 func GetPPC64Shiftmb(auxint int64) int64 {
1445 return int64(int8(auxint >> 8))
1448 func GetPPC64Shiftme(auxint int64) int64 {
1449 return int64(int8(auxint))
1452 // isPPC64WordRotateMask reports whether this value can be encoded as a mask
1453 // for an rlwinm-like operation. Masks can also extend from the msb and wrap
1454 // to the lsb. That is, the valid masks are 32-bit strings
1455 // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
1456 func isPPC64WordRotateMask(v64 int64) bool {
1457 // Isolate rightmost 1 (if none 0) and add.
1460 // Likewise, for the wrapping case.
1462 vpn := (vn & -vn) + vn
1463 return (v&vp == 0 || vn&vpn == 0) && v != 0
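// The trick: adding the isolated rightmost 1 to a contiguous run of ones
// clears the entire run, so v&vp == 0 exactly when the ones are contiguous;
// vn handles runs that wrap around the word. For example:
//	isPPC64WordRotateMask(0x000000F0) == true  // 0..01..10..0
//	isPPC64WordRotateMask(0x00000505) == false // two separate runs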
1466 // Compress mask and shift into single value of the form
1467 // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
1468 // be used to regenerate the input mask.
1469 func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
1470 var mb, me, mbn, men int
1472 // Determine boundaries and then decode them
1473 if mask == 0 || ^mask == 0 || rotate >= nbits {
1474 panic("Invalid PPC64 rotate mask")
1475 } else if nbits == 32 {
1476 mb = bits.LeadingZeros32(uint32(mask))
1477 me = 32 - bits.TrailingZeros32(uint32(mask))
1478 mbn = bits.LeadingZeros32(^uint32(mask))
1479 men = 32 - bits.TrailingZeros32(^uint32(mask))
1481 mb = bits.LeadingZeros64(uint64(mask))
1482 me = 64 - bits.TrailingZeros64(uint64(mask))
1483 mbn = bits.LeadingZeros64(^uint64(mask))
1484 men = 64 - bits.TrailingZeros64(^uint64(mask))
1486 // Check for a wrapping mask (e.g. bits at 0 and 63)
1487 if mb == 0 && me == int(nbits) {
1488 // swap the inverted values
1492 return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
1495 // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask. The values returned as
1496 // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
1497 func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
1498 auxint := uint64(sauxint)
1499 rotate = int64((auxint >> 16) & 0xFF)
1500 mb = int64((auxint >> 8) & 0xFF)
1501 me = int64((auxint >> 0) & 0xFF)
1502 nbits := int64((auxint >> 24) & 0xFF)
1503 mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
1508 mask = uint64(uint32(mask))
1511 // Fix up ME to match the ISA definition. The second argument to MASK(..,me)
1513 me = (me - 1) & (nbits - 1)
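// Worked round trip: encodePPC64RotateMask(0, 0xFF, 32) computes mb=24 and
// me=32, packing them as auxint 0x20001820; DecodePPC64RotateMask then
// returns rotate=0, mb=24, me=31 (after the fixup above), mask=0xFF.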
1517 // isPPC64ValidShiftMask reports whether the mask is a set of
1518 // consecutive bits including the least significant bit.
1520 func isPPC64ValidShiftMask(v int64) bool {
1521 if (v != 0) && ((v+1)&v) == 0 {
1527 func getPPC64ShiftMaskLength(v int64) int64 {
1528 return int64(bits.Len64(uint64(v)))
1531 // Decompose a shift right into an equivalent rotate/mask,
1532 // and return mask & m.
1533 func mergePPC64RShiftMask(m, s, nbits int64) int64 {
1534 smask := uint64((1<<uint(nbits))-1) >> uint(s)
1535 return m & int64(smask)
1538 // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
1539 func mergePPC64AndSrwi(m, s int64) int64 {
1540 mask := mergePPC64RShiftMask(m, s, 32)
1541 if !isPPC64WordRotateMask(mask) {
1544 return encodePPC64RotateMask((32-s)&31, mask, 32)
1547 // Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
1548 // Return the encoded RLWINM constant, or 0 if they cannot be merged.
1549 func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
1550 mask_1 := uint64(0xFFFFFFFF >> uint(srw))
1551 // for CLRLSLDI, it's more convenient to think of it as masking the left bits, then rotating left.
1552 mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1554 // Rewrite mask to apply after the final left shift.
1555 mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
1558 r_2 := GetPPC64Shiftsh(sld)
1559 r_3 := (r_1 + r_2) & 31 // This can wrap.
1561 if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
1564 return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
1567 // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM. Return
1568 // the encoded RLWINM constant, or 0 if they cannot be merged.
1569 func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
1570 r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
1571 // for CLRLSLDI, it's more convenient to think of it as masking the left bits, then rotating left.
1572 mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1574 // combine the masks, and adjust for the final left shift.
1575 mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
1576 r_2 := GetPPC64Shiftsh(int64(sld))
1577 r_3 := (r_1 + r_2) & 31 // This can wrap.
1579 // Verify the result is still a valid bitmask of <= 32 bits.
1580 if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
1583 return encodePPC64RotateMask(r_3, int64(mask_3), 32)
1586 // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
1587 // or return 0 if they cannot be combined.
1588 func mergePPC64SldiSrw(sld, srw int64) int64 {
1589 if sld > srw || srw >= 32 {
1592 mask_r := uint32(0xFFFFFFFF) >> uint(srw)
1593 mask_l := uint32(0xFFFFFFFF) >> uint(sld)
1594 mask := (mask_r & mask_l) << uint(sld)
1595 return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
1598 // rotateLeft32 is a convenience function to rotate a 32-bit constant value by another constant.
1599 func rotateLeft32(v, rotate int64) int64 {
1600 return int64(bits.RotateLeft32(uint32(v), int(rotate)))
1603 func rotateRight64(v, rotate int64) int64 {
1604 return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
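// For example:
//	rotateLeft32(0x80000000, 1) == 1     // the high bit wraps around to bit 0
//	rotateRight64(1, 1) == math.MinInt64 // bit 0 wraps around to bit 63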
1607 // armBFAuxInt encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
1608 func armBFAuxInt(lsb, width int64) arm64BitField {
1609 if lsb < 0 || lsb > 63 {
1610 panic("ARM(64) bit field lsb constant out of range")
1612 if width < 1 || lsb+width > 64 {
1613 panic("ARM(64) bit field width constant out of range")
1615 return arm64BitField(width | lsb<<8)
1618 // getARM64BFlsb returns the lsb part of the auxInt field of arm64 bitfield ops.
1619 func (bfc arm64BitField) getARM64BFlsb() int64 {
1620 return int64(uint64(bfc) >> 8)
1623 // getARM64BFwidth returns the width part of the auxInt field of arm64 bitfield ops.
1624 func (bfc arm64BitField) getARM64BFwidth() int64 {
1625 return int64(bfc) & 0xff
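// For example, an 8-bit field starting at bit 8:
//	bfc := armBFAuxInt(8, 8) // == 8 | 8<<8 == 0x808
//	bfc.getARM64BFlsb() == 8
//	bfc.getARM64BFwidth() == 8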
1628 // isARM64BFMask reports whether mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
1629 func isARM64BFMask(lsb, mask, rshift int64) bool {
1630 shiftedMask := int64(uint64(mask) >> uint64(rshift))
1631 return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
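// For example:
//	isARM64BFMask(0, 0xFF, 0) == true   // 0xFF is 8 contiguous low-order ones
//	isARM64BFMask(60, 0xFF, 0) == false // the field would run past bit 63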
1634 // arm64BFWidth returns the bitfield width of mask >> rshift for arm64 bitfield ops.
1635 func arm64BFWidth(mask, rshift int64) int64 {
1636 shiftedMask := int64(uint64(mask) >> uint64(rshift))
1637 if shiftedMask == 0 {
1638 panic("ARM64 BF mask is zero")
1640 return nto(shiftedMask)
1643 // sizeof returns the size of t in bytes.
1644 // It will panic if t is not a *types.Type.
1645 func sizeof(t interface{}) int64 {
1646 return t.(*types.Type).Size()
1649 // registerizable reports whether t is a primitive type that fits in
1650 // a register. It assumes float64 values will always fit into registers
1651 // even if that isn't strictly true.
1652 func registerizable(b *Block, typ *types.Type) bool {
1653 if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
1656 if typ.IsInteger() {
1657 return typ.Size() <= b.Func.Config.RegSize
1662 // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
1663 func needRaceCleanup(sym *AuxCall, v *Value) bool {
1668 if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
1671 for _, b := range f.Blocks {
1672 for _, v := range b.Values {
1674 case OpStaticCall, OpStaticLECall:
1675 // A check for racefuncenter will encounter racefuncexit and vice versa.
1676 // Allow calls to panic*.
1677 s := v.Aux.(*AuxCall).Fn.String()
1679 case "runtime.racefuncenter", "runtime.racefuncexit",
1680 "runtime.panicdivide", "runtime.panicwrap",
1681 "runtime.panicshift":
1684 // If we encountered any call, we need to keep racefunc*,
1685 // for accurate stacktraces.
1687 case OpPanicBounds, OpPanicExtend:
1688 // Note: these are panic generators that are ok (like the static calls above).
1689 case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
1690 // We must keep the race functions if there are any other call types.
1695 if isSameCall(sym, "runtime.racefuncenter") {
1696 // TODO REGISTER ABI this needs to be cleaned up.
1697 // If we're removing racefuncenter, remove its argument as well.
1698 if v.Args[0].Op != OpStore {
1699 if v.Op == OpStaticLECall {
1700 // there is no store, yet.
1705 mem := v.Args[0].Args[2]
1706 v.Args[0].reset(OpCopy)
1707 v.Args[0].AddArg(mem)
1712 // symIsRO reports whether sym is a read-only global.
1713 func symIsRO(sym interface{}) bool {
1714 lsym := sym.(*obj.LSym)
1715 return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
1718 // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
1719 func symIsROZero(sym Sym) bool {
1720 lsym := sym.(*obj.LSym)
1721 if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
1724 for _, b := range lsym.P {
1732 // isFixed32 reports whether the int32 at offset off in symbol sym
1733 // is known and constant.
1734 func isFixed32(c *Config, sym Sym, off int64) bool {
1735 return isFixed(c, sym, off, 4)
1738 // isFixed reports whether the range [off,off+size] of the symbol sym
1739 // is known and constant.
1740 func isFixed(c *Config, sym Sym, off, size int64) bool {
1741 lsym := sym.(*obj.LSym)
1742 if lsym.Extra == nil {
1745 if _, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
1746 if off == 2*c.PtrSize && size == 4 {
1747 return true // type hash field
1752 func fixed32(c *Config, sym Sym, off int64) int32 {
1753 lsym := sym.(*obj.LSym)
1754 if ti, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
1755 if off == 2*c.PtrSize {
1756 return int32(types.TypeHash(ti.Type.(*types.Type)))
1759 base.Fatalf("fixed32 data not known for %s:%d", sym, off)
1763 // isFixedSym reports whether the contents of sym at the given offset
1764 // are known and are the constant address of another symbol.
1765 func isFixedSym(sym Sym, off int64) bool {
1766 lsym := sym.(*obj.LSym)
1768 case lsym.Type == objabi.SRODATA:
1769 // itabs, dictionaries
1773 for _, r := range lsym.R {
1774 if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
1780 func fixedSym(f *Func, sym Sym, off int64) Sym {
1781 lsym := sym.(*obj.LSym)
1782 for _, r := range lsym.R {
1783 if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off {
1784 if strings.HasPrefix(r.Sym.Name, "type:") {
1785 // In case we're loading a type out of a dictionary, we need to record
1786 // that the containing function might put that type in an interface.
1787 // That information is currently recorded in relocations in the dictionary,
1788 // but if we perform this load at compile time then the dictionary
1790 reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
1795 base.Fatalf("fixedSym data not known for %s:%d", sym, off)
1799 // read8 reads one byte from the read-only global sym at offset off.
1800 func read8(sym interface{}, off int64) uint8 {
1801 lsym := sym.(*obj.LSym)
1802 if off >= int64(len(lsym.P)) || off < 0 {
1803 // Invalid index into the global sym.
1804 // This can happen in dead code, so we don't want to panic.
1805 // Just return any value, it will eventually get ignored.
1812 // read16 reads two bytes from the read-only global sym at offset off.
1813 func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
1814 lsym := sym.(*obj.LSym)
1815 // lsym.P is written lazily.
1816 // Bytes requested after the end of lsym.P are 0.
1818 if 0 <= off && off < int64(len(lsym.P)) {
1821 buf := make([]byte, 2)
1823 return byteorder.Uint16(buf)
1826 // read32 reads four bytes from the read-only global sym at offset off.
1827 func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
1828 lsym := sym.(*obj.LSym)
1830 if 0 <= off && off < int64(len(lsym.P)) {
1833 buf := make([]byte, 4)
1835 return byteorder.Uint32(buf)
1838 // read64 reads eight bytes from the read-only global sym at offset off.
1839 func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
1840 lsym := sym.(*obj.LSym)
1842 if 0 <= off && off < int64(len(lsym.P)) {
1845 buf := make([]byte, 8)
1847 return byteorder.Uint64(buf)
1850 // sequentialAddresses reports whether it can prove that x + n == y.
1851 func sequentialAddresses(x, y *Value, n int64) bool {
1852 if x == y && n == 0 {
1855 if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
1856 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1857 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1860 if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1861 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1862 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1865 if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
1866 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1867 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1870 if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1871 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1872 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1878 // flagConstant represents the result of a compile-time comparison.
1879 // The sense of these flags does not necessarily represent the hardware's notion
1880 // of a flags register - these are just a compile-time construct.
1881 // We happen to match the semantics to those of arm/arm64.
1882 // Note that these semantics differ from x86: the carry flag has the opposite
1883 // sense on a subtraction!
1885 // On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
1886 // On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
1887 // (because it does x + ^y + C).
1889 // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
1890 type flagConstant uint8
1892 // N reports whether the result of an operation is negative (high bit set).
1893 func (fc flagConstant) N() bool {
1897 // Z reports whether the result of an operation is 0.
1898 func (fc flagConstant) Z() bool {
1902 // C reports whether an unsigned add overflowed (carry), or an
1903 // unsigned subtract did not underflow (borrow).
1904 func (fc flagConstant) C() bool {
1908 // V reports whether a signed operation overflowed or underflowed.
1909 func (fc flagConstant) V() bool {
1913 func (fc flagConstant) eq() bool {
1916 func (fc flagConstant) ne() bool {
1919 func (fc flagConstant) lt() bool {
1920 return fc.N() != fc.V()
1922 func (fc flagConstant) le() bool {
1923 return fc.Z() || fc.lt()
1925 func (fc flagConstant) gt() bool {
1926 return !fc.Z() && fc.ge()
1928 func (fc flagConstant) ge() bool {
1929 return fc.N() == fc.V()
1931 func (fc flagConstant) ult() bool {
1934 func (fc flagConstant) ule() bool {
1935 return fc.Z() || fc.ult()
1937 func (fc flagConstant) ugt() bool {
1938 return !fc.Z() && fc.uge()
1940 func (fc flagConstant) uge() bool {
1944 func (fc flagConstant) ltNoov() bool {
1945 return fc.lt() && !fc.V()
1947 func (fc flagConstant) leNoov() bool {
1948 return fc.le() && !fc.V()
1950 func (fc flagConstant) gtNoov() bool {
1951 return fc.gt() && !fc.V()
1953 func (fc flagConstant) geNoov() bool {
1954 return fc.ge() && !fc.V()
1957 func (fc flagConstant) String() string {
1958 return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
1961 type flagConstantBuilder struct {
1968 func (fcs flagConstantBuilder) encode() flagConstant {
1985 // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
1986 // - the results of the C flag are different
1987 // - the results of the V flag when y==minint are different
1989 // addFlags64 returns the flags that would be set from computing x+y.
1990 func addFlags64(x, y int64) flagConstant {
1991 var fcb flagConstantBuilder
1994 fcb.C = uint64(x+y) < uint64(x)
1995 fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1999 // subFlags64 returns the flags that would be set from computing x-y.
2000 func subFlags64(x, y int64) flagConstant {
2001 var fcb flagConstantBuilder
2004 fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
2005 fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
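// For example, under the arm-style borrow convention used here (and
// consistent with the definitions above):
//	subFlags64(0, 1).C() == false // borrow occurred
//	subFlags64(1, 1).C() == true  // no borrow; Z is also set, so eq() holds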
2009 // addFlags32 returns the flags that would be set from computing x+y.
2010 func addFlags32(x, y int32) flagConstant {
2011 var fcb flagConstantBuilder
2014 fcb.C = uint32(x+y) < uint32(x)
2015 fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
2019 // subFlags32 returns the flags that would be set from computing x-y.
2020 func subFlags32(x, y int32) flagConstant {
2021 var fcb flagConstantBuilder
2024 fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
2025 fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
2029 // logicFlags64 returns flags set to the sign/zeroness of x.
2030 // C and V are set to false.
2031 func logicFlags64(x int64) flagConstant {
2032 var fcb flagConstantBuilder
2038 // logicFlags32 returns flags set to the sign/zeroness of x.
2039 // C and V are set to false.
2040 func logicFlags32(x int32) flagConstant {
2041 var fcb flagConstantBuilder
2047 func makeJumpTableSym(b *Block) *obj.LSym {
2048 s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
2049 s.Set(obj.AttrDuplicateOK, true)
2050 s.Set(obj.AttrLocal, true)
2054 // canRotate reports whether the architecture supports
2055 // rotates of integer registers with the given number of bits.
2056 func canRotate(c *Config, bits int64) bool {
2057 if bits > c.PtrSize*8 {
2058 // Don't rewrite to rotates bigger than the machine word.
2062 case "386", "amd64", "arm64":
2064 case "arm", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
2071 // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
2072 func isARM64bitcon(x uint64) bool {
2073 if x == 1<<64-1 || x == 0 {
2076 // determine the period and sign-extend a unit to 64 bits
2078 case x != x>>32|x<<32:
2081 case x != x>>16|x<<48:
2083 x = uint64(int64(int32(x)))
2084 case x != x>>8|x<<56:
2086 x = uint64(int64(int16(x)))
2087 case x != x>>4|x<<60:
2089 x = uint64(int64(int8(x)))
2091 // period is 4 or 2, always true
2092 // 0001, 0010, 0100, 1000 -- 0001 rotate
2093 // 0011, 0110, 1100, 1001 -- 0011 rotate
2094 // 0111, 1011, 1101, 1110 -- 0111 rotate
2095 // 0101, 1010 -- 01 rotate, repeat
2098 return sequenceOfOnes(x) || sequenceOfOnes(^x)
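// For example:
//	isARM64bitcon(0x00FF00FF00FF00FF) == true // period-16 repetition of eight ones
//	isARM64bitcon(0) == false                 // 0 and all-ones are excluded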
2101 // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
2102 func sequenceOfOnes(x uint64) bool {
2103 y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
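// e.g. x = 0b0110: y = 0b0010 and x+y = 0b1000, a power of 2, so x qualifies;
// x = 0b0101: y = 0b0001 and x+y = 0b0110, which is not.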
2108 // isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction.
2109 func isARM64addcon(v int64) bool {
2110 /* uimm12 or uimm24? */
2114 if (v & 0xFFF) == 0 {