// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
"cmd/compile/internal/base"
"cmd/compile/internal/logopt"
"cmd/compile/internal/reflectdata"
"cmd/compile/internal/types"
"cmd/internal/obj/s390x"
type deadValueChoice bool
leaveDeadValues deadValueChoice = false
removeDeadValues = true
// deadcode indicates whether rewrite should try to remove any values that become dead.
func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
// repeat rewrites until we find no more rewrites
pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
var states map[string]bool
for _, b := range f.Blocks {
b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
for i, c := range b.ControlValues() {
b.ReplaceControl(i, c)
fmt.Printf("rewriting %s -> %s\n", b0.LongString(), b.LongString())
for j, v := range b.Values {
v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
if v.Uses == 0 && v.removeable() {
if v.Op != OpInvalid && deadcode == removeDeadValues {
// Reset any values that are now unused, so that we decrement
// the use count of all of their arguments.
// Not quite a deadcode pass, because it does not handle cycles.
// But it should help Uses==1 rules to fire.
// No point rewriting values which aren't used.
vchange := phielimValue(v)
if vchange && debug > 1 {
fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
// Eliminate copy inputs.
// If any copy input becomes unused, mark it
// as invalid and discard its argument. Repeat
// recursively on the discarded argument.
// This phase helps remove phantom "dead copy" uses
// of a value so that an x.Uses==1 rule condition
// fires reliably.
for i, a := range v.Args {
// If a, a copy, has a line boundary indicator, attempt to find a new value
// to hold it. The first candidate is the value that will replace a (aa),
// if it shares the same block and line and is eligible.
// The second option is v, which has a as an input. Because aa is earlier in
// the data flow, it is the better choice.
if a.Pos.IsStmt() == src.PosIsStmt {
if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
aa.Pos = aa.Pos.WithIsStmt()
} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
v.Pos = v.Pos.WithIsStmt()
// Record the lost line and look for a new home after all rewrites are complete.
// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
// line to appear in more than one block, but only one block is stored, so if both end
// up here, then one will be lost.
pendingLines.set(a.Pos, int32(a.Block.ID))
a.Pos = a.Pos.WithNotStmt()
if vchange && debug > 1 {
fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
// apply rewrite function
// If value changed to a poor choice for a statement boundary, move the boundary
if v.Pos.IsStmt() == src.PosIsStmt {
if k := nextGoodStatementIndex(v, j, b); k != j {
v.Pos = v.Pos.WithNotStmt()
b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
change = change || vchange
if vchange && debug > 1 {
fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
if !change && !deadChange {
if (iters > 1000 || debug >= 2) && change {
// We've done a suspiciously large number of rewrites (or we're in debug mode).
// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
// and the maximum value encountered during make.bash is 12.
// Start checking for cycles. (This is too expensive to do routinely.)
// Note: we avoid this path for deadChange-only iterations, to fix #51639.
states = make(map[string]bool)
if _, ok := states[h]; ok {
// We've found a cycle.
// To diagnose it, set debug to 2 and start again,
// so that we'll print all rules applied until we complete another cycle.
// If debug is already >= 2, we've already done that, so it's time to crash.
states = make(map[string]bool)
f.Fatalf("rewrite cycle detected")
// remove clobbered values
for _, b := range f.Blocks {
for i, v := range b.Values {
if v.Op == OpInvalid {
if v.Pos.IsStmt() == src.PosIsStmt {
pendingLines.set(vl, int32(b.ID))
if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
pendingLines.remove(vl)
v.Pos = v.Pos.WithIsStmt()
if pendingLines.get(b.Pos) == int32(b.ID) {
b.Pos = b.Pos.WithIsStmt()
pendingLines.remove(b.Pos)
// Common functions called from rewriting rules
func is64BitFloat(t *types.Type) bool {
return t.Size() == 8 && t.IsFloat()
func is32BitFloat(t *types.Type) bool {
return t.Size() == 4 && t.IsFloat()
func is64BitInt(t *types.Type) bool {
return t.Size() == 8 && t.IsInteger()
func is32BitInt(t *types.Type) bool {
return t.Size() == 4 && t.IsInteger()
func is16BitInt(t *types.Type) bool {
return t.Size() == 2 && t.IsInteger()
func is8BitInt(t *types.Type) bool {
return t.Size() == 1 && t.IsInteger()
func isPtr(t *types.Type) bool {
return t.IsPtrShaped()
// mergeSym merges two symbolic offsets. There is no real merging of
// offsets; we just pick the non-nil one.
func mergeSym(x, y Sym) Sym {
panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
func canMergeSym(x, y Sym) bool {
return x == nil || y == nil
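// Illustrative sketch (not from the original source): mergeSym/canMergeSym
// behave like a nil-biased pick, used by rules that fold a symbolic offset
// into an addressing mode only when at most one symbol is present:
//
//	canMergeSym(nil, nil)  == true  // mergeSym picks nil
//	canMergeSym(sym, nil)  == true  // mergeSym picks sym
//	canMergeSym(sym, sym2) == false // mergeSym would panic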
// canMergeLoadClobber reports whether the load can be merged into target without
// invalidating the schedule.
// It also checks that the other non-load argument x is something we
// are ok with clobbering.
func canMergeLoadClobber(target, load, x *Value) bool {
// The register containing x is going to get clobbered.
// Don't merge if we still need the value of x.
// We don't have liveness information here, but we can
// approximate x dying with:
// 1) target is x's only use.
// 2) target is not in a deeper loop than x.
loopnest := x.Block.Func.loopnest()
loopnest.calculateDepths()
if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
return canMergeLoad(target, load)
// canMergeLoad reports whether the load can be merged into target without
// invalidating the schedule.
func canMergeLoad(target, load *Value) bool {
if target.Block.ID != load.Block.ID {
// If the load is in a different block, do not merge it.
// We can't merge the load into the target if the load
// has more than one use.
mem := load.MemoryArg()
// We need the load's memory arg to still be alive at target. That
// can't be the case if one of target's args depends on a memory
// state that is a successor of load's memory arg.
// For example, it would be invalid to merge load into target in
// the following situation because newmem has killed oldmem
// before target is reached:
// load = read ... oldmem
// newmem = write ... oldmem
// arg0 = read ... newmem
// target = add arg0 load
// If the argument comes from a different block then we can exclude
// it immediately because it must dominate load (which is in the
// same block as target).
for _, a := range target.Args {
if a != load && a.Block.ID == target.Block.ID {
args = append(args, a)
// memPreds contains memory states known to be predecessors of load's
// memory state. It is lazily initialized.
var memPreds map[*Value]bool
for i := 0; len(args) > 0; i++ {
// Give up if we have done a lot of iterations.
v := args[len(args)-1]
args = args[:len(args)-1]
if target.Block.ID != v.Block.ID {
// Since target and load are in the same block
// we can stop searching when we leave the block.
// A Phi implies we have reached the top of the block.
// The memory phi, if it exists, is always
// the first logical store in the block.
if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
// We could handle this situation; however, it is likely
// to be very rare.
if v.Op.SymEffect()&SymAddr != 0 {
// This case prevents an operation that calculates the
// address of a local variable from being forced to schedule
// before its corresponding VarDef.
// We don't want to combine the CMPQ with the load, because
// that would force the CMPQ to schedule before the VARDEF, which
// in turn requires the LEAQ to schedule before the VARDEF.
if v.Type.IsMemory() {
// Initialize a map containing memory states
// known to be predecessors of load's memory
// state.
memPreds = make(map[*Value]bool)
for i := 0; i < limit; i++ {
// The memory phi, if it exists, is always
// the first logical store in the block.
if m.Block.ID != target.Block.ID {
if !m.Type.IsMemory() {
if len(m.Args) == 0 {
// We can merge if v is a predecessor of mem.
// For example, we can merge load into target in the
// following scenario:
// load = read ... mem
// target = add x load
if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
// If v takes mem as an input then we know mem
// is valid at this point.
for _, a := range v.Args {
if target.Block.ID == a.Block.ID {
args = append(args, a)
// isSameCall reports whether sym is the same as the given named symbol.
func isSameCall(sym interface{}, name string) bool {
fn := sym.(*AuxCall).Fn
return fn != nil && fn.String() == name
// canLoadUnaligned reports whether the architecture supports unaligned load operations.
func canLoadUnaligned(c *Config) bool {
return c.ctxt.Arch.Alignment == 1
// nlzX returns the number of leading zeros.
func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
func nlz8(x int8) int { return bits.LeadingZeros8(uint8(x)) }
// ntzX returns the number of trailing zeros.
func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
func ntz8(x int8) int { return bits.TrailingZeros8(uint8(x)) }
func oneBit(x int64) bool { return x&(x-1) == 0 && x != 0 }
func oneBit8(x int8) bool { return x&(x-1) == 0 && x != 0 }
func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
// nto returns the number of trailing ones.
func nto(x int64) int64 {
return int64(ntz64(^x))
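// Worked example (illustrative, not from the original source): the number of
// trailing ones in x equals the number of trailing zeros in ^x.
//
//	nto(0b0111) == ntz64(^0b0111) == ntz64(...11111000) == 3
//	nto(0)      == ntz64(all ones) == 0
//	nto(-1)     == ntz64(0)        == 64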
// logX returns the base-2 logarithm of n.
// n must be a positive power of 2 (isPowerOfTwoX returns true).
func log8(n int8) int64 {
return int64(bits.Len8(uint8(n))) - 1
func log16(n int16) int64 {
return int64(bits.Len16(uint16(n))) - 1
func log32(n int32) int64 {
return int64(bits.Len32(uint32(n))) - 1
func log64(n int64) int64 {
return int64(bits.Len64(uint64(n))) - 1
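// Worked example (illustrative, not from the original source): for a power
// of two, bits.Len returns the position of the single set bit plus one, so
// subtracting one yields the exponent.
//
//	log64(8) == int64(bits.Len64(8)) - 1 == 4 - 1 == 3 // 8 == 1<<3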
// log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
func log2uint32(n int64) int64 {
return int64(bits.Len32(uint32(n))) - 1
// isPowerOfTwoX functions report whether n is a power of 2.
func isPowerOfTwo8(n int8) bool {
return n > 0 && n&(n-1) == 0
func isPowerOfTwo16(n int16) bool {
return n > 0 && n&(n-1) == 0
func isPowerOfTwo32(n int32) bool {
return n > 0 && n&(n-1) == 0
func isPowerOfTwo64(n int64) bool {
return n > 0 && n&(n-1) == 0
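// Worked example (illustrative, not from the original source): n&(n-1)
// clears the lowest set bit, so it is zero exactly when n has one set bit.
//
//	isPowerOfTwo64(8): 8&7 == 0b1000 & 0b0111 == 0      -> true
//	isPowerOfTwo64(6): 6&5 == 0b0110 & 0b0101 == 0b0100 -> false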
// isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
func isUint64PowerOfTwo(in int64) bool {
n := uint64(in)
return n > 0 && n&(n-1) == 0
// isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
func isUint32PowerOfTwo(in int64) bool {
n := uint64(uint32(in))
return n > 0 && n&(n-1) == 0
// is32Bit reports whether n can be represented as a signed 32 bit integer.
func is32Bit(n int64) bool {
return n == int64(int32(n))
// is16Bit reports whether n can be represented as a signed 16 bit integer.
func is16Bit(n int64) bool {
return n == int64(int16(n))
// is8Bit reports whether n can be represented as a signed 8 bit integer.
func is8Bit(n int64) bool {
return n == int64(int8(n))
// isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
func isU8Bit(n int64) bool {
return n == int64(uint8(n))
// isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
func isU12Bit(n int64) bool {
return 0 <= n && n < (1<<12)
// isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
func isU16Bit(n int64) bool {
return n == int64(uint16(n))
// isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
func isU32Bit(n int64) bool {
return n == int64(uint32(n))
// is20Bit reports whether n can be represented as a signed 20 bit integer.
func is20Bit(n int64) bool {
return -(1<<19) <= n && n < (1<<19)
// b2i translates a boolean value to 0 or 1 for assigning to auxInt.
func b2i(b bool) int64 {
// b2i32 translates a boolean value to 0 or 1.
func b2i32(b bool) int32 {
// shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
// A shift is bounded if it is shifting by less than the width of the shifted value.
func shiftIsBounded(v *Value) bool {
// canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
// generated code as much as possible.
func canonLessThan(x, y *Value) bool {
if !x.Pos.SameFileAndLine(y.Pos) {
return x.Pos.Before(y.Pos)
// truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
// of the mantissa. It will panic if the truncation results in lost information.
func truncate64Fto32F(f float64) float32 {
if !isExactFloat32(f) {
panic("truncate64Fto32F: truncation is not exact")
// NaN bit patterns aren't necessarily preserved across conversion
// instructions, so we need to do the conversion manually.
b := math.Float64bits(f)
m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
// | sign | exponent | mantissa |
r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
return math.Float32frombits(r)
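// Bit-layout note (illustrative, not from the original source): float64 is
// 1 sign + 11 exponent + 52 mantissa bits; float32 is 1 + 8 + 23. The NaN
// path above keeps the sign bit (b>>32 leaves it at bit 31), forces the
// float32 NaN exponent via 0x7f800000, and keeps the top 23 mantissa bits
// via m >> (52 - 23).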
// extend32Fto64F converts a float32 value to a float64 value preserving the bit
// pattern of the mantissa.
func extend32Fto64F(f float32) float64 {
if !math.IsNaN(float64(f)) {
// NaN bit patterns aren't necessarily preserved across conversion
// instructions, so we need to do the conversion manually.
b := uint64(math.Float32bits(f))
// | sign | exponent | mantissa |
r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
return math.Float64frombits(r)
// DivisionNeedsFixUp reports whether the division needs fix-up code.
func DivisionNeedsFixUp(v *Value) bool {
// auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
func auxFrom64F(f float64) int64 {
panic("can't encode a NaN in AuxInt field")
return int64(math.Float64bits(f))
// auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
func auxFrom32F(f float32) int64 {
panic("can't encode a NaN in AuxInt field")
return int64(math.Float64bits(extend32Fto64F(f)))
// auxTo32F decodes a float32 from the AuxInt value provided.
func auxTo32F(i int64) float32 {
return truncate64Fto32F(math.Float64frombits(uint64(i)))
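// Round-trip sketch (illustrative, not from the original source): a float32
// constant is widened losslessly for AuxInt storage and narrowed back on
// decode, so auxTo32F(auxFrom32F(f)) == f for any non-NaN f (NaNs panic in
// auxFrom32F before they can be stored).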
// auxTo64F decodes a float64 from the AuxInt value provided.
func auxTo64F(i int64) float64 {
return math.Float64frombits(uint64(i))
func auxIntToBool(i int64) bool {
func auxIntToInt8(i int64) int8 {
func auxIntToInt16(i int64) int16 {
func auxIntToInt32(i int64) int32 {
func auxIntToInt64(i int64) int64 {
func auxIntToUint8(i int64) uint8 {
func auxIntToFloat32(i int64) float32 {
return float32(math.Float64frombits(uint64(i)))
func auxIntToFloat64(i int64) float64 {
return math.Float64frombits(uint64(i))
func auxIntToValAndOff(i int64) ValAndOff {
func auxIntToArm64BitField(i int64) arm64BitField {
return arm64BitField(i)
func auxIntToInt128(x int64) int128 {
panic("nonzero int128 not allowed")
func auxIntToFlagConstant(x int64) flagConstant {
return flagConstant(x)
func auxIntToOp(cc int64) Op {
func boolToAuxInt(b bool) int64 {
func int8ToAuxInt(i int8) int64 {
func int16ToAuxInt(i int16) int64 {
func int32ToAuxInt(i int32) int64 {
func int64ToAuxInt(i int64) int64 {
func uint8ToAuxInt(i uint8) int64 {
return int64(int8(i))
func float32ToAuxInt(f float32) int64 {
return int64(math.Float64bits(float64(f)))
func float64ToAuxInt(f float64) int64 {
return int64(math.Float64bits(f))
func valAndOffToAuxInt(v ValAndOff) int64 {
func arm64BitFieldToAuxInt(v arm64BitField) int64 {
func int128ToAuxInt(x int128) int64 {
panic("nonzero int128 not allowed")
func flagConstantToAuxInt(x flagConstant) int64 {
func opToAuxInt(o Op) int64 {
// Aux is an interface to hold miscellaneous data in Blocks and Values.
// for now only used to mark moves that need to avoid clobbering flags
func (auxMark) CanBeAnSSAAux() {}
// stringAux wraps string values for use in Aux.
type stringAux string
func (stringAux) CanBeAnSSAAux() {}
func auxToString(i Aux) string {
return string(i.(stringAux))
func auxToSym(i Aux) Sym {
// TODO: kind of a hack - allows nil interface through
func auxToType(i Aux) *types.Type {
return i.(*types.Type)
func auxToCall(i Aux) *AuxCall {
func auxToS390xCCMask(i Aux) s390x.CCMask {
return i.(s390x.CCMask)
func auxToS390xRotateParams(i Aux) s390x.RotateParams {
return i.(s390x.RotateParams)
func StringToAux(s string) Aux {
func symToAux(s Sym) Aux {
func callToAux(s *AuxCall) Aux {
func typeToAux(t *types.Type) Aux {
func s390xCCMaskToAux(c s390x.CCMask) Aux {
func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
// uaddOvf reports whether unsigned a+b would overflow.
func uaddOvf(a, b int64) bool {
return uint64(a)+uint64(b) < uint64(a)
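// Worked example (illustrative, not from the original source): unsigned
// addition wraps mod 2^64, so the sum is smaller than an addend iff it
// overflowed.
//
//	uaddOvf(-1, 1): uint64(-1)+1 wraps to 0, and 0 < uint64(-1) -> true
//	uaddOvf(1, 2):  3 >= 1                                      -> false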
// loadLSymOffset simulates reading a word at an offset into a
// read-only symbol's runtime memory. If it would read a pointer to
// another symbol, that symbol is returned. Otherwise, it returns nil.
func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
if lsym.Type != objabi.SRODATA {
for _, r := range lsym.R {
if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
func devirtLECall(v *Value, sym *obj.LSym) *Value {
v.Op = OpStaticLECall
auxcall := v.Aux.(*AuxCall)
copy(v.Args[0:], v.Args[1:])
v.Args[len(v.Args)-1] = nil // aid GC
v.Args = v.Args[:len(v.Args)-1]
if f := v.Block.Func; f.pass.debug > 0 {
f.Warnl(v.Pos, "de-virtualizing call")
// isSamePtr reports whether p1 and p2 point to the same address.
func isSamePtr(p1, p2 *Value) bool {
return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
case OpAddr, OpLocalAddr:
return p1.Aux == p2.Aux
return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
func isStackPtr(v *Value) bool {
for v.Op == OpOffPtr || v.Op == OpAddPtr {
return v.Op == OpSP || v.Op == OpLocalAddr
// disjoint reports whether the memory region specified by [p1:p1+n1)
// does not overlap with [p2:p2+n2).
// A return value of false does not imply the regions overlap.
func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
if n1 == 0 || n2 == 0 {
baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
base, offset = ptr, 0
for base.Op == OpOffPtr {
offset += base.AuxInt
if opcodeTable[base.Op].nilCheck {
p1, off1 := baseAndOffset(p1)
p2, off2 := baseAndOffset(p2)
if isSamePtr(p1, p2) {
return !overlap(off1, n1, off2, n2)
// p1 and p2 are not the same, so if they are both OpAddrs then
// they point to different variables.
// If one pointer is on the stack and the other is an argument
// then they can't overlap.
case OpAddr, OpLocalAddr:
if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
case OpArg, OpArgIntReg:
if p2.Op == OpSP || p2.Op == OpLocalAddr {
return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
// moveSize returns the number of bytes an aligned MOV instruction moves.
func moveSize(align int64, c *Config) int64 {
case align%8 == 0 && c.PtrSize == 8:
// mergePoint finds a block among a's blocks which dominates b and is itself
// dominated by all of a's blocks. Returns nil if it can't find one.
// Might return nil even if one does exist.
func mergePoint(b *Block, a ...*Value) *Block {
// Walk backward from b looking for one of the a's blocks.
for _, x := range a {
if len(b.Preds) > 1 {
// Don't know which way to go back. Abort.
return nil // too far away
// At this point, r is the first value in a that we find by walking backwards.
// If we return anything, r will be it.
// Keep going, counting the other a's that we find. They must all dominate r.
for _, x := range a {
// Found all of a in a backwards walk. We can return r.
if len(b.Preds) > 1 {
return nil // too far away
// clobber invalidates values. Returns true.
// clobber is used by rewrite rules to:
// A) make sure the values are really dead and never used again.
// B) decrement use counts of the values' args.
func clobber(vv ...*Value) bool {
for _, v := range vv {
// Note: leave v.Block intact. The Block field is used after clobber.
// clobberIfDead resets v when use count is 1. Returns true.
// clobberIfDead is used by rewrite rules to decrement
// use counts of v's args when v is dead and never used.
func clobberIfDead(v *Value) bool {
// Note: leave v.Block intact. The Block field is used after clobberIfDead.
// noteRule is an easy way to track if a rule is matched when writing
// new ones. Make the rule of interest also conditional on
// noteRule("note to self: rule of interest matched")
// and that message will print when the rule matches.
func noteRule(s string) bool {
// countRule increments Func.ruleMatches[key].
// If Func.ruleMatches is non-nil at the end
// of compilation, it will be printed to stdout.
// This is intended to make it easier to find functions
// that contain lots of rule matches when developing new rules.
func countRule(v *Value, key string) bool {
if f.ruleMatches == nil {
f.ruleMatches = make(map[string]int)
f.ruleMatches[key]++
// warnRule generates compiler debug output with string s when
// v is not in autogenerated code, cond is true, and the rule has fired.
func warnRule(cond bool, v *Value, s string) bool {
if pos := v.Pos; pos.Line() > 1 && cond {
v.Block.Func.Warnl(pos, s)
// flagArg extracts x from a flags pseudo-op like (LessThan x).
func flagArg(v *Value) *Value {
if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
// arm64Negate finds the complement to an ARM64 condition code,
// for example !Equal -> NotEqual or !LessThan -> GreaterEqual.
// For floating point, it's more subtle because NaN is unordered. We use
// !LessThanF -> NotLessThanF; the latter takes care of NaNs.
func arm64Negate(op Op) Op {
case OpARM64LessThan:
return OpARM64GreaterEqual
case OpARM64LessThanU:
return OpARM64GreaterEqualU
case OpARM64GreaterThan:
return OpARM64LessEqual
case OpARM64GreaterThanU:
return OpARM64LessEqualU
case OpARM64LessEqual:
return OpARM64GreaterThan
case OpARM64LessEqualU:
return OpARM64GreaterThanU
case OpARM64GreaterEqual:
return OpARM64LessThan
case OpARM64GreaterEqualU:
return OpARM64LessThanU
return OpARM64NotEqual
case OpARM64NotEqual:
case OpARM64LessThanF:
return OpARM64NotLessThanF
case OpARM64NotLessThanF:
return OpARM64LessThanF
case OpARM64LessEqualF:
return OpARM64NotLessEqualF
case OpARM64NotLessEqualF:
return OpARM64LessEqualF
case OpARM64GreaterThanF:
return OpARM64NotGreaterThanF
case OpARM64NotGreaterThanF:
return OpARM64GreaterThanF
case OpARM64GreaterEqualF:
return OpARM64NotGreaterEqualF
case OpARM64NotGreaterEqualF:
return OpARM64GreaterEqualF
panic("unreachable")
// arm64Invert evaluates (InvertFlags op), which
// is the same as altering the condition codes such
// that the same result would be produced if the arguments
// to the flag-generating instruction were reversed, e.g.
// (InvertFlags (CMP x y)) -> (CMP y x)
func arm64Invert(op Op) Op {
case OpARM64LessThan:
return OpARM64GreaterThan
case OpARM64LessThanU:
return OpARM64GreaterThanU
case OpARM64GreaterThan:
return OpARM64LessThan
case OpARM64GreaterThanU:
return OpARM64LessThanU
case OpARM64LessEqual:
return OpARM64GreaterEqual
case OpARM64LessEqualU:
return OpARM64GreaterEqualU
case OpARM64GreaterEqual:
return OpARM64LessEqual
case OpARM64GreaterEqualU:
return OpARM64LessEqualU
case OpARM64Equal, OpARM64NotEqual:
case OpARM64LessThanF:
return OpARM64GreaterThanF
case OpARM64GreaterThanF:
return OpARM64LessThanF
case OpARM64LessEqualF:
return OpARM64GreaterEqualF
case OpARM64GreaterEqualF:
return OpARM64LessEqualF
case OpARM64NotLessThanF:
return OpARM64NotGreaterThanF
case OpARM64NotGreaterThanF:
return OpARM64NotLessThanF
case OpARM64NotLessEqualF:
return OpARM64NotGreaterEqualF
case OpARM64NotGreaterEqualF:
return OpARM64NotLessEqualF
panic("unreachable")
// ccARM64Eval evaluates an ARM64 op against a flags value
// that is potentially constant; it returns 1 for true,
// -1 for false, and 0 for not constant.
func ccARM64Eval(op Op, flags *Value) int {
if fop == OpARM64InvertFlags {
return -ccARM64Eval(op, flags.Args[0])
if fop != OpARM64FlagConstant {
fc := flagConstant(flags.AuxInt)
b2i := func(b bool) int {
case OpARM64NotEqual:
case OpARM64LessThan:
case OpARM64LessThanU:
return b2i(fc.ult())
case OpARM64GreaterThan:
case OpARM64GreaterThanU:
return b2i(fc.ugt())
case OpARM64LessEqual:
case OpARM64LessEqualU:
return b2i(fc.ule())
case OpARM64GreaterEqual:
case OpARM64GreaterEqualU:
return b2i(fc.uge())
// logRule logs the use of the rule s. This is only enabled if
// rewrite rules were generated with the -log option; see _gen/rulegen.go.
func logRule(s string) {
if ruleFile == nil {
// Open a log file to write log to. We open in append
// mode because all.bash runs the compiler lots of times,
// and we want the concatenation of all of those logs.
// This means, of course, that users need to rm the old log
// to get fresh data.
// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
_, err := fmt.Fprintln(ruleFile, s)
var ruleFile io.Writer
func min(x, y int64) int64 {
func max(x, y int64) int64 {
func isConstZero(v *Value) bool {
case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
return v.AuxInt == 0
// reciprocalExact64 reports whether 1/c is exactly representable.
func reciprocalExact64(c float64) bool {
b := math.Float64bits(c)
man := b & (1<<52 - 1)
return false // not a power of 2, denormal, or NaN
exp := b >> 52 & (1<<11 - 1)
// exponent bias is 0x3ff. So taking the reciprocal of a number
// changes the exponent to 0x7fe-exp.
return false // ±inf
return false // exponent is not representable
// reciprocalExact32 reports whether 1/c is exactly representable.
func reciprocalExact32(c float32) bool {
b := math.Float32bits(c)
man := b & (1<<23 - 1)
return false // not a power of 2, denormal, or NaN
exp := b >> 23 & (1<<8 - 1)
// exponent bias is 0x7f. So taking the reciprocal of a number
// changes the exponent to 0xfe-exp.
return false // ±inf
return false // exponent is not representable
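// Worked example (illustrative, not from the original source): 1/c is exact
// only when c is a power of two whose reciprocal's exponent stays in range.
//
//	c = 0.5: mantissa bits are 0 and the exponent is in range -> exact (1/0.5 == 2)
//	c = 3.0: mantissa bits are nonzero                        -> not exact
//	c = 2^1023 (float64): 1/c == 2^-1023 would be subnormal   -> reported not exact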
// isARMImmRot checks whether an immediate can be directly encoded into an ARM instruction.
func isARMImmRot(v uint32) bool {
for i := 0; i < 16; i++ {
// overlap reports whether the ranges given by the given offset and
// size pairs overlap.
func overlap(offset1, size1, offset2, size2 int64) bool {
if offset1 >= offset2 && offset2+size2 > offset1 {
if offset2 >= offset1 && offset1+size1 > offset2 {
func areAdjacentOffsets(off1, off2, size int64) bool {
return off1+size == off2 || off1 == off2+size
// zeroUpper32Bits checks whether a value zeroes out the upper 32 bits of a 64-bit register.
// depth limits the recursion depth. In AMD64.rules, 3 is used as the limit,
// because it catches the same number of cases as 4.
func zeroUpper32Bits(x *Value, depth int) bool {
case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
OpAMD64SHLL, OpAMD64SHLLconst:
case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
return x.Type.Size() == 4
case OpPhi, OpSelect0, OpSelect1:
// Phis can use each other as arguments; instead of tracking visited values,
// just limit the recursion depth.
for i := range x.Args {
if !zeroUpper32Bits(x.Args[i], depth-1) {
// zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
func zeroUpper48Bits(x *Value, depth int) bool {
case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
return x.Type.Size() == 2
case OpPhi, OpSelect0, OpSelect1:
// Phis can use each other as arguments; instead of tracking visited values,
// just limit the recursion depth.
for i := range x.Args {
if !zeroUpper48Bits(x.Args[i], depth-1) {
// zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
func zeroUpper56Bits(x *Value, depth int) bool {
case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
return x.Type.Size() == 1
case OpPhi, OpSelect0, OpSelect1:
// Phis can use each other as arguments; instead of tracking visited values,
// just limit the recursion depth.
for i := range x.Args {
if !zeroUpper56Bits(x.Args[i], depth-1) {
func isInlinableMemclr(c *Config, sz int64) bool {
// TODO: expand this check to allow other architectures
// see CL 454255 and issue 56997
case "amd64", "arm64":
case "ppc64le", "ppc64":
// isInlinableMemmove reports whether the given arch performs a Move of the given size
// faster than memmove. It will only return true if replacing the memmove with a Move is
// safe, either because Move will do all of its loads before any of its stores, or
// because the arguments are known to be disjoint.
// This is used as a check for replacing memmove with Move ops.
func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
// It is always safe to convert memmove into Move when its arguments are disjoint.
// Move ops may or may not be faster for large sizes depending on how the platform
// lowers them, so we only perform this optimization on platforms that we know to
// have fast Move ops.
return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
case "386", "arm64":
case "s390x", "ppc64", "ppc64le":
return sz <= 8 || disjoint(dst, sz, src, sz)
case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
return isInlinableMemmove(dst, src, sz, c)
// logLargeCopy logs the occurrence of a large copy.
// The best place to do this is in the rewrite rules where the size of the move is easy to find.
// "Large" is arbitrarily chosen to be 128 bytes; this may change.
func logLargeCopy(v *Value, s int64) bool {
if logopt.Enabled() {
logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
func LogLargeCopy(funcName string, pos src.XPos, s int64) {
if logopt.Enabled() {
logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
// hasSmallRotate reports whether the architecture has rotate instructions
// for sizes < 32-bit. This is used to decide whether to promote some rotations.
func hasSmallRotate(c *Config) bool {
case "amd64", "386":
func supportsPPC64PCRel() bool {
// PCRel is currently supported for >= power10, linux only
// Internal and external linking supports this on ppc64le; internal linking on ppc64.
return buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
if sh < 0 || sh >= sz {
panic("PPC64 shift arg sh out of range")
if mb < 0 || mb >= sz {
panic("PPC64 shift arg mb out of range")
if me < 0 || me >= sz {
panic("PPC64 shift arg me out of range")
return int32(sh<<16 | mb<<8 | me)
func GetPPC64Shiftsh(auxint int64) int64 {
return int64(int8(auxint >> 16))
func GetPPC64Shiftmb(auxint int64) int64 {
return int64(int8(auxint >> 8))
func GetPPC64Shiftme(auxint int64) int64 {
return int64(int8(auxint))
// Test if this value can be encoded as a mask for an rlwinm-like
// operation. Masks can also extend from the msb and wrap to
// the lsb too. That is, the valid masks are 32-bit strings
// of the form: 0..01..10..0 or 1..10..01..1 or 1...1
func isPPC64WordRotateMask(v64 int64) bool {
// Isolate rightmost 1 (if none 0) and add.
// Likewise, for the wrapping case.
vpn := (vn & -vn) + vn
return (v&vp == 0 || vn&vpn == 0) && v != 0
// Compress mask and shift into a single value of the form
// me | mb<<8 | rotate<<16 | nbits<<24, where me and mb can
// be used to regenerate the input mask.
func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
var mb, me, mbn, men int
// Determine boundaries and then decode them
if mask == 0 || ^mask == 0 || rotate >= nbits {
panic(fmt.Sprintf("invalid PPC64 rotate mask: %x %d %d", uint64(mask), rotate, nbits))
} else if nbits == 32 {
mb = bits.LeadingZeros32(uint32(mask))
me = 32 - bits.TrailingZeros32(uint32(mask))
mbn = bits.LeadingZeros32(^uint32(mask))
men = 32 - bits.TrailingZeros32(^uint32(mask))
mb = bits.LeadingZeros64(uint64(mask))
me = 64 - bits.TrailingZeros64(uint64(mask))
mbn = bits.LeadingZeros64(^uint64(mask))
men = 64 - bits.TrailingZeros64(^uint64(mask))
// Check for a wrapping mask (e.g. bits at 0 and 63)
if mb == 0 && me == int(nbits) {
// swap the inverted values
return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
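// Worked example (illustrative, not from the original source): for a
// non-wrapping 32-bit mask such as 0x00FFFF00 with rotate 0:
//
//	mb = bits.LeadingZeros32(0x00FFFF00)       == 8
//	me = 32 - bits.TrailingZeros32(0x00FFFF00) == 32 - 8 == 24
//	encoded = 24 | 8<<8 | 0<<16 | 32<<24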
// Merge (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x)
// SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
// RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
// operations can be combined. This function assumes the two opcodes can
// be merged, and returns an encoded rotate+mask value of the combined RLDICL.
func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
// A larger mb is a smaller mask.
if (encoded>>8)&0xFF < mb {
encoded = (encoded &^ 0xFF00) | mb<<8
// The rotate is expected to be 0.
if (encoded & 0xFF0000) != 0 {
panic("non-zero rotate")
return encoded | r<<16
// DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask. The values returned as
// mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
auxint := uint64(sauxint)
rotate = int64((auxint >> 16) & 0xFF)
mb = int64((auxint >> 8) & 0xFF)
me = int64((auxint >> 0) & 0xFF)
nbits := int64((auxint >> 24) & 0xFF)
mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
mask = uint64(uint32(mask))
// Fixup ME to match the ISA definition. The second argument to MASK(..,me)
// is inclusive.
me = (me - 1) & (nbits - 1)
// This verifies that the mask is a set of
// consecutive bits including the least
// significant bit.
func isPPC64ValidShiftMask(v int64) bool {
if (v != 0) && ((v+1)&v) == 0 {
func getPPC64ShiftMaskLength(v int64) int64 {
return int64(bits.Len64(uint64(v)))
// Decompose a shift right into an equivalent rotate/mask,
// and return mask & m.
func mergePPC64RShiftMask(m, s, nbits int64) int64 {
smask := uint64((1<<uint(nbits))-1) >> uint(s)
return m & int64(smask)
// Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
func mergePPC64AndSrwi(m, s int64) int64 {
mask := mergePPC64RShiftMask(m, s, 32)
if !isPPC64WordRotateMask(mask) {
return encodePPC64RotateMask((32-s)&31, mask, 32)
// Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
// Return the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
mask_1 := uint64(0xFFFFFFFF >> uint(srw))
// for CLRLSLDI, it's more convenient to think of it as masking the leftmost bits, then rotating left.
mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
// Rewrite mask to apply after the final left shift.
mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
r_2 := GetPPC64Shiftsh(sld)
r_3 := (r_1 + r_2) & 31 // This can wrap.
if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
// Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM. Return
// the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
// for CLRLSLDI, it's more convenient to think of it as masking the leftmost bits, then rotating left.
mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
// combine the masks, and adjust for the final left shift.
mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
r_2 := GetPPC64Shiftsh(int64(sld))
r_3 := (r_1 + r_2) & 31 // This can wrap.
// Verify the result is still a valid bitmask of <= 32 bits.
if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
return encodePPC64RotateMask(r_3, int64(mask_3), 32)
// Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
// or return 0 if they cannot be combined.
func mergePPC64SldiSrw(sld, srw int64) int64 {
if sld > srw || srw >= 32 {
mask_r := uint32(0xFFFFFFFF) >> uint(srw)
mask_l := uint32(0xFFFFFFFF) >> uint(sld)
mask := (mask_r & mask_l) << uint(sld)
return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
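// Worked example (illustrative, not from the original source): sld=2, srw=4
// on a 32-bit lane:
//
//	mask_r = 0xFFFFFFFF >> 4 == 0x0FFFFFFF
//	mask_l = 0xFFFFFFFF >> 2 == 0x3FFFFFFF
//	mask   = (mask_r & mask_l) << 2 == 0x3FFFFFFC
//	rotate = (32 - 4 + 2) & 31 == 30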
// Convert a PPC64 opcode from the Op to OpCC form. This converts (op x y)
// to (Select0 (opCC x y)) without having to explicitly fixup every user
// of op.
// E.g. consider the case:
// b = (CMPconst [0] a)
// A rule like (CMPconst [0] (ADD x y)) => (CMPconst [0] (Select0 (ADDCC x y)))
// a” = (Select0 a')
// b = (CMPconst [0] a”)
// which makes it impossible to rewrite the second user. Instead, the result
// of this conversion is:
// b = (CMPconst [0] a)
// Which makes it trivial to rewrite b using a lowering rule.
func convertPPC64OpToOpCC(op *Value) *Value {
ccOpMap := map[Op]Op{
OpPPC64ADD: OpPPC64ADDCC,
OpPPC64ADDconst: OpPPC64ADDCCconst,
OpPPC64AND: OpPPC64ANDCC,
OpPPC64ANDN: OpPPC64ANDNCC,
OpPPC64CNTLZD: OpPPC64CNTLZDCC,
OpPPC64OR: OpPPC64ORCC,
OpPPC64SUB: OpPPC64SUBCC,
OpPPC64NEG: OpPPC64NEGCC,
OpPPC64NOR: OpPPC64NORCC,
OpPPC64XOR: OpPPC64XORCC,
opCC := b.NewValue0I(op.Pos, ccOpMap[op.Op], types.NewTuple(op.Type, types.TypeFlags), op.AuxInt)
opCC.AddArgs(op.Args...)
// Convenience function to rotate a 32 bit constant value by another constant.
func rotateLeft32(v, rotate int64) int64 {
return int64(bits.RotateLeft32(uint32(v), int(rotate)))
func rotateRight64(v, rotate int64) int64 {
return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
// encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
func armBFAuxInt(lsb, width int64) arm64BitField {
if lsb < 0 || lsb > 63 {
panic("ARM(64) bit field lsb constant out of range")
if width < 1 || lsb+width > 64 {
panic("ARM(64) bit field width constant out of range")
return arm64BitField(width | lsb<<8)
// returns the lsb part of the auxInt field of arm64 bitfield ops.
func (bfc arm64BitField) getARM64BFlsb() int64 {
return int64(uint64(bfc) >> 8)
// returns the width part of the auxInt field of arm64 bitfield ops.
func (bfc arm64BitField) getARM64BFwidth() int64 {
return int64(bfc) & 0xff
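// Worked example (illustrative, not from the original source): the encoding
// packs width in the low byte and lsb above it, so it round-trips:
//
//	bf := armBFAuxInt(8, 16) // 16 | 8<<8 == 0x810
//	bf.getARM64BFlsb()   == 8
//	bf.getARM64BFwidth() == 16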
// checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
func isARM64BFMask(lsb, mask, rshift int64) bool {
shiftedMask := int64(uint64(mask) >> uint64(rshift))
return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
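// Worked example (illustrative, not from the original source): lsb=4,
// mask=0xF0, rshift=4:
//
//	shiftedMask = 0xF0>>4 == 0x0F; 0x0F+1 == 0x10 is a power of two,
//	and nto(0x0F)+4 == 4+4 < 64, so the mask is valid (width 4).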
// returns the bitfield width of mask >> rshift for arm64 bitfield ops.
func arm64BFWidth(mask, rshift int64) int64 {
shiftedMask := int64(uint64(mask) >> uint64(rshift))
if shiftedMask == 0 {
panic("ARM64 BF mask is zero")
return nto(shiftedMask)
// sizeof returns the size of t in bytes.
// It will panic if t is not a *types.Type.
func sizeof(t interface{}) int64 {
return t.(*types.Type).Size()
// registerizable reports whether t is a primitive type that fits in
// a register. It assumes float64 values will always fit into registers
// even if that isn't strictly true.
func registerizable(b *Block, typ *types.Type) bool {
if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
if typ.IsInteger() {
return typ.Size() <= b.Func.Config.RegSize
// needRaceCleanup reports whether this call to racefuncenter/exit can be removed.
func needRaceCleanup(sym *AuxCall, v *Value) bool {
if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
for _, b := range f.Blocks {
for _, v := range b.Values {
case OpStaticCall, OpStaticLECall:
// A check for racefuncenter will encounter racefuncexit and vice versa.
// Allow calls to panic*.
s := v.Aux.(*AuxCall).Fn.String()
case "runtime.racefuncenter", "runtime.racefuncexit",
"runtime.panicdivide", "runtime.panicwrap",
"runtime.panicshift":
// If we encountered any call, we need to keep racefunc*,
// for accurate stacktraces.
case OpPanicBounds, OpPanicExtend:
// Note: these are panic generators that are ok (like the static calls above).
case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
// We must keep the race functions if there are any other call types.
if isSameCall(sym, "runtime.racefuncenter") {
// TODO REGISTER ABI this needs to be cleaned up.
// If we're removing racefuncenter, remove its argument as well.
if v.Args[0].Op != OpStore {
if v.Op == OpStaticLECall {
// there is no store, yet.
mem := v.Args[0].Args[2]
v.Args[0].reset(OpCopy)
v.Args[0].AddArg(mem)
// symIsRO reports whether sym is a read-only global.
func symIsRO(sym interface{}) bool {
lsym := sym.(*obj.LSym)
return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
// symIsROZero reports whether sym is a read-only global whose data contains all zeros.
func symIsROZero(sym Sym) bool {
lsym := sym.(*obj.LSym)
if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
for _, b := range lsym.P {
// isFixed32 reports whether the int32 at offset off in symbol sym
// is known and constant.
func isFixed32(c *Config, sym Sym, off int64) bool {
return isFixed(c, sym, off, 4)
// isFixed reports whether the range [off,off+size] of the symbol sym
// is known and constant.
func isFixed(c *Config, sym Sym, off, size int64) bool {
lsym := sym.(*obj.LSym)
if lsym.Extra == nil {
if _, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
if off == 2*c.PtrSize && size == 4 {
return true // type hash field
func fixed32(c *Config, sym Sym, off int64) int32 {
lsym := sym.(*obj.LSym)
if ti, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
if off == 2*c.PtrSize {
return int32(types.TypeHash(ti.Type.(*types.Type)))
base.Fatalf("fixed32 data not known for %s:%d", sym, off)
// isFixedSym reports whether the contents of sym at the given offset
// are known and are the constant address of another symbol.
func isFixedSym(sym Sym, off int64) bool {
lsym := sym.(*obj.LSym)
case lsym.Type == objabi.SRODATA:
// itabs, dictionaries
for _, r := range lsym.R {
if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
func fixedSym(f *Func, sym Sym, off int64) Sym {
lsym := sym.(*obj.LSym)
for _, r := range lsym.R {
if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off {
if strings.HasPrefix(r.Sym.Name, "type:") {
// In case we're loading a type out of a dictionary, we need to record
// that the containing function might put that type in an interface.
// That information is currently recorded in relocations in the dictionary,
// but if we perform this load at compile time then the dictionary
// might be dead.
reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
} else if strings.HasPrefix(r.Sym.Name, "go:itab") {
// Same, but if we're using an itab we need to record that the
// itab._type might be put in an interface.
reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
base.Fatalf("fixedSym data not known for %s:%d", sym, off)
// read8 reads one byte from the read-only global sym at offset off.
func read8(sym interface{}, off int64) uint8 {
lsym := sym.(*obj.LSym)
if off >= int64(len(lsym.P)) || off < 0 {
// Invalid index into the global sym.
// This can happen in dead code, so we don't want to panic.
// Just return any value; it will eventually get ignored.
// read16 reads two bytes from the read-only global sym at offset off.
func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
lsym := sym.(*obj.LSym)
// lsym.P is written lazily.
// Bytes requested after the end of lsym.P are 0.
if 0 <= off && off < int64(len(lsym.P)) {
buf := make([]byte, 2)
return byteorder.Uint16(buf)
// read32 reads four bytes from the read-only global sym at offset off.
func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
lsym := sym.(*obj.LSym)
if 0 <= off && off < int64(len(lsym.P)) {
buf := make([]byte, 4)
return byteorder.Uint32(buf)
// read64 reads eight bytes from the read-only global sym at offset off.
func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
lsym := sym.(*obj.LSym)
if 0 <= off && off < int64(len(lsym.P)) {
buf := make([]byte, 8)
return byteorder.Uint64(buf)
// sequentialAddresses reports whether it can prove that x + n == y.
func sequentialAddresses(x, y *Value, n int64) bool {
if x == y && n == 0 {
if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
// flagConstant represents the result of a compile-time comparison.
// The sense of these flags does not necessarily represent the hardware's notion
// of a flags register - these are just a compile-time construct.
// We happen to match the semantics to those of arm/arm64.
// Note that these semantics differ from x86: the carry flag has the opposite
// sense on a subtraction!
// On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
// On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
// (because it does x + ^y + C).
// See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
type flagConstant uint8
// N reports whether the result of an operation is negative (high bit set).
func (fc flagConstant) N() bool {
// Z reports whether the result of an operation is 0.
func (fc flagConstant) Z() bool {
// C reports whether an unsigned add overflowed (carry), or an
// unsigned subtract did not underflow (borrow).
func (fc flagConstant) C() bool {
// V reports whether a signed operation overflowed or underflowed.
func (fc flagConstant) V() bool {
func (fc flagConstant) eq() bool {
func (fc flagConstant) ne() bool {
func (fc flagConstant) lt() bool {
return fc.N() != fc.V()
func (fc flagConstant) le() bool {
return fc.Z() || fc.lt()
func (fc flagConstant) gt() bool {
return !fc.Z() && fc.ge()
func (fc flagConstant) ge() bool {
return fc.N() == fc.V()
func (fc flagConstant) ult() bool {
func (fc flagConstant) ule() bool {
return fc.Z() || fc.ult()
func (fc flagConstant) ugt() bool {
return !fc.Z() && fc.uge()
func (fc flagConstant) uge() bool {
func (fc flagConstant) ltNoov() bool {
return fc.lt() && !fc.V()
func (fc flagConstant) leNoov() bool {
return fc.le() && !fc.V()
func (fc flagConstant) gtNoov() bool {
return fc.gt() && !fc.V()
func (fc flagConstant) geNoov() bool {
return fc.ge() && !fc.V()
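// Worked example (illustrative, not from the original source): signed
// less-than is N != V. For subFlags32(math.MinInt32, 1) the subtraction
// overflows: the raw result wraps to a positive value (N=0) but V=1, so
// lt() is still true, matching the mathematical comparison MinInt32 < 1.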
func (fc flagConstant) String() string {
return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
type flagConstantBuilder struct {
func (fcs flagConstantBuilder) encode() flagConstant {
// Note: addFlags(x,y) != subFlags(x,-y) in some situations:
// - the results of the C flag are different
// - the results of the V flag when y==minint are different
// addFlags64 returns the flags that would be set from computing x+y.
func addFlags64(x, y int64) flagConstant {
var fcb flagConstantBuilder
fcb.C = uint64(x+y) < uint64(x)
fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
// subFlags64 returns the flags that would be set from computing x-y.
func subFlags64(x, y int64) flagConstant {
var fcb flagConstantBuilder
fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
// addFlags32 returns the flags that would be set from computing x+y.
func addFlags32(x, y int32) flagConstant {
var fcb flagConstantBuilder
fcb.C = uint32(x+y) < uint32(x)
fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
// subFlags32 returns the flags that would be set from computing x-y.
func subFlags32(x, y int32) flagConstant {
var fcb flagConstantBuilder
fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
// logicFlags64 returns flags set to the sign/zeroness of x.
// C and V are set to false.
func logicFlags64(x int64) flagConstant {
var fcb flagConstantBuilder
// logicFlags32 returns flags set to the sign/zeroness of x.
// C and V are set to false.
func logicFlags32(x int32) flagConstant {
var fcb flagConstantBuilder
func makeJumpTableSym(b *Block) *obj.LSym {
s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
s.Set(obj.AttrDuplicateOK, true)
s.Set(obj.AttrLocal, true)
// canRotate reports whether the architecture supports
// rotates of integer registers with the given number of bits.
func canRotate(c *Config, bits int64) bool {
if bits > c.PtrSize*8 {
// Don't rewrite to rotates bigger than the machine word.
case "386", "amd64", "arm64":
case "arm", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
// isARM64bitcon reports whether a constant can be encoded into a logical instruction.
func isARM64bitcon(x uint64) bool {
if x == 1<<64-1 || x == 0 {
// determine the period and sign-extend a unit to 64 bits
case x != x>>32|x<<32:
case x != x>>16|x<<48:
x = uint64(int64(int32(x)))
case x != x>>8|x<<56:
x = uint64(int64(int16(x)))
case x != x>>4|x<<60:
x = uint64(int64(int8(x)))
// period is 4 or 2, always true
// 0001, 0010, 0100, 1000 -- 0001 rotate
// 0011, 0110, 1100, 1001 -- 0011 rotate
// 0111, 1011, 1101, 1110 -- 0111 rotate
// 0101, 1010 -- 01 rotate, repeat
return sequenceOfOnes(x) || sequenceOfOnes(^x)
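// Illustrative example (not from the original source): 0x00FF00FF00FF00FF
// repeats with period 16 and the 16-bit unit 0x00FF is a contiguous run of
// ones, so it is encodable. A value like 0x0000FFFF0000FFFE is not: its two
// halves differ, so the repeating unit is the full 64 bits, and neither x
// nor ^x is a single run of ones.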
// sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
func sequenceOfOnes(x uint64) bool {
y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
// isARM64addcon reports whether v can be encoded as the immediate value in an ADD or SUB instruction.
func isARM64addcon(v int64) bool {
/* uimm12 or uimm24? */
if (v & 0xFFF) == 0 {
// setPos sets the position of v to pos, then returns true.
// Useful for setting the result of a rewrite's position to
// something other than the default.
func setPos(v *Value, pos src.XPos) bool {