1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/base"
9 "cmd/compile/internal/logopt"
10 "cmd/compile/internal/reflectdata"
11 "cmd/compile/internal/types"
13 "cmd/internal/obj/s390x"
27 type deadValueChoice bool
// Options controlling whether applyRewrite deletes values that become dead.
30 leaveDeadValues deadValueChoice = false
31 removeDeadValues = true
34 // deadcode indicates whether rewrite should try to remove any values that become dead.
// applyRewrite repeatedly applies the block rewriter rb and the value rewriter
// rv to every block and value of f until neither reports a change, then sweeps
// out values the rewrites invalidated. Statement-boundary positions attached to
// removed values are parked in pendingLines and re-homed afterwards.
35 func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
36 // repeat rewrites until we find no more rewrites
37 pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
41 fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
44 var states map[string]bool
48 for _, b := range f.Blocks {
53 b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
55 for i, c := range b.ControlValues() {
58 b.ReplaceControl(i, c)
64 fmt.Printf("rewriting %s -> %s\n", b0.LongString(), b.LongString())
67 for j, v := range b.Values {
72 v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
74 if v.Uses == 0 && v.removeable() {
75 if v.Op != OpInvalid && deadcode == removeDeadValues {
76 // Reset any values that are now unused, so that we decrement
77 // the use count of all of its arguments.
78 // Not quite a deadcode pass, because it does not handle cycles.
79 // But it should help Uses==1 rules to fire.
83 // No point rewriting values which aren't used.
87 vchange := phielimValue(v)
88 if vchange && debug > 1 {
89 fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
92 // Eliminate copy inputs.
93 // If any copy input becomes unused, mark it
94 // as invalid and discard its argument. Repeat
95 // recursively on the discarded argument.
96 // This phase helps remove phantom "dead copy" uses
97 // of a value so that a x.Uses==1 rule condition
99 for i, a := range v.Args {
105 // If a, a copy, has a line boundary indicator, attempt to find a new value
106 // to hold it. The first candidate is the value that will replace a (aa),
107 // if it shares the same block and line and is eligible.
108 // The second option is v, which has a as an input. Because aa is earlier in
109 // the data flow, it is the better choice.
110 if a.Pos.IsStmt() == src.PosIsStmt {
111 if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
112 aa.Pos = aa.Pos.WithIsStmt()
113 } else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
114 v.Pos = v.Pos.WithIsStmt()
116 // Record the lost line and look for a new home after all rewrites are complete.
117 // TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
118 // line to appear in more than one block, but only one block is stored, so if both end
119 // up here, then one will be lost.
120 pendingLines.set(a.Pos, int32(a.Block.ID))
122 a.Pos = a.Pos.WithNotStmt()
131 if vchange && debug > 1 {
132 fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
135 // apply rewrite function
138 // If value changed to a poor choice for a statement boundary, move the boundary
139 if v.Pos.IsStmt() == src.PosIsStmt {
140 if k := nextGoodStatementIndex(v, j, b); k != j {
141 v.Pos = v.Pos.WithNotStmt()
142 b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
147 change = change || vchange
148 if vchange && debug > 1 {
149 fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
// Fixed point reached: no rule changed anything this iteration.
153 if !change && !deadChange {
157 if (iters > 1000 || debug >= 2) && change {
158 // We've done a suspiciously large number of rewrites (or we're in debug mode).
159 // As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
160 // and the maximum value encountered during make.bash is 12.
161 // Start checking for cycles. (This is too expensive to do routinely.)
162 // Note: we avoid this path for deadChange-only iterations, to fix #51639.
164 states = make(map[string]bool)
167 if _, ok := states[h]; ok {
168 // We've found a cycle.
169 // To diagnose it, set debug to 2 and start again,
170 // so that we'll print all rules applied until we complete another cycle.
171 // If debug is already >= 2, we've already done that, so it's time to crash.
174 states = make(map[string]bool)
176 f.Fatalf("rewrite cycle detected")
182 // remove clobbered values
183 for _, b := range f.Blocks {
185 for i, v := range b.Values {
187 if v.Op == OpInvalid {
188 if v.Pos.IsStmt() == src.PosIsStmt {
189 pendingLines.set(vl, int32(b.ID))
// Re-home a parked statement boundary onto a surviving value at the same line.
194 if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
195 pendingLines.remove(vl)
196 v.Pos = v.Pos.WithIsStmt()
// As a last resort, attach a still-homeless boundary to the block itself.
203 if pendingLines.get(b.Pos) == int32(b.ID) {
204 b.Pos = b.Pos.WithIsStmt()
205 pendingLines.remove(b.Pos)
211 // Common functions called from rewriting rules
213 func is64BitFloat(t *types.Type) bool {
214 return t.Size() == 8 && t.IsFloat()
217 func is32BitFloat(t *types.Type) bool {
218 return t.Size() == 4 && t.IsFloat()
221 func is64BitInt(t *types.Type) bool {
222 return t.Size() == 8 && t.IsInteger()
225 func is32BitInt(t *types.Type) bool {
226 return t.Size() == 4 && t.IsInteger()
229 func is16BitInt(t *types.Type) bool {
230 return t.Size() == 2 && t.IsInteger()
233 func is8BitInt(t *types.Type) bool {
234 return t.Size() == 1 && t.IsInteger()
237 func isPtr(t *types.Type) bool {
238 return t.IsPtrShaped()
241 // mergeSym merges two symbolic offsets. There is no real merging of
242 // offsets, we just pick the non-nil one.
243 func mergeSym(x, y Sym) Sym {
// Both syms non-nil is a caller error (callers should guard with canMergeSym);
// panic rather than silently pick one.
250 panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
253 func canMergeSym(x, y Sym) bool {
254 return x == nil || y == nil
257 // canMergeLoadClobber reports whether the load can be merged into target without
258 // invalidating the schedule.
259 // It also checks that the other non-load argument x is something we
260 // are ok with clobbering.
261 func canMergeLoadClobber(target, load, x *Value) bool {
262 // The register containing x is going to get clobbered.
263 // Don't merge if we still need the value of x.
264 // We don't have liveness information here, but we can
265 // approximate x dying with:
266 // 1) target is x's only use.
267 // 2) target is not in a deeper loop than x.
271 loopnest := x.Block.Func.loopnest()
272 loopnest.calculateDepths()
// If target sits in a strictly deeper loop than x, then x stays live around
// that loop, so clobbering its register is unsafe.
273 if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
// Clobbering x looks fine; fall through to the ordinary merge checks.
276 return canMergeLoad(target, load)
279 // canMergeLoad reports whether the load can be merged into target without
280 // invalidating the schedule.
// Strategy: walk backwards through target's same-block arguments, looking for
// any dependence on a memory state newer than load's memory arg; finding one
// means merging would break the schedule.
281 func canMergeLoad(target, load *Value) bool {
282 if target.Block.ID != load.Block.ID {
283 // If the load is in a different block do not merge it.
287 // We can't merge the load into the target if the load
288 // has more than one use.
293 mem := load.MemoryArg()
295 // We need the load's memory arg to still be alive at target. That
296 // can't be the case if one of target's args depends on a memory
297 // state that is a successor of load's memory arg.
299 // For example, it would be invalid to merge load into target in
300 // the following situation because newmem has killed oldmem
301 // before target is reached:
302 // load = read ... oldmem
303 // newmem = write ... oldmem
304 // arg0 = read ... newmem
305 // target = add arg0 load
307 // If the argument comes from a different block then we can exclude
308 // it immediately because it must dominate load (which is in the
309 // same block as target).
311 for _, a := range target.Args {
312 if a != load && a.Block.ID == target.Block.ID {
313 args = append(args, a)
317 // memPreds contains memory states known to be predecessors of load's
318 // memory state. It is lazily initialized.
319 var memPreds map[*Value]bool
// Worklist loop over args; i counts iterations so we can bail out if the
// search becomes too expensive.
320 for i := 0; len(args) > 0; i++ {
323 // Give up if we have done a lot of iterations.
326 v := args[len(args)-1]
327 args = args[:len(args)-1]
328 if target.Block.ID != v.Block.ID {
329 // Since target and load are in the same block
330 // we can stop searching when we leave the block.
334 // A Phi implies we have reached the top of the block.
335 // The memory phi, if it exists, is always
336 // the first logical store in the block.
339 if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
340 // We could handle this situation however it is likely
344 if v.Op.SymEffect()&SymAddr != 0 {
345 // This case prevents an operation that calculates the
346 // address of a local variable from being forced to schedule
347 // before its corresponding VarDef.
353 // We don't want to combine the CMPQ with the load, because
354 // that would force the CMPQ to schedule before the VARDEF, which
355 // in turn requires the LEAQ to schedule before the VARDEF.
358 if v.Type.IsMemory() {
360 // Initialise a map containing memory states
361 // known to be predecessors of load's memory
363 memPreds = make(map[*Value]bool)
366 for i := 0; i < limit; i++ {
368 // The memory phi, if it exists, is always
369 // the first logical store in the block.
372 if m.Block.ID != target.Block.ID {
375 if !m.Type.IsMemory() {
379 if len(m.Args) == 0 {
386 // We can merge if v is a predecessor of mem.
388 // For example, we can merge load into target in the
389 // following scenario:
392 // load = read ... mem
393 // target = add x load
399 if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
400 // If v takes mem as an input then we know mem
401 // is valid at this point.
404 for _, a := range v.Args {
405 if target.Block.ID == a.Block.ID {
406 args = append(args, a)
414 // isSameCall reports whether sym is the same as the given named symbol.
415 func isSameCall(sym interface{}, name string) bool {
416 fn := sym.(*AuxCall).Fn
417 return fn != nil && fn.String() == name
420 // canLoadUnaligned reports if the architecture supports unaligned load operations.
421 func canLoadUnaligned(c *Config) bool {
422 return c.ctxt.Arch.Alignment == 1
// nlz64 returns the number of leading zero bits in the 64-bit pattern of x.
func nlz64(x int64) int {
	return bits.LeadingZeros64(uint64(x))
}
// nlz32 returns the number of leading zero bits in the 32-bit pattern of x.
func nlz32(x int32) int {
	return bits.LeadingZeros32(uint32(x))
}
// nlz16 returns the number of leading zero bits in the 16-bit pattern of x.
func nlz16(x int16) int {
	return bits.LeadingZeros16(uint16(x))
}
// nlz8 returns the number of leading zero bits in the 8-bit pattern of x.
func nlz8(x int8) int {
	return bits.LeadingZeros8(uint8(x))
}
// ntz64 returns the number of trailing zero bits in the 64-bit pattern of x.
func ntz64(x int64) int {
	return bits.TrailingZeros64(uint64(x))
}
// ntz32 returns the number of trailing zero bits in the 32-bit pattern of x.
func ntz32(x int32) int {
	return bits.TrailingZeros32(uint32(x))
}
// ntz16 returns the number of trailing zero bits in the 16-bit pattern of x.
func ntz16(x int16) int {
	return bits.TrailingZeros16(uint16(x))
}
// ntz8 returns the number of trailing zero bits in the 8-bit pattern of x.
func ntz8(x int8) int {
	return bits.TrailingZeros8(uint8(x))
}
// oneBit reports whether exactly one bit of x is set.
func oneBit(x int64) bool {
	return x != 0 && x&(x-1) == 0
}
// oneBit8 reports whether exactly one bit of x is set.
func oneBit8(x int8) bool {
	return x != 0 && x&(x-1) == 0
}
// oneBit16 reports whether exactly one bit of x is set.
func oneBit16(x int16) bool {
	return x != 0 && x&(x-1) == 0
}
// oneBit32 reports whether exactly one bit of x is set.
func oneBit32(x int32) bool {
	return x != 0 && x&(x-1) == 0
}
// oneBit64 reports whether exactly one bit of x is set.
func oneBit64(x int64) bool {
	return x != 0 && x&(x-1) == 0
}
// nto returns the number of trailing one bits in x
// (equivalently, the number of trailing zeros of ^x).
func nto(x int64) int64 {
	return int64(bits.TrailingZeros64(uint64(^x)))
}
// log8 returns the base-2 logarithm of n.
// n must be a positive power of 2 (isPowerOfTwo8 returns true).
func log8(n int8) int64 {
	return int64(bits.Len8(uint8(n)) - 1)
}
// log16 returns the base-2 logarithm of n, which must be a positive power of 2.
func log16(n int16) int64 {
	return int64(bits.Len16(uint16(n)) - 1)
}
// log32 returns the base-2 logarithm of n, which must be a positive power of 2.
func log32(n int32) int64 {
	return int64(bits.Len32(uint32(n)) - 1)
}
// log64 returns the base-2 logarithm of n, which must be a positive power of 2.
func log64(n int64) int64 {
	return int64(bits.Len64(uint64(n)) - 1)
}
// log2uint32 returns the base-2 logarithm of uint32(n), with log2(0) = -1.
func log2uint32(n int64) int64 {
	return int64(bits.Len32(uint32(n))) - 1
}
// isPowerOfTwo8 reports whether n is a positive power of 2.
func isPowerOfTwo8(n int8) bool {
	if n <= 0 {
		return false
	}
	return n&(n-1) == 0
}
// isPowerOfTwo16 reports whether n is a positive power of 2.
func isPowerOfTwo16(n int16) bool {
	if n <= 0 {
		return false
	}
	return n&(n-1) == 0
}
// isPowerOfTwo32 reports whether n is a positive power of 2.
func isPowerOfTwo32(n int32) bool {
	if n <= 0 {
		return false
	}
	return n&(n-1) == 0
}
// isPowerOfTwo64 reports whether n is a positive power of 2.
func isPowerOfTwo64(n int64) bool {
	if n <= 0 {
		return false
	}
	return n&(n-1) == 0
}
// isUint64PowerOfTwo reports whether uint64(in) is a power of 2.
func isUint64PowerOfTwo(in int64) bool {
	n := uint64(in)
	return n > 0 && n&(n-1) == 0
}
// isUint32PowerOfTwo reports whether uint32(in) is a power of 2.
func isUint32PowerOfTwo(in int64) bool {
	n := uint64(uint32(in))
	return n > 0 && n&(n-1) == 0
}
// is32Bit reports whether n fits in a signed 32-bit integer.
func is32Bit(n int64) bool {
	return int64(int32(n)) == n
}
// is16Bit reports whether n fits in a signed 16-bit integer.
func is16Bit(n int64) bool {
	return int64(int16(n)) == n
}
// is8Bit reports whether n fits in a signed 8-bit integer.
func is8Bit(n int64) bool {
	return int64(int8(n)) == n
}
// isU8Bit reports whether n fits in an unsigned 8-bit integer.
func isU8Bit(n int64) bool {
	return int64(uint8(n)) == n
}
// isU12Bit reports whether n fits in an unsigned 12-bit integer.
func isU12Bit(n int64) bool {
	return n >= 0 && n <= 1<<12-1
}
// isU16Bit reports whether n fits in an unsigned 16-bit integer.
func isU16Bit(n int64) bool {
	return int64(uint16(n)) == n
}
// isU32Bit reports whether n fits in an unsigned 32-bit integer.
func isU32Bit(n int64) bool {
	return int64(uint32(n)) == n
}
// is20Bit reports whether n fits in a signed 20-bit integer.
func is20Bit(n int64) bool {
	return n >= -(1<<19) && n <= 1<<19-1
}
535 // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
536 func b2i(b bool) int64 {
543 // b2i32 translates a boolean value to 0 or 1.
544 func b2i32(b bool) int32 {
551 // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
552 // A shift is bounded if it is shifting by less than the width of the shifted value.
// NOTE(review): boundedness is presumably recorded on v by an earlier analysis
// pass — confirm against the AuxInt/bounded flag used elsewhere in this package.
553 func shiftIsBounded(v *Value) bool {
557 // canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
558 // generated code as much as possible.
559 func canonLessThan(x, y *Value) bool {
// Values at distinct source positions are ordered by source position.
563 if !x.Pos.SameFileAndLine(y.Pos) {
564 return x.Pos.Before(y.Pos)
569 // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
570 // of the mantissa. It will panic if the truncation results in lost information.
571 func truncate64Fto32F(f float64) float32 {
572 if !isExactFloat32(f) {
573 panic("truncate64Fto32F: truncation is not exact")
578 // NaN bit patterns aren't necessarily preserved across conversion
579 // instructions so we need to do the conversion manually.
580 b := math.Float64bits(f)
581 m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
582 // | sign | exponent | mantissa |
// 0x7f800000 forces the float32 exponent to all ones, hand-crafting a NaN
// that keeps the original sign and the top 23 mantissa bits.
583 r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
584 return math.Float32frombits(r)
587 // extend32Fto64F converts a float32 value to a float64 value preserving the bit
588 // pattern of the mantissa.
589 func extend32Fto64F(f float32) float64 {
590 if !math.IsNaN(float64(f)) {
593 // NaN bit patterns aren't necessarily preserved across conversion
594 // instructions so we need to do the conversion manually.
595 b := uint64(math.Float32bits(f))
596 // | sign | exponent | mantissa |
// Widen the NaN by hand: keep the sign, force the float64 exponent to all
// ones (0x7ff), and shift the 23 mantissa bits into the float64 mantissa.
597 r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
598 return math.Float64frombits(r)
601 // DivisionNeedsFixUp reports whether the division needs fix-up code.
602 func DivisionNeedsFixUp(v *Value) bool {
606 // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
607 func auxFrom64F(f float64) int64 {
// NaNs are rejected (panic below) before the bit pattern is stored.
609 panic("can't encode a NaN in AuxInt field")
611 return int64(math.Float64bits(f))
614 // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
615 func auxFrom32F(f float32) int64 {
617 panic("can't encode a NaN in AuxInt field")
// A float32 is stored widened to float64 so both widths share one encoding.
619 return int64(math.Float64bits(extend32Fto64F(f)))
622 // auxTo32F decodes a float32 from the AuxInt value provided.
623 func auxTo32F(i int64) float32 {
624 return truncate64Fto32F(math.Float64frombits(uint64(i)))
// auxTo64F decodes the float64 whose bit pattern is stored in AuxInt i.
func auxTo64F(i int64) float64 {
	return math.Float64frombits(uint64(i))
}
// The auxIntToX / xToAuxInt helper pairs below convert between the typed aux
// values used by rewrite rules and the raw int64 AuxInt field on Values.
632 func auxIntToBool(i int64) bool {
638 func auxIntToInt8(i int64) int8 {
641 func auxIntToInt16(i int64) int16 {
644 func auxIntToInt32(i int64) int32 {
647 func auxIntToInt64(i int64) int64 {
650 func auxIntToUint8(i int64) uint8 {
653 func auxIntToFloat32(i int64) float32 {
654 return float32(math.Float64frombits(uint64(i)))
656 func auxIntToFloat64(i int64) float64 {
657 return math.Float64frombits(uint64(i))
659 func auxIntToValAndOff(i int64) ValAndOff {
662 func auxIntToArm64BitField(i int64) arm64BitField {
663 return arm64BitField(i)
// Only the zero int128 has a representable AuxInt encoding.
665 func auxIntToInt128(x int64) int128 {
667 panic("nonzero int128 not allowed")
671 func auxIntToFlagConstant(x int64) flagConstant {
672 return flagConstant(x)
675 func auxIntToOp(cc int64) Op {
679 func boolToAuxInt(b bool) int64 {
685 func int8ToAuxInt(i int8) int64 {
688 func int16ToAuxInt(i int16) int64 {
691 func int32ToAuxInt(i int32) int64 {
694 func int64ToAuxInt(i int64) int64 {
// uint8 round-trips through int8 so the stored AuxInt matches auxIntToUint8.
697 func uint8ToAuxInt(i uint8) int64 {
698 return int64(int8(i))
700 func float32ToAuxInt(f float32) int64 {
701 return int64(math.Float64bits(float64(f)))
703 func float64ToAuxInt(f float64) int64 {
704 return int64(math.Float64bits(f))
706 func valAndOffToAuxInt(v ValAndOff) int64 {
709 func arm64BitFieldToAuxInt(v arm64BitField) int64 {
712 func int128ToAuxInt(x int128) int64 {
714 panic("nonzero int128 not allowed")
718 func flagConstantToAuxInt(x flagConstant) int64 {
722 func opToAuxInt(o Op) int64 {
726 // Aux is an interface to hold miscellaneous data in Blocks and Values.
731 // for now only used to mark moves that need to avoid clobbering flags
734 func (auxMark) CanBeAnSSAAux() {}
738 // stringAux wraps string values for use in Aux.
739 type stringAux string
741 func (stringAux) CanBeAnSSAAux() {}
// The auxToX / xToAux helper pairs below convert between the Aux interface
// and the concrete types rewrite rules work with.
743 func auxToString(i Aux) string {
744 return string(i.(stringAux))
746 func auxToSym(i Aux) Sym {
747 // TODO: kind of a hack - allows nil interface through
751 func auxToType(i Aux) *types.Type {
752 return i.(*types.Type)
754 func auxToCall(i Aux) *AuxCall {
757 func auxToS390xCCMask(i Aux) s390x.CCMask {
758 return i.(s390x.CCMask)
760 func auxToS390xRotateParams(i Aux) s390x.RotateParams {
761 return i.(s390x.RotateParams)
764 func StringToAux(s string) Aux {
767 func symToAux(s Sym) Aux {
770 func callToAux(s *AuxCall) Aux {
773 func typeToAux(t *types.Type) Aux {
776 func s390xCCMaskToAux(c s390x.CCMask) Aux {
779 func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
// uaddOvf reports whether the unsigned sum a+b wraps around 2^64.
func uaddOvf(a, b int64) bool {
	sum := uint64(a) + uint64(b)
	return sum < uint64(a)
}
788 // loadLSymOffset simulates reading a word at an offset into a
789 // read-only symbol's runtime memory. If it would read a pointer to
790 // another symbol, that symbol is returned. Otherwise, it returns nil.
791 func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
792 if lsym.Type != objabi.SRODATA {
// Scan the symbol's relocations for an address relocation at exactly offset.
796 for _, r := range lsym.R {
797 if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
// devirtLECall rewrites the late-expanded interface call v into a static call
// to sym, shifting out the first argument (no longer needed after
// devirtualization).
805 func devirtLECall(v *Value, sym *obj.LSym) *Value {
806 v.Op = OpStaticLECall
807 auxcall := v.Aux.(*AuxCall)
811 copy(v.Args[0:], v.Args[1:])
812 v.Args[len(v.Args)-1] = nil // aid GC
813 v.Args = v.Args[:len(v.Args)-1]
814 if f := v.Block.Func; f.pass.debug > 0 {
815 f.Warnl(v.Pos, "de-virtualizing call")
820 // isSamePtr reports whether p1 and p2 point to the same address.
821 func isSamePtr(p1, p2 *Value) bool {
822 return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
831 case OpAddr, OpLocalAddr:
832 return p1.Aux == p2.Aux
834 return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
// isStackPtr reports whether v points into the stack, following chains of
// OpOffPtr/OpAddPtr back to their base.
839 func isStackPtr(v *Value) bool {
840 for v.Op == OpOffPtr || v.Op == OpAddPtr {
843 return v.Op == OpSP || v.Op == OpLocalAddr
846 // disjoint reports whether the memory region specified by [p1:p1+n1)
847 // does not overlap with [p2:p2+n2).
848 // A return value of false does not imply the regions overlap.
849 func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
850 if n1 == 0 || n2 == 0 {
// Strip OpOffPtr chains, accumulating the constant offset from the base.
856 baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
857 base, offset = ptr, 0
858 for base.Op == OpOffPtr {
859 offset += base.AuxInt
864 p1, off1 := baseAndOffset(p1)
865 p2, off2 := baseAndOffset(p2)
// Same base pointer: disjointness reduces to a constant interval check.
866 if isSamePtr(p1, p2) {
867 return !overlap(off1, n1, off2, n2)
869 // p1 and p2 are not the same, so if they are both OpAddrs then
870 // they point to different variables.
871 // If one pointer is on the stack and the other is an argument
872 // then they can't overlap.
874 case OpAddr, OpLocalAddr:
875 if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
878 return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
879 case OpArg, OpArgIntReg:
880 if p2.Op == OpSP || p2.Op == OpLocalAddr {
884 return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
889 // moveSize returns the number of bytes an aligned MOV instruction moves.
890 func moveSize(align int64, c *Config) int64 {
892 case align%8 == 0 && c.PtrSize == 8:
902 // mergePoint finds a block among a's blocks which dominates b and is itself
903 // dominated by all of a's blocks. Returns nil if it can't find one.
904 // Might return nil even if one does exist.
905 func mergePoint(b *Block, a ...*Value) *Block {
906 // Walk backward from b looking for one of the a's blocks.
912 for _, x := range a {
917 if len(b.Preds) > 1 {
918 // Don't know which way to go back. Abort.
924 return nil // too far away
926 // At this point, r is the first value in a that we find by walking backwards.
927 // if we return anything, r will be it.
930 // Keep going, counting the other a's that we find. They must all dominate r.
933 for _, x := range a {
939 // Found all of a in a backwards walk. We can return r.
942 if len(b.Preds) > 1 {
// Abort on any merge point in the walk: dominance can no longer be shown.
949 return nil // too far away
952 // clobber invalidates values. Returns true.
953 // clobber is used by rewrite rules to:
955 // A) make sure the values are really dead and never used again.
956 // B) decrement use counts of the values' args.
957 func clobber(vv ...*Value) bool {
958 for _, v := range vv {
960 // Note: leave v.Block intact. The Block field is used after clobber.
965 // clobberIfDead resets v when use count is 1. Returns true.
966 // clobberIfDead is used by rewrite rules to decrement
967 // use counts of v's args when v is dead and never used.
968 func clobberIfDead(v *Value) bool {
972 // Note: leave v.Block intact. The Block field is used after clobberIfDead.
976 // noteRule is an easy way to track if a rule is matched when writing
977 // new ones. Make the rule of interest also conditional on
979 // noteRule("note to self: rule of interest matched")
981 // and that message will print when the rule matches.
982 func noteRule(s string) bool {
987 // countRule increments Func.ruleMatches[key].
988 // If Func.ruleMatches is non-nil at the end
989 // of compilation, it will be printed to stdout.
990 // This is intended to make it easier to find which functions
991 // which contain lots of rules matches when developing new rules.
992 func countRule(v *Value, key string) bool {
994 if f.ruleMatches == nil {
995 f.ruleMatches = make(map[string]int)
1001 // warnRule generates compiler debug output with string s when
1002 // v is not in autogenerated code, cond is true and the rule has fired.
1003 func warnRule(cond bool, v *Value, s string) bool {
// pos.Line() > 1 filters out autogenerated code (see doc comment above).
1004 if pos := v.Pos; pos.Line() > 1 && cond {
1005 v.Block.Func.Warnl(pos, s)
1010 // for a pseudo-op like (LessThan x), extract x.
1011 func flagArg(v *Value) *Value {
1012 if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
1018 // arm64Negate finds the complement to an ARM64 condition code,
1019 // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
1021 // For floating point, it's more subtle because NaN is unordered. We do
1022 // !LessThanF -> NotLessThanF, the latter takes care of NaNs.
1023 func arm64Negate(op Op) Op {
// Integer conditions negate to the complementary comparison; floating-point
// conditions flip to/from their Not* forms so NaN inputs stay correct.
1025 case OpARM64LessThan:
1026 return OpARM64GreaterEqual
1027 case OpARM64LessThanU:
1028 return OpARM64GreaterEqualU
1029 case OpARM64GreaterThan:
1030 return OpARM64LessEqual
1031 case OpARM64GreaterThanU:
1032 return OpARM64LessEqualU
1033 case OpARM64LessEqual:
1034 return OpARM64GreaterThan
1035 case OpARM64LessEqualU:
1036 return OpARM64GreaterThanU
1037 case OpARM64GreaterEqual:
1038 return OpARM64LessThan
1039 case OpARM64GreaterEqualU:
1040 return OpARM64LessThanU
1042 return OpARM64NotEqual
1043 case OpARM64NotEqual:
1045 case OpARM64LessThanF:
1046 return OpARM64NotLessThanF
1047 case OpARM64NotLessThanF:
1048 return OpARM64LessThanF
1049 case OpARM64LessEqualF:
1050 return OpARM64NotLessEqualF
1051 case OpARM64NotLessEqualF:
1052 return OpARM64LessEqualF
1053 case OpARM64GreaterThanF:
1054 return OpARM64NotGreaterThanF
1055 case OpARM64NotGreaterThanF:
1056 return OpARM64GreaterThanF
1057 case OpARM64GreaterEqualF:
1058 return OpARM64NotGreaterEqualF
1059 case OpARM64NotGreaterEqualF:
1060 return OpARM64GreaterEqualF
1062 panic("unreachable")
1066 // arm64Invert evaluates (InvertFlags op), which
1067 // is the same as altering the condition codes such
1068 // that the same result would be produced if the arguments
1069 // to the flag-generating instruction were reversed, e.g.
1070 // (InvertFlags (CMP x y)) -> (CMP y x)
1071 func arm64Invert(op Op) Op {
// Each comparison maps to its mirror image; (in)equality is symmetric.
1073 case OpARM64LessThan:
1074 return OpARM64GreaterThan
1075 case OpARM64LessThanU:
1076 return OpARM64GreaterThanU
1077 case OpARM64GreaterThan:
1078 return OpARM64LessThan
1079 case OpARM64GreaterThanU:
1080 return OpARM64LessThanU
1081 case OpARM64LessEqual:
1082 return OpARM64GreaterEqual
1083 case OpARM64LessEqualU:
1084 return OpARM64GreaterEqualU
1085 case OpARM64GreaterEqual:
1086 return OpARM64LessEqual
1087 case OpARM64GreaterEqualU:
1088 return OpARM64LessEqualU
1089 case OpARM64Equal, OpARM64NotEqual:
1091 case OpARM64LessThanF:
1092 return OpARM64GreaterThanF
1093 case OpARM64GreaterThanF:
1094 return OpARM64LessThanF
1095 case OpARM64LessEqualF:
1096 return OpARM64GreaterEqualF
1097 case OpARM64GreaterEqualF:
1098 return OpARM64LessEqualF
1099 case OpARM64NotLessThanF:
1100 return OpARM64NotGreaterThanF
1101 case OpARM64NotGreaterThanF:
1102 return OpARM64NotLessThanF
1103 case OpARM64NotLessEqualF:
1104 return OpARM64NotGreaterEqualF
1105 case OpARM64NotGreaterEqualF:
1106 return OpARM64NotLessEqualF
1108 panic("unreachable")
1112 // evaluate an ARM64 op against a flags value
1113 // that is potentially constant; return 1 for true,
1114 // -1 for false, and 0 for not constant.
1115 func ccARM64Eval(op Op, flags *Value) int {
// Inverted flags negate the verdict (true/false swap; "unknown" stays 0).
1117 if fop == OpARM64InvertFlags {
1118 return -ccARM64Eval(op, flags.Args[0])
1120 if fop != OpARM64FlagConstant {
1123 fc := flagConstant(flags.AuxInt)
1124 b2i := func(b bool) int {
1133 case OpARM64NotEqual:
1135 case OpARM64LessThan:
1137 case OpARM64LessThanU:
1138 return b2i(fc.ult())
1139 case OpARM64GreaterThan:
1141 case OpARM64GreaterThanU:
1142 return b2i(fc.ugt())
1143 case OpARM64LessEqual:
1145 case OpARM64LessEqualU:
1146 return b2i(fc.ule())
1147 case OpARM64GreaterEqual:
1149 case OpARM64GreaterEqualU:
1150 return b2i(fc.uge())
1155 // logRule logs the use of the rule s. This will only be enabled if
1156 // rewrite rules were generated with the -log option, see _gen/rulegen.go.
1157 func logRule(s string) {
1158 if ruleFile == nil {
1159 // Open a log file to write log to. We open in append
1160 // mode because all.bash runs the compiler lots of times,
1161 // and we want the concatenation of all of those logs.
1162 // This means, of course, that users need to rm the old log
1163 // to get fresh data.
1164 // TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
1165 w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
1166 os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
1172 _, err := fmt.Fprintln(ruleFile, s)
// ruleFile is the lazily-opened destination for rule logging.
1178 var ruleFile io.Writer
// min returns the smaller of x and y.
1180 func min(x, y int64) int64 {
// max returns the larger of x and y.
1186 func max(x, y int64) int64 {
// isConstZero reports whether v is a constant with value zero.
1193 func isConstZero(v *Value) bool {
1197 case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
1198 return v.AuxInt == 0
1203 // reciprocalExact64 reports whether 1/c is exactly representable.
1204 func reciprocalExact64(c float64) bool {
1205 b := math.Float64bits(c)
1206 man := b & (1<<52 - 1)
1208 return false // not a power of 2, denormal, or NaN
1210 exp := b >> 52 & (1<<11 - 1)
1211 // exponent bias is 0x3ff. So taking the reciprocal of a number
1212 // changes the exponent to 0x7fe-exp.
1217 return false // ±inf
1219 return false // exponent is not representable
1225 // reciprocalExact32 reports whether 1/c is exactly representable.
1226 func reciprocalExact32(c float32) bool {
1227 b := math.Float32bits(c)
1228 man := b & (1<<23 - 1)
1230 return false // not a power of 2, denormal, or NaN
1232 exp := b >> 23 & (1<<8 - 1)
1233 // exponent bias is 0x7f. So taking the reciprocal of a number
1234 // changes the exponent to 0xfe-exp.
1239 return false // ±inf
1241 return false // exponent is not representable
1247 // check if an immediate can be directly encoded into an ARM's instruction.
1248 func isARMImmRot(v uint32) bool {
// ARM immediates are an 8-bit value rotated by an even amount; try each.
1249 for i := 0; i < 16; i++ {
// overlap reports whether the half-open ranges [offset1, offset1+size1) and
// [offset2, offset2+size2) intersect.
func overlap(offset1, size1, offset2, size2 int64) bool {
	if offset1 >= offset2 && offset2+size2 > offset1 {
		return true
	}
	if offset2 >= offset1 && offset1+size1 > offset2 {
		return true
	}
	return false
}
// areAdjacentOffsets reports whether the size-byte region at off1 and the
// size-byte region at off2 touch back-to-back, in either order.
func areAdjacentOffsets(off1, off2, size int64) bool {
	if off1+size == off2 {
		return true
	}
	return off1 == off2+size
}
1275 // check if value zeroes out upper 32-bit of 64-bit register.
1276 // depth limits recursion depth. In AMD64.rules 3 is used as limit,
1277 // because it catches same amount of cases as 4.
1278 func zeroUpper32Bits(x *Value, depth int) bool {
1280 case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
1281 OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
1282 OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
1283 OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
1284 OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
1285 OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
1286 OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
1287 OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
1288 OpAMD64SHLL, OpAMD64SHLLconst:
1290 case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
1291 OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
1292 OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
1295 return x.Type.Size() == 4
1296 case OpPhi, OpSelect0, OpSelect1:
1297 // Phis can use each-other as an arguments, instead of tracking visited values,
1298 // just limit recursion depth.
1302 for i := range x.Args {
1303 if !zeroUpper32Bits(x.Args[i], depth-1) {
1313 // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
1314 func zeroUpper48Bits(x *Value, depth int) bool {
// Ops producing a 16-bit result zero-extended to 64 bits.
1316 case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
// A 2-byte-typed value occupies only the low 16 bits.
1319 return x.Type.Size() == 2
1320 case OpPhi, OpSelect0, OpSelect1:
1321 // Phis can use each other as arguments; instead of tracking visited values,
1322 // just limit recursion depth.
1326 for i := range x.Args {
1327 if !zeroUpper48Bits(x.Args[i], depth-1) {
1337 // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
1338 func zeroUpper56Bits(x *Value, depth int) bool {
// Ops producing an 8-bit result zero-extended to 64 bits.
1340 case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
// A 1-byte-typed value occupies only the low 8 bits.
1343 return x.Type.Size() == 1
1344 case OpPhi, OpSelect0, OpSelect1:
1345 // Phis can use each other as arguments; instead of tracking visited values,
1346 // just limit recursion depth.
1350 for i := range x.Args {
1351 if !zeroUpper56Bits(x.Args[i], depth-1) {
// isInlinableMemclr reports whether a memory clear of sz bytes may be
// lowered inline on the target architecture; the size limits are
// architecture-specific (applied in the per-arch case bodies).
1361 func isInlinableMemclr(c *Config, sz int64) bool {
1365 // TODO: expand this check to allow other architectures
1366 // see CL 454255 and issue 56997
// NOTE(review): switch presumably branches on c.arch — confirm against full source.
1368 case "amd64", "arm64":
1370 case "ppc64le", "ppc64":
1376 // isInlinableMemmove reports whether the given arch performs a Move of the given size
1377 // faster than memmove. It will only return true if replacing the memmove with a Move is
1378 // safe, either because Move will do all of its loads before any of its stores, or
1379 // because the arguments are known to be disjoint.
1380 // This is used as a check for replacing memmove with Move ops.
1381 func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1382 // It is always safe to convert memmove into Move when its arguments are disjoint.
1383 // Move ops may or may not be faster for large sizes depending on how the platform
1384 // lowers them, so we only perform this optimization on platforms that we know to
1385 // have fast Move ops.
// Small moves are always safe; larger ones additionally require that the
// source and destination regions are provably non-overlapping.
1388 return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
1389 case "386", "arm64":
1391 case "s390x", "ppc64", "ppc64le":
1392 return sz <= 8 || disjoint(dst, sz, src, sz)
1393 case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
// IsInlinableMemmove is the exported wrapper around isInlinableMemmove.
1398 func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1399 return isInlinableMemmove(dst, src, sz, c)
1402 // logLargeCopy logs the occurrence of a large copy.
1403 // The best place to do this is in the rewrite rules where the size of the move is easy to find.
1404 // "Large" is arbitrarily chosen to be 128 bytes; this may change.
1405 func logLargeCopy(v *Value, s int64) bool {
// Emit an optimization-log entry recording the copy size at v's position.
1409 if logopt.Enabled() {
1410 logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
// LogLargeCopy is like logLargeCopy, but takes the function name and source
// position directly instead of extracting them from a *Value.
1414 func LogLargeCopy(funcName string, pos src.XPos, s int64) {
1418 if logopt.Enabled() {
1419 logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
1423 // hasSmallRotate reports whether the architecture has rotate instructions
1424 // for sizes < 32-bit. This is used to decide whether to promote some rotations.
1425 func hasSmallRotate(c *Config) bool {
// Only a few architectures have native sub-word rotates.
1427 case "amd64", "386":
1434 func supportsPPC64PCRel() bool {
1435 // PCRel is currently supported for >= power10, linux only
1436 // Internal and external linking supports this on ppc64le; internal linking on ppc64.
1437 return buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
// newPPC64ShiftAuxInt range-checks sh, mb, and me against the operand
// width sz and packs them into a single auxint as sh<<16 | mb<<8 | me.
1440 func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
1441 if sh < 0 || sh >= sz {
1442 panic("PPC64 shift arg sh out of range")
1444 if mb < 0 || mb >= sz {
1445 panic("PPC64 shift arg mb out of range")
1447 if me < 0 || me >= sz {
1448 panic("PPC64 shift arg me out of range")
// Each field fits in 8 bits after the range checks above.
1450 return int32(sh<<16 | mb<<8 | me)
// GetPPC64Shiftsh extracts the sign-extended sh field (bits 16-23) from an
// auxint packed by newPPC64ShiftAuxInt.
func GetPPC64Shiftsh(auxint int64) int64 {
	sh := int8(auxint >> 16)
	return int64(sh)
}
// GetPPC64Shiftmb extracts the sign-extended mb field (bits 8-15) from an
// auxint packed by newPPC64ShiftAuxInt.
func GetPPC64Shiftmb(auxint int64) int64 {
	mb := int8(auxint >> 8)
	return int64(mb)
}
// GetPPC64Shiftme extracts the sign-extended me field (bits 0-7) from an
// auxint packed by newPPC64ShiftAuxInt.
func GetPPC64Shiftme(auxint int64) int64 {
	me := int8(auxint)
	return int64(me)
}
1465 // Test if this value can be encoded as a mask for a rlwinm like
1466 // operation. Masks can also extend from the msb and wrap to
1467 // the lsb too. That is, the valid masks are 32 bit strings
1468 // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
1469 func isPPC64WordRotateMask(v64 int64) bool {
1470 // Isolate rightmost 1 (if none 0) and add.
1473 // Likewise, for the wrapping case.
// NOTE(review): vn is defined above (presumably the bitwise complement of v,
// reducing the wrapped mask to the contiguous case) — confirm against full source.
1475 vpn := (vn & -vn) + vn
// A contiguous run of ones m satisfies (m + lowest-set-bit(m)) & m == 0.
1476 return (v&vp == 0 || vn&vpn == 0) && v != 0
1479 // Compress mask and shift into single value of the form
1480 // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
1481 // be used to regenerate the input mask.
1482 func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
// mb/me are the leading/trailing boundaries of the mask; mbn/men are the
// boundaries of the inverted mask, used when the mask wraps.
1483 var mb, me, mbn, men int
1485 // Determine boundaries and then decode them
1486 if mask == 0 || ^mask == 0 || rotate >= nbits {
1487 panic(fmt.Sprintf("invalid PPC64 rotate mask: %x %d %d", uint64(mask), rotate, nbits))
1488 } else if nbits == 32 {
1489 mb = bits.LeadingZeros32(uint32(mask))
1490 me = 32 - bits.TrailingZeros32(uint32(mask))
1491 mbn = bits.LeadingZeros32(^uint32(mask))
1492 men = 32 - bits.TrailingZeros32(^uint32(mask))
1494 mb = bits.LeadingZeros64(uint64(mask))
1495 me = 64 - bits.TrailingZeros64(uint64(mask))
1496 mbn = bits.LeadingZeros64(^uint64(mask))
1497 men = 64 - bits.TrailingZeros64(^uint64(mask))
1499 // Check for a wrapping mask (e.g bits at 0 and 63)
1500 if mb == 0 && me == int(nbits) {
1501 // swap the inverted values
// Pack as me | mb<<8 | rotate<<16 | nbits<<24 (see doc comment above).
1505 return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
1508 // Merge (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x)
1509 // SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
1510 // RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
1511 // operations can be combined. This function assumes the two opcodes can
1512 // be merged, and returns an encoded rotate+mask value of the combined RLDICL.
1513 func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
// NOTE(review): mb and r are computed above from s (presumably mb=s and
// r=64-s, the RLDICL form of SRDconst) — confirm against full source.
1516 // A larger mb is a smaller mask.
1517 if (encoded>>8)&0xFF < mb {
// Keep the more restrictive (larger) mb in the mb byte.
1518 encoded = (encoded &^ 0xFF00) | mb<<8
1520 // The rotate is expected to be 0.
1521 if (encoded & 0xFF0000) != 0 {
1522 panic("non-zero rotate")
1524 return encoded | r<<16
1527 // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask. The values returned as
1528 // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
1529 func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
1530 auxint := uint64(sauxint)
// Unpack the byte fields encoded as me | mb<<8 | rotate<<16 | nbits<<24.
1531 rotate = int64((auxint >> 16) & 0xFF)
1532 mb = int64((auxint >> 8) & 0xFF)
1533 me = int64((auxint >> 0) & 0xFF)
1534 nbits := int64((auxint >> 24) & 0xFF)
// Rebuild the mask from its boundaries.
1535 mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
// For 32-bit masks, keep only the low word.
1540 mask = uint64(uint32(mask))
1543 // Fixup ME to match ISA definition. The second argument to MASK(..,me)
// is inclusive, while the encoded value is exclusive.
1545 me = (me - 1) & (nbits - 1)
1549 // This verifies that the mask is a set of
1550 // consecutive bits including the least
// significant bit (i.e. v == 2^n - 1 for some n > 0).
1552 func isPPC64ValidShiftMask(v int64) bool {
// (v+1)&v == 0 iff v+1 is a power of two, i.e. v is of the form 0..01..1.
1553 if (v != 0) && ((v+1)&v) == 0 {
// getPPC64ShiftMaskLength returns the number of bits needed to represent v,
// i.e. the position of its highest set bit (bits.Len).
func getPPC64ShiftMaskLength(v int64) int64 {
	n := bits.Len64(uint64(v))
	return int64(n)
}
// mergePPC64RShiftMask decomposes a logical right shift by s of an
// nbits-wide value into the mask of bits that survive the shift, and
// returns that mask ANDed with m.
func mergePPC64RShiftMask(m, s, nbits int64) int64 {
	keep := (uint64(1)<<uint(nbits) - 1) >> uint(s)
	return m & int64(keep)
}
1570 // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
1571 func mergePPC64AndSrwi(m, s int64) int64 {
// The AND mask as seen after the logical right shift by s.
1572 mask := mergePPC64RShiftMask(m, s, 32)
1573 if !isPPC64WordRotateMask(mask) {
// Encode as a rotate-left by (32-s) with the shifted mask.
1576 return encodePPC64RotateMask((32-s)&31, mask, 32)
1579 // Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
1580 // Return the encoded RLWINM constant, or 0 if they cannot be merged.
1581 func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
// Bits of the 32-bit word surviving the right shift by srw.
1582 mask_1 := uint64(0xFFFFFFFF >> uint(srw))
1583 // for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
1584 mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1586 // Rewrite mask to apply after the final left shift.
1587 mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
// NOTE(review): r_1 is defined above (presumably 32-srw, the rotate form of
// the right shift) — confirm against full source.
1590 r_2 := GetPPC64Shiftsh(sld)
1591 r_3 := (r_1 + r_2) & 31 // This can wrap.
// The combined mask must be non-empty and fit in 32 bits.
1593 if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
1596 return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
1599 // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM. Return
1600 // the encoded RLWINM constant, or 0 if they cannot be merged.
1601 func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
// r_1/mask_1 are the rotate and mask of the incoming RLWINM.
1602 r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
1603 // for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
1604 mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1606 // combine the masks, and adjust for the final left shift.
1607 mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
1608 r_2 := GetPPC64Shiftsh(int64(sld))
1609 r_3 := (r_1 + r_2) & 31 // This can wrap.
1611 // Verify the result is still a valid bitmask of <= 32 bits.
1612 if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
1615 return encodePPC64RotateMask(r_3, int64(mask_3), 32)
1618 // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
1619 // or return 0 if they cannot be combined.
1620 func mergePPC64SldiSrw(sld, srw int64) int64 {
1621 if sld > srw || srw >= 32 {
// mask_r: bits surviving the right shift; mask_l: bits that remain within
// 32 bits after the left shift.
1624 mask_r := uint32(0xFFFFFFFF) >> uint(srw)
1625 mask_l := uint32(0xFFFFFFFF) >> uint(sld)
1626 mask := (mask_r & mask_l) << uint(sld)
// Net effect is a rotate-left by (32-srw+sld) under the combined mask.
1627 return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
// rotateLeft32 rotates the low 32 bits of v left by rotate bits and
// returns the result zero-extended to int64.
func rotateLeft32(v, rotate int64) int64 {
	r := bits.RotateLeft32(uint32(v), int(rotate))
	return int64(r)
}
// rotateRight64 rotates v right by rotate bits (implemented as a left
// rotation by the negated amount).
func rotateRight64(v, rotate int64) int64 {
	r := bits.RotateLeft64(uint64(v), -int(rotate))
	return int64(r)
}
1639 // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
1640 func armBFAuxInt(lsb, width int64) arm64BitField {
1641 if lsb < 0 || lsb > 63 {
1642 panic("ARM(64) bit field lsb constant out of range")
1644 if width < 1 || lsb+width > 64 {
1645 panic("ARM(64) bit field width constant out of range")
// Packed as width | lsb<<8.
1647 return arm64BitField(width | lsb<<8)
1650 // returns the lsb part of the auxInt field of arm64 bitfield ops.
1651 func (bfc arm64BitField) getARM64BFlsb() int64 {
1652 return int64(uint64(bfc) >> 8)
1655 // returns the width part of the auxInt field of arm64 bitfield ops.
1656 func (bfc arm64BitField) getARM64BFwidth() int64 {
1657 return int64(bfc) & 0xff
1660 // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
1661 func isARM64BFMask(lsb, mask, rshift int64) bool {
1662 shiftedMask := int64(uint64(mask) >> uint64(rshift))
// Valid iff the shifted mask is a contiguous run of ones starting at bit 0
// (shiftedMask+1 is a power of two) that still fits below bit 64 at lsb.
1663 return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
1666 // returns the bitfield width of mask >> rshift for arm64 bitfield ops.
1667 func arm64BFWidth(mask, rshift int64) int64 {
1668 shiftedMask := int64(uint64(mask) >> uint64(rshift))
1669 if shiftedMask == 0 {
1670 panic("ARM64 BF mask is zero")
// NOTE(review): nto presumably counts trailing ones, i.e. the run length
// of the mask — confirm against the helper's definition.
1672 return nto(shiftedMask)
1675 // sizeof returns the size of t in bytes.
1676 // It will panic if t is not a *types.Type.
1677 func sizeof(t interface{}) int64 {
1678 return t.(*types.Type).Size()
1681 // registerizable reports whether t is a primitive type that fits in
1682 // a register. It assumes float64 values will always fit into registers
1683 // even if that isn't strictly true.
1684 func registerizable(b *Block, typ *types.Type) bool {
1685 if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
// Integers must fit in a single machine register.
1688 if typ.IsInteger() {
1689 return typ.Size() <= b.Func.Config.RegSize
1694 // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
1695 func needRaceCleanup(sym *AuxCall, v *Value) bool {
1700 if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
// Scan the whole function: if any real call remains, the race enter/exit
// pair must be kept for accurate stack traces.
// NOTE(review): f is presumably v's enclosing *Func (assigned above) — confirm.
1703 for _, b := range f.Blocks {
1704 for _, v := range b.Values {
1706 case OpStaticCall, OpStaticLECall:
1707 // Check for racefuncenter will encounter racefuncexit and vice versa.
1708 // Allow calls to panic*
1709 s := v.Aux.(*AuxCall).Fn.String()
1711 case "runtime.racefuncenter", "runtime.racefuncexit",
1712 "runtime.panicdivide", "runtime.panicwrap",
1713 "runtime.panicshift":
1716 // If we encountered any call, we need to keep racefunc*,
1717 // for accurate stacktraces.
1719 case OpPanicBounds, OpPanicExtend:
1720 // Note: these are panic generators that are ok (like the static calls above).
1721 case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
1722 // We must keep the race functions if there are any other call types.
1727 if isSameCall(sym, "runtime.racefuncenter") {
1728 // TODO REGISTER ABI this needs to be cleaned up.
1729 // If we're removing racefuncenter, remove its argument as well.
1730 if v.Args[0].Op != OpStore {
1731 if v.Op == OpStaticLECall {
1732 // there is no store, yet.
// Splice the store out: replace it with a copy of its memory argument.
1737 mem := v.Args[0].Args[2]
1738 v.Args[0].reset(OpCopy)
1739 v.Args[0].AddArg(mem)
1744 // symIsRO reports whether sym is a read-only global.
1745 func symIsRO(sym interface{}) bool {
1746 lsym := sym.(*obj.LSym)
// Read-only data with no relocations can be inspected at compile time.
1747 return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
1750 // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
1751 func symIsROZero(sym Sym) bool {
1752 lsym := sym.(*obj.LSym)
1753 if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
// Every byte of the symbol's data must be zero.
1756 for _, b := range lsym.P {
1764 // isFixed32 returns true if the int32 at offset off in symbol sym
1765 // is known and constant.
1766 func isFixed32(c *Config, sym Sym, off int64) bool {
1767 return isFixed(c, sym, off, 4)
1770 // isFixed returns true if the range [off,off+size] of the symbol sym
1771 // is known and constant.
1772 func isFixed(c *Config, sym Sym, off, size int64) bool {
1773 lsym := sym.(*obj.LSym)
1774 if lsym.Extra == nil {
1777 if _, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
// The 4-byte hash field of a runtime type descriptor lives at offset
// 2*PtrSize and is known at compile time.
1778 if off == 2*c.PtrSize && size == 4 {
1779 return true // type hash field
// fixed32 returns the constant int32 stored at offset off in sym.
// It aborts (via base.Fatalf) if the value is not known; callers should
// first check isFixed32.
1784 func fixed32(c *Config, sym Sym, off int64) int32 {
1785 lsym := sym.(*obj.LSym)
1786 if ti, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
1787 if off == 2*c.PtrSize {
// Recompute the type hash that the symbol's data would contain.
1788 return int32(types.TypeHash(ti.Type.(*types.Type)))
1791 base.Fatalf("fixed32 data not known for %s:%d", sym, off)
1795 // isFixedSym returns true if the contents of sym at the given offset
1796 // is known and is the constant address of another symbol.
1797 func isFixedSym(sym Sym, off int64) bool {
1798 lsym := sym.(*obj.LSym)
1800 case lsym.Type == objabi.SRODATA:
1801 // itabs, dictionaries
// An address relocation at exactly this offset with no addend means the
// word holds a constant symbol address.
1805 for _, r := range lsym.R {
1806 if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
// fixedSym returns the symbol whose address is stored at offset off of sym.
// It aborts (via base.Fatalf) if none is found; callers should first check
// isFixedSym.
1812 func fixedSym(f *Func, sym Sym, off int64) Sym {
1813 lsym := sym.(*obj.LSym)
1814 for _, r := range lsym.R {
1815 if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off {
1816 if strings.HasPrefix(r.Sym.Name, "type:") {
1817 // In case we're loading a type out of a dictionary, we need to record
1818 // that the containing function might put that type in an interface.
1819 // That information is currently recorded in relocations in the dictionary,
1820 // but if we perform this load at compile time then the dictionary
// relocation is bypassed, so record the interface use here instead.
1822 reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
1823 } else if strings.HasPrefix(r.Sym.Name, "go:itab") {
1824 // Same, but if we're using an itab we need to record that the
1825 // itab._type might be put in an interface.
1826 reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
1831 base.Fatalf("fixedSym data not known for %s:%d", sym, off)
1835 // read8 reads one byte from the read-only global sym at offset off.
1836 func read8(sym interface{}, off int64) uint8 {
1837 lsym := sym.(*obj.LSym)
1838 if off >= int64(len(lsym.P)) || off < 0 {
1839 // Invalid index into the global sym.
1840 // This can happen in dead code, so we don't want to panic.
1841 // Just return any value, it will eventually get ignored.
1848 // read16 reads two bytes from the read-only global sym at offset off.
1849 func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
1850 lsym := sym.(*obj.LSym)
1851 // lsym.P is written lazily.
1852 // Bytes requested after the end of lsym.P are 0.
1854 if 0 <= off && off < int64(len(lsym.P)) {
// Decode through a fixed-size scratch buffer so short/lazy data reads as 0.
1857 buf := make([]byte, 2)
1859 return byteorder.Uint16(buf)
1862 // read32 reads four bytes from the read-only global sym at offset off.
1863 func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
1864 lsym := sym.(*obj.LSym)
// Same zero-fill convention as read16.
1866 if 0 <= off && off < int64(len(lsym.P)) {
1869 buf := make([]byte, 4)
1871 return byteorder.Uint32(buf)
1874 // read64 reads eight bytes from the read-only global sym at offset off.
1875 func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
1876 lsym := sym.(*obj.LSym)
// Same zero-fill convention as read16.
1878 if 0 <= off && off < int64(len(lsym.P)) {
1881 buf := make([]byte, 8)
1883 return byteorder.Uint64(buf)
1886 // sequentialAddresses reports true if it can prove that x + n == y
1887 func sequentialAddresses(x, y *Value, n int64) bool {
// Trivially true when x and y are the same value and n is zero.
1888 if x == y && n == 0 {
// 386: ADDL and LEAL1 compute the same two-operand sum (either arg order);
// LEAL1 with auxint n adds a constant n on top.
1891 if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
1892 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1893 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1896 if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1897 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1898 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
// amd64 equivalents of the two 386 patterns above.
1901 if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
1902 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1903 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1906 if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1907 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1908 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1914 // flagConstant represents the result of a compile-time comparison.
1915 // The sense of these flags does not necessarily represent the hardware's notion
1916 // of a flags register - these are just a compile-time construct.
1917 // We happen to match the semantics to those of arm/arm64.
1918 // Note that these semantics differ from x86: the carry flag has the opposite
1919 // sense on a subtraction!
1921 // On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
1922 // On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
1923 // (because it does x + ^y + C).
1925 // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
1926 type flagConstant uint8
1928 // N reports whether the result of an operation is negative (high bit set).
1929 func (fc flagConstant) N() bool {
1933 // Z reports whether the result of an operation is 0.
1934 func (fc flagConstant) Z() bool {
1938 // C reports whether an unsigned add overflowed (carry), or an
1939 // unsigned subtract did not underflow (borrow).
1940 func (fc flagConstant) C() bool {
1944 // V reports whether a signed operation overflowed or underflowed.
1945 func (fc flagConstant) V() bool {
// eq..ge are the signed comparison predicates, derived from N, Z, and V
// using the standard arm/arm64 condition-code identities.
1949 func (fc flagConstant) eq() bool {
1952 func (fc flagConstant) ne() bool {
1955 func (fc flagConstant) lt() bool {
1956 return fc.N() != fc.V()
1958 func (fc flagConstant) le() bool {
1959 return fc.Z() || fc.lt()
1961 func (fc flagConstant) gt() bool {
1962 return !fc.Z() && fc.ge()
1964 func (fc flagConstant) ge() bool {
1965 return fc.N() == fc.V()
// ult..uge are the unsigned comparison predicates, derived from Z and C.
1967 func (fc flagConstant) ult() bool {
1970 func (fc flagConstant) ule() bool {
1971 return fc.Z() || fc.ult()
1973 func (fc flagConstant) ugt() bool {
1974 return !fc.Z() && fc.uge()
1976 func (fc flagConstant) uge() bool {
// The *Noov variants additionally require that no signed overflow occurred.
1980 func (fc flagConstant) ltNoov() bool {
1981 return fc.lt() && !fc.V()
1983 func (fc flagConstant) leNoov() bool {
1984 return fc.le() && !fc.V()
1986 func (fc flagConstant) gtNoov() bool {
1987 return fc.gt() && !fc.V()
1989 func (fc flagConstant) geNoov() bool {
1990 return fc.ge() && !fc.V()
// String renders the four flag bits for debugging.
1993 func (fc flagConstant) String() string {
1994 return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
// flagConstantBuilder collects comparison-result bits (N/Z/C/V) before they
// are packed into a flagConstant.
1997 type flagConstantBuilder struct {
// encode packs the builder's bits into a flagConstant.
2004 func (fcs flagConstantBuilder) encode() flagConstant {
2021 // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
2022 // - the results of the C flag are different
2023 // - the results of the V flag when y==minint are different
2025 // addFlags64 returns the flags that would be set from computing x+y.
2026 func addFlags64(x, y int64) flagConstant {
2027 var fcb flagConstantBuilder
// Carry: the unsigned sum wrapped. Overflow: both operands share a sign
// that the sum does not.
2030 fcb.C = uint64(x+y) < uint64(x)
2031 fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
2035 // subFlags64 returns the flags that would be set from computing x-y.
2036 func subFlags64(x, y int64) flagConstant {
2037 var fcb flagConstantBuilder
// C=1 means "no borrow" (arm convention; see flagConstant's doc comment).
2040 fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
2041 fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
2045 // addFlags32 returns the flags that would be set from computing x+y.
2046 func addFlags32(x, y int32) flagConstant {
2047 var fcb flagConstantBuilder
2050 fcb.C = uint32(x+y) < uint32(x)
2051 fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
2055 // subFlags32 returns the flags that would be set from computing x-y.
2056 func subFlags32(x, y int32) flagConstant {
2057 var fcb flagConstantBuilder
2060 fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
2061 fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
2065 // logicFlags64 returns flags set to the sign/zeroness of x.
2066 // C and V are set to false.
2067 func logicFlags64(x int64) flagConstant {
// The builder's zero value leaves C and V false; only N and Z are set.
2068 var fcb flagConstantBuilder
2074 // logicFlags32 returns flags set to the sign/zeroness of x.
2075 // C and V are set to false.
2076 func logicFlags32(x int32) flagConstant {
2077 var fcb flagConstantBuilder
// makeJumpTableSym creates the linker symbol holding block b's jump table,
// named after the enclosing function and the block ID.
2083 func makeJumpTableSym(b *Block) *obj.LSym {
2084 s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
// Mark the symbol local and safe for the linker to deduplicate.
2085 s.Set(obj.AttrDuplicateOK, true)
2086 s.Set(obj.AttrLocal, true)
2090 // canRotate reports whether the architecture supports
2091 // rotates of integer registers with the given number of bits.
2092 func canRotate(c *Config, bits int64) bool {
2093 if bits > c.PtrSize*8 {
2094 // Don't rewrite to rotates bigger than the machine word.
// Architectures with (at least word-sized) rotate instructions.
2098 case "386", "amd64", "arm64":
2100 case "arm", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
2107 // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
2108 func isARM64bitcon(x uint64) bool {
// All-zeros and all-ones cannot be encoded as logical immediates.
2109 if x == 1<<64-1 || x == 0 {
2112 // determine the period and sign-extend a unit to 64 bits
// Each case tests whether x is NOT periodic at that width; the first
// failing test fixes the period, and x is reduced to one repeating unit.
2114 case x != x>>32|x<<32:
// period is 64
2117 case x != x>>16|x<<48:
// period is 32
2119 x = uint64(int64(int32(x)))
2120 case x != x>>8|x<<56:
// period is 16
2122 x = uint64(int64(int16(x)))
2123 case x != x>>4|x<<60:
// period is 8
2125 x = uint64(int64(int8(x)))
2127 // period is 4 or 2, always true
2128 // 0001, 0010, 0100, 1000 -- 0001 rotate
2129 // 0011, 0110, 1100, 1001 -- 0011 rotate
2130 // 0111, 1011, 1101, 1110 -- 0111 rotate
2131 // 0101, 1010 -- 01 rotate, repeat
// A bitcon is a rotated run of ones: either x or its complement must be a
// single contiguous sequence of ones.
2134 return sequenceOfOnes(x) || sequenceOfOnes(^x)
2137 // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
2138 func sequenceOfOnes(x uint64) bool {
2139 y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
2144 // isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction.
2145 func isARM64addcon(v int64) bool {
2146 /* uimm12 or uimm24? */
// A 12-bit unsigned immediate, optionally shifted left by 12 (the uimm24
// form) when the low 12 bits are all zero.
2150 if (v & 0xFFF) == 0 {
2156 // setPos sets the position of v to pos, then returns true.
2157 // Useful for setting the result of a rewrite's position to
2158 // something other than the default.
2159 func setPos(v *Value, pos src.XPos) bool {