1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/base"
9 "cmd/compile/internal/logopt"
10 "cmd/compile/internal/types"
12 "cmd/internal/obj/s390x"
25 type deadValueChoice bool
28 leaveDeadValues deadValueChoice = false
29 removeDeadValues = true
32 // deadcode indicates whether rewrite should try to remove any values that become dead.
33 func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
34 // repeat rewrites until we find no more rewrites
35 pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
39 fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
42 var states map[string]bool
46 for _, b := range f.Blocks {
51 b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
53 for i, c := range b.ControlValues() {
56 b.ReplaceControl(i, c)
62 fmt.Printf("rewriting %s -> %s\n", b0.LongString(), b.LongString())
65 for j, v := range b.Values {
70 v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
72 if v.Uses == 0 && v.removeable() {
73 if v.Op != OpInvalid && deadcode == removeDeadValues {
// Reset any values that are now unused, so that we decrement
// the use counts of all of their arguments.
76 // Not quite a deadcode pass, because it does not handle cycles.
77 // But it should help Uses==1 rules to fire.
81 // No point rewriting values which aren't used.
85 vchange := phielimValue(v)
86 if vchange && debug > 1 {
87 fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
90 // Eliminate copy inputs.
91 // If any copy input becomes unused, mark it
92 // as invalid and discard its argument. Repeat
93 // recursively on the discarded argument.
94 // This phase helps remove phantom "dead copy" uses
// of a value so that an x.Uses==1 rule condition
// fires reliably.
97 for i, a := range v.Args {
103 // If a, a copy, has a line boundary indicator, attempt to find a new value
104 // to hold it. The first candidate is the value that will replace a (aa),
105 // if it shares the same block and line and is eligible.
106 // The second option is v, which has a as an input. Because aa is earlier in
107 // the data flow, it is the better choice.
108 if a.Pos.IsStmt() == src.PosIsStmt {
109 if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
110 aa.Pos = aa.Pos.WithIsStmt()
111 } else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
112 v.Pos = v.Pos.WithIsStmt()
114 // Record the lost line and look for a new home after all rewrites are complete.
115 // TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
116 // line to appear in more than one block, but only one block is stored, so if both end
117 // up here, then one will be lost.
118 pendingLines.set(a.Pos, int32(a.Block.ID))
120 a.Pos = a.Pos.WithNotStmt()
129 if vchange && debug > 1 {
130 fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
133 // apply rewrite function
136 // If value changed to a poor choice for a statement boundary, move the boundary
137 if v.Pos.IsStmt() == src.PosIsStmt {
138 if k := nextGoodStatementIndex(v, j, b); k != j {
139 v.Pos = v.Pos.WithNotStmt()
140 b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
145 change = change || vchange
146 if vchange && debug > 1 {
147 fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
151 if !change && !deadChange {
155 if (iters > 1000 || debug >= 2) && change {
156 // We've done a suspiciously large number of rewrites (or we're in debug mode).
157 // As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
158 // and the maximum value encountered during make.bash is 12.
159 // Start checking for cycles. (This is too expensive to do routinely.)
160 // Note: we avoid this path for deadChange-only iterations, to fix #51639.
162 states = make(map[string]bool)
165 if _, ok := states[h]; ok {
166 // We've found a cycle.
167 // To diagnose it, set debug to 2 and start again,
168 // so that we'll print all rules applied until we complete another cycle.
169 // If debug is already >= 2, we've already done that, so it's time to crash.
172 states = make(map[string]bool)
174 f.Fatalf("rewrite cycle detected")
180 // remove clobbered values
181 for _, b := range f.Blocks {
183 for i, v := range b.Values {
185 if v.Op == OpInvalid {
186 if v.Pos.IsStmt() == src.PosIsStmt {
187 pendingLines.set(vl, int32(b.ID))
192 if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
193 pendingLines.remove(vl)
194 v.Pos = v.Pos.WithIsStmt()
201 if pendingLines.get(b.Pos) == int32(b.ID) {
202 b.Pos = b.Pos.WithIsStmt()
203 pendingLines.remove(b.Pos)
209 // Common functions called from rewriting rules
211 func is64BitFloat(t *types.Type) bool {
212 return t.Size() == 8 && t.IsFloat()
215 func is32BitFloat(t *types.Type) bool {
216 return t.Size() == 4 && t.IsFloat()
219 func is64BitInt(t *types.Type) bool {
220 return t.Size() == 8 && t.IsInteger()
223 func is32BitInt(t *types.Type) bool {
224 return t.Size() == 4 && t.IsInteger()
227 func is16BitInt(t *types.Type) bool {
228 return t.Size() == 2 && t.IsInteger()
231 func is8BitInt(t *types.Type) bool {
232 return t.Size() == 1 && t.IsInteger()
235 func isPtr(t *types.Type) bool {
236 return t.IsPtrShaped()
// mergeSym merges two symbolic offsets. There is no real merging of
// offsets; we just pick the non-nil one.
241 func mergeSym(x, y Sym) Sym {
248 panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
251 func canMergeSym(x, y Sym) bool {
252 return x == nil || y == nil
255 // canMergeLoadClobber reports whether the load can be merged into target without
256 // invalidating the schedule.
257 // It also checks that the other non-load argument x is something we
258 // are ok with clobbering.
259 func canMergeLoadClobber(target, load, x *Value) bool {
260 // The register containing x is going to get clobbered.
261 // Don't merge if we still need the value of x.
262 // We don't have liveness information here, but we can
263 // approximate x dying with:
264 // 1) target is x's only use.
265 // 2) target is not in a deeper loop than x.
269 loopnest := x.Block.Func.loopnest()
270 loopnest.calculateDepths()
271 if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
274 return canMergeLoad(target, load)
277 // canMergeLoad reports whether the load can be merged into target without
278 // invalidating the schedule.
279 func canMergeLoad(target, load *Value) bool {
280 if target.Block.ID != load.Block.ID {
281 // If the load is in a different block do not merge it.
285 // We can't merge the load into the target if the load
286 // has more than one use.
291 mem := load.MemoryArg()
293 // We need the load's memory arg to still be alive at target. That
294 // can't be the case if one of target's args depends on a memory
295 // state that is a successor of load's memory arg.
297 // For example, it would be invalid to merge load into target in
298 // the following situation because newmem has killed oldmem
299 // before target is reached:
300 // load = read ... oldmem
301 // newmem = write ... oldmem
302 // arg0 = read ... newmem
303 // target = add arg0 load
// If the argument comes from a different block, then we can exclude
306 // it immediately because it must dominate load (which is in the
307 // same block as target).
309 for _, a := range target.Args {
310 if a != load && a.Block.ID == target.Block.ID {
311 args = append(args, a)
315 // memPreds contains memory states known to be predecessors of load's
316 // memory state. It is lazily initialized.
317 var memPreds map[*Value]bool
318 for i := 0; len(args) > 0; i++ {
321 // Give up if we have done a lot of iterations.
324 v := args[len(args)-1]
325 args = args[:len(args)-1]
326 if target.Block.ID != v.Block.ID {
327 // Since target and load are in the same block
328 // we can stop searching when we leave the block.
332 // A Phi implies we have reached the top of the block.
333 // The memory phi, if it exists, is always
334 // the first logical store in the block.
337 if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
// We could handle this situation, however it is likely
// to be very rare.
342 if v.Op.SymEffect()&SymAddr != 0 {
343 // This case prevents an operation that calculates the
344 // address of a local variable from being forced to schedule
345 // before its corresponding VarDef.
351 // We don't want to combine the CMPQ with the load, because
352 // that would force the CMPQ to schedule before the VARDEF, which
353 // in turn requires the LEAQ to schedule before the VARDEF.
356 if v.Type.IsMemory() {
// Initialise a map containing memory states
// known to be predecessors of load's memory
// state.
361 memPreds = make(map[*Value]bool)
364 for i := 0; i < limit; i++ {
366 // The memory phi, if it exists, is always
367 // the first logical store in the block.
370 if m.Block.ID != target.Block.ID {
373 if !m.Type.IsMemory() {
377 if len(m.Args) == 0 {
384 // We can merge if v is a predecessor of mem.
386 // For example, we can merge load into target in the
387 // following scenario:
390 // load = read ... mem
391 // target = add x load
397 if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
398 // If v takes mem as an input then we know mem
399 // is valid at this point.
402 for _, a := range v.Args {
403 if target.Block.ID == a.Block.ID {
404 args = append(args, a)
412 // isSameCall reports whether sym is the same as the given named symbol.
413 func isSameCall(sym interface{}, name string) bool {
414 fn := sym.(*AuxCall).Fn
415 return fn != nil && fn.String() == name
// canLoadUnaligned reports whether the architecture supports unaligned load operations.
419 func canLoadUnaligned(c *Config) bool {
420 return c.ctxt.Arch.Alignment == 1
423 // nlzX returns the number of leading zeros.
424 func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
425 func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
426 func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
427 func nlz8(x int8) int { return bits.LeadingZeros8(uint8(x)) }
429 // ntzX returns the number of trailing zeros.
430 func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
431 func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
432 func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
433 func ntz8(x int8) int { return bits.TrailingZeros8(uint8(x)) }
// oneBit reports whether x has exactly one bit set; the sized variants below do the same for narrower types.
func oneBit(x int64) bool { return x&(x-1) == 0 && x != 0 }
436 func oneBit8(x int8) bool { return x&(x-1) == 0 && x != 0 }
437 func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
438 func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
439 func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
441 // nto returns the number of trailing ones.
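// For illustration (not from the original source):
//
//	nto(0b0111) == 3
//	nto(0)      == 0
//	nto(-1)     == 64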
442 func nto(x int64) int64 {
443 return int64(ntz64(^x))
// logX returns the base-2 logarithm of n.
447 // n must be a positive power of 2 (isPowerOfTwoX returns true).
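// For illustration:
//
//	log8(8)    == 3
//	log64(1)   == 0
//	log64(256) == 8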
448 func log8(n int8) int64 {
449 return int64(bits.Len8(uint8(n))) - 1
451 func log16(n int16) int64 {
452 return int64(bits.Len16(uint16(n))) - 1
454 func log32(n int32) int64 {
455 return int64(bits.Len32(uint32(n))) - 1
457 func log64(n int64) int64 {
458 return int64(bits.Len64(uint64(n))) - 1
// log2uint32 returns the base-2 logarithm of uint32(n), with log2(0) = -1.
463 func log2uint32(n int64) int64 {
464 return int64(bits.Len32(uint32(n))) - 1
467 // isPowerOfTwoX functions report whether n is a power of 2.
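// For illustration, isPowerOfTwo8(4) is true, while isPowerOfTwo8(0) and
// isPowerOfTwo8(-8) are false: only strictly positive values qualify.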
468 func isPowerOfTwo8(n int8) bool {
469 return n > 0 && n&(n-1) == 0
471 func isPowerOfTwo16(n int16) bool {
472 return n > 0 && n&(n-1) == 0
474 func isPowerOfTwo32(n int32) bool {
475 return n > 0 && n&(n-1) == 0
477 func isPowerOfTwo64(n int64) bool {
478 return n > 0 && n&(n-1) == 0
481 // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
func isUint64PowerOfTwo(in int64) bool {
n := uint64(in)
return n > 0 && n&(n-1) == 0
487 // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
488 func isUint32PowerOfTwo(in int64) bool {
489 n := uint64(uint32(in))
490 return n > 0 && n&(n-1) == 0
493 // is32Bit reports whether n can be represented as a signed 32 bit integer.
494 func is32Bit(n int64) bool {
495 return n == int64(int32(n))
498 // is16Bit reports whether n can be represented as a signed 16 bit integer.
499 func is16Bit(n int64) bool {
500 return n == int64(int16(n))
503 // is8Bit reports whether n can be represented as a signed 8 bit integer.
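// For illustration, is8Bit(-128) is true but is8Bit(128) is false.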
504 func is8Bit(n int64) bool {
505 return n == int64(int8(n))
508 // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
509 func isU8Bit(n int64) bool {
510 return n == int64(uint8(n))
513 // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
514 func isU12Bit(n int64) bool {
515 return 0 <= n && n < (1<<12)
518 // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
519 func isU16Bit(n int64) bool {
520 return n == int64(uint16(n))
523 // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
524 func isU32Bit(n int64) bool {
525 return n == int64(uint32(n))
528 // is20Bit reports whether n can be represented as a signed 20 bit integer.
529 func is20Bit(n int64) bool {
530 return -(1<<19) <= n && n < (1<<19)
533 // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
534 func b2i(b bool) int64 {
541 // b2i32 translates a boolean value to 0 or 1.
542 func b2i32(b bool) int32 {
549 // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
550 // A shift is bounded if it is shifting by less than the width of the shifted value.
551 func shiftIsBounded(v *Value) bool {
555 // canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
556 // generated code as much as possible.
557 func canonLessThan(x, y *Value) bool {
561 if !x.Pos.SameFileAndLine(y.Pos) {
562 return x.Pos.Before(y.Pos)
567 // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
568 // of the mantissa. It will panic if the truncation results in lost information.
569 func truncate64Fto32F(f float64) float32 {
570 if !isExactFloat32(f) {
571 panic("truncate64Fto32F: truncation is not exact")
576 // NaN bit patterns aren't necessarily preserved across conversion
// instructions, so we need to do the conversion manually.
578 b := math.Float64bits(f)
579 m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
580 // | sign | exponent | mantissa |
581 r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
582 return math.Float32frombits(r)
585 // extend32Fto64F converts a float32 value to a float64 value preserving the bit
586 // pattern of the mantissa.
587 func extend32Fto64F(f float32) float64 {
588 if !math.IsNaN(float64(f)) {
591 // NaN bit patterns aren't necessarily preserved across conversion
// instructions, so we need to do the conversion manually.
593 b := uint64(math.Float32bits(f))
594 // | sign | exponent | mantissa |
595 r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
596 return math.Float64frombits(r)
599 // DivisionNeedsFixUp reports whether the division needs fix-up code.
600 func DivisionNeedsFixUp(v *Value) bool {
604 // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
605 func auxFrom64F(f float64) int64 {
607 panic("can't encode a NaN in AuxInt field")
609 return int64(math.Float64bits(f))
612 // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
613 func auxFrom32F(f float32) int64 {
615 panic("can't encode a NaN in AuxInt field")
617 return int64(math.Float64bits(extend32Fto64F(f)))
620 // auxTo32F decodes a float32 from the AuxInt value provided.
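// For illustration, an exactly representable value round-trips:
// auxTo32F(auxFrom32F(3.5)) == 3.5.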
621 func auxTo32F(i int64) float32 {
622 return truncate64Fto32F(math.Float64frombits(uint64(i)))
625 // auxTo64F decodes a float64 from the AuxInt value provided.
626 func auxTo64F(i int64) float64 {
627 return math.Float64frombits(uint64(i))
630 func auxIntToBool(i int64) bool {
636 func auxIntToInt8(i int64) int8 {
639 func auxIntToInt16(i int64) int16 {
642 func auxIntToInt32(i int64) int32 {
645 func auxIntToInt64(i int64) int64 {
648 func auxIntToUint8(i int64) uint8 {
651 func auxIntToFloat32(i int64) float32 {
652 return float32(math.Float64frombits(uint64(i)))
654 func auxIntToFloat64(i int64) float64 {
655 return math.Float64frombits(uint64(i))
657 func auxIntToValAndOff(i int64) ValAndOff {
660 func auxIntToArm64BitField(i int64) arm64BitField {
661 return arm64BitField(i)
663 func auxIntToInt128(x int64) int128 {
665 panic("nonzero int128 not allowed")
669 func auxIntToFlagConstant(x int64) flagConstant {
670 return flagConstant(x)
673 func auxIntToOp(cc int64) Op {
677 func boolToAuxInt(b bool) int64 {
683 func int8ToAuxInt(i int8) int64 {
686 func int16ToAuxInt(i int16) int64 {
689 func int32ToAuxInt(i int32) int64 {
692 func int64ToAuxInt(i int64) int64 {
695 func uint8ToAuxInt(i uint8) int64 {
696 return int64(int8(i))
698 func float32ToAuxInt(f float32) int64 {
699 return int64(math.Float64bits(float64(f)))
701 func float64ToAuxInt(f float64) int64 {
702 return int64(math.Float64bits(f))
704 func valAndOffToAuxInt(v ValAndOff) int64 {
707 func arm64BitFieldToAuxInt(v arm64BitField) int64 {
710 func int128ToAuxInt(x int128) int64 {
712 panic("nonzero int128 not allowed")
716 func flagConstantToAuxInt(x flagConstant) int64 {
720 func opToAuxInt(o Op) int64 {
724 // Aux is an interface to hold miscellaneous data in Blocks and Values.
// For now, this is only used to mark moves that need to avoid clobbering flags.
732 func (auxMark) CanBeAnSSAAux() {}
736 // stringAux wraps string values for use in Aux.
737 type stringAux string
739 func (stringAux) CanBeAnSSAAux() {}
741 func auxToString(i Aux) string {
742 return string(i.(stringAux))
744 func auxToSym(i Aux) Sym {
745 // TODO: kind of a hack - allows nil interface through
749 func auxToType(i Aux) *types.Type {
750 return i.(*types.Type)
752 func auxToCall(i Aux) *AuxCall {
755 func auxToS390xCCMask(i Aux) s390x.CCMask {
756 return i.(s390x.CCMask)
758 func auxToS390xRotateParams(i Aux) s390x.RotateParams {
759 return i.(s390x.RotateParams)
762 func StringToAux(s string) Aux {
765 func symToAux(s Sym) Aux {
768 func callToAux(s *AuxCall) Aux {
771 func typeToAux(t *types.Type) Aux {
774 func s390xCCMaskToAux(c s390x.CCMask) Aux {
777 func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
781 // uaddOvf reports whether unsigned a+b would overflow.
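// For illustration, uaddOvf(-1, 1) is true: uint64(-1)+1 wraps to 0,
// which is less than uint64(-1).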
782 func uaddOvf(a, b int64) bool {
783 return uint64(a)+uint64(b) < uint64(a)
786 // loadLSymOffset simulates reading a word at an offset into a
787 // read-only symbol's runtime memory. If it would read a pointer to
788 // another symbol, that symbol is returned. Otherwise, it returns nil.
789 func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
790 if lsym.Type != objabi.SRODATA {
794 for _, r := range lsym.R {
795 if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
// devirtLESym de-virtualizes an InterLECall.
// 'sym' is the symbol for the itab.
805 func devirtLESym(v *Value, aux Aux, sym Sym, offset int64) *obj.LSym {
806 n, ok := sym.(*obj.LSym)
811 lsym := loadLSymOffset(n, offset)
812 if f := v.Block.Func; f.pass.debug > 0 {
814 f.Warnl(v.Pos, "de-virtualizing call")
816 f.Warnl(v.Pos, "couldn't de-virtualize call")
822 func devirtLECall(v *Value, sym *obj.LSym) *Value {
823 v.Op = OpStaticLECall
824 auxcall := v.Aux.(*AuxCall)
828 copy(v.Args[0:], v.Args[1:])
829 v.Args[len(v.Args)-1] = nil // aid GC
830 v.Args = v.Args[:len(v.Args)-1]
834 // isSamePtr reports whether p1 and p2 point to the same address.
835 func isSamePtr(p1, p2 *Value) bool {
844 return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
845 case OpAddr, OpLocalAddr:
846 return p1.Aux == p2.Aux
848 return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
853 func isStackPtr(v *Value) bool {
854 for v.Op == OpOffPtr || v.Op == OpAddPtr {
857 return v.Op == OpSP || v.Op == OpLocalAddr
860 // disjoint reports whether the memory region specified by [p1:p1+n1)
861 // does not overlap with [p2:p2+n2).
862 // A return value of false does not imply the regions overlap.
863 func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
864 if n1 == 0 || n2 == 0 {
870 baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
871 base, offset = ptr, 0
872 for base.Op == OpOffPtr {
873 offset += base.AuxInt
878 p1, off1 := baseAndOffset(p1)
879 p2, off2 := baseAndOffset(p2)
880 if isSamePtr(p1, p2) {
881 return !overlap(off1, n1, off2, n2)
883 // p1 and p2 are not the same, so if they are both OpAddrs then
884 // they point to different variables.
885 // If one pointer is on the stack and the other is an argument
886 // then they can't overlap.
888 case OpAddr, OpLocalAddr:
889 if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
892 return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
893 case OpArg, OpArgIntReg:
894 if p2.Op == OpSP || p2.Op == OpLocalAddr {
898 return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
903 // moveSize returns the number of bytes an aligned MOV instruction moves.
904 func moveSize(align int64, c *Config) int64 {
906 case align%8 == 0 && c.PtrSize == 8:
916 // mergePoint finds a block among a's blocks which dominates b and is itself
917 // dominated by all of a's blocks. Returns nil if it can't find one.
918 // Might return nil even if one does exist.
919 func mergePoint(b *Block, a ...*Value) *Block {
920 // Walk backward from b looking for one of the a's blocks.
926 for _, x := range a {
931 if len(b.Preds) > 1 {
932 // Don't know which way to go back. Abort.
938 return nil // too far away
940 // At this point, r is the first value in a that we find by walking backwards.
// If we return anything, r will be it.
944 // Keep going, counting the other a's that we find. They must all dominate r.
947 for _, x := range a {
953 // Found all of a in a backwards walk. We can return r.
956 if len(b.Preds) > 1 {
963 return nil // too far away
966 // clobber invalidates values. Returns true.
967 // clobber is used by rewrite rules to:
969 // A) make sure the values are really dead and never used again.
970 // B) decrement use counts of the values' args.
971 func clobber(vv ...*Value) bool {
972 for _, v := range vv {
974 // Note: leave v.Block intact. The Block field is used after clobber.
979 // clobberIfDead resets v when use count is 1. Returns true.
980 // clobberIfDead is used by rewrite rules to decrement
981 // use counts of v's args when v is dead and never used.
982 func clobberIfDead(v *Value) bool {
986 // Note: leave v.Block intact. The Block field is used after clobberIfDead.
990 // noteRule is an easy way to track if a rule is matched when writing
991 // new ones. Make the rule of interest also conditional on
993 // noteRule("note to self: rule of interest matched")
995 // and that message will print when the rule matches.
996 func noteRule(s string) bool {
1001 // countRule increments Func.ruleMatches[key].
1002 // If Func.ruleMatches is non-nil at the end
1003 // of compilation, it will be printed to stdout.
// This is intended to make it easier to find which functions
// contain lots of rule matches when developing new rules.
1006 func countRule(v *Value, key string) bool {
1008 if f.ruleMatches == nil {
1009 f.ruleMatches = make(map[string]int)
1011 f.ruleMatches[key]++
1015 // warnRule generates compiler debug output with string s when
1016 // v is not in autogenerated code, cond is true and the rule has fired.
1017 func warnRule(cond bool, v *Value, s string) bool {
1018 if pos := v.Pos; pos.Line() > 1 && cond {
1019 v.Block.Func.Warnl(pos, s)
// flagArg extracts x from a pseudo-op like (LessThan x).
1025 func flagArg(v *Value) *Value {
1026 if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
1032 // arm64Negate finds the complement to an ARM64 condition code,
1033 // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
1035 // For floating point, it's more subtle because NaN is unordered. We do
1036 // !LessThanF -> NotLessThanF, the latter takes care of NaNs.
1037 func arm64Negate(op Op) Op {
1039 case OpARM64LessThan:
1040 return OpARM64GreaterEqual
1041 case OpARM64LessThanU:
1042 return OpARM64GreaterEqualU
1043 case OpARM64GreaterThan:
1044 return OpARM64LessEqual
1045 case OpARM64GreaterThanU:
1046 return OpARM64LessEqualU
1047 case OpARM64LessEqual:
1048 return OpARM64GreaterThan
1049 case OpARM64LessEqualU:
1050 return OpARM64GreaterThanU
1051 case OpARM64GreaterEqual:
1052 return OpARM64LessThan
1053 case OpARM64GreaterEqualU:
1054 return OpARM64LessThanU
1056 return OpARM64NotEqual
1057 case OpARM64NotEqual:
1059 case OpARM64LessThanF:
1060 return OpARM64NotLessThanF
1061 case OpARM64NotLessThanF:
1062 return OpARM64LessThanF
1063 case OpARM64LessEqualF:
1064 return OpARM64NotLessEqualF
1065 case OpARM64NotLessEqualF:
1066 return OpARM64LessEqualF
1067 case OpARM64GreaterThanF:
1068 return OpARM64NotGreaterThanF
1069 case OpARM64NotGreaterThanF:
1070 return OpARM64GreaterThanF
1071 case OpARM64GreaterEqualF:
1072 return OpARM64NotGreaterEqualF
1073 case OpARM64NotGreaterEqualF:
1074 return OpARM64GreaterEqualF
1076 panic("unreachable")
1080 // arm64Invert evaluates (InvertFlags op), which
1081 // is the same as altering the condition codes such
1082 // that the same result would be produced if the arguments
1083 // to the flag-generating instruction were reversed, e.g.
1084 // (InvertFlags (CMP x y)) -> (CMP y x)
1085 func arm64Invert(op Op) Op {
1087 case OpARM64LessThan:
1088 return OpARM64GreaterThan
1089 case OpARM64LessThanU:
1090 return OpARM64GreaterThanU
1091 case OpARM64GreaterThan:
1092 return OpARM64LessThan
1093 case OpARM64GreaterThanU:
1094 return OpARM64LessThanU
1095 case OpARM64LessEqual:
1096 return OpARM64GreaterEqual
1097 case OpARM64LessEqualU:
1098 return OpARM64GreaterEqualU
1099 case OpARM64GreaterEqual:
1100 return OpARM64LessEqual
1101 case OpARM64GreaterEqualU:
1102 return OpARM64LessEqualU
1103 case OpARM64Equal, OpARM64NotEqual:
1105 case OpARM64LessThanF:
1106 return OpARM64GreaterThanF
1107 case OpARM64GreaterThanF:
1108 return OpARM64LessThanF
1109 case OpARM64LessEqualF:
1110 return OpARM64GreaterEqualF
1111 case OpARM64GreaterEqualF:
1112 return OpARM64LessEqualF
1113 case OpARM64NotLessThanF:
1114 return OpARM64NotGreaterThanF
1115 case OpARM64NotGreaterThanF:
1116 return OpARM64NotLessThanF
1117 case OpARM64NotLessEqualF:
1118 return OpARM64NotGreaterEqualF
1119 case OpARM64NotGreaterEqualF:
1120 return OpARM64NotLessEqualF
1122 panic("unreachable")
// ccARM64Eval evaluates an ARM64 op against a flags value
// that is potentially constant; it returns 1 for true,
// -1 for false, and 0 for not constant.
1129 func ccARM64Eval(op Op, flags *Value) int {
1131 if fop == OpARM64InvertFlags {
1132 return -ccARM64Eval(op, flags.Args[0])
1134 if fop != OpARM64FlagConstant {
1137 fc := flagConstant(flags.AuxInt)
1138 b2i := func(b bool) int {
1147 case OpARM64NotEqual:
1149 case OpARM64LessThan:
1151 case OpARM64LessThanU:
1152 return b2i(fc.ult())
1153 case OpARM64GreaterThan:
1155 case OpARM64GreaterThanU:
1156 return b2i(fc.ugt())
1157 case OpARM64LessEqual:
1159 case OpARM64LessEqualU:
1160 return b2i(fc.ule())
1161 case OpARM64GreaterEqual:
1163 case OpARM64GreaterEqualU:
1164 return b2i(fc.uge())
1169 // logRule logs the use of the rule s. This will only be enabled if
1170 // rewrite rules were generated with the -log option, see _gen/rulegen.go.
1171 func logRule(s string) {
1172 if ruleFile == nil {
1173 // Open a log file to write log to. We open in append
1174 // mode because all.bash runs the compiler lots of times,
1175 // and we want the concatenation of all of those logs.
1176 // This means, of course, that users need to rm the old log
1177 // to get fresh data.
1178 // TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
1179 w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
1180 os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
1186 _, err := fmt.Fprintln(ruleFile, s)
1192 var ruleFile io.Writer
1194 func min(x, y int64) int64 {
1201 func isConstZero(v *Value) bool {
1205 case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
1206 return v.AuxInt == 0
1211 // reciprocalExact64 reports whether 1/c is exactly representable.
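// For illustration, reciprocalExact64(4) is true (1/4 == 0.25 exactly),
// while reciprocalExact64(3) is false.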
1212 func reciprocalExact64(c float64) bool {
1213 b := math.Float64bits(c)
1214 man := b & (1<<52 - 1)
1216 return false // not a power of 2, denormal, or NaN
1218 exp := b >> 52 & (1<<11 - 1)
1219 // exponent bias is 0x3ff. So taking the reciprocal of a number
1220 // changes the exponent to 0x7fe-exp.
1225 return false // ±inf
1227 return false // exponent is not representable
1233 // reciprocalExact32 reports whether 1/c is exactly representable.
1234 func reciprocalExact32(c float32) bool {
1235 b := math.Float32bits(c)
1236 man := b & (1<<23 - 1)
1238 return false // not a power of 2, denormal, or NaN
1240 exp := b >> 23 & (1<<8 - 1)
1241 // exponent bias is 0x7f. So taking the reciprocal of a number
1242 // changes the exponent to 0xfe-exp.
1247 return false // ±inf
1249 return false // exponent is not representable
// isARMImmRot reports whether an immediate can be directly encoded into an ARM instruction.
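// For illustration (assuming the usual 8-bit-value/even-rotation encoding),
// 0xFF and 0xFF00 are encodable, but 0x101 is not.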
1256 func isARMImmRot(v uint32) bool {
1257 for i := 0; i < 16; i++ {
1267 // overlap reports whether the ranges given by the given offset and
1268 // size pairs overlap.
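// For illustration, overlap(0, 8, 4, 4) is true, while
// overlap(0, 4, 4, 4) is false (the regions merely touch).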
1269 func overlap(offset1, size1, offset2, size2 int64) bool {
1270 if offset1 >= offset2 && offset2+size2 > offset1 {
1273 if offset2 >= offset1 && offset1+size1 > offset2 {
// areAdjacentOffsets reports whether the two size-byte regions at off1 and off2 are adjacent.
func areAdjacentOffsets(off1, off2, size int64) bool {
1280 return off1+size == off2 || off1 == off2+size
// zeroUpper32Bits reports whether x zeroes out the upper 32 bits of a 64-bit register.
// depth limits the recursion depth. AMD64.rules uses 3 as the limit,
// because it catches the same number of cases as 4 does.
1286 func zeroUpper32Bits(x *Value, depth int) bool {
1288 case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
1289 OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
1290 OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
1291 OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
1292 OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
1293 OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
1294 OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
1295 OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
1296 OpAMD64SHLL, OpAMD64SHLLconst:
1298 case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
1299 OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
1300 OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
1303 return x.Type.Size() == 4
1304 case OpPhi, OpSelect0, OpSelect1:
// Phis can use each other as arguments; instead of tracking
// visited values, just limit the recursion depth.
1310 for i := range x.Args {
1311 if !zeroUpper32Bits(x.Args[i], depth-1) {
1321 // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
1322 func zeroUpper48Bits(x *Value, depth int) bool {
1324 case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
1327 return x.Type.Size() == 2
1328 case OpPhi, OpSelect0, OpSelect1:
// Phis can use each other as arguments; instead of tracking
// visited values, just limit the recursion depth.
1334 for i := range x.Args {
1335 if !zeroUpper48Bits(x.Args[i], depth-1) {
1345 // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
1346 func zeroUpper56Bits(x *Value, depth int) bool {
1348 case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
1351 return x.Type.Size() == 1
1352 case OpPhi, OpSelect0, OpSelect1:
// Phis can use each other as arguments; instead of tracking
// visited values, just limit the recursion depth.
1358 for i := range x.Args {
1359 if !zeroUpper56Bits(x.Args[i], depth-1) {
1369 func isInlinableMemclr(c *Config, sz int64) bool {
1373 // TODO: expand this check to allow other architectures
1374 // see CL 454255 and issue 56997
1376 case "amd64", "arm64":
1378 case "ppc64le", "ppc64":
1384 // isInlinableMemmove reports whether the given arch performs a Move of the given size
1385 // faster than memmove. It will only return true if replacing the memmove with a Move is
1386 // safe, either because Move will do all of its loads before any of its stores, or
1387 // because the arguments are known to be disjoint.
1388 // This is used as a check for replacing memmove with Move ops.
1389 func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1390 // It is always safe to convert memmove into Move when its arguments are disjoint.
1391 // Move ops may or may not be faster for large sizes depending on how the platform
1392 // lowers them, so we only perform this optimization on platforms that we know to
1393 // have fast Move ops.
1396 return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
1397 case "386", "arm64":
1399 case "s390x", "ppc64", "ppc64le":
1400 return sz <= 8 || disjoint(dst, sz, src, sz)
1401 case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
1406 func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1407 return isInlinableMemmove(dst, src, sz, c)
1410 // logLargeCopy logs the occurrence of a large copy.
1411 // The best place to do this is in the rewrite rules where the size of the move is easy to find.
1412 // "Large" is arbitrarily chosen to be 128 bytes; this may change.
1413 func logLargeCopy(v *Value, s int64) bool {
1417 if logopt.Enabled() {
1418 logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
1422 func LogLargeCopy(funcName string, pos src.XPos, s int64) {
1426 if logopt.Enabled() {
1427 logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
1431 // hasSmallRotate reports whether the architecture has rotate instructions
1432 // for sizes < 32-bit. This is used to decide whether to promote some rotations.
1433 func hasSmallRotate(c *Config) bool {
1435 case "amd64", "386":
1442 func supportsPPC64PCRel() bool {
// PCRel is currently supported for >= power10, linux only.
// Internal and external linking support this on ppc64le; only internal linking does on ppc64.
1445 return buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
1448 func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
1449 if sh < 0 || sh >= sz {
1450 panic("PPC64 shift arg sh out of range")
1452 if mb < 0 || mb >= sz {
1453 panic("PPC64 shift arg mb out of range")
1455 if me < 0 || me >= sz {
1456 panic("PPC64 shift arg me out of range")
1458 return int32(sh<<16 | mb<<8 | me)
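// For illustration, newPPC64ShiftAuxInt(4, 0, 31, 32) packs its arguments as
// 4<<16 | 0<<8 | 31, and GetPPC64Shiftsh below recovers the sh field (4).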
1461 func GetPPC64Shiftsh(auxint int64) int64 {
1462 return int64(int8(auxint >> 16))
1465 func GetPPC64Shiftmb(auxint int64) int64 {
1466 return int64(int8(auxint >> 8))
1469 func GetPPC64Shiftme(auxint int64) int64 {
1470 return int64(int8(auxint))
// isPPC64WordRotateMask reports whether this value can be encoded as a mask for
// an rlwinm-like operation. Masks may also extend from the msb and wrap to
// the lsb. That is, the valid masks are 32 bit strings
// of the form: 0..01..10..0 or 1..10..01..1 or 1...1
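// For illustration, 0x000000F0 is a valid single-run mask; a wrapping mask
// such as the sign-extended form of 0xF000000F is also accepted.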
1477 func isPPC64WordRotateMask(v64 int64) bool {
// Isolate rightmost 1 (if none 0) and add.
v := uint64(v64)
vp := (v & -v) + v
// Likewise, for the wrapping case.
vn := ^v
vpn := (vn & -vn) + vn
1484 return (v&vp == 0 || vn&vpn == 0) && v != 0
// encodePPC64RotateMask compresses a mask and shift into a single value of the
// form me | mb<<8 | rotate<<16 | nbits<<24, where me and mb can
// be used to regenerate the input mask.
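// For illustration, encodePPC64RotateMask(8, 0x00FFFF00, 32) determines
// mb = 8 (the mask's leading zeros) and me = 24 (32 minus its trailing
// zeros) and packs them with the rotate and width.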
1490 func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
1491 var mb, me, mbn, men int
1493 // Determine boundaries and then decode them
1494 if mask == 0 || ^mask == 0 || rotate >= nbits {
1495 panic("Invalid PPC64 rotate mask")
1496 } else if nbits == 32 {
1497 mb = bits.LeadingZeros32(uint32(mask))
1498 me = 32 - bits.TrailingZeros32(uint32(mask))
1499 mbn = bits.LeadingZeros32(^uint32(mask))
1500 men = 32 - bits.TrailingZeros32(^uint32(mask))
1502 mb = bits.LeadingZeros64(uint64(mask))
1503 me = 64 - bits.TrailingZeros64(uint64(mask))
1504 mbn = bits.LeadingZeros64(^uint64(mask))
1505 men = 64 - bits.TrailingZeros64(^uint64(mask))
// Check for a wrapping mask (e.g. bits at 0 and 63).
1508 if mb == 0 && me == int(nbits) {
1509 // swap the inverted values
1513 return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
1516 // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask. The values returned as
1517 // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
1518 func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
1519 auxint := uint64(sauxint)
1520 rotate = int64((auxint >> 16) & 0xFF)
1521 mb = int64((auxint >> 8) & 0xFF)
1522 me = int64((auxint >> 0) & 0xFF)
1523 nbits := int64((auxint >> 24) & 0xFF)
1524 mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
1529 mask = uint64(uint32(mask))
// Fixup ME to match ISA definition. The second argument to MASK(..,me)
// is inclusive.
1534 me = (me - 1) & (nbits - 1)
// isPPC64ValidShiftMask verifies that the mask is a set of
// consecutive bits including the least significant bit.
1541 func isPPC64ValidShiftMask(v int64) bool {
1542 if (v != 0) && ((v+1)&v) == 0 {
// getPPC64ShiftMaskLength returns the number of bits in the (contiguous) mask v.
func getPPC64ShiftMaskLength(v int64) int64 {
1549 return int64(bits.Len64(uint64(v)))
// mergePPC64RShiftMask decomposes a shift right into an equivalent rotate/mask,
// and returns mask & m.
1554 func mergePPC64RShiftMask(m, s, nbits int64) int64 {
1555 smask := uint64((1<<uint(nbits))-1) >> uint(s)
1556 return m & int64(smask)
1559 // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
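// For illustration, m = 0xFF with s = 8 leaves mask 0xFF after the shift,
// which encodes as a left-rotate of 24 with mask 0xFF.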
1560 func mergePPC64AndSrwi(m, s int64) int64 {
1561 mask := mergePPC64RShiftMask(m, s, 32)
1562 if !isPPC64WordRotateMask(mask) {
1565 return encodePPC64RotateMask((32-s)&31, mask, 32)
1568 // Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
1569 // Return the encoded RLWINM constant, or 0 if they cannot be merged.
1570 func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
1571 mask_1 := uint64(0xFFFFFFFF >> uint(srw))
// For CLRLSLDI, it's more convenient to think of it as masking the left bits, then rotating left.
1573 mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1575 // Rewrite mask to apply after the final left shift.
1576 mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
1579 r_2 := GetPPC64Shiftsh(sld)
1580 r_3 := (r_1 + r_2) & 31 // This can wrap.
1582 if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
1585 return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
1588 // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM. Return
1589 // the encoded RLWINM constant, or 0 if they cannot be merged.
1590 func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
1591 r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
// For CLRLSLDI, it's more convenient to think of it as masking the left bits, then rotating left.
1593 mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1595 // combine the masks, and adjust for the final left shift.
1596 mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
1597 r_2 := GetPPC64Shiftsh(int64(sld))
1598 r_3 := (r_1 + r_2) & 31 // This can wrap.
1600 // Verify the result is still a valid bitmask of <= 32 bits.
1601 if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
1604 return encodePPC64RotateMask(r_3, int64(mask_3), 32)
1607 // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
1608 // or return 0 if they cannot be combined.
1609 func mergePPC64SldiSrw(sld, srw int64) int64 {
1610 if sld > srw || srw >= 32 {
1613 mask_r := uint32(0xFFFFFFFF) >> uint(srw)
1614 mask_l := uint32(0xFFFFFFFF) >> uint(sld)
1615 mask := (mask_r & mask_l) << uint(sld)
1616 return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
1619 // Convenience function to rotate a 32 bit constant value by another constant.
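// For illustration, rotateLeft32(0x80000001, 1) == 0x00000003.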
1620 func rotateLeft32(v, rotate int64) int64 {
1621 return int64(bits.RotateLeft32(uint32(v), int(rotate)))
// rotateRight64 rotates a 64 bit constant value right by another constant.
func rotateRight64(v, rotate int64) int64 {
1625 return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
// armBFAuxInt encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
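// For illustration, armBFAuxInt(8, 16) encodes width 16 and lsb 8 as
// 16 | 8<<8 == 0x810; getARM64BFlsb and getARM64BFwidth invert this.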
1629 func armBFAuxInt(lsb, width int64) arm64BitField {
1630 if lsb < 0 || lsb > 63 {
1631 panic("ARM(64) bit field lsb constant out of range")
1633 if width < 1 || lsb+width > 64 {
1634 panic("ARM(64) bit field width constant out of range")
1636 return arm64BitField(width | lsb<<8)
1639 // returns the lsb part of the auxInt field of arm64 bitfield ops.
1640 func (bfc arm64BitField) getARM64BFlsb() int64 {
1641 return int64(uint64(bfc) >> 8)
1644 // returns the width part of the auxInt field of arm64 bitfield ops.
1645 func (bfc arm64BitField) getARM64BFwidth() int64 {
1646 return int64(bfc) & 0xff
1649 // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
1650 func isARM64BFMask(lsb, mask, rshift int64) bool {
1651 shiftedMask := int64(uint64(mask) >> uint64(rshift))
1652 return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
1655 // returns the bitfield width of mask >> rshift for arm64 bitfield ops.
1656 func arm64BFWidth(mask, rshift int64) int64 {
1657 shiftedMask := int64(uint64(mask) >> uint64(rshift))
1658 if shiftedMask == 0 {
1659 panic("ARM64 BF mask is zero")
1661 return nto(shiftedMask)
1664 // sizeof returns the size of t in bytes.
1665 // It will panic if t is not a *types.Type.
1666 func sizeof(t interface{}) int64 {
1667 return t.(*types.Type).Size()
1670 // registerizable reports whether t is a primitive type that fits in
1671 // a register. It assumes float64 values will always fit into registers
1672 // even if that isn't strictly true.
1673 func registerizable(b *Block, typ *types.Type) bool {
1674 if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
1677 if typ.IsInteger() {
1678 return typ.Size() <= b.Func.Config.RegSize
1683 // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
1684 func needRaceCleanup(sym *AuxCall, v *Value) bool {
1689 if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
1692 for _, b := range f.Blocks {
1693 for _, v := range b.Values {
1695 case OpStaticCall, OpStaticLECall:
// The check for racefuncenter will also encounter racefuncexit, and vice versa.
// Allow calls to panic*.
1698 s := v.Aux.(*AuxCall).Fn.String()
1700 case "runtime.racefuncenter", "runtime.racefuncexit",
1701 "runtime.panicdivide", "runtime.panicwrap",
1702 "runtime.panicshift":
1705 // If we encountered any call, we need to keep racefunc*,
1706 // for accurate stacktraces.
1708 case OpPanicBounds, OpPanicExtend:
1709 // Note: these are panic generators that are ok (like the static calls above).
1710 case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
1711 // We must keep the race functions if there are any other call types.
1716 if isSameCall(sym, "runtime.racefuncenter") {
1717 // TODO REGISTER ABI this needs to be cleaned up.
1718 // If we're removing racefuncenter, remove its argument as well.
1719 if v.Args[0].Op != OpStore {
1720 if v.Op == OpStaticLECall {
1721 // there is no store, yet.
1726 mem := v.Args[0].Args[2]
1727 v.Args[0].reset(OpCopy)
1728 v.Args[0].AddArg(mem)
1733 // symIsRO reports whether sym is a read-only global.
1734 func symIsRO(sym interface{}) bool {
1735 lsym := sym.(*obj.LSym)
1736 return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
1739 // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
1740 func symIsROZero(sym Sym) bool {
1741 lsym := sym.(*obj.LSym)
1742 if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
1745 for _, b := range lsym.P {
1753 // read8 reads one byte from the read-only global sym at offset off.
1754 func read8(sym interface{}, off int64) uint8 {
1755 lsym := sym.(*obj.LSym)
1756 if off >= int64(len(lsym.P)) || off < 0 {
1757 // Invalid index into the global sym.
1758 // This can happen in dead code, so we don't want to panic.
1759 // Just return any value, it will eventually get ignored.
1766 // read16 reads two bytes from the read-only global sym at offset off.
1767 func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
1768 lsym := sym.(*obj.LSym)
1769 // lsym.P is written lazily.
1770 // Bytes requested after the end of lsym.P are 0.
1772 if 0 <= off && off < int64(len(lsym.P)) {
1775 buf := make([]byte, 2)
1777 return byteorder.Uint16(buf)
1780 // read32 reads four bytes from the read-only global sym at offset off.
1781 func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
1782 lsym := sym.(*obj.LSym)
1784 if 0 <= off && off < int64(len(lsym.P)) {
1787 buf := make([]byte, 4)
1789 return byteorder.Uint32(buf)
1792 // read64 reads eight bytes from the read-only global sym at offset off.
1793 func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
1794 lsym := sym.(*obj.LSym)
1796 if 0 <= off && off < int64(len(lsym.P)) {
1799 buf := make([]byte, 8)
1801 return byteorder.Uint64(buf)
// sequentialAddresses reports whether it can prove that x + n == y.
1805 func sequentialAddresses(x, y *Value, n int64) bool {
1806 if x == y && n == 0 {
1809 if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
1810 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1811 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1814 if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1815 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1816 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1819 if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
1820 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1821 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1824 if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1825 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1826 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1832 // flagConstant represents the result of a compile-time comparison.
1833 // The sense of these flags does not necessarily represent the hardware's notion
1834 // of a flags register - these are just a compile-time construct.
1835 // We happen to match the semantics to those of arm/arm64.
1836 // Note that these semantics differ from x86: the carry flag has the opposite
1837 // sense on a subtraction!
1839 // On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
1840 // On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
1841 // (because it does x + ^y + C).
1843 // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
1844 type flagConstant uint8
1846 // N reports whether the result of an operation is negative (high bit set).
1847 func (fc flagConstant) N() bool {
1851 // Z reports whether the result of an operation is 0.
1852 func (fc flagConstant) Z() bool {
1856 // C reports whether an unsigned add overflowed (carry), or an
1857 // unsigned subtract did not underflow (borrow).
1858 func (fc flagConstant) C() bool {
1862 // V reports whether a signed operation overflowed or underflowed.
1863 func (fc flagConstant) V() bool {
1867 func (fc flagConstant) eq() bool {
1870 func (fc flagConstant) ne() bool {
1873 func (fc flagConstant) lt() bool {
1874 return fc.N() != fc.V()
1876 func (fc flagConstant) le() bool {
1877 return fc.Z() || fc.lt()
1879 func (fc flagConstant) gt() bool {
1880 return !fc.Z() && fc.ge()
1882 func (fc flagConstant) ge() bool {
1883 return fc.N() == fc.V()
1885 func (fc flagConstant) ult() bool {
1888 func (fc flagConstant) ule() bool {
1889 return fc.Z() || fc.ult()
1891 func (fc flagConstant) ugt() bool {
1892 return !fc.Z() && fc.uge()
1894 func (fc flagConstant) uge() bool {
1898 func (fc flagConstant) ltNoov() bool {
1899 return fc.lt() && !fc.V()
1901 func (fc flagConstant) leNoov() bool {
1902 return fc.le() && !fc.V()
1904 func (fc flagConstant) gtNoov() bool {
1905 return fc.gt() && !fc.V()
1907 func (fc flagConstant) geNoov() bool {
1908 return fc.ge() && !fc.V()
1911 func (fc flagConstant) String() string {
1912 return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
1915 type flagConstantBuilder struct {
1922 func (fcs flagConstantBuilder) encode() flagConstant {
1939 // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
1940 // - the results of the C flag are different
1941 // - the results of the V flag when y==minint are different
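// For illustration, subFlags64(5, 0) sets C (no borrow in the arm model),
// while addFlags64(5, 0) leaves C clear (no carry out), even though both
// compute the value 5.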
1943 // addFlags64 returns the flags that would be set from computing x+y.
1944 func addFlags64(x, y int64) flagConstant {
1945 var fcb flagConstantBuilder
1948 fcb.C = uint64(x+y) < uint64(x)
1949 fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1953 // subFlags64 returns the flags that would be set from computing x-y.
1954 func subFlags64(x, y int64) flagConstant {
1955 var fcb flagConstantBuilder
1958 fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
1959 fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
1963 // addFlags32 returns the flags that would be set from computing x+y.
1964 func addFlags32(x, y int32) flagConstant {
1965 var fcb flagConstantBuilder
1968 fcb.C = uint32(x+y) < uint32(x)
1969 fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1973 // subFlags32 returns the flags that would be set from computing x-y.
1974 func subFlags32(x, y int32) flagConstant {
1975 var fcb flagConstantBuilder
1978 fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
1979 fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
1983 // logicFlags64 returns flags set to the sign/zeroness of x.
1984 // C and V are set to false.
1985 func logicFlags64(x int64) flagConstant {
1986 var fcb flagConstantBuilder
1992 // logicFlags32 returns flags set to the sign/zeroness of x.
1993 // C and V are set to false.
1994 func logicFlags32(x int32) flagConstant {
1995 var fcb flagConstantBuilder
2001 func makeJumpTableSym(b *Block) *obj.LSym {
2002 s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
2003 s.Set(obj.AttrDuplicateOK, true)
2004 s.Set(obj.AttrLocal, true)
2008 // canRotate reports whether the architecture supports
2009 // rotates of integer registers with the given number of bits.
2010 func canRotate(c *Config, bits int64) bool {
2011 if bits > c.PtrSize*8 {
2012 // Don't rewrite to rotates bigger than the machine word.
2016 case "386", "amd64", "arm64":
2018 case "arm", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
2025 // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
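// For illustration, 0x00FF00FF00FF00FF (the 16-bit unit 0x00FF repeated) is
// encodable, while 0 and all-ones are explicitly rejected.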
2026 func isARM64bitcon(x uint64) bool {
2027 if x == 1<<64-1 || x == 0 {
2030 // determine the period and sign-extend a unit to 64 bits
2032 case x != x>>32|x<<32:
2035 case x != x>>16|x<<48:
2037 x = uint64(int64(int32(x)))
2038 case x != x>>8|x<<56:
2040 x = uint64(int64(int16(x)))
2041 case x != x>>4|x<<60:
2043 x = uint64(int64(int8(x)))
2045 // period is 4 or 2, always true
2046 // 0001, 0010, 0100, 1000 -- 0001 rotate
2047 // 0011, 0110, 1100, 1001 -- 0011 rotate
2048 // 0111, 1011, 1101, 1110 -- 0111 rotate
2049 // 0101, 1010 -- 01 rotate, repeat
2052 return sequenceOfOnes(x) || sequenceOfOnes(^x)
2055 // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
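// For illustration, 0b0011_1000 is a sequence of ones, while 0b0101 is not.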
2056 func sequenceOfOnes(x uint64) bool {
2057 y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
// isARM64addcon reports whether v can be encoded as the immediate value in an ADD or SUB instruction.
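// For illustration, 0xFFF (a 12-bit immediate) and 0xFFF000 (the same
// immediate shifted left by 12) are both encodable.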
2063 func isARM64addcon(v int64) bool {
2064 /* uimm12 or uimm24? */
2068 if (v & 0xFFF) == 0 {