1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/logopt"
9 "cmd/compile/internal/types"
11 "cmd/internal/obj/s390x"
23 type deadValueChoice bool
26 leaveDeadValues deadValueChoice = false
27 removeDeadValues = true
30 // deadcode indicates whether rewrite should try to remove any values that become dead.
31 func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
32 // repeat rewrites until we find no more rewrites
33 pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
37 fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
40 var states map[string]bool
43 for _, b := range f.Blocks {
48 b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
50 for i, c := range b.ControlValues() {
53 b.ReplaceControl(i, c)
59 fmt.Printf("rewriting %s -> %s\n", b0.LongString(), b.LongString())
62 for j, v := range b.Values {
67 v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
69 if v.Uses == 0 && v.removeable() {
70 if v.Op != OpInvalid && deadcode == removeDeadValues {
71 // Reset any values that are now unused, so that we decrement
72 // the use count of all of its arguments.
73 // Not quite a deadcode pass, because it does not handle cycles.
74 // But it should help Uses==1 rules to fire.
78 // No point rewriting values which aren't used.
82 vchange := phielimValue(v)
83 if vchange && debug > 1 {
84 fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
87 // Eliminate copy inputs.
88 // If any copy input becomes unused, mark it
89 // as invalid and discard its argument. Repeat
90 // recursively on the discarded argument.
91 // This phase helps remove phantom "dead copy" uses
92 // of a value so that an x.Uses==1 rule condition
94 for i, a := range v.Args {
100 // If a, a copy, has a line boundary indicator, attempt to find a new value
101 // to hold it. The first candidate is the value that will replace a (aa),
102 // if it shares the same block and line and is eligible.
103 // The second option is v, which has a as an input. Because aa is earlier in
104 // the data flow, it is the better choice.
105 if a.Pos.IsStmt() == src.PosIsStmt {
106 if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
107 aa.Pos = aa.Pos.WithIsStmt()
108 } else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
109 v.Pos = v.Pos.WithIsStmt()
111 // Record the lost line and look for a new home after all rewrites are complete.
112 // TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
113 // line to appear in more than one block, but only one block is stored, so if both end
114 // up here, then one will be lost.
115 pendingLines.set(a.Pos, int32(a.Block.ID))
117 a.Pos = a.Pos.WithNotStmt()
126 if vchange && debug > 1 {
127 fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
130 // apply rewrite function
133 // If value changed to a poor choice for a statement boundary, move the boundary
134 if v.Pos.IsStmt() == src.PosIsStmt {
135 if k := nextGoodStatementIndex(v, j, b); k != j {
136 v.Pos = v.Pos.WithNotStmt()
137 b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
142 change = change || vchange
143 if vchange && debug > 1 {
144 fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
152 if iters > 1000 || debug >= 2 {
153 // We've done a suspiciously large number of rewrites (or we're in debug mode).
154 // As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
155 // and the maximum value encountered during make.bash is 12.
156 // Start checking for cycles. (This is too expensive to do routinely.)
158 states = make(map[string]bool)
161 if _, ok := states[h]; ok {
162 // We've found a cycle.
163 // To diagnose it, set debug to 2 and start again,
164 // so that we'll print all rules applied until we complete another cycle.
165 // If debug is already >= 2, we've already done that, so it's time to crash.
168 states = make(map[string]bool)
170 f.Fatalf("rewrite cycle detected")
176 // remove clobbered values
177 for _, b := range f.Blocks {
179 for i, v := range b.Values {
181 if v.Op == OpInvalid {
182 if v.Pos.IsStmt() == src.PosIsStmt {
183 pendingLines.set(vl, int32(b.ID))
188 if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
189 pendingLines.remove(vl)
190 v.Pos = v.Pos.WithIsStmt()
197 if pendingLines.get(b.Pos) == int32(b.ID) {
198 b.Pos = b.Pos.WithIsStmt()
199 pendingLines.remove(b.Pos)
205 // Common functions called from rewriting rules
207 func is64BitFloat(t *types.Type) bool {
208 return t.Size() == 8 && t.IsFloat()
211 func is32BitFloat(t *types.Type) bool {
212 return t.Size() == 4 && t.IsFloat()
215 func is64BitInt(t *types.Type) bool {
216 return t.Size() == 8 && t.IsInteger()
219 func is32BitInt(t *types.Type) bool {
220 return t.Size() == 4 && t.IsInteger()
223 func is16BitInt(t *types.Type) bool {
224 return t.Size() == 2 && t.IsInteger()
227 func is8BitInt(t *types.Type) bool {
228 return t.Size() == 1 && t.IsInteger()
231 func isPtr(t *types.Type) bool {
232 return t.IsPtrShaped()
235 func isSigned(t *types.Type) bool {
239 // mergeSym merges two symbolic offsets. There is no real merging of
240 // offsets; we just pick the non-nil one.
241 func mergeSym(x, y Sym) Sym {
248 panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
251 func canMergeSym(x, y Sym) bool {
252 return x == nil || y == nil
255 // canMergeLoadClobber reports whether the load can be merged into target without
256 // invalidating the schedule.
257 // It also checks that the other non-load argument x is something we
258 // are ok with clobbering.
259 func canMergeLoadClobber(target, load, x *Value) bool {
260 // The register containing x is going to get clobbered.
261 // Don't merge if we still need the value of x.
262 // We don't have liveness information here, but we can
263 // approximate x dying with:
264 // 1) target is x's only use.
265 // 2) target is not in a deeper loop than x.
269 loopnest := x.Block.Func.loopnest()
270 loopnest.calculateDepths()
271 if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
274 return canMergeLoad(target, load)
277 // canMergeLoad reports whether the load can be merged into target without
278 // invalidating the schedule.
279 func canMergeLoad(target, load *Value) bool {
280 if target.Block.ID != load.Block.ID {
281 // If the load is in a different block do not merge it.
285 // We can't merge the load into the target if the load
286 // has more than one use.
291 mem := load.MemoryArg()
293 // We need the load's memory arg to still be alive at target. That
294 // can't be the case if one of target's args depends on a memory
295 // state that is a successor of load's memory arg.
297 // For example, it would be invalid to merge load into target in
298 // the following situation because newmem has killed oldmem
299 // before target is reached:
300 // load = read ... oldmem
301 // newmem = write ... oldmem
302 // arg0 = read ... newmem
303 // target = add arg0 load
305 // If the argument comes from a different block then we can exclude
306 // it immediately because it must dominate load (which is in the
307 // same block as target).
309 for _, a := range target.Args {
310 if a != load && a.Block.ID == target.Block.ID {
311 args = append(args, a)
315 // memPreds contains memory states known to be predecessors of load's
316 // memory state. It is lazily initialized.
317 var memPreds map[*Value]bool
318 for i := 0; len(args) > 0; i++ {
321 // Give up if we have done a lot of iterations.
324 v := args[len(args)-1]
325 args = args[:len(args)-1]
326 if target.Block.ID != v.Block.ID {
327 // Since target and load are in the same block
328 // we can stop searching when we leave the block.
332 // A Phi implies we have reached the top of the block.
333 // The memory phi, if it exists, is always
334 // the first logical store in the block.
337 if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
338 // We could handle this situation however it is likely
342 if v.Op.SymEffect()&SymAddr != 0 {
343 // This case prevents an operation that calculates the
344 // address of a local variable from being forced to schedule
345 // before its corresponding VarDef.
351 // We don't want to combine the CMPQ with the load, because
352 // that would force the CMPQ to schedule before the VARDEF, which
353 // in turn requires the LEAQ to schedule before the VARDEF.
356 if v.Type.IsMemory() {
358 // Initialise a map containing memory states
359 // known to be predecessors of load's memory
361 memPreds = make(map[*Value]bool)
364 for i := 0; i < limit; i++ {
366 // The memory phi, if it exists, is always
367 // the first logical store in the block.
370 if m.Block.ID != target.Block.ID {
373 if !m.Type.IsMemory() {
377 if len(m.Args) == 0 {
384 // We can merge if v is a predecessor of mem.
386 // For example, we can merge load into target in the
387 // following scenario:
390 // load = read ... mem
391 // target = add x load
397 if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
398 // If v takes mem as an input then we know mem
399 // is valid at this point.
402 for _, a := range v.Args {
403 if target.Block.ID == a.Block.ID {
404 args = append(args, a)
412 // isSameCall reports whether sym is the same as the given named symbol
413 func isSameCall(sym interface{}, name string) bool {
414 fn := sym.(*AuxCall).Fn
415 return fn != nil && fn.String() == name
418 // canLoadUnaligned reports whether the architecture supports unaligned load operations.
419 func canLoadUnaligned(c *Config) bool {
420 return c.ctxt.Arch.Alignment == 1
423 // nlz returns the number of leading zeros.
424 func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
425 func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
426 func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
427 func nlz8(x int8) int { return bits.LeadingZeros8(uint8(x)) }
429 // ntzX returns the number of trailing zeros.
430 func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
431 func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
432 func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
433 func ntz8(x int8) int { return bits.TrailingZeros8(uint8(x)) }
435 func oneBit(x int64) bool { return x&(x-1) == 0 && x != 0 }
436 func oneBit8(x int8) bool { return x&(x-1) == 0 && x != 0 }
437 func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
438 func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
439 func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
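// The x&(x-1) trick clears the lowest set bit, so the expression is zero exactly
// when at most one bit is set; e.g. oneBit(8) is true and oneBit(12) is false.
// Unlike the signed isPowerOfTwoX helpers below, these also accept a value whose
// only set bit is the sign bit.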
441 // nto returns the number of trailing ones.
442 func nto(x int64) int64 {
443 return int64(ntz64(^x))
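// For example, nto(0b0111) == 3: ^x ends in three zero bits, which ntz64 counts.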
446 // logX returns the base-2 logarithm of n.
447 // n must be a positive power of 2 (isPowerOfTwoX returns true).
448 func log8(n int8) int64 {
449 return int64(bits.Len8(uint8(n))) - 1
451 func log16(n int16) int64 {
452 return int64(bits.Len16(uint16(n))) - 1
454 func log32(n int32) int64 {
455 return int64(bits.Len32(uint32(n))) - 1
457 func log64(n int64) int64 {
458 return int64(bits.Len64(uint64(n))) - 1
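// For example, log64(8) == 3: bits.Len64(8) is 4, and subtracting 1 gives the bit
// position of the single set bit.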
461 // log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
463 func log2uint32(n int64) int64 {
464 return int64(bits.Len32(uint32(n))) - 1
467 // isPowerOfTwo functions report whether n is a power of 2.
468 func isPowerOfTwo8(n int8) bool {
469 return n > 0 && n&(n-1) == 0
471 func isPowerOfTwo16(n int16) bool {
472 return n > 0 && n&(n-1) == 0
474 func isPowerOfTwo32(n int32) bool {
475 return n > 0 && n&(n-1) == 0
477 func isPowerOfTwo64(n int64) bool {
478 return n > 0 && n&(n-1) == 0
481 // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
482 func isUint64PowerOfTwo(in int64) bool {
484 return n > 0 && n&(n-1) == 0
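// Because the argument is reinterpreted as unsigned, a value whose only set bit is
// the sign bit (1<<63) counts as a power of two here, whereas the signed
// isPowerOfTwo64 above rejects it.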
487 // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
488 func isUint32PowerOfTwo(in int64) bool {
489 n := uint64(uint32(in))
490 return n > 0 && n&(n-1) == 0
493 // is32Bit reports whether n can be represented as a signed 32 bit integer.
494 func is32Bit(n int64) bool {
495 return n == int64(int32(n))
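// For example, is32Bit(1<<31) is false: int32(1<<31) wraps to a negative value,
// so converting back to int64 does not reproduce n.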
498 // is16Bit reports whether n can be represented as a signed 16 bit integer.
499 func is16Bit(n int64) bool {
500 return n == int64(int16(n))
503 // is8Bit reports whether n can be represented as a signed 8 bit integer.
504 func is8Bit(n int64) bool {
505 return n == int64(int8(n))
508 // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
509 func isU8Bit(n int64) bool {
510 return n == int64(uint8(n))
513 // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
514 func isU12Bit(n int64) bool {
515 return 0 <= n && n < (1<<12)
518 // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
519 func isU16Bit(n int64) bool {
520 return n == int64(uint16(n))
523 // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
524 func isU32Bit(n int64) bool {
525 return n == int64(uint32(n))
528 // is20Bit reports whether n can be represented as a signed 20 bit integer.
529 func is20Bit(n int64) bool {
530 return -(1<<19) <= n && n < (1<<19)
533 // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
534 func b2i(b bool) int64 {
541 // b2i32 translates a boolean value to 0 or 1.
542 func b2i32(b bool) int32 {
549 // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
550 // A shift is bounded if it is shifting by less than the width of the shifted value.
551 func shiftIsBounded(v *Value) bool {
555 // canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
556 // generated code as much as possible.
557 func canonLessThan(x, y *Value) bool {
561 if !x.Pos.SameFileAndLine(y.Pos) {
562 return x.Pos.Before(y.Pos)
567 // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
568 // of the mantissa. It will panic if the truncation results in lost information.
569 func truncate64Fto32F(f float64) float32 {
570 if !isExactFloat32(f) {
571 panic("truncate64Fto32F: truncation is not exact")
576 // NaN bit patterns aren't necessarily preserved across conversion
577 // instructions so we need to do the conversion manually.
578 b := math.Float64bits(f)
579 m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
580 // | sign | exponent | mantissa |
581 r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
582 return math.Float32frombits(r)
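// Per the comment above, this manual path handles NaN bit patterns: the sign bit
// moves from bit 63 to bit 31, the exponent field is forced to all ones
// (0x7f800000), and the top 23 of the 52 mantissa bits are kept.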
585 // extend32Fto64F converts a float32 value to a float64 value preserving the bit
586 // pattern of the mantissa.
587 func extend32Fto64F(f float32) float64 {
588 if !math.IsNaN(float64(f)) {
591 // NaN bit patterns aren't necessarily preserved across conversion
592 // instructions so we need to do the conversion manually.
593 b := uint64(math.Float32bits(f))
594 // | sign | exponent | mantissa |
595 r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
596 return math.Float64frombits(r)
599 // DivisionNeedsFixUp reports whether the division needs fix-up code.
600 func DivisionNeedsFixUp(v *Value) bool {
604 // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
605 func auxFrom64F(f float64) int64 {
607 panic("can't encode a NaN in AuxInt field")
609 return int64(math.Float64bits(f))
612 // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
613 func auxFrom32F(f float32) int64 {
615 panic("can't encode a NaN in AuxInt field")
617 return int64(math.Float64bits(extend32Fto64F(f)))
620 // auxTo32F decodes a float32 from the AuxInt value provided.
621 func auxTo32F(i int64) float32 {
622 return truncate64Fto32F(math.Float64frombits(uint64(i)))
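// For non-NaN values the round trip is exact: auxTo32F(auxFrom32F(x)) == x, since
// widening a float32 to float64 and truncating back loses no bits.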
625 // auxTo64F decodes a float64 from the AuxInt value provided.
626 func auxTo64F(i int64) float64 {
627 return math.Float64frombits(uint64(i))
630 func auxIntToBool(i int64) bool {
636 func auxIntToInt8(i int64) int8 {
639 func auxIntToInt16(i int64) int16 {
642 func auxIntToInt32(i int64) int32 {
645 func auxIntToInt64(i int64) int64 {
648 func auxIntToUint8(i int64) uint8 {
651 func auxIntToFloat32(i int64) float32 {
652 return float32(math.Float64frombits(uint64(i)))
654 func auxIntToFloat64(i int64) float64 {
655 return math.Float64frombits(uint64(i))
657 func auxIntToValAndOff(i int64) ValAndOff {
660 func auxIntToArm64BitField(i int64) arm64BitField {
661 return arm64BitField(i)
663 func auxIntToInt128(x int64) int128 {
665 panic("nonzero int128 not allowed")
669 func auxIntToFlagConstant(x int64) flagConstant {
670 return flagConstant(x)
673 func auxIntToOp(cc int64) Op {
677 func boolToAuxInt(b bool) int64 {
683 func int8ToAuxInt(i int8) int64 {
686 func int16ToAuxInt(i int16) int64 {
689 func int32ToAuxInt(i int32) int64 {
692 func int64ToAuxInt(i int64) int64 {
695 func uint8ToAuxInt(i uint8) int64 {
696 return int64(int8(i))
698 func float32ToAuxInt(f float32) int64 {
699 return int64(math.Float64bits(float64(f)))
701 func float64ToAuxInt(f float64) int64 {
702 return int64(math.Float64bits(f))
704 func valAndOffToAuxInt(v ValAndOff) int64 {
707 func arm64BitFieldToAuxInt(v arm64BitField) int64 {
710 func int128ToAuxInt(x int128) int64 {
712 panic("nonzero int128 not allowed")
716 func flagConstantToAuxInt(x flagConstant) int64 {
720 func opToAuxInt(o Op) int64 {
724 // Aux is an interface to hold miscellaneous data in Blocks and Values.
729 // stringAux wraps string values for use in Aux.
730 type stringAux string
732 func (stringAux) CanBeAnSSAAux() {}
734 func auxToString(i Aux) string {
735 return string(i.(stringAux))
737 func auxToSym(i Aux) Sym {
738 // TODO: kind of a hack - allows nil interface through
742 func auxToType(i Aux) *types.Type {
743 return i.(*types.Type)
745 func auxToCall(i Aux) *AuxCall {
748 func auxToS390xCCMask(i Aux) s390x.CCMask {
749 return i.(s390x.CCMask)
751 func auxToS390xRotateParams(i Aux) s390x.RotateParams {
752 return i.(s390x.RotateParams)
755 func StringToAux(s string) Aux {
758 func symToAux(s Sym) Aux {
761 func callToAux(s *AuxCall) Aux {
764 func typeToAux(t *types.Type) Aux {
767 func s390xCCMaskToAux(c s390x.CCMask) Aux {
770 func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
774 // uaddOvf reports whether unsigned a+b would overflow.
775 func uaddOvf(a, b int64) bool {
776 return uint64(a)+uint64(b) < uint64(a)
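// The unsigned sum wraps modulo 2^64, so it is smaller than an addend exactly when
// the addition overflowed; e.g. uaddOvf(-1, 1) is true because uint64(-1)+1 wraps
// to 0.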
779 // loadLSymOffset simulates reading a word at an offset into a
780 // read-only symbol's runtime memory. If it would read a pointer to
781 // another symbol, that symbol is returned. Otherwise, it returns nil.
782 func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
783 if lsym.Type != objabi.SRODATA {
787 for _, r := range lsym.R {
788 if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
796 // de-virtualize an InterLECall
797 // 'sym' is the symbol for the itab
798 func devirtLESym(v *Value, aux Aux, sym Sym, offset int64) *obj.LSym {
799 n, ok := sym.(*obj.LSym)
804 lsym := loadLSymOffset(n, offset)
805 if f := v.Block.Func; f.pass.debug > 0 {
807 f.Warnl(v.Pos, "de-virtualizing call")
809 f.Warnl(v.Pos, "couldn't de-virtualize call")
815 func devirtLECall(v *Value, sym *obj.LSym) *Value {
816 v.Op = OpStaticLECall
817 auxcall := v.Aux.(*AuxCall)
821 copy(v.Args[0:], v.Args[1:])
822 v.Args[len(v.Args)-1] = nil // aid GC
823 v.Args = v.Args[:len(v.Args)-1]
827 // isSamePtr reports whether p1 and p2 point to the same address.
828 func isSamePtr(p1, p2 *Value) bool {
837 return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
838 case OpAddr, OpLocalAddr:
839 // OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op.
840 // Checking for value equality only works after [z]cse has run.
841 return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op
843 return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
848 func isStackPtr(v *Value) bool {
849 for v.Op == OpOffPtr || v.Op == OpAddPtr {
852 return v.Op == OpSP || v.Op == OpLocalAddr
855 // disjoint reports whether the memory region specified by [p1:p1+n1)
856 // does not overlap with [p2:p2+n2).
857 // A return value of false does not imply the regions overlap.
858 func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
859 if n1 == 0 || n2 == 0 {
865 baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
866 base, offset = ptr, 0
867 for base.Op == OpOffPtr {
868 offset += base.AuxInt
873 p1, off1 := baseAndOffset(p1)
874 p2, off2 := baseAndOffset(p2)
875 if isSamePtr(p1, p2) {
876 return !overlap(off1, n1, off2, n2)
878 // p1 and p2 are not the same, so if they are both OpAddrs then
879 // they point to different variables.
880 // If one pointer is on the stack and the other is an argument
881 // then they can't overlap.
883 case OpAddr, OpLocalAddr:
884 if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
887 return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
888 case OpArg, OpArgIntReg:
889 if p2.Op == OpSP || p2.Op == OpLocalAddr {
893 return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
898 // moveSize returns the number of bytes an aligned MOV instruction moves
899 func moveSize(align int64, c *Config) int64 {
901 case align%8 == 0 && c.PtrSize == 8:
911 // mergePoint finds a block among a's blocks which dominates b and is itself
912 // dominated by all of a's blocks. Returns nil if it can't find one.
913 // Might return nil even if one does exist.
914 func mergePoint(b *Block, a ...*Value) *Block {
915 // Walk backward from b looking for one of the a's blocks.
921 for _, x := range a {
926 if len(b.Preds) > 1 {
927 // Don't know which way to go back. Abort.
933 return nil // too far away
935 // At this point, r is the first value in a that we find by walking backwards.
936 // If we return anything, r will be it.
939 // Keep going, counting the other a's that we find. They must all dominate r.
942 for _, x := range a {
948 // Found all of a in a backwards walk. We can return r.
951 if len(b.Preds) > 1 {
958 return nil // too far away
961 // clobber invalidates values. Returns true.
962 // clobber is used by rewrite rules to:
963 // A) make sure the values are really dead and never used again.
964 // B) decrement use counts of the values' args.
965 func clobber(vv ...*Value) bool {
966 for _, v := range vv {
968 // Note: leave v.Block intact. The Block field is used after clobber.
973 // clobberIfDead resets v when use count is 1. Returns true.
974 // clobberIfDead is used by rewrite rules to decrement
975 // use counts of v's args when v is dead and never used.
976 func clobberIfDead(v *Value) bool {
980 // Note: leave v.Block intact. The Block field is used after clobberIfDead.
984 // noteRule is an easy way to track if a rule is matched when writing
985 // new ones. Make the rule of interest also conditional on
986 // noteRule("note to self: rule of interest matched")
987 // and that message will print when the rule matches.
988 func noteRule(s string) bool {
993 // countRule increments Func.ruleMatches[key].
994 // If Func.ruleMatches is non-nil at the end
995 // of compilation, it will be printed to stdout.
996 // This is intended to make it easier to find which functions
997 // contain lots of rule matches when developing new rules.
998 func countRule(v *Value, key string) bool {
1000 if f.ruleMatches == nil {
1001 f.ruleMatches = make(map[string]int)
1003 f.ruleMatches[key]++
1007 // warnRule generates compiler debug output with string s when
1008 // v is not in autogenerated code, cond is true and the rule has fired.
1009 func warnRule(cond bool, v *Value, s string) bool {
1010 if pos := v.Pos; pos.Line() > 1 && cond {
1011 v.Block.Func.Warnl(pos, s)
1016 // for a pseudo-op like (LessThan x), extract x
1017 func flagArg(v *Value) *Value {
1018 if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
1024 // arm64Negate finds the complement to an ARM64 condition code,
1025 // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
1027 // For floating point, it's more subtle because NaN is unordered. We do
1028 // !LessThanF -> NotLessThanF, the latter takes care of NaNs.
1029 func arm64Negate(op Op) Op {
1031 case OpARM64LessThan:
1032 return OpARM64GreaterEqual
1033 case OpARM64LessThanU:
1034 return OpARM64GreaterEqualU
1035 case OpARM64GreaterThan:
1036 return OpARM64LessEqual
1037 case OpARM64GreaterThanU:
1038 return OpARM64LessEqualU
1039 case OpARM64LessEqual:
1040 return OpARM64GreaterThan
1041 case OpARM64LessEqualU:
1042 return OpARM64GreaterThanU
1043 case OpARM64GreaterEqual:
1044 return OpARM64LessThan
1045 case OpARM64GreaterEqualU:
1046 return OpARM64LessThanU
1048 return OpARM64NotEqual
1049 case OpARM64NotEqual:
1051 case OpARM64LessThanF:
1052 return OpARM64NotLessThanF
1053 case OpARM64NotLessThanF:
1054 return OpARM64LessThanF
1055 case OpARM64LessEqualF:
1056 return OpARM64NotLessEqualF
1057 case OpARM64NotLessEqualF:
1058 return OpARM64LessEqualF
1059 case OpARM64GreaterThanF:
1060 return OpARM64NotGreaterThanF
1061 case OpARM64NotGreaterThanF:
1062 return OpARM64GreaterThanF
1063 case OpARM64GreaterEqualF:
1064 return OpARM64NotGreaterEqualF
1065 case OpARM64NotGreaterEqualF:
1066 return OpARM64GreaterEqualF
1068 panic("unreachable")
1072 // arm64Invert evaluates (InvertFlags op), which
1073 // is the same as altering the condition codes such
1074 // that the same result would be produced if the arguments
1075 // to the flag-generating instruction were reversed, e.g.
1076 // (InvertFlags (CMP x y)) -> (CMP y x)
1077 func arm64Invert(op Op) Op {
1079 case OpARM64LessThan:
1080 return OpARM64GreaterThan
1081 case OpARM64LessThanU:
1082 return OpARM64GreaterThanU
1083 case OpARM64GreaterThan:
1084 return OpARM64LessThan
1085 case OpARM64GreaterThanU:
1086 return OpARM64LessThanU
1087 case OpARM64LessEqual:
1088 return OpARM64GreaterEqual
1089 case OpARM64LessEqualU:
1090 return OpARM64GreaterEqualU
1091 case OpARM64GreaterEqual:
1092 return OpARM64LessEqual
1093 case OpARM64GreaterEqualU:
1094 return OpARM64LessEqualU
1095 case OpARM64Equal, OpARM64NotEqual:
1097 case OpARM64LessThanF:
1098 return OpARM64GreaterThanF
1099 case OpARM64GreaterThanF:
1100 return OpARM64LessThanF
1101 case OpARM64LessEqualF:
1102 return OpARM64GreaterEqualF
1103 case OpARM64GreaterEqualF:
1104 return OpARM64LessEqualF
1105 case OpARM64NotLessThanF:
1106 return OpARM64NotGreaterThanF
1107 case OpARM64NotGreaterThanF:
1108 return OpARM64NotLessThanF
1109 case OpARM64NotLessEqualF:
1110 return OpARM64NotGreaterEqualF
1111 case OpARM64NotGreaterEqualF:
1112 return OpARM64NotLessEqualF
1114 panic("unreachable")
1118 // evaluate an ARM64 op against a flags value
1119 // that is potentially constant; return 1 for true,
1120 // -1 for false, and 0 for not constant.
1121 func ccARM64Eval(op Op, flags *Value) int {
1123 if fop == OpARM64InvertFlags {
1124 return -ccARM64Eval(op, flags.Args[0])
1126 if fop != OpARM64FlagConstant {
1129 fc := flagConstant(flags.AuxInt)
1130 b2i := func(b bool) int {
1139 case OpARM64NotEqual:
1141 case OpARM64LessThan:
1143 case OpARM64LessThanU:
1144 return b2i(fc.ult())
1145 case OpARM64GreaterThan:
1147 case OpARM64GreaterThanU:
1148 return b2i(fc.ugt())
1149 case OpARM64LessEqual:
1151 case OpARM64LessEqualU:
1152 return b2i(fc.ule())
1153 case OpARM64GreaterEqual:
1155 case OpARM64GreaterEqualU:
1156 return b2i(fc.uge())
1161 // logRule logs the use of the rule s. This will only be enabled if
1162 // rewrite rules were generated with the -log option, see gen/rulegen.go.
1163 func logRule(s string) {
1164 if ruleFile == nil {
1165 // Open a log file to write log to. We open in append
1166 // mode because all.bash runs the compiler lots of times,
1167 // and we want the concatenation of all of those logs.
1168 // This means, of course, that users need to rm the old log
1169 // to get fresh data.
1170 // TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
1171 w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
1172 os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
1178 _, err := fmt.Fprintln(ruleFile, s)
1184 var ruleFile io.Writer
1186 func min(x, y int64) int64 {
1193 func isConstZero(v *Value) bool {
1197 case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
1198 return v.AuxInt == 0
1203 // reciprocalExact64 reports whether 1/c is exactly representable.
1204 func reciprocalExact64(c float64) bool {
1205 b := math.Float64bits(c)
1206 man := b & (1<<52 - 1)
1208 return false // not a power of 2, denormal, or NaN
1210 exp := b >> 52 & (1<<11 - 1)
1211 // exponent bias is 0x3ff. So taking the reciprocal of a number
1212 // changes the exponent to 0x7fe-exp.
1217 return false // ±inf
1219 return false // exponent is not representable
1225 // reciprocalExact32 reports whether 1/c is exactly representable.
1226 func reciprocalExact32(c float32) bool {
1227 b := math.Float32bits(c)
1228 man := b & (1<<23 - 1)
1230 return false // not a power of 2, denormal, or NaN
1232 exp := b >> 23 & (1<<8 - 1)
1233 // exponent bias is 0x7f. So taking the reciprocal of a number
1234 // changes the exponent to 0xfe-exp.
1239 return false // ±inf
1241 return false // exponent is not representable
1247 // check if an immediate can be directly encoded into an ARM instruction
1248 func isARMImmRot(v uint32) bool {
1249 for i := 0; i < 16; i++ {
1259 // overlap reports whether the ranges given by the given offset and
1260 // size pairs overlap.
1261 func overlap(offset1, size1, offset2, size2 int64) bool {
1262 if offset1 >= offset2 && offset2+size2 > offset1 {
1265 if offset2 >= offset1 && offset1+size1 > offset2 {
1271 func areAdjacentOffsets(off1, off2, size int64) bool {
1272 return off1+size == off2 || off1 == off2+size
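// For example, with size 8 the offsets 0 and 8 are adjacent in either argument
// order, while overlap(0, 8, 4, 8) above reports true because [0,8) and [4,12)
// share bytes 4 through 7.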
1275 // check if a value zeroes out the upper 32 bits of a 64-bit register.
1276 // depth limits the recursion depth. In AMD64.rules 3 is used as the limit,
1277 // because it catches the same number of cases as 4.
1278 func zeroUpper32Bits(x *Value, depth int) bool {
1280 case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
1281 OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
1282 OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
1283 OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
1284 OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
1285 OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
1286 OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
1287 OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
1288 OpAMD64SHLL, OpAMD64SHLLconst:
1291 return x.Type.Size() == 4
1292 case OpPhi, OpSelect0, OpSelect1:
1293 // Phis can use each other as arguments; instead of tracking visited values,
1294 // just limit the recursion depth.
1298 for i := range x.Args {
1299 if !zeroUpper32Bits(x.Args[i], depth-1) {
1309 // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits
1310 func zeroUpper48Bits(x *Value, depth int) bool {
1312 case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
1315 return x.Type.Size() == 2
1316 case OpPhi, OpSelect0, OpSelect1:
1317 // Phis can use each other as arguments; instead of tracking visited values,
1318 // just limit the recursion depth.
1322 for i := range x.Args {
1323 if !zeroUpper48Bits(x.Args[i], depth-1) {
1333 // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits
1334 func zeroUpper56Bits(x *Value, depth int) bool {
1336 case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
1339 return x.Type.Size() == 1
1340 case OpPhi, OpSelect0, OpSelect1:
1341 // Phis can use each other as arguments; instead of tracking visited values,
1342 // just limit the recursion depth.
1346 for i := range x.Args {
1347 if !zeroUpper56Bits(x.Args[i], depth-1) {
1357 // isInlinableMemmove reports whether the given arch performs a Move of the given size
1358 // faster than memmove. It will only return true if replacing the memmove with a Move is
1359 // safe, either because Move is small or because the arguments are disjoint.
1360 // This is used as a check for replacing memmove with Move ops.
1361 func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1362 // It is always safe to convert memmove into Move when its arguments are disjoint.
1363 // Move ops may or may not be faster for large sizes depending on how the platform
1364 // lowers them, so we only perform this optimization on platforms that we know to
1365 // have fast Move ops.
1368 return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
1369 case "386", "arm64":
1371 case "s390x", "ppc64", "ppc64le":
1372 return sz <= 8 || disjoint(dst, sz, src, sz)
1373 case "arm", "mips", "mips64", "mipsle", "mips64le":
1379 // logLargeCopy logs the occurrence of a large copy.
1380 // The best place to do this is in the rewrite rules where the size of the move is easy to find.
1381 // "Large" is arbitrarily chosen to be 128 bytes; this may change.
1382 func logLargeCopy(v *Value, s int64) bool {
1386 if logopt.Enabled() {
1387 logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
1392 // hasSmallRotate reports whether the architecture has rotate instructions
1393 // for sizes < 32-bit. This is used to decide whether to promote some rotations.
1394 func hasSmallRotate(c *Config) bool {
1396 case "amd64", "386":
1403 func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
1404 if sh < 0 || sh >= sz {
1405 panic("PPC64 shift arg sh out of range")
1407 if mb < 0 || mb >= sz {
1408 panic("PPC64 shift arg mb out of range")
1410 if me < 0 || me >= sz {
1411 panic("PPC64 shift arg me out of range")
1413 return int32(sh<<16 | mb<<8 | me)
1416 func GetPPC64Shiftsh(auxint int64) int64 {
1417 return int64(int8(auxint >> 16))
1420 func GetPPC64Shiftmb(auxint int64) int64 {
1421 return int64(int8(auxint >> 8))
1424 func GetPPC64Shiftme(auxint int64) int64 {
1425 return int64(int8(auxint))
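// For example, newPPC64ShiftAuxInt(2, 0, 29, 32) packs to 0x2001d, and the getters
// recover sh=2, mb=0 and me=29 by extracting and sign-extending each byte-sized
// field.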
1428 // Test if this value can be encoded as a mask for an rlwinm-like
1429 // operation. Masks can also extend from the msb and wrap around to
1430 // the lsb. That is, the valid masks are 32 bit strings
1431 // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
1432 func isPPC64WordRotateMask(v64 int64) bool {
1433 // Isolate the rightmost 1 bit (0 if there is none) and add.
1436 // Likewise, for the wrapping case.
1438 vpn := (vn & -vn) + vn
1439 return (v&vp == 0 || vn&vpn == 0) && v != 0
1442 // Compress mask and shift into single value of the form
1443 // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
1444 // be used to regenerate the input mask.
1445 func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
1446 var mb, me, mbn, men int
1448 // Determine boundaries and then decode them
1449 if mask == 0 || ^mask == 0 || rotate >= nbits {
1450 panic("Invalid PPC64 rotate mask")
1451 } else if nbits == 32 {
1452 mb = bits.LeadingZeros32(uint32(mask))
1453 me = 32 - bits.TrailingZeros32(uint32(mask))
1454 mbn = bits.LeadingZeros32(^uint32(mask))
1455 men = 32 - bits.TrailingZeros32(^uint32(mask))
1457 mb = bits.LeadingZeros64(uint64(mask))
1458 me = 64 - bits.TrailingZeros64(uint64(mask))
1459 mbn = bits.LeadingZeros64(^uint64(mask))
1460 men = 64 - bits.TrailingZeros64(^uint64(mask))
1462 // Check for a wrapping mask (e.g. bits at 0 and 63)
1463 if mb == 0 && me == int(nbits) {
1464 // swap the inverted values
1468 return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
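// For example, encodePPC64RotateMask(0, 0xFFF0, 32) computes mb = LeadingZeros32(mask) = 16
// and me = 32 - TrailingZeros32(mask) = 28, encoding to 0x2000101c.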
1471 // The inverse operation of encodePPC64RotateMask. The values returned as
1472 // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
1473 func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
1474 auxint := uint64(sauxint)
1475 rotate = int64((auxint >> 16) & 0xFF)
1476 mb = int64((auxint >> 8) & 0xFF)
1477 me = int64((auxint >> 0) & 0xFF)
1478 nbits := int64((auxint >> 24) & 0xFF)
1479 mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
1484 mask = uint64(uint32(mask))
1487 // Fixup ME to match ISA definition. The second argument to MASK(..,me)
1489 me = (me - 1) & (nbits - 1)
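// Continuing the example above, DecodePPC64RotateMask(0x2000101c) recovers rotate=0,
// nbits=32, mb=16 and me=28; the formula then yields mask = (1<<16 - 1) ^ (1<<4 - 1) = 0xFFF0,
// and me is adjusted to 27 to match the inclusive ISA definition.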
1493 // This verifies that the mask is a set of
1494 // consecutive bits including the least
1496 func isPPC64ValidShiftMask(v int64) bool {
1497 if (v != 0) && ((v+1)&v) == 0 {
1503 func getPPC64ShiftMaskLength(v int64) int64 {
1504 return int64(bits.Len64(uint64(v)))
1507 // Decompose a shift right into an equivalent rotate/mask,
1508 // and return mask & m.
1509 func mergePPC64RShiftMask(m, s, nbits int64) int64 {
1510 smask := uint64((1<<uint(nbits))-1) >> uint(s)
1511 return m & int64(smask)
1514 // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
1515 func mergePPC64AndSrwi(m, s int64) int64 {
1516 mask := mergePPC64RShiftMask(m, s, 32)
1517 if !isPPC64WordRotateMask(mask) {
1520 return encodePPC64RotateMask((32-s)&31, mask, 32)
1523 // Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
1524 // Return the encoded RLWINM constant, or 0 if they cannot be merged.
1525 func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
1526 mask_1 := uint64(0xFFFFFFFF >> uint(srw))
1527 // for CLRLSLDI, it's more convenient to think of it as masking the left bits, then rotating left.
1528 mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1530 // Rewrite mask to apply after the final left shift.
1531 mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
1534 r_2 := GetPPC64Shiftsh(sld)
1535 r_3 := (r_1 + r_2) & 31 // This can wrap.
1537 if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
1540 return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
1543 // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM. Return
1544 // the encoded RLWINM constant, or 0 if they cannot be merged.
1545 func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
1546 r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
1547 // for CLRLSLDI, it's more convenient to think of it as masking the left bits, then rotating left.
1548 mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1550 // combine the masks, and adjust for the final left shift.
1551 mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
1552 r_2 := GetPPC64Shiftsh(int64(sld))
1553 r_3 := (r_1 + r_2) & 31 // This can wrap.
1555 // Verify the result is still a valid bitmask of <= 32 bits.
1556 if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
1559 return encodePPC64RotateMask(r_3, int64(mask_3), 32)
1562 // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
1563 // or return 0 if they cannot be combined.
1564 func mergePPC64SldiSrw(sld, srw int64) int64 {
1565 if sld > srw || srw >= 32 {
1568 mask_r := uint32(0xFFFFFFFF) >> uint(srw)
1569 mask_l := uint32(0xFFFFFFFF) >> uint(sld)
1570 mask := (mask_r & mask_l) << uint(sld)
1571 return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
1574 // Convenience function to rotate a 32 bit constant value by another constant.
1575 func rotateLeft32(v, rotate int64) int64 {
1576 return int64(bits.RotateLeft32(uint32(v), int(rotate)))
1579 func rotateRight64(v, rotate int64) int64 {
1580 return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
1583 // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
1584 func armBFAuxInt(lsb, width int64) arm64BitField {
1585 if lsb < 0 || lsb > 63 {
1586 panic("ARM(64) bit field lsb constant out of range")
1588 if width < 1 || lsb+width > 64 {
1589 panic("ARM(64) bit field width constant out of range")
1591 return arm64BitField(width | lsb<<8)
1594 // returns the lsb part of the auxInt field of arm64 bitfield ops.
1595 func (bfc arm64BitField) getARM64BFlsb() int64 {
1596 return int64(uint64(bfc) >> 8)
1599 // returns the width part of the auxInt field of arm64 bitfield ops.
1600 func (bfc arm64BitField) getARM64BFwidth() int64 {
1601 return int64(bfc) & 0xff
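// For example, armBFAuxInt(8, 16) encodes to 16 | 8<<8 = 0x810, from which
// getARM64BFlsb returns 8 and getARM64BFwidth returns 16.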
1604 // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
1605 func isARM64BFMask(lsb, mask, rshift int64) bool {
1606 shiftedMask := int64(uint64(mask) >> uint64(rshift))
1607 return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
1610 // returns the bitfield width of mask >> rshift for arm64 bitfield ops
1611 func arm64BFWidth(mask, rshift int64) int64 {
1612 shiftedMask := int64(uint64(mask) >> uint64(rshift))
1613 if shiftedMask == 0 {
1614 panic("ARM64 BF mask is zero")
1616 return nto(shiftedMask)
1619 // sizeof returns the size of t in bytes.
1620 // It will panic if t is not a *types.Type.
1621 func sizeof(t interface{}) int64 {
1622 return t.(*types.Type).Size()
1625 // registerizable reports whether t is a primitive type that fits in
1626 // a register. It assumes float64 values will always fit into registers
1627 // even if that isn't strictly true.
1628 func registerizable(b *Block, typ *types.Type) bool {
1629 if typ.IsPtrShaped() || typ.IsFloat() {
1632 if typ.IsInteger() {
1633 return typ.Size() <= b.Func.Config.RegSize
1638 // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
1639 func needRaceCleanup(sym *AuxCall, v *Value) bool {
1644 if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
1647 for _, b := range f.Blocks {
1648 for _, v := range b.Values {
1650 case OpStaticCall, OpStaticLECall:
1651 // A check for racefuncenter will encounter racefuncexit and vice versa.
1652 // Allow calls to panic*
1653 s := v.Aux.(*AuxCall).Fn.String()
1655 case "runtime.racefuncenter", "runtime.racefuncexit",
1656 "runtime.panicdivide", "runtime.panicwrap",
1657 "runtime.panicshift":
1660 // If we encountered any call, we need to keep racefunc*,
1661 // for accurate stacktraces.
1663 case OpPanicBounds, OpPanicExtend:
1664 // Note: these are panic generators that are ok (like the static calls above).
1665 case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
1666 // We must keep the race functions if there are any other call types.
1671 if isSameCall(sym, "runtime.racefuncenter") {
1672 // TODO REGISTER ABI this needs to be cleaned up.
1673 // If we're removing racefuncenter, remove its argument as well.
1674 if v.Args[0].Op != OpStore {
1675 if v.Op == OpStaticLECall {
1676 // there is no store, yet.
1681 mem := v.Args[0].Args[2]
1682 v.Args[0].reset(OpCopy)
1683 v.Args[0].AddArg(mem)
1688 // symIsRO reports whether sym is a read-only global.
1689 func symIsRO(sym interface{}) bool {
1690 lsym := sym.(*obj.LSym)
1691 return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
1694 // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
1695 func symIsROZero(sym Sym) bool {
1696 lsym := sym.(*obj.LSym)
1697 if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
1700 for _, b := range lsym.P {
1708 // read8 reads one byte from the read-only global sym at offset off.
1709 func read8(sym interface{}, off int64) uint8 {
1710 lsym := sym.(*obj.LSym)
1711 if off >= int64(len(lsym.P)) || off < 0 {
1712 // Invalid index into the global sym.
1713 // This can happen in dead code, so we don't want to panic.
1714 // Just return any value, it will eventually get ignored.
1721 // read16 reads two bytes from the read-only global sym at offset off.
1722 func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
1723 lsym := sym.(*obj.LSym)
1724 // lsym.P is written lazily.
1725 // Bytes requested after the end of lsym.P are 0.
1727 if 0 <= off && off < int64(len(lsym.P)) {
1730 buf := make([]byte, 2)
1732 return byteorder.Uint16(buf)
1735 // read32 reads four bytes from the read-only global sym at offset off.
1736 func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
1737 lsym := sym.(*obj.LSym)
1739 if 0 <= off && off < int64(len(lsym.P)) {
1742 buf := make([]byte, 4)
1744 return byteorder.Uint32(buf)
1747 // read64 reads eight bytes from the read-only global sym at offset off.
1748 func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
1749 lsym := sym.(*obj.LSym)
1751 if 0 <= off && off < int64(len(lsym.P)) {
1754 buf := make([]byte, 8)
1756 return byteorder.Uint64(buf)
1759 // sequentialAddresses reports true if it can prove that x + n == y
1760 func sequentialAddresses(x, y *Value, n int64) bool {
1761 if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
1762 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1763 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1766 if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1767 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1768 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1771 if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
1772 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1773 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1776 if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1777 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1778 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1784 // flagConstant represents the result of a compile-time comparison.
1785 // The sense of these flags does not necessarily represent the hardware's notion
1786 // of a flags register - these are just a compile-time construct.
1787 // We happen to match the semantics to those of arm/arm64.
1788 // Note that these semantics differ from x86: the carry flag has the opposite
1789 // sense on a subtraction!
1790 // On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
1791 // On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
1792 // (because it does x + ^y + C).
1793 // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
1794 type flagConstant uint8
1796 // N reports whether the result of an operation is negative (high bit set).
1797 func (fc flagConstant) N() bool {
1801 // Z reports whether the result of an operation is 0.
1802 func (fc flagConstant) Z() bool {
1806 // C reports whether an unsigned add overflowed (carry), or an
1807 // unsigned subtract did not underflow (borrow).
1808 func (fc flagConstant) C() bool {
1812 // V reports whether a signed operation overflowed or underflowed.
1813 func (fc flagConstant) V() bool {
1817 func (fc flagConstant) eq() bool {
1820 func (fc flagConstant) ne() bool {
1823 func (fc flagConstant) lt() bool {
1824 return fc.N() != fc.V()
1826 func (fc flagConstant) le() bool {
1827 return fc.Z() || fc.lt()
1829 func (fc flagConstant) gt() bool {
1830 return !fc.Z() && fc.ge()
1832 func (fc flagConstant) ge() bool {
1833 return fc.N() == fc.V()
1835 func (fc flagConstant) ult() bool {
1838 func (fc flagConstant) ule() bool {
1839 return fc.Z() || fc.ult()
1841 func (fc flagConstant) ugt() bool {
1842 return !fc.Z() && fc.uge()
1844 func (fc flagConstant) uge() bool {
1848 func (fc flagConstant) ltNoov() bool {
1849 return fc.lt() && !fc.V()
1851 func (fc flagConstant) leNoov() bool {
1852 return fc.le() && !fc.V()
1854 func (fc flagConstant) gtNoov() bool {
1855 return fc.gt() && !fc.V()
1857 func (fc flagConstant) geNoov() bool {
1858 return fc.ge() && !fc.V()
1861 func (fc flagConstant) String() string {
1862 return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
1865 type flagConstantBuilder struct {
1872 func (fcs flagConstantBuilder) encode() flagConstant {
1889 // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
1890 // - the results of the C flag are different
1891 // - the results of the V flag when y==minint are different
1893 // addFlags64 returns the flags that would be set from computing x+y.
1894 func addFlags64(x, y int64) flagConstant {
1895 var fcb flagConstantBuilder
1898 fcb.C = uint64(x+y) < uint64(x)
1899 fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1903 // subFlags64 returns the flags that would be set from computing x-y.
1904 func subFlags64(x, y int64) flagConstant {
1905 var fcb flagConstantBuilder
1908 fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
1909 fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
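// For example, subFlags64(1, 2) computes -1: N is set, Z and V are clear, and C is
// clear because a borrow occurred (this follows the arm model where C means
// "no borrow"), so both lt() and ult() report true for this flag constant.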
1913 // addFlags32 returns the flags that would be set from computing x+y.
1914 func addFlags32(x, y int32) flagConstant {
1915 var fcb flagConstantBuilder
1918 fcb.C = uint32(x+y) < uint32(x)
1919 fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1923 // subFlags32 returns the flags that would be set from computing x-y.
1924 func subFlags32(x, y int32) flagConstant {
1925 var fcb flagConstantBuilder
1928 fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
1929 fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
1933 // logicFlags64 returns flags set to the sign/zeroness of x.
1934 // C and V are set to false.
1935 func logicFlags64(x int64) flagConstant {
1936 var fcb flagConstantBuilder
1942 // logicFlags32 returns flags set to the sign/zeroness of x.
1943 // C and V are set to false.
1944 func logicFlags32(x int32) flagConstant {
1945 var fcb flagConstantBuilder