// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"cmd/compile/internal/logopt"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/s390x"
	"cmd/internal/objabi"
	"cmd/internal/src"
	"encoding/binary"
	"fmt"
	"io"
	"math"
	"math/bits"
	"os"
	"path/filepath"
)
type deadValueChoice bool

const (
	leaveDeadValues  deadValueChoice = false
	removeDeadValues                 = true
)
// deadcode indicates whether rewrite should try to remove any values that become dead.
func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
	// repeat rewrites until we find no more rewrites
	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
	debug := f.pass.debug
	if debug > 1 {
		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
	}
	var iters int
	var states map[string]bool
	for {
		change := false
		for _, b := range f.Blocks {
			var b0 *Block
			if debug > 1 {
				b0 = new(Block)
				*b0 = *b
				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
			}
			for i, c := range b.ControlValues() {
				for c.Op == OpCopy {
					c = c.Args[0]
					b.ReplaceControl(i, c)
				}
			}
			if rb(b) {
				change = true
				if debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", b0.LongString(), b.LongString())
				}
			}
			for j, v := range b.Values {
				var v0 *Value
				if debug > 1 {
					v0 = new(Value)
					*v0 = *v
					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
				}
				if v.Uses == 0 && v.removeable() {
					if v.Op != OpInvalid && deadcode == removeDeadValues {
						// Reset any values that are now unused, so that we decrement
						// the use count of all of its arguments.
						// Not quite a deadcode pass, because it does not handle cycles.
						// But it should help Uses==1 rules to fire.
						v.reset(OpInvalid)
						change = true
					}
					// No point rewriting values which aren't used.
					continue
				}
				vchange := phielimValue(v)
				if vchange && debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
				}

				// Eliminate copy inputs.
				// If any copy input becomes unused, mark it
				// as invalid and discard its argument. Repeat
				// recursively on the discarded argument.
				// This phase helps remove phantom "dead copy" uses
				// of a value so that an x.Uses==1 rule condition
				// fires reliably.
				for i, a := range v.Args {
					if a.Op != OpCopy {
						continue
					}
					aa := copySource(a)
					v.SetArg(i, aa)
					// If a, a copy, has a line boundary indicator, attempt to find a new value
					// to hold it. The first candidate is the value that will replace a (aa),
					// if it shares the same block and line and is eligible.
					// The second option is v, which has a as an input. Because aa is earlier in
					// the data flow, it is the better choice.
					if a.Pos.IsStmt() == src.PosIsStmt {
						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
							aa.Pos = aa.Pos.WithIsStmt()
						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
							v.Pos = v.Pos.WithIsStmt()
						} else {
							// Record the lost line and look for a new home after all rewrites are complete.
							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
							// line to appear in more than one block, but only one block is stored, so if both end
							// up here, then one will be lost.
							pendingLines.set(a.Pos, int32(a.Block.ID))
						}
						a.Pos = a.Pos.WithNotStmt()
					}
				}

				if vchange && debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
				}
				// apply rewrite function
				if rv(v) {
					vchange = true
					// If value changed to a poor choice for a statement boundary, move the boundary
					if v.Pos.IsStmt() == src.PosIsStmt {
						if k := nextGoodStatementIndex(v, j, b); k != j {
							v.Pos = v.Pos.WithNotStmt()
							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
						}
					}
				}

				change = change || vchange
				if vchange && debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
				}
		if iters > 1000 || debug >= 2 {
			// We've done a suspiciously large number of rewrites (or we're in debug mode).
			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
			// and the maximum value encountered during make.bash is 12.
			// Start checking for cycles. (This is too expensive to do routinely.)
			if states == nil {
				states = make(map[string]bool)
			}
			h := f.rewriteHash()
			if _, ok := states[h]; ok {
				// We've found a cycle.
				// To diagnose it, set debug to 2 and start again,
				// so that we'll print all rules applied until we complete another cycle.
				// If debug is already >= 2, we've already done that, so it's time to crash.
				if debug < 2 {
					debug = 2
					states = make(map[string]bool)
				} else {
					f.Fatalf("rewrite cycle detected")
				}
			}
			states[h] = true
		}
	// remove clobbered values
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			vl := v.Pos
			if v.Op == OpInvalid {
				if v.Pos.IsStmt() == src.PosIsStmt {
					pendingLines.set(vl, int32(b.ID))
				}
			}
			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
				pendingLines.remove(vl)
				v.Pos = v.Pos.WithIsStmt()
			}
		}
		if pendingLines.get(b.Pos) == int32(b.ID) {
			b.Pos = b.Pos.WithIsStmt()
			pendingLines.remove(b.Pos)
		}
	}
}

// Common functions called from rewriting rules
func is64BitFloat(t *types.Type) bool {
	return t.Size() == 8 && t.IsFloat()
}

func is32BitFloat(t *types.Type) bool {
	return t.Size() == 4 && t.IsFloat()
}

func is64BitInt(t *types.Type) bool {
	return t.Size() == 8 && t.IsInteger()
}

func is32BitInt(t *types.Type) bool {
	return t.Size() == 4 && t.IsInteger()
}

func is16BitInt(t *types.Type) bool {
	return t.Size() == 2 && t.IsInteger()
}

func is8BitInt(t *types.Type) bool {
	return t.Size() == 1 && t.IsInteger()
}

func isPtr(t *types.Type) bool {
	return t.IsPtrShaped()
}

func isSigned(t *types.Type) bool {
	return t.IsSigned()
}

// mergeSym merges two symbolic offsets. There is no real merging of
// offsets; we just pick the non-nil one.
func mergeSym(x, y Sym) Sym {
	if x == nil {
		return y
	}
	if y == nil {
		return x
	}
	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
}

func canMergeSym(x, y Sym) bool {
	return x == nil || y == nil
}
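
// An illustration of the merging contract above (with syms a and b non-nil):
//
//	canMergeSym(a, nil) == true,  mergeSym(a, nil) == a
//	canMergeSym(nil, b) == true,  mergeSym(nil, b) == b
//	canMergeSym(a, b)   == false (mergeSym of two non-nil syms panics)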
// canMergeLoadClobber reports whether the load can be merged into target without
// invalidating the schedule.
// It also checks that the other non-load argument x is something we
// are ok with clobbering.
func canMergeLoadClobber(target, load, x *Value) bool {
	// The register containing x is going to get clobbered.
	// Don't merge if we still need the value of x.
	// We don't have liveness information here, but we can
	// approximate x dying with:
	//  1) target is x's only use.
	//  2) target is not in a deeper loop than x.
	loopnest := x.Block.Func.loopnest()
	loopnest.calculateDepths()
	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
		return false
	}
	return canMergeLoad(target, load)
}
// canMergeLoad reports whether the load can be merged into target without
// invalidating the schedule.
func canMergeLoad(target, load *Value) bool {
	if target.Block.ID != load.Block.ID {
		// If the load is in a different block do not merge it.
		return false
	}

	// We can't merge the load into the target if the load
	// has more than one use.
	if load.Uses != 1 {
		return false
	}

	mem := load.MemoryArg()

	// We need the load's memory arg to still be alive at target. That
	// can't be the case if one of target's args depends on a memory
	// state that is a successor of load's memory arg.
	//
	// For example, it would be invalid to merge load into target in
	// the following situation because newmem has killed oldmem
	// before target is reached:
	//     load = read ... oldmem
	//   newmem = write ... oldmem
	//     arg0 = read ... newmem
	//   target = add arg0 load
	//
	// If the argument comes from a different block then we can exclude
	// it immediately because it must dominate load (which is in the
	// same block as target).
	var args []*Value
	for _, a := range target.Args {
		if a != load && a.Block.ID == target.Block.ID {
			args = append(args, a)
		}
	}
	// memPreds contains memory states known to be predecessors of load's
	// memory state. It is lazily initialized.
	var memPreds map[*Value]bool
	for i := 0; len(args) > 0; i++ {
		const limit = 100
		if i >= limit {
			// Give up if we have done a lot of iterations.
			return false
		}
		v := args[len(args)-1]
		args = args[:len(args)-1]
		if target.Block.ID != v.Block.ID {
			// Since target and load are in the same block
			// we can stop searching when we leave the block.
			continue
		}
		if v.Op == OpPhi {
			// A Phi implies we have reached the top of the block.
			// The memory phi, if it exists, is always
			// the first logical store in the block.
			continue
		}
		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
			// We could handle this situation however it is likely
			// to be very rare.
			return false
		}
		if v.Op.SymEffect()&SymAddr != 0 {
			// This case prevents an operation that calculates the
			// address of a local variable from being forced to schedule
			// before its corresponding VarDef.
			// We don't want to combine the CMPQ with the load, because
			// that would force the CMPQ to schedule before the VARDEF, which
			// in turn requires the LEAQ to schedule before the VARDEF.
			continue
		}
		if v.Type.IsMemory() {
			if memPreds == nil {
				// Initialise a map containing memory states
				// known to be predecessors of load's memory
				// state.
				memPreds = make(map[*Value]bool)
				m := mem
				for i := 0; i < limit; i++ {
					if m.Op == OpPhi {
						// The memory phi, if it exists, is always
						// the first logical store in the block.
						break
					}
					if m.Block.ID != target.Block.ID {
						break
					}
					if !m.Type.IsMemory() {
						break
					}
					memPreds[m] = true
					if len(m.Args) == 0 {
						break
					}
					m = m.MemoryArg()
				}
			}

			// We can merge if v is a predecessor of mem.
			//
			// For example, we can merge load into target in the
			// following scenario:
			//      x = read ... v
			//    mem = write ... v
			//   load = read ... mem
			// target = add x load
			if memPreds[v] {
				continue
			}
			return false
		}
		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
			// If v takes mem as an input then we know mem
			// is valid at this point.
			continue
		}
		for _, a := range v.Args {
			if target.Block.ID == a.Block.ID {
				args = append(args, a)
			}
		}
	}

	return true
}
// isSameCall reports whether sym is the same as the given named symbol.
func isSameCall(sym interface{}, name string) bool {
	fn := sym.(*AuxCall).Fn
	return fn != nil && fn.String() == name
}
// nlzX returns the number of leading zeros.
func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }

// ntzX returns the number of trailing zeros.
func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }

func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }

// nto returns the number of trailing ones.
func nto(x int64) int64 {
	return int64(ntz64(^x))
}
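
// For example: nto(0b0111) == 3, nto(0) == 0, and nto(-1) == 64
// (an all-ones word has 64 trailing ones).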
// logX returns the base-2 logarithm of n.
// n must be a positive power of 2 (isPowerOfTwoX returns true).
func log8(n int8) int64 {
	return int64(bits.Len8(uint8(n))) - 1
}
func log16(n int16) int64 {
	return int64(bits.Len16(uint16(n))) - 1
}
func log32(n int32) int64 {
	return int64(bits.Len32(uint32(n))) - 1
}
func log64(n int64) int64 {
	return int64(bits.Len64(uint64(n))) - 1
}

// log2uint32 returns the base-2 logarithm of uint32(n), with log2(0) = -1.
func log2uint32(n int64) int64 {
	return int64(bits.Len32(uint32(n))) - 1
}
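
// For example: log8(1) == 0, log8(64) == 6, and log64(1<<32) == 32.
// log2uint32(-1) == 31, since only the low 32 bits of n are considered.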
// isPowerOfTwo functions report whether n is a power of 2.
func isPowerOfTwo8(n int8) bool {
	return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo16(n int16) bool {
	return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo32(n int32) bool {
	return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo64(n int64) bool {
	return n > 0 && n&(n-1) == 0
}
// isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
func isUint64PowerOfTwo(in int64) bool {
	n := uint64(in)
	return n > 0 && n&(n-1) == 0
}

// isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
func isUint32PowerOfTwo(in int64) bool {
	n := uint64(uint32(in))
	return n > 0 && n&(n-1) == 0
}
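
// The signed and unsigned variants differ on the high bit: isPowerOfTwo64(-1<<63)
// is false (negative as a signed value), while isUint64PowerOfTwo(-1<<63) is true,
// since uint64(1<<63) has exactly one bit set.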
// is32Bit reports whether n can be represented as a signed 32 bit integer.
func is32Bit(n int64) bool {
	return n == int64(int32(n))
}

// is16Bit reports whether n can be represented as a signed 16 bit integer.
func is16Bit(n int64) bool {
	return n == int64(int16(n))
}

// is8Bit reports whether n can be represented as a signed 8 bit integer.
func is8Bit(n int64) bool {
	return n == int64(int8(n))
}

// isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
func isU8Bit(n int64) bool {
	return n == int64(uint8(n))
}

// isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
func isU12Bit(n int64) bool {
	return 0 <= n && n < (1<<12)
}

// isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
func isU16Bit(n int64) bool {
	return n == int64(uint16(n))
}

// isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
func isU32Bit(n int64) bool {
	return n == int64(uint32(n))
}

// is20Bit reports whether n can be represented as a signed 20 bit integer.
func is20Bit(n int64) bool {
	return -(1 << 19) <= n && n < (1<<19)
}
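
// For example: is32Bit(1<<31-1) == true but is32Bit(1<<31) == false, and
// is20Bit(-1<<19) == true but is20Bit(1<<19) == false.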
// b2i translates a boolean value to 0 or 1 for assigning to auxInt.
func b2i(b bool) int64 {
	if b {
		return 1
	}
	return 0
}

// b2i32 translates a boolean value to 0 or 1.
func b2i32(b bool) int32 {
	if b {
		return 1
	}
	return 0
}

// shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
// A shift is bounded if it is shifting by less than the width of the shifted value.
func shiftIsBounded(v *Value) bool {
	return v.AuxInt != 0
}

// canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
// generated code as much as possible.
func canonLessThan(x, y *Value) bool {
	if x.Op != y.Op {
		return x.Op < y.Op
	}
	if !x.Pos.SameFileAndLine(y.Pos) {
		return x.Pos.Before(y.Pos)
	}
	return x.ID < y.ID
}

// truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
// of the mantissa. It will panic if the truncation results in lost information.
func truncate64Fto32F(f float64) float32 {
	if !isExactFloat32(f) {
		panic("truncate64Fto32F: truncation is not exact")
	}
	if !math.IsNaN(f) {
		return float32(f)
	}
	// NaN bit patterns aren't necessarily preserved across conversion
	// instructions so we need to do the conversion manually.
	b := math.Float64bits(f)
	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
	//          | sign                  | exponent   | mantissa       |
	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
	return math.Float32frombits(r)
}

// extend32Fto64F converts a float32 value to a float64 value preserving the bit
// pattern of the mantissa.
func extend32Fto64F(f float32) float64 {
	if !math.IsNaN(float64(f)) {
		return float64(f)
	}
	// NaN bit patterns aren't necessarily preserved across conversion
	// instructions so we need to do the conversion manually.
	b := uint64(math.Float32bits(f))
	//   | sign                  | exponent      | mantissa                    |
	r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
	return math.Float64frombits(r)
}

// DivisionNeedsFixUp reports whether the division needs fix-up code.
func DivisionNeedsFixUp(v *Value) bool {
	return v.AuxInt == 0
}

// auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
func auxFrom64F(f float64) int64 {
	if math.IsNaN(f) {
		panic("can't encode a NaN in AuxInt field")
	}
	return int64(math.Float64bits(f))
}

// auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
func auxFrom32F(f float32) int64 {
	if math.IsNaN(float64(f)) {
		panic("can't encode a NaN in AuxInt field")
	}
	return int64(math.Float64bits(extend32Fto64F(f)))
}

// auxTo32F decodes a float32 from the AuxInt value provided.
func auxTo32F(i int64) float32 {
	return truncate64Fto32F(math.Float64frombits(uint64(i)))
}

// auxTo64F decodes a float64 from the AuxInt value provided.
func auxTo64F(i int64) float64 {
	return math.Float64frombits(uint64(i))
}
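
// A float32 constant thus round-trips through AuxInt exactly: auxFrom32F
// stores it widened to float64 bits, and auxTo32F narrows it back, so
// auxTo32F(auxFrom32F(float32(0.5))) == 0.5 with no loss.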
func auxIntToBool(i int64) bool                   { return i != 0 }
func auxIntToInt8(i int64) int8                   { return int8(i) }
func auxIntToInt16(i int64) int16                 { return int16(i) }
func auxIntToInt32(i int64) int32                 { return int32(i) }
func auxIntToInt64(i int64) int64                 { return i }
func auxIntToUint8(i int64) uint8                 { return uint8(i) }
func auxIntToFloat32(i int64) float32             { return float32(math.Float64frombits(uint64(i))) }
func auxIntToFloat64(i int64) float64             { return math.Float64frombits(uint64(i)) }
func auxIntToValAndOff(i int64) ValAndOff         { return ValAndOff(i) }
func auxIntToArm64BitField(i int64) arm64BitField { return arm64BitField(i) }
func auxIntToInt128(x int64) int128 {
	if x != 0 {
		panic("nonzero int128 not allowed")
	}
	return 0
}
func auxIntToFlagConstant(x int64) flagConstant { return flagConstant(x) }
func auxIntToOp(cc int64) Op                    { return Op(cc) }

func boolToAuxInt(b bool) int64 {
	if b {
		return 1
	}
	return 0
}
func int8ToAuxInt(i int8) int64                   { return int64(i) }
func int16ToAuxInt(i int16) int64                 { return int64(i) }
func int32ToAuxInt(i int32) int64                 { return int64(i) }
func int64ToAuxInt(i int64) int64                 { return i }
func uint8ToAuxInt(i uint8) int64                 { return int64(int8(i)) }
func float32ToAuxInt(f float32) int64             { return int64(math.Float64bits(float64(f))) }
func float64ToAuxInt(f float64) int64             { return int64(math.Float64bits(f)) }
func valAndOffToAuxInt(v ValAndOff) int64         { return int64(v) }
func arm64BitFieldToAuxInt(v arm64BitField) int64 { return int64(v) }
func int128ToAuxInt(x int128) int64 {
	if x != 0 {
		panic("nonzero int128 not allowed")
	}
	return 0
}
func flagConstantToAuxInt(x flagConstant) int64 { return int64(x) }
func opToAuxInt(o Op) int64                     { return int64(o) }
// Aux is an interface to hold miscellaneous data in Blocks and Values.
type Aux interface {
	CanBeAnSSAAux()
}

// stringAux wraps string values for use in Aux.
type stringAux string

func (stringAux) CanBeAnSSAAux() {}

func auxToString(i Aux) string {
	return string(i.(stringAux))
}
func auxToSym(i Aux) Sym {
	// TODO: kind of a hack - allows nil interface through
	s, _ := i.(Sym)
	return s
}
func auxToType(i Aux) *types.Type                  { return i.(*types.Type) }
func auxToCall(i Aux) *AuxCall                     { return i.(*AuxCall) }
func auxToS390xCCMask(i Aux) s390x.CCMask          { return i.(s390x.CCMask) }
func auxToS390xRotateParams(i Aux) s390x.RotateParams { return i.(s390x.RotateParams) }

func StringToAux(s string) Aux                        { return stringAux(s) }
func symToAux(s Sym) Aux                              { return s }
func callToAux(s *AuxCall) Aux                        { return s }
func typeToAux(t *types.Type) Aux                     { return t }
func s390xCCMaskToAux(c s390x.CCMask) Aux             { return c }
func s390xRotateParamsToAux(r s390x.RotateParams) Aux { return r }
// uaddOvf reports whether unsigned a+b would overflow.
func uaddOvf(a, b int64) bool {
	return uint64(a)+uint64(b) < uint64(a)
}
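
// For example: uaddOvf(-1, 1) == true, since uint64(-1)+1 wraps around to 0,
// which is less than uint64(-1); uaddOvf(1, 2) == false.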
// loadLSymOffset simulates reading a word at an offset into a
// read-only symbol's runtime memory. If it would read a pointer to
// another symbol, that symbol is returned. Otherwise, it returns nil.
func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
	if lsym.Type != objabi.SRODATA {
		return nil
	}
	for _, r := range lsym.R {
		if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
			return r.Sym
		}
	}
	return nil
}

// devirtLESym de-virtualizes an InterLECall; 'sym' is the symbol for the itab.
func devirtLESym(v *Value, aux Aux, sym Sym, offset int64) *obj.LSym {
	n, ok := sym.(*obj.LSym)
	if !ok {
		return nil
	}
	lsym := loadLSymOffset(n, offset)
	if f := v.Block.Func; f.pass.debug > 0 {
		if lsym != nil {
			f.Warnl(v.Pos, "de-virtualizing call")
		} else {
			f.Warnl(v.Pos, "couldn't de-virtualize call")
		}
	}
	return lsym
}

func devirtLECall(v *Value, sym *obj.LSym) *Value {
	v.Op = OpStaticLECall
	auxcall := v.Aux.(*AuxCall)
// isSamePtr reports whether p1 and p2 point to the same address.
func isSamePtr(p1, p2 *Value) bool {
	if p1.Op != p2.Op {
		return false
	}
	switch p1.Op {
	case OpOffPtr:
		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
	case OpAddr, OpLocalAddr:
		// OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op.
		// Checking for value equality only works after [z]cse has run.
		return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op
	case OpAddPtr:
		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
	}
	return false
}

func isStackPtr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr {
		v = v.Args[0]
	}
	return v.Op == OpSP || v.Op == OpLocalAddr
}

// disjoint reports whether the memory region specified by [p1:p1+n1)
// does not overlap with [p2:p2+n2).
// A return value of false does not imply the regions overlap.
func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
	if n1 == 0 || n2 == 0 {
		return true
	}
	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
		base, offset = ptr, 0
		for base.Op == OpOffPtr {
			offset += base.AuxInt
			base = base.Args[0]
		}
		return base, offset
	}
	p1, off1 := baseAndOffset(p1)
	p2, off2 := baseAndOffset(p2)
	if isSamePtr(p1, p2) {
		return !overlap(off1, n1, off2, n2)
	}
	// p1 and p2 are not the same, so if they are both OpAddrs then
	// they point to different variables.
	// If one pointer is on the stack and the other is an argument
	// then they can't overlap.
	switch p1.Op {
	case OpAddr, OpLocalAddr:
		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
			return true
		}
		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
	case OpArg, OpArgIntReg:
		if p2.Op == OpSP || p2.Op == OpLocalAddr {
			return true
		}
	case OpSP:
		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
	}
	return false
}

// moveSize returns the number of bytes an aligned MOV instruction moves.
func moveSize(align int64, c *Config) int64 {
	switch {
	case align%8 == 0 && c.PtrSize == 8:
		return 8
	case align%4 == 0:
		return 4
	case align%2 == 0:
		return 2
	}
	return 1
}
// mergePoint finds a block among a's blocks which dominates b and is itself
// dominated by all of a's blocks. Returns nil if it can't find one.
// Might return nil even if one does exist.
func mergePoint(b *Block, a ...*Value) *Block {
	// Walk backward from b looking for one of the a's blocks.
	for _, x := range a {
		if b == x.Block {
			goto found
		}
	}
	if len(b.Preds) > 1 {
		// Don't know which way to go back. Abort.
		return nil
	}
	return nil // too far away

found:
	// At this point, r is the first value in a that we find by walking backwards.
	// If we return anything, r will be it.
	r := b

	// Keep going, counting the other a's that we find. They must all dominate r.
	na := 0
	for _, x := range a {
		if b == x.Block {
			na++
		}
	}
	if na == len(a) {
		// Found all of a in a backwards walk. We can return r.
		return r
	}
	if len(b.Preds) > 1 {
		return nil
	}
	return nil // too far away
}
// clobber invalidates values. Returns true.
// clobber is used by rewrite rules to:
//   A) make sure the values are really dead and never used again.
//   B) decrement use counts of the values' args.
func clobber(vv ...*Value) bool {
	for _, v := range vv {
		v.reset(OpInvalid)
		// Note: leave v.Block intact. The Block field is used after clobber.
	}
	return true
}

// clobberIfDead resets v when use count is 1. Returns true.
// clobberIfDead is used by rewrite rules to decrement
// use counts of v's args when v is dead and never used.
func clobberIfDead(v *Value) bool {
	if v.Uses == 1 {
		v.reset(OpInvalid)
	}
	// Note: leave v.Block intact. The Block field is used after clobberIfDead.
	return true
}

// noteRule is an easy way to track if a rule is matched when writing
// new ones. Make the rule of interest also conditional on
// noteRule("note to self: rule of interest matched")
// and that message will print when the rule matches.
func noteRule(s string) bool {
	fmt.Println(s)
	return true
}

// countRule increments Func.ruleMatches[key].
// If Func.ruleMatches is non-nil at the end
// of compilation, it will be printed to stdout.
// This is intended to make it easier to find functions
// that contain lots of rule matches when developing new rules.
func countRule(v *Value, key string) bool {
	f := v.Block.Func
	if f.ruleMatches == nil {
		f.ruleMatches = make(map[string]int)
	}
	f.ruleMatches[key]++
	return true
}

// warnRule generates compiler debug output with string s when
// v is not in autogenerated code, cond is true and the rule has fired.
func warnRule(cond bool, v *Value, s string) bool {
	if pos := v.Pos; pos.Line() > 1 && cond {
		v.Block.Func.Warnl(pos, s)
	}
	return true
}

// flagArg extracts x from a pseudo-op like (LessThan x).
func flagArg(v *Value) *Value {
	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
		return nil
	}
	return v.Args[0]
}
// arm64Negate finds the complement to an ARM64 condition code,
// for example !Equal -> NotEqual or !LessThan -> GreaterEqual.
//
// For floating point, it's more subtle because NaN is unordered. We do
// !LessThanF -> NotLessThanF, the latter takes care of NaNs.
func arm64Negate(op Op) Op {
	switch op {
	case OpARM64LessThan:
		return OpARM64GreaterEqual
	case OpARM64LessThanU:
		return OpARM64GreaterEqualU
	case OpARM64GreaterThan:
		return OpARM64LessEqual
	case OpARM64GreaterThanU:
		return OpARM64LessEqualU
	case OpARM64LessEqual:
		return OpARM64GreaterThan
	case OpARM64LessEqualU:
		return OpARM64GreaterThanU
	case OpARM64GreaterEqual:
		return OpARM64LessThan
	case OpARM64GreaterEqualU:
		return OpARM64LessThanU
	case OpARM64Equal:
		return OpARM64NotEqual
	case OpARM64NotEqual:
		return OpARM64Equal
	case OpARM64LessThanF:
		return OpARM64NotLessThanF
	case OpARM64NotLessThanF:
		return OpARM64LessThanF
	case OpARM64LessEqualF:
		return OpARM64NotLessEqualF
	case OpARM64NotLessEqualF:
		return OpARM64LessEqualF
	case OpARM64GreaterThanF:
		return OpARM64NotGreaterThanF
	case OpARM64NotGreaterThanF:
		return OpARM64GreaterThanF
	case OpARM64GreaterEqualF:
		return OpARM64NotGreaterEqualF
	case OpARM64NotGreaterEqualF:
		return OpARM64GreaterEqualF
	default:
		panic("unreachable")
	}
}
// arm64Invert evaluates (InvertFlags op), which
// is the same as altering the condition codes such
// that the same result would be produced if the arguments
// to the flag-generating instruction were reversed, e.g.
// (InvertFlags (CMP x y)) -> (CMP y x).
func arm64Invert(op Op) Op {
	switch op {
	case OpARM64LessThan:
		return OpARM64GreaterThan
	case OpARM64LessThanU:
		return OpARM64GreaterThanU
	case OpARM64GreaterThan:
		return OpARM64LessThan
	case OpARM64GreaterThanU:
		return OpARM64LessThanU
	case OpARM64LessEqual:
		return OpARM64GreaterEqual
	case OpARM64LessEqualU:
		return OpARM64GreaterEqualU
	case OpARM64GreaterEqual:
		return OpARM64LessEqual
	case OpARM64GreaterEqualU:
		return OpARM64LessEqualU
	case OpARM64Equal, OpARM64NotEqual:
		return op
	case OpARM64LessThanF:
		return OpARM64GreaterThanF
	case OpARM64GreaterThanF:
		return OpARM64LessThanF
	case OpARM64LessEqualF:
		return OpARM64GreaterEqualF
	case OpARM64GreaterEqualF:
		return OpARM64LessEqualF
	case OpARM64NotLessThanF:
		return OpARM64NotGreaterThanF
	case OpARM64NotGreaterThanF:
		return OpARM64NotLessThanF
	case OpARM64NotLessEqualF:
		return OpARM64NotGreaterEqualF
	case OpARM64NotGreaterEqualF:
		return OpARM64NotLessEqualF
	default:
		panic("unreachable")
	}
}
// ccARM64Eval evaluates an ARM64 op against a flags value
// that is potentially constant; it returns 1 for true,
// -1 for false, and 0 for not constant.
func ccARM64Eval(op Op, flags *Value) int {
	fop := flags.Op
	if fop == OpARM64InvertFlags {
		return -ccARM64Eval(op, flags.Args[0])
	}
	if fop != OpARM64FlagConstant {
		return 0
	}
	fc := flagConstant(flags.AuxInt)
	b2i := func(b bool) int {
		if b {
			return 1
		}
		return -1
	}
	switch op {
	case OpARM64Equal:
		return b2i(fc.eq())
	case OpARM64NotEqual:
		return b2i(fc.ne())
	case OpARM64LessThan:
		return b2i(fc.lt())
	case OpARM64LessThanU:
		return b2i(fc.ult())
	case OpARM64GreaterThan:
		return b2i(fc.gt())
	case OpARM64GreaterThanU:
		return b2i(fc.ugt())
	case OpARM64LessEqual:
		return b2i(fc.le())
	case OpARM64LessEqualU:
		return b2i(fc.ule())
	case OpARM64GreaterEqual:
		return b2i(fc.ge())
	case OpARM64GreaterEqualU:
		return b2i(fc.uge())
	}
	return 0
}
// logRule logs the use of the rule s. This will only be enabled if
// rewrite rules were generated with the -log option; see gen/rulegen.go.
func logRule(s string) {
	if ruleFile == nil {
		// Open a log file to write log to. We open in append
		// mode because all.bash runs the compiler lots of times,
		// and we want the concatenation of all of those logs.
		// This means, of course, that users need to rm the old log
		// to get fresh data.
		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
		if err != nil {
			panic(err)
		}
		ruleFile = w
	}
	_, err := fmt.Fprintln(ruleFile, s)
	if err != nil {
		panic(err)
	}
}

var ruleFile io.Writer
func min(x, y int64) int64 {
	if x < y {
		return x
	}
	return y
}

func isConstZero(v *Value) bool {
	switch v.Op {
	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
		return v.AuxInt == 0
	}
	return false
}
// reciprocalExact64 reports whether 1/c is exactly representable.
func reciprocalExact64(c float64) bool {
	b := math.Float64bits(c)
	man := b & (1<<52 - 1)
	if man != 0 {
		return false // not a power of 2, denormal, or NaN
	}
	exp := b >> 52 & (1<<11 - 1)
	// exponent bias is 0x3ff. So taking the reciprocal of a number
	// changes the exponent to 0x7fe-exp.
	switch exp {
	case 0x7ff:
		return false // ±inf
	case 0x7fe:
		return false // exponent is not representable
	default:
		return true
	}
}

// reciprocalExact32 reports whether 1/c is exactly representable.
func reciprocalExact32(c float32) bool {
	b := math.Float32bits(c)
	man := b & (1<<23 - 1)
	if man != 0 {
		return false // not a power of 2, denormal, or NaN
	}
	exp := b >> 23 & (1<<8 - 1)
	// exponent bias is 0x7f. So taking the reciprocal of a number
	// changes the exponent to 0xfe-exp.
	switch exp {
	case 0xff:
		return false // ±inf
	case 0xfe:
		return false // exponent is not representable
	default:
		return true
	}
}

// check if an immediate can be directly encoded into an ARM instruction.
func isARMImmRot(v uint32) bool {
	for i := 0; i < 16; i++ {
// overlap reports whether the ranges given by the given offset and
// size pairs overlap.
func overlap(offset1, size1, offset2, size2 int64) bool {
	if offset1 >= offset2 && offset2+size2 > offset1 {
		return true
	}
	if offset2 >= offset1 && offset1+size1 > offset2 {
		return true
	}
	return false
}

func areAdjacentOffsets(off1, off2, size int64) bool {
	return off1+size == off2 || off1 == off2+size
}
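
// For example: overlap(0, 8, 4, 8) == true (bytes 4 through 7 are shared),
// while overlap(0, 8, 8, 8) == false and areAdjacentOffsets(0, 8, 8) == true.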
// zeroUpper32Bits checks whether a value zeroes out the upper 32 bits of a 64-bit register.
// depth limits the recursion depth; in AMD64.rules 3 is used as the limit,
// because it catches the same number of cases as 4.
func zeroUpper32Bits(x *Value, depth int) bool {
	switch x.Op {
	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
		OpAMD64SHLL, OpAMD64SHLLconst:
		return true
	case OpArg:
		return x.Type.Size() == 4
	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit the recursion depth.
		if depth <= 0 {
			return false
		}
		for i := range x.Args {
			if !zeroUpper32Bits(x.Args[i], depth-1) {
				return false
			}
		}
		return true
	}
	return false
}

// zeroUpper48Bits is similar to zeroUpper32Bits, but for the upper 48 bits.
func zeroUpper48Bits(x *Value, depth int) bool {
	switch x.Op {
	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
		return true
	case OpArg:
		return x.Type.Size() == 2
	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit the recursion depth.
		if depth <= 0 {
			return false
		}
		for i := range x.Args {
			if !zeroUpper48Bits(x.Args[i], depth-1) {
				return false
			}
		}
		return true
	}
	return false
}

// zeroUpper56Bits is similar to zeroUpper32Bits, but for the upper 56 bits.
func zeroUpper56Bits(x *Value, depth int) bool {
	switch x.Op {
	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
		return true
	case OpArg:
		return x.Type.Size() == 1
	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit the recursion depth.
		if depth <= 0 {
			return false
		}
		for i := range x.Args {
			if !zeroUpper56Bits(x.Args[i], depth-1) {
				return false
			}
		}
		return true
	}
	return false
}
// isInlinableMemmove reports whether the given arch performs a Move of the given size
// faster than memmove. It will only return true if replacing the memmove with a Move is
// safe, either because Move is small or because the arguments are disjoint.
// This is used as a check for replacing memmove with Move ops.
func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
	// It is always safe to convert memmove into Move when its arguments are disjoint.
	// Move ops may or may not be faster for large sizes depending on how the platform
	// lowers them, so we only perform this optimization on platforms that we know to
	// have fast Move ops.
	switch c.arch {
	case "amd64":
		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
	case "386", "arm64":
		return sz <= 8
	case "s390x", "ppc64", "ppc64le":
		return sz <= 8 || disjoint(dst, sz, src, sz)
	case "arm", "mips", "mips64", "mipsle", "mips64le":
		return sz <= 4
	}
	return false
}

// logLargeCopy logs the occurrence of a large copy.
// The best place to do this is in the rewrite rules where the size of the move is easy to find.
// "Large" is arbitrarily chosen to be 128 bytes; this may change.
func logLargeCopy(v *Value, s int64) bool {
	if s < 128 {
		return true
	}
	if logopt.Enabled() {
		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
	}
	return true
}

// hasSmallRotate reports whether the architecture has rotate instructions
// for sizes < 32-bit. This is used to decide whether to promote some rotations.
func hasSmallRotate(c *Config) bool {
	switch c.arch {
	case "amd64", "386":
		return true
	}
	return false
}
func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
	if sh < 0 || sh >= sz {
		panic("PPC64 shift arg sh out of range")
	}
	if mb < 0 || mb >= sz {
		panic("PPC64 shift arg mb out of range")
	}
	if me < 0 || me >= sz {
		panic("PPC64 shift arg me out of range")
	}
	return int32(sh<<16 | mb<<8 | me)
}

func GetPPC64Shiftsh(auxint int64) int64 {
	return int64(int8(auxint >> 16))
}

func GetPPC64Shiftmb(auxint int64) int64 {
	return int64(int8(auxint >> 8))
}

func GetPPC64Shiftme(auxint int64) int64 {
	return int64(int8(auxint))
}

// Test if this value can be encoded as a mask for an rlwinm-like
// operation. Masks can also extend from the msb and wrap to
// the lsb. That is, the valid masks are 32-bit strings
// of the form 0..01..10..0, 1..10..01..1, or 1...1.
func isPPC64WordRotateMask(v64 int64) bool {
	// Isolate the rightmost 1 (if none, 0) and add.
	v := uint32(v64)
	vp := (v & -v) + v
	// Likewise, for the wrapping case.
	vn := ^v
	vpn := (vn & -vn) + vn
	return (v&vp == 0 || vn&vpn == 0) && v != 0
}
// Compress mask and shift into a single value of the form
// me | mb<<8 | rotate<<16 | nbits<<24, where me and mb can
// be used to regenerate the input mask.
func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
	var mb, me, mbn, men int

	// Determine boundaries and then decode them
	if mask == 0 || ^mask == 0 || rotate >= nbits {
		panic("Invalid PPC64 rotate mask")
	} else if nbits == 32 {
		mb = bits.LeadingZeros32(uint32(mask))
		me = 32 - bits.TrailingZeros32(uint32(mask))
		mbn = bits.LeadingZeros32(^uint32(mask))
		men = 32 - bits.TrailingZeros32(^uint32(mask))
	} else {
		mb = bits.LeadingZeros64(uint64(mask))
		me = 64 - bits.TrailingZeros64(uint64(mask))
		mbn = bits.LeadingZeros64(^uint64(mask))
		men = 64 - bits.TrailingZeros64(^uint64(mask))
	}
	// Check for a wrapping mask (e.g. bits at 0 and 63).
	if mb == 0 && me == int(nbits) {
		// swap the inverted values
		mb, me = men, mbn
	}

	return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
}
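
// For example, rotate 4 with mask 0x000000FF over 32 bits gives mb = 24
// (leading zeros) and me = 32 (32 minus 0 trailing zeros), so
// encodePPC64RotateMask(4, 0xFF, 32) == 32 | 24<<8 | 4<<16 | 32<<24.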
// DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask. The values returned as
// mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
	auxint := uint64(sauxint)
	rotate = int64((auxint >> 16) & 0xFF)
	mb = int64((auxint >> 8) & 0xFF)
	me = int64((auxint >> 0) & 0xFF)
	nbits := int64((auxint >> 24) & 0xFF)
	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
	if nbits == 32 {
		mask = uint64(uint32(mask))
	}

	// Fixup ME to match the ISA definition. The second argument to MASK(..,me)
	// is inclusive.
	me = (me - 1) & (nbits - 1)
	return
}
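
// Round-tripping the example above, DecodePPC64RotateMask(encodePPC64RotateMask(4, 0xFF, 32))
// yields rotate == 4, mb == 24, me == 31 (now inclusive, per the ISA), and mask == 0xFF.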
// This verifies that the mask is a set of
// consecutive bits including the least
// significant bit.
func isPPC64ValidShiftMask(v int64) bool {
	if (v != 0) && ((v+1)&v) == 0 {
		return true
	}
	return false
}

func getPPC64ShiftMaskLength(v int64) int64 {
	return int64(bits.Len64(uint64(v)))
}

// Decompose a shift right into an equivalent rotate/mask,
// and return mask & m.
func mergePPC64RShiftMask(m, s, nbits int64) int64 {
	smask := uint64((1<<uint(nbits))-1) >> uint(s)
	return m & int64(smask)
}
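
// For example: mergePPC64RShiftMask(0xFFFFFFFF, 8, 32) == 0x00FFFFFF, the
// portion of the mask still reachable after the right shift by 8.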
// Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0.
func mergePPC64AndSrwi(m, s int64) int64 {
	mask := mergePPC64RShiftMask(m, s, 32)
	if !isPPC64WordRotateMask(mask) {
		return 0
	}
	return encodePPC64RotateMask((32-s)&31, mask, 32)
}

// Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
// Return the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
	// for CLRLSLDI, it's more convenient to think of it as a mask of left bits, then a rotate left.
	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))

	// Rewrite the mask to apply after the final left shift.
	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))

	r_1 := 32 - srw
	r_2 := GetPPC64Shiftsh(sld)
	r_3 := (r_1 + r_2) & 31 // This can wrap.

	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
		return 0
	}
	return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
}

// Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM. Return
// the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
	// for CLRLSLDI, it's more convenient to think of it as a mask of left bits, then a rotate left.
	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))

	// combine the masks, and adjust for the final left shift.
	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
	r_2 := GetPPC64Shiftsh(int64(sld))
	r_3 := (r_1 + r_2) & 31 // This can wrap.

	// Verify the result is still a valid bitmask of <= 32 bits.
	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
		return 0
	}
	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
}

// Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
// or return 0 if they cannot be combined.
func mergePPC64SldiSrw(sld, srw int64) int64 {
	if sld > srw || srw >= 32 {
		return 0
	}
	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
	mask := (mask_r & mask_l) << uint(sld)
	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
}
// Convenience function to rotate a 32-bit constant value by another constant.
func rotateLeft32(v, rotate int64) int64 {
	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
}

func rotateRight64(v, rotate int64) int64 {
	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
}
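
// For example: rotateLeft32(0x80000000, 1) == 1 (the high bit wraps around
// to bit 0), and rotateRight64(1, 1) == -1<<63 (bit 0 wraps to bit 63).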
// encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
func armBFAuxInt(lsb, width int64) arm64BitField {
	if lsb < 0 || lsb > 63 {
		panic("ARM(64) bit field lsb constant out of range")
	}
	if width < 1 || lsb+width > 64 {
		panic("ARM(64) bit field width constant out of range")
	}
	return arm64BitField(width | lsb<<8)
}

// getARM64BFlsb returns the lsb part of the auxInt field of arm64 bitfield ops.
func (bfc arm64BitField) getARM64BFlsb() int64 {
	return int64(uint64(bfc) >> 8)
}

// getARM64BFwidth returns the width part of the auxInt field of arm64 bitfield ops.
func (bfc arm64BitField) getARM64BFwidth() int64 {
	return int64(bfc) & 0xff
}
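
// For example, armBFAuxInt(8, 16) encodes as 16 | 8<<8, and the accessors
// recover getARM64BFlsb() == 8 and getARM64BFwidth() == 16.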
// checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
func isARM64BFMask(lsb, mask, rshift int64) bool {
	shiftedMask := int64(uint64(mask) >> uint64(rshift))
	return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
}

// returns the bitfield width of mask >> rshift for arm64 bitfield ops.
func arm64BFWidth(mask, rshift int64) int64 {
	shiftedMask := int64(uint64(mask) >> uint64(rshift))
	if shiftedMask == 0 {
		panic("ARM64 BF mask is zero")
	}
	return nto(shiftedMask)
}
// sizeof returns the size of t in bytes.
// It will panic if t is not a *types.Type.
func sizeof(t interface{}) int64 {
	return t.(*types.Type).Size()
}

// registerizable reports whether t is a primitive type that fits in
// a register. It assumes float64 values will always fit into registers
// even if that isn't strictly true.
func registerizable(b *Block, typ *types.Type) bool {
	if typ.IsPtrShaped() || typ.IsFloat() {
		return true
	}
	if typ.IsInteger() {
		return typ.Size() <= b.Func.Config.RegSize
	}
	return false
}

// needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
func needRaceCleanup(sym *AuxCall, v *Value) bool {
	f := v.Block.Func
	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
		return false
	}
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			switch v.Op {
			case OpStaticCall, OpStaticLECall:
				// A check for racefuncenter will encounter racefuncexit and vice versa.
				// Allow calls to panic*.
				s := v.Aux.(*AuxCall).Fn.String()
				switch s {
				case "runtime.racefuncenter", "runtime.racefuncexit",
					"runtime.panicdivide", "runtime.panicwrap",
					"runtime.panicshift":
					continue
				}
				// If we encountered any call, we need to keep racefunc*,
				// for accurate stacktraces.
				return false
			case OpPanicBounds, OpPanicExtend:
				// Note: these are panic generators that are ok (like the static calls above).
			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
				// We must keep the race functions if there are any other call types.
				return false
			}
		}
	}
	if isSameCall(sym, "runtime.racefuncenter") {
		// TODO REGISTER ABI this needs to be cleaned up.
		// If we're removing racefuncenter, remove its argument as well.
		if v.Args[0].Op != OpStore {
			if v.Op == OpStaticLECall {
				// there is no store, yet.
				return true
			}
			return false
		}
		mem := v.Args[0].Args[2]
		v.Args[0].reset(OpCopy)
		v.Args[0].AddArg(mem)
	}
	return true
}
// symIsRO reports whether sym is a read-only global.
func symIsRO(sym interface{}) bool {
	lsym := sym.(*obj.LSym)
	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
}

// symIsROZero reports whether sym is a read-only global whose data contains all zeros.
func symIsROZero(sym Sym) bool {
	lsym := sym.(*obj.LSym)
	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
		return false
	}
	for _, b := range lsym.P {
		if b != 0 {
			return false
		}
	}
	return true
}

// read8 reads one byte from the read-only global sym at offset off.
func read8(sym interface{}, off int64) uint8 {
	lsym := sym.(*obj.LSym)
	if off >= int64(len(lsym.P)) || off < 0 {
		// Invalid index into the global sym.
		// This can happen in dead code, so we don't want to panic.
		// Just return any value, it will eventually get ignored.
		return 0
	}
	return lsym.P[off]
}

// read16 reads two bytes from the read-only global sym at offset off.
func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
	lsym := sym.(*obj.LSym)
	// lsym.P is written lazily.
	// Bytes requested after the end of lsym.P are 0.
	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 2)
	copy(buf, src)
	return byteorder.Uint16(buf)
}

// read32 reads four bytes from the read-only global sym at offset off.
func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
	lsym := sym.(*obj.LSym)
	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 4)
	copy(buf, src)
	return byteorder.Uint32(buf)
}

// read64 reads eight bytes from the read-only global sym at offset off.
func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
	lsym := sym.(*obj.LSym)
	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 8)
	copy(buf, src)
	return byteorder.Uint64(buf)
}
// sequentialAddresses reports whether it can prove that x + n == y.
func sequentialAddresses(x, y *Value, n int64) bool {
	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	return false
}
// flagConstant represents the result of a compile-time comparison.
// The sense of these flags does not necessarily represent the hardware's notion
// of a flags register - these are just a compile-time construct.
// We happen to match the semantics to those of arm/arm64.
// Note that these semantics differ from x86: the carry flag has the opposite
// sense on a subtraction!
//
//	On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
//	On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
//	(because it does x + ^y + C).
//
// See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
type flagConstant uint8
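
// For example, for 5 - 7 a borrow occurs, so under the arm convention used
// here subFlags64(5, 7) has C == false (C is set only when no borrow occurs),
// while x86 would set its carry flag for the same subtraction.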
// N reports whether the result of an operation is negative (high bit set).
func (fc flagConstant) N() bool {
	return fc&1 != 0
}

// Z reports whether the result of an operation is 0.
func (fc flagConstant) Z() bool {
	return fc&2 != 0
}

// C reports whether an unsigned add overflowed (carry), or an
// unsigned subtract did not underflow (borrow).
func (fc flagConstant) C() bool {
	return fc&4 != 0
}

// V reports whether a signed operation overflowed or underflowed.
func (fc flagConstant) V() bool {
	return fc&8 != 0
}

func (fc flagConstant) eq() bool  { return fc.Z() }
func (fc flagConstant) ne() bool  { return !fc.Z() }
func (fc flagConstant) lt() bool  { return fc.N() != fc.V() }
func (fc flagConstant) le() bool  { return fc.Z() || fc.lt() }
func (fc flagConstant) gt() bool  { return !fc.Z() && fc.ge() }
func (fc flagConstant) ge() bool  { return fc.N() == fc.V() }
func (fc flagConstant) ult() bool { return !fc.C() }
func (fc flagConstant) ule() bool { return fc.Z() || fc.ult() }
func (fc flagConstant) ugt() bool { return !fc.Z() && fc.uge() }
func (fc flagConstant) uge() bool { return fc.C() }

func (fc flagConstant) ltNoov() bool { return fc.lt() && !fc.V() }
func (fc flagConstant) leNoov() bool { return fc.le() && !fc.V() }
func (fc flagConstant) gtNoov() bool { return fc.gt() && !fc.V() }
func (fc flagConstant) geNoov() bool { return fc.ge() && !fc.V() }

func (fc flagConstant) String() string {
	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
}

type flagConstantBuilder struct {
	N bool
	Z bool
	C bool
	V bool
}

func (fcs flagConstantBuilder) encode() flagConstant {
	var fc flagConstant
	if fcs.N {
		fc |= 1
	}
	if fcs.Z {
		fc |= 2
	}
	if fcs.C {
		fc |= 4
	}
	if fcs.V {
		fc |= 8
	}
	return fc
}
// Note: addFlags(x,y) != subFlags(x,-y) in some situations:
//   - the results of the C flag are different
//   - the results of the V flag when y==minint are different

// addFlags64 returns the flags that would be set from computing x+y.
func addFlags64(x, y int64) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x+y == 0
	fcb.N = x+y < 0
	fcb.C = uint64(x+y) < uint64(x)
	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
	return fcb.encode()
}

// subFlags64 returns the flags that would be set from computing x-y.
func subFlags64(x, y int64) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x-y == 0
	fcb.N = x-y < 0
	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
	return fcb.encode()
}

// addFlags32 returns the flags that would be set from computing x+y.
func addFlags32(x, y int32) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x+y == 0
	fcb.N = x+y < 0
	fcb.C = uint32(x+y) < uint32(x)
	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
	return fcb.encode()
}

// subFlags32 returns the flags that would be set from computing x-y.
func subFlags32(x, y int32) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x-y == 0
	fcb.N = x-y < 0
	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
	return fcb.encode()
}

// logicFlags64 returns flags set to the sign/zeroness of x.
// C and V are set to false.
func logicFlags64(x int64) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x == 0
	fcb.N = x < 0
	return fcb.encode()
}

// logicFlags32 returns flags set to the sign/zeroness of x.
// C and V are set to false.
func logicFlags32(x int32) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x == 0
	fcb.N = x < 0
	return fcb.encode()
}