// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
"cmd/compile/internal/logopt"
"cmd/compile/internal/types"
"cmd/internal/obj/s390x"
type deadValueChoice bool
leaveDeadValues deadValueChoice = false
removeDeadValues = true
// deadcode indicates that rewrite should try to remove any values that become dead.
func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
// repeat rewrites until we find no more rewrites
pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
for _, b := range f.Blocks {
b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
for i, c := range b.ControlValues() {
b.ReplaceControl(i, c)
fmt.Printf("rewriting %s -> %s\n", b0.LongString(), b.LongString())
for j, v := range b.Values {
v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
if v.Uses == 0 && v.removeable() {
if v.Op != OpInvalid && deadcode == removeDeadValues {
// Reset any values that are now unused, so that we decrement
// the use count of all of their arguments.
// Not quite a deadcode pass, because it does not handle cycles.
// But it should help Uses==1 rules to fire.
// No point rewriting values which aren't used.
vchange := phielimValue(v)
if vchange && debug > 1 {
fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
// Eliminate copy inputs.
// If any copy input becomes unused, mark it
// as invalid and discard its argument. Repeat
// recursively on the discarded argument.
// This phase helps remove phantom "dead copy" uses
// of a value so that an x.Uses==1 rule condition
for i, a := range v.Args {
// If a, a copy, has a line boundary indicator, attempt to find a new value
// to hold it. The first candidate is the value that will replace a (aa),
// if it shares the same block and line and is eligible.
// The second option is v, which has a as an input. Because aa is earlier in
// the data flow, it is the better choice.
if a.Pos.IsStmt() == src.PosIsStmt {
if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
aa.Pos = aa.Pos.WithIsStmt()
} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
v.Pos = v.Pos.WithIsStmt()
// Record the lost line and look for a new home after all rewrites are complete.
// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
// line to appear in more than one block, but only one block is stored, so if both end
// up here, then one will be lost.
pendingLines.set(a.Pos, int32(a.Block.ID))
a.Pos = a.Pos.WithNotStmt()
if vchange && debug > 1 {
fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
// apply rewrite function
// If value changed to a poor choice for a statement boundary, move the boundary
if v.Pos.IsStmt() == src.PosIsStmt {
if k := nextGoodStatementIndex(v, j, b); k != j {
v.Pos = v.Pos.WithNotStmt()
b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
change = change || vchange
if vchange && debug > 1 {
fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
// remove clobbered values
for _, b := range f.Blocks {
for i, v := range b.Values {
if v.Op == OpInvalid {
if v.Pos.IsStmt() == src.PosIsStmt {
pendingLines.set(vl, int32(b.ID))
if v.Pos.IsStmt() != src.PosNotStmt && pendingLines.get(vl) == int32(b.ID) {
pendingLines.remove(vl)
v.Pos = v.Pos.WithIsStmt()
if pendingLines.get(b.Pos) == int32(b.ID) {
b.Pos = b.Pos.WithIsStmt()
pendingLines.remove(b.Pos)
// Common functions called from rewriting rules
func is64BitFloat(t *types.Type) bool {
return t.Size() == 8 && t.IsFloat()
func is32BitFloat(t *types.Type) bool {
return t.Size() == 4 && t.IsFloat()
func is64BitInt(t *types.Type) bool {
return t.Size() == 8 && t.IsInteger()
func is32BitInt(t *types.Type) bool {
return t.Size() == 4 && t.IsInteger()
func is16BitInt(t *types.Type) bool {
return t.Size() == 2 && t.IsInteger()
func is8BitInt(t *types.Type) bool {
return t.Size() == 1 && t.IsInteger()
func isPtr(t *types.Type) bool {
return t.IsPtrShaped()
func isSigned(t *types.Type) bool {
// mergeSym merges two symbolic offsets. There is no real merging of
// offsets; we just pick the non-nil one.
func mergeSym(x, y Sym) Sym {
panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
func canMergeSym(x, y Sym) bool {
return x == nil || y == nil
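// Illustrative note (not from the original source): rewrite rules use these
// two helpers together, guarding a mergeSym call with canMergeSym so that
// mergeSym never sees two non-nil syms and panics. A rule condition sketch:
//
//	cond: canMergeSym(sym1, sym2)
//	result: ... {mergeSym(sym1, sym2)} ...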
// canMergeLoadClobber reports whether the load can be merged into target without
// invalidating the schedule.
// It also checks that the other non-load argument x is something we
// are ok with clobbering.
func canMergeLoadClobber(target, load, x *Value) bool {
// The register containing x is going to get clobbered.
// Don't merge if we still need the value of x.
// We don't have liveness information here, but we can
// approximate x dying with:
// 1) target is x's only use.
// 2) target is not in a deeper loop than x.
loopnest := x.Block.Func.loopnest()
loopnest.calculateDepths()
if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
return canMergeLoad(target, load)
// canMergeLoad reports whether the load can be merged into target without
// invalidating the schedule.
func canMergeLoad(target, load *Value) bool {
if target.Block.ID != load.Block.ID {
// If the load is in a different block do not merge it.
// We can't merge the load into the target if the load
// has more than one use.
mem := load.MemoryArg()
// We need the load's memory arg to still be alive at target. That
// can't be the case if one of target's args depends on a memory
// state that is a successor of load's memory arg.
// For example, it would be invalid to merge load into target in
// the following situation because newmem has killed oldmem
// before target is reached:
// load = read ... oldmem
// newmem = write ... oldmem
// arg0 = read ... newmem
// target = add arg0 load
// If the argument comes from a different block then we can exclude
// it immediately because it must dominate load (which is in the
// same block as target).
for _, a := range target.Args {
if a != load && a.Block.ID == target.Block.ID {
args = append(args, a)
// memPreds contains memory states known to be predecessors of load's
// memory state. It is lazily initialized.
var memPreds map[*Value]bool
for i := 0; len(args) > 0; i++ {
// Give up if we have done a lot of iterations.
v := args[len(args)-1]
args = args[:len(args)-1]
if target.Block.ID != v.Block.ID {
// Since target and load are in the same block
// we can stop searching when we leave the block.
// A Phi implies we have reached the top of the block.
// The memory phi, if it exists, is always
// the first logical store in the block.
if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
// We could handle this situation, however it is likely
if v.Op.SymEffect()&SymAddr != 0 {
// This case prevents an operation that calculates the
// address of a local variable from being forced to schedule
// before its corresponding VarDef.
// We don't want to combine the CMPQ with the load, because
// that would force the CMPQ to schedule before the VARDEF, which
// in turn requires the LEAQ to schedule before the VARDEF.
if v.Type.IsMemory() {
// Initialise a map containing memory states
// known to be predecessors of load's memory
memPreds = make(map[*Value]bool)
for i := 0; i < limit; i++ {
// The memory phi, if it exists, is always
// the first logical store in the block.
if m.Block.ID != target.Block.ID {
if !m.Type.IsMemory() {
if len(m.Args) == 0 {
// We can merge if v is a predecessor of mem.
// For example, we can merge load into target in the
// following scenario:
// load = read ... mem
// target = add x load
if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
// If v takes mem as an input then we know mem
// is valid at this point.
for _, a := range v.Args {
if target.Block.ID == a.Block.ID {
args = append(args, a)
// isSameCall reports whether sym is the same as the given named symbol.
func isSameCall(sym interface{}, name string) bool {
fn := sym.(*AuxCall).Fn
return fn != nil && fn.String() == name
// nlzX returns the number of leading zeros.
func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
func nlz8(x int8) int { return bits.LeadingZeros8(uint8(x)) }
// ntzX returns the number of trailing zeros.
func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
func ntz8(x int8) int { return bits.TrailingZeros8(uint8(x)) }
func oneBit(x int64) bool { return x&(x-1) == 0 && x != 0 }
func oneBit8(x int8) bool { return x&(x-1) == 0 && x != 0 }
func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
// nto returns the number of trailing ones.
func nto(x int64) int64 {
return int64(ntz64(^x))
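// Example values (illustrative, not part of the original file):
//
//	nto(0b0111) == 3
//	nto(0) == 0
//	nto(-1) == 64 // ^(-1) == 0, which has 64 trailing zeros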
// logX returns the logarithm of n base 2.
// n must be a positive power of 2 (isPowerOfTwoX returns true).
func log8(n int8) int64 {
return int64(bits.Len8(uint8(n))) - 1
func log16(n int16) int64 {
return int64(bits.Len16(uint16(n))) - 1
func log32(n int32) int64 {
return int64(bits.Len32(uint32(n))) - 1
func log64(n int64) int64 {
return int64(bits.Len64(uint64(n))) - 1
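// Example values (illustrative, not part of the original file):
//
//	log64(1) == 0
//	log64(8) == 3
//	log64(1 << 32) == 32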
// log2uint32 returns the logarithm in base 2 of uint32(n), with log2(0) = -1.
func log2uint32(n int64) int64 {
return int64(bits.Len32(uint32(n))) - 1
// isPowerOfTwo functions report whether n is a power of 2.
func isPowerOfTwo8(n int8) bool {
return n > 0 && n&(n-1) == 0
func isPowerOfTwo16(n int16) bool {
return n > 0 && n&(n-1) == 0
func isPowerOfTwo32(n int32) bool {
return n > 0 && n&(n-1) == 0
func isPowerOfTwo64(n int64) bool {
return n > 0 && n&(n-1) == 0
// isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
func isUint64PowerOfTwo(in int64) bool {
n := uint64(in)
return n > 0 && n&(n-1) == 0
// isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
func isUint32PowerOfTwo(in int64) bool {
n := uint64(uint32(in))
return n > 0 && n&(n-1) == 0
// is32Bit reports whether n can be represented as a signed 32 bit integer.
func is32Bit(n int64) bool {
return n == int64(int32(n))
// is16Bit reports whether n can be represented as a signed 16 bit integer.
func is16Bit(n int64) bool {
return n == int64(int16(n))
// is8Bit reports whether n can be represented as a signed 8 bit integer.
func is8Bit(n int64) bool {
return n == int64(int8(n))
// isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
func isU8Bit(n int64) bool {
return n == int64(uint8(n))
// isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
func isU12Bit(n int64) bool {
return 0 <= n && n < (1<<12)
// isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
func isU16Bit(n int64) bool {
return n == int64(uint16(n))
// isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
func isU32Bit(n int64) bool {
return n == int64(uint32(n))
// is20Bit reports whether n can be represented as a signed 20 bit integer.
func is20Bit(n int64) bool {
return -(1<<19) <= n && n < (1<<19)
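// Boundary examples for the fit helpers (illustrative, not part of the
// original file):
//
//	is32Bit(1<<31 - 1) == true   // max int32 fits
//	is32Bit(1 << 31) == false    // one past max int32 does not
//	isU12Bit(4095) == true       // 2^12 - 1
//	isU12Bit(4096) == false
//	is20Bit(-(1 << 19)) == true  // signed range is [-2^19, 2^19)
//	is20Bit(1 << 19) == false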
// b2i translates a boolean value to 0 or 1 for assigning to auxInt.
func b2i(b bool) int64 {
// b2i32 translates a boolean value to 0 or 1.
func b2i32(b bool) int32 {
// shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
// A shift is bounded if it is shifting by less than the width of the shifted value.
func shiftIsBounded(v *Value) bool {
// canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
// generated code as much as possible.
func canonLessThan(x, y *Value) bool {
if !x.Pos.SameFileAndLine(y.Pos) {
return x.Pos.Before(y.Pos)
// truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
// of the mantissa. It will panic if the truncation results in lost information.
func truncate64Fto32F(f float64) float32 {
if !isExactFloat32(f) {
panic("truncate64Fto32F: truncation is not exact")
// NaN bit patterns aren't necessarily preserved across conversion
// instructions so we need to do the conversion manually.
b := math.Float64bits(f)
m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
// | sign | exponent | mantissa |
r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
return math.Float32frombits(r)
// extend32Fto64F converts a float32 value to a float64 value preserving the bit
// pattern of the mantissa.
func extend32Fto64F(f float32) float64 {
if !math.IsNaN(float64(f)) {
// NaN bit patterns aren't necessarily preserved across conversion
// instructions so we need to do the conversion manually.
b := uint64(math.Float32bits(f))
// | sign | exponent | mantissa |
r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
return math.Float64frombits(r)
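// Illustrative example (not part of the original file): for non-NaN inputs the
// extension is an ordinary conversion, while NaNs go through the manual
// bit-pattern path above:
//
//	extend32Fto64F(1.5) == 1.5
//	math.IsNaN(extend32Fto64F(float32(math.NaN()))) == true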
// DivisionNeedsFixUp reports whether the division needs fix-up code.
func DivisionNeedsFixUp(v *Value) bool {
// auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
func auxFrom64F(f float64) int64 {
panic("can't encode a NaN in AuxInt field")
return int64(math.Float64bits(f))
// auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
func auxFrom32F(f float32) int64 {
panic("can't encode a NaN in AuxInt field")
return int64(math.Float64bits(extend32Fto64F(f)))
// auxTo32F decodes a float32 from the AuxInt value provided.
func auxTo32F(i int64) float32 {
return truncate64Fto32F(math.Float64frombits(uint64(i)))
// auxTo64F decodes a float64 from the AuxInt value provided.
func auxTo64F(i int64) float64 {
return math.Float64frombits(uint64(i))
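// Illustrative round trips (not part of the original file): floats are stored
// in AuxInt as float64 bit patterns, so encode and decode are inverses:
//
//	auxTo32F(auxFrom32F(float32(1.5))) == 1.5
//	auxTo64F(auxFrom64F(2.75)) == 2.75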
func auxIntToBool(i int64) bool {
func auxIntToInt8(i int64) int8 {
func auxIntToInt16(i int64) int16 {
func auxIntToInt32(i int64) int32 {
func auxIntToInt64(i int64) int64 {
func auxIntToUint8(i int64) uint8 {
func auxIntToFloat32(i int64) float32 {
return float32(math.Float64frombits(uint64(i)))
func auxIntToFloat64(i int64) float64 {
return math.Float64frombits(uint64(i))
func auxIntToValAndOff(i int64) ValAndOff {
func auxIntToArm64BitField(i int64) arm64BitField {
return arm64BitField(i)
func auxIntToInt128(x int64) int128 {
panic("nonzero int128 not allowed")
func auxIntToFlagConstant(x int64) flagConstant {
return flagConstant(x)
func auxIntToOp(cc int64) Op {
func boolToAuxInt(b bool) int64 {
func int8ToAuxInt(i int8) int64 {
func int16ToAuxInt(i int16) int64 {
func int32ToAuxInt(i int32) int64 {
func int64ToAuxInt(i int64) int64 {
func uint8ToAuxInt(i uint8) int64 {
return int64(int8(i))
func float32ToAuxInt(f float32) int64 {
return int64(math.Float64bits(float64(f)))
func float64ToAuxInt(f float64) int64 {
return int64(math.Float64bits(f))
func valAndOffToAuxInt(v ValAndOff) int64 {
func arm64BitFieldToAuxInt(v arm64BitField) int64 {
func int128ToAuxInt(x int128) int64 {
panic("nonzero int128 not allowed")
func flagConstantToAuxInt(x flagConstant) int64 {
func opToAuxInt(o Op) int64 {
// Aux is an interface to hold miscellaneous data in Blocks and Values.
// stringAux wraps string values for use in Aux.
type stringAux string
func (stringAux) CanBeAnSSAAux() {}
func auxToString(i Aux) string {
return string(i.(stringAux))
func auxToSym(i Aux) Sym {
// TODO: kind of a hack - allows nil interface through
func auxToType(i Aux) *types.Type {
return i.(*types.Type)
func auxToCall(i Aux) *AuxCall {
func auxToS390xCCMask(i Aux) s390x.CCMask {
return i.(s390x.CCMask)
func auxToS390xRotateParams(i Aux) s390x.RotateParams {
return i.(s390x.RotateParams)
func StringToAux(s string) Aux {
func symToAux(s Sym) Aux {
func callToAux(s *AuxCall) Aux {
func typeToAux(t *types.Type) Aux {
func s390xCCMaskToAux(c s390x.CCMask) Aux {
func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
// uaddOvf reports whether unsigned a+b would overflow.
func uaddOvf(a, b int64) bool {
return uint64(a)+uint64(b) < uint64(a)
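// Example values (illustrative, not part of the original file):
//
//	uaddOvf(1, 2) == false
//	uaddOvf(-1, 1) == true // uint64(-1) + 1 wraps to 0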
// de-virtualize an InterCall
// 'sym' is the symbol for the itab
func devirt(v *Value, aux Aux, sym Sym, offset int64) *AuxCall {
n, ok := sym.(*obj.LSym)
lsym := f.fe.DerefItab(n, offset)
if f.pass.debug > 0 {
f.Warnl(v.Pos, "de-virtualizing call")
f.Warnl(v.Pos, "couldn't de-virtualize call")
return StaticAuxCall(lsym, va.args, va.results, va.abiInfo)
// de-virtualize an InterLECall
// 'sym' is the symbol for the itab
func devirtLESym(v *Value, aux Aux, sym Sym, offset int64) *obj.LSym {
n, ok := sym.(*obj.LSym)
lsym := f.fe.DerefItab(n, offset)
if f.pass.debug > 0 {
f.Warnl(v.Pos, "de-virtualizing call")
f.Warnl(v.Pos, "couldn't de-virtualize call")
func devirtLECall(v *Value, sym *obj.LSym) *Value {
v.Op = OpStaticLECall
auxcall := v.Aux.(*AuxCall)
// TODO(register args) this should not be necessary when we fully transition to the new register ABI.
auxcall.abiInfo = v.Block.Func.ABIDefault.ABIAnalyzeTypes(nil, ACParamsToTypes(auxcall.args), ACParamsToTypes(auxcall.results))
// isSamePtr reports whether p1 and p2 point to the same address.
func isSamePtr(p1, p2 *Value) bool {
return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
case OpAddr, OpLocalAddr:
// OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op.
// Checking for value equality only works after [z]cse has run.
return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op
return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
func isStackPtr(v *Value) bool {
for v.Op == OpOffPtr || v.Op == OpAddPtr {
return v.Op == OpSP || v.Op == OpLocalAddr
// disjoint reports whether the memory region specified by [p1:p1+n1)
// does not overlap with [p2:p2+n2).
// A return value of false does not imply the regions overlap.
func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
if n1 == 0 || n2 == 0 {
baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
base, offset = ptr, 0
for base.Op == OpOffPtr {
offset += base.AuxInt
p1, off1 := baseAndOffset(p1)
p2, off2 := baseAndOffset(p2)
if isSamePtr(p1, p2) {
return !overlap(off1, n1, off2, n2)
// p1 and p2 are not the same, so if they are both OpAddrs then
// they point to different variables.
// If one pointer is on the stack and the other is an argument
// then they can't overlap.
case OpAddr, OpLocalAddr:
if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
return p2.Op == OpArg && p1.Args[0].Op == OpSP
if p2.Op == OpSP || p2.Op == OpLocalAddr {
return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpSP
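// Illustrative example (not part of the original file): two 8-byte regions at
// offsets 0 and 8 from the same base pointer p are disjoint, while offsets 0
// and 4 are not:
//
//	disjoint((OffPtr [0] p), 8, (OffPtr [8] p), 8) == true
//	disjoint((OffPtr [0] p), 8, (OffPtr [4] p), 8) == false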
// moveSize returns the number of bytes an aligned MOV instruction moves.
func moveSize(align int64, c *Config) int64 {
case align%8 == 0 && c.PtrSize == 8:
// mergePoint finds a block among a's blocks which dominates b and is itself
// dominated by all of a's blocks. Returns nil if it can't find one.
// Might return nil even if one does exist.
func mergePoint(b *Block, a ...*Value) *Block {
// Walk backward from b looking for one of the a's blocks.
for _, x := range a {
if len(b.Preds) > 1 {
// Don't know which way to go back. Abort.
return nil // too far away
// At this point, r is the first value in a that we find by walking backwards.
// If we return anything, r will be it.
// Keep going, counting the other a's that we find. They must all dominate r.
for _, x := range a {
// Found all of a in a backwards walk. We can return r.
if len(b.Preds) > 1 {
return nil // too far away
// clobber invalidates values. Returns true.
// clobber is used by rewrite rules to:
// A) make sure the values are really dead and never used again.
// B) decrement use counts of the values' args.
func clobber(vv ...*Value) bool {
for _, v := range vv {
// Note: leave v.Block intact. The Block field is used after clobber.
// clobberIfDead resets v when its use count is 1. Returns true.
// clobberIfDead is used by rewrite rules to decrement
// use counts of v's args when v is dead and never used.
func clobberIfDead(v *Value) bool {
// Note: leave v.Block intact. The Block field is used after clobberIfDead.
// noteRule is an easy way to track if a rule is matched when writing
// new ones. Make the rule of interest also conditional on
// noteRule("note to self: rule of interest matched")
// and that message will print when the rule matches.
func noteRule(s string) bool {
// countRule increments Func.ruleMatches[key].
// If Func.ruleMatches is non-nil at the end
// of compilation, it will be printed to stdout.
// This is intended to make it easier to find functions
// that contain lots of rule matches when developing new rules.
func countRule(v *Value, key string) bool {
if f.ruleMatches == nil {
f.ruleMatches = make(map[string]int)
// warnRule generates compiler debug output with string s when
// v is not in autogenerated code, cond is true, and the rule has fired.
func warnRule(cond bool, v *Value, s string) bool {
if pos := v.Pos; pos.Line() > 1 && cond {
v.Block.Func.Warnl(pos, s)
// for a pseudo-op like (LessThan x), extract x
func flagArg(v *Value) *Value {
if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
// arm64Negate finds the complement to an ARM64 condition code,
// for example !Equal -> NotEqual or !LessThan -> GreaterEqual
// For floating point, it's more subtle because NaN is unordered. We do
// !LessThanF -> NotLessThanF; the latter takes care of NaNs.
func arm64Negate(op Op) Op {
case OpARM64LessThan:
return OpARM64GreaterEqual
case OpARM64LessThanU:
return OpARM64GreaterEqualU
case OpARM64GreaterThan:
return OpARM64LessEqual
case OpARM64GreaterThanU:
return OpARM64LessEqualU
case OpARM64LessEqual:
return OpARM64GreaterThan
case OpARM64LessEqualU:
return OpARM64GreaterThanU
case OpARM64GreaterEqual:
return OpARM64LessThan
case OpARM64GreaterEqualU:
return OpARM64LessThanU
return OpARM64NotEqual
case OpARM64NotEqual:
case OpARM64LessThanF:
return OpARM64NotLessThanF
case OpARM64NotLessThanF:
return OpARM64LessThanF
case OpARM64LessEqualF:
return OpARM64NotLessEqualF
case OpARM64NotLessEqualF:
return OpARM64LessEqualF
case OpARM64GreaterThanF:
return OpARM64NotGreaterThanF
case OpARM64NotGreaterThanF:
return OpARM64GreaterThanF
case OpARM64GreaterEqualF:
return OpARM64NotGreaterEqualF
case OpARM64NotGreaterEqualF:
return OpARM64GreaterEqualF
panic("unreachable")
// arm64Invert evaluates (InvertFlags op), which
// is the same as altering the condition codes such
// that the same result would be produced if the arguments
// to the flag-generating instruction were reversed, e.g.
// (InvertFlags (CMP x y)) -> (CMP y x)
func arm64Invert(op Op) Op {
case OpARM64LessThan:
return OpARM64GreaterThan
case OpARM64LessThanU:
return OpARM64GreaterThanU
case OpARM64GreaterThan:
return OpARM64LessThan
case OpARM64GreaterThanU:
return OpARM64LessThanU
case OpARM64LessEqual:
return OpARM64GreaterEqual
case OpARM64LessEqualU:
return OpARM64GreaterEqualU
case OpARM64GreaterEqual:
return OpARM64LessEqual
case OpARM64GreaterEqualU:
return OpARM64LessEqualU
case OpARM64Equal, OpARM64NotEqual:
case OpARM64LessThanF:
return OpARM64GreaterThanF
case OpARM64GreaterThanF:
return OpARM64LessThanF
case OpARM64LessEqualF:
return OpARM64GreaterEqualF
case OpARM64GreaterEqualF:
return OpARM64LessEqualF
case OpARM64NotLessThanF:
return OpARM64NotGreaterThanF
case OpARM64NotGreaterThanF:
return OpARM64NotLessThanF
case OpARM64NotLessEqualF:
return OpARM64NotGreaterEqualF
case OpARM64NotGreaterEqualF:
return OpARM64NotLessEqualF
panic("unreachable")
// evaluate an ARM64 op against a flags value
// that is potentially constant; return 1 for true,
// -1 for false, and 0 for not constant.
func ccARM64Eval(op Op, flags *Value) int {
if fop == OpARM64InvertFlags {
return -ccARM64Eval(op, flags.Args[0])
if fop != OpARM64FlagConstant {
fc := flagConstant(flags.AuxInt)
b2i := func(b bool) int {
case OpARM64NotEqual:
case OpARM64LessThan:
case OpARM64LessThanU:
return b2i(fc.ult())
case OpARM64GreaterThan:
case OpARM64GreaterThanU:
return b2i(fc.ugt())
case OpARM64LessEqual:
case OpARM64LessEqualU:
return b2i(fc.ule())
case OpARM64GreaterEqual:
case OpARM64GreaterEqualU:
return b2i(fc.uge())
// logRule logs the use of the rule s. This will only be enabled if
// rewrite rules were generated with the -log option, see gen/rulegen.go.
func logRule(s string) {
if ruleFile == nil {
// Open a log file to write to. We open in append
// mode because all.bash runs the compiler lots of times,
// and we want the concatenation of all of those logs.
// This means, of course, that users need to rm the old log
// to get fresh data.
// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
_, err := fmt.Fprintln(ruleFile, s)
var ruleFile io.Writer
func min(x, y int64) int64 {
func isConstZero(v *Value) bool {
case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
return v.AuxInt == 0
// reciprocalExact64 reports whether 1/c is exactly representable.
func reciprocalExact64(c float64) bool {
b := math.Float64bits(c)
man := b & (1<<52 - 1)
return false // not a power of 2, denormal, or NaN
exp := b >> 52 & (1<<11 - 1)
// exponent bias is 0x3ff. So taking the reciprocal of a number
// changes the exponent to 0x7fe-exp.
return false // ±inf
return false // exponent is not representable
// reciprocalExact32 reports whether 1/c is exactly representable.
func reciprocalExact32(c float32) bool {
b := math.Float32bits(c)
man := b & (1<<23 - 1)
return false // not a power of 2, denormal, or NaN
exp := b >> 23 & (1<<8 - 1)
// exponent bias is 0x7f. So taking the reciprocal of a number
// changes the exponent to 0xfe-exp.
return false // ±inf
return false // exponent is not representable
// check if an immediate can be directly encoded into an ARM instruction
func isARMImmRot(v uint32) bool {
for i := 0; i < 16; i++ {
// overlap reports whether the ranges given by the given offset and
// size pairs overlap.
func overlap(offset1, size1, offset2, size2 int64) bool {
if offset1 >= offset2 && offset2+size2 > offset1 {
if offset2 >= offset1 && offset1+size1 > offset2 {
func areAdjacentOffsets(off1, off2, size int64) bool {
return off1+size == off2 || off1 == off2+size
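// Example values (illustrative, not part of the original file):
//
//	overlap(0, 8, 4, 8) == true          // [0,8) and [4,12) intersect
//	overlap(0, 4, 4, 4) == false         // [0,4) and [4,8) merely touch
//	areAdjacentOffsets(0, 4, 4) == true  // off1+size == off2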
// check if the value zeroes out the upper 32 bits of a 64-bit register.
// depth limits the recursion depth. In AMD64.rules 3 is used as the limit,
// because it catches the same number of cases as 4.
func zeroUpper32Bits(x *Value, depth int) bool {
case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
OpAMD64SHLL, OpAMD64SHLLconst:
return x.Type.Width == 4
case OpPhi, OpSelect0, OpSelect1:
// Phis can use each other as arguments; instead of tracking visited values,
// just limit the recursion depth.
for i := range x.Args {
if !zeroUpper32Bits(x.Args[i], depth-1) {
// zeroUpper48Bits is similar to zeroUpper32Bits, but for the upper 48 bits.
func zeroUpper48Bits(x *Value, depth int) bool {
case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
return x.Type.Width == 2
case OpPhi, OpSelect0, OpSelect1:
// Phis can use each other as arguments; instead of tracking visited values,
// just limit the recursion depth.
for i := range x.Args {
if !zeroUpper48Bits(x.Args[i], depth-1) {
// zeroUpper56Bits is similar to zeroUpper32Bits, but for the upper 56 bits.
func zeroUpper56Bits(x *Value, depth int) bool {
case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
return x.Type.Width == 1
case OpPhi, OpSelect0, OpSelect1:
// Phis can use each other as arguments; instead of tracking visited values,
// just limit the recursion depth.
for i := range x.Args {
if !zeroUpper56Bits(x.Args[i], depth-1) {
// isInlinableMemmove reports whether the given arch performs a Move of the given size
// faster than memmove. It will only return true if replacing the memmove with a Move is
// safe, either because Move is small or because the arguments are disjoint.
// This is used as a check for replacing memmove with Move ops.
func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
// It is always safe to convert memmove into Move when its arguments are disjoint.
// Move ops may or may not be faster for large sizes depending on how the platform
// lowers them, so we only perform this optimization on platforms that we know to
// have fast Move ops.
return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
case "386", "arm64":
case "s390x", "ppc64", "ppc64le":
return sz <= 8 || disjoint(dst, sz, src, sz)
case "arm", "mips", "mips64", "mipsle", "mips64le":
// logLargeCopy logs the occurrence of a large copy.
// The best place to do this is in the rewrite rules where the size of the move is easy to find.
// "Large" is arbitrarily chosen to be 128 bytes; this may change.
func logLargeCopy(v *Value, s int64) bool {
if logopt.Enabled() {
logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
// hasSmallRotate reports whether the architecture has rotate instructions
// for sizes < 32-bit. This is used to decide whether to promote some rotations.
func hasSmallRotate(c *Config) bool {
case "amd64", "386":
func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
if sh < 0 || sh >= sz {
panic("PPC64 shift arg sh out of range")
if mb < 0 || mb >= sz {
panic("PPC64 shift arg mb out of range")
if me < 0 || me >= sz {
panic("PPC64 shift arg me out of range")
return int32(sh<<16 | mb<<8 | me)
func GetPPC64Shiftsh(auxint int64) int64 {
return int64(int8(auxint >> 16))
func GetPPC64Shiftmb(auxint int64) int64 {
return int64(int8(auxint >> 8))
func GetPPC64Shiftme(auxint int64) int64 {
return int64(int8(auxint))
// Test if this value can be encoded as a mask for an rlwinm-like
// operation. Masks can also extend from the msb and wrap to
// the lsb. That is, the valid masks are 32 bit strings
// of the form: 0..01..10..0 or 1..10..01..1 or 1...1
func isPPC64WordRotateMask(v64 int64) bool {
// Isolate the rightmost 1 (0 if none) and add.
// Likewise, for the wrapping case.
vpn := (vn & -vn) + vn
return (v&vp == 0 || vn&vpn == 0) && v != 0
// Compress mask and shift into a single value of the form
// me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
// be used to regenerate the input mask.
func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
var mb, me, mbn, men int
// Determine boundaries and then decode them
if mask == 0 || ^mask == 0 || rotate >= nbits {
panic("Invalid PPC64 rotate mask")
} else if nbits == 32 {
mb = bits.LeadingZeros32(uint32(mask))
me = 32 - bits.TrailingZeros32(uint32(mask))
mbn = bits.LeadingZeros32(^uint32(mask))
men = 32 - bits.TrailingZeros32(^uint32(mask))
mb = bits.LeadingZeros64(uint64(mask))
me = 64 - bits.TrailingZeros64(uint64(mask))
mbn = bits.LeadingZeros64(^uint64(mask))
men = 64 - bits.TrailingZeros64(^uint64(mask))
// Check for a wrapping mask (e.g. bits at 0 and 63)
if mb == 0 && me == int(nbits) {
// swap the inverted values
return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
// The inverse operation of encodePPC64RotateMask. The values returned as
// mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
auxint := uint64(sauxint)
rotate = int64((auxint >> 16) & 0xFF)
mb = int64((auxint >> 8) & 0xFF)
me = int64((auxint >> 0) & 0xFF)
nbits := int64((auxint >> 24) & 0xFF)
mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
mask = uint64(uint32(mask))
// Fixup ME to match the ISA definition. The second argument to MASK(..,me)
me = (me - 1) & (nbits - 1)
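// Illustrative round trip (not part of the original file), assuming a 32-bit
// mask of 0xF0 rotated by 3:
//
//	aux := encodePPC64RotateMask(3, 0xF0, 32)
//	r, mb, me, mask := DecodePPC64RotateMask(aux)
//	// r == 3, mb == 24, me == 27 (ISA form), mask == 0xF0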
// This verifies that the mask is a set of
// consecutive bits including the least significant bit.
func isPPC64ValidShiftMask(v int64) bool {
if (v != 0) && ((v+1)&v) == 0 {
func getPPC64ShiftMaskLength(v int64) int64 {
return int64(bits.Len64(uint64(v)))
// Decompose a shift right into an equivalent rotate/mask,
// and return mask & m.
func mergePPC64RShiftMask(m, s, nbits int64) int64 {
smask := uint64((1<<uint(nbits))-1) >> uint(s)
return m & int64(smask)
// Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0.
func mergePPC64AndSrwi(m, s int64) int64 {
mask := mergePPC64RShiftMask(m, s, 32)
if !isPPC64WordRotateMask(mask) {
return encodePPC64RotateMask(32-s, mask, 32)
// Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
// Return the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
mask_1 := uint64(0xFFFFFFFF >> uint(srw))
// For CLRLSLDI, it's more convenient to think of it as masking the left bits, then rotating left.
mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
// Rewrite the mask to apply after the final left shift.
mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
r_2 := GetPPC64Shiftsh(sld)
r_3 := (r_1 + r_2) & 31 // This can wrap.
if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
// Test if an RLWINM feeding into a CLRLSLDI can be merged into RLWINM. Return
// the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
// For CLRLSLDI, it's more convenient to think of it as masking the left bits, then rotating left.
mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
// Combine the masks, and adjust for the final left shift.
mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
r_2 := GetPPC64Shiftsh(int64(sld))
r_3 := (r_1 + r_2) & 31 // This can wrap.
// Verify the result is still a valid bitmask of <= 32 bits.
if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
return encodePPC64RotateMask(r_3, int64(mask_3), 32)
// Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
// or return 0 if they cannot be combined.
func mergePPC64SldiSrw(sld, srw int64) int64 {
if sld > srw || srw >= 32 {
mask_r := uint32(0xFFFFFFFF) >> uint(srw)
mask_l := uint32(0xFFFFFFFF) >> uint(sld)
mask := (mask_r & mask_l) << uint(sld)
return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
// Convenience function to rotate a 32 bit constant value by another constant.
func rotateLeft32(v, rotate int64) int64 {
return int64(bits.RotateLeft32(uint32(v), int(rotate)))
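// Example values (illustrative, not part of the original file):
//
//	rotateLeft32(1, 31) == 0x80000000
//	rotateLeft32(0x80000000, 1) == 1 // the high bit wraps around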
// armBFAuxInt encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
func armBFAuxInt(lsb, width int64) arm64BitField {
if lsb < 0 || lsb > 63 {
panic("ARM(64) bit field lsb constant out of range")
if width < 1 || width > 64 {
panic("ARM(64) bit field width constant out of range")
return arm64BitField(width | lsb<<8)
// getARM64BFlsb returns the lsb part of the auxInt field of arm64 bitfield ops.
func (bfc arm64BitField) getARM64BFlsb() int64 {
return int64(uint64(bfc) >> 8)
// getARM64BFwidth returns the width part of the auxInt field of arm64 bitfield ops.
func (bfc arm64BitField) getARM64BFwidth() int64 {
return int64(bfc) & 0xff
// isARM64BFMask checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
func isARM64BFMask(lsb, mask, rshift int64) bool {
shiftedMask := int64(uint64(mask) >> uint64(rshift))
return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
// arm64BFWidth returns the bitfield width of mask >> rshift for arm64 bitfield ops.
func arm64BFWidth(mask, rshift int64) int64 {
shiftedMask := int64(uint64(mask) >> uint64(rshift))
if shiftedMask == 0 {
panic("ARM64 BF mask is zero")
return nto(shiftedMask)
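// Illustrative example (not part of the original file): the mask 0xF0 shifted
// right by 4 is 0b1111, a valid 4-bit-wide bitfield mask at lsb 0:
//
//	isARM64BFMask(0, 0xF0, 4) == true
//	arm64BFWidth(0xF0, 4) == 4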
// sizeof returns the size of t in bytes.
// It will panic if t is not a *types.Type.
func sizeof(t interface{}) int64 {
return t.(*types.Type).Size()
// registerizable reports whether t is a primitive type that fits in
// a register. It assumes float64 values will always fit into registers
// even if that isn't strictly true.
func registerizable(b *Block, typ *types.Type) bool {
if typ.IsPtrShaped() || typ.IsFloat() {
if typ.IsInteger() {
return typ.Size() <= b.Func.Config.RegSize
// needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
func needRaceCleanup(sym *AuxCall, v *Value) bool {
if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncenterfp") && !isSameCall(sym, "runtime.racefuncexit") {
for _, b := range f.Blocks {
for _, v := range b.Values {
case OpStaticCall, OpStaticLECall:
// A check for racefuncenter/racefuncenterfp will encounter racefuncexit and vice versa.
// Allow calls to panic*.
s := v.Aux.(*AuxCall).Fn.String()
case "runtime.racefuncenter", "runtime.racefuncenterfp", "runtime.racefuncexit",
"runtime.panicdivide", "runtime.panicwrap",
"runtime.panicshift":
// If we encountered any call, we need to keep racefunc*,
// for accurate stacktraces.
case OpPanicBounds, OpPanicExtend:
// Note: these are panic generators that are ok (like the static calls above).
case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
// We must keep the race functions if there are any other call types.
if isSameCall(sym, "runtime.racefuncenter") {
// TODO REGISTER ABI this needs to be cleaned up.
// If we're removing racefuncenter, remove its argument as well.
if v.Args[0].Op != OpStore {
if v.Op == OpStaticLECall {
// there is no store, yet.
mem := v.Args[0].Args[2]
v.Args[0].reset(OpCopy)
v.Args[0].AddArg(mem)
// symIsRO reports whether sym is a read-only global.
func symIsRO(sym interface{}) bool {
lsym := sym.(*obj.LSym)
return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
// symIsROZero reports whether sym is a read-only global whose data contains all zeros.
func symIsROZero(sym Sym) bool {
lsym := sym.(*obj.LSym)
if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
for _, b := range lsym.P {
// read8 reads one byte from the read-only global sym at offset off.
func read8(sym interface{}, off int64) uint8 {
lsym := sym.(*obj.LSym)
if off >= int64(len(lsym.P)) || off < 0 {
// Invalid index into the global sym.
// This can happen in dead code, so we don't want to panic.
// Just return any value; it will eventually get ignored.
// read16 reads two bytes from the read-only global sym at offset off.
func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
lsym := sym.(*obj.LSym)
// lsym.P is written lazily.
// Bytes requested after the end of lsym.P are 0.
if 0 <= off && off < int64(len(lsym.P)) {
buf := make([]byte, 2)
return byteorder.Uint16(buf)
// read32 reads four bytes from the read-only global sym at offset off.
func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
lsym := sym.(*obj.LSym)
if 0 <= off && off < int64(len(lsym.P)) {
buf := make([]byte, 4)
return byteorder.Uint32(buf)
// read64 reads eight bytes from the read-only global sym at offset off.
func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
lsym := sym.(*obj.LSym)
if 0 <= off && off < int64(len(lsym.P)) {
buf := make([]byte, 8)
return byteorder.Uint64(buf)
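// Illustrative example (not part of the original file): for a read-only
// symbol lsym whose data begins with bytes 0x01, 0x02, 0x03, 0x04, the byte
// order determines the result:
//
//	read16(lsym, 0, binary.LittleEndian) == 0x0201
//	read32(lsym, 0, binary.BigEndian) == 0x01020304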
// sequentialAddresses reports whether it can prove that x + n == y.
func sequentialAddresses(x, y *Value, n int64) bool {
if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
// flagConstant represents the result of a compile-time comparison.
// The sense of these flags does not necessarily represent the hardware's notion
// of a flags register - these are just a compile-time construct.
// We happen to match the semantics to those of arm/arm64.
// Note that these semantics differ from x86: the carry flag has the opposite
// sense on a subtraction!
// On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
// On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C
// (because it does x + ^y + C).
// See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
type flagConstant uint8
// N reports whether the result of an operation is negative (high bit set).
func (fc flagConstant) N() bool {
// Z reports whether the result of an operation is 0.
func (fc flagConstant) Z() bool {
// C reports whether an unsigned add overflowed (carry), or an
// unsigned subtract did not underflow (borrow).
func (fc flagConstant) C() bool {
// V reports whether a signed operation overflowed or underflowed.
func (fc flagConstant) V() bool {
func (fc flagConstant) eq() bool {
func (fc flagConstant) ne() bool {
func (fc flagConstant) lt() bool {
return fc.N() != fc.V()
func (fc flagConstant) le() bool {
return fc.Z() || fc.lt()
func (fc flagConstant) gt() bool {
return !fc.Z() && fc.ge()
func (fc flagConstant) ge() bool {
return fc.N() == fc.V()
func (fc flagConstant) ult() bool {
func (fc flagConstant) ule() bool {
return fc.Z() || fc.ult()
func (fc flagConstant) ugt() bool {
return !fc.Z() && fc.uge()
func (fc flagConstant) uge() bool {
func (fc flagConstant) ltNoov() bool {
return fc.lt() && !fc.V()
func (fc flagConstant) leNoov() bool {
return fc.le() && !fc.V()
func (fc flagConstant) gtNoov() bool {
return fc.gt() && !fc.V()
func (fc flagConstant) geNoov() bool {
return fc.ge() && !fc.V()
func (fc flagConstant) String() string {
return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
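// Illustrative example (not part of the original file): comparing 1 and 2 by
// computing the flags of 1-2 (see subFlags64 below):
//
//	fc := subFlags64(1, 2)
//	// fc.lt() == true, fc.ult() == true, fc.eq() == false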
type flagConstantBuilder struct {
func (fcs flagConstantBuilder) encode() flagConstant {
// Note: addFlags(x,y) != subFlags(x,-y) in some situations:
// - the results of the C flag are different
// - the results of the V flag when y==minint are different
// addFlags64 returns the flags that would be set from computing x+y.
func addFlags64(x, y int64) flagConstant {
var fcb flagConstantBuilder
fcb.C = uint64(x+y) < uint64(x)
fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
// subFlags64 returns the flags that would be set from computing x-y.
func subFlags64(x, y int64) flagConstant {
var fcb flagConstantBuilder
fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
// addFlags32 returns the flags that would be set from computing x+y.
func addFlags32(x, y int32) flagConstant {
var fcb flagConstantBuilder
fcb.C = uint32(x+y) < uint32(x)
fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
// subFlags32 returns the flags that would be set from computing x-y.
func subFlags32(x, y int32) flagConstant {
var fcb flagConstantBuilder
fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
// logicFlags64 returns flags set to the sign/zeroness of x.
// C and V are set to false.
func logicFlags64(x int64) flagConstant {
var fcb flagConstantBuilder
// logicFlags32 returns flags set to the sign/zeroness of x.
// C and V are set to false.
func logicFlags32(x int32) flagConstant {
var fcb flagConstantBuilder