cmd/compile: Add shiftIsBounded check for logical shifts on arm64
author ruinan <ruinan.sun@arm.com>
Fri, 1 Jul 2022 06:25:12 +0000 (06:25 +0000)
committer Keith Randall <khr@google.com>
Wed, 7 Sep 2022 20:10:13 +0000 (20:10 +0000)
This CL adds shiftIsBounded checks for the Lsh* and Rsh* rules on arm64.
When the shift amount is proven to be in range, there is no need to
re-check it with a CMP + CSEL sequence.

Change-Id: I54620de64f02a1b5a11089add237248ae2de01b4
Reviewed-on: https://go-review.googlesource.com/c/go/+/417714
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Heschi Kreinick <heschi@google.com>
src/cmd/compile/internal/ssa/gen/ARM64.rules
src/cmd/compile/internal/ssa/rewriteARM64.go
test/codegen/shift.go
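
For context on what "bounded" means here: shiftIsBounded(v) reports whether the
compiler has already proven that the shift amount is smaller than the operand
width, e.g. because the source masks the amount or guards it with a comparison.
Below is a minimal sketch in the style of the modified test/codegen/shift.go;
the function names and check comments are illustrative, not the exact tests
added by this CL:

package codegen

// Masking the shift amount with 63 proves it is < 64, so the shift is
// marked as bounded and the arm64 backend can emit a bare LSL with no
// CMP + CSEL guard.
func lshMasked64(x uint64, s uint) uint64 {
	return x << (s & 63) // arm64:"LSL",-"CSEL"
}

// A dominating bounds check has the same effect via the prove pass.
func lshGuarded64(x uint64, s uint) uint64 {
	if s < 64 {
		return x << s // arm64:"LSL",-"CSEL"
	}
	return 0
}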

index 0d319609aaee7598c04a3a4440ce7fce6217d0c9..a70600918b323ce1102411765a6ab6704dc56dad 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules
 // we compare to 64 to ensure Go semantics for large shifts
 // Rules about rotates with non-const shift are based on the following rules,
 // if the following rules change, please also modify the rules based on them.
-(Lsh64x64 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
-(Lsh64x32 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Lsh64x16 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Lsh64x8  <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
-
-(Lsh32x64 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
-(Lsh32x32 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Lsh32x16 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Lsh32x8  <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
-
-(Lsh16x64 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
-(Lsh16x32 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Lsh16x16 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Lsh16x8  <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
-
-(Lsh8x64 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
-(Lsh8x32 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Lsh8x16 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Lsh8x8  <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
-
-(Rsh64Ux64 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
-(Rsh64Ux32 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Rsh64Ux16 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Rsh64Ux8  <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
-
-(Rsh32Ux64 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
-(Rsh32Ux32 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Rsh32Ux16 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Rsh32Ux8  <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
-
-(Rsh16Ux64 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
-(Rsh16Ux32 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Rsh16Ux16 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Rsh16Ux8  <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
-
-(Rsh8Ux64 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
-(Rsh8Ux32 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Rsh8Ux16 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Rsh8Ux8  <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
-
-(Rsh64x64 x y) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
-(Rsh64x32 x y) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
-(Rsh64x16 x y) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
-(Rsh64x8  x y) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
-
-(Rsh32x64 x y) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
-(Rsh32x32 x y) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
-(Rsh32x16 x y) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
-(Rsh32x8  x y) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
-
-(Rsh16x64 x y) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
-(Rsh16x32 x y) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
-(Rsh16x16 x y) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
-(Rsh16x8  x y) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
-
-(Rsh8x64 x y) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
-(Rsh8x32 x y) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
-(Rsh8x16 x y) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
-(Rsh8x8  x y) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
+
+// Check shiftIsBounded first: if the shift amount is proven to be in range,
+// we can emit the shift directly.
+// left shift
+(Lsh(64|32|16|8)x64 <t> x y) && shiftIsBounded(v) => (SLL <t> x y)
+(Lsh(64|32|16|8)x32 <t> x y) && shiftIsBounded(v) => (SLL <t> x y)
+(Lsh(64|32|16|8)x16 <t> x y) && shiftIsBounded(v) => (SLL <t> x y)
+(Lsh(64|32|16|8)x8 <t> x y) && shiftIsBounded(v) => (SLL <t> x y)
+
+// signed right shift
+(Rsh64x(64|32|16|8) <t> x y) && shiftIsBounded(v) => (SRA <t> x y)
+(Rsh32x(64|32|16|8) <t> x y) && shiftIsBounded(v) => (SRA <t> (SignExt32to64 x) y)
+(Rsh16x(64|32|16|8) <t> x y) && shiftIsBounded(v) => (SRA <t> (SignExt16to64 x) y)
+(Rsh8x(64|32|16|8)  <t> x y) && shiftIsBounded(v) => (SRA <t> (SignExt8to64 x) y)
+
+// unsigned right shift
+(Rsh64Ux(64|32|16|8) <t> x y) && shiftIsBounded(v) => (SRL <t> x y)
+(Rsh32Ux(64|32|16|8) <t> x y) && shiftIsBounded(v) => (SRL <t> (ZeroExt32to64 x) y)
+(Rsh16Ux(64|32|16|8) <t> x y) && shiftIsBounded(v) => (SRL <t> (ZeroExt16to64 x) y)
+(Rsh8Ux(64|32|16|8)  <t> x y) && shiftIsBounded(v) => (SRL <t> (ZeroExt8to64 x) y)
+
+// Otherwise the shift amount may be out of range; guard the shift with CMP + CSEL.
+(Lsh64x64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
+(Lsh64x32 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+(Lsh64x16 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+(Lsh64x8  <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
+
+(Lsh32x64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
+(Lsh32x32 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+(Lsh32x16 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+(Lsh32x8  <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
+
+(Lsh16x64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
+(Lsh16x32 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+(Lsh16x16 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+(Lsh16x8  <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
+
+(Lsh8x64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
+(Lsh8x32 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+(Lsh8x16 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+(Lsh8x8  <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
+
+(Rsh64Ux64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
+(Rsh64Ux32 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+(Rsh64Ux16 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+(Rsh64Ux8  <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
+
+(Rsh32Ux64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
+(Rsh32Ux32 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+(Rsh32Ux16 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+(Rsh32Ux8  <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
+
+(Rsh16Ux64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
+(Rsh16Ux32 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+(Rsh16Ux16 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+(Rsh16Ux8  <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
+
+(Rsh8Ux64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
+(Rsh8Ux32 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+(Rsh8Ux16 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+(Rsh8Ux8  <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
+
+(Rsh64x64 x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
+(Rsh64x32 x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
+(Rsh64x16 x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
+(Rsh64x8  x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
+
+(Rsh32x64 x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
+(Rsh32x32 x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
+(Rsh32x16 x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
+(Rsh32x8  x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
+
+(Rsh16x64 x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
+(Rsh16x32 x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
+(Rsh16x16 x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
+(Rsh16x8  x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
+
+(Rsh8x64 x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
+(Rsh8x32 x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
+(Rsh8x16 x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
+(Rsh8x8  x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
 
 // constants
 (Const(64|32|16|8) [val]) => (MOVDconst [int64(val)])
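
A note on the two lowering paths above (editorial context, not part of the
diff): Go defines a shift by a count >= the operand width to produce 0 (or all
sign bits, for a signed right shift), while the AArch64 register-shift
instructions SLL/SRL/SRA use only the low six bits of the count. The
!shiftIsBounded rules therefore keep the CMP + CSEL guard: they compare the
zero-extended count against 64 and select either the real shift result or the
constant. Roughly, in Go terms (an illustrative sketch of the semantics, not
compiler code):

// What (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
// computes for a 64-bit left shift:
func lsh64Semantics(x, y uint64) uint64 {
	if y < 64 { // CMPconst [64] y, taken when y <u 64
		return x << y // SLL result
	}
	return 0 // Const64 [0]
}

// The signed right-shift rules instead clamp the count to 63, so shifting
// by >= 64 still fills the result with sign bits:
func rsh64Semantics(x int64, y uint64) int64 {
	if y >= 64 {
		y = 63 // CSEL selects Const64 [63]
	}
	return x >> y // SRA
}

The rewriteARM64.go hunks that follow are machine-generated from these rules
(by the SSA rule generator under src/cmd/compile/internal/ssa/gen), which is
why every rewrite function now gains a shiftIsBounded fast path, a
!shiftIsBounded guarded path, and a trailing "return false".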
index 097d1772ab3b2dd3c8b9848c10c07068720f40bf..d39f69c22fcd92011cd370af6b3fbd399fdccc7a 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
@@ -24229,25 +24229,45 @@ func rewriteValueARM64_OpLsh16x16(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Lsh16x16 <t> x y)
-       // result: (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+       // cond: shiftIsBounded(v)
+       // result: (SLL <t> x y)
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SLL)
+               v.Type = t
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (Lsh16x16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
-               v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
-               v1.AddArg(y)
-               v0.AddArg2(x, v1)
-               v2 := b.NewValue0(v.Pos, OpConst64, t)
-               v2.AuxInt = int64ToAuxInt(0)
-               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v3.AuxInt = int64ToAuxInt(64)
-               v3.AddArg(v1)
-               v.AddArg3(v0, v2, v3)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpConst64, t)
+               v1.AuxInt = int64ToAuxInt(0)
+               v2 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v2.AuxInt = int64ToAuxInt(64)
+               v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v.AddArg3(v0, v1, v2)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpLsh16x32(v *Value) bool {
        v_1 := v.Args[1]
@@ -24255,36 +24275,75 @@ func rewriteValueARM64_OpLsh16x32(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Lsh16x32 <t> x y)
-       // result: (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+       // cond: shiftIsBounded(v)
+       // result: (SLL <t> x y)
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SLL)
+               v.Type = t
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (Lsh16x32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
-               v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
-               v1.AddArg(y)
-               v0.AddArg2(x, v1)
-               v2 := b.NewValue0(v.Pos, OpConst64, t)
-               v2.AuxInt = int64ToAuxInt(0)
-               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v3.AuxInt = int64ToAuxInt(64)
-               v3.AddArg(v1)
-               v.AddArg3(v0, v2, v3)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpConst64, t)
+               v1.AuxInt = int64ToAuxInt(0)
+               v2 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v2.AuxInt = int64ToAuxInt(64)
+               v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v.AddArg3(v0, v1, v2)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpLsh16x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        // match: (Lsh16x64 <t> x y)
+       // cond: shiftIsBounded(v)
+       // result: (SLL <t> x y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SLL)
+               v.Type = t
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (Lsh16x64 <t> x y)
+       // cond: !shiftIsBounded(v)
        // result: (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
@@ -24297,6 +24356,7 @@ func rewriteValueARM64_OpLsh16x64(v *Value) bool {
                v.AddArg3(v0, v1, v2)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpLsh16x8(v *Value) bool {
        v_1 := v.Args[1]
@@ -24304,25 +24364,45 @@ func rewriteValueARM64_OpLsh16x8(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Lsh16x8 <t> x y)
-       // result: (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
+       // cond: shiftIsBounded(v)
+       // result: (SLL <t> x y)
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SLL)
+               v.Type = t
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (Lsh16x8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
-               v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
-               v1.AddArg(y)
-               v0.AddArg2(x, v1)
-               v2 := b.NewValue0(v.Pos, OpConst64, t)
-               v2.AuxInt = int64ToAuxInt(0)
-               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v3.AuxInt = int64ToAuxInt(64)
-               v3.AddArg(v1)
-               v.AddArg3(v0, v2, v3)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpConst64, t)
+               v1.AuxInt = int64ToAuxInt(0)
+               v2 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v2.AuxInt = int64ToAuxInt(64)
+               v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v.AddArg3(v0, v1, v2)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpLsh32x16(v *Value) bool {
        v_1 := v.Args[1]
@@ -24330,25 +24410,45 @@ func rewriteValueARM64_OpLsh32x16(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Lsh32x16 <t> x y)
-       // result: (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+       // cond: shiftIsBounded(v)
+       // result: (SLL <t> x y)
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SLL)
+               v.Type = t
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (Lsh32x16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
-               v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
-               v1.AddArg(y)
-               v0.AddArg2(x, v1)
-               v2 := b.NewValue0(v.Pos, OpConst64, t)
-               v2.AuxInt = int64ToAuxInt(0)
-               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v3.AuxInt = int64ToAuxInt(64)
-               v3.AddArg(v1)
-               v.AddArg3(v0, v2, v3)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpConst64, t)
+               v1.AuxInt = int64ToAuxInt(0)
+               v2 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v2.AuxInt = int64ToAuxInt(64)
+               v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v.AddArg3(v0, v1, v2)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpLsh32x32(v *Value) bool {
        v_1 := v.Args[1]
@@ -24356,36 +24456,75 @@ func rewriteValueARM64_OpLsh32x32(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Lsh32x32 <t> x y)
-       // result: (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+       // cond: shiftIsBounded(v)
+       // result: (SLL <t> x y)
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SLL)
+               v.Type = t
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (Lsh32x32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
-               v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
-               v1.AddArg(y)
-               v0.AddArg2(x, v1)
-               v2 := b.NewValue0(v.Pos, OpConst64, t)
-               v2.AuxInt = int64ToAuxInt(0)
-               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v3.AuxInt = int64ToAuxInt(64)
-               v3.AddArg(v1)
-               v.AddArg3(v0, v2, v3)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpConst64, t)
+               v1.AuxInt = int64ToAuxInt(0)
+               v2 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v2.AuxInt = int64ToAuxInt(64)
+               v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v.AddArg3(v0, v1, v2)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpLsh32x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        // match: (Lsh32x64 <t> x y)
+       // cond: shiftIsBounded(v)
+       // result: (SLL <t> x y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SLL)
+               v.Type = t
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (Lsh32x64 <t> x y)
+       // cond: !shiftIsBounded(v)
        // result: (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
@@ -24398,6 +24537,7 @@ func rewriteValueARM64_OpLsh32x64(v *Value) bool {
                v.AddArg3(v0, v1, v2)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpLsh32x8(v *Value) bool {
        v_1 := v.Args[1]
@@ -24405,25 +24545,45 @@ func rewriteValueARM64_OpLsh32x8(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Lsh32x8 <t> x y)
-       // result: (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
+       // cond: shiftIsBounded(v)
+       // result: (SLL <t> x y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SLL)
+               v.Type = t
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (Lsh32x8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
-               v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
-               v1.AddArg(y)
-               v0.AddArg2(x, v1)
-               v2 := b.NewValue0(v.Pos, OpConst64, t)
-               v2.AuxInt = int64ToAuxInt(0)
-               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v3.AuxInt = int64ToAuxInt(64)
-               v3.AddArg(v1)
-               v.AddArg3(v0, v2, v3)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpConst64, t)
+               v1.AuxInt = int64ToAuxInt(0)
+               v2 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v2.AuxInt = int64ToAuxInt(64)
+               v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v.AddArg3(v0, v1, v2)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpLsh64x16(v *Value) bool {
        v_1 := v.Args[1]
@@ -24431,25 +24591,45 @@ func rewriteValueARM64_OpLsh64x16(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Lsh64x16 <t> x y)
-       // result: (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+       // cond: shiftIsBounded(v)
+       // result: (SLL <t> x y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SLL)
+               v.Type = t
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (Lsh64x16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
-               v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
-               v1.AddArg(y)
-               v0.AddArg2(x, v1)
-               v2 := b.NewValue0(v.Pos, OpConst64, t)
-               v2.AuxInt = int64ToAuxInt(0)
-               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v3.AuxInt = int64ToAuxInt(64)
-               v3.AddArg(v1)
-               v.AddArg3(v0, v2, v3)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpConst64, t)
+               v1.AuxInt = int64ToAuxInt(0)
+               v2 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v2.AuxInt = int64ToAuxInt(64)
+               v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v.AddArg3(v0, v1, v2)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpLsh64x32(v *Value) bool {
        v_1 := v.Args[1]
@@ -24457,36 +24637,75 @@ func rewriteValueARM64_OpLsh64x32(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Lsh64x32 <t> x y)
-       // result: (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+       // cond: shiftIsBounded(v)
+       // result: (SLL <t> x y)
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SLL)
+               v.Type = t
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (Lsh64x32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
-               v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
-               v1.AddArg(y)
-               v0.AddArg2(x, v1)
-               v2 := b.NewValue0(v.Pos, OpConst64, t)
-               v2.AuxInt = int64ToAuxInt(0)
-               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v3.AuxInt = int64ToAuxInt(64)
-               v3.AddArg(v1)
-               v.AddArg3(v0, v2, v3)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpConst64, t)
+               v1.AuxInt = int64ToAuxInt(0)
+               v2 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v2.AuxInt = int64ToAuxInt(64)
+               v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v.AddArg3(v0, v1, v2)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpLsh64x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        // match: (Lsh64x64 <t> x y)
+       // cond: shiftIsBounded(v)
+       // result: (SLL <t> x y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SLL)
+               v.Type = t
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (Lsh64x64 <t> x y)
+       // cond: !shiftIsBounded(v)
        // result: (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
@@ -24499,6 +24718,7 @@ func rewriteValueARM64_OpLsh64x64(v *Value) bool {
                v.AddArg3(v0, v1, v2)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpLsh64x8(v *Value) bool {
        v_1 := v.Args[1]
@@ -24506,25 +24726,45 @@ func rewriteValueARM64_OpLsh64x8(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Lsh64x8 <t> x y)
-       // result: (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
+       // cond: shiftIsBounded(v)
+       // result: (SLL <t> x y)
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SLL)
+               v.Type = t
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (Lsh64x8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
-               v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
-               v1.AddArg(y)
-               v0.AddArg2(x, v1)
-               v2 := b.NewValue0(v.Pos, OpConst64, t)
-               v2.AuxInt = int64ToAuxInt(0)
-               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v3.AuxInt = int64ToAuxInt(64)
-               v3.AddArg(v1)
-               v.AddArg3(v0, v2, v3)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpConst64, t)
+               v1.AuxInt = int64ToAuxInt(0)
+               v2 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v2.AuxInt = int64ToAuxInt(64)
+               v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v.AddArg3(v0, v1, v2)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpLsh8x16(v *Value) bool {
        v_1 := v.Args[1]
@@ -24532,25 +24772,45 @@ func rewriteValueARM64_OpLsh8x16(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Lsh8x16 <t> x y)
-       // result: (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+       // cond: shiftIsBounded(v)
+       // result: (SLL <t> x y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SLL)
+               v.Type = t
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (Lsh8x16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
-               v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
-               v1.AddArg(y)
-               v0.AddArg2(x, v1)
-               v2 := b.NewValue0(v.Pos, OpConst64, t)
-               v2.AuxInt = int64ToAuxInt(0)
-               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v3.AuxInt = int64ToAuxInt(64)
-               v3.AddArg(v1)
-               v.AddArg3(v0, v2, v3)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpConst64, t)
+               v1.AuxInt = int64ToAuxInt(0)
+               v2 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v2.AuxInt = int64ToAuxInt(64)
+               v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v.AddArg3(v0, v1, v2)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpLsh8x32(v *Value) bool {
        v_1 := v.Args[1]
@@ -24558,36 +24818,75 @@ func rewriteValueARM64_OpLsh8x32(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Lsh8x32 <t> x y)
-       // result: (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+       // cond: shiftIsBounded(v)
+       // result: (SLL <t> x y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SLL)
+               v.Type = t
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (Lsh8x32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
-               v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
-               v1.AddArg(y)
-               v0.AddArg2(x, v1)
-               v2 := b.NewValue0(v.Pos, OpConst64, t)
-               v2.AuxInt = int64ToAuxInt(0)
-               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v3.AuxInt = int64ToAuxInt(64)
-               v3.AddArg(v1)
-               v.AddArg3(v0, v2, v3)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpConst64, t)
+               v1.AuxInt = int64ToAuxInt(0)
+               v2 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v2.AuxInt = int64ToAuxInt(64)
+               v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v.AddArg3(v0, v1, v2)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpLsh8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        // match: (Lsh8x64 <t> x y)
+       // cond: shiftIsBounded(v)
+       // result: (SLL <t> x y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SLL)
+               v.Type = t
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (Lsh8x64 <t> x y)
+       // cond: !shiftIsBounded(v)
        // result: (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
@@ -24600,6 +24899,7 @@ func rewriteValueARM64_OpLsh8x64(v *Value) bool {
                v.AddArg3(v0, v1, v2)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpLsh8x8(v *Value) bool {
        v_1 := v.Args[1]
@@ -24607,25 +24907,45 @@ func rewriteValueARM64_OpLsh8x8(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Lsh8x8 <t> x y)
-       // result: (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
+       // cond: shiftIsBounded(v)
+       // result: (SLL <t> x y)
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SLL)
+               v.Type = t
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (Lsh8x8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
-               v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
-               v1.AddArg(y)
-               v0.AddArg2(x, v1)
-               v2 := b.NewValue0(v.Pos, OpConst64, t)
-               v2.AuxInt = int64ToAuxInt(0)
-               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v3.AuxInt = int64ToAuxInt(64)
-               v3.AddArg(v1)
-               v.AddArg3(v0, v2, v3)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpConst64, t)
+               v1.AuxInt = int64ToAuxInt(0)
+               v2 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v2.AuxInt = int64ToAuxInt(64)
+               v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v.AddArg3(v0, v1, v2)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpMod16(v *Value) bool {
        v_1 := v.Args[1]
@@ -25718,56 +26038,100 @@ func rewriteValueARM64_OpRsh16Ux16(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
-       // match: (Rsh16Ux16 <t> x y)
-       // result: (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+       // match: (Rsh16Ux16 <t> x y)
+       // cond: shiftIsBounded(v)
+       // result: (SRL <t> (ZeroExt16to64 x) y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRL)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
+               v0.AddArg(x)
+               v.AddArg2(v0, y)
+               return true
+       }
+       // match: (Rsh16Ux16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64CSEL)
+               v.AuxInt = opToAuxInt(OpARM64LessThanU)
+               v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
+               v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
+               v1.AddArg(x)
+               v0.AddArg2(v1, y)
+               v2 := b.NewValue0(v.Pos, OpConst64, t)
+               v2.AuxInt = int64ToAuxInt(0)
+               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v3.AuxInt = int64ToAuxInt(64)
+               v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
+               v4.AddArg(y)
+               v3.AddArg(v4)
+               v.AddArg3(v0, v2, v3)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpRsh16Ux32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (Rsh16Ux32 <t> x y)
+       // cond: shiftIsBounded(v)
+       // result: (SRL <t> (ZeroExt16to64 x) y)
        for {
                t := v.Type
                x := v_0
                y := v_1
-               v.reset(OpARM64CSEL)
-               v.AuxInt = opToAuxInt(OpARM64LessThanU)
-               v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
-               v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
-               v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
-               v2.AddArg(y)
-               v0.AddArg2(v1, v2)
-               v3 := b.NewValue0(v.Pos, OpConst64, t)
-               v3.AuxInt = int64ToAuxInt(0)
-               v4 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v4.AuxInt = int64ToAuxInt(64)
-               v4.AddArg(v2)
-               v.AddArg3(v0, v3, v4)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRL)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
+               v0.AddArg(x)
+               v.AddArg2(v0, y)
                return true
        }
-}
-func rewriteValueARM64_OpRsh16Ux32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
        // match: (Rsh16Ux32 <t> x y)
-       // result: (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+       // cond: !shiftIsBounded(v)
+       // result: (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
                v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
                v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
-               v2.AddArg(y)
-               v0.AddArg2(v1, v2)
-               v3 := b.NewValue0(v.Pos, OpConst64, t)
-               v3.AuxInt = int64ToAuxInt(0)
-               v4 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v4.AuxInt = int64ToAuxInt(64)
-               v4.AddArg(v2)
-               v.AddArg3(v0, v3, v4)
+               v0.AddArg2(v1, y)
+               v2 := b.NewValue0(v.Pos, OpConst64, t)
+               v2.AuxInt = int64ToAuxInt(0)
+               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v3.AuxInt = int64ToAuxInt(64)
+               v4 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
+               v4.AddArg(y)
+               v3.AddArg(v4)
+               v.AddArg3(v0, v2, v3)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpRsh16Ux64(v *Value) bool {
        v_1 := v.Args[1]
@@ -25775,11 +26139,32 @@ func rewriteValueARM64_OpRsh16Ux64(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Rsh16Ux64 <t> x y)
+       // cond: shiftIsBounded(v)
+       // result: (SRL <t> (ZeroExt16to64 x) y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRL)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
+               v0.AddArg(x)
+               v.AddArg2(v0, y)
+               return true
+       }
+       // match: (Rsh16Ux64 <t> x y)
+       // cond: !shiftIsBounded(v)
        // result: (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
@@ -25794,6 +26179,7 @@ func rewriteValueARM64_OpRsh16Ux64(v *Value) bool {
                v.AddArg3(v0, v2, v3)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpRsh16Ux8(v *Value) bool {
        v_1 := v.Args[1]
@@ -25801,92 +26187,179 @@ func rewriteValueARM64_OpRsh16Ux8(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Rsh16Ux8 <t> x y)
-       // result: (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
+       // cond: shiftIsBounded(v)
+       // result: (SRL <t> (ZeroExt16to64 x) y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRL)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
+               v0.AddArg(x)
+               v.AddArg2(v0, y)
+               return true
+       }
+       // match: (Rsh16Ux8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
                v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
                v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
-               v2.AddArg(y)
-               v0.AddArg2(v1, v2)
-               v3 := b.NewValue0(v.Pos, OpConst64, t)
-               v3.AuxInt = int64ToAuxInt(0)
-               v4 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v4.AuxInt = int64ToAuxInt(64)
-               v4.AddArg(v2)
-               v.AddArg3(v0, v3, v4)
+               v0.AddArg2(v1, y)
+               v2 := b.NewValue0(v.Pos, OpConst64, t)
+               v2.AuxInt = int64ToAuxInt(0)
+               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v3.AuxInt = int64ToAuxInt(64)
+               v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+               v4.AddArg(y)
+               v3.AddArg(v4)
+               v.AddArg3(v0, v2, v3)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpRsh16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
+       // match: (Rsh16x16 <t> x y)
+       // cond: shiftIsBounded(v)
+       // result: (SRA <t> (SignExt16to64 x) y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRA)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64)
+               v0.AddArg(x)
+               v.AddArg2(v0, y)
+               return true
+       }
        // match: (Rsh16x16 x y)
-       // result: (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
+       // cond: !shiftIsBounded(v)
+       // result: (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
        for {
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64SRA)
                v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64)
                v0.AddArg(x)
                v1 := b.NewValue0(v.Pos, OpARM64CSEL, y.Type)
                v1.AuxInt = opToAuxInt(OpARM64LessThanU)
-               v2 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
-               v2.AddArg(y)
-               v3 := b.NewValue0(v.Pos, OpConst64, y.Type)
-               v3.AuxInt = int64ToAuxInt(63)
-               v4 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v4.AuxInt = int64ToAuxInt(64)
-               v4.AddArg(v2)
-               v1.AddArg3(v2, v3, v4)
+               v2 := b.NewValue0(v.Pos, OpConst64, y.Type)
+               v2.AuxInt = int64ToAuxInt(63)
+               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v3.AuxInt = int64ToAuxInt(64)
+               v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
+               v4.AddArg(y)
+               v3.AddArg(v4)
+               v1.AddArg3(y, v2, v3)
                v.AddArg2(v0, v1)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpRsh16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
+       // match: (Rsh16x32 <t> x y)
+       // cond: shiftIsBounded(v)
+       // result: (SRA <t> (SignExt16to64 x) y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRA)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64)
+               v0.AddArg(x)
+               v.AddArg2(v0, y)
+               return true
+       }
        // match: (Rsh16x32 x y)
-       // result: (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
+       // cond: !shiftIsBounded(v)
+       // result: (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
        for {
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64SRA)
                v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64)
                v0.AddArg(x)
                v1 := b.NewValue0(v.Pos, OpARM64CSEL, y.Type)
                v1.AuxInt = opToAuxInt(OpARM64LessThanU)
-               v2 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
-               v2.AddArg(y)
-               v3 := b.NewValue0(v.Pos, OpConst64, y.Type)
-               v3.AuxInt = int64ToAuxInt(63)
-               v4 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v4.AuxInt = int64ToAuxInt(64)
-               v4.AddArg(v2)
-               v1.AddArg3(v2, v3, v4)
+               v2 := b.NewValue0(v.Pos, OpConst64, y.Type)
+               v2.AuxInt = int64ToAuxInt(63)
+               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v3.AuxInt = int64ToAuxInt(64)
+               v4 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
+               v4.AddArg(y)
+               v3.AddArg(v4)
+               v1.AddArg3(y, v2, v3)
                v.AddArg2(v0, v1)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpRsh16x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
+       // match: (Rsh16x64 <t> x y)
+       // cond: shiftIsBounded(v)
+       // result: (SRA <t> (SignExt16to64 x) y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRA)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64)
+               v0.AddArg(x)
+               v.AddArg2(v0, y)
+               return true
+       }
        // match: (Rsh16x64 x y)
+       // cond: !shiftIsBounded(v)
        // result: (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
        for {
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64SRA)
                v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64)
                v0.AddArg(x)
@@ -25901,33 +26374,56 @@ func rewriteValueARM64_OpRsh16x64(v *Value) bool {
                v.AddArg2(v0, v1)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpRsh16x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
+       // match: (Rsh16x8 <t> x y)
+       // cond: shiftIsBounded(v)
+       // result: (SRA <t> (SignExt16to64 x) y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRA)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64)
+               v0.AddArg(x)
+               v.AddArg2(v0, y)
+               return true
+       }
        // match: (Rsh16x8 x y)
-       // result: (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt8to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y))))
+       // cond: !shiftIsBounded(v)
+       // result: (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y))))
        for {
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64SRA)
                v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64)
                v0.AddArg(x)
                v1 := b.NewValue0(v.Pos, OpARM64CSEL, y.Type)
                v1.AuxInt = opToAuxInt(OpARM64LessThanU)
-               v2 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
-               v2.AddArg(y)
-               v3 := b.NewValue0(v.Pos, OpConst64, y.Type)
-               v3.AuxInt = int64ToAuxInt(63)
-               v4 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v4.AuxInt = int64ToAuxInt(64)
-               v4.AddArg(v2)
-               v1.AddArg3(v2, v3, v4)
+               v2 := b.NewValue0(v.Pos, OpConst64, y.Type)
+               v2.AuxInt = int64ToAuxInt(63)
+               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v3.AuxInt = int64ToAuxInt(64)
+               v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+               v4.AddArg(y)
+               v3.AddArg(v4)
+               v1.AddArg3(y, v2, v3)
                v.AddArg2(v0, v1)
                return true
        }
+       return false
 }
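The signed variants above differ from the unsigned ones in where the select happens: Go requires an arithmetic shift by a count >= the operand width to yield all sign bits (0 or -1), so instead of selecting a zero result the CSEL clamps the count itself to 63, and the SRA always runs on the sign-extended value. A sketch of the source-level behavior, same hypothetical naming:

package shiftdemo

// For a signed operand an over-large shift must replicate the sign bit,
// which shifting the sign-extended value by the clamped count 63 achieves.
func rsh16Signed(v int16, s uint) int16 {
        return v >> s // s >= 16 gives 0 for v >= 0 and -1 for v < 0
}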
 func rewriteValueARM64_OpRsh32Ux16(v *Value) bool {
        v_1 := v.Args[1]
@@ -25935,27 +26431,49 @@ func rewriteValueARM64_OpRsh32Ux16(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Rsh32Ux16 <t> x y)
-       // result: (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+       // cond: shiftIsBounded(v)
+       // result: (SRL <t> (ZeroExt32to64 x) y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRL)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
+               v0.AddArg(x)
+               v.AddArg2(v0, y)
+               return true
+       }
+       // match: (Rsh32Ux16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
                v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
                v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
-               v2.AddArg(y)
-               v0.AddArg2(v1, v2)
-               v3 := b.NewValue0(v.Pos, OpConst64, t)
-               v3.AuxInt = int64ToAuxInt(0)
-               v4 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v4.AuxInt = int64ToAuxInt(64)
-               v4.AddArg(v2)
-               v.AddArg3(v0, v3, v4)
+               v0.AddArg2(v1, y)
+               v2 := b.NewValue0(v.Pos, OpConst64, t)
+               v2.AuxInt = int64ToAuxInt(0)
+               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v3.AuxInt = int64ToAuxInt(64)
+               v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
+               v4.AddArg(y)
+               v3.AddArg(v4)
+               v.AddArg3(v0, v2, v3)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpRsh32Ux32(v *Value) bool {
        v_1 := v.Args[1]
@@ -25963,27 +26481,49 @@ func rewriteValueARM64_OpRsh32Ux32(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Rsh32Ux32 <t> x y)
-       // result: (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+       // cond: shiftIsBounded(v)
+       // result: (SRL <t> (ZeroExt32to64 x) y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRL)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
+               v0.AddArg(x)
+               v.AddArg2(v0, y)
+               return true
+       }
+       // match: (Rsh32Ux32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
                v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
                v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
-               v2.AddArg(y)
-               v0.AddArg2(v1, v2)
-               v3 := b.NewValue0(v.Pos, OpConst64, t)
-               v3.AuxInt = int64ToAuxInt(0)
-               v4 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v4.AuxInt = int64ToAuxInt(64)
-               v4.AddArg(v2)
-               v.AddArg3(v0, v3, v4)
+               v0.AddArg2(v1, y)
+               v2 := b.NewValue0(v.Pos, OpConst64, t)
+               v2.AuxInt = int64ToAuxInt(0)
+               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v3.AuxInt = int64ToAuxInt(64)
+               v4 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
+               v4.AddArg(y)
+               v3.AddArg(v4)
+               v.AddArg3(v0, v2, v3)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpRsh32Ux64(v *Value) bool {
        v_1 := v.Args[1]
@@ -25991,11 +26531,80 @@ func rewriteValueARM64_OpRsh32Ux64(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Rsh32Ux64 <t> x y)
+       // cond: shiftIsBounded(v)
+       // result: (SRL <t> (ZeroExt32to64 x) y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRL)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
+               v0.AddArg(x)
+               v.AddArg2(v0, y)
+               return true
+       }
+       // match: (Rsh32Ux64 <t> x y)
+       // cond: !shiftIsBounded(v)
        // result: (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64CSEL)
+               v.AuxInt = opToAuxInt(OpARM64LessThanU)
+               v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
+               v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
+               v1.AddArg(x)
+               v0.AddArg2(v1, y)
+               v2 := b.NewValue0(v.Pos, OpConst64, t)
+               v2.AuxInt = int64ToAuxInt(0)
+               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v3.AuxInt = int64ToAuxInt(64)
+               v3.AddArg(y)
+               v.AddArg3(v0, v2, v3)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpRsh32Ux8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (Rsh32Ux8 <t> x y)
+       // cond: shiftIsBounded(v)
+       // result: (SRL <t> (ZeroExt32to64 x) y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRL)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
+               v0.AddArg(x)
+               v.AddArg2(v0, y)
+               return true
+       }
+       // match: (Rsh32Ux8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
@@ -26006,103 +26615,143 @@ func rewriteValueARM64_OpRsh32Ux64(v *Value) bool {
                v2.AuxInt = int64ToAuxInt(0)
                v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
                v3.AuxInt = int64ToAuxInt(64)
-               v3.AddArg(y)
+               v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+               v4.AddArg(y)
+               v3.AddArg(v4)
                v.AddArg3(v0, v2, v3)
                return true
        }
+       return false
 }
-func rewriteValueARM64_OpRsh32Ux8(v *Value) bool {
+func rewriteValueARM64_OpRsh32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
-       // match: (Rsh32Ux8 <t> x y)
-       // result: (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
+       // match: (Rsh32x16 <t> x y)
+       // cond: shiftIsBounded(v)
+       // result: (SRA <t> (SignExt32to64 x) y)
        for {
                t := v.Type
                x := v_0
                y := v_1
-               v.reset(OpARM64CSEL)
-               v.AuxInt = opToAuxInt(OpARM64LessThanU)
-               v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
-               v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
-               v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
-               v2.AddArg(y)
-               v0.AddArg2(v1, v2)
-               v3 := b.NewValue0(v.Pos, OpConst64, t)
-               v3.AuxInt = int64ToAuxInt(0)
-               v4 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v4.AuxInt = int64ToAuxInt(64)
-               v4.AddArg(v2)
-               v.AddArg3(v0, v3, v4)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRA)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
+               v0.AddArg(x)
+               v.AddArg2(v0, y)
                return true
        }
-}
-func rewriteValueARM64_OpRsh32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
        // match: (Rsh32x16 x y)
-       // result: (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
+       // cond: !shiftIsBounded(v)
+       // result: (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
        for {
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64SRA)
                v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
                v0.AddArg(x)
                v1 := b.NewValue0(v.Pos, OpARM64CSEL, y.Type)
                v1.AuxInt = opToAuxInt(OpARM64LessThanU)
-               v2 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
-               v2.AddArg(y)
-               v3 := b.NewValue0(v.Pos, OpConst64, y.Type)
-               v3.AuxInt = int64ToAuxInt(63)
-               v4 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v4.AuxInt = int64ToAuxInt(64)
-               v4.AddArg(v2)
-               v1.AddArg3(v2, v3, v4)
+               v2 := b.NewValue0(v.Pos, OpConst64, y.Type)
+               v2.AuxInt = int64ToAuxInt(63)
+               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v3.AuxInt = int64ToAuxInt(64)
+               v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
+               v4.AddArg(y)
+               v3.AddArg(v4)
+               v1.AddArg3(y, v2, v3)
                v.AddArg2(v0, v1)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpRsh32x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
+       // match: (Rsh32x32 <t> x y)
+       // cond: shiftIsBounded(v)
+       // result: (SRA <t> (SignExt32to64 x) y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRA)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
+               v0.AddArg(x)
+               v.AddArg2(v0, y)
+               return true
+       }
        // match: (Rsh32x32 x y)
-       // result: (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
+       // cond: !shiftIsBounded(v)
+       // result: (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
        for {
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64SRA)
                v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
                v0.AddArg(x)
                v1 := b.NewValue0(v.Pos, OpARM64CSEL, y.Type)
                v1.AuxInt = opToAuxInt(OpARM64LessThanU)
-               v2 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
-               v2.AddArg(y)
-               v3 := b.NewValue0(v.Pos, OpConst64, y.Type)
-               v3.AuxInt = int64ToAuxInt(63)
-               v4 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v4.AuxInt = int64ToAuxInt(64)
-               v4.AddArg(v2)
-               v1.AddArg3(v2, v3, v4)
+               v2 := b.NewValue0(v.Pos, OpConst64, y.Type)
+               v2.AuxInt = int64ToAuxInt(63)
+               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v3.AuxInt = int64ToAuxInt(64)
+               v4 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
+               v4.AddArg(y)
+               v3.AddArg(v4)
+               v1.AddArg3(y, v2, v3)
                v.AddArg2(v0, v1)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpRsh32x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
+       // match: (Rsh32x64 <t> x y)
+       // cond: shiftIsBounded(v)
+       // result: (SRA <t> (SignExt32to64 x) y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRA)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
+               v0.AddArg(x)
+               v.AddArg2(v0, y)
+               return true
+       }
        // match: (Rsh32x64 x y)
+       // cond: !shiftIsBounded(v)
        // result: (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
        for {
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64SRA)
                v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
                v0.AddArg(x)
@@ -26117,33 +26766,56 @@ func rewriteValueARM64_OpRsh32x64(v *Value) bool {
                v.AddArg2(v0, v1)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpRsh32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
+       // match: (Rsh32x8 <t> x y)
+       // cond: shiftIsBounded(v)
+       // result: (SRA <t> (SignExt32to64 x) y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRA)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
+               v0.AddArg(x)
+               v.AddArg2(v0, y)
+               return true
+       }
        // match: (Rsh32x8 x y)
-       // result: (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt8to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y))))
+       // cond: !shiftIsBounded(v)
+       // result: (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y))))
        for {
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64SRA)
                v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
                v0.AddArg(x)
                v1 := b.NewValue0(v.Pos, OpARM64CSEL, y.Type)
                v1.AuxInt = opToAuxInt(OpARM64LessThanU)
-               v2 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
-               v2.AddArg(y)
-               v3 := b.NewValue0(v.Pos, OpConst64, y.Type)
-               v3.AuxInt = int64ToAuxInt(63)
-               v4 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v4.AuxInt = int64ToAuxInt(64)
-               v4.AddArg(v2)
-               v1.AddArg3(v2, v3, v4)
+               v2 := b.NewValue0(v.Pos, OpConst64, y.Type)
+               v2.AuxInt = int64ToAuxInt(63)
+               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v3.AuxInt = int64ToAuxInt(64)
+               v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+               v4.AddArg(y)
+               v3.AddArg(v4)
+               v1.AddArg3(y, v2, v3)
                v.AddArg2(v0, v1)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpRsh64Ux16(v *Value) bool {
        v_1 := v.Args[1]
@@ -26151,25 +26823,45 @@ func rewriteValueARM64_OpRsh64Ux16(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Rsh64Ux16 <t> x y)
-       // result: (CSEL [OpARM64LessThanU] (SRL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+       // cond: shiftIsBounded(v)
+       // result: (SRL <t> x y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRL)
+               v.Type = t
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (Rsh64Ux16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (CSEL [OpARM64LessThanU] (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
-               v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
-               v1.AddArg(y)
-               v0.AddArg2(x, v1)
-               v2 := b.NewValue0(v.Pos, OpConst64, t)
-               v2.AuxInt = int64ToAuxInt(0)
-               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v3.AuxInt = int64ToAuxInt(64)
-               v3.AddArg(v1)
-               v.AddArg3(v0, v2, v3)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpConst64, t)
+               v1.AuxInt = int64ToAuxInt(0)
+               v2 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v2.AuxInt = int64ToAuxInt(64)
+               v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v.AddArg3(v0, v1, v2)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpRsh64Ux32(v *Value) bool {
        v_1 := v.Args[1]
@@ -26177,36 +26869,75 @@ func rewriteValueARM64_OpRsh64Ux32(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Rsh64Ux32 <t> x y)
-       // result: (CSEL [OpARM64LessThanU] (SRL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+       // cond: shiftIsBounded(v)
+       // result: (SRL <t> x y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRL)
+               v.Type = t
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (Rsh64Ux32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (CSEL [OpARM64LessThanU] (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
-               v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
-               v1.AddArg(y)
-               v0.AddArg2(x, v1)
-               v2 := b.NewValue0(v.Pos, OpConst64, t)
-               v2.AuxInt = int64ToAuxInt(0)
-               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v3.AuxInt = int64ToAuxInt(64)
-               v3.AddArg(v1)
-               v.AddArg3(v0, v2, v3)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpConst64, t)
+               v1.AuxInt = int64ToAuxInt(0)
+               v2 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v2.AuxInt = int64ToAuxInt(64)
+               v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v.AddArg3(v0, v1, v2)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpRsh64Ux64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        // match: (Rsh64Ux64 <t> x y)
+       // cond: shiftIsBounded(v)
+       // result: (SRL <t> x y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRL)
+               v.Type = t
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (Rsh64Ux64 <t> x y)
+       // cond: !shiftIsBounded(v)
        // result: (CSEL [OpARM64LessThanU] (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
@@ -26219,6 +26950,7 @@ func rewriteValueARM64_OpRsh64Ux64(v *Value) bool {
                v.AddArg3(v0, v1, v2)
                return true
        }
+       return false
 }
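For 64-bit unsigned operands no zero-extension is needed, so the bounded rule reduces to a single SRL, and the unbounded rule compares the original count directly against 64. A hedged sketch of the two shapes this distinguishes:

package shiftdemo

// The 63-mask proves the count in range, so only an LSR is emitted.
func rsh64Bounded(v uint64, s uint) uint64 {
        return v >> (s & 63)
}

// Without the mask, CMP $64 + CSEL is kept: AArch64 shifts use the count
// modulo 64, but Go requires 0 for any count >= 64.
func rsh64Unbounded(v uint64, s uint) uint64 {
        return v >> s
}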
 func rewriteValueARM64_OpRsh64Ux8(v *Value) bool {
        v_1 := v.Args[1]
@@ -26226,85 +26958,164 @@ func rewriteValueARM64_OpRsh64Ux8(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Rsh64Ux8 <t> x y)
-       // result: (CSEL [OpARM64LessThanU] (SRL <t> x (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
+       // cond: shiftIsBounded(v)
+       // result: (SRL <t> x y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRL)
+               v.Type = t
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (Rsh64Ux8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (CSEL [OpARM64LessThanU] (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
-               v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
-               v1.AddArg(y)
-               v0.AddArg2(x, v1)
-               v2 := b.NewValue0(v.Pos, OpConst64, t)
-               v2.AuxInt = int64ToAuxInt(0)
-               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v3.AuxInt = int64ToAuxInt(64)
-               v3.AddArg(v1)
-               v.AddArg3(v0, v2, v3)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpConst64, t)
+               v1.AuxInt = int64ToAuxInt(0)
+               v2 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v2.AuxInt = int64ToAuxInt(64)
+               v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v.AddArg3(v0, v1, v2)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpRsh64x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
-       // match: (Rsh64x16 x y)
-       // result: (SRA x (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
+       // match: (Rsh64x16 <t> x y)
+       // cond: shiftIsBounded(v)
+       // result: (SRA <t> x y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRA)
+               v.Type = t
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (Rsh64x16 x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SRA x (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
+       for {
+               x := v_0
+               y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRA)
+               v0 := b.NewValue0(v.Pos, OpARM64CSEL, y.Type)
+               v0.AuxInt = opToAuxInt(OpARM64LessThanU)
+               v1 := b.NewValue0(v.Pos, OpConst64, y.Type)
+               v1.AuxInt = int64ToAuxInt(63)
+               v2 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v2.AuxInt = int64ToAuxInt(64)
+               v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v0.AddArg3(y, v1, v2)
+               v.AddArg2(x, v0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpRsh64x32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (Rsh64x32 <t> x y)
+       // cond: shiftIsBounded(v)
+       // result: (SRA <t> x y)
        for {
+               t := v.Type
                x := v_0
                y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64SRA)
-               v0 := b.NewValue0(v.Pos, OpARM64CSEL, y.Type)
-               v0.AuxInt = opToAuxInt(OpARM64LessThanU)
-               v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
-               v1.AddArg(y)
-               v2 := b.NewValue0(v.Pos, OpConst64, y.Type)
-               v2.AuxInt = int64ToAuxInt(63)
-               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v3.AuxInt = int64ToAuxInt(64)
-               v3.AddArg(v1)
-               v0.AddArg3(v1, v2, v3)
-               v.AddArg2(x, v0)
+               v.Type = t
+               v.AddArg2(x, y)
                return true
        }
-}
-func rewriteValueARM64_OpRsh64x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
        // match: (Rsh64x32 x y)
-       // result: (SRA x (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
+       // cond: !shiftIsBounded(v)
+       // result: (SRA x (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
        for {
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64SRA)
                v0 := b.NewValue0(v.Pos, OpARM64CSEL, y.Type)
                v0.AuxInt = opToAuxInt(OpARM64LessThanU)
-               v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
-               v1.AddArg(y)
-               v2 := b.NewValue0(v.Pos, OpConst64, y.Type)
-               v2.AuxInt = int64ToAuxInt(63)
-               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v3.AuxInt = int64ToAuxInt(64)
-               v3.AddArg(v1)
-               v0.AddArg3(v1, v2, v3)
+               v1 := b.NewValue0(v.Pos, OpConst64, y.Type)
+               v1.AuxInt = int64ToAuxInt(63)
+               v2 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v2.AuxInt = int64ToAuxInt(64)
+               v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v0.AddArg3(y, v1, v2)
                v.AddArg2(x, v0)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpRsh64x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (Rsh64x64 <t> x y)
+       // cond: shiftIsBounded(v)
+       // result: (SRA <t> x y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRA)
+               v.Type = t
+               v.AddArg2(x, y)
+               return true
+       }
        // match: (Rsh64x64 x y)
+       // cond: !shiftIsBounded(v)
        // result: (SRA x (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
        for {
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64SRA)
                v0 := b.NewValue0(v.Pos, OpARM64CSEL, y.Type)
                v0.AuxInt = opToAuxInt(OpARM64LessThanU)
@@ -26317,31 +27128,52 @@ func rewriteValueARM64_OpRsh64x64(v *Value) bool {
                v.AddArg2(x, v0)
                return true
        }
+       return false
 }
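The 64-bit signed case is the simplest of the signed family: neither operand needs extension, so the bounded rule is a bare SRA, while the unbounded rule still clamps the count to 63 so that counts >= 64 replicate the sign bit rather than wrapping modulo 64. Sketch, hypothetical names as before:

package shiftdemo

func rsh64Signed(v int64, s uint) int64 {
        return v >> s // count clamped to 63 via CSEL when unproven
}

func rsh64SignedBounded(v int64, s uint) int64 {
        return v >> (s & 63) // a single ASR expected, no CSEL
}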
 func rewriteValueARM64_OpRsh64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
+       // match: (Rsh64x8 <t> x y)
+       // cond: shiftIsBounded(v)
+       // result: (SRA <t> x y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRA)
+               v.Type = t
+               v.AddArg2(x, y)
+               return true
+       }
        // match: (Rsh64x8 x y)
-       // result: (SRA x (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt8to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y))))
+       // cond: !shiftIsBounded(v)
+       // result: (SRA x (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y))))
        for {
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64SRA)
                v0 := b.NewValue0(v.Pos, OpARM64CSEL, y.Type)
                v0.AuxInt = opToAuxInt(OpARM64LessThanU)
-               v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
-               v1.AddArg(y)
-               v2 := b.NewValue0(v.Pos, OpConst64, y.Type)
-               v2.AuxInt = int64ToAuxInt(63)
-               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v3.AuxInt = int64ToAuxInt(64)
-               v3.AddArg(v1)
-               v0.AddArg3(v1, v2, v3)
+               v1 := b.NewValue0(v.Pos, OpConst64, y.Type)
+               v1.AuxInt = int64ToAuxInt(63)
+               v2 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v2.AuxInt = int64ToAuxInt(64)
+               v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v0.AddArg3(y, v1, v2)
                v.AddArg2(x, v0)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpRsh8Ux16(v *Value) bool {
        v_1 := v.Args[1]
@@ -26349,27 +27181,49 @@ func rewriteValueARM64_OpRsh8Ux16(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Rsh8Ux16 <t> x y)
-       // result: (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+       // cond: shiftIsBounded(v)
+       // result: (SRL <t> (ZeroExt8to64 x) y)
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRL)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+               v0.AddArg(x)
+               v.AddArg2(v0, y)
+               return true
+       }
+       // match: (Rsh8Ux16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
                v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
                v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
-               v2.AddArg(y)
-               v0.AddArg2(v1, v2)
-               v3 := b.NewValue0(v.Pos, OpConst64, t)
-               v3.AuxInt = int64ToAuxInt(0)
-               v4 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v4.AuxInt = int64ToAuxInt(64)
-               v4.AddArg(v2)
-               v.AddArg3(v0, v3, v4)
+               v0.AddArg2(v1, y)
+               v2 := b.NewValue0(v.Pos, OpConst64, t)
+               v2.AuxInt = int64ToAuxInt(0)
+               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v3.AuxInt = int64ToAuxInt(64)
+               v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
+               v4.AddArg(y)
+               v3.AddArg(v4)
+               v.AddArg3(v0, v2, v3)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpRsh8Ux32(v *Value) bool {
        v_1 := v.Args[1]
@@ -26377,27 +27231,49 @@ func rewriteValueARM64_OpRsh8Ux32(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Rsh8Ux32 <t> x y)
-       // result: (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+       // cond: shiftIsBounded(v)
+       // result: (SRL <t> (ZeroExt8to64 x) y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRL)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+               v0.AddArg(x)
+               v.AddArg2(v0, y)
+               return true
+       }
+       // match: (Rsh8Ux32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
                v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
                v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
-               v2.AddArg(y)
-               v0.AddArg2(v1, v2)
-               v3 := b.NewValue0(v.Pos, OpConst64, t)
-               v3.AuxInt = int64ToAuxInt(0)
-               v4 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v4.AuxInt = int64ToAuxInt(64)
-               v4.AddArg(v2)
-               v.AddArg3(v0, v3, v4)
+               v0.AddArg2(v1, y)
+               v2 := b.NewValue0(v.Pos, OpConst64, t)
+               v2.AuxInt = int64ToAuxInt(0)
+               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v3.AuxInt = int64ToAuxInt(64)
+               v4 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
+               v4.AddArg(y)
+               v3.AddArg(v4)
+               v.AddArg3(v0, v2, v3)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpRsh8Ux64(v *Value) bool {
        v_1 := v.Args[1]
@@ -26405,11 +27281,32 @@ func rewriteValueARM64_OpRsh8Ux64(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Rsh8Ux64 <t> x y)
+       // cond: shiftIsBounded(v)
+       // result: (SRL <t> (ZeroExt8to64 x) y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRL)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+               v0.AddArg(x)
+               v.AddArg2(v0, y)
+               return true
+       }
+       // match: (Rsh8Ux64 <t> x y)
+       // cond: !shiftIsBounded(v)
        // result: (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
@@ -26424,6 +27321,7 @@ func rewriteValueARM64_OpRsh8Ux64(v *Value) bool {
                v.AddArg3(v0, v2, v3)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpRsh8Ux8(v *Value) bool {
        v_1 := v.Args[1]
@@ -26431,92 +27329,179 @@ func rewriteValueARM64_OpRsh8Ux8(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Rsh8Ux8 <t> x y)
-       // result: (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
+       // cond: shiftIsBounded(v)
+       // result: (SRL <t> (ZeroExt8to64 x) y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRL)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+               v0.AddArg(x)
+               v.AddArg2(v0, y)
+               return true
+       }
+       // match: (Rsh8Ux8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
        for {
                t := v.Type
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64CSEL)
                v.AuxInt = opToAuxInt(OpARM64LessThanU)
                v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
                v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
                v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
-               v2.AddArg(y)
-               v0.AddArg2(v1, v2)
-               v3 := b.NewValue0(v.Pos, OpConst64, t)
-               v3.AuxInt = int64ToAuxInt(0)
-               v4 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v4.AuxInt = int64ToAuxInt(64)
-               v4.AddArg(v2)
-               v.AddArg3(v0, v3, v4)
+               v0.AddArg2(v1, y)
+               v2 := b.NewValue0(v.Pos, OpConst64, t)
+               v2.AuxInt = int64ToAuxInt(0)
+               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v3.AuxInt = int64ToAuxInt(64)
+               v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+               v4.AddArg(y)
+               v3.AddArg(v4)
+               v.AddArg3(v0, v2, v3)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpRsh8x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
+       // match: (Rsh8x16 <t> x y)
+       // cond: shiftIsBounded(v)
+       // result: (SRA <t> (SignExt8to64 x) y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRA)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64)
+               v0.AddArg(x)
+               v.AddArg2(v0, y)
+               return true
+       }
        // match: (Rsh8x16 x y)
-       // result: (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
+       // cond: !shiftIsBounded(v)
+       // result: (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
        for {
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64SRA)
                v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64)
                v0.AddArg(x)
                v1 := b.NewValue0(v.Pos, OpARM64CSEL, y.Type)
                v1.AuxInt = opToAuxInt(OpARM64LessThanU)
-               v2 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
-               v2.AddArg(y)
-               v3 := b.NewValue0(v.Pos, OpConst64, y.Type)
-               v3.AuxInt = int64ToAuxInt(63)
-               v4 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v4.AuxInt = int64ToAuxInt(64)
-               v4.AddArg(v2)
-               v1.AddArg3(v2, v3, v4)
+               v2 := b.NewValue0(v.Pos, OpConst64, y.Type)
+               v2.AuxInt = int64ToAuxInt(63)
+               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v3.AuxInt = int64ToAuxInt(64)
+               v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
+               v4.AddArg(y)
+               v3.AddArg(v4)
+               v1.AddArg3(y, v2, v3)
                v.AddArg2(v0, v1)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpRsh8x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
+       // match: (Rsh8x32 <t> x y)
+       // cond: shiftIsBounded(v)
+       // result: (SRA <t> (SignExt8to64 x) y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRA)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64)
+               v0.AddArg(x)
+               v.AddArg2(v0, y)
+               return true
+       }
        // match: (Rsh8x32 x y)
-       // result: (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
+       // cond: !shiftIsBounded(v)
+       // result: (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
        for {
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64SRA)
                v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64)
                v0.AddArg(x)
                v1 := b.NewValue0(v.Pos, OpARM64CSEL, y.Type)
                v1.AuxInt = opToAuxInt(OpARM64LessThanU)
-               v2 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
-               v2.AddArg(y)
-               v3 := b.NewValue0(v.Pos, OpConst64, y.Type)
-               v3.AuxInt = int64ToAuxInt(63)
-               v4 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v4.AuxInt = int64ToAuxInt(64)
-               v4.AddArg(v2)
-               v1.AddArg3(v2, v3, v4)
+               v2 := b.NewValue0(v.Pos, OpConst64, y.Type)
+               v2.AuxInt = int64ToAuxInt(63)
+               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v3.AuxInt = int64ToAuxInt(64)
+               v4 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
+               v4.AddArg(y)
+               v3.AddArg(v4)
+               v1.AddArg3(y, v2, v3)
                v.AddArg2(v0, v1)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpRsh8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
+       // match: (Rsh8x64 <t> x y)
+       // cond: shiftIsBounded(v)
+       // result: (SRA <t> (SignExt8to64 x) y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRA)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64)
+               v0.AddArg(x)
+               v.AddArg2(v0, y)
+               return true
+       }
        // match: (Rsh8x64 x y)
+       // cond: !shiftIsBounded(v)
        // result: (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
        for {
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64SRA)
                v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64)
                v0.AddArg(x)
@@ -26531,33 +27516,56 @@ func rewriteValueARM64_OpRsh8x64(v *Value) bool {
                v.AddArg2(v0, v1)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpRsh8x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
+       // match: (Rsh8x8 <t> x y)
+       // cond: shiftIsBounded(v)
+       // result: (SRA <t> (SignExt8to64 x) y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpARM64SRA)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64)
+               v0.AddArg(x)
+               v.AddArg2(v0, y)
+               return true
+       }
        // match: (Rsh8x8 x y)
-       // result: (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt8to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y))))
+       // cond: !shiftIsBounded(v)
+       // result: (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y))))
        for {
                x := v_0
                y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
                v.reset(OpARM64SRA)
                v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64)
                v0.AddArg(x)
                v1 := b.NewValue0(v.Pos, OpARM64CSEL, y.Type)
                v1.AuxInt = opToAuxInt(OpARM64LessThanU)
-               v2 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
-               v2.AddArg(y)
-               v3 := b.NewValue0(v.Pos, OpConst64, y.Type)
-               v3.AuxInt = int64ToAuxInt(63)
-               v4 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v4.AuxInt = int64ToAuxInt(64)
-               v4.AddArg(v2)
-               v1.AddArg3(v2, v3, v4)
+               v2 := b.NewValue0(v.Pos, OpConst64, y.Type)
+               v2.AuxInt = int64ToAuxInt(63)
+               v3 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v3.AuxInt = int64ToAuxInt(64)
+               v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+               v4.AddArg(y)
+               v3.AddArg(v4)
+               v1.AddArg3(y, v2, v3)
                v.AddArg2(v0, v1)
                return true
        }
+       return false
 }
 func rewriteValueARM64_OpSelect0(v *Value) bool {
        v_0 := v.Args[0]
index f09a531dcbdc54e2b38a5fad7d0b98efa880d96f..5a2391358c31d57863299b1b57febaadb78f3163 100644 (file)
@@ -91,7 +91,7 @@ func rshMask64Ux64(v uint64, s uint64) uint64 {
        // ppc64le:"ANDCC",-"ORN",-"ISEL"
        // riscv64:"SRL",-"AND\t",-"SLTIU"
        // s390x:-"RISBGZ",-"AND",-"LOCGR"
-       // arm64:"LSR",-"AND"
+       // arm64:"LSR",-"AND",-"CSEL"
        return v >> (s & 63)
 }
 
@@ -100,7 +100,7 @@ func rshMask64x64(v int64, s uint64) int64 {
        // ppc64le:"ANDCC",-"ORN",-"ISEL"
        // riscv64:"SRA",-"OR",-"SLTIU"
        // s390x:-"RISBGZ",-"AND",-"LOCGR"
-       // arm64:"ASR",-"AND"
+       // arm64:"ASR",-"AND",-"CSEL"
        return v >> (s & 63)
 }
 
@@ -145,7 +145,7 @@ func rshMask64Ux32(v uint64, s uint32) uint64 {
        // ppc64le:"ANDCC",-"ORN"
        // riscv64:"SRL",-"AND\t",-"SLTIU"
        // s390x:-"RISBGZ",-"AND",-"LOCGR"
-       // arm64:"LSR",-"AND"
+       // arm64:"LSR",-"AND",-"CSEL"
        return v >> (s & 63)
 }
 
@@ -154,7 +154,7 @@ func rshMask64x32(v int64, s uint32) int64 {
        // ppc64le:"ANDCC",-"ORN",-"ISEL"
        // riscv64:"SRA",-"OR",-"SLTIU"
        // s390x:-"RISBGZ",-"AND",-"LOCGR"
-       // arm64:"ASR",-"AND"
+       // arm64:"ASR",-"AND",-"CSEL"
        return v >> (s & 63)
 }
 
@@ -219,6 +219,7 @@ func lshGuarded64(v int64, s uint) int64 {
                // riscv64:"SLL",-"AND",-"SLTIU"
                // s390x:-"RISBGZ",-"AND",-"LOCGR"
                // wasm:-"Select",-".*LtU"
+               // arm64:"LSL",-"CSEL"
                return v << s
        }
        panic("shift too large")
@@ -229,6 +230,7 @@ func rshGuarded64U(v uint64, s uint) uint64 {
                // riscv64:"SRL",-"AND",-"SLTIU"
                // s390x:-"RISBGZ",-"AND",-"LOCGR"
                // wasm:-"Select",-".*LtU"
+               // arm64:"LSR",-"CSEL"
                return v >> s
        }
        panic("shift too large")
@@ -239,11 +241,92 @@ func rshGuarded64(v int64, s uint) int64 {
                // riscv64:"SRA",-"OR",-"SLTIU"
                // s390x:-"RISBGZ",-"AND",-"LOCGR"
                // wasm:-"Select",-".*LtU"
+               // arm64:"ASR",-"CSEL"
                return v >> s
        }
        panic("shift too large")
 }
 
+func provedUnsignedShiftLeft(val64 uint64, val32 uint32, val16 uint16, val8 uint8, shift int) (r1 uint64, r2 uint32, r3 uint16, r4 uint8) {
+       if shift >= 0 && shift < 64 {
+               // arm64:"LSL",-"CSEL"
+               r1 = val64 << shift
+       }
+       if shift >= 0 && shift < 32 {
+               // arm64:"LSL",-"CSEL"
+               r2 = val32 << shift
+       }
+       if shift >= 0 && shift < 16 {
+               // arm64:"LSL",-"CSEL"
+               r3 = val16 << shift
+       }
+       if shift >= 0 && shift < 8 {
+               // arm64:"LSL",-"CSEL"
+               r4 = val8 << shift
+       }
+       return r1, r2, r3, r4
+}
+
+func provedSignedShiftLeft(val64 int64, val32 int32, val16 int16, val8 int8, shift int) (r1 int64, r2 int32, r3 int16, r4 int8) {
+       if shift >= 0 && shift < 64 {
+               // arm64:"LSL",-"CSEL"
+               r1 = val64 << shift
+       }
+       if shift >= 0 && shift < 32 {
+               // arm64:"LSL",-"CSEL"
+               r2 = val32 << shift
+       }
+       if shift >= 0 && shift < 16 {
+               // arm64:"LSL",-"CSEL"
+               r3 = val16 << shift
+       }
+       if shift >= 0 && shift < 8 {
+               // arm64:"LSL",-"CSEL"
+               r4 = val8 << shift
+       }
+       return r1, r2, r3, r4
+}
+
+func provedUnsignedShiftRight(val64 uint64, val32 uint32, val16 uint16, val8 uint8, shift int) (r1 uint64, r2 uint32, r3 uint16, r4 uint8) {
+       if shift >= 0 && shift < 64 {
+               // arm64:"LSR",-"CSEL"
+               r1 = val64 >> shift
+       }
+       if shift >= 0 && shift < 32 {
+               // arm64:"LSR",-"CSEL"
+               r2 = val32 >> shift
+       }
+       if shift >= 0 && shift < 16 {
+               // arm64:"LSR",-"CSEL"
+               r3 = val16 >> shift
+       }
+       if shift >= 0 && shift < 8 {
+               // arm64:"LSR",-"CSEL"
+               r4 = val8 >> shift
+       }
+       return r1, r2, r3, r4
+}
+
+func provedSignedShiftRight(val64 int64, val32 int32, val16 int16, val8 int8, shift int) (r1 int64, r2 int32, r3 int16, r4 int8) {
+       if shift >= 0 && shift < 64 {
+               // arm64:"ASR",-"CSEL"
+               r1 = val64 >> shift
+       }
+       if shift >= 0 && shift < 32 {
+               // arm64:"ASR",-"CSEL"
+               r2 = val32 >> shift
+       }
+       if shift >= 0 && shift < 16 {
+               // arm64:"ASR",-"CSEL"
+               r3 = val16 >> shift
+       }
+       if shift >= 0 && shift < 8 {
+               // arm64:"ASR",-"CSEL"
+               r4 = val8 >> shift
+       }
+       return r1, r2, r3, r4
+}
+
 func checkUnneededTrunc(tab *[100000]uint32, d uint64, v uint32, h uint16, b byte) (uint32, uint64) {
 
        // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"