cmd/compile: don't use BTS when OR works, add direct memory BTS operations
author    Keith Randall <khr@golang.org>
          Tue, 1 Aug 2023 21:32:56 +0000 (14:32 -0700)
committer Keith Randall <khr@google.com>
          Fri, 4 Aug 2023 16:40:24 +0000 (16:40 +0000)
Stop using BTSconst and friends when ORLconst can be used instead.
OR can be issued by more function units than BTS can, so it could
lead to better IPC (instructions per cycle). OR might take a few more
bytes to encode, but not by much.

Still use BTSconst for cases where the constant wouldn't otherwise
fit in a sign-extended 32-bit immediate and would require a separate
movabs instruction to materialize it. This happens when setting bits
31-63 of a 64-bit target.
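
As an illustration, a hypothetical sketch in the style of the codegen
tests further down; the assembly noted in the comments follows the
updated expectations in test/codegen/bits.go:

    func setBits(a uint32, b uint64) (uint32, uint64) {
    	a |= 1 << 28 // now ORL $268435456 instead of BTSL $28 (immediate fits)
    	b |= 1 << 30 // ORQ $1073741824 (fits a sign-extended 32-bit immediate)
    	b |= 1 << 63 // still BTSQ $63: 1<<63 cannot be encoded as an ORQ immediate
    	return a, b
    }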

Add BTS-to-memory operations so we don't need to load/bts/store.
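
A sketch of the new memory forms (hypothetical; the register and
offsets mirror the expectations added to test/codegen/memops.go below):

    func setBitsInMemory(p *[2]uint64) {
    	p[0] |= 1 << 30 // ORQ $1073741824, (AX): immediate still fits
    	p[1] |= 1 << 63 // BTSQ $63, 8(AX): one read-modify-write, no separate load/BTS/store
    }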

Fixes #61694

Change-Id: I00379608df8fb0167cb01466e97d11dec7c1596c
Reviewed-on: https://go-review.googlesource.com/c/go/+/515755
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
src/cmd/compile/internal/amd64/ssa.go
src/cmd/compile/internal/ssa/_gen/AMD64.rules
src/cmd/compile/internal/ssa/_gen/AMD64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
test/codegen/bits.go
test/codegen/mathbits.go
test/codegen/memops.go

index d32ea7ec161034be4291c595bc3cc280fa415fe2..ab762c24f67034e5d43c17878affc9a055664ab8 100644 (file)
@@ -714,9 +714,9 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                p.To.Offset = v.AuxInt
        case ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst,
                ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
-               ssa.OpAMD64BTSLconst, ssa.OpAMD64BTSQconst,
-               ssa.OpAMD64BTCLconst, ssa.OpAMD64BTCQconst,
-               ssa.OpAMD64BTRLconst, ssa.OpAMD64BTRQconst:
+               ssa.OpAMD64BTSQconst,
+               ssa.OpAMD64BTCQconst,
+               ssa.OpAMD64BTRQconst:
                op := v.Op
                if op == ssa.OpAMD64BTQconst && v.AuxInt < 32 {
                        // Emit 32-bit version because it's shorter
@@ -851,7 +851,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                }
                fallthrough
        case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
-               ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify:
+               ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify,
+               ssa.OpAMD64BTSQconstmodify, ssa.OpAMD64BTRQconstmodify, ssa.OpAMD64BTCQconstmodify:
                sc := v.AuxValAndOff()
                off := sc.Off64()
                val := sc.Val64()
index b6937de80055001a34033fc2414e66bfcecae056..c4f74bb0d90ed324a33edf45aa6653439e0fac3b 100644 (file)
@@ -82,8 +82,8 @@
 (Ctz32 x)     && buildcfg.GOAMD64 >= 3 => (TZCNTL x)
 (Ctz64 <t> x) && buildcfg.GOAMD64 <  3 => (CMOVQEQ (Select0 <t> (BSFQ x)) (MOVQconst <t> [64]) (Select1 <types.TypeFlags> (BSFQ x)))
 (Ctz32 x)     && buildcfg.GOAMD64 <  3 => (Select0 (BSFQ (BTSQconst <typ.UInt64> [32] x)))
-(Ctz16 x) => (BSFL (BTSLconst <typ.UInt32> [16] x))
-(Ctz8  x) => (BSFL (BTSLconst <typ.UInt32> [ 8] x))
+(Ctz16 x) => (BSFL (ORLconst <typ.UInt32> [1<<16] x))
+(Ctz8  x) => (BSFL (ORLconst <typ.UInt32> [1<<8 ] x))
 
 (Ctz64NonZero x) && buildcfg.GOAMD64 >= 3 => (TZCNTQ x)
 (Ctz32NonZero x) && buildcfg.GOAMD64 >= 3 => (TZCNTL x)
 // Recognize bit setting (a |= 1<<b) and toggling (a ^= 1<<b)
 (OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y)
 (XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y)
-
-// Convert ORconst into BTS, if the code gets smaller, with boundary being
-// (ORL $40,AX is 3 bytes, ORL $80,AX is 6 bytes).
-((ORQ|XORQ)const [c] x) && isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128
-    => (BT(S|C)Qconst [int8(log32(c))] x)
-((ORL|XORL)const [c] x) && isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
-    => (BT(S|C)Lconst [int8(log32(c))] x)
-((ORQ|XORQ) (MOVQconst [c]) x) && isUint64PowerOfTwo(c) && uint64(c) >= 128
-    => (BT(S|C)Qconst [int8(log64(c))] x)
-((ORL|XORL) (MOVLconst [c]) x) && isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
-    => (BT(S|C)Lconst [int8(log32(c))] x)
+// Note: only convert OR/XOR to BTS/BTC if the constant wouldn't fit in
+// the constant field of the OR/XOR instruction. See issue 61694.
+((OR|XOR)Q (MOVQconst [c]) x) && isUint64PowerOfTwo(c) && uint64(c) >= 1<<31 => (BT(S|C)Qconst [int8(log64(c))] x)
 
 // Recognize bit clearing: a &^= 1<<b
 (AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y)
 (ANDN(Q|L) x (SHL(Q|L) (MOV(Q|L)const [1]) y)) => (BTR(Q|L) x y)
-(ANDQconst [c] x) && isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128
-    => (BTRQconst [int8(log32(^c))] x)
-(ANDLconst [c] x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
-    => (BTRLconst [int8(log32(^c))] x)
-(ANDQ (MOVQconst [c]) x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 128
-    => (BTRQconst [int8(log64(^c))] x)
-(ANDL (MOVLconst [c]) x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
-    => (BTRLconst [int8(log32(^c))] x)
+// Note: only convert AND to BTR if the constant wouldn't fit in
+// the constant field of the AND instruction. See issue 61694.
+(ANDQ (MOVQconst [c]) x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 1<<31 => (BTRQconst [int8(log64(^c))] x)
 
 // Special-case bit patterns on first/last bit.
 // generic.rules changes ANDs of high-part/low-part masks into a couple of shifts,
 
 // Special case resetting first/last bit
 (SHL(L|Q)const [1] (SHR(L|Q)const [1] x))
-       => (BTR(L|Q)const [0] x)
+       => (AND(L|Q)const [-2] x)
 (SHRLconst [1] (SHLLconst [1] x))
-       => (BTRLconst [31] x)
+       => (ANDLconst [0x7fffffff] x)
 (SHRQconst [1] (SHLQconst [1] x))
        => (BTRQconst [63] x)
 
     => (SET(B|AE)store [off] {sym} ptr (BTLconst [31] x) mem)
 
 // Fold combinations of bit ops on same bit. An example is math.Copysign(c,-1)
-(BTS(Q|L)const [c] (BTR(Q|L)const [c] x)) => (BTS(Q|L)const [c] x)
-(BTS(Q|L)const [c] (BTC(Q|L)const [c] x)) => (BTS(Q|L)const [c] x)
-(BTR(Q|L)const [c] (BTS(Q|L)const [c] x)) => (BTR(Q|L)const [c] x)
-(BTR(Q|L)const [c] (BTC(Q|L)const [c] x)) => (BTR(Q|L)const [c] x)
+(BTSQconst [c] (BTRQconst [c] x)) => (BTSQconst [c] x)
+(BTSQconst [c] (BTCQconst [c] x)) => (BTSQconst [c] x)
+(BTRQconst [c] (BTSQconst [c] x)) => (BTRQconst [c] x)
+(BTRQconst [c] (BTCQconst [c] x)) => (BTRQconst [c] x)
 
 // Fold boolean negation into SETcc.
 (XORLconst [1] (SETNE x)) => (SETEQ x)
 (XOR(L|Q)const [c] (XOR(L|Q)const [d] x)) => (XOR(L|Q)const [c ^ d] x)
 (OR(L|Q)const  [c] (OR(L|Q)const  [d] x)) => (OR(L|Q)const  [c | d] x)
 
-(BTRLconst [c] (ANDLconst [d] x)) => (ANDLconst [d &^ (1<<uint32(c))] x)
-(ANDLconst [c] (BTRLconst [d] x)) => (ANDLconst [c &^ (1<<uint32(d))] x)
-(BTRLconst [c] (BTRLconst [d] x)) => (ANDLconst [^(1<<uint32(c) | 1<<uint32(d))] x)
-
-(BTCLconst [c] (XORLconst [d] x)) => (XORLconst [d ^ 1<<uint32(c)] x)
-(XORLconst [c] (BTCLconst [d] x)) => (XORLconst [c ^ 1<<uint32(d)] x)
-(BTCLconst [c] (BTCLconst [d] x)) => (XORLconst [1<<uint32(c) | 1<<uint32(d)] x)
-
-(BTSLconst [c] (ORLconst  [d] x)) => (ORLconst [d | 1<<uint32(c)] x)
-(ORLconst  [c] (BTSLconst [d] x)) => (ORLconst [c | 1<<uint32(d)] x)
-(BTSLconst [c] (BTSLconst [d] x)) => (ORLconst [1<<uint32(c) | 1<<uint32(d)] x)
-
-(BTRQconst [c] (ANDQconst [d] x)) && is32Bit(int64(d) &^ (1<<uint32(c)))     => (ANDQconst [d &^ (1<<uint32(c))] x)
-(ANDQconst [c] (BTRQconst [d] x)) && is32Bit(int64(c) &^ (1<<uint32(d)))     => (ANDQconst [c &^ (1<<uint32(d))] x)
-(BTRQconst [c] (BTRQconst [d] x)) && is32Bit(^(1<<uint32(c) | 1<<uint32(d))) => (ANDQconst [^(1<<uint32(c) | 1<<uint32(d))] x)
-
-(BTCQconst [c] (XORQconst [d] x)) && is32Bit(int64(d) ^ 1<<uint32(c))     => (XORQconst [d ^ 1<<uint32(c)] x)
-(XORQconst [c] (BTCQconst [d] x)) && is32Bit(int64(c) ^ 1<<uint32(d))     => (XORQconst [c ^ 1<<uint32(d)] x)
-(BTCQconst [c] (BTCQconst [d] x)) && is32Bit(1<<uint32(c) ^ 1<<uint32(d)) => (XORQconst [1<<uint32(c) ^ 1<<uint32(d)] x)
-
-(BTSQconst [c] (ORQconst  [d] x)) && is32Bit(int64(d) | 1<<uint32(c))     => (ORQconst [d | 1<<uint32(c)] x)
-(ORQconst  [c] (BTSQconst [d] x)) && is32Bit(int64(c) | 1<<uint32(d))     => (ORQconst [c | 1<<uint32(d)] x)
-(BTSQconst [c] (BTSQconst [d] x)) && is32Bit(1<<uint32(c) | 1<<uint32(d)) => (ORQconst [1<<uint32(c) | 1<<uint32(d)] x)
-
-
 (MULLconst [c] (MULLconst [d] x)) => (MULLconst [c * d] x)
 (MULQconst [c] (MULQconst [d] x)) && is32Bit(int64(c)*int64(d)) => (MULQconst [c * d] x)
 
 (NOTQ (MOVQconst [c])) => (MOVQconst [^c])
 (NOTL (MOVLconst [c])) => (MOVLconst [^c])
 (BTSQconst [c] (MOVQconst [d])) => (MOVQconst [d|(1<<uint32(c))])
-(BTSLconst [c] (MOVLconst [d])) => (MOVLconst [d|(1<<uint32(c))])
 (BTRQconst [c] (MOVQconst [d])) => (MOVQconst [d&^(1<<uint32(c))])
-(BTRLconst [c] (MOVLconst [d])) => (MOVLconst [d&^(1<<uint32(c))])
 (BTCQconst [c] (MOVQconst [d])) => (MOVQconst [d^(1<<uint32(c))])
-(BTCLconst [c] (MOVLconst [d])) => (MOVLconst [d^(1<<uint32(c))])
 
 // If c or d doesn't fit into 32 bits, then we can't construct ORQconst,
 // but we can still constant-fold.
 (MOVQstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Qload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) => ((ADD|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
 (MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
        ((ADD|SUB|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
+(MOVQstore {sym} [off] ptr x:(BT(S|R|C)Qconst [c] l:(MOVQload {sym} [off] ptr mem)) mem) && x.Uses == 1 && l.Uses == 1 && clobber(x, l) =>
+       (BT(S|R|C)Qconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
 
 // Merge ADDQconst and LEAQ into atomic loads.
 (MOV(Q|L|B)atomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) =>
index e9205d56c6181c788b5391f9266b3b93c0134a51..606171947bbd75b188bfff8c0ee9bc65f26c59d6 100644 (file)
@@ -399,12 +399,27 @@ func init() {
                {name: "BTSQ", argLength: 2, reg: gp21, asm: "BTSQ", resultInArg0: true, clobberFlags: true},                   // set bit arg1%64 in arg0
                {name: "BTLconst", argLength: 1, reg: gp1flags, asm: "BTL", typ: "Flags", aux: "Int8"},                         // test whether bit auxint in arg0 is set, 0 <= auxint < 32
                {name: "BTQconst", argLength: 1, reg: gp1flags, asm: "BTQ", typ: "Flags", aux: "Int8"},                         // test whether bit auxint in arg0 is set, 0 <= auxint < 64
-               {name: "BTCLconst", argLength: 1, reg: gp11, asm: "BTCL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // complement bit auxint in arg0, 0 <= auxint < 32
-               {name: "BTCQconst", argLength: 1, reg: gp11, asm: "BTCQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // complement bit auxint in arg0, 0 <= auxint < 64
-               {name: "BTRLconst", argLength: 1, reg: gp11, asm: "BTRL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // reset bit auxint in arg0, 0 <= auxint < 32
-               {name: "BTRQconst", argLength: 1, reg: gp11, asm: "BTRQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // reset bit auxint in arg0, 0 <= auxint < 64
-               {name: "BTSLconst", argLength: 1, reg: gp11, asm: "BTSL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 32
-               {name: "BTSQconst", argLength: 1, reg: gp11, asm: "BTSQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 64
+               {name: "BTCQconst", argLength: 1, reg: gp11, asm: "BTCQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // complement bit auxint in arg0, 31 <= auxint < 64
+               {name: "BTRQconst", argLength: 1, reg: gp11, asm: "BTRQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // reset bit auxint in arg0, 31 <= auxint < 64
+               {name: "BTSQconst", argLength: 1, reg: gp11, asm: "BTSQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 31 <= auxint < 64
+
+               // BT[SRC]Qconstmodify
+               //
+               //  S: set bit
+               //  R: reset (clear) bit
+               //  C: complement bit
+               //
+               // Apply operation to bit ValAndOff(AuxInt).Val() in the 64 bits at
+               // memory address arg0+ValAndOff(AuxInt).Off()+aux
+               // Bit index must be in range (31-63).
+               // (We use OR/AND/XOR for thinner targets and lower bit indexes.)
+               // arg1=mem, returns mem
+               //
+               // Note that there aren't non-const versions of these instructions.
+               // Well, there are such instructions, but they are slow and weird so we don't use them.
+               {name: "BTSQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTSQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"},
+               {name: "BTRQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTRQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"},
+               {name: "BTCQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTCQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"},
 
                // TESTx: compare (arg0 & arg1) to 0
                {name: "TESTQ", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTQ", typ: "Flags"},
index 64aea38afe8b33393934f796a642e2f09d17c004..84dcd9a3cc8abb6ecd69cfb0dfd7f482e0be524a 100644 (file)
@@ -716,12 +716,12 @@ const (
        OpAMD64BTSQ
        OpAMD64BTLconst
        OpAMD64BTQconst
-       OpAMD64BTCLconst
        OpAMD64BTCQconst
-       OpAMD64BTRLconst
        OpAMD64BTRQconst
-       OpAMD64BTSLconst
        OpAMD64BTSQconst
+       OpAMD64BTSQconstmodify
+       OpAMD64BTRQconstmodify
+       OpAMD64BTCQconstmodify
        OpAMD64TESTQ
        OpAMD64TESTL
        OpAMD64TESTW
@@ -8779,12 +8779,12 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:         "BTCLconst",
+               name:         "BTCQconst",
                auxType:      auxInt8,
                argLen:       1,
                resultInArg0: true,
                clobberFlags: true,
-               asm:          x86.ABTCL,
+               asm:          x86.ABTCQ,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
@@ -8795,12 +8795,12 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:         "BTCQconst",
+               name:         "BTRQconst",
                auxType:      auxInt8,
                argLen:       1,
                resultInArg0: true,
                clobberFlags: true,
-               asm:          x86.ABTCQ,
+               asm:          x86.ABTRQ,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
@@ -8811,12 +8811,12 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:         "BTRLconst",
+               name:         "BTSQconst",
                auxType:      auxInt8,
                argLen:       1,
                resultInArg0: true,
                clobberFlags: true,
-               asm:          x86.ABTRL,
+               asm:          x86.ABTSQ,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
@@ -8827,50 +8827,44 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:         "BTRQconst",
-               auxType:      auxInt8,
-               argLen:       1,
-               resultInArg0: true,
-               clobberFlags: true,
-               asm:          x86.ABTRQ,
+               name:           "BTSQconstmodify",
+               auxType:        auxSymValAndOff,
+               argLen:         2,
+               clobberFlags:   true,
+               faultOnNilArg0: true,
+               symEffect:      SymRead | SymWrite,
+               asm:            x86.ABTSQ,
                reg: regInfo{
                        inputs: []inputInfo{
-                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
-                       },
-                       outputs: []outputInfo{
-                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
                        },
                },
        },
        {
-               name:         "BTSLconst",
-               auxType:      auxInt8,
-               argLen:       1,
-               resultInArg0: true,
-               clobberFlags: true,
-               asm:          x86.ABTSL,
+               name:           "BTRQconstmodify",
+               auxType:        auxSymValAndOff,
+               argLen:         2,
+               clobberFlags:   true,
+               faultOnNilArg0: true,
+               symEffect:      SymRead | SymWrite,
+               asm:            x86.ABTRQ,
                reg: regInfo{
                        inputs: []inputInfo{
-                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
-                       },
-                       outputs: []outputInfo{
-                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
                        },
                },
        },
        {
-               name:         "BTSQconst",
-               auxType:      auxInt8,
-               argLen:       1,
-               resultInArg0: true,
-               clobberFlags: true,
-               asm:          x86.ABTSQ,
+               name:           "BTCQconstmodify",
+               auxType:        auxSymValAndOff,
+               argLen:         2,
+               clobberFlags:   true,
+               faultOnNilArg0: true,
+               symEffect:      SymRead | SymWrite,
+               asm:            x86.ABTCQ,
                reg: regInfo{
                        inputs: []inputInfo{
-                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
-                       },
-                       outputs: []outputInfo{
-                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
                        },
                },
        },
index afe9ed257a5612ef50379f469fafe4d12d7cea76..979d9be3a726435c601d277f454ac62b4567d164 100644 (file)
@@ -73,20 +73,14 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64BSWAPL(v)
        case OpAMD64BSWAPQ:
                return rewriteValueAMD64_OpAMD64BSWAPQ(v)
-       case OpAMD64BTCLconst:
-               return rewriteValueAMD64_OpAMD64BTCLconst(v)
        case OpAMD64BTCQconst:
                return rewriteValueAMD64_OpAMD64BTCQconst(v)
        case OpAMD64BTLconst:
                return rewriteValueAMD64_OpAMD64BTLconst(v)
        case OpAMD64BTQconst:
                return rewriteValueAMD64_OpAMD64BTQconst(v)
-       case OpAMD64BTRLconst:
-               return rewriteValueAMD64_OpAMD64BTRLconst(v)
        case OpAMD64BTRQconst:
                return rewriteValueAMD64_OpAMD64BTRQconst(v)
-       case OpAMD64BTSLconst:
-               return rewriteValueAMD64_OpAMD64BTSLconst(v)
        case OpAMD64BTSQconst:
                return rewriteValueAMD64_OpAMD64BTSQconst(v)
        case OpAMD64CMOVLCC:
@@ -2626,26 +2620,6 @@ func rewriteValueAMD64_OpAMD64ANDL(v *Value) bool {
                }
                break
        }
-       // match: (ANDL (MOVLconst [c]) x)
-       // cond: isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
-       // result: (BTRLconst [int8(log32(^c))] x)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64MOVLconst {
-                               continue
-                       }
-                       c := auxIntToInt32(v_0.AuxInt)
-                       x := v_1
-                       if !(isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128) {
-                               continue
-                       }
-                       v.reset(OpAMD64BTRLconst)
-                       v.AuxInt = int8ToAuxInt(int8(log32(^c)))
-                       v.AddArg(x)
-                       return true
-               }
-               break
-       }
        // match: (ANDL x (MOVLconst [c]))
        // result: (ANDLconst [c] x)
        for {
@@ -2754,20 +2728,6 @@ func rewriteValueAMD64_OpAMD64ANDL(v *Value) bool {
 }
 func rewriteValueAMD64_OpAMD64ANDLconst(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (ANDLconst [c] x)
-       // cond: isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
-       // result: (BTRLconst [int8(log32(^c))] x)
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               x := v_0
-               if !(isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128) {
-                       break
-               }
-               v.reset(OpAMD64BTRLconst)
-               v.AuxInt = int8ToAuxInt(int8(log32(^c)))
-               v.AddArg(x)
-               return true
-       }
        // match: (ANDLconst [c] (ANDLconst [d] x))
        // result: (ANDLconst [c & d] x)
        for {
@@ -2782,20 +2742,6 @@ func rewriteValueAMD64_OpAMD64ANDLconst(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ANDLconst [c] (BTRLconst [d] x))
-       // result: (ANDLconst [c &^ (1<<uint32(d))] x)
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               if v_0.Op != OpAMD64BTRLconst {
-                       break
-               }
-               d := auxIntToInt8(v_0.AuxInt)
-               x := v_0.Args[0]
-               v.reset(OpAMD64ANDLconst)
-               v.AuxInt = int32ToAuxInt(c &^ (1 << uint32(d)))
-               v.AddArg(x)
-               return true
-       }
        // match: (ANDLconst [ 0xFF] x)
        // result: (MOVBQZX x)
        for {
@@ -3099,7 +3045,7 @@ func rewriteValueAMD64_OpAMD64ANDQ(v *Value) bool {
                break
        }
        // match: (ANDQ (MOVQconst [c]) x)
-       // cond: isUint64PowerOfTwo(^c) && uint64(^c) >= 128
+       // cond: isUint64PowerOfTwo(^c) && uint64(^c) >= 1<<31
        // result: (BTRQconst [int8(log64(^c))] x)
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
@@ -3108,7 +3054,7 @@ func rewriteValueAMD64_OpAMD64ANDQ(v *Value) bool {
                        }
                        c := auxIntToInt64(v_0.AuxInt)
                        x := v_1
-                       if !(isUint64PowerOfTwo(^c) && uint64(^c) >= 128) {
+                       if !(isUint64PowerOfTwo(^c) && uint64(^c) >= 1<<31) {
                                continue
                        }
                        v.reset(OpAMD64BTRQconst)
@@ -3230,20 +3176,6 @@ func rewriteValueAMD64_OpAMD64ANDQ(v *Value) bool {
 }
 func rewriteValueAMD64_OpAMD64ANDQconst(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (ANDQconst [c] x)
-       // cond: isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128
-       // result: (BTRQconst [int8(log32(^c))] x)
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               x := v_0
-               if !(isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128) {
-                       break
-               }
-               v.reset(OpAMD64BTRQconst)
-               v.AuxInt = int8ToAuxInt(int8(log32(^c)))
-               v.AddArg(x)
-               return true
-       }
        // match: (ANDQconst [c] (ANDQconst [d] x))
        // result: (ANDQconst [c & d] x)
        for {
@@ -3258,24 +3190,6 @@ func rewriteValueAMD64_OpAMD64ANDQconst(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ANDQconst [c] (BTRQconst [d] x))
-       // cond: is32Bit(int64(c) &^ (1<<uint32(d)))
-       // result: (ANDQconst [c &^ (1<<uint32(d))] x)
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               if v_0.Op != OpAMD64BTRQconst {
-                       break
-               }
-               d := auxIntToInt8(v_0.AuxInt)
-               x := v_0.Args[0]
-               if !(is32Bit(int64(c) &^ (1 << uint32(d)))) {
-                       break
-               }
-               v.reset(OpAMD64ANDQconst)
-               v.AuxInt = int32ToAuxInt(c &^ (1 << uint32(d)))
-               v.AddArg(x)
-               return true
-       }
        // match: (ANDQconst [ 0xFF] x)
        // result: (MOVBQZX x)
        for {
@@ -3677,88 +3591,8 @@ func rewriteValueAMD64_OpAMD64BSWAPQ(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64BTCLconst(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (BTCLconst [c] (XORLconst [d] x))
-       // result: (XORLconst [d ^ 1<<uint32(c)] x)
-       for {
-               c := auxIntToInt8(v.AuxInt)
-               if v_0.Op != OpAMD64XORLconst {
-                       break
-               }
-               d := auxIntToInt32(v_0.AuxInt)
-               x := v_0.Args[0]
-               v.reset(OpAMD64XORLconst)
-               v.AuxInt = int32ToAuxInt(d ^ 1<<uint32(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (BTCLconst [c] (BTCLconst [d] x))
-       // result: (XORLconst [1<<uint32(c) | 1<<uint32(d)] x)
-       for {
-               c := auxIntToInt8(v.AuxInt)
-               if v_0.Op != OpAMD64BTCLconst {
-                       break
-               }
-               d := auxIntToInt8(v_0.AuxInt)
-               x := v_0.Args[0]
-               v.reset(OpAMD64XORLconst)
-               v.AuxInt = int32ToAuxInt(1<<uint32(c) | 1<<uint32(d))
-               v.AddArg(x)
-               return true
-       }
-       // match: (BTCLconst [c] (MOVLconst [d]))
-       // result: (MOVLconst [d^(1<<uint32(c))])
-       for {
-               c := auxIntToInt8(v.AuxInt)
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               d := auxIntToInt32(v_0.AuxInt)
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = int32ToAuxInt(d ^ (1 << uint32(c)))
-               return true
-       }
-       return false
-}
 func rewriteValueAMD64_OpAMD64BTCQconst(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (BTCQconst [c] (XORQconst [d] x))
-       // cond: is32Bit(int64(d) ^ 1<<uint32(c))
-       // result: (XORQconst [d ^ 1<<uint32(c)] x)
-       for {
-               c := auxIntToInt8(v.AuxInt)
-               if v_0.Op != OpAMD64XORQconst {
-                       break
-               }
-               d := auxIntToInt32(v_0.AuxInt)
-               x := v_0.Args[0]
-               if !(is32Bit(int64(d) ^ 1<<uint32(c))) {
-                       break
-               }
-               v.reset(OpAMD64XORQconst)
-               v.AuxInt = int32ToAuxInt(d ^ 1<<uint32(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (BTCQconst [c] (BTCQconst [d] x))
-       // cond: is32Bit(1<<uint32(c) ^ 1<<uint32(d))
-       // result: (XORQconst [1<<uint32(c) ^ 1<<uint32(d)] x)
-       for {
-               c := auxIntToInt8(v.AuxInt)
-               if v_0.Op != OpAMD64BTCQconst {
-                       break
-               }
-               d := auxIntToInt8(v_0.AuxInt)
-               x := v_0.Args[0]
-               if !(is32Bit(1<<uint32(c) ^ 1<<uint32(d))) {
-                       break
-               }
-               v.reset(OpAMD64XORQconst)
-               v.AuxInt = int32ToAuxInt(1<<uint32(c) ^ 1<<uint32(d))
-               v.AddArg(x)
-               return true
-       }
        // match: (BTCQconst [c] (MOVQconst [d]))
        // result: (MOVQconst [d^(1<<uint32(c))])
        for {
@@ -3953,76 +3787,6 @@ func rewriteValueAMD64_OpAMD64BTQconst(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64BTRLconst(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (BTRLconst [c] (BTSLconst [c] x))
-       // result: (BTRLconst [c] x)
-       for {
-               c := auxIntToInt8(v.AuxInt)
-               if v_0.Op != OpAMD64BTSLconst || auxIntToInt8(v_0.AuxInt) != c {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64BTRLconst)
-               v.AuxInt = int8ToAuxInt(c)
-               v.AddArg(x)
-               return true
-       }
-       // match: (BTRLconst [c] (BTCLconst [c] x))
-       // result: (BTRLconst [c] x)
-       for {
-               c := auxIntToInt8(v.AuxInt)
-               if v_0.Op != OpAMD64BTCLconst || auxIntToInt8(v_0.AuxInt) != c {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64BTRLconst)
-               v.AuxInt = int8ToAuxInt(c)
-               v.AddArg(x)
-               return true
-       }
-       // match: (BTRLconst [c] (ANDLconst [d] x))
-       // result: (ANDLconst [d &^ (1<<uint32(c))] x)
-       for {
-               c := auxIntToInt8(v.AuxInt)
-               if v_0.Op != OpAMD64ANDLconst {
-                       break
-               }
-               d := auxIntToInt32(v_0.AuxInt)
-               x := v_0.Args[0]
-               v.reset(OpAMD64ANDLconst)
-               v.AuxInt = int32ToAuxInt(d &^ (1 << uint32(c)))
-               v.AddArg(x)
-               return true
-       }
-       // match: (BTRLconst [c] (BTRLconst [d] x))
-       // result: (ANDLconst [^(1<<uint32(c) | 1<<uint32(d))] x)
-       for {
-               c := auxIntToInt8(v.AuxInt)
-               if v_0.Op != OpAMD64BTRLconst {
-                       break
-               }
-               d := auxIntToInt8(v_0.AuxInt)
-               x := v_0.Args[0]
-               v.reset(OpAMD64ANDLconst)
-               v.AuxInt = int32ToAuxInt(^(1<<uint32(c) | 1<<uint32(d)))
-               v.AddArg(x)
-               return true
-       }
-       // match: (BTRLconst [c] (MOVLconst [d]))
-       // result: (MOVLconst [d&^(1<<uint32(c))])
-       for {
-               c := auxIntToInt8(v.AuxInt)
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               d := auxIntToInt32(v_0.AuxInt)
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = int32ToAuxInt(d &^ (1 << uint32(c)))
-               return true
-       }
-       return false
-}
 func rewriteValueAMD64_OpAMD64BTRQconst(v *Value) bool {
        v_0 := v.Args[0]
        // match: (BTRQconst [c] (BTSQconst [c] x))
@@ -4051,42 +3815,6 @@ func rewriteValueAMD64_OpAMD64BTRQconst(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (BTRQconst [c] (ANDQconst [d] x))
-       // cond: is32Bit(int64(d) &^ (1<<uint32(c)))
-       // result: (ANDQconst [d &^ (1<<uint32(c))] x)
-       for {
-               c := auxIntToInt8(v.AuxInt)
-               if v_0.Op != OpAMD64ANDQconst {
-                       break
-               }
-               d := auxIntToInt32(v_0.AuxInt)
-               x := v_0.Args[0]
-               if !(is32Bit(int64(d) &^ (1 << uint32(c)))) {
-                       break
-               }
-               v.reset(OpAMD64ANDQconst)
-               v.AuxInt = int32ToAuxInt(d &^ (1 << uint32(c)))
-               v.AddArg(x)
-               return true
-       }
-       // match: (BTRQconst [c] (BTRQconst [d] x))
-       // cond: is32Bit(^(1<<uint32(c) | 1<<uint32(d)))
-       // result: (ANDQconst [^(1<<uint32(c) | 1<<uint32(d))] x)
-       for {
-               c := auxIntToInt8(v.AuxInt)
-               if v_0.Op != OpAMD64BTRQconst {
-                       break
-               }
-               d := auxIntToInt8(v_0.AuxInt)
-               x := v_0.Args[0]
-               if !(is32Bit(^(1<<uint32(c) | 1<<uint32(d)))) {
-                       break
-               }
-               v.reset(OpAMD64ANDQconst)
-               v.AuxInt = int32ToAuxInt(^(1<<uint32(c) | 1<<uint32(d)))
-               v.AddArg(x)
-               return true
-       }
        // match: (BTRQconst [c] (MOVQconst [d]))
        // result: (MOVQconst [d&^(1<<uint32(c))])
        for {
@@ -4101,76 +3829,6 @@ func rewriteValueAMD64_OpAMD64BTRQconst(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64BTSLconst(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (BTSLconst [c] (BTRLconst [c] x))
-       // result: (BTSLconst [c] x)
-       for {
-               c := auxIntToInt8(v.AuxInt)
-               if v_0.Op != OpAMD64BTRLconst || auxIntToInt8(v_0.AuxInt) != c {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64BTSLconst)
-               v.AuxInt = int8ToAuxInt(c)
-               v.AddArg(x)
-               return true
-       }
-       // match: (BTSLconst [c] (BTCLconst [c] x))
-       // result: (BTSLconst [c] x)
-       for {
-               c := auxIntToInt8(v.AuxInt)
-               if v_0.Op != OpAMD64BTCLconst || auxIntToInt8(v_0.AuxInt) != c {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64BTSLconst)
-               v.AuxInt = int8ToAuxInt(c)
-               v.AddArg(x)
-               return true
-       }
-       // match: (BTSLconst [c] (ORLconst [d] x))
-       // result: (ORLconst [d | 1<<uint32(c)] x)
-       for {
-               c := auxIntToInt8(v.AuxInt)
-               if v_0.Op != OpAMD64ORLconst {
-                       break
-               }
-               d := auxIntToInt32(v_0.AuxInt)
-               x := v_0.Args[0]
-               v.reset(OpAMD64ORLconst)
-               v.AuxInt = int32ToAuxInt(d | 1<<uint32(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (BTSLconst [c] (BTSLconst [d] x))
-       // result: (ORLconst [1<<uint32(c) | 1<<uint32(d)] x)
-       for {
-               c := auxIntToInt8(v.AuxInt)
-               if v_0.Op != OpAMD64BTSLconst {
-                       break
-               }
-               d := auxIntToInt8(v_0.AuxInt)
-               x := v_0.Args[0]
-               v.reset(OpAMD64ORLconst)
-               v.AuxInt = int32ToAuxInt(1<<uint32(c) | 1<<uint32(d))
-               v.AddArg(x)
-               return true
-       }
-       // match: (BTSLconst [c] (MOVLconst [d]))
-       // result: (MOVLconst [d|(1<<uint32(c))])
-       for {
-               c := auxIntToInt8(v.AuxInt)
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               d := auxIntToInt32(v_0.AuxInt)
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = int32ToAuxInt(d | (1 << uint32(c)))
-               return true
-       }
-       return false
-}
 func rewriteValueAMD64_OpAMD64BTSQconst(v *Value) bool {
        v_0 := v.Args[0]
        // match: (BTSQconst [c] (BTRQconst [c] x))
@@ -4199,42 +3857,6 @@ func rewriteValueAMD64_OpAMD64BTSQconst(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (BTSQconst [c] (ORQconst [d] x))
-       // cond: is32Bit(int64(d) | 1<<uint32(c))
-       // result: (ORQconst [d | 1<<uint32(c)] x)
-       for {
-               c := auxIntToInt8(v.AuxInt)
-               if v_0.Op != OpAMD64ORQconst {
-                       break
-               }
-               d := auxIntToInt32(v_0.AuxInt)
-               x := v_0.Args[0]
-               if !(is32Bit(int64(d) | 1<<uint32(c))) {
-                       break
-               }
-               v.reset(OpAMD64ORQconst)
-               v.AuxInt = int32ToAuxInt(d | 1<<uint32(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (BTSQconst [c] (BTSQconst [d] x))
-       // cond: is32Bit(1<<uint32(c) | 1<<uint32(d))
-       // result: (ORQconst [1<<uint32(c) | 1<<uint32(d)] x)
-       for {
-               c := auxIntToInt8(v.AuxInt)
-               if v_0.Op != OpAMD64BTSQconst {
-                       break
-               }
-               d := auxIntToInt8(v_0.AuxInt)
-               x := v_0.Args[0]
-               if !(is32Bit(1<<uint32(c) | 1<<uint32(d))) {
-                       break
-               }
-               v.reset(OpAMD64ORQconst)
-               v.AuxInt = int32ToAuxInt(1<<uint32(c) | 1<<uint32(d))
-               v.AddArg(x)
-               return true
-       }
        // match: (BTSQconst [c] (MOVQconst [d]))
        // result: (MOVQconst [d|(1<<uint32(c))])
        for {
@@ -12306,6 +11928,84 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool {
                }
                break
        }
+       // match: (MOVQstore {sym} [off] ptr x:(BTSQconst [c] l:(MOVQload {sym} [off] ptr mem)) mem)
+       // cond: x.Uses == 1 && l.Uses == 1 && clobber(x, l)
+       // result: (BTSQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               x := v_1
+               if x.Op != OpAMD64BTSQconst {
+                       break
+               }
+               c := auxIntToInt8(x.AuxInt)
+               l := x.Args[0]
+               if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
+                       break
+               }
+               mem := l.Args[1]
+               if ptr != l.Args[0] || mem != v_2 || !(x.Uses == 1 && l.Uses == 1 && clobber(x, l)) {
+                       break
+               }
+               v.reset(OpAMD64BTSQconstmodify)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off))
+               v.Aux = symToAux(sym)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (MOVQstore {sym} [off] ptr x:(BTRQconst [c] l:(MOVQload {sym} [off] ptr mem)) mem)
+       // cond: x.Uses == 1 && l.Uses == 1 && clobber(x, l)
+       // result: (BTRQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               x := v_1
+               if x.Op != OpAMD64BTRQconst {
+                       break
+               }
+               c := auxIntToInt8(x.AuxInt)
+               l := x.Args[0]
+               if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
+                       break
+               }
+               mem := l.Args[1]
+               if ptr != l.Args[0] || mem != v_2 || !(x.Uses == 1 && l.Uses == 1 && clobber(x, l)) {
+                       break
+               }
+               v.reset(OpAMD64BTRQconstmodify)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off))
+               v.Aux = symToAux(sym)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (MOVQstore {sym} [off] ptr x:(BTCQconst [c] l:(MOVQload {sym} [off] ptr mem)) mem)
+       // cond: x.Uses == 1 && l.Uses == 1 && clobber(x, l)
+       // result: (BTCQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               x := v_1
+               if x.Op != OpAMD64BTCQconst {
+                       break
+               }
+               c := auxIntToInt8(x.AuxInt)
+               l := x.Args[0]
+               if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
+                       break
+               }
+               mem := l.Args[1]
+               if ptr != l.Args[0] || mem != v_2 || !(x.Uses == 1 && l.Uses == 1 && clobber(x, l)) {
+                       break
+               }
+               v.reset(OpAMD64BTCQconstmodify)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off))
+               v.Aux = symToAux(sym)
+               v.AddArg2(ptr, mem)
+               return true
+       }
        // match: (MOVQstore [off] {sym} ptr a:(ADDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
        // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)
        // result: (ADDQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
@@ -14643,26 +14343,6 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                }
                break
        }
-       // match: (ORL (MOVLconst [c]) x)
-       // cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
-       // result: (BTSLconst [int8(log32(c))] x)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64MOVLconst {
-                               continue
-                       }
-                       c := auxIntToInt32(v_0.AuxInt)
-                       x := v_1
-                       if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) {
-                               continue
-                       }
-                       v.reset(OpAMD64BTSLconst)
-                       v.AuxInt = int8ToAuxInt(int8(log32(c)))
-                       v.AddArg(x)
-                       return true
-               }
-               break
-       }
        // match: (ORL x (MOVLconst [c]))
        // result: (ORLconst [c] x)
        for {
@@ -14718,20 +14398,6 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
 }
 func rewriteValueAMD64_OpAMD64ORLconst(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (ORLconst [c] x)
-       // cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
-       // result: (BTSLconst [int8(log32(c))] x)
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               x := v_0
-               if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) {
-                       break
-               }
-               v.reset(OpAMD64BTSLconst)
-               v.AuxInt = int8ToAuxInt(int8(log32(c)))
-               v.AddArg(x)
-               return true
-       }
        // match: (ORLconst [c] (ORLconst [d] x))
        // result: (ORLconst [c | d] x)
        for {
@@ -14746,20 +14412,6 @@ func rewriteValueAMD64_OpAMD64ORLconst(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ORLconst [c] (BTSLconst [d] x))
-       // result: (ORLconst [c | 1<<uint32(d)] x)
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               if v_0.Op != OpAMD64BTSLconst {
-                       break
-               }
-               d := auxIntToInt8(v_0.AuxInt)
-               x := v_0.Args[0]
-               v.reset(OpAMD64ORLconst)
-               v.AuxInt = int32ToAuxInt(c | 1<<uint32(d))
-               v.AddArg(x)
-               return true
-       }
        // match: (ORLconst [c] x)
        // cond: c==0
        // result: x
@@ -14993,7 +14645,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                break
        }
        // match: (ORQ (MOVQconst [c]) x)
-       // cond: isUint64PowerOfTwo(c) && uint64(c) >= 128
+       // cond: isUint64PowerOfTwo(c) && uint64(c) >= 1<<31
        // result: (BTSQconst [int8(log64(c))] x)
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
@@ -15002,7 +14654,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                        }
                        c := auxIntToInt64(v_0.AuxInt)
                        x := v_1
-                       if !(isUint64PowerOfTwo(c) && uint64(c) >= 128) {
+                       if !(isUint64PowerOfTwo(c) && uint64(c) >= 1<<31) {
                                continue
                        }
                        v.reset(OpAMD64BTSQconst)
@@ -15201,20 +14853,6 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
 }
 func rewriteValueAMD64_OpAMD64ORQconst(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (ORQconst [c] x)
-       // cond: isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128
-       // result: (BTSQconst [int8(log32(c))] x)
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               x := v_0
-               if !(isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128) {
-                       break
-               }
-               v.reset(OpAMD64BTSQconst)
-               v.AuxInt = int8ToAuxInt(int8(log32(c)))
-               v.AddArg(x)
-               return true
-       }
        // match: (ORQconst [c] (ORQconst [d] x))
        // result: (ORQconst [c | d] x)
        for {
@@ -15229,24 +14867,6 @@ func rewriteValueAMD64_OpAMD64ORQconst(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ORQconst [c] (BTSQconst [d] x))
-       // cond: is32Bit(int64(c) | 1<<uint32(d))
-       // result: (ORQconst [c | 1<<uint32(d)] x)
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               if v_0.Op != OpAMD64BTSQconst {
-                       break
-               }
-               d := auxIntToInt8(v_0.AuxInt)
-               x := v_0.Args[0]
-               if !(is32Bit(int64(c) | 1<<uint32(d))) {
-                       break
-               }
-               v.reset(OpAMD64ORQconst)
-               v.AuxInt = int32ToAuxInt(c | 1<<uint32(d))
-               v.AddArg(x)
-               return true
-       }
        // match: (ORQconst [0] x)
        // result: x
        for {
@@ -21058,14 +20678,14 @@ func rewriteValueAMD64_OpAMD64SHLL(v *Value) bool {
 func rewriteValueAMD64_OpAMD64SHLLconst(v *Value) bool {
        v_0 := v.Args[0]
        // match: (SHLLconst [1] (SHRLconst [1] x))
-       // result: (BTRLconst [0] x)
+       // result: (ANDLconst [-2] x)
        for {
                if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHRLconst || auxIntToInt8(v_0.AuxInt) != 1 {
                        break
                }
                x := v_0.Args[0]
-               v.reset(OpAMD64BTRLconst)
-               v.AuxInt = int8ToAuxInt(0)
+               v.reset(OpAMD64ANDLconst)
+               v.AuxInt = int32ToAuxInt(-2)
                v.AddArg(x)
                return true
        }
@@ -21314,14 +20934,14 @@ func rewriteValueAMD64_OpAMD64SHLQ(v *Value) bool {
 func rewriteValueAMD64_OpAMD64SHLQconst(v *Value) bool {
        v_0 := v.Args[0]
        // match: (SHLQconst [1] (SHRQconst [1] x))
-       // result: (BTRQconst [0] x)
+       // result: (ANDQconst [-2] x)
        for {
                if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHRQconst || auxIntToInt8(v_0.AuxInt) != 1 {
                        break
                }
                x := v_0.Args[0]
-               v.reset(OpAMD64BTRQconst)
-               v.AuxInt = int8ToAuxInt(0)
+               v.reset(OpAMD64ANDQconst)
+               v.AuxInt = int32ToAuxInt(-2)
                v.AddArg(x)
                return true
        }
@@ -21741,14 +21361,14 @@ func rewriteValueAMD64_OpAMD64SHRL(v *Value) bool {
 func rewriteValueAMD64_OpAMD64SHRLconst(v *Value) bool {
        v_0 := v.Args[0]
        // match: (SHRLconst [1] (SHLLconst [1] x))
-       // result: (BTRLconst [31] x)
+       // result: (ANDLconst [0x7fffffff] x)
        for {
                if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHLLconst || auxIntToInt8(v_0.AuxInt) != 1 {
                        break
                }
                x := v_0.Args[0]
-               v.reset(OpAMD64BTRLconst)
-               v.AuxInt = int8ToAuxInt(31)
+               v.reset(OpAMD64ANDLconst)
+               v.AuxInt = int32ToAuxInt(0x7fffffff)
                v.AddArg(x)
                return true
        }
@@ -23450,26 +23070,6 @@ func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
                }
                break
        }
-       // match: (XORL (MOVLconst [c]) x)
-       // cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
-       // result: (BTCLconst [int8(log32(c))] x)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64MOVLconst {
-                               continue
-                       }
-                       c := auxIntToInt32(v_0.AuxInt)
-                       x := v_1
-                       if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) {
-                               continue
-                       }
-                       v.reset(OpAMD64BTCLconst)
-                       v.AuxInt = int8ToAuxInt(int8(log32(c)))
-                       v.AddArg(x)
-                       return true
-               }
-               break
-       }
        // match: (XORL x (MOVLconst [c]))
        // result: (XORLconst [c] x)
        for {
@@ -23541,20 +23141,6 @@ func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
 }
 func rewriteValueAMD64_OpAMD64XORLconst(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (XORLconst [c] x)
-       // cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
-       // result: (BTCLconst [int8(log32(c))] x)
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               x := v_0
-               if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) {
-                       break
-               }
-               v.reset(OpAMD64BTCLconst)
-               v.AuxInt = int8ToAuxInt(int8(log32(c)))
-               v.AddArg(x)
-               return true
-       }
        // match: (XORLconst [1] (SETNE x))
        // result: (SETEQ x)
        for {
@@ -23679,20 +23265,6 @@ func rewriteValueAMD64_OpAMD64XORLconst(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XORLconst [c] (BTCLconst [d] x))
-       // result: (XORLconst [c ^ 1<<uint32(d)] x)
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               if v_0.Op != OpAMD64BTCLconst {
-                       break
-               }
-               d := auxIntToInt8(v_0.AuxInt)
-               x := v_0.Args[0]
-               v.reset(OpAMD64XORLconst)
-               v.AuxInt = int32ToAuxInt(c ^ 1<<uint32(d))
-               v.AddArg(x)
-               return true
-       }
        // match: (XORLconst [c] x)
        // cond: c==0
        // result: x
@@ -23914,7 +23486,7 @@ func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool {
                break
        }
        // match: (XORQ (MOVQconst [c]) x)
-       // cond: isUint64PowerOfTwo(c) && uint64(c) >= 128
+       // cond: isUint64PowerOfTwo(c) && uint64(c) >= 1<<31
        // result: (BTCQconst [int8(log64(c))] x)
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
@@ -23923,7 +23495,7 @@ func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool {
                        }
                        c := auxIntToInt64(v_0.AuxInt)
                        x := v_1
-                       if !(isUint64PowerOfTwo(c) && uint64(c) >= 128) {
+                       if !(isUint64PowerOfTwo(c) && uint64(c) >= 1<<31) {
                                continue
                        }
                        v.reset(OpAMD64BTCQconst)
@@ -24008,20 +23580,6 @@ func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool {
 }
 func rewriteValueAMD64_OpAMD64XORQconst(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (XORQconst [c] x)
-       // cond: isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128
-       // result: (BTCQconst [int8(log32(c))] x)
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               x := v_0
-               if !(isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128) {
-                       break
-               }
-               v.reset(OpAMD64BTCQconst)
-               v.AuxInt = int8ToAuxInt(int8(log32(c)))
-               v.AddArg(x)
-               return true
-       }
        // match: (XORQconst [c] (XORQconst [d] x))
        // result: (XORQconst [c ^ d] x)
        for {
@@ -24036,24 +23594,6 @@ func rewriteValueAMD64_OpAMD64XORQconst(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XORQconst [c] (BTCQconst [d] x))
-       // cond: is32Bit(int64(c) ^ 1<<uint32(d))
-       // result: (XORQconst [c ^ 1<<uint32(d)] x)
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               if v_0.Op != OpAMD64BTCQconst {
-                       break
-               }
-               d := auxIntToInt8(v_0.AuxInt)
-               x := v_0.Args[0]
-               if !(is32Bit(int64(c) ^ 1<<uint32(d))) {
-                       break
-               }
-               v.reset(OpAMD64XORQconst)
-               v.AuxInt = int32ToAuxInt(c ^ 1<<uint32(d))
-               v.AddArg(x)
-               return true
-       }
        // match: (XORQconst [0] x)
        // result: x
        for {
@@ -25670,12 +25210,12 @@ func rewriteValueAMD64_OpCtz16(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Ctz16 x)
-       // result: (BSFL (BTSLconst <typ.UInt32> [16] x))
+       // result: (BSFL (ORLconst <typ.UInt32> [1<<16] x))
        for {
                x := v_0
                v.reset(OpAMD64BSFL)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTSLconst, typ.UInt32)
-               v0.AuxInt = int8ToAuxInt(16)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORLconst, typ.UInt32)
+               v0.AuxInt = int32ToAuxInt(1 << 16)
                v0.AddArg(x)
                v.AddArg(v0)
                return true
@@ -25848,12 +25388,12 @@ func rewriteValueAMD64_OpCtz8(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Ctz8 x)
-       // result: (BSFL (BTSLconst <typ.UInt32> [ 8] x))
+       // result: (BSFL (ORLconst <typ.UInt32> [1<<8 ] x))
        for {
                x := v_0
                v.reset(OpAMD64BSFL)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTSLconst, typ.UInt32)
-               v0.AuxInt = int8ToAuxInt(8)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORLconst, typ.UInt32)
+               v0.AuxInt = int32ToAuxInt(1 << 8)
                v0.AddArg(x)
                v.AddArg(v0)
                return true
index 018f5b909e6d64942c07821d451545114510765d..88d5ebe9cf0949b0e9eae80323bbd934b5dacfaa 100644 (file)
@@ -220,10 +220,10 @@ func biton32(a, b uint32) (n uint32) {
        // amd64:"BTSL"
        n += b | (1 << (a & 31))
 
-       // amd64:"BTSL\t[$]31"
+       // amd64:"ORL\t[$]-2147483648"
        n += a | (1 << 31)
 
-       // amd64:"BTSL\t[$]28"
+       // amd64:"ORL\t[$]268435456"
        n += a | (1 << 28)
 
        // amd64:"ORL\t[$]1"
@@ -236,10 +236,10 @@ func bitoff32(a, b uint32) (n uint32) {
        // amd64:"BTRL"
        n += b &^ (1 << (a & 31))
 
-       // amd64:"BTRL\t[$]31"
+       // amd64:"ANDL\t[$]2147483647"
        n += a &^ (1 << 31)
 
-       // amd64:"BTRL\t[$]28"
+       // amd64:"ANDL\t[$]-268435457"
        n += a &^ (1 << 28)
 
        // amd64:"ANDL\t[$]-2"
@@ -252,10 +252,10 @@ func bitcompl32(a, b uint32) (n uint32) {
        // amd64:"BTCL"
        n += b ^ (1 << (a & 31))
 
-       // amd64:"BTCL\t[$]31"
+       // amd64:"XORL\t[$]-2147483648"
        n += a ^ (1 << 31)
 
-       // amd64:"BTCL\t[$]28"
+       // amd64:"XORL\t[$]268435456"
        n += a ^ (1 << 28)
 
        // amd64:"XORL\t[$]1"
index 797aa23b678733284e8c05576109966262211e6a..d80bfaeec07d9693540191cff90e256f7c812e50 100644 (file)
@@ -335,7 +335,7 @@ func TrailingZeros32(n uint32) int {
 }
 
 func TrailingZeros16(n uint16) int {
-       // amd64:"BSFL","BTSL\\t\\$16"
+       // amd64:"BSFL","ORL\\t\\$65536"
        // 386:"BSFL\t"
        // arm:"ORR\t\\$65536","CLZ",-"MOVHU\tR"
        // arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t"
@@ -347,7 +347,7 @@ func TrailingZeros16(n uint16) int {
 }
 
 func TrailingZeros8(n uint8) int {
-       // amd64:"BSFL","BTSL\\t\\$8"
+       // amd64:"BSFL","ORL\\t\\$256"
        // 386:"BSFL"
        // arm:"ORR\t\\$256","CLZ",-"MOVBU\tR"
        // arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t"
index f6cf9450a1eb455b58f130c8d8bc46f3e8081160..e5e89c2acc9f4470a3489d47c6bdc0b8dc8fdc65 100644 (file)
@@ -372,3 +372,32 @@ func storeTest(a []bool, v int, i int) {
        // amd64: `BTL\t\$1,`,`SETCS\t3\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
        a[3+i] = v&2 != 0
 }
+
+func bitOps(p *[12]uint64) {
+       // amd64: `ORQ\t\$8, \(AX\)`
+       p[0] |= 8
+       // amd64: `ORQ\t\$1073741824, 8\(AX\)`
+       p[1] |= 1 << 30
+       // amd64: `BTSQ\t\$31, 16\(AX\)`
+       p[2] |= 1 << 31
+       // amd64: `BTSQ\t\$63, 24\(AX\)`
+       p[3] |= 1 << 63
+
+       // amd64: `ANDQ\t\$-9, 32\(AX\)`
+       p[4] &^= 8
+       // amd64: `ANDQ\t\$-1073741825, 40\(AX\)`
+       p[5] &^= 1 << 30
+       // amd64: `BTRQ\t\$31, 48\(AX\)`
+       p[6] &^= 1 << 31
+       // amd64: `BTRQ\t\$63, 56\(AX\)`
+       p[7] &^= 1 << 63
+
+       // amd64: `XORQ\t\$8, 64\(AX\)`
+       p[8] ^= 8
+       // amd64: `XORQ\t\$1073741824, 72\(AX\)`
+       p[9] ^= 1 << 30
+       // amd64: `BTCQ\t\$31, 80\(AX\)`
+       p[10] ^= 1 << 31
+       // amd64: `BTCQ\t\$63, 88\(AX\)`
+       p[11] ^= 1 << 63
+}