p.To.Type = obj.TYPE_REG
p.To.Reg = r
- case ssa.OpAMD64CMOVQEQconst, ssa.OpAMD64CMOVLEQconst, ssa.OpAMD64CMOVWEQconst,
- ssa.OpAMD64CMOVQNEconst, ssa.OpAMD64CMOVLNEconst, ssa.OpAMD64CMOVWNEconst:
+ case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ:
r := gc.SSARegNum(v)
if r != gc.SSARegNum(v.Args[0]) {
v.Fatalf("input[0] and output not in same register %s", v.LongString())
}
-
- // Constant into AX
- p := gc.Prog(moveByType(v.Type))
- p.From.Type = obj.TYPE_CONST
- p.From.Offset = v.AuxInt
- p.To.Type = obj.TYPE_REG
- p.To.Reg = x86.REG_AX
-
- p = gc.Prog(v.Op.Asm())
+ p := gc.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
- p.From.Reg = x86.REG_AX
+ p.From.Reg = gc.SSARegNum(v.Args[1])
p.To.Type = obj.TYPE_REG
p.To.Reg = r
p := gc.Prog(v.Op.Asm())
p.To.Type = obj.TYPE_REG
p.To.Reg = r
- case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSFW,
- ssa.OpAMD64BSRQ, ssa.OpAMD64BSRL, ssa.OpAMD64BSRW,
- ssa.OpAMD64SQRTSD:
+ case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL:
+ p := gc.Prog(v.Op.Asm())
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = gc.SSARegNum(v.Args[0])
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = gc.SSARegNum0(v)
+ case ssa.OpAMD64SQRTSD:
p := gc.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = gc.SSARegNum(v.Args[0])
if s != nil && s.Pkg != nil && s.Pkg.Path == "runtime/internal/sys" {
switch s.Name {
case
- "Ctz64", "Ctz32", "Ctz16",
+ "Ctz64", "Ctz32",
"Bswap64", "Bswap32":
return true
}
result = s.newValue1(ssa.OpCtz64, Types[TUINT64], s.intrinsicFirstArg(n))
case "Ctz32":
result = s.newValue1(ssa.OpCtz32, Types[TUINT32], s.intrinsicFirstArg(n))
- case "Ctz16":
- result = s.newValue1(ssa.OpCtz16, Types[TUINT16], s.intrinsicFirstArg(n))
case "Bswap64":
result = s.newValue1(ssa.OpBswap64, Types[TUINT64], s.intrinsicFirstArg(n))
case "Bswap32":
(OffPtr [off] ptr) && config.PtrSize == 4 -> (ADDLconst [off] ptr)
// Lowering other arithmetic
-// TODO: CMPQconst 0 below is redundant because BSF sets Z but how to remove?
-(Ctz64 <t> x) -> (CMOVQEQconst (BSFQ <t> x) (CMPQconst x [0]) [64])
-(Ctz32 <t> x) -> (CMOVLEQconst (BSFL <t> x) (CMPLconst x [0]) [32])
-(Ctz16 <t> x) -> (CMOVWEQconst (BSFW <t> x) (CMPWconst x [0]) [16])
+(Ctz64 <t> x) -> (CMOVQEQ (Select0 <t> (BSFQ x)) (MOVQconst <t> [64]) (Select1 <TypeFlags> (BSFQ x)))
+(Ctz32 <t> x) -> (CMOVLEQ (Select0 <t> (BSFL x)) (MOVLconst <t> [32]) (Select1 <TypeFlags> (BSFL x)))
(Bswap64 x) -> (BSWAPQ x)
(Bswap32 x) -> (BSWAPL x)
(CMPWconst x [0]) -> (TESTW x x)
(CMPBconst x [0]) -> (TESTB x x)
-// Optimizing conditional moves
-(CMOVQEQconst x (InvertFlags y) [c]) -> (CMOVQNEconst x y [c])
-(CMOVLEQconst x (InvertFlags y) [c]) -> (CMOVLNEconst x y [c])
-(CMOVWEQconst x (InvertFlags y) [c]) -> (CMOVWNEconst x y [c])
-
-(CMOVQEQconst _ (FlagEQ) [c]) -> (Const64 [c])
-(CMOVLEQconst _ (FlagEQ) [c]) -> (Const32 [c])
-(CMOVWEQconst _ (FlagEQ) [c]) -> (Const16 [c])
-
-(CMOVQEQconst x (FlagLT_ULT)) -> x
-(CMOVLEQconst x (FlagLT_ULT)) -> x
-(CMOVWEQconst x (FlagLT_ULT)) -> x
-
-(CMOVQEQconst x (FlagLT_UGT)) -> x
-(CMOVLEQconst x (FlagLT_UGT)) -> x
-(CMOVWEQconst x (FlagLT_UGT)) -> x
-
-(CMOVQEQconst x (FlagGT_ULT)) -> x
-(CMOVLEQconst x (FlagGT_ULT)) -> x
-(CMOVWEQconst x (FlagGT_ULT)) -> x
-
-(CMOVQEQconst x (FlagGT_UGT)) -> x
-(CMOVLEQconst x (FlagGT_UGT)) -> x
-(CMOVWEQconst x (FlagGT_UGT)) -> x
-
// Combining byte loads into larger (unaligned) loads.
// There are many ways these combinations could occur. This is
// designed to match the way encoding/binary.LittleEndian does it.
gp1flags = regInfo{inputs: []regMask{gpsp}}
flagsgp = regInfo{inputs: nil, outputs: gponly}
- // for CMOVconst -- uses AX to hold constant temporary.
- gp1flagsgp = regInfo{inputs: []regMask{gp &^ ax}, clobbers: ax, outputs: []regMask{gp &^ ax}}
+ gp11flags = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp, 0}}
readflags = regInfo{inputs: nil, outputs: gponly}
flagsgpax = regInfo{inputs: nil, clobbers: ax, outputs: []regMask{gp &^ ax}}
{name: "NOTQ", argLength: 1, reg: gp11, asm: "NOTQ", resultInArg0: true, clobberFlags: true}, // ^arg0
{name: "NOTL", argLength: 1, reg: gp11, asm: "NOTL", resultInArg0: true, clobberFlags: true}, // ^arg0
- {name: "BSFQ", argLength: 1, reg: gp11, asm: "BSFQ", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero
- {name: "BSFL", argLength: 1, reg: gp11, asm: "BSFL", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero
- {name: "BSFW", argLength: 1, reg: gp11, asm: "BSFW", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero
-
- {name: "BSRQ", argLength: 1, reg: gp11, asm: "BSRQ", clobberFlags: true}, // arg0 # of high-order zeroes ; undef if zero
- {name: "BSRL", argLength: 1, reg: gp11, asm: "BSRL", clobberFlags: true}, // arg0 # of high-order zeroes ; undef if zero
- {name: "BSRW", argLength: 1, reg: gp11, asm: "BSRW", clobberFlags: true}, // arg0 # of high-order zeroes ; undef if zero
+ // BSF{L,Q} returns a tuple [result, flags]
+ // result is undefined if the input is zero.
+ // flags are set to "equal" if the input is zero, "not equal" otherwise.
+ {name: "BSFQ", argLength: 1, reg: gp11flags, asm: "BSFQ", typ: "(UInt64,Flags)"}, // # of low-order zeroes in 64-bit arg
+ {name: "BSFL", argLength: 1, reg: gp11flags, asm: "BSFL", typ: "(UInt32,Flags)"}, // # of low-order zeroes in 32-bit arg
// Note ASM for ops moves whole register
- {name: "CMOVQEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVQEQ", typ: "UInt64", aux: "Int64", resultInArg0: true, clobberFlags: true}, // replace arg0 w/ constant if Z set
- {name: "CMOVLEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLEQ", typ: "UInt32", aux: "Int32", resultInArg0: true, clobberFlags: true}, // replace arg0 w/ constant if Z set
- {name: "CMOVWEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLEQ", typ: "UInt16", aux: "Int16", resultInArg0: true, clobberFlags: true}, // replace arg0 w/ constant if Z set
- {name: "CMOVQNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVQNE", typ: "UInt64", aux: "Int64", resultInArg0: true, clobberFlags: true}, // replace arg0 w/ constant if Z not set
- {name: "CMOVLNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLNE", typ: "UInt32", aux: "Int32", resultInArg0: true, clobberFlags: true}, // replace arg0 w/ constant if Z not set
- {name: "CMOVWNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLNE", typ: "UInt16", aux: "Int16", resultInArg0: true, clobberFlags: true}, // replace arg0 w/ constant if Z not set
+ //
+ {name: "CMOVQEQ", argLength: 3, reg: gp21, asm: "CMOVQEQ", resultInArg0: true}, // if arg2 encodes "equal" return arg1 else arg0
+ {name: "CMOVLEQ", argLength: 3, reg: gp21, asm: "CMOVLEQ", resultInArg0: true}, // if arg2 encodes "equal" return arg1 else arg0
{name: "BSWAPQ", argLength: 1, reg: gp11, asm: "BSWAPQ", resultInArg0: true, clobberFlags: true}, // arg0 swap bytes
{name: "BSWAPL", argLength: 1, reg: gp11, asm: "BSWAPL", resultInArg0: true, clobberFlags: true}, // arg0 swap bytes
{name: "Com32", argLength: 1},
{name: "Com64", argLength: 1},
- {name: "Ctz16", argLength: 1}, // Count trailing (low order) zeroes (returns 0-16)
- {name: "Ctz32", argLength: 1}, // Count trailing zeroes (returns 0-32)
+ {name: "Ctz32", argLength: 1}, // Count trailing (low order) zeroes (returns 0-32)
{name: "Ctz64", argLength: 1}, // Count trailing zeroes (returns 0-64)
- {name: "Clz16", argLength: 1}, // Count leading (high order) zeroes (returns 0-16)
- {name: "Clz32", argLength: 1}, // Count leading zeroes (returns 0-32)
- {name: "Clz64", argLength: 1}, // Count leading zeroes (returns 0-64)
-
{name: "Bswap32", argLength: 1}, // Swap bytes
{name: "Bswap64", argLength: 1}, // Swap bytes
OpAMD64NOTL
OpAMD64BSFQ
OpAMD64BSFL
- OpAMD64BSFW
- OpAMD64BSRQ
- OpAMD64BSRL
- OpAMD64BSRW
- OpAMD64CMOVQEQconst
- OpAMD64CMOVLEQconst
- OpAMD64CMOVWEQconst
- OpAMD64CMOVQNEconst
- OpAMD64CMOVLNEconst
- OpAMD64CMOVWNEconst
+ OpAMD64CMOVQEQ
+ OpAMD64CMOVLEQ
OpAMD64BSWAPQ
OpAMD64BSWAPL
OpAMD64SQRTSD
OpCom16
OpCom32
OpCom64
- OpCtz16
OpCtz32
OpCtz64
- OpClz16
- OpClz32
- OpClz64
OpBswap32
OpBswap64
OpSqrt
},
},
{
- name: "BSFQ",
- argLen: 1,
- clobberFlags: true,
- asm: x86.ABSFQ,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
- },
- outputs: []outputInfo{
- {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
- },
- },
- },
- {
- name: "BSFL",
- argLen: 1,
- clobberFlags: true,
- asm: x86.ABSFL,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
- },
- outputs: []outputInfo{
- {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
- },
- },
- },
- {
- name: "BSFW",
- argLen: 1,
- clobberFlags: true,
- asm: x86.ABSFW,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
- },
- outputs: []outputInfo{
- {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
- },
- },
- },
- {
- name: "BSRQ",
- argLen: 1,
- clobberFlags: true,
- asm: x86.ABSRQ,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
- },
- outputs: []outputInfo{
- {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
- },
- },
- },
- {
- name: "BSRL",
- argLen: 1,
- clobberFlags: true,
- asm: x86.ABSRL,
+ name: "BSFQ",
+ argLen: 1,
+ asm: x86.ABSFQ,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
+ {1, 0},
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
- name: "BSRW",
- argLen: 1,
- clobberFlags: true,
- asm: x86.ABSRW,
+ name: "BSFL",
+ argLen: 1,
+ asm: x86.ABSFL,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
+ {1, 0},
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
- name: "CMOVQEQconst",
- auxType: auxInt64,
- argLen: 2,
+ name: "CMOVQEQ",
+ argLen: 3,
resultInArg0: true,
- clobberFlags: true,
asm: x86.ACMOVQEQ,
reg: regInfo{
inputs: []inputInfo{
- {0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
- },
- clobbers: 1, // AX
- outputs: []outputInfo{
- {0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
- },
- },
- },
- {
- name: "CMOVLEQconst",
- auxType: auxInt32,
- argLen: 2,
- resultInArg0: true,
- clobberFlags: true,
- asm: x86.ACMOVLEQ,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
- clobbers: 1, // AX
outputs: []outputInfo{
- {0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
- name: "CMOVWEQconst",
- auxType: auxInt16,
- argLen: 2,
+ name: "CMOVLEQ",
+ argLen: 3,
resultInArg0: true,
- clobberFlags: true,
asm: x86.ACMOVLEQ,
reg: regInfo{
inputs: []inputInfo{
- {0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
- },
- clobbers: 1, // AX
- outputs: []outputInfo{
- {0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
- },
- },
- },
- {
- name: "CMOVQNEconst",
- auxType: auxInt64,
- argLen: 2,
- resultInArg0: true,
- clobberFlags: true,
- asm: x86.ACMOVQNE,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
- },
- clobbers: 1, // AX
- outputs: []outputInfo{
- {0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
- },
- },
- },
- {
- name: "CMOVLNEconst",
- auxType: auxInt32,
- argLen: 2,
- resultInArg0: true,
- clobberFlags: true,
- asm: x86.ACMOVLNE,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
- clobbers: 1, // AX
outputs: []outputInfo{
- {0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
- },
- },
- },
- {
- name: "CMOVWNEconst",
- auxType: auxInt16,
- argLen: 2,
- resultInArg0: true,
- clobberFlags: true,
- asm: x86.ACMOVLNE,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
- },
- clobbers: 1, // AX
- outputs: []outputInfo{
- {0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
argLen: 1,
generic: true,
},
- {
- name: "Ctz16",
- argLen: 1,
- generic: true,
- },
{
name: "Ctz32",
argLen: 1,
argLen: 1,
generic: true,
},
- {
- name: "Clz16",
- argLen: 1,
- generic: true,
- },
- {
- name: "Clz32",
- argLen: 1,
- generic: true,
- },
- {
- name: "Clz64",
- argLen: 1,
- generic: true,
- },
{
name: "Bswap32",
argLen: 1,
return rewriteValueAMD64_OpAMD64ANDQ(v, config)
case OpAMD64ANDQconst:
return rewriteValueAMD64_OpAMD64ANDQconst(v, config)
- case OpAMD64CMOVLEQconst:
- return rewriteValueAMD64_OpAMD64CMOVLEQconst(v, config)
- case OpAMD64CMOVQEQconst:
- return rewriteValueAMD64_OpAMD64CMOVQEQconst(v, config)
- case OpAMD64CMOVWEQconst:
- return rewriteValueAMD64_OpAMD64CMOVWEQconst(v, config)
case OpAMD64CMPB:
return rewriteValueAMD64_OpAMD64CMPB(v, config)
case OpAMD64CMPBconst:
return rewriteValueAMD64_OpConstNil(v, config)
case OpConvert:
return rewriteValueAMD64_OpConvert(v, config)
- case OpCtz16:
- return rewriteValueAMD64_OpCtz16(v, config)
case OpCtz32:
return rewriteValueAMD64_OpCtz32(v, config)
case OpCtz64:
}
return false
}
-func rewriteValueAMD64_OpAMD64CMOVLEQconst(v *Value, config *Config) bool {
- b := v.Block
- _ = b
- // match: (CMOVLEQconst x (InvertFlags y) [c])
- // cond:
- // result: (CMOVLNEconst x y [c])
- for {
- c := v.AuxInt
- x := v.Args[0]
- v_1 := v.Args[1]
- if v_1.Op != OpAMD64InvertFlags {
- break
- }
- y := v_1.Args[0]
- v.reset(OpAMD64CMOVLNEconst)
- v.AuxInt = c
- v.AddArg(x)
- v.AddArg(y)
- return true
- }
- // match: (CMOVLEQconst _ (FlagEQ) [c])
- // cond:
- // result: (Const32 [c])
- for {
- c := v.AuxInt
- v_1 := v.Args[1]
- if v_1.Op != OpAMD64FlagEQ {
- break
- }
- v.reset(OpConst32)
- v.AuxInt = c
- return true
- }
- // match: (CMOVLEQconst x (FlagLT_ULT))
- // cond:
- // result: x
- for {
- x := v.Args[0]
- v_1 := v.Args[1]
- if v_1.Op != OpAMD64FlagLT_ULT {
- break
- }
- v.reset(OpCopy)
- v.Type = x.Type
- v.AddArg(x)
- return true
- }
- // match: (CMOVLEQconst x (FlagLT_UGT))
- // cond:
- // result: x
- for {
- x := v.Args[0]
- v_1 := v.Args[1]
- if v_1.Op != OpAMD64FlagLT_UGT {
- break
- }
- v.reset(OpCopy)
- v.Type = x.Type
- v.AddArg(x)
- return true
- }
- // match: (CMOVLEQconst x (FlagGT_ULT))
- // cond:
- // result: x
- for {
- x := v.Args[0]
- v_1 := v.Args[1]
- if v_1.Op != OpAMD64FlagGT_ULT {
- break
- }
- v.reset(OpCopy)
- v.Type = x.Type
- v.AddArg(x)
- return true
- }
- // match: (CMOVLEQconst x (FlagGT_UGT))
- // cond:
- // result: x
- for {
- x := v.Args[0]
- v_1 := v.Args[1]
- if v_1.Op != OpAMD64FlagGT_UGT {
- break
- }
- v.reset(OpCopy)
- v.Type = x.Type
- v.AddArg(x)
- return true
- }
- return false
-}
-func rewriteValueAMD64_OpAMD64CMOVQEQconst(v *Value, config *Config) bool {
- b := v.Block
- _ = b
- // match: (CMOVQEQconst x (InvertFlags y) [c])
- // cond:
- // result: (CMOVQNEconst x y [c])
- for {
- c := v.AuxInt
- x := v.Args[0]
- v_1 := v.Args[1]
- if v_1.Op != OpAMD64InvertFlags {
- break
- }
- y := v_1.Args[0]
- v.reset(OpAMD64CMOVQNEconst)
- v.AuxInt = c
- v.AddArg(x)
- v.AddArg(y)
- return true
- }
- // match: (CMOVQEQconst _ (FlagEQ) [c])
- // cond:
- // result: (Const64 [c])
- for {
- c := v.AuxInt
- v_1 := v.Args[1]
- if v_1.Op != OpAMD64FlagEQ {
- break
- }
- v.reset(OpConst64)
- v.AuxInt = c
- return true
- }
- // match: (CMOVQEQconst x (FlagLT_ULT))
- // cond:
- // result: x
- for {
- x := v.Args[0]
- v_1 := v.Args[1]
- if v_1.Op != OpAMD64FlagLT_ULT {
- break
- }
- v.reset(OpCopy)
- v.Type = x.Type
- v.AddArg(x)
- return true
- }
- // match: (CMOVQEQconst x (FlagLT_UGT))
- // cond:
- // result: x
- for {
- x := v.Args[0]
- v_1 := v.Args[1]
- if v_1.Op != OpAMD64FlagLT_UGT {
- break
- }
- v.reset(OpCopy)
- v.Type = x.Type
- v.AddArg(x)
- return true
- }
- // match: (CMOVQEQconst x (FlagGT_ULT))
- // cond:
- // result: x
- for {
- x := v.Args[0]
- v_1 := v.Args[1]
- if v_1.Op != OpAMD64FlagGT_ULT {
- break
- }
- v.reset(OpCopy)
- v.Type = x.Type
- v.AddArg(x)
- return true
- }
- // match: (CMOVQEQconst x (FlagGT_UGT))
- // cond:
- // result: x
- for {
- x := v.Args[0]
- v_1 := v.Args[1]
- if v_1.Op != OpAMD64FlagGT_UGT {
- break
- }
- v.reset(OpCopy)
- v.Type = x.Type
- v.AddArg(x)
- return true
- }
- return false
-}
-func rewriteValueAMD64_OpAMD64CMOVWEQconst(v *Value, config *Config) bool {
- b := v.Block
- _ = b
- // match: (CMOVWEQconst x (InvertFlags y) [c])
- // cond:
- // result: (CMOVWNEconst x y [c])
- for {
- c := v.AuxInt
- x := v.Args[0]
- v_1 := v.Args[1]
- if v_1.Op != OpAMD64InvertFlags {
- break
- }
- y := v_1.Args[0]
- v.reset(OpAMD64CMOVWNEconst)
- v.AuxInt = c
- v.AddArg(x)
- v.AddArg(y)
- return true
- }
- // match: (CMOVWEQconst _ (FlagEQ) [c])
- // cond:
- // result: (Const16 [c])
- for {
- c := v.AuxInt
- v_1 := v.Args[1]
- if v_1.Op != OpAMD64FlagEQ {
- break
- }
- v.reset(OpConst16)
- v.AuxInt = c
- return true
- }
- // match: (CMOVWEQconst x (FlagLT_ULT))
- // cond:
- // result: x
- for {
- x := v.Args[0]
- v_1 := v.Args[1]
- if v_1.Op != OpAMD64FlagLT_ULT {
- break
- }
- v.reset(OpCopy)
- v.Type = x.Type
- v.AddArg(x)
- return true
- }
- // match: (CMOVWEQconst x (FlagLT_UGT))
- // cond:
- // result: x
- for {
- x := v.Args[0]
- v_1 := v.Args[1]
- if v_1.Op != OpAMD64FlagLT_UGT {
- break
- }
- v.reset(OpCopy)
- v.Type = x.Type
- v.AddArg(x)
- return true
- }
- // match: (CMOVWEQconst x (FlagGT_ULT))
- // cond:
- // result: x
- for {
- x := v.Args[0]
- v_1 := v.Args[1]
- if v_1.Op != OpAMD64FlagGT_ULT {
- break
- }
- v.reset(OpCopy)
- v.Type = x.Type
- v.AddArg(x)
- return true
- }
- // match: (CMOVWEQconst x (FlagGT_UGT))
- // cond:
- // result: x
- for {
- x := v.Args[0]
- v_1 := v.Args[1]
- if v_1.Op != OpAMD64FlagGT_UGT {
- break
- }
- v.reset(OpCopy)
- v.Type = x.Type
- v.AddArg(x)
- return true
- }
- return false
-}
func rewriteValueAMD64_OpAMD64CMPB(v *Value, config *Config) bool {
b := v.Block
_ = b
}
return false
}
-func rewriteValueAMD64_OpCtz16(v *Value, config *Config) bool {
- b := v.Block
- _ = b
- // match: (Ctz16 <t> x)
- // cond:
- // result: (CMOVWEQconst (BSFW <t> x) (CMPWconst x [0]) [16])
- for {
- t := v.Type
- x := v.Args[0]
- v.reset(OpAMD64CMOVWEQconst)
- v.AuxInt = 16
- v0 := b.NewValue0(v.Line, OpAMD64BSFW, t)
- v0.AddArg(x)
- v.AddArg(v0)
- v1 := b.NewValue0(v.Line, OpAMD64CMPWconst, TypeFlags)
- v1.AuxInt = 0
- v1.AddArg(x)
- v.AddArg(v1)
- return true
- }
-}
func rewriteValueAMD64_OpCtz32(v *Value, config *Config) bool {
b := v.Block
_ = b
// match: (Ctz32 <t> x)
// cond:
- // result: (CMOVLEQconst (BSFL <t> x) (CMPLconst x [0]) [32])
+ // result: (CMOVLEQ (Select0 <t> (BSFL x)) (MOVLconst <t> [32]) (Select1 <TypeFlags> (BSFL x)))
for {
t := v.Type
x := v.Args[0]
- v.reset(OpAMD64CMOVLEQconst)
- v.AuxInt = 32
- v0 := b.NewValue0(v.Line, OpAMD64BSFL, t)
- v0.AddArg(x)
- v.AddArg(v0)
- v1 := b.NewValue0(v.Line, OpAMD64CMPLconst, TypeFlags)
- v1.AuxInt = 0
+ v.reset(OpAMD64CMOVLEQ)
+ v0 := b.NewValue0(v.Line, OpSelect0, t)
+ v1 := b.NewValue0(v.Line, OpAMD64BSFL, MakeTuple(config.fe.TypeUInt32(), TypeFlags))
v1.AddArg(x)
- v.AddArg(v1)
+ v0.AddArg(v1)
+ v.AddArg(v0)
+ v2 := b.NewValue0(v.Line, OpAMD64MOVLconst, t)
+ v2.AuxInt = 32
+ v.AddArg(v2)
+ v3 := b.NewValue0(v.Line, OpSelect1, TypeFlags)
+ v4 := b.NewValue0(v.Line, OpAMD64BSFL, MakeTuple(config.fe.TypeUInt32(), TypeFlags))
+ v4.AddArg(x)
+ v3.AddArg(v4)
+ v.AddArg(v3)
return true
}
}
_ = b
// match: (Ctz64 <t> x)
// cond:
- // result: (CMOVQEQconst (BSFQ <t> x) (CMPQconst x [0]) [64])
+ // result: (CMOVQEQ (Select0 <t> (BSFQ x)) (MOVQconst <t> [64]) (Select1 <TypeFlags> (BSFQ x)))
for {
t := v.Type
x := v.Args[0]
- v.reset(OpAMD64CMOVQEQconst)
- v.AuxInt = 64
- v0 := b.NewValue0(v.Line, OpAMD64BSFQ, t)
- v0.AddArg(x)
- v.AddArg(v0)
- v1 := b.NewValue0(v.Line, OpAMD64CMPQconst, TypeFlags)
- v1.AuxInt = 0
+ v.reset(OpAMD64CMOVQEQ)
+ v0 := b.NewValue0(v.Line, OpSelect0, t)
+ v1 := b.NewValue0(v.Line, OpAMD64BSFQ, MakeTuple(config.fe.TypeUInt64(), TypeFlags))
v1.AddArg(x)
- v.AddArg(v1)
+ v0.AddArg(v1)
+ v.AddArg(v0)
+ v2 := b.NewValue0(v.Line, OpAMD64MOVQconst, t)
+ v2.AuxInt = 64
+ v.AddArg(v2)
+ v3 := b.NewValue0(v.Line, OpSelect1, TypeFlags)
+ v4 := b.NewValue0(v.Line, OpAMD64BSFQ, MakeTuple(config.fe.TypeUInt64(), TypeFlags))
+ v4.AddArg(x)
+ v3.AddArg(v4)
+ v.AddArg(v3)
return true
}
}
30, 9, 19, 24, 29, 18, 28, 27,
}
-const deBruijn16 = 0x09af
-
-var deBruijnIdx16 = [16]byte{
- 0, 1, 2, 5, 3, 9, 6, 11,
- 15, 4, 8, 10, 14, 7, 13, 12,
-}
-
-const deBruijn8 = 0x17
-
-var deBruijnIdx8 = [8]byte{
- 0, 1, 2, 4, 7, 3, 6, 5,
-}
-
// Ctz64 counts trailing (low-order) zeroes,
// and if all are zero, then 64.
func Ctz64(x uint64) uint64 {
return y + z
}
-// Ctz16 counts trailing (low-order) zeroes,
-// and if all are zero, then 16.
-func Ctz16(x uint16) uint16 {
- x &= -x // isolate low-order bit
- y := x * deBruijn16 >> 12 // extract part of deBruijn sequence
- y = uint16(deBruijnIdx16[y]) // convert to bit index
- z := (x - 1) >> 11 & 16 // adjustment if zero
- return y + z
-}
-
-// Ctz8 counts trailing (low-order) zeroes,
-// and if all are zero, then 8.
-func Ctz8(x uint8) uint8 {
- x &= -x // isolate low-order bit
- y := x * deBruijn8 >> 5 // extract part of deBruijn sequence
- y = uint8(deBruijnIdx8[y]) // convert to bit index
- z := (x - 1) >> 4 & 8 // adjustment if zero
- return y + z
-}
-
// Bswap64 returns its input with byte order reversed
// 0x0102030405060708 -> 0x0807060504030201
func Bswap64(x uint64) uint64 {
MOVL AX, ret+4(FP)
RET
-TEXT runtime∕internal∕sys·Ctz16(SB), NOSPLIT, $0-6
- MOVW x+0(FP), AX
- BSFW AX, AX
- JNZ 2(PC)
- MOVW $16, AX
- MOVW AX, ret+4(FP)
- RET
-
-TEXT runtime∕internal∕sys·Ctz8(SB), NOSPLIT, $0-5
- MOVBLZX x+0(FP), AX
- BSFL AX, AX
- JNZ 2(PC)
- MOVB $8, AX
- MOVB AX, ret+4(FP)
- RET
-
TEXT runtime∕internal∕sys·Bswap64(SB), NOSPLIT, $0-16
MOVL x_lo+0(FP), AX
MOVL x_hi+4(FP), BX
func Ctz64(x uint64) uint64
func Ctz32(x uint32) uint32
-func Ctz16(x uint16) uint16
-func Ctz8(x uint8) uint8
func Bswap64(x uint64) uint64
func Bswap32(x uint32) uint32
}
}
}
-func TestCtz16(t *testing.T) {
- for i := uint(0); i <= 16; i++ {
- x := uint16(5) << i
- if got := sys.Ctz16(x); got != uint16(i) {
- t.Errorf("Ctz16(%d)=%d, want %d", x, got, i)
- }
- }
-}
-func TestCtz8(t *testing.T) {
- for i := uint(0); i <= 8; i++ {
- x := uint8(5) << i
- if got := sys.Ctz8(x); got != uint8(i) {
- t.Errorf("Ctz8(%d)=%d, want %d", x, got, i)
- }
- }
-}
func TestBswap64(t *testing.T) {
x := uint64(0x1122334455667788)
logf("Ctz32(0x%x) expected %d but got %d\n", x32, i, t32)
}
}
- if i <= 16 {
- x16 := uint16(x)
- t16 := T.Ctz16(x16) // ERROR "intrinsic substitution for Ctz16"
- if uint16(i) != t16 {
- logf("Ctz16(0x%x) expected %d but got %d\n", x16, i, t16)
- }
- x16 = -x16
- t16 = T.Ctz16(x16) // ERROR "intrinsic substitution for Ctz16"
- if uint16(i) != t16 {
- logf("Ctz16(0x%x) expected %d but got %d\n", x16, i, t16)
- }
- }
}
func main() {
}
// Zero is a special case, be sure it is done right.
- if T.Ctz16(0) != 16 { // ERROR "intrinsic substitution for Ctz16"
- logf("ctz16(0) != 16")
- }
if T.Ctz32(0) != 32 { // ERROR "intrinsic substitution for Ctz32"
logf("ctz32(0) != 32")
}