]> Cypherpunks.ru repositories - gostls13.git/commitdiff
cmd/compile: intrinsify RotateLeft{32,64} on loong64
authorWayne Zuo <wdvxdr@golangcn.org>
Tue, 9 Aug 2022 15:53:37 +0000 (23:53 +0800)
committerDavid Chase <drchase@google.com>
Tue, 30 Aug 2022 03:21:44 +0000 (03:21 +0000)
Benchmark on crypto/sha256 (provided by Xiaodong Liu):
name               old time/op    new time/op    delta
Hash8Bytes/New       1.19µs ± 0%    0.97µs ± 0%  -18.75%  (p=0.000 n=9+9)
Hash8Bytes/Sum224    1.21µs ± 0%    0.97µs ± 0%  -20.04%  (p=0.000 n=9+10)
Hash8Bytes/Sum256    1.21µs ± 0%    0.98µs ± 0%  -19.16%  (p=0.000 n=10+7)
Hash1K/New           15.9µs ± 0%    12.4µs ± 0%  -22.10%  (p=0.000 n=10+10)
Hash1K/Sum224        15.9µs ± 0%    12.4µs ± 0%  -22.18%  (p=0.000 n=8+10)
Hash1K/Sum256        15.9µs ± 0%    12.4µs ± 0%  -22.15%  (p=0.000 n=10+9)
Hash8K/New            119µs ± 0%      92µs ± 0%  -22.40%  (p=0.000 n=10+9)
Hash8K/Sum224         119µs ± 0%      92µs ± 0%  -22.41%  (p=0.000 n=9+10)
Hash8K/Sum256         119µs ± 0%      92µs ± 0%  -22.40%  (p=0.000 n=9+9)

name               old speed      new speed      delta
Hash8Bytes/New     6.70MB/s ± 0%  8.25MB/s ± 0%  +23.13%  (p=0.000 n=10+10)
Hash8Bytes/Sum224  6.60MB/s ± 0%  8.26MB/s ± 0%  +25.06%  (p=0.000 n=10+10)
Hash8Bytes/Sum256  6.59MB/s ± 0%  8.15MB/s ± 0%  +23.67%  (p=0.000 n=10+7)
Hash1K/New         64.3MB/s ± 0%  82.5MB/s ± 0%  +28.36%  (p=0.000 n=10+10)
Hash1K/Sum224      64.3MB/s ± 0%  82.6MB/s ± 0%  +28.51%  (p=0.000 n=10+10)
Hash1K/Sum256      64.3MB/s ± 0%  82.6MB/s ± 0%  +28.46%  (p=0.000 n=9+9)
Hash8K/New         69.0MB/s ± 0%  89.0MB/s ± 0%  +28.87%  (p=0.000 n=10+8)
Hash8K/Sum224      69.0MB/s ± 0%  89.0MB/s ± 0%  +28.88%  (p=0.000 n=9+10)
Hash8K/Sum256      69.0MB/s ± 0%  88.9MB/s ± 0%  +28.87%  (p=0.000 n=8+9)

Benchmark on crypto/sha512 (provided by Xiaodong Liu):
name               old time/op    new time/op     delta
Hash8Bytes/New       1.55µs ± 0%     1.31µs ± 0%  -15.67%  (p=0.000 n=10+10)
Hash8Bytes/Sum384    1.59µs ± 0%     1.35µs ± 0%  -14.97%  (p=0.000 n=10+10)
Hash8Bytes/Sum512    1.62µs ± 0%     1.39µs ± 0%  -14.02%  (p=0.000 n=10+10)
Hash1K/New           10.7µs ± 0%      8.6µs ± 0%  -19.60%  (p=0.000 n=8+8)
Hash1K/Sum384        10.8µs ± 0%      8.7µs ± 0%  -19.40%  (p=0.000 n=9+9)
Hash1K/Sum512        10.8µs ± 0%      8.7µs ± 0%  -19.35%  (p=0.000 n=9+10)
Hash8K/New           74.6µs ± 0%     59.6µs ± 0%  -20.08%  (p=0.000 n=10+9)
Hash8K/Sum384        74.7µs ± 0%     59.7µs ± 0%  -20.04%  (p=0.000 n=9+8)
Hash8K/Sum512        74.7µs ± 0%     59.7µs ± 0%  -20.01%  (p=0.000 n=10+10)

name               old speed      new speed       delta
Hash8Bytes/New     5.16MB/s ± 0%   6.12MB/s ± 0%  +18.60%  (p=0.000 n=10+8)
Hash8Bytes/Sum384  5.02MB/s ± 0%   5.90MB/s ± 0%  +17.56%  (p=0.000 n=10+10)
Hash8Bytes/Sum512  4.94MB/s ± 0%   5.74MB/s ± 0%  +16.29%  (p=0.000 n=10+9)
Hash1K/New         95.4MB/s ± 0%  118.6MB/s ± 0%  +24.38%  (p=0.000 n=10+10)
Hash1K/Sum384      95.0MB/s ± 0%  117.9MB/s ± 0%  +24.06%  (p=0.000 n=8+9)
Hash1K/Sum512      94.8MB/s ± 0%  117.5MB/s ± 0%  +23.99%  (p=0.000 n=8+9)
Hash8K/New          110MB/s ± 0%    137MB/s ± 0%  +25.11%  (p=0.000 n=9+6)
Hash8K/Sum384       110MB/s ± 0%    137MB/s ± 0%  +25.07%  (p=0.000 n=9+8)
Hash8K/Sum512       110MB/s ± 0%    137MB/s ± 0%  +25.01%  (p=0.000 n=10+10)

Change-Id: I28ccfce634659305a336c8e0a3f8589f7361d661
Reviewed-on: https://go-review.googlesource.com/c/go/+/422317
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Wayne Zuo <wdvxdr@golangcn.org>
Reviewed-by: Ian Lance Taylor <iant@google.com>
Reviewed-by: David Chase <drchase@google.com>
src/cmd/compile/internal/loong64/ssa.go
src/cmd/compile/internal/ssa/gen/LOONG64.rules
src/cmd/compile/internal/ssa/gen/LOONG64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewrite.go
src/cmd/compile/internal/ssa/rewriteLOONG64.go
src/cmd/compile/internal/ssagen/ssa.go
test/codegen/rotate.go

index ed1fcb35f29de7a22a9a3da06075f7310143d05c..e004c6f7fbecf1abadabbdad967ee40354b2829c 100644 (file)
@@ -134,6 +134,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                ssa.OpLOONG64SLLV,
                ssa.OpLOONG64SRLV,
                ssa.OpLOONG64SRAV,
+               ssa.OpLOONG64ROTR,
+               ssa.OpLOONG64ROTRV,
                ssa.OpLOONG64ADDF,
                ssa.OpLOONG64ADDD,
                ssa.OpLOONG64SUBF,
@@ -165,6 +167,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                ssa.OpLOONG64SLLVconst,
                ssa.OpLOONG64SRLVconst,
                ssa.OpLOONG64SRAVconst,
+               ssa.OpLOONG64ROTRconst,
+               ssa.OpLOONG64ROTRVconst,
                ssa.OpLOONG64SGTconst,
                ssa.OpLOONG64SGTUconst:
                p := s.Prog(v.Op.Asm())
index 3ba25e0a95bda56a47677656fc096e08342f59f8..431d95f0969d291922682da69e92c53fde7b54da 100644 (file)
 // rotates
 (RotateLeft8 <t> x (MOVVconst [c])) => (Or8 (Lsh8x64 <t> x (MOVVconst [c&7])) (Rsh8Ux64 <t> x (MOVVconst [-c&7])))
 (RotateLeft16 <t> x (MOVVconst [c])) => (Or16 (Lsh16x64 <t> x (MOVVconst [c&15])) (Rsh16Ux64 <t> x (MOVVconst [-c&15])))
-(RotateLeft32 <t> x (MOVVconst [c])) => (Or32 (Lsh32x64 <t> x (MOVVconst [c&31])) (Rsh32Ux64 <t> x (MOVVconst [-c&31])))
-(RotateLeft64 <t> x (MOVVconst [c])) => (Or64 (Lsh64x64 <t> x (MOVVconst [c&63])) (Rsh64Ux64 <t> x (MOVVconst [-c&63])))
+(RotateLeft32 x y) => (ROTR  x (NEGV <y.Type> y))
+(RotateLeft64 x y) => (ROTRV x (NEGV <y.Type> y))
 
 // unary ops
 (Neg(64|32|16|8) ...) => (NEGV ...)
 (SLLV x (MOVVconst [c])) => (SLLVconst x [c])
 (SRLV x (MOVVconst [c])) => (SRLVconst x [c])
 (SRAV x (MOVVconst [c])) => (SRAVconst x [c])
+(ROTR x (MOVVconst [c]))  => (ROTRconst x [c&31])
+(ROTRV x (MOVVconst [c])) => (ROTRVconst x [c&63])
 
 (SGT  (MOVVconst [c]) x) && is32Bit(c) => (SGTconst  [c] x)
 (SGTU (MOVVconst [c]) x) && is32Bit(c) => (SGTUconst [c] x)
index e06ad166bb8b33c550907291e7632703006aa248..a59b592445ac0cf49be866b005e8f7c7d3788d70 100644 (file)
@@ -196,12 +196,16 @@ func init() {
                {name: "SQRTF", argLength: 1, reg: fp11, asm: "SQRTF"}, // sqrt(arg0), float32
 
                // shifts
-               {name: "SLLV", argLength: 2, reg: gp21, asm: "SLLV"},                    // arg0 << arg1, shift amount is mod 64
-               {name: "SLLVconst", argLength: 1, reg: gp11, asm: "SLLV", aux: "Int64"}, // arg0 << auxInt
-               {name: "SRLV", argLength: 2, reg: gp21, asm: "SRLV"},                    // arg0 >> arg1, unsigned, shift amount is mod 64
-               {name: "SRLVconst", argLength: 1, reg: gp11, asm: "SRLV", aux: "Int64"}, // arg0 >> auxInt, unsigned
-               {name: "SRAV", argLength: 2, reg: gp21, asm: "SRAV"},                    // arg0 >> arg1, signed, shift amount is mod 64
-               {name: "SRAVconst", argLength: 1, reg: gp11, asm: "SRAV", aux: "Int64"}, // arg0 >> auxInt, signed
+               {name: "SLLV", argLength: 2, reg: gp21, asm: "SLLV"},                      // arg0 << arg1, shift amount is mod 64
+               {name: "SLLVconst", argLength: 1, reg: gp11, asm: "SLLV", aux: "Int64"},   // arg0 << auxInt
+               {name: "SRLV", argLength: 2, reg: gp21, asm: "SRLV"},                      // arg0 >> arg1, unsigned, shift amount is mod 64
+               {name: "SRLVconst", argLength: 1, reg: gp11, asm: "SRLV", aux: "Int64"},   // arg0 >> auxInt, unsigned
+               {name: "SRAV", argLength: 2, reg: gp21, asm: "SRAV"},                      // arg0 >> arg1, signed, shift amount is mod 64
+               {name: "SRAVconst", argLength: 1, reg: gp11, asm: "SRAV", aux: "Int64"},   // arg0 >> auxInt, signed
+               {name: "ROTR", argLength: 2, reg: gp21, asm: "ROTR"},                      // arg0 right rotate by (arg1 mod 32) bits
+               {name: "ROTRV", argLength: 2, reg: gp21, asm: "ROTRV"},                    // arg0 right rotate by (arg1 mod 64) bits
+               {name: "ROTRconst", argLength: 1, reg: gp11, asm: "ROTR", aux: "Int64"},   // uint32(arg0) right rotate by auxInt bits, auxInt should be in the range 0 to 31.
+               {name: "ROTRVconst", argLength: 1, reg: gp11, asm: "ROTRV", aux: "Int64"}, // arg0 right rotate by auxInt bits, auxInt should be in the range 0 to 63.
 
                // comparisons
                {name: "SGT", argLength: 2, reg: gp21, asm: "SGT", typ: "Bool"},                      // 1 if arg0 > arg1 (signed), 0 otherwise
index 2e94f2e6241ea3f9f6296d7e43fbec726e247984..7755ee3c15439e882d53dd921bf3c4a7bee22f5d 100644 (file)
@@ -1746,6 +1746,10 @@ const (
        OpLOONG64SRLVconst
        OpLOONG64SRAV
        OpLOONG64SRAVconst
+       OpLOONG64ROTR
+       OpLOONG64ROTRV
+       OpLOONG64ROTRconst
+       OpLOONG64ROTRVconst
        OpLOONG64SGT
        OpLOONG64SGTconst
        OpLOONG64SGTU
@@ -23344,6 +23348,62 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "ROTR",
+               argLen: 2,
+               asm:    loong64.AROTR,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31
+                               {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31
+                       },
+                       outputs: []outputInfo{
+                               {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31
+                       },
+               },
+       },
+       {
+               name:   "ROTRV",
+               argLen: 2,
+               asm:    loong64.AROTRV,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31
+                               {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31
+                       },
+                       outputs: []outputInfo{
+                               {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31
+                       },
+               },
+       },
+       {
+               name:    "ROTRconst",
+               auxType: auxInt64,
+               argLen:  1,
+               asm:     loong64.AROTR,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31
+                       },
+                       outputs: []outputInfo{
+                               {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31
+                       },
+               },
+       },
+       {
+               name:    "ROTRVconst",
+               auxType: auxInt64,
+               argLen:  1,
+               asm:     loong64.AROTRV,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31
+                       },
+                       outputs: []outputInfo{
+                               {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31
+                       },
+               },
+       },
        {
                name:   "SGT",
                argLen: 2,
index 91f8fd7f729f1aad3ceabdfea2dfeb4cb1065d96..44b5173968b744616c41b2024d671e7d6b5f2525 100644 (file)
@@ -1988,7 +1988,7 @@ func canRotate(c *Config, bits int64) bool {
        switch c.arch {
        case "386", "amd64":
                return true
-       case "arm", "arm64", "s390x", "ppc64", "ppc64le", "wasm":
+       case "arm", "arm64", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
                return bits >= 32
        default:
                return false
index 3fc10104b940b92d54079b447061791aa46bc9d6..d2e7e8e061ffc6b6b7badb95667c58f3a4e5024f 100644 (file)
@@ -291,6 +291,10 @@ func rewriteValueLOONG64(v *Value) bool {
                return rewriteValueLOONG64_OpLOONG64OR(v)
        case OpLOONG64ORconst:
                return rewriteValueLOONG64_OpLOONG64ORconst(v)
+       case OpLOONG64ROTR:
+               return rewriteValueLOONG64_OpLOONG64ROTR(v)
+       case OpLOONG64ROTRV:
+               return rewriteValueLOONG64_OpLOONG64ROTRV(v)
        case OpLOONG64SGT:
                return rewriteValueLOONG64_OpLOONG64SGT(v)
        case OpLOONG64SGTU:
@@ -3307,6 +3311,42 @@ func rewriteValueLOONG64_OpLOONG64ORconst(v *Value) bool {
        }
        return false
 }
+func rewriteValueLOONG64_OpLOONG64ROTR(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ROTR x (MOVVconst [c]))
+       // result: (ROTRconst x [c&31])
+       for {
+               x := v_0
+               if v_1.Op != OpLOONG64MOVVconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpLOONG64ROTRconst)
+               v.AuxInt = int64ToAuxInt(c & 31)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueLOONG64_OpLOONG64ROTRV(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ROTRV x (MOVVconst [c]))
+       // result: (ROTRVconst x [c&63])
+       for {
+               x := v_0
+               if v_1.Op != OpLOONG64MOVVconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpLOONG64ROTRVconst)
+               v.AuxInt = int64ToAuxInt(c & 63)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
 func rewriteValueLOONG64_OpLOONG64SGT(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
@@ -5827,57 +5867,33 @@ func rewriteValueLOONG64_OpRotateLeft32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (RotateLeft32 <t> x (MOVVconst [c]))
-       // result: (Or32 (Lsh32x64 <t> x (MOVVconst [c&31])) (Rsh32Ux64 <t> x (MOVVconst [-c&31])))
+       // match: (RotateLeft32 x y)
+       // result: (ROTR x (NEGV <y.Type> y))
        for {
-               t := v.Type
                x := v_0
-               if v_1.Op != OpLOONG64MOVVconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpOr32)
-               v0 := b.NewValue0(v.Pos, OpLsh32x64, t)
-               v1 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64)
-               v1.AuxInt = int64ToAuxInt(c & 31)
-               v0.AddArg2(x, v1)
-               v2 := b.NewValue0(v.Pos, OpRsh32Ux64, t)
-               v3 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64)
-               v3.AuxInt = int64ToAuxInt(-c & 31)
-               v2.AddArg2(x, v3)
-               v.AddArg2(v0, v2)
+               y := v_1
+               v.reset(OpLOONG64ROTR)
+               v0 := b.NewValue0(v.Pos, OpLOONG64NEGV, y.Type)
+               v0.AddArg(y)
+               v.AddArg2(x, v0)
                return true
        }
-       return false
 }
 func rewriteValueLOONG64_OpRotateLeft64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (RotateLeft64 <t> x (MOVVconst [c]))
-       // result: (Or64 (Lsh64x64 <t> x (MOVVconst [c&63])) (Rsh64Ux64 <t> x (MOVVconst [-c&63])))
+       // match: (RotateLeft64 x y)
+       // result: (ROTRV x (NEGV <y.Type> y))
        for {
-               t := v.Type
                x := v_0
-               if v_1.Op != OpLOONG64MOVVconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpOr64)
-               v0 := b.NewValue0(v.Pos, OpLsh64x64, t)
-               v1 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64)
-               v1.AuxInt = int64ToAuxInt(c & 63)
-               v0.AddArg2(x, v1)
-               v2 := b.NewValue0(v.Pos, OpRsh64Ux64, t)
-               v3 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64)
-               v3.AuxInt = int64ToAuxInt(-c & 63)
-               v2.AddArg2(x, v3)
-               v.AddArg2(v0, v2)
+               y := v_1
+               v.reset(OpLOONG64ROTRV)
+               v0 := b.NewValue0(v.Pos, OpLOONG64NEGV, y.Type)
+               v0.AddArg(y)
+               v.AddArg2(x, v0)
                return true
        }
-       return false
 }
 func rewriteValueLOONG64_OpRotateLeft8(v *Value) bool {
        v_1 := v.Args[1]
index 26e14e2d21ce325f22ae75dffa9815fef0c377d4..cef842ceb01aade93287dbe45ff857fa02f30d39 100644 (file)
@@ -4643,12 +4643,12 @@ func InitTables() {
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        return s.newValue2(ssa.OpRotateLeft32, types.Types[types.TUINT32], args[0], args[1])
                },
-               sys.AMD64, sys.ARM, sys.ARM64, sys.S390X, sys.PPC64, sys.Wasm)
+               sys.AMD64, sys.ARM, sys.ARM64, sys.S390X, sys.PPC64, sys.Wasm, sys.Loong64)
        addF("math/bits", "RotateLeft64",
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        return s.newValue2(ssa.OpRotateLeft64, types.Types[types.TUINT64], args[0], args[1])
                },
-               sys.AMD64, sys.ARM64, sys.S390X, sys.PPC64, sys.Wasm)
+               sys.AMD64, sys.ARM64, sys.S390X, sys.PPC64, sys.Wasm, sys.Loong64)
        alias("math/bits", "RotateLeft", "math/bits", "RotateLeft64", p8...)
 
        makeOnesCountAMD64 := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
index 5876050ba009c8e1b48fea08896baacd346ddadb..f42993532d14219451b9f4da434a45ee7673f4ab 100644 (file)
@@ -18,6 +18,7 @@ func rot64(x uint64) uint64 {
        // amd64:"ROLQ\t[$]7"
        // ppc64:"ROTL\t[$]7"
        // ppc64le:"ROTL\t[$]7"
+       // loong64: "ROTRV\t[$]57"
        a += x<<7 | x>>57
 
        // amd64:"ROLQ\t[$]8"
@@ -25,6 +26,7 @@ func rot64(x uint64) uint64 {
        // s390x:"RISBGZ\t[$]0, [$]63, [$]8, "
        // ppc64:"ROTL\t[$]8"
        // ppc64le:"ROTL\t[$]8"
+       // loong64: "ROTRV\t[$]56"
        a += x<<8 + x>>56
 
        // amd64:"ROLQ\t[$]9"
@@ -32,6 +34,7 @@ func rot64(x uint64) uint64 {
        // s390x:"RISBGZ\t[$]0, [$]63, [$]9, "
        // ppc64:"ROTL\t[$]9"
        // ppc64le:"ROTL\t[$]9"
+       // loong64: "ROTRV\t[$]55"
        a += x<<9 ^ x>>55
 
        // amd64:"ROLQ\t[$]10"
@@ -41,6 +44,7 @@ func rot64(x uint64) uint64 {
        // ppc64le:"ROTL\t[$]10"
        // arm64:"ROR\t[$]54"
        // s390x:"RISBGZ\t[$]0, [$]63, [$]10, "
+       // loong64: "ROTRV\t[$]54"
        a += bits.RotateLeft64(x, 10)
 
        return a
@@ -53,6 +57,7 @@ func rot32(x uint32) uint32 {
        // arm:"MOVW\tR\\d+@>25"
        // ppc64:"ROTLW\t[$]7"
        // ppc64le:"ROTLW\t[$]7"
+       // loong64: "ROTR\t[$]25"
        a += x<<7 | x>>25
 
        // amd64:`ROLL\t[$]8`
@@ -61,6 +66,7 @@ func rot32(x uint32) uint32 {
        // s390x:"RLL\t[$]8"
        // ppc64:"ROTLW\t[$]8"
        // ppc64le:"ROTLW\t[$]8"
+       // loong64: "ROTR\t[$]24"
        a += x<<8 + x>>24
 
        // amd64:"ROLL\t[$]9"
@@ -69,6 +75,7 @@ func rot32(x uint32) uint32 {
        // s390x:"RLL\t[$]9"
        // ppc64:"ROTLW\t[$]9"
        // ppc64le:"ROTLW\t[$]9"
+       // loong64: "ROTR\t[$]23"
        a += x<<9 ^ x>>23
 
        // amd64:"ROLL\t[$]10"
@@ -79,6 +86,7 @@ func rot32(x uint32) uint32 {
        // ppc64le:"ROTLW\t[$]10"
        // arm64:"RORW\t[$]22"
        // s390x:"RLL\t[$]10"
+       // loong64: "ROTR\t[$]22"
        a += bits.RotateLeft32(x, 10)
 
        return a
@@ -127,12 +135,14 @@ func rot64nc(x uint64, z uint) uint64 {
        // arm64:"ROR","NEG",-"AND"
        // ppc64:"ROTL",-"NEG",-"AND"
        // ppc64le:"ROTL",-"NEG",-"AND"
+       // loong64: "ROTRV", -"AND"
        a += x<<z | x>>(64-z)
 
        // amd64:"RORQ",-"AND"
        // arm64:"ROR",-"NEG",-"AND"
        // ppc64:"ROTL","NEG",-"AND"
        // ppc64le:"ROTL","NEG",-"AND"
+       // loong64: "ROTRV", -"AND"
        a += x>>z | x<<(64-z)
 
        return a
@@ -147,12 +157,14 @@ func rot32nc(x uint32, z uint) uint32 {
        // arm64:"ROR","NEG",-"AND"
        // ppc64:"ROTLW",-"NEG",-"AND"
        // ppc64le:"ROTLW",-"NEG",-"AND"
+       // loong64: "ROTR", -"AND"
        a += x<<z | x>>(32-z)
 
        // amd64:"RORL",-"AND"
        // arm64:"ROR",-"NEG",-"AND"
        // ppc64:"ROTLW","NEG",-"AND"
        // ppc64le:"ROTLW","NEG",-"AND"
+       // loong64: "ROTR", -"AND"
        a += x>>z | x<<(32-z)
 
        return a