crypto/rsa,crypto/internal/bigmod: improve verify/encrypt performance

author Filippo Valsorda <filippo@golang.org>

Wed, 27 Dec 2023 15:05:19 +0000 (16:05 +0100)

committer Filippo Valsorda <filippo@golang.org>

Fri, 12 Jan 2024 00:56:20 +0000 (00:56 +0000)
author Filippo Valsorda <filippo@golang.org>
Wed, 27 Dec 2023 15:05:19 +0000 (16:05 +0100)
committer Filippo Valsorda <filippo@golang.org>
Fri, 12 Jan 2024 00:56:20 +0000 (00:56 +0000)
diff --git a/src/crypto/internal/bigmod/nat.go b/src/crypto/internal/bigmod/nat.go

index 5605e9f1c34bb94c1cd3af1af4909b4e27388436..7fdd8ef17793383f5b16cba585b2a73a8659ce71 100644 (file)
--- a/src/crypto/internal/bigmod/nat.go
+++ b/src/crypto/internal/bigmod/nat.go
@@ -318,14 +318,48 @@ type Modulus struct {
  // rr returns R*R with R = 2^(_W * n) and n = len(m.nat.limbs).
  func rr(m *Modulus) *Nat {
         rr := NewNat().ExpandFor(m)
-       // R*R is 2^(2 * _W * n). We can safely get 2^(_W * (n - 1)) by setting the
-       // most significant limb to 1. We then get to R*R by shifting left by _W
-       // n + 1 times.
-       n := len(rr.limbs)
-       rr.limbs[n-1] = 1
-       for i := n - 1; i < 2*n; i++ {
-               rr.shiftIn(0, m) // x = x * 2^_W mod m
+       n := uint(len(rr.limbs))
+       mLen := uint(m.BitLen())
+       logR := _W * n
+
+       // We start by computing R = 2^(_W * n) mod m. We can get pretty close, to
+       // 2^⌊log₂m⌋, by setting the highest bit we can without having to reduce.
+       rr.limbs[n-1] = 1 << ((mLen - 1) % _W)
+       // Then we double until we reach 2^(_W * n).
+       for i := mLen - 1; i < logR; i++ {
+               rr.Add(rr, m)
+       }
+
+       // Next we need to get from R to 2^(_W * n) R mod m (aka from one to R in
+       // the Montgomery domain, meaning we can use Montgomery multiplication now).
+       // We could do that by doubling _W * n times, or with a square-and-double
+       // chain log2(_W * n) long. Turns out the fastest thing is to start out with
+       // doublings, and switch to square-and-double once the exponent is large
+       // enough to justify the cost of the multiplications.
+
+       // The threshold is selected experimentally as a linear function of n.
+       threshold := n / 4
+
+       // We calculate how many of the most-significant bits of the exponent we can
+       // compute before crossing the threshold, and we do it with doublings.
+       i := bits.UintSize
+       for logR>>i <= threshold {
+               i--
+       }
+       for k := uint(0); k < logR>>i; k++ {
+               rr.Add(rr, m)
+       }
+
+       // Then we process the remaining bits of the exponent with a
+       // square-and-double chain.
+       for i > 0 {
+               rr.montgomeryMul(rr, rr, m)
+               i--
+               if logR>>i&1 != 0 {
+                       rr.Add(rr, m)
+               }
         }
+
         return rr
  }
  
@@ -745,26 +779,21 @@ func (out *Nat) Exp(x *Nat, e []byte, m *Modulus) *Nat {
         return out.montgomeryReduction(m)
  }
  
-// ExpShort calculates out = x^e mod m.
+// ExpShortVarTime calculates out = x^e mod m.
  //
  // The output will be resized to the size of m and overwritten. x must already
-// be reduced modulo m. This leaks the exact bit size of the exponent.
-func (out *Nat) ExpShort(x *Nat, e uint, m *Modulus) *Nat {
-       xR := NewNat().set(x).montgomeryRepresentation(m)
-
-       out.resetFor(m)
-       out.limbs[0] = 1
-       out.montgomeryRepresentation(m)
-
+// be reduced modulo m. This leaks the exponent through timing side-channels.
+func (out *Nat) ExpShortVarTime(x *Nat, e uint, m *Modulus) *Nat {
         // For short exponents, precomputing a table and using a window like in Exp
-       // doesn't pay off. Instead, we do a simple constant-time conditional
-       // square-and-multiply chain, skipping the initial run of zeroes.
-       tmp := NewNat().ExpandFor(m)
-       for i := bits.UintSize - bitLen(e); i < bits.UintSize; i++ {
+       // doesn't pay off. Instead, we do a simple conditional square-and-multiply
+       // chain, skipping the initial run of zeroes.
+       xR := NewNat().set(x).montgomeryRepresentation(m)
+       out.set(xR)
+       for i := bits.UintSize - bitLen(e) + 1; i < bits.UintSize; i++ {
                 out.montgomeryMul(out, out, m)
-               k := (e >> (bits.UintSize - i - 1)) & 1
-               tmp.montgomeryMul(out, xR, m)
-               out.assign(ctEq(k, 1), tmp)
+               if k := (e >> (bits.UintSize - i - 1)) & 1; k != 0 {
+                       out.montgomeryMul(out, xR, m)
+               }
         }
         return out.montgomeryReduction(m)
  }
diff --git a/src/crypto/internal/bigmod/nat_test.go b/src/crypto/internal/bigmod/nat_test.go

index 76e557048c38c267d279ec7758586dccda0bb446..7a956e3a57d30036160ea3edbf12a3ec3211cd95 100644 (file)
--- a/src/crypto/internal/bigmod/nat_test.go
+++ b/src/crypto/internal/bigmod/nat_test.go
@@ -303,7 +303,7 @@ func TestExpShort(t *testing.T) {
         m := modulusFromBytes([]byte{13})
         x := &Nat{[]uint{3}}
         out := &Nat{[]uint{0}}
-       out.ExpShort(x, 12, m)
+       out.ExpShortVarTime(x, 12, m)
         expected := &Nat{[]uint{1}}
         if out.Equal(expected) != 1 {
                 t.Errorf("%+v != %+v", out, expected)
diff --git a/src/crypto/rsa/rsa.go b/src/crypto/rsa/rsa.go

index 0715421187d8b126c23356f6cba688d22da7af76..9342930dc1236f67e971eefd50e8e39becccdf05 100644 (file)
--- a/src/crypto/rsa/rsa.go
+++ b/src/crypto/rsa/rsa.go
@@ -43,6 +43,10 @@ import (
  var bigOne = big.NewInt(1)
  
  // A PublicKey represents the public part of an RSA key.
+//
+// The value of the modulus N is considered secret by this library and protected
+// from leaking through timing side-channels. However, neither the value of the
+// exponent E nor the precise bit size of N are similarly protected.
  type PublicKey struct {
         N *big.Int // modulus
         E int      // public exponent
@@ -478,10 +482,6 @@ var ErrMessageTooLong = errors.New("crypto/rsa: message too long for RSA key siz
  func encrypt(pub *PublicKey, plaintext []byte) ([]byte, error) {
         boring.Unreachable()
  
-       // Most of the CPU time for encryption and verification is spent in this
-       // NewModulusFromBig call, because PublicKey doesn't have a Precomputed
-       // field. If performance becomes an issue, consider placing a private
-       // sync.Once on PublicKey to compute this.
         N, err := bigmod.NewModulusFromBig(pub.N)
         if err != nil {
                 return nil, err
@@ -492,7 +492,7 @@ func encrypt(pub *PublicKey, plaintext []byte) ([]byte, error) {
         }
         e := uint(pub.E)
  
-       return bigmod.NewNat().ExpShort(m, e, N).Bytes(N), nil
+       return bigmod.NewNat().ExpShortVarTime(m, e, N).Bytes(N), nil
  }
  
  // EncryptOAEP encrypts the given message with RSA-OAEP.
@@ -686,7 +686,7 @@ func decrypt(priv *PrivateKey, ciphertext []byte, check bool) ([]byte, error) {
         }
  
         if check {
-               c1 := bigmod.NewNat().ExpShort(m, uint(priv.E), N)
+               c1 := bigmod.NewNat().ExpShortVarTime(m, uint(priv.E), N)
                 if c1.Equal(c) != 1 {
                         return nil, ErrDecryption
                 }
author	Filippo Valsorda <filippo@golang.org>
	Wed, 27 Dec 2023 15:05:19 +0000 (16:05 +0100)
committer	Filippo Valsorda <filippo@golang.org>
	Fri, 12 Jan 2024 00:56:20 +0000 (00:56 +0000)
src/crypto/internal/bigmod/nat.go		patch \| blob \| history
src/crypto/internal/bigmod/nat_test.go		patch \| blob \| history
src/crypto/rsa/rsa.go		patch \| blob \| history