encoding: require unique alphabet for base32 and base64

author Joe Tsai <joetsai@digital-static.net>

Thu, 22 Jun 2023 18:44:55 +0000 (11:44 -0700)

committer Gopher Robot <gobot@golang.org>

Fri, 18 Aug 2023 02:08:03 +0000 (02:08 +0000)
author Joe Tsai <joetsai@digital-static.net>
Thu, 22 Jun 2023 18:44:55 +0000 (11:44 -0700)
committer Gopher Robot <gobot@golang.org>
Fri, 18 Aug 2023 02:08:03 +0000 (02:08 +0000)
diff --git a/src/encoding/base32/base32.go b/src/encoding/base32/base32.go

index 6e2360790ab92067c1fd9ec428ebbfd709e00da8..de95df0043339fbb34ca686d431ac689e3a7c7ff 100644 (file)
--- a/src/encoding/base32/base32.go
+++ b/src/encoding/base32/base32.go
@@ -20,8 +20,8 @@ import (
  // introduced for SASL GSSAPI and standardized in RFC 4648.
  // The alternate "base32hex" encoding is used in DNSSEC.
  type Encoding struct {
-       encode    [32]byte
-       decodeMap [256]byte
+       encode    [32]byte   // mapping of symbol index to symbol byte value
+       decodeMap [256]uint8 // mapping of symbol byte value to symbol index
         padChar   rune
  }
  
@@ -45,14 +45,19 @@ const (
                 "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
                 "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
                 "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
+       invalidIndex = '\xff'
  )
  
  const encodeStd = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"
  const encodeHex = "0123456789ABCDEFGHIJKLMNOPQRSTUV"
  
-// NewEncoding returns a new Encoding defined by the given alphabet,
-// which must be a 32-byte string. The alphabet is treated as sequence
-// of byte values without any special treatment for multi-byte UTF-8.
+// NewEncoding returns a new padded Encoding defined by the given alphabet,
+// which must be a 32-byte string that contains unique byte values and
+// does not contain the padding character or CR / LF ('\r', '\n').
+// The alphabet is treated as a sequence of byte values
+// without any special treatment for multi-byte UTF-8.
+// The resulting Encoding uses the default padding character ('='),
+// which may be changed or disabled via WithPadding.
  func NewEncoding(encoder string) *Encoding {
         if len(encoder) != 32 {
                 panic("encoding alphabet is not 32-bytes long")
@@ -64,7 +69,16 @@ func NewEncoding(encoder string) *Encoding {
         copy(e.decodeMap[:], decodeMapInitialize)
  
         for i := 0; i < len(encoder); i++ {
-               e.decodeMap[encoder[i]] = byte(i)
+               // Note: While we document that the alphabet cannot contain
+               // the padding character, we do not enforce it since we do not know
+               // if the caller intends to switch the padding from StdPadding later.
+               switch {
+               case encoder[i] == '\n' || encoder[i] == '\r':
+                       panic("encoding alphabet contains newline character")
+               case e.decodeMap[encoder[i]] != invalidIndex:
+                       panic("encoding alphabet includes duplicate symbols")
+               }
+               e.decodeMap[encoder[i]] = uint8(i)
         }
         return e
  }
@@ -85,16 +99,12 @@ var HexEncoding = NewEncoding(encodeHex)
  // Padding characters above '\x7f' are encoded as their exact byte value
  // rather than using the UTF-8 representation of the codepoint.
  func (enc Encoding) WithPadding(padding rune) *Encoding {
-       if padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff {
+       switch {
+       case padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff:
                 panic("invalid padding")
+       case padding != NoPadding && enc.decodeMap[byte(padding)] != invalidIndex:
+               panic("padding contained in alphabet")
         }
-
-       for i := 0; i < len(enc.encode); i++ {
-               if rune(enc.encode[i]) == padding {
-                       panic("padding contained in alphabet")
-               }
-       }
-
         enc.padChar = padding
         return &enc
  }
diff --git a/src/encoding/base64/base64.go b/src/encoding/base64/base64.go

index 28ed7a012375c6356207a7c5e31da025ed6698f2..802ef14c38992ec84d800beaffb268cbfd4db962 100644 (file)
--- a/src/encoding/base64/base64.go
+++ b/src/encoding/base64/base64.go
@@ -22,8 +22,8 @@ import (
  // (RFC 1421).  RFC 4648 also defines an alternate encoding, which is
  // the standard encoding with - and _ substituted for + and /.
  type Encoding struct {
-       encode    [64]byte
-       decodeMap [256]byte
+       encode    [64]byte   // mapping of symbol index to symbol byte value
+       decodeMap [256]uint8 // mapping of symbol byte value to symbol index
         padChar   rune
         strict    bool
  }
@@ -48,14 +48,16 @@ const (
                 "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
                 "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
                 "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
+       invalidIndex = '\xff'
  )
  
  const encodeStd = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
  const encodeURL = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
  
  // NewEncoding returns a new padded Encoding defined by the given alphabet,
-// which must be a 64-byte string that does not contain the padding character
-// or CR / LF ('\r', '\n'). The alphabet is treated as sequence of byte values
+// which must be a 64-byte string that contains unique byte values and
+// does not contain the padding character or CR / LF ('\r', '\n').
+// The alphabet is treated as a sequence of byte values
  // without any special treatment for multi-byte UTF-8.
  // The resulting Encoding uses the default padding character ('='),
  // which may be changed or disabled via WithPadding.
@@ -63,11 +65,6 @@ func NewEncoding(encoder string) *Encoding {
         if len(encoder) != 64 {
                 panic("encoding alphabet is not 64-bytes long")
         }
-       for i := 0; i < len(encoder); i++ {
-               if encoder[i] == '\n' || encoder[i] == '\r' {
-                       panic("encoding alphabet contains newline character")
-               }
-       }
  
         e := new(Encoding)
         e.padChar = StdPadding
@@ -75,7 +72,16 @@ func NewEncoding(encoder string) *Encoding {
         copy(e.decodeMap[:], decodeMapInitialize)
  
         for i := 0; i < len(encoder); i++ {
-               e.decodeMap[encoder[i]] = byte(i)
+               // Note: While we document that the alphabet cannot contain
+               // the padding character, we do not enforce it since we do not know
+               // if the caller intends to switch the padding from StdPadding later.
+               switch {
+               case encoder[i] == '\n' || encoder[i] == '\r':
+                       panic("encoding alphabet contains newline character")
+               case e.decodeMap[encoder[i]] != invalidIndex:
+                       panic("encoding alphabet includes duplicate symbols")
+               }
+               e.decodeMap[encoder[i]] = uint8(i)
         }
         return e
  }
@@ -88,16 +94,12 @@ func NewEncoding(encoder string) *Encoding {
  // Padding characters above '\x7f' are encoded as their exact byte value
  // rather than using the UTF-8 representation of the codepoint.
  func (enc Encoding) WithPadding(padding rune) *Encoding {
-       if padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff {
+       switch {
+       case padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff:
                 panic("invalid padding")
+       case padding != NoPadding && enc.decodeMap[byte(padding)] != invalidIndex:
+               panic("padding contained in alphabet")
         }
-
-       for i := 0; i < len(enc.encode); i++ {
-               if rune(enc.encode[i]) == padding {
-                       panic("padding contained in alphabet")
-               }
-       }
-
         enc.padChar = padding
         return &enc
  }
author	Joe Tsai <joetsai@digital-static.net>
	Thu, 22 Jun 2023 18:44:55 +0000 (11:44 -0700)
committer	Gopher Robot <gobot@golang.org>
	Fri, 18 Aug 2023 02:08:03 +0000 (02:08 +0000)
src/encoding/base32/base32.go		patch | blob | history
src/encoding/base64/base64.go		patch | blob | history