// introduced for SASL GSSAPI and standardized in RFC 4648.
// The alternate "base32hex" encoding is used in DNSSEC.
type Encoding struct {
- encode [32]byte
- decodeMap [256]byte
+ encode [32]byte // mapping of symbol index to symbol byte value
+ decodeMap [256]uint8 // mapping of symbol byte value to symbol index
padChar rune
}
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
+ invalidIndex = '\xff'
)
const encodeStd = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"
const encodeHex = "0123456789ABCDEFGHIJKLMNOPQRSTUV"
-// NewEncoding returns a new Encoding defined by the given alphabet,
-// which must be a 32-byte string. The alphabet is treated as sequence
-// of byte values without any special treatment for multi-byte UTF-8.
+// NewEncoding returns a new padded Encoding defined by the given alphabet,
+// which must be a 32-byte string that contains unique byte values and
+// does not contain the padding character or CR / LF ('\r', '\n').
+// The alphabet is treated as a sequence of byte values
+// without any special treatment for multi-byte UTF-8.
+// The resulting Encoding uses the default padding character ('='),
+// which may be changed or disabled via WithPadding.
func NewEncoding(encoder string) *Encoding {
if len(encoder) != 32 {
panic("encoding alphabet is not 32-bytes long")
copy(e.decodeMap[:], decodeMapInitialize)
for i := 0; i < len(encoder); i++ {
- e.decodeMap[encoder[i]] = byte(i)
+ // Note: While we document that the alphabet cannot contain
+ // the padding character, we do not enforce it since we do not know
+ // if the caller intends to switch the padding from StdPadding later.
+ switch {
+ case encoder[i] == '\n' || encoder[i] == '\r':
+ panic("encoding alphabet contains newline character")
+ case e.decodeMap[encoder[i]] != invalidIndex:
+ panic("encoding alphabet includes duplicate symbols")
+ }
+ e.decodeMap[encoder[i]] = uint8(i)
}
return e
}
// Padding characters above '\x7f' are encoded as their exact byte value
// rather than using the UTF-8 representation of the codepoint.
func (enc Encoding) WithPadding(padding rune) *Encoding {
- if padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff {
+ switch {
+ case padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff:
panic("invalid padding")
+ case padding != NoPadding && enc.decodeMap[byte(padding)] != invalidIndex:
+ panic("padding contained in alphabet")
}
-
- for i := 0; i < len(enc.encode); i++ {
- if rune(enc.encode[i]) == padding {
- panic("padding contained in alphabet")
- }
- }
-
enc.padChar = padding
return &enc
}
// (RFC 1421). RFC 4648 also defines an alternate encoding, which is
// the standard encoding with - and _ substituted for + and /.
type Encoding struct {
- encode [64]byte
- decodeMap [256]byte
+ encode [64]byte // mapping of symbol index to symbol byte value
+ decodeMap [256]uint8 // mapping of symbol byte value to symbol index
padChar rune
strict bool
}
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
+ invalidIndex = '\xff'
)
const encodeStd = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
const encodeURL = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
// NewEncoding returns a new padded Encoding defined by the given alphabet,
-// which must be a 64-byte string that does not contain the padding character
-// or CR / LF ('\r', '\n'). The alphabet is treated as sequence of byte values
+// which must be a 64-byte string that contains unique byte values and
+// does not contain the padding character or CR / LF ('\r', '\n').
+// The alphabet is treated as a sequence of byte values
// without any special treatment for multi-byte UTF-8.
// The resulting Encoding uses the default padding character ('='),
// which may be changed or disabled via WithPadding.
if len(encoder) != 64 {
panic("encoding alphabet is not 64-bytes long")
}
- for i := 0; i < len(encoder); i++ {
- if encoder[i] == '\n' || encoder[i] == '\r' {
- panic("encoding alphabet contains newline character")
- }
- }
e := new(Encoding)
e.padChar = StdPadding
copy(e.decodeMap[:], decodeMapInitialize)
for i := 0; i < len(encoder); i++ {
- e.decodeMap[encoder[i]] = byte(i)
+ // Note: While we document that the alphabet cannot contain
+ // the padding character, we do not enforce it since we do not know
+ // if the caller intends to switch the padding from StdPadding later.
+ switch {
+ case encoder[i] == '\n' || encoder[i] == '\r':
+ panic("encoding alphabet contains newline character")
+ case e.decodeMap[encoder[i]] != invalidIndex:
+ panic("encoding alphabet includes duplicate symbols")
+ }
+ e.decodeMap[encoder[i]] = uint8(i)
}
return e
}
// Padding characters above '\x7f' are encoded as their exact byte value
// rather than using the UTF-8 representation of the codepoint.
func (enc Encoding) WithPadding(padding rune) *Encoding {
- if padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff {
+ switch {
+ case padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff:
panic("invalid padding")
+ case padding != NoPadding && enc.decodeMap[byte(padding)] != invalidIndex:
+ panic("padding contained in alphabet")
}
-
- for i := 0; i < len(enc.encode); i++ {
- if rune(enc.encode[i]) == padding {
- panic("padding contained in alphabet")
- }
- }
-
enc.padChar = padding
return &enc
}