// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// A WordEncoder is an RFC 2047 encoded-word encoder.
//
// Its value is the encoding letter written into the encoded-word header.
type WordEncoder byte

const (
	// BEncoding represents Base64 encoding scheme as defined by RFC 2045.
	BEncoding = WordEncoder('b')
	// QEncoding represents the Q-encoding scheme as defined by RFC 2047.
	QEncoding = WordEncoder('q')
)
// errInvalidWord is returned when the input is not a well-formed RFC 2047
// encoded-word.
var errInvalidWord = errors.New("mime: invalid RFC 2047 encoded-word")
32 // Encode returns the encoded-word form of s. If s is ASCII without special
33 // characters, it is returned unchanged. The provided charset is the IANA
34 // charset name of s. It is case insensitive.
35 func (e WordEncoder) Encode(charset, s string) string {
36 if !needsEncoding(s) {
39 return e.encodeWord(charset, s)
// needsEncoding reports whether s contains at least one rune outside the
// printable ASCII range ' ' through '~', horizontal tab excepted.
func needsEncoding(s string) bool {
	for _, b := range s {
		if (b < ' ' || b > '~') && b != '\t' {
			return true
		}
	}
	return false
}
51 // encodeWord encodes a string into an encoded-word.
52 func (e WordEncoder) encodeWord(charset, s string) string {
53 var buf strings.Builder
54 // Could use a hint like len(s)*3, but that's not enough for cases
55 // with word splits and too much for simpler inputs.
56 // 48 is close to maxEncodedWordLen/2, but adjusted to allocator size class.
59 e.openWord(&buf, charset)
61 e.bEncode(&buf, charset, s)
63 e.qEncode(&buf, charset, s)
const (
	// The maximum length of an encoded-word is 75 characters.
	// See RFC 2047, section 2.
	maxEncodedWordLen = 75
	// maxContentLen is how much content can be encoded, ignoring the header and
	// 2-byte footer.
	maxContentLen = maxEncodedWordLen - len("=?UTF-8?q?") - len("?=")
)

// maxBase64Len is the number of raw bytes whose base64 form fits within
// maxContentLen characters.
var maxBase64Len = base64.StdEncoding.DecodedLen(maxContentLen)
81 // bEncode encodes s using base64 encoding and writes it to buf.
82 func (e WordEncoder) bEncode(buf *strings.Builder, charset, s string) {
83 w := base64.NewEncoder(base64.StdEncoding, buf)
84 // If the charset is not UTF-8 or if the content is short, do not bother
85 // splitting the encoded-word.
86 if !isUTF8(charset) || base64.StdEncoding.EncodedLen(len(s)) <= maxContentLen {
92 var currentLen, last, runeLen int
93 for i := 0; i < len(s); i += runeLen {
94 // Multi-byte characters must not be split across encoded-words.
95 // See RFC 2047, section 5.3.
96 _, runeLen = utf8.DecodeRuneInString(s[i:])
98 if currentLen+runeLen <= maxBase64Len {
101 io.WriteString(w, s[last:i])
103 e.splitWord(buf, charset)
108 io.WriteString(w, s[last:])
112 // qEncode encodes s using Q encoding and writes it to buf. It splits the
113 // encoded-words when necessary.
114 func (e WordEncoder) qEncode(buf *strings.Builder, charset, s string) {
115 // We only split encoded-words when the charset is UTF-8.
116 if !isUTF8(charset) {
121 var currentLen, runeLen int
122 for i := 0; i < len(s); i += runeLen {
124 // Multi-byte characters must not be split across encoded-words.
125 // See RFC 2047, section 5.3.
127 if b >= ' ' && b <= '~' && b != '=' && b != '?' && b != '_' {
128 runeLen, encLen = 1, 1
130 _, runeLen = utf8.DecodeRuneInString(s[i:])
134 if currentLen+encLen > maxContentLen {
135 e.splitWord(buf, charset)
138 writeQString(buf, s[i:i+runeLen])
143 // writeQString encodes s using Q encoding and writes it to buf.
144 func writeQString(buf *strings.Builder, s string) {
145 for i := 0; i < len(s); i++ {
149 case b >= '!' && b <= '~' && b != '=' && b != '?' && b != '_':
153 buf.WriteByte(upperhex[b>>4])
154 buf.WriteByte(upperhex[b&0x0f])
159 // openWord writes the beginning of an encoded-word into buf.
160 func (e WordEncoder) openWord(buf *strings.Builder, charset string) {
161 buf.WriteString("=?")
162 buf.WriteString(charset)
164 buf.WriteByte(byte(e))
// closeWord writes the end of an encoded-word into buf.
func closeWord(buf *strings.Builder) {
	buf.WriteString("?=")
}
173 // splitWord closes the current encoded-word and opens a new one.
174 func (e WordEncoder) splitWord(buf *strings.Builder, charset string) {
177 e.openWord(buf, charset)
// isUTF8 reports whether charset names UTF-8, ignoring case.
func isUTF8(charset string) bool {
	return strings.EqualFold(charset, "UTF-8")
}
// upperhex maps a nibble value (0-15) to its upper-case hexadecimal digit.
const upperhex = "0123456789ABCDEF"
// A WordDecoder decodes MIME headers containing RFC 2047 encoded-words.
type WordDecoder struct {
	// CharsetReader, if non-nil, defines a function to generate
	// charset-conversion readers, converting from the provided
	// charset into UTF-8.
	// Charsets are always lower-case. utf-8, iso-8859-1 and us-ascii charsets
	// are handled by default.
	// One of the CharsetReader's result values must be non-nil.
	CharsetReader func(charset string, input io.Reader) (io.Reader, error)
}
197 // Decode decodes an RFC 2047 encoded-word.
198 func (d *WordDecoder) Decode(word string) (string, error) {
199 // See https://tools.ietf.org/html/rfc2047#section-2 for details.
200 // Our decoder is permissive, we accept empty encoded-text.
201 if len(word) < 8 || !strings.HasPrefix(word, "=?") || !strings.HasSuffix(word, "?=") || strings.Count(word, "?") != 4 {
202 return "", errInvalidWord
204 word = word[2 : len(word)-2]
206 // split word "UTF-8?q?text" into "UTF-8", 'q', and "text"
207 charset, text, _ := strings.Cut(word, "?")
209 return "", errInvalidWord
211 encoding, text, _ := strings.Cut(text, "?")
212 if len(encoding) != 1 {
213 return "", errInvalidWord
216 content, err := decode(encoding[0], text)
221 var buf strings.Builder
222 if err := d.convert(&buf, charset, content); err != nil {
225 return buf.String(), nil
228 // DecodeHeader decodes all encoded-words of the given string. It returns an
229 // error if and only if CharsetReader of d returns an error.
230 func (d *WordDecoder) DecodeHeader(header string) (string, error) {
231 // If there is no encoded-word, returns before creating a buffer.
232 i := strings.Index(header, "=?")
237 var buf strings.Builder
239 buf.WriteString(header[:i])
242 betweenWords := false
244 start := strings.Index(header, "=?")
248 cur := start + len("=?")
250 i := strings.Index(header[cur:], "?")
254 charset := header[cur : cur+i]
257 if len(header) < cur+len("Q??=") {
260 encoding := header[cur]
263 if header[cur] != '?' {
268 j := strings.Index(header[cur:], "?=")
272 text := header[cur : cur+j]
273 end := cur + j + len("?=")
275 content, err := decode(encoding, text)
278 buf.WriteString(header[:start+2])
279 header = header[start+2:]
283 // Write characters before the encoded-word. White-space and newline
284 // characters separating two encoded-words must be deleted.
285 if start > 0 && (!betweenWords || hasNonWhitespace(header[:start])) {
286 buf.WriteString(header[:start])
289 if err := d.convert(&buf, charset, content); err != nil {
293 header = header[end:]
298 buf.WriteString(header)
301 return buf.String(), nil
304 func decode(encoding byte, text string) ([]byte, error) {
307 return base64.StdEncoding.DecodeString(text)
311 return nil, errInvalidWord
315 func (d *WordDecoder) convert(buf *strings.Builder, charset string, content []byte) error {
317 case strings.EqualFold("utf-8", charset):
319 case strings.EqualFold("iso-8859-1", charset):
320 for _, c := range content {
321 buf.WriteRune(rune(c))
323 case strings.EqualFold("us-ascii", charset):
324 for _, c := range content {
325 if c >= utf8.RuneSelf {
326 buf.WriteRune(unicode.ReplacementChar)
332 if d.CharsetReader == nil {
333 return fmt.Errorf("mime: unhandled charset %q", charset)
335 r, err := d.CharsetReader(strings.ToLower(charset), bytes.NewReader(content))
339 if _, err = io.Copy(buf, r); err != nil {
// hasNonWhitespace reports whether s (assumed to be ASCII) contains at least
// one byte of non-whitespace.
func hasNonWhitespace(s string) bool {
	for _, b := range s {
		switch b {
		// Encoded-words can only be separated by linear white spaces which does
		// not include vertical tabs (\v).
		case ' ', '\t', '\n', '\r':
		default:
			return true
		}
	}
	return false
}
361 // qDecode decodes a Q encoded string.
362 func qDecode(s string) ([]byte, error) {
363 dec := make([]byte, len(s))
365 for i := 0; i < len(s); i++ {
371 return nil, errInvalidWord
373 b, err := readHexByte(s[i+1], s[i+2])
379 case (c <= '~' && c >= ' ') || c == '\n' || c == '\r' || c == '\t':
382 return nil, errInvalidWord
390 // readHexByte returns the byte from its quoted-printable representation.
391 func readHexByte(a, b byte) (byte, error) {
394 if hb, err = fromHex(a); err != nil {
397 if lb, err = fromHex(b); err != nil {
400 return hb<<4 | lb, nil
// fromHex returns the numeric value (0-15) of the hexadecimal digit b, or an
// error if b is not a hexadecimal digit.
func fromHex(b byte) (byte, error) {
	switch {
	case b >= '0' && b <= '9':
		return b - '0', nil
	case b >= 'A' && b <= 'F':
		return b - 'A' + 10, nil
	// Accept badly encoded bytes.
	case b >= 'a' && b <= 'f':
		return b - 'a' + 10, nil
	}
	return 0, fmt.Errorf("mime: invalid hex byte %#02x", b)
}