]> Cypherpunks.ru repositories - gostls13.git/commitdiff
net/http: support gzip, x-gzip Transfer-Encodings
authorEmmanuel T Odeke <emmanuel@orijtech.com>
Sun, 10 Mar 2019 06:33:43 +0000 (22:33 -0800)
committerEmmanuel Odeke <emm.odeke@gmail.com>
Fri, 8 Nov 2019 19:24:30 +0000 (19:24 +0000)
Support "gzip" aka "x-gzip" as a transfer-encoding for
requests and responses as per RFC 7230 Section 3.3.1.

"gzip" and "x-gzip" are equivalents as requested by
RFC 7230 Section 4.2.3.

Transfer-Encoding is an on-fly property of the body
that can be applied by proxies, other servers and basically
any intermediary to transport the content e.g. across data centers
or backends/machine to machine that need compression.

For this change, "gzip" is both explicitly and implicitly combined
with transfer-encoding "chunked" in an ordering such as:

    Transfer-Encoding: gzip, chunked

and NOT

    Transfer-Encoding: chunked, gzip

Obviously the latter form is counter-intuitive for streaming.
Thus "chunked" is the last value to appear in that transfer-encoding header,
if explicitly included.

When parsing the response, the chunked body is concatenated as "chunked" does,
before finally being decompressed as "gzip".

A chunked and compressed body would typically look like this:

<LENGTH_1>\r\n<CHUNK_1_GZIPPED_BODY>\r\n<LENGTH_2>\r\n<CHUNK_2_GZIPPED_BODY>\0\r\n

which when being processed we would contentate

    <FULL_BODY>  := <CHUNK_1_GZIPPED_BODY> + <CHUNK_2_GZIPPED_BODY> + ...

and then finally gunzip it
    <FINAL_BODY> := gunzip(<FULL_BODY>)

If a "chunked" transfer-encoding is NOT applied but "gzip" is applied,
we implicitly assume that they requested using "chunked" at the end.
This is as per the recommendation of RFC 3.3.1. which explicitly says
that for:

* Request:
"  If any transfer coding
   other than chunked is applied to a request payload body, the sender
   MUST apply chunked as the final transfer coding to ensure that the
   message is properly framed."

* Response:
"  If any transfer coding other than
   chunked is applied to a response payload body, the sender MUST either
   apply chunked as the final transfer coding or terminate the message
   by closing the connection."

RELNOTE=yes

Fixes #29162

Change-Id: Icb8b8b838cf4119705605b29725cabb1fe258491
Reviewed-on: https://go-review.googlesource.com/c/go/+/166517
Run-TryBot: Emmanuel Odeke <emm.odeke@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
src/net/http/transfer.go
src/net/http/transfer_test.go

index 2e01a07f84fccdbf8ded4873f31ee151a8d2b414..e28d0be020bf70531868f7532735c445da98e9ee 100644 (file)
@@ -7,6 +7,7 @@ package http
 import (
        "bufio"
        "bytes"
+       "compress/gzip"
        "errors"
        "fmt"
        "io"
@@ -466,6 +467,34 @@ func suppressedHeaders(status int) []string {
        return nil
 }
 
+// proxyingReadCloser is a composite type that accepts and proxies
+// io.Read and io.Close calls to its respective Reader and Closer.
+//
+// It is composed of:
+// a) a top-level reader e.g. the result of decompression
+// b) a symbolic Closer e.g. the result of decompression, the
+//    original body and the connection itself.
+type proxyingReadCloser struct {
+       io.Reader
+       io.Closer
+}
+
+// multiCloser implements io.Closer and allows a bunch of io.Closer values
+// to all be closed once.
+// Example usage is with proxyingReadCloser if we are decompressing a response
+// body on the fly and would like to close both *gzip.Reader and underlying body.
+type multiCloser []io.Closer
+
+func (mc multiCloser) Close() error {
+       var err error
+       for _, c := range mc {
+               if err1 := c.Close(); err1 != nil && err == nil {
+                       err = err1
+               }
+       }
+       return err
+}
+
 // msg is *Request or *Response.
 func readTransfer(msg interface{}, r *bufio.Reader) (err error) {
        t := &transferReader{RequestMethod: "GET"}
@@ -543,7 +572,7 @@ func readTransfer(msg interface{}, r *bufio.Reader) (err error) {
        // Prepare body reader. ContentLength < 0 means chunked encoding
        // or close connection when finished, since multipart is not supported yet
        switch {
-       case chunked(t.TransferEncoding):
+       case chunked(t.TransferEncoding) || implicitlyChunked(t.TransferEncoding):
                if noResponseBodyExpected(t.RequestMethod) || !bodyAllowedForStatus(t.StatusCode) {
                        t.Body = NoBody
                } else {
@@ -564,6 +593,21 @@ func readTransfer(msg interface{}, r *bufio.Reader) (err error) {
                }
        }
 
+       // Finally if "gzip" was one of the requested transfer-encodings,
+       // we'll unzip the concatenated body/payload of the request.
+       // TODO: As we support more transfer-encodings, extract
+       // this code and apply the un-codings in reverse.
+       if t.Body != NoBody && gzipped(t.TransferEncoding) {
+               zr, err := gzip.NewReader(t.Body)
+               if err != nil {
+                       return fmt.Errorf("http: failed to gunzip body: %v", err)
+               }
+               t.Body = &proxyingReadCloser{
+                       Reader: zr,
+                       Closer: multiCloser{zr, t.Body},
+               }
+       }
+
        // Unify output
        switch rr := msg.(type) {
        case *Request:
@@ -583,8 +627,41 @@ func readTransfer(msg interface{}, r *bufio.Reader) (err error) {
        return nil
 }
 
-// Checks whether chunked is part of the encodings stack
-func chunked(te []string) bool { return len(te) > 0 && te[0] == "chunked" }
+// Checks whether chunked is the last part of the encodings stack
+func chunked(te []string) bool { return len(te) > 0 && te[len(te)-1] == "chunked" }
+
+// implicitlyChunked is a helper to check for implicity of chunked, because
+// RFC 7230 Section 3.3.1 says that the sender MUST apply chunked as the final
+// payload body to ensure that the message is framed for both the request
+// and the body. Since "identity" is incompatabile with any other transformational
+// encoding cannot co-exist, the presence of "identity" will cause implicitlyChunked
+// to return false.
+func implicitlyChunked(te []string) bool {
+       if len(te) == 0 { // No transfer-encodings passed in, so not implicity chunked.
+               return false
+       }
+       for _, tei := range te {
+               if tei == "identity" {
+                       return false
+               }
+       }
+       return true
+}
+
+func isGzipTransferEncoding(tei string) bool {
+       // RFC 7230 4.2.3 requests that "x-gzip" SHOULD be considered the same as "gzip".
+       return tei == "gzip" || tei == "x-gzip"
+}
+
+// Checks where either of "gzip" or "x-gzip" are contained in transfer encodings.
+func gzipped(te []string) bool {
+       for _, tei := range te {
+               if isGzipTransferEncoding(tei) {
+                       return true
+               }
+       }
+       return false
+}
 
 // Checks whether the encoding is explicitly "identity".
 func isIdentity(te []string) bool { return len(te) == 1 && te[0] == "identity" }
@@ -620,25 +697,47 @@ func (t *transferReader) fixTransferEncoding() error {
 
        encodings := strings.Split(raw[0], ",")
        te := make([]string, 0, len(encodings))
-       // TODO: Even though we only support "identity" and "chunked"
-       // encodings, the loop below is designed with foresight. One
-       // invariant that must be maintained is that, if present,
-       // chunked encoding must always come first.
-       for _, encoding := range encodings {
+
+       // When adding new encodings, please maintain the invariant:
+       //   if chunked encoding is present, it must always
+       //   come last and it must be applied only once.
+       // See RFC 7230 Section 3.3.1 Transfer-Encoding.
+       for i, encoding := range encodings {
                encoding = strings.ToLower(strings.TrimSpace(encoding))
-               // "identity" encoding is not recorded
+
                if encoding == "identity" {
+                       // "identity" should not be mixed with other transfer-encodings/compressions
+                       // because it means "no compression, no transformation".
+                       if len(encodings) != 1 {
+                               return &badStringError{`"identity" when present must be the only transfer encoding`, strings.Join(encodings, ",")}
+                       }
+                       // "identity" is not recorded.
                        break
                }
-               if encoding != "chunked" {
+
+               switch {
+               case encoding == "chunked":
+                       // "chunked" MUST ALWAYS be the last
+                       // encoding as per the  loop invariant.
+                       // That is:
+                       //     Invalid: [chunked, gzip]
+                       //     Valid:   [gzip, chunked]
+                       if i+1 != len(encodings) {
+                               return &badStringError{"chunked must be applied only once, as the last encoding", strings.Join(encodings, ",")}
+                       }
+                       // Supported otherwise.
+
+               case isGzipTransferEncoding(encoding):
+                       // Supported
+
+               default:
                        return &unsupportedTEError{fmt.Sprintf("unsupported transfer encoding: %q", encoding)}
                }
+
                te = te[0 : len(te)+1]
                te[len(te)-1] = encoding
        }
-       if len(te) > 1 {
-               return &badStringError{"too many transfer encodings", strings.Join(te, ",")}
-       }
+
        if len(te) > 0 {
                // RFC 7230 3.3.2 says "A sender MUST NOT send a
                // Content-Length header field in any message that
index 65009ee8bf7fd5c9cb97a04e393d194ccc964aa0..a8ce2d3709ab3d6ab37635a30b3e4faf156aabc8 100644 (file)
@@ -7,6 +7,7 @@ package http
 import (
        "bufio"
        "bytes"
+       "compress/gzip"
        "crypto/rand"
        "fmt"
        "io"
@@ -61,7 +62,6 @@ func TestFinalChunkedBodyReadEOF(t *testing.T) {
        buf := make([]byte, len(want))
        n, err := res.Body.Read(buf)
        if n != len(want) || err != io.EOF {
-               t.Logf("body = %#v", res.Body)
                t.Errorf("Read = %v, %v; want %d, EOF", n, err, len(want))
        }
        if string(buf) != want {
@@ -290,7 +290,7 @@ func TestFixTransferEncoding(t *testing.T) {
                },
                {
                        hdr:     Header{"Transfer-Encoding": {"chunked, chunked", "identity", "chunked"}},
-                       wantErr: &badStringError{"too many transfer encodings", "chunked,chunked"},
+                       wantErr: &badStringError{"chunked must be applied only once, as the last encoding", "chunked, chunked"},
                },
                {
                        hdr:     Header{"Transfer-Encoding": {"chunked"}},
@@ -310,3 +310,283 @@ func TestFixTransferEncoding(t *testing.T) {
                }
        }
 }
+
+func gzipIt(s string) string {
+       buf := new(bytes.Buffer)
+       gw := gzip.NewWriter(buf)
+       gw.Write([]byte(s))
+       gw.Close()
+       return buf.String()
+}
+
+func TestUnitTestProxyingReadCloserClosesBody(t *testing.T) {
+       var checker closeChecker
+       buf := new(bytes.Buffer)
+       buf.WriteString("Hello, Gophers!")
+       prc := &proxyingReadCloser{
+               Reader: buf,
+               Closer: &checker,
+       }
+       prc.Close()
+
+       read, err := ioutil.ReadAll(prc)
+       if err != nil {
+               t.Fatalf("Read error: %v", err)
+       }
+       if g, w := string(read), "Hello, Gophers!"; g != w {
+               t.Errorf("Read mismatch: got %q want %q", g, w)
+       }
+
+       if checker.closed != true {
+               t.Fatal("closeChecker.Close was never invoked")
+       }
+}
+
+func TestGzipTransferEncoding_request(t *testing.T) {
+       helloWorldGzipped := gzipIt("Hello, World!")
+
+       tests := []struct {
+               payload  string
+               wantErr  string
+               wantBody string
+       }{
+
+               {
+                       // The case of "chunked" properly applied as the last encoding
+                       // and a gzipped request payload that is streamed in 3 parts.
+                       payload: `POST / HTTP/1.1
+Host: golang.org
+Transfer-Encoding: gzip, chunked
+Content-Type: text/html; charset=UTF-8
+
+` + fmt.Sprintf("%02x\r\n%s\r\n%02x\r\n%s\r\n%02x\r\n%s\r\n0\r\n\r\n",
+                               3, helloWorldGzipped[:3],
+                               5, helloWorldGzipped[3:8],
+                               len(helloWorldGzipped)-8, helloWorldGzipped[8:]),
+                       wantBody: `Hello, World!`,
+               },
+
+               {
+                       // The request specifies "Transfer-Encoding: chunked" so its body must be left untouched.
+                       payload: `PUT / HTTP/1.1
+Host: golang.org
+Transfer-Encoding: chunked
+Connection: close
+Content-Type: text/html; charset=UTF-8
+
+` + fmt.Sprintf("%0x\r\n%s\r\n0\r\n\r\n", len(helloWorldGzipped), helloWorldGzipped),
+                       // We want that payload as it was sent.
+                       wantBody: helloWorldGzipped,
+               },
+
+               {
+                       // Valid request, the body doesn't have "Transfer-Encoding: chunked" but implicitly encoded
+                       // for chunking as per the advisory from RFC 7230 3.3.1 which advises for cases where.
+                       payload: `POST / HTTP/1.1
+Host: localhost
+Transfer-Encoding: gzip
+Content-Type: text/html; charset=UTF-8
+
+` + fmt.Sprintf("%0x\r\n%s\r\n0\r\n\r\n", len(helloWorldGzipped), helloWorldGzipped),
+                       wantBody: `Hello, World!`,
+               },
+
+               {
+                       // Invalid request, the body isn't chunked nor is the connection terminated immediately
+                       // hence invalid as per the advisory from RFC 7230 3.3.1 which advises for cases where
+                       // a Transfer-Encoding that isn't finally chunked is provided.
+                       payload: `PUT / HTTP/1.1
+Host: golang.org
+Transfer-Encoding: gzip
+Content-Length: 0
+Connection: close
+Content-Type: text/html; charset=UTF-8
+
+`,
+                       wantErr: `EOF`,
+               },
+
+               {
+                       // The case of chunked applied before another encoding.
+                       payload: `PUT / HTTP/1.1
+Location: golang.org
+Transfer-Encoding: chunked, gzip
+Content-Length: 0
+Connection: close
+Content-Type: text/html; charset=UTF-8
+
+`,
+                       wantErr: `chunked must be applied only once, as the last encoding "chunked, gzip"`,
+               },
+
+               {
+                       // The case of chunked properly applied as the
+                       // last encoding BUT with a bad "Content-Length".
+                       payload: `POST / HTTP/1.1
+Host: golang.org
+Transfer-Encoding: gzip, chunked
+Content-Length: 10
+Connection: close
+Content-Type: text/html; charset=UTF-8
+
+` + "0\r\n\r\n",
+                       wantErr: "EOF",
+               },
+       }
+
+       for i, tt := range tests {
+               req, err := ReadRequest(bufio.NewReader(strings.NewReader(tt.payload)))
+               if tt.wantErr != "" {
+                       if err == nil || !strings.Contains(err.Error(), tt.wantErr) {
+                               t.Errorf("test %d. Error mismatch\nGot:  %v\nWant: %s", i, err, tt.wantErr)
+                       }
+                       continue
+               }
+
+               if err != nil {
+                       t.Errorf("test %d. Unexpected ReadRequest error: %v\nPayload:\n%s", i, err, tt.payload)
+                       continue
+               }
+
+               got, err := ioutil.ReadAll(req.Body)
+               req.Body.Close()
+               if err != nil {
+                       t.Errorf("test %d. Failed to read response body: %v", i, err)
+               }
+               if g, w := string(got), tt.wantBody; g != w {
+                       t.Errorf("test %d. Request body mimsatch\nGot:\n%s\n\nWant:\n%s", i, g, w)
+               }
+       }
+}
+
+func TestGzipTransferEncoding_response(t *testing.T) {
+       helloWorldGzipped := gzipIt("Hello, World!")
+
+       tests := []struct {
+               payload  string
+               wantErr  string
+               wantBody string
+       }{
+
+               {
+                       // The case of "chunked" properly applied as the last encoding
+                       // and a gzipped payload that is streamed in 3 parts.
+                       payload: `HTTP/1.1 302 Found
+Location: https://golang.org/
+Transfer-Encoding: gzip, chunked
+Connection: close
+Content-Type: text/html; charset=UTF-8
+
+` + fmt.Sprintf("%02x\r\n%s\r\n%02x\r\n%s\r\n%02x\r\n%s\r\n0\r\n\r\n",
+                               3, helloWorldGzipped[:3],
+                               5, helloWorldGzipped[3:8],
+                               len(helloWorldGzipped)-8, helloWorldGzipped[8:]),
+                       wantBody: `Hello, World!`,
+               },
+
+               {
+                       // The response specifies "Transfer-Encoding: chunked" so response body must be left untouched.
+                       payload: `HTTP/1.1 302 Found
+Location: https://golang.org/
+Transfer-Encoding: chunked
+Connection: close
+Content-Type: text/html; charset=UTF-8
+
+` + fmt.Sprintf("%0x\r\n%s\r\n0\r\n\r\n", len(helloWorldGzipped), helloWorldGzipped),
+                       // We want that payload as it was sent.
+                       wantBody: helloWorldGzipped,
+               },
+
+               {
+                       // Valid response, the body doesn't have "Transfer-Encoding: chunked" but implicitly encoded
+                       // for chunking as per the advisory from RFC 7230 3.3.1 which advises for cases where.
+                       payload: `HTTP/1.1 302 Found
+Location: https://golang.org/
+Transfer-Encoding: gzip
+Connection: close
+Content-Type: text/html; charset=UTF-8
+
+` + fmt.Sprintf("%0x\r\n%s\r\n0\r\n\r\n", len(helloWorldGzipped), helloWorldGzipped),
+                       wantBody: `Hello, World!`,
+               },
+
+               {
+                       // Invalid response, the body isn't chunked nor is the connection terminated immediately
+                       // hence invalid as per the advisory from RFC 7230 3.3.1 which advises for cases where
+                       // a Transfer-Encoding that isn't finally chunked is provided.
+                       payload: `HTTP/1.1 302 Found
+Location: https://golang.org/
+Transfer-Encoding: gzip
+Content-Length: 0
+Connection: close
+Content-Type: text/html; charset=UTF-8
+
+`,
+                       wantErr: `EOF`,
+               },
+
+               {
+                       // The case of chunked applied before another encoding.
+                       payload: `HTTP/1.1 302 Found
+Location: https://golang.org/
+Transfer-Encoding: chunked, gzip
+Content-Length: 0
+Connection: close
+Content-Type: text/html; charset=UTF-8
+
+`,
+                       wantErr: `chunked must be applied only once, as the last encoding "chunked, gzip"`,
+               },
+
+               {
+                       // The case of chunked properly applied as the
+                       // last encoding BUT with a bad "Content-Length".
+                       payload: `HTTP/1.1 302 Found
+Location: https://golang.org/
+Transfer-Encoding: gzip, chunked
+Content-Length: 10
+Connection: close
+Content-Type: text/html; charset=UTF-8
+
+` + "0\r\n\r\n",
+                       wantErr: "EOF",
+               },
+
+               {
+                       // Including "identity" more than once.
+                       payload: `HTTP/1.1 200 OK
+Location: https://golang.org/
+Transfer-Encoding: identity, identity
+Content-Length: 0
+Connection: close
+Content-Type: text/html; charset=UTF-8
+
+` + "0\r\n\r\n",
+                       wantErr: `"identity" when present must be the only transfer encoding "identity, identity"`,
+               },
+       }
+
+       for i, tt := range tests {
+               res, err := ReadResponse(bufio.NewReader(strings.NewReader(tt.payload)), nil)
+               if tt.wantErr != "" {
+                       if err == nil || !strings.Contains(err.Error(), tt.wantErr) {
+                               t.Errorf("test %d. Error mismatch\nGot:  %v\nWant: %s", i, err, tt.wantErr)
+                       }
+                       continue
+               }
+
+               if err != nil {
+                       t.Errorf("test %d. Unexpected ReadResponse error: %v\nPayload:\n%s", i, err, tt.payload)
+                       continue
+               }
+
+               got, err := ioutil.ReadAll(res.Body)
+               res.Body.Close()
+               if err != nil {
+                       t.Errorf("test %d. Failed to read response body: %v", i, err)
+               }
+               if g, w := string(got), tt.wantBody; g != w {
+                       t.Errorf("test %d. Response body mimsatch\nGot:\n%s\n\nWant:\n%s", i, g, w)
+               }
+       }
+}