The existing implementation allocates a new 4KB buffer each time it opens flate-encoded file in a zip archive. This commit allows the flate reader to reuse the buffer on call Reset instead of allocating a new one.
It is noticeable when a zip archive contains a huge amount of files, e.g. zip archive has 50_000 files, for each file 4KB buffer is allocated, so it is 200MB memory allocations. If files are read sequentially only one buffer is needed.
Fixes #59774
Change-Id: Ib16336b101ba58e8f0f30a45dc5fd4eeebc801a1
GitHub-Last-Rev:
f3f395b2ad95b7ad7ce9df6f5e49c7b6a0627627
GitHub-Pull-Request: golang/go#59775
Reviewed-on: https://go-review.googlesource.com/c/go/+/487675
Run-TryBot: Ian Lance Taylor <iant@golang.org>
Auto-Submit: Ian Lance Taylor <iant@google.com>
Run-TryBot: Ian Lance Taylor <iant@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@google.com>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
for i := 0; i < nFiles; i++ {
_, err := w.CreateHeader(&FileHeader{
Name: fmt.Sprintf("%d.dat", i),
- Method: Store, // avoid Issue 6136 and Issue 6138
+ Method: Store, // Deflate is too slow when it is compiled with -race flag
})
if err != nil {
t.Fatalf("creating file %d: %v", i, err)
type decompressor struct {
// Input source.
r Reader
+ rBuf *bufio.Reader // created if provided io.Reader does not implement io.ByteReader
roffset int64
// Input bits, in top of b.
}
}
-func makeReader(r io.Reader) Reader {
+func (f *decompressor) makeReader(r io.Reader) {
if rr, ok := r.(Reader); ok {
- return rr
+ f.rBuf = nil
+ f.r = rr
+ return
+ }
+ // Reuse rBuf if possible. Invariant: rBuf is always created (and owned) by decompressor.
+ if f.rBuf != nil {
+ f.rBuf.Reset(r)
+ } else {
+ // bufio.NewReader will not return r, as r does not implement flate.Reader, so it is not bufio.Reader.
+ f.rBuf = bufio.NewReader(r)
}
- return bufio.NewReader(r)
+ f.r = f.rBuf
}
func fixedHuffmanDecoderInit() {
func (f *decompressor) Reset(r io.Reader, dict []byte) error {
*f = decompressor{
- r: makeReader(r),
+ rBuf: f.rBuf,
bits: f.bits,
codebits: f.codebits,
dict: f.dict,
step: (*decompressor).nextBlock,
}
+ f.makeReader(r)
f.dict.init(maxMatchOffset, dict)
return nil
}
fixedHuffmanDecoderInit()
var f decompressor
- f.r = makeReader(r)
+ f.makeReader(r)
f.bits = new([maxNumLit + maxNumDist]int)
f.codebits = new([numCodes]int)
f.step = (*decompressor).nextBlock
fixedHuffmanDecoderInit()
var f decompressor
- f.r = makeReader(r)
+ f.makeReader(r)
f.bits = new([maxNumLit + maxNumDist]int)
f.codebits = new([numCodes]int)
f.step = (*decompressor).nextBlock
package flate
import (
+ "bufio"
"bytes"
"io"
"strings"
}
}
}
+
+func TestReaderReusesReaderBuffer(t *testing.T) {
+ encodedReader := bytes.NewReader([]byte{})
+ encodedNotByteReader := struct{ io.Reader }{encodedReader}
+
+ t.Run("BufferIsReused", func(t *testing.T) {
+ f := NewReader(encodedNotByteReader).(*decompressor)
+ bufioR, ok := f.r.(*bufio.Reader)
+ if !ok {
+ t.Fatalf("bufio.Reader should be created")
+ }
+ f.Reset(encodedNotByteReader, nil)
+ if bufioR != f.r {
+ t.Fatalf("bufio.Reader was not reused")
+ }
+ })
+ t.Run("BufferIsNotReusedWhenGotByteReader", func(t *testing.T) {
+ f := NewReader(encodedNotByteReader).(*decompressor)
+ if _, ok := f.r.(*bufio.Reader); !ok {
+ t.Fatalf("bufio.Reader should be created")
+ }
+ f.Reset(encodedReader, nil)
+ if f.r != encodedReader {
+ t.Fatalf("provided io.ByteReader should be used directly")
+ }
+ })
+ t.Run("BufferIsCreatedAfterByteReader", func(t *testing.T) {
+ for i, r := range []io.Reader{encodedReader, bufio.NewReader(encodedReader)} {
+ f := NewReader(r).(*decompressor)
+ if f.r != r {
+ t.Fatalf("provided io.ByteReader should be used directly, i=%d", i)
+ }
+ f.Reset(encodedNotByteReader, nil)
+ if _, ok := f.r.(*bufio.Reader); !ok {
+ t.Fatalf("bufio.Reader should be created, i=%d", i)
+ }
+ }
+ })
+}