// Package chunkreader provides an io.Reader wrapper that minimizes IO reads and memory allocations.
//
// This listing was recovered from a whitespace-collapsed diff that added chunkreader/chunkreader.go and
// chunkreader/chunkreader_test.go. The library code needs the "io" import; the tests additionally need "bytes",
// "math/rand" and "testing".

// defaultMinBufLen is the buffer length used when Config.MinBufLen is zero.
//
// By historical reasons Postgres currently has 8KB send buffer inside, so here we want to have at least the same
// size buffer.
// @see https://github.com/postgres/postgres/blob/249d64999615802752940e017ee5166e726bc7cd/src/backend/libpq/pqcomm.c#L134
// @see https://www.postgresql.org/message-id/0cdc5485-cb3c-5e16-4a46-e3b2f7a41322%40ya.ru
const defaultMinBufLen = 8192

// ChunkReader is an io.Reader wrapper that minimizes IO reads and memory allocations. It allocates memory in chunks
// and will read as much as will fit in the current buffer in a single call regardless of how large a read is actually
// requested. The memory returned via Next is owned by the caller. This avoids the need for an additional copy.
//
// The downside of this approach is that a large buffer can be pinned in memory even if only a small slice is
// referenced. For example, an entire 4096 byte block could be pinned in memory by even a 1 byte slice. In these rare
// cases it would be advantageous to copy the bytes to another slice.
type ChunkReader struct {
	r io.Reader

	buf    []byte
	rp, wp int // buf read position and write position

	config Config
}

// Config contains configuration parameters for ChunkReader.
type Config struct {
	MinBufLen int // Minimum buffer length
}

// New creates and returns a new ChunkReader for r with default configuration.
func New(r io.Reader) *ChunkReader {
	cr, err := NewConfig(r, Config{})
	if err != nil {
		panic("default config can't be bad")
	}

	return cr
}

// NewConfig creates and returns a new ChunkReader for r configured by config.
//
// A zero config.MinBufLen selects defaultMinBufLen. The returned error is currently always nil. MinBufLen is not
// otherwise validated: a negative value is passed straight to make and panics.
func NewConfig(r io.Reader, config Config) (*ChunkReader, error) {
	if config.MinBufLen == 0 {
		config.MinBufLen = defaultMinBufLen
	}

	return &ChunkReader{
		r:      r,
		buf:    make([]byte, config.MinBufLen),
		config: config,
	}, nil
}

// Next returns buf filled with the next n bytes. The caller gains ownership of buf. It is not necessary to make a copy
// of buf. If an error occurs, buf will be nil; any bytes read before the error stay buffered.
//
// The capacity of buf is clipped to n, so appending to it reallocates instead of overwriting bytes that a later call
// to Next will return. n is expected to be non-negative; a negative n makes the slice expression panic.
func (r *ChunkReader) Next(n int) (buf []byte, err error) {
	// Fast path: the n bytes are already buffered.
	if r.wp-r.rp >= n {
		return r.take(n), nil
	}

	// The whole buffer is too small to ever hold n bytes, so move to a bigger one.
	if len(r.buf) < n {
		r.copyBufContents(r.newBuf(n))
	}

	// The buffer is big enough overall but its free tail is not. The consumed prefix belongs to earlier callers and
	// must not be overwritten, so the unread bytes are moved to the start of a fresh buffer instead of being shifted
	// in place.
	minReadCount := n - (r.wp - r.rp)
	if len(r.buf)-r.wp < minReadCount {
		r.copyBufContents(r.newBuf(n))
	}

	if err = r.appendAtLeast(minReadCount); err != nil {
		return nil, err
	}

	return r.take(n), nil
}

// take hands out the next n buffered bytes and advances the read position. The caller must have ensured that at least
// n bytes are buffered. The full slice expression limits the capacity so the result never aliases unread data.
func (r *ChunkReader) take(n int) []byte {
	end := r.rp + n
	out := r.buf[r.rp:end:end]
	r.rp = end
	return out
}

// appendAtLeast reads at least fillLen bytes from the underlying reader into the free tail of buf, advancing the write
// position by however many bytes were actually read, even when an error is returned.
func (r *ChunkReader) appendAtLeast(fillLen int) error {
	n, err := io.ReadAtLeast(r.r, r.buf[r.wp:], fillLen)
	r.wp += n
	return err
}

// newBuf allocates a buffer of size bytes, but never fewer than the configured minimum buffer length.
func (r *ChunkReader) newBuf(size int) []byte {
	if size < r.config.MinBufLen {
		size = r.config.MinBufLen
	}
	return make([]byte, size)
}

// copyBufContents copies the unread bytes to the start of dest and makes dest the current buffer.
func (r *ChunkReader) copyBufContents(dest []byte) {
	r.wp = copy(dest, r.buf[r.rp:r.wp])
	r.rp = 0
	r.buf = dest
}

func TestChunkReaderNextDoesNotReadIfAlreadyBuffered(t *testing.T) {
	server := &bytes.Buffer{}
	r, err := NewConfig(server, Config{MinBufLen: 4})
	if err != nil {
		t.Fatal(err)
	}

	src := []byte{1, 2, 3, 4}
	server.Write(src)

	n1, err := r.Next(2)
	if err != nil {
		t.Fatal(err)
	}
	if !bytes.Equal(n1, src[0:2]) {
		t.Fatalf("Expected read bytes to be %v, but they were %v", src[0:2], n1)
	}

	n2, err := r.Next(2)
	if err != nil {
		t.Fatal(err)
	}
	if !bytes.Equal(n2, src[2:4]) {
		t.Fatalf("Expected read bytes to be %v, but they were %v", src[2:4], n2)
	}

	if !bytes.Equal(r.buf, src) {
		t.Fatalf("Expected r.buf to be %v, but it was %v", src, r.buf)
	}
	if r.rp != 4 {
		t.Fatalf("Expected r.rp to be %v, but it was %v", 4, r.rp)
	}
	if r.wp != 4 {
		t.Fatalf("Expected r.wp to be %v, but it was %v", 4, r.wp)
	}
}

func TestChunkReaderNextExpandsBufAsNeeded(t *testing.T) {
	server := &bytes.Buffer{}
	r, err := NewConfig(server, Config{MinBufLen: 4})
	if err != nil {
		t.Fatal(err)
	}

	src := []byte{1, 2, 3, 4, 5, 6, 7, 8}
	server.Write(src)

	n1, err := r.Next(5)
	if err != nil {
		t.Fatal(err)
	}
	if !bytes.Equal(n1, src[0:5]) {
		t.Fatalf("Expected read bytes to be %v, but they were %v", src[0:5], n1)
	}
	if len(r.buf) != 5 {
		t.Fatalf("Expected len(r.buf) to be %v, but it was %v", 5, len(r.buf))
	}
}

func TestChunkReaderDoesNotReuseBuf(t *testing.T) {
	server := &bytes.Buffer{}
	r, err := NewConfig(server, Config{MinBufLen: 4})
	if err != nil {
		t.Fatal(err)
	}

	src := []byte{1, 2, 3, 4, 5, 6, 7, 8}
	server.Write(src)

	n1, err := r.Next(4)
	if err != nil {
		t.Fatal(err)
	}
	if !bytes.Equal(n1, src[0:4]) {
		t.Fatalf("Expected read bytes to be %v, but they were %v", src[0:4], n1)
	}

	n2, err := r.Next(4)
	if err != nil {
		t.Fatal(err)
	}
	if !bytes.Equal(n2, src[4:8]) {
		t.Fatalf("Expected read bytes to be %v, but they were %v", src[4:8], n2)
	}

	// n1 must still hold the first chunk: the second Next has to use a fresh buffer rather than overwrite it.
	if !bytes.Equal(n1, src[0:4]) {
		t.Fatalf("Expected Next not to overwrite a previously returned buf, expected %v but it was %v", src[0:4], n1)
	}
}

func TestChunkReaderNextClipsCapacity(t *testing.T) {
	server := &bytes.Buffer{}
	r, err := NewConfig(server, Config{MinBufLen: 4})
	if err != nil {
		t.Fatal(err)
	}

	src := []byte{1, 2, 3, 4}
	server.Write(src)

	n1, err := r.Next(2)
	if err != nil {
		t.Fatal(err)
	}
	if cap(n1) != 2 {
		t.Fatalf("Expected cap(n1) to be %v, but it was %v", 2, cap(n1))
	}

	// Appending to a returned slice must not touch bytes that are still waiting to be returned.
	_ = append(n1, 9)

	n2, err := r.Next(2)
	if err != nil {
		t.Fatal(err)
	}
	if !bytes.Equal(n2, src[2:4]) {
		t.Fatalf("Expected read bytes to be %v, but they were %v", src[2:4], n2)
	}
}

// randomReader is an endless io.Reader backed by a seeded random source.
type randomReader struct {
	rnd *rand.Rand
}

// Read reads a random number of random bytes.
func (r *randomReader) Read(p []byte) (n int, err error) {
	n = r.rnd.Intn(len(p) + 1)
	return r.rnd.Read(p[:n])
}

func TestChunkReaderNextFuzz(t *testing.T) {
	rr := &randomReader{rnd: rand.New(rand.NewSource(1))}
	r, err := NewConfig(rr, Config{MinBufLen: 8192})
	if err != nil {
		t.Fatal(err)
	}

	randomSizes := rand.New(rand.NewSource(0))

	for i := 0; i < 100000; i++ {
		size := randomSizes.Intn(16384) + 1
		buf, err := r.Next(size)
		if err != nil {
			t.Fatal(err)
		}
		if len(buf) != size {
			t.Fatalf("Expected to get %v bytes but got %v bytes", size, len(buf))
		}
	}
}