Merge branch 'chunkreaderimport' into v5-dev

query-exec-mode
Jack Christensen 2022-02-21 14:27:21 -06:00
commit 44375443e1
2 changed files with 232 additions and 0 deletions

104
chunkreader/chunkreader.go Normal file
View File

@ -0,0 +1,104 @@
// Package chunkreader provides an io.Reader wrapper that minimizes IO reads and memory allocations.
package chunkreader
import (
"io"
)
// ChunkReader is a io.Reader wrapper that minimizes IO reads and memory allocations. It allocates memory in chunks and
// will read as much as will fit in the current buffer in a single call regardless of how large a read is actually
// requested. The memory returned via Next is owned by the caller. This avoids the need for an additional copy.
//
// The downside of this approach is that a large buffer can be pinned in memory even if only a small slice is
// referenced. For example, an entire 4096 byte block could be pinned in memory by even a 1 byte slice. In these rare
// cases it would be advantageous to copy the bytes to another slice.
type ChunkReader struct {
r io.Reader
buf []byte
rp, wp int // buf read position and write position
config Config
}
// Config contains configuration parameters for ChunkReader.
type Config struct {
MinBufLen int // Minimum buffer length
}
// New creates and returns a new ChunkReader for r with default configuration.
func New(r io.Reader) *ChunkReader {
cr, err := NewConfig(r, Config{})
if err != nil {
panic("default config can't be bad")
}
return cr
}
// NewConfig creates and a new ChunkReader for r configured by config.
func NewConfig(r io.Reader, config Config) (*ChunkReader, error) {
if config.MinBufLen == 0 {
// By historical reasons Postgres currently has 8KB send buffer inside,
// so here we want to have at least the same size buffer.
// @see https://github.com/postgres/postgres/blob/249d64999615802752940e017ee5166e726bc7cd/src/backend/libpq/pqcomm.c#L134
// @see https://www.postgresql.org/message-id/0cdc5485-cb3c-5e16-4a46-e3b2f7a41322%40ya.ru
config.MinBufLen = 8192
}
return &ChunkReader{
r: r,
buf: make([]byte, config.MinBufLen),
config: config,
}, nil
}
// Next returns buf filled with the next n bytes. The caller gains ownership of buf. It is not necessary to make a copy
// of buf. If an error occurs, buf will be nil.
func (r *ChunkReader) Next(n int) (buf []byte, err error) {
// n bytes already in buf
if (r.wp - r.rp) >= n {
buf = r.buf[r.rp : r.rp+n]
r.rp += n
return buf, err
}
// available space in buf is less than n
if len(r.buf) < n {
r.copyBufContents(r.newBuf(n))
}
// buf is large enough, but need to shift filled area to start to make enough contiguous space
minReadCount := n - (r.wp - r.rp)
if (len(r.buf) - r.wp) < minReadCount {
newBuf := r.newBuf(n)
r.copyBufContents(newBuf)
}
if err := r.appendAtLeast(minReadCount); err != nil {
return nil, err
}
buf = r.buf[r.rp : r.rp+n]
r.rp += n
return buf, nil
}
func (r *ChunkReader) appendAtLeast(fillLen int) error {
n, err := io.ReadAtLeast(r.r, r.buf[r.wp:], fillLen)
r.wp += n
return err
}
func (r *ChunkReader) newBuf(size int) []byte {
if size < r.config.MinBufLen {
size = r.config.MinBufLen
}
return make([]byte, size)
}
func (r *ChunkReader) copyBufContents(dest []byte) {
r.wp = copy(dest, r.buf[r.rp:r.wp])
r.rp = 0
r.buf = dest
}

View File

@ -0,0 +1,128 @@
package chunkreader
import (
"bytes"
"math/rand"
"testing"
)
func TestChunkReaderNextDoesNotReadIfAlreadyBuffered(t *testing.T) {
server := &bytes.Buffer{}
r, err := NewConfig(server, Config{MinBufLen: 4})
if err != nil {
t.Fatal(err)
}
src := []byte{1, 2, 3, 4}
server.Write(src)
n1, err := r.Next(2)
if err != nil {
t.Fatal(err)
}
if bytes.Compare(n1, src[0:2]) != 0 {
t.Fatalf("Expected read bytes to be %v, but they were %v", src[0:2], n1)
}
n2, err := r.Next(2)
if err != nil {
t.Fatal(err)
}
if bytes.Compare(n2, src[2:4]) != 0 {
t.Fatalf("Expected read bytes to be %v, but they were %v", src[2:4], n2)
}
if bytes.Compare(r.buf, src) != 0 {
t.Fatalf("Expected r.buf to be %v, but it was %v", src, r.buf)
}
if r.rp != 4 {
t.Fatalf("Expected r.rp to be %v, but it was %v", 4, r.rp)
}
if r.wp != 4 {
t.Fatalf("Expected r.wp to be %v, but it was %v", 4, r.wp)
}
}
func TestChunkReaderNextExpandsBufAsNeeded(t *testing.T) {
server := &bytes.Buffer{}
r, err := NewConfig(server, Config{MinBufLen: 4})
if err != nil {
t.Fatal(err)
}
src := []byte{1, 2, 3, 4, 5, 6, 7, 8}
server.Write(src)
n1, err := r.Next(5)
if err != nil {
t.Fatal(err)
}
if bytes.Compare(n1, src[0:5]) != 0 {
t.Fatalf("Expected read bytes to be %v, but they were %v", src[0:5], n1)
}
if len(r.buf) != 5 {
t.Fatalf("Expected len(r.buf) to be %v, but it was %v", 5, len(r.buf))
}
}
func TestChunkReaderDoesNotReuseBuf(t *testing.T) {
server := &bytes.Buffer{}
r, err := NewConfig(server, Config{MinBufLen: 4})
if err != nil {
t.Fatal(err)
}
src := []byte{1, 2, 3, 4, 5, 6, 7, 8}
server.Write(src)
n1, err := r.Next(4)
if err != nil {
t.Fatal(err)
}
if bytes.Compare(n1, src[0:4]) != 0 {
t.Fatalf("Expected read bytes to be %v, but they were %v", src[0:4], n1)
}
n2, err := r.Next(4)
if err != nil {
t.Fatal(err)
}
if bytes.Compare(n2, src[4:8]) != 0 {
t.Fatalf("Expected read bytes to be %v, but they were %v", src[4:8], n2)
}
if bytes.Compare(n1, src[0:4]) != 0 {
t.Fatalf("Expected KeepLast to prevent Next from overwriting buf, expected %v but it was %v", src[0:4], n1)
}
}
type randomReader struct {
rnd *rand.Rand
}
// Read reads a random number of random bytes.
func (r *randomReader) Read(p []byte) (n int, err error) {
n = r.rnd.Intn(len(p) + 1)
return r.rnd.Read(p[:n])
}
func TestChunkReaderNextFuzz(t *testing.T) {
rr := &randomReader{rnd: rand.New(rand.NewSource(1))}
r, err := NewConfig(rr, Config{MinBufLen: 8192})
if err != nil {
t.Fatal(err)
}
randomSizes := rand.New(rand.NewSource(0))
for i := 0; i < 100000; i++ {
size := randomSizes.Intn(16384) + 1
buf, err := r.Next(size)
if err != nil {
t.Fatal(err)
}
if len(buf) != size {
t.Fatalf("Expected to get %v bytes but got %v bytes", size, len(buf))
}
}
}