hstore: Make binary parsing 2X faster

* Back all value pointers with a single []string: one allocation
  instead of one per value.
* Convert all key/value bytes to one shared string, instead of one
  string for each key and each value.

After this change, all keys and values in one Hstore share two
allocations: one string and one []string. The disadvantage is that this
shared memory cannot be freed until every key and value is unused. This
means that if an application takes a single key or value from the Hstore
and holds on to it, the whole allocation stays alive and the
application's memory footprint will increase. I would guess this is an
unlikely problem, but it is possible.

The benchstat results from my M1 Max are below.

goos: darwin
goarch: arm64
pkg: github.com/jackc/pgx/v5/pgtype
                               │   orig.txt   │               new.txt               │
                               │    sec/op    │   sec/op     vs base                │
HstoreScan/databasesql.Scan-10    82.11µ ± 1%   82.66µ ± 2%        ~ (p=0.436 n=10)
HstoreScan/text-10                83.30µ ± 1%   84.24µ ± 3%        ~ (p=0.165 n=10)
HstoreScan/binary-10             15.987µ ± 2%   7.459µ ± 6%  -53.35% (p=0.000 n=10)
geomean                           47.82µ        37.31µ       -21.98%

                               │   orig.txt   │               new.txt               │
                               │     B/op     │     B/op      vs base               │
HstoreScan/databasesql.Scan-10   56.23Ki ± 0%   56.23Ki ± 0%       ~ (p=0.324 n=10)
HstoreScan/text-10               65.12Ki ± 0%   65.12Ki ± 0%       ~ (p=0.675 n=10)
HstoreScan/binary-10             21.09Ki ± 0%   20.73Ki ± 0%  -1.70% (p=0.000 n=10)
geomean                          42.58Ki        42.34Ki       -0.57%

                               │  orig.txt   │               new.txt                │
                               │  allocs/op  │ allocs/op   vs base                  │
HstoreScan/databasesql.Scan-10    744.0 ± 0%   744.0 ± 0%        ~ (p=1.000 n=10) ¹
HstoreScan/text-10                743.0 ± 0%   743.0 ± 0%        ~ (p=1.000 n=10) ¹
HstoreScan/binary-10             464.00 ± 0%   41.00 ± 0%  -91.16% (p=0.000 n=10)
geomean                           635.4        283.0       -55.46%
¹ all samples are equal
pull/1648/head
Evan Jones 2023-06-15 11:24:42 -04:00 committed by Jack Christensen
parent 07670dddca
commit 2de94187f5
1 changed file with 13 additions and 17 deletions

View File

@ -183,47 +183,43 @@ func (scanPlanBinaryHstoreToHstoreScanner) Scan(src []byte, dst any) error {
rp := 0
if len(src[rp:]) < 4 {
const uint32Len = 4
if len(src[rp:]) < uint32Len {
return fmt.Errorf("hstore incomplete %v", src)
}
pairCount := int(int32(binary.BigEndian.Uint32(src[rp:])))
rp += 4
rp += uint32Len
hstore := make(Hstore, pairCount)
// one allocation for all strings, rather than one per string
valueStrings := make([]string, pairCount)
// one shared string for all key/value strings
keyValueString := string(src[rp:])
for i := 0; i < pairCount; i++ {
if len(src[rp:]) < 4 {
if len(src[rp:]) < uint32Len {
return fmt.Errorf("hstore incomplete %v", src)
}
keyLen := int(int32(binary.BigEndian.Uint32(src[rp:])))
rp += 4
rp += uint32Len
if len(src[rp:]) < keyLen {
return fmt.Errorf("hstore incomplete %v", src)
}
key := string(src[rp : rp+keyLen])
key := string(keyValueString[rp-uint32Len : rp-uint32Len+keyLen])
rp += keyLen
if len(src[rp:]) < 4 {
if len(src[rp:]) < uint32Len {
return fmt.Errorf("hstore incomplete %v", src)
}
valueLen := int(int32(binary.BigEndian.Uint32(src[rp:])))
rp += 4
var valueBuf []byte
if valueLen >= 0 {
valueBuf = src[rp : rp+valueLen]
valueStrings[i] = string(keyValueString[rp-uint32Len : rp-uint32Len+valueLen])
rp += valueLen
}
var value Text
err := scanPlanTextAnyToTextScanner{}.Scan(valueBuf, &value)
if err != nil {
return err
}
if value.Valid {
hstore[key] = &value.String
hstore[key] = &valueStrings[i]
} else {
hstore[key] = nil
}