do not share the original input string

This allows the original input string to be garbage collected, so it
should not change the memory footprint. This is a slower than the
version that shares a string, but only a small amount. It is still
faster than binary parsing (until that is optimized).

benchstat difference of original versus this version:

                               │  orig.txt   │     new-do-not-share-string.txt     │
                               │   sec/op    │   sec/op     vs base                │
HstoreScan/databasesql.Scan-10   82.11µ ± 1%   14.24µ ± 2%  -82.66% (p=0.000 n=10)
HstoreScan/text-10               83.30µ ± 1%   14.97µ ± 1%  -82.03% (p=0.000 n=10)
HstoreScan/binary-10             15.99µ ± 2%   15.80µ ± 0%   -1.16% (p=0.024 n=10)
geomean                          47.82µ        14.99µ       -68.66%

                               │   orig.txt   │     new-do-not-share-string.txt      │
                               │     B/op     │     B/op      vs base                │
HstoreScan/databasesql.Scan-10   56.23Ki ± 0%   20.11Ki ± 0%  -64.24% (p=0.000 n=10)
HstoreScan/text-10               65.12Ki ± 0%   29.00Ki ± 0%  -55.47% (p=0.000 n=10)
HstoreScan/binary-10             21.09Ki ± 0%   21.09Ki ± 0%        ~ (p=0.722 n=10)
geomean                          42.58Ki        23.08Ki       -45.80%

                               │  orig.txt  │     new-do-not-share-string.txt      │
                               │ allocs/op  │ allocs/op   vs base                  │
HstoreScan/databasesql.Scan-10   744.0 ± 0%   340.0 ± 0%  -54.30% (p=0.000 n=10)
HstoreScan/text-10               743.0 ± 0%   339.0 ± 0%  -54.37% (p=0.000 n=10)
HstoreScan/binary-10             464.0 ± 0%   464.0 ± 0%        ~ (p=1.000 n=10) ¹
geomean                          635.4        376.8       -40.70%
¹ all samples are equal

benchstat difference of the shared string versus not:

                               │ new-share-string.txt │     new-do-not-share-string.txt     │
                               │        sec/op        │   sec/op     vs base                │
HstoreScan/databasesql.Scan-10            10.57µ ± 2%   14.24µ ± 2%  +34.69% (p=0.000 n=10)
HstoreScan/text-10                        11.60µ ± 2%   14.97µ ± 1%  +29.03% (p=0.000 n=10)
HstoreScan/binary-10                      15.87µ ± 2%   15.80µ ± 0%        ~ (p=0.280 n=10)
geomean                                   12.48µ        14.99µ       +20.07%

                               │ new-share-string.txt │     new-do-not-share-string.txt      │
                               │         B/op         │     B/op      vs base                │
HstoreScan/databasesql.Scan-10           11.68Ki ± 0%   20.11Ki ± 0%  +72.17% (p=0.000 n=10)
HstoreScan/text-10                       20.58Ki ± 0%   29.00Ki ± 0%  +40.93% (p=0.000 n=10)
HstoreScan/binary-10                     21.08Ki ± 0%   21.09Ki ± 0%        ~ (p=0.427 n=10)
geomean                                  17.17Ki        23.08Ki       +34.39%

                               │ new-share-string.txt │      new-do-not-share-string.txt       │
                               │      allocs/op       │  allocs/op   vs base                   │
HstoreScan/databasesql.Scan-10             44.00 ± 0%   340.00 ± 0%  +672.73% (p=0.000 n=10)
HstoreScan/text-10                         44.00 ± 0%   339.00 ± 0%  +670.45% (p=0.000 n=10)
HstoreScan/binary-10                       464.0 ± 0%    464.0 ± 0%         ~ (p=1.000 n=10) ¹
geomean                                    96.49         376.8       +290.47%
pull/1648/head
Evan Jones 2023-06-16 15:37:40 -04:00 committed by Jack Christensen
parent d48d36dc02
commit 07670dddca
1 changed files with 6 additions and 4 deletions

View File

@ -332,7 +332,7 @@ func (p *hstoreParser) consumeExpected2(one byte, two byte) error {
var errEOSInQuoted = errors.New(`found end before closing double-quote ('"')`)
// consumeDoubleQuoted consumes a double-quoted string from p. The double quote must have been
// parsed already.
// parsed already. This copies the string from the backing string so it can be garbage collected.
func (p *hstoreParser) consumeDoubleQuoted() (string, error) {
// fast path: assume most keys/values do not contain escapes
nextDoubleQuote := strings.IndexByte(p.str[p.pos:], '"')
@ -341,8 +341,9 @@ func (p *hstoreParser) consumeDoubleQuoted() (string, error) {
}
nextDoubleQuote += p.pos
if p.nextBackslash == -1 || p.nextBackslash > nextDoubleQuote {
// no escapes in this string
s := p.str[p.pos:nextDoubleQuote]
// clone the string from the source string to ensure it can be garbage collected separately
// TODO: use strings.Clone on Go 1.20; this could get optimized away
s := strings.Clone(p.str[p.pos:nextDoubleQuote])
p.pos = nextDoubleQuote + 1
return s, nil
}
@ -357,7 +358,8 @@ func (p *hstoreParser) consumeDoubleQuoted() (string, error) {
}
// consumeDoubleQuotedWithEscapes consumes a double-quoted string containing escapes, starting
// at p.pos, and with the first backslash at firstBackslash.
// at p.pos, and with the first backslash at firstBackslash. This copies the string so it can be
// garbage collected separately.
func (p *hstoreParser) consumeDoubleQuotedWithEscapes(firstBackslash int) (string, error) {
// copy the prefix that does not contain backslashes
var builder strings.Builder