pgtype/hstore: Avoid Postgres Mac OS X parsing bug

Postgres on Mac OS X has a bug in how it parses hstore text values
that causes it to misinterpret some Unicode values as spaces. This
causes values sent by pgx to be misinterpreted. To avoid this, always
quote hstore values, which is how Postgres serializes them itself.
The test change fails on Mac OS X without this fix.

While I suspect this should not be performance critical for any
application, I added a quick benchmark to test the performance of the
encoding. This change actually makes encoding slightly faster on my
M1 Pro. The output from the benchstat program on this banchmark is:

goos: darwin
goarch: arm64
pkg: github.com/jackc/pgx/v5/pgtype
                          │   orig.txt   │           new-quotes.txt            │
                          │    sec/op    │   sec/op     vs base                │
HstoreSerialize/text-10      207.1n ± 0%   142.3n ± 1%  -31.31% (p=0.000 n=10)
HstoreSerialize/binary-10   100.10n ± 0%   99.64n ± 1%   -0.45% (p=0.013 n=10)
geomean                      144.0n        119.1n       -17.31%

I have also attempted to fix the Postgres bug, but it will take a
long time for this fix to get upstream:

https://www.postgresql.org/message-id/CA%2BHWA9awUW0%2BRV_gO9r1ABZwGoZxPztcJxPy8vMFSTbTfi4jig%40mail.gmail.com
pull/1633/head^2
Evan Jones 2023-06-07 11:58:05 -04:00 committed by Jack Christensen
parent d9560c78b8
commit ee04d4a74d
2 changed files with 42 additions and 16 deletions

View File

@ -6,7 +6,6 @@ import (
"encoding/binary"
"errors"
"fmt"
"strings"
"unicode"
"unicode/utf8"
@ -137,13 +136,20 @@ func (encodePlanHstoreCodecText) Encode(value any, buf []byte) (newBuf []byte, e
buf = append(buf, ',')
}
buf = append(buf, quoteHstoreElementIfNeeded(k)...)
// unconditionally quote hstore keys/values like Postgres does
// this avoids a Mac OS X Postgres hstore parsing bug:
// https://www.postgresql.org/message-id/CA%2BHWA9awUW0%2BRV_gO9r1ABZwGoZxPztcJxPy8vMFSTbTfi4jig%40mail.gmail.com
buf = append(buf, '"')
buf = append(buf, quoteArrayReplacer.Replace(k)...)
buf = append(buf, '"')
buf = append(buf, "=>"...)
if v == nil {
buf = append(buf, "NULL"...)
} else {
buf = append(buf, quoteHstoreElementIfNeeded(*v)...)
buf = append(buf, '"')
buf = append(buf, quoteArrayReplacer.Replace(*v)...)
buf = append(buf, '"')
}
}
@ -271,19 +277,6 @@ func (c HstoreCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (
return hstore, nil
}
func quoteHstoreElementIfNeeded(src string) string {
// Double-quote keys and values that include whitespace, commas, =s or >s. To include a double
// quote or a backslash in a key or value, escape it with a backslash.
// From: https://www.postgresql.org/docs/current/hstore.html
// whitespace appears to be defined as the isspace() C function: \t\n\v\f\r\n and space
const quoteRequiredChars = `,"\=> ` + "\t\n\v\f\r"
if src == "" || (len(src) == 4 && strings.ToLower(src) == "null") || strings.ContainsAny(src, quoteRequiredChars) {
return quoteArrayElement(src)
}
return src
}
const (
hsPre = iota
hsKey

View File

@ -129,6 +129,9 @@ func TestHstoreCodec(t *testing.T) {
"form\\ffeed",
"carriage\rreturn",
"curly{}braces",
// Postgres on Mac OS X hstore parsing bug:
// ą = "\xc4\x85" in UTF-8; isspace(0x85) on Mac OS X returns true instead of false
"mac_bugą",
}
for _, s := range specialStrings {
// Special key values
@ -255,3 +258,33 @@ func TestParseInvalidInputs(t *testing.T) {
}
}
}
func BenchmarkHstoreEncode(b *testing.B) {
stringPtr := func(s string) *string {
return &s
}
h := pgtype.Hstore{"a x": stringPtr("100"), "b": stringPtr("200"), "c": stringPtr("300"),
"d": stringPtr("400"), "e": stringPtr("500")}
serializeConfigs := []struct {
name string
encodePlan pgtype.EncodePlan
}{
{"text", pgtype.HstoreCodec{}.PlanEncode(nil, 0, pgtype.TextFormatCode, h)},
{"binary", pgtype.HstoreCodec{}.PlanEncode(nil, 0, pgtype.BinaryFormatCode, h)},
}
for _, serializeConfig := range serializeConfigs {
var buf []byte
b.Run(serializeConfig.name, func(b *testing.B) {
for i := 0; i < b.N; i++ {
var err error
buf, err = serializeConfig.encodePlan.Encode(h, buf)
if err != nil {
b.Fatal(err)
}
buf = buf[:0]
}
})
}
}