mirror of https://github.com/jackc/pgx.git
pgtype/hstore: Avoid Postgres Mac OS X parsing bug
Postgres on Mac OS X has a bug in how it parses hstore text values that causes it to misinterpret some Unicode values as spaces. This causes values sent by pgx to be misinterpreted. To avoid this, always quote hstore values, which is how Postgres serializes them itself. The test change fails on Mac OS X without this fix. While I suspect this should not be performance critical for any application, I added a quick benchmark to test the performance of the encoding. This change actually makes encoding slightly faster on my M1 Pro. The output from the benchstat program on this banchmark is: goos: darwin goarch: arm64 pkg: github.com/jackc/pgx/v5/pgtype │ orig.txt │ new-quotes.txt │ │ sec/op │ sec/op vs base │ HstoreSerialize/text-10 207.1n ± 0% 142.3n ± 1% -31.31% (p=0.000 n=10) HstoreSerialize/binary-10 100.10n ± 0% 99.64n ± 1% -0.45% (p=0.013 n=10) geomean 144.0n 119.1n -17.31% I have also attempted to fix the Postgres bug, but it will take a long time for this fix to get upstream: https://www.postgresql.org/message-id/CA%2BHWA9awUW0%2BRV_gO9r1ABZwGoZxPztcJxPy8vMFSTbTfi4jig%40mail.gmail.compull/1633/head^2
parent
d9560c78b8
commit
ee04d4a74d
|
@ -6,7 +6,6 @@ import (
|
|||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
|
@ -137,13 +136,20 @@ func (encodePlanHstoreCodecText) Encode(value any, buf []byte) (newBuf []byte, e
|
|||
buf = append(buf, ',')
|
||||
}
|
||||
|
||||
buf = append(buf, quoteHstoreElementIfNeeded(k)...)
|
||||
// unconditionally quote hstore keys/values like Postgres does
|
||||
// this avoids a Mac OS X Postgres hstore parsing bug:
|
||||
// https://www.postgresql.org/message-id/CA%2BHWA9awUW0%2BRV_gO9r1ABZwGoZxPztcJxPy8vMFSTbTfi4jig%40mail.gmail.com
|
||||
buf = append(buf, '"')
|
||||
buf = append(buf, quoteArrayReplacer.Replace(k)...)
|
||||
buf = append(buf, '"')
|
||||
buf = append(buf, "=>"...)
|
||||
|
||||
if v == nil {
|
||||
buf = append(buf, "NULL"...)
|
||||
} else {
|
||||
buf = append(buf, quoteHstoreElementIfNeeded(*v)...)
|
||||
buf = append(buf, '"')
|
||||
buf = append(buf, quoteArrayReplacer.Replace(*v)...)
|
||||
buf = append(buf, '"')
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -271,19 +277,6 @@ func (c HstoreCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (
|
|||
return hstore, nil
|
||||
}
|
||||
|
||||
func quoteHstoreElementIfNeeded(src string) string {
|
||||
// Double-quote keys and values that include whitespace, commas, =s or >s. To include a double
|
||||
// quote or a backslash in a key or value, escape it with a backslash.
|
||||
// From: https://www.postgresql.org/docs/current/hstore.html
|
||||
// whitespace appears to be defined as the isspace() C function: \t\n\v\f\r\n and space
|
||||
const quoteRequiredChars = `,"\=> ` + "\t\n\v\f\r"
|
||||
if src == "" || (len(src) == 4 && strings.ToLower(src) == "null") || strings.ContainsAny(src, quoteRequiredChars) {
|
||||
return quoteArrayElement(src)
|
||||
}
|
||||
|
||||
return src
|
||||
}
|
||||
|
||||
const (
|
||||
hsPre = iota
|
||||
hsKey
|
||||
|
|
|
@ -129,6 +129,9 @@ func TestHstoreCodec(t *testing.T) {
|
|||
"form\\ffeed",
|
||||
"carriage\rreturn",
|
||||
"curly{}braces",
|
||||
// Postgres on Mac OS X hstore parsing bug:
|
||||
// ą = "\xc4\x85" in UTF-8; isspace(0x85) on Mac OS X returns true instead of false
|
||||
"mac_bugą",
|
||||
}
|
||||
for _, s := range specialStrings {
|
||||
// Special key values
|
||||
|
@ -255,3 +258,33 @@ func TestParseInvalidInputs(t *testing.T) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkHstoreEncode(b *testing.B) {
|
||||
stringPtr := func(s string) *string {
|
||||
return &s
|
||||
}
|
||||
h := pgtype.Hstore{"a x": stringPtr("100"), "b": stringPtr("200"), "c": stringPtr("300"),
|
||||
"d": stringPtr("400"), "e": stringPtr("500")}
|
||||
|
||||
serializeConfigs := []struct {
|
||||
name string
|
||||
encodePlan pgtype.EncodePlan
|
||||
}{
|
||||
{"text", pgtype.HstoreCodec{}.PlanEncode(nil, 0, pgtype.TextFormatCode, h)},
|
||||
{"binary", pgtype.HstoreCodec{}.PlanEncode(nil, 0, pgtype.BinaryFormatCode, h)},
|
||||
}
|
||||
|
||||
for _, serializeConfig := range serializeConfigs {
|
||||
var buf []byte
|
||||
b.Run(serializeConfig.name, func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
var err error
|
||||
buf, err = serializeConfig.encodePlan.Encode(h, buf)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
buf = buf[:0]
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue