fix: parser handling comments and empty lines (#446)

pull/456/head
Michael Fridman 2023-01-26 09:42:38 -05:00 committed by GitHub
parent 203277344b
commit bf2656094c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 179 additions and 86 deletions

View File

@ -7,7 +7,6 @@ import (
"fmt"
"io"
"log"
"regexp"
"strings"
"sync"
)
@ -37,11 +36,10 @@ func (s *stateMachine) Set(new parserState) {
const scanBufSize = 4 * 1024 * 1024
var matchEmptyLines = regexp.MustCompile(`^\s*$`)
var bufferPool = sync.Pool{
New: func() interface{} {
return make([]byte, scanBufSize)
buf := make([]byte, scanBufSize)
return &buf
},
}
@ -56,9 +54,9 @@ var bufferPool = sync.Pool{
// 'StatementBegin' and 'StatementEnd' to allow the script to
// tell us to ignore semicolons.
func ParseSQLMigration(r io.Reader, direction bool) (stmts []string, useTx bool, err error) {
var buf bytes.Buffer
scanBuf := bufferPool.Get().([]byte)
defer bufferPool.Put(scanBuf)
scanBufPtr := bufferPool.Get().(*[]byte)
scanBuf := *scanBufPtr
defer bufferPool.Put(scanBufPtr)
scanner := bufio.NewScanner(r)
scanner.Buffer(scanBuf, scanBufSize)
@ -66,19 +64,22 @@ func ParseSQLMigration(r io.Reader, direction bool) (stmts []string, useTx bool,
stateMachine := stateMachine(start)
useTx = true
firstLineFound := false
var buf bytes.Buffer
for scanner.Scan() {
line := scanner.Text()
if verbose {
log.Println(line)
}
if stateMachine.Get() == start && strings.TrimSpace(line) == "" {
continue
}
// TODO(mf): validate annotations to avoid common user errors:
// https://github.com/pressly/goose/issues/163#issuecomment-501736725
if strings.HasPrefix(line, "--") {
cmd := strings.TrimSpace(strings.TrimPrefix(line, "--"))
switch cmd {
case "+goose Up":
firstLineFound = true
switch stateMachine.Get() {
case start:
stateMachine.Set(gooseUp)
@ -88,7 +89,6 @@ func ParseSQLMigration(r io.Reader, direction bool) (stmts []string, useTx bool,
continue
case "+goose Down":
firstLineFound = true
switch stateMachine.Get() {
case gooseUp, gooseStatementEndUp:
stateMachine.Set(gooseDown)
@ -98,7 +98,6 @@ func ParseSQLMigration(r io.Reader, direction bool) (stmts []string, useTx bool,
continue
case "+goose StatementBegin":
firstLineFound = true
switch stateMachine.Get() {
case gooseUp, gooseStatementEndUp:
stateMachine.Set(gooseStatementBeginUp)
@ -110,7 +109,6 @@ func ParseSQLMigration(r io.Reader, direction bool) (stmts []string, useTx bool,
continue
case "+goose StatementEnd":
firstLineFound = true
switch stateMachine.Get() {
case gooseStatementBeginUp:
stateMachine.Set(gooseStatementEndUp)
@ -123,25 +121,27 @@ func ParseSQLMigration(r io.Reader, direction bool) (stmts []string, useTx bool,
case "+goose NO TRANSACTION":
useTx = false
continue
default:
// Ignore comments.
}
}
// Once we've started parsing a statement the buffer is no longer empty,
// we keep all comments up until the end of the statement (the buffer will be reset).
// All other comments in the file are ignored.
if buf.Len() == 0 {
// This check ensures leading comments and empty lines prior to a statement are ignored.
if strings.HasPrefix(strings.TrimSpace(line), "--") || line == "" {
verboseInfo("StateMachine: ignore comment")
continue
}
}
// Ignore empty lines until first line is found.
if !firstLineFound && matchEmptyLines.MatchString(line) {
verboseInfo("StateMachine: ignore empty line")
continue
switch stateMachine.Get() {
case gooseStatementEndDown, gooseStatementEndUp:
// Do not include the "+goose StatementEnd" annotation in the final statement.
default:
// Write SQL line to a buffer.
if _, err := buf.WriteString(line + "\n"); err != nil {
return nil, false, fmt.Errorf("failed to write to buf: %w", err)
}
}
// Write SQL line to a buffer.
if _, err := buf.WriteString(line + "\n"); err != nil {
return nil, false, fmt.Errorf("failed to write to buf: %w", err)
}
// Read SQL body one by line, if we're in the right direction.
//
// 1) basic query with semicolon; 2) psql statement
@ -161,29 +161,29 @@ func ParseSQLMigration(r io.Reader, direction bool) (stmts []string, useTx bool,
continue
}
default:
return nil, false, fmt.Errorf("failed to parse migration: unexpected state %q on line %q, see https://github.com/pressly/goose#sql-migrations", stateMachine, line)
return nil, false, fmt.Errorf("failed to parse migration: unexpected state %d on line %q, see https://github.com/pressly/goose#sql-migrations", stateMachine, line)
}
switch stateMachine.Get() {
case gooseUp:
if endsWithSemicolon(line) {
stmts = append(stmts, buf.String())
stmts = append(stmts, cleanupStatement(buf.String()))
buf.Reset()
verboseInfo("StateMachine: store simple Up query")
}
case gooseDown:
if endsWithSemicolon(line) {
stmts = append(stmts, buf.String())
stmts = append(stmts, cleanupStatement(buf.String()))
buf.Reset()
verboseInfo("StateMachine: store simple Down query")
}
case gooseStatementEndUp:
stmts = append(stmts, buf.String())
stmts = append(stmts, cleanupStatement(buf.String()))
buf.Reset()
verboseInfo("StateMachine: store Up statement")
stateMachine.Set(gooseUp)
case gooseStatementEndDown:
stmts = append(stmts, buf.String())
stmts = append(stmts, cleanupStatement(buf.String()))
buf.Reset()
verboseInfo("StateMachine: store Down statement")
stateMachine.Set(gooseDown)
@ -202,17 +202,28 @@ func ParseSQLMigration(r io.Reader, direction bool) (stmts []string, useTx bool,
}
if bufferRemaining := strings.TrimSpace(buf.String()); len(bufferRemaining) > 0 {
return nil, false, fmt.Errorf("failed to parse migration: state %q, direction: %v: unexpected unfinished SQL query: %q: missing semicolon?", stateMachine, direction, bufferRemaining)
return nil, false, fmt.Errorf("failed to parse migration: state %d, direction: %v: unexpected unfinished SQL query: %q: missing semicolon?", stateMachine, direction, bufferRemaining)
}
return stmts, useTx, nil
}
// cleanupStatement attempts to find the last semicolon and trims
// the remaining chars from the input string. This is useful for cleaning
// up a statement containing trailing comments or empty lines.
func cleanupStatement(input string) string {
if n := strings.LastIndex(input, ";"); n > 0 {
return input[:n+1]
}
return input
}
// Checks the line to see if the line has a statement-ending semicolon
// or if the line contains a double-dash comment.
func endsWithSemicolon(line string) bool {
scanBuf := bufferPool.Get().([]byte)
defer bufferPool.Put(scanBuf)
scanBufPtr := bufferPool.Get().(*[]byte)
scanBuf := *scanBufPtr
defer bufferPool.Put(scanBufPtr)
prev := ""
scanner := bufio.NewScanner(strings.NewReader(line))

View File

@ -2,7 +2,6 @@ package sqlparser
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
@ -79,25 +78,6 @@ func TestSplitStatements(t *testing.T) {
}
}
func TestKeepEmptyLines(t *testing.T) {
stmts, _, err := ParseSQLMigration(strings.NewReader(emptyLineSQL), true)
if err != nil {
t.Errorf("Failed to parse SQL migration. %v", err)
}
expected := `INSERT INTO post (id, title, body)
VALUES ('id_01', 'my_title', '
this is an insert statement including empty lines.
empty (blank) lines can be meaningful.
leave the lines to keep the text syntax.
');
`
if stmts[0] != expected {
t.Errorf("incorrect stmts. got %v, want %v", stmts, expected)
}
}
func TestUseTransactions(t *testing.T) {
t.Parallel()
@ -162,20 +142,6 @@ SELECT 4; -- 4th stmt
DROP TABLE post; -- 1st stmt
`
var emptyLineSQL = `-- +goose Up
INSERT INTO post (id, title, body)
VALUES ('id_01', 'my_title', '
this is an insert statement including empty lines.
empty (blank) lines can be meaningful.
leave the lines to keep the text syntax.
');
-- +goose Down
TRUNCATE TABLE post;
`
var functxt = `-- +goose Up
CREATE TABLE IF NOT EXISTS histories (
id BIGSERIAL PRIMARY KEY,
@ -401,9 +367,10 @@ func TestValidUp(t *testing.T) {
{Name: "test01", StatementsCount: 3},
{Name: "test02", StatementsCount: 1},
{Name: "test03", StatementsCount: 1},
{Name: "test04", StatementsCount: 2},
{Name: "test04", StatementsCount: 3},
{Name: "test05", StatementsCount: 2},
{Name: "test06", StatementsCount: 3},
{Name: "test06", StatementsCount: 5},
{Name: "test07", StatementsCount: 1},
}
for _, tc := range tests {
path := filepath.Join("testdata", "valid-up", tc.Name)
@ -428,16 +395,14 @@ func testValidUp(t *testing.T, dir string, count int) {
func compareStatements(t *testing.T, dir string, statements []string) {
t.Helper()
files, err := os.ReadDir(dir)
files, err := filepath.Glob(filepath.Join(dir, "*.golden.sql"))
check.NoError(t, err)
if len(statements) != len(files) {
t.Fatalf("mismatch between parsed statements (%d) and golden files (%d), did you check in NN.golden.sql file in %q?", len(statements), len(files), dir)
}
for _, goldenFile := range files {
if goldenFile.Name() == "input.sql" {
continue
}
if !strings.HasSuffix(goldenFile.Name(), ".golden.sql") {
t.Fatalf("expecting golden file with format <name>.golden.sql: got: %q. Try running `make clean` to remove previous failed files?", goldenFile.Name())
}
before, _, ok := cut(goldenFile.Name(), ".")
goldenFile = filepath.Base(goldenFile)
before, _, ok := cut(goldenFile, ".")
if !ok {
t.Fatal(`failed to cut on file delimiter ".", must be of the format NN.golden.sql`)
}
@ -445,7 +410,7 @@ func compareStatements(t *testing.T, dir string, statements []string) {
check.NoError(t, err)
index--
goldenFilePath := filepath.Join(dir, goldenFile.Name())
goldenFilePath := filepath.Join(dir, goldenFile)
by, err := os.ReadFile(goldenFilePath)
check.NoError(t, err)
@ -460,7 +425,7 @@ func compareStatements(t *testing.T, dir string, statements []string) {
filepath.Join("internal", "sqlparser", goldenFilePath+".FAIL"),
filepath.Join("internal", "sqlparser", goldenFilePath),
)
err := ioutil.WriteFile(goldenFilePath+".FAIL", []byte(got+"\n"), 0644)
err := os.WriteFile(goldenFilePath+".FAIL", []byte(got+"\n"), 0644)
check.NoError(t, err)
}
}

View File

@ -19,4 +19,3 @@ CREATE FUNCTION emp_stamp() RETURNS trigger AS $emp_stamp$
RETURN NEW;
END;
$emp_stamp$ LANGUAGE plpgsql;
-- +goose StatementEnd

View File

@ -31,4 +31,3 @@ $emp_audit$ LANGUAGE plpgsql;
CREATE TRIGGER emp_audit
AFTER INSERT OR UPDATE OR DELETE ON emp
FOR EACH ROW EXECUTE FUNCTION process_emp_audit();
-- +goose StatementEnd

View File

@ -11,6 +11,10 @@ BEGIN
v_url ALIAS FOR $3;
BEGIN '';
--
-- Notice how we scan through the results of a query in a FOR loop
-- using the FOR <record> construct.
--
FOR referrer_keys IN SELECT * FROM cs_referrer_keys ORDER BY try_order LOOP
a_output := a_output || '' IF v_'' || referrer_keys.kind || '' LIKE ''''''''''
@ -26,4 +30,3 @@ BEGIN
EXECUTE a_output;
END;
' LANGUAGE 'plpgsql';
-- +goose StatementEnd

View File

@ -31,7 +31,8 @@ BEGIN
EXECUTE a_output;
END;
' LANGUAGE 'plpgsql';
' LANGUAGE 'plpgsql'; -- This comment will NOT be preserved.
-- And neither will this one.
-- +goose StatementEnd
-- +goose Down

View File

@ -4,5 +4,5 @@ M7Zifl2NyVVCPb0hELr2JJla1u/1CgiuqDpcjP2cCu2YxB/JGyCvcon+3tETUz3Ri9NGzHCZ
fkuWRZjkUvy7nfPLjzM+t6SEvY4lbn3ihLPumZjwgvuCY3vDZY8V1/NMoP8MKATGR+S7D7gv
I6KD9jkiSsTJMiotb/dRkXE3bG0nmjchhhLzMG551G8IZEpWBHDqEisCIl8yCd9YZV69BZTu
L48zPl/CFvA+KJJ6LklxfwWeVDQ+ve2OIW0B1uLhR/MsoYbDQztbgIayg6ieMO/KlQIDAQAB
-----END RSA PUBLIC KEY-----
');
-- +goose StatementEnd

View File

@ -0,0 +1,22 @@
INSERT INTO ssh_keys (id, "publicKey") VALUES (2, '-----BEGIN OPENSSH PRIVATE KEY-----
b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAABsQAAAAdzc2gtZH
NzAAAAgQDKYlGRv+Ul4WYKN3F83zchQC24ZV17EJCKwakASf//u2p1+K4iuTQbPgcUWpET
w4HkCRlOVwYkM4ZL2QncUDyEX3o0UDEgrnhWhLJJR2yCMSsqTNffME3X7YdP2LcE0OM9ZP
9vb5+TwLr6c4edwlu3QYc4VStWSstdR9DD7vun9QAAABUA+DRjW9u+5IEPHyx+FhEoKsRm
8aEAAACAU2auArSDuTVbJBE/oP6x+4vrud2qFtALsuFaLPfqN5gVBofRJlIOpQobyi022R
6SPtX/DEbWkVuHfiyxkjb0Gb0obBzGM+RNqjOF6OE9sh7OuBfLaWW44OZasg1VXzkRcoWv
7jClfKYi2Q/LxHGhZoqy1uYlHPYP5CmCpiELrUMAAACAfp0KpTVyYQjO2nLPuBhnsepfxQ
kT+FDqDBp1rZfB4uKx4q466Aq0jeev1OeQEYZpj3+q4b2XX54zXDwvJLuiD9WSmC7jvT0+
EUmF55PHW4inloG9pMUzeQnx3k8WDcRJbcAMalpoCCsb0jEPIiyBGBtQu0gOoLL+N+G2Cl
U+/FEAAAHgOKSlwjikpcIAAAAHc3NoLWRzcwAAAIEAymJRkb/lJeFmCjdxfN83IUAtuGVd
exCQisGpAEn//7tqdfiuIrk0Gz4HFFqRE8OB5AkZTlcGJDOGS9kJ3FA8hF96NFAxIK54Vo
SySUdsgjErKkzX3zBN1+2HT9i3BNDjPWT/b2+fk8C6+nOHncJbt0GHOFUrVkrLXUfQw+77
p/UAAAAVAPg0Y1vbvuSBDx8sfhYRKCrEZvGhAAAAgFNmrgK0g7k1WyQRP6D+sfuL67ndqh
bQC7LhWiz36jeYFQaH0SZSDqUKG8otNtkekj7V/wxG1pFbh34ssZI29Bm9KGwcxjPkTaoz
hejhPbIezrgXy2lluODmWrINVV85EXKFr+4wpXymItkPy8RxoWaKstbmJRz2D+QpgqYhC6
1DAAAAgH6dCqU1cmEIztpyz7gYZ7HqX8UJE/hQ6gwada2XweLiseKuOugKtI3nr9TnkBGG
aY9/quG9l1+eM1w8LyS7og/Vkpgu4709PhFJheeTx1uIp5aBvaTFM3kJ8d5PFg3ESW3ADG
paaAgrG9IxDyIsgRgbULtIDqCy/jfhtgpVPvxRAAAAFQCPXzpVtY5yJTN1zBo9pTGeg+f3
EgAAAAZub25hbWUBAgME
-----END OPENSSH PRIVATE KEY-----
');

View File

@ -16,4 +16,27 @@ L48zPl/CFvA+KJJ6LklxfwWeVDQ+ve2OIW0B1uLhR/MsoYbDQztbgIayg6ieMO/KlQIDAQAB
');
-- +goose StatementEnd
INSERT INTO ssh_keys (id, "publicKey") VALUES (2, '-----BEGIN OPENSSH PRIVATE KEY-----
b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAABsQAAAAdzc2gtZH
NzAAAAgQDKYlGRv+Ul4WYKN3F83zchQC24ZV17EJCKwakASf//u2p1+K4iuTQbPgcUWpET
w4HkCRlOVwYkM4ZL2QncUDyEX3o0UDEgrnhWhLJJR2yCMSsqTNffME3X7YdP2LcE0OM9ZP
9vb5+TwLr6c4edwlu3QYc4VStWSstdR9DD7vun9QAAABUA+DRjW9u+5IEPHyx+FhEoKsRm
8aEAAACAU2auArSDuTVbJBE/oP6x+4vrud2qFtALsuFaLPfqN5gVBofRJlIOpQobyi022R
6SPtX/DEbWkVuHfiyxkjb0Gb0obBzGM+RNqjOF6OE9sh7OuBfLaWW44OZasg1VXzkRcoWv
7jClfKYi2Q/LxHGhZoqy1uYlHPYP5CmCpiELrUMAAACAfp0KpTVyYQjO2nLPuBhnsepfxQ
kT+FDqDBp1rZfB4uKx4q466Aq0jeev1OeQEYZpj3+q4b2XX54zXDwvJLuiD9WSmC7jvT0+
EUmF55PHW4inloG9pMUzeQnx3k8WDcRJbcAMalpoCCsb0jEPIiyBGBtQu0gOoLL+N+G2Cl
U+/FEAAAHgOKSlwjikpcIAAAAHc3NoLWRzcwAAAIEAymJRkb/lJeFmCjdxfN83IUAtuGVd
exCQisGpAEn//7tqdfiuIrk0Gz4HFFqRE8OB5AkZTlcGJDOGS9kJ3FA8hF96NFAxIK54Vo
SySUdsgjErKkzX3zBN1+2HT9i3BNDjPWT/b2+fk8C6+nOHncJbt0GHOFUrVkrLXUfQw+77
p/UAAAAVAPg0Y1vbvuSBDx8sfhYRKCrEZvGhAAAAgFNmrgK0g7k1WyQRP6D+sfuL67ndqh
bQC7LhWiz36jeYFQaH0SZSDqUKG8otNtkekj7V/wxG1pFbh34ssZI29Bm9KGwcxjPkTaoz
hejhPbIezrgXy2lluODmWrINVV85EXKFr+4wpXymItkPy8RxoWaKstbmJRz2D+QpgqYhC6
1DAAAAgH6dCqU1cmEIztpyz7gYZ7HqX8UJE/hQ6gwada2XweLiseKuOugKtI3nr9TnkBGG
aY9/quG9l1+eM1w8LyS7og/Vkpgu4709PhFJheeTx1uIp5aBvaTFM3kJ8d5PFg3ESW3ADG
paaAgrG9IxDyIsgRgbULtIDqCy/jfhtgpVPvxRAAAAFQCPXzpVtY5yJTN1zBo9pTGeg+f3
EgAAAAZub25hbWUBAgME
-----END OPENSSH PRIVATE KEY-----
');
-- +goose Down

View File

@ -1,4 +1,5 @@
CREATE TABLE ssh_keys (
id integer NOT NULL,
"publicKey" text
-- insert comment there
);

View File

@ -4,6 +4,7 @@ markdown doc
first paragraph
-- with a comment
-- with an indent comment
second paragraph');

View File

@ -0,0 +1,12 @@
CREATE FUNCTION do_something(sql TEXT) RETURNS INTEGER AS $$
BEGIN
-- initiate technology
PERFORM something_or_other(sql);
-- increase technology
PERFORM some_other_thing(sql);
-- technology was successful
RETURN 1;
END;
$$ LANGUAGE plpgsql;

View File

@ -0,0 +1,8 @@
INSERT INTO post (id, title, body)
VALUES ('id_01', 'my_title', '
this is an insert statement including empty lines.
empty (blank) lines can be meaningful.
leave the lines to keep the text syntax.
');

View File

@ -21,5 +21,42 @@ first paragraph
second paragraph');
-- +goose StatementBegin
-- 1 this comment will NOT be preserved
-- 2 this comment will NOT be preserved
CREATE FUNCTION do_something(sql TEXT) RETURNS INTEGER AS $$
BEGIN
-- initiate technology
PERFORM something_or_other(sql);
-- increase technology
PERFORM some_other_thing(sql);
-- technology was successful
RETURN 1;
END;
$$ LANGUAGE plpgsql;
-- 3 this comment will NOT be preserved
-- 4 this comment will NOT be preserved
-- +goose StatementEnd
INSERT INTO post (id, title, body)
VALUES ('id_01', 'my_title', '
this is an insert statement including empty lines.
empty (blank) lines can be meaningful.
leave the lines to keep the text syntax.
');
-- +goose Down
TRUNCATE TABLE post;

View File

@ -0,0 +1 @@
CREATE INDEX ON public.users (user_id);

View File

@ -0,0 +1,10 @@
-- +goose Up
-- +goose StatementBegin
CREATE INDEX ON public.users (user_id);
-- +goose StatementEnd
-- +goose Down
-- +goose StatementBegin
DROP INDEX IF EXISTS users_user_id_idx;
-- +goose StatementEnd