Skip to content

Commit 56c9fca

Browse files
committed
Fix missing error context in error collection crashing sqlparse
1 parent e4f46ec commit 56c9fca

File tree

5 files changed

+81
-7
lines changed

5 files changed

+81
-7
lines changed

CHANGES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
## Unreleased
44
- Export `Statement` in both Python and Javascript target
55
- Fixed query parsing when expression includes special characters like `\n`, `\r`, or `\t`
6+
- Fixed sqlparse crash on missing error context
67

78
## 2024/09/18 v0.0.7
89
- Improve error matching on single statement

cratedb_sqlparse_js/cratedb_sqlparse/parser.js

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -124,11 +124,22 @@ class ExceptionCollectorListener extends ErrorListener {
124124

125125
syntaxError(recognizer, offendingSymbol, line, column, msg, e) {
126126
super.syntaxError(recognizer, offendingSymbol, line, column, msg, e);
127-
const error = new ParseError(
128-
e.ctx.parser.getTokenStream().getText(new Interval(
127+
let query;
128+
129+
if (e) {
130+
query = e.ctx.parser.getTokenStream().getText(new Interval(
129131
e.ctx.start,
130132
e.offendingToken.tokenIndex)
131-
),
133+
)
134+
135+
} else {
136+
const min_to_check = Math.max(1, offendingSymbol.tokenIndex - 2)
137+
const tokens = recognizer.getTokenStream().tokens.slice(min_to_check, offendingSymbol.tokenIndex + 1)
138+
query = tokens.map((el) => el.text).join("")
139+
}
140+
141+
const error = new ParseError(
142+
query,
132143
msg,
133144
offendingSymbol,
134145
e
@@ -221,6 +232,7 @@ export function sqlparse(query, raise_exception = false) {
221232
const statementsContext = tree.children.filter((children) => children instanceof SqlBaseParser.StatementContext)
222233

223234
let statements = []
235+
224236
for (const statementContext of statementsContext) {
225237
let stmt = new Statement(statementContext)
226238

cratedb_sqlparse_js/tests/exceptions.test.js

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,14 @@ test('Error should be collected and not thrown by default', () => {
1515
expect(() => stmts).not.toThrowError()
1616
})
1717

18-
test('Several Errors should be collected and not thrown by default', () => {
18+
test('Single error should be collected', () => {
19+
const stmt = sqlparse("SELECT A,B,C,D FROM tbl1 WHERE A ? '%A'")
20+
expect(stmt[0].exception).toBeDefined()
21+
expect(stmt[0].exception.msg).toBe("mismatched input '?' expecting {<EOF>, ';'}")
22+
expect(stmt[0].exception.query).toBe("SELECT A,B,C,D FROM tbl1 WHERE A ?")
23+
})
24+
25+
test('Several errors should be collected and not thrown by default', () => {
1926
const stmts = sqlparse(`
2027
SELECT A FROM tbl1 where;
2128
SELECT 1;
@@ -73,6 +80,31 @@ test('Exception message is correct', () => {
7380
})
7481

7582

83+
test('White or special characters should not avoid exception catching', () => {
84+
// https://github.com/crate/cratedb-sqlparse/issues/67
85+
const stmts = [
86+
`SELECT 1\n limit `,
87+
`SELECT 1\r limit `,
88+
`SELECT 1\t limit `,
89+
`SELECT 1 limit `
90+
]
91+
for (const stmt in stmts) {
92+
let r = sqlparse(stmt)
93+
expect(r[0].exception).toBeDefined();
94+
}
95+
})
96+
97+
test('Missing token error should not panic', ()=> {
98+
// See https://github.com/crate/cratedb-sqlparse/issues/66
99+
sqlparse(`
100+
CREATE TABLE t01 (
101+
"x" OBJECT (DYNAMIC),
102+
"y" OBJECT (DYNAMIC) AS ("z" ARRAY(OBJECT (DYNAMIC))
103+
);
104+
`)
105+
})
106+
107+
76108
test('Whitetest or special characters should not avoid exception catching', () => {
77109
// https://github.com/crate/cratedb-sqlparse/issues/67
78110
const stmts = [

cratedb_sqlparse_py/cratedb_sqlparse/parser.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -114,13 +114,29 @@ def __init__(self):
114114
self.errors = []
115115

116116
def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
117+
if e:
118+
query = recognizer.getTokenStream().getText(e.ctx.start, offendingSymbol.tokenIndex)
119+
120+
else:
121+
# If antlr4 doesn't give us an error object, we heuristically create a query, or a piece of it
122+
# so we increase the chances of it being correctly assigned.
123+
# It means that theoretically if you input two wrong queries that antlr4 manages
124+
# to distinguish as two different statements (which is hard already), and both are similar
125+
# the errors could be matched wrongly. Still pretty rare, and it is very hard to come up with
126+
# an actual query that does it.
127+
128+
# The newly generated query will be either the offendingToken + one token to the left
129+
# or offendingToken + two tokens to the left, if the second is possible it takes precedence.
130+
min_token_to_check = max(1, offendingSymbol.tokenIndex - 2)
131+
tokens = recognizer.getTokenStream().tokens[min_token_to_check : offendingSymbol.tokenIndex + 1]
132+
query = "".join(token.text for token in tokens)
133+
117134
error = ParsingException(
118135
msg=msg,
119136
offending_token=offendingSymbol,
120-
e=e,
121-
query=e.ctx.parser.getTokenStream().getText(e.ctx.start, e.offendingToken.tokenIndex),
137+
e=e if e else type("NotViableInput", (Exception,), {})(),
138+
query=query,
122139
)
123-
124140
self.errors.append(error)
125141

126142

cratedb_sqlparse_py/tests/test_exceptions.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,3 +98,16 @@ def test_sqlparse_catches_exception():
9898
"""
9999
for stmt in stmts:
100100
assert sqlparse(stmt)[0].exception
101+
102+
103+
def test_sqlparse_should_not_panic():
104+
from cratedb_sqlparse import sqlparse
105+
106+
sqlparse("""
107+
CREATE TABLE t01 (
108+
"x" OBJECT (DYNAMIC),
109+
"y" OBJECT (DYNAMIC) AS ("z" ARRAY(OBJECT (DYNAMIC))
110+
);
111+
""")[0]
112+
113+
# That's it, it shouldn't raise a runtime Exception.

0 commit comments

Comments
 (0)