diff --git a/crates/squawk_lexer/src/cursor.rs b/crates/squawk_lexer/src/cursor.rs index 7388f419..d3d1a857 100644 --- a/crates/squawk_lexer/src/cursor.rs +++ b/crates/squawk_lexer/src/cursor.rs @@ -55,8 +55,7 @@ impl<'a> Cursor<'a> { /// Moves to the next character. pub(crate) fn bump(&mut self) -> Option { - let c = self.chars.next()?; - Some(c) + self.chars.next() } /// Eats symbols while predicate returns true or until the end of file is reached. diff --git a/crates/squawk_lexer/src/lib.rs b/crates/squawk_lexer/src/lib.rs index ddffe565..99348b59 100644 --- a/crates/squawk_lexer/src/lib.rs +++ b/crates/squawk_lexer/src/lib.rs @@ -166,10 +166,10 @@ impl Cursor<'_> { fn ident_or_unknown_prefix(&mut self) -> TokenKind { // Start is already eaten, eat the rest of identifier. self.eat_while(is_ident_cont); - // Known prefixes must have been handled earlier. So if - // we see a prefix here, it is definitely an unknown prefix. + // Known string prefixes must have been handled earlier. So if + // we see a prefix here, it is definitely unknown. match self.first() { - '"' | '\'' => TokenKind::UnknownPrefix, + '\'' => TokenKind::UnknownPrefix, _ => TokenKind::Ident, } } diff --git a/crates/squawk_parser/src/grammar.rs b/crates/squawk_parser/src/grammar.rs index 769577c8..ca2876ae 100644 --- a/crates/squawk_parser/src/grammar.rs +++ b/crates/squawk_parser/src/grammar.rs @@ -832,6 +832,7 @@ fn atom_expr(p: &mut Parser<'_>) -> Option { (SOME_KW | ALL_KW | ANY_KW, L_PAREN) => some_any_all_fn(p), (EXISTS_KW, L_PAREN) => exists_fn(p), (COLLATION_KW, FOR_KW) => collation_for_fn(p), + (ROW_KW, L_PAREN) => tuple_expr(p), _ if p.at_ts(NAME_REF_FIRST) => name_ref_(p)?, (L_PAREN, _) => tuple_expr(p), (ARRAY_KW, L_BRACK | L_PAREN) => { @@ -842,7 +843,6 @@ fn atom_expr(p: &mut Parser<'_>) -> Option { // nested array exprs: // array[[1,2],[3,4]] (L_BRACK, _) => array_expr(p, None), - (ROW_KW, L_PAREN) => tuple_expr(p), (CASE_KW, _) => case_expr(p), _ => { p.err_and_bump("expected expression in atom_expr"); @@ -2398,8 +2398,6 @@ fn current_op(p: &Parser<'_>, r: &Restrictions) -> (u8, SyntaxKind, Associativit IS_KW if !r.is_disabled && p.at(IS_DISTINCT_FROM) => (4, IS_DISTINCT_FROM, Left), // is not distinct from IS_KW if !r.is_disabled && p.at(IS_NOT_DISTINCT_FROM) => (4, IS_NOT_DISTINCT_FROM, Left), - // is not json - IS_KW if !r.is_disabled && p.at(IS_NOT_JSON) => NOT_AN_OP, // is not json object IS_KW if !r.is_disabled && p.at(IS_NOT_JSON_OBJECT) => NOT_AN_OP, // is not json array @@ -2408,6 +2406,8 @@ fn current_op(p: &Parser<'_>, r: &Restrictions) -> (u8, SyntaxKind, Associativit IS_KW if !r.is_disabled && p.at(IS_NOT_JSON_VALUE) => NOT_AN_OP, // is not json scalar IS_KW if !r.is_disabled && p.at(IS_NOT_JSON_SCALAR) => NOT_AN_OP, + // is not json + IS_KW if !r.is_disabled && p.at(IS_NOT_JSON) => NOT_AN_OP, // is json object IS_KW if !r.is_disabled && p.at(IS_JSON_OBJECT) => NOT_AN_OP, // is json array @@ -3596,7 +3596,6 @@ const SEQUENCE_OPTION_FIRST: TokenSet = TokenSet::new(&[ UNLOGGED_KW, START_KW, OWNED_KW, - OWNED_KW, MAXVALUE_KW, MINVALUE_KW, NO_KW, @@ -4433,72 +4432,6 @@ fn opt_constraint_option_list(p: &mut Parser<'_>) { } } -const COLUMN_NAME_KEYWORDS: TokenSet = TokenSet::new(&[ - BETWEEN_KW, - BIGINT_KW, - BIT_KW, - BOOLEAN_KW, - CHAR_KW, - CHARACTER_KW, - COALESCE_KW, - DEC_KW, - DECIMAL_KW, - EXISTS_KW, - EXTRACT_KW, - FLOAT_KW, - GREATEST_KW, - GROUPING_KW, - INOUT_KW, - INT_KW, - INTEGER_KW, - INTERVAL_KW, - JSON_KW, - JSON_ARRAY_KW, - JSON_ARRAYAGG_KW, - JSON_EXISTS_KW, - JSON_OBJECT_KW, - JSON_OBJECTAGG_KW, - JSON_QUERY_KW, - JSON_SCALAR_KW, - JSON_SERIALIZE_KW, - JSON_TABLE_KW, - JSON_VALUE_KW, - LEAST_KW, - MERGE_ACTION_KW, - NATIONAL_KW, - NCHAR_KW, - NONE_KW, - NORMALIZE_KW, - NULLIF_KW, - NUMERIC_KW, - OUT_KW, - OVERLAY_KW, - POSITION_KW, - PRECISION_KW, - REAL_KW, - ROW_KW, - SETOF_KW, - SMALLINT_KW, - SUBSTRING_KW, - TIME_KW, - TIMESTAMP_KW, - TREAT_KW, - TRIM_KW, - VALUES_KW, - VARCHAR_KW, - XMLATTRIBUTES_KW, - XMLCONCAT_KW, - XMLELEMENT_KW, - XMLEXISTS_KW, - XMLFOREST_KW, - XMLNAMESPACES_KW, - XMLPARSE_KW, - XMLPI_KW, - XMLROOT_KW, - XMLSERIALIZE_KW, - XMLTABLE_KW, -]); - const COL_DEF_FIRST: TokenSet = TokenSet::new(&[LIKE_KW]) .union(TABLE_CONSTRAINT_FIRST) .union(NAME_FIRST); @@ -4974,13 +4907,13 @@ fn paren_expr_list(p: &mut Parser<'_>) { /// All keywords const COL_LABEL_FIRST: TokenSet = TokenSet::new(&[IDENT]) .union(UNRESERVED_KEYWORDS) - .union(COLUMN_NAME_KEYWORDS) + .union(COL_NAME_KEYWORD_FIRST) .union(TYPE_FUNC_NAME_KEYWORDS) .union(RESERVED_KEYWORDS); const NAME_FIRST: TokenSet = TokenSet::new(&[IDENT]) .union(UNRESERVED_KEYWORDS) - .union(COLUMN_NAME_KEYWORDS); + .union(COL_NAME_KEYWORD_FIRST); const BARE_COL_LABEL_FIRST: TokenSet = TokenSet::new(&[IDENT]).union(BARE_LABEL_KEYWORDS); @@ -5068,7 +5001,6 @@ const TARGET_FOLLOW: TokenSet = TokenSet::new(&[ INTO_KW, HAVING_KW, WINDOW_KW, - HAVING_KW, FETCH_KW, FOR_KW, R_PAREN, @@ -5080,7 +5012,6 @@ const TARGET_FOLLOW: TokenSet = TokenSet::new(&[ // unquoted column name CREATE_KW, DO_KW, - CREATE_KW, GRANT_KW, END_KW, ANALYZE_KW, @@ -5864,7 +5795,7 @@ fn stmt(p: &mut Parser, r: &StmtRestrictions) -> Option { PARSER_KW => Some(alter_text_search_parser(p)), TEMPLATE_KW => Some(alter_text_search_template(p)), _ => { - p.err_and_bump("expected TEMPLATE, CONFIGURATION, DICTIONARY, PARSER, or TEMPLATE"); + p.err_and_bump("expected CONFIGURATION, DICTIONARY, PARSER, or TEMPLATE"); None } }, @@ -5960,7 +5891,7 @@ fn stmt(p: &mut Parser, r: &StmtRestrictions) -> Option { PARSER_KW => Some(create_text_search_parser(p)), TEMPLATE_KW => Some(create_text_search_template(p)), _ => { - p.err_and_bump("expected TEMPLATE, CONFIGURATION, DICTIONARY, PARSER, or TEMPLATE"); + p.err_and_bump("expected CONFIGURATION, DICTIONARY, PARSER, or TEMPLATE"); None } }, @@ -6023,7 +5954,7 @@ fn stmt(p: &mut Parser, r: &StmtRestrictions) -> Option { PARSER_KW => Some(drop_text_search_parser(p)), TEMPLATE_KW => Some(drop_text_search_template(p)), _ => { - p.err_and_bump("expected TEMPLATE, CONFIGURATION, DICTIONARY, PARSER, or TEMPLATE"); + p.err_and_bump("expected CONFIGURATION, DICTIONARY, PARSER, or TEMPLATE"); None } }, @@ -6086,7 +6017,7 @@ fn stmt(p: &mut Parser, r: &StmtRestrictions) -> Option { _ => Some(set(p)), }, (SET_KW, TRANSACTION_KW) => Some(set_transaction(p)), - (SET_KW, TIME_KW | _) => Some(set(p)), + (SET_KW, _) => Some(set(p)), (SHOW_KW, _) => Some(show(p)), (START_KW, TRANSACTION_KW) => Some(begin(p)), (TRUNCATE_KW, _) => Some(truncate(p)), @@ -14326,7 +14257,9 @@ fn param(p: &mut Parser<'_>, kind: ParamKind) { opt_param_default(p); } ParamKind::TypeOnly => { - type_name(p); + if !opt_type_name(p) { + p.err_and_bump("expected type name"); + } } } m.complete(p, PARAM); @@ -14919,7 +14852,7 @@ const COLUMN_FIRST: TokenSet = TokenSet::new(&[IDENT]) const NON_RESERVED_WORD: TokenSet = TokenSet::new(&[IDENT]) .union(UNRESERVED_KEYWORDS) - .union(COLUMN_NAME_KEYWORDS) + .union(COL_NAME_KEYWORD_FIRST) .union(TYPE_FUNC_NAME_KEYWORDS); const RELATION_NAME_FIRST: TokenSet = TokenSet::new(&[ONLY_KW]).union(PATH_FIRST); diff --git a/crates/squawk_parser/tests/data/err/type_only_params.sql b/crates/squawk_parser/tests/data/err/type_only_params.sql new file mode 100644 index 00000000..beeb33be --- /dev/null +++ b/crates/squawk_parser/tests/data/err/type_only_params.sql @@ -0,0 +1,6 @@ +-- type-only parameter lists should recover instead of getting stuck +prepare p (variadic) as select 1; +prepare p (in) as select 1; +create operator class c for type int using btree as function 1 (variadic) f(int); +alter operator family f using btree add function 1 (variadic) f(int); +alter operator family f using btree drop function 1 (variadic); diff --git a/crates/squawk_parser/tests/data/ok/select_cte.sql b/crates/squawk_parser/tests/data/ok/select_cte.sql index 826d5102..f2bc2c9c 100644 --- a/crates/squawk_parser/tests/data/ok/select_cte.sql +++ b/crates/squawk_parser/tests/data/ok/select_cte.sql @@ -26,6 +26,10 @@ with t(a, b) as ( ) select * from t; +-- adjacent quoted aliases +with t as (select 1 e, 2 b, 3 u) +select e"bar", b"b", u"u" from t; + -- materialized with t as materialized ( select 1 diff --git a/crates/squawk_parser/tests/snapshots/tests__select_cte_ok.snap b/crates/squawk_parser/tests/snapshots/tests__select_cte_ok.snap index 0d3fd8cc..ee08ae12 100644 --- a/crates/squawk_parser/tests/snapshots/tests__select_cte_ok.snap +++ b/crates/squawk_parser/tests/snapshots/tests__select_cte_ok.snap @@ -209,6 +209,86 @@ SOURCE_FILE IDENT "t" SEMICOLON ";" WHITESPACE "\n\n" + COMMENT "-- adjacent quoted aliases" + WHITESPACE "\n" + SELECT + WITH_CLAUSE + WITH_KW "with" + WHITESPACE " " + WITH_TABLE + NAME + IDENT "t" + WHITESPACE " " + AS_KW "as" + WHITESPACE " " + L_PAREN "(" + SELECT + SELECT_CLAUSE + SELECT_KW "select" + WHITESPACE " " + TARGET_LIST + TARGET + LITERAL + INT_NUMBER "1" + WHITESPACE " " + AS_NAME + NAME + IDENT "e" + COMMA "," + WHITESPACE " " + TARGET + LITERAL + INT_NUMBER "2" + WHITESPACE " " + AS_NAME + NAME + IDENT "b" + COMMA "," + WHITESPACE " " + TARGET + LITERAL + INT_NUMBER "3" + WHITESPACE " " + AS_NAME + NAME + IDENT "u" + R_PAREN ")" + WHITESPACE "\n" + SELECT_CLAUSE + SELECT_KW "select" + WHITESPACE " " + TARGET_LIST + TARGET + NAME_REF + IDENT "e" + AS_NAME + NAME + IDENT "\"bar\"" + COMMA "," + WHITESPACE " " + TARGET + NAME_REF + IDENT "b" + AS_NAME + NAME + IDENT "\"b\"" + COMMA "," + WHITESPACE " " + TARGET + NAME_REF + IDENT "u" + AS_NAME + NAME + IDENT "\"u\"" + WHITESPACE " " + FROM_CLAUSE + FROM_KW "from" + WHITESPACE " " + FROM_ITEM + NAME_REF + IDENT "t" + SEMICOLON ";" + WHITESPACE "\n\n" COMMENT "-- materialized " WHITESPACE "\n" SELECT diff --git a/crates/squawk_parser/tests/snapshots/tests__select_ok.snap b/crates/squawk_parser/tests/snapshots/tests__select_ok.snap index ae55cd20..948d76bf 100644 --- a/crates/squawk_parser/tests/snapshots/tests__select_ok.snap +++ b/crates/squawk_parser/tests/snapshots/tests__select_ok.snap @@ -4555,25 +4555,20 @@ SOURCE_FILE WHITESPACE " " TARGET_LIST TARGET - CALL_EXPR - NAME_REF - ROW_KW "row" - ARG_LIST - L_PAREN "(" - ARG - LITERAL - STRING "'fuzzy dice'" - COMMA "," - WHITESPACE " " - ARG - LITERAL - INT_NUMBER "42" - COMMA "," - WHITESPACE " " - ARG - LITERAL - NUMERIC_NUMBER "1.99" - R_PAREN ")" + TUPLE_EXPR + ROW_KW "row" + L_PAREN "(" + LITERAL + STRING "'fuzzy dice'" + COMMA "," + WHITESPACE " " + LITERAL + INT_NUMBER "42" + COMMA "," + WHITESPACE " " + LITERAL + NUMERIC_NUMBER "1.99" + R_PAREN ")" SEMICOLON ";" WHITESPACE "\n" SELECT diff --git a/crates/squawk_parser/tests/snapshots/tests__type_only_params_err.snap b/crates/squawk_parser/tests/snapshots/tests__type_only_params_err.snap new file mode 100644 index 00000000..e62f462e --- /dev/null +++ b/crates/squawk_parser/tests/snapshots/tests__type_only_params_err.snap @@ -0,0 +1,222 @@ +--- +source: crates/squawk_parser/tests/tests.rs +input_file: crates/squawk_parser/tests/data/err/type_only_params.sql +--- +SOURCE_FILE + COMMENT "-- type-only parameter lists should recover instead of getting stuck" + WHITESPACE "\n" + PREPARE + PREPARE_KW "prepare" + WHITESPACE " " + NAME + IDENT "p" + WHITESPACE " " + PARAM_LIST + L_PAREN "(" + PARAM + ERROR + VARIADIC_KW "variadic" + R_PAREN ")" + WHITESPACE " " + AS_KW "as" + WHITESPACE " " + SELECT + SELECT_CLAUSE + SELECT_KW "select" + WHITESPACE " " + TARGET_LIST + TARGET + LITERAL + INT_NUMBER "1" + SEMICOLON ";" + WHITESPACE "\n" + PREPARE + PREPARE_KW "prepare" + WHITESPACE " " + NAME + IDENT "p" + WHITESPACE " " + PARAM_LIST + L_PAREN "(" + PARAM + ERROR + IN_KW "in" + R_PAREN ")" + WHITESPACE " " + AS_KW "as" + WHITESPACE " " + SELECT + SELECT_CLAUSE + SELECT_KW "select" + WHITESPACE " " + TARGET_LIST + TARGET + LITERAL + INT_NUMBER "1" + SEMICOLON ";" + WHITESPACE "\n" + CREATE_OPERATOR_CLASS + CREATE_KW "create" + WHITESPACE " " + OPERATOR_KW "operator" + WHITESPACE " " + CLASS_KW "class" + WHITESPACE " " + PATH + PATH_SEGMENT + NAME + IDENT "c" + WHITESPACE " " + FOR_KW "for" + WHITESPACE " " + TYPE_KW "type" + WHITESPACE " " + PATH_TYPE + PATH + PATH_SEGMENT + NAME_REF + INT_KW "int" + WHITESPACE " " + USING_KW "using" + WHITESPACE " " + NAME_REF + IDENT "btree" + WHITESPACE " " + AS_KW "as" + WHITESPACE " " + OPERATOR_CLASS_OPTION_LIST + OP_CLASS_OPTION + FUNCTION_KW "function" + WHITESPACE " " + LITERAL + INT_NUMBER "1" + WHITESPACE " " + PARAM_LIST + L_PAREN "(" + PARAM + ERROR + VARIADIC_KW "variadic" + R_PAREN ")" + WHITESPACE " " + FUNCTION_SIG + PATH + PATH_SEGMENT + NAME_REF + IDENT "f" + PARAM_LIST + L_PAREN "(" + PARAM + PATH_TYPE + PATH + PATH_SEGMENT + NAME_REF + INT_KW "int" + R_PAREN ")" + SEMICOLON ";" + WHITESPACE "\n" + ALTER_OPERATOR_FAMILY + ALTER_KW "alter" + WHITESPACE " " + OPERATOR_KW "operator" + WHITESPACE " " + FAMILY_KW "family" + WHITESPACE " " + PATH + PATH_SEGMENT + NAME_REF + IDENT "f" + WHITESPACE " " + USING_KW "using" + WHITESPACE " " + NAME_REF + IDENT "btree" + WHITESPACE " " + ADD_OP_CLASS_OPTIONS + ADD_KW "add" + WHITESPACE " " + OPERATOR_CLASS_OPTION_LIST + OP_CLASS_OPTION + FUNCTION_KW "function" + WHITESPACE " " + LITERAL + INT_NUMBER "1" + WHITESPACE " " + PARAM_LIST + L_PAREN "(" + PARAM + ERROR + VARIADIC_KW "variadic" + R_PAREN ")" + WHITESPACE " " + FUNCTION_SIG + PATH + PATH_SEGMENT + NAME_REF + IDENT "f" + PARAM_LIST + L_PAREN "(" + PARAM + PATH_TYPE + PATH + PATH_SEGMENT + NAME_REF + INT_KW "int" + R_PAREN ")" + SEMICOLON ";" + WHITESPACE "\n" + ALTER_OPERATOR_FAMILY + ALTER_KW "alter" + WHITESPACE " " + OPERATOR_KW "operator" + WHITESPACE " " + FAMILY_KW "family" + WHITESPACE " " + PATH + PATH_SEGMENT + NAME_REF + IDENT "f" + WHITESPACE " " + USING_KW "using" + WHITESPACE " " + NAME_REF + IDENT "btree" + WHITESPACE " " + DROP_OP_CLASS_OPTIONS + DROP_KW "drop" + WHITESPACE " " + DROP_OP_CLASS_OPTION_LIST + DROP_OP_CLASS_OPTION + FUNCTION_KW "function" + WHITESPACE " " + LITERAL + INT_NUMBER "1" + WHITESPACE " " + PARAM_LIST + L_PAREN "(" + PARAM + ERROR + VARIADIC_KW "variadic" + R_PAREN ")" + SEMICOLON ";" + WHITESPACE "\n" +--- +error[syntax-error]: expected type name + ╭▸ +2 │ prepare p (variadic) as select 1; + ╰╴ ━ +error[syntax-error]: expected type name + ╭▸ +3 │ prepare p (in) as select 1; + ╰╴ ━ +error[syntax-error]: expected type name + ╭▸ +4 │ create operator class c for type int using btree as function 1 (variadic) f(int); + ╰╴ ━ +error[syntax-error]: expected type name + ╭▸ +5 │ alter operator family f using btree add function 1 (variadic) f(int); + ╰╴ ━ +error[syntax-error]: expected type name + ╭▸ +6 │ alter operator family f using btree drop function 1 (variadic); + ╰╴ ━