diff --git a/crates/squawk_parser/src/generated/token_sets.rs b/crates/squawk_parser/src/generated/token_sets.rs index c393c45f..5033790d 100644 --- a/crates/squawk_parser/src/generated/token_sets.rs +++ b/crates/squawk_parser/src/generated/token_sets.rs @@ -853,6 +853,99 @@ pub(crate) const TYPE_KEYWORDS: TokenSet = TokenSet::new(&[ SyntaxKind::ZONE_KW, ]); +pub(crate) const COL_NAME_KEYWORD_FIRST: TokenSet = TokenSet::new(&[ + SyntaxKind::BETWEEN_KW, + SyntaxKind::BIGINT_KW, + SyntaxKind::BIT_KW, + SyntaxKind::BOOLEAN_KW, + SyntaxKind::CHAR_KW, + SyntaxKind::CHARACTER_KW, + SyntaxKind::COALESCE_KW, + SyntaxKind::DEC_KW, + SyntaxKind::DECIMAL_KW, + SyntaxKind::EXISTS_KW, + SyntaxKind::EXTRACT_KW, + SyntaxKind::FLOAT_KW, + SyntaxKind::GRAPH_TABLE_KW, + SyntaxKind::GREATEST_KW, + SyntaxKind::GROUPING_KW, + SyntaxKind::INOUT_KW, + SyntaxKind::INT_KW, + SyntaxKind::INTEGER_KW, + SyntaxKind::INTERVAL_KW, + SyntaxKind::JSON_KW, + SyntaxKind::JSON_ARRAY_KW, + SyntaxKind::JSON_ARRAYAGG_KW, + SyntaxKind::JSON_EXISTS_KW, + SyntaxKind::JSON_OBJECT_KW, + SyntaxKind::JSON_OBJECTAGG_KW, + SyntaxKind::JSON_QUERY_KW, + SyntaxKind::JSON_SCALAR_KW, + SyntaxKind::JSON_SERIALIZE_KW, + SyntaxKind::JSON_TABLE_KW, + SyntaxKind::JSON_VALUE_KW, + SyntaxKind::LEAST_KW, + SyntaxKind::MERGE_ACTION_KW, + SyntaxKind::NATIONAL_KW, + SyntaxKind::NCHAR_KW, + SyntaxKind::NONE_KW, + SyntaxKind::NORMALIZE_KW, + SyntaxKind::NULLIF_KW, + SyntaxKind::NUMERIC_KW, + SyntaxKind::OUT_KW, + SyntaxKind::OVERLAY_KW, + SyntaxKind::POSITION_KW, + SyntaxKind::PRECISION_KW, + SyntaxKind::REAL_KW, + SyntaxKind::ROW_KW, + SyntaxKind::SETOF_KW, + SyntaxKind::SMALLINT_KW, + SyntaxKind::SUBSTRING_KW, + SyntaxKind::TIME_KW, + SyntaxKind::TIMESTAMP_KW, + SyntaxKind::TREAT_KW, + SyntaxKind::TRIM_KW, + SyntaxKind::VALUES_KW, + SyntaxKind::VARCHAR_KW, + SyntaxKind::XMLATTRIBUTES_KW, + SyntaxKind::XMLCONCAT_KW, + SyntaxKind::XMLELEMENT_KW, + SyntaxKind::XMLEXISTS_KW, + SyntaxKind::XMLFOREST_KW, + SyntaxKind::XMLNAMESPACES_KW, + SyntaxKind::XMLPARSE_KW, + SyntaxKind::XMLPI_KW, + SyntaxKind::XMLROOT_KW, + SyntaxKind::XMLSERIALIZE_KW, + SyntaxKind::XMLTABLE_KW, +]); + +pub(crate) const TYPE_FUNC_NAME_KEYWORDS: TokenSet = TokenSet::new(&[ + SyntaxKind::AUTHORIZATION_KW, + SyntaxKind::BINARY_KW, + SyntaxKind::COLLATION_KW, + SyntaxKind::CONCURRENTLY_KW, + SyntaxKind::CROSS_KW, + SyntaxKind::CURRENT_SCHEMA_KW, + SyntaxKind::FREEZE_KW, + SyntaxKind::FULL_KW, + SyntaxKind::ILIKE_KW, + SyntaxKind::INNER_KW, + SyntaxKind::IS_KW, + SyntaxKind::ISNULL_KW, + SyntaxKind::JOIN_KW, + SyntaxKind::LEFT_KW, + SyntaxKind::LIKE_KW, + SyntaxKind::NATURAL_KW, + SyntaxKind::NOTNULL_KW, + SyntaxKind::OUTER_KW, + SyntaxKind::OVERLAPS_KW, + SyntaxKind::RIGHT_KW, + SyntaxKind::SIMILAR_KW, + SyntaxKind::TABLESAMPLE_KW, + SyntaxKind::VERBOSE_KW, +]); + pub(crate) const ALL_KEYWORDS: TokenSet = TokenSet::new(&[ SyntaxKind::ABORT_KW, SyntaxKind::ABSENT_KW, diff --git a/crates/squawk_parser/src/grammar.rs b/crates/squawk_parser/src/grammar.rs index c9ca49b8..fef3d757 100644 --- a/crates/squawk_parser/src/grammar.rs +++ b/crates/squawk_parser/src/grammar.rs @@ -4,8 +4,8 @@ use crate::{ CompletedMarker, Marker, Parser, generated::token_sets::{ - ALL_KEYWORDS, BARE_LABEL_KEYWORDS, COLUMN_OR_TABLE_KEYWORDS, RESERVED_KEYWORDS, - TYPE_KEYWORDS, UNRESERVED_KEYWORDS, + ALL_KEYWORDS, BARE_LABEL_KEYWORDS, COL_NAME_KEYWORD_FIRST, COLUMN_OR_TABLE_KEYWORDS, + RESERVED_KEYWORDS, TYPE_FUNC_NAME_KEYWORDS, TYPE_KEYWORDS, UNRESERVED_KEYWORDS, }, syntax_kind::SyntaxKind::{self, *}, token_set::TokenSet, @@ -3061,74 +3061,6 @@ fn opt_from_clause(p: &mut Parser<'_>) -> Option { Some(m.complete(p, FROM_CLAUSE)) } -// https://github.com/postgres/postgres/blob/b3219c69fc1e161df8d380c464b3f2cce3b6cab9/src/backend/parser/gram.y#L18042 -const COL_NAME_KEYWORD_FIRST: TokenSet = TokenSet::new(&[ - BETWEEN_KW, - BIGINT_KW, - BIT_KW, - BOOLEAN_KW, - CHAR_KW, - CHARACTER_KW, - COALESCE_KW, - DEC_KW, - DECIMAL_KW, - EXISTS_KW, - EXTRACT_KW, - FLOAT_KW, - GREATEST_KW, - GRAPH_TABLE_KW, - GROUPING_KW, - INOUT_KW, - INT_KW, - INTEGER_KW, - INTERVAL_KW, - JSON_KW, - JSON_ARRAY_KW, - JSON_ARRAYAGG_KW, - JSON_EXISTS_KW, - JSON_OBJECT_KW, - JSON_OBJECTAGG_KW, - JSON_QUERY_KW, - JSON_SCALAR_KW, - JSON_SERIALIZE_KW, - JSON_TABLE_KW, - JSON_VALUE_KW, - LEAST_KW, - MERGE_ACTION_KW, - NATIONAL_KW, - NCHAR_KW, - NONE_KW, - NORMALIZE_KW, - NULLIF_KW, - NUMERIC_KW, - OUT_KW, - OVERLAY_KW, - POSITION_KW, - PRECISION_KW, - REAL_KW, - ROW_KW, - SETOF_KW, - SMALLINT_KW, - SUBSTRING_KW, - TIME_KW, - TIMESTAMP_KW, - TREAT_KW, - TRIM_KW, - VALUES_KW, - VARCHAR_KW, - XMLATTRIBUTES_KW, - XMLCONCAT_KW, - XMLELEMENT_KW, - XMLEXISTS_KW, - XMLFOREST_KW, - XMLNAMESPACES_KW, - XMLPARSE_KW, - XMLPI_KW, - XMLROOT_KW, - XMLSERIALIZE_KW, - XMLTABLE_KW, -]); - // https://github.com/postgres/postgres/blob/2421e9a51d20bb83154e54a16ce628f9249fa907/src/backend/parser/gram.y#L15798C13-L16258 // Generated via the above grammar, but we only take the keywords that are // single items. So `CURRENT_DATE` but not `COLLATION FOR '(' a_expr ')'` @@ -14165,33 +14097,6 @@ fn opt_param_default(p: &mut Parser<'_>) -> Option { } } -/// see: -const TYPE_FUNC_NAME_KEYWORDS: TokenSet = TokenSet::new(&[ - AUTHORIZATION_KW, - BINARY_KW, - COLLATION_KW, - CONCURRENTLY_KW, - CROSS_KW, - CURRENT_SCHEMA_KW, - FREEZE_KW, - FULL_KW, - ILIKE_KW, - INNER_KW, - IS_KW, - ISNULL_KW, - JOIN_KW, - LEFT_KW, - LIKE_KW, - NATURAL_KW, - NOTNULL_KW, - OUTER_KW, - OVERLAPS_KW, - RIGHT_KW, - SIMILAR_KW, - TABLESAMPLE_KW, - VERBOSE_KW, -]); - const PARAM_FIRST: TokenSet = PARAM_MODE_FIRST.union(NAME_FIRST).union(TYPE_NAME_FIRST); fn opt_param(p: &mut Parser<'_>, kind: ParamKind) -> bool { diff --git a/crates/xtask/src/codegen.rs b/crates/xtask/src/codegen.rs index d138ecc6..dab6b359 100644 --- a/crates/xtask/src/codegen.rs +++ b/crates/xtask/src/codegen.rs @@ -362,6 +362,18 @@ fn generate_token_sets(keyword_kinds: &KeywordKinds) -> Result { .map(|key| format_ident!("{}_KW", key.to_case(Case::UpperSnake))) .collect::>(); + let col_name_keywords = keyword_kinds + .col_name_keywords + .iter() + .map(|key| format_ident!("{}_KW", key.to_case(Case::UpperSnake))) + .collect::>(); + + let type_func_name_keywords = keyword_kinds + .type_func_name_keywords + .iter() + .map(|key| format_ident!("{}_KW", key.to_case(Case::UpperSnake))) + .collect::>(); + let all_keywords = &keyword_kinds .all_keywords .iter() @@ -396,6 +408,14 @@ fn generate_token_sets(keyword_kinds: &KeywordKinds) -> Result { #(SyntaxKind::#type_keywords),* ]); + pub(crate) const COL_NAME_KEYWORD_FIRST: TokenSet = TokenSet::new(&[ + #(SyntaxKind::#col_name_keywords),* + ]); + + pub(crate) const TYPE_FUNC_NAME_KEYWORDS: TokenSet = TokenSet::new(&[ + #(SyntaxKind::#type_func_name_keywords),* + ]); + pub(crate) const ALL_KEYWORDS: TokenSet = TokenSet::new(&[ #(SyntaxKind::#all_keywords),* ]); diff --git a/crates/xtask/src/keywords.rs b/crates/xtask/src/keywords.rs index 48c37ad1..a33f14b3 100644 --- a/crates/xtask/src/keywords.rs +++ b/crates/xtask/src/keywords.rs @@ -122,6 +122,8 @@ pub(crate) struct KeywordKinds { pub(crate) bare_label_keywords: Vec, pub(crate) unreserved_keywords: Vec, pub(crate) reserved_keywords: Vec, + pub(crate) col_name_keywords: Vec, + pub(crate) type_func_name_keywords: Vec, pub(crate) col_table_keywords: Vec, pub(crate) type_keywords: Vec, } @@ -152,6 +154,20 @@ pub(crate) fn keyword_kinds() -> Result { .collect::>(); reserved_keywords.sort(); + let mut col_name_keywords = keywords + .iter() + .filter(|(_key, value)| matches!(value.category, KeywordCategory::ColName)) + .map(|(key, _value)| key.to_owned()) + .collect::>(); + col_name_keywords.sort(); + + let mut type_func_name_keywords = keywords + .iter() + .filter(|(_key, value)| matches!(value.category, KeywordCategory::TypeFuncName)) + .map(|(key, _value)| key.to_owned()) + .collect::>(); + type_func_name_keywords.sort(); + let mut all_keywords = keywords .keys() .map(|key| key.to_owned()) @@ -193,6 +209,8 @@ pub(crate) fn keyword_kinds() -> Result { bare_label_keywords, unreserved_keywords, reserved_keywords, + col_name_keywords, + type_func_name_keywords, col_table_keywords, type_keywords, })