Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crates/squawk_fmt/src/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,7 @@ fn build_literal<'a>(lit: ast::Literal) -> Doc<'a> {
| LitKind::ByteString(_)
| LitKind::DollarQuotedString(_)
| LitKind::EscString(_)
| LitKind::NationalString(_)
| LitKind::String(_)
| LitKind::UnicodeEscString(_) => build_string_literal(&lit),
}
Expand Down
8 changes: 5 additions & 3 deletions crates/squawk_ide/src/column_name.rs
Original file line number Diff line number Diff line change
Expand Up @@ -486,9 +486,11 @@ fn name_from_expr(expr: ast::Expr, in_type: bool) -> Option<(ColumnName, SyntaxN
}
}
ast::Expr::Literal(literal) => {
if literal.syntax().first_token().is_some_and(|token| {
token.kind() == SyntaxKind::STRING && token.text().starts_with(['n', 'N'])
}) {
if literal
.syntax()
.first_token()
.is_some_and(|token| token.kind() == SyntaxKind::NATIONAL_STRING)
{
return Some((ColumnName::UnknownColumn(Some("bpchar".to_string())), node));
}
return Some((ColumnName::UnknownColumn(None), node));
Expand Down
15 changes: 15 additions & 0 deletions crates/squawk_ide/src/hover.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ fn hover_literal(literal: &ast::Literal) -> Option<Hover> {
| LitKind::BitString(_)
| LitKind::ByteString(_)
| LitKind::EscString(_)
| LitKind::NationalString(_)
| LitKind::UnicodeEscString(_)
| LitKind::DollarQuotedString(_)
) {
Expand All @@ -170,6 +171,7 @@ fn hover_literal(literal: &ast::Literal) -> Option<Hover> {
LitKind::ByteString(_) => format_bit_value_comment(&value, 16),
LitKind::String(_)
| LitKind::EscString(_)
| LitKind::NationalString(_)
| LitKind::UnicodeEscString(_)
| LitKind::DollarQuotedString(_) => match value.find('\n') {
Some(idx) => {
Expand Down Expand Up @@ -5604,6 +5606,19 @@ select 'foo$0';
");
}

// Hovering inside a national string literal (`N'...'`) should report the
// type `text` and show the literal's value without the `N` prefix or quotes.
// NOTE(review): `$0` presumably marks the hover position and is stripped by
// the test harness — confirm against `check_hover_info`.
#[test]
fn hover_national_string() {
assert_snapshot!(check_hover_info(r"
select N'fo$0o';
").markdown(), @"
```sql
text
```
---
value of literal: ` foo `
");
}

#[test]
fn hover_plain_string_escaped_quotes() {
assert_snapshot!(check_hover_info(r"
Expand Down
1 change: 1 addition & 0 deletions crates/squawk_ide/src/infer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ pub(crate) fn infer_type_from_literal(literal: &ast::Literal) -> Option<Type> {
SyntaxKind::STRING
| SyntaxKind::DOLLAR_QUOTED_STRING
| SyntaxKind::ESC_STRING
| SyntaxKind::NATIONAL_STRING
| SyntaxKind::UNICODE_ESC_STRING => Some(Type::Text),
SyntaxKind::BIT_STRING | SyntaxKind::BYTE_STRING => Some(Type::Bit),
SyntaxKind::TRUE_KW | SyntaxKind::FALSE_KW => Some(Type::Boolean),
Expand Down
4 changes: 4 additions & 0 deletions crates/squawk_ide/src/literals.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ pub(crate) fn literal_string_value(literal: &ast::Literal) -> Option<String> {
out.push_str(inner);
return Some(out);
}
SyntaxKind::NATIONAL_STRING => {
let inner = strip_prefixed_quotes(token.text(), ['n', 'N'])?;
decode_plain_string(inner, &mut out);
}
SyntaxKind::STRING => {
let inner = strip_quotes(token.text())?;
match decoding {
Expand Down
1 change: 1 addition & 0 deletions crates/squawk_ide/src/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ pub(crate) fn is_string_or_comment(kind: SyntaxKind) -> bool {
| SyntaxKind::BIT_STRING
| SyntaxKind::DOLLAR_QUOTED_STRING
| SyntaxKind::ESC_STRING
| SyntaxKind::NATIONAL_STRING
)
}
5 changes: 5 additions & 0 deletions crates/squawk_lexer/src/cursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ impl<'a> Cursor<'a> {
self.chars.as_str().is_empty()
}

/// Returns a clone of the remaining chars, for cheap lookahead.
///
/// Takes `&self` and hands back an independent iterator, so advancing the
/// returned `Chars` does not move this cursor.
pub(crate) fn chars(&self) -> Chars<'a> {
self.chars.clone()
}

/// Returns amount of already consumed symbols.
pub(crate) fn pos_within_token(&self) -> u32 {
(self.len_remaining - self.chars.as_str().len()) as u32
Expand Down
112 changes: 98 additions & 14 deletions crates/squawk_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ impl Cursor<'_> {
self.bump();
let terminated = self.single_quoted_string(false);
TokenKind::Literal {
kind: LiteralKind::Str { terminated },
kind: LiteralKind::NationalStr { terminated },
}
}
_ => self.ident(),
Expand Down Expand Up @@ -125,8 +125,7 @@ impl Cursor<'_> {
'?' => TokenKind::Question,
':' => TokenKind::Colon,
'$' => {
// Dollar quoted strings
if is_ident_start(self.first()) || self.first() == '$' {
if self.is_dollar_quote_start() {
self.dollar_quoted_string()
} else {
// Parameters
Expand Down Expand Up @@ -162,7 +161,10 @@ impl Cursor<'_> {
// Quoted identifiers
'"' => {
let terminated = self.double_quoted_string();
TokenKind::QuotedIdent { terminated }
TokenKind::QuotedIdent {
terminated,
uescape: false,
}
}
_ => TokenKind::Unknown,
};
Expand Down Expand Up @@ -235,7 +237,10 @@ impl Cursor<'_> {
'"' if allows_double => {
self.bump();
let terminated = self.double_quoted_string();
TokenKind::QuotedIdent { terminated }
TokenKind::QuotedIdent {
terminated,
uescape: true,
}
}
_ => self.ident(),
}
Expand Down Expand Up @@ -295,7 +300,10 @@ impl Cursor<'_> {
};

match self.first() {
'.' => self.eat_fractional(),
'.' => {
self.bump();
self.eat_fractional()
}
'e' | 'E' => {
let exponent_start = self.pos_within_token();
self.bump();
Expand Down Expand Up @@ -373,6 +381,28 @@ impl Cursor<'_> {
false
}

/// Reports whether the upcoming input opens a dollar-quoted string: either
/// `$$` (empty tag) or `$tag$`.
///
/// This only peeks. It scans a copy of the remaining characters obtained
/// from `self.chars()`, so the cursor itself is not advanced.
fn is_dollar_quote_start(&self) -> bool {
    let mut lookahead = self.chars();
    let head = match lookahead.next() {
        Some(ch) => ch,
        None => return false,
    };
    // `$$...` -- empty tag
    if head == '$' {
        return true;
    }
    // A tag has to begin like an identifier.
    if !is_ident_start(head) {
        return false;
    }
    // `$tag$...` -- walk the tag until something ends it. Only a `$` closes
    // the tag; any other non-identifier char (or end of input) means this is
    // not a dollar quote.
    lookahead.find(|&ch| ch == '$' || !is_ident_cont(ch)) == Some('$')
}

// https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-DOLLAR-QUOTING
fn dollar_quoted_string(&mut self) -> TokenKind {
// Get the start sequence of the dollar quote, i.e., 'foo' in
Expand Down Expand Up @@ -487,11 +517,13 @@ impl Cursor<'_> {
/// Eats the numeric exponent. Returns true if at least one digit was met,
/// and returns false otherwise.
///
/// A leading `+`/`-` is consumed only when a digit follows it. Otherwise
/// nothing is eaten, so the sign is left for the caller to lex as its own
/// token (e.g. `1e+_2`).
fn eat_numeric_exponent(&mut self) -> bool {
    match self.first() {
        '-' | '+' => {
            // Do not commit to the sign unless it actually introduces digits.
            if !self.second().is_ascii_digit() {
                return false;
            }
            self.bump();
        }
        next if next.is_ascii_digit() => {}
        // Anything else (including a leading `_`) is not an exponent.
        _ => return false,
    }
    self.eat_decimal_digits()
}
Expand All @@ -503,9 +535,6 @@ impl Cursor<'_> {
}

pub(crate) fn eat_fractional(&mut self) -> crate::LiteralKind {
// might have stuff after the ., and if it does, it needs to start
// with a number
self.bump();
let mut empty_exponent_start = None;
if self.first().is_ascii_digit() {
self.eat_decimal_digits();
Expand Down Expand Up @@ -715,6 +744,42 @@ $foo$hello$world$bar$
"#))
}

// Numbers that start with `.` and contain `_` digit separators (optionally
// followed by an exponent) must lex as one Numeric literal: no empty
// exponent, and `trailing_junk_start` equal to the full token length.
#[test]
fn numeric_leading_dot_with_separators() {
assert_debug_snapshot!(lex(".1_2 .5_5 .1_2e3"), @r#"
[
".1_2" @ Literal { kind: Numeric { empty_exponent_start: None, trailing_junk_start: 4 } },
" " @ Whitespace,
".5_5" @ Literal { kind: Numeric { empty_exponent_start: None, trailing_junk_start: 4 } },
" " @ Whitespace,
".1_2e3" @ Literal { kind: Numeric { empty_exponent_start: None, trailing_junk_start: 6 } },
]
"#)
}

// An exponent sign followed by `_` is not part of the number: the literal
// stops right after `e`/`E` with an empty exponent recorded, and the sign and
// `_2` are lexed as separate `Plus`/`Minus` and `Ident` tokens.
#[test]
fn numeric_exponent_underscore_after_sign() {
assert_debug_snapshot!(lex("1e+_2 1e-_2 1.0e+_2 .1e+_2"), @r#"
[
"1e" @ Literal { kind: Numeric { empty_exponent_start: Some(1), trailing_junk_start: 2 } },
"+" @ Plus,
"_2" @ Ident,
" " @ Whitespace,
"1e" @ Literal { kind: Numeric { empty_exponent_start: Some(1), trailing_junk_start: 2 } },
"-" @ Minus,
"_2" @ Ident,
" " @ Whitespace,
"1.0e" @ Literal { kind: Numeric { empty_exponent_start: Some(3), trailing_junk_start: 4 } },
"+" @ Plus,
"_2" @ Ident,
" " @ Whitespace,
".1e" @ Literal { kind: Numeric { empty_exponent_start: Some(2), trailing_junk_start: 3 } },
"+" @ Plus,
"_2" @ Ident,
]
"#)
}

#[test]
fn select_with_period() {
assert_debug_snapshot!(lex(r#"
Expand All @@ -736,9 +801,9 @@ x'1FF'
fn national_character_string() {
assert_debug_snapshot!(lex("N'foo' n'bar' numeric'1'"), @r#"
[
"N'foo'" @ Literal { kind: Str { terminated: true } },
"N'foo'" @ Literal { kind: NationalStr { terminated: true } },
" " @ Whitespace,
"n'bar'" @ Literal { kind: Str { terminated: true } },
"n'bar'" @ Literal { kind: NationalStr { terminated: true } },
" " @ Whitespace,
"numeric" @ Ident,
"'1'" @ Literal { kind: Str { terminated: true } },
Expand Down Expand Up @@ -902,6 +967,25 @@ U&"d!0061t!+000061" UESCAPE '!'
"#);
}

// `$x` with no closing `$` must not be treated as the start of a
// dollar-quoted string. It lexes as a positional parameter with trailing
// junk, and the statements that follow are tokenized normally.
#[test]
fn unclosed_dollar_tag_does_not_swallow_rest_of_input() {
assert_debug_snapshot!(lex("select $x;\ndrop table users;"), @r#"
[
"select" @ Ident,
" " @ Whitespace,
"$x" @ PositionalParam { trailing_junk_start: 1 },
";" @ Semi,
"\n" @ Whitespace,
"drop" @ Ident,
" " @ Whitespace,
"table" @ Ident,
" " @ Whitespace,
"users" @ Ident,
";" @ Semi,
]
"#);
}

#[test]
fn ident_non_ascii_above_latin1() {
assert_debug_snapshot!(lex("ẞ Ā 漢字 𐐷"), @r#"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ expression: "lex(r#\"\n\"hello &1 -world\";\n\n\n\"hello-world\n\"#)"
---
[
"\n" @ Whitespace,
"\"hello &1 -world\"" @ QuotedIdent { terminated: true },
"\"hello &1 -world\"" @ QuotedIdent { terminated: true, uescape: false },
";" @ Semi,
"\n\n\n" @ Whitespace,
"\"hello-world\n" @ QuotedIdent { terminated: false },
"\"hello-world\n" @ QuotedIdent { terminated: false, uescape: false },
]
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ expression: "lex(r#\"\n\"foo \"\" bar\"\n\"#)"
---
[
"\n" @ Whitespace,
"\"foo \"\" bar\"" @ QuotedIdent { terminated: true },
"\"foo \"\" bar\"" @ QuotedIdent { terminated: true, uescape: false },
"\n" @ Whitespace,
]
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ expression: "lex(r#\"\nU&\"d\\0061t\\+000061\"\n\nU&\"\\0441\\043B\\043E\\043D\"
---
[
"\n" @ Whitespace,
"U&\"d\\0061t\\+000061\"" @ QuotedIdent { terminated: true },
"U&\"d\\0061t\\+000061\"" @ QuotedIdent { terminated: true, uescape: true },
"\n\n" @ Whitespace,
"U&\"\\0441\\043B\\043E\\043D\"" @ QuotedIdent { terminated: true },
"U&\"\\0441\\043B\\043E\\043D\"" @ QuotedIdent { terminated: true, uescape: true },
"\n\n" @ Whitespace,
"u&'\\0441\\043B'" @ Literal { kind: UnicodeEscStr { terminated: true } },
"\n\n" @ Whitespace,
"U&\"d!0061t!+000061\"" @ QuotedIdent { terminated: true },
"U&\"d!0061t!+000061\"" @ QuotedIdent { terminated: true, uescape: true },
" " @ Whitespace,
"UESCAPE" @ Ident,
" " @ Whitespace,
Expand Down
4 changes: 3 additions & 1 deletion crates/squawk_lexer/src/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ pub enum TokenKind {
/// These are case-sensitive, unlike [`TokenKind::Ident`]
///
/// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS>
QuotedIdent { terminated: bool },
QuotedIdent { terminated: bool, uescape: bool },
}

/// Parsed token.
Expand Down Expand Up @@ -143,6 +143,8 @@ pub enum LiteralKind {
///
/// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS>
Str { terminated: bool },
/// National character string, e.g., `N'foo'`
NationalStr { terminated: bool },
/// Hexadecimal Bit String, e.g., `X'1FF'`
///
/// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-BIT-STRINGS>
Expand Down
1 change: 1 addition & 0 deletions crates/squawk_parser/src/generated/syntax_kind.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 7 additions & 1 deletion crates/squawk_parser/src/grammar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,12 @@ fn literal(p: &mut Parser<'_>) -> Option<CompletedMarker> {
if p.eat(UESCAPE_KW) {
p.expect(STRING);
}
} else if p.eat(STRING) || p.eat(ESC_STRING) || p.eat(BIT_STRING) || p.eat(BYTE_STRING) {
} else if p.eat(NATIONAL_STRING)
|| p.eat(STRING)
|| p.eat(ESC_STRING)
|| p.eat(BIT_STRING)
|| p.eat(BYTE_STRING)
{
while !p.at(EOF) && p.eat(STRING) {}
} else {
p.bump_any();
Expand Down Expand Up @@ -4882,6 +4887,7 @@ const STRING_FIRST: TokenSet = TokenSet::new(&[
BIT_STRING,
DOLLAR_QUOTED_STRING,
ESC_STRING,
NATIONAL_STRING,
]);

// via https://www.postgresql.org/docs/17/sql-createoperator.html
Expand Down
Loading
Loading