From 3c9982c279c183aad10c87a584143696634319f7 Mon Sep 17 00:00:00 2001 From: Varun Date: Thu, 4 Jun 2026 03:20:19 -0700 Subject: [PATCH] Add SQL histogram and date_histogram bucket functions --- .../src/main/antlr4/OpenSearchSQLParser.g4 | 6 + sql/src/main/antlr/OpenSearchSQLParser.g4 | 6 + .../sql/sql/parser/AstExpressionBuilder.java | 15 +- .../parser/bucket/BucketFunctionExpander.java | 22 ++ .../parser/bucket/BucketFunctionRegistry.java | 32 ++ .../parser/bucket/BucketFunctionUtils.java | 44 +++ .../parser/bucket/DateHistogramExpander.java | 132 +++++++ .../sql/parser/bucket/HistogramExpander.java | 71 ++++ .../sql/sql/parser/bucket/NamedArguments.java | 142 +++++++ .../bucket/BucketFunctionRegistryTest.java | 53 +++ .../bucket/BucketFunctionUtilsTest.java | 50 +++ .../bucket/DateHistogramExpanderTest.java | 351 ++++++++++++++++++ .../parser/bucket/HistogramExpanderTest.java | 295 +++++++++++++++ .../sql/parser/bucket/NamedArgumentsTest.java | 221 +++++++++++ 14 files changed, 1439 insertions(+), 1 deletion(-) create mode 100644 sql/src/main/java/org/opensearch/sql/sql/parser/bucket/BucketFunctionExpander.java create mode 100644 sql/src/main/java/org/opensearch/sql/sql/parser/bucket/BucketFunctionRegistry.java create mode 100644 sql/src/main/java/org/opensearch/sql/sql/parser/bucket/BucketFunctionUtils.java create mode 100644 sql/src/main/java/org/opensearch/sql/sql/parser/bucket/DateHistogramExpander.java create mode 100644 sql/src/main/java/org/opensearch/sql/sql/parser/bucket/HistogramExpander.java create mode 100644 sql/src/main/java/org/opensearch/sql/sql/parser/bucket/NamedArguments.java create mode 100644 sql/src/test/java/org/opensearch/sql/sql/parser/bucket/BucketFunctionRegistryTest.java create mode 100644 sql/src/test/java/org/opensearch/sql/sql/parser/bucket/BucketFunctionUtilsTest.java create mode 100644 sql/src/test/java/org/opensearch/sql/sql/parser/bucket/DateHistogramExpanderTest.java create mode 100644 
sql/src/test/java/org/opensearch/sql/sql/parser/bucket/HistogramExpanderTest.java create mode 100644 sql/src/test/java/org/opensearch/sql/sql/parser/bucket/NamedArgumentsTest.java diff --git a/language-grammar/src/main/antlr4/OpenSearchSQLParser.g4 b/language-grammar/src/main/antlr4/OpenSearchSQLParser.g4 index 5f7361160b3..4a2ab35a89b 100644 --- a/language-grammar/src/main/antlr4/OpenSearchSQLParser.g4 +++ b/language-grammar/src/main/antlr4/OpenSearchSQLParser.g4 @@ -411,6 +411,12 @@ scalarFunctionName | flowControlFunctionName | systemFunctionName | nestedFunctionName + | bucketFunctionName + ; + +bucketFunctionName + : HISTOGRAM + | DATE_HISTOGRAM ; specificFunction diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index 492f6dee9c6..8cf5a69a882 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -443,6 +443,12 @@ scalarFunctionName | flowControlFunctionName | systemFunctionName | nestedFunctionName + | bucketFunctionName + ; + +bucketFunctionName + : HISTOGRAM + | DATE_HISTOGRAM ; specificFunction diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java index 273076af40f..4cdd8465525 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java @@ -97,6 +97,8 @@ import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.OrExpressionContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.TableNameContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParserBaseVisitor; +import org.opensearch.sql.sql.parser.bucket.BucketFunctionExpander; +import org.opensearch.sql.sql.parser.bucket.BucketFunctionRegistry; /** Expression builder to parse text to expression in AST. 
*/ public class AstExpressionBuilder extends OpenSearchSQLParserBaseVisitor { @@ -139,7 +141,18 @@ public UnresolvedExpression visitNestedAllFunctionCall(NestedAllFunctionCallCont @Override public UnresolvedExpression visitScalarFunctionCall(ScalarFunctionCallContext ctx) { - return buildFunction(ctx.scalarFunctionName().getText(), ctx.functionArgs().functionArg()); + String functionName = ctx.scalarFunctionName().getText(); + List args = + ctx.functionArgs().functionArg().stream() + .map(this::visitFunctionArg) + .collect(Collectors.toList()); + + Optional bucketExpander = BucketFunctionRegistry.lookup(functionName); + if (bucketExpander.isPresent()) { + return bucketExpander.get().expand(args); + } + + return new Function(functionName, args); } @Override diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/bucket/BucketFunctionExpander.java b/sql/src/main/java/org/opensearch/sql/sql/parser/bucket/BucketFunctionExpander.java new file mode 100644 index 00000000000..d6d2dc2283d --- /dev/null +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/bucket/BucketFunctionExpander.java @@ -0,0 +1,22 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql.parser.bucket; + +import java.util.List; +import org.opensearch.sql.ast.expression.UnresolvedExpression; + +/** + * Parse-time expander for a bucket function call. Each implementation lowers calls to one bucket + * function (e.g. {@code histogram}) into standard SQL constructs the rest of the engine already + * understands. + * + *

Implementations are stateless and registered by name in {@link BucketFunctionRegistry}. + */ +public interface BucketFunctionExpander { + + /** Lowers a bucket function call into its bucket-key expression. */ + UnresolvedExpression expand(List args); +} diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/bucket/BucketFunctionRegistry.java b/sql/src/main/java/org/opensearch/sql/sql/parser/bucket/BucketFunctionRegistry.java new file mode 100644 index 00000000000..e1471597689 --- /dev/null +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/bucket/BucketFunctionRegistry.java @@ -0,0 +1,32 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql.parser.bucket; + +import java.util.Locale; +import java.util.Map; +import java.util.Optional; + +/** Lookup table mapping bucket-function names to their {@link BucketFunctionExpander}. */ +public final class BucketFunctionRegistry { + + private static final Map EXPANDERS = + Map.of( + HistogramExpander.FUNCTION_NAME, new HistogramExpander(), + DateHistogramExpander.FUNCTION_NAME, new DateHistogramExpander()); + + private BucketFunctionRegistry() {} + + /** + * Returns the expander for {@code functionName} (case-insensitive), or empty if not a bucket + * function. 
+ */ + public static Optional lookup(String functionName) { + if (functionName == null) { + return Optional.empty(); + } + return Optional.ofNullable(EXPANDERS.get(functionName.toUpperCase(Locale.ROOT))); + } +} diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/bucket/BucketFunctionUtils.java b/sql/src/main/java/org/opensearch/sql/sql/parser/bucket/BucketFunctionUtils.java new file mode 100644 index 00000000000..850d7ba92be --- /dev/null +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/bucket/BucketFunctionUtils.java @@ -0,0 +1,44 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql.parser.bucket; + +import java.util.List; +import org.opensearch.sql.ast.dsl.AstDSL; +import org.opensearch.sql.ast.expression.DataType; +import org.opensearch.sql.ast.expression.Function; +import org.opensearch.sql.ast.expression.Literal; +import org.opensearch.sql.ast.expression.QualifiedName; +import org.opensearch.sql.ast.expression.UnresolvedExpression; + +/** + * Shared parameter helpers for bucket-function expanders. Operates on values pulled from a {@link + * NamedArguments} or from a positional argument list. + */ +final class BucketFunctionUtils { + + private BucketFunctionUtils() {} + + /** + * Named-argument form accepts string-literal field names ({@code 'field'='age'}). Coerce them to + * {@link QualifiedName} so downstream sees a column reference regardless of how the user spelled + * it. + */ + static UnresolvedExpression normalizeFieldRef(UnresolvedExpression expr) { + if (expr instanceof Literal lit && lit.getType() == DataType.STRING) { + return AstDSL.qualifiedName(lit.getValue().toString()); + } + return expr; + } + + /** If {@code missingOrNull} is non-null, wrap field with {@code COALESCE(field, missing)}. 
*/ + static UnresolvedExpression applyMissing( + UnresolvedExpression field, UnresolvedExpression missingOrNull) { + if (missingOrNull == null) { + return field; + } + return new Function("coalesce", List.of(field, missingOrNull)); + } +} diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/bucket/DateHistogramExpander.java b/sql/src/main/java/org/opensearch/sql/sql/parser/bucket/DateHistogramExpander.java new file mode 100644 index 00000000000..d9cd7a098c2 --- /dev/null +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/bucket/DateHistogramExpander.java @@ -0,0 +1,132 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql.parser.bucket; + +import static org.opensearch.sql.sql.parser.bucket.BucketFunctionUtils.applyMissing; +import static org.opensearch.sql.sql.parser.bucket.BucketFunctionUtils.normalizeFieldRef; + +import java.time.ZoneOffset; +import java.util.List; +import java.util.Objects; +import java.util.stream.Stream; +import org.opensearch.sql.ast.dsl.AstDSL; +import org.opensearch.sql.ast.expression.Function; +import org.opensearch.sql.ast.expression.Literal; +import org.opensearch.sql.ast.expression.Span; +import org.opensearch.sql.ast.expression.UnresolvedExpression; +import org.opensearch.sql.exception.SemanticCheckException; + +/** + * Lowers {@code date_histogram(...)} calls to a {@link Span} expression with the time unit inferred + * from the interval string. Optional parameters wrap the bucket key: + * + *

<ul>
+ *   <li>{@code missing} — wraps the field with {@code COALESCE(field, missing)} before bucketing.
+ *   <li>{@code time_zone} — shifts the field with {@code TIMESTAMPADD(SECOND, offset, field)}
+ *       before bucketing. Validated as a {@link java.time.ZoneOffset} at parse time.
+ *   <li>{@code format} — wraps the bucket with {@code DATE_FORMAT(span, format)}.
+ * </ul>
+ *
+ * <p>

{@code interval}, {@code fixed_interval}, and {@code calendar_interval} are accepted as + * mutually-exclusive syntactic synonyms; this lowering does not preserve the calendar-vs-fixed + * distinction across them. + * + *

TODO: V1 also accepts the following parameters; they are currently rejected: + * + *

<ul>
+ *   <li>{@code min_doc_count} — would lower to {@code HAVING COUNT(*) >= N}. Needs parser-side
+ *       plumbing to inject a HAVING clause from inside a scalar function call.
+ *   <li>{@code order} — would lower to {@code ORDER BY}. Same plumbing requirement as above.
+ *   <li>{@code alias} — would set the surrounding SELECT-list alias. Needs reaching outside the
+ *       function call to mutate the parent SELECT element.
+ *   <li>{@code offset} — would shift bucket boundaries via {@code TIMESTAMPADD(SECOND, -offset,
+ *       field)} before bucketing and {@code TIMESTAMPADD(SECOND, offset, span)} after. Needs a
+ *       duration-string parser ({@code '1h'}, {@code '2d'}, etc.) distinct from {@code time_zone}'s
+ *       {@code ZoneOffset} format.
+ * </ul>
+ */
+final class DateHistogramExpander implements BucketFunctionExpander {
+
+  // Upper-case name under which BucketFunctionRegistry registers this expander.
+  static final String FUNCTION_NAME = "DATE_HISTOGRAM";
+
+  /**
+   * Validates the named arguments of a {@code date_histogram(...)} call and builds its bucket
+   * expression.
+   *
+   * @param args the call's arguments; every one must have the {@code 'key'=value} shape
+   * @return the expression produced by {@code buildBucket} from the extracted parameters
+   * @throws SemanticCheckException if the arguments are not all named, a key is duplicated,
+   *     {@code field} is absent, the interval parameters are missing or conflicting, {@code
+   *     format} or {@code time_zone} is not a string literal, or any other parameter is supplied
+   */
+  @Override
+  public UnresolvedExpression expand(List args) {
+    if (!NamedArguments.isNamedArguments(args)) {
+      throw new SemanticCheckException(
+          "date_histogram requires named arguments: date_histogram('field'=,"
+              + " 'interval'=)");
+    }
+    NamedArguments named = NamedArguments.parse(args);
+    // Each extraction below removes its key from `named`; the order also fixes which validation
+    // error is raised first when several parameters are wrong.
+    UnresolvedExpression field = named.require("field", FUNCTION_NAME);
+    Literal intervalLiteral = extractIntervalLiteral(named);
+    Literal formatLiteral = named.requireStringIfPresent("format");
+    Literal timeZoneLiteral = named.requireStringIfPresent("time_zone");
+    UnresolvedExpression missing = named.remove("missing");
+    // Whatever is still left was not recognized above and is rejected as unsupported.
+    named.rejectRemaining(FUNCTION_NAME);
+    return buildBucket(field, intervalLiteral, formatLiteral, timeZoneLiteral, missing);
+  }
+
+  /**
+   * Pulls the interval from the named arguments accepting any of {@code interval}, {@code
+   * fixed_interval}, {@code calendar_interval}. Exactly one must be present.
+ */ + private static Literal extractIntervalLiteral(NamedArguments named) { + Literal interval = named.requireStringIfPresent("interval"); + Literal fixedInterval = named.requireStringIfPresent("fixed_interval"); + Literal calendarInterval = named.requireStringIfPresent("calendar_interval"); + + List suppliedIntervals = + Stream.of(interval, fixedInterval, calendarInterval).filter(Objects::nonNull).toList(); + + if (suppliedIntervals.isEmpty()) { + throw new SemanticCheckException( + "date_histogram requires one of: interval, fixed_interval, calendar_interval"); + } + if (suppliedIntervals.size() > 1) { + throw new SemanticCheckException( + "date_histogram accepts only one of: interval, fixed_interval, calendar_interval"); + } + return suppliedIntervals.get(0); + } + + private static UnresolvedExpression buildBucket( + UnresolvedExpression field, + Literal intervalLiteral, + Literal formatLiteral, + Literal timeZoneLiteral, + UnresolvedExpression missingOrNull) { + UnresolvedExpression resolvedField = applyMissing(normalizeFieldRef(field), missingOrNull); + UnresolvedExpression shiftedField = + timeZoneLiteral != null + ? applyTimeZoneShift(resolvedField, timeZoneLiteral) + : resolvedField; + Span span = AstDSL.spanFromSpanLengthLiteral(shiftedField, intervalLiteral); + if (formatLiteral == null) { + return span; + } + return new Function("date_format", List.of(span, formatLiteral)); + } + + /** + * Wraps the field with a {@code TIMESTAMPADD(SECOND, offset, field)} shift derived from a + * timezone literal. Validates the literal at parse time as a {@link ZoneOffset} (e.g. {@code + * '+05:30'}, {@code 'Z'}); runtime arithmetic is plain second addition. 
+ */ + private static UnresolvedExpression applyTimeZoneShift( + UnresolvedExpression field, Literal timeZoneLiteral) { + String tzString = timeZoneLiteral.getValue().toString(); + int offsetSeconds; + try { + offsetSeconds = ZoneOffset.of(tzString).getTotalSeconds(); + } catch (RuntimeException ex) { + throw new SemanticCheckException( + "time_zone must be a valid offset like '+05:30' or 'Z'; got '" + tzString + "'"); + } + return new Function( + "timestampadd", + List.of(AstDSL.stringLiteral("SECOND"), AstDSL.intLiteral(offsetSeconds), field)); + } +} diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/bucket/HistogramExpander.java b/sql/src/main/java/org/opensearch/sql/sql/parser/bucket/HistogramExpander.java new file mode 100644 index 00000000000..8f89d0defba --- /dev/null +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/bucket/HistogramExpander.java @@ -0,0 +1,71 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql.parser.bucket; + +import static org.opensearch.sql.sql.parser.bucket.BucketFunctionUtils.applyMissing; +import static org.opensearch.sql.sql.parser.bucket.BucketFunctionUtils.normalizeFieldRef; + +import java.util.List; +import org.opensearch.sql.ast.dsl.AstDSL; +import org.opensearch.sql.ast.expression.Function; +import org.opensearch.sql.ast.expression.Span; +import org.opensearch.sql.ast.expression.SpanUnit; +import org.opensearch.sql.ast.expression.UnresolvedExpression; +import org.opensearch.sql.exception.SemanticCheckException; + +/** + * Lowers {@code histogram(...)} calls to a {@link Span} expression with {@code SpanUnit.NONE}. + * Optional parameters wrap the bucket key: + * + *
<ul>
+ *   <li>{@code missing} — wraps the field with {@code COALESCE(field, missing)} before bucketing.
+ *   <li>{@code offset} — wraps as {@code +(Span(-(field, offset), interval, NONE), offset)} to
+ *       preserve the standard {@code [k*interval+offset, (k+1)*interval+offset)} boundaries.
+ * </ul>
+ *
+ * <p>

TODO: V1 also accepts the following parameters; they are currently rejected: + * + *

<ul>
+ *   <li>{@code min_doc_count} — would lower to {@code HAVING COUNT(*) >= N}. Needs parser-side
+ *       plumbing to inject a HAVING clause from inside a scalar function call.
+ *   <li>{@code order} — would lower to {@code ORDER BY}. Same plumbing requirement as above.
+ *   <li>{@code alias} — would set the surrounding SELECT-list alias. Needs reaching outside the
+ *       function call to mutate the parent SELECT element.
+ * </ul>
+ */ +final class HistogramExpander implements BucketFunctionExpander { + + static final String FUNCTION_NAME = "HISTOGRAM"; + + @Override + public UnresolvedExpression expand(List args) { + if (!NamedArguments.isNamedArguments(args)) { + throw new SemanticCheckException( + "histogram requires named arguments: histogram('field'=, 'interval'=)"); + } + NamedArguments named = NamedArguments.parse(args); + UnresolvedExpression field = named.require("field", FUNCTION_NAME); + UnresolvedExpression interval = named.require("interval", FUNCTION_NAME); + UnresolvedExpression offset = named.remove("offset"); + UnresolvedExpression missing = named.remove("missing"); + named.rejectRemaining(FUNCTION_NAME); + return buildBucket(field, interval, offset, missing); + } + + private static UnresolvedExpression buildBucket( + UnresolvedExpression field, + UnresolvedExpression interval, + UnresolvedExpression offsetOrNull, + UnresolvedExpression missingOrNull) { + UnresolvedExpression resolvedField = applyMissing(normalizeFieldRef(field), missingOrNull); + if (offsetOrNull == null) { + return AstDSL.span(resolvedField, interval, SpanUnit.NONE); + } + UnresolvedExpression shifted = new Function("-", List.of(resolvedField, offsetOrNull)); + Span bucket = (Span) AstDSL.span(shifted, interval, SpanUnit.NONE); + return new Function("+", List.of(bucket, offsetOrNull)); + } +} diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/bucket/NamedArguments.java b/sql/src/main/java/org/opensearch/sql/sql/parser/bucket/NamedArguments.java new file mode 100644 index 00000000000..5a1ced9a949 --- /dev/null +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/bucket/NamedArguments.java @@ -0,0 +1,142 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql.parser.bucket; + +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import 
org.opensearch.sql.ast.expression.DataType; +import org.opensearch.sql.ast.expression.Function; +import org.opensearch.sql.ast.expression.Literal; +import org.opensearch.sql.ast.expression.UnresolvedExpression; +import org.opensearch.sql.exception.SemanticCheckException; + +/** + * Parses and validates named-argument style function arguments. The arg shape is {@code + * Function("=", [StringLiteral(key), value])} — what ANTLR produces for {@code 'key'=value}. Keys + * are lower-cased on parse; iteration order matches source order. + * + *

Drain semantics. Every extraction method ({@code require}, {@code remove}, {@code + * requireString}, {@code requireStringIfPresent}, {@code rejectIfPresent}, {@code consumeSilently}) + * removes its key from the collection. After the caller has extracted everything it recognizes, + * {@code rejectRemaining} sweeps what is left and treats those keys as unknown parameters — so + * extracted keys must drain out, otherwise they would be re-rejected. + */ +public final class NamedArguments { + + private final Map arguments; + + private NamedArguments(Map arguments) { + this.arguments = arguments; + } + + /** True iff every arg is a {@code 'key'=value} key-value pair. Empty list returns false. */ + public static boolean isNamedArguments(List args) { + if (args.isEmpty()) { + return false; + } + return args.stream().allMatch(NamedArguments::isKeyValuePair); + } + + private static boolean isKeyValuePair(UnresolvedExpression arg) { + if (!(arg instanceof Function fn) || !"=".equals(fn.getFuncName())) { + return false; + } + if (fn.getFuncArgs().size() != 2) { + return false; + } + return fn.getFuncArgs().get(0) instanceof Literal keyLiteral + && keyLiteral.getType() == DataType.STRING; + } + + /** + * Parses the given args into a {@code NamedArguments}. Each arg must match the {@code + * 'key'=value} shape — a non-matching arg raises {@link SemanticCheckException}. Duplicate keys + * also raise {@link SemanticCheckException}. 
+ */ + public static NamedArguments parse(List args) { + Map arguments = new LinkedHashMap<>(); + for (UnresolvedExpression arg : args) { + if (!isKeyValuePair(arg)) { + throw new SemanticCheckException("Named arguments must be of form 'key'=value; got " + arg); + } + Function fn = (Function) arg; + Literal keyLiteral = (Literal) fn.getFuncArgs().get(0); + String key = keyLiteral.getValue().toString().toLowerCase(Locale.ROOT); + UnresolvedExpression value = fn.getFuncArgs().get(1); + if (arguments.put(key, value) != null) { + throw new SemanticCheckException("Duplicate parameter: " + key); + } + } + return new NamedArguments(arguments); + } + + /** Removes and returns the value for {@code key}, or {@code null} if not present. */ + public UnresolvedExpression remove(String key) { + return arguments.remove(key); + } + + /** Removes and returns the value for {@code key}; throws if absent. */ + public UnresolvedExpression require(String key, String funcName) { + UnresolvedExpression value = arguments.remove(key); + if (value == null) { + throw new SemanticCheckException( + funcName.toLowerCase(Locale.ROOT) + " requires " + key + " parameter"); + } + return value; + } + + /** As {@link #require}, additionally enforcing string-literal type. */ + public Literal requireString(String key, String funcName) { + return asStringLiteral(require(key, funcName), key); + } + + /** As {@link #remove}, additionally enforcing string-literal type when present. */ + public Literal requireStringIfPresent(String key) { + UnresolvedExpression value = arguments.remove(key); + return value == null ? null : asStringLiteral(value, key); + } + + private static Literal asStringLiteral(UnresolvedExpression expr, String paramName) { + if (!(expr instanceof Literal literal) || literal.getType() != DataType.STRING) { + throw new SemanticCheckException( + paramName + " must be a string literal (e.g. 
'1d', '15m'); got " + expr); + } + return literal; + } + + /** If {@code key} is present, throws with the supplied message; otherwise no-op. */ + public void rejectIfPresent(String key, String message) { + if (arguments.remove(key) != null) { + throw new SemanticCheckException(message); + } + } + + /** Drops the listed keys without inspecting their values. */ + public void consumeSilently(Set keys) { + for (String key : keys) { + arguments.remove(key); + } + } + + /** Treats any keys still remaining as unsupported parameters. Call last. */ + public void rejectRemaining(String funcName) { + if (arguments.isEmpty()) { + return; + } + String label = arguments.size() == 1 ? "parameter" : "parameters"; + String unsupported = String.join(", ", arguments.keySet()); + throw new SemanticCheckException( + funcName.toLowerCase(Locale.ROOT) + " does not accept " + label + ": " + unsupported); + } + + /** Number of unconsumed keys. Primarily for tests. */ + int size() { + return arguments.size(); + } +} diff --git a/sql/src/test/java/org/opensearch/sql/sql/parser/bucket/BucketFunctionRegistryTest.java b/sql/src/test/java/org/opensearch/sql/sql/parser/bucket/BucketFunctionRegistryTest.java new file mode 100644 index 00000000000..9dc5acf2572 --- /dev/null +++ b/sql/src/test/java/org/opensearch/sql/sql/parser/bucket/BucketFunctionRegistryTest.java @@ -0,0 +1,53 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql.parser.bucket; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.Optional; +import org.junit.jupiter.api.DisplayNameGeneration; +import org.junit.jupiter.api.DisplayNameGenerator; +import org.junit.jupiter.api.Test; + +@DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) +class BucketFunctionRegistryTest { + + @Test + void 
lookup_returns_HistogramExpander_for_HISTOGRAM() { + Optional expander = BucketFunctionRegistry.lookup("HISTOGRAM"); + assertTrue(expander.isPresent()); + assertInstanceOf(HistogramExpander.class, expander.get()); + } + + @Test + void lookup_returns_DateHistogramExpander_for_DATE_HISTOGRAM() { + Optional expander = BucketFunctionRegistry.lookup("DATE_HISTOGRAM"); + assertTrue(expander.isPresent()); + assertInstanceOf(DateHistogramExpander.class, expander.get()); + } + + @Test + void lookup_is_case_insensitive() { + assertTrue(BucketFunctionRegistry.lookup("histogram").isPresent()); + assertTrue(BucketFunctionRegistry.lookup("Histogram").isPresent()); + assertTrue(BucketFunctionRegistry.lookup("date_histogram").isPresent()); + assertTrue(BucketFunctionRegistry.lookup("Date_Histogram").isPresent()); + } + + @Test + void lookup_returns_empty_for_unknown_function() { + assertFalse(BucketFunctionRegistry.lookup("range").isPresent()); + assertFalse(BucketFunctionRegistry.lookup("SUM").isPresent()); + assertFalse(BucketFunctionRegistry.lookup("FLOOR").isPresent()); + } + + @Test + void lookup_returns_empty_for_null() { + assertFalse(BucketFunctionRegistry.lookup(null).isPresent()); + } +} diff --git a/sql/src/test/java/org/opensearch/sql/sql/parser/bucket/BucketFunctionUtilsTest.java b/sql/src/test/java/org/opensearch/sql/sql/parser/bucket/BucketFunctionUtilsTest.java new file mode 100644 index 00000000000..b19e8a89466 --- /dev/null +++ b/sql/src/test/java/org/opensearch/sql/sql/parser/bucket/BucketFunctionUtilsTest.java @@ -0,0 +1,50 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql.parser.bucket; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertSame; + +import java.util.List; +import org.junit.jupiter.api.DisplayNameGeneration; +import org.junit.jupiter.api.DisplayNameGenerator; +import org.junit.jupiter.api.Test; +import 
org.opensearch.sql.ast.dsl.AstDSL; +import org.opensearch.sql.ast.expression.Function; +import org.opensearch.sql.ast.expression.QualifiedName; +import org.opensearch.sql.ast.expression.UnresolvedExpression; + +@DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) +class BucketFunctionUtilsTest { + + @Test + void normalizeFieldRef_string_literal_becomes_qualified_name() { + UnresolvedExpression result = + BucketFunctionUtils.normalizeFieldRef(AstDSL.stringLiteral("age")); + assertEquals(AstDSL.qualifiedName("age"), result); + } + + @Test + void normalizeFieldRef_qualified_name_passes_through_unchanged() { + QualifiedName input = AstDSL.qualifiedName("age"); + assertSame(input, BucketFunctionUtils.normalizeFieldRef(input)); + } + + @Test + void applyMissing_null_returns_field_unchanged() { + QualifiedName field = AstDSL.qualifiedName("age"); + assertSame(field, BucketFunctionUtils.applyMissing(field, null)); + } + + @Test + void applyMissing_non_null_wraps_with_coalesce() { + QualifiedName field = AstDSL.qualifiedName("age"); + UnresolvedExpression missing = AstDSL.intLiteral(0); + assertEquals( + new Function("coalesce", List.of(field, missing)), + BucketFunctionUtils.applyMissing(field, missing)); + } +} diff --git a/sql/src/test/java/org/opensearch/sql/sql/parser/bucket/DateHistogramExpanderTest.java b/sql/src/test/java/org/opensearch/sql/sql/parser/bucket/DateHistogramExpanderTest.java new file mode 100644 index 00000000000..5a71a612897 --- /dev/null +++ b/sql/src/test/java/org/opensearch/sql/sql/parser/bucket/DateHistogramExpanderTest.java @@ -0,0 +1,351 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql.parser.bucket; + +import static java.util.Collections.emptyList; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import 
com.google.common.collect.ImmutableList; +import java.util.List; +import org.junit.jupiter.api.DisplayNameGeneration; +import org.junit.jupiter.api.DisplayNameGenerator; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.ast.dsl.AstDSL; +import org.opensearch.sql.ast.expression.AllFields; +import org.opensearch.sql.ast.expression.Function; +import org.opensearch.sql.ast.expression.QualifiedName; +import org.opensearch.sql.ast.expression.Span; +import org.opensearch.sql.ast.expression.SpanUnit; +import org.opensearch.sql.ast.expression.UnresolvedExpression; +import org.opensearch.sql.ast.tree.UnresolvedPlan; +import org.opensearch.sql.exception.SemanticCheckException; +import org.opensearch.sql.sql.parser.AstBuilderTestBase; + +@DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) +class DateHistogramExpanderTest extends AstBuilderTestBase { + + private final DateHistogramExpander expander = new DateHistogramExpander(); + + @Test + void rejects_positional_invocation_with_clear_message() { + SemanticCheckException ex = + assertThrows( + SemanticCheckException.class, + () -> expander.expand(List.of(AstDSL.qualifiedName("ts"), AstDSL.stringLiteral("1d")))); + assertTrue(ex.getMessage().contains("named arguments")); + assertTrue(ex.getMessage().contains("date_histogram")); + } + + @Test + void property_bag_with_interval_param_lowers_to_span() { + UnresolvedExpression result = + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("ts")), + kv("interval", AstDSL.stringLiteral("1d")))); + + assertEquals(new Span(AstDSL.qualifiedName("ts"), AstDSL.intLiteral(1), SpanUnit.D), result); + } + + @Test + void property_bag_with_qualified_name_field_passes_through_unchanged() { + QualifiedName ts = AstDSL.qualifiedName("ts"); + UnresolvedExpression result = + expander.expand(List.of(kv("field", ts), kv("interval", AstDSL.stringLiteral("1d")))); + + assertEquals(new Span(ts, AstDSL.intLiteral(1), SpanUnit.D), result); + } + + @Test + void 
property_bag_with_fixed_interval_param_lowers_to_span() { + UnresolvedExpression result = + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("ts")), + kv("fixed_interval", AstDSL.stringLiteral("15m")))); + + assertEquals(new Span(AstDSL.qualifiedName("ts"), AstDSL.intLiteral(15), SpanUnit.m), result); + } + + @Test + void property_bag_with_calendar_interval_param_lowers_to_span() { + UnresolvedExpression result = + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("ts")), + kv("calendar_interval", AstDSL.stringLiteral("1d")))); + + assertEquals(new Span(AstDSL.qualifiedName("ts"), AstDSL.intLiteral(1), SpanUnit.D), result); + } + + @Test + void property_bag_rejects_both_interval_and_fixed_interval() { + assertThrows( + SemanticCheckException.class, + () -> + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("ts")), + kv("interval", AstDSL.stringLiteral("1d")), + kv("fixed_interval", AstDSL.stringLiteral("15m"))))); + } + + @Test + void property_bag_format_wraps_with_date_format() { + UnresolvedExpression result = + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("ts")), + kv("interval", AstDSL.stringLiteral("1d")), + kv("format", AstDSL.stringLiteral("yyyy-MM-dd")))); + + Span innerSpan = new Span(AstDSL.qualifiedName("ts"), AstDSL.intLiteral(1), SpanUnit.D); + Function expected = + new Function("date_format", List.of(innerSpan, AstDSL.stringLiteral("yyyy-MM-dd"))); + assertEquals(expected, result); + } + + @Test + void property_bag_time_zone_wraps_field_with_timestampadd() { + UnresolvedExpression result = + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("ts")), + kv("interval", AstDSL.stringLiteral("1d")), + kv("time_zone", AstDSL.stringLiteral("+05:30")))); + + // +05:30 = 5*3600 + 30*60 = 19800 seconds + Function shiftedField = + new Function( + "timestampadd", + List.of( + AstDSL.stringLiteral("SECOND"), + AstDSL.intLiteral(19800), + AstDSL.qualifiedName("ts"))); + Span expected = new 
Span(shiftedField, AstDSL.intLiteral(1), SpanUnit.D); + assertEquals(expected, result); + } + + @Test + void property_bag_format_and_time_zone_compose() { + UnresolvedExpression result = + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("ts")), + kv("interval", AstDSL.stringLiteral("1d")), + kv("format", AstDSL.stringLiteral("yyyy")), + kv("time_zone", AstDSL.stringLiteral("Z")))); + + // Z = 0 offset + Function shiftedField = + new Function( + "timestampadd", + List.of( + AstDSL.stringLiteral("SECOND"), AstDSL.intLiteral(0), AstDSL.qualifiedName("ts"))); + Span innerSpan = new Span(shiftedField, AstDSL.intLiteral(1), SpanUnit.D); + Function expected = + new Function("date_format", List.of(innerSpan, AstDSL.stringLiteral("yyyy"))); + assertEquals(expected, result); + } + + @Test + void property_bag_rejects_invalid_time_zone() { + SemanticCheckException ex = + assertThrows( + SemanticCheckException.class, + () -> + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("ts")), + kv("interval", AstDSL.stringLiteral("1d")), + kv("time_zone", AstDSL.stringLiteral("not-a-tz"))))); + assertTrue(ex.getMessage().contains("time_zone")); + } + + @Test + void property_bag_rejects_alias() { + assertThrows( + SemanticCheckException.class, + () -> + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("ts")), + kv("interval", AstDSL.stringLiteral("1d")), + kv("alias", AstDSL.stringLiteral("my_label"))))); + } + + @Test + void property_bag_rejects_nested() { + assertThrows( + SemanticCheckException.class, + () -> + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("ts")), + kv("interval", AstDSL.stringLiteral("1d")), + kv("nested", AstDSL.stringLiteral("path"))))); + } + + @Test + void property_bag_rejects_reverse_nested() { + assertThrows( + SemanticCheckException.class, + () -> + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("ts")), + kv("interval", AstDSL.stringLiteral("1d")), + kv("reverse_nested", 
AstDSL.stringLiteral("path"))))); + } + + @Test + void property_bag_rejects_children() { + assertThrows( + SemanticCheckException.class, + () -> + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("ts")), + kv("interval", AstDSL.stringLiteral("1d")), + kv("children", AstDSL.stringLiteral("ignored"))))); + } + + @Test + void property_bag_missing_wraps_field_with_coalesce() { + UnresolvedExpression result = + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("ts")), + kv("interval", AstDSL.stringLiteral("1d")), + kv("missing", AstDSL.stringLiteral("2024-01-01")))); + + Function coalesced = + new Function( + "coalesce", List.of(AstDSL.qualifiedName("ts"), AstDSL.stringLiteral("2024-01-01"))); + assertEquals(new Span(coalesced, AstDSL.intLiteral(1), SpanUnit.D), result); + } + + @Test + void property_bag_rejects_offset() { + SemanticCheckException ex = + assertThrows( + SemanticCheckException.class, + () -> + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("ts")), + kv("interval", AstDSL.stringLiteral("1d")), + kv("offset", AstDSL.stringLiteral("1h"))))); + assertTrue(ex.getMessage().contains("offset")); + assertTrue(ex.getMessage().contains("does not accept")); + } + + @Test + void property_bag_rejects_min_doc_count() { + assertThrows( + SemanticCheckException.class, + () -> + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("ts")), + kv("interval", AstDSL.stringLiteral("1d")), + kv("min_doc_count", AstDSL.intLiteral(5))))); + } + + @Test + void property_bag_rejects_extended_bounds() { + assertThrows( + SemanticCheckException.class, + () -> + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("ts")), + kv("interval", AstDSL.stringLiteral("1d")), + kv("extended_bounds", AstDSL.stringLiteral("a:b"))))); + } + + @Test + void property_bag_rejects_unknown_param() { + SemanticCheckException ex = + assertThrows( + SemanticCheckException.class, + () -> + expander.expand( + List.of( + kv("field", 
AstDSL.stringLiteral("ts")), + kv("interval", AstDSL.stringLiteral("1d")), + kv("missing_param", AstDSL.stringLiteral("foo"))))); + assertTrue(ex.getMessage().contains("missing_param")); + } + + @Test + void property_bag_rejects_duplicate_keys() { + assertThrows( + SemanticCheckException.class, + () -> + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("ts")), + kv("field", AstDSL.stringLiteral("created_at")), + kv("interval", AstDSL.stringLiteral("1d"))))); + } + + @Test + void property_bag_rejects_missing_field() { + assertThrows( + SemanticCheckException.class, + () -> expander.expand(List.of(kv("interval", AstDSL.stringLiteral("1d"))))); + } + + @Test + void property_bag_rejects_when_no_interval_synonym_provided() { + SemanticCheckException ex = + assertThrows( + SemanticCheckException.class, + () -> expander.expand(List.of(kv("field", AstDSL.stringLiteral("ts"))))); + assertTrue(ex.getMessage().contains("requires one of")); + } + + @Test + void via_sql_with_interval_param_lowers_to_span() { + QualifiedName ts = AstDSL.qualifiedName("ts"); + Span bucket = AstDSL.span(ts, AstDSL.intLiteral(1), SpanUnit.D); + + UnresolvedPlan result = + buildAST( + "SELECT date_histogram('field'='ts', 'interval'='1d'), COUNT(*) FROM events " + + "GROUP BY date_histogram('field'='ts', 'interval'='1d')"); + + assertEquals( + AstDSL.project( + AstDSL.agg( + AstDSL.relation("events"), + ImmutableList.of( + AstDSL.alias("COUNT(*)", AstDSL.aggregate("COUNT", AllFields.of()))), + emptyList(), + ImmutableList.of(AstDSL.alias(bucket.toString(), bucket)), + emptyList()), + AstDSL.alias("date_histogram('field'='ts', 'interval'='1d')", bucket), + AstDSL.alias("COUNT(*)", AstDSL.aggregate("COUNT", AllFields.of()))), + result); + } + + @Test + void via_sql_rejects_positional_invocation() { + assertThrows( + SemanticCheckException.class, + () -> + buildAST( + "SELECT date_histogram(ts, '1d') FROM events GROUP BY date_histogram(ts, '1d')")); + } + + /** Builds a Function("=", 
[stringLiteral(key), value]) — same shape ANTLR produces for 'k'=v. */ + private static UnresolvedExpression kv(String key, UnresolvedExpression value) { + return new Function("=", List.of(AstDSL.stringLiteral(key), value)); + } +} diff --git a/sql/src/test/java/org/opensearch/sql/sql/parser/bucket/HistogramExpanderTest.java b/sql/src/test/java/org/opensearch/sql/sql/parser/bucket/HistogramExpanderTest.java new file mode 100644 index 00000000000..8e2695f9015 --- /dev/null +++ b/sql/src/test/java/org/opensearch/sql/sql/parser/bucket/HistogramExpanderTest.java @@ -0,0 +1,295 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql.parser.bucket; + +import static java.util.Collections.emptyList; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import org.junit.jupiter.api.DisplayNameGeneration; +import org.junit.jupiter.api.DisplayNameGenerator; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.ast.dsl.AstDSL; +import org.opensearch.sql.ast.expression.AllFields; +import org.opensearch.sql.ast.expression.Function; +import org.opensearch.sql.ast.expression.QualifiedName; +import org.opensearch.sql.ast.expression.Span; +import org.opensearch.sql.ast.expression.SpanUnit; +import org.opensearch.sql.ast.expression.UnresolvedExpression; +import org.opensearch.sql.ast.tree.UnresolvedPlan; +import org.opensearch.sql.exception.SemanticCheckException; +import org.opensearch.sql.sql.parser.AstBuilderTestBase; + +@DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) +class HistogramExpanderTest extends AstBuilderTestBase { + + private final HistogramExpander expander = new HistogramExpander(); + + @Test + void rejects_positional_invocation_with_clear_message() { + 
SemanticCheckException ex = + assertThrows( + SemanticCheckException.class, + () -> expander.expand(List.of(AstDSL.qualifiedName("price"), AstDSL.intLiteral(100)))); + assertTrue(ex.getMessage().contains("named arguments")); + assertTrue(ex.getMessage().contains("histogram")); + } + + @Test + void property_bag_with_string_field_coerces_to_qualified_name() { + UnresolvedExpression result = + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("age")), kv("interval", AstDSL.intLiteral(10)))); + + assertEquals( + new Span(AstDSL.qualifiedName("age"), AstDSL.intLiteral(10), SpanUnit.NONE), result); + } + + @Test + void property_bag_with_qualified_name_field_passes_through_unchanged() { + QualifiedName age = AstDSL.qualifiedName("age"); + UnresolvedExpression result = + expander.expand(List.of(kv("field", age), kv("interval", AstDSL.intLiteral(10)))); + + assertEquals(new Span(age, AstDSL.intLiteral(10), SpanUnit.NONE), result); + } + + @Test + void property_bag_rejects_alias() { + assertThrows( + SemanticCheckException.class, + () -> + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("age")), + kv("interval", AstDSL.intLiteral(10)), + kv("alias", AstDSL.stringLiteral("my_label"))))); + } + + @Test + void property_bag_rejects_nested() { + assertThrows( + SemanticCheckException.class, + () -> + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("age")), + kv("interval", AstDSL.intLiteral(10)), + kv("nested", AstDSL.stringLiteral("path"))))); + } + + @Test + void property_bag_rejects_reverse_nested() { + assertThrows( + SemanticCheckException.class, + () -> + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("age")), + kv("interval", AstDSL.intLiteral(10)), + kv("reverse_nested", AstDSL.stringLiteral("path"))))); + } + + @Test + void property_bag_rejects_children() { + assertThrows( + SemanticCheckException.class, + () -> + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("age")), + kv("interval", 
AstDSL.intLiteral(10)), + kv("children", AstDSL.stringLiteral("ignored"))))); + } + + @Test + void property_bag_rejects_format() { + assertThrows( + SemanticCheckException.class, + () -> + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("age")), + kv("interval", AstDSL.intLiteral(10)), + kv("format", AstDSL.stringLiteral("yyyy"))))); + } + + @Test + void property_bag_rejects_time_zone() { + assertThrows( + SemanticCheckException.class, + () -> + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("age")), + kv("interval", AstDSL.intLiteral(10)), + kv("time_zone", AstDSL.stringLiteral("+05:30"))))); + } + + @Test + void property_bag_rejects_min_doc_count() { + assertThrows( + SemanticCheckException.class, + () -> + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("age")), + kv("interval", AstDSL.intLiteral(10)), + kv("min_doc_count", AstDSL.intLiteral(5))))); + } + + @Test + void property_bag_rejects_order() { + assertThrows( + SemanticCheckException.class, + () -> + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("age")), + kv("interval", AstDSL.intLiteral(10)), + kv("order", AstDSL.stringLiteral("count_desc"))))); + } + + @Test + void property_bag_rejects_extended_bounds() { + assertThrows( + SemanticCheckException.class, + () -> + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("age")), + kv("interval", AstDSL.intLiteral(10)), + kv("extended_bounds", AstDSL.stringLiteral("0:100"))))); + } + + @Test + void property_bag_rejects_unknown_param() { + SemanticCheckException ex = + assertThrows( + SemanticCheckException.class, + () -> + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("age")), + kv("interval", AstDSL.intLiteral(10)), + kv("missing_param", AstDSL.stringLiteral("foo"))))); + assertTrue(ex.getMessage().contains("missing_param")); + } + + @Test + void property_bag_rejects_duplicate_keys() { + assertThrows( + SemanticCheckException.class, + () -> + expander.expand( + List.of( 
+ kv("field", AstDSL.stringLiteral("age")), + kv("field", AstDSL.stringLiteral("size")), + kv("interval", AstDSL.intLiteral(10))))); + } + + @Test + void property_bag_rejects_missing_field() { + assertThrows( + SemanticCheckException.class, + () -> expander.expand(List.of(kv("interval", AstDSL.intLiteral(10))))); + } + + @Test + void property_bag_rejects_missing_interval() { + assertThrows( + SemanticCheckException.class, + () -> expander.expand(List.of(kv("field", AstDSL.stringLiteral("age"))))); + } + + @Test + void property_bag_offset_shifts_bucket_boundaries() { + UnresolvedExpression result = + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("age")), + kv("interval", AstDSL.intLiteral(10)), + kv("offset", AstDSL.intLiteral(3)))); + + QualifiedName age = AstDSL.qualifiedName("age"); + Function shiftedField = new Function("-", List.of(age, AstDSL.intLiteral(3))); + Span bucket = new Span(shiftedField, AstDSL.intLiteral(10), SpanUnit.NONE); + Function expected = new Function("+", List.of(bucket, AstDSL.intLiteral(3))); + assertEquals(expected, result); + } + + @Test + void property_bag_missing_wraps_field_with_coalesce() { + UnresolvedExpression result = + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("age")), + kv("interval", AstDSL.intLiteral(10)), + kv("missing", AstDSL.intLiteral(0)))); + + Function coalesced = + new Function("coalesce", List.of(AstDSL.qualifiedName("age"), AstDSL.intLiteral(0))); + assertEquals(new Span(coalesced, AstDSL.intLiteral(10), SpanUnit.NONE), result); + } + + @Test + void property_bag_offset_and_missing_compose_in_correct_order() { + UnresolvedExpression result = + expander.expand( + List.of( + kv("field", AstDSL.stringLiteral("age")), + kv("interval", AstDSL.intLiteral(10)), + kv("offset", AstDSL.intLiteral(3)), + kv("missing", AstDSL.intLiteral(0)))); + + QualifiedName age = AstDSL.qualifiedName("age"); + Function coalesced = new Function("coalesce", List.of(age, AstDSL.intLiteral(0))); + Function 
shifted = new Function("-", List.of(coalesced, AstDSL.intLiteral(3))); + Span bucket = new Span(shifted, AstDSL.intLiteral(10), SpanUnit.NONE); + Function expected = new Function("+", List.of(bucket, AstDSL.intLiteral(3))); + assertEquals(expected, result); + } + + @Test + void via_sql_lowers_to_span() { + QualifiedName age = AstDSL.qualifiedName("age"); + Span bucket = AstDSL.span(age, AstDSL.intLiteral(10), SpanUnit.NONE); + + UnresolvedPlan result = + buildAST( + "SELECT histogram('field'='age', 'interval'=10), COUNT(*) FROM accounts " + + "GROUP BY histogram('field'='age', 'interval'=10)"); + + assertEquals( + AstDSL.project( + AstDSL.agg( + AstDSL.relation("accounts"), + ImmutableList.of( + AstDSL.alias("COUNT(*)", AstDSL.aggregate("COUNT", AllFields.of()))), + emptyList(), + ImmutableList.of(AstDSL.alias(bucket.toString(), bucket)), + emptyList()), + AstDSL.alias("histogram('field'='age', 'interval'=10)", bucket), + AstDSL.alias("COUNT(*)", AstDSL.aggregate("COUNT", AllFields.of()))), + result); + } + + @Test + void via_sql_rejects_positional_invocation() { + assertThrows( + SemanticCheckException.class, + () -> buildAST("SELECT histogram(price, 100) FROM orders GROUP BY histogram(price, 100)")); + } + + /** Builds a Function("=", [stringLiteral(key), value]) — same shape ANTLR produces for 'k'=v. 
*/ + private static UnresolvedExpression kv(String key, UnresolvedExpression value) { + return new Function("=", List.of(AstDSL.stringLiteral(key), value)); + } +} diff --git a/sql/src/test/java/org/opensearch/sql/sql/parser/bucket/NamedArgumentsTest.java b/sql/src/test/java/org/opensearch/sql/sql/parser/bucket/NamedArgumentsTest.java new file mode 100644 index 00000000000..31539773c42 --- /dev/null +++ b/sql/src/test/java/org/opensearch/sql/sql/parser/bucket/NamedArgumentsTest.java @@ -0,0 +1,221 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql.parser.bucket; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.List; +import java.util.Set; +import org.junit.jupiter.api.DisplayNameGeneration; +import org.junit.jupiter.api.DisplayNameGenerator; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.ast.dsl.AstDSL; +import org.opensearch.sql.ast.expression.Function; +import org.opensearch.sql.ast.expression.Literal; +import org.opensearch.sql.ast.expression.UnresolvedExpression; +import org.opensearch.sql.exception.SemanticCheckException; + +@DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) +class NamedArgumentsTest { + + @Test + void empty_arg_list_is_not_named_arguments() { + assertFalse(NamedArguments.isNamedArguments(List.of())); + } + + @Test + void single_kv_pair_is_named_arguments() { + assertTrue(NamedArguments.isNamedArguments(List.of(kv("k", AstDSL.intLiteral(1))))); + } + + @Test + void plain_function_call_is_not_named_arguments() { + UnresolvedExpression nonKv = AstDSL.qualifiedName("col"); + assertFalse(NamedArguments.isNamedArguments(List.of(nonKv))); + } + + @Test + void 
mixed_args_are_not_named_arguments() { + assertFalse( + NamedArguments.isNamedArguments( + List.of(kv("k", AstDSL.intLiteral(1)), AstDSL.qualifiedName("col")))); + } + + @Test + void non_equals_function_is_not_named_arguments() { + UnresolvedExpression notEq = + new Function("+", List.of(AstDSL.stringLiteral("a"), AstDSL.intLiteral(1))); + assertFalse(NamedArguments.isNamedArguments(List.of(notEq))); + } + + @Test + void equals_with_non_string_left_is_not_named_arguments() { + UnresolvedExpression intEqInt = + new Function("=", List.of(AstDSL.intLiteral(1), AstDSL.intLiteral(2))); + assertFalse(NamedArguments.isNamedArguments(List.of(intEqInt))); + } + + @Test + void parse_keeps_keys_in_source_order_and_lower_cases_them() { + NamedArguments bag = + NamedArguments.parse( + List.of( + kv("Field", AstDSL.stringLiteral("ts")), + kv("INTERVAL", AstDSL.stringLiteral("1d")))); + + assertEquals(AstDSL.stringLiteral("ts"), bag.remove("field")); + assertEquals(AstDSL.stringLiteral("1d"), bag.remove("interval")); + assertEquals(0, bag.size()); + } + + @Test + void parse_rejects_duplicate_keys() { + SemanticCheckException ex = + assertThrows( + SemanticCheckException.class, + () -> + NamedArguments.parse( + List.of( + kv("field", AstDSL.stringLiteral("a")), + kv("field", AstDSL.stringLiteral("b"))))); + assertTrue(ex.getMessage().contains("field")); + } + + @Test + void parse_rejects_non_key_value_arg_with_clear_message() { + UnresolvedExpression bareColumn = AstDSL.qualifiedName("age"); + SemanticCheckException ex = + assertThrows( + SemanticCheckException.class, + () -> + NamedArguments.parse(List.of(kv("field", AstDSL.stringLiteral("a")), bareColumn))); + assertTrue(ex.getMessage().contains("'key'=value")); + } + + @Test + void remove_returns_value_when_present_and_null_when_absent() { + NamedArguments bag = NamedArguments.parse(List.of(kv("field", AstDSL.stringLiteral("ts")))); + assertEquals(AstDSL.stringLiteral("ts"), bag.remove("field")); + 
assertNull(bag.remove("field")); + assertNull(bag.remove("never_inserted")); + assertEquals(0, bag.size()); + } + + @Test + void require_returns_value_and_removes_it() { + NamedArguments bag = NamedArguments.parse(List.of(kv("field", AstDSL.stringLiteral("ts")))); + assertEquals(AstDSL.stringLiteral("ts"), bag.require("field", "histogram")); + assertEquals(0, bag.size()); + } + + @Test + void require_throws_when_missing_with_function_name_in_message() { + NamedArguments bag = NamedArguments.parse(List.of(kv("other", AstDSL.intLiteral(1)))); + SemanticCheckException ex = + assertThrows(SemanticCheckException.class, () -> bag.require("field", "HISTOGRAM")); + assertTrue(ex.getMessage().contains("histogram")); + assertTrue(ex.getMessage().contains("field")); + } + + @Test + void requireString_returns_string_literal() { + NamedArguments bag = NamedArguments.parse(List.of(kv("interval", AstDSL.stringLiteral("1d")))); + Literal interval = bag.requireString("interval", "date_histogram"); + assertEquals(AstDSL.stringLiteral("1d"), interval); + } + + @Test + void requireString_rejects_non_string_value() { + NamedArguments bag = NamedArguments.parse(List.of(kv("interval", AstDSL.intLiteral(100)))); + assertThrows( + SemanticCheckException.class, () -> bag.requireString("interval", "date_histogram")); + } + + @Test + void requireStringIfPresent_returns_null_when_absent() { + NamedArguments bag = NamedArguments.parse(List.of(kv("field", AstDSL.stringLiteral("ts")))); + assertNull(bag.requireStringIfPresent("format")); + } + + @Test + void requireStringIfPresent_returns_value_when_present() { + NamedArguments bag = NamedArguments.parse(List.of(kv("format", AstDSL.stringLiteral("yyyy")))); + assertEquals(AstDSL.stringLiteral("yyyy"), bag.requireStringIfPresent("format")); + } + + @Test + void requireStringIfPresent_rejects_non_string_value_when_present() { + NamedArguments bag = NamedArguments.parse(List.of(kv("format", AstDSL.intLiteral(2024)))); + 
assertThrows(SemanticCheckException.class, () -> bag.requireStringIfPresent("format")); + } + + @Test + void rejectIfPresent_throws_when_key_present() { + NamedArguments bag = NamedArguments.parse(List.of(kv("script", AstDSL.stringLiteral("x")))); + SemanticCheckException ex = + assertThrows(SemanticCheckException.class, () -> bag.rejectIfPresent("script", "no!")); + assertTrue(ex.getMessage().contains("no!")); + } + + @Test + void rejectIfPresent_no_op_when_key_absent() { + NamedArguments bag = NamedArguments.parse(List.of(kv("field", AstDSL.stringLiteral("ts")))); + bag.rejectIfPresent("script", "no!"); + assertEquals(1, bag.size()); + } + + @Test + void consumeSilently_drops_listed_keys() { + NamedArguments bag = + NamedArguments.parse( + List.of( + kv("alias", AstDSL.stringLiteral("x")), + kv("nested", AstDSL.stringLiteral("p")), + kv("interval", AstDSL.intLiteral(10)))); + bag.consumeSilently(Set.of("alias", "nested")); + assertEquals(1, bag.size()); + } + + @Test + void rejectRemaining_single_key_uses_parameter_label() { + NamedArguments bag = NamedArguments.parse(List.of(kv("mystery", AstDSL.intLiteral(5)))); + SemanticCheckException ex = + assertThrows(SemanticCheckException.class, () -> bag.rejectRemaining("HISTOGRAM")); + assertTrue(ex.getMessage().contains("histogram")); + assertTrue(ex.getMessage().contains("does not accept parameter:")); + assertTrue(ex.getMessage().contains("mystery")); + } + + @Test + void rejectRemaining_multiple_keys_listed_in_source_order_with_plural_label() { + NamedArguments bag = + NamedArguments.parse( + List.of( + kv("foo", AstDSL.intLiteral(1)), + kv("bar", AstDSL.intLiteral(2)), + kv("baz", AstDSL.intLiteral(3)))); + SemanticCheckException ex = + assertThrows(SemanticCheckException.class, () -> bag.rejectRemaining("HISTOGRAM")); + assertTrue(ex.getMessage().contains("does not accept parameters:")); + assertTrue(ex.getMessage().contains("foo, bar, baz")); + } + + @Test + void rejectRemaining_no_op_when_bag_empty() { + 
NamedArguments bag = NamedArguments.parse(List.of(kv("alias", AstDSL.stringLiteral("x")))); + bag.consumeSilently(Set.of("alias")); + bag.rejectRemaining("histogram"); // does not throw + } + + /** Builds a Function("=", [stringLiteral(key), value]) — same shape ANTLR produces for 'k'=v. */ + private static UnresolvedExpression kv(String key, UnresolvedExpression value) { + return new Function("=", List.of(AstDSL.stringLiteral(key), value)); + } +}