/*
* This file is part of the Intuitive DSL project.
* Copyright (c) 2026 DBA Labs - Switzerland. All rights reserved.
*
* This program is dual-licensed under a commercial license and the AGPLv3.
* For commercial licensing, contact us at [email protected] or visit https://www.dbalabs.ch.
*
* AGPLv3 licensing:
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, version 3 (19 November 2007).
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/agpl-3.0.html>.
*/
package ch.dbalabs.intuitivedsl.parser;
import ch.dbalabs.intuitivedsl.exception.DslSyntaxException;
import ch.dbalabs.intuitivedsl.grammar.*;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.function.Function;
/**
* Core LL(k) recursive descent parser.
*
* The parser supports backtracking across sibling nodes inside an alternative.
* This is especially important when a repeatable node is followed by another node
* that can start with an explicit keyword. In such cases, the parser must not let
* a greedy repeatable parameter steal tokens that belong to the following sibling.
*
* @author DBA Labs
*/
public class AstNavigator {
public ParseResult parse(String rawInput, List<Token> tokens, SyntaxNode rootAst, Function<String, List<String>> macroResolver) {
ParsingContext ctx = new ParsingContext(tokens, macroResolver);
if (matchNodeRepeatable(rootAst, ctx)) {
if (ctx.cursor < tokens.size() && tokens.get(ctx.cursor).type() != TokenType.EOF) {
ctx.registerFailure("EOF (End Of Command)");
Token unexpected = tokens.get(ctx.maxCursor);
throw new DslSyntaxException(rawInput, unexpected, new ArrayList<>(ctx.expectedAtMaxCursor));
}
return ctx.result;
}
Token unexpected = ctx.maxCursor < tokens.size() ? tokens.get(ctx.maxCursor) : null;
throw new DslSyntaxException(rawInput, unexpected, new ArrayList<>(ctx.expectedAtMaxCursor));
}
private boolean matchNodeRepeatable(SyntaxNode node, ParsingContext ctx) {
int startCursor = ctx.cursor;
if (!matchSingleNode(node, ctx)) {
return false;
}
if (node.isRepeatable()) {
if (ctx.cursor == startCursor) {
return true; // Infinite loop protection for zero-width optional groups
}
while (true) {
int savedCursor = ctx.cursor;
ParseResult savedResult = ctx.result.copy();
String savedKeyword = ctx.activeKeyword;
if (!matchSingleNode(node, ctx)) {
ctx.cursor = savedCursor;
ctx.result = savedResult;
ctx.activeKeyword = savedKeyword;
break;
}
if (ctx.cursor == savedCursor) {
ctx.cursor = savedCursor;
ctx.result = savedResult;
ctx.activeKeyword = savedKeyword;
break;
}
}
}
return true;
}
private boolean matchSingleNode(SyntaxNode node, ParsingContext ctx) {
if (node instanceof KeywordNode kw) {
String[] parts = kw.word().split(" ");
boolean match = true;
int mismatchIndex = 0;
for (int i = 0; i < parts.length; i++) {
if (ctx.cursor + i >= ctx.tokens.size() ||
!ctx.tokens.get(ctx.cursor + i).value().equalsIgnoreCase(parts[i])) {
match = false;
mismatchIndex = i;
break;
}
}
if (match) {
ctx.activeKeyword = kw.word();
ctx.result.addKeyword(kw.word());
ctx.cursor += parts.length;
return true;
}
int originalCursor = ctx.cursor;
ctx.cursor = originalCursor + mismatchIndex;
ctx.registerFailure(parts[mismatchIndex].toUpperCase());
ctx.cursor = originalCursor;
return false;
}
if (node instanceof ParameterNode param) {
if (ctx.cursor < ctx.tokens.size()) {
Token t = ctx.tokens.get(ctx.cursor);
if (t.type() == TokenType.WORD || t.type() == TokenType.STRING) {
ctx.result.addParameter(param.name(), ctx.activeKeyword, t.value());
ctx.cursor++;
return true;
}
}
ctx.registerFailure("<" + param.name() + ">");
return false;
}
if (node instanceof DelimiterNode del) {
if (ctx.cursor < ctx.tokens.size() && ctx.tokens.get(ctx.cursor).value().equals(del.value())) {
ctx.cursor++;
return true;
}
ctx.registerFailure("'" + del.value() + "'");
return false;
}
if (node instanceof MacroNode mac) {
return matchMacroNode(mac, ctx);
}
if (node instanceof AlternativeNode alt) {
return matchAlternativeSequence(alt.children(), 0, ctx);
}
if (node instanceof GroupNode grp) {
for (SyntaxNode altBranch : grp.children()) {
int savedCursor = ctx.cursor;
ParseResult savedResult = ctx.result.copy();
String savedKeyword = ctx.activeKeyword;
if (matchNodeRepeatable(altBranch, ctx)) {
return true;
}
ctx.cursor = savedCursor;
ctx.result = savedResult;
ctx.activeKeyword = savedKeyword;
}
return grp.type() == GroupType.OPTIONAL;
}
return false;
}
/**
* Matches the children of an AlternativeNode with sibling-aware backtracking.
*
* Strategy:
* - non-repeatable nodes keep the straightforward recursive behavior;
* - repeatable nodes try the smallest number of occurrences that still allows the
* remaining siblings to match.
*
* This prevents a repeatable generic parameter from swallowing tokens that should
* belong to a following explicit clause such as "IN scope".
*/
private boolean matchAlternativeSequence(List<SyntaxNode> children, int index, ParsingContext ctx) {
if (index >= children.size()) {
return true;
}
SyntaxNode node = children.get(index);
int originalCursor = ctx.cursor;
ParseResult originalResult = ctx.result.copy();
String originalKeyword = ctx.activeKeyword;
if (node instanceof GroupNode grp) {
// Optional groups may always be skipped entirely.
if (grp.type() == GroupType.OPTIONAL) {
ctx.cursor = originalCursor;
ctx.result = originalResult.copy();
ctx.activeKeyword = originalKeyword;
if (matchAlternativeSequence(children, index + 1, ctx)) {
return true;
}
}
// Non-repeatable group: each branch must be matched together with the outer continuation.
// This is critical when a branch ends with an optional/repeatable construct whose valid
// occurrence count depends on the following siblings outside the group.
if (!grp.isRepeatable()) {
List<SyntaxNode> continuation = children.subList(index + 1, children.size());
for (SyntaxNode altBranch : grp.children()) {
ctx.cursor = originalCursor;
ctx.result = originalResult.copy();
ctx.activeKeyword = originalKeyword;
List<SyntaxNode> spliced = spliceBranchWithContinuation(altBranch, continuation);
if (matchAlternativeSequence(spliced, 0, ctx)) {
return true;
}
}
ctx.cursor = originalCursor;
ctx.result = originalResult;
ctx.activeKeyword = originalKeyword;
return false;
}
// Repeatable group: consume the minimum number of occurrences that lets the rest match.
int currentCursor = originalCursor;
ParseResult currentResult = originalResult.copy();
String currentKeyword = originalKeyword;
while (true) {
ctx.cursor = currentCursor;
ctx.result = currentResult.copy();
ctx.activeKeyword = currentKeyword;
if (!matchOneConsumingGroupOccurrence(grp, ctx)) {
break;
}
int afterOccurrenceCursor = ctx.cursor;
ParseResult afterOccurrenceResult = ctx.result.copy();
String afterOccurrenceKeyword = ctx.activeKeyword;
if (matchAlternativeSequence(children, index + 1, ctx)) {
return true;
}
currentCursor = afterOccurrenceCursor;
currentResult = afterOccurrenceResult;
currentKeyword = afterOccurrenceKeyword;
}
ctx.cursor = originalCursor;
ctx.result = originalResult;
ctx.activeKeyword = originalKeyword;
return false;
}
if (!node.isRepeatable()) {
if (!matchSingleNode(node, ctx)) {
ctx.cursor = originalCursor;
ctx.result = originalResult;
ctx.activeKeyword = originalKeyword;
return false;
}
if (matchAlternativeSequence(children, index + 1, ctx)) {
return true;
}
ctx.cursor = originalCursor;
ctx.result = originalResult;
ctx.activeKeyword = originalKeyword;
return false;
}
int currentCursor = originalCursor;
ParseResult currentResult = originalResult.copy();
String currentKeyword = originalKeyword;
while (true) {
ctx.cursor = currentCursor;
ctx.result = currentResult.copy();
ctx.activeKeyword = currentKeyword;
if (!matchSingleNode(node, ctx)) {
break;
}
if (ctx.cursor == currentCursor) {
// Zero-width success must never be repeated indefinitely.
break;
}
int afterOccurrenceCursor = ctx.cursor;
ParseResult afterOccurrenceResult = ctx.result.copy();
String afterOccurrenceKeyword = ctx.activeKeyword;
if (matchAlternativeSequence(children, index + 1, ctx)) {
return true;
}
currentCursor = afterOccurrenceCursor;
currentResult = afterOccurrenceResult;
currentKeyword = afterOccurrenceKeyword;
}
ctx.cursor = originalCursor;
ctx.result = originalResult;
ctx.activeKeyword = originalKeyword;
return false;
}
/**
* Builds a synthetic sequence where a selected group branch is immediately followed by the
* outer siblings that come after the group.
*/
private List<SyntaxNode> spliceBranchWithContinuation(SyntaxNode altBranch, List<SyntaxNode> continuation) {
List<SyntaxNode> combined = new ArrayList<>();
if (altBranch instanceof AlternativeNode alt) {
combined.addAll(alt.children());
} else {
combined.add(altBranch);
}
combined.addAll(continuation);
return combined;
}
/**
* Tries to match exactly one consuming occurrence of a group.
*
* This helper is required because optional groups may legally succeed without consuming
* any token, but a repeatable occurrence loop must only advance when real input was consumed.
*/
private boolean matchOneConsumingGroupOccurrence(GroupNode grp, ParsingContext ctx) {
int startCursor = ctx.cursor;
ParseResult startResult = ctx.result.copy();
String startKeyword = ctx.activeKeyword;
for (SyntaxNode altBranch : grp.children()) {
ctx.cursor = startCursor;
ctx.result = startResult.copy();
ctx.activeKeyword = startKeyword;
if (matchNodeRepeatable(altBranch, ctx) && ctx.cursor > startCursor) {
return true;
}
}
ctx.cursor = startCursor;
ctx.result = startResult;
ctx.activeKeyword = startKeyword;
return false;
}
private boolean matchMacroNode(MacroNode mac, ParsingContext ctx) {
List<String> rawChoices = ctx.macroResolver.apply(mac.macroName());
if (rawChoices == null) rawChoices = List.of();
// Filter blank choices, trim them, and keep a deterministic order.
List<String> validChoices = rawChoices.stream()
.filter(c -> c != null && !c.trim().isEmpty())
.map(String::trim)
.distinct()
.toList();
if (validChoices.isEmpty()) {
ctx.registerFailure("<" + mac.macroName() + ">");
return false;
}
Lexer tempLexer = new Lexer();
record MacroChoice(String original, List<Token> tokens, String normalizedBindingValue) {}
List<MacroChoice> macroChoices = validChoices.stream()
.map(choice -> {
List<Token> choiceTokens = tempLexer.tokenize(choice).stream()
.filter(t -> t.type() != TokenType.EOF)
.toList();
// Bind the semantic value of the macro choice, not its raw source fragment.
// This keeps macro binding aligned with normal quoted-string binding.
String normalizedBindingValue = choiceTokens.stream()
.map(Token::value)
.reduce((left, right) -> left + " " + right)
.orElse("");
return new MacroChoice(choice, choiceTokens, normalizedBindingValue);
})
.sorted((a, b) -> Integer.compare(b.tokens.size(), a.tokens.size()))
.toList();
for (MacroChoice choice : macroChoices) {
boolean match = true;
for (int i = 0; i < choice.tokens.size(); i++) {
if (ctx.cursor + i >= ctx.tokens.size() ||
!ctx.tokens.get(ctx.cursor + i).value().equalsIgnoreCase(choice.tokens.get(i).value())) {
match = false;
break;
}
}
if (match) {
ctx.cursor += choice.tokens.size();
ctx.result.addParameter(mac.macroName(), ctx.activeKeyword, choice.normalizedBindingValue);
return true;
}
}
ctx.registerFailure("<" + mac.macroName() + ">");
return false;
}
private static class ParsingContext {
final List<Token> tokens;
final Function<String, List<String>> macroResolver;
int cursor = 0;
int maxCursor = -1;
final Set<String> expectedAtMaxCursor = new LinkedHashSet<>();
ParseResult result = new ParseResult();
String activeKeyword = "";
ParsingContext(List<Token> tokens, Function<String, List<String>> macroResolver) {
this.tokens = tokens;
this.macroResolver = macroResolver;
}
void registerFailure(String expected) {
if (cursor > maxCursor) {
maxCursor = cursor;
expectedAtMaxCursor.clear();
expectedAtMaxCursor.add(expected);
} else if (cursor == maxCursor) {
expectedAtMaxCursor.add(expected);
}
}
}
}