Intuitive DSL for Java

Version 2.0.0 · src/main/java/ch/dbalabs/intuitivedsl/parser/AstNavigator.java

Git clone
git clone https://www.dbalabs.ch/git/intuitive-dsl-java.git

AstNavigator.java

/*
 * This file is part of the Intuitive DSL project.
 * Copyright (c) 2026 DBA Labs - Switzerland. All rights reserved.
 *
 * This program is dual-licensed under a commercial license and the AGPLv3.
 * For commercial licensing, contact us at [email protected] or visit https://www.dbalabs.ch.
 *
 * AGPLv3 licensing:
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, version 3 (19 November 2007).
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/agpl-3.0.html>.
 */

package ch.dbalabs.intuitivedsl.parser;

import ch.dbalabs.intuitivedsl.exception.DslSyntaxException;
import ch.dbalabs.intuitivedsl.grammar.*;

import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.function.Function;

/**
 * Core LL(k) recursive descent parser.
 *
 * The parser supports backtracking across sibling nodes inside an alternative.
 * This is especially important when a repeatable node is followed by another node
 * that can start with an explicit keyword. In such cases, the parser must not let
 * a greedy repeatable parameter steal tokens that belong to the following sibling.
 *
 * @author DBA Labs
 */
public class AstNavigator {

    public ParseResult parse(String rawInput, List<Token> tokens, SyntaxNode rootAst, Function<String, List<String>> macroResolver) {
        ParsingContext ctx = new ParsingContext(tokens, macroResolver);

        if (matchNodeRepeatable(rootAst, ctx)) {
            if (ctx.cursor < tokens.size() && tokens.get(ctx.cursor).type() != TokenType.EOF) {
                ctx.registerFailure("EOF (End Of Command)");
                Token unexpected = tokens.get(ctx.maxCursor);
                throw new DslSyntaxException(rawInput, unexpected, new ArrayList<>(ctx.expectedAtMaxCursor));
            }
            return ctx.result;
        }

        Token unexpected = ctx.maxCursor < tokens.size() ? tokens.get(ctx.maxCursor) : null;
        throw new DslSyntaxException(rawInput, unexpected, new ArrayList<>(ctx.expectedAtMaxCursor));
    }

    private boolean matchNodeRepeatable(SyntaxNode node, ParsingContext ctx) {
        int startCursor = ctx.cursor;

        if (!matchSingleNode(node, ctx)) {
            return false;
        }

        if (node.isRepeatable()) {
            if (ctx.cursor == startCursor) {
                return true; // Infinite loop protection for zero-width optional groups
            }

            while (true) {
                int savedCursor = ctx.cursor;
                ParseResult savedResult = ctx.result.copy();
                String savedKeyword = ctx.activeKeyword;

                if (!matchSingleNode(node, ctx)) {
                    ctx.cursor = savedCursor;
                    ctx.result = savedResult;
                    ctx.activeKeyword = savedKeyword;
                    break;
                }

                if (ctx.cursor == savedCursor) {
                    ctx.cursor = savedCursor;
                    ctx.result = savedResult;
                    ctx.activeKeyword = savedKeyword;
                    break;
                }
            }
        }
        return true;
    }

    private boolean matchSingleNode(SyntaxNode node, ParsingContext ctx) {

        if (node instanceof KeywordNode kw) {
            String[] parts = kw.word().split(" ");
            boolean match = true;
            int mismatchIndex = 0;

            for (int i = 0; i < parts.length; i++) {
                if (ctx.cursor + i >= ctx.tokens.size() ||
                        !ctx.tokens.get(ctx.cursor + i).value().equalsIgnoreCase(parts[i])) {
                    match = false;
                    mismatchIndex = i;
                    break;
                }
            }

            if (match) {
                ctx.activeKeyword = kw.word();
                ctx.result.addKeyword(kw.word());
                ctx.cursor += parts.length;
                return true;
            }

            int originalCursor = ctx.cursor;
            ctx.cursor = originalCursor + mismatchIndex;
            ctx.registerFailure(parts[mismatchIndex].toUpperCase());
            ctx.cursor = originalCursor;
            return false;
        }

        if (node instanceof ParameterNode param) {
            if (ctx.cursor < ctx.tokens.size()) {
                Token t = ctx.tokens.get(ctx.cursor);
                if (t.type() == TokenType.WORD || t.type() == TokenType.STRING) {
                    ctx.result.addParameter(param.name(), ctx.activeKeyword, t.value());
                    ctx.cursor++;
                    return true;
                }
            }
            ctx.registerFailure("<" + param.name() + ">");
            return false;
        }

        if (node instanceof DelimiterNode del) {
            if (ctx.cursor < ctx.tokens.size() && ctx.tokens.get(ctx.cursor).value().equals(del.value())) {
                ctx.cursor++;
                return true;
            }
            ctx.registerFailure("'" + del.value() + "'");
            return false;
        }

        if (node instanceof MacroNode mac) {
            return matchMacroNode(mac, ctx);
        }

        if (node instanceof AlternativeNode alt) {
            return matchAlternativeSequence(alt.children(), 0, ctx);
        }

        if (node instanceof GroupNode grp) {
            for (SyntaxNode altBranch : grp.children()) {
                int savedCursor = ctx.cursor;
                ParseResult savedResult = ctx.result.copy();
                String savedKeyword = ctx.activeKeyword;

                if (matchNodeRepeatable(altBranch, ctx)) {
                    return true;
                }

                ctx.cursor = savedCursor;
                ctx.result = savedResult;
                ctx.activeKeyword = savedKeyword;
            }
            return grp.type() == GroupType.OPTIONAL;
        }

        return false;
    }

    /**
     * Matches the children of an AlternativeNode with sibling-aware backtracking.
     *
     * Strategy:
     * - non-repeatable nodes keep the straightforward recursive behavior;
     * - repeatable nodes try the smallest number of occurrences that still allows the
     *   remaining siblings to match.
     *
     * This prevents a repeatable generic parameter from swallowing tokens that should
     * belong to a following explicit clause such as "IN scope".
     */
    private boolean matchAlternativeSequence(List<SyntaxNode> children, int index, ParsingContext ctx) {
        if (index >= children.size()) {
            return true;
        }

        SyntaxNode node = children.get(index);
        int originalCursor = ctx.cursor;
        ParseResult originalResult = ctx.result.copy();
        String originalKeyword = ctx.activeKeyword;

        if (node instanceof GroupNode grp) {
            // Optional groups may always be skipped entirely.
            if (grp.type() == GroupType.OPTIONAL) {
                ctx.cursor = originalCursor;
                ctx.result = originalResult.copy();
                ctx.activeKeyword = originalKeyword;
                if (matchAlternativeSequence(children, index + 1, ctx)) {
                    return true;
                }
            }

            // Non-repeatable group: each branch must be matched together with the outer continuation.
            // This is critical when a branch ends with an optional/repeatable construct whose valid
            // occurrence count depends on the following siblings outside the group.
            if (!grp.isRepeatable()) {
                List<SyntaxNode> continuation = children.subList(index + 1, children.size());
                for (SyntaxNode altBranch : grp.children()) {
                    ctx.cursor = originalCursor;
                    ctx.result = originalResult.copy();
                    ctx.activeKeyword = originalKeyword;

                    List<SyntaxNode> spliced = spliceBranchWithContinuation(altBranch, continuation);
                    if (matchAlternativeSequence(spliced, 0, ctx)) {
                        return true;
                    }
                }

                ctx.cursor = originalCursor;
                ctx.result = originalResult;
                ctx.activeKeyword = originalKeyword;
                return false;
            }

            // Repeatable group: consume the minimum number of occurrences that lets the rest match.
            int currentCursor = originalCursor;
            ParseResult currentResult = originalResult.copy();
            String currentKeyword = originalKeyword;

            while (true) {
                ctx.cursor = currentCursor;
                ctx.result = currentResult.copy();
                ctx.activeKeyword = currentKeyword;

                if (!matchOneConsumingGroupOccurrence(grp, ctx)) {
                    break;
                }

                int afterOccurrenceCursor = ctx.cursor;
                ParseResult afterOccurrenceResult = ctx.result.copy();
                String afterOccurrenceKeyword = ctx.activeKeyword;

                if (matchAlternativeSequence(children, index + 1, ctx)) {
                    return true;
                }

                currentCursor = afterOccurrenceCursor;
                currentResult = afterOccurrenceResult;
                currentKeyword = afterOccurrenceKeyword;
            }

            ctx.cursor = originalCursor;
            ctx.result = originalResult;
            ctx.activeKeyword = originalKeyword;
            return false;
        }

        if (!node.isRepeatable()) {
            if (!matchSingleNode(node, ctx)) {
                ctx.cursor = originalCursor;
                ctx.result = originalResult;
                ctx.activeKeyword = originalKeyword;
                return false;
            }

            if (matchAlternativeSequence(children, index + 1, ctx)) {
                return true;
            }

            ctx.cursor = originalCursor;
            ctx.result = originalResult;
            ctx.activeKeyword = originalKeyword;
            return false;
        }

        int currentCursor = originalCursor;
        ParseResult currentResult = originalResult.copy();
        String currentKeyword = originalKeyword;

        while (true) {
            ctx.cursor = currentCursor;
            ctx.result = currentResult.copy();
            ctx.activeKeyword = currentKeyword;

            if (!matchSingleNode(node, ctx)) {
                break;
            }

            if (ctx.cursor == currentCursor) {
                // Zero-width success must never be repeated indefinitely.
                break;
            }

            int afterOccurrenceCursor = ctx.cursor;
            ParseResult afterOccurrenceResult = ctx.result.copy();
            String afterOccurrenceKeyword = ctx.activeKeyword;

            if (matchAlternativeSequence(children, index + 1, ctx)) {
                return true;
            }

            currentCursor = afterOccurrenceCursor;
            currentResult = afterOccurrenceResult;
            currentKeyword = afterOccurrenceKeyword;
        }

        ctx.cursor = originalCursor;
        ctx.result = originalResult;
        ctx.activeKeyword = originalKeyword;
        return false;
    }

    /**
     * Builds a synthetic sequence where a selected group branch is immediately followed by the
     * outer siblings that come after the group.
     */
    private List<SyntaxNode> spliceBranchWithContinuation(SyntaxNode altBranch, List<SyntaxNode> continuation) {
        List<SyntaxNode> combined = new ArrayList<>();
        if (altBranch instanceof AlternativeNode alt) {
            combined.addAll(alt.children());
        } else {
            combined.add(altBranch);
        }
        combined.addAll(continuation);
        return combined;
    }

    /**
     * Tries to match exactly one consuming occurrence of a group.
     *
     * This helper is required because optional groups may legally succeed without consuming
     * any token, but a repeatable occurrence loop must only advance when real input was consumed.
     */
    private boolean matchOneConsumingGroupOccurrence(GroupNode grp, ParsingContext ctx) {
        int startCursor = ctx.cursor;
        ParseResult startResult = ctx.result.copy();
        String startKeyword = ctx.activeKeyword;

        for (SyntaxNode altBranch : grp.children()) {
            ctx.cursor = startCursor;
            ctx.result = startResult.copy();
            ctx.activeKeyword = startKeyword;

            if (matchNodeRepeatable(altBranch, ctx) && ctx.cursor > startCursor) {
                return true;
            }
        }

        ctx.cursor = startCursor;
        ctx.result = startResult;
        ctx.activeKeyword = startKeyword;
        return false;
    }

    private boolean matchMacroNode(MacroNode mac, ParsingContext ctx) {
        List<String> rawChoices = ctx.macroResolver.apply(mac.macroName());
        if (rawChoices == null) rawChoices = List.of();

        // Filter blank choices, trim them, and keep a deterministic order.
        List<String> validChoices = rawChoices.stream()
                .filter(c -> c != null && !c.trim().isEmpty())
                .map(String::trim)
                .distinct()
                .toList();

        if (validChoices.isEmpty()) {
            ctx.registerFailure("<" + mac.macroName() + ">");
            return false;
        }

        Lexer tempLexer = new Lexer();
        record MacroChoice(String original, List<Token> tokens, String normalizedBindingValue) {}

        List<MacroChoice> macroChoices = validChoices.stream()
                .map(choice -> {
                    List<Token> choiceTokens = tempLexer.tokenize(choice).stream()
                            .filter(t -> t.type() != TokenType.EOF)
                            .toList();

                    // Bind the semantic value of the macro choice, not its raw source fragment.
                    // This keeps macro binding aligned with normal quoted-string binding.
                    String normalizedBindingValue = choiceTokens.stream()
                            .map(Token::value)
                            .reduce((left, right) -> left + " " + right)
                            .orElse("");

                    return new MacroChoice(choice, choiceTokens, normalizedBindingValue);
                })
                .sorted((a, b) -> Integer.compare(b.tokens.size(), a.tokens.size()))
                .toList();

        for (MacroChoice choice : macroChoices) {
            boolean match = true;
            for (int i = 0; i < choice.tokens.size(); i++) {
                if (ctx.cursor + i >= ctx.tokens.size() ||
                        !ctx.tokens.get(ctx.cursor + i).value().equalsIgnoreCase(choice.tokens.get(i).value())) {
                    match = false;
                    break;
                }
            }

            if (match) {
                ctx.cursor += choice.tokens.size();
                ctx.result.addParameter(mac.macroName(), ctx.activeKeyword, choice.normalizedBindingValue);
                return true;
            }
        }

        ctx.registerFailure("<" + mac.macroName() + ">");
        return false;
    }

    private static class ParsingContext {
        final List<Token> tokens;
        final Function<String, List<String>> macroResolver;

        int cursor = 0;
        int maxCursor = -1;
        final Set<String> expectedAtMaxCursor = new LinkedHashSet<>();

        ParseResult result = new ParseResult();
        String activeKeyword = "";

        ParsingContext(List<Token> tokens, Function<String, List<String>> macroResolver) {
            this.tokens = tokens;
            this.macroResolver = macroResolver;
        }

        void registerFailure(String expected) {
            if (cursor > maxCursor) {
                maxCursor = cursor;
                expectedAtMaxCursor.clear();
                expectedAtMaxCursor.add(expected);
            } else if (cursor == maxCursor) {
                expectedAtMaxCursor.add(expected);
            }
        }
    }
}