Intuitive DSL for Java

Version 2.0.0 · src/main/java/ch/dbalabs/intuitivedsl/parser/Lexer.java

Git clone
git clone https://www.dbalabs.ch/git/intuitive-dsl-java.git

Lexer.java

/*
 * This file is part of the Intuitive DSL project.
 * Copyright (c) 2026 DBA Labs - Switzerland. All rights reserved.
 *
 * This program is dual-licensed under a commercial license and the AGPLv3.
 * For commercial licensing, contact us at [email protected] or visit https://www.dbalabs.ch.
 *
 * AGPLv3 licensing:
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, version 3 (19 November 2007).
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/agpl-3.0.html>.
 */

package ch.dbalabs.intuitivedsl.parser;

import ch.dbalabs.intuitivedsl.exception.DslSyntaxException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * Character-by-character scanner for input strings.
 * Handles quoted strings, escaped quotes (''), and precise positional tracking.
 *
 * @author DBA Labs
 */
public class Lexer {

    private static final String DELIMITERS = "().,;:?!-+/ *@[]{}|";

    public List<Token> tokenize(String input) {
        List<Token> tokens = new ArrayList<>();
        if (input == null) return tokens;

        int pos = 0;
        int length = input.length();

        while (pos < length) {
            char c = input.charAt(pos);

            // 1. Whitespace handling
            if (Character.isWhitespace(c)) {
                pos++;
                continue;
            }

            // 2. Quoted Strings ('...') with escaped quote support ('')
            if (c == '\'') {
                int start = pos;
                pos++; // skip opening quote
                StringBuilder sb = new StringBuilder();
                boolean closed = false;

                while (pos < length) {
                    char current = input.charAt(pos);
                    if (current == '\'') {
                        if (pos + 1 < length && input.charAt(pos + 1) == '\'') {
                            sb.append('\'');
                            pos += 2;
                        } else {
                            pos++;
                            closed = true;
                            break;
                        }
                    } else {
                        sb.append(current);
                        pos++;
                    }
                }

                if (!closed) {
                    throw new DslSyntaxException("Unclosed string literal",
                            null, List.of("closing quote (')"));
                }
                tokens.add(new Token(sb.toString(), TokenType.STRING, start));
                continue;
            }

            // 3. Repeatable Operator (...)
            if (c == '.' && pos + 2 < length && input.charAt(pos + 1) == '.' && input.charAt(pos + 2) == '.') {
                tokens.add(new Token("...", TokenType.DELIMITER, pos));
                pos += 3;
                continue;
            }

            // 4. Structural Delimiters
            if (DELIMITERS.indexOf(c) != -1) {
                tokens.add(new Token(String.valueOf(c), TokenType.DELIMITER, pos));
                pos++;
                continue;
            }

            // 5. Standard Words (Keywords or Parameters)
            int start = pos;
            StringBuilder word = new StringBuilder();
            while (pos < length) {
                char current = input.charAt(pos);
                if (Character.isWhitespace(current) || DELIMITERS.indexOf(current) != -1 || current == '\'') {
                    break;
                }
                // Stop at ellipsis start
                if (current == '.' && pos + 2 < length && input.charAt(pos + 1) == '.' && input.charAt(pos + 2) == '.') {
                    break;
                }
                word.append(current);
                pos++;
            }
            tokens.add(new Token(word.toString(), TokenType.WORD, start));
        }

        // AUDIT FIX: Explicitly assign "EOF" as the value so the exception
        // formats correctly as "unexpected token 'EOF'".
        tokens.add(new Token("EOF", TokenType.EOF, pos));
        return tokens;
    }
}