/*
* This file is part of the Intuitive DSL project.
* Copyright (c) 2026 DBA Labs - Switzerland. All rights reserved.
*
* This program is dual-licensed under a commercial license and the AGPLv3.
* For commercial licensing, contact us at [email protected] or visit https://www.dbalabs.ch.
*
* AGPLv3 licensing:
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, version 3 (19 November 2007).
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/agpl-3.0.html>.
*/
package ch.dbalabs.intuitivedsl.parser;
import ch.dbalabs.intuitivedsl.exception.DslSyntaxException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
 * Character-by-character scanner that turns an input string into a flat list of tokens.
 *
 * <p>At each position the scanner tests, in this order:
 * <ol>
 *   <li>whitespace, which is skipped and never produces a token;</li>
 *   <li>single-quoted string literals, in which a doubled quote ({@code ''}) stands for one
 *       literal quote character;</li>
 *   <li>the three-character ellipsis {@code ...}, emitted as a single delimiter token;</li>
 *   <li>any single character listed in {@code DELIMITERS};</li>
 *   <li>words: maximal runs of characters that are none of the above.</li>
 * </ol>
 * Every token is created with the index in the input at which it starts.
 *
 * <p>The class holds no instance state.
 *
 * @author DBA Labs
 */
public class Lexer {

    /**
     * Characters that are each emitted as their own delimiter token and that terminate a word.
     * The space entry never yields a token because whitespace is skipped before delimiters
     * are tested.
     */
    private static final String DELIMITERS = "().,;:?!-+/ *@[]{}|";

    /** The repeatable operator; matched before the single {@code '.'} delimiter. */
    private static final String ELLIPSIS = "...";

    /**
     * Splits {@code input} into tokens.
     *
     * @param input the text to scan; may be {@code null}
     * @return a new mutable list of tokens in input order. For non-null input the list always
     *         ends with an {@code EOF} token whose value is {@code "EOF"} and whose position is
     *         {@code input.length()}. For {@code null} input the list is empty and contains
     *         no {@code EOF} token.
     * @throws DslSyntaxException if a string literal is opened but never closed
     */
    public List<Token> tokenize(String input) {
        List<Token> tokens = new ArrayList<>();
        if (input == null) {
            return tokens;
        }

        final int length = input.length();
        int pos = 0;
        while (pos < length) {
            char c = input.charAt(pos);
            if (Character.isWhitespace(c)) {
                pos++;
            } else if (c == '\'') {
                pos = scanStringLiteral(input, pos, tokens);
            } else if (input.startsWith(ELLIPSIS, pos)) {
                // Must be tested before the single-character delimiters, otherwise "..."
                // would be emitted as three separate '.' tokens.
                tokens.add(new Token(ELLIPSIS, TokenType.DELIMITER, pos));
                pos += ELLIPSIS.length();
            } else if (DELIMITERS.indexOf(c) != -1) {
                tokens.add(new Token(String.valueOf(c), TokenType.DELIMITER, pos));
                pos++;
            } else {
                pos = scanWord(input, pos, tokens);
            }
        }

        // The EOF token carries the explicit value "EOF" so that error messages built from
        // the token value read as "unexpected token 'EOF'".
        tokens.add(new Token("EOF", TokenType.EOF, pos));
        return tokens;
    }

    /**
     * Scans a single-quoted string literal and appends it to {@code tokens} as a STRING token.
     * The token value excludes the surrounding quotes and has each {@code ''} pair collapsed
     * to a single quote; the token position is the index of the opening quote.
     *
     * @param input  the text being scanned
     * @param start  index of the opening quote
     * @param tokens list that receives the token
     * @return the index just past the closing quote
     * @throws DslSyntaxException if the end of input is reached before a closing quote
     */
    private static int scanStringLiteral(String input, int start, List<Token> tokens) {
        final int length = input.length();
        StringBuilder value = new StringBuilder();
        int pos = start + 1; // skip the opening quote
        while (pos < length) {
            char current = input.charAt(pos);
            if (current != '\'') {
                value.append(current);
                pos++;
            } else if (pos + 1 < length && input.charAt(pos + 1) == '\'') {
                // Doubled quote: an escaped literal quote, not the end of the string.
                value.append('\'');
                pos += 2;
            } else {
                tokens.add(new Token(value.toString(), TokenType.STRING, start));
                return pos + 1;
            }
        }
        // NOTE(review): the second argument is passed as null, so the position of the opening
        // quote (start) is not reported here — confirm against DslSyntaxException's constructor
        // whether it can be supplied.
        throw new DslSyntaxException("Unclosed string literal",
                null, List.of("closing quote (')"));
    }

    /**
     * Scans a word starting at {@code start} and appends it to {@code tokens} as a WORD token.
     * The caller guarantees that the character at {@code start} does not end a word, so the
     * word is never empty. No separate ellipsis check is needed: {@code '.'} is a delimiter
     * and therefore already ends the word.
     *
     * @param input  the text being scanned
     * @param start  index of the first character of the word
     * @param tokens list that receives the token
     * @return the index of the first character after the word
     */
    private static int scanWord(String input, int start, List<Token> tokens) {
        final int length = input.length();
        int end = start;
        while (end < length && !endsWord(input.charAt(end))) {
            end++;
        }
        tokens.add(new Token(input.substring(start, end), TokenType.WORD, start));
        return end;
    }

    /** Returns whether {@code c} terminates a word: whitespace, a delimiter, or a quote. */
    private static boolean endsWord(char c) {
        return Character.isWhitespace(c) || DELIMITERS.indexOf(c) != -1 || c == '\'';
    }
}