Remove old lexer+parser implementation

This commit is contained in:
Tibo De Peuter 2025-04-27 18:22:03 +02:00
parent d5632e9217
commit a4ec29f084
Signed by: tdpeuter
GPG key ID: 38297DE43F75FFE2
15 changed files with 0 additions and 706 deletions

View file

@ -1,127 +0,0 @@
package lexer
import lexer.errors.LexingError
import lexer.errors.LexingErrorType
import lexer.state.LexerPosition
import lexer.state.TokenPosition
/**
 * Hand-written lexer that turns [source] into a flat list of [Token]s.
 *
 * Recognised input: parentheses, dots, commas, double-quoted strings, runs of
 * letters/digits, `%` line comments (skipped) and whitespace (skipped).
 *
 * Position bookkeeping: `position.column` holds the index, within the current line,
 * of the most recently consumed character; it is -1 while nothing on the line has
 * been consumed yet. Token positions are taken *after* the token has been consumed.
 *
 * Error reporting: an unrecognised character throws [LexingError] directly. The internal
 * sanity checks use `require`, so they surface as [IllegalArgumentException] whose message
 * is the string form of the corresponding [LexingError].
 *
 * @param source The complete text to tokenize.
 */
class Lexer(private val source: String) {
    // Mutable so that appending a token does not copy the whole list each time.
    private val tokens: MutableList<Token> = mutableListOf()
    private val position = LexerPosition(0, 0, -1)

    /**
     * Scans the source code and returns a list of tokens.
     *
     * The returned list always ends with a [TokenType.EOF] token.
     *
     * @return List of [Token]s
     * @throws LexingError if a character does not start any known token.
     * @throws IllegalArgumentException if a quoted string is not terminated.
     */
    fun scan(): List<Token> {
        while (hasNext()) {
            val char: Char = peek()
            when {
                char == '(' -> tokens.add(scanSymbol(TokenType.PARENTHESIS_LEFT))
                char == ')' -> tokens.add(scanSymbol(TokenType.PARENTHESIS_RIGHT))
                char == '.' -> tokens.add(scanSymbol(TokenType.DOT))
                // COMMA is declared in TokenType, so the lexer has to be able to produce it.
                char == ',' -> tokens.add(scanSymbol(TokenType.COMMA))
                char == '"' -> tokens.add(scanQuotedString())
                char == '%' -> scanComment()
                char.isLetterOrDigit() -> tokens.add(scanAlphanumeric())
                char.isWhitespace() -> scanWhitespace()
                else -> throw LexingError(LexingErrorType.UNKNOWN_TOKEN, "Did not recognize $char", position)
            }
        }
        tokens.add(Token(TokenType.EOF, "EOF", getPosition(0)))
        // Hand out a snapshot so later scanning cannot mutate a list a caller already holds.
        return tokens.toList()
    }

    /** Returns true while there is at least one unread character left in the source. */
    private fun hasNext(): Boolean = position.offset < source.length

    /**
     * Returns the next character without consuming it.
     *
     * @throws IllegalArgumentException if the end of the source has been reached.
     */
    private fun peek(): Char {
        // Peek should only be called if there is a next character
        require(hasNext()) {
            LexingError(LexingErrorType.UNEXPECTED_END_OF_INPUT, "Expected additional character", position)
        }
        return source[position.offset]
    }

    /**
     * Consumes and returns the next character, keeping offset, line and column in sync.
     *
     * Line tracking lives here (rather than in the whitespace scanner) so that a newline
     * is accounted for no matter which scanner consumes it, e.g. inside a quoted string.
     */
    private fun next(): Char {
        val char = peek()
        position.offset++
        if (char == '\n') {
            position.line++
            // Same "nothing consumed on this line yet" state as the initial position.
            position.column = -1
        } else {
            position.column++
        }
        return char
    }

    /** Builds a [TokenPosition] from the current line/column and the given token [length]. */
    private fun getPosition(length: Int = 1): TokenPosition =
        TokenPosition(position.line, position.column, length)

    /* * * * * * *
     * Scanners *
     * * * * * * */

    /**
     * Scans a symbol token, given the expected [TokenType].
     * @param tokenType The expected [TokenType]
     * @return The scanned [Token]
     */
    private fun scanSymbol(tokenType: TokenType): Token {
        return Token(tokenType, next().toString(), getPosition(1))
    }

    /**
     * Scans a maximal run of letters and digits.
     * @return An [TokenType.ALPHANUMERIC] token holding the consumed text.
     */
    private fun scanAlphanumeric(): Token {
        val start = position.offset
        while (hasNext() && peek().isLetterOrDigit()) {
            next()
        }
        val value = source.substring(start, position.offset)
        return Token(TokenType.ALPHANUMERIC, value, getPosition(value.length))
    }

    /**
     * Scans a double-quoted string; the quotes themselves are not part of the token value.
     *
     * @return An [TokenType.ALPHANUMERIC] token holding the text between the quotes.
     * @throws IllegalArgumentException if the opening quote is missing or the source ends
     * before the closing quote.
     */
    private fun scanQuotedString(): Token {
        // "Assert" that the next character is the start of a quoted string
        require(next() == '"') {
            LexingError(LexingErrorType.UNEXPECTED_TOKEN, "Expected opening quote '\"'", position)
        }
        val start = position.offset
        while (hasNext() && peek() != '"') {
            next()
        }
        val value = source.substring(start, position.offset)
        // "Assert" that the next character is the end of the quoted string
        require(next() == '"') {
            LexingError(LexingErrorType.UNEXPECTED_TOKEN, "Expected closing quote '\"'", position)
        }
        return Token(TokenType.ALPHANUMERIC, value, getPosition(value.length))
    }

    /**
     * Skips a `%` comment up to, but not including, the end of the line.
     * The newline itself is left for the whitespace scanner.
     */
    private fun scanComment() {
        // "Assert" that the next character is the start of a comment
        require(next() == '%') {
            LexingError(LexingErrorType.UNEXPECTED_TOKEN, "Expected opening comment '%'", position)
        }
        while (hasNext() && peek() != '\n') {
            next()
        }
    }

    /** Skips a run of whitespace characters; line/column updates happen in [next]. */
    private fun scanWhitespace() {
        while (hasNext() && peek().isWhitespace()) {
            next()
        }
    }
}

View file

@ -1,9 +0,0 @@
package lexer
import lexer.state.TokenPosition
/**
 * A single lexical unit.
 *
 * @property type The category of the token.
 * @property value The text carried by the token.
 * @property position The line, column and length recorded for the token.
 */
data class Token(val type: TokenType, val value: String, val position: TokenPosition)

View file

@ -1,15 +0,0 @@
package lexer
/** The categories a token can belong to. */
enum class TokenType {
    /** A run of letters and/or digits. */
    // TODO Replace with SMALL_LETTER, CAPITAL_LETTER, DIGIT, HEX_DIGIT, ... ?
    ALPHANUMERIC,

    // Structure
    COMMA,
    DOT,
    PARENTHESIS_LEFT,
    PARENTHESIS_RIGHT,

    // Special
    /** Marks the end of the input. */
    EOF
}

View file

@ -1,13 +0,0 @@
package lexer.errors
import lexer.state.LexerPosition
/**
 * Error raised while lexing.
 *
 * The superclass constructor receives a one-line text of the form
 * `<line>:<column + 1> <type>: <message>`.
 *
 * NOTE(review): because [message] is overridden by the constructor property below,
 * reading `message` yields the raw text passed in, not the formatted text handed to
 * [Throwable]; the formatted text does not appear to be reachable through `message`.
 * Confirm whether that is intended.
 *
 * @property type The kind of lexing failure.
 * @property message The raw description of the failure.
 * @property position The lexer position associated with the failure. This is a reference
 * to a mutable object; the formatted text is computed once, at construction time.
 */
data class LexingError(
val type: LexingErrorType,
override val message: String,
val position: LexerPosition
) : Throwable(
"""
${position.line}:${position.column + 1} ${type}: $message
""".trimIndent()
)

View file

@ -1,7 +0,0 @@
package lexer.errors
/** The kinds of failure the lexer can report. */
enum class LexingErrorType {
    /** The character does not start any known token. */
    UNKNOWN_TOKEN,

    /** A different character was expected at this point. */
    UNEXPECTED_TOKEN,

    /** More input was expected, but the source ended. */
    UNEXPECTED_END_OF_INPUT,
}

View file

@ -1,3 +0,0 @@
package lexer.state
/**
 * Mutable cursor of the lexer.
 *
 * @property offset Index into the source text.
 * @property line Current line number.
 * @property column Current column number.
 */
data class LexerPosition(
    var offset: Int,
    var line: Int,
    var column: Int
)

View file

@ -1,3 +0,0 @@
package lexer.state
/**
 * Immutable location record attached to a token.
 *
 * @property line Line number recorded for the token.
 * @property column Column number recorded for the token.
 * @property length Number of characters attributed to the token.
 */
data class TokenPosition(
    val line: Int,
    val column: Int,
    val length: Int
)

View file

@ -1,137 +0,0 @@
package parser
import lexer.Token
import lexer.TokenType
import parser.errors.ParsingError
import parser.errors.ParsingErrorType
import parser.state.ParserPosition
import prolog.ast.logic.Clause
import prolog.ast.logic.Fact
import prolog.ast.logic.Rule
import prolog.ast.terms.Atom
import prolog.ast.terms.Structure
import prolog.ast.terms.Term
/**
 * Recursive-descent parser over a list of [Token]s.
 *
 * Rule failures are signalled with `require`, i.e. as [IllegalArgumentException] whose
 * message is the string form of a [ParsingError]. Alternatives are tried through
 * [attempt], which rewinds the token offset when a rule fails.
 *
 * Still unfinished: structure arguments are not parsed, and no rule consumes
 * [TokenType.DOT] or [TokenType.COMMA] yet.
 *
 * @param tokens The tokens to parse, normally ending with a [TokenType.EOF] token.
 */
class Parser(private val tokens: List<Token>) {
    private val position: ParserPosition = ParserPosition(0)

    /**
     * Parses the whole token list into terms.
     *
     * At every position the rules are tried in order: structure first, then atom.
     * Parsing stops at the end of the list or at the first [TokenType.EOF] token.
     *
     * @return The parsed terms, in input order.
     * @throws IllegalArgumentException if no rule matches at the current token.
     */
    fun parse(): List<Term> {
        val terms = mutableListOf<Term>()
        // Stop at EOF as well: the EOF token is inside the list, so hasNext() alone stays true.
        while (hasNext() && !check(TokenType.EOF)) {
            // Try each parser rule in order
            // NOTE(review): assumes Structure and Atom are subtypes of Term — confirm.
            val term: Term? = attempt<Term> { parseStructure() } ?: attempt<Term> { parseAtom() }
            require(term != null) {
                ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected a term", position)
            }
            terms.add(term)
        }
        return terms
    }

    /**
     * Matches the current token with any of the expected types.
     * If it matches, it consumes the token and returns true.
     *
     * @param types The list of expected token types.
     * @return True if the current token matches any of the expected types, false otherwise.
     */
    private fun match(types: List<TokenType>): Boolean {
        if (types.none { check(it) }) return false
        next()
        return true
    }

    /**
     * Checks if the current token matches the expected type.
     */
    private fun check(type: TokenType): Boolean {
        return hasNext() && peek().type == type
    }

    /** Returns true while the offset is inside the token list. */
    private fun hasNext(): Boolean {
        // TODO Check for EOF instead?
        return position.offset < tokens.size
    }

    /**
     * Returns the current token without consuming it.
     * @throws IllegalArgumentException if all tokens have been consumed.
     */
    private fun peek(): Token {
        require(hasNext()) { "Unexpected end of input" }
        return tokens[position.offset]
    }

    /** Consumes and returns the current token. */
    private fun next(): Token {
        val token = peek()
        position.offset++
        return token
    }

    /**
     * Returns the most recently consumed token.
     * @throws IllegalArgumentException if nothing has been consumed yet.
     */
    private fun previous(): Token {
        require(0 < position.offset) { "No previous token" }
        return tokens[position.offset - 1]
    }

    /**
     * Runs [rule]; if it throws an [Exception], rewinds to the offset at which the
     * rule started and returns null, so the next alternative sees the same tokens.
     */
    private fun <T : Any> attempt(rule: () -> T): T? {
        val start = position.offset
        return try {
            rule()
        } catch (e: Exception) {
            position.offset = start
            null
        }
    }

    /* * * * * *
     * Parsers *
     * * * * * */

    /**
     * Runs [parseRule] and converts any [Exception] it throws into a [ParsingError].
     */
    private fun parseWithTry(parseRule: () -> Term): Term {
        try {
            return parseRule()
        } catch (e: Exception) {
            throw ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Unexpected token", position)
        }
    }

    /**
     * Parses a clause: a fact whose head is a structure or, failing that, an atom.
     */
    private fun parseClause(): Clause {
        // Rewind before falling back, otherwise parseAtom would start after the
        // tokens the failed structure attempt already consumed.
        val structure = attempt { parseStructure() }
        return if (structure != null) Fact(structure) else Fact(parseAtom())
    }

    /**
     * Parses `name ( )`. Arguments are not supported yet, so the list is always empty.
     * @throws IllegalArgumentException if the name or either parenthesis is missing.
     */
    private fun parseStructure(): Structure {
        val name = parseAtom()
        val args = mutableListOf<Term>()
        require(match(listOf(TokenType.PARENTHESIS_LEFT))) {
            ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected '(' after structure name", position)
        }
        // TODO Handle arguments
        require(match(listOf(TokenType.PARENTHESIS_RIGHT))) {
            ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected ')' after structure arguments", position)
        }
        return Structure(name, args)
    }

    /** Parses an atom from a single lowercase-initial alphanumeric token. */
    private fun parseAtom(): Atom {
        return Atom(parseLetterDigit())
    }

    /**
     * Consumes one [TokenType.ALPHANUMERIC] token whose value starts with a lowercase
     * letter and returns that value.
     *
     * @throws IllegalArgumentException if the current token has another type, or its
     * value is empty or does not start with a lowercase letter.
     */
    private fun parseLetterDigit(): String {
        // firstOrNull: an empty value (e.g. from an empty quoted string) must fail the
        // check instead of throwing an index error.
        require(
            match(listOf(TokenType.ALPHANUMERIC)) &&
                previous().value.firstOrNull()?.isLowerCase() == true
        ) {
            ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected lowercase letter", position)
        }
        return previous().value
    }
}

View file

@ -1,12 +0,0 @@
package parser.errors
import parser.state.ParserPosition
/**
 * Error raised while parsing.
 *
 * The superclass is constructed without a message; [message] is supplied by overriding
 * the property with the constructor argument.
 *
 * @param type The kind of parsing failure.
 * @property message The description of the failure.
 * @param position The parser position associated with the failure; it is rendered
 * through its own `toString()`.
 */
class ParsingError(private val type: ParsingErrorType, override val message: String, private val position: ParserPosition) :
Throwable() {
/**
 * Renders the error as `(<position>) <type>: <message>`.
 * The surrounding blank lines of the raw string are removed by `trimIndent()`.
 */
override fun toString(): String {
return """
($position) ${type}: $message
""".trimIndent()
}
}

View file

@ -1,7 +0,0 @@
package parser.errors
/** The kinds of failure the parser can report. */
enum class ParsingErrorType {
    /** The current token does not fit the rule being parsed. */
    UNEXPECTED_TOKEN,

    /** The parser itself was used incorrectly. */
    INTERNAL_ERROR,
}

View file

@ -1,25 +0,0 @@
package parser.state
import parser.errors.ParsingError
import parser.errors.ParsingErrorType
/**
 * Mutable cursor into the parser's token list, with a stack of saved checkpoints.
 *
 * Only [offset] is part of the data-class identity; the checkpoint stack is not copied
 * by `copy()` and does not take part in `equals`/`hashCode`.
 *
 * @property offset Index of the current token.
 */
data class ParserPosition(var offset: Int) {
    private val checkpoints: ArrayDeque<ParserPosition> = ArrayDeque()

    /** Pushes a snapshot of the current position onto the checkpoint stack. */
    fun save() {
        val snapshot = copy()
        checkpoints.addLast(snapshot)
    }

    /**
     * Pops the most recent checkpoint and restores [offset] from it.
     *
     * @throws IllegalArgumentException if there is no checkpoint to restore.
     */
    fun reload() {
        require(!checkpoints.isEmpty()) {
            ParsingError(ParsingErrorType.INTERNAL_ERROR, "No checkpoint to reload from", this)
        }
        offset = checkpoints.removeLast().offset
    }

    override fun toString(): String = "at $offset"
}