Checkpoint

This commit is contained in:
Tibo De Peuter 2025-04-17 17:49:53 +02:00
parent e749f8c6cb
commit 48f94c30df
Signed by: tdpeuter
GPG key ID: 38297DE43F75FFE2
15 changed files with 175 additions and 67 deletions

View file

@ -1,5 +0,0 @@
package lexer
class Error(message: String, position: LexerPosition) : Exception("""
${position.line}:${position.column + 1}: $message
""".trimIndent())

View file

@ -1,5 +1,10 @@
package lexer package lexer
import lexer.errors.LexingError
import lexer.errors.LexingErrorType
import lexer.state.LexerPosition
import lexer.state.TokenPosition
class Lexer(private val source: String) { class Lexer(private val source: String) {
private var tokens: List<Token> = emptyList() private var tokens: List<Token> = emptyList()
private val position = LexerPosition(0, 0, -1) private val position = LexerPosition(0, 0, -1)
@ -12,14 +17,14 @@ class Lexer(private val source: String) {
while (hasNext()) { while (hasNext()) {
val char: Char = peek() val char: Char = peek()
tokens += when { tokens += when {
char == '(' -> scanSymbol(TokenType.LEFT_PARENTHESIS) char == '(' -> scanSymbol(TokenType.PARENTHESIS_LEFT)
char == ')' -> scanSymbol(TokenType.RIGHT_PARENTHESIS) char == ')' -> scanSymbol(TokenType.PARENTHESIS_RIGHT)
char == '.' -> scanSymbol(TokenType.DOT) char == '.' -> scanSymbol(TokenType.DOT)
char == '"' -> scanQuotedString() char == '"' -> scanQuotedString()
char == '%' -> { scanComment(); continue } char == '%' -> { scanComment(); continue }
char.isLetterOrDigit() -> scanAlphanumeric() char.isLetterOrDigit() -> scanAlphanumeric()
char.isWhitespace() -> { scanWhitespace(); continue } char.isWhitespace() -> { scanWhitespace(); continue }
else -> throw Error("Unknown symbol: $char", position) else -> throw LexingError(LexingErrorType.UNKNOWN_TOKEN, "Did not recognize $char", position)
} }
} }
tokens += Token(TokenType.EOF, "EOF", getPosition(0)) tokens += Token(TokenType.EOF, "EOF", getPosition(0))
@ -33,8 +38,8 @@ class Lexer(private val source: String) {
private fun peek(): Char { private fun peek(): Char {
// Peek should only be called if there is a next character // Peek should only be called if there is a next character
if (!hasNext()) { require(hasNext()) {
throw Error("Unexpected end of input", position) LexingError(LexingErrorType.UNEXPECTED_END_OF_INPUT, "Expected additional character", position)
} }
return source[position.offset] return source[position.offset]
@ -79,8 +84,8 @@ class Lexer(private val source: String) {
private fun scanQuotedString(): Token { private fun scanQuotedString(): Token {
// "Assert" that the next character is the start of a quoted string // "Assert" that the next character is the start of a quoted string
if (next() != '"') { require(next() == '"') {
throw Error("Illegal state: Expected opening quote", position) LexingError(LexingErrorType.UNEXPECTED_TOKEN, "Expected opening quote '('", position)
} }
var length = 0 var length = 0
@ -90,8 +95,8 @@ class Lexer(private val source: String) {
} }
// "Assert" that the next character is the end of the quoted string // "Assert" that the next character is the end of the quoted string
if (next() != '"') { require(next() == '"') {
throw Error("Illegal state: Expected closing quote", position) LexingError(LexingErrorType.UNEXPECTED_TOKEN, "Expected closing quote ')'", position)
} }
val value = source.substring(position.offset - length - 1, position.offset - 1) val value = source.substring(position.offset - length - 1, position.offset - 1)
@ -100,8 +105,8 @@ class Lexer(private val source: String) {
private fun scanComment() { private fun scanComment() {
// "Assert" that the next character is the start of a comment // "Assert" that the next character is the start of a comment
if (next() != '%') { require(next() == '%') {
throw Error("Illegal state: Expected opening comment", position) LexingError(LexingErrorType.UNEXPECTED_TOKEN, "Expected opening comment '%'", position)
} }
// Skip all characters until the end of the line // Skip all characters until the end of the line

View file

@ -1,5 +1,7 @@
package lexer package lexer
import lexer.state.TokenPosition
data class Token( data class Token(
val type: TokenType, val type: TokenType,
val value: String, val value: String,

View file

@ -4,8 +4,12 @@ enum class TokenType {
ALPHANUMERIC, ALPHANUMERIC,
// TODO Replace with SMALL_LETTER, CAPITAL_LETTER, DIGIT, HEX_DIGIT, ... ? // TODO Replace with SMALL_LETTER, CAPITAL_LETTER, DIGIT, HEX_DIGIT, ... ?
LEFT_PARENTHESIS, RIGHT_PARENTHESIS, // Structure
COMMA,
DOT, DOT,
PARENTHESIS_LEFT, PARENTHESIS_RIGHT,
// Special
EOF EOF
} }

View file

@ -0,0 +1,13 @@
package lexer.errors
import lexer.state.LexerPosition
/**
 * Error raised by the lexer when scanning fails.
 *
 * Carries the error [type], the raw human-readable [message], and the lexer
 * [position] at which scanning stopped.
 *
 * Note: `message` (the raw text) is what callers see via [Throwable.message];
 * the location-prefixed description is produced by [toString]. In the previous
 * version the formatted string was passed to the Throwable constructor, but it
 * was unreachable: `message` is overridden and the data class generated its
 * own toString(), so the formatted text was dead code.
 */
data class LexingError(
    val type: LexingErrorType,
    override val message: String,
    val position: LexerPosition
) : Throwable() {
    // "line:column TYPE: message" — column is printed 1-based (+1), matching
    // the original formatting; line is printed as stored.
    override fun toString(): String =
        "${position.line}:${position.column + 1} $type: $message"
}

View file

@ -0,0 +1,7 @@
package lexer.errors
/**
 * Categories of errors the lexer reports via LexingError.
 */
enum class LexingErrorType {
    // A character that does not start any known token (lexer's `else` branch).
    UNKNOWN_TOKEN,
    // A character was read, but it is not the one the scanner required here
    // (e.g. a missing opening/closing quote or comment marker).
    UNEXPECTED_TOKEN,
    // Input ended while the scanner still expected at least one more character.
    UNEXPECTED_END_OF_INPUT,
}

View file

@ -1,3 +1,3 @@
package lexer package lexer.state
data class LexerPosition(var offset: Int, var line: Int, var column: Int) data class LexerPosition(var offset: Int, var line: Int, var column: Int)

View file

@ -1,3 +1,3 @@
package lexer package lexer.state
data class TokenPosition(val line: Int, val column: Int, val length: Int) data class TokenPosition(val line: Int, val column: Int, val length: Int)

View file

@ -2,18 +2,37 @@ package parser
import lexer.Token import lexer.Token
import lexer.TokenType import lexer.TokenType
import parser.errors.ParsingError
import parser.errors.ParsingErrorType
import parser.state.ParserPosition
import prolog.ast.logic.Clause
import prolog.ast.logic.Fact
import prolog.ast.logic.Rule
import prolog.ast.terms.Atom import prolog.ast.terms.Atom
import prolog.ast.terms.Structure
import prolog.ast.terms.Term import prolog.ast.terms.Term
class Parser(private val tokens: List<Token>) { class Parser(private val tokens: List<Token>) {
private var position: Int = 0 private val position: ParserPosition = ParserPosition(0)
fun parse(): List<Term> { fun parse(): List<Term> {
val terms = mutableListOf<Term>() val terms = mutableListOf<Term>()
// TODO
while (hasNext()) { while (hasNext()) {
terms.add(parseTerm()) position.save()
var term: Term? = null
while (term == null) {
// Try each parser rule in order
}
require(term != null) {
ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected a term", position)
}
terms.add(term)
} }
return terms return terms
@ -21,6 +40,7 @@ class Parser(private val tokens: List<Token>) {
/** /**
* Matches the current token with any of the expected types. * Matches the current token with any of the expected types.
* If it matches, it consumes the token and returns true.
* *
* @param types The list of expected token types. * @param types The list of expected token types.
* @return True if the current token matches any of the expected types, false otherwise. * @return True if the current token matches any of the expected types, false otherwise.
@ -46,71 +66,72 @@ class Parser(private val tokens: List<Token>) {
private fun hasNext(): Boolean { private fun hasNext(): Boolean {
// Check if the position is within the tokens list // Check if the position is within the tokens list
// TODO Check for EOF instead? // TODO Check for EOF instead?
return position < tokens.size return position.offset < tokens.size
} }
private fun peek(): Token { private fun peek(): Token {
// Peek should only be called if there is a next token require(hasNext()) { "Unexpected end of input" }
if (!hasNext()) {
throw Error("Unexpected end of input")
}
return tokens[position] return tokens[position.offset]
} }
private fun next(): Token { private fun next(): Token {
val token = peek() val token = peek()
position++ position.offset++
return token return token
} }
private fun previous(): Token { private fun previous(): Token {
// Previous should only be called if there is a previous token require(0 < position.offset) { "No previous token" }
if (position == 0) { return tokens[position.offset - 1]
throw Error("No previous token")
}
return tokens[position - 1]
} }
/* * * * * * /* * * * * *
* Parsers * * Parsers *
* * * * * */ * * * * * */
private fun parseTerm(): Term { private fun parseWithTry(parseRule: () -> Term): Term {
// TODO Variable try {
// TODO braced term return parseRule()
// TODO Integer Term } catch (e: Exception) {
// TODO Float term throw ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Unexpected token", position)
// TODO Compound term }
// TODO Binary operator }
// TODO Unary operator
// TODO list term private fun parseClause(): Clause {
// TODO curly bracketed term return try {
return parseAtom() Fact(parseStructure())
} catch (e: Exception) {
Fact(parseAtom())
}
}
private fun parseStructure(): Structure {
val name = parseAtom()
val args = mutableListOf<Term>()
require(match(listOf(TokenType.PARENTHESIS_LEFT))) {
ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected '(' after structure name", position)
}
// TODO Handle arguments
require(match(listOf(TokenType.PARENTHESIS_RIGHT))) {
ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected ')' after structure arguments", position)
}
return Structure(name, args)
} }
private fun parseAtom(): Atom { private fun parseAtom(): Atom {
// TODO empty list
// TODO empty braces
return Atom(parseLetterDigit()) return Atom(parseLetterDigit())
// TODO graphic
// TODO quoted
// TODO double quoted
// TODO back quoted
// TODO semicolon
// TODO cut
} }
private fun parseLetterDigit(): String { private fun parseLetterDigit(): String {
// Check if the first character is a lowercase letter require(match(listOf(TokenType.ALPHANUMERIC)) && previous().value[0].isLowerCase()) {
if (match(listOf(TokenType.ALPHANUMERIC)) && previous().value[0].isLowerCase()) { ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected lowercase letter", position)
return previous().value
} }
// TODO How to fix? return previous().value
return ""
} }
} }

View file

@ -0,0 +1,12 @@
package parser.errors
import parser.state.ParserPosition
/**
 * Error raised when the parser cannot make progress on the token stream.
 *
 * Carries the error [type], a human-readable [message], and the parser
 * [position] at which the failure occurred.
 */
class ParsingError(
    private val type: ParsingErrorType,
    override val message: String,
    private val position: ParserPosition,
) : Throwable() {
    /** Renders as "(<position>) <TYPE>: <message>". */
    override fun toString(): String = "($position) $type: $message"
}

View file

@ -0,0 +1,7 @@
package parser.errors
/**
 * Categories of errors the parser reports via ParsingError.
 */
enum class ParsingErrorType {
    // The current token does not fit the grammar rule being parsed.
    UNEXPECTED_TOKEN,
    // A bug in the parser itself rather than in the input
    // (e.g. reloading a checkpoint when none was saved).
    INTERNAL_ERROR,
}

View file

@ -0,0 +1,25 @@
package parser.state
import parser.errors.ParsingError
import parser.errors.ParsingErrorType
/**
 * Mutable cursor into the parser's token stream, with checkpoint support
 * for backtracking.
 *
 * [save] pushes a snapshot of the current offset onto a stack; [reload] pops
 * the most recent snapshot and restores the offset, enabling try-a-rule,
 * backtrack-on-failure parsing.
 */
data class ParserPosition(var offset: Int) {
    // Stack of saved snapshots. copy() captures only `offset` (the copy gets
    // its own fresh, empty checkpoint stack), which is all a checkpoint needs.
    private val checkpoints: ArrayDeque<ParserPosition> = ArrayDeque()

    /** Record the current offset so a later [reload] can restore it. */
    fun save() {
        checkpoints.addLast(this.copy())
    }

    /**
     * Restore the offset from the most recent [save], consuming that checkpoint.
     *
     * @throws ParsingError with [ParsingErrorType.INTERNAL_ERROR] if no
     * checkpoint exists. (The previous `require { ParsingError(...) }` never
     * threw the ParsingError: `require` throws IllegalArgumentException and
     * only uses the lambda's result, stringified, as its message.)
     */
    fun reload() {
        if (checkpoints.isEmpty()) {
            throw ParsingError(ParsingErrorType.INTERNAL_ERROR, "No checkpoint to reload from", this)
        }
        offset = checkpoints.removeLast().offset
    }

    override fun toString(): String = "at $offset"
}

View file

@ -1,5 +1,6 @@
package lexer package lexer
import lexer.errors.LexingError
import org.junit.jupiter.api.Test import org.junit.jupiter.api.Test
import org.junit.jupiter.api.assertThrows import org.junit.jupiter.api.assertThrows
import kotlin.test.assertEquals import kotlin.test.assertEquals
@ -56,6 +57,6 @@ class ScanPrologTests {
@Test @Test
fun scan_variable_that_starts_with_a_number() { fun scan_variable_that_starts_with_a_number() {
assertThrows<Error> { Lexer("1X.").scan() } assertThrows<LexingError> { Lexer("1X.").scan() }
} }
} }

View file

@ -1,5 +1,6 @@
package lexer package lexer
import lexer.errors.LexingError
import org.junit.jupiter.api.Test import org.junit.jupiter.api.Test
import org.junit.jupiter.api.assertThrows import org.junit.jupiter.api.assertThrows
import org.junit.jupiter.api.Assertions.* import org.junit.jupiter.api.Assertions.*
@ -14,7 +15,7 @@ class ScanTests {
@Test @Test
fun scan_unknownSymbol_returns_Error() { fun scan_unknownSymbol_returns_Error() {
assertThrows<Error> { Lexer("").scan() } assertThrows<LexingError> { Lexer("").scan() }
} }
@Test @Test
@ -127,12 +128,12 @@ class ScanTests {
assertEquals(3, tokens.size) assertEquals(3, tokens.size)
assertEquals( assertEquals(
TokenType.LEFT_PARENTHESIS, TokenType.PARENTHESIS_LEFT,
tokens[0].type, tokens[0].type,
"Expected LEFT_PARENTHESES token, got ${tokens[0].type}" "Expected LEFT_PARENTHESES token, got ${tokens[0].type}"
) )
assertEquals( assertEquals(
TokenType.RIGHT_PARENTHESIS, TokenType.PARENTHESIS_RIGHT,
tokens[1].type, tokens[1].type,
"Expected RIGHT_PARENTHESES token, got ${tokens[1].type}" "Expected RIGHT_PARENTHESES token, got ${tokens[1].type}"
) )

View file

@ -1,11 +1,10 @@
package parser package parser
import lexer.Token import lexer.Token
import lexer.TokenPosition import lexer.state.TokenPosition
import lexer.TokenType import lexer.TokenType
import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Assertions.assertTrue import org.junit.jupiter.api.Assertions.assertTrue
import org.junit.jupiter.api.Disabled
import org.junit.jupiter.api.Test import org.junit.jupiter.api.Test
import prolog.ast.terms.Atom import prolog.ast.terms.Atom
import prolog.ast.terms.CompoundTerm import prolog.ast.terms.CompoundTerm
@ -73,4 +72,20 @@ class ParseTests {
assertEquals(1, result.size, "Expected 1 term") assertEquals(1, result.size, "Expected 1 term")
assertEquals(Atom(name), result[0], "Expected atom 'my_FooBar1'") assertEquals(Atom(name), result[0], "Expected atom 'my_FooBar1'")
} }
@Test
fun `parse compound term f()`() {
val input = listOf(
Token(TokenType.ALPHANUMERIC, "f", TokenPosition(0, 0, 1)),
Token(TokenType.PARENTHESIS_LEFT, "(", TokenPosition(0, 1, 2)),
Token(TokenType.PARENTHESIS_RIGHT, ")", TokenPosition(0, 3, 4))
)
val result = Parser(input).parse()
assertEquals(1, result.size, "Expected 1 term")
assertTrue(result[0] is CompoundTerm)
assertEquals("f", (result[0] as CompoundTerm).name)
assertEquals(0, (result[0] as CompoundTerm).arguments.size)
}
} }