Checkpoint

2025-04-17 17:49:53 +02:00 · 2025-04-17 17:49:53 +02:00 · 48f94c30df
commit 48f94c30df
parent e749f8c6cb
15 changed files with 175 additions and 67 deletions
--- a/src/lexer/Error.kt
+++ b/src/lexer/Error.kt
@ -1,5 +0,0 @@
-package lexer
-
-class Error(message: String, position: LexerPosition) : Exception("""
-    ${position.line}:${position.column + 1}: $message
-""".trimIndent())
--- a/src/lexer/Lexer.kt
+++ b/src/lexer/Lexer.kt
@ -1,5 +1,10 @@
 package lexer

+import lexer.errors.LexingError
+import lexer.errors.LexingErrorType
+import lexer.state.LexerPosition
+import lexer.state.TokenPosition
+
 class Lexer(private val source: String) {
    private var tokens: List<Token> = emptyList()
    private val position = LexerPosition(0, 0, -1)
@ -12,14 +17,14 @@ class Lexer(private val source: String) {
        while (hasNext()) {
            val char: Char = peek()
            tokens += when {
-                char == '(' -> scanSymbol(TokenType.LEFT_PARENTHESIS)
-                char == ')' -> scanSymbol(TokenType.RIGHT_PARENTHESIS)
+                char == '(' -> scanSymbol(TokenType.PARENTHESIS_LEFT)
+                char == ')' -> scanSymbol(TokenType.PARENTHESIS_RIGHT)
                char == '.' -> scanSymbol(TokenType.DOT)
                char == '"' -> scanQuotedString()
                char == '%' -> { scanComment(); continue }
                char.isLetterOrDigit() -> scanAlphanumeric()
                char.isWhitespace()    -> { scanWhitespace(); continue }
-                else -> throw Error("Unknown symbol: $char", position)
+                else -> throw LexingError(LexingErrorType.UNKNOWN_TOKEN, "Did not recognize $char", position)
            }
        }
        tokens += Token(TokenType.EOF, "EOF", getPosition(0))
@ -33,8 +38,8 @@ class Lexer(private val source: String) {

    private fun peek(): Char {
        // Peek should only be called if there is a next character
-        if (!hasNext()) {
-            throw Error("Unexpected end of input", position)
+        require(hasNext()) {
+            LexingError(LexingErrorType.UNEXPECTED_END_OF_INPUT, "Expected additional character", position)
        }

        return source[position.offset]
@ -79,8 +84,8 @@ class Lexer(private val source: String) {

    private fun scanQuotedString(): Token {
        // "Assert" that the next character is the start of a quoted string
-        if (next() != '"') {
-            throw Error("Illegal state: Expected opening quote", position)
+        require(next() == '"') {
+            LexingError(LexingErrorType.UNEXPECTED_TOKEN, "Expected opening quote '('", position)
        }

        var length = 0
@ -90,8 +95,8 @@ class Lexer(private val source: String) {
        }

        // "Assert" that the next character is the end of the quoted string
-        if (next() != '"') {
-            throw Error("Illegal state: Expected closing quote", position)
+        require(next() == '"') {
+            LexingError(LexingErrorType.UNEXPECTED_TOKEN, "Expected closing quote ')'", position)
        }

        val value = source.substring(position.offset - length - 1, position.offset - 1)
@ -100,8 +105,8 @@ class Lexer(private val source: String) {

    private fun scanComment() {
        // "Assert" that the next character is the start of a comment
-        if (next() != '%') {
-            throw Error("Illegal state: Expected opening comment", position)
+        require(next() == '%') {
+            LexingError(LexingErrorType.UNEXPECTED_TOKEN, "Expected opening comment '%'", position)
        }

        // Skip all characters until the end of the line
--- a/src/lexer/Token.kt
+++ b/src/lexer/Token.kt
@ -1,5 +1,7 @@
 package lexer

+import lexer.state.TokenPosition
+
 data class Token(
    val type: TokenType,
    val value: String,
--- a/src/lexer/TokenType.kt
+++ b/src/lexer/TokenType.kt
@ -4,8 +4,12 @@ enum class TokenType {
    ALPHANUMERIC,
    // TODO Replace with SMALL_LETTER, CAPITAL_LETTER, DIGIT, HEX_DIGIT, ... ?

-    LEFT_PARENTHESIS, RIGHT_PARENTHESIS,
+    // Structure
+    COMMA,
    DOT,
+    PARENTHESIS_LEFT, PARENTHESIS_RIGHT,
+
+    // Special

    EOF
 }
--- a/src/lexer/errors/LexingError.kt
+++ b/src/lexer/errors/LexingError.kt
@ -0,0 +1,13 @@
+package lexer.errors
+
+import lexer.state.LexerPosition
+
+/**
+ * Error raised by the lexer.
+ *
+ * [message] holds the bare description supplied by the caller. The rendered
+ * form "line:column type: message" is produced by [toString], which is also
+ * what stack traces and logs print.
+ *
+ * @property type Category of the failure.
+ * @property message Bare description of the failure, without position information.
+ * @property position Lexer position at which the failure was detected.
+ */
+data class LexingError(
+    val type: LexingErrorType,
+    override val message: String,
+    val position: LexerPosition
+) : Throwable() {
+    // A formatted string passed to the Throwable constructor would never be visible,
+    // because the overriding `message` property replaces it. Render it here instead,
+    // as a plain template so multi-line messages are not re-indented.
+    // The column is printed as `column + 1`; the line is printed as stored.
+    override fun toString(): String = "${position.line}:${position.column + 1} ${type}: $message"
+}
--- a/src/lexer/errors/LexingErrorType.kt
+++ b/src/lexer/errors/LexingErrorType.kt
@ -0,0 +1,7 @@
+package lexer.errors
+
+enum class LexingErrorType { // Categories attached to a LexingError
+    UNKNOWN_TOKEN, // Used by Lexer.scan when no scanning rule matches the next character
+    UNEXPECTED_TOKEN, // Used by the scanQuotedString / scanComment guards when the expected delimiter is missing
+    UNEXPECTED_END_OF_INPUT, // Used by Lexer.peek when there is no next character
+}
--- a/src/lexer/state/LexerPosition.kt
+++ b/src/lexer/state/LexerPosition.kt
@ -1,3 +1,3 @@
-package lexer
+package lexer.state

 data class LexerPosition(var offset: Int, var line: Int, var column: Int) // Mutable lexer cursor; Lexer creates it as (0, 0, -1) and error text prints column + 1
--- a/src/lexer/state/TokenPosition.kt
+++ b/src/lexer/state/TokenPosition.kt
@ -1,3 +1,3 @@
-package lexer
+package lexer.state

 data class TokenPosition(val line: Int, val column: Int, val length: Int) // Immutable location of a token; presumably measured in characters - TODO confirm units
--- a/src/parser/Parser.kt
+++ b/src/parser/Parser.kt
@ -2,18 +2,37 @@ package parser

 import lexer.Token
 import lexer.TokenType
+import parser.errors.ParsingError
+import parser.errors.ParsingErrorType
+import parser.state.ParserPosition
+import prolog.ast.logic.Clause
+import prolog.ast.logic.Fact
+import prolog.ast.logic.Rule
 import prolog.ast.terms.Atom
+import prolog.ast.terms.Structure
 import prolog.ast.terms.Term

 class Parser(private val tokens: List<Token>) {
-    private var position: Int = 0
+    private val position: ParserPosition = ParserPosition(0)

    fun parse(): List<Term> {
        val terms = mutableListOf<Term>()

-        // TODO
        while (hasNext()) {
-            terms.add(parseTerm())
+            position.save()
+
+            var term: Term? = null
+
+            while (term == null) {
+                // Try each parser rule in order
+
+            }
+
+            require(term != null) {
+                ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected a term", position)
+            }
+
+            terms.add(term)
        }

        return terms
@ -21,6 +40,7 @@ class Parser(private val tokens: List<Token>) {

    /**
     * Matches the current token with any of the expected types.
+     * If it matches, it consumes the token and returns true.
     *
     * @param types The list of expected token types.
     * @return True if the current token matches any of the expected types, false otherwise.
@ -46,71 +66,72 @@ class Parser(private val tokens: List<Token>) {
    private fun hasNext(): Boolean {
        // Check if the position is within the tokens list
        // TODO Check for EOF instead?
-        return position < tokens.size
+        return position.offset < tokens.size // Index check only; token types (such as EOF) are not inspected
    }

    private fun peek(): Token {
-        // Peek should only be called if there is a next token
-        if (!hasNext()) {
-            throw Error("Unexpected end of input")
-        }
+        require(hasNext()) { "Unexpected end of input" } // Throws IllegalArgumentException when no token is left

-        return tokens[position]
+        return tokens[position.offset] // Returns the current token without advancing the position
    }

    private fun next(): Token {
        val token = peek()
-        position++
+        position.offset++ // Advance past the token that peek() just returned
        return token
    }

    private fun previous(): Token {
-        // Previous should only be called if there is a previous token
-        if (position == 0) {
-            throw Error("No previous token")
-        }
-
-        return tokens[position - 1]
+        require(0 < position.offset) { "No previous token" } // Throws IllegalArgumentException at offset 0
+        return tokens[position.offset - 1] // The token immediately before the current offset
    }

    /* * * * * *
     * Parsers *
     * * * * * */

-    private fun parseTerm(): Term {
-        // TODO Variable
-        // TODO braced term
-        // TODO Integer Term
-        // TODO Float term
-        // TODO Compound term
-        // TODO Binary operator
-        // TODO Unary operator
-        // TODO list term
-        // TODO curly bracketed term
-        return parseAtom()
+    /**
+     * Runs [parseRule] and converts any [Exception] it throws into a [ParsingError].
+     *
+     * The original exception is discarded: the resulting error always carries
+     * [ParsingErrorType.UNEXPECTED_TOKEN], the message "Unexpected token" and the
+     * current parser position. Throwables that are not [Exception]s pass through.
+     *
+     * @param parseRule The parser rule to execute.
+     * @return The term produced by [parseRule].
+     */
+    private fun parseWithTry(parseRule: () -> Term): Term =
+        try {
+            parseRule()
+        } catch (e: Exception) {
+            throw ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Unexpected token", position)
+        }
+
+    /**
+     * Parses a clause: a structure if one can be parsed at the current position,
+     * otherwise a plain atom. Either result is wrapped in a [Fact].
+     *
+     * @return The parsed clause.
+     */
+    private fun parseClause(): Clause {
+        // parseStructure() consumes the name token before it can fail on the
+        // parenthesis check, so remember where the clause starts and rewind
+        // before trying the atom alternative on the same tokens.
+        val start = position.offset
+        return try {
+            Fact(parseStructure())
+        } catch (e: Exception) {
+            position.offset = start
+            Fact(parseAtom())
+        }
+    }
+
+    /**
+     * Parses a structure: an atom name followed by '(' and ')'.
+     * Arguments are not handled yet, so the argument list is always empty.
+     *
+     * NOTE(review): `require` throws IllegalArgumentException and uses the
+     * ParsingError only for its string form; the ParsingError itself is not thrown.
+     *
+     * @return The parsed structure.
+     */
+    private fun parseStructure(): Structure {
+        val functor = parseAtom()
+        val arguments = mutableListOf<Term>()
+
+        val opened = match(listOf(TokenType.PARENTHESIS_LEFT))
+        require(opened) {
+            ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected '(' after structure name", position)
+        }
+
+        // TODO Handle arguments
+
+        val closed = match(listOf(TokenType.PARENTHESIS_RIGHT))
+        require(closed) {
+            ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected ')' after structure arguments", position)
+        }
+
+        return Structure(functor, arguments)
    }

    private fun parseAtom(): Atom {
-        // TODO empty list
-        // TODO empty braces
-
        return Atom(parseLetterDigit()) // Atoms are currently built only from the name returned by parseLetterDigit()
-
-        // TODO graphic
-        // TODO quoted
-        // TODO double quoted
-        // TODO back quoted
-        // TODO semicolon
-        // TODO cut
    }

    private fun parseLetterDigit(): String {
-        // Check if the first character is a lowercase letter
-        if (match(listOf(TokenType.ALPHANUMERIC)) && previous().value[0].isLowerCase()) {
-            return previous().value
+        require(match(listOf(TokenType.ALPHANUMERIC)) && previous().value[0].isLowerCase()) { // NOTE(review): per match()'s KDoc a matched token is consumed, so it stays consumed even when the lowercase check fails
+            ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected lowercase letter", position) // NOTE(review): require() throws IllegalArgumentException with this object's toString() as message; the ParsingError itself is not thrown
        }

-        // TODO How to fix?
-        return ""
+        return previous().value // Text of the ALPHANUMERIC token that was just matched
    }
 }
--- a/src/parser/errors/ParsingError.kt
+++ b/src/parser/errors/ParsingError.kt
@ -0,0 +1,12 @@
+package parser.errors
+
+import parser.state.ParserPosition
+
+/**
+ * Error raised by the parser.
+ *
+ * [message] holds the bare description; [toString] renders it as
+ * "(position) type: message".
+ *
+ * @param type Category of the failure.
+ * @param message Bare description of the failure.
+ * @param position Parser position at which the failure was detected.
+ */
+class ParsingError(private val type: ParsingErrorType, override val message: String, private val position: ParserPosition) :
+    Throwable() {
+    // Plain template instead of a raw string with trimIndent(): trimIndent() ran on
+    // the interpolated text, so a multi-line message left the template's own
+    // indentation in front of the result.
+    override fun toString(): String = "($position) ${type}: $message"
+}
--- a/src/parser/errors/ParsingErrorType.kt
+++ b/src/parser/errors/ParsingErrorType.kt
@ -0,0 +1,7 @@
+package parser.errors
+
+enum class ParsingErrorType { // Categories attached to a ParsingError
+    UNEXPECTED_TOKEN, // Used by the Parser rules when the current token does not fit the rule being parsed
+
+    INTERNAL_ERROR, // Used by ParserPosition.reload when there is no checkpoint to restore
+}
--- a/src/parser/state/ParserPosition.kt
+++ b/src/parser/state/ParserPosition.kt
@ -0,0 +1,25 @@
+package parser.state
+
+import parser.errors.ParsingError
+import parser.errors.ParsingErrorType
+
+/**
+ * Mutable cursor into the parser's token list, with a stack of saved offsets
+ * that can be restored later.
+ *
+ * @property offset Index of the current token.
+ */
+data class ParserPosition(var offset: Int) {
+    // Saved offsets, most recent last.
+    private val checkpoints: ArrayDeque<Int> = ArrayDeque()
+
+    /** Pushes the current [offset] onto the checkpoint stack. */
+    fun save() {
+        checkpoints.addLast(offset)
+    }
+
+    /**
+     * Pops the most recent checkpoint and restores [offset] from it.
+     *
+     * NOTE(review): with no checkpoint saved, `require` throws
+     * IllegalArgumentException whose message is the ParsingError's string form;
+     * the ParsingError itself is not thrown.
+     */
+    fun reload() {
+        require(checkpoints.isNotEmpty()) {
+            ParsingError(ParsingErrorType.INTERNAL_ERROR, "No checkpoint to reload from", this)
+        }
+
+        offset = checkpoints.removeLast()
+    }
+
+    override fun toString(): String = "at $offset"
+}
--- a/tests/lexer/ScanPrologTests.kt
+++ b/tests/lexer/ScanPrologTests.kt
@ -1,5 +1,6 @@
 package lexer

+import lexer.errors.LexingError
 import org.junit.jupiter.api.Test
 import org.junit.jupiter.api.assertThrows
 import kotlin.test.assertEquals
@ -56,6 +57,6 @@ class ScanPrologTests {

    @Test
    fun scan_variable_that_starts_with_a_number() {
-        assertThrows<Error> { Lexer("1X.").scan() }
+        assertThrows<LexingError> { Lexer("1X.").scan() } // Scanning "1X." is expected to fail with a LexingError
    }
 }
--- a/tests/lexer/ScanTests.kt
+++ b/tests/lexer/ScanTests.kt
@ -1,5 +1,6 @@
 package lexer

+import lexer.errors.LexingError
 import org.junit.jupiter.api.Test
 import org.junit.jupiter.api.assertThrows
 import org.junit.jupiter.api.Assertions.*
@ -14,7 +15,7 @@ class ScanTests {

    @Test
    fun scan_unknownSymbol_returns_Error() {
-        assertThrows<Error> { Lexer("€").scan() }
+        assertThrows<LexingError> { Lexer("€").scan() } // No scanning rule matches this character, so scan() throws LexingError
    }

    @Test
@ -127,12 +128,12 @@ class ScanTests {
        assertEquals(3, tokens.size)

        assertEquals(
-            TokenType.LEFT_PARENTHESIS,
+            TokenType.PARENTHESIS_LEFT,
            tokens[0].type,
            "Expected LEFT_PARENTHESES token, got ${tokens[0].type}"
        )
        assertEquals(
-            TokenType.RIGHT_PARENTHESIS,
+            TokenType.PARENTHESIS_RIGHT,
            tokens[1].type,
            "Expected RIGHT_PARENTHESES token, got ${tokens[1].type}"
        )
--- a/tests/parser/ParseTests.kt
+++ b/tests/parser/ParseTests.kt
@ -1,11 +1,10 @@
 package parser

 import lexer.Token
-import lexer.TokenPosition
+import lexer.state.TokenPosition
 import lexer.TokenType
 import org.junit.jupiter.api.Assertions.assertEquals
 import org.junit.jupiter.api.Assertions.assertTrue
-import org.junit.jupiter.api.Disabled
 import org.junit.jupiter.api.Test
 import prolog.ast.terms.Atom
 import prolog.ast.terms.CompoundTerm
@ -73,4 +72,20 @@ class ParseTests {
        assertEquals(1, result.size, "Expected 1 term")
        assertEquals(Atom(name), result[0], "Expected atom 'my_FooBar1'")
    }
+
+    @Test
+    fun `parse compound term f()`() {
+        // Tokens for the text "f()": each token is one character long and the
+        // columns are consecutive (TokenPosition is line, column, length).
+        val input = listOf(
+            Token(TokenType.ALPHANUMERIC, "f", TokenPosition(0, 0, 1)),
+            Token(TokenType.PARENTHESIS_LEFT, "(", TokenPosition(0, 1, 1)),
+            Token(TokenType.PARENTHESIS_RIGHT, ")", TokenPosition(0, 2, 1))
+        )
+
+        val result = Parser(input).parse()
+
+        assertEquals(1, result.size, "Expected 1 term")
+        assertTrue(result[0] is CompoundTerm)
+        assertEquals("f", (result[0] as CompoundTerm).name)
+        assertEquals(0, (result[0] as CompoundTerm).arguments.size)
+    }
 }