diff --git a/src/lexer/Error.kt b/src/lexer/Error.kt deleted file mode 100644 index 1477867..0000000 --- a/src/lexer/Error.kt +++ /dev/null @@ -1,5 +0,0 @@ -package lexer - -class Error(message: String, position: LexerPosition) : Exception(""" - ${position.line}:${position.column + 1}: $message -""".trimIndent()) \ No newline at end of file diff --git a/src/lexer/Lexer.kt b/src/lexer/Lexer.kt index 7e92cd6..c239fbd 100644 --- a/src/lexer/Lexer.kt +++ b/src/lexer/Lexer.kt @@ -1,5 +1,10 @@ package lexer +import lexer.errors.LexingError +import lexer.errors.LexingErrorType +import lexer.state.LexerPosition +import lexer.state.TokenPosition + class Lexer(private val source: String) { private var tokens: List = emptyList() private val position = LexerPosition(0, 0, -1) @@ -12,14 +17,14 @@ class Lexer(private val source: String) { while (hasNext()) { val char: Char = peek() tokens += when { - char == '(' -> scanSymbol(TokenType.LEFT_PARENTHESIS) - char == ')' -> scanSymbol(TokenType.RIGHT_PARENTHESIS) + char == '(' -> scanSymbol(TokenType.PARENTHESIS_LEFT) + char == ')' -> scanSymbol(TokenType.PARENTHESIS_RIGHT) char == '.' 
-> scanSymbol(TokenType.DOT) char == '"' -> scanQuotedString() char == '%' -> { scanComment(); continue } char.isLetterOrDigit() -> scanAlphanumeric() char.isWhitespace() -> { scanWhitespace(); continue } - else -> throw Error("Unknown symbol: $char", position) + else -> throw LexingError(LexingErrorType.UNKNOWN_TOKEN, "Did not recognize $char", position) } } tokens += Token(TokenType.EOF, "EOF", getPosition(0)) @@ -33,8 +38,8 @@ class Lexer(private val source: String) { private fun peek(): Char { // Peek should only be called if there is a next character - if (!hasNext()) { - throw Error("Unexpected end of input", position) + if (!hasNext()) { + throw LexingError(LexingErrorType.UNEXPECTED_END_OF_INPUT, "Expected additional character", position) } return source[position.offset] @@ -79,8 +84,8 @@ class Lexer(private val source: String) { private fun scanQuotedString(): Token { // "Assert" that the next character is the start of a quoted string - if (next() != '"') { - throw Error("Illegal state: Expected opening quote", position) + if (next() != '"') { + throw LexingError(LexingErrorType.UNEXPECTED_TOKEN, "Expected opening quote '\"'", position) } var length = 0 @@ -90,8 +95,8 @@ class Lexer(private val source: String) { } // "Assert" that the next character is the end of the quoted string - if (next() != '"') { - throw Error("Illegal state: Expected closing quote", position) + if (next() != '"') { + throw LexingError(LexingErrorType.UNEXPECTED_TOKEN, "Expected closing quote '\"'", position) } val value = source.substring(position.offset - length - 1, position.offset - 1) @@ -100,8 +105,8 @@ class Lexer(private val source: String) { private fun scanComment() { // "Assert" that the next character is the start of a comment - if (next() != '%') { - throw Error("Illegal state: Expected opening comment", position) + if (next() != '%') { + throw LexingError(LexingErrorType.UNEXPECTED_TOKEN, "Expected opening comment '%'", position) } // Skip all characters until the end of the line
diff --git a/src/lexer/Token.kt b/src/lexer/Token.kt index 3841fa3..c163bfd 100644 --- a/src/lexer/Token.kt +++ b/src/lexer/Token.kt @@ -1,5 +1,7 @@ package lexer +import lexer.state.TokenPosition + data class Token( val type: TokenType, val value: String, diff --git a/src/lexer/TokenType.kt b/src/lexer/TokenType.kt index a5216c6..50d5141 100644 --- a/src/lexer/TokenType.kt +++ b/src/lexer/TokenType.kt @@ -4,8 +4,12 @@ enum class TokenType { ALPHANUMERIC, // TODO Replace with SMALL_LETTER, CAPITAL_LETTER, DIGIT, HEX_DIGIT, ... ? - LEFT_PARENTHESIS, RIGHT_PARENTHESIS, + // Structure + COMMA, DOT, + PARENTHESIS_LEFT, PARENTHESIS_RIGHT, + + // Special EOF } diff --git a/src/lexer/errors/LexingError.kt b/src/lexer/errors/LexingError.kt new file mode 100644 index 0000000..7a5f4c9 --- /dev/null +++ b/src/lexer/errors/LexingError.kt @@ -0,0 +1,13 @@ +package lexer.errors + +import lexer.state.LexerPosition + +data class LexingError( + val type: LexingErrorType, + override val message: String, + val position: LexerPosition +) : Throwable( + """ + ${position.line}:${position.column + 1} ${type}: $message +""".trimIndent() +) diff --git a/src/lexer/errors/LexingErrorType.kt b/src/lexer/errors/LexingErrorType.kt new file mode 100644 index 0000000..bff243a --- /dev/null +++ b/src/lexer/errors/LexingErrorType.kt @@ -0,0 +1,7 @@ +package lexer.errors + +enum class LexingErrorType { + UNKNOWN_TOKEN, + UNEXPECTED_TOKEN, + UNEXPECTED_END_OF_INPUT, +} \ No newline at end of file diff --git a/src/lexer/LexerPosition.kt b/src/lexer/state/LexerPosition.kt similarity index 78% rename from src/lexer/LexerPosition.kt rename to src/lexer/state/LexerPosition.kt index 6437cc3..583bf29 100644 --- a/src/lexer/LexerPosition.kt +++ b/src/lexer/state/LexerPosition.kt @@ -1,3 +1,3 @@ -package lexer +package lexer.state data class LexerPosition(var offset: Int, var line: Int, var column: Int) diff --git a/src/lexer/TokenPosition.kt b/src/lexer/state/TokenPosition.kt similarity index 78% rename from 
src/lexer/TokenPosition.kt rename to src/lexer/state/TokenPosition.kt index 5f8165e..2f19f76 100644 --- a/src/lexer/TokenPosition.kt +++ b/src/lexer/state/TokenPosition.kt @@ -1,3 +1,3 @@ -package lexer +package lexer.state data class TokenPosition(val line: Int, val column: Int, val length: Int) diff --git a/src/parser/Parser.kt b/src/parser/Parser.kt index 977c227..e2e63e8 100644 --- a/src/parser/Parser.kt +++ b/src/parser/Parser.kt @@ -2,18 +2,37 @@ package parser import lexer.Token import lexer.TokenType +import parser.errors.ParsingError +import parser.errors.ParsingErrorType +import parser.state.ParserPosition +import prolog.ast.logic.Clause +import prolog.ast.logic.Fact +import prolog.ast.logic.Rule import prolog.ast.terms.Atom +import prolog.ast.terms.Structure import prolog.ast.terms.Term class Parser(private val tokens: List) { - private var position: Int = 0 + private val position: ParserPosition = ParserPosition(0) fun parse(): List { val terms = mutableListOf() - // TODO while (hasNext()) { - terms.add(parseTerm()) + position.save() + + var term: Term? = null + + while (term == null) { + // Try each parser rule in order + + } + + require(term != null) { + ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected a term", position) + } + + terms.add(term) } return terms @@ -21,6 +40,7 @@ class Parser(private val tokens: List) { /** * Matches the current token with any of the expected types. + * If it matches, it consumes the token and returns true. * * @param types The list of expected token types. * @return True if the current token matches any of the expected types, false otherwise. @@ -46,71 +66,72 @@ class Parser(private val tokens: List) { private fun hasNext(): Boolean { // Check if the position is within the tokens list // TODO Check for EOF instead? 
- return position < tokens.size + return position.offset < tokens.size } private fun peek(): Token { - // Peek should only be called if there is a next token - if (!hasNext()) { - throw Error("Unexpected end of input") - } + require(hasNext()) { "Unexpected end of input" } - return tokens[position] + return tokens[position.offset] } private fun next(): Token { val token = peek() - position++ + position.offset++ return token } private fun previous(): Token { - // Previous should only be called if there is a previous token - if (position == 0) { - throw Error("No previous token") - } - - return tokens[position - 1] + require(0 < position.offset) { "No previous token" } + return tokens[position.offset - 1] } /* * * * * * * Parsers * * * * * * */ - private fun parseTerm(): Term { - // TODO Variable - // TODO braced term - // TODO Integer Term - // TODO Float term - // TODO Compound term - // TODO Binary operator - // TODO Unary operator - // TODO list term - // TODO curly bracketed term - return parseAtom() + private fun parseWithTry(parseRule: () -> Term): Term { + try { + return parseRule() + } catch (e: Exception) { + throw ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Unexpected token", position) + } + } + + private fun parseClause(): Clause { + return try { + Fact(parseStructure()) + } catch (e: Exception) { + Fact(parseAtom()) + } + } + + private fun parseStructure(): Structure { + val name = parseAtom() + val args = mutableListOf() + + if (!match(listOf(TokenType.PARENTHESIS_LEFT))) { + throw ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected '(' after structure name", position) + } + + // TODO Handle arguments + + if (!match(listOf(TokenType.PARENTHESIS_RIGHT))) { + throw ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected ')' after structure arguments", position) + } + + return Structure(name, args) } private fun parseAtom(): Atom { - // TODO empty list - // TODO empty braces - return Atom(parseLetterDigit()) - - // TODO graphic - // TODO quoted - // 
TODO double quoted - // TODO back quoted - // TODO semicolon - // TODO cut } private fun parseLetterDigit(): String { - // Check if the first character is a lowercase letter - if (match(listOf(TokenType.ALPHANUMERIC)) && previous().value[0].isLowerCase()) { - return previous().value + if (!(match(listOf(TokenType.ALPHANUMERIC)) && previous().value[0].isLowerCase())) { + throw ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected lowercase letter", position) } - // TODO How to fix? - return "" + return previous().value } } diff --git a/src/parser/errors/ParsingError.kt b/src/parser/errors/ParsingError.kt new file mode 100644 index 0000000..7ddbfc2 --- /dev/null +++ b/src/parser/errors/ParsingError.kt @@ -0,0 +1,12 @@ +package parser.errors + +import parser.state.ParserPosition + +class ParsingError(private val type: ParsingErrorType, override val message: String, private val position: ParserPosition) : + Throwable() { + override fun toString(): String { + return """ + ($position) ${type}: $message + """.trimIndent() + } +} \ No newline at end of file diff --git a/src/parser/errors/ParsingErrorType.kt b/src/parser/errors/ParsingErrorType.kt new file mode 100644 index 0000000..5e017d8 --- /dev/null +++ b/src/parser/errors/ParsingErrorType.kt @@ -0,0 +1,7 @@ +package parser.errors + +enum class ParsingErrorType { + UNEXPECTED_TOKEN, + + INTERNAL_ERROR, +} \ No newline at end of file diff --git a/src/parser/state/ParserPosition.kt b/src/parser/state/ParserPosition.kt new file mode 100644 index 0000000..f3b5586 --- /dev/null +++ b/src/parser/state/ParserPosition.kt @@ -0,0 +1,25 @@ +package parser.state + +import parser.errors.ParsingError +import parser.errors.ParsingErrorType + +data class ParserPosition(var offset: Int) { + private val checkpoints: ArrayDeque = ArrayDeque() + + fun save() { + checkpoints.addLast(this.copy()) + } + + fun reload() { + if (checkpoints.isEmpty()) { + throw ParsingError(ParsingErrorType.INTERNAL_ERROR, "No checkpoint to reload from", this) 
+ } + + val checkpoint = checkpoints.removeLast() + offset = checkpoint.offset + } + + override fun toString(): String { + return "at $offset" + } +} diff --git a/tests/lexer/ScanPrologTests.kt b/tests/lexer/ScanPrologTests.kt index 4f39a60..101cc49 100644 --- a/tests/lexer/ScanPrologTests.kt +++ b/tests/lexer/ScanPrologTests.kt @@ -1,5 +1,6 @@ package lexer +import lexer.errors.LexingError import org.junit.jupiter.api.Test import org.junit.jupiter.api.assertThrows import kotlin.test.assertEquals @@ -56,6 +57,6 @@ class ScanPrologTests { @Test fun scan_variable_that_starts_with_a_number() { - assertThrows { Lexer("1X.").scan() } + assertThrows { Lexer("1X.").scan() } } } diff --git a/tests/lexer/ScanTests.kt b/tests/lexer/ScanTests.kt index cf21d26..a21f571 100644 --- a/tests/lexer/ScanTests.kt +++ b/tests/lexer/ScanTests.kt @@ -1,5 +1,6 @@ package lexer +import lexer.errors.LexingError import org.junit.jupiter.api.Test import org.junit.jupiter.api.assertThrows import org.junit.jupiter.api.Assertions.* @@ -14,7 +15,7 @@ class ScanTests { @Test fun scan_unknownSymbol_returns_Error() { - assertThrows { Lexer("€").scan() } + assertThrows { Lexer("€").scan() } } @Test @@ -127,12 +128,12 @@ class ScanTests { assertEquals(3, tokens.size) assertEquals( - TokenType.LEFT_PARENTHESIS, + TokenType.PARENTHESIS_LEFT, tokens[0].type, "Expected LEFT_PARENTHESES token, got ${tokens[0].type}" ) assertEquals( - TokenType.RIGHT_PARENTHESIS, + TokenType.PARENTHESIS_RIGHT, tokens[1].type, "Expected RIGHT_PARENTHESES token, got ${tokens[1].type}" ) diff --git a/tests/parser/ParseTests.kt b/tests/parser/ParseTests.kt index 6b3d214..4056820 100644 --- a/tests/parser/ParseTests.kt +++ b/tests/parser/ParseTests.kt @@ -1,11 +1,10 @@ package parser import lexer.Token -import lexer.TokenPosition +import lexer.state.TokenPosition import lexer.TokenType import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.Assertions.assertTrue -import org.junit.jupiter.api.Disabled 
import org.junit.jupiter.api.Test import prolog.ast.terms.Atom import prolog.ast.terms.CompoundTerm @@ -73,4 +72,20 @@ class ParseTests { assertEquals(1, result.size, "Expected 1 term") assertEquals(Atom(name), result[0], "Expected atom 'my_FooBar1'") } + + @Test + fun `parse compound term f()`() { + val input = listOf( + Token(TokenType.ALPHANUMERIC, "f", TokenPosition(0, 0, 1)), + Token(TokenType.PARENTHESIS_LEFT, "(", TokenPosition(0, 1, 2)), + Token(TokenType.PARENTHESIS_RIGHT, ")", TokenPosition(0, 3, 4)) + ) + + val result = Parser(input).parse() + + assertEquals(1, result.size, "Expected 1 term") + assertTrue(result[0] is CompoundTerm) + assertEquals("f", (result[0] as CompoundTerm).name) + assertEquals(0, (result[0] as CompoundTerm).arguments.size) + } } \ No newline at end of file