Checkpoint

This commit is contained in:
Tibo De Peuter 2025-04-17 17:49:53 +02:00
parent e749f8c6cb
commit 48f94c30df
Signed by: tdpeuter
GPG key ID: 38297DE43F75FFE2
15 changed files with 175 additions and 67 deletions

View file

@ -1,5 +0,0 @@
package lexer
class Error(message: String, position: LexerPosition) : Exception("""
${position.line}:${position.column + 1}: $message
""".trimIndent())

View file

@ -1,5 +1,10 @@
package lexer package lexer
import lexer.errors.LexingError
import lexer.errors.LexingErrorType
import lexer.state.LexerPosition
import lexer.state.TokenPosition
class Lexer(private val source: String) { class Lexer(private val source: String) {
private var tokens: List<Token> = emptyList() private var tokens: List<Token> = emptyList()
private val position = LexerPosition(0, 0, -1) private val position = LexerPosition(0, 0, -1)
@ -12,14 +17,14 @@ class Lexer(private val source: String) {
while (hasNext()) { while (hasNext()) {
val char: Char = peek() val char: Char = peek()
tokens += when { tokens += when {
char == '(' -> scanSymbol(TokenType.LEFT_PARENTHESIS) char == '(' -> scanSymbol(TokenType.PARENTHESIS_LEFT)
char == ')' -> scanSymbol(TokenType.RIGHT_PARENTHESIS) char == ')' -> scanSymbol(TokenType.PARENTHESIS_RIGHT)
char == '.' -> scanSymbol(TokenType.DOT) char == '.' -> scanSymbol(TokenType.DOT)
char == '"' -> scanQuotedString() char == '"' -> scanQuotedString()
char == '%' -> { scanComment(); continue } char == '%' -> { scanComment(); continue }
char.isLetterOrDigit() -> scanAlphanumeric() char.isLetterOrDigit() -> scanAlphanumeric()
char.isWhitespace() -> { scanWhitespace(); continue } char.isWhitespace() -> { scanWhitespace(); continue }
else -> throw Error("Unknown symbol: $char", position) else -> throw LexingError(LexingErrorType.UNKNOWN_TOKEN, "Did not recognize $char", position)
} }
} }
tokens += Token(TokenType.EOF, "EOF", getPosition(0)) tokens += Token(TokenType.EOF, "EOF", getPosition(0))
@ -33,8 +38,8 @@ class Lexer(private val source: String) {
private fun peek(): Char { private fun peek(): Char {
// Peek should only be called if there is a next character // Peek should only be called if there is a next character
if (!hasNext()) { require(hasNext()) {
throw Error("Unexpected end of input", position) LexingError(LexingErrorType.UNEXPECTED_END_OF_INPUT, "Expected additional character", position)
} }
return source[position.offset] return source[position.offset]
@ -79,8 +84,8 @@ class Lexer(private val source: String) {
private fun scanQuotedString(): Token { private fun scanQuotedString(): Token {
// "Assert" that the next character is the start of a quoted string // "Assert" that the next character is the start of a quoted string
if (next() != '"') { require(next() == '"') {
throw Error("Illegal state: Expected opening quote", position) LexingError(LexingErrorType.UNEXPECTED_TOKEN, "Expected opening quote '('", position)
} }
var length = 0 var length = 0
@ -90,8 +95,8 @@ class Lexer(private val source: String) {
} }
// "Assert" that the next character is the end of the quoted string // "Assert" that the next character is the end of the quoted string
if (next() != '"') { require(next() == '"') {
throw Error("Illegal state: Expected closing quote", position) LexingError(LexingErrorType.UNEXPECTED_TOKEN, "Expected closing quote ')'", position)
} }
val value = source.substring(position.offset - length - 1, position.offset - 1) val value = source.substring(position.offset - length - 1, position.offset - 1)
@ -100,8 +105,8 @@ class Lexer(private val source: String) {
private fun scanComment() { private fun scanComment() {
// "Assert" that the next character is the start of a comment // "Assert" that the next character is the start of a comment
if (next() != '%') { require(next() == '%') {
throw Error("Illegal state: Expected opening comment", position) LexingError(LexingErrorType.UNEXPECTED_TOKEN, "Expected opening comment '%'", position)
} }
// Skip all characters until the end of the line // Skip all characters until the end of the line

View file

@ -1,5 +1,7 @@
package lexer package lexer
import lexer.state.TokenPosition
data class Token( data class Token(
val type: TokenType, val type: TokenType,
val value: String, val value: String,

View file

@ -4,8 +4,12 @@ enum class TokenType {
ALPHANUMERIC, ALPHANUMERIC,
// TODO Replace with SMALL_LETTER, CAPITAL_LETTER, DIGIT, HEX_DIGIT, ... ? // TODO Replace with SMALL_LETTER, CAPITAL_LETTER, DIGIT, HEX_DIGIT, ... ?
LEFT_PARENTHESIS, RIGHT_PARENTHESIS, // Structure
COMMA,
DOT, DOT,
PARENTHESIS_LEFT, PARENTHESIS_RIGHT,
// Special
EOF EOF
} }

View file

@ -0,0 +1,13 @@
package lexer.errors
import lexer.state.LexerPosition
/**
 * Error raised by the lexer when scanning fails.
 *
 * Carries the error [type], the raw human-readable [message], and the lexer
 * [position] at which scanning stopped.
 *
 * Note: `message` (the raw text) is what callers see via [Throwable.message];
 * the location-prefixed description is produced by [toString]. In the previous
 * version the formatted string was passed to the Throwable constructor, but it
 * was unreachable: `message` is overridden and the data class generated its
 * own toString(), so the formatted text was dead code.
 */
data class LexingError(
    val type: LexingErrorType,
    override val message: String,
    val position: LexerPosition
) : Throwable() {
    // "line:column TYPE: message" — column is printed 1-based (+1), matching
    // the original formatting; line is printed as stored.
    override fun toString(): String =
        "${position.line}:${position.column + 1} $type: $message"
}

View file

@ -0,0 +1,7 @@
package lexer.errors
/**
 * Categories of errors the lexer reports via LexingError.
 */
enum class LexingErrorType {
    // A character that does not start any known token (lexer's `else` branch).
    UNKNOWN_TOKEN,
    // A character was read, but it is not the one the scanner required here
    // (e.g. a missing opening/closing quote or comment marker).
    UNEXPECTED_TOKEN,
    // Input ended while the scanner still expected at least one more character.
    UNEXPECTED_END_OF_INPUT,
}

View file

@ -1,3 +1,3 @@
package lexer package lexer.state
data class LexerPosition(var offset: Int, var line: Int, var column: Int) data class LexerPosition(var offset: Int, var line: Int, var column: Int)

View file

@ -1,3 +1,3 @@
package lexer package lexer.state
data class TokenPosition(val line: Int, val column: Int, val length: Int) data class TokenPosition(val line: Int, val column: Int, val length: Int)

View file

@ -2,18 +2,37 @@ package parser
import lexer.Token import lexer.Token
import lexer.TokenType import lexer.TokenType
import parser.errors.ParsingError
import parser.errors.ParsingErrorType
import parser.state.ParserPosition
import prolog.ast.logic.Clause
import prolog.ast.logic.Fact
import prolog.ast.logic.Rule
import prolog.ast.terms.Atom import prolog.ast.terms.Atom
import prolog.ast.terms.Structure
import prolog.ast.terms.Term import prolog.ast.terms.Term
class Parser(private val tokens: List<Token>) { class Parser(private val tokens: List<Token>) {
private var position: Int = 0 private val position: ParserPosition = ParserPosition(0)
fun parse(): List<Term> { fun parse(): List<Term> {
val terms = mutableListOf<Term>() val terms = mutableListOf<Term>()
// TODO
while (hasNext()) { while (hasNext()) {
terms.add(parseTerm()) position.save()
var term: Term? = null
while (term == null) {
// Try each parser rule in order
}
require(term != null) {
ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected a term", position)
}
terms.add(term)
} }
return terms return terms
@ -21,6 +40,7 @@ class Parser(private val tokens: List<Token>) {
/** /**
* Matches the current token with any of the expected types. * Matches the current token with any of the expected types.
* If it matches, it consumes the token and returns true.
* *
* @param types The list of expected token types. * @param types The list of expected token types.
* @return True if the current token matches any of the expected types, false otherwise. * @return True if the current token matches any of the expected types, false otherwise.
@ -46,71 +66,72 @@ class Parser(private val tokens: List<Token>) {
private fun hasNext(): Boolean { private fun hasNext(): Boolean {
// Check if the position is within the tokens list // Check if the position is within the tokens list
// TODO Check for EOF instead? // TODO Check for EOF instead?
return position < tokens.size return position.offset < tokens.size
} }
private fun peek(): Token { private fun peek(): Token {
// Peek should only be called if there is a next token require(hasNext()) { "Unexpected end of input" }
if (!hasNext()) {
throw Error("Unexpected end of input")
}
return tokens[position] return tokens[position.offset]
} }
private fun next(): Token { private fun next(): Token {
val token = peek() val token = peek()
position++ position.offset++
return token return token
} }
private fun previous(): Token { private fun previous(): Token {
// Previous should only be called if there is a previous token require(0 < position.offset) { "No previous token" }
if (position == 0) { return tokens[position.offset - 1]
throw Error("No previous token")
}
return tokens[position - 1]
} }
/* * * * * * /* * * * * *
* Parsers * * Parsers *
* * * * * */ * * * * * */
private fun parseTerm(): Term { private fun parseWithTry(parseRule: () -> Term): Term {
// TODO Variable try {
// TODO braced term return parseRule()
// TODO Integer Term } catch (e: Exception) {
// TODO Float term throw ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Unexpected token", position)
// TODO Compound term }
// TODO Binary operator }
// TODO Unary operator
// TODO list term private fun parseClause(): Clause {
// TODO curly bracketed term return try {
return parseAtom() Fact(parseStructure())
} catch (e: Exception) {
Fact(parseAtom())
}
}
private fun parseStructure(): Structure {
val name = parseAtom()
val args = mutableListOf<Term>()
require(match(listOf(TokenType.PARENTHESIS_LEFT))) {
ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected '(' after structure name", position)
}
// TODO Handle arguments
require(match(listOf(TokenType.PARENTHESIS_RIGHT))) {
ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected ')' after structure arguments", position)
}
return Structure(name, args)
} }
private fun parseAtom(): Atom { private fun parseAtom(): Atom {
// TODO empty list
// TODO empty braces
return Atom(parseLetterDigit()) return Atom(parseLetterDigit())
// TODO graphic
// TODO quoted
// TODO double quoted
// TODO back quoted
// TODO semicolon
// TODO cut
} }
private fun parseLetterDigit(): String { private fun parseLetterDigit(): String {
// Check if the first character is a lowercase letter require(match(listOf(TokenType.ALPHANUMERIC)) && previous().value[0].isLowerCase()) {
if (match(listOf(TokenType.ALPHANUMERIC)) && previous().value[0].isLowerCase()) { ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected lowercase letter", position)
return previous().value
} }
// TODO How to fix? return previous().value
return ""
} }
} }

View file

@ -0,0 +1,12 @@
package parser.errors
import parser.state.ParserPosition
/**
 * Error raised when the parser cannot make progress on the token stream.
 *
 * Carries the error [type], a human-readable [message], and the parser
 * [position] at which the failure occurred.
 */
class ParsingError(
    private val type: ParsingErrorType,
    override val message: String,
    private val position: ParserPosition,
) : Throwable() {
    /** Renders as "(<position>) <TYPE>: <message>". */
    override fun toString(): String = "($position) $type: $message"
}

View file

@ -0,0 +1,7 @@
package parser.errors
/**
 * Categories of errors the parser reports via ParsingError.
 */
enum class ParsingErrorType {
    // The current token does not fit the grammar rule being parsed.
    UNEXPECTED_TOKEN,
    // A bug in the parser itself rather than in the input
    // (e.g. reloading a checkpoint when none was saved).
    INTERNAL_ERROR,
}

View file

@ -0,0 +1,25 @@
package parser.state
import parser.errors.ParsingError
import parser.errors.ParsingErrorType
/**
 * Mutable cursor into the parser's token stream, with checkpoint support
 * for backtracking.
 *
 * [save] pushes a snapshot of the current offset onto a stack; [reload] pops
 * the most recent snapshot and restores the offset, enabling try-a-rule,
 * backtrack-on-failure parsing.
 */
data class ParserPosition(var offset: Int) {
    // Stack of saved snapshots. copy() captures only `offset` (the copy gets
    // its own fresh, empty checkpoint stack), which is all a checkpoint needs.
    private val checkpoints: ArrayDeque<ParserPosition> = ArrayDeque()

    /** Record the current offset so a later [reload] can restore it. */
    fun save() {
        checkpoints.addLast(this.copy())
    }

    /**
     * Restore the offset from the most recent [save], consuming that checkpoint.
     *
     * @throws ParsingError with [ParsingErrorType.INTERNAL_ERROR] if no
     * checkpoint exists. (The previous `require { ParsingError(...) }` never
     * threw the ParsingError: `require` throws IllegalArgumentException and
     * only uses the lambda's result, stringified, as its message.)
     */
    fun reload() {
        if (checkpoints.isEmpty()) {
            throw ParsingError(ParsingErrorType.INTERNAL_ERROR, "No checkpoint to reload from", this)
        }
        offset = checkpoints.removeLast().offset
    }

    override fun toString(): String = "at $offset"
}

View file

@ -1,5 +1,6 @@
package lexer package lexer
import lexer.errors.LexingError
import org.junit.jupiter.api.Test import org.junit.jupiter.api.Test
import org.junit.jupiter.api.assertThrows import org.junit.jupiter.api.assertThrows
import kotlin.test.assertEquals import kotlin.test.assertEquals
@ -56,6 +57,6 @@ class ScanPrologTests {
@Test @Test
fun scan_variable_that_starts_with_a_number() { fun scan_variable_that_starts_with_a_number() {
assertThrows<Error> { Lexer("1X.").scan() } assertThrows<LexingError> { Lexer("1X.").scan() }
} }
} }

View file

@ -1,5 +1,6 @@
package lexer package lexer
import lexer.errors.LexingError
import org.junit.jupiter.api.Test import org.junit.jupiter.api.Test
import org.junit.jupiter.api.assertThrows import org.junit.jupiter.api.assertThrows
import org.junit.jupiter.api.Assertions.* import org.junit.jupiter.api.Assertions.*
@ -14,7 +15,7 @@ class ScanTests {
@Test @Test
fun scan_unknownSymbol_returns_Error() { fun scan_unknownSymbol_returns_Error() {
assertThrows<Error> { Lexer("").scan() } assertThrows<LexingError> { Lexer("").scan() }
} }
@Test @Test
@ -127,12 +128,12 @@ class ScanTests {
assertEquals(3, tokens.size) assertEquals(3, tokens.size)
assertEquals( assertEquals(
TokenType.LEFT_PARENTHESIS, TokenType.PARENTHESIS_LEFT,
tokens[0].type, tokens[0].type,
"Expected LEFT_PARENTHESES token, got ${tokens[0].type}" "Expected LEFT_PARENTHESES token, got ${tokens[0].type}"
) )
assertEquals( assertEquals(
TokenType.RIGHT_PARENTHESIS, TokenType.PARENTHESIS_RIGHT,
tokens[1].type, tokens[1].type,
"Expected RIGHT_PARENTHESES token, got ${tokens[1].type}" "Expected RIGHT_PARENTHESES token, got ${tokens[1].type}"
) )

View file

@ -1,11 +1,10 @@
package parser package parser
import lexer.Token import lexer.Token
import lexer.TokenPosition import lexer.state.TokenPosition
import lexer.TokenType import lexer.TokenType
import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Assertions.assertTrue import org.junit.jupiter.api.Assertions.assertTrue
import org.junit.jupiter.api.Disabled
import org.junit.jupiter.api.Test import org.junit.jupiter.api.Test
import prolog.ast.terms.Atom import prolog.ast.terms.Atom
import prolog.ast.terms.CompoundTerm import prolog.ast.terms.CompoundTerm
@ -73,4 +72,20 @@ class ParseTests {
assertEquals(1, result.size, "Expected 1 term") assertEquals(1, result.size, "Expected 1 term")
assertEquals(Atom(name), result[0], "Expected atom 'my_FooBar1'") assertEquals(Atom(name), result[0], "Expected atom 'my_FooBar1'")
} }
@Test
fun `parse compound term f()`() {
val input = listOf(
Token(TokenType.ALPHANUMERIC, "f", TokenPosition(0, 0, 1)),
Token(TokenType.PARENTHESIS_LEFT, "(", TokenPosition(0, 1, 2)),
Token(TokenType.PARENTHESIS_RIGHT, ")", TokenPosition(0, 3, 4))
)
val result = Parser(input).parse()
assertEquals(1, result.size, "Expected 1 term")
assertTrue(result[0] is CompoundTerm)
assertEquals("f", (result[0] as CompoundTerm).name)
assertEquals(0, (result[0] as CompoundTerm).arguments.size)
}
} }