Checkpoint

This commit is contained in:
  parent e749f8c6cb
  commit 48f94c30df

15 changed files with 175 additions and 67 deletions

@@ -1,5 +0,0 @@
-package lexer
-
-class Error(message: String, position: LexerPosition) : Exception("""
-    ${position.line}:${position.column + 1}: $message
-""".trimIndent())

@@ -1,5 +1,10 @@
 package lexer
 
+import lexer.errors.LexingError
+import lexer.errors.LexingErrorType
+import lexer.state.LexerPosition
+import lexer.state.TokenPosition
+
 class Lexer(private val source: String) {
     private var tokens: List<Token> = emptyList()
     private val position = LexerPosition(0, 0, -1)
@@ -12,14 +17,14 @@ class Lexer(private val source: String) {
         while (hasNext()) {
             val char: Char = peek()
             tokens += when {
-                char == '(' -> scanSymbol(TokenType.LEFT_PARENTHESIS)
-                char == ')' -> scanSymbol(TokenType.RIGHT_PARENTHESIS)
+                char == '(' -> scanSymbol(TokenType.PARENTHESIS_LEFT)
+                char == ')' -> scanSymbol(TokenType.PARENTHESIS_RIGHT)
                 char == '.' -> scanSymbol(TokenType.DOT)
                 char == '"' -> scanQuotedString()
                 char == '%' -> { scanComment(); continue }
                 char.isLetterOrDigit() -> scanAlphanumeric()
                 char.isWhitespace() -> { scanWhitespace(); continue }
-                else -> throw Error("Unknown symbol: $char", position)
+                else -> throw LexingError(LexingErrorType.UNKNOWN_TOKEN, "Did not recognize $char", position)
             }
         }
         tokens += Token(TokenType.EOF, "EOF", getPosition(0))
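
For orientation, this is roughly what the dispatch above yields on a small input, assuming scan() returns the accumulated token list and the scan* helpers behave as their names suggest (a sketch, not output captured from the commit):

    val tokens = Lexer("foo(bar).").scan()
    // Roughly: ALPHANUMERIC("foo"), PARENTHESIS_LEFT, ALPHANUMERIC("bar"),
    //          PARENTHESIS_RIGHT, DOT, plus the trailing EOF token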
@@ -33,8 +38,8 @@ class Lexer(private val source: String) {
 
     private fun peek(): Char {
         // Peek should only be called if there is a next character
-        if (!hasNext()) {
-            throw Error("Unexpected end of input", position)
+        require(hasNext()) {
+            LexingError(LexingErrorType.UNEXPECTED_END_OF_INPUT, "Expected additional character", position)
         }
 
         return source[position.offset]
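
A note on the require blocks introduced here and in the hunks below: Kotlin's require(value) { lazyMessage } throws IllegalArgumentException with the lambda result's toString() as the message; it never throws the LexingError constructed inside the lambda. Only the paths that throw LexingError(...) directly raise the typed error, which is what the updated tests assert. If the typed error is wanted on every path, a small helper would do it (hypothetical name, not part of this commit):

    // Like require(), but throws the supplied Throwable itself instead of
    // wrapping its toString() in an IllegalArgumentException.
    inline fun expect(condition: Boolean, error: () -> Throwable) {
        if (!condition) throw error()
    }

    // e.g. in peek():
    // expect(hasNext()) {
    //     LexingError(LexingErrorType.UNEXPECTED_END_OF_INPUT, "Expected additional character", position)
    // }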
@@ -79,8 +84,8 @@ class Lexer(private val source: String) {
 
     private fun scanQuotedString(): Token {
         // "Assert" that the next character is the start of a quoted string
-        if (next() != '"') {
-            throw Error("Illegal state: Expected opening quote", position)
+        require(next() == '"') {
+            LexingError(LexingErrorType.UNEXPECTED_TOKEN, "Expected opening quote '\"'", position)
         }
 
         var length = 0
@@ -90,8 +95,8 @@ class Lexer(private val source: String) {
         }
 
         // "Assert" that the next character is the end of the quoted string
-        if (next() != '"') {
-            throw Error("Illegal state: Expected closing quote", position)
+        require(next() == '"') {
+            LexingError(LexingErrorType.UNEXPECTED_TOKEN, "Expected closing quote '\"'", position)
         }
 
         val value = source.substring(position.offset - length - 1, position.offset - 1)
@@ -100,8 +105,8 @@ class Lexer(private val source: String) {
 
     private fun scanComment() {
         // "Assert" that the next character is the start of a comment
-        if (next() != '%') {
-            throw Error("Illegal state: Expected opening comment", position)
+        require(next() == '%') {
+            LexingError(LexingErrorType.UNEXPECTED_TOKEN, "Expected opening comment '%'", position)
         }
 
         // Skip all characters until the end of the line

@@ -1,5 +1,7 @@
 package lexer
 
+import lexer.state.TokenPosition
+
 data class Token(
     val type: TokenType,
     val value: String,

@@ -4,8 +4,12 @@ enum class TokenType {
     ALPHANUMERIC,
     // TODO Replace with SMALL_LETTER, CAPITAL_LETTER, DIGIT, HEX_DIGIT, ... ?
 
-    LEFT_PARENTHESIS, RIGHT_PARENTHESIS,
+    // Structure
+    COMMA,
     DOT,
+    PARENTHESIS_LEFT, PARENTHESIS_RIGHT,
 
+    // Special
+
     EOF
 }

src/lexer/errors/LexingError.kt (new file, 13 lines)
@@ -0,0 +1,13 @@
+package lexer.errors
+
+import lexer.state.LexerPosition
+
+data class LexingError(
+    val type: LexingErrorType,
+    override val message: String,
+    val position: LexerPosition
+) : Throwable(
+    """
+    ${position.line}:${position.column + 1} ${type}: $message
+    """.trimIndent()
+)
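
Also worth noting: LexingError extends Throwable directly rather than Exception, so a generic catch (e: Exception) will not intercept it; it has to be caught as LexingError or Throwable. A minimal illustration (illustrative only, not from the commit):

    try {
        throw LexingError(LexingErrorType.UNKNOWN_TOKEN, "example", LexerPosition(0, 0, 0))
    } catch (e: Exception) {
        // Never reached: LexingError is not an Exception subtype.
    } catch (e: LexingError) {
        // Reached.
    }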

src/lexer/errors/LexingErrorType.kt (new file, 7 lines)
@@ -0,0 +1,7 @@
+package lexer.errors
+
+enum class LexingErrorType {
+    UNKNOWN_TOKEN,
+    UNEXPECTED_TOKEN,
+    UNEXPECTED_END_OF_INPUT,
+}

@@ -1,3 +1,3 @@
-package lexer
+package lexer.state
 
 data class LexerPosition(var offset: Int, var line: Int, var column: Int)

@@ -1,3 +1,3 @@
-package lexer
+package lexer.state
 
 data class TokenPosition(val line: Int, val column: Int, val length: Int)

@@ -2,18 +2,37 @@ package parser
 
 import lexer.Token
 import lexer.TokenType
+import parser.errors.ParsingError
+import parser.errors.ParsingErrorType
+import parser.state.ParserPosition
+import prolog.ast.logic.Clause
+import prolog.ast.logic.Fact
+import prolog.ast.logic.Rule
 import prolog.ast.terms.Atom
+import prolog.ast.terms.Structure
 import prolog.ast.terms.Term
 
 class Parser(private val tokens: List<Token>) {
-    private var position: Int = 0
+    private val position: ParserPosition = ParserPosition(0)
 
     fun parse(): List<Term> {
         val terms = mutableListOf<Term>()
 
-        // TODO
         while (hasNext()) {
-            terms.add(parseTerm())
+            position.save()
+
+            var term: Term? = null
+
+            while (term == null) {
+                // Try each parser rule in order
+
+            }
+
+            require(term != null) {
+                ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected a term", position)
+            }
+
+            terms.add(term)
         }
 
         return terms
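
As committed, the inner while (term == null) loop has an empty body, so it would spin forever once entered, and the require(term != null) after it is unreachable; the rule dispatch is evidently still to come at this checkpoint. One plausible shape for the missing body, assuming a clause counts as a Term and using a hypothetical tryRule helper (a sketch, not the author's code):

    // Attempt each rule in order; a failed rule yields null so the next
    // one can be tried from the saved checkpoint.
    term = tryRule { parseClause() }
        ?: tryRule { parseAtom() }
        ?: throw ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected a term", position)

    // where tryRule rewinds on failure, along the lines of:
    // private fun <T> tryRule(rule: () -> T): T? =
    //     try { rule() } catch (e: Exception) { position.reload(); position.save(); null }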
@@ -21,6 +40,7 @@ class Parser(private val tokens: List<Token>) {
 
     /**
      * Matches the current token with any of the expected types.
+     * If it matches, it consumes the token and returns true.
      *
      * @param types The list of expected token types.
      * @return True if the current token matches any of the expected types, false otherwise.
@@ -46,71 +66,72 @@ class Parser(private val tokens: List<Token>) {
     private fun hasNext(): Boolean {
         // Check if the position is within the tokens list
         // TODO Check for EOF instead?
-        return position < tokens.size
+        return position.offset < tokens.size
     }
 
     private fun peek(): Token {
-        // Peek should only be called if there is a next token
-        if (!hasNext()) {
-            throw Error("Unexpected end of input")
-        }
-
-        return tokens[position]
+        require(hasNext()) { "Unexpected end of input" }
+        return tokens[position.offset]
     }
 
     private fun next(): Token {
         val token = peek()
-        position++
+        position.offset++
         return token
     }
 
     private fun previous(): Token {
-        // Previous should only be called if there is a previous token
-        if (position == 0) {
-            throw Error("No previous token")
-        }
-
-        return tokens[position - 1]
+        require(0 < position.offset) { "No previous token" }
+        return tokens[position.offset - 1]
     }
 
     /* * * * * *
      * Parsers *
      * * * * * */
 
-    private fun parseTerm(): Term {
-        // TODO Variable
-        // TODO braced term
-        // TODO Integer Term
-        // TODO Float term
-        // TODO Compound term
-        // TODO Binary operator
-        // TODO Unary operator
-        // TODO list term
-        // TODO curly bracketed term
-        return parseAtom()
+    private fun parseWithTry(parseRule: () -> Term): Term {
+        try {
+            return parseRule()
+        } catch (e: Exception) {
+            throw ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Unexpected token", position)
+        }
+    }
+
+    private fun parseClause(): Clause {
+        return try {
+            Fact(parseStructure())
+        } catch (e: Exception) {
+            Fact(parseAtom())
+        }
+    }
+
+    private fun parseStructure(): Structure {
+        val name = parseAtom()
+        val args = mutableListOf<Term>()
+
+        require(match(listOf(TokenType.PARENTHESIS_LEFT))) {
+            ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected '(' after structure name", position)
+        }
+
+        // TODO Handle arguments
+
+        require(match(listOf(TokenType.PARENTHESIS_RIGHT))) {
+            ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected ')' after structure arguments", position)
+        }
+
+        return Structure(name, args)
     }
 
     private fun parseAtom(): Atom {
-        // TODO empty list
-        // TODO empty braces
-
         return Atom(parseLetterDigit())
-
-        // TODO graphic
-        // TODO quoted
-        // TODO double quoted
-        // TODO back quoted
-        // TODO semicolon
-        // TODO cut
     }
 
     private fun parseLetterDigit(): String {
-        // Check if the first character is a lowercase letter
-        if (match(listOf(TokenType.ALPHANUMERIC)) && previous().value[0].isLowerCase()) {
-            return previous().value
-        }
-
-        // TODO How to fix?
-        return ""
+        require(match(listOf(TokenType.ALPHANUMERIC)) && previous().value[0].isLowerCase()) {
+            ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected lowercase letter", position)
+        }
+
+        return previous().value
     }
 }
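
One caution on the parseClause fallback above: when Fact(parseStructure()) fails, the catch branch calls parseAtom() from wherever the failed attempt left off, because the tokens it consumed are never rewound. The ParserPosition checkpoints introduced below look like the intended remedy; a hedged sketch of wiring them in (hypothetical, not part of the commit):

    // Sketch only: rewind to a checkpoint when the first alternative fails.
    private fun parseClauseWithBacktracking(): Clause {
        position.save()
        return try {
            Fact(parseStructure())
        } catch (e: Exception) {
            position.reload()  // undo the tokens consumed by the failed attempt
            Fact(parseAtom())
        }
        // Note: on success the checkpoint stays on the stack; a fuller
        // version would pop it there as well.
    }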

src/parser/errors/ParsingError.kt (new file, 12 lines)
@@ -0,0 +1,12 @@
+package parser.errors
+
+import parser.state.ParserPosition
+
+class ParsingError(private val type: ParsingErrorType, override val message: String, private val position: ParserPosition) :
+    Throwable() {
+    override fun toString(): String {
+        return """
+        ($position) ${type}: $message
+        """.trimIndent()
+    }
+}

src/parser/errors/ParsingErrorType.kt (new file, 7 lines)
@@ -0,0 +1,7 @@
+package parser.errors
+
+enum class ParsingErrorType {
+    UNEXPECTED_TOKEN,
+
+    INTERNAL_ERROR,
+}

src/parser/state/ParserPosition.kt (new file, 25 lines)
@@ -0,0 +1,25 @@
+package parser.state
+
+import parser.errors.ParsingError
+import parser.errors.ParsingErrorType
+
+data class ParserPosition(var offset: Int) {
+    private val checkpoints: ArrayDeque<ParserPosition> = ArrayDeque()
+
+    fun save() {
+        checkpoints.addLast(this.copy())
+    }
+
+    fun reload() {
+        require(checkpoints.isNotEmpty()) {
+            ParsingError(ParsingErrorType.INTERNAL_ERROR, "No checkpoint to reload from", this)
+        }
+
+        val checkpoint = checkpoints.removeLast()
+        offset = checkpoint.offset
+    }
+
+    override fun toString(): String {
+        return "at $offset"
+    }
+}
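
In short, save() pushes a copy of the current position onto a stack and reload() pops the most recent checkpoint and restores offset, which is what enables the speculative parsing sketched above:

    val position = ParserPosition(0)
    position.save()        // checkpoint at offset 0
    position.offset = 3    // speculatively consume three tokens
    position.reload()      // the attempt failed: offset is 0 again

The require { ParsingError(...) } in reload() carries the same caveat as the lexer's require blocks: on an empty stack it raises IllegalArgumentException, using the ParsingError only for its message.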

@@ -1,5 +1,6 @@
 package lexer
 
+import lexer.errors.LexingError
 import org.junit.jupiter.api.Test
 import org.junit.jupiter.api.assertThrows
 import kotlin.test.assertEquals
@@ -56,6 +57,6 @@ class ScanPrologTests {
 
     @Test
     fun scan_variable_that_starts_with_a_number() {
-        assertThrows<Error> { Lexer("1X.").scan() }
+        assertThrows<LexingError> { Lexer("1X.").scan() }
     }
 }

@@ -1,5 +1,6 @@
 package lexer
 
+import lexer.errors.LexingError
 import org.junit.jupiter.api.Test
 import org.junit.jupiter.api.assertThrows
 import org.junit.jupiter.api.Assertions.*
@@ -14,7 +15,7 @@ class ScanTests {
 
     @Test
     fun scan_unknownSymbol_returns_Error() {
-        assertThrows<Error> { Lexer("€").scan() }
+        assertThrows<LexingError> { Lexer("€").scan() }
    }
 
     @Test
@@ -127,12 +128,12 @@ class ScanTests {
         assertEquals(3, tokens.size)
 
         assertEquals(
-            TokenType.LEFT_PARENTHESIS,
+            TokenType.PARENTHESIS_LEFT,
             tokens[0].type,
             "Expected LEFT_PARENTHESES token, got ${tokens[0].type}"
         )
         assertEquals(
-            TokenType.RIGHT_PARENTHESIS,
+            TokenType.PARENTHESIS_RIGHT,
             tokens[1].type,
             "Expected RIGHT_PARENTHESES token, got ${tokens[1].type}"
         )

@@ -1,11 +1,10 @@
 package parser
 
 import lexer.Token
-import lexer.TokenPosition
+import lexer.state.TokenPosition
 import lexer.TokenType
 import org.junit.jupiter.api.Assertions.assertEquals
 import org.junit.jupiter.api.Assertions.assertTrue
-import org.junit.jupiter.api.Disabled
 import org.junit.jupiter.api.Test
 import prolog.ast.terms.Atom
 import prolog.ast.terms.CompoundTerm
@@ -73,4 +72,20 @@ class ParseTests {
         assertEquals(1, result.size, "Expected 1 term")
         assertEquals(Atom(name), result[0], "Expected atom 'my_FooBar1'")
     }
+
+    @Test
+    fun `parse compound term f()`() {
+        val input = listOf(
+            Token(TokenType.ALPHANUMERIC, "f", TokenPosition(0, 0, 1)),
+            Token(TokenType.PARENTHESIS_LEFT, "(", TokenPosition(0, 1, 2)),
+            Token(TokenType.PARENTHESIS_RIGHT, ")", TokenPosition(0, 3, 4))
+        )
+
+        val result = Parser(input).parse()
+
+        assertEquals(1, result.size, "Expected 1 term")
+        assertTrue(result[0] is CompoundTerm)
+        assertEquals("f", (result[0] as CompoundTerm).name)
+        assertEquals(0, (result[0] as CompoundTerm).arguments.size)
+    }
 }