Remove old lexer+parser implementation
This commit is contained in:
parent
d5632e9217
commit
a4ec29f084
15 changed files with 0 additions and 706 deletions
|
@ -1,127 +0,0 @@
|
|||
package lexer
|
||||
|
||||
import lexer.errors.LexingError
|
||||
import lexer.errors.LexingErrorType
|
||||
import lexer.state.LexerPosition
|
||||
import lexer.state.TokenPosition
|
||||
|
||||
/**
 * Hand-written single-pass lexer.
 *
 * Call [scan] once to tokenize the whole input; the returned list is always
 * terminated by a [TokenType.EOF] token.
 */
class Lexer(private val source: String) {
    private var tokens: List<Token> = emptyList()

    // Column starts at -1 so the first consumed character lands on column 0.
    private val position = LexerPosition(0, 0, -1)

    /**
     * Scans the source code and returns a list of tokens.
     * @return List of [Token]s, terminated by an EOF token
     * @throws LexingError if a character cannot start any known token
     */
    fun scan(): List<Token> {
        while (hasNext()) {
            val char: Char = peek()
            tokens += when {
                char == '(' -> scanSymbol(TokenType.PARENTHESIS_LEFT)
                char == ')' -> scanSymbol(TokenType.PARENTHESIS_RIGHT)
                char == '.' -> scanSymbol(TokenType.DOT)
                char == '"' -> scanQuotedString()
                char == '%' -> { scanComment(); continue }
                char.isLetterOrDigit() -> scanAlphanumeric()
                char.isWhitespace() -> { scanWhitespace(); continue }
                else -> throw LexingError(LexingErrorType.UNKNOWN_TOKEN, "Did not recognize $char", position)
            }
        }
        tokens += Token(TokenType.EOF, "EOF", getPosition(0))
        return tokens
    }

    private fun hasNext(): Boolean {
        // Check if the position is within the source length
        return position.offset < source.length
    }

    private fun peek(): Char {
        // BUG FIX: the previous revision passed a LexingError to require(), which
        // only used it as the message of an IllegalArgumentException instead of
        // throwing it. The lexer's own error type is now actually thrown.
        if (!hasNext()) {
            throw LexingError(LexingErrorType.UNEXPECTED_END_OF_INPUT, "Expected additional character", position)
        }

        return source[position.offset]
    }

    private fun next(): Char {
        // Advance the position and return the character
        val char = peek()
        position.offset++
        position.column++
        return char
    }

    private fun getPosition(length: Int = 1): TokenPosition {
        // Return a new TokenPosition based on the current LexerPosition.
        // NOTE(review): this reports the column *after* the token has been
        // consumed, not the column of its first character — confirm intent.
        return TokenPosition(position.line, position.column, length)
    }

    /* * * * * * *
     * Scanners *
     * * * * * * */

    /**
     * Scans a symbol token, given the expected [TokenType].
     * @param tokenType The expected [TokenType]
     * @return The scanned [Token]
     */
    private fun scanSymbol(tokenType: TokenType): Token {
        return Token(tokenType, next().toString(), getPosition(1))
    }

    private fun scanAlphanumeric(): Token {
        // Consume the maximal run of letters/digits, then slice it out of the source.
        var length = 0
        while (hasNext() && peek().isLetterOrDigit()) {
            next()
            length++
        }
        val value = source.substring(position.offset - length, position.offset)
        return Token(TokenType.ALPHANUMERIC, value, getPosition(length))
    }

    private fun scanQuotedString(): Token {
        // BUG FIX: the old error messages claimed '(' / ')' were expected; the
        // character in question is the double quote. Also throw the LexingError
        // directly instead of feeding it to require() as a message.
        if (next() != '"') {
            throw LexingError(LexingErrorType.UNEXPECTED_TOKEN, "Expected opening quote '\"'", position)
        }

        var length = 0
        while (hasNext() && peek() != '"') {
            next()
            length++
        }

        // next() itself raises UNEXPECTED_END_OF_INPUT if the string is unterminated.
        if (next() != '"') {
            throw LexingError(LexingErrorType.UNEXPECTED_TOKEN, "Expected closing quote '\"'", position)
        }

        // Slice the body between the quotes (offset is now one past the closing quote).
        val value = source.substring(position.offset - length - 1, position.offset - 1)
        // NOTE(review): quoted strings are tagged ALPHANUMERIC because no dedicated
        // string TokenType exists yet — confirm this is intended.
        return Token(TokenType.ALPHANUMERIC, value, getPosition(length))
    }

    private fun scanComment() {
        if (next() != '%') {
            throw LexingError(LexingErrorType.UNEXPECTED_TOKEN, "Expected opening comment '%'", position)
        }

        // Skip all characters until the end of the line; the trailing '\n' is left
        // for scanWhitespace so line accounting happens in one place.
        while (hasNext() && peek() != '\n') {
            next()
        }
    }

    private fun scanWhitespace() {
        // Skip all whitespace characters, tracking line/column across newlines.
        while (hasNext() && peek().isWhitespace()) {
            if (next() == '\n') {
                position.line++
                position.column = 0
            }
        }
    }
}
|
|
@ -1,9 +0,0 @@
|
|||
package lexer
|
||||
|
||||
import lexer.state.TokenPosition
|
||||
|
||||
/**
 * A single lexical token produced by the [Lexer].
 *
 * @property type the token's classification
 * @property value the raw text of the token as it appeared in the source
 * @property position where (and how long) the token is in the source
 */
data class Token(
    val type: TokenType,
    val value: String,
    val position: TokenPosition
)
|
|
@ -1,15 +0,0 @@
|
|||
package lexer
|
||||
|
||||
/** The categories of tokens the lexer can produce. */
enum class TokenType {
    // Catch-all for runs of letters/digits and quoted-string bodies.
    ALPHANUMERIC,
    // TODO Replace with SMALL_LETTER, CAPITAL_LETTER, DIGIT, HEX_DIGIT, ... ?

    // Structure
    COMMA,
    DOT,
    PARENTHESIS_LEFT, PARENTHESIS_RIGHT,

    // Special

    // End-of-input marker; the lexer always appends exactly one.
    EOF
}
|
|
@ -1,13 +0,0 @@
|
|||
package lexer.errors
|
||||
|
||||
import lexer.state.LexerPosition
|
||||
|
||||
/**
 * Error describing why the lexer could not tokenize the input.
 *
 * The [Throwable] message is rendered as "line:column TYPE: message";
 * the stored column is 0-based, hence the `+ 1` for display.
 *
 * @property type the category of lexing failure
 * @property message human-readable description of the failure
 * @property position the lexer cursor state at the point of failure
 */
data class LexingError(
    val type: LexingErrorType,
    override val message: String,
    val position: LexerPosition
) : Throwable(
    """
    ${position.line}:${position.column + 1} ${type}: $message
    """.trimIndent()
)
|
|
@ -1,7 +0,0 @@
|
|||
package lexer.errors
|
||||
|
||||
/** Classifies the failure modes the lexer can report via [LexingError]. */
enum class LexingErrorType {
    // The current character does not start any known token.
    UNKNOWN_TOKEN,
    // A specific character was required but something else was found.
    UNEXPECTED_TOKEN,
    // The source ended while a token was still being read.
    UNEXPECTED_END_OF_INPUT,
}
|
|
@ -1,3 +0,0 @@
|
|||
package lexer.state
|
||||
|
||||
// Mutable cursor state while lexing: absolute character offset plus line and column.
// NOTE(review): the Lexer constructs this with column = -1 so that the first
// consumed character lands on column 0.
data class LexerPosition(var offset: Int, var line: Int, var column: Int)
|
|
@ -1,3 +0,0 @@
|
|||
package lexer.state
|
||||
|
||||
// Immutable source location of a finished token: line, column, and length in characters.
data class TokenPosition(val line: Int, val column: Int, val length: Int)
|
|
@ -1,137 +0,0 @@
|
|||
package parser
|
||||
|
||||
import lexer.Token
|
||||
import lexer.TokenType
|
||||
import parser.errors.ParsingError
|
||||
import parser.errors.ParsingErrorType
|
||||
import parser.state.ParserPosition
|
||||
import prolog.ast.logic.Clause
|
||||
import prolog.ast.logic.Fact
|
||||
import prolog.ast.logic.Rule
|
||||
import prolog.ast.terms.Atom
|
||||
import prolog.ast.terms.Structure
|
||||
import prolog.ast.terms.Term
|
||||
|
||||
/**
 * Recursive-descent parser over the token stream produced by the lexer.
 *
 * Call [parse] once; it consumes tokens up to (but not including) the EOF token.
 */
class Parser(private val tokens: List<Token>) {
    private val position: ParserPosition = ParserPosition(0)

    /**
     * Parses the token stream into a list of terms.
     * @return the parsed [Term]s in source order
     * @throws ParsingError if the upcoming tokens do not form a recognizable term
     */
    fun parse(): List<Term> {
        val terms = mutableListOf<Term>()

        // BUG FIX: the previous revision spun in `while (term == null)` with an
        // empty body — an unconditional infinite loop — and never stopped at the
        // EOF token (its own TODO flagged this). The loop now terminates at EOF
        // and each iteration attempts the parser rules directly.
        while (hasNext() && !check(TokenType.EOF)) {
            position.save()

            // Try each parser rule in order; rewind and report if none matches.
            val term: Term = try {
                parseClause()
            } catch (e: Exception) {
                position.reload()
                throw ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected a term", position)
            }

            terms.add(term)
        }

        return terms
    }

    /**
     * Matches the current token with any of the expected types.
     * If it matches, it consumes the token and returns true.
     *
     * @param types The list of expected token types.
     * @return True if the current token matches any of the expected types, false otherwise.
     */
    private fun match(types: List<TokenType>): Boolean {
        for (type in types) {
            if (check(type)) {
                next()
                return true
            }
        }

        return false
    }

    /**
     * Checks if the current token matches the expected type, without consuming it.
     */
    private fun check(type: TokenType): Boolean {
        return hasNext() && peek().type == type
    }

    private fun hasNext(): Boolean {
        // Check if the position is within the tokens list
        return position.offset < tokens.size
    }

    private fun peek(): Token {
        require(hasNext()) { "Unexpected end of input" }

        return tokens[position.offset]
    }

    private fun next(): Token {
        // Consume and return the current token.
        val token = peek()
        position.offset++
        return token
    }

    private fun previous(): Token {
        // The most recently consumed token.
        require(0 < position.offset) { "No previous token" }
        return tokens[position.offset - 1]
    }

    /* * * * * *
     * Parsers *
     * * * * * */

    // Runs a parse rule, converting any failure into a uniform ParsingError.
    private fun parseWithTry(parseRule: () -> Term): Term {
        try {
            return parseRule()
        } catch (e: Exception) {
            throw ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Unexpected token", position)
        }
    }

    private fun parseClause(): Clause {
        // BUG FIX: the previous revision fell back to parseAtom() without rewinding,
        // so tokens consumed by the failed structure attempt were silently lost.
        // NOTE(review): the checkpoint pushed here is leaked when parseStructure()
        // succeeds — ParserPosition has no discard operation; confirm acceptable.
        position.save()
        return try {
            Fact(parseStructure())
        } catch (e: Exception) {
            position.reload()
            Fact(parseAtom())
        }
    }

    private fun parseStructure(): Structure {
        val name = parseAtom()
        val args = mutableListOf<Term>()

        require(match(listOf(TokenType.PARENTHESIS_LEFT))) {
            ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected '(' after structure name", position)
        }

        // TODO Handle arguments

        require(match(listOf(TokenType.PARENTHESIS_RIGHT))) {
            ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected ')' after structure arguments", position)
        }

        return Structure(name, args)
    }

    private fun parseAtom(): Atom {
        return Atom(parseLetterDigit())
    }

    private fun parseLetterDigit(): String {
        // NOTE(review): require() receives a ParsingError only as its lazy *message*;
        // the exception actually thrown is IllegalArgumentException, which is what
        // the `catch (e: Exception)` backtracking above relies on (ParsingError
        // extends Throwable and would NOT be caught by it).
        require(match(listOf(TokenType.ALPHANUMERIC)) && previous().value[0].isLowerCase()) {
            ParsingError(ParsingErrorType.UNEXPECTED_TOKEN, "Expected lowercase letter", position)
        }

        return previous().value
    }
}
|
|
@ -1,12 +0,0 @@
|
|||
package parser.errors
|
||||
|
||||
import parser.state.ParserPosition
|
||||
|
||||
/**
 * Error describing why the parser could not interpret the token stream.
 *
 * Rendered via [toString] as "(at N) TYPE: message".
 *
 * @property type the category of parsing failure
 * @property message human-readable description of the failure
 * @property position the parser cursor state at the point of failure
 */
class ParsingError(
    private val type: ParsingErrorType,
    override val message: String,
    private val position: ParserPosition
) : Throwable() {
    // Plain template is equivalent to the former raw string + trimIndent().
    override fun toString(): String = "($position) $type: $message"
}
|
|
@ -1,7 +0,0 @@
|
|||
package parser.errors
|
||||
|
||||
/** Classifies the failure modes the parser can report via [ParsingError]. */
enum class ParsingErrorType {
    // The upcoming tokens do not match what the active parse rule expects.
    UNEXPECTED_TOKEN,

    // A parser invariant was violated (e.g. reloading without a checkpoint).
    INTERNAL_ERROR,
}
|
|
@ -1,25 +0,0 @@
|
|||
package parser.state
|
||||
|
||||
import parser.errors.ParsingError
|
||||
import parser.errors.ParsingErrorType
|
||||
|
||||
/**
 * Mutable cursor into the parser's token list, with checkpoint support
 * for backtracking.
 */
data class ParserPosition(var offset: Int) {
    // Stack of saved cursors; copy() snapshots only `offset`, since the
    // checkpoint stack itself is not a constructor property.
    private val checkpoints: ArrayDeque<ParserPosition> = ArrayDeque()

    /** Pushes a snapshot of the current offset onto the checkpoint stack. */
    fun save() {
        checkpoints += copy()
    }

    /** Pops the most recent checkpoint and restores its offset. */
    fun reload() {
        require(checkpoints.isNotEmpty()) {
            ParsingError(ParsingErrorType.INTERNAL_ERROR, "No checkpoint to reload from", this)
        }

        offset = checkpoints.removeLast().offset
    }

    override fun toString(): String = "at $offset"
}
|
Reference in a new issue