feat(lexer): Comments

This commit is contained in:
Tibo De Peuter 2025-03-27 18:34:24 +01:00
parent dc9e43e9ba
commit 8429733200
Signed by: tdpeuter
GPG key ID: 38297DE43F75FFE2
3 changed files with 61 additions and 3 deletions

View file

@ -1,5 +1,5 @@
package lexer package lexer
class Error(message: String, position: LexerPosition) : Exception(""" class Error(message: String, position: LexerPosition) : Exception("""
Error at ${position.line}:${position.column}: $message ${position.line}:${position.column + 1}: $message
""".trimIndent()) """.trimIndent())

View file

@ -6,6 +6,10 @@ class Lexer(private val source: String) {
private var tokens: List<Token> = LinkedList() private var tokens: List<Token> = LinkedList()
private val position = LexerPosition(0, 0, -1) private val position = LexerPosition(0, 0, -1)
/**
* Scans the source code and returns a list of tokens.
* @return List of [Token]s
*/
fun scan(): List<Token> { fun scan(): List<Token> {
while (hasNext()) { while (hasNext()) {
val char: Char = peek() val char: Char = peek()
@ -14,8 +18,9 @@ class Lexer(private val source: String) {
char == ')' -> scanSymbol(TokenType.RIGHT_PARENTHESES) char == ')' -> scanSymbol(TokenType.RIGHT_PARENTHESES)
char == '.' -> scanSymbol(TokenType.DOT) char == '.' -> scanSymbol(TokenType.DOT)
char == '"' -> scanQuotedString() char == '"' -> scanQuotedString()
char == '%' -> { scanComment(); continue }
char.isLetterOrDigit() -> scanAlphanumeric() char.isLetterOrDigit() -> scanAlphanumeric()
char.isWhitespace() -> { scanWhitespace(); continue } char.isWhitespace() -> { scanWhitespace(); continue }
else -> throw Error("Unknown symbol: $char", position) else -> throw Error("Unknown symbol: $char", position)
} }
} }
@ -24,10 +29,12 @@ class Lexer(private val source: String) {
} }
private fun hasNext(): Boolean { private fun hasNext(): Boolean {
// Check if the position is within the source length
return position.offset < source.length return position.offset < source.length
} }
private fun peek(): Char { private fun peek(): Char {
// Peek should only be called if there is a next character
if (!hasNext()) { if (!hasNext()) {
throw Error("Unexpected end of input", position) throw Error("Unexpected end of input", position)
} }
@ -36,6 +43,7 @@ class Lexer(private val source: String) {
} }
private fun next(): Char { private fun next(): Char {
// Advance the position and return the character
val char = peek() val char = peek()
position.offset++ position.offset++
position.column++ position.column++
@ -43,16 +51,25 @@ class Lexer(private val source: String) {
} }
private fun getPosition(length: Int = 1): TokenPosition { private fun getPosition(length: Int = 1): TokenPosition {
// Return a new TokenPosition based on the current LexerPosition
return TokenPosition(position.line, position.column, length) return TokenPosition(position.line, position.column, length)
} }
// Scanners /* * * * * * *
* Scanners *
* * * * * * */
/**
* Scans a symbol token, given the expected [TokenType].
* @param tokenType The expected [TokenType]
* @return The scanned [Token]
*/
private fun scanSymbol(tokenType: TokenType): Token { private fun scanSymbol(tokenType: TokenType): Token {
return Token(tokenType, next().toString(), getPosition(1)) return Token(tokenType, next().toString(), getPosition(1))
} }
private fun scanAlphanumeric(): Token { private fun scanAlphanumeric(): Token {
// Scan all alphanumeric characters
var length = 0 var length = 0
while (hasNext() && peek().isLetterOrDigit()) { while (hasNext() && peek().isLetterOrDigit()) {
next() next()
@ -63,22 +80,40 @@ class Lexer(private val source: String) {
} }
private fun scanQuotedString(): Token { private fun scanQuotedString(): Token {
// "Assert" that the next character is the start of a quoted string
if (next() != '"') { if (next() != '"') {
throw Error("Illegal state: Expected opening quote", position) throw Error("Illegal state: Expected opening quote", position)
} }
var length = 0 var length = 0
while (hasNext() && peek() != '"') { while (hasNext() && peek() != '"') {
next() next()
length++ length++
} }
// "Assert" that the next character is the end of the quoted string
if (next() != '"') { if (next() != '"') {
throw Error("Illegal state: Expected closing quote", position) throw Error("Illegal state: Expected closing quote", position)
} }
val value = source.substring(position.offset - length - 1, position.offset - 1) val value = source.substring(position.offset - length - 1, position.offset - 1)
return Token(TokenType.ALPHANUMERIC, value, getPosition(length)) return Token(TokenType.ALPHANUMERIC, value, getPosition(length))
} }
/**
 * Skips a single-line comment.
 *
 * Consumes the leading '%' and then every character up to, but not including,
 * the next newline character (or the end of the source). No token is produced.
 */
private fun scanComment() {
    // Guard: the lexer must currently be positioned on the comment marker
    val opener = next()
    if (opener != '%') {
        throw Error("Illegal state: Expected opening comment", position)
    }

    // Consume the remainder of the line; the newline itself is not consumed here
    while (true) {
        if (!hasNext()) break
        if (peek() == '\n') break
        next()
    }
}
private fun scanWhitespace() { private fun scanWhitespace() {
// Skip all whitespace characters
while (hasNext() && peek().isWhitespace()) { while (hasNext() && peek().isWhitespace()) {
if (next() == '\n') { if (next() == '\n') {
position.line++ position.line++

View file

@ -167,4 +167,27 @@ class LexerScanTest {
assertEquals("string with space", tokens[0].value, "Expected 'string with space', got ${tokens[0].value}") assertEquals("string with space", tokens[0].value, "Expected 'string with space', got ${tokens[0].value}")
} }
@Test
fun scan_comments_returns_nothing() {
    // A source that contains only a comment is expected to yield just the EOF token
    val tokens = Lexer("% comment").scan()

    assertEquals(1, tokens.size)

    val onlyToken = tokens.first()
    assertEquals(TokenType.EOF, onlyToken.type, "Expected EOF token, got ${tokens[0].type}")
}
@Test
fun scan_comment_and_sentence_returns_sentence() {
    // Line one is a comment; line two holds the only word that should be tokenized
    val source = """
% comment
sentence
""".trimIndent()
    val tokens = Lexer(source).scan()

    // Two tokens are expected; the first must be the word from the second line
    assertEquals(2, tokens.size)

    val sentence = tokens.first()
    assertEquals(TokenType.ALPHANUMERIC, sentence.type, "Expected ALPHANUMERIC token, got ${tokens[0].type}")
    assertEquals("sentence", sentence.value, "Expected 'sentence', got ${tokens[0].value}")
}
} }