feat(lexer): Scan alphanumerics & whitespace

This commit is contained in:
Tibo De Peuter 2025-03-27 17:25:58 +01:00
parent e0754650bc
commit e1f632ca40
Signed by: tdpeuter
GPG key ID: 38297DE43F75FFE2
5 changed files with 60 additions and 40 deletions

View file

@ -4,34 +4,24 @@ import java.util.LinkedList
class Lexer(private val source: String) {
// Tokens collected so far; scan() appends to this list and returns it.
private var tokens: List<Token> = LinkedList()
// NOTE(review): `position` is declared twice below and `offset` also exists as a
// LexerPosition field — this looks like the removed and added sides of the change
// rendered together without +/- markers. Confirm which declarations are current.
private val position: LexerPosition = LexerPosition(0, 0, 0)
private var offset: Int = 0
private val position = LexerPosition(0, 0, -1)
/**
 * Scans the source until no input remains and returns the collected tokens,
 * with an EOF token appended after the loop.
 *
 * Throws an Error carrying the current position when a character is neither a dot,
 * a letter/digit, nor whitespace.
 *
 * NOTE(review): this body shows two variants of the loop and of the EOF append side by
 * side (the `scanToken()` call versus the inline `when`) — presumably old and new diff
 * lines without change markers. Confirm which lines belong to the current revision.
 */
fun scan(): List<Token> {
while (hasNext()) {
tokens += scanToken()
val char: Char = peek()
tokens += when {
char == '.' -> scanDot()
char.isLetterOrDigit() -> scanAlphanumeric()
// Whitespace produces no token: consume it and restart the loop without appending.
char.isWhitespace() -> { scanWhitespace(); continue }
else -> throw Error("Unknown symbol: $char", position)
}
}
position.length = 0
tokens += Token(TokenType.EOF, position)
// The EOF token carries the literal value "EOF" and a zero-length position.
tokens += Token(TokenType.EOF, "EOF", getPosition(0))
return tokens
}
/**
 * Scans exactly one token, chosen by the character currently under the cursor.
 *
 * The shared position's length is set to 1 before dispatching. A dot is handled by
 * scanDot(), a letter or digit by scanAlphanumeric(); any other character raises an
 * Error that carries the current position.
 */
private fun scanToken(): Token {
    val char: Char = peek()
    position.length = 1

    if (char == '.') {
        return scanDot()
    }
    if (char.isLetterOrDigit()) {
        return scanAlphanumeric()
    }
    throw Error("Unknown symbol: $char", position)
}
// Reports whether the read offset is still inside the source string.
// NOTE(review): two return statements are shown (one using `offset`, one using
// `position.offset`); only the first is reachable as written. This looks like the old
// and new diff lines rendered together — confirm which one is current.
private fun hasNext(): Boolean {
return offset < source.length
return position.offset < source.length
}
private fun peek(): Char {
@ -39,29 +29,45 @@ class Lexer(private val source: String) {
throw Error("Unexpected end of input", position)
}
return source[offset]
return source[position.offset]
}
/**
 * Returns the character under the cursor and then advances the cursor by one.
 *
 * peek() runs first, so if it throws, the position is left unchanged. On success both
 * the source offset and the column counter are incremented; the line counter is not
 * touched here.
 */
private fun next(): Char = peek().also {
    position.offset++
    position.column++
}
/**
 * Builds a TokenPosition from the lexer's current line and column.
 *
 * @param length length to record in the returned position; defaults to 1.
 * @return a new TokenPosition holding the current line, the current column and [length].
 */
private fun getPosition(length: Int = 1): TokenPosition =
    TokenPosition(line = position.line, column = position.column, length = length)
// Scanners
// Produces a DOT token for the character under the cursor and moves past it.
// NOTE(review): two implementations are shown back to back (a manual offset/column
// bump returning `token`, then a one-line version using next()); the second return is
// unreachable as written. Presumably old and new diff lines — confirm which is current.
private fun scanDot(): Token {
val token = Token(TokenType.DOT, position)
offset++
position.column++
return token
// Arguments are evaluated left to right: next() advances the cursor before
// getPosition(1) reads the column.
return Token(TokenType.DOT, next().toString(), getPosition(1))
}
// Consumes a run of letters/digits and produces an ALPHANUMERIC token for it.
// NOTE(review): old and new body lines appear interleaved here (manual offset/column/
// length bumps next to the `value += next()` version, and two return statements).
// Confirm which lines belong to the current revision.
private fun scanAlphanumeric(): Token {
val token = Token(TokenType.ALPHANUMERIC, position)
offset++
position.column++
var length = 0
var value = ""
while (hasNext() && peek().isLetterOrDigit()) {
offset++
position.column++
position.length++
value += next()
length++
}
return token
// NOTE(review): getPosition(length) runs after the loop, so the recorded column is
// the column of the last consumed character, not the first — confirm this is intended.
return Token(TokenType.ALPHANUMERIC, value, getPosition(length))
}
/**
 * Consumes a run of whitespace characters without producing a token.
 *
 * Every consumed newline ('\n') moves the lexer to the next line and rewinds the
 * column counter to its "nothing consumed on this line yet" value.
 */
private fun scanWhitespace() {
    while (hasNext() && peek().isWhitespace()) {
        if (next() == '\n') {
            position.line++
            // next() increments the column before a token reads it, and the lexer starts
            // at column -1 so the first character of the input lands on column 0.
            // Rewinding to -1 (rather than 0) makes the first character of every later
            // line land on column 0 as well, instead of column 1.
            position.column = -1
        }
    }
}
}

View file

@ -1,5 +1,3 @@
package lexer
/**
 * Position record with a fixed [line] and a mutable [column] and [length].
 *
 * The class has no behaviour of its own; it only holds the three values.
 */
class LexerPosition(
    val line: Int,
    var column: Int,
    var length: Int
)
/**
 * Mutable cursor state made of three counters.
 *
 * @property offset mutable offset counter.
 * @property line mutable line counter.
 * @property column mutable column counter.
 */
data class LexerPosition(
    var offset: Int,
    var line: Int,
    var column: Int
)

View file

@ -1,8 +1,7 @@
package lexer
// A lexed token: its type, plus (in the data-class variant) its text value and position.
// NOTE(review): two headers (`class Token(` / `data class Token(`) and two `position`
// parameters of different types are shown together — presumably the removed and added
// sides of the change without +/- markers. Confirm which declaration is current.
class Token(
data class Token(
val type: TokenType,
val position: LexerPosition
) {
// Do nothing
}
val value: String,
val position: TokenPosition
)

View file

@ -0,0 +1,3 @@
package lexer
/**
 * Immutable position record for a token.
 *
 * @property line line value stored for the token.
 * @property column column value stored for the token.
 * @property length length value stored for the token.
 */
data class TokenPosition(
    val line: Int,
    val column: Int,
    val length: Int
)