73 lines
1.8 KiB
Kotlin
73 lines
1.8 KiB
Kotlin
package lexer
|
|
|
|
import java.util.LinkedList
|
|
|
|
/**
 * Splits [source] into a list of [Token]s.
 *
 * Three kinds of input are recognised:
 * - `.` produces a `TokenType.DOT` token,
 * - a run of letters and/or digits produces one `TokenType.ALPHANUMERIC` token,
 * - whitespace is skipped (a `'\n'` advances the line counter).
 *
 * Any other character aborts scanning by throwing `Error` with the current lexer position.
 *
 * The instance is stateful: the cursor and the token buffer live in the object, so a second
 * call to [scan] does not restart from the beginning of [source].
 */
class Lexer(private val source: String) {

    // Appended to in place; the previous read-only `List` + `+=` copied the whole list per token.
    private val tokens: MutableList<Token> = LinkedList()

    private val position = LexerPosition(0, 0, -1)

    // Column value the cursor starts a line with. Captured from the initial position so that
    // every line begins with the same column as the first one.
    // NOTE(review): assumes the initial `position.column` is the intended line-start value — confirm
    // against the LexerPosition constructor's parameter order.
    private val lineStartColumn = position.column

    /**
     * Consumes the remaining input and returns every token produced so far, terminated by an
     * `EOF` token of length 0.
     *
     * @return a snapshot of the token list; later calls do not modify a previously returned list.
     * @throws Error when a character is neither `.`, a letter/digit, nor whitespace.
     */
    fun scan(): List<Token> {
        while (hasNext()) {
            val char = peek()

            when {
                char == '.' -> tokens.add(scanDot())
                char.isLetterOrDigit() -> tokens.add(scanAlphanumeric())
                char.isWhitespace() -> scanWhitespace()
                else -> throw Error("Unknown symbol: $char", position)
            }
        }

        tokens.add(Token(TokenType.EOF, "EOF", getPosition(0)))

        // Hand out a copy so callers never observe (or alter) the internal buffer.
        return tokens.toList()
    }

    /** Returns `true` while the cursor has not reached the end of [source]. */
    private fun hasNext(): Boolean = position.offset < source.length

    /**
     * Returns the character under the cursor without consuming it.
     *
     * @throws Error when the cursor is already at the end of the input.
     */
    private fun peek(): Char {
        if (!hasNext()) {
            throw Error("Unexpected end of input", position)
        }

        return source[position.offset]
    }

    /** Consumes and returns the character under the cursor, advancing offset and column by one. */
    private fun next(): Char {
        val char = peek()
        position.offset++
        position.column++
        return char
    }

    /**
     * Builds a [TokenPosition] from the lexer's current line and column.
     *
     * @param length number of characters the token spans.
     */
    private fun getPosition(length: Int = 1): TokenPosition =
        TokenPosition(position.line, position.column, length)

    // Scanners
    //
    // Each scanner reads the position AFTER consuming its characters, so the reported column is
    // the lexer column following the last consumed character's `next()` call.
    // NOTE(review): for multi-character tokens this is the column of the token's last character,
    // not its first — confirm this is what consumers of TokenPosition expect.

    /** Consumes a single `.` and wraps it in a `DOT` token. */
    private fun scanDot(): Token {
        val text = next().toString()
        return Token(TokenType.DOT, text, getPosition(1))
    }

    /** Consumes a maximal run of letters/digits and wraps it in an `ALPHANUMERIC` token. */
    private fun scanAlphanumeric(): Token {
        val start = position.offset

        while (hasNext() && peek().isLetterOrDigit()) {
            next()
        }

        // One substring instead of char-by-char concatenation; same text, same length.
        val value = source.substring(start, position.offset)

        return Token(TokenType.ALPHANUMERIC, value, getPosition(value.length))
    }

    /** Skips a run of whitespace, moving to the next line on every `'\n'`. */
    private fun scanWhitespace() {
        while (hasNext() && peek().isWhitespace()) {
            if (next() == '\n') {
                position.line++
                // Reset to the same column the first line started with (was a hard-coded 0).
                position.column = lineStartColumn
            }
        }
    }
}
|