feat(lexer): Scan alphanumerics & whitespace
This commit is contained in:
parent
e0754650bc
commit
e1f632ca40
5 changed files with 60 additions and 40 deletions
|
@ -4,34 +4,24 @@ import java.util.LinkedList
|
|||
|
||||
class Lexer(private val source: String) {
|
||||
private var tokens: List<Token> = LinkedList()
|
||||
private val position: LexerPosition = LexerPosition(0, 0, 0)
|
||||
private var offset: Int = 0
|
||||
private val position = LexerPosition(0, 0, -1)
|
||||
|
||||
/**
 * Tokenizes [source] from the current lexer position to the end of input.
 *
 * Each iteration peeks at the next character and dispatches on it:
 * a `.` yields a DOT token, a letter or digit starts an alphanumeric run,
 * and whitespace is consumed without producing a token. A final EOF token
 * with text "EOF" and length 0 is appended once the input is exhausted.
 *
 * @return the accumulated token list, ending with the EOF token.
 * @throws Error when the next character is not `.`, alphanumeric or whitespace.
 */
fun scan(): List<Token> {
    while (hasNext()) {
        val char: Char = peek()

        // Exactly one scanner runs per iteration; whitespace produces no token,
        // so it is handled as a plain statement instead of a `continue` inside
        // a `when` expression.
        when {
            char == '.' -> tokens += scanDot()
            char.isLetterOrDigit() -> tokens += scanAlphanumeric()
            char.isWhitespace() -> scanWhitespace()
            else -> throw Error("Unknown symbol: $char", position)
        }
    }

    // Single end-of-input marker, positioned at the current line/column.
    tokens += Token(TokenType.EOF, "EOF", getPosition(0))
    return tokens
}
|
||||
|
||||
/**
 * Scans a single token starting at the character returned by [peek].
 *
 * Sets `position.length` to 1 before dispatching, then delegates to
 * [scanDot] for `.` and to [scanAlphanumeric] for letters and digits.
 *
 * NOTE(review): scan() also carries an inline copy of this dispatch —
 * confirm whether this helper is still required.
 *
 * @throws Error when the character is neither `.` nor alphanumeric.
 */
private fun scanToken(): Token {
    val char: Char = peek()
    position.length = 1

    if (char == '.') {
        return scanDot()
    }
    if (char.isLetterOrDigit()) {
        return scanAlphanumeric()
    }
    throw Error("Unknown symbol: $char", position)
}
|
||||
|
||||
/**
 * Reports whether at least one unread character remains in [source].
 *
 * The check is made against `position.offset`, the cursor that [next]
 * advances, so the result changes as characters are consumed.
 */
private fun hasNext(): Boolean = position.offset < source.length
|
||||
|
||||
private fun peek(): Char {
|
||||
|
@ -39,29 +29,45 @@ class Lexer(private val source: String) {
|
|||
throw Error("Unexpected end of input", position)
|
||||
}
|
||||
|
||||
return source[offset]
|
||||
return source[position.offset]
|
||||
}
|
||||
|
||||
/**
 * Consumes and returns the current character.
 *
 * The character is read through [peek] first, so any failure raised there
 * happens before the cursor moves. Afterwards both `position.offset` and
 * `position.column` are advanced by one.
 */
private fun next(): Char = peek().also {
    position.offset++
    position.column++
}
|
||||
|
||||
/**
 * Snapshots the lexer's current line and column into a [TokenPosition].
 *
 * @param length the length to record on the returned position; defaults to 1.
 */
private fun getPosition(length: Int = 1): TokenPosition =
    TokenPosition(position.line, position.column, length)
|
||||
|
||||
// Scanners
|
||||
|
||||
/**
 * Consumes one character and returns it as a DOT token.
 *
 * scan() calls this when the peeked character is `.`. The token text is the
 * consumed character; its position has length 1.
 */
private fun scanDot(): Token {
    // Consume first: the position is captured after the cursor has advanced,
    // which is the order the single-expression form evaluated its arguments in.
    val text = next().toString()
    return Token(TokenType.DOT, text, getPosition(1))
}
|
||||
|
||||
/**
 * Consumes a maximal run of letters and digits and returns it as one
 * ALPHANUMERIC token.
 *
 * Characters are taken through [next] while input remains and the peeked
 * character satisfies `isLetterOrDigit()`. The token text is the consumed
 * run and the recorded length is the number of characters consumed.
 */
private fun scanAlphanumeric(): Token {
    // StringBuilder avoids re-allocating the text on every appended character.
    val text = StringBuilder()

    while (hasNext() && peek().isLetterOrDigit()) {
        // next() is the only place the cursor advances, so each character
        // moves offset and column exactly once.
        text.append(next())
    }

    // The position is captured after the run has been consumed.
    return Token(TokenType.ALPHANUMERIC, text.toString(), getPosition(text.length))
}
|
||||
|
||||
/**
 * Skips consecutive whitespace characters without producing a token.
 *
 * Every consumed `'\n'` increments `position.line` and sets
 * `position.column` to 0. Scanning stops at the first non-whitespace
 * character or at the end of input.
 */
private fun scanWhitespace() {
    while (hasNext()) {
        if (!peek().isWhitespace()) {
            return
        }

        if (next() == '\n') {
            // A line break starts a new line with the column reset.
            position.line++
            position.column = 0
        }
    }
}
|
||||
}
|
||||
|
|
Reference in a new issue