feat(lexer): Scan alphanumerics & whitespace
This commit is contained in:
parent
e0754650bc
commit
e1f632ca40
5 changed files with 60 additions and 40 deletions
|
@ -4,34 +4,24 @@ import java.util.LinkedList
|
|||
|
||||
class Lexer(private val source: String) {
|
||||
private var tokens: List<Token> = LinkedList()
|
||||
private val position: LexerPosition = LexerPosition(0, 0, 0)
|
||||
private var offset: Int = 0
|
||||
private val position = LexerPosition(0, 0, -1)
|
||||
|
||||
fun scan(): List<Token> {
|
||||
while (hasNext()) {
|
||||
tokens += scanToken()
|
||||
val char: Char = peek()
|
||||
tokens += when {
|
||||
char == '.' -> scanDot()
|
||||
char.isLetterOrDigit() -> scanAlphanumeric()
|
||||
char.isWhitespace() -> { scanWhitespace(); continue }
|
||||
else -> throw Error("Unknown symbol: $char", position)
|
||||
}
|
||||
}
|
||||
|
||||
position.length = 0
|
||||
tokens += Token(TokenType.EOF, position)
|
||||
|
||||
tokens += Token(TokenType.EOF, "EOF", getPosition(0))
|
||||
return tokens
|
||||
}
|
||||
|
||||
private fun scanToken(): Token {
|
||||
val char: Char = peek()
|
||||
|
||||
position.length = 1
|
||||
|
||||
return when {
|
||||
char == '.' -> scanDot()
|
||||
char.isLetterOrDigit() -> scanAlphanumeric()
|
||||
else -> throw Error("Unknown symbol: $char", position)
|
||||
}
|
||||
}
|
||||
|
||||
private fun hasNext(): Boolean {
|
||||
return offset < source.length
|
||||
return position.offset < source.length
|
||||
}
|
||||
|
||||
private fun peek(): Char {
|
||||
|
@ -39,29 +29,45 @@ class Lexer(private val source: String) {
|
|||
throw Error("Unexpected end of input", position)
|
||||
}
|
||||
|
||||
return source[offset]
|
||||
return source[position.offset]
|
||||
}
|
||||
|
||||
/**
 * Consumes the character under the cursor and returns it.
 *
 * The character is read through [peek] first, so any error raised by [peek]
 * propagates before the cursor is touched. Afterwards both the absolute
 * offset and the column of [position] are advanced by one.
 *
 * @return the character that was just consumed.
 */
private fun next(): Char = peek().also {
    position.offset += 1
    position.column += 1
}
|
||||
|
||||
/**
 * Builds an immutable [TokenPosition] from the lexer's current line and column.
 *
 * The values are read at call time, so the result reflects wherever the
 * cursor is when this function is invoked.
 * NOTE(review): the scanners call this after consuming their characters;
 * confirm that recording the post-consumption column is intended.
 *
 * @param length number of characters the token spans; defaults to 1.
 * @return a snapshot of the current line/column paired with [length].
 */
private fun getPosition(length: Int = 1): TokenPosition =
    TokenPosition(line = position.line, column = position.column, length = length)
|
||||
|
||||
// Scanners
|
||||
|
||||
private fun scanDot(): Token {
|
||||
val token = Token(TokenType.DOT, position)
|
||||
offset++
|
||||
position.column++
|
||||
return token
|
||||
return Token(TokenType.DOT, next().toString(), getPosition(1))
|
||||
}
|
||||
|
||||
private fun scanAlphanumeric(): Token {
|
||||
val token = Token(TokenType.ALPHANUMERIC, position)
|
||||
offset++
|
||||
position.column++
|
||||
var length = 0
|
||||
var value = ""
|
||||
|
||||
while (hasNext() && peek().isLetterOrDigit()) {
|
||||
offset++
|
||||
position.column++
|
||||
position.length++
|
||||
value += next()
|
||||
length++
|
||||
}
|
||||
|
||||
return token
|
||||
return Token(TokenType.ALPHANUMERIC, value, getPosition(length))
|
||||
}
|
||||
|
||||
/**
 * Skips a run of consecutive whitespace characters without emitting a token.
 *
 * Every character is consumed through [next], which advances the offset and
 * column. When a line feed is consumed, the line counter is incremented and
 * the column is rewound to the start-of-line state.
 */
private fun scanWhitespace() {
    while (hasNext() && peek().isWhitespace()) {
        if (next() == '\n') {
            position.line++
            // The cursor is created with column = -1 and next() increments the
            // column as it consumes, so the first character of the first line
            // ends up at column 0. Resetting to -1 (not 0) keeps the first
            // character of every following line at column 0 as well.
            position.column = -1
        }
    }
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
package lexer
|
||||
|
||||
class LexerPosition(val line: Int, var column: Int, var length: Int) {
|
||||
// Do nothing
|
||||
}
|
||||
/**
 * Mutable cursor describing a location in the text being scanned.
 *
 * All three properties are `var` because the owner updates them in place
 * while scanning.
 *
 * @property offset index into the source text.
 * @property line line counter.
 * @property column column counter within the current line.
 */
data class LexerPosition(
    var offset: Int,
    var line: Int,
    var column: Int,
)
|
||||
|
|
|
@ -1,8 +1,7 @@
|
|||
package lexer
|
||||
|
||||
class Token(
|
||||
data class Token(
|
||||
val type: TokenType,
|
||||
val position: LexerPosition
|
||||
) {
|
||||
// Do nothing
|
||||
}
|
||||
val value: String,
|
||||
val position: TokenPosition
|
||||
)
|
||||
|
|
3
src/lexer/TokenPosition.kt
Normal file
3
src/lexer/TokenPosition.kt
Normal file
|
@ -0,0 +1,3 @@
|
|||
package lexer
|
||||
|
||||
/**
 * Immutable location information attached to a token.
 *
 * @property line line recorded for the token.
 * @property column column recorded for the token.
 * @property length number of characters the token spans.
 */
data class TokenPosition(
    val line: Int,
    val column: Int,
    val length: Int,
)
|
Reference in a new issue