feat(lexer): Scan alphanumerics & whitespace
parent e0754650bc
commit e1f632ca40

5 changed files with 60 additions and 40 deletions
@@ -4,34 +4,24 @@ import java.util.LinkedList
 class Lexer(private val source: String) {
 	private var tokens: List<Token> = LinkedList()
-	private val position: LexerPosition = LexerPosition(0, 0, 0)
-	private var offset: Int = 0
+	private val position = LexerPosition(0, 0, -1)
 
 	fun scan(): List<Token> {
 		while (hasNext()) {
-			tokens += scanToken()
+			val char: Char = peek()
+			tokens += when {
+				char == '.' -> scanDot()
+				char.isLetterOrDigit() -> scanAlphanumeric()
+				char.isWhitespace() -> { scanWhitespace(); continue }
+				else -> throw Error("Unknown symbol: $char", position)
+			}
 		}
-		position.length = 0
-		tokens += Token(TokenType.EOF, position)
+		tokens += Token(TokenType.EOF, "EOF", getPosition(0))
 
 		return tokens
 	}
 
-	private fun scanToken(): Token {
-		val char: Char = peek()
-
-		position.length = 1
-
-		return when {
-			char == '.' -> scanDot()
-			char.isLetterOrDigit() -> scanAlphanumeric()
-			else -> throw Error("Unknown symbol: $char", position)
-		}
-	}
-
 	private fun hasNext(): Boolean {
-		return offset < source.length
+		return position.offset < source.length
 	}
 
 	private fun peek(): Char {
@@ -39,29 +29,45 @@
 			throw Error("Unexpected end of input", position)
 		}
 
-		return source[offset]
+		return source[position.offset]
 	}
 
+	private fun next(): Char {
+		val char = peek()
+		position.offset++
+		position.column++
+		return char
+	}
+
+	private fun getPosition(length: Int = 1): TokenPosition {
+		return TokenPosition(position.line, position.column, length)
+	}
+
 	// Scanners
 
 	private fun scanDot(): Token {
-		val token = Token(TokenType.DOT, position)
-		offset++
-		position.column++
-		return token
+		return Token(TokenType.DOT, next().toString(), getPosition(1))
 	}
 
 	private fun scanAlphanumeric(): Token {
-		val token = Token(TokenType.ALPHANUMERIC, position)
-		offset++
-		position.column++
+		var length = 0
+		var value = ""
 
 		while (hasNext() && peek().isLetterOrDigit()) {
-			offset++
-			position.column++
-			position.length++
+			value += next()
+			length++
 		}
 
-		return token
+		return Token(TokenType.ALPHANUMERIC, value, getPosition(length))
 	}
+
+	private fun scanWhitespace() {
+		while (hasNext() && peek().isWhitespace()) {
+			val char = next()
+			if (char == '\n') {
+				position.line++
+				position.column = 0
+			}
+		}
+	}
 }
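A note on the reworked scan() loop above: the whitespace branch calls scanWhitespace() and then continue, so no token is appended for whitespace. This compiles because continue has type Nothing, which unifies with Token from the other when branches (break/continue inside when target the enclosing loop since Kotlin 1.4). A minimal standalone sketch of the pattern, not taken from this commit:

// Sketch only: a toy loop showing `continue` inside a `when` expression.
fun splitChars(source: String): List<String> {
    val out = mutableListOf<String>()
    var i = 0
    while (i < source.length) {
        out += when {
            // Whitespace produces no value; `continue` (type Nothing) restarts the loop.
            source[i].isWhitespace() -> { i++; continue }
            // Every other character becomes a one-char "token".
            else -> source[i++].toString()
        }
    }
    return out
}

fun main() {
    println(splitChars("a b")) // [a, b]
}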
@@ -1,5 +1,3 @@
 package lexer
 
-class LexerPosition(val line: Int, var column: Int, var length: Int) {
-	// Do nothing
-}
+data class LexerPosition(var offset: Int, var line: Int, var column: Int)
@@ -1,8 +1,7 @@
 package lexer
 
-class Token(
+data class Token(
 	val type: TokenType,
-	val position: LexerPosition
-) {
-	// Do nothing
-}
+	val value: String,
+	val position: TokenPosition
+)
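Token is now a data class carrying the lexeme text (value) next to its type. The TokenType enum itself is not part of this diff; judging from the constants referenced above, it presumably includes at least the following (assumed shape, inferred from usage):

// Assumed: not in this commit; reconstructed from the constants used above.
enum class TokenType {
    DOT,          // '.'  (scanDot)
    ALPHANUMERIC, // runs of letters/digits (scanAlphanumeric)
    EOF           // synthetic end-of-input token appended by scan()
}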
src/lexer/TokenPosition.kt (new file, 3 additions)
@@ -0,0 +1,3 @@
+package lexer
+
+data class TokenPosition(val line: Int, val column: Int, val length: Int)
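The position types are now split: LexerPosition is the lexer's single mutable cursor (offset into the source plus line/column), while TokenPosition is the immutable per-token snapshot that getPosition() stamps onto each Token. A self-contained illustration of that distinction, assuming only the two data classes from this commit:

data class LexerPosition(var offset: Int, var line: Int, var column: Int)
data class TokenPosition(val line: Int, val column: Int, val length: Int)

fun main() {
    val cursor = LexerPosition(0, 0, -1)   // column starts at -1, as in Lexer
    cursor.offset++; cursor.column++       // next() consumes the first char -> column 0
    val snap = TokenPosition(cursor.line, cursor.column, 1) // frozen for the token
    cursor.offset++; cursor.column++       // scanning moves on...
    println(snap) // TokenPosition(line=0, column=0, length=1) -- unaffected
}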
@@ -53,6 +53,8 @@ class LexerScanTest {
 		assertEquals(TokenType.ALPHANUMERIC, tokens[0].type, "Expected ALPHANUMERIC token, got ${tokens[0].type}")
 		assertEquals(TokenType.EOF, tokens[1].type, "Expected EOF token, got ${tokens[1].type}")
 
+		assertEquals(0, tokens[0].position.line, "Expected line 0, got ${tokens[0].position.line}")
+		assertEquals(0, tokens[0].position.column, "Expected column 0, got ${tokens[0].position.column}")
 		assertEquals(1, tokens[0].position.length, "Expected length 1, got ${tokens[0].position.length}")
 	}
 
@@ -67,5 +69,17 @@ class LexerScanTest {
 		assertEquals(TokenType.EOF, tokens[1].type, "Expected EOF token, got ${tokens[1].type}")
 
 		assertEquals(4, tokens[0].position.length, "Expected length 4, got ${tokens[0].position.length}")
+
+		assertEquals("word", tokens[0].value, "Expected 'word', got ${tokens[0].value}")
 	}
+
+	@Test
+	fun scan_whitespace_returns_nothing() {
+		val lexer = Lexer(" ")
+		val tokens = lexer.scan()
+
+		assertEquals(1, tokens.size)
+
+		assertEquals(TokenType.EOF, tokens[0].type, "Expected EOF token, got ${tokens[0].type}")
+	}
 }
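For completeness, a hypothetical follow-up test (not in this commit) showing how the new value field reads across token kinds in one input; it assumes the kotlin.test assertions already used above and the lexer classes on the classpath:

import kotlin.test.Test
import kotlin.test.assertEquals

class LexerScanSketchTest {
    @Test
    fun scan_mixed_input_sketch() {
        // "ab.cd" should yield ALPHANUMERIC("ab"), DOT("."), ALPHANUMERIC("cd"), EOF.
        val tokens = Lexer("ab.cd").scan()

        assertEquals(4, tokens.size, "Expected 4 tokens, got ${tokens.size}")
        assertEquals("ab", tokens[0].value)
        assertEquals(TokenType.DOT, tokens[1].type)
        assertEquals(".", tokens[1].value)
        assertEquals("cd", tokens[2].value)
        assertEquals(TokenType.EOF, tokens[3].type)
    }
}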