From e1f632ca40e024e4f18d5605c7c93e3e9ffc214c Mon Sep 17 00:00:00 2001
From: Tibo De Peuter
Date: Thu, 27 Mar 2025 17:25:58 +0100
Subject: [PATCH] feat(lexer): Scan alphanumerics & whitespace

---
Review notes (text in this section is ignored when the patch is applied):

- The Lexer cursor now starts at column 0 and always describes the next
  unread character; token positions are captured before the token is
  consumed. As submitted, the cursor started at column -1 and positions were
  read after consumption, so a multi-character token reported the column of
  its last character, tokens after a newline were one column off relative to
  line 0, EOF on empty input reported column -1, and the "Unknown symbol"
  error carried the column of the previous character.
- scanAlphanumeric slices the lexeme out of `source` instead of growing a
  String one character at a time.
- Hunk headers and the diffstat below were recomputed for the edited hunks.
- NOTE(review): `List` / `LinkedList()` in the Lexer.kt context lines are kept
  exactly as received; they look like they lost a type argument. Confirm
  against the target file before applying.
- NOTE(review): the blob ids on the `index` lines were not recomputed.

 src/lexer/Lexer.kt           | 77 +++++++++++++++++++++---------------
 src/lexer/LexerPosition.kt   |  4 +-
 src/lexer/Token.kt           |  9 ++---
 src/lexer/TokenPosition.kt   |  3 ++
 tests/lexer/LexerScanTest.kt | 27 +++++++++++++
 5 files changed, 80 insertions(+), 40 deletions(-)
 create mode 100644 src/lexer/TokenPosition.kt

diff --git a/src/lexer/Lexer.kt b/src/lexer/Lexer.kt
index 8d0a1fa..eed0e5a 100644
--- a/src/lexer/Lexer.kt
+++ b/src/lexer/Lexer.kt
@@ -4,34 +4,26 @@ import java.util.LinkedList
 
 class Lexer(private val source: String) {
     private var tokens: List = LinkedList()
-    private val position: LexerPosition = LexerPosition(0, 0, 0)
-    private var offset: Int = 0
+
+    // Cursor over `source`: always describes the next character to be read (all fields 0-based).
+    private val position = LexerPosition(offset = 0, line = 0, column = 0)
 
     fun scan(): List {
         while (hasNext()) {
-            tokens += scanToken()
+            val char: Char = peek()
+            tokens += when {
+                char == '.' -> scanDot()
+                char.isLetterOrDigit() -> scanAlphanumeric()
+                char.isWhitespace() -> { scanWhitespace(); continue }
+                else -> throw Error("Unknown symbol: $char", position)
+            }
         }
-
-        position.length = 0
-        tokens += Token(TokenType.EOF, position)
-
+        tokens += Token(TokenType.EOF, "EOF", getPosition(0))
         return tokens
     }
 
-    private fun scanToken(): Token {
-        val char: Char = peek()
-
-        position.length = 1
-
-        return when {
-            char == '.' -> scanDot()
-            char.isLetterOrDigit() -> scanAlphanumeric()
-            else -> throw Error("Unknown symbol: $char", position)
-        }
-    }
-
     private fun hasNext(): Boolean {
-        return offset < source.length
+        return position.offset < source.length
     }
 
     private fun peek(): Char {
@@ -39,29 +31,50 @@ class Lexer(private val source: String) {
             throw Error("Unexpected end of input", position)
         }
 
-        return source[offset]
+        return source[position.offset]
+    }
+
+    // Consumes and returns the current character, moving the cursor one column to the right.
+    private fun next(): Char {
+        val char = peek()
+        position.offset++
+        position.column++
+        return char
+    }
+
+    // Position of a token of `length` characters that starts at the cursor.
+    // Must be called before the token's characters are consumed.
+    private fun getPosition(length: Int = 1): TokenPosition {
+        return TokenPosition(position.line, position.column, length)
     }
 
     // Scanners
 
     private fun scanDot(): Token {
-        val token = Token(TokenType.DOT, position)
-        offset++
-        position.column++
-        return token
+        val start = getPosition(1)
+        return Token(TokenType.DOT, next().toString(), start)
     }
 
     private fun scanAlphanumeric(): Token {
-        val token = Token(TokenType.ALPHANUMERIC, position)
-        offset++
-        position.column++
+        // Remember where the token starts; the cursor moves while it is consumed.
+        val startOffset = position.offset
+        val startColumn = position.column
 
         while (hasNext() && peek().isLetterOrDigit()) {
-            offset++
-            position.column++
-            position.length++
+            next()
         }
 
-        return token
+        val value = source.substring(startOffset, position.offset)
+        return Token(TokenType.ALPHANUMERIC, value, TokenPosition(position.line, startColumn, value.length))
+    }
+
+    // Skips a run of whitespace, keeping line and column in sync with the cursor.
+    private fun scanWhitespace() {
+        while (hasNext() && peek().isWhitespace()) {
+            if (next() == '\n') {
+                position.line++
+                position.column = 0
+            }
+        }
     }
 }
diff --git a/src/lexer/LexerPosition.kt b/src/lexer/LexerPosition.kt
index 49cce6c..6437cc3 100644
--- a/src/lexer/LexerPosition.kt
+++ b/src/lexer/LexerPosition.kt
@@ -1,5 +1,3 @@
 package lexer
 
-class LexerPosition(val line: Int, var column: Int, var length: Int) {
-    // Do nothing
-}
+data class LexerPosition(var offset: Int, var line: Int, var column: Int)
diff --git a/src/lexer/Token.kt b/src/lexer/Token.kt
index e7f1727..3841fa3 100644
--- a/src/lexer/Token.kt
+++ b/src/lexer/Token.kt
@@ -1,8 +1,7 @@
 package lexer
 
-class Token(
+data class Token(
     val type: TokenType,
-    val position: LexerPosition
-) {
-    // Do nothing
-}
+    val value: String,
+    val position: TokenPosition
+)
diff --git a/src/lexer/TokenPosition.kt b/src/lexer/TokenPosition.kt
new file mode 100644
index 0000000..5f8165e
--- /dev/null
+++ b/src/lexer/TokenPosition.kt
@@ -0,0 +1,3 @@
+package lexer
+
+data class TokenPosition(val line: Int, val column: Int, val length: Int)
diff --git a/tests/lexer/LexerScanTest.kt b/tests/lexer/LexerScanTest.kt
index 8133800..ff375f6 100644
--- a/tests/lexer/LexerScanTest.kt
+++ b/tests/lexer/LexerScanTest.kt
@@ -53,6 +53,8 @@ class LexerScanTest {
         assertEquals(TokenType.ALPHANUMERIC, tokens[0].type, "Expected ALPHANUMERIC token, got ${tokens[0].type}")
         assertEquals(TokenType.EOF, tokens[1].type, "Expected EOF token, got ${tokens[1].type}")
 
+        assertEquals(0, tokens[0].position.line, "Expected line 0, got ${tokens[0].position.line}")
+        assertEquals(0, tokens[0].position.column, "Expected column 0, got ${tokens[0].position.column}")
         assertEquals(1, tokens[0].position.length, "Expected length 1, got ${tokens[0].position.length}")
     }
 
@@ -67,5 +69,30 @@ class LexerScanTest {
         assertEquals(TokenType.EOF, tokens[1].type, "Expected EOF token, got ${tokens[1].type}")
 
         assertEquals(4, tokens[0].position.length, "Expected length 4, got ${tokens[0].position.length}")
+        assertEquals(0, tokens[0].position.column, "Expected column 0, got ${tokens[0].position.column}")
+
+        assertEquals("word", tokens[0].value, "Expected 'word', got ${tokens[0].value}")
+    }
+
+    @Test
+    fun scan_whitespace_returns_nothing() {
+        val lexer = Lexer(" ")
+        val tokens = lexer.scan()
+
+        assertEquals(1, tokens.size)
+
+        assertEquals(TokenType.EOF, tokens[0].type, "Expected EOF token, got ${tokens[0].type}")
+    }
+
+    @Test
+    fun scan_tracks_line_and_column_across_newlines() {
+        val lexer = Lexer("ab\ncd")
+        val tokens = lexer.scan()
+
+        assertEquals(3, tokens.size)
+
+        assertEquals(0, tokens[0].position.column, "Expected column 0, got ${tokens[0].position.column}")
+        assertEquals(1, tokens[1].position.line, "Expected line 1, got ${tokens[1].position.line}")
+        assertEquals(0, tokens[1].position.column, "Expected column 0, got ${tokens[1].position.column}")
     }
 }