feat(lexer): Comments
This commit is contained in:
parent
dc9e43e9ba
commit
8429733200
3 changed files with 61 additions and 3 deletions
|
@ -1,5 +1,5 @@
|
|||
package lexer
|
||||
|
||||
/**
 * Exception raised by the lexer. The exception message embeds the line and column
 * taken from [position], followed by the supplied [message].
 *
 * NOTE(review): two message templates are visible in this view (one prefixed with
 * "Error at", one using `column + 1`); they look like the removed and added lines
 * of a diff rather than a two-line message — confirm against the real file.
 */
class Error(message: String, position: LexerPosition) : Exception("""
|
||||
Error at ${position.line}:${position.column}: $message
|
||||
${position.line}:${position.column + 1}: $message
|
||||
""".trimIndent())
|
|
@ -6,6 +6,10 @@ class Lexer(private val source: String) {
|
|||
private var tokens: List<Token> = LinkedList()
|
||||
private val position = LexerPosition(0, 0, -1)
|
||||
|
||||
/**
|
||||
* Scans the source code and returns a list of tokens.
|
||||
* @return List of [Token]s
|
||||
*/
|
||||
fun scan(): List<Token> {
|
||||
while (hasNext()) {
|
||||
val char: Char = peek()
|
||||
|
@ -14,8 +18,9 @@ class Lexer(private val source: String) {
|
|||
char == ')' -> scanSymbol(TokenType.RIGHT_PARENTHESES)
|
||||
char == '.' -> scanSymbol(TokenType.DOT)
|
||||
char == '"' -> scanQuotedString()
|
||||
char == '%' -> { scanComment(); continue }
|
||||
char.isLetterOrDigit() -> scanAlphanumeric()
|
||||
char.isWhitespace() -> { scanWhitespace(); continue }
|
||||
char.isWhitespace() -> { scanWhitespace(); continue }
|
||||
else -> throw Error("Unknown symbol: $char", position)
|
||||
}
|
||||
}
|
||||
|
@ -24,10 +29,12 @@ class Lexer(private val source: String) {
|
|||
}
|
||||
|
||||
/**
 * Reports whether there is still unread input.
 * @return `true` while `position.offset` is smaller than the length of `source`
 */
private fun hasNext(): Boolean {
|
||||
// Check if the position is within the source length
|
||||
return position.offset < source.length
|
||||
}
|
||||
|
||||
private fun peek(): Char {
|
||||
// Peek should only be called if there is a next character
|
||||
if (!hasNext()) {
|
||||
throw Error("Unexpected end of input", position)
|
||||
}
|
||||
|
@ -36,6 +43,7 @@ class Lexer(private val source: String) {
|
|||
}
|
||||
|
||||
private fun next(): Char {
|
||||
// Advance the position and return the character
|
||||
val char = peek()
|
||||
position.offset++
|
||||
position.column++
|
||||
|
@ -43,16 +51,25 @@ class Lexer(private val source: String) {
|
|||
}
|
||||
|
||||
/**
 * Builds a [TokenPosition] from the lexer's current line and column.
 * @param length Length to record in the position; defaults to 1
 * @return A new [TokenPosition] created from `position.line`, `position.column` and [length]
 */
private fun getPosition(length: Int = 1): TokenPosition {
|
||||
// Return a new TokenPosition based on the current LexerPosition
|
||||
return TokenPosition(position.line, position.column, length)
|
||||
}
|
||||
|
||||
// Scanners
|
||||
/* * * * * * *
|
||||
* Scanners *
|
||||
* * * * * * */
|
||||
|
||||
/**
|
||||
* Scans a symbol token, given the expected [TokenType].
|
||||
* @param tokenType The expected [TokenType]
|
||||
* @return The scanned [Token]
|
||||
*/
|
||||
private fun scanSymbol(tokenType: TokenType): Token {
|
||||
// Arguments are evaluated left to right, so next() consumes the symbol before
// getPosition(1) reads the (already advanced) lexer position.
return Token(tokenType, next().toString(), getPosition(1))
|
||||
}
|
||||
|
||||
private fun scanAlphanumeric(): Token {
|
||||
// Scan all alphanumeric characters
|
||||
var length = 0
|
||||
while (hasNext() && peek().isLetterOrDigit()) {
|
||||
next()
|
||||
|
@ -63,22 +80,40 @@ class Lexer(private val source: String) {
|
|||
}
|
||||
|
||||
/**
 * Scans a double-quoted string: consumes the opening quote, every character up to
 * the next `"`, and the closing quote itself.
 *
 * The resulting token carries the text between the quotes (quotes excluded) and is
 * typed [TokenType.ALPHANUMERIC].
 *
 * @return The scanned [Token]
 * @throws Error if the first or the terminating character is not `"`; when the input
 * ends before a closing quote, the error comes from the `next()` / `peek()` call instead
 */
private fun scanQuotedString(): Token {
|
||||
// "Assert" that the next character is the start of a quoted string
|
||||
if (next() != '"') {
|
||||
throw Error("Illegal state: Expected opening quote", position)
|
||||
}
|
||||
|
||||
var length = 0
|
||||
while (hasNext() && peek() != '"') {
|
||||
next()
|
||||
|
||||
length++
|
||||
}
|
||||
|
||||
// "Assert" that the next character is the end of the quoted string
|
||||
if (next() != '"') {
|
||||
throw Error("Illegal state: Expected closing quote", position)
|
||||
}
|
||||
|
||||
// Slice out the `length` characters that sit just before the closing quote consumed
// above — assumes peek() reads source[position.offset]; TODO confirm, peek's body is not fully visible here.
val value = source.substring(position.offset - length - 1, position.offset - 1)
|
||||
return Token(TokenType.ALPHANUMERIC, value, getPosition(length))
|
||||
}
|
||||
|
||||
/**
 * Skips a line comment: consumes the leading `%` and every following character up
 * to, but not including, the next `'\n'` (or the end of the input).
 * No token is produced.
 *
 * @throws Error if the next character is not `%`
 */
private fun scanComment() {
|
||||
// "Assert" that the next character is the start of a comment
|
||||
if (next() != '%') {
|
||||
throw Error("Illegal state: Expected opening comment", position)
|
||||
}
|
||||
|
||||
// Skip all characters until the end of the line
|
||||
while (hasNext() && peek() != '\n') {
|
||||
next()
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private fun scanWhitespace() {
|
||||
// Skip all whitespace characters
|
||||
while (hasNext() && peek().isWhitespace()) {
|
||||
if (next() == '\n') {
|
||||
position.line++
|
||||
|
|
|
@ -167,4 +167,27 @@ class LexerScanTest {
|
|||
|
||||
assertEquals("string with space", tokens[0].value, "Expected 'string with space', got ${tokens[0].value}")
|
||||
}
|
||||
|
||||
// A source consisting solely of a comment must yield exactly one token, the EOF token.
@Test
|
||||
fun scan_comments_returns_nothing() {
|
||||
val lexer = Lexer("% comment")
|
||||
val tokens = lexer.scan()
|
||||
|
||||
assertEquals(1, tokens.size)
|
||||
|
||||
assertEquals(TokenType.EOF, tokens[0].type, "Expected EOF token, got ${tokens[0].type}")
|
||||
}
|
||||
|
||||
// A comment line followed by a word: the comment is dropped and the first token is the word.
@Test
|
||||
fun scan_comment_and_sentence_returns_sentence() {
|
||||
val tokens = Lexer("""
|
||||
% comment
|
||||
sentence
|
||||
""".trimIndent()).scan()
|
||||
|
||||
// Two tokens expected: the word plus, presumably, the trailing EOF token (not asserted here).
assertEquals(2, tokens.size)
|
||||
|
||||
assertEquals(TokenType.ALPHANUMERIC, tokens[0].type, "Expected ALPHANUMERIC token, got ${tokens[0].type}")
|
||||
assertEquals("sentence", tokens[0].value, "Expected 'sentence', got ${tokens[0].value}")
|
||||
}
|
||||
}
|
||||
|
|
Reference in a new issue