feat(lexer): Comments
This commit is contained in:
parent
dc9e43e9ba
commit
8429733200
3 changed files with 61 additions and 3 deletions
|
@ -1,5 +1,5 @@
|
||||||
package lexer
|
package lexer
|
||||||
|
|
||||||
/**
 * Exception raised by the lexer when the source cannot be tokenized.
 *
 * The exception message has the form `line:column: message`, where the reported
 * column is `position.column + 1` and the line is used exactly as stored in [position].
 *
 * @param message Description of what went wrong
 * @param position Lexer position the error is reported at
 */
class Error(message: String, position: LexerPosition) : Exception(
    // A plain template is used instead of a raw string followed by trimIndent():
    // trimIndent() runs on the already-interpolated text, so a multi-line message
    // could change how much indentation gets stripped from the first line.
    "${position.line}:${position.column + 1}: $message"
)
|
|
@ -6,6 +6,10 @@ class Lexer(private val source: String) {
|
||||||
private var tokens: List<Token> = LinkedList()
|
private var tokens: List<Token> = LinkedList()
|
||||||
private val position = LexerPosition(0, 0, -1)
|
private val position = LexerPosition(0, 0, -1)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Scans the source code and returns a list of tokens.
|
||||||
|
* @return List of [Token]s
|
||||||
|
*/
|
||||||
fun scan(): List<Token> {
|
fun scan(): List<Token> {
|
||||||
while (hasNext()) {
|
while (hasNext()) {
|
||||||
val char: Char = peek()
|
val char: Char = peek()
|
||||||
|
@ -14,8 +18,9 @@ class Lexer(private val source: String) {
|
||||||
char == ')' -> scanSymbol(TokenType.RIGHT_PARENTHESES)
|
char == ')' -> scanSymbol(TokenType.RIGHT_PARENTHESES)
|
||||||
char == '.' -> scanSymbol(TokenType.DOT)
|
char == '.' -> scanSymbol(TokenType.DOT)
|
||||||
char == '"' -> scanQuotedString()
|
char == '"' -> scanQuotedString()
|
||||||
|
char == '%' -> { scanComment(); continue }
|
||||||
char.isLetterOrDigit() -> scanAlphanumeric()
|
char.isLetterOrDigit() -> scanAlphanumeric()
|
||||||
char.isWhitespace() -> { scanWhitespace(); continue }
|
char.isWhitespace() -> { scanWhitespace(); continue }
|
||||||
else -> throw Error("Unknown symbol: $char", position)
|
else -> throw Error("Unknown symbol: $char", position)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -24,10 +29,12 @@ class Lexer(private val source: String) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Tells whether the lexer's current offset still lies inside [source].
 *
 * @return `true` while `position.offset` is smaller than the length of [source]
 */
private fun hasNext(): Boolean = source.length > position.offset
|
||||||
|
|
||||||
private fun peek(): Char {
|
private fun peek(): Char {
|
||||||
|
// Peek should only be called if there is a next character
|
||||||
if (!hasNext()) {
|
if (!hasNext()) {
|
||||||
throw Error("Unexpected end of input", position)
|
throw Error("Unexpected end of input", position)
|
||||||
}
|
}
|
||||||
|
@ -36,6 +43,7 @@ class Lexer(private val source: String) {
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun next(): Char {
|
private fun next(): Char {
|
||||||
|
// Advance the position and return the character
|
||||||
val char = peek()
|
val char = peek()
|
||||||
position.offset++
|
position.offset++
|
||||||
position.column++
|
position.column++
|
||||||
|
@ -43,16 +51,25 @@ class Lexer(private val source: String) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Creates a [TokenPosition] from the lexer's current line and column.
 *
 * @param length Length to record in the returned position (defaults to 1)
 * @return A new [TokenPosition] built from `position.line`, `position.column` and [length]
 */
private fun getPosition(length: Int = 1): TokenPosition =
    TokenPosition(position.line, position.column, length)
|
||||||
|
|
||||||
// Scanners
|
/* * * * * * *
|
||||||
|
* Scanners *
|
||||||
|
* * * * * * */
|
||||||
|
|
||||||
|
/**
 * Consumes a single character and wraps it in a [Token] of the given type.
 *
 * @param tokenType The [TokenType] to assign to the scanned character
 * @return A [Token] whose value is the consumed character and whose length is 1
 */
private fun scanSymbol(tokenType: TokenType): Token {
    // Consume first: the token position is taken after the character has been read.
    val lexeme = next().toString()
    val location = getPosition(1)
    return Token(tokenType, lexeme, location)
}
|
||||||
|
|
||||||
private fun scanAlphanumeric(): Token {
|
private fun scanAlphanumeric(): Token {
|
||||||
|
// Scan all alphanumeric characters
|
||||||
var length = 0
|
var length = 0
|
||||||
while (hasNext() && peek().isLetterOrDigit()) {
|
while (hasNext() && peek().isLetterOrDigit()) {
|
||||||
next()
|
next()
|
||||||
|
@ -63,22 +80,40 @@ class Lexer(private val source: String) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Scans a double-quoted string and returns its content as a token.
 *
 * The surrounding quotes are consumed but are not part of the token value.
 *
 * @return A [Token] of type [TokenType.ALPHANUMERIC] holding the text between the quotes
 * @throws Error if the next character is not an opening quote, or if no closing quote follows
 */
private fun scanQuotedString(): Token {
    // Guard: the caller is expected to have seen the opening quote already.
    val opening = next()
    if (opening != '"') {
        throw Error("Illegal state: Expected opening quote", position)
    }

    // Remember where the content starts; every next() call advances the offset by one,
    // so the distance to the offset before the closing quote is the content length.
    val contentStart = position.offset
    while (hasNext()) {
        if (peek() == '"') break
        next()
    }
    val contentEnd = position.offset

    // Guard: the content must be terminated by a closing quote.
    val closing = next()
    if (closing != '"') {
        throw Error("Illegal state: Expected closing quote", position)
    }

    val content = source.substring(contentStart, contentEnd)
    return Token(TokenType.ALPHANUMERIC, content, getPosition(contentEnd - contentStart))
}
|
||||||
|
|
||||||
|
/**
 * Skips a line comment introduced by `%`.
 *
 * Characters are consumed up to, but not including, the next `'\n'` or the end of the
 * input. No token is produced.
 *
 * @throws Error if the next character is not `%`
 */
private fun scanComment() {
    // Guard: the caller is expected to have seen the comment marker already.
    val marker = next()
    if (marker != '%') {
        throw Error("Illegal state: Expected opening comment", position)
    }

    // Discard the rest of the line; the newline itself is left unconsumed.
    while (hasNext()) {
        if (peek() == '\n') break
        next()
    }
}
|
||||||
|
|
||||||
private fun scanWhitespace() {
|
private fun scanWhitespace() {
|
||||||
|
// Skip all whitespace characters
|
||||||
while (hasNext() && peek().isWhitespace()) {
|
while (hasNext() && peek().isWhitespace()) {
|
||||||
if (next() == '\n') {
|
if (next() == '\n') {
|
||||||
position.line++
|
position.line++
|
||||||
|
|
|
@ -167,4 +167,27 @@ class LexerScanTest {
|
||||||
|
|
||||||
assertEquals("string with space", tokens[0].value, "Expected 'string with space', got ${tokens[0].value}")
|
assertEquals("string with space", tokens[0].value, "Expected 'string with space', got ${tokens[0].value}")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// A source consisting only of a comment must yield nothing but the EOF token.
@Test
fun scan_comments_returns_nothing() {
    val scanned = Lexer("% comment").scan()

    assertEquals(1, scanned.size)

    val onlyToken = scanned[0]
    assertEquals(TokenType.EOF, onlyToken.type, "Expected EOF token, got ${onlyToken.type}")
}
|
||||||
|
|
||||||
|
// A comment line followed by a word must yield only the word and the trailing EOF token.
@Test
fun scan_comment_and_sentence_returns_sentence() {
    val tokens = Lexer("""
        % comment
        sentence
    """.trimIndent()).scan()

    assertEquals(2, tokens.size)

    assertEquals(TokenType.ALPHANUMERIC, tokens[0].type, "Expected ALPHANUMERIC token, got ${tokens[0].type}")
    assertEquals("sentence", tokens[0].value, "Expected 'sentence', got ${tokens[0].value}")

    // The size check alone does not pin down what the second token is; verify it is EOF,
    // matching what the comment-only test expects for the end of input.
    assertEquals(TokenType.EOF, tokens[1].type, "Expected EOF token, got ${tokens[1].type}")
}
|
||||||
}
|
}
|
||||||
|
|
Reference in a new issue