feat(lexer): Parentheses and quoted strings

This commit is contained in:
Tibo De Peuter 2025-03-27 18:16:53 +01:00
parent e1f632ca40
commit dc9e43e9ba
Signed by: tdpeuter
GPG key ID: 38297DE43F75FFE2
4 changed files with 155 additions and 23 deletions

View file

@ -10,7 +10,10 @@ class Lexer(private val source: String) {
while (hasNext()) {
val char: Char = peek()
tokens += when {
char == '.' -> scanDot()
char == '(' -> scanSymbol(TokenType.LEFT_PARENTHESES)
char == ')' -> scanSymbol(TokenType.RIGHT_PARENTHESES)
char == '.' -> scanSymbol(TokenType.DOT)
char == '"' -> scanQuotedString()
char.isLetterOrDigit() -> scanAlphanumeric()
char.isWhitespace() -> { scanWhitespace(); continue }
else -> throw Error("Unknown symbol: $char", position)
@ -45,26 +48,39 @@ class Lexer(private val source: String) {
// Scanners
private fun scanDot(): Token {
return Token(TokenType.DOT, next().toString(), getPosition(1))
/**
 * Consumes exactly one character and wraps it in a token of the requested type.
 *
 * @param tokenType the type to give the single-character token.
 * @return a token whose value is the consumed character as a string, positioned with `getPosition(1)`.
 */
private fun scanSymbol(tokenType: TokenType): Token {
    // Consume first, then compute the position, matching the original evaluation order.
    val lexeme = next().toString()
    val where = getPosition(1)
    return Token(tokenType, lexeme, where)
}
/**
 * Scans a run of consecutive letters and digits and returns it as an ALPHANUMERIC token.
 *
 * NOTE(review): this listing contains both an accumulate-into-`value` variant and a
 * `source.substring(...)` variant of the same logic (two `value` declarations and two
 * `next()` calls per iteration). Presumably only one set belongs to the real file - confirm.
 */
private fun scanAlphanumeric(): Token {
var length = 0
var value = ""
// Consume characters for as long as the next one is a letter or digit.
while (hasNext() && peek().isLetterOrDigit()) {
value += next()
next()
length++
}
// Slice the lexeme out of the source: the `length` characters ending at the current offset.
val value = source.substring(position.offset - length, position.offset)
return Token(TokenType.ALPHANUMERIC, value, getPosition(length))
}
/**
 * Scans a double-quoted string and returns its contents, without the quotes, as an
 * ALPHANUMERIC token.
 *
 * Must be called with the cursor on the opening `"`. Every character up to the next `"` is
 * taken verbatim; no escape sequences are interpreted.
 *
 * @return a token holding the text between the quotes; its position is built with
 *   `getPosition(length)`, where `length` counts only the characters between the quotes.
 * @throws Error if the current character is not `"`, or if the input ends before a closing `"`.
 */
private fun scanQuotedString(): Token {
    if (next() != '"') {
        throw Error("Illegal state: Expected opening quote", position)
    }

    var length = 0
    while (hasNext() && peek() != '"') {
        next()
        length++
    }

    // The loop stops either on the closing quote or at end of input. Test for end of input
    // explicitly so an unterminated string raises the lexer's own Error instead of depending
    // on whatever next() does once the source is exhausted.
    if (!hasNext() || next() != '"') {
        throw Error("Illegal state: Expected closing quote", position)
    }

    // The cursor now sits one past the closing quote, so the contents end at offset - 1.
    val value = source.substring(position.offset - length - 1, position.offset - 1)
    return Token(TokenType.ALPHANUMERIC, value, getPosition(length))
}
private fun scanWhitespace() {
while (hasNext() && peek().isWhitespace()) {
val char = next()
if (char == '\n') {
if (next() == '\n') {
position.line++
position.column = 0
}

View file

@ -3,7 +3,10 @@ package lexer
/** Kinds of tokens the lexer can emit. */
enum class TokenType {
// A run of letters/digits, or the contents of a double-quoted string.
ALPHANUMERIC,
// The '(' and ')' characters.
LEFT_PARENTHESES, RIGHT_PARENTHESES,
// The '.' character.
DOT,
// Operators (no operator token types are declared yet)
// Appears as the last token of every scan result checked in the lexer tests.
EOF
}

View file

@ -0,0 +1,28 @@
package lexer
import org.junit.jupiter.api.Test
import kotlin.test.assertEquals
/** Lexer tests on minimal Prolog-style input: a single term followed by a terminating dot. */
class LexerScanPrologTest {
    /** A lowercase word followed by a dot lexes to ALPHANUMERIC, DOT, EOF. */
    @Test
    fun scan_simple_atom() {
        val lexer = Lexer("atom.")
        val tokens = lexer.scan()

        assertEquals(3, tokens.size)

        val term = tokens[0]
        assertEquals(TokenType.ALPHANUMERIC, term.type, "Expected ALPHANUMERIC token, got ${term.type}")
        val dot = tokens[1]
        assertEquals(TokenType.DOT, dot.type, "Expected DOT token, got ${dot.type}")
        val end = tokens[2]
        assertEquals(TokenType.EOF, end.type, "Expected EOF token, got ${end.type}")
    }

    /** An uppercase word followed by a dot lexes to the same ALPHANUMERIC, DOT, EOF sequence. */
    @Test
    fun scan_variable() {
        val lexer = Lexer("X.")
        val tokens = lexer.scan()

        assertEquals(3, tokens.size)

        val term = tokens[0]
        assertEquals(TokenType.ALPHANUMERIC, term.type, "Expected ALPHANUMERIC token, got ${term.type}")
        val dot = tokens[1]
        assertEquals(TokenType.DOT, dot.type, "Expected DOT token, got ${dot.type}")
        val end = tokens[2]
        assertEquals(TokenType.EOF, end.type, "Expected EOF token, got ${end.type}")
    }
}

View file

@ -5,29 +5,24 @@ import lexer.Lexer
import lexer.TokenType
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.assertThrows
import kotlin.test.assertEquals
import org.junit.jupiter.api.Assertions.*
class LexerScanTest {
/** An empty source is expected to lex to exactly one token: EOF. */
@Test
fun scan_emptyString_returns_EOF() {
val lexer = Lexer("")
val tokens = lexer.scan()
val tokens = Lexer("").scan()
assertEquals(1, tokens.size, "Expected 1 token, got ${tokens.size}")
assertEquals(TokenType.EOF, tokens[0].type, "Expected EOF token, got ${tokens[0].type}")
}
/**
 * Expects `scan()` to throw `Error` for a character the lexer does not recognise.
 *
 * NOTE(review): the source literal shown here is empty, while scan_emptyString_returns_EOF
 * expects an empty source to lex to a single EOF token without throwing. Presumably the
 * intended unsupported character is missing from the literal - confirm and restore it.
 */
@Test
fun scan_unknownSymbol_returns_Error() {
val lexer = Lexer("")
assertThrows<Error>({
val tokens = lexer.scan()
})
assertThrows<Error> { Lexer("").scan() }
}
@Test
fun scan_dot_returns_Dot() {
val lexer = Lexer(".")
val tokens = lexer.scan()
val tokens = Lexer(".").scan()
assertEquals(2, tokens.size)
assertEquals(TokenType.DOT, tokens[0].type, "Expected DOT token, got ${tokens[0].type}")
assertEquals(TokenType.EOF, tokens[1].type, "Expected EOF token, got ${tokens[1].type}")
@ -35,18 +30,16 @@ class LexerScanTest {
/** Two consecutive dots are expected to lex to two DOT tokens followed by EOF. */
@Test
fun scan_two_dots_returns_two_dots() {
val lexer = Lexer("..")
val tokens = lexer.scan()
val tokens = Lexer("..").scan()
assertEquals(3, tokens.size)
assertEquals(TokenType.DOT, tokens[0].type, "Expected DOT token, got ${tokens[0].type}")
assertEquals(TokenType.DOT, tokens[1].type, "Expected DOT token, got ${tokens[1].type}")
assertEquals(TokenType.EOF, tokens[2].type, "Expected EOF token, got ${tokens[2].type}")
}
@Test
fun scan_letter_returns_letter() {
val lexer = Lexer("a")
val tokens = lexer.scan()
val tokens = Lexer("a").scan()
assertEquals(2, tokens.size)
@ -74,7 +67,7 @@ class LexerScanTest {
}
@Test
fun scan_whitespace_returns_nothing() {
fun scan_space_returns_nothing() {
val lexer = Lexer(" ")
val tokens = lexer.scan()
@ -82,4 +75,96 @@ class LexerScanTest {
assertEquals(TokenType.EOF, tokens[0].type, "Expected EOF token, got ${tokens[0].type}")
}
/** A source made only of space, tab, newline and carriage return yields no tokens besides EOF. */
@Test
fun scan_whitespace_various_returns_nothing() {
    val tokens = Lexer(" \t\n\r").scan()

    assertEquals(1, tokens.size)

    val only = tokens[0]
    assertEquals(TokenType.EOF, only.type, "Expected EOF token, got ${only.type}")
}
/** Two words separated by a space lex to two ALPHANUMERIC tokens, each with value and length checked. */
@Test
fun scan_separated_words() {
    val lexer = Lexer("word1 word2")
    val tokens = lexer.scan()

    assertEquals(3, tokens.size, "Expected 3 tokens, got ${tokens.size}")

    val first = tokens[0]
    assertEquals(TokenType.ALPHANUMERIC, first.type, "Expected ALPHANUMERIC token, got ${first.type}")
    assertEquals("word1", first.value, "Expected 'word1', got ${first.value}")
    assertEquals(5, first.position.length, "Expected length 5, got ${first.position.length}")

    val second = tokens[1]
    assertEquals(TokenType.ALPHANUMERIC, second.type, "Expected ALPHANUMERIC token, got ${second.type}")
    assertEquals("word2", second.value, "Expected 'word2', got ${second.value}")
    assertEquals(5, second.position.length, "Expected length 5, got ${second.position.length}")
}
/** Two words on separate lines lex to two ALPHANUMERIC tokens, each with value and length checked. */
@Test
fun scan_multiline() {
    val input = """
        word1
        word2
    """.trimIndent()
    val tokens = Lexer(input).scan()

    assertEquals(3, tokens.size, "Expected 3 tokens, got ${tokens.size}")

    val first = tokens[0]
    assertEquals(TokenType.ALPHANUMERIC, first.type, "Expected ALPHANUMERIC token, got ${first.type}")
    assertEquals("word1", first.value, "Expected 'word1', got ${first.value}")
    assertEquals(5, first.position.length, "Expected length 5, got ${first.position.length}")

    val second = tokens[1]
    assertEquals(TokenType.ALPHANUMERIC, second.type, "Expected ALPHANUMERIC token, got ${second.type}")
    assertEquals("word2", second.value, "Expected 'word2', got ${second.value}")
    assertEquals(5, second.position.length, "Expected length 5, got ${second.position.length}")
}
/** An opening and a closing parenthesis lex to LEFT_PARENTHESES, RIGHT_PARENTHESES, EOF. */
@Test
fun scan_parenthesis_returns_parenthesis() {
    val tokens = Lexer("()").scan()

    assertEquals(3, tokens.size)

    val opening = tokens[0]
    assertEquals(TokenType.LEFT_PARENTHESES, opening.type, "Expected LEFT_PARENTHESES token, got ${opening.type}")

    val closing = tokens[1]
    assertEquals(TokenType.RIGHT_PARENTHESES, closing.type, "Expected RIGHT_PARENTHESES token, got ${closing.type}")

    val end = tokens[2]
    assertEquals(TokenType.EOF, end.type, "Expected EOF token, got ${end.type}")
}
/** A double-quoted word lexes to one ALPHANUMERIC token whose value excludes the quotes. */
@Test
fun scan_simple_quoted_string_returns_string() {
    val tokens = Lexer("\"string\"").scan()

    assertEquals(2, tokens.size)

    val quoted = tokens[0]
    val end = tokens[1]
    assertEquals(TokenType.ALPHANUMERIC, quoted.type, "Expected ALPHANUMERIC token, got ${quoted.type}")
    assertEquals(TokenType.EOF, end.type, "Expected EOF token, got ${end.type}")
    assertEquals("string", quoted.value, "Expected 'string', got ${quoted.value}")
}
/** Spaces inside a double-quoted string stay part of the single ALPHANUMERIC token's value. */
@Test
fun scan_quoted_string_with_space_returns_string() {
    val tokens = Lexer("\"string with space\"").scan()

    assertEquals(2, tokens.size)

    val quoted = tokens[0]
    val end = tokens[1]
    assertEquals(TokenType.ALPHANUMERIC, quoted.type, "Expected ALPHANUMERIC token, got ${quoted.type}")
    assertEquals(TokenType.EOF, end.type, "Expected EOF token, got ${end.type}")
    assertEquals("string with space", quoted.value, "Expected 'string with space', got ${quoted.value}")
}
}