feat(lexer): Parentheses and quoted strings
This commit is contained in:
parent
e1f632ca40
commit
dc9e43e9ba
4 changed files with 155 additions and 23 deletions
|
@ -10,7 +10,10 @@ class Lexer(private val source: String) {
|
|||
while (hasNext()) {
|
||||
val char: Char = peek()
|
||||
tokens += when {
|
||||
char == '.' -> scanDot()
|
||||
char == '(' -> scanSymbol(TokenType.LEFT_PARENTHESES)
|
||||
char == ')' -> scanSymbol(TokenType.RIGHT_PARENTHESES)
|
||||
char == '.' -> scanSymbol(TokenType.DOT)
|
||||
char == '"' -> scanQuotedString()
|
||||
char.isLetterOrDigit() -> scanAlphanumeric()
|
||||
char.isWhitespace() -> { scanWhitespace(); continue }
|
||||
else -> throw Error("Unknown symbol: $char", position)
|
||||
|
@ -45,26 +48,39 @@ class Lexer(private val source: String) {
|
|||
|
||||
// Scanners
|
||||
|
||||
/**
 * Scans a single `.` character.
 *
 * Kept as a thin wrapper so existing `scanDot()` call sites keep working;
 * the actual work is done by [scanSymbol].
 */
private fun scanDot(): Token = scanSymbol(TokenType.DOT)

/**
 * Consumes exactly one character and wraps it in a token of the given type.
 *
 * The token value is the consumed character as a string, and its position is
 * built with `getPosition(1)`.
 *
 * @param tokenType the type to assign to the produced token
 * @return the token for the consumed character
 */
private fun scanSymbol(tokenType: TokenType): Token {
    val symbol = next().toString()
    return Token(tokenType, symbol, getPosition(1))
}
|
||||
|
||||
/**
 * Scans a run of consecutive letters and digits starting at the current position.
 *
 * Characters are consumed one at a time while `peek()` reports a letter or digit;
 * the token text is then sliced out of [source] rather than accumulated by
 * string concatenation.
 *
 * @return a [TokenType.ALPHANUMERIC] token whose value is the consumed run
 */
private fun scanAlphanumeric(): Token {
    var length = 0
    while (hasNext() && peek().isLetterOrDigit()) {
        // Advance exactly once per counted character so `length` matches the consumed span.
        next()
        length++
    }

    // Assumes next() advances position.offset by one per character — the slice
    // below relies on that to find the start of the run.
    val value = source.substring(position.offset - length, position.offset)
    return Token(TokenType.ALPHANUMERIC, value, getPosition(length))
}
|
||||
|
||||
/**
 * Scans a double-quoted string starting at the current position.
 *
 * Consumes the opening quote, every character up to the next `"`, and the
 * closing quote. The token value is the text between the quotes; no escape
 * sequences are interpreted. The token is emitted as [TokenType.ALPHANUMERIC]
 * and its position is built with `getPosition(length)`, where `length`
 * excludes both quotes.
 *
 * @return the token holding the unquoted string content
 * @throws Error if the current character is not `"`, or if the input ends
 *   before a closing quote is found
 */
private fun scanQuotedString(): Token {
    if (next() != '"') {
        throw Error("Illegal state: Expected opening quote", position)
    }

    var length = 0
    while (hasNext() && peek() != '"') {
        next()
        length++
    }

    // Test hasNext() first: the loop above also stops at end of input, and
    // next() is not known to be safe there. An unterminated string must be
    // reported as a lexer error rather than whatever next() does at EOF.
    if (!hasNext() || next() != '"') {
        throw Error("Illegal state: Expected closing quote", position)
    }

    // Step back over the closing quote that was just consumed
    // (assumes next() advances position.offset by one per character).
    val contentEnd = position.offset - 1
    val value = source.substring(contentEnd - length, contentEnd)
    return Token(TokenType.ALPHANUMERIC, value, getPosition(length))
}
|
||||
|
||||
private fun scanWhitespace() {
|
||||
while (hasNext() && peek().isWhitespace()) {
|
||||
val char = next()
|
||||
if (char == '\n') {
|
||||
if (next() == '\n') {
|
||||
position.line++
|
||||
position.column = 0
|
||||
}
|
||||
|
|
|
@ -3,7 +3,10 @@ package lexer
|
|||
/**
 * Kinds of tokens the lexer can emit.
 */
enum class TokenType {
    /** A run of letters/digits, or the content of a quoted string. */
    ALPHANUMERIC,

    /** The `(` character. */
    LEFT_PARENTHESES,

    /** The `)` character. */
    RIGHT_PARENTHESES,

    /** The `.` character. */
    DOT,

    // Operators

    /** Marks the end of the token stream. */
    EOF
}
|
||||
|
|
28
tests/lexer/LexerScanPrologTest.kt
Normal file
28
tests/lexer/LexerScanPrologTest.kt
Normal file
|
@ -0,0 +1,28 @@
|
|||
package lexer
|
||||
|
||||
import org.junit.jupiter.api.Test
|
||||
import kotlin.test.assertEquals
|
||||
|
||||
/**
 * Lexer tests on small Prolog-style inputs: a single term followed by a dot.
 */
class LexerScanPrologTest {
    /** A lowercase atom followed by `.` lexes to ALPHANUMERIC, DOT, EOF. */
    @Test
    fun scan_simple_atom() {
        assertTermThenDot("atom.")
    }

    /** An uppercase variable followed by `.` lexes to ALPHANUMERIC, DOT, EOF. */
    @Test
    fun scan_variable() {
        assertTermThenDot("X.")
    }

    /** Lexes [input] and checks that it yields exactly ALPHANUMERIC, DOT, EOF. */
    private fun assertTermThenDot(input: String) {
        val scanned = Lexer(input).scan()

        assertEquals(3, scanned.size)

        val term = scanned[0]
        assertEquals(TokenType.ALPHANUMERIC, term.type, "Expected ALPHANUMERIC token, got ${term.type}")
        val dot = scanned[1]
        assertEquals(TokenType.DOT, dot.type, "Expected DOT token, got ${dot.type}")
        val end = scanned[2]
        assertEquals(TokenType.EOF, end.type, "Expected EOF token, got ${end.type}")
    }
}
|
|
@ -5,29 +5,24 @@ import lexer.Lexer
|
|||
import lexer.TokenType
|
||||
import org.junit.jupiter.api.Test
|
||||
import org.junit.jupiter.api.assertThrows
|
||||
import kotlin.test.assertEquals
|
||||
import org.junit.jupiter.api.Assertions.*
|
||||
|
||||
class LexerScanTest {
|
||||
/** An empty source produces only the EOF token. */
@Test
fun scan_emptyString_returns_EOF() {
    // Single declaration of `tokens`; the lexer instance is not needed afterwards.
    val tokens = Lexer("").scan()

    assertEquals(1, tokens.size, "Expected 1 token, got ${tokens.size}")
    assertEquals(TokenType.EOF, tokens[0].type, "Expected EOF token, got ${tokens[0].type}")
}
|
||||
|
||||
/** A character the lexer has no rule for makes `scan()` throw an `Error`. */
@Test
fun scan_unknownSymbol_returns_Error() {
    assertThrows<Error> { Lexer("€").scan() }
}
|
||||
|
||||
@Test
|
||||
fun scan_dot_returns_Dot() {
|
||||
val lexer = Lexer(".")
|
||||
val tokens = lexer.scan()
|
||||
val tokens = Lexer(".").scan()
|
||||
assertEquals(2, tokens.size)
|
||||
assertEquals(TokenType.DOT, tokens[0].type, "Expected DOT token, got ${tokens[0].type}")
|
||||
assertEquals(TokenType.EOF, tokens[1].type, "Expected EOF token, got ${tokens[1].type}")
|
||||
|
@ -35,18 +30,16 @@ class LexerScanTest {
|
|||
|
||||
/** Two consecutive dots produce two DOT tokens followed by EOF. */
@Test
fun scan_two_dots_returns_two_dots() {
    // Single declaration of `tokens`; the lexer instance is not needed afterwards.
    val tokens = Lexer("..").scan()

    assertEquals(3, tokens.size)
    assertEquals(TokenType.DOT, tokens[0].type, "Expected DOT token, got ${tokens[0].type}")
    assertEquals(TokenType.DOT, tokens[1].type, "Expected DOT token, got ${tokens[1].type}")
    assertEquals(TokenType.EOF, tokens[2].type, "Expected EOF token, got ${tokens[2].type}")
}
|
||||
|
||||
|
||||
@Test
|
||||
fun scan_letter_returns_letter() {
|
||||
val lexer = Lexer("a")
|
||||
val tokens = lexer.scan()
|
||||
val tokens = Lexer("a").scan()
|
||||
|
||||
assertEquals(2, tokens.size)
|
||||
|
||||
|
@ -74,7 +67,7 @@ class LexerScanTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
fun scan_whitespace_returns_nothing() {
|
||||
fun scan_space_returns_nothing() {
|
||||
val lexer = Lexer(" ")
|
||||
val tokens = lexer.scan()
|
||||
|
||||
|
@ -82,4 +75,96 @@ class LexerScanTest {
|
|||
|
||||
assertEquals(TokenType.EOF, tokens[0].type, "Expected EOF token, got ${tokens[0].type}")
|
||||
}
|
||||
|
||||
/** Space, tab, newline and carriage return are all skipped, leaving only EOF. */
@Test
fun scan_whitespace_various_returns_nothing() {
    val scanned = Lexer(" \t\n\r").scan()

    assertEquals(1, scanned.size)

    val end = scanned[0]
    assertEquals(TokenType.EOF, end.type, "Expected EOF token, got ${end.type}")
}
|
||||
|
||||
|
||||
/** Two words separated by a space become two ALPHANUMERIC tokens plus EOF. */
@Test
fun scan_separated_words() {
    val scanned = Lexer("word1 word2").scan()

    assertEquals(3, scanned.size, "Expected 3 tokens, got ${scanned.size}")

    val first = scanned[0]
    assertEquals(TokenType.ALPHANUMERIC, first.type, "Expected ALPHANUMERIC token, got ${first.type}")
    assertEquals("word1", first.value, "Expected 'word1', got ${first.value}")
    assertEquals(5, first.position.length, "Expected length 5, got ${first.position.length}")

    val second = scanned[1]
    assertEquals(TokenType.ALPHANUMERIC, second.type, "Expected ALPHANUMERIC token, got ${second.type}")
    assertEquals("word2", second.value, "Expected 'word2', got ${second.value}")
    assertEquals(5, second.position.length, "Expected length 5, got ${second.position.length}")
}
|
||||
|
||||
/** Two words on separate lines become two ALPHANUMERIC tokens plus EOF. */
@Test
fun scan_multiline() {
    val input = """
        word1
        word2
    """.trimIndent()
    val scanned = Lexer(input).scan()

    assertEquals(3, scanned.size, "Expected 3 tokens, got ${scanned.size}")

    val first = scanned[0]
    assertEquals(TokenType.ALPHANUMERIC, first.type, "Expected ALPHANUMERIC token, got ${first.type}")
    assertEquals("word1", first.value, "Expected 'word1', got ${first.value}")
    assertEquals(5, first.position.length, "Expected length 5, got ${first.position.length}")

    val second = scanned[1]
    assertEquals(TokenType.ALPHANUMERIC, second.type, "Expected ALPHANUMERIC token, got ${second.type}")
    assertEquals("word2", second.value, "Expected 'word2', got ${second.value}")
    assertEquals(5, second.position.length, "Expected length 5, got ${second.position.length}")
}
|
||||
|
||||
/** `()` lexes to LEFT_PARENTHESES, RIGHT_PARENTHESES, EOF. */
@Test
fun scan_parenthesis_returns_parenthesis() {
    val scanned = Lexer("()").scan()

    assertEquals(3, scanned.size)

    val open = scanned[0]
    assertEquals(TokenType.LEFT_PARENTHESES, open.type, "Expected LEFT_PARENTHESES token, got ${open.type}")

    val close = scanned[1]
    assertEquals(TokenType.RIGHT_PARENTHESES, close.type, "Expected RIGHT_PARENTHESES token, got ${close.type}")

    val end = scanned[2]
    assertEquals(TokenType.EOF, end.type, "Expected EOF token, got ${end.type}")
}
|
||||
|
||||
/** A quoted word yields one ALPHANUMERIC token holding the text without the quotes. */
@Test
fun scan_simple_quoted_string_returns_string() {
    val scanned = Lexer("\"string\"").scan()

    assertEquals(2, scanned.size)

    val content = scanned[0]
    val end = scanned[1]
    assertEquals(TokenType.ALPHANUMERIC, content.type, "Expected ALPHANUMERIC token, got ${content.type}")
    assertEquals(TokenType.EOF, end.type, "Expected EOF token, got ${end.type}")

    assertEquals("string", content.value, "Expected 'string', got ${content.value}")
}
|
||||
|
||||
/** Spaces inside quotes stay part of the single ALPHANUMERIC token's value. */
@Test
fun scan_quoted_string_with_space_returns_string() {
    val scanned = Lexer("\"string with space\"").scan()

    assertEquals(2, scanned.size)

    val content = scanned[0]
    val end = scanned[1]
    assertEquals(TokenType.ALPHANUMERIC, content.type, "Expected ALPHANUMERIC token, got ${content.type}")
    assertEquals(TokenType.EOF, end.type, "Expected EOF token, got ${end.type}")

    assertEquals("string with space", content.value, "Expected 'string with space', got ${content.value}")
}
|
||||
}
|
||||
|
|
Reference in a new issue