feat(lexer): Parentheses and quoted strings
parent e1f632ca40
commit dc9e43e9ba
4 changed files with 155 additions and 23 deletions
@@ -10,7 +10,10 @@ class Lexer(private val source: String) {
         while (hasNext()) {
             val char: Char = peek()
             tokens += when {
-                char == '.' -> scanDot()
+                char == '(' -> scanSymbol(TokenType.LEFT_PARENTHESES)
+                char == ')' -> scanSymbol(TokenType.RIGHT_PARENTHESES)
+                char == '.' -> scanSymbol(TokenType.DOT)
+                char == '"' -> scanQuotedString()
                 char.isLetterOrDigit() -> scanAlphanumeric()
                 char.isWhitespace() -> { scanWhitespace(); continue }
                 else -> throw Error("Unknown symbol: $char", position)
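The dispatch above now routes '(' and ')' through a shared scanSymbol scanner, sends '.' down the same path (replacing the dedicated scanDot), and hands '"' to a new scanQuotedString. As a rough illustration of the resulting behavior (hypothetical input; the expected token stream is inferred from the tests added in this commit):

    val tokens = Lexer("likes(prolog).").scan()
    // ALPHANUMERIC("likes"), LEFT_PARENTHESES, ALPHANUMERIC("prolog"),
    // RIGHT_PARENTHESES, DOT, EOF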
@@ -45,26 +48,39 @@ class Lexer(private val source: String) {
 
     // Scanners
 
-    private fun scanDot(): Token {
-        return Token(TokenType.DOT, next().toString(), getPosition(1))
+    private fun scanSymbol(tokenType: TokenType): Token {
+        return Token(tokenType, next().toString(), getPosition(1))
     }
 
     private fun scanAlphanumeric(): Token {
         var length = 0
-        var value = ""
 
         while (hasNext() && peek().isLetterOrDigit()) {
-            value += next()
+            next()
             length++
         }
+
+        val value = source.substring(position.offset - length, position.offset)
+        return Token(TokenType.ALPHANUMERIC, value, getPosition(length))
+    }
+
+    private fun scanQuotedString(): Token {
+        if (next() != '"') {
+            throw Error("Illegal state: Expected opening quote", position)
+        }
+
+        var length = 0
+        while (hasNext() && peek() != '"') {
+            next()
+            length++
+        }
+
+        if (next() != '"') {
+            throw Error("Illegal state: Expected closing quote", position)
+        }
+
+        val value = source.substring(position.offset - length - 1, position.offset - 1)
         return Token(TokenType.ALPHANUMERIC, value, getPosition(length))
     }
 
     private fun scanWhitespace() {
         while (hasNext() && peek().isWhitespace()) {
-            val char = next()
-            if (char == '\n') {
+            if (next() == '\n') {
                 position.line++
                 position.column = 0
             }
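These scanners lean on cursor helpers (hasNext, peek, next), a mutable position, and a position-aware Error type, none of which appear in this diff. A minimal sketch of what those members presumably look like, reconstructed purely from how they are used above (the real implementations may differ):

    // Assumed shapes, inferred from usage in this diff; not the project's actual code.
    data class Position(var line: Int, var column: Int, var offset: Int, val length: Int = 0)

    class Error(message: String, val position: Position) : Exception(message)

    // Inside Lexer: a cursor over `source` that every scanner advances.
    private fun hasNext(): Boolean = position.offset < source.length

    private fun peek(): Char = source[position.offset]

    private fun next(): Char = source[position.offset].also {
        position.offset++
        position.column++
    }

    // A position describing the `length` characters ending at the current offset.
    private fun getPosition(length: Int): Position =
        Position(position.line, position.column - length, position.offset - length, length)

Under these assumptions the substring arithmetic in scanQuotedString works out: for the four-character source "ab" (quote, a, b, quote), the closing next() leaves offset at 4 and length at 2, so substring(4 - 2 - 1, 4 - 1) returns the characters at indices 1 and 2, i.e. ab with both quotes stripped.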
@@ -3,7 +3,10 @@ package lexer
 enum class TokenType {
     ALPHANUMERIC,
 
+    LEFT_PARENTHESES, RIGHT_PARENTHESES,
     DOT,
 
+    // Operators
+
     EOF
 }
tests/lexer/LexerScanPrologTest.kt (new file, 28 lines)
@@ -0,0 +1,28 @@
+package lexer
+
+import org.junit.jupiter.api.Test
+import kotlin.test.assertEquals
+
+class LexerScanPrologTest {
+    @Test
+    fun scan_simple_atom() {
+        val tokens = Lexer("atom.").scan()
+
+        assertEquals(3, tokens.size)
+
+        assertEquals(TokenType.ALPHANUMERIC, tokens[0].type, "Expected ALPHANUMERIC token, got ${tokens[0].type}")
+        assertEquals(TokenType.DOT, tokens[1].type, "Expected DOT token, got ${tokens[1].type}")
+        assertEquals(TokenType.EOF, tokens[2].type, "Expected EOF token, got ${tokens[2].type}")
+    }
+
+    @Test
+    fun scan_variable() {
+        val tokens = Lexer("X.").scan()
+
+        assertEquals(3, tokens.size)
+
+        assertEquals(TokenType.ALPHANUMERIC, tokens[0].type, "Expected ALPHANUMERIC token, got ${tokens[0].type}")
+        assertEquals(TokenType.DOT, tokens[1].type, "Expected DOT token, got ${tokens[1].type}")
+        assertEquals(TokenType.EOF, tokens[2].type, "Expected EOF token, got ${tokens[2].type}")
+    }
+}
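Both tests expect atoms ("atom") and variables ("X") to come back as plain ALPHANUMERIC tokens, so telling atoms from variables is evidently deferred to a later stage. A natural follow-up case, sketched here but not part of this commit, would exercise a compound term with the new parenthesis tokens (note a comma-separated argument list would still hit the lexer's else branch and throw):

    @Test
    fun scan_compound_term() {
        val tokens = Lexer("point(X).").scan()

        assertEquals(6, tokens.size)

        assertEquals(TokenType.ALPHANUMERIC, tokens[0].type)     // "point"
        assertEquals(TokenType.LEFT_PARENTHESES, tokens[1].type)
        assertEquals(TokenType.ALPHANUMERIC, tokens[2].type)     // "X"
        assertEquals(TokenType.RIGHT_PARENTHESES, tokens[3].type)
        assertEquals(TokenType.DOT, tokens[4].type)
        assertEquals(TokenType.EOF, tokens[5].type)
    }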
@@ -5,29 +5,24 @@ import lexer.Lexer
 import lexer.TokenType
 import org.junit.jupiter.api.Test
 import org.junit.jupiter.api.assertThrows
-import kotlin.test.assertEquals
+import org.junit.jupiter.api.Assertions.*
 
 class LexerScanTest {
     @Test
     fun scan_emptyString_returns_EOF() {
-        val lexer = Lexer("")
-        val tokens = lexer.scan()
+        val tokens = Lexer("").scan()
         assertEquals(1, tokens.size, "Expected 1 token, got ${tokens.size}")
         assertEquals(TokenType.EOF, tokens[0].type, "Expected EOF token, got ${tokens[0].type}")
     }
 
     @Test
     fun scan_unknownSymbol_returns_Error() {
-        val lexer = Lexer("€")
-        assertThrows<Error>({
-            val tokens = lexer.scan()
-        })
+        assertThrows<Error> { Lexer("€").scan() }
     }
 
     @Test
     fun scan_dot_returns_Dot() {
-        val lexer = Lexer(".")
-        val tokens = lexer.scan()
+        val tokens = Lexer(".").scan()
         assertEquals(2, tokens.size)
         assertEquals(TokenType.DOT, tokens[0].type, "Expected DOT token, got ${tokens[0].type}")
         assertEquals(TokenType.EOF, tokens[1].type, "Expected EOF token, got ${tokens[1].type}")
@@ -35,8 +30,7 @@ class LexerScanTest {
 
     @Test
     fun scan_two_dots_returns_two_dots() {
-        val lexer = Lexer("..")
-        val tokens = lexer.scan()
+        val tokens = Lexer("..").scan()
         assertEquals(3, tokens.size)
         assertEquals(TokenType.DOT, tokens[0].type, "Expected DOT token, got ${tokens[0].type}")
         assertEquals(TokenType.DOT, tokens[1].type, "Expected DOT token, got ${tokens[1].type}")
@@ -45,8 +39,7 @@ class LexerScanTest {
 
     @Test
     fun scan_letter_returns_letter() {
-        val lexer = Lexer("a")
-        val tokens = lexer.scan()
+        val tokens = Lexer("a").scan()
 
         assertEquals(2, tokens.size)
 
@@ -74,7 +67,7 @@ class LexerScanTest {
     }
 
     @Test
-    fun scan_whitespace_returns_nothing() {
+    fun scan_space_returns_nothing() {
         val lexer = Lexer(" ")
         val tokens = lexer.scan()
 
@@ -82,4 +75,96 @@ class LexerScanTest {
 
         assertEquals(TokenType.EOF, tokens[0].type, "Expected EOF token, got ${tokens[0].type}")
     }
+
+    @Test
+    fun scan_whitespace_various_returns_nothing() {
+        val lexer = Lexer(" \t\n\r")
+        val tokens = lexer.scan()
+
+        assertEquals(1, tokens.size)
+
+        assertEquals(TokenType.EOF, tokens[0].type, "Expected EOF token, got ${tokens[0].type}")
+    }
+
+    @Test
+    fun scan_separated_words() {
+        val tokens = Lexer("word1 word2").scan()
+
+        assertEquals(3, tokens.size, "Expected 3 tokens, got ${tokens.size}")
+
+        assertEquals(TokenType.ALPHANUMERIC, tokens[0].type, "Expected ALPHANUMERIC token, got ${tokens[0].type}")
+        assertEquals("word1", tokens[0].value, "Expected 'word1', got ${tokens[0].value}")
+        assertEquals(5, tokens[0].position.length, "Expected length 5, got ${tokens[0].position.length}")
+
+        assertEquals(TokenType.ALPHANUMERIC, tokens[1].type, "Expected ALPHANUMERIC token, got ${tokens[1].type}")
+        assertEquals("word2", tokens[1].value, "Expected 'word2', got ${tokens[1].value}")
+        assertEquals(5, tokens[1].position.length, "Expected length 5, got ${tokens[1].position.length}")
+    }
+
+    @Test
+    fun scan_multiline() {
+        val tokens = Lexer(
+            """
+            word1
+            word2
+            """.trimIndent()
+        ).scan()
+
+        assertEquals(3, tokens.size, "Expected 3 tokens, got ${tokens.size}")
+
+        assertEquals(TokenType.ALPHANUMERIC, tokens[0].type, "Expected ALPHANUMERIC token, got ${tokens[0].type}")
+        assertEquals("word1", tokens[0].value, "Expected 'word1', got ${tokens[0].value}")
+        assertEquals(5, tokens[0].position.length, "Expected length 5, got ${tokens[0].position.length}")
+
+        assertEquals(TokenType.ALPHANUMERIC, tokens[1].type, "Expected ALPHANUMERIC token, got ${tokens[1].type}")
+        assertEquals("word2", tokens[1].value, "Expected 'word2', got ${tokens[1].value}")
+        assertEquals(5, tokens[1].position.length, "Expected length 5, got ${tokens[1].position.length}")
+    }
+
+    @Test
+    fun scan_parenthesis_returns_parenthesis() {
+        val lexer = Lexer("()")
+        val tokens = lexer.scan()
+
+        assertEquals(3, tokens.size)
+
+        assertEquals(
+            TokenType.LEFT_PARENTHESES,
+            tokens[0].type,
+            "Expected LEFT_PARENTHESES token, got ${tokens[0].type}"
+        )
+        assertEquals(
+            TokenType.RIGHT_PARENTHESES,
+            tokens[1].type,
+            "Expected RIGHT_PARENTHESES token, got ${tokens[1].type}"
+        )
+        assertEquals(TokenType.EOF, tokens[2].type, "Expected EOF token, got ${tokens[2].type}")
+    }
+
+    @Test
+    fun scan_simple_quoted_string_returns_string() {
+        val lexer = Lexer("\"string\"")
+        val tokens = lexer.scan()
+
+        assertEquals(2, tokens.size)
+
+        assertEquals(TokenType.ALPHANUMERIC, tokens[0].type, "Expected ALPHANUMERIC token, got ${tokens[0].type}")
+        assertEquals(TokenType.EOF, tokens[1].type, "Expected EOF token, got ${tokens[1].type}")
+
+        assertEquals("string", tokens[0].value, "Expected 'string', got ${tokens[0].value}")
+    }
+
+    @Test
+    fun scan_quoted_string_with_space_returns_string() {
+        val lexer = Lexer("\"string with space\"")
+        val tokens = lexer.scan()
+
+        assertEquals(2, tokens.size)
+
+        assertEquals(TokenType.ALPHANUMERIC, tokens[0].type, "Expected ALPHANUMERIC token, got ${tokens[0].type}")
+        assertEquals(TokenType.EOF, tokens[1].type, "Expected EOF token, got ${tokens[1].type}")
+
+        assertEquals("string with space", tokens[0].value, "Expected 'string with space', got ${tokens[0].value}")
+    }
 }
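The new cases only cover quoted strings at the top level, and nothing yet pins down what happens when the closing quote is missing: if the input ends mid-string, the final next() in scanQuotedString reads past the end of source, so the "Expected closing quote" error may never be reached (the exact failure mode depends on the unshown next() implementation). A hypothetical composition test, not part of this commit, with expectations inferred from the scanners above:

    @Test
    fun scan_quoted_string_in_parentheses() {
        val tokens = Lexer("(\"a b\").").scan()

        assertEquals(5, tokens.size)

        assertEquals(TokenType.LEFT_PARENTHESES, tokens[0].type)
        assertEquals(TokenType.ALPHANUMERIC, tokens[1].type)
        assertEquals("a b", tokens[1].value)
        assertEquals(TokenType.RIGHT_PARENTHESES, tokens[2].type)
        assertEquals(TokenType.DOT, tokens[3].type)
        assertEquals(TokenType.EOF, tokens[4].type)
    }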