diff --git a/src/lexer/Lexer.kt b/src/lexer/Lexer.kt index eed0e5a..b01c5c6 100644 --- a/src/lexer/Lexer.kt +++ b/src/lexer/Lexer.kt @@ -10,7 +10,10 @@ class Lexer(private val source: String) { while (hasNext()) { val char: Char = peek() tokens += when { - char == '.' -> scanDot() + char == '(' -> scanSymbol(TokenType.LEFT_PARENTHESES) + char == ')' -> scanSymbol(TokenType.RIGHT_PARENTHESES) + char == '.' -> scanSymbol(TokenType.DOT) + char == '"' -> scanQuotedString() char.isLetterOrDigit() -> scanAlphanumeric() char.isWhitespace() -> { scanWhitespace(); continue } else -> throw Error("Unknown symbol: $char", position) @@ -45,26 +48,39 @@ class Lexer(private val source: String) { // Scanners - private fun scanDot(): Token { - return Token(TokenType.DOT, next().toString(), getPosition(1)) + private fun scanSymbol(tokenType: TokenType): Token { + return Token(tokenType, next().toString(), getPosition(1)) } private fun scanAlphanumeric(): Token { var length = 0 - var value = "" - while (hasNext() && peek().isLetterOrDigit()) { - value += next() + next() length++ } + val value = source.substring(position.offset - length, position.offset) + return Token(TokenType.ALPHANUMERIC, value, getPosition(length)) + } + private fun scanQuotedString(): Token { + if (next() != '"') { + throw Error("Illegal state: Expected opening quote", position) + } + var length = 0 + while (hasNext() && peek() != '"') { + next() + length++ + } + if (next() != '"') { + throw Error("Illegal state: Expected closing quote", position) + } + val value = source.substring(position.offset - length - 1, position.offset - 1) return Token(TokenType.ALPHANUMERIC, value, getPosition(length)) } private fun scanWhitespace() { while (hasNext() && peek().isWhitespace()) { - val char = next() - if (char == '\n') { + if (next() == '\n') { position.line++ position.column = 0 } diff --git a/src/lexer/TokenType.kt b/src/lexer/TokenType.kt index 0d88a45..c2b9441 100644 --- a/src/lexer/TokenType.kt +++ b/src/lexer/TokenType.kt @@ -3,7 +3,10 @@ package lexer enum class TokenType { ALPHANUMERIC, + LEFT_PARENTHESES, RIGHT_PARENTHESES, DOT, + // Operators + EOF } diff --git a/tests/lexer/LexerScanPrologTest.kt b/tests/lexer/LexerScanPrologTest.kt new file mode 100644 index 0000000..9e9a054 --- /dev/null +++ b/tests/lexer/LexerScanPrologTest.kt @@ -0,0 +1,28 @@ +package lexer + +import org.junit.jupiter.api.Test +import kotlin.test.assertEquals + +class LexerScanPrologTest { + @Test + fun scan_simple_atom() { + val tokens = Lexer("atom.").scan() + + assertEquals(3, tokens.size) + + assertEquals(TokenType.ALPHANUMERIC, tokens[0].type, "Expected ALPHANUMERIC token, got ${tokens[0].type}") + assertEquals(TokenType.DOT, tokens[1].type, "Expected DOT token, got ${tokens[1].type}") + assertEquals(TokenType.EOF, tokens[2].type, "Expected EOF token, got ${tokens[2].type}") + } + + @Test + fun scan_variable() { + val tokens = Lexer("X.").scan() + + assertEquals(3, tokens.size) + + assertEquals(TokenType.ALPHANUMERIC, tokens[0].type, "Expected ALPHANUMERIC token, got ${tokens[0].type}") + assertEquals(TokenType.DOT, tokens[1].type, "Expected DOT token, got ${tokens[1].type}") + assertEquals(TokenType.EOF, tokens[2].type, "Expected EOF token, got ${tokens[2].type}") + } +} \ No newline at end of file diff --git a/tests/lexer/LexerScanTest.kt b/tests/lexer/LexerScanTest.kt index ff375f6..d04d0f9 100644 --- a/tests/lexer/LexerScanTest.kt +++ b/tests/lexer/LexerScanTest.kt @@ -5,29 +5,24 @@ import lexer.Lexer import lexer.TokenType import org.junit.jupiter.api.Test import org.junit.jupiter.api.assertThrows -import kotlin.test.assertEquals +import org.junit.jupiter.api.Assertions.* class LexerScanTest { @Test fun scan_emptyString_returns_EOF() { - val lexer = Lexer("") - val tokens = lexer.scan() + val tokens = Lexer("").scan() assertEquals(1, tokens.size, "Expected 1 token, got ${tokens.size}") assertEquals(TokenType.EOF, tokens[0].type, "Expected EOF token, got ${tokens[0].type}") } @Test fun scan_unknownSymbol_returns_Error() { - val lexer = Lexer("€") - assertThrows({ - val tokens = lexer.scan() - }) + assertThrows { Lexer("€").scan() } } @Test fun scan_dot_returns_Dot() { - val lexer = Lexer(".") - val tokens = lexer.scan() + val tokens = Lexer(".").scan() assertEquals(2, tokens.size) assertEquals(TokenType.DOT, tokens[0].type, "Expected DOT token, got ${tokens[0].type}") assertEquals(TokenType.EOF, tokens[1].type, "Expected EOF token, got ${tokens[1].type}") @@ -35,18 +30,16 @@ class LexerScanTest { @Test fun scan_two_dots_returns_two_dots() { - val lexer = Lexer("..") - val tokens = lexer.scan() + val tokens = Lexer("..").scan() assertEquals(3, tokens.size) assertEquals(TokenType.DOT, tokens[0].type, "Expected DOT token, got ${tokens[0].type}") assertEquals(TokenType.DOT, tokens[1].type, "Expected DOT token, got ${tokens[1].type}") assertEquals(TokenType.EOF, tokens[2].type, "Expected EOF token, got ${tokens[2].type}") } - + @Test fun scan_letter_returns_letter() { - val lexer = Lexer("a") - val tokens = lexer.scan() + val tokens = Lexer("a").scan() assertEquals(2, tokens.size) @@ -74,7 +67,7 @@ class LexerScanTest { } @Test - fun scan_whitespace_returns_nothing() { + fun scan_space_returns_nothing() { val lexer = Lexer(" ") val tokens = lexer.scan() @@ -82,4 +75,96 @@ class LexerScanTest { assertEquals(TokenType.EOF, tokens[0].type, "Expected EOF token, got ${tokens[0].type}") } + + @Test + fun scan_whitespace_various_returns_nothing() { + val lexer = Lexer(" \t\n\r") + val tokens = lexer.scan() + + assertEquals(1, tokens.size) + + assertEquals(TokenType.EOF, tokens[0].type, "Expected EOF token, got ${tokens[0].type}") + } + + + @Test + fun scan_separated_words() { + val tokens = Lexer("word1 word2").scan() + + assertEquals(3, tokens.size, "Expected 3 tokens, got ${tokens.size}") + + assertEquals(TokenType.ALPHANUMERIC, tokens[0].type, "Expected ALPHANUMERIC token, got ${tokens[0].type}") + assertEquals("word1", tokens[0].value, "Expected 'word1', got ${tokens[0].value}") + assertEquals(5, tokens[0].position.length, "Expected length 5, got ${tokens[0].position.length}") + + assertEquals(TokenType.ALPHANUMERIC, tokens[1].type, "Expected ALPHANUMERIC token, got ${tokens[1].type}") + assertEquals("word2", tokens[1].value, "Expected 'word2', got ${tokens[1].value}") + assertEquals(5, tokens[1].position.length, "Expected length 5, got ${tokens[1].position.length}") + } + + @Test + fun scan_multiline() { + val tokens = Lexer( + """ + word1 + word2 + """.trimIndent() + ).scan() + + assertEquals(3, tokens.size, "Expected 3 tokens, got ${tokens.size}") + + assertEquals(TokenType.ALPHANUMERIC, tokens[0].type, "Expected ALPHANUMERIC token, got ${tokens[0].type}") + assertEquals("word1", tokens[0].value, "Expected 'word1', got ${tokens[0].value}") + assertEquals(5, tokens[0].position.length, "Expected length 5, got ${tokens[0].position.length}") + + assertEquals(TokenType.ALPHANUMERIC, tokens[1].type, "Expected ALPHANUMERIC token, got ${tokens[1].type}") + assertEquals("word2", tokens[1].value, "Expected 'word2', got ${tokens[1].value}") + assertEquals(5, tokens[1].position.length, "Expected length 5, got ${tokens[1].position.length}") + } + + @Test + fun scan_parenthesis_returns_parenthesis() { + val lexer = Lexer("()") + val tokens = lexer.scan() + + assertEquals(3, tokens.size) + + assertEquals( + TokenType.LEFT_PARENTHESES, + tokens[0].type, + "Expected LEFT_PARENTHESES token, got ${tokens[0].type}" + ) + assertEquals( + TokenType.RIGHT_PARENTHESES, + tokens[1].type, + "Expected RIGHT_PARENTHESES token, got ${tokens[1].type}" + ) + assertEquals(TokenType.EOF, tokens[2].type, "Expected EOF token, got ${tokens[2].type}") + } + + @Test + fun scan_simple_quoted_string_returns_string() { + val lexer = Lexer("\"string\"") + val tokens = lexer.scan() + + assertEquals(2, tokens.size) + + assertEquals(TokenType.ALPHANUMERIC, tokens[0].type, "Expected ALPHANUMERIC token, got ${tokens[0].type}") + assertEquals(TokenType.EOF, tokens[1].type, "Expected EOF token, got ${tokens[1].type}") + + assertEquals("string", tokens[0].value, "Expected 'string', got ${tokens[0].value}") + } + + @Test + fun scan_quoted_string_with_space_returns_string() { + val lexer = Lexer("\"string with space\"") + val tokens = lexer.scan() + + assertEquals(2, tokens.size) + + assertEquals(TokenType.ALPHANUMERIC, tokens[0].type, "Expected ALPHANUMERIC token, got ${tokens[0].type}") + assertEquals(TokenType.EOF, tokens[1].type, "Expected EOF token, got ${tokens[1].type}") + + assertEquals("string with space", tokens[0].value, "Expected 'string with space', got ${tokens[0].value}") + } }