package lexer

import java.util.LinkedList

/**
 * Hand-written scanner that turns [source] into a flat list of [Token]s.
 *
 * Recognised input: `(`, `)`, `.`, double-quoted strings, runs of letters/digits,
 * `%` line comments (skipped) and whitespace (skipped). Any other character makes
 * [scan] throw.
 *
 * Token positions are captured *after* the token's characters have been consumed,
 * so the reported column is the one the lexer is at once the token ends.
 */
class Lexer(private val source: String) {
    // Mutable accumulator: appending is O(1). (A read-only `List` combined with `+=`
    // would copy the whole list for every token.)
    private val tokens: MutableList<Token> = LinkedList()

    // NOTE(review): LexerPosition is declared elsewhere; the argument order is presumably
    // (offset, line, column), i.e. offset 0, line 0, column -1 -- confirm.
    private val position = LexerPosition(0, 0, -1)

    /**
     * Scans the source code and returns a list of tokens.
     *
     * The returned list always ends with an `EOF` token. The lexer keeps its cursor and
     * accumulated tokens between calls, so calling this again on the same instance appends
     * a further `EOF` token to what was already scanned.
     *
     * @return List of [Token]s (a snapshot; later calls do not modify it)
     * @throws Error on a character that does not start any known token, or when the input
     * ends in the middle of a quoted string
     */
    fun scan(): List<Token> {
        while (hasNext()) {
            val char: Char = peek()
            when {
                char == '(' -> tokens.add(scanSymbol(TokenType.LEFT_PARENTHESES))
                char == ')' -> tokens.add(scanSymbol(TokenType.RIGHT_PARENTHESES))
                char == '.' -> tokens.add(scanSymbol(TokenType.DOT))
                char == '"' -> tokens.add(scanQuotedString())
                // Comments and whitespace produce no token.
                char == '%' -> scanComment()
                char.isLetterOrDigit() -> tokens.add(scanAlphanumeric())
                char.isWhitespace() -> scanWhitespace()
                else -> throw Error("Unknown symbol: $char", position)
            }
        }

        tokens.add(Token(TokenType.EOF, "EOF", getPosition(0)))
        return tokens.toList()
    }

    /** Returns `true` while the cursor is still inside [source]. */
    private fun hasNext(): Boolean = position.offset < source.length

    /**
     * Returns the character under the cursor without consuming it.
     *
     * @throws Error if the cursor is already past the end of [source]
     */
    private fun peek(): Char {
        if (!hasNext()) {
            throw Error("Unexpected end of input", position)
        }
        return source[position.offset]
    }

    /**
     * Consumes the character under the cursor, advancing both offset and column by one.
     * Line tracking is not done here; see [scanWhitespace].
     *
     * @return the consumed character
     * @throws Error if there is no character left
     */
    private fun next(): Char {
        val char = peek()
        position.offset++
        position.column++
        return char
    }

    /**
     * Builds a [TokenPosition] from the lexer's current line and column.
     *
     * @param length the length to record for the token
     */
    private fun getPosition(length: Int = 1): TokenPosition =
        TokenPosition(position.line, position.column, length)

    /* * * * * * *
     * Scanners *
     * * * * * * */

    /**
     * Scans a symbol token, given the expected [TokenType].
     *
     * Consumes exactly one character and uses it as the token's value.
     *
     * @param tokenType The expected [TokenType]
     * @return The scanned [Token]
     */
    private fun scanSymbol(tokenType: TokenType): Token {
        // Consume first so that the position is taken after the symbol, as in the other scanners.
        val symbol = next().toString()
        return Token(tokenType, symbol, getPosition(1))
    }

    /**
     * Scans a maximal run of letters and digits.
     *
     * @return an [TokenType.ALPHANUMERIC] token whose value is the consumed run
     */
    private fun scanAlphanumeric(): Token {
        val start = position.offset
        while (hasNext() && peek().isLetterOrDigit()) {
            next()
        }

        val value = source.substring(start, position.offset)
        return Token(TokenType.ALPHANUMERIC, value, getPosition(value.length))
    }

    /**
     * Scans a double-quoted string. There is no escape handling: the string ends at the
     * next `"` character.
     *
     * @return an [TokenType.ALPHANUMERIC] token whose value is the text between the quotes;
     * the recorded length excludes both quotes
     * @throws Error if the cursor is not on an opening quote, or if the input ends before
     * a closing quote is found
     */
    private fun scanQuotedString(): Token {
        // "Assert" that the next character is the start of a quoted string
        if (next() != '"') {
            throw Error("Illegal state: Expected opening quote", position)
        }

        val start = position.offset
        while (hasNext() && peek() != '"') {
            next()
        }
        val end = position.offset

        // "Assert" that the next character is the end of the quoted string.
        // On unterminated input, next() itself throws "Unexpected end of input".
        if (next() != '"') {
            throw Error("Illegal state: Expected closing quote", position)
        }

        val value = source.substring(start, end)
        return Token(TokenType.ALPHANUMERIC, value, getPosition(end - start))
    }

    /**
     * Skips a `%` comment up to, but not including, the terminating newline
     * (or up to the end of input).
     *
     * @throws Error if the cursor is not on a `%`
     */
    private fun scanComment() {
        // "Assert" that the next character is the start of a comment
        if (next() != '%') {
            throw Error("Illegal state: Expected opening comment", position)
        }

        while (hasNext() && peek() != '\n') {
            next()
        }
    }

    /**
     * Skips a run of whitespace. This is the only place where line numbers advance:
     * every consumed `'\n'` increments the line and resets the column.
     */
    private fun scanWhitespace() {
        while (hasNext() && peek().isWhitespace()) {
            if (next() == '\n') {
                position.line++
                // NOTE(review): the column is reset to 0 here while the initial position appears
                // to start at -1; if so, columns on later lines are shifted by one relative to
                // the first line -- confirm against LexerPosition before changing.
                position.column = 0
            }
        }
    }
}