Rework parsing structure

2025-04-27 19:31:29 +02:00 · 2025-04-27 19:31:29 +02:00 · b9f419a59d
commit b9f419a59d
parent a4ec29f084
17 changed files with 246 additions and 278 deletions
--- a/src/parser/Parser.kt
+++ b/src/parser/Parser.kt
@ -0,0 +1,11 @@
+package parser
+
+/**
+ * Common entry point for the parsers in this package.
+ *
+ * The declared result type is [Any]; an implementation may narrow it to the
+ * concrete AST type it produces.
+ */
+interface Parser {
+    /**
+     * Turns raw source text into its parsed representation.
+     *
+     * @param input the text to parse
+     * @return the AST built from [input]
+     */
+    fun parse(input: String): Any
+}
--- a/src/parser/ReplParser.kt
+++ b/src/parser/ReplParser.kt
@ -0,0 +1,9 @@
+package parser
+
+import prolog.builtins.Query
+
+/**
+ * [Parser] whose result is a [Query].
+ *
+ * Parsing is not implemented yet: every call to [parse] throws [NotImplementedError].
+ */
+class ReplParser : Parser {
+    /**
+     * Placeholder for parsing [input] into a [Query].
+     *
+     * @throws NotImplementedError always, until the implementation is written
+     */
+    override fun parse(input: String): Query = TODO("Not yet implemented")
+}
--- a/src/parser/ScriptParser.kt
+++ b/src/parser/ScriptParser.kt
@ -0,0 +1,12 @@
+package parser
+
+import com.github.h0tk3y.betterParse.grammar.Grammar
+import com.github.h0tk3y.betterParse.grammar.parseToEnd
+import parser.grammars.LogicGrammar
+import prolog.ast.logic.Clause
+
+/**
+ * [Parser] that runs an entire input through [LogicGrammar] and returns the clauses it contains.
+ */
+class ScriptParser : Parser {
+    // LogicGrammar is a Grammar<Any>, so narrowing it to Grammar<List<Clause>> is an unchecked
+    // cast. It relies on LogicGrammar's root parser being its clause-list parser.
+    private val grammar: Grammar<List<Clause>> = LogicGrammar() as Grammar<List<Clause>>
+
+    /**
+     * Parses [input] to the end with the logic grammar.
+     *
+     * @param input the script source
+     * @return the parsed clauses
+     */
+    override fun parse(input: String): List<Clause> {
+        return grammar.parseToEnd(input)
+    }
+}
--- a/src/parser/grammars/LogicGrammar.kt
+++ b/src/parser/grammars/LogicGrammar.kt
@ -0,0 +1,22 @@
+package parser.grammars
+
+import com.github.h0tk3y.betterParse.combinators.oneOrMore
+import com.github.h0tk3y.betterParse.combinators.or
+import com.github.h0tk3y.betterParse.combinators.separated
+import com.github.h0tk3y.betterParse.combinators.times
+import com.github.h0tk3y.betterParse.combinators.unaryMinus
+import com.github.h0tk3y.betterParse.combinators.use
+import com.github.h0tk3y.betterParse.parser.Parser
+import prolog.ast.logic.Clause
+import prolog.ast.logic.Fact
+import prolog.ast.logic.Rule
+
+/**
+ * Grammar for a sequence of clauses, built on top of the parsers inherited from [TermsGrammar].
+ */
+class LogicGrammar : TermsGrammar() {
+    /** A head, a skipped neck token, then a body; the two kept parts become a [Rule]. */
+    protected val rule: Parser<Rule> by (head * -neck * body).use { Rule(t1, t2) }
+
+    /** A lone head, wrapped in a [Fact]. */
+    protected val fact: Parser<Fact> by head.use { Fact(this) }
+
+    /** A rule or, if that does not match, a fact; the terminating dot is skipped. */
+    protected val clause: Parser<Clause> by (rule or fact) * -dot
+
+    /** One or more clauses in a row. */
+    protected val clauses: Parser<List<Clause>> by oneOrMore(clause)
+
+    /** The grammar as a whole parses a list of clauses. */
+    override val rootParser: Parser<Any> by clauses
+}
--- a/src/parser/grammars/TermsGrammar.kt
+++ b/src/parser/grammars/TermsGrammar.kt
@ -0,0 +1,74 @@
+package parser.grammars
+
+import com.github.h0tk3y.betterParse.combinators.or
+import com.github.h0tk3y.betterParse.combinators.separated
+import com.github.h0tk3y.betterParse.combinators.times
+import com.github.h0tk3y.betterParse.combinators.unaryMinus
+import com.github.h0tk3y.betterParse.combinators.use
+import com.github.h0tk3y.betterParse.grammar.parser
+import com.github.h0tk3y.betterParse.parser.Parser
+import prolog.ast.arithmetic.Float
+import prolog.ast.arithmetic.Integer
+import prolog.ast.logic.LogicOperand
+import prolog.ast.logic.LogicOperator
+import prolog.ast.terms.Atom
+import prolog.ast.terms.Body
+import prolog.ast.terms.Head
+import prolog.ast.terms.Operator
+import prolog.ast.terms.Structure
+import prolog.ast.terms.Term
+import prolog.ast.terms.Variable
+import prolog.builtins.Conjunction
+
+/**
+ * Grammar for single terms (atoms, variables, compound terms, numbers) plus the
+ * head/body/operator parsers that subclasses build clauses from.
+ *
+ * Parsers that are referenced before their declaration are wrapped in `parser(::name)`
+ * so the reference is resolved lazily.
+ *
+ * Every `or` chain starts with `dummy`, a placeholder parser from [Tokens] whose transform
+ * throws. NOTE(review): presumably it is only there so that each real alternative can sit
+ * on its own `or` line - confirm.
+ */
+open class TermsGrammar : Tokens() {
+    // Basic named terms
+    protected val variable: Parser<Variable> by variableToken use { Variable(text) }
+    protected val atom: Parser<Atom> by nameToken use { Atom(text) }
+    // name "(" term {"," term} ")". `acceptZero = true` also admits an empty argument
+    // list. `term` is declared further down, hence the lazy `parser(::term)` reference.
+    protected val compound: Parser<Structure> by (atom * -leftParenthesis * separated(
+        parser(::term),
+        comma,
+        acceptZero = true
+    ) * -rightParenthesis) use {
+        Structure(t1, t2.terms)
+    }
+
+    // Basic arithmetic
+    // NOTE(review): toInt()/toFloat() bound the accepted literals; toInt() throws
+    // NumberFormatException for values outside the Int range.
+    protected val int: Parser<Integer> by integerToken use { Integer(text.toInt()) }
+    protected val float: Parser<Float> by floatToken use { Float(text.toFloat()) }
+
+    // Operators
+    // An operand that is not itself an operator. `compound` is tried before `atom`
+    // because a compound term also starts with a name token.
+    protected val simpleLogicOperand: Parser<LogicOperand> by (dummy
+            or compound
+            or atom
+            )
+    // Any operand: a nested operator is tried first, then the simple forms.
+    // `operator` is declared below, hence the lazy reference.
+    protected val logicOperand: Parser<LogicOperand> by (dummy
+            or parser(::operator)
+            or simpleLogicOperand
+            )
+    // Conjunction: simple operand, skipped comma, any operand. Only the right-hand side
+    // may be another operator, so `a, b, c` nests as Conjunction(a, Conjunction(b, c)).
+    protected val logicOperator: Parser<LogicOperator> by (simpleLogicOperand * -comma * logicOperand) use {
+        Conjunction(t1, t2)
+    }
+    // All operators; currently the conjunction is the only alternative.
+    protected val operator: Parser<Operator> by (dummy
+            or logicOperator
+            )
+
+    // Parts
+    // Clause head: a compound term or a plain atom.
+    protected val head: Parser<Head> by (dummy
+            or compound
+            or atom
+            )
+    // Clause body: an operator expression or, failing that, a single head-like term.
+    // NOTE(review): the `as Body` cast assumes every parsed operator and head is also
+    // a Body - verify against the AST types.
+    protected val body: Parser<Body> by (dummy
+            or operator
+            or head
+            ) use { this as Body }
+
+    // Any term. `float` is tried before `int`, and `compound` before `atom`.
+    protected val term: Parser<Term> by (dummy
+            or float
+            or int
+            or variable
+            or compound
+            or atom
+            )
+
+    // On its own, this grammar parses a single term.
+    override val rootParser: Parser<Any> by term
+}
--- a/src/parser/grammars/Tokens.kt
+++ b/src/parser/grammars/Tokens.kt
@ -0,0 +1,32 @@
+package parser.grammars
+
+import com.github.h0tk3y.betterParse.combinators.use
+import com.github.h0tk3y.betterParse.grammar.Grammar
+import com.github.h0tk3y.betterParse.lexer.Token
+import com.github.h0tk3y.betterParse.lexer.literalToken
+import com.github.h0tk3y.betterParse.lexer.regexToken
+import com.github.h0tk3y.betterParse.lexer.token
+
+/**
+ * Token definitions shared by the grammars in this package.
+ *
+ * Declaration order is significant: better-parse tries the tokens in the order they are
+ * declared, so [floatToken] must stay ahead of [integerToken], and [neck] ahead of any
+ * token that could match a prefix of `:-`.
+ */
+abstract class Tokens : Grammar<Any>() {
+    // Prolog tokens
+    protected val nameToken: Token by regexToken("[a-z][a-zA-Z0-9_]*")
+    protected val variableToken: Token by regexToken("[A-Z][a-zA-Z0-9_]*")
+
+    // Arithmetic tokens
+    // The integer part of a float follows the same rule as integerToken (either a lone 0 or
+    // no leading zero), so that `0.5` lexes as one float instead of `0`, `.`, `5`.
+    protected val floatToken: Token by regexToken("-?([1-9][0-9]*|0)\\.[0-9]+")
+    protected val integerToken: Token by regexToken("-?([1-9][0-9]*|0)")
+
+    // Special tokens
+    protected val neck: Token by literalToken(":-")
+    protected val comma: Token by literalToken(",")
+    protected val leftParenthesis: Token by literalToken("(")
+    protected val rightParenthesis: Token by literalToken(")")
+    protected val dot: Token by literalToken(".")
+
+    // Ignored tokens
+    protected val whitespace: Token by regexToken("\\s+", ignore = true)
+    protected val singleLineComment: Token by regexToken("%[^\\n]*", ignore = true)
+    // `[\s\S]` rather than `.`: a plain dot does not match line terminators, which would
+    // stop a block comment from spanning more than one line.
+    protected val multiLineComment: Token by regexToken("/\\*[\\s\\S]*?\\*/", ignore = true)
+
+    // Placeholder parser whose transform throws if it is ever applied; the grammars use it
+    // as the first alternative of their `or` chains.
+    protected val dummy by token { _, _ -> -1 } use { throw IllegalStateException("This parser should not be used") }
+}