Rework parsing structure

2025-04-27 19:31:29 +02:00 · 2025-04-27 19:31:29 +02:00 · b9f419a59d
commit b9f419a59d
parent a4ec29f084
17 changed files with 246 additions and 278 deletions
--- a/src/better_parser/PrologParser.kt
+++ b/src/better_parser/PrologParser.kt
@ -1,17 +0,0 @@
-package better_parser
-
-import com.github.h0tk3y.betterParse.grammar.Grammar
-import com.github.h0tk3y.betterParse.grammar.parseToEnd
-import prolog.Program
-import prolog.ast.logic.Clause
-import prolog.ast.terms.Atom
-
-class PrologParser {
-    private val parser: Grammar<List<Clause>> = SimpleSourceParser() as Grammar<List<Clause>>
-
-    public fun parse(input: String) {
-        val clauses: List<Clause> = parser.parseToEnd(input)
-
-        Program.load(clauses)
-    }
-}
--- a/src/better_parser/PrologSourceParser.kt
+++ b/src/better_parser/PrologSourceParser.kt
@ -1,70 +0,0 @@
-package better_parser
-
-import com.github.h0tk3y.betterParse.combinators.*
-import com.github.h0tk3y.betterParse.grammar.Grammar
-import com.github.h0tk3y.betterParse.lexer.literalToken
-import com.github.h0tk3y.betterParse.lexer.regexToken
-import com.github.h0tk3y.betterParse.parser.Parser
-import prolog.ast.logic.Fact
-import prolog.ast.arithmetic.Integer
-import prolog.ast.arithmetic.Float
-import prolog.ast.logic.Clause
-import prolog.ast.logic.LogicOperand
-import prolog.ast.logic.Rule
-import prolog.ast.terms.*
-import prolog.builtins.Conjunction
-import prolog.builtins.Disjunction
-
-class PrologSourceParser : Grammar<List<Clause>>() {
-    // Define the tokens
-    private val atom by regexToken("[a-z][a-zA-Z0-9_]*")
-    private val variable by regexToken("[A-Z][a-zA-Z0-9_]*")
-    private val number by regexToken("-?[0-9]+(\\.[0-9]+)?")
-    private val whitespace by regexToken("\\s+", ignore = true)
-
-    private val comma by literalToken(",")
-    private val semicolon by literalToken(";")
-    private val neck by literalToken(":-")
-    private val lparen by literalToken("(")
-    private val rparen by literalToken(")")
-    private val dot by literalToken(".")
-
-    private val atomParser by atom use { Atom(text) }
-    private val variableParser by variable use { Variable(text) }
-    private val intParser by number use { Integer(text.toInt()) }
-    private val floatParser by number use { Float(text.toFloat()) }
-    private val numberParser by (intParser or floatParser)
-    private val compoundTermParser by (atomParser and skip(lparen) and separated(
-        atomParser or variableParser,
-        comma
-    ) and skip(rparen)) use {
-        CompoundTerm(t1, t2.terms)
-    }
-
-    private val termParser: Parser<Term> by (numberParser or variableParser or compoundTermParser or atomParser)
-
-    private val logicOperandParser: Parser<LogicOperand> by (termParser or compoundTermParser or atomParser) map {
-        it as LogicOperand
-    }
-
-    private val conjunctionParser: Parser<Conjunction> by (logicOperandParser and comma and logicOperandParser) use {
-        Conjunction(t1, t3)
-    }
-    private val disjunctionParser: Parser<Disjunction> by (logicOperandParser and semicolon and logicOperandParser) use {
-        Disjunction(t1, t3)
-    }
-
-    private val operatorParser: Parser<Operator> by (conjunctionParser or disjunctionParser)
-
-    private val headParser by (compoundTermParser or atomParser)
-    private val bodyParser by (operatorParser or compoundTermParser or atomParser)
-
-    private val factParser by (headParser and dot) use { Fact(t1 as Head) }
-    private val ruleParser by (headParser and neck and bodyParser and dot) use {
-        Rule(t1 as Head, t3 as Body)
-    }
-
-    private val clauseParser: Parser<Clause> by (factParser or ruleParser)
-
-    override val rootParser: Parser<List<Clause>> by zeroOrMore(clauseParser)
-}
--- a/src/better_parser/SimplePrologParser.kt
+++ b/src/better_parser/SimplePrologParser.kt
@ -1,67 +0,0 @@
-package better_parser
-
-import com.github.h0tk3y.betterParse.combinators.*
-import com.github.h0tk3y.betterParse.grammar.Grammar
-import com.github.h0tk3y.betterParse.grammar.parser
-import com.github.h0tk3y.betterParse.lexer.Token
-import com.github.h0tk3y.betterParse.lexer.literalToken
-import com.github.h0tk3y.betterParse.lexer.regexToken
-import com.github.h0tk3y.betterParse.lexer.token
-import com.github.h0tk3y.betterParse.parser.Parser
-import prolog.ast.arithmetic.Float
-import prolog.ast.arithmetic.Integer
-import prolog.ast.terms.Atom
-import prolog.ast.terms.Structure
-import prolog.ast.terms.Term
-import prolog.ast.terms.Variable
-
-open class SimplePrologParser : Grammar<Any>() {
-    // Prolog tokens
-    protected val nameToken: Token by regexToken("[a-z][a-zA-Z0-9_]*")
-    protected val variableToken: Token by regexToken("[A-Z][a-zA-Z0-9_]*")
-
-    // Arithmetic tokens
-    private val floatToken: Token by regexToken("-?[1-9][0-9]*\\.[0-9]+")
-    private val integerToken: Token by regexToken("-?([1-9][0-9]*|0)")
-
-    // Special tokens
-    protected val neck by literalToken(":-")
-    protected val comma: Token by literalToken(",")
-    protected val leftParenthesis: Token by literalToken("(")
-    protected val rightParenthesis: Token by literalToken(")")
-    protected val dot by literalToken(".")
-
-    // Ignored tokens
-    protected val whitespace: Token by regexToken("\\s+", ignore = true)
-    protected val singleLineComment: Token by regexToken("%[^\\n]*", ignore = true)
-    protected val multiLineComment: Token by regexToken("/\\*.*?\\*/", ignore = true)
-
-    protected val dummy by token { _, _ -> -1 } use { throw IllegalStateException("This parser should not be used") }
-
-    // Prolog parsers
-    protected val variable: Parser<Variable> by variableToken use { Variable(text) }
-    protected val atom: Parser<Atom> by nameToken use { Atom(text) }
-    protected val compound: Parser<Structure> by (atom and skip(leftParenthesis) and separated(
-        parser(::term),
-        comma,
-        acceptZero = true
-    ) and skip(rightParenthesis)) use {
-        Structure(t1, t2.terms)
-    }
-
-    // Arithmetic parsers
-    private val int: Parser<Integer> by integerToken use { Integer(text.toInt()) }
-    private val float: Parser<Float> by floatToken use {
-        Float(text.toFloat())
-    }
-
-    protected val term: Parser<Term> by (dummy
-            or float
-            or int
-            or variable
-            or compound
-            or atom
-    ) map { it }
-
-    override val rootParser: Parser<Any> = term
-}
--- a/src/better_parser/SimpleReplParser.kt
+++ b/src/better_parser/SimpleReplParser.kt
@ -1,27 +0,0 @@
-package better_parser
-
-import com.github.h0tk3y.betterParse.combinators.times
-import com.github.h0tk3y.betterParse.combinators.unaryMinus
-import com.github.h0tk3y.betterParse.combinators.use
-import com.github.h0tk3y.betterParse.grammar.parseToEnd
-import com.github.h0tk3y.betterParse.parser.Parser
-import prolog.ast.logic.LogicOperand
-import prolog.builtins.Query
-
-class SimpleReplParser(val debug: Boolean = false) : SimpleSourceParser() {
-    override val rootParser: Parser<Query> by (body * -dot) use { Query(this as LogicOperand) }
-
-    fun parse(input: String): Query {
-        if (debug) {
-            println("Parsing input: $input")
-        }
-
-        val query = parseToEnd(input) as Query
-
-        if (debug) {
-            println("Parsed query: $query")
-        }
-
-        return query
-    }
-}
--- a/src/better_parser/SimpleSourceParser.kt
+++ b/src/better_parser/SimpleSourceParser.kt
@ -1,48 +0,0 @@
-package better_parser
-
-import com.github.h0tk3y.betterParse.combinators.*
-import com.github.h0tk3y.betterParse.grammar.parser
-import com.github.h0tk3y.betterParse.lexer.literalToken
-import com.github.h0tk3y.betterParse.parser.Parser
-import prolog.ast.arithmetic.ArithmeticOperator
-import prolog.ast.logic.*
-import prolog.ast.terms.*
-import prolog.builtins.Conjunction
-import prolog.builtins.Disjunction
-
-open class SimpleSourceParser : SimplePrologParser() {
-    protected val simpleLogicOperand: Parser<LogicOperand> by (dummy
-            or compound
-            or atom
-            )
-    protected val logicOperand: Parser<LogicOperand> by (dummy
-        or parser(::operator)
-        or simpleLogicOperand
-    )
-
-    protected val arithmeticOperator: Parser<ArithmeticOperator> by dummy
-    protected val logicOperator: Parser<LogicOperator> by (simpleLogicOperand * comma * logicOperand) use {
-        Conjunction(t1, t3)
-    }
-
-    protected val operator: Parser<Operator> by (arithmeticOperator or logicOperator) use { this as Operator }
-
-    protected val head: Parser<Head> by (dummy
-            or compound
-            or atom
-            )
-    protected val body: Parser<Body> by (dummy
-            or operator
-            or head
-            ) use { this as Body }
-
-    // ----
-
-    private val rule: Parser<Rule> by (head * -neck * body) use { Rule(t1, t2) }
-    private val fact: Parser<Fact> by head use { Fact(this) }
-
-    private val clause: Parser<Clause> by ((rule or fact) * -dot)
-    private val clauses: Parser<List<Clause>> by zeroOrMore(clause)
-
-    override val rootParser: Parser<Any> by clauses
-}
--- a/src/parser/Parser.kt
+++ b/src/parser/Parser.kt
@ -0,0 +1,11 @@
+package parser
+
+/**
+ * Common contract shared by the parsers in this package.
+ */
+interface Parser {
+    /**
+     * Turns [input] into its parsed representation.
+     *
+     * @param input The text to parse.
+     * @return The AST built from [input]; implementations may narrow the return type.
+     */
+    fun parse(input: String): Any
+}
--- a/src/parser/ReplParser.kt
+++ b/src/parser/ReplParser.kt
@ -0,0 +1,9 @@
+package parser
+
+import prolog.builtins.Query
+
+/**
+ * [Parser] whose result type is [Query].
+ *
+ * Parsing is not implemented yet; see [parse].
+ */
+class ReplParser : Parser {
+    /**
+     * Placeholder implementation.
+     *
+     * @param input The text to parse (currently unused).
+     * @throws NotImplementedError always, because the body is a [TODO] marker.
+     */
+    override fun parse(input: String): Query = TODO("Not yet implemented")
+}
--- a/src/parser/ScriptParser.kt
+++ b/src/parser/ScriptParser.kt
@ -0,0 +1,12 @@
+package parser
+
+import com.github.h0tk3y.betterParse.grammar.Grammar
+import com.github.h0tk3y.betterParse.grammar.parseToEnd
+import parser.grammars.LogicGrammar
+import prolog.ast.logic.Clause
+
+/**
+ * [Parser] that reads Prolog source text and yields the clauses it contains.
+ */
+class ScriptParser : Parser {
+    // LogicGrammar is declared as a grammar over Any, while its root parser produces
+    // a list of clauses; the cast narrows the static type to that actual result.
+    @Suppress("UNCHECKED_CAST")
+    private val grammar = LogicGrammar() as Grammar<List<Clause>>
+
+    /**
+     * Parses the complete [input] with [LogicGrammar].
+     *
+     * @param input The source text to parse.
+     * @return The clauses found in [input], in source order.
+     */
+    override fun parse(input: String): List<Clause> {
+        return grammar.parseToEnd(input)
+    }
+}
--- a/src/parser/grammars/LogicGrammar.kt
+++ b/src/parser/grammars/LogicGrammar.kt
@ -0,0 +1,22 @@
+package parser.grammars
+
+import com.github.h0tk3y.betterParse.combinators.oneOrMore
+import com.github.h0tk3y.betterParse.combinators.or
+import com.github.h0tk3y.betterParse.combinators.separated
+import com.github.h0tk3y.betterParse.combinators.times
+import com.github.h0tk3y.betterParse.combinators.unaryMinus
+import com.github.h0tk3y.betterParse.combinators.use
+import com.github.h0tk3y.betterParse.parser.Parser
+import prolog.ast.logic.Clause
+import prolog.ast.logic.Fact
+import prolog.ast.logic.Rule
+
+/**
+ * Grammar for Prolog source text: a sequence of clauses, each terminated by a dot.
+ *
+ * The class is final, so its parsers are declared `private`; `protected` would be
+ * effectively private here and only produce a warning.
+ */
+class LogicGrammar : TermsGrammar() {
+    // `head :- body`
+    private val rule: Parser<Rule> by (head * -neck * body) use { Rule(t1, t2) }
+
+    // A bare head without a body.
+    private val fact: Parser<Fact> by head use { Fact(this) }
+
+    // `rule` is listed before `fact`: a rule also starts with a head, so trying `fact`
+    // first would accept the head and then fail on the missing dot.
+    private val clause: Parser<Clause> by ((rule or fact) * -dot)
+
+    // oneOrMore: input that contains no clause at all is rejected.
+    private val clauses: Parser<List<Clause>> by oneOrMore(clause)
+
+    override val rootParser: Parser<Any> by clauses
+}
--- a/src/parser/grammars/TermsGrammar.kt
+++ b/src/parser/grammars/TermsGrammar.kt
@ -0,0 +1,74 @@
+package parser.grammars
+
+import com.github.h0tk3y.betterParse.combinators.or
+import com.github.h0tk3y.betterParse.combinators.separated
+import com.github.h0tk3y.betterParse.combinators.times
+import com.github.h0tk3y.betterParse.combinators.unaryMinus
+import com.github.h0tk3y.betterParse.combinators.use
+import com.github.h0tk3y.betterParse.grammar.parser
+import com.github.h0tk3y.betterParse.parser.Parser
+import prolog.ast.arithmetic.Float
+import prolog.ast.arithmetic.Integer
+import prolog.ast.logic.LogicOperand
+import prolog.ast.logic.LogicOperator
+import prolog.ast.terms.Atom
+import prolog.ast.terms.Body
+import prolog.ast.terms.Head
+import prolog.ast.terms.Operator
+import prolog.ast.terms.Structure
+import prolog.ast.terms.Term
+import prolog.ast.terms.Variable
+import prolog.builtins.Conjunction
+
+/**
+ * Grammar for Prolog terms and for the operands and operators built from them.
+ *
+ * Uses the tokens declared in [Tokens]. Parsing this grammar on its own yields a
+ * single [Term], because [rootParser] delegates to [term].
+ */
+open class TermsGrammar : Tokens() {
+    // Basic named terms
+    protected val variable: Parser<Variable> by variableToken use { Variable(text) }
+    protected val atom: Parser<Atom> by nameToken use { Atom(text) }
+    // An atom followed by a parenthesised, comma-separated argument list.
+    // `parser(::term)` refers to `term`, which is declared further down in this class;
+    // `acceptZero = true` also accepts an empty argument list.
+    protected val compound: Parser<Structure> by (atom * -leftParenthesis * separated(
+        parser(::term),
+        comma,
+        acceptZero = true
+    ) * -rightParenthesis) use {
+        Structure(t1, t2.terms)
+    }
+
+    // Basic arithmetic
+    // The matched token text is converted with toInt() / toFloat().
+    protected val int: Parser<Integer> by integerToken use { Integer(text.toInt()) }
+    protected val float: Parser<Float> by floatToken use { Float(text.toFloat()) }
+
+    // Operators
+    // NOTE(review): `dummy` leads every alternative list below; presumably it never
+    // matches and only keeps the `or` lines uniform - confirm.
+    protected val simpleLogicOperand: Parser<LogicOperand> by (dummy
+            or compound
+            or atom
+            )
+    // `operator` is declared below, hence the deferred `parser(::operator)` reference.
+    protected val logicOperand: Parser<LogicOperand> by (dummy
+            or parser(::operator)
+            or simpleLogicOperand
+            )
+    // `left , right` becomes a Conjunction. The left side is limited to a simple operand
+    // while the right side may itself be an operator
+    // (presumably to avoid left recursion - confirm).
+    protected val logicOperator: Parser<LogicOperator> by (simpleLogicOperand * -comma * logicOperand) use {
+        Conjunction(t1, t2)
+    }
+    protected val operator: Parser<Operator> by (dummy
+            or logicOperator
+            )
+
+    // Parts
+    // A clause head: a compound term or a plain atom.
+    protected val head: Parser<Head> by (dummy
+            or compound
+            or atom
+            )
+    // A clause body: an operator expression or anything accepted as a head.
+    // The parsed value is cast to Body.
+    protected val body: Parser<Body> by (dummy
+            or operator
+            or head
+            ) use { this as Body }
+
+    // Any term; alternatives are tried in the order listed.
+    protected val term: Parser<Term> by (dummy
+            or float
+            or int
+            or variable
+            or compound
+            or atom
+            )
+
+    override val rootParser: Parser<Any> by term
+}
--- a/src/parser/grammars/Tokens.kt
+++ b/src/parser/grammars/Tokens.kt
@ -0,0 +1,32 @@
+package parser.grammars
+
+import com.github.h0tk3y.betterParse.combinators.use
+import com.github.h0tk3y.betterParse.grammar.Grammar
+import com.github.h0tk3y.betterParse.lexer.Token
+import com.github.h0tk3y.betterParse.lexer.literalToken
+import com.github.h0tk3y.betterParse.lexer.regexToken
+import com.github.h0tk3y.betterParse.lexer.token
+
+/**
+ * Lexical layer shared by the Prolog grammars: declares the tokens and a placeholder parser.
+ *
+ * NOTE(review): the tokenizer is assumed to try tokens in declaration order, which is why
+ * [floatToken] is declared before [integerToken] - confirm against the better-parse docs.
+ */
+abstract class Tokens : Grammar<Any>() {
+    // Prolog tokens
+    protected val nameToken: Token by regexToken("[a-z][a-zA-Z0-9_]*")
+    protected val variableToken: Token by regexToken("[A-Z][a-zA-Z0-9_]*")
+
+    // Arithmetic tokens
+    // The integer part is either a lone 0 or has no leading zero, mirroring integerToken,
+    // so that `0.5` is read as one float instead of integer, dot, integer.
+    protected val floatToken: Token by regexToken("-?([1-9][0-9]*|0)\\.[0-9]+")
+    protected val integerToken: Token by regexToken("-?([1-9][0-9]*|0)")
+
+    // Special tokens
+    protected val neck by literalToken(":-")
+    protected val comma: Token by literalToken(",")
+    protected val leftParenthesis: Token by literalToken("(")
+    protected val rightParenthesis: Token by literalToken(")")
+    protected val dot by literalToken(".")
+
+    // Ignored tokens
+    protected val whitespace: Token by regexToken("\\s+", ignore = true)
+    protected val singleLineComment: Token by regexToken("%[^\\n]*", ignore = true)
+    // [\s\S] instead of `.`: a dot does not match line terminators, so comments that
+    // span several lines would otherwise not be recognised.
+    protected val multiLineComment: Token by regexToken("/\\*[\\s\\S]*?\\*/", ignore = true)
+
+    // Placeholder parser used as the first alternative in `or` chains; its mapping throws
+    // if it is ever reached.
+    protected val dummy by token { _, _ -> -1 } use { throw IllegalStateException("This parser should not be used") }
+}