Rework parsing structure

This commit is contained in:
Tibo De Peuter 2025-04-27 19:31:29 +02:00
parent a4ec29f084
commit b9f419a59d
Signed by: tdpeuter
GPG key ID: 38297DE43F75FFE2
17 changed files with 246 additions and 278 deletions

View file

@ -1,17 +0,0 @@
package better_parser
import com.github.h0tk3y.betterParse.grammar.Grammar
import com.github.h0tk3y.betterParse.grammar.parseToEnd
import prolog.Program
import prolog.ast.logic.Clause
import prolog.ast.terms.Atom
/**
 * Parses Prolog source text and loads the resulting clauses into [Program].
 */
class PrologParser {
    // The grammar instance is narrowed to the clause-list result type that parse() relies on.
    @Suppress("UNCHECKED_CAST")
    private val parser = SimpleSourceParser() as Grammar<List<Clause>>

    /**
     * Parses [input] to the end and passes the parsed clauses to [Program.load].
     *
     * @param input the Prolog source text to parse.
     */
    fun parse(input: String) {
        Program.load(parser.parseToEnd(input))
    }
}

View file

@ -1,70 +0,0 @@
package better_parser
import com.github.h0tk3y.betterParse.combinators.*
import com.github.h0tk3y.betterParse.grammar.Grammar
import com.github.h0tk3y.betterParse.lexer.literalToken
import com.github.h0tk3y.betterParse.lexer.regexToken
import com.github.h0tk3y.betterParse.parser.Parser
import prolog.ast.logic.Fact
import prolog.ast.arithmetic.Integer
import prolog.ast.arithmetic.Float
import prolog.ast.logic.Clause
import prolog.ast.logic.LogicOperand
import prolog.ast.logic.Rule
import prolog.ast.terms.*
import prolog.builtins.Conjunction
import prolog.builtins.Disjunction
/**
 * Grammar that turns Prolog source text into a list of clauses (facts and rules).
 */
class PrologSourceParser : Grammar<List<Clause>>() {
    // Tokens
    private val atom by regexToken("[a-z][a-zA-Z0-9_]*")
    private val variable by regexToken("[A-Z][a-zA-Z0-9_]*")
    private val number by regexToken("-?[0-9]+(\\.[0-9]+)?")
    private val whitespace by regexToken("\\s+", ignore = true)
    private val comma by literalToken(",")
    private val semicolon by literalToken(";")
    private val neck by literalToken(":-")
    private val lparen by literalToken("(")
    private val rparen by literalToken(")")
    private val dot by literalToken(".")

    // Leaf parsers
    private val atomParser by atom use { Atom(text) }
    private val variableParser by variable use { Variable(text) }

    // Integers and floats share the single `number` token, so the kind is decided from the
    // matched text. Trying an integer conversion first and a float conversion as an `or`
    // alternative does not work: the token itself matches, the integer conversion then throws
    // NumberFormatException on text such as "3.14", and the alternative is never attempted.
    private val numberParser by number use {
        if ('.' in text) Float(text.toFloat()) else Integer(text.toInt())
    }

    // Compound term: functor atom followed by a parenthesised, comma-separated list of
    // atoms and variables.
    private val compoundTermParser by (atomParser and skip(lparen) and separated(
        atomParser or variableParser,
        comma
    ) and skip(rparen)) use {
        CompoundTerm(t1, t2.terms)
    }

    private val termParser: Parser<Term> by (numberParser or variableParser or compoundTermParser or atomParser)

    private val logicOperandParser: Parser<LogicOperand> by (termParser or compoundTermParser or atomParser) map {
        it as LogicOperand
    }

    // Binary logic operators; the separator token is parsed but only the two operands are used.
    private val conjunctionParser: Parser<Conjunction> by (logicOperandParser and comma and logicOperandParser) use {
        Conjunction(t1, t3)
    }
    private val disjunctionParser: Parser<Disjunction> by (logicOperandParser and semicolon and logicOperandParser) use {
        Disjunction(t1, t3)
    }
    private val operatorParser: Parser<Operator> by (conjunctionParser or disjunctionParser)

    // Clause parts
    private val headParser by (compoundTermParser or atomParser)
    private val bodyParser by (operatorParser or compoundTermParser or atomParser)

    // Clauses: a fact is `head.`, a rule is `head :- body.`
    private val factParser by (headParser and dot) use { Fact(t1 as Head) }
    private val ruleParser by (headParser and neck and bodyParser and dot) use {
        Rule(t1 as Head, t3 as Body)
    }
    private val clauseParser: Parser<Clause> by (factParser or ruleParser)

    /** A source text is zero or more clauses. */
    override val rootParser: Parser<List<Clause>> by zeroOrMore(clauseParser)
}

View file

@ -1,67 +0,0 @@
package better_parser
import com.github.h0tk3y.betterParse.combinators.*
import com.github.h0tk3y.betterParse.grammar.Grammar
import com.github.h0tk3y.betterParse.grammar.parser
import com.github.h0tk3y.betterParse.lexer.Token
import com.github.h0tk3y.betterParse.lexer.literalToken
import com.github.h0tk3y.betterParse.lexer.regexToken
import com.github.h0tk3y.betterParse.lexer.token
import com.github.h0tk3y.betterParse.parser.Parser
import prolog.ast.arithmetic.Float
import prolog.ast.arithmetic.Integer
import prolog.ast.terms.Atom
import prolog.ast.terms.Structure
import prolog.ast.terms.Term
import prolog.ast.terms.Variable
/**
 * Base grammar declaring the Prolog tokens and the parsers for basic terms
 * (variables, atoms, compound terms, integers and floats).
 *
 * The root parser of this grammar parses a single term.
 */
open class SimplePrologParser : Grammar<Any>() {
    // Prolog tokens
    protected val nameToken: Token by regexToken("[a-z][a-zA-Z0-9_]*")
    protected val variableToken: Token by regexToken("[A-Z][a-zA-Z0-9_]*")

    // Arithmetic tokens
    // The integer part of a float accepts the same forms as integerToken, including a lone 0,
    // so that text such as "0.5" is one float token instead of integer, dot, integer.
    private val floatToken: Token by regexToken("-?([1-9][0-9]*|0)\\.[0-9]+")
    private val integerToken: Token by regexToken("-?([1-9][0-9]*|0)")

    // Special tokens
    protected val neck by literalToken(":-")
    protected val comma: Token by literalToken(",")
    protected val leftParenthesis: Token by literalToken("(")
    protected val rightParenthesis: Token by literalToken(")")
    protected val dot by literalToken(".")

    // Ignored tokens
    protected val whitespace: Token by regexToken("\\s+", ignore = true)
    protected val singleLineComment: Token by regexToken("%[^\\n]*", ignore = true)
    // [\s\S] is used instead of '.', because '.' does not match line terminators and a
    // block comment spanning several lines would otherwise not be recognised.
    protected val multiLineComment: Token by regexToken("/\\*[\\s\\S]*?\\*/", ignore = true)

    // Placeholder parser: its matcher always returns -1 and its transform throws if it ever runs.
    protected val dummy by token { _, _ -> -1 } use { throw IllegalStateException("This parser should not be used") }

    // Prolog parsers
    protected val variable: Parser<Variable> by variableToken use { Variable(text) }
    protected val atom: Parser<Atom> by nameToken use { Atom(text) }

    // Compound term: functor atom followed by a parenthesised, comma-separated (possibly empty)
    // list of terms. `term` is referenced through parser(::term) because it is declared below.
    protected val compound: Parser<Structure> by (atom and skip(leftParenthesis) and separated(
        parser(::term),
        comma,
        acceptZero = true
    ) and skip(rightParenthesis)) use {
        Structure(t1, t2.terms)
    }

    // Arithmetic parsers
    private val int: Parser<Integer> by integerToken use { Integer(text.toInt()) }
    private val float: Parser<Float> by floatToken use {
        Float(text.toFloat())
    }

    // Any term; alternatives are tried in the listed order.
    protected val term: Parser<Term> by (dummy
            or float
            or int
            or variable
            or compound
            or atom
            ) map { it }

    override val rootParser: Parser<Any> = term
}

View file

@ -1,27 +0,0 @@
package better_parser
import com.github.h0tk3y.betterParse.combinators.times
import com.github.h0tk3y.betterParse.combinators.unaryMinus
import com.github.h0tk3y.betterParse.combinators.use
import com.github.h0tk3y.betterParse.grammar.parseToEnd
import com.github.h0tk3y.betterParse.parser.Parser
import prolog.ast.logic.LogicOperand
import prolog.builtins.Query
/**
 * Grammar for a REPL query: a body followed by a terminating dot, wrapped in a [Query].
 *
 * @property debug when `true`, the raw input and the parsed query are printed to standard output.
 */
class SimpleReplParser(val debug: Boolean = false) : SimpleSourceParser() {
    override val rootParser: Parser<Query> by (body * -dot) use {
        Query(this as LogicOperand)
    }

    /**
     * Parses [input] to the end and returns the resulting [Query].
     *
     * @param input the query text to parse.
     * @return the parsed query.
     */
    fun parse(input: String): Query {
        if (debug) println("Parsing input: $input")
        return (parseToEnd(input) as Query).also { parsed ->
            if (debug) println("Parsed query: $parsed")
        }
    }
}

View file

@ -1,48 +0,0 @@
package better_parser
import com.github.h0tk3y.betterParse.combinators.*
import com.github.h0tk3y.betterParse.grammar.parser
import com.github.h0tk3y.betterParse.lexer.literalToken
import com.github.h0tk3y.betterParse.parser.Parser
import prolog.ast.arithmetic.ArithmeticOperator
import prolog.ast.logic.*
import prolog.ast.terms.*
import prolog.builtins.Conjunction
import prolog.builtins.Disjunction
/**
 * Grammar for Prolog source text: a sequence of dot-terminated facts and rules.
 */
open class SimpleSourceParser : SimplePrologParser() {
    // Operand that is not itself an operator expression.
    protected val simpleLogicOperand: Parser<LogicOperand> by (dummy or compound or atom)

    // Any operand; `operator` is referenced through parser(::operator) because it is declared below.
    protected val logicOperand: Parser<LogicOperand> by (dummy or parser(::operator) or simpleLogicOperand)

    // Placeholder: bound to the dummy parser.
    protected val arithmeticOperator: Parser<ArithmeticOperator> by dummy

    // Conjunction `left , right`; the comma token is parsed but only the operands are used.
    protected val logicOperator: Parser<LogicOperator> by (simpleLogicOperand and comma and logicOperand) map {
        Conjunction(it.t1, it.t3)
    }

    protected val operator: Parser<Operator> by (arithmeticOperator or logicOperator) map { it as Operator }

    // Clause parts
    protected val head: Parser<Head> by (dummy or compound or atom)
    protected val body: Parser<Body> by (dummy or operator or head) map { it as Body }

    // ----
    // Clauses: a rule is `head :- body`, a fact is a bare head; each clause ends with a dot.
    private val rule: Parser<Rule> by (head and skip(neck) and body) map { Rule(it.t1, it.t2) }
    private val fact: Parser<Fact> by head map { Fact(it) }
    private val clause: Parser<Clause> by ((rule or fact) and skip(dot))
    private val clauses: Parser<List<Clause>> by zeroOrMore(clause)

    override val rootParser: Parser<Any> by clauses
}

11
src/parser/Parser.kt Normal file
View file

@ -0,0 +1,11 @@
package parser
/**
 * Common contract for parsers that turn an input string into a parsed representation.
 */
interface Parser {
    /**
     * Parses [input] and returns the result.
     *
     * The declared return type is [Any]; implementations may override it with a more specific type.
     *
     * @param input the text to parse.
     * @return the parsed result (the AST of the input).
     */
    fun parse(input: String): Any
}

9
src/parser/ReplParser.kt Normal file
View file

@ -0,0 +1,9 @@
package parser
import prolog.builtins.Query
/**
 * Parser for REPL input that is meant to produce a [Query].
 *
 * Not implemented yet: calling [parse] always throws [NotImplementedError].
 */
class ReplParser : Parser {
    override fun parse(input: String): Query = TODO("Not yet implemented")
}

View file

@ -0,0 +1,12 @@
package parser
import com.github.h0tk3y.betterParse.grammar.Grammar
import com.github.h0tk3y.betterParse.grammar.parseToEnd
import parser.grammars.LogicGrammar
import prolog.ast.logic.Clause
/**
 * Parses a whole Prolog script into its list of clauses using [LogicGrammar].
 */
class ScriptParser : Parser {
    // The grammar instance is narrowed to the clause-list result type that parse() returns.
    @Suppress("UNCHECKED_CAST")
    private val grammar = LogicGrammar() as Grammar<List<Clause>>

    /**
     * Parses [input] to the end.
     *
     * @param input the script text to parse.
     * @return the clauses found in the script, as produced by the grammar.
     */
    override fun parse(input: String): List<Clause> {
        return grammar.parseToEnd(input)
    }
}

View file

@ -0,0 +1,22 @@
package parser.grammars
import com.github.h0tk3y.betterParse.combinators.oneOrMore
import com.github.h0tk3y.betterParse.combinators.or
import com.github.h0tk3y.betterParse.combinators.separated
import com.github.h0tk3y.betterParse.combinators.times
import com.github.h0tk3y.betterParse.combinators.unaryMinus
import com.github.h0tk3y.betterParse.combinators.use
import com.github.h0tk3y.betterParse.parser.Parser
import prolog.ast.logic.Clause
import prolog.ast.logic.Fact
import prolog.ast.logic.Rule
/**
 * Grammar for Prolog scripts: one or more dot-terminated clauses, each a rule or a fact.
 */
class LogicGrammar : TermsGrammar() {
    /** A rule: head, neck token (dropped from the result), body. */
    protected val rule: Parser<Rule> by (head * -neck * body) use {
        Rule(t1, t2)
    }

    /** A fact: a bare head. */
    protected val fact: Parser<Fact> by head use {
        Fact(this)
    }

    /** A clause: a rule or, failing that, a fact, followed by a dot that is dropped from the result. */
    protected val clause: Parser<Clause> by (
        (rule or fact) * -dot
    )

    /** At least one clause; an input without any clause does not match. */
    protected val clauses: Parser<List<Clause>> by oneOrMore(clause)

    override val rootParser: Parser<Any> by clauses
}

View file

@ -0,0 +1,74 @@
package parser.grammars
import com.github.h0tk3y.betterParse.combinators.or
import com.github.h0tk3y.betterParse.combinators.separated
import com.github.h0tk3y.betterParse.combinators.times
import com.github.h0tk3y.betterParse.combinators.unaryMinus
import com.github.h0tk3y.betterParse.combinators.use
import com.github.h0tk3y.betterParse.grammar.parser
import com.github.h0tk3y.betterParse.parser.Parser
import prolog.ast.arithmetic.Float
import prolog.ast.arithmetic.Integer
import prolog.ast.logic.LogicOperand
import prolog.ast.logic.LogicOperator
import prolog.ast.terms.Atom
import prolog.ast.terms.Body
import prolog.ast.terms.Head
import prolog.ast.terms.Operator
import prolog.ast.terms.Structure
import prolog.ast.terms.Term
import prolog.ast.terms.Variable
import prolog.builtins.Conjunction
/**
 * Grammar for Prolog terms, logic operands/operators and clause parts (head and body),
 * built on the tokens declared in [Tokens]. The root parser parses a single term.
 */
open class TermsGrammar : Tokens() {
    // ---- Basic named terms ----

    /** A variable, built from the text of a variable token. */
    protected val variable: Parser<Variable> by variableToken use {
        Variable(text)
    }

    /** An atom, built from the text of a name token. */
    protected val atom: Parser<Atom> by nameToken use {
        Atom(text)
    }

    /**
     * A compound term: an atom followed by a parenthesised, comma-separated argument list that
     * may be empty. The parentheses are dropped from the result. Arguments are referenced
     * through `parser(::term)` because `term` is declared further down.
     */
    protected val compound: Parser<Structure> by (
        atom * -leftParenthesis * separated(parser(::term), comma, acceptZero = true) * -rightParenthesis
    ) use {
        Structure(t1, t2.terms)
    }

    // ---- Basic arithmetic ----

    protected val int: Parser<Integer> by integerToken use {
        Integer(text.toInt())
    }

    protected val float: Parser<Float> by floatToken use {
        Float(text.toFloat())
    }

    // ---- Operators ----

    /** An operand that is not itself an operator expression. */
    protected val simpleLogicOperand: Parser<LogicOperand> by (dummy or compound or atom)

    /** Any operand; an operator expression is tried before a simple operand. */
    protected val logicOperand: Parser<LogicOperand> by (dummy or parser(::operator) or simpleLogicOperand)

    /** A conjunction `left , right`; the comma is dropped from the result. */
    protected val logicOperator: Parser<LogicOperator> by (
        simpleLogicOperand * -comma * logicOperand
    ) use {
        Conjunction(t1, t2)
    }

    protected val operator: Parser<Operator> by (dummy or logicOperator)

    // ---- Parts ----

    /** Clause head: a compound term or an atom. */
    protected val head: Parser<Head> by (dummy or compound or atom)

    /** Clause body: an operator expression or, failing that, a head. */
    protected val body: Parser<Body> by (dummy or operator or head) use {
        this as Body
    }

    /** Any term; alternatives are tried in the listed order. */
    protected val term: Parser<Term> by (dummy or float or int or variable or compound or atom)

    override val rootParser: Parser<Any> by term
}

View file

@ -0,0 +1,32 @@
package parser.grammars
import com.github.h0tk3y.betterParse.combinators.use
import com.github.h0tk3y.betterParse.grammar.Grammar
import com.github.h0tk3y.betterParse.lexer.Token
import com.github.h0tk3y.betterParse.lexer.literalToken
import com.github.h0tk3y.betterParse.lexer.regexToken
import com.github.h0tk3y.betterParse.lexer.token
/**
 * Token declarations shared by the Prolog grammars.
 *
 * Subclasses build their parsers on top of these tokens and provide the root parser.
 */
abstract class Tokens : Grammar<Any>() {
    // Prolog tokens
    protected val nameToken: Token by regexToken("[a-z][a-zA-Z0-9_]*")
    protected val variableToken: Token by regexToken("[A-Z][a-zA-Z0-9_]*")

    // Arithmetic tokens
    // The integer part of a float accepts the same forms as integerToken, including a lone 0,
    // so that text such as "0.5" is one float token instead of integer, dot, integer.
    protected val floatToken: Token by regexToken("-?([1-9][0-9]*|0)\\.[0-9]+")
    protected val integerToken: Token by regexToken("-?([1-9][0-9]*|0)")

    // Special tokens
    protected val neck by literalToken(":-")
    protected val comma: Token by literalToken(",")
    protected val leftParenthesis: Token by literalToken("(")
    protected val rightParenthesis: Token by literalToken(")")
    protected val dot by literalToken(".")

    // Ignored tokens
    protected val whitespace: Token by regexToken("\\s+", ignore = true)
    protected val singleLineComment: Token by regexToken("%[^\\n]*", ignore = true)
    // [\s\S] is used instead of '.', because '.' does not match line terminators and a
    // block comment spanning several lines would otherwise not be recognised.
    protected val multiLineComment: Token by regexToken("/\\*[\\s\\S]*?\\*/", ignore = true)

    // Placeholder parser: its matcher always returns -1 and its transform throws if it ever runs.
    protected val dummy by token { _, _ -> -1 } use { throw IllegalStateException("This parser should not be used") }
}