my-regex-engine/lib/src/main/kotlin/org/example/Parser.kt

86 lines
No EOL
3.3 KiB
Kotlin

package org.example
import com.github.h0tk3y.betterParse.combinators.*
import com.github.h0tk3y.betterParse.grammar.Grammar
import com.github.h0tk3y.betterParse.grammar.parseToEnd
import com.github.h0tk3y.betterParse.grammar.parser
import com.github.h0tk3y.betterParse.lexer.literalToken
import com.github.h0tk3y.betterParse.lexer.regexToken
import com.github.h0tk3y.betterParse.lexer.token
import com.github.h0tk3y.betterParse.parser.Parser
/**
 * better-parse grammar that turns a regular-expression pattern into a [RegexItem] tree.
 *
 * Supported syntax, as declared below: single alphanumeric characters, backslash-escaped
 * metacharacters, `.`, the anchors `^` and `$`, bracket expressions `[...]`, parenthesised
 * groups, the postfix operators `+`, `*`, `?`, concatenation and `|` alternation.
 *
 * The token properties are declared in a deliberate order: better-parse's default tokenizer
 * tries tokens in declaration order, so `escapedCharacter` must stay ahead of the
 * single-character tokens it would otherwise be split into.
 */
class RegexParser : Grammar<RegexItem>() {
    // Source of group names. It is incremented when a group finishes parsing, so in
    // nested groups the inner group receives the lower number.
    private var groupCounter = 0

    // A backslash followed by one of the metacharacters + * ? . $ ^ ( ) | [ ]
    val escapedCharacter by regexToken("\\\\[+*?.$^()|\\[\\]]")
    val postfixOperator by regexToken("[+*?]")
    val anchorOperator by regexToken("[$^]")
    val alternationSymbol by literalToken("|")
    val openParenSymbol by literalToken("(")
    val closeParenSymbol by literalToken(")")

    // A whole bracket expression, from '[' through its closing ']', as a single token.
    val bracketContent by
        token(
            name = "bracketContent",
            matcher = { seq, from -> matchBracketExpression(seq, from) },
        )
    val dot by literalToken(".")
    val charToken by regexToken("[a-zA-Z0-9]")

    val char by charToken map { CharItem(it.text) }

    // One atom: a literal, an anchor, '.', an escaped metacharacter, a bracket expression,
    // or a parenthesised sub-pattern (which recurses into rootParser).
    val item: Parser<RegexItem> by
        char or
            (anchorOperator map { AnchorItem(it.text) }) or
            (dot asJust DotItem()) or
            // Drop the leading backslash; only the escaped character itself is kept.
            (escapedCharacter map { CharItem(it.text.substring(1)) }) or
            // Strip the surrounding '[' and ']' and hand over the raw inner text.
            (bracketContent map { BracketItem(it.text.substring(1, it.text.length - 1)) }) or
            (skip(openParenSymbol) and
                parser(::rootParser) and
                skip(closeParenSymbol) map { inner ->
                    val groupName = "${groupCounter++}"
                    GroupItem(inner, groupName)
                })

    // An atom with an optional postfix quantifier.
    val term: Parser<RegexItem> by
        (item and optional(postfixOperator)) map { (item, op) ->
            when (op?.text) {
                "+" -> PlusItem(item)
                "*" -> StarItem(item)
                "?" -> QuestionItem(item)
                else -> item
            }
        }

    // Concatenation: one or more terms folded left-to-right into nested AndThenItem nodes.
    val andThen: Parser<RegexItem> by
        oneOrMore(term) map { items -> items.reduce { left, right -> AndThenItem(left, right) } }

    // Alternation binds looser than concatenation and associates to the left.
    val termWithAlternation: Parser<RegexItem> by
        leftAssociative(
            andThen,
            alternationSymbol,
        ) { left, _, right -> AlternationItem(left, right) }

    override val rootParser: Parser<RegexItem> by termWithAlternation

    /**
     * Token matcher for bracket expressions.
     *
     * @param seq the input being tokenized
     * @param from index at which a match is attempted
     * @return the length of the bracket expression starting at [from], including both
     *   brackets, or 0 when there is no match: the character at [from] is not `[`, no
     *   unescaped `]` follows, or the brackets are empty
     */
    private fun matchBracketExpression(seq: CharSequence, from: Int): Int {
        if (seq[from] != '[') return 0

        // Look for the first ']' that is not escaped by a backslash.
        var close = seq.indexOf(']', from + 1)
        while (close >= 0 && isEscapedAt(seq, close, from)) {
            close = seq.indexOf(']', close + 1)
        }

        return when {
            close < 0 -> 0 // unterminated bracket expression
            close == from + 1 -> 0 // empty brackets are not allowed
            else -> close - from + 1
        }
    }

    /**
     * Reports whether the character at [index] is escaped, i.e. preceded by an odd number of
     * consecutive backslashes. An even-length run consists of escaped backslashes only, so
     * `[a\\]` is closed by its `]` while `[a\]` is not. The scan never goes back past
     * [lowerBound], the position of the opening `[`.
     */
    private fun isEscapedAt(seq: CharSequence, index: Int, lowerBound: Int): Boolean {
        var backslashes = 0
        var i = index - 1
        while (i > lowerBound && seq[i] == '\\') {
            backslashes++
            i--
        }
        return backslashes % 2 == 1
    }
}
/**
 * Parses [input] as a regular-expression pattern and returns the root of the resulting
 * [RegexItem] tree.
 *
 * A new [RegexParser] is created on every call, and the whole input must be consumed
 * (`parseToEnd`).
 */
fun compileRegex(input: String): RegexItem = RegexParser().parseToEnd(input)