86 lines
No EOL
3.3 KiB
Kotlin
86 lines
No EOL
3.3 KiB
Kotlin
package org.example
|
|
|
|
import com.github.h0tk3y.betterParse.combinators.*
|
|
import com.github.h0tk3y.betterParse.grammar.Grammar
|
|
import com.github.h0tk3y.betterParse.grammar.parseToEnd
|
|
import com.github.h0tk3y.betterParse.grammar.parser
|
|
import com.github.h0tk3y.betterParse.lexer.literalToken
|
|
import com.github.h0tk3y.betterParse.lexer.regexToken
|
|
import com.github.h0tk3y.betterParse.lexer.token
|
|
import com.github.h0tk3y.betterParse.parser.Parser
|
|
|
|
/**
 * Grammar for a small regular-expression dialect, producing a [RegexItem] tree.
 *
 * Supported syntax:
 * - single characters matching `[a-zA-Z0-9]`, and `.`
 * - backslash-escaped metacharacters (`\+`, `\*`, `\?`, `\.`, `\$`, `\^`, `\(`, `\)`, `\|`, `\[`, `\]`)
 * - the anchors `^` and `$`
 * - bracket expressions `[...]` (must be non-empty)
 * - parenthesised groups
 * - the postfix operators `+`, `*`, `?` (at most one per item)
 * - concatenation, and `|` alternation (binds loosest, left-associative)
 *
 * The token declaration order below is significant for lexing and must be preserved;
 * in particular [escapedCharacter] has to be tried before the single-character tokens.
 *
 * The group counter is instance state, so a parser instance keeps counting across
 * successive parses; [compileRegex] sidesteps this by creating a fresh instance per call.
 */
class RegexParser : Grammar<RegexItem>() {
    // Source of the names handed to GroupItem.
    // NOTE(review): the counter is bumped in the `map` transform, i.e. once a group's
    // closing parenthesis has been parsed, so a nested group appears to receive a lower
    // number than the group enclosing it. Confirm this numbering is what consumers expect.
    private var groupCounter = 0

    val escapedCharacter by regexToken("\\\\[+*?.$^()|\\[\\]]")
    val postfixOperator by regexToken("[+*?]")
    val anchorOperator by regexToken("[$^]")
    val alternationSymbol by literalToken("|")
    val openParenSymbol by literalToken("(")
    val closeParenSymbol by literalToken(")")

    /** A whole bracket expression, including the surrounding `[` and `]`. */
    val bracketContent by
        token(
            name = "bracketContent",
            matcher = { seq, from -> matchBracketExpression(seq, from) },
        )

    val dot by literalToken(".")
    val charToken by regexToken("[a-zA-Z0-9]")

    val char by charToken map { CharItem(it.text) }

    /** A single atom: character, anchor, dot, escaped character, bracket expression or group. */
    val item: Parser<RegexItem> by
        char or
            (anchorOperator map { AnchorItem(it.text) }) or
            (dot asJust DotItem()) or
            // Drop the leading backslash; only the escaped character itself is kept.
            (escapedCharacter map { CharItem(it.text.substring(1)) }) or
            // Strip the surrounding '[' and ']'; escapes inside the content are passed through as written.
            (bracketContent map { BracketItem(it.text.substring(1, it.text.length - 1)) }) or
            (skip(openParenSymbol) and
                // Referenced lazily because rootParser is declared further down.
                (parser(::rootParser)) and
                skip(closeParenSymbol) map
                { inner ->
                    val groupName = "${groupCounter++}"
                    GroupItem(inner, groupName)
                })

    /** An [item] optionally followed by one postfix operator. */
    val term: Parser<RegexItem> by
        (item and optional(postfixOperator)) map { (operand, op) ->
            when (op?.text) {
                "+" -> PlusItem(operand)
                "*" -> StarItem(operand)
                "?" -> QuestionItem(operand)
                else -> operand
            }
        }

    /** One or more terms in sequence, folded left-to-right into nested [AndThenItem]s. */
    val andThen: Parser<RegexItem> by
        oneOrMore(term) map { items -> items.reduce { left, right -> AndThenItem(left, right) } }

    /** Sequences separated by `|`, combined left-associatively into [AlternationItem]s. */
    val termWithAlternation: Parser<RegexItem> by
        leftAssociative(
            andThen,
            alternationSymbol,
        ) { left, _, right -> AlternationItem(left, right) }

    override val rootParser: Parser<RegexItem> by termWithAlternation

    /**
     * Token matcher for bracket expressions.
     *
     * @param seq the full input being tokenized
     * @param from index at which a match is attempted
     * @return the number of characters matched (from `[` through the closing `]`, inclusive),
     *   or 0 when there is no match: the input does not start with `[` at [from], the bracket
     *   is never closed, or the bracket is empty (`[]`).
     */
    private fun matchBracketExpression(seq: CharSequence, from: Int): Int {
        // No match unless the token starts with '['.
        if (from >= seq.length || seq[from] != '[') return 0

        // Scan left-to-right, skipping each backslash together with the character it escapes.
        // This keeps "\]" inside the bracket while still letting "\\]" close it; checking only
        // the single character before ']' would misread an escaped backslash as an escape of ']'.
        var index = from + 1
        var close = -1
        while (index < seq.length && close < 0) {
            when (seq[index]) {
                '\\' -> index += 2
                ']' -> close = index
                else -> index++
            }
        }

        return when {
            close < 0 -> 0 // unterminated bracket
            close == from + 1 -> 0 // empty brackets are not allowed
            else -> close - from + 1 // length from '[' through ']'
        }
    }
}
|
|
|
|
/**
 * Parses [input] as a regular expression and returns the resulting [RegexItem] tree.
 *
 * A new [RegexParser] is created for every call, and the whole input must be consumed
 * (`parseToEnd`); presumably a parse failure surfaces as better-parse's exception —
 * confirm against the library version in use.
 *
 * @param input the regular-expression source text
 * @return the root of the parsed expression tree
 */
fun compileRegex(input: String): RegexItem = RegexParser().parseToEnd(input)