feat: enhance RegexParser with group handling and capture functionality
This commit is contained in:
parent
78472511e7
commit
5536b872b4
3 changed files with 95 additions and 48 deletions
|
@ -7,8 +7,11 @@ import com.github.h0tk3y.betterParse.lexer.literalToken
|
|||
import com.github.h0tk3y.betterParse.lexer.regexToken
|
||||
import com.github.h0tk3y.betterParse.lexer.token
|
||||
import com.github.h0tk3y.betterParse.parser.Parser
|
||||
import com.github.h0tk3y.betterParse.grammar.parseToEnd
|
||||
|
||||
class RegexParser : Grammar<RegexItem>() {
|
||||
private var groupCounter = 0
|
||||
|
||||
// val bracketContent by regexToken("[^\\]]*")
|
||||
val escapedCharacter by regexToken("\\\\[+*?.()|\\[\\]]")
|
||||
val postfixOperator by regexToken("[+*?]")
|
||||
|
@ -49,7 +52,13 @@ class RegexParser : Grammar<RegexItem>() {
|
|||
(dot asJust DotItem()) or
|
||||
(escapedCharacter map { CharItem(it.text.substring(1)) }) or
|
||||
(bracketContent map { BracketItem(it.text.substring(1, it.text.length - 1)) }) or
|
||||
(skip(openParenSymbol) and (parser(::rootParser)) and skip(closeParenSymbol))
|
||||
(skip(openParenSymbol) and
|
||||
(parser(::rootParser)) and
|
||||
skip(closeParenSymbol) map
|
||||
{
|
||||
val groupName = "${groupCounter++}"
|
||||
GroupItem(it, groupName)
|
||||
})
|
||||
|
||||
val term: Parser<RegexItem> by
|
||||
(item and optional(postfixOperator)) map
|
||||
|
@ -76,3 +85,7 @@ class RegexParser : Grammar<RegexItem>() {
|
|||
|
||||
override val rootParser: Parser<RegexItem> by termWithAlternation
|
||||
}
|
||||
|
||||
/**
 * Compiles a regex pattern into its [RegexItem] tree.
 *
 * A new [RegexParser] is created for every call, so the parser's group counter
 * starts from its initial value for each compiled pattern.
 *
 * @param input the pattern text, handed to `parseToEnd`
 * @return the root item produced by the grammar
 */
fun compileRegex(input: String): RegexItem = RegexParser().parseToEnd(input)
|
|
@ -3,6 +3,7 @@ package org.example
|
|||
/**
 * One candidate outcome of a match attempt.
 *
 * @property matched the text matched so far
 * @property remaining the rest of the input (presumably the not-yet-consumed suffix; confirm against the matchers)
 * @property captures captured text keyed by group name; empty unless a group recorded something
 */
data class State(val matched: String, val remaining: String, val captures: Map<String, String> = emptyMap())
|
||||
|
||||
data class AvailableState(val seq: Sequence<State> = emptySequence()) : Sequence<State> by seq {
|
||||
|
@ -38,6 +39,11 @@ fun RegexItem.match(item: String): MatchResult {
|
|||
return MatchResult(this.findMatch(item))
|
||||
}
|
||||
|
||||
/**
 * Helper that reports whether matching [item] against this regex item succeeds.
 *
 * @param item the text to match
 * @return the `isSuccess` flag of the result returned by `match`
 */
fun RegexItem.test(item: String): Boolean = match(item).isSuccess
|
||||
|
||||
class AndThenItem(val left: RegexItem, val right: RegexItem) : RegexItem {
|
||||
override fun toString(): String = "${left}${right}"
|
||||
override fun findMatch(str: String): AvailableState {
|
||||
|
@ -54,7 +60,11 @@ class AndThenItem(val left: RegexItem, val right: RegexItem) : RegexItem {
|
|||
if (!rightMatch.isEmpty) {
|
||||
// If right match is successful, combine the matched parts
|
||||
rightMatch.map { rightState ->
|
||||
State(state.matched + rightState.matched, rightState.remaining)
|
||||
State(
|
||||
state.matched + rightState.matched,
|
||||
rightState.remaining,
|
||||
state.captures + rightState.captures
|
||||
) // Combine captures
|
||||
}
|
||||
} else {
|
||||
// If right match fails, return an empty sequence
|
||||
|
@ -107,6 +117,21 @@ class BracketItem(val content: String) : RegexItem {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * A parenthesised group: it matches exactly what [item] matches and additionally
 * records the matched text in each resulting state's captures under [name].
 */
class GroupItem(val item: RegexItem, val name: String) : RegexItem {
    override fun toString(): String = "(${item})"

    override fun findMatch(str: String): AvailableState {
        // A group attempts the match exactly as its inner item does.
        val innerStates = item.findMatch(str)
        // Re-emit every candidate state with this group's capture added.
        val tagged = innerStates.seq.map { candidate ->
            val withCapture = candidate.captures + (name to candidate.matched)
            State(candidate.matched, candidate.remaining, withCapture)
        }
        return AvailableState(tagged)
    }
}
|
||||
|
||||
fun matchMany(
|
||||
str: String,
|
||||
item: RegexItem,
|
||||
|
@ -191,7 +216,7 @@ class DotItem : RegexItem {
|
|||
}
|
||||
|
||||
class AlternationItem(val left: RegexItem, val right: RegexItem) : RegexItem {
|
||||
override fun toString(): String = "(${left}|${right})"
|
||||
override fun toString(): String = "${left}|${right}"
|
||||
override fun findMatch(str: String): AvailableState {
|
||||
// Alternation은 왼쪽 또는 오른쪽 항목 중 하나와 매칭되므로, 각각 시도해보고 성공하는 경우를 반환
|
||||
val leftMatch = left.findMatch(str)
|
||||
|
|
|
@ -2,21 +2,19 @@ package org.example
|
|||
|
||||
import kotlin.test.Test
|
||||
import kotlin.test.assertEquals
|
||||
import com.github.h0tk3y.betterParse.grammar.parseToEnd
|
||||
|
||||
class ParserTest {
|
||||
|
||||
@Test
fun testSimpleCharacter() {
    // A single literal character must print back exactly as it was written.
    // Only the compileRegex-based lines are kept; the stale parseToEnd lines
    // re-declared `result` and would not compile alongside them.
    val input = "a"
    val result = compileRegex(input)
    assertEquals("a", result.toString())
}
|
||||
@Test
|
||||
fun testCharacterWithPlus() {
|
||||
val input = "a+"
|
||||
val parser = RegexParser()
|
||||
val result = parser.parseToEnd(input)
|
||||
val result = compileRegex(input)
|
||||
assertEquals("a+", result.toString())
|
||||
assert(result.match("a").isSuccess)
|
||||
assert(result.match("aa").isSuccess)
|
||||
|
@ -25,8 +23,7 @@ class ParserTest {
|
|||
@Test
|
||||
fun testCharacterWithStar() {
|
||||
val input = "b*"
|
||||
val parser = RegexParser()
|
||||
val result = parser.parseToEnd(input)
|
||||
val result = compileRegex(input)
|
||||
assertEquals("b*", result.toString())
|
||||
assert(result.match("").isSuccess)
|
||||
assert(result.match("b").isSuccess)
|
||||
|
@ -36,8 +33,7 @@ class ParserTest {
|
|||
@Test
|
||||
fun testCharacterWithQuestion() {
|
||||
val input = "c?"
|
||||
val parser = RegexParser()
|
||||
val result = parser.parseToEnd(input)
|
||||
val result = compileRegex(input)
|
||||
assertEquals("c?", result.toString())
|
||||
assert(result.match("").isSuccess)
|
||||
assert(result.match("c").isSuccess)
|
||||
|
@ -46,8 +42,7 @@ class ParserTest {
|
|||
@Test
|
||||
fun testDot() {
|
||||
val input = "."
|
||||
val parser = RegexParser()
|
||||
val result = parser.parseToEnd(input)
|
||||
val result = compileRegex(input)
|
||||
assertEquals(".", result.toString())
|
||||
assert(result.match("a").isSuccess)
|
||||
assert(result.match("1").isSuccess)
|
||||
|
@ -57,9 +52,8 @@ class ParserTest {
|
|||
@Test
|
||||
fun testAlternation() {
|
||||
val input = "a|b"
|
||||
val parser = RegexParser()
|
||||
val result = parser.parseToEnd(input)
|
||||
assertEquals("(a|b)", result.toString())
|
||||
val result = compileRegex(input)
|
||||
assertEquals("a|b", result.toString())
|
||||
assert(result.match("a").isSuccess)
|
||||
assert(result.match("b").isSuccess)
|
||||
assert(!result.match("c").isSuccess)
|
||||
|
@ -67,9 +61,8 @@ class ParserTest {
|
|||
@Test
fun testParentheses() {
    // A parenthesised pattern now compiles to a GroupItem, whose toString keeps
    // the parentheses, so the expected text is "(d)" rather than the old "d".
    val input = "(d)"
    val result = compileRegex(input)
    assertEquals("(d)", result.toString())
    assert(result.match("d").isSuccess)
    assert(!result.match("e").isSuccess)
}
|
||||
|
@ -77,8 +70,7 @@ class ParserTest {
|
|||
@Test
|
||||
fun testComplexExpression() {
|
||||
val input = "a(b|c)*d+"
|
||||
val parser = RegexParser()
|
||||
val result = parser.parseToEnd(input)
|
||||
val result = compileRegex(input)
|
||||
assertEquals("a(b|c)*d+", result.toString())
|
||||
assert(result.match("ad").isSuccess)
|
||||
assert(!result.match("ab").isSuccess)
|
||||
|
@ -90,8 +82,7 @@ class ParserTest {
|
|||
@Test
|
||||
fun testAndThen() {
|
||||
val input = "ab"
|
||||
val parser = RegexParser()
|
||||
val result = parser.parseToEnd(input)
|
||||
val result = compileRegex(input)
|
||||
assertEquals("ab", result.toString())
|
||||
assert(result.match("ab").isSuccess)
|
||||
assert(!result.match("a").isSuccess)
|
||||
|
@ -99,9 +90,8 @@ class ParserTest {
|
|||
}
|
||||
@Test
|
||||
fun testDotAndPlus() {
|
||||
val input = ".+a";
|
||||
val parser = RegexParser()
|
||||
val result = parser.parseToEnd(input)
|
||||
val input = ".+a"
|
||||
val result = compileRegex(input)
|
||||
assertEquals(".+a", result.toString())
|
||||
assert(!result.match("a").isSuccess)
|
||||
assert(result.match("ba").isSuccess)
|
||||
|
@ -110,8 +100,7 @@ class ParserTest {
|
|||
@Test
|
||||
fun testEscapedCharacter() {
|
||||
val input = "\\+"
|
||||
val parser = RegexParser()
|
||||
val result = parser.parseToEnd(input)
|
||||
val result = compileRegex(input)
|
||||
assertEquals("\\+", result.toString())
|
||||
assert(result.match("+").isSuccess)
|
||||
assert(!result.match("a").isSuccess)
|
||||
|
@ -119,12 +108,32 @@ class ParserTest {
|
|||
@Test
fun testBracketContent() {
    // A bracket expression accepts each listed character and rejects any other.
    // Only the compileRegex-based line is kept; the stale parser/parseToEnd lines
    // re-declared `result`.
    val input = "[abc]"
    val result = compileRegex(input)
    assertEquals("[abc]", result.toString())
    assert(result.match("a").isSuccess)
    assert(result.match("b").isSuccess)
    assert(result.match("c").isSuccess)
    assert(!result.match("d").isSuccess)
}
|
||||
@Test
fun testNestedGroups() {
    // A repeated outer group wrapping an alternation group.
    val pattern = "(a(b|c)d)+"
    val regex = compileRegex(pattern)

    // The printed form must reproduce both levels of parentheses.
    assertEquals("(a(b|c)d)+", regex.toString())

    // The inner group is mandatory, so "ad" and a lone "a" must fail.
    assert(!regex.match("ad").isSuccess)
    assert(regex.match("abd").isSuccess)
    assert(regex.match("acd").isSuccess)
    assert(!regex.match("a").isSuccess)
}
|
||||
@Test
fun testCaptureGroups() {
    // Two sibling groups should each record their own matched text.
    val input = "(a)(b)"
    val result = compileRegex(input)
    assertEquals("(a)(b)", result.toString())

    val matchResult = result.match("ab")
    assert(matchResult.isSuccess)

    // The captures live on a State, so name the local after what it holds.
    val firstState = matchResult.available.first()
    // This test expects the two groups to be named "0" and "1".
    assertEquals("a", firstState.captures["0"])
    assertEquals("b", firstState.captures["1"])
}
|
||||
}
|
Loading…
Add table
Reference in a new issue