feat: enhance RegexParser with group handling and capture functionality

This commit is contained in:
monoid 2025-06-29 14:15:21 +09:00
parent 78472511e7
commit 5536b872b4
3 changed files with 95 additions and 48 deletions

View file

@ -7,8 +7,11 @@ import com.github.h0tk3y.betterParse.lexer.literalToken
import com.github.h0tk3y.betterParse.lexer.regexToken import com.github.h0tk3y.betterParse.lexer.regexToken
import com.github.h0tk3y.betterParse.lexer.token import com.github.h0tk3y.betterParse.lexer.token
import com.github.h0tk3y.betterParse.parser.Parser import com.github.h0tk3y.betterParse.parser.Parser
import com.github.h0tk3y.betterParse.grammar.parseToEnd
class RegexParser : Grammar<RegexItem>() { class RegexParser : Grammar<RegexItem>() {
private var groupCounter = 0
// val bracketContent by regexToken("[^\\]]*") // val bracketContent by regexToken("[^\\]]*")
val escapedCharacter by regexToken("\\\\[+*?.()|\\[\\]]") val escapedCharacter by regexToken("\\\\[+*?.()|\\[\\]]")
val postfixOperator by regexToken("[+*?]") val postfixOperator by regexToken("[+*?]")
@ -49,7 +52,13 @@ class RegexParser : Grammar<RegexItem>() {
(dot asJust DotItem()) or (dot asJust DotItem()) or
(escapedCharacter map { CharItem(it.text.substring(1)) }) or (escapedCharacter map { CharItem(it.text.substring(1)) }) or
(bracketContent map { BracketItem(it.text.substring(1, it.text.length - 1)) }) or (bracketContent map { BracketItem(it.text.substring(1, it.text.length - 1)) }) or
(skip(openParenSymbol) and (parser(::rootParser)) and skip(closeParenSymbol)) (skip(openParenSymbol) and
(parser(::rootParser)) and
skip(closeParenSymbol) map
{
val groupName = "${groupCounter++}"
GroupItem(it, groupName)
})
val term: Parser<RegexItem> by val term: Parser<RegexItem> by
(item and optional(postfixOperator)) map (item and optional(postfixOperator)) map
@ -76,3 +85,7 @@ class RegexParser : Grammar<RegexItem>() {
override val rootParser: Parser<RegexItem> by termWithAlternation override val rootParser: Parser<RegexItem> by termWithAlternation
} }
/**
 * Parses a regular-expression pattern into its [RegexItem] tree.
 *
 * A new [RegexParser] is created for every call and the whole of [input]
 * is handed to its `parseToEnd`.
 *
 * @param input the pattern text to parse.
 * @return the root [RegexItem] produced by the grammar.
 */
fun compileRegex(input: String): RegexItem = RegexParser().parseToEnd(input)

View file

@ -3,6 +3,7 @@ package org.example
data class State( data class State(
val matched: String, val matched: String,
val remaining: String, val remaining: String,
val captures: Map<String, String> = emptyMap()
) )
data class AvailableState(val seq: Sequence<State> = emptySequence()) : Sequence<State> by seq { data class AvailableState(val seq: Sequence<State> = emptySequence()) : Sequence<State> by seq {
@ -38,6 +39,11 @@ fun RegexItem.match(item: String): MatchResult {
return MatchResult(this.findMatch(item)) return MatchResult(this.findMatch(item))
} }
/**
 * Helper that reports whether matching [item] against this expression succeeded.
 *
 * @param item the text to match.
 * @return the `isSuccess` flag of the result returned by [match].
 */
fun RegexItem.test(item: String): Boolean = match(item).isSuccess
class AndThenItem(val left: RegexItem, val right: RegexItem) : RegexItem { class AndThenItem(val left: RegexItem, val right: RegexItem) : RegexItem {
override fun toString(): String = "${left}${right}" override fun toString(): String = "${left}${right}"
override fun findMatch(str: String): AvailableState { override fun findMatch(str: String): AvailableState {
@ -54,7 +60,11 @@ class AndThenItem(val left: RegexItem, val right: RegexItem) : RegexItem {
if (!rightMatch.isEmpty) { if (!rightMatch.isEmpty) {
// If right match is successful, combine the matched parts // If right match is successful, combine the matched parts
rightMatch.map { rightState -> rightMatch.map { rightState ->
State(state.matched + rightState.matched, rightState.remaining) State(
state.matched + rightState.matched,
rightState.remaining,
state.captures + rightState.captures
) // Combine captures
} }
} else { } else {
// If right match fails, return an empty sequence // If right match fails, return an empty sequence
@ -107,6 +117,21 @@ class BracketItem(val content: String) : RegexItem {
} }
} }
/**
 * A parenthesised group: matches exactly like the wrapped [item] and records
 * the matched text under [name] in each resulting state's captures.
 *
 * @property item the expression enclosed by the parentheses.
 * @property name the key used for this group's entry in [State.captures].
 */
class GroupItem(val item: RegexItem, val name: String) : RegexItem {
    override fun toString(): String = "(${item})"

    /**
     * Delegates matching to [item], then tags every candidate state with this
     * group's capture.
     */
    override fun findMatch(str: String): AvailableState {
        // The group itself adds no matching rules; the inner item decides what matches.
        val innerStates = item.findMatch(str).seq
        // Lazily add (name -> matched text) to the captures carried by each state.
        val withCapture = innerStates.map { inner ->
            inner.copy(captures = inner.captures + (name to inner.matched))
        }
        return AvailableState(withCapture)
    }
}
fun matchMany( fun matchMany(
str: String, str: String,
item: RegexItem, item: RegexItem,
@ -191,7 +216,7 @@ class DotItem : RegexItem {
} }
class AlternationItem(val left: RegexItem, val right: RegexItem) : RegexItem { class AlternationItem(val left: RegexItem, val right: RegexItem) : RegexItem {
override fun toString(): String = "(${left}|${right})" override fun toString(): String = "${left}|${right}"
override fun findMatch(str: String): AvailableState { override fun findMatch(str: String): AvailableState {
// Alternation은 왼쪽 또는 오른쪽 항목 중 하나와 매칭되므로, 각각 시도해보고 성공하는 경우를 반환 // Alternation은 왼쪽 또는 오른쪽 항목 중 하나와 매칭되므로, 각각 시도해보고 성공하는 경우를 반환
val leftMatch = left.findMatch(str) val leftMatch = left.findMatch(str)

View file

@ -2,21 +2,19 @@ package org.example
import kotlin.test.Test import kotlin.test.Test
import kotlin.test.assertEquals import kotlin.test.assertEquals
import com.github.h0tk3y.betterParse.grammar.parseToEnd
class ParserTest { class ParserTest {
@Test @Test
fun testSimpleCharacter() { fun testSimpleCharacter() {
val input = "a" val input = "a"
val result = RegexParser().parseToEnd(input) val result = compileRegex(input)
assertEquals("a",result.toString()) assertEquals("a", result.toString())
} }
@Test @Test
fun testCharacterWithPlus() { fun testCharacterWithPlus() {
val input = "a+" val input = "a+"
val parser = RegexParser() val result = compileRegex(input)
val result = parser.parseToEnd(input)
assertEquals("a+", result.toString()) assertEquals("a+", result.toString())
assert(result.match("a").isSuccess) assert(result.match("a").isSuccess)
assert(result.match("aa").isSuccess) assert(result.match("aa").isSuccess)
@ -25,8 +23,7 @@ class ParserTest {
@Test @Test
fun testCharacterWithStar() { fun testCharacterWithStar() {
val input = "b*" val input = "b*"
val parser = RegexParser() val result = compileRegex(input)
val result = parser.parseToEnd(input)
assertEquals("b*", result.toString()) assertEquals("b*", result.toString())
assert(result.match("").isSuccess) assert(result.match("").isSuccess)
assert(result.match("b").isSuccess) assert(result.match("b").isSuccess)
@ -36,8 +33,7 @@ class ParserTest {
@Test @Test
fun testCharacterWithQuestion() { fun testCharacterWithQuestion() {
val input = "c?" val input = "c?"
val parser = RegexParser() val result = compileRegex(input)
val result = parser.parseToEnd(input)
assertEquals("c?", result.toString()) assertEquals("c?", result.toString())
assert(result.match("").isSuccess) assert(result.match("").isSuccess)
assert(result.match("c").isSuccess) assert(result.match("c").isSuccess)
@ -46,8 +42,7 @@ class ParserTest {
@Test @Test
fun testDot() { fun testDot() {
val input = "." val input = "."
val parser = RegexParser() val result = compileRegex(input)
val result = parser.parseToEnd(input)
assertEquals(".", result.toString()) assertEquals(".", result.toString())
assert(result.match("a").isSuccess) assert(result.match("a").isSuccess)
assert(result.match("1").isSuccess) assert(result.match("1").isSuccess)
@ -57,9 +52,8 @@ class ParserTest {
@Test @Test
fun testAlternation() { fun testAlternation() {
val input = "a|b" val input = "a|b"
val parser = RegexParser() val result = compileRegex(input)
val result = parser.parseToEnd(input) assertEquals("a|b", result.toString())
assertEquals("(a|b)", result.toString())
assert(result.match("a").isSuccess) assert(result.match("a").isSuccess)
assert(result.match("b").isSuccess) assert(result.match("b").isSuccess)
assert(!result.match("c").isSuccess) assert(!result.match("c").isSuccess)
@ -67,9 +61,8 @@ class ParserTest {
@Test @Test
fun testParentheses() { fun testParentheses() {
val input = "(d)" val input = "(d)"
val parser = RegexParser() val result = compileRegex(input)
val result = parser.parseToEnd(input) assertEquals("(d)", result.toString())
assertEquals("d", result.toString())
assert(result.match("d").isSuccess) assert(result.match("d").isSuccess)
assert(!result.match("e").isSuccess) assert(!result.match("e").isSuccess)
} }
@ -77,8 +70,7 @@ class ParserTest {
@Test @Test
fun testComplexExpression() { fun testComplexExpression() {
val input = "a(b|c)*d+" val input = "a(b|c)*d+"
val parser = RegexParser() val result = compileRegex(input)
val result = parser.parseToEnd(input)
assertEquals("a(b|c)*d+", result.toString()) assertEquals("a(b|c)*d+", result.toString())
assert(result.match("ad").isSuccess) assert(result.match("ad").isSuccess)
assert(!result.match("ab").isSuccess) assert(!result.match("ab").isSuccess)
@ -90,8 +82,7 @@ class ParserTest {
@Test @Test
fun testAndThen() { fun testAndThen() {
val input = "ab" val input = "ab"
val parser = RegexParser() val result = compileRegex(input)
val result = parser.parseToEnd(input)
assertEquals("ab", result.toString()) assertEquals("ab", result.toString())
assert(result.match("ab").isSuccess) assert(result.match("ab").isSuccess)
assert(!result.match("a").isSuccess) assert(!result.match("a").isSuccess)
@ -99,9 +90,8 @@ class ParserTest {
} }
@Test @Test
fun testDotAndPlus() { fun testDotAndPlus() {
val input = ".+a"; val input = ".+a"
val parser = RegexParser() val result = compileRegex(input)
val result = parser.parseToEnd(input)
assertEquals(".+a", result.toString()) assertEquals(".+a", result.toString())
assert(!result.match("a").isSuccess) assert(!result.match("a").isSuccess)
assert(result.match("ba").isSuccess) assert(result.match("ba").isSuccess)
@ -110,8 +100,7 @@ class ParserTest {
@Test @Test
fun testEscapedCharacter() { fun testEscapedCharacter() {
val input = "\\+" val input = "\\+"
val parser = RegexParser() val result = compileRegex(input)
val result = parser.parseToEnd(input)
assertEquals("\\+", result.toString()) assertEquals("\\+", result.toString())
assert(result.match("+").isSuccess) assert(result.match("+").isSuccess)
assert(!result.match("a").isSuccess) assert(!result.match("a").isSuccess)
@ -119,12 +108,32 @@ class ParserTest {
@Test @Test
fun testBracketContent() { fun testBracketContent() {
val input = "[abc]" val input = "[abc]"
val parser = RegexParser() val result = compileRegex(input)
val result = parser.parseToEnd(input)
assertEquals("[abc]", result.toString()) assertEquals("[abc]", result.toString())
assert(result.match("a").isSuccess) assert(result.match("a").isSuccess)
assert(result.match("b").isSuccess) assert(result.match("b").isSuccess)
assert(result.match("c").isSuccess) assert(result.match("c").isSuccess)
assert(!result.match("d").isSuccess) assert(!result.match("d").isSuccess)
} }
/**
 * A repeated outer group wrapping an alternation group: `(a(b|c)d)+`.
 *
 * Verifies that the pattern prints back unchanged and that one iteration
 * requires the inner `b|c` as well as the trailing `d`.
 */
@Test
fun testNestedGroups() {
    val pattern = "(a(b|c)d)+"
    val regex = compileRegex(pattern)
    assertEquals(pattern, regex.toString())

    // kotlin.test assertions are used (fully qualified, so no extra import is needed)
    // because the stdlib assert() only throws when the JVM runs with -ea; without
    // that flag a failing check would pass silently.
    kotlin.test.assertFalse(regex.match("ad").isSuccess, "\"ad\" is missing the inner b|c")
    kotlin.test.assertTrue(regex.match("abd").isSuccess, "\"abd\" should match")
    kotlin.test.assertTrue(regex.match("acd").isSuccess, "\"acd\" should match")
    kotlin.test.assertFalse(regex.match("a").isSuccess, "\"a\" is an incomplete iteration")
}
/**
 * Two adjacent groups `(a)(b)` each record their own capture.
 *
 * The expected keys "0" and "1" are the group names asserted by the original
 * test for the first and second group of a freshly compiled pattern.
 */
@Test
fun testCaptureGroups() {
    val pattern = "(a)(b)"
    val regex = compileRegex(pattern)
    assertEquals(pattern, regex.toString())

    val matchResult = regex.match("ab")
    // Fully qualified kotlin.test.assertTrue instead of the stdlib assert(): the latter
    // is only checked when the JVM runs with -ea, so it can let a failure slip through.
    kotlin.test.assertTrue(matchResult.isSuccess, "\"ab\" should match $pattern")

    // Inspect the first available state; its captures map is keyed by group name.
    val firstState = matchResult.available.first()
    assertEquals("a", firstState.captures["0"])
    assertEquals("b", firstState.captures["1"])
}
} }