feat: enhance RegexParser with group handling and capture functionality
This commit is contained in:
parent
78472511e7
commit
5536b872b4
3 changed files with 95 additions and 48 deletions
|
@ -7,8 +7,11 @@ import com.github.h0tk3y.betterParse.lexer.literalToken
|
||||||
import com.github.h0tk3y.betterParse.lexer.regexToken
|
import com.github.h0tk3y.betterParse.lexer.regexToken
|
||||||
import com.github.h0tk3y.betterParse.lexer.token
|
import com.github.h0tk3y.betterParse.lexer.token
|
||||||
import com.github.h0tk3y.betterParse.parser.Parser
|
import com.github.h0tk3y.betterParse.parser.Parser
|
||||||
|
import com.github.h0tk3y.betterParse.grammar.parseToEnd
|
||||||
|
|
||||||
class RegexParser : Grammar<RegexItem>() {
|
class RegexParser : Grammar<RegexItem>() {
|
||||||
|
private var groupCounter = 0
|
||||||
|
|
||||||
// val bracketContent by regexToken("[^\\]]*")
|
// val bracketContent by regexToken("[^\\]]*")
|
||||||
val escapedCharacter by regexToken("\\\\[+*?.()|\\[\\]]")
|
val escapedCharacter by regexToken("\\\\[+*?.()|\\[\\]]")
|
||||||
val postfixOperator by regexToken("[+*?]")
|
val postfixOperator by regexToken("[+*?]")
|
||||||
|
@ -49,7 +52,13 @@ class RegexParser : Grammar<RegexItem>() {
|
||||||
(dot asJust DotItem()) or
|
(dot asJust DotItem()) or
|
||||||
(escapedCharacter map { CharItem(it.text.substring(1)) }) or
|
(escapedCharacter map { CharItem(it.text.substring(1)) }) or
|
||||||
(bracketContent map { BracketItem(it.text.substring(1, it.text.length - 1)) }) or
|
(bracketContent map { BracketItem(it.text.substring(1, it.text.length - 1)) }) or
|
||||||
(skip(openParenSymbol) and (parser(::rootParser)) and skip(closeParenSymbol))
|
(skip(openParenSymbol) and
|
||||||
|
(parser(::rootParser)) and
|
||||||
|
skip(closeParenSymbol) map
|
||||||
|
{
|
||||||
|
val groupName = "${groupCounter++}"
|
||||||
|
GroupItem(it, groupName)
|
||||||
|
})
|
||||||
|
|
||||||
val term: Parser<RegexItem> by
|
val term: Parser<RegexItem> by
|
||||||
(item and optional(postfixOperator)) map
|
(item and optional(postfixOperator)) map
|
||||||
|
@ -76,3 +85,7 @@ class RegexParser : Grammar<RegexItem>() {
|
||||||
|
|
||||||
override val rootParser: Parser<RegexItem> by termWithAlternation
|
override val rootParser: Parser<RegexItem> by termWithAlternation
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Parses [input] with a newly created [RegexParser] and returns the resulting [RegexItem].
 *
 * A new parser is built on every call, so the parser's group counter starts
 * from zero for each compiled pattern.
 *
 * @param input the regular-expression source text to parse
 * @return the root item of the parsed expression
 */
fun compileRegex(input: String): RegexItem = RegexParser().parseToEnd(input)
|
|
@ -3,6 +3,7 @@ package org.example
|
||||||
data class State(
|
data class State(
|
||||||
val matched: String,
|
val matched: String,
|
||||||
val remaining: String,
|
val remaining: String,
|
||||||
|
val captures: Map<String, String> = emptyMap()
|
||||||
)
|
)
|
||||||
|
|
||||||
data class AvailableState(val seq: Sequence<State> = emptySequence()) : Sequence<State> by seq {
|
data class AvailableState(val seq: Sequence<State> = emptySequence()) : Sequence<State> by seq {
|
||||||
|
@ -38,6 +39,11 @@ fun RegexItem.match(item: String): MatchResult {
|
||||||
return MatchResult(this.findMatch(item))
|
return MatchResult(this.findMatch(item))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Convenience helper that reports whether matching this expression against
 * [item] succeeds, i.e. the value of `match(item).isSuccess`.
 *
 * @param item the text to match against
 * @return `true` when the match result is a success, `false` otherwise
 */
fun RegexItem.test(item: String): Boolean = match(item).isSuccess
|
||||||
|
|
||||||
class AndThenItem(val left: RegexItem, val right: RegexItem) : RegexItem {
|
class AndThenItem(val left: RegexItem, val right: RegexItem) : RegexItem {
|
||||||
override fun toString(): String = "${left}${right}"
|
override fun toString(): String = "${left}${right}"
|
||||||
override fun findMatch(str: String): AvailableState {
|
override fun findMatch(str: String): AvailableState {
|
||||||
|
@ -54,7 +60,11 @@ class AndThenItem(val left: RegexItem, val right: RegexItem) : RegexItem {
|
||||||
if (!rightMatch.isEmpty) {
|
if (!rightMatch.isEmpty) {
|
||||||
// If right match is successful, combine the matched parts
|
// If right match is successful, combine the matched parts
|
||||||
rightMatch.map { rightState ->
|
rightMatch.map { rightState ->
|
||||||
State(state.matched + rightState.matched, rightState.remaining)
|
State(
|
||||||
|
state.matched + rightState.matched,
|
||||||
|
rightState.remaining,
|
||||||
|
state.captures + rightState.captures
|
||||||
|
) // Combine captures
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// If right match fails, return an empty sequence
|
// If right match fails, return an empty sequence
|
||||||
|
@ -66,21 +76,21 @@ class AndThenItem(val left: RegexItem, val right: RegexItem) : RegexItem {
|
||||||
}
|
}
|
||||||
|
|
||||||
class CharItem(val value: String) : RegexItem {
|
class CharItem(val value: String) : RegexItem {
|
||||||
override fun toString(): String =
|
override fun toString(): String =
|
||||||
// escape 특수 문자를 처리하여 출력
|
// escape 특수 문자를 처리하여 출력
|
||||||
when (value) {
|
when (value) {
|
||||||
"+" -> "\\+"
|
"+" -> "\\+"
|
||||||
"*" -> "\\*"
|
"*" -> "\\*"
|
||||||
"?" -> "\\?"
|
"?" -> "\\?"
|
||||||
"." -> "\\."
|
"." -> "\\."
|
||||||
"(" -> "\\("
|
"(" -> "\\("
|
||||||
")" -> "\\)"
|
")" -> "\\)"
|
||||||
"|" -> "\\|"
|
"|" -> "\\|"
|
||||||
"[" -> "\\["
|
"[" -> "\\["
|
||||||
"]" -> "\\]"
|
"]" -> "\\]"
|
||||||
else -> value // 일반 문자 그대로 반환
|
else -> value // 일반 문자 그대로 반환
|
||||||
}
|
}
|
||||||
|
|
||||||
override fun findMatch(str: String): AvailableState {
|
override fun findMatch(str: String): AvailableState {
|
||||||
return when {
|
return when {
|
||||||
// 첫번째 문자가 value와 일치하는지 확인
|
// 첫번째 문자가 value와 일치하는지 확인
|
||||||
|
@ -94,7 +104,7 @@ class CharItem(val value: String) : RegexItem {
|
||||||
|
|
||||||
class BracketItem(val content: String) : RegexItem {
|
class BracketItem(val content: String) : RegexItem {
|
||||||
override fun toString(): String = "[$content]"
|
override fun toString(): String = "[$content]"
|
||||||
|
|
||||||
// TODO: 범위 처리
|
// TODO: 범위 처리
|
||||||
override fun findMatch(str: String): AvailableState {
|
override fun findMatch(str: String): AvailableState {
|
||||||
// 대괄호 안의 내용과 일치하는 첫 문자를 찾음
|
// 대괄호 안의 내용과 일치하는 첫 문자를 찾음
|
||||||
|
@ -107,6 +117,21 @@ class BracketItem(val content: String) : RegexItem {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * A parenthesised sub-expression that records the text it matched.
 *
 * @property item the expression wrapped by the parentheses
 * @property name the key under which the matched text is stored in [State.captures]
 */
class GroupItem(val item: RegexItem, val name: String) : RegexItem {
    override fun toString(): String = "($item)"

    /**
     * Matches [str] exactly as the wrapped [item] does, then adds a
     * `name -> matched text` entry to the captures of every resulting state.
     * An entry already stored under the same name is replaced.
     */
    override fun findMatch(str: String): AvailableState {
        val inner = item.findMatch(str)
        // The mapping stays lazy: each state is tagged only when the sequence is consumed.
        val withCapture = inner.seq.map { candidate ->
            candidate.copy(captures = candidate.captures + (name to candidate.matched))
        }
        return AvailableState(withCapture)
    }
}
|
||||||
|
|
||||||
fun matchMany(
|
fun matchMany(
|
||||||
str: String,
|
str: String,
|
||||||
item: RegexItem,
|
item: RegexItem,
|
||||||
|
@ -191,7 +216,7 @@ class DotItem : RegexItem {
|
||||||
}
|
}
|
||||||
|
|
||||||
class AlternationItem(val left: RegexItem, val right: RegexItem) : RegexItem {
|
class AlternationItem(val left: RegexItem, val right: RegexItem) : RegexItem {
|
||||||
override fun toString(): String = "(${left}|${right})"
|
override fun toString(): String = "${left}|${right}"
|
||||||
override fun findMatch(str: String): AvailableState {
|
override fun findMatch(str: String): AvailableState {
|
||||||
// Alternation은 왼쪽 또는 오른쪽 항목 중 하나와 매칭되므로, 각각 시도해보고 성공하는 경우를 반환
|
// Alternation은 왼쪽 또는 오른쪽 항목 중 하나와 매칭되므로, 각각 시도해보고 성공하는 경우를 반환
|
||||||
val leftMatch = left.findMatch(str)
|
val leftMatch = left.findMatch(str)
|
||||||
|
|
|
@ -2,21 +2,19 @@ package org.example
|
||||||
|
|
||||||
import kotlin.test.Test
|
import kotlin.test.Test
|
||||||
import kotlin.test.assertEquals
|
import kotlin.test.assertEquals
|
||||||
import com.github.h0tk3y.betterParse.grammar.parseToEnd
|
|
||||||
|
|
||||||
class ParserTest {
|
class ParserTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
fun testSimpleCharacter() {
|
fun testSimpleCharacter() {
|
||||||
val input = "a"
|
val input = "a"
|
||||||
val result = RegexParser().parseToEnd(input)
|
val result = compileRegex(input)
|
||||||
assertEquals("a",result.toString())
|
assertEquals("a", result.toString())
|
||||||
}
|
}
|
||||||
@Test
|
@Test
|
||||||
fun testCharacterWithPlus() {
|
fun testCharacterWithPlus() {
|
||||||
val input = "a+"
|
val input = "a+"
|
||||||
val parser = RegexParser()
|
val result = compileRegex(input)
|
||||||
val result = parser.parseToEnd(input)
|
|
||||||
assertEquals("a+", result.toString())
|
assertEquals("a+", result.toString())
|
||||||
assert(result.match("a").isSuccess)
|
assert(result.match("a").isSuccess)
|
||||||
assert(result.match("aa").isSuccess)
|
assert(result.match("aa").isSuccess)
|
||||||
|
@ -25,8 +23,7 @@ class ParserTest {
|
||||||
@Test
|
@Test
|
||||||
fun testCharacterWithStar() {
|
fun testCharacterWithStar() {
|
||||||
val input = "b*"
|
val input = "b*"
|
||||||
val parser = RegexParser()
|
val result = compileRegex(input)
|
||||||
val result = parser.parseToEnd(input)
|
|
||||||
assertEquals("b*", result.toString())
|
assertEquals("b*", result.toString())
|
||||||
assert(result.match("").isSuccess)
|
assert(result.match("").isSuccess)
|
||||||
assert(result.match("b").isSuccess)
|
assert(result.match("b").isSuccess)
|
||||||
|
@ -36,8 +33,7 @@ class ParserTest {
|
||||||
@Test
|
@Test
|
||||||
fun testCharacterWithQuestion() {
|
fun testCharacterWithQuestion() {
|
||||||
val input = "c?"
|
val input = "c?"
|
||||||
val parser = RegexParser()
|
val result = compileRegex(input)
|
||||||
val result = parser.parseToEnd(input)
|
|
||||||
assertEquals("c?", result.toString())
|
assertEquals("c?", result.toString())
|
||||||
assert(result.match("").isSuccess)
|
assert(result.match("").isSuccess)
|
||||||
assert(result.match("c").isSuccess)
|
assert(result.match("c").isSuccess)
|
||||||
|
@ -46,8 +42,7 @@ class ParserTest {
|
||||||
@Test
|
@Test
|
||||||
fun testDot() {
|
fun testDot() {
|
||||||
val input = "."
|
val input = "."
|
||||||
val parser = RegexParser()
|
val result = compileRegex(input)
|
||||||
val result = parser.parseToEnd(input)
|
|
||||||
assertEquals(".", result.toString())
|
assertEquals(".", result.toString())
|
||||||
assert(result.match("a").isSuccess)
|
assert(result.match("a").isSuccess)
|
||||||
assert(result.match("1").isSuccess)
|
assert(result.match("1").isSuccess)
|
||||||
|
@ -57,9 +52,8 @@ class ParserTest {
|
||||||
@Test
|
@Test
|
||||||
fun testAlternation() {
|
fun testAlternation() {
|
||||||
val input = "a|b"
|
val input = "a|b"
|
||||||
val parser = RegexParser()
|
val result = compileRegex(input)
|
||||||
val result = parser.parseToEnd(input)
|
assertEquals("a|b", result.toString())
|
||||||
assertEquals("(a|b)", result.toString())
|
|
||||||
assert(result.match("a").isSuccess)
|
assert(result.match("a").isSuccess)
|
||||||
assert(result.match("b").isSuccess)
|
assert(result.match("b").isSuccess)
|
||||||
assert(!result.match("c").isSuccess)
|
assert(!result.match("c").isSuccess)
|
||||||
|
@ -67,9 +61,8 @@ class ParserTest {
|
||||||
@Test
|
@Test
|
||||||
fun testParentheses() {
|
fun testParentheses() {
|
||||||
val input = "(d)"
|
val input = "(d)"
|
||||||
val parser = RegexParser()
|
val result = compileRegex(input)
|
||||||
val result = parser.parseToEnd(input)
|
assertEquals("(d)", result.toString())
|
||||||
assertEquals("d", result.toString())
|
|
||||||
assert(result.match("d").isSuccess)
|
assert(result.match("d").isSuccess)
|
||||||
assert(!result.match("e").isSuccess)
|
assert(!result.match("e").isSuccess)
|
||||||
}
|
}
|
||||||
|
@ -77,8 +70,7 @@ class ParserTest {
|
||||||
@Test
|
@Test
|
||||||
fun testComplexExpression() {
|
fun testComplexExpression() {
|
||||||
val input = "a(b|c)*d+"
|
val input = "a(b|c)*d+"
|
||||||
val parser = RegexParser()
|
val result = compileRegex(input)
|
||||||
val result = parser.parseToEnd(input)
|
|
||||||
assertEquals("a(b|c)*d+", result.toString())
|
assertEquals("a(b|c)*d+", result.toString())
|
||||||
assert(result.match("ad").isSuccess)
|
assert(result.match("ad").isSuccess)
|
||||||
assert(!result.match("ab").isSuccess)
|
assert(!result.match("ab").isSuccess)
|
||||||
|
@ -90,8 +82,7 @@ class ParserTest {
|
||||||
@Test
|
@Test
|
||||||
fun testAndThen() {
|
fun testAndThen() {
|
||||||
val input = "ab"
|
val input = "ab"
|
||||||
val parser = RegexParser()
|
val result = compileRegex(input)
|
||||||
val result = parser.parseToEnd(input)
|
|
||||||
assertEquals("ab", result.toString())
|
assertEquals("ab", result.toString())
|
||||||
assert(result.match("ab").isSuccess)
|
assert(result.match("ab").isSuccess)
|
||||||
assert(!result.match("a").isSuccess)
|
assert(!result.match("a").isSuccess)
|
||||||
|
@ -99,9 +90,8 @@ class ParserTest {
|
||||||
}
|
}
|
||||||
@Test
|
@Test
|
||||||
fun testDotAndPlus() {
|
fun testDotAndPlus() {
|
||||||
val input = ".+a";
|
val input = ".+a"
|
||||||
val parser = RegexParser()
|
val result = compileRegex(input)
|
||||||
val result = parser.parseToEnd(input)
|
|
||||||
assertEquals(".+a", result.toString())
|
assertEquals(".+a", result.toString())
|
||||||
assert(!result.match("a").isSuccess)
|
assert(!result.match("a").isSuccess)
|
||||||
assert(result.match("ba").isSuccess)
|
assert(result.match("ba").isSuccess)
|
||||||
|
@ -110,8 +100,7 @@ class ParserTest {
|
||||||
@Test
|
@Test
|
||||||
fun testEscapedCharacter() {
|
fun testEscapedCharacter() {
|
||||||
val input = "\\+"
|
val input = "\\+"
|
||||||
val parser = RegexParser()
|
val result = compileRegex(input)
|
||||||
val result = parser.parseToEnd(input)
|
|
||||||
assertEquals("\\+", result.toString())
|
assertEquals("\\+", result.toString())
|
||||||
assert(result.match("+").isSuccess)
|
assert(result.match("+").isSuccess)
|
||||||
assert(!result.match("a").isSuccess)
|
assert(!result.match("a").isSuccess)
|
||||||
|
@ -119,12 +108,32 @@ class ParserTest {
|
||||||
@Test
|
@Test
|
||||||
fun testBracketContent() {
|
fun testBracketContent() {
|
||||||
val input = "[abc]"
|
val input = "[abc]"
|
||||||
val parser = RegexParser()
|
val result = compileRegex(input)
|
||||||
val result = parser.parseToEnd(input)
|
|
||||||
assertEquals("[abc]", result.toString())
|
assertEquals("[abc]", result.toString())
|
||||||
assert(result.match("a").isSuccess)
|
assert(result.match("a").isSuccess)
|
||||||
assert(result.match("b").isSuccess)
|
assert(result.match("b").isSuccess)
|
||||||
assert(result.match("c").isSuccess)
|
assert(result.match("c").isSuccess)
|
||||||
assert(!result.match("d").isSuccess)
|
assert(!result.match("d").isSuccess)
|
||||||
}
|
}
|
||||||
}
|
@Test
fun testNestedGroups() {
    // A repeated outer group containing an alternation group.
    val pattern = "(a(b|c)d)+"
    val regex = compileRegex(pattern)

    // Printing the parsed expression must reproduce the source pattern.
    assertEquals(pattern, regex.toString())

    // Exactly one of 'b' or 'c' is required between 'a' and 'd'.
    assert(!regex.match("ad").isSuccess)
    assert(regex.match("abd").isSuccess)
    assert(regex.match("acd").isSuccess)
    assert(!regex.match("a").isSuccess)
}
|
||||||
|
@Test
fun testCaptureGroups() {
    // Two sibling groups, each capturing a single character.
    val pattern = "(a)(b)"
    val regex = compileRegex(pattern)
    assertEquals(pattern, regex.toString())

    val outcome = regex.match("ab")
    assert(outcome.isSuccess)

    // Captures are keyed by the group's numeric name: "(a)" is "0", "(b)" is "1".
    val firstState = outcome.available.first()
    assertEquals("a", firstState.captures["0"])
    assertEquals("b", firstState.captures["1"])
}
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue