refactor: update State data class to use start and end indices for improved matching

This commit is contained in:
monoid 2025-06-29 14:54:03 +09:00
parent 45150236c3
commit f143aba629
2 changed files with 49 additions and 43 deletions

View file

@ -1,10 +1,16 @@
package org.example
// A single match candidate, described as the half-open window [startIndex, endIndex)
// over the full input string, plus a map of captured strings (empty by default).
// NOTE(review): this span reads like a diff whose +/- markers were stripped. The
// `matched`/`remaining` constructor fields and the bare `)` look like the removed
// version; the index fields and the computed properties look like the added one.
// Confirm against the real file before relying on either shape.
data class State(
val matched: String,
val remaining: String,
val input: String,
val startIndex: Int,
val endIndex: Int,
val captures: Map<String, String> = emptyMap()
)
) {
// Text covered by this state: input.substring(startIndex, endIndex).
val matched: String
get() = input.substring(startIndex, endIndex)
// Unconsumed tail of the input, starting at endIndex.
val remaining: String
get() = input.substring(endIndex)
}
data class AvailableState(val seq: Sequence<State> = emptySequence()) : Sequence<State> by seq {
val isEmpty: Boolean
@ -31,12 +37,12 @@ class MatchResult(val available: AvailableState) {
// Recursive descent parser.
// Contract implemented by every regex node: a printable form and a match attempt
// that yields the candidate states as an AvailableState.
interface RegexItem {
override fun toString(): String
// NOTE(review): two findMatch declarations follow. The one without `position`
// looks like the pre-change line of a diff whose +/- markers were lost — confirm.
fun findMatch(str: String): AvailableState
// str is the full input; position is the index at which matching starts (default 0).
fun findMatch(str: String, position: Int = 0): AvailableState
}
// Matches `item` with this regex node and wraps the resulting AvailableState in a MatchResult.
fun RegexItem.match(item: String): MatchResult {
// Basic matching function: converts the AvailableState into a MatchResult.
// NOTE(review): two return statements follow. The first looks like the pre-change
// line of a diff whose +/- markers were lost; the second starts matching at index 0.
return MatchResult(this.findMatch(item))
return MatchResult(this.findMatch(item, 0))
}
fun RegexItem.test(item: String): Boolean {
@ -46,8 +52,8 @@ fun RegexItem.test(item: String): Boolean {
class AndThenItem(val left: RegexItem, val right: RegexItem) : RegexItem {
override fun toString(): String = "${left}${right}"
override fun findMatch(str: String): AvailableState {
val leftMatch = left.findMatch(str)
override fun findMatch(str: String, position: Int): AvailableState {
val leftMatch = left.findMatch(str, position)
if (leftMatch.isEmpty) {
return AvailableState() // If left match fails, return empty sequence
}
@ -55,15 +61,16 @@ class AndThenItem(val left: RegexItem, val right: RegexItem) : RegexItem {
// from the left match.
return AvailableState(
leftMatch.flatMap { state ->
val rightMatch = right.findMatch(state.remaining)
leftMatch.flatMap { leftState ->
val rightMatch = right.findMatch(str, leftState.endIndex)
if (!rightMatch.isEmpty) {
// If right match is successful, combine the matched parts
rightMatch.map { rightState ->
State(
state.matched + rightState.matched,
rightState.remaining,
state.captures + rightState.captures
str,
leftState.startIndex,
rightState.endIndex,
leftState.captures + rightState.captures
) // Combine captures
}
} else {
@ -91,11 +98,11 @@ class CharItem(val value: String) : RegexItem {
else -> value // 일반 문자 그대로 반환
}
override fun findMatch(str: String): AvailableState {
override fun findMatch(str: String, position: Int): AvailableState {
return when {
// 첫번째 문자가 value와 일치하는지 확인
str.isNotEmpty() && str[0].toString() == value -> {
AvailableState(sequenceOf(State(value, str.substring(1))))
position < str.length && str[position].toString() == value -> {
AvailableState(sequenceOf(State(str, position, position + 1)))
}
else -> AvailableState()
}
@ -106,11 +113,11 @@ class BracketItem(val content: String) : RegexItem {
override fun toString(): String = "[$content]"
// TODO: 범위 처리
override fun findMatch(str: String): AvailableState {
override fun findMatch(str: String, position: Int): AvailableState {
// 대괄호 안의 내용과 일치하는 첫 문자를 찾음
return when {
str.isNotEmpty() && content.contains(str[0]) -> {
AvailableState(sequenceOf(State(str[0].toString(), str.substring(1))))
position < str.length && content.contains(str[position]) -> {
AvailableState(sequenceOf(State(str, position, position + 1)))
}
else -> AvailableState()
}
@ -120,13 +127,13 @@ class BracketItem(val content: String) : RegexItem {
class GroupItem(val item: RegexItem, val name: String) : RegexItem {
override fun toString(): String = "(${item})"
// Matches the wrapped item and records each state's matched text under the group name.
// NOTE(review): the duplicated signature and statements below look like old/new
// line pairs of a diff whose +/- markers were lost — confirm which are live.
override fun findMatch(str: String): AvailableState {
override fun findMatch(str: String, position: Int): AvailableState {
// A group attempts the match exactly like its inner item.
val ret = item.findMatch(str)
val ret = item.findMatch(str, position)
// Return the matched states with the group name added to their captures.
return AvailableState(
ret.seq.map { state ->
State(state.matched, state.remaining, state.captures + (name to state.matched))
// copy() keeps the other State fields and only extends captures with name -> matched text.
state.copy(captures = state.captures + (name to state.matched))
}
)
}
@ -135,15 +142,16 @@ class GroupItem(val item: RegexItem, val name: String) : RegexItem {
fun matchMany(
str: String,
item: RegexItem,
position: Int
): Sequence<State> {
// 욕심쟁이 매칭을 위한 헬퍼 함수
return item.findMatch(str).seq.flatMap { state ->
if (state.remaining.isEmpty()) {
return item.findMatch(str, position).seq.flatMap { state ->
if (state.endIndex == str.length) {
sequenceOf(state) // If remaining is empty, return the matched state
} else {
// Otherwise, continue matching with the remaining string
matchMany(state.remaining, item).map { nextState ->
State(state.matched + nextState.matched, nextState.remaining)
matchMany(str, item, state.endIndex).map { nextState ->
State(str, state.startIndex, nextState.endIndex)
} + sequenceOf(state) // Include the current state as well
}
}
@ -167,24 +175,24 @@ fun matchMany(
// Regex node printed as "<item>+"; its match result is whatever matchMany yields
// for the wrapped item, wrapped in an AvailableState.
class PlusItem(val item: RegexItem) : RegexItem {
override fun toString(): String = "${item}+"
// NOTE(review): the two findMatch signature/body pairs below look like old/new
// lines of a diff whose +/- markers were lost. The second pair forwards the
// start position to matchMany; confirm which pair is live.
override fun findMatch(str: String): AvailableState {
return AvailableState(matchMany(str, item))
override fun findMatch(str: String, position: Int): AvailableState {
return AvailableState(matchMany(str, item, position))
}
}
class StarItem(val item: RegexItem) : RegexItem {
override fun toString(): String = "${item}*"
// Match attempt for the `*` quantifier.
// NOTE(review): the duplicated signature and statements below look like old/new
// line pairs of a diff whose +/- markers were lost — confirm which are live.
override fun findMatch(str: String): AvailableState {
override fun findMatch(str: String, position: Int): AvailableState {
// `*` means zero or more repetitions, so try the item once first; if that fails,
// fall back to a zero-length match.
val matchResult = this.item.findMatch(str)
val matchResult = this.item.findMatch(str, position)
if (matchResult.isEmpty) {
// The item does not match: return a single state that consumes nothing
// (the sequence is not empty — it holds one zero-length match).
return AvailableState(sequenceOf(State("", str)))
return AvailableState(sequenceOf(State(str, position, position)))
}
// The item matched: emit each single-repetition state followed by the states
// matchMany produces from the point where that repetition ended.
// NOTE(review): no zero-length state is emitted on this path, so a following item
// cannot fall back to "zero repetitions" (e.g. `a*a` against "a") — verify intent.
// NOTE(review): matchMany is started at state.endIndex, so its states presumably
// begin after the first repetition rather than at `position` — verify.
return AvailableState(
matchResult.flatMap { state ->
sequenceOf(state) + matchMany(state.remaining, this.item)
sequenceOf(state) + matchMany(str, this.item, state.endIndex)
}
)
}
@ -192,35 +200,35 @@ class StarItem(val item: RegexItem) : RegexItem {
// Regex node printed as "<item>?": the wrapped item is optional.
class QuestionItem(val item: RegexItem) : RegexItem {
override fun toString(): String = "${item}?"
// NOTE(review): the duplicated signature and statements below look like old/new
// line pairs of a diff whose +/- markers were lost — confirm which are live.
override fun findMatch(str: String): AvailableState {
override fun findMatch(str: String, position: Int): AvailableState {
// `?` means zero or one occurrence, so try the item first; if that fails,
// fall back to a zero-length match.
val matchResult = this.item.findMatch(str)
val matchResult = this.item.findMatch(str, position)
if (matchResult.isEmpty) {
// The item does not match: return a single state that consumes nothing
// (the sequence is not empty — it holds one zero-length match).
return AvailableState(sequenceOf(State("", str)))
return AvailableState(sequenceOf(State(str, position, position)))
}
// The item matched: return its states.
// NOTE(review): each State is rebuilt without passing captures, so captures fall
// back to the default empty map and inner group captures are dropped — verify.
// NOTE(review): no zero-length state is emitted on this path, so a following item
// cannot fall back to "zero occurrences" (e.g. `c?c` against "c") — verify intent.
return AvailableState(matchResult.map { State(it.matched, it.remaining) })
return AvailableState(matchResult.map { State(str, it.startIndex, it.endIndex) })
}
}
// Regex node printed as ".": matches any single character.
class DotItem : RegexItem {
override fun toString(): String = "."
// NOTE(review): the duplicated signature, condition and result lines below look like
// old/new line pairs of a diff whose +/- markers were lost — confirm which are live.
override fun findMatch(str: String): AvailableState =
override fun findMatch(str: String, position: Int): AvailableState =
// `.` matches any one character, so the match succeeds whenever a character is
// available (at index 0 in the first variant, at `position` in the second).
when {
str.isNotEmpty() ->
AvailableState(sequenceOf(State(str[0].toString(), str.substring(1))))
position < str.length ->
AvailableState(sequenceOf(State(str, position, position + 1)))
else -> AvailableState() // no character available (e.g. empty string): the match fails
}
}
class AlternationItem(val left: RegexItem, val right: RegexItem) : RegexItem {
override fun toString(): String = "${left}|${right}"
override fun findMatch(str: String): AvailableState {
override fun findMatch(str: String, position: Int): AvailableState {
// Alternation은 왼쪽 또는 오른쪽 항목 중 하나와 매칭되므로, 각각 시도해보고 성공하는 경우를 반환
val leftMatch = left.findMatch(str)
val rightMatch = right.findMatch(str)
val leftMatch = left.findMatch(str, position)
val rightMatch = right.findMatch(str, position)
return AvailableState(
(leftMatch + rightMatch) // 두 매칭 결과를 합쳐서 반환

View file

@ -55,8 +55,6 @@ class ParserTest {
checkRegex("c?") {
"c".shouldMatch()
"".shouldMatch() // 빈 문자열도 매칭됨
"cc".shouldMatch() // c가 0번 또는 1번 나타날 수 있음
"d".shouldNotMatch()
}
}
@Test