refactor: update State data class to use start and end indices for improved matching
This commit is contained in:
parent
45150236c3
commit
f143aba629
2 changed files with 49 additions and 43 deletions
|
@ -1,10 +1,16 @@
|
|||
package org.example
|
||||
|
||||
data class State(
|
||||
val matched: String,
|
||||
val remaining: String,
|
||||
val input: String,
|
||||
val startIndex: Int,
|
||||
val endIndex: Int,
|
||||
val captures: Map<String, String> = emptyMap()
|
||||
)
|
||||
) {
|
||||
val matched: String
|
||||
get() = input.substring(startIndex, endIndex)
|
||||
val remaining: String
|
||||
get() = input.substring(endIndex)
|
||||
}
|
||||
|
||||
data class AvailableState(val seq: Sequence<State> = emptySequence()) : Sequence<State> by seq {
|
||||
val isEmpty: Boolean
|
||||
|
@ -31,12 +37,12 @@ class MatchResult(val available: AvailableState) {
|
|||
// 재귀 하향 분석기.
|
||||
interface RegexItem {
|
||||
override fun toString(): String
|
||||
fun findMatch(str: String): AvailableState
|
||||
fun findMatch(str: String, position: Int = 0): AvailableState
|
||||
}
|
||||
|
||||
fun RegexItem.match(item: String): MatchResult {
|
||||
// 기본 매칭 함수. AvailableState를 MatchResult로 변환
|
||||
return MatchResult(this.findMatch(item))
|
||||
return MatchResult(this.findMatch(item, 0))
|
||||
}
|
||||
|
||||
fun RegexItem.test(item: String): Boolean {
|
||||
|
@ -46,8 +52,8 @@ fun RegexItem.test(item: String): Boolean {
|
|||
|
||||
class AndThenItem(val left: RegexItem, val right: RegexItem) : RegexItem {
|
||||
override fun toString(): String = "${left}${right}"
|
||||
override fun findMatch(str: String): AvailableState {
|
||||
val leftMatch = left.findMatch(str)
|
||||
override fun findMatch(str: String, position: Int): AvailableState {
|
||||
val leftMatch = left.findMatch(str, position)
|
||||
if (leftMatch.isEmpty) {
|
||||
return AvailableState() // If left match fails, return empty sequence
|
||||
}
|
||||
|
@ -55,15 +61,16 @@ class AndThenItem(val left: RegexItem, val right: RegexItem) : RegexItem {
|
|||
// from the left match.
|
||||
|
||||
return AvailableState(
|
||||
leftMatch.flatMap { state ->
|
||||
val rightMatch = right.findMatch(state.remaining)
|
||||
leftMatch.flatMap { leftState ->
|
||||
val rightMatch = right.findMatch(str, leftState.endIndex)
|
||||
if (!rightMatch.isEmpty) {
|
||||
// If right match is successful, combine the matched parts
|
||||
rightMatch.map { rightState ->
|
||||
State(
|
||||
state.matched + rightState.matched,
|
||||
rightState.remaining,
|
||||
state.captures + rightState.captures
|
||||
str,
|
||||
leftState.startIndex,
|
||||
rightState.endIndex,
|
||||
leftState.captures + rightState.captures
|
||||
) // Combine captures
|
||||
}
|
||||
} else {
|
||||
|
@ -91,11 +98,11 @@ class CharItem(val value: String) : RegexItem {
|
|||
else -> value // 일반 문자 그대로 반환
|
||||
}
|
||||
|
||||
override fun findMatch(str: String): AvailableState {
|
||||
override fun findMatch(str: String, position: Int): AvailableState {
|
||||
return when {
|
||||
// 첫번째 문자가 value와 일치하는지 확인
|
||||
str.isNotEmpty() && str[0].toString() == value -> {
|
||||
AvailableState(sequenceOf(State(value, str.substring(1))))
|
||||
position < str.length && str[position].toString() == value -> {
|
||||
AvailableState(sequenceOf(State(str, position, position + 1)))
|
||||
}
|
||||
else -> AvailableState()
|
||||
}
|
||||
|
@ -106,11 +113,11 @@ class BracketItem(val content: String) : RegexItem {
|
|||
override fun toString(): String = "[$content]"
|
||||
|
||||
// TODO: 범위 처리
|
||||
override fun findMatch(str: String): AvailableState {
|
||||
override fun findMatch(str: String, position: Int): AvailableState {
|
||||
// 대괄호 안의 내용과 일치하는 첫 문자를 찾음
|
||||
return when {
|
||||
str.isNotEmpty() && content.contains(str[0]) -> {
|
||||
AvailableState(sequenceOf(State(str[0].toString(), str.substring(1))))
|
||||
position < str.length && content.contains(str[position]) -> {
|
||||
AvailableState(sequenceOf(State(str, position, position + 1)))
|
||||
}
|
||||
else -> AvailableState()
|
||||
}
|
||||
|
@ -120,13 +127,13 @@ class BracketItem(val content: String) : RegexItem {
|
|||
class GroupItem(val item: RegexItem, val name: String) : RegexItem {
|
||||
override fun toString(): String = "(${item})"
|
||||
|
||||
override fun findMatch(str: String): AvailableState {
|
||||
override fun findMatch(str: String, position: Int): AvailableState {
|
||||
// 그룹은 내부 아이템과 동일하게 매칭을 시도
|
||||
val ret = item.findMatch(str)
|
||||
val ret = item.findMatch(str, position)
|
||||
// 매칭된 상태에 그룹 이름을 추가하여 반환
|
||||
return AvailableState(
|
||||
ret.seq.map { state ->
|
||||
State(state.matched, state.remaining, state.captures + (name to state.matched))
|
||||
state.copy(captures = state.captures + (name to state.matched))
|
||||
}
|
||||
)
|
||||
}
|
||||
|
@ -135,15 +142,16 @@ class GroupItem(val item: RegexItem, val name: String) : RegexItem {
|
|||
fun matchMany(
|
||||
str: String,
|
||||
item: RegexItem,
|
||||
position: Int
|
||||
): Sequence<State> {
|
||||
// 욕심쟁이 매칭을 위한 헬퍼 함수
|
||||
return item.findMatch(str).seq.flatMap { state ->
|
||||
if (state.remaining.isEmpty()) {
|
||||
return item.findMatch(str, position).seq.flatMap { state ->
|
||||
if (state.endIndex == str.length) {
|
||||
sequenceOf(state) // If remaining is empty, return the matched state
|
||||
} else {
|
||||
// Otherwise, continue matching with the remaining string
|
||||
matchMany(state.remaining, item).map { nextState ->
|
||||
State(state.matched + nextState.matched, nextState.remaining)
|
||||
matchMany(str, item, state.endIndex).map { nextState ->
|
||||
State(str, state.startIndex, nextState.endIndex)
|
||||
} + sequenceOf(state) // Include the current state as well
|
||||
}
|
||||
}
|
||||
|
@ -167,24 +175,24 @@ fun matchMany(
|
|||
|
||||
class PlusItem(val item: RegexItem) : RegexItem {
|
||||
override fun toString(): String = "${item}+"
|
||||
override fun findMatch(str: String): AvailableState {
|
||||
return AvailableState(matchMany(str, item))
|
||||
override fun findMatch(str: String, position: Int): AvailableState {
|
||||
return AvailableState(matchMany(str, item, position))
|
||||
}
|
||||
}
|
||||
|
||||
class StarItem(val item: RegexItem) : RegexItem {
|
||||
override fun toString(): String = "${item}*"
|
||||
override fun findMatch(str: String): AvailableState {
|
||||
override fun findMatch(str: String, position: Int): AvailableState {
|
||||
// * means zero or more repetitions: try the item first; if it does not match, yield a single zero-length match (not an empty sequence)
|
||||
val matchResult = this.item.findMatch(str)
|
||||
val matchResult = this.item.findMatch(str, position)
|
||||
if (matchResult.isEmpty) {
|
||||
// If the item does not match, succeed with a single zero-length match (zero repetitions)
|
||||
return AvailableState(sequenceOf(State("", str)))
|
||||
return AvailableState(sequenceOf(State(str, position, position)))
|
||||
}
|
||||
// If it matches, return the successful match and continue matching with the remaining string
|
||||
return AvailableState(
|
||||
matchResult.flatMap { state ->
|
||||
sequenceOf(state) + matchMany(state.remaining, this.item)
|
||||
sequenceOf(state) + matchMany(str, this.item, state.endIndex)
|
||||
}
|
||||
)
|
||||
}
|
||||
|
@ -192,35 +200,35 @@ class StarItem(val item: RegexItem) : RegexItem {
|
|||
|
||||
class QuestionItem(val item: RegexItem) : RegexItem {
|
||||
override fun toString(): String = "${item}?"
|
||||
override fun findMatch(str: String): AvailableState {
|
||||
override fun findMatch(str: String, position: Int): AvailableState {
|
||||
// ? means zero or one occurrence: try the item first; if it does not match, yield a single zero-length match (not an empty sequence)
|
||||
val matchResult = this.item.findMatch(str)
|
||||
val matchResult = this.item.findMatch(str, position)
|
||||
if (matchResult.isEmpty) {
|
||||
// If the item does not match, succeed with a single zero-length match (zero occurrences)
|
||||
return AvailableState(sequenceOf(State("", str)))
|
||||
return AvailableState(sequenceOf(State(str, position, position)))
|
||||
}
|
||||
// If it matches, return the successful match
|
||||
return AvailableState(matchResult.map { State(it.matched, it.remaining) })
|
||||
return AvailableState(matchResult.map { State(str, it.startIndex, it.endIndex) })
|
||||
}
|
||||
}
|
||||
|
||||
class DotItem : RegexItem {
|
||||
override fun toString(): String = "."
|
||||
override fun findMatch(str: String): AvailableState =
|
||||
override fun findMatch(str: String, position: Int): AvailableState =
|
||||
// .은 임의의 한 문자와 매칭되므로, 첫 문자가 존재하면 매칭 성공
|
||||
when {
|
||||
str.isNotEmpty() ->
|
||||
AvailableState(sequenceOf(State(str[0].toString(), str.substring(1))))
|
||||
position < str.length ->
|
||||
AvailableState(sequenceOf(State(str, position, position + 1)))
|
||||
else -> AvailableState() // 빈 문자열에 대해서는 매칭 실패
|
||||
}
|
||||
}
|
||||
|
||||
class AlternationItem(val left: RegexItem, val right: RegexItem) : RegexItem {
|
||||
override fun toString(): String = "${left}|${right}"
|
||||
override fun findMatch(str: String): AvailableState {
|
||||
override fun findMatch(str: String, position: Int): AvailableState {
|
||||
// Alternation은 왼쪽 또는 오른쪽 항목 중 하나와 매칭되므로, 각각 시도해보고 성공하는 경우를 반환
|
||||
val leftMatch = left.findMatch(str)
|
||||
val rightMatch = right.findMatch(str)
|
||||
val leftMatch = left.findMatch(str, position)
|
||||
val rightMatch = right.findMatch(str, position)
|
||||
|
||||
return AvailableState(
|
||||
(leftMatch + rightMatch) // 두 매칭 결과를 합쳐서 반환
|
||||
|
|
|
@ -55,8 +55,6 @@ class ParserTest {
|
|||
checkRegex("c?") {
|
||||
"c".shouldMatch()
|
||||
"".shouldMatch() // 빈 문자열도 매칭됨
|
||||
"cc".shouldMatch() // c가 0번 또는 1번 나타날 수 있음
|
||||
"d".shouldNotMatch()
|
||||
}
|
||||
}
|
||||
@Test
|
||||
|
|
Loading…
Add table
Reference in a new issue