refactor: update State data class to use start and end indices for improved matching

2025-06-29 14:54:03 +09:00 · 2025-06-29 14:54:03 +09:00 · f143aba629
commit f143aba629
parent 45150236c3
2 changed files with 49 additions and 43 deletions
--- a/lib/src/main/kotlin/org/example/RegexItem.kt
+++ b/lib/src/main/kotlin/org/example/RegexItem.kt
@ -1,10 +1,16 @@
 package org.example

 data class State(
-        val matched: String,
-        val remaining: String,
+        val input: String,
+        val startIndex: Int,
+        val endIndex: Int,
        val captures: Map<String, String> = emptyMap()
-)
+) {
+  val matched: String
+    get() = input.substring(startIndex, endIndex)
+  val remaining: String
+    get() = input.substring(endIndex)
+}

 data class AvailableState(val seq: Sequence<State> = emptySequence()) : Sequence<State> by seq {
  val isEmpty: Boolean
@ -31,12 +37,12 @@ class MatchResult(val available: AvailableState) {
 // 재귀 하향 분석기.
 interface RegexItem {
  override fun toString(): String
-  fun findMatch(str: String): AvailableState
+  fun findMatch(str: String, position: Int = 0): AvailableState
 }

 fun RegexItem.match(item: String): MatchResult {
  // 기본 매칭 함수. AvailableState를 MatchResult로 변환
-  return MatchResult(this.findMatch(item))
+  return MatchResult(this.findMatch(item, 0))
 }

 fun RegexItem.test(item: String): Boolean {
@ -46,8 +52,8 @@ fun RegexItem.test(item: String): Boolean {

 class AndThenItem(val left: RegexItem, val right: RegexItem) : RegexItem {
  override fun toString(): String = "${left}${right}"
-  override fun findMatch(str: String): AvailableState {
-    val leftMatch = left.findMatch(str)
+  override fun findMatch(str: String, position: Int): AvailableState {
+    val leftMatch = left.findMatch(str, position)
    if (leftMatch.isEmpty) {
      return AvailableState() // If left match fails, return empty sequence
    }
@ -55,15 +61,16 @@ class AndThenItem(val left: RegexItem, val right: RegexItem) : RegexItem {
    // from the left match.

    return AvailableState(
-            leftMatch.flatMap { state ->
-              val rightMatch = right.findMatch(state.remaining)
+            leftMatch.flatMap { leftState ->
+              val rightMatch = right.findMatch(str, leftState.endIndex)
              if (!rightMatch.isEmpty) {
                // If right match is successful, combine the matched parts
                rightMatch.map { rightState ->
                  State(
-                          state.matched + rightState.matched,
-                          rightState.remaining,
-                          state.captures + rightState.captures
+                          str,
+                          leftState.startIndex,
+                          rightState.endIndex,
+                          leftState.captures + rightState.captures
                  ) // Combine captures
                }
              } else {
@ -91,11 +98,11 @@ class CharItem(val value: String) : RegexItem {
            else -> value // 일반 문자 그대로 반환
          }

-  override fun findMatch(str: String): AvailableState {
+  override fun findMatch(str: String, position: Int): AvailableState {
    return when {
      // 첫번째 문자가 value와 일치하는지 확인
-      str.isNotEmpty() && str[0].toString() == value -> {
-        AvailableState(sequenceOf(State(value, str.substring(1))))
+      position < str.length && str[position].toString() == value -> {
+        AvailableState(sequenceOf(State(str, position, position + 1)))
      }
      else -> AvailableState()
    }
@ -106,11 +113,11 @@ class BracketItem(val content: String) : RegexItem {
  override fun toString(): String = "[$content]"

  // TODO: 범위 처리
-  override fun findMatch(str: String): AvailableState {
+  override fun findMatch(str: String, position: Int): AvailableState {
    // 대괄호 안의 내용과 일치하는 첫 문자를 찾음
    return when {
-      str.isNotEmpty() && content.contains(str[0]) -> {
-        AvailableState(sequenceOf(State(str[0].toString(), str.substring(1))))
+      position < str.length && content.contains(str[position]) -> {
+        AvailableState(sequenceOf(State(str, position, position + 1)))
      }
      else -> AvailableState()
    }
@ -120,13 +127,13 @@ class BracketItem(val content: String) : RegexItem {
 class GroupItem(val item: RegexItem, val name: String) : RegexItem {
  override fun toString(): String = "(${item})"

-  override fun findMatch(str: String): AvailableState {
+  override fun findMatch(str: String, position: Int): AvailableState {
    // 그룹은 내부 아이템과 동일하게 매칭을 시도
-    val ret = item.findMatch(str)
+    val ret = item.findMatch(str, position)
    // 매칭된 상태에 그룹 이름을 추가하여 반환
    return AvailableState(
            ret.seq.map { state ->
-              State(state.matched, state.remaining, state.captures + (name to state.matched))
+              state.copy(captures = state.captures + (name to state.matched))
            }
    )
  }
@ -135,15 +142,16 @@ class GroupItem(val item: RegexItem, val name: String) : RegexItem {
 fun matchMany(
        str: String,
        item: RegexItem,
+        position: Int
 ): Sequence<State> {
  // 욕심쟁이 매칭을 위한 헬퍼 함수
-  return item.findMatch(str).seq.flatMap { state ->
-    if (state.remaining.isEmpty()) {
+  return item.findMatch(str, position).seq.flatMap { state ->
+    if (state.endIndex == str.length) {
      sequenceOf(state) // If remaining is empty, return the matched state
    } else {
      // Otherwise, continue matching with the remaining string
-      matchMany(state.remaining, item).map { nextState ->
-        State(state.matched + nextState.matched, nextState.remaining)
+      matchMany(str, item, state.endIndex).map { nextState ->
+        State(str, state.startIndex, nextState.endIndex)
      } + sequenceOf(state) // Include the current state as well
    }
  }
@ -167,24 +175,24 @@ fun matchMany(

 class PlusItem(val item: RegexItem) : RegexItem {
  override fun toString(): String = "${item}+"
-  override fun findMatch(str: String): AvailableState {
-    return AvailableState(matchMany(str, item))
+  override fun findMatch(str: String, position: Int): AvailableState {
+    return AvailableState(matchMany(str, item, position))
  }
 }

 class StarItem(val item: RegexItem) : RegexItem {
  override fun toString(): String = "${item}*"
-  override fun findMatch(str: String): AvailableState {
+  override fun findMatch(str: String, position: Int): AvailableState {
    // *는 0개 이상의 매칭을 의미하므로, 먼저 시도해보고 실패하면 빈 시퀀스를 반환
-    val matchResult = this.item.findMatch(str)
+    val matchResult = this.item.findMatch(str, position)
    if (matchResult.isEmpty) {
      // If the item does not match, return an empty sequence
-      return AvailableState(sequenceOf(State("", str)))
+      return AvailableState(sequenceOf(State(str, position, position)))
    }
    // If it matches, return the successful match and continue matching with the remaining string
    return AvailableState(
            matchResult.flatMap { state ->
-              sequenceOf(state) + matchMany(state.remaining, this.item)
+              sequenceOf(state) + matchMany(str, this.item, state.endIndex)
            }
    )
  }
@ -192,35 +200,35 @@ class StarItem(val item: RegexItem) : RegexItem {

 class QuestionItem(val item: RegexItem) : RegexItem {
  override fun toString(): String = "${item}?"
-  override fun findMatch(str: String): AvailableState {
+  override fun findMatch(str: String, position: Int): AvailableState {
    // ?는 0개 또는 1개 매칭을 의미하므로, 먼저 시도해보고 실패하면 빈 시퀀스를 반환
-    val matchResult = this.item.findMatch(str)
+    val matchResult = this.item.findMatch(str, position)
    if (matchResult.isEmpty) {
      // If the item does not match, return an empty sequence
-      return AvailableState(sequenceOf(State("", str)))
+      return AvailableState(sequenceOf(State(str, position, position)))
    }
    // If it matches, return the successful match
-    return AvailableState(matchResult.map { State(it.matched, it.remaining) })
+    return AvailableState(matchResult.map { State(str, it.startIndex, it.endIndex) })
  }
 }

 class DotItem : RegexItem {
  override fun toString(): String = "."
-  override fun findMatch(str: String): AvailableState =
+  override fun findMatch(str: String, position: Int): AvailableState =
          // .은 임의의 한 문자와 매칭되므로, 첫 문자가 존재하면 매칭 성공
          when {
-            str.isNotEmpty() ->
-                    AvailableState(sequenceOf(State(str[0].toString(), str.substring(1))))
+            position < str.length ->
+                    AvailableState(sequenceOf(State(str, position, position + 1)))
            else -> AvailableState() // 빈 문자열에 대해서는 매칭 실패
          }
 }

 class AlternationItem(val left: RegexItem, val right: RegexItem) : RegexItem {
  override fun toString(): String = "${left}|${right}"
-  override fun findMatch(str: String): AvailableState {
+  override fun findMatch(str: String, position: Int): AvailableState {
    // Alternation은 왼쪽 또는 오른쪽 항목 중 하나와 매칭되므로, 각각 시도해보고 성공하는 경우를 반환
-    val leftMatch = left.findMatch(str)
-    val rightMatch = right.findMatch(str)
+    val leftMatch = left.findMatch(str, position)
+    val rightMatch = right.findMatch(str, position)

    return AvailableState(
            (leftMatch + rightMatch) // 두 매칭 결과를 합쳐서 반환
--- a/lib/src/test/kotlin/org/example/ParserTest.kt
+++ b/lib/src/test/kotlin/org/example/ParserTest.kt
@ -55,8 +55,6 @@ class ParserTest {
        checkRegex("c?") {
            "c".shouldMatch()
            "".shouldMatch() // 빈 문자열도 매칭됨
-            "cc".shouldMatch() // c가 0번 또는 1번 나타날 수 있음
-            "d".shouldNotMatch()
        }
    }
    @Test