feat: add bracket content parsing and corresponding tests

This commit is contained in:
monoid 2025-06-29 13:45:05 +09:00
parent 60ec5916d0
commit 78472511e7
3 changed files with 52 additions and 3 deletions

View file

@ -5,6 +5,7 @@ import com.github.h0tk3y.betterParse.grammar.Grammar
import com.github.h0tk3y.betterParse.grammar.parser import com.github.h0tk3y.betterParse.grammar.parser
import com.github.h0tk3y.betterParse.lexer.literalToken import com.github.h0tk3y.betterParse.lexer.literalToken
import com.github.h0tk3y.betterParse.lexer.regexToken import com.github.h0tk3y.betterParse.lexer.regexToken
import com.github.h0tk3y.betterParse.lexer.token
import com.github.h0tk3y.betterParse.parser.Parser import com.github.h0tk3y.betterParse.parser.Parser
class RegexParser : Grammar<RegexItem>() { class RegexParser : Grammar<RegexItem>() {
@ -14,9 +15,30 @@ class RegexParser : Grammar<RegexItem>() {
// Literal tokens for the alternation bar `|` and the grouping parentheses `(` and `)`.
val alternationSymbol by literalToken("|") val alternationSymbol by literalToken("|")
val openParenSymbol by literalToken("(") val openParenSymbol by literalToken("(")
val closeParenSymbol by literalToken(")") val closeParenSymbol by literalToken(")")
/**
 * Token that consumes a whole bracket expression such as `[abc]`, including both brackets.
 *
 * The matcher returns the number of characters consumed starting at `from`, or 0 when the
 * input at `from` is not a bracket expression. A backslash escapes the character that
 * follows it, so `\]` does not close the expression and `\\` is a literal backslash that
 * does not escape a following `]`. Empty brackets (`[]`) and unterminated expressions are
 * rejected.
 */
val bracketContent by token(
    name = "bracketContent",
    matcher = { seq, from ->
        if (from >= seq.length || seq[from] != '[') {
            0 // Does not start with '[': no match.
        } else {
            // Scan left to right so that each backslash is paired with the character it
            // escapes; looking only at the character before ']' misreads "\\]".
            var index = from + 1
            var closeAt = -1
            while (index < seq.length && closeAt < 0) {
                when (seq[index]) {
                    '\\' -> index += 2 // Skip the backslash and the escaped character.
                    ']' -> closeAt = index
                    else -> index++
                }
            }
            when {
                closeAt < 0 -> 0 // No unescaped closing bracket.
                closeAt == from + 1 -> 0 // Empty brackets are not allowed.
                else -> closeAt - from + 1 // Length from '[' through ']' inclusive.
            }
        }
    }
)
// Token for a literal `.` and a token matching one ASCII letter or digit.
val dot by literalToken(".") val dot by literalToken(".")
val charToken by regexToken("[a-zA-Z0-9]") val charToken by regexToken("[a-zA-Z0-9]")
@ -26,6 +48,7 @@ class RegexParser : Grammar<RegexItem>() {
char or char or
(dot asJust DotItem()) or (dot asJust DotItem()) or
(escapedCharacter map { CharItem(it.text.substring(1)) }) or (escapedCharacter map { CharItem(it.text.substring(1)) }) or
(bracketContent map { BracketItem(it.text.substring(1, it.text.length - 1)) }) or
(skip(openParenSymbol) and (parser(::rootParser)) and skip(closeParenSymbol)) (skip(openParenSymbol) and (parser(::rootParser)) and skip(closeParenSymbol))
val term: Parser<RegexItem> by val term: Parser<RegexItem> by

View file

@ -92,6 +92,21 @@ class CharItem(val value: String) : RegexItem {
} }
} }
/**
 * Regex item for a bracket expression: matches one character that is listed in [content].
 *
 * [content] is the raw text between the brackets. A backslash in it makes the following
 * character a literal member (so `\]` contributes `]`, and `\\` contributes `\`); the
 * backslash itself is not a member. A trailing backslash with nothing after it is kept
 * as a literal backslash.
 */
class BracketItem(val content: String) : RegexItem {
    // Unescaped set of characters accepted by this item, computed once from the raw content.
    private val members: Set<Char> = mutableSetOf<Char>().also { chars ->
        var index = 0
        while (index < content.length) {
            val current = content[index]
            if (current == '\\' && index + 1 < content.length) {
                chars.add(content[index + 1])
                index += 2
            } else {
                chars.add(current)
                index++
            }
        }
    }

    /** Renders the item back in bracket syntax using the raw, still-escaped content. */
    override fun toString(): String = "[$content]"

    // TODO: handle ranges such as a-z
    /**
     * Tries to match the first character of [str].
     *
     * @return a state that consumed that one character when it is a member of the bracket
     * expression; otherwise an [AvailableState] built with no states.
     */
    override fun findMatch(str: String): AvailableState {
        val head = str.firstOrNull()
        return if (head != null && head in members) {
            AvailableState(sequenceOf(State(head.toString(), str.substring(1))))
        } else {
            AvailableState()
        }
    }
}
fun matchMany( fun matchMany(
str: String, str: String,
item: RegexItem, item: RegexItem,

View file

@ -116,4 +116,15 @@ class ParserTest {
assert(result.match("+").isSuccess) assert(result.match("+").isSuccess)
assert(!result.match("a").isSuccess) assert(!result.match("a").isSuccess)
} }
@Test
fun testBracketContent() {
    // A simple bracket expression must parse and print back unchanged.
    val regex = RegexParser().parseToEnd("[abc]")
    assertEquals("[abc]", regex.toString())

    // Each listed character matches on its own.
    for (accepted in listOf("a", "b", "c")) {
        assert(regex.match(accepted).isSuccess)
    }
    // A character outside the brackets does not match.
    assert(!regex.match("d").isSuccess)
}
} }