feat: add anchor operator support in RegexParser and implement AnchorItem class

This commit is contained in:
monoid 2025-06-29 15:01:00 +09:00
parent f143aba629
commit fb1cf96fed
3 changed files with 36 additions and 3 deletions

View file

@ -2,19 +2,20 @@ package org.example
import com.github.h0tk3y.betterParse.combinators.* import com.github.h0tk3y.betterParse.combinators.*
import com.github.h0tk3y.betterParse.grammar.Grammar import com.github.h0tk3y.betterParse.grammar.Grammar
import com.github.h0tk3y.betterParse.grammar.parseToEnd
import com.github.h0tk3y.betterParse.grammar.parser import com.github.h0tk3y.betterParse.grammar.parser
import com.github.h0tk3y.betterParse.lexer.literalToken import com.github.h0tk3y.betterParse.lexer.literalToken
import com.github.h0tk3y.betterParse.lexer.regexToken import com.github.h0tk3y.betterParse.lexer.regexToken
import com.github.h0tk3y.betterParse.lexer.token import com.github.h0tk3y.betterParse.lexer.token
import com.github.h0tk3y.betterParse.parser.Parser import com.github.h0tk3y.betterParse.parser.Parser
import com.github.h0tk3y.betterParse.grammar.parseToEnd
class RegexParser : Grammar<RegexItem>() { class RegexParser : Grammar<RegexItem>() {
private var groupCounter = 0 private var groupCounter = 0
// val bracketContent by regexToken("[^\\]]*") // val bracketContent by regexToken("[^\\]]*")
val escapedCharacter by regexToken("\\\\[+*?.()|\\[\\]]") val escapedCharacter by regexToken("\\\\[+*?.$^()|\\[\\]]")
val postfixOperator by regexToken("[+*?]") val postfixOperator by regexToken("[+*?]")
val anchorOperator by regexToken("[$^]")
val alternationSymbol by literalToken("|") val alternationSymbol by literalToken("|")
val openParenSymbol by literalToken("(") val openParenSymbol by literalToken("(")
val closeParenSymbol by literalToken(")") val closeParenSymbol by literalToken(")")
@ -49,6 +50,7 @@ class RegexParser : Grammar<RegexItem>() {
val item: Parser<RegexItem> by val item: Parser<RegexItem> by
char or char or
(anchorOperator map { AnchorItem(it.text) }) or
(dot asJust DotItem()) or (dot asJust DotItem()) or
(escapedCharacter map { CharItem(it.text.substring(1)) }) or (escapedCharacter map { CharItem(it.text.substring(1)) }) or
(bracketContent map { BracketItem(it.text.substring(1, it.text.length - 1)) }) or (bracketContent map { BracketItem(it.text.substring(1, it.text.length - 1)) }) or

View file

@ -139,6 +139,28 @@ class GroupItem(val item: RegexItem, val name: String) : RegexItem {
} }
} }
/**
 * Regex item for the anchor operators `^` and `$`.
 *
 * @property anchor the anchor symbol as it appeared in the pattern; [findMatch] only
 * understands `"^"` and `"$"`.
 */
class AnchorItem(val anchor: String) : RegexItem {
    /** Renders the item as the anchor symbol it was built from. */
    override fun toString(): String = anchor

    /**
     * Checks whether [position] lies on the boundary of [str] that this anchor names:
     * index 0 for `^`, index `str.length` for `$`.
     *
     * @return an [AvailableState] holding one [State] built from `(str, boundary, boundary)`
     * when the position is on the boundary, otherwise an [AvailableState] created with no arguments.
     * @throws IllegalArgumentException if [anchor] is neither `"^"` nor `"$"`.
     */
    override fun findMatch(str: String, position: Int): AvailableState {
        // Resolve the single index at which this anchor is allowed to succeed.
        val boundary = when (anchor) {
            "^" -> 0
            "$" -> str.length
            // No other anchors are supported.
            else -> throw IllegalArgumentException("Unknown anchor: $anchor")
        }
        if (position != boundary) {
            // Anchor failed: report no available states.
            return AvailableState()
        }
        return AvailableState(sequenceOf(State(str, boundary, boundary)))
    }
}
fun matchMany( fun matchMany(
str: String, str: String,
item: RegexItem, item: RegexItem,

View file

@ -137,6 +137,15 @@ class ParserTest {
} }
} }
@Test @Test
fun testAnchorOperators() {
// Exercises the pattern "^abc$" (both anchors around a literal) against four inputs.
checkRegex("^abc$") {
// The exact string is expected to match.
"abc".shouldMatch()
// Shorter than the literal: expected not to match.
"ab".shouldNotMatch()
// Extra trailing character after "abc": expected not to match.
"abcd".shouldNotMatch()
// Extra leading character before "abc": expected not to match.
"xabc".shouldNotMatch()
}
}
@Test
fun testCaptureGroups() { fun testCaptureGroups() {
val input = "(a)(b)" val input = "(a)(b)"
val result = compileRegex(input) val result = compileRegex(input)