feat: add anchor operator support in RegexParser and implement AnchorItem class
This commit is contained in:
parent
f143aba629
commit
fb1cf96fed
3 changed files with 36 additions and 3 deletions
|
@ -2,19 +2,20 @@ package org.example
|
|||
|
||||
import com.github.h0tk3y.betterParse.combinators.*
|
||||
import com.github.h0tk3y.betterParse.grammar.Grammar
|
||||
import com.github.h0tk3y.betterParse.grammar.parseToEnd
|
||||
import com.github.h0tk3y.betterParse.grammar.parser
|
||||
import com.github.h0tk3y.betterParse.lexer.literalToken
|
||||
import com.github.h0tk3y.betterParse.lexer.regexToken
|
||||
import com.github.h0tk3y.betterParse.lexer.token
|
||||
import com.github.h0tk3y.betterParse.parser.Parser
|
||||
import com.github.h0tk3y.betterParse.grammar.parseToEnd
|
||||
|
||||
class RegexParser : Grammar<RegexItem>() {
|
||||
private var groupCounter = 0
|
||||
|
||||
// val bracketContent by regexToken("[^\\]]*")
|
||||
val escapedCharacter by regexToken("\\\\[+*?.()|\\[\\]]")
|
||||
val escapedCharacter by regexToken("\\\\[+*?.$^()|\\[\\]]")
|
||||
val postfixOperator by regexToken("[+*?]")
|
||||
val anchorOperator by regexToken("[$^]")
|
||||
val alternationSymbol by literalToken("|")
|
||||
val openParenSymbol by literalToken("(")
|
||||
val closeParenSymbol by literalToken(")")
|
||||
|
@ -49,6 +50,7 @@ class RegexParser : Grammar<RegexItem>() {
|
|||
|
||||
val item: Parser<RegexItem> by
|
||||
char or
|
||||
(anchorOperator map { AnchorItem(it.text) }) or
|
||||
(dot asJust DotItem()) or
|
||||
(escapedCharacter map { CharItem(it.text.substring(1)) }) or
|
||||
(bracketContent map { BracketItem(it.text.substring(1, it.text.length - 1)) }) or
|
||||
|
@ -88,4 +90,4 @@ class RegexParser : Grammar<RegexItem>() {
|
|||
|
||||
/**
 * Parses [input] as a regular-expression pattern and returns the resulting [RegexItem].
 *
 * A fresh [RegexParser] is created for every call and the whole input is handed to
 * `parseToEnd`; any failure raised by the parser propagates to the caller unchanged.
 *
 * @param input the pattern text to parse.
 * @return the root item produced by the grammar.
 */
fun compileRegex(input: String): RegexItem = RegexParser().parseToEnd(input)
|
||||
}
|
||||
|
|
|
@ -139,6 +139,28 @@ class GroupItem(val item: RegexItem, val name: String) : RegexItem {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Regex item for the anchor operators `^` (start of input) and `$` (end of input).
 *
 * @property anchor the anchor text; only "^" and "$" are understood by [findMatch].
 */
class AnchorItem(val anchor: String) : RegexItem {
    /** Renders the anchor exactly as it was written in the pattern. */
    override fun toString(): String = anchor

    /**
     * Checks whether this anchor holds at [position] within [str].
     *
     * `^` succeeds only at position 0 and `$` only at `str.length`. On success the
     * result holds a single [State] built from [str] and that position passed twice
     * (presumably start and end offsets of an empty match; confirm against [State]).
     * On failure an [AvailableState] constructed with no states is returned.
     *
     * @throws IllegalArgumentException if [anchor] is neither "^" nor "$".
     */
    override fun findMatch(str: String, position: Int): AvailableState {
        // Each supported anchor can succeed at exactly one position in the input.
        val requiredPosition = when (anchor) {
            "^" -> 0
            "$" -> str.length
            // Any other anchor is unsupported.
            else -> throw IllegalArgumentException("Unknown anchor: $anchor")
        }

        // Anchor failed: hand back a result with no states.
        if (position != requiredPosition) {
            return AvailableState()
        }

        return AvailableState(sequenceOf(State(str, requiredPosition, requiredPosition)))
    }
}
|
||||
|
||||
fun matchMany(
|
||||
str: String,
|
||||
item: RegexItem,
|
||||
|
|
|
@ -137,6 +137,15 @@ class ParserTest {
|
|||
}
|
||||
}
|
||||
/**
 * Verifies that a pattern wrapped in `^` and `$` accepts only the exact text "abc"
 * and rejects inputs that are shorter, longer at the end, or longer at the start.
 */
@Test
fun testAnchorOperators() {
    checkRegex("^abc$") {
        // The only input expected to satisfy both anchors.
        "abc".shouldMatch()

        // Too short, trailing extra character, leading extra character (checked in this order).
        val rejectedInputs = listOf("ab", "abcd", "xabc")
        rejectedInputs.forEach { candidate -> candidate.shouldNotMatch() }
    }
}
|
||||
@Test
|
||||
fun testCaptureGroups() {
|
||||
val input = "(a)(b)"
|
||||
val result = compileRegex(input)
|
||||
|
|
Loading…
Add table
Reference in a new issue