feat: add anchor operator support in RegexParser and implement AnchorItem class
This commit is contained in:
parent
f143aba629
commit
fb1cf96fed
3 changed files with 36 additions and 3 deletions
|
@ -2,19 +2,20 @@ package org.example
|
||||||
|
|
||||||
import com.github.h0tk3y.betterParse.combinators.*
|
import com.github.h0tk3y.betterParse.combinators.*
|
||||||
import com.github.h0tk3y.betterParse.grammar.Grammar
|
import com.github.h0tk3y.betterParse.grammar.Grammar
|
||||||
|
import com.github.h0tk3y.betterParse.grammar.parseToEnd
|
||||||
import com.github.h0tk3y.betterParse.grammar.parser
|
import com.github.h0tk3y.betterParse.grammar.parser
|
||||||
import com.github.h0tk3y.betterParse.lexer.literalToken
|
import com.github.h0tk3y.betterParse.lexer.literalToken
|
||||||
import com.github.h0tk3y.betterParse.lexer.regexToken
|
import com.github.h0tk3y.betterParse.lexer.regexToken
|
||||||
import com.github.h0tk3y.betterParse.lexer.token
|
import com.github.h0tk3y.betterParse.lexer.token
|
||||||
import com.github.h0tk3y.betterParse.parser.Parser
|
import com.github.h0tk3y.betterParse.parser.Parser
|
||||||
import com.github.h0tk3y.betterParse.grammar.parseToEnd
|
|
||||||
|
|
||||||
class RegexParser : Grammar<RegexItem>() {
|
class RegexParser : Grammar<RegexItem>() {
|
||||||
private var groupCounter = 0
|
private var groupCounter = 0
|
||||||
|
|
||||||
// val bracketContent by regexToken("[^\\]]*")
|
// val bracketContent by regexToken("[^\\]]*")
|
||||||
val escapedCharacter by regexToken("\\\\[+*?.()|\\[\\]]")
|
val escapedCharacter by regexToken("\\\\[+*?.$^()|\\[\\]]")
|
||||||
val postfixOperator by regexToken("[+*?]")
|
val postfixOperator by regexToken("[+*?]")
|
||||||
|
val anchorOperator by regexToken("[$^]")
|
||||||
val alternationSymbol by literalToken("|")
|
val alternationSymbol by literalToken("|")
|
||||||
val openParenSymbol by literalToken("(")
|
val openParenSymbol by literalToken("(")
|
||||||
val closeParenSymbol by literalToken(")")
|
val closeParenSymbol by literalToken(")")
|
||||||
|
@ -49,6 +50,7 @@ class RegexParser : Grammar<RegexItem>() {
|
||||||
|
|
||||||
val item: Parser<RegexItem> by
|
val item: Parser<RegexItem> by
|
||||||
char or
|
char or
|
||||||
|
(anchorOperator map { AnchorItem(it.text) }) or
|
||||||
(dot asJust DotItem()) or
|
(dot asJust DotItem()) or
|
||||||
(escapedCharacter map { CharItem(it.text.substring(1)) }) or
|
(escapedCharacter map { CharItem(it.text.substring(1)) }) or
|
||||||
(bracketContent map { BracketItem(it.text.substring(1, it.text.length - 1)) }) or
|
(bracketContent map { BracketItem(it.text.substring(1, it.text.length - 1)) }) or
|
||||||
|
@ -88,4 +90,4 @@ class RegexParser : Grammar<RegexItem>() {
|
||||||
|
|
||||||
fun compileRegex(input: String): RegexItem {
|
fun compileRegex(input: String): RegexItem {
|
||||||
return RegexParser().parseToEnd(input)
|
return RegexParser().parseToEnd(input)
|
||||||
}
|
}
|
||||||
|
|
|
@ -139,6 +139,28 @@ class GroupItem(val item: RegexItem, val name: String) : RegexItem {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Regex item for the anchor operators `^` and `$`.
 *
 * `^` only succeeds at position 0 and `$` only succeeds at `str.length`.
 *
 * @property anchor the anchor symbol; [findMatch] accepts only `^` and `$`.
 */
class AnchorItem(val anchor: String) : RegexItem {
    override fun toString(): String = anchor

    /**
     * Checks whether this anchor holds at [position] in [str].
     *
     * @return an [AvailableState] holding a single [State] when the anchor holds,
     *   otherwise an [AvailableState] created with no arguments.
     * @throws IllegalArgumentException if [anchor] is neither `^` nor `$`.
     */
    override fun findMatch(str: String, position: Int): AvailableState {
        // The only position at which each anchor can succeed.
        val requiredPosition = when (anchor) {
            "^" -> 0
            "$" -> str.length
            // No other anchors are supported.
            else -> throw IllegalArgumentException("Unknown anchor: $anchor")
        }

        // A failed anchor yields the no-argument AvailableState.
        if (position != requiredPosition) {
            return AvailableState()
        }

        // NOTE(review): both Int arguments of State are the same position here,
        // presumably start/end offsets of an empty match — confirm against State.
        return AvailableState(sequenceOf(State(str, requiredPosition, requiredPosition)))
    }
}
|
||||||
|
|
||||||
fun matchMany(
|
fun matchMany(
|
||||||
str: String,
|
str: String,
|
||||||
item: RegexItem,
|
item: RegexItem,
|
||||||
|
|
|
@ -137,6 +137,15 @@ class ParserTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@Test
|
@Test
|
||||||
|
fun testAnchorOperators() {
    // With both anchors present, only the exact string "abc" is expected to match.
    checkRegex("^abc$") {
        "abc".shouldMatch()
        for (rejected in listOf("ab", "abcd", "xabc")) {
            rejected.shouldNotMatch()
        }
    }
}
|
||||||
|
@Test
|
||||||
fun testCaptureGroups() {
|
fun testCaptureGroups() {
|
||||||
val input = "(a)(b)"
|
val input = "(a)(b)"
|
||||||
val result = compileRegex(input)
|
val result = compileRegex(input)
|
||||||
|
|
Loading…
Add table
Reference in a new issue