From fb1cf96fed6c35c05d5b013b84d6d62641b32a83 Mon Sep 17 00:00:00 2001 From: monoid Date: Sun, 29 Jun 2025 15:01:00 +0900 Subject: [PATCH] feat: add anchor operator support in RegexParser and implement AnchorItem class --- lib/src/main/kotlin/org/example/Parser.kt | 8 ++++--- lib/src/main/kotlin/org/example/RegexItem.kt | 22 +++++++++++++++++++ lib/src/test/kotlin/org/example/ParserTest.kt | 9 ++++++++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/lib/src/main/kotlin/org/example/Parser.kt b/lib/src/main/kotlin/org/example/Parser.kt index c97c5cc..9f70507 100644 --- a/lib/src/main/kotlin/org/example/Parser.kt +++ b/lib/src/main/kotlin/org/example/Parser.kt @@ -2,19 +2,20 @@ package org.example import com.github.h0tk3y.betterParse.combinators.* import com.github.h0tk3y.betterParse.grammar.Grammar +import com.github.h0tk3y.betterParse.grammar.parseToEnd import com.github.h0tk3y.betterParse.grammar.parser import com.github.h0tk3y.betterParse.lexer.literalToken import com.github.h0tk3y.betterParse.lexer.regexToken import com.github.h0tk3y.betterParse.lexer.token import com.github.h0tk3y.betterParse.parser.Parser -import com.github.h0tk3y.betterParse.grammar.parseToEnd class RegexParser : Grammar() { private var groupCounter = 0 // val bracketContent by regexToken("[^\\]]*") - val escapedCharacter by regexToken("\\\\[+*?.()|\\[\\]]") + val escapedCharacter by regexToken("\\\\[+*?.$^()|\\[\\]]") val postfixOperator by regexToken("[+*?]") + val anchorOperator by regexToken("[$^]") val alternationSymbol by literalToken("|") val openParenSymbol by literalToken("(") val closeParenSymbol by literalToken(")") @@ -49,6 +50,7 @@ class RegexParser : Grammar() { val item: Parser by char or + (anchorOperator map { AnchorItem(it.text) }) or (dot asJust DotItem()) or (escapedCharacter map { CharItem(it.text.substring(1)) }) or (bracketContent map { BracketItem(it.text.substring(1, it.text.length - 1)) }) or @@ -88,4 +90,4 @@ class RegexParser : Grammar() { fun 
compileRegex(input: String): RegexItem { return RegexParser().parseToEnd(input) -} \ No newline at end of file +} diff --git a/lib/src/main/kotlin/org/example/RegexItem.kt b/lib/src/main/kotlin/org/example/RegexItem.kt index 4852883..13900f0 100644 --- a/lib/src/main/kotlin/org/example/RegexItem.kt +++ b/lib/src/main/kotlin/org/example/RegexItem.kt @@ -139,6 +139,28 @@ class GroupItem(val item: RegexItem, val name: String) : RegexItem { } } +class AnchorItem(val anchor: String) : RegexItem { + override fun toString(): String = anchor + + override fun findMatch(str: String, position: Int): AvailableState { + // An anchor matches the start (^) or the end ($) of the string + return when (anchor) { + "^" -> if (position == 0) { + AvailableState(sequenceOf(State(str, 0, 0))) + } else { + AvailableState() // start anchor failed: return an empty sequence + } + "$" -> if (position == str.length) { + AvailableState(sequenceOf(State(str, str.length, str.length))) + } else { + AvailableState() // end anchor failed: return an empty sequence + } + // other anchors are not supported + else -> throw IllegalArgumentException("Unknown anchor: $anchor") + } + } +} + fun matchMany( str: String, item: RegexItem, diff --git a/lib/src/test/kotlin/org/example/ParserTest.kt b/lib/src/test/kotlin/org/example/ParserTest.kt index 37d2bc3..6ef8535 100644 --- a/lib/src/test/kotlin/org/example/ParserTest.kt +++ b/lib/src/test/kotlin/org/example/ParserTest.kt @@ -137,6 +137,15 @@ class ParserTest { } } @Test + fun testAnchorOperators() { + checkRegex("^abc$") { + "abc".shouldMatch() + "ab".shouldNotMatch() + "abcd".shouldNotMatch() + "xabc".shouldNotMatch() + } + } + @Test fun testCaptureGroups() { val input = "(a)(b)" val result = compileRegex(input)