feat: add eng_tag

This commit is contained in:
monoid 2022-02-22 16:47:16 +09:00
parent 883f39d645
commit 609174b089
2 changed files with 44 additions and 6 deletions

42
eng_tags.json Normal file
View File

@@ -0,0 +1,42 @@
[
{
"name": "[PAD]",
"index": 0
},
{
"name": "B-LOC",
"index": 1
},
{
"name": "B-MISC",
"index": 2
},
{
"name": "B-ORG",
"index": 3
},
{
"name": "B-PER",
"index": 4
},
{
"name": "I-LOC",
"index": 5
},
{
"name": "I-MISC",
"index": 6
},
{
"name": "I-ORG",
"index": 7
},
{
"name": "I-PER",
"index": 8
},
{
"name": "O",
"index": 9
}
]

View File

@@ -77,9 +77,7 @@ def readKoreanDataAll():
def readEnglishDataAll():
with open(f"{EnglishBase}/valid.txt", encoding="utf-8") as fp:
print("a")
dev = readEnglishData(fp)
print("b")
with open(f"{EnglishBase}/test.txt", encoding="utf-8") as fp:
test = readEnglishData(fp)
with open(f"{EnglishBase}/train.txt", encoding="utf-8") as fp:
@@ -173,8 +171,6 @@ extracts and stores tags set from the given data.
"""
if __name__ == "__main__":
from tqdm import tqdm
t = TagIdConverter()
train, dev, test = readEnglishDataAll()
vocab = set()
def getTags(lst: List[Sentence]):
@@ -205,5 +201,5 @@ if __name__ == "__main__":
tags.append({"name":v,"index":i})
i += 1
print(tags)
#with open("tags.json","w",encoding="utf-8") as fp:
# json.dump(tags,fp,ensure_ascii=False, indent=2)
with open("eng_tags.json","w",encoding="utf-8") as fp:
json.dump(tags,fp,ensure_ascii=False, indent=2)