From 609174b089de90ea5d591136cf0db22a2e2eca70 Mon Sep 17 00:00:00 2001 From: monoid Date: Tue, 22 Feb 2022 16:47:16 +0900 Subject: [PATCH] feat: add eng_tag --- eng_tags.json | 42 ++++++++++++++++++++++++++++++++++++++++++ read_data.py | 8 ++------ 2 files changed, 44 insertions(+), 6 deletions(-) create mode 100644 eng_tags.json diff --git a/eng_tags.json b/eng_tags.json new file mode 100644 index 0000000..433a750 --- /dev/null +++ b/eng_tags.json @@ -0,0 +1,42 @@ +[ + { + "name": "[PAD]", + "index": 0 + }, + { + "name": "B-LOC", + "index": 1 + }, + { + "name": "B-MISC", + "index": 2 + }, + { + "name": "B-ORG", + "index": 3 + }, + { + "name": "B-PER", + "index": 4 + }, + { + "name": "I-LOC", + "index": 5 + }, + { + "name": "I-MISC", + "index": 6 + }, + { + "name": "I-ORG", + "index": 7 + }, + { + "name": "I-PER", + "index": 8 + }, + { + "name": "O", + "index": 9 + } +] \ No newline at end of file diff --git a/read_data.py b/read_data.py index 5be3817..93e276d 100644 --- a/read_data.py +++ b/read_data.py @@ -77,9 +77,7 @@ def readKoreanDataAll(): def readEnglishDataAll(): with open(f"{EnglishBase}/valid.txt", encoding="utf-8") as fp: - print("a") dev = readEnglishData(fp) - print("b") with open(f"{EnglishBase}/test.txt", encoding="utf-8") as fp: test = readEnglishData(fp) with open(f"{EnglishBase}/train.txt", encoding="utf-8") as fp: @@ -173,8 +171,6 @@ extracts and stores tags set from the given data. """ if __name__ == "__main__": from tqdm import tqdm - t = TagIdConverter() - train, dev, test = readEnglishDataAll() vocab = set() def getTags(lst: List[Sentence]): @@ -205,5 +201,5 @@ if __name__ == "__main__": tags.append({"name":v,"index":i}) i += 1 print(tags) - #with open("tags.json","w",encoding="utf-8") as fp: - # json.dump(tags,fp,ensure_ascii=False, indent=2) \ No newline at end of file + with open("eng_tags.json","w",encoding="utf-8") as fp: + json.dump(tags,fp,ensure_ascii=False, indent=2) \ No newline at end of file