ner-study/count_o.py

22 lines
458 B
Python

from dataset import readPreporcssedDataAll
from tqdm import tqdm
"""
Count outside tokens (tokens tagged "O") in the training data and print their share of all tokens.
"""
def count_outside_tokens(items):
    """Count "O"-tagged tokens and all tokens across *items*.

    Args:
        items: Iterable of mappings; each one's ``"entity"`` value is a
            sized sequence of tag strings.

    Returns:
        A ``(outside_count, token_count)`` tuple of ints.
    """
    outside_count = 0
    token_count = 0
    for item in items:
        tags = item["entity"]
        token_count += len(tags)
        outside_count += sum(1 for tag in tags if tag == "O")
    return outside_count, token_count


def format_outside_ratio(outside_count, token_count):
    """Return the ``"<outside>/<total> = <ratio>"`` report line.

    The ratio is reported as ``nan`` when *token_count* is zero, so an empty
    dataset produces a readable line instead of ``ZeroDivisionError``.
    """
    ratio = outside_count / token_count if token_count else float("nan")
    return f"{outside_count}/{token_count} = {ratio}"


if __name__ == "__main__":
    # Only the first value returned by the loader (bound to `train`) is used.
    train, _, _ = readPreporcssedDataAll()
    # tqdm wraps the iterable purely to show progress while counting.
    total_o, total_l = count_outside_tokens(tqdm(train))
    print(format_outside_ratio(total_o, total_l))