22 lines
458 B
Python
22 lines
458 B
Python
from dataset import readPreporcssedDataAll
|
|
from tqdm import tqdm
|
|
|
|
|
|
"""
|
|
Count outside tokens (O tokens) in the training data and print their share of all tokens.
|
|
"""
|
|
def count_outside_tokens(items):
    """Count "O" (outside) tags and total tags across *items*.

    Args:
        items: Iterable of mappings. Each mapping's ``"entity"`` value must
            be a sized iterable of tag labels.

    Returns:
        A ``(outside_count, total_count)`` tuple, where ``outside_count`` is
        the number of labels equal to ``"O"`` and ``total_count`` is the
        number of labels seen overall.
    """
    outside_count = 0
    total_count = 0
    for item in items:
        entities = item["entity"]
        total_count += len(entities)
        outside_count += sum(1 for tag in entities if tag == "O")
    return outside_count, total_count


if __name__ == "__main__":
    # Only the first of the three returned values is used here.
    # NOTE(review): presumably (train, dev, test) splits — confirm in dataset.
    train, _, _ = readPreporcssedDataAll()

    total_o, total_l = count_outside_tokens(tqdm(train))

    if total_l:
        print(f"{total_o}/{total_l} = {total_o/total_l}")
    else:
        # With no tokens at all the ratio is undefined; report that instead
        # of crashing with ZeroDivisionError.
        print(f"{total_o}/{total_l} = n/a (no tokens found)")