22 lines
458 B
Python
22 lines
458 B
Python
|
from dataset import readPreporcssedDataAll
|
||
|
from tqdm import tqdm
|
||
|
|
||
|
|
||
|
"""
|
||
|
Count outside tokens (O tokens) and print their share of all tokens.
|
||
|
"""
|
||
|
def count_outside_tokens(samples):
    """Count "O" (outside) tags and total tags across ``samples``.

    Args:
        samples: Iterable of mappings. Each item's ``"entity"`` value must
            support ``len()`` and iteration over its tags.

    Returns:
        An ``(outside_count, total_count)`` tuple of ints, where
        ``outside_count`` is the number of tags equal to ``"O"`` and
        ``total_count`` is the number of tags seen overall.
    """
    outside_count = 0
    total_count = 0
    for sample in samples:
        tags = sample["entity"]
        total_count += len(tags)
        outside_count += sum(1 for tag in tags if tag == "O")
    return outside_count, total_count


if __name__ == "__main__":
    train, _, _ = readPreporcssedDataAll()

    # tqdm only wraps the iterable to display progress; the counts are the
    # same as iterating over `train` directly.
    total_o, total_l = count_outside_tokens(tqdm(train))

    if total_l:
        print(f"{total_o}/{total_l} = {total_o/total_l}")
    else:
        # With no tokens at all the ratio is undefined; report that instead
        # of crashing with ZeroDivisionError.
        print(f"{total_o}/{total_l} = n/a (no tokens found)")
|