"""Count outside tokens ("O" tokens) in the data returned by the dataset loader."""

from dataset import readPreporcssedDataAll
from tqdm import tqdm

# Tag value that marks a token as lying outside any entity.
OUTSIDE_TAG = "O"


def count_outside_tokens(items):
    """Return ``(outside, total)`` tag counts accumulated over *items*.

    Args:
        items: Iterable of mappings. Each mapping's ``"entity"`` value must
            support ``len()`` and iteration; every element equal to ``"O"``
            is counted as an outside token.

    Returns:
        A ``(outside, total)`` tuple: the number of ``"O"`` tags and the
        number of tags overall. Both are 0 when *items* is empty.
    """
    total_outside = 0
    total_tags = 0
    for item in items:
        entities = item["entity"]
        total_tags += len(entities)
        total_outside += sum(1 for tag in entities if tag == OUTSIDE_TAG)
    return total_outside, total_tags


def main():
    """Load the data and print the share of outside tokens."""
    # Only the first of the three returned values is used here
    # (presumably the training split -- confirm against `dataset`).
    train, _, _ = readPreporcssedDataAll()
    total_o, total_l = count_outside_tokens(tqdm(train))

    if total_l == 0:
        # No tokens at all: the ratio is undefined, so report that instead of
        # crashing with ZeroDivisionError.
        print(f"{total_o}/{total_l} = n/a (no tokens found)")
        return

    print(f"{total_o}/{total_l} = {total_o/total_l}")


if __name__ == "__main__":
    main()