目标检测任务重,担心数据集中各标签类别不均衡,想统计XML标注文件中各标注类别的标签数量,可以使用以下脚本:
import os
import glob
import xml.etree.ElementTree as etdef count_labels(source_dir):file_list = glob.glob(os.path.join(source_dir,"*.xml"))labels = {}for file in file_list:tree = et.ElementTree()tree.parse(file)objects = tree.findall(".//object")for object in objects:name = object.find(".//name")label = name.textif label in labels.keys():labels[label] += 1else:labels[label] = 1count_all = 0print(source_dir)for i in labels.keys():print(i,labels[i])count_all += labels[i]print("all_labels : ", count_all)print("all_images : ", len(file_list))if __name__ == '__main__':count_labels("/disk/test_xml")