isolators-transfer/datasets/xml2txt.py
fly6516 b42afef5d7 feat(datasets): 添加训练和验证数据集的标签文件
- 在 datasets/train/labels 和 datasets/valid/labels 目录下添加了多个标签文件
- 文件命名格式为 [数字]_jpeg.rf.[哈希值].txt
- 标签文件内容包含物体的类别和位置信息
- 新增 insulator 相关的标签文件和 XML 注释文件
2025-04-09 19:49:30 +08:00

50 lines
1.5 KiB
Python

import os
import xml.etree.ElementTree as ET
from PIL import Image
# Root folder of the dataset; every other path below is derived from it.
voc_dir = "insulator"

# Sub-folders under the root, in order: XML annotations, source images,
# and the text labels this script writes.
annotations_dir, images_dir, labels_dir = (
    os.path.join(voc_dir, sub) for sub in ("Annotations", "images", "labels")
)

# Create the output folder up front; an already-existing folder is fine.
os.makedirs(labels_dir, exist_ok=True)
def convert(size, box):
    """Convert a pixel-space box to normalized center/size form.

    Args:
        size: ``(image_width, image_height)`` in pixels.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels. Note the ordering:
            both x values come first, then both y values.

    Returns:
        A tuple ``(x_center, y_center, width, height)`` where x values are
        divided by the image width and y values by the image height.

    Raises:
        ZeroDivisionError: If either image dimension is zero.

    Each coordinate pair is put in ascending order and clamped to the image
    before conversion, so the result always lies in ``[0, 1]`` with a
    non-negative width and height. Boxes that are already well-formed and
    inside the image are converted exactly as before.
    """
    img_w, img_h = size[0], size[1]
    dw, dh = 1.0 / img_w, 1.0 / img_h

    # Annotations occasionally have min/max swapped; order each pair so the
    # width and height cannot come out negative.
    x_lo, x_hi = sorted((box[0], box[1]))
    y_lo, y_hi = sorted((box[2], box[3]))

    # Clamp corners that spill past the image border; otherwise the
    # normalized values would fall outside [0, 1].
    x_lo, x_hi = min(max(x_lo, 0.0), img_w), min(max(x_hi, 0.0), img_w)
    y_lo, y_hi = min(max(y_lo, 0.0), img_h), min(max(y_hi, 0.0), img_h)

    x = (x_lo + x_hi) / 2.0
    y = (y_lo + y_hi) / 2.0
    w = x_hi - x_lo
    h = y_hi - y_lo
    return x * dw, y * dh, w * dw, h * dh
def _read_box(obj):
    """Return ``(xmin, xmax, ymin, ymax)`` floats for an ``<object>``, or None if malformed."""
    xmlbox = obj.find("bndbox")
    if xmlbox is None:
        return None
    try:
        return tuple(
            float(xmlbox.findtext(tag)) for tag in ("xmin", "xmax", "ymin", "ymax")
        )
    except (TypeError, ValueError):
        # TypeError: a coordinate tag is missing (findtext returned None).
        # ValueError: the tag text is empty or not a number.
        return None


# Walk every XML annotation and write one label file per annotated image.
# Sorted so the processing (and message) order is reproducible across runs.
for xml_file in sorted(os.listdir(annotations_dir)):
    if not xml_file.endswith(".xml"):
        continue

    image_id = os.path.splitext(xml_file)[0]
    img_path = os.path.join(images_dir, image_id + ".jpg")
    label_path = os.path.join(labels_dir, image_id + ".txt")

    # Annotations without a matching .jpg are skipped.
    if not os.path.exists(img_path):
        continue

    # The image size is read from the image file itself.
    with Image.open(img_path) as img:
        w, h = img.size

    try:
        tree = ET.parse(os.path.join(annotations_dir, xml_file))
    except ET.ParseError as err:
        # One broken annotation should not abort the whole conversion.
        print(f"skip {xml_file}: invalid XML ({err})")
        continue
    root = tree.getroot()

    # Build all lines before touching the output file, so a problem while
    # parsing can never leave a half-written label file behind.
    lines = []
    for obj in root.findall("object"):
        cls = (obj.findtext("name") or "").strip()
        if cls != "insulator":
            continue  # keep only the insulator class

        b = _read_box(obj)
        if b is None:
            print(f"skip object in {xml_file}: missing or non-numeric bndbox")
            continue

        bb = convert((w, h), b)
        # NOTE(review): the class index is hard-coded to 1; confirm it
        # matches the class list used for training.
        lines.append(f"1 {' '.join([str(round(v, 6)) for v in bb])}\n")

    # The label file is written even when no insulator objects were found.
    with open(label_path, "w", encoding="utf-8") as f:
        f.writelines(lines)