Labelme标注数据的一些操作

文章目录

1、删除没有标注的图片
2、对图片重新命名
3、对标注后的数据重新命名
4、两个Labelme数据文件夹合并
5、其他格式的图片转jpg
6、Labelme的json文件中加入imagedata
7、将json中的imagedata转为图像
8、修改标注类别的名字

1、删除没有标注的图片

有些图片没有标注，只有图片没有json文件，需要将多余的图片删除，代码如下：

import glob
import os


def annotation_path_for(image_path):
    """Return the path of the Labelme JSON file that belongs to ``image_path``.

    Only the file extension is swapped for ``.json``. A plain
    ``str.replace("jpg", "json")`` would also rewrite any other "jpg"
    substring in the path (for example ``jpg_001.jpg`` or a folder called
    ``jpg``), which makes the lookup miss and the labelled image get deleted.
    """
    return os.path.splitext(image_path)[0] + ".json"


# Delete every JPEG in ../images that has no annotation file next to it.
for image_path in glob.glob('../images/*.jpg'):
    if not os.path.exists(annotation_path_for(image_path)):
        os.remove(image_path)

或者

import glob
import os
import time

# Collect every JPEG that sits directly inside ./data.
jpg_images = glob.glob('./data/*.jpg')
timestr = int(time.time())

# Keep images that have a sibling JSON annotation; delete all the others.
for jpg_img in jpg_images:
    # File name without directory and without extension.
    stem = os.path.splitext(os.path.basename(jpg_img))[0]
    json_file_path = os.path.join('./data', stem + '.json')

    if not os.path.exists(json_file_path):
        os.remove(jpg_img)
        print(f"删除图片: {jpg_img}, 因为不存在对应的JSON文件")
        continue

    # Nothing to do for annotated images; just report that they are kept.
    print(f"保留图片: {jpg_img}, 因为存在对应的JSON文件: {json_file_path}")

print("处理完成。")

2、对图片重新命名

标注前，图片名字杂乱，需要对图片重新命名，我使用时间戳对图片重新命名，防止图片的名字重复，代码如下：

import glob
import os
import shutil
import time
import random

# Gather the source images and put them in random order.
imaglist = glob.glob('data/*.jpg')
random.shuffle(imaglist)
print(imaglist)

os.makedirs('image_new', exist_ok=True)

# Copy each image to image_new/ under a name built from the start time plus
# its position in the shuffled list.
timesp = int(time.time())
for offset, image_path in enumerate(imaglist):
    print(image_path)
    target_path = 'image_new/' + str(timesp + offset) + ".jpg"
    shutil.copy(image_path, target_path)

3、对标注后的数据重新命名

数据标注完成后，对标注的数据统一命名，我这里使用时间戳命名，代码如下：

import glob
import os
import time
import json


def rename_labelme_pairs(image_dir, start_timestamp=None):
    """Rename every ``*.jpg`` in ``image_dir`` (and its JSON) to a numeric name.

    Names are consecutive integers counted upwards from ``start_timestamp``
    (the current Unix time when omitted). For an image that has a JSON file
    with the same stem, the JSON is rewritten under the new name with its
    ``imagePath`` field set to the new image file name, and the old JSON is
    removed.

    Safety measures:

    * A candidate number is skipped when an image or JSON with that name
      already exists, so no file is overwritten by ``os.rename`` and a JSON
      that was just written is never deleted again.
    * The JSON is parsed before anything is renamed; if it cannot be read,
      the image keeps its old name so the pair stays together.

    Args:
        image_dir: Directory that holds the images and their JSON files.
        start_timestamp: Integer to start counting from; the first file gets
            ``start_timestamp + 1``. Defaults to ``int(time.time())``.

    Returns:
        The number of images that were renamed.
    """
    stamp = int(time.time()) if start_timestamp is None else int(start_timestamp)
    renamed = 0

    # Sorted so that the assignment of numbers is reproducible.
    for jpg_img in sorted(glob.glob(os.path.join(image_dir, '*.jpg'))):
        img_path, img_name_ext = os.path.split(jpg_img)
        img_name, img_ext = os.path.splitext(img_name_ext)
        print(img_ext)

        # Read the annotation first so that a broken JSON leaves the pair as is.
        json_path = os.path.join(img_path, f"{img_name}.json")
        has_json = os.path.exists(json_path)
        json_data = None
        if has_json:
            try:
                with open(json_path, 'r', encoding='utf-8') as json_file:
                    json_data = json.load(json_file)
            except json.JSONDecodeError:
                print(f"Error decoding JSON file: {json_path}")
                continue
            except (OSError, ValueError) as e:
                print(f"An error occurred while processing {json_path}: {e}")
                continue
        else:
            print(f"No JSON file found for image: {jpg_img}")

        # Advance to the next number that is free for both image and JSON.
        while True:
            stamp += 1
            new_img_name = f"{stamp}{img_ext}"
            new_img_path = os.path.join(img_path, new_img_name)
            new_json_path = os.path.join(img_path, f"{stamp}.json")
            if not (os.path.exists(new_img_path) or os.path.exists(new_json_path)):
                break

        if has_json:
            # imagePath is assumed to hold just the file name, so it is
            # replaced by the new file name.
            if isinstance(json_data, dict) and 'imagePath' in json_data:
                json_data['imagePath'] = new_img_name
            with open(new_json_path, 'w', encoding='utf-8') as json_file:
                json.dump(json_data, json_file, ensure_ascii=False, indent=4)

        os.rename(jpg_img, new_img_path)

        if has_json:
            # new_json_path did not exist before, so it differs from json_path.
            os.remove(json_path)
        renamed += 1

    return renamed


imag_dir = "../image_new2/"
rename_labelme_pairs(imag_dir)
print("Renaming completed.")

4、两个Labelme数据文件夹合并

图片由两个人共同标注，每个人关注不同的类别，这时候就需要对数据做合并，两个标注好的数据图片存在重叠，但是类别不存在重叠，需要将同一张图片的两个json文件合并，代码如下：

import os
import json
import shutil


def merge_json_annotations(json_obj1, json_obj2, image_id):
    """Merge two Labelme annotation dicts that describe the same image.

    The shapes of ``json_obj2`` are appended to those of ``json_obj1``.
    A shape of ``json_obj2`` that is already present (equal dict) in
    ``json_obj1`` is not added again, so merging the same data twice does not
    duplicate annotations. Shape ID conflicts are not handled.

    Args:
        json_obj1: First annotation dict; its values take priority.
        json_obj2: Second annotation dict; used as fallback for
            ``imagePath``, ``imageData``, ``imageHeight`` and ``imageWidth``.
        image_id: Not used by the merge; kept for interface compatibility.

    Returns:
        A new dict with the merged annotation. ``flags`` is always empty.
    """
    shapes1 = json_obj1.get('shapes') or []
    shapes2 = json_obj2.get('shapes') or []
    merged_shapes = list(shapes1) + [s for s in shapes2 if s not in shapes1]

    return {
        'version': json_obj1.get('version', '4.5.13'),
        'flags': {},
        'shapes': merged_shapes,
        'imagePath': json_obj1.get('imagePath', json_obj2.get('imagePath', '')),
        # .get instead of [...]: imageData may be absent or null in either file.
        'imageData': json_obj1.get('imageData') or json_obj2.get('imageData'),
        'imageHeight': json_obj1.get('imageHeight', json_obj2.get('imageHeight', 0)),
        'imageWidth': json_obj1.get('imageWidth', json_obj2.get('imageWidth', 0)),
        'lineColor': json_obj1.get('lineColor'),
        'fillColor': json_obj1.get('fillColor'),
    }


def copy_or_merge_files(src_dir1, src_dir2, dst_dir, image_extension='.jpg', json_extension='.json'):
    """Combine two Labelme folders into ``dst_dir``.

    Both source trees are walked. For every JSON file that has an image with
    the same stem next to it:

    * if ``dst_dir`` has no image of that name, image and JSON are copied;
    * if the image is there but the JSON is not, only the JSON is copied;
    * if both are there, the existing JSON and the new one are merged with
      ``merge_json_annotations`` and written back.

    JSON files without an image are ignored. Images without a JSON file are
    reported and not copied.

    Args:
        src_dir1: First source directory (processed first).
        src_dir2: Second source directory.
        dst_dir: Destination directory; created when missing.
        image_extension: Extension of the image files, including the dot.
        json_extension: Extension of the annotation files, including the dot.
    """
    os.makedirs(dst_dir, exist_ok=True)

    for src_dir in (src_dir1, src_dir2):
        print(src_dir)
        for root, dirs, files in os.walk(src_dir):
            for file in files:
                print(file)
                base_name, file_extension = os.path.splitext(file)

                if file_extension == image_extension:
                    # Only report images that really have no annotation.
                    if not os.path.exists(os.path.join(root, base_name + json_extension)):
                        print(f"Skipped image file without corresponding JSON: {file}")
                    continue
                if file_extension != json_extension:
                    continue

                src_json_file = os.path.join(root, file)
                image_file = os.path.join(root, base_name + image_extension)
                # An annotation is only useful together with its image.
                if not os.path.exists(image_file):
                    continue

                dst_image_file = os.path.join(dst_dir, base_name + image_extension)
                dst_json_file = os.path.join(dst_dir, base_name + json_extension)

                if not os.path.exists(dst_image_file):
                    shutil.copy2(image_file, dst_image_file)
                    shutil.copy2(src_json_file, dst_json_file)
                    print(f"Copied image and JSON files: {image_file} and {src_json_file} to {dst_dir}")
                    continue

                if not os.path.exists(dst_json_file):
                    shutil.copy2(src_json_file, dst_json_file)
                    print(f"Copied JSON file: {src_json_file} to {dst_json_file}")
                    continue

                with open(src_json_file, 'r', encoding='utf-8') as f:
                    json_obj = json.load(f)
                with open(dst_json_file, 'r', encoding='utf-8') as f:
                    existing_json_obj = json.load(f)
                merged_json_obj = merge_json_annotations(existing_json_obj, json_obj, base_name)
                with open(dst_json_file, 'w', encoding='utf-8') as f:
                    json.dump(merged_json_obj, f, ensure_ascii=False, indent=4)
                print(f"Merged JSON file: {dst_json_file}")


def main():
    """Merge the folders ``image_new1`` and ``image_new`` into ``image_new2``."""
    src_dir1 = 'image_new1'
    src_dir2 = 'image_new'
    dst_dir = 'image_new2'
    copy_or_merge_files(src_dir1, src_dir2, dst_dir)


if __name__ == "__main__":
    main()

5、其他格式的图片转jpg

文件夹中存有多种格式的图片，需要将其他格式的图片统一转为jpg，如果转换失败则删除该文件，代码如下：

# Convert images of other formats to JPG; files that cannot be converted are deleted.
import os
from PIL import Image


def convert_images_to_jpg(directory):
    """Convert every PNG/BMP/GIF/TIFF/JPEG file below ``directory`` to ``.jpg``.

    The tree is walked recursively. Each matching file is re-saved as JPEG
    under the same name with a ``.jpg`` extension and the source file is then
    removed. A file that cannot be converted is removed as well.

    Images are converted to RGB before saving because JPEG cannot store
    alpha or palette modes; without that step every transparent PNG would
    fail to save and be deleted instead of converted.

    Args:
        directory: Root directory to process.
    """
    convertible = ('.png', '.bmp', '.gif', '.tiff', '.jpeg')

    for root, dirs, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            stem, file_extension = os.path.splitext(file_path)
            # Compare case-insensitively; .jpg files are left untouched.
            if file_extension.lower() not in convertible:
                continue

            jpg_file_path = stem + '.jpg'
            try:
                with Image.open(file_path) as img:
                    img.convert('RGB').save(jpg_file_path, 'JPEG')
            except Exception as e:
                # Deliberately broad: any failure to read or convert means the
                # file is treated as unusable and dropped.
                os.remove(file_path)
                print(f"Failed to convert {file_path}: {e}")
            else:
                print(f"Converted {file_path} to {jpg_file_path}")
                # Removed only after the image has been closed, because an
                # open file cannot be deleted on Windows.
                os.remove(file_path)


# Directory to process
directory_path = 'data'
convert_images_to_jpg(directory_path)

6、Labelme的json文件中加入imagedata

当json文件中缺少imageData字段时，需要根据对应的图片把imageData补充进去，代码如下：

import base64
import json
import os

from labelme import utils  # not used below; kept from the original listing

# Directory that contains the Labelme JSON files
json_dir = "./data/"

# Embed the image referenced by each JSON file as Base64 in its imageData field.
for json_filename in os.listdir(json_dir):
    if not json_filename.endswith(".json"):
        continue

    json_path = os.path.join(json_dir, json_filename)
    with open(json_path, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # Only the file name part of imagePath is used, and the image is assumed
    # to be in the same directory as the JSON file. Backslashes are
    # normalised first so that paths written on Windows are handled as well.
    image_path_field = json_data.get('imagePath') or ''
    image_filename = image_path_field.replace('\\', '/').split('/')[-1]
    image_path = os.path.join(json_dir, image_filename)

    if os.path.isfile(image_path):
        with open(image_path, 'rb') as image_file:
            json_data['imageData'] = base64.b64encode(image_file.read()).decode('utf-8')
        # Write the updated annotation back to the same file.
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(json_data, f, ensure_ascii=False, indent=4)
    else:
        print(f"Warning: Image file not found for {json_filename}")

print("Processing completed.")

7、将json中的imagedata转为图像

标注数据的图片丢失了，只有json文件的时候，我们可以通过json中的imagedata获取图片，代码如下：

import json
import os
import base64
from PIL import Image
from io import BytesIO

# Directory that contains the Labelme JSON files
json_directory = './data/'

# Restore a .jpg next to every JSON file that carries embedded image data.
for filename in os.listdir(json_directory):
    if not filename.endswith('.json'):
        continue

    json_path = os.path.join(json_directory, filename)
    # Explicit UTF-8, matching how the other scripts read and write the JSON.
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # A missing key and an empty/null value are both treated as "no image data".
    image_data_b64 = data.get('imageData')
    if not image_data_b64:
        print(f"No imageData in {json_path}")
        continue

    # Output path: same name as the JSON file, with a .jpg extension.
    image_path = os.path.splitext(json_path)[0] + '.jpg'
    if os.path.exists(image_path):
        print(f"Image already exists: {image_path}")
        continue

    image = Image.open(BytesIO(base64.b64decode(image_data_b64)))
    # JPEG cannot store alpha or palette modes, so convert those to RGB first.
    if image.mode not in ('RGB', 'L'):
        image = image.convert('RGB')
    image.save(image_path)
    print(f"Saved image from {json_path} to {image_path}")

8、修改标注类别的名字

import glob
import json


def replace_category_in_json(json_path, old_category, new_category):
    """Rename a label in one Labelme JSON file.

    Every shape whose ``label`` equals ``old_category`` gets ``new_category``
    instead. The file is rewritten in place only when at least one shape was
    changed, so files without the label keep their content and formatting.
    A file without a ``shapes`` list is left untouched.

    Args:
        json_path: Path of the JSON file to modify.
        old_category: Label to look for.
        new_category: Label to write in its place.

    Returns:
        The number of shapes whose label was replaced.
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    replaced = 0
    for shape in data.get('shapes') or []:
        if shape.get('label') == old_category:
            shape['label'] = new_category
            replaced += 1

    if replaced:
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=4)
    return replaced


# Usage example
json_list = glob.glob('../image_new/*.json')
old_category = 'person'
new_category = 'head'
for json_file in json_list:
    replace_category_in_json(json_file, old_category, new_category)