导 读
Step2:为项目创建一个骨架(skeleton),这样可以避免逐张图像重复标注关键点的繁琐过程。确保所有图像的尺寸为 640x640,因为 YOLO Pose 训练仅接受此尺寸。
Step4:单击保存并转到任务部分并以 CocoKeypoints 格式导出任务数据集。
Step6:运行下面给出的 Python 脚本,将导出的 COCO JSON 转换为 YOLO 格式。脚本会自动生成归一化后的标注。归一化后的关键点坐标必须在 0 到 1 的范围内,否则 YOLO Pose 无法进行训练:
import json
import os
def _yolo_pose_line(keypoints, image_width, image_height):
    """Build one YOLO pose label line from a flat COCO ``[x, y, v, ...]`` list.

    Returns the line without a trailing newline, or ``None`` when no keypoint
    in the list is labelled (so no bounding box can be derived).
    """
    triplets = list(zip(keypoints[0::3], keypoints[1::3], keypoints[2::3]))

    # COCO stores unlabelled keypoints as (0, 0, 0). Leaving them in would
    # stretch the bounding box to the image origin, so only labelled points
    # (v > 0) contribute to the box.
    labelled = [(x, y) for x, y, v in triplets if v > 0]
    if not labelled:
        return None

    x_min = min(x for x, _ in labelled)
    x_max = max(x for x, _ in labelled)
    y_min = min(y for _, y in labelled)
    y_max = max(y for _, y in labelled)

    # Normalize bounding box coordinates by the image size
    x_center = (x_min + x_max) / (2 * image_width)
    y_center = (y_min + y_max) / (2 * image_height)
    width = (x_max - x_min) / image_width
    height = (y_max - y_min) / image_height

    # The class index is always 0 (single-class dataset).
    fields = [
        0,
        round(x_center, 6),
        round(y_center, 6),
        round(width, 6),
        round(height, 6),
    ]
    # Append every keypoint (labelled or not) so the column count stays fixed.
    for x, y, v in triplets:
        fields.extend(
            (round(x / image_width, 6), round(y / image_height, 6), round(v, 6))
        )
    return ' '.join(str(field) for field in fields)


def convert_coco_to_yolo(coco_json_path, output_dir, image_width, image_height):
    """Convert a COCO keypoints JSON file into YOLO pose label files.

    For every image that has at least one usable annotation, a text file named
    after the image (extension replaced by ``.txt``) is written to
    *output_dir*. Each line of that file describes one annotated object::

        0 x_center y_center width height x1 y1 v1 x2 y2 v2 ...

    The bounding box is the tight box around the labelled keypoints, and all
    coordinates are divided by the given image size.

    Args:
        coco_json_path: Path to the COCO JSON file; it must contain the
            ``images`` and ``annotations`` lists.
        output_dir: Directory that receives the label files (created when it
            does not exist).
        image_width: Image width in pixels, used to normalize x values.
        image_height: Image height in pixels, used to normalize y values.
    """
    # Load COCO JSON file
    with open(coco_json_path, 'r', encoding='utf-8') as f:
        coco_data = json.load(f)

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Group keypoints by image id once, instead of rescanning every
    # annotation for every image.
    keypoints_by_image = {}
    for annotation in coco_data['annotations']:
        keypoints = annotation.get('keypoints')
        if keypoints:
            keypoints_by_image.setdefault(annotation['image_id'], []).append(keypoints)

    # Iterate over each image in the dataset
    for image_data in coco_data['images']:
        label_lines = []
        for keypoints in keypoints_by_image.get(image_data['id'], []):
            line = _yolo_pose_line(keypoints, image_width, image_height)
            if line is not None:
                label_lines.append(line)

        # Skip images without annotations
        if not label_lines:
            continue

        # Create YOLO annotation file
        annotation_file_name = os.path.splitext(image_data['file_name'])[0] + '.txt'
        annotation_file_path = os.path.join(output_dir, annotation_file_name)
        # file_name may contain sub-folders; make sure the target folder exists.
        os.makedirs(os.path.dirname(annotation_file_path), exist_ok=True)
        with open(annotation_file_path, 'w') as f:
            # One object per line, as the YOLO label format requires.
            f.write('\n'.join(label_lines) + '\n')

    print('Conversion complete.')
# Example usage: convert a COCO keypoints JSON export into YOLO pose labels.
coco_json_path = "C:\\Users\\jaikr\\Downloads\\Subset\\annotations\\person_keypoints_default.json"
output_dir = "C:\\Users\\jaikr\\Downloads\\Subset640"
# Image size used for normalization; 640x640 is recommended for YOLOV8_Pose.
image_width = image_height = 640
convert_coco_to_yolo(
    coco_json_path=coco_json_path,
    output_dir=output_dir,
    image_width=image_width,
    image_height=image_height,
)
# Expected output format, e.g. one img.txt file per image (one line per object):
0 0.485896 0.332236 0.255865 0.248009 0.357964 0.351019 2 0.545229 0.208231 2 0.613828 0.456241 2
0: class index
0.485896 0.332236 0.255865 0.248009: bounding box (x_center, y_center, width, height, all normalized)
0.357964 0.351019 2: first keypoint kpt[0] as x, y, visible (whether the keypoint is visible)
0.545229 0.208231 2: kpt[1]
0.613828 0.456241 2: kpt[2]
Main dir
!pip install ultralytics
from ultralytics import YOLO
# Load a model: start from the 'yolov8n-pose.pt' pretrained weights
model = YOLO('yolov8n-pose.pt') # load a pretrained model (recommended for training)
# Train the model on the dataset described by data.yaml: 100 epochs, image size 640
results = model.train(data='data.yaml', epochs=100, imgsz=640)
data.yaml should look like this:
# Change these paths to match your own file structure
train: /content/RobotArm/train/images
val: /content/RobotArm/val/images
test: /content/RobotArm/test/images

# Number of classes and the class-index -> class-name mapping
nc: 1
names:
  0: joints

# Keypoints
kpt_shape: [3, 3]  # [number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)]
# (this robot arm has 3 keypoints, each stored as x, y, visible)
I've used a number/index for each class name so that, if there are multiple classes, the list can simply be extended:
0: joints
1: skeleton
2: face_keypoints
In a multi-class setup, it is important that the class indices used during annotation map correctly to the class names assigned for training.
import matplotlib.pyplot as plt
from ultralytics import YOLO
from PIL import Image, ImageDraw
# Load the trained pose model and run it on a single image.
model = YOLO("C:\\Users\\jaikr\\Downloads\\Final\\best.pt")
results = model.predict(source="C:\\Users\\jaikr\\Downloads\\Subset640\\train\\images\\WIN_20230915_20_27_08_Pro.jpg")

for r in results:
    # r.keypoints.xy is a tensor, so extract the keypoints as an integer
    # NumPy array. Moving it to the CPU first keeps this working when
    # inference ran on a GPU.
    keypoints = r.keypoints.xy.int().cpu().numpy()
    img_array = r.plot(kpt_line=True, kpt_radius=6)  # plot a BGR array of predictions
    im = Image.fromarray(img_array[..., ::-1])  # reverse the channel order (BGR -> RGB) for PIL
    draw = ImageDraw.Draw(im)

    # Connect the first three keypoints of the first detection, in index order.
    # Change the keypoint order and the number of keypoints to suit your model.
    # The guard avoids an IndexError when nothing was detected.
    if len(keypoints) > 0 and len(keypoints[0]) >= 2:
        points = [(int(x), int(y)) for x, y in keypoints[0][:3]]
        draw.line(points, fill=(0, 0, 255), width=5)

    # Show the annotated image; without this the drawing is discarded.
    plt.imshow(im)
    plt.axis("off")
    plt.show()
import os
import cv2
from ultralytics import YOLO
from PIL import Image, ImageDraw
import subprocess
import time # Add the time module
model = YOLO("C:\\Users\\jaikr\\Downloads\\Final\\best.pt")

# Folder containing input images
input_folder = "C:\\Users\\jaikr\\Downloads\\Final\\train\\images"
output_folder = "output_images"  # Output folder for saving images
output_video = "output_video.mp4"  # Output video file name

# Create the output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Get the image files in the input folder, sorted so the frame order is
# deterministic (os.listdir returns names in arbitrary order).
image_files = sorted(
    f for f in os.listdir(input_folder)
    if f.lower().endswith((".jpg", ".jpeg", ".png"))
)

for idx, image_file in enumerate(image_files):
    image_path = os.path.join(input_folder, image_file)
    # Best effort per image: a failure on one file must not stop the batch.
    try:
        results = model.predict(source=image_path)
        for r in results:
            # Get the keypoints as an integer NumPy array (moved to the CPU
            # first so this also works when inference ran on a GPU).
            keypoints = r.keypoints.xy.int().cpu().numpy()
            img_array = r.plot(kpt_line=True, kpt_radius=6)  # Plot a BGR array of predictions
            im = Image.fromarray(img_array[..., ::-1])  # Reverse channel order (BGR -> RGB) for PIL
            draw = ImageDraw.Draw(im)

            # Connect the first three keypoints of the first detection; skip
            # the line (but still save the frame) when nothing was detected.
            if len(keypoints) > 0 and len(keypoints[0]) >= 2:
                points = [(int(x), int(y)) for x, y in keypoints[0][:3]]
                draw.line(points, fill=(255, 0, 0), width=5)

            # Save the image with a sequence number
            output_path = os.path.join(output_folder, f"output_image_{idx:04d}.png")
            im.save(output_path)
        print(f"Processed image '{image_file}'.")
    except Exception as e:
        print(f"Error processing image '{image_file}': {e}")

print("Image processing completed.")

# Use OpenCV to create a video from the saved images
frame_array = []
size = None
for i in range(len(image_files)):
    img_path = os.path.join(output_folder, f"output_image_{i:04d}.png")
    # Check that the image file exists and can be decoded
    img = cv2.imread(img_path) if os.path.exists(img_path) else None
    if img is None:
        print(f"Image '{img_path}' not found or has an issue with format. Skipping.")
        continue
    height, width, layers = img.shape
    if size is None:
        # The first frame fixes the video resolution.
        size = (width, height)
    elif (width, height) != size:
        # All frames written to one VideoWriter must share the same size.
        img = cv2.resize(img, size)
    frame_array.append(img)

if frame_array:
    # Playback speed is set by the frame rate given to VideoWriter, so
    # sleeping between writes would only slow this script down. Lower the
    # rate (e.g. 2 fps = 0.5 seconds per image) to hold each frame longer.
    out = cv2.VideoWriter(output_video, cv2.VideoWriter_fourcc(*'mp4v'), 30, size)
    for frame in frame_array:
        out.write(frame)
    out.release()  # Finalize the file; without this the video may be unreadable
    print(f"Video '{output_video}' created successfully.")
else:
    print("No frames were produced; video not created.")
