Overview
This article walks through a tutorial on custom pose keypoint detection with YOLOv8: keypoint-based pose tracking for a robotic arm.
Background
Below is a demo of a chess-playing robot built on custom pose keypoint detection:
Custom pose keypoint detection is a computer vision technique for identifying and tracking specific points, or keypoints, on an object. For a chess-playing robot arm, these keypoints can represent the positions of the pieces, the orientation of the board, or even the configuration of the robot arm itself.
Why does a chess-playing robot need custom pose keypoint detection?
Implementation Steps
The specific steps are as follows:
【1】Annotate samples with CVAT. The following video walks through the process:
https://youtu.be/kOlEC30hK7o
Step1: Create a project -> task -> import the dataset.
Step2: Create a skeleton for the project; this spares you from tediously annotating each image point by point. Make sure all images are 640x640, the size recommended for YOLOv8-Pose training and used throughout this tutorial (see the resize sketch below).
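If your captures are not already 640x640, you can resize them before importing into CVAT, which also means the annotated keypoints never need rescaling afterwards. A minimal sketch; the folder names are placeholders, not part of the original tutorial:
```
import os
from PIL import Image

def resize_images(src_dir, dst_dir, size=(640, 640)):
    """Resize every image in src_dir to `size` and save it into dst_dir."""
    os.makedirs(dst_dir, exist_ok=True)
    for name in os.listdir(src_dir):
        if name.lower().endswith((".jpg", ".jpeg", ".png")):
            img = Image.open(os.path.join(src_dir, name))
            img.resize(size).save(os.path.join(dst_dir, name))

# Placeholder paths: point these at your own capture and output folders
resize_images("raw_images", "images_640")
```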
Step3: At the bottom there is an option named "Skeleton"; select your current image and adjust the points as needed.
Step4: Click Save, go to the Tasks section, and export the task dataset in CocoKeypoints format.
Step5: Create a train folder and place the samples into its images and labels subfolders.
Step6: Run the exported JSON through the Python script below to convert Coco.json to YOLO format. This automatically produces normalized annotations. The normalized keypoints must fall in the range 0 to 1, otherwise YOLO Pose will not train:
```
import json
import os

def convert_coco_to_yolo(coco_json_path, output_dir, image_width, image_height):
    # Load COCO JSON file
    with open(coco_json_path, 'r') as f:
        coco_data = json.load(f)

    # Create output directory if it doesn't exist
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Iterate over each image in the dataset
    for image_data in coco_data['images']:
        image_id = image_data['id']
        image_name = image_data['file_name']
        keypoints_list = []

        # Find annotations for the current image
        for annotation in coco_data['annotations']:
            if annotation['image_id'] == image_id:
                keypoints = annotation['keypoints']
                keypoints_list.append(keypoints)

        # Skip images without annotations
        if not keypoints_list:
            continue

        # Create YOLO annotation file
        annotation_file_name = os.path.splitext(image_name)[0] + '.txt'
        annotation_file_path = os.path.join(output_dir, annotation_file_name)
        with open(annotation_file_path, 'w') as f:
            for keypoints in keypoints_list:
                # Derive the bounding box from the keypoint extremes
                x_min = min(keypoints[0::3])
                y_min = min(keypoints[1::3])
                x_max = max(keypoints[0::3])
                y_max = max(keypoints[1::3])

                # Normalize bounding box coordinates to range [0, 1]
                x_center = (x_min + x_max) / (2 * image_width)
                y_center = (y_min + y_max) / (2 * image_height)
                width = (x_max - x_min) / image_width
                height = (y_max - y_min) / image_height

                # Write class index and bounding box to the YOLO file
                f.write(f'{0} {round(x_center, 6)} {round(y_center, 6)} {round(width, 6)} {round(height, 6)} ')

                # Append normalized keypoints (x, y, visibility) to the annotation
                for i in range(0, len(keypoints), 3):
                    x = round(keypoints[i] / image_width, 6)
                    y = round(keypoints[i + 1] / image_height, 6)
                    v = round(keypoints[i + 2], 6)
                    f.write(f'{x} {y} {v} ')
                f.write('\n')

    print('Conversion complete.')

# Example usage
coco_json_path = "C:\\Users\\jaikr\\Downloads\\Subset\\annotations\\person_keypoints_default.json"
output_dir = "C:\\Users\\jaikr\\Downloads\\Subset640"
image_width = 640   # recommended for YOLOv8-Pose
image_height = 640  # recommended for YOLOv8-Pose
print(output_dir)
convert_coco_to_yolo(coco_json_path, output_dir, image_width, image_height)
```
Expected output format: one img.txt file per image, with one line per object, for example:
```
0 0.485896 0.332236 0.255865 0.248009 0.357964 0.351019 2 0.545229 0.208231 2 0.613828 0.456241 2
```
where:
```
0                                    : class index
0.485896 0.332236 0.255865 0.248009 : bounding box (x_center y_center width height)
0.357964 0.351019 2                  : keypoint[0] x, y, visibility
0.545229 0.208231 2                  : keypoint[1]
0.613828 0.456241 2                  : keypoint[2]
```
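Before training, it is worth sanity-checking that every coordinate the script wrote really falls in [0, 1], since out-of-range labels will stop YOLO Pose from training. Below is a minimal sketch; the helper name is mine, not part of the original code:
```
import glob
import os

def check_labels(label_dir):
    """Warn about any normalized coordinate outside [0, 1] in YOLO label files."""
    for path in glob.glob(os.path.join(label_dir, "*.txt")):
        with open(path) as f:
            for line_no, line in enumerate(f, start=1):
                values = [float(t) for t in line.split()]
                # values = class, bbox (4 numbers), then x, y, v triples;
                # visibility flags may legitimately be 2, so check x/y only
                coords = values[1:5] + [v for i, v in enumerate(values[5:]) if i % 3 != 2]
                bad = [v for v in coords if not 0.0 <= v <= 1.0]
                if bad:
                    print(f"{path}:{line_no} out-of-range values: {bad}")

check_labels("C:\\Users\\jaikr\\Downloads\\Subset640")
```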
【2】Train in Colab. Create the folder structure as follows:
```
Main dir
  - train
    - images
    - labels
  - val
    - images
    - labels
```
The folder names must match the paths given in data.yaml below.
```
!pip install ultralytics

from ultralytics import YOLO

# Load a pretrained model (recommended for training)
model = YOLO('yolov8n-pose.pt')

# Train the model
results = model.train(data='data.yaml', epochs=100, imgsz=640)
```
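When training finishes, Ultralytics saves its checkpoints into the run directory, by default runs/pose/train/weights/best.pt. A quick sanity check of the trained model on the validation split might look like this (the run path depends on your session):
```
from ultralytics import YOLO

# Default location of the best checkpoint; adjust if you changed project/name
model = YOLO('runs/pose/train/weights/best.pt')
metrics = model.val(data='data.yaml')  # evaluates on the val split from data.yaml
print(metrics.pose.map)                # keypoint mAP50-95
```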
The data.yaml should look like this:
```
# change this to your file structure
train: /content/RobotArm/train/images
val: /content/RobotArm/val/images
test: /content/RobotArm/test/images

nc: 1
names:
  0: joints

# Keypoints
kpt_shape: [3, 3]  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
# (for my robot arm: 3 keypoints, each with 3 dims for x, y, visible)
```
The names field maps each class index to a class name:
```
names:
  0: joints
```
I used a numeric index so that, if there were multiple classes, the mapping could simply be extended:
```
names:
  0: joints
  1: skeleton
  2: face_keypoints
```
In a multi-class setup, it is important that the class indices used during annotation map correctly to the names assigned during training.
【3】Run prediction locally with best.pt. Image prediction:
```
from ultralytics import YOLO
from PIL import Image, ImageDraw

model = YOLO("C:\\Users\\jaikr\\Downloads\\Final\\best.pt")
results = model.predict(source="C:\\Users\\jaikr\\Downloads\\Subset640\\train\\images\\WIN_20230915_20_27_08_Pro.jpg")

for r in results:
    print(r.keypoints)
    # r.keypoints is a tensor object, so extract the pixel keypoints as integers
    keypoints = r.keypoints.xy.int().numpy()
    img_array = r.plot(kpt_line=True, kpt_radius=6)  # plot a BGR array of predictions
    im = Image.fromarray(img_array[..., ::-1])  # convert the BGR array to a PIL Image
    draw = ImageDraw.Draw(im)
    # Draw a line through the three keypoints; change the keypoint order and
    # count to match your own skeleton
    draw.line([(keypoints[0][0][0], keypoints[0][0][1]),
               (keypoints[0][1][0], keypoints[0][1][1]),
               (keypoints[0][2][0], keypoints[0][2][1])],
              fill=(0, 0, 255), width=5)
    im.show()
```
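For reference, r.keypoints.xy is a tensor of shape (num_detections, num_keypoints, 2) holding pixel coordinates; the Keypoints object also exposes normalized coordinates and per-keypoint confidences:
```
for r in results:
    print(r.keypoints.xy.shape)  # (num_detections, num_keypoints, 2), pixel coords
    print(r.keypoints.xyn)       # the same points normalized to [0, 1]
    print(r.keypoints.conf)      # per-keypoint confidence (None if unavailable)
```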
Video prediction:
```
import os
import cv2
from ultralytics import YOLO
from PIL import Image, ImageDraw

model = YOLO("C:\\Users\\jaikr\\Downloads\\Final\\best.pt")

# Folder containing input images
input_folder = "C:\\Users\\jaikr\\Downloads\\Final\\train\\images"
output_folder = "output_images"    # output folder for annotated images
output_video = "output_video.mp4"  # output video file name

# Create the output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Get a sorted list of image files in the input folder
image_files = [f for f in os.listdir(input_folder) if f.endswith((".jpg", ".jpeg", ".png"))]
image_files.sort()

for idx, image_file in enumerate(image_files):
    image_path = os.path.join(input_folder, image_file)
    try:
        results = model.predict(source=image_path)
        for r in results:
            print(r.keypoints)
            keypoints = r.keypoints.xy.int().numpy()  # extract pixel keypoints
            img_array = r.plot(kpt_line=True, kpt_radius=6)  # BGR array of predictions
            im = Image.fromarray(img_array[..., ::-1])  # convert to a PIL Image
            draw = ImageDraw.Draw(im)
            draw.line([(keypoints[0][0][0], keypoints[0][0][1]),
                       (keypoints[0][1][0], keypoints[0][1][1]),
                       (keypoints[0][2][0], keypoints[0][2][1])],
                      fill=(255, 0, 0), width=5)
            # Save the image with a sequence number
            output_path = os.path.join(output_folder, f"output_image_{idx:04d}.png")
            im.save(output_path)
        print(f"Processed image '{image_file}'.")
    except Exception as e:
        print(f"Error processing image '{image_file}': {e}")
        continue  # move on to the next image if an error occurs

print("Image processing completed.")

# Use OpenCV to stitch the saved images into a video
frame_array = []
size = None
for i in range(len(image_files)):
    img_path = os.path.join(output_folder, f"output_image_{i:04d}.png")
    # Skip frames that were never written (e.g. failed predictions)
    if not os.path.exists(img_path):
        print(f"Image '{img_path}' not found. Skipping.")
        continue
    img = cv2.imread(img_path)
    height, width, layers = img.shape
    size = (width, height)
    frame_array.append(img)

# Playback speed is controlled by the fps argument (30 here), so no
# per-frame delay is needed while writing
out = cv2.VideoWriter(output_video, cv2.VideoWriter_fourcc(*'mp4v'), 30, size)
for frame in frame_array:
    out.write(frame)
out.release()
print(f"Video '{output_video}' created successfully.")
```
Source code download:
https://github.com/Jaykumaran/Robotic_Arm_Keypoint_Tracking_YoloV8-Pose/tree/main
This article is reproduced from the WeChat official account OpenCV与AI深度学习.