

图像分割是数字图像处理的核心技术之一,它的本质是将图像划分为具有语义意义的多个区域,使得每个区域内部的像素具有相似的特征(如灰度、颜色、纹理等),而不同区域之间的特征存在显著差异。从应用角度来说,图像分割是目标检测、图像识别、图像理解等高级视觉任务的基础 —— 比如医学影像中肿瘤区域的分割、自动驾驶中道路和车辆的分割、工业质检中缺陷区域的分割,都离不开这一技术。

本章将系统讲解图像分割的核心方法,从基础的边缘检测、阈值处理,到区域生长、聚类分割,再到图割、分水岭等进阶算法,每个知识点都配套可直接运行的 Python 代码和效果对比,帮你彻底吃透图像分割的原理与实战。


图像分割的核心目标是将图像划分为互不重叠的子区域,满足:① 所有子区域的并集覆盖整幅图像;② 不同子区域互不相交;③ 同一子区域内的像素满足某种相似性准则(灰度、颜色、纹理等相近);④ 相邻子区域在该准则下存在明显差异。
常用的分割思路可分为三类:基于边缘的分割(利用灰度的不连续性,如点、线、边缘检测)、基于阈值的分割(利用灰度直方图直接划分前景与背景)、基于区域的分割(利用区域内部的相似性,如区域生长、聚类、图割、分水岭等)。
点、线、边缘是图像中最基础的特征:孤立点是灰度与周围邻域明显不同的单个像素;线是宽度很小的细长灰度突变区域;边缘则是两个灰度性质不同的区域之间的边界。
三者的检测都基于差分算子(计算灰度梯度),常用算子包括:Roberts、Prewitt、Sobel、Laplacian、Canny 等。

import cv2
import numpy as np
import matplotlib.pyplot as plt
# 设置中文字体(解决matplotlib中文显示问题)
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# 1. 读取图像并转为灰度图
img = cv2.imread('test_img.jpg') # 替换为你的图像路径
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# 2. 定义3x3邻域的孤立点检测函数
def detect_isolated_points(gray_img, threshold=30):
    """
    检测图像中的孤立点
    :param gray_img: 灰度图像
    :param threshold: 灰度差阈值
    :return: 标记孤立点的图像
    """
    h, w = gray_img.shape
    result = np.zeros_like(gray_img)  # 初始化结果图像
    # 遍历图像(跳过边界)
    for i in range(1, h-1):
        for j in range(1, w-1):
            # 获取3x3邻域(转为float,避免uint8相减溢出)
            neighborhood = gray_img[i-1:i+2, j-1:j+2].astype(np.float32)
            center_pixel = float(gray_img[i, j])
            # 中心像素与8邻域均值的差(近似拉普拉斯响应)
            neighbor_mean = (neighborhood.sum() - center_pixel) / 8.0
            # 超过阈值则标记为孤立点(255)
            if abs(center_pixel - neighbor_mean) > threshold:
                result[i, j] = 255
    return result
# 3. 执行检测
isolated_points = detect_isolated_points(gray, threshold=30)
# 4. 可视化对比
plt.figure(figsize=(12, 6))
# 原图
plt.subplot(1, 2, 1)
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.title('原始图像')
plt.axis('off')
# 孤立点检测结果
plt.subplot(1, 2, 2)
plt.imshow(isolated_points, cmap='gray')
plt.title('孤立点检测结果')
plt.axis('off')
plt.show()
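上面的逐像素循环便于理解但速度较慢。下面给出一个等价思路的向量化示意写法(假设仍使用同一张 test_img.jpg):用拉普拉斯模板对整幅图卷积,响应绝对值超过阈值的位置即孤立点。
import cv2
import numpy as np
gray = cv2.cvtColor(cv2.imread('test_img.jpg'), cv2.COLOR_BGR2GRAY)
# 拉普拉斯点检测模板:中心8、周围-1,对"与周围都不同"的像素响应最大
laplacian_kernel = np.array([[-1, -1, -1],
                             [-1,  8, -1],
                             [-1, -1, -1]], dtype=np.float32)
# 用浮点深度卷积,避免负响应被截断
response = cv2.filter2D(gray, cv2.CV_32F, laplacian_kernel)
# 响应 = 8 × (中心 - 邻域均值),因此阈值取 30*8 与前面逐像素版本大致对应
threshold = 30 * 8
isolated = np.where(np.abs(response) > threshold, 255, 0).astype(np.uint8)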

线检测使用方向模板(如水平、垂直、45°、135°)与图像卷积,响应值超过阈值则判定为对应方向的线。常用模板:

import cv2
import numpy as np
import matplotlib.pyplot as plt
# 设置中文字体
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# 1. 读取图像并转为灰度图
img = cv2.imread('test_img.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# 图像降噪(高斯模糊)
gray_blur = cv2.GaussianBlur(gray, (3, 3), 0)
# 2. 定义方向线检测模板(系数和为0:线方向上取2,其余取-1)
# 水平、垂直、45°、135°模板
horizontal_kernel = np.array([[-1, -1, -1], [2, 2, 2], [-1, -1, -1]], dtype=np.float32)
vertical_kernel = np.array([[-1, 2, -1], [-1, 2, -1], [-1, 2, -1]], dtype=np.float32)
angle45_kernel = np.array([[-1, -1, 2], [-1, 2, -1], [2, -1, -1]], dtype=np.float32)
angle135_kernel = np.array([[2, -1, -1], [-1, 2, -1], [-1, -1, 2]], dtype=np.float32)
# 3. 卷积计算响应值
horizontal_lines = cv2.filter2D(gray_blur, -1, horizontal_kernel)
vertical_lines = cv2.filter2D(gray_blur, -1, vertical_kernel)
angle45_lines = cv2.filter2D(gray_blur, -1, angle45_kernel)
angle135_lines = cv2.filter2D(gray_blur, -1, angle135_kernel)
# 4. 阈值处理(提取响应值高的线)
threshold = 100
horizontal_lines_bin = np.where(horizontal_lines > threshold, 255, 0).astype(np.uint8)
vertical_lines_bin = np.where(vertical_lines > threshold, 255, 0).astype(np.uint8)
angle45_lines_bin = np.where(angle45_lines > threshold, 255, 0).astype(np.uint8)
angle135_lines_bin = np.where(angle135_lines > threshold, 255, 0).astype(np.uint8)
# 5. 可视化对比
plt.figure(figsize=(16, 12))
# 原图
plt.subplot(2, 3, 1)
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.title('原始图像')
plt.axis('off')
# 水平线
plt.subplot(2, 3, 2)
plt.imshow(horizontal_lines_bin, cmap='gray')
plt.title('水平线检测')
plt.axis('off')
# 垂直线
plt.subplot(2, 3, 3)
plt.imshow(vertical_lines_bin, cmap='gray')
plt.title('垂直线检测')
plt.axis('off')
# 45°线
plt.subplot(2, 3, 4)
plt.imshow(angle45_lines_bin, cmap='gray')
plt.title('45°线检测')
plt.axis('off')
# 135°线
plt.subplot(2, 3, 5)
plt.imshow(angle135_lines_bin, cmap='gray')
plt.title('135°线检测')
plt.axis('off')
# 所有线合并
all_lines = cv2.bitwise_or(cv2.bitwise_or(horizontal_lines_bin, vertical_lines_bin),
cv2.bitwise_or(angle45_lines_bin, angle135_lines_bin))
plt.subplot(2, 3, 6)
plt.imshow(all_lines, cmap='gray')
plt.title('所有方向线合并')
plt.axis('off')
plt.show()

边缘的灰度分布可分为三类:阶跃型(灰度在边缘处突然跳变)、斜坡型(灰度在一段距离内逐渐过渡,常见于模糊边缘)、屋顶型(灰度先升后降,对应图像中的细线)。
数学上,边缘的强度由梯度描述:对图像 f(x, y),记水平、垂直方向的一阶差分为 Gx、Gy,则梯度幅值 M(x, y) = √(Gx² + Gy²)(工程上常用 |Gx| + |Gy| 近似),梯度方向 θ = arctan(Gy / Gx),边缘方向与梯度方向垂直。

基本边缘检测算子包括:Roberts(2×2 交叉差分,计算量小但对噪声敏感)、Prewitt 和 Sobel(3×3 一阶差分,Sobel 对中心行/列加权,抗噪性更好)。下面给出三者的对比实现。
import cv2
import numpy as np
import matplotlib.pyplot as plt
# 设置中文字体
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# 1. 读取图像并预处理
img = cv2.imread('test_img.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray_blur = cv2.GaussianBlur(gray, (3, 3), 0) # 降噪
# 2. 不同边缘检测算子实现
# Roberts算子(2x2交叉差分)
roberts_x = np.array([[1, 0], [0, -1]], dtype=np.float32)
roberts_y = np.array([[0, 1], [-1, 0]], dtype=np.float32)
# 先以有符号深度卷积再取绝对值,避免负梯度被uint8截断
roberts_x_edge = cv2.convertScaleAbs(cv2.filter2D(gray_blur, cv2.CV_16S, roberts_x))
roberts_y_edge = cv2.convertScaleAbs(cv2.filter2D(gray_blur, cv2.CV_16S, roberts_y))
roberts_edge = cv2.addWeighted(roberts_x_edge, 0.5, roberts_y_edge, 0.5, 0)
# Prewitt算子
prewitt_x = np.array([[-1, 0, 1], [-1, 0, 1], [-1, 0, 1]], dtype=np.float32)
prewitt_y = np.array([[-1, -1, -1], [0, 0, 0], [1, 1, 1]], dtype=np.float32)
prewitt_x_edge = cv2.convertScaleAbs(cv2.filter2D(gray_blur, cv2.CV_16S, prewitt_x))
prewitt_y_edge = cv2.convertScaleAbs(cv2.filter2D(gray_blur, cv2.CV_16S, prewitt_y))
prewitt_edge = cv2.addWeighted(prewitt_x_edge, 0.5, prewitt_y_edge, 0.5, 0)
# Sobel算子
sobel_x = cv2.Sobel(gray_blur, cv2.CV_64F, 1, 0, ksize=3)
sobel_y = cv2.Sobel(gray_blur, cv2.CV_64F, 0, 1, ksize=3)
sobel_edge = cv2.magnitude(sobel_x, sobel_y)
sobel_edge = np.uint8(np.clip(sobel_edge, 0, 255)) # 截断到[0, 255]并转为uint8
# 3. 阈值处理(二值化)
threshold = 50
roberts_edge_bin = np.where(roberts_edge > threshold, 255, 0).astype(np.uint8)
prewitt_edge_bin = np.where(prewitt_edge > threshold, 255, 0).astype(np.uint8)
sobel_edge_bin = np.where(sobel_edge > threshold, 255, 0).astype(np.uint8)
# 4. 可视化对比
plt.figure(figsize=(18, 12))
# 原图
plt.subplot(2, 2, 1)
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.title('原始图像')
plt.axis('off')
# Roberts
plt.subplot(2, 2, 2)
plt.imshow(roberts_edge_bin, cmap='gray')
plt.title('Roberts算子边缘检测')
plt.axis('off')
# Prewitt
plt.subplot(2, 2, 3)
plt.imshow(prewitt_edge_bin, cmap='gray')
plt.title('Prewitt算子边缘检测')
plt.axis('off')
# Sobel
plt.subplot(2, 2, 4)
plt.imshow(sobel_edge_bin, cmap='gray')
plt.title('Sobel算子边缘检测')
plt.axis('off')
plt.show()


Canny 边缘检测是业界标准,分为 4 步:① 高斯滤波平滑图像、抑制噪声;② 计算梯度幅值和方向(通常用 Sobel 算子);③ 非极大值抑制,把边缘细化为单像素宽;④ 双阈值处理与滞后连接:高于高阈值的点是强边缘,介于两阈值之间的弱边缘仅在与强边缘连通时保留。
import cv2
import numpy as np
import matplotlib.pyplot as plt
# 设置中文字体
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# 1. 读取图像并预处理
img = cv2.imread('test_img.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# 2. Canny边缘检测(不同阈值对比)
# 低阈值组合
canny_low = cv2.Canny(gray, 50, 100)
# 高阈值组合
canny_high = cv2.Canny(gray, 100, 200)
# 3. 与Sobel对比(x、y方向梯度幅值)
sobel_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
sobel_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
sobel_edge = np.uint8(np.clip(cv2.magnitude(sobel_x, sobel_y), 0, 255))
# 4. 可视化对比
plt.figure(figsize=(18, 6))
# 原图
plt.subplot(1, 4, 1)
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.title('原始图像')
plt.axis('off')
# Sobel
plt.subplot(1, 4, 2)
plt.imshow(sobel_edge, cmap='gray')
plt.title('Sobel边缘检测')
plt.axis('off')
# Canny低阈值
plt.subplot(1, 4, 3)
plt.imshow(canny_low, cmap='gray')
plt.title('Canny(低阈值 50/100)')
plt.axis('off')
# Canny高阈值
plt.subplot(1, 4, 4)
plt.imshow(canny_high, cmap='gray')
plt.title('Canny(高阈值 100/200)')
plt.axis('off')
plt.show()
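cv2.Canny 把上述 4 步封装成了一个函数。为了直观理解第 4 步"双阈值 + 滞后连接",下面给出一个简化示意(省略了非极大值抑制,因此得到的边缘会偏粗;阈值 50/100 只是示例值):先按梯度幅值分出强、弱边缘,再只保留与强边缘处于同一连通域的弱边缘。
import cv2
import numpy as np
gray = cv2.cvtColor(cv2.imread('test_img.jpg'), cv2.COLOR_BGR2GRAY)
# 1. 计算梯度幅值(对应Canny第2步,这里直接用Sobel)
gx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
gy = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
mag = cv2.magnitude(gx, gy)
# 2. 双阈值:强边缘、弱边缘
low, high = 50, 100
strong = mag >= high
weak = (mag >= low) & ~strong
# 3. 滞后连接:弱边缘只有与强边缘同属一个连通域时才保留
candidate = ((strong | weak) * 255).astype(np.uint8)
num_labels, labels = cv2.connectedComponents(candidate)
keep = np.zeros(num_labels, dtype=bool)
keep[np.unique(labels[strong])] = True  # 含强边缘像素的连通域
keep[0] = False                         # 背景不保留
edges = (keep[labels] * 255).astype(np.uint8)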

Canny 等算子得到的往往是离散的边缘点,边缘连接的核心是基于边缘点的梯度方向和幅值(灰度)相似性,把它们连成连续的边缘。常用方法有两类:局部处理(在小邻域内连接幅值和方向相近的边缘点)和全局的霍夫变换(把共线的边缘点映射到参数空间进行投票,从而检测直线)。下面演示霍夫变换。
import cv2
import numpy as np
import matplotlib.pyplot as plt
# 设置中文字体
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# 1. 读取图像并提取Canny边缘
img = cv2.imread('test_img.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
canny_edge = cv2.Canny(gray, 50, 150)
# 2. 霍夫直线变换(连接边缘为直线)
# 标准霍夫变换
lines = cv2.HoughLines(canny_edge, 1, np.pi/180, threshold=100)
# 绘制霍夫直线
hough_lines = img.copy()
if lines is not None:
    for line in lines:
        rho, theta = line[0]
        a = np.cos(theta)
        b = np.sin(theta)
        x0 = a * rho
        y0 = b * rho
        x1 = int(x0 + 1000 * (-b))
        y1 = int(y0 + 1000 * (a))
        x2 = int(x0 - 1000 * (-b))
        y2 = int(y0 - 1000 * (a))
        cv2.line(hough_lines, (x1, y1), (x2, y2), (0, 0, 255), 2)
# 3. 概率霍夫变换(更高效)
lines_p = cv2.HoughLinesP(canny_edge, 1, np.pi/180, threshold=50, minLineLength=50, maxLineGap=10)
hough_lines_p = img.copy()
if lines_p is not None:
    for line in lines_p:
        x1, y1, x2, y2 = line[0]
        cv2.line(hough_lines_p, (x1, y1), (x2, y2), (0, 255, 0), 2)
# 4. 可视化对比
plt.figure(figsize=(18, 6))
# Canny边缘
plt.subplot(1, 3, 1)
plt.imshow(canny_edge, cmap='gray')
plt.title('Canny离散边缘')
plt.axis('off')
# 标准霍夫变换
plt.subplot(1, 3, 2)
plt.imshow(cv2.cvtColor(hough_lines, cv2.COLOR_BGR2RGB))
plt.title('标准霍夫变换(红色直线)')
plt.axis('off')
# 概率霍夫变换
plt.subplot(1, 3, 3)
plt.imshow(cv2.cvtColor(hough_lines_p, cv2.COLOR_BGR2RGB))
plt.title('概率霍夫变换(绿色直线)')
plt.axis('off')
plt.show()
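cv2.HoughLines 内部做的就是参数空间投票:每个边缘点 (x, y) 对所有角度 θ 计算 ρ = x·cosθ + y·sinθ,并在 (ρ, θ) 累加器上投一票,票数的峰值对应图像中的直线。下面是一个最小化的示意实现(仅演示投票过程,未做峰值邻域抑制,边缘点很多时速度较慢):
import cv2
import numpy as np
gray = cv2.cvtColor(cv2.imread('test_img.jpg'), cv2.COLOR_BGR2GRAY)
edge = cv2.Canny(gray, 50, 150)
h, w = edge.shape
# 参数空间:theta取0~179度,rho范围为[-对角线长, +对角线长]
thetas = np.deg2rad(np.arange(180))
diag = int(np.ceil(np.hypot(h, w)))
accumulator = np.zeros((2 * diag + 1, len(thetas)), dtype=np.int32)
cos_t, sin_t = np.cos(thetas), np.sin(thetas)
# 每个边缘点对所有theta投票
ys, xs = np.nonzero(edge)
for x, y in zip(xs, ys):
    rhos = np.round(x * cos_t + y * sin_t).astype(int) + diag  # 平移使索引非负
    accumulator[rhos, np.arange(len(thetas))] += 1
# 票数最高的参数即最显著的直线
rho_idx, theta_idx = np.unravel_index(np.argmax(accumulator), accumulator.shape)
print(f'最显著直线:rho={rho_idx - diag}, theta={np.rad2deg(thetas[theta_idx]):.1f}°,'
      f' 票数={accumulator[rho_idx, theta_idx]}')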

阈值处理是最经典的分割方法,核心是根据灰度值将像素分为两类(前景 / 背景):设阈值为 T,若 f(x, y) > T 则 g(x, y) = 1(前景),否则 g(x, y) = 0(背景)。

阈值处理按阈值的确定方式可分为:全局阈值(整幅图像共用一个阈值)、局部 / 自适应阈值(每个像素根据其邻域单独计算阈值)和多阈值(用多个阈值划分多个灰度区间)。
手动设定一个全局阈值,将图像二值化。关键是选择合适的阈值(可通过灰度直方图确定)。
import cv2
import numpy as np
import matplotlib.pyplot as plt
# 设置中文字体
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# 1. 读取图像并转为灰度图
img = cv2.imread('test_img.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# 2. 手动设定不同全局阈值
thresholds = [80, 120, 160]
binary_imgs = []
for t in thresholds:
    _, binary = cv2.threshold(gray, t, 255, cv2.THRESH_BINARY)
    binary_imgs.append(binary)
# 3. 绘制灰度直方图
hist = cv2.calcHist([gray], [0], None, [256], [0, 256])
# 4. 可视化对比
plt.figure(figsize=(18, 10))
# 原图
plt.subplot(2, 2, 1)
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.title('原始图像')
plt.axis('off')
# 灰度直方图
plt.subplot(2, 2, 2)
plt.plot(hist, color='black')
plt.title('灰度直方图')
plt.xlabel('灰度值')
plt.ylabel('像素数量')
plt.axvline(x=80, color='r', linestyle='--', label='阈值80')
plt.axvline(x=120, color='g', linestyle='--', label='阈值120')
plt.axvline(x=160, color='b', linestyle='--', label='阈值160')
plt.legend()
# 阈值80
plt.subplot(2, 2, 3)
plt.imshow(binary_imgs[0], cmap='gray')
plt.title(f'全局阈值 T={thresholds[0]}')
plt.axis('off')
# 阈值120
plt.subplot(2, 2, 4)
plt.imshow(binary_imgs[1], cmap='gray')
plt.title(f'全局阈值 T={thresholds[1]}')
plt.axis('off')
# 阈值160(另开一个图窗单独显示)
plt.figure(figsize=(6, 6))
plt.imshow(binary_imgs[2], cmap='gray')
plt.title(f'全局阈值 T={thresholds[2]}')
plt.axis('off')
plt.show()
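除了直接观察直方图,还可以用经典的迭代法自动求全局阈值:取初始阈值(如全图灰度均值),用它把像素分成两组,再取两组均值的平均作为新阈值,反复迭代直到收敛。下面是一个简短的示意实现:
import cv2
import numpy as np
gray = cv2.cvtColor(cv2.imread('test_img.jpg'), cv2.COLOR_BGR2GRAY).astype(np.float64)
# 迭代法求全局阈值
T = gray.mean()  # 初始阈值取全图灰度均值
while True:
    m1 = gray[gray > T].mean()   # 前景灰度均值
    m2 = gray[gray <= T].mean()  # 背景灰度均值
    T_new = (m1 + m2) / 2
    if abs(T_new - T) < 0.5:     # 阈值变化足够小则停止
        break
    T = T_new
print(f'迭代法求得的全局阈值:{T:.1f}')
实际工程中更常用的是 Otsu(最大类间方差)法:它遍历所有可能的阈值,选取使前景与背景类间方差最大的那个,在 OpenCV 中通过 THRESH_OTSU 标志即可调用,见下面的代码。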


import cv2
import numpy as np
import matplotlib.pyplot as plt
# 设置中文字体
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# 1. 读取图像并转为灰度图
img = cv2.imread('test_img.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# 2. 手动阈值 vs Otsu阈值
# 手动阈值(120)
_, binary_manual = cv2.threshold(gray, 120, 255, cv2.THRESH_BINARY)
# Otsu自动阈值
otsu_threshold, binary_otsu = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# 3. 可视化对比
plt.figure(figsize=(18, 6))
# 原图
plt.subplot(1, 3, 1)
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.title('原始图像')
plt.axis('off')
# 手动阈值
plt.subplot(1, 3, 2)
plt.imshow(binary_manual, cmap='gray')
plt.title(f'手动阈值 T=120')
plt.axis('off')
# Otsu阈值
plt.subplot(1, 3, 3)
plt.imshow(binary_otsu, cmap='gray')
plt.title(f'Otsu最优阈值 T={otsu_threshold:.1f}')
plt.axis('off')
plt.show()
# 输出Otsu计算的阈值
print(f'Otsu自动计算的最优阈值:{otsu_threshold}')
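为了说明 Otsu 的原理,下面给出一个按定义实现的示意版本:对每个候选阈值 t,计算背景 / 前景两类的像素占比 w0、w1 和灰度均值 μ0、μ1,类间方差 σ² = w0·w1·(μ0 − μ1)²,取使 σ² 最大的 t,结果应与 cv2.threshold 的 THRESH_OTSU 基本一致。
import cv2
import numpy as np
gray = cv2.cvtColor(cv2.imread('test_img.jpg'), cv2.COLOR_BGR2GRAY)
# 归一化直方图(每个灰度级的像素占比)
hist = cv2.calcHist([gray], [0], None, [256], [0, 256]).ravel()
prob = hist / hist.sum()
levels = np.arange(256)
best_t, best_var = 0, -1.0
for t in range(1, 256):
    w0 = prob[:t].sum()  # 背景占比(灰度 < t)
    w1 = 1.0 - w0        # 前景占比
    if w0 == 0 or w1 == 0:
        continue
    mu0 = (levels[:t] * prob[:t]).sum() / w0   # 背景灰度均值
    mu1 = (levels[t:] * prob[t:]).sum() / w1   # 前景灰度均值
    var_between = w0 * w1 * (mu0 - mu1) ** 2   # 类间方差
    if var_between > best_var:
        best_var, best_t = var_between, t
print(f'手工实现的Otsu阈值:{best_t}')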
噪声会导致灰度直方图模糊,影响阈值选择。先对图像进行平滑(高斯模糊、中值滤波),再进行阈值处理,可提升分割效果。
import cv2
import numpy as np
import matplotlib.pyplot as plt
# 设置中文字体
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# 1. 读取图像并添加噪声(模拟噪声场景)
img = cv2.imread('test_img.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# 添加椒盐噪声(约5%的像素被随机置为0或255)
noise = np.random.choice([0, 1, 2], size=gray.shape, p=[0.025, 0.025, 0.95])
gray_noisy = gray.copy()
gray_noisy[noise == 0] = 0    # 椒噪声(黑点)
gray_noisy[noise == 1] = 255  # 盐噪声(白点)
# 2. 不同处理方式对比
# 直接Otsu阈值(含噪声)
_, binary_noisy = cv2.threshold(gray_noisy, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# 高斯模糊+Otsu
gray_blur = cv2.GaussianBlur(gray_noisy, (5, 5), 0)
_, binary_blur = cv2.threshold(gray_blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# 中值滤波+Otsu
gray_median = cv2.medianBlur(gray_noisy, 5)
_, binary_median = cv2.threshold(gray_median, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# 3. 可视化对比
plt.figure(figsize=(18, 12))
# 含噪声图像
plt.subplot(2, 2, 1)
plt.imshow(gray_noisy, cmap='gray')
plt.title('含椒盐噪声的图像')
plt.axis('off')
# 直接Otsu
plt.subplot(2, 2, 2)
plt.imshow(binary_noisy, cmap='gray')
plt.title('直接Otsu阈值(含噪声)')
plt.axis('off')
# 高斯模糊+Otsu
plt.subplot(2, 2, 3)
plt.imshow(binary_blur, cmap='gray')
plt.title('高斯模糊+Otsu阈值')
plt.axis('off')
# 中值滤波+Otsu
plt.subplot(2, 2, 4)
plt.imshow(binary_median, cmap='gray')
plt.title('中值滤波+Otsu阈值')
plt.axis('off')
plt.show()
结合边缘信息优化阈值:先检测边缘,再根据边缘区域的灰度分布调整阈值,避免阈值过高 / 过低导致边缘丢失。
import cv2
import numpy as np
import matplotlib.pyplot as plt
# 设置中文字体
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# 1. 读取图像并预处理
img = cv2.imread('test_img.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# 2. 步骤1:检测边缘
canny_edge = cv2.Canny(gray, 50, 150)
# 步骤2:提取边缘区域的灰度值
edge_pixels = gray[canny_edge > 0]
# 步骤3:计算边缘区域的灰度均值作为阈值参考
edge_mean = np.mean(edge_pixels) if len(edge_pixels) > 0 else 128
# 步骤4:结合Otsu阈值和边缘均值(注意保留Otsu返回的阈值)
otsu_threshold, binary_otsu = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
combined_threshold = (edge_mean + otsu_threshold) / 2
_, binary_combined = cv2.threshold(gray, combined_threshold, 255, cv2.THRESH_BINARY)
# 3. 可视化对比
plt.figure(figsize=(18, 6))
# 原图
plt.subplot(1, 3, 1)
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.title('原始图像')
plt.axis('off')
# 纯Otsu
plt.subplot(1, 3, 2)
plt.imshow(binary_otsu, cmap='gray')
plt.title(f'纯Otsu阈值 T={otsu_threshold:.1f}')
plt.axis('off')
# 边缘改进阈值
plt.subplot(1, 3, 3)
plt.imshow(binary_combined, cmap='gray')
plt.title(f'边缘改进阈值 T={combined_threshold:.1f}')
plt.axis('off')
plt.show()
多阈值处理使用多个阈值将图像分为多个灰度区域:例如用两个阈值 T1 < T2 把像素分成三类(f ≤ T1、T1 < f ≤ T2、f > T2),分别赋予不同的输出灰度(如 0、128、255)。

import cv2
import numpy as np
import matplotlib.pyplot as plt
# 设置中文字体
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# 1. 读取图像并转为灰度图
img = cv2.imread('test_img.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# 2. 多阈值处理
T1 = 80
T2 = 160
# 初始化多阈值图像
multi_threshold = np.zeros_like(gray)
# 赋值:0, 128, 255
multi_threshold[(gray > T1) & (gray <= T2)] = 128
multi_threshold[gray > T2] = 255
# 3. 对比二值阈值
_, binary = cv2.threshold(gray, 120, 255, cv2.THRESH_BINARY)
# 4. 可视化对比
plt.figure(figsize=(18, 6))
# 原图
plt.subplot(1, 3, 1)
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.title('原始图像')
plt.axis('off')
# 二值阈值
plt.subplot(1, 3, 2)
plt.imshow(binary, cmap='gray')
plt.title('二值阈值(T=120)')
plt.axis('off')
# 多阈值
plt.subplot(1, 3, 3)
plt.imshow(multi_threshold, cmap='gray')
plt.title(f'多阈值(T1={T1}, T2={T2})')
plt.axis('off')
plt.show()
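手动选择 T1、T2 需要反复试验。如果环境中安装了 scikit-image(此处作为可选依赖的示例),可以用 threshold_multiotsu 自动求出多个阈值,其思路与 Otsu 相同,只是把类间方差推广到多类:
import cv2
import numpy as np
from skimage.filters import threshold_multiotsu  # 需要:pip install scikit-image
gray = cv2.cvtColor(cv2.imread('test_img.jpg'), cv2.COLOR_BGR2GRAY)
# 自动求两个阈值,把图像分成3个灰度区间
thresholds = threshold_multiotsu(gray, classes=3)
print(f'multi-Otsu自动求得的阈值:{thresholds}')
# 按阈值把像素划分到0/1/2三个区间,再映射到0/127/255便于显示
regions = np.digitize(gray, bins=thresholds)
multi_otsu_img = np.uint8(regions * 127.5)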
局部阈值(自适应阈值)为每个像素根据其邻域单独计算阈值,常用做法是取邻域灰度的算术均值或高斯加权均值再减去一个常数 C,适用于光照不均匀的图像:

import cv2
import numpy as np
import matplotlib.pyplot as plt
# 设置中文字体
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# 1. 读取光照不均匀的图像
img = cv2.imread('uneven_light.jpg') # 替换为光照不均匀的图像
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# 2. 全局阈值 vs 局部阈值
# 全局Otsu
_, binary_global = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# 局部自适应阈值(均值法)
binary_local = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
cv2.THRESH_BINARY, 15, 5)
# 局部自适应阈值(高斯法)
binary_local_gauss = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY, 15, 5)
# 3. 可视化对比
plt.figure(figsize=(18, 12))
# 原图
plt.subplot(2, 2, 1)
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.title('光照不均匀的原始图像')
plt.axis('off')
# 全局阈值
plt.subplot(2, 2, 2)
plt.imshow(binary_global, cmap='gray')
plt.title('全局Otsu阈值')
plt.axis('off')
# 局部均值阈值
plt.subplot(2, 2, 3)
plt.imshow(binary_local, cmap='gray')
plt.title('局部自适应阈值(均值法)')
plt.axis('off')
# 局部高斯阈值
plt.subplot(2, 2, 4)
plt.imshow(binary_local_gauss, cmap='gray')
plt.title('局部自适应阈值(高斯法)')
plt.axis('off')
plt.show()
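cv2.adaptiveThreshold 的"均值法"本质上等价于:用均值滤波求每个像素的邻域均值,减去常数 C 作为该像素的阈值。下面是一个几行代码的等价示意(blockSize=15、C=5 与上例一致,边界处理方式与OpenCV内部实现可能略有差异):
import cv2
import numpy as np
gray = cv2.cvtColor(cv2.imread('uneven_light.jpg'), cv2.COLOR_BGR2GRAY)
block_size, C = 15, 5
# 每个像素的局部阈值 = 邻域均值 - C
local_mean = cv2.boxFilter(gray, cv2.CV_32F, (block_size, block_size))
local_threshold = local_mean - C
# 大于局部阈值的像素置为255(与 THRESH_BINARY 一致)
binary_manual_local = np.where(gray > local_threshold, 255, 0).astype(np.uint8)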

区域生长的核心是从种子点出发,将邻域中满足灰度相似性的像素合并到种子区域,步骤:① 选定一个或多个种子点;② 检查区域边界的邻域像素,与种子点灰度差小于阈值的并入区域;③ 以新并入的像素为起点继续向外扩展;④ 直到没有新的像素可以并入为止。下面用 BFS(广度优先搜索)实现:
import cv2
import numpy as np
import matplotlib.pyplot as plt
# 设置中文字体
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
def region_growing(gray_img, seed, threshold=10):
    """
    区域生长算法实现(BFS)
    :param gray_img: 灰度图像
    :param seed: 种子点 (x, y)
    :param threshold: 灰度差阈值
    :return: 区域生长后的二值图像
    """
    h, w = gray_img.shape
    # 初始化结果图像和访问标记
    result = np.zeros_like(gray_img)
    visited = np.zeros_like(gray_img, dtype=bool)
    # 种子点灰度值(转为int,避免uint8相减溢出)
    seed_val = int(gray_img[seed[1], seed[0]])
    # 8邻域
    directions = [(-1, -1), (-1, 0), (-1, 1),
                  (0, -1),           (0, 1),
                  (1, -1),  (1, 0),  (1, 1)]
    # 队列(BFS)
    queue = [seed]
    visited[seed[1], seed[0]] = True
    result[seed[1], seed[0]] = 255
    while queue:
        x, y = queue.pop(0)
        # 遍历8邻域
        for dx, dy in directions:
            nx = x + dx
            ny = y + dy
            # 检查边界
            if 0 <= nx < w and 0 <= ny < h and not visited[ny, nx]:
                # 检查与种子点的灰度差
                if abs(int(gray_img[ny, nx]) - seed_val) < threshold:
                    result[ny, nx] = 255
                    visited[ny, nx] = True
                    queue.append((nx, ny))
    return result
# 1. 读取图像并转为灰度图
img = cv2.imread('test_img.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray_blur = cv2.GaussianBlur(gray, (3, 3), 0) # 降噪
# 2. 选择种子点(手动指定,可根据图像调整)
seed_point = (100, 100) # (x, y)
# 3. 区域生长
# 不同阈值对比
rg_low = region_growing(gray_blur, seed_point, threshold=5)
rg_high = region_growing(gray_blur, seed_point, threshold=20)
# 4. 可视化对比
plt.figure(figsize=(18, 6))
# 原图(标记种子点)
plt.subplot(1, 3, 1)
img_seed = img.copy()
cv2.circle(img_seed, seed_point, 5, (0, 0, 255), -1) # 红色标记种子点
plt.imshow(cv2.cvtColor(img_seed, cv2.COLOR_BGR2RGB))
plt.title(f'原始图像(种子点:{seed_point})')
plt.axis('off')
# 低阈值
plt.subplot(1, 3, 2)
plt.imshow(rg_low, cmap='gray')
plt.title('区域生长(阈值=5)')
plt.axis('off')
# 高阈值
plt.subplot(1, 3, 3)
plt.imshow(rg_high, cmap='gray')
plt.title('区域生长(阈值=20)')
plt.axis('off')
plt.show()
区域分离与聚合(split and merge)结合了"自顶向下"与"自底向上"两个过程:分离阶段从整幅图像开始,若区域不满足一致性准则,就按四叉树方式分成 4 个子区域并递归处理;聚合阶段再把相邻且相似的区域合并,消除不必要的分割线。
一致性准则:区域内灰度标准差 < 阈值。
import cv2
import numpy as np
import matplotlib.pyplot as plt
# 设置中文字体
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
def split_and_merge(gray_img, threshold=15):
    """
    区域分离与聚合算法
    :param gray_img: 灰度图像(尺寸需为2^n x 2^n,本例中先裁剪)
    :param threshold: 灰度标准差阈值
    :return: 分割后的图像
    """
    h, w = gray_img.shape
    # 裁剪为2^n x 2^n(简化实现)
    size = min(2**int(np.log2(h)), 2**int(np.log2(w)))
    gray = gray_img[:size, :size]
    result = np.zeros_like(gray)

    def split(x, y, size):
        """递归分割"""
        region = gray[y:y+size, x:x+size]
        # 计算区域灰度标准差
        std = np.std(region)
        if std < threshold or size == 1:
            # 满足一致性,填充均值
            result[y:y+size, x:x+size] = np.mean(region)
            return
        # 否则分割为4个子区域
        half = size // 2
        split(x, y, half)
        split(x+half, y, half)
        split(x, y+half, half)
        split(x+half, y+half, half)

    # 从整个图像开始分割
    split(0, 0, size)
    # 聚合(合并相邻相似区域,简化实现:阈值内的区域合并)
    # 此处省略复杂聚合逻辑,仅展示分割效果
    return result
# 1. 读取图像并转为灰度图
img = cv2.imread('test_img.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# 2. 区域分离与聚合
# 不同阈值对比
sam_low = split_and_merge(gray, threshold=10)
sam_high = split_and_merge(gray, threshold=30)
# 3. 可视化对比
plt.figure(figsize=(18, 6))
# 原图
plt.subplot(1, 3, 1)
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.title('原始图像')
plt.axis('off')
# 低阈值
plt.subplot(1, 3, 2)
plt.imshow(sam_low, cmap='gray')
plt.title('分离聚合(阈值=10)')
plt.axis('off')
# 高阈值
plt.subplot(1, 3, 3)
plt.imshow(sam_high, cmap='gray')
plt.title('分离聚合(阈值=30)')
plt.axis('off')
plt.show()
k 均值聚类将每个像素的灰度 / 颜色特征作为一个样本,聚为 k 类,每类对应一个分割区域。算法流程:随机初始化 k 个聚类中心,把每个像素分配给最近的中心,再用各类像素的均值更新中心,反复迭代直到中心基本不再变化。
import cv2
import numpy as np
import matplotlib.pyplot as plt
# 设置中文字体
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# 1. 读取图像(支持彩色/灰度)
img = cv2.imread('test_img.jpg')
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # 转为RGB
# 2. 预处理:将图像转为样本矩阵
# 彩色图像:(h*w, 3),灰度图像:(h*w, 1)
h, w, c = img_rgb.shape
pixels = img_rgb.reshape((-1, c)).astype(np.float32)
# 3. k均值聚类
# 参数设置
k = 3 # 聚类数(可调整)
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.2)
# 执行聚类
_, labels, centers = cv2.kmeans(pixels, k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)
# 4. 重构分割图像
centers = np.uint8(centers)
segmented_img = centers[labels.flatten()]
segmented_img = segmented_img.reshape(img_rgb.shape)
# 5. 不同k值对比
k2_labels, k2_centers = cv2.kmeans(pixels, 2, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)[1:]
k2_img = np.uint8(k2_centers)[k2_labels.flatten()].reshape(img_rgb.shape)
k4_labels, k4_centers = cv2.kmeans(pixels, 4, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)[1:]
k4_img = np.uint8(k4_centers)[k4_labels.flatten()].reshape(img_rgb.shape)
# 6. 可视化对比
plt.figure(figsize=(18, 6))
# 原图
plt.subplot(1, 4, 1)
plt.imshow(img_rgb)
plt.title('原始图像')
plt.axis('off')
# k=2
plt.subplot(1, 4, 2)
plt.imshow(k2_img)
plt.title('k均值聚类(k=2)')
plt.axis('off')
# k=3
plt.subplot(1, 4, 3)
plt.imshow(segmented_img)
plt.title('k均值聚类(k=3)')
plt.axis('off')
# k=4
plt.subplot(1, 4, 4)
plt.imshow(k4_img)
plt.title('k均值聚类(k=4)')
plt.axis('off')
plt.show()

超像素分割是把图像划分为一系列特征相似且空间上连续的小像素块(超像素),每个超像素作为后续处理的基本单元,可以大幅减少后续算法要处理的元素数量。常用算法是 SLIC(简单线性迭代聚类),它在"颜色 + 空间坐标"的联合特征空间中做局部 k 均值。
import cv2
import numpy as np
import matplotlib.pyplot as plt
# 设置中文字体
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# 1. 读取图像并转为RGB
img = cv2.imread('test_img.jpg')
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# 2. SLIC超像素分割
# 创建SLIC对象
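# 注意:cv2.ximgproc 属于 opencv-contrib-python 扩展模块,需先执行 pip install opencv-contrib-python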
slic = cv2.ximgproc.createSuperpixelSLIC(img, algorithm=cv2.ximgproc.SLICO, region_size=20, ruler=10.0)
# 迭代计算超像素
slic.iterate(10)
# 获取超像素标签
labels = slic.getLabels()
# 获取超像素边界
mask = slic.getLabelContourMask()
# 绘制边界
img_slic = img_rgb.copy()
img_slic[mask != 0] = [255, 0, 0] # 红色边界
# 3. 不同region_size对比
slic_small = cv2.ximgproc.createSuperpixelSLIC(img, algorithm=cv2.ximgproc.SLICO, region_size=10, ruler=10.0)
slic_small.iterate(10)
mask_small = slic_small.getLabelContourMask()
img_slic_small = img_rgb.copy()
img_slic_small[mask_small != 0] = [255, 0, 0]
slic_large = cv2.ximgproc.createSuperpixelSLIC(img, algorithm=cv2.ximgproc.SLICO, region_size=30, ruler=10.0)
slic_large.iterate(10)
mask_large = slic_large.getLabelContourMask()
img_slic_large = img_rgb.copy()
img_slic_large[mask_large != 0] = [255, 0, 0]
# 4. 可视化对比
plt.figure(figsize=(18, 6))
# 原图
plt.subplot(1, 4, 1)
plt.imshow(img_rgb)
plt.title('原始图像')
plt.axis('off')
# region_size=10
plt.subplot(1, 4, 2)
plt.imshow(img_slic_small)
plt.title('超像素(region_size=10)')
plt.axis('off')
# region_size=20
plt.subplot(1, 4, 3)
plt.imshow(img_slic)
plt.title('超像素(region_size=20)')
plt.axis('off')
# region_size=30
plt.subplot(1, 4, 4)
plt.imshow(img_slic_large)
plt.title('超像素(region_size=30)')
plt.axis('off')
plt.show()

图割(Graph Cut)将图像建模为无向图 G=(V, E):V 由像素节点和两个终端节点组成(源点 s 代表前景、汇点 t 代表背景);E 包括相邻像素之间的边(权重反映灰度 / 颜色相似度,越相似权重越大)和每个像素与 s、t 之间的边(权重反映该像素属于前景 / 背景的可能性)。
最小割把图分为包含源点的集合(前景)和包含汇点的集合(背景),使被割断的边的权重之和最小,从而在"相似像素尽量不分开"与"符合前景 / 背景模型"之间取得平衡。
常用算法:Max-Flow Min-Cut(最大流最小割),通过求解最大流得到最小割;OpenCV 的 GrabCut 就是基于这一思想的交互式分割。
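在看 GrabCut 代码之前,先用一个极小的"玩具图"演示最大流 / 最小割的含义(这里借助 networkx 库,属于可选依赖,仅作原理示意):s、t 分别代表前景和背景模型,a、b 代表两个相邻像素,边的 capacity 相当于权重,最小割得到的两个点集就对应前景 / 背景划分。
import networkx as nx  # 可选依赖:pip install networkx
# 构建带容量的有向图:s->像素、像素->t 表示前景/背景倾向,像素之间的边表示相似度
G = nx.DiGraph()
G.add_edge('s', 'a', capacity=8)   # 像素a很像前景
G.add_edge('s', 'b', capacity=2)
G.add_edge('a', 't', capacity=2)
G.add_edge('b', 't', capacity=7)   # 像素b很像背景
G.add_edge('a', 'b', capacity=3)   # a、b相邻且有一定相似度
G.add_edge('b', 'a', capacity=3)
# 最小割 = 最大流:返回割的容量与两个点集(含s的前景集合、含t的背景集合)
cut_value, (fg_set, bg_set) = nx.minimum_cut(G, 's', 't')
print(f'最小割容量:{cut_value}')
print(f'前景(与s连通):{fg_set}')
print(f'背景(与t连通):{bg_set}')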
import cv2
import numpy as np
import matplotlib.pyplot as plt
# 设置中文字体
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# 1. 读取图像并初始化
img = cv2.imread('test_img.jpg')
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# 初始化掩码(0=背景,1=前景,2=可能背景,3=可能前景)
mask = np.zeros(img.shape[:2], np.uint8)
# 初始化前景/背景模型
bgdModel = np.zeros((1, 65), np.float64)
fgdModel = np.zeros((1, 65), np.float64)
# 2. 定义感兴趣区域(ROI),手动框选目标区域
# 格式:(x, y, w, h),需根据图像调整
rect = (50, 50, 300, 300)
# 3. 执行GrabCut(图割分割)
cv2.grabCut(img, mask, rect, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_RECT)
# 4. 生成分割掩码
# 掩码值:0(背景)、2(可能背景)→ 背景;1(前景)、3(可能前景)→ 前景
mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
img_grabcut = img_rgb * mask2[:, :, np.newaxis]
# 5. 手动调整掩码(可选,优化分割效果)
# 此处模拟手动标记前景/背景
# mask[100:200, 100:200] = 1 # 标记前景
# mask[0:50, 0:50] = 0 # 标记背景
# cv2.grabCut(img, mask, None, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_MASK)
# 6. 可视化对比
plt.figure(figsize=(18, 6))
# 原图(标记ROI)
plt.subplot(1, 3, 1)
img_rect = img_rgb.copy()
cv2.rectangle(img_rect, (rect[0], rect[1]), (rect[0]+rect[2], rect[1]+rect[3]), (255, 0, 0), 2)
plt.imshow(img_rect)
plt.title('原始图像(ROI框)')
plt.axis('off')
# 掩码
plt.subplot(1, 3, 2)
plt.imshow(mask2, cmap='gray')
plt.title('分割掩码')
plt.axis('off')
# 分割结果
plt.subplot(1, 3, 3)
plt.imshow(img_grabcut)
plt.title('GrabCut图割分割结果')
plt.axis('off')
plt.show()

分水岭算法将图像(通常是梯度图)视为地形:灰度值代表海拔高度,局部极小值对应一个个"集水盆地";想象水从各盆地的最低点开始上涨,当两个盆地的水即将汇合时,就在交界处筑起"水坝",所有水坝连起来就是分割边界(分水岭线)。
问题:噪声会导致过度分割(大量小区域),解决方案:标记控制的分水岭。
水坝是分割边界,通过形态学操作(膨胀、腐蚀)构建,避免不同区域的水混合。
通过手动 / 自动标记前景和背景,避免过度分割:
import cv2
import numpy as np
import matplotlib.pyplot as plt
# 设置中文字体
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# 1. 读取图像并预处理
img = cv2.imread('test_img.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# 降噪
gray_blur = cv2.GaussianBlur(gray, (5, 5), 0)
# 阈值处理(二值化)
_, binary = cv2.threshold(gray_blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
# 2. 形态学操作去除噪声
kernel = np.ones((3, 3), np.uint8)
opening = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel, iterations=2)
# 3. 确定背景区域(膨胀)
sure_bg = cv2.dilate(opening, kernel, iterations=3)
# 4. 确定前景区域(距离变换+阈值)
dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
_, sure_fg = cv2.threshold(dist_transform, 0.7*dist_transform.max(), 255, 0)
sure_fg = np.uint8(sure_fg)
# 5. 未知区域(背景-前景)
unknown = cv2.subtract(sure_bg, sure_fg)
# 6. 标记连通区域
_, markers = cv2.connectedComponents(sure_fg)
# 标记+1(避免背景标记为0)
markers = markers + 1
# 未知区域标记为0
markers[unknown == 255] = 0
# 7. 执行分水岭算法
markers = cv2.watershed(img, markers)
img_watershed = img.copy()
# 分割边界(markers == -1)标记为红色(注意img为BGR格式)
img_watershed[markers == -1] = [0, 0, 255]
# 8. 可视化对比
plt.figure(figsize=(18, 12))
# 原图
plt.subplot(2, 3, 1)
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.title('原始图像')
plt.axis('off')
# 二值图像
plt.subplot(2, 3, 2)
plt.imshow(binary, cmap='gray')
plt.title('二值化图像')
plt.axis('off')
# 确定背景
plt.subplot(2, 3, 3)
plt.imshow(sure_bg, cmap='gray')
plt.title('确定背景')
plt.axis('off')
# 确定前景
plt.subplot(2, 3, 4)
plt.imshow(sure_fg, cmap='gray')
plt.title('确定前景')
plt.axis('off')
# 标记图
plt.subplot(2, 3, 5)
plt.imshow(markers, cmap='jet')
plt.title('标记图')
plt.axis('off')
# 分水岭结果
plt.subplot(2, 3, 6)
plt.imshow(cv2.cvtColor(img_watershed, cv2.COLOR_BGR2RGB))
plt.title('分水岭分割结果(红色边界)')
plt.axis('off')
plt.show()

空间域运动分割基于像素的帧间灰度变化,常用方法包括:帧差法(直接比较相邻帧的灰度差)、背景减除法(当前帧与背景模型作差)和光流法(估计每个像素的运动矢量)。下面演示最简单的帧差法:
import cv2
import numpy as np
import matplotlib.pyplot as plt
# 设置中文字体
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# 1. 读取图像并校验(添加路径校验+统一尺寸)
def read_and_resize_image(img_path, target_size=(400, 300)):
    """
    读取图像并统一尺寸
    :param img_path: 图像路径
    :param target_size: 目标尺寸 (宽, 高)
    :return: 读取并缩放后的BGR图像
    """
    img = cv2.imread(img_path)
    if img is None:
        raise FileNotFoundError(f"无法读取图像文件,请检查路径:{img_path}")
    # 缩放图像到统一尺寸(INTER_CUBIC:高质量缩放)
    img_resized = cv2.resize(img, target_size, interpolation=cv2.INTER_CUBIC)
    return img_resized
# 图像路径(替换为你的实际路径,优先使用绝对路径)
frame1_path = "../picture/1.jpg"
frame2_path = "../picture/Flower.png"
# 统一目标尺寸(宽400,高300,可根据需求调整)
target_size = (400, 300)
# 读取并缩放图像
frame1 = read_and_resize_image(frame1_path, target_size)
frame2 = read_and_resize_image(frame2_path, target_size)
# 转为灰度图
gray1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
gray2 = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
# 2. 帧差法实现运动检测
# 计算帧间灰度差
diff = cv2.absdiff(gray1, gray2)
# 高斯模糊降噪(平滑灰度差图像,减少噪点)
diff_blur = cv2.GaussianBlur(diff, (5, 5), 0)
# 二值化处理(提取灰度差较大的区域,即潜在运动区域)
_, motion_mask = cv2.threshold(diff_blur, 30, 255, cv2.THRESH_BINARY)
# 形态学闭运算(先膨胀后腐蚀,填充运动区域内部空洞,连接相邻小区域)
kernel = np.ones((5, 5), np.uint8)
motion_mask = cv2.morphologyEx(motion_mask, cv2.MORPH_CLOSE, kernel)
# 3. 绘制运动区域(在帧2上用红色标记)
frame_motion = frame2.copy()
frame_motion[motion_mask == 255] = [0, 0, 255] # BGR格式:红色
# 4. 可视化对比
plt.figure(figsize=(18, 6))
# 帧1(已缩放)
plt.subplot(1, 4, 1)
plt.imshow(cv2.cvtColor(frame1, cv2.COLOR_BGR2RGB))
plt.title('帧1(统一尺寸后)')
plt.axis('off')
# 帧2(已缩放)
plt.subplot(1, 4, 2)
plt.imshow(cv2.cvtColor(frame2, cv2.COLOR_BGR2RGB))
plt.title('帧2(统一尺寸后)')
plt.axis('off')
# 帧间灰度差
plt.subplot(1, 4, 3)
plt.imshow(diff, cmap='gray')
plt.title('帧间灰度差')
plt.axis('off')
# 运动区域标记
plt.subplot(1, 4, 4)
plt.imshow(cv2.cvtColor(frame_motion, cv2.COLOR_BGR2RGB))
plt.title('运动区域(红色标记)')
plt.axis('off')
plt.tight_layout() # 调整布局,避免标题重叠
plt.show()
# 输出图像尺寸信息,验证统一效果
print(f"帧1尺寸:{frame1.shape[:2]}(高×宽)")
print(f"帧2尺寸:{frame2.shape[:2]}(高×宽)")
print(f"运动区域像素数量:{np.sum(motion_mask == 255)}")

频率域运动分割基于傅里叶变换,将运动信息转换为频率 / 相位特征:根据傅里叶变换的位移定理,图像的整体平移在频域表现为相位的线性变化,因此可以通过比较相邻帧的相位差(相位相关)来估计帧间运动。
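作为频率域方法的一个最小示例,下面用 OpenCV 的 cv2.phaseCorrelate(基于傅里叶相位相关)估计两帧之间的整体平移量。这里假设两帧尺寸相同且运动以平移为主;示例中通过 np.roll 人为构造第二帧,方便验证结果:
import cv2
import numpy as np
# 读取第一帧,并构造"平移了(右10, 下5)像素"的第二帧(仅为演示)
gray1 = cv2.cvtColor(cv2.imread('test_img.jpg'), cv2.COLOR_BGR2GRAY)
gray2 = np.roll(gray1, shift=(5, 10), axis=(0, 1))
# phaseCorrelate要求float32/float64单通道输入
f1 = np.float32(gray1)
f2 = np.float32(gray2)
# (dx, dy)为两帧间的平移估计(符号约定见OpenCV文档),response为相关峰值,可作为置信度
(dx, dy), response = cv2.phaseCorrelate(f1, f2)
print(f'估计的平移量:dx={dx:.2f}, dy={dy:.2f},置信度:{response:.3f}')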

本章系统讲解了图像分割的核心方法:从点、线、边缘检测(含 Canny 与霍夫变换的边缘连接),到全局 / 局部 / 多阈值处理,再到区域生长、区域分离与聚合、k 均值聚类与超像素,以及图割(GrabCut)、分水岭和基于帧差 / 频率域的运动分割。实际应用中应根据图像特点(噪声水平、光照条件、目标与背景的对比度)选择并组合这些方法,并通过可视化对比来调整参数。