在这里插入图片描述
fca
) + 最大池化坐标注意力(fcm)。
import torch
import torch.nn as nn
import torch.nn.functional as F
class InvolutionLayer(nn.Module):
    """Involution-style layer: aggregation kernels are predicted from the input.

    For every output channel and spatial position a ``K*K`` kernel is
    generated by ``kernel_generator`` and applied to the ``K x K``
    neighbourhood of the 1x1-transformed features.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        kernel_size: Side length ``K`` of the square neighbourhood (default 3).
            Must be an odd integer.
        padding: Zero padding used when extracting neighbourhoods (default 1).
            Must satisfy ``2 * padding == kernel_size - 1`` so that every
            window is centred on an input pixel.
        stride: Step between neighbourhoods (default 1).

    Raises:
        ValueError: If ``padding`` does not give "same" padding for
            ``kernel_size``.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, padding=1, stride=1):
        super().__init__()

        # Each generated kernel belongs to the pixel at the centre of its
        # window; that pairing only exists with "same" padding.
        pad_h, pad_w = self._pair(padding)
        if 2 * pad_h != kernel_size - 1 or 2 * pad_w != kernel_size - 1:
            raise ValueError(
                "InvolutionLayer requires 2 * padding == kernel_size - 1, "
                f"got kernel_size={kernel_size!r}, padding={padding!r}"
            )

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.padding = padding
        self.stride = stride

        # Kernel generator: 1x1 convolutions only, so the kernel map keeps the
        # spatial size of the input (forward() relies on this when reshaping).
        self.kernel_generator = nn.Sequential(
            nn.Conv2d(in_channels, in_channels, 1, groups=in_channels, bias=False),
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels, kernel_size * kernel_size * out_channels, 1, bias=False),
        )
        # 1x1 convolution that maps the input to out_channels feature maps.
        self.feature_transform = nn.Conv2d(in_channels, out_channels, 1, bias=False)

    @staticmethod
    def _pair(value):
        """Return *value* as an ``(h, w)`` pair, duplicating a scalar."""
        if isinstance(value, (tuple, list)):
            first, second = value
            return first, second
        return value, value

    def forward(self, x):
        """Apply the input-dependent kernels.

        Args:
            x: Input tensor of shape ``[B, in_channels, H, W]``.

        Returns:
            Tensor of shape ``[B, out_channels, ceil(H / stride), ceil(W / stride)]``
            (``[B, out_channels, H, W]`` for the default ``stride=1``).
        """
        batch_size, _, height, width = x.size()
        taps = self.kernel_size * self.kernel_size

        # 1. One K*K kernel per output channel and input pixel:
        #    [B, K*K*out_channels, H, W] -> [B, out_channels, K*K, H, W].
        kernel = self.kernel_generator(x).view(
            batch_size, self.out_channels, taps, height, width
        )

        # 2. Keep only the kernels of the pixels that are window centres.
        #    With "same" padding these are the pixels on the stride grid;
        #    for stride 1 the slice is a no-op.
        stride_h, stride_w = self._pair(self.stride)
        kernel = kernel[..., ::stride_h, ::stride_w]
        out_h, out_w = kernel.shape[-2:]

        # 3. Feature transform: [B, out_channels, H, W].
        features = self.feature_transform(x)

        # 4. Extract the K x K neighbourhoods: [B, out_channels*K*K, out_h*out_w].
        unfolded = F.unfold(
            features,
            kernel_size=self.kernel_size,
            padding=self.padding,
            stride=self.stride,
        )
        unfolded = unfolded.view(batch_size, self.out_channels, taps, out_h, out_w)

        # 5. Weight each neighbourhood by its kernel and sum over the K*K taps.
        return (unfolded * kernel).sum(dim=2)
class Involution(nn.Module):
    """Two stacked ``InvolutionLayer`` modules with a ReLU in between.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        mid_channels: Number of channels between the two layers.
        kernel_size: Neighbourhood size used by both layers. The default of 1
            reproduces the original configuration (no spatial aggregation).
        padding: Padding used by both layers. ``None`` (default) selects
            ``(kernel_size - 1) // 2``, which is 0 for the default kernel size.
    """

    def __init__(self, in_channels, out_channels, mid_channels, kernel_size=1, padding=None):
        super().__init__()
        if padding is None:
            # "Same" padding keeps the spatial size unchanged for odd kernels.
            padding = (kernel_size - 1) // 2

        # First layer: input -> intermediate features.
        self.involution1 = InvolutionLayer(
            in_channels, mid_channels, kernel_size=kernel_size, padding=padding
        )
        # Second layer: intermediate features -> output.
        self.involution2 = InvolutionLayer(
            mid_channels, out_channels, kernel_size=kernel_size, padding=padding
        )
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Run ``involution1`` -> ReLU -> ``involution2`` on ``x``."""
        hidden = self.relu(self.involution1(x))
        return self.involution2(hidden)
if __name__ == "__main__":
    # Input tensor size: batch, channels, height, width.
    B, C, H, W = 1, 64, 32, 32
    input_tensor = torch.randn(B, C, H, W)  # random input tensor

    # Build the two-layer Involution block (64 -> 128 -> 64 channels).
    block = Involution(in_channels=C, out_channels=C, mid_channels=2 * C)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    sablock = block.to(device)
    print(sablock)
    input_tensor = input_tensor.to(device)

    # Forward pass.
    output = sablock(input_tensor)

    # Report input and output shapes.
    print(f"Input: {input_tensor.shape}")
    print(f"Output: {output.shape}")
运行结果:
以下是对Involution 代码的详细解析,包括其核心思想、结构设计和实现细节:
Involution 是一种动态生成卷积核的机制,与传统卷积的区别在于:
# Kernel generator (the core idea): predicts K*K weights per output channel
# and per pixel. Both convolutions are 1x1, so the spatial size is preserved.
self.kernel_generator = nn.Sequential(
    nn.Conv2d(in_channels, in_channels, 1, groups=in_channels, bias=False),  # depthwise convolution
    nn.BatchNorm2d(in_channels),
    nn.ReLU(inplace=True),
    nn.Conv2d(in_channels, kernel_size * kernel_size * out_channels, 1, bias=False),
)
# Feature transform: 1x1 convolution from in_channels to out_channels.
self.feature_transform = nn.Conv2d(in_channels, out_channels, 1, bias=False)
kernel = self.kernel_generator(x)  # [B, K*K*C_out, H, W]
kernel = kernel.view(batch_size, self.out_channels,
                     self.kernel_size * self.kernel_size,
                     height, width)  # [B, C_out, K*K, H, W]
features = self.feature_transform(x)  # [B, C_out, H, W]
# padding/stride must be passed by keyword: the third positional argument of
# F.unfold is `dilation`, not `padding`.
unfolded = F.unfold(features,
                    kernel_size=self.kernel_size,
                    padding=self.padding,
                    stride=self.stride)  # [B, C_out*K*K, L]
unfolded = unfolded.view(batch_size, self.out_channels,
                         self.kernel_size * self.kernel_size,
                         height, width)  # [B, C_out, K*K, H, W]
output = (unfolded * kernel).sum(dim=2)  # sum over the K*K taps -> [B, C_out, H, W]
class Involution(nn.Module):
    """Two InvolutionLayer modules (in -> mid -> out) with a ReLU in between."""

    def __init__(self, in_channels, out_channels, mid_channels):
        # nn.Module must be initialised before sub-modules can be assigned.
        super().__init__()
        # padding=0 is passed explicitly: a 1x1 kernel needs no padding.
        self.involution1 = InvolutionLayer(in_channels, mid_channels, kernel_size=1, padding=0)
        self.involution2 = InvolutionLayer(mid_channels, out_channels, kernel_size=1, padding=0)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Run involution1 -> ReLU -> involution2 on ``x``."""
        x = self.relu(self.involution1(x))
        return self.involution2(x)
`mid_channels > in_channels`:形成“先升维、后降维”的(倒置)瓶颈结构(示例中 64→128→64)。

特性 | 传统卷积 | Involution |
---|---|---|
核生成方式 | 静态学习 | 动态生成 |
参数量 | $C_{in} C_{out} K^2$ | $C_{in} C_{out} (K^2 + 1) + 3 C_{in}$(按本文实现统计:核生成器 + 1×1 特征变换) |
计算模式 | 空间不变 | 空间自适应 |
通道处理 | 跨通道混合 | 可分离通道处理 |
# The example currently uses kernel_size=1 (no spatial aggregation).
# It can be changed to a 3x3 neighbourhood; padding=1 keeps the spatial size:
self.involution1 = InvolutionLayer(in_channels, mid_channels, kernel_size=3, padding=1)
def forward(self, x):
    """Run involution1 -> ReLU -> involution2 and add the input back.

    The addition requires the output of ``involution2`` to have the same
    shape as ``x``.
    """
    residual = x
    x = self.involution1(x)
    x = self.relu(x)
    x = self.involution2(x)
    return x + residual  # residual (skip) connection
# Optional extension: add a squeeze-and-excitation (SE) module to the kernel generator.
# NOTE(review): the reduction ratio was lost from the original text; 16 is
# assumed here and should be confirmed.
se_channels = max(in_channels // 16, 1)  # never shrink the bottleneck to zero channels
self.se = nn.Sequential(
    nn.AdaptiveAvgPool2d(1),  # global average pooling -> [B, C, 1, 1]
    nn.Conv2d(in_channels, se_channels, 1),
    nn.ReLU(),
    nn.Conv2d(se_channels, in_channels, 1),
    nn.Sigmoid(),  # per-channel gate in (0, 1)
)
Input: [B, 64, H, W]
│
├─ InvolutionLayer1 (64→128)
│ ├─ 生成128个1x1核 [B, 128, H, W]
│ └─ 输出 [B, 128, H, W]
│
├─ ReLU激活
│
└─ InvolutionLayer2 (128→64)
├─ 生成64个1x1核 [B, 64, H, W]
└─ 输出 [B, 64, H, W]
该实现的核心价值在于: