MobileNetV3:轻量级网络的极致优化

🎙️ 语音朗读 当前: 晓晓 (温柔女声)

前言

MobileNetV3是Google在2019年发布的轻量级神经网络,通过NAS(神经架构搜索)和多种优化技术,在保持高精度的同时实现了极致的效率和速度。

MobileNetV3核心组件

import torch
import torch.nn as nn
import torch.nn.functional as F

class HardSwish(nn.Module):
    """Hard-Swish activation: ``x * ReLU6(x + 3) / 6``.

    A cheap piecewise-linear approximation of Swish used throughout
    MobileNetV3.
    """

    def forward(self, x):
        # ReLU6 clamps the gate to [0, 6]; dividing by 6 maps it to [0, 1].
        gate = F.relu6(x + 3)
        return x * gate / 6

class HardSigmoid(nn.Module):
    """Hard-Sigmoid activation: ``ReLU6(x + 3) / 6``.

    Piecewise-linear approximation of the sigmoid; outputs lie in [0, 1].
    """

    def forward(self, x):
        shifted = x + 3
        return F.relu6(shifted) / 6

class SqueezeExcitation(nn.Module):
    """Squeeze-and-Excitation channel-attention module.

    Globally average-pools each channel, passes the pooled vector through a
    small two-layer bottleneck (implemented with 1x1 convs), and rescales the
    input channels by the resulting [0, 1] gates.

    Args:
        in_channels: number of input (and output) channels.
        reduced_dim: hidden width of the bottleneck.
    """

    def __init__(self, in_channels, reduced_dim):
        super().__init__()
        self.se = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(in_channels, reduced_dim, 1),
            # Fix: the MobileNetV3 paper (and torchvision's implementation)
            # use ReLU for the squeeze step, not Hard-Swish; Hard-Sigmoid
            # only gates the excite step below.
            nn.ReLU(inplace=True),
            nn.Conv2d(reduced_dim, in_channels, 1),
            HardSigmoid()
        )

    def forward(self, x):
        # Broadcast multiply: (B, C, 1, 1) gates scale (B, C, H, W) features.
        return x * self.se(x)

class Bneck(nn.Module):
    """MobileNetV3 inverted-residual bottleneck block.

    Structure: 1x1 expand -> depthwise conv -> optional SE -> 1x1 project,
    with a skip connection when the block preserves spatial size and width.

    Args:
        in_channels: input channel count.
        out_channels: output channel count.
        kernel_size: depthwise kernel size (3 or 5 in the paper).
        stride: depthwise stride (1 or 2).
        expanded_dim: hidden width after the 1x1 expansion.
        se: whether to insert a Squeeze-and-Excitation module.
        nl: non-linearity, 'RE' (ReLU) or 'HS' (Hard-Swish).

    Raises:
        ValueError: if ``nl`` is neither 'RE' nor 'HS'.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3,
                 stride=1, expanded_dim=16, se=True, nl='RE'):
        super().__init__()

        self.stride = stride
        # A residual add is only shape-valid when spatial size and channel
        # count are unchanged.
        self.use_residual = (stride == 1 and in_channels == out_channels)

        # Non-linearity. It is stateless, so sharing one instance between the
        # two Sequentials below is safe.
        if nl == 'RE':
            self.nl = nn.ReLU(inplace=True)
        elif nl == 'HS':
            self.nl = HardSwish()
        else:
            # Fail fast instead of the deferred AttributeError the original
            # code produced for an unknown activation tag.
            raise ValueError(f"unknown non-linearity: {nl!r}")

        # 1x1 pointwise expansion.
        self.expanded_conv = nn.Sequential(
            nn.Conv2d(in_channels, expanded_dim, 1, bias=False),
            nn.BatchNorm2d(expanded_dim),
            self.nl
        )

        # Depthwise convolution (groups == channels); 'same' padding.
        self.depthwise_conv = nn.Sequential(
            nn.Conv2d(
                expanded_dim, expanded_dim, kernel_size,
                stride, (kernel_size - 1) // 2, groups=expanded_dim, bias=False
            ),
            nn.BatchNorm2d(expanded_dim),
            self.nl
        )

        # Optional SE attention on the expanded features.
        if se:
            reduced_dim = max(1, expanded_dim // 4)
            self.se = SqueezeExcitation(expanded_dim, reduced_dim)
        else:
            self.se = nn.Identity()

        # Linear 1x1 projection back down (no activation, per the paper).
        self.project_conv = nn.Sequential(
            nn.Conv2d(expanded_dim, out_channels, 1, bias=False),
            nn.BatchNorm2d(out_channels)
        )

    def forward(self, x):
        identity = x

        out = self.expanded_conv(x)
        out = self.depthwise_conv(out)
        out = self.se(out)
        out = self.project_conv(out)

        # Bug fix: both branches previously returned the projected tensor, so
        # the residual connection was never actually applied.
        if self.use_residual:
            return out + identity
        return out

MobileNetV3整体架构

class MobileNetV3Large(nn.Module):
    """MobileNetV3-Large image classifier.

    Pipeline: stem conv -> 15 bottleneck blocks -> 1x1 head conv ->
    global average pool -> two-layer classifier.

    Args:
        num_classes: size of the output logit vector.
        dropout: dropout probability used in the classifier head.
    """

    def __init__(self, num_classes=1000, dropout=0.0005):
        super().__init__()

        # Stem: 3x3 stride-2 convolution with Hard-Swish.
        self.stem = nn.Sequential(
            nn.Conv2d(3, 16, 3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(16),
            HardSwish()
        )

        # Per-block settings: (out_channels, kernel_size, stride,
        # expanded_dim, se, nl), grouped by stage.
        specs = [
            # Stage 1
            (16, 3, 1, 16, False, 'RE'),
            # Stage 2
            (24, 3, 2, 64, False, 'RE'),
            (24, 3, 1, 72, False, 'RE'),
            # Stage 3
            (40, 5, 2, 72, True, 'RE'),
            (40, 5, 1, 120, True, 'RE'),
            (40, 5, 1, 120, True, 'RE'),
            # Stage 4
            (80, 3, 2, 240, False, 'HS'),
            (80, 3, 1, 200, False, 'HS'),
            (80, 3, 1, 184, False, 'HS'),
            (80, 3, 1, 184, False, 'HS'),
            # Stage 5
            (112, 3, 1, 480, True, 'HS'),
            (112, 3, 1, 672, True, 'HS'),
            # Stage 6
            (160, 5, 2, 672, True, 'HS'),
            (160, 5, 1, 960, True, 'HS'),
            (160, 5, 1, 960, True, 'HS'),
        ]

        # Each block consumes the previous block's output width; the stem
        # emits 16 channels.
        blocks = []
        width = 16
        for out_ch, kernel, stride, exp_ch, se, nl in specs:
            blocks.append(Bneck(
                in_channels=width,
                out_channels=out_ch,
                kernel_size=kernel,
                stride=stride,
                expanded_dim=exp_ch,
                se=se,
                nl=nl
            ))
            width = out_ch
        self.bottlenecks = nn.Sequential(*blocks)

        # 1x1 head convolution before pooling.
        self.final_conv = nn.Sequential(
            nn.Conv2d(160, 960, 1, bias=False),
            nn.BatchNorm2d(960),
            HardSwish()
        )

        # Global pooling and the two-layer classifier head.
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(960, 1280),
            HardSwish(),
            nn.Dropout(dropout),
            nn.Linear(1280, num_classes)
        )

        self._initialize_weights()

    def forward(self, x):
        features = self.final_conv(self.bottlenecks(self.stem(x)))
        pooled = self.avgpool(features)
        flat = pooled.view(pooled.size(0), -1)
        return self.classifier(flat)

    def _initialize_weights(self):
        """Kaiming init for convs, unit/zero affine for BN, small-normal linears."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out')
                if module.bias is not None:
                    nn.init.zeros_(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                if module.bias is not None:
                    nn.init.zeros_(module.bias)

class MobileNetV3Small(nn.Module):
    """MobileNetV3-Small image classifier (lighter variant of -Large).

    Pipeline: stem conv -> 11 bottleneck blocks -> 1x1 head conv ->
    global average pool -> two-layer classifier.

    Args:
        num_classes: size of the output logit vector.
        dropout: dropout probability used in the classifier head.
    """

    def __init__(self, num_classes=1000, dropout=0.0005):
        super().__init__()

        # Stem: 3x3 stride-2 convolution with Hard-Swish.
        self.stem = nn.Sequential(
            nn.Conv2d(3, 16, 3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(16),
            HardSwish()
        )

        # (out_channels, kernel_size, stride, expanded_dim, se, nl) per block.
        config = [
            (16, 3, 2, 16, True, 'RE'),
            (24, 3, 2, 72, False, 'RE'),
            (24, 3, 1, 88, False, 'RE'),
            (40, 5, 2, 96, True, 'HS'),
            (40, 5, 1, 240, True, 'HS'),
            (40, 5, 1, 240, True, 'HS'),
            (48, 5, 1, 120, True, 'HS'),
            (48, 5, 1, 144, True, 'HS'),
            (96, 5, 2, 288, True, 'HS'),
            (96, 5, 1, 576, True, 'HS'),
            (96, 5, 1, 576, True, 'HS'),
        ]

        layers = []
        in_ch = 16  # stem output width
        for out_ch, kernel, stride, exp_ch, se, nl in config:
            layers.append(Bneck(
                in_channels=in_ch,
                out_channels=out_ch,
                kernel_size=kernel,
                stride=stride,
                expanded_dim=exp_ch,
                se=se,
                nl=nl
            ))
            in_ch = out_ch

        self.bottlenecks = nn.Sequential(*layers)

        # 1x1 head convolution before pooling.
        self.final_conv = nn.Sequential(
            nn.Conv2d(96, 576, 1, bias=False),
            nn.BatchNorm2d(576),
            HardSwish()
        )

        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(576, 1024),
            HardSwish(),
            nn.Dropout(dropout),
            nn.Linear(1024, num_classes)
        )

        self._initialize_weights()

    def forward(self, x):
        # Bug fix: the original class defined no forward(), so calling the
        # model raised NotImplementedError. Mirrors MobileNetV3Large.forward.
        x = self.stem(x)
        x = self.bottlenecks(x)
        x = self.final_conv(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """Same init scheme as MobileNetV3Large, added for consistency."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

训练与评估

def train_mobilenetv3(model, train_loader, criterion, optimizer, device, epochs=300):
    """Supervised training loop for MobileNetV3.

    Runs ``epochs`` passes over ``train_loader``, printing the average loss
    and top-1 accuracy after each epoch.
    """
    model = model.to(device)

    for epoch in range(epochs):
        model.train()
        epoch_loss = 0.0
        n_correct = 0
        n_seen = 0

        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Forward / backward / parameter update.
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Accumulate running statistics for the epoch report.
            epoch_loss += loss.item()
            n_correct += (outputs.argmax(dim=1) == labels).sum().item()
            n_seen += labels.size(0)

        avg_loss = epoch_loss / len(train_loader)
        accuracy = 100. * n_correct / n_seen
        print(f"Epoch {epoch+1}: Loss={avg_loss:.4f}, Acc={accuracy:.2f}%")

def evaluate_mobilenetv3(model, test_loader, device):
    """Return the top-1 accuracy (percent) of ``model`` over ``test_loader``."""
    model.eval()
    n_correct = 0
    n_total = 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            preds = model(inputs).argmax(dim=1)
            n_correct += (preds == labels).sum().item()
            n_total += labels.size(0)

    return 100. * n_correct / n_total

MobileNetV3特性对比

| 特性 | MobileNetV1 | MobileNetV2 | MobileNetV3 |
| --- | --- | --- | --- |
| 深度可分离卷积 | ✓ | ✓ | ✓ |
| 线性瓶颈 | - | ✓ | ✓ |
| 倒残差 | - | ✓ | ✓ |
| SE注意力 | - | - | ✓ |
| Hard-Swish | - | - | ✓ |
| NAS搜索 | - | - | ✓ |

模型量化和部署

def quantize_mobilenetv3(model, dataloader):
    """Quantize MobileNetV3 with PyTorch dynamic quantization.

    Args:
        model: trained float model; switched to eval mode before quantizing.
        dataloader: unused here. NOTE(review): dynamic quantization needs no
            calibration data — keep this parameter only if static (calibrated)
            quantization is planned; otherwise it is dead.

    Returns:
        The dynamically quantized model (int8 weights).
    """
    model.eval()

    # Dynamic quantization: weights stored as int8, activations quantized
    # on the fly at inference time.
    # NOTE(review): PyTorch dynamic quantization only swaps Linear/RNN-style
    # modules, so the nn.Conv2d entry below is likely a no-op — confirm
    # against the targeted PyTorch version; static quantization would be
    # required to cover the conv layers.
    quantized_model = torch.quantization.quantize_dynamic(
        model,
        {nn.Conv2d, nn.Linear},
        dtype=torch.qint8
    )

    return quantized_model

def export_to_tflite(model, input_shape=(1, 3, 224, 224)):
    """Export the model toward TensorFlow Lite via an ONNX intermediate.

    Only the ONNX export step is actually executed; the ONNX->TF->TFLite
    steps are sketched in comments. Writes "mobilenetv3.onnx" into the
    current working directory.

    Args:
        model: torch.nn.Module to export (should be in eval mode).
        input_shape: shape of the dummy tracing input, NCHW by default.
    """
    # 1. Export to ONNX by tracing the model with a dummy input.
    dummy_input = torch.randn(input_shape)
    torch.onnx.export(
        model, dummy_input, "mobilenetv3.onnx",
        export_params=True, opset_version=11,
        do_constant_folding=True,
        input_names=['input'],
        output_names=['output']
    )

    # 2. Convert ONNX to TensorFlow with onnx-tf (not implemented here):
    # onnx_tf.backend.prepare(onnx_model)
    # tf_model = onnx_tf.backend.tfgraph_def_from_onnx_model(onnx_model)

    # 3. Convert to TensorFlow Lite (not implemented here):
    # converter = tf.lite.TFLiteConverter.from_session(tf_session,...)
    # tflite_model = converter.convert()

MobileNetV3应用场景

  • 移动端图像分类:手机APP中的实时识别
  • 边缘设备:IoT设备上的AI推理
  • 自动驾驶:车载系统的目标检测
  • 视频监控:低功耗设备的视频分析

总结

MobileNetV3通过NAS搜索、SE注意力、Hard-Swish激活等多种优化技术,实现了轻量级网络在精度和速度上的最佳平衡,为移动端和边缘设备的深度学习应用提供了优秀的基础模型。

参考资源

© 2019-2026 ovo$^{mc^2}$ All Rights Reserved. | 站点总访问 28969 次 | 访客 19045
Theme by hiero