YOLOv4目标检测实战指南

🎙️ 语音朗读 当前: 晓晓 (温柔女声)

YOLOv4概述

YOLOv4结合了大量最新tricks,在速度和精度上达到最佳平衡。

核心改进

graph TB
    A[YOLOv4] --> B[Backbone]
    A --> C[Neck]
    A --> D[Head]
    B --> E[CSPDarknet53]
    C --> F[SPP]
    C --> G[PAN]
    D --> H[YOLO Head]

实现代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import torch
import torch.nn as nn

class Conv(nn.Module):
    """Basic CBL block: bias-free Conv2d -> BatchNorm2d -> LeakyReLU(0.1).

    The convolution carries no bias because the following BatchNorm
    absorbs any constant offset.
    """

    def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False)
        self.bn = nn.BatchNorm2d(out_channels)
        self.act = nn.LeakyReLU(0.1)

    def forward(self, x):
        # conv -> normalize -> activate, spelled out step by step
        out = self.conv(x)
        out = self.bn(out)
        return self.act(out)

class CSPBlock(nn.Module):
    """Simplified Cross Stage Partial block.

    The input is projected by two parallel 1x1 convs into half-width
    branches; their concatenation is fused back to ``out_channels``
    by a final 1x1 conv.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        mid = out_channels // 2  # each branch carries half the output width

        self.conv1 = Conv(in_channels, mid, 1)
        self.conv2 = Conv(in_channels, mid, 1)
        self.conv3 = Conv(2 * mid, out_channels, 1)

    def forward(self, x):
        branches = (self.conv1(x), self.conv2(x))
        return self.conv3(torch.cat(branches, dim=1))

class YOLOv4(nn.Module):
    """CSPDarknet-style backbone skeleton.

    A stem conv followed by five downsampling stages (total stride 32),
    each stage being a strided 3x3 ``Conv`` and a ``CSPBlock`` at the
    doubled channel width (32 -> 64 -> ... -> 1024).

    NOTE(review): ``num_classes`` is accepted but unused here — this
    snippet implements only the backbone, not the detection head.
    """

    def __init__(self, num_classes=80):
        super().__init__()
        stages = [Conv(3, 32, 3, 1, 1)]  # stem, stride 1
        channels = 32
        for _ in range(5):
            stages.append(Conv(channels, channels * 2, 3, 2, 1))  # downsample x2
            stages.append(CSPBlock(channels * 2, channels * 2))
            channels *= 2
        self.backbone = nn.Sequential(*stages)

    def forward(self, x):
        return self.backbone(x)

使用预训练模型

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import cv2
import numpy as np

# Load the Darknet model (cfg + weights expected in the working directory).
net = cv2.dnn.readNetFromDarknet('yolov4.cfg', 'yolov4.weights')
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)

# Read the image; remember its size so normalized boxes can be scaled back.
image = cv2.imread('image.jpg')
img_height, img_width = image.shape[:2]
blob = cv2.dnn.blobFromImage(image, 1/255.0, (416, 416), swapRB=True)
net.setInput(blob)

# Resolve the unconnected (YOLO) output layer names.
# FIX: OpenCV >= 4.5.4 returns a flat int array from getUnconnectedOutLayers(),
# older versions return an Nx1 array — flatten() handles both; the old
# `i[0] - 1` indexing crashes on modern OpenCV.
layer_names = net.getLayerNames()
out_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
output_layers = [layer_names[i - 1] for i in out_ids]
outs = net.forward(output_layers)

# Parse detections. Each row is [cx, cy, w, h, objectness, class scores...]
# with coordinates normalized to [0, 1].
# FIX: coordinates must be rescaled to the original image size — the
# original snippet used the normalized values directly, producing
# meaningless pixel boxes.
class_ids = []
confidences = []
boxes = []

for out in outs:
    for detection in out:
        scores = detection[5:]
        class_id = int(np.argmax(scores))
        confidence = float(scores[class_id])
        if confidence > 0.5:
            center_x = detection[0] * img_width
            center_y = detection[1] * img_height
            width = detection[2] * img_width
            height = detection[3] * img_height
            x = int(center_x - width / 2)   # top-left corner from center
            y = int(center_y - height / 2)
            boxes.append([x, y, int(width), int(height)])
            confidences.append(confidence)
            class_ids.append(class_id)

# Non-maximum suppression drops duplicate overlapping boxes;
# `keep` holds the indices of the surviving detections.
keep = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

性能对比

模型 FPS COCO AP (mAP@0.5:0.95)
YOLOv4 65 43.5%
YOLOv4-Tiny 370 22.0% (AP50为40.2%)
EfficientDet-D3 16 45.8%

总结

YOLOv4是实时目标检测的标杆模型。

© 2019-2026 ovo$^{mc^2}$ All Rights Reserved. | 站点总访问 28969 次 | 访客 19045
Theme by hiero