PyTorch入门实战:动态图与自动求导

🎙️ 语音朗读 当前: 晓晓 (温柔女声)

PyTorch入门实战:动态图与自动求导

PyTorch以其动态计算图和Pythonic的设计风格,成为学术界最受欢迎的深度学习框架。

张量操作

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import torch

# --- Creating tensors ---
a = torch.tensor([1, 2, 3])        # from a Python list
b = torch.zeros(3, 4)              # all zeros
c = torch.ones(2, 3)               # all ones
d = torch.randn(3, 3)              # standard normal samples
e = torch.arange(0, 10, step=2)    # arithmetic sequence 0,2,...,8
f = torch.linspace(0, 1, steps=5)  # 5 evenly spaced points in [0, 1]

# --- GPU support: pick CUDA when available, otherwise fall back to CPU ---
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
x = torch.randn(3, 3).to(device)

# --- Tensor arithmetic ---
x = torch.randn(2, 3)
y = torch.randn(2, 3)

print(x + y)          # element-wise addition
print(x * y)          # element-wise (Hadamard) product
print(x @ y.T)        # matrix multiplication, equivalent to torch.matmul
print(x.sum(dim=1))   # sum along rows
print(x.mean(dim=0))  # mean along columns

# --- Shape manipulation ---
z = torch.randn(2, 3, 4)
print(z.view(2, 12))       # reshape (returns a view on the same storage)
print(z.reshape(6, 4))     # reshape (copies if a view is impossible)
print(z.permute(2, 0, 1))  # reorder the dimensions
print(z.unsqueeze(1))      # insert a size-1 dimension at position 1

自动求导(Autograd)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# requires_grad=True enables gradient tracking for this leaf tensor.
x = torch.tensor([2.0, 3.0], requires_grad=True)

# Forward pass: z = sum(x_i ** 2).
y = x ** 2
z = y.sum()

# Backward pass: populates .grad on every leaf tensor in the graph.
z.backward()

print(f"梯度: {x.grad}")  # dz/dx = 2*x = [4, 6]

# A more complex graph: a tiny linear layer y = x @ w + b with a
# squared-sum loss, so both w and b receive gradients.
w = torch.randn(3, 2, requires_grad=True)
b = torch.randn(2, requires_grad=True)
x = torch.randn(1, 3)

y = torch.matmul(x, w) + b
loss = y.pow(2).sum()

loss.backward()
print(f"w梯度: {w.grad.shape}")
print(f"b梯度: {b.grad.shape}")

# Disable gradient tracking (use at inference time): the statement under
# the `with` block must be indented — the pasted original lost the indent
# and was a SyntaxError.
with torch.no_grad():
    y = torch.matmul(x, w) + b

构建神经网络

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Three-layer fully connected classifier for 28x28 inputs (MNIST-style).

    Architecture: 784 -> 256 -> 128 -> 10 with ReLU activations and
    Dropout(0.3) after each hidden layer. Returns raw logits (no softmax),
    which pairs with nn.CrossEntropyLoss.

    NOTE: the pasted original had lost all class/method indentation and was
    a SyntaxError; this is the reconstructed, valid form.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        # Flatten whatever leading shape arrives down to (N, 784).
        x = x.view(-1, 784)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)  # raw logits
        return x


model = Net()
print(model)

# Parameter counts: total vs. trainable (all layers train here, so equal).
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"总参数量: {total_params:,}")
print(f"可训练参数量: {trainable_params:,}")

训练完整流程

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import torch.optim as optim
from torchvision import datasets, transforms

# Data loading: normalize with the MNIST dataset mean/std.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

train_dataset = datasets.MNIST('./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST('./data', train=False, transform=transform)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1000, shuffle=False)

# Model, loss function, optimizer.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop. NOTE: the pasted original had lost all loop-body
# indentation and was a SyntaxError; this is the reconstructed form.
for epoch in range(10):
    model.train()
    train_loss = 0.0
    correct = 0
    total = 0

    for data, target in train_loader:
        data, target = data.to(device), target.to(device)

        # Standard step: clear grads, forward, loss, backward, update.
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = output.max(1)  # argmax over the 10 class logits
        total += target.size(0)
        correct += predicted.eq(target).sum().item()

    # Evaluation: dropout off (eval mode) and no gradient tracking.
    model.eval()
    test_loss = 0.0
    test_correct = 0
    test_total = 0

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()
            _, predicted = output.max(1)
            test_total += target.size(0)
            test_correct += predicted.eq(target).sum().item()

    print(f'Epoch {epoch+1}: '
          f'Train Loss={train_loss/len(train_loader):.4f}, '
          f'Train Acc={100.*correct/total:.2f}%, '
          f'Test Loss={test_loss/len(test_loader):.4f}, '
          f'Test Acc={100.*test_correct/test_total:.2f}%')

模型保存与加载

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# Save/load the entire model object (pickles the class definition too).
torch.save(model, 'model_full.pth')
loaded_model = torch.load('model_full.pth')

# Save only the weights via the state dict (the recommended approach).
torch.save(model.state_dict(), 'model_weights.pth')
model = Net()
model.load_state_dict(torch.load('model_weights.pth'))
model.eval()

# Save a training checkpoint so training can be resumed later.
checkpoint = {
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': loss,
}
torch.save(checkpoint, 'checkpoint.pth')

自定义Dataset

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
from torch.utils.data import Dataset, DataLoader


class CustomDataset(Dataset):
    """Minimal map-style dataset wrapping parallel data/label sequences.

    Args:
        data: indexable sequence of samples.
        labels: indexable sequence of labels, parallel to ``data``.
        transform: optional callable applied to each sample on access.

    NOTE: the pasted original had lost all class/method indentation and was
    a SyntaxError; this is the reconstructed, valid form.
    """

    def __init__(self, data, labels, transform=None):
        self.data = data
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data[idx]
        label = self.labels[idx]

        # Apply the optional transform lazily, on access.
        if self.transform:
            sample = self.transform(sample)

        return sample, label

# Using the custom Dataset.
# NOTE(review): X_data / y_data are assumed to be defined earlier (loaded
# arrays or tensors) — they are not defined anywhere in this snippet.
dataset = CustomDataset(X_data, y_data)
loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=4)

学习率调度

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
from torch.optim.lr_scheduler import StepLR, CosineAnnealingLR, ReduceLROnPlateau

# Step decay: multiply lr by gamma every step_size epochs.
scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

# Cosine annealing: lr follows a cosine curve over T_max epochs.
scheduler = CosineAnnealingLR(optimizer, T_max=50)

# Adaptive: shrink lr by `factor` after `patience` epochs with no
# improvement of the monitored metric (mode='min' means lower is better).
scheduler = ReduceLROnPlateau(optimizer, mode='min', patience=5, factor=0.5)

# Usage inside the training loop. NOTE: the pasted original had lost the
# loop-body indentation and was a SyntaxError; reconstructed here.
# train(...) / validate(...) are tutorial placeholders, not defined above.
for epoch in range(num_epochs):
    train(...)
    val_loss = validate(...)
    scheduler.step()  # or scheduler.step(val_loss) for ReduceLROnPlateau

总结

PyTorch以其动态计算图和Pythonic的设计,提供了极大的灵活性。Autograd自动求导系统简洁高效,nn.Module使得模型构建直观清晰。从数据加载到训练循环,PyTorch的设计哲学是“所见即所得”,非常适合研究和快速原型开发。

© 2019-2026 ovo$^{mc^2}$ All Rights Reserved. | 站点总访问 28969 次 | 访客 19045
Theme by hiero