PyTorch: a Python-based scientific computing package
Three levels of abstraction: Tensor, an imperative ndarray that can run on the GPU; autograd, which records operations into a computational graph for automatic differentiation; and nn.Module, which packages layers, parameters, and forward logic into reusable networks.
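The three levels can be seen side by side in a minimal sketch (the names below are illustrative):
import torch
import torch.nn as nn
# Tensor level: imperative n-dimensional arrays, optionally on GPU
t = torch.randn(3, 4)
# autograd level: the same tensors, with operations recorded for backprop
w = torch.randn(4, 2, requires_grad=True)
loss = (t @ w).sum()
loss.backward()          # fills w.grad with d(loss)/dw
print(w.grad.shape)      # torch.Size([4, 2])
# nn.Module level: layers bundling parameters with a forward() method
layer = nn.Linear(4, 2)
print(layer(t).shape)    # torch.Size([3, 2])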
Basic tensor operations:
from __future__ import print_function
import torch
# Returns a block of uninitialized memory as x; the values differ from call to call
# https://stackoverflow.com/questions/51140927/what-is-uninitialized-data-in-pytorch-empty-function
x = torch.empty(5, 3)
# Randomly initialized matrix
x = torch.rand(5, 3)
# All-zeros matrix
x = torch.zeros(5, 3, dtype=torch.long)
# Construct a tensor directly from data
x = torch.tensor([5.5, 3])
# All-ones matrix (new_* methods reuse x's properties unless overridden)
x = x.new_ones(5, 3, dtype=torch.double)
# Take x's shape and fill a new matrix with random values
x = torch.randn_like(x, dtype=torch.float)
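A quick look at what these constructors produced, plus the NumPy bridge (variable names here are illustrative):
x = torch.rand(5, 3)
print(x.size())           # torch.Size([5, 3]); torch.Size behaves like a tuple
print(x.dtype)            # torch.float32
a = x.numpy()             # ndarray sharing the same memory on CPU
b = torch.from_numpy(a)   # and back again, still sharing memory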
Basic tensor arithmetic:
# Addition (x must match y's shape, so use a 5x3 tensor here)
x = torch.rand(5, 3)
y = torch.rand(5, 3)
print(x + y)
# Addition, function form
print(torch.add(x, y))
# Addition with an explicit output tensor
result = torch.empty(5, 3)
torch.add(x, y, out=result)
# In-place addition; y itself is modified
y.add_(x)
# Indexing works as in NumPy: take the second column
print(x[:, 1])
# Reshape a tensor
x = torch.randn(4, 4)
y = x.view(16)
Next, toward training a network implemented with low-level tensors.
Differentiation through tensors is trackable: once a tensor's requires_grad is set to True, operations on it, and the tensors they produce, inherit the same differentiable property.
a = torch.randn(2, 2)
a = ((a * 3) / (a - 1))
print(a.requires_grad)   # False: requires_grad defaults to False
a.requires_grad_(True)   # flips the flag in place
print(a.requires_grad)   # True
b = (a * a).sum()
print(b.grad_fn)         # <SumBackward0 object at 0x7f1b24845f98>
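Once tracking is enabled, calling backward() on a scalar fills in .grad on the leaf tensors. A minimal sketch:
x = torch.ones(2, 2, requires_grad=True)
y = (x * x).sum()   # y = sum of x_ij squared
y.backward()        # dy/dx_ij = 2 * x_ij
print(x.grad)       # tensor([[2., 2.], [2., 2.]])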
Below is the gradient of a transformation as implemented in NumPy and in PyTorch; the latter can track the computation automatically:
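The original comparison code is not reproduced here; the sketch below makes the same point with a simple y = x**2 transformation (variable names are illustrative): in NumPy the derivative has to be written out by hand, while PyTorch records the operation and derives the gradient via backward().
import numpy as np
import torch
# NumPy: the gradient of y = x**2 must be hand-derived
x_np = np.array(3.0)
y_np = x_np ** 2
grad_np = 2 * x_np                # dy/dx written out manually
# PyTorch: autograd records the op and computes the same gradient
x_t = torch.tensor(3.0, requires_grad=True)
y_t = x_t ** 2
y_t.backward()
print(grad_np, x_t.grad.item())   # 6.0 6.0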
There are several ways to load data. The datasets used here are classic ones, and they are typically wrapped with torch.utils.data.DataLoader:
import os
import torchvision.datasets as dset
import torchvision.transforms as transforms
root = './data'
if not os.path.exists(root):
    os.mkdir(root)
# 0.5 and 1.0 here are the mean and std used to normalize the data.
# For MNIST the actual per-pixel mean and std are 0.1307 and 0.3081,
# so the values used here are not the canonical ones.
trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (1.0,))])
# If not already present, download the MNIST dataset
train_set = dset.MNIST(root=root, train=True, transform=trans, download=True)
test_set = dset.MNIST(root=root, train=False, transform=trans, download=True)
batch_size = 100
train_loader = torch.utils.data.DataLoader(
    dataset=train_set,
    batch_size=batch_size,
    shuffle=True)
test_loader = torch.utils.data.DataLoader(
    dataset=test_set,
    batch_size=batch_size,
    shuffle=False)
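A quick sanity check of what the loader yields per iteration (shapes follow from the MNIST setup above):
images, labels = next(iter(train_loader))
print(images.shape)   # torch.Size([100, 1, 28, 28]): batch, channel, height, width
print(labels.shape)   # torch.Size([100])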
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Input image channels: 1; output channels: 6; 5x5 convolution kernel.
        # padding defaults to 0 here, so this network expects 32x32 inputs;
        # for 28x28 MNIST images, LeNet traditionally uses padding=2
        # (two rings of zeros) to pad them up to 32x32.
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # An affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # Convolve, apply the ReLU activation, then 2x2 max pooling
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the pooling window is square, a single number suffices
        # Second convolution + pooling stage
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Flatten the feature maps to 2-D: (batch_size, 16 * 5 * 5) = (batch_size, 400)
        x = x.view(-1, self.num_flat_features(x))
        # First fully connected layer
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        # x.size() returns e.g. (batch_size, 16, 5, 5); slicing from index 1
        # drops the leading batch dimension, leaving (16, 5, 5)
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

net = Net()
print(net)
# The printed network:
# Net(
#   (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
#   (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
#   (fc1): Linear(in_features=400, out_features=120, bias=True)
#   (fc2): Linear(in_features=120, out_features=84, bias=True)
#   (fc3): Linear(in_features=84, out_features=10, bias=True)
# )
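To sanity-check the architecture, push a dummy input through it. Note the shape: with no padding, this network needs 32x32 inputs to reach the 400 features fc1 expects (the batch of one below is illustrative):
dummy = torch.randn(1, 1, 32, 32)   # one 32x32 single-channel image
out = net(dummy)
print(out.shape)                    # torch.Size([1, 10]): one score per class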
The convolutional layers here are built with the nn.Conv2d class:
# https://pytorch.org/docs/stable/nn.html#conv2d
torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1,
                groups=1, bias=True, padding_mode='zeros')
# Parameters:
# in_channels (int) – Number of channels in the input image
# out_channels (int) – Number of channels produced by the convolution
# kernel_size (int or tuple) – Size of the convolving kernel
# stride (int or tuple, optional) – Stride of the convolution. Default: 1
# padding (int or tuple, optional) – Zero-padding added to both sides of the input. Default: 0
# padding_mode (string, optional) – Default: 'zeros'
# dilation (int or tuple, optional) – Spacing between kernel elements. Default: 1
# groups (int, optional) – Number of blocked connections from input channels to output channels. Default: 1
# bias (bool, optional) – If True, adds a learnable bias to the output. Default: True
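The spatial output size follows floor((in + 2*padding - dilation*(kernel_size - 1) - 1) / stride + 1); a quick check against the first layer above:
conv = nn.Conv2d(1, 6, kernel_size=5)   # stride=1, padding=0 defaults
x = torch.randn(1, 1, 32, 32)
print(conv(x).shape)                    # torch.Size([1, 6, 28, 28]); (32 - 5) + 1 = 28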
The forward pass above also uses the view function, which reshapes a tensor much like reshape in NumPy. It treats the tensor's underlying data as one flat sequence and reinterprets it with the dimensions passed to view.
# [1] Even when two tensors start with different shapes, viewing them with the
# same arguments yields identical results
a = torch.Tensor([[[1, 2, 3], [4, 5, 6]]])
b = torch.Tensor([1, 2, 3, 4, 5, 6])
print(a.view(1, 6))
print(b.view(1, 6))
# tensor([[1., 2., 3., 4., 5., 6.]])  (both prints)
# [2] Reshape the same six elements to 3x2
a = torch.Tensor([[[1, 2, 3], [4, 5, 6]]])
print(a.view(3, 2))
# tensor([[1., 2.],
#         [3., 4.],
#         [5., 6.]])
# [3] view's argument list cannot be empty; use -1 for a dimension to be inferred.
# For example, a tensor with 2x3 = 6 elements viewed with (1, -1) infers the -1
# as 6, so the result has shape 1x6.
a = torch.Tensor(2, 3)   # uninitialized memory; the zeros below are not guaranteed
print(a)
# tensor([[0.0000, 0.0000, 0.0000],
#         [0.0000, 0.0000, 0.0000]])
print(a.view(1, -1))
# tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]])
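One caveat: view never copies data, so it requires contiguous memory; after an operation such as transpose, call .contiguous() first, or use .reshape(), which copies only when needed:
a = torch.randn(2, 3)
b = a.t()                             # shape (3, 2), but non-contiguous
# b.view(6)                           # would raise a RuntimeError
print(b.contiguous().view(6).shape)   # torch.Size([6])
print(b.reshape(6).shape)             # torch.Size([6])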
import torch.optim as optim
# https://pytorch.org/docs/stable/nn.html#loss-functions
# The available loss functions are documented there
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
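Note that nn.CrossEntropyLoss combines LogSoftmax and NLLLoss internally, so the network should emit raw logits and the targets are plain class indices. A minimal sketch with dummy values:
logits = torch.randn(4, 10)             # batch of 4 samples, 10 raw class scores each
targets = torch.tensor([3, 0, 9, 1])    # class indices, not one-hot vectors
print(criterion(logits, targets))       # a scalar loss tensor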
It is best to call an optimizer directly; at the most basic level the parameters can also be updated by hand (not recommended). A sketch of that manual update follows, with the optimizer-driven training loop after it.
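For reference, a minimal sketch of the manual update (plain SGD without the momentum term above; the dummy batch is illustrative only):
inputs = torch.randn(1, 1, 32, 32)   # dummy batch, illustrative only
labels = torch.tensor([3])
net.zero_grad()                      # clear any stale gradients
loss = criterion(net(inputs), labels)
loss.backward()                      # populate p.grad for every parameter
learning_rate = 0.001
with torch.no_grad():
    for p in net.parameters():
        p -= learning_rate * p.grad  # the bare step that optimizer.step() performs
With that in mind, the standard optimizer-based loop: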
for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # get the inputs
        inputs, labels = data
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
print('Finished Training')
# [1, 2000] loss: 2.182
# [1, 4000] loss: 1.819
# [1, 6000] loss: 1.648
# [1, 8000] loss: 1.569
# [1, 10000] loss: 1.511
# [1, 12000] loss: 1.473
# [2, 2000] loss: 1.414
# [2, 4000] loss: 1.365
# [2, 6000] loss: 1.358
# [2, 8000] loss: 1.322
# [2, 10000] loss: 1.298
# [2, 12000] loss: 1.282
# Finished Training
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))
# Accuracy of the network on the 10000 test images: 55 %