A simple tensor:
import torch

a = torch.tensor([[1,2],[3,4]])
print(a)
a = torch.randn(size=(10,3))
print(a)
Output:
tensor([[1, 2],
        [3, 4]])
tensor([[ 0.8995, -1.6137,  1.4489],
        [-0.2796, -2.1443, -2.4618],
        [-0.2358, -0.4249, -0.0716],
        [-0.1267, -0.6382,  0.0593],
        [-0.4956,  1.7054,  0.3874],
        [ 1.3479, -1.6329,  0.2793],
        [ 1.1211, -1.5430,  0.7186],
        [-1.5197,  0.5559, -1.6421],
        [ 0.1900, -0.4175, -0.3922],
        [ 1.8994,  0.1497, -0.7039]])
Broadcasting automatically expands dimensions, and .numpy() converts a tensor to a NumPy array:
print(a-a[0])
print(torch.exp(a)[0].numpy())
tensor([[ 0.0000,  0.0000,  0.0000],
        [-2.0583, -0.5631,  1.4932],
        [-1.0613, -1.0738,  2.2078],
        [-1.5101,  0.5896,  2.4722],
        [-2.8219, -2.0846,  1.2405],
        [ 0.8706, -0.2485,  2.3679],
        [-1.6590,  0.1935,  1.8698],
        [-0.3316,  0.8065,  1.6490],
        [-1.5788, -1.1844, -0.4816],
        [ 0.0680, -1.4526,  1.8159]])
[3.887189  2.1276016  0.17371987]
Out-of-place operation: returns a new tensor and leaves the original data unchanged; u.add(torch.tensor(3)) returns 8 while u stays 5.
In-place operation: modifies the original data directly without creating a new object (roughly analogous to StringBuilder): u.add_(torch.tensor(3)).
add and add_ illustrate the two variants (the trailing underscore marks the in-place version):
u = torch.tensor(5)
print("Result when adding out-of-place:",u.add(torch.tensor(3)))
u.add_(torch.tensor(3))
print("Result after adding in-place:", u)
Summing all rows the straightforward way:
s = torch.zeros_like(a[0])
for i in a:
    s.add_(i)
print(s)
Or simply call:
torch.sum(a,axis=0)
Output:
tensor([ 3.4945, 2.5325, -2.8684])
Computing gradients:
a = torch.randn(size=(2, 2), requires_grad=True)
b = torch.randn(size=(2, 2))
c = torch.mean(torch.sqrt(torch.square(a) + torch.square(b)))  # Do some math using `a`
c.backward()  # call backward() to compute all gradients
# What's the gradient of `c` with respect to `a`?
print(a.grad)
Result:
tensor([[-0.1728, 0.0913], [-0.1666, -0.1942]])
PyTorch accumulates gradients in place (which, for example, lets you accumulate gradients over several backward passes for a more accurate estimate). If backward() is called with retain_graph=True, the computation graph is kept and grad keeps accumulating; call zero_() to reset grad to 0 before recomputing:
c = torch.mean(torch.sqrt(torch.square(a) + torch.square(b)))
c.backward(retain_graph=True)
c.backward(retain_graph=True)
print(a.grad)
a.grad.zero_()
c.backward()
print(a.grad)
Result:
tensor([[-0.5185,  0.2739],
        [-0.4998, -0.5826]])
tensor([[-0.1728,  0.0913],
        [-0.1666, -0.1942]])
grad_fn: differentiation of composite functions (the chain rule).
Since c was produced by mean(), the function recorded in its grad_fn is MeanBackward:
print(c)
Result:
tensor(0.9143, grad_fn=<MeanBackward0>)
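As a quick illustration (a minimal sketch, assuming the same a and b as above), every non-leaf tensor records the operation that produced it, and the autograd graph can be walked backwards through grad_fn.next_functions:
d = torch.square(a) + torch.square(b)
c = torch.mean(torch.sqrt(d))
print(c.grad_fn)                 # e.g. <MeanBackward0 object at ...>
print(c.grad_fn.next_functions)  # the node(s) feeding into it, e.g. SqrtBackward0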
When computing loss functions in PyTorch, the gradient of a tensor with respect to another tensor is expressed as a Jacobian-vector product.
Call backward() and pass a vector v as the argument (v has the same size as the original tensor); the result is vᵀ·J:
c = torch.sqrt(torch.square(a) + torch.square(b))
c.backward(torch.eye(2)) # eye(2) means 2x2 identity matrix
print(a.grad)
Output:
tensor([[-0.8642, 0.0913],
[-0.1666, -0.9710]])
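As a cross-check, here is a hedged sketch (not part of the original example) that builds the full Jacobian of c with respect to a via torch.autograd.functional.jacobian and contracts it with v = I; the result is exactly the quantity that c.backward(v) adds to a.grad (remember that gradients accumulate):
from torch.autograd.functional import jacobian
J = jacobian(lambda t: torch.sqrt(torch.square(t) + torch.square(b)), a)  # shape (2,2,2,2)
v = torch.eye(2)
print(torch.einsum('ij,ijkl->kl', v, J))  # v^T . J, the contribution accumulated into a.grad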
Example 0: optimization with gradient descent
(update rule: x ← x − η·∇f(x), where η is the learning rate lr and ∇f is the gradient)
x = torch.zeros(2,requires_grad=True)
f = lambda x : (x-torch.tensor([3,-2])).pow(2).sum()
lr = 0.1
15 steps of gradient descent converge toward the minimum at (3, -2):
for i in range(15):
    y = f(x)
    y.backward()
    gr = x.grad
    x.data.add_(-lr*gr)
    x.grad.zero_()
    print("Step {}: x[0]={}, x[1]={}".format(i,x[0],x[1]))
Result:
Step 0: x[0]=0.6000000238418579, x[1]=-0.4000000059604645
Step 1: x[0]=1.0800000429153442, x[1]=-0.7200000286102295
Step 2: x[0]=1.4639999866485596, x[1]=-0.9760000705718994
Step 3: x[0]=1.7711999416351318, x[1]=-1.1808000802993774
Step 4: x[0]=2.0169599056243896, x[1]=-1.3446400165557861
Step 5: x[0]=2.2135679721832275, x[1]=-1.4757120609283447
Step 6: x[0]=2.370854377746582, x[1]=-1.5805696249008179
Step 7: x[0]=2.4966835975646973, x[1]=-1.6644556522369385
Step 8: x[0]=2.597346782684326, x[1]=-1.7315645217895508
Step 9: x[0]=2.677877426147461, x[1]=-1.7852516174316406
Step 10: x[0]=2.7423019409179688, x[1]=-1.8282012939453125
Step 11: x[0]=2.793841600418091, x[1]=-1.8625609874725342
Step 12: x[0]=2.835073232650757, x[1]=-1.8900487422943115
Step 13: x[0]=2.868058681488037, x[1]=-1.912039041519165
Step 14: x[0]=2.894446849822998, x[1]=-1.929631233215332
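For comparison, a minimal sketch of the same minimization using torch.optim.SGD instead of updating x.data by hand (same f and learning rate as above; the variable name x_opt is just illustrative):
x_opt = torch.zeros(2, requires_grad=True)
opt = torch.optim.SGD([x_opt], lr=0.1)   # SGD performs x <- x - lr * grad for us
for i in range(15):
    opt.zero_grad()                      # reset accumulated gradients
    f(x_opt).backward()                  # compute the gradient of f at x_opt
    opt.step()                           # apply the update
print(x_opt)                             # approaches (3, -2) as before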
Example 1: linear regression
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification, make_regression
from sklearn.model_selection import train_test_split
import random
Plot the data:
np.random.seed(13) # pick the seed for reproducibility - change it to explore the effects of random variations
train_x = np.linspace(0, 3, 120)
train_labels = 2 * train_x + 0.9 + np.random.randn(*train_x.shape) * 0.5
plt.scatter(train_x,train_labels)
Define the model and the loss function:
input_dim = 1
output_dim = 1
learning_rate = 0.1

# This is our weight matrix
w = torch.tensor([100.0],requires_grad=True,dtype=torch.float32)
# This is our bias vector
b = torch.zeros(size=(output_dim,),requires_grad=True)
def f(x):
    return torch.matmul(x,w) + b

def compute_loss(labels, predictions):
    return torch.mean(torch.square(labels - predictions))
Training loop:
def train_on_batch(x, y):
    predictions = f(x)
    loss = compute_loss(y, predictions)
    loss.backward()
    w.data.sub_(learning_rate * w.grad)
    b.data.sub_(learning_rate * b.grad)
    w.grad.zero_()
    b.grad.zero_()
    return loss

# Shuffle the data.
indices = np.random.permutation(len(train_x))
features = torch.tensor(train_x[indices],dtype=torch.float32)
labels = torch.tensor(train_labels[indices],dtype=torch.float32)

batch_size = 4
for epoch in range(10):
    for i in range(0,len(features),batch_size):
        loss = train_on_batch(features[i:i+batch_size].view(-1,1),labels[i:i+batch_size])
    print('Epoch %d: last batch loss = %.4f' % (epoch, float(loss)))
Output:
Epoch 0: last batch loss = 94.5247
Epoch 1: last batch loss = 9.3428
Epoch 2: last batch loss = 1.4166
Epoch 3: last batch loss = 0.5224
Epoch 4: last batch loss = 0.3807
Epoch 5: last batch loss = 0.3495
Epoch 6: last batch loss = 0.3413
Epoch 7: last batch loss = 0.3390
Epoch 8: last batch loss = 0.3384
Epoch 9: last batch loss = 0.3382
Inspect w, b:
Output:
(tensor([1.8617], requires_grad=True), tensor([1.0711], requires_grad=True))
Plot the fitted line:
plt.scatter(train_x,train_labels)
x = np.array([min(train_x),max(train_x)])
with torch.no_grad():
    y = w.numpy()*x+b.numpy()
plt.plot(x,y,color='red')
Computing on the GPU (CUDA):
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Doing computations on '+device)
### Changes here: indicate device
w = torch.tensor([100.0],requires_grad=True,dtype=torch.float32,device=device)
b = torch.zeros(size=(output_dim,),requires_grad=True,device=device)
def f(x):
    return torch.matmul(x,w) + b

def compute_loss(labels, predictions):
    return torch.mean(torch.square(labels - predictions))

def train_on_batch(x, y):
    predictions = f(x)
    loss = compute_loss(y, predictions)
    loss.backward()
    w.data.sub_(learning_rate * w.grad)
    b.data.sub_(learning_rate * b.grad)
    w.grad.zero_()
    b.grad.zero_()
    return loss
batch_size = 4
for epoch in range(10):
    for i in range(0,len(features),batch_size):
        ### Changes here: move data to required device
        loss = train_on_batch(features[i:i+batch_size].view(-1,1).to(device),labels[i:i+batch_size].to(device))
    print('Epoch %d: last batch loss = %.4f' % (epoch, float(loss)))
Output:
Doing computations on cpu
Epoch 0: last batch loss = 94.5247
Epoch 1: last batch loss = 9.3428
Epoch 2: last batch loss = 1.4166
Epoch 3: last batch loss = 0.5224
Epoch 4: last batch loss = 0.3807
Epoch 5: last batch loss = 0.3495
Epoch 6: last batch loss = 0.3413
Epoch 7: last batch loss = 0.3390
Epoch 8: last batch loss = 0.3384
Epoch 9: last batch loss = 0.3382
Example 2: classification
np.random.seed(0) # pick the seed for reproducibility - change it to explore the effects of random variations
n = 100
X, Y = make_classification(n_samples = n, n_features=2,
                           n_redundant=0, n_informative=2, flip_y=0.1, class_sep=1.5)
X = X.astype(np.float32)
Y = Y.astype(np.int32)

split = [ 70*n//100, (15+70)*n//100 ]
train_x, valid_x, test_x = np.split(X, split)
train_labels, valid_labels, test_labels = np.split(Y, split)
def plot_dataset(features, labels, W=None, b=None):
    # prepare the plot
    fig, ax = plt.subplots(1, 1)
    ax.set_xlabel('$x_i[0]$ -- (feature 1)')
    ax.set_ylabel('$x_i[1]$ -- (feature 2)')
    colors = ['r' if l else 'b' for l in labels]
    ax.scatter(features[:, 0], features[:, 1], marker='o', c=colors, s=100, alpha = 0.5)
    if W is not None:
        min_x = min(features[:,0])
        max_x = max(features[:,0])
        min_y = min(features[:,1])*(1-.1)
        max_y = max(features[:,1])*(1+.1)
        cx = np.array([min_x,max_x],dtype=np.float32)
        cy = (0.5-W[0]*cx-b)/W[1]
        ax.plot(cx,cy,'g')
        ax.set_ylim(min_y,max_y)
    fig.show()
Call it on the training set:
plot_dataset(train_x, train_labels)
(Figure: scatter plot of the two classes.)
Training a one-layer perceptron with PyTorch:
The network has 2 inputs and 1 output, so W has size 2x1 and the bias vector has size 1.
class Network():
    def __init__(self):
        self.W = torch.randn(size=(2,1),requires_grad=True)
        self.b = torch.zeros(size=(1,),requires_grad=True)

    def forward(self,x):
        return torch.matmul(x,self.W)+self.b

    def zero_grad(self):
        # reset accumulated gradients (they are None before the first backward pass)
        if self.W.grad is not None:
            self.W.grad.zero_()
        if self.b.grad is not None:
            self.b.grad.zero_()

    def update(self,lr=0.1):
        self.W.data.sub_(lr*self.W.grad)
        self.b.data.sub_(lr*self.b.grad)

net = Network()
(Note that parameters are updated through W.data rather than W itself: a tensor that autograd is tracking cannot be modified in place directly, so we operate on its .data; gradients are reset with grad.zero_().)
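A small hedged illustration of why .data (or a torch.no_grad() block) is needed here (the tensor t is just for demonstration): an in-place operation on a leaf tensor that requires grad raises a RuntimeError.
t = torch.zeros(3, requires_grad=True)
try:
    t.zero_()            # modifying the tracked tensor directly fails
except RuntimeError as e:
    print("RuntimeError:", e)
t.data.zero_()           # operating on .data is allowed
with torch.no_grad():    # so is an in-place op inside no_grad()
    t.add_(1)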
The network output z is passed through a sigmoid activation to obtain a probability p constrained to the range 0-1.
If pᵢ is the predicted probability of the true class of the i-th example and yᵢ its actual label, we need to compute the binary cross-entropy loss: L = −Σᵢ [yᵢ·log pᵢ + (1−yᵢ)·log(1−pᵢ)].
In PyTorch this can be computed directly with binary_cross_entropy_with_logits(), which also averages over the minibatch (binary_cross_entropy works too if you apply the sigmoid yourself).
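A quick hedged check of that equivalence (the names logits/targets are illustrative): the with-logits version gives the same value as applying sigmoid manually and then binary_cross_entropy, both averaged over the minibatch.
logits = torch.randn(4)
targets = torch.tensor([0., 1., 1., 0.])
l1 = torch.nn.functional.binary_cross_entropy_with_logits(logits, targets)
l2 = torch.nn.functional.binary_cross_entropy(torch.sigmoid(logits), targets)
print(torch.allclose(l1, l2))  # True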
def train_on_batch(net, x, y):
    z = net.forward(x).flatten()
    loss = torch.nn.functional.binary_cross_entropy_with_logits(input=z,target=y)
    net.zero_grad()
    loss.backward()
    net.update()
    return loss
# Create a TensorDataset and DataLoader for easy batched iteration
dataset = torch.utils.data.TensorDataset(torch.tensor(train_x),torch.tensor(train_labels,dtype=torch.float32))
dataloader = torch.utils.data.DataLoader(dataset,batch_size=16)
list(dataloader)[0]
Output:
[tensor([[ 1.5442, 2.5290], [-1.6284, 0.0772], [-1.7141, 2.4770], [-1.4951, 0.7320], [-1.6899, 0.9243], [-0.9474, -0.7681], [ 3.8597, -2.2951], [-1.3944, 1.4300], [ 4.3627, 3.1333], [-1.0973, -1.7011], [-2.5532, -0.0777], [-1.2661, -0.3167], [ 0.3921, 1.8406], [ 2.2091, -1.6045], [ 1.8383, -1.4861], [ 0.7173, -0.9718]]), tensor([1., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 1., 1., 1., 1.])]
Train for 15 epochs:
for epoch in range(15):
    for (x, y) in dataloader:
        loss = train_on_batch(net,x,y)
    print('Epoch %d: last batch loss = %.4f' % (epoch, float(loss)))
Output:
Epoch 0: last batch loss = 0.6491
Epoch 1: last batch loss = 0.6064
Epoch 2: last batch loss = 0.5822
Epoch 3: last batch loss = 0.5679
Epoch 4: last batch loss = 0.5592
Epoch 5: last batch loss = 0.5537
Epoch 6: last batch loss = 0.5501
Epoch 7: last batch loss = 0.5478
Epoch 8: last batch loss = 0.5463
Epoch 9: last batch loss = 0.5454
Epoch 10: last batch loss = 0.5447
Epoch 11: last batch loss = 0.5443
Epoch 12: last batch loss = 0.5441
Epoch 13: last batch loss = 0.5439
Epoch 14: last batch loss = 0.5438
Inspect the parameters:
print(net.W,net.b)
Output:
tensor([[ 0.1330], [-0.2810]], requires_grad=True) tensor([0.], requires_grad=True)
Plot the decision boundary:
plot_dataset(train_x,train_labels,net.W.detach().numpy(),net.b.detach().numpy())
Compute accuracy on the validation set:
pred = torch.sigmoid(net.forward(torch.tensor(valid_x)))
torch.mean(((pred.view(-1)>0.5)==(torch.tensor(valid_labels)>0.5)).type(torch.float32))
Output:
tensor(0.7333)
torch.nn.Module: defining neural networks
(There are two ways to define a neural network: by subclassing the Module class, or with Sequential.)
Standard layers are available for use inside a module:
- Linear: a dense (fully connected) linear layer, equivalent to a single-layer perceptron (see the sketch after this list)
- Softmax, Sigmoid, ReLU: the corresponding activation functions
- others
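As an illustration (a minimal sketch, not from the original notes), Linear is exactly the single-layer perceptron used earlier: it stores a weight matrix and bias and computes x @ W.T + b.
lin = torch.nn.Linear(2, 1)          # 2 inputs, 1 output
xb = torch.randn(4, 2)               # a batch of 4 samples
print(torch.allclose(lin(xb), xb @ lin.weight.T + lin.bias))  # True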
Training a one-layer perceptron with the built-in Linear layer:
net = torch.nn.Linear(2,1) # 2 inputs, 1 output
print(list(net.parameters()))
Result:
[Parameter containing: tensor([[-0.0422, 0.1821]], requires_grad=True), Parameter containing: tensor([0.6582], requires_grad=True)]
Define an SGD optimizer:
optim = torch.optim.SGD(net.parameters(),lr=0.05)
The training loop using the optimizer:
val_x = torch.tensor(valid_x)
val_lab = torch.tensor(valid_labels)
for ep in range(10):
    for (x,y) in dataloader:
        z = net(x).flatten()
        loss = torch.nn.functional.binary_cross_entropy_with_logits(z,y)
        optim.zero_grad()
        loss.backward()
        optim.step()
    acc = ((torch.sigmoid(net(val_x).flatten())>0.5).float()==val_lab).float().mean()
    print(f"Epoch {ep}: last batch loss = {loss}, val acc = {acc}")
Output:
Epoch 0: last batch loss = 0.7596041560173035, val acc = 0.5333333611488342
Epoch 1: last batch loss = 0.6602361798286438, val acc = 0.6000000238418579
Epoch 2: last batch loss = 0.5847358107566833, val acc = 0.6666666865348816
Epoch 3: last batch loss = 0.5263020992279053, val acc = 0.7333333492279053
Epoch 4: last batch loss = 0.48015740513801575, val acc = 0.800000011920929
Epoch 5: last batch loss = 0.4430023431777954, val acc = 0.8666666746139526
Epoch 6: last batch loss = 0.41254672408103943, val acc = 0.8666666746139526
Epoch 7: last batch loss = 0.3871781527996063, val acc = 0.800000011920929
Epoch 8: last batch loss = 0.3657420873641968, val acc = 0.800000011920929
Epoch 9: last batch loss = 0.34739670157432556, val acc = 0.800000011920929
We can wrap the same loop into a reusable train function (here using the Adam optimizer):
def train(net, dataloader, val_x, val_lab, epochs=10, lr=0.05):
    optim = torch.optim.Adam(net.parameters(),lr=lr)
    for ep in range(epochs):
        for (x,y) in dataloader:
            z = net(x).flatten()
            loss = torch.nn.functional.binary_cross_entropy_with_logits(z,y)
            optim.zero_grad()
            loss.backward()
            optim.step()
        acc = ((torch.sigmoid(net(val_x).flatten())>0.5).float()==val_lab).float().mean()
        print(f"Epoch {ep}: last batch loss = {loss}, val acc = {acc}")

net = torch.nn.Linear(2,1)
train(net,dataloader,val_x,val_lab,lr=0.03)
Output:
Epoch 0: last batch loss = 0.48486900329589844, val acc = 0.7333333492279053
Epoch 1: last batch loss = 0.41338109970092773, val acc = 0.800000011920929
Epoch 2: last batch loss = 0.35756850242614746, val acc = 0.800000011920929
Epoch 3: last batch loss = 0.31495171785354614, val acc = 0.800000011920929
Epoch 4: last batch loss = 0.2824164032936096, val acc = 0.800000011920929
Epoch 5: last batch loss = 0.2572754919528961, val acc = 0.800000011920929
Epoch 6: last batch loss = 0.23751722276210785, val acc = 0.800000011920929
Epoch 7: last batch loss = 0.2217157930135727, val acc = 0.800000011920929
Epoch 8: last batch loss = 0.2088666558265686, val acc = 0.800000011920929
Epoch 9: last batch loss = 0.19824868440628052, val acc = 0.800000011920929
Defining the network as a sequence of layers:
net = torch.nn.Sequential(torch.nn.Linear(2,5),torch.nn.Sigmoid(),torch.nn.Linear(5,1))
print(net)
Output:
Sequential( (0): Linear(in_features=2, out_features=5, bias=True) (1): Sigmoid() (2): Linear(in_features=5, out_features=1, bias=True) )
Train it:
train(net,dataloader,val_x,val_lab)
Output:
Epoch 0: last batch loss = 0.5835739970207214, val acc = 0.800000011920929
Epoch 1: last batch loss = 0.4642275869846344, val acc = 0.800000011920929
Epoch 2: last batch loss = 0.35158076882362366, val acc = 0.800000011920929
Epoch 3: last batch loss = 0.26132312417030334, val acc = 0.800000011920929
Epoch 4: last batch loss = 0.19465585052967072, val acc = 0.800000011920929
Epoch 5: last batch loss = 0.14735405147075653, val acc = 0.800000011920929
Epoch 6: last batch loss = 0.11454981565475464, val acc = 0.800000011920929
Epoch 7: last batch loss = 0.09244414418935776, val acc = 0.800000011920929
Epoch 8: last batch loss = 0.07805468142032623, val acc = 0.800000011920929
Epoch 9: last batch loss = 0.06894762068986893, val acc = 0.800000011920929
Defining the network as a class:
class MyNet(torch.nn.Module):
    def __init__(self,hidden_size=10,func=torch.nn.Sigmoid()):
        super().__init__()
        self.fc1 = torch.nn.Linear(2,hidden_size)
        self.func = func
        self.fc2 = torch.nn.Linear(hidden_size,1)

    def forward(self,x):
        x = self.fc1(x)
        x = self.func(x)
        x = self.fc2(x)
        return x
net = MyNet(func=torch.nn.ReLU())
print(net)
Output:
MyNet( (fc1): Linear(in_features=2, out_features=10, bias=True) (func): ReLU() (fc2): Linear(in_features=10, out_features=1, bias=True) )
Train it:
train(net,dataloader,val_x,val_lab,lr=0.005)
Output:
Epoch 0: last batch loss = 0.7821246981620789, val acc = 0.46666666865348816
Epoch 1: last batch loss = 0.7457502484321594, val acc = 0.5333333611488342
Epoch 2: last batch loss = 0.7120334506034851, val acc = 0.5333333611488342
Epoch 3: last batch loss = 0.6811249256134033, val acc = 0.6666666865348816
Epoch 4: last batch loss = 0.6533011794090271, val acc = 0.7333333492279053
Epoch 5: last batch loss = 0.627849280834198, val acc = 0.7333333492279053
Epoch 6: last batch loss = 0.6030643582344055, val acc = 0.800000011920929
Epoch 7: last batch loss = 0.5775002837181091, val acc = 0.800000011920929
Epoch 8: last batch loss = 0.5522137880325317, val acc = 0.8666666746139526
Epoch 9: last batch loss = 0.5250465869903564, val acc = 0.8666666746139526
Defining the network as a PyTorch Lightning module:
import pytorch_lightning as pl
class MyNetPL(pl.LightningModule):
    def __init__(self, hidden_size = 10, func = torch.nn.Sigmoid()):
        super().__init__()
        self.fc1 = torch.nn.Linear(2,hidden_size)
        self.func = func
        self.fc2 = torch.nn.Linear(hidden_size,1)
        self.val_epoch_num = 0 # for logging

    def forward(self, x):
        x = self.fc1(x)
        x = self.func(x)
        x = self.fc2(x)
        return x

    def training_step(self, batch, batch_nb):
        x, y = batch
        y_res = self(x).view(-1)
        loss = torch.nn.functional.binary_cross_entropy_with_logits(y_res, y)
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.SGD(self.parameters(), lr = 0.005)
        return optimizer

    def validation_step(self, batch, batch_nb):
        x, y = batch
        y_res = self(x).view(-1)
        val_loss = torch.nn.functional.binary_cross_entropy_with_logits(y_res, y)
        print("Epoch ", self.val_epoch_num, ": val loss = ", val_loss.item(), " val acc = ",
              ((torch.sigmoid(y_res.flatten())>0.5).float()==y).float().mean().item(), sep = "")
        self.val_epoch_num += 1
valid_dataset = torch.utils.data.TensorDataset(torch.tensor(valid_x),torch.tensor(valid_labels,dtype=torch.float32))
valid_dataloader = torch.utils.data.DataLoader(valid_dataset, batch_size = 16)

net = MyNetPL(func=torch.nn.ReLU())
trainer = pl.Trainer(max_epochs = 30, log_every_n_steps = 1, accelerator='gpu', devices=1)
trainer.fit(model = net, train_dataloaders = dataloader, val_dataloaders = valid_dataloader)
Output: omitted.
Further reading:
- Experiments with the Keras framework
- Experiments with the Keras + TensorFlow 2.x framework
- Plotting training- and validation-loss curves with a PyTorch optimizer
- Solving the MNIST classification problem with a PyTorch optimizer