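# NOTE: Gradients only come into existence through backpropagation. When the
# training loop is written from scratch, check the gradients of `params`
# before the first backward pass (they are still None at that point) instead
# of zeroing them unconditionally.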
import torch
import numpy as np
import torch. utils. data as Data
import torchvision. datasets
import torchvision. transforms as transforms
import sys

# The search path has to be extended before d2lzh_pytorch is imported;
# "路径" is the path string taken verbatim from the original script.
sys.path.append("路径")
import d2lzh_pytorch as d2l

# -------------------------------------------------- Fetch and load the data
# Mini-batch size shared by both data loaders and by the SGD step in train().
batch_size = 256

# FashionMNIST train/test splits; download=True is passed for both, and every
# sample goes through transforms.ToTensor().
train_mnist = torchvision.datasets.FashionMNIST(
    root='路径', download=True, train=True, transform=transforms.ToTensor()
)
test_mnist = torchvision.datasets.FashionMNIST(
    root='路径', download=True, train=False, transform=transforms.ToTensor()
)

# Only the training batches are shuffled; the test loader uses shuffle=False.
train_iter = Data.DataLoader(train_mnist, batch_size=batch_size, shuffle=True)
test_iter = Data.DataLoader(test_mnist, batch_size=batch_size, shuffle=False)

# -------------------------------------------------- Define the model parameters
# Layer widths of the two-layer perceptron: input -> hidden -> output.
# NOTE(review): 784 and 10 presumably correspond to flattened 28x28 images and
# 10 classes -- confirm against the dataset.
num_inputs, num_outputs, num_hidden = 784, 10, 256


def _gaussian_weight(std, shape):
    """Return a float32 tensor of the given shape drawn from N(0, std**2)."""
    samples = np.random.normal(0, std, shape)
    return torch.tensor(samples, dtype=torch.float)


# Weights start as Gaussian noise (std 0.01 for layer 1, 0.1 for layer 2);
# biases start at zero.
w1 = _gaussian_weight(0.01, (num_inputs, num_hidden))
b1 = torch.zeros(num_hidden, dtype=torch.float)
w2 = _gaussian_weight(0.1, (num_hidden, num_outputs))
b2 = torch.zeros(num_outputs, dtype=torch.float)

# Every trainable tensor, in the order [w1, b1, w2, b2]; autograd is switched
# on in place for each of them.
params = [w1, b1, w2, b2]
for param in params:
    param.requires_grad_(True)
# --------------------------------------------- Define the activation function


def relu(X):
    """Return the element-wise maximum of ``X`` and 0 (the ReLU activation).

    Args:
        X: input tensor.

    Returns:
        A tensor in which every element of ``X`` below zero is replaced by
        zero and all other elements are kept.
    """
    # The zero is a 0-dim tensor, so torch.max broadcasts it against X.
    return torch.max(input=X, other=torch.tensor(0.0))


# --------------------------------------------------- Define the model
def net(X):
    """Forward pass of the two-layer perceptron.

    Uses the module-level ``num_inputs``, ``w1``, ``b1``, ``w2``, ``b2`` and
    ``relu``.

    Args:
        X: input batch; it is viewed as ``(-1, num_inputs)`` before the first
            layer, so its element count must be a multiple of ``num_inputs``.

    Returns:
        The output-layer values ``relu(X @ w1 + b1) @ w2 + b2`` (no softmax is
        applied here).
    """
    X = X.view((-1, num_inputs))
    H = relu(torch.matmul(X, w1) + b1)
    return torch.matmul(H, w2) + b2


# ----------------------------------------------------- Define the loss function
loss = torch.nn.CrossEntropyLoss()

# ------------------------------------------------------ Softmax: turns the
# network outputs into per-row probabilities for the accuracy computations


def softmax(X):
    """Return the softmax of ``X`` taken along dim 1.

    Args:
        X: tensor with at least two dimensions; every slice along dim 1 is
            normalised independently.

    Returns:
        A tensor of the same shape whose entries along dim 1 are non-negative
        and sum to 1.
    """
    # Subtracting the per-row maximum leaves the result mathematically
    # unchanged but keeps exp() from overflowing to inf (and the division
    # from producing NaN) when the inputs are large.
    shifted = X - X.max(dim=1, keepdim=True).values
    X_exp = shifted.exp()
    partition = X_exp.sum(dim=1, keepdim=True)
    return X_exp / partition


# ---------------------------------------------------- Test-set accuracy,
# called once per epoch while training


def evaluate_accuracy(test_data, model=None):
    """Return the fraction of samples in ``test_data`` classified correctly.

    Args:
        test_data: iterable yielding ``(X, y)`` batches, where ``y`` holds the
            class index of each sample.
        model: callable mapping a batch ``X`` to per-class scores. Defaults to
            the module-level ``net``.

    Returns:
        Number of correct predictions divided by the number of samples (float).

    Raises:
        ZeroDivisionError: if ``test_data`` yields no samples.
    """
    if model is None:
        model = net
    acc_num, num = 0.0, 0
    # Evaluation never calls backward(), so skip autograd bookkeeping.
    with torch.no_grad():
        for X, y in test_data:
            predicted = softmax(model(X)).argmax(dim=1)
            acc_num += (predicted == y).float().sum().item()
            num += y.shape[0]
    return acc_num / num


# ------------------------------------------------------ Train the model
# NOTE(review): lr = 100 looks very large. CrossEntropyLoss averages over the
# batch by default, and d2l.sgd presumably divides the step by batch_size
# again, which would make the effective step lr / batch_size -- confirm
# against d2lzh_pytorch.sgd before changing either value.
num_epochs, lr = 5, 100


def train():
    """Train ``net`` with mini-batch SGD and print metrics after every epoch.

    Reads the module-level ``train_iter``, ``test_iter``, ``net``, ``loss``,
    ``params``, ``batch_size``, ``num_epochs`` and ``lr``; updates ``params``
    through ``d2l.sgd``. For each epoch it prints the mean batch loss, the
    training accuracy and the accuracy returned by
    ``evaluate_accuracy(test_iter)``.

    Returns:
        None.
    """
    for epoch in range(num_epochs):
        train_l, train_acc, n, num = 0.0, 0.0, 0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y)

            # A parameter's .grad stays None until its first backward pass,
            # so it can only be zeroed once it exists.
            for param in params:
                if param.grad is not None:
                    param.grad.zero_()

            l.backward()
            d2l.sgd(params, lr, batch_size)

            train_l += l.item()
            # Accuracy comes from the same forward pass as the loss, which
            # avoids a second net(X) call per batch. argmax is taken on the
            # raw outputs because softmax does not change which entry is
            # largest.
            train_acc += (y_hat.argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
            num += 1

        test_acc = evaluate_accuracy(test_iter)
        print('epoch %d, loss %.4f, train_acc %.3f, test_acc %.3f'
              % (epoch + 1, train_l / num, train_acc / n, test_acc))


train()