d2l Function Reference


When following Mu Li's course you need the d2l library, but some students haven't installed it, so here is a summary of some of the library's functions, continuously updated (PyTorch version)!

You can use Ctrl+F to search this page for the function you need. I hope it helps.

class _WikiTextDataset(torch.utils.data.Dataset):
    # Note: _get_nsp_data_from_paragraph, _get_mlm_data_from_tokens and
    # _pad_bert_inputs are helper functions defined alongside this class
    # in the book's BERT pretraining chapter.
    def __init__(self, paragraphs, max_len):
        paragraphs = [d2l.tokenize(paragraph, token='word')
                      for paragraph in paragraphs]
        sentences = [sentence for paragraph in paragraphs
                     for sentence in paragraph]
        self.vocab = d2l.Vocab(sentences, min_freq=5, reserved_tokens=[
            '<pad>', '<mask>', '<cls>', '<sep>'])
        # Get data for the next-sentence-prediction task
        examples = []
        for paragraph in paragraphs:
            examples.extend(_get_nsp_data_from_paragraph(
                paragraph, paragraphs, self.vocab, max_len))
        # Get data for the masked-language-model task
        examples = [(_get_mlm_data_from_tokens(tokens, self.vocab)
                     + (segments, is_next))
                    for tokens, segments, is_next in examples]
        # Pad the inputs
        (self.all_token_ids, self.all_segments, self.valid_lens,
         self.all_pred_positions, self.all_mlm_weights,
         self.all_mlm_labels, self.nsp_labels) = _pad_bert_inputs(
            examples, max_len, self.vocab)

    def __getitem__(self, idx):
        return (self.all_token_ids[idx], self.all_segments[idx],
                self.valid_lens[idx], self.all_pred_positions[idx],
                self.all_mlm_weights[idx], self.all_mlm_labels[idx],
                self.nsp_labels[idx])

    def __len__(self):
        return len(self.all_token_ids)

class PositionWiseFFN(nn.Module):
    """Position-wise feed-forward network"""
    def __init__(self, ffn_num_input, ffn_num_hiddens, ffn_num_outputs,
                 **kwargs):
        super(PositionWiseFFN, self).__init__(**kwargs)
        self.dense1 = nn.Linear(ffn_num_input, ffn_num_hiddens)
        self.relu = nn.ReLU()
        self.dense2 = nn.Linear(ffn_num_hiddens, ffn_num_outputs)

    def forward(self, X):
        return self.dense2(self.relu(self.dense1(X)))

class AddNorm(nn.Module):
    """Layer normalization after a residual connection"""
    def __init__(self, normalized_shape, dropout, **kwargs):
        super(AddNorm, self).__init__(**kwargs)
        self.dropout = nn.Dropout(dropout)
        self.ln = nn.LayerNorm(normalized_shape)

    def forward(self, X, Y):
        return self.ln(self.dropout(Y) + X)

class EncoderBlock(nn.Module):
    """Transformer encoder block"""
    def __init__(self, key_size, query_size, value_size, num_hiddens,
                 norm_shape, ffn_num_input, ffn_num_hiddens, num_heads,
                 dropout, use_bias=False, **kwargs):
        super(EncoderBlock, self).__init__(**kwargs)
        self.attention = d2l.MultiHeadAttention(
            key_size, query_size, value_size, num_hiddens, num_heads,
            dropout, use_bias)
        self.addnorm1 = AddNorm(norm_shape, dropout)
        self.ffn = PositionWiseFFN(ffn_num_input, ffn_num_hiddens,
                                   num_hiddens)
        self.addnorm2 = AddNorm(norm_shape, dropout)

    def forward(self, X, valid_lens):
        Y = self.addnorm1(X, self.attention(X, X, X, valid_lens))
        return self.addnorm2(Y, self.ffn(Y))

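A minimal shape check, mirroring the book's demo (a sketch assuming `import torch`, `from torch import nn`, and `from d2l import torch as d2l` have been run, since EncoderBlock relies on d2l.MultiHeadAttention): the encoder block does not change the shape of its input.

X = torch.ones((2, 100, 24))
valid_lens = torch.tensor([3, 2])
encoder_blk = EncoderBlock(24, 24, 24, 24, [100, 24], 24, 48, 8, 0.5)
encoder_blk.eval()
print(encoder_blk(X, valid_lens).shape)  # torch.Size([2, 100, 24])
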
class BERTEncoder(nn.Module):
    """BERT encoder"""
    def __init__(self, vocab_size, num_hiddens, norm_shape, ffn_num_input,
                 ffn_num_hiddens, num_heads, num_layers, dropout,
                 max_len=1000, key_size=768, query_size=768, value_size=768,
                 **kwargs):
        super(BERTEncoder, self).__init__(**kwargs)
        self.token_embedding = nn.Embedding(vocab_size, num_hiddens)
        self.segment_embedding = nn.Embedding(2, num_hiddens)
        self.blks = nn.Sequential()
        for i in range(num_layers):
            self.blks.add_module(f"{i}", EncoderBlock(
                key_size, query_size, value_size, num_hiddens, norm_shape,
                ffn_num_input, ffn_num_hiddens, num_heads, dropout, True))
        # In BERT, positional embeddings are learnable, so we create a
        # positional embedding parameter that is long enough
        self.pos_embedding = nn.Parameter(torch.randn(1, max_len,
                                                      num_hiddens))

    def forward(self, tokens, segments, valid_lens):
        # In the following code, the shape of X stays
        # (batch size, max sequence length, num_hiddens)
        X = self.token_embedding(tokens) + self.segment_embedding(segments)
        X = X + self.pos_embedding.data[:, :X.shape[1], :]
        for blk in self.blks:
            X = blk(X, valid_lens)
        return X

def _get_batch_loss_bert(net, loss, vocab_size, tokens_x, segments_x,
                         valid_lens_x, pred_positions_x, mlm_weights_x,
                         mlm_y, nsp_y):
    # Forward pass
    _, mlm_y_hat, nsp_y_hat = net(tokens_x, segments_x,
                                  valid_lens_x.reshape(-1),
                                  pred_positions_x)
    # Masked language model loss, weighted so padded positions are ignored
    mlm_l = loss(mlm_y_hat.reshape(-1, vocab_size),
                 mlm_y.reshape(-1)) * mlm_weights_x.reshape(-1, 1)
    mlm_l = mlm_l.sum() / (mlm_weights_x.sum() + 1e-8)
    # Next sentence prediction loss
    nsp_l = loss(nsp_y_hat, nsp_y)
    l = mlm_l + nsp_l
    return mlm_l, nsp_l, l

class MaskLM(nn.Module):
    """The masked language model task of BERT"""
    def __init__(self, vocab_size, num_hiddens, num_inputs=768, **kwargs):
        super(MaskLM, self).__init__(**kwargs)
        self.mlp = nn.Sequential(nn.Linear(num_inputs, num_hiddens),
                                 nn.ReLU(),
                                 nn.LayerNorm(num_hiddens),
                                 nn.Linear(num_hiddens, vocab_size))

    def forward(self, X, pred_positions):
        num_pred_positions = pred_positions.shape[1]
        pred_positions = pred_positions.reshape(-1)
        batch_size = X.shape[0]
        batch_idx = torch.arange(0, batch_size)
        # Suppose that batch_size=2 and num_pred_positions=3; then
        # batch_idx is torch.tensor([0, 0, 0, 1, 1, 1])
        batch_idx = torch.repeat_interleave(batch_idx, num_pred_positions)
        masked_X = X[batch_idx, pred_positions]
        masked_X = masked_X.reshape((batch_size, num_pred_positions, -1))
        mlm_Y_hat = self.mlp(masked_X)
        return mlm_Y_hat

class NextSentencePred(nn.Module):
    """The next sentence prediction task of BERT"""
    def __init__(self, num_inputs, **kwargs):
        super(NextSentencePred, self).__init__(**kwargs)
        self.output = nn.Linear(num_inputs, 2)

    def forward(self, X):
        # Shape of X: (batch size, num_hiddens)
        return self.output(X)

class BERTModel(nn.Module):
    """The BERT model"""
    def __init__(self, vocab_size, num_hiddens, norm_shape, ffn_num_input,
                 ffn_num_hiddens, num_heads, num_layers, dropout,
                 max_len=1000, key_size=768, query_size=768, value_size=768,
                 hid_in_features=768, mlm_in_features=768,
                 nsp_in_features=768):
        super(BERTModel, self).__init__()
        self.encoder = BERTEncoder(vocab_size, num_hiddens, norm_shape,
                                   ffn_num_input, ffn_num_hiddens,
                                   num_heads, num_layers, dropout,
                                   max_len=max_len, key_size=key_size,
                                   query_size=query_size,
                                   value_size=value_size)
        self.hidden = nn.Sequential(nn.Linear(hid_in_features, num_hiddens),
                                    nn.Tanh())
        self.mlm = MaskLM(vocab_size, num_hiddens, mlm_in_features)
        self.nsp = NextSentencePred(nsp_in_features)

    def forward(self, tokens, segments, valid_lens=None,
                pred_positions=None):
        encoded_X = self.encoder(tokens, segments, valid_lens)
        if pred_positions is not None:
            mlm_Y_hat = self.mlm(encoded_X, pred_positions)
        else:
            mlm_Y_hat = None
        # The hidden layer of the MLP classifier for next sentence
        # prediction; 0 is the index of the '<cls>' token
        nsp_Y_hat = self.nsp(self.hidden(encoded_X[:, 0, :]))
        return encoded_X, mlm_Y_hat, nsp_Y_hat

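A quick forward-pass sketch for the full model, assuming the classes above are defined; the hyperparameters below are illustrative, not the book's training configuration. Without pred_positions, mlm_Y_hat is None and only the encoder output and NSP logits come back.

vocab_size = 10000
net = BERTModel(vocab_size, num_hiddens=768, norm_shape=[768],
                ffn_num_input=768, ffn_num_hiddens=1024, num_heads=4,
                num_layers=2, dropout=0.2)
tokens = torch.randint(0, vocab_size, (2, 8))
segments = torch.tensor([[0, 0, 0, 0, 1, 1, 1, 1],
                         [0, 0, 0, 1, 1, 1, 1, 1]])
encoded_X, mlm_Y_hat, nsp_Y_hat = net(tokens, segments)
print(encoded_X.shape)  # torch.Size([2, 8, 768])
print(nsp_Y_hat.shape)  # torch.Size([2, 2])
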
def get_tokens_and_segments(tokens_a, tokens_b=None):
    """Get tokens of the input sequence and their segment indices"""
    tokens = ['<cls>'] + tokens_a + ['<sep>']
    # 0 and 1 mark segment A and segment B, respectively
    segments = [0] * (len(tokens_a) + 2)
    if tokens_b is not None:
        tokens += tokens_b + ['<sep>']
        segments += [1] * (len(tokens_b) + 1)
    return tokens, segments

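For example (a sketch assuming the function above is defined):

tokens, segments = get_tokens_and_segments(
    ['this', 'movie', 'is', 'great'], ['i', 'like', 'it'])
print(tokens)
# ['<cls>', 'this', 'movie', 'is', 'great', '<sep>', 'i', 'like', 'it', '<sep>']
print(segments)
# [0, 0, 0, 0, 0, 0, 1, 1, 1, 1]
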
def multibox_prior(data, sizes, ratios):
    """Generate anchor boxes with different shapes centered on each pixel"""
    in_height, in_width = data.shape[-2:]
    device, num_sizes, num_ratios = data.device, len(sizes), len(ratios)
    boxes_per_pixel = (num_sizes + num_ratios - 1)
    size_tensor = torch.tensor(sizes, device=device)
    ratio_tensor = torch.tensor(ratios, device=device)
    # Offsets are required to move the anchor to the center of a pixel.
    # Since a pixel has height 1 and width 1, we offset the centers by 0.5
    offset_h, offset_w = 0.5, 0.5
    steps_h = 1.0 / in_height  # Scaled steps along the y axis
    steps_w = 1.0 / in_width  # Scaled steps along the x axis
    # Generate all center points for the anchor boxes
    center_h = (torch.arange(in_height, device=device) + offset_h) * steps_h
    center_w = (torch.arange(in_width, device=device) + offset_w) * steps_w
    shift_y, shift_x = torch.meshgrid(center_h, center_w, indexing='ij')
    shift_y, shift_x = shift_y.reshape(-1), shift_x.reshape(-1)
    # Generate 'boxes_per_pixel' heights and widths that are later used to
    # create the anchor box corner coordinates (xmin, xmax, ymin, ymax)
    w = torch.cat((size_tensor * torch.sqrt(ratio_tensor[0]),
                   sizes[0] * torch.sqrt(ratio_tensor[1:])))\
                   * in_height / in_width  # Handle rectangular inputs
    h = torch.cat((size_tensor / torch.sqrt(ratio_tensor[0]),
                   sizes[0] / torch.sqrt(ratio_tensor[1:])))
    # Divide by 2 to get the half height and half width
    anchor_manipulations = torch.stack((-w, -h, w, h)).T.repeat(
        in_height * in_width, 1) / 2
    # Each center point will have 'boxes_per_pixel' anchor boxes, so
    # generate a grid of all anchor box centers, repeated
    # 'boxes_per_pixel' times
    out_grid = torch.stack([shift_x, shift_y, shift_x, shift_y],
                           dim=1).repeat_interleave(boxes_per_pixel, dim=0)
    output = out_grid + anchor_manipulations
    return output.unsqueeze(0)

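A usage sketch (assuming torch is imported): for a 4x4 feature map, each pixel gets num_sizes + num_ratios - 1 = 5 anchors, so 4 * 4 * 5 = 80 anchors in total.

X = torch.rand(size=(1, 3, 4, 4))  # (batch, channels, height, width)
Y = multibox_prior(X, sizes=[0.75, 0.5, 0.25], ratios=[1, 2, 0.5])
print(Y.shape)  # torch.Size([1, 80, 4])
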
def box_corner_to_center(boxes):
    """Convert from (upper-left, lower-right) to (center, width, height)"""
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    cx = (x1 + x2) / 2
    cy = (y1 + y2) / 2
    w = x2 - x1
    h = y2 - y1
    boxes = torch.stack((cx, cy, w, h), axis=-1)
    return boxes

def box_center_to_corner(boxes):  #@save
    """Convert from (center, width, height) to (upper-left, lower-right)"""
    cx, cy, w, h = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    x1 = cx - 0.5 * w
    y1 = cy - 0.5 * h
    x2 = cx + 0.5 * w
    y2 = cy + 0.5 * h
    boxes = torch.stack((x1, y1, x2, y2), axis=-1)
    return boxes

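The two conversions are inverses of each other; a minimal round-trip check (assuming torch is imported):

boxes = torch.tensor([[100.0, 50.0, 300.0, 250.0]])
centers = box_corner_to_center(boxes)  # tensor([[200., 150., 200., 200.]])
corners = box_center_to_corner(centers)
print(torch.allclose(boxes, corners))  # True
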
def bbox_to_rect(bbox, color):
    # Convert a bounding box in (upper-left x, upper-left y, lower-right x,
    # lower-right y) format to the matplotlib format:
    # ((upper-left x, upper-left y), width, height)
    return d2l.plt.Rectangle(
        xy=(bbox[0], bbox[1]), width=bbox[2] - bbox[0],
        height=bbox[3] - bbox[1], fill=False, edgecolor=color, linewidth=2)

class Benchmark:
    """For measuring running time"""
    def __init__(self, description='Done'):
        self.description = description

    def __enter__(self):
        self.timer = d2l.Timer()
        return self

    def __exit__(self, *args):
        print(f'{self.description}: {self.timer.stop():.4f} sec')

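Typical usage is as a context manager (a sketch; d2l.Timer must be available):

with Benchmark('10 matrix multiplications'):
    for _ in range(10):
        a = torch.randn(500, 500)
        b = torch.mm(a, a)
# Prints something like: '10 matrix multiplications: 0.0312 sec'
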
def train_concise_ch11(trainer_fn, hyperparams, data_iter, num_epochs=4):
    # Initialize the model
    net = nn.Sequential(nn.Linear(5, 1))
    def init_weights(m):
        if type(m) == nn.Linear:
            torch.nn.init.normal_(m.weight, std=0.01)
    net.apply(init_weights)
    optimizer = trainer_fn(net.parameters(), **hyperparams)
    loss = nn.MSELoss(reduction='none')
    animator = d2l.Animator(xlabel='epoch', ylabel='loss',
                            xlim=[0, num_epochs], ylim=[0.22, 0.35])
    n, timer = 0, d2l.Timer()
    for _ in range(num_epochs):
        for X, y in data_iter:
            optimizer.zero_grad()
            out = net(X)
            y = y.reshape(out.shape)
            l = loss(out, y)
            l.mean().backward()
            optimizer.step()
            n += X.shape[0]
            if n % 200 == 0:
                timer.stop()
                # MSELoss computes the squared error without the factor 1/2
                animator.add(n / X.shape[0] / len(data_iter),
                             (d2l.evaluate_loss(net, data_iter, loss) / 2,))
                timer.start()
    print(f'loss: {animator.Y[0][-1]:.3f}, {timer.avg():.3f} sec/epoch')

def train_ch11(trainer_fn, states, hyperparams, data_iter,
               feature_dim, num_epochs=2):
    # Initialize the model
    w = torch.normal(mean=0.0, std=0.01, size=(feature_dim, 1),
                     requires_grad=True)
    b = torch.zeros((1), requires_grad=True)
    net, loss = lambda X: d2l.linreg(X, w, b), d2l.squared_loss
    # Train the model
    animator = d2l.Animator(xlabel='epoch', ylabel='loss',
                            xlim=[0, num_epochs], ylim=[0.22, 0.35])
    n, timer = 0, d2l.Timer()
    for _ in range(num_epochs):
        for X, y in data_iter:
            l = loss(net(X), y).mean()
            l.backward()
            trainer_fn([w, b], states, hyperparams)
            n += X.shape[0]
            if n % 200 == 0:
                timer.stop()
                animator.add(n / X.shape[0] / len(data_iter),
                             (d2l.evaluate_loss(net, data_iter, loss),))
                timer.start()
    print(f'loss: {animator.Y[0][-1]:.3f}, {timer.avg():.3f} sec/epoch')
    return timer.cumsum(), animator.Y[0]

import collections

class Vocab:  #@save
    """Vocabulary for text"""
    def __init__(self, tokens=None, min_freq=0, reserved_tokens=None):
        if tokens is None:
            tokens = []
        if reserved_tokens is None:
            reserved_tokens = []
        # Sort by frequency of occurrence
        counter = count_corpus(tokens)
        self._token_freqs = sorted(counter.items(), key=lambda x: x[1],
                                   reverse=True)
        # The index of the unknown token is 0
        self.idx_to_token = ['<unk>'] + reserved_tokens
        self.token_to_idx = {token: idx
                             for idx, token in enumerate(self.idx_to_token)}
        for token, freq in self._token_freqs:
            if freq < min_freq:
                break
            if token not in self.token_to_idx:
                self.idx_to_token.append(token)
                self.token_to_idx[token] = len(self.idx_to_token) - 1

    def __len__(self):
        return len(self.idx_to_token)

    def __getitem__(self, tokens):
        if not isinstance(tokens, (list, tuple)):
            return self.token_to_idx.get(tokens, self.unk)
        return [self.__getitem__(token) for token in tokens]

    def to_tokens(self, indices):
        if not isinstance(indices, (list, tuple)):
            return self.idx_to_token[indices]
        return [self.idx_to_token[index] for index in indices]

    @property
    def unk(self):  # The index of the unknown token is 0
        return 0

    @property
    def token_freqs(self):
        return self._token_freqs

def count_corpus(tokens):  #@save
    """Count token frequencies"""
    # Here tokens is a 1D list or a 2D list
    if len(tokens) == 0 or isinstance(tokens[0], list):
        # Flatten a 2D list of tokens into one list
        tokens = [token for line in tokens for token in line]
    return collections.Counter(tokens)

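A minimal usage sketch (the toy corpus below is made up for illustration):

lines = ['the time machine', 'by h g wells', 'the the the']
tokens = [line.split() for line in lines]
vocab = Vocab(tokens)
print(len(vocab))               # 8: '<unk>' plus 7 distinct tokens
print(vocab['the'])             # 1: the most frequent token gets the smallest new index
print(vocab.to_tokens([0, 1]))  # ['<unk>', 'the']
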
def train_2d(trainer, steps=20, f_grad=None):  #@save
    """Optimize a 2D objective function with a customized trainer"""
    # s1 and s2 are internal state variables that will be used later
    x1, x2, s1, s2 = -5, -2, 0, 0
    results = [(x1, x2)]
    for i in range(steps):
        if f_grad:
            x1, x2, s1, s2 = trainer(x1, x2, s1, s2, f_grad)
        else:
            x1, x2, s1, s2 = trainer(x1, x2, s1, s2)
        results.append((x1, x2))
    print(f'epoch {i + 1}, x1: {float(x1):f}, x2: {float(x2):f}')
    return results

def show_trace_2d(f, results):  #@save
    """Show the trace of 2D variables during optimization"""
    d2l.set_figsize()
    d2l.plt.plot(*zip(*results), '-o', color='#ff7f0e')
    x1, x2 = torch.meshgrid(torch.arange(-5.5, 1.0, 0.1),
                            torch.arange(-3.0, 1.0, 0.1))
    d2l.plt.contour(x1, x2, f(x1, x2), colors='#1f77b4')
    d2l.plt.xlabel('x1')
    d2l.plt.ylabel('x2')

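For example, minimizing f(x1, x2) = x1^2 + 2*x2^2, whose gradient is (2*x1, 4*x2), with plain gradient descent at learning rate 0.1; a sketch assuming the two functions above plus torch and d2l are available:

def f_2d(x1, x2):  # The objective function
    return x1 ** 2 + 2 * x2 ** 2

def gd_2d(x1, x2, s1, s2):  # One gradient descent step with eta = 0.1
    return (x1 - 0.1 * 2 * x1, x2 - 0.1 * 4 * x2, 0, 0)

show_trace_2d(f_2d, train_2d(gd_2d))
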
def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):  #@save
    """Train a model (defined in Chapter 3)"""
    animator = Animator(xlabel='epoch', xlim=[1, num_epochs],
                        ylim=[0.3, 0.9],
                        legend=['train loss', 'train acc', 'test acc'])
    for epoch in range(num_epochs):
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        animator.add(epoch + 1, train_metrics + (test_acc,))
    train_loss, train_acc = train_metrics
    assert train_loss < 0.5, train_loss
    assert train_acc <= 1 and train_acc > 0.7, train_acc
    assert test_acc <= 1 and test_acc > 0.7, test_acc

def tokenize(lines, token='word'):  #@save
    """Split text lines into word or character tokens"""
    if token == 'word':
        return [line.split() for line in lines]
    elif token == 'char':
        return [list(line) for line in lines]
    else:
        print('ERROR: unknown token type: ' + token)

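For example:

lines = ['the time machine', 'by h g wells']
print(tokenize(lines))
# [['the', 'time', 'machine'], ['by', 'h', 'g', 'wells']]
print(tokenize(lines, token='char')[0][:5])
# ['t', 'h', 'e', ' ', 't']
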
from IPython import display

class Animator:
    """For plotting data in animation"""
    def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
                 ylim=None, xscale='linear', yscale='linear',
                 fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
                 figsize=(3.5, 2.5)):
        # Incrementally plot multiple lines
        if legend is None:
            legend = []
        d2l.use_svg_display()
        self.fig, self.axes = d2l.plt.subplots(nrows, ncols,
                                               figsize=figsize)
        if nrows * ncols == 1:
            self.axes = [self.axes, ]
        # Use a lambda function to capture arguments
        self.config_axes = lambda: d2l.set_axes(
            self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale,
            legend)
        self.X, self.Y, self.fmts = None, None, fmts

    def add(self, x, y):
        # Add multiple data points into the figure
        if not hasattr(y, "__len__"):
            y = [y]
        n = len(y)
        if not hasattr(x, "__len__"):
            x = [x] * n
        if not self.X:
            self.X = [[] for _ in range(n)]
        if not self.Y:
            self.Y = [[] for _ in range(n)]
        for i, (a, b) in enumerate(zip(x, y)):
            if a is not None and b is not None:
                self.X[i].append(a)
                self.Y[i].append(b)
        self.axes[0].cla()
        for x, y, fmt in zip(self.X, self.Y, self.fmts):
            self.axes[0].plot(x, y, fmt)
        self.config_axes()
        display.display(self.fig)
        display.clear_output(wait=True)

from matplotlib import pyplot as plt
from matplotlib_inline import backend_inline

def use_svg_display():  #@save
    """Use the svg format to display a plot in Jupyter"""
    backend_inline.set_matplotlib_formats('svg')

def set_figsize(figsize=(3.5, 2.5)):  #@save
    """Set the figure size for matplotlib"""
    use_svg_display()
    plt.rcParams['figure.figsize'] = figsize

def set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend):
    """Set the axes for matplotlib"""
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    axes.set_xscale(xscale)
    axes.set_yscale(yscale)
    axes.set_xlim(xlim)
    axes.set_ylim(ylim)
    if legend:
        axes.legend(legend)
    axes.grid()

def plot(X, Y=None, xlabel=None, ylabel=None, legend=None, xlim=None,
         ylim=None, xscale='linear', yscale='linear',
         fmts=('-', 'm--', 'g-.', 'r:'), figsize=(3.5, 2.5), axes=None):
    """Plot data points"""
    if legend is None:
        legend = []
    set_figsize(figsize)
    axes = axes if axes else plt.gca()

    # Return True if X has one axis
    def has_one_axis(X):
        return (hasattr(X, "ndim") and X.ndim == 1 or
                isinstance(X, list) and not hasattr(X[0], "__len__"))

    if has_one_axis(X):
        X = [X]
    if Y is None:
        X, Y = [[]] * len(X), X
    elif has_one_axis(Y):
        Y = [Y]
    if len(X) != len(Y):
        X = X * len(Y)
    axes.cla()
    for x, y, fmt in zip(X, Y, fmts):
        if len(x):
            axes.plot(x, y, fmt)
        else:
            axes.plot(y, fmt)
    set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend)

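A usage sketch (assuming numpy is available and set_figsize/set_axes are defined as above); a single x array is broadcast across the two y curves:

import numpy as np

x = np.arange(0, 3, 0.1)
plot(x, [np.sin(x), np.cos(x)], xlabel='x', ylabel='f(x)',
     legend=['sin(x)', 'cos(x)'])
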
class Accumulator:
    """For accumulating sums over n variables"""
    def __init__(self, n):
        self.data = [0.0] * n

    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

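For example, accumulating a running loss total alongside an example count:

metric = Accumulator(2)
metric.add(3.0, 10)   # loss sum 3.0 over 10 examples
metric.add(6.0, 20)   # loss sum 6.0 over 20 more examples
print(metric[0] / metric[1])  # 9.0 / 30 = 0.3
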
def evaluate_loss(net, data_iter, loss):
    """Evaluate the average loss of a model on the given dataset"""
    metric = Accumulator(2)  # Sum of losses, number of examples
    for X, y in data_iter:
        out = net(X)
        y = y.reshape(out.shape)
        l = loss(out, y)
        metric.add(l.sum(), l.numel())
    return metric[0] / metric[1]

from torch.utils.data import TensorDataset, DataLoader

def load_array(data_arrays, batch_size, is_train=True):
    """Construct a PyTorch data iterator.

    Defined in :numref:`sec_linear_concise`"""
    dataset = TensorDataset(*data_arrays)
    return DataLoader(dataset, batch_size, shuffle=is_train)

def synthetic_data(w, b, num_examples):
    """Generate y = Xw + b + noise"""
    X = torch.normal(0, 1, (num_examples, len(w)))
    y = torch.matmul(X, w) + b
    y += torch.normal(0, 0.01, y.shape)
    return X, y.reshape((-1, 1))

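Putting synthetic_data and load_array together (a sketch; true_w and true_b are arbitrary illustrative values):

true_w = torch.tensor([2.0, -3.4])
true_b = 4.2
features, labels = synthetic_data(true_w, true_b, 1000)
data_iter = load_array((features, labels), batch_size=10)
X, y = next(iter(data_iter))
print(X.shape, y.shape)  # torch.Size([10, 2]) torch.Size([10, 1])
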
