【昇腾】NPU ID:物理ID、逻辑ID、芯片映射关系

news/2024/12/18 5:14:00/

起因:
https://www.hiascend.com/document/detail/zh/Atlas%20200I%20A2/23.0.0/re/npu/npusmi_013.html
npu-smi info -l查询所有NPU设备:

[naie@notebook-npu-bd130045-55bbffd786-lr6t8 DCNN]$ npu-smi info -lTotal Count                    : 1NPU ID                         : 6Chip Count                     : 1

运行脚本:

python">import torch_npu
from torch_npu.contrib import transfer_to_npu
import torchimport torch
import torch.nn as nnclass SingleConv(nn.Module):def __init__(self, in_ch, out_ch, kernel_size, stride, padding):super(SingleConv, self).__init__()self.single_conv = nn.Sequential(nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=padding, stride=stride, bias=False),nn.BatchNorm2d(out_ch),nn.ReLU(inplace=True))def forward(self, x):return self.single_conv(x)class DenseFeaureAggregation(nn.Module):def __init__(self, in_ch, out_ch, base_ch):super(DenseFeaureAggregation, self).__init__()self.conv1 = nn.Sequential(nn.BatchNorm2d(num_features=1 * in_ch, eps=1e-5, affine=True),nn.ReLU(inplace=True),nn.Conv2d(in_ch, base_ch, dilation=2, kernel_size=3, padding=2, stride=1, bias=True),)self.conv2 = nn.Sequential(nn.BatchNorm2d(num_features=in_ch + base_ch, eps=1e-5, affine=True),nn.ReLU(inplace=True),nn.Conv2d(in_ch + base_ch, base_ch, dilation=3, kernel_size=3, padding=3, stride=1, bias=True),)self.conv3 = nn.Sequential(nn.BatchNorm2d(num_features=in_ch + 2 * base_ch, eps=1e-5, affine=True),nn.ReLU(inplace=True),nn.Conv2d(in_ch + 2 * base_ch, base_ch, dilation=5, kernel_size=3, padding=5, stride=1, bias=True),)self.conv4 = nn.Sequential(nn.BatchNorm2d(num_features=in_ch + 3 * base_ch, eps=1e-5, affine=True),nn.ReLU(inplace=True),nn.Conv2d(in_ch + 3 * base_ch, base_ch, dilation=7, kernel_size=3, padding=7, stride=1, bias=True),)self.conv5 = nn.Sequential(nn.BatchNorm2d(num_features=in_ch + 4 * base_ch, eps=1e-5, affine=True),nn.ReLU(inplace=True),nn.Conv2d(in_ch + 4 * base_ch, base_ch, dilation=9, kernel_size=3, padding=9, stride=1, bias=True),)self.conv_out = nn.Sequential(nn.BatchNorm2d(num_features=in_ch + 5 * base_ch, eps=1e-5, affine=True),nn.ReLU(inplace=True),nn.Conv2d(in_ch + 5 * base_ch, out_ch, dilation=1, kernel_size=1, padding=0, stride=1, bias=True),)def forward(self, x):out_ = self.conv1(x)concat_ = torch.cat((out_, x), dim=1)out_ = self.conv2(concat_)concat_ = torch.cat((concat_, out_), dim=1)out_ = self.conv3(concat_)concat_ = torch.cat((concat_, out_), dim=1)out_ = self.conv4(concat_)concat_ = torch.cat((concat_, out_), dim=1)out_ = self.conv5(concat_)concat_ = torch.cat((concat_, out_), dim=1)out_ = self.conv_out(concat_)return out_class Encoder(nn.Module):def __init__(self, in_ch, list_ch):super(Encoder, self).__init__()self.encoder_1 = nn.Sequential(SingleConv(in_ch, list_ch[1], kernel_size=3, stride=1, padding=1),SingleConv(list_ch[1], list_ch[1], kernel_size=3, stride=1, padding=1))self.encoder_2 = nn.Sequential(nn.MaxPool2d(kernel_size=2, stride=2, padding=0),SingleConv(list_ch[1], list_ch[2], kernel_size=3, stride=1, padding=1),SingleConv(list_ch[2], list_ch[2], kernel_size=3, stride=1, padding=1))self.encoder_3 = nn.Sequential(nn.MaxPool2d(kernel_size=2, stride=2, padding=0),SingleConv(list_ch[2], list_ch[3], kernel_size=3, stride=1, padding=1),SingleConv(list_ch[3], list_ch[3], kernel_size=3, stride=1, padding=1))self.encoder_4 = nn.Sequential(nn.MaxPool2d(kernel_size=2, stride=2, padding=0),SingleConv(list_ch[3], list_ch[4], kernel_size=3, stride=1, padding=1),SingleConv(list_ch[4], list_ch[4], kernel_size=3, stride=1, padding=1))self.DFA = DenseFeaureAggregation(list_ch[4], list_ch[4], list_ch[4])def forward(self, x):out_encoder_1 = self.encoder_1(x)out_encoder_2 = self.encoder_2(out_encoder_1)out_encoder_3 = self.encoder_3(out_encoder_2)out_encoder_4 = self.encoder_4(out_encoder_3)out_encoder_4 = self.DFA(out_encoder_4)return [out_encoder_1, out_encoder_2, out_encoder_3, out_encoder_4]class Decoder(nn.Module):def __init__(self, out_ch, list_ch):super(Decoder, self).__init__()self.upconv_3_1 = nn.ConvTranspose2d(list_ch[4], list_ch[3], kernel_size=2, stride=2, bias=True)self.decoder_conv_3_1 = nn.Sequential(SingleConv(2 * list_ch[3], list_ch[3], kernel_size=3, stride=1, padding=1),SingleConv(list_ch[3], list_ch[3], kernel_size=3, stride=1, padding=1))self.upconv_2_1 = nn.ConvTranspose2d(list_ch[3], list_ch[2], kernel_size=2, stride=2, bias=True)self.decoder_conv_2_1 = nn.Sequential(SingleConv(2 * list_ch[2], list_ch[2], kernel_size=3, stride=1, padding=1),SingleConv(list_ch[2], list_ch[2], kernel_size=3, stride=1, padding=1))self.upconv_1_1 = nn.ConvTranspose2d(list_ch[2], list_ch[1], kernel_size=2, stride=2, bias=True)self.decoder_conv_1_1 = nn.Sequential(SingleConv(2 * list_ch[1], list_ch[1], kernel_size=3, stride=1, padding=1),SingleConv(list_ch[1], list_ch[1], kernel_size=3, stride=1, padding=1))self.conv_out = nn.Sequential(nn.Conv2d(list_ch[1], out_ch, kernel_size=1, padding=0, bias=True))def forward(self, out_encoder):out_encoder_1, out_encoder_2, out_encoder_3, out_encoder_4 = out_encoderout_decoder_3_1 = self.decoder_conv_3_1(torch.cat((self.upconv_3_1(out_encoder_4), out_encoder_3), dim=1))out_decoder_2_1 = self.decoder_conv_2_1(torch.cat((self.upconv_2_1(out_decoder_3_1), out_encoder_2), dim=1))out_decoder_1_1 = self.decoder_conv_1_1(torch.cat((self.upconv_1_1(out_decoder_2_1), out_encoder_1), dim=1))output = self.conv_out(out_decoder_1_1)return [output]class Model(nn.Module):def __init__(self, in_ch, out_ch, list_ch):super(Model, self).__init__()self.encoder = Encoder(in_ch, list_ch)self.decoder = Decoder(out_ch, list_ch)# initself.initialize()@staticmethoddef init_conv_deconv_BN(modules):for m in modules():if isinstance(m, nn.Conv2d):nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='relu')if m.bias is not None:nn.init.constant_(m.bias, 0.)elif isinstance(m, nn.ConvTranspose2d):nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='relu')if m.bias is not None:nn.init.constant_(m.bias, 0.)elif isinstance(m, nn.BatchNorm2d):nn.init.constant_(m.weight, 1.)nn.init.constant_(m.bias, 0.)def initialize(self):print('# random init encoder weight using nn.init.kaiming_uniform !')self.init_conv_deconv_BN(self.decoder.modules)print('# random init decoder weight using nn.init.kaiming_uniform !')self.init_conv_deconv_BN(self.encoder.modules)def forward(self, x):out_encoder = self.encoder(x)out_decoder = self.decoder(out_encoder)  # is a listreturn out_decoderimport re
import subprocessdef get_npu_id():try:# 执行命令并捕获输出output = subprocess.check_output(['npu-smi', 'info', '-l'], text=True)# 使用正则表达式查找NPU IDmatch = re.search(r'NPU ID\s+:\s+(\d+)', output)if match:return match.group(1)  # 返回匹配的第一个组,即NPU IDelse:return "NPU ID not found"except subprocess.CalledProcessError as e:return f"An error occurred: {e}"network = Model(in_ch=4, out_ch=1,list_ch=[-1, 32, 64, 128, 256])npu_id = get_npu_id()
# list_GPU_ids = [npu_id]
device = torch.device('cuda:' + str(npu_id))
network.to(device)
print("device:",npu_id)

报错:

Traceback (most recent call last):File "/home/work/user-job-dir/app/notebook/RTDosePrediction-main/RTDosePrediction/Src/DCNN/test_device_id.py", line 211, in <module>network.to(device)File "/home/naie/.local/lib/python3.9/site-packages/torch_npu/contrib/transfer_to_npu.py", line 56, in decoratedreturn fn(*args, **kwargs)File "/home/naie/.local/lib/python3.9/site-packages/torch_npu/utils/module.py", line 68, in toreturn self._apply(convert)File "/home/naie/.local/lib/python3.9/site-packages/torch/nn/modules/module.py", line 810, in _applymodule._apply(fn)File "/home/naie/.local/lib/python3.9/site-packages/torch/nn/modules/module.py", line 810, in _applymodule._apply(fn)File "/home/naie/.local/lib/python3.9/site-packages/torch/nn/modules/module.py", line 810, in _applymodule._apply(fn)[Previous line repeated 2 more times]File "/home/naie/.local/lib/python3.9/site-packages/torch/nn/modules/module.py", line 833, in _applyparam_applied = fn(param)File "/home/naie/.local/lib/python3.9/site-packages/torch_npu/utils/module.py", line 66, in convertreturn t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)File "/home/naie/.local/lib/python3.9/site-packages/torch_npu/contrib/transfer_to_npu.py", line 56, in decoratedreturn fn(*args, **kwargs)
RuntimeError: exchangeDevice:torch_npu/csrc/aten/common/CopyKernel.cpp:37 NPU error, error code is 107001
[ERROR] 2024-12-13-10:47:03 (PID:38196, Device:0, RankID:-1) ERR00100 PTA call acl api failed
[Error]: Invalid device ID.Check whether the device ID is valid.
EE1001: 2024-12-13-10:47:03.815.272 The argument is invalid.Reason: Set device failed, invalid device, set device=6, valid device range is [0, 1)Solution: 1.Check the input parameter range of the function. 2.Check the function invocation relationship.TraceBack (most recent call last):rtSetDevice execute failed, reason=[device id error][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]open device 6 failed, runtime result = 107001.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]

猜想也许是进行了从物理ID到逻辑ID的映射。
查了一下华为的官方文档:
https://www.hiascend.com/document/detail/zh/Atlas%20200I%20A2/23.0.0/re/npu/npusmi_013.html
还真的存在这么一个映射。
在这里插入图片描述

遂用这个命令查看了当前环境下的芯片映射关系:

[naie@notebook-npu-bd130045-55bbffd786-lr6t8 DCNN]$ npu-smi info -mNPU ID                         Chip ID                        Chip Logic ID                  Chip Name                     6                              0                              0                              Ascend 910B36                              1                              -                              Mcu 

确实物理ID为6的NPU被映射成了0。这是因为当前环境下(notebook)中只存在一个NPU。

但是还有一个问题:什么时候使用物理ID什么时候使用逻辑ID呢?

物理ID

npu-smi info -t power -i id类似于这种命令里使用的id都是物理ID:
在这里插入图片描述
因为npu-smi info -l查出来的是物理ID。

逻辑ID

device = torch.device('cuda:' + str(npu_id))这种就用的是逻辑ID


http://www.ppmy.cn/news/1556045.html

相关文章

LC 318. Maximum Product of Word Lengths

题目描述 Leetcode 318 给定一个只包含小写字母字符串数组&#xff0c;返回两个没有共同字符的字符串长度乘积的最大值 题目思路 简单的思路是将字符串编码成长度为26的数组&#xff0c;然后两两字符串比较&#xff0c;这样可以将O(L1L2)的比较压缩到O(2626)。 位图bitmap …

如何高效获取Twitter数据:Apify平台上的推特数据采集解决方案

引言 在数据分析和市场研究领域&#xff0c;Twitter&#xff08;现在的X&#xff09;数据一直是重要的信息来源。但是&#xff0c;自从Twitter更改API定价策略后&#xff0c;获取数据的成本大幅提升。本文将介绍一个经济实惠的替代方案。 为什么需要Twitter数据&#xff1f; …

go项目zero框架在Linux或Windows服务器中加入开机启动

为了使Go项目在系统启动时自动运行&#xff0c;您可以根据操作系统选择不同的方法。以下是针对Linux和Windows系统的两种常见方案。 ### Linux 系统 #### 使用 Systemd (推荐) Systemd 是大多数现代Linux发行版默认的初始化系统和服务管理器。通过创建一个systemd服务文件&…

快速搭建conda深度学习环境全流程(又全又简洁)

1.首先在云服务器或者本地环境安装miniconda 选择自己电脑相应的版本 Miniconda — Anaconda documentation mkdir -p ~/miniconda3 wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh bash ~/miniconda3/miniconda…

简单了解一下 Go 语言的构建约束?

​构建约束是一种在 Go 语言中控制源文件编译条件的方法&#xff0c;它可以让您指定某些文件只在特定的操作系统、架构、编译器或 Go 版本下编译&#xff0c;而在其他环境中自动忽略。这样可以方便您针对不同的平台或场景编写不同的代码&#xff0c;实现条件编译的功能。 构建…

游戏引擎学习第41天

这一节就讨论了一些数学知识 讨论为什么要进行数学讨论 现在到了需要真正开始讨论数学的时候了&#xff0c;因为从这一步开始&#xff0c;几乎所有计划做的事情都将涉及比基本代数更复杂的数学内容。到目前为止所做的一切基本都可以用基础代数技能理解&#xff0c;但从现在开…

Databend 为什么使用 Rust 开发?

11 月 30 日&#xff0c;Rust China Tour 武汉站在武汉恺德光谷城际酒店举行。本次活动汇聚了来自 Databend、GreptimeDB、华中科技大学的多位 Rust 技术专家和研究者&#xff0c;共同探讨 Rust 语言在前沿技术中的创新应用。Databend 数据库研发工程师张祖前在活动中带来主题演…

微服务之间的相互调用的几种常见实现方式对比 2

本文承接我的另一篇博客微服务之间的相互调用的几种常见实现方式对比_微服务之间怎么互相调用-CSDN博客 目录 五、消息队列 特点 适用场景 六、服务代理 特点 常见实现方法 1. Zuul 工作原理 2. Spring Cloud Gateway 三大核心概念 工作流程 实现步骤 七、事件驱动…