https://github.com/OpenGVLab/LLaMA-Adapter/tree/main/llama_adapter_v2_multimodal
This link contains an example of fine-tuning a large model with adapters (LLaMA-Adapter V2, multimodal).
Personally, I think adapters only help when the backbone is fairly large and already well trained; they are not a good fit for ordinary tasks.
Note: although the code below runs, it did not seem to make a difference in my experiments.
This is the Adapter code block; it can be used directly:
import torch
import torch.nn as nn
from torch.nn.functional import gelu


class Adapter(nn.Module):
    """Conventional Adapter layer, in which the weights of the up- and down-sampler
    modules are parameters and are optimized."""

    def __init__(self, config):
        super().__init__()
        self.config = config
        self.input_dim = 768                                         # hidden size of BERT-base
        reduction_factor = 16
        self.down_sample_size = self.input_dim // reduction_factor   # bottleneck width: 768 // 16 = 48
        self.down_sampler = nn.Linear(self.input_dim, self.down_sample_size)
        self.up_sampler = nn.Linear(self.down_sample_size, self.input_dim)
        self.gate = nn.Parameter(torch.zeros(1))                     # learnable scalar gate, initialized to zero

    def forward(self, x):
        z = self.down_sampler(x)
        z = gelu(z)
        z = self.up_sampler(z)
        output = z + x
        if self.gate is not None:
            # Note: the gate scales the whole (z + x), so the adapter returns zeros at init;
            # the residual connection inside BertOutput keeps the signal flowing.
            output = self.gate * output
        return output
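A quick sanity check (hypothetical usage, not from the original post; config is stored but never read by this Adapter, so None is enough here):

dummy = torch.randn(2, 16, 768)    # (batch, seq_len, hidden) for BERT-base
adapter = Adapter(config=None)
out = adapter(dummy)
print(out.shape)                   # torch.Size([2, 16, 768]) - the shape is preserved
print(out.abs().max().item())      # 0.0 at init, because the gate starts at zero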
I plug it directly into BERT's output layer (BertOutput); it can also be inserted elsewhere:
class BertOutput(nn.Module):
    def __init__(self, config):
        super(BertOutput, self).__init__()
        self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
        # BertLayerNorm is the alias for nn.LayerNorm used in transformers' modeling_bert
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.adapter = Adapter(config)  # here!

    def forward(self, hidden_states, input_tensor):
        hidden_states = self.dense(hidden_states)
        hidden_states = self.adapter(hidden_states)  # here!
        hidden_states = self.dropout(hidden_states)
        hidden_states = self.LayerNorm(hidden_states + input_tensor)
        return hidden_states
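If you prefer not to edit the transformers source, here is a sketch of an alternative (my own assumption about the model.encoder.layer[i].output layout of Hugging Face's BertModel; not from the original post): attach an Adapter to each existing BertOutput and rebind its forward, so the pretrained dense/LayerNorm weights stay untouched.

import types
from transformers import BertModel

model = BertModel.from_pretrained("bert-base-uncased")

def adapted_forward(self, hidden_states, input_tensor):
    # Same computation as the patched BertOutput above, with the adapter after dense.
    hidden_states = self.dense(hidden_states)
    hidden_states = self.adapter(hidden_states)
    hidden_states = self.dropout(hidden_states)
    hidden_states = self.LayerNorm(hidden_states + input_tensor)
    return hidden_states

for layer in model.encoder.layer:
    layer.output.adapter = Adapter(model.config)  # registered as a submodule, so the freezing code below still finds it
    layer.output.forward = types.MethodType(adapted_forward, layer.output)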
Freeze the backbone parameters:
print('----------------------')
for n, p in self.global_encoder.named_parameters():
    p.requires_grad = False
Unfreeze the Adapter modules:
for name, sub_module in self.global_encoder.named_modules():
    # self.global_encoder can be the whole model or just one sub-module
    # print(sub_module)
    if isinstance(sub_module, Adapter):
        print(f"{name} is trainable...")
        # if len(name.split(".")) < 7:  # this will not consider layer norms inside adapters then.
        for param_name, param in sub_module.named_parameters():
            param.requires_grad = True
Print the percentage of trainable parameters:
def print_trainable_params_percentage(self, model):
    orig_param_size = sum(p.numel() for p in model.parameters())

    def count_parameters(model):
        return sum(p.numel() for p in model.parameters() if p.requires_grad)

    trainable_size = count_parameters(model)
    percentage = trainable_size / orig_param_size * 100
    print(f"Trainable param percentage: {percentage:.2f}%")
    print(trainable_size)
    return percentage
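Putting the pieces together, a hypothetical wrapper (the name AdapterTunedEncoder is my own; self.global_encoder matches the snippets above) runs the three steps in order:

class AdapterTunedEncoder(nn.Module):
    def __init__(self, encoder):
        super().__init__()
        self.global_encoder = encoder

        # 1) Freeze every backbone parameter.
        for n, p in self.global_encoder.named_parameters():
            p.requires_grad = False

        # 2) Unfreeze only the Adapter modules.
        for name, sub_module in self.global_encoder.named_modules():
            if isinstance(sub_module, Adapter):
                for param_name, param in sub_module.named_parameters():
                    param.requires_grad = True

        # 3) Report how much of the model will actually be trained.
        self.print_trainable_params_percentage(self.global_encoder)

    def print_trainable_params_percentage(self, model):
        orig_param_size = sum(p.numel() for p in model.parameters())
        trainable_size = sum(p.numel() for p in model.parameters() if p.requires_grad)
        percentage = trainable_size / orig_param_size * 100
        print(f"Trainable param percentage: {percentage:.2f}%")
        return percentage

With the 768 -> 48 -> 768 bottleneck plus gate, each adapter adds roughly 75k parameters, so with one adapter per BERT-base layer the trainable share stays well under 1% of the roughly 110M backbone parameters.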