import torch.nn as nn
from transformers import (
    AutoModelForSequenceClassification
)
|
|
# LoRA hyperparameters: rank of the low-rank update and the scaling factor alpha.
RANK = 4
ALPHA = 4
model_ckpt = "distilbert-base-uncased"
|
|
from loraLayer import LoRALayer
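# NOTE: loraLayer is a local module that is not shown here. As an assumption,
# a minimal LoRALayer compatible with the usage below (standard low-rank
# decomposition x @ A @ B scaled by alpha / rank, with B initialised to zero so
# training starts from the frozen behaviour) might look roughly like this:
#
#   import torch
#
#   class LoRALayer(nn.Module):
#       def __init__(self, in_features, out_features, rank, alpha):
#           super().__init__()
#           self.scaling = alpha / rank
#           self.A = nn.Parameter(torch.randn(in_features, rank) * 0.01)  # down-projection
#           self.B = nn.Parameter(torch.zeros(rank, out_features))        # up-projection, starts at zero
#
#       def forward(self, x):
#           return (x @ self.A @ self.B) * self.scaling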
|
|
class LoRALinear(nn.Module):
    """Wraps a frozen nn.Linear layer and adds a trainable low-rank LoRA path."""

    def __init__(self, original_layer, rank, alpha):
        super().__init__()
        self.in_features = original_layer.in_features
        self.out_features = original_layer.out_features
        self.original_layer = original_layer
        self.lora = LoRALayer(self.in_features, self.out_features, rank, alpha)

    def forward(self, x):
        # Frozen pre-trained projection plus the trainable low-rank update.
        original_output = self.original_layer(x)
        lora_output = self.lora(x)
        return original_output + lora_output
|
|
# Load the pre-trained base model.
model = AutoModelForSequenceClassification.from_pretrained(model_ckpt)
|
|
# Freeze every base-model parameter; only the injected LoRA weights will train.
for param in model.parameters():
    param.requires_grad = False
|
|
| print("--- Injecting LoRA adapters into q_lin and v_lin layers of DISTILBERT---") |
| for layer in model.distilbert.transformer.layer: |
| layer.attention.q_lin = LoRALinear(layer.attention.q_lin, RANK, ALPHA) |
| layer.attention.v_lin = LoRALinear(layer.attention.v_lin, RANK, ALPHA) |
| print("INFO: LoRA Adapters INJECTED") |
|
|
| print("\nTrainable parameters:") |
| for name, param in model.named_parameters(): |
| if param.requires_grad: |
| print(name) |
|
|
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"\nTotal parameters: {total_params}")
print(f"Trainable LoRA parameters: {trainable_params}")
print(f"Percentage of trainable parameters: {100 * trainable_params / total_params:.4f}%")