```python
import torch
from peft import (
    LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_training
)
from transformers import AutoModelForCausalLM, AutoTokenizer


class LLaMA2LoRAFineTuner:
    """LoRA fine-tuner for LLaMA 2."""

    def __init__(self, model_name="meta-llama/Llama-2-7b-hf"):
        self.model_name = model_name
        self.tokenizer = None
        self.model = None

    def setup_model(self):
        """Load the base model and tokenizer, then apply LoRA adapters."""
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        # LLaMA 2 ships without a pad token; reuse EOS for padding.
        self.tokenizer.pad_token = self.tokenizer.eos_token

        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            device_map="auto",
            load_in_8bit=True,
            torch_dtype=torch.float16,
        )
        # Required for stable gradient flow when training on an 8-bit base model.
        self.model = prepare_model_for_kbit_training(self.model)

        lora_config = LoraConfig(
            task_type=TaskType.CAUSAL_LM,
            r=8,
            lora_alpha=16,
            lora_dropout=0.05,
            # Adapt all attention and MLP projection matrices.
            target_modules=[
                "q_proj", "v_proj", "k_proj", "o_proj",
                "gate_proj", "up_proj", "down_proj",
            ],
            bias="none",
            inference_mode=False,
        )
        self.model = get_peft_model(self.model, lora_config)
        self.model.print_trainable_parameters()

    def prepare_dataset(self, dataset_path):
        """Prepare the fine-tuning dataset."""
        def format_prompt(example):
            return {
                "text": f"### Instruction:\n{example['instruction']}\n\n"
                        f"### Response:\n{example['response']}"
            }

        from datasets import load_dataset
        dataset = load_dataset("json", data_files=dataset_path)
        dataset = dataset.map(format_prompt)

        def tokenize(example):
            result = self.tokenizer(
                example["text"],
                truncation=True,
                max_length=2048,
                padding="max_length",
            )
            # For causal LM training the labels are the input ids themselves.
            result["labels"] = result["input_ids"].copy()
            return result

        return dataset.map(tokenize, batched=True)

    def train(self, train_dataset, output_dir="./lora_output"):
        """Run training."""
        from transformers import (
            TrainingArguments, Trainer, DataCollatorForLanguageModeling
        )

        training_args = TrainingArguments(
            output_dir=output_dir,
            num_train_epochs=3,
            per_device_train_batch_size=4,
            gradient_accumulation_steps=4,
            learning_rate=2e-4,
            warmup_ratio=0.03,
            lr_scheduler_type="cosine",
            logging_steps=10,
            save_steps=500,
            fp16=True,
            optim="paged_adamw_8bit",
        )
        trainer = Trainer(
            model=self.model,
            args=training_args,
            train_dataset=train_dataset,
            # mlm=False gives standard causal-LM collation.
            data_collator=DataCollatorForLanguageModeling(self.tokenizer, mlm=False),
        )
        trainer.train()
        # Saves only the LoRA adapter weights, not the full base model.
        self.model.save_pretrained(output_dir)
```
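Targeting all attention and MLP projections (rather than only `q_proj`/`v_proj`) trains slightly more parameters in exchange for broader adaptation capacity. A minimal end-to-end usage sketch follows; the file path `data.jsonl` is illustrative, and the JSON records are assumed to carry the `instruction` and `response` fields the formatter above expects:

```python
# Hypothetical usage sketch; data.jsonl is an assumed path.
# Each line of data.jsonl is expected to look like:
# {"instruction": "Summarize the following text ...", "response": "..."}

tuner = LLaMA2LoRAFineTuner()
tuner.setup_model()                            # load base model + attach LoRA
dataset = tuner.prepare_dataset("data.jsonl")  # returns a DatasetDict
tuner.train(dataset["train"], output_dir="./lora_output")
```

Note that `prepare_dataset` returns a `DatasetDict`, so the `train` split is selected explicitly before being handed to `train()`.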