#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Role-dialogue LoRA fine-tuning script.

Fine-tunes a Qwen3 8B model on npc_dialogue_dataset.json to generate
in-game NPC dialogue.
"""
import json
import os
import platform

import torch
from datasets import Dataset
from peft import LoraConfig, PeftModel, TaskType, get_peft_model
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForSeq2Seq,
    Trainer,
    TrainingArguments,
)

import swanlab
from swanlab.integration.transformers import SwanLabCallback

# Windows multiprocessing compatibility fix
if platform.system() == "Windows":
    import multiprocessing
    multiprocessing.set_start_method('spawn', force=True)

os.environ['VLLM_USE_MODELSCOPE'] = 'True'
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # synchronous CUDA errors for easier debugging
os.environ["TORCH_USE_CUDA_DSA"] = "1"


def process_func(example, tokenizer):
    """Tokenize one sample into input_ids / attention_mask / labels."""
    MAX_LENGTH = 1024

    # Build the chat prompt for role dialogue.
    system_prompt = f"你是一个游戏中的NPC角色。{example['character']}"
    instruction_text = example['instruction']
    user_input = example['input']

    # Prompt part (system + user + assistant header), in Qwen's ChatML format.
    prompt = tokenizer(
        f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
        f"<|im_start|>user\n{instruction_text + user_input}<|im_end|>\n"
        f"<|im_start|>assistant\n",
        add_special_tokens=False
    )

    # Response part.
    response = tokenizer(f"{example['output']}", add_special_tokens=False)

    # Concatenate prompt and response; terminate with EOS (Qwen's <|im_end|>)
    # so the model learns to stop generating.
    input_ids = prompt["input_ids"] + response["input_ids"] + [tokenizer.eos_token_id]
    attention_mask = prompt["attention_mask"] + response["attention_mask"] + [1]

    # Labels: compute loss only on the response; prompt tokens are masked with -100.
    labels = [-100] * len(prompt["input_ids"]) + response["input_ids"] + [tokenizer.eos_token_id]

    # Truncate to the maximum length.
    if len(input_ids) > MAX_LENGTH:
        input_ids = input_ids[:MAX_LENGTH]
        attention_mask = attention_mask[:MAX_LENGTH]
        labels = labels[:MAX_LENGTH]

    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels
    }


def load_model_and_tokenizer(model_path):
    """Load the base model and tokenizer."""
    print(f"Loading model from: {model_path}")

    tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False, trust_remote_code=True)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        device_map="auto",
        torch_dtype=torch.bfloat16,
        trust_remote_code=True
    )

    return model, tokenizer


def create_lora_config():
    """Create the LoRA configuration."""
    config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        target_modules=["q_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
        inference_mode=False,
        r=8,            # rank; raise for more expressive adapters
        lora_alpha=16,  # alpha = 2 * r
        lora_dropout=0.1,
        modules_to_save=["lm_head", "embed_tokens"]  # also train head/embeddings (costs extra VRAM)
    )
    return config


def prepare_dataset(data_path, tokenizer):
    """Load and tokenize the training data."""
    print(f"Loading dataset from: {data_path}")

    # Load the JSON data: a list of {"instruction", "input", "output", "character"} records.
    with open(data_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    print(f"Total samples: {len(data)}")

    dataset = Dataset.from_list(data)

    # Apply the preprocessing function sample by sample.
    def tokenize_function(examples):
        return process_func(examples, tokenizer)

    tokenized_dataset = dataset.map(
        tokenize_function,
        remove_columns=dataset.column_names,
        batched=False
    )

    return tokenized_dataset
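
# Optional sanity check (a minimal sketch, not part of the original pipeline):
# decode one tokenized sample to verify the prompt/label masking before
# committing GPU time to a full run. `preview_sample` is a hypothetical helper
# added for illustration; it only reads the fields produced by process_func above.
def preview_sample(tokenized_dataset, tokenizer, index=0):
    """Print one decoded sample and how many of its tokens are supervised."""
    sample = tokenized_dataset[index]
    print(tokenizer.decode(sample["input_ids"]))
    supervised = sum(1 for label in sample["labels"] if label != -100)
    print(f"{supervised}/{len(sample['labels'])} tokens contribute to the loss")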

def train_lora_model(model_path, data_path, output_dir):
    """Train the LoRA adapter."""
    # 1. Load the model and tokenizer
    model, tokenizer = load_model_and_tokenizer(model_path)

    # 2. Create the LoRA config
    lora_config = create_lora_config()

    # 3. Wrap the base model with LoRA adapters
    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()

    # 4. Make gradient checkpointing work with PEFT: the frozen embedding
    # outputs must require grads, otherwise no gradients reach the adapters.
    model.enable_input_require_grads()
    model.config.use_cache = False  # incompatible with gradient checkpointing; also saves VRAM

    # 5. Prepare the dataset
    train_dataset = prepare_dataset(data_path, tokenizer)

    # 6. Training arguments, tuned for a single RTX 3080
    training_args = TrainingArguments(
        output_dir=output_dir,
        per_device_train_batch_size=2,   # small batch to fit in limited VRAM
        gradient_accumulation_steps=4,   # effective batch size of 8
        logging_steps=10,
        num_train_epochs=3,              # more epochs to pick up character traits
        save_steps=50,
        learning_rate=5e-5,
        warmup_ratio=0.1,
        max_grad_norm=1.0,
        save_on_each_node=True,
        gradient_checkpointing=True,
        gradient_checkpointing_kwargs={"use_reentrant": False},  # non-reentrant variant plays better with PEFT
        dataloader_pin_memory=False,     # reduce host memory usage
        remove_unused_columns=False,
        report_to="none",
        bf16=True,                       # mixed precision, matching the bfloat16 model weights
        save_total_limit=3,              # keep only the 3 newest checkpoints
    )

    # Add SwanLab monitoring. Read the API key from the environment instead of
    # hard-coding a secret in the script.
    swanlab_callback = SwanLabCallback(
        project="QwenLora_Learn",
        experiment_name="Qwen3-8B-LoRA-experiment"
    )
    swanlab.login(api_key=os.environ["SWANLAB_API_KEY"])

    # 7. Build the trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True),
        callbacks=[swanlab_callback]
    )

    # 8. Train
    print("Starting training...")
    trainer.train()

    # 9. Save the final adapter and tokenizer
    final_output_dir = os.path.join(output_dir, "final_model")
    trainer.save_model(final_output_dir)
    tokenizer.save_pretrained(final_output_dir)

    print(f"Training completed! Model saved to: {final_output_dir}")
    return final_output_dir
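
# Optional export step (a hedged sketch, not in the original training flow):
# fold the trained adapter back into the base weights so the result can be
# served without peft. merge_and_unload() is peft's documented API for this;
# the merged_dir default below is an illustrative placeholder. Note that
# merging assumes full-precision base weights; with an AWQ-quantized base
# (as in main() below), keep serving through PeftModel instead.
def merge_lora_adapter(model_path, lora_path, merged_dir="./output/merged_model"):
    """Merge LoRA weights into the base model and save a standalone copy."""
    base = AutoModelForCausalLM.from_pretrained(
        model_path, torch_dtype=torch.bfloat16, trust_remote_code=True
    )
    merged = PeftModel.from_pretrained(base, lora_path).merge_and_unload()
    merged.save_pretrained(merged_dir)
    AutoTokenizer.from_pretrained(model_path, trust_remote_code=True).save_pretrained(merged_dir)
    return merged_dir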

def test_trained_model(model_path, lora_path):
    """Smoke-test the trained model on a few role prompts."""
    print("Testing trained model...")

    # Load the base model
    tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        device_map="auto",
        torch_dtype=torch.bfloat16,
        trust_remote_code=True
    )

    # Attach the LoRA weights
    model = PeftModel.from_pretrained(model, lora_path)
    model.eval()

    # Test conversations
    test_cases = [
        {
            "system": "你是克莱恩,一位神秘学专家和侦探。",
            "user": "请告诉我一些关于神秘学的知识。"
        },
        {
            "system": "你是阿兹克,经验丰富的神秘学导师。",
            "user": "学生遇到了危险,你会给出什么建议?"
        },
        {
            "system": "你是塔利姆,一个有礼貌的普通人,遇到了困难。",
            "user": "你最近怎么样?"
        }
    ]

    for i, case in enumerate(test_cases):
        messages = [
            {"role": "system", "content": case["system"]},
            {"role": "user", "content": case["user"]}
        ]

        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_tensors="pt",
            return_dict=True,
            enable_thinking=False  # Qwen3: skip the "thinking" preamble
        )
        inputs = {k: v.to(model.device) for k, v in inputs.items()}

        # Sanity-check the logits for NaN/Inf before sampling
        with torch.no_grad():
            logits = model(**inputs).logits
            probs = torch.softmax(logits, dim=-1)
            if torch.isnan(probs).any():
                print("Probability tensor contains NaN!")
            if torch.isinf(probs).any():
                print("Probability tensor contains Inf!")

        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.7,
            top_p=0.8,
            pad_token_id=tokenizer.eos_token_id
        )

        # Strip the prompt tokens and decode only the newly generated reply
        response = outputs[0][inputs['input_ids'].shape[1]:]
        decoded_response = tokenizer.decode(response, skip_special_tokens=True)

        print(f"\n--- Test case {i+1} ---")
        print(f"System prompt: {case['system']}")
        print(f"User input: {case['user']}")
        print(f"Model reply: {decoded_response}")


def main():
    # Paths
    model_path = '/mnt/e/AI/Project02/AITrain/Qwen/Qwen3-8B-AWQ'  # base model
    data_path = './npc_dialogue_dataset.json'                     # training data
    output_dir = './output/NPC_Dialogue_LoRA'                     # output directory

    # #####test
    # final_model_path = os.path.join(output_dir, "final_model")
    # test_trained_model(model_path, final_model_path)

    # Make sure the data file exists
    if not os.path.exists(data_path):
        print(f"Data file not found: {data_path}")
        print("Run prepare_dialogue_data.py first to generate the training data")
        return

    try:
        # Train the adapter
        final_model_path = train_lora_model(model_path, data_path, output_dir)

        # Smoke-test it
        test_trained_model(model_path, final_model_path)
    except Exception as e:
        print(f"Error during training: {e}")
        import traceback
        traceback.print_exc()


if __name__ == '__main__':
    main()
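
# Usage (a hedged note; the script filename is whatever this file is saved as):
#   1. python prepare_dialogue_data.py              # generates npc_dialogue_dataset.json
#   2. SWANLAB_API_KEY=<your-key> python <this_script>.py
# The adapter is written to ./output/NPC_Dialogue_LoRA/final_model and loaded
# back by test_trained_model() for the smoke tests above.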