diff --git a/AITrain/train_npc_dialogue_lora.py b/AITrain/train_npc_dialogue_lora.py
index 1e30e14..e34c067 100644
--- a/AITrain/train_npc_dialogue_lora.py
+++ b/AITrain/train_npc_dialogue_lora.py
@@ -210,10 +210,11 @@ def create_lora_config():
         task_type=TaskType.CAUSAL_LM,
         target_modules=["q_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
         inference_mode=False,
-        r=8,  # increase rank to improve expressiveness
-        lora_alpha=8,  # alpha = 2 * r
-        lora_dropout=0.1,
-        modules_to_save=["lm_head", "embed_tokens"]
+        r=8,  # rank
+        lora_alpha=8,  # lower alpha for more stable training
+        lora_dropout=0.05,  # lower dropout to reduce instability
+        # drop modules_to_save to avoid NaN issues in the embed_tokens parameters
+        # modules_to_save=["lm_head", "embed_tokens"]
     )
     return config
 
@@ -231,7 +232,10 @@ def prepare_dataset(data_path, tokenizer):
 
     # Convert to Dataset format
     dataset = Dataset.from_list(data)
-
+    # Filter out None and empty strings (recommended)
+    dataset = dataset.filter(
+        lambda example: example.get("output") not in [None, ""]
+    )
     # Apply the preprocessing function
     tokenized_dataset = dataset.map(
         lambda example: process_func(example, tokenizer),
@@ -280,17 +284,32 @@ def train_lora_model(model_path, data_path, output_dir):
     # 3. Apply LoRA
     model = get_peft_model(model, lora_config)
 
-    # 4. Enable gradient computation
+    # 4. Numerical-stability initialization of the LoRA weights
+    for name, param in model.named_parameters():
+        if param.requires_grad:
+            if 'lora_A' in name:
+                # Initialize LoRA A matrices from a normal distribution
+                torch.nn.init.normal_(param, mean=0.0, std=0.01)
+            elif 'lora_B' in name:
+                # Initialize LoRA B matrices to zero
+                torch.nn.init.zeros_(param)
+
+            # Check for abnormal values after initialization
+            if torch.isnan(param).any() or torch.isinf(param).any():
+                logger.error(f"Abnormal values detected in parameter {name} after initialization")
+                torch.nn.init.normal_(param, mean=0.0, std=0.001)
+
+    # 5. Enable gradient computation
     for param in model.parameters():
         if param.requires_grad:
             param.requires_grad_(True)
 
     model.config.use_cache = False  # disable the cache to save VRAM
 
-    # 5. Prepare the dataset
+    # 6. Prepare the dataset
     train_preparedataset = prepare_dataset(data_path, tokenizer)
 
-    # 6. Configure training arguments - tuned for an RTX 3080
+    # 7. Configure training arguments - tuned for an RTX 3080
     training_args = TrainingArguments(
         output_dir=output_dir,
         per_device_train_batch_size=2,  # smaller batch size
@@ -298,9 +317,9 @@
         logging_steps=10,
         num_train_epochs=3,  # more epochs to learn the character traits thoroughly
         save_steps=50,
-        learning_rate=1e-5,  # lower learning rate for stability
+        learning_rate=5e-6,  # lower the learning rate further
         warmup_ratio=0.1,
-        max_grad_norm=1.0,  # keep gradient clipping
+        max_grad_norm=0.5,  # stricter gradient clipping
         save_on_each_node=True,
         gradient_checkpointing=True,
         gradient_checkpointing_kwargs={"use_reentrant": True},
@@ -310,6 +329,8 @@
         bf16=True,  # explicitly enable bf16 to match the dtype the model was loaded in
         # fp16=False,  # make sure fp16 stays disabled
         save_total_limit=3,  # keep only the 3 most recent checkpoints
+        adam_epsilon=1e-8,  # improve numerical stability
+        weight_decay=0.01,  # add weight decay
     )
 
     # Add SwanLab monitoring
@@ -322,7 +343,7 @@ def train_lora_model(model_path, data_path, output_dir):
     # Create the gradient-monitoring callback
     gradient_monitor = GradientMonitorCallback()
 
-    # 7. Create the trainer
+    # 8. Create the trainer
     trainer = Trainer(
         model=model,
         args=training_args,
@@ -331,7 +352,7 @@
         train_dataset=train_preparedataset,
         callbacks=[swanlab_callback, gradient_monitor]  # add the gradient-monitoring callback
     )
-    # 8. Start training
+    # 9. Start training
     print("Starting training...")
     logger.info("Starting training...")
 
@@ -344,7 +365,7 @@
         logger.error(traceback.format_exc())
         raise
 
-    # 9. Save the final model
+    # 10. Save the final model
     final_output_dir = os.path.join(output_dir, "final_model")
     trainer.save_model(final_output_dir)
     tokenizer.save_pretrained(final_output_dir)
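
Note: step 4 of the patch re-initializes the LoRA matrices and checks them for NaN/Inf. A minimal pre-flight sketch in the same spirit is shown below; the helper name `check_trainable_params` and its `logger` argument are assumptions for illustration and are not part of this patch or the repository.

    import torch

    def check_trainable_params(model, logger=None):
        """Verify that every trainable (LoRA) parameter is finite before training."""
        trainable, bad = 0, []
        for name, param in model.named_parameters():
            if not param.requires_grad:
                continue
            trainable += param.numel()
            # torch.isfinite returns False for NaN and +/-Inf entries
            if not torch.isfinite(param).all():
                bad.append(name)
        report = f"trainable params: {trainable:,}; non-finite tensors: {bad if bad else 'none'}"
        (logger.info if logger is not None else print)(report)
        return len(bad) == 0

One way to use it would be to call it right before trainer.train() and abort the run if it returns False, rather than letting a NaN surface mid-training.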