2025-08-08 10:17:40 +08:00
|
|
|
|
#!/usr/bin/env python
|
|
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
'''
Role-dialogue LoRA fine-tuning script.

Fine-tunes the Qwen 8B model on the test.jsonl data to generate game NPC dialogue.
'''
|
|
|
|
|
|
|
|
|
|
|
|
import json
|
|
|
|
|
|
import os
|
|
|
|
|
|
import torch
|
|
|
|
|
|
from peft import LoraConfig, PeftModel, TaskType, get_peft_model
|
|
|
|
|
|
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
|
|
|
|
from transformers import TrainingArguments, Trainer, DataCollatorForSeq2Seq
|
|
|
|
|
|
from datasets import Dataset
|
|
|
|
|
|
import platform
|
|
|
|
|
|
import swanlab
|
|
|
|
|
|
from swanlab.integration.transformers import SwanLabCallback
|
|
|
|
|
|
|
|
|
|
|
|
# Windows compatibility: select the 'spawn' multiprocessing start method
# explicitly (force=True replaces a start method that was already set).
if platform.system() == "Windows":
    import multiprocessing

    multiprocessing.set_start_method('spawn', force=True)

# Runtime switches. `setdefault` keeps the script's defaults but lets the
# caller override them from the shell, e.g. CUDA_LAUNCH_BLOCKING=0 once
# debugging is finished (blocking launches make every CUDA call synchronous).
os.environ.setdefault('VLLM_USE_MODELSCOPE', 'True')
os.environ.setdefault("CUDA_LAUNCH_BLOCKING", "1")
os.environ.setdefault("TORCH_USE_CUDA_DSA", "1")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def process_func(example, tokenizer, max_length=1024):
    """Tokenize one dialogue record into a causal-LM training sample.

    The prompt is rendered by hand as system / user / assistant turns (with an
    empty ``<think>`` block) and is followed by the reference answer plus one
    terminating pad token. Prompt positions are labelled ``-100`` so that only
    the answer and the terminator contribute to the loss.

    Args:
        example: Mapping with the keys ``character``, ``instruction``,
            ``input`` and ``output``. A value of ``None`` (e.g. a JSON null)
            is treated as an empty string.
        tokenizer: Callable that returns ``input_ids`` and ``attention_mask``
            lists for a string and exposes ``pad_token_id``.
        max_length: Maximum number of tokens kept; longer samples are cut
            from the right. Defaults to 1024.

    Returns:
        Dict with the equally long lists ``input_ids``, ``attention_mask``
        and ``labels``.

    Raises:
        KeyError: If one of the four expected keys is absent from ``example``.
    """

    def _text(key):
        # None would otherwise be formatted as the literal "None" inside the
        # prompt, or break the instruction + input concatenation.
        value = example[key]
        return "" if value is None else f"{value}"

    # Dialogue template, specialised for role-play NPC conversations.
    system_prompt = f"你是一个游戏中的NPC角色。{_text('character')}"
    user_turn = _text('instruction') + _text('input')

    # Prompt part.
    # NOTE(review): the literal "<s>" prefix is kept from the original
    # template; confirm it matches the chat template used at inference time.
    prompt = tokenizer(
        f"<s><|im_start|>system\n{system_prompt}<|im_end|>\n"
        f"<|im_start|>user\n{user_turn}<|im_end|>\n"
        f"<|im_start|>assistant\n<think>\n\n</think>\n\n",
        add_special_tokens=False
    )

    # Answer part.
    answer = tokenizer(_text('output'), add_special_tokens=False)

    # Prompt + answer + one pad token that marks the end of the reply.
    end_id = tokenizer.pad_token_id
    input_ids = prompt["input_ids"] + answer["input_ids"] + [end_id]
    attention_mask = prompt["attention_mask"] + answer["attention_mask"] + [1]

    # Labels: loss is computed on the answer part only.
    labels = [-100] * len(prompt["input_ids"]) + answer["input_ids"] + [end_id]

    # Right truncation; slicing is a no-op for samples that already fit.
    return {
        "input_ids": input_ids[:max_length],
        "attention_mask": attention_mask[:max_length],
        "labels": labels[:max_length]
    }
|
|
|
|
|
|
|
|
|
|
|
|
def load_model_and_tokenizer(model_path):
    """Load the causal-LM checkpoint at ``model_path`` together with its tokenizer.

    Args:
        model_path: Path or identifier passed to both ``from_pretrained`` calls.

    Returns:
        Tuple ``(model, tokenizer)``. When the tokenizer defines no pad
        token, its EOS token is used as the pad token.
    """
    print(f"Loading model from: {model_path}")

    # Tokenizer: slow implementation requested, remote code allowed.
    tok = AutoTokenizer.from_pretrained(model_path, use_fast=False, trust_remote_code=True)
    if tok.pad_token is None:
        tok.pad_token = tok.eos_token

    # Model: bfloat16 weights, device placement delegated to device_map="auto".
    load_kwargs = {
        "device_map": "auto",
        "torch_dtype": torch.bfloat16,
        "trust_remote_code": True,
    }
    net = AutoModelForCausalLM.from_pretrained(model_path, **load_kwargs)

    return net, tok
|
|
|
|
|
|
|
|
|
|
|
|
def create_lora_config():
    """Build the LoRA configuration used for fine-tuning.

    Returns:
        A ``LoraConfig`` for causal-LM training with rank 8, alpha 16 and
        dropout 0.1 on the attention and MLP projection layers; ``lm_head``
        and ``embed_tokens`` are listed under ``modules_to_save``.
    """
    rank = 8
    projection_layers = [
        "q_proj", "k_proj", "v_proj",
        "gate_proj", "up_proj", "down_proj",
        "o_proj",
    ]
    return LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        target_modules=projection_layers,
        inference_mode=False,
        r=rank,
        lora_alpha=2 * rank,  # alpha = 2 * r
        lora_dropout=0.1,
        modules_to_save=["lm_head", "embed_tokens"],
    )
|
|
|
|
|
|
|
|
|
|
|
|
def prepare_dataset(data_path, tokenizer):
    """Load the dialogue records and tokenize them for training.

    Args:
        data_path: Path to the training data. A ``.jsonl`` file is read as
            one JSON object per non-blank line; any other file is read as a
            single JSON document that must be a list of records.
        tokenizer: Tokenizer forwarded to ``process_func`` for every record.

    Returns:
        The mapped ``Dataset``; the original columns are removed, so only
        the fields returned by ``process_func`` remain.

    Raises:
        ValueError: If the parsed JSON document is not a list.
    """
    print(f"Loading dataset from: {data_path}")

    # Load the records (JSON array or JSON Lines, chosen by file extension).
    with open(data_path, 'r', encoding='utf-8') as f:
        if str(data_path).lower().endswith('.jsonl'):
            data = [json.loads(line) for line in f if line.strip()]
        else:
            data = json.load(f)

    if not isinstance(data, list):
        raise ValueError(
            f"Expected a list of records in {data_path}, got {type(data).__name__}"
        )

    print(f"Total samples: {len(data)}")

    # Convert to a Dataset.
    dataset = Dataset.from_list(data)

    # Apply the preprocessing function record by record.
    def tokenize_function(examples):
        return process_func(examples, tokenizer)

    tokenized_dataset = dataset.map(
        tokenize_function,
        remove_columns=dataset.column_names,
        batched=False
    )

    return tokenized_dataset
|
|
|
|
|
|
|
|
|
|
|
|
def train_lora_model(model_path, data_path, output_dir):
    """Fine-tune the base model with LoRA and save the result.

    Training progress is reported through the SwanLab callback. To
    authenticate from this script, export the API key as ``SWANLAB_API_KEY``;
    when the variable is unset no explicit login is performed.

    Args:
        model_path: Base model location, forwarded to
            ``load_model_and_tokenizer``.
        data_path: Training data file, forwarded to ``prepare_dataset``.
        output_dir: Directory for checkpoints; the final model is written to
            its ``final_model`` sub-directory.

    Returns:
        Path of the directory that holds the final model and tokenizer.
    """
    # 1. Load model and tokenizer.
    model, tokenizer = load_model_and_tokenizer(model_path)

    # 2. Gradient checkpointing is enabled below with use_reentrant=True,
    #    which needs the embedding output to require gradients.
    model.enable_input_require_grads()

    # 3. Build the LoRA configuration and wrap the model with it.
    lora_config = create_lora_config()
    model = get_peft_model(model, lora_config)

    # 4. Disable the KV cache to save GPU memory.
    model.config.use_cache = False

    # 5. Prepare the dataset.
    train_dataset = prepare_dataset(data_path, tokenizer)

    # 6. Training arguments, tuned for a 3080 GPU.
    #    The bf16/fp16 mixed-precision flags are left unset.
    training_args = TrainingArguments(
        output_dir=output_dir,
        per_device_train_batch_size=2,   # small batch size
        gradient_accumulation_steps=4,   # more gradient accumulation
        logging_steps=10,
        num_train_epochs=3,              # enough epochs to learn the character traits
        save_steps=50,
        learning_rate=5e-5,
        warmup_ratio=0.1,
        max_grad_norm=1.0,
        save_on_each_node=True,
        gradient_checkpointing=True,
        gradient_checkpointing_kwargs={"use_reentrant": True},
        dataloader_pin_memory=False,     # reduce memory usage
        remove_unused_columns=False,
        report_to="none",
        save_total_limit=3,              # keep only the 3 most recent checkpoints
    )

    # SwanLab monitoring. The API key is taken from the environment and is
    # never stored in source; a key that was previously committed in this
    # file must be treated as leaked and rotated.
    api_key = os.environ.get("SWANLAB_API_KEY")
    if api_key:
        swanlab.login(api_key=api_key)
    swanlab_callback = SwanLabCallback(
        project="QwenLora_Learn",
        experiment_name="Qwen3-8B-LoRA-experiment"
    )

    # 7. Create the trainer.
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True),
        callbacks=[swanlab_callback]
    )

    # 8. Train.
    print("Starting training...")
    trainer.train()

    # 9. Save the final model together with the tokenizer.
    final_output_dir = os.path.join(output_dir, "final_model")
    trainer.save_model(final_output_dir)
    tokenizer.save_pretrained(final_output_dir)

    print(f"Training completed! Model saved to: {final_output_dir}")
    return final_output_dir
|
|
|
|
|
|
|
|
|
|
|
|
def test_trained_model(model_path, lora_path):
    """Run a few sample NPC conversations through the fine-tuned model.

    Loads the base model, attaches the LoRA weights from ``lora_path`` and,
    for each built-in test case, checks the prompt's probability tensor for
    invalid values before sampling a reply and printing it.

    Args:
        model_path: Base model location (also used for the tokenizer).
        lora_path: Directory containing the trained LoRA weights.
    """
    print("Testing trained model...")

    # Load the base model.
    tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        device_map="auto",
        torch_dtype=torch.bfloat16,
        trust_remote_code=True
    )

    # Attach the LoRA weights and switch to evaluation mode.
    model = PeftModel.from_pretrained(model, lora_path)
    model.eval()

    # Sample conversations.
    test_cases = [
        {
            "system": "你是克莱恩,一位神秘学专家和侦探。",
            "user": "请告诉我一些关于神秘学的知识。"
        },
        {
            "system": "你是阿兹克,经验丰富的神秘学导师。",
            "user": "学生遇到了危险,你会给出什么建议?"
        },
        {
            "system": "你是塔利姆,一个有礼貌的普通人,遇到了困难。",
            "user": "你最近怎么样?"
        }
    ]

    for i, case in enumerate(test_cases, start=1):
        messages = [
            {"role": "system", "content": case["system"]},
            {"role": "user", "content": case["user"]}
        ]

        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_tensors="pt",
            return_dict=True,
            enable_thinking=False
        )

        # Follow the device the model was actually placed on instead of
        # assuming CUDA is available.
        inputs = {k: v.to(model.device) for k, v in inputs.items()}

        with torch.no_grad():
            logits = model(**inputs).logits
            probs = torch.softmax(logits, dim=-1)

            # Check for invalid values before sampling.
            if torch.isnan(probs).any():
                print("概率张量包含NaN!")
            if torch.isinf(probs).any():
                print("概率张量包含Inf!")
            if (probs < 0).any():
                print("概率张量包含负数!")

            outputs = model.generate(
                **inputs,
                max_new_tokens=200,
                do_sample=True,
                temperature=0.7,
                top_p=0.8,
                pad_token_id=tokenizer.eos_token_id
            )

        # Keep only the newly generated tokens (drop the prompt).
        response = outputs[0][inputs['input_ids'].shape[1]:]
        decoded_response = tokenizer.decode(response, skip_special_tokens=True)

        print(f"\n--- 测试用例 {i} ---")
        print(f"系统提示: {case['system']}")
        print(f"用户输入: {case['user']}")
        print(f"模型回复: {decoded_response}")
|
|
|
|
|
|
|
|
|
|
|
|
def main(model_path='/mnt/e/AI/Project02/AITrain/Qwen/Qwen3-8B-AWQ',
         data_path='./npc_dialogue_dataset.json',
         output_dir='./output/NPC_Dialogue_LoRA'):
    """Train the LoRA adapter and then run the sample conversations.

    Args:
        model_path: Base model path.
        data_path: Training data path. If the file does not exist, a hint is
            printed and nothing else happens.
        output_dir: Output directory for checkpoints and the final model.

    Any exception raised during training or testing is reported on stdout
    together with its traceback; it is not re-raised.
    """
    # Make sure the data file exists before loading the model.
    if not os.path.exists(data_path):
        print(f"数据文件不存在: {data_path}")
        print("请先运行 prepare_dialogue_data.py 生成训练数据")
        return

    try:
        # Train the model.
        final_model_path = train_lora_model(model_path, data_path, output_dir)

        # Test the model.
        test_trained_model(model_path, final_model_path)

    except Exception as e:
        print(f"训练过程中出现错误: {e}")
        import traceback
        traceback.print_exc()


if __name__ == '__main__':
    main()
|