diff --git a/AITrain/train_npc_dialogue_lora.py b/AITrain/train_npc_dialogue_lora.py
new file mode 100644
index 0000000..b46ce5b
--- /dev/null
+++ b/AITrain/train_npc_dialogue_lora.py
@@ -0,0 +1,299 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+'''
+LoRA fine-tuning script for character dialogue.
+Fine-tunes a Qwen 8B model on data derived from test.jsonl to generate in-game NPC dialogue.
+'''
+
+import json
+import os
+import torch
+from peft import LoraConfig, PeftModel, TaskType, get_peft_model
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import TrainingArguments, Trainer, DataCollatorForSeq2Seq
+from datasets import Dataset
+import platform
+import swanlab
+from swanlab.integration.transformers import SwanLabCallback
+
+# Windows multiprocessing compatibility fix
+if platform.system() == "Windows":
+    import multiprocessing
+    multiprocessing.set_start_method('spawn', force=True)
+
+os.environ['VLLM_USE_MODELSCOPE'] = 'True'
+os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
+os.environ["TORCH_USE_CUDA_DSA"] = "1"
+
+
+def process_func(example, tokenizer):
+    """Preprocess one sample into input_ids / attention_mask / labels."""
+    MAX_LENGTH = 1024
+
+    # Build the chat template, specialised for character dialogue
+    system_prompt = f"你是一个游戏中的NPC角色。{example['character']}"
+    instruction = example['instruction']
+    user_input = example['input']
+
+    # Tokenize the prompt part
+    instruction = tokenizer(
+        f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
+        f"<|im_start|>user\n{instruction + user_input}<|im_end|>\n"
+        f"<|im_start|>assistant\n",
+        add_special_tokens=False
+    )
+
+    # Tokenize the response part
+    response = tokenizer(f"{example['output']}", add_special_tokens=False)
+
+    # Concatenate prompt and response
+    input_ids = instruction["input_ids"] + response["input_ids"] + [tokenizer.pad_token_id]
+    attention_mask = instruction["attention_mask"] + response["attention_mask"] + [1]
+
+    # Labels: compute the loss only on the response part
+    labels = [-100] * len(instruction["input_ids"]) + response["input_ids"] + [tokenizer.pad_token_id]
+
+    # Truncate to MAX_LENGTH
+    if len(input_ids) > MAX_LENGTH:
+        input_ids = input_ids[:MAX_LENGTH]
+        attention_mask = attention_mask[:MAX_LENGTH]
+        labels = labels[:MAX_LENGTH]
+
+    return {
+        "input_ids": input_ids,
+        "attention_mask": attention_mask,
+        "labels": labels
+    }
+
+def load_model_and_tokenizer(model_path):
+    """Load the base model and tokenizer."""
+    print(f"Loading model from: {model_path}")
+
+    # Load the tokenizer
+    tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False, trust_remote_code=True)
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
+
+    # Load the model
+    model = AutoModelForCausalLM.from_pretrained(
+        model_path,
+        device_map="auto",
+        torch_dtype=torch.bfloat16,
+        trust_remote_code=True
+    )
+
+    return model, tokenizer
+
+def create_lora_config():
+    """Create the LoRA configuration."""
+    config = LoraConfig(
+        task_type=TaskType.CAUSAL_LM,
+        target_modules=["q_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj", "o_proj"],
+        inference_mode=False,
+        r=8,              # rank; raise it for more expressive capacity
+        lora_alpha=16,    # alpha = 2 * r
+        lora_dropout=0.1,
+        modules_to_save=["lm_head", "embed_tokens"]
+    )
+    return config
+
+def prepare_dataset(data_path, tokenizer):
+    """Load and tokenize the training data."""
+    print(f"Loading dataset from: {data_path}")
+
+    # Load the JSON data
+    with open(data_path, 'r', encoding='utf-8') as f:
+        data = json.load(f)
+
+    print(f"Total samples: {len(data)}")
+
+    # Convert to a datasets.Dataset
+    dataset = Dataset.from_list(data)
+
+    # Apply the preprocessing function
+    def tokenize_function(examples):
+        return process_func(examples, tokenizer)
+
+    tokenized_dataset = dataset.map(
+        tokenize_function,
+        remove_columns=dataset.column_names,
+        batched=False
+    )
+
+    return tokenized_dataset
+
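+# Optional sanity check (sketch, not called by the training flow): run one hand-written
+# record through process_func before a full run to confirm that prompt tokens are masked
+# with -100 and that all three sequences have the same length. The record below is a
+# made-up example of the assumed schema (character / instruction / input / output);
+# real records come from npc_dialogue_dataset.json.
+def sanity_check_sample(tokenizer):
+    sample = {
+        "character": "示例角色:一位沉默寡言的铁匠。",
+        "instruction": "以角色的口吻回答玩家。",
+        "input": "你能帮我修理这把剑吗?",
+        "output": "把剑放下吧,明天来取。",
+    }
+    encoded = process_func(sample, tokenizer)
+    assert len(encoded["input_ids"]) == len(encoded["labels"]) == len(encoded["attention_mask"])
+    masked = encoded["labels"].count(-100)
+    print(f"prompt tokens (masked): {masked}, total tokens: {len(encoded['input_ids'])}")
+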
+def train_lora_model(model_path, data_path, output_dir):
+    """Train the LoRA model."""
+
+    # 1. Load the model and tokenizer
+    model, tokenizer = load_model_and_tokenizer(model_path)
+
+    # 2. Create the LoRA config
+    lora_config = create_lora_config()
+
+    # 3. Apply LoRA
+    model = get_peft_model(model, lora_config)
+
+    # 4. Make the embedding outputs require grad so gradient checkpointing works with frozen base weights
+    model.enable_input_require_grads()
+
+    model.config.use_cache = False  # disable the KV cache to save VRAM during training
+
+    # 5. Prepare the dataset
+    train_dataset = prepare_dataset(data_path, tokenizer)
+
+    # 6. Training arguments, tuned for an RTX 3080
+    training_args = TrainingArguments(
+        output_dir=output_dir,
+        per_device_train_batch_size=1,    # small batch size to fit in VRAM
+        gradient_accumulation_steps=4,    # compensate with gradient accumulation
+        logging_steps=10,
+        num_train_epochs=3,               # more epochs so the model learns the character traits
+        save_steps=50,
+        learning_rate=5e-5,               # slightly higher learning rate
+        warmup_ratio=0.1,
+        max_grad_norm=1.0,
+        save_on_each_node=True,
+        gradient_checkpointing=True,
+        gradient_checkpointing_kwargs={"use_reentrant": True},
+        dataloader_pin_memory=False,      # reduce memory usage
+        remove_unused_columns=False,
+        report_to="none",
+        #fp16=True,                       # mixed-precision training
+        save_total_limit=3,               # keep only the 3 most recent checkpoints
+    )
+
+    # SwanLab experiment tracking
+    swanlab_callback = SwanLabCallback(
+        project="QwenLora_Learn",
+        experiment_name="Qwen3-8B-LoRA-experiment"
+    )
+    swanlab.login(api_key="pAxFTROvv3aspmEijax46")
+
+    # 7. Create the trainer
+    trainer = Trainer(
+        model=model,
+        args=training_args,
+        train_dataset=train_dataset,
+        data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True),
+        callbacks=[swanlab_callback]  # attach the SwanLab callback
+    )
+
+    # 8. Start training
+    print("Starting training...")
+    trainer.train()
+
+    # 9. Save the final model
+    final_output_dir = os.path.join(output_dir, "final_model")
+    trainer.save_model(final_output_dir)
+    tokenizer.save_pretrained(final_output_dir)
+
+    print(f"Training completed! Model saved to: {final_output_dir}")
+    return final_output_dir
+
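+# Optional deployment sketch (not called anywhere in this script): merge the trained LoRA
+# weights into an *unquantized* copy of the base model so it can be served without peft.
+# Merging generally does not work directly against the AWQ checkpoint used above, so
+# base_model_path is assumed to point at a full-precision Qwen3-8B download.
+def merge_lora_for_deployment(base_model_path, lora_path, merged_dir):
+    """Merge LoRA weights into the base model and save a standalone copy."""
+    base = AutoModelForCausalLM.from_pretrained(
+        base_model_path,
+        torch_dtype=torch.bfloat16,
+        trust_remote_code=True
+    )
+    merged = PeftModel.from_pretrained(base, lora_path).merge_and_unload()
+    merged.save_pretrained(merged_dir)
+    tokenizer = AutoTokenizer.from_pretrained(base_model_path, trust_remote_code=True)
+    tokenizer.save_pretrained(merged_dir)
+    return merged_dir
+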
+def test_trained_model(model_path, lora_path):
+    """Smoke-test the fine-tuned model on a few dialogue prompts."""
+    print("Testing trained model...")
+
+    # Load the base model
+    tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False, trust_remote_code=True)
+    model = AutoModelForCausalLM.from_pretrained(
+        model_path,
+        device_map="auto",
+        torch_dtype=torch.bfloat16,
+        trust_remote_code=True
+    )
+
+    # Load the LoRA weights
+    model = PeftModel.from_pretrained(model, lora_path)
+
+    # Test dialogues
+    test_cases = [
+        {
+            "system": "你是克莱恩,一位神秘学专家和侦探。",
+            "user": "请告诉我一些关于神秘学的知识。"
+        },
+        {
+            "system": "你是阿兹克,经验丰富的神秘学导师。",
+            "user": "学生遇到了危险,你会给出什么建议?"
+        },
+        {
+            "system": "你是塔利姆,一个有礼貌的普通人,遇到了困难。",
+            "user": "你最近怎么样?"
+        }
+    ]
+
+    for i, case in enumerate(test_cases):
+        messages = [
+            {"role": "system", "content": case["system"]},
+            {"role": "user", "content": case["user"]}
+        ]
+
+        inputs = tokenizer.apply_chat_template(
+            messages,
+            add_generation_prompt=True,
+            tokenize=True,
+            return_tensors="pt",
+            return_dict=True,
+            enable_thinking=False
+        )
+
+        inputs = {k: v.to(model.device) for k, v in inputs.items()}
+
+        with torch.no_grad():
+            logits = model(**inputs).logits
+            probs = torch.softmax(logits, dim=-1)
+            # Check for invalid values
+            if torch.isnan(probs).any():
+                print("Probability tensor contains NaN!")
+            if torch.isinf(probs).any():
+                print("Probability tensor contains Inf!")
+            if (probs < 0).any():
+                print("Probability tensor contains negative values!")
+
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=200,
+            do_sample=True,
+            temperature=0.7,
+            top_p=0.8,
+            pad_token_id=tokenizer.eos_token_id
+        )
+
+        response = outputs[0][inputs['input_ids'].shape[1]:]
+        decoded_response = tokenizer.decode(response, skip_special_tokens=True)
+
+        print(f"\n--- Test case {i+1} ---")
+        print(f"System prompt: {case['system']}")
+        print(f"User input: {case['user']}")
+        print(f"Model reply: {decoded_response}")
+
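+# For reference: the Qwen3 model card recommends temperature=0.7, top_p=0.8, top_k=20 and
+# min_p=0 for non-thinking mode. The generate() call above already uses the first two;
+# top_k=20 could be added if replies become repetitive.
+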
+def main():
+    # Paths
+    model_path = '/mnt/g/Project02/AITrain/Qwen/Qwen3-8B-AWQ'  # base model path
+    data_path = './npc_dialogue_dataset.json'                  # training data path
+    output_dir = './output/NPC_Dialogue_LoRA'                  # output directory
+
+    # Make sure the training data exists
+    if not os.path.exists(data_path):
+        print(f"Data file not found: {data_path}")
+        print("Run prepare_dialogue_data.py first to generate the training data")
+        return
+
+    try:
+        # Train the model
+        final_model_path = train_lora_model(model_path, data_path, output_dir)
+
+        # Test the model
+        test_trained_model(model_path, final_model_path)
+
+    except Exception as e:
+        print(f"Error during training: {e}")
+        import traceback
+        traceback.print_exc()
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/AITrain/vllm_model.py b/AITrain/vllm_model.py
deleted file mode 100644
index 8af1f97..0000000
--- a/AITrain/vllm_model.py
+++ /dev/null
@@ -1,53 +0,0 @@
-import os
-from vllm import LLM
-from vllm import SamplingParams
-from transformers import AutoTokenizer
-
-os.environ['VLLM_USE_MODELSCOPE'] = 'True'
-
-
-def get_completion(prompts, model, tokenizer=None, temperature=1.0, top_p=0.95, top_k=20, min_p=0,
-                   max_tokens=2048, max_model_len=4096):
-    stop_token_ids = [151645, 151643]
-    # Sampling parameters: temperature controls the diversity of the generated text,
-    # top_p controls nucleus sampling,
-    # top_k limits the number of candidate tokens to balance quality and diversity,
-    # min_p filters candidates by a probability threshold to keep quality while adding diversity
-    sampling_params = SamplingParams(temperature=temperature, top_p=top_p,
-                                     top_k=top_k, min_p=min_p, max_tokens=max_tokens, stop_token_ids=stop_token_ids)
-    # Initialize the vLLM inference engine
-    llm = LLM(
-        model=model,
-        tokenizer=tokenizer,
-        max_model_len=max_model_len,
-        gpu_memory_utilization=0.85,
-        trust_remote_code=True,
-        enforce_eager=True,
-        swap_space=2  # use 2 GB of swap space
-    )
-    outputs = llm.generate(prompts, sampling_params)
-    return outputs
-
-
-
-if __name__ == '__main__':
-    model = '/home/tong/AIProject/Qwen/Qwen/Qwen3-0.6B'
-    tokenizer = AutoTokenizer.from_pretrained(model, use_fast=False)  # load the tokenizer
-    prompt = "给我一个关于大模型的简短介绍"
-    messages = [
-        {"role": "user", "content": prompt}
-    ]
-    text = tokenizer.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True,
-        enable_thinking=False)
-
-    outputs = get_completion(text, model, tokenizer=None, temperature=0.6, top_p=0.95, top_k=20, min_p=0)  # for thinking mode, the official recommendation is temperature=0.6, top_p=0.95, top_k=20, min_p=0
-
-    # The output is a list of RequestOutput objects containing the prompt, the generated text and other info.
-    # Print the outputs.
-    for output in outputs:
-        prompt = output.prompt
-        generated_text = output.outputs[0].text
-        print(f"Prompt: {prompt!r}, \nResponse: {generated_text!r}")
diff --git a/AITrain/配置.txt b/AITrain/配置.txt
index ba2551f..d03a779 100644
--- a/AITrain/配置.txt
+++ b/AITrain/配置.txt
@@ -13,4 +13,10 @@ conda install pytorch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 pytorch-cuda=
 5.下载deepseek模型
 pip install modelscope
 pip install vllm
+pip install swanlab==0.5.7
+pip install accelerate==1.6.0
+pip install datasets==3.5.1
+pip install peft==0.15.2
+pip install autoawq
+
 python model_download.py
\ No newline at end of file