Project02/AITrain/npc_dialogue_generator.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Game NPC dialogue generator.
Generates in-character dialogue from a LoRA-fine-tuned base model.
"""
import torch
import json
import random
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import Dict, List, Optional
import platform

# Windows multiprocessing compatibility fix
if platform.system() == "Windows":
    import multiprocessing
    multiprocessing.set_start_method('spawn', force=True)

class NPCDialogueGenerator:
    def __init__(self, base_model_path: str, lora_model_path: Optional[str] = None, external_character_data: Optional[Dict] = None):
        """
        Initialize the NPC dialogue generator.

        Args:
            base_model_path: path to the base model
            lora_model_path: path to the LoRA adapter (optional)
            external_character_data: external character data (optional; takes precedence over the built-in profiles)
        """
        self.base_model_path = base_model_path
        self.lora_model_path = lora_model_path
        self.model = None
        self.tokenizer = None
        # Prefer externally supplied character data; otherwise fall back to the built-in profiles
        if external_character_data:
            self.character_profiles = self._process_external_character_data(external_character_data)
            print(f"✓ 使用外部角色数据: {list(self.character_profiles.keys())}")
        else:
            self.character_profiles = self._load_character_profiles()
            print(f"✓ 使用内置角色数据: {list(self.character_profiles.keys())}")
        self._load_model()

    def _process_external_character_data(self, external_data: Dict) -> Dict:
        """
        Convert external character data into the profile format used by the dialogue generator.

        Args:
            external_data: character data coming from the knowledge_base

        Returns:
            Dictionary of processed character profiles
        """
        processed_profiles = {}
        for char_name, char_data in external_data.items():
            # Pull out the individual sections of the character record
            basic_info = char_data.get('basic_info', {})
            personality = char_data.get('personality', {})
            background = char_data.get('background', {})
            skills = char_data.get('skills_and_abilities', {})
            speech_patterns = char_data.get('speech_patterns', {})
            # Build the character profile
            profile = {
                "name": char_data.get('character_name', char_name),
                "title": basic_info.get('occupation', '未知'),
                "personality": personality.get('core_traits', []) + personality.get('strengths', []),
                "background": background.get('childhood', '') + ' ' + background.get('education', ''),
                "speech_patterns": speech_patterns.get('vocabulary', []) + speech_patterns.get('tone', []),
                "sample_dialogues": self._generate_sample_dialogues(char_data),
                # Keep the full record around for advanced features
                "full_data": char_data
            }
            processed_profiles[char_name] = profile
        return processed_profiles
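
    # Illustrative sketch (not from the original file): the shape of external_character_data
    # that _process_external_character_data expects, inferred from the keys read above.
    # The character name and field values below are hypothetical placeholders.
    #
    #     example_external_data = {
    #         "某角色": {
    #             "character_name": "某角色",
    #             "basic_info": {"occupation": "侦探"},
    #             "personality": {"core_traits": ["冷静"], "strengths": ["观察力强"]},
    #             "background": {"childhood": "……", "education": "……"},
    #             "skills_and_abilities": {},
    #             "speech_patterns": {"vocabulary": ["线索", "证据"], "tone": ["沉稳"]}
    #         }
    #     }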

    def _generate_sample_dialogues(self, char_data: Dict) -> List[str]:
        """
        Generate sample dialogue lines based on the character data.

        Args:
            char_data: character data

        Returns:
            List of sample dialogue lines
        """
        # Sample dialogues could be tailored to the character's personality and
        # background; for now, pick generic examples based on occupation.
        basic_info = char_data.get('basic_info', {})
        occupation = basic_info.get('occupation', '角色')
        if '侦探' in occupation or '调查员' in occupation:
            return [
                "我需要仔细分析这个案件。",
                "每个细节都可能很重要。",
                "让我重新梳理一下线索。"
            ]
        elif '教授' in occupation or '博士' in occupation:
            return [
                "根据我的研究,这个现象很特殊。",
                "我们需要更谨慎地处理这个问题。",
                "知识就是力量,但也要小心使用。"
            ]
        else:
            return [
                "我遇到了一些困难。",
                "请帮帮我。",
                "这太奇怪了。"
            ]

    def _load_character_profiles(self) -> Dict:
        """Load the built-in character profiles."""
        return {
            "维多利亚·布莱克伍德": {
                "name": "维多利亚·布莱克伍德",
                "title": "神秘学专家",
                "personality": ["理性分析", "谨慎小心", "实用主义", "思维缜密"],
                "background": "拥有丰富神秘学知识和战斗经验的侦探,既是非凡者也是夏洛克·莫里亚蒂",
                "speech_patterns": ["会使用专业术语", "经常进行逻辑分析", "对危险保持警告", "内心独白较多"],
                "sample_dialogues": [
                    "好奇往往是导致死亡的主要因素。",
                    "总之,我的任务到此为止。",
                    "这需要仔细分析才能得出结论。"
                ]
            },
            "阿奇博尔德·韦恩博士": {
                "name": "阿奇博尔德·韦恩博士",
                "title": "神秘学导师",
                "personality": ["沉稳睿智", "言简意赅", "关怀学生", "经验丰富"],
                "background": "神秘学领域的资深专家,经验极其丰富的导师,知识渊博",
                "speech_patterns": ["话语简练但信息量大", "给予实用指导", "语调平和但权威", "关心但保持距离"],
                "sample_dialogues": [
                    "耐心是修炼的基础。",
                    "不要急于求成,稳扎稳打比什么都重要。",
                    "这种情况需要格外小心。"
                ]
            },
            "塔利姆": {
                "name": "塔利姆",
                "title": "文雅绅士",
                "personality": ["礼貌尊敬", "有文化素养", "寻求帮助", "温和友善"],
                "background": "受过良好教育的普通人,有一定的文学修养,遇到困难时会寻求专家帮助",
                "speech_patterns": ["使用礼貌称谓", "表达困惑时措辞文雅", "会引用文学作品", "语气温和"],
                "sample_dialogues": [
                    "噢,尊敬的大侦探,你最近在忙碌什么?",
                    "这不是《罗密欧与朱丽叶》的故事!",
                    "我有个朋友遇到了困难..."
                ]
            },
            "艾伦": {
                "name": "艾伦",
                "title": "困扰的求助者",
                "personality": ["焦虑不安", "详细描述", "半信半疑", "急需帮助"],
                "background": "普通人,但最近遭遇了一系列神秘的厄运事件,怀疑受到诅咒",
                "speech_patterns": ["情绪紧张", "会详细描述遭遇", "语气急切", "表现出恐惧"],
                "sample_dialogues": [
                    "最近我总是遭遇各种厄运...",
                    "我怀疑是不是受到了什么诅咒。",
                    "请帮帮我,我不知道该怎么办!"
                ]
            },
"戴莉.西蒙妮": {
"name": "戴莉·西蒙妮",
"title": "专业调查员",
"personality": ["专业简洁", "直接明确", "严谨认真", "目标导向"],
"background": "负责调查神秘事件的专业人员,办事效率高,问题直接",
"speech_patterns": ["问题直接明确", "语气专业", "注重事实", "简洁有力"],
"sample_dialogues": [
"请详细描述事件经过。",
"有什么证据可以证明?",
"这件事需要立即调查。"
]
}
}

    def _load_model(self):
        """Load the model and tokenizer."""
        print(f"Loading tokenizer from: {self.base_model_path}")
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.base_model_path,
            use_fast=False,
            trust_remote_code=True
        )
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
        print(f"Loading base model from: {self.base_model_path}")
        self.model = AutoModelForCausalLM.from_pretrained(
            self.base_model_path,
            device_map="auto",
            torch_dtype=torch.bfloat16,
            trust_remote_code=True
        )
        # Load the LoRA adapter on top of the base model, if one was given
        if self.lora_model_path:
            print(f"Loading LoRA weights from: {self.lora_model_path}")
            self.model = PeftModel.from_pretrained(self.model, self.lora_model_path)

    def generate_character_dialogue(
        self,
        character_name: str,
        context: str = "",
        user_input: str = "",
        temperature: float = 0.8,
        max_new_tokens: int = 150,
        top_p: float = 0.9
    ) -> str:
        """
        Generate dialogue for the given character.

        Args:
            character_name: name of the character
            context: dialogue context
            user_input: user input / trigger text
            temperature: sampling temperature
            max_new_tokens: maximum number of generated tokens
            top_p: nucleus sampling parameter

        Returns:
            The generated dialogue text
        """
        if character_name not in self.character_profiles:
            raise ValueError(f"Unknown character: {character_name}")
        profile = self.character_profiles[character_name]
        # Build the system prompt
        system_prompt = self._build_system_prompt(profile, context)
        # Fall back to a generic trigger if no user input was given
        if not user_input:
            user_input = "请说一段符合你角色设定的话。"
        # Prepare the chat messages
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_input}
        ]
        # Apply the chat template (enable_thinking=False disables Qwen3's thinking mode)
        inputs = self.tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_tensors="pt",
            return_dict=True,
            enable_thinking=False
        )
        # Move tensors to the model's device
        inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
        # Generate the dialogue
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=True,
                temperature=temperature,
                top_p=top_p,
                pad_token_id=self.tokenizer.eos_token_id,
                repetition_penalty=1.1
            )
        # Decode only the newly generated tokens (drop the prompt)
        response = outputs[0][inputs['input_ids'].shape[1]:]
        dialogue = self.tokenizer.decode(response, skip_special_tokens=True).strip()
        return dialogue

    def _build_system_prompt(self, profile: Dict, context: str = "") -> str:
        """Build the system prompt for a character profile."""
        personality_str = "、".join(profile["personality"])
        speech_pattern_str = "、".join(profile["speech_patterns"])
        system_prompt = f"""你是游戏中的NPC角色{profile["name"]}({profile["title"]})。
角色背景:{profile["background"]}
性格特点:{personality_str}
说话风格:{speech_pattern_str}
请严格按照这个角色的设定来回应,保持角色的一致性和独特性。"""
        if context:
            system_prompt += f"\n\n当前情境:{context}"
        return system_prompt

    def generate_dialogue_conversation(self, character1: str, character2: str, topic: str, turns: int = 4) -> List[Dict]:
        """
        Generate a conversation between two characters.

        Args:
            character1: the first character
            character2: the second character
            topic: conversation topic
            turns: number of dialogue turns

        Returns:
            List of turns, each a dict with "speaker" and "dialogue" keys
        """
        conversation = []
        context = f"现在{character1}和{character2}在讨论关于{topic}的话题。"
        for turn in range(turns):
            if turn % 2 == 0:
                # character1 speaks on even turns
                speaker = character1
                if turn == 0:
                    user_input = f"开始和{character2}讨论{topic}这个话题。"
                else:
                    # Respond to the previous turn
                    last_dialogue = conversation[-1]["dialogue"]
                    user_input = f"{character2}刚才说:\"{last_dialogue}\"。请回应。"
            else:
                # character2 speaks on odd turns
                speaker = character2
                last_dialogue = conversation[-1]["dialogue"]
                user_input = f"{character1}刚才说:\"{last_dialogue}\"。请回应。"
            dialogue = self.generate_character_dialogue(
                speaker, context, user_input, temperature=0.8
            )
            conversation.append({
                "speaker": speaker,
                "dialogue": dialogue
            })
        return conversation

    def get_character_info(self, character_name: str) -> Dict:
        """Return the profile for a single character."""
        return self.character_profiles.get(character_name, {})

    def list_available_characters(self) -> List[str]:
        """List all available character names."""
        return list(self.character_profiles.keys())
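

# Illustrative usage sketch (an assumption, not part of the original file): building the
# generator from character data exported by the knowledge_base module instead of the
# built-in profiles. The file name "character_profiles.json" and the character name
# "某角色" are hypothetical placeholders.
#
#     with open("character_profiles.json", "r", encoding="utf-8") as f:
#         external_data = json.load(f)
#     generator = NPCDialogueGenerator(base_model_path, lora_model_path=None,
#                                      external_character_data=external_data)
#     print(generator.generate_character_dialogue("某角色", context="玩家前来求助"))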


def main():
    """Quick test of the dialogue generator."""
    # Configure model paths
    base_model_path = '/mnt/g/Project02/AITrain/Qwen/Qwen3-8B-AWQ'
    lora_model_path = './output/NPC_Dialogue_LoRA/final_model'  # Set to None if no LoRA adapter was trained
    # Fall back to the base model if the LoRA adapter does not exist
    import os
    if not os.path.exists(lora_model_path):
        print("LoRA模型不存在，使用基础模型")
        lora_model_path = None
    # Create the dialogue generator
    generator = NPCDialogueGenerator(base_model_path, lora_model_path)
    print("=== 游戏NPC角色对话生成器 ===")
    print(f"可用角色:{', '.join(generator.list_available_characters())}")
    # Test single-character dialogue generation
    print("\n=== 单角色对话测试 ===")
    test_scenarios = [
        {
            "character": "维多利亚·布莱克伍德",
            "context": "玩家向你咨询神秘学知识",
            "input": "请告诉我一些关于灵界的注意事项。"
        },
        {
            "character": "阿奇博尔德·韦恩博士",
            "context": "学生遇到了修炼瓶颈",
            "input": "导师,我在修炼中遇到了困难。"
        },
        {
            "character": "塔利姆",
            "context": "在俱乐部偶遇老朋友",
            "input": "好久不见,最近怎么样?"
        }
    ]
    for scenario in test_scenarios:
        print(f"\n--- {scenario['character']} ---")
        print(f"情境:{scenario['context']}")
        print(f"输入:{scenario['input']}")
        dialogue = generator.generate_character_dialogue(
            scenario["character"],
            scenario["context"],
            scenario["input"]
        )
        print(f"回复:{dialogue}")
    # Test a conversation between two characters
    print("\n=== 角色间对话测试 ===")
    conversation = generator.generate_dialogue_conversation(
        "维多利亚·布莱克伍德", "塔利姆", "最近遇到的神秘事件", turns=4
    )
    for turn in conversation:
        print(f"{turn['speaker']}：{turn['dialogue']}")
    # Interactive dialogue mode
    print("\n=== 交互式对话模式 ===")
    print("输入格式:角色名 上下文 用户输入")
    print("例如:维多利亚·布莱克伍德 在俱乐部 请给我一些建议")
    print("输入'quit'退出")
    while True:
        try:
            user_command = input("\n请输入指令: ").strip()
            if user_command.lower() == 'quit':
                break
            parts = user_command.split(' ', 2)
            if len(parts) < 2:
                print("格式错误,请使用:角色名 上下文 [用户输入]")
                continue
            character = parts[0]
            context = parts[1]
            user_input = parts[2] if len(parts) > 2 else ""
            if character not in generator.list_available_characters():
                print(f"未知角色:{character}")
                print(f"可用角色:{', '.join(generator.list_available_characters())}")
                continue
            dialogue = generator.generate_character_dialogue(
                character, context, user_input
            )
            print(f"\n{character}：{dialogue}")
        except KeyboardInterrupt:
            break
        except Exception as e:
            print(f"生成对话时出错:{e}")
    print("\n对话生成器已退出")


if __name__ == '__main__':
    main()