440 lines
18 KiB
Python
440 lines
18 KiB
Python
#!/usr/bin/env python
|
||
# -*- coding: utf-8 -*-
|
||
'''
NPC character dialogue generator for games.

Generates in-character dialogue with a LoRA fine-tuned model.
'''
|
||
|
||
import torch
|
||
import json
|
||
import random
|
||
from peft import PeftModel
|
||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||
from typing import Dict, List, Optional
|
||
import platform
|
||
|
||
# Windows multiprocessing compatibility fix: force the 'spawn' start method
# when running on Windows.
if platform.system() == "Windows":
    import multiprocessing

    multiprocessing.set_start_method('spawn', force=True)
|
||
|
||
class NPCDialogueGenerator:
    """Generate in-character NPC dialogue with a causal language model.

    The generator loads a base model (optionally with LoRA adapter weights on
    top) and keeps a dictionary of character profiles keyed by character
    name.  A profile is rendered into a system prompt describing the
    character's background, personality and speech style, and the model's
    reply to that prompt is returned as the character's line.
    """

    # Occupation keywords mapped to canned sample lines; the first entry whose
    # keyword occurs in the occupation string wins.
    _SAMPLE_DIALOGUES_BY_OCCUPATION = (
        (
            ('侦探', '调查员'),
            (
                "我需要仔细分析这个案件。",
                "每个细节都可能很重要。",
                "让我重新梳理一下线索。",
            ),
        ),
        (
            ('教授', '博士'),
            (
                "根据我的研究,这个现象很特殊。",
                "我们需要更谨慎地处理这个问题。",
                "知识就是力量,但也要小心使用。",
            ),
        ),
    )

    # Used when no occupation keyword matches.
    _DEFAULT_SAMPLE_DIALOGUES = (
        "我遇到了一些困难。",
        "请帮帮我。",
        "这太奇怪了。",
    )

    def __init__(self, base_model_path: str, lora_model_path: Optional[str] = None, external_character_data: Optional[Dict] = None):
        """Build the character profiles and load the model.

        Args:
            base_model_path: Path of the base model.
            lora_model_path: Optional path of LoRA adapter weights.
            external_character_data: Optional mapping of character name to
                raw character data; it is converted by
                ``_process_external_character_data``.
        """
        self.base_model_path = base_model_path
        self.lora_model_path = lora_model_path
        self.model = None
        self.tokenizer = None

        # Always define the attribute: without external data the generator
        # simply has no characters, instead of failing later with an
        # AttributeError on the first profile lookup.
        self.character_profiles: Dict = {}
        if external_character_data:
            self.character_profiles = self._process_external_character_data(external_character_data)
            print(f"✓ 使用外部角色数据: {list(self.character_profiles.keys())}")
        else:
            print("⚠ 未提供外部角色数据,当前没有可用角色")

        self._load_model()

    @staticmethod
    def _section(char_data, key: str) -> Dict:
        """Return ``char_data[key]`` when it is a dict, otherwise an empty dict."""
        if not isinstance(char_data, dict):
            return {}
        value = char_data.get(key)
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _as_str_list(value) -> List[str]:
        """Normalise a field that may be a list, a single value or missing."""
        if value is None or value == "":
            return []
        if isinstance(value, (list, tuple)):
            return [str(item) for item in value]
        return [str(value)]

    def _process_external_character_data(self, external_data: Dict) -> Dict:
        """Convert raw external character data into generator profiles.

        Args:
            external_data: Mapping of character name to a raw data dict.  The
                sections read are ``basic_info``, ``personality``,
                ``background`` and ``speech_patterns``; missing or malformed
                sections are treated as empty.

        Returns:
            Mapping of character name to a profile dict with the keys
            ``name``, ``title``, ``personality``, ``background``,
            ``speech_patterns``, ``sample_dialogues`` and ``full_data``.
        """
        processed_profiles = {}

        for char_name, char_data in external_data.items():
            raw = char_data if isinstance(char_data, dict) else {}
            basic_info = self._section(raw, 'basic_info')
            personality = self._section(raw, 'personality')
            background = self._section(raw, 'background')
            speech_patterns = self._section(raw, 'speech_patterns')

            # Join only the non-empty background parts so the prompt does not
            # get a stray leading/trailing space.
            background_parts = [
                str(part)
                for part in (background.get('childhood'), background.get('education'))
                if part
            ]

            processed_profiles[char_name] = {
                "name": raw.get('character_name') or char_name,
                "title": basic_info.get('occupation') or '未知',
                "personality": (
                    self._as_str_list(personality.get('core_traits'))
                    + self._as_str_list(personality.get('strengths'))
                ),
                "background": ' '.join(background_parts),
                "speech_patterns": (
                    self._as_str_list(speech_patterns.get('vocabulary'))
                    + self._as_str_list(speech_patterns.get('tone'))
                ),
                "sample_dialogues": self._generate_sample_dialogues(raw),
                # Keep the untouched input around for callers that need more
                # than the condensed profile.
                "full_data": char_data,
            }

        return processed_profiles

    def _generate_sample_dialogues(self, char_data: Dict) -> List[str]:
        """Pick canned sample lines based on the character's occupation.

        Args:
            char_data: Raw character data; only ``basic_info.occupation`` is
                read.

        Returns:
            A new list of three sample lines.
        """
        occupation = str(self._section(char_data, 'basic_info').get('occupation') or '角色')

        for keywords, samples in self._SAMPLE_DIALOGUES_BY_OCCUPATION:
            if any(keyword in occupation for keyword in keywords):
                return list(samples)
        return list(self._DEFAULT_SAMPLE_DIALOGUES)

    def _load_model(self):
        """Load the tokenizer, the base model and, if configured, LoRA weights."""
        print(f"Loading tokenizer from: {self.base_model_path}")
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.base_model_path,
            use_fast=False,
            trust_remote_code=True
        )

        # Fall back to the EOS token when the tokenizer defines no pad token.
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        print(f"Loading base model from: {self.base_model_path}")
        self.model = AutoModelForCausalLM.from_pretrained(
            self.base_model_path,
            device_map="auto",
            torch_dtype=torch.bfloat16,
            trust_remote_code=True
        )

        if self.lora_model_path:
            print(f"Loading LoRA weights from: {self.lora_model_path}")
            self.model = PeftModel.from_pretrained(self.model, self.lora_model_path)

    def generate_character_dialogue(
        self,
        character_name: str,
        context: str = "",
        user_input: str = "",
        temperature: float = 0.8,
        max_new_tokens: int = 150,
        top_p: float = 0.9
    ) -> str:
        """Generate one line of dialogue for a character.

        Args:
            character_name: Key of the character in ``character_profiles``.
            context: Optional scene description appended to the system prompt.
            user_input: Text the character responds to; a generic request is
                used when empty.
            temperature: Sampling temperature.  Values ``<= 0`` switch to
                greedy decoding instead of sampling.
            max_new_tokens: Maximum number of generated tokens.
            top_p: Nucleus sampling parameter (ignored for greedy decoding).

        Returns:
            The decoded reply with special tokens removed and whitespace
            stripped.

        Raises:
            ValueError: If ``character_name`` has no profile.
        """
        if character_name not in self.character_profiles:
            raise ValueError(f"Unknown character: {character_name}")

        profile = self.character_profiles[character_name]
        system_prompt = self._build_system_prompt(profile, context)

        if not user_input:
            user_input = "请说一段符合你角色设定的话。"

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_input}
        ]

        # NOTE(review): enable_thinking is forwarded to the chat template;
        # presumably it turns off the model's "thinking" mode - confirm
        # against the template of the model in use.
        inputs = self.tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_tensors="pt",
            return_dict=True,
            enable_thinking=False
        )
        inputs = {k: v.to(self.model.device) for k, v in inputs.items()}

        pad_token_id = self.tokenizer.pad_token_id
        if pad_token_id is None:
            pad_token_id = self.tokenizer.eos_token_id

        generation_kwargs = {
            "max_new_tokens": max_new_tokens,
            "pad_token_id": pad_token_id,
            "repetition_penalty": 1.1,
        }
        if temperature > 0:
            generation_kwargs.update(do_sample=True, temperature=temperature, top_p=top_p)
        else:
            # Sampling requires a strictly positive temperature, so decode
            # greedily instead of letting generate() reject the request.
            generation_kwargs["do_sample"] = False

        with torch.no_grad():
            outputs = self.model.generate(**inputs, **generation_kwargs)

        # Drop the prompt tokens; only the newly generated tail is the reply.
        prompt_length = inputs['input_ids'].shape[1]
        response = outputs[0][prompt_length:]
        return self.tokenizer.decode(response, skip_special_tokens=True).strip()

    def _build_system_prompt(self, profile: Dict, context: str = "") -> str:
        """Render a profile (and optional scene context) into a system prompt.

        Args:
            profile: Profile dict with ``name``, ``title``, ``background``,
                ``personality`` and ``speech_patterns``.
            context: Optional scene description appended at the end.

        Returns:
            The system prompt text.
        """
        personality_str = "、".join(map(str, profile["personality"]))
        speech_pattern_str = ";".join(map(str, profile["speech_patterns"]))

        prompt_lines = [
            f"你是游戏中的NPC角色{profile['name']}({profile['title']})。",
            f"角色背景:{profile['background']}",
            f"性格特点:{personality_str}",
            f"说话风格:{speech_pattern_str}",
            "请严格按照这个角色的设定来回应,保持角色的一致性和独特性。",
        ]
        system_prompt = "\n".join(prompt_lines)

        if context:
            system_prompt += f"\n\n当前情境:{context}"
        return system_prompt

    def generate_dialogue_conversation(self, character1: str, character2: str, topic: str, turns: int = 4) -> List[Dict]:
        """Generate an alternating conversation between two characters.

        Args:
            character1: Character who speaks first.
            character2: Character who answers.
            topic: Topic of the conversation.
            turns: Total number of lines to generate.

        Returns:
            A list of ``{"speaker": ..., "dialogue": ...}`` dicts in speaking
            order.

        Raises:
            ValueError: If a character that would speak has no profile.
        """
        speakers = (character1, character2)

        # Fail before any generation is spent on a conversation that cannot
        # be completed; only characters that actually get a turn are checked.
        for name in speakers[:max(turns, 0)]:
            if name not in self.character_profiles:
                raise ValueError(f"Unknown character: {name}")

        conversation = []
        context = f"现在{character1}和{character2}在讨论关于{topic}的话题。"

        for turn in range(turns):
            speaker = speakers[turn % 2]
            if not conversation:
                user_input = f"开始和{character2}讨论{topic}这个话题。"
            else:
                previous_speaker = speakers[(turn + 1) % 2]
                last_dialogue = conversation[-1]["dialogue"]
                user_input = f"{previous_speaker}刚才说:\"{last_dialogue}\"。请回应。"

            dialogue = self.generate_character_dialogue(
                speaker, context, user_input, temperature=0.8
            )
            conversation.append({
                "speaker": speaker,
                "dialogue": dialogue
            })

        return conversation

    def get_character_info(self, character_name: str) -> Dict:
        """Return the profile of a character, or an empty dict if unknown."""
        return self.character_profiles.get(character_name, {})

    def list_available_characters(self) -> List[str]:
        """Return the names of all characters that have a profile."""
        return list(self.character_profiles.keys())
|
||
|
||
def main(character_data_path: Optional[str] = None):
    """Run a console demo of the dialogue generator.

    The demo needs character data: a JSON file containing a mapping of
    character name to raw character data, which is passed to
    ``NPCDialogueGenerator`` as ``external_character_data``.  Without it the
    generator has no characters, so the demo exits before loading the model.

    Args:
        character_data_path: Path of the character JSON file.  When omitted,
            the first command-line argument is used if present.
    """
    import os
    import sys

    if character_data_path is None and len(sys.argv) > 1:
        character_data_path = sys.argv[1]
    if character_data_path is None:
        print("未提供角色数据文件,请将角色数据JSON路径作为第一个参数传入")
        return

    try:
        with open(character_data_path, encoding="utf-8") as data_file:
            character_data = json.load(data_file)
    except (OSError, ValueError) as e:
        # ValueError covers both invalid JSON and undecodable bytes.
        print(f"无法加载角色数据 {character_data_path}:{e}")
        return
    if not isinstance(character_data, dict) or not character_data:
        print(f"角色数据为空或格式不正确:{character_data_path}")
        return

    # Model locations
    base_model_path = '/mnt/g/Project02/AITrain/Qwen/Qwen3-8B-AWQ'
    lora_model_path = './output/NPC_Dialogue_LoRA/final_model'  # set to None if no LoRA was trained

    # Fall back to the bare base model when no LoRA weights are on disk.
    if not os.path.exists(lora_model_path):
        print("LoRA模型不存在,使用基础模型")
        lora_model_path = None

    generator = NPCDialogueGenerator(base_model_path, lora_model_path, character_data)
    available = generator.list_available_characters()

    print("=== 游戏NPC角色对话生成器 ===")
    print(f"可用角色:{', '.join(available)}")

    # Single-character generation test
    print("\n=== 单角色对话测试 ===")
    test_scenarios = [
        {
            "character": "克莱恩",
            "context": "玩家向你咨询神秘学知识",
            "input": "请告诉我一些关于灵界的注意事项。"
        },
        {
            "character": "阿兹克",
            "context": "学生遇到了修炼瓶颈",
            "input": "导师,我在修炼中遇到了困难。"
        },
        {
            "character": "塔利姆",
            "context": "在俱乐部偶遇老朋友",
            "input": "好久不见,最近怎么样?"
        }
    ]

    for scenario in test_scenarios:
        # The scenario names are fixed examples; skip the ones the loaded
        # data does not define instead of aborting the whole demo.
        if scenario["character"] not in available:
            print(f"\n跳过未知角色:{scenario['character']}")
            continue

        print(f"\n--- {scenario['character']} ---")
        print(f"情境:{scenario['context']}")
        print(f"输入:{scenario['input']}")

        dialogue = generator.generate_character_dialogue(
            scenario["character"],
            scenario["context"],
            scenario["input"]
        )
        print(f"回复:{dialogue}")

    # Two-character conversation test
    print("\n=== 角色间对话测试 ===")
    if "克莱恩" in available and "塔利姆" in available:
        conversation = generator.generate_dialogue_conversation(
            "克莱恩", "塔利姆", "最近遇到的神秘事件", turns=4
        )
        for turn in conversation:
            print(f"{turn['speaker']}:{turn['dialogue']}")
    else:
        print("跳过:角色数据中缺少 克莱恩 或 塔利姆")

    # Interactive mode
    print("\n=== 交互式对话模式 ===")
    print("输入格式:角色名 上下文 用户输入")
    print("例如:克莱恩 在俱乐部 请给我一些建议")
    print("输入'quit'退出")

    while True:
        try:
            user_command = input("\n请输入指令: ").strip()
            if user_command.lower() == 'quit':
                break

            # Split on any run of whitespace so repeated or full-width spaces
            # do not produce empty fields.
            parts = user_command.split(None, 2)
            if len(parts) < 2:
                print("格式错误,请使用:角色名 上下文 [用户输入]")
                continue

            character = parts[0]
            context = parts[1]
            user_input = parts[2] if len(parts) > 2 else ""

            if character not in available:
                print(f"未知角色:{character}")
                print(f"可用角色:{', '.join(available)}")
                continue

            dialogue = generator.generate_character_dialogue(
                character, context, user_input
            )
            print(f"\n{character}:{dialogue}")

        except (KeyboardInterrupt, EOFError):
            # EOFError must end the loop: on a closed stdin input() raises it
            # on every call, which would otherwise spin forever.
            break
        except Exception as e:
            print(f"生成对话时出错:{e}")

    print("\n对话生成器已退出")
|
||
|
||
# Run the console demo only when executed as a script.
if __name__ == '__main__':
    main()