This commit is contained in:
Longmao 2025-08-22 10:43:31 +08:00
commit 350b207527
16 changed files with 16921 additions and 399 deletions

BIN
AITrain/coc.pdf Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -64,14 +64,39 @@ class RAGKnowledgeBase:
def _load_knowledge_base(self): def _load_knowledge_base(self):
"""加载知识库""" """加载知识库"""
# 加载世界观 # 优先加载RAG知识库作为世界观
worldview_files = [f for f in os.listdir(self.knowledge_dir) rag_worldview_path = "./rag_knowledge/knowledge_base.json"
if f.startswith('worldview') and f.endswith('.json')] if os.path.exists(rag_worldview_path):
if worldview_files: try:
worldview_path = os.path.join(self.knowledge_dir, worldview_files[0]) with open(rag_worldview_path, 'r', encoding='utf-8') as f:
with open(worldview_path, 'r', encoding='utf-8') as f: rag_data = json.load(f)
self.worldview_data = json.load(f) # 从RAG数据中提取世界观信息
print(f"✓ 世界观加载成功: {self.worldview_data.get('worldview_name', '未知')}") self.worldview_data = {
"worldview_name": "克苏鲁神话世界观 (RAG)",
"source": rag_data.get("metadata", {}).get("source_file", "未知"),
"description": f"基于{rag_data.get('metadata', {}).get('source_file', 'PDF文档')}的RAG知识库",
"total_chunks": rag_data.get("metadata", {}).get("total_chunks", 0),
"total_concepts": rag_data.get("metadata", {}).get("total_concepts", 0),
"rag_enabled": True
}
# 保存RAG数据用于检索
self.rag_chunks = rag_data.get("chunks", [])
print(f"✓ RAG世界观加载成功: {self.worldview_data['worldview_name']}")
print(f" - 文档块数: {self.worldview_data['total_chunks']}")
print(f" - 概念数: {self.worldview_data['total_concepts']}")
except Exception as e:
print(f"✗ RAG世界观加载失败: {e}")
self.rag_chunks = []
# 如果没有RAG知识库则加载传统世界观文件
if not hasattr(self, 'rag_chunks') or not self.rag_chunks:
worldview_files = [f for f in os.listdir(self.knowledge_dir)
if f.startswith('worldview') and f.endswith('.json')]
if worldview_files:
worldview_path = os.path.join(self.knowledge_dir, worldview_files[0])
with open(worldview_path, 'r', encoding='utf-8') as f:
self.worldview_data = json.load(f)
print(f"✓ 传统世界观加载成功: {self.worldview_data.get('worldview_name', '未知')}")
# 加载角色数据 # 加载角色数据
character_files = [f for f in os.listdir(self.knowledge_dir) character_files = [f for f in os.listdir(self.knowledge_dir)
@ -96,21 +121,38 @@ class RAGKnowledgeBase:
"""构建可检索的文本块""" """构建可检索的文本块"""
self.chunks = [] self.chunks = []
# 世界观相关文本块 # 优先使用RAG知识库的文本块
if self.worldview_data: if hasattr(self, 'rag_chunks') and self.rag_chunks:
for section_key, section_data in self.worldview_data.items(): for rag_chunk in self.rag_chunks:
if isinstance(section_data, dict): self.chunks.append({
for sub_key, sub_data in section_data.items(): "type": "worldview_rag",
if isinstance(sub_data, (str, list)): "section": "rag_knowledge",
content = str(sub_data) "subsection": rag_chunk.get("type", "unknown"),
if len(content) > 50: # 只保留有意义的文本 "content": rag_chunk.get("content", ""),
self.chunks.append({ "metadata": {
"type": "worldview", "source": "rag_worldview",
"section": section_key, "chunk_id": rag_chunk.get("id", ""),
"subsection": sub_key, "size": rag_chunk.get("size", 0),
"content": content, "hash": rag_chunk.get("hash", "")
"metadata": {"source": "worldview"} }
}) })
print(f"✓ 使用RAG知识库文本块: {len(self.rag_chunks)}")
else:
# 传统世界观相关文本块
if self.worldview_data:
for section_key, section_data in self.worldview_data.items():
if isinstance(section_data, dict):
for sub_key, sub_data in section_data.items():
if isinstance(sub_data, (str, list)):
content = str(sub_data)
if len(content) > 50: # 只保留有意义的文本
self.chunks.append({
"type": "worldview",
"section": section_key,
"subsection": sub_key,
"content": content,
"metadata": {"source": "worldview"}
})
# 角色相关文本块 # 角色相关文本块
for char_name, char_data in self.character_data.items(): for char_name, char_data in self.character_data.items():
@ -134,6 +176,18 @@ class RAGKnowledgeBase:
def _build_vector_index(self): def _build_vector_index(self):
"""构建向量索引""" """构建向量索引"""
try: try:
# 优先使用RAG知识库的预构建向量索引
rag_vector_path = "./rag_knowledge/vector_index.faiss"
rag_embeddings_path = "./rag_knowledge/embeddings.npy"
if os.path.exists(rag_vector_path) and os.path.exists(rag_embeddings_path):
# 加载预构建的向量索引
self.index = faiss.read_index(rag_vector_path)
self.rag_embeddings = np.load(rag_embeddings_path)
print(f"✓ 使用RAG预构建向量索引: {self.index.ntotal}个向量")
return
# 如果没有预构建的向量索引,则重新构建
texts = [chunk["content"] for chunk in self.chunks] texts = [chunk["content"] for chunk in self.chunks]
embeddings = self.embedding_model.encode(texts) embeddings = self.embedding_model.encode(texts)
@ -152,14 +206,26 @@ class RAGKnowledgeBase:
# 向量搜索 # 向量搜索
if EMBEDDING_AVAILABLE and self.embedding_model and self.index: if EMBEDDING_AVAILABLE and self.embedding_model and self.index:
try: try:
query_vector = self.embedding_model.encode([query]) # 如果使用RAG预构建向量索引直接搜索
distances, indices = self.index.search(query_vector.astype(np.float32), top_k * 2) if hasattr(self, 'rag_embeddings'):
query_vector = self.embedding_model.encode([query])
distances, indices = self.index.search(query_vector.astype(np.float32), top_k * 2)
for distance, idx in zip(distances[0], indices[0]): for distance, idx in zip(distances[0], indices[0]):
if idx < len(self.chunks): if idx < len(self.chunks):
chunk = self.chunks[idx].copy() chunk = self.chunks[idx].copy()
chunk["relevance_score"] = float(1 / (1 + distance)) chunk["relevance_score"] = float(1 / (1 + distance))
relevant_chunks.append(chunk) relevant_chunks.append(chunk)
else:
# 传统向量搜索
query_vector = self.embedding_model.encode([query])
distances, indices = self.index.search(query_vector.astype(np.float32), top_k * 2)
for distance, idx in zip(distances[0], indices[0]):
if idx < len(self.chunks):
chunk = self.chunks[idx].copy()
chunk["relevance_score"] = float(1 / (1 + distance))
relevant_chunks.append(chunk)
except Exception as e: except Exception as e:
print(f"向量搜索失败: {e}") print(f"向量搜索失败: {e}")
@ -317,8 +383,17 @@ class DualAIDialogueEngine:
self.conv_mgr = conversation_manager self.conv_mgr = conversation_manager
self.llm_generator = llm_generator self.llm_generator = llm_generator
def generate_character_prompt(self, character_name: str, context_info: List[Dict], dialogue_history: List[DialogueTurn]) -> str: def generate_character_prompt(self, character_name: str, context_info: List[Dict], dialogue_history: List[DialogueTurn],
"""为角色生成对话提示""" history_context_count: int = 3, context_info_count: int = 2) -> str:
"""为角色生成对话提示
Args:
character_name: 角色名称
context_info: 相关上下文信息
dialogue_history: 对话历史
history_context_count: 使用的历史对话轮数默认3轮
context_info_count: 使用的上下文信息数量默认2个
"""
char_data = self.kb.character_data.get(character_name, {}) char_data = self.kb.character_data.get(character_name, {})
# 基础角色设定 # 基础角色设定
@ -338,56 +413,86 @@ class DualAIDialogueEngine:
situation = char_data['current_situation'] situation = char_data['current_situation']
prompt_parts.append(f"当前状态:{situation.get('current_mood', '')}") prompt_parts.append(f"当前状态:{situation.get('current_mood', '')}")
# 相关世界观信息 # 相关世界观信息(可控制数量)
if context_info: if context_info:
prompt_parts.append("相关背景信息:") prompt_parts.append("相关背景信息:")
for info in context_info[:2]: # 只使用最相关的2个信息 for info in context_info[:context_info_count]:
content = info['content'][:200] + "..." if len(info['content']) > 200 else info['content'] content = info['content'][:200] + "..." if len(info['content']) > 200 else info['content']
prompt_parts.append(f"- {content}") prompt_parts.append(f"- {content}")
# 对话历史 # 对话历史(可控制数量)
if dialogue_history: if dialogue_history:
prompt_parts.append("最近的对话:") prompt_parts.append("最近的对话:")
for turn in dialogue_history[-3:]: # 只使用最近的3轮对话 # 使用参数控制历史对话轮数
history_to_use = dialogue_history[-history_context_count:] if history_context_count > 0 else []
for turn in history_to_use:
prompt_parts.append(f"{turn.speaker}: {turn.content}") prompt_parts.append(f"{turn.speaker}: {turn.content}")
prompt_parts.append("\n请根据角色设定和上下文生成符合角色特点的自然对话。回复应该在50-150字之间。") prompt_parts.append("\n请根据角色设定和上下文生成符合角色特点的自然对话。回复应该在50-150字之间。")
return "\n".join(prompt_parts) return "\n".join(prompt_parts)
def generate_dialogue(self, session_id: str, current_speaker: str, topic_hint: str = "") -> Tuple[str, List[str]]: def generate_dialogue(self, session_id: str, current_speaker: str, topic_hint: str = "",
"""生成角色对话""" history_context_count: int = 3, context_info_count: int = 2) -> Tuple[str, List[str]]:
"""生成角色对话
Args:
session_id: 会话ID
current_speaker: 当前说话者
topic_hint: 话题提示
history_context_count: 使用的历史对话轮数默认3轮
context_info_count: 使用的上下文信息数量默认2个
"""
# 获取对话历史 # 获取对话历史
dialogue_history = self.conv_mgr.get_conversation_history(session_id) dialogue_history = self.conv_mgr.get_conversation_history(session_id)
# 构建搜索查询 # 构建搜索查询
if dialogue_history: if dialogue_history:
# 基于最近的对话内容 # 基于最近的对话内容(可控制数量)
recent_content = " ".join([turn.content for turn in dialogue_history[-2:]]) recent_turns = dialogue_history[-history_context_count:] if history_context_count > 0 else []
recent_content = " ".join([turn.content for turn in recent_turns])
search_query = recent_content + " " + topic_hint search_query = recent_content + " " + topic_hint
else: else:
# 首次对话 # 首次对话
search_query = f"{current_speaker} {topic_hint} introduction greeting" search_query = f"{current_speaker} {topic_hint} introduction greeting"
# 搜索相关上下文 # 搜索相关上下文
context_info = self.kb.search_relevant_context(search_query, current_speaker, 10) context_info = self.kb.search_relevant_context(search_query, current_speaker, context_info_count)
# 生成提示 # 生成提示(使用参数控制上下文数量)
prompt = self.generate_character_prompt(current_speaker, context_info, dialogue_history) prompt = self.generate_character_prompt(
current_speaker,
context_info,
dialogue_history,
history_context_count,
context_info_count
)
# 生成对话 # 生成对话 - 使用双模型系统
try: try:
response = self.llm_generator.generate_character_dialogue( # 检查是否为双模型对话系统
current_speaker, if hasattr(self.llm_generator, 'generate_dual_character_dialogue'):
prompt, # 使用双模型系统
topic_hint or "请继续对话", response = self.llm_generator.generate_dual_character_dialogue(
temperature=0.8, current_speaker,
max_new_tokens=150 prompt,
) topic_hint or "请继续对话",
temperature=0.8,
max_new_tokens=150
)
else:
# 兼容旧的单模型系统
response = self.llm_generator.generate_character_dialogue(
current_speaker,
prompt,
topic_hint or "请继续对话",
temperature=0.8,
max_new_tokens=150
)
# 记录使用的上下文 # 记录使用的上下文
context_used = [f"{info['section']}.{info['subsection']}" for info in context_info] context_used = [f"{info['section']}.{info['subsection']}" for info in context_info[:context_info_count]]
avg_relevance = sum(info['relevance_score'] for info in context_info) / len(context_info) if context_info else 0.0 avg_relevance = sum(info['relevance_score'] for info in context_info[:context_info_count]) / len(context_info[:context_info_count]) if context_info else 0.0
# 保存对话轮次 # 保存对话轮次
self.conv_mgr.add_dialogue_turn( self.conv_mgr.add_dialogue_turn(
@ -400,191 +505,345 @@ class DualAIDialogueEngine:
print(f"✗ 对话生成失败: {e}") print(f"✗ 对话生成失败: {e}")
return f"[{current_speaker}暂时无法回应]", [] return f"[{current_speaker}暂时无法回应]", []
def run_conversation_turn(self, session_id: str, characters: List[str], turns_count: int = 1, topic: str = ""): def run_conversation_turn(self, session_id: str, characters: List[str], turns_count: int = 1, topic: str = "",
"""运行对话轮次""" history_context_count: int = 3, context_info_count: int = 2):
results = [] """运行对话轮次
Args:
session_id: 会话ID
characters: 角色列表
turns_count: 对话轮数
topic: 对话主题
history_context_count: 使用的历史对话轮数默认3轮
context_info_count: 使用的上下文信息数量默认2个
"""
results = []
print(f" [上下文设置: 历史{history_context_count}轮, 信息{context_info_count}个]")
for i in range(turns_count): for i in range(turns_count):
for char in characters: for char in characters:
response, context_used = self.generate_dialogue(session_id, char, topic) response, context_used = self.generate_dialogue(
session_id,
char,
topic,
history_context_count,
context_info_count
)
results.append({ results.append({
"speaker": char, "speaker": char,
"content": response, "content": response,
"context_used": context_used, "context_used": context_used,
"turn": i + 1 "turn": i + 1,
"context_settings": {
"history_count": history_context_count,
"context_info_count": context_info_count
}
}) })
print(f"{char}: {response}") print(f"{char}: {response}")
if context_used: # if context_used:
print(f" [使用上下文: {', '.join(context_used)}]") # print(f" [使用上下文: {', '.join(context_used)}]")
print() print()
return results return results
def main(): def run_dual_model_conversation(self, session_id: str, topic: str = "", turns: int = 4,
"""主函数 - 演示系统使用""" history_context_count: int = 3, context_info_count: int = 2):
print("=== RAG增强双AI角色对话系统 ===") """使用双模型系统运行对话
# 设置路径 Args:
knowledge_dir = "./knowledge_base" # 包含世界观和角色文档的目录 session_id: 会话ID
topic: 对话主题
turns: 对话轮数
history_context_count: 使用的历史对话轮数
context_info_count: 使用的上下文信息数量
"""
# 检查是否为双模型对话系统
if not hasattr(self.llm_generator, 'run_dual_character_conversation'):
print("⚠ 当前系统不支持双模型对话")
return self.run_conversation_turn(session_id, self.llm_generator.list_characters(), turns, topic,
history_context_count, context_info_count)
# 检查必要文件 # 获取对话历史
required_dirs = [knowledge_dir] dialogue_history = self.conv_mgr.get_conversation_history(session_id)
for dir_path in required_dirs:
if not os.path.exists(dir_path):
print(f"✗ 目录不存在: {dir_path}")
print("请确保以下文件存在:")
print("- ./knowledge_base/worldview_template_coc.json")
print("- ./knowledge_base/character_template_detective.json")
print("- ./knowledge_base/character_template_professor.json")
return
try: # 构建上下文信息
# 初始化系统组件 if dialogue_history:
print("\n初始化系统...") recent_turns = dialogue_history[-history_context_count:] if history_context_count > 0 else []
kb = RAGKnowledgeBase(knowledge_dir) recent_content = " ".join([turn.content for turn in recent_turns])
conv_mgr = ConversationManager() search_query = recent_content + " " + topic
else:
search_query = f"{topic} introduction greeting"
# 这里需要你的LLM生成器使用现有的NPCDialogueGenerator # 搜索相关上下文
from npc_dialogue_generator import NPCDialogueGenerator context_info = self.kb.search_relevant_context(search_query, top_k=context_info_count)
base_model_path = '/mnt/g/Project02/AITrain/Qwen/Qwen3-4B' # 根据你的路径调整
lora_model_path = './output/NPC_Dialogue_LoRA/final_model'
if not os.path.exists(lora_model_path): # 构建上下文字符串
lora_model_path = None context_str = ""
if context_info:
context_str = "相关背景信息:"
for info in context_info[:context_info_count]:
content = info['content'][:150] + "..." if len(info['content']) > 150 else info['content']
context_str += f"\n- {content}"
llm_generator = NPCDialogueGenerator(base_model_path, lora_model_path) print(f"\n=== 双模型对话系统 ===")
print(f"主题: {topic}")
print(f"角色: {', '.join(self.llm_generator.list_characters())}")
print(f"轮数: {turns}")
print(f"上下文设置: 历史{history_context_count}轮, 信息{context_info_count}")
# 创建对话引擎 # 使用双模型系统生成对话
dialogue_engine = DualAIDialogueEngine(kb, conv_mgr, llm_generator) for turn in range(turns):
# 获取对话历史
dialogue_history = self.conv_mgr.get_conversation_history(session_id)
conversation_results = self.llm_generator.run_dual_character_conversation(
topic=topic,
turn_index = turn,
context=context_str,
dialogue_history = dialogue_history,
history_context_count = history_context_count,
max_new_tokens=150
)
print("✓ 系统初始化完成") # 保存对话到数据库
for result in conversation_results:
self.conv_mgr.add_dialogue_turn(
session_id,
result['speaker'],
result['dialogue'],
[result.get('context_used', '')],
0.8 # 默认相关性分数
)
# 交互式菜单
while True:
print("\n" + "="*50)
print("双AI角色对话系统")
print("1. 创建新对话")
print("2. 继续已有对话")
print("3. 查看对话历史")
print("4. 列出所有会话")
print("0. 退出")
print("="*50)
choice = input("请选择操作: ").strip() return conversation_results
if choice == '0': # def main():
break # """主函数 - 演示系统使用"""
# print("=== RAG增强双AI角色对话系统 ===")
elif choice == '1': # # 设置路径
# 创建新对话 # knowledge_dir = "./knowledge_base" # 包含世界观和角色文档的目录
print(f"可用角色: {list(kb.character_data.keys())}")
characters = input("请输入两个角色名称(用空格分隔): ").strip().split()
if len(characters) != 2: # # 检查必要文件
print("❌ 请输入正好两个角色名称") # required_dirs = [knowledge_dir]
continue # for dir_path in required_dirs:
# if not os.path.exists(dir_path):
# print(f"✗ 目录不存在: {dir_path}")
# print("请确保以下文件存在:")
# print("- ./knowledge_base/worldview_template_coc.json")
# print("- ./knowledge_base/character_template_detective.json")
# print("- ./knowledge_base/character_template_professor.json")
# return
worldview = kb.worldview_data.get('worldview_name', '未知世界观') if kb.worldview_data else '未知世界观' # try:
session_id = conv_mgr.create_session(characters, worldview) # # 初始化系统组件
# print("\n初始化系统...")
# kb = RAGKnowledgeBase(knowledge_dir)
# conv_mgr = ConversationManager()
topic = input("请输入对话主题(可选): ").strip() # # 这里需要你的LLM生成器使用新的双模型对话系统
turns = int(input("请输入对话轮次数量默认2: ").strip() or "2") # from npc_dialogue_generator import DualModelDialogueGenerator
# base_model_path = '/mnt/g/Project02/AITrain/Qwen/Qwen3-4B' # 根据你的路径调整
# lora_model_path = './output/NPC_Dialogue_LoRA/final_model'
print(f"\n开始对话 - 会话ID: {session_id}") # if not os.path.exists(lora_model_path):
dialogue_engine.run_conversation_turn(session_id, characters, turns, topic) # lora_model_path = None
elif choice == '2': # # 创建双模型对话生成器
# 继续已有对话 # if hasattr(kb, 'character_data') and len(kb.character_data) >= 2:
sessions = conv_mgr.list_sessions() # print("✓ 使用knowledge_base角色数据创建双模型对话系统")
if not sessions: # # 获取前两个角色
print("❌ 没有已有对话") # character_names = list(kb.character_data.keys())[:2]
continue # char1_name = character_names[0]
# char2_name = character_names[1]
print("已有会话:") # # 配置两个角色的模型
for i, session in enumerate(sessions[:5]): # character1_config = {
chars = ", ".join(session['characters']) # "name": char1_name,
print(f"{i+1}. {session['session_id'][:8]}... ({chars}) - {session['last_update'][:16]}") # "lora_path": lora_model_path, # 可以为每个角色设置不同的LoRA
# "character_data": kb.character_data[char1_name]
# }
try: # character2_config = {
idx = int(input("请选择会话编号: ").strip()) - 1 # "name": char2_name,
if 0 <= idx < len(sessions): # "lora_path": lora_model_path, # 可以为每个角色设置不同的LoRA
session = sessions[idx] # "character_data": kb.character_data[char2_name]
session_id = session['session_id'] # }
characters = session['characters']
# 显示最近的对话 # llm_generator = DualModelDialogueGenerator(
history = conv_mgr.get_conversation_history(session_id, 4) # base_model_path,
if history: # character1_config,
print("\n最近的对话:") # character2_config
for turn in history: # )
print(f"{turn.speaker}: {turn.content}") # else:
# print("⚠ 角色数据不足,无法创建双模型对话系统")
# return
topic = input("请输入对话主题(可选): ").strip() # # 创建对话引擎
turns = int(input("请输入对话轮次数量默认1: ").strip() or "1") # dialogue_engine = DualAIDialogueEngine(kb, conv_mgr, llm_generator)
print(f"\n继续对话 - 会话ID: {session_id}") # print("✓ 系统初始化完成")
dialogue_engine.run_conversation_turn(session_id, characters, turns, topic)
else:
print("❌ 无效的会话编号")
except ValueError:
print("❌ 请输入有效的数字")
elif choice == '3': # # 交互式菜单
# 查看对话历史 # while True:
session_id = input("请输入会话ID前8位即可: ").strip() # print("\n" + "="*50)
# print("双AI角色对话系统")
# print("1. 创建新对话")
# print("2. 继续已有对话")
# print("3. 查看对话历史")
# print("4. 列出所有会话")
# print("0. 退出")
# print("="*50)
# 查找匹配的会话 # choice = input("请选择操作: ").strip()
sessions = conv_mgr.list_sessions()
matching_session = None
for session in sessions:
if session['session_id'].startswith(session_id):
matching_session = session
break
if matching_session: # if choice == '0':
full_session_id = matching_session['session_id'] # break
history = conv_mgr.get_conversation_history(full_session_id, 20)
if history: # elif choice == '1':
print(f"\n对话历史 - {full_session_id}") # # 创建新对话
print(f"角色: {', '.join(matching_session['characters'])}") # print(f"可用角色: {list(kb.character_data.keys())}")
print(f"世界观: {matching_session['worldview']}") # characters = input("请输入两个角色名称(用空格分隔): ").strip().split()
print("-" * 50)
for turn in history: # if len(characters) != 2:
print(f"[{turn.timestamp[:16]}] {turn.speaker}:") # print("❌ 请输入正好两个角色名称")
print(f" {turn.content}") # continue
if turn.context_used:
print(f" 使用上下文: {', '.join(turn.context_used)}")
print()
else:
print("该会话暂无对话历史")
else:
print("❌ 未找到匹配的会话")
elif choice == '4': # worldview = kb.worldview_data.get('worldview_name', '未知世界观') if kb.worldview_data else '未知世界观'
# 列出所有会话 # session_id = conv_mgr.create_session(characters, worldview)
sessions = conv_mgr.list_sessions()
if sessions:
print(f"\n共有 {len(sessions)} 个对话会话:")
for session in sessions:
chars = ", ".join(session['characters'])
print(f"ID: {session['session_id']}")
print(f" 角色: {chars}")
print(f" 世界观: {session['worldview']}")
print(f" 最后更新: {session['last_update']}")
print()
else:
print("暂无对话会话")
else: # topic = input("请输入对话主题(可选): ").strip()
print("❌ 无效选择") # turns = int(input("请输入对话轮次数量默认2: ").strip() or "2")
except Exception as e: # # 历史上下文控制选项
print(f"✗ 系统运行出错: {e}") # print("\n历史上下文设置:")
import traceback # history_count = input("使用历史对话轮数默认30表示不使用: ").strip()
traceback.print_exc() # history_count = int(history_count) if history_count.isdigit() else 3
if __name__ == '__main__': # context_info_count = input("使用上下文信息数量默认2: ").strip()
main() # context_info_count = int(context_info_count) if context_info_count.isdigit() else 2
# print(f"\n开始对话 - 会话ID: {session_id}")
# print(f"上下文设置: 历史{history_count}轮, 信息{context_info_count}个")
# # 询问是否使用双模型对话
# use_dual_model = input("是否使用双模型对话系统?(y/n默认y): ").strip().lower()
# if use_dual_model != 'n':
# print("使用双模型对话系统...")
# dialogue_engine.run_dual_model_conversation(session_id, topic, turns, history_count, context_info_count)
# else:
# print("使用传统对话系统...")
# dialogue_engine.run_conversation_turn(session_id, characters, turns, topic, history_count, context_info_count)
# elif choice == '2':
# # 继续已有对话
# sessions = conv_mgr.list_sessions()
# if not sessions:
# print("❌ 没有已有对话")
# continue
# print("已有会话:")
# for i, session in enumerate(sessions[:5]):
# chars = ", ".join(session['characters'])
# print(f"{i+1}. {session['session_id'][:8]}... ({chars}) - {session['last_update'][:16]}")
# try:
# idx = int(input("请选择会话编号: ").strip()) - 1
# if 0 <= idx < len(sessions):
# session = sessions[idx]
# session_id = session['session_id']
# characters = session['characters']
# # 显示最近的对话
# history = conv_mgr.get_conversation_history(session_id, 4)
# if history:
# print("\n最近的对话:")
# for turn in history:
# print(f"{turn.speaker}: {turn.content}")
# topic = input("请输入对话主题(可选): ").strip()
# turns = int(input("请输入对话轮次数量默认1: ").strip() or "1")
# # 历史上下文控制选项
# print("\n历史上下文设置:")
# history_count = input("使用历史对话轮数默认30表示不使用: ").strip()
# history_count = int(history_count) if history_count.isdigit() else 3
# context_info_count = input("使用上下文信息数量默认2: ").strip()
# context_info_count = int(context_info_count) if context_info_count.isdigit() else 2
# print(f"\n继续对话 - 会话ID: {session_id}")
# print(f"上下文设置: 历史{history_count}轮, 信息{context_info_count}个")
# # 询问是否使用双模型对话
# use_dual_model = input("是否使用双模型对话系统?(y/n默认y): ").strip().lower()
# if use_dual_model != 'n':
# print("使用双模型对话系统...")
# dialogue_engine.run_dual_model_conversation(session_id, topic, turns, history_count, context_info_count)
# else:
# print("使用传统对话系统...")
# dialogue_engine.run_conversation_turn(session_id, characters, turns, topic, history_count, context_info_count)
# else:
# print("❌ 无效的会话编号")
# except ValueError:
# print("❌ 请输入有效的数字")
# elif choice == '3':
# # 查看对话历史
# session_id = input("请输入会话ID前8位即可: ").strip()
# # 查找匹配的会话
# sessions = conv_mgr.list_sessions()
# matching_session = None
# for session in sessions:
# if session['session_id'].startswith(session_id):
# matching_session = session
# break
# if matching_session:
# full_session_id = matching_session['session_id']
# history = conv_mgr.get_conversation_history(full_session_id, 20)
# if history:
# print(f"\n对话历史 - {full_session_id}")
# print(f"角色: {', '.join(matching_session['characters'])}")
# print(f"世界观: {matching_session['worldview']}")
# print("-" * 50)
# for turn in history:
# print(f"[{turn.timestamp[:16]}] {turn.speaker}:")
# print(f" {turn.content}")
# if turn.context_used:
# print(f" 使用上下文: {', '.join(turn.context_used)}")
# print()
# else:
# print("该会话暂无对话历史")
# else:
# print("❌ 未找到匹配的会话")
# elif choice == '4':
# # 列出所有会话
# sessions = conv_mgr.list_sessions()
# if sessions:
# print(f"\n共有 {len(sessions)} 个对话会话:")
# for session in sessions:
# chars = ", ".join(session['characters'])
# print(f"ID: {session['session_id']}")
# print(f" 角色: {chars}")
# print(f" 世界观: {session['worldview']}")
# print(f" 最后更新: {session['last_update']}")
# print()
# else:
# print("暂无对话会话")
# else:
# print("❌ 无效选择")
# except Exception as e:
# print(f"✗ 系统运行出错: {e}")
# import traceback
# traceback.print_exc()
# if __name__ == '__main__':
# main()

View File

@ -0,0 +1,182 @@
{
"character_name": "维多利亚·布莱克伍德",
"basic_info": {
"age": 28,
"gender": "女",
"occupation": "私人侦探",
"nationality": "英国",
"appearance": "身材高挑,有着深邃的绿色眼睛和波浪状的棕色长发,通常穿着深色长外套"
},
"personality": {
"core_traits": [
"理性冷静",
"观察力敏锐",
"独立自主",
"好奇心强"
],
"strengths": [
"逻辑推理能力强",
"善于发现细节",
"不畏危险",
"坚持正义"
],
"weaknesses": [
"有时过于固执",
"不善表达情感",
"容易忽视他人感受",
"工作狂倾向"
],
"values": [
"真相高于一切",
"保护无辜的人",
"相信科学与理性",
"追求公正"
]
},
"background": {
"childhood": "出生在伦敦一个中产阶级家庭,父亲是律师,母亲是医生。从小就表现出对解谜的兴趣。",
"education": "剑桥大学法学专业毕业,后来转向犯罪学研究",
"career_start": "最初在苏格兰场工作了3年后来决定成为私人侦探",
"major_events": [
"21岁时目睹了一起谋杀案激发了对侦探工作的热情",
"25岁时成功破获了一起连环杀手案声名鹊起",
"27岁时开设了自己的私人侦探事务所"
]
},
"skills_and_abilities": {
"professional_skills": [
"犯罪现场调查",
"法医学基础",
"心理分析",
"跟踪监视",
"档案研究"
],
"languages": [
"英语(母语)",
"法语(流利)",
"德语(基础)"
],
"other_abilities": [
"摄影",
"速记",
"基础格斗技能",
"开锁技术"
]
},
"relationships": {
"allies": [
{
"name": "詹姆斯·沃森医生",
"relationship": "合作伙伴",
"description": "经常协助处理涉及医学知识的案件"
},
{
"name": "艾米丽·哈里斯警官",
"relationship": "朋友/信息源",
"description": "苏格兰场的老同事,偶尔提供官方信息"
}
],
"family": [
{
"name": "威廉·布莱克伍德",
"relationship": "父亲",
"description": "资深律师,关系良好但不常联系"
},
{
"name": "玛格丽特·布莱克伍德",
"relationship": "母亲",
"description": "内科医生,担心女儿的安危但支持她的选择"
}
]
},
"speech_patterns": {
"vocabulary": [
"倾向使用准确的专业词汇",
"很少使用情绪化的表达",
"喜欢引用事实和数据",
"经常使用推理性语句"
],
"tone": [
"语调平稳",
"说话简洁明了",
"很少有语气词",
"在紧张时语速会变快"
],
"habits": [
"习惯在思考时轻敲桌面",
"总是随身携带笔记本",
"喜欢用问句引导对话",
"经常说'让我们看看证据'"
],
"sample_phrases": [
"根据现有证据...",
"这很有趣...",
"让我们从逻辑角度分析",
"事实胜于推测",
"细节决定成败"
]
},
"current_situation": {
"location": "伦敦贝克街221B的侦探事务所",
"recent_activities": [
"刚刚结束了一起艺术品盗窃案的调查",
"正在研究一系列看似无关的失踪案件",
"计划更新事务所的调查设备"
],
"current_mood": "保持专业警觉,对新案件充满期待",
"goals": [
"短期:解决手头的失踪案件",
"中期:扩大事务所的影响力",
"长期:成为伦敦最知名的私人侦探"
]
},
"preferences": {
"likes": [
"复杂的推理谜题",
"古典音乐",
"黑咖啡",
"下雨天的伦敦",
"老式侦探小说"
],
"dislikes": [
"无意义的社交",
"被人轻视",
"不完整的信息",
"时间被浪费",
"感情用事的决定"
],
"habits": [
"每天早上6点起床",
"喜欢在深夜整理案件资料",
"周末会去博物馆或图书馆",
"睡前必须检查门锁"
]
},
"dialogue_examples": {
"casual_conversation": [
"我觉得这个案子比表面看起来更复杂。",
"证据链还不完整,我们需要更多信息。",
"有趣,这个细节之前被忽略了。"
],
"professional_interaction": [
"请详细描述当时的情况,不要遗漏任何细节。",
"根据我的调查,事情的真相可能是这样的...",
"我需要检查现场,任何痕迹都可能是关键。"
],
"emotional_moments": [
"有时候,真相比谎言更难以接受。",
"正义可能会迟到,但绝不会缺席。",
"每个案件背后都有真实的人生。"
]
},
"meta_info": {
"creation_purpose": "用于COC跑团或现代侦探类角色扮演",
"adaptability": "可根据具体剧情需要调整背景和关系网",
"compatibility": "适合与其他侦探、医生、警察等角色配合",
"character_arc_potential": [
"从冷静理性到学会关心他人情感",
"从独立工作到学会团队协作",
"面对更大阴谋时的成长"
]
}
}

View File

@ -0,0 +1,242 @@
{
"character_name": "阿奇博尔德·韦恩",
"basic_info": {
"age": 45,
"gender": "男",
"occupation": "米斯卡托尼克大学考古学教授",
"nationality": "美国",
"appearance": "中等身材,银白色头发,总是戴着金丝边眼镜,穿着考究的三件套西装"
},
"personality": {
"core_traits": [
"博学严谨",
"谨慎保守",
"责任心强",
"内心焦虑"
],
"strengths": [
"渊博的学识",
"精确的记忆力",
"出色的研究能力",
"对学生的关心"
],
"weaknesses": [
"过度担心",
"决策犹豫",
"身体较弱",
"容易陷入研究忘记现实"
],
"values": [
"知识的传承",
"学术诚信",
"保护无知者",
"维护理性"
]
},
"background": {
"childhood": "出生在波士顿一个书香门第,父亲是历史学教授,从小在图书馆中长大",
"education": "哈佛大学考古学博士,专精古代文明研究,特别是前哥伦布时期美洲文明",
"career_development": "29岁进入米斯卡托尼克大学任教逐渐接触到一些'不寻常'的考古发现",
"major_events": [
"35岁时参与了一次南美洲的考古挖掘发现了一些令人不安的古老雕像",
"40岁时首次接触到《死灵之书》的片段从此开始关注超自然考古学",
"42岁时目睹了一次超自然事件开始理解世界的真实面貌",
"目前正在秘密研究如何保护人类免受古老威胁"
]
},
"skills_and_abilities": {
"academic_skills": [
"古代语言学(拉丁语、古希腊语、古玛雅文字)",
"考古学田野调查",
"古代文明研究",
"文物鉴定与保护",
"学术论文写作"
],
"occult_knowledge": [
"神秘学基础理论",
"古老符号的辨识",
"保护性仪式",
"危险文物的识别",
"理智保护技巧"
],
"practical_abilities": [
"图书馆研究",
"多种古代语言翻译",
"文献考证",
"教学能力",
"基础摄影技术"
]
},
"relationships": {
"academic_circles": [
{
"name": "亨利·阿米蒂奇教授",
"relationship": "同事兼导师",
"description": "图书馆学教授,是少数知道真相的学者之一"
},
{
"name": "弗朗西斯·摩根教授",
"relationship": "研究伙伴",
"description": "医学教授,专门研究超自然现象对人体的影响"
}
],
"students": [
{
"name": "多名研究生",
"relationship": "师生",
"description": "尽力保护他们不接触危险知识,同时培养他们的学术能力"
}
],
"family": [
{
"name": "伊莲娜·韦恩",
"relationship": "妻子",
"description": "中学英语教师,不知道丈夫研究的真实性质,经常担心他的健康"
},
{
"name": "托马斯·韦恩",
"relationship": "儿子",
"description": "16岁对父亲的工作很好奇韦恩博士努力让他远离危险知识"
}
]
},
"speech_patterns": {
"vocabulary": [
"经常使用学术术语",
"引用古典文献",
"措辞谨慎保守",
"避免直接描述恐怖事物"
],
"tone": [
"语调温和但认真",
"说话时经常停顿思考",
"在谈到危险知识时声音会变低",
"对学生总是耐心解释"
],
"habits": [
"经常擦拭眼镜",
"思考时会无意识地整理桌上的文件",
"喜欢用'据我所知'开头",
"经常提及'需要更多研究'"
],
"sample_phrases": [
"根据我的研究...",
"这需要极其谨慎的处理",
"古人的智慧不容小觑",
"有些知识是有代价的",
"我们必须保护那些不知情的人"
]
},
"current_situation": {
"location": "米斯卡托尼克大学考古系办公室",
"recent_activities": [
"正在翻译一份神秘的古代文献",
"与其他几位教授秘密讨论如何应对潜在威胁",
"准备下个月的美洲古文明课程",
"关注最近发生的几起奇异事件"
],
"current_concerns": [
"担心某个学生可能接触到了危险知识",
"对最近校园里的奇怪现象感到不安",
"妻子开始怀疑他隐瞒什么事情"
],
"goals": [
"短期:完成当前文献的翻译工作",
"中期:建立更有效的超自然威胁预警系统",
"长期:培养能够承担保护人类责任的下一代学者"
]
},
"mental_state": {
"sanity_level": "相对稳定,但承受着巨大心理压力",
"coping_mechanisms": [
"通过规律的学术工作保持理性",
"与信任的同事分享担忧",
"严格限制自己接触最危险的知识",
"通过教学工作获得成就感"
],
"stress_indicators": [
"失眠和噩梦",
"过度担心家人安全",
"对学生过分保护",
"经常检查办公室的安全性"
]
},
"preferences": {
"likes": [
"古典音乐,特别是巴赫",
"安静的图书馆环境",
"与同行讨论安全的学术话题",
"和家人共度的平静时光",
"收集古代艺术品(安全的那些)"
],
"dislikes": [
"不负责任的学者",
"对超自然现象的轻率态度",
"媒体对考古发现的夸大报道",
"任何可能危及学生的情况",
"不得不对家人撒谎"
],
"daily_habits": [
"早上6点起床先查看昨夜是否有异常",
"上午专注于教学工作",
"午后在办公室进行研究",
"晚上陪伴家人,避免谈论工作",
"睡前检查所有门窗"
]
},
"equipment_and_possessions": {
"academic_tools": [
"高质量的放大镜和测量工具",
"多种古代语言词典",
"精密的摄影设备",
"文物保护用品",
"大量专业书籍"
],
"protective_items": [
"一些他认为有保护作用的符咒",
"经过特殊处理的盐",
"几本关于保护仪式的手册",
"应急通讯设备"
],
"personal_belongings": [
"妻子的照片",
"父亲留下的金表",
"学生送的纪念品",
"一些'安全'的古代复制品"
]
},
"dialogue_examples": {
"teaching_moments": [
"古代文明留给我们的不仅仅是石头和黄金,更重要的是智慧...和警告。",
"在考古学中,我们不仅要发现过去,更要保护现在。",
"有些发现最好留在地下,这不是怯懦,而是智慧。"
],
"research_discussions": [
"这些符号...我在其他地方见过,但愿我的猜测是错的。",
"我们需要更多的资料,但要确保安全地获取。",
"如果我的翻译是正确的,那么我们面临的威胁比想象中更严重。"
],
"personal_conversations": [
"对不起,亲爱的,最近工作压力确实很大。",
"托马斯,也许你应该考虑学习更实用的专业,比如工程学?",
"有些时候,无知确实是一种幸福。"
],
"crisis_responses": [
"保持冷静,记住我们的训练。",
"不要直视它!专注于我们讨论过的保护咒语。",
"如果我们失败了,确保销毁所有相关文献。"
]
},
"character_arc_potential": [
"从纯粹的学者成长为保护人类的守护者",
"在家庭责任和拯救世界之间寻找平衡",
"面对更大威胁时必须做出牺牲的决定",
"培养继承者以延续保护人类的使命"
],
"meta_info": {
"creation_purpose": "COC跑团中的学者型NPC可以提供知识支援和道德指导",
"role_in_story": "导师、信息来源、情感支柱",
"interaction_style": "谨慎但关怀,知识丰富但不会直接给出所有答案",
"narrative_function": "平衡角色团队,提供背景知识,增加故事深度"
}
}

View File

@ -0,0 +1,296 @@
{
"worldview_name": "克苏鲁的呼唤 (Call of Cthulhu)",
"basic_info": {
"genre": "恐怖,超自然,推理",
"time_period": "1920年代",
"primary_setting": "美国新英格兰地区",
"tone": "黑暗,压抑,充满未知恐惧"
},
"core_concepts": {
"cosmic_horror": {
"description": "人类在宇宙中的渺小和无知",
"key_elements": [
"古老存在远超人类理解",
"知识本身就是危险的",
"人类理性的脆弱性",
"宇宙的冷漠和敌意"
]
},
"sanity_system": {
"description": "理智值系统,接触超自然会损害心智",
"mechanics": [
"目睹恐怖事物降低理智",
"学习禁忌知识的代价",
"疯狂的不同表现形式",
"理智恢复的困难性"
]
},
"mythos_knowledge": {
"description": "关于古老神话的危险知识",
"categories": [
"旧日支配者",
"外神",
"禁忌典籍",
"异界生物"
]
}
},
"geography": {
"primary_locations": [
{
"name": "阿卡姆",
"description": "马萨诸塞州的大学城,米斯卡托尼克大学所在地",
"significance": "学术研究中心,许多超自然事件的发源地",
"notable_features": [
"米斯卡托尼克大学图书馆",
"阿卡姆疗养院",
"各种古老建筑"
]
},
{
"name": "敦威治",
"description": "偏远的乡村小镇,充满古老传说",
"significance": "许多怪异事件的发生地",
"notable_features": [
"古老的山丘",
"废弃的农场",
"神秘的石圈"
]
},
{
"name": "印斯茅斯",
"description": "衰败的海港小镇",
"significance": "深潜者的据点",
"notable_features": [
"古老的码头",
"奇怪的居民",
"海底的秘密"
]
}
],
"other_significant_places": [
"南极洲的疯狂山脉",
"太平洋的拉莱耶",
"梦境大陆",
"时空的裂隙点"
]
},
"timeline": {
"ancient_times": [
"数百万年前:旧日支配者统治地球",
"人类出现前:古老种族的兴衰",
"史前时代:各种异界生物的活动"
],
"historical_events": [
"1692年塞勒姆女巫审判隐藏的真实原因",
"1846年某次南极探险的神秘失踪",
"1908年通古斯大爆炸的真相",
"1912年泰坦尼克号沉没的隐情"
],
"current_era_1920s": [
"1920年禁酒令开始地下活动增加",
"1922年某次考古发现引发的事件",
"1925年格陵兰探险队的失踪",
"1929年股市崩盘前的预兆事件"
]
},
"factions_and_organizations": {
"academic": [
{
"name": "米斯卡托尼克大学",
"purpose": "高等教育和研究机构",
"secret_role": "神秘学知识的守护者",
"notable_members": [
"亨利·阿米蒂奇教授",
"其他不愿透露姓名的学者"
]
}
],
"occult": [
{
"name": "星之智慧教派",
"purpose": "崇拜外神和旧日支配者",
"activities": [
"进行邪恶仪式",
"寻找古老遗物",
"传播危险知识"
]
}
],
"government": [
{
"name": "联邦调查局(非正式部门)",
"purpose": "调查超自然事件",
"secrecy_level": "高度机密",
"resources": "有限但专业"
}
]
},
"supernatural_elements": {
"entities": {
"old_ones": [
{
"name": "克苏鲁",
"domain": "海洋,梦境",
"status": "沉睡中",
"influence": "通过梦境影响敏感者"
},
{
"name": "哈斯塔",
"domain": "风,空气,艺术",
"symbols": "黄衣之王",
"influence": "通过艺术作品传播疯狂"
}
],
"lesser_beings": [
"深潜者",
"食尸鬼",
"夜魇",
"拜亚基"
]
},
"artifacts": [
{
"name": "死灵之书Necronomicon",
"type": "禁忌典籍",
"danger_level": "极高",
"effects": "提供强大知识但严重损害理智"
},
{
"name": "闪亮的偏方三八面体",
"type": "几何物品",
"properties": "可以打开时空裂隙"
}
]
},
"social_context": {
"1920s_america": {
"culture": [
"爵士时代的繁荣",
"禁酒令下的地下文化",
"科学理性主义的兴起",
"传统价值观的变化"
],
"technology": [
"无线电的普及",
"汽车开始普及",
"电话系统发展",
"早期航空业"
],
"social_issues": [
"种族隔离",
"性别不平等",
"经济不稳定",
"城市化进程"
]
},
"academic_world": [
"大学的黄金时代",
"考古学的发展",
"人类学研究的兴起",
"心理学的新发现"
]
},
"themes": {
"primary_themes": [
"人类在宇宙中的渺小",
"知识的双刃剑性质",
"文明的脆弱性",
"理性与疯狂的边界"
],
"common_scenarios": [
"调查神秘失踪案件",
"发现古老遗迹",
"面对无法解释的现象",
"与邪教组织斗争",
"保护无知的大众"
],
"moral_dilemmas": [
"是否应该追求危险的真相",
"如何平衡知识与安全",
"是否要向他人透露恐怖真相",
"如何在保持理智的同时对抗超自然"
]
},
"gameplay_elements": {
"investigation": [
"收集线索",
"访问证人",
"研究历史档案",
"解读古老文本"
],
"survival": [
"保持理智值",
"避免直接接触超自然存在",
"寻找盟友和资源",
"制定逃脱计划"
],
"character_development": [
"技能的提升",
"知识的积累",
"心理创伤的处理",
"人际关系的建立"
]
},
"atmosphere": {
"visual_elements": [
"雾气弥漫的街道",
"古老而阴暗的建筑",
"昏暗的图书馆和博物馆",
"荒凉的海岸线"
],
"audio_elements": [
"远处传来的奇怪声音",
"老式留声机的音乐",
"风吹过古老建筑的声音",
"夜晚的虫鸣和海浪声"
],
"emotional_tone": [
"不安和焦虑",
"对未知的恐惧",
"孤独和绝望",
"偶尔的希望之光"
]
},
"language_and_style": {
"writing_style": [
"维多利亚时期的正式语言",
"学术性的措辞",
"大量使用形容词",
"暗示而非直接描述恐怖"
],
"common_vocabulary": [
"古老的 (ancient)",
"不可名状的 (unspeakable)",
"亵渎的 (blasphemous)",
"令人恐惧的 (dreadful)",
"神秘的 (mysterious)"
],
"character_speech": [
"教授:学术性,引用经典",
"调查员:实用主义,关注细节",
"普通民众:迷信,害怕未知",
"邪教徒:狂热,晦涩难懂"
]
},
"usage_guidelines": {
"for_roleplay": [
"强调角色的脆弱性",
"重视调查过程",
"营造紧张氛围",
"合理使用恐怖元素"
],
"for_storytelling": [
"逐渐揭示真相",
"保持神秘感",
"平衡恐怖与希望",
"关注角色成长"
],
"common_pitfalls": [
"避免过度描述恐怖",
"不要让玩家过于无力",
"保持世界的一致性",
"尊重原著精神"
]
}
}

456
AITrain/main_controller.py Normal file
View File

@ -0,0 +1,456 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
双AI角色对话系统主控制程序
完整的工作流程PDF处理 -> 角色加载 -> RAG对话 -> 历史记录
'''
import os
import sys
import shutil
from typing import List, Dict
import json
def check_dependencies():
    """Check that required third-party libraries are importable.

    Returns True when all hard requirements are present, False otherwise.
    Optional features (pymupdf, vector retrieval) only emit warnings and do
    not affect the return value.
    """
    unmet = []

    # PyPDF2 is a hard requirement for PDF processing.
    try:
        import PyPDF2
    except ImportError:
        unmet.append("PyPDF2")

    # pymupdf is the preferred extractor; PyPDF2 is the fallback.
    try:
        import pymupdf
        print("✓ pymupdf 可用")
    except ImportError:
        print("⚠ pymupdf 不可用,将使用 PyPDF2")

    # Vector retrieval needs both sentence-transformers and faiss.
    try:
        import sentence_transformers
        import faiss
        print("✓ 向量化功能可用")
    except ImportError:
        print("⚠ 向量化功能不可用,将使用文本匹配")

    if unmet:
        print(f"✗ 缺少依赖库: {', '.join(unmet)}")
        print("请运行: pip install PyPDF2 sentence-transformers faiss-cpu")
        return False
    return True
def setup_directories():
    """Ensure every working directory the system needs exists.

    Missing directories are created; existing ones are left untouched.
    """
    for path in (
        "./knowledge_base",
        "./characters",
        "./worldview",
        "./rag_knowledge",
        "./conversation_data",
    ):
        os.makedirs(path, exist_ok=True)
        print(f"✓ 目录就绪: {path}")
def copy_demo_files():
    """Copy the bundled demo worldview/character templates into the knowledge base.

    Source files that do not exist are skipped silently.
    """
    demo_sources = (
        "./worldview/worldview_template_coc.json",
        "./characters/character_template_detective.json",
        "./characters/character_template_professor.json",
    )
    for src in demo_sources:
        dst = os.path.join("./knowledge_base", os.path.basename(src))
        if os.path.exists(src):
            shutil.copy2(src, dst)
            print(f"✓ 复制文档: {os.path.basename(dst)}")
def process_pdf_workflow():
    """Interactively convert a PDF worldview document into the RAG knowledge base.

    Prompts for a PDF path, runs the PDF-to-RAG processor into
    ``./rag_knowledge``, and prints a summary of the result.
    Returns True on success, False when the file is missing or processing fails.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("PDF世界观文档处理")
    print(banner)

    from pdf_to_rag_processor import PDFToRAGProcessor

    pdf_path = input("请输入PDF文件路径 (例: ./coc.pdf): ").strip()
    if not os.path.exists(pdf_path):
        print(f"✗ 文件不存在: {pdf_path}")
        return False

    try:
        result = PDFToRAGProcessor().process_pdf_to_rag(pdf_path, "./rag_knowledge")
        print(f"\n✓ PDF处理完成!")
        print(f" - 文档块数: {result['chunks_count']}")
        print(f" - 概念数: {result['concepts_count']}")
        print(f" - 向量索引: {'启用' if result['vector_enabled'] else '未启用'}")
        return True
    except Exception as e:
        print(f"✗ PDF处理失败: {e}")
        return False
def show_character_info():
    """Print a short profile for every character JSON file in the knowledge base.

    For each ``character*.json`` file under ``./knowledge_base`` this prints
    the character's name, occupation, and up to three core traits. A missing
    knowledge base directory or an unreadable file is reported instead of
    crashing the menu action.
    """
    print("\n" + "="*60)
    print("角色设定信息")
    print("="*60)
    knowledge_dir = "./knowledge_base"
    # Guard: os.listdir raises FileNotFoundError on a missing directory,
    # which previously crashed this menu action on a fresh checkout.
    if not os.path.isdir(knowledge_dir):
        print(f"✗ 知识库目录不存在: {knowledge_dir}")
        return
    character_files = [f for f in os.listdir(knowledge_dir) if f.startswith('character') and f.endswith('.json')]
    if not character_files:
        print("⚠ 未找到角色文件")
        return
    for char_file in character_files:
        try:
            with open(os.path.join(knowledge_dir, char_file), 'r', encoding='utf-8') as f:
                char_data = json.load(f)
            name = char_data.get('character_name', '未知')
            occupation = char_data.get('basic_info', {}).get('occupation', '未知')
            traits = char_data.get('personality', {}).get('core_traits', [])
            print(f"\n角色: {name}")
            print(f" 职业: {occupation}")
            print(f" 特点: {', '.join(traits[:3])}")
        except Exception as e:
            # Report per-file failures but keep listing the remaining files.
            print(f"✗ 读取角色文件失败: {char_file} - {e}")
def run_dialogue_system():
    """Launch the interactive dual-AI dialogue system.

    Wires together the RAG knowledge base, the conversation store, and a
    dual-model dialogue generator (one model instance per character), then
    enters a prompt loop asking for a topic, turn count, and context sizes.
    Any startup failure is caught, printed, and the function returns.
    """
    print("\n" + "="*60)
    print("启动双AI角色对话系统")
    print("="*60)
    try:
        # Start the dual-model dialogue directly.
        print("\n正在初始化双模型对话系统...")
        from dual_ai_dialogue_system import RAGKnowledgeBase, ConversationManager, DualAIDialogueEngine
        from npc_dialogue_generator import DualModelDialogueGenerator
        # Initialize the core components (knowledge base + conversation store).
        kb = RAGKnowledgeBase("./knowledge_base")
        conv_mgr = ConversationManager("./conversation_data/conversations.db")
        # Check the model paths.
        # NOTE(review): hard-coded absolute base-model path — must be edited per machine.
        base_model_path = '/mnt/e/AI/Project02/AITrain/Qwen/Qwen3-4B'
        lora_model_path = './output/NPC_Dialogue_LoRA/final_model'
        if not os.path.exists(base_model_path):
            print(f"✗ 基础模型路径不存在: {base_model_path}")
            print("请修改 main_controller.py 中的模型路径")
            return
        if not os.path.exists(lora_model_path):
            # Fall back to the base model when no LoRA adapter is present.
            lora_model_path = None
            print("⚠ LoRA模型不存在使用基础模型")
        # Verify at least two characters are loaded before building a dual-model system.
        if not hasattr(kb, 'character_data') or len(kb.character_data) < 2:
            print("✗ 角色数据不足,无法创建双模型对话系统")
            print("请确保knowledge_base目录中有至少两个角色文件")
            return
        # Take the first two loaded characters (dict insertion order).
        character_names = list(kb.character_data.keys())[:2]
        char1_name = character_names[0]
        char2_name = character_names[1]
        print(f"✓ 使用角色: {char1_name}和{char2_name}")
        # Configure the model for each character; both share the same LoRA path here.
        character1_config = {
            "name": char1_name,
            "lora_path": lora_model_path,
            "character_data": kb.character_data[char1_name]
        }
        character2_config = {
            "name": char2_name,
            "lora_path": lora_model_path,
            "character_data": kb.character_data[char2_name]
        }
        # Create the dual-model dialogue generator (one model instance per role).
        print("正在初始化双模型对话生成器...")
        dual_generator = DualModelDialogueGenerator(
            base_model_path,
            character1_config,
            character2_config
        )
        # Create the dialogue engine on top of the generator.
        dialogue_engine = DualAIDialogueEngine(kb, conv_mgr, dual_generator)
        # Create a persistent conversation session for this pairing.
        characters = [char1_name, char2_name]
        worldview = kb.worldview_data.get('worldview_name', '未知世界观') if kb.worldview_data else '未知世界观'
        session_id = conv_mgr.create_session(characters, worldview)
        print(f"✓ 创建对话会话: {session_id}")
        # Interactive dialogue loop: one topic per iteration until the user quits.
        print(f"\n=== 双AI模型对话系统 ===")
        print(f"角色: {char1_name} vs {char2_name}")
        print(f"世界观: {worldview}")
        print("输入 'quit' 退出对话")
        print("-" * 50)
        while True:
            try:
                # Read the next topic or command from the user.
                user_input = input("\n请输入对话主题或指令: ").strip()
                if user_input.lower() == 'quit':
                    print("退出双AI对话系统")
                    break
                if not user_input:
                    print("请输入有效的对话主题")
                    continue
                # Ask for the number of dialogue turns (non-numeric input → default 4).
                turns_input = input("请输入对话轮数 (默认4): ").strip()
                turns = int(turns_input) if turns_input.isdigit() else 4
                # Ask for the history/context window sizes (defaults 2 and 10).
                history_input = input("使用历史对话轮数 (默认2): ").strip()
                history_count = int(history_input) if history_input.isdigit() else 2
                context_input = input("使用上下文信息数量 (默认10): ").strip()
                context_info_count = int(context_input) if context_input.isdigit() else 10
                print(f"\n开始对话 - 主题: {user_input}")
                print(f"轮数: {turns}, 历史: {history_count}, 上下文: {context_info_count}")
                print("-" * 50)
                # Run the dual-model conversation for this topic.
                dialogue_engine.run_dual_model_conversation(
                    session_id, user_input, turns, history_count, context_info_count
                )
                print("-" * 50)
                print("对话完成!")
            except KeyboardInterrupt:
                # Ctrl-C during a topic exits the loop cleanly.
                print("\n\n用户中断对话")
                break
            except Exception as e:
                # Report per-topic failures and keep the loop alive.
                print(f"对话过程中出现错误: {e}")
                import traceback
                traceback.print_exc()
    except Exception as e:
        print(f"✗ 对话系统启动失败: {e}")
        import traceback
        traceback.print_exc()
def create_demo_scenario():
    """Create and run a scripted demo conversation between two fixed characters.

    Uses a separate demo database, builds an NPC dialogue generator, then runs
    two demo conversations with different history/context settings. All
    failures are caught, printed, and the function returns.
    """
    print("\n创建演示对话场景...")
    try:
        from dual_ai_dialogue_system import RAGKnowledgeBase, ConversationManager, DualAIDialogueEngine
        from npc_dialogue_generator import NPCDialogueGenerator
        # Initialize the core components against a dedicated demo database.
        kb = RAGKnowledgeBase("./knowledge_base")
        conv_mgr = ConversationManager("./conversation_data/demo_conversations.db")
        # Check the model paths.
        # NOTE(review): hard-coded absolute base-model path — must be edited per machine.
        base_model_path = '/mnt/e/AI/Project02/AITrain/Qwen/Qwen3-4B'
        lora_model_path = './output/NPC_Dialogue_LoRA/final_model'
        if not os.path.exists(base_model_path):
            print(f"✗ 基础模型路径不存在: {base_model_path}")
            print("请修改 main_controller.py 中的模型路径")
            return
        if not os.path.exists(lora_model_path):
            # Fall back to the base model when no LoRA adapter is present.
            lora_model_path = None
            print("⚠ LoRA模型不存在使用基础模型")
        llm_generator = NPCDialogueGenerator(base_model_path, lora_model_path, kb.character_data)
        dialogue_engine = DualAIDialogueEngine(kb, conv_mgr, llm_generator)
        # Create the demo conversation between two hard-coded characters.
        characters = ["维多利亚·布莱克伍德", "阿奇博尔德·韦恩"]
        worldview = "克苏鲁的呼唤"
        session_id = conv_mgr.create_session(characters, worldview)
        print(f"✓ 创建演示会话: {session_id}")
        # Run a few rounds of dialogue on a fixed topic.
        topic = "最近发生的神秘事件"
        print(f"\n开始演示对话 - 主题: {topic}")
        print("-" * 40)
        # Demonstrate different history/context settings (demo 1 kept disabled).
        # print("演示1: 使用默认上下文设置历史3轮信息2个")
        # dialogue_engine.run_conversation_turn(session_id, characters, 6, topic)
        session_id = conv_mgr.create_session(characters, worldview)
        print(f"✓ 创建演示会话: {session_id}")
        print("\n演示3: 使用最少历史上下文历史1轮信息1个")
        dialogue_engine.run_conversation_turn(session_id, characters, 6, topic, 1, 10)
        session_id = conv_mgr.create_session(characters, worldview)
        print(f"✓ 创建演示会话: {session_id}")
        print("\n演示2: 使用更多历史上下文历史10轮信息10个")
        dialogue_engine.run_conversation_turn(session_id, characters, 6, topic, 5, 10)
        print(f"\n✓ 演示完成会话ID: {session_id}")
        print("你可以通过主对话系统继续这个对话")
    except Exception as e:
        print(f"✗ 演示场景创建失败: {e}")
        import traceback
        traceback.print_exc()
def show_system_status():
    """Print a diagnostic report of the installation.

    Checks the expected data/script files, counts the files in each working
    directory, and reports the number of stored conversation sessions.
    """
    divider = "=" * 60
    print("\n" + divider)
    print("系统状态检查")
    print(divider)

    # Presence check for the core data and script files.
    expected_files = (
        ("./knowledge_base/worldview_template_coc.json", "世界观模板"),
        ("./knowledge_base/character_template_detective.json", "侦探角色"),
        ("./knowledge_base/character_template_professor.json", "教授角色"),
        ("./pdf_to_rag_processor.py", "PDF处理器"),
        ("./dual_ai_dialogue_system.py", "对话系统"),
        ("./npc_dialogue_generator.py", "NPC生成器"),
    )
    print("\n文件检查:")
    for path, label in expected_files:
        if os.path.exists(path):
            print(f"✓ {label}: {path}")
        else:
            print(f"✗ {label}: {path} (不存在)")

    # File counts per working directory.
    print("\n目录检查:")
    for folder in ("./knowledge_base", "./rag_knowledge", "./conversation_data"):
        if os.path.exists(folder):
            count = sum(
                1 for entry in os.listdir(folder)
                if os.path.isfile(os.path.join(folder, entry))
            )
            print(f"✓ {folder}: {count} 个文件")
        else:
            print(f"✗ {folder}: 不存在")

    # Session count from the conversation database (best-effort).
    try:
        from dual_ai_dialogue_system import ConversationManager
        conv_mgr = ConversationManager("./conversation_data/conversations.db")
        sessions = conv_mgr.list_sessions()
        print(f"\n✓ 对话会话: {len(sessions)} 个")
    except Exception as e:
        print(f"\n✗ 对话会话检查失败: {e}")
def main():
    """Top-level menu loop for the dual-AI dialogue system.

    Verifies hard dependencies first, then repeatedly shows the main menu and
    dispatches the user's choice until '0' (quit) is entered.
    """
    header = "=" * 70
    print(header)
    print(" 双AI角色对话系统 - 主控制程序")
    print(" 基于RAG的世界观增强对话引擎")
    print(header)
    # Abort early when hard dependencies are missing.
    if not check_dependencies():
        return
    # Directory setup / demo copy are currently disabled.
    # setup_directories()
    # copy_demo_files()
    # Menu choice → handler dispatch table.
    actions = {
        '1': process_pdf_workflow,
        '2': show_character_info,
        '3': run_dialogue_system,
        '4': create_demo_scenario,
        '5': show_system_status,
        '6': show_usage_guide,
    }
    while True:
        print("\n" + "="*50)
        print("主菜单 - 请选择操作:")
        print("1. 处理PDF世界观文档 (转换为RAG格式)")
        print("2. 查看角色设定信息")
        print("3. 启动双AI对话系统 (支持双模型对话)")
        print("4. 创建演示对话场景")
        print("5. 系统状态检查")
        print("6. 查看使用说明")
        print("0. 退出")
        print("="*50)
        choice = input("请输入选择 (0-6): ").strip()
        if choice == '0':
            print("\n感谢使用双AI角色对话系统")
            break
        handler = actions.get(choice)
        if handler is None:
            print("❌ 无效选择,请重新输入")
        else:
            handler()
def show_usage_guide():
    """Print the built-in usage guide for the system."""
    bar = "=" * 60
    print("\n" + bar)
    print("系统使用说明")
    print(bar)
    # The guide text is user-facing output and is kept verbatim.
    usage_text = """
🚀 快速开始:
1. 首次使用建议先运行"创建演示对话场景"
2. 如有PDF世界观文档选择"处理PDF世界观文档"
3. 通过"启动双AI对话系统"开始角色对话
📁 文档格式说明:
- 世界观文档: worldview_template_coc.json (参考COC设定)
- 角色设定: character_template_*.json (包含详细人设)
🔧 系统功能:
- PDF自动转换为RAG知识库
- 基于向量相似度的上下文检索
- 持久化对话历史存储
- 角色设定一致性保持
📝 自定义角色:
1. 参考 character_template_*.json 格式
2. 保存到 knowledge_base/ 目录
3. 重启对话系统加载新角色
💾 对话数据:
- 历史对话保存在 conversation_data/ 目录
- 支持会话恢复和历史查看
- 自动记录使用的上下文信息
注意事项:
- 确保模型路径正确设置
- 首次运行需要下载向量化模型
- PDF处理需要足够内存
    """
    print(usage_text)
# Run the interactive menu only when executed as a script, not on import.
if __name__ == '__main__':
    main()

View File

@ -3,6 +3,7 @@
''' '''
游戏NPC角色对话生成器 游戏NPC角色对话生成器
基于微调后的LoRA模型生成角色对话 基于微调后的LoRA模型生成角色对话
支持双模型对话系统每个模型扮演一个角色
''' '''
import torch import torch
@ -10,8 +11,9 @@ import json
import random import random
from peft import PeftModel from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import Dict, List, Optional from typing import Dict, List, Optional, Tuple
import platform import platform
import os
# Windows multiprocessing兼容性修复 # Windows multiprocessing兼容性修复
if platform.system() == "Windows": if platform.system() == "Windows":
@ -19,86 +21,96 @@ if platform.system() == "Windows":
multiprocessing.set_start_method('spawn', force=True) multiprocessing.set_start_method('spawn', force=True)
class NPCDialogueGenerator: class NPCDialogueGenerator:
def __init__(self, base_model_path: str, lora_model_path: Optional[str] = None): def __init__(self, base_model_path: str, lora_model_path: Optional[str] = None, external_character_data: Optional[Dict] = None):
""" """
初始化NPC对话生成器 初始化NPC对话生成器
Args: Args:
base_model_path: 基础模型路径 base_model_path: 基础模型路径
lora_model_path: LoRA模型路径可选 lora_model_path: LoRA模型路径可选
external_character_data: 外部角色数据可选优先使用
""" """
self.base_model_path = base_model_path self.base_model_path = base_model_path
self.lora_model_path = lora_model_path self.lora_model_path = lora_model_path
self.model = None self.model = None
self.tokenizer = None self.tokenizer = None
self.character_profiles = self._load_character_profiles()
# 优先使用外部角色数据,如果没有则使用内置数据
if external_character_data:
self.character_profiles = self._process_external_character_data(external_character_data)
print(f"✓ 使用外部角色数据: {list(self.character_profiles.keys())}")
self._load_model() self._load_model()
def _load_character_profiles(self) -> Dict: def _process_external_character_data(self, external_data: Dict) -> Dict:
"""加载角色画像数据""" """
return { 处理外部角色数据转换为对话生成器可用的格式
"维多利亚·布莱克伍德": {
"name": "维多利亚·布莱克伍德", Args:
"title": "神秘学专家", external_data: 来自knowledge_base的角色数据
"personality": ["理性分析", "谨慎小心", "实用主义", "思维缜密"],
"background": "拥有丰富神秘学知识和战斗经验的侦探,既是非凡者也是夏洛克·莫里亚蒂", Returns:
"speech_patterns": ["会使用专业术语", "经常进行逻辑分析", "对危险保持警告", "内心独白较多"], 处理后的角色数据字典
"sample_dialogues": [ """
"好奇往往是导致死亡的主要因素。", processed_profiles = {}
"总之,我的任务到此为止。",
"这需要仔细分析才能得出结论。" for char_name, char_data in external_data.items():
] # 提取基本信息
}, basic_info = char_data.get('basic_info', {})
"阿奇博尔德·韦恩博士": { personality = char_data.get('personality', {})
"name": "阿奇博尔德·韦恩博士", background = char_data.get('background', {})
"title": "神秘学导师", skills = char_data.get('skills_and_abilities', {})
"personality": ["沉稳睿智", "言简意赅", "关怀学生", "经验丰富"], speech_patterns = char_data.get('speech_patterns', {})
"background": "神秘学领域的资深专家,经验极其丰富的导师,知识渊博",
"speech_patterns": ["话语简练但信息量大", "给予实用指导", "语调平和但权威", "关心但保持距离"], # 构建角色画像
"sample_dialogues": [ profile = {
"耐心是修炼的基础。", "name": char_data.get('character_name', char_name),
"不要急于求成,稳扎稳打比什么都重要。", "title": basic_info.get('occupation', '未知'),
"这种情况需要格外小心。" "personality": personality.get('core_traits', []) + personality.get('strengths', []),
] "background": background.get('childhood', '') + ' ' + background.get('education', ''),
}, "speech_patterns": speech_patterns.get('vocabulary', []) + speech_patterns.get('tone', []),
"塔利姆": { "sample_dialogues": self._generate_sample_dialogues(char_data),
"name": "塔利姆", # 保存完整数据供高级功能使用
"title": "文雅绅士", "full_data": char_data
"personality": ["礼貌尊敬", "有文化素养", "寻求帮助", "温和友善"],
"background": "受过良好教育的普通人,有一定的文学修养,遇到困难时会寻求专家帮助",
"speech_patterns": ["使用礼貌称谓", "表达困惑时措辞文雅", "会引用文学作品", "语气温和"],
"sample_dialogues": [
"噢,尊敬的大侦探,你最近在忙碌什么?",
"这不是《罗密欧与朱丽叶》的故事!",
"我有个朋友遇到了困难..."
]
},
"艾伦": {
"name": "艾伦",
"title": "困扰的求助者",
"personality": ["焦虑不安", "详细描述", "半信半疑", "急需帮助"],
"background": "普通人,但最近遭遇了一系列神秘的厄运事件,怀疑受到诅咒",
"speech_patterns": ["情绪紧张", "会详细描述遭遇", "语气急切", "表现出恐惧"],
"sample_dialogues": [
"最近我总是遭遇各种厄运...",
"我怀疑是不是受到了什么诅咒。",
"请帮帮我,我不知道该怎么办!"
]
},
"戴莉.西蒙妮": {
"name": "戴莉·西蒙妮",
"title": "专业调查员",
"personality": ["专业简洁", "直接明确", "严谨认真", "目标导向"],
"background": "负责调查神秘事件的专业人员,办事效率高,问题直接",
"speech_patterns": ["问题直接明确", "语气专业", "注重事实", "简洁有力"],
"sample_dialogues": [
"请详细描述事件经过。",
"有什么证据可以证明?",
"这件事需要立即调查。"
]
} }
}
processed_profiles[char_name] = profile
return processed_profiles
def _generate_sample_dialogues(self, char_data: Dict) -> List[str]:
"""
基于角色数据生成示例对话
Args:
char_data: 角色数据
Returns:
示例对话列表
"""
# 这里可以根据角色的性格、背景等生成更合适的示例对话
# 暂时返回一些通用的示例
basic_info = char_data.get('basic_info', {})
occupation = basic_info.get('occupation', '角色')
if '侦探' in occupation or '调查员' in occupation:
return [
"我需要仔细分析这个案件。",
"每个细节都可能很重要。",
"让我重新梳理一下线索。"
]
elif '教授' in occupation or '博士' in occupation:
return [
"根据我的研究,这个现象很特殊。",
"我们需要更谨慎地处理这个问题。",
"知识就是力量,但也要小心使用。"
]
else:
return [
"我遇到了一些困难。",
"请帮帮我。",
"这太奇怪了。"
]
def _load_model(self): def _load_model(self):
"""加载模型和分词器""" """加载模型和分词器"""
@ -121,18 +133,19 @@ class NPCDialogueGenerator:
) )
# 如果有LoRA模型则加载 # 如果有LoRA模型则加载
if self.lora_model_path: # if self.lora_model_path:
print(f"Loading LoRA weights from: {self.lora_model_path}") # print(f"Loading LoRA weights from: {self.lora_model_path}")
self.model = PeftModel.from_pretrained(self.model, self.lora_model_path) # self.model = PeftModel.from_pretrained(self.model, self.lora_model_path)
def generate_character_dialogue( def generate_character_dialogue(
self, self,
character_name: str, character_name: str,
context: str = "", context: str = "",
user_input: str = "",
temperature: float = 0.8, temperature: float = 0.8,
max_new_tokens: int = 150, max_new_tokens: int = 150,
top_p: float = 0.9 top_p: float = 0.9,
dialogue_history: List[Dict] = None,
history_context_count: int = 3
) -> str: ) -> str:
""" """
生成指定角色的对话 生成指定角色的对话
@ -144,6 +157,8 @@ class NPCDialogueGenerator:
temperature: 采样温度 temperature: 采样温度
max_new_tokens: 最大生成token数 max_new_tokens: 最大生成token数
top_p: 核采样参数 top_p: 核采样参数
dialogue_history: 对话历史记录列表每个元素包含speaker和content
history_context_count: 使用的历史对话轮数默认3轮
Returns: Returns:
生成的对话内容 生成的对话内容
@ -153,12 +168,12 @@ class NPCDialogueGenerator:
profile = self.character_profiles[character_name] profile = self.character_profiles[character_name]
# 构建系统提示 # 构建系统提示,包含历史对话数据
system_prompt = self._build_system_prompt(profile, context) system_prompt = self._build_system_prompt(profile, context, dialogue_history, history_context_count)
# 构建用户输入 # 构建用户输入
if not user_input: user_input = "请说一段符合你角色设定的话,保持对话的连贯性。"
user_input = "请说一段符合你角色设定的话。"
# 准备消息 # 准备消息
messages = [ messages = [
@ -178,6 +193,16 @@ class NPCDialogueGenerator:
# 移动到设备 # 移动到设备
inputs = {k: v.to(self.model.device) for k, v in inputs.items()} inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
# 计算input token数并与模型最大token数比较
input_token_count = inputs['input_ids'].shape[1]
try:
max_model_tokens = self.model.config.max_position_embeddings
except AttributeError:
max_model_tokens = 2048
if input_token_count + max_new_tokens > max_model_tokens:
print(f"警告当前输入token数({input_token_count})加上最大生成token数({max_new_tokens})超过模型最大token数({max_model_tokens}),可能导致生成结果不完整或报错。")
# 生成对话 # 生成对话
with torch.no_grad(): with torch.no_grad():
@ -185,10 +210,10 @@ class NPCDialogueGenerator:
**inputs, **inputs,
max_new_tokens=max_new_tokens, max_new_tokens=max_new_tokens,
do_sample=True, do_sample=True,
temperature=temperature, temperature=0.95,
top_p=top_p, top_p=0.92,
pad_token_id=self.tokenizer.eos_token_id, pad_token_id=self.tokenizer.eos_token_id,
repetition_penalty=1.1 repetition_penalty=1.15
) )
# 解码输出 # 解码输出
@ -197,8 +222,15 @@ class NPCDialogueGenerator:
return dialogue return dialogue
def _build_system_prompt(self, profile: Dict, context: str = "") -> str: def _build_system_prompt(self, profile: Dict, context: str = "", dialogue_history: List[Dict] = None, history_context_count: int = 3) -> str:
"""构建系统提示""" """构建系统提示
Args:
profile: 角色配置信息
context: 当前情境
dialogue_history: 对话历史记录列表每个元素包含speaker和content
history_context_count: 使用的历史对话轮数默认3轮
"""
personality_str = "".join(profile["personality"]) personality_str = "".join(profile["personality"])
speech_pattern_str = "".join(profile["speech_patterns"]) speech_pattern_str = "".join(profile["speech_patterns"])
@ -207,9 +239,24 @@ class NPCDialogueGenerator:
性格特点{personality_str} 性格特点{personality_str}
说话风格{speech_pattern_str} 说话风格{speech_pattern_str}
请严格按照这个角色的设定来回应保持角色的一致性和独特性""" 请严格按照这个角色的设定来回应保持角色的一致性和独特性"""
# 添加当前情境
if context: if context:
system_prompt += f"\n\n当前情境:{context}" system_prompt += f"\n\n当前情境:{context}"
return system_prompt
# 添加历史对话数据参考generate_character_prompt的实现
if dialogue_history:
system_prompt += "\n\n最近的对话:"
# 使用参数控制历史对话轮数
history_to_use = dialogue_history[-history_context_count:] if history_context_count > 0 else []
for turn in history_to_use:
system_prompt += f"{turn.speaker}: {turn.content}"
# speaker = turn.get('speaker', '未知')
# content = turn.get('content', '')
# if content:
# system_prompt += f"\n{speaker}: {content}"
return system_prompt
def generate_dialogue_conversation(self, character1: str, character2: str, topic: str, turns: int = 4) -> List[Dict]: def generate_dialogue_conversation(self, character1: str, character2: str, topic: str, turns: int = 4) -> List[Dict]:
"""生成两个角色之间的对话 """生成两个角色之间的对话
@ -261,102 +308,264 @@ class NPCDialogueGenerator:
"""列出所有可用角色""" """列出所有可用角色"""
return list(self.character_profiles.keys()) return list(self.character_profiles.keys())
def main(): class DualModelDialogueGenerator:
"""测试对话生成器""" """双模型对话生成器 - 每个模型扮演一个角色"""
# 配置路径
base_model_path = '/mnt/g/Project02/AITrain/Qwen/Qwen3-8B-AWQ'
lora_model_path = './output/NPC_Dialogue_LoRA/final_model' # 如果没有训练LoRA设为None
# 检查LoRA模型是否存在 def __init__(self,
import os base_model_path: str,
if not os.path.exists(lora_model_path): character1_config: Dict,
print("LoRA模型不存在使用基础模型") character2_config: Dict,
lora_model_path = None lora_model_path: Optional[str] = None):
"""
初始化双模型对话生成器
# 创建对话生成器 Args:
generator = NPCDialogueGenerator(base_model_path, lora_model_path) base_model_path: 基础模型路径
character1_config: 角色1配置 {"name": "角色名", "lora_path": "LoRA路径", "character_data": 角色数据}
character2_config: 角色2配置 {"name": "角色名", "lora_path": "LoRA路径", "character_data": 角色数据}
lora_model_path: 通用LoRA模型路径可选
"""
self.base_model_path = base_model_path
self.character1_config = character1_config
self.character2_config = character2_config
print("=== 游戏NPC角色对话生成器 ===") # 为每个角色创建独立的模型实例
print(f"可用角色:{', '.join(generator.list_available_characters())}") self.character1_generator = None
self.character2_generator = None
# 测试单个角色对话生成 self._initialize_character_models()
print("\n=== 单角色对话测试 ===")
test_scenarios = [
{
"character": "克莱恩",
"context": "玩家向你咨询神秘学知识",
"input": "请告诉我一些关于灵界的注意事项。"
},
{
"character": "阿兹克",
"context": "学生遇到了修炼瓶颈",
"input": "导师,我在修炼中遇到了困难。"
},
{
"character": "塔利姆",
"context": "在俱乐部偶遇老朋友",
"input": "好久不见,最近怎么样?"
}
]
for scenario in test_scenarios: def _initialize_character_models(self):
print(f"\n--- {scenario['character']} ---") """初始化两个角色的模型"""
print(f"情境:{scenario['context']}") print("=== 初始化双模型对话系统 ===")
print(f"输入:{scenario['input']}")
dialogue = generator.generate_character_dialogue( # 初始化角色1的模型
scenario["character"], print(f"\n初始化角色1: {self.character1_config['name']}")
scenario["context"], char1_lora_path = self.character1_config.get('lora_path') or self.character1_config.get('lora_model_path')
scenario["input"] self.character1_generator = NPCDialogueGenerator(
) self.base_model_path,
print(f"回复:{dialogue}") char1_lora_path,
{self.character1_config['name']: self.character1_config['character_data']}
# 测试角色间对话
print("\n=== 角色间对话测试 ===")
conversation = generator.generate_dialogue_conversation(
"克莱恩", "塔利姆", "最近遇到的神秘事件", turns=4
) )
for turn in conversation: # 初始化角色2的模型
print(f"{turn['speaker']}{turn['dialogue']}") print(f"\n初始化角色2: {self.character2_config['name']}")
char2_lora_path = self.character2_config.get('lora_path') or self.character2_config.get('lora_model_path')
self.character2_generator = NPCDialogueGenerator(
self.base_model_path,
char2_lora_path,
{self.character2_config['name']: self.character2_config['character_data']}
)
# 交互式对话模式 print("✓ 双模型对话系统初始化完成")
print("\n=== 交互式对话模式 ===")
print("输入格式:角色名 上下文 用户输入")
print("例如:克莱恩 在俱乐部 请给我一些建议")
print("输入'quit'退出")
while True: def generate_dual_character_dialogue(self,
try: character_name: str,
user_command = input("\n请输入指令: ").strip() context: str = "",
if user_command.lower() == 'quit': temperature: float = 0.8,
break max_new_tokens: int = 150,
dialogue_history: str = "",
history_context_count: int = 3) -> str:
"""
生成指定角色的对话使用对应的模型
parts = user_command.split(' ', 2) Args:
if len(parts) < 2: character_name: 角色名称
print("格式错误,请使用:角色名 上下文 [用户输入]") context: 对话上下文
continue user_input: 用户输入
temperature: 采样温度
max_new_tokens: 最大生成token数
character = parts[0] Returns:
context = parts[1] 生成的对话内容
user_input = parts[2] if len(parts) > 2 else "" """
if character_name == self.character1_config['name']:
return self.character1_generator.generate_character_dialogue(
character_name, context, temperature, max_new_tokens, dialogue_history = dialogue_history, history_context_count=history_context_count,
)
elif character_name == self.character2_config['name']:
return self.character2_generator.generate_character_dialogue(
character_name, context, temperature, max_new_tokens, dialogue_history = dialogue_history, history_context_count=history_context_count,
)
else:
raise ValueError(f"Unknown character: {character_name}")
if character not in generator.list_available_characters(): def run_dual_character_conversation(self,
print(f"未知角色:{character}") topic: str = "",
print(f"可用角色:{', '.join(generator.list_available_characters())}") turn_index: int = 4,
continue context: str = "",
dialogue_history: str = "",
history_context_count: int = 3,
temperature: float = 0.8,
max_new_tokens: int = 150) -> List[Dict]:
"""
运行双角色对话
dialogue = generator.generate_character_dialogue( Args:
character, context, user_input topic: 对话主题
) turns: 对话轮数
print(f"\n{character}{dialogue}") context: 额外上下文
temperature: 采样温度
max_new_tokens: 最大生成token数
except KeyboardInterrupt: Returns:
break 对话列表
except Exception as e: """
print(f"生成对话时出错:{e}") conversation = []
char1_name = self.character1_config['name']
char2_name = self.character2_config['name']
print("\n对话生成器已退出") # 构建完整上下文
full_context = f"现在{char1_name}{char2_name}在讨论关于{topic}的话题。{context}"
if __name__ == '__main__': # print(f"\n=== 开始双角色对话 ===")
main() # print(f"主题: {topic}")
# print(f"角色: {char1_name} vs {char2_name}")
# print("-" * 50)
if turn_index % 2 == 0:
# 角色1说话
speaker = char1_name
# if turn_index == 0:
# user_input = f"开始和{char2_name}讨论{topic}这个话题。"
# else:
# last_dialogue = conversation[-1]["dialogue"]
# user_input = f"{char2_name}刚才说:\"{last_dialogue}\"。请回应。"
else:
# 角色2说话
speaker = char2_name
# last_dialogue = conversation[-1]["dialogue"]
# user_input = f"{char1_name}刚才说:\"{last_dialogue}\"。请回应。"
print(f"\n[第{turn_index+1}轮] {speaker}正在思考...")
# 使用对应角色的模型生成对话
dialogue = self.generate_dual_character_dialogue(
speaker, full_context, temperature, max_new_tokens, dialogue_history, history_context_count
)
conversation.append({
"turn": turn_index + 1,
"speaker": speaker,
"dialogue": dialogue,
"context_used": full_context[:100] + "..." if len(full_context) > 100 else full_context
})
print(f"{speaker}: {dialogue}")
return conversation
def get_character_info(self, character_name: str) -> Dict:
"""获取角色信息"""
if character_name == self.character1_config['name']:
return self.character1_generator.get_character_info(character_name)
elif character_name == self.character2_config['name']:
return self.character2_generator.get_character_info(character_name)
else:
return {}
def list_characters(self) -> List[str]:
"""列出两个角色名称"""
return [self.character1_config['name'], self.character2_config['name']]
# def main():
# """测试对话生成器"""
# # 配置路径
# base_model_path = '/mnt/g/Project02/AITrain/Qwen/Qwen3-8B-AWQ'
# lora_model_path = './output/NPC_Dialogue_LoRA/final_model' # 如果没有训练LoRA设为None
# # 检查LoRA模型是否存在
# if not os.path.exists(lora_model_path):
# print("LoRA模型不存在使用基础模型")
# lora_model_path = None
# # 创建对话生成器
# generator = NPCDialogueGenerator(base_model_path, lora_model_path)
# print("=== 游戏NPC角色对话生成器 ===")
# print(f"可用角色:{', '.join(generator.list_available_characters())}")
# # 测试单个角色对话生成
# print("\n=== 单角色对话测试 ===")
# test_scenarios = [
# {
# "character": "克莱恩",
# "context": "玩家向你咨询神秘学知识",
# "input": "请告诉我一些关于灵界的注意事项。"
# },
# {
# "character": "阿兹克",
# "context": "学生遇到了修炼瓶颈",
# "input": "导师,我在修炼中遇到了困难。"
# },
# {
# "character": "塔利姆",
# "context": "在俱乐部偶遇老朋友",
# "input": "好久不见,最近怎么样?"
# }
# ]
# for scenario in test_scenarios:
# print(f"\n--- {scenario['character']} ---")
# print(f"情境:{scenario['context']}")
# print(f"输入:{scenario['input']}")
# dialogue = generator.generate_character_dialogue(
# scenario["character"],
# scenario["context"],
# scenario["input"]
# )
# print(f"回复:{dialogue}")
# # 测试角色间对话
# print("\n=== 角色间对话测试 ===")
# conversation = generator.generate_dialogue_conversation(
# "克莱恩", "塔利姆", "最近遇到的神秘事件", turns=4
# )
# for turn in conversation:
# print(f"{turn['speaker']}{turn['dialogue']}")
# # 交互式对话模式
# print("\n=== 交互式对话模式 ===")
# print("输入格式:角色名 上下文 用户输入")
# print("例如:克莱恩 在俱乐部 请给我一些建议")
# print("输入'quit'退出")
# while True:
# try:
# user_command = input("\n请输入指令: ").strip()
# if user_command.lower() == 'quit':
# break
# parts = user_command.split(' ', 2)
# if len(parts) < 2:
# print("格式错误,请使用:角色名 上下文 [用户输入]")
# continue
# character = parts[0]
# context = parts[1]
# user_input = parts[2] if len(parts) > 2 else ""
# if character not in generator.list_available_characters():
# print(f"未知角色:{character}")
# print(f"可用角色:{', '.join(generator.list_available_characters())}")
# continue
# dialogue = generator.generate_character_dialogue(
# character, context, user_input
# )
# print(f"\n{character}{dialogue}")
# except KeyboardInterrupt:
# break
# except Exception as e:
# print(f"生成对话时出错:{e}")
# print("\n对话生成器已退出")
# if __name__ == '__main__':
# main()

Binary file not shown.

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

Binary file not shown.

View File

@ -16,5 +16,6 @@ datasets==3.5.1
peft==0.15.2 peft==0.15.2
autoawq autoawq
PyPDF2 PyPDF2
sentence-transformersfaiss-cpu sentence-transformers
faiss-cpu
fitz fitz

3748
AITrain/test.jsonl Normal file

File diff suppressed because it is too large Load Diff

Binary file not shown.