456 lines
16 KiB
Python
456 lines
16 KiB
Python
#!/usr/bin/env python
|
||
# -*- coding: utf-8 -*-
|
||
'''
|
||
双AI角色对话系统主控制程序
|
||
完整的工作流程:PDF处理 -> 角色加载 -> RAG对话 -> 历史记录
|
||
'''
|
||
|
||
import os
|
||
import sys
|
||
import shutil
|
||
from typing import List, Dict
|
||
import json
|
||
|
||
def check_dependencies():
|
||
"""检查依赖库"""
|
||
missing_deps = []
|
||
|
||
try:
|
||
import PyPDF2
|
||
except ImportError:
|
||
missing_deps.append("PyPDF2")
|
||
|
||
try:
|
||
import pymupdf
|
||
print("✓ pymupdf 可用")
|
||
except ImportError:
|
||
print("⚠ pymupdf 不可用,将使用 PyPDF2")
|
||
|
||
try:
|
||
import sentence_transformers
|
||
import faiss
|
||
print("✓ 向量化功能可用")
|
||
except ImportError:
|
||
print("⚠ 向量化功能不可用,将使用文本匹配")
|
||
|
||
if missing_deps:
|
||
print(f"✗ 缺少依赖库: {', '.join(missing_deps)}")
|
||
print("请运行: pip install PyPDF2 sentence-transformers faiss-cpu")
|
||
return False
|
||
|
||
return True
|
||
|
||
def setup_directories():
|
||
"""设置项目目录结构"""
|
||
directories = [
|
||
"./knowledge_base",
|
||
"./characters",
|
||
"./worldview",
|
||
"./rag_knowledge",
|
||
"./conversation_data"
|
||
]
|
||
|
||
for dir_path in directories:
|
||
os.makedirs(dir_path, exist_ok=True)
|
||
print(f"✓ 目录就绪: {dir_path}")
|
||
|
||
def copy_demo_files():
|
||
"""复制演示文档到知识库目录"""
|
||
file_mappings = [
|
||
("./worldview/worldview_template_coc.json", "./knowledge_base/worldview_template_coc.json"),
|
||
("./characters/character_template_detective.json", "./knowledge_base/character_template_detective.json"),
|
||
("./characters/character_template_professor.json", "./knowledge_base/character_template_professor.json")
|
||
]
|
||
|
||
for source, target in file_mappings:
|
||
if os.path.exists(source):
|
||
shutil.copy2(source, target)
|
||
print(f"✓ 复制文档: {os.path.basename(target)}")
|
||
|
||
def process_pdf_workflow():
|
||
"""PDF处理工作流"""
|
||
print("\n" + "="*60)
|
||
print("PDF世界观文档处理")
|
||
print("="*60)
|
||
|
||
from pdf_to_rag_processor import PDFToRAGProcessor
|
||
|
||
pdf_path = input("请输入PDF文件路径 (例: ./coc.pdf): ").strip()
|
||
|
||
if not os.path.exists(pdf_path):
|
||
print(f"✗ 文件不存在: {pdf_path}")
|
||
return False
|
||
|
||
try:
|
||
processor = PDFToRAGProcessor()
|
||
result = processor.process_pdf_to_rag(pdf_path, "./rag_knowledge")
|
||
|
||
print(f"\n✓ PDF处理完成!")
|
||
print(f" - 文档块数: {result['chunks_count']}")
|
||
print(f" - 概念数: {result['concepts_count']}")
|
||
print(f" - 向量索引: {'启用' if result['vector_enabled'] else '未启用'}")
|
||
|
||
return True
|
||
|
||
except Exception as e:
|
||
print(f"✗ PDF处理失败: {e}")
|
||
return False
|
||
|
||
def show_character_info():
|
||
"""显示角色信息"""
|
||
print("\n" + "="*60)
|
||
print("角色设定信息")
|
||
print("="*60)
|
||
|
||
knowledge_dir = "./knowledge_base"
|
||
character_files = [f for f in os.listdir(knowledge_dir) if f.startswith('character') and f.endswith('.json')]
|
||
|
||
for char_file in character_files:
|
||
try:
|
||
with open(os.path.join(knowledge_dir, char_file), 'r', encoding='utf-8') as f:
|
||
char_data = json.load(f)
|
||
|
||
name = char_data.get('character_name', '未知')
|
||
occupation = char_data.get('basic_info', {}).get('occupation', '未知')
|
||
traits = char_data.get('personality', {}).get('core_traits', [])
|
||
|
||
print(f"\n角色: {name}")
|
||
print(f" 职业: {occupation}")
|
||
print(f" 特点: {', '.join(traits[:3])}")
|
||
|
||
except Exception as e:
|
||
print(f"✗ 读取角色文件失败: {char_file} - {e}")
|
||
|
||
def run_dialogue_system():
|
||
"""运行双AI对话系统"""
|
||
print("\n" + "="*60)
|
||
print("启动双AI角色对话系统")
|
||
print("="*60)
|
||
|
||
try:
|
||
|
||
# 直接启动双模型对话
|
||
print("\n正在初始化双模型对话系统...")
|
||
|
||
from dual_ai_dialogue_system import RAGKnowledgeBase, ConversationManager, DualAIDialogueEngine
|
||
from npc_dialogue_generator import DualModelDialogueGenerator
|
||
|
||
# 初始化组件
|
||
kb = RAGKnowledgeBase("./knowledge_base")
|
||
conv_mgr = ConversationManager("./conversation_data/conversations.db")
|
||
|
||
# 检查模型路径
|
||
base_model_path = '/mnt/g/Project02/AITrain/Qwen/Qwen3-4B'
|
||
lora_model_path = './output/NPC_Dialogue_LoRA/final_model'
|
||
|
||
if not os.path.exists(base_model_path):
|
||
print(f"✗ 基础模型路径不存在: {base_model_path}")
|
||
print("请修改 main_controller.py 中的模型路径")
|
||
return
|
||
|
||
if not os.path.exists(lora_model_path):
|
||
lora_model_path = None
|
||
print("⚠ LoRA模型不存在,使用基础模型")
|
||
|
||
# 检查角色数据
|
||
if not hasattr(kb, 'character_data') or len(kb.character_data) < 2:
|
||
print("✗ 角色数据不足,无法创建双模型对话系统")
|
||
print("请确保knowledge_base目录中有至少两个角色文件")
|
||
return
|
||
|
||
# 获取前两个角色
|
||
character_names = list(kb.character_data.keys())[:2]
|
||
char1_name = character_names[0]
|
||
char2_name = character_names[1]
|
||
|
||
print(f"✓ 使用角色: {char1_name} 和 {char2_name}")
|
||
|
||
# 配置两个角色的模型
|
||
character1_config = {
|
||
"name": char1_name,
|
||
"lora_path": lora_model_path,
|
||
"character_data": kb.character_data[char1_name]
|
||
}
|
||
|
||
character2_config = {
|
||
"name": char2_name,
|
||
"lora_path": lora_model_path,
|
||
"character_data": kb.character_data[char2_name]
|
||
}
|
||
|
||
# 创建双模型对话生成器
|
||
print("正在初始化双模型对话生成器...")
|
||
dual_generator = DualModelDialogueGenerator(
|
||
base_model_path,
|
||
character1_config,
|
||
character2_config
|
||
)
|
||
|
||
# 创建对话引擎
|
||
dialogue_engine = DualAIDialogueEngine(kb, conv_mgr, dual_generator)
|
||
|
||
# 创建对话会话
|
||
characters = [char1_name, char2_name]
|
||
worldview = kb.worldview_data.get('worldview_name', '未知世界观') if kb.worldview_data else '未知世界观'
|
||
|
||
session_id = conv_mgr.create_session(characters, worldview)
|
||
print(f"✓ 创建对话会话: {session_id}")
|
||
|
||
# 交互式对话循环
|
||
print(f"\n=== 双AI模型对话系统 ===")
|
||
print(f"角色: {char1_name} vs {char2_name}")
|
||
print(f"世界观: {worldview}")
|
||
print("输入 'quit' 退出对话")
|
||
print("-" * 50)
|
||
|
||
while True:
|
||
try:
|
||
# 获取用户输入
|
||
user_input = input("\n请输入对话主题或指令: ").strip()
|
||
|
||
if user_input.lower() == 'quit':
|
||
print("退出双AI对话系统")
|
||
break
|
||
|
||
if not user_input:
|
||
print("请输入有效的对话主题")
|
||
continue
|
||
|
||
# 询问对话轮数
|
||
turns_input = input("请输入对话轮数 (默认4): ").strip()
|
||
turns = int(turns_input) if turns_input.isdigit() else 4
|
||
|
||
# 询问历史上下文设置
|
||
history_input = input("使用历史对话轮数 (默认10): ").strip()
|
||
history_count = int(history_input) if history_input.isdigit() else 10
|
||
|
||
context_input = input("使用上下文信息数量 (默认5): ").strip()
|
||
context_info_count = int(context_input) if context_input.isdigit() else 5
|
||
|
||
print(f"\n开始对话 - 主题: {user_input}")
|
||
print(f"轮数: {turns}, 历史: {history_count}, 上下文: {context_info_count}")
|
||
print("-" * 50)
|
||
|
||
# 运行双模型对话
|
||
dialogue_engine.run_dual_model_conversation(
|
||
session_id, user_input, turns, history_count, context_info_count
|
||
)
|
||
|
||
print("-" * 50)
|
||
print("对话完成!")
|
||
|
||
except KeyboardInterrupt:
|
||
print("\n\n用户中断对话")
|
||
break
|
||
except Exception as e:
|
||
print(f"对话过程中出现错误: {e}")
|
||
import traceback
|
||
traceback.print_exc()
|
||
|
||
|
||
|
||
except Exception as e:
|
||
print(f"✗ 对话系统启动失败: {e}")
|
||
import traceback
|
||
traceback.print_exc()
|
||
|
||
def create_demo_scenario():
|
||
"""创建演示场景"""
|
||
print("\n创建演示对话场景...")
|
||
|
||
try:
|
||
from dual_ai_dialogue_system import RAGKnowledgeBase, ConversationManager, DualAIDialogueEngine
|
||
from npc_dialogue_generator import NPCDialogueGenerator
|
||
|
||
# 初始化组件
|
||
kb = RAGKnowledgeBase("./knowledge_base")
|
||
conv_mgr = ConversationManager("./conversation_data/demo_conversations.db")
|
||
|
||
# 检查模型路径
|
||
base_model_path = '/mnt/g/Project02/AITrain/Qwen/Qwen3-4B'
|
||
lora_model_path = './output/NPC_Dialogue_LoRA/final_model'
|
||
|
||
if not os.path.exists(base_model_path):
|
||
print(f"✗ 基础模型路径不存在: {base_model_path}")
|
||
print("请修改 main_controller.py 中的模型路径")
|
||
return
|
||
|
||
if not os.path.exists(lora_model_path):
|
||
lora_model_path = None
|
||
print("⚠ LoRA模型不存在,使用基础模型")
|
||
|
||
llm_generator = NPCDialogueGenerator(base_model_path, lora_model_path, kb.character_data)
|
||
dialogue_engine = DualAIDialogueEngine(kb, conv_mgr, llm_generator)
|
||
|
||
# 创建演示对话
|
||
characters = ["维多利亚·布莱克伍德", "阿奇博尔德·韦恩"]
|
||
worldview = "克苏鲁的呼唤"
|
||
|
||
session_id = conv_mgr.create_session(characters, worldview)
|
||
print(f"✓ 创建演示会话: {session_id}")
|
||
|
||
# 运行几轮对话
|
||
topic = "最近发生的神秘事件"
|
||
print(f"\n开始演示对话 - 主题: {topic}")
|
||
print("-" * 40)
|
||
|
||
# 演示不同的历史上下文设置
|
||
# print("演示1: 使用默认上下文设置(历史3轮,信息2个)")
|
||
# dialogue_engine.run_conversation_turn(session_id, characters, 6, topic)
|
||
|
||
|
||
session_id = conv_mgr.create_session(characters, worldview)
|
||
print(f"✓ 创建演示会话: {session_id}")
|
||
print("\n演示3: 使用最少历史上下文(历史1轮,信息1个)")
|
||
dialogue_engine.run_conversation_turn(session_id, characters, 6, topic, 1, 10)
|
||
|
||
session_id = conv_mgr.create_session(characters, worldview)
|
||
print(f"✓ 创建演示会话: {session_id}")
|
||
print("\n演示2: 使用更多历史上下文(历史10轮,信息10个)")
|
||
dialogue_engine.run_conversation_turn(session_id, characters, 6, topic, 5, 10)
|
||
|
||
print(f"\n✓ 演示完成!会话ID: {session_id}")
|
||
print("你可以通过主对话系统继续这个对话")
|
||
|
||
except Exception as e:
|
||
print(f"✗ 演示场景创建失败: {e}")
|
||
import traceback
|
||
traceback.print_exc()
|
||
|
||
def show_system_status():
|
||
"""显示系统状态"""
|
||
print("\n" + "="*60)
|
||
print("系统状态检查")
|
||
print("="*60)
|
||
|
||
# 检查文件
|
||
files_to_check = [
|
||
("./knowledge_base/worldview_template_coc.json", "世界观模板"),
|
||
("./knowledge_base/character_template_detective.json", "侦探角色"),
|
||
("./knowledge_base/character_template_professor.json", "教授角色"),
|
||
("./pdf_to_rag_processor.py", "PDF处理器"),
|
||
("./dual_ai_dialogue_system.py", "对话系统"),
|
||
("./npc_dialogue_generator.py", "NPC生成器")
|
||
]
|
||
|
||
print("\n文件检查:")
|
||
for file_path, description in files_to_check:
|
||
if os.path.exists(file_path):
|
||
print(f"✓ {description}: {file_path}")
|
||
else:
|
||
print(f"✗ {description}: {file_path} (不存在)")
|
||
|
||
# 检查目录
|
||
print("\n目录检查:")
|
||
directories = ["./knowledge_base", "./rag_knowledge", "./conversation_data"]
|
||
for dir_path in directories:
|
||
if os.path.exists(dir_path):
|
||
file_count = len([f for f in os.listdir(dir_path) if os.path.isfile(os.path.join(dir_path, f))])
|
||
print(f"✓ {dir_path}: {file_count} 个文件")
|
||
else:
|
||
print(f"✗ {dir_path}: 不存在")
|
||
|
||
# 检查对话会话
|
||
try:
|
||
from dual_ai_dialogue_system import ConversationManager
|
||
conv_mgr = ConversationManager("./conversation_data/conversations.db")
|
||
sessions = conv_mgr.list_sessions()
|
||
print(f"\n✓ 对话会话: {len(sessions)} 个")
|
||
except Exception as e:
|
||
print(f"\n✗ 对话会话检查失败: {e}")
|
||
|
||
def main():
|
||
"""主控制程序"""
|
||
print("="*70)
|
||
print(" 双AI角色对话系统 - 主控制程序")
|
||
print(" 基于RAG的世界观增强对话引擎")
|
||
print("="*70)
|
||
|
||
# 检查依赖
|
||
if not check_dependencies():
|
||
return
|
||
|
||
# 设置目录
|
||
# setup_directories()
|
||
# copy_demo_files()
|
||
|
||
while True:
|
||
print("\n" + "="*50)
|
||
print("主菜单 - 请选择操作:")
|
||
print("1. 处理PDF世界观文档 (转换为RAG格式)")
|
||
print("2. 查看角色设定信息")
|
||
print("3. 启动双AI对话系统 (支持双模型对话)")
|
||
print("4. 创建演示对话场景")
|
||
print("5. 系统状态检查")
|
||
print("6. 查看使用说明")
|
||
print("0. 退出")
|
||
print("="*50)
|
||
|
||
choice = input("请输入选择 (0-6): ").strip()
|
||
|
||
if choice == '0':
|
||
print("\n感谢使用双AI角色对话系统!")
|
||
break
|
||
|
||
elif choice == '1':
|
||
process_pdf_workflow()
|
||
|
||
elif choice == '2':
|
||
show_character_info()
|
||
|
||
elif choice == '3':
|
||
run_dialogue_system()
|
||
|
||
elif choice == '4':
|
||
create_demo_scenario()
|
||
|
||
elif choice == '5':
|
||
show_system_status()
|
||
|
||
elif choice == '6':
|
||
show_usage_guide()
|
||
|
||
else:
|
||
print("❌ 无效选择,请重新输入")
|
||
|
||
def show_usage_guide():
|
||
"""显示使用说明"""
|
||
print("\n" + "="*60)
|
||
print("系统使用说明")
|
||
print("="*60)
|
||
|
||
guide = """
|
||
🚀 快速开始:
|
||
1. 首次使用建议先运行"创建演示对话场景"
|
||
2. 如有PDF世界观文档,选择"处理PDF世界观文档"
|
||
3. 通过"启动双AI对话系统"开始角色对话
|
||
|
||
📁 文档格式说明:
|
||
- 世界观文档: worldview_template_coc.json (参考COC设定)
|
||
- 角色设定: character_template_*.json (包含详细人设)
|
||
|
||
🔧 系统功能:
|
||
- PDF自动转换为RAG知识库
|
||
- 基于向量相似度的上下文检索
|
||
- 持久化对话历史存储
|
||
- 角色设定一致性保持
|
||
|
||
📝 自定义角色:
|
||
1. 参考 character_template_*.json 格式
|
||
2. 保存到 knowledge_base/ 目录
|
||
3. 重启对话系统加载新角色
|
||
|
||
💾 对话数据:
|
||
- 历史对话保存在 conversation_data/ 目录
|
||
- 支持会话恢复和历史查看
|
||
- 自动记录使用的上下文信息
|
||
|
||
⚠️ 注意事项:
|
||
- 确保模型路径正确设置
|
||
- 首次运行需要下载向量化模型
|
||
- PDF处理需要足够内存
|
||
"""
|
||
print(guide)
|
||
|
||
if __name__ == '__main__':
|
||
main() |