diff --git a/AITrain/conversation_data/conversations.db b/AITrain/conversation_data/conversations.db index a4ed857..eb52c51 100644 Binary files a/AITrain/conversation_data/conversations.db and b/AITrain/conversation_data/conversations.db differ diff --git a/AITrain/dual_ai_dialogue_system.py b/AITrain/dual_ai_dialogue_system.py index 5906a24..4d50784 100644 --- a/AITrain/dual_ai_dialogue_system.py +++ b/AITrain/dual_ai_dialogue_system.py @@ -391,15 +391,16 @@ class DualAIDialogueEngine: """双AI对话引擎""" def __init__(self, knowledge_base: RAGKnowledgeBase, conversation_manager: ConversationManager, llm_generator, - enable_scoring: bool = True, base_model_path: str = None): + enable_scoring: bool = True, base_model_path: str = None, use_manual_scoring: bool = False): self.kb = knowledge_base self.conv_mgr = conversation_manager self.llm_generator = llm_generator self.enable_scoring = enable_scoring + self.use_manual_scoring = use_manual_scoring self.scorer = None # 初始化评分器 - if enable_scoring and base_model_path: + if enable_scoring and base_model_path and not use_manual_scoring: try: from dialogue_scorer import DialogueAIScorer print("正在初始化对话评分系统...") @@ -412,6 +413,74 @@ class DualAIDialogueEngine: except Exception as e: print(f"⚠ 对话评分系统初始化失败: {e}") self.enable_scoring = False + + def _manual_score_dialogue_turn(self, dialogue_content: str, speaker: str, dialogue_history: List[DialogueTurn]) -> Tuple[float, str, str]: + """人工打分对话轮次 + + Args: + dialogue_content: 对话内容 + speaker: 说话者 + dialogue_history: 对话历史 + + Returns: + tuple: (总分, 详细分数JSON, 反馈意见) + """ + print("\n" + "="*60) + print("人工对话评分") + print("="*60) + # print(f"说话者: {speaker}") + # print(f"对话内容: {dialogue_content}") + print("-" * 40) + + # # 显示最近的对话历史作为参考 + # if dialogue_history: + # print("最近对话历史:") + # for i, turn in enumerate(dialogue_history[-3:], 1): + # print(f" {i}. {turn.speaker}: {turn.content[:100]}...") + # print("-" * 40) + + # 五个评分维度 + dimensions = { + 'coherence': '逻辑连贯性 (1-10)', + 'character_consistency': '角色一致性 (1-10)', + 'naturalness': '自然流畅度 (1-10)', + 'information_density': '信息密度 (1-10)', + 'creativity': '创意新颖度 (1-10)' + } + + scores = {} + print("\n请为以下维度打分 (输入1-10的分数,直接回车跳过该维度):") + + for key, desc in dimensions.items(): + while True: + try: + score_input = input(f"{desc}: ").strip() + if score_input == "": + scores[key] = 7.0 # 默认分数 + break + + score = float(score_input) + if 1 <= score <= 10: + scores[key] = score + break + else: + print("请输入1-10之间的分数") + except ValueError: + print("请输入有效的数字") + + # 计算总分 + overall_score = sum(scores.values()) / len(scores) + + # 获取反馈意见 + print("\n请输入对该对话的评价和建议 (可选,直接回车跳过):") + feedback = input("反馈意见: ").strip() + if not feedback: + feedback = f"人工评分完成,总分: {overall_score:.1f}" + + print(f"\n✓ 评分完成 - 总分: {overall_score:.1f}") + print("="*60) + + return overall_score, json.dumps(scores), feedback def score_dialogue_turn(self, dialogue_content: str, speaker: str, dialogue_history: List[DialogueTurn]) -> Tuple[float, str, str]: """对单条对话进行评分 @@ -424,8 +493,16 @@ class DualAIDialogueEngine: Returns: tuple: (总分, 详细分数JSON, 反馈意见) """ - if not self.enable_scoring or not self.scorer: + if not self.enable_scoring: return 0.0, "{}", "评分系统未启用" + + # 人工打分模式 + if self.use_manual_scoring: + return self._manual_score_dialogue_turn(dialogue_content, speaker, dialogue_history) + + # AI自动打分模式 + if not self.scorer: + return 0.0, "{}", "AI评分器未初始化" try: # 获取角色数据 diff --git a/AITrain/main_controller.py b/AITrain/main_controller.py index 2e98479..90f0770 100644 --- a/AITrain/main_controller.py +++ b/AITrain/main_controller.py @@ -121,7 +121,7 @@ def show_character_info(): except Exception as e: print(f"✗ 读取角色文件失败: {char_file} - {e}") -def run_dialogue_system(enableScore: bool): +def run_dialogue_system(enableScore: bool, useManualScoring: bool = False): """运行双AI对话系统""" print("\n" + "="*60) print("启动双AI角色对话系统") @@ -192,7 +192,8 @@ def run_dialogue_system(enableScore: bool): conv_mgr, dual_generator, enable_scoring=enableScore, - base_model_path=base_model_path + base_model_path=base_model_path, + use_manual_scoring=useManualScoring ) # 创建对话会话 @@ -261,176 +262,6 @@ def run_dialogue_system(enableScore: bool): traceback.print_exc() -def analyze_model_performance(): - """分析模型性能""" - print("\n" + "="*60) - print("模型性能分析") - print("="*60) - - try: - from dual_ai_dialogue_system import ConversationManager - import sqlite3 - import json - from datetime import datetime, timedelta - - conv_mgr = ConversationManager("./conversation_data/conversations.db") - - with sqlite3.connect(conv_mgr.db_path) as conn: - print("\n1. 总体性能趋势分析:") - - # 按时间段分析性能趋势 - cursor = conn.execute(""" - SELECT - DATE(timestamp) as date, - COUNT(*) as dialogue_count, - AVG(dialogue_score) as avg_score, - AVG(CASE WHEN dialogue_score >= 8.0 THEN 1.0 ELSE 0.0 END) as high_quality_rate - FROM dialogue_turns - WHERE dialogue_score > 0 - AND timestamp >= datetime('now', '-7 days') - GROUP BY DATE(timestamp) - ORDER BY date DESC - """) - - trend_data = cursor.fetchall() - if trend_data: - print(f" 最近7天性能趋势:") - for date, count, avg_score, hq_rate in trend_data: - print(f" {date}: 平均{avg_score:.2f}分 ({count}轮对话, {hq_rate*100:.1f}%高质量)") - else: - print(" 暂无足够数据进行趋势分析") - - print("\n2. 维度问题分析:") - - # 分析各维度的问题 - cursor = conn.execute(""" - SELECT score_details - FROM dialogue_turns - WHERE dialogue_score > 0 AND score_details != '{}' - ORDER BY timestamp DESC - LIMIT 100 - """) - - dimension_scores = { - 'coherence': [], - 'character_consistency': [], - 'naturalness': [], - 'information_density': [], - 'creativity': [] - } - - for (score_details,) in cursor.fetchall(): - try: - scores = json.loads(score_details) - for dim, score in scores.items(): - if dim in dimension_scores: - dimension_scores[dim].append(float(score)) - except: - continue - - dimension_names = { - 'coherence': '连贯性', - 'character_consistency': '角色一致性', - 'naturalness': '自然度', - 'information_density': '信息密度', - 'creativity': '创意性' - } - - weak_dimensions = [] - for dim, scores in dimension_scores.items(): - if scores: - avg_score = sum(scores) / len(scores) - print(f" {dimension_names[dim]}: 平均{avg_score:.2f}分 ({len(scores)}个样本)") - if avg_score < 7.0: - weak_dimensions.append(dim) - - if weak_dimensions: - print(f"\n ⚠ 发现薄弱维度: {[dimension_names[d] for d in weak_dimensions]}") - print(" 建议进行针对性优化训练") - - print("\n3. 角色表现分析:") - - # 分析不同角色的表现 - cursor = conn.execute(""" - SELECT - speaker, - COUNT(*) as dialogue_count, - AVG(dialogue_score) as avg_score, - MIN(dialogue_score) as min_score, - MAX(dialogue_score) as max_score, - AVG(CASE WHEN dialogue_score >= 8.0 THEN 1.0 ELSE 0.0 END) as high_quality_rate - FROM dialogue_turns - WHERE dialogue_score > 0 - GROUP BY speaker - ORDER BY avg_score DESC - """) - - character_performance = cursor.fetchall() - if character_performance: - print(" 角色表现排名:") - for i, (speaker, count, avg, min_s, max_s, hq_rate) in enumerate(character_performance, 1): - status = "✓" if avg >= 7.5 else "⚠" if avg >= 6.5 else "✗" - print(f" {i}. {speaker} {status}") - print(f" 平均{avg:.2f}分 (范围{min_s:.1f}-{max_s:.1f}, {hq_rate*100:.1f}%高质量, {count}轮)") - - print("\n4. 问题模式识别:") - - # 识别低分对话的常见问题 - cursor = conn.execute(""" - SELECT content, dialogue_score, score_feedback - FROM dialogue_turns - WHERE dialogue_score > 0 AND dialogue_score < 6.0 - ORDER BY dialogue_score ASC - LIMIT 5 - """) - - low_score_examples = cursor.fetchall() - if low_score_examples: - print(" 低分对话示例:") - for i, (content, score, feedback) in enumerate(low_score_examples, 1): - print(f" {i}. 分数{score:.1f}: {content[:50]}...") - if feedback: - print(f" 问题: {feedback[:80]}...") - else: - print(" 暂无低分对话样本") - - print("\n5. 优化建议:") - - # 生成优化建议 - suggestions = [] - - if weak_dimensions: - if 'character_consistency' in weak_dimensions: - suggestions.append("• 加强角色设定训练,增加角色特征描述的权重") - if 'creativity' in weak_dimensions: - suggestions.append("• 增加创意性训练数据,提高对话的趣味性") - if 'coherence' in weak_dimensions: - suggestions.append("• 优化上下文理解,加强对话逻辑连贯性") - if 'naturalness' in weak_dimensions: - suggestions.append("• 增加自然语言训练,改善表达流畅度") - if 'information_density' in weak_dimensions: - suggestions.append("• 优化信息组织,避免冗余表达") - - # 检查是否需要数据收集 - cursor = conn.execute("SELECT COUNT(*) FROM dialogue_turns WHERE dialogue_score > 0") - total_scored = cursor.fetchone()[0] - - if total_scored < 50: - suggestions.append("• 需要收集更多评分数据以进行准确分析") - - if total_scored >= 100: - suggestions.append("• 数据量充足,建议开始模型迭代优化") - - if suggestions: - for suggestion in suggestions: - print(f" {suggestion}") - else: - print(" 当前性能表现良好,继续保持!") - - except Exception as e: - print(f"✗ 性能分析失败: {e}") - import traceback - traceback.print_exc() def generate_training_dataset(): """生成训练数据集""" @@ -1271,18 +1102,19 @@ def main(): print("主菜单 - 请选择操作:") print("1. 处理PDF世界观文档 (转换为RAG格式)") print("2. 查看角色设定信息") - print("3. 启动双AI对话系统 (开启ai打分)") - print("4. 启动双AI对话系统 (关闭ai打分)") - print("5. 系统状态检查") - print("6. 查看对话评分统计") - print("7. 模型性能分析与优化") - print("8. 生成训练数据集") - print("9. 模型迭代优化") - print("10. 查看使用说明") + print("3. 启动双AI对话系统 (开启AI打分)") + print("4. 启动双AI对话系统 (关闭AI打分)") + print("5. 启动双AI对话系统 (开启人工打分)") + print("6. 系统状态检查") + print("7. 查看对话评分统计") + print("8. 模型性能分析与优化") + print("9. 生成训练数据集") + print("10. 模型迭代优化") + print("11. 查看使用说明") print("0. 退出") print("="*50) - choice = input("请输入选择 (0-10): ").strip() + choice = input("请输入选择 (0-11): ").strip() if choice == '0': print("\n感谢使用双AI角色对话系统!") @@ -1301,21 +1133,25 @@ def main(): run_dialogue_system(enableScore = False) elif choice == '5': - show_system_status() + run_dialogue_system(enableScore = True, useManualScoring = True) elif choice == '6': - show_scoring_statistics() + show_system_status() elif choice == '7': - analyze_model_performance() + show_scoring_statistics() elif choice == '8': - generate_training_dataset() + # 模型性能分析与优化 - 待实现 + print("模型性能分析与优化功能开发中...") elif choice == '9': - run_model_optimization() + generate_training_dataset() elif choice == '10': + run_model_optimization() + + elif choice == '11': show_usage_guide() else: