From aefda38d1235332cd1e4f73ae3bc71d7abac45c0 Mon Sep 17 00:00:00 2001 From: 997146918 <997146918@qq.com> Date: Sat, 23 Aug 2025 18:13:45 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E4=BA=BA=E5=B7=A5=E6=89=93?= =?UTF-8?q?=E5=88=86=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- AITrain/conversation_data/conversations.db | Bin 40960 -> 57344 bytes AITrain/dual_ai_dialogue_system.py | 83 +++++++- AITrain/main_controller.py | 208 +++------------------ 3 files changed, 102 insertions(+), 189 deletions(-) diff --git a/AITrain/conversation_data/conversations.db b/AITrain/conversation_data/conversations.db index a4ed8570963599e7c2f8aa3d1d78db2931fc2fed..eb52c51b84135e1ec8c8bbc0253b6b5b82c43540 100644 GIT binary patch delta 7377 zcmbtZTWnibc9j(?ij~+*f^q7}j043ph6ct&rl=RCwn)=;?#2H*(NfaOAOTHxaAZ3Y?_|S`_L|NjS(|#7{pzT+`8o;^d+acw0-0Np=J>z>i*YhuDzS@&NKl2;%Tis7xmT&*TYp->TPS~s?BiG*>yMFzq;<$M4 zRr~mWZQMH6Zy&Ud*laedZTJxnVGzB4c&ztwd-#)NgkK4QH-JY<_VUWXME+WUuwhHQhQ53l3v zJ43_rTc?l5u-PUC-@h?a=92y@Q>L0QXS%(J-ufJ43^?bL*+hO^4OU?32OU&Z6d~Vq(JlFe)<@cX`;_|8X zxOJNs+Jf*TU1;A?7hPyy-f88RS4S>%T7HG|>UhfXUzT54{=@RG$A1bhTN{=azxg!o zeS=Qaat6P8p2DxL)AXzG)vG{F|&dhS3i_U+=XK2=t*2+SWe3ZZc(jC-UE-@H?$2U!eKu(9%UbfDo?E6Jgl6u3fo;^@eJ*?v@aQJ(^w35 Fi zBks^svfhZPMF-zaYKg3#-4JmzLJ(Q0U@R|UBWi618`N{V#{7Zd2paKy<|?bj zq@FidyPM+i9CyeR#k)QVQ+!KXNg8W2Y-I(5@}oWGuJFK`wt2{R_IW&p86VCrhA66?%k7L=$7XG{LHf*mrwns=O+a7 zZuzhMuMJw?yJ@qJ3`~rT4Rsm|4VH8<{}RiGS^enmkBpTRmp8wxy7gKGA0Dm)qTDl! zv!J*XVn$_Jt9!MjtX3{!nOY2}@-Wx3;qnvo;xQJQ(<%pS1BtUKspX|FtDc^oOKOb} z&Fb@fKVgLFh*)t-5A5N{)v6y;vwV=HLkP1mxvtjyY}=AYGj z_r1|~{li-y^kE$F`U6K8ALcKp#Vxg_@adqQo5AA(_3`A66Dl50!P zI1nOagE$K$j^H(|Q85;r;I%ItUbURi(E5q--p&(FnCT4qaKEy`ZdraQ-|p1gwR-2q~Xm1OBiYCrYxQHX8WXLWF|lX0Qp9FX%{G zp#y-ST(mA{K`T@cNt4*S#yjxYo4nf98~ME+V)h@-4m6ov`R#UQf5NmdsmQl4wkXb| zM?JfV^ygCr{r(Y&{ z`*nbv?M(vAWN=nO2sy&ZHqx~6jVRr9?is~(?45$(Jmg5Q$^juKIM3X9lO?5N4`G4{ z&uQT$-BoDX4mSoZ<4q_ea~@$4g6w$Y0BFMw%mvMoatGJx^mDsB9~Ouvy`P*CoCj32 ztBP6#mYh7`W#c{RU zAeOJO#TX$0#KE4JIKh0Bi*)jtDlR+Y12*0S_egbV_fyMDjz+h;@-@ zo=rSMClXAF{36vv@Z%7*Cfm|BYJRPjZ3(BaTDV9?&a`PJPfC70KgGc-mbt>R~+?w2lwj`faw6u`%2KUJqKN3}Q3a?=l<}y-rpU zi_~2Q;6++Ic+tI05dj^VBJqg%3;+&lWnEyQiC~YPg#v7@%F1El`p(>p^0PZHD{o%2 zd>48PL=y%)IcSj}Z`x$C#P%XW!<7%Oy(F3ESJEaCSR!M=ONb;Gp9e#xPeqz7>1i2c zO3wt{EfdhPs-!obO$;SiUwBynqW2-5H+>NZMhK3U@q_XtzO$-TNfRF>`V&e(=rqAe zhQxH@q?M*)YAK}%3^}WEj0_;e%Or3DdT<@-$b3;kM>&QW$tvA6d06nHpmD)^SbR$@ znp|B`Sia6Srs2+8zF4GVOMAcrh%{UbM2P;f1>K(P#d?*GgWpnCKe}KMM2pG!p@#*% z$`#+s(qpuJ*K_O%z|?El57huD4}y@^BNi%>d+>@qkSfzT*QcN_gez0kP`^NEC?`#> zr-L(hij!4KE26+5rdbGRQi75gjuGM@l7yZH^FER!U!X?GRk)`vGG5yy-ytNB#+q`H z_yka4LO)l`otsKZ+HZOc0UUTjz>40?&k)R&nU!%1V}x99POEL{`M9yRpuGCdc}WPTu^xcgcvLv)P|Xwif6B9PSX2Ps^^uoee4WLmJT129hjvgTQZQO^lSImTF2Z7UEJBQqb48-G z7aL+?)^L)cgp;U7;AMn>V?gi&&5Biq0vHps2=M~>!EG~B!Z3m)s-q|HM* z_N{alZPln=-iEGVq55P3g)?_-VI~+3xZ0-7=J(JHYPS>Y?p?4T9#d#!l2pBNQ z+#)+V;Ij>5a|gGLScc!52U`&FLyAp1hU@5&Ap69o5n*y!H7a2BoD)Oi0d};_if;5& zAb0pDUv;5W!!*sH3ZX}&VPWGLsxE64R_=>HsZptkO6i2~iXPvnePnRJ6yEM$2igi9 z9m?LP-$yZqUWU^1!Q+ZCK}hI>LGeH+NK-o86QY@;i;wE>uPMXYEe$pfKO+=JEsh3S zWuC}$R$D{wq@}aq9t$^{Cu5_h1%y#I`sURCq~=N=8hKcZB&s8WBBXR03~rQs>5xk`ZGG+kz@i57k;;gWQ;FPI>pEZ%9c^hjRfiq*b<;o%gzNt#-|zT<3nBosC&c9tcSNxZF)_Mc)N?FVKyifc z-4qeCf>hf@l=RtTld0NpKuwB_-&F9P&5oYUJ$a?)qW8zooTC=L_YNueQ_r1k|LTCs z-#=`&y{Sj(@rz;)+yB}SwzsMsQ~DxwbAw!uvtWd?EVgR4h?bB|;9cco`9+CZ)y;kj z>Or#)CkzoPW#lHdBKj=^hO!TDk?oI{TP<%<_ZbRFC3U^c_PMA}sF`OfI8{z3+pb}_ z;It8u;uu(TTwBe4EtZ4RIN?_C7X)CmRI zd&Z&@ffki6%1)v(v!H3=NCME%L%~6b-09PzIc;{*?4FumBv8>)s5E=1)GVMLDrTYP zKMKrxiJB_ty`o3CdLTHE8dqlPl+_*hkf6`GMYROSpp_edAz}9Di0?~2&5ITS6}}L4 z#EEXSxiRLxixd)Wj39&Up(7YGwqCsti#%kce(sQ1x83m)NNAunH6moD+$ zbyClh-`Geci~%y|krd0we80k%=U@T!JidjY4ms#St58LNc=OdGM3vMUb`&CQZp6(6d4@m*Z=8@FNWz?k{H!r*(>Yqy6-ZT4%o3m?BZ-r0HQ=l>1ib0HA` delta 162 zcmZoTz}#?vX@WE_9|Hpe7Z8KNL=AIBzKsb>__>&QGa2}m`P_IjH!CVc^G?o`y*N2` z20N>Tfr+8{W;eNgj9kq8=NR}u@So$qzgf`X0RQB3`ep(w{BJ-8aq<7=|Hl83{|!*) zHa{CD3s8(>^H+N=0Y;8Z0W1n^jI1>btTh`Oi&!_;l(tB5@d6bvvSu-`W^HUtVBMTG Hbs84{?z1Y$ diff --git a/AITrain/dual_ai_dialogue_system.py b/AITrain/dual_ai_dialogue_system.py index 5906a24..4d50784 100644 --- a/AITrain/dual_ai_dialogue_system.py +++ b/AITrain/dual_ai_dialogue_system.py @@ -391,15 +391,16 @@ class DualAIDialogueEngine: """双AI对话引擎""" def __init__(self, knowledge_base: RAGKnowledgeBase, conversation_manager: ConversationManager, llm_generator, - enable_scoring: bool = True, base_model_path: str = None): + enable_scoring: bool = True, base_model_path: str = None, use_manual_scoring: bool = False): self.kb = knowledge_base self.conv_mgr = conversation_manager self.llm_generator = llm_generator self.enable_scoring = enable_scoring + self.use_manual_scoring = use_manual_scoring self.scorer = None # 初始化评分器 - if enable_scoring and base_model_path: + if enable_scoring and base_model_path and not use_manual_scoring: try: from dialogue_scorer import DialogueAIScorer print("正在初始化对话评分系统...") @@ -412,6 +413,74 @@ class DualAIDialogueEngine: except Exception as e: print(f"⚠ 对话评分系统初始化失败: {e}") self.enable_scoring = False + + def _manual_score_dialogue_turn(self, dialogue_content: str, speaker: str, dialogue_history: List[DialogueTurn]) -> Tuple[float, str, str]: + """人工打分对话轮次 + + Args: + dialogue_content: 对话内容 + speaker: 说话者 + dialogue_history: 对话历史 + + Returns: + tuple: (总分, 详细分数JSON, 反馈意见) + """ + print("\n" + "="*60) + print("人工对话评分") + print("="*60) + # print(f"说话者: {speaker}") + # print(f"对话内容: {dialogue_content}") + print("-" * 40) + + # # 显示最近的对话历史作为参考 + # if dialogue_history: + # print("最近对话历史:") + # for i, turn in enumerate(dialogue_history[-3:], 1): + # print(f" {i}. {turn.speaker}: {turn.content[:100]}...") + # print("-" * 40) + + # 五个评分维度 + dimensions = { + 'coherence': '逻辑连贯性 (1-10)', + 'character_consistency': '角色一致性 (1-10)', + 'naturalness': '自然流畅度 (1-10)', + 'information_density': '信息密度 (1-10)', + 'creativity': '创意新颖度 (1-10)' + } + + scores = {} + print("\n请为以下维度打分 (输入1-10的分数,直接回车跳过该维度):") + + for key, desc in dimensions.items(): + while True: + try: + score_input = input(f"{desc}: ").strip() + if score_input == "": + scores[key] = 7.0 # 默认分数 + break + + score = float(score_input) + if 1 <= score <= 10: + scores[key] = score + break + else: + print("请输入1-10之间的分数") + except ValueError: + print("请输入有效的数字") + + # 计算总分 + overall_score = sum(scores.values()) / len(scores) + + # 获取反馈意见 + print("\n请输入对该对话的评价和建议 (可选,直接回车跳过):") + feedback = input("反馈意见: ").strip() + if not feedback: + feedback = f"人工评分完成,总分: {overall_score:.1f}" + + print(f"\n✓ 评分完成 - 总分: {overall_score:.1f}") + print("="*60) + + return overall_score, json.dumps(scores), feedback def score_dialogue_turn(self, dialogue_content: str, speaker: str, dialogue_history: List[DialogueTurn]) -> Tuple[float, str, str]: """对单条对话进行评分 @@ -424,8 +493,16 @@ class DualAIDialogueEngine: Returns: tuple: (总分, 详细分数JSON, 反馈意见) """ - if not self.enable_scoring or not self.scorer: + if not self.enable_scoring: return 0.0, "{}", "评分系统未启用" + + # 人工打分模式 + if self.use_manual_scoring: + return self._manual_score_dialogue_turn(dialogue_content, speaker, dialogue_history) + + # AI自动打分模式 + if not self.scorer: + return 0.0, "{}", "AI评分器未初始化" try: # 获取角色数据 diff --git a/AITrain/main_controller.py b/AITrain/main_controller.py index 2e98479..90f0770 100644 --- a/AITrain/main_controller.py +++ b/AITrain/main_controller.py @@ -121,7 +121,7 @@ def show_character_info(): except Exception as e: print(f"✗ 读取角色文件失败: {char_file} - {e}") -def run_dialogue_system(enableScore: bool): +def run_dialogue_system(enableScore: bool, useManualScoring: bool = False): """运行双AI对话系统""" print("\n" + "="*60) print("启动双AI角色对话系统") @@ -192,7 +192,8 @@ def run_dialogue_system(enableScore: bool): conv_mgr, dual_generator, enable_scoring=enableScore, - base_model_path=base_model_path + base_model_path=base_model_path, + use_manual_scoring=useManualScoring ) # 创建对话会话 @@ -261,176 +262,6 @@ def run_dialogue_system(enableScore: bool): traceback.print_exc() -def analyze_model_performance(): - """分析模型性能""" - print("\n" + "="*60) - print("模型性能分析") - print("="*60) - - try: - from dual_ai_dialogue_system import ConversationManager - import sqlite3 - import json - from datetime import datetime, timedelta - - conv_mgr = ConversationManager("./conversation_data/conversations.db") - - with sqlite3.connect(conv_mgr.db_path) as conn: - print("\n1. 总体性能趋势分析:") - - # 按时间段分析性能趋势 - cursor = conn.execute(""" - SELECT - DATE(timestamp) as date, - COUNT(*) as dialogue_count, - AVG(dialogue_score) as avg_score, - AVG(CASE WHEN dialogue_score >= 8.0 THEN 1.0 ELSE 0.0 END) as high_quality_rate - FROM dialogue_turns - WHERE dialogue_score > 0 - AND timestamp >= datetime('now', '-7 days') - GROUP BY DATE(timestamp) - ORDER BY date DESC - """) - - trend_data = cursor.fetchall() - if trend_data: - print(f" 最近7天性能趋势:") - for date, count, avg_score, hq_rate in trend_data: - print(f" {date}: 平均{avg_score:.2f}分 ({count}轮对话, {hq_rate*100:.1f}%高质量)") - else: - print(" 暂无足够数据进行趋势分析") - - print("\n2. 维度问题分析:") - - # 分析各维度的问题 - cursor = conn.execute(""" - SELECT score_details - FROM dialogue_turns - WHERE dialogue_score > 0 AND score_details != '{}' - ORDER BY timestamp DESC - LIMIT 100 - """) - - dimension_scores = { - 'coherence': [], - 'character_consistency': [], - 'naturalness': [], - 'information_density': [], - 'creativity': [] - } - - for (score_details,) in cursor.fetchall(): - try: - scores = json.loads(score_details) - for dim, score in scores.items(): - if dim in dimension_scores: - dimension_scores[dim].append(float(score)) - except: - continue - - dimension_names = { - 'coherence': '连贯性', - 'character_consistency': '角色一致性', - 'naturalness': '自然度', - 'information_density': '信息密度', - 'creativity': '创意性' - } - - weak_dimensions = [] - for dim, scores in dimension_scores.items(): - if scores: - avg_score = sum(scores) / len(scores) - print(f" {dimension_names[dim]}: 平均{avg_score:.2f}分 ({len(scores)}个样本)") - if avg_score < 7.0: - weak_dimensions.append(dim) - - if weak_dimensions: - print(f"\n ⚠ 发现薄弱维度: {[dimension_names[d] for d in weak_dimensions]}") - print(" 建议进行针对性优化训练") - - print("\n3. 角色表现分析:") - - # 分析不同角色的表现 - cursor = conn.execute(""" - SELECT - speaker, - COUNT(*) as dialogue_count, - AVG(dialogue_score) as avg_score, - MIN(dialogue_score) as min_score, - MAX(dialogue_score) as max_score, - AVG(CASE WHEN dialogue_score >= 8.0 THEN 1.0 ELSE 0.0 END) as high_quality_rate - FROM dialogue_turns - WHERE dialogue_score > 0 - GROUP BY speaker - ORDER BY avg_score DESC - """) - - character_performance = cursor.fetchall() - if character_performance: - print(" 角色表现排名:") - for i, (speaker, count, avg, min_s, max_s, hq_rate) in enumerate(character_performance, 1): - status = "✓" if avg >= 7.5 else "⚠" if avg >= 6.5 else "✗" - print(f" {i}. {speaker} {status}") - print(f" 平均{avg:.2f}分 (范围{min_s:.1f}-{max_s:.1f}, {hq_rate*100:.1f}%高质量, {count}轮)") - - print("\n4. 问题模式识别:") - - # 识别低分对话的常见问题 - cursor = conn.execute(""" - SELECT content, dialogue_score, score_feedback - FROM dialogue_turns - WHERE dialogue_score > 0 AND dialogue_score < 6.0 - ORDER BY dialogue_score ASC - LIMIT 5 - """) - - low_score_examples = cursor.fetchall() - if low_score_examples: - print(" 低分对话示例:") - for i, (content, score, feedback) in enumerate(low_score_examples, 1): - print(f" {i}. 分数{score:.1f}: {content[:50]}...") - if feedback: - print(f" 问题: {feedback[:80]}...") - else: - print(" 暂无低分对话样本") - - print("\n5. 优化建议:") - - # 生成优化建议 - suggestions = [] - - if weak_dimensions: - if 'character_consistency' in weak_dimensions: - suggestions.append("• 加强角色设定训练,增加角色特征描述的权重") - if 'creativity' in weak_dimensions: - suggestions.append("• 增加创意性训练数据,提高对话的趣味性") - if 'coherence' in weak_dimensions: - suggestions.append("• 优化上下文理解,加强对话逻辑连贯性") - if 'naturalness' in weak_dimensions: - suggestions.append("• 增加自然语言训练,改善表达流畅度") - if 'information_density' in weak_dimensions: - suggestions.append("• 优化信息组织,避免冗余表达") - - # 检查是否需要数据收集 - cursor = conn.execute("SELECT COUNT(*) FROM dialogue_turns WHERE dialogue_score > 0") - total_scored = cursor.fetchone()[0] - - if total_scored < 50: - suggestions.append("• 需要收集更多评分数据以进行准确分析") - - if total_scored >= 100: - suggestions.append("• 数据量充足,建议开始模型迭代优化") - - if suggestions: - for suggestion in suggestions: - print(f" {suggestion}") - else: - print(" 当前性能表现良好,继续保持!") - - except Exception as e: - print(f"✗ 性能分析失败: {e}") - import traceback - traceback.print_exc() def generate_training_dataset(): """生成训练数据集""" @@ -1271,18 +1102,19 @@ def main(): print("主菜单 - 请选择操作:") print("1. 处理PDF世界观文档 (转换为RAG格式)") print("2. 查看角色设定信息") - print("3. 启动双AI对话系统 (开启ai打分)") - print("4. 启动双AI对话系统 (关闭ai打分)") - print("5. 系统状态检查") - print("6. 查看对话评分统计") - print("7. 模型性能分析与优化") - print("8. 生成训练数据集") - print("9. 模型迭代优化") - print("10. 查看使用说明") + print("3. 启动双AI对话系统 (开启AI打分)") + print("4. 启动双AI对话系统 (关闭AI打分)") + print("5. 启动双AI对话系统 (开启人工打分)") + print("6. 系统状态检查") + print("7. 查看对话评分统计") + print("8. 模型性能分析与优化") + print("9. 生成训练数据集") + print("10. 模型迭代优化") + print("11. 查看使用说明") print("0. 退出") print("="*50) - choice = input("请输入选择 (0-10): ").strip() + choice = input("请输入选择 (0-11): ").strip() if choice == '0': print("\n感谢使用双AI角色对话系统!") @@ -1301,21 +1133,25 @@ def main(): run_dialogue_system(enableScore = False) elif choice == '5': - show_system_status() + run_dialogue_system(enableScore = True, useManualScoring = True) elif choice == '6': - show_scoring_statistics() + show_system_status() elif choice == '7': - analyze_model_performance() + show_scoring_statistics() elif choice == '8': - generate_training_dataset() + # 模型性能分析与优化 - 待实现 + print("模型性能分析与优化功能开发中...") elif choice == '9': - run_model_optimization() + generate_training_dataset() elif choice == '10': + run_model_optimization() + + elif choice == '11': show_usage_guide() else: