From 897dcaf386499e88a2ed5da752c3af9d8182b750 Mon Sep 17 00:00:00 2001 From: 997146918 <997146918@qq.com> Date: Mon, 11 Aug 2025 14:00:55 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A4=84=E7=90=86=E6=95=B0=E6=8D=AE=E7=A9=BA?= =?UTF-8?q?=E7=9A=84=E6=83=85=E5=86=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- AITrain/example.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/AITrain/example.py b/AITrain/example.py index e5ad617..6767aa6 100644 --- a/AITrain/example.py +++ b/AITrain/example.py @@ -40,9 +40,15 @@ def main(): try: response = json.loads(response) for item in response: - with open(f'{file_name}.jsonl', 'a', encoding='utf-8') as f: - json.dump(item, f, ensure_ascii=False) - f.write('\n') + # 数据质量检查:过滤空的instruction或output + if (item.get('instruction', '').strip() and + item.get('output', '').strip() and + item.get('character', '').strip()): + with open(f'{file_name}.jsonl', 'a', encoding='utf-8') as f: + json.dump(item, f, ensure_ascii=False) + f.write('\n') + else: + print(f"跳过空字段数据: instruction='{item.get('instruction', '')}', output='{item.get('output', '')}', character='{item.get('character', '')}'") except Exception as e: print(f"解析错误: {e}") print(f"原始响应: {repr(response[:200])}") # 打印前200字符用于调试