diff --git a/AITrain/example.py b/AITrain/example.py index e5ad617..6767aa6 100644 --- a/AITrain/example.py +++ b/AITrain/example.py @@ -40,9 +40,15 @@ def main(): try: response = json.loads(response) for item in response: - with open(f'{file_name}.jsonl', 'a', encoding='utf-8') as f: - json.dump(item, f, ensure_ascii=False) - f.write('\n') + # 数据质量检查:过滤空的instruction或output + if (item.get('instruction', '').strip() and + item.get('output', '').strip() and + item.get('character', '').strip()): + with open(f'{file_name}.jsonl', 'a', encoding='utf-8') as f: + json.dump(item, f, ensure_ascii=False) + f.write('\n') + else: + print(f"跳过空字段数据: instruction='{item.get('instruction', '')}', output='{item.get('output', '')}', character='{item.get('character', '')}'") except Exception as e: print(f"解析错误: {e}") print(f"原始响应: {repr(response[:200])}") # 打印前200字符用于调试