处理数据为空的情况
This commit is contained in:
parent
09631041e0
commit
897dcaf386
@@ -40,9 +40,15 @@ def main():
|
|||||||
try:
|
try:
|
||||||
response = json.loads(response)
|
response = json.loads(response)
|
||||||
for item in response:
|
for item in response:
|
||||||
|
# 数据质量检查:过滤空的instruction或output
|
||||||
|
if (item.get('instruction', '').strip() and
|
||||||
|
item.get('output', '').strip() and
|
||||||
|
item.get('character', '').strip()):
|
||||||
with open(f'{file_name}.jsonl', 'a', encoding='utf-8') as f:
|
with open(f'{file_name}.jsonl', 'a', encoding='utf-8') as f:
|
||||||
json.dump(item, f, ensure_ascii=False)
|
json.dump(item, f, ensure_ascii=False)
|
||||||
f.write('\n')
|
f.write('\n')
|
||||||
|
else:
|
||||||
|
print(f"跳过空字段数据: instruction='{item.get('instruction', '')}', output='{item.get('output', '')}', character='{item.get('character', '')}'")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"解析错误: {e}")
|
print(f"解析错误: {e}")
|
||||||
print(f"原始响应: {repr(response[:200])}") # 打印前200字符用于调试
|
print(f"原始响应: {repr(response[:200])}") # 打印前200字符用于调试
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user