diff --git a/tool/csv_tool.py b/tool/csv_tool.py
index ac8bc80..3c11463 100644
--- a/tool/csv_tool.py
+++ b/tool/csv_tool.py
@@ -25,6 +25,28 @@ class CSVTool:
                 writer = csv.writer(f)
                 writer.writerow(self.headers)
 
+        # The file already exists: make sure it starts with the expected header row.
+        try:
+            # utf-8-sig transparently drops a BOM (e.g. from Excel) that would
+            # otherwise make the first cell differ from the header; newline=''
+            # lets the csv module handle line endings inside quoted fields.
+            with open(self.csv_file_name, 'r', encoding='utf-8-sig', newline='') as f:
+                # Parse with csv instead of splitting on '\n' so quoted fields
+                # that span lines survive; drop rows that are entirely blank.
+                rows = [row for row in csv.reader(f) if any(cell.strip() for cell in row)]
+
+            # Header already in place: leave the file untouched.
+            if rows and rows[0] == list(self.headers):
+                return
+
+            # Empty file or missing header: write the header, then the existing rows.
+            with open(self.csv_file_name, 'w', encoding='utf-8', newline='') as f:
+                writer = csv.writer(f)
+                writer.writerow(self.headers)
+                writer.writerows(rows)
+        except (OSError, UnicodeError, csv.Error) as e:
+            print(f"检查/更新表头时出错: {e}")
+
     def get_existing_data(self, unique_titles: List[str]) -> set:
         """
         读取现有数据,用于去重检查