将数据写入 CSV 文件后,在 Windows 电脑上用 Excel 打开时出现乱码和错行,而在 Mac 电脑上打开没有任何问题。
所以问题应该是编码不同导致的。文本中包含了中文、表情字符等,无论使用 utf-8 还是 gbk 编码都有问题。
最终使用 utf-8-sig 编码完美解决问题。
# 'utf-8-sig' writes a UTF-8 byte-order mark at the start of the file;
# newline='' leaves line endings exactly as written, and errors='ignore'
# drops any character that cannot be encoded instead of raising.
with open(
    file=file,
    mode=mode,
    encoding='utf-8-sig',
    newline='',
    errors='ignore',
) as f:
    f.write(datas)
解决前:
解决后:
案例:将 8W+ 条数据按每 1000 条导出到一个 CSV 文件中(可直接用 Excel 打开)
import csv
import logging
import os


def write_to_csv(file, csv_header=None, values=None, data_type='tuple', mode='w'):
    """Write rows to a CSV file encoded as UTF-8 with a byte-order mark.

    The file is opened with ``encoding='utf-8-sig'`` so a BOM is written
    at the start of a new file.

    :param file: path of the CSV file to write.
    :param csv_header: list of column names. Written as a single row when
        ``data_type='header'``; used as the field order when
        ``data_type='dict'``. Ignored when ``data_type='tuple'``.
    :param values: the rows to write: a list of sequences
        (``data_type='tuple'``) or a list of dicts (``data_type='dict'``).
        ``None`` is treated as "no rows".
    :param data_type: ``'tuple'`` writes ``values`` row by row;
        ``'dict'`` writes dict rows through ``csv.DictWriter`` (with a
        header row unless appending); ``'header'`` writes only
        ``csv_header``.
    :param mode: file open mode, ``'w'`` by default; use ``'a+'`` to append.
    """
    # Validate before opening: opening with mode 'w' truncates the file, and
    # an unsupported data_type must not destroy existing data.
    if data_type not in ('tuple', 'dict', 'header'):
        print("数据类型错误,请确认!")
        return

    # newline='' lets the csv module control line endings itself.
    with open(file=file, mode=mode, encoding='utf-8-sig', newline='', errors='ignore') as f:
        if data_type == 'header':
            csv.writer(f).writerow(csv_header)
        elif data_type == 'tuple':
            csv.writer(f).writerows(values or [])
        else:
            rows = list(values or [])
            if csv_header:
                fieldnames = list(csv_header)
            else:
                # Fall back to the key order of the first row.
                fieldnames = list(rows[0]) if rows else []
            if fieldnames:
                writer = csv.DictWriter(f, fieldnames=fieldnames)
                # When appending, the header is assumed to be in the file already.
                if not mode.startswith('a'):
                    writer.writeheader()
                writer.writerows(rows)


def comments_data_csv(batch_size=1000):
    """Export the ``shein_comments`` table to CSV files, one file per id range.

    File ``data/shein_comment_{i}.csv`` receives a header row followed by the
    rows whose id lies in ``(i * batch_size, (i + 1) * batch_size]``.

    Relies on a ``shein_mysql`` object being available in the module scope;
    its ``query_tables`` is presumably returning one dict for a single-row
    query and a list of dicts when ``query_all=True`` -- confirm against the
    helper's implementation.

    :param batch_size: width of the id range exported to each file.
    :raises ValueError: if ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError('batch_size must be a positive integer')

    columns = ['id', 'score', 'overall_fit', 'category', 'content', 'country', 'country_url']

    count_row = shein_mysql.query_tables(
        exe_sql='SELECT count(*) num FROM `shein_comments`;',
    )
    comment_num = count_row['num']

    # open() does not create missing directories.
    os.makedirs('data', exist_ok=True)

    # NOTE(review): the number of files is derived from the row count while
    # each query filters on id, so rows are only fully covered when ids are
    # (nearly) contiguous from 1; deriving the bound from MAX(id) would be
    # more robust -- confirm with the table owner before changing the SQL.
    for i in range(0, int(comment_num / batch_size) + 1):
        lower, upper = i * batch_size, (i + 1) * batch_size
        logging.info('%s', (lower, upper))
        batch = shein_mysql.query_tables(
            exe_sql='SELECT id, score, overall_fit, content, category, country, country_url FROM `shein_comments` WHERE id>%s and id<=%s;',
            args=(lower, upper),
            query_all=True,
        )

        # The first row of every file is the header.
        comment_item_li = [list(columns)]
        for data in batch or []:
            item = [data[column] for column in columns]
            logging.info('item: %s', tuple(item[:5]))
            comment_item_li.append(item)

        # Write the CSV file for this id range.
        write_to_csv(f'data/shein_comment_{i}.csv', values=comment_item_li)
----------------- END --------------------