,可以通过以下步骤实现:
import pandas as pd
import os
# Number of data rows written to each temporary chunk file.
chunk_size = 1000
# Text file to load; its first non-blank line is treated as the header row.
file_path = 'path/to/text_file.txt'


def load_text_in_chunks(file_path, chunk_size=1000, delimiter='\t',
                        chunk_dir=None, encoding='utf-8'):
    """Split a delimited text file into chunk files and load them into one DataFrame.

    The first non-blank line of the file is used as the header and is
    repeated at the top of every chunk file, so each chunk parses with the
    same column names and no data row is mistaken for a header.

    Args:
        file_path: Path of the delimited text file to read.
        chunk_size: Maximum number of data rows per chunk file (>= 1).
        delimiter: Field separator passed to ``pandas.read_csv``.
        chunk_dir: Directory for the temporary ``chunk_<i>.txt`` files.
            Defaults to the directory containing ``file_path``.
        encoding: Text encoding used for reading the source, writing the
            chunks and parsing them, so all three steps agree.

    Returns:
        A DataFrame with the rows of all chunks in file order and a fresh
        0..n-1 index. An empty DataFrame if the file has no non-blank line.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError('chunk_size must be a positive integer')
    if chunk_dir is None:
        chunk_dir = os.path.dirname(file_path)

    # Drop blank lines up front (read_csv skips them by default anyway) so
    # that the header really is the first line kept.
    with open(file_path, 'r', encoding=encoding) as file:
        lines = [line for line in file if line.strip('\r\n')]
    if not lines:
        return pd.DataFrame()

    header, rows = lines[0], lines[1:]
    # Stepping by chunk_size never yields an empty trailing chunk; the
    # `or [[]]` keeps a single header-only chunk so a file without data rows
    # still produces a frame that carries the column names.
    chunks = [rows[start:start + chunk_size]
              for start in range(0, len(rows), chunk_size)] or [[]]

    chunk_paths = []
    frames = []
    try:
        # Split the file and save each piece as its own chunk file.
        for i, chunk_rows in enumerate(chunks):
            chunk_file_path = os.path.join(chunk_dir, f'chunk_{i}.txt')
            chunk_paths.append(chunk_file_path)
            with open(chunk_file_path, 'w', encoding=encoding) as chunk_file:
                chunk_file.write(header)
                chunk_file.writelines(chunk_rows)

        # Read the chunk files back one by one.
        for chunk_file_path in chunk_paths:
            frames.append(
                pd.read_csv(chunk_file_path, delimiter=delimiter,
                            encoding=encoding)
            )
    finally:
        # Remove the chunk files even when writing or parsing failed.
        for chunk_file_path in chunk_paths:
            if os.path.exists(chunk_file_path):
                os.remove(chunk_file_path)

    # One concat at the end instead of growing a frame inside the loop.
    return pd.concat(frames, ignore_index=True)


if __name__ == '__main__':
    # Adjust the delimiter to match the actual file format.
    df = load_text_in_chunks(file_path, chunk_size, delimiter='\t')
这样,文本文件就被拆分为多个文件,并成功加载到了 pandas 的数据框(DataFrame)中。你可以根据实际情况调整拆分文件的大小和分隔符,并根据需要对数据框进行进一步处理和分析。
领取专属 10元无门槛券
手把手带您无忧上云