目录管理
新增目录
创建目录
from zdppy_hdfs.client import Client

client = Client("http://localhost:9870/")

# Create the /tmp directory, requesting permission 755.
client.makedirs(hdfs_path="/tmp", permission="755")

# List the root directory. With status=True each entry unpacks into a
# (name, status) pair; only the name is printed here.
for name, _status in client.list(hdfs_path="/", status=True):
    print(name)
查看目录
查看指定目录
from zdppy_hdfs.client import Client

# Connect to the local NameNode web endpoint (the host was misspelled before).
client = Client("http://localhost:9870/")

# List the entries under the HDFS root, equivalent to `hdfs dfs -ls /`.
# With status=True each entry unpacks into a (name, status) pair; only the
# name is printed here.
for name, _status in client.list(hdfs_path="/", status=True):
    print(name)
文件管理
新增文件
上传文件
带回调函数的上传
from zdppy_hdfs.client import Client

client = Client("http://localhost:9870/")
print(client)


def callback(filename: str, size: int) -> None:
    """Report upload progress for *filename*.

    Args:
        filename: Path of the file being uploaded.
        size: Progress value reported by the uploader; -1 is treated as the
            "upload finished" signal.
    """
    if size == -1:
        # Completion signal: report it on its own instead of printing -1 as
        # if it were a byte count.
        print("文件上传完成")
        return
    print(filename, "完成了一个chunk上传", "当前大小:", size)


# On success the upload returns the HDFS path of the uploaded file.
response = client.upload(
    hdfs_path="/main.py",
    local_path="main.py",
    chunk_size=2 << 19,  # 1 MiB per chunk
    progress=callback,  # invoked as the upload advances
    cleanup=True,
)
print(response)
普通的上传
from zdppy_hdfs.client import Client

client = Client("http://localhost:9870/")

# Upload the local main.py to /main.py on HDFS using the default options,
# then print whatever the upload call returns.
local_file = "main.py"
remote_file = "/main.py"
uploaded = client.upload(hdfs_path=remote_file, local_path=local_file)
print(uploaded)
覆盖上传
from zdppy_hdfs.client import Client

client = Client("http://localhost:9870/")

# Upload main.py to /main.py, overwriting the target if it already exists.
# NOTE(review): confirm that `update_exists` is the keyword zdppy_hdfs
# expects for overwriting an existing file -- TODO verify against the
# library's upload() signature.
local_file = "main.py"
remote_file = "/main.py"
uploaded = client.upload(
    hdfs_path=remote_file,
    local_path=local_file,
    update_exists=True,
)
print(uploaded)
from zdppy_hdfs.client import Client

client = Client("http://localhost:9870/")

# Download /main1.py from HDFS to ./main1.py; overwrite=True is passed so an
# existing local file does not block the download.
client.download(hdfs_path="/main1.py", local_path="./main1.py", overwrite=True)
修改文件
修改文件副本数量
from zdppy_hdfs.client import Client

client = Client("http://localhost:9870/")

# Set the replication factor of /main1.py to 2.
client.set_replication(hdfs_path="/main1.py", replication=2)

# List the root directory afterwards. With status=True each entry unpacks
# into a (name, status) pair; only the name is printed here.
for name, _status in client.list(hdfs_path="/", status=True):
    print(name)
删除文件
删除指定文件
from zdppy_hdfs.client import Client

client = Client("http://localhost:9870/")

# Delete the file at /main1.py.
file_path = "/main1.py"
client.delete(file_path)

# List the root directory afterwards to see the result. With status=True
# each entry unpacks into a (name, status) pair; only the name is printed.
for name, _status in client.list(hdfs_path="/", status=True):
    print(name)
查看文件状态
from zdppy_hdfs.client import Client

client = Client("http://localhost:9870/")

# Fetch and print the status of /main.py (strict=True is passed through).
file_status = client.status(hdfs_path="/main.py", strict=True)
print(file_status)
查看文件大小
from zdppy_hdfs.client import Client

client = Client("http://localhost:9870/")

# Print the result of checksum() for /main.py.
# NOTE(review): the printed label talks about a file size, but the value
# comes from checksum(); presumably that is checksum metadata rather than
# the file's byte size -- confirm, and use status()/content() if the size
# is what is wanted.
checksum_info = client.checksum(hdfs_path="/main.py")
print("根目录下的文件大小为:", checksum_info)
from zdppy_hdfs.client import Client

client = Client("http://localhost:9870/")

# Print the content() result first for a directory (the root), then for a
# single file (/main.py).
for target in ("/", "/main.py"):
    print(client.content(hdfs_path=target, strict=True))
读取文件
from zdppy_hdfs.client import Client

client = Client("http://localhost:9870/")
file_path = "/main1.py"

# Read a length of 200 from the start of the file.
with client.read(file_path, length=200, encoding="utf-8") as reader:
    for piece in reader:
        print(piece)

# Read a length of 200 starting at offset 200.
with client.read(file_path, offset=200, length=200, encoding="utf-8") as reader:
    for piece in reader:
        print(piece)

# Read the file with a buffer size of 1024.
with client.read(file_path, buffer_size=1024, encoding="utf-8") as reader:
    for piece in reader:
        print(piece)

# Use a newline as the delimiter; print the reader object, its type and the
# first item it yields.
with client.read(file_path, encoding="utf-8", delimiter="\n") as lines:
    print(lines, type(lines), next(lines))

# Read in chunks of size 8; print the reader object, its type and the first
# chunk it yields.
with client.read(file_path, encoding="utf-8", chunk_size=8) as chunks:
    print(chunks, type(chunks), next(chunks))
领取专属 10元无门槛券
私享最新 技术干货