HDFS的javaAPI操作(10)

大数据梦想家

发布于 2021-01-22 08:10:25

51700

代码可运行

运行总次数：0

代码可运行

在之前的博客《HDFS的shell常用命令大全》中,小菌为大家分享的是在linux系统的命令行上通过shell命令操作HDFS。而本篇博客，小菌为大家带来的则是在java的环境下,用JavaAPI操作HDFS!学过Java的小伙伴们是不是感觉很nice(｀・ω・´)~

首先我们需要先创建maven工程并导入jar包。这里需要特别说明的是:

由于CDH版本的软件涉及版权问题,其jar包并没有全部托管到maven中央仓库,而是托管在CDH自己的服务器上。因此默认从maven中央仓库是下载不到这些jar包的,需要我们在pom.xml中手动添加repository,从CDH仓库进行下载。

当然小伙伴们不要着急哈,万能的小菌已经把含有hadoop包的完整maven仓库repository准备好了,需要的小伙伴们私聊小菌哦~。(因为文件比较大,repository无法放在云盘上)。

创建好maven工程之后,在pom.xml中添加如下内容:

<!-- Extra repository: the *-cdh5.14.0 artifact versions used below are resolved from Cloudera's repository. -->
<repositories>
    <repository>
        <id>cloudera</id>
        <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
    </repository>
</repositories>
<dependencies>
    <!--
        Hadoop client libraries (CDH 5.14.0 builds).
        Maven coordinates are case-sensitive: the groupId is org.apache.hadoop and the
        artifactIds are all lower-case, otherwise the artifacts cannot be resolved.
    -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.6.0-mr1-cdh5.14.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.6.0-cdh5.14.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.6.0-cdh5.14.0</version>
    </dependency>

    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-core</artifactId>
        <version>2.6.0-cdh5.14.0</version>
    </dependency>
    <!-- Test frameworks -->
    <!-- https://mvnrepository.com/artifact/junit/junit -->
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.11</version>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>org.testng</groupId>
        <artifactId>testng</artifactId>
        <version>RELEASE</version>
    </dependency>
</dependencies>
<!-- Build configuration: compiler settings plus the shade plugin bound to the package phase. -->
<build>
    <plugins>
        <!-- Compile with source/target level 1.8 and read the sources as UTF-8. -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>3.0</version>
            <configuration>
                <source>1.8</source>
                <target>1.8</target>
                <encoding>UTF-8</encoding>
                <!--    <verbal>true</verbal>-->
            </configuration>
        </plugin>

        <!-- Run the shade goal during the package phase, with minimizeJar enabled. -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-shade-plugin</artifactId>
            <version>2.4.3</version>
            <executions>
                <execution>
                    <phase>package</phase>
                    <goals>
                        <goal>shade</goal>
                    </goals>
                    <configuration>
                        <minimizeJar>true</minimizeJar>
                    </configuration>
                </execution>
            </executions>
        </plugin>
      <!--  <plugin>
            <artifactId>maven-assembly-plugin </artifactId>
            <configuration>
                <descriptorRefs>
                    <descriptorRef>jar-with-dependencies</descriptorRef>
                </descriptorRefs>
                <archive>
                    <manifest>
                        <mainClass>cn.itcast.Hadoop.db.DBToHdfs2</mainClass>
                    </manifest>
                </archive>
            </configuration>
            <executions>
                <execution>
                    <id>make-assembly</id>
                    <phase>package</phase>
                    <goals>
                        <goal>single</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>-->
    </plugins>
</build>

添加完毕之后,右上角检查没有报错说明我们的maven项目的jar包配置成功!

接下来小菌将分享自己的代码,其中包含一些利用api对于HDFS的常规操作!

package demo01;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;

/**
 * Demonstrates common HDFS operations through the Hadoop {@link FileSystem} Java API:
 * listing a directory, renaming, reading a modification time, creating a directory,
 * deleting, writing a new file, uploading a local file and testing for existence.
 *
 * <p>All operations share a single {@link FileSystem} handle. The handle is closed
 * exactly once, by {@link #main(String[])}, after the last operation has run.
 *
 * <p>Created: 2019/11/7 09:41
 *
 * @author 封茗囧菌
 */
public class test02 {

    /** HDFS endpoint every operation in this demo talks to. */
    private static final String HDFS_URI = "hdfs://192.168.100.100:8020";

    static Configuration conf = new Configuration();

    /** Shared file system handle used by all operations below. */
    static FileSystem hdfs;

    static {
        try {
            hdfs = FileSystem.get(new URI(HDFS_URI), conf);
        } catch (IOException | URISyntaxException e) {
            // Fail fast: without a file system handle every operation below would
            // only fail later with an uninformative NullPointerException.
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Runs every demo operation in sequence and then releases the shared handle.
     *
     * @param args unused
     * @throws Exception if any of the HDFS operations fails
     */
    public static void main(String[] args) throws Exception {
        try {
            // list everything under the root directory
            listStatus();
            // rename
            rename();
            // read a file's modification time
            GetTime();
            // create a directory
            mkdir();
            // delete a file
            deletefile();
            // create a file and write data into it
            AddFile();
            // upload a local file
            put();
            // check whether a path exists
            check();
        } finally {
            // Close the shared handle only after ALL operations are done; closing it
            // earlier makes every subsequent call fail on a closed file system.
            hdfs.close();
        }
    }

    /**
     * Prints the path of every entry directly under the HDFS root directory.
     *
     * @throws Exception if the directory cannot be listed
     */
    private static void listStatus() throws Exception {
        FileStatus[] status = hdfs.listStatus(new Path("/"));
        for (FileStatus entry : status) {
            System.out.println(entry.getPath().toString());
        }
    }

    /**
     * Renames {@code /abc} to {@code /aaaaa} and prints whether the rename succeeded.
     *
     * @throws IOException if the rename request fails
     */
    private static void rename() throws IOException {
        Path frpath = new Path("/abc");
        Path topath = new Path("/aaaaa");
        boolean isRename = hdfs.rename(frpath, topath);
        String result = isRename ? "修改成功!" : "修改失败!";
        System.out.println(result);
    }

    /**
     * Prints the modification time of {@code /b.txt} as returned by
     * {@link FileStatus#getModificationTime()}.
     *
     * @throws IOException if the file status cannot be read
     */
    private static void GetTime() throws IOException {
        FileStatus fileStatus = hdfs.getFileStatus(new Path("/b.txt"));
        long modificationTime = fileStatus.getModificationTime();
        System.out.println(modificationTime);
    }

    /**
     * Deletes {@code /b.txt} (with the recursive flag set) and prints the result.
     *
     * @throws IOException if the delete request fails
     */
    private static void deletefile() throws IOException {
        boolean isDeleted = hdfs.delete(new Path("/b.txt"), true);
        System.out.println("Delete?" + isDeleted);
    }

    /**
     * Creates the directory {@code /cc} and prints whether the call reported success.
     *
     * @throws IOException if the directory cannot be created
     */
    private static void mkdir() throws IOException {
        boolean mkdirs = hdfs.mkdirs(new Path("/cc"));
        if (mkdirs) {
            System.out.println("创建成功!");
        } else {
            System.out.println("创建失败!");
        }
    }

    /**
     * Creates {@code /cc/idea.txt} and writes two lines of text into it.
     *
     * @throws IOException if the file cannot be created or written
     */
    private static void AddFile() throws IOException {
        // "\r\n" is a real line break (the escape must use backslashes); the charset is
        // explicit so the bytes do not depend on the platform default encoding.
        byte[] buff = "hello hadoop world!\r\n hadoop ".getBytes(StandardCharsets.UTF_8);
        // try-with-resources closes the stream even when the write fails
        try (FSDataOutputStream outputStream = hdfs.create(new Path("/cc/idea.txt"))) {
            outputStream.write(buff, 0, buff.length);
        }
    }

    /**
     * Uploads the local file {@code G:\Python\test.csv} to the HDFS root directory.
     *
     * @throws IOException if the copy fails
     */
    private static void put() throws IOException {
        Path src = new Path("G:\\Python\\test.csv");
        Path dst = new Path("/");
        hdfs.copyFromLocalFile(src, dst);
        System.out.println("上传成功!");
    }

    /**
     * Prints whether the path {@code /aa} exists.
     *
     * @throws IOException if the existence check fails
     */
    public static void check() throws IOException {
        Path findf = new Path("/aa");
        boolean isExists = hdfs.exists(findf);
        System.out.println("Exists?" + isExists);
    }

}