Simple Usage of the Hadoop Java API
Updated by: HHH   Date: 2023-01-07


This article walks through basic usage of the Hadoop Java API. If you are not yet familiar with it, the sections below cover the Maven setup, a set of HDFS test cases, and a common version-mismatch exception; hopefully you will find them useful.

Note: the versions of the client jars must match the version of Hadoop running on the remote cluster.

Maven configuration file (pom.xml):

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>Hadoop</groupId>
    <artifactId>demo</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <hadoop.version>2.7.1</hadoop.version>
    </properties>

    <dependencies>
        <!--hadoop-->
        <!-- http://mvnrepository.com/artifact/commons-io/commons-io -->
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-api</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>
    </dependencies>


</project>

Test cases:

package com.demo;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.Before;
import org.junit.Test;

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;


public class HadoopTest {


    // shared HDFS handle, initialized before each test
    FileSystem fileSystem = null;

    @Before
    public void init() throws URISyntaxException, IOException, InterruptedException {
        // connect to the remote NameNode as user "root"
        String p = "hdfs://yarn001:9000";
        fileSystem = FileSystem.get(new URI(p), new Configuration(), "root");
    }


    /**
     * Test: download a file from HDFS to the local disk
     * @throws URISyntaxException
     * @throws IOException
     */
    @Test
    public void downloadTest() throws URISyntaxException, IOException {
        Path path = new Path("/hadoop-2.7.1.tar.gz");
        InputStream in = fileSystem.open(path);
        FileOutputStream fileOutputStream = new FileOutputStream("d://hadoop");
        // the final "true" closes both streams once the copy finishes
        IOUtils.copyBytes(in, fileOutputStream, 4096, true);
    }

    /**
     * Test: upload a file to HDFS through an output stream
     * @throws IOException
     */
    @Test
    public void uploadFileTest1() throws IOException {
        InputStream fileInputStream = new FileInputStream("d://SpringBoot.mobi");
        Path path = new Path("/SpringBoot");
        FSDataOutputStream fsDataOutputStream = fileSystem.create(path);
        // pass "true" so both streams are closed (and the HDFS file is finalized) after the copy
        IOUtils.copyBytes(fileInputStream, fsDataOutputStream, 4096, true);
    }

    /**
     * Test: upload a file to HDFS with copyFromLocalFile
     */
    @Test
    public void uploadFileTest2() throws IOException {
        Path localPath = new Path("d://test.xls");
        Path remotePath = new Path("/testXLS");
        fileSystem.copyFromLocalFile(localPath, remotePath);
    }

    /**
     * Test: delete a file or directory
     * (see the standalone sketch after this class for an exists() check before deleting)
     */
    @Test
    public void delFileTest() throws IOException {
        Path path = new Path("/testXLS");
        // recursive = false: deletes a file or an empty directory
        boolean delete = fileSystem.delete(path, false);
        // recursive = true: deletes a non-empty directory recursively
        // boolean delete1 = fileSystem.delete(path, true);
        System.out.println(delete ? "delete succeeded" : "delete failed");
    }


    /**
     * Test: create a directory
     * @throws IOException
     */
    @Test
    public void createFolder() throws IOException {
        Path path = new Path("/testPath2");
        boolean mkdirs = fileSystem.mkdirs(path);
        System.out.println(mkdirs ? "success" : "fail");
    }

}
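
The delete test above distinguishes a non-recursive delete (files and empty directories) from a recursive one. As a small complementary sketch that is not part of the original tests (the class name is made up for illustration), the standalone class below first checks whether the path exists with FileSystem.exists and only uses a recursive delete when the path is a directory; it assumes the same yarn001 address and root user as the tests above.

package com.demo;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.net.URI;

public class SafeDeleteExample {

    public static void main(String[] args) throws Exception {
        // assumes the same NameNode address and user as the tests above
        FileSystem fs = FileSystem.get(new URI("hdfs://yarn001:9000"), new Configuration(), "root");

        Path path = new Path("/testXLS");
        if (!fs.exists(path)) {
            System.out.println(path + " does not exist, nothing to delete");
        } else if (fs.getFileStatus(path).isDirectory()) {
            // recursive = true is required for a non-empty directory
            System.out.println("directory deleted: " + fs.delete(path, true));
        } else {
            // a plain file can be deleted non-recursively
            System.out.println("file deleted: " + fs.delete(path, false));
        }
        fs.close();
    }
}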

Common exception:

Exception in thread "main" java.lang.NoSuchMethodError: org.apache.hadoop.tracing.SpanReceiverHost.get(Lorg/apache/hadoop/conf/Configuration;Ljava/lang/String;)Lorg/apache/hadoop/tracing/SpanReceiverHost;
at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:634)
at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:619)

How to fix it:

Adjust the Maven pom.xml so that the versions of the local Hadoop libraries match the HDFS version of the remote cluster; this NoSuchMethodError is a typical symptom of mixing client jars from one Hadoop release with a cluster running a different one.
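
To see which version the client-side jars actually are, a quick check (a minimal sketch, not from the original article; the class name is made up) is to print the version bundled into the Hadoop jars on the classpath via org.apache.hadoop.util.VersionInfo and compare it with the output of "hadoop version" on the cluster:

package com.demo;

import org.apache.hadoop.util.VersionInfo;

public class ClientVersionCheck {

    public static void main(String[] args) {
        // version of the Hadoop jars that Maven resolved onto the local classpath;
        // it should match what "hadoop version" prints on the remote cluster
        System.out.println("client Hadoop version: " + VersionInfo.getVersion());
    }
}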

Having read the above, do you now have a better idea of how to use the Hadoop Java API? If you would like to learn more, follow the 天达云 industry news channel. Thank you for your support.
