Today we will look at basic usage of the Hadoop Java API. Since many people are not familiar with it, this article summarizes the essentials; hopefully you will find it useful.
Note: the jar versions in your project must match the version of the remote Hadoop cluster.
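A quick way to confirm the client-side version is to print it from the Hadoop jars on the classpath. This is a minimal sketch (the class name PrintVersion is just for illustration; VersionInfo ships with hadoop-common). Compare the output with what `hadoop version` reports on the cluster:
import org.apache.hadoop.util.VersionInfo;

public class PrintVersion {
    public static void main(String[] args) {
        // Version of the Hadoop jars on the local classpath;
        // this should match `hadoop version` on the remote cluster.
        System.out.println(VersionInfo.getVersion());
    }
}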
Maven configuration file (pom.xml):
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>Hadoop</groupId>
    <artifactId>demo</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <hadoop.version>2.7.1</hadoop.version>
    </properties>

    <dependencies>
        <!-- hadoop -->
        <!-- http://mvnrepository.com/artifact/commons-io/commons-io -->
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-api</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>
    </dependencies>
</project>
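As an aside, hadoop-client is an aggregator artifact that already pulls in hadoop-common, hadoop-hdfs, and the MapReduce/YARN client modules transitively, so several of the explicit dependencies above are redundant; declaring them anyway does make it obvious that everything is pinned to the same ${hadoop.version}.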
Test cases:
package com.demo;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
public class HadoopTest {
    FileSystem fileSystem = null;

    @Before
    public void init() throws URISyntaxException, IOException, InterruptedException {
        // Connect to the remote NameNode as user "root".
        fileSystem = FileSystem.get(new URI("hdfs://yarn001:9000"), new Configuration(), "root");
    }

    @After
    public void close() throws IOException {
        // Release the connection after each test.
        if (fileSystem != null) fileSystem.close();
    }
    /**
     * Test file download.
     * @throws IOException
     */
    @Test
    public void downloadTest() throws IOException {
        Path path = new Path("/hadoop-2.7.1.tar.gz");
        InputStream in = fileSystem.open(path);
        FileOutputStream out = new FileOutputStream("d:/hadoop-2.7.1.tar.gz");
        // The "true" flag closes both streams once the copy finishes.
        IOUtils.copyBytes(in, out, 4096, true);
    }
    /**
     * Test file upload, approach 1: manual stream copy.
     * @throws IOException
     */
    @Test
    public void uploadFileTest1() throws IOException {
        InputStream in = new FileInputStream("d:/SpringBoot.mobi");
        FSDataOutputStream out = fileSystem.create(new Path("/SpringBoot"));
        // Pass "true" so both streams are closed after copying;
        // the three-argument overload leaves them open.
        IOUtils.copyBytes(in, out, 4096, true);
    }
    /**
     * Test file upload, approach 2: copyFromLocalFile.
     */
    @Test
    public void uploadFileTest2() throws IOException {
        Path localPath = new Path("d:/test.xls");
        Path remotePath = new Path("/testXLS");
        fileSystem.copyFromLocalFile(localPath, remotePath);
    }
    /**
     * Test file deletion.
     */
    @Test
    public void delFileTest() throws IOException {
        Path path = new Path("/testXLS");
        // recursive = false: deletes a file or an empty directory.
        boolean deleted = fileSystem.delete(path, false);
        // recursive = true deletes a non-empty directory recursively:
        // boolean deleted = fileSystem.delete(path, true);
        System.out.println(deleted ? "delete succeeded" : "delete failed");
    }
    /**
     * Test directory creation.
     * @throws IOException
     */
    @Test
    public void createFolder() throws IOException {
        Path path = new Path("/testPath2");
        boolean mkdirs = fileSystem.mkdirs(path);
        System.out.println(mkdirs ? "success" : "fail");
    }
}
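The tests above cover download, upload, delete, and mkdir. Listing a directory is another everyday operation; below is a minimal sketch in the same style (the yarn001 address and root user are carried over from the examples above, and the class name ListFilesExample is just for illustration):
package com.demo;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.net.URI;

public class ListFilesExample {
    public static void main(String[] args) throws Exception {
        // Connect the same way as in the tests above.
        FileSystem fs = FileSystem.get(new URI("hdfs://yarn001:9000"), new Configuration(), "root");
        // listStatus returns one FileStatus per entry directly under the given path.
        for (FileStatus status : fs.listStatus(new Path("/"))) {
            System.out.println((status.isDirectory() ? "dir  " : "file ")
                    + status.getPath().getName() + "  " + status.getLen() + " bytes");
        }
        fs.close();
    }
}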
Common exception:
Exception in thread "main" java.lang.NoSuchMethodError: org.apache.hadoop.tracing.SpanReceiverHost.get(Lorg/apache/hadoop/conf/Configuration;Ljava/lang/String;)Lorg/apache/hadoop/tracing/SpanReceiverHost;
at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:634)
at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:619)
Fix:
Align the versions in the Maven pom.xml so that the local client jars match the version of the remote Hadoop/HDFS cluster. This particular NoSuchMethodError usually indicates mixed Hadoop jar versions on the classpath (for example, hadoop-hdfs from one release alongside hadoop-common from another).
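To check which Hadoop versions Maven actually resolves onto the classpath, the standard dependency report is useful:
mvn dependency:tree -Dincludes=org.apache.hadoop
Any artifact whose resolved version differs from ${hadoop.version} is a likely source of the mismatch.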
Having read the above, do you have a better understanding of basic Hadoop Java API usage? If you would like to learn more, follow the 天达云 industry news channel. Thank you for your support.