Java Code for Hadoop HDFS File Operations
Published: 2019-06-22


1. Create a directory

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeDir {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx");
        // mkdirs() creates the directory (and any missing parents);
        // fs.create() would create an empty *file* instead.
        fs.mkdirs(path);
        fs.close();
    }
}

2. Delete a directory

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteDir {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx");
        // delete(path, true) removes the directory recursively;
        // the single-argument delete(Path) is deprecated.
        fs.delete(path, true);
        fs.close();
    }
}

3. Write a file

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WriteFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx.txt");
        FSDataOutputStream out = fs.create(path);
        out.writeUTF("da jia hao,cai shi zhen de hao!");
        // Close the output stream before the FileSystem,
        // otherwise the data may never be flushed to HDFS.
        out.close();
        fs.close();
    }
}

4. Read a file

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReadFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx.txt");

        if (fs.exists(path)) {
            FSDataInputStream is = fs.open(path);
            FileStatus status = fs.getFileStatus(path);
            // Size the buffer from the file length and read the whole file.
            byte[] buffer = new byte[(int) status.getLen()];
            is.readFully(0, buffer);
            is.close();
            fs.close();
            // buffer.toString() would only print the array reference;
            // new String(buffer) decodes the actual contents. (Note: data
            // written with writeUTF() carries a 2-byte length prefix.)
            System.out.println(new String(buffer));
        }
    }
}

5. Upload a local file to HDFS

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path src = new Path("/home/hadoop/xxxx.txt");
        Path dst = new Path("/user/hadoop/hdfs/");
        fs.copyFromLocalFile(src, dst);
        fs.close();
    }
}

6. Delete a file

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx.txt");
        // delete(path, false): non-recursive delete is enough for a single file.
        fs.delete(path, false);
        fs.close();
    }
}

7. List all subdirectories and files under a given directory

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetAllChildFile {
    static Configuration conf = new Configuration();

    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop");
        getFile(path, fs);
        //fs.close();
    }

    // Walk the tree recursively: recurse into directories, print file paths.
    public static void getFile(Path path, FileSystem fs) throws IOException {
        FileStatus[] fileStatus = fs.listStatus(path);
        for (int i = 0; i < fileStatus.length; i++) {
            if (fileStatus[i].isDir()) {
                getFile(fileStatus[i].getPath(), fs);
            } else {
                System.out.println(fileStatus[i].getPath().toString());
            }
        }
    }
}

8. Find where a file is located in the HDFS cluster

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FindFile {

    public static void main(String[] args) throws IOException {
        getFileLocal();
    }

    /**
     * Find where a file's blocks are located in the HDFS cluster.
     */
    public static void getFileLocal() throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx.txt");

        FileStatus status = fs.getFileStatus(path);
        BlockLocation[] locations = fs.getFileBlockLocations(status, 0, status.getLen());

        int length = locations.length;
        for (int i = 0; i < length; i++) {
            // Print the first host holding each block of the file.
            String[] hosts = locations[i].getHosts();
            System.out.println("block_" + i + "_location: " + hosts[0]);
        }
        fs.close();
    }
}

9. Get the name information of all nodes in the HDFS cluster

package com.hadoop.file;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

public class FindFile {

    public static void main(String[] args) throws IOException {
        getHDFSNode();
    }

    /**
     * Print the name of every DataNode in the HDFS cluster.
     */
    public static void getHDFSNode() throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // The cast only succeeds when fs is actually a DistributedFileSystem,
        // i.e. the default filesystem points at an HDFS cluster, not the local FS.
        DistributedFileSystem dfs = (DistributedFileSystem) fs;
        DatanodeInfo[] dataNodeStats = dfs.getDataNodeStats();

        for (int i = 0; i < dataNodeStats.length; i++) {
            System.out.println("DataNode_" + i + "_Name: " + dataNodeStats[i].getHostName());
        }
        fs.close();
    }
}

 

In a pseudo-distributed environment, operating on the FileSystem can throw an exception.

The Java code is as follows:

FileSystem fs = FileSystem.get(conf);
in = fs.open(new Path("hdfs://localhost:9000/wzqSearchEngine/crawl/inject_in"));

 

The exception thrown is as follows:

    Exception in thread "main" java.lang.IllegalArgumentException: Wrong FS: hdfs://localhost:9000/wzqSearchEngine/crawl/inject_in, expected: file:/// 
        at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:310) 
        at org.apache.hadoop.fs.RawLocalFileSystem.pathToFile(RawLocalFileSystem.java:47) 
        at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:357) 
        at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:245) 
        at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSInputChecker.<init>(ChecksumFileSystem.java:125) 
        at org.apache.hadoop.fs.ChecksumFileSystem.open(ChecksumFileSystem.java:283) 
        at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:356) 
        at com.netease.hadoop.HDFSCatWithAPI.main(HDFSCatWithAPI.java:23) 
Solution:
Put Hadoop's core-site.xml and hdfs-site.xml into the current project (under the bin folder of the Eclipse working directory), so the client picks up the cluster's default filesystem instead of the local file:/// default.
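
Alternatively, if copying the configuration files is inconvenient, the default filesystem can be set on the Configuration in code. A minimal sketch, assuming the hdfs://localhost:9000 address from the example above (the key is fs.defaultFS on Hadoop 2.x and later; older releases used fs.default.name):

Configuration conf = new Configuration();
// Point the client at the remote NameNode instead of the local file:/// default.
conf.set("fs.defaultFS", "hdfs://localhost:9000");
FileSystem fs = FileSystem.get(conf);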

Summary:

Because a remote HDFS is being accessed, the FileSystem needs to be obtained via a URI.
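
For example, a minimal sketch (requires java.net.URI), again assuming the hdfs://localhost:9000 NameNode address from the exception above:

Configuration conf = new Configuration();
// FileSystem.get(URI, Configuration) binds the client to the HDFS behind that URI,
// regardless of what default filesystem the local configuration declares.
FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000"), conf);
FSDataInputStream in = fs.open(new Path("/wzqSearchEngine/crawl/inject_in"));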


Reposted from: https://www.cnblogs.com/wuzhenquan/p/3617751.html
