1. Create a directory

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeDir {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx");
        // fs.create(path) would create an empty *file*; mkdirs() creates the
        // directory (and any missing parent directories).
        fs.mkdirs(path);
        fs.close();
    }
}
2. Delete a directory

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteDir {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx");
        // The second argument enables recursive deletion, which a non-empty
        // directory requires; the one-argument delete(Path) is deprecated.
        fs.delete(path, true);
        fs.close();
    }
}
3. Write a file

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WriteFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx.txt");
        FSDataOutputStream out = fs.create(path);
        out.writeUTF("da jia hao,cai shi zhen de hao!");
        // Close the output stream before closing the FileSystem,
        // otherwise buffered data may never be flushed to HDFS.
        out.close();
        fs.close();
    }
}
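Note that writeUTF() prepends a two-byte length header and uses Java's modified UTF-8, so the resulting file is not plain text (and reading it back as raw bytes, as in the next example, will show two leading length bytes). If plain text output is what you want, here is a minimal sketch, reusing fs and path from the example above:

// Sketch: write raw UTF-8 bytes instead of writeUTF's length-prefixed format.
FSDataOutputStream out = fs.create(path);
out.write("da jia hao,cai shi zhen de hao!".getBytes("UTF-8"));
out.close();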
4. Read a file

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReadFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx.txt");
        if (fs.exists(path)) {
            FSDataInputStream is = fs.open(path);
            FileStatus status = fs.getFileStatus(path);
            // Size the buffer to the file length.
            byte[] buffer = new byte[(int) status.getLen()];
            is.readFully(0, buffer);
            is.close();
            fs.close();
            // buffer.toString() would print the array's hash code,
            // not its content; decode the bytes instead.
            System.out.println(new String(buffer));
        }
    }
}
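Reading the whole file into a single byte array only works for files that fit in memory. A hedged alternative that streams the content to stdout instead, using org.apache.hadoop.io.IOUtils and reusing fs and path from the example above:

// Sketch: stream the file rather than buffering it all in memory.
FSDataInputStream is = fs.open(path);
try {
    // Copy in 4 KB chunks; 'false' leaves the stream open so the
    // finally block can close it explicitly.
    org.apache.hadoop.io.IOUtils.copyBytes(is, System.out, 4096, false);
} finally {
    is.close();
}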
5. Upload a local file to HDFS
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path src = new Path("/home/hadoop/xxxx.txt");  // local source file
        Path dst = new Path("/user/hadoop/hdfs/");     // HDFS destination directory
        fs.copyFromLocalFile(src, dst);
        fs.close();
    }
}
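FileSystem also exposes an overload that makes the copy semantics explicit. A minimal sketch, reusing fs, src, and dst from the example above:

// Sketch: delSrc = false keeps the local file; overwrite = false makes the
// copy fail instead of silently replacing an existing destination file.
fs.copyFromLocalFile(false, false, src, dst);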
6. Delete a file

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx.txt");
        // 'false' = non-recursive, which is all a single file needs;
        // the one-argument delete(Path) is deprecated.
        fs.delete(path, false);
        fs.close();
    }
}
7. List all subdirectories and files under a given directory

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetAllChildFile {
    static Configuration conf = new Configuration();

    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop");
        getFile(path, fs);
        // fs.close();
    }

    public static void getFile(Path path, FileSystem fs) throws IOException {
        FileStatus[] fileStatus = fs.listStatus(path);
        for (int i = 0; i < fileStatus.length; i++) {
            Path p = fileStatus[i].getPath();
            System.out.println(p.toString());
            if (fileStatus[i].isDir()) {
                // Recurse into the subdirectory.
                getFile(p, fs);
            }
        }
    }
}
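On Hadoop releases that provide FileSystem.listFiles (2.x and later; check your version), the manual recursion can be replaced with a built-in recursive iterator. A sketch, assuming the additional imports org.apache.hadoop.fs.LocatedFileStatus and org.apache.hadoop.fs.RemoteIterator:

// Sketch: listFiles with recursive = true walks the tree for us.
// Note it yields files only; directories are not returned.
RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/user/hadoop"), true);
while (it.hasNext()) {
    System.out.println(it.next().getPath().toString());
}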
8. Find where a file is located in the HDFS cluster

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FindFile {
    public static void main(String[] args) throws IOException {
        getFileLocal();
    }

    /**
     * Find the locations of a file's blocks in the HDFS cluster.
     */
    public static void getFileLocal() throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx.txt");
        FileStatus status = fs.getFileStatus(path);
        BlockLocation[] locations = fs.getFileBlockLocations(status, 0, status.getLen());
        int length = locations.length;
        for (int i = 0; i < length; i++) {
            // Each block may be replicated on several hosts; print the first.
            String[] hosts = locations[i].getHosts();
            System.out.println("block_" + i + "_location: " + hosts[0]);
        }
        fs.close();
    }
}
9. Names of all nodes in the HDFS cluster

package com.hadoop.file;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

public class FindFile {
    public static void main(String[] args) throws IOException {
        getHDFSNode();
    }

    /**
     * List the names of all DataNodes in the HDFS cluster.
     */
    public static void getHDFSNode() throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // getDataNodeStats() only exists on DistributedFileSystem, so this
        // cast fails unless the default filesystem is actually HDFS.
        DistributedFileSystem dfs = (DistributedFileSystem) fs;
        DatanodeInfo[] dataNodeStats = dfs.getDataNodeStats();
        for (int i = 0; i < dataNodeStats.length; i++) {
            System.out.println("DataNode_" + i + "_Name: " + dataNodeStats[i].getHostName());
        }
        fs.close();
    }
}
In a pseudo-distributed environment, operating on the FileSystem can throw an exception.
The Java code:
FileSystem fs = FileSystem.get(conf);
in = fs.open(new Path("hdfs://localhost:9000/wzqSearchEngine/crawl/inject_in"));
The exception thrown:
Exception in thread "main" java.lang.IllegalArgumentException: Wrong FS: hdfs://localhost:9000/wzqSearchEngine/crawl/inject_in, expected: file:///
    at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:310)
    at org.apache.hadoop.fs.RawLocalFileSystem.pathToFile(RawLocalFileSystem.java:47)
    at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:357)
    at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:245)
    at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSInputChecker.<init>(ChecksumFileSystem.java:125)
    at org.apache.hadoop.fs.ChecksumFileSystem.open(ChecksumFileSystem.java:283)
    at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:356)
    at com.netease.hadoop.HDFSCatWithAPI.main(HDFSCatWithAPI.java:23)

Solution: put Hadoop's core-site.xml and hdfs-site.xml on the project classpath (under the bin folder of the Eclipse workspace directory), so the client picks up the HDFS configuration instead of defaulting to the local filesystem.
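If copying the XML files is inconvenient, the same effect can usually be achieved in code by pointing the client's default filesystem at the NameNode. A sketch, assuming Hadoop 2.x property naming (older releases used fs.default.name) and the localhost:9000 address from the exception above:

Configuration conf = new Configuration();
// Tell the client that paths resolve against HDFS, not the local filesystem.
conf.set("fs.defaultFS", "hdfs://localhost:9000");
FileSystem fs = FileSystem.get(conf);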
Summary:
Because the client is accessing a remote HDFS, the FileSystem should be obtained through a URI.
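A minimal sketch of obtaining the FileSystem through a URI, using the NameNode address from the exception above (adjust the host and port to your cluster), with the additional import java.net.URI:

Configuration conf = new Configuration();
// Bind the FileSystem to the HDFS NameNode explicitly instead of relying on
// whatever fs.defaultFS the client classpath happens to provide.
FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000"), conf);
FSDataInputStream in = fs.open(new Path("/wzqSearchEngine/crawl/inject_in"));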