创建maven工程
略
pom.xml
<!-- Hadoop client-side libraries needed by the example; all three are pinned to the same version (3.0.3). -->
<dependencies>
<!-- hadoop-common: supplies the org.apache.hadoop.conf / fs / io classes imported by the Java example. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>3.0.3</version>
</dependency>
<!-- hadoop-hdfs: HDFS module; the example writes to an hdfs:// URI. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>3.0.3</version>
</dependency>
<!-- hadoop-client: aggregate client artifact. NOTE(review): it presumably already pulls in the
     artifacts above transitively; confirm before removing any entry. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>3.0.3</version>
</dependency>
</dependencies>
在 pom.xml 点击右键 -> 【maven】 -> 【reimport】 导入依赖
log4j.properties
打印 hadoop debug 信息
在 resources 文件夹下创建 log4j.properties 文件,内容如下:
# Root logger: DEBUG level, sending output to the appender named "stdout".
log4j.rootLogger=DEBUG, stdout
# "stdout" writes log events to the console.
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
# Format each event with a pattern layout.
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
# Pattern: date, level, [logger name], " - ", message, line separator.
log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n
java
package top.malaoshi;
import java.io.File;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
/**
 * Copies the lines of a local text file into a record-compressed (gzip)
 * {@link SequenceFile} on HDFS.
 *
 * <p>Every line becomes one record: the key is the zero-based line index
 * ({@link IntWritable}) and the value is the line text ({@link Text}).
 *
 * <p>Usage: {@code Write [localFile [hdfsBaseUri]]}. Both arguments are optional
 * and fall back to the defaults declared below.
 */
public class Write {

    /** Local input file used when no command-line argument is supplied. */
    private static final String DEFAULT_LOCAL_FILE = "C:/Users/mym/Desktop/4.txt";

    /** HDFS base URI (NameNode host and port) used when no second argument is supplied. */
    private static final String DEFAULT_HDFS_BASE = "hdfs://hadoop1:8020/";

    /**
     * Reads the local file and writes it to HDFS as a SequenceFile.
     *
     * @param args optional: {@code args[0]} local file path, {@code args[1]} HDFS base URI
     * @throws Exception if the local file cannot be read or the HDFS write/close fails;
     *                   failures are propagated so the process exits with a non-zero status
     */
    public static void main(String[] args) throws Exception {
        String localPath = args.length > 0 ? args[0] : DEFAULT_LOCAL_FILE;
        String hdfsBase = args.length > 1 ? args[1] : DEFAULT_HDFS_BASE;
        if (!hdfsBase.endsWith("/")) {
            hdfsBase = hdfsBase + "/";
        }

        File file = new File(localPath);
        // Decode explicitly as UTF-8 instead of the platform default charset: Text stores
        // UTF-8, so relying on the default would garble non-ASCII lines on e.g. a GBK Windows
        // locale. If the input file is not UTF-8, change the charset name here.
        List<String> list = FileUtils.readLines(file, "UTF-8");

        // Run the client as "root" when accessing HDFS. Must be set before the first
        // Hadoop file system call below.
        System.setProperty("HADOOP_USER_NAME", "root");

        // Holds the Hadoop client settings.
        Configuration conf = new Configuration();

        // Reusable key/value holders for the records.
        IntWritable key = new IntWritable();
        Text value = new Text();

        // Gzip codec used to compress the records.
        // NOTE(review): SequenceFile reportedly requires the native hadoop library for
        // GzipCodec; confirm it is available on the machine running this client.
        CompressionCodec gzip = new GzipCodec();

        // Target path: <base URI>/<local file name><codec's default extension>.
        Path path = new Path(hdfsBase + file.getName() + gzip.getDefaultExtension());

        SequenceFile.Writer.Option optPath = SequenceFile.Writer.file(path);
        SequenceFile.Writer.Option optKey = SequenceFile.Writer.keyClass(key.getClass());
        SequenceFile.Writer.Option optVal = SequenceFile.Writer.valueClass(value.getClass());
        // Compression type RECORD (BLOCK is the alternative) with the gzip codec.
        SequenceFile.Writer.Option optCom =
                SequenceFile.Writer.compression(SequenceFile.CompressionType.RECORD, gzip);

        // try-with-resources closes the writer on every path and, unlike
        // IOUtils.closeStream, does not hide an exception thrown by close() --
        // a failed close can mean the file was not completely written.
        try (SequenceFile.Writer writer =
                     SequenceFile.createWriter(conf, optPath, optKey, optVal, optCom)) {
            for (int i = 0, len = list.size(); i < len; i++) {
                key.set(i);
                value.set(list.get(i));
                System.out.println(key + "===" + value);
                writer.append(key, value);
            }
        }
    }
}