hadoop3.x:Sequence File序列化文件-写文件

创建maven工程

使用 IDEA 创建 Maven 工程前,必须先修改 IDEA 的相关配置。

pom.xml

  1. <dependencies>
  2. <dependency>
  3. <groupId>org.apache.hadoop</groupId>
  4. <artifactId>hadoop-common</artifactId>
  5. <version>3.0.3</version>
  6. </dependency>
  7. <dependency>
  8. <groupId>org.apache.hadoop</groupId>
  9. <artifactId>hadoop-hdfs</artifactId>
  10. <version>3.0.3</version>
  11. </dependency>
  12. <dependency>
  13. <groupId>org.apache.hadoop</groupId>
  14. <artifactId>hadoop-client</artifactId>
  15. <version>3.0.3</version>
  16. </dependency>
  17. </dependencies>

在 pom.xml 上点击右键 -> 【Maven】 -> 【Reimport】,导入依赖。

log4j.properties

打印 hadoop debug 信息

resources 文件夹下创建 log4j.properties 文件,内容如下:

  1. log4j.rootLogger=DEBUG, stdout
  2. log4j.appender.stdout=org.apache.log4j.ConsoleAppender
  3. log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
  4. log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n

java

  1. package top.malaoshi;
  2. import java.io.File;
  3. import java.util.List;
  4. import org.apache.commons.io.FileUtils;
  5. import org.apache.hadoop.conf.Configuration;
  6. import org.apache.hadoop.fs.Path;
  7. import org.apache.hadoop.io.IOUtils;
  8. import org.apache.hadoop.io.IntWritable;
  9. import org.apache.hadoop.io.SequenceFile;
  10. import org.apache.hadoop.io.Text;
  11. import org.apache.hadoop.io.compress.CompressionCodec;
  12. import org.apache.hadoop.io.compress.GzipCodec;
  13. public class Write {
  14. public static void main(String[] args) throws Exception {
  15. File file=new File("C:/Users/mym/Desktop/4.txt");
  16. List<String> list=FileUtils.readLines(file);
  17. // 设置客户端运行身份 以root去操作访问HDFS
  18. System.setProperty("HADOOP_USER_NAME", "root");
  19. // Configuration 用于指定相关参数属性
  20. Configuration conf = new Configuration();
  21. // sequence file key、value
  22. IntWritable key = new IntWritable();
  23. Text value = new Text();
  24. // 构造Writer参数属性
  25. SequenceFile.Writer writer = null;
  26. //使用 gzip 构建压缩编码器
  27. CompressionCodec gzip = new GzipCodec();
  28. /*
  29. * 文件保存路径
  30. * namenode端口号
  31. * 文件名后面加上压缩格式的后缀
  32. */
  33. Path path=new Path("hdfs://hadoop1:8020/"+file.getName()+gzip.getDefaultExtension());
  34. SequenceFile.Writer.Option optPath = SequenceFile.Writer.file(path);
  35. //指定key的class
  36. SequenceFile.Writer.Option optKey = SequenceFile.Writer.keyClass(key.getClass());
  37. //指定value的class
  38. SequenceFile.Writer.Option optVal = SequenceFile.Writer.valueClass(value.getClass());
  39. /*
  40. * 指定压缩类型:record(或block)
  41. * 指定压缩编码器:gzip
  42. */
  43. SequenceFile.Writer.Option optCom = SequenceFile.Writer.compression(SequenceFile.CompressionType.RECORD, gzip);
  44. try {
  45. writer = SequenceFile.createWriter(conf, optPath, optKey, optVal, optCom);
  46. for (int i = 0,len=list.size(); i < len; i++) {
  47. key.set(i);
  48. value.set(list.get(i));
  49. System.out.println(key+"==="+value);
  50. writer.append(key, value);
  51. }
  52. }catch(Exception e){
  53. e.printStackTrace();
  54. } finally {
  55. IOUtils.closeStream(writer);
  56. }
  57. }
  58. }

原文出处:http://malaoshi.top/show_1IX2AIA6dLbk.html