mapreduce针对n列orc文件的读写

news/2024/11/29 17:45:00/

建300列的ORC表：可以用 Excel 简单建一个300列、10000行的数据，复制成以tab分隔的txt文件，再上传到HDFS（目标文件名要与后面写程序的输入路径 /tmp/input/300.txt 保持一致）
hdfs dfs -put ddd.txt hdfs://hadoop:9000/tmp/input/300.txt

-- 300-column, all-string ORC table (columns c1 .. c300).
-- Its warehouse directory /user/hive/warehouse/test_orc_300 is the output path
-- of OrcWriterMR and the input path of OrcReaderMR.
-- "if not exists" keeps the script re-runnable when the table is already there.
create table if not exists test_orc_300 (
    c1 string,
    c2 string,
    c3 string,
    c4 string,
    c5 string,
    c6 string,
    c7 string,
    c8 string,
    c9 string,
    c10 string,
    c11 string,
    c12 string,
    c13 string,
    c14 string,
    c15 string,
    c16 string,
    c17 string,
    c18 string,
    c19 string,
    c20 string,
    c21 string,
    c22 string,
    c23 string,
    c24 string,
    c25 string,
    c26 string,
    c27 string,
    c28 string,
    c29 string,
    c30 string,
    c31 string,
    c32 string,
    c33 string,
    c34 string,
    c35 string,
    c36 string,
    c37 string,
    c38 string,
    c39 string,
    c40 string,
    c41 string,
    c42 string,
    c43 string,
    c44 string,
    c45 string,
    c46 string,
    c47 string,
    c48 string,
    c49 string,
    c50 string,
    c51 string,
    c52 string,
    c53 string,
    c54 string,
    c55 string,
    c56 string,
    c57 string,
    c58 string,
    c59 string,
    c60 string,
    c61 string,
    c62 string,
    c63 string,
    c64 string,
    c65 string,
    c66 string,
    c67 string,
    c68 string,
    c69 string,
    c70 string,
    c71 string,
    c72 string,
    c73 string,
    c74 string,
    c75 string,
    c76 string,
    c77 string,
    c78 string,
    c79 string,
    c80 string,
    c81 string,
    c82 string,
    c83 string,
    c84 string,
    c85 string,
    c86 string,
    c87 string,
    c88 string,
    c89 string,
    c90 string,
    c91 string,
    c92 string,
    c93 string,
    c94 string,
    c95 string,
    c96 string,
    c97 string,
    c98 string,
    c99 string,
    c100 string,
    c101 string,
    c102 string,
    c103 string,
    c104 string,
    c105 string,
    c106 string,
    c107 string,
    c108 string,
    c109 string,
    c110 string,
    c111 string,
    c112 string,
    c113 string,
    c114 string,
    c115 string,
    c116 string,
    c117 string,
    c118 string,
    c119 string,
    c120 string,
    c121 string,
    c122 string,
    c123 string,
    c124 string,
    c125 string,
    c126 string,
    c127 string,
    c128 string,
    c129 string,
    c130 string,
    c131 string,
    c132 string,
    c133 string,
    c134 string,
    c135 string,
    c136 string,
    c137 string,
    c138 string,
    c139 string,
    c140 string,
    c141 string,
    c142 string,
    c143 string,
    c144 string,
    c145 string,
    c146 string,
    c147 string,
    c148 string,
    c149 string,
    c150 string,
    c151 string,
    c152 string,
    c153 string,
    c154 string,
    c155 string,
    c156 string,
    c157 string,
    c158 string,
    c159 string,
    c160 string,
    c161 string,
    c162 string,
    c163 string,
    c164 string,
    c165 string,
    c166 string,
    c167 string,
    c168 string,
    c169 string,
    c170 string,
    c171 string,
    c172 string,
    c173 string,
    c174 string,
    c175 string,
    c176 string,
    c177 string,
    c178 string,
    c179 string,
    c180 string,
    c181 string,
    c182 string,
    c183 string,
    c184 string,
    c185 string,
    c186 string,
    c187 string,
    c188 string,
    c189 string,
    c190 string,
    c191 string,
    c192 string,
    c193 string,
    c194 string,
    c195 string,
    c196 string,
    c197 string,
    c198 string,
    c199 string,
    c200 string,
    c201 string,
    c202 string,
    c203 string,
    c204 string,
    c205 string,
    c206 string,
    c207 string,
    c208 string,
    c209 string,
    c210 string,
    c211 string,
    c212 string,
    c213 string,
    c214 string,
    c215 string,
    c216 string,
    c217 string,
    c218 string,
    c219 string,
    c220 string,
    c221 string,
    c222 string,
    c223 string,
    c224 string,
    c225 string,
    c226 string,
    c227 string,
    c228 string,
    c229 string,
    c230 string,
    c231 string,
    c232 string,
    c233 string,
    c234 string,
    c235 string,
    c236 string,
    c237 string,
    c238 string,
    c239 string,
    c240 string,
    c241 string,
    c242 string,
    c243 string,
    c244 string,
    c245 string,
    c246 string,
    c247 string,
    c248 string,
    c249 string,
    c250 string,
    c251 string,
    c252 string,
    c253 string,
    c254 string,
    c255 string,
    c256 string,
    c257 string,
    c258 string,
    c259 string,
    c260 string,
    c261 string,
    c262 string,
    c263 string,
    c264 string,
    c265 string,
    c266 string,
    c267 string,
    c268 string,
    c269 string,
    c270 string,
    c271 string,
    c272 string,
    c273 string,
    c274 string,
    c275 string,
    c276 string,
    c277 string,
    c278 string,
    c279 string,
    c280 string,
    c281 string,
    c282 string,
    c283 string,
    c284 string,
    c285 string,
    c286 string,
    c287 string,
    c288 string,
    c289 string,
    c290 string,
    c291 string,
    c292 string,
    c293 string,
    c294 string,
    c295 string,
    c296 string,
    c297 string,
    c298 string,
    c299 string,
    c300 string
)
stored as orc;

ORC读文件

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.orc.mapred.OrcStruct;
import org.apache.orc.mapreduce.OrcInputFormat;
import java.io.IOException;public class OrcReaderMR {public static class OrcMap extends Mapper<NullWritable,OrcStruct,NullWritable,Text>{private Text text = new Text();public void map(NullWritable key, OrcStruct value,Context output) throws IOException, InterruptedException {StringBuffer bf = new StringBuffer();for(int i=0;i<value.getNumFields();i++){WritableComparable fieldValue = value.getFieldValue(i);bf.append(fieldValue.toString()).append("\t");}text.set(bf.toString());output.write(NullWritable.get(),text);}}public static void main(String[] args) throws Exception {Configuration conf = new Configuration();conf.set("fs.defaultFS", "hdfs://hadoop:9000");conf.set("mapreduce.application.classpath", System.getProperty("user.dir"));System.setProperty("HADOOP_USER_NAME", "root");//设置开发环境变量System.setProperty("hadoop.home.dir", "/opt/hadoop-2.7.3/");Job job = Job.getInstance(conf);job.setJarByClass(OrcReaderMR.class);job.setJobName("OrcReaderMR");job.setMapperClass(OrcMap.class);job.setInputFormatClass(OrcInputFormat.class);job.setNumReduceTasks(0);job.setOutputFormatClass(TextOutputFormat.class);// 指定该mapreduce程序数据的输入路径Path inputPath = new Path("/user/hive/warehouse/test_orc_300");// 指定该mapreduce程序数据的输出路径Path outputPath = new Path("/user/hive/warehouse/test_orc_300_out");FileSystem fs = FileSystem.get(conf);if (fs.exists(outputPath)) {fs.delete(outputPath, true);}FileInputFormat.setInputPaths(job, inputPath);FileOutputFormat.setOutputPath(job, outputPath);boolean waitForCompletion = job.waitForCompletion(true);System.exit(waitForCompletion ? 0 : 1);}
}

运行完后查看文件内容

hdfs dfs -cat hdfs://hadoop:9000/user/hive/warehouse/test_orc_300_out/part-m-00000

ORC写文件

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.orc.OrcConf;
import org.apache.orc.TypeDescription;
import org.apache.orc.mapred.OrcStruct;
import org.apache.orc.mapreduce.OrcOutputFormat;
import parquet.filter2.predicate.Operators;import java.io.IOException;
import java.util.ArrayList;
import java.util.List;public class OrcWriterMR {public static class OrcWriterMapperextends Mapper<LongWritable,Text,NullWritable,OrcStruct> {private TypeDescription schema =TypeDescription.fromString("struct<c1:string,c2:string,c3:string,c4:string,c5:string,c6:string,c7:string,c8:string,c9:string,c10:string,c11:string,c12:string,c13:string,c14:string,c15:string,c16:string,c17:string,c18:string,c19:string,c20:string,c21:string,c22:string,c23:string,c24:string,c25:string,c26:string,c27:string,c28:string,c29:string,c30:string,c31:string,c32:string,c33:string,c34:string,c35:string,c36:string,c37:string,c38:string,c39:string,c40:string,c41:string,c42:string,c43:string,c44:string,c45:string,c46:string,c47:string,c48:string,c49:string,c50:string,c51:string,c52:string,c53:string,c54:string,c55:string,c56:string,c57:string,c58:string,c59:string,c60:string,c61:string,c62:string,c63:string,c64:string,c65:string,c66:string,c67:string,c68:string,c69:string,c70:string,c71:string,c72:string,c73:string,c74:string,c75:string,c76:string,c77:string,c78:string,c79:string,c80:string,c81:string,c82:string,c83:string,c84:string,c85:string,c86:string,c87:string,c88:string,c89:string,c90:string,c91:string,c92:string,c93:string,c94:string,c95:string,c96:string,c97:string,c98:string,c99:string,c100:string,c101:string,c102:string,c103:string,c104:string,c105:string,c106:string,c107:string,c108:string,c109:string,c110:string,c111:string,c112:string,c113:string,c114:string,c115:string,c116:string,c117:string,c118:string,c119:string,c120:string,c121:string,c122:string,c123:string,c124:string,c125:string,c126:string,c127:string,c128:string,c129:string,c130:string,c131:string,c132:string,c133:string,c134:string,c135:string,c136:string,c137:string,c138:string,c139:string,c140:string,c141:string,c142:string,c143:string,c144:string,c145:string,c146:string,c147:string,c148:string,c149:string,c150:string,c151:string,c152:string,c153:string,c154:string,c155:string,c156:string,c157:string,c158:string,c159
:string,c160:string,c161:string,c162:string,c163:string,c164:string,c165:string,c166:string,c167:string,c168:string,c169:string,c170:string,c171:string,c172:string,c173:string,c174:string,c175:string,c176:string,c177:string,c178:string,c179:string,c180:string,c181:string,c182:string,c183:string,c184:string,c185:string,c186:string,c187:string,c188:string,c189:string,c190:string,c191:string,c192:string,c193:string,c194:string,c195:string,c196:string,c197:string,c198:string,c199:string,c200:string,c201:string,c202:string,c203:string,c204:string,c205:string,c206:string,c207:string,c208:string,c209:string,c210:string,c211:string,c212:string,c213:string,c214:string,c215:string,c216:string,c217:string,c218:string,c219:string,c220:string,c221:string,c222:string,c223:string,c224:string,c225:string,c226:string,c227:string,c228:string,c229:string,c230:string,c231:string,c232:string,c233:string,c234:string,c235:string,c236:string,c237:string,c238:string,c239:string,c240:string,c241:string,c242:string,c243:string,c244:string,c245:string,c246:string,c247:string,c248:string,c249:string,c250:string,c251:string,c252:string,c253:string,c254:string,c255:string,c256:string,c257:string,c258:string,c259:string,c260:string,c261:string,c262:string,c263:string,c264:string,c265:string,c266:string,c267:string,c268:string,c269:string,c270:string,c271:string,c272:string,c273:string,c274:string,c275:string,c276:string,c277:string,c278:string,c279:string,c280:string,c281:string,c282:string,c283:string,c284:string,c285:string,c286:string,c287:string,c288:string,c289:string,c290:string,c291:string,c292:string,c293:string,c294:string,c295:string,c296:string,c297:string,c298:string,c299:string,c300:string>");private OrcStruct pair = (OrcStruct) OrcStruct.createValue(schema);private final NullWritable nada = NullWritable.get();// private IntWritable age = new IntWritable();public void map(LongWritable key, Text value,Context output) throws IOException, InterruptedException {List<Text> list =new 
ArrayList<Text>();for(int i=0;i<300;i++){Text text = new Text();list.add(text);}if(!"".equals(value.toString())){String[] arr = value.toString().split("\t");for(int n=0;n<300;n++) {list.get(n).set(arr[n]);pair.setFieldValue(n, list.get(n));}output.write(nada, pair);}}}public static void main(String[] args) throws Exception {// 指定mapreduce运行的hdfs相关的参数Configuration conf = new Configuration();conf.set("fs.defaultFS", "hdfs://hadoop:9000");conf.set("mapreduce.application.classpath", System.getProperty("user.dir"));OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf,"struct<c1:string,c2:string,c3:string,c4:string,c5:string,c6:string,c7:string,c8:string,c9:string,c10:string,c11:string,c12:string,c13:string,c14:string,c15:string,c16:string,c17:string,c18:string,c19:string,c20:string,c21:string,c22:string,c23:string,c24:string,c25:string,c26:string,c27:string,c28:string,c29:string,c30:string,c31:string,c32:string,c33:string,c34:string,c35:string,c36:string,c37:string,c38:string,c39:string,c40:string,c41:string,c42:string,c43:string,c44:string,c45:string,c46:string,c47:string,c48:string,c49:string,c50:string,c51:string,c52:string,c53:string,c54:string,c55:string,c56:string,c57:string,c58:string,c59:string,c60:string,c61:string,c62:string,c63:string,c64:string,c65:string,c66:string,c67:string,c68:string,c69:string,c70:string,c71:string,c72:string,c73:string,c74:string,c75:string,c76:string,c77:string,c78:string,c79:string,c80:string,c81:string,c82:string,c83:string,c84:string,c85:string,c86:string,c87:string,c88:string,c89:string,c90:string,c91:string,c92:string,c93:string,c94:string,c95:string,c96:string,c97:string,c98:string,c99:string,c100:string,c101:string,c102:string,c103:string,c104:string,c105:string,c106:string,c107:string,c108:string,c109:string,c110:string,c111:string,c112:string,c113:string,c114:string,c115:string,c116:string,c117:string,c118:string,c119:string,c120:string,c121:string,c122:string,c123:string,c124:string,c125:string,c126:string,c127:string,c128:string,c129
:string,c130:string,c131:string,c132:string,c133:string,c134:string,c135:string,c136:string,c137:string,c138:string,c139:string,c140:string,c141:string,c142:string,c143:string,c144:string,c145:string,c146:string,c147:string,c148:string,c149:string,c150:string,c151:string,c152:string,c153:string,c154:string,c155:string,c156:string,c157:string,c158:string,c159:string,c160:string,c161:string,c162:string,c163:string,c164:string,c165:string,c166:string,c167:string,c168:string,c169:string,c170:string,c171:string,c172:string,c173:string,c174:string,c175:string,c176:string,c177:string,c178:string,c179:string,c180:string,c181:string,c182:string,c183:string,c184:string,c185:string,c186:string,c187:string,c188:string,c189:string,c190:string,c191:string,c192:string,c193:string,c194:string,c195:string,c196:string,c197:string,c198:string,c199:string,c200:string,c201:string,c202:string,c203:string,c204:string,c205:string,c206:string,c207:string,c208:string,c209:string,c210:string,c211:string,c212:string,c213:string,c214:string,c215:string,c216:string,c217:string,c218:string,c219:string,c220:string,c221:string,c222:string,c223:string,c224:string,c225:string,c226:string,c227:string,c228:string,c229:string,c230:string,c231:string,c232:string,c233:string,c234:string,c235:string,c236:string,c237:string,c238:string,c239:string,c240:string,c241:string,c242:string,c243:string,c244:string,c245:string,c246:string,c247:string,c248:string,c249:string,c250:string,c251:string,c252:string,c253:string,c254:string,c255:string,c256:string,c257:string,c258:string,c259:string,c260:string,c261:string,c262:string,c263:string,c264:string,c265:string,c266:string,c267:string,c268:string,c269:string,c270:string,c271:string,c272:string,c273:string,c274:string,c275:string,c276:string,c277:string,c278:string,c279:string,c280:string,c281:string,c282:string,c283:string,c284:string,c285:string,c286:string,c287:string,c288:string,c289:string,c290:string,c291:string,c292:string,c293:string,c294:string,c295:string,
c296:string,c297:string,c298:string,c299:string,c300:string>");//分布式集群设置// conf.set("mapred.jar", System.getProperty("user.dir")+"/WordCount.jar");System.setProperty("HADOOP_USER_NAME", "root");//设置开发环境变量System.setProperty("hadoop.home.dir", "/opt/hadoop-2.7.3/");// 设置mapreduce运行模式,这也是默认值// conf.set("mapreduce.framework.name", "yarn");// conf.set("yarn.resourcemanager.hostname", "hadoop");// 获取job对象Job job = Job.getInstance(conf);// 设置jar包所在路径job.setJarByClass(OrcWriterMR.class);job.setJobName("OrcWriterMR");job.setNumReduceTasks(0);// 指定mapper类和reducer类job.setMapperClass(OrcWriterMapper.class);job.setInputFormatClass(TextInputFormat.class);job.setOutputFormatClass(OrcOutputFormat.class);// 指定该mapreduce程序数据的输入路径Path inputPath = new Path("/tmp/input/300.txt");// 指定该mapreduce程序数据的输出路径Path outputPath = new Path("/user/hive/warehouse/test_orc_300");FileSystem fs = FileSystem.get(conf);if (fs.exists(outputPath)) {fs.delete(outputPath, true);}FileInputFormat.setInputPaths(job, inputPath);FileOutputFormat.setOutputPath(job, outputPath);boolean waitForCompletion = job.waitForCompletion(true);System.exit(waitForCompletion ? 0 : 1);}
}

运行完后查看表的内容
-- Ad-hoc check of the rows in test_orc_300 after the OrcWriterMR job has run.
select *
from test_orc_300;


http://www.ppmy.cn/news/382819.html

相关文章

Neutron DVR 分布式虚拟路由(Neutron Distributed Virtual Routing)

Neutron 作为 OpenStack 一个基础性关键服务，高可用性（HA）和扩展性是它的基本需求之一。对 neutron server 来说，因为它是无状态的，我们可以使用负载均衡器（Load Balancer）比如 HAProxy 来实现其…

正确理解 CSS 权值(不应存在进制)

这是我的博客。 欢迎在 freeCodeCamp 社区阅读原文。 前言 直接说结论，CSS 选择器（selector）的权值（以下直接称为“CSS 权值”）不应以进制来理解，因此进制既不是 10，100，也不是 25…

微信小程序知识云开发

一个小程序最多5个服务类目，一个月可以修改3次类目 小程序侵权投诉的发起与应对 软件著作权作品登记证书 实现小程序支付功能 如何借助官方支付api简单、高效率地实现小程序支付功能 借助小程序云开发实现 只需要一个简单的云函数 实现微信小程序支付功能 ex…

【Vue】从量子链网页钱包看vue项目结构以及开发部署最佳实践

项目介绍 qtum-web-wallet 是量子链推出的网页版钱包。 项目地址 https://github.com/qtumproject/qtum-web-wallet 项目采用vue搭建。 通过网页可以实现钱包的创建备份转账以及智能合约的部署调用功能。 项目结构 这个vue项目采用了vue-loader,所以整体用的是嵌套结构。 b…

Android P中的AVB校验

avb校验功能主要是由external/avb/libavb库实现的，该库主要完成的工作包括各个分区镜像的校验，签名验证，以及vbmeta数据的解析，包括了各种flags的处理以及dm-verity所需要的参数解析。avb校验库的主入口为 avb_slot_verify(AvbOps…

Android P 如何挂载system镜像到根目录

Android O/P 版本以来，谷歌加入了A/B system的特性，此时ramdisk和system是一起放在同一个system.img镜像中的。而系统起来之后也就不存在system分区了，而是直接把system镜像挂载到/根目录上。那么这个操作是怎么进行的呢？ system.…

[转载型] Neutron 系列 (17): Neutron 分布式虚拟路由【上】

http://www.aboutyun.com/forum.php?modviewthread&tid16860&highlightneutron%2B%2B%CF%B5%C1%D0 1.路由的相关知识有哪些？ 2.Neutron 的传统和 DVR Router是什么？ 3.DVR的功能有哪些？ Neutron 作为 OpenStack 一个基础性关键服务…

[连载型] Neutron 系列 (17): Neutron 分布式虚拟路由【上】

问题导读： 1.路由的相关知识有哪些？ 2.Neutron 的传统和 DVR Router是什么？ 3.DVR的功能有哪些？ Neutron 作为 OpenStack 一个基础性关键服务，高可用性（HA）和扩展性是它的基本需求之一。对 neut…