To build a 300-column ORC table, first prepare some test data: create a 300-column, 10000-row sheet in Excel (or generate it programmatically) and save it as a tab-delimited text file, e.g. 300.txt. Then upload it to HDFS:
hdfs dfs -put 300.txt hdfs://hadoop:9000/tmp/input/
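If Excel is inconvenient, the same file can be produced with a few lines of code (a minimal sketch; the 10000x300 shape and the 300.txt name simply match this walkthrough):

import java.io.PrintWriter;

// Writes a 10000-row, 300-column tab-delimited file named 300.txt.
public class GenTestData {
    public static void main(String[] args) throws Exception {
        try (PrintWriter out = new PrintWriter("300.txt", "UTF-8")) {
            for (int row = 1; row <= 10000; row++) {
                StringBuilder line = new StringBuilder();
                for (int col = 1; col <= 300; col++) {
                    if (col > 1) line.append('\t');
                    line.append('r').append(row).append('c').append(col);
                }
                out.println(line);
            }
        }
    }
}

Next, create the matching 300-column ORC table in Hive: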
create table test_orc_300(
c1 string ,
c2 string ,
c3 string ,
c4 string ,
c5 string ,
c6 string ,
c7 string ,
c8 string ,
c9 string ,
c10 string ,
c11 string ,
c12 string ,
c13 string ,
c14 string ,
c15 string ,
c16 string ,
c17 string ,
c18 string ,
c19 string ,
c20 string ,
c21 string ,
c22 string ,
c23 string ,
c24 string ,
c25 string ,
c26 string ,
c27 string ,
c28 string ,
c29 string ,
c30 string ,
c31 string ,
c32 string ,
c33 string ,
c34 string ,
c35 string ,
c36 string ,
c37 string ,
c38 string ,
c39 string ,
c40 string ,
c41 string ,
c42 string ,
c43 string ,
c44 string ,
c45 string ,
c46 string ,
c47 string ,
c48 string ,
c49 string ,
c50 string ,
c51 string ,
c52 string ,
c53 string ,
c54 string ,
c55 string ,
c56 string ,
c57 string ,
c58 string ,
c59 string ,
c60 string ,
c61 string ,
c62 string ,
c63 string ,
c64 string ,
c65 string ,
c66 string ,
c67 string ,
c68 string ,
c69 string ,
c70 string ,
c71 string ,
c72 string ,
c73 string ,
c74 string ,
c75 string ,
c76 string ,
c77 string ,
c78 string ,
c79 string ,
c80 string ,
c81 string ,
c82 string ,
c83 string ,
c84 string ,
c85 string ,
c86 string ,
c87 string ,
c88 string ,
c89 string ,
c90 string ,
c91 string ,
c92 string ,
c93 string ,
c94 string ,
c95 string ,
c96 string ,
c97 string ,
c98 string ,
c99 string ,
c100 string ,
c101 string ,
c102 string ,
c103 string ,
c104 string ,
c105 string ,
c106 string ,
c107 string ,
c108 string ,
c109 string ,
c110 string ,
c111 string ,
c112 string ,
c113 string ,
c114 string ,
c115 string ,
c116 string ,
c117 string ,
c118 string ,
c119 string ,
c120 string ,
c121 string ,
c122 string ,
c123 string ,
c124 string ,
c125 string ,
c126 string ,
c127 string ,
c128 string ,
c129 string ,
c130 string ,
c131 string ,
c132 string ,
c133 string ,
c134 string ,
c135 string ,
c136 string ,
c137 string ,
c138 string ,
c139 string ,
c140 string ,
c141 string ,
c142 string ,
c143 string ,
c144 string ,
c145 string ,
c146 string ,
c147 string ,
c148 string ,
c149 string ,
c150 string ,
c151 string ,
c152 string ,
c153 string ,
c154 string ,
c155 string ,
c156 string ,
c157 string ,
c158 string ,
c159 string ,
c160 string ,
c161 string ,
c162 string ,
c163 string ,
c164 string ,
c165 string ,
c166 string ,
c167 string ,
c168 string ,
c169 string ,
c170 string ,
c171 string ,
c172 string ,
c173 string ,
c174 string ,
c175 string ,
c176 string ,
c177 string ,
c178 string ,
c179 string ,
c180 string ,
c181 string ,
c182 string ,
c183 string ,
c184 string ,
c185 string ,
c186 string ,
c187 string ,
c188 string ,
c189 string ,
c190 string ,
c191 string ,
c192 string ,
c193 string ,
c194 string ,
c195 string ,
c196 string ,
c197 string ,
c198 string ,
c199 string ,
c200 string ,
c201 string ,
c202 string ,
c203 string ,
c204 string ,
c205 string ,
c206 string ,
c207 string ,
c208 string ,
c209 string ,
c210 string ,
c211 string ,
c212 string ,
c213 string ,
c214 string ,
c215 string ,
c216 string ,
c217 string ,
c218 string ,
c219 string ,
c220 string ,
c221 string ,
c222 string ,
c223 string ,
c224 string ,
c225 string ,
c226 string ,
c227 string ,
c228 string ,
c229 string ,
c230 string ,
c231 string ,
c232 string ,
c233 string ,
c234 string ,
c235 string ,
c236 string ,
c237 string ,
c238 string ,
c239 string ,
c240 string ,
c241 string ,
c242 string ,
c243 string ,
c244 string ,
c245 string ,
c246 string ,
c247 string ,
c248 string ,
c249 string ,
c250 string ,
c251 string ,
c252 string ,
c253 string ,
c254 string ,
c255 string ,
c256 string ,
c257 string ,
c258 string ,
c259 string ,
c260 string ,
c261 string ,
c262 string ,
c263 string ,
c264 string ,
c265 string ,
c266 string ,
c267 string ,
c268 string ,
c269 string ,
c270 string ,
c271 string ,
c272 string ,
c273 string ,
c274 string ,
c275 string ,
c276 string ,
c277 string ,
c278 string ,
c279 string ,
c280 string ,
c281 string ,
c282 string ,
c283 string ,
c284 string ,
c285 string ,
c286 string ,
c287 string ,
c288 string ,
c289 string ,
c290 string ,
c291 string ,
c292 string ,
c293 string ,
c294 string ,
c295 string ,
c296 string ,
c297 string ,
c298 string ,
c299 string ,
c300 string
) stored as orc ;
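The 300 column definitions above can also be generated instead of typed by hand (a minimal sketch that prints the same DDL):

// Prints the CREATE TABLE statement above: 300 string columns, stored as ORC.
public class GenDdl {
    public static void main(String[] args) {
        StringBuilder sb = new StringBuilder("create table test_orc_300(\n");
        for (int i = 1; i <= 300; i++) {
            sb.append('c').append(i).append(" string").append(i < 300 ? " ,\n" : "\n");
        }
        sb.append(") stored as orc ;");
        System.out.println(sb);
    }
}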
Reading the ORC file with MapReduce
The job below scans the ORC files under the table directory and dumps every row as tab-separated text:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.orc.mapred.OrcStruct;
import org.apache.orc.mapreduce.OrcInputFormat;
import java.io.IOException;

public class OrcReaderMR {

    public static class OrcMap extends Mapper<NullWritable, OrcStruct, NullWritable, Text> {
        private final Text text = new Text();

        @Override
        public void map(NullWritable key, OrcStruct value, Context output)
                throws IOException, InterruptedException {
            StringBuilder bf = new StringBuilder();
            for (int i = 0; i < value.getNumFields(); i++) {
                WritableComparable fieldValue = value.getFieldValue(i);
                // getFieldValue returns null for null ORC fields, so guard before toString()
                bf.append(fieldValue == null ? "" : fieldValue.toString()).append('\t');
            }
            text.set(bf.toString());
            output.write(NullWritable.get(), text);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop:9000");
        conf.set("mapreduce.application.classpath", System.getProperty("user.dir"));
        System.setProperty("HADOOP_USER_NAME", "root");
        // Development-environment setting
        System.setProperty("hadoop.home.dir", "/opt/hadoop-2.7.3/");

        Job job = Job.getInstance(conf);
        job.setJarByClass(OrcReaderMR.class);
        job.setJobName("OrcReaderMR");
        job.setMapperClass(OrcMap.class);
        job.setInputFormatClass(OrcInputFormat.class);
        job.setNumReduceTasks(0);
        job.setOutputFormatClass(TextOutputFormat.class);

        // Input path: the ORC files under the Hive table directory
        Path inputPath = new Path("/user/hive/warehouse/test_orc_300");
        // Output path: plain-text dump of the table
        Path outputPath = new Path("/user/hive/warehouse/test_orc_300_out");
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }
        FileInputFormat.setInputPaths(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        boolean waitForCompletion = job.waitForCompletion(true);
        System.exit(waitForCompletion ? 0 : 1);
    }
}
After the job finishes, inspect the output:
hdfs dfs -cat hdfs://hadoop:9000/user/hive/warehouse/test_orc_300_out/part-m-00000
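The same table can also be read without MapReduce through the ORC core reader API (a minimal sketch, assuming orc-core and its hive-storage-api dependency are on the classpath; the part-m-00000 file name is illustrative, and only column c1 is printed):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;

// Reads one ORC file batch by batch and prints the first column.
public class OrcCoreReader {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop:9000");
        Path path = new Path("/user/hive/warehouse/test_orc_300/part-m-00000");
        Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
        RecordReader rows = reader.rows();
        VectorizedRowBatch batch = reader.getSchema().createRowBatch();
        while (rows.nextBatch(batch)) {
            BytesColumnVector c1 = (BytesColumnVector) batch.cols[0];
            for (int r = 0; r < batch.size; r++) {
                System.out.println(c1.toString(r));
            }
        }
        rows.close();
    }
}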
Writing the ORC file with MapReduce
The job below converts the tab-delimited text file into ORC and writes it into the Hive table directory, so the rows become queryable from Hive:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.orc.OrcConf;
import org.apache.orc.TypeDescription;
import org.apache.orc.mapred.OrcStruct;
import org.apache.orc.mapreduce.OrcOutputFormat;
import java.io.IOException;
public class OrcWriterMR {

    // Builds "struct<c1:string,...,c300:string>" programmatically instead of
    // hard-coding the 300-column literal; the resulting string is identical.
    private static String buildSchema() {
        StringBuilder sb = new StringBuilder("struct<");
        for (int i = 1; i <= 300; i++) {
            if (i > 1) sb.append(',');
            sb.append('c').append(i).append(":string");
        }
        return sb.append('>').toString();
    }

    public static class OrcWriterMapper extends Mapper<LongWritable, Text, NullWritable, OrcStruct> {
        private final TypeDescription schema = TypeDescription.fromString(buildSchema());
        private final OrcStruct pair = (OrcStruct) OrcStruct.createValue(schema);
        private final NullWritable nada = NullWritable.get();
        // Reusable Text cells, one per column, instead of allocating 300 per record
        private final Text[] cells = new Text[300];
        {
            for (int i = 0; i < 300; i++) {
                cells[i] = new Text();
            }
        }

        @Override
        public void map(LongWritable key, Text value, Context output)
                throws IOException, InterruptedException {
            String line = value.toString();
            if (!line.isEmpty()) {
                // split with limit -1 keeps trailing empty fields; every line is
                // expected to carry exactly 300 tab-separated values
                String[] arr = line.split("\t", -1);
                for (int n = 0; n < 300; n++) {
                    cells[n].set(arr[n]);
                    pair.setFieldValue(n, cells[n]);
                }
                output.write(nada, pair);
            }
        }
    }

    public static void main(String[] args) throws Exception {
        // HDFS-related settings for the job
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop:9000");
        conf.set("mapreduce.application.classpath", System.getProperty("user.dir"));
        // The output schema must match the Hive table definition
        OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, buildSchema());
        // Settings for a distributed cluster
        // conf.set("mapred.jar", System.getProperty("user.dir") + "/WordCount.jar");
        System.setProperty("HADOOP_USER_NAME", "root");
        // Development-environment setting
        System.setProperty("hadoop.home.dir", "/opt/hadoop-2.7.3/");
        // Run on YARN; this is also the default
        // conf.set("mapreduce.framework.name", "yarn");
        // conf.set("yarn.resourcemanager.hostname", "hadoop");

        Job job = Job.getInstance(conf);
        job.setJarByClass(OrcWriterMR.class);
        job.setJobName("OrcWriterMR");
        job.setNumReduceTasks(0);
        job.setMapperClass(OrcWriterMapper.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(OrcOutputFormat.class);

        // Input path: the tab-delimited text file uploaded earlier
        Path inputPath = new Path("/tmp/input/300.txt");
        // Output path: the Hive table directory (existing contents are replaced)
        Path outputPath = new Path("/user/hive/warehouse/test_orc_300");
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }
        FileInputFormat.setInputPaths(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        boolean waitForCompletion = job.waitForCompletion(true);
        System.exit(waitForCompletion ? 0 : 1);
    }
}
After the job finishes, query the table from Hive:
select * from test_orc_300;
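For comparison, ORC files can also be written without MapReduce through the ORC core writer API (a minimal sketch, assuming orc-core is on the classpath; the output path and the single demo row are illustrative):

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;

// Writes one demo row into a new 300-column ORC file with the core writer API.
public class OrcCoreWriter {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop:9000");
        TypeDescription schema = TypeDescription.createStruct();
        for (int i = 1; i <= 300; i++) {
            schema.addField("c" + i, TypeDescription.createString());
        }
        Writer writer = OrcFile.createWriter(
                new Path("/tmp/output/core-writer-demo.orc"),
                OrcFile.writerOptions(conf).setSchema(schema));
        VectorizedRowBatch batch = schema.createRowBatch();
        int row = batch.size++;
        for (int i = 0; i < 300; i++) {
            BytesColumnVector col = (BytesColumnVector) batch.cols[i];
            col.setVal(row, ("v" + (i + 1)).getBytes(StandardCharsets.UTF_8));
        }
        writer.addRowBatch(batch);
        writer.close();
    }
}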