admin管理员组文章数量:1794759
MapReduce
题目描述
关于学生成绩相关的练习题,之前是一个入门级别的需求,现在对这些需求进行增强。首先看数据的改变:
computer,huangxiaoming,85,86,41,75,93,42,85
computer,xuzheng,54,52,86,91,42
computer,huangbo,85,42,96,38
english,zhaobenshan,54,52,86,91,42,85,75
english,liuyifei,85,41,75,21,85,96,14
algorithm,liuyifei,75,85,62,48,54,96,15
computer,huangjiaju,85,75,86,85,85
english,liuyifei,76,95,86,74,68,74,48
english,huangdatou,48,58,67,86,15,33,85
algorithm,huanglei,76,95,86,74,68,74,48
algorithm,huangjiaju,85,75,86,85,85,74,86
computer,huangdatou,48,58,67,86,15,33,85
english,zhouqi,85,86,41,75,93,42,85,75,55,47,22
english,huangbo,85,42,96,38,55,47,22
algorithm,liutao,85,75,85,99,66
computer,huangzitao,85,86,41,75,93,42,85
math,wangbaoqiang,85,86,41,75,93,42,85
computer,liujialing,85,41,75,21,85,96,14,74,86
computer,liuyifei,75,85,62,48,54,96,15
computer,liutao,85,75,85,99,66,88,75,91
computer,huanglei,76,95,86,74,68,74,48
english,liujialing,75,85,62,48,54,96,15
math,huanglei,76,95,86,74,68,74,48
math,huangjiaju,85,75,86,85,85,74,86
math,liutao,48,58,67,86,15,33,85
english,huanglei,85,75,85,99,66,88,75,91
math,xuzheng,54,52,86,91,42,85,75
math,huangxiaoming,85,75,85,99,66,88,75,91
math,liujialing,85,86,41,75,93,42,85,75
english,huangxiaoming,85,86,41,75,93,42,85
algorithm,huangdatou,48,58,67,86,15,33,85
algorithm,huangzitao,85,86,41,75,93,42,85,75
一、数据解释
数据字段个数不固定:
第一个是课程名称,总共四个课程,computer,math,english,algorithm,
第二个是学生姓名,后面是每次考试的分数
二、统计需求:
1、统计每门课程的参考人数和课程平均分
2、统计每门课程参考学生的平均分,并且按课程存入不同的结果文件,要求一门课程一个结果文件,并且按平均分从高到低排序,分数保留一位小数
3、求出每门课程参考学生成绩最高的学生的信息:课程,姓名和平均分
三、解题思路(以下思路和代码针对需求1)
mapper阶段的输出(每行输入对应一条输出):
key: 课程
value: 该行学生在这门课程的平均分
reducer阶段的输出(每门课程一条输出):
key: 课程
value: 课程平均分(各条记录平均分的平均值)和参考人数(记录条数)
四、具体代码实现
package com.ghgj.mazh.mapreduce.exercise.coursescore3;import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;public class CourseScoreMR_Pro_01 {public static void main(String[] args) throws Exception {/*** 一些参数的初始化*/String inputPath = "D:\\bigdata\\coursescore2\\input";String outputPath = "D:\\bigdata\\coursescore2\\output";/*** 初始化一个Job对象*/Configuration conf = new Configuration();Job job = Job.getInstance(conf);/*** 设置jar包所在路径*/job.setJarByClass(CourseScoreMR_Pro_01.class);/*** 指定mapper类和reducer类 等各种其他业务逻辑组件*/job.setMapperClass(Mapper_CS.class);job.setReducerClass(Reducer_CS.class);// 指定maptask的输出类型job.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(DoubleWritable.class);// 指定reducetask的输出类型job.setOutputKeyClass(Text.class);job.setOutputValueClass(Text.class);/*** 指定该mapreduce程序数据的输入和输出路径*/Path input = new Path(inputPath);Path output = new Path(outputPath);FileSystem fs = FileSystem.get(conf);if (fs.exists(output)) {fs.delete(output, true);}FileInputFormat.setInputPaths(job, input);FileOutputFormat.setOutputPath(job, output);/*** 最后提交任务*/boolean waitForCompletion = job.waitForCompletion(true);System.exit(waitForCompletion ? 
0 : 1);}/*** Mapper组件:* <p>* 输入的key:* 输入的value: computer,liutao,85,75,85,99,66,88,75,91* <p>* 输出的key: 课程* 输入的value: 分数*/private static class Mapper_CS extends Mapper<LongWritable, Text, Text, DoubleWritable> {Text keyOut = new Text();DoubleWritable valueOut = new DoubleWritable();@Overrideprotected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {String[] splits = value.toString().split(",");String course = splits[0];int sum = 0;int num = 0;for(int i=2; i<splits.length; i++){sum += Integer.valueOf(splits[i]);num ++;}// 直接取整数double avgScore = Math.round(sum * 1D / num * 10) / 10D;keyOut.set(course);valueOut.set(avgScore);context.write(keyOut, valueOut);}}/*** Reducer组件:* <p>* 输入的key:* 输入的values:* <p>* 输出的key: 课程* 输入的value: 平均分数 和 人数*/private static class Reducer_CS extends Reducer<Text, DoubleWritable, Text, Text> {Text valueOut = new Text();@Overrideprotected void reduce(Text key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException {int sum = 0;int num = 0;for(DoubleWritable v: values){sum += v.get();num ++;}// 直接取整数double avgScore = Math.round(sum * 1D / num * 10) / 10D;valueOut.set(avgScore + "\t" + num);context.write(key, valueOut);}}
}
五、执行结果
algorithm 71.3 6
computer 69.6 10
english 66.0 9
math 72.6 7
至此,大功告成
本文标签: MapReduce
版权声明:本文标题:MapReduce 内容由林淑君副主任自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:http://www.xiehuijuan.com/baike/1700046081a392981.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
发表评论