79 lines
2.7 KiB
Java
79 lines
2.7 KiB
Java
|
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.Scanner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
|
||
|
/***
|
||
|
* 1:用来描述一个特定的作业
|
||
|
* 比如,该作业使用哪个类作为逻辑处理中的map,那个作为reduce
|
||
|
* 2:还可以指定该作业要处理的数据所在的路径
|
||
|
* 还可以指定改作业输出的结果放到哪个路径
|
||
|
* @author Administrator
|
||
|
*
|
||
|
*/
|
||
|
public class WcRunner{
|
||
|
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
|
||
|
//创建配置文件
|
||
|
Configuration conf = new Configuration();
|
||
|
//获取一个作业
|
||
|
Job job = Job.getInstance(conf);
|
||
|
|
||
|
//设置整个job所用的那些类在哪个jar包
|
||
|
job.setJarByClass(WcRunner.class);
|
||
|
|
||
|
//本job使用的mapper和reducer的类
|
||
|
job.setMapperClass(WcMap.class);
|
||
|
job.setReducerClass(WcReduce.class);
|
||
|
|
||
|
//指定reduce的输出数据key-value类型
|
||
|
job.setOutputKeyClass(Text.class);
|
||
|
job.setOutputValueClass(LongWritable.class);
|
||
|
|
||
|
|
||
|
//指定mapper的输出数据key-value类型
|
||
|
job.setMapOutputKeyClass(Text.class);
|
||
|
job.setMapOutputValueClass(LongWritable.class);
|
||
|
|
||
|
Scanner sc = new Scanner(System.in);
|
||
|
System.out.print("inputPath:");
|
||
|
String inputPath = sc.next();
|
||
|
System.out.print("outputPath:");
|
||
|
String outputPath = sc.next();
|
||
|
|
||
|
//指定要处理的输入数据存放路径
|
||
|
FileInputFormat.setInputPaths(job, new Path("hdfs://master:9000"+inputPath));
|
||
|
|
||
|
//指定处理结果的输出数据存放路径
|
||
|
FileOutputFormat.setOutputPath(job, new Path("hdfs://master:9000"+outputPath));
|
||
|
|
||
|
//将job提交给集群运行
|
||
|
job.waitForCompletion(true);
|
||
|
|
||
|
//输出结果
|
||
|
|
||
|
try {
|
||
|
FileSystem fs = FileSystem.get(new URI("hdfs://master:9000"), new Configuration());
|
||
|
Path srcPath = new Path(outputPath+"/part-r-00000");
|
||
|
|
||
|
FSDataInputStream is = fs.open(srcPath);
|
||
|
System.out.println("Results:");
|
||
|
while(true) {
|
||
|
String line = is.readLine();
|
||
|
if(line == null) {
|
||
|
break;
|
||
|
}
|
||
|
System.out.println(line);
|
||
|
}
|
||
|
is.close();
|
||
|
}catch(Exception e) {
|
||
|
e.printStackTrace();
|
||
|
}
|
||
|
}
|
||
|
}
|