WordCount/src/main/java/WcRunner.java

79 lines
2.7 KiB
Java
Raw Normal View History

2025-02-28 01:23:23 +00:00
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.Scanner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver that describes and submits the word-count MapReduce job.
 *
 * <p>It declares which classes act as the mapper and the reducer, where the
 * input data lives and where the results are written. Both paths are read
 * from standard input and are appended to the HDFS address
 * {@code hdfs://master:9000}. Once the job completes successfully, the
 * contents of the {@code part-r-00000} output file are echoed to standard
 * output.
 */
public class WcRunner {

    /** Address of the HDFS namenode that every path is resolved against. */
    private static final String HDFS_URI = "hdfs://master:9000";

    /** Name of the reducer output file that is printed after the job. */
    private static final String RESULT_FILE = "part-r-00000";

    /**
     * Prompts for the input and output paths, runs the job and prints its output.
     *
     * @param args command-line arguments (not used)
     * @throws IOException if the job cannot be created or submitted
     * @throws ClassNotFoundException if a job class cannot be found
     * @throws InterruptedException if the wait for job completion is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        configureJob(job);

        // System.in is deliberately left open; closing the Scanner would close it.
        Scanner sc = new Scanner(System.in);
        System.out.print("inputPath:");
        String inputPath = sc.next();
        System.out.print("outputPath:");
        String outputPath = sc.next();

        // Where the job reads its input from and writes its results to.
        FileInputFormat.setInputPaths(job, new Path(HDFS_URI + inputPath));
        FileOutputFormat.setOutputPath(job, new Path(HDFS_URI + outputPath));

        // Submit the job and block until it finishes; a failed job has no
        // usable output, so report the failure instead of trying to read it.
        if (!job.waitForCompletion(true)) {
            System.err.println("Job failed; no results to display.");
            System.exit(1);
        }

        printResults(conf, outputPath);
    }

    /** Registers the jar, the mapper/reducer classes and the key/value types on the job. */
    private static void configureJob(Job job) {
        // Locate the jar that holds the job's classes via this class.
        job.setJarByClass(WcRunner.class);

        job.setMapperClass(WcMap.class);
        job.setReducerClass(WcReduce.class);

        // Key/value types emitted by the reducer.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // Key/value types emitted by the mapper.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
    }

    /**
     * Prints the result file found under {@code outputPath} to standard output.
     *
     * <p>Printing is best effort: an I/O failure is reported on standard error
     * and does not abort the program, because the job itself already succeeded.
     */
    private static void printResults(Configuration conf, String outputPath) {
        Path resultPath = new Path(outputPath + "/" + RESULT_FILE);
        try {
            FileSystem fs = FileSystem.get(URI.create(HDFS_URI), conf);
            // Decode explicitly as UTF-8: the deprecated DataInputStream.readLine()
            // maps each byte to a char and garbles non-ASCII words.
            try (FSDataInputStream in = fs.open(resultPath);
                 BufferedReader reader =
                         new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
                System.out.println("Results:");
                String line;
                while ((line = reader.readLine()) != null) {
                    System.out.println(line);
                }
            }
        } catch (IOException e) {
            System.err.println("Could not read job output " + resultPath + ": " + e.getMessage());
            e.printStackTrace();
        }
    }
}