
MapReduce not producing an output

I want to run a simple MapReduce job over a text file, but it is not producing any output. Here is my code:

import java.io.IOException; 
import java.util.StringTokenizer; 
import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.io.IntWritable; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapreduce.Job; 
import org.apache.hadoop.mapreduce.Mapper; 
import org.apache.hadoop.mapreduce.Reducer; 
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 

public class WordCount {

    // Mapper: emits (word, 1) for every whitespace-separated token in a line.
    public static class TokenizerMapper
            extends Mapper<Object, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    // Reducer (also used as the combiner): sums the counts for each word.
    public static class IntSumReducer
            extends Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

I get this error while running the jar file:

17/05/07 23:10:53 WARN mapred.LocalJobRunner: job_local973452829_0001 
java.lang.Exception:  org.apache.hadoop.mapreduce.task.reduce.Shuffle$ShuffleError: error in shuffle in localfetcher#1 
at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:462) 
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:529) 
Caused by: org.apache.hadoop.mapreduce.task.reduce.Shuffle$ShuffleError: error in shuffle in localfetcher#1 
at org.apache.hadoop.mapreduce.task.reduce.Shuffle.run(Shuffle.java:134) 
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:376) 
at org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:319) 
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) 
at java.util.concurrent.FutureTask.run(FutureTask.java:266) 
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) 
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) 
at java.lang.Thread.run(Thread.java:748) 
Caused by: java.io.FileNotFoundException: /app/hadoop/tmp%20/mapred/local/localRunner/hduser/jobcache/job_local973452829_0001/attempt_local973452829_0001_m_000000_0/output/file.out.index 
at org.apache.hadoop.fs.RawLocalFileSystem.open(RawLocalFileSystem.java:193) 
at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:764) 
at org.apache.hadoop.io.SecureIOUtils.openFSDataInputStream(SecureIOUtils.java:156) 
at org.apache.hadoop.mapred.SpillRecord.<init>(SpillRecord.java:70) 
at org.apache.hadoop.mapred.SpillRecord.<init>(SpillRecord.java:62) 
at org.apache.hadoop.mapred.SpillRecord.<init>(SpillRecord.java:57) 
at org.apache.hadoop.mapreduce.task.reduce.LocalFetcher.copyMapOutput(LocalFetcher.java:123) 
at org.apache.hadoop.mapreduce.task.reduce.LocalFetcher.doCopy(LocalFetcher.java:101) 
at org.apache.hadoop.mapreduce.task.reduce.LocalFetcher.run(LocalFetcher 

What is wrong with my code? I am running Hadoop 2.4 on Ubuntu 14.04.

Answers


This is the relevant part of your error:

Caused by: java.io.FileNotFoundException: /app/hadoop/tmp%20/mapred/local/localRunner/hduser/jobcache/job_local973452829_0001/attempt_local973452829_0001_m_000000_0/output/file.out.index

I would guess it is a problem with the hadoop.tmp.dir property in your core-site.xml, because Hadoop is unable to store the temporary output files (from the mapper) on your disk. Note the %20 (a URL-encoded space) in /app/hadoop/tmp%20/, which suggests the configured value contains a stray trailing space.

You can either delete that property, so that Hadoop creates its own temporary directory for intermediate output, or set it to a directory with the appropriate permissions.
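For example, a minimal core-site.xml entry could look like the sketch below. The directory /app/hadoop/tmp is only inferred from the path in your stack trace; substitute whatever location you actually intend to use:

<configuration>
  <property>
    <name>hadoop.tmp.dir</name>
    <!-- no whitespace inside <value>: a stray trailing space is
         URL-encoded to %20 in the paths Hadoop derives from it -->
    <value>/app/hadoop/tmp</value>
  </property>
</configuration>

Afterwards, make sure the directory exists and is writable by the user running the job, e.g. sudo mkdir -p /app/hadoop/tmp followed by sudo chown hduser /app/hadoop/tmp (hduser is taken from your stack trace; use your own user).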


Try running it against HDFS like this:

hadoop jar (jar file name) (main class name) (input path) (output path)
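For instance, with hypothetical jar, class, and path names (replace them with your own):

hadoop jar wordcount.jar WordCount /user/hduser/input /user/hduser/output

Keep in mind that the output directory must not already exist in HDFS; FileOutputFormat refuses to overwrite it and the job fails at submission.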

Also, check whether you got any warnings when you exported your jar file.