


Query ID = iteblog_20160704104520_988f81d4-0b82-4778-af98-43cc1950d357
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
   set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
   set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
   set mapreduce.job.reduces=<number>



此参数(hive.exec.reducers.bytes.per.reducer)从Hive 0.2.0开始引入。在Hive 0.14.0版本之前默认值是1G(1,000,000,000);而从Hive 0.14.0开始,默认值变成了256M(256,000,000),可以参见HIVE-7158和HIVE-7917。这个参数的含义是每个Reduce处理的字节数。比如输入文件的大小是1GB,按默认值256M计算,那么会启动4个Reduce来处理数据。


此参数(hive.exec.reducers.max)从Hive 0.2.0开始引入。在Hive 0.14.0版本之前默认值是999;而从Hive 0.14.0开始,默认值变成了1009;可以参见HIVE-7158和HIVE-7917。这个参数的含义是最多启动的Reduce个数。如果input size/hive.exec.reducers.bytes.per.reducer>hive.exec.reducers.max,那么Hive启动的Reduce个数为hive.exec.reducers.max;反之为input size/hive.exec.reducers.bytes.per.reducer。这个参数只有在mapred.reduce.tasks/mapreduce.job.reduces设置为负数的时候才有效。


此参数(mapred.reduce.tasks/mapreduce.job.reduces)从Hive 0.1.0开始引入。默认值是-1。此参数的含义是Reduce的个数,典型的情况是设置成接近可用节点数的质数。如果mapred.job.tracker的值是local,此参数将会被忽略。在Hadoop中此参数的默认值是1;而在Hive中默认值是-1。通过将此参数设置为-1,Hive将自动计算出应该启动多少个Reduce。



  * Estimate the number of reducers needed for this job, based on job input,
  * and configuration parameters.
  * The output of this method should only be used if the output of this
  * MapRedTask is not being used to populate a bucketed table and the user
  * has not specified the number of reducers to use.
  * @return the number of reducers.
public static int estimateNumberOfReducers(HiveConf conf, ContentSummary inputSummary,
                                            MapWork work, boolean finalMapRed) throws IOException {
   long bytesPerReducer = conf.getLongVar(HiveConf.ConfVars.BYTESPERREDUCER);
   int maxReducers = conf.getIntVar(HiveConf.ConfVars.MAXREDUCERS);
   double samplePercentage = getHighestSamplePercentage(work);
   long totalInputFileSize = getTotalInputFileSize(inputSummary, work, samplePercentage);
   // if all inputs are sampled, we should shrink the size of reducers accordingly.
   if (totalInputFileSize != inputSummary.getLength()) { "BytesPerReducer=" + bytesPerReducer + " maxReducers="
         + maxReducers + " estimated totalInputFileSize=" + totalInputFileSize);
   } else { "BytesPerReducer=" + bytesPerReducer + " maxReducers="
       + maxReducers + " totalInputFileSize=" + totalInputFileSize);
   // If this map reduce job writes final data to a table and bucketing is being inferred,
   // and the user has configured Hive to do this, make sure the number of reducers is a
   // power of two
   boolean powersOfTwo = conf.getBoolVar(HiveConf.ConfVars.HIVE_INFER_BUCKET_SORT_NUM_BUCKETS_POWER_TWO) &&
       finalMapRed && !work.getBucketedColsByDirectory().isEmpty();
   return estimateReducers(totalInputFileSize, bytesPerReducer, maxReducers, powersOfTwo);
public static int estimateReducers( long totalInputFileSize, long bytesPerReducer,
     int maxReducers, boolean powersOfTwo) {
   double bytes = Math.max(totalInputFileSize, bytesPerReducer);
   int reducers = ( int ) Math.ceil(bytes / bytesPerReducer);
   reducers = Math.max( 1 , reducers);
   reducers = Math.min(maxReducers, reducers);
   int reducersLog = ( int )(Math.log(reducers) / Math.log( 2 )) + 1 ;
   int reducersPowerTwo = ( int )Math.pow( 2 , reducersLog);
   if (powersOfTwo) {
     // If the original number of reducers was a power of two, use that
     if (reducersPowerTwo / 2 == reducers) {
       // nothing to do
     } else if (reducersPowerTwo > maxReducers) {
       // If the next power of two greater than the original number of reducers is greater
       // than the max number of reducers, use the preceding power of two, which is strictly
       // less than the original number of reducers and hence the max
       reducers = reducersPowerTwo / 2 ;
     } else {
       // Otherwise use the smallest power of two greater than the original number of reducers
       reducers = reducersPowerTwo;
   return reducers;


  * Set the number of reducers for the mapred work.
private void setNumberOfReducers() throws IOException {
   ReduceWork rWork = work.getReduceWork();
   // this is a temporary hack to fix things that are not fixed in the compiler
   Integer numReducersFromWork = rWork == null ? 0 : rWork.getNumReduceTasks();
   if (rWork == null ) {
         .printInfo( "Number of reduce tasks is set to 0 since there's no reduce operator" );
   } else {
     if (numReducersFromWork >= 0 ) {
       console.printInfo( "Number of reduce tasks determined at compile time: "
           + rWork.getNumReduceTasks());
     } else if (job.getNumReduceTasks() > 0 ) {
       int reducers = job.getNumReduceTasks();
       console.printInfo( "Number of reduce tasks not specified. Defaulting to jobconf value of: "
           + reducers);
     } else {
       if (inputSummary == null ) {
         inputSummary =  Utilities.getInputSummary(driverContext.getCtx(), work.getMapWork(), null );
       int reducers = Utilities.estimateNumberOfReducers(conf, inputSummary, work.getMapWork(),
           .printInfo( "Number of reduce tasks not specified. Estimated from input data size: "
           + reducers);
     console.printInfo( "In order to change the average load for a reducer (in bytes):" );
     console.printInfo( "  set " + HiveConf.ConfVars.BYTESPERREDUCER.varname
         + "=<number>" );
     console.printInfo( "In order to limit the maximum number of reducers:" );
     console.printInfo( "  set " + HiveConf.ConfVars.MAXREDUCERS.varname
         + "=<number>" );
     console.printInfo( "In order to set a constant number of reducers:" );
     console.printInfo( "  set " + HiveConf.ConfVars.HADOOPNUMREDUCERS
         + "=<numbe>" );




