Bootstrap

spark脚本

spark-submit脚本

#!/bin/sh
source ExitCodeCheck.sh
# Exit helper: abort the whole script when the previous command failed.
# $1 - exit status to inspect; non-zero is echoed and propagated via exit.
exitCodeCheck(){
if [ "$1" -eq 0 ]; then
  echo 'shell execute success'
else
  echo 'shell execute return value is' "$1" 'is not 0'
  exit "$1"
fi
}
# Join every CLI argument into one whitespace-separated string; getparam
# below scans this string instead of re-reading "$@".
opts="$*"

# getparam KEY — arguments look like "-KEY=VALUE"; strip the leading dash,
# then print the VALUE whose KEY matches (empty output when absent).
getparam(){
key=$1
echo $opts | xargs -n1 | cut -b 2- | awk -F'=' -v k="$key" '$1 == k {print $2}'
}
 
 
# Pull the standard scheduler parameters out of the command line.
# The :0:4 slices below assume inc_start/inc_end are yyyymmdd strings.
IncStart=$(getparam inc_start)
IncEnd=$(getparam inc_end)
oracle_connection=$(getparam jdbc_str)
oracle_username=$(getparam db_user)
oracle_password=$(getparam db_psw)
dataName=$(getparam db_sid)
queueName=$(getparam hdp_queue)
hdfshostname=$(getparam hdfs_host)
 
 
# Split the yyyymmdd window boundaries into year/month/day and build both a
# timestamp form ("yyyy-MM-dd 00:00:00.0") and a plain date form.
IncStartYear=${IncStart:0:4}
IncStartMonth=${IncStart:4:2}
IncStartDay=${IncStart:6:2}
IncStartAll="${IncStartYear}-${IncStartMonth}-${IncStartDay} 00:00:00.0"
IncStartAllFormat="${IncStartYear}-${IncStartMonth}-${IncStartDay}"
IncEndYear=${IncEnd:0:4}
IncEndMonth=${IncEnd:4:2}
IncEndDay=${IncEnd:6:2}
IncEndAll="${IncEndYear}-${IncEndMonth}-${IncEndDay} 00:00:00.0"
IncEndAllFormat="${IncEndYear}-${IncEndMonth}-${IncEndDay}"
 
 
# The calendar day before IncStart, in the same three representations.
# NOTE(review): 'date -d' is GNU-specific — this script assumes GNU coreutils.
OneDayAgo=$(date -d "$IncStart 1 days ago" +%Y%m%d)
OneDayAgoYear=${OneDayAgo:0:4}
OneDayAgoMonth=${OneDayAgo:4:2}
OneDayAgoDay=${OneDayAgo:6:2}
OneDayAgoAll="${OneDayAgoYear}-${OneDayAgoMonth}-${OneDayAgoDay} 00:00:00.0"
OneDayAgoAllFormat="${OneDayAgoYear}-${OneDayAgoMonth}-${OneDayAgoDay}"
 
 
# The job name defaults to this script's own name.
job_name=$0

# Refresh the local copy of the Spark job jar from HDFS.
rm -rf spark2phoenix.jar
hadoop fs -get "${hdfshostname}/apps/hduser0101/an_pafc_safe/lib/spark2phoenix.jar" .
 
 
 
 
 
 
# ---- Spark job configuration ----
# NOTE(review): SparkTaskName/SparkExecutorMemory/SparkExecutorCores/
# SparkDriverMemory are defined here but the spark-submit call below uses
# hard-coded resource values instead — confirm which set is intended.
SparkTaskName='spark_cgi_pssp_group_info_education';
SparkExecutorMemory='1G';
SparkExecutorCores='1';
SparkJar='spark2phoenix.jar';
SparkDriverMemory='1G';


# NOTE(review): this fixed queue overrides the hdp_queue value parsed from
# the command line above — confirm that is intentional.
queueName='queue_0101_01'
appName='spark_cgi_pssp_group_info_education' ;
# BUG FIX: the original single-quoted assignment was terminated by the inner
# '050108' quotes, so the SQL silently became GROUP_CODE=050108 (unquoted
# numeric comparison instead of a string literal). Double-quote the
# assignment so the SQL keeps GROUP_CODE='050108' intact.
sql="SELECT id_icm,ecif_no,agent_code, phone_number FROM an_pafc_safe.idld_lcdm_mit_client_group_info WHERE GROUP_CODE='050108' limit 1000000";
outputTable='cgi.pssp_group_info_education';
 
 
 
 
 
 
#========= 开发参数 — dev-environment endpoints (disabled) ===========
#kafka_metadata_broker_list="10.20.24.151:9092,10.20.24.159:9092,10.20.24.161:9092"    
#zookeeper_quorum="10.20.24.151:2181,10.20.24.159:2181,10.20.24.161:2181"  
#phoenix_jdbc_url="10.20.24.151,10.20.24.159,10.20.24.161:2181:/gbd2-hbase-kylin"  
#
#========= 测试参数 — test-environment endpoints (disabled) ===========
#kafka_metadata_broker_list="30.4.64.78:9092,30.4.64.76:9092,30.4.64.77:9092"    
#zookeeper_quorum="30.4.64.78:2181,30.4.64.77:2181,30.4.64.76:2181"  
#phoenix_jdbc_url="30.4.64.78,30.4.64.77,30.4.64.76:2181:/gbd2-hbase-kylin"  
 
 
#========= 生产参数 — production endpoints (active) ===========
# NOTE(review): only phoenix_jdbc_url is passed to spark-submit below; the
# kafka/zookeeper settings appear unused in this script — verify.
kafka_metadata_broker_list="30.4.32.71:9092,30.4.32.72:9092,30.4.32.73:9092"
zookeeper_quorum="30.4.32.71:2181,30.4.32.72:2181,30.4.32.73:2181"
phoenix_jdbc_url="30.16.16.29,30.16.16.33,30.16.16.30:2181:/gbd2-hbase02"
 
 
 
 
# Submit the Spark-to-Phoenix export job on YARN.
# Positional args after the jar: appName, the SQL to run, the Phoenix output
# table, and the Phoenix JDBC quorum URL.
# NOTE(review): resources are hard-coded here (4G executors, 2G driver,
# 5 executors x 4 cores); the SparkExecutorMemory/SparkDriverMemory/
# SparkExecutorCores variables defined earlier are never used — confirm
# which values are intended.
# NOTE(review): 'yarn-client' is the legacy master syntax; current Spark
# spells it '--master yarn --deploy-mode client'.
spark-submit                                   \
--class com.paic.spark.utils.Spark2Phoenix             \
--master yarn-client \
--executor-memory 4G \
--driver-memory 2G \
--num-executors 5 \
--executor-cores 4 \
--queue ${queueName} \
--conf spark.sql.autoBroadcastJoinThreshold=20971520 \
--conf spark.default.parallelism=40 \
--conf spark.sql.shuffle.partitions=40 \
--conf spark.speculation=false \
--conf spark.task.maxFailures=40 \
--conf spark.akka.timeout=300 \
--conf spark.network.timeout=300 \
--conf spark.yarn.max.executor.failures=40 \
--conf spark.executor.extraJavaOptions="-XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:+ParallelRefProcEnabled -XX:+CMSClassUnloadingEnabled -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+PrintGCTimeStamps -XX:+PrintHeapAtGC -XX:+HeapDumpOnOutOfMemoryError -verbose:gc " \
${SparkJar} "${appName}" "${sql}" "${outputTable}" "${phoenix_jdbc_url}"
# Abort the script with spark-submit's exit status if it failed.
exitCodeCheck $?
 
 

hive脚本

#!/bin/sh
source ExitCodeCheck.sh
 
# Collect the CLI arguments into a single string for keyword lookup.
opts="$*"

# getparam KEY — print the VALUE from a "-KEY=VALUE" argument, or nothing
# when the key is not present.
getparam(){
name=$1
echo $opts | xargs -n1 | cut -b 2- | awk -F'=' -v want="$name" '$1 == want {print $2}'
}
 
# Extract the standard scheduler parameters from the command line.
# inc_start/inc_end are expected as yyyymmdd (see the :0:4 slices below).
IncStart=$(getparam inc_start)
IncEnd=$(getparam inc_end)
oracle_connection=$(getparam jdbc_str)
oracle_username=$(getparam db_user)
oracle_password=$(getparam db_psw)
dataName=$(getparam db_sid)
queueName=$(getparam hdp_queue)
hdfshostname=$(getparam hdfs_host)
 
# Build "yyyy-MM-dd 00:00:00.0" timestamps from the yyyymmdd boundaries.
IncStartYear=${IncStart:0:4}
IncStartMonth=${IncStart:4:2}
IncStartDay=${IncStart:6:2}
IncStartAll="${IncStartYear}-${IncStartMonth}-${IncStartDay} 00:00:00.0"
IncEndYear=${IncEnd:0:4}
IncEndMonth=${IncEnd:4:2}
IncEndDay=${IncEnd:6:2}
IncEndAll="${IncEndYear}-${IncEndMonth}-${IncEndDay} 00:00:00.0"
 
# The job name defaults to this script's own name.
job_name=$0
 
#BAS 1
# Aggregate, per info item and per day, the number of clicks in each of the
# 24 hours (cnt00..cnt23), and overwrite the matching partition of
# an_pafc_safe.sub_zeb_bp_info_count.
# NOTE(review): the partition value '${IncStartAll}' carries a time suffix
# (" 00:00:00.0") — confirm the 'day' partition is really keyed on a full
# timestamp rather than a plain yyyy-MM-dd date.
# NOTE(review): the filter is "< to_date('${IncStartAll}')" — strictly before
# the start date, not a [start, end) window; IncEndAll is computed above but
# never used here. Verify this is intentional.
hive -v -e "set mapred.job.queue.name=${queueName};
set mapred.job.name=${job_name}_2;
use sx_360_safe;
INSERT OVERWRITE table an_pafc_safe.sub_zeb_bp_info_count PARTITION (day='${IncStartAll}')
--资讯编码各时间点击次数统计表
select substr(t1.created_date, 1, 10) as statis_date,
       t1.code,
       t1.info_no,
       sum(case when substr(created_date,12,2) = '00' then 1 else 0 end) cnt00,
       sum(case when substr(created_date,12,2) = '01' then 1 else 0 end) cnt01,
       sum(case when substr(created_date,12,2) = '02' then 1 else 0 end) cnt02,
       sum(case when substr(created_date,12,2) = '03' then 1 else 0 end) cnt03,
       sum(case when substr(created_date,12,2) = '04' then 1 else 0 end) cnt04,
       sum(case when substr(created_date,12,2) = '05' then 1 else 0 end) cnt05,
       sum(case when substr(created_date,12,2) = '06' then 1 else 0 end) cnt06,
       sum(case when substr(created_date,12,2) = '07' then 1 else 0 end) cnt07,
       sum(case when substr(created_date,12,2) = '08' then 1 else 0 end) cnt08,
       sum(case when substr(created_date,12,2) = '09' then 1 else 0 end) cnt09,
       sum(case when substr(created_date,12,2) = '10' then 1 else 0 end) cnt10,
       sum(case when substr(created_date,12,2) = '11' then 1 else 0 end) cnt11,
       sum(case when substr(created_date,12,2) = '12' then 1 else 0 end) cnt12,
       sum(case when substr(created_date,12,2) = '13' then 1 else 0 end) cnt13,
       sum(case when substr(created_date,12,2) = '14' then 1 else 0 end) cnt14,
       sum(case when substr(created_date,12,2) = '15' then 1 else 0 end) cnt15,
       sum(case when substr(created_date,12,2) = '16' then 1 else 0 end) cnt16,
       sum(case when substr(created_date,12,2) = '17' then 1 else 0 end) cnt17,
       sum(case when substr(created_date,12,2) = '18' then 1 else 0 end) cnt18,
       sum(case when substr(created_date,12,2) = '19' then 1 else 0 end) cnt19,
       sum(case when substr(created_date,12,2) = '20' then 1 else 0 end) cnt20,
       sum(case when substr(created_date,12,2) = '21' then 1 else 0 end) cnt21,
       sum(case when substr(created_date,12,2) = '22' then 1 else 0 end) cnt22,
       sum(case when substr(created_date,12,2) = '23' then 1 else 0 end) cnt23
from an_pafc_safe.sx_ela_bp_info t1
where substr(t1.created_date, 1, 10) < to_date('${IncStartAll}')
and t1.info_no is not null
and length(t1.info_no)<> 0
group by substr(t1.created_date, 1, 10),
         t1.code,
         t1.info_no 
";
 
# Abort with hive's exit status on failure; exitCodeCheck is provided by the
# sourced ExitCodeCheck.sh.
exitCodeCheck $?
 

转载自https://blog.csdn.net/qq_36732988/article/details/78778947?spm=1001.2014.3001.5501,若侵权请联系删除
