-- Video tags whose average play progress exceeds 60%.
-- Progress of one play = watched seconds / video duration, capped at 1
-- (the CASE below returns 1 when the watched time is longer than the video).
WITH tag_progress AS (
    SELECT
        tag,
        AVG(
            CASE
                WHEN TIMESTAMPDIFF(SECOND, start_time, end_time) > duration THEN 1
                ELSE TIMESTAMPDIFF(SECOND, start_time, end_time) / duration
            END
        ) AS progress
    FROM tb_user_video_log
    INNER JOIN tb_video_info USING (video_id)
    GROUP BY tag
)
SELECT
    tag,
    CONCAT(ROUND(progress * 100, 2), '%') AS avg_play_progress
FROM tag_progress
WHERE progress > 0.6
-- Sort on the numeric value. Sorting on the formatted text column would
-- compare strings, which places '100.00%' below '90.00%'.
ORDER BY progress DESC;
-- Retweet rate per video tag over the most recent 30 days of play logs.
-- Only the rate is returned (columns: tag, retweet_rate).
WITH plays AS (
    SELECT
        tag,
        if_retweet,
        start_time,
        -- First day of the 30-day window, counted back from the latest play date.
        DATE_SUB(MAX(DATE(start_time)) OVER (), INTERVAL 29 DAY) AS min_date
    FROM tb_user_video_log
    INNER JOIN tb_video_info USING (video_id)
)
SELECT
    tag,
    ROUND(SUM(if_retweet) / COUNT(*), 3) AS retweet_rate
FROM plays
-- The date filter must run BEFORE aggregation; filtering after GROUP BY tag
-- would test a single arbitrary start_time per tag.
-- No upper bound is needed: nothing is later than the latest play.
WHERE start_time >= min_date
GROUP BY tag
ORDER BY retweet_rate DESC;
-- Per creator and month of 2021: follower growth rate for the month and the
-- running follower total up to that month.
WITH plays_2021 AS (
    SELECT
        b.author,
        DATE_FORMAT(a.start_time, '%Y-%m') AS `month`,
        -- if_follow = 2 is mapped to -1 (presumably an unfollow - confirm against
        -- the table definition); every other value is kept as-is.
        CASE WHEN a.if_follow = 2 THEN -1 ELSE a.if_follow END AS follow_delta
    FROM tb_user_video_log AS a
    INNER JOIN tb_video_info AS b
        ON a.video_id = b.video_id
    WHERE YEAR(a.start_time) = 2021
),
monthly AS (
    SELECT
        author,
        `month`,
        ROUND(SUM(follow_delta) / COUNT(*), 3) AS fans_growth_rate,
        SUM(follow_delta) AS net_new_fans
    FROM plays_2021
    GROUP BY author, `month`
)
SELECT
    author,
    `month`,
    fans_growth_rate,
    -- Running total: each author's months are accumulated in calendar order.
    SUM(net_new_fans) OVER (PARTITION BY author ORDER BY `month`) AS total_fans
FROM monthly
ORDER BY author, total_fans;
-- For each tag and each day in 2021-10-01..2021-10-03: likes summed over a
-- trailing window and the maximum daily retweet count over the same window.
WITH daily_tag_stats AS (
    -- Daily totals start on 2021-09-25 so the window ending on 10-01 has
    -- six earlier days available.
    SELECT
        t2.tag,
        DATE(t1.start_time) AS dt,
        SUM(t1.if_like) AS like_cnt,
        SUM(t1.if_retweet) AS retweet_cnt
    FROM tb_user_video_log AS t1
    LEFT JOIN tb_video_info AS t2 USING (video_id)
    WHERE DATE(t1.start_time) BETWEEN '2021-09-25' AND '2021-10-03'
    GROUP BY t2.tag, DATE(t1.start_time)
),
rolling AS (
    SELECT
        tag,
        dt,
        SUM(like_cnt) OVER w AS sum_like_cnt_7d,
        MAX(retweet_cnt) OVER w AS retweet_cnt
    FROM daily_tag_stats
    -- The frame is row-based (current row + 6 preceding rows); it equals
    -- 7 calendar days only if the tag has a row for every day.
    WINDOW w AS (PARTITION BY tag ORDER BY dt ROWS 6 PRECEDING)
)
SELECT
    tag,
    dt,
    sum_like_cnt_7d,
    retweet_cnt
FROM rolling
WHERE dt BETWEEN '2021-10-01' AND '2021-10-03'
ORDER BY tag DESC, dt;
-- Top 3 videos by hot index among videos released within the last 30 days.
-- hot_index = (100 * completion rate + 5 * likes + 3 * comments + 2 * retweets)
--             / (days since the video was last played + 1)
-- "Today" is taken as the latest end_time in the play log.
WITH video_stats AS (
    SELECT
        a.video_id,
        -- Share of plays that lasted at least the full video duration.
        SUM(CASE WHEN TIMESTAMPDIFF(SECOND, start_time, end_time) >= duration THEN 1 ELSE 0 END)
            / COUNT(a.video_id) AS play_rate,
        SUM(if_like) AS likes,
        COUNT(comment_id) AS comments,
        SUM(if_retweet) AS retweets,
        DATEDIFF((SELECT MAX(end_time) FROM tb_user_video_log), MAX(end_time)) AS days
    FROM tb_user_video_log AS a
    INNER JOIN tb_video_info AS b
        ON a.video_id = b.video_id
    WHERE DATEDIFF(DATE((SELECT MAX(end_time) FROM tb_user_video_log)), DATE(release_time)) <= 29
    GROUP BY a.video_id
)
SELECT
    video_id,
    -- Divide directly by (days + 1). Multiplying by 1 / (days + 1) would first
    -- truncate the reciprocal to 4 decimals (MySQL division scale) and can
    -- shift the rounded result.
    ROUND((100 * play_rate + 5 * likes + 3 * comments + 2 * retweets) / (days + 1), 0) AS hot_index
FROM video_stats
-- video_id breaks ties so that LIMIT returns a deterministic set.
ORDER BY hot_index DESC, video_id
LIMIT 3;
-- Part 2: User growth scenario (a news-feed product)
-- Average article viewing time per user (seconds) for each day of November 2021.
-- Rows with artical_id = 0 are excluded.
-- The output column name avg_viiew_len_sec (sic) is kept unchanged for consumers.
SELECT
    DATE_FORMAT(in_time, '%Y-%m-%d') AS dt,
    ROUND(SUM(TIMESTAMPDIFF(SECOND, in_time, out_time)) / COUNT(DISTINCT uid), 1) AS avg_viiew_len_sec
FROM tb_user_log
-- Half-open range on the raw column instead of formatting every row.
WHERE in_time >= '2021-11-01'
  AND in_time < '2021-12-01'
  AND artical_id <> 0
GROUP BY DATE_FORMAT(in_time, '%Y-%m-%d')
ORDER BY avg_viiew_len_sec;
-- Maximum number of simultaneous viewers per article.
-- Each visit contributes +1 at in_time and -1 at out_time; the running sum of
-- these events is the number of viewers present at that moment.
WITH events AS (
    SELECT artical_id, in_time AS dt, 1 AS num
    FROM tb_user_log
    WHERE artical_id <> 0
    UNION ALL
    SELECT artical_id, out_time AS dt, -1 AS num
    FROM tb_user_log
    WHERE artical_id <> 0
),
running AS (
    SELECT
        artical_id,
        -- num DESC: at the same instant, entries are counted before exits.
        SUM(num) OVER (PARTITION BY artical_id ORDER BY dt ASC, num DESC) AS cnt
    FROM events
)
SELECT
    artical_id,
    MAX(cnt) AS max_uv
FROM running
GROUP BY artical_id
ORDER BY max_uv DESC;
-- Next-day retention rate of new users for each day of November 2021.
WITH first_active AS (
    -- Earliest active date per user (the day the user counts as "new").
    SELECT
        uid,
        MIN(DATE_FORMAT(in_time, '%Y-%m-%d')) AS dt
    FROM tb_user_log
    GROUP BY uid
),
active_days AS (
    -- Every date a user was active: a session counts for both its start date
    -- and its end date. UNION (not UNION ALL) removes duplicate user/date pairs.
    SELECT uid, DATE_FORMAT(in_time, '%Y-%m-%d') AS dt
    FROM tb_user_log
    UNION
    SELECT uid, DATE_FORMAT(out_time, '%Y-%m-%d') AS dt
    FROM tb_user_log
)
SELECT
    a.dt,
    -- COUNT(b.uid) counts only new users that matched an activity row on the next day.
    ROUND(COUNT(b.uid) / COUNT(a.uid), 2) AS uv_left_rate
FROM first_active AS a
LEFT JOIN active_days AS b
    ON a.uid = b.uid
    AND a.dt = DATE_SUB(b.dt, INTERVAL 1 DAY)
WHERE DATE_FORMAT(a.dt, '%Y-%m') = '2021-11'
-- Qualified: both joined inputs expose a column named dt.
GROUP BY a.dt
ORDER BY a.dt;
-- Share of users in each activity grade, relative to the fixed reference date
-- 2021-11-04 that is hard-coded in the grading rules below.
WITH user_span AS (
    SELECT
        uid,
        DATE(MIN(in_time)) AS first_in,
        DATE(MAX(out_time)) AS last_in
    FROM tb_user_log
    GROUP BY uid
),
graded AS (
    SELECT
        CASE
            -- First seen less than 7 days before the reference date.
            WHEN DATEDIFF('2021-11-04', first_in) < 7 THEN '新晋用户'
            -- Older account, last seen less than 7 days before the reference date.
            WHEN DATEDIFF('2021-11-04', first_in) >= 7
                 AND DATEDIFF('2021-11-04', last_in) < 7 THEN '忠实用户'
            -- Not seen for 30 days or more.
            WHEN DATEDIFF('2021-11-04', last_in) >= 30 THEN '流失用户'
            -- Everything in between (last seen 7 to 29 days ago).
            ELSE '沉睡用户'
        END AS user_grade
    FROM user_span
)
SELECT
    user_grade,
    ROUND(COUNT(user_grade) / (SELECT COUNT(*) FROM graded), 2) AS ratio
FROM graded
GROUP BY user_grade
ORDER BY ratio DESC;
-- Daily active users and the share of them that are new users on that day.
WITH active_days AS (
    -- Every date a user was active (session start date and session end date).
    SELECT uid, DATE_FORMAT(in_time, '%Y-%m-%d') AS dt
    FROM tb_user_log
    UNION
    SELECT uid, DATE_FORMAT(out_time, '%Y-%m-%d') AS dt
    FROM tb_user_log
),
daily_active AS (
    SELECT dt, COUNT(DISTINCT uid) AS dau
    FROM active_days
    GROUP BY dt
),
first_active AS (
    -- Earliest active date per user.
    SELECT uid, MIN(DATE_FORMAT(in_time, '%Y-%m-%d')) AS dt
    FROM tb_user_log
    GROUP BY uid
),
daily_new AS (
    SELECT dt, COUNT(DISTINCT uid) AS uv
    FROM first_active
    GROUP BY dt
)
SELECT
    t1.dt,
    t1.dau,
    -- Days without any new user have no row in daily_new; report 0 for them.
    IFNULL(ROUND(t2.uv / t1.dau, 2), 0) AS uv_new_ratio
FROM daily_active AS t1
LEFT JOIN daily_new AS t2
    ON t1.dt = t2.dt
ORDER BY t1.dt;
-- Monthly GMV for 2021, keeping only months whose GMV exceeds 100000.
-- Only orders with status 0 or 1 are counted.
SELECT
    DATE_FORMAT(event_time, '%Y-%m') AS `month`,
    SUM(total_amount) AS GMV
FROM tb_order_overall
WHERE YEAR(event_time) = 2021
  AND status IN (0, 1)
GROUP BY DATE_FORMAT(event_time, '%Y-%m')
HAVING GMV > 100000
ORDER BY GMV;
-- Funnel metrics per product for October 2021, limited to products whose
-- refund rate is at most 0.5.
--   ctr          = clicks   / events
--   cart_rate    = carts    / clicks
--   payment_rate = payments / carts
--   refund_rate  = refunds  / payments
-- Each ratio is reported as 0 when its denominator is 0.
SELECT
    product_id,
    CASE WHEN COUNT(event_time) = 0 THEN 0
         ELSE ROUND(SUM(if_click) / COUNT(event_time), 3) END AS ctr,
    CASE WHEN SUM(if_click) = 0 THEN 0
         ELSE ROUND(SUM(if_cart) / SUM(if_click), 3) END AS cart_rate,
    CASE WHEN SUM(if_cart) = 0 THEN 0
         ELSE ROUND(SUM(if_payment) / SUM(if_cart), 3) END AS payment_rate,
    CASE WHEN SUM(if_payment) = 0 THEN 0
         ELSE ROUND(SUM(if_refund) / SUM(if_payment), 3) END AS refund_rate
FROM tb_user_event
WHERE DATE_FORMAT(event_time, '%Y-%m') = '2021-10'
GROUP BY product_id
HAVING refund_rate <= 0.5
ORDER BY product_id;
-- Gross profit rate of shop 901 since 2021-10: one summary row for the whole
-- shop, followed by every product whose profit rate is above 24.9%.
-- Profit rate = 1 - total purchase cost / total sales revenue.
WITH new_tab AS (
    SELECT
        a.product_id,
        a.shop_id,
        a.in_price,
        b.order_id,
        b.price,
        b.cnt
    FROM tb_product_info AS a
    INNER JOIN tb_order_detail AS b
        ON a.product_id = b.product_id
    INNER JOIN tb_order_overall AS c
        ON b.order_id = c.order_id
    WHERE c.event_time >= '2021-10-01'
      AND a.shop_id = 901
      AND c.status IN (1, 2)
),
product_margin AS (
    SELECT
        product_id,
        1 - SUM(in_price * cnt) / SUM(price * cnt) AS ratio
    FROM new_tab
    GROUP BY product_id
)
SELECT
    product_id,
    profit_rate
FROM (
    -- sort_group / sort_id exist only to order the report: summary row first,
    -- then products by numeric id. An ORDER BY inside a UNION branch or a
    -- derived table does not guarantee the final row order.
    SELECT
        0 AS sort_group,
        0 AS sort_id,
        '店铺汇总' AS product_id,
        CONCAT(ROUND((1 - SUM(in_price * cnt) / SUM(price * cnt)) * 100, 1), '%') AS profit_rate
    FROM new_tab
    UNION ALL
    SELECT
        1 AS sort_group,
        product_id AS sort_id,
        product_id,
        CONCAT(ROUND(ratio * 100, 1), '%') AS profit_rate
    FROM product_margin
    WHERE ratio > 0.249
) AS report
ORDER BY sort_group, sort_id;
-- Top 3 products tagged '零食' by repurchase rate over the last 90 days
-- (counted back from the latest order time). Orders with status 2 are excluded.
-- Repurchase rate = buyers with at least two purchases / distinct buyers.
WITH ranked_purchases AS (
    SELECT
        product_id,
        uid,
        -- Purchase sequence number of this buyer for this product.
        ROW_NUMBER() OVER (PARTITION BY product_id, uid ORDER BY event_time) AS t_cnt
    FROM tb_order_detail
    -- The WHERE filters below discard unmatched rows, so inner joins are
    -- sufficient here.
    INNER JOIN tb_order_overall USING (order_id)
    INNER JOIN tb_product_info USING (product_id)
    WHERE tag = '零食'
      AND DATEDIFF((SELECT MAX(event_time) FROM tb_order_overall), event_time) < 90
      AND status <> 2
)
SELECT
    product_id,
    -- A buyer has a row with t_cnt = 2 exactly when they bought at least twice.
    ROUND(SUM(CASE WHEN t_cnt = 2 THEN 1 ELSE 0 END) / COUNT(DISTINCT uid), 3) AS repurchase_rate
FROM ranked_purchases
WHERE t_cnt < 3
GROUP BY product_id
ORDER BY repurchase_rate DESC, product_id
LIMIT 3;
-- Average order amount and average acquisition cost of new customers in October.
-- A new-customer order is a user's earliest order. The cost of one order is
-- the item total (price * cnt) minus the amount actually charged (total_amount).
-- NOTE(review): MONTH(event_time) = 10 matches October of ANY year; add a year
-- filter if the table can span several years.
WITH first_orders AS (
    SELECT
        order_id,
        -- total_amount repeats on every detail row of the order; AVG collapses it.
        AVG(total_amount) AS total_amount,
        SUM(price * cnt) - AVG(total_amount) AS diff
    FROM tb_order_overall AS t1
    LEFT JOIN tb_order_detail AS t2 USING (order_id)
    WHERE MONTH(event_time) = 10
      AND (uid, event_time) IN (
          SELECT uid, MIN(event_time)
          FROM tb_order_overall
          GROUP BY uid
      )
    GROUP BY order_id
)
SELECT
    ROUND(AVG(total_amount), 1) AS avg_amount,
    ROUND(AVG(diff), 1) AS avg_cost
FROM first_orders;
-- Shop 901, 2021-10-01..2021-10-03: 7-day sell rate and unsold rate per day.
--   sale_rate   = distinct products sold in the 7 days ending on the day
--                 / products already released on that day
--   unsale_rate = 1 - sale_rate
WITH report_days AS (
    -- Days in the reporting range that have at least one order (any shop).
    SELECT DISTINCT DATE(event_time) AS dt
    FROM tb_order_overall
    WHERE DATE(event_time) BETWEEN '2021-10-01' AND '2021-10-03'
),
shop_sales AS (
    -- One row per sold item of shop 901 (orders with status = 1 only).
    SELECT
        DATE(t2.event_time) AS dt,
        t3.product_id
    FROM tb_order_overall AS t2
    INNER JOIN tb_order_detail AS t3
        ON t2.order_id = t3.order_id
        AND t2.status = 1
    INNER JOIN tb_product_info AS t1
        ON t1.product_id = t3.product_id
        AND t1.shop_id = 901
),
onsale AS (
    -- Number of shop-901 products released on or before each order date.
    -- The cross join pairs every product with every order date.
    SELECT
        DATE(event_time) AS dt,
        COUNT(DISTINCT CASE
            WHEN DATEDIFF(DATE(event_time), DATE(release_time)) >= 0 THEN product_id
        END) AS onsale_cnt
    FROM tb_product_info
    CROSS JOIN tb_order_overall
    WHERE shop_id = 901
    GROUP BY DATE(event_time)
)
SELECT
    a.dt,
    ROUND(COUNT(DISTINCT b.product_id) / c.onsale_cnt, 3) AS sale_rate,
    ROUND(1 - (COUNT(DISTINCT b.product_id) / c.onsale_cnt), 3) AS unsale_rate
FROM report_days AS a
LEFT JOIN shop_sales AS b
    -- Sales dated from 6 days before the report day up to the report day itself.
    ON DATEDIFF(a.dt, b.dt) BETWEEN 0 AND 6
INNER JOIN onsale AS c
    ON a.dt = c.dt
-- onsale_cnt is constant per day; listing it keeps the query valid under
-- ONLY_FULL_GROUP_BY.
GROUP BY a.dt, c.onsale_cnt
ORDER BY a.dt;
-- Part 4: Travel scenario (a ride-hailing product)
-- Drivers with 3 or more orders in the city '北京' during 2021-10-01..2021-10-07:
-- average order count and average income per driver.
WITH driver_totals AS (
    SELECT
        city,
        driver_id,
        COUNT(t1.order_id) AS cnt,
        SUM(fare) AS sum_fare
    FROM tb_get_car_order AS t1
    INNER JOIN tb_get_car_record AS t2
        ON t1.order_id = t2.order_id
    WHERE DATE(order_time) BETWEEN '2021-10-01' AND '2021-10-07'
      AND city = '北京'
    GROUP BY city, driver_id
    HAVING cnt >= 3
)
SELECT
    city,
    ROUND(AVG(cnt), 3) AS avg_order_num,
    ROUND(AVG(sum_fare), 3) AS avg_income
FROM driver_totals
GROUP BY city;
-- Average grade of every driver that has a cancelled order in October 2021,
-- plus an overall row labelled '总体'.
-- An order counts as cancelled when its start_time is NULL.
SELECT
    -- WITH ROLLUP adds a super-aggregate row whose driver_id is NULL; label it.
    IFNULL(driver_id, '总体') AS driver_id,
    ROUND(AVG(grade), 1) AS avg_grade
FROM tb_get_car_order
WHERE driver_id IN (
        SELECT DISTINCT driver_id
        FROM tb_get_car_order
        WHERE start_time IS NULL
          AND DATE_FORMAT(order_time, '%Y-%m') = '2021-10'
    )
  -- Only completed orders carry a grade.
  AND start_time IS NOT NULL
GROUP BY driver_id
WITH ROLLUP;
-- Highest-rated driver(s) in each city with their average grade, average
-- orders per active day and average mileage per active day.
WITH driver_stats AS (
    SELECT
        city,
        driver_id,
        ROUND(AVG(grade), 1) AS avg_grade,
        -- Active days = distinct calendar dates on which the driver has an order.
        ROUND(COUNT(order_time) / COUNT(DISTINCT DATE(order_time)), 1) AS avg_order_num,
        ROUND(SUM(mileage) / COUNT(DISTINCT DATE(order_time)), 3) AS avg_mileage
    FROM tb_get_car_order
    INNER JOIN tb_get_car_record USING (order_id)
    GROUP BY city, driver_id
),
ranked AS (
    SELECT
        city,
        driver_id,
        avg_grade,
        avg_order_num,
        avg_mileage,
        -- DENSE_RANK keeps every driver tied for the best (rounded) grade.
        DENSE_RANK() OVER (PARTITION BY city ORDER BY avg_grade DESC) AS rk
    FROM driver_stats
)
SELECT
    city,
    driver_id,
    avg_grade,
    avg_order_num,
    avg_mileage
FROM ranked
WHERE rk = 1
ORDER BY avg_order_num;
-- For each day in 2021-10-01..2021-10-03: trailing 7-day daily average of
-- completed orders and of cancelled orders.
-- An order counts as cancelled when its start_time is NULL.
WITH daily AS (
    -- Daily counts start on 2021-09-25 so the window ending on 10-01 has six
    -- earlier days available.
    SELECT
        DATE(order_time) AS time1,
        COUNT(*) - SUM(CASE WHEN start_time IS NULL THEN 1 ELSE 0 END) AS num1,
        SUM(CASE WHEN start_time IS NULL THEN 1 ELSE 0 END) AS num2
    FROM tb_get_car_order
    WHERE DATE(order_time) BETWEEN '2021-09-25' AND '2021-10-03'
    GROUP BY DATE(order_time)
),
rolling AS (
    SELECT
        time1,
        SUM(num1) OVER w AS num3,
        SUM(num2) OVER w AS num4
    FROM daily
    -- Row-based frame: equals 7 calendar days only if every day has orders.
    WINDOW w AS (ORDER BY time1 ROWS BETWEEN 6 PRECEDING AND CURRENT ROW)
)
SELECT
    time1,
    ROUND(num3 / 7, 2) AS finish_num_7d,
    ROUND(num4 / 7, 2) AS cancel_num_7d
FROM rolling
WHERE time1 BETWEEN '2021-10-01' AND '2021-10-03'
ORDER BY time1;
-- Weekday ride requests per time-of-day period: number of requests, average
-- wait until the order is accepted and average dispatch time, both in minutes.
SELECT
    CASE
        WHEN TIME(r.event_time) BETWEEN '07:00:00' AND '08:59:59' THEN '早高峰'
        WHEN TIME(r.event_time) BETWEEN '09:00:00' AND '16:59:59' THEN '工作时间'
        WHEN TIME(r.event_time) BETWEEN '17:00:00' AND '19:59:59' THEN '晚高峰'
        ELSE '休息时间'
    END AS period,
    COUNT(r.order_id) AS get_car_num,
    -- Request time -> order time.
    ROUND(AVG(TIMESTAMPDIFF(SECOND, r.event_time, o.order_time)) / 60, 1) AS avg_wait_time,
    -- Order time -> ride start time.
    ROUND(AVG(TIMESTAMPDIFF(SECOND, o.order_time, o.start_time)) / 60, 1) AS avg_dispatch_time
FROM tb_get_car_order AS o
INNER JOIN tb_get_car_record AS r
    ON r.order_id = o.order_id
-- WEEKDAY() returns 0 for Monday ... 6 for Sunday, so 5 and 6 are the weekend.
WHERE WEEKDAY(r.event_time) NOT IN (5, 6)
GROUP BY period
ORDER BY get_car_num;
-- Maximum number of riders waiting at the same moment, per city, for ride
-- requests made in October 2021.
WITH wait_spans AS (
    -- One waiting interval per request. The wait ends at:
    --   * the record's end_time when no order was created,
    --   * otherwise the order's start_time, or its finish_time when
    --     start_time is NULL.
    SELECT
        a.uid,
        a.city,
        a.event_time AS start_time,
        CASE
            WHEN a.order_id IS NULL THEN a.end_time
            ELSE COALESCE(b.start_time, b.finish_time)
        END AS end_time
    FROM tb_get_car_record AS a
    LEFT JOIN tb_get_car_order AS b
        ON a.order_id = b.order_id
    WHERE DATE_FORMAT(a.event_time, '%Y-%m') = '2021-10'
),
checkpoints AS (
    -- Every instant at which the number of waiting riders can change.
    SELECT start_time AS dt FROM wait_spans
    UNION
    SELECT end_time AS dt FROM wait_spans
),
waiting_at AS (
    -- Riders whose waiting interval covers each checkpoint (both ends inclusive).
    SELECT
        t1.city,
        t2.dt,
        COUNT(t1.uid) AS wait_cnt
    FROM wait_spans AS t1
    INNER JOIN checkpoints AS t2
        ON t2.dt BETWEEN t1.start_time AND t1.end_time
    GROUP BY t1.city, t2.dt
)
SELECT
    city,
    MAX(wait_cnt) AS max_wait_uv
FROM waiting_at
GROUP BY city
ORDER BY max_wait_uv, city;
-- Shop totals: actual sales amount and average sales amount per distinct buyer.
SELECT
    SUM(sales_price) AS sales_total,
    ROUND(SUM(sales_price) / COUNT(DISTINCT user_id), 2) AS per_trans
FROM sales_tb;
-- Shop discount rate (%): actual sales amount / amount at tag price.
SELECT
    -- The alias needs quoting because of the parentheses; backticks mark it as
    -- an identifier rather than a string literal.
    ROUND(SUM(sales_price) / SUM(sales_num * tag_price) * 100, 2) AS `discount_rate(%)`
FROM sales_tb AS s
LEFT JOIN product_tb AS p
    ON s.item_id = p.item_id;
-- Per style: pin_rate (%) and sell_through_rate (%).
--   pin_rate          = units sold / (inventory - units sold)
--   sell_through_rate = sales amount / inventory value at tag price
WITH stock AS (
    SELECT
        style_id,
        SUM(inventory) AS inventory_total,
        SUM(tag_price * inventory) AS inventory_price
    FROM product_tb
    GROUP BY style_id
),
sales AS (
    SELECT
        style_id,
        SUM(sales_num) AS sales_num_total,
        SUM(sales_price) AS gmv
    FROM sales_tb
    LEFT JOIN product_tb AS p USING (item_id)
    GROUP BY style_id
)
SELECT
    style_id,
    ROUND(sales_num_total / (inventory_total - sales_num_total) * 100, 2) AS pin_rate,
    ROUND(gmv / inventory_price * 100, 2) AS sell_through_rate
FROM stock
INNER JOIN sales USING (style_id)
ORDER BY style_id;
-- Sign-up conversion rate (%) per course: sum of if_sign / sum of if_vw.
SELECT
    b.course_id,
    b.course_name,
    ROUND((a.count1 / a.count2) * 100, 2) AS `sign_rate(%)`
FROM (
    -- One row per course; a plain GROUP BY yields the same rows as
    -- DISTINCT over per-course window sums.
    SELECT
        course_id,
        SUM(if_sign) AS count1,
        SUM(if_vw) AS count2
    FROM behavior_tb
    GROUP BY course_id
) AS a
INNER JOIN course_tb AS b
    ON a.course_id = b.course_id
ORDER BY b.course_id;
-- Number of distinct users online in each course room at 19:00:00:
-- users who entered at or before 19:00 and left at or after 19:00.
SELECT
    c.course_id,
    c.course_name,
    COUNT(DISTINCT a.user_id) AS online_num
FROM course_tb AS c
INNER JOIN attend_tb AS a
    ON a.course_id = c.course_id
WHERE TIME(a.in_datetime) <= '19:00:00'
  AND TIME(a.out_datetime) >= '19:00:00'
GROUP BY c.course_id, c.course_name
ORDER BY c.course_id;
-- Average watch time per course name, in minutes.
-- TIMESTAMPDIFF(MINUTE, ...) yields whole minutes per attendance row; the
-- average is taken over those whole-minute values.
SELECT
    c.course_name,
    ROUND(AVG(TIMESTAMPDIFF(MINUTE, a.in_datetime, a.out_datetime)), 2) AS avg_Len
FROM attend_tb AS a
INNER JOIN course_tb AS c
    ON a.course_id = c.course_id
GROUP BY c.course_name
ORDER BY avg_Len DESC;
-- Attendance rate (%) per course: users with an attendance record of at least
-- 10 minutes / users who signed up.
WITH attended AS (
    SELECT
        course_id,
        COUNT(DISTINCT user_id) AS online_num
    FROM attend_tb
    WHERE TIMESTAMPDIFF(MINUTE, in_datetime, out_datetime) >= 10
    GROUP BY course_id
),
signed AS (
    SELECT
        course_id,
        COUNT(DISTINCT user_id) AS attend_num
    FROM behavior_tb
    WHERE if_sign = 1
    GROUP BY course_id
),
rates AS (
    SELECT
        a.course_id,
        ROUND(a.online_num / b.attend_num * 100, 2) AS `attend_rate(%)`
    FROM attended AS a
    LEFT JOIN signed AS b
        ON a.course_id = b.course_id
)
SELECT
    d.course_id,
    c.course_name,
    d.`attend_rate(%)`
FROM rates AS d
LEFT JOIN course_tb AS c USING (course_id)
-- Sort the final result; an ORDER BY inside a derived table does not
-- guarantee the order of the outer query.
ORDER BY d.course_id;
-- Maximum number of simultaneous viewers per course.
-- Each attendance row contributes +1 at in_datetime and -1 at out_datetime;
-- the running sum of these events is the number of viewers online.
WITH events AS (
    SELECT user_id, course_id, in_datetime AS dt, 1 AS flag
    FROM attend_tb
    UNION ALL
    SELECT user_id, course_id, out_datetime AS dt, -1 AS flag
    FROM attend_tb
),
running AS (
    SELECT
        course_id,
        -- Events sharing the same dt are peers under the default frame, so
        -- they are netted together.
        -- NOTE(review): the article query in this file orders by (dt, num DESC)
        -- to count entries before exits; confirm which tie rule is wanted here.
        SUM(flag) OVER (PARTITION BY course_id ORDER BY dt) AS num
    FROM events
)
SELECT
    b.course_id,
    c.course_name,
    MAX(b.num) AS max_num
FROM running AS b
LEFT JOIN course_tb AS c USING (course_id)
GROUP BY b.course_id, c.course_name
ORDER BY b.course_id;
-- Part 7: Q&A answers scenario (content industry)
-- Average number of answers per author for each day of November 2021.
SELECT
    answer_date,
    ROUND(COUNT(issue_id) / COUNT(DISTINCT author_id), 2) AS per_num
FROM answer_tb
WHERE DATE_FORMAT(answer_date, '%Y-%m') = '2021-11'
GROUP BY answer_date
ORDER BY answer_date;
-- Number of high-quality answers (char_len >= 100) by author level band.
WITH banded AS (
    SELECT
        -- Levels outside 1..6 fall through to NULL. COUNT(level_cut) ignores
        -- NULLs, so such a group would be reported with a count of 0.
        CASE
            WHEN author_level IN (1, 2) THEN '1-2级'
            WHEN author_level IN (3, 4) THEN '3-4级'
            WHEN author_level IN (5, 6) THEN '5-6级'
        END AS level_cut
    FROM answer_tb AS an
    LEFT JOIN author_tb AS au
        ON an.author_id = au.author_id
    WHERE char_len >= 100
)
SELECT
    level_cut,
    COUNT(level_cut) AS ct
FROM banded
GROUP BY level_cut
ORDER BY ct DESC;
-- Authors who answered 3 or more questions on a single day, with the day
-- and the number of answers.
SELECT
    answer_date,
    author_id,
    COUNT(issue_id) AS answer_cnt
FROM answer_tb
GROUP BY answer_date, author_id
HAVING answer_cnt >= 3
ORDER BY answer_date, author_id;
-- Number of authors who answered at least one issue whose id starts with 'E'
-- and at least one issue whose id starts with 'C'.
-- (The original title calls these the education and workplace categories.)
SELECT
    COUNT(DISTINCT author_id) AS num
FROM answer_tb
WHERE issue_id LIKE 'E%'
  AND author_id IN (
      SELECT author_id
      FROM answer_tb
      WHERE issue_id LIKE 'C%'
  );