Bootstrap

SQL题目

1 聚合为数组

  什么是数组 如[800,2975,3000,1100,3000]

-- Collapse every emp.sal value into a single array (duplicates are kept).
SELECT
    COLLECT_LIST(sal)
FROM emp;

2 将数组以任意格式聚合

2.1 有重复

-- Join all area_manager_name values (duplicates kept) into one comma-separated string.
-- NOTE(review): the notes omit the FROM clause of this snippet.
SELECT
    CONCAT_WS(',', COLLECT_LIST(area_manager_name))

2.2 没有重复

-- Join the distinct area_manager_name values into one comma-separated string.
-- NOTE(review): the notes omit the FROM clause of this snippet.
SELECT
    CONCAT_WS(',', COLLECT_SET(area_manager_name))

2.3 一行转多行

 -- One row to many rows:
 -- 1) split() cuts the comma-separated string and returns an array;
 -- 2) explode() takes that array and emits one row per element.
 -- The select list is elided ("...") in the notes.
	select 
		...  
	FROM    
		nczbigdata.ods_src_membercenter_ncz_member_card
		LATERAL VIEW OUTER explode(split(car_brand_no,',')) t AS car_brand_no
    WHERE   
    	pt = '${thedate}'

2.4 多行转多列

-- Pivot attribute rows (attr_id, attr_value) into one row per mid.
-- The inner query packs each mid's attributes into a map; the outer query
-- reads individual attributes back out of the map by attr_id.
select
        mid,
        kv2['10000029'] as gb_acc_name, -- gearbox product name, picked by attr_id
        kv1['51']       as gb_no,       -- gearbox model
        kv1['51']       as gb_sub_no,   -- gearbox sub-model  NOTE(review): same key as gb_no - confirm
        kv1['52']       as company      -- gearbox manufacturer
from (
    SELECT
        mid
        -- build 'attr_id-attr_value' pairs, join them with ',' and parse the
        -- string back into a map keyed by attr_id
        ,str_to_map(concat_ws(',', collect_set(concat(attr_id, '-', attr_value)))
                    ,','
                    ,'-'
                    ) kv1
    -- XXXX: elided in the notes (presumably the kv2 map, the source table and GROUP BY mid)
) t1

3 连续登录

3.1 方案一

-- Runs of at least 3 consecutive login days per user (gaps-and-islands).
-- Subtracting the per-user row number from the login date gives the same
-- anchor date for every day of an unbroken run, so the anchor is the run key.
WITH numbered AS (
    SELECT
        use_id,
        login_date,
        ROW_NUMBER() OVER (PARTITION BY use_id ORDER BY login_date) AS rn
    FROM test
)
SELECT
    n.use_id,
    DATE_SUB(n.login_date, INTERVAL n.rn DAY) AS date,
    COUNT(1) AS counts
FROM numbered AS n
GROUP BY
    n.use_id,
    DATE_SUB(n.login_date, INTERVAL n.rn DAY)
HAVING COUNT(1) >= 3

3.2 方案二

-- Runs of more than 7 consecutive login days per user.
--   rn1 = day offset from the user's first login, plus 1
--   rn  = row number of the login day in date order
-- rn1 - rn stays constant inside an unbroken run, so it identifies the run.
-- NOTE(review): assumes table a holds one row per (uuid, DATE) - confirm.
SELECT
    uuid
    ,COUNT(1) AS consecutive_days
FROM (
    SELECT
        uuid
        ,datediff(
            concat(DATE,' 00:00:00')
            ,concat(min_date,' 00:00:00')
            ,'dd'
        ) + 1 AS rn1
        ,rn
    FROM (
        SELECT
            a.uuid
            ,ROW_NUMBER() OVER ( PARTITION BY uuid ORDER BY DATE ) AS rn
            ,min(DATE) OVER ( PARTITION BY uuid ) AS min_date
            ,DATE
        FROM a
    ) t1
) t2
-- Group per run, not per user: grouping by uuid alone would count every
-- login day of the user, whether the days are consecutive or not.
GROUP BY uuid, rn1 - rn
HAVING COUNT(1) > 7

4 次日留存

-- Next-day retention on orders: (create_date, user_id) pairs where the same
-- user placed an order again on the following calendar day.
with a as
(
    select create_date
          ,user_id
          -- next distinct order date of the same user
          ,lead(create_date, 1) over (partition by user_id order by create_date) as next_create_date
    from (
        -- one row per user per day first, so lead() can never return the same day
        SELECT substr(create_time,1,10) AS create_date
              ,user_id
        FROM nczbigdata.dws_trade_orders
        WHERE pt = 20220207
        group by substr(create_time,1,10)
                ,user_id
    ) d
)
SELECT t1.create_date
      ,t1.user_id
      ,t1.next_create_date
FROM a t1
WHERE datediff(concat(t1.next_create_date,' 00:00:00'),concat(t1.create_date,' 00:00:00'),'dd') = 1

5 互相关注

-- Mutual follows. Every follow row is normalised to the key 'larger-smaller',
-- so both directions of a pair share one key; a key that appears more than
-- once means the two users follow each other.
WITH a AS (
    SELECT 1 AS to_user, 2 AS from_user
    UNION ALL
    SELECT 1 AS to_user, 3 AS from_user
    UNION ALL
    SELECT 1 AS to_user, 4 AS from_user
    UNION ALL
    SELECT 2 AS to_user, 1 AS from_user
    UNION ALL
    SELECT 2 AS to_user, 3 AS from_user
    UNION ALL
    SELECT 2 AS to_user, 4 AS from_user
    UNION ALL
    SELECT 3 AS to_user, 1 AS from_user
    UNION ALL
    SELECT 3 AS to_user, 2 AS from_user
    UNION ALL
    SELECT 3 AS to_user, 4 AS from_user
),
pair_key AS (
    SELECT
        to_user,
        from_user,
        CASE
            WHEN to_user > from_user THEN CONCAT(to_user, '-', from_user)
            ELSE CONCAT(from_user, '-', to_user)
        END AS value
    FROM a
)
SELECT
    value,
    COUNT(1)
FROM pair_key
GROUP BY value
HAVING COUNT(1) > 1

6 先进先出 库龄计算

-- Running totals per product in date order, the starting point of a FIFO
-- stock-age calculation.
SELECT
    pid,
    date,
    qty,
    -- running total of the negative quantities (presumably stock going out)
    SUM(CASE WHEN qty < 0 THEN qty ELSE 0 END) OVER (PARTITION BY pid ORDER BY date),
    -- running total of the positive quantities (presumably stock coming in)
    SUM(CASE WHEN qty > 0 THEN qty ELSE 0 END) OVER (PARTITION BY pid ORDER BY date),
    -- running balance: the two totals above added together
    SUM(CASE WHEN qty < 0 THEN qty ELSE 0 END) OVER (PARTITION BY pid ORDER BY date)
        + SUM(CASE WHEN qty > 0 THEN qty ELSE 0 END) OVER (PARTITION BY pid ORDER BY date)
FROM p_dw

7 统计2021年国庆头3天每类视频每天的近一周总点赞量和一周内最大单天转发量

问题背景链接

-- For every video tag and each of 2021-10-01 .. 2021-10-03: likes summed over
-- the current and the 6 preceding daily rows, and the largest daily retweet
-- count inside that same window.
WITH daily AS (
    -- one row per tag per day
    SELECT
        b.tag,
        DATE_FORMAT(start_time, '%Y-%m-%d') AS dt,
        SUM(if_like) AS sum_like_cnt,
        SUM(if_retweet) AS retweet_cnt_7d
    FROM tb_user_video_log a
    LEFT JOIN tb_video_info b
        ON a.video_id = b.video_id
    GROUP BY
        b.tag,
        DATE_FORMAT(start_time, '%Y-%m-%d')
),
rolling AS (
    SELECT
        tag,
        dt,
        SUM(sum_like_cnt) OVER w AS sum_like_cnt_7d,
        MAX(retweet_cnt_7d) OVER w AS max_retweet_cnt_7d
    FROM daily
    -- row-based frame: 7 rows are 7 days only if the tag has data on every day
    WINDOW w AS (
        PARTITION BY tag
        ORDER BY dt
        ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
    )
)
SELECT *
FROM rolling
WHERE dt BETWEEN '2021-10-01' AND '2021-10-03'
ORDER BY tag DESC, dt

8 每篇文章同一时刻最大在看人数

链接:

-- Peak number of simultaneous readers per article (sweep line):
-- every visit becomes a +1 event at in_time and a -1 event at out_time; the
-- running sum of the events in time order is the number of readers at that
-- moment, and its maximum is the peak.
select
  artical_id,
  max(cnt) as max_uv
from
  (
    select
      artical_id,
      sum(mark) over (
        -- keep a separate running count per article; without the partition
        -- the readers of all articles are added together
        partition by artical_id
        order by
          times,
          mark desc  -- at the same instant, count entries before exits
      ) as cnt
    from
      (
        select
          artical_id,
          uid,
          in_time as times,
          1 as mark
        from
          tb_user_log
        where
          artical_id <> 0
        union all
        select
          artical_id,
          uid,
          out_time as times,
          -1 as mark
        from
          tb_user_log
        where
          artical_id <> 0
      ) a
  ) b
group by
  artical_id
order by
  max_uv desc

9 每天新用户的次日留存率

链接:

-- Next-day retention rate of new users, per first-activity day.
-- A user is "new" on the day that has no earlier active day (last_date is
-- null) and "retained" when the next active day is exactly one day later.
select
  dt,
  case when count(distinct uid)=0 then 0 else round(count(
    distinct case
      when datediff(next_date, dt) = 1 then uid
    end
  ) /count(distinct uid),2)  end as rate
from
  (
    select
      uid,
      dt,
      -- explicit ORDER BY: without it lead/lag pick an arbitrary neighbour row
      lead(dt, 1) over (partition by uid order by dt) as next_date,
      lag(dt, 1) over (partition by uid order by dt) as last_date
    from
      (
        -- one row per user per active day, so several visits on the same day
        -- cannot hide the following day from lead()
        select
          uid,
          substr(in_time, 1, 10) as dt
        from
          tb_user_log
        group by
          uid,
          substr(in_time, 1, 10)
      ) d
  ) t1
where
  last_date is null and dt>='2021-11-01'
group by
  dt
order by
  dt

10 连续签到领金币

牛客网链接:

解题思路:

1、按用户id 和(签到日期 - 行号rn)分组:同一段连续签到内该差值相同,组内再次编号即得到连续签到的第几天

2、
  • 当签到天数%7=3 则领取3金币
  • 当签到天数%7=0 则领取7金币
  • 其余情况,领取1金币

-- Coins earned per user per month from daily sign-ins between 2021-07-07 and
-- 2021-10-31. Within a run of consecutive sign-in days, day number N earns
-- 3 coins when N % 7 = 3, 7 coins when N % 7 = 0, and 1 coin otherwise.
select
  uid,
  substr(in_date, 1, 6) as month,
  sum(
    case
      -- mod() replaces the Hive-only pmod(); days is always >= 1, so the result is the same
      when mod(days, 7) = 3 then 3
      when mod(days, 7) = 0 then 7
      else 1
    end
  ) as coin
from
  (
    select
      uid,
      -- position of the day inside its run of consecutive sign-ins
      row_number() over (
        partition by uid,
        daydiff
        order by
          in_date
      ) as days,
      in_date
    from
      (
        select
          uid,
          in_date,
          -- real date arithmetic: numeric "date - rn" breaks at month boundaries
          -- (20210801 - 26 and 20210731 - 25 differ although the days are adjacent)
          date_sub(in_date, interval rn day) as daydiff,
          rn
        from
          (
            select
              uid,
              in_date,
              row_number() over (
                partition by uid
                order by
                  in_date
              ) as rn
            from
              (
                -- one row per user per sign-in day, as 'YYYYMMDD'
                select
                  uid,
                  date_format(in_time, '%Y%m%d') as in_date
                from
                  tb_user_log a
                where
                  artical_id = 0
                  and sign_in = 1
                  and substr(in_time, 1, 10) between '2021-07-07' and '2021-10-31'
                group by
                  uid,
                  date_format(in_time, '%Y%m%d')
              ) t1
          ) t2
      ) t3
  ) t4
group by
  uid,
  substr(in_date, 1, 6)
order by
  uid,
  month

11 某乎问答最大连续回答问题天数大于等于3天的用户及其对应等级

牛客链接:

-- Authors whose longest run of consecutive answer days is at least 3, with
-- their level and the length of that longest run.
select
  t4.author_id,
  t3.author_level,
  -- an author can have several qualifying runs; report only the longest
  max(t4.days_cnt) as days_cnt
from
  (
    select
      author_id,
      diff_date,
      count(1) as days_cnt
    from
      (
        select
          answer_date,
          author_id,
          rn,
          -- anchor date shared by all days of one run; date_sub keeps this a
          -- calendar operation (numeric "date - n" breaks at month boundaries)
          date_sub(answer_date, interval rn day) as diff_date
        from
          (
            select
              answer_date,
              author_id,
              row_number() over (
                partition by author_id
                order by
                  answer_date
              ) as rn
            from
              (
                -- one row per author per answer day
                select
                  answer_date,
                  author_id
                from
                  answer_tb a
                group by
                  answer_date,
                  author_id
              ) t1
          ) t2
      ) t5
    group by
      author_id,
      diff_date
    having
      count(1) >= 3
  ) t4
  left join author_tb t3 on t4.author_id = t3.author_id
group by
  t4.author_id,
  t3.author_level
order by
  t4.author_id

12 牛客直播开始时各直播间在线人数

牛客网链接:

-- Viewers online in each live room at 19:00:00: they entered at or before
-- that moment and left after it.
select
    a.course_id,
    b.course_name,
    count(distinct user_id) as online_num
from attend_tb a
left join course_tb b on a.course_id = b.course_id
-- compare at second precision; truncating to '%H:%i' treats an entry at
-- 19:00:30 as "online at 19:00" and an exit at 19:00:30 as "already gone"
where time(in_datetime) <= '19:00:00'
  and time(out_datetime) > '19:00:00'
group by a.course_id, b.course_name

13 行列转换

  描述:表中记录了各年份各部门的平均绩效考核成绩。

  表名:t1
  表结构:
    a – 年份
    b – 部门
    c – 绩效得分
  表内容:

	a		b	c
	2014	B	9
	2015	A	8
	2014	A	10
	2015	B	7

13.1 多行转多列

  问题描述:将上述表内容转为如下输出结果所示:

	a		col_A 	col_B 
	2014 	10		9
	2015 	8		7

  参考答案:

-- Pivot: one row per year (a) with one score column per department (b).
-- Each (a, b) pair holds one score in the sample data, so MAX just picks it.
SELECT
    a,
    MAX(CASE b WHEN 'A' THEN c END) AS col_A,
    MAX(CASE b WHEN 'B' THEN c END) AS col_B
FROM t1
GROUP BY a;

13.2 将问题一的结果转成源表,问题一结果表名为 t1_2(多列转多行)

参考答案:

-- Unpivot: turn the per-department columns of t1_2 back into (a, b, c) rows,
-- one UNION ALL branch per department column.
WITH unpivoted AS (
    SELECT a, 'A' AS b, col_a AS c FROM t1_2
    UNION ALL
    SELECT a, 'B' AS b, col_b AS c FROM t1_2
)
SELECT
    a,
    b,
    c
FROM unpivoted;

14 成绩表 score(student, course, score):找出某门课程成绩高于该课程平均成绩的学生

-- Students whose score in a course is above that course's average score.
-- A student appears once per course in which they beat the average.
select student
from (
    select student
          ,course
          ,score
          ,avg(score) over (partition by course) as avg_score
    from score
) t1
where score > avg_score

15 一张1-180天注册用户 活跃留存表

  现有一个用户活跃表user_active、用户注册表user_regist,表中分区字段都为p_date(yyyy-MM-dd),用户字段均为user_id;设计一张1-180天注册用户 活跃留存表;

  表:user_active
  字段:user_id,p_date

  表:user_regist
  字段:user_id,p_date

-- Retention of users registered in the last 180 days: for each registration
-- date and each day offset 1..180, the share of that day's registrants who
-- were active on the offset day.
-- Both tables only have user_id and the partition column p_date (see the
-- schema described above), so p_date serves as registration / activity date.
select regist_date
      ,date_diff
      ,user_count/regist_count as rate
from (
    select t1.regist_date
          ,max(t1.regist_count) as regist_count
          ,datediff(t2.active_date, t1.regist_date) as date_diff
          ,count(*) as user_count
    from (
        select user_id
              ,p_date as regist_date
              -- number of users registered on the same day (the denominator)
              ,count(*) over(partition by p_date) as regist_count
        from user_regist
        where p_date >= date_sub(current_date(), 180)
    ) t1
    -- inner join: the WHERE below discards unmatched rows anyway
    inner join (
        -- one row per user per active day
        select user_id
              ,p_date as active_date
        from user_active
        where p_date >= date_sub(current_date(), 180)
        group by user_id, p_date
    ) t2 on t1.user_id = t2.user_id
    where datediff(t2.active_date, t1.regist_date) >= 1
    and datediff(t2.active_date, t1.regist_date) <= 180
    group by t1.regist_date, datediff(t2.active_date, t1.regist_date)
) t

16 两个日期间所有日期

-- Expand [start_date, end_date] into one row per calendar day.
-- SPACE(n) builds a string of n blanks; splitting it on ' ' yields n + 1
-- (empty) elements, and POSEXPLODE returns each element with its index
-- x = 0..n, which is used as the day offset from start_date.
-- y is the (empty) element value and carries no information.
SELECT  t2.start_date
        ,t2.end_date
        ,x
        ,y
        ,DATE_ADD(start_date,x) AS date
FROM    (
            SELECT  start_date
                    ,end_date
                    ,DATEDIFF(end_date,start_date) AS diff
                    -- split on the blank itself: replacing the empty string '' behaves
                    -- differently across engines and can produce an extra element
                    ,SPLIT(SPACE(DATEDIFF(end_date,start_date)),' ') AS day_num
            FROM    (
                        SELECT  '2022-06-01' AS start_date
                                ,'2022-06-05' AS end_date
                    ) t1
        ) t2
LATERAL VIEW POSEXPLODE(day_num) t3 AS x,y

17 最大连续登录天数,间隔N天也算(下例中相邻两次登录相差不超过2天,即中间最多间隔1天,仍视为连续)

-- Longest login streak per user where a streak survives gaps of up to 2 days
-- between consecutive logins (i.e. one missed day is tolerated).
-- The result is the span in days from the first to the last login of the
-- longest streak, both ends included.
WITH game_user AS (
    SELECT 1001 AS id, '2022-05-01' AS dt
    UNION ALL
    SELECT 1001 AS id, '2022-05-03' AS dt
    UNION ALL
    SELECT 1001 AS id, '2022-05-05' AS dt
    UNION ALL
    SELECT 1001 AS id, '2022-05-06' AS dt
    UNION ALL
    SELECT 1001 AS id, '2022-05-07' AS dt
    UNION ALL
    SELECT 1002 AS id, '2022-05-01' AS dt
    UNION ALL
    SELECT 1002 AS id, '2022-05-04' AS dt
    UNION ALL
    SELECT 1002 AS id, '2022-05-05' AS dt
),
with_prev AS (
    -- previous login date; the 1970 default makes the first row open a streak
    SELECT
        id,
        dt,
        LAG(dt, 1, '1970-01-01') OVER (PARTITION BY id ORDER BY dt) AS lagdt
    FROM game_user
),
with_gap AS (
    SELECT
        id,
        dt,
        DATEDIFF(dt, lagdt) AS gap_days
    FROM with_prev
),
with_streak AS (
    -- a gap of more than 2 days starts a new streak; the running count of
    -- such breaks numbers the streaks
    SELECT
        id,
        dt,
        SUM(IF(gap_days > 2, 1, 0)) OVER (PARTITION BY id ORDER BY dt) AS streak_no
    FROM with_gap
),
streak_span AS (
    SELECT
        id,
        streak_no,
        DATEDIFF(MAX(dt), MIN(dt)) AS days
    FROM with_streak
    GROUP BY
        id,
        streak_no
)
SELECT
    id,
    MAX(days) + 1
FROM streak_span
GROUP BY id

18 某个用户连续的访问记录如果时间间隔小于60秒,则视为一个组

-- Session grouping: consecutive visits of a user that are less than 60
-- seconds apart belong to the same group. group_id is the running count of
-- "breaks" (gaps of 60 seconds or more) per user.
-- NOTE(review): assumes ts is a numeric timestamp in seconds - confirm.
select
    id,
    ts,
    -- ">= 60" because only gaps strictly below 60 seconds stay in one group
    sum(if(diff_ts >= 60, 1, 0)) over (partition by id order by ts) as group_id
from
(
    select
        id,
        ts,
        ts - lag_ts as diff_ts
    from
    (
        select
            id,
            ts,
            -- no previous row: default to 0, so the first visit opens a group
            lag(ts, 1, 0) over (partition by id order by ts) as lag_ts
        from tmp.table_test2
    ) t1
) t2;

19 打折日期交叉问题

  如下为平台商品促销数据:字段为品牌,打折开始日期,打折结束日期,计算每个品牌总的打折销售天数,注意其中的交叉日期,比如vivo品牌,第一次活动时间为2021-06-05到2021-06-15,第二次活动时间为2021-06-09到2021-06-21其中9号到15号为重复天数,只统计一次,即vivo总打折天数为2021-06-05到2021-06-21共计17天。

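| id     | stt        | edt        |
|--------|------------|------------|
| oppo   | 2021-06-05 | 2021-06-09 |
| oppo   | 2021-06-11 | 2021-06-21 |
| vivo   | 2021-06-05 | 2021-06-15 |
| vivo   | 2021-06-09 | 2021-06-21 |
| redmi  | 2021-06-05 | 2021-06-21 |
| redmi  | 2021-06-09 | 2021-06-15 |
| redmi  | 2021-06-17 | 2021-06-26 |
| huawei | 2021-06-05 | 2021-06-26 |
| huawei | 2021-06-09 | 2021-06-15 |
| huawei | 2021-06-17 | 2021-06-21 |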

1 获取当前行以前的数据最大的结束时间放在当前行

2 比较开始时间和下移的数据 如果开始时间大 则无需操作 反之需要移动下来的数据加1后替换当前行的开始时间 第一行数据无需替换

3 计算开始时间与结束时间的差值

4 按照品牌进行分组,对每条有效记录的(差值 + 1)求和

-- Total discount days per brand, counting overlapping promotion days once.
-- For every promotion, take the latest end date among the brand's earlier
-- promotions (ordered by start date). If the current start falls inside that
-- already-covered range, move it to the day after the range ends. The
-- adjusted [stt, edt] pieces no longer overlap and can simply be summed.
select
	id,
	-- a negative span means the promotion is fully covered by earlier ones and
	-- adds nothing; a span of 0 is a single remaining day and must count as 1
	sum(if(days >= 0, days + 1, 0)) as days
from
(
	select
		id,
		datediff(edt, stt) as days
	from
	(
		select
			id,
			-- first promotion of a brand (maxEdt is null) keeps its own start
			if(maxEdt is null, stt, if(stt > maxEdt, stt, date_add(maxEdt, 1))) as stt,
			edt
		from
		(
			select
				id,
				stt,
				edt,
				-- latest end date over all earlier rows, excluding the current one
				max(edt) over (
					partition by id
					order by stt, edt
					rows between unbounded preceding and 1 preceding
				) as maxEdt
			from test4
		) t1
	) t2
) t3
group by id

20 最多连胜的次数

输入:
Matches 表:
+-----------+------------+--------+
| player_id | match_day  | result |
+-----------+------------+--------+
| 1         | 2022-01-17 | Win    |
| 1         | 2022-01-18 | Win    |
| 1         | 2022-01-25 | Win    |
| 1         | 2022-01-31 | Draw   |
| 1         | 2022-02-08 | Win    |
| 2         | 2022-02-06 | Lose   |
| 2         | 2022-02-08 | Lose   |
| 3         | 2022-03-30 | Win    |
+-----------+------------+--------+
输出:
+-----------+----------------+
| player_id | longest_streak |
+-----------+----------------+
| 1         | 3              |
| 2         | 0              |
| 3         | 1              |
+-----------+----------------+
需要注意胜一次算连胜吗?

-- Longest winning streak per player, counted in consecutive matches.
-- dt_num numbers all matches of a player by date, num2 numbers them within
-- each result; dt_num - num2 is constant across an unbroken run of the same
-- result. Only wins add to the sum, so players without a win get 0.
select
	player_id,
	max(cnt) as cnt
from
(
	select
		player_id,
		dt_num - num2 as streak_key,
		sum(if_win) as cnt
	from
	(
		select
			player_id,
			-- case-insensitive match (the data stores 'Win'); numeric flag so it can be summed
			if(lower(result) = 'win', 1, 0) as if_win,
			row_number() over (partition by player_id order by match_day) as dt_num,
			row_number() over (partition by player_id, result order by match_day) as num2
		from
			matches
	) m
	group by
		player_id,
		dt_num - num2
) s
group by
	player_id;

21 求最大连胜天数

-- Longest winning streak per player, counted in consecutive calendar DAYS.
-- Same idea as the match-based version, but dt_num is the day number of the
-- match (days since a fixed reference date) rather than a row number, so a
-- day without a win breaks the run.
-- NOTE(review): assumes at most one match per player per day - confirm.
select
	player_id,
	max(cnt) as cnt
from
(
	select
		player_id,
		dt_num - num2 as streak_key,
		sum(if_win) as cnt
	from
	(
		select
			player_id,
			-- case-insensitive match (the data stores 'Win'); numeric flag so it can be summed
			if(lower(result) = 'win', 1, 0) as if_win,
			datediff(match_day, '1997-01-01') as dt_num,
			row_number() over (partition by player_id, result order by match_day) as num2
		from
			matches
	) m
	group by
		player_id,
		dt_num - num2
) s
group by
	player_id;

22 AB球队得分流水表,得到连续三次得分的队员名字和每次赶超对手的球员名字

问题:两支篮球队进行了激烈的篮球比赛,比分交替上升。比赛结束后,你有一张两队得分分数的明细表,记录了球队team,球员号码number,球员姓名name,得分分数score 以及得分时间score_time(datetime)。现在球队要对比赛中表现突出的球员做出嘉奖,所以请你用sql统计出:(1)连续三次得分的球员名字;(2)每次反超对手的球员名字。

方法1:

-- Players who scored three times in a row for their team.
-- A scoring row qualifies when it is the first, the middle or the last of
-- three consecutive scoring rows (within the team) by the same player.
select distinct a.name, a.team
from
(
    select
        name
        ,team
        ,lead(name,1) over(partition by team order by score_time) as ld1
        ,lead(name,2) over(partition by team order by score_time) as ld2
        ,lag(name,1) over(partition by team order by score_time) as lg1
        ,lag(name,2) over(partition by team order by score_time) as lg2
    -- bktab is the score log used by the other queries in this section;
    -- "table" is a reserved word and cannot be used as a table name
    from bktab
) a
where (a.name = a.ld1 and a.name = a.ld2)  -- first of three
or (a.name = a.ld1 and a.name = a.lg1)     -- middle of three
or (a.name = a.lg1 and a.name = a.lg2)     -- last of three
方法2:

-- Players with at least 3 consecutive scores for their team.
-- 1. Partition by team and order by score_time ascending.
-- 2. Fetch the name on the previous row.
-- 3. Flag each row: 1 when the name differs from the previous row, else 0.
-- 4. The running sum of the flags numbers the runs of consecutive scores.
-- 5. Counting rows per run gives the number of consecutive scores.
select
name
,cont_group
,count(name) as contin_cnt
,team
from (
    select
        team
        ,number
        ,score_time
        ,score
        ,name
        ,pre_name
        ,if_contin
        -- running sum of the "name changed" flags = run number within the team
        ,sum(if_contin) over (partition by team order by score_time asc) as cont_group

    from (
        select
        team
        ,number
        ,score_time
        ,score
        ,name
        -- name on the previous row of the same team
        ,lag(name) over (partition by team order by score_time asc) as pre_name
        -- 0 when the scorer is the same as on the previous row, 1 otherwise
        -- (the first row has no previous name and therefore gets 1)
        ,if(lag(name) over (partition by team order by score_time asc) = name
            ,0,1
            ) as if_contin
        from bktab
    ) t1
) t2
-- cont_group is numbered per team, so team must be part of the key;
-- otherwise same-named players on different teams could be merged
group by team
,name
,cont_group
-- minimum number of consecutive scores
having count(name) >= 3
;
反超对方球队的队员名称
-- Scoring rows on which the lead changed hands, i.e. the player who put
-- their team ahead. The very first score of the game also qualifies.
with running as (
    -- cumulative score of each team after every scoring row
    select
        team
        ,number
        ,score_time
        ,score
        ,name
        ,sum(if(team = 'A',score,0)) over (order by score_time asc) as ateam_score
        ,sum(if(team = 'B',score,0)) over (order by score_time asc) as bteam_score
    from bktab
),
margin as (
    -- positive: A leads, negative: B leads
    select
        running.*
        ,ateam_score - bteam_score as diff_score
    from running
),
with_prev as (
    select
        margin.*
        ,lag(diff_score) over (order by score_time asc) as pre_diff_score
    from margin
),
flagged as (
    select
        with_prev.*
        ,case when diff_score > 0 and pre_diff_score < 0 then 1             -- A leads now, B led on the previous row
              when diff_score < 0 and pre_diff_score > 0 then 1             -- B leads now, A led on the previous row
              when diff_score is not null and pre_diff_score is null then 1 -- first score of the game
              else 0
         end as if_surpass
    from with_prev
)
select
    team
    ,number
    ,score_time
    ,score
    ,name
    ,ateam_score
    ,bteam_score
from flagged
where if_surpass = 1

23 SQL实现次日、三日及七日用户留存率的计算

23.1、方案一

注意role_login_back 表去重

-- Next-day, 3-day and 7-day retention of new devices, per first-login day.
-- The first-login day counts as day 1, so "3-day" means 2 days later and
-- "7-day" means 6 days later.
select
	log_day '日期',
	count(user_id_d0) '新增数量',
	count(user_id_d1) / count(user_id_d0) '次日留存率',
	count(user_id_d3) / count(user_id_d0) '3日留存率',
	count(user_id_d7) / count(user_id_d0) '7日留存率'
from (
	-- DISTINCT collapses repeated logins on the same day, leaving one row per device
	select distinct
		a.log_day,
		a.user_id_d0,
		b.device_id as user_id_d1,
		c.device_id as user_id_d3,
		d.device_id as user_id_d7
	from
		(
		-- first login day of each device = the day it counts as new;
		-- only the calendar date matters, not the time of day
		select
			device_id as user_id_d0,
			min(date(event_time)) as log_day
		from role_login_back
		group by device_id
		) a
	left join role_login_back b
	on datediff(date(b.event_time), a.log_day) = 1
	and a.user_id_d0 = b.device_id
	left join role_login_back c
	on datediff(date(c.event_time), a.log_day) = 2
	and a.user_id_d0 = c.device_id
	left join role_login_back d
	on datediff(date(d.event_time), a.log_day) = 6
	and a.user_id_d0 = d.device_id
	) r
group by log_day;

23.2、方案二

-- Retention of the users active on 2022-05-01, using LEAD instead of
-- self-joins. After de-duplicating to one row per user per day, a day that is
-- k days later can only be among the next k active days, so the "2 days
-- later" check looks at next1..next2 and the "4 days later" check at
-- next1..next4.
SELECT  COUNT(CASE WHEN DATEDIFF(next1_date,date) = 2
                     OR DATEDIFF(next2_date,date) = 2 THEN uid END) / COUNT(uid)
        ,COUNT(CASE WHEN DATEDIFF(next1_date,date) = 4
                      OR DATEDIFF(next2_date,date) = 4
                      OR DATEDIFF(next3_date,date) = 4
                      OR DATEDIFF(next4_date,date) = 4 THEN uid END) / COUNT(uid)
FROM    (
            SELECT  uid
                    ,date
                    ,LEAD(date,1) OVER (PARTITION BY uid ORDER BY DATE ) AS next1_date
                    ,LEAD(date,2) OVER (PARTITION BY uid ORDER BY DATE ) AS next2_date
                    ,LEAD(date,3) OVER (PARTITION BY uid ORDER BY DATE ) AS next3_date
                    ,LEAD(date,4) OVER (PARTITION BY uid ORDER BY DATE ) AS next4_date
            FROM    (
                        -- one row per user per active day
                        SELECT  uid
                                ,date
                        FROM    t1
                        GROUP BY uid
                                 ,date
                    ) a
        ) b
WHERE   DATE = '2022-05-01'
;