当前位置：首页 > news >正文

天河建设网站开发/2022年最火的关键词

news 2025/8/3 5:44:47

天河建设网站开发,2022年最火的关键词,有哪些公司,酷站素材文章目录小练习一练习二：连续时间区间合并练习三：间隔连续问题练习四： 打折日期交叉问题小练习一 tableA 存储了一所大学所有人的所有科目的考试成绩，共4列：学号 （ stu_num ），考试科…

文章目录

小练习一
练习二：连续时间区间合并
练习三：间隔连续问题
练习四：打折日期交叉问题

小练习一

tableA 存储了一所大学所有人的所有科目的考试成绩，共4列：学号（ stu_num ），考试科目（subject），考试成绩（ score ）等级（ level ）(共SABCD五个值)，考试时间（ time ），
求：第一次考试的考试成绩等级为A，其它考试成绩等级都为B的学号。

select stu_num
from (select stu_num,part_cnt,sum(case when t.rank=1 and t.level='A' then 1 else flag_b end ) as flag_bfrom (selectstu_num,level,case when level='B' then 1 else 0 as flag_b,sum(stu_num) over (partition by stu_num) as part_cnt,row_num()over(partition by stu_num order by time ) as rankfrom tableA)tgroup by stu_num,part_cnt)t2
where t2.flag_b=t2.part_cnt
;

练习二：连续时间区间合并

需求：连续时间区间合并
- user_id 用户id
- location_id 地点
- start_time 停留开始时间（时间）
- stop_mint 停留时间（分钟）

--数据
select *
from tmpdb.user_location 
;
--原始数据
-- user_location.user_id	user_location.location_id	user_location.start_time	user_location.stop_mint
-- 1	A	2018-01-0108:00:00	60
-- 1	A	2018-01-0109:00:00	60
-- 2	B	2018-01-0110:00:00	60
-- 1	A	2018-01-0111:00:00	60--结果
-- user_id	location_id	start_time	stop_mint
-- 1	A	2018-01-01 08:00:00	120.0
-- 1	A	2018-01-01 11:00:00	60.0
-- 2	B	2018-01-01 10:00:00	60.0

思路
- step1:打标签is_first，该标签为向后开始合并的第一条
- step2:开窗聚合标签，聚合出来group_i，即这些数据是需要合并的一组
- step3:取到各组的最小开始时间，及最大结束时间，即为合并区间的起始和结束
答案解析


select  user_id,location_id,start_time,(unix_timestamp(end_time,'yyyy-MM-dd HH:mm:ss') - unix_timestamp(start_time,'yyyy-MM-dd HH:mm:ss'))/60 as stop_mint --step6:按组取到合并后的开始时间，结束时间
from
(select  user_id,location_id,grp,min(start_time) as start_time,max(end_time)   as end_timefrom(select  user_id,location_id,start_time,end_time,sum(is_first)over(partition by user_id,location_id order by start_time) as grpfrom(select  user_id,location_id,start_time,end_time,case when start_time > lag_end_time then 1                                                          -- 两类：连续的时间的起始 ；不连续的时间else 0 end is_firstfrom(select  user_id,location_id,start_time,end_time,lag(end_time,1,0)over(partition by user_id,location_id order by start_time)    as lag_end_time     -- 上一条的结束时间,lead(start_time,1,0)over(partition by user_id,location_id order by start_time) as lead_start_time  -- 下一条的开始时间from(select  user_id,location_id,from_unixtime(unix_timestamp(start_time,'yyyy-MM-ddHH:mm:ss') ,'yyyy-MM-dd HH:mm:ss')                 as start_time -- 停留开始时间,from_unixtime( unix_timestamp(start_time,'yyyy-MM-ddHH:mm:ss') + stop_mint*60 ,'yyyy-MM-dd HH:mm:ss') as end_time -- 停留结束时间from tmpdb.user_location) time_fix  --step1:将时间整理成标准时间，并计算出停留结束时间) time_fix_lag_lead--step2 :取到上一条结束时间，下一条开始时间) pre_grp_flag--step3: 将 需要向后 合并的开始时间  置为 1) pre_grp--step4: 将 需要合并的数据分组group by  user_id,location_id,grp
) grp--step5:按组取到合并后的开始时间，结束时间
;

详细拆分（看懂上面的下面的请忽略，这里为了方便理解，分的步骤贼散）

--step1:将时间整理成标准时间，并计算出停留结束时间
with time_fix as 
(
select user_id	,location_id,from_unixtime(unix_timestamp(start_time,'yyyy-MM-ddHH:mm:ss') ,'yyyy-MM-dd HH:mm:ss')  as start_time --停留开始时间,from_unixtime( unix_timestamp(start_time,'yyyy-MM-ddHH:mm:ss') + stop_mint*60 , 'yyyy-MM-dd HH:mm:ss')  as end_time -- --停留结束时间
from tmpdb.user_location
)
-- user_id	location_id	start_time	end_time
-- 1	A	2018-01-01 08:00:00	2018-01-01 09:00:00
-- 1	A	2018-01-01 09:00:00	2018-01-01 10:00:00
-- 2	B	2018-01-01 10:00:00	2018-01-01 11:00:00
-- 1	A	2018-01-01 11:00:00	2018-01-01 12:00:00--step2 :取到上一条结束时间，下一条开始时间
, time_fix_lag_lead as
(
select user_id	,location_id,start_time,end_time,lag(end_time,1,0)over(partition by user_id,location_id order by start_time) as lag_end_time --上一条的结束时间,lead(start_time,1,0)over(partition by user_id,location_id order by start_time) as lead_start_time --下一条的开始时间
from time_fix 
)
-- user_id	location_id	start_time	end_time	lag_end_time	lead_start_time
-- 1	A	2018-01-01 08:00:00	2018-01-01 09:00:00	0	2018-01-01 09:00:00
-- 1	A	2018-01-01 09:00:00	2018-01-01 10:00:00	2018-01-01 09:00:00	2018-01-01 11:00:00
-- 1	A	2018-01-01 11:00:00	2018-01-01 12:00:00	2018-01-01 10:00:00	0
-- 2	B	2018-01-01 10:00:00	2018-01-01 11:00:00	0	0--step3: 将 需要向后 合并的开始时间  置为 1
,pre_grp_flag as 
(
select user_id	,location_id	,start_time,end_time,case when start_time > lag_end_time then 1 --两类：连续的时间的起始 ；不连续的时间else 0 end is_first  
from time_fix_lag_lead
)
-- user_id	location_id	start_time	end_time	is_first
-- 1	A	2018-01-01 08:00:00	2018-01-01 09:00:00	1
-- 1	A	2018-01-01 09:00:00	2018-01-01 10:00:00	0
-- 1	A	2018-01-01 11:00:00	2018-01-01 12:00:00	1
-- 2	B	2018-01-01 10:00:00	2018-01-01 11:00:00	1--step4: 将 需要合并的数据分组
,pre_grp as 
(
select user_id	,location_id	,start_time,end_time,sum(is_first)over(partition by user_id,location_id order by start_time) as grp
from pre_grp_flag
)
-- user_id	location_id	start_time	end_time	grp
-- 1	A	2018-01-01 08:00:00	2018-01-01 09:00:00	1
-- 1	A	2018-01-01 09:00:00	2018-01-01 10:00:00	1
-- 1	A	2018-01-01 11:00:00	2018-01-01 12:00:00	2
-- 2	B	2018-01-01 10:00:00	2018-01-01 11:00:00	1--step5:按组取到合并后的开始时间，结束时间
,grp as 
(
select user_id,location_id,grp,min(start_time) as start_time,max(end_time) as end_time
from pre_grp
group by user_id,location_id,grp
)
-- user_id	location_id	grp	start_time	end_time
-- 1	A	1	2018-01-01 08:00:00	2018-01-01 10:00:00
-- 1	A	2	2018-01-01 11:00:00	2018-01-01 12:00:00
-- 2	B	1	2018-01-01 10:00:00	2018-01-01 11:00:00--step6:按组取到合并后的开始时间，结束时间
select user_id	,location_id	,start_time,(unix_timestamp(end_time,'yyyy-MM-dd HH:mm:ss') - unix_timestamp(start_time,'yyyy-MM-dd HH:mm:ss'))/60 as stop_mint
from grp 
;
-- user_id	location_id	start_time	stop_mint
-- 1	A	2018-01-01 08:00:00	120.0
-- 1	A	2018-01-01 11:00:00	60.0
-- 2	B	2018-01-01 10:00:00	60.0

练习三：间隔连续问题

需求：计算每个用户最大的连续登录天数，可以间隔一天。解释：如果一个用户在 1,3,5,6 登录游戏，则视为连续 6 天登录。
结果 1001 5天 1002 2天
准备数据

-- 创建数据
create table tmpdb.hanjiaxiaozhi as select 1001 as id , '2021-12-12' as dt
union all select 1002 as id , '2021-12-12' as dt
union all select 1001 as id , '2021-12-13' as dt
union all select 1001 as id , '2021-12-14' as dt
union all select 1001 as id , '2021-12-16' as dt
union all select 1002 as id , '2021-12-16' as dt
union all select 1001 as id , '2021-12-19' as dt
union all select 1002 as id , '2021-12-17' as dt
union all select 1001 as id , '2021-12-20' as dt
;-- 查看数据
select *
from tmpdb.hanjiaxiaozhi 
;
-- hanjiaxiaozhi.id	hanjiaxiaozhi.dt
-- 1001	2021-12-12
-- 1002	2021-12-12
-- 1001	2021-12-13
-- 1001	2021-12-14
-- 1001	2021-12-16
-- 1002	2021-12-16
-- 1001	2021-12-19
-- 1002	2021-12-17
-- 1001	2021-12-20

解决

select  id,max(max_login) as max_login
from
(select  id,grp_id,datediff(max(dt),min(dt))+1 as max_loginfrom(select  id,dt,lag_dt,sum(if(datediff(dt,lag_dt) > 2,1,0)) over (partition by id order by dt) as grp_idfrom(select  id,dt,lag(dt,1,'1970-01-01') over(partition by id order by dt) as lag_dtfrom tmpdb.hanjiaxiaozhi) pre -- step1:拿到上一条dt)pre_3 --step23:做差值，这里是可以间隔1天，所以如果相减小于2，那么记为0，否则记为1group by  id,grp_id
)t
group by  id --step4:同一grp_id下求同一组最大dt-最小dt，再求最大值 即为该用户最大连续登陆天数
;

详细拆分（看懂上面的下面的请忽略，这里为了方便理解，分的步骤贼散）

-- step1:拿到上一条dt
with pre as
(selectid,dt,lag(dt,1,'1970-01-01') over(partition by id order by dt) as lag_dt
from tmpdb.hanjiaxiaozhi 
)
-- id	dt	lag_dt
-- 1001	2021-12-12	1970-01-01
-- 1001	2021-12-13	2021-12-12
-- 1001	2021-12-14	2021-12-13
-- 1001	2021-12-16	2021-12-14
-- 1001	2021-12-19	2021-12-16
-- 1001	2021-12-20	2021-12-19
-- 1002	2021-12-12	1970-01-01
-- 1002	2021-12-16	2021-12-12
-- 1002	2021-12-17	2021-12-16--step2:做差值，这里是可以间隔1天，所以如果相减小于2，那么记为0，否则记为1
,pre_2 as
(
select id ,dt,lag_dt,if(datediff(dt,lag_dt)<=2,0,1) as is_first
from pre
)
-- id	dt	lag_dt	is_first
-- 1001	2021-12-12	1970-01-01	1
-- 1001	2021-12-13	2021-12-12	0
-- 1001	2021-12-14	2021-12-13	0
-- 1001	2021-12-16	2021-12-14	0
-- 1001	2021-12-19	2021-12-16	1
-- 1001	2021-12-20	2021-12-19	0
-- 1002	2021-12-12	1970-01-01	1
-- 1002	2021-12-16	2021-12-12	1
-- 1002	2021-12-17	2021-12-16	0-- step3:sum开窗分组
,pre_3 as 
(
select id,dt,lag_dt,is_first,sum(is_first) over (partition by id order by dt) as grp_id
from pre_2
)
-- id	dt	lag_dt	is_first	grp_id
-- 1001	2021-12-12	1970-01-01	1	1
-- 1001	2021-12-13	2021-12-12	0	1
-- 1001	2021-12-14	2021-12-13	0	1
-- 1001	2021-12-16	2021-12-14	0	1
-- 1001	2021-12-19	2021-12-16	1	2
-- 1001	2021-12-20	2021-12-19	0	2
-- 1002	2021-12-12	1970-01-01	1	1
-- 1002	2021-12-16	2021-12-12	1	2
-- 1002	2021-12-17	2021-12-16	0	2--这里step2和3可以合并，之所以分开是为了看起来方便
-- ,pre_3 as 
-- (
-- select 
--     id
--     ,dt
--     ,lag_dt
--     ,sum(if(datediff(dt,lag_dt)>2,1,0)) over (partition by id order by dt) as grp_id
-- from pre
-- )--step4:同一grp_id下求同一组最大dt-最小dt，再求最大值 即为该用户最大连续登陆天数
selectid,max(max_login) as max_login
from(selectid,grp_id,datediff(max(dt),min(dt))+1 as max_loginfrom pre_3group by id,grp_id)t
group by id
;
-- id	max_login
-- 1001	5
-- 1002	2

练习四：打折日期交叉问题

需求
如下为平台商品促销数据：字段为品牌，打折开始日期，打折结束日期
计算每个品牌总的打折销售天数，注意其中的交叉日期，比如 vivo 品牌，第一次活动时间为 2021-06-05 到 2021-06-15，第二次活动时间为 2021-06-09 到 2021-06-21 其中 9 号到 15号为重复天数，只统计一次，即 vivo 总打折天数为 2021-06-05 到 2021-06-21 共计 17 天。
准备数据

create table py_tmpdb.hanjiaxiaozhi asselect 'oppo' as brand, '2021-06-05' as stt , '2021-06-09' as edt
union all select 'oppo' as brand, '2021-06-11' as stt , '2021-06-21' as edt
union all select 'vivo' as brand, '2021-06-05' as stt , '2021-06-15' as edt
union all select 'vivo' as brand, '2021-06-09' as stt , '2021-06-21' as edt
union all select 'redmi' as brand, '2021-06-05' as stt , '2021-06-21' as edt
union all select 'redmi' as brand, '2021-06-09' as stt , '2021-06-15' as edt
union all select 'redmi' as brand, '2021-06-17' as stt , '2021-06-26' as edt
union all select 'huawei' as brand, '2021-06-05' as stt , '2021-06-26' as edt
union all select 'huawei' as brand, '2021-06-09' as stt , '2021-06-15' as edt
union all select 'huawei' as brand, '2021-06-17' as stt , '2021-06-21' as edt
;--查看数据
select * 
from py_tmpdb.hanjiaxiaozhi-- hanjiaxiaozhi.brand	hanjiaxiaozhi.stt	hanjiaxiaozhi.edt
-- oppo	2021-06-05	2021-06-09
-- oppo	2021-06-11	2021-06-21
-- vivo	2021-06-05	2021-06-15
-- vivo	2021-06-09	2021-06-21
-- redmi	2021-06-05	2021-06-21
-- redmi	2021-06-09	2021-06-15
-- redmi	2021-06-17	2021-06-26
-- huawei	2021-06-05	2021-06-26
-- huawei	2021-06-09	2021-06-15
-- huawei	2021-06-17	2021-06-21

结果

brand	dur_days
huawei	22
oppo	16
redmi	22
vivo	17

思路
- step1：取到本条之前最大的结束时间
- step2:如果stt大于之前最大的edt，不变，否则把stt+1置为上一条的结束时间
- step3:datediff(edt,stt),会出现负的，这些数据下一步去掉
- step4：如果dur_days为负数，那么+0，否则加dur_days+1
答案解析

select  brand,sum(if(dur_days > 0,dur_days+1,0)) as dur_days
from
(select  brand,stt,edt,datediff(edt,stt) as dur_daysfrom(select  brand,if(max_edt is null,stt,if(stt > max_edt,stt,date_add(max_edt,1))) as stt,edtfrom(select  brand,stt,edt,max(edt)over(partition by brand order by stt rows between unbounded preceding and 1 preceding) as max_edtfrom py_tmpdb.hanjiaxiaozhi) step_1 -- step1：取到本条之前  最大的结束时间) step_2 --step2:如果stt大于之前最大的edt，不变，否则 把stt+1置为上一条的结束时间
) step_3 --step3:datediff(edt,stt),会出现负的，这些数据下一步去掉
group by  brand --step4：如果dur_days为负数，那么+0，否则加dur_days+1
;

详细拆分（看懂上面的下面的请忽略，这里为了方便理解，分的步骤贼散）

-- step1：取到本条之前  最大的结束时间
with step_1 as
(
selectbrand,stt,edt,max(edt)over(partition by brand order by stt rows between unbounded preceding and 1 preceding) as max_edt
from py_tmpdb.hanjiaxiaozhi
)
-- brand	stt	edt	max_edt
-- huawei	2021-06-05	2021-06-26	NULL
-- huawei	2021-06-09	2021-06-15	2021-06-26
-- huawei	2021-06-17	2021-06-21	2021-06-26
-- oppo	2021-06-05	2021-06-09	NULL
-- oppo	2021-06-11	2021-06-21	2021-06-09
-- redmi	2021-06-05	2021-06-21	NULL
-- redmi	2021-06-09	2021-06-15	2021-06-21
-- redmi	2021-06-17	2021-06-26	2021-06-21
-- vivo	2021-06-05	2021-06-15	NULL
-- vivo	2021-06-09	2021-06-21	2021-06-15--step2:如果stt大于之前最大的edt，不变，否则 把stt+1置为上一条的结束时间
,step_2 as
(
select brand,if(max_edt is null,stt,if(stt>max_edt,stt,date_add(max_edt,1))) as stt,edt
from step_1
)
-- brand	stt	edt
-- huawei	2021-06-05	2021-06-26
-- huawei	2021-06-27	2021-06-15
-- huawei	2021-06-27	2021-06-21
-- oppo	2021-06-05	2021-06-09
-- oppo	2021-06-11	2021-06-21
-- redmi	2021-06-05	2021-06-21
-- redmi	2021-06-22	2021-06-15
-- redmi	2021-06-22	2021-06-26
-- vivo	2021-06-05	2021-06-15
-- vivo	2021-06-16	2021-06-21--step3:datediff(edt,stt),会出现负的，这些数据下一步去掉，这里having会报错
,step_3 as
(
selectbrand,stt,edt,datediff(edt,stt) as dur_days
from step_2
)
-- brand	stt	edt	dur_days
-- huawei	2021-06-05	2021-06-26	21
-- huawei	2021-06-27	2021-06-15	-12
-- huawei	2021-06-27	2021-06-21	-6
-- oppo	2021-06-05	2021-06-09	4
-- oppo	2021-06-11	2021-06-21	10
-- redmi	2021-06-05	2021-06-21	16
-- redmi	2021-06-22	2021-06-15	-7
-- redmi	2021-06-22	2021-06-26	4
-- vivo	2021-06-05	2021-06-15	10
-- vivo	2021-06-16	2021-06-21	5--step4：如果dur_days为负数，那么+0，否则加dur_days+1
selectbrand,sum(if(dur_days>0,dur_days+1,0)) as dur_days
from step_3
group by brand
;
-- brand	dur_days
-- huawei	22
-- oppo	16
-- redmi	22
-- vivo	17