我们使用留存专题-基础数据生成数据,计算2024-01-01日~2024-01-07日新增用户的1日、2日、3日、4日、5日、6日、7日留存率
根据新增日期,关联对应日期的登陆数据,计算关联上的登陆用户数据,从而计算出留存率
select create_date,
count(t_login_1.user_id) / count(t_create.user_id) as r1_rate,
count(t_login_2.user_id) / count(t_create.user_id) as r2_rate,
count(t_login_3.user_id) / count(t_create.user_id) as r3_rate,
count(t_login_4.user_id) / count(t_create.user_id) as r4_rate,
count(t_login_5.user_id) / count(t_create.user_id) as r5_rate,
count(t_login_6.user_id) / count(t_create.user_id) as r6_rate,
count(t_login_7.user_id) / count(t_create.user_id) as r7_rate
from (
--新增数据
select user_id,
create_date
from t_user_create
where create_date <= '2024-01-07') t_create
left join
(select user_id,
login_date
from t_user_login) t_login_1
on t_create.user_id = t_login_1.user_id and
datediff(to_date(t_login_1.login_date), to_date(t_create.create_date)) = 1
left join
(select user_id,
login_date
from t_user_login) t_login_2
on t_create.user_id = t_login_2.user_id and
datediff(to_date(t_login_2.login_date), to_date(t_create.create_date)) = 2
left join
(select user_id,
login_date
from t_user_login) t_login_3
on t_create.user_id = t_login_3.user_id and
datediff(to_date(t_login_3.login_date), to_date(t_create.create_date)) = 3
left join
(select user_id,
login_date
from t_user_login) t_login_4
on t_create.user_id = t_login_4.user_id and
datediff(to_date(t_login_4.login_date), to_date(t_create.create_date)) = 4
left join
(select user_id,
login_date
from t_user_login) t_login_5
on t_create.user_id = t_login_5.user_id and
datediff(to_date(t_login_5.login_date), to_date(t_create.create_date)) = 5
left join
(select user_id,
login_date
from t_user_login) t_login_6
on t_create.user_id = t_login_6.user_id and
datediff(to_date(t_login_6.login_date), to_date(t_create.create_date)) = 6
left join
(select user_id,
login_date
from t_user_login) t_login_7
on t_create.user_id = t_login_7.user_id and
datediff(to_date(t_login_7.login_date), to_date(t_create.create_date)) = 7
group by create_date
执行结果
+--------------+----------+----------+----------+----------+----------+----------+----------+
| create_date | r1_rate | r2_rate | r3_rate | r4_rate | r5_rate | r6_rate | r7_rate |
+--------------+----------+----------+----------+----------+----------+----------+----------+
| 2024-01-06 | 1.0 | 0.9 | 0.8 | 0.8 | 0.7 | 0.8 | 0.9 |
| 2024-01-02 | 0.9 | 0.9 | 0.7 | 0.7 | 0.9 | 0.8 | 0.8 |
| 2024-01-04 | 0.9 | 1.0 | 1.0 | 0.9 | 0.8 | 0.9 | 0.9 |
| 2024-01-03 | 1.0 | 0.8 | 0.8 | 0.9 | 0.7 | 0.7 | 0.8 |
| 2024-01-05 | 0.9 | 0.9 | 0.8 | 0.9 | 0.8 | 0.6 | 0.7 |
| 2024-01-01 | 0.9 | 1.0 | 0.9 | 1.0 | 0.7 | 0.9 | 0.6 |
| 2024-01-07 | 0.9 | 0.8 | 0.9 | 1.0 | 0.9 | 0.8 | 0.8 |
+--------------+----------+----------+----------+----------+----------+----------+----------+
根据新增用户信息,关联出所有的登录记录,然后判断出对的N日是否留存,然后计算出对应的留存率。
select create_date,
sum(r1) / count(distinct user_id) as r1_rate,
sum(r2) / count(distinct user_id) as r2_rate,
sum(r3) / count(distinct user_id) as r3_rate,
sum(r4) / count(distinct user_id) as r4_rate,
sum(r5) / count(distinct user_id) as r5_rate,
sum(r6) / count(distinct user_id) as r6_rate,
sum(r7) / count(distinct user_id) as r7_rate
from (select t_create.user_id,
t_create.create_date,
t_login.login_date,
case when datediff(to_date(t_login.login_date), to_date(t_create.create_date)) = 1 then 1 else 0 end as r1,
case when datediff(to_date(t_login.login_date), to_date(t_create.create_date)) = 2 then 1 else 0 end as r2,
case when datediff(to_date(t_login.login_date), to_date(t_create.create_date)) = 3 then 1 else 0 end as r3,
case when datediff(to_date(t_login.login_date), to_date(t_create.create_date)) = 4 then 1 else 0 end as r4,
case when datediff(to_date(t_login.login_date), to_date(t_create.create_date)) = 5 then 1 else 0 end as r5,
case when datediff(to_date(t_login.login_date), to_date(t_create.create_date)) = 6 then 1 else 0 end as r6,
case when datediff(to_date(t_login.login_date), to_date(t_create.create_date)) = 7 then 1 else 0 end as r7
from (select user_id,
create_date
from t_user_create
where create_date <= '2024-01-07') t_create
left join
(select user_id,
login_date
from t_user_login) t_login
on t_create.user_id = t_login.user_id
where to_date(t_login.login_date) > to_date(t_create.create_date)) tt
group by create_date
order by create_date asc
执行结果
+--------------+----------+----------+----------+----------+----------+----------+----------+
| create_date | r1_rate | r2_rate | r3_rate | r4_rate | r5_rate | r6_rate | r7_rate |
+--------------+----------+----------+----------+----------+----------+----------+----------+
| 2024-01-01 | 0.9 | 1.0 | 0.9 | 1.0 | 0.7 | 0.9 | 0.6 |
| 2024-01-02 | 0.9 | 0.9 | 0.7 | 0.7 | 0.9 | 0.8 | 0.8 |
| 2024-01-03 | 1.0 | 0.8 | 0.8 | 0.9 | 0.7 | 0.7 | 0.8 |
| 2024-01-04 | 0.9 | 1.0 | 1.0 | 0.9 | 0.8 | 0.9 | 0.9 |
| 2024-01-05 | 0.9 | 0.9 | 0.8 | 0.9 | 0.8 | 0.6 | 0.7 |
| 2024-01-06 | 1.0 | 0.9 | 0.8 | 0.8 | 0.7 | 0.8 | 0.9 |
| 2024-01-07 | 0.9 | 0.8 | 0.9 | 1.0 | 0.9 | 0.8 | 0.8 |
+--------------+----------+----------+----------+----------+----------+----------+----------+
根据新增用户信息,使用竖表计算出每天对应所有日期的留存率,然后行转列,展示出对应的1~7日留存率。
select create_date,
retention_days,
login_cnt / create_date_cnt as retention_reate
from (select t_create.create_date,
create_date_cnt,
datediff(to_date(t_login.login_date), to_date(t_create.create_date)) as retention_days,
count(1) as login_cnt
from (select user_id,
create_date,
count(user_id) over (partition by create_date) as create_date_cnt
from t_user_create
where create_date <= '2024-01-07') t_create
left join
(select user_id,
login_date
from t_user_login) t_login
on t_create.user_id = t_login.user_id
where to_date(t_login.login_date) > to_date(t_create.create_date)
group by t_create.create_date, create_date_cnt, retention_days) t
where retention_days <= 7
执行结果
+--------------+-----------------+------------------+
| create_date | retention_days | retention_reate |
+--------------+-----------------+------------------+
| 2024-01-04 | 2 | 1.0 |
| 2024-01-04 | 4 | 0.9 |
| 2024-01-02 | 2 | 0.9 |
| 2024-01-04 | 6 | 0.9 |
| 2024-01-04 | 7 | 0.9 |
| 2024-01-05 | 4 | 0.9 |
| 2024-01-06 | 7 | 0.9 |
| 2024-01-02 | 6 | 0.8 |
| 2024-01-02 | 1 | 0.9 |
| 2024-01-03 | 2 | 0.8 |
| 2024-01-06 | 3 | 0.8 |
| 2024-01-05 | 1 | 0.9 |
| 2024-01-02 | 4 | 0.7 |
| 2024-01-02 | 5 | 0.9 |
| 2024-01-03 | 3 | 0.8 |
| 2024-01-04 | 5 | 0.8 |
| 2024-01-03 | 7 | 0.8 |
| 2024-01-07 | 3 | 0.9 |
| 2024-01-01 | 4 | 1.0 |
| 2024-01-06 | 2 | 0.9 |
| 2024-01-06 | 5 | 0.7 |
| 2024-01-05 | 7 | 0.7 |
| 2024-01-05 | 6 | 0.6 |
| 2024-01-03 | 4 | 0.9 |
| 2024-01-02 | 7 | 0.8 |
| 2024-01-01 | 6 | 0.9 |
| 2024-01-07 | 2 | 0.8 |
| 2024-01-03 | 1 | 1.0 |
| 2024-01-01 | 1 | 0.9 |
| 2024-01-05 | 5 | 0.8 |
| 2024-01-07 | 4 | 1.0 |
| 2024-01-06 | 4 | 0.8 |
| 2024-01-05 | 3 | 0.8 |
| 2024-01-07 | 6 | 0.8 |
| 2024-01-01 | 7 | 0.6 |
| 2024-01-03 | 6 | 0.7 |
| 2024-01-07 | 1 | 0.9 |
| 2024-01-07 | 5 | 0.9 |
| 2024-01-02 | 3 | 0.7 |
| 2024-01-06 | 1 | 1.0 |
| 2024-01-06 | 6 | 0.8 |
| 2024-01-07 | 7 | 0.8 |
| 2024-01-01 | 2 | 1.0 |
| 2024-01-01 | 3 | 0.9 |
| 2024-01-03 | 5 | 0.7 |
| 2024-01-04 | 3 | 1.0 |
| 2024-01-01 | 5 | 0.7 |
| 2024-01-04 | 1 | 0.9 |
| 2024-01-05 | 2 | 0.9 |
+--------------+-----------------+------------------+
行转列得到结果
select *
from (select create_date,
retention_days,
login_cnt / create_date_cnt as retention_reate
from (select t_create.create_date,
create_date_cnt,
datediff(to_date(t_login.login_date), to_date(t_create.create_date)) as retention_days,
count(1) as login_cnt
from (select user_id,
create_date,
count(user_id) over (partition by create_date) as create_date_cnt
from t_user_create
where create_date <= '2024-01-07') t_create
left join
(select user_id,
login_date
from t_user_login) t_login
on t_create.user_id = t_login.user_id
where to_date(t_login.login_date) > to_date(t_create.create_date)
group by t_create.create_date, create_date_cnt, retention_days) t
where retention_days <= 7) tt pivot (
sum(retention_reate) as rate
for retention_days in(1 as 1_day,2 as 2_day,3 as 3_day,4 as 4_day,5 as 5_day,6 as 6_day,7 as 7_day)
)
order by 1 asc
执行结果
+--------------+--------+--------+--------+--------+--------+--------+--------+
| create_date | 1_day | 2_day | 3_day | 4_day | 5_day | 6_day | 7_day |
+--------------+--------+--------+--------+--------+--------+--------+--------+
| 2024-01-01 | 0.9 | 1.0 | 0.9 | 1.0 | 0.7 | 0.9 | 0.6 |
| 2024-01-02 | 0.9 | 0.9 | 0.7 | 0.7 | 0.9 | 0.8 | 0.8 |
| 2024-01-03 | 1.0 | 0.8 | 0.8 | 0.9 | 0.7 | 0.7 | 0.8 |
| 2024-01-04 | 0.9 | 1.0 | 1.0 | 0.9 | 0.8 | 0.9 | 0.9 |
| 2024-01-05 | 0.9 | 0.9 | 0.8 | 0.9 | 0.8 | 0.6 | 0.7 |
| 2024-01-06 | 1.0 | 0.9 | 0.8 | 0.8 | 0.7 | 0.8 | 0.9 |
| 2024-01-07 | 0.9 | 0.8 | 0.9 | 1.0 | 0.9 | 0.8 | 0.8 |
+--------------+--------+--------+--------+--------+--------+--------+--------+