电商业务,一个订单表 orders
和一个用户表 users
。现需要分析用户的行为路径和转化率。
表结构:
users
):
user_id
(用户ID, 主键)register_date
(注册日期)city
(所在城市)orders
):
order_id
(订单ID, 主键)user_id
(用户ID, 外键)order_date
(订单日期)amount
(订单金额)具体需求如下:
WITH user_orders_2023 AS (
-- 获取2023年内的订单数据,并标记首次、第二次、最后一次购买日期
SELECT
u.user_id,
u.city,
u.register_date,
o.order_date,
o.amount,
MIN(o.order_date) OVER (PARTITION BY o.user_id) AS first_order_date,
LEAD(o.order_date, 1) OVER (PARTITION BY o.user_id ORDER BY o.order_date) AS second_order_date,
MAX(o.order_date) OVER (PARTITION BY o.user_id) AS last_order_date,
COUNT(o.order_id) OVER (PARTITION BY o.user_id) AS order_count,
SUM(o.amount) OVER (PARTITION BY o.user_id) AS total_amount
FROM
users u
JOIN
orders o ON u.user_id = o.user_id
WHERE
o.order_date BETWEEN '2023-01-01' AND '2023-12-31'
),
user_behavior_path AS (
-- 统计用户行为路径
SELECT
user_id,
city,
first_order_date,
second_order_date,
last_order_date,
order_count,
total_amount
FROM
user_orders_2023
GROUP BY
user_id, city, first_order_date, second_order_date, last_order_date, order_count, total_amount
),
conversion_analysis AS (
-- 计算转化率
SELECT
city,
COUNT(DISTINCT CASE WHEN first_order_date IS NOT NULL THEN user_id END) * 1.0 / COUNT(DISTINCT user_id) AS registration_to_first_purchase,
COUNT(DISTINCT CASE WHEN second_order_date IS NOT NULL THEN user_id END) * 1.0 / COUNT(DISTINCT CASE WHEN first_order_date IS NOT NULL THEN user_id END) AS first_to_second_purchase,
COUNT(DISTINCT CASE WHEN total_amount >= 10000 THEN user_id END) * 1.0 / COUNT(DISTINCT CASE WHEN order_count >= 2 THEN user_id END) AS repurchase_to_high_value
FROM
user_orders_2023
GROUP BY
city
),
retention_analysis AS (
-- 计算用户留存率
SELECT
city,
COUNT(DISTINCT CASE WHEN DATEDIFF(order_date, first_order_date) >= 30 THEN user_id END) * 1.0 / COUNT(DISTINCT user_id) AS retention_30d,
COUNT(DISTINCT CASE WHEN DATEDIFF(order_date, first_order_date) >= 60 THEN user_id END) * 1.0 / COUNT(DISTINCT user_id) AS retention_60d,
COUNT(DISTINCT CASE WHEN DATEDIFF(order_date, first_order_date) >= 90 THEN user_id END) * 1.0 / COUNT(DISTINCT user_id) AS retention_90d
FROM
user_orders_2023
GROUP BY
city
)
-- 综合输出:按城市输出用户行为路径、转化率和留存率
SELECT
ubp.city,
AVG(DATEDIFF(second_order_date, first_order_date)) AS avg_days_first_to_second,
AVG(DATEDIFF(last_order_date, first_order_date)) AS avg_days_first_to_last,
AVG(order_count) AS avg_order_count,
AVG(total_amount) AS avg_total_amount,
ca.registration_to_first_purchase,
ca.first_to_second_purchase,
ca.repurchase_to_high_value,
ra.retention_30d,
ra.retention_60d,
ra.retention_90d
FROM
user_behavior_path ubp
JOIN
conversion_analysis ca ON ubp.city = ca.city
JOIN
retention_analysis ra ON ubp.city = ra.city
GROUP BY
ubp.city,
ca.registration_to_first_purchase,
ca.first_to_second_purchase,
ca.repurchase_to_high_value,
ra.retention_30d,
ra.retention_60d,
ra.retention_90d;
city | avg_days_first_to_second | avg_days_first_to_last | avg_order_count | avg_total_amount | registration_to_first_purchase | first_to_second_purchase | repurchase_to_high_value | retention_30d | retention_60d | retention_90d |
---|---|---|---|---|---|---|---|---|---|---|
北京 | 45.67 | 180.50 | 4.2 | 8500 | 0.80 | 0.60 | 0.25 | 0.50 | 0.40 | 0.30 |
上海 | 50.00 | 200.00 | 3.8 | 7800 | 0.75 | 0.55 | 0.20 | 0.45 | 0.35 | 0.25 |
MIN
、LEAD
、MAX
)标记用户的关键行为节点。DATEDIFF
和条件聚合计算用户留存率。WITH
子句分步骤处理数据,提升可读性和性能。