-- 查询连续3天登录的用户(方案1)
SELECT DISTINCT a.user_id
FROM login_log a
JOIN login_log b ON a.user_id = b.user_id
AND b.login_date = DATE_ADD(a.login_date, INTERVAL 1 DAY)
JOIN login_log c ON a.user_id = c.user_id
AND c.login_date = DATE_ADD(a.login_date, INTERVAL 2 DAY);
-- 方案2:使用窗口函数(推荐)
SELECT DISTINCT user_id
FROM (
SELECT
user_id,
login_date,
DATE_SUB(login_date, INTERVAL ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY login_date) DAY) as diff
FROM (
SELECT DISTINCT user_id, login_date
FROM login_log
) t
) t2
GROUP BY user_id, diff
HAVING COUNT(*) >= 3;
-- 方案1:使用ROW_NUMBER计算连续登录
WITH login_days AS (
SELECT
user_id,
login_date,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY login_date) as rn
FROM (
SELECT DISTINCT user_id, DATE(login_time) as login_date
FROM login_log
) t
)
SELECT DISTINCT user_id
FROM (
SELECT
user_id,
DATE_SUB(login_date, INTERVAL rn DAY) as group_date
FROM login_days
) t
GROUP BY user_id, group_date
HAVING COUNT(*) >= 3;
-- 方案2:使用LAG函数
WITH consecutive_check AS (
SELECT
user_id,
login_date,
LAG(login_date, 2) OVER (PARTITION BY user_id ORDER BY login_date) as prev_date2
FROM (
SELECT DISTINCT user_id, DATE(login_time) as login_date
FROM login_log
) t
)
SELECT DISTINCT user_id
FROM consecutive_check
WHERE DATEDIFF(login_date, prev_date2) = 2;
-- 使用用户变量追踪连续天数
SELECT DISTINCT user_id
FROM (
SELECT
user_id,
login_date,
@seq := IF(@prev_user = user_id AND DATEDIFF(login_date, @prev_date) = 1, @seq + 1, 1) as seq,
@max_seq := IF(@seq > @max_seq, @seq, @max_seq) as max_seq,
@prev_user := user_id,
@prev_date := login_date
FROM (
SELECT DISTINCT user_id, DATE(login_time) as login_date
FROM login_log
ORDER BY user_id, login_date
) t
CROSS JOIN (SELECT @prev_user := NULL, @prev_date := NULL, @seq := 0, @max_seq := 0) vars
) t2
WHERE max_seq >= 3;
-- 查询连续N天登录的用户(通用版本)
WITH daily_login AS (
-- 去重,确保每天只算一次登录
SELECT DISTINCT
user_id,
DATE(login_time) as login_date
FROM login_log
),
login_groups AS (
-- 计算连续登录分组
SELECT
user_id,
login_date,
DATE_SUB(login_date,
INTERVAL ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY login_date) DAY
) as group_id
FROM daily_login
),
consecutive_counts AS (
-- 统计每组连续登录天数
SELECT
user_id,
group_id,
MIN(login_date) as start_date,
MAX(login_date) as end_date,
COUNT(*) as consecutive_days,
GROUP_CONCAT(login_date ORDER BY login_date) as login_dates
FROM login_groups
GROUP BY user_id, group_id
)
-- 查询连续N天登录的用户
SELECT DISTINCT user_id
FROM consecutive_counts
WHERE consecutive_days >= 3 -- N = 3
ORDER BY user_id;
-- 如果需要查看更多信息
SELECT *
FROM consecutive_counts
WHERE consecutive_days >= 3
ORDER BY user_id, start_date;
-- 查找每个用户的最长连续登录天数
WITH daily_login AS (
SELECT DISTINCT user_id, DATE(login_time) as login_date
FROM login_log
),
login_groups AS (
SELECT
user_id,
login_date,
DATE_SUB(login_date,
INTERVAL ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY login_date) DAY
) as group_id
FROM daily_login
),
consecutive_counts AS (
SELECT
user_id,
COUNT(*) as consecutive_days,
MIN(login_date) as start_date,
MAX(login_date) as end_date
FROM login_groups
GROUP BY user_id, group_id
)
SELECT
user_id,
MAX(consecutive_days) as max_consecutive_days
FROM consecutive_counts
GROUP BY user_id
ORDER BY max_consecutive_days DESC;
-- 允许中断1天的连续登录(例如:周一到周三,跳过周二也算连续)
WITH daily_login AS (
SELECT DISTINCT user_id, DATE(login_time) as login_date
FROM login_log
),
login_sequence AS (
SELECT
user_id,
login_date,
LAG(login_date) OVER (PARTITION BY user_id ORDER BY login_date) as prev_date
FROM daily_login
),
login_gaps AS (
SELECT
user_id,
login_date,
CASE
WHEN prev_date IS NULL THEN 1
WHEN DATEDIFF(login_date, prev_date) <= 2 THEN 0 -- 允许中断1天
ELSE 1
END as new_group
FROM login_sequence
),
login_groups AS (
SELECT
user_id,
login_date,
SUM(new_group) OVER (PARTITION BY user_id ORDER BY login_date) as group_id
FROM login_gaps
)
SELECT DISTINCT user_id
FROM (
SELECT
user_id,
group_id,
COUNT(*) as consecutive_days
FROM login_groups
GROUP BY user_id, group_id
HAVING COUNT(*) >= 3
) t;
建立索引:
CREATE INDEX idx_user_login ON login_log(user_id, login_time);
CREATE INDEX idx_date ON login_log(login_time);
预先去重:在子查询中使用DISTINCT减少处理数据量
分区表:如果数据量大,考虑按日期分区
物化视图:对于频繁查询,可以创建物化视图存储每日活跃用户
推荐使用窗口函数方案,它在现代数据库系统中性能最好,代码也最简洁。