Sessionization — Reconstructing User Sessions from Events

Sessionization — Reconstructing User Sessions from Events

Raw clickstream events are individual rows. A session is a group of events from the same user with no gap longer than 30 minutes. Sessionization reconstructs these groups purely in SQL using window functions.

-- Assign session boundaries based on 30-minute idle timeout
WITH event_gaps AS (
    SELECT
        user_id,
        session_id,
        event_type,
        page,
        created_at,
        LAG(created_at) OVER (
            PARTITION BY user_id ORDER BY created_at
        )                                               AS prev_event_at,
        EXTRACT(EPOCH FROM (
            created_at
            - LAG(created_at) OVER (
                PARTITION BY user_id ORDER BY created_at
              )
        )) / 60                                         AS idle_minutes
    FROM ts_web_events
),
session_starts AS (
    SELECT *,
        SUM(CASE WHEN idle_minutes IS NULL OR idle_minutes > 30 THEN 1 ELSE 0 END)
            OVER (PARTITION BY user_id ORDER BY created_at)
                                                        AS synthetic_session_id
    FROM event_gaps
),
-- Add exit page before GROUP BY (window functions cannot be used inside FILTER)
with_exit AS (
    SELECT *,
        LAST_VALUE(page) OVER (
            PARTITION BY user_id, synthetic_session_id
            ORDER BY created_at
            ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
        )                                               AS exit_page
    FROM session_starts
)
SELECT
    user_id,
    synthetic_session_id,
    COUNT(*)                                            AS event_count,
    MIN(created_at)                                    AS session_start,
    MAX(created_at)                                    AS session_end,
    ROUND(EXTRACT(EPOCH FROM (MAX(created_at) - MIN(created_at))) / 60, 1)
                                                        AS duration_minutes,
    COUNT(*) FILTER (WHERE event_type = 'page_view')    AS page_views,
    COUNT(*) FILTER (WHERE event_type = 'purchase')     AS purchases,
    bool_or(event_type = 'purchase')                    AS converted,
    MIN(page)                                           AS landing_page,
    MAX(exit_page)                                      AS exit_page
FROM with_exit
GROUP BY user_id, synthetic_session_id
HAVING COUNT(*) > 1
ORDER BY duration_minutes DESC
LIMIT 20;

Purchase this course to unlock the full lesson.

Sign up