Gaps and Islands — Detecting Continuous Runs & Outages

Gaps and Islands — Detecting Continuous Runs & Outages

The gaps-and-islands problem: given a sequence of timestamped events, find contiguous runs (islands) and the breaks between them (gaps). Classic use cases: uptime/downtime periods, consecutive days of activity, unbroken trading sessions.

-- Find continuous high-CPU periods per host (cpu > 70% for 10+ consecutive minutes)
WITH flagged AS (
    SELECT
        host,
        recorded_at,
        cpu_pct,
        CASE WHEN cpu_pct > 70 THEN 1 ELSE 0 END        AS is_high
    FROM ts_server_metrics
    WHERE recorded_at >= NOW() - INTERVAL '7 days'
),
-- Double ROW_NUMBER trick to identify islands
island_groups AS (
    SELECT
        host,
        recorded_at,
        cpu_pct,
        is_high,
        ROW_NUMBER() OVER (PARTITION BY host ORDER BY recorded_at)
        - ROW_NUMBER() OVER (PARTITION BY host, is_high ORDER BY recorded_at)
                                                         AS grp
    FROM flagged
)
SELECT
    host,
    is_high,
    MIN(recorded_at)                                     AS period_start,
    MAX(recorded_at)                                     AS period_end,
    COUNT(*)                                             AS minutes_duration,
    ROUND(AVG(cpu_pct)::numeric, 1)                      AS avg_cpu_pct,
    ROUND(MAX(cpu_pct)::numeric, 1)                      AS peak_cpu_pct
FROM island_groups
WHERE is_high = 1
GROUP BY host, is_high, grp
HAVING COUNT(*) >= 10
ORDER BY minutes_duration DESC
LIMIT 20;

Purchase this course to unlock the full lesson.

Sign up