Date Spine & Gap Filling — Never Miss a Missing Row

Date Spine & Gap Filling — Never Miss a Missing Row

Time series data often has gaps — a server went offline, a sensor missed a reading, no orders on a Sunday. Naive aggregation silently skips those gaps. A date spine generates every expected interval so that gaps appear as explicit NULLs or zeros.

-- Generate a complete hourly spine for the last 7 days
WITH spine AS (
    SELECT generate_series(
        DATE_TRUNC('hour', NOW() - INTERVAL '7 days'),
        DATE_TRUNC('hour', NOW()),
        INTERVAL '1 hour'
    ) AS hour_bucket
),
-- Actual hourly averages per host
actuals AS (
    SELECT
        host,
        DATE_TRUNC('hour', recorded_at) AS hour_bucket,
        ROUND(AVG(cpu_pct)::numeric, 2) AS avg_cpu,
        COUNT(*)                        AS reading_count
    FROM ts_server_metrics
    GROUP BY host, DATE_TRUNC('hour', recorded_at)
),
-- Cross-join spine × hosts, then left-join actuals
hosts AS (
    SELECT DISTINCT host FROM ts_server_metrics
)
SELECT
    h.host,
    s.hour_bucket,
    a.avg_cpu,
    a.reading_count,
    -- Forward-fill: carry last known value over gaps
    LAST_VALUE(a.avg_cpu) OVER (
        PARTITION BY h.host
        ORDER BY s.hour_bucket
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    )                                   AS cpu_forward_filled
FROM spine s
CROSS JOIN hosts h
LEFT JOIN actuals a
    ON a.host = h.host AND a.hour_bucket = s.hour_bucket
ORDER BY h.host, s.hour_bucket
LIMIT 50;

Purchase this course to unlock the full lesson.

Sign up