Gap Filling and Forward-Fill with CTEs

Gap Filling and Forward-Fill with CTEs

Real-world time-series data has holes. Stock markets close on weekends. Weather stations go offline. Customers don't order every day. When you visualize or analyze this data, gaps either cause incorrect aggregations or invisible missing points. Gap filling solves this.

The Two Strategies

Zero-fill: Replace missing periods with zero (ec_orders = 0, revenue = 0) Forward-fill (LOCF): Carry the last known value forward (last recorded stock price, last sensor reading)

Zero-fill is right for event counts. Forward-fill is right for slowly-changing values like prices or measurements.


Example 1: Zero-Fill Daily Revenue

Generate a complete date spine then LEFT JOIN to fill gaps with zero:

WITH date_spine AS (
    -- Generate every day in 2024
    SELECT generate_series(
        DATE '2024-01-01',
        DATE '2024-12-31',
        INTERVAL '1 day'
    )::DATE AS day
),

daily_revenue AS (
    SELECT
        DATE_TRUNC('day', o.created_at)::DATE AS order_day,
        COUNT(DISTINCT o.order_id)             AS ec_orders,
        SUM(oi.quantity * oi.unit_price)       AS revenue
    FROM ec_orders o
    JOIN ec_order_items oi ON oi.order_id = o.order_id
    WHERE o.created_at >= '2024-01-01'
      AND o.created_at <  '2025-01-01'
    GROUP BY 1
)

SELECT
    ds.day,
    COALESCE(dr.ec_orders,  0) AS ec_orders,
    COALESCE(dr.revenue, 0) AS revenue
FROM date_spine ds
LEFT JOIN daily_revenue dr ON dr.order_day = ds.day
ORDER BY ds.day;

Why generate_series instead of recursive CTE here: PostgreSQL's built-in generate_series() is simpler and faster for date ranges. The recursive CTE pattern is useful in databases that don't have it.


Example 2: Forward-Fill Stock Prices (Carry Last Observation)

Stock prices aren't recorded on weekends. We want to fill in Saturday/Sunday with Friday's closing price:

WITH date_spine AS (
    SELECT generate_series(
        (SELECT MIN(price_date) FROM stock_prices WHERE company_id = 1),
        (SELECT MAX(price_date) FROM stock_prices WHERE company_id = 1),
        INTERVAL '1 day'
    )::DATE AS day
),

raw_prices AS (
    SELECT price_date, close_price
    FROM stock_prices
    WHERE company_id = 1
),

spine_with_prices AS (
    SELECT
        ds.day,
        rp.close_price AS actual_price   -- NULL on weekends/gaps
    FROM date_spine ds
    LEFT JOIN raw_prices rp ON rp.price_date = ds.day
),

forward_filled AS (
    SELECT
        day,
        actual_price,
        -- Forward fill: take the last non-null value within the partition
        LAST_VALUE(actual_price) OVER (
            ORDER BY day
            ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
        ) AS filled_price
    FROM spine_with_prices
)

SELECT
    day,
    filled_price,
    CASE WHEN actual_price IS NULL THEN 'filled' ELSE 'actual' END AS source
FROM forward_filled
ORDER BY day;

Note: LAST_VALUE ... IGNORE NULLS is the PostgreSQL 16 syntax. Equivalent older approach uses MAX(actual_price) OVER (ORDER BY day ROWS UNBOUNDED PRECEDING) since MAX ignores NULLs.


Example 3: Monthly Sales — Filling Sparse Data

Some ec_products sell rarely. Show every month even when a product had zero sales:

WITH month_spine AS (
    SELECT
        generate_series(
            DATE_TRUNC('month', MIN(o.created_at)),
            DATE_TRUNC('month', MAX(o.created_at)),
            INTERVAL '1 month'
        )::DATE AS month
    FROM ec_orders o
),

all_products AS (
    SELECT product_id, name, category FROM ec_products
),

product_months AS (
    -- Cross join to get every product × every month combination
    SELECT
        ap.product_id,
        ap.name,
        ap.category,
        ms.month
    FROM all_products ap
    CROSS JOIN month_spine ms
),

actual_sales AS (
    SELECT
        oi.product_id,
        DATE_TRUNC('month', o.created_at)::DATE AS month,
        SUM(oi.quantity * oi.unit_price)         AS revenue,
        SUM(oi.quantity)                          AS units_sold
    FROM ec_order_items oi
    JOIN ec_orders o ON o.order_id = oi.order_id
    GROUP BY oi.product_id, 2
)

SELECT
    pm.month,
    pm.name,
    pm.category,
    COALESCE(s.revenue, 0)     AS revenue,
    COALESCE(s.units_sold, 0)  AS units_sold
FROM product_months pm
LEFT JOIN actual_sales s
    ON s.product_id = pm.product_id
    AND s.month = pm.month
ORDER BY pm.name, pm.month;

Example 4: Forward Fill Across Multiple Companies

Forward-fill closing prices for all companies at once, filling weekend and holiday gaps in one query using PARTITION BY:

WITH date_spine AS (
    -- One row per company per calendar day in the full date range
    SELECT
        c.company_id,
        generate_series(
            (SELECT MIN(price_date) FROM stock_prices),
            (SELECT MAX(price_date) FROM stock_prices),
            INTERVAL '1 day'
        )::DATE AS day
    FROM companies c
),

spine_with_prices AS (
    SELECT
        ds.company_id,
        ds.day,
        sp.close_price   -- NULL on weekends / missing trading days
    FROM date_spine ds
    LEFT JOIN stock_prices sp
        ON sp.company_id = ds.company_id
        AND sp.price_date = ds.day
),

forward_filled AS (
    SELECT
        company_id,
        day,
        close_price AS actual_close,
        -- Forward fill per company: carry last non-null price forward
        MAX(close_price) OVER (
            PARTITION BY company_id
            ORDER BY day
            ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
        ) AS close_filled
    FROM spine_with_prices
)

SELECT
    c.ticker,
    ff.day,
    ff.close_filled                                               AS close_price,
    CASE WHEN ff.actual_close IS NULL THEN 'forward-filled'
         ELSE 'actual' END                                        AS source
FROM forward_filled ff
JOIN companies c ON c.company_id = ff.company_id
ORDER BY c.ticker, ff.day;

Purchase this course to unlock the full lesson.

Sign up