CTEs Combined with Window Functions

CTEs Combined with Window Functions

Window functions and CTEs are both powerful individually. Together, they unlock a completely different tier of analysis — multi-stage computations where each stage builds on the window-function results of the previous one. This is where SQL truly rivals Python/pandas for complex data transformations.

Why Combine Them?

The problem: You can't directly filter on a window function result.

-- ❌ This does NOT work:
SELECT *, ROW_NUMBER() OVER (PARTITION BY category ORDER BY price DESC) AS rn
FROM ec_products
WHERE rn = 1;   -- ERROR: column "rn" doesn't exist at this stage

The solution: Wrap in a CTE (or subquery):

WITH ranked AS (
    SELECT *, ROW_NUMBER() OVER (PARTITION BY category ORDER BY price DESC) AS rn
    FROM ec_products
)
SELECT * FROM ranked WHERE rn = 1;

This is the most common CTE + window function combo. But it goes much deeper.


Example 1: Percentile Bands + Filtering

Classify ec_customers into percentile bands, then analyze each band:

WITH customer_revenue AS (
    SELECT
        o.customer_id,
        SUM(oi.quantity * oi.unit_price) AS total_revenue
    FROM ec_orders o
    JOIN ec_order_items oi ON oi.order_id = o.order_id
    GROUP BY o.customer_id
),

with_percentiles AS (
    SELECT
        customer_id,
        total_revenue,
        NTILE(100) OVER (ORDER BY total_revenue DESC) AS percentile_rank,
        PERCENT_RANK() OVER (ORDER BY total_revenue)  AS pct_from_bottom
    FROM customer_revenue
),

segments AS (
    SELECT
        customer_id,
        total_revenue,
        percentile_rank,
        CASE
            WHEN percentile_rank <= 1  THEN 'Top 1%'
            WHEN percentile_rank <= 10 THEN 'Top 10%'
            WHEN percentile_rank <= 25 THEN 'Top 25%'
            WHEN percentile_rank <= 50 THEN 'Top 50%'
            ELSE 'Bottom 50%'
        END AS segment
    FROM with_percentiles
)

SELECT
    segment,
    COUNT(*) AS ec_customers,
    ROUND((AVG(total_revenue))::NUMERIC, 2) AS avg_revenue,
    ROUND((SUM(total_revenue))::NUMERIC, 2) AS total_revenue,
    ROUND((100.0 * SUM(total_revenue) / SUM(SUM(total_revenue)) OVER ())::NUMERIC, 1) AS revenue_share_pct
FROM segments
GROUP BY segment
ORDER BY MIN(percentile_rank);

Example 2: Moving Average + Trend Detection

Detect whether each day's revenue is above or below the trailing 7-day average:

WITH daily_revenue AS (
    SELECT
        DATE_TRUNC('day', o.created_at)::DATE AS day,
        SUM(oi.quantity * oi.unit_price)       AS revenue
    FROM ec_orders o
    JOIN ec_order_items oi ON oi.order_id = o.order_id
    GROUP BY 1
),

with_moving_avg AS (
    SELECT
        day,
        revenue,
        AVG(revenue) OVER (
            ORDER BY day
            ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
        ) AS moving_avg_7d,
        COUNT(*) OVER (
            ORDER BY day
            ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
        ) AS days_in_window
    FROM daily_revenue
),

trend AS (
    SELECT
        day,
        revenue,
        ROUND(moving_avg_7d::NUMERIC, 2) AS moving_avg_7d,
        days_in_window,
        CASE
            WHEN days_in_window < 7 THEN 'insufficient data'
            WHEN revenue > moving_avg_7d * 1.1 THEN 'above trend (+10%)'
            WHEN revenue < moving_avg_7d * 0.9 THEN 'below trend (-10%)'
            ELSE 'on trend'
        END AS trend_signal
    FROM with_moving_avg
)

SELECT * FROM trend
WHERE trend_signal IN ('above trend (+10%)', 'below trend (-10%)')
ORDER BY day;

Example 3: YoY Change + Rank in Same Query

For each product, compute year-over-year revenue change and rank ec_products within their category:

WITH annual_revenue AS (
    SELECT
        p.product_id,
        p.name,
        p.category,
        EXTRACT(YEAR FROM o.created_at)::INT AS year,
        SUM(oi.quantity * oi.unit_price)     AS revenue
    FROM ec_orders o
    JOIN ec_order_items oi ON oi.order_id = o.order_id
    JOIN ec_products p     ON p.product_id = oi.product_id
    GROUP BY p.product_id, p.name, p.category, year
),

with_yoy AS (
    SELECT
        *,
        LAG(revenue) OVER (PARTITION BY product_id ORDER BY year) AS prev_year_revenue,
        ROUND((100.0 * (revenue - LAG(revenue) OVER (PARTITION BY product_id ORDER BY year))
            / NULLIF(LAG(revenue) OVER (PARTITION BY product_id ORDER BY year), 0))::NUMERIC, 1) AS yoy_change_pct
    FROM annual_revenue
),

with_category_rank AS (
    SELECT
        *,
        RANK() OVER (PARTITION BY category, year ORDER BY revenue DESC) AS category_rank,
        RANK() OVER (PARTITION BY year ORDER BY revenue DESC)           AS overall_rank
    FROM with_yoy
)

SELECT
    year,
    name,
    category,
    ROUND((revenue)::NUMERIC, 2)       AS revenue,
    yoy_change_pct,
    category_rank,
    overall_rank
FROM with_category_rank
WHERE year = 2024   -- or EXTRACT(YEAR FROM CURRENT_DATE)
ORDER BY category, category_rank;

Example 4: Stock Portfolio Analysis — Running Sharpe Ratio

Compute the rolling Sharpe ratio (return-to-volatility) for each stock:

WITH daily_returns AS (
    SELECT
        company_id,
        price_date,
        close_price,
        ROUND((100.0 * (close_price - LAG(close_price) OVER (PARTITION BY company_id ORDER BY price_date))
            / NULLIF(LAG(close_price) OVER (PARTITION BY company_id ORDER BY price_date), 0))::NUMERIC, 4) AS daily_return_pct
    FROM stock_prices
),

rolling_stats AS (
    SELECT
        company_id,
        price_date,
        daily_return_pct,
        AVG(daily_return_pct) OVER (
            PARTITION BY company_id
            ORDER BY price_date
            ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
        ) AS rolling_30d_avg,
        STDDEV(daily_return_pct) OVER (
            PARTITION BY company_id
            ORDER BY price_date
            ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
        ) AS rolling_30d_stddev,
        COUNT(*) OVER (
            PARTITION BY company_id
            ORDER BY price_date
            ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
        ) AS days_in_window
    FROM daily_returns
    WHERE daily_return_pct IS NOT NULL
),

sharpe AS (
    SELECT
        rs.company_id,
        c.ticker,
        rs.price_date,
        rs.rolling_30d_avg,
        rs.rolling_30d_stddev,
        CASE
            WHEN rs.days_in_window >= 30 AND rs.rolling_30d_stddev > 0
            THEN ROUND((rs.rolling_30d_avg / rs.rolling_30d_stddev * SQRT(252))::NUMERIC, 3)
        END AS annualized_sharpe
    FROM rolling_stats rs
    JOIN companies c ON c.company_id = rs.company_id
)

SELECT DISTINCT ON (company_id)
    ticker,
    price_date,
    ROUND(rolling_30d_avg::NUMERIC, 4)    AS avg_daily_return,
    ROUND(rolling_30d_stddev::NUMERIC, 4) AS volatility,
    annualized_sharpe
FROM sharpe
WHERE annualized_sharpe IS NOT NULL
ORDER BY company_id, price_date DESC;

Purchase this course to unlock the full lesson.

Sign up