Step-by-Step Aggregation with CTEs

Step-by-Step Aggregation with CTEs

Complex aggregation problems often feel impossible to solve in a single query. CTEs let you break them into clear, verifiable steps — each one building on the last.

The Problem with Nested Aggregations

Suppose you want to find ec_customers whose average order value is above the overall average order value. You need two aggregation levels:

  1. Average order value per customer
  2. Average of those averages (or the global average)

In raw SQL this becomes a mess of subqueries. With CTEs, each step is explicit.


Example 1: Customers Above the Average Order Value

WITH order_totals AS (
    -- Step 1: total per order
    SELECT
        o.order_id,
        o.customer_id,
        SUM(oi.quantity * oi.unit_price) AS order_total
    FROM ec_orders o
    JOIN ec_order_items oi ON oi.order_id = o.order_id
    GROUP BY o.order_id, o.customer_id
),

customer_avg AS (
    -- Step 2: average order value per customer
    SELECT
        customer_id,
        AVG(order_total)   AS avg_order_value,
        COUNT(*)           AS total_orders,
        SUM(order_total)   AS lifetime_value
    FROM order_totals
    GROUP BY customer_id
),

global_avg AS (
    -- Step 3: global average across all ec_orders
    SELECT AVG(order_total) AS global_avg_order
    FROM order_totals
)

-- Step 4: filter ec_customers above the global average
SELECT
    c.name AS customer,
    ca.avg_order_value,
    ca.total_orders,
    ca.lifetime_value,
    ROUND(ca.avg_order_value - ga.global_avg_order, 2) AS above_avg_by
FROM customer_avg ca
JOIN ec_customers c ON c.customer_id = ca.customer_id
CROSS JOIN global_avg ga
WHERE ca.avg_order_value > ga.global_avg_order
ORDER BY ca.avg_order_value DESC;

Key technique: CROSS JOIN global_avg — because global_avg returns a single row, this effectively attaches the global figure to every customer row without changing the row count.


Example 2: Product Category Performance vs Category Average

Which ec_products are outperforming their own category's average revenue?

WITH product_revenue AS (
    -- revenue per product
    SELECT
        p.product_id,
        p.name,
        p.category,
        SUM(oi.quantity * oi.unit_price) AS revenue
    FROM ec_products p
    JOIN ec_order_items oi ON oi.product_id = p.product_id
    GROUP BY p.product_id, p.name, p.category
),

category_avg AS (
    -- average revenue within each category
    SELECT
        category,
        AVG(revenue) AS avg_category_revenue,
        COUNT(*)     AS products_in_category
    FROM product_revenue
    GROUP BY category
)

SELECT
    pr.name,
    pr.category,
    pr.revenue,
    ROUND(ca.avg_category_revenue, 2) AS category_avg,
    ROUND(pr.revenue - ca.avg_category_revenue, 2) AS difference,
    ROUND(100.0 * (pr.revenue - ca.avg_category_revenue) / ca.avg_category_revenue, 1) AS pct_above_avg
FROM product_revenue pr
JOIN category_avg ca ON ca.category = pr.category
WHERE pr.revenue > ca.avg_category_revenue
ORDER BY pct_above_avg DESC;

Example 3: Multi-Level Sales Funnel from Events

Using the ec_events table — how many users hit each stage, and what's the drop-off at each step?

WITH stage_users AS (
    -- count distinct users per event type
    SELECT
        event_type,
        COUNT(DISTINCT customer_id) AS users
    FROM ec_events
    GROUP BY event_type
),

funnel AS (
    -- define the funnel order explicitly
    SELECT
        event_type,
        users,
        CASE event_type
            WHEN 'page_view'    THEN 1
            WHEN 'add_to_cart'  THEN 2
            WHEN 'purchase'     THEN 3
        END AS funnel_step
    FROM stage_users
    WHERE event_type IN ('page_view', 'add_to_cart', 'purchase')
),

with_lag AS (
    SELECT
        funnel_step,
        event_type,
        users,
        LAG(users) OVER (ORDER BY funnel_step) AS prev_users
    FROM funnel
)

SELECT
    funnel_step,
    event_type,
    users,
    prev_users,
    CASE
        WHEN prev_users IS NULL THEN NULL
        ELSE ROUND(100.0 * users / prev_users, 1)
    END AS retention_pct
FROM with_lag
ORDER BY funnel_step;

This combines CTEs with a window function (LAG) for clean funnel analysis.


Example 4: Stock Portfolio Performance (StockMarket dataset)

Find which stocks in each sector are beating the sector's average 30-day return.

WITH recent_prices AS (
    -- latest price per stock
    SELECT DISTINCT ON (company_id)
        company_id,
        close_price AS latest_price,
        price_date  AS latest_date
    FROM stock_prices
    ORDER BY company_id, price_date DESC
),

prices_30d_ago AS (
    -- price closest to 30 days ago
    SELECT DISTINCT ON (company_id)
        company_id,
        close_price AS price_30d,
        price_date
    FROM stock_prices
    WHERE price_date <= CURRENT_DATE - INTERVAL '30 days'
    ORDER BY company_id, price_date DESC
),

stock_returns AS (
    SELECT
        c.company_id,
        c.ticker,
        c.name,
        c.sector,
        ROUND(100.0 * (rp.latest_price - p30.price_30d) / p30.price_30d, 2) AS return_30d_pct
    FROM companies c
    JOIN recent_prices  rp  ON rp.company_id  = c.company_id
    JOIN prices_30d_ago p30 ON p30.company_id = c.company_id
),

sector_avg AS (
    SELECT
        sector,
        AVG(return_30d_pct) AS avg_sector_return
    FROM stock_returns
    GROUP BY sector
)

SELECT
    sr.ticker,
    sr.name,
    sr.sector,
    sr.return_30d_pct,
    ROUND(sa.avg_sector_return, 2) AS sector_avg,
    ROUND(sr.return_30d_pct - sa.avg_sector_return, 2) AS alpha
FROM stock_returns sr
JOIN sector_avg sa ON sa.sector = sr.sector
WHERE sr.return_30d_pct > sa.avg_sector_return
ORDER BY alpha DESC;

Purchase this course to unlock the full lesson.

Sign up