CTEs Combined with Window Functions
CTEs Combined with Window Functions
Window functions and CTEs are both powerful individually. Together, they unlock a completely different tier of analysis — multi-stage computations where each stage builds on the window-function results of the previous one. This is where SQL truly rivals Python/pandas for complex data transformations.
Why Combine Them?
The problem: You can't directly filter on a window function result.
-- ❌ This does NOT work:
SELECT *, ROW_NUMBER() OVER (PARTITION BY category ORDER BY price DESC) AS rn
FROM ec_products
WHERE rn = 1; -- ERROR: column "rn" doesn't exist at this stage
The solution: Wrap in a CTE (or subquery):
WITH ranked AS (
SELECT *, ROW_NUMBER() OVER (PARTITION BY category ORDER BY price DESC) AS rn
FROM ec_products
)
SELECT * FROM ranked WHERE rn = 1;
This is the most common CTE + window function combo. But it goes much deeper.
Example 1: Percentile Bands + Filtering
Classify ec_customers into percentile bands, then analyze each band:
WITH customer_revenue AS (
SELECT
o.customer_id,
SUM(oi.quantity * oi.unit_price) AS total_revenue
FROM ec_orders o
JOIN ec_order_items oi ON oi.order_id = o.order_id
GROUP BY o.customer_id
),
with_percentiles AS (
SELECT
customer_id,
total_revenue,
NTILE(100) OVER (ORDER BY total_revenue DESC) AS percentile_rank,
PERCENT_RANK() OVER (ORDER BY total_revenue) AS pct_from_bottom
FROM customer_revenue
),
segments AS (
SELECT
customer_id,
total_revenue,
percentile_rank,
CASE
WHEN percentile_rank <= 1 THEN 'Top 1%'
WHEN percentile_rank <= 10 THEN 'Top 10%'
WHEN percentile_rank <= 25 THEN 'Top 25%'
WHEN percentile_rank <= 50 THEN 'Top 50%'
ELSE 'Bottom 50%'
END AS segment
FROM with_percentiles
)
SELECT
segment,
COUNT(*) AS ec_customers,
ROUND((AVG(total_revenue))::NUMERIC, 2) AS avg_revenue,
ROUND((SUM(total_revenue))::NUMERIC, 2) AS total_revenue,
ROUND((100.0 * SUM(total_revenue) / SUM(SUM(total_revenue)) OVER ())::NUMERIC, 1) AS revenue_share_pct
FROM segments
GROUP BY segment
ORDER BY MIN(percentile_rank);
Example 2: Moving Average + Trend Detection
Detect whether each day's revenue is above or below the trailing 7-day average:
WITH daily_revenue AS (
SELECT
DATE_TRUNC('day', o.created_at)::DATE AS day,
SUM(oi.quantity * oi.unit_price) AS revenue
FROM ec_orders o
JOIN ec_order_items oi ON oi.order_id = o.order_id
GROUP BY 1
),
with_moving_avg AS (
SELECT
day,
revenue,
AVG(revenue) OVER (
ORDER BY day
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS moving_avg_7d,
COUNT(*) OVER (
ORDER BY day
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS days_in_window
FROM daily_revenue
),
trend AS (
SELECT
day,
revenue,
ROUND(moving_avg_7d::NUMERIC, 2) AS moving_avg_7d,
days_in_window,
CASE
WHEN days_in_window < 7 THEN 'insufficient data'
WHEN revenue > moving_avg_7d * 1.1 THEN 'above trend (+10%)'
WHEN revenue < moving_avg_7d * 0.9 THEN 'below trend (-10%)'
ELSE 'on trend'
END AS trend_signal
FROM with_moving_avg
)
SELECT * FROM trend
WHERE trend_signal IN ('above trend (+10%)', 'below trend (-10%)')
ORDER BY day;
Example 3: YoY Change + Rank in Same Query
For each product, compute year-over-year revenue change and rank ec_products within their category:
WITH annual_revenue AS (
SELECT
p.product_id,
p.name,
p.category,
EXTRACT(YEAR FROM o.created_at)::INT AS year,
SUM(oi.quantity * oi.unit_price) AS revenue
FROM ec_orders o
JOIN ec_order_items oi ON oi.order_id = o.order_id
JOIN ec_products p ON p.product_id = oi.product_id
GROUP BY p.product_id, p.name, p.category, year
),
with_yoy AS (
SELECT
*,
LAG(revenue) OVER (PARTITION BY product_id ORDER BY year) AS prev_year_revenue,
ROUND((100.0 * (revenue - LAG(revenue) OVER (PARTITION BY product_id ORDER BY year))
/ NULLIF(LAG(revenue) OVER (PARTITION BY product_id ORDER BY year), 0))::NUMERIC, 1) AS yoy_change_pct
FROM annual_revenue
),
with_category_rank AS (
SELECT
*,
RANK() OVER (PARTITION BY category, year ORDER BY revenue DESC) AS category_rank,
RANK() OVER (PARTITION BY year ORDER BY revenue DESC) AS overall_rank
FROM with_yoy
)
SELECT
year,
name,
category,
ROUND((revenue)::NUMERIC, 2) AS revenue,
yoy_change_pct,
category_rank,
overall_rank
FROM with_category_rank
WHERE year = 2024 -- or EXTRACT(YEAR FROM CURRENT_DATE)
ORDER BY category, category_rank;
Example 4: Stock Portfolio Analysis — Running Sharpe Ratio
Compute the rolling Sharpe ratio (return-to-volatility) for each stock:
WITH daily_returns AS (
SELECT
company_id,
price_date,
close_price,
ROUND((100.0 * (close_price - LAG(close_price) OVER (PARTITION BY company_id ORDER BY price_date))
/ NULLIF(LAG(close_price) OVER (PARTITION BY company_id ORDER BY price_date), 0))::NUMERIC, 4) AS daily_return_pct
FROM stock_prices
),
rolling_stats AS (
SELECT
company_id,
price_date,
daily_return_pct,
AVG(daily_return_pct) OVER (
PARTITION BY company_id
ORDER BY price_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
) AS rolling_30d_avg,
STDDEV(daily_return_pct) OVER (
PARTITION BY company_id
ORDER BY price_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
) AS rolling_30d_stddev,
COUNT(*) OVER (
PARTITION BY company_id
ORDER BY price_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
) AS days_in_window
FROM daily_returns
WHERE daily_return_pct IS NOT NULL
),
sharpe AS (
SELECT
rs.company_id,
c.ticker,
rs.price_date,
rs.rolling_30d_avg,
rs.rolling_30d_stddev,
CASE
WHEN rs.days_in_window >= 30 AND rs.rolling_30d_stddev > 0
THEN ROUND((rs.rolling_30d_avg / rs.rolling_30d_stddev * SQRT(252))::NUMERIC, 3)
END AS annualized_sharpe
FROM rolling_stats rs
JOIN companies c ON c.company_id = rs.company_id
)
SELECT DISTINCT ON (company_id)
ticker,
price_date,
ROUND(rolling_30d_avg::NUMERIC, 4) AS avg_daily_return,
ROUND(rolling_30d_stddev::NUMERIC, 4) AS volatility,
annualized_sharpe
FROM sharpe
WHERE annualized_sharpe IS NOT NULL
ORDER BY company_id, price_date DESC;