Recursive CTEs — Graph Traversal
Recursive CTEs — Graph Traversal
Hierarchical data forms trees — one parent per node. Graphs are more general: a node can have multiple parents, edges can be directed or undirected, and cycles are possible. Recursive CTEs handle graphs too, with careful cycle protection.
Trees vs Graphs
| Feature | Tree | Graph |
|---|---|---|
| Parent count | exactly 1 (or 0 for root) | 0, 1, or many |
| Cycles | impossible | possible |
| Direction | implied top-down | explicit |
| SQL challenge | depth/path tracking | cycle detection |
Pattern: Shortest Path (BFS)
Breadth-First Search finds the shortest path between two nodes. With recursive CTEs, each iteration explores one additional "hop":
WITH RECURSIVE
edges(from_node, to_node) AS (
VALUES
('A','B'), ('A','C'),
('B','D'), ('B','E'),
('C','E'), ('C','F'),
('D','G'), ('E','G'),
('F','G'), ('G','Z')
),
paths AS (
-- Anchor: start node
SELECT
from_node,
to_node,
ARRAY[from_node] AS visited,
1 AS hops
FROM edges
WHERE from_node = 'A' -- starting node
UNION ALL
-- Recursive: extend path by one hop
SELECT
p.from_node,
e.to_node,
p.visited || e.to_node,
p.hops + 1
FROM edges e
JOIN paths p ON p.to_node = e.from_node
WHERE NOT e.to_node = ANY(p.visited) -- cycle protection
AND p.hops < 10 -- max depth guard
)
SELECT visited AS path, hops
FROM paths
WHERE to_node = 'Z' -- destination
ORDER BY hops
LIMIT 1; -- shortest path
Example 1: Route Planning (Customer Journey)
Find all possible paths a customer can take through your website event funnel:
-- Define transitions between event types
WITH RECURSIVE
transitions AS (
-- Build edge list from sequential ec_events per session
SELECT DISTINCT
e1.event_type AS from_event,
e2.event_type AS to_event
FROM ec_events e1
JOIN ec_events e2
ON e2.customer_id = e1.customer_id
AND e2.created_at > e1.created_at
AND NOT EXISTS (
SELECT 1 FROM ec_events e3
WHERE e3.customer_id = e1.customer_id
AND e3.created_at > e1.created_at
AND e3.created_at < e2.created_at
) -- immediate next event only
),
journey AS (
SELECT
from_event,
to_event,
ARRAY[from_event, to_event] AS path,
1 AS steps
FROM transitions
WHERE from_event = 'page_view'
UNION ALL
SELECT
j.from_event,
t.to_event,
j.path || t.to_event,
j.steps + 1
FROM transitions t
JOIN journey j ON j.to_event = t.from_event
WHERE NOT t.to_event = ANY(j.path)
AND j.steps < 5
)
SELECT
path,
steps,
path[array_length(path, 1)] AS endpoint
FROM journey
WHERE path[array_length(path, 1)] = 'purchase'
ORDER BY steps;
Example 2: Supplier Network (Reachability)
From a given supplier, which other suppliers can be reached through partnerships (direct or indirect)?
WITH RECURSIVE
supplier_partnerships(supplier_id_a, supplier_id_b) AS (
VALUES
(1, 2), (1, 3),
(2, 4), (3, 4),
(3, 5), (4, 6),
(5, 6)
),
reachable AS (
-- Start: direct partners of supplier 1
SELECT
supplier_id_a AS from_supplier,
supplier_id_b AS to_supplier,
1 AS hops,
ARRAY[supplier_id_a, supplier_id_b] AS path
FROM supplier_partnerships
WHERE supplier_id_a = 1
UNION ALL
SELECT
r.from_supplier,
sp.supplier_id_b,
r.hops + 1,
r.path || sp.supplier_id_b
FROM supplier_partnerships sp
JOIN reachable r ON r.to_supplier = sp.supplier_id_a
WHERE NOT sp.supplier_id_b = ANY(r.path)
AND r.hops < 6
)
SELECT DISTINCT to_supplier, MIN(hops) AS min_hops
FROM reachable
GROUP BY to_supplier
ORDER BY min_hops;
Example 3: Social Network — Degrees of Separation
Using a follows/friends table to find connections within N degrees:
WITH RECURSIVE
user_follows(customer_id, follows_id) AS (
VALUES
(100, 101), (100, 102),
(101, 103), (101, 104),
(102, 105), (103, 106)
),
-- Find all users within 3 degrees of customer_id = 100
connections AS (
SELECT
100 AS origin,
follows_id AS connected_to,
1 AS degree,
ARRAY[100, follows_id] AS path
FROM user_follows
WHERE customer_id = 100
UNION ALL
SELECT
c.origin,
uf.follows_id,
c.degree + 1,
c.path || uf.follows_id
FROM user_follows uf
JOIN connections c ON c.connected_to = uf.customer_id
WHERE NOT uf.follows_id = ANY(c.path)
AND c.degree < 3
)
SELECT
connected_to AS customer_id,
MIN(degree) AS degrees_from_origin
FROM connections
GROUP BY connected_to
ORDER BY degrees_from_origin, connected_to;
Example 4: Dependency Resolution (Package Manager Style)
Resolve transitive dependencies — if A depends on B, and B depends on C, then A transitively depends on C:
WITH RECURSIVE
package_dependencies(package, depends_on) AS (
VALUES
('myapp', 'react'),
('myapp', 'lodash'),
('react', 'react-dom'),
('react', 'scheduler'),
('lodash', 'core-js'),
('react-dom', 'scheduler')
),
all_deps AS (
-- Direct dependencies
SELECT
package AS root_package,
depends_on AS dependency,
1 AS depth,
ARRAY[package, depends_on] AS dep_path
FROM package_dependencies
WHERE package = 'myapp'
UNION ALL
SELECT
ad.root_package,
pd.depends_on,
ad.depth + 1,
ad.dep_path || pd.depends_on
FROM package_dependencies pd
JOIN all_deps ad ON ad.dependency = pd.package
WHERE NOT pd.depends_on = ANY(ad.dep_path) -- prevent circular deps
AND ad.depth < 10
)
SELECT DISTINCT
dependency,
MIN(depth) AS resolved_at_depth
FROM all_deps
GROUP BY dependency
ORDER BY resolved_at_depth, dependency;