Recursive CTEs — Graph Traversal

Recursive CTEs — Graph Traversal

Hierarchical data forms trees — one parent per node. Graphs are more general: a node can have multiple parents, edges can be directed or undirected, and cycles are possible. Recursive CTEs handle graphs too, with careful cycle protection.

Trees vs Graphs

Feature Tree Graph
Parent count exactly 1 (or 0 for root) 0, 1, or many
Cycles impossible possible
Direction implied top-down explicit
SQL challenge depth/path tracking cycle detection

Pattern: Shortest Path (BFS)

Breadth-First Search finds the shortest path between two nodes. With recursive CTEs, each iteration explores one additional "hop":

WITH RECURSIVE
edges(from_node, to_node) AS (
    VALUES
        ('A','B'), ('A','C'),
        ('B','D'), ('B','E'),
        ('C','E'), ('C','F'),
        ('D','G'), ('E','G'),
        ('F','G'), ('G','Z')
),
paths AS (
    -- Anchor: start node
    SELECT
        from_node,
        to_node,
        ARRAY[from_node] AS visited,
        1                AS hops
    FROM edges
    WHERE from_node = 'A'   -- starting node

    UNION ALL

    -- Recursive: extend path by one hop
    SELECT
        p.from_node,
        e.to_node,
        p.visited || e.to_node,
        p.hops + 1
    FROM edges e
    JOIN paths p ON p.to_node = e.from_node
    WHERE NOT e.to_node = ANY(p.visited)   -- cycle protection
      AND p.hops < 10                       -- max depth guard
)
SELECT visited AS path, hops
FROM paths
WHERE to_node = 'Z'   -- destination
ORDER BY hops
LIMIT 1;              -- shortest path

Example 1: Route Planning (Customer Journey)

Find all possible paths a customer can take through your website event funnel:

-- Define transitions between event types
WITH RECURSIVE
transitions AS (
    -- Build edge list from sequential ec_events per session
    SELECT DISTINCT
        e1.event_type AS from_event,
        e2.event_type AS to_event
    FROM ec_events e1
    JOIN ec_events e2
        ON e2.customer_id = e1.customer_id
        AND e2.created_at > e1.created_at
        AND NOT EXISTS (
            SELECT 1 FROM ec_events e3
            WHERE e3.customer_id = e1.customer_id
              AND e3.created_at > e1.created_at
              AND e3.created_at < e2.created_at
        )  -- immediate next event only
),

journey AS (
    SELECT
        from_event,
        to_event,
        ARRAY[from_event, to_event] AS path,
        1 AS steps
    FROM transitions
    WHERE from_event = 'page_view'

    UNION ALL

    SELECT
        j.from_event,
        t.to_event,
        j.path || t.to_event,
        j.steps + 1
    FROM transitions t
    JOIN journey j ON j.to_event = t.from_event
    WHERE NOT t.to_event = ANY(j.path)
      AND j.steps < 5
)

SELECT
    path,
    steps,
    path[array_length(path, 1)] AS endpoint
FROM journey
WHERE path[array_length(path, 1)] = 'purchase'
ORDER BY steps;

Example 2: Supplier Network (Reachability)

From a given supplier, which other suppliers can be reached through partnerships (direct or indirect)?

WITH RECURSIVE
supplier_partnerships(supplier_id_a, supplier_id_b) AS (
    VALUES
        (1, 2), (1, 3),
        (2, 4), (3, 4),
        (3, 5), (4, 6),
        (5, 6)
),
reachable AS (
    -- Start: direct partners of supplier 1
    SELECT
        supplier_id_a AS from_supplier,
        supplier_id_b AS to_supplier,
        1             AS hops,
        ARRAY[supplier_id_a, supplier_id_b] AS path
    FROM supplier_partnerships
    WHERE supplier_id_a = 1

    UNION ALL

    SELECT
        r.from_supplier,
        sp.supplier_id_b,
        r.hops + 1,
        r.path || sp.supplier_id_b
    FROM supplier_partnerships sp
    JOIN reachable r ON r.to_supplier = sp.supplier_id_a
    WHERE NOT sp.supplier_id_b = ANY(r.path)
      AND r.hops < 6
)
SELECT DISTINCT to_supplier, MIN(hops) AS min_hops
FROM reachable
GROUP BY to_supplier
ORDER BY min_hops;

Example 3: Social Network — Degrees of Separation

Using a follows/friends table to find connections within N degrees:

WITH RECURSIVE
user_follows(customer_id, follows_id) AS (
    VALUES
        (100, 101), (100, 102),
        (101, 103), (101, 104),
        (102, 105), (103, 106)
),
-- Find all users within 3 degrees of customer_id = 100
connections AS (
    SELECT
        100            AS origin,
        follows_id     AS connected_to,
        1              AS degree,
        ARRAY[100, follows_id] AS path
    FROM user_follows
    WHERE customer_id = 100

    UNION ALL

    SELECT
        c.origin,
        uf.follows_id,
        c.degree + 1,
        c.path || uf.follows_id
    FROM user_follows uf
    JOIN connections c ON c.connected_to = uf.customer_id
    WHERE NOT uf.follows_id = ANY(c.path)
      AND c.degree < 3
)

SELECT
    connected_to AS customer_id,
    MIN(degree)  AS degrees_from_origin
FROM connections
GROUP BY connected_to
ORDER BY degrees_from_origin, connected_to;

Example 4: Dependency Resolution (Package Manager Style)

Resolve transitive dependencies — if A depends on B, and B depends on C, then A transitively depends on C:

WITH RECURSIVE
package_dependencies(package, depends_on) AS (
    VALUES
        ('myapp',     'react'),
        ('myapp',     'lodash'),
        ('react',     'react-dom'),
        ('react',     'scheduler'),
        ('lodash',    'core-js'),
        ('react-dom', 'scheduler')
),
all_deps AS (
    -- Direct dependencies
    SELECT
        package      AS root_package,
        depends_on   AS dependency,
        1            AS depth,
        ARRAY[package, depends_on] AS dep_path
    FROM package_dependencies
    WHERE package = 'myapp'

    UNION ALL

    SELECT
        ad.root_package,
        pd.depends_on,
        ad.depth + 1,
        ad.dep_path || pd.depends_on
    FROM package_dependencies pd
    JOIN all_deps ad ON ad.dependency = pd.package
    WHERE NOT pd.depends_on = ANY(ad.dep_path)  -- prevent circular deps
      AND ad.depth < 10
)
SELECT DISTINCT
    dependency,
    MIN(depth) AS resolved_at_depth
FROM all_deps
GROUP BY dependency
ORDER BY resolved_at_depth, dependency;

Purchase this course to unlock the full lesson.

Sign up