Viral Content Detection — Retweet Cascades & Engagement Spikes

Viral Content Detection — Retweet Cascades & Engagement Spikes

Viral tweets spread through retweet chains. We detect cascade depth using a recursive CTE to trace retweet trees, and identify engagement spikes by comparing a tweet's performance to the posting user's baseline.

-- Retweet cascade: trace the spread of a viral original tweet
WITH RECURSIVE cascade AS (
    -- Anchor: original (non-retweet) tweets with high retweet count
    SELECT
        id,
        user_id,
        retweet_of_id,
        retweet_count,
        like_count,
        created_at,
        0                                               AS depth,
        ARRAY[id]                                       AS path
    FROM tw_tweets
    WHERE retweet_of_id IS NULL
      AND retweet_count > 500

    UNION ALL

    -- Recursive: tweets that retweet something in the cascade
    SELECT
        t.id,
        t.user_id,
        t.retweet_of_id,
        t.retweet_count,
        t.like_count,
        t.created_at,
        c.depth + 1,
        c.path || t.id
    FROM tw_tweets t
    JOIN cascade c ON c.id = t.retweet_of_id
    WHERE t.id != ALL(c.path)   -- cycle guard
      AND c.depth < 5
)
SELECT
    MAX(depth)                                          AS cascade_depth,
    COUNT(*)                                            AS total_nodes,
    MIN(created_at)                                     AS first_tweet,
    MAX(created_at)                                     AS last_spread,
    EXTRACT(EPOCH FROM MAX(created_at) - MIN(created_at)) / 3600
                                                        AS spread_hours,
    SUM(like_count)                                     AS total_likes_in_cascade
FROM cascade
GROUP BY path[1]
ORDER BY total_nodes DESC
LIMIT 10;

Purchase this course to unlock the full lesson.

Sign up