Viral Content Detection — Retweet Cascades & Engagement Spikes
Viral Content Detection — Retweet Cascades & Engagement Spikes
Viral tweets spread through retweet chains. We detect cascade depth using a recursive CTE to trace retweet trees, and identify engagement spikes by comparing a tweet's performance to the posting user's baseline.
-- Retweet cascade: trace the spread of a viral original tweet
WITH RECURSIVE cascade AS (
-- Anchor: original (non-retweet) tweets with high retweet count
SELECT
id,
user_id,
retweet_of_id,
retweet_count,
like_count,
created_at,
0 AS depth,
ARRAY[id] AS path
FROM tw_tweets
WHERE retweet_of_id IS NULL
AND retweet_count > 500
UNION ALL
-- Recursive: tweets that retweet something in the cascade
SELECT
t.id,
t.user_id,
t.retweet_of_id,
t.retweet_count,
t.like_count,
t.created_at,
c.depth + 1,
c.path || t.id
FROM tw_tweets t
JOIN cascade c ON c.id = t.retweet_of_id
WHERE t.id != ALL(c.path) -- cycle guard
AND c.depth < 5
)
SELECT
MAX(depth) AS cascade_depth,
COUNT(*) AS total_nodes,
MIN(created_at) AS first_tweet,
MAX(created_at) AS last_spread,
EXTRACT(EPOCH FROM MAX(created_at) - MIN(created_at)) / 3600
AS spread_hours,
SUM(like_count) AS total_likes_in_cascade
FROM cascade
GROUP BY path[1]
ORDER BY total_nodes DESC
LIMIT 10;