Incremental Refresh — Updating Only What Changed
Incremental Refresh — Updating Only What Changed
Full REFRESH replaces all data. For very large materialized views, even
CONCURRENTLY can be slow. The alternative: manually refresh only the rows
that changed since the last refresh.
-- Pattern: store a watermark of the last refreshed point
CREATE TABLE IF NOT EXISTS mv_watermarks (
view_name TEXT PRIMARY KEY,
last_refresh TIMESTAMPTZ DEFAULT '1970-01-01'
);
INSERT INTO mv_watermarks (view_name)
VALUES ('mv_incremental_tweet_stats')
ON CONFLICT DO NOTHING;
-- Base aggregate table — full snapshot to start (a regular table, not a
-- materialized view, because we want to INSERT/UPDATE rows incrementally)
CREATE TABLE IF NOT EXISTS mv_incremental_tweet_stats AS
SELECT
user_id,
created_at::DATE AS stat_date,
COUNT(*) AS tweets,
SUM(like_count) AS likes,
SUM(retweet_count) AS retweets
FROM tw_tweets
GROUP BY user_id, created_at::DATE;
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_incr_tweet_stats_user_day
ON mv_incremental_tweet_stats (user_id, stat_date);
DO $$ BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_constraint
WHERE conname = 'mv_incr_tweet_stats_pk'
) THEN
ALTER TABLE mv_incremental_tweet_stats
ADD CONSTRAINT mv_incr_tweet_stats_pk
UNIQUE USING INDEX idx_mv_incr_tweet_stats_user_day;
END IF;
END $$;