Incremental Refresh — Updating Only What Changed

Incremental Refresh — Updating Only What Changed

Full REFRESH replaces all data. For very large materialized views, even CONCURRENTLY can be slow. The alternative: manually refresh only the rows that changed since the last refresh.

-- Pattern: store a watermark of the last refreshed point
CREATE TABLE IF NOT EXISTS mv_watermarks (
    view_name    TEXT PRIMARY KEY,
    last_refresh TIMESTAMPTZ DEFAULT '1970-01-01'
);

INSERT INTO mv_watermarks (view_name)
VALUES ('mv_incremental_tweet_stats')
ON CONFLICT DO NOTHING;

-- Base aggregate table — full snapshot to start (a regular table, not a
-- materialized view, because we want to INSERT/UPDATE rows incrementally)
CREATE TABLE IF NOT EXISTS mv_incremental_tweet_stats AS
SELECT
    user_id,
    created_at::DATE                AS stat_date,
    COUNT(*)                        AS tweets,
    SUM(like_count)                 AS likes,
    SUM(retweet_count)              AS retweets
FROM tw_tweets
GROUP BY user_id, created_at::DATE;

CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_incr_tweet_stats_user_day
    ON mv_incremental_tweet_stats (user_id, stat_date);
DO $$ BEGIN
    IF NOT EXISTS (
        SELECT 1 FROM pg_constraint
        WHERE conname = 'mv_incr_tweet_stats_pk'
    ) THEN
        ALTER TABLE mv_incremental_tweet_stats
            ADD CONSTRAINT mv_incr_tweet_stats_pk
            UNIQUE USING INDEX idx_mv_incr_tweet_stats_user_day;
    END IF;
END $$;

Purchase this course to unlock the full lesson.

Sign up