JSONB for Event Logging and Audit Trails

JSONB for Event Logging and Audit Trails

One of the most powerful real-world applications of JSONB is event logging. Application events_log — page views, button clicks, API calls, user actions, system state changes — vary dramatically in their payload structure. A "purchase" event has order_id, amount, and items. A "login" event has IP address and user agent. A "page_view" event has URL, referrer, and session ID.

Storing all these in a single events table with a JSONB payload column is far cleaner than creating one table per event type or adding dozens of nullable columns to a single table.

The EAV anti-pattern vs JSONB

The traditional workaround for variable-schema data is the Entity-Attribute-Value (EAV) pattern: three columns (entity_id, attribute_name, attribute_value), one row per attribute. This is notoriously difficult to query — getting all attributes for one entity requires a pivot — and offers no type safety. JSONB eliminates the need for EAV in most cases.


Example 1: Building a complete event log schema

CREATE TABLE IF NOT EXISTS events_log (
    event_id    BIGSERIAL PRIMARY KEY,
    customer_id INT,
    session_id  TEXT NOT NULL,
    event_type  TEXT NOT NULL,
    occurred_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    payload     JSONB
);

CREATE INDEX IF NOT EXISTS idx_events_log_customer ON events_log (customer_id);
CREATE INDEX IF NOT EXISTS idx_events_log_type ON events_log (event_type);
CREATE INDEX IF NOT EXISTS idx_events_log_time ON events_log (occurred_at DESC);
CREATE INDEX IF NOT EXISTS idx_events_log_payload ON events_log USING GIN (payload);

-- Insert sample events_log with varied payloads
INSERT INTO events_log (customer_id, session_id, event_type, occurred_at, payload) VALUES
  (1, 'sess_abc', 'page_view',    '2025-03-01 10:00:00+00',
   '{"url":"/home","referrer":null,"duration_ms":3200}'),
  (1, 'sess_abc', 'product_view', '2025-03-01 10:01:15+00',
   '{"product_id":42,"product_name":"MacBook Pro 14","category":"Electronics","price":1999.00}'),
  (1, 'sess_abc', 'add_to_cart',  '2025-03-01 10:03:40+00',
   '{"product_id":42,"quantity":1,"cart_total":1999.00}'),
  (1, 'sess_abc', 'purchase',     '2025-03-01 10:08:22+00',
   '{"order_id":5001,"amount":1999.00,"method":"credit_card","promo_code":null}'),
  (2, 'sess_xyz', 'page_view',    '2025-03-01 11:00:00+00',
   '{"url":"/products","referrer":"google.com","duration_ms":1800}'),
  (2, 'sess_xyz', 'search',       '2025-03-01 11:01:05+00',
   '{"query":"laptop","results_count":12,"filters":{"category":"Electronics","max_price":2500}}'),
  (2, 'sess_xyz', 'product_view', '2025-03-01 11:02:30+00',
   '{"product_id":43,"product_name":"ThinkPad X1","category":"Electronics","price":1599.00}'),
  (3, 'sess_def', 'login',        '2025-03-02 09:00:00+00',
   '{"method":"email","ip":"192.168.1.100","user_agent":"Mozilla/5.0","success":true}'),
  (3, 'sess_def', 'login',        '2025-03-02 08:55:00+00',
   '{"method":"email","ip":"10.0.0.1","user_agent":"curl/7.64.1","success":false}');

Example 2: Querying event payloads

The real power: querying events_log by payload content using the GIN index:

-- (Template — replace these queries with ones that use the events_log
-- table you just created in the block above)
SELECT 'examples below are templates' AS info;
/*
-- All purchase events_log above £1000
SELECT
    e.occurred_at,
    c.email,
    NULL::NUMERIC AS amount,
    NULL::TEXT AS method,
    NULL::TEXT AS order_id
FROM ec_events e
JOIN customers c ON c.customer_id = e.customer_id
WHERE e.event_type = 'purchase'
  AND NULL::NUMERIC > 1000  -- placeholder: ec_events has no payload column
ORDER BY e.occurred_at DESC;

-- All failed login attempts (security analysis)
SELECT
    e.occurred_at,
    c.email,
    NULL::TEXT AS ip_address,
    NULL::TEXT AS user_agent
FROM ec_events e
JOIN customers c ON c.customer_id = e.customer_id
WHERE e.event_type = 'login'
  AND NULL::BOOLEAN = FALSE  -- placeholder: ec_events has no payload
ORDER BY e.occurred_at DESC;

-- Sessions with search events_log and what they searched for
SELECT
    session_id,
    NULL::TEXT AS search_queries,  -- payload column not in ec_events
    NULL::INT AS max_results       -- payload column not in ec_events
FROM ec_events
WHERE event_type = 'search'
GROUP BY session_id
ORDER BY session_id;
*/

Example 3: Funnel analysis from JSONB event log

Funnel analysis on raw event data — tracking conversion rates through the purchase funnel:

WITH session_funnel AS (
    SELECT
        session_id,
        customer_id,
        BOOL_OR(event_type = 'page_view')    AS reached_page,
        BOOL_OR(event_type = 'product_view') AS reached_product,
        BOOL_OR(event_type = 'add_to_cart')  AS reached_cart,
        BOOL_OR(event_type = 'purchase')     AS converted,
        MIN(CASE WHEN event_type = 'purchase'
            THEN (payload ->> 'amount')::NUMERIC END) AS purchase_amount,
        COUNT(*) AS total_events,
        MAX(occurred_at) - MIN(occurred_at) AS session_duration
    FROM events_log
    GROUP BY session_id, customer_id
),
funnel_counts AS (
    SELECT
        COUNT(*) FILTER (WHERE reached_page)    AS page_views,
        COUNT(*) FILTER (WHERE reached_product) AS product_views,
        COUNT(*) FILTER (WHERE reached_cart)    AS add_to_cart,
        COUNT(*) FILTER (WHERE converted)       AS purchases,
        ROUND(AVG(purchase_amount) FILTER (WHERE converted), 2) AS avg_purchase
    FROM session_funnel
)
SELECT
    'Page View'     AS step, page_views AS sessions,
    ROUND(100.0 * page_views / NULLIF(page_views, 0), 1) AS conversion_from_top
FROM funnel_counts
UNION ALL
SELECT 'Product View', product_views,
    ROUND(100.0 * product_views / NULLIF(page_views, 0), 1)
FROM funnel_counts
UNION ALL
SELECT 'Add to Cart', add_to_cart,
    ROUND(100.0 * add_to_cart / NULLIF(page_views, 0), 1)
FROM funnel_counts
UNION ALL
SELECT 'Purchase', purchases,
    ROUND(100.0 * purchases / NULLIF(page_views, 0), 1)
FROM funnel_counts;

Example 4: Audit trail — tracking row-level changes with JSONB

JSONB is ideal for audit trail tables — storing the "before" and "after" state of any row change:

CREATE TABLE IF NOT EXISTS audit_log (
    audit_id    BIGSERIAL PRIMARY KEY,
    table_name  TEXT NOT NULL,
    record_id   INT NOT NULL,
    operation   TEXT NOT NULL,  -- 'INSERT','UPDATE','DELETE'
    changed_at  TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    changed_by  TEXT,
    old_values  JSONB,
    new_values  JSONB,
    changed_fields JSONB  -- only the fields that actually changed
);

-- Function to compute changed fields between two JSONB documents
CREATE OR REPLACE FUNCTION jsonb_diff(old_doc JSONB, new_doc JSONB)
RETURNS JSONB AS $$
    SELECT jsonb_object_agg(key, new_doc->key)
    FROM jsonb_object_keys(new_doc) AS key
    WHERE new_doc->key IS DISTINCT FROM old_doc->key;
$$ LANGUAGE SQL IMMUTABLE;

-- Querying the audit log: what changed for invoice 100?
SELECT
    operation,
    changed_at,
    changed_by,
    jsonb_pretty(old_values) AS before_state,
    jsonb_pretty(new_values) AS after_state,
    jsonb_pretty(changed_fields) AS what_changed
FROM audit_log
WHERE table_name = 'invoices' AND record_id = 100
ORDER BY changed_at DESC;

-- Find all records where the 'status' field was changed
SELECT table_name, record_id, operation, changed_at,
    old_values ->> 'status' AS old_status,
    new_values ->> 'status' AS new_status
FROM audit_log
WHERE changed_fields ? 'status'   -- GIN-indexed: fast lookup
ORDER BY changed_at DESC;

Key Takeaway

JSONB is the correct data type for event logs and audit trails — payloads vary per event type, making a fixed-schema table impractical. The combination of a GIN index on the payload column + structured columns (event_type, occurred_at, customer_id) for filtering gives you the best of both worlds: fast filtered lookups on known dimensions, flexible querying on payload contents. The ? key operator on a GIN-indexed JSONB column is the most efficient way to find records where a specific field was set or changed.