Anomaly Detection — Z-Score and IQR Methods in Pure SQL

Anomaly Detection — Z-Score and IQR Methods in Pure SQL

Anomalies are data points that deviate significantly from the norm. Two standard approaches: Z-score (how many standard deviations from the mean) and IQR (interquartile range — the robust alternative for skewed data).

-- Z-score anomaly detection on hourly CPU averages
WITH hourly AS (
    SELECT
        host,
        DATE_TRUNC('hour', recorded_at)              AS hour_bucket,
        AVG(cpu_pct)                                 AS avg_cpu
    FROM ts_server_metrics
    GROUP BY host, DATE_TRUNC('hour', recorded_at)
),
stats AS (
    SELECT
        host,
        AVG(avg_cpu)                                 AS mean_cpu,
        STDDEV_SAMP(avg_cpu)                         AS std_cpu
    FROM hourly
    GROUP BY host
),
z_scored AS (
    SELECT
        h.host,
        h.hour_bucket,
        ROUND(h.avg_cpu::numeric, 2)                 AS avg_cpu,
        ROUND(s.mean_cpu::numeric, 2)                AS host_mean,
        ROUND(s.std_cpu::numeric, 2)                 AS host_std,
        ROUND(((h.avg_cpu - s.mean_cpu)
               / NULLIF(s.std_cpu, 0))::numeric, 2) AS z_score
    FROM hourly h
    JOIN stats s ON s.host = h.host
)
SELECT *,
    CASE
        WHEN ABS(z_score) > 3   THEN 'Extreme Anomaly'
        WHEN ABS(z_score) > 2   THEN 'Anomaly'
        WHEN ABS(z_score) > 1.5 THEN 'Suspicious'
        ELSE                         'Normal'
    END                                              AS anomaly_label
FROM z_scored
WHERE ABS(z_score) > 2
ORDER BY ABS(z_score) DESC
LIMIT 30;

Purchase this course to unlock the full lesson.

Sign up