Anomaly Detection — Z-Score and IQR Methods in Pure SQL
Anomaly Detection — Z-Score and IQR Methods in Pure SQL
Anomalies are data points that deviate significantly from the norm. Two standard approaches: Z-score (how many standard deviations from the mean) and IQR (interquartile range — the robust alternative for skewed data).
-- Z-score anomaly detection on hourly CPU averages
WITH hourly AS (
SELECT
host,
DATE_TRUNC('hour', recorded_at) AS hour_bucket,
AVG(cpu_pct) AS avg_cpu
FROM ts_server_metrics
GROUP BY host, DATE_TRUNC('hour', recorded_at)
),
stats AS (
SELECT
host,
AVG(avg_cpu) AS mean_cpu,
STDDEV_SAMP(avg_cpu) AS std_cpu
FROM hourly
GROUP BY host
),
z_scored AS (
SELECT
h.host,
h.hour_bucket,
ROUND(h.avg_cpu::numeric, 2) AS avg_cpu,
ROUND(s.mean_cpu::numeric, 2) AS host_mean,
ROUND(s.std_cpu::numeric, 2) AS host_std,
ROUND(((h.avg_cpu - s.mean_cpu)
/ NULLIF(s.std_cpu, 0))::numeric, 2) AS z_score
FROM hourly h
JOIN stats s ON s.host = h.host
)
SELECT *,
CASE
WHEN ABS(z_score) > 3 THEN 'Extreme Anomaly'
WHEN ABS(z_score) > 2 THEN 'Anomaly'
WHEN ABS(z_score) > 1.5 THEN 'Suspicious'
ELSE 'Normal'
END AS anomaly_label
FROM z_scored
WHERE ABS(z_score) > 2
ORDER BY ABS(z_score) DESC
LIMIT 30;