Skip to content

Commit 82a4cd8

Browse files
committed
Add postgres-exporter alerts
1 parent 47d5d05 commit 82a4cd8

File tree

2 files changed

+111
-0
lines changed

2 files changed

+111
-0
lines changed

postgres/system-alerts.yaml.tmpl

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# PROMETHEUS RULES
2+
# DO NOT REMOVE line above, used in `pre-commit` hook
3+
4+
# Single rule group, named after the exporter job it watches.
groups:
5+
- name: postgres-exporter
6+
rules:
7+
# Fires when a scrape target of the postgres-exporter job has reported up == 0 for 15 minutes.
# The expression selects the `up` series explicitly: a bare `job="..."` matcher
# without a metric selector is not valid PromQL and the rule file fails to load.
# NOTE(review): `up == 0` stays silent if the job has no targets at all; confirm
# whether an `absent(up{job="postgres-exporter"})` companion is wanted.
- alert: "PostgresExporterDown"
8+
expr: |
9+
up{job="postgres-exporter"} == 0
10+
for: 15m
11+
labels:
12+
team: infra
13+
annotations:
14+
summary: "Postgres Exporter is down"
15+
impact: "Postgres instances are not monitored"
16+
qonto_runbook: https://qonto.github.io/database-monitoring-framework/latest/runbooks/postgresql/SQLExporterDown
17+
action: |
18+
Check if postgres-exporter is running in sys-prom namespace.
19+
Check the logs.
20+
Restart pods.

postgres/team-alerts.yaml.tmpl

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
# PROMETHEUS RULES
2+
# DO NOT REMOVE line above, used in `pre-commit` hook
3+
4+
# Single rule group holding the exporter-health and PostgreSQL alerts below.
groups:
5+
- name: Postgres
6+
rules:
7+
# Fires when, for a given `instance`, the lowest `up` value of the
# postgres-exporter job has been 0 for 15 minutes.
# The aggregation keeps only the `instance` label, so annotations must use
# $labels.instance; any other label (e.g. `target`) renders as an empty string.
- alert: "PostgresExporterMissingTarget"
8+
expr: |
9+
min(up{job="postgres-exporter", instance!=""}) by (instance) == 0
10+
for: 15m
11+
labels:
12+
team: infra
13+
annotations:
14+
summary: "Postgres Exporter scrape for {{ $labels.instance }} failed"
15+
impact: "{{ $labels.instance }} instance is not monitored"
16+
qonto_runbook: https://qonto.github.io/database-monitoring-framework/latest/runbooks/postgresql/SQLExporterMissingTarget
17+
action: |
18+
Check if the postgres instance is down.
19+
Check if there's an issue with the prometheus_postgres_exporter user.
20+
21+
# Warns when the 10-minute average of pg_exporter_last_scrape_duration_seconds
# stays above 30 for 15 minutes (seconds, going by the metric name).
- alert: "PostgresExporterScrapingLimit"
22+
expr: |
23+
avg_over_time(pg_exporter_last_scrape_duration_seconds{job="postgres-exporter", instance!=""}[10m]) > 30
24+
for: 15m
25+
labels:
26+
severity: warning
27+
annotations:
28+
description: Postgres Exporter scraping is taking a long time
29+
runbook_url: https://qonto.github.io/database-monitoring-framework/latest/runbooks/postgresql/SQLExporterScrapingLimit
30+
summary: Exporter scraping takes long time
31+
32+
# Warns when pg_long_running_transactions_oldest_timestamp_seconds has been
# above 1800 for 1 minute; the description equates this threshold to 30 minutes.
# No aggregation is applied, so every label of the series (including `instance`)
# is available to the annotations.
- alert: "PostgreSQLLongRunningQueries"
33+
expr: |
34+
pg_long_running_transactions_oldest_timestamp_seconds > 1800
35+
for: 1m
36+
labels:
37+
severity: warning
38+
annotations:
39+
description: "PostgreSQL has a long-running transaction (>30 mins)"
40+
runbook_url: https://qonto.github.io/database-monitoring-framework/latest/runbooks/postgresql/PostgreSQLLongRunningQueries
41+
summary: Long running query on {{ $labels.instance }}
42+
43+
# Warns when the per-server sum of pg_stat_activity_count divided by
# pg_settings_max_connections has been above 0.8 for 10 minutes.
# `sum ... by (server)` combined with `on(server)` leaves `server` as the only
# label on the result, so the annotations reference $labels.server; other
# labels (e.g. `target`) would render as an empty string.
- alert: "PostgreSQLMaxConnections"
44+
expr: |
45+
sum(pg_stat_activity_count) by (server)
46+
/
47+
on(server)
48+
pg_settings_max_connections
49+
> 0.8
50+
for: 10m
51+
labels:
52+
severity: warning
53+
annotations:
54+
description: "{{ $labels.server }} uses >80% of the maximum database connections"
55+
runbook_url: https://qonto.github.io/database-monitoring-framework/latest/runbooks/postgresql/PostgreSQLMaxConnections
56+
summary: "{{ $labels.server }} is close from the maximum database connections"
57+
58+
# Critical alert: a scrape target of the postgres-exporter job has reported
# up == 0 for 5 minutes.
# The expression selects the `up` series explicitly: a bare `job="..."` matcher
# without a metric selector is not valid PromQL and the rule file fails to load.
- alert: "PostgresExporterDown"
59+
expr: |
60+
up{job="postgres-exporter"} == 0
61+
for: 5m
62+
labels:
63+
severity: critical
64+
annotations:
65+
description: Postgres Exporter is down
66+
runbook_url: https://qonto.github.io/database-monitoring-framework/latest/runbooks/postgresql/SQLExporterDown
67+
summary: Exporter is down
68+
69+
# Critical alert: for a given (instance, exported_job) pair, the lowest `up`
# value of the postgres-exporter job has been 0 for 5 minutes.
# Only `instance` and `exported_job` survive the aggregation, so the summary
# uses $labels.instance; `target` would render as an empty string.
- alert: "PostgresExporterMissingTarget"
70+
expr: |
71+
min(up{job="postgres-exporter", instance!=""}) by (instance,exported_job) == 0
72+
for: 5m
73+
labels:
74+
severity: critical
75+
annotations:
76+
description: >-
77+
Postgres Exporter cannot collect metrics for this target, it's either because
78+
the target is down, or the sql-exporter user cannot connect.
79+
runbook_url: https://qonto.github.io/database-monitoring-framework/latest/runbooks/postgresql/SQLExporterMissingTarget
80+
summary: "Exporter scrape for {{ $labels.instance }} ({{ $labels.exported_job }}) failed"
81+
82+
# Warns when the 10-minute average of pg_exporter_last_scrape_duration_seconds
# stays above 30 for 5 minutes (seconds, going by the metric name).
# NOTE(review): an alert with the same name, expression and `severity: warning`
# label already exists earlier in this group with `for: 15m`; confirm both are
# intended, since rule linters may report them as duplicates.
- alert: "PostgresExporterScrapingLimit"
83+
expr: |
84+
avg_over_time(pg_exporter_last_scrape_duration_seconds{job="postgres-exporter", instance!=""}[10m]) > 30
85+
for: 5m
86+
labels:
87+
severity: warning
88+
annotations:
89+
description: Postgres Exporter scraping is taking a long time
90+
runbook_url: https://qonto.github.io/database-monitoring-framework/latest/runbooks/postgresql/SQLExporterScrapingLimit
91+
summary: Exporter scraping takes long time

0 commit comments

Comments
 (0)