📊 Monitoring (Prometheus + Grafana)
Typ: Prometheus Metrics + Grafana Dashboards
Auth: API Key / Basic Auth
Status: ✅ Open Source Stack
Übersicht
Monitoring-Stack für:
- API Health & Performance
- Server-Metriken (CPU, RAM, Disk)
- Uptime-Überwachung
- Alerting (E-Mail, Slack)
Stack-Komponenten
| Tool | Funktion | Port |
|---|---|---|
| Prometheus | Metrics Collection | 9090 |
| Grafana | Dashboards | 3000 (Host-Port: 3001) |
| Alertmanager | Alerting | 9093 |
| Node Exporter | Host Metrics | 9100 |
| UptimeRobot | External Uptime | Cloud |
API Endpoints
| Methode | Endpunkt | Beschreibung | Cache TTL |
|---|---|---|---|
| GET | `/api/monitoring/health` | Health Check | - |
| GET | `/api/monitoring/metrics` | Prometheus Metrics | - |
| GET | `/api/monitoring/status` | System Status | 30s |
| GET | `/api/monitoring/uptime` | Uptime History | 5min |
| GET | `/api/monitoring/alerts` | Active Alerts | 1min |
Prometheus Metrics
import { Registry, Counter, Histogram, Gauge } from 'prom-client';
// Registry instance that every metric in this module registers on and that the
// /metrics endpoint serializes.
const register = new Registry();
/**
 * Counter of HTTP requests, partitioned by the `method`, `path` and `status` labels.
 */
export const httpRequestsTotal = new Counter({
  registers: [register],
  labelNames: ['method', 'path', 'status'],
  help: 'Total HTTP requests',
  name: 'http_requests_total',
});
/**
 * Histogram of HTTP request durations in seconds, partitioned by `method` and `path`.
 *
 * The bucket list includes a boundary at 2 s because the SlowResponses alert rule
 * compares the 95th percentile against 2 s. `histogram_quantile` interpolates linearly
 * inside a bucket, so without that boundary any value between 1 s and 5 s could only be
 * estimated coarsely and the alert threshold would not line up with a real bucket edge.
 */
export const httpRequestDuration = new Histogram({
  name: 'http_request_duration_seconds',
  help: 'HTTP request duration in seconds',
  labelNames: ['method', 'path'],
  buckets: [0.01, 0.05, 0.1, 0.5, 1, 2, 5],
  registers: [register],
});
/**
 * Gauge for the cache hit rate; the help text declares the value as a percentage.
 */
export const cacheHitRate = new Gauge({
  registers: [register],
  help: 'Cache hit rate percentage',
  name: 'cache_hit_rate',
});
/**
 * Gauge for the number of currently active connections.
 */
export const activeConnections = new Gauge({
  registers: [register],
  help: 'Number of active connections',
  name: 'active_connections',
});
// Label value recorded when no Express route matched the request (e.g. 404s).
// Falling back to the raw URL would create one time series per distinct requested
// path, letting arbitrary clients grow the label cardinality without bound.
const UNMATCHED_ROUTE_LABEL = 'unmatched';

// Middleware: records one counter increment and one duration observation for every
// response that emits 'finish'.
app.use((req, res, next) => {
  // The histogram's own timer measures the elapsed seconds, so no manual Date.now()
  // arithmetic (which is sensitive to wall-clock adjustments) is needed.
  const stopTimer = httpRequestDuration.startTimer();

  res.on('finish', () => {
    // req.route is only populated once a route handler matched; use the route
    // pattern (e.g. '/users/:id') rather than the concrete URL as the label.
    const path = req.route ? String(req.route.path) : UNMATCHED_ROUTE_LABEL;

    httpRequestsTotal.inc({
      method: req.method,
      path,
      status: res.statusCode,
    });
    // Calling the stop function observes the duration with the given labels.
    stopTimer({ method: req.method, path });
  });

  next();
});
// Metrics Endpoint: serializes every metric on `register` in the Prometheus
// exposition format.
app.get('/metrics', async (_req, res) => {
  try {
    // Collect first so that a failure cannot leave a half-written success response.
    const body = await register.metrics();
    res.set('Content-Type', register.contentType);
    res.end(body);
  } catch (err: unknown) {
    // Without this catch a rejected metrics() promise would surface as an unhandled
    // rejection and the scrape request would hang until it times out.
    res.status(500).end(err instanceof Error ? err.message : String(err));
  }
});
Prometheus Config
# prometheus.yml
# Global defaults: how often targets are scraped and how often rules are evaluated.
global:
  scrape_interval: 15s
  evaluation_interval: 15s

# Alerts raised by the rules below are sent to this Alertmanager instance.
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['alertmanager:9093']

# Rule files to load; the path is relative to this configuration file.
rule_files:
  - 'alerts.yml'

# One job per scraped service.
scrape_configs:
  - job_name: 'contract-api'
    static_configs:
      - targets: ['api:3000']
    metrics_path: '/metrics'

  - job_name: 'node-exporter'
    static_configs:
      - targets: ['node-exporter:9100']

  - job_name: 'sonicjs'
    static_configs:
      - targets: ['sonicjs:8787']
Alert Rules
# alerts.yml
groups:
  - name: api-alerts
    rules:
      # Fires when a 5xx series stays above 0.1 requests/second for 5 minutes.
      # NOTE(review): this is an absolute per-series rate (one series per
      # method/path/status), not an error ratio — confirm that is the intent.
      - alert: HighErrorRate
        expr: rate(http_requests_total{status=~"5.."}[5m]) > 0.1
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "High error rate detected"

      # Fires when the estimated 95th percentile latency stays above 2 seconds.
      - alert: SlowResponses
        expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 2
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "95th percentile response time > 2s"

      # Fires when any scrape target has been unreachable for 1 minute.
      - alert: ServiceDown
        expr: up == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Service {{ $labels.job }} is down"
Grafana Dashboard
{
  "title": "Contract API Dashboard",
  "panels": [
    {
      "title": "Requests/sec",
      "type": "graph",
      "targets": [{
        "expr": "rate(http_requests_total[1m])"
      }]
    },
    {
      "title": "Response Time (p95)",
      "type": "graph",
      "targets": [{
        "expr": "histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m]))"
      }]
    },
    {
      "title": "Error Rate",
      "type": "stat",
      "targets": [{
        "expr": "sum(rate(http_requests_total{status=~\"5..\"}[5m])) / sum(rate(http_requests_total[5m])) * 100"
      }]
    },
    {
      "title": "Cache Hit Rate",
      "type": "gauge",
      "targets": [{
        "expr": "cache_hit_rate"
      }]
    }
  ]
}
UptimeRobot Integration
const UPTIMEROBOT_API = 'https://api.uptimerobot.com/v2';

/**
 * Sends one JSON POST request to an UptimeRobot endpoint and returns the decoded body.
 *
 * The API key is read from `process.env.UPTIMEROBOT_API_KEY` and sent as the first
 * body field, followed by `params` in their given order.
 *
 * @param endpoint - Endpoint name appended to the API base URL (e.g. `getMonitors`).
 * @param params - Endpoint-specific request fields.
 * @returns The parsed JSON response body.
 * @throws Error when the API key is missing or empty, or when the HTTP status is not 2xx.
 */
async function postToUptimeRobot(endpoint: string, params: Record<string, unknown>) {
  const apiKey = process.env.UPTIMEROBOT_API_KEY;
  if (!apiKey) {
    // Fail before any network traffic: JSON.stringify would silently drop an
    // undefined key and the request would go out unauthenticated.
    throw new Error('UPTIMEROBOT_API_KEY is not set');
  }

  const response = await fetch(`${UPTIMEROBOT_API}/${endpoint}`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ api_key: apiKey, ...params }),
  });

  if (!response.ok) {
    // An error response may not carry a JSON body, so report the status instead of
    // letting response.json() fail with an unrelated parse error.
    throw new Error(
      `UptimeRobot ${endpoint} failed: HTTP ${response.status} ${response.statusText}`,
    );
  }

  return response.json();
}

/**
 * Fetches the configured monitors, including their logs (`logs: 1`).
 *
 * @returns The parsed JSON response of the `getMonitors` endpoint.
 *   NOTE(review): the payload presumably carries its own status field for
 *   API-level failures — callers should verify it against the UptimeRobot docs.
 * @throws Error when the API key is missing or the HTTP request fails.
 */
async function getUptimeStatus() {
  return postToUptimeRobot('getMonitors', {
    format: 'json',
    logs: 1,
  });
}

/**
 * Creates a new HTTP(s) monitor that is checked every 5 minutes.
 *
 * @param url - URL to monitor.
 * @param name - Display name of the monitor (sent as `friendly_name`).
 * @returns The parsed JSON response of the `newMonitor` endpoint.
 * @throws Error when the API key is missing or the HTTP request fails.
 */
async function createMonitor(url: string, name: string) {
  return postToUptimeRobot('newMonitor', {
    friendly_name: name,
    url,
    type: 1, // HTTP(s)
    interval: 300, // 5 minutes
  });
}
Docker Compose
# docker-compose.monitoring.yml
services:
  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      # Scrape config and alert rules are mounted side by side, so the relative
      # rule_files entry 'alerts.yml' resolves inside /etc/prometheus.
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - ./alerts.yml:/etc/prometheus/alerts.yml
      - prometheus_data:/prometheus

  grafana:
    image: grafana/grafana:latest
    ports:
      # Published on host port 3001; the container itself listens on 3000.
      - "3001:3000"
    volumes:
      - grafana_data:/var/lib/grafana
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD}

  alertmanager:
    image: prom/alertmanager:latest
    ports:
      - "9093:9093"
    volumes:
      - ./alertmanager.yml:/etc/alertmanager/alertmanager.yml

  # NOTE(review): no host filesystem or PID namespace is shared with this container,
  # so it presumably reports the container's view rather than the host's — confirm
  # whether host-level CPU/RAM/disk metrics are required.
  node-exporter:
    image: prom/node-exporter:latest
    ports:
      - "9100:9100"

# Named volumes that persist Prometheus and Grafana data across container restarts.
volumes:
  prometheus_data:
  grafana_data:
Umgebungsvariablen
# Prometheus
PROMETHEUS_URL="http://prometheus:9090"
# Grafana
# GRAFANA_URL uses the container port 3000; docker-compose.monitoring.yml publishes
# Grafana on host port 3001.
GRAFANA_URL="http://grafana:3000"
# Passed to the Grafana container as GF_SECURITY_ADMIN_PASSWORD by the compose file.
# NOTE(review): "secret" looks like a placeholder — replace before deploying.
GRAFANA_PASSWORD="secret"
# UptimeRobot
# Read via process.env by the UptimeRobot helper functions.
UPTIMEROBOT_API_KEY=""
# Alerting
# NOTE(review): no consumer of these two variables is shown in this document —
# presumably referenced from alertmanager.yml; confirm.
ALERTMANAGER_SLACK_WEBHOOK=""
ALERTMANAGER_EMAIL="alerts@example.com"
Prometheus + Grafana • UptimeRobot • Alerting