Files
jenkins-docker-deploy-example/app/models.py
2026-03-10 14:30:10 +00:00

274 lines
11 KiB
Python

"""SQLite schema and database helpers."""
import os
import sqlite3
from contextlib import contextmanager
from datetime import datetime, timedelta, timezone
from pathlib import Path
# Storage location: overridable via DATA_PATH env var (default matches the container image).
DATA_PATH = os.environ.get("DATA_PATH", "/app/data")
DB_PATH = Path(DATA_PATH) / "monitor.db"
# Retention: keep last N checks per service, and optionally drop checks older than N days
CHECK_RETENTION_COUNT = int(os.environ.get("CHECK_RETENTION_COUNT", "5000"))
# "0" (the default) means age-based pruning is disabled; `or None` normalizes 0 -> None.
CHECK_RETENTION_DAYS = int(os.environ.get("CHECK_RETENTION_DAYS", "0")) or None
def _ensure_data_dir():
    """Create the data directory (and any missing parents) if it does not exist."""
    directory = Path(DATA_PATH)
    directory.mkdir(parents=True, exist_ok=True)
def _migrate_add_status(conn):
"""Add status column if missing (migration for existing DBs)."""
try:
conn.execute("SELECT status FROM checks LIMIT 1")
except sqlite3.OperationalError:
conn.execute("ALTER TABLE checks ADD COLUMN status TEXT")
conn.execute("UPDATE checks SET status = CASE WHEN success = 1 THEN 'OK' ELSE 'ERROR' END")
@contextmanager
def get_db():
    """Yield a sqlite3 connection with Row access, committing on clean exit.

    Commits when the managed block completes without error; on an exception
    the transaction is rolled back and the exception re-raised, so a failed
    caller never leaves half-applied writes pending on the connection. The
    connection is always closed.
    """
    _ensure_data_dir()
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    try:
        yield conn
        conn.commit()
    except Exception:
        # Previously the error path only skipped commit; roll back explicitly
        # so partial writes are discarded deterministically.
        conn.rollback()
        raise
    finally:
        conn.close()
def init_db():
    """Create tables if they don't exist."""
    with get_db() as conn:
        # Monitored endpoints; `protocol` is constrained to the three schemes
        # the checker supports via the CHECK clause.
        conn.execute("""
            CREATE TABLE IF NOT EXISTS services (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL,
                target TEXT NOT NULL,
                protocol TEXT NOT NULL CHECK(protocol IN ('http', 'https', 'tcp')),
                interval_seconds INTEGER NOT NULL DEFAULT 60,
                created_at TEXT NOT NULL
            )
        """)
        # One row per probe result. `timestamp` is an ISO-8601 TEXT value,
        # ordered/filtered lexically throughout this module.
        conn.execute("""
            CREATE TABLE IF NOT EXISTS checks (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                service_id INTEGER NOT NULL,
                success INTEGER NOT NULL,
                status TEXT NOT NULL,
                response_time_ms REAL,
                timestamp TEXT NOT NULL,
                error_message TEXT,
                FOREIGN KEY (service_id) REFERENCES services(id)
            )
        """)
        conn.execute("CREATE INDEX IF NOT EXISTS idx_checks_service ON checks(service_id)")
        conn.execute("CREATE INDEX IF NOT EXISTS idx_checks_timestamp ON checks(timestamp)")
        # Ordering matters: on a pre-migration DB the `status` column must be
        # added (and backfilled) before the index over it can be created.
        _migrate_add_status(conn)
        conn.execute("CREATE INDEX IF NOT EXISTS idx_checks_status ON checks(status)")
def list_services():
    """Return all services with last check info and uptime."""
    # Correlated subqueries pull each service's most recent check; uptime_pct
    # is computed over at most the 1000 newest checks per service.
    query = """
        SELECT s.*,
            (SELECT success FROM checks WHERE service_id = s.id ORDER BY timestamp DESC LIMIT 1) as last_success,
            (SELECT response_time_ms FROM checks WHERE service_id = s.id ORDER BY timestamp DESC LIMIT 1) as last_response_ms,
            (SELECT timestamp FROM checks WHERE service_id = s.id ORDER BY timestamp DESC LIMIT 1) as last_check,
            (SELECT ROUND(100.0 * SUM(success) / NULLIF(COUNT(*), 0), 2)
             FROM (SELECT success FROM checks WHERE service_id = s.id ORDER BY timestamp DESC LIMIT 1000)) as uptime_pct
        FROM services s
        ORDER BY s.id
    """
    with get_db() as conn:
        return [dict(row) for row in conn.execute(query)]
def get_service(service_id: int):
    """Fetch a single service row as a dict, or None when no such id exists."""
    with get_db() as conn:
        found = conn.execute(
            "SELECT * FROM services WHERE id = ?", (service_id,)
        ).fetchone()
    if found is None:
        return None
    return dict(found)
def add_service(name: str, target: str, protocol: str, interval_seconds: int = 60) -> int:
    """Add a new service. Returns the new service id.

    Args:
        name: Display name for the service.
        target: Host/URL to probe.
        protocol: One of 'http', 'https', 'tcp' (enforced by the table CHECK).
        interval_seconds: Desired check cadence in seconds.
    """
    # Naive-UTC ISO timestamp: datetime.utcnow() is deprecated since Python
    # 3.12; stripping tzinfo keeps the stored format identical to existing rows
    # (no "+00:00" suffix), so lexical timestamp comparisons stay consistent.
    created_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    with get_db() as conn:
        cur = conn.execute(
            "INSERT INTO services (name, target, protocol, interval_seconds, created_at) VALUES (?, ?, ?, ?, ?)",
            (name, target, protocol, interval_seconds, created_at),
        )
        return cur.lastrowid
def update_service(service_id: int, name: str = None, target: str = None, protocol: str = None, interval_seconds: int = None) -> bool:
    """Update a service. Only provided fields are updated. Returns True if updated."""
    candidates = {
        "name": name,
        "target": target,
        "protocol": protocol,
        "interval_seconds": interval_seconds,
    }
    # Keep only the fields the caller actually supplied (insertion order is
    # preserved, so the generated SET clause matches the parameter order).
    changes = {col: val for col, val in candidates.items() if val is not None}
    if not changes:
        # Nothing requested -- report success without touching the database.
        return True
    set_clause = ", ".join(f"{col} = ?" for col in changes)
    params = list(changes.values()) + [service_id]
    with get_db() as conn:
        cur = conn.execute(f"UPDATE services SET {set_clause} WHERE id = ?", params)
        return cur.rowcount > 0
def add_check(service_id: int, success: bool, response_time_ms: float | None, error_message: str | None = None):
    """Record a check result. status is OK or ERROR for searchability.

    Args:
        service_id: FK into services.
        success: Whether the probe succeeded (stored as 0/1).
        response_time_ms: Measured latency, or None if unavailable.
        error_message: Failure detail, or None on success.
    """
    status = "OK" if success else "ERROR"
    # Naive-UTC ISO timestamp: datetime.utcnow() is deprecated since Python
    # 3.12; stripping tzinfo keeps new rows string-comparable with rows written
    # by older versions (no "+00:00" suffix).
    ts = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    with get_db() as conn:
        conn.execute(
            "INSERT INTO checks (service_id, success, status, response_time_ms, timestamp, error_message) VALUES (?, ?, ?, ?, ?, ?)",
            (service_id, 1 if success else 0, status, response_time_ms, ts, error_message),
        )
def _checks_where_args(service_id: int, from_ts: str = None, to_ts: str = None, status_filter: str = None, search: str = None):
"""Build WHERE clause and args for checks queries."""
q = "WHERE service_id = ?"
args = [service_id]
if from_ts:
q += " AND timestamp >= ?"
args.append(from_ts)
if to_ts:
q += " AND timestamp <= ?"
args.append(to_ts)
if status_filter == "error":
q += " AND status = 'ERROR'"
elif status_filter == "ok":
q += " AND status = 'OK'"
if search:
q += " AND (error_message LIKE ? OR status LIKE ?)"
args.extend([f"%{search}%", f"%{search}%"])
return q, args
def get_checks_count(service_id: int, from_ts: str = None, to_ts: str = None, status_filter: str = None, search: str = None) -> int:
    """Count checks matching filters (for pagination)."""
    where, params = _checks_where_args(service_id, from_ts, to_ts, status_filter, search)
    sql = f"SELECT COUNT(*) FROM checks {where}"
    with get_db() as conn:
        (count,) = conn.execute(sql, params).fetchone()
    return count
def get_checks(service_id: int, limit: int = 50, offset: int = 0, from_ts: str = None, to_ts: str = None, status_filter: str = None, search: str = None):
    """Get recent checks for a service, optionally filtered and paginated."""
    where, params = _checks_where_args(service_id, from_ts, to_ts, status_filter, search)
    sql = f"SELECT * FROM checks {where} ORDER BY timestamp DESC LIMIT ? OFFSET ?"
    params += [limit, offset]
    with get_db() as conn:
        return [dict(row) for row in conn.execute(sql, params)]
def get_report_stats(service_id: int, from_ts: str = None, to_ts: str = None):
    """Compute uptime % and latency stats for a service, optionally over a time range."""
    sql = "SELECT success, response_time_ms FROM checks WHERE service_id = ?"
    params = [service_id]
    if from_ts:
        sql += " AND timestamp >= ?"
        params.append(from_ts)
    if to_ts:
        sql += " AND timestamp <= ?"
        params.append(to_ts)
    # Cap the scan at the 10k most recent rows so huge histories stay cheap.
    sql += " ORDER BY timestamp DESC LIMIT 10000"
    with get_db() as conn:
        rows = conn.execute(sql, params).fetchall()
    if not rows:
        return {"total": 0, "uptime_pct": 0, "avg_ms": None, "min_ms": None, "max_ms": None}
    total = len(rows)
    ok = sum(1 for row in rows if row["success"])
    # Latency stats ignore rows with no recorded response time.
    latencies = [row["response_time_ms"] for row in rows if row["response_time_ms"] is not None]
    stats = {
        "total": total,
        "uptime_pct": round((ok / total) * 100, 2),
        "avg_ms": None,
        "min_ms": None,
        "max_ms": None,
    }
    if latencies:
        stats["avg_ms"] = round(sum(latencies) / len(latencies), 2)
        stats["min_ms"] = min(latencies)
        stats["max_ms"] = max(latencies)
    return stats
def get_history(service_id: int, limit: int = 100, from_ts: str = None, to_ts: str = None):
    """Get check history for charts (JSON), optionally filtered by timestamp range."""
    sql = "SELECT timestamp, success, response_time_ms FROM checks WHERE service_id = ?"
    params = [service_id]
    # Append each optional range bound that the caller supplied.
    for clause, value in (("timestamp >= ?", from_ts), ("timestamp <= ?", to_ts)):
        if value:
            sql += " AND " + clause
            params.append(value)
    sql += " ORDER BY timestamp DESC LIMIT ?"
    params.append(limit)
    with get_db() as conn:
        cursor = conn.execute(sql, params)
        return [
            {
                "timestamp": row["timestamp"],
                "success": bool(row["success"]),
                "response_time_ms": row["response_time_ms"],
            }
            for row in cursor
        ]
def delete_service(service_id: int) -> bool:
    """Delete a service and its check history. Returns True if deleted."""
    with get_db() as conn:
        # Remove child rows first: the FK has no ON DELETE CASCADE.
        conn.execute("DELETE FROM checks WHERE service_id = ?", (service_id,))
        removed = conn.execute("DELETE FROM services WHERE id = ?", (service_id,)).rowcount
    return removed > 0
def get_all_services_for_scheduler():
    """Return all services for the scheduler (only the fields it needs)."""
    sql = "SELECT id, target, protocol, interval_seconds FROM services"
    with get_db() as conn:
        return [dict(row) for row in conn.execute(sql)]
def prune_checks_retention() -> int:
    """
    Remove old checks to limit storage. Keeps last CHECK_RETENTION_COUNT per service.
    If CHECK_RETENTION_DAYS is set, also deletes checks older than that.
    Returns number of rows deleted.
    """
    with get_db() as conn:
        deleted = 0
        # Delete checks older than N days (if configured). The cutoff is a
        # *naive* UTC ISO string so it compares lexically against the naive
        # timestamps stored by add_check; an aware isoformat would carry a
        # "+00:00" suffix and compare inconsistently at the boundary.
        if CHECK_RETENTION_DAYS:
            cutoff_dt = datetime.now(timezone.utc) - timedelta(days=CHECK_RETENTION_DAYS)
            cutoff = cutoff_dt.replace(tzinfo=None).isoformat()
            cur = conn.execute("DELETE FROM checks WHERE timestamp < ?", (cutoff,))
            deleted += cur.rowcount
        # Keep only the most recent N checks per service. Use a sub-select
        # rather than fetching ids and binding "NOT IN (?,?,...)": the old
        # approach needed one placeholder per kept row (up to 5000), which can
        # exceed SQLite's host-parameter limit (999 in older builds).
        service_ids = [row[0] for row in conn.execute("SELECT id FROM services")]
        for sid in service_ids:
            cur = conn.execute(
                """
                DELETE FROM checks
                WHERE service_id = ?
                  AND id NOT IN (
                      SELECT id FROM checks
                      WHERE service_id = ?
                      ORDER BY timestamp DESC
                      LIMIT ?
                  )
                """,
                (sid, sid, CHECK_RETENTION_COUNT),
            )
            deleted += cur.rowcount
        return deleted