From 6fca2c9aba13375dd991298d654de9f2c8b70d01 Mon Sep 17 00:00:00 2001 From: Jessey van Offeren Date: Fri, 22 May 2026 09:37:57 +0200 Subject: [PATCH] =?UTF-8?q?feat:=20live=20monitor=20TUI=20(M2)=20=E2=80=94?= =?UTF-8?q?=200.21.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Upgrade `rigdoctor monitor` from a basic redraw to a stdlib curses dashboard (tui.py): current / session-min / session-max per sensor, grouped by subsystem, with temperature & utilization color bands (GPU-lost flagged red). q quits, r resets min/max. Plain full-screen redraw fallback on a non-TTY (--plain forces it). Pure track()/band() helpers are unit-tested; curses path verified in a pty. Completes the Monitoring bundle (M2 + M8). Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 8 ++ docs/MODULES.md | 7 +- docs/ROADMAP.md | 3 +- pyproject.toml | 2 +- src/rigdoctor/__init__.py | 2 +- src/rigdoctor/cli.py | 16 ++-- src/rigdoctor/tui.py | 170 ++++++++++++++++++++++++++++++++++++++ tests/test_tui.py | 58 +++++++++++++ 8 files changed, 250 insertions(+), 16 deletions(-) create mode 100644 src/rigdoctor/tui.py create mode 100644 tests/test_tui.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 74be366..44722fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,14 @@ All notable changes to RigDoctor are recorded here. Format follows (`MAJOR.MINOR.PATCH`, pre-1.0). `__version__` and `pyproject.toml` must match the git release tag (so the auto-updater, D18, can compare versions). +## [0.21.0] - 2026-05-22 +### Added +- **Live monitor TUI (M2).** `rigdoctor monitor` is now a proper **curses** dashboard: + current / session-min / session-max per sensor, grouped by subsystem, with temperature and + utilization **color bands** (and GPU-lost flagged red). `q` quits, `r` resets the session + min/max. Falls back to a plain full-screen redraw on a non-TTY (`--plain` forces it). The + terminal face of the same live data the GUI dashboard graphs. Completes the Monitoring bundle. + ## [0.20.0] - 2026-05-22 ### Changed - **Reorganized navigation** into grouped sidebar sections — **Monitor** (Dashboard) · diff --git a/docs/MODULES.md b/docs/MODULES.md index 59300bd..0258763 100644 --- a/docs/MODULES.md +++ b/docs/MODULES.md @@ -11,7 +11,7 @@ Status: ⬜ not started · 🟦 designing · 🟨 in progress · ✅ done | M1 | Sensor core | Essential | none (nvidia-smi, sysfs) | all (NVIDIA first) | P0 | ✅ | | M3 | Crash-capture logger | Essential | none (opt: smartmontools) | all (NVIDIA first) | P0 | ✅ | | M4 | Health report (log scan) | Essential | none (opt: smartmontools) | all (NVIDIA first) | P0 | ✅ | -| M2 | Live monitor (TUI) | Monitoring | none (stdlib curses) | all | P1 | ⬜ | +| M2 | Live monitor (TUI) | Monitoring | none (stdlib curses) | all | P1 | ✅ | | M8 | Alerting | Monitoring | libnotify (opt) | all | P2 | ✅ | | M5 | System inventory | Diagnostics | none (opt: lm-sensors, dmidecode) | all | P1 | ✅ | | M6 | Gaming env checks | Diagnostics | none | all | P2 | 🟨 | @@ -41,7 +41,10 @@ Status: ⬜ not started · 🟦 designing · 🟨 in progress · ✅ done findings (see SPEC §4). *Implemented:* journalctl scan (Xid/panic/OOM/MCE/AER/thermal/amdgpu), SMART, NVIDIA driver-mismatch, journald-persistence + live-temp checks; `rigdoctor report` (text/JSON) + GUI Health tab. GPU-firmware verification deferred. -- **M2 Live monitor** — depends on M1; the terminal "HWMonitor for Linux" face. Stdlib-only. +- **M2 Live monitor** — the terminal "HWMonitor for Linux" face. *Implemented (`tui.py`):* + `rigdoctor monitor` is a stdlib **curses** dashboard — current / session-min / session-max + per sensor, grouped by subsystem, with temperature & utilization color bands; `q` quits, + `r` resets the min/max. Falls back to a plain redraw on a non-TTY (`--plain` forces it). - **M5 / M6 Diagnostics** — inventory export + gaming-env checks; M6 flags risky settings and suggests the fix command but does not apply it (D9). *M6 implemented (Steam detection first — the D12 "pick a game" foundation):* discovers Steam installs + all library folders diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md index 7c7eaa7..d2cf577 100644 --- a/docs/ROADMAP.md +++ b/docs/ROADMAP.md @@ -22,7 +22,8 @@ Ubuntu + NVIDIA first; `.deb` distribution (see `DECISIONS.md`). last readings + a plausible cause. ## Phase 2 — Live monitor (terminal) -- [ ] M2 TUI dashboard (current/min/max, grouped, throttle highlighting) +- [x] M2 TUI dashboard (`rigdoctor monitor`, `tui.py`): curses, current/min/max grouped by + subsystem with temp/usage color bands; q quit / r reset; plain-redraw fallback on non-TTY - [ ] M8 basic alerting (overheat/throttle/GPU-lost notifications) ## Phase 3 — Diagnostics breadth diff --git a/pyproject.toml b/pyproject.toml index a9574f6..3e06d24 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "rigdoctor" -version = "0.20.0" +version = "0.21.0" description = "Modular hardware monitoring & crash diagnostics for Linux gamers." readme = "README.md" requires-python = ">=3.11" diff --git a/src/rigdoctor/__init__.py b/src/rigdoctor/__init__.py index 3aaf6a4..85f61a2 100644 --- a/src/rigdoctor/__init__.py +++ b/src/rigdoctor/__init__.py @@ -1,3 +1,3 @@ """RigDoctor — modular hardware monitoring & crash diagnostics for Linux gamers.""" -__version__ = "0.20.0" +__version__ = "0.21.0" diff --git a/src/rigdoctor/cli.py b/src/rigdoctor/cli.py index a92c460..1f0205d 100644 --- a/src/rigdoctor/cli.py +++ b/src/rigdoctor/cli.py @@ -44,17 +44,10 @@ def cmd_snapshot(args) -> int: def cmd_monitor(args) -> int: + from .tui import run + interval = args.interval or load_config()["interval"] - try: - for sample in _sampler().stream(interval=interval): - # Basic full-screen redraw; the rich TUI (M2) comes later. - print("\033[2J\033[H", end="") - print(f"RigDoctor — live (every {interval:g}s, Ctrl-C to quit)\n") - print(render_snapshot(sample)) - sys.stdout.flush() - except KeyboardInterrupt: - print() - return 0 + return run(interval, plain=getattr(args, "plain", False)) def cmd_gui(args) -> int: @@ -516,8 +509,9 @@ def build_parser() -> argparse.ArgumentParser: sp.add_argument("--json", action="store_true", help="output JSON instead of text") sp.set_defaults(func=cmd_snapshot) - mp = sub.add_parser("monitor", help="live-refreshing sensor view") + mp = sub.add_parser("monitor", help="live monitor TUI (current/min/max, M2)") mp.add_argument("-n", "--interval", type=float, default=None, help="refresh interval (s)") + mp.add_argument("--plain", action="store_true", help="plain redraw instead of the curses UI") mp.set_defaults(func=cmd_monitor) sub.add_parser("gui", help="launch the desktop GUI (needs PySide6)").set_defaults(func=cmd_gui) diff --git a/src/rigdoctor/tui.py b/src/rigdoctor/tui.py new file mode 100644 index 0000000..0b60756 --- /dev/null +++ b/src/rigdoctor/tui.py @@ -0,0 +1,170 @@ +"""Live monitor TUI (M2): a curses HWMonitor-style terminal dashboard. + +Shows current / session-min / session-max per sensor, grouped by subsystem, with +temperature and utilization color bands. stdlib `curses` only; falls back to a plain +full-screen redraw when stdout isn't a TTY (piped/SSH-without-tty). Keys: q quit, r reset +the session min/max. The terminal face of the same live data the GUI dashboard graphs. +""" + +from __future__ import annotations + +import curses +import sys +import time + +from .core.sample import Reading, Sample +from .core.sampler import Sampler +from .core.sources import available_sources +from .render import _GROUP_ORDER, _GROUP_TITLES, format_raw, metric_label, render_snapshot + +# Color-band thresholds (mirror the GUI dashboard so both faces agree). +TEMP_COLD, TEMP_WARN, TEMP_CRIT = 50.0, 78.0, 88.0 +USAGE_WARN, USAGE_CRIT = 85.0, 95.0 +_USAGE_METRICS = {"util", "used_pct", "mem_util", "load"} + + +def band(r: Reading) -> str: + """Color band for a reading: cold | good | warn | crit | normal | na.""" + if r.source == "gpu" and r.metric == "status": # GPU-lost / query timeout + return "crit" + if r.value is None: + return "na" + if r.unit == "°C": + if r.value >= TEMP_CRIT: + return "crit" + if r.value >= TEMP_WARN: + return "warn" + if r.value >= TEMP_COLD: + return "good" + return "cold" + if r.unit == "%" and r.metric in _USAGE_METRICS: + if r.value >= USAGE_CRIT: + return "crit" + if r.value >= USAGE_WARN: + return "warn" + return "good" + return "normal" + + +def track(stats: dict[str, tuple[float, float]], sample: Sample) -> None: + """Fold a sample's readings into {key: (min, max)} session extremes.""" + for r in sample.readings: + if r.value is None: + continue + lo, hi = stats.get(r.key, (r.value, r.value)) + stats[r.key] = (min(lo, r.value), max(hi, r.value)) + + +# --- curses front-end ----------------------------------------------------------------- + +_BAND_PAIR = {"cold": 1, "good": 2, "warn": 3, "crit": 4} + + +def _init_colors() -> None: + try: + curses.start_color() + curses.use_default_colors() + curses.init_pair(1, curses.COLOR_CYAN, -1) + curses.init_pair(2, curses.COLOR_GREEN, -1) + curses.init_pair(3, curses.COLOR_YELLOW, -1) + curses.init_pair(4, curses.COLOR_RED, -1) + except curses.error: + pass + + +def _attr(band_name: str) -> int: + pair = _BAND_PAIR.get(band_name) + if not pair: + return curses.A_NORMAL + attr = curses.color_pair(pair) + return attr | curses.A_BOLD if band_name == "crit" else attr + + +def _draw(stdscr, sample: Sample, stats: dict, interval: float) -> None: + stdscr.erase() + height, width = stdscr.getmaxyx() + + def put(y: int, x: int, text: str, attr: int = curses.A_NORMAL) -> None: + if 0 <= y < height and 0 <= x < width: + try: + stdscr.addnstr(y, x, text, max(0, width - x - 1), attr) + except curses.error: + pass + + put(0, 0, f"RigDoctor — live monitor every {interval:g}s", curses.A_BOLD) + put(1, 0, "q quit r reset min/max", curses.A_DIM) + + groups = sample.by_source() + order = [k for k in _GROUP_ORDER if k in groups] + [k for k in groups if k not in _GROUP_ORDER] + name_w, col_w = 24, 11 + y = 3 + for key in order: + if y >= height: + break + put(y, 0, _GROUP_TITLES.get(key, key.title()), curses.A_BOLD) + y += 1 + put(y, 2, f"{'sensor':<{name_w}}{'current':>{col_w}}{'min':>{col_w}}{'max':>{col_w}}", curses.A_DIM) + y += 1 + for r in groups[key]: + if y >= height: + break + if r.metric == "name": # device identity line + put(y, 2, str(r.label), curses.A_DIM) + y += 1 + continue + lo, hi = stats.get(r.key, (r.value, r.value)) + put(y, 2, f"{metric_label(r):<{name_w}}") + put(y, 2 + name_w, f"{format_raw(r.value, r.unit):>{col_w}}", _attr(band(r))) + put(y, 2 + name_w + col_w, f"{format_raw(lo, r.unit):>{col_w}}", curses.A_DIM) + put(y, 2 + name_w + 2 * col_w, f"{format_raw(hi, r.unit):>{col_w}}", curses.A_DIM) + y += 1 + y += 1 + stdscr.refresh() + + +def _loop(stdscr, sampler: Sampler, interval: float) -> None: + curses.curs_set(0) + stdscr.nodelay(True) + _init_colors() + stats: dict[str, tuple[float, float]] = {} + latest = sampler.sample() + track(stats, latest) + next_sample = time.monotonic() + interval + while True: + ch = stdscr.getch() + if ch in (ord("q"), ord("Q")): + return + if ch in (ord("r"), ord("R")): + stats.clear() + track(stats, latest) + now = time.monotonic() + if now >= next_sample: + latest = sampler.sample() + track(stats, latest) + next_sample = now + interval + _draw(stdscr, latest, stats, interval) + time.sleep(0.05) # keep key handling responsive without busy-spinning + + +def _run_plain(sampler: Sampler, interval: float) -> int: + """Fallback for non-TTY output: clear + reprint each tick (no curses).""" + try: + for sample in sampler.stream(interval=interval): + print("\033[2J\033[H", end="") + print(f"RigDoctor — live (every {interval:g}s, Ctrl-C to quit)\n") + print(render_snapshot(sample)) + sys.stdout.flush() + except KeyboardInterrupt: + print() + return 0 + + +def run(interval: float, plain: bool = False) -> int: + sampler = Sampler(available_sources()) + if plain or not sys.stdout.isatty(): + return _run_plain(sampler, interval) + try: + curses.wrapper(_loop, sampler, interval) + except curses.error: # terminal can't do curses — degrade gracefully + return _run_plain(sampler, interval) + return 0 diff --git a/tests/test_tui.py b/tests/test_tui.py new file mode 100644 index 0000000..62f4259 --- /dev/null +++ b/tests/test_tui.py @@ -0,0 +1,58 @@ +"""Tests for the M2 live-monitor TUI logic (min/max tracking + color bands).""" + +import unittest + +from rigdoctor import tui +from rigdoctor.core.sample import Reading, Sample + + +def _temp(v): + return Reading("gpu", "temp", v, "°C", "") + + +class TrackTests(unittest.TestCase): + def test_tracks_min_and_max(self): + stats: dict = {} + for v in (60.0, 80.0, 70.0, 55.0): + tui.track(stats, Sample(0.0, [_temp(v)])) + self.assertEqual(stats["gpu.temp"], (55.0, 80.0)) + + def test_ignores_none_values(self): + stats: dict = {} + tui.track(stats, Sample(0.0, [_temp(None)])) + self.assertEqual(stats, {}) + + def test_keys_separate_by_label(self): + stats: dict = {} + tui.track(stats, Sample(0.0, [ + Reading("cpu", "temp", 50.0, "°C", "Core 0"), + Reading("cpu", "temp", 70.0, "°C", "Core 1"), + ])) + self.assertEqual(stats["cpu.temp.Core 0"], (50.0, 50.0)) + self.assertEqual(stats["cpu.temp.Core 1"], (70.0, 70.0)) + + +class BandTests(unittest.TestCase): + def test_temperature_bands(self): + self.assertEqual(tui.band(_temp(40.0)), "cold") + self.assertEqual(tui.band(_temp(60.0)), "good") + self.assertEqual(tui.band(_temp(80.0)), "warn") + self.assertEqual(tui.band(_temp(90.0)), "crit") + + def test_usage_bands(self): + self.assertEqual(tui.band(Reading("gpu", "util", 50.0, "%")), "good") + self.assertEqual(tui.band(Reading("gpu", "util", 88.0, "%")), "warn") + self.assertEqual(tui.band(Reading("memory", "used_pct", 96.0, "%")), "crit") + + def test_non_metric_percentage_is_normal(self): + self.assertEqual(tui.band(Reading("gpu", "fan", 100.0, "%")), "normal") + + def test_gpu_lost_is_crit(self): + self.assertEqual(tui.band(Reading("gpu", "status", None, "", "query-timeout")), "crit") + + def test_missing_value_is_na(self): + self.assertEqual(tui.band(Reading("gpu", "power", None, "W")), "na") + + +if __name__ == "__main__": + unittest.main()