Merge pull request 'feat: live monitor TUI (M2) — 0.21.0' (#17) from feat/m11-tray into main

Reviewed-on: #17
2026-05-22 07:38:17 +00:00
parent 4c5a6547ec 6fca2c9aba
commit 51b7ed69bd
8 changed files with 250 additions and 16 deletions
@@ -5,6 +5,14 @@ All notable changes to RigDoctor are recorded here. Format follows
 (`MAJOR.MINOR.PATCH`, pre-1.0). `__version__` and `pyproject.toml` must match the git
 release tag (so the auto-updater, D18, can compare versions).
 ## [0.21.0] - 2026-05-22
 ### Added
 - **Live monitor TUI (M2).** `rigdoctor monitor` is now a proper **curses** dashboard:
  current / session-min / session-max per sensor, grouped by subsystem, with temperature and
  utilization **color bands** (and GPU-lost flagged red). `q` quits, `r` resets the session
  min/max. Falls back to a plain full-screen redraw on a non-TTY (`--plain` forces it). The
  terminal face of the same live data the GUI dashboard graphs. Completes the Monitoring bundle.
 ## [0.20.0] - 2026-05-22
 ### Changed
 - **Reorganized navigation** into grouped sidebar sections — **Monitor** (Dashboard) ·
@@ -11,7 +11,7 @@ Status: ⬜ not started · 🟦 designing · 🟨 in progress · ✅ done
 | M1 | Sensor core | Essential | none (nvidia-smi, sysfs) | all (NVIDIA first) | P0 | ✅ |
 | M3 | Crash-capture logger | Essential | none (opt: smartmontools) | all (NVIDIA first) | P0 | ✅ |
 | M4 | Health report (log scan) | Essential | none (opt: smartmontools) | all (NVIDIA first) | P0 | ✅ |
-| M2 | Live monitor (TUI) | Monitoring | none (stdlib curses) | all | P1 | ⬜ |
+| M2 | Live monitor (TUI) | Monitoring | none (stdlib curses) | all | P1 | ✅ |
 | M8 | Alerting | Monitoring | libnotify (opt) | all | P2 | ✅ |
 | M5 | System inventory | Diagnostics | none (opt: lm-sensors, dmidecode) | all | P1 | ✅ |
 | M6 | Gaming env checks | Diagnostics | none | all | P2 | 🟨 |
@@ -41,7 +41,10 @@ Status: ⬜ not started · 🟦 designing · 🟨 in progress · ✅ done
  findings (see SPEC §4). *Implemented:* journalctl scan (Xid/panic/OOM/MCE/AER/thermal/amdgpu),
  SMART, NVIDIA driver-mismatch, journald-persistence + live-temp checks; `rigdoctor report`
  (text/JSON) + GUI Health tab. GPU-firmware verification deferred.
- **M2 Live monitor** — depends on M1; the terminal "HWMonitor for Linux" face. Stdlib-only.
+- **M2 Live monitor** — the terminal "HWMonitor for Linux" face. *Implemented (`tui.py`):*
  `rigdoctor monitor` is a stdlib **curses** dashboard — current / session-min / session-max
  per sensor, grouped by subsystem, with temperature & utilization color bands; `q` quits,
  `r` resets the min/max. Falls back to a plain redraw on a non-TTY (`--plain` forces it).
 - **M5 / M6 Diagnostics** — inventory export + gaming-env checks; M6 flags risky settings and
  suggests the fix command but does not apply it (D9). *M6 implemented (Steam detection first —
  the D12 "pick a game" foundation):* discovers Steam installs + all library folders
@@ -22,7 +22,8 @@ Ubuntu + NVIDIA first; `.deb` distribution (see `DECISIONS.md`).
  last readings + a plausible cause.
 ## Phase 2 — Live monitor (terminal)
- [ ] M2 TUI dashboard (current/min/max, grouped, throttle highlighting)
+- [x] M2 TUI dashboard (`rigdoctor monitor`, `tui.py`): curses, current/min/max grouped by
      subsystem with temp/usage color bands; q quit / r reset; plain-redraw fallback on non-TTY
 - [ ] M8 basic alerting (overheat/throttle/GPU-lost notifications)
 ## Phase 3 — Diagnostics breadth
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "rigdoctor"
-version = "0.20.0"
+version = "0.21.0"
 description = "Modular hardware monitoring & crash diagnostics for Linux gamers."
 readme = "README.md"
 requires-python = ">=3.11"
@@ -1,3 +1,3 @@
 """RigDoctor — modular hardware monitoring & crash diagnostics for Linux gamers."""
-__version__ = "0.20.0"
+__version__ = "0.21.0"
@@ -44,17 +44,10 @@ def cmd_snapshot(args) -> int:
 def cmd_monitor(args) -> int:
    from .tui import run
    interval = args.interval or load_config()["interval"]
-    try:
+    return run(interval, plain=getattr(args, "plain", False))
-        for sample in _sampler().stream(interval=interval):
-            # Basic full-screen redraw; the rich TUI (M2) comes later.
-            print("\033[2J\033[H", end="")
-            print(f"RigDoctor — live  (every {interval:g}s, Ctrl-C to quit)\n")
-            print(render_snapshot(sample))
-            sys.stdout.flush()
-    except KeyboardInterrupt:
-        print()
-    return 0
 def cmd_gui(args) -> int:
@@ -516,8 +509,9 @@ def build_parser() -> argparse.ArgumentParser:
    sp.add_argument("--json", action="store_true", help="output JSON instead of text")
    sp.set_defaults(func=cmd_snapshot)
-    mp = sub.add_parser("monitor", help="live-refreshing sensor view")
+    mp = sub.add_parser("monitor", help="live monitor TUI (current/min/max, M2)")
    mp.add_argument("-n", "--interval", type=float, default=None, help="refresh interval (s)")
    mp.add_argument("--plain", action="store_true", help="plain redraw instead of the curses UI")
    mp.set_defaults(func=cmd_monitor)
    sub.add_parser("gui", help="launch the desktop GUI (needs PySide6)").set_defaults(func=cmd_gui)
@@ -0,0 +1,170 @@
 """Live monitor TUI (M2): a curses HWMonitor-style terminal dashboard.
 Shows current / session-min / session-max per sensor, grouped by subsystem, with
 temperature and utilization color bands. stdlib `curses` only; falls back to a plain
 full-screen redraw when stdout isn't a TTY (piped/SSH-without-tty). Keys: q quit, r reset
 the session min/max. The terminal face of the same live data the GUI dashboard graphs.
 """
 from __future__ import annotations
 import curses
 import sys
 import time
 from .core.sample import Reading, Sample
 from .core.sampler import Sampler
 from .core.sources import available_sources
 from .render import _GROUP_ORDER, _GROUP_TITLES, format_raw, metric_label, render_snapshot
 # Band thresholds, kept in step with the GUI dashboard so both faces color alike.
 # Temperature cut-offs, compared against readings whose unit is "°C".
 TEMP_COLD = 50.0
 TEMP_WARN = 78.0
 TEMP_CRIT = 88.0
 # Utilization cut-offs, compared against "%" readings of the metrics listed below.
 USAGE_WARN = 85.0
 USAGE_CRIT = 95.0
 # Only these metrics are banded as utilization; any other "%" reading stays "normal".
 _USAGE_METRICS = {"load", "mem_util", "used_pct", "util"}
 def band(r: Reading) -> str:
    """Classify a reading into a color band.

    Returns one of ``"cold"``, ``"good"``, ``"warn"``, ``"crit"``, ``"normal"`` or
    ``"na"``:

    * a GPU ``status`` reading is always ``"crit"`` (GPU lost / query timeout),
      whatever its value;
    * a reading without a value is ``"na"``;
    * ``°C`` readings are ranked against the ``TEMP_*`` thresholds;
    * ``%`` readings of a metric in ``_USAGE_METRICS`` are ranked against the
      ``USAGE_*`` thresholds;
    * everything else is ``"normal"``.

    Thresholds are inclusive: a value equal to a threshold takes the higher band.
    """
    if (r.source, r.metric) == ("gpu", "status"):
        return "crit"
    value = r.value
    if value is None:
        return "na"
    # Pick the threshold ladder (highest first) and the band below its lowest rung.
    if r.unit == "°C":
        ladder = ((TEMP_CRIT, "crit"), (TEMP_WARN, "warn"), (TEMP_COLD, "good"))
        floor = "cold"
    elif r.unit == "%" and r.metric in _USAGE_METRICS:
        ladder = ((USAGE_CRIT, "crit"), (USAGE_WARN, "warn"))
        floor = "good"
    else:
        return "normal"
    for threshold, name in ladder:
        if value >= threshold:
            return name
    return floor
 def track(stats: dict[str, tuple[float, float]], sample: Sample) -> None:
    """Update the session extremes in ``stats`` with every reading of ``sample``.

    ``stats`` maps a reading's ``key`` to its ``(min, max)`` seen so far and is
    modified in place. Readings whose value is ``None`` are skipped, so they
    neither create an entry nor disturb an existing one.
    """
    for reading in sample.readings:
        value = reading.value
        if value is None:
            continue
        key = reading.key
        if key in stats:
            low, high = stats[key]
            stats[key] = (min(low, value), max(high, value))
        else:
            # First sighting: the value is both the minimum and the maximum.
            stats[key] = (min(value, value), max(value, value))
 # --- curses front-end -----------------------------------------------------------------
 # curses color-pair number for each colored band; the numbers match the pairs
 # registered in _init_colors(). Bands without an entry ("normal", "na") are drawn
 # with A_NORMAL by _attr().
 _BAND_PAIR = {"cold": 1, "good": 2, "warn": 3, "crit": 4}
 def _init_colors() -> None:
    """Register the four band color pairs on the terminal's default background.

    Best effort: if any curses color call fails, the remaining setup is skipped
    and the error is ignored.
    """
    # (pair number, foreground) — pair numbers are the ones _BAND_PAIR refers to.
    palette = (
        (1, curses.COLOR_CYAN),
        (2, curses.COLOR_GREEN),
        (3, curses.COLOR_YELLOW),
        (4, curses.COLOR_RED),
    )
    try:
        curses.start_color()
        curses.use_default_colors()
        for pair_no, foreground in palette:
            curses.init_pair(pair_no, foreground, -1)  # -1 = default background
    except curses.error:
        pass
 def _attr(band_name: str) -> int:
    """Return the curses attribute used to draw a value in band ``band_name``.

    Bands listed in ``_BAND_PAIR`` get their color pair, with bold added for
    ``"crit"``; any other band is drawn with ``curses.A_NORMAL``.
    """
    pair_no = _BAND_PAIR.get(band_name)
    if pair_no:
        colored = curses.color_pair(pair_no)
        if band_name == "crit":
            colored |= curses.A_BOLD
        return colored
    return curses.A_NORMAL
 def _draw(stdscr, sample: Sample, stats: dict, interval: float) -> None:
    """Repaint the whole screen from ``sample`` and the session extremes in ``stats``.

    Layout: a two-line banner, then one section per source group (title, column
    header, one row per reading with current / min / max). Groups named in
    ``_GROUP_ORDER`` come first, in that order; any others follow. Output that
    would fall outside the window is dropped rather than raised.
    """
    stdscr.erase()
    rows, cols = stdscr.getmaxyx()

    def put(y: int, x: int, text: str, attr: int = curses.A_NORMAL) -> None:
        # Clip to the window and stop one column short of the right edge; a
        # curses.error from the write itself is ignored.
        if not (0 <= y < rows and 0 <= x < cols):
            return
        try:
            stdscr.addnstr(y, x, text, max(0, cols - x - 1), attr)
        except curses.error:
            pass

    put(0, 0, f"RigDoctor — live monitor   every {interval:g}s", curses.A_BOLD)
    put(1, 0, "q quit    r reset min/max", curses.A_DIM)

    by_source = sample.by_source()
    known = [name for name in _GROUP_ORDER if name in by_source]
    extra = [name for name in by_source if name not in _GROUP_ORDER]

    label_w, value_w = 24, 11
    left = 2  # rows are indented under their group title
    current_x = left + label_w
    min_x = current_x + value_w
    max_x = min_x + value_w
    header = f"{'sensor':<{label_w}}{'current':>{value_w}}{'min':>{value_w}}{'max':>{value_w}}"

    row = 3
    for source in known + extra:
        if row >= rows:
            break
        put(row, 0, _GROUP_TITLES.get(source, source.title()), curses.A_BOLD)
        put(row + 1, left, header, curses.A_DIM)
        row += 2
        for reading in by_source[source]:
            if row >= rows:
                break
            if reading.metric == "name":
                # Device identity line: show the label only, no value columns.
                put(row, left, str(reading.label), curses.A_DIM)
            else:
                # No extremes recorded yet: show the current value in both columns.
                low, high = stats.get(reading.key, (reading.value, reading.value))
                put(row, left, f"{metric_label(reading):<{label_w}}")
                put(row, current_x, f"{format_raw(reading.value, reading.unit):>{value_w}}", _attr(band(reading)))
                put(row, min_x, f"{format_raw(low, reading.unit):>{value_w}}", curses.A_DIM)
                put(row, max_x, f"{format_raw(high, reading.unit):>{value_w}}", curses.A_DIM)
            row += 1
        row += 1  # blank line between groups
    stdscr.refresh()
 def _loop(stdscr, sampler: Sampler, interval: float) -> None:
    """Run the interactive dashboard until the user presses ``q``.

    Intended to be called through ``curses.wrapper``. Takes a first sample
    immediately, then a new one every ``interval`` seconds, folding each into the
    session min/max. Keys: ``q``/``Q`` returns; ``r``/``R`` discards the session
    extremes and restarts them from the latest sample. The screen is redrawn on
    every pass of the loop.
    """
    try:
        curses.curs_set(0)
    except curses.error:
        # Hiding the cursor is purely cosmetic and not every terminal supports
        # it; letting the error escape would make run() abandon the curses UI.
        pass
    stdscr.nodelay(True)  # getch() returns immediately when no key is waiting
    _init_colors()
    stats: dict[str, tuple[float, float]] = {}
    latest = sampler.sample()
    track(stats, latest)
    next_sample = time.monotonic() + interval
    while True:
        ch = stdscr.getch()
        if ch in (ord("q"), ord("Q")):
            return
        if ch in (ord("r"), ord("R")):
            # Restart the extremes from what is currently on screen.
            stats.clear()
            track(stats, latest)
        now = time.monotonic()
        if now >= next_sample:
            latest = sampler.sample()
            track(stats, latest)
            next_sample = now + interval
        _draw(stdscr, latest, stats, interval)
        time.sleep(0.05)  # keep key handling responsive without busy-spinning
 def _run_plain(sampler: Sampler, interval: float) -> int:
    """Curses-free monitor: clear the screen and reprint the snapshot each tick.

    Consumes ``sampler.stream(interval=interval)`` until interrupted with
    Ctrl-C, then prints a final newline. Always returns 0.
    """
    clear_and_home = "\033[2J\033[H"  # ANSI: erase display, cursor to top-left
    try:
        for snapshot in sampler.stream(interval=interval):
            print(clear_and_home, end="")
            print(f"RigDoctor — live  (every {interval:g}s, Ctrl-C to quit)\n")
            print(render_snapshot(snapshot))
            sys.stdout.flush()
    except KeyboardInterrupt:
        print()
    return 0
 def run(interval: float, plain: bool = False) -> int:
    """Entry point for ``rigdoctor monitor``; returns the process exit code.

    Uses the plain full-screen redraw when ``plain`` is true or stdout is not a
    TTY; otherwise runs the curses dashboard. Ctrl-C ends either mode cleanly
    with exit code 0. A ``curses.error`` raised by the dashboard degrades to the
    plain redraw instead of failing.
    """
    sampler = Sampler(available_sources())
    if plain or not sys.stdout.isatty():
        return _run_plain(sampler, interval)
    try:
        curses.wrapper(_loop, sampler, interval)
    except KeyboardInterrupt:
        # curses.wrapper restores the terminal before re-raising, so Ctrl-C can
        # be treated like `q` here, matching how the plain mode handles it.
        pass
    except curses.error:  # terminal can't do curses — degrade gracefully
        return _run_plain(sampler, interval)
    return 0
@@ -0,0 +1,58 @@
 """Tests for the M2 live-monitor TUI logic (min/max tracking + color bands)."""
 import unittest
 from rigdoctor import tui
 from rigdoctor.core.sample import Reading, Sample
 def _temp(v):
    """Build a GPU temperature reading (unit °C, empty last field) with value ``v``."""
    return Reading("gpu", "temp", v, "°C", "")
 class TrackTests(unittest.TestCase):
    """Session min/max folding done by ``tui.track``."""

    def test_tracks_min_and_max(self):
        """Successive samples widen the (min, max) pair for the same key."""
        extremes: dict = {}
        samples = [Sample(0.0, [_temp(value)]) for value in (60.0, 80.0, 70.0, 55.0)]
        for sample in samples:
            tui.track(extremes, sample)
        self.assertEqual(extremes["gpu.temp"], (55.0, 80.0))

    def test_ignores_none_values(self):
        """A reading without a value must not create an entry."""
        extremes: dict = {}
        tui.track(extremes, Sample(0.0, [_temp(None)]))
        self.assertEqual(extremes, {})

    def test_keys_separate_by_label(self):
        """Readings that differ only by label are tracked independently."""
        per_core = {"Core 0": 50.0, "Core 1": 70.0}
        extremes: dict = {}
        readings = [Reading("cpu", "temp", value, "°C", name) for name, value in per_core.items()]
        tui.track(extremes, Sample(0.0, readings))
        for name, value in per_core.items():
            self.assertEqual(extremes[f"cpu.temp.{name}"], (value, value))
 class BandTests(unittest.TestCase):
    """Color-band classification done by ``tui.band``."""

    def test_temperature_bands(self):
        self.assertEqual(tui.band(_temp(40.0)), "cold")
        self.assertEqual(tui.band(_temp(60.0)), "good")
        self.assertEqual(tui.band(_temp(80.0)), "warn")
        self.assertEqual(tui.band(_temp(90.0)), "crit")

    def test_temperature_band_boundaries(self):
        # Thresholds are inclusive: a value exactly on one takes the higher band.
        self.assertEqual(tui.band(_temp(tui.TEMP_COLD - 0.1)), "cold")
        self.assertEqual(tui.band(_temp(tui.TEMP_COLD)), "good")
        self.assertEqual(tui.band(_temp(tui.TEMP_WARN - 0.1)), "good")
        self.assertEqual(tui.band(_temp(tui.TEMP_WARN)), "warn")
        self.assertEqual(tui.band(_temp(tui.TEMP_CRIT - 0.1)), "warn")
        self.assertEqual(tui.band(_temp(tui.TEMP_CRIT)), "crit")

    def test_usage_bands(self):
        self.assertEqual(tui.band(Reading("gpu", "util", 50.0, "%")), "good")
        self.assertEqual(tui.band(Reading("gpu", "util", 88.0, "%")), "warn")
        self.assertEqual(tui.band(Reading("memory", "used_pct", 96.0, "%")), "crit")

    def test_usage_band_boundaries(self):
        # Same inclusive rule as for temperatures.
        self.assertEqual(tui.band(Reading("gpu", "util", tui.USAGE_WARN - 0.1, "%")), "good")
        self.assertEqual(tui.band(Reading("gpu", "util", tui.USAGE_WARN, "%")), "warn")
        self.assertEqual(tui.band(Reading("gpu", "util", tui.USAGE_CRIT - 0.1, "%")), "warn")
        self.assertEqual(tui.band(Reading("gpu", "util", tui.USAGE_CRIT, "%")), "crit")

    def test_every_usage_metric_is_banded(self):
        # Each metric in the usage set must be banded, not fall through to "normal".
        for metric in sorted(tui._USAGE_METRICS):
            with self.subTest(metric=metric):
                self.assertEqual(tui.band(Reading("cpu", metric, 99.0, "%")), "crit")

    def test_non_metric_percentage_is_normal(self):
        self.assertEqual(tui.band(Reading("gpu", "fan", 100.0, "%")), "normal")

    def test_gpu_lost_is_crit(self):
        self.assertEqual(tui.band(Reading("gpu", "status", None, "", "query-timeout")), "crit")

    def test_missing_value_is_na(self):
        self.assertEqual(tui.band(Reading("gpu", "power", None, "W")), "na")
        # A missing value wins over the unit-based banding.
        self.assertEqual(tui.band(_temp(None)), "na")
 # Allow running this test module directly as a script.
 if __name__ == "__main__":
    unittest.main()
`@@ -1,3 +1,3 @@`
	`"""RigDoctor — modular hardware monitoring & crash diagnostics for Linux gamers."""`	`"""RigDoctor — modular hardware monitoring & crash diagnostics for Linux gamers."""`

	`__version__ = "0.20.0"`	`__version__ = "0.21.0"`