Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 51b7ed69bd | |||
| 6fca2c9aba |
@@ -5,6 +5,14 @@ All notable changes to RigDoctor are recorded here. Format follows
|
||||
(`MAJOR.MINOR.PATCH`, pre-1.0). `__version__` and `pyproject.toml` must match the git
|
||||
release tag (so the auto-updater, D18, can compare versions).
|
||||
|
||||
## [0.21.0] - 2026-05-22
|
||||
### Added
|
||||
- **Live monitor TUI (M2).** `rigdoctor monitor` is now a proper **curses** dashboard:
|
||||
current / session-min / session-max per sensor, grouped by subsystem, with temperature and
|
||||
utilization **color bands** (and GPU-lost flagged red). `q` quits, `r` resets the session
|
||||
min/max. Falls back to a plain full-screen redraw on a non-TTY (`--plain` forces it). The
|
||||
terminal face of the same live data the GUI dashboard graphs. Completes the Monitoring bundle.
|
||||
|
||||
## [0.20.0] - 2026-05-22
|
||||
### Changed
|
||||
- **Reorganized navigation** into grouped sidebar sections — **Monitor** (Dashboard) ·
|
||||
|
||||
+5
-2
@@ -11,7 +11,7 @@ Status: ⬜ not started · 🟦 designing · 🟨 in progress · ✅ done
|
||||
| M1 | Sensor core | Essential | none (nvidia-smi, sysfs) | all (NVIDIA first) | P0 | ✅ |
|
||||
| M3 | Crash-capture logger | Essential | none (opt: smartmontools) | all (NVIDIA first) | P0 | ✅ |
|
||||
| M4 | Health report (log scan) | Essential | none (opt: smartmontools) | all (NVIDIA first) | P0 | ✅ |
|
||||
| M2 | Live monitor (TUI) | Monitoring | none (stdlib curses) | all | P1 | ⬜ |
|
||||
| M2 | Live monitor (TUI) | Monitoring | none (stdlib curses) | all | P1 | ✅ |
|
||||
| M8 | Alerting | Monitoring | libnotify (opt) | all | P2 | ✅ |
|
||||
| M5 | System inventory | Diagnostics | none (opt: lm-sensors, dmidecode) | all | P1 | ✅ |
|
||||
| M6 | Gaming env checks | Diagnostics | none | all | P2 | 🟨 |
|
||||
@@ -41,7 +41,10 @@ Status: ⬜ not started · 🟦 designing · 🟨 in progress · ✅ done
|
||||
findings (see SPEC §4). *Implemented:* journalctl scan (Xid/panic/OOM/MCE/AER/thermal/amdgpu),
|
||||
SMART, NVIDIA driver-mismatch, journald-persistence + live-temp checks; `rigdoctor report`
|
||||
(text/JSON) + GUI Health tab. GPU-firmware verification deferred.
|
||||
- **M2 Live monitor** — depends on M1; the terminal "HWMonitor for Linux" face. Stdlib-only.
|
||||
- **M2 Live monitor** — the terminal "HWMonitor for Linux" face. *Implemented (`tui.py`):*
|
||||
`rigdoctor monitor` is a stdlib **curses** dashboard — current / session-min / session-max
|
||||
per sensor, grouped by subsystem, with temperature & utilization color bands; `q` quits,
|
||||
`r` resets the min/max. Falls back to a plain redraw on a non-TTY (`--plain` forces it).
|
||||
- **M5 / M6 Diagnostics** — inventory export + gaming-env checks; M6 flags risky settings and
|
||||
suggests the fix command but does not apply it (D9). *M6 implemented (Steam detection first —
|
||||
the D12 "pick a game" foundation):* discovers Steam installs + all library folders
|
||||
|
||||
+2
-1
@@ -22,7 +22,8 @@ Ubuntu + NVIDIA first; `.deb` distribution (see `DECISIONS.md`).
|
||||
last readings + a plausible cause.
|
||||
|
||||
## Phase 2 — Live monitor (terminal)
|
||||
- [ ] M2 TUI dashboard (current/min/max, grouped, throttle highlighting)
|
||||
- [x] M2 TUI dashboard (`rigdoctor monitor`, `tui.py`): curses, current/min/max grouped by
|
||||
subsystem with temp/usage color bands; q quit / r reset; plain-redraw fallback on non-TTY
|
||||
- [ ] M8 basic alerting (overheat/throttle/GPU-lost notifications)
|
||||
|
||||
## Phase 3 — Diagnostics breadth
|
||||
|
||||
+1
-1
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "rigdoctor"
|
||||
version = "0.20.0"
|
||||
version = "0.21.0"
|
||||
description = "Modular hardware monitoring & crash diagnostics for Linux gamers."
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
"""RigDoctor — modular hardware monitoring & crash diagnostics for Linux gamers."""
|
||||
|
||||
__version__ = "0.20.0"
|
||||
__version__ = "0.21.0"
|
||||
|
||||
+5
-11
@@ -44,17 +44,10 @@ def cmd_snapshot(args) -> int:
|
||||
|
||||
|
||||
def cmd_monitor(args) -> int:
|
||||
from .tui import run
|
||||
|
||||
interval = args.interval or load_config()["interval"]
|
||||
try:
|
||||
for sample in _sampler().stream(interval=interval):
|
||||
# Basic full-screen redraw; the rich TUI (M2) comes later.
|
||||
print("\033[2J\033[H", end="")
|
||||
print(f"RigDoctor — live (every {interval:g}s, Ctrl-C to quit)\n")
|
||||
print(render_snapshot(sample))
|
||||
sys.stdout.flush()
|
||||
except KeyboardInterrupt:
|
||||
print()
|
||||
return 0
|
||||
return run(interval, plain=getattr(args, "plain", False))
|
||||
|
||||
|
||||
def cmd_gui(args) -> int:
|
||||
@@ -516,8 +509,9 @@ def build_parser() -> argparse.ArgumentParser:
|
||||
sp.add_argument("--json", action="store_true", help="output JSON instead of text")
|
||||
sp.set_defaults(func=cmd_snapshot)
|
||||
|
||||
mp = sub.add_parser("monitor", help="live-refreshing sensor view")
|
||||
mp = sub.add_parser("monitor", help="live monitor TUI (current/min/max, M2)")
|
||||
mp.add_argument("-n", "--interval", type=float, default=None, help="refresh interval (s)")
|
||||
mp.add_argument("--plain", action="store_true", help="plain redraw instead of the curses UI")
|
||||
mp.set_defaults(func=cmd_monitor)
|
||||
|
||||
sub.add_parser("gui", help="launch the desktop GUI (needs PySide6)").set_defaults(func=cmd_gui)
|
||||
|
||||
@@ -0,0 +1,170 @@
|
||||
"""Live monitor TUI (M2): a curses HWMonitor-style terminal dashboard.
|
||||
|
||||
Shows current / session-min / session-max per sensor, grouped by subsystem, with
|
||||
temperature and utilization color bands. stdlib `curses` only; falls back to a plain
|
||||
full-screen redraw when stdout isn't a TTY (piped/SSH-without-tty). Keys: q quit, r reset
|
||||
the session min/max. The terminal face of the same live data the GUI dashboard graphs.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import curses
|
||||
import sys
|
||||
import time
|
||||
|
||||
from .core.sample import Reading, Sample
|
||||
from .core.sampler import Sampler
|
||||
from .core.sources import available_sources
|
||||
from .render import _GROUP_ORDER, _GROUP_TITLES, format_raw, metric_label, render_snapshot
|
||||
|
||||
# Color-band thresholds. They mirror the GUI dashboard so both faces agree on
# where a reading turns cold / warn / crit.
TEMP_COLD = 50.0  # °C: temperatures below this are "cold"
TEMP_WARN = 78.0  # °C: at or above this, "warn"
TEMP_CRIT = 88.0  # °C: at or above this, "crit"

USAGE_WARN = 85.0  # %: at or above this, "warn"
USAGE_CRIT = 95.0  # %: at or above this, "crit"

# Metric names whose "%" readings get usage bands; any other percentage is "normal".
_USAGE_METRICS = {"load", "mem_util", "used_pct", "util"}
|
||||
|
||||
|
||||
def band(r: Reading) -> str:
    """Classify a reading into a display band.

    Returns one of ``cold``, ``good``, ``warn``, ``crit``, ``normal`` or ``na``:

    * a GPU ``status`` reading is always ``crit`` (it signals GPU-lost / query timeout),
      even though it carries no value;
    * any other reading without a value is ``na``;
    * ``°C`` readings are banded against the ``TEMP_*`` thresholds;
    * ``%`` readings whose metric is in ``_USAGE_METRICS`` are banded against ``USAGE_*``;
    * everything else is ``normal``.
    """
    # Checked before the None test: the status reading has no value by design.
    if (r.source, r.metric) == ("gpu", "status"):
        return "crit"

    value = r.value
    if value is None:
        return "na"

    if r.unit == "°C":
        # Highest threshold first; each floor is inclusive.
        for floor, name in ((TEMP_CRIT, "crit"), (TEMP_WARN, "warn"), (TEMP_COLD, "good")):
            if value >= floor:
                return name
        return "cold"

    if r.unit == "%" and r.metric in _USAGE_METRICS:
        for floor, name in ((USAGE_CRIT, "crit"), (USAGE_WARN, "warn")):
            if value >= floor:
                return name
        return "good"

    return "normal"
|
||||
|
||||
|
||||
def track(stats: dict[str, tuple[float, float]], sample: Sample) -> None:
    """Update the session extremes in ``stats`` with one sample, in place.

    ``stats`` maps a reading key to ``(min, max)``. Readings without a value are
    skipped, so they neither create an entry nor disturb an existing one.
    """
    for reading in sample.readings:
        value = reading.value
        if value is None:
            continue
        key = reading.key
        if key in stats:
            low, high = stats[key]
            stats[key] = (min(low, value), max(high, value))
        else:
            # First sighting: the value is both the minimum and the maximum.
            stats[key] = (min(value, value), max(value, value))
|
||||
|
||||
|
||||
# --- curses front-end -----------------------------------------------------------------
|
||||
|
||||
# Band name -> curses color-pair number. Bands missing from this table have no pair.
_BAND_PAIR = dict(cold=1, good=2, warn=3, crit=4)
|
||||
|
||||
|
||||
def _init_colors() -> None:
    """Register color pairs 1-4 (cyan, green, yellow, red) on the default background.

    Best effort: the first ``curses.error`` ends setup silently, leaving any pairs
    not yet registered untouched.
    """
    palette = (
        (1, curses.COLOR_CYAN),
        (2, curses.COLOR_GREEN),
        (3, curses.COLOR_YELLOW),
        (4, curses.COLOR_RED),
    )
    try:
        curses.start_color()
        curses.use_default_colors()
        for pair_number, foreground in palette:
            # -1 as background means "the terminal's default" (enabled just above).
            curses.init_pair(pair_number, foreground, -1)
    except curses.error:
        pass
|
||||
|
||||
|
||||
def _attr(band_name: str) -> int:
    """Return the curses attribute used to draw a value in the given band.

    Bands without an entry in ``_BAND_PAIR`` get ``curses.A_NORMAL``. Banded values
    get their color pair, and ``crit`` is additionally bold.

    Color setup is best effort (``_init_colors`` swallows ``curses.error``), so this
    must not raise when colors are unavailable: if ``curses.color_pair`` fails, the
    value is drawn without color, and ``crit`` still stands out in bold.
    """
    pair = _BAND_PAIR.get(band_name)
    if not pair:
        return curses.A_NORMAL
    try:
        attr = curses.color_pair(pair)
    except curses.error:
        # Color was never initialised on this terminal; degrade to monochrome.
        attr = curses.A_NORMAL
    if band_name == "crit":
        attr |= curses.A_BOLD
    return attr
|
||||
|
||||
|
||||
def _draw(stdscr, sample: Sample, stats: dict, interval: float) -> None:
    """Repaint the whole screen from the latest sample and the session extremes.

    Layout: a two-line header, then one section per source group (groups listed in
    ``_GROUP_ORDER`` first, any others after), each with a column header and one row
    per reading showing current / min / max. Rows that would fall below the bottom
    of the window are not drawn.
    """
    stdscr.erase()
    rows, cols = stdscr.getmaxyx()

    def put(y: int, x: int, text: str, attr: int = curses.A_NORMAL) -> None:
        # Ignore out-of-window positions and clip text to the window width; a
        # curses error from the write itself is swallowed too.
        if not (0 <= y < rows and 0 <= x < cols):
            return
        try:
            stdscr.addnstr(y, x, text, max(0, cols - x - 1), attr)
        except curses.error:
            pass

    put(0, 0, f"RigDoctor — live monitor every {interval:g}s", curses.A_BOLD)
    put(1, 0, "q quit r reset min/max", curses.A_DIM)

    groups = sample.by_source()
    known = [k for k in _GROUP_ORDER if k in groups]
    extra = [k for k in groups if k not in _GROUP_ORDER]

    name_w, col_w = 24, 11
    current_x = 2 + name_w
    min_x = current_x + col_w
    max_x = min_x + col_w
    column_header = f"{'sensor':<{name_w}}{'current':>{col_w}}{'min':>{col_w}}{'max':>{col_w}}"

    row = 3
    for key in known + extra:
        if row >= rows:
            break
        put(row, 0, _GROUP_TITLES.get(key, key.title()), curses.A_BOLD)
        put(row + 1, 2, column_header, curses.A_DIM)
        row += 2
        for r in groups[key]:
            if row >= rows:
                break
            if r.metric == "name":
                # Device identity line: show the label only, no value columns.
                put(row, 2, str(r.label), curses.A_DIM)
            else:
                # No session extremes for this key: fall back to the current value.
                lo, hi = stats.get(r.key, (r.value, r.value))
                put(row, 2, f"{metric_label(r):<{name_w}}")
                put(row, current_x, f"{format_raw(r.value, r.unit):>{col_w}}", _attr(band(r)))
                put(row, min_x, f"{format_raw(lo, r.unit):>{col_w}}", curses.A_DIM)
                put(row, max_x, f"{format_raw(hi, r.unit):>{col_w}}", curses.A_DIM)
            row += 1
        row += 1  # blank line between groups
    stdscr.refresh()
|
||||
|
||||
|
||||
def _loop(stdscr, sampler: Sampler, interval: float) -> None:
    """Run the curses dashboard until the user quits.

    Takes a sample immediately, then a new one every ``interval`` seconds, folding
    each into the session min/max. Keys: ``q``/``Q`` returns, ``r``/``R`` resets the
    min/max to the latest sample. The screen is redrawn on every pass of the loop.
    """
    try:
        curses.curs_set(0)
    except curses.error:
        # Some terminals cannot hide the cursor. That is cosmetic and must not
        # abort the dashboard.
        pass
    stdscr.nodelay(True)  # getch() returns immediately instead of blocking
    _init_colors()
    stats: dict[str, tuple[float, float]] = {}
    latest = sampler.sample()
    track(stats, latest)
    next_sample = time.monotonic() + interval
    while True:
        ch = stdscr.getch()
        if ch in (ord("q"), ord("Q")):
            return
        if ch in (ord("r"), ord("R")):
            # Restart the session extremes from the sample currently on screen.
            stats.clear()
            track(stats, latest)
        now = time.monotonic()
        if now >= next_sample:
            latest = sampler.sample()
            track(stats, latest)
            next_sample = now + interval
        _draw(stdscr, latest, stats, interval)
        time.sleep(0.05)  # keep key handling responsive without busy-spinning
|
||||
|
||||
|
||||
def _run_plain(sampler: Sampler, interval: float) -> int:
    """Plain (no-curses) monitor: clear the screen and reprint on every tick.

    Used for non-TTY output and as the fallback when curses is unusable. Ctrl-C
    ends the loop cleanly; the return value is always 0.
    """
    clear_and_home = "\033[2J\033[H"
    try:
        for snapshot in sampler.stream(interval=interval):
            print(clear_and_home, end="")
            print(f"RigDoctor — live (every {interval:g}s, Ctrl-C to quit)\n")
            print(render_snapshot(snapshot))
            sys.stdout.flush()
    except KeyboardInterrupt:
        # Finish the line the terminal echoed for ^C.
        print()
    return 0
|
||||
|
||||
|
||||
def run(interval: float, plain: bool = False) -> int:
    """Entry point for ``rigdoctor monitor``; returns the process exit code.

    Uses the plain redraw when ``plain`` is set or stdout is not a TTY, otherwise the
    curses dashboard. If curses raises ``curses.error``, falls back to the plain
    redraw. Ctrl-C in the dashboard is treated as a normal quit, matching plain mode.
    """
    sampler = Sampler(available_sources())
    if plain or not sys.stdout.isatty():
        return _run_plain(sampler, interval)
    try:
        curses.wrapper(_loop, sampler, interval)
    except curses.error:  # terminal can't do curses — degrade gracefully
        return _run_plain(sampler, interval)
    except KeyboardInterrupt:
        # curses.wrapper has already restored the terminal before re-raising;
        # exit quietly instead of surfacing a traceback.
        pass
    return 0
|
||||
@@ -0,0 +1,58 @@
|
||||
"""Tests for the M2 live-monitor TUI logic (min/max tracking + color bands)."""
|
||||
|
||||
import unittest
|
||||
|
||||
from rigdoctor import tui
|
||||
from rigdoctor.core.sample import Reading, Sample
|
||||
|
||||
|
||||
def _temp(v):
    """Build a GPU temperature reading (°C, empty label) with value ``v``."""
    source, metric, unit, label = "gpu", "temp", "°C", ""
    return Reading(source, metric, v, unit, label)
|
||||
|
||||
|
||||
class TrackTests(unittest.TestCase):
    """Session min/max folding done by ``tui.track``."""

    def test_tracks_min_and_max(self):
        extremes: dict = {}
        samples = [Sample(0.0, [_temp(t)]) for t in (60.0, 80.0, 70.0, 55.0)]
        for one in samples:
            tui.track(extremes, one)
        self.assertEqual(extremes["gpu.temp"], (55.0, 80.0))

    def test_ignores_none_values(self):
        extremes: dict = {}
        valueless = Sample(0.0, [_temp(None)])
        tui.track(extremes, valueless)
        self.assertEqual(extremes, {})

    def test_keys_separate_by_label(self):
        # Same source and metric, different labels: each must get its own entry.
        cores = Sample(0.0, [
            Reading("cpu", "temp", 50.0, "°C", "Core 0"),
            Reading("cpu", "temp", 70.0, "°C", "Core 1"),
        ])
        extremes: dict = {}
        tui.track(extremes, cores)
        self.assertEqual(extremes["cpu.temp.Core 0"], (50.0, 50.0))
        self.assertEqual(extremes["cpu.temp.Core 1"], (70.0, 70.0))
|
||||
|
||||
|
||||
class BandTests(unittest.TestCase):
    """Color-band classification done by ``tui.band``."""

    def test_temperature_bands(self):
        self.assertEqual(tui.band(_temp(40.0)), "cold")
        self.assertEqual(tui.band(_temp(60.0)), "good")
        self.assertEqual(tui.band(_temp(80.0)), "warn")
        self.assertEqual(tui.band(_temp(90.0)), "crit")

    def test_temperature_thresholds_are_inclusive(self):
        # A reading exactly on a threshold belongs to the higher band.
        self.assertEqual(tui.band(_temp(tui.TEMP_COLD)), "good")
        self.assertEqual(tui.band(_temp(tui.TEMP_WARN)), "warn")
        self.assertEqual(tui.band(_temp(tui.TEMP_CRIT)), "crit")

    def test_usage_bands(self):
        self.assertEqual(tui.band(Reading("gpu", "util", 50.0, "%")), "good")
        self.assertEqual(tui.band(Reading("gpu", "util", 88.0, "%")), "warn")
        self.assertEqual(tui.band(Reading("memory", "used_pct", 96.0, "%")), "crit")

    def test_usage_thresholds_are_inclusive(self):
        # A reading exactly on a threshold belongs to the higher band.
        self.assertEqual(tui.band(Reading("gpu", "util", tui.USAGE_WARN, "%")), "warn")
        self.assertEqual(tui.band(Reading("gpu", "util", tui.USAGE_CRIT, "%")), "crit")

    def test_non_metric_percentage_is_normal(self):
        # "fan" is a percentage but not a usage metric, so it gets no band.
        self.assertEqual(tui.band(Reading("gpu", "fan", 100.0, "%")), "normal")

    def test_gpu_lost_is_crit(self):
        # The GPU status reading has no value yet must be flagged, not shown as "na".
        self.assertEqual(tui.band(Reading("gpu", "status", None, "", "query-timeout")), "crit")

    def test_missing_value_is_na(self):
        self.assertEqual(tui.band(Reading("gpu", "power", None, "W")), "na")
|
||||
|
||||
|
||||
# Run the tests in this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user