Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| ab89dda0b4 | |||
| 305c88ba09 | |||
| 82f3ea49de | |||
| 8d695227bc | |||
| 82bef0a08c | |||
| 73f347449e |
@@ -5,6 +5,37 @@ All notable changes to RigDoctor are recorded here. Format follows
|
|||||||
(`MAJOR.MINOR.PATCH`, pre-1.0). `__version__` and `pyproject.toml` must match the git
|
(`MAJOR.MINOR.PATCH`, pre-1.0). `__version__` and `pyproject.toml` must match the git
|
||||||
release tag (so the auto-updater, D18, can compare versions).
|
release tag (so the auto-updater, D18, can compare versions).
|
||||||
|
|
||||||
|
## [0.15.0] - 2026-05-22
|
||||||
|
### Added
|
||||||
|
- **Hard-crash detection & recovery for the guided diagnostic.** If a focused capture ends
|
||||||
|
without a clean stop (the recorder never wrote `session-stop` and isn't running), RigDoctor
|
||||||
|
treats it as a likely hard freeze. On launch the **Games** page shows a warning banner —
|
||||||
|
*"Your last diagnostic for <game> ended unexpectedly…"* — with **Analyze crash** / **Dismiss**.
|
||||||
|
- **Deeper crash analysis.** *Analyze crash* combines the captured window (final readings before
|
||||||
|
the freeze + any GPU-lost event) with a focused scan of the **previous (crashed) boot's kernel
|
||||||
|
log** (`journalctl -k -b -1`: Xid/panic/OOM/MCE/AER/thermal) plus SMART/driver/persistence/
|
||||||
|
live-temp checks — the full "what happened" picture. `core/diagnostic.py` gains
|
||||||
|
`pending_crash()` / `analyze_crash()`; `health.check_previous_boot()` +
|
||||||
|
`run_health_checks(include_journal=False)` back it.
|
||||||
|
|
||||||
|
## [0.14.0] - 2026-05-22
|
||||||
|
### Changed
|
||||||
|
- **Dashboard headline tiles are now history trend graphs** instead of single-value gauges —
|
||||||
|
GPU temp, GPU load, CPU temp, and memory each plot their recent history (with the current
|
||||||
|
value, window min/max, and a dashed warning-threshold line), so you can see changes over time
|
||||||
|
rather than only the instantaneous reading. New `HistoryGraph` widget (QPainter, no new deps).
|
||||||
|
|
||||||
|
## [0.13.0] - 2026-05-22
|
||||||
|
### Added
|
||||||
|
- **Run Diagnostic now explains itself and can launch the game.** Clicking Run Diagnostic shows
|
||||||
|
what to do — *play the game, reproduce the crash, then Finish & analyze* (and that data
|
||||||
|
survives a hard freeze + reboot) — and offers **Launch game & start** (asks Steam to run it by
|
||||||
|
appid) or **Start without launching**. The recording banner now spells out the next step
|
||||||
|
instead of just showing a sample count.
|
||||||
|
### Fixed
|
||||||
|
- Button labels containing "&" (e.g. "Finish & analyze") rendered as "Finish _analyze" because
|
||||||
|
Qt treated the "&" as a keyboard mnemonic — now escaped so the ampersand shows literally.
|
||||||
|
|
||||||
## [0.12.0] - 2026-05-22
|
## [0.12.0] - 2026-05-22
|
||||||
### Added
|
### Added
|
||||||
- **Guided diagnostic in the GUI.** Each game on the **Games** page now has a **Run Diagnostic**
|
- **Guided diagnostic in the GUI.** Each game on the **Games** page now has a **Run Diagnostic**
|
||||||
|
|||||||
+1
-1
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "rigdoctor"
|
name = "rigdoctor"
|
||||||
version = "0.12.0"
|
version = "0.15.0"
|
||||||
description = "Modular hardware monitoring & crash diagnostics for Linux gamers."
|
description = "Modular hardware monitoring & crash diagnostics for Linux gamers."
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.11"
|
requires-python = ">=3.11"
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
"""RigDoctor — modular hardware monitoring & crash diagnostics for Linux gamers."""
|
"""RigDoctor — modular hardware monitoring & crash diagnostics for Linux gamers."""
|
||||||
|
|
||||||
__version__ = "0.12.0"
|
__version__ = "0.15.0"
|
||||||
|
|||||||
@@ -11,13 +11,16 @@ The capture is **manually bracketed** (start/finish) for now; auto start/stop on
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
import time
|
import time
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
from .. import config
|
from .. import config
|
||||||
from . import reccontrol
|
from . import reccontrol
|
||||||
from .crashlog import Summary, summarize
|
from .crashlog import Summary, summarize
|
||||||
from .health import Finding
|
from .health import CRITICAL, OK, WARNING, Finding
|
||||||
|
|
||||||
|
_SEV_ORDER = {CRITICAL: 0, WARNING: 1, "info": 2, OK: 3}
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -27,6 +30,14 @@ class DiagnosticResult:
|
|||||||
findings: list[Finding] # health findings: Xid/SMART/driver/etc. (M4)
|
findings: list[Finding] # health findings: Xid/SMART/driver/etc. (M4)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class CrashInfo:
|
||||||
|
game: str | None
|
||||||
|
samples: int
|
||||||
|
when: float | None # ts of the last captured sample (≈ when the freeze hit)
|
||||||
|
gpu_lost: bool
|
||||||
|
|
||||||
|
|
||||||
def _clear_diag_log() -> None:
|
def _clear_diag_log() -> None:
|
||||||
"""Each diagnostic is a fresh focused capture — drop any previous session + segments."""
|
"""Each diagnostic is a fresh focused capture — drop any previous session + segments."""
|
||||||
base = config.DIAG_LOG
|
base = config.DIAG_LOG
|
||||||
@@ -82,3 +93,70 @@ def finish(last_n: int = 10, log_path=None) -> DiagnosticResult:
|
|||||||
game = _game_from_summary(summary) or (reccontrol.read_status() or {}).get("game")
|
game = _game_from_summary(summary) or (reccontrol.read_status() or {}).get("game")
|
||||||
findings = run_health_checks()
|
findings = run_health_checks()
|
||||||
return DiagnosticResult(game=game, summary=summary, findings=findings)
|
return DiagnosticResult(game=game, summary=summary, findings=findings)
|
||||||
|
|
||||||
|
|
||||||
|
# --- hard-crash detection & post-crash analysis -----------------------------------
|
||||||
|
|
||||||
|
def pending_crash() -> CrashInfo | None:
|
||||||
|
"""Detect a diagnostic that ended abnormally (no clean stop, no live recorder).
|
||||||
|
|
||||||
|
A focused capture writes `session-start` (+ `game`) and, on a clean stop, `session-stop`.
|
||||||
|
After a hard freeze that block never runs, so the log has a start with no stop and no
|
||||||
|
live recorder — that's our hard-crash signal. Returns None if a capture is running, none
|
||||||
|
is recorded, it stopped cleanly, or the user already acknowledged it.
|
||||||
|
"""
|
||||||
|
if is_running() or not config.DIAG_LOG.exists():
|
||||||
|
return None
|
||||||
|
summary = summarize(config.DIAG_LOG)
|
||||||
|
kinds = {kind for _ts, kind, _detail in summary.events}
|
||||||
|
if "session-start" not in kinds:
|
||||||
|
return None
|
||||||
|
if "session-stop" in kinds or "diagnostic-acknowledged" in kinds:
|
||||||
|
return None
|
||||||
|
return CrashInfo(
|
||||||
|
game=_game_from_summary(summary),
|
||||||
|
samples=summary.samples,
|
||||||
|
when=summary.end,
|
||||||
|
gpu_lost="gpu-lost" in kinds,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def acknowledge_crash() -> None:
|
||||||
|
"""Mark the recorded crash as seen so it stops prompting (appends a marker event)."""
|
||||||
|
try:
|
||||||
|
config.DIAG_LOG.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
with open(config.DIAG_LOG, "a", encoding="utf-8") as fh:
|
||||||
|
fh.write(json.dumps({"ts": time.time(), "event": "diagnostic-acknowledged", "detail": ""}) + "\n")
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def _crash_headline(summary: Summary) -> Finding:
|
||||||
|
gpu_lost = any(kind == "gpu-lost" for _ts, kind, _detail in summary.events)
|
||||||
|
when = time.strftime("%H:%M:%S", time.localtime(summary.end)) if summary.end else "?"
|
||||||
|
detail = (
|
||||||
|
f"The capture stopped abruptly at {when} after {summary.samples} samples, with no clean "
|
||||||
|
"shutdown recorded — consistent with a hard freeze or power loss."
|
||||||
|
)
|
||||||
|
if gpu_lost:
|
||||||
|
detail += " A GPU-lost event was captured during the session."
|
||||||
|
return Finding(
|
||||||
|
CRITICAL if gpu_lost else WARNING,
|
||||||
|
"Diagnostic",
|
||||||
|
"Session ended without a clean stop (likely a hard crash)",
|
||||||
|
detail,
|
||||||
|
"Review the last readings (Capture, above) and the crash-boot findings below.",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def analyze_crash(last_n: int = 15) -> DiagnosticResult:
|
||||||
|
"""Analyze a recorded hard crash: the captured window + the previous boot's kernel log
|
||||||
|
+ the rest of the health report (SMART/driver/persistence/temps)."""
|
||||||
|
from .health import check_previous_boot, run_health_checks
|
||||||
|
|
||||||
|
summary = summarize(config.DIAG_LOG, last_n=last_n)
|
||||||
|
findings: list[Finding] = [_crash_headline(summary)]
|
||||||
|
findings += check_previous_boot() # the crashed boot's kernel log
|
||||||
|
findings += run_health_checks(include_journal=False) # SMART/driver/persistence/temps
|
||||||
|
findings.sort(key=lambda f: _SEV_ORDER.get(f.severity, 9))
|
||||||
|
return DiagnosticResult(game=_game_from_summary(summary), summary=summary, findings=findings)
|
||||||
|
|||||||
@@ -146,6 +146,22 @@ def check_journal() -> list[Finding]:
|
|||||||
return findings
|
return findings
|
||||||
|
|
||||||
|
|
||||||
|
def check_previous_boot() -> list[Finding]:
|
||||||
|
"""Scan the previous boot's kernel log — the boot that crashed — for fault signatures.
|
||||||
|
|
||||||
|
Needs persistent journald (else the crashed boot's logs were lost on reboot, which the
|
||||||
|
persistence check flags separately). Findings are framed as coming from that boot.
|
||||||
|
"""
|
||||||
|
out = _journalctl(["-k", "-b", "-1", "--no-pager", "-o", "cat"])
|
||||||
|
if not out or not out.strip():
|
||||||
|
return []
|
||||||
|
tagged = []
|
||||||
|
for f in scan_journal_text(out):
|
||||||
|
detail = ("Logged during the previous (crashed) boot. " + (f.detail or "")).strip()
|
||||||
|
tagged.append(Finding(f.severity, f.category, f.title, detail, f.suggestion))
|
||||||
|
return tagged
|
||||||
|
|
||||||
|
|
||||||
def check_journal_persistence() -> list[Finding]:
|
def check_journal_persistence() -> list[Finding]:
|
||||||
if Path("/var/log/journal").is_dir():
|
if Path("/var/log/journal").is_dir():
|
||||||
return []
|
return []
|
||||||
@@ -235,16 +251,20 @@ def check_live_temps() -> list[Finding]:
|
|||||||
)]
|
)]
|
||||||
|
|
||||||
|
|
||||||
def run_health_checks() -> list[Finding]:
|
def run_health_checks(include_journal: bool = True) -> list[Finding]:
|
||||||
"""Run all checks and return findings sorted by severity (worst first).
|
"""Run all checks and return findings sorted by severity (worst first).
|
||||||
|
|
||||||
SMART needs root; if the session collected it via launch elevation, use that
|
SMART needs root; if the session collected it via launch elevation, use that
|
||||||
instead of re-running smartctl (which would just report "needs root").
|
instead of re-running smartctl (which would just report "needs root").
|
||||||
|
|
||||||
|
`include_journal=False` skips the 7-day kernel-journal scan — used by the crash
|
||||||
|
analysis, which scans the previous (crashed) boot specifically instead.
|
||||||
"""
|
"""
|
||||||
from . import elevation
|
from . import elevation
|
||||||
|
|
||||||
findings: list[Finding] = []
|
findings: list[Finding] = []
|
||||||
findings += check_nvidia_driver()
|
findings += check_nvidia_driver()
|
||||||
|
if include_journal:
|
||||||
findings += check_journal()
|
findings += check_journal()
|
||||||
findings += check_journal_persistence()
|
findings += check_journal_persistence()
|
||||||
priv = elevation.privileged()
|
priv = elevation.privileged()
|
||||||
|
|||||||
@@ -15,6 +15,8 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
import time
|
import time
|
||||||
from dataclasses import asdict, dataclass
|
from dataclasses import asdict, dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -351,6 +353,24 @@ def acknowledge_new() -> None:
|
|||||||
|
|
||||||
# --- formatting -----------------------------------------------------------------------
|
# --- formatting -----------------------------------------------------------------------
|
||||||
|
|
||||||
|
def launch_game(appid: str) -> bool:
|
||||||
|
"""Best-effort: ask Steam to launch a game by appid (steam:// URL). Non-blocking."""
|
||||||
|
if not appid:
|
||||||
|
return False
|
||||||
|
url = f"steam://rungameid/{appid}"
|
||||||
|
for cmd in (["steam", url], ["xdg-open", url]):
|
||||||
|
if shutil.which(cmd[0]):
|
||||||
|
try:
|
||||||
|
subprocess.Popen(
|
||||||
|
cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
|
||||||
|
stdin=subprocess.DEVNULL, start_new_session=True,
|
||||||
|
)
|
||||||
|
return True
|
||||||
|
except (OSError, subprocess.SubprocessError):
|
||||||
|
continue
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
def human_size(num_bytes: int) -> str:
|
def human_size(num_bytes: int) -> str:
|
||||||
if num_bytes <= 0:
|
if num_bytes <= 0:
|
||||||
return "—"
|
return "—"
|
||||||
|
|||||||
@@ -17,19 +17,19 @@ from PySide6.QtWidgets import (
|
|||||||
|
|
||||||
from ..core.sample import Sample
|
from ..core.sample import Sample
|
||||||
from ..render import metric_label
|
from ..render import metric_label
|
||||||
from .widgets import Card, MetricBar, MetricRow, StatGauge
|
from .widgets import Card, HistoryGraph, MetricBar, MetricRow
|
||||||
|
|
||||||
_GROUP_ORDER = ["gpu", "cpu", "memory", "storage"]
|
_GROUP_ORDER = ["gpu", "cpu", "memory", "storage"]
|
||||||
_GROUP_TITLES = {"gpu": "GPU", "cpu": "CPU", "memory": "Memory", "storage": "Storage"}
|
_GROUP_TITLES = {"gpu": "GPU", "cpu": "CPU", "memory": "Memory", "storage": "Storage"}
|
||||||
_BAR_METRICS = {"util", "mem_util", "fan", "used_pct"}
|
_BAR_METRICS = {"util", "mem_util", "fan", "used_pct"}
|
||||||
|
|
||||||
|
|
||||||
def _gauge_card(gauge: StatGauge) -> QFrame:
|
def _tile_card(widget: QWidget) -> QFrame:
|
||||||
card = QFrame()
|
card = QFrame()
|
||||||
card.setObjectName("Card")
|
card.setObjectName("Card")
|
||||||
layout = QVBoxLayout(card)
|
layout = QVBoxLayout(card)
|
||||||
layout.setContentsMargins(6, 14, 6, 8)
|
layout.setContentsMargins(6, 10, 6, 8)
|
||||||
layout.addWidget(gauge)
|
layout.addWidget(widget)
|
||||||
return card
|
return card
|
||||||
|
|
||||||
|
|
||||||
@@ -54,16 +54,16 @@ class Dashboard(QWidget):
|
|||||||
header.addWidget(self._updated)
|
header.addWidget(self._updated)
|
||||||
root.addLayout(header)
|
root.addLayout(header)
|
||||||
|
|
||||||
# Headline gauges
|
# Headline trend graphs (history over the session, not just the live value)
|
||||||
self._g_gpu_temp = StatGauge("GPU Temp", "°C", 100, "temp")
|
self._g_gpu_temp = HistoryGraph("GPU Temp", "°C", 30, 100, "temp")
|
||||||
self._g_gpu_load = StatGauge("GPU Load", "%", 100, "accent")
|
self._g_gpu_load = HistoryGraph("GPU Load", "%", 0, 100, "accent")
|
||||||
self._g_cpu_temp = StatGauge("CPU Temp", "°C", 100, "temp")
|
self._g_cpu_temp = HistoryGraph("CPU Temp", "°C", 30, 100, "temp")
|
||||||
self._g_mem = StatGauge("Memory", "%", 100, "usage")
|
self._g_mem = HistoryGraph("Memory", "%", 0, 100, "usage")
|
||||||
gauges = QHBoxLayout()
|
graphs = QHBoxLayout()
|
||||||
gauges.setSpacing(14)
|
graphs.setSpacing(14)
|
||||||
for g in (self._g_gpu_temp, self._g_gpu_load, self._g_cpu_temp, self._g_mem):
|
for g in (self._g_gpu_temp, self._g_gpu_load, self._g_cpu_temp, self._g_mem):
|
||||||
gauges.addWidget(_gauge_card(g))
|
graphs.addWidget(_tile_card(g))
|
||||||
root.addLayout(gauges)
|
root.addLayout(graphs)
|
||||||
|
|
||||||
# Per-subsystem cards (scrollable, 2-column grid)
|
# Per-subsystem cards (scrollable, 2-column grid)
|
||||||
scroll = QScrollArea()
|
scroll = QScrollArea()
|
||||||
@@ -81,10 +81,10 @@ class Dashboard(QWidget):
|
|||||||
root.addWidget(scroll, 1)
|
root.addWidget(scroll, 1)
|
||||||
|
|
||||||
def update_sample(self, sample: Sample) -> None:
|
def update_sample(self, sample: Sample) -> None:
|
||||||
self._g_gpu_temp.set_value(self._val(sample, "gpu", "temp", ""))
|
self._g_gpu_temp.add_value(self._val(sample, "gpu", "temp", ""))
|
||||||
self._g_gpu_load.set_value(self._val(sample, "gpu", "util"))
|
self._g_gpu_load.add_value(self._val(sample, "gpu", "util"))
|
||||||
self._g_cpu_temp.set_value(self._cpu_temp(sample))
|
self._g_cpu_temp.add_value(self._cpu_temp(sample))
|
||||||
self._g_mem.set_value(self._val(sample, "memory", "used_pct"))
|
self._g_mem.add_value(self._val(sample, "memory", "used_pct"))
|
||||||
|
|
||||||
keys = [r.key for r in sample.readings]
|
keys = [r.key for r in sample.readings]
|
||||||
if keys != self._built_keys: # sources appeared/disappeared
|
if keys != self._built_keys: # sources appeared/disappeared
|
||||||
|
|||||||
+112
-10
@@ -26,10 +26,10 @@ from PySide6.QtWidgets import (
|
|||||||
|
|
||||||
from ..config import load_config, update_config
|
from ..config import load_config, update_config
|
||||||
from .diagnostic_dialog import DiagnosticDialog
|
from .diagnostic_dialog import DiagnosticDialog
|
||||||
from .theme import ACCENT, GOOD, MUTED
|
from .theme import ACCENT, GOOD, MUTED, WARN
|
||||||
|
|
||||||
|
|
||||||
def _game_row(name: str, sublabel: str, size: str, is_new: bool, on_diagnose=None) -> QFrame:
|
def _game_row(name: str, sublabel: str, size: str, is_new: bool, appid: str = "", on_diagnose=None) -> QFrame:
|
||||||
card = QFrame()
|
card = QFrame()
|
||||||
card.setObjectName("Card")
|
card.setObjectName("Card")
|
||||||
h = QHBoxLayout(card)
|
h = QHBoxLayout(card)
|
||||||
@@ -66,7 +66,7 @@ def _game_row(name: str, sublabel: str, size: str, is_new: bool, on_diagnose=Non
|
|||||||
diag_btn = QPushButton("Run Diagnostic")
|
diag_btn = QPushButton("Run Diagnostic")
|
||||||
diag_btn.setObjectName("ActionButton")
|
diag_btn.setObjectName("ActionButton")
|
||||||
diag_btn.setCursor(Qt.CursorShape.PointingHandCursor)
|
diag_btn.setCursor(Qt.CursorShape.PointingHandCursor)
|
||||||
diag_btn.clicked.connect(lambda: on_diagnose(name))
|
diag_btn.clicked.connect(lambda: on_diagnose(name, appid))
|
||||||
h.addWidget(diag_btn, 0)
|
h.addWidget(diag_btn, 0)
|
||||||
return card
|
return card
|
||||||
|
|
||||||
@@ -113,9 +113,10 @@ class GamesPage(QWidget):
|
|||||||
banner_h.setContentsMargins(16, 10, 16, 10)
|
banner_h.setContentsMargins(16, 10, 16, 10)
|
||||||
banner_h.setSpacing(10)
|
banner_h.setSpacing(10)
|
||||||
self._banner_label = QLabel("")
|
self._banner_label = QLabel("")
|
||||||
|
self._banner_label.setWordWrap(True)
|
||||||
self._banner_label.setStyleSheet(f"color: {ACCENT}; font-weight: 700; background: transparent;")
|
self._banner_label.setStyleSheet(f"color: {ACCENT}; font-weight: 700; background: transparent;")
|
||||||
banner_h.addWidget(self._banner_label, 1)
|
banner_h.addWidget(self._banner_label, 1)
|
||||||
self._finish_btn = QPushButton("Finish & analyze")
|
self._finish_btn = QPushButton("Finish && analyze") # && → literal & (not a mnemonic)
|
||||||
self._finish_btn.setObjectName("ActionButton")
|
self._finish_btn.setObjectName("ActionButton")
|
||||||
self._finish_btn.clicked.connect(self._finish_diagnostic)
|
self._finish_btn.clicked.connect(self._finish_diagnostic)
|
||||||
banner_h.addWidget(self._finish_btn)
|
banner_h.addWidget(self._finish_btn)
|
||||||
@@ -125,6 +126,27 @@ class GamesPage(QWidget):
|
|||||||
self._banner.hide()
|
self._banner.hide()
|
||||||
root.addWidget(self._banner)
|
root.addWidget(self._banner)
|
||||||
|
|
||||||
|
# Hard-crash banner: a previous diagnostic ended without a clean stop.
|
||||||
|
self._crash_banner = QFrame()
|
||||||
|
self._crash_banner.setObjectName("Card")
|
||||||
|
self._crash_banner.setStyleSheet(f"#Card {{ border: 1px solid {WARN}; }}")
|
||||||
|
crash_h = QHBoxLayout(self._crash_banner)
|
||||||
|
crash_h.setContentsMargins(16, 10, 16, 10)
|
||||||
|
crash_h.setSpacing(10)
|
||||||
|
self._crash_label = QLabel("")
|
||||||
|
self._crash_label.setWordWrap(True)
|
||||||
|
self._crash_label.setStyleSheet(f"color: {WARN}; font-weight: 700; background: transparent;")
|
||||||
|
crash_h.addWidget(self._crash_label, 1)
|
||||||
|
self._analyze_btn = QPushButton("Analyze crash")
|
||||||
|
self._analyze_btn.setObjectName("ActionButton")
|
||||||
|
self._analyze_btn.clicked.connect(self._analyze_crash)
|
||||||
|
crash_h.addWidget(self._analyze_btn)
|
||||||
|
self._dismiss_btn = QPushButton("Dismiss")
|
||||||
|
self._dismiss_btn.clicked.connect(self._dismiss_crash)
|
||||||
|
crash_h.addWidget(self._dismiss_btn)
|
||||||
|
self._crash_banner.hide()
|
||||||
|
root.addWidget(self._crash_banner)
|
||||||
|
|
||||||
self._diag_timer = QTimer(self)
|
self._diag_timer = QTimer(self)
|
||||||
self._diag_timer.setInterval(1000)
|
self._diag_timer.setInterval(1000)
|
||||||
self._diag_timer.timeout.connect(self._poll_diag)
|
self._diag_timer.timeout.connect(self._poll_diag)
|
||||||
@@ -162,6 +184,7 @@ class GamesPage(QWidget):
|
|||||||
|
|
||||||
self._load_cached() # instant display from the last scan
|
self._load_cached() # instant display from the last scan
|
||||||
QTimer.singleShot(400, self.refresh) # then rescan in the background on launch
|
QTimer.singleShot(400, self.refresh) # then rescan in the background on launch
|
||||||
|
self._check_crash() # surface an interrupted (crashed) diagnostic
|
||||||
|
|
||||||
# --- loading ----------------------------------------------------------------------
|
# --- loading ----------------------------------------------------------------------
|
||||||
|
|
||||||
@@ -269,29 +292,59 @@ class GamesPage(QWidget):
|
|||||||
os.path.basename(g.library.rstrip("/")) or g.library,
|
os.path.basename(g.library.rstrip("/")) or g.library,
|
||||||
steam.human_size(g.size_bytes),
|
steam.human_size(g.size_bytes),
|
||||||
g.appid in new_appids,
|
g.appid in new_appids,
|
||||||
|
appid=g.appid,
|
||||||
on_diagnose=self._start_diagnostic,
|
on_diagnose=self._start_diagnostic,
|
||||||
))
|
))
|
||||||
self._list.addStretch(1)
|
self._list.addStretch(1)
|
||||||
|
|
||||||
# --- guided diagnostic (M6/D12) ---------------------------------------------------
|
# --- guided diagnostic (M6/D12) ---------------------------------------------------
|
||||||
|
|
||||||
def _start_diagnostic(self, name: str) -> None:
|
def _start_diagnostic(self, name: str, appid: str = "") -> None:
|
||||||
from ..core import diagnostic
|
from ..core import diagnostic, steam
|
||||||
|
|
||||||
if diagnostic.is_running():
|
if diagnostic.is_running():
|
||||||
QMessageBox.information(
|
QMessageBox.information(
|
||||||
self, "RigDoctor",
|
self, "RigDoctor",
|
||||||
"A capture is already running — finish or discard it first.")
|
"A capture is already running — finish or discard it first.")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Tell the user what the flow actually is, and offer to launch the game for them.
|
||||||
|
box = QMessageBox(self)
|
||||||
|
box.setIcon(QMessageBox.Icon.Information)
|
||||||
|
box.setWindowTitle(f"Run Diagnostic — {name}")
|
||||||
|
box.setText(f"Record a focused diagnostic while you play {name}?")
|
||||||
|
box.setInformativeText(
|
||||||
|
"RigDoctor will capture sensors in the background. Then:\n\n"
|
||||||
|
"1. Play the game and try to reproduce the freeze / black screen / crash.\n"
|
||||||
|
"2. When you're done — or after a hard freeze and reboot — come back here and "
|
||||||
|
"click “Finish & analyze”.\n\n"
|
||||||
|
"Your readings are saved continuously, so even a hard lock won't lose them."
|
||||||
|
)
|
||||||
|
launch_btn = box.addButton("Launch game && start", QMessageBox.ButtonRole.AcceptRole)
|
||||||
|
start_btn = box.addButton("Start without launching", QMessageBox.ButtonRole.ActionRole)
|
||||||
|
box.addButton("Cancel", QMessageBox.ButtonRole.RejectRole)
|
||||||
|
if not appid:
|
||||||
|
launch_btn.setEnabled(False) # no appid → can't ask Steam to launch it
|
||||||
|
box.exec()
|
||||||
|
clicked = box.clickedButton()
|
||||||
|
if clicked not in (launch_btn, start_btn):
|
||||||
|
return
|
||||||
|
|
||||||
if diagnostic.start(game=name) is None:
|
if diagnostic.start(game=name) is None:
|
||||||
QMessageBox.warning(self, "RigDoctor", "Couldn't start the capture.")
|
QMessageBox.warning(self, "RigDoctor", "Couldn't start the capture.")
|
||||||
return
|
return
|
||||||
|
launched = steam.launch_game(appid) if clicked is launch_btn else False
|
||||||
self._diag_game = name
|
self._diag_game = name
|
||||||
self._banner_label.setText(f"● Recording — {name} · starting…")
|
|
||||||
self._finish_btn.setEnabled(True)
|
self._finish_btn.setEnabled(True)
|
||||||
self._discard_btn.setEnabled(True)
|
self._discard_btn.setEnabled(True)
|
||||||
self._banner.show()
|
self._banner.show()
|
||||||
self._diag_timer.start()
|
self._diag_timer.start()
|
||||||
|
self._poll_diag()
|
||||||
|
if clicked is launch_btn and not launched:
|
||||||
|
QMessageBox.information(
|
||||||
|
self, "RigDoctor",
|
||||||
|
"Recording started, but couldn't launch the game automatically — "
|
||||||
|
"launch it yourself, then click “Finish & analyze” when you're done.")
|
||||||
|
|
||||||
def _poll_diag(self) -> None:
|
def _poll_diag(self) -> None:
|
||||||
from ..core import diagnostic
|
from ..core import diagnostic
|
||||||
@@ -301,9 +354,12 @@ class GamesPage(QWidget):
|
|||||||
self._diag_timer.stop() # recorder exited on its own
|
self._diag_timer.stop() # recorder exited on its own
|
||||||
return
|
return
|
||||||
samples = status.get("samples", 0)
|
samples = status.get("samples", 0)
|
||||||
lost = " · GPU-lost seen" if status.get("gpu_lost") else ""
|
lost = " · ⚠ GPU-lost detected" if status.get("gpu_lost") else ""
|
||||||
game = status.get("game") or self._diag_game or ""
|
game = status.get("game") or self._diag_game or "your game"
|
||||||
self._banner_label.setText(f"● Recording — {game} · {samples} samples{lost}")
|
self._banner_label.setText(
|
||||||
|
f"● Recording {game} — play it and reproduce the problem, then click "
|
||||||
|
f"“Finish & analyze”. ({samples} samples{lost})"
|
||||||
|
)
|
||||||
|
|
||||||
def _finish_diagnostic(self) -> None:
|
def _finish_diagnostic(self) -> None:
|
||||||
self._diag_timer.stop()
|
self._diag_timer.stop()
|
||||||
@@ -323,8 +379,10 @@ class GamesPage(QWidget):
|
|||||||
|
|
||||||
def _on_diag_done(self, result) -> None:
|
def _on_diag_done(self, result) -> None:
|
||||||
self._banner.hide()
|
self._banner.hide()
|
||||||
|
self._crash_banner.hide()
|
||||||
self._finish_btn.setEnabled(True)
|
self._finish_btn.setEnabled(True)
|
||||||
self._discard_btn.setEnabled(True)
|
self._discard_btn.setEnabled(True)
|
||||||
|
self._analyze_btn.setEnabled(True)
|
||||||
if result is None:
|
if result is None:
|
||||||
QMessageBox.warning(self, "RigDoctor", "The diagnostic couldn't be analyzed.")
|
QMessageBox.warning(self, "RigDoctor", "The diagnostic couldn't be analyzed.")
|
||||||
return
|
return
|
||||||
@@ -337,6 +395,48 @@ class GamesPage(QWidget):
|
|||||||
reccontrol.stop_background()
|
reccontrol.stop_background()
|
||||||
self._banner.hide()
|
self._banner.hide()
|
||||||
|
|
||||||
|
# --- hard-crash recovery ----------------------------------------------------------
|
||||||
|
|
||||||
|
def _check_crash(self) -> None:
|
||||||
|
from ..core import diagnostic
|
||||||
|
|
||||||
|
info = diagnostic.pending_crash()
|
||||||
|
if info is None:
|
||||||
|
self._crash_banner.hide()
|
||||||
|
return
|
||||||
|
game = info.game or "your last game"
|
||||||
|
extra = " · ⚠ GPU-lost was captured" if info.gpu_lost else ""
|
||||||
|
self._crash_label.setText(
|
||||||
|
f"⚠ Your last diagnostic for {game} ended unexpectedly — likely a hard crash "
|
||||||
|
f"({info.samples} samples{extra}). Analyze it to see the final readings and the "
|
||||||
|
f"likely cause from the system logs."
|
||||||
|
)
|
||||||
|
self._analyze_btn.setEnabled(True)
|
||||||
|
self._crash_banner.show()
|
||||||
|
|
||||||
|
def _analyze_crash(self) -> None:
|
||||||
|
from ..core import diagnostic
|
||||||
|
|
||||||
|
diagnostic.acknowledge_crash() # don't prompt again for this one
|
||||||
|
self._analyze_btn.setEnabled(False)
|
||||||
|
self._crash_label.setText("Analyzing the crash (final readings + system logs)…")
|
||||||
|
threading.Thread(target=self._work_analyze_crash, daemon=True).start()
|
||||||
|
|
||||||
|
def _work_analyze_crash(self) -> None:
|
||||||
|
from ..core import diagnostic
|
||||||
|
|
||||||
|
try:
|
||||||
|
result = diagnostic.analyze_crash()
|
||||||
|
except Exception:
|
||||||
|
result = None
|
||||||
|
self._diag_done.emit(result)
|
||||||
|
|
||||||
|
def _dismiss_crash(self) -> None:
|
||||||
|
from ..core import diagnostic
|
||||||
|
|
||||||
|
diagnostic.acknowledge_crash()
|
||||||
|
self._crash_banner.hide()
|
||||||
|
|
||||||
# --- nav badge integration --------------------------------------------------------
|
# --- nav badge integration --------------------------------------------------------
|
||||||
|
|
||||||
def showEvent(self, event) -> None: # noqa: N802 (Qt override)
|
def showEvent(self, event) -> None: # noqa: N802 (Qt override)
|
||||||
@@ -358,3 +458,5 @@ class GamesPage(QWidget):
|
|||||||
self._banner.show()
|
self._banner.show()
|
||||||
if not self._diag_timer.isActive():
|
if not self._diag_timer.isActive():
|
||||||
self._diag_timer.start()
|
self._diag_timer.start()
|
||||||
|
else:
|
||||||
|
self._check_crash() # re-surface an interrupted diagnostic if one is pending
|
||||||
|
|||||||
@@ -2,8 +2,10 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from PySide6.QtCore import QRectF, Qt
|
from collections import deque
|
||||||
from PySide6.QtGui import QColor, QFont, QPainter, QPen
|
|
||||||
|
from PySide6.QtCore import QPointF, QRectF, Qt
|
||||||
|
from PySide6.QtGui import QColor, QFont, QPainter, QPainterPath, QPen
|
||||||
from PySide6.QtWidgets import (
|
from PySide6.QtWidgets import (
|
||||||
QComboBox,
|
QComboBox,
|
||||||
QFrame,
|
QFrame,
|
||||||
@@ -17,7 +19,19 @@ from PySide6.QtWidgets import (
|
|||||||
|
|
||||||
from ..core.sample import Reading
|
from ..core.sample import Reading
|
||||||
from ..render import format_value
|
from ..render import format_value
|
||||||
from .theme import ACCENT, CRIT, GOOD, MUTED, TEXT, TRACK, WARN, gauge_color, temp_color
|
from .theme import (
|
||||||
|
ACCENT,
|
||||||
|
CRIT,
|
||||||
|
GOOD,
|
||||||
|
MUTED,
|
||||||
|
TEMP_WARN,
|
||||||
|
TEXT,
|
||||||
|
TRACK,
|
||||||
|
USAGE_WARN,
|
||||||
|
WARN,
|
||||||
|
gauge_color,
|
||||||
|
temp_color,
|
||||||
|
)
|
||||||
|
|
||||||
_SEV = {
|
_SEV = {
|
||||||
"critical": ("CRITICAL", CRIT),
|
"critical": ("CRITICAL", CRIT),
|
||||||
@@ -248,6 +262,117 @@ class StatGauge(QWidget):
|
|||||||
p.end()
|
p.end()
|
||||||
|
|
||||||
|
|
||||||
|
class HistoryGraph(QWidget):
|
||||||
|
"""A headline metric as a trend: current value + window min/max + a history line.
|
||||||
|
|
||||||
|
Replaces the at-a-glance gauge with changes-over-time. `kind` drives the color
|
||||||
|
(temp band / usage / accent), matching StatGauge so the dashboard stays consistent.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, title: str, unit: str = "", vmin: float = 0.0, vmax: float = 100.0,
|
||||||
|
kind: str = "accent", history: int = 180) -> None:
|
||||||
|
super().__init__()
|
||||||
|
self._title = title
|
||||||
|
self._unit = unit
|
||||||
|
self._min = vmin
|
||||||
|
self._max = vmax
|
||||||
|
self._kind = kind # "temp" | "usage" | "accent"
|
||||||
|
self._values: deque[float | None] = deque(maxlen=history)
|
||||||
|
self.setMinimumSize(160, 132)
|
||||||
|
|
||||||
|
def add_value(self, value: float | None) -> None:
|
||||||
|
self._values.append(value)
|
||||||
|
self.update()
|
||||||
|
|
||||||
|
def _fmt(self, value: float | None) -> str:
|
||||||
|
if value is None:
|
||||||
|
return "—"
|
||||||
|
if self._unit == "°C":
|
||||||
|
return f"{value:.0f}°"
|
||||||
|
if self._unit == "%":
|
||||||
|
return f"{value:.0f}%"
|
||||||
|
return f"{value:.0f}{self._unit}"
|
||||||
|
|
||||||
|
def paintEvent(self, event) -> None: # noqa: N802 (Qt override)
|
||||||
|
p = QPainter(self)
|
||||||
|
p.setRenderHint(QPainter.RenderHint.Antialiasing)
|
||||||
|
w, h = self.width(), self.height()
|
||||||
|
pad = 10.0
|
||||||
|
present = [v for v in self._values if v is not None]
|
||||||
|
current = next((v for v in reversed(self._values) if v is not None), None)
|
||||||
|
color = QColor(gauge_color(self._kind, current))
|
||||||
|
|
||||||
|
ftitle = QFont()
|
||||||
|
ftitle.setPointSizeF(10.0)
|
||||||
|
ftitle.setBold(True)
|
||||||
|
p.setFont(ftitle)
|
||||||
|
p.setPen(QColor(MUTED))
|
||||||
|
p.drawText(QRectF(pad, 6, w - 2 * pad, 18),
|
||||||
|
Qt.AlignmentFlag.AlignLeft | Qt.AlignmentFlag.AlignVCenter, self._title)
|
||||||
|
|
||||||
|
fval = QFont()
|
||||||
|
fval.setPointSizeF(21.0)
|
||||||
|
fval.setBold(True)
|
||||||
|
p.setFont(fval)
|
||||||
|
p.setPen(color if current is not None else QColor(MUTED))
|
||||||
|
p.drawText(QRectF(pad, 2, w - 2 * pad, 28),
|
||||||
|
Qt.AlignmentFlag.AlignRight | Qt.AlignmentFlag.AlignTop, self._fmt(current))
|
||||||
|
|
||||||
|
if present:
|
||||||
|
fsm = QFont()
|
||||||
|
fsm.setPointSizeF(8.5)
|
||||||
|
p.setFont(fsm)
|
||||||
|
p.setPen(QColor(MUTED))
|
||||||
|
p.drawText(QRectF(pad, 27, w - 2 * pad, 14), Qt.AlignmentFlag.AlignLeft,
|
||||||
|
f"min {self._fmt(min(present))} max {self._fmt(max(present))}")
|
||||||
|
|
||||||
|
g_top, g_bot = 48.0, h - pad
|
||||||
|
g_left, g_right = pad, w - pad
|
||||||
|
span = self._max - self._min
|
||||||
|
if g_bot - g_top < 12 or g_right - g_left < 12 or span <= 0:
|
||||||
|
p.end()
|
||||||
|
return
|
||||||
|
|
||||||
|
def y_of(v: float) -> float:
|
||||||
|
frac = (max(self._min, min(self._max, v)) - self._min) / span
|
||||||
|
return g_bot - frac * (g_bot - g_top)
|
||||||
|
|
||||||
|
warn = TEMP_WARN if self._kind == "temp" else (USAGE_WARN if self._kind == "usage" else None)
|
||||||
|
if warn is not None and self._min <= warn <= self._max:
|
||||||
|
pen = QPen(QColor(TRACK))
|
||||||
|
pen.setWidthF(1.0)
|
||||||
|
pen.setStyle(Qt.PenStyle.DashLine)
|
||||||
|
p.setPen(pen)
|
||||||
|
yw = y_of(warn)
|
||||||
|
p.drawLine(QPointF(g_left, yw), QPointF(g_right, yw))
|
||||||
|
|
||||||
|
maxlen = self._values.maxlen or 1
|
||||||
|
step = (g_right - g_left) / max(1, maxlen - 1)
|
||||||
|
n = len(self._values)
|
||||||
|
# Build the line newest-at-right; break it where readings are missing.
|
||||||
|
path = QPainterPath()
|
||||||
|
drawing = False
|
||||||
|
for i, v in enumerate(self._values):
|
||||||
|
if v is None:
|
||||||
|
drawing = False
|
||||||
|
continue
|
||||||
|
x = g_right - (n - 1 - i) * step
|
||||||
|
y = y_of(v)
|
||||||
|
if drawing:
|
||||||
|
path.lineTo(x, y)
|
||||||
|
else:
|
||||||
|
path.moveTo(x, y)
|
||||||
|
drawing = True
|
||||||
|
if not path.isEmpty():
|
||||||
|
pen = QPen(color)
|
||||||
|
pen.setWidthF(2.0)
|
||||||
|
pen.setCapStyle(Qt.PenCapStyle.RoundCap)
|
||||||
|
pen.setJoinStyle(Qt.PenJoinStyle.RoundJoin)
|
||||||
|
p.setPen(pen)
|
||||||
|
p.drawPath(path)
|
||||||
|
p.end()
|
||||||
|
|
||||||
|
|
||||||
class MetricBar(QWidget):
|
class MetricBar(QWidget):
|
||||||
"""A label + value with a thin progress bar (for 0–100% metrics)."""
|
"""A label + value with a thin progress bar (for 0–100% metrics)."""
|
||||||
|
|
||||||
|
|||||||
@@ -57,5 +57,51 @@ class FinishTests(unittest.TestCase):
|
|||||||
self.assertTrue(any(kind == "gpu-lost" for _ts, kind, _d in result.summary.events))
|
self.assertTrue(any(kind == "gpu-lost" for _ts, kind, _d in result.summary.events))
|
||||||
|
|
||||||
|
|
||||||
|
class CrashDetectionTests(unittest.TestCase):
|
||||||
|
def _diag_log(self, d) -> Path:
|
||||||
|
return Path(d) / "diagnostic.jsonl"
|
||||||
|
|
||||||
|
def test_unterminated_session_is_a_pending_crash(self):
|
||||||
|
with tempfile.TemporaryDirectory() as d:
|
||||||
|
log = self._diag_log(d)
|
||||||
|
_write_log(str(log), "Tarkov") # has session-start + game, no session-stop
|
||||||
|
with mock.patch.object(diagnostic.config, "DIAG_LOG", log), \
|
||||||
|
mock.patch.object(diagnostic.reccontrol, "running_pid", return_value=None):
|
||||||
|
info = diagnostic.pending_crash()
|
||||||
|
self.assertIsNotNone(info)
|
||||||
|
self.assertEqual(info.game, "Tarkov")
|
||||||
|
self.assertTrue(info.gpu_lost) # _write_log writes a gpu-lost event
|
||||||
|
|
||||||
|
def test_clean_stop_is_not_a_crash(self):
|
||||||
|
with tempfile.TemporaryDirectory() as d:
|
||||||
|
log = self._diag_log(d)
|
||||||
|
w = CrashLogWriter(str(log))
|
||||||
|
w.write_event("session-start"); w.write_event("game", "X")
|
||||||
|
w.write_sample(Sample(time.time(), [Reading("gpu", "temp", 60.0, "°C", "")]))
|
||||||
|
w.write_event("session-stop", "samples=1")
|
||||||
|
w.close()
|
||||||
|
with mock.patch.object(diagnostic.config, "DIAG_LOG", log), \
|
||||||
|
mock.patch.object(diagnostic.reccontrol, "running_pid", return_value=None):
|
||||||
|
self.assertIsNone(diagnostic.pending_crash())
|
||||||
|
|
||||||
|
def test_acknowledge_clears_pending_crash(self):
|
||||||
|
with tempfile.TemporaryDirectory() as d:
|
||||||
|
log = self._diag_log(d)
|
||||||
|
_write_log(str(log), "Tarkov")
|
||||||
|
with mock.patch.object(diagnostic.config, "DIAG_LOG", log), \
|
||||||
|
mock.patch.object(diagnostic.reccontrol, "running_pid", return_value=None):
|
||||||
|
self.assertIsNotNone(diagnostic.pending_crash())
|
||||||
|
diagnostic.acknowledge_crash()
|
||||||
|
self.assertIsNone(diagnostic.pending_crash())
|
||||||
|
|
||||||
|
def test_running_capture_is_not_a_crash(self):
|
||||||
|
with tempfile.TemporaryDirectory() as d:
|
||||||
|
log = self._diag_log(d)
|
||||||
|
_write_log(str(log), "Tarkov")
|
||||||
|
with mock.patch.object(diagnostic.config, "DIAG_LOG", log), \
|
||||||
|
mock.patch.object(diagnostic.reccontrol, "running_pid", return_value=4321):
|
||||||
|
self.assertIsNone(diagnostic.pending_crash()) # it's in-progress, not crashed
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
Reference in New Issue
Block a user