Compare commits
121 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 5996fbdc30 | |||
|
8f4824f576
|
|||
|
edc2166011
|
|||
|
31ecf67ca7
|
|||
| ac4863b0d4 | |||
| b65f36bb2d | |||
|
0f9cb4b684
|
|||
|
b9bfec961c
|
|||
|
b1bc961b79
|
|||
| 410f8882ee | |||
| 1da7816741 | |||
|
33c554c29f
|
|||
| 31178bace8 | |||
|
04e8d72bce
|
|||
| fb468e83c2 | |||
|
b006fa6b8d
|
|||
| b20e8dfc3a | |||
| 9fe9a6576f | |||
|
07bc722209
|
|||
| d405bf7caf | |||
|
9bb0f9a684
|
|||
| 4bbc0fa97e | |||
|
a0f8055328
|
|||
| 323451428b | |||
|
479189ee4e
|
|||
| 51133e4042 | |||
|
bcf6ac2656
|
|||
|
81c7757546
|
|||
| d59261f021 | |||
|
44923b771a
|
|||
| eaaf14c58a | |||
| 7779131cf9 | |||
|
87fa678ccb
|
|||
| c5e24b3984 | |||
|
21cc6a4813
|
|||
| ee73049248 | |||
|
3a8ad5bd5d
|
|||
| e8b84bf046 | |||
|
2342dd83aa
|
|||
| a028fe6d38 | |||
|
a6453335e9
|
|||
| baec47dd4e | |||
| 47ecb702e7 | |||
| 944945ce72 | |||
|
dc719f6a89
|
|||
|
78cd417d0b
|
|||
| 856a3305ad | |||
| 3b1a2e7393 | |||
| 2989e8e23e | |||
| 670df23e06 | |||
| 2ee7763d00 | |||
| bd6cad5a42 | |||
| 7fa9b63661 | |||
| c443a8b9f8 | |||
| bbc22fa288 | |||
| 5502251789 | |||
| 4bd51a40c3 | |||
| 984292c368 | |||
| bffaf73ad4 | |||
| 7f0ab9a635 | |||
| 12339c3282 | |||
| c7e50ba4cb | |||
| a3caabc0d5 | |||
| b59f202891 | |||
| e6d94fbd59 | |||
| 045f40c4de | |||
| 2ff4056d89 | |||
| 2fe03269e4 | |||
| ac2a3981fc | |||
| 2684e5c8ab | |||
| 4386838b69 | |||
| bfbad9cbc6 | |||
| 2e545ff718 | |||
| 5e5dc2d54a | |||
| 7804893054 | |||
| bf3ac4af1a | |||
| e4a37176e1 | |||
| 67665974dc | |||
| 51b7ed69bd | |||
| 6fca2c9aba | |||
| 4c5a6547ec | |||
| 587568e574 | |||
| cc84bbda88 | |||
| 75a4da7af3 | |||
| f95387c5b8 | |||
| 1dc86121f6 | |||
| cd54e5f2c5 | |||
| 1b24d1b032 | |||
| 7ac14416b5 | |||
| b22a2f5593 | |||
| f45d8c9b34 | |||
| 8d6ce47e87 | |||
| 03b2dd8363 | |||
| ab89dda0b4 | |||
| 305c88ba09 | |||
| 82f3ea49de | |||
| 8d695227bc | |||
| 82bef0a08c | |||
| 73f347449e | |||
| 5cd51beadf | |||
| 934b489fec | |||
| 7a283dc338 | |||
| 5682878f22 | |||
| 5a584c08d5 | |||
| 8b1083a29b | |||
| 25b7a58e3c | |||
| 1ec8675fa0 | |||
| 9c30c9824e | |||
| 596b3ec8c4 | |||
| 392ea76347 | |||
| 29f4a45df8 | |||
| d7f07dd7c0 | |||
| 0642eb4712 | |||
| f25ac939cc | |||
| b47006bc22 | |||
| 00394c287c | |||
| 2f6cab72c4 | |||
| 67d4c1cb99 | |||
| e33cc0ef3a | |||
| e3b20089f0 | |||
| 54c0971ac3 |
@@ -11,7 +11,20 @@ on:
|
||||
branches: [main]
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
- name: Install (core only)
|
||||
run: python -m pip install -e .
|
||||
- name: Run tests
|
||||
run: python -m unittest discover -s tests -v
|
||||
|
||||
release:
|
||||
needs: test # don't publish a release if the tests fail
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
@@ -30,6 +43,9 @@ jobs:
|
||||
- name: Build self-extracting installer (.run)
|
||||
run: python packaging/make_run.py
|
||||
|
||||
- name: Build .deb
|
||||
run: python packaging/make_deb.py
|
||||
|
||||
- name: Read version
|
||||
id: ver
|
||||
run: |
|
||||
@@ -90,3 +106,26 @@ jobs:
|
||||
"${API}/releases/${rid}/assets?name=$(basename "$f")" >/dev/null
|
||||
done
|
||||
echo "Published ${TAG}."
|
||||
|
||||
- name: Publish .deb to the Gitea apt registry (optional — needs REGISTRY_TOKEN)
|
||||
env:
|
||||
PKG_TOKEN: ${{ secrets.REGISTRY_TOKEN }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
if [ -z "${PKG_TOKEN:-}" ]; then
|
||||
echo "REGISTRY_TOKEN not set — skipping apt publish (the .deb is still a release asset)."
|
||||
exit 0
|
||||
fi
|
||||
OWNER="${{ github.repository_owner }}"
|
||||
URL="${{ github.server_url }}/api/packages/${OWNER}/debian/pool/stable/main/upload"
|
||||
for f in dist/*.deb; do
|
||||
echo "Uploading $(basename "$f") to the apt registry…"
|
||||
code=$(curl -sS -o /tmp/apt_upload.txt -w '%{http_code}' \
|
||||
--user "${OWNER}:${PKG_TOKEN}" --upload-file "$f" "$URL" || true)
|
||||
case "$code" in
|
||||
2*) echo " uploaded ($code)";;
|
||||
409) echo " already published ($code) — skipping (registry versions are immutable)";;
|
||||
*) echo " upload failed ($code):"; cat /tmp/apt_upload.txt || true; exit 1;;
|
||||
esac
|
||||
done
|
||||
echo "apt source: deb ${{ github.server_url }}/api/packages/${OWNER}/debian stable main"
|
||||
|
||||
@@ -0,0 +1,44 @@
|
||||
name: tests
|
||||
run-name: Run test suite
|
||||
|
||||
# Runs the unittest suite on pull requests (once per PR). Pushes to main are covered by the
|
||||
# `test` job in release.yml, so we don't trigger on push here — that would double every run.
|
||||
# Two jobs:
|
||||
# core — stdlib-only install; the GUI tests skip (@skipUnless HAVE_QT). Bulletproof.
|
||||
# gui-smoke — installs the GUI extra + offscreen Qt libs and runs the same suite headless,
|
||||
# exercising the MainWindow/SetupWizard/DiagnosticDialog construction tests.
|
||||
# Make `tests / core (pull_request)` a required status check on `main` so a PR can't merge red.
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
core:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
- name: Install (core only — no PySide6)
|
||||
run: python -m pip install -e .
|
||||
- name: Run tests (GUI tests skip without PySide6)
|
||||
run: python -m unittest discover -s tests -v
|
||||
|
||||
gui-smoke:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
- name: System libraries for offscreen Qt
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y libegl1 libgl1 libxkbcommon0 libdbus-1-3 libglib2.0-0
|
||||
- name: Install (with GUI extra)
|
||||
run: python -m pip install -e ".[gui]"
|
||||
- name: Run tests (headless)
|
||||
env:
|
||||
QT_QPA_PLATFORM: offscreen
|
||||
run: python -m unittest discover -s tests -v
|
||||
@@ -5,6 +5,562 @@ All notable changes to RigDoctor are recorded here. Format follows
|
||||
(`MAJOR.MINOR.PATCH`, pre-1.0). `__version__` and `pyproject.toml` must match the git
|
||||
release tag (so the auto-updater, D18, can compare versions).
|
||||
|
||||
## [0.43.0] - 2026-05-29
|
||||
### Added
|
||||
- **GPU stress test + close thermal monitoring** (`rigdoctor stress`, and a "Stress test…" button
|
||||
on System Health). Runs a GPU load and samples sensors at a high rate (default 0.5 s), then
|
||||
reports per-metric min/avg/**peak**, how long the core spent above each temperature threshold,
|
||||
power vs the limit, throttling (decoded from the NVML clocks-event bitmask), and any GPU **fault**
|
||||
(Xid / VA-space freeze / a query-timeout hang) that hit during the window — the on-demand way to
|
||||
reproduce load-correlated crashes. The load comes from an explicit `--command` (a game or a tool
|
||||
like gpu-burn), an auto-detected loader (gpu-burn/vkmark/glmark2/vkcube), or **monitor-only** when
|
||||
none is found (you launch the game; it tracks temps while you play).
|
||||
- **Drive health & wear in the health report.** A new `core/drives.py` parses the full
|
||||
`smartctl --json` for every drive into prioritized findings: the SMART verdict, a derived
|
||||
**life-left %** (NVMe `percentage_used` or the SATA wear-leveling attribute), **power-on hours**,
|
||||
data written (TBW), temperature, and the early-failure predictors (reallocated / pending /
|
||||
offline-uncorrectable sectors, NVMe media errors, low available spare). Replaces the old
|
||||
pass/fail-only SMART check; flows through the same elevated path (GUI launch / `sudo rigdoctor
|
||||
report`), degrading to per-drive "needs root" notes unprivileged.
|
||||
|
||||
### Fixed
|
||||
- **GUI "Add game…" can now link a launcher.** The dialog only asked for a name, so a custom
|
||||
game (e.g. SPT) couldn't be given its launch command or log folder from the app — those were
|
||||
CLI-only, leaving it unlaunchable from the GUI. It's now a proper form: name + an optional
|
||||
launch command/script (with a **Browse…** file picker) + an optional log folder (auto-detected
|
||||
from the script's folder when left blank).
|
||||
|
||||
## [0.42.0] - 2026-05-29
|
||||
### Added
|
||||
- **Detect hard freezes that log no Xid.** The kernel-log scanner caught Xid codes, OOM, panic,
|
||||
MCE, PCIe AER, thermal events, and amdgpu resets — but a crash that logs *no* Xid slipped
|
||||
through. It now flags the NVIDIA open-kernel-module **VA-space mapping fault** (`gpu_vaspace.c`
|
||||
/ `dmaAllocMapping` assertions, NVKMS GEM-allocation failures) — a driver-internal error that
|
||||
can storm for minutes and end in a freeze without the GPU ever "falling off the bus" (distinct
|
||||
from Xid 79). A new `check_nvidia_module()` notes when the open module (`nvidia-*-open`) is
|
||||
loaded — the context behind these faults — and a new `ai_knowledge` entry lets the assistant
|
||||
tell the no-Xid freeze apart from the Xid 79 hardware drop.
|
||||
- **Add games no launcher reports (e.g. SPT).** A user-authored custom-games list
|
||||
(`core/customgames.py`) shows alongside Steam/Lutris/Heroic in `rigdoctor games` and the GUI
|
||||
("Add game…"), for standalone mod launchers (Single-Player Tarkov), itch.io downloads, or any
|
||||
hand-installed game. Each entry can carry a launch command and a log directory:
|
||||
`rigdoctor games add "SPT" --command .../tarkov.sh` (a sibling `logs/` is auto-detected),
|
||||
`rigdoctor games play "SPT"` launches it under the crash-capture wrapper (tagged with the real
|
||||
name, not the script's), and the diagnostic now tails the game's *own* logs — SPT's
|
||||
server/launcher logs — alongside the kernel log so the analysis sees what the game logged
|
||||
before the freeze.
|
||||
|
||||
## [0.41.0] - 2026-05-25
|
||||
### Added
|
||||
- **Import a crash dump (`.dmp`) and explain it with AI.** The **Games** page gains an
|
||||
"Import crash dump…" button (shown once an AI provider is configured) that opens a Windows
|
||||
minidump — the kind a Proton/Wine game writes when it hard-crashes — parses it, and hands the
|
||||
result to the opt-in AI assistant (D24; cloud sends still ask first). A new stdlib
|
||||
`core/minidump.py` reads the `MDMP` streams with `struct` (no new deps): the exception / crash
|
||||
reason (e.g. access violation `0xC0000005`), the **faulting module** (which DLL the crash
|
||||
address lands in — `nvwgf2umx.dll`, `d3d11.dll`, an anticheat, the game's own `.exe`…), OS/CPU,
|
||||
and the loaded-module list. If `minidump_stackwalk` (Breakpad) or `minidump-stackwalk`
|
||||
(rust-minidump) is on PATH, its fuller report is appended best-effort. The model is told the
|
||||
dump came from a Windows process under Proton, so fixes stay Linux/Proton-side (Proton version,
|
||||
DXVK/VKD3D, driver, launch options) — never Windows admin/registry steps. New `ai_knowledge`
|
||||
facts cover the common exception codes and faulting-module signatures. CLI parity:
|
||||
`rigdoctor ai dump <file>`.
|
||||
|
||||
## [0.40.0] - 2026-05-22
|
||||
### Added
|
||||
- **RAM speed / XMP-EXPO check.** Inventory now shows each module's configured speed and, when it's
|
||||
below the rated speed, the rating (e.g. `4800 MT/s (rated 5600)`); **System Health** flags it
|
||||
("RAM at 4800 MT/s (rated 5600 MT/s)") with the fix — enable XMP/EXPO in BIOS. With the profile
|
||||
off, dmidecode only reports the JEDEC base, so the rated speed is read from both dmidecode and
|
||||
the part number (matched against known DDR5 speed grades, so no false positives). Needs dmidecode
|
||||
(root / launch elevation). Completes the "underperforming hardware" trio alongside the PCIe-link and display-refresh checks.
|
||||
|
||||
## [0.39.0] - 2026-05-22
|
||||
### Added
|
||||
- **Displays in the Inventory.** A new `core/displays.py` lists each connected monitor with its
|
||||
resolution and current/max refresh — e.g. `DP-1 · Samsung LC34G55T → 3440x1440 @ 165 Hz`. Reads
|
||||
GNOME's Mutter `DisplayConfig` over D-Bus (works on X11 *and* Wayland), falling back to `xrandr`
|
||||
on other X11 desktops.
|
||||
- **System Health flags monitors below their max refresh.** If a monitor supports a higher refresh
|
||||
at its current resolution (e.g. a 165 Hz panel set to 60 Hz — an easily-missed gaming setting),
|
||||
Health reports it with the fix (raise it in Display settings). Max is computed at the *current*
|
||||
resolution, so it never suggests dropping resolution.
|
||||
|
||||
## [0.38.0] - 2026-05-22
|
||||
### Added
|
||||
- **PCIe link in the Inventory.** Each NVMe drive now shows its negotiated PCIe link next to the
|
||||
model — e.g. `Samsung SSD 980 PRO 1TB (931.5G) · PCIe Gen4 x4` — read from sysfs
|
||||
(`current/max_link_speed` + width). If a drive negotiates below its capability (a slower M.2
|
||||
slot, lane-sharing, or a downtrain) it's flagged: `PCIe Gen3 x4 (capable of Gen4 x4)`. So you
|
||||
can confirm a Gen4 SSD is actually in a Gen4 slot. (SATA disks show no PCIe link.)
|
||||
- **System Health flags downtrained NVMe links.** A new check warns when an NVMe drive negotiates
|
||||
fewer PCIe lanes than it supports (almost always motherboard **lane-sharing** — a GPU/second
|
||||
card or another M.2 stealing lanes) and notes speed-only reductions as info (a slower slot or
|
||||
idle ASPM). The GPU is deliberately excluded — NVIDIA drops its PCIe gen/width at idle, so a
|
||||
snapshot would false-alarm.
|
||||
|
||||
## [0.37.1] - 2026-05-22
|
||||
### Fixed
|
||||
- **`rigdoctor update` now uses the right method for how RigDoctor was installed.** It detects
|
||||
apt (`.deb`), pip (venv/`.run`), or source installs (`updates.install_kind()`); only pip
|
||||
installs self-update in place. An apt install no longer fails with "No module named pip" —
|
||||
it (and the GUI Update button) shows `sudo apt update && sudo apt install --only-upgrade
|
||||
rigdoctor`; a source checkout points to `git pull`.
|
||||
|
||||
## [0.37.0] - 2026-05-22
|
||||
### Added
|
||||
- **Version footer** — a footer across the bottom of the window shows `RigDoctor v<version>` in
|
||||
the bottom-right (moved out of the sidebar).
|
||||
### Fixed
|
||||
- **Pages scroll when content doesn't fit, and the window is no longer pinned to the tallest
|
||||
page's height.** Long pages (Settings, Tuning, …) get a scrollbar when too tall — so controls
|
||||
like Uninstall are always reachable — and the window can now be resized smaller than the screen
|
||||
(min height dropped from "taller than the screen" to ~600px). Pages that manage their own
|
||||
scroll/fill (Dashboard, System Health, Inventory, Share) are unchanged.
|
||||
|
||||
## [0.36.1] - 2026-05-22
|
||||
### Fixed
|
||||
- `rigdoctor gui` printed the wrong fix when PySide6 was missing — it suggested the non-existent
|
||||
`python3-pyside6` package. Now it names the real split modules
|
||||
(`python3-pyside6.qt{widgets,gui,websockets,svg}` + `python3-pyte`).
|
||||
|
||||
## [0.36.0] - 2026-05-22
|
||||
### Fixed
|
||||
- **`.deb` now installs all dependencies automatically — no manual tool install.** The previous
|
||||
`Recommends: python3-pyside6` named a package that doesn't exist on Debian/Ubuntu (PySide6 is
|
||||
split per module), so apt silently skipped it and the GUI wouldn't start. Now it Recommends the
|
||||
actual modules the GUI imports — `python3-pyside6.qt{widgets,gui,websockets,svg}` + `python3-pyte`.
|
||||
### Changed
|
||||
- **`apt install rigdoctor` sets up the whole toolset.** The `.deb` also Recommends the optional
|
||||
diagnostic/gaming tools (smartmontools, lm-sensors, dmidecode, pciutils, libnotify-bin,
|
||||
libsecret-tools, gamemode, mangohud) so they install by default — users never hand-install
|
||||
tools. `cpupower` is a Suggests (kernel-tied); `--no-install-recommends` still gives CLI-only.
|
||||
|
||||
## [0.35.0] - 2026-05-22
|
||||
### Added
|
||||
- **`.deb` package (M9 / D8)** — `packaging/make_deb.py` builds a `rigdoctor_<version>_all.deb`
|
||||
(pure-Python, `Architecture: all`) via `dpkg-deb`: `Depends: python3`, with the GUI deps
|
||||
(`python3-pyside6`, `python3-pyte`) as **Recommends** so `sudo apt install ./rigdoctor_*.deb`
|
||||
gives the full app and `--no-install-recommends` gives CLI-only. Installs the package, both
|
||||
launchers, the desktop entry, and the icon. CI (`release.yml`) builds it as a **release asset**
|
||||
every release, and optionally publishes it to the Gitea **apt registry** (set a `REGISTRY_TOKEN`
|
||||
secret) for `sudo apt install rigdoctor`. **M9 is now complete.**
|
||||
|
||||
## [0.34.0] - 2026-05-22
|
||||
### Added
|
||||
- **Event-based alerts (M8).** Beyond temperature + GPU-lost, RigDoctor now notifies on
|
||||
**critical kernel events** — Xid (GPU error), out-of-memory kills, CPU machine-checks, PCIe
|
||||
AER errors, and disk I/O errors — scanned from the kernel log every ~30s while monitoring and
|
||||
fired one-shot (cooldown-gated, so no spam). A proactive warning the moment something goes
|
||||
wrong, not just on a temperature threshold. Included whenever desktop notifications are on.
|
||||
|
||||
## [0.33.0] - 2026-05-22
|
||||
### Added
|
||||
- **AI explanations stream live.** "Explain with AI" now fills token-by-token as the model
|
||||
generates (Ollama NDJSON + Claude SSE, both via stdlib `urllib`) instead of a multi-second
|
||||
freeze, then re-renders the finished answer as Markdown. `core/ai.explain_stream()`.
|
||||
|
||||
## [0.32.0] - 2026-05-22
|
||||
### Added
|
||||
- **More for diagnostics & reports:**
|
||||
- **`nvidia-smi -q` snapshot** — driver, throttle/clock-event reasons, clocks, power, temps,
|
||||
PCIe link, ECC + retired pages (point-in-time at diagnostic time).
|
||||
- **Display-server log** — auto-detected: `Xorg.0.log` on X11, or the compositor's user-journal
|
||||
slice (gnome-shell/kwin/sway/gamescope) on Wayland.
|
||||
- **Full system inventory** (M5 hardware/OS) is now included in each stored diagnostic and the
|
||||
**Report** bundle — invaluable for larger/shared debugging.
|
||||
These join the kernel log + coredump records in `syslogs.txt`/`inventory.*`, are saved per
|
||||
diagnostic, included in the Report zip, and — the logs only — fed to the AI on "Explain".
|
||||
|
||||
## [0.31.0] - 2026-05-22
|
||||
### Added
|
||||
- **Diagnostics now collect session-scoped system logs** (`core/syslogs.py`): a kernel-log
|
||||
slice (`journalctl -k` — Xid, OOM-killer, MCE, PCIe AER, thermal, hung tasks) and
|
||||
**crashed-process records** (`coredumpctl` — which executable, signal, and when). They're saved
|
||||
to the diagnostic directory (`syslogs.txt`), included in the **Report** bundle, and fed to the
|
||||
AI on "Explain" alongside the game logs. Best-effort — degrades quietly if the tools are
|
||||
missing or access is denied; scoped to the session window so it doesn't drag in old noise.
|
||||
|
||||
## [0.30.0] - 2026-05-22
|
||||
### Added
|
||||
- **Logging & report bundles (M15, D25)** — opt-in via one **Settings → Logging** toggle
|
||||
(default off). When on: the app logs to a rotating `app.log`, and **each diagnostic is stored
|
||||
in its own folder** (`~/.local/share/rigdoctor/diagnostics/<id>/`) with the capture log, a
|
||||
structured `result.json`, a readable `report.txt`, a session-scoped game-log snapshot, and an
|
||||
`ai/` record of every AI interaction — **the exact data sent, which model, and its reply**.
|
||||
- **Report** — a button on the diagnostic dialog (and `rigdoctor bundle`) zips a diagnostic's
|
||||
folder plus `app.log` into `~/.local/share/rigdoctor/reports/<id>.zip` for sharing. Everything
|
||||
stays local; the zip only leaves your machine if you share it. Available only when logging is on.
|
||||
|
||||
## [0.29.0] - 2026-05-22
|
||||
### Added
|
||||
- **AI now resolves Steam app IDs from your library instead of guessing.** When app IDs appear
|
||||
in the logs/findings, RigDoctor looks them up in your scanned games (`steam.appid_names()`) and
|
||||
injects an "App IDs (resolved from your installed games)" glossary into the prompt — so the
|
||||
model names games correctly (e.g. `2694490 = Path of Exile 2`) rather than hallucinating. Only
|
||||
IDs it can resolve locally are listed; no network, no model "training" needed.
|
||||
|
||||
## [0.28.1] - 2026-05-22
|
||||
### Fixed
|
||||
- **AI explanations were misreading stale/benign logs.** Three fixes so the model analyses the
|
||||
*actual* session: (1) the prompt now states the **real game name, capture duration, and
|
||||
outcome** (clean vs. crash) so the model stops guessing the game from log paths; (2) game logs
|
||||
are **scoped to the session window** (Steam-console lines filtered by timestamp; a stale
|
||||
per-app Proton log from an earlier game is skipped); (3) the reference KB flags common
|
||||
**benign** Steam/Proton lines (`libnvidia-ml.so.1` assertion, routine minidump uploads, "fork
|
||||
without exec") so they aren't reported as the cause. The system prompt also forbids
|
||||
Windows-only advice (no "run as administrator") and tells the model not to invent a problem
|
||||
when the run was clean.
|
||||
|
||||
## [0.28.0] - 2026-05-22
|
||||
### Added
|
||||
- **AI explanations now include recent game logs.** When you press "Explain with AI" on a
|
||||
diagnostic, RigDoctor also gathers recent **Proton** (`~/steam-<appid>.log`) and **Steam**
|
||||
console logs (`core/gamelogs.py`, tail-read + size-bounded) and passes them to the model, so
|
||||
it can correlate log errors with the sensor findings and pinpoint *when* something went wrong.
|
||||
### Fixed
|
||||
- The AI explanation popup now **renders Markdown** (headings, bold, lists) instead of showing
|
||||
raw `###`/`**` — `QTextEdit.setMarkdown`, and the model is told to answer in Markdown.
|
||||
|
||||
## [0.27.1] - 2026-05-22
|
||||
### Changed
|
||||
- AI assistant: selecting **Ollama** now pre-fills the model field with **`qwen2.5:7b`** (a
|
||||
strong 7B that fits an 8 GB GPU; our grounding makes a 7B sufficient). It won't overwrite a
|
||||
model you've already entered, and you can change it freely.
|
||||
|
||||
## [0.27.0] - 2026-05-22
|
||||
### Added
|
||||
- **AI assistant (M14, D24)** — optional, **strictly opt-in, never automatic**. Explains your
|
||||
diagnostics in plain language only when you press **"Explain with AI"** on the diagnostic
|
||||
dialog (or run `rigdoctor ai explain`). You choose a provider explicitly (no default):
|
||||
**Ollama** (local, private, no key) or **Claude** (Anthropic; key stored in the keyring, with
|
||||
a consent prompt before any data is sent). Configure in **Settings → AI assistant**.
|
||||
- Answers are **grounded**: RigDoctor passes the actual findings plus matched reference facts
|
||||
from a curated knowledge base (`core/ai_knowledge.py` — exact keyword/code match, no
|
||||
embeddings, stdlib only), so even a small local model gets the domain facts it needs. Stdlib
|
||||
`urllib` only — no new core dependency. Output is advisory (D9).
|
||||
- CLI: `rigdoctor ai status|test|explain`.
|
||||
|
||||
## [0.26.1] - 2026-05-22
|
||||
### Fixed
|
||||
- **Setup wizard contrast.** The **radio buttons** (Recording trigger) were unstyled, so the
|
||||
selected option was invisible on the dark theme — now styled with a clear accent ring + dot.
|
||||
Bundle **checkboxes** got explicit checked/disabled states, and stay selectable even when a
|
||||
bundle is already installed (the page no longer looks dead when everything's present).
|
||||
|
||||
## [0.26.0] - 2026-05-22
|
||||
### Added
|
||||
- **Graphical setup wizard (M9).** A first-run GUI wizard (`gui/setup_wizard.py`) walks through:
|
||||
environment summary → pick **dependency bundles** (Diagnostics / Monitoring / Gaming / Updates,
|
||||
from the component catalog) → install the missing apt packages → choose the **recording
|
||||
trigger** → a readiness summary. It shows automatically on first launch (until done), is
|
||||
re-runnable from **Settings → Run setup wizard** or `rigdoctor-gui --setup`, and `install.sh`
|
||||
launches it after a fresh install when a desktop session is present.
|
||||
|
||||
## [0.25.0] - 2026-05-22
|
||||
### Changed
|
||||
- **Share is now terminal-only (D23, amends D16).** The Share page is a single shared-terminal
|
||||
experience: the host shares their shell, the guest watches and may type **only if the host
|
||||
ticks "Allow the guest to type"** (otherwise read-only). The terminal is larger and either
|
||||
side can pop it **full-screen** (Esc to exit).
|
||||
### Removed
|
||||
- The read-only **stats view** (live sensors/health/inventory over the relay) and the
|
||||
`rigdoctor share serve` HTTP server — the shared terminal replaces them. (`core/share.py`
|
||||
removed; the `share` CLI command is gone.)
|
||||
|
||||
## [0.24.0] - 2026-05-22
|
||||
### Added
|
||||
- **Shared terminal is now in color.** The terminal view renders pyte's per-cell foreground/
|
||||
background, bold, and reverse, so the host's real shell keeps its theming — fish, `ls`,
|
||||
`git`, prompts, etc. look the same as locally (the session already runs the host's `$SHELL`
|
||||
with its config and `TERM=xterm-256color`; only the rendering was monochrome).
|
||||
|
||||
## [0.23.0] - 2026-05-22
|
||||
### Added
|
||||
- **Crash-logger trigger modes (M9 / D6)** via `systemd --user`, no root: **manual**,
|
||||
**always-on** (a background service records continuously), and **game-launch** (auto-records
|
||||
while a Steam game runs). Set it from **Settings → Recording trigger** or
|
||||
`rigdoctor service mode <manual|always-on|game-launch>`; `rigdoctor service status` shows it.
|
||||
`core/service.py` writes/enables the user units.
|
||||
- **Zero-config game-launch watcher** (`core/watcher.py`, `rigdoctor watch`) — polls Steam's
|
||||
RunningAppID and brackets a focused capture around the running game (the D12 fallback for users
|
||||
who don't add the `wrap` launch option; the wrapper stays the precise primary path).
|
||||
|
||||
## [0.22.0] - 2026-05-22
|
||||
### Added
|
||||
- **M6 breadth.** Environment checks now also report **GPU PowerMizer** mode (NVIDIA on X11 — flags
|
||||
Adaptive/Auto and suggests Prefer-Max-Performance), the **Wine** version, and the **Steam
|
||||
client** version.
|
||||
- **Non-Steam launchers.** Lutris (its SQLite library) and Heroic (Epic + GOG JSON stores) are
|
||||
detected (`core/launchers.py`) and listed on the Games page and `rigdoctor games`, tagged by
|
||||
launcher. You can Run Diagnostic on them too (records while you play; auto-launch stays
|
||||
Steam-only).
|
||||
### Notes
|
||||
- The zero-config game watcher (D12 fallback) is deferred to the M9 trigger-mode work, where the
|
||||
service integration lives.
|
||||
|
||||
## [0.21.0] - 2026-05-22
|
||||
### Added
|
||||
- **Live monitor TUI (M2).** `rigdoctor monitor` is now a proper **curses** dashboard:
|
||||
current / session-min / session-max per sensor, grouped by subsystem, with temperature and
|
||||
utilization **color bands** (and GPU-lost flagged red). `q` quits, `r` resets the session
|
||||
min/max. Falls back to a plain full-screen redraw on a non-TTY (`--plain` forces it). The
|
||||
terminal face of the same live data the GUI dashboard graphs. Completes the Monitoring bundle.
|
||||
|
||||
## [0.20.0] - 2026-05-22
|
||||
### Changed
|
||||
- **Reorganized navigation** into grouped sidebar sections — **Monitor** (Dashboard) ·
|
||||
**Diagnose** (Games, Recordings, System Health, Tuning) · **System** (Inventory) · **App**
|
||||
(Settings, Share) — so it's clear where to go.
|
||||
- **Renames for clarity:** *Health → System Health* (it's the overall 7-day system scan, not
|
||||
per-game), *Environment → Tuning* (gaming tunables + fixes), *Logs → Recordings*,
|
||||
*Setup → Settings*.
|
||||
- **Settings** absorbed **Notifications** (alerts) — app configuration (components/deps, alerts,
|
||||
account access, uninstall) now lives in one page; Notifications is no longer a separate item.
|
||||
- **Recordings** is now a hub: pick which captured log to view (always-on capture, last
|
||||
diagnostic, or a preserved crash), **Analyze crash** in place, alongside the recorder controls.
|
||||
|
||||
## [0.19.0] - 2026-05-22
|
||||
### Added
|
||||
- **System-tray applet (M11, D13).** A tray icon whose menu shows live **CPU / GPU temp** and
|
||||
**memory used/total**, a **status line** (Normal / Hot / GPU not responding), and is led by a
|
||||
**Run Diagnostic** submenu (pick a detected game → the guided session), plus **Open dashboard**,
|
||||
**Start/Stop recording**, **Snapshot (copy)**, and **Quit**. It reuses the dashboard's sample
|
||||
stream (no extra sampling). With a tray present, **closing the window hides to the tray** (Quit
|
||||
exits); `rigdoctor-gui --tray` starts hidden for autostart. Needs a tray host — on GNOME the
|
||||
AppIndicator extension; degrades to a no-op if none is available. Completes the Desktop UI bundle.
|
||||
- **GUI smoke tests**: construct `MainWindow` headless and exercise the tray, so a startup crash
|
||||
fails the build (closes the gap that let the 0.18.0 import regression ship).
|
||||
|
||||
## [0.18.2] - 2026-05-22
|
||||
### Fixed
|
||||
- **GUI wouldn't start** (0.18.0 regression): the recording indicator used a wrong relative
|
||||
import (`from .core` → `rigdoctor.gui.core`, which doesn't exist), crashing `MainWindow` on
|
||||
launch. Corrected to `from ..core`.
|
||||
|
||||
## [0.18.1] - 2026-05-22
|
||||
### Changed
|
||||
- Recording badge: dropped the sample count (not useful at a glance) — it now shows just
|
||||
**● Recording** + the game, plus a **⚠ GPU-lost** line if one is detected.
|
||||
|
||||
## [0.18.0] - 2026-05-22
|
||||
### Added
|
||||
- **Global recording indicator.** While a capture is running, the sidebar shows a red
|
||||
**● Recording** badge on every page — with the **game** being captured and the live sample
|
||||
count (and a GPU-lost flag if seen). It polls the recorder, so it reflects captures started
|
||||
any way: manual `record`, a guided diagnostic, or the Steam launch wrapper.
|
||||
|
||||
## [0.17.0] - 2026-05-22
|
||||
### Added
|
||||
- **Inventory page is back in the GUI** (it was removed in 0.7.2 in favor of the CLI). Sidebar
|
||||
**Inventory** → System / CPU / Firmware / Memory / GPU / Storage / Display as cards, with
|
||||
**Copy Markdown** and **Save…** for pasting into forum/bug reports, and **Refresh**. Root-only
|
||||
details (motherboard/BIOS/RAM modules via dmidecode) fill in after the launch password prompt.
|
||||
Backed by the existing M5 `core/inventory.py` — the CLI `rigdoctor inventory` is unchanged.
|
||||
|
||||
## [0.16.0] - 2026-05-22
|
||||
### Added
|
||||
- **Automatic crash-capture via a Steam launch wrapper (M6/D12).** Set `rigdoctor wrap
|
||||
%command%` as a game's Steam launch option (or in Lutris/Heroic's wrapper field) and RigDoctor
|
||||
starts a focused, game-tagged capture when the game launches and stops it cleanly on exit — no
|
||||
manual Run Diagnostic / Finish. A hard freeze leaves the capture unterminated, so it's flagged
|
||||
as a crash next launch. The wrapper resolves the game name from Steam's `SteamAppId`, doesn't
|
||||
disturb an existing capture, and returns the game's exit code. (`core/wrap.py`, `rigdoctor wrap`.)
|
||||
- GUI **Auto-capture…** helper on the Games page: shows the exact launch-option line (absolute
|
||||
path, copy button) and how to set it in Steam.
|
||||
- Auto-capture preserves an unanalyzed crash (`diagnostic-crash.jsonl`) before starting a new
|
||||
capture, so relaunching the game can't wipe a crash report you haven't seen yet.
|
||||
### Fixed
|
||||
- `docs/MODULES.md` status column was stale — M1, M3, M4, M5, M8, M10, and M13 are done and now
|
||||
marked ✅ (only M2 and M11 remain not-started; M6/M9/M12 in progress).
|
||||
|
||||
## [0.15.0] - 2026-05-22
|
||||
### Added
|
||||
- **Hard-crash detection & recovery for the guided diagnostic.** If a focused capture ends
|
||||
without a clean stop (the recorder never wrote `session-stop` and isn't running), RigDoctor
|
||||
treats it as a likely hard freeze. On launch the **Games** page shows a warning banner —
|
||||
*"Your last diagnostic for \<game\> ended unexpectedly…"* — with **Analyze crash** / **Dismiss**.
|
||||
- **Deeper crash analysis.** *Analyze crash* combines the captured window (final readings before
|
||||
the freeze + any GPU-lost event) with a focused scan of the **previous (crashed) boot's kernel
|
||||
log** (`journalctl -k -b -1`: Xid/panic/OOM/MCE/AER/thermal) plus SMART/driver/
|
||||
persistence/live-temp checks — the full "what happened" picture. `core/diagnostic.py` gains
|
||||
`pending_crash()` / `analyze_crash()`; `health.check_previous_boot()` +
|
||||
`run_health_checks(include_journal=False)` back it.
|
||||
|
||||
## [0.14.0] - 2026-05-22
|
||||
### Changed
|
||||
- **Dashboard headline tiles are now history trend graphs** instead of single-value gauges —
|
||||
GPU temp, GPU load, CPU temp, and memory each plot their recent history (with the current
|
||||
value, window min/max, and a dashed warning-threshold line), so you can see changes over time
|
||||
rather than only the instantaneous reading. New `HistoryGraph` widget (QPainter, no new deps).
|
||||
|
||||
## [0.13.0] - 2026-05-22
|
||||
### Added
|
||||
- **Run Diagnostic now explains itself and can launch the game.** Clicking Run Diagnostic shows
|
||||
what to do — *play the game, reproduce the crash, then Finish & analyze* (and that data
|
||||
survives a hard freeze + reboot) — and offers **Launch game & start** (asks Steam to run it by
|
||||
appid) or **Start without launching**. The recording banner now spells out the next step
|
||||
instead of just showing a sample count.
|
||||
### Fixed
|
||||
- Button labels containing "&" (e.g. "Finish & analyze") rendered as "Finish _analyze" because
|
||||
Qt treated the "&" as a keyboard mnemonic — now escaped so the ampersand shows literally.
|
||||
|
||||
## [0.12.0] - 2026-05-22
|
||||
### Added
|
||||
- **Guided diagnostic in the GUI.** Each game on the **Games** page now has a **Run Diagnostic**
|
||||
button → a focused, game-tagged capture starts and a recording banner appears (live sample
|
||||
count, GPU-lost indicator) with **Finish & analyze** / **Discard**. Finishing opens a results
|
||||
dialog: the window-scoped capture summary (peak temps/power, events, last samples) plus the
|
||||
health findings as cards. The banner persists/restores if you navigate away and back while a
|
||||
capture is running. Shares `core/diagnostic.py` with the CLI (one flow, three front-ends).
|
||||
|
||||
## [0.11.0] - 2026-05-22
|
||||
### Added
|
||||
- **Guided diagnostic session (CLI) — the seed use case, end to end.** `rigdoctor diagnose
|
||||
start --game "<name>"` runs a **focused crash-capture tagged with that game** (its own
|
||||
diagnostic log, so the report is scoped to just that session), `diagnose status` shows
|
||||
progress, and `diagnose finish` stops it and prints a combined report: the **capture
|
||||
summary** (peak temps/power, GPU-lost events, last samples — M3) plus the **health findings**
|
||||
(Xid/SMART/driver/etc. — M4). The game can be given by `--game` or `--appid` (resolved from
|
||||
the Steam scan), and is recorded as a log event so it survives a crash + reboot.
|
||||
- Shared orchestration lives in `core/diagnostic.py` (one callable for CLI/GUI/tray, per
|
||||
ARCHITECTURE §7.1); the recorder/`record run` gained an optional `--game` tag.
|
||||
|
||||
## [0.10.2] - 2026-05-22
|
||||
### Changed
|
||||
- When an Environment **Apply**/**Install** fails, the status now shows the **real reason**
|
||||
(cancelled at the password prompt vs. the system rejecting the change, e.g. a
|
||||
BIOS/kernel-locked PCIe ASPM policy) instead of a vague "cancelled, or needs privileges".
|
||||
|
||||
## [0.10.1] - 2026-05-22
|
||||
### Fixed
|
||||
- **Environment-page contrast.** The combo-box **drop-down list** was rendering light-on-light
|
||||
(the popup view is a separate widget the theme didn't cover) — now dark with readable text.
|
||||
- The **Install / Apply** buttons on findings were hard to read (the accent fill didn't paint
|
||||
reliably inside the finding cards, leaving dim dark-on-dark text). They're now an outlined
|
||||
style — bright accent text on the dark card, filling accent on hover — readable regardless,
|
||||
and given a minimum height so the row can't crush them.
|
||||
|
||||
## [0.10.0] - 2026-05-22
|
||||
### Added
|
||||
- **Actionable Environment page (M6) — install & apply, not just advice.** Findings that
|
||||
recommend a tool or a setting are now one-click:
|
||||
- **Install buttons** for GameMode, MangoHud, and cpupower (added to the M9 component catalog,
|
||||
so they also appear on the **Setup** page with the existing installer).
|
||||
- **Apply controls** for runtime-reversible tunables — a dropdown of the live options + Apply,
|
||||
via a single pkexec prompt, no reboot: **CPU governor**, **NVIDIA persistence mode**,
|
||||
**PCIe ASPM policy**, **vm.swappiness**, **Transparent HugePages** (`core/fixes.py`). The
|
||||
chosen value is validated against the live options before anything runs.
|
||||
- This is the consent-gated apply milestone D9 anticipated, scoped to safe settings (**D22**).
|
||||
GRUB-based fixes and CPU mitigations stay suggestion-only; `rigdoctor gameenv` still prints
|
||||
the exact commands for headless use.
|
||||
### Changed
|
||||
- The `Finding` model gained optional `action` (installable component) and `fix` (applyable
|
||||
tunable) fields; the shared `finding_card` widget renders the matching control.
|
||||
|
||||
## [0.9.0] - 2026-05-22
|
||||
### Added
|
||||
- **Gaming environment checks (M6) — the evaluate-and-suggest engine.** A new read-only report
|
||||
(D9) that flags system settings which hurt gaming stability/performance and gives the exact fix
|
||||
command. Checks: **PCIe ASPM**, **NVIDIA persistence mode**, **CPU governor** (the three that
|
||||
map to the seed-case GPU bus-drop / Xid 79), GameMode, MangoHud, `vm.swappiness`, shader disk
|
||||
cache, Transparent HugePages, CPU mitigations, and installed Proton versions.
|
||||
- **CLI:** `rigdoctor gameenv` (text or `--json`).
|
||||
- **GUI:** a new **Environment** page (findings cards, auto-runs on open), reusing the M4
|
||||
health-report card style via a shared `finding_card` widget.
|
||||
### Fixed
|
||||
- **Notification icon** now uses the RigDoctor icon (matching the app/dock) instead of a generic
|
||||
stock icon — resolved from the installed icon theme, the bundled asset, then a stock fallback.
|
||||
|
||||
## [0.8.0] - 2026-05-22
|
||||
### Added
|
||||
- **Gaming environment checks (M6) — Steam game detection.** RigDoctor now finds your Steam
|
||||
libraries (across multiple drives, via `libraryfolders.vdf`) and the games installed in each
|
||||
(parsing `appmanifest_*.acf` — stdlib only, no Steam tooling needed). Runtimes, Proton builds,
|
||||
and redistributables are filtered out.
|
||||
- **Opt-in libraries:** detected libraries are listed with a per-library game count; you check
|
||||
the ones to scan. Nothing is scanned until you pick a library.
|
||||
- **Background scan on every launch:** the GUI rescans the selected libraries in the background
|
||||
when it opens and flags games installed since the last scan with a **NEW** badge plus a count
|
||||
on the **Games** sidebar item (cleared when you view the page). Results are cached
|
||||
(`~/.local/state/rigdoctor/games.json`) so the list shows instantly.
|
||||
- **CLI:** `rigdoctor games` lists detected games; `rigdoctor games libraries
|
||||
[--enable PATH | --disable PATH | --all]` lists/selects libraries (headless-complete, D17).
|
||||
- Config now supports list values (TOML arrays); `steam_libraries` records the selected libraries.
|
||||
|
||||
## [0.7.3] - 2026-05-21
|
||||
### Fixed
|
||||
- Shared terminal now has **scrollback** — large output (e.g. `ls -la`) can be scrolled up to
|
||||
read; it keeps a history buffer and only auto-scrolls to the bottom when you're already there.
|
||||
|
||||
## [0.7.2] - 2026-05-21
|
||||
### Changed
|
||||
- Removed the GUI **Inventory** tab — use the CLI `rigdoctor inventory` instead. (Inventory is
|
||||
still collected for the relay guest view, so a remote helper still sees the host's hardware.)
|
||||
### Fixed
|
||||
- Shared terminal caret now sits at the real cursor position (row **and** column) instead of
|
||||
the start of the line.
|
||||
|
||||
## [0.7.1] - 2026-05-21
|
||||
### Fixed
|
||||
- Shared terminal: a guest who joined **after** the host enabled the terminal stayed read-only.
|
||||
The host now re-sends the terminal state when a guest joins, so the terminal is available.
|
||||
- Inventory page no longer jumps back to the top when it refreshes (e.g. when elevated data
|
||||
arrives) — scroll position is preserved and unchanged data isn't re-rendered.
|
||||
- Shared terminal now follows the cursor to the bottom as output arrives (e.g. `ls -la`),
|
||||
instead of staying scrolled up.
|
||||
|
||||
## [0.7.0] - 2026-05-21
|
||||
### Added
|
||||
- **Shared terminal (M12, Tier 3)**: when the host enables it, the session shares a real **PTY**
|
||||
shell — the guest gets an interactive terminal (vim, top, tab-completion, Ctrl-C) running on
|
||||
the host as the host's user. The host **reads along** live and can type too, e.g. a `sudo`
|
||||
password — which stays local and is never sent to the guest. Off by default, host-consented.
|
||||
The guest also pulls the host's inventory on join.
|
||||
### Fixed
|
||||
- **Input contrast**: all form controls (text fields, spin boxes, combo boxes, terminals) now
|
||||
use the dark theme with readable text (Fusion defaulted them to light-on-light).
|
||||
|
||||
## [0.6.0] - 2026-05-21
|
||||
### Added
|
||||
- **Session sharing over the relay (M12)**: a **Share** tab — *Start shared session* (host)
|
||||
hands you a short code and streams a read-only live view; *Enter share code* (guest) joins
|
||||
someone else's session and views their sensors/health/inventory. Both connect outbound over
|
||||
WebSocket to the relay (`relay_url`, default `wss://rigdoctor.jesseyvanofferen.com`), gated
|
||||
by your Gitea access token — no port forwarding. Read-only.
|
||||
|
||||
## [0.5.0] - 2026-05-21
|
||||
### Added
|
||||
- **Session sharing (M12, Tier 2)**: `rigdoctor share serve` starts a **read-only** live view
|
||||
(sensors auto-refresh + health report + inventory) over a local HTTP server, gated by a
|
||||
random share token. Bind to localhost for local testing, or to all interfaces behind a
|
||||
user-chosen tunnel (Tailscale/cloudflared/SSH) for remote help. No actions, no terminal.
|
||||
(Tier 1 export and Tier 3 gated terminal still to come — D16.)
|
||||
|
||||
## [0.4.1] - 2026-05-21
|
||||
### Fixed
|
||||
- Checkbox contrast: a checked checkbox is now a clear accent-filled box with a checkmark
|
||||
(was hard to tell checked from unchecked on the dark theme).
|
||||
|
||||
## [0.4.0] - 2026-05-21
|
||||
### Added
|
||||
- **Alerts (M8)**: desktop notifications (via `notify-send`) for **overheat** (GPU/CPU past a
|
||||
threshold), **GPU-lost** (nvidia-smi timeout), and a **new version available** (fired once
|
||||
per version). Edge-triggered with a cooldown so it doesn't spam. Degrades gracefully if
|
||||
`notify-send` isn't installed.
|
||||
- **Notifications page**: configure alerts (enable/disable, GPU/CPU temperature thresholds)
|
||||
with a "Send test" button; changes apply live and persist to `config.toml`.
|
||||
- **App icon**: ships a RigDoctor icon and shows it in the dock/launcher. The GUI
|
||||
**self-registers** the icon + `.desktop` on launch (and sets the Wayland app-id), so a
|
||||
self-update + relaunch picks it up — no need to re-run the installer.
|
||||
|
||||
## [0.3.2] - 2026-05-21
|
||||
### Changed
|
||||
- Replaced the per-page "Run with admin" buttons with a **single password prompt at launch**
|
||||
(`pkexec`): the GUI collects root-only data (SMART + dmidecode board/BIOS/RAM) once and
|
||||
caches it for the session, so Health and Inventory always show the full picture. Falls back
|
||||
to non-root if cancelled/unavailable; disable via `elevate_on_launch = false`.
|
||||
|
||||
## [0.3.1] - 2026-05-21
|
||||
### Fixed
|
||||
- Changelog/release notes now **render Markdown** instead of showing raw `#`/`**` markup —
|
||||
|
||||
@@ -1,132 +1,137 @@
|
||||
# RigDoctor
|
||||
|
||||
A **modular diagnostics, monitoring, and health-check toolkit for Linux gamers.**
|
||||
**Hardware monitoring & crash diagnostics for Linux gamers.** Live sensors, crash-safe
|
||||
logging, plain-language health reports, per-game diagnostics, and optional AI explanations —
|
||||
in a desktop app, a tray applet, or the terminal. Ubuntu/Debian + NVIDIA first.
|
||||
|
||||
> **Status:** 🟢 Phase 1 (MVP) complete. The **sensor core (M1)**, **crash-capture logger
|
||||
> (M3)**, and **health report (M4)** all work — live `snapshot`/`monitor`, crash-safe `record`
|
||||
> with a post-crash report, and `report` to scan logs/SMART/driver for likely causes. A
|
||||
> desktop GUI (M10) ties them together (dashboard, recording, health). See `docs/ROADMAP.md`.
|
||||
Linux gaming faults are hard to pin down — GPUs falling off the PCIe bus, black screens
|
||||
mid-game, silent thermal/VRAM throttling, driver/Proton mismatches. The useful data is
|
||||
scattered across `nvidia-smi`, `/sys`, `journalctl`, and SMART, and the readings right before a
|
||||
freeze are usually lost. RigDoctor pulls it together and keeps the evidence.
|
||||
|
||||
## Why this exists
|
||||
## Features
|
||||
|
||||
Linux gaming hardware faults are hard to diagnose: GPUs falling off the PCIe bus, the screen
|
||||
suddenly going black mid-game, silent thermal/VRAM throttling, power transients,
|
||||
driver/library mismatches, Proton quirks, and CPU governor / power-profile misconfiguration.
|
||||
The data needed to diagnose them is scattered across `nvidia-smi`, `/sys/class/hwmon`,
|
||||
`journalctl`, SMART, and more — and the most useful readings (the ones right before a hard
|
||||
freeze) are usually lost because nothing flushed them to disk.
|
||||
- **Live monitoring** — a dark desktop **dashboard** (history graphs + per-subsystem cards), a
|
||||
**tray applet** with at-a-glance status, and a terminal view (`rigdoctor monitor`).
|
||||
- **Crash-safe recording** — background logger that `fsync`s every sample, so the state right
|
||||
before a hard freeze survives. Manual, always-on, or auto-start when a game launches.
|
||||
- **Health report** — scans `journalctl`/SMART/driver for likely causes (Xid, OOM, disk
|
||||
errors, throttling…) and explains them with suggested fixes.
|
||||
- **Per-game diagnostics** — pick a game, capture while you play, get a focused report; hard
|
||||
crashes are detected and analysed on next launch.
|
||||
- **Gaming tune-ups** — flags risky settings (CPU governor, PCIe ASPM, persistence mode…) with
|
||||
**one-click, reversible fixes**.
|
||||
- **Proactive alerts** — desktop notifications on overheating and critical kernel events
|
||||
(GPU-lost, Xid, out-of-memory, disk I/O).
|
||||
- **AI explanations** *(optional, opt-in)* — explain a diagnostic in plain language with a
|
||||
**local model (Ollama)** or **Claude**, or **import a Windows crash dump (`.dmp`)** from a
|
||||
Proton game and have it parsed and analysed. Never automatic; only when you press the button.
|
||||
- **Shareable reports** — zip a diagnostic (logs, inventory, AI transcript) to hand to someone,
|
||||
or share a live **terminal session** for remote help.
|
||||
- **Self-updating** — `apt upgrade`, or the in-app updater.
|
||||
|
||||
RigDoctor pulls all of that into one modular tool: live monitoring, crash-safe logging, a
|
||||
one-shot health report, and an interactive installer that only sets up the modules a given
|
||||
user actually needs for their hardware.
|
||||
## Screenshots
|
||||
|
||||
**Seed use cases:** an RTX 3070 that intermittently "falls off the bus" under heavy GPU load
|
||||
(Path of Exile on Linux, Escape from Tarkov on Windows), and a monitor going black mid-game.
|
||||
See `docs/SPEC.md` §1.
|
||||
| Dashboard | Inventory |
|
||||
|---|---|
|
||||
|  |  |
|
||||
|
||||
## How you run it
|
||||
**Share** — a read-only or interactive terminal session over the relay, for remote help:
|
||||
|
||||
RigDoctor is **GUI-first** — the desktop app is the primary way in — but every feature is
|
||||
also available headless:
|
||||
- **Desktop GUI** — graphical dashboard, recording controls, log browser, reports. The
|
||||
default interface for most users.
|
||||
- **Tray applet** — a small top-menu-bar applet with quick actions and at-a-glance status.
|
||||
- **CLI** — full functionality from the terminal; works over SSH and in scripts.
|
||||

|
||||
|
||||
The GUI/tray are optional modules; a headless (CLI-only) install loses no capability.
|
||||
## Install
|
||||
|
||||
## Key decisions (settled)
|
||||
### Debian / Ubuntu — `.deb`
|
||||
|
||||
| Topic | Decision |
|
||||
|-------|----------|
|
||||
| Name | **RigDoctor** |
|
||||
| Language / stack | **Python 3 + Qt (PySide6)** — core/CLI/daemon stdlib-only; Qt only for GUI/tray |
|
||||
| Primary distro | **Ubuntu** (Debian via apt); others best-effort later |
|
||||
| Primary GPU | **NVIDIA** first; AMD, then Intel later |
|
||||
| MVP | **Sensor core + crash logger + health report** (NVIDIA-only, CLI-first) |
|
||||
| Distribution | **User-local install** (self-updating from the public repo, no root); **`.deb`** optional |
|
||||
| Scope of action | **Read-only + suggestions** (no auto-apply yet) |
|
||||
| Stress tests | **Out of scope** |
|
||||
|
||||
Full rationale and the still-open questions are in `docs/DECISIONS.md`.
|
||||
|
||||
## Repo layout
|
||||
|
||||
| Path | Purpose |
|
||||
|------|---------|
|
||||
| `docs/SPEC.md` | Product specification — vision, requirements, modules (the main planning doc) |
|
||||
| `docs/ARCHITECTURE.md` | Technical design — core engine, front-ends, daemon, installer |
|
||||
| `docs/MODULES.md` | Catalog of modules with scope, dependencies, status |
|
||||
| `docs/ROADMAP.md` | Phased milestones |
|
||||
| `docs/DECISIONS.md` | Decision log + remaining open questions |
|
||||
| `src/rigdoctor/` | Source code — `core/` engine + sources, `cli.py`, `render.py` |
|
||||
| `installer/` | Installer / `.deb` packaging (empty until Phase 4) |
|
||||
| `tests/` | Tests (stdlib `unittest`) |
|
||||
|
||||
## Install (user-local, no root)
|
||||
|
||||
RigDoctor installs into a private venv under `~/.local` — no root, self-updating:
|
||||
The simplest path: grab the latest **`rigdoctor_<version>_all.deb`** from the
|
||||
[releases page](https://git.jesseyvanofferen.com/jessey/rigdoctor/releases) and install it —
|
||||
apt pulls the GUI dependencies (PySide6, pyte) automatically:
|
||||
|
||||
```bash
|
||||
./install.sh # from a source checkout or the self-extracting .run
|
||||
./install.sh --ref v0.0.6 # install a specific released tag (needs a token)
|
||||
./install.sh --uninstall # remove it
|
||||
sudo apt install ./rigdoctor_*_all.deb # CLI only: add --no-install-recommends
|
||||
```
|
||||
|
||||
This adds `rigdoctor` / `rigdoctor-gui` to `~/.local/bin` and a desktop entry. Each release
|
||||
also ships a one-file **`.run`** installer (download, `chmod +x`, run). Updates are gated to
|
||||
accounts on the Git server (a Personal Access Token); save one via the GUI **Setup → Update
|
||||
access** panel or `rigdoctor login`, then `rigdoctor update` (or the sidebar button).
|
||||
|
||||
## Run it (dev)
|
||||
|
||||
Stdlib-only, no install needed (target is Python ≥ 3.11; tested on 3.14):
|
||||
**Or add the apt repository** for `apt install` + automatic updates (the registry is public and
|
||||
GPG-signed — no token needed):
|
||||
|
||||
```bash
|
||||
PYTHONPATH=src python3 -m rigdoctor snapshot # one-shot sensor read
|
||||
PYTHONPATH=src python3 -m rigdoctor snapshot --json
|
||||
PYTHONPATH=src python3 -m rigdoctor monitor -n 1 # live view (Ctrl-C to quit)
|
||||
PYTHONPATH=src python3 -m rigdoctor sources # list detected sensor sources
|
||||
PYTHONPATH=src python3 -m unittest discover -s tests
|
||||
sudo curl https://git.jesseyvanofferen.com/api/packages/jessey/debian/repository.key -o /etc/apt/keyrings/gitea-jessey.asc
|
||||
echo "deb [arch=all signed-by=/etc/apt/keyrings/gitea-jessey.asc] https://git.jesseyvanofferen.com/api/packages/jessey/debian stable main" | sudo tee /etc/apt/sources.list.d/gitea.list
|
||||
sudo apt update
|
||||
sudo apt install rigdoctor
|
||||
```
|
||||
|
||||
### Crash-capture logger (M3)
|
||||
Then `sudo apt upgrade` keeps it current.
|
||||
|
||||
A crash-safe background logger (JSONL, `fsync` per sample, bounded by rotation) for catching
|
||||
the state right before a freeze:
|
||||
Then `sudo apt upgrade` keeps it current.
|
||||
|
||||
### Any distro — self-extracting `.run` (no root)
|
||||
|
||||
Download **`rigdoctor-<version>-installer.run`** from the releases page and run it. It installs
|
||||
into a private virtualenv under `~/.local` (no root), adds the launchers + desktop entry, and
|
||||
opens the first-run setup wizard:
|
||||
|
||||
```bash
|
||||
rigdoctor record start # start logging in the background
|
||||
rigdoctor record status # is it running? latest readings, sample count
|
||||
rigdoctor record stop # stop it
|
||||
rigdoctor record report # post-crash summary: peaks, events, last samples
|
||||
rigdoctor record run # run in the foreground (the systemd-ready entrypoint)
|
||||
sh rigdoctor-*-installer.run
|
||||
```
|
||||
|
||||
Logs live in `~/.local/share/rigdoctor/logs/`. It detects GPU "lost"/hang (nvidia-smi query
|
||||
timeout) and writes an event marker. Trigger modes (always-on / game-launch) and the
|
||||
`systemd --user` service arrive in Phase 4.
|
||||
### Updating & removing
|
||||
|
||||
### Desktop GUI (M10)
|
||||
- **`.deb`:** `sudo apt upgrade` (or reinstall a newer `.deb`).
|
||||
- **`.run` / user-local:** the in-app **Update** button, or `rigdoctor update`.
|
||||
- **Remove:** `sudo apt remove rigdoctor`, or `rigdoctor uninstall` for the user-local install.
|
||||
|
||||
The GUI uses PySide6 (Qt) — the only part of RigDoctor that needs a non-stdlib dep:
|
||||
## Using it
|
||||
|
||||
Launch **RigDoctor** from your app menu, or:
|
||||
|
||||
```bash
|
||||
pip install -e '.[gui]' # core + PySide6, gives `rigdoctor` and `rigdoctor-gui`
|
||||
rigdoctor gui # or: rigdoctor-gui
|
||||
rigdoctor-gui # desktop app (+ tray)
|
||||
rigdoctor --help # everything from the terminal (works over SSH)
|
||||
```
|
||||
|
||||
It opens a dark-themed window with sidebar navigation and a **live dashboard** over the
|
||||
same sensor core — circular gauges for the headline metrics plus collapsible per-subsystem
|
||||
cards (GPU/CPU/memory/storage) with temperature-colored values (icy-blue → green → red).
|
||||
The **Logs** and **Health** sections are full pages (recording controls + post-crash report;
|
||||
and the kernel-log / SMART / driver scan). **Inventory** is a placeholder until M5 lands.
|
||||
Handy CLI commands:
|
||||
|
||||
Without the GUI extra, `pip install -e .` gives just the stdlib-only CLI.
|
||||
```bash
|
||||
rigdoctor snapshot # one-shot reading of every sensor
|
||||
rigdoctor monitor # live terminal dashboard
|
||||
rigdoctor report # health report (logs / SMART / driver)
|
||||
rigdoctor diagnose start|finish # capture while gaming, then analyse
|
||||
rigdoctor gameenv # flag risky gaming settings + fixes
|
||||
rigdoctor inventory # hardware/OS inventory
|
||||
rigdoctor ai explain # AI explanation of the current findings (opt-in)
|
||||
rigdoctor bundle # zip the latest diagnostic into a shareable report
|
||||
```
|
||||
|
||||
## Start here
|
||||
## Requirements
|
||||
|
||||
1. Read `docs/SPEC.md` for what we're building.
|
||||
2. Read `docs/ROADMAP.md` for the build order (Phase 1 = the MVP).
|
||||
3. Read `docs/DECISIONS.md` for the settled decisions (D1–D15).
|
||||
</content>
|
||||
- **Linux** — Ubuntu/Debian first-class (the `.deb`); the `.run` works on any distro with
|
||||
Python ≥ 3.11.
|
||||
- **GPU** — NVIDIA fully supported (via `nvidia-smi`); AMD/Intel sensors are best-effort.
|
||||
- **CLI/daemon** need only Python 3 (stdlib). The **GUI/tray** add **PySide6** (`python3-pyside6`).
|
||||
- Optional tools unlock more: `smartmontools`, `lm-sensors`, `gamemode`, `mangohud`. The setup
|
||||
wizard offers to install them.
|
||||
|
||||
## Privacy
|
||||
|
||||
Everything stays on your machine — no telemetry, no phone-home. The AI assistant is **off by
|
||||
default** and runs only when you explicitly trigger it; with Ollama nothing leaves the machine,
|
||||
and the Claude option asks before sending. Reports are local files; they leave only if you share
|
||||
the zip.
|
||||
|
||||
## Development
|
||||
|
||||
RigDoctor's core is stdlib-only Python; the GUI/tray use PySide6.
|
||||
|
||||
```bash
|
||||
git clone https://git.jesseyvanofferen.com/jessey/rigdoctor && cd rigdoctor
|
||||
pip install -e ".[gui]" # core + GUI; omit [gui] for CLI-only
|
||||
python -m unittest discover -s tests # run the test suite
|
||||
PYTHONPATH=src python3 -m rigdoctor snapshot # run without installing
|
||||
```
|
||||
|
||||
Design docs live in `docs/` — `SPEC.md` (vision/requirements), `ARCHITECTURE.md`,
|
||||
`MODULES.md` (module catalog), `ROADMAP.md`, and `DECISIONS.md` (the decision log).
|
||||
Contributions: branch off `main`, keep tests green (CI runs them on PRs), and bump the version
|
||||
and update `CHANGELOG.md` for shipped changes.
|
||||
|
||||
|
After Width: | Height: | Size: 42 KiB |
@@ -0,0 +1,17 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512">
|
||||
<defs>
|
||||
<radialGradient id="bg" cx="50%" cy="42%" r="78%">
|
||||
<stop offset="0%" stop-color="#1b2230"/>
|
||||
<stop offset="100%" stop-color="#0d0f13"/>
|
||||
</radialGradient>
|
||||
</defs>
|
||||
<rect width="512" height="512" fill="url(#bg)"/>
|
||||
<!-- gauge ring -->
|
||||
<circle cx="256" cy="256" r="168" fill="none" stroke="#2a2f39" stroke-width="28"/>
|
||||
<!-- accent sweep -->
|
||||
<path d="M256 88 a168 168 0 1 1 -118.8 49.2" fill="none" stroke="#38bdf8"
|
||||
stroke-width="28" stroke-linecap="round"/>
|
||||
<!-- heartbeat / monitoring trace -->
|
||||
<path d="M120 264 H200 L232 192 L280 336 L312 264 H392" fill="none" stroke="#e6e8eb"
|
||||
stroke-width="28" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 798 B |
|
After Width: | Height: | Size: 171 KiB |
|
After Width: | Height: | Size: 141 KiB |
|
After Width: | Height: | Size: 78 KiB |
@@ -223,9 +223,62 @@ The next version is **determined by the Conventional Commit types** since the la
|
||||
`packaging/bump.sh` writes it into `__init__.py` + `pyproject.toml`. Rules live in
|
||||
`cliff.toml [bump]` (pre-1.0: `breaking_always_bump_major = false`).
|
||||
|
||||
### D22 — Limited live apply of fixes (M6) — *DECIDED 2026-05-22; realizes the D9 milestone*
|
||||
D9 deferred auto-applying fixes to "a deliberate later milestone, gated behind explicit user
|
||||
consent." That milestone lands here, **scoped tightly to stay safe**:
|
||||
- **Only runtime-reversible settings** can be applied from the gaming-environment report (M6):
|
||||
**CPU governor, NVIDIA persistence mode, PCIe ASPM policy, vm.swappiness, Transparent
|
||||
HugePages.** Each takes effect immediately, needs **no reboot**, and reverts on reboot.
|
||||
- **How:** a dropdown of the live options + an Apply button per finding (`core/fixes.py`).
|
||||
Applying runs a **single pkexec-elevated command** (one auth prompt); the chosen value is
|
||||
validated against the live options first; writes target **sysfs/procfs or `nvidia-smi`** —
|
||||
never the GRUB cmdline or a persistent config file.
|
||||
- **Still suggestion-only** (the read-only stance holds for these): GRUB-based `pcie_aspm=off`,
|
||||
CPU **mitigations** changes (security-sensitive, need a reboot), and the shader-cache env var.
|
||||
- Everything remains **CLI-discoverable** (`rigdoctor gameenv` still prints the exact commands);
|
||||
the apply UI is an additive convenience in the GUI, not the only path. Installing optional
|
||||
tools (GameMode/MangoHud/cpupower) reuses the M9 installer and is likewise one-click.
|
||||
|
||||
### D23 — Session sharing scoped to a shared terminal only — *DECIDED 2026-05-22; amends D16*
|
||||
D16's escalating ladder (export → read-only stats view → terminal) is **cut down to just the
|
||||
shared terminal.** Rationale: the terminal is the only mode the owner wants; the stats view
|
||||
duplicated what the GUI already shows and added surface area. Concretely:
|
||||
- **Removed:** the read-only stats view + its HTTP server (`core/share.py`, `rigdoctor share
|
||||
serve`) and the (never-built) bundle export. The `share` CLI command is gone.
|
||||
- **Kept & finished:** the relay **shared terminal** (host PTY of `$SHELL`) — now color-rendered
|
||||
(preserves fish/ls/git theming), full-screen-able, with the guest read-only unless the host
|
||||
ticks "Allow the guest to type" (the D9 consent exception). Account-gated by the Gitea token.
|
||||
|
||||
### D24 — AI assistant module (M14) — *DECIDED 2026-05-22; adds to D14*
|
||||
A new optional module that **explains the collected diagnostics in plain language** (likely
|
||||
root cause + suggested next steps). Adds M14 to the D14 set.
|
||||
- **Strictly opt-in, never automatic.** The model is contacted **only** on an explicit user
|
||||
action (an "Explain with AI" button / `rigdoctor ai explain`) — never on launch, after a
|
||||
diagnostic, in the sample/record loop, or in the background. **Configuring** a provider does
|
||||
not trigger any call.
|
||||
- **Local-first.** Defaults to a local **Ollama** server (data never leaves the machine, no
|
||||
key, stdlib `urllib`). An **OpenAI-compatible** endpoint (cloud or local) can be used with a
|
||||
key (stored in the keyring like the update token). Cloud use shows a "this sends your data to
|
||||
X" consent before the first call.
|
||||
- **Grounded & advisory.** The prompt carries only the findings we collected; output is framed
|
||||
as suggestions (consistent with D9 — it explains/recommends, applying fixes stays
|
||||
consent-gated). No new runtime dependency (HTTP via stdlib).
|
||||
|
||||
### D25 — Logging & report bundles (M15) — *DECIDED 2026-05-22*
|
||||
Opt-in logging + shareable diagnostic reports.
|
||||
- **One combined `logging_enabled` toggle** (default off) controls both application logging
|
||||
(rotating `app.log`) and per-diagnostic storage. Kept as a single switch for simplicity.
|
||||
- **Each diagnostic is stored in its own directory** (`DATA_DIR/diagnostics/<id>/`): capture
|
||||
log, structured `result.json`, human-readable `report.txt`, a scoped game-log snapshot, and an
|
||||
`ai/` folder recording each AI interaction (**exact data sent, provider+model, and the reply**).
|
||||
- **"Report"** zips one diagnostic directory (plus `app.log`) into `DATA_DIR/reports/` —
|
||||
auto-saved there (no save dialog), shown with its path. Available only when logging is on
|
||||
(nothing is stored otherwise). CLI: `rigdoctor bundle`.
|
||||
- Everything stays local; the report only leaves the machine if the user shares the zip.
|
||||
|
||||
## Open
|
||||
|
||||
None currently — all tracked decisions (D1–D21) are resolved. New questions will be added
|
||||
None currently — all tracked decisions (D1–D25) are resolved. New questions will be added
|
||||
here as they arise. Remaining detail to flesh out during build: the tray's supporting-action
|
||||
set (D13), per-module apt package names, M12's tunnel/token specifics, and M13's
|
||||
update mechanism (APT repo vs. self-installed `.deb`).
|
||||
|
||||
@@ -2,24 +2,27 @@
|
||||
|
||||
Status: ⬜ not started · 🟦 designing · 🟨 in progress · ✅ done
|
||||
|
||||
> Module set per D14, plus **M12 (session sharing, D16)** and **M13 (auto-update, D18)**.
|
||||
> Module set per D14, plus **M12 (session sharing, D16)**, **M13 (auto-update, D18)**,
|
||||
> **M14 (AI assistant, D24)**, and **M15 (logging & reports, D25)**.
|
||||
> **M7 (stress/repro) was dropped (D7).** M10/M11 are the GUI and tray modules (D10/D11).
|
||||
> GPU scope reads "all (NVIDIA first)" — NVIDIA first, others via the vendor abstraction (D4).
|
||||
|
||||
| ID | Module | Bundle | Key deps | GPU scope | Priority | Status |
|
||||
|----|--------|--------|----------|-----------|----------|--------|
|
||||
| M1 | Sensor core | Essential | none (nvidia-smi, sysfs) | all (NVIDIA first) | P0 | ⬜ |
|
||||
| M3 | Crash-capture logger | Essential | none (opt: smartmontools) | all (NVIDIA first) | P0 | 🟨 |
|
||||
| M4 | Health report (log scan) | Essential | none (opt: smartmontools) | all (NVIDIA first) | P0 | 🟨 |
|
||||
| M2 | Live monitor (TUI) | Monitoring | none (stdlib curses) | all | P1 | ⬜ |
|
||||
| M8 | Alerting | Monitoring | libnotify (opt) | all | P2 | ⬜ |
|
||||
| M5 | System inventory | Diagnostics | none (opt: lm-sensors, dmidecode) | all | P1 | 🟨 |
|
||||
| M6 | Gaming env checks | Diagnostics | none | all | P2 | ⬜ |
|
||||
| M10 | Desktop GUI | Desktop UI | **python3-pyside6** | all | P2 | 🟨 |
|
||||
| M11 | Tray / menu-bar applet | Desktop UI | **python3-pyside6** (+ AppIndicator on GNOME) | all | P2 | ⬜ |
|
||||
| M9 | Installer | (meta) | none | all | P1 | 🟨 |
|
||||
| M12 | Session sharing / remote assist | Sharing | none (Tier 3: tmate/sshx) | all | P3 | ⬜ |
|
||||
| M13 | Auto-update | (core) | none (stdlib; user-local file swap) | all | P3 | 🟨 |
|
||||
| M1 | Sensor core | Essential | none (nvidia-smi, sysfs) | all (NVIDIA first) | P0 | ✅ |
|
||||
| M3 | Crash-capture logger | Essential | none (opt: smartmontools) | all (NVIDIA first) | P0 | ✅ |
|
||||
| M4 | Health report (log scan) | Essential | none (opt: smartmontools) | all (NVIDIA first) | P0 | ✅ |
|
||||
| M2 | Live monitor (TUI) | Monitoring | none (stdlib curses) | all | P1 | ✅ |
|
||||
| M8 | Alerting | Monitoring | libnotify (opt) | all | P2 | ✅ |
|
||||
| M5 | System inventory | Diagnostics | none (opt: lm-sensors, dmidecode) | all | P1 | ✅ |
|
||||
| M6 | Gaming env checks | Diagnostics | none | all | P2 | 🟨 |
|
||||
| M10 | Desktop GUI | Desktop UI | **python3-pyside6** | all | P2 | ✅ |
|
||||
| M11 | Tray / menu-bar applet | Desktop UI | **python3-pyside6** (+ AppIndicator on GNOME) | all | P2 | ✅ |
|
||||
| M9 | Installer (+ `.deb`) | (meta) | none | all | P1 | ✅ |
|
||||
| M12 | Session sharing (shared terminal) | Sharing | none (relay) | all | P3 | ✅ |
|
||||
| M13 | Auto-update | (core) | none (stdlib; user-local file swap) | all | P3 | ✅ |
|
||||
| M14 | AI assistant (explain diagnostics) | (optional) | none (stdlib urllib; Ollama or Claude) | all | P3 | ✅ |
|
||||
| M15 | Logging & report bundles | (core) | none (stdlib logging + zip) | all | P3 | ✅ |
|
||||
| ~~M7~~ | ~~Stress / repro~~ | — | — | — | — | ❌ dropped (D7) |
|
||||
|
||||
## Notes per module
|
||||
@@ -31,28 +34,62 @@ Status: ⬜ not started · 🟦 designing · 🟨 in progress · ✅ done
|
||||
*Implemented (manual trigger):* JSONL log with fsync-per-sample, size-based rotation
|
||||
(`log_max_bytes`/`log_backups`), GPU-lost/recovered event markers, atomic status file, and
|
||||
`rigdoctor record run|start|stop|status|report`. The foreground `run` is the systemd-ready
|
||||
entrypoint; the service unit + always-on/game-launch triggers (D6/D12) land in Phase 4.
|
||||
Also fully driven from the GUI's Recording/Logs page (M10) via shared `core.reccontrol`.
|
||||
entrypoint. The **game-launch trigger** is implemented via the D12 wrapper (`rigdoctor wrap
|
||||
%command%`, see M6/below); the `systemd --user` service unit + always-on trigger (D6) and the
|
||||
zero-config watcher (D12) are still pending. Also fully driven from the GUI's Recording/Logs
|
||||
page (M10) via shared `core.reccontrol`.
|
||||
- **M4 Health report** — turns scattered logs into a prioritized, plain-language findings
|
||||
list with **suggested** fixes (read-only, D9). Reuses M1 for a live snapshot. Also powers
|
||||
the **guided diagnostic session** (with M3): pick a game → focused capture → scan →
|
||||
findings (see SPEC §4). *Implemented:* journalctl scan (Xid/panic/OOM/MCE/AER/thermal/amdgpu),
|
||||
SMART, NVIDIA driver-mismatch, journald-persistence + live-temp checks; `rigdoctor report`
|
||||
(text/JSON) + GUI Health tab. GPU-firmware verification deferred.
|
||||
- **M2 Live monitor** — depends on M1; the terminal "HWMonitor for Linux" face. Stdlib-only.
|
||||
- **M2 Live monitor** — the terminal "HWMonitor for Linux" face. *Implemented (`tui.py`):*
|
||||
`rigdoctor monitor` is a stdlib **curses** dashboard — current / session-min / session-max
|
||||
per sensor, grouped by subsystem, with temperature & utilization color bands; `q` quits,
|
||||
`r` resets the min/max. Falls back to a plain redraw on a non-TTY (`--plain` forces it).
|
||||
- **M5 / M6 Diagnostics** — inventory export + gaming-env checks; M6 flags risky settings and
|
||||
suggests the fix command but does not apply it (D9).
|
||||
suggests the fix command but does not apply it (D9). *M6 implemented (Steam detection first —
|
||||
the D12 "pick a game" foundation):* discovers Steam installs + all library folders
|
||||
(`libraryfolders.vdf`, multi-drive) and the games in each (`appmanifest_*.acf`), filtering
|
||||
runtimes/Proton/redistributables — stdlib only. **Libraries are opt-in** (`steam_libraries`
|
||||
config); the GUI **Games** page lists them with per-library counts and rescans in the
|
||||
background on every launch, badging games installed since the last scan (cached in
|
||||
`state/games.json`). CLI: `rigdoctor games` / `games libraries [--enable|--disable|--all]`.
|
||||
*Env-check engine implemented* (`core/gameenv.py`): a read-only findings report (reusing the
|
||||
M4 `Finding` model) over PCIe ASPM, NVIDIA persistence mode, CPU governor (the three seed-case
|
||||
contributors to GPU bus-drop / Xid 79), GameMode, MangoHud, swappiness, shader cache, THP, CPU
|
||||
mitigations, and installed Proton versions — each with the suggested fix command. CLI
|
||||
`rigdoctor gameenv`; GUI **Environment** page. Per **D22**, the GUI adds **one-click apply**
|
||||
for the runtime-reversible tunables (governor / NVIDIA persistence / PCIe ASPM / swappiness /
|
||||
THP — dropdown + Apply via a single pkexec prompt, `core/fixes.py`) and **one-click install**
|
||||
of optional tools (GameMode / MangoHud / cpupower, now in the M9 catalog). GRUB/mitigations
|
||||
stay suggestion-only. *Guided diagnostic (D12 "pick a game", `core/diagnostic.py`):* a focused
|
||||
capture tagged with a game → window-scoped report (capture summary + M4 findings), in the CLI
|
||||
(`rigdoctor diagnose start/status/finish`) and GUI (per-game **Run Diagnostic** → recording
|
||||
banner → results dialog). **Auto-capture** via the D12 wrapper (`rigdoctor wrap %command%`,
|
||||
`core/wrap.py`; GUI "Auto-capture…" helper). **Hard crashes are detected** (capture left
|
||||
without a clean stop) and flagged on next launch with a crash-boot kernel-log analysis
|
||||
(`pending_crash`/`analyze_crash` + `health.check_previous_boot`). **Non-Steam launchers**
|
||||
(Lutris SQLite + Heroic JSON, `core/launchers.py`) are detected and listed alongside Steam
|
||||
games; env checks also cover **GPU PowerMizer** (X), **Wine** and **Steam-client** versions.
|
||||
*Pending:* the zero-config watcher (D12 fallback) — landing with M9's trigger-mode work.
|
||||
- **M8 Alerting** — threshold/event notifications; integrates with the tray applet (M11).
|
||||
- **M10 Desktop GUI** — PySide6 graphical front-end over the core engine (dashboard, log
|
||||
browser, report viewer, logger controls). Optional; adds the Qt dependency. *Bootstrapped
|
||||
early (ahead of its Phase 4 slot) at the user's request:* dark-themed window with sidebar
|
||||
nav, a live dashboard (circular gauges + collapsible per-subsystem cards, temperature-
|
||||
colored values), and a **Recording/Logs page** with full M3 controls (start/stop/status +
|
||||
post-crash report). Health/Inventory remain placeholders until M4/M5. GUI-first per D17.
|
||||
- **M11 Tray applet** — `QSystemTrayIcon` menu-bar applet. Dropdown shows live M1 readouts
|
||||
(CPU temp, GPU temp, memory used/total, status dot) and is led by a **Run Diagnostic**
|
||||
action (the guided diagnostic session), plus Open dashboard / Start-Stop recording /
|
||||
Snapshot / Quit (D13). Optional; shares the Qt dependency with M10.
|
||||
- **M10 Desktop GUI** — PySide6 graphical front-end over the core engine. Optional; adds the
|
||||
Qt dependency. Dark-themed window with a **grouped sidebar** (Monitor / Diagnose / System /
|
||||
App) over: **Dashboard** (live history graphs + per-subsystem cards), **Games** (M6 detection
|
||||
and Run Diagnostic), **Recordings** (recorder controls + view/report any captured log + analyze
|
||||
a crash), **System Health** (M4 scan), **Tuning** (M6 gaming tunables + fixes), **Inventory**
|
||||
(M5), **Settings** (components/deps + alerts + account + uninstall), and **Share** (M12). A
|
||||
global recording badge shows on every page. GUI-first per D17.
|
||||
- **M11 Tray applet** — `QSystemTrayIcon` menu-bar applet. *Implemented (`gui/tray.py`, D13):*
|
||||
the menu shows live M1 readouts (CPU temp, GPU temp, memory used/total) + a status line
|
||||
(Normal / Hot / GPU not responding), led by a **Run Diagnostic** submenu (per detected game →
|
||||
the guided session), plus Open dashboard / Start-Stop recording / Snapshot-copy / Quit. It
|
||||
shares the dashboard's sample stream (no extra sampling) and drives the existing MainWindow
|
||||
flows. With a tray present, closing the window **hides to the tray** (Quit exits); `rigdoctor-gui
|
||||
--tray` starts hidden for autostart. Optional; shares the Qt dependency with M10. *Needs a tray
|
||||
host* — on GNOME that means the AppIndicator extension; degrades to no-op if none is available.
|
||||
- **M9 Installer** — interactive wizard layered on the `.deb` (D8); apt-first dependency
|
||||
resolution; enables the logger service and trigger mode. *Implemented (first cut):* distro/
|
||||
package-manager/GPU detection (`core/sysenv`), an optional-component catalog (`core/catalog`),
|
||||
@@ -62,12 +99,13 @@ Status: ⬜ not started · 🟦 designing · 🟨 in progress · ✅ done
|
||||
**`.run`** (pure-Python self-extractor, `packaging/make_run.py`, built by CI). *Pending:*
|
||||
config/module selection + `systemd --user`
|
||||
service enable.
|
||||
- **M12 Session sharing / remote assist** (D16) — let a helper inspect a user's machine, in
|
||||
an escalating ladder: (1) **diagnostic bundle export** (inventory + recent log + report,
|
||||
one-way), (2) **live read-only view** over a user-chosen tunnel (Tailscale/cloudflared/SSH,
|
||||
no hosted relay), (3) **gated interactive terminal** wrapping tmate/sshx (read-only by
|
||||
default; read-write only on explicit consent — a deliberate exception to D9). Per-session
|
||||
consent, ephemeral revocable tokens, audit log.
|
||||
- **M12 Session sharing / remote assist** (D16, scoped to terminal-only by **D23**) — a single
|
||||
mode: a **host-consented shared terminal** over the relay. The host shares a real PTY running
|
||||
their `$SHELL` (colors/theming preserved — fish etc.); the guest watches live and can type
|
||||
**only if the host allows it** (otherwise read-only) — a deliberate, consent-gated exception
|
||||
to D9. The host reads along and can type too (e.g. a sudo password, which stays local). Either
|
||||
side can pop the terminal **full-screen**. Account-gated by the Gitea token. *The earlier
|
||||
read-only stats view and `share serve` (Tier 1/2) were removed.*
|
||||
- **M13 Auto-update** (D18) — *check + auth implemented:* updates are **gated to Gitea account
|
||||
holders** via a Personal Access Token, stored **encrypted in the OS keyring** (`secret-tool`)
|
||||
with a 0600-file fallback (`config.load_token`/`save_token`/`token_backend`). `core/updates`
|
||||
@@ -82,6 +120,25 @@ Status: ⬜ not started · 🟦 designing · 🟨 in progress · ✅ done
|
||||
atomic symlink swap → restart, incl. the daemon). HTTPS-only, version-check-only (no
|
||||
telemetry), opt-out-able. Surfaced in the GUI; `rigdoctor update` in the CLI. (`.deb` users
|
||||
update via apt instead.)
|
||||
- **M14 AI assistant** (D24) — optional, **strictly opt-in, never automatic**: explains the
|
||||
collected diagnostics in plain language only when the user presses **"Explain with AI"**
|
||||
(`core/ai.py`, GUI button on the diagnostic dialog, `rigdoctor ai explain`). The user picks a
|
||||
provider explicitly (no default): **Ollama** (local, private, no key) or **Claude** (Anthropic
|
||||
Messages API, key in the keyring; consent prompt before sending). Answers are **grounded** —
|
||||
we pass the actual findings plus matched reference facts from a curated knowledge base
|
||||
(`core/ai_knowledge.py`, "RAG-lite": exact keyword/code match, no embeddings, stdlib only),
|
||||
which improves a small local model's answers and sharpens Claude's. Stdlib `urllib` (no pip deps); output is
|
||||
advisory (D9). Configure in **Settings → AI assistant**.
|
||||
|
||||
- **M15 Logging & report bundles** (D25) — opt-in via one `logging_enabled` toggle (default off):
|
||||
application logging to a rotating `app.log` (`core/applog.py`) and **per-diagnostic storage**
|
||||
(`core/diagstore.py`) — each diagnostic gets its own `DATA_DIR/diagnostics/<id>/`: capture,
|
||||
`result.json`, `report.txt`, the full **inventory** (M5: hardware/OS), scoped **game logs**
|
||||
(`core/gamelogs.py`), scoped **system logs** (`core/syslogs.py` — `journalctl -k`,
|
||||
`coredumpctl`, an `nvidia-smi -q` snapshot, and the X11/Wayland display-server log), and an
|
||||
`ai/` record of every AI interaction (exact data sent, model, reply). **"Report"** zips one
|
||||
into `DATA_DIR/reports/` (GUI button on the diagnostic dialog; CLI `rigdoctor bundle`). Logs
|
||||
are session-scoped and fed to the AI on "Explain". Stays local; shareable on demand.
|
||||
|
||||
## Bundles (final — D14)
|
||||
- **Essential:** M1 + M3 + M4 *(the MVP, NVIDIA-only — D5)*
|
||||
@@ -89,6 +146,7 @@ Status: ⬜ not started · 🟦 designing · 🟨 in progress · ✅ done
|
||||
- **Diagnostics:** M5 + M6
|
||||
- **Desktop UI:** M10 + M11 *(adds PySide6)*
|
||||
- **Sharing:** M12 *(session sharing / remote assist — D16)*
|
||||
- **AI:** M14 *(optional AI explanations — D24)*
|
||||
|
||||
## MVP candidate — *confirmed (D5)*
|
||||
**M1 + M3 + M4 (Essential), NVIDIA-only, CLI-first.** Gives a working tool that captures the
|
||||
|
||||
@@ -22,28 +22,57 @@ Ubuntu + NVIDIA first; `.deb` distribution (see `DECISIONS.md`).
|
||||
last readings + a plausible cause.
|
||||
|
||||
## Phase 2 — Live monitor (terminal)
|
||||
- [ ] M2 TUI dashboard (current/min/max, grouped, throttle highlighting)
|
||||
- [x] M2 TUI dashboard (`rigdoctor monitor`, `tui.py`): curses, current/min/max grouped by
|
||||
subsystem with temp/usage color bands; q quit / r reset; plain-redraw fallback on non-TTY
|
||||
- [ ] M8 basic alerting (overheat/throttle/GPU-lost notifications)
|
||||
|
||||
## Phase 3 — Diagnostics breadth
|
||||
- [ ] M5 system inventory + exportable report
|
||||
- [ ] M6 gaming environment checks (suggest-only)
|
||||
- [~] M6 gaming environment checks (suggest-only) — *Steam game/library detection done*
|
||||
(multi-library `libraryfolders.vdf` discovery + `appmanifest` scan, opt-in libraries,
|
||||
launch-time background rescan with new-game badge; CLI `rigdoctor games`, GUI Games page).
|
||||
This is also the D12 "pick a game" foundation. *Env-check engine done* (`rigdoctor gameenv`
|
||||
and GUI Environment page): PCIe ASPM, NVIDIA persistence, CPU governor, GameMode, MangoHud,
|
||||
swappiness, shader cache, THP, mitigations, Proton versions — read-only with fix commands.
|
||||
Also: GPU PowerMizer (X), Wine + Steam-client versions, and non-Steam launchers
|
||||
(Lutris/Heroic, `core/launchers.py`). *Pending:* the zero-config watcher (D12 fallback,
|
||||
lands with M9's trigger-mode work).
|
||||
- [ ] SMART integration (smartmontools if present)
|
||||
|
||||
## Phase 4 — Desktop UI & installer
|
||||
- [ ] M10 desktop GUI (PySide6: dashboard, log browser, report viewer, logger controls)
|
||||
- [ ] M11 tray / menu-bar applet (QSystemTrayIcon: live M1 readouts + Run Diagnostic +
|
||||
supporting actions — D13)
|
||||
- [ ] Guided diagnostic session (pick game → focused M3 capture → M4 scan → findings),
|
||||
shared by tray/GUI/CLI
|
||||
- [ ] Logger trigger modes: always-on + game-launch (D12 — wrapper first:
|
||||
`rigdoctor wrap %command%` + global Steam compat-tool; zero-config watcher
|
||||
(Steam RunningAppID + /proc) and GameMode hook follow)
|
||||
- [x] M10 desktop GUI (PySide6: dashboard w/ history graphs, logs, health, games, environment,
|
||||
inventory, setup, notifications, share)
|
||||
- [x] M11 tray / menu-bar applet (`gui/tray.py`: live CPU/GPU temp + memory readouts, status
|
||||
line, Run Diagnostic submenu per game, Open dashboard / Start-Stop recording / Snapshot /
|
||||
Quit — D13; close-to-tray, `--tray` autostart). Needs a tray host (AppIndicator on GNOME).
|
||||
- [~] Guided diagnostic session (pick game → focused M3 capture → M4 scan → findings),
|
||||
shared by tray/GUI/CLI — *core + CLI + GUI done* (`core/diagnostic.py`, `rigdoctor
|
||||
diagnose start/status/finish`, and a **Run Diagnostic** button per game on the GUI Games
|
||||
page → recording banner → results dialog with the capture summary + findings). Tags a
|
||||
focused capture with the chosen game (own diagnostic log, window-scoped report) and
|
||||
combines the capture summary with the M4 findings. **Auto start/stop** via the D12
|
||||
wrapper is wired in, and a **hard-crash is detected** (capture left without a clean stop)
|
||||
→ flagged on next launch with a deeper crash-boot log analysis. *Pending:* the tray (M11)
|
||||
entry point and the zero-config watcher.
|
||||
- [~] Logger trigger modes: always-on + game-launch (D12) — *game-launch **wrapper** done:*
|
||||
`rigdoctor wrap %command%` (per-game Steam launch option / Lutris/Heroic wrapper field)
|
||||
auto-brackets a focused capture around the game; GUI "Auto-capture…" helper shows the
|
||||
launch-option string. *Pending:* global Steam compat-tool registration, the zero-config
|
||||
watcher (Steam RunningAppID + /proc), GameMode hook, and the always-on `systemd --user`
|
||||
service.
|
||||
- [~] M9 interactive installer — *done:* distro/GPU detection + optional-dependency install
|
||||
(`rigdoctor install`, GUI Setup tab); **user-local `install.sh` + self-extracting `.run`**
|
||||
(no-root venv install, handles python3-venv prereq, CI-built). *Pending:* module-selection
|
||||
config + `systemd --user` service enable + trigger-mode pick.
|
||||
- [ ] `.deb` packaging (D8) declaring per-bundle deps incl. python3-pyside6 for Desktop UI
|
||||
(`rigdoctor install`, GUI Settings); **user-local `install.sh` + self-extracting `.run`**
|
||||
(no-root venv install, handles python3-venv prereq, CI-built); **`systemd --user` trigger
|
||||
modes** (`core/service.py`, `rigdoctor service mode manual|always-on|game-launch` + GUI
|
||||
Settings "Recording trigger") incl. the zero-config **game-launch watcher**
|
||||
(`core/watcher.py`, `rigdoctor watch`); and a **graphical first-run setup wizard**
|
||||
(`gui/setup_wizard.py`): environment → dependency-bundle selection → install → recording
|
||||
trigger → readiness, auto-launched by install.sh and re-runnable from Settings; and a
|
||||
**`.deb`** (`packaging/make_deb.py`, `Architecture: all`, `Depends: python3`,
|
||||
`Recommends: python3-pyside6/pyte`) built + published in CI (release asset + optional
|
||||
Gitea apt registry). **M9 complete.**
|
||||
- [x] `.deb` packaging (D8) — built via `dpkg-deb` (no debhelper); GUI deps as Recommends so
|
||||
`apt install rigdoctor` includes the Desktop UI, `--no-install-recommends` = CLI only.
|
||||
|
||||
## Phase 5 — Breadth (later)
|
||||
- [ ] AMD GPU support in M1 (Steam Deck / Radeon)
|
||||
@@ -51,17 +80,32 @@ Ubuntu + NVIDIA first; `.deb` distribution (see `DECISIONS.md`).
|
||||
- [x] M13 auto-update (D18) — launch-time version check (GUI sidebar) + no-root self-update
|
||||
apply (`rigdoctor update` / sidebar button → authenticated pip upgrade), token-gated.
|
||||
Restart-after-update is manual for now.
|
||||
- [ ] (Later, separate milestone) Optional auto-apply of suggested fixes behind explicit
|
||||
consent — currently out of scope (D9)
|
||||
- [~] Optional auto-apply of suggested fixes behind explicit consent (D9 milestone) — *first
|
||||
cut shipped for M6 (D22):* one-click apply of runtime-reversible tunables (CPU governor,
|
||||
NVIDIA persistence, PCIe ASPM, swappiness, THP) via a single pkexec prompt, no reboot.
|
||||
GRUB-based fixes + CPU mitigations remain suggestion-only.
|
||||
|
||||
## Phase 6 — Session sharing / remote assist (M12, D16)
|
||||
Escalating ladder, built in order:
|
||||
- [ ] Tier 1: `share export` — diagnostic bundle (inventory + recent log + report); B opens
|
||||
it in RigDoctor. One-way, safest.
|
||||
- [ ] Tier 2: live read-only view (local server + user-chosen tunnel: Tailscale/cloudflared/
|
||||
SSH; no hosted relay), token-gated, A approves, revocable.
|
||||
- [ ] Tier 3: gated interactive terminal (wrap tmate/sshx; read-only default, read-write on
|
||||
explicit consent), with session audit log.
|
||||
## Phase 6 — Session sharing / remote assist (M12, D16 → scoped to terminal-only by D23)
|
||||
- [x] **Shared terminal** — a real PTY (host's `$SHELL`) shared over the relay, color-rendered
|
||||
(pyte), full-screen-able; the guest watches and may type only on host consent (D9
|
||||
exception); host reads along + can type (sudo). The single share mode.
|
||||
- [removed] The read-only stats view (`share serve`) and bundle export — dropped per D23; the
|
||||
shared terminal is the only sharing mode.
|
||||
|
||||
## Phase 7 — AI assistant (M14, D24)
|
||||
- [x] **Explain diagnostics with AI** — opt-in, never automatic (`core/ai.py`, "Explain with AI"
|
||||
button + `rigdoctor ai explain`). Provider chosen explicitly: **Ollama** (local) or
|
||||
**Claude** (Anthropic). Grounded with a curated reference KB (`core/ai_knowledge.py`,
|
||||
RAG-lite, exact match — no embeddings); stdlib `urllib`. Settings → AI assistant.
|
||||
- [ ] *Possible follow-ups:* interactive chat grounded in the data; more reference-KB entries;
|
||||
an "Explain" button on the System Health page.
|
||||
|
||||
## Phase 8 — Logging & report bundles (M15, D25)
|
||||
- [x] **Opt-in logging** (one `logging_enabled` toggle): rotating `app.log` (`core/applog.py`)
|
||||
and **per-diagnostic storage** in its own directory (`core/diagstore.py`) — capture,
|
||||
result, report, scoped game logs, and AI-interaction records.
|
||||
- [x] **Report** bundle — zip a diagnostic (incl. exactly what was sent to the AI, the model,
|
||||
and its reply) into the reports folder. GUI button + `rigdoctor bundle`.
|
||||
|
||||
> **Out of scope:** stress/repro module (D7); multi-distro support and packaging beyond
|
||||
> Ubuntu/apt + `.deb` (D15) — a thin seam is kept but not built out.
|
||||
|
||||
@@ -43,9 +43,12 @@ RigDoctor's crash-safe logger is designed to fix exactly that.
|
||||
- **Not a stress-test / load-generator** — explicitly out of scope (D7). Users can run
|
||||
existing tools (gpu-burn, vkmark, stress-ng) alongside the logger if they want.
|
||||
- Not an overclocking utility.
|
||||
- **Not (yet) an auto-fixer.** RigDoctor is **read-only**: it diagnoses and *suggests*
|
||||
actions (with the exact command where possible) but does not apply changes itself in this
|
||||
stage. Auto-apply is a deliberate later milestone behind explicit consent. (D9)
|
||||
- **Read-only by default, with a narrow consent-gated exception.** RigDoctor diagnoses and
|
||||
*suggests* actions (with the exact command where possible). It does **not** apply changes
|
||||
itself — **except** a small set of **runtime-reversible** gaming tunables (M6: CPU governor,
|
||||
NVIDIA persistence, PCIe ASPM policy, swappiness, THP) that can be applied from the GUI via a
|
||||
single pkexec prompt, with no reboot, and revert on reboot (D22, realizing the D9 milestone). Risky or
|
||||
persistent fixes (GRUB cmdline, CPU mitigations) remain suggestion-only.
|
||||
|
||||
## 3. Target users & platforms
|
||||
|
||||
@@ -96,8 +99,10 @@ PCIe topology. Exportable (Markdown/JSON) to paste into forum/bug reports.
|
||||
### M6 — Gaming environment checks
|
||||
Detects & evaluates: GPU power profile / persistence mode, CPU governor, Proton/Wine/Steam
|
||||
versions, GameMode, MangoHud, shader cache, swappiness, hugepages, CPU mitigations,
|
||||
PCIe ASPM. Flags settings that hurt stability/performance and **suggests** the fix command
|
||||
(read-only per D9).
|
||||
PCIe ASPM. Flags settings that hurt stability/performance and **suggests** the fix command.
|
||||
Also includes Steam library/game detection (the D12 "pick a game" foundation) and, per D22,
|
||||
a **one-click apply** for the runtime-reversible tunables (governor, persistence, ASPM,
|
||||
swappiness, THP) plus one-click install of optional tools (GameMode/MangoHud/cpupower).
|
||||
|
||||
### M8 — Alerting
|
||||
Threshold + event alerts (desktop notification / sound / log) on overheat, throttle,
|
||||
@@ -139,15 +144,35 @@ bundles with descriptions and the exact packages each needs → resolve & instal
|
||||
mode. Delivered with the user-local install (and the optional `.deb`) (D8). Module
|
||||
list/bundling is final per D14.
|
||||
|
||||
### M12 — Session sharing / remote assist (D16)
|
||||
Lets a user (A) grant a helper (B) inspection access, as an escalating, consent-driven
|
||||
ladder: (1) **diagnostic bundle export** (inventory + recent capture log + report, one-way);
|
||||
(2) **live read-only view** of the dashboard + logs over a user-chosen tunnel
|
||||
(Tailscale/cloudflared/SSH — no RigDoctor-hosted relay); (3) **gated interactive terminal**
|
||||
wrapping an existing tool (tmate/sshx), read-only by default, read-write only on explicit
|
||||
consent. Per-session consent, ephemeral revocable tokens, permission escalation (view ≠
|
||||
shell), and a session audit log. Tier 3 is a deliberate, consent-gated exception to the
|
||||
read-only stance (D9). Built in Phase 6.
|
||||
### M12 — Session sharing / remote assist (D16, scoped to terminal-only by D23)
|
||||
Lets a user (A) grant a helper (B) a **shared terminal** over the relay: A shares a real PTY
|
||||
running their shell; B watches live and may type **only if A allows it** (otherwise read-only)
|
||||
— a deliberate, consent-gated exception to the read-only stance (D9). A reads along and can
|
||||
type too (e.g. a sudo password, which stays local and is never sent to B). Account-gated by the
|
||||
Gitea token; per-session share code. The shared terminal preserves colors/theming and can be
|
||||
viewed full-screen. *(The earlier read-only stats view / bundle export were dropped — D23.)*
|
||||
|
||||
### M14 — AI assistant (D24)
|
||||
Optional module that explains the collected diagnostics in plain language. **Strictly opt-in and
|
||||
never automatic** — the model is contacted only when the user presses "Explain with AI" (GUI) or
|
||||
runs `rigdoctor ai explain`; configuring it contacts nothing. The user explicitly chooses a
|
||||
provider (no default): **Ollama** (local, private, no key) or **Claude** (Anthropic Messages
|
||||
API, key in the keyring, with a consent prompt before sending data). Answers are **grounded** in
|
||||
the actual findings plus matched reference facts from a curated, exact-match knowledge base
|
||||
("RAG-lite" — no embeddings/vector store, stdlib only); no fine-tuning. HTTP via stdlib `urllib`
|
||||
(no new core dependency); output is advisory (consistent with D9).
|
||||
|
||||
### M15 — Logging & report bundles (D25)
|
||||
Opt-in (one `logging_enabled` toggle, default off). When on: the application logs to a rotating
|
||||
`app.log`, and **each diagnostic is stored in its own directory** (capture log, structured
|
||||
result, human-readable report, the full **inventory** (M5 hardware/OS), session-scoped **game
|
||||
logs** (Proton/Steam) and **system logs** (`journalctl -k`, `coredumpctl`, an `nvidia-smi -q`
|
||||
snapshot, and the X11/Wayland display-server log), and a record of every AI interaction — the
|
||||
exact data sent, the model, and its reply). The collected logs are also fed to the AI on
|
||||
"Explain". Collection is best-effort (degrades if tools are missing/denied). A **Report** action zips one diagnostic's directory
|
||||
(plus the app log) into a shareable bundle saved under the reports folder (GUI button; CLI
|
||||
`rigdoctor bundle`). Everything stays local — a report only leaves the machine if the user
|
||||
shares the zip. Stdlib only (`logging` + `zipfile`).
|
||||
|
||||
## 5. Non-functional requirements
|
||||
- **Zero hard deps for the core/CLI/daemon** — Python stdlib + tools already present. **Qt
|
||||
|
||||
@@ -16,7 +16,8 @@ SCRIPT_DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)
|
||||
uninstall() {
|
||||
echo "Removing RigDoctor user-local install…"
|
||||
rm -rf "$VENV"
|
||||
rm -f "$BIN_DIR/rigdoctor" "$BIN_DIR/rigdoctor-gui" "$DESKTOP_FILE"
|
||||
rm -f "$BIN_DIR/rigdoctor" "$BIN_DIR/rigdoctor-gui" "$DESKTOP_FILE" \
|
||||
"$DATA_HOME/icons/hicolor/scalable/apps/rigdoctor.svg"
|
||||
echo "Done. (Config and logs under ~/.config/rigdoctor and ~/.local/share/rigdoctor were kept.)"
|
||||
}
|
||||
|
||||
@@ -81,6 +82,17 @@ mkdir -p "$BIN_DIR"
|
||||
ln -sf "$VENV/bin/rigdoctor" "$BIN_DIR/rigdoctor"
|
||||
ln -sf "$VENV/bin/rigdoctor-gui" "$BIN_DIR/rigdoctor-gui"
|
||||
|
||||
# Install the app icon (for the dock/launcher); fall back to a stock icon.
|
||||
ICON_NAME=utilities-system-monitor
|
||||
ICON_SRC=$("$VENV/bin/python" -c "import os, rigdoctor.gui as g; print(os.path.join(os.path.dirname(g.__file__), 'assets', 'rigdoctor.svg'))" 2>/dev/null || true)
|
||||
if [ -n "$ICON_SRC" ] && [ -f "$ICON_SRC" ]; then
|
||||
ICON_DST="$DATA_HOME/icons/hicolor/scalable/apps/rigdoctor.svg"
|
||||
mkdir -p "$(dirname "$ICON_DST")"
|
||||
cp "$ICON_SRC" "$ICON_DST"
|
||||
ICON_NAME=rigdoctor
|
||||
command -v gtk-update-icon-cache >/dev/null 2>&1 && gtk-update-icon-cache -qtf "$DATA_HOME/icons/hicolor" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
mkdir -p "$DESKTOP_DIR"
|
||||
cat > "$DESKTOP_FILE" <<EOF
|
||||
[Desktop Entry]
|
||||
@@ -88,10 +100,12 @@ Type=Application
|
||||
Name=RigDoctor
|
||||
Comment=Hardware monitoring & crash diagnostics for Linux gamers
|
||||
Exec=$VENV/bin/rigdoctor-gui
|
||||
Icon=utilities-system-monitor
|
||||
Icon=$ICON_NAME
|
||||
Terminal=false
|
||||
Categories=System;Monitor;Utility;
|
||||
StartupWMClass=rigdoctor
|
||||
EOF
|
||||
command -v update-desktop-database >/dev/null 2>&1 && update-desktop-database "$DESKTOP_DIR" 2>/dev/null || true
|
||||
|
||||
echo
|
||||
echo "RigDoctor $("$VENV/bin/rigdoctor" --version 2>/dev/null | awk '{print $2}') installed."
|
||||
@@ -101,3 +115,11 @@ case ":$PATH:" in
|
||||
*":$BIN_DIR:"*) ;;
|
||||
*) echo " Note: add $BIN_DIR to your PATH (a fresh login usually does this).";;
|
||||
esac
|
||||
|
||||
# Launch the graphical setup wizard if a desktop session is available (first run shows it).
|
||||
if [ -n "${DISPLAY:-}${WAYLAND_DISPLAY:-}" ] && [ -x "$VENV/bin/rigdoctor-gui" ]; then
|
||||
echo " Opening the setup wizard…"
|
||||
("$VENV/bin/rigdoctor-gui" --setup >/dev/null 2>&1 &)
|
||||
else
|
||||
echo " Run 'rigdoctor-gui' to finish setup."
|
||||
fi
|
||||
|
||||
@@ -0,0 +1,121 @@
|
||||
"""Build a `.deb` for RigDoctor (M9 / D8) — dependency-light, no debhelper.
|
||||
|
||||
Pure-Python app, so it's `Architecture: all`: we stage the package into dist-packages, drop the
|
||||
two launchers in /usr/bin, install the desktop entry + icon, write a DEBIAN/control, and call
|
||||
`dpkg-deb`. The core is stdlib (`Depends: python3`); everything else is **Recommends** so a
|
||||
plain `apt install rigdoctor` sets up the whole toolset automatically (users never hand-install
|
||||
deps) — the GUI modules (Debian/Ubuntu split PySide6 per module, so we name
|
||||
`python3-pyside6.qt{widgets,gui,websockets,svg}`) + `python3-pyte`, plus the diagnostic/gaming
|
||||
tools (smartmontools, lm-sensors, dmidecode, pciutils, libnotify-bin, libsecret-tools, gamemode,
|
||||
mangohud). `--no-install-recommends` still yields a CLI-only install; `cpupower` is a Suggests
|
||||
(kernel-tied/heavy).
|
||||
|
||||
Run: `python packaging/make_deb.py` → `dist/rigdoctor_<version>_all.deb`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[1]
|
||||
DIST = ROOT / "dist"
|
||||
MAINTAINER = "Jessey van Offeren <jjvanofferen@gmail.com>"
|
||||
HOMEPAGE = "https://git.jesseyvanofferen.com/jessey/rigdoctor"
|
||||
|
||||
|
||||
def _version() -> str:
|
||||
text = (ROOT / "src" / "rigdoctor" / "__init__.py").read_text(encoding="utf-8")
|
||||
for line in text.splitlines():
|
||||
if line.startswith("__version__"):
|
||||
return line.split('"')[1]
|
||||
raise SystemExit("could not read __version__")
|
||||
|
||||
|
||||
_LAUNCHER = """\
|
||||
#!/usr/bin/python3
|
||||
import sys
|
||||
from {module} import main
|
||||
sys.exit(main())
|
||||
"""
|
||||
|
||||
_DESKTOP = """\
|
||||
[Desktop Entry]
|
||||
Type=Application
|
||||
Name=RigDoctor
|
||||
Comment=Hardware monitoring & crash diagnostics for Linux gamers
|
||||
Exec=rigdoctor-gui
|
||||
Icon=rigdoctor
|
||||
Terminal=false
|
||||
Categories=System;Monitor;Utility;
|
||||
StartupWMClass=rigdoctor
|
||||
"""
|
||||
|
||||
_CONTROL = """\
|
||||
Package: rigdoctor
|
||||
Version: {version}
|
||||
Architecture: all
|
||||
Maintainer: {maintainer}
|
||||
Section: utils
|
||||
Priority: optional
|
||||
Depends: python3 (>= 3.11)
|
||||
Recommends: python3-pyside6.qtwidgets, python3-pyside6.qtgui, python3-pyside6.qtwebsockets, python3-pyside6.qtsvg, python3-pyte, smartmontools, lm-sensors, dmidecode, pciutils, libnotify-bin, libsecret-tools, gamemode, mangohud
|
||||
Suggests: linux-tools-generic
|
||||
Homepage: {homepage}
|
||||
Description: Hardware monitoring & crash diagnostics for Linux gamers
|
||||
RigDoctor monitors GPU/CPU temperatures, load, and sensors, captures crash
|
||||
diagnostics while gaming, scans logs (Xid/SMART/kernel) for problems, and can
|
||||
explain them in plain language. The CLI and background daemon are pure Python
|
||||
(stdlib only); the optional desktop GUI and system-tray applet use PySide6,
|
||||
pulled in via Recommends. Install with --no-install-recommends for CLI only.
|
||||
"""
|
||||
|
||||
|
||||
def _write(path: Path, text: str, mode: int = 0o644) -> None:
    """Write *text* as UTF-8 to *path*, creating parent directories, then chmod it to *mode*."""
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as handle:
        handle.write(text)
    path.chmod(mode)
|
||||
|
||||
|
||||
def build() -> Path:
    """Stage the package tree under ``DIST`` and turn it into a ``.deb`` via ``dpkg-deb``.

    Returns:
        Path of the built ``rigdoctor_<version>_all.deb``.

    Raises:
        subprocess.CalledProcessError: if ``dpkg-deb`` exits non-zero (``check=True``).
    """
    version = _version()
    base_name = f"rigdoctor_{version}_all"
    DIST.mkdir(exist_ok=True)
    stage = DIST / base_name
    if stage.exists():
        # Leftover from an earlier run: start from an empty staging tree.
        shutil.rmtree(stage)

    package_src = ROOT / "src" / "rigdoctor"

    # Python package → dist-packages (importable system-wide), minus bytecode.
    shutil.copytree(
        package_src,
        stage / "usr/lib/python3/dist-packages/rigdoctor",
        ignore=shutil.ignore_patterns("__pycache__", "*.pyc"),
    )

    # Launchers.
    for script_name, module in (("rigdoctor", "rigdoctor.cli"),
                                ("rigdoctor-gui", "rigdoctor.gui.app")):
        _write(stage / "usr/bin" / script_name, _LAUNCHER.format(module=module), 0o755)

    # Desktop entry + icon.
    _write(stage / "usr/share/applications/rigdoctor.desktop", _DESKTOP)
    icon_svg = (package_src / "gui" / "assets" / "rigdoctor.svg").read_text(encoding="utf-8")
    _write(stage / "usr/share/icons/hicolor/scalable/apps/rigdoctor.svg", icon_svg)

    # Refresh the desktop database on install/remove (best-effort); both scripts are identical.
    refresh_script = "#!/bin/sh\nset -e\nupdate-desktop-database -q 2>/dev/null || true\n"
    for maintainer_script in ("postinst", "postrm"):
        _write(stage / "DEBIAN" / maintainer_script, refresh_script, 0o755)
    control_text = _CONTROL.format(version=version, maintainer=MAINTAINER, homepage=HOMEPAGE)
    _write(stage / "DEBIAN/control", control_text)

    out = DIST / f"{base_name}.deb"
    dpkg_cmd = ["dpkg-deb", "--root-owner-group", "--build", str(stage), str(out)]
    subprocess.run(dpkg_cmd, check=True)
    shutil.rmtree(stage)  # only reached when dpkg-deb succeeded
    return out
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: build the package, report where it landed, exit with status 0.
    path = build()
    print("built {}".format(path))
    raise SystemExit(0)
|
||||
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "rigdoctor"
|
||||
version = "0.3.1"
|
||||
version = "0.43.0"
|
||||
description = "Modular hardware monitoring & crash diagnostics for Linux gamers."
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
@@ -13,7 +13,7 @@ requires-python = ">=3.11"
|
||||
dependencies = []
|
||||
|
||||
[project.optional-dependencies]
|
||||
gui = ["PySide6"]
|
||||
gui = ["PySide6", "pyte"]
|
||||
|
||||
[project.scripts]
|
||||
rigdoctor = "rigdoctor.cli:main"
|
||||
@@ -21,3 +21,6 @@ rigdoctor-gui = "rigdoctor.gui.app:main"
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
where = ["src"]
|
||||
|
||||
[tool.setuptools.package-data]
|
||||
rigdoctor = ["gui/assets/*.svg"]
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
"""RigDoctor — modular hardware monitoring & crash diagnostics for Linux gamers."""
|
||||
|
||||
__version__ = "0.3.1"
|
||||
__version__ = "0.43.0"
|
||||
|
||||
@@ -44,17 +44,10 @@ def cmd_snapshot(args) -> int:
|
||||
|
||||
|
||||
def cmd_monitor(args) -> int:
|
||||
from .tui import run
|
||||
|
||||
interval = args.interval or load_config()["interval"]
|
||||
try:
|
||||
for sample in _sampler().stream(interval=interval):
|
||||
# Basic full-screen redraw; the rich TUI (M2) comes later.
|
||||
print("\033[2J\033[H", end="")
|
||||
print(f"RigDoctor — live (every {interval:g}s, Ctrl-C to quit)\n")
|
||||
print(render_snapshot(sample))
|
||||
sys.stdout.flush()
|
||||
except KeyboardInterrupt:
|
||||
print()
|
||||
return 0
|
||||
return run(interval, plain=getattr(args, "plain", False))
|
||||
|
||||
|
||||
def cmd_gui(args) -> int:
|
||||
@@ -62,8 +55,9 @@ def cmd_gui(args) -> int:
|
||||
from .gui.app import main as gui_main
|
||||
except ImportError as exc:
|
||||
print("The GUI needs PySide6, which isn't installed.")
|
||||
print(" Install it with: pip install 'rigdoctor[gui]'")
|
||||
print(" or on Ubuntu: sudo apt install python3-pyside6")
|
||||
print(" Ubuntu/Debian: sudo apt install python3-pyside6.qtwidgets "
|
||||
"python3-pyside6.qtgui python3-pyside6.qtwebsockets python3-pyside6.qtsvg python3-pyte")
|
||||
print(" pip: pip install 'rigdoctor[gui]'")
|
||||
print(f" ({exc})")
|
||||
return 2
|
||||
return gui_main([sys.argv[0]])
|
||||
@@ -86,6 +80,7 @@ def cmd_record_run(args) -> int:
|
||||
max_bytes=cfg["log_max_bytes"],
|
||||
backups=cfg["log_backups"],
|
||||
status_path=config.STATUS_FILE,
|
||||
game=getattr(args, "game", None),
|
||||
)
|
||||
|
||||
def _handle(_sig, _frame):
|
||||
@@ -268,6 +263,10 @@ def cmd_update(args) -> int:
|
||||
print("\nWhat's new:\n" + "\n".join(" " + ln for ln in notes.splitlines()) + "\n")
|
||||
if args.check:
|
||||
return 0
|
||||
kind = updates.install_kind()
|
||||
if kind != "pip": # apt/source installs aren't pip-updatable — show the right command
|
||||
print(updates.update_hint(kind))
|
||||
return 0
|
||||
print(f"Installing {tag}…")
|
||||
rc, out = updates.apply_update(tag)
|
||||
print(out[-2000:])
|
||||
@@ -295,6 +294,18 @@ def cmd_uninstall(args) -> int:
|
||||
return 0
|
||||
|
||||
|
||||
def cmd_collect_priv(args) -> int:
    """Internal: print root-only data (SMART + dmidecode) as one JSON document.

    Meant to be run via pkexec at launch. *args* is unused. Always returns 0.
    """
    from dataclasses import asdict

    from .core import drives
    from .core.inventory import _dmidecode

    drive_rows = [asdict(drive) for drive in drives.collect()]
    payload = {"drives": drive_rows, "dmidecode": _dmidecode()}
    print(json.dumps(payload))
    return 0
|
||||
|
||||
|
||||
def cmd_inventory(args) -> int:
|
||||
from .core import inventory
|
||||
|
||||
@@ -327,6 +338,349 @@ def cmd_report(args) -> int:
|
||||
return 0
|
||||
|
||||
|
||||
def _resolve_game(args) -> str | None:
|
||||
"""Game name from --game, or looked up from --appid via the Steam scan."""
|
||||
if getattr(args, "game", None):
|
||||
return args.game
|
||||
if getattr(args, "appid", None):
|
||||
from .core import steam
|
||||
|
||||
for g in steam.scan_games(steam.selected_library_paths()):
|
||||
if g.appid == str(args.appid):
|
||||
return g.name
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def cmd_diagnose(args) -> int:
    """Handle ``rigdoctor diagnose``: ``start``, ``status`` (the default) or finish.

    Any sub-command other than ``start``/``status`` is treated as finish.
    Returns 0 on success; 1 when a capture cannot be started or there is nothing to analyze.
    """
    from .core import diagnostic, reccontrol, steam

    action = args.diagnose_cmd or "status"

    if action == "status":
        info = diagnostic.active()
        if not info:
            print("No diagnostic capture is running.")
            return 0
        game = info.get("game") or "—"
        headline = f"Capturing for {game!r}: {info.get('samples', 0)} samples"
        gpu_note = " · GPU-lost seen" if info.get("gpu_lost") else ""
        print(headline + gpu_note)
        return 0

    if action == "start":
        if reccontrol.running_pid():
            print("A capture is already running — finish it with: rigdoctor diagnose finish")
            return 1
        game = _resolve_game(args)
        if game is None:
            if args.game or args.appid:
                # The user named a game, but it could not be matched.
                print("Couldn't match that game in your selected Steam libraries.")
                return 1
            # No game given: list the candidates and ask the user to re-run.
            candidates = steam.cached_games() or steam.scan_games(steam.selected_library_paths())
            if not candidates:
                print("No games detected. Select a library: rigdoctor games libraries --all")
            else:
                print("Pick a game to focus on, then re-run with --game:")
                for candidate in candidates:
                    print(f" --game {candidate.name!r}")
            return 1
        pid = diagnostic.start(game=game, interval=args.interval)
        time.sleep(1.0)  # pause before checking that the capture process is alive
        if not (pid and reccontrol.pid_alive(pid)):
            print(f"Capture failed to start; see {config.SPAWN_LOG}")
            return 1
        print(f"Diagnostic capture started for {game!r} (pid {pid}).")
        print(" Play your game. When you're done (or after a crash + reboot):")
        print(" rigdoctor diagnose finish")
        return 0

    # finish
    if not (reccontrol.running_pid() or config.DIAG_LOG.exists()):
        print("No diagnostic to analyze. Start one with: rigdoctor diagnose start --game <name>")
        return 1
    print("Stopping capture and analyzing…\n")
    outcome = diagnostic.finish(last_n=args.last)
    from .render import render_health, render_summary

    if outcome.game:
        print(f"Diagnostic — {outcome.game}\n")
    print(render_summary(outcome.summary, log_path=config.DIAG_LOG))
    print("\n" + render_health(outcome.findings, title="Findings"))
    return 0
|
||||
|
||||
|
||||
def cmd_wrap(args) -> int:
    """Hand ``args.command`` to ``core.wrap.run`` and return whatever it returns."""
    from .core import wrap as wrap_module

    exit_code = wrap_module.run(args.command)
    return exit_code
|
||||
|
||||
|
||||
def cmd_watch(args) -> int:
    """Run the Steam-game watcher and return its result.

    The interval comes from ``args.interval`` or, when that is unset/zero, from the
    ``interval`` config key (default 1.0). It is clamped to at least 2.0 seconds.
    """
    from .core import watcher

    requested = args.interval
    if not requested:
        requested = load_config().get("interval", 1.0)
    print("Watching for a running Steam game (Ctrl-C to stop)…")
    poll_every = max(2.0, requested)
    return watcher.watch(interval=poll_every)
|
||||
|
||||
|
||||
def cmd_service(args) -> int:
    """Handle ``rigdoctor service``: set the trigger mode, or print the status (default).

    For ``mode`` the result is 0 when applying succeeded or ``service.available()`` is
    false, otherwise 1. The status listing always returns 0.
    """
    from .core import service

    action = args.service_cmd or "status"

    if action != "mode":
        info = service.status()
        print(f"Trigger mode: {info['mode']}")
        availability = "available" if info["available"] else "not available"
        print(f"systemd --user: {availability}")
        if info["available"]:
            recorder_state = "active" if info.get("recorder_active") else "inactive"
            watcher_state = "active" if info.get("watch_active") else "inactive"
            print(f" recorder service: {recorder_state}")
            print(f" watcher service: {watcher_state}")
        return 0

    applied, note = service.apply_mode(args.mode)
    print(f"Trigger mode set to '{args.mode}'.")
    if note and not applied:
        print(f" note: {note}")
    if applied or not service.available():
        return 0
    return 1
|
||||
|
||||
|
||||
def cmd_ai(args) -> int:
    """AI assistant (M14) — opt-in; ``ai.explain`` is only called for test/dump/explain.

    ``status`` (the default) only prints the configuration. The other sub-commands
    return 1 when AI is not configured, otherwise 0/1 from the provider's ok flag.
    """
    from .core import ai

    action = args.ai_cmd or "status"

    if action == "status":
        print(f"Provider: {ai.provider() or 'not configured'}")
        if not ai.provider():
            print(" Configure it in the GUI: Settings → AI assistant.")
        else:
            print(f" {ai.provider_label()}")
            readiness = "yes" if ai.is_configured() else "no"
            print(f" ready: {readiness}")
        return 0

    if not ai.is_configured():
        print("AI is not configured. Set it up in the GUI (Settings → AI assistant).")
        return 1

    def _ask(prompt) -> int:
        # Send one prompt, print the reply, and map the ok flag to an exit code.
        ok, reply = ai.explain(prompt)
        print(reply)
        return 0 if ok else 1

    if action == "test":
        return _ask("Connectivity test — reply exactly: RigDoctor AI is working.")

    if action == "dump":
        # Parse a Windows .dmp minidump (e.g. from a Proton game crash) and explain it.
        from .core import minidump

        parsed = minidump.parse(args.file)
        if not parsed.ok:
            print(f"Couldn't analyze the dump — {parsed.error}")
            return 1
        print(minidump.to_text(parsed))
        print(f"\nAsking {ai.provider_label()} to explain {os.path.basename(args.file)}…\n")
        return _ask(minidump.to_ai_text(parsed))

    # explain: gather the current health findings and ask the provider to explain them.
    from .core import health

    prompt_text = ai.format_findings(health.run_health_checks())
    print(f"Asking {ai.provider_label()} to explain the current health findings…\n")
    return _ask(prompt_text)
|
||||
|
||||
|
||||
def cmd_bundle(args) -> int:
    """Zip the latest stored diagnostic into a report bundle (M15).

    Returns 1 when logging is disabled or nothing has been stored yet, else 0 after
    printing where the report was written.
    """
    from .core import diagstore

    if not diagstore.enabled():
        logging_off = ("Logging is off. Enable it (Settings → Logging, or set logging_enabled) so "
                       "diagnostics are stored and can be reported.")
        print(logging_off)
        return 1

    latest = diagstore.latest_dir()
    if latest is None:
        print("No stored diagnostics yet — run a diagnostic first.")
        return 1

    report_path = diagstore.make_report(latest)
    print(f"Report written: {report_path}")
    return 0
|
||||
|
||||
|
||||
def cmd_gameenv(args) -> int:
    """Run the gaming-environment checks and print them as JSON or as a text report.

    Always returns 0.
    """
    from dataclasses import asdict

    from .core.gameenv import run_gameenv_checks
    from .render import render_health

    findings = run_gameenv_checks()
    if args.json:
        rendered = json.dumps([asdict(item) for item in findings], indent=2, ensure_ascii=False)
    else:
        rendered = render_health(findings, title="Gaming environment")
    print(rendered)
    return 0
|
||||
|
||||
|
||||
def cmd_games(args) -> int:
    """List detected games: Steam (from the selected libraries), other launchers, user-added.

    With ``--json`` a single JSON document is printed. Otherwise a text table is printed.
    Returns 1 only when nothing was found and no Steam library is selected.
    """
    from dataclasses import asdict

    from .core import customgames, launchers, steam

    selected = steam.selected_library_paths()
    # Steam is only rescanned when at least one library is selected.
    result = steam.rescan() if selected else None
    steam_games = result.games if result else []
    extra = launchers.scan()  # non-Steam (Lutris/Heroic)
    all_games = list(steam_games) + list(extra) + customgames.scan()  # + user-added (SPT etc.)

    if args.json:
        document = {
            "scanned_at": result.scanned_at if result else None,
            "new_appids": result.new_appids if result else [],
            "games": [asdict(game) for game in all_games],
        }
        print(json.dumps(document, indent=2, ensure_ascii=False))
        return 0

    if not all_games:
        if selected:
            print("No games found.")
            return 0
        print("No Steam libraries selected and no non-Steam games found.")
        print(" Pick a Steam library: rigdoctor games libraries --enable <path> (or --all)")
        return 1

    new_ids = set(result.new_appids) if result else set()
    print(f"{len(all_games)} game(s):\n")
    for game in all_games:
        new_tag = " NEW" if game.appid in new_ids else ""
        source = f" [{game.launcher}]" if game.launcher != "steam" else ""
        size = steam.human_size(game.size_bytes) if game.size_bytes else ""
        print(f" {game.name:<46}{source:<10} {size:>9}{new_tag}")
    if not selected:
        print("\n(no Steam libraries selected — `rigdoctor games libraries --all` to add them)")
    return 0
|
||||
|
||||
|
||||
def cmd_games_libraries(args) -> int:
    """List the discovered Steam libraries and optionally change which ones are scanned.

    ``--all`` / ``--enable`` / ``--disable`` adjust the stored selection first; the
    (possibly updated) selection is then listed as text or, with ``--json``, as JSON.

    The selection is kept as a set of ``os.path.realpath`` values throughout. Before,
    ``--all`` stored raw ``lib.path`` values and the listing tested raw ``lib.path``
    against the normalised set. A library reached through a symlink could then never
    show as selected, and ``--all --disable <path>`` could fail to remove it.

    Returns:
        0 on success, 1 when no Steam library was discovered.
    """
    from .core import steam

    discovered = steam.discover_libraries()
    # Canonical path per discovered library, computed once and reused below.
    real_path = {lib.path: os.path.realpath(lib.path) for lib in discovered}
    selected = {os.path.realpath(p) for p in steam.selected_library_paths()}

    # --all / --enable / --disable adjust the selection, then we list the result.
    if args.all or args.enable or args.disable:
        if args.all:
            selected = set(real_path.values())
        for raw in args.enable or []:
            selected.add(os.path.realpath(os.path.expanduser(raw)))
        for raw in args.disable or []:
            selected.discard(os.path.realpath(os.path.expanduser(raw)))
        config.update_config(steam_libraries=sorted(selected))

    if not discovered:
        print("No Steam libraries detected (is Steam installed?).")
        return 1

    if args.json:
        print(json.dumps([
            {"path": lib.path, "label": lib.label,
             "selected": real_path[lib.path] in selected,
             "games": len(steam.scan_library(lib.path))}
            for lib in discovered
        ], indent=2, ensure_ascii=False))
        return 0

    print("Steam libraries (checked = scanned for games):\n")
    for lib in discovered:
        mark = "x" if real_path[lib.path] in selected else " "
        count = len(steam.scan_library(lib.path))
        label = f" [{lib.label}]" if lib.label else ""
        print(f" [{mark}] {lib.path}{label} ({count} games)")
    return 0
|
||||
|
||||
|
||||
def cmd_games_add(args) -> int:
    """Add a custom game and print what was stored for it.

    Returns 0 when ``customgames.add`` reports success, otherwise 1.
    """
    from .core import customgames

    added = customgames.add(args.name, command=args.command, logdir=args.logdir)
    if not added:
        print(f"'{args.name}' is blank or already in your custom games.")
        return 1

    print(f"Added '{args.name}' to your games (custom). It'll show in `rigdoctor games` "
          "and the diagnostic game picker.")
    # Re-read the stored entry and report the launch command / log directory it holds.
    stored = customgames.get(args.name) or {}
    if stored.get("command"):
        print(f' launch: {stored["command"]} (run with: rigdoctor games play "{args.name}")')
    if stored.get("logdir"):
        print(f' logs: {stored["logdir"]} (included in crash diagnostics)')
    return 0
|
||||
|
||||
|
||||
def cmd_games_play(args) -> int:
    """Launch a custom game through ``wrap.run`` with crash-capture.

    Returns ``wrap.run``'s result, or 1 (with a hint) when the game is unknown or has
    no launch command.
    """
    from .core import customgames, wrap

    launch_command = customgames.command(args.name)
    if launch_command is not None:
        print(f"Launching '{args.name}' with crash-capture… (capture stops cleanly on exit; "
              "a hard freeze is flagged next time you open RigDoctor)")
        return wrap.run(launch_command, game=args.name)

    # No command: either the game is unknown, or it was added without one.
    if customgames.get(args.name) is None:
        hint = (f"'{args.name}' isn't in your custom games. Add it: "
                f'rigdoctor games add "{args.name}" --command <launch script>')
    else:
        hint = (f"'{args.name}' has no launch command. Set one: "
                f'rigdoctor games remove "{args.name}" && rigdoctor games add "{args.name}" '
                "--command <launch script>")
    print(hint)
    return 1
|
||||
|
||||
|
||||
def cmd_games_remove(args) -> int:
    """Remove a custom game; return 0 if it was removed, else 1 after listing the current ones."""
    from .core import customgames

    if not customgames.remove(args.name):
        current = ", ".join(customgames.names()) or "(none)"
        print(f"'{args.name}' isn't in your custom games. Current: {current}")
        return 1
    print(f"Removed '{args.name}' from your custom games.")
    return 0
|
||||
|
||||
|
||||
def cmd_stress(args) -> int:
    """Run ``stress.run`` with live per-sample output (text mode) or a JSON result.

    ``--command`` is split with ``shlex`` and passed as the load generator. In text mode
    a banner says which loader is used (or that this is monitor-only), and each sample
    rewrites one status line. Returns 0 when the result severity is "ok" or "info", else 1.
    """
    import shlex as _shlex

    from .core import stress
    from .render import format_raw, render_stress

    command = _shlex.split(args.command) if args.command else None
    text_mode = not args.json

    if text_mode:
        loaders = stress.available_loaders()
        if command:
            print(f"Stressing with: {' '.join(command)}")
        elif loaders:
            print(f"Stressing with auto-detected loader: {loaders[0]}")
        else:
            print("No GPU load tool found and no --command given — MONITOR-ONLY mode.")
            print(f" Launch the game/app now; I'll closely track temps for up to {int(args.duration)}s.")
        print(f" Sampling every {args.interval:g}s. Press Ctrl-C to stop early.\n")

    # Reading keys shown on the live line, with their short tags.
    shown = (("gpu.temp", "core"), ("gpu.power", "pwr"),
             ("gpu.util", "util"), ("gpu.clock.core", "clk"))

    def _tick(sample, elapsed) -> None:
        readings = {reading.key: reading for reading in sample.readings}
        parts = [f"{elapsed:5.0f}s"]
        for key, tag in shown:
            reading = readings.get(key)
            if reading is None or reading.value is None:
                continue  # key not reported in this sample
            parts.append(f"{tag} {format_raw(reading.value, reading.unit)}")
        # "\r" keeps the cursor on the same line so the next tick overwrites it.
        print(" " + " ".join(parts) + " ", end="\r", flush=True)

    result = stress.run(duration=args.duration, interval=args.interval, command=command,
                        on_tick=_tick if text_mode else None)

    if args.json:
        from dataclasses import asdict

        print(json.dumps(asdict(result), indent=2, ensure_ascii=False))
    else:
        print()  # end the live line
        print(render_stress(result))
    if result.severity in ("ok", "info"):
        return 0
    return 1
|
||||
|
||||
|
||||
def build_parser() -> argparse.ArgumentParser:
|
||||
p = argparse.ArgumentParser(
|
||||
prog="rigdoctor",
|
||||
@@ -339,10 +693,19 @@ def build_parser() -> argparse.ArgumentParser:
|
||||
sp.add_argument("--json", action="store_true", help="output JSON instead of text")
|
||||
sp.set_defaults(func=cmd_snapshot)
|
||||
|
||||
mp = sub.add_parser("monitor", help="live-refreshing sensor view")
|
||||
mp = sub.add_parser("monitor", help="live monitor TUI (current/min/max, M2)")
|
||||
mp.add_argument("-n", "--interval", type=float, default=None, help="refresh interval (s)")
|
||||
mp.add_argument("--plain", action="store_true", help="plain redraw instead of the curses UI")
|
||||
mp.set_defaults(func=cmd_monitor)
|
||||
|
||||
st = sub.add_parser("stress", help="GPU stress + close thermal monitoring (repro load crashes)")
|
||||
st.add_argument("-d", "--duration", type=float, default=120.0, help="run for this many seconds (default 120)")
|
||||
st.add_argument("-n", "--interval", type=float, default=0.5, help="sampling interval in seconds (default 0.5)")
|
||||
st.add_argument("--command", default=None,
|
||||
help="load generator to run (e.g. a game or 'gpu-burn 60'); omit to auto-detect or monitor-only")
|
||||
st.add_argument("--json", action="store_true", help="output JSON")
|
||||
st.set_defaults(func=cmd_stress)
|
||||
|
||||
sub.add_parser("gui", help="launch the desktop GUI (needs PySide6)").set_defaults(func=cmd_gui)
|
||||
sub.add_parser("sources", help="list detected sensor sources").set_defaults(func=cmd_sources)
|
||||
|
||||
@@ -371,6 +734,7 @@ def build_parser() -> argparse.ArgumentParser:
|
||||
run_p = rec_sub.add_parser("run", help="run the capture loop in the foreground (systemd-friendly)")
|
||||
run_p.add_argument("-n", "--interval", type=float, default=None, help="sampling interval (s)")
|
||||
run_p.add_argument("-o", "--out", default=None, help="log file path")
|
||||
run_p.add_argument("--game", default=None, help="tag the capture with a game name (M6/diagnose)")
|
||||
run_p.set_defaults(func=cmd_record_run)
|
||||
|
||||
start_p = rec_sub.add_parser("start", help="start recording in the background")
|
||||
@@ -390,15 +754,94 @@ def build_parser() -> argparse.ArgumentParser:
|
||||
rep.add_argument("--json", action="store_true", help="output JSON instead of text")
|
||||
rep.set_defaults(func=cmd_report)
|
||||
|
||||
cp = sub.add_parser("collect-priv", help=argparse.SUPPRESS) # internal: run via pkexec
|
||||
cp.set_defaults(func=cmd_collect_priv)
|
||||
|
||||
inv = sub.add_parser("inventory", help="system inventory (M5): export hardware/OS details")
|
||||
inv.add_argument("--json", action="store_true", help="output JSON")
|
||||
inv.add_argument("--markdown", action="store_true", help="output Markdown (for forum/bug reports)")
|
||||
inv.add_argument("-o", "--output", default=None, help="write to a file instead of stdout")
|
||||
inv.set_defaults(func=cmd_inventory)
|
||||
|
||||
games_p = sub.add_parser("games", help="Steam game & library detection (M6)")
|
||||
games_p.add_argument("--json", action="store_true", help="output JSON")
|
||||
games_p.set_defaults(func=cmd_games)
|
||||
games_sub = games_p.add_subparsers(dest="games_cmd")
|
||||
lib_p = games_sub.add_parser("libraries", help="list/select Steam libraries to scan")
|
||||
lib_p.add_argument("--enable", action="append", metavar="PATH", help="scan this library (repeatable)")
|
||||
lib_p.add_argument("--disable", action="append", metavar="PATH", help="stop scanning this library (repeatable)")
|
||||
lib_p.add_argument("--all", action="store_true", help="scan all detected libraries")
|
||||
lib_p.add_argument("--json", action="store_true", help="output JSON")
|
||||
lib_p.set_defaults(func=cmd_games_libraries)
|
||||
|
||||
add_p = games_sub.add_parser("add", help="add a game no launcher reports (e.g. SPT)")
|
||||
add_p.add_argument("name", help="game name, e.g. \"SPT\"")
|
||||
add_p.add_argument("--command", default=None,
|
||||
help="launch command/script (e.g. the path to tarkov.sh) — enables `games play`")
|
||||
add_p.add_argument("--logdir", default=None,
|
||||
help="the game's own log directory (auto-detected as <command dir>/logs if present)")
|
||||
add_p.set_defaults(func=cmd_games_add)
|
||||
play_p = games_sub.add_parser("play", help="launch a custom game with crash-capture (e.g. SPT)")
|
||||
play_p.add_argument("name", help="game name to launch")
|
||||
play_p.set_defaults(func=cmd_games_play)
|
||||
rm_p = games_sub.add_parser("remove", help="remove a previously added custom game")
|
||||
rm_p.add_argument("name", help="game name to remove")
|
||||
rm_p.set_defaults(func=cmd_games_remove)
|
||||
|
||||
env_p = sub.add_parser("gameenv", help="gaming environment checks (M6): flag stability/perf settings")
|
||||
env_p.add_argument("--json", action="store_true", help="output JSON instead of text")
|
||||
env_p.set_defaults(func=cmd_gameenv)
|
||||
|
||||
diag_p = sub.add_parser("diagnose", help="guided diagnostic: capture while gaming, then analyze")
|
||||
diag_sub = diag_p.add_subparsers(dest="diagnose_cmd")
|
||||
diag_start = diag_sub.add_parser("start", help="start a focused capture for a game")
|
||||
diag_start.add_argument("--game", default=None, help="game name to focus on")
|
||||
diag_start.add_argument("--appid", default=None, help="Steam appid to focus on (resolved to a name)")
|
||||
diag_start.add_argument("-n", "--interval", type=float, default=None, help="sampling interval (s)")
|
||||
diag_start.set_defaults(func=cmd_diagnose)
|
||||
diag_sub.add_parser("status", help="show the in-progress diagnostic").set_defaults(func=cmd_diagnose)
|
||||
diag_finish = diag_sub.add_parser("finish", help="stop the capture and analyze it")
|
||||
diag_finish.add_argument("--last", type=int, default=10, help="recent samples to show")
|
||||
diag_finish.set_defaults(func=cmd_diagnose)
|
||||
diag_p.set_defaults(func=cmd_diagnose, diagnose_cmd=None, last=10)
|
||||
|
||||
wrap_p = sub.add_parser(
|
||||
"wrap", help="run a game with automatic crash-capture (Steam launch option, D12)")
|
||||
wrap_p.add_argument("command", nargs=argparse.REMAINDER,
|
||||
help="the game command — use `rigdoctor wrap %%command%%` in Steam")
|
||||
wrap_p.set_defaults(func=cmd_wrap)
|
||||
|
||||
watch_p = sub.add_parser("watch", help="auto-capture while a Steam game runs (game-launch trigger)")
|
||||
watch_p.add_argument("-n", "--interval", type=float, default=None, help="poll interval (s)")
|
||||
watch_p.set_defaults(func=cmd_watch)
|
||||
|
||||
svc_p = sub.add_parser("service", help="crash-logger trigger mode + systemd --user service (M9/D6)")
|
||||
svc_sub = svc_p.add_subparsers(dest="service_cmd")
|
||||
svc_sub.add_parser("status", help="show the trigger mode and service state").set_defaults(func=cmd_service)
|
||||
mode_p = svc_sub.add_parser("mode", help="set the trigger mode")
|
||||
mode_p.add_argument("mode", choices=("manual", "always-on", "game-launch"))
|
||||
mode_p.set_defaults(func=cmd_service)
|
||||
svc_p.set_defaults(func=cmd_service, service_cmd=None)
|
||||
|
||||
ai_p = sub.add_parser("ai", help="AI assistant (M14): explain diagnostics — opt-in, never automatic")
|
||||
ai_sub = ai_p.add_subparsers(dest="ai_cmd")
|
||||
ai_sub.add_parser("status", help="show the configured provider (contacts nothing)").set_defaults(func=cmd_ai)
|
||||
ai_sub.add_parser("test", help="send a tiny probe to verify connectivity").set_defaults(func=cmd_ai)
|
||||
ai_sub.add_parser("explain", help="explain the current health findings with AI").set_defaults(func=cmd_ai)
|
||||
dump_p = ai_sub.add_parser("dump", help="parse a Windows .dmp crash dump and explain it with AI")
|
||||
dump_p.add_argument("file", help="path to the .dmp minidump (e.g. from a Proton game crash)")
|
||||
dump_p.set_defaults(func=cmd_ai)
|
||||
ai_p.set_defaults(func=cmd_ai, ai_cmd=None)
|
||||
|
||||
bundle_p = sub.add_parser("bundle", help="zip the latest stored diagnostic into a report bundle (M15)")
|
||||
bundle_p.set_defaults(func=cmd_bundle)
|
||||
return p
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: set up logging, parse ``argv`` and run the selected handler.

    Args:
        argv: Arguments to parse; ``None`` is passed straight to ``parse_args``.

    Returns:
        The value returned by the handler stored in ``args.func``.
    """
    from .core import applog

    # Opt-in app logging (M15); no-op unless logging_enabled.
    applog.setup()
    parser = build_parser()
    namespace = parser.parse_args(argv)
    handler = namespace.func
    return handler(namespace)
|
||||
|
||||
|
||||
@@ -23,16 +23,40 @@ CONFIG_FILE = CONFIG_DIR / "config.toml"
|
||||
|
||||
# Crash-capture logger (M3)
|
||||
LOG_FILE = LOG_DIR / "capture.jsonl"
|
||||
# Guided diagnostic (M6/D12): a focused capture writes here, separate from the always-on
|
||||
# crash log, so its report covers only that session's window.
|
||||
DIAG_LOG = LOG_DIR / "diagnostic.jsonl"
|
||||
# A crashed (unterminated, unacknowledged) diagnostic is preserved here when a new capture
|
||||
# starts, so auto-capture (the Steam wrapper) relaunching the game doesn't wipe it first.
|
||||
DIAG_CRASH = LOG_DIR / "diagnostic-crash.jsonl"
|
||||
STATUS_FILE = STATE_DIR / "recorder.json"
|
||||
PID_FILE = STATE_DIR / "recorder.pid"
|
||||
SPAWN_LOG = STATE_DIR / "recorder.out"
|
||||
|
||||
# Gaming environment / game detection (M6) — cached Steam game scan (mutable state,
|
||||
# not config: refreshed by the background scan on every launch).
|
||||
GAMES_FILE = STATE_DIR / "games.json"
|
||||
# User-added games that no launcher reports (e.g. SPT/standalone mod launchers). Authored
|
||||
# by the user (not a refreshable cache), so it lives in DATA_DIR and persists across scans.
|
||||
CUSTOM_GAMES_FILE = DATA_DIR / "custom-games.json"
|
||||
|
||||
# Logging & reports (opt-in via `logging_enabled`). App log: rotating file of app events.
|
||||
# Each diagnostic is stored under DIAGNOSTICS_DIR/<id>/; "Report" zips one into REPORTS_DIR.
|
||||
APP_LOG = STATE_DIR / "app.log"
|
||||
DIAGNOSTICS_DIR = DATA_DIR / "diagnostics"
|
||||
REPORTS_DIR = DATA_DIR / "reports"
|
||||
|
||||
# Update access token (M13) — gates updates to Gitea account holders (D18).
|
||||
# Stored in the OS keyring (Secret Service / GNOME Keyring) via `secret-tool` when
|
||||
# available — encrypted at rest, unlocked with the login session — else a 0600 file.
|
||||
TOKEN_FILE = CONFIG_DIR / "token"
|
||||
_SECRET_ATTRS = ["application", "rigdoctor", "type", "update-token"]
|
||||
|
||||
# AI assistant (M14, D24) — API key for the Claude provider, stored in the keyring like the
|
||||
# update token (Ollama is local and needs none). Separate keyring entry + file fallback.
|
||||
AI_KEY_FILE = CONFIG_DIR / "ai-key"
|
||||
_AI_SECRET_ATTRS = ["application", "rigdoctor", "type", "ai-key"]
|
||||
|
||||
|
||||
def _secret_tool() -> str | None:
|
||||
return shutil.which("secret-tool")
|
||||
@@ -43,27 +67,27 @@ def keyring_available() -> bool:
|
||||
return _secret_tool() is not None
|
||||
|
||||
|
||||
def _keyring_store(value: str, attrs: list[str], label: str) -> bool:
    """Store ``value`` in the OS keyring via ``secret-tool store``.

    Args:
        value: The secret; handed to ``secret-tool`` on stdin, never on the command line.
        attrs: Flat attribute/value list identifying the keyring entry.
        label: Label given to the keyring entry.

    Returns:
        ``True`` only when ``secret-tool`` exits with status 0; ``False`` when the tool is
        missing, fails, times out, or cannot be started.
    """
    tool = _secret_tool()
    if not tool:
        return False
    try:
        proc = subprocess.run(
            [tool, "store", "--label", label, *attrs],
            input=value, text=True, capture_output=True, timeout=20,
        )
    except (subprocess.SubprocessError, OSError):
        return False
    return proc.returncode == 0
|
||||
|
||||
|
||||
def _keyring_lookup() -> str | None:
|
||||
def _keyring_lookup(attrs: list[str]) -> str | None:
|
||||
tool = _secret_tool()
|
||||
if not tool:
|
||||
return None
|
||||
try:
|
||||
proc = subprocess.run(
|
||||
[tool, "lookup", *_SECRET_ATTRS], text=True, capture_output=True, timeout=20
|
||||
[tool, "lookup", *attrs], text=True, capture_output=True, timeout=20
|
||||
)
|
||||
if proc.returncode == 0 and proc.stdout.strip():
|
||||
return proc.stdout.strip()
|
||||
@@ -72,54 +96,67 @@ def _keyring_lookup() -> str | None:
|
||||
return None
|
||||
|
||||
|
||||
def _keyring_clear(attrs: list[str]) -> None:
    """Remove the keyring entry matching ``attrs`` via ``secret-tool clear`` (best-effort).

    Does nothing when ``secret-tool`` is not installed; failures of the tool are ignored.
    """
    tool = _secret_tool()
    if not tool:
        return
    try:
        subprocess.run([tool, "clear", *attrs], capture_output=True, timeout=20)
    except (subprocess.SubprocessError, OSError):
        pass  # best-effort: an unavailable or failing tool must not raise to the caller
|
||||
|
||||
|
||||
def _load_secret(env_var: str | None, attrs: list[str], file: Path) -> str | None:
|
||||
if env_var:
|
||||
env = os.environ.get(env_var)
|
||||
if env and env.strip():
|
||||
return env.strip()
|
||||
from_keyring = _keyring_lookup(attrs)
|
||||
if from_keyring:
|
||||
return from_keyring
|
||||
try:
|
||||
value = file.read_text().strip()
|
||||
return value or None
|
||||
except OSError:
|
||||
return None
|
||||
|
||||
|
||||
def _save_secret(value: str, attrs: list[str], label: str, file: Path) -> None:
    """Persist a secret: in the OS keyring when possible, otherwise in an owner-only file.

    Args:
        value: The secret; surrounding whitespace is stripped before storing.
        attrs: Keyring attribute list identifying the entry.
        label: Label for the keyring entry.
        file: Plaintext fallback path (also removed once the keyring holds the secret).

    Raises:
        OSError: If the fallback file cannot be created or written.
    """
    value = value.strip()
    if _keyring_store(value, attrs, label):
        try:  # don't leave a plaintext copy once it's in the keyring
            file.unlink()
        except OSError:
            pass
        return
    CONFIG_DIR.mkdir(parents=True, exist_ok=True)
    # Create the file already restricted to the owner. Writing first and chmod-ing afterwards
    # would leave the secret readable under the default umask for a short window.
    fd = os.open(file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as handle:
        handle.write(value + "\n")
    try:
        # The creation mode is ignored for a pre-existing file, so tighten that case too.
        file.chmod(0o600)
    except OSError:
        pass
|
||||
|
||||
|
||||
def _clear_secret(attrs: list[str], file: Path) -> None:
    """Forget a secret everywhere: clear the keyring entry and delete the fallback file."""
    _keyring_clear(attrs)
    try:
        file.unlink()
    except OSError:
        # A missing or undeletable file is not an error here.
        return
|
||||
|
||||
|
||||
def load_token() -> str | None:
    """Token from $RIGDOCTOR_TOKEN, then the OS keyring, then a 0600 file.

    Returns:
        The update token, or ``None`` when no source provides a non-empty value.
    """
    # Delegate to the shared helper instead of carrying a second, inlined copy of the lookup.
    return _load_secret("RIGDOCTOR_TOKEN", _SECRET_ATTRS, TOKEN_FILE)
|
||||
|
||||
|
||||
def save_token(token: str) -> None:
    """Save to the OS keyring if possible (encrypted); else a 0600 file.

    Args:
        token: The update token to store.
    """
    # The shared helper strips the value, tries the keyring and falls back to TOKEN_FILE.
    _save_secret(token, _SECRET_ATTRS, "RigDoctor update token", TOKEN_FILE)
|
||||
|
||||
|
||||
def clear_token() -> None:
    """Remove the update token from both the keyring and the fallback file."""
    _clear_secret(_SECRET_ATTRS, TOKEN_FILE)
|
||||
|
||||
|
||||
def token_backend() -> str:
|
||||
@@ -127,17 +164,42 @@ def token_backend() -> str:
|
||||
env = os.environ.get("RIGDOCTOR_TOKEN")
|
||||
if env and env.strip():
|
||||
return "env"
|
||||
if _keyring_lookup() is not None:
|
||||
if _keyring_lookup(_SECRET_ATTRS) is not None:
|
||||
return "keyring"
|
||||
if TOKEN_FILE.exists():
|
||||
return "file"
|
||||
return "none"
|
||||
|
||||
|
||||
def load_ai_key() -> str | None:
    """Claude API key from $RIGDOCTOR_AI_KEY, then the OS keyring, then a 0600 file (M14)."""
    api_key = _load_secret("RIGDOCTOR_AI_KEY", _AI_SECRET_ATTRS, AI_KEY_FILE)
    return api_key
|
||||
|
||||
|
||||
def save_ai_key(key: str) -> None:
    """Store the AI key through ``_save_secret`` (keyring entry "RigDoctor AI key" / AI_KEY_FILE)."""
    _save_secret(value=key, attrs=_AI_SECRET_ATTRS, label="RigDoctor AI key", file=AI_KEY_FILE)
|
||||
|
||||
|
||||
def clear_ai_key() -> None:
    """Remove the AI key from both the keyring and AI_KEY_FILE."""
    _clear_secret(attrs=_AI_SECRET_ATTRS, file=AI_KEY_FILE)
|
||||
|
||||
DEFAULTS: dict = {
|
||||
"interval": 1.0, # sampling interval in seconds (default ≤1 Hz — NFR)
|
||||
"log_max_bytes": 20_000_000, # rotate a log segment past this size
|
||||
"log_backups": 10, # keep this many rotated segments (bounds disk use)
|
||||
"update_check_minutes": 30, # re-check for updates this often while running (0 = off)
|
||||
"elevate_on_launch": True, # GUI asks for the password once at launch (SMART/dmidecode)
|
||||
"alerts_enabled": True, # desktop notifications on overheat / GPU-lost / new version
|
||||
"gpu_temp_alert": 90.0, # °C — alert when GPU reaches this
|
||||
"cpu_temp_alert": 95.0, # °C — alert when CPU reaches this
|
||||
"relay_url": "wss://rigdoctor.jesseyvanofferen.com", # session-sharing relay (M12)
|
||||
"steam_libraries": [], # Steam library paths to scan for games (M6); empty = none picked yet
|
||||
"trigger_mode": "manual", # crash-logger trigger (D6): manual | always-on | game-launch
|
||||
"setup_done": False, # first-run GUI setup wizard completed (M9)
|
||||
"ai_provider": "", # AI assistant (M14, D24): "" (unset) | "ollama" | "claude"
|
||||
"ai_model": "", # model name (e.g. "llama3.1" for Ollama; blank = Claude default)
|
||||
"ai_endpoint": "http://localhost:11434", # Ollama server base URL (Claude uses a fixed endpoint)
|
||||
"logging_enabled": False, # opt-in: app logging + per-diagnostic storage + Report (M15)
|
||||
}
|
||||
|
||||
|
||||
@@ -153,3 +215,29 @@ def load_config() -> dict:
|
||||
except Exception:
|
||||
pass
|
||||
return cfg
|
||||
|
||||
|
||||
def _toml_value(value) -> str:
    """Render a Python value as a TOML value literal.

    Booleans become ``true``/``false``, ints and floats use ``repr``, lists and tuples become
    inline arrays (recursively), and anything else is written as a basic (double-quoted)
    string of ``str(value)``.

    Args:
        value: The value to serialize.

    Returns:
        The TOML text for ``value``.
    """
    if isinstance(value, bool):  # must precede the int check: bool is a subclass of int
        return "true" if value else "false"
    if isinstance(value, (int, float)):
        return repr(value)
    if isinstance(value, (list, tuple)):
        return "[" + ", ".join(_toml_value(v) for v in value) + "]"

    # Basic strings may not contain raw control characters (a literal newline would end the
    # line and corrupt the file), so escape them as well as the backslash and the quote.
    short_escapes = {
        "\\": "\\\\", '"': '\\"', "\b": "\\b", "\t": "\\t",
        "\n": "\\n", "\f": "\\f", "\r": "\\r",
    }
    pieces = []
    for ch in str(value):
        if ch in short_escapes:
            pieces.append(short_escapes[ch])
        elif ch < " " or ch == "\x7f":
            pieces.append(f"\\u{ord(ch):04X}")
        else:
            pieces.append(ch)
    return '"' + "".join(pieces) + '"'
|
||||
|
||||
|
||||
def save_config(values: dict) -> None:
    """Write a flat config.toml (stdlib has no TOML writer).

    Args:
        values: Mapping of config keys to values; each is written as ``key = <value>`` on its
            own line, after a one-line header comment.

    Raises:
        OSError: If the config directory or file cannot be written.
    """
    CONFIG_DIR.mkdir(parents=True, exist_ok=True)
    lines = ["# RigDoctor config — edit in the app (Notifications) or here."]
    lines += [f"{key} = {_toml_value(value)}" for key, value in values.items()]
    # A TOML document must be UTF-8. The locale default could produce another encoding, or
    # fail outright on the non-ASCII dash in the header.
    CONFIG_FILE.write_text("\n".join(lines) + "\n", encoding="utf-8")
|
||||
|
||||
|
||||
def update_config(**changes) -> dict:
    """Apply ``changes`` on top of the loaded config, save the result, and return it."""
    merged = load_config()
    for key, new_value in changes.items():
        merged[key] = new_value
    save_config(merged)
    return merged
|
||||
|
||||
@@ -0,0 +1,288 @@
|
||||
"""AI assistant (M14, D24): explain the collected diagnostics in plain language.
|
||||
|
||||
**Strictly opt-in and never automatic** — the model is contacted ONLY from a direct user
|
||||
action ("Explain with AI" / ``rigdoctor ai explain``), never on launch, after a diagnostic, or
|
||||
in any loop. Choosing/configuring a provider does not contact anything. The user must pick a
|
||||
provider explicitly (there is no default).
|
||||
|
||||
Two providers, both over stdlib ``urllib`` (no pip deps in core):
|
||||
* **ollama** — a local server (data stays on the machine, no key).
|
||||
* **claude** — the Anthropic Messages API (key in the keyring).
|
||||
|
||||
Answers are *grounded*: we pass the actual findings plus matched reference facts
|
||||
(:mod:`ai_knowledge`) and ask the model to reason over them. Output is advisory (D9).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
|
||||
from .. import config
|
||||
from . import ai_knowledge
|
||||
|
||||
_APPID_RE = re.compile(r"\b\d{5,7}\b") # Steam app IDs are 5–7 digits
|
||||
|
||||
PROVIDERS = ("ollama", "claude")
|
||||
OLLAMA_DEFAULT_ENDPOINT = "http://localhost:11434"
|
||||
# Suggested Ollama model — strong instruction-following that fits an 8 GB GPU at Q4. Because we
|
||||
# ground the prompt with reference facts, a 7B model is sufficient here.
|
||||
OLLAMA_SUGGESTED_MODEL = "qwen2.5:7b"
|
||||
CLAUDE_ENDPOINT = "https://api.anthropic.com/v1/messages"
|
||||
CLAUDE_DEFAULT_MODEL = "claude-opus-4-7"
|
||||
CLAUDE_MAX_TOKENS = 2000
|
||||
ANTHROPIC_VERSION = "2023-06-01"
|
||||
|
||||
SYSTEM_PROMPT = (
|
||||
"You are RigDoctor's hardware-diagnostics assistant for Linux gamers (Ubuntu + NVIDIA, games "
|
||||
"via Steam/Proton). You are given session context, the structured findings RigDoctor "
|
||||
"collected — which may include recent game/Proton/system log excerpts scoped to this session "
|
||||
"— plus reference facts. Use the GAME NAME from the session context; never guess the game "
|
||||
"from log paths or app IDs. Correlate log errors with the findings to pinpoint WHEN and WHY "
|
||||
"things went wrong, identify the most likely root cause, and give concrete, ordered next "
|
||||
"steps with exact Linux commands where useful.\n"
|
||||
"Rules: Base your reasoning ONLY on the data and reference facts provided — never invent "
|
||||
"readings, hardware, or log lines. This is LINUX: never suggest Windows-only steps (e.g. "
|
||||
"'run as administrator', registry edits, toggling antivirus). Treat log lines flagged BENIGN "
|
||||
"in the reference facts as non-causal. If no crash was recorded and there are no warning or "
|
||||
"critical findings, say plainly that the session looks healthy and do NOT manufacture a "
|
||||
"problem. Be concise. Present fixes as suggestions and warn before anything that risks data "
|
||||
"loss or instability. Format your answer in Markdown."
|
||||
)
|
||||
|
||||
|
||||
def provider() -> str:
    """Return the ``ai_provider`` config value, or ``""`` when the key is absent."""
    settings = config.load_config()
    return settings.get("ai_provider", "")
|
||||
|
||||
|
||||
def model() -> str:
    """Return the configured model name.

    When ``ai_model`` is blank, fall back to ``CLAUDE_DEFAULT_MODEL`` for the claude provider
    and to ``""`` for anything else.
    """
    configured = config.load_config().get("ai_model", "").strip()
    if not configured:
        if provider() == "claude":
            return CLAUDE_DEFAULT_MODEL
        return ""
    return configured
|
||||
|
||||
|
||||
def endpoint() -> str:
    """Return the stripped ``ai_endpoint`` config value, or the Ollama default if blank/unset."""
    configured = config.load_config().get("ai_endpoint", OLLAMA_DEFAULT_ENDPOINT).strip()
    if configured:
        return configured
    return OLLAMA_DEFAULT_ENDPOINT
|
||||
|
||||
|
||||
def is_local() -> bool:
    """Whether the chosen provider is ``"ollama"``."""
    chosen = provider()
    return chosen == "ollama"
|
||||
|
||||
|
||||
def is_configured() -> bool:
    """Whether the chosen provider is ready (does NOT contact anything).

    ollama needs a model name (the endpoint has a default); claude needs a stored API key;
    with no provider chosen the answer is ``False``.
    """
    chosen = provider()
    if chosen == "ollama":
        return bool(model())
    if chosen == "claude":
        return bool(config.load_ai_key())
    return False
|
||||
|
||||
|
||||
def provider_label() -> str:
    """Return a short human-readable description of the chosen provider and model."""
    chosen = provider()
    if chosen == "ollama":
        return f"Ollama ({model() or '?'} @ {endpoint()})"
    if chosen == "claude":
        return f"Claude ({model()})"
    return "not configured"
|
||||
|
||||
|
||||
def appid_glossary(text: str) -> str:
    """Build an "app ID = game name" glossary for the IDs mentioned in ``text``.

    Candidate IDs are whatever ``_APPID_RE`` matches; they are looked up locally through
    ``steam.appid_names()`` so the model is handed the mapping rather than guessing. IDs that
    cannot be resolved are left out.

    Returns:
        The glossary block, or ``""`` when nothing matches, nothing resolves, or the lookup
        raises.
    """
    found = set(_APPID_RE.findall(text))
    if not found:
        return ""
    try:
        from . import steam
        names = steam.appid_names()
    except Exception:  # never let a glossary lookup break an explanation
        return ""
    resolved = sorted((appid, names[appid]) for appid in found if appid in names)
    if not resolved:
        return ""
    bullets = [f"- {appid} = {name}" for appid, name in resolved]
    return "App IDs (resolved from your installed games):\n" + "\n".join(bullets)
|
||||
|
||||
|
||||
def build_prompt(findings_text: str) -> str:
    """Assemble the user message: app-ID glossary, matched reference facts, then the findings.

    The glossary and facts sections are each followed by a blank line and omitted when empty;
    blank findings are replaced by a placeholder line.
    """
    sections: list[str] = []

    glossary = appid_glossary(findings_text)
    if glossary:
        sections += [glossary, ""]

    facts = ai_knowledge.relevant(findings_text)
    if facts:
        sections.append("Reference facts (use these to interpret the findings):")
        sections.extend(f"- {fact}" for fact in facts)
        sections.append("")

    sections += ["Collected findings:", findings_text.strip() or "(no findings provided)"]
    return "\n".join(sections)
|
||||
|
||||
|
||||
def explain(findings_text: str, timeout: float = 120.0) -> tuple[bool, str]:
    """Ask the configured provider to explain the findings.

    The caller MUST be a direct user action (D24) — this never runs automatically.

    Args:
        findings_text: The findings block to ground the prompt on.
        timeout: Timeout in seconds handed to the provider call.

    Returns:
        ``(True, answer)`` on success, otherwise ``(False, error message)``.
    """
    prompt = build_prompt(findings_text)
    try:
        if provider() == "claude":
            outcome = _claude(prompt, timeout)
        elif provider() == "ollama":
            outcome = _ollama(prompt, timeout)
        else:
            outcome = (False, "No AI provider is configured (Settings → AI assistant).")
    except urllib.error.HTTPError as exc:  # must precede URLError, its base class
        outcome = (False, _http_error(exc))
    except (urllib.error.URLError, OSError, TimeoutError) as exc:
        outcome = (False, f"Couldn't reach the AI provider: {exc}")
    except (ValueError, KeyError, IndexError) as exc:
        outcome = (False, f"Unexpected response from the AI provider: {exc}")
    return outcome
|
||||
|
||||
|
||||
def explain_stream(findings_text: str, on_chunk, timeout: float = 180.0) -> tuple[bool, str]:
    """Streaming variant of :func:`explain`.

    ``on_chunk(text_delta)`` is invoked as tokens arrive; the return value is
    ``(ok, full_text)`` on success or ``(False, error message)`` on failure. Caller MUST be a
    direct user action (D24).
    """
    prompt = build_prompt(findings_text)
    try:
        if provider() == "claude":
            outcome = _claude_stream(prompt, on_chunk, timeout)
        elif provider() == "ollama":
            outcome = _ollama_stream(prompt, on_chunk, timeout)
        else:
            outcome = (False, "No AI provider is configured (Settings → AI assistant).")
    except urllib.error.HTTPError as exc:  # must precede URLError, its base class
        outcome = (False, _http_error(exc))
    except (urllib.error.URLError, OSError, TimeoutError) as exc:
        outcome = (False, f"Couldn't reach the AI provider: {exc}")
    except (ValueError, KeyError, IndexError) as exc:
        outcome = (False, f"Unexpected response from the AI provider: {exc}")
    return outcome
|
||||
|
||||
|
||||
def _post(url: str, payload: dict, headers: dict, timeout: float) -> dict:
    """Send ``payload`` as a JSON request body to ``url`` and return the decoded JSON reply.

    ``headers`` are merged over a default ``Content-Type: application/json``.
    """
    body = json.dumps(payload).encode("utf-8")
    merged_headers = {"Content-Type": "application/json", **headers}
    request = urllib.request.Request(url, data=body, headers=merged_headers)
    with urllib.request.urlopen(request, timeout=timeout) as response:
        return json.load(response)
|
||||
|
||||
|
||||
def _ollama(content: str, timeout: float) -> tuple[bool, str]:
    """One-shot (non-streaming) call to the Ollama ``/api/generate`` endpoint.

    Returns ``(False, message)`` when no model is configured, else ``(True, answer)`` with a
    placeholder when the reply text is empty.
    """
    if not model():
        return False, "No Ollama model is set (Settings → AI assistant)."
    payload = {"model": model(), "system": SYSTEM_PROMPT, "prompt": content, "stream": False}
    url = endpoint().rstrip("/") + "/api/generate"
    reply = _post(url, payload, {}, timeout)
    answer = (reply.get("response") or "").strip()
    return True, answer or "(the model returned an empty response)"
|
||||
|
||||
|
||||
def _claude(content: str, timeout: float) -> tuple[bool, str]:
    """One-shot call to the Claude Messages endpoint.

    Returns ``(False, message)`` when no API key is stored, else ``(True, text)`` where text
    joins every ``text`` content block of the reply (placeholder if there is none).
    """
    key = config.load_ai_key()
    if not key:
        return False, "No Claude API key is set (Settings → AI assistant)."
    # One-shot call: no prompt caching (single request, short system prompt) and no thinking
    # (keeps a button-press snappy). Sampling params are omitted (removed on current Opus).
    payload = {
        "model": model(),
        "max_tokens": CLAUDE_MAX_TOKENS,
        "system": SYSTEM_PROMPT,
        "messages": [{"role": "user", "content": content}],
    }
    headers = {"x-api-key": key, "anthropic-version": ANTHROPIC_VERSION}
    reply = _post(CLAUDE_ENDPOINT, payload, headers, timeout)
    text_blocks = [
        block.get("text", "")
        for block in reply.get("content", [])
        if block.get("type") == "text"
    ]
    answer = "\n".join(text_blocks).strip()
    return True, answer or "(the model returned no text)"
|
||||
|
||||
|
||||
def _stream_request(url: str, payload: dict, headers: dict, timeout: float):
    """Open a JSON request to ``url`` and return the still-open response for the caller to read.

    ``headers`` are merged over a default ``Content-Type: application/json``.
    """
    body = json.dumps(payload).encode("utf-8")
    merged_headers = {"Content-Type": "application/json", **headers}
    request = urllib.request.Request(url, data=body, headers=merged_headers)
    return urllib.request.urlopen(request, timeout=timeout)
|
||||
|
||||
|
||||
def _ollama_stream(content: str, on_chunk, timeout: float) -> tuple[bool, str]:
    """Stream an answer from Ollama's ``/api/generate`` endpoint.

    Args:
        content: The user prompt.
        on_chunk: Callable invoked with each non-empty text fragment as it arrives.
        timeout: Timeout in seconds for the HTTP request.

    Returns:
        ``(True, full_text)`` on success (placeholder text when nothing was produced), or
        ``(False, message)`` when no model is configured or the server reports an error.
    """
    if not model():
        return False, "No Ollama model is set (Settings → AI assistant)."
    payload = {"model": model(), "system": SYSTEM_PROMPT, "prompt": content, "stream": True}
    parts: list[str] = []
    with _stream_request(endpoint().rstrip("/") + "/api/generate", payload, {}, timeout) as resp:
        for raw in resp:  # newline-delimited JSON objects
            line = raw.decode("utf-8", "replace").strip()
            if not line:
                continue
            obj = json.loads(line)
            # Once streaming has begun the HTTP status can no longer change, so a failure
            # arrives as an {"error": ...} object. Report it rather than returning the
            # partial or empty text as a success.
            error = obj.get("error")
            if error:
                return False, str(error)
            chunk = obj.get("response", "")
            if chunk:
                parts.append(chunk)
                on_chunk(chunk)
            if obj.get("done"):
                break
    return True, "".join(parts).strip() or "(the model returned an empty response)"
|
||||
|
||||
|
||||
def _claude_stream(content: str, on_chunk, timeout: float) -> tuple[bool, str]:
    """Stream an answer from the Claude Messages endpoint.

    Reads the response line by line, keeps only ``data:`` lines that hold valid JSON, forwards
    each text delta to ``on_chunk`` and stops at ``message_stop``.

    Returns:
        ``(True, full_text)`` on success (placeholder when no text arrived), or
        ``(False, message)`` when no API key is stored or the stream carries an error event.
    """
    key = config.load_ai_key()
    if not key:
        return False, "No Claude API key is set (Settings → AI assistant)."
    payload = {
        "model": model(), "max_tokens": CLAUDE_MAX_TOKENS, "system": SYSTEM_PROMPT,
        "messages": [{"role": "user", "content": content}], "stream": True,
    }
    headers = {"x-api-key": key, "anthropic-version": ANTHROPIC_VERSION}
    collected: list[str] = []
    with _stream_request(CLAUDE_ENDPOINT, payload, headers, timeout) as resp:
        for raw in resp:  # SSE: parse `data:` lines, accumulate text deltas
            line = raw.decode("utf-8", "replace").strip()
            if not line.startswith("data:"):
                continue
            try:
                event = json.loads(line[5:].strip())
            except ValueError:
                continue
            kind = event.get("type")
            if kind == "message_stop":
                break
            if kind == "error":
                return False, event.get("error", {}).get("message", "stream error")
            if kind != "content_block_delta":
                continue
            delta = event.get("delta", {})
            if delta.get("type") != "text_delta":
                continue
            piece = delta.get("text", "")
            if piece:
                collected.append(piece)
                on_chunk(piece)
    return True, "".join(collected).strip() or "(the model returned no text)"
|
||||
|
||||
|
||||
def _http_error(exc: urllib.error.HTTPError) -> str:
    """Turn an HTTP error from a provider into a one-line, user-facing message.

    The response body is parsed as JSON when possible and its ``error`` member supplies the
    detail text. Both shapes are accepted: an object with a ``message`` field and a plain
    string. Anything else (unreadable body, non-JSON, unexpected shape) yields no detail.

    Args:
        exc: The ``HTTPError`` raised by ``urllib``.

    Returns:
        ``"AI request failed (HTTP <code>)"``, plus an API-key hint for 401/403 and the
        detail text when one could be extracted.
    """
    detail = ""
    try:
        body = json.loads(exc.read().decode("utf-8", "replace"))
    except (ValueError, OSError):
        body = None
    error = body.get("error") if isinstance(body, dict) else None
    if isinstance(error, dict):  # {"error": {"message": "..."}}
        detail = str(error.get("message") or "")
    elif isinstance(error, str):  # {"error": "..."}; calling .get on it would raise
        detail = error
    hint = " — check your API key in Settings → AI assistant." if exc.code in (401, 403) else ""
    return f"AI request failed (HTTP {exc.code}){hint}{(': ' + detail) if detail else ''}"
|
||||
|
||||
|
||||
def format_findings(findings, header: str = "") -> str:
    """Render finding objects as the plain-text bullet list sent to the model.

    Each finding contributes ``- [SEVERITY] category: title`` (trailing whitespace removed),
    followed by `` — detail`` when it has a truthy ``detail``. Missing attributes count as
    empty strings.

    Args:
        findings: Iterable of objects read via ``severity``/``category``/``title``/``detail``.
        header: Optional first line.

    Returns:
        The joined lines, or ``"No findings."`` when there is neither a header nor a finding.
    """
    rendered = [header] if header else []
    for finding in findings:
        level = str(getattr(finding, "severity", "")).upper()
        area = getattr(finding, "category", "")
        summary = getattr(finding, "title", "")
        extra = getattr(finding, "detail", "")
        bullet = f"- [{level}] {area}: {summary}".rstrip()
        if extra:
            bullet = f"{bullet} — {extra}"
        rendered.append(bullet)
    if not rendered:
        return "No findings."
    return "\n".join(rendered)
|
||||
@@ -0,0 +1,128 @@
|
||||
"""Curated reference knowledge for the AI assistant (M14, D24) — "RAG-lite".
|
||||
|
||||
A small, hand-written set of domain facts (Xid codes, SMART attributes, common Linux-gaming
|
||||
error signatures, tunable meanings). At explain-time we select the entries whose triggers
|
||||
appear in the collected findings and inject them into the prompt, so even a small local model
|
||||
gets the relevant facts instead of having to recall them. Provider-agnostic — it sharpens
|
||||
Claude too.
|
||||
|
||||
Retrieval is exact keyword/substring matching, not embeddings: the keys here (``Xid 79``,
|
||||
``SMART 197``, ``fallen off the bus``) are precise, so a vector store would be overkill and
|
||||
would break the stdlib-only rule. Each entry is ``(triggers, fact)``; a trigger matches
|
||||
case-insensitively against the findings text.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
# (triggers, fact). Keep facts short, factual, and cause-oriented — they go into the prompt.
|
||||
ENTRIES: list[tuple[tuple[str, ...], str]] = [
    # --- NVIDIA Xid / driver faults ---------------------------------------------------
    (
        ("xid 79", "fallen off the bus", "gpu has fallen"),
        "NVIDIA Xid 79 / 'GPU has fallen off the bus' = the driver lost PCIe contact with the GPU "
        "mid-operation. Usual causes, in order: insufficient/unstable PSU power or a bad power "
        "cable, an unstable overclock/undervolt, PCIe link or riser issues, or overheating. Often "
        "fatal to the session (hard freeze).",
    ),
    (
        ("xid 13", "graphics engine exception"),
        "NVIDIA Xid 13 = graphics engine exception, frequently an unstable GPU overclock or a "
        "faulty application shader; revert any OC/UV and test.",
    ),
    (
        ("xid 31", "fifo: mmu fault", "mmu fault"),
        "NVIDIA Xid 31 = MMU fault (illegal memory access by the app/driver) — often a game/driver "
        "bug or unstable VRAM overclock.",
    ),
    (
        ("xid 8", "xid 62", "xid 63", "xid 64"),
        "These Xid codes commonly indicate VRAM/ECC or memory-training problems — suspect failing "
        "VRAM or an unstable memory overclock.",
    ),
    (
        ("va-space mapping", "gpu_vaspace", "dmaallocmapping", "nvkms memory for gem",
         "open kernel module", "nvidia open"),
        "NVIDIA open-kernel-module VA-space mapping errors (gpu_vaspace.c / dmaAllocMapping / "
        "'Failed to allocate NVKMS memory for GEM object') are a driver-internal fault on the open "
        "module (nvidia-*-open). They can storm for minutes and end in a HARD FREEZE with NO Xid "
        "logged — so the GPU never 'falls off the bus', and this is distinct from the Xid 79 "
        "hardware drop. Fix path: switch from the open to the proprietary NVIDIA kernel module and "
        "update to the latest driver branch.",
    ),
    # --- storage health ---------------------------------------------------------------
    (
        ("smart 197", "current_pending_sector", "pending sector"),
        "SMART 197 (Current Pending Sector) > 0 = sectors the drive can't read and is waiting to "
        "reallocate — early sign of a failing disk. Back up now and run an extended self-test.",
    ),
    (
        ("smart 198", "offline_uncorrectable", "uncorrectable"),
        "SMART 198 (Offline Uncorrectable) > 0 = sectors that failed to read/write — the drive is "
        "degrading; back up immediately.",
    ),
    (
        ("smart 5", "reallocated_sector", "reallocated sector"),
        "SMART 5 (Reallocated Sectors) climbing over time = the drive is using spares for bad "
        "sectors; a rising count predicts failure.",
    ),
    (
        ("media and data integrity errors", "percentage used", "available spare"),
        "NVMe health: 'Media and Data Integrity Errors' > 0 is concerning; 'Percentage Used' near "
        "or over 100% and 'Available Spare' below the threshold mean the SSD is near end-of-life.",
    ),
    # --- thermals, memory, processes --------------------------------------------------
    (
        ("thermal throttling", "throttle", "tjmax", "package id 0"),
        "Sustained CPU/GPU temperatures at the thermal limit cause throttling (clocks drop to shed "
        "heat) — check cooling, fan curves, paste, and case airflow.",
    ),
    (
        ("oom", "out of memory", "oom-killer", "killed process"),
        "The kernel OOM-killer terminates processes when RAM (and swap) are exhausted — a freeze "
        "or a game crashing to desktop under memory pressure points here; check swap and "
        "vm.swappiness, and watch for a memory leak.",
    ),
    (
        ("segfault", "general protection fault", "segmentation fault"),
        "A segfault/GP-fault is a process accessing invalid memory — for games under Proton it's "
        "often a Proton/Wine or anticheat incompatibility, or unstable RAM (run memtest).",
    ),
    (
        ("proton", "wine", "d3d", "vkd3d", "dxvk"),
        "Proton/Wine issues: mismatched Proton version, missing vkd3d/DXVK, or shader-cache "
        "corruption are common. Try a known-good Proton version and clear the shader cache.",
    ),
    # --- tunables ---------------------------------------------------------------------
    (
        ("pcie_aspm", "aspm"),
        "PCIe ASPM (Active State Power Management) can cause GPU/NVMe instability on some boards; "
        "setting pcie_aspm=off is a common stability fix at a small idle-power cost.",
    ),
    (
        ("cpu_governor", "powersave", "schedutil", "performance governor"),
        "The CPU frequency governor sets the clock policy; 'performance' avoids latency spikes from "
        "ramp-up at a higher power draw, while 'powersave'/'schedutil' favor efficiency.",
    ),
    (
        ("nvidia persistence", "persistence mode"),
        "NVIDIA persistence mode keeps the driver loaded when no app is using the GPU, avoiding "
        "re-init stalls — harmless to enable.",
    ),
    # --- benign log noise -------------------------------------------------------------
    (
        ("libnvidia-ml.so", "interface.h", "failed to load \"libnvidia-ml"),
        "BENIGN: a Steam log assertion 'Failed to load libnvidia-ml.so.1' (from interface.h) is "
        "logged on many normal launches — the Steam runtime sandbox can't see the host NVML library. "
        "It is NOT by itself a crash cause. Only investigate the driver if the GPU is genuinely "
        "undetected (nvidia-smi fails).",
    ),
    (
        ("minidump", ".dmp", "uploading minidump"),
        "BENIGN-by-default: a minidump upload line means a crash handler ran AND that the game/engine "
        "routinely uploads dumps; it is not proof that THIS session crashed unless a hard freeze or "
        "non-zero exit was also recorded. Don't treat a routine minidump line as the root cause.",
    ),
    (
        ("fork without exec", "skipping destruction"),
        "BENIGN: 'pid X != Y, skipping destruction (fork without exec?)' is routine Steam/Proton "
        "process bookkeeping, not an error.",
    ),
    # --- crash-dump (.dmp) reasoning -------------------------------------------------
    (
        ("access violation", "0xc0000005", "0xc0000006"),
        "Windows exception 0xC0000005 (access violation) = the game read/wrote/executed memory it "
        "wasn't allowed to. A write/read to a low address (near 0x0) is a null-pointer dereference, "
        "usually a game or graphics-driver bug; under Proton it's often a DXVK/VKD3D or Proton-version "
        "issue. Identify the faulting MODULE to localize the fault.",
    ),
    (
        ("stack overflow", "0xc00000fd"),
        "Windows exception 0xC00000FD (stack overflow) = unbounded recursion or a huge stack "
        "allocation in the crashing module — almost always a software bug in that module.",
    ),
    (
        ("0xc0000409", "stack buffer overrun", "fast fail"),
        "Windows 0xC0000409 (stack buffer overrun / __fastfail) = a security check tripped on memory "
        "corruption; frequently anticheat or a DRM/overlay injecting into the game. Suspect overlays "
        "(Steam/Discord/MSI Afterburner-equivalents) and anticheat compatibility under Proton.",
    ),
    (
        ("0xc0000374", "heap corruption"),
        "Windows 0xC0000374 (heap corruption) = something scribbled over heap memory earlier; the "
        "crash point is a symptom, not the cause. Often a mod, an injected overlay, or unstable RAM.",
    ),
    (
        ("nvwgf2umx", "nvoglv", "nvd3dum", "nvldumd"),
        "A faulting NVIDIA user-mode driver DLL (nvwgf2umx/nvoglv/nvd3dum) means the crash happened "
        "inside the GPU driver under Proton. On Linux this points at the NVIDIA driver + the "
        "DXVK/VKD3D translation layer: try a different driver branch or Proton/Proton-GE version, "
        "clear the DXVK shader cache, and revert any GPU overclock/undervolt.",
    ),
    (
        ("easyanticheat", "eac", "battleye", "beclient", "anticheat"),
        "A faulting anticheat module (EasyAntiCheat/BattlEye) under Proton is usually a compatibility "
        "problem: confirm the title's anticheat has Proton/Linux support enabled and try the Proton "
        "version the community recommends for it (often Proton-GE or a specific Valve build).",
    ),
    (
        ("d3d11.dll", "d3d12.dll", "dxgi.dll", "d3d9.dll", "dxvk", "vkd3d"),
        "A crash in a Direct3D/DXGI module under Proton runs through DXVK (D3D9/10/11) or VKD3D-Proton "
        "(D3D12). Try a known-good Proton version, update/override DXVK-VKD3D, clear the shader cache, "
        "and check the GPU driver — these are the usual fixes for D3D faults on Linux.",
    ),
]
|
||||
|
||||
|
||||
def relevant(findings_text: str, limit: int = 8, entries=None) -> list[str]:
    """Return the reference facts whose triggers occur in `findings_text`.

    Matching is case-insensitive and anchored, so short triggers do not fire inside
    unrelated words or numbers:

    * a trigger that starts with a letter or digit must begin a word ("oom" does not match
      "room"), but may still be followed by a suffix ("throttle" matches "throttled",
      "nvoglv" matches "nvoglv64.dll");
    * a trigger that ends in a digit must not be followed by another digit ("xid 8" does
      not match "xid 81");
    * a purely alphabetic trigger of three letters or fewer must be a whole word ("eac"
      does not match "each").

    Args:
        findings_text: The rendered findings to search.
        limit: Maximum number of facts returned; zero or less returns nothing.
        entries: Optional ``(triggers, fact)`` pairs to search instead of the module-level
            ``ENTRIES`` table.

    Returns:
        The matching facts in table order, at most `limit` of them.
    """
    import re  # local import: this module has no other runtime imports

    if limit <= 0 or not findings_text:
        return []

    def anchored(trigger: str) -> str:
        """Regex for one trigger with the boundary rules described above."""
        needle = trigger.lower()
        regex = re.escape(needle)
        if needle[:1].isalnum():
            regex = r"(?<![a-z0-9])" + regex
        if needle[-1:].isdigit():
            regex += r"(?![0-9])"
        elif len(needle) <= 3 and needle.isalpha():
            regex += r"(?![a-z0-9])"
        return regex

    table = ENTRIES if entries is None else entries
    haystack = findings_text.lower()
    hits: list[str] = []
    for triggers, fact in table:
        if any(re.search(anchored(t), haystack) for t in triggers):
            hits.append(fact)
            if len(hits) >= limit:
                break
    return hits
|
||||
@@ -0,0 +1,145 @@
|
||||
"""Desktop alerts (M8): notify on overheat / GPU-lost / critical kernel events / new version.
|
||||
|
||||
Edge-triggered: a sustained condition (hot GPU, GPU-lost) fires once when it becomes true and
|
||||
can re-fire only after it clears + a cooldown; momentary **kernel events** (Xid, OOM-kill, MCE,
|
||||
PCIe AER, disk I/O errors) are scanned from the kernel log every `event_interval` seconds and
|
||||
fire one-shot (cooldown-gated). So a 1-Hz sample loop never spams. No-op if notify-send absent.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
import subprocess
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from ..config import DATA_DIR
|
||||
from .sample import Sample
|
||||
|
||||
APP_NAME = "RigDoctor"
|
||||
_STOCK_ICON = "utilities-system-monitor"
|
||||
# The RigDoctor icon, so notifications match the app/dock icon. Prefer the copy that
|
||||
# desktop integration installs into the icon theme (~/.local/share/icons/...); fall back to
|
||||
# the bundled asset for source/dev runs, then to a stock icon if neither is present.
|
||||
_INSTALLED_ICON = DATA_DIR.parent / "icons" / "hicolor" / "scalable" / "apps" / "rigdoctor.svg"
|
||||
_BUNDLED_ICON = Path(__file__).parents[1] / "gui" / "assets" / "rigdoctor.svg"
|
||||
|
||||
|
||||
def available() -> bool:
    """Report whether the ``notify-send`` executable can be found on PATH."""
    located = shutil.which("notify-send")
    return located is not None
|
||||
|
||||
|
||||
def _icon() -> str:
    """Pick the notification icon, re-checked on every call.

    The installed icon is tried first, then the bundled asset; the first one that exists is
    returned as a string path. If neither exists (or checking them fails with ``OSError``)
    the stock icon name is returned. Resolution happens at call time because the themed
    copy may be installed late.
    """
    candidates = (_INSTALLED_ICON, _BUNDLED_ICON)
    for candidate in candidates:
        try:
            present = candidate.exists()
        except OSError:
            continue
        if present:
            return str(candidate)
    return _STOCK_ICON
|
||||
|
||||
|
||||
def notify(title: str, message: str, urgency: str = "normal") -> bool:
    """Send a desktop notification through ``notify-send`` (best-effort).

    Args:
        title: Notification summary line.
        message: Notification body.
        urgency: ``low``, ``normal`` or ``critical``; passed to ``-u`` unchanged.

    Returns:
        True if ``notify-send`` ran and exited with status 0. False if it is not installed,
        could not be started, timed out after 10 seconds, or exited with an error.
    """
    if not available():
        return False
    argv = [
        "notify-send", "-a", APP_NAME, "-u", urgency, "-i", _icon(),
        "--",  # end of options: a title/body starting with "-" must not be parsed as a flag
        title, message,
    ]
    try:
        finished = subprocess.run(argv, timeout=10, check=False)
    except (subprocess.SubprocessError, OSError):
        return False
    return finished.returncode == 0
|
||||
|
||||
|
||||
class AlertMonitor:
    """Evaluate samples and raise edge-triggered desktop alerts.

    Sustained conditions (GPU/CPU over temperature, GPU not responding) are latched: they
    notify once when they become true, and can notify again only after the condition has
    cleared and `cooldown` seconds have passed since the last notification for that key.
    Momentary kernel events are read from the kernel log at most every `event_interval`
    seconds and are gated by the cooldown alone.
    """

    def __init__(self, gpu_temp: float = 90.0, cpu_temp: float = 95.0, cooldown: float = 300.0,
                 event_interval: float = 30.0):
        """Configure thresholds and timing.

        Args:
            gpu_temp: GPU temperature at or above which the GPU alert fires.
            cpu_temp: CPU temperature at or above which the CPU alert fires.
            cooldown: Minimum seconds between two notifications for the same key.
            event_interval: Minimum seconds between two kernel-log scans.
        """
        self.gpu_temp = gpu_temp
        self.cpu_temp = cpu_temp
        self.cooldown = cooldown
        self.event_interval = event_interval  # how often to scan the kernel log
        self.enabled = True
        self._active: dict[str, bool] = {}  # key -> condition currently latched
        self._last: dict[str, float] = {}  # key -> time.time() of the last notification
        self._last_kernel_scan = time.time()  # only alert on events after the monitor starts

    def _fire(self, key: str, title: str, message: str, urgency: str = "critical") -> None:
        """Notify for a sustained condition unless it is already latched or cooling down."""
        if self._active.get(key):
            return  # already alerting; wait until it clears
        now = time.time()
        if now - self._last.get(key, 0.0) < self.cooldown:
            return  # not latched here, so it can still fire once the cooldown has passed
        self._active[key] = True
        self._last[key] = now
        notify(title, message, urgency)

    def _notify_once(self, key: str, title: str, message: str, urgency: str = "critical") -> None:
        """One-shot alert for a momentary event (cooldown-gated, no active latch)."""
        now = time.time()
        if now - self._last.get(key, 0.0) < self.cooldown:
            return
        self._last[key] = now
        notify(title, message, urgency)

    def _clear(self, key: str) -> None:
        """Release the latch for `key` so the condition can alert again later."""
        self._active[key] = False

    def _scan_kernel_events(self) -> None:
        """Periodically scan the kernel log for new critical events (Xid/OOM/MCE/PCIe/disk).

        Does nothing until `event_interval` seconds have passed since the previous scan.
        Only log text since the previous scan is requested, and at most one notification
        per event label is attempted per scan. Any failure while importing, reading or
        parsing the log is swallowed: alerting must never crash the sample loop.
        """
        now = time.time()
        if now - self._last_kernel_scan < self.event_interval:
            return
        since = self._last_kernel_scan
        self._last_kernel_scan = now
        try:
            from . import syslogs

            text = syslogs.kernel_log(since=since)
            # Materialise inside the guard so a lazy parser cannot raise later in the loop.
            events = list(syslogs.scan_critical(text)) if text else []
        except Exception:  # alerting must never crash the sample loop
            return
        seen: set[str] = set()
        for label, line in events:
            if label in seen:  # one alert per category per scan
                continue
            seen.add(label)
            self._notify_once(f"kernel:{label}", label, line[:180])

    def check(self, sample: Sample) -> None:
        """Evaluate one sample and fire or clear alerts accordingly.

        Reads ``sample.readings`` and looks at: the first GPU ``temp`` reading with an
        empty label, the maximum of all CPU ``temp`` readings, and the presence of a GPU
        ``status`` reading labelled ``query-timeout``. A temperature alert is left
        untouched when no matching reading carries a value. Finishes with a (rate-limited)
        kernel-log scan. No-op while ``enabled`` is False.
        """
        if not self.enabled:
            return
        readings = sample.readings

        gpu_t = next(
            (r.value for r in readings
             if r.source == "gpu" and r.metric == "temp" and r.label == "" and r.value is not None),
            None,
        )
        if gpu_t is not None:
            if gpu_t >= self.gpu_temp:
                self._fire("gpu_temp", "GPU overheating", f"GPU at {gpu_t:.0f} °C")
            else:
                self._clear("gpu_temp")

        cpu_temps = [r.value for r in readings
                     if r.source == "cpu" and r.metric == "temp" and r.value is not None]
        if cpu_temps:
            cpu_t = max(cpu_temps)
            if cpu_t >= self.cpu_temp:
                self._fire("cpu_temp", "CPU overheating", f"CPU at {cpu_t:.0f} °C")
            else:
                self._clear("cpu_temp")

        lost = any(r.source == "gpu" and r.metric == "status" and r.label == "query-timeout"
                   for r in readings)
        if lost:
            self._fire("gpu_lost", "GPU not responding", "nvidia-smi query timed out — the GPU may have dropped")
        else:
            self._clear("gpu_lost")

        self._scan_kernel_events()  # Xid / OOM / MCE / PCIe / disk I/O from the kernel log
|
||||
@@ -0,0 +1,63 @@
|
||||
"""Application logging (M15) — opt-in via the `logging_enabled` setting.
|
||||
|
||||
When enabled, app events/errors are written to a rotating file (`config.APP_LOG`); when
|
||||
disabled, nothing is written (no file is created). All RigDoctor code logs through
|
||||
``applog.get_logger(__name__)``; the handler is attached once at startup by :func:`setup`.
|
||||
Stdlib ``logging`` only.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from logging.handlers import RotatingFileHandler
|
||||
|
||||
from .. import config
|
||||
|
||||
_ROOT = "rigdoctor"
|
||||
_configured = False
|
||||
|
||||
|
||||
def setup(force: bool = False) -> bool:
    """Attach the rotating file handler if logging is enabled. Idempotent.

    Args:
        force: Re-evaluate even when already configured. With logging disabled this also
            detaches and closes any attached handlers (runtime toggle-off); with logging
            enabled it replaces the handler.

    Returns:
        True when logging is enabled and a handler is attached; False when logging is
        disabled or the log file could not be set up.
    """
    global _configured
    logger = logging.getLogger(_ROOT)
    enabled = bool(config.load_config().get("logging_enabled", False))

    if not enabled:
        if force:  # toggled off at runtime — detach so we stop writing
            for h in list(logger.handlers):
                logger.removeHandler(h)
                h.close()
            _configured = False
        return False

    if _configured and not force:
        return True
    for h in list(logger.handlers):  # avoid duplicate handlers on re-setup
        logger.removeHandler(h)
        h.close()
    # Nothing is attached at this point. If opening the file fails below, a later plain
    # setup() must retry instead of reporting logging as on.
    _configured = False
    try:
        config.STATE_DIR.mkdir(parents=True, exist_ok=True)
        handler = RotatingFileHandler(config.APP_LOG, maxBytes=2_000_000, backupCount=3,
                                      encoding="utf-8")
        handler.setFormatter(logging.Formatter(
            "%(asctime)s %(levelname)-7s %(name)s: %(message)s"))
        logger.addHandler(handler)
        logger.setLevel(logging.INFO)
        logger.propagate = False
        _configured = True
        logger.info("logging started (rigdoctor %s)", _version())
    except OSError:
        return False
    return True
|
||||
|
||||
|
||||
def get_logger(name: str) -> logging.Logger:
    """Return the child logger for the last component of a dotted `name`.

    Only the text after the final ``.`` is kept and appended to the root logger name. Safe
    to call before setup — the logger just won't write until a handler is attached.
    """
    leaf = name.rpartition(".")[2]
    return logging.getLogger(_ROOT + "." + leaf)
|
||||
|
||||
|
||||
def _version() -> str:
    """Return the parent package's ``__version__`` (imported at call time)."""
    from .. import __version__ as current

    return current
|
||||
@@ -45,4 +45,31 @@ COMPONENTS: tuple[Component, ...] = (
|
||||
"libsecret", "Encrypted token storage", "Updates",
|
||||
"Store the update token in the OS keyring, encrypted", ("libsecret-tools",), "secret-tool",
|
||||
),
|
||||
Component(
|
||||
"gamemode", "Feral GameMode", "Gaming",
|
||||
"Auto-applies performance tweaks (CPU governor, scheduling) while a game runs",
|
||||
("gamemode",), "gamemoderun",
|
||||
),
|
||||
Component(
|
||||
"mangohud", "MangoHud", "Gaming",
|
||||
"In-game overlay for FPS, frame times, and temperatures", ("mangohud",), "mangohud",
|
||||
),
|
||||
Component(
|
||||
"cpupower", "cpupower", "Gaming",
|
||||
"Read/set the CPU frequency governor (e.g. performance for gaming)",
|
||||
("linux-tools-common", "linux-tools-generic"), "cpupower",
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def by_id(component_id: str) -> Component | None:
    """Return the first catalog component whose ``id`` equals `component_id`, else None."""
    for component in COMPONENTS:
        if component.id == component_id:
            return component
    return None
|
||||
|
||||
|
||||
def by_bundle() -> dict[str, list[Component]]:
    """Group the catalog by ``bundle`` for the setup wizard.

    Bundles appear in the order they are first seen in the catalog, and components keep
    their catalog order within each bundle.
    """
    grouped: dict[str, list[Component]] = {}
    for component in COMPONENTS:
        if component.bundle not in grouped:
            grouped[component.bundle] = []
        grouped[component.bundle].append(component)
    return grouped
|
||||
|
||||
@@ -0,0 +1,113 @@
|
||||
"""User-added games (M6): a manual list for titles no launcher reports.
|
||||
|
||||
Some games never show up in a Steam/Lutris/Heroic scan — standalone mod launchers like
|
||||
**SPT** (Single-Player Tarkov), itch.io downloads, or any hand-installed executable. This
|
||||
module keeps a small user-authored list so those still appear in the game list and can be
|
||||
picked for a focused diagnostic, in the same `steam.Game` shape as every other source.
|
||||
|
||||
Each entry is a name plus two optionals: a **launch command** (so `rigdoctor games play`
|
||||
can start it under the auto-capture wrapper) and a **log directory** (so a crash diagnostic
|
||||
can read the game's own logs — e.g. SPT's `logs/tarkov-latest.log`). Stored as JSON in
|
||||
`config.CUSTOM_GAMES_FILE`; stdlib only; every reader degrades to [] on a missing/bad file.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import shlex
|
||||
|
||||
from .. import config
|
||||
from .steam import Game
|
||||
|
||||
LAUNCHER = "custom"
|
||||
|
||||
|
||||
def _load() -> list[dict]:
    """Read the stored game entries.

    The file is read as UTF-8 (matching `_save`). A missing or unreadable file, invalid
    JSON, undecodable bytes, or an unexpected top-level shape all yield ``[]``. Only dict
    entries with a truthy ``"name"`` are returned.
    """
    try:
        data = json.loads(config.CUSTOM_GAMES_FILE.read_text(encoding="utf-8"))
    except (OSError, ValueError):  # UnicodeDecodeError is a ValueError
        return []
    games = data.get("games") if isinstance(data, dict) else None
    if not isinstance(games, list):
        return []
    return [g for g in games if isinstance(g, dict) and g.get("name")]


def _save(games: list[dict]) -> None:
    """Write the entries as ``{"games": [...]}`` JSON, creating the parent directory.

    Names are written unescaped (``ensure_ascii=False``), so the file is encoded explicitly
    as UTF-8 instead of relying on the locale's default encoding.
    """
    config.CUSTOM_GAMES_FILE.parent.mkdir(parents=True, exist_ok=True)
    text = json.dumps({"games": games}, indent=2, ensure_ascii=False) + "\n"
    config.CUSTOM_GAMES_FILE.write_text(text, encoding="utf-8")
|
||||
|
||||
|
||||
def names() -> list[str]:
    """Return the stored game names as strings, in stored order."""
    stored = _load()
    return [str(entry["name"]) for entry in stored]
|
||||
|
||||
|
||||
def get(name: str) -> dict | None:
    """Return the stored entry for a game, or None if there is no match.

    The lookup key is stripped and lower-cased, then compared against each stored name
    lower-cased; the first match wins.
    """
    wanted = (name or "").strip().lower()
    for entry in _load():
        if str(entry["name"]).lower() == wanted:
            return entry
    return None
|
||||
|
||||
|
||||
def add(name: str, command: str | None = None, logdir: str | None = None) -> bool:
    """Add a game by name, with an optional launch command and log directory.

    When a command is given but no logdir, a ``logs/`` directory next to the command's
    program is inferred if it exists (covers SPT's layout). Inference is skipped when the
    program has no directory part — a bare name such as ``game`` would otherwise pick up
    an unrelated ``./logs`` in the current working directory — and when the command cannot
    be parsed (unbalanced quotes). A leading ``~`` in the program path is expanded before
    the check.

    Args:
        name: Display name; surrounding whitespace is stripped.
        command: Launch command line, stored as given (stripped).
        logdir: The game's own log directory; ``~`` is expanded before storing.

    Returns:
        False if the name is blank or already present (case-insensitive), else True after
        the entry has been saved.
    """
    name = (name or "").strip()
    if not name:
        return False
    if get(name):
        return False
    entry: dict = {"name": name}
    command = (command or "").strip()
    logdir = (logdir or "").strip()
    if command:
        entry["command"] = command
        if not logdir:
            try:
                program = os.path.expanduser(_argv0(command))
            except ValueError:  # unbalanced quotes: nothing reliable to infer from
                program = ""
            folder = os.path.dirname(program)
            if folder:
                sibling = os.path.join(folder, "logs")
                if os.path.isdir(sibling):
                    logdir = sibling
    if logdir:
        entry["logdir"] = os.path.expanduser(logdir)
    games = _load()
    games.append(entry)
    _save(games)
    return True
|
||||
|
||||
|
||||
def remove(name: str) -> bool:
    """Delete every stored entry whose name matches `name`, ignoring case.

    The file is rewritten only when something was actually dropped. Returns True if at
    least one entry was removed, False otherwise.
    """
    target = (name or "").strip().lower()
    before = _load()
    after = [entry for entry in before if str(entry["name"]).lower() != target]
    if len(after) == len(before):
        return False
    _save(after)
    return True
|
||||
|
||||
|
||||
def _argv0(command: str) -> str:
|
||||
parts = shlex.split(command)
|
||||
return parts[0] if parts else command
|
||||
|
||||
|
||||
def command(name: str) -> list[str] | None:
    """Return the stored launch command split into an argv list.

    None is returned when the game is unknown or has no (non-empty) command.
    """
    entry = get(name) or {}
    raw = entry.get("command")
    if not raw:
        return None
    return shlex.split(raw)
|
||||
|
||||
|
||||
def log_dir(name: str) -> str | None:
    """Return the game's stored log directory if it exists on disk right now.

    None is returned when the game is unknown, has no log directory recorded, or the
    recorded path is not a directory.
    """
    stored = (get(name) or {}).get("logdir")
    if stored and os.path.isdir(stored):
        return stored
    return None
|
||||
|
||||
|
||||
def scan() -> list[Game]:
    """Return the user-added games as `Game` objects, ordered by lower-cased name.

    Each object carries the stored name and ``launcher='custom'``; ``appid``, ``library``
    and ``installdir`` are left as empty strings.
    """
    found = []
    for entry in _load():
        found.append(
            Game(appid="", name=str(entry["name"]), library="", installdir="", launcher=LAUNCHER)
        )
    found.sort(key=lambda game: game.name.lower())
    return found
|
||||
@@ -0,0 +1,205 @@
|
||||
"""Guided diagnostic session (SPEC §4 / ARCHITECTURE §7.1): orchestrate M3 + M4.
|
||||
|
||||
The seed use case, one flow: **pick a game** → **focused crash-capture** scoped to that
|
||||
session (M3, tagged with the game) → on **finish**, **scan & analyze** (M4 health report)
|
||||
over the captured window + system logs → return a prioritized result. This is not a new
|
||||
module — it's a single shared callable so the CLI, GUI, and tray run the identical flow.
|
||||
|
||||
The capture is **manually bracketed** (start/finish) for now; auto start/stop on game launch
|
||||
(the D12 wrapper/watcher) plugs in here later without changing the result shape.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
|
||||
from .. import config
|
||||
from . import reccontrol
|
||||
from .crashlog import Summary, summarize
|
||||
from .health import CRITICAL, OK, WARNING, Finding
|
||||
|
||||
_SEV_ORDER = {CRITICAL: 0, WARNING: 1, "info": 2, OK: 3}
|
||||
|
||||
|
||||
@dataclass
class DiagnosticResult:
    """What a finished (or post-crash) diagnostic hands back to the caller.

    Fields:
        game: The focused game, or None when it is not known.
        summary: Capture window — peak temps/power, events, last samples (M3).
        findings: Health findings — Xid/SMART/driver/etc. (M4).
        dir: Storage directory when logging is on (M15); otherwise None.
    """

    game: str | None
    summary: Summary
    findings: list[Finding]
    dir: str | None = None
|
||||
|
||||
|
||||
@dataclass
class CrashInfo:
    """Facts about a capture session that ended without a clean stop.

    Fields:
        game: The focused game, or None when it is not known.
        samples: Number of samples in the capture.
        when: Timestamp of the last captured sample (≈ when the freeze hit), or None.
        gpu_lost: Whether a GPU-lost event was recorded during the session.
    """

    game: str | None
    samples: int
    when: float | None
    gpu_lost: bool
|
||||
|
||||
|
||||
def _clear_diag_log() -> None:
    """Delete the diagnostic log and its ``<name>.*`` siblings so a capture starts fresh.

    Files that are missing or cannot be removed are skipped silently.
    """
    live = config.DIAG_LOG
    stale = [live]
    stale.extend(live.parent.glob(live.name + ".*"))
    for path in stale:
        try:
            path.unlink()
        except OSError:
            continue
|
||||
|
||||
|
||||
def start(game: str | None = None, interval: float | None = None) -> int | None:
    """Begin a focused capture, tagged with the game, into the dedicated diagnostic log.

    If the existing diagnostic log holds an unanalyzed crash it is first moved to the crash
    archive so the new capture does not overwrite it; the old log and its segments are then
    cleared.

    Returns the pid, or None if a capture is already running.
    """
    if reccontrol.running_pid():
        return None
    live = config.DIAG_LOG
    unanalyzed = _crash_from_log(live)
    if unanalyzed:  # preserve an unanalyzed crash before overwriting it
        try:
            live.replace(config.DIAG_CRASH)
        except OSError:
            pass
    _clear_diag_log()
    return reccontrol.start_background(interval=interval, out=str(live), game=game)
|
||||
|
||||
|
||||
def is_running() -> bool:
    """Report whether the recorder currently has a pid (i.e. a capture is in progress)."""
    pid = reccontrol.running_pid()
    return pid is not None
|
||||
|
||||
|
||||
def active() -> dict | None:
    """Return the recorder's status for the in-progress session, or None when idle."""
    return reccontrol.read_status() if is_running() else None
|
||||
|
||||
|
||||
def _await_stopped(timeout: float = 6.0) -> None:
    """Block until the recorder has no pid, or `timeout` seconds have elapsed.

    Polls every 0.1 s against a monotonic deadline; returns quietly either way.
    """
    give_up_at = time.monotonic() + timeout
    while True:
        if not reccontrol.running_pid():
            return
        if time.monotonic() >= give_up_at:
            return
        time.sleep(0.1)
|
||||
|
||||
|
||||
def _game_from_summary(summary: Summary) -> str | None:
|
||||
"""Recover the focused game from the log's 'game' event (survives a crash + reboot)."""
|
||||
for _ts, kind, detail in reversed(summary.events):
|
||||
if kind == "game" and detail:
|
||||
return detail
|
||||
return None
|
||||
|
||||
|
||||
def finish(last_n: int = 10, log_path=None) -> DiagnosticResult:
    """Stop the capture (if running), summarize the window, and run the health report.

    Args:
        last_n: Passed to the summarizer as ``last_n``.
        log_path: Capture log to summarize; defaults to the diagnostic log.

    Returns:
        The diagnostic result. The game comes from the capture's own ``game`` event when
        present, otherwise from the recorder status. The result is also handed to the
        storage step, which may fill in ``dir``.
    """
    from .health import run_health_checks

    reccontrol.stop_background()
    _await_stopped()
    capture = log_path or config.DIAG_LOG
    window = summarize(capture, last_n=last_n)
    focused = _game_from_summary(window)
    if not focused:
        focused = (reccontrol.read_status() or {}).get("game")
    outcome = DiagnosticResult(game=focused, summary=window, findings=run_health_checks())
    _store(outcome, capture, window)
    return outcome
|
||||
|
||||
|
||||
def _store(result: DiagnosticResult, capture_path, summary: Summary) -> None:
    """Persist the diagnostic to its own directory when logging is enabled (M15).

    Logs are requested from 60 seconds before the capture's start when a start time is
    known. On success the directory is recorded on ``result.dir``. Every exception is
    swallowed: storage must never break a diagnostic.
    """
    try:
        from . import diagstore

        since = None
        if summary.start:
            since = summary.start - 60
        saved_to = diagstore.store(result, capture_path, since=since)
        if saved_to:
            result.dir = str(saved_to)
    except Exception:  # storage must never break a diagnostic
        pass
|
||||
|
||||
|
||||
# --- hard-crash detection & post-crash analysis -----------------------------------
|
||||
|
||||
def _crash_from_log(path) -> CrashInfo | None:
    """Return a CrashInfo if `path` holds an abnormally-ended session, else None.

    "Abnormally ended" means the log has a ``session-start`` event but neither a
    ``session-stop`` nor a ``diagnostic-acknowledged`` event. A missing file yields None.
    """
    if not path.exists():
        return None
    summary = summarize(path)
    kinds = set()
    for _ts, kind, _detail in summary.events:
        kinds.add(kind)
    started = "session-start" in kinds
    closed = "session-stop" in kinds or "diagnostic-acknowledged" in kinds
    if not started or closed:
        return None
    return CrashInfo(
        game=_game_from_summary(summary),
        samples=summary.samples,
        when=summary.end,
        gpu_lost="gpu-lost" in kinds,
    )
|
||||
|
||||
|
||||
def _crash_path():
    """Return where the pending crash lives: the preserved archive if it exists, else the live log."""
    archive = config.DIAG_CRASH
    if archive.exists():
        return archive
    return config.DIAG_LOG
|
||||
|
||||
|
||||
def pending_crash() -> CrashInfo | None:
    """Detect a diagnostic that ended abnormally (no clean stop, no live recorder).

    A focused capture writes `session-start` (+ `game`) and, on a clean stop,
    `session-stop`. After a hard freeze the stop is never written, so a log with a start,
    no stop and no live recorder is the hard-crash signal.

    The crash archive (`DIAG_CRASH`, preserved across an auto-relaunch) is checked first
    and is reported even while a new capture is running. The live log is only consulted
    when no capture is running. Returns None if nothing is recorded, the session stopped
    cleanly, or the user already acknowledged it.
    """
    preserved = _crash_from_log(config.DIAG_CRASH)  # preserved across a relaunch (wrapper)
    if preserved is not None:
        return preserved
    return None if is_running() else _crash_from_log(config.DIAG_LOG)
|
||||
|
||||
|
||||
def acknowledge_crash() -> None:
    """Mark the recorded crash as seen so it stops prompting.

    If a preserved crash archive exists, that archive is the crash being acknowledged: it
    is deleted and the live log is left alone. The live log may belong to a different —
    possibly still running — session, and stamping it would make a later crash of that
    session look already acknowledged.

    Without an archive, a ``diagnostic-acknowledged`` event is appended to the live
    diagnostic log. All filesystem errors are ignored.
    """
    archived = config.DIAG_CRASH.exists()
    try:
        config.DIAG_CRASH.unlink()  # drop the preserved archive, if any
    except OSError:
        pass
    if archived:
        return
    try:
        config.DIAG_LOG.parent.mkdir(parents=True, exist_ok=True)
        with open(config.DIAG_LOG, "a", encoding="utf-8") as fh:
            fh.write(json.dumps({"ts": time.time(), "event": "diagnostic-acknowledged", "detail": ""}) + "\n")
    except OSError:
        pass
|
||||
|
||||
|
||||
def _crash_headline(summary: Summary) -> Finding:
    """Build the leading "session ended without a clean stop" finding for a crash report.

    The finding is CRITICAL when the capture contains a ``gpu-lost`` event and WARNING
    otherwise. Its detail quotes the local time of the capture's end ("?" if unknown) and
    the sample count.
    """
    kinds = [kind for _ts, kind, _detail in summary.events]
    gpu_lost = "gpu-lost" in kinds
    if summary.end:
        when = time.strftime("%H:%M:%S", time.localtime(summary.end))
    else:
        when = "?"
    parts = [
        f"The capture stopped abruptly at {when} after {summary.samples} samples, with no clean "
        "shutdown recorded — consistent with a hard freeze or power loss."
    ]
    if gpu_lost:
        parts.append(" A GPU-lost event was captured during the session.")
    severity = CRITICAL if gpu_lost else WARNING
    return Finding(
        severity,
        "Diagnostic",
        "Session ended without a clean stop (likely a hard crash)",
        "".join(parts),
        "Review the last readings (Capture, above) and the crash-boot findings below.",
    )
|
||||
|
||||
|
||||
def analyze_crash(last_n: int = 15) -> DiagnosticResult:
    """Analyze a recorded hard crash.

    Combines the captured window, the previous boot's kernel-log findings, and the rest of
    the health report (run without the journal). Findings are ordered by severity, with
    unknown severities last. The result is passed to the storage step before being
    returned.
    """
    from .health import check_previous_boot, run_health_checks

    summary = summarize(_crash_path(), last_n=last_n)
    findings: list[Finding] = [
        _crash_headline(summary),
        *check_previous_boot(),  # the crashed boot's kernel log
        *run_health_checks(include_journal=False),  # SMART/driver/persistence/temps
    ]
    findings.sort(key=lambda finding: _SEV_ORDER.get(finding.severity, 9))
    outcome = DiagnosticResult(game=_game_from_summary(summary), summary=summary, findings=findings)
    _store(outcome, _crash_path(), summary)
    return outcome
|
||||
@@ -0,0 +1,152 @@
|
||||
"""Per-diagnostic storage + Report bundles (M15) — opt-in via `logging_enabled`.
|
||||
|
||||
When logging is on, each finished diagnostic is persisted to its own directory under
|
||||
``config.DIAGNOSTICS_DIR/<id>/`` (capture log, structured result, human-readable report, a
|
||||
game-log snapshot, and any AI interactions). "Report" zips one directory — including exactly
|
||||
**what was sent to the AI, which model, and its reply** — into ``config.REPORTS_DIR``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import shutil
|
||||
import time
|
||||
import zipfile
|
||||
from dataclasses import asdict, is_dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from .. import config
|
||||
|
||||
|
||||
def enabled() -> bool:
    """Report whether the ``logging_enabled`` setting is on (off when unset)."""
    settings = config.load_config()
    return bool(settings.get("logging_enabled", False))
|
||||
|
||||
|
||||
def _slug(name: str | None) -> str:
|
||||
s = "".join(c if c.isalnum() else "-" for c in (name or "session").lower())
|
||||
return s.strip("-")[:40] or "session"
|
||||
|
||||
|
||||
def _new_dir(game: str | None) -> Path:
    """Create and return a fresh ``<timestamp>-<slug>`` directory under DIAGNOSTICS_DIR.

    If the name is already taken, ``-1``, ``-2``, ... is appended until a free one is
    found. The directory is claimed with an exclusive ``mkdir()`` rather than an
    ``exists()`` check followed by ``mkdir(exist_ok=True)``, so two diagnostics finishing
    in the same second can never end up sharing one directory.

    Raises:
        OSError: if the base directory or the target cannot be created for a reason
            other than the name already existing.
    """
    base = config.DIAGNOSTICS_DIR
    base.mkdir(parents=True, exist_ok=True)
    stamp = time.strftime("%Y%m%d-%H%M%S")
    name = f"{stamp}-{_slug(game)}"
    target = base / name
    n = 1
    while True:
        try:
            target.mkdir()  # fails if the name exists, making the claim atomic
        except FileExistsError:
            target = base / f"{name}-{n}"
            n += 1
        else:
            return target
|
||||
|
||||
|
||||
def _as_dict(obj):
|
||||
if is_dataclass(obj):
|
||||
return asdict(obj)
|
||||
return getattr(obj, "__dict__", {}) or str(obj)
|
||||
|
||||
|
||||
def store(result, capture_path=None, since: float | None = None) -> Path | None:
    """Persist a finished diagnostic into its own directory.

    Returns the new directory, or ``None`` when logging is disabled. Written, each on a
    best-effort basis: a copy of the capture log, ``result.json``, ``report.txt``, game
    and system log snapshots (when non-empty) and the inventory in text and JSON form.
    ``since`` is forwarded to the log collectors.
    """
    if not enabled():
        return None
    from ..render import render_summary
    from . import ai, gamelogs, syslogs

    game = getattr(result, "game", None)
    target = _new_dir(game)

    # Raw capture log, if one was recorded and still exists.
    if capture_path and Path(capture_path).exists():
        try:
            shutil.copyfile(capture_path, target / "capture.jsonl")
        except OSError:
            pass

    # Structured result.
    payload = {
        "game": game,
        "stored_at": time.time(),
        "summary": _as_dict(result.summary),
        "findings": [_as_dict(finding) for finding in result.findings],
    }
    _write(target / "result.json", json.dumps(payload, indent=2, default=str))

    # Human-readable report.
    report_lines = [
        f"Game: {game or 'unknown'}",
        "",
        render_summary(result.summary),
        "",
        ai.format_findings(result.findings, header="Findings:"),
    ]
    _write(target / "report.txt", "\n".join(report_lines))

    # Log snapshots: only written when the collector returns something.
    snapshots = (
        ("gamelogs.txt", lambda: gamelogs.collect(since=since, game=game)),
        ("syslogs.txt", lambda: syslogs.collect(since=since)),
    )
    for filename, collect_text in snapshots:
        try:
            text = collect_text()
            if text:
                _write(target / filename, text)
        except OSError:
            pass

    try:  # full hardware/OS inventory (M5) — invaluable for larger debugging in a shared report
        from . import inventory

        sections = inventory.collect()
        _write(target / "inventory.txt", inventory.render_text(sections))
        _write(target / "inventory.json", inventory.render_json(sections))
    except Exception:  # inventory probes vary by machine; never let it break storage
        pass
    return target
|
||||
|
||||
|
||||
def record_ai(diag_dir, *, provider: str, model: str, system: str, prompt: str, response: str) -> None:
    """Save one AI interaction (exact data sent, model, reply) into the diagnostic's `ai/` dir.

    Two files are written per interaction: ``explain-<stamp>.json`` (structured) and
    ``explain-<stamp>.txt`` (readable). Does nothing when ``diag_dir`` is falsy or the
    ``ai/`` directory cannot be created.

    The timestamp only has one-second resolution, so when a record with the same stamp
    already exists a numeric suffix (``-1``, ``-2``, ...) is appended instead of
    overwriting the earlier interaction.
    """
    if not diag_dir:
        return
    out = Path(diag_dir) / "ai"
    try:
        out.mkdir(parents=True, exist_ok=True)
    except OSError:
        return

    stamp = time.strftime("%Y%m%d-%H%M%S")
    stem = f"explain-{stamp}"
    n = 1
    # Keep the .json/.txt pair in step: pick a stem that is free for both.
    while (out / f"{stem}.json").exists() or (out / f"{stem}.txt").exists():
        stem = f"explain-{stamp}-{n}"
        n += 1

    record = {
        "timestamp": time.time(), "provider": provider, "model": model,
        "system_prompt": system, "data_sent_to_model": prompt, "model_reply": response,
    }
    _write(out / f"{stem}.json", json.dumps(record, indent=2, default=str))
    readable = (
        f"Provider: {provider}\nModel: {model}\n\n"
        f"=== System prompt ===\n{system}\n\n"
        f"=== Data sent to the model ===\n{prompt}\n\n"
        f"=== Model reply ===\n{response}\n"
    )
    _write(out / f"{stem}.txt", readable)
|
||||
|
||||
|
||||
def make_report(diag_dir) -> Path:
    """Zip a diagnostic directory (plus the app log) into REPORTS_DIR; return the zip path.

    Every regular file under ``diag_dir`` is stored beneath a top-level folder named after
    the directory; the application log, when it exists, is added there as ``app.log``.
    """
    source = Path(diag_dir)
    config.REPORTS_DIR.mkdir(parents=True, exist_ok=True)
    archive_path = config.REPORTS_DIR / f"report-{source.name}.zip"
    prefix = Path(source.name)
    with zipfile.ZipFile(archive_path, "w", zipfile.ZIP_DEFLATED) as archive:
        for entry in sorted(source.rglob("*")):
            if not entry.is_file():
                continue
            archive.write(entry, arcname=str(prefix / entry.relative_to(source)))
        if config.APP_LOG.exists():  # the application log, for context around the session
            archive.write(config.APP_LOG, arcname=str(prefix / "app.log"))
    return archive_path
|
||||
|
||||
|
||||
def latest_dir() -> Path | None:
    """Return the most recently modified directory under DIAGNOSTICS_DIR, or ``None``.

    Fails soft throughout: an unreadable base directory yields ``None``, and an entry
    that cannot be inspected (e.g. removed while scanning) is skipped instead of raising
    out of the ``stat()`` call.
    """
    try:
        entries = list(config.DIAGNOSTICS_DIR.iterdir())
    except OSError:
        return None

    newest: Path | None = None
    newest_mtime: float | None = None
    for entry in entries:
        try:
            if not entry.is_dir():
                continue
            mtime = entry.stat().st_mtime
        except OSError:
            continue
        # Strict ">" keeps the first of equally-new entries, like max() did.
        if newest_mtime is None or mtime > newest_mtime:
            newest, newest_mtime = entry, mtime
    return newest
|
||||
|
||||
|
||||
def _write(path: Path, text: str) -> None:
|
||||
try:
|
||||
path.write_text(text, encoding="utf-8")
|
||||
except OSError:
|
||||
pass
|
||||
@@ -0,0 +1,148 @@
|
||||
"""Connected displays (M5): resolution + current/max refresh per monitor.
|
||||
|
||||
GNOME exposes the authoritative data over D-Bus (Mutter `DisplayConfig.GetCurrentState`),
|
||||
which works on both X11 and Wayland — read via `busctl --json`. Plain X11 desktops fall back
|
||||
to `xrandr`. Other Wayland compositors (sway/KDE) aren't covered yet and degrade to empty.
|
||||
Stdlib only; every probe fails soft. Max refresh is computed at the *current* resolution, so
|
||||
"can go faster" never suggests dropping resolution.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
from dataclasses import dataclass
|
||||
|
||||
# A few common PNP monitor-vendor IDs → friendly names (best-effort; unknown codes pass through).
|
||||
_PNP = {
|
||||
"SAM": "Samsung", "DEL": "Dell", "GSM": "LG", "LGD": "LG", "AUS": "ASUS", "ACR": "Acer",
|
||||
"BNQ": "BenQ", "MSI": "MSI", "AOC": "AOC", "VSC": "ViewSonic", "HWP": "HP", "HPN": "HP",
|
||||
"PHL": "Philips", "GBT": "Gigabyte", "APP": "Apple", "DGC": "Dell",
|
||||
}
|
||||
|
||||
|
||||
@dataclass
class Monitor:
    """One connected display: where it is plugged in, its mode and its refresh headroom."""

    connector: str      # e.g. "DP-1"
    name: str           # e.g. "Samsung LC34G55T" ("" if unknown, e.g. xrandr)
    width: int
    height: int
    refresh: float      # current Hz
    max_refresh: float  # max Hz available at the current resolution

    @property
    def can_go_faster(self) -> bool:
        """Whether the best available rate exceeds the current one by more than 1 Hz."""
        headroom = self.max_refresh - self.refresh
        return headroom > 1.0

    def label(self) -> str:
        """Display label: ``"<connector> · <name>"``, or just the connector when unnamed."""
        if not self.name:
            return self.connector
        return f"{self.connector} · {self.name}".rstrip(" ·")
|
||||
|
||||
|
||||
def _run(cmd: list[str], timeout: float = 8.0) -> str:
|
||||
try:
|
||||
proc = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
|
||||
if proc.returncode == 0:
|
||||
return proc.stdout
|
||||
except (subprocess.SubprocessError, OSError):
|
||||
pass
|
||||
return ""
|
||||
|
||||
|
||||
def _parse_mutter(out: str) -> list[Monitor]:
    """Parse `busctl --json` output of Mutter DisplayConfig.GetCurrentState.

    data = [serial, monitors, logical_monitors, props]; each monitor is
    [[connector, vendor, product, serial], [modes], props]; each mode is
    [id, width, height, refresh, scale, [scales], {props}] where props may hold is-current.

    Parsing is tolerant per entry: a malformed mode is ignored and a malformed monitor is
    skipped, so one bad record no longer raises and discards every other monitor. A
    monitor without a usable current mode is omitted. Unparseable input returns ``[]``.
    """
    try:
        raw_monitors = json.loads(out)["data"][1]
    except (json.JSONDecodeError, KeyError, IndexError, TypeError):
        return []
    if not isinstance(raw_monitors, list):
        return []

    monitors: list[Monitor] = []
    for mon in raw_monitors:
        try:
            connector, vendor, product = mon[0][0], mon[0][1], mon[0][2]
            raw_modes = mon[1]
        except (IndexError, KeyError, TypeError):
            continue
        if not isinstance(raw_modes, list):
            continue

        # Normalise every usable mode to (width, height, refresh, is_current).
        modes: list[tuple[int, int, float, bool]] = []
        for m in raw_modes:
            try:
                w, h, r = int(m[1]), int(m[2]), float(m[3])
                props = m[6] if len(m) > 6 and isinstance(m[6], dict) else {}
            except (IndexError, KeyError, TypeError, ValueError):
                continue
            flag = props.get("is-current")
            if isinstance(flag, dict):  # busctl wraps variants as {"type": ..., "data": ...}
                flag = flag.get("data")
            modes.append((w, h, r, bool(flag)))

        current = next((mode for mode in modes if mode[3]), None)
        if current is None:
            continue
        w, h, r, _ = current
        # Max refresh is taken at the *current* resolution only.
        max_r = max((mr for mw, mh, mr, _cur in modes if (mw, mh) == (w, h)), default=r)
        vendor_name = _PNP.get(vendor, vendor) if isinstance(vendor, str) else vendor
        name = f"{vendor_name} {product}".strip()
        monitors.append(Monitor(connector, name, w, h, r, max_r))
    return monitors
|
||||
|
||||
|
||||
def _parse_xrandr(out: str) -> list[Monitor]:
    """Parse `xrandr --query`: an output line with the active WxH+x+y, then indented mode lines
    whose rates carry `*` for the current one.

    Only outputs that are connected *and* show an active geometry are reported. Every
    unindented line (``Screen ...`` or any output header, including disconnected or
    connected-but-inactive outputs) closes the previous output. Without that reset, the
    mode list of an inactive output was attributed to the preceding monitor and could
    inflate its ``max_refresh``.
    """
    monitors: list[Monitor] = []
    out_re = re.compile(r"^(\S+) connected.*?(\d+)x(\d+)\+\d+\+\d+")
    mode_re = re.compile(r"^\s+(\d+)x(\d+)\s+(.+)$")
    name = ""
    cw = ch = 0
    cur_r = max_r = 0.0

    def flush() -> None:
        # Emit the output being accumulated, if it had an active mode.
        if name and cw and cur_r:
            monitors.append(Monitor(name, "", cw, ch, cur_r, max_r or cur_r))

    for line in out.splitlines():
        if line and not line[0].isspace():
            # Header line: finish the previous output and start from a clean slate.
            flush()
            name, cw, ch = "", 0, 0
            cur_r = max_r = 0.0
            mo = out_re.match(line)
            if mo:
                name, cw, ch = mo.group(1), int(mo.group(2)), int(mo.group(3))
            continue
        mm = mode_re.match(line)
        if mm and name and int(mm.group(1)) == cw and int(mm.group(2)) == ch:
            for tok in mm.group(3).split():
                try:
                    rate = float(tok.rstrip("*+"))
                except ValueError:
                    continue
                max_r = max(max_r, rate)
                if "*" in tok:
                    cur_r = rate
    flush()
    return monitors
|
||||
|
||||
|
||||
def _mutter() -> list[Monitor]:
    """Query Mutter's DisplayConfig over the user bus via ``busctl``; ``[]`` if unavailable."""
    busctl = shutil.which("busctl")
    if not busctl:
        return []
    service = "org.gnome.Mutter.DisplayConfig"
    argv = [
        busctl, "--user", "--json=short", "call",
        service, "/org/gnome/Mutter/DisplayConfig", service,
        "GetCurrentState",
    ]
    reply = _run(argv)
    if not reply.strip():
        return []
    return _parse_mutter(reply)
|
||||
|
||||
|
||||
def _xrandr() -> list[Monitor]:
    """Parse ``xrandr --query`` output; ``[]`` when ``xrandr`` is not on PATH."""
    if shutil.which("xrandr"):
        listing = _run(["xrandr", "--query"])
        return _parse_xrandr(listing)
    return []
|
||||
|
||||
|
||||
def collect() -> list[Monitor]:
    """Connected monitors from the first backend that yields any (Mutter, then xrandr).

    A backend that raises is treated as having found nothing; ``[]`` if none succeed.
    """
    for probe in (_mutter, _xrandr):
        try:
            found = probe()
        except Exception:
            continue
        if found:
            return found
    return []
|
||||
@@ -0,0 +1,229 @@
|
||||
"""Drive health & wear (M-drives): per-disk SMART stats parsed from smartctl JSON.
|
||||
|
||||
Unlike a GPU, storage exposes a real health/wear story, so this reads it in full: the overall
|
||||
SMART verdict, a derived **life-left %** (NVMe ``percentage_used`` or the SATA wear-leveling
|
||||
attribute), **power-on hours** (the drive's runtime), data written (TBW), temperature, and the
|
||||
early-failure predictors (reallocated / pending / offline-uncorrectable sectors, NVMe media
|
||||
errors, available spare). Turned into prioritized health findings.
|
||||
|
||||
smartctl needs root, so collection runs through the same elevated path as the other root-only
|
||||
checks (``rigdoctor collect-priv`` via pkexec at GUI launch, or ``sudo rigdoctor report``).
|
||||
Parsing is JSON-based (smartctl ``--json``), which is stable across drive types. Stdlib only;
|
||||
degrades gracefully — no smartctl, no root, or an unparseable device yields an info finding.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import shutil
|
||||
import subprocess
|
||||
from dataclasses import dataclass
|
||||
|
||||
from .health import CRITICAL, INFO, OK, WARNING, Finding
|
||||
|
||||
# NVMe writes are counted in 512-KB "data units"; 1 unit = 1000 * 512 bytes.
|
||||
_NVME_UNIT_BYTES = 512_000
|
||||
_LBA_BYTES = 512 # SATA Total_LBAs_Written counts 512-byte sectors
|
||||
|
||||
|
||||
@dataclass
class DriveHealth:
    """SMART-derived health snapshot of one drive; unknown values stay ``None``."""

    device: str
    model: str = ""
    kind: str = ""                                  # "nvme" | "sata" | "scsi"

    # Overall status
    passed: bool | None = None                      # SMART overall verdict; None if unknown / needs root
    needs_root: bool = False

    # Wear and usage
    health_pct: int | None = None                   # derived life-left %
    percent_used: int | None = None                 # NVMe wear used %
    power_on_hours: int | None = None
    temp_c: int | None = None
    data_written_tb: float | None = None

    # Early-failure predictors
    reallocated: int | None = None                  # SATA reallocated sectors (id 5)
    pending: int | None = None                      # SATA current-pending sectors (id 197)
    offline_uncorrectable: int | None = None        # SATA id 198
    available_spare: int | None = None              # NVMe %
    available_spare_threshold: int | None = None
    media_errors: int | None = None                 # NVMe
|
||||
|
||||
|
||||
# --- collection (root) ----------------------------------------------------------------
|
||||
|
||||
def _scan_devices() -> list[str]:
|
||||
try:
|
||||
proc = subprocess.run(["smartctl", "--scan"], capture_output=True, text=True, timeout=10)
|
||||
except (subprocess.SubprocessError, OSError):
|
||||
return []
|
||||
return [ln.split()[0] for ln in proc.stdout.splitlines() if ln.strip().startswith("/dev/")]
|
||||
|
||||
|
||||
def _smartctl_json(device: str) -> dict | None:
|
||||
try:
|
||||
proc = subprocess.run(
|
||||
["smartctl", "--json=c", "-H", "-A", "-i", device],
|
||||
capture_output=True, text=True, timeout=20,
|
||||
)
|
||||
except (subprocess.SubprocessError, OSError):
|
||||
return None
|
||||
try:
|
||||
return json.loads(proc.stdout)
|
||||
except (ValueError, TypeError):
|
||||
return None
|
||||
|
||||
|
||||
def _ata_attr(data: dict, attr_id: int) -> int | None:
|
||||
for row in data.get("ata_smart_attributes", {}).get("table", []):
|
||||
if row.get("id") == attr_id:
|
||||
raw = row.get("raw", {})
|
||||
return raw.get("value")
|
||||
return None
|
||||
|
||||
|
||||
def _ata_norm_value(data: dict, attr_id: int) -> int | None:
|
||||
"""The normalized 'value' (100→0 life indicator) for an ATA attribute."""
|
||||
for row in data.get("ata_smart_attributes", {}).get("table", []):
|
||||
if row.get("id") == attr_id:
|
||||
return row.get("value")
|
||||
return None
|
||||
|
||||
|
||||
def parse(device: str, data: dict | None) -> DriveHealth:
    """Translate one drive's smartctl JSON into a ``DriveHealth`` (performs no IO).

    Empty or missing ``data`` yields a record flagged ``needs_root``. Otherwise the model,
    drive kind, SMART verdict, temperature and power-on hours are read first, followed by
    the NVMe health log or, for every other kind, the ATA attribute table.
    """
    drive = DriveHealth(device=device)
    if not data:
        drive.needs_root = True
        return drive

    drive.model = data.get("model_name") or data.get("scsi_model_name") or ""
    protocol = (data.get("device", {}).get("protocol") or "").lower()
    if "nvme" in protocol:
        drive.kind = "nvme"
    elif "ata" in protocol:
        drive.kind = "sata"
    else:
        drive.kind = protocol or ""

    verdict = data.get("smart_status")
    if isinstance(verdict, dict) and "passed" in verdict:
        drive.passed = bool(verdict["passed"])
    elif data.get("smartctl", {}).get("exit_status", 0) and not verdict:
        # No verdict and a non-zero exit usually means we couldn't open the device (needs root).
        drive.needs_root = True

    temperature = data.get("temperature", {}).get("current")
    drive.temp_c = int(temperature) if isinstance(temperature, (int, float)) else None
    hours = data.get("power_on_time", {}).get("hours")
    drive.power_on_hours = int(hours) if isinstance(hours, (int, float)) else None

    if drive.kind != "nvme":  # SATA / ATA (and anything else)
        drive.reallocated = _ata_attr(data, 5)
        drive.pending = _ata_attr(data, 197)
        drive.offline_uncorrectable = _ata_attr(data, 198)
        sectors_written = _ata_attr(data, 241)  # Total_LBAs_Written
        if isinstance(sectors_written, (int, float)) and sectors_written > 0:
            drive.data_written_tb = round(sectors_written * _LBA_BYTES / 1e12, 2)
        life_left = _ata_norm_value(data, 177)  # Wear_Leveling_Count (Samsung): normalized = life left
        if life_left is None:
            life_left = _ata_norm_value(data, 231)  # SSD_Life_Left on some drives
        if isinstance(life_left, int):
            drive.health_pct = life_left
        return drive

    health_log = data.get("nvme_smart_health_information_log", {})
    drive.percent_used = health_log.get("percentage_used")
    drive.available_spare = health_log.get("available_spare")
    drive.available_spare_threshold = health_log.get("available_spare_threshold")
    drive.media_errors = health_log.get("media_errors")
    # Fall back to the health log's temperature when the top-level one was absent.
    if drive.temp_c is None and isinstance(health_log.get("temperature"), (int, float)):
        drive.temp_c = int(health_log["temperature"])
    units_written = health_log.get("data_units_written")
    if isinstance(units_written, (int, float)):
        drive.data_written_tb = round(units_written * _NVME_UNIT_BYTES / 1e12, 2)
    if isinstance(drive.percent_used, (int, float)):
        # percentage_used may exceed 100, so clamp life-left at zero.
        drive.health_pct = max(0, 100 - int(drive.percent_used))
    return drive
|
||||
|
||||
|
||||
def collect() -> list[DriveHealth]:
    """Health record for every device ``smartctl --scan`` reports (real data needs root).

    Returns ``[]`` when ``smartctl`` is not installed.
    """
    if shutil.which("smartctl") is None:
        return []
    drives: list[DriveHealth] = []
    for device in _scan_devices():
        drives.append(parse(device, _smartctl_json(device)))
    return drives
|
||||
|
||||
|
||||
def from_dicts(rows: list[dict]) -> list[DriveHealth]:
    """Rebuild DriveHealth objects from the privileged collector's JSON.

    Rows that are not dicts or lack a truthy ``device`` are skipped, and unknown keys are
    ignored. Only keys actually present in a row are passed on, so a field missing from
    the JSON keeps its dataclass default (``model=""``, ``kind=""``,
    ``needs_root=False``) instead of being overwritten with ``None``.
    """
    known = DriveHealth.__dataclass_fields__
    out: list[DriveHealth] = []
    for r in rows:
        if isinstance(r, dict) and r.get("device"):
            fields = {k: r[k] for k in known if k in r}
            out.append(DriveHealth(**fields))
    return out
|
||||
|
||||
|
||||
# --- findings -------------------------------------------------------------------------
|
||||
|
||||
def _stats_line(d: DriveHealth) -> str:
|
||||
parts: list[str] = []
|
||||
if d.health_pct is not None:
|
||||
parts.append(f"{d.health_pct}% life left")
|
||||
elif d.percent_used is not None:
|
||||
parts.append(f"{d.percent_used}% used")
|
||||
if d.power_on_hours is not None:
|
||||
parts.append(f"{d.power_on_hours:,} h powered on")
|
||||
if d.data_written_tb is not None:
|
||||
parts.append(f"{d.data_written_tb:g} TB written")
|
||||
if d.temp_c is not None:
|
||||
parts.append(f"{d.temp_c}°C")
|
||||
if d.available_spare is not None:
|
||||
parts.append(f"spare {d.available_spare}%")
|
||||
return " · ".join(parts)
|
||||
|
||||
|
||||
def to_findings(drives: list[DriveHealth]) -> list[Finding]:
    """Turn per-drive health into one finding per drive.

    With no drives: an info finding when ``smartctl`` is missing, otherwise nothing.
    Per drive, the first matching rule wins: needs root (info) → failing (critical: failed
    verdict or any non-zero failure counter) → worn (warning: spare below threshold or
    wear ≥ 90 %) → hot (warning: ≥ 70 °C) → healthy (ok).
    """
    if not drives:
        if shutil.which("smartctl") is not None:
            return []
        return [Finding(INFO, "Storage", "SMART not checked (smartmontools missing)",
                        "Disk self-health couldn't be read.",
                        "Install it: `sudo apt install smartmontools`")]

    results: list[Finding] = []
    for drive in drives:
        title = drive.model or drive.device
        if drive.needs_root:
            results.append(Finding(INFO, "Storage", f"{title}: SMART needs root",
                                   "Reading drive health requires elevated access.",
                                   "Run: `sudo rigdoctor report` (or launch the GUI, which asks once)."))
            continue

        summary = _stats_line(drive)

        # Failure predictors, worst first.
        problems = ["SMART overall self-assessment FAILED"] if drive.passed is False else []
        counters = (
            ("reallocated sectors", drive.reallocated),
            ("pending sectors", drive.pending),
            ("offline-uncorrectable sectors", drive.offline_uncorrectable),
            ("NVMe media errors", drive.media_errors),
        )
        problems += [f"{count} {what}" for what, count in counters
                     if isinstance(count, int) and count > 0]

        spare_low = (
            isinstance(drive.available_spare, int)
            and isinstance(drive.available_spare_threshold, int)
            and drive.available_spare < drive.available_spare_threshold
        )
        worn = isinstance(drive.percent_used, int) and drive.percent_used >= 90
        hot = isinstance(drive.temp_c, int) and drive.temp_c >= 70

        if problems:  # a failed verdict always contributes an entry, so this covers it
            heading = f"{title}: failing ({summary})" if summary else f"{title}: failing"
            results.append(Finding(
                CRITICAL, "Storage", heading,
                "; ".join(problems) + ".",
                "Back up this drive now and plan to replace it."))
        elif spare_low or worn:
            if spare_low:
                detail = "Available spare below the drive's threshold."
            else:
                detail = f"NVMe wear at {drive.percent_used}% used — near end of rated life."
            results.append(Finding(
                WARNING, "Storage", f"{title}: worn ({summary})",
                detail,
                "Back up important data and budget for a replacement."))
        elif hot:
            results.append(Finding(
                WARNING, "Storage", f"{title}: hot ({summary})",
                f"Drive temperature is {drive.temp_c}°C.",
                "Improve case/M.2 airflow; sustained heat shortens SSD life."))
        else:
            suffix = f" ({summary})" if summary else ""
            results.append(Finding(
                OK, "Storage", f"{title}: healthy" + suffix,
                "SMART self-assessment passed." if drive.passed else ""))
    return results
|
||||
@@ -0,0 +1,51 @@
|
||||
"""Session privilege elevation.
|
||||
|
||||
At GUI launch the app asks for the password once (pkexec) and collects the data that
|
||||
needs root — SMART health + dmidecode (board/BIOS/RAM) — caching it for the session so
|
||||
Health and Inventory can always show the full picture without per-action prompts.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
_privileged: dict | None = None
|
||||
|
||||
|
||||
def privileged() -> dict | None:
    """Return the module-level cache of root-collected data, or ``None`` if unset.

    The cached dict is shaped like ``{"smart": [...], "dmidecode": {...}}``.
    """
    cached = _privileged
    return cached
|
||||
|
||||
|
||||
def set_privileged(data: dict | None) -> None:
    """Replace the module-level privileged-data cache with ``data`` (``None`` clears it)."""
    global _privileged
    _privileged = data
|
||||
|
||||
|
||||
def available() -> bool:
    """True when ``pkexec`` is on PATH and the process is not already running as root."""
    if os.geteuid() == 0:
        return False
    return shutil.which("pkexec") is not None
|
||||
|
||||
|
||||
def _cli() -> list[str]:
|
||||
candidate = os.path.join(os.path.dirname(sys.executable), "rigdoctor")
|
||||
return [candidate] if os.path.exists(candidate) else [sys.executable, "-m", "rigdoctor"]
|
||||
|
||||
|
||||
def collect_via_pkexec(timeout: float = 120.0) -> dict | None:
    """Run the CLI's ``collect-priv`` command under pkexec and return its decoded JSON.

    Returns ``None`` when elevation is unavailable, the command fails or times out, it
    prints nothing, or its output is not valid JSON.
    """
    if not available():
        return None
    try:
        completed = subprocess.run(
            ["pkexec", *_cli(), "collect-priv"],
            capture_output=True, text=True, timeout=timeout,
        )
        if completed.returncode != 0 or not completed.stdout.strip():
            return None
        return json.loads(completed.stdout)
    except (subprocess.SubprocessError, OSError, ValueError):
        return None
|
||||
@@ -0,0 +1,177 @@
|
||||
"""Apply runtime-reversible system tunables (M6) — a limited, consent-gated exception to
|
||||
the read-only stance (D9, amended by D22).
|
||||
|
||||
Only safe settings that take effect immediately, need no reboot, and revert on reboot are
|
||||
applyable here: CPU governor, NVIDIA persistence mode, PCIe ASPM policy, vm.swappiness, and
|
||||
Transparent HugePages. Each is set by a single privileged command (one pkexec prompt). The
|
||||
chosen value is validated against the live options before building the command, and writes go
|
||||
to sysfs / procfs (or `nvidia-smi`) — never the GRUB cmdline or a persistent config file.
|
||||
Riskier fixes (GRUB-based PCIe ASPM-off, CPU mitigations) stay suggestion-only.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shlex
|
||||
import shutil
|
||||
import subprocess
|
||||
from collections.abc import Callable
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
@dataclass
class Tunable:
    """Live state of one applyable setting: its selectable values and the active one."""

    id: str
    label: str            # e.g. "CPU governor"
    options: list[str]    # selectable values (live, from the system)
    current: str | None   # the value in effect now (preselect this in the dropdown)
    note: str = ""        # caveat shown by the control, e.g. "resets on reboot"
|
||||
|
||||
|
||||
def _read(path: str) -> str | None:
|
||||
try:
|
||||
return Path(path).read_text()
|
||||
except OSError:
|
||||
return None
|
||||
|
||||
|
||||
def _bracketed(text: str) -> tuple[list[str], str | None]:
|
||||
"""Parse a sysfs 'a [b] c' enum into (options, active)."""
|
||||
options = [tok.strip("[]") for tok in text.split()]
|
||||
active = next((tok.strip("[]") for tok in text.split() if tok.startswith("[")), None)
|
||||
return options, active
|
||||
|
||||
|
||||
# --- individual tunables: a state reader + a command builder per id -------------------
|
||||
|
||||
_GOV = "/sys/devices/system/cpu"
|
||||
|
||||
|
||||
def _cpu_governor() -> Tunable | None:
    """CPU governor state read from cpu0's cpufreq files, or ``None`` if unreadable.

    Options come from ``scaling_available_governors``; when that file is missing or blank
    a fixed fallback list is offered.
    """
    cpufreq = f"{_GOV}/cpu0/cpufreq"
    current = _read(f"{cpufreq}/scaling_governor")
    if current is None:
        return None
    listed = _read(f"{cpufreq}/scaling_available_governors")
    if listed and listed.strip():
        options = listed.split()
    else:
        options = ["performance", "powersave", "schedutil"]
    return Tunable("cpu_governor", "CPU governor", options, current.strip(), "applies now; resets on reboot")
|
||||
|
||||
|
||||
def _cpu_governor_cmd(value: str) -> list[str]:
    """Shell command that writes ``value`` to every CPU's ``scaling_governor`` file.

    ``value`` is shell-quoted before being embedded in the script.
    """
    quoted = shlex.quote(value)
    script = f'for f in {_GOV}/cpu*/cpufreq/scaling_governor; do echo {quoted} > "$f"; done'
    return ["/bin/sh", "-c", script]
|
||||
|
||||
|
||||
def _nvidia_persistence() -> Tunable | None:
    """NVIDIA persistence-mode state from ``nvidia-smi``, or ``None`` if it can't be queried.

    Only the first output line is inspected; ``current`` is ``None`` when that line is
    neither "enabled..." nor "disabled..." (case-insensitive).
    """
    if shutil.which("nvidia-smi") is None:
        return None
    query = ["nvidia-smi", "--query-gpu=persistence_mode", "--format=csv,noheader"]
    try:
        completed = subprocess.run(query, capture_output=True, text=True, timeout=10)
    except (subprocess.SubprocessError, OSError):
        return None

    text = completed.stdout.strip()
    state = text.splitlines()[0].strip().lower() if text else ""
    if state.startswith("enabled"):
        current = "Enabled"
    elif state.startswith("disabled"):
        current = "Disabled"
    else:
        current = None
    return Tunable("nvidia_persistence", "NVIDIA persistence mode", ["Enabled", "Disabled"], current,
                   "resets on reboot (enable nvidia-persistenced to persist)")
|
||||
|
||||
|
||||
def _nvidia_persistence_cmd(value: str) -> list[str]:
|
||||
return ["nvidia-smi", "-pm", "1" if value == "Enabled" else "0"]
|
||||
|
||||
|
||||
def _pcie_aspm() -> Tunable | None:
    """PCIe ASPM policy state from the kernel module parameter, or ``None`` if unreadable/empty."""
    raw = _read("/sys/module/pcie_aspm/parameters/policy")
    if not raw:
        return None
    choices, selected = _bracketed(raw)
    return Tunable("pcie_aspm", "PCIe ASPM policy", choices, selected, "applies now; resets on reboot")
|
||||
|
||||
|
||||
def _pcie_aspm_cmd(value: str) -> list[str]:
|
||||
return ["/bin/sh", "-c", f'echo {shlex.quote(value)} > /sys/module/pcie_aspm/parameters/policy']
|
||||
|
||||
|
||||
def _swappiness() -> Tunable | None:
    """``vm.swappiness`` state, or ``None`` if the value is unreadable or not all digits.

    Offers a fixed set of presets; a current value outside them is merged in and the
    list re-sorted numerically.
    """
    raw = _read("/proc/sys/vm/swappiness")
    if raw is None:
        return None
    current = raw.strip()
    if not current.isdigit():
        return None
    presets = ["0", "10", "30", "60", "100"]
    if current in presets:
        options = presets
    else:
        options = sorted(set(presets) | {current}, key=int)
    return Tunable("swappiness", "vm.swappiness", options, current, "applies now; resets on reboot")
|
||||
|
||||
|
||||
def _swappiness_cmd(value: str) -> list[str]:
|
||||
return ["/bin/sh", "-c", f'echo {shlex.quote(value)} > /proc/sys/vm/swappiness']
|
||||
|
||||
|
||||
def _thp() -> Tunable | None:
    """Transparent HugePages state from sysfs, or ``None`` if the file is unreadable/empty."""
    raw = _read("/sys/kernel/mm/transparent_hugepage/enabled")
    if not raw:
        return None
    choices, selected = _bracketed(raw)
    return Tunable("thp", "Transparent HugePages", choices, selected, "applies now; resets on reboot")
|
||||
|
||||
|
||||
def _thp_cmd(value: str) -> list[str]:
|
||||
return ["/bin/sh", "-c", f'echo {shlex.quote(value)} > /sys/kernel/mm/transparent_hugepage/enabled']
|
||||
|
||||
|
||||
# Registry of applyable settings: fix id -> (state reader, command builder).
_StateReader = Callable[[], "Tunable | None"]
_CommandBuilder = Callable[[str], "list[str]"]

_TUNABLES: dict[str, tuple[_StateReader, _CommandBuilder]] = {
    "cpu_governor": (_cpu_governor, _cpu_governor_cmd),
    "nvidia_persistence": (_nvidia_persistence, _nvidia_persistence_cmd),
    "pcie_aspm": (_pcie_aspm, _pcie_aspm_cmd),
    "swappiness": (_swappiness, _swappiness_cmd),
    "thp": (_thp, _thp_cmd),
}
|
||||
|
||||
|
||||
# --- public API -----------------------------------------------------------------------
|
||||
|
||||
def get_tunable(fix_id: str) -> Tunable | None:
    """Live state (options + current value) for ``fix_id``.

    Returns ``None`` for an unregistered id, or when the state reader itself reports the
    setting as unavailable.
    """
    entry = _TUNABLES.get(fix_id)
    if not entry:
        return None
    read_state = entry[0]
    return read_state()
|
||||
|
||||
|
||||
def apply_command(fix_id: str, value: str) -> list[str] | None:
    """Build the privileged command that sets ``fix_id`` to ``value``.

    Returns ``None`` when the id is unregistered, the setting's state can't be read, or
    ``value`` is not among the options the system reports right now. That last check is
    the safety gate: only a real, currently-available setting is ever turned into a command.
    """
    entry = _TUNABLES.get(fix_id)
    if not entry:
        return None
    read_state, build_command = entry
    live = read_state()
    if live is not None and value in live.options:
        return build_command(value)
    return None
|
||||
|
||||
|
||||
def _elevate(cmd: list[str]) -> list[str]:
|
||||
prog = shutil.which(cmd[0]) or cmd[0] # pkexec needs an absolute program path
|
||||
cmd = [prog, *cmd[1:]]
|
||||
if os.geteuid() == 0:
|
||||
return cmd
|
||||
if shutil.which("pkexec"):
|
||||
return ["pkexec", *cmd]
|
||||
if shutil.which("sudo"):
|
||||
return ["sudo", *cmd]
|
||||
return cmd # no escalation available — will likely fail, surfaced to the caller
|
||||
|
||||
|
||||
def apply(fix_id: str, value: str) -> tuple[int, str]:
    """Apply ``fix_id=value`` through one elevated command.

    Returns ``(exit_code, output)`` where output is stdout followed by stderr. An unknown
    or currently unavailable setting, or a failure to run the command at all, is reported
    as exit code 1 with an explanatory message.
    """
    command = apply_command(fix_id, value)
    if command is None:
        return (1, f"Unknown or unavailable setting: {fix_id}={value}")
    try:
        completed = subprocess.run(_elevate(command), capture_output=True, text=True, timeout=120)
    except (subprocess.SubprocessError, OSError) as exc:
        return (1, str(exc))
    return (completed.returncode, completed.stdout + completed.stderr)
|
||||
@@ -0,0 +1,328 @@
|
||||
"""Gaming environment checks (M6): evaluate system settings that affect gaming
|
||||
stability/performance and suggest the fix command — read-only (D9).
|
||||
|
||||
Stdlib-only. Each check degrades gracefully (a missing file/tool yields no finding or an
|
||||
info finding, never an exception). The pure ``evaluate_*`` helpers are split from the IO
|
||||
that reads sysfs / runs tools, so they're unit-testable.
|
||||
|
||||
Several checks target the seed case directly: an RTX 3070 falling off the PCIe bus under
|
||||
load (Xid 79). PCIe ASPM power-saving, NVIDIA persistence mode, and a power-saving CPU
|
||||
governor are the usual contributors to that class of drop-off / stutter.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from .health import INFO, OK, WARNING, Finding
|
||||
|
||||
_ORDER = {"critical": 0, WARNING: 1, INFO: 2, OK: 3}
|
||||
|
||||
|
||||
def _read(path: str) -> str | None:
|
||||
try:
|
||||
return Path(path).read_text()
|
||||
except OSError:
|
||||
return None
|
||||
|
||||
|
||||
# --- PCIe ASPM (seed-case relevant) ---------------------------------------------------
|
||||
|
||||
def _active_aspm(policy_text: str) -> str | None:
|
||||
"""The active ASPM policy is the bracketed token, e.g. '[default] performance ...'."""
|
||||
m = re.search(r"\[(\w+)\]", policy_text)
|
||||
return m.group(1) if m else None
|
||||
|
||||
|
||||
def evaluate_aspm(policy_text: str | None) -> Finding | None:
    """Classify the PCIe ASPM policy text into a finding (pure; no IO).

    Returns ``None`` when the text is empty or has no bracketed active policy. Otherwise:
    a warning for ``powersave`` / ``powersupersave``, OK for ``performance``, and an info
    finding for any other active policy. Every finding carries ``fix="pcie_aspm"``.
    """
    if not policy_text:
        return None
    active = _active_aspm(policy_text)
    if active is None:
        return None

    if active == "performance":
        return Finding(OK, "PCIe", "PCIe ASPM set to performance", "ASPM power-saving is disabled.",
                       fix="pcie_aspm")

    if active not in ("powersave", "powersupersave"):
        return Finding(
            INFO, "PCIe", f"PCIe ASPM policy: {active}",
            "ASPM is left to the kernel/BIOS default.",
            "If you see GPU bus-drop events (Xid 79), set the policy to performance below.",
            fix="pcie_aspm",
        )

    return Finding(
        WARNING, "PCIe", f"PCIe ASPM is in power-saving mode ({active})",
        "Aggressive PCIe Active-State Power Management can cause the GPU to drop off the "
        "bus under load (Xid 79) or stutter — the seed-case failure mode.",
        "Set the policy to performance below (live), or for a permanent change add "
        "`pcie_aspm=off` in GRUB, then `sudo update-grub` and reboot.",
        fix="pcie_aspm",
    )
|
||||
|
||||
|
||||
def check_pcie_aspm() -> list[Finding]:
    """Evaluate the ASPM policy read from sysfs; empty list when there is nothing to report."""
    finding = evaluate_aspm(_read("/sys/module/pcie_aspm/parameters/policy"))
    if not finding:
        return []
    return [finding]
|
||||
|
||||
|
||||
# --- NVIDIA PowerMizer / persistence mode (seed-case relevant) ------------------------
|
||||
|
||||
def check_gpu_powermizer() -> list[Finding]:
    """NVIDIA PowerMizer preferred-performance mode (X only, via nvidia-settings).

    Returns an empty list when ``nvidia-settings`` is missing, ``DISPLAY`` is unset, the
    command fails, or its first output line is not a plain number.
    """
    have_tool = shutil.which("nvidia-settings") is not None
    if not have_tool or not os.environ.get("DISPLAY"):
        return []

    query = ["nvidia-settings", "-q", "[gpu:0]/GPUPowerMizerMode", "-t"]
    try:
        result = subprocess.run(query, capture_output=True, text=True, timeout=10)
    except (subprocess.SubprocessError, OSError):
        return []

    output = result.stdout.strip()
    first = output.splitlines()[0].strip() if output else ""
    if not first.isdigit():  # no X target / Wayland / query failed — skip quietly
        return []

    mode = int(first)
    labels = {0: "Adaptive", 1: "Prefer Maximum Performance", 2: "Auto"}
    label = labels.get(mode, f"mode {first}")
    if mode != 1:
        return [Finding(
            INFO, "GPU", f"GPU PowerMizer: {label}",
            "Adaptive/Auto can downclock the GPU between load spikes, hurting frame consistency.",
            "Prefer max performance (X only, resets on reboot): "
            "`nvidia-settings -a '[gpu:0]/GPUPowerMizerMode=1'`.",
        )]
    return [Finding(OK, "GPU", f"GPU PowerMizer: {label}", "The GPU prefers maximum performance.")]
|
||||
|
||||
|
||||
def check_gpu_persistence() -> list[Finding]:
    """Report NVIDIA persistence mode from the first line ``nvidia-smi`` prints.

    Returns an empty list when ``nvidia-smi`` is missing, the call fails, or the first line
    starts with neither "enabled" nor "disabled" (case-insensitive).
    """
    if shutil.which("nvidia-smi") is None:
        return []

    command = ["nvidia-smi", "--query-gpu=persistence_mode", "--format=csv,noheader"]
    try:
        result = subprocess.run(command, capture_output=True, text=True, timeout=10)
    except (subprocess.SubprocessError, OSError):
        return []

    output = result.stdout.strip()
    state = (output.splitlines()[0].strip() if output else "").lower()

    if state.startswith("disabled"):
        return [Finding(
            INFO, "GPU", "NVIDIA persistence mode is off",
            "The driver unloads when no client is attached, adding latency on first GPU "
            "access and churning state between game launches.",
            "Enable it below (per-boot), or enable the `nvidia-persistenced` service to "
            "make it permanent.",
            fix="nvidia_persistence",
        )]
    if state.startswith("enabled"):
        return [Finding(OK, "GPU", "NVIDIA persistence mode on", "The driver stays resident.",
                        fix="nvidia_persistence")]
    return []
|
||||
|
||||
|
||||
# --- CPU governor ---------------------------------------------------------------------
|
||||
|
||||
def evaluate_governor(governors: set[str]) -> Finding | None:
    """Classify the set of CPU governors in use.

    ``None`` for an empty set; WARNING when any CPU uses ``powersave``; OK when every CPU
    uses ``performance``; INFO otherwise. Every finding carries ``fix="cpu_governor"``.
    """
    if not governors:
        return None

    listing = ", ".join(sorted(governors))

    if "powersave" in governors:
        return Finding(
            WARNING, "CPU", f"CPU governor set to power-saving ({listing})",
            "A powersave governor caps CPU frequency and can bottleneck frame times.",
            "Set it to performance below (or install GameMode to switch it per-game).",
            fix="cpu_governor",
        )

    if governors != {"performance"}:
        return Finding(
            INFO, "CPU", f"CPU governor: {listing}",
            "A dynamic governor scales with load; usually fine.",
            "For the most consistent frame pacing, set performance below (or use GameMode).",
            fix="cpu_governor",
        )

    return Finding(OK, "CPU", "CPU governor: performance", "CPUs run at full clocks under load.",
                   fix="cpu_governor")
|
||||
|
||||
|
||||
def check_cpu_governor() -> list[Finding]:
    """Collect each CPU's ``scaling_governor`` from sysfs and evaluate the distinct values."""
    nodes = Path("/sys/devices/system/cpu").glob("cpu*/cpufreq/scaling_governor")
    readings = (_read(str(node)) for node in nodes)
    # Unreadable or blank entries are ignored.
    active: set[str] = {text.strip() for text in readings if text and text.strip()}
    finding = evaluate_governor(active)
    if not finding:
        return []
    return [finding]
|
||||
|
||||
|
||||
# --- GameMode / MangoHud --------------------------------------------------------------
|
||||
|
||||
def check_gamemode() -> list[Finding]:
    """Report whether ``gamemoderun`` or ``gamemoded`` is on PATH (OK) or not (INFO)."""
    installed = any(shutil.which(tool) for tool in ("gamemoderun", "gamemoded"))
    if not installed:
        return [Finding(
            INFO, "Tools", "GameMode not installed",
            "GameMode auto-applies performance tweaks (governor, scheduling) for the duration of a game.",
            "Install it: `sudo apt install gamemode`, then launch games with `gamemoderun %command%` "
            "(or use a global Steam launch option).",
            action="gamemode",
        )]
    return [Finding(
        OK, "Tools", "Feral GameMode installed",
        "GameMode can apply the performance governor and other tweaks while a game runs.",
    )]
|
||||
|
||||
|
||||
def check_mangohud() -> list[Finding]:
    """Report whether the ``mangohud`` executable is on PATH (OK) or not (INFO)."""
    if not shutil.which("mangohud"):
        return [Finding(
            INFO, "Tools", "MangoHud not installed",
            "MangoHud overlays live FPS, frame times, and temps in-game — handy for spotting stutter.",
            "Install it: `sudo apt install mangohud`, then launch with `mangohud %command%`.",
            action="mangohud",
        )]
    return [Finding(OK, "Tools", "MangoHud available", "In-game FPS/temps/frametime overlay is installed.")]
|
||||
|
||||
|
||||
# --- vm.swappiness --------------------------------------------------------------------
|
||||
|
||||
def evaluate_swappiness(value: int) -> Finding:
    """Classify ``vm.swappiness``: OK up to and including 10, INFO above that."""
    if value <= 10:
        return Finding(OK, "Memory", f"vm.swappiness is {value}", "Swapping is conservative.",
                       fix="swappiness")
    return Finding(
        INFO, "Memory", f"vm.swappiness is high ({value})",
        "A high swappiness lets the kernel swap out memory eagerly, which can cause "
        "hitching during gaming on systems with ample RAM.",
        "Lower it below (e.g. 10); applies immediately.",
        fix="swappiness",
    )
|
||||
|
||||
|
||||
def check_swappiness() -> list[Finding]:
    """Read ``/proc/sys/vm/swappiness``; empty list when unreadable or not a plain number."""
    raw = _read("/proc/sys/vm/swappiness")
    value = raw.strip() if raw is not None else ""
    if not value.isdigit():
        return []
    return [evaluate_swappiness(int(value))]
|
||||
|
||||
|
||||
# --- shader cache ---------------------------------------------------------------------
|
||||
|
||||
def evaluate_shader_cache(env: dict) -> Finding:
    """Classify shader disk-cache state from environment variables in ``env``.

    WARNING when ``__GL_SHADER_DISK_CACHE`` is exactly "0", or when either
    ``MESA_SHADER_CACHE_DISABLE`` or ``MESA_GLSL_CACHE_DISABLE`` is "1"/"true"
    (case-insensitive); OK otherwise.
    """
    nvidia_off = env.get("__GL_SHADER_DISK_CACHE") == "0"
    mesa_off = any(
        env.get(var, "").lower() in ("1", "true")
        for var in ("MESA_SHADER_CACHE_DISABLE", "MESA_GLSL_CACHE_DISABLE")
    )
    if not (nvidia_off or mesa_off):
        return Finding(OK, "GPU", "Shader disk cache enabled", "Compiled shaders are cached between runs (default).")
    return Finding(
        WARNING, "GPU", "Shader disk cache is disabled",
        "With the shader cache off, shaders recompile every run — a common cause of "
        "in-game stutter, especially on first encounters.",
        "Unset the disabling variable (e.g. remove `__GL_SHADER_DISK_CACHE=0` / "
        "`MESA_SHADER_CACHE_DISABLE`) from your environment / launch options.",
    )
|
||||
|
||||
|
||||
def check_shader_cache() -> list[Finding]:
    """Evaluate the shader-cache variables of this process's environment."""
    finding = evaluate_shader_cache(os.environ)
    return [finding]
|
||||
|
||||
|
||||
# --- transparent hugepages / CPU mitigations (only when notable) ----------------------
|
||||
|
||||
def check_thp() -> list[Finding]:
    """Emit an INFO finding only when Transparent HugePages is set to ``never``."""
    raw = _read("/sys/kernel/mm/transparent_hugepage/enabled")
    if not raw:
        return []
    # The sysfs file marks the active choice with the same '[token]' format as the ASPM policy.
    if _active_aspm(raw) != "never":
        return []
    return [Finding(
        INFO, "Memory", "Transparent HugePages disabled (never)",
        "Some workloads benefit from THP; 'madvise' lets apps opt in without the downsides of 'always'.",
        "Optional: set 'madvise' below; applies immediately.",
        fix="thp",
    )]
|
||||
|
||||
|
||||
def check_mitigations() -> list[Finding]:
    """Emit an INFO finding when ``/proc/cmdline`` contains ``mitigations=off``."""
    kernel_args = _read("/proc/cmdline") or ""
    if "mitigations=off" not in kernel_args:
        return []
    return [Finding(
        INFO, "CPU", "CPU security mitigations are disabled",
        "`mitigations=off` recovers some CPU performance at the cost of CPU-vulnerability "
        "protections — a deliberate trade-off, noted here for awareness.",
        "Remove `mitigations=off` from the kernel cmdline to restore protections.",
    )]
|
||||
|
||||
|
||||
# --- Wine / Steam / Proton versions (informational) -----------------------------------
|
||||
|
||||
def check_wine() -> list[Finding]:
    """System Wine version (used by Lutris / non-Proton games).

    Returns an empty list when ``wine`` is not on PATH, the call fails, or it prints nothing.
    """
    if shutil.which("wine") is None:
        return []
    try:
        result = subprocess.run(["wine", "--version"], capture_output=True, text=True, timeout=10)
    except (subprocess.SubprocessError, OSError):
        return []
    # The version is the first whitespace-separated token of the output.
    tokens = result.stdout.split()
    if not tokens:
        return []
    version = tokens[0]
    return [Finding(
        INFO, "Tools", f"Wine: {version}",
        "System Wine — used by Lutris and non-Proton titles.",
        "Steam games generally run best on Proton; keep Wine current for native/Lutris use.",
    )]
|
||||
|
||||
|
||||
def check_steam_client() -> list[Finding]:
    """Installed Steam client package version (empty list when none is reported)."""
    from . import steam

    version = steam.client_version()
    if version:
        return [Finding(INFO, "Tools", f"Steam client: {version}", "The installed Steam package version.")]
    return []
|
||||
|
||||
|
||||
def check_proton() -> list[Finding]:
    """List the installed Proton versions as one INFO finding (empty list when none).

    Any exception from ``steam.proton_versions()`` is treated as "no versions found".
    """
    from . import steam

    try:
        installed = steam.proton_versions()
    except Exception:
        installed = []
    if not installed:
        return []
    count = len(installed)
    return [Finding(
        INFO, "Tools", f"Proton: {count} version(s) installed",
        ", ".join(installed),
        "Steam picks the Proton version per game (Properties → Compatibility); "
        "Proton Experimental often has the latest fixes.",
    )]
|
||||
|
||||
|
||||
# --- aggregate ------------------------------------------------------------------------
|
||||
|
||||
def run_gameenv_checks() -> list[Finding]:
    """Run all environment checks, sorted by severity (worst first).

    The sort is stable, so findings of equal severity keep the order the checks ran in.
    """
    checks = (
        check_pcie_aspm,
        check_gpu_persistence,
        check_gpu_powermizer,
        check_cpu_governor,
        check_gamemode,
        check_mangohud,
        check_swappiness,
        check_shader_cache,
        check_thp,
        check_mitigations,
        check_proton,
        check_wine,
        check_steam_client,
    )
    collected: list[Finding] = []
    for check in checks:
        collected.extend(check())
    # Severities missing from _ORDER sort last (rank 9).
    collected.sort(key=lambda item: _ORDER.get(item.severity, 9))
    return collected
|
||||
@@ -0,0 +1,149 @@
|
||||
"""Collect recent game / Proton / Steam logs to enrich an AI diagnostic (M14).
|
||||
|
||||
Reads logs that already exist on disk — no change to how the game is launched. Two reliable
|
||||
sources: Proton's per-app log (``~/steam-<appid>.log``, written when ``PROTON_LOG=1``) and
|
||||
Steam's own console log. Each is tail-read and size-bounded so the AI prompt stays small. The
|
||||
text is fed to the AI alongside the findings so it can see *when* something went wrong (a
|
||||
vkd3d/DXVK error, a crash line, the exit code) rather than only the sensor summary.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
# Steam keeps logs under its install root; ~/.steam/steam usually symlinks to the real one.
|
||||
_STEAM_LOG_DIRS = ("~/.steam/steam/logs", "~/.local/share/Steam/logs", "~/.steam/root/logs")
|
||||
_STEAM_LOG_FILES = ("console-linux.txt", "console_log.txt", "stderr.txt")
|
||||
_TS = re.compile(r"^\[(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\]")
|
||||
|
||||
|
||||
def _line_epoch(line: str) -> float | None:
    """Epoch seconds for a line starting with ``[YYYY-MM-DD HH:MM:SS]``, else ``None``.

    Returns ``None`` when the line has no leading timestamp, the digits don't form a valid
    date/time, or the time can't be represented by the platform.
    """
    m = _TS.match(line)
    if not m:
        return None
    try:
        return time.mktime(time.strptime(m.group(1), "%Y-%m-%d %H:%M:%S"))
    except (ValueError, OverflowError):
        # strptime rejects impossible dates; mktime may raise either error for times the
        # platform cannot represent.
        return None
|
||||
|
||||
|
||||
def _since_filter(text: str, since: float) -> str:
    """Return ``text`` from the first line timestamped at/after ``since`` to the end.

    Everything before that line is dropped. After it, every line is kept whether or not it
    carries a timestamp, so multi-line entries survive. Returns '' if no line qualifies.
    """
    lines = text.splitlines()
    for index, line in enumerate(lines):
        stamp = _line_epoch(line)
        if stamp is not None and stamp >= since:
            return "\n".join(lines[index:])
    return ""
|
||||
|
||||
|
||||
def _tail(path: Path, max_bytes: int) -> str:
|
||||
"""Last ``max_bytes`` of a file, decoded leniently (empty string on error)."""
|
||||
try:
|
||||
size = path.stat().st_size
|
||||
with path.open("rb") as fh:
|
||||
if size > max_bytes:
|
||||
fh.seek(size - max_bytes)
|
||||
return fh.read().decode("utf-8", "replace")
|
||||
except OSError:
|
||||
return ""
|
||||
|
||||
|
||||
def _proton_logs() -> list[Path]:
    """Proton per-app logs (``~/steam-*.log``), newest first; ``[]`` if they can't be listed.

    Sorting uses ``_mtime`` so a log that disappears between the glob and the sort is ordered
    last (mtime 0.0) instead of raising.
    """
    try:
        logs = list(Path.home().glob("steam-*.log"))
    except (OSError, RuntimeError):  # Path.home() raises RuntimeError if home can't be resolved
        return []
    return sorted(logs, key=_mtime, reverse=True)
|
||||
|
||||
|
||||
def _steam_console() -> Path | None:
    """First existing Steam log file, or ``None``.

    Directories in ``_STEAM_LOG_DIRS`` are tried in order, and within each directory the
    names in ``_STEAM_LOG_FILES`` in order.
    """
    candidates = (
        Path(os.path.expanduser(folder)) / filename
        for folder in _STEAM_LOG_DIRS
        for filename in _STEAM_LOG_FILES
    )
    return next((path for path in candidates if path.exists()), None)
|
||||
|
||||
|
||||
def available() -> bool:
    """True when at least one Proton log or a Steam console log is found."""
    if _proton_logs():
        return True
    return _steam_console() is not None
|
||||
|
||||
|
||||
def _custom_game_logs(game: str, since: float | None, max_bytes: int) -> list[str]:
    """Labelled tails of the newest ``*.log`` files in a custom game's log directory.

    Only the four most recently modified files are considered. With ``since`` set, files
    whose mtime is older are skipped. Scoping is by file mtime rather than by the
    ``[YYYY-MM-DD …]`` line filter used for the Steam console, since custom-game logs use
    their own timestamp formats. Returns ``[]`` when the game has no log directory or it
    cannot be listed.
    """
    from . import customgames

    log_root = customgames.log_dir(game)
    if not log_root:
        return []
    try:
        candidates = [entry for entry in Path(log_root).glob("*.log") if entry.is_file()]
    except OSError:
        return []

    # a session touches a handful (tarkov/server/launcher latest)
    newest = sorted(candidates, key=_mtime, reverse=True)[:4]

    blocks: list[str] = []
    for entry in newest:
        stale = since is not None and _mtime(entry) < since
        if stale:
            continue
        body = _tail(entry, max_bytes).strip()
        if not body:
            continue
        blocks.append(f"--- {game} log ({entry.name}) ---\n{body}")
    return blocks
|
||||
|
||||
|
||||
def collect(since: float | None = None, max_bytes: int = 8000, game: str | None = None) -> str:
    """Recent Proton + Steam (+ custom-game) log tails as one labelled text block ('' if none).

    With ``since`` (epoch), scope to that session: skip a Proton log not written during/after
    the session (a stale per-app log from an earlier game), and keep only Steam-console lines
    timestamped at/after ``since`` — so we don't feed the model an unrelated past session.

    ``game`` (the diagnostic's focused title) pulls in that custom game's own logs if it has a
    registered log dir — e.g. SPT's server/launcher logs, which Steam/Proton never see.

    ``max_bytes`` bounds each section; a value <= 0 yields no Steam-console text.
    """
    sections: list[str] = []
    scoped = since is not None  # 0.0 is a valid epoch, so never test truthiness

    if game:
        sections += _custom_game_logs(game, since, max_bytes)

    protons = _proton_logs()
    if protons:
        log = protons[0]
        fresh = not scoped or _mtime(log) >= since
        tail = _tail(log, max_bytes).strip() if fresh else ""
        if tail:
            sections.append(f"--- Proton log ({log.name}) ---\n{tail}")

    console = _steam_console()
    if console:
        # When scoping, read a wider window so the session start is likely inside it, but
        # never a window smaller than the caller's own bound.
        window = max(40000, max_bytes) if scoped else max_bytes
        raw = _tail(console, window)
        if scoped:
            raw = _since_filter(raw, since)
        raw = raw.strip()
        # Guard the slice: raw[-0:] would keep the whole text instead of none of it.
        raw = raw[-max_bytes:].strip() if max_bytes > 0 else ""
        if raw:
            sections.append(f"--- Steam log ({console.name}) ---\n{raw}")

    return "\n\n".join(sections)
|
||||
|
||||
|
||||
def _mtime(path: Path) -> float:
|
||||
try:
|
||||
return path.stat().st_mtime
|
||||
except OSError:
|
||||
return 0.0
|
||||
@@ -27,6 +27,8 @@ class Finding:
|
||||
title: str
|
||||
detail: str = ""
|
||||
suggestion: str = ""
|
||||
action: str = "" # optional: id of an installable catalog component (for an Install button)
|
||||
fix: str = "" # optional: id of an applyable runtime tunable (for an Apply dropdown, M6)
|
||||
|
||||
|
||||
# --- NVIDIA Xid knowledge (the seed crash is Xid 79) --------------------------
|
||||
@@ -114,6 +116,31 @@ def scan_journal_text(text: str) -> list[Finding]:
|
||||
"Check power/thermals/driver; capture a session with `rigdoctor record`.",
|
||||
))
|
||||
|
||||
# NVIDIA open-kernel-module VA-space mapping faults: a driver-internal failure that can
|
||||
# storm for minutes and end in a HARD FREEZE with NO Xid logged — the GPU never "falls off
|
||||
# the bus", so the Xid scan above misses it entirely. These code paths live in the open
|
||||
# kernel module (nvidia-*-open); the proprietary module doesn't hit them.
|
||||
nvrm_va = [
|
||||
ln for ln in lines
|
||||
if "gpu_vaspace.c" in ln
|
||||
or "_gvaspaceMappingInsert" in ln
|
||||
or "dmaAllocMapping" in ln
|
||||
or "NVKMS memory for GEM object" in ln
|
||||
]
|
||||
if nvrm_va:
|
||||
findings.append(Finding(
|
||||
WARNING, "GPU", f"NVIDIA driver VA-space mapping errors ×{len(nvrm_va)}",
|
||||
"The NVIDIA kernel module repeatedly failed to update the GPU's virtual address "
|
||||
"space (gpu_vaspace / dmaAllocMapping assertions, NVKMS GEM-allocation failures). "
|
||||
"This is a driver-internal fault that can recur for minutes and end in a hard freeze "
|
||||
"with NO Xid logged — distinct from an Xid 79 hardware drop. These code paths are "
|
||||
"specific to the open kernel module (nvidia-*-open).",
|
||||
"If you're on the open module, switch to the proprietary NVIDIA driver "
|
||||
"(install `nvidia-driver-###` instead of the `…-open` variant) and update to the "
|
||||
"latest branch, then reboot. Capture a session with `rigdoctor record` to confirm "
|
||||
"the errors precede the freeze.",
|
||||
))
|
||||
|
||||
return findings
|
||||
|
||||
|
||||
@@ -144,6 +171,22 @@ def check_journal() -> list[Finding]:
|
||||
return findings
|
||||
|
||||
|
||||
def check_previous_boot() -> list[Finding]:
    """Scan the previous boot's kernel log — the boot that crashed — for fault signatures.

    Needs persistent journald (else the crashed boot's logs were lost on reboot, which the
    persistence check flags separately). Findings are framed as coming from that boot: each
    detail is prefixed with a note, and all other fields (including ``action`` and ``fix``)
    are carried over unchanged.
    """
    out = _journalctl(["-k", "-b", "-1", "--no-pager", "-o", "cat"])
    if not out or not out.strip():
        return []
    prefix = "Logged during the previous (crashed) boot. "
    tagged: list[Finding] = []
    for f in scan_journal_text(out):
        detail = (prefix + (f.detail or "")).strip()
        tagged.append(Finding(
            f.severity, f.category, f.title, detail, f.suggestion,
            action=f.action, fix=f.fix,  # keep Install/Apply hooks instead of dropping them
        ))
    return tagged
|
||||
|
||||
|
||||
def check_journal_persistence() -> list[Finding]:
|
||||
if Path("/var/log/journal").is_dir():
|
||||
return []
|
||||
@@ -170,47 +213,66 @@ def check_nvidia_driver() -> list[Finding]:
|
||||
return []
|
||||
|
||||
|
||||
def _smart_devices() -> list[str]:
|
||||
def _read_text(path: str) -> str | None:
|
||||
try:
|
||||
proc = subprocess.run(["smartctl", "--scan"], capture_output=True, text=True, timeout=10)
|
||||
except (subprocess.SubprocessError, OSError):
|
||||
return []
|
||||
devices = []
|
||||
for line in proc.stdout.splitlines():
|
||||
line = line.strip()
|
||||
if line.startswith("/dev/"):
|
||||
devices.append(line.split()[0])
|
||||
return devices
|
||||
return Path(path).read_text()
|
||||
except OSError:
|
||||
return None
|
||||
|
||||
|
||||
def check_smart() -> list[Finding]:
|
||||
if shutil.which("smartctl") is None:
|
||||
return [Finding(
|
||||
INFO, "Storage", "SMART not checked (smartmontools missing)",
|
||||
"Disk self-health couldn't be read.",
|
||||
"Install it for disk health checks: `sudo apt install smartmontools`",
|
||||
)]
|
||||
devices = _smart_devices()
|
||||
if not devices:
|
||||
return [Finding(
|
||||
INFO, "Storage", "SMART: couldn't enumerate drives",
|
||||
"Reading SMART usually needs root.",
|
||||
"Run: `sudo rigdoctor report`",
|
||||
)]
|
||||
findings: list[Finding] = []
|
||||
for dev in devices:
|
||||
def _nvidia_module_is_open() -> bool | None:
|
||||
"""Whether the *loaded* NVIDIA kernel module is the open-source flavor.
|
||||
|
||||
True = open (nvidia-*-open), False = proprietary, None = can't tell / no NVIDIA module.
|
||||
/proc is authoritative for the loaded module and needs no external tool; modinfo's filename
|
||||
(…/nvidia-###-open/nvidia.ko) is the fallback.
|
||||
"""
|
||||
proc = _read_text("/proc/driver/nvidia/version")
|
||||
if proc:
|
||||
low = proc.lower()
|
||||
if "open kernel module" in low:
|
||||
return True
|
||||
if "kernel module" in low: # proprietary banner: "NVIDIA UNIX … Kernel Module …"
|
||||
return False
|
||||
if shutil.which("modinfo"):
|
||||
try:
|
||||
proc = subprocess.run(["smartctl", "-H", dev], capture_output=True, text=True, timeout=15)
|
||||
out = subprocess.run(["modinfo", "nvidia"], capture_output=True, text=True, timeout=10).stdout
|
||||
except (subprocess.SubprocessError, OSError):
|
||||
continue
|
||||
combined = proc.stdout + proc.stderr
|
||||
if "Permission denied" in combined or "requires root" in combined.lower():
|
||||
findings.append(Finding(INFO, "Storage", f"SMART for {dev} needs root", "", "Run: `sudo rigdoctor report`"))
|
||||
elif "PASSED" in combined:
|
||||
findings.append(Finding(OK, "Storage", f"SMART OK: {dev}", "Overall-health self-assessment passed."))
|
||||
elif "FAILED" in combined or "FAILING_NOW" in combined:
|
||||
findings.append(Finding(CRITICAL, "Storage", f"SMART FAILED: {dev}", "The drive reports failing health.", "Back up now and replace the drive."))
|
||||
return findings
|
||||
out = ""
|
||||
for line in out.splitlines():
|
||||
if line.startswith("filename:"):
|
||||
return "-open" in line
|
||||
return None
|
||||
|
||||
|
||||
def check_nvidia_module() -> list[Finding]:
    """Emit an INFO note when the loaded NVIDIA kernel module is the open-source flavor.

    Suggestion-only. Returns an empty list when the module is proprietary or its flavor
    can't be determined.
    """
    is_open = _nvidia_module_is_open()
    if is_open is True:
        return [Finding(
            INFO, "Driver", "NVIDIA open kernel module in use",
            "The loaded NVIDIA driver is the open-source kernel module (nvidia-*-open). It's fine for "
            "most setups, but on some GeForce cards it hits driver-internal faults (VA-space mapping "
            "errors, hard freezes with no Xid) that the proprietary module doesn't.",
            "If you get unexplained hard freezes with no Xid in the logs, try the proprietary NVIDIA "
            "driver (`nvidia-driver-###` rather than the `…-open` variant) on the latest branch.",
        )]
    return []
|
||||
|
||||
|
||||
def check_drives() -> list[Finding]:
    """Per-drive findings built by the ``drives`` module.

    When ``elevation.privileged()`` returns data with a non-None ``"drives"`` entry, that
    cached data is converted; otherwise ``drives.collect()`` is called directly.
    """
    from . import drives, elevation

    cached = elevation.privileged()
    have_cached = cached is not None and cached.get("drives") is not None
    if have_cached:
        records = drives.from_dicts(cached["drives"])
    else:
        records = drives.collect()
    return drives.to_findings(records)
|
||||
|
||||
|
||||
def check_live_temps() -> list[Finding]:
|
||||
@@ -233,13 +295,97 @@ def check_live_temps() -> list[Finding]:
|
||||
)]
|
||||
|
||||
|
||||
def run_health_checks() -> list[Finding]:
|
||||
"""Run all checks and return findings sorted by severity (worst first)."""
|
||||
def check_pcie_links() -> list[Finding]:
    """Flag NVMe controllers whose negotiated PCIe link is below the device's maximum.

    A width mismatch is reported as a WARNING; a lower generation at matching (or unknown)
    width is reported as INFO, since it can also be normal link power management at idle.
    Controllers whose current or max generation can't be read are skipped. Only NVMe
    controllers are examined here; the GPU is intentionally left out.
    """
    from . import inventory

    results: list[Finding] = []
    for name, dev in inventory.nvme_controllers():
        cur_g, cur_w, max_g, max_w = inventory.read_link(dev)
        if not (cur_g and max_g):
            continue

        width_differs = bool(max_w and cur_w and cur_w != max_w)
        if width_differs:  # fewer lanes → almost always lane-sharing
            title = f"{name} linked at x{cur_w} (supports x{max_w})"
            detail = (
                f"{name} negotiated PCIe Gen{cur_g} x{cur_w}, but the drive supports "
                f"Gen{max_g} x{max_w}. Fewer lanes is usually motherboard lane-sharing — a GPU or a "
                "second card in a PCIe slot, or another populated M.2, can steal lanes from this slot."
            )
            suggestion = (
                "Check your board manual's lane-sharing table; move the drive to a full-x4 "
                "(often CPU-attached) M.2 slot."
            )
            results.append(Finding(WARNING, "PCIe", title, detail, suggestion))
            continue

        if cur_g < max_g:  # full width but a lower generation → slower slot or idle ASPM
            title = f"{name} linked at Gen{cur_g} (supports Gen{max_g})"
            detail = (
                f"{name} negotiated PCIe Gen{cur_g} but supports Gen{max_g}. This can be a slower "
                "(chipset or older) M.2 slot, or normal link power management (ASPM) at idle."
            )
            suggestion = "If you expect full speed, check the slot and the BIOS PCIe/ASPM settings."
            results.append(Finding(INFO, "PCIe", title, detail, suggestion))
    return results
|
||||
|
||||
|
||||
def check_displays() -> list[Finding]:
    """One INFO finding per monitor that ``displays.collect()`` marks ``can_go_faster``.

    Each finding shows the current and maximum refresh rate (rounded) at the monitor's
    current resolution. Read-only suggestion.
    """
    from . import displays

    results: list[Finding] = []
    for monitor in displays.collect():
        if not monitor.can_go_faster:
            continue
        current_hz = round(monitor.refresh)
        max_hz = round(monitor.max_refresh)
        label = monitor.name or monitor.connector
        results.append(Finding(
            INFO, "Display",
            f"{monitor.connector} at {current_hz} Hz (supports {max_hz} Hz)",
            f"{label} is running at {current_hz} Hz at "
            f"{monitor.width}x{monitor.height}, but supports {max_hz} Hz at that resolution.",
            "Raise the refresh rate in your desktop's Display settings (GNOME: Settings → Displays)."))
    return results
|
||||
|
||||
|
||||
def check_memory_speed() -> list[Finding]:
    """Flag RAM configured below its rated speed (XMP/EXPO profile not enabled).

    Uses the elevated session's dmidecode data when present, else ``inventory._dmidecode()``.
    Reports the module with the largest rated-minus-configured gap (first one on ties), or
    nothing when no module runs below its rated speed.
    """
    from . import elevation, inventory

    priv = elevation.privileged()
    if priv and priv.get("dmidecode"):
        dmi = priv["dmidecode"]
    else:
        dmi = inventory._dmidecode()

    # (configured, rated) for every module running below its rating
    below_rated: list[tuple[int, int]] = []
    for module in dmi.get("memory", []):
        configured, rated = inventory.module_speed(module)
        if configured and rated and configured < rated:
            below_rated.append((configured, rated))
    if not below_rated:
        return []

    # max() returns the first maximal element, so the earliest module wins a tie.
    configured, rated = max(below_rated, key=lambda pair: pair[1] - pair[0])
    return [Finding(
        INFO, "Memory", f"RAM at {configured} MT/s (rated {rated} MT/s)",
        f"Memory is running at {configured} MT/s but the modules are rated {rated} MT/s — the "
        "XMP/EXPO profile isn't enabled, so you're leaving memory bandwidth on the table.",
        "Enable XMP (Intel) or EXPO (AMD) in your BIOS/UEFI to run at the rated speed.")]
|
||||
|
||||
|
||||
def run_health_checks(include_journal: bool = True) -> list[Finding]:
    """Run all checks and return findings sorted by severity (worst first).

    Drive SMART and RAM speed need root; if the session collected them via launch elevation,
    those checks use the cached data instead of re-running (which would just report "needs root").

    `include_journal=False` skips the 7-day kernel-journal scan — used by the crash
    analysis, which scans the previous (crashed) boot specifically instead.
    """
    findings: list[Finding] = []
    findings += check_nvidia_driver()
    findings += check_nvidia_module()
    if include_journal:  # the only place the journal scan runs, so the flag really skips it
        findings += check_journal()
    findings += check_journal_persistence()
    findings += check_drives()  # per-drive SMART health; supersedes the old check_smart()
    findings += check_live_temps()
    findings += check_pcie_links()
    findings += check_displays()
    findings += check_memory_speed()  # uses elevation data if present, else dmidecode (root)
    # Stable sort: equal severities keep the order above; unknown severities sort last.
    findings.sort(key=lambda f: _ORDER.get(f.severity, 9))
    return findings
|
||||
|
||||
@@ -9,6 +9,7 @@ from __future__ import annotations
|
||||
import json
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
from dataclasses import dataclass
|
||||
@@ -85,6 +86,35 @@ def _firmware(dmi: dict) -> Section:
|
||||
return Section("Firmware", items)
|
||||
|
||||
|
||||
# Common DDR5 XMP/EXPO speed grades (MT/s) — used to read a kit's rated speed from its part
|
||||
# number, since with XMP/EXPO off dmidecode only reports the JEDEC base (e.g. 4800).
|
||||
_DDR_SPEEDS = {4800, 5200, 5600, 6000, 6200, 6400, 6600, 6800, 7000, 7200, 7600, 8000, 8200, 8400}
|
||||
|
||||
|
||||
def _mts(value: str) -> int | None:
|
||||
"""Parse a dmidecode speed like '4800 MT/s' (or 'MHz') to its integer MT/s."""
|
||||
m = re.match(r"\s*(\d+)", value or "")
|
||||
return int(m.group(1)) if m else None
|
||||
|
||||
|
||||
def _rated_from_part(part: str) -> int | None:
    """Highest value from ``_DDR_SPEEDS`` found as a standalone 4-digit run in ``part``.

    "Standalone" means the four digits have no digit directly before or after them.
    Returns ``None`` when no such run matches a known speed grade.
    """
    best: int | None = None
    for token in re.findall(r"(?<!\d)(\d{4})(?!\d)", part or ""):
        grade = int(token)
        if grade in _DDR_SPEEDS and (best is None or grade > best):
            best = grade
    return best
|
||||
|
||||
|
||||
def module_speed(m: dict) -> tuple[int | None, int | None]:
    """(configured, rated) MT/s for a dmidecode Memory Device.

    Configured comes from the first non-empty of "Configured Memory Speed", "Configured Clock
    Speed" and "Speed". Rated is the larger of the "Speed" field and the speed grade parsed
    from "Part Number" (so an unapplied XMP/EXPO profile is still detected). Either element
    is ``None`` when it can't be determined.
    """
    running = (
        m.get("Configured Memory Speed")
        or m.get("Configured Clock Speed")
        or m.get("Speed", "")
    )
    configured = _mts(running)

    reported_max = _mts(m.get("Speed", ""))
    from_part = _rated_from_part(m.get("Part Number", ""))
    known = [speed for speed in (reported_max, from_part) if speed]
    if not known:
        return configured, None
    return configured, max(known)
|
||||
|
||||
|
||||
def _memory(dmi: dict) -> Section:
|
||||
items: list[tuple[str, str]] = []
|
||||
try:
|
||||
@@ -98,8 +128,12 @@ def _memory(dmi: dict) -> Section:
|
||||
if modules:
|
||||
items.append(("Modules", str(len(modules))))
|
||||
for i, m in enumerate(modules):
|
||||
desc = " · ".join(p for p in (m.get("Size"), m.get("Type"), m.get("Speed"), m.get("Part Number")) if p)
|
||||
items.append((f"Slot {i}", desc))
|
||||
configured, rated = module_speed(m)
|
||||
speed = f"{configured} MT/s" if configured else m.get("Speed", "")
|
||||
if rated and configured and rated > configured: # XMP/EXPO not applied
|
||||
speed += f" (rated {rated})"
|
||||
parts = (m.get("Size"), m.get("Type"), speed, m.get("Part Number"))
|
||||
items.append((f"Slot {i}", " · ".join(p for p in parts if p)))
|
||||
elif shutil.which("dmidecode"):
|
||||
items.append(("Modules", "run with admin for module details"))
|
||||
return Section("Memory", items)
|
||||
@@ -123,6 +157,64 @@ def _gpu() -> Section:
|
||||
return Section("GPU", [("Device", g) for g in gpus] or [("Device", "unknown")])
|
||||
|
||||
|
||||
# PCIe link speed (GT/s) → generation.
|
||||
_PCIE_GEN = {"2.5": 1, "5": 2, "5.0": 2, "8": 3, "8.0": 3, "16": 4, "16.0": 4, "32": 5, "32.0": 5}
|
||||
|
||||
|
||||
def _gen(speed: str) -> int | None:
|
||||
"""Map a sysfs link speed like '16.0 GT/s PCIe' to its PCIe generation (4)."""
|
||||
tok = speed.strip().split()[0] if speed.strip() else ""
|
||||
return _PCIE_GEN.get(tok)
|
||||
|
||||
|
||||
def read_link(dev: Path) -> tuple[int | None, str, int | None, str]:
    """Read the negotiated and maximum PCIe link of a PCI device directory.

    Returns ``(cur_gen, cur_width, max_gen, max_width)``. Widths are the raw, stripped
    sysfs strings (e.g. '4'); generations are ints (4) or ``None`` when the speed is
    missing or unrecognised. An attribute file that cannot be read counts as ''.
    """
    raw: dict[str, str] = {}
    for attr in ("current_link_speed", "current_link_width",
                 "max_link_speed", "max_link_width"):
        try:
            raw[attr] = (dev / attr).read_text().strip()
        except OSError:
            raw[attr] = ""

    return (
        _gen(raw["current_link_speed"]),
        raw["current_link_width"],
        _gen(raw["max_link_speed"]),
        raw["max_link_width"],
    )
|
||||
|
||||
|
||||
def _link_desc(dev: Path) -> str:
    """Describe a PCI device's negotiated PCIe link, noting if it's below its max.

    e.g. 'PCIe Gen4 x4', or 'PCIe Gen3 x4 (capable of Gen4 x4)' when downtrained / in a
    slower slot. Returns '' when the current generation or width can't be read. When the
    maximum width is unreadable only the generation is mentioned ('capable of Gen4').
    """
    cur_g, cur_w, max_g, max_w = read_link(dev)
    if not cur_g or not cur_w:
        return ""

    desc = f"PCIe Gen{cur_g} x{cur_w}"
    if max_g:
        slower = cur_g < max_g
        # Widths are compared as the raw sysfs strings; an unknown max width never counts.
        narrower = bool(max_w) and cur_w != max_w
        if slower or narrower:
            capable = f"Gen{max_g} x{max_w}" if max_w else f"Gen{max_g}"
            desc += f" (capable of {capable})"
    return desc
|
||||
|
||||
|
||||
def nvme_controllers(base: Path | str = "/sys/class/nvme") -> list[tuple[str, Path]]:
    """Each NVMe controller as (name, pci-device-dir), e.g. ('nvme0', /sys/.../device).

    ``base`` is the sysfs class directory to list (overridable mainly for tests). Only
    entries named exactly ``nvme<N>`` are returned, ordered by the numeric index so that
    nvme2 comes before nvme10. Returns ``[]`` when the directory can't be listed.
    """
    root = Path(base)
    try:
        names = [p.name for p in root.iterdir() if re.fullmatch(r"nvme\d+", p.name)]
    except OSError:
        return []
    # Numeric sort first; the name itself breaks ties deterministically.
    names.sort(key=lambda name: (int(name[4:]), name))
    return [(name, root / name / "device") for name in names]
|
||||
|
||||
|
||||
def _nvme_link(block_name: str) -> str:
    """Describe the PCIe link behind an NVMe block device; '' for anything else.

    The controller name is the leading ``nvme<N>`` of the block device name
    (nvme0n1 → nvme0); its sysfs ``device`` directory is handed to ``_link_desc``.
    """
    found = re.match(r"(nvme\d+)", block_name)
    if found is None:
        return ""
    controller = found.group(1)
    return _link_desc(Path("/sys/class/nvme") / controller / "device")
|
||||
|
||||
|
||||
def _storage() -> Section:
|
||||
items: list[tuple[str, str]] = []
|
||||
# TYPE first so MODEL (which can contain spaces) is the trailing field.
|
||||
@@ -133,15 +225,27 @@ def _storage() -> Section:
|
||||
continue
|
||||
name, size = parts[1], parts[2]
|
||||
model = parts[3] if len(parts) > 3 else ""
|
||||
items.append((name, f"{model} ({size})".strip()))
|
||||
desc = f"{model} ({size})".strip()
|
||||
link = _nvme_link(name) # NVMe PCIe gen/width (e.g. Gen4 x4), flags downtrains
|
||||
if link:
|
||||
desc += f" · {link}"
|
||||
items.append((name, desc))
|
||||
return Section("Storage", items or [("Disks", "unknown")])
|
||||
|
||||
|
||||
def _display() -> Section:
|
||||
return Section("Display", [
|
||||
from . import displays
|
||||
|
||||
items = [
|
||||
("Session", os.environ.get("XDG_SESSION_TYPE", "unknown")),
|
||||
("Desktop", os.environ.get("XDG_CURRENT_DESKTOP") or os.environ.get("DESKTOP_SESSION", "unknown")),
|
||||
])
|
||||
]
|
||||
for m in displays.collect():
|
||||
val = f"{m.width}x{m.height} @ {round(m.refresh)} Hz"
|
||||
if m.can_go_faster:
|
||||
val += f" (supports {round(m.max_refresh)} Hz)"
|
||||
items.append((m.label(), val))
|
||||
return Section("Display", items)
|
||||
|
||||
|
||||
def _dmidecode() -> dict:
|
||||
@@ -171,7 +275,10 @@ def _dmidecode() -> dict:
|
||||
|
||||
|
||||
def collect() -> list[Section]:
|
||||
dmi = _dmidecode()
|
||||
from . import elevation
|
||||
|
||||
priv = elevation.privileged()
|
||||
dmi = priv["dmidecode"] if (priv and priv.get("dmidecode") is not None) else _dmidecode()
|
||||
return [_system(), _cpu(), _firmware(dmi), _memory(dmi), _gpu(), _storage(), _display()]
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,89 @@
|
||||
"""Non-Steam game detection (M6): Lutris + Heroic installed games.
|
||||
|
||||
Reads each launcher's own install records (Lutris' SQLite library, Heroic's JSON stores),
|
||||
returning the same `steam.Game` shape tagged with the launcher. Stdlib only; every reader
|
||||
degrades to [] if the launcher isn't installed or its files can't be parsed.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
|
||||
from .steam import Game
|
||||
|
||||
LUTRIS_DB = Path(os.path.expanduser("~/.local/share/lutris/pga.db"))
|
||||
HEROIC_DIR = Path(os.path.expanduser("~/.config/heroic"))
|
||||
|
||||
|
||||
def _lutris_games() -> list[Game]:
    """Installed Lutris games, read from Lutris' SQLite library (``LUTRIS_DB``).

    Returns ``[]`` when the database doesn't exist or can't be opened/queried. Each game
    uses the Lutris slug as ``appid`` and is tagged ``launcher="lutris"``.
    """
    db = LUTRIS_DB
    if not db.exists():
        return []
    try:
        # as_uri() percent-encodes the path, so characters that are special in a URI
        # ('?', '#', '%', spaces) can't be mistaken for the query string or a fragment.
        uri = f"{db.absolute().as_uri()}?mode=ro"
        con = sqlite3.connect(uri, uri=True)  # read-only
        try:
            rows = con.execute(
                "SELECT name, slug FROM games WHERE installed = 1 AND name IS NOT NULL"
            ).fetchall()
        finally:
            con.close()
    except (sqlite3.Error, OSError):
        return []
    return [
        Game(appid=slug or "", name=str(name), library="", installdir="", launcher="lutris")
        for name, slug in rows
        if name  # skip empty-string names; NULLs are already excluded by the query
    ]
|
||||
|
||||
|
||||
def _read_json(path: Path):
|
||||
try:
|
||||
return json.loads(path.read_text())
|
||||
except (OSError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _heroic_games() -> list[Game]:
    """Installed Heroic games (Epic via Legendary, plus GOG) from Heroic's JSON stores.

    Returns ``[]`` when ``HEROIC_DIR`` isn't a directory; a store that is missing or
    malformed simply contributes nothing. Names are always strings, so unexpected value
    types in the JSON can't break callers that sort or compare names.
    """
    base = HEROIC_DIR
    if not base.is_dir():
        return []
    games: list[Game] = []

    # Epic / Legendary: {app_name: {"title": ..., ...}}
    epic = _read_json(base / "legendaryConfig" / "legendary" / "installed.json")
    if isinstance(epic, dict):
        for app_name, info in epic.items():
            if isinstance(info, dict):
                games.append(Game(appid=str(app_name), name=str(info.get("title") or app_name),
                                  library="", installdir="", launcher="heroic"))

    # GOG: {"installed": [{"appName", "install_path", "title"?}]}
    gog = _read_json(base / "gog_store" / "installed.json")
    entries = gog.get("installed") if isinstance(gog, dict) else None
    if isinstance(entries, list):
        for e in entries:
            if not isinstance(e, dict):
                continue
            app_id = e.get("appName")
            app_id = "" if app_id is None else str(app_id)
            raw_path = e.get("install_path")
            install_path = raw_path if isinstance(raw_path, str) else ""
            # Prefer the stored title, then the install folder's name, then the app id.
            title = str(e.get("title") or os.path.basename(install_path.rstrip("/")) or app_id)
            if title:
                games.append(Game(appid=app_id, name=title, library="",
                                  installdir="", launcher="heroic"))
    return games
|
||||
|
||||
|
||||
def scan() -> list[Game]:
    """Installed non-Steam games (Lutris, then Heroic), de-duplicated and sorted by name.

    Two entries are duplicates when launcher and name both match; the first one found is
    kept. Sorting is case-insensitive.
    """
    unique: dict[tuple[str, str], Game] = {}
    for game in [*_lutris_games(), *_heroic_games()]:
        unique.setdefault((game.launcher, game.name), game)
    return sorted(unique.values(), key=lambda g: g.name.lower())
|
||||
@@ -0,0 +1,314 @@
|
||||
"""Parse a Windows crash dump (``.dmp`` minidump) into text the AI can reason over (M14).
|
||||
|
||||
Linux gamers get these from Windows games running under **Proton/Wine**: the game's
|
||||
crash handler (Crashpad/Breakpad, Unreal/Unity, or Wine itself) writes a binary minidump
|
||||
when the title hard-crashes. The file is binary, so we can't hand it to a model directly —
|
||||
we parse the documented ``MDMP`` streams with stdlib :mod:`struct` (no pip deps, per the
|
||||
core rule) and pull out the parts that actually diagnose a crash:
|
||||
|
||||
* the **exception / crash reason** (e.g. access violation 0xC0000005),
|
||||
* the **faulting module** (which DLL the crash address lands in — ``nvwgf2umx.dll``,
|
||||
``d3d11.dll``, an anticheat, the game's own .exe…),
|
||||
* **OS / CPU** info, and the **loaded module list**.
|
||||
|
||||
If ``minidump_stackwalk`` (Breakpad) or ``minidump-stackwalk`` (rust-minidump) is on PATH,
|
||||
its fuller report is appended best-effort; we never depend on it.
|
||||
|
||||
The result feeds the existing opt-in AI flow (:mod:`ai`) exactly like the sensor findings do.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
import struct
|
||||
import subprocess
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
from .health import CRITICAL, INFO, Finding
|
||||
|
||||
# --- MDMP on-disk layout (all little-endian, packed) --------------------------------
|
||||
_SIGNATURE = b"MDMP"
|
||||
_HEADER = struct.Struct("<4sIIIIIQ") # sig, ver, n_streams, dir_rva, csum, time, flags
|
||||
_DIRECTORY = struct.Struct("<III") # stream_type, data_size, data_rva
|
||||
_SYSINFO = struct.Struct("<HHHBBIIIII") # arch, lvl, rev, n_cpu, prod, maj, min, build, plat, csd
|
||||
_MODULE_STRIDE = 108 # sizeof(MINIDUMP_MODULE)
|
||||
|
||||
# Stream types we read (MINIDUMP_STREAM_TYPE).
|
||||
_MODULE_LIST = 4
|
||||
_EXCEPTION = 6
|
||||
_SYSTEM_INFO = 7
|
||||
_COMMENT_A = 10
|
||||
_COMMENT_W = 11
|
||||
|
||||
_ARCH = {0: "x86", 5: "ARM", 6: "IA-64", 9: "x86-64", 12: "ARM64", 0xFFFF: "unknown"}
|
||||
_PLATFORM = {0x8201: "Linux", 0x8202: "Solaris", 0x8203: "macOS", 0x8204: "iOS",
|
||||
0x8205: "Android", 0x8207: "NaCl"}
|
||||
|
||||
# Common Windows exception (NTSTATUS) codes — what the model needs named, not raw hex.
|
||||
_EXCEPTION_NAMES = {
|
||||
0x80000003: "Breakpoint",
|
||||
0x80000004: "Single step",
|
||||
0xC0000005: "Access violation",
|
||||
0xC0000006: "In-page error",
|
||||
0xC000001D: "Illegal instruction",
|
||||
0xC0000025: "Noncontinuable exception",
|
||||
0xC000008C: "Array bounds exceeded",
|
||||
0xC000008E: "Float divide by zero",
|
||||
0xC0000090: "Float invalid operation",
|
||||
0xC0000094: "Integer divide by zero",
|
||||
0xC0000095: "Integer overflow",
|
||||
0xC0000096: "Privileged instruction",
|
||||
0xC00000FD: "Stack overflow",
|
||||
0xC0000135: "DLL not found",
|
||||
0xC0000142: "DLL initialization failed",
|
||||
0xC0000374: "Heap corruption",
|
||||
0xC0000409: "Stack buffer overrun / fast fail",
|
||||
0xC000041D: "Fatal user-callback exception",
|
||||
0xE06D7363: "C++ exception (MSVC)",
|
||||
}
|
||||
_ACCESS = {0: "reading", 1: "writing", 8: "executing"} # AV ExceptionInformation[0]
|
||||
|
||||
_STACKWALK_BINS = ("minidump_stackwalk", "minidump-stackwalk")
|
||||
_MODULES_SHOWN = 80 # cap the module list so the AI prompt stays bounded
|
||||
|
||||
|
||||
@dataclass
class Module:
    """One entry of the dump's module list."""

    name: str  # basename only (directory part already stripped)
    base: int  # address the image is loaded at
    size: int  # image size in bytes; the module spans [base, base + size)
|
||||
|
||||
|
||||
@dataclass
class MinidumpReport:
    """Everything extracted from one minidump; fields keep their defaults when unknown."""

    path: str                             # the dump file, as given to parse()
    ok: bool = False                      # True once the streams were parsed without error
    error: str = ""                       # human-readable reason when ok is False
    crash_reason: str = ""                # e.g. "Access violation (0xC0000005) reading 0x0"
    exception_code: int | None = None
    exception_address: int | None = None
    faulting_module: str | None = None    # module containing exception_address, if any
    crashing_thread: int | None = None    # thread id from the exception stream
    os_name: str = ""
    cpu_arch: str = ""
    cpu_count: int = 0
    timestamp: int | None = None          # header TimeDateStamp; None when it was 0
    modules: list[Module] = field(default_factory=list)
    comment: str = ""                     # text of the dump's comment stream, if present
    stackwalk: str = ""                   # output of an external stackwalker, if one ran
|
||||
|
||||
|
||||
def parse(path, *, run_stackwalk: bool = True) -> MinidumpReport:
    """Parse a ``.dmp`` file into a :class:`MinidumpReport`.

    Never raises for an unreadable, non-minidump or corrupt file: the report comes back
    with ``ok=False`` and ``error`` set. On success ``ok`` is True, the faulting module is
    resolved from the exception address, and — unless ``run_stackwalk`` is False — the
    output of an external stackwalker is attached when one is installed.
    """
    report = MinidumpReport(path=str(path))

    try:
        data = Path(path).read_bytes()
    except OSError as exc:
        report.error = f"can't read the file: {exc}"
        return report

    has_header = len(data) >= _HEADER.size and data[:4] == _SIGNATURE
    if not has_header:
        report.error = "not a Windows minidump (missing the 'MDMP' signature)."
        return report

    try:
        header = _HEADER.unpack_from(data, 0)
        n_streams, dir_rva, stamp = header[2], header[3], header[5]
        report.timestamp = stamp or None
        directory = _streams(data, dir_rva, n_streams)
        _read_system_info(data, directory.get(_SYSTEM_INFO), report)
        report.modules = _read_modules(data, directory.get(_MODULE_LIST))
        _read_exception(data, directory.get(_EXCEPTION), report)
        report.comment = _read_comment(data, directory)
    except (struct.error, ValueError, IndexError) as exc:
        report.error = f"the minidump looks corrupt or unsupported: {exc}"
        return report

    crash_address = report.exception_address
    if crash_address is not None:
        report.faulting_module = _module_at(report.modules, crash_address)
    report.ok = True

    if run_stackwalk:
        report.stackwalk = stackwalk(path)
    return report
|
||||
|
||||
|
||||
def _streams(data: bytes, dir_rva: int, n: int) -> dict[int, tuple[int, int]]:
    """Index the stream directory as ``stream_type -> (data_size, data_rva)``.

    Reads up to ``n`` directory entries starting at ``dir_rva`` and stops early at the
    first entry that would run past the end of ``data``. When a stream type occurs more
    than once the first entry wins.
    """
    entry_size = _DIRECTORY.size
    end = len(data)
    found: dict[int, tuple[int, int]] = {}
    for off in range(dir_rva, dir_rva + n * entry_size, entry_size):
        if off + entry_size > end:
            break
        stype, size, rva = _DIRECTORY.unpack_from(data, off)
        if stype not in found:
            found[stype] = (size, rva)
    return found
|
||||
|
||||
|
||||
def _read_system_info(data: bytes, loc, report: MinidumpReport) -> None:
    """Fill ``cpu_arch``, ``cpu_count`` and ``os_name`` on ``report`` from the system-info stream.

    ``loc`` is the stream's ``(data_size, data_rva)`` directory entry; a falsy ``loc``
    (stream absent) leaves the report untouched.
    """
    if not loc:
        return
    _size, rva = loc
    fields = _SYSINFO.unpack_from(data, rva)
    arch, n_cpu = fields[0], fields[3]
    major, minor, build, platform = fields[5:9]

    report.cpu_arch = _ARCH.get(arch, f"arch 0x{arch:x}")
    report.cpu_count = n_cpu

    version = f"{major}.{minor}.{build}"
    if platform == 2:  # VER_PLATFORM_WIN32_NT
        os_name = f"Windows {version}"
    elif platform in _PLATFORM:
        # An all-zero version carries no information, so show the platform name alone.
        os_name = _PLATFORM[platform] + (f" {version}" if (major or minor or build) else "")
    else:
        os_name = f"platform 0x{platform:x} {version}"
    report.os_name = os_name
|
||||
|
||||
|
||||
def _read_modules(data: bytes, loc) -> list[Module]:
    """Decode the module-list stream into :class:`Module` records.

    ``loc`` is the stream's ``(data_size, data_rva)`` entry; a falsy ``loc`` gives ``[]``.
    The stream is a u32 count followed by fixed-size records of ``_MODULE_STRIDE`` bytes;
    decoding stops at the first record that would run past the end of ``data``.
    """
    if not loc:
        return []
    _size, rva = loc
    (declared,) = struct.unpack_from("<I", data, rva)
    first = rva + 4
    end = len(data)

    found: list[Module] = []
    for rec in range(first, first + declared * _MODULE_STRIDE, _MODULE_STRIDE):
        if rec + _MODULE_STRIDE > end:
            break
        # Record offsets: +0 image base (u64), +8 image size (u32), +20 name RVA (u32).
        base, size, name_rva = struct.unpack_from("<QI8xI", data, rec)
        found.append(Module(_read_mdstring(data, name_rva), base, size))
    return found
|
||||
|
||||
|
||||
def _read_exception(data: bytes, loc, report: MinidumpReport) -> None:
    """Fill the crash fields of ``report`` from the exception stream.

    ``loc`` is the stream's ``(data_size, data_rva)`` entry; a falsy ``loc`` is a no-op.
    Sets ``crashing_thread``, ``exception_code``, ``exception_address`` and
    ``crash_reason``. Nothing is assigned unless every field could be read.
    """
    if not loc:
        return
    _size, rva = loc

    def at(fmt: str, offset: int) -> int:
        return struct.unpack_from(fmt, data, rva + offset)[0]

    thread_id = at("<I", 0)    # MINIDUMP_EXCEPTION_STREAM.ThreadId
    code = at("<I", 8)         # ExceptionRecord.ExceptionCode
    address = at("<Q", 24)     # ExceptionRecord.ExceptionAddress
    n_params = at("<I", 32)    # ExceptionRecord.NumberParameters

    report.crashing_thread = thread_id
    report.exception_code = code
    report.exception_address = address
    report.crash_reason = _describe_exception(data, rva, code, n_params)
|
||||
|
||||
|
||||
def _describe_exception(data: bytes, rva: int, code: int, n_params: int) -> str:
    """Name an exception code, e.g. 'Access violation (0xC0000005) reading 0x10'.

    ``rva`` is the start of the exception stream. For access violations and in-page errors
    with at least two parameters, the kind of access and the target address are appended.
    """
    label = _EXCEPTION_NAMES.get(code, "Unknown exception")
    reason = f"{label} (0x{code:08X})"

    has_access_info = n_params >= 2 and code in (0xC0000005, 0xC0000006)
    if not has_access_info:
        return reason

    (op,) = struct.unpack_from("<Q", data, rva + 40)    # ExceptionInformation[0]
    (addr,) = struct.unpack_from("<Q", data, rva + 48)  # ExceptionInformation[1]
    return reason + f" {_ACCESS.get(op, 'accessing')} 0x{addr:X}"
|
||||
|
||||
|
||||
def _read_mdstring(data: bytes, rva: int) -> str:
|
||||
"""A MINIDUMP_STRING (u32 byte-length + UTF-16LE), returned as a basename."""
|
||||
if not rva or rva + 4 > len(data):
|
||||
return ""
|
||||
length, = struct.unpack_from("<I", data, rva)
|
||||
start = rva + 4
|
||||
raw = data[start:start + length]
|
||||
text = raw.decode("utf-16-le", "replace").strip("\x00")
|
||||
return text.replace("\\", "/").rsplit("/", 1)[-1] or text
|
||||
|
||||
|
||||
def _read_comment(data: bytes, streams: dict[int, tuple[int, int]]) -> str:
    """Return the dump's comment text, preferring the wide (UTF-16LE) stream.

    Falls back to the narrow comment stream (decoded as UTF-8) and then to ''. Undecodable
    bytes are replaced; NULs and surrounding whitespace are stripped.
    """
    for stream_type, encoding in ((_COMMENT_W, "utf-16-le"), (_COMMENT_A, "utf-8")):
        if stream_type in streams:
            size, rva = streams[stream_type]
            raw = data[rva:rva + size]
            return raw.decode(encoding, "replace").strip("\x00").strip()
    return ""
|
||||
|
||||
|
||||
def _module_at(modules: list[Module], address: int) -> str | None:
|
||||
for m in modules:
|
||||
if m.base <= address < m.base + m.size:
|
||||
return m.name
|
||||
return None
|
||||
|
||||
|
||||
def stackwalk(path, timeout: float = 25.0, max_chars: int = 12000) -> str:
    """Best-effort fuller report from an external stackwalker, or '' if none is installed.

    Runs the first of ``_STACKWALK_BINS`` found on PATH with the dump path as its only
    argument and returns its stdout, stripped and cut to ``max_chars``. Any failure to
    run it (missing binary, timeout, OS error) yields ''; the exit status is ignored.
    """
    exe = next(filter(None, map(shutil.which, _STACKWALK_BINS)), None)
    if not exe:
        return ""
    try:
        proc = subprocess.run(
            [exe, str(path)],
            capture_output=True,
            text=True,
            # Tool output may contain bytes that are invalid in the locale encoding;
            # substitute them rather than let a UnicodeDecodeError escape.
            errors="replace",
            timeout=timeout,
            check=False,
        )
    except (OSError, subprocess.SubprocessError):
        return ""
    return (proc.stdout or "").strip()[:max_chars]
|
||||
|
||||
|
||||
# --- rendering ----------------------------------------------------------------------
|
||||
|
||||
def to_text(report: MinidumpReport) -> str:
    """Render the report as plain text, one fact per line (also shown in the GUI).

    Unknown/empty fields are left out. The module list is capped at ``_MODULES_SHOWN``
    entries (with a "+N more" line) and the dump comment at 1000 characters.
    """
    out = [f"Crash dump: {Path(report.path).name}"]
    add = out.append

    if report.crash_reason:
        add(f"Crash reason: {report.crash_reason}")

    if report.faulting_module:
        add(f"Faulting module: {report.faulting_module}")
    elif report.exception_address is not None:
        add(f"Faulting address: 0x{report.exception_address:X} (no module matched)")

    if report.crashing_thread is not None:
        add(f"Crashing thread: {report.crashing_thread}")
    if report.os_name:
        add(f"OS: {report.os_name}")
    if report.cpu_arch:
        logical = f" ({report.cpu_count} logical)" if report.cpu_count else ""
        add(f"CPU: {report.cpu_arch}{logical}")
    if report.timestamp:
        stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(report.timestamp))
        add("Captured: " + stamp)

    if report.modules:
        total = len(report.modules)
        listed = report.modules[:_MODULES_SHOWN]
        add(f"\nLoaded modules ({total}):")
        out.extend(f"- {mod.name}" for mod in listed if mod.name)
        hidden = total - len(listed)
        if hidden > 0:
            add(f"- (+{hidden} more)")

    if report.comment:
        add(f"\nDump comment:\n{report.comment[:1000]}")
    return "\n".join(out)
|
||||
|
||||
|
||||
def to_ai_text(report: MinidumpReport) -> str:
    """Build the text block sent to the model.

    It is the Proton/Linux framing paragraph, a blank line, the ``to_text`` summary and —
    when the report carries one — the external stackwalker's output.
    """
    framing = (
        "These findings come from a Windows crash minidump (.dmp) produced by a game running "
        "under Proton/Wine on Linux. The faulting modules are Windows DLLs inside the Proton "
        "prefix, so the crash is a Windows-process fault but the fixes are Linux/Proton-side "
        "(Proton version, DXVK/VKD3D, GPU driver, launch options, shader cache) — never Windows "
        "admin/registry steps."
    )
    sections = [framing, "", to_text(report)]
    walker_output = report.stackwalk
    if walker_output:
        sections.append("\nminidump_stackwalk output:\n" + walker_output)
    return "\n".join(sections)
|
||||
|
||||
|
||||
def to_findings(report: MinidumpReport) -> list[Finding]:
    """Render the dump as Finding cards for the GUI (mirrors the health report).

    Always yields one CRITICAL card describing the crash (reason, plus module/address when
    known), followed by an INFO card with the OS and CPU when either is known.
    """
    where: list[str] = []
    if report.faulting_module:
        where.append(f"in {report.faulting_module}")
    if report.exception_address is not None:
        where.append(f"at 0x{report.exception_address:X}")

    detail = report.crash_reason or "Crash recorded"
    if where:
        detail += " " + " ".join(where) + "."

    if report.faulting_module:
        title = f"Crash in {report.faulting_module}"
    else:
        title = "Crash recorded"

    cards: list[Finding] = [
        Finding(
            CRITICAL, "Crash dump",
            title,
            detail,
            "Use “Explain with AI” for likely causes and Proton-side fixes.",
        )
    ]

    environment: list[str] = []
    if report.os_name:
        environment.append(report.os_name)
    if report.cpu_arch:
        environment.append(f"{report.cpu_arch} CPU")
    if environment:
        cards.append(Finding(INFO, "Crash dump", "Dump environment", " · ".join(environment)))
    return cards
|
||||
@@ -0,0 +1,59 @@
|
||||
"""A pseudo-terminal running the host's shell (M12, Tier 3 — host side).
|
||||
|
||||
Spawns the user's login shell in a real PTY so interactive programs work over a shared
|
||||
session: vim, top, tab-completion, colours, Ctrl-C, and `sudo` (which prompts inside the
|
||||
PTY — the host types that password locally, so it's never sent to the guest). Runs as the
|
||||
host's own user — never elevated. Linux-only (uses `pty`/`termios`).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import fcntl
|
||||
import os
|
||||
import pty
|
||||
import signal
|
||||
import struct
|
||||
import termios
|
||||
|
||||
|
||||
class PtySession:
    """The host user's shell running inside a pseudo-terminal.

    ``pid`` is the shell's process id and ``master_fd`` the non-blocking master side of
    the PTY (``-1`` once the session has been closed). All I/O methods are best-effort:
    they swallow OS errors instead of raising.
    """

    def __init__(self, rows: int = 24, cols: int = 80):
        """Fork the shell named by ``$SHELL`` (default ``/bin/bash``) into a new PTY."""
        self._reaped = False
        self.pid, self.master_fd = pty.fork()
        if self.pid == 0:  # child: become the shell
            os.environ["TERM"] = "xterm-256color"
            shell = os.environ.get("SHELL", "/bin/bash")
            try:
                os.execvp(shell, [shell])
            finally:
                os._exit(1)  # exec failed: never continue running the parent's code
        os.set_blocking(self.master_fd, False)
        self.set_size(rows, cols)

    def set_size(self, rows: int, cols: int) -> None:
        """Tell the PTY its window size; out-of-range values are clamped to 1..65535."""
        if self.master_fd < 0:  # closed; ioctl() would reject a negative fd with ValueError
            return
        rows = max(1, min(int(rows), 0xFFFF))
        cols = max(1, min(int(cols), 0xFFFF))
        try:
            fcntl.ioctl(self.master_fd, termios.TIOCSWINSZ, struct.pack("HHHH", rows, cols, 0, 0))
        except OSError:
            pass

    def write(self, data: bytes) -> None:
        """Send ``data`` to the shell, retrying until all of it has been written.

        The master is non-blocking, so one ``os.write`` may accept only part of a large
        paste or none at all; in the latter case we wait up to a second for the PTY to
        drain before giving up. Errors (e.g. the session is closed) end the write quietly.
        """
        import select  # local import: only needed for the would-block path

        pending = memoryview(data)
        while pending:
            try:
                sent = os.write(self.master_fd, pending)
            except BlockingIOError:
                try:
                    _, writable, _ = select.select([], [self.master_fd], [], 1.0)
                except (OSError, ValueError):
                    return
                if not writable:
                    return
                continue
            except OSError:
                return
            pending = pending[sent:]

    def read(self, size: int = 65536) -> bytes:
        """Return up to ``size`` bytes of shell output, or ``b""`` if none is available.

        ``b""`` is also returned on any OS error, e.g. after the shell exited or the
        session was closed.
        """
        try:
            return os.read(self.master_fd, size)
        except OSError:  # includes BlockingIOError (nothing to read right now)
            return b""

    def close(self, grace: float = 0.5) -> None:
        """Close the PTY and terminate + reap the shell. Safe to call more than once.

        The shell gets SIGHUP and up to ``grace`` seconds to exit; after that it is
        killed with SIGKILL and waited for, so no zombie process is left behind.
        """
        import time  # local import: only needed while waiting for the shell to exit

        # Forget the fd before closing it so a repeated close() can't touch an fd number
        # that has since been reused for something else.
        fd, self.master_fd = self.master_fd, -1
        if fd >= 0:
            try:
                os.close(fd)
            except OSError:
                pass

        if getattr(self, "_reaped", False) or getattr(self, "pid", 0) <= 0:
            return
        self._reaped = True
        try:
            os.kill(self.pid, signal.SIGHUP)
            deadline = time.monotonic() + max(grace, 0.0)
            while True:
                reaped, _status = os.waitpid(self.pid, os.WNOHANG)
                if reaped:
                    return
                if time.monotonic() >= deadline:
                    break
                time.sleep(0.01)
            os.kill(self.pid, signal.SIGKILL)
            os.waitpid(self.pid, 0)
        except OSError:  # already gone, or reaped elsewhere
            pass
|
||||
@@ -38,7 +38,9 @@ def read_status() -> dict | None:
|
||||
return None
|
||||
|
||||
|
||||
def start_background(interval: float | None = None, out: str | None = None) -> int | None:
|
||||
def start_background(
|
||||
interval: float | None = None, out: str | None = None, game: str | None = None
|
||||
) -> int | None:
|
||||
"""Spawn a detached `record run`. Returns the child pid, or None if already running."""
|
||||
if running_pid():
|
||||
return None
|
||||
@@ -48,6 +50,8 @@ def start_background(interval: float | None = None, out: str | None = None) -> i
|
||||
cmd += ["--interval", str(interval)]
|
||||
if out:
|
||||
cmd += ["--out", out]
|
||||
if game:
|
||||
cmd += ["--game", game]
|
||||
out_fh = open(config.SPAWN_LOG, "a")
|
||||
proc = subprocess.Popen(
|
||||
cmd,
|
||||
|
||||
@@ -27,12 +27,14 @@ class Recorder:
|
||||
backups: int = 10,
|
||||
status_path=None,
|
||||
sampler: Sampler | None = None,
|
||||
game: str | None = None,
|
||||
) -> None:
|
||||
self.interval = interval
|
||||
self.sampler = sampler or Sampler(available_sources())
|
||||
self.writer = CrashLogWriter(log_path, max_bytes, backups)
|
||||
self.log_path = Path(log_path)
|
||||
self.status_path = Path(status_path) if status_path else None
|
||||
self.game = game or None
|
||||
self.samples = 0
|
||||
self._stop = threading.Event()
|
||||
self._gpu_lost = False
|
||||
@@ -43,6 +45,8 @@ class Recorder:
|
||||
|
||||
def run(self) -> None:
|
||||
self.writer.write_event("session-start", f"interval={self.interval:g}s")
|
||||
if self.game:
|
||||
self.writer.write_event("game", self.game) # tag the focused-diagnostic target
|
||||
self._write_status(running=True)
|
||||
try:
|
||||
while not self._stop.is_set():
|
||||
@@ -81,6 +85,7 @@ class Recorder:
|
||||
"samples": self.samples,
|
||||
"updated": time.time(),
|
||||
"gpu_lost": self._gpu_lost,
|
||||
"game": self.game,
|
||||
}
|
||||
if sample is not None:
|
||||
data["latest"] = headline(sample)
|
||||
|
||||
@@ -0,0 +1,118 @@
|
||||
"""`systemd --user` services for the crash logger + game watcher (M9 / D6 trigger modes).
|
||||
|
||||
Three trigger modes (D6): **manual** (no service — start/stop by hand), **always-on** (a user
|
||||
service samples continuously, bounded by log rotation), and **game-launch** (a watcher service
|
||||
auto-brackets a capture around each game). No root: everything is a `systemd --user` unit in
|
||||
``~/.config/systemd/user``. Degrades gracefully when systemd isn't available.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from .. import config
|
||||
|
||||
UNIT_DIR = Path(os.path.expanduser("~/.config/systemd/user"))
|
||||
RECORDER_UNIT = "rigdoctor-recorder.service"
|
||||
WATCH_UNIT = "rigdoctor-watch.service"
|
||||
MODES = ("manual", "always-on", "game-launch")
|
||||
|
||||
_UNITS = {
|
||||
RECORDER_UNIT: ("RigDoctor crash-capture recorder (always-on)", ["record", "run"]),
|
||||
WATCH_UNIT: ("RigDoctor game-launch watcher", ["watch"]),
|
||||
}
|
||||
|
||||
|
||||
def available() -> bool:
    """Whether a ``systemctl`` executable can be found on PATH."""
    return bool(shutil.which("systemctl"))
|
||||
|
||||
|
||||
def _rigdoctor_bin() -> str:
|
||||
exe = Path(sys.executable).with_name("rigdoctor") # next to the venv python
|
||||
if exe.exists():
|
||||
return str(exe)
|
||||
return shutil.which("rigdoctor") or "rigdoctor"
|
||||
|
||||
|
||||
def _systemctl(*args: str) -> tuple[int, str]:
|
||||
try:
|
||||
proc = subprocess.run(["systemctl", "--user", *args],
|
||||
capture_output=True, text=True, timeout=20)
|
||||
return proc.returncode, (proc.stdout + proc.stderr).strip()
|
||||
except (OSError, subprocess.SubprocessError) as exc:
|
||||
return 1, str(exc)
|
||||
|
||||
|
||||
def _exec_word(word: str) -> str:
    """Render one ExecStart word so systemd reads it back as a single literal argument."""
    word = word.replace("%", "%%")  # a lone '%' would start a unit-file specifier
    if word and not any(ch.isspace() or ch in "\"'\\" for ch in word):
        return word  # the common case: nothing to quote, text is emitted unchanged
    return '"' + word.replace("\\", "\\\\").replace('"', '\\"') + '"'


def unit_text(description: str, args: list[str]) -> str:
    """Return the contents of a ``systemd --user`` service unit that runs rigdoctor.

    ``description`` becomes ``Description=``; ``args`` are the rigdoctor sub-command words
    appended to the executable in ``ExecStart=``. Words containing whitespace, quotes or
    backslashes are double-quoted and ``%`` is doubled, so an install path with spaces
    still produces a valid unit. The service restarts on failure after 5 seconds and is
    wanted by ``default.target``.
    """
    exec_cmd = " ".join(_exec_word(word) for word in [_rigdoctor_bin(), *args])
    return (
        "[Unit]\n"
        f"Description={description}\n\n"
        "[Service]\n"
        "Type=simple\n"
        f"ExecStart={exec_cmd}\n"
        "Restart=on-failure\n"
        "RestartSec=5\n\n"
        "[Install]\n"
        "WantedBy=default.target\n"
    )
|
||||
|
||||
|
||||
def install_units() -> None:
    """Write (or overwrite) every unit file in ``_UNITS`` and reload the user manager.

    Creates ``UNIT_DIR`` if needed. Running it again simply rewrites the same files, so
    it is safe to call repeatedly.
    """
    UNIT_DIR.mkdir(parents=True, exist_ok=True)
    for unit_name in _UNITS:
        description, cli_args = _UNITS[unit_name]
        UNIT_DIR.joinpath(unit_name).write_text(unit_text(description, cli_args))
    _systemctl("daemon-reload")
|
||||
|
||||
|
||||
def is_active(name: str) -> bool:
    """True when ``systemctl --user is-active <name>`` exits with status 0."""
    exit_code, _output = _systemctl("is-active", name)
    return exit_code == 0
|
||||
|
||||
|
||||
def is_enabled(name: str) -> bool:
    """True when ``systemctl --user is-enabled <name>`` exits with status 0."""
    exit_code, _output = _systemctl("is-enabled", name)
    return exit_code == 0
|
||||
|
||||
|
||||
def _enable(name: str) -> tuple[int, str]:
    """Enable the user unit and start it right away; returns ``(exit_code, output)``."""
    result = _systemctl("enable", "--now", name)
    return result
|
||||
|
||||
|
||||
def _disable(name: str) -> tuple[int, str]:
    """Disable the user unit and stop it right away; returns ``(exit_code, output)``."""
    result = _systemctl("disable", "--now", name)
    return result
|
||||
|
||||
|
||||
def apply_mode(mode: str) -> tuple[bool, str]:
    """Reconcile the user services to `mode` and persist it. Returns (ok, message).

    * unknown mode → ``(False, reason)``, nothing changed;
    * no systemd → the mode is saved but ``ok`` is False since no service was touched;
    * unit files can't be written → ``(False, reason)``, nothing saved or changed;
    * otherwise the unit that ``mode`` needs is enabled (none for "manual"), the other
      unit(s) are disabled, the mode is saved, and ``ok`` reflects the enable step, with
      systemctl's output as the message.
    """
    if mode not in MODES:
        return False, f"Unknown trigger mode: {mode}"
    if not available():
        config.update_config(trigger_mode=mode)
        return False, "systemd --user isn't available — mode saved, but no service was changed."

    try:
        install_units()
    except OSError as exc:  # e.g. unwritable ~/.config; report instead of raising
        return False, f"Couldn't write the systemd unit files: {exc}"

    # The single unit that should be running in each mode; "manual" wants neither.
    wanted = {"always-on": RECORDER_UNIT, "game-launch": WATCH_UNIT}.get(mode)
    for unit in (RECORDER_UNIT, WATCH_UNIT):
        if unit != wanted:
            _disable(unit)  # best-effort: failing to disable an inactive unit is fine
    rc, out = _enable(wanted) if wanted else (0, "")

    config.update_config(trigger_mode=mode)
    return rc == 0, out
|
||||
|
||||
|
||||
def status() -> dict:
    """Report the saved trigger mode plus, when systemd is usable, live unit states.

    Always contains ``available`` and ``mode`` (default "manual"); ``recorder_active`` and
    ``watch_active`` are added only when ``systemctl`` is available.
    """
    saved_mode = config.load_config().get("trigger_mode", "manual")
    info = {"available": available(), "mode": saved_mode}
    if not info["available"]:
        return info
    info["recorder_active"] = is_active(RECORDER_UNIT)
    info["watch_active"] = is_active(WATCH_UNIT)
    return info
|
||||
@@ -0,0 +1,404 @@
|
||||
"""Steam library & game detection (M6, the Steam piece of D12 game detection).
|
||||
|
||||
Discovers a user's Steam installs, the library folders they've configured (Steam tracks
|
||||
them all in ``libraryfolders.vdf``, so multiple libraries on multiple drives are covered),
|
||||
and the games installed in each (one ``appmanifest_<appid>.acf`` per app). Stdlib only —
|
||||
no Steam tooling required, every probe degrades gracefully.
|
||||
|
||||
The set of libraries actually scanned is user-chosen (config ``steam_libraries``); nothing
|
||||
is scanned until the user opts a library in. Scan results are cached in ``games.json`` so the
|
||||
GUI can show the list instantly and the launch-time background scan can diff against it to
|
||||
flag newly-installed games.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import time
|
||||
from dataclasses import asdict, dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from ..config import GAMES_FILE, load_config
|
||||
|
||||
# Steam "apps" that aren't games: runtimes, Proton builds, redistributables. Filtered out of
|
||||
# scans by appid (known IDs) or by name prefix (covers future Proton/runtime versions).
|
||||
_TOOL_APPIDS = {
|
||||
"228980", # Steamworks Common Redistributables
|
||||
"1070560", # Steam Linux Runtime 1.0 (scout)
|
||||
"1391110", # Steam Linux Runtime 2.0 (soldier)
|
||||
"1628350", # Steam Linux Runtime 3.0 (sniper)
|
||||
"1493710", # Proton Experimental
|
||||
"2180100", # Proton Hotfix
|
||||
"1826330", # Proton EasyAntiCheat Runtime
|
||||
"1161040", # Proton BattlEye Runtime
|
||||
}
|
||||
_TOOL_NAME_PREFIXES = ("Proton", "Steam Linux Runtime", "Steamworks Common")
|
||||
|
||||
# Where Steam may be installed (native + Flatpak + Snap). Symlinks (~/.steam/steam) are
|
||||
# resolved and de-duplicated by real path.
|
||||
_ROOT_CANDIDATES = (
|
||||
"~/.steam/steam",
|
||||
"~/.steam/root",
|
||||
"~/.local/share/Steam",
|
||||
"~/.var/app/com.valvesoftware.Steam/data/Steam", # Flatpak
|
||||
"~/snap/steam/common/.local/share/Steam", # Snap
|
||||
)
|
||||
|
||||
|
||||
@dataclass
class SteamLibrary:
    """One Steam library folder."""

    path: str        # the library root (contains a steamapps/ dir)
    label: str = ""  # Steam's label for the folder, if any
|
||||
|
||||
|
||||
@dataclass
class Game:
    """One installed game, whichever launcher it came from."""

    appid: str
    name: str
    library: str              # library path the game lives in (Steam)
    installdir: str           # folder name under <library>/steamapps/common
    size_bytes: int = 0
    last_updated: int = 0     # epoch seconds (acf LastUpdated), 0 if unknown
    launcher: str = "steam"   # "steam" | "lutris" | "heroic"
|
||||
|
||||
|
||||
# --- VDF (Valve Data Format) parsing --------------------------------------------------
|
||||
# Minimal text-VDF reader: quoted "key" "value" pairs and "key" { ... } nesting. Enough
|
||||
# for libraryfolders.vdf and appmanifest_*.acf; ignores #base/#include and unquoted tokens.
|
||||
|
||||
def _parse_vdf(text: str) -> dict:
|
||||
pos = 0
|
||||
n = len(text)
|
||||
|
||||
def skip_ws() -> None:
|
||||
nonlocal pos
|
||||
while pos < n:
|
||||
c = text[pos]
|
||||
if c in " \t\r\n":
|
||||
pos += 1
|
||||
elif c == "/" and pos + 1 < n and text[pos + 1] == "/": # // line comment
|
||||
while pos < n and text[pos] != "\n":
|
||||
pos += 1
|
||||
else:
|
||||
break
|
||||
|
||||
def read_string() -> str:
|
||||
nonlocal pos
|
||||
pos += 1 # opening quote
|
||||
out = []
|
||||
while pos < n:
|
||||
c = text[pos]
|
||||
if c == "\\" and pos + 1 < n:
|
||||
nxt = text[pos + 1]
|
||||
out.append({"n": "\n", "t": "\t", "\\": "\\", '"': '"'}.get(nxt, nxt))
|
||||
pos += 2
|
||||
continue
|
||||
if c == '"':
|
||||
pos += 1
|
||||
break
|
||||
out.append(c)
|
||||
pos += 1
|
||||
return "".join(out)
|
||||
|
||||
def parse_obj() -> dict:
|
||||
nonlocal pos
|
||||
obj: dict = {}
|
||||
while True:
|
||||
skip_ws()
|
||||
if pos >= n or text[pos] == "}":
|
||||
pos += 1 # consume closing brace (or run off the end)
|
||||
return obj
|
||||
if text[pos] != '"': # skip unquoted/unsupported tokens defensively
|
||||
pos += 1
|
||||
continue
|
||||
key = read_string()
|
||||
skip_ws()
|
||||
if pos < n and text[pos] == "{":
|
||||
pos += 1
|
||||
obj[key] = parse_obj()
|
||||
elif pos < n and text[pos] == '"':
|
||||
obj[key] = read_string()
|
||||
else: # malformed; bail on this key
|
||||
obj[key] = ""
|
||||
return obj
|
||||
|
||||
skip_ws()
|
||||
if pos < n and text[pos] == '"':
|
||||
root_key = read_string()
|
||||
skip_ws()
|
||||
if pos < n and text[pos] == "{":
|
||||
pos += 1
|
||||
return {root_key: parse_obj()}
|
||||
return {}
|
||||
|
||||
|
||||
def _read_vdf(path: Path) -> dict:
    """Read and parse a VDF file; an unreadable file yields ``{}``.

    The file is decoded as UTF-8 with undecodable bytes replaced.
    """
    try:
        raw = path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return {}
    return _parse_vdf(raw)
|
||||
|
||||
|
||||
# --- discovery ------------------------------------------------------------------------
|
||||
|
||||
def steam_roots() -> list[Path]:
    """Existing Steam install roots, de-duplicated by resolved path."""
    # A dict doubles as an insertion-ordered set: first candidate to reach a real path wins.
    unique: dict[Path, None] = {}
    for template in _ROOT_CANDIDATES:
        location = Path(os.path.expanduser(template))
        if location.exists():
            unique.setdefault(location.resolve(), None)
    return list(unique)
|
||||
|
||||
|
||||
def _libraryfolders_vdf(root: Path) -> Path | None:
|
||||
for rel in ("steamapps/libraryfolders.vdf", "config/libraryfolders.vdf"):
|
||||
p = root / rel
|
||||
if p.exists():
|
||||
return p
|
||||
return None
|
||||
|
||||
|
||||
def discover_libraries() -> list[SteamLibrary]:
    """Every Steam library folder configured on this machine, de-duplicated by real path.

    Reads each install's ``libraryfolders.vdf`` (which lists all drives/folders), and
    always includes the install root itself as a fallback.

    Keys are matched case-insensitively, consistent with how manifests are read in
    scan_library. Both entry shapes are accepted: a nested block with ``path`` /
    ``label`` keys, and the flat ``"<number>" "<path>"`` string form.
    NOTE(review): the flat form is assumed to be the legacy file layout — confirm
    against a real pre-2021 libraryfolders.vdf.

    Returns:
        Libraries in discovery order; only folders that contain ``steamapps/``.
    """
    seen: set[Path] = set()
    libs: list[SteamLibrary] = []

    def add(path: Path, label: str = "") -> None:
        # Only real libraries (with a steamapps/ dir) count; de-dup on the resolved path.
        if not (path / "steamapps").is_dir():
            return
        real = path.resolve()
        if real in seen:
            return
        seen.add(real)
        libs.append(SteamLibrary(path=str(real), label=label))

    for root in steam_roots():
        vdf = _libraryfolders_vdf(root)
        doc = _read_vdf(vdf) if vdf else {}
        # The root key's capitalisation varies between files, so compare lowercased.
        folders = next((v for k, v in doc.items() if k.lower() == "libraryfolders"), {})
        if isinstance(folders, dict):
            for key, entry in folders.items():
                if isinstance(entry, dict):
                    fields = {k.lower(): v for k, v in entry.items()}
                    path = fields.get("path")
                    label = fields.get("label", "")
                    if isinstance(path, str) and path:
                        add(Path(path), label if isinstance(label, str) else "")
                elif key.isdigit() and isinstance(entry, str) and entry:
                    # Flat form: numbered keys map straight to a path string; other
                    # string keys in the same object are bookkeeping, not libraries.
                    add(Path(entry))
        add(root)  # the install root is itself a library
    return libs
|
||||
|
||||
|
||||
# --- game scanning --------------------------------------------------------------------
|
||||
|
||||
def is_tool(appid: str, name: str) -> bool:
    """Report whether an app is a non-game Steam app (runtime, Proton, redistributable).

    An app counts as a tool when its appid is a known tool id or its name starts
    with one of the known tool prefixes.
    """
    return appid in _TOOL_APPIDS or name.startswith(_TOOL_NAME_PREFIXES)
|
||||
|
||||
|
||||
def scan_library(library: str) -> list[Game]:
    """List the games installed in one library, read from its ``appmanifest_*.acf`` files.

    Manifests without an appid or a name, and apps recognised by is_tool, are left
    out. A library whose manifests cannot be listed yields an empty list.
    """
    try:
        manifest_paths = sorted((Path(library) / "steamapps").glob("appmanifest_*.acf"))
    except OSError:
        return []

    found: list[Game] = []
    for manifest_path in manifest_paths:
        raw_state = _read_vdf(manifest_path).get("AppState", {})
        if not isinstance(raw_state, dict):
            continue
        # Steam treats VDF keys case-insensitively (e.g. "SizeOnDisk" but "lastupdated").
        fields = {key.lower(): value for key, value in raw_state.items()}
        appid = fields.get("appid", "")
        title = fields.get("name", "").strip()
        if appid and title and not is_tool(appid, title):
            found.append(
                Game(
                    appid=appid,
                    name=title,
                    library=str(library),
                    installdir=fields.get("installdir", ""),
                    size_bytes=_int(fields.get("sizeondisk")),
                    last_updated=_int(fields.get("lastupdated")),
                )
            )
    return found
|
||||
|
||||
|
||||
def scan_games(libraries: list[str]) -> list[Game]:
    """Collect games from every given library, one per appid, ordered by name.

    When the same appid shows up in several libraries the first library listed
    wins. Ordering is case-insensitive on the game name.
    """
    unique: dict[str, Game] = {}
    for library in libraries:
        for game in scan_library(library):
            if game.appid not in unique:
                unique[game.appid] = game
    ordered = list(unique.values())
    ordered.sort(key=lambda game: game.name.lower())
    return ordered
|
||||
|
||||
|
||||
def _int(value) -> int:
|
||||
try:
|
||||
return int(value)
|
||||
except (TypeError, ValueError):
|
||||
return 0
|
||||
|
||||
|
||||
def proton_versions() -> list[str]:
    """Installed Proton compatibility-tool versions across all discovered libraries.

    Proton builds are the appmanifests we filter out of game scans; here we surface them
    for the M6 environment report. Returns unique names, newest-looking last.

    Names are ordered with a natural sort so digit runs compare as numbers
    ("Proton 9.0" before "Proton 10.0"); a plain string sort would invert those.
    Manifests whose ``name`` is not a string are ignored.
    """
    import re  # local import: only this function needs it

    def natural_key(name: str) -> list[tuple[int, int, str]]:
        # re.split with one capture group alternates text (even index) and digits (odd).
        pieces = re.split(r"(\d+)", name)
        return [
            (0, int(piece), "") if index % 2 else (1, 0, piece.casefold())
            for index, piece in enumerate(pieces)
            if piece
        ]

    names: set[str] = set()
    for lib in discover_libraries():
        try:
            manifests = sorted((Path(lib.path) / "steamapps").glob("appmanifest_*.acf"))
        except OSError:
            continue
        for manifest in manifests:
            state = _read_vdf(manifest).get("AppState", {})
            if not isinstance(state, dict):
                continue
            # Same case-insensitive key handling as the game scan.
            state = {k.lower(): v for k, v in state.items()}
            name = state.get("name", "")
            if isinstance(name, str) and name.strip().startswith("Proton"):
                names.add(name.strip())
    # The raw name is a tiebreak so the order is fully deterministic.
    return sorted(names, key=lambda n: (natural_key(n), n))
|
||||
|
||||
|
||||
# --- config-driven selection ----------------------------------------------------------
|
||||
|
||||
def selected_library_paths(cfg: dict | None = None) -> list[str]:
    """Library paths the user has opted in to scanning (config ``steam_libraries``).

    Args:
        cfg: an already-loaded config; the config is loaded only when this is None,
            so an explicitly passed empty config is honoured rather than replaced.

    Returns:
        The configured paths as strings; an empty list when the key is missing or
        empty. A single bare string is treated as one path, not iterated per character.
    """
    if cfg is None:
        cfg = load_config()
    paths = cfg.get("steam_libraries") or []
    if isinstance(paths, str):
        paths = [paths]
    return [str(p) for p in paths]
|
||||
|
||||
|
||||
# --- scan cache + new-game detection --------------------------------------------------
|
||||
|
||||
@dataclass
class ScanResult:
    """Outcome of one rescan.

    ``games`` is the full scanned list, ``new_appids`` the newly-installed ones
    since the last scan (badge fuel), and ``scanned_at`` the scan's timestamp.
    """

    games: list[Game]
    new_appids: list[str]
    scanned_at: float
|
||||
|
||||
|
||||
def load_cache() -> dict | None:
    """Load the scan cache from ``GAMES_FILE``.

    Returns:
        The cached mapping, or None when the file is missing, unreadable, not
        valid JSON, or valid JSON that is not an object. Callers use ``.get`` on
        the result, so a stray list or scalar is treated the same as no cache.
    """
    try:
        data = json.loads(GAMES_FILE.read_text())
    except (OSError, ValueError):
        return None
    return data if isinstance(data, dict) else None
|
||||
|
||||
|
||||
def _save_cache(games: list[Game], known: set[str], new: list[str], when: float) -> None:
    """Write the scan cache to ``GAMES_FILE``, creating its parent directory if needed.

    The payload records the scan time, every appid seen so far (sorted), the
    still-new appids, and the games as plain dicts.
    """
    payload = {
        "scanned_at": when,
        "known_appids": sorted(known),
        "new_appids": new,
        "games": [asdict(game) for game in games],
    }
    serialized = json.dumps(payload, indent=2, ensure_ascii=False)
    GAMES_FILE.parent.mkdir(parents=True, exist_ok=True)
    GAMES_FILE.write_text(serialized)
|
||||
|
||||
|
||||
def cached_games() -> list[Game]:
    """Rebuild the games of the last scan from the cache (for instant display before a rescan).

    Returns an empty list when there is no usable cache.
    """
    cache = load_cache()
    if not cache:
        return []
    field_names = Game.__dataclass_fields__
    restored: list[Game] = []
    for record in cache.get("games", []):
        # Pass only the keys the record has, so dataclass defaults fill any newer fields.
        present = {name: record[name] for name in field_names if name in record}
        restored.append(Game(**present))
    return restored
|
||||
|
||||
|
||||
def appid_names() -> dict[str, str]:
    """Map appid to name for the cached games — used to resolve IDs seen in logs (M14).

    Games with an empty appid or name are left out.
    """
    names: dict[str, str] = {}
    for game in cached_games():
        if game.appid and game.name:
            names[game.appid] = game.name
    return names
|
||||
|
||||
|
||||
def rescan(cfg: dict | None = None) -> ScanResult:
    """Scan the selected libraries, compare with the cache, and store the new state.

    An appid counts as new when it was never seen before. With no previous cache
    nothing is reported as new (the whole library would otherwise be flagged at
    once). Appids still marked new in the cache stay new for as long as the game
    remains installed, until they are acknowledged.
    """
    games = scan_games(selected_library_paths(cfg))
    installed = {game.appid for game in games}
    previous = load_cache()
    if previous is not None:
        known = set(previous.get("known_appids", []))
        # Previously flagged and still installed: keep the badge.
        still_pending = installed.intersection(previous.get("new_appids", []))
        new = sorted((installed - known).union(still_pending))
        known.update(installed)
    else:
        # First run: everything is "known", nothing is new.
        known = set(installed)
        new = []
    scanned_at = time.time()
    _save_cache(games, known, new, scanned_at)
    return ScanResult(games=games, new_appids=new, scanned_at=scanned_at)
|
||||
|
||||
|
||||
def acknowledge_new() -> None:
    """Empty the cache's ``new_appids`` list (called when the user views the games list).

    Does nothing when there is no cache or the list is already empty; a failed
    write is ignored.
    """
    cache = load_cache()
    pending = cache.get("new_appids") if cache else None
    if not pending:
        return
    cache["new_appids"] = []
    serialized = json.dumps(cache, indent=2, ensure_ascii=False)
    try:
        GAMES_FILE.write_text(serialized)
    except OSError:
        pass  # best-effort: the badge simply stays until the next successful write
|
||||
|
||||
|
||||
# --- client info, launching, formatting -----------------------------------------------
|
||||
|
||||
def client_version() -> str | None:
    """Look up the installed Steam package version via ``dpkg-query`` (best-effort, offline).

    Tries the package names ``steam-installer``, ``steam-launcher`` and ``steam`` in
    that order and returns the first non-empty version, or None when dpkg-query is
    missing or none of them reports one.
    """
    if shutil.which("dpkg-query") is None:
        return None
    for package in ("steam-installer", "steam-launcher", "steam"):
        query = ["dpkg-query", "-W", "-f=${Version}", package]
        try:
            completed = subprocess.run(query, capture_output=True, text=True, timeout=10)
        except (subprocess.SubprocessError, OSError):
            continue
        version = completed.stdout.strip()
        if completed.returncode == 0 and version:
            return version
    return None
|
||||
|
||||
|
||||
def launch_game(appid: str) -> bool:
    """Ask Steam to launch a game by appid through a ``steam://`` URL (best-effort, non-blocking).

    Tries the ``steam`` binary first, then ``xdg-open``. Returns True as soon as one
    of them was started, False for an empty appid or when neither could be started.
    """
    if not appid:
        return False
    url = f"steam://rungameid/{appid}"
    for launcher in ("steam", "xdg-open"):
        if not shutil.which(launcher):
            continue
        try:
            # Detached from our stdio and session; the child is not waited on.
            subprocess.Popen(
                [launcher, url],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                stdin=subprocess.DEVNULL,
                start_new_session=True,
            )
        except (OSError, subprocess.SubprocessError):
            continue
        return True
    return False
|
||||
|
||||
|
||||
def human_size(num_bytes: int) -> str:
    """Format a byte count for display using 1024-based units.

    Args:
        num_bytes: size in bytes.

    Returns:
        "—" for zero or negative sizes; otherwise the value in the largest unit
        that keeps it below 1024 (capped at TB), with no decimals for B/KB and one
        decimal for MB and above.
    """
    if num_bytes <= 0:
        return "—"
    units = ("B", "KB", "MB", "GB", "TB")
    size = float(num_bytes)
    for unit in units[:-1]:
        if size < 1024:
            break
        size /= 1024
    else:
        # Ran past GB: everything larger is expressed in TB.
        unit = units[-1]
    precision = 0 if unit in ("B", "KB") else 1
    return f"{size:.{precision}f} {unit}"
|
||||
@@ -0,0 +1,322 @@
|
||||
"""GPU stress + close thermal monitoring — the repro tool for load-correlated crashes.
|
||||
|
||||
Run a GPU load and sample sensors at a high rate, then report peak/sustained temperatures,
|
||||
how long the GPU spent above each temperature threshold, power headroom vs the limit, whether
|
||||
it throttled, and any GPU fault (Xid / VA-space / a query timeout) that hit during the window.
|
||||
This is the on-demand way to reproduce the "only under load / only certain games" freezes
|
||||
instead of waiting for a game to trigger them.
|
||||
|
||||
The load comes from, in order: an explicit ``command`` (your game, or a loader like gpu-burn),
|
||||
an auto-detected loader on PATH (gpu-burn / vkmark / glmark2 / vkcube), or **monitor-only** when
|
||||
none is found — then you generate the load yourself (launch the game) while this closely tracks
|
||||
temps for the duration.
|
||||
|
||||
Stdlib only. Degrades gracefully: no nvidia-smi → no GPU stats; a loader that won't start →
|
||||
monitor-only with a note; missing journal access → no fault scan, just the telemetry.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
import subprocess
|
||||
import time
|
||||
from collections.abc import Callable
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from . import health
|
||||
from .sample import Sample
|
||||
from .sampler import Sampler
|
||||
from .sources import available_sources
|
||||
|
||||
# Default temperature dwell thresholds (°C). 83 is Ampere's typical thermal-throttle point;
# 90+ is hot; sustained 95+ on the core (or 100+ on GDDR6 memory) is a cooling problem.
# summarize() reports, per threshold, how long the core temperature was at or above it.
DEFAULT_THRESHOLDS = (80, 85, 90, 95)
|
||||
|
||||
# Known GPU load generators, best (heaviest / most deterministic) first. argv builder takes the
# remaining duration so a self-terminating loader (gpu-burn) bounds itself; the windowed
# benchmarks loop until we kill them. None are required — detection is best-effort.
# Each entry is (executable name looked up on PATH, function building its argv).
_LOADERS: list[tuple[str, Callable[[float], list[str]]]] = [
    ("gpu-burn", lambda secs: ["gpu-burn", str(max(1, int(secs)))]),  # whole seconds, at least 1
    ("vkmark", lambda _s: ["vkmark", "--run-forever"]),
    ("glmark2", lambda _s: ["glmark2", "--run-forever"]),
    ("vkcube", lambda _s: ["vkcube"]),
]
|
||||
|
||||
# NVML clocks-event bits that mean the clocks are being *held back* (a throttle), decoded from
# the active-reasons bitmask so we don't depend on per-field name differences across drivers.
# Maps bit mask -> human-readable reason used in the report.
_THROTTLE_BITS = {
    0x008: "HW slowdown",
    0x020: "SW thermal slowdown",
    0x040: "HW thermal slowdown",
    0x080: "HW power-brake slowdown",
}
_POWERCAP_BIT = 0x004  # hitting the power limit — expected under load, reported separately
|
||||
|
||||
|
||||
@dataclass
class MetricStat:
    """Min/avg/max summary of one metric over a run.

    ``key`` is the reading key (e.g. "gpu.temp", "gpu.power", "gpu.clock.core"),
    ``label`` the human label for the report, ``unit`` its unit, and ``samples``
    the number of values the statistics were computed from.
    """

    key: str
    label: str
    unit: str
    min: float
    avg: float
    max: float
    samples: int
|
||||
|
||||
|
||||
@dataclass
|
||||
class _Tick:
|
||||
dt: float # seconds this tick represents (for dwell-time weighting)
|
||||
values: dict[str, float] # reading key -> value across all sources (Nones dropped)
|
||||
throttle: list[str] # active throttle reasons this tick
|
||||
power_capped: bool
|
||||
lost: bool # query timeout / no GPU response this tick
|
||||
|
||||
|
||||
@dataclass
class StressResult:
    """Report of one stress/monitor run.

    Fields:
        load: "command: …" | "auto: gpu-burn" | "monitor-only".
        duration: seconds actually monitored.
        samples: number of ticks collected.
        interval: sampling interval used.
        stats: per-metric min/avg/max summaries.
        peak_temp / avg_temp: core temperature peak and average, None without data.
        peak_mem_temp: memory temperature peak, None without data.
        time_above: threshold °C -> seconds at/above it.
        max_power / power_limit: highest power seen and the limit, None without data.
        power_capped: the power cap was active at some point.
        throttled / throttle_reasons: whether, and why, clocks were held back.
        gpu_lost: a GPU query timed out.
        faults: Xid/VA-space titles in the window.
        aborted: Ctrl-C or the load exited early.
        severity / verdict: overall rating and plain-language conclusion.
    """

    load: str
    duration: float
    samples: int
    interval: float
    stats: list[MetricStat] = field(default_factory=list)
    peak_temp: float | None = None
    peak_mem_temp: float | None = None
    avg_temp: float | None = None
    time_above: dict[int, float] = field(default_factory=dict)
    max_power: float | None = None
    power_limit: float | None = None
    power_capped: bool = False
    throttled: bool = False
    throttle_reasons: list[str] = field(default_factory=list)
    gpu_lost: bool = False
    faults: list[str] = field(default_factory=list)
    aborted: bool = False
    severity: str = health.OK
    verdict: str = ""
|
||||
|
||||
|
||||
# --- load resolution ------------------------------------------------------------------
|
||||
|
||||
def available_loaders() -> list[str]:
    """Names of the known GPU load tools that are on PATH, in ``_LOADERS`` order (heaviest first)."""
    found: list[str] = []
    for name, _builder in _LOADERS:
        if shutil.which(name):
            found.append(name)
    return found
|
||||
|
||||
|
||||
def _start_load(command: list[str] | None, duration: float) -> tuple[subprocess.Popen | None, str]:
|
||||
"""Start the load process and return (proc, description). proc is None for monitor-only."""
|
||||
if command:
|
||||
try:
|
||||
proc = subprocess.Popen(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
||||
return proc, "command: " + " ".join(command)
|
||||
except (OSError, ValueError) as exc:
|
||||
return None, f"monitor-only (command failed to start: {exc})"
|
||||
for name, build in _LOADERS:
|
||||
if shutil.which(name):
|
||||
try:
|
||||
proc = subprocess.Popen(build(duration), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
||||
return proc, f"auto: {name}"
|
||||
except (OSError, ValueError):
|
||||
continue
|
||||
return None, "monitor-only"
|
||||
|
||||
|
||||
def _stop_load(proc: subprocess.Popen | None) -> None:
|
||||
if proc is None or proc.poll() is not None:
|
||||
return
|
||||
proc.terminate()
|
||||
try:
|
||||
proc.wait(timeout=5)
|
||||
except subprocess.TimeoutExpired:
|
||||
proc.kill()
|
||||
|
||||
|
||||
# --- throttle / fault probes ----------------------------------------------------------
|
||||
|
||||
def _throttle_state() -> tuple[list[str], bool]:
    """(active throttle reasons, power-capped) decoded from the clocks-event bitmask.

    Queries ``nvidia-smi`` for the active-reasons bitmask, trying the
    ``clocks_event_reasons`` field name first and ``clocks_throttle_reasons``
    second. The first line of output that parses as a hex number is used; only
    the first reported line (first GPU) is looked at.

    Returns:
        ``([], False)`` when nvidia-smi is missing or neither field yields a bitmask.
    """
    if shutil.which("nvidia-smi") is None:
        return [], False
    for field_name in ("clocks_event_reasons.active", "clocks_throttle_reasons.active"):
        try:
            proc = subprocess.run(
                ["nvidia-smi", f"--query-gpu={field_name}", "--format=csv,noheader"],
                capture_output=True, text=True, timeout=5,
            )
        except (subprocess.SubprocessError, OSError):
            continue
        lines = proc.stdout.strip().splitlines()
        raw = lines[0].strip() if lines else ""
        try:
            bits = int(raw, 16)
        except ValueError:
            # Empty output, "N/A", or an error message for an unknown field name:
            # none of these is a bitmask, so fall through to the other field name.
            continue
        reasons = [name for bit, name in _THROTTLE_BITS.items() if bits & bit]
        return reasons, bool(bits & _POWERCAP_BIT)
    return [], False
|
||||
|
||||
|
||||
def _faults_since(start_ts: float) -> list[str]:
    """Titles of faults found in the kernel journal from ``start_ts`` (epoch seconds) onwards.

    Only findings in the GPU, PCIe, Hardware and Kernel categories are kept; an
    empty journal result yields an empty list.
    """
    wanted = ("GPU", "PCIe", "Hardware", "Kernel")
    journal_args = ["-k", "--no-pager", "-o", "cat", "--since", f"@{int(start_ts)}"]
    journal_text = health._journalctl(journal_args)
    if not journal_text:
        return []
    titles: list[str] = []
    for finding in health.scan_journal_text(journal_text):
        if finding.category in wanted:
            titles.append(finding.title)
    return titles
|
||||
|
||||
|
||||
def _tick_values(sample: Sample) -> tuple[dict[str, float], bool]:
|
||||
"""Reading key -> value across all sources (Nones dropped), plus whether the GPU
|
||||
failed to respond (an nvidia-smi query timeout — a hang/lost signal)."""
|
||||
values: dict[str, float] = {}
|
||||
lost = False
|
||||
for r in sample.readings:
|
||||
if r.source == "gpu" and r.metric == "status" and r.label == "query-timeout":
|
||||
lost = True
|
||||
if r.value is not None:
|
||||
values[r.key] = r.value
|
||||
return values, lost
|
||||
|
||||
|
||||
# --- pure analysis (unit-testable, no IO) ---------------------------------------------
|
||||
|
||||
# Metrics that get a min/avg/max line in the report: reading key -> (label, unit).
# The dict order is the order in which summarize() emits the stats.
_REPORT_KEYS = {
    "gpu.temp": ("GPU core temp", "°C"),
    "gpu.temp.memory": ("GPU memory temp", "°C"),
    "gpu.power": ("GPU power", "W"),
    "gpu.util": ("GPU utilization", "%"),
    "gpu.mem_util": ("VRAM controller util", "%"),
    "gpu.clock.core": ("Core clock", "MHz"),
    "gpu.clock.memory": ("Memory clock", "MHz"),
    "gpu.fan": ("Fan", "%"),
    "gpu.mem_used": ("VRAM used", "MiB"),
    "cpu.temp": ("CPU temp", "°C"),
}
|
||||
|
||||
|
||||
def summarize(ticks: list[_Tick], *, load: str, interval: float, faults: list[str],
              thresholds=DEFAULT_THRESHOLDS) -> StressResult:
    """Turn collected ticks into a StressResult — pure, so it's tested with synthetic input.

    Aggregates per-metric min/avg/max for the keys in ``_REPORT_KEYS``, the time the
    core temperature ("gpu.temp") spent at or above each threshold (weighted by each
    tick's ``dt``), throttle reasons, power-cap and lost-GPU flags, then sets the verdict.
    """
    total_seconds = sum(tick.dt for tick in ticks)
    result = StressResult(load=load, duration=round(total_seconds, 1), samples=len(ticks),
                          interval=interval, faults=faults)

    per_key: dict[str, list[float]] = {}
    reasons_seen: set[str] = set()
    dwell = dict.fromkeys(thresholds, 0.0)
    for tick in ticks:
        for key, value in tick.values.items():
            per_key.setdefault(key, []).append(value)
        reasons_seen.update(tick.throttle)
        if tick.power_capped:
            result.power_capped = True
        if tick.lost:
            result.gpu_lost = True
        core_temp = tick.values.get("gpu.temp")
        if core_temp is None:
            continue
        for limit in thresholds:
            if core_temp >= limit:
                dwell[limit] += tick.dt

    for key, (label, unit) in _REPORT_KEYS.items():
        observed = per_key.get(key)
        if not observed:
            continue
        lowest = round(min(observed), 1)
        mean = round(sum(observed) / len(observed), 1)
        highest = round(max(observed), 1)
        stat = MetricStat(key, label, unit, lowest, mean, highest, len(observed))
        result.stats.append(stat)
        if key == "gpu.temp":
            result.peak_temp = stat.max
            result.avg_temp = stat.avg
        elif key == "gpu.temp.memory":
            result.peak_mem_temp = stat.max
        elif key == "gpu.power":
            result.max_power = stat.max

    # power_limit isn't a reported metric (it's ~constant); pull it from the raw series.
    limit_series = per_key.get("gpu.power_limit")
    if limit_series is not None:
        result.power_limit = max(limit_series)

    result.throttle_reasons = sorted(reasons_seen)
    result.throttled = bool(reasons_seen)
    # Thresholds that were never reached are left out of the report.
    result.time_above = {limit: round(seconds, 1) for limit, seconds in dwell.items() if seconds > 0}

    _verdict(result)
    return result
|
||||
|
||||
|
||||
def _verdict(r: StressResult) -> None:
    """Fill in ``r.severity`` and ``r.verdict`` from the gathered signals.

    Checked in priority order: a fault or lost GPU (critical), throttling (warning),
    a core peak of 90 °C or more (warning), no telemetry at all (info), otherwise
    stable — in which case the severity is left as it was.
    """
    peak = "?" if r.peak_temp is None else f"{r.peak_temp:.0f}°C"

    if r.gpu_lost or any(title for title in r.faults):
        cause = "; ".join(r.faults) if r.faults else "the GPU stopped responding (query timeout)"
        r.severity = health.CRITICAL
        r.verdict = (f"GPU fault during the stress run: {cause}. This reproduces the crash under "
                     f"load — capture/keep these logs. Peak core temp {peak}.")
    elif r.throttled:
        r.severity = health.WARNING
        r.verdict = (f"Thermal/HW throttling detected ({', '.join(r.throttle_reasons)}) — the GPU "
                     f"held clocks back to stay safe. Peak core temp {peak}. Improve cooling/airflow.")
    elif r.peak_temp is not None and r.peak_temp >= 90:
        r.severity = health.WARNING
        r.verdict = (f"No fault, but the core peaked at {peak} — hot. Watch GDDR6/VRM cooling; "
                     "sustained high temps shorten the card's life and precede instability.")
    elif r.peak_temp is None:
        r.severity = health.INFO
        r.verdict = "No GPU telemetry was captured (nvidia-smi unavailable?)."
    else:
        capped = " (power-limited — hitting the cap, which is normal)" if r.power_capped else ""
        r.verdict = f"Stable: peaked at {peak} with no faults or throttling{capped}."
|
||||
|
||||
|
||||
# --- the run loop (IO) ----------------------------------------------------------------
|
||||
|
||||
def run(duration: float = 120.0, interval: float = 0.5, command: list[str] | None = None,
        thresholds=DEFAULT_THRESHOLDS, on_tick: Callable[[Sample, float], None] | None = None,
        should_stop: Callable[[], bool] | None = None) -> StressResult:
    """Drive a GPU load for ``duration`` seconds, sampling every ``interval``, and report.

    Stops early on Ctrl-C, if a GPU query times out (likely hang), if the load process exits, or
    when ``should_stop()`` returns True (the GUI's Stop button). ``on_tick(sample, elapsed)`` is
    called each tick for live display.

    Args:
        duration: target run length in seconds; also passed to the load builder.
        interval: target time between samples in seconds.
        command: explicit load command; None lets _start_load pick a loader or
            fall back to monitor-only.
        thresholds: temperature thresholds forwarded to summarize().
        on_tick: optional per-tick callback.
        should_stop: optional poll for an external stop request.

    Returns:
        The summarized StressResult, with ``aborted`` set for Ctrl-C / stop requests,
        or when an explicit command's run ended more than one interval short.
    """
    sampler = Sampler(available_sources())
    proc, load_desc = _start_load(command, duration)
    start = time.monotonic()  # monotonic clock for elapsed/dt arithmetic
    start_ts = time.time()  # wall clock, for the journal --since query afterwards
    ticks: list[_Tick] = []
    last = start
    aborted = False
    try:
        while True:
            sample = sampler.sample()
            now = time.monotonic()
            dt = now - last  # time covered by this tick, measured after sampling
            last = now
            values, lost = _tick_values(sample)
            reasons, capped = _throttle_state()
            ticks.append(_Tick(dt=dt, values=values, throttle=reasons, power_capped=capped, lost=lost))
            if on_tick is not None:
                on_tick(sample, now - start)
            if lost:  # GPU stopped responding — stop now, it may be hung/lost
                break
            if should_stop is not None and should_stop():  # GUI Stop button
                aborted = True
                break
            if proc is not None and proc.poll() is not None:  # the load finished/exited
                break
            if (now - start) >= duration:
                break
            # Sleep only for what is left of the interval after this tick's own work.
            time.sleep(max(0.0, interval - (time.monotonic() - now)))
    except KeyboardInterrupt:
        aborted = True
    finally:
        # Always stop the load, including on Ctrl-C or an unexpected error.
        _stop_load(proc)

    faults = _faults_since(start_ts)
    result = summarize(ticks, load=load_desc, interval=interval, faults=faults, thresholds=thresholds)
    # An explicit command that ended well before the requested duration also counts as aborted.
    result.aborted = aborted or (proc is not None and command is not None and result.duration < duration - interval)
    return result
|
||||
@@ -0,0 +1,165 @@
|
||||
"""Session-scoped system logs for diagnostics (M15): kernel, coredumps, NVIDIA, display.
|
||||
|
||||
Covers what the *system* logged when something went wrong, so the report bundle and the AI both
|
||||
see it:
|
||||
* kernel ring-buffer slice (`journalctl -k`) — Xid, OOM-killer, MCE, PCIe AER, thermal, hung tasks
|
||||
* systemd-coredump records (`coredumpctl`) — did the game/wine dump core (SIGSEGV/ABRT), when
|
||||
* an `nvidia-smi -q` snapshot — driver, throttle/clock-event reasons, clocks, power, temps, PCIe,
|
||||
ECC + retired pages (point-in-time at diagnostic time)
|
||||
* the display-server log — `Xorg.0.log` on X11, or the compositor's user-journal slice on Wayland
|
||||
Best-effort and size-bounded: degrades silently if a tool is missing or access is denied. Stdlib only.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
_MAX = 8000  # cap each log section so the prompt/report stays small
_NV_MAX = 10000  # nvidia-smi -q is structured + valuable; allow a bit more (head-truncated)

# Compositors whose user-journal entries are the "Wayland log" (OR-matched by journalctl).
_COMPOSITORS = ("gnome-shell", "mutter", "kwin_wayland", "Xwayland", "sway", "gamescope")
# Candidate Xorg log locations, probed in this order; "~" is expanded by _xorg_log().
_XORG_LOGS = ("~/.local/share/xorg/Xorg.0.log", "/var/log/Xorg.0.log")
|
||||
|
||||
|
||||
def _since_arg(since: float | None) -> str | None:
|
||||
return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(since)) if since else None
|
||||
|
||||
|
||||
def _run(cmd: list[str], timeout: float = 15.0) -> str:
|
||||
try:
|
||||
proc = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
|
||||
except (OSError, subprocess.SubprocessError):
|
||||
return ""
|
||||
return (proc.stdout or "").strip()
|
||||
|
||||
|
||||
def kernel_log(since: float | None = None, max_bytes: int = _MAX) -> str:
    """Tail of the kernel journal (``journalctl -k``), optionally limited to entries since ``since``.

    Args:
        since: epoch seconds to start from; None reads without a --since filter.
        max_bytes: cap on the returned text, applied to the end of the output.
            The cap is counted in characters of the decoded output.

    Returns:
        The (tail-truncated) log, or "" when journalctl is missing, fails, reports
        no entries, or ``max_bytes`` is not positive.
    """
    if not shutil.which("journalctl"):
        return ""
    cmd = ["journalctl", "-k", "--no-pager"]
    since_arg = _since_arg(since)
    if since_arg:
        cmd += ["--since", since_arg]
    out = _run(cmd)
    if not out or out.strip().lower() == "-- no entries --":  # journalctl's empty marker
        return ""
    # out[-0:] would be the whole string, so a zero cap must be handled explicitly.
    return out[-max_bytes:] if max_bytes > 0 else ""
|
||||
|
||||
|
||||
def coredumps(since: float | None = None, max_bytes: int = _MAX) -> str:
    """Tail of ``coredumpctl list``, optionally limited to dumps since ``since``.

    Args:
        since: epoch seconds to start from; None lists without a --since filter.
        max_bytes: cap on the returned text, applied to the end of the output.
            The cap is counted in characters of the decoded output.

    Returns:
        The (tail-truncated) listing, or "" when coredumpctl is missing, fails,
        reports no coredumps, or ``max_bytes`` is not positive.
    """
    if not shutil.which("coredumpctl"):
        return ""
    cmd = ["coredumpctl", "list", "--no-pager"]
    since_arg = _since_arg(since)
    if since_arg:
        cmd += ["--since", since_arg]
    out = _run(cmd)
    if not out or "no coredumps" in out.lower():
        return ""
    # out[-0:] would be the whole string, so a zero cap must be handled explicitly.
    return out[-max_bytes:] if max_bytes > 0 else ""
|
||||
|
||||
|
||||
def nvidia_snapshot(max_bytes: int = _NV_MAX) -> str:
    """Point-in-time ``nvidia-smi -q`` output, truncated from the head.

    Keeps the first ``max_bytes`` characters; returns "" when nvidia-smi is
    missing or produced no output.
    """
    if not shutil.which("nvidia-smi"):
        return ""
    report = _run(["nvidia-smi", "-q"])
    if not report:
        return ""
    return report[:max_bytes]
|
||||
|
||||
|
||||
def _xorg_log() -> Path | None:
    """Return the first existing Xorg log among ``_XORG_LOGS``, or None if there is none."""
    expanded = (Path(os.path.expanduser(template)) for template in _XORG_LOGS)
    return next((path for path in expanded if path.exists()), None)
|
||||
|
||||
|
||||
def _session_type() -> str:
|
||||
declared = os.environ.get("XDG_SESSION_TYPE", "").lower()
|
||||
if declared in ("x11", "wayland"):
|
||||
return declared
|
||||
if os.environ.get("WAYLAND_DISPLAY"):
|
||||
return "wayland"
|
||||
return "x11" if _xorg_log() else "unknown"
|
||||
|
||||
|
||||
def _tail_file(path: Path, max_bytes: int) -> str:
|
||||
try:
|
||||
size = path.stat().st_size
|
||||
with path.open("rb") as fh:
|
||||
if size > max_bytes:
|
||||
fh.seek(size - max_bytes)
|
||||
return fh.read().decode("utf-8", "replace")
|
||||
except OSError:
|
||||
return ""
|
||||
|
||||
|
||||
def display_log(since: float | None = None, max_bytes: int = _MAX) -> str:
    """Xorg.0.log on X11, or the compositor's user-journal slice on Wayland ('' if none).

    Args:
        since: epoch seconds to start from; only applied to the Wayland journal query.
        max_bytes: cap on the returned text, applied to the end of the log.

    Returns:
        The tail of the display-server log, or "" when nothing is available or
        ``max_bytes`` is not positive.
    """
    if _session_type() == "wayland":
        if not shutil.which("journalctl"):
            return ""
        cmd = ["journalctl", "--user", "--no-pager"]
        since_arg = _since_arg(since)
        if since_arg:
            cmd += ["--since", since_arg]
        cmd += [f"_COMM={comp}" for comp in _COMPOSITORS]  # OR-matched
        out = _run(cmd)
        if not out or out.strip().lower() == "-- no entries --":
            return ""
        # out[-0:] would be the whole string, so a zero cap must be handled explicitly.
        return out[-max_bytes:] if max_bytes > 0 else ""
    log = _xorg_log()  # X11: Xorg log isn't wall-clock-timestamped, so tail rather than scope
    return _tail_file(log, max_bytes) if log else ""
|
||||
|
||||
|
||||
# Kernel-log patterns worth alerting on in real time (M8 event alerts). (label, regex).
# Order matters: a line is reported under the first pattern it matches.
_CRITICAL = [
    ("GPU error (Xid)", re.compile(r"NVRM:\s*Xid", re.I)),
    ("Out of memory", re.compile(r"out of memory|oom-kill|killed process \d+", re.I)),
    ("CPU machine-check", re.compile(r"\bmce:|machine check", re.I)),
    ("PCIe error", re.compile(r"\bAER:|pcie bus error", re.I)),
    ("Disk I/O error", re.compile(
        r"buffer i/o error|\bi/o error\b|critical medium error|ext4-fs error|"
        r"blk_update_request:.*error|ata\d+.*(?:failed|error)", re.I)),
]


def scan_critical(text: str) -> list[tuple[str, str]]:
    """Find the lines of ``text`` that match a critical kernel-log pattern.

    Returns ``(label, stripped line)`` pairs in input order; each line contributes
    at most one pair, labelled by the first pattern that matches it.
    """
    events: list[tuple[str, str]] = []
    for raw_line in text.splitlines():
        label = next((name for name, pattern in _CRITICAL if pattern.search(raw_line)), None)
        if label is not None:
            events.append((label, raw_line.strip()))
    return events
|
||||
|
||||
|
||||
def available() -> bool:
    """Report whether at least one log source exists: journalctl, coredumpctl, nvidia-smi, or an Xorg log."""
    tools = ("journalctl", "coredumpctl", "nvidia-smi")
    if any(shutil.which(tool) for tool in tools):
        return True
    return bool(_xorg_log())
|
||||
|
||||
|
||||
def collect(since: float | None = None) -> str:
    """Gather kernel log, coredumps, NVIDIA snapshot and display log into one labelled block.

    Sections that come back empty are omitted; the rest are separated by a blank
    line. Returns "" when every section is empty.
    """
    sections: list[str] = []
    if kern := kernel_log(since):
        sections.append(f"--- Kernel log (journalctl -k) ---\n{kern}")
    if cores := coredumps(since):
        sections.append(f"--- Crashed processes (coredumpctl) ---\n{cores}")
    if nvidia := nvidia_snapshot():
        sections.append(f"--- NVIDIA snapshot (nvidia-smi -q) ---\n{nvidia}")
    if display := display_log(since):
        sections.append(f"--- Display server log ({_session_type()}) ---\n{display}")
    return "\n\n".join(sections)
|
||||
@@ -23,6 +23,7 @@ def targets(purge: bool = False) -> list[Path]:
|
||||
home / ".local" / "bin" / "rigdoctor",
|
||||
home / ".local" / "bin" / "rigdoctor-gui",
|
||||
share / "applications" / "rigdoctor.desktop",
|
||||
share / "icons" / "hicolor" / "scalable" / "apps" / "rigdoctor.svg",
|
||||
]
|
||||
if purge:
|
||||
items += [config.CONFIG_DIR, config.STATE_DIR, config.DATA_DIR]
|
||||
|
||||
@@ -8,11 +8,14 @@ state for the UI; `apply_update` performs the no-root self-update.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import functools
|
||||
import json
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
|
||||
from .. import __version__
|
||||
from ..config import load_token
|
||||
@@ -31,6 +34,50 @@ UP_TO_DATE = "up-to-date"
|
||||
AVAILABLE = "available"
|
||||
|
||||
|
||||
APT_PACKAGE = "rigdoctor"
|
||||
|
||||
|
||||
def _dpkg_owners(output: str) -> set[str]:
    """Extract the owning package names from ``dpkg -S`` output.

    Each result line looks like ``pkg[:arch][, pkg2...]: /path``. The architecture
    qualifier is dropped so ``rigdoctor:amd64`` is reported as ``rigdoctor``.
    """
    owners: set[str] = set()
    for line in output.splitlines():
        names, sep, _path = line.partition(": ")
        if not sep:
            continue
        for name in names.split(","):
            owners.add(name.strip().split(":", 1)[0])
    owners.discard("")
    return owners


def _dpkg_owns(path: Path) -> bool:
    """True if dpkg reports `path` belongs to the RigDoctor package (an apt/.deb install).

    Returns False when dpkg is missing, the query fails or times out, or the file is
    owned by some other package.
    """
    if not shutil.which("dpkg"):
        return False
    try:
        r = subprocess.run(["dpkg", "-S", str(path)], capture_output=True, text=True, timeout=5)
    except (subprocess.SubprocessError, OSError):
        return False
    # Compare parsed owner names rather than substring-matching stdout: the queried
    # path itself contains the package name, so a substring test matches any owner.
    return r.returncode == 0 and APT_PACKAGE in _dpkg_owners(r.stdout)
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=1)
def install_kind() -> str:
    """Classify this install as 'apt' (.deb), 'pip' (venv/.run) or 'dev' (source checkout).

    The answer selects the updater: only a 'pip' install can upgrade itself in place.
    An apt install is root/dpkg-managed and a checkout is VCS-managed, so both get
    guidance instead of an automatic update. The result is cached for the process.
    """
    package_dir = Path(__file__).resolve().parents[1]  # .../rigdoctor
    location = str(package_dir)

    if _dpkg_owns(package_dir / "__init__.py"):
        kind = "apt"
    elif sys.prefix != sys.base_prefix:  # inside a venv → the pip/.run install
        kind = "pip"
    elif (package_dir.parents[1] / "pyproject.toml").exists():  # repo checkout
        kind = "dev"
    elif location.startswith("/usr/") or "/dist-packages/" in location:
        kind = "apt"  # system-managed but no dpkg record — still don't pip
    else:
        kind = "pip"
    return kind
|
||||
|
||||
|
||||
def update_hint(kind: str | None = None) -> str:
    """Return human-readable update guidance for installs that cannot self-update.

    ``kind`` defaults to ``install_kind()``. 'apt' and 'dev' get instructions; any
    other kind (including 'pip', which updates itself) yields an empty string.
    """
    resolved = kind if kind else install_kind()
    if resolved == "dev":
        return "Running from a source checkout — update with `git pull`."
    if resolved != "apt":
        return ""
    upgrade_cmd = f"  sudo apt update && sudo apt install --only-upgrade {APT_PACKAGE}"
    return "Installed via apt — update with:\n" + upgrade_cmd
|
||||
|
||||
|
||||
def _parse(version: str) -> tuple[int, ...]:
    """Parse a version tag such as ``v1.2.3`` into a tuple of ints for comparison.

    A leading ``v``/``V`` and surrounding whitespace are ignored. Each dot-separated
    component contributes its leading run of ASCII digits, so ``1.2.3-rc1`` parses as
    ``(1, 2, 3)`` instead of losing the patch number. Components that do not start
    with a digit are skipped.
    """
    numbers: list[int] = []
    for component in version.strip().lstrip("vV").split("."):
        digits = ""
        for ch in component:
            # ASCII digits only: str.isdigit() also accepts characters such as
            # superscripts that int() cannot convert.
            if ch not in "0123456789":
                break
            digits += ch
        if digits:
            numbers.append(int(digits))
    return tuple(numbers)
|
||||
|
||||
@@ -100,11 +147,16 @@ def list_releases(limit: int = 15, timeout: float = 6.0) -> tuple[list[tuple[str
|
||||
|
||||
|
||||
def apply_update(tag: str) -> tuple[int, str]:
|
||||
"""Self-update the current (user-local) install to `tag` via authenticated pip.
|
||||
"""Update to `tag` using the method matching how RigDoctor was installed.
|
||||
|
||||
Installs `rigdoctor[gui] @ git+https://oauth2:<token>@…/rigdoctor.git@<tag>` into
|
||||
the running environment. Returns (exit_code, output) with the token scrubbed.
|
||||
Only pip/venv installs are upgraded in place (authenticated pip install of
|
||||
`rigdoctor[gui] @ git+https://oauth2:<token>@…/rigdoctor.git@<tag>`). apt and source
|
||||
installs can't be (root/dpkg- or VCS-managed), so they return guidance instead of
|
||||
attempting pip. Returns (exit_code, output) with the token scrubbed.
|
||||
"""
|
||||
kind = install_kind()
|
||||
if kind != "pip":
|
||||
return (1, update_hint(kind))
|
||||
token = load_token()
|
||||
if not token:
|
||||
return (1, "No update token configured. Run `rigdoctor login`.")
|
||||
|
||||
@@ -0,0 +1,107 @@
|
||||
"""Zero-config game-launch watcher (D12 fallback): poll Steam's RunningAppID and
|
||||
auto-bracket a focused capture around the running game.
|
||||
|
||||
For users who won't add the `rigdoctor wrap %command%` launch option. Less precise than the
|
||||
wrapper (it depends on Steam writing RunningAppID to registry.vdf, and only covers Steam), so
|
||||
the wrapper stays the primary mechanism. Stdlib only; safe to run as a `systemd --user` service
|
||||
(the game-launch trigger mode).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import signal
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from . import reccontrol, steam
|
||||
from .steam import _parse_vdf
|
||||
|
||||
_REGISTRY_CANDIDATES = ("~/.steam/registry.vdf", "~/.steam/steam/registry.vdf")
|
||||
|
||||
|
||||
def _registry_path() -> Path | None:
    """Return the first ``_REGISTRY_CANDIDATES`` entry that exists on disk, else None.

    Candidates are checked in order with ``~`` expanded.
    """
    expanded = (Path(os.path.expanduser(candidate)) for candidate in _REGISTRY_CANDIDATES)
    return next((path for path in expanded if path.exists()), None)
|
||||
|
||||
|
||||
def _find_key(data: dict, key: str):
    """Depth-first search of nested VDF dicts for a scalar stored under *key*.

    Key comparison ignores case. Dict values are descended into and never returned
    themselves, so a key whose value is a dict does not count as a match. Returns the
    first scalar found in iteration order, or None when there is none.
    """
    wanted = key.lower()
    for name, value in data.items():
        if not isinstance(value, dict):
            if name.lower() == wanted:
                return value
            continue
        nested_hit = _find_key(value, key)
        if nested_hit is not None:
            return nested_hit
    return None
|
||||
|
||||
|
||||
def running_appid() -> int:
    """Return the Steam appid that registry.vdf reports as running (0 if none / unknown).

    0 is returned when no registry file is found, when it cannot be read, or when the
    ``RunningAppID`` value is missing or not an integer.
    """
    registry = _registry_path()
    if registry is None:
        return 0
    try:
        tree = _parse_vdf(registry.read_text(encoding="utf-8", errors="replace"))
    except OSError:
        return 0
    value = _find_key(tree, "RunningAppID")
    try:
        appid = int(value)
    except (TypeError, ValueError):  # key absent (None) or not numeric
        appid = 0
    return appid
|
||||
|
||||
|
||||
def transition(prev: int, current: int) -> str | None:
    """Classify the change between two polled appids.

    Returns 'start' when a game appears (falsy → truthy), 'stop' when it goes away
    (truthy → falsy), and None when the running/not-running state is unchanged. That
    includes a direct switch from one appid to another.
    """
    was_running = bool(prev)
    is_running = bool(current)
    if was_running == is_running:
        return None
    return "start" if is_running else "stop"
|
||||
|
||||
|
||||
def _name_for(appid: int) -> str:
    """Resolve *appid* to a game name, falling back to ``Steam app <appid>``.

    Uses the cached game list when it is non-empty, otherwise scans the selected
    Steam libraries.
    """
    wanted = str(appid)
    games = steam.cached_games() or steam.scan_games(steam.selected_library_paths())
    return next((game.name for game in games if game.appid == wanted), f"Steam app {appid}")
|
||||
|
||||
|
||||
def watch(interval: float = 5.0) -> int:
    """Poll for a running Steam game and bracket a capture around it. Blocks until signalled.

    Every ``interval`` seconds the running appid is read. A focused capture is started
    when a game appears (unless a capture is already running) and stopped when the game
    goes away. SIGTERM or SIGINT ends the loop. On the way out the previous signal
    handlers are restored and a capture started here is stopped, even if polling
    raised. Returns 0.
    """
    from . import diagnostic

    stop = {"flag": False}

    def _on_signal(_sig, _frame):
        stop["flag"] = True

    # Keep the displaced handlers so they can be put back when the watcher exits.
    previous = {sig: signal.signal(sig, _on_signal) for sig in (signal.SIGTERM, signal.SIGINT)}

    prev = 0
    started = False  # True only while a capture started by this watcher is running
    try:
        while not stop["flag"]:
            current = running_appid()
            action = transition(prev, current)
            if action == "start" and not reccontrol.running_pid():
                started = diagnostic.start(game=_name_for(current)) is not None
            elif action == "stop" and started:
                reccontrol.stop_background()
                started = False
            prev = current
            # Sleep in small slices so a stop signal is handled promptly.
            slept = 0.0
            while slept < interval and not stop["flag"]:
                time.sleep(min(0.25, interval - slept))
                slept += 0.25
    finally:
        for sig, handler in previous.items():
            # signal.signal() returns None for a handler not installed from Python,
            # and passing None back would raise TypeError.
            if handler is not None:
                signal.signal(sig, handler)
        if started:
            reccontrol.stop_background()
    return 0
|
||||
@@ -0,0 +1,82 @@
|
||||
"""Steam-launch wrapper (D12): auto-bracket a focused diagnostic around a game.
|
||||
|
||||
Set as a per-game Steam launch option — `rigdoctor wrap %command%` — or in Lutris/Heroic's
|
||||
wrapper field. Steam expands `%command%` to the real game command; we start a focused capture
|
||||
(tagged with the game), run the game, and stop the capture cleanly when it exits. A hard
|
||||
freeze means the game (and this wrapper) never returns, so the capture is left without a clean
|
||||
stop — which RigDoctor then flags as a crash on next launch.
|
||||
|
||||
Deterministic and daemonless (D12 "build first"): no polling, and it knows the title.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def game_name_from_env() -> str | None:
    """Resolve the launching game's name from the Steam environment.

    Reads ``SteamAppId`` (falling back to ``SteamGameId``). Returns None when neither
    is set, the matching game's name when the appid is in the game list, and
    ``Steam app <appid>`` otherwise.
    """
    env = os.environ
    appid = env.get("SteamAppId") or env.get("SteamGameId")
    if not appid:
        return None
    from . import steam

    library = steam.cached_games() or steam.scan_games(steam.selected_library_paths())
    wanted = str(appid)
    return next((entry.name for entry in library if entry.appid == wanted), f"Steam app {appid}")
|
||||
|
||||
|
||||
def launch_option() -> str:
    """Build the string to paste into Steam's Launch Options.

    Uses the absolute path of the ``rigdoctor`` script next to the running interpreter
    when that file exists, so the option does not depend on PATH; otherwise the bare
    command name. A path containing a space is wrapped in double quotes.
    """
    sibling = Path(sys.executable).with_name("rigdoctor")
    program = "rigdoctor"
    if sibling.exists():
        program = str(sibling)
    if " " in program:
        program = f'"{program}"'
    return f"{program} wrap %command%"
|
||||
|
||||
|
||||
def run(command: list[str], game: str | None = None) -> int:
    """Start a focused capture (unless one's already running), run the game, then stop it.

    Returns the game's exit code so Steam sees the right status, 2 when ``command`` is
    empty, or 1 when the game could not be launched.

    `game` overrides name detection — used by `games play` for a custom game (e.g. SPT), where
    there's no SteamAppId and the bare script name (tarkov.sh) wouldn't tag the capture usefully.

    SIGTERM and SIGINT are forwarded to the game while it runs. The previous handlers
    are restored afterwards, and a capture started here is always stopped cleanly.
    """
    from . import diagnostic, reccontrol

    if not command:
        print("usage: rigdoctor wrap %command% (set as a Steam launch option)", file=sys.stderr)
        return 2

    game = game or game_name_from_env() or os.path.basename(command[0])
    started = False
    if not reccontrol.running_pid():  # don't disturb an existing capture
        started = diagnostic.start(game=game) is not None

    proc: subprocess.Popen | None = None

    def _forward(signum, _frame):  # pass Steam's stop signal to the game
        if proc is not None and proc.poll() is None:
            try:
                proc.send_signal(signum)
            except OSError:
                pass  # the game exited between poll() and send_signal()

    previous = {sig: signal.signal(sig, _forward) for sig in (signal.SIGTERM, signal.SIGINT)}
    try:
        proc = subprocess.Popen(command)
        rc = proc.wait()
    except (OSError, ValueError, subprocess.SubprocessError) as exc:
        print(f"rigdoctor wrap: couldn't launch the game: {exc}", file=sys.stderr)
        rc = 1
    finally:
        try:
            for sig, handler in previous.items():
                # signal.signal() returns None for a handler not installed from
                # Python, and passing None back would raise TypeError.
                if handler is not None:
                    signal.signal(sig, handler)
        finally:
            if started:
                reccontrol.stop_background()  # clean stop → no false crash flag
    return rc
|
||||
@@ -3,24 +3,50 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from PySide6.QtGui import QIcon
|
||||
from PySide6.QtWidgets import QApplication
|
||||
|
||||
from ..config import load_config
|
||||
from . import desktop
|
||||
from .main_window import MainWindow
|
||||
from .theme import STYLESHEET
|
||||
|
||||
ICON = Path(__file__).parent / "assets" / "rigdoctor.svg"
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
    """Start the RigDoctor GUI and run the Qt event loop; returns its exit code.

    ``argv`` defaults to ``sys.argv``. Two flags are read from it: ``--tray`` starts
    hidden to the system tray, and ``--setup`` forces the setup wizard. The wizard
    also runs when the config has no truthy ``setup_done``.
    """
    from ..core import applog

    applog.setup()  # opt-in app logging (M15); no-op unless logging_enabled
    applog.get_logger(__name__).info("GUI starting")
    desktop.ensure()  # self-register icon + .desktop so updates show it without re-installing
    app = QApplication(argv if argv is not None else sys.argv)
    app.setApplicationName("RigDoctor")
    app.setApplicationDisplayName("RigDoctor")
    # Match the installed rigdoctor.desktop so the dock/launcher shows our icon (Wayland app-id).
    app.setDesktopFileName("rigdoctor")
    if ICON.exists():
        app.setWindowIcon(QIcon(str(ICON)))
    app.setStyle("Fusion")
    app.setStyleSheet(STYLESHEET)

    # Load the config once; it is needed for both the interval and the setup check.
    cfg = load_config()
    interval = float(cfg.get("interval", 1.0))
    window = MainWindow(interval=interval)
    # `--tray` starts hidden to the system tray (for autostart); if no tray is available,
    # fall back to showing the window so the app is never invisible-and-unreachable.
    # The window must not be shown before this check, or `--tray` would never start hidden.
    args = argv if argv is not None else sys.argv
    if "--tray" in args and window.tray_available():
        window.start_minimized_note()
    else:
        window.show()
    # First run (or `--setup`): the graphical setup wizard (M9).
    if "--setup" in args or not cfg.get("setup_done", False):
        from .setup_wizard import SetupWizard

        SetupWizard(window).exec()
    return app.exec()
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 16 16">
|
||||
<path d="M3.5 8.5 L6.5 11.5 L12.5 4.5" fill="none" stroke="#06222e"
|
||||
stroke-width="2.4" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 237 B |
@@ -0,0 +1,8 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="256" height="256" viewBox="0 0 256 256">
|
||||
<rect x="8" y="8" width="240" height="240" rx="52" fill="#15181e"/>
|
||||
<circle cx="128" cy="128" r="84" fill="none" stroke="#2a2f39" stroke-width="14"/>
|
||||
<path d="M128 44 a84 84 0 1 1 -59.4 24.6" fill="none" stroke="#38bdf8"
|
||||
stroke-width="14" stroke-linecap="round"/>
|
||||
<path d="M60 132 H100 L116 96 L140 168 L156 132 H196" fill="none" stroke="#e6e8eb"
|
||||
stroke-width="14" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 533 B |
@@ -17,19 +17,19 @@ from PySide6.QtWidgets import (
|
||||
|
||||
from ..core.sample import Sample
|
||||
from ..render import metric_label
|
||||
from .widgets import Card, MetricBar, MetricRow, StatGauge
|
||||
from .widgets import Card, HistoryGraph, MetricBar, MetricRow
|
||||
|
||||
_GROUP_ORDER = ["gpu", "cpu", "memory", "storage"]
|
||||
_GROUP_TITLES = {"gpu": "GPU", "cpu": "CPU", "memory": "Memory", "storage": "Storage"}
|
||||
_BAR_METRICS = {"util", "mem_util", "fan", "used_pct"}
|
||||
|
||||
|
||||
def _gauge_card(gauge: StatGauge) -> QFrame:
|
||||
def _tile_card(widget: QWidget) -> QFrame:
|
||||
card = QFrame()
|
||||
card.setObjectName("Card")
|
||||
layout = QVBoxLayout(card)
|
||||
layout.setContentsMargins(6, 14, 6, 8)
|
||||
layout.addWidget(gauge)
|
||||
layout.setContentsMargins(6, 10, 6, 8)
|
||||
layout.addWidget(widget)
|
||||
return card
|
||||
|
||||
|
||||
@@ -54,16 +54,16 @@ class Dashboard(QWidget):
|
||||
header.addWidget(self._updated)
|
||||
root.addLayout(header)
|
||||
|
||||
# Headline gauges
|
||||
self._g_gpu_temp = StatGauge("GPU Temp", "°C", 100, "temp")
|
||||
self._g_gpu_load = StatGauge("GPU Load", "%", 100, "accent")
|
||||
self._g_cpu_temp = StatGauge("CPU Temp", "°C", 100, "temp")
|
||||
self._g_mem = StatGauge("Memory", "%", 100, "usage")
|
||||
gauges = QHBoxLayout()
|
||||
gauges.setSpacing(14)
|
||||
# Headline trend graphs (history over the session, not just the live value)
|
||||
self._g_gpu_temp = HistoryGraph("GPU Temp", "°C", 30, 100, "temp")
|
||||
self._g_gpu_load = HistoryGraph("GPU Load", "%", 0, 100, "accent")
|
||||
self._g_cpu_temp = HistoryGraph("CPU Temp", "°C", 30, 100, "temp")
|
||||
self._g_mem = HistoryGraph("Memory", "%", 0, 100, "usage")
|
||||
graphs = QHBoxLayout()
|
||||
graphs.setSpacing(14)
|
||||
for g in (self._g_gpu_temp, self._g_gpu_load, self._g_cpu_temp, self._g_mem):
|
||||
gauges.addWidget(_gauge_card(g))
|
||||
root.addLayout(gauges)
|
||||
graphs.addWidget(_tile_card(g))
|
||||
root.addLayout(graphs)
|
||||
|
||||
# Per-subsystem cards (scrollable, 2-column grid)
|
||||
scroll = QScrollArea()
|
||||
@@ -81,10 +81,10 @@ class Dashboard(QWidget):
|
||||
root.addWidget(scroll, 1)
|
||||
|
||||
def update_sample(self, sample: Sample) -> None:
|
||||
self._g_gpu_temp.set_value(self._val(sample, "gpu", "temp", ""))
|
||||
self._g_gpu_load.set_value(self._val(sample, "gpu", "util"))
|
||||
self._g_cpu_temp.set_value(self._cpu_temp(sample))
|
||||
self._g_mem.set_value(self._val(sample, "memory", "used_pct"))
|
||||
self._g_gpu_temp.add_value(self._val(sample, "gpu", "temp", ""))
|
||||
self._g_gpu_load.add_value(self._val(sample, "gpu", "util"))
|
||||
self._g_cpu_temp.add_value(self._cpu_temp(sample))
|
||||
self._g_mem.add_value(self._val(sample, "memory", "used_pct"))
|
||||
|
||||
keys = [r.key for r in sample.readings]
|
||||
if keys != self._built_keys: # sources appeared/disappeared
|
||||
|
||||
@@ -0,0 +1,51 @@
|
||||
"""Best-effort desktop integration: install our icon + .desktop so the dock shows it.
|
||||
|
||||
Runs at GUI launch (idempotent), so a self-update + relaunch refreshes the icon without
|
||||
re-running install.sh. No-op for non-installed (dev) runs where the launcher is absent.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from .. import config
|
||||
|
||||
_ICON_SRC = Path(__file__).parent / "assets" / "rigdoctor.svg"
|
||||
|
||||
_DESKTOP = """[Desktop Entry]
|
||||
Type=Application
|
||||
Name=RigDoctor
|
||||
Comment=Hardware monitoring & crash diagnostics for Linux gamers
|
||||
Exec={exec}
|
||||
Icon=rigdoctor
|
||||
Terminal=false
|
||||
Categories=System;Monitor;Utility;
|
||||
StartupWMClass=rigdoctor
|
||||
"""
|
||||
|
||||
|
||||
def ensure() -> None:
    """Install or refresh the user-level icon and ``rigdoctor.desktop`` launcher.

    Best-effort and idempotent: files are written only when missing or different, and
    filesystem errors are ignored so GUI start-up never fails here. The launcher is
    skipped when no ``rigdoctor-gui`` script sits next to the running interpreter.
    """
    share = config.DATA_DIR.parent  # ~/.local/share

    try:
        if _ICON_SRC.exists():
            icon_dst = share / "icons" / "hicolor" / "scalable" / "apps" / "rigdoctor.svg"
            icon_dst.parent.mkdir(parents=True, exist_ok=True)
            if not icon_dst.exists() or icon_dst.read_bytes() != _ICON_SRC.read_bytes():
                shutil.copyfile(_ICON_SRC, icon_dst)
    except OSError:
        pass  # deliberate: a missing icon must not stop the app from starting

    gui_exec = Path(sys.executable).with_name("rigdoctor-gui")
    if not gui_exec.exists():  # dev / not a normal install — don't fabricate a .desktop
        return
    # An Exec argument containing a space must be quoted or launchers split it.
    exec_value = str(gui_exec)
    if " " in exec_value:
        exec_value = f'"{exec_value}"'
    try:
        desktop = share / "applications" / "rigdoctor.desktop"
        content = _DESKTOP.format(exec=exec_value)
        desktop.parent.mkdir(parents=True, exist_ok=True)
        try:
            # Desktop entries are UTF-8 regardless of the current locale.
            current = desktop.read_text(encoding="utf-8") if desktop.exists() else None
        except UnicodeDecodeError:
            current = None  # unreadable existing file: treat as stale and rewrite
        if current != content:
            desktop.write_text(content, encoding="utf-8")
    except OSError:
        pass  # deliberate: desktop integration is optional
|
||||
@@ -0,0 +1,229 @@
|
||||
"""Results view for a guided diagnostic session (M6/D12): capture summary + findings."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
|
||||
from PySide6.QtCore import Qt, Signal
|
||||
from PySide6.QtGui import QFont, QTextCursor
|
||||
from PySide6.QtWidgets import (
|
||||
QDialog,
|
||||
QFrame,
|
||||
QHBoxLayout,
|
||||
QLabel,
|
||||
QMessageBox,
|
||||
QPushButton,
|
||||
QScrollArea,
|
||||
QTextEdit,
|
||||
QVBoxLayout,
|
||||
QWidget,
|
||||
)
|
||||
|
||||
from ..render import render_summary
|
||||
from .widgets import finding_card
|
||||
|
||||
|
||||
class DiagnosticDialog(QDialog):
    """Results dialog for one diagnostic session: capture summary, findings, AI and report actions.

    ``result`` is read for ``game``, ``summary``, ``findings`` and ``dir``. The AI
    explanation runs on a worker thread and reaches the GUI only through the
    ``_chunk`` and ``_explained`` signals.
    """

    _chunk = Signal(str)  # streamed token delta (worker thread -> GUI)
    _explained = Signal(object)  # (ok, full_text) when the AI stream finishes

    def __init__(self, result, parent=None) -> None:
        super().__init__(parent)
        self._result = result
        # Set only while a stream dialog is open; the slots ignore late signals otherwise.
        self._stream_view = None
        self._stream_status = None
        self._chunk.connect(self._on_chunk)
        self._explained.connect(self._on_explained)
        self.setWindowTitle(f"Diagnostic — {result.game}" if result.game else "Diagnostic")
        self.resize(660, 680)

        root = QVBoxLayout(self)
        root.setContentsMargins(20, 18, 20, 16)
        root.setSpacing(14)

        title = QLabel(f"Diagnostic — {result.game}" if result.game else "Diagnostic")
        title.setObjectName("PageTitle")
        root.addWidget(title)

        scroll = QScrollArea()
        scroll.setWidgetResizable(True)
        scroll.setFrameShape(QFrame.Shape.NoFrame)
        scroll.setStyleSheet("background: transparent;")
        body = QWidget()
        col = QVBoxLayout(body)
        col.setContentsMargins(0, 0, 0, 0)
        col.setSpacing(10)
        col.setAlignment(Qt.AlignmentFlag.AlignTop)

        # Capture window summary (peaks / events / last samples) — monospace for the columns.
        cap_head = QLabel("Capture")
        cap_head.setStyleSheet("font-weight: 700; background: transparent;")
        col.addWidget(cap_head)
        summary = QLabel(render_summary(result.summary))
        summary.setObjectName("Report")
        summary.setFont(QFont("monospace"))
        summary.setTextInteractionFlags(Qt.TextInteractionFlag.TextSelectableByMouse)
        summary.setWordWrap(False)
        summary.setStyleSheet(
            "background: #0d0f13; color: #cfd3da; border: 1px solid #2a2f39; "
            "border-radius: 8px; padding: 10px;"
        )
        col.addWidget(summary)

        find_head = QLabel(f"Findings ({len(result.findings)})")
        find_head.setStyleSheet("font-weight: 700; background: transparent;")
        col.addWidget(find_head)
        if result.findings:
            for finding in result.findings:
                col.addWidget(finding_card(finding))
        else:
            none = QLabel("No findings.")
            none.setObjectName("Muted")
            col.addWidget(none)

        scroll.setWidget(body)
        root.addWidget(scroll, 1)

        buttons = QHBoxLayout()
        self._explain_btn = QPushButton("Explain with AI")
        self._explain_btn.clicked.connect(self._explain_with_ai)
        from ..core import ai
        self._explain_btn.setVisible(ai.is_configured())  # opt-in only; hidden if not set up
        buttons.addWidget(self._explain_btn)
        self._report_btn = QPushButton("Report")  # zip this diagnostic's logs (M15)
        self._report_btn.clicked.connect(self._make_report)
        self._report_btn.setVisible(bool(result.dir))  # only when logging stored the session
        buttons.addWidget(self._report_btn)
        buttons.addStretch(1)
        close = QPushButton("Close")
        close.setObjectName("PrimaryButton")
        close.clicked.connect(self.accept)
        buttons.addWidget(close)
        root.addLayout(buttons)

    # --- AI explanation (M14, D24) — streamed; runs only on this button press ----------
    def _explain_with_ai(self) -> None:
        """Ask for consent if the provider is not local, then stream the explanation."""
        from ..core import ai

        if not ai.is_local():  # cloud provider → explicit consent before sending data
            confirm = QMessageBox.question(
                self, "Send to AI provider",
                f"This sends your diagnostic findings to {ai.provider_label()}.\n\nContinue?",
                QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No,
                QMessageBox.StandardButton.No,
            )
            if confirm != QMessageBox.StandardButton.Yes:
                return
        self._explain_btn.setEnabled(False)
        dialog = self._open_stream_dialog()
        threading.Thread(target=self._work_explain, daemon=True).start()
        dialog.exec()  # streaming fills the view live via signals during this nested loop
        self._stream_view = self._stream_status = None
        self._explain_btn.setEnabled(True)

    def _build_ai_context(self) -> str:
        """Assemble the text sent to the model: outcome, findings, summary and session logs."""
        from ..core import ai, gamelogs, syslogs

        result = self._result
        summary = result.summary
        events = {kind for _ts, kind, _detail in summary.events}
        clean = "session-stop" in events
        gpu_lost = "gpu-lost" in events

        lines = [f"Game: {result.game or 'unknown'}"]
        if summary.start and summary.end:
            lines.append(f"Capture duration: ~{int(summary.end - summary.start)}s")
        outcome = "ended cleanly (no crash detected)" if clean else \
            "ended without a clean stop (possible crash/freeze)"
        if gpu_lost:
            outcome += "; a GPU-lost event was recorded"
        lines.append(f"Outcome: {outcome}")
        lines.append("")
        lines.append(ai.format_findings(result.findings, header="Findings:"))
        lines.append("\nCapture summary:\n" + render_summary(summary))

        since = (summary.start - 60) if summary.start else None
        logs = gamelogs.collect(since=since, game=result.game)  # scoped to this session
        if logs:
            lines.append("\nGame/Proton/Steam logs for this session:\n" + logs)
        sys_logs = syslogs.collect(since=since)  # kernel log + crashed-process records
        if sys_logs:
            lines.append("\nSystem logs for this session (kernel + crashed processes):\n" + sys_logs)
        return "\n".join(lines)

    def _work_explain(self) -> None:
        """Worker-thread body: build the context, stream the reply, record it, report back.

        ``_explained`` is always emitted, even when something raises, so the stream
        dialog never stays on "Streaming…" forever.
        """
        import logging

        from ..core import ai

        log = logging.getLogger(__name__)
        result = self._result
        text = ""
        try:
            text = self._build_ai_context()
            ok, reply = ai.explain_stream(text, on_chunk=lambda d: self._chunk.emit(d))
        except Exception as exc:  # thread boundary: turn any failure into a reported error
            log.exception("AI explanation failed")
            ok, reply = False, f"{type(exc).__name__}: {exc}"
        if result.dir and text:  # record exactly what was sent, the model, and the reply (M15)
            try:
                from ..core import diagstore
                diagstore.record_ai(
                    result.dir, provider=ai.provider(), model=ai.model(),
                    system=ai.SYSTEM_PROMPT, prompt=ai.build_prompt(text),
                    response=reply if ok else f"[error] {reply}")
            except Exception:  # recording is auxiliary; never let it block the reply
                log.exception("could not record the AI interaction")
        self._explained.emit((ok, reply))

    def _on_chunk(self, delta: str) -> None:
        """Append a streamed token delta to the open stream view (no-op if it is closed)."""
        if self._stream_view is None:
            return
        self._stream_view.moveCursor(QTextCursor.MoveOperation.End)
        self._stream_view.insertPlainText(delta)  # live plain text as tokens arrive
        self._stream_view.ensureCursorVisible()

    def _on_explained(self, result) -> None:
        """Finalize the stream view and status line; ``result`` is ``(ok, text)``."""
        ok, text = result
        if self._stream_view is not None:
            if ok:
                self._stream_view.setMarkdown(text)  # re-render the finished answer as Markdown
            else:
                self._stream_view.setPlainText(f"AI explanation failed:\n\n{text}")
        if self._stream_status is not None:
            self._stream_status.setText(
                "AI-generated suggestions — verify before acting, especially anything that changes "
                "settings or data." if ok else "The request failed.")

    # --- Report bundle (M15) ------------------------------------------------------
    def _make_report(self) -> None:
        """Build the report bundle for this session and offer to open its folder."""
        from PySide6.QtCore import QUrl
        from PySide6.QtGui import QDesktopServices

        from ..core import diagstore

        self._report_btn.setEnabled(False)
        try:
            out = diagstore.make_report(self._result.dir)
        except OSError as exc:
            self._report_btn.setEnabled(True)
            QMessageBox.warning(self, "Report failed", str(exc))
            return
        self._report_btn.setEnabled(True)
        box = QMessageBox(self)
        box.setWindowTitle("Report created")
        box.setText(f"Saved report:\n{out}\n\nIt contains this diagnostic's logs and any AI "
                    "interaction (data sent, model, and reply).")
        open_btn = box.addButton("Open folder", QMessageBox.ButtonRole.ActionRole)
        box.addButton("OK", QMessageBox.ButtonRole.AcceptRole)
        box.exec()
        if box.clickedButton() is open_btn:
            QDesktopServices.openUrl(QUrl.fromLocalFile(str(out.parent)))

    def _open_stream_dialog(self) -> QDialog:
        """A live dialog the AI streams into; finalized to rendered Markdown when done."""
        from ..core import ai

        dlg = QDialog(self)
        dlg.setWindowTitle(f"AI explanation — {ai.provider_label()}")
        dlg.resize(620, 520)
        lay = QVBoxLayout(dlg)
        view = QTextEdit()
        view.setObjectName("Report")
        view.setReadOnly(True)
        lay.addWidget(view)
        status = QLabel("Streaming from the model…")
        status.setObjectName("Muted")
        status.setWordWrap(True)
        lay.addWidget(status)
        close = QPushButton("Close")
        close.setObjectName("PrimaryButton")
        close.clicked.connect(dlg.accept)
        lay.addWidget(close, alignment=Qt.AlignmentFlag.AlignRight)
        self._stream_view = view
        self._stream_status = status
        return dlg
|
||||
@@ -0,0 +1,156 @@
|
||||
"""Environment page (M6 in the GUI): runs the gaming-environment checks as findings cards."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
import time
|
||||
|
||||
from PySide6.QtCore import Qt, QTimer, Signal
|
||||
from PySide6.QtWidgets import (
|
||||
QFrame,
|
||||
QHBoxLayout,
|
||||
QLabel,
|
||||
QPushButton,
|
||||
QScrollArea,
|
||||
QVBoxLayout,
|
||||
QWidget,
|
||||
)
|
||||
|
||||
from .widgets import finding_card
|
||||
|
||||
|
||||
def _fail_reason(out: str) -> str:
    """Condense a failed command's output into a short reason for the status line.

    Known authentication and permission phrases map to fixed explanations. Otherwise
    the last non-blank output line is used, cut to 80 characters, with a generic
    reason when there is no output at all.
    """
    text = out or ""
    lowered = text.lower()
    if any(marker in lowered for marker in ("not authorized", "dismissed", "authentication")):
        return "cancelled at the password prompt"
    rejected = ("operation not permitted", "invalid argument", "permission denied")
    if any(marker in lowered for marker in rejected):
        return "the system rejected the change (it may be locked by BIOS/kernel)"
    for line in reversed(text.splitlines()):
        stripped = line.strip()
        if stripped:
            return stripped[:80]
    return "no privileges, or cancelled"
|
||||
|
||||
|
||||
class EnvironmentPage(QWidget):
|
||||
_result = Signal(object) # list[Finding]
|
||||
_action_done = Signal(object) # (label, rc, output) — install or apply finished
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self.setObjectName("Page")
|
||||
self._result.connect(self._render_findings)
|
||||
self._action_done.connect(self._on_action_done)
|
||||
self._busy = False
|
||||
|
||||
root = QVBoxLayout(self)
|
||||
root.setContentsMargins(20, 18, 20, 18)
|
||||
root.setSpacing(16)
|
||||
|
||||
header = QHBoxLayout()
|
||||
title = QLabel("Tuning")
|
||||
title.setObjectName("PageTitle")
|
||||
header.addWidget(title)
|
||||
header.addStretch(1)
|
||||
self._status = QLabel("")
|
||||
self._status.setObjectName("Muted")
|
||||
header.addWidget(self._status)
|
||||
self._run_btn = QPushButton("Run checks")
|
||||
self._run_btn.setObjectName("PrimaryButton")
|
||||
self._run_btn.clicked.connect(self._run)
|
||||
header.addWidget(self._run_btn)
|
||||
root.addLayout(header)
|
||||
|
||||
intro = QLabel(
|
||||
"System settings that affect gaming stability and performance, with the suggested "
|
||||
"fix command. RigDoctor only reports — it never changes anything."
|
||||
)
|
||||
intro.setObjectName("Muted")
|
||||
intro.setWordWrap(True)
|
||||
root.addWidget(intro)
|
||||
|
||||
scroll = QScrollArea()
|
||||
scroll.setWidgetResizable(True)
|
||||
scroll.setFrameShape(QFrame.Shape.NoFrame)
|
||||
scroll.setStyleSheet("background: transparent;")
|
||||
self._container = QWidget()
|
||||
self._list = QVBoxLayout(self._container)
|
||||
self._list.setContentsMargins(0, 0, 0, 0)
|
||||
self._list.setSpacing(10)
|
||||
self._list.setAlignment(Qt.AlignmentFlag.AlignTop)
|
||||
scroll.setWidget(self._container)
|
||||
root.addWidget(scroll, 1)
|
||||
|
||||
QTimer.singleShot(350, self._run) # auto-run shortly after the window opens
|
||||
|
||||
def _run(self) -> None:
|
||||
self._run_btn.setEnabled(False)
|
||||
self._status.setText("Checking environment…")
|
||||
threading.Thread(target=self._work, daemon=True).start()
|
||||
|
||||
def _work(self) -> None:
|
||||
from ..core.gameenv import run_gameenv_checks
|
||||
|
||||
try:
|
||||
findings = run_gameenv_checks()
|
||||
except Exception:
|
||||
findings = None
|
||||
self._result.emit(findings)
|
||||
|
||||
def _render_findings(self, findings) -> None:
|
||||
self._run_btn.setEnabled(True)
|
||||
if findings is None: # check failed — keep current results
|
||||
self._status.setText("check failed")
|
||||
return
|
||||
|
||||
while self._list.count():
|
||||
item = self._list.takeAt(0)
|
||||
w = item.widget()
|
||||
if w is not None:
|
||||
w.deleteLater()
|
||||
|
||||
crit = sum(1 for f in findings if f.severity == "critical")
|
||||
warn = sum(1 for f in findings if f.severity == "warning")
|
||||
self._status.setText(
|
||||
f"{crit} critical · {warn} warning · {len(findings)} checks · "
|
||||
f"{time.strftime('%H:%M:%S')}"
|
||||
)
|
||||
for finding in findings:
|
||||
self._list.addWidget(finding_card(finding, on_install=self._install, on_apply=self._apply))
|
||||
self._list.addStretch(1)
|
||||
|
||||
def _install(self, component) -> None:
|
||||
if self._busy:
|
||||
return
|
||||
self._busy = True
|
||||
self._run_btn.setEnabled(False)
|
||||
self._status.setText(f"Installing {component.name}… (may prompt for your password)")
|
||||
threading.Thread(target=self._work_install, args=(component,), daemon=True).start()
|
||||
|
||||
def _work_install(self, component) -> None:
    """Install ``component``'s packages and report the outcome on ``_action_done``.

    Emits ``(component.name, rc, out)``. If the import or the installer call
    raises, a non-zero ``rc`` with the exception text is emitted instead.
    Without that the signal would never fire, so whatever handles it
    (presumably ``_on_action_done``) could not clear ``_busy`` or re-enable
    the run button.
    """
    try:
        from ..core import installer

        rc, out = installer.install_packages(list(component.apt))
    except Exception as exc:
        # NOTE(review): assumes the receiver accepts plain text for ``out`` — confirm.
        rc, out = 1, f"{type(exc).__name__}: {exc}"
    self._action_done.emit((component.name, rc, out))
|
||||
|
||||
def _apply(self, fix_id: str, value: str) -> None:
|
||||
if self._busy:
|
||||
return
|
||||
self._busy = True
|
||||
self._run_btn.setEnabled(False)
|
||||
self._status.setText(f"Applying {value}… (may prompt for your password)")
|
||||
threading.Thread(target=self._work_apply, args=(fix_id, value), daemon=True).start()
|
||||
|
||||
def _work_apply(self, fix_id: str, value: str) -> None:
    """Apply a fix and report the outcome on ``_action_done``.

    Emits ``(value, rc, out)``. If the import or ``fixes.apply`` raises, a
    non-zero ``rc`` with the exception text is emitted instead. Without that
    the signal would never fire, so whatever handles it (presumably
    ``_on_action_done``) could not clear ``_busy`` or re-enable the run button.
    """
    try:
        from ..core import fixes

        rc, out = fixes.apply(fix_id, value)
    except Exception as exc:
        # NOTE(review): assumes the receiver accepts plain text for ``out`` — confirm.
        rc, out = 1, f"{type(exc).__name__}: {exc}"
    self._action_done.emit((value, rc, out))
|
||||
|
||||
def _on_action_done(self, result) -> None:
|
||||
label, rc, out = result
|
||||
self._busy = False
|
||||
if rc == 0:
|
||||
self._status.setText(f"{label} applied — re-checking…")
|
||||
self._run() # re-run so the finding reflects the new state
|
||||
else:
|
||||
self._run_btn.setEnabled(True)
|
||||
self._status.setText(f"'{label}' failed — {_fail_reason(out)}")
|
||||
@@ -0,0 +1,654 @@
|
||||
"""Games page (M6 in the GUI): pick Steam libraries and browse detected games.
|
||||
|
||||
Libraries are opt-in — the user checks which ones to scan. The list is loaded from the
|
||||
cache instantly, then a background rescan refreshes it and flags games installed since the
|
||||
last scan (a "NEW" badge here + a count on the sidebar nav).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
|
||||
from PySide6.QtCore import Qt, QTimer, Signal
|
||||
from PySide6.QtWidgets import (
|
||||
QApplication,
|
||||
QCheckBox,
|
||||
QDialog,
|
||||
QFileDialog,
|
||||
QFrame,
|
||||
QHBoxLayout,
|
||||
QLabel,
|
||||
QLineEdit,
|
||||
QMessageBox,
|
||||
QPushButton,
|
||||
QScrollArea,
|
||||
QVBoxLayout,
|
||||
QWidget,
|
||||
)
|
||||
|
||||
from ..config import load_config, update_config
|
||||
from .diagnostic_dialog import DiagnosticDialog
|
||||
from .minidump_dialog import MinidumpDialog
|
||||
from .theme import ACCENT, GOOD, MUTED, WARN
|
||||
|
||||
|
||||
def _game_row(name: str, sublabel: str, size: str, is_new: bool, appid: str = "", on_diagnose=None) -> QFrame:
    """Build one game card: title (plus optional sub-label), NEW badge, size, action button.

    The "Run Diagnostic" button is added only when ``on_diagnose`` is given;
    clicking it calls ``on_diagnose(name, appid)``.
    """
    row = QFrame()
    row.setObjectName("Card")
    layout = QHBoxLayout(row)
    layout.setContentsMargins(16, 10, 16, 10)
    layout.setSpacing(10)

    # Left column: bold title with an optional muted sub-label underneath.
    text_col = QVBoxLayout()
    text_col.setSpacing(2)
    heading = QLabel(name)
    heading.setStyleSheet("font-weight: 600; background: transparent;")
    heading.setWordWrap(True)
    text_col.addWidget(heading)
    if sublabel:
        caption = QLabel(sublabel)
        caption.setObjectName("Muted")
        text_col.addWidget(caption)
    layout.addLayout(text_col, 1)

    if is_new:
        badge_css = (
            f"color: {GOOD}; border: 1px solid {GOOD}; border-radius: 6px; "
            f"padding: 1px 6px; font-weight: 700; background: transparent;"
        )
        new_badge = QLabel("NEW")
        new_badge.setStyleSheet(badge_css)
        layout.addWidget(new_badge, 0, Qt.AlignmentFlag.AlignVCenter)

    size_text = QLabel(size)
    size_text.setObjectName("Muted")
    size_text.setMinimumWidth(80)
    size_text.setAlignment(Qt.AlignmentFlag.AlignRight | Qt.AlignmentFlag.AlignVCenter)
    layout.addWidget(size_text, 0)

    if on_diagnose is None:
        return row

    button = QPushButton("Run Diagnostic")
    button.setObjectName("ActionButton")
    button.setCursor(Qt.CursorShape.PointingHandCursor)
    button.clicked.connect(lambda: on_diagnose(name, appid))
    layout.addWidget(button, 0)
    return row
|
||||
|
||||
|
||||
class GamesPage(QWidget):
    """Games page widget: Steam library picker, game list, and diagnostic / crash-dump tools."""

    # Result signals; several are emitted from background threads in this class.
    _libraries_ready = Signal(object)  # list[dict(path, label, count, selected)]
    _scanned = Signal(object)  # steam.ScanResult, or None when the scan failed
    new_count_changed = Signal(int)  # newly-installed game count (for the nav badge)
    _diag_done = Signal(object)  # DiagnosticResult — focused capture analyzed (None on failure)
    _dump_parsed = Signal(object)  # minidump.MinidumpReport — imported .dmp (or None)
|
||||
|
||||
def __init__(self) -> None:
    """Assemble the Games page and start its initial load.

    Builds, top to bottom: the header row (status text plus action buttons),
    the in-progress diagnostic banner, the hard-crash banner, the Steam-library
    picker and the scrollable game list. It then shows the cached list,
    schedules a background rescan, checks for an interrupted diagnostic and
    sets the visibility of the import button.
    """
    super().__init__()
    self.setObjectName("Page")

    # Route each result signal to the method that renders it.
    self._libraries_ready.connect(self._render_libraries)
    self._scanned.connect(self._render_games)
    self._diag_done.connect(self._on_diag_done)
    self._dump_parsed.connect(self._on_dump_parsed)

    self._busy = False
    self._new_appids: set[str] = set()
    self._extra_games: list = []  # non-Steam (Lutris/Heroic), appended after a scan
    self._diag_game: str | None = None

    page_layout = QVBoxLayout(self)
    page_layout.setContentsMargins(20, 18, 20, 18)
    page_layout.setSpacing(16)

    # --- header row -------------------------------------------------------------------
    top_row = QHBoxLayout()
    heading = QLabel("Games")
    heading.setObjectName("PageTitle")
    top_row.addWidget(heading)
    top_row.addStretch(1)

    status = QLabel("")
    status.setObjectName("Muted")
    self._status = status
    top_row.addWidget(status)

    # Import a Windows crash dump (.dmp) from a Proton game and analyze it with AI.
    # Shown only when an AI provider is configured (AI analysis is the point).
    import_btn = QPushButton("Import crash dump…")
    import_btn.clicked.connect(self._import_dump)
    self._import_btn = import_btn
    top_row.addWidget(import_btn)

    autocap_btn = QPushButton("Auto-capture…")
    autocap_btn.clicked.connect(self._show_autocapture)
    self._autocap_btn = autocap_btn
    top_row.addWidget(autocap_btn)

    # Add a game no launcher reports (e.g. SPT / standalone mod launchers).
    add_btn = QPushButton("Add game…")
    add_btn.clicked.connect(self._add_custom_game)
    self._add_btn = add_btn
    top_row.addWidget(add_btn)

    rescan_btn = QPushButton("Rescan")
    rescan_btn.setObjectName("PrimaryButton")
    rescan_btn.clicked.connect(self.refresh)
    self._rescan_btn = rescan_btn
    top_row.addWidget(rescan_btn)
    page_layout.addLayout(top_row)

    # --- in-progress diagnostic banner (hidden until a focused capture is running) ---
    banner = QFrame()
    banner.setObjectName("Card")
    banner.setStyleSheet(f"#Card {{ border: 1px solid {ACCENT}; }}")
    banner_row = QHBoxLayout(banner)
    banner_row.setContentsMargins(16, 10, 16, 10)
    banner_row.setSpacing(10)
    banner_label = QLabel("")
    banner_label.setWordWrap(True)
    banner_label.setStyleSheet(f"color: {ACCENT}; font-weight: 700; background: transparent;")
    banner_row.addWidget(banner_label, 1)
    finish_btn = QPushButton("Finish && analyze")  # && → literal & (not a mnemonic)
    finish_btn.setObjectName("ActionButton")
    finish_btn.clicked.connect(self._finish_diagnostic)
    banner_row.addWidget(finish_btn)
    discard_btn = QPushButton("Discard")
    discard_btn.clicked.connect(self._discard_diagnostic)
    banner_row.addWidget(discard_btn)
    banner.hide()
    page_layout.addWidget(banner)
    self._banner = banner
    self._banner_label = banner_label
    self._finish_btn = finish_btn
    self._discard_btn = discard_btn

    # --- hard-crash banner: a previous diagnostic ended without a clean stop ---------
    crash_banner = QFrame()
    crash_banner.setObjectName("Card")
    crash_banner.setStyleSheet(f"#Card {{ border: 1px solid {WARN}; }}")
    crash_row = QHBoxLayout(crash_banner)
    crash_row.setContentsMargins(16, 10, 16, 10)
    crash_row.setSpacing(10)
    crash_label = QLabel("")
    crash_label.setWordWrap(True)
    crash_label.setStyleSheet(f"color: {WARN}; font-weight: 700; background: transparent;")
    crash_row.addWidget(crash_label, 1)
    analyze_btn = QPushButton("Analyze crash")
    analyze_btn.setObjectName("ActionButton")
    analyze_btn.clicked.connect(self._analyze_crash)
    crash_row.addWidget(analyze_btn)
    dismiss_btn = QPushButton("Dismiss")
    dismiss_btn.clicked.connect(self._dismiss_crash)
    crash_row.addWidget(dismiss_btn)
    crash_banner.hide()
    page_layout.addWidget(crash_banner)
    self._crash_banner = crash_banner
    self._crash_label = crash_label
    self._analyze_btn = analyze_btn
    self._dismiss_btn = dismiss_btn

    # Once-a-second poll that refreshes the recording banner while a capture runs.
    diag_timer = QTimer(self)
    diag_timer.setInterval(1000)
    diag_timer.timeout.connect(self._poll_diag)
    self._diag_timer = diag_timer

    # --- libraries (opt-in checkboxes) ------------------------------------------------
    libraries_card = QFrame()
    libraries_card.setObjectName("Card")
    libraries_layout = QVBoxLayout(libraries_card)
    libraries_layout.setContentsMargins(16, 12, 16, 12)
    libraries_layout.setSpacing(6)
    libraries_heading = QLabel("Steam libraries")
    libraries_heading.setStyleSheet("font-weight: 700; background: transparent;")
    libraries_layout.addWidget(libraries_heading)
    self._lib_box = QVBoxLayout()
    self._lib_box.setSpacing(6)
    libraries_layout.addLayout(self._lib_box)
    lib_hint = QLabel("Looking for Steam libraries…")
    lib_hint.setObjectName("Muted")
    lib_hint.setWordWrap(True)
    self._lib_hint = lib_hint
    libraries_layout.addWidget(lib_hint)
    page_layout.addWidget(libraries_card)

    # --- games list -------------------------------------------------------------------
    games_scroll = QScrollArea()
    games_scroll.setWidgetResizable(True)
    games_scroll.setFrameShape(QFrame.Shape.NoFrame)
    games_scroll.setStyleSheet("background: transparent;")
    self._container = QWidget()
    games_layout = QVBoxLayout(self._container)
    games_layout.setContentsMargins(0, 0, 0, 0)
    games_layout.setSpacing(8)
    games_layout.setAlignment(Qt.AlignmentFlag.AlignTop)
    self._list = games_layout
    games_scroll.setWidget(self._container)
    page_layout.addWidget(games_scroll, 1)

    self._load_cached()  # instant display from the last scan
    QTimer.singleShot(400, self.refresh)  # then rescan in the background on launch
    self._check_crash()  # surface an interrupted (crashed) diagnostic
    self._refresh_import_btn()  # show Import only if AI is configured
|
||||
|
||||
# --- loading ----------------------------------------------------------------------
|
||||
|
||||
def _load_cached(self) -> None:
    """Show the games from the last scan's cache and publish the new-game count."""
    from ..core import steam

    cached = steam.load_cache() or {}
    self._new_appids = set(cached.get("new_appids", []))
    games = steam.cached_games()
    if games:
        self._populate_games(games, self._new_appids)
    new_total = len(self._new_appids)
    self.new_count_changed.emit(new_total)
|
||||
|
||||
def refresh(self) -> None:
    """Start a background rescan; a call made while one is running is ignored."""
    if not self._busy:
        self._busy = True
        self._rescan_btn.setEnabled(False)
        self._status.setText("Scanning Steam libraries…")
        scanner = threading.Thread(target=self._work, daemon=True)
        scanner.start()
|
||||
|
||||
def _work(self) -> None:
    """Background scan: describe libraries, collect non-Steam games, rescan Steam.

    Emits ``_libraries_ready`` with one dict per discovered library (``path``,
    ``label``, ``selected``, ``count``), then ``_scanned`` with the result of
    ``steam.rescan()``. Any failure, including a failed import, emits
    ``_scanned`` with ``None`` so the receiver can leave the busy state.
    A failure while collecting non-Steam games only empties ``_extra_games``.
    """
    try:
        from ..core import launchers, steam

        chosen = {os.path.realpath(p) for p in steam.selected_library_paths()}
        libs = [
            {
                "path": lib.path,
                "label": lib.label,
                # The selection set holds resolved paths, so resolve the library
                # path too; otherwise a library reached through a symlink would
                # render as unselected even though it is stored as selected.
                "selected": os.path.realpath(lib.path) in chosen,
                "count": len(steam.scan_library(lib.path)),
            }
            for lib in steam.discover_libraries()
        ]
        self._libraries_ready.emit(libs)
        try:
            from ..core import customgames

            # non-Steam: Lutris/Heroic + user-added games (SPT etc.)
            self._extra_games = list(launchers.scan()) + customgames.scan()
        except Exception:
            self._extra_games = []
        self._scanned.emit(steam.rescan())
    except Exception:
        self._scanned.emit(None)
|
||||
|
||||
# --- rendering --------------------------------------------------------------------
|
||||
|
||||
def _render_libraries(self, libs) -> None:
|
||||
while self._lib_box.count():
|
||||
item = self._lib_box.takeAt(0)
|
||||
w = item.widget()
|
||||
if w is not None:
|
||||
w.deleteLater()
|
||||
if not libs:
|
||||
self._lib_hint.setText("No Steam libraries detected. Is Steam installed?")
|
||||
self._lib_hint.show()
|
||||
return
|
||||
self._lib_hint.hide()
|
||||
for lib in libs:
|
||||
label = lib["path"]
|
||||
if lib["label"]:
|
||||
label += f" [{lib['label']}]"
|
||||
cb = QCheckBox(f"{label} · {lib['count']} games")
|
||||
cb.setChecked(lib["selected"])
|
||||
cb.toggled.connect(lambda checked, p=lib["path"]: self._toggle_library(p, checked))
|
||||
self._lib_box.addWidget(cb)
|
||||
|
||||
def _toggle_library(self, path: str, checked: bool) -> None:
    """Add or remove ``path`` from the saved library selection, then rescan.

    Paths are stored resolved (``os.path.realpath``) and sorted.
    """
    stored = load_config().get("steam_libraries") or []
    selection = {os.path.realpath(p) for p in stored}
    resolved = os.path.realpath(path)
    if checked:
        selection.add(resolved)
    else:
        selection.discard(resolved)
    update_config(steam_libraries=sorted(selection))
    self.refresh()
|
||||
|
||||
def _render_games(self, result) -> None:
|
||||
self._busy = False
|
||||
self._rescan_btn.setEnabled(True)
|
||||
if result is None:
|
||||
self._status.setText("scan failed")
|
||||
return
|
||||
self._new_appids = set(result.new_appids)
|
||||
games = list(result.games) + list(self._extra_games)
|
||||
self._populate_games(games, self._new_appids)
|
||||
new = len(self._new_appids)
|
||||
suffix = f" · {new} new" if new else ""
|
||||
non_steam = f" · {len(self._extra_games)} non-Steam" if self._extra_games else ""
|
||||
self._status.setText(
|
||||
f"{len(games)} games · {time.strftime('%H:%M:%S')}{suffix}{non_steam}"
|
||||
)
|
||||
self.new_count_changed.emit(new)
|
||||
|
||||
def _populate_games(self, games, new_appids: set[str]) -> None:
    """Rebuild the game list with one row per entry in ``games``.

    An empty ``games`` shows a placeholder message instead. Games whose
    ``launcher`` attribute is not ``"steam"`` get the launcher name as
    sub-label and an empty appid; the rest use the library folder name and
    their own appid.
    """
    from ..core import steam

    layout = self._list
    while layout.count():
        old = layout.takeAt(0).widget()
        if old is not None:
            old.deleteLater()

    if not games:
        placeholder = QLabel(
            "No games to show yet — check a Steam library above to scan it for games."
        )
        placeholder.setObjectName("Muted")
        placeholder.setWordWrap(True)
        layout.addWidget(placeholder)
        layout.addStretch(1)
        return

    for game in games:
        launcher = getattr(game, "launcher", "steam")
        if launcher == "steam":
            folder = os.path.basename(game.library.rstrip("/"))
            sublabel = folder or game.library
            appid = game.appid
        else:
            sublabel = launcher.title()
            appid = ""  # non-Steam: can't steam:// launch it
        row = _game_row(
            game.name,
            sublabel,
            steam.human_size(game.size_bytes),
            game.appid in new_appids,
            appid=appid,
            on_diagnose=self._start_diagnostic,
        )
        layout.addWidget(row)
    layout.addStretch(1)
|
||||
|
||||
# --- guided diagnostic (M6/D12) ---------------------------------------------------
|
||||
|
||||
def _start_diagnostic(self, name: str, appid: str = "") -> None:
    """Explain the capture flow, then start a focused diagnostic for ``name``.

    The dialog offers "launch and start" (disabled when ``appid`` is empty),
    "start without launching" and cancel. Once the capture has started the
    recording banner and poll timer are activated. If launching was requested
    but did not succeed, the user is told to launch the game manually.
    """
    from ..core import diagnostic, steam

    if diagnostic.is_running():
        QMessageBox.information(
            self, "RigDoctor",
            "A capture is already running — finish or discard it first.")
        return

    # Tell the user what the flow actually is, and offer to launch the game for them.
    prompt = QMessageBox(self)
    prompt.setIcon(QMessageBox.Icon.Information)
    prompt.setWindowTitle(f"Run Diagnostic — {name}")
    prompt.setText(f"Record a focused diagnostic while you play {name}?")
    prompt.setInformativeText(
        "RigDoctor will capture sensors in the background. Then:\n\n"
        "1. Play the game and try to reproduce the freeze / black screen / crash.\n"
        "2. When you're done — or after a hard freeze and reboot — come back here and "
        "click “Finish & analyze”.\n\n"
        "Your readings are saved continuously, so even a hard lock won't lose them."
    )
    launch_btn = prompt.addButton("Launch game && start", QMessageBox.ButtonRole.AcceptRole)
    start_btn = prompt.addButton("Start without launching", QMessageBox.ButtonRole.ActionRole)
    prompt.addButton("Cancel", QMessageBox.ButtonRole.RejectRole)
    if not appid:
        launch_btn.setEnabled(False)  # no appid → can't ask Steam to launch it
    prompt.exec()

    clicked = prompt.clickedButton()
    if clicked not in (launch_btn, start_btn):
        return

    if diagnostic.start(game=name) is None:
        QMessageBox.warning(self, "RigDoctor", "Couldn't start the capture.")
        return

    wants_launch = clicked is launch_btn
    launched = steam.launch_game(appid) if wants_launch else False

    self._diag_game = name
    for button in (self._finish_btn, self._discard_btn):
        button.setEnabled(True)
    self._banner.show()
    self._diag_timer.start()
    self._poll_diag()

    if wants_launch and not launched:
        QMessageBox.information(
            self, "RigDoctor",
            "Recording started, but couldn't launch the game automatically — "
            "launch it yourself, then click “Finish & analyze” when you're done.")
|
||||
|
||||
def _poll_diag(self) -> None:
    """Refresh the recording banner from the active capture's status.

    When ``diagnostic.active()`` reports nothing, the poll timer is stopped
    and the banner text is left as it was.
    """
    from ..core import diagnostic

    status = diagnostic.active()
    if not status:
        self._diag_timer.stop()  # recorder exited on its own
        return

    samples = status.get("samples", 0)
    lost = " · ⚠ GPU-lost detected" if status.get("gpu_lost") else ""
    game = status.get("game") or self._diag_game or "your game"
    message = (
        f"● Recording {game} — play it and reproduce the problem, then click "
        f"“Finish & analyze”. ({samples} samples{lost})"
    )
    self._banner_label.setText(message)
|
||||
|
||||
def _finish_diagnostic(self) -> None:
|
||||
self._diag_timer.stop()
|
||||
self._finish_btn.setEnabled(False)
|
||||
self._discard_btn.setEnabled(False)
|
||||
self._banner_label.setText("Analyzing… (running the health report)")
|
||||
threading.Thread(target=self._work_finish, daemon=True).start()
|
||||
|
||||
def _work_finish(self) -> None:
    """Finish the running capture and emit its result (``None`` on error) on ``_diag_done``."""
    from ..core import diagnostic

    outcome = None
    try:
        outcome = diagnostic.finish()
    except Exception:
        outcome = None
    self._diag_done.emit(outcome)
|
||||
|
||||
def _on_diag_done(self, result) -> None:
    """Hide both banners, re-enable their buttons and present the analysis.

    ``result`` of ``None`` produces a warning box instead of the result dialog.
    """
    self._banner.hide()
    self._crash_banner.hide()
    for button in (self._finish_btn, self._discard_btn, self._analyze_btn):
        button.setEnabled(True)

    if result is not None:
        DiagnosticDialog(result, self).exec()
        return
    QMessageBox.warning(self, "RigDoctor", "The diagnostic couldn't be analyzed.")
|
||||
|
||||
def _discard_diagnostic(self) -> None:
    """Stop polling, stop the background recorder and hide the recording banner."""
    from ..core import reccontrol

    timer = self._diag_timer
    timer.stop()
    reccontrol.stop_background()
    self._banner.hide()
|
||||
|
||||
def _add_custom_game(self) -> None:
    """Ask for a game no launcher reports and store it.

    The dialog collects a name plus an optional launch command/script and an
    optional log folder. Nothing happens on cancel or with an empty name. A
    successful ``customgames.add`` triggers a rescan; a falsy return shows an
    "already in your games" notice.
    """
    from ..core import customgames

    dialog = QDialog(self)
    dialog.setWindowTitle("Add game")
    dialog.setMinimumWidth(560)
    form = QVBoxLayout(dialog)
    form.setContentsMargins(20, 18, 20, 16)
    form.setSpacing(10)

    blurb = QLabel(
        "Add a game no launcher reports — a standalone mod launcher like SPT, an itch.io "
        "download, or any hand-installed game.")
    blurb.setWordWrap(True)
    form.addWidget(blurb)

    # Game name
    name_edit = QLineEdit()
    name_edit.setPlaceholderText("SPT")
    form.addWidget(QLabel("Game name"))
    form.addWidget(name_edit)

    # Launch command / script, with a file picker
    cmd_edit = QLineEdit()
    cmd_edit.setPlaceholderText("e.g. /run/media/.../Escape-From-Tarkov/tarkov.sh")
    cmd_line = QHBoxLayout()
    cmd_line.addWidget(cmd_edit, 1)
    cmd_browse = QPushButton("Browse…")
    cmd_line.addWidget(cmd_browse, 0)
    form.addWidget(QLabel("Launch command / script (optional — enables launch + auto-capture)"))
    form.addLayout(cmd_line)

    # Log folder, with a directory picker
    log_edit = QLineEdit()
    log_edit.setPlaceholderText("auto-detected from the script's folder (its logs/ subfolder)")
    log_line = QHBoxLayout()
    log_line.addWidget(log_edit, 1)
    log_browse = QPushButton("Browse…")
    log_line.addWidget(log_browse, 0)
    form.addWidget(QLabel("Log folder (optional — read into crash diagnostics)"))
    form.addLayout(log_line)

    def choose_script() -> None:
        chosen, _ = QFileDialog.getOpenFileName(dialog, "Select the launch script/executable")
        if chosen:
            cmd_edit.setText(chosen)

    def choose_log_folder() -> None:
        chosen = QFileDialog.getExistingDirectory(dialog, "Select the game's log folder")
        if chosen:
            log_edit.setText(chosen)

    cmd_browse.clicked.connect(choose_script)
    log_browse.clicked.connect(choose_log_folder)

    button_row = QHBoxLayout()
    button_row.addStretch(1)
    cancel_btn = QPushButton("Cancel")
    cancel_btn.clicked.connect(dialog.reject)
    button_row.addWidget(cancel_btn)
    add_btn = QPushButton("Add")
    add_btn.setObjectName("PrimaryButton")
    add_btn.setDefault(True)
    add_btn.clicked.connect(dialog.accept)
    button_row.addWidget(add_btn)
    form.addLayout(button_row)

    if dialog.exec() != QDialog.DialogCode.Accepted:
        return
    name = name_edit.text().strip()
    if not name:
        return

    added = customgames.add(
        name,
        command=cmd_edit.text().strip() or None,
        logdir=log_edit.text().strip() or None,
    )
    if added:
        self.refresh()
    else:
        QMessageBox.information(self, "Add game", f"'{name}' is already in your games.")
|
||||
|
||||
def _show_autocapture(self) -> None:
    """Show a dialog with the Steam launch option for automatic capture and a Copy button."""
    from ..core import wrap

    option = wrap.launch_option()

    dialog = QDialog(self)
    dialog.setWindowTitle("Auto-capture in Steam")
    dialog.resize(580, 250)
    body = QVBoxLayout(dialog)
    body.setContentsMargins(20, 18, 20, 16)
    body.setSpacing(12)

    instructions = QLabel(
        "Capture automatically every time you launch a game — no need to click "
        "Run Diagnostic.\n\n"
        "1. In Steam, right-click the game → Properties → Launch Options.\n"
        "2. Paste the line below.\n\n"
        "RigDoctor starts a focused capture when the game launches and stops it on exit. "
        "If the game hard-freezes, you'll get a crash report next time you open RigDoctor."
    )
    instructions.setWordWrap(True)
    body.addWidget(instructions)

    # Read-only field holding the launch option, with a button that copies it.
    option_row = QHBoxLayout()
    option_field = QLineEdit(option)
    option_field.setReadOnly(True)
    option_row.addWidget(option_field, 1)
    copy_btn = QPushButton("Copy")
    copy_btn.setObjectName("PrimaryButton")
    copy_btn.clicked.connect(lambda: QApplication.clipboard().setText(option))
    option_row.addWidget(copy_btn)
    body.addLayout(option_row)

    footer = QHBoxLayout()
    footer.addStretch(1)
    close_btn = QPushButton("Close")
    close_btn.clicked.connect(dialog.accept)
    footer.addWidget(close_btn)
    body.addLayout(footer)

    dialog.exec()
|
||||
|
||||
# --- import a crash dump (.dmp) ---------------------------------------------------
|
||||
|
||||
def _refresh_import_btn(self) -> None:
    """Show the import button only while ``ai.is_configured()`` is true."""
    from ..core import ai

    configured = ai.is_configured()
    self._import_btn.setVisible(configured)
|
||||
|
||||
def _import_dump(self) -> None:
    """Pick a crash dump file and parse it on a daemon thread.

    Shows a notice and stops when no AI provider is configured. Cancelling the
    file dialog does nothing. Otherwise the import button is disabled and the
    chosen path is handed to ``_work_import``.
    """
    from ..core import ai

    if not ai.is_configured():
        QMessageBox.information(
            self, "RigDoctor",
            "Set up an AI provider first (Settings → AI assistant) to analyze a crash dump.")
        return

    home = os.path.expanduser("~")
    chosen, _ = QFileDialog.getOpenFileName(
        self, "Import crash dump", home,
        "Crash dumps (*.dmp);;All files (*)")
    if not chosen:
        return

    self._import_btn.setEnabled(False)
    self._status.setText("Parsing crash dump…")
    parser = threading.Thread(target=self._work_import, args=(chosen,), daemon=True)
    parser.start()
|
||||
|
||||
def _work_import(self, path: str) -> None:
    """Parse the dump at ``path`` and emit the report (``None`` on error) on ``_dump_parsed``."""
    from ..core import minidump

    parsed = None
    try:
        parsed = minidump.parse(path)  # parses + runs minidump_stackwalk if installed
    except Exception:
        parsed = None
    self._dump_parsed.emit(parsed)
|
||||
|
||||
def _on_dump_parsed(self, report) -> None:
    """Re-enable the import button, clear the status text and present the parsed dump.

    A missing report or one whose ``ok`` is falsy produces a warning box
    (using ``report.error`` when a report exists); otherwise the minidump
    dialog is opened.
    """
    self._import_btn.setEnabled(True)
    self._status.setText("")

    usable = report is not None and report.ok
    if usable:
        MinidumpDialog(report, self).exec()
        return

    detail = "Couldn't read the file." if report is None else report.error
    QMessageBox.warning(
        self, "Import crash dump", f"Couldn't analyze the dump — {detail}")
|
||||
|
||||
# --- hard-crash recovery ----------------------------------------------------------
|
||||
|
||||
def _check_crash(self) -> None:
    """Show the hard-crash banner when ``diagnostic.pending_crash()`` reports one, else hide it."""
    from ..core import diagnostic

    info = diagnostic.pending_crash()
    if info is None:
        self._crash_banner.hide()
        return

    game = info.game or "your last game"
    extra = " · ⚠ GPU-lost was captured" if info.gpu_lost else ""
    message = (
        f"⚠ Your last diagnostic for {game} ended unexpectedly — likely a hard crash "
        f"({info.samples} samples{extra}). Analyze it to see the final readings and the "
        f"likely cause from the system logs."
    )
    self._crash_label.setText(message)
    self._analyze_btn.setEnabled(True)
    self._crash_banner.show()
|
||||
|
||||
def _analyze_crash(self) -> None:
    """Acknowledge the pending crash and analyze it on a daemon thread."""
    from ..core import diagnostic

    diagnostic.acknowledge_crash()  # don't prompt again for this one
    self._analyze_btn.setEnabled(False)
    self._crash_label.setText("Analyzing the crash (final readings + system logs)…")
    analyzer = threading.Thread(target=self._work_analyze_crash, daemon=True)
    analyzer.start()
|
||||
|
||||
def _work_analyze_crash(self) -> None:
    """Analyze the crashed capture and emit the result (``None`` on error) on ``_diag_done``."""
    from ..core import diagnostic

    outcome = None
    try:
        outcome = diagnostic.analyze_crash()
    except Exception:
        outcome = None
    self._diag_done.emit(outcome)
|
||||
|
||||
def _dismiss_crash(self) -> None:
    """Acknowledge the pending crash without analyzing it and hide its banner."""
    from ..core import diagnostic

    diagnostic.acknowledge_crash()
    banner = self._crash_banner
    banner.hide()
|
||||
|
||||
# --- nav badge integration --------------------------------------------------------
|
||||
|
||||
def showEvent(self, event) -> None:  # noqa: N802 (Qt override)
    """Sync the page with current state each time it becomes visible.

    Viewing the list acknowledges new games, so the sidebar count is reset to
    zero; the ``_new_appids`` set itself is left untouched here. A capture
    that is still running gets its banner and poll timer restored; otherwise
    a pending interrupted diagnostic is re-surfaced.
    """
    super().showEvent(event)
    self._refresh_import_btn()  # AI may have been configured since this page was built

    if self._new_appids:
        from ..core import steam

        acknowledger = threading.Thread(target=steam.acknowledge_new, daemon=True)
        acknowledger.start()
        self.new_count_changed.emit(0)

    from ..core import diagnostic

    if not diagnostic.is_running():
        self._check_crash()  # re-surface an interrupted diagnostic if one is pending
        return

    # Reflect a capture that's still running (e.g. started earlier, navigated back).
    status = diagnostic.active() or {}
    self._diag_game = status.get("game") or self._diag_game
    self._banner.show()
    if not self._diag_timer.isActive():
        self._diag_timer.start()
|
||||
@@ -2,11 +2,6 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
|
||||
@@ -21,40 +16,7 @@ from PySide6.QtWidgets import (
|
||||
QWidget,
|
||||
)
|
||||
|
||||
from .theme import ACCENT, CRIT, GOOD, MUTED, WARN
|
||||
|
||||
_SEV = {
|
||||
"critical": ("CRITICAL", CRIT),
|
||||
"warning": ("WARNING", WARN),
|
||||
"info": ("INFO", MUTED),
|
||||
"ok": ("OK", GOOD),
|
||||
}
|
||||
|
||||
|
||||
def _finding_widget(finding) -> QFrame:
    """Build a card for one finding: coloured headline, optional detail, optional suggestion.

    The headline label and colour come from ``_SEV`` keyed by
    ``finding.severity``; an unknown severity falls back to ``"?"`` and ``MUTED``.
    """
    label, color = _SEV.get(finding.severity, ("?", MUTED))

    card = QFrame()
    card.setObjectName("Card")
    column = QVBoxLayout(card)
    column.setContentsMargins(16, 12, 16, 12)
    column.setSpacing(4)

    headline = QLabel(f"{label} · {finding.category}: {finding.title}")
    headline.setStyleSheet(f"color: {color}; font-weight: 700; background: transparent;")
    headline.setWordWrap(True)
    column.addWidget(headline)

    if finding.detail:
        detail_label = QLabel(finding.detail)
        detail_label.setObjectName("Muted")
        detail_label.setWordWrap(True)
        column.addWidget(detail_label)

    if finding.suggestion:
        hint = QLabel(f"→ {finding.suggestion}")
        hint.setStyleSheet(f"color: {ACCENT}; background: transparent;")
        hint.setWordWrap(True)
        column.addWidget(hint)

    return card
|
||||
from .widgets import finding_card
|
||||
|
||||
|
||||
class HealthPage(QWidget):
|
||||
@@ -70,18 +32,16 @@ class HealthPage(QWidget):
|
||||
root.setSpacing(16)
|
||||
|
||||
header = QHBoxLayout()
|
||||
title = QLabel("Health")
|
||||
title = QLabel("System Health")
|
||||
title.setObjectName("PageTitle")
|
||||
header.addWidget(title)
|
||||
header.addStretch(1)
|
||||
self._status = QLabel("")
|
||||
self._status.setObjectName("Muted")
|
||||
header.addWidget(self._status)
|
||||
self._admin_btn = QPushButton("Run with admin")
|
||||
self._admin_btn.setToolTip("Run all checks with root (SMART needs it) — prompts for your password")
|
||||
self._admin_btn.clicked.connect(self._run_admin)
|
||||
self._admin_btn.setEnabled(shutil.which("pkexec") is not None)
|
||||
header.addWidget(self._admin_btn)
|
||||
self._stress_btn = QPushButton("Stress test…")
|
||||
self._stress_btn.clicked.connect(self._open_stress)
|
||||
header.addWidget(self._stress_btn)
|
||||
self._run_btn = QPushButton("Run health report")
|
||||
self._run_btn.setObjectName("PrimaryButton")
|
||||
self._run_btn.clicked.connect(self._run)
|
||||
@@ -102,6 +62,11 @@ class HealthPage(QWidget):
|
||||
|
||||
QTimer.singleShot(300, self._run) # auto-run shortly after the window opens
|
||||
|
||||
def _open_stress(self) -> None:
    """Open the stress-test dialog modally, parented to this page."""
    from .stress_dialog import StressDialog

    dialog = StressDialog(self)
    dialog.exec()
|
||||
|
||||
def _run(self) -> None:
|
||||
self._run_btn.setEnabled(False)
|
||||
self._status.setText("Scanning logs, SMART, and driver…")
|
||||
@@ -116,32 +81,10 @@ class HealthPage(QWidget):
|
||||
findings = []
|
||||
self._result.emit(findings)
|
||||
|
||||
def _run_admin(self) -> None:
|
||||
self._run_btn.setEnabled(False)
|
||||
self._admin_btn.setEnabled(False)
|
||||
self._status.setText("Running all checks with admin (you'll be prompted)…")
|
||||
threading.Thread(target=self._work_admin, daemon=True).start()
|
||||
|
||||
def _work_admin(self) -> None:
    """Run the report through ``pkexec`` and emit the parsed findings on ``_result``.

    Uses the ``rigdoctor`` executable next to the Python interpreter when it
    exists, otherwise ``python -m rigdoctor``. Emits ``None`` when the command
    exits non-zero, raises, times out (180 s) or prints unparsable output.
    """
    from ..core.health import Finding

    cli = os.path.join(os.path.dirname(sys.executable), "rigdoctor")
    # Fall back to the module form for dev / not on PATH next to python.
    target = [cli] if os.path.exists(cli) else [sys.executable, "-m", "rigdoctor"]
    cmd = ["pkexec", *target, "report", "--json"]

    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=180)
        if proc.returncode == 0:
            findings = [Finding(**d) for d in json.loads(proc.stdout)]
        else:
            findings = None
    except Exception:
        findings = None  # pkexec cancelled / failed / unparsable
    self._result.emit(findings)
|
||||
|
||||
def _render_findings(self, findings) -> None:
|
||||
self._run_btn.setEnabled(True)
|
||||
self._admin_btn.setEnabled(shutil.which("pkexec") is not None)
|
||||
if findings is None: # elevated run cancelled/failed — keep current results
|
||||
self._status.setText("admin run cancelled")
|
||||
if findings is None: # collection failed — keep current results
|
||||
self._status.setText("check failed")
|
||||
return
|
||||
|
||||
while self._list.count():
|
||||
@@ -157,5 +100,5 @@ class HealthPage(QWidget):
|
||||
f"{time.strftime('%H:%M:%S')}"
|
||||
)
|
||||
for finding in findings:
|
||||
self._list.addWidget(_finding_widget(finding))
|
||||
self._list.addWidget(finding_card(finding))
|
||||
self._list.addStretch(1)
|
||||
|
||||
@@ -2,11 +2,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import threading
|
||||
|
||||
from PySide6.QtCore import Qt, QTimer, Signal
|
||||
@@ -24,7 +20,6 @@ from PySide6.QtWidgets import (
|
||||
)
|
||||
|
||||
from ..core import inventory
|
||||
from .theme import MUTED
|
||||
|
||||
|
||||
def _section_card(section) -> QFrame:
|
||||
@@ -73,11 +68,6 @@ class InventoryPage(QWidget):
|
||||
self._status = QLabel("")
|
||||
self._status.setObjectName("Muted")
|
||||
header.addWidget(self._status)
|
||||
self._admin_btn = QPushButton("Run with admin")
|
||||
self._admin_btn.setToolTip("Re-collect with root for motherboard/BIOS/RAM details (dmidecode)")
|
||||
self._admin_btn.setEnabled(shutil.which("pkexec") is not None)
|
||||
self._admin_btn.clicked.connect(self._run_admin)
|
||||
header.addWidget(self._admin_btn)
|
||||
self._copy_btn = QPushButton("Copy Markdown")
|
||||
self._copy_btn.clicked.connect(self._copy)
|
||||
header.addWidget(self._copy_btn)
|
||||
@@ -90,7 +80,7 @@ class InventoryPage(QWidget):
|
||||
header.addWidget(self._refresh_btn)
|
||||
root.addLayout(header)
|
||||
|
||||
scroll = QScrollArea()
|
||||
self._scroll = scroll = QScrollArea()
|
||||
scroll.setWidgetResizable(True)
|
||||
scroll.setFrameShape(QFrame.Shape.NoFrame)
|
||||
scroll.setStyleSheet("background: transparent;")
|
||||
@@ -115,34 +105,23 @@ class InventoryPage(QWidget):
|
||||
sections = []
|
||||
self._result.emit(sections)
|
||||
|
||||
def _run_admin(self) -> None:
|
||||
self._busy("Collecting with admin (you'll be prompted)…")
|
||||
threading.Thread(target=self._work_admin, daemon=True).start()
|
||||
|
||||
def _work_admin(self) -> None:
    """Run the CLI ``inventory --json`` under ``pkexec`` and emit the result.

    Emits ``self._result`` with ``inventory.from_dict(...)`` of the parsed
    stdout when the command exits with 0, otherwise with ``None`` (non-zero
    exit, timeout after 120 s, or output that cannot be parsed).
    """
    # Prefer the ``rigdoctor`` script next to the interpreter; otherwise run
    # the package as a module with the same interpreter.
    cli = os.path.join(os.path.dirname(sys.executable), "rigdoctor")
    if os.path.exists(cli):
        cmd = [cli, "inventory", "--json"]
    else:
        cmd = [sys.executable, "-m", "rigdoctor", "inventory", "--json"]

    sections = None
    try:
        proc = subprocess.run(["pkexec", *cmd], capture_output=True, text=True, timeout=120)
        if proc.returncode == 0:
            sections = inventory.from_dict(json.loads(proc.stdout))
    except Exception:
        # Deliberate best-effort: any failure is reported as "no result".
        sections = None
    self._result.emit(sections)
|
||||
|
||||
def _busy(self, text: str) -> None:
|
||||
self._status.setText(text)
|
||||
for b in (self._refresh_btn, self._admin_btn, self._copy_btn, self._save_btn):
|
||||
for b in (self._refresh_btn, self._copy_btn, self._save_btn):
|
||||
b.setEnabled(False)
|
||||
|
||||
def _render(self, sections) -> None:
|
||||
self._refresh_btn.setEnabled(True)
|
||||
self._admin_btn.setEnabled(shutil.which("pkexec") is not None)
|
||||
self._copy_btn.setEnabled(True)
|
||||
self._save_btn.setEnabled(True)
|
||||
if sections is None: # admin run cancelled/failed — keep current
|
||||
self._status.setText("admin run cancelled")
|
||||
if sections is None: # collection failed — keep current
|
||||
self._status.setText("collection failed")
|
||||
return
|
||||
if sections == self._sections: # unchanged — don't rebuild (would jump scroll)
|
||||
self._status.setText("")
|
||||
return
|
||||
|
||||
scroll_pos = self._scroll.verticalScrollBar().value()
|
||||
self._sections = sections
|
||||
while self._list.count():
|
||||
item = self._list.takeAt(0)
|
||||
@@ -153,6 +132,8 @@ class InventoryPage(QWidget):
|
||||
self._list.addWidget(_section_card(section))
|
||||
self._list.addStretch(1)
|
||||
self._status.setText("")
|
||||
# restore scroll after the layout settles so re-renders don't yank to the top
|
||||
QTimer.singleShot(0, lambda: self._scroll.verticalScrollBar().setValue(scroll_pos))
|
||||
|
||||
def _copy(self) -> None:
|
||||
if self._sections:
|
||||
|
||||
@@ -2,12 +2,14 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import html
|
||||
import os
|
||||
import sys
|
||||
import threading
|
||||
from pathlib import Path
|
||||
|
||||
from PySide6.QtCore import Qt, QProcess, QTimer, Signal
|
||||
from PySide6.QtGui import QTextDocument
|
||||
from PySide6.QtGui import QIcon, QTextDocument
|
||||
from PySide6.QtWidgets import (
|
||||
QApplication,
|
||||
QButtonGroup,
|
||||
@@ -18,7 +20,9 @@ from PySide6.QtWidgets import (
|
||||
QMainWindow,
|
||||
QMessageBox,
|
||||
QPushButton,
|
||||
QScrollArea,
|
||||
QStackedWidget,
|
||||
QSystemTrayIcon,
|
||||
QTextEdit,
|
||||
QVBoxLayout,
|
||||
QWidget,
|
||||
@@ -26,31 +30,54 @@ from PySide6.QtWidgets import (
|
||||
|
||||
from .. import __version__
|
||||
from ..config import load_config
|
||||
from ..core import updates
|
||||
from ..core import alerts, elevation, updates
|
||||
from .dashboard import Dashboard
|
||||
from .environment_page import EnvironmentPage
|
||||
from .games_page import GamesPage
|
||||
from .health_page import HealthPage
|
||||
from .inventory_page import InventoryPage
|
||||
from .recorder_page import RecorderPage
|
||||
from .setup_page import SetupPage
|
||||
from .theme import ACCENT, GOOD, MUTED
|
||||
from .share_page import SharePage
|
||||
from .theme import ACCENT, CRIT, GOOD, MUTED, TEXT
|
||||
from .tray import TrayIcon
|
||||
from .worker import SamplerWorker
|
||||
|
||||
_NAV_ITEMS = ["Dashboard", "Logs", "Health", "Setup", "Inventory"]
|
||||
# Sidebar grouped by intent. Each page name maps to a widget built in __init__; the stack is
|
||||
# filled in this order, so _PAGES.index(name) is the stack index.
|
||||
_NAV = [
|
||||
("Monitor", ["Dashboard"]),
|
||||
("Diagnose", ["Games", "Recordings", "System Health", "Tuning"]),
|
||||
("System", ["Inventory"]),
|
||||
("App", ["Settings", "Share"]),
|
||||
]
|
||||
_PAGES = [name for _section, names in _NAV for name in names]
|
||||
# Pages that manage their own scrolling (pinned header + inner scroll) or must fill the
|
||||
# viewport (the Share terminal) — these are added to the stack as-is; every other page is
|
||||
# wrapped in a QScrollArea so it scrolls when too tall and doesn't pin the window's height.
|
||||
_NO_WRAP = {"Dashboard", "System Health", "Inventory", "Share"}
|
||||
_ICON = Path(__file__).parent / "assets" / "rigdoctor.svg"
|
||||
|
||||
|
||||
class MainWindow(QMainWindow):
|
||||
_update_checked = Signal(object) # (state, tag, notes)
|
||||
_update_applied = Signal(int) # pip exit code
|
||||
_changelog_ready = Signal(object) # ([(tag, date, notes)], error)
|
||||
_elevated = Signal() # privileged data collected at launch
|
||||
|
||||
def __init__(self, interval: float = 1.0) -> None:
|
||||
super().__init__()
|
||||
self.setWindowTitle("RigDoctor")
|
||||
self.resize(1000, 680)
|
||||
cfg = load_config()
|
||||
|
||||
central = QWidget()
|
||||
self.setCentralWidget(central)
|
||||
layout = QHBoxLayout(central)
|
||||
outer = QVBoxLayout(central)
|
||||
outer.setContentsMargins(0, 0, 0, 0)
|
||||
outer.setSpacing(0)
|
||||
body = QWidget()
|
||||
layout = QHBoxLayout(body)
|
||||
layout.setContentsMargins(0, 0, 0, 0)
|
||||
layout.setSpacing(0)
|
||||
|
||||
@@ -63,22 +90,54 @@ class MainWindow(QMainWindow):
|
||||
self.dashboard = Dashboard()
|
||||
self.recorder_page = RecorderPage()
|
||||
self.health_page = HealthPage()
|
||||
self.setup_page = SetupPage()
|
||||
self.games_page = GamesPage()
|
||||
self.games_page.new_count_changed.connect(self._set_games_badge)
|
||||
self.environment_page = EnvironmentPage()
|
||||
self.inventory_page = InventoryPage()
|
||||
self._stack.addWidget(self.dashboard) # 0 Dashboard
|
||||
self._stack.addWidget(self.recorder_page) # 1 Logs
|
||||
self._stack.addWidget(self.health_page) # 2 Health
|
||||
self._stack.addWidget(self.setup_page) # 3 Setup
|
||||
self._stack.addWidget(self.inventory_page) # 4 Inventory
|
||||
self.setup_page = SetupPage()
|
||||
self.setup_page.changed.connect(self._apply_alert_settings)
|
||||
self.share_page = SharePage()
|
||||
# Page name → widget; the stack is filled in _PAGES order so indices line up.
|
||||
self._pages = {
|
||||
"Dashboard": self.dashboard,
|
||||
"Games": self.games_page,
|
||||
"Recordings": self.recorder_page,
|
||||
"System Health": self.health_page,
|
||||
"Tuning": self.environment_page,
|
||||
"Inventory": self.inventory_page,
|
||||
"Settings": self.setup_page,
|
||||
"Share": self.share_page,
|
||||
}
|
||||
for name in _PAGES:
|
||||
page = self._pages[name]
|
||||
self._stack.addWidget(page if name in _NO_WRAP else self._scrollable(page))
|
||||
content_layout.addWidget(self._stack)
|
||||
|
||||
layout.addWidget(self._build_sidebar())
|
||||
layout.addWidget(content, 1)
|
||||
outer.addWidget(body, 1)
|
||||
outer.addWidget(self._build_footer())
|
||||
|
||||
self._worker = SamplerWorker(interval=interval)
|
||||
self._worker.sampled.connect(self.dashboard.update_sample)
|
||||
# Desktop alerts (M8): overheat / GPU-lost from the sample stream, new-version below.
|
||||
# Configurable on the Notifications page; gated by AlertMonitor.enabled.
|
||||
self._notified_update_tag = None
|
||||
self._alert_monitor = alerts.AlertMonitor(
|
||||
gpu_temp=float(cfg.get("gpu_temp_alert", 90.0)),
|
||||
cpu_temp=float(cfg.get("cpu_temp_alert", 95.0)),
|
||||
)
|
||||
self._alert_monitor.enabled = bool(cfg.get("alerts_enabled", True))
|
||||
self._worker.sampled.connect(self._alert_monitor.check)
|
||||
self._worker.start()
|
||||
|
||||
# Ask for the password once at launch and collect root-only data (SMART +
|
||||
# dmidecode); Health/Inventory then always show the full picture (config:
|
||||
# elevate_on_launch). Falls back silently to non-root if cancelled/unavailable.
|
||||
if cfg.get("elevate_on_launch", True) and elevation.available():
|
||||
self._elevated.connect(self._on_elevated)
|
||||
threading.Thread(target=self._collect_privileged, daemon=True).start()
|
||||
|
||||
# Update check (M13): once at launch, then periodically so a newly published
|
||||
# release is detected without restarting (interval from config; 0 disables).
|
||||
self._latest_tag = None
|
||||
@@ -88,13 +147,37 @@ class MainWindow(QMainWindow):
|
||||
self._update_applied.connect(self._on_update_applied)
|
||||
self._changelog_ready.connect(self._on_changelog)
|
||||
self._start_update_check()
|
||||
minutes = float(load_config().get("update_check_minutes", 30) or 0)
|
||||
minutes = float(cfg.get("update_check_minutes", 30) or 0)
|
||||
if minutes > 0:
|
||||
self._update_timer = QTimer(self)
|
||||
self._update_timer.setInterval(int(minutes * 60_000))
|
||||
self._update_timer.timeout.connect(self._start_update_check)
|
||||
self._update_timer.start()
|
||||
|
||||
# Reflect any capture (manual, diagnostic, or the Steam wrapper) in the sidebar on
|
||||
# every page, so it's always clear when RigDoctor is recording and for which game.
|
||||
self._rec_timer = QTimer(self)
|
||||
self._rec_timer.setInterval(1500)
|
||||
self._rec_timer.timeout.connect(self._update_recording)
|
||||
self._rec_timer.start()
|
||||
self._update_recording()
|
||||
|
||||
# System-tray applet (M11) — optional; only when the desktop offers a tray. When
|
||||
# present, closing the window hides to the tray instead of quitting.
|
||||
self._tray = None
|
||||
self._quitting = False
|
||||
self._tray_hint_shown = False
|
||||
if QSystemTrayIcon.isSystemTrayAvailable():
|
||||
icon = self.windowIcon() if not self.windowIcon().isNull() else QIcon(str(_ICON))
|
||||
self._tray = TrayIcon(
|
||||
self, icon,
|
||||
gpu_alert=float(cfg.get("gpu_temp_alert", 90.0)),
|
||||
cpu_alert=float(cfg.get("cpu_temp_alert", 95.0)),
|
||||
)
|
||||
self._worker.sampled.connect(self._tray.update_sample)
|
||||
self._tray.show()
|
||||
QApplication.instance().setQuitOnLastWindowClosed(False)
|
||||
|
||||
def _build_sidebar(self) -> QFrame:
|
||||
bar = QFrame()
|
||||
bar.setObjectName("Sidebar")
|
||||
@@ -109,26 +192,42 @@ class MainWindow(QMainWindow):
|
||||
subtitle.setObjectName("AppSubtitle")
|
||||
v.addWidget(title)
|
||||
v.addWidget(subtitle)
|
||||
|
||||
# Global recording indicator — visible on every page while a capture runs.
|
||||
self._rec_indicator = QLabel()
|
||||
self._rec_indicator.setWordWrap(True)
|
||||
self._rec_indicator.setTextFormat(Qt.TextFormat.RichText)
|
||||
self._rec_indicator.setStyleSheet(
|
||||
f"background: #241316; border: 1px solid {CRIT}; border-radius: 8px; padding: 8px 10px;"
|
||||
)
|
||||
self._rec_indicator.hide()
|
||||
v.addSpacing(12)
|
||||
v.addWidget(self._rec_indicator)
|
||||
v.addSpacing(18)
|
||||
|
||||
group = QButtonGroup(self)
|
||||
group.setExclusive(True)
|
||||
for i, name in enumerate(_NAV_ITEMS):
|
||||
btn = QPushButton(name)
|
||||
btn.setObjectName("NavButton")
|
||||
btn.setCheckable(True)
|
||||
btn.setCursor(Qt.CursorShape.PointingHandCursor)
|
||||
btn.setChecked(i == 0)
|
||||
btn.clicked.connect(lambda _checked, idx=i: self._stack.setCurrentIndex(idx))
|
||||
group.addButton(btn, i)
|
||||
v.addWidget(btn)
|
||||
self._nav_buttons: dict[str, QPushButton] = {}
|
||||
for section, names in _NAV:
|
||||
header = QLabel(section.upper())
|
||||
header.setObjectName("NavSection")
|
||||
v.addSpacing(8)
|
||||
v.addWidget(header)
|
||||
for name in names:
|
||||
idx = _PAGES.index(name)
|
||||
btn = QPushButton(name)
|
||||
btn.setObjectName("NavButton")
|
||||
btn.setCheckable(True)
|
||||
btn.setCursor(Qt.CursorShape.PointingHandCursor)
|
||||
btn.setChecked(idx == 0)
|
||||
btn.clicked.connect(lambda _checked, i=idx: self._stack.setCurrentIndex(i))
|
||||
group.addButton(btn, idx)
|
||||
v.addWidget(btn)
|
||||
self._nav_buttons[name] = btn
|
||||
|
||||
v.addStretch(1)
|
||||
live = QLabel(f'<span style="color:{ACCENT};">●</span> <span style="color:{MUTED};">Live</span>')
|
||||
v.addWidget(live)
|
||||
version = QLabel(f"v{__version__}")
|
||||
version.setObjectName("Muted")
|
||||
v.addWidget(version)
|
||||
changelog_btn = QPushButton("Changelog")
|
||||
changelog_btn.setObjectName("LinkButton")
|
||||
changelog_btn.setCursor(Qt.CursorShape.PointingHandCursor)
|
||||
@@ -158,6 +257,27 @@ class MainWindow(QMainWindow):
|
||||
v.addWidget(self._restart_btn)
|
||||
return bar
|
||||
|
||||
def _scrollable(self, page: QWidget) -> QScrollArea:
    """Wrap a page so it scrolls when taller than the window — and so the window can shrink
    below the page's natural height instead of being pinned to it.

    Returns a frameless, resizable ``QScrollArea`` that owns ``page`` and never
    shows a horizontal scroll bar.
    """
    area = QScrollArea()
    area.setWidget(page)
    area.setWidgetResizable(True)
    area.setFrameShape(QFrame.Shape.NoFrame)
    area.setHorizontalScrollBarPolicy(Qt.ScrollBarPolicy.ScrollBarAlwaysOff)
    return area
|
||||
|
||||
def _build_footer(self) -> QFrame:
    """Build the footer bar: a right-aligned, muted "RigDoctor v<version>" label."""
    bar = QFrame()
    bar.setObjectName("Footer")
    row = QHBoxLayout(bar)
    row.setContentsMargins(14, 5, 16, 5)
    row.addStretch(1)  # pushes the version label to the right edge
    version = QLabel(f"RigDoctor v{__version__}")
    version.setObjectName("Muted")
    row.addWidget(version)
    return bar
|
||||
|
||||
def _restart(self) -> None:
|
||||
gui = os.path.join(os.path.dirname(sys.executable), "rigdoctor-gui")
|
||||
if os.path.exists(gui):
|
||||
@@ -169,6 +289,9 @@ class MainWindow(QMainWindow):
|
||||
def _apply_update(self) -> None:
|
||||
if not self._latest_tag:
|
||||
return
|
||||
if updates.install_kind() != "pip": # apt/source: can't pip-update — show the command
|
||||
QMessageBox.information(self, "Update RigDoctor", updates.update_hint())
|
||||
return
|
||||
box = QMessageBox(self)
|
||||
box.setWindowTitle(f"Update to {self._latest_tag}")
|
||||
box.setText(f"Update RigDoctor to {self._latest_tag}?")
|
||||
@@ -196,6 +319,83 @@ class MainWindow(QMainWindow):
|
||||
self._update_label.setText("update failed")
|
||||
self._update_btn.setEnabled(True)
|
||||
|
||||
def _collect_privileged(self) -> None:
    """Collect root-only data via pkexec and announce it when it succeeds.

    Stores the collected data with ``elevation.set_privileged`` and emits
    ``self._elevated``. Does nothing when the collection returns ``None``.
    """
    data = elevation.collect_via_pkexec()
    if data is None:
        return
    elevation.set_privileged(data)
    self._elevated.emit()
|
||||
|
||||
def _on_elevated(self) -> None:
|
||||
# Re-run Health + Inventory now that root-only data is available (SMART for Health,
|
||||
# dmidecode motherboard/BIOS/RAM for Inventory).
|
||||
self.health_page._run()
|
||||
self.inventory_page._run()
|
||||
|
||||
# --- tray-driven actions (M11) ----------------------------------------------------
|
||||
|
||||
def show_page(self, name: str) -> None:
    """Bring the window forward on a given page (used by the tray).

    When ``name`` is a known navigation entry, the stack switches to that page
    and its sidebar button is checked. The window is restored, raised, and
    activated whether or not the name was known.
    """
    button = self._nav_buttons.get(name)
    if button is not None:
        self._stack.setCurrentIndex(_PAGES.index(name))
        button.setChecked(True)
    self.showNormal()
    self.raise_()
    self.activateWindow()
|
||||
|
||||
def show_dashboard(self) -> None:
    """Bring the window forward on the Dashboard page."""
    self.show_page("Dashboard")
|
||||
|
||||
def tray_available(self) -> bool:
    """Return True when a tray icon was created for this window."""
    return self._tray is not None
|
||||
|
||||
def start_minimized_note(self) -> None:
    """Started hidden to the tray (autostart) — let the user know it's there.

    Does nothing when there is no tray icon. Otherwise marks the tray hint as
    shown and displays a 4-second informational tray message.
    """
    if self._tray is None:
        return
    self._tray_hint_shown = True
    self._tray.showMessage(
        "RigDoctor",
        "Running in the tray — right-click the icon for actions.",
        QSystemTrayIcon.MessageIcon.Information,
        4000,
    )
|
||||
|
||||
def run_diagnostic(self, name: str, appid: str) -> None:
    """Switch to the Games page and start a diagnostic for the given game.

    ``name`` and ``appid`` are passed unchanged to the Games page's
    ``_start_diagnostic``.
    """
    self.show_page("Games")
    self.games_page._start_diagnostic(name, appid)
|
||||
|
||||
def quit_app(self) -> None:
    """Exit the application for real (as opposed to hiding to the tray).

    Sets ``_quitting`` so ``closeEvent`` will not turn the close into a hide,
    stops the sampler worker, shuts down the Share page, hides the tray icon
    if there is one, and asks the ``QApplication`` to quit.
    """
    self._quitting = True
    self._worker.stop()
    self.share_page.shutdown()
    if self._tray is not None:
        self._tray.hide()
    QApplication.instance().quit()
|
||||
|
||||
def _update_recording(self) -> None:
    """Refresh the sidebar recording indicator from ``diagnostic.active()``.

    Hides the indicator when nothing is active. Otherwise shows a rich-text
    "Recording" badge, followed by the game name (HTML-escaped) when the status
    has one and a GPU-lost warning when the status flags it.
    """
    from ..core import diagnostic

    status = diagnostic.active()
    if not status:
        self._rec_indicator.hide()
        return

    lines = [f"<span style='color:{CRIT};'>●</span> <b style='color:{TEXT};'>Recording</b>"]
    game = status.get("game")
    if game:
        # The game name is external text going into a rich-text label — escape it.
        lines.append(f"<span style='color:{TEXT};'>{html.escape(str(game))}</span>")
    if status.get("gpu_lost"):
        lines.append(f"<span style='color:{CRIT};'>⚠ GPU-lost</span>")
    self._rec_indicator.setText("<br>".join(lines))
    self._rec_indicator.show()
|
||||
|
||||
def _set_games_badge(self, count: int) -> None:
|
||||
btn = self._nav_buttons.get("Games")
|
||||
if btn is not None:
|
||||
btn.setText(f"Games ● {count}" if count > 0 else "Games")
|
||||
|
||||
def _apply_alert_settings(self) -> None:
    """Reload the config and push the alert settings into the alert monitor.

    Missing keys fall back to: alerts enabled, GPU threshold 90.0, CPU
    threshold 95.0.
    """
    cfg = load_config()
    monitor = self._alert_monitor
    monitor.enabled = bool(cfg.get("alerts_enabled", True))
    monitor.gpu_temp = float(cfg.get("gpu_temp_alert", 90.0))
    monitor.cpu_temp = float(cfg.get("cpu_temp_alert", 95.0))
|
||||
|
||||
def _manual_check(self) -> None:
|
||||
if self._applied:
|
||||
return
|
||||
@@ -257,11 +457,28 @@ class MainWindow(QMainWindow):
|
||||
self._update_label.setText("update check unavailable")
|
||||
elif state == updates.AVAILABLE:
|
||||
self._update_label.setText(f'<span style="color:{GOOD};">{tag} available</span>')
|
||||
self._update_btn.setText(f"Update to {tag}")
|
||||
self._update_btn.setText(f"Update to {tag}" if updates.install_kind() == "pip" else "How to update")
|
||||
self._update_btn.setVisible(True)
|
||||
if self._alert_monitor.enabled and tag != self._notified_update_tag:
|
||||
self._notified_update_tag = tag # once per version, not every poll
|
||||
alerts.notify("Update available", f"RigDoctor {tag} is available — open RigDoctor to update.")
|
||||
else: # UP_TO_DATE
|
||||
self._update_label.setText("up-to-date")
|
||||
|
||||
def closeEvent(self, event) -> None:  # noqa: N802 (Qt override)
    """Hide to the tray on close when a tray exists; otherwise shut down and close.

    With a tray icon and no quit in progress, the close event is ignored, the
    window is hidden, and a one-time tray message is shown. Otherwise the
    sampler worker is stopped, the Share page is shut down, and the event is
    passed to the base class.
    """
    # With a tray, closing the window hides it (the app keeps running for the tray
    # readouts + any capture); Quit from the tray menu exits for real.
    if self._tray is not None and not self._quitting:
        event.ignore()
        self.hide()
        if not self._tray_hint_shown:
            self._tray_hint_shown = True
            self._tray.showMessage(
                "RigDoctor",
                "Still running in the tray — right-click the icon for actions or Quit.",
                QSystemTrayIcon.MessageIcon.Information,
                5000,
            )
        return

    self._worker.stop()
    self.share_page.shutdown()
    super().closeEvent(event)
|
||||
|
||||
@@ -0,0 +1,182 @@
|
||||
"""Results view for an imported crash dump (.dmp, M14): parsed summary + AI explanation.
|
||||
|
||||
Mirrors :class:`DiagnosticDialog` — the same opt-in, streamed "Explain with AI" flow (D24),
|
||||
applied to a Windows minidump parsed by :mod:`core.minidump` instead of a sensor capture.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
from pathlib import Path
|
||||
|
||||
from PySide6.QtCore import Qt, Signal
|
||||
from PySide6.QtGui import QFont, QTextCursor
|
||||
from PySide6.QtWidgets import (
|
||||
QDialog,
|
||||
QFrame,
|
||||
QHBoxLayout,
|
||||
QLabel,
|
||||
QMessageBox,
|
||||
QPushButton,
|
||||
QScrollArea,
|
||||
QTextEdit,
|
||||
QVBoxLayout,
|
||||
QWidget,
|
||||
)
|
||||
|
||||
from ..core import minidump
|
||||
from .widgets import finding_card
|
||||
|
||||
|
||||
class MinidumpDialog(QDialog):
    """Results dialog for one parsed crash dump.

    Shows the text summary, the findings, and (when present) the external
    stackwalker output of a ``minidump.MinidumpReport``. If an AI provider is
    configured, an "Explain with AI" button streams an explanation into a
    nested dialog.

    Each "Explain" click gets its own request id. Signals coming from a worker
    whose request is no longer current (its stream dialog was closed, possibly
    followed by a new click) are dropped, so an old stream can never write into
    a newer dialog.
    """

    _chunk = Signal(object)  # (request_id, token delta) — worker thread -> GUI
    _explained = Signal(object)  # (request_id, (ok, full_text)) when the AI stream finishes

    def __init__(self, report: minidump.MinidumpReport, parent=None) -> None:
        """Build the dialog for ``report``; ``parent`` is the optional Qt parent."""
        super().__init__(parent)
        self._report = report
        self._stream_view = None
        self._stream_status = None
        self._request_id = 0  # id of the stream currently allowed to update the view
        self._chunk.connect(self._route_chunk)
        self._explained.connect(self._route_explained)
        name = Path(report.path).name
        self.setWindowTitle(f"Crash dump — {name}")
        self.resize(660, 680)

        root = QVBoxLayout(self)
        root.setContentsMargins(20, 18, 20, 16)
        root.setSpacing(14)

        title = QLabel(f"Crash dump — {name}")
        title.setObjectName("PageTitle")
        root.addWidget(title)

        scroll = QScrollArea()
        scroll.setWidgetResizable(True)
        scroll.setFrameShape(QFrame.Shape.NoFrame)
        scroll.setStyleSheet("background: transparent;")
        body = QWidget()
        col = QVBoxLayout(body)
        col.setContentsMargins(0, 0, 0, 0)
        col.setSpacing(10)
        col.setAlignment(Qt.AlignmentFlag.AlignTop)

        # Parsed summary (crash reason / faulting module / OS / CPU / modules) — monospace.
        col.addWidget(self._heading("Dump summary"))
        summary = QLabel(minidump.to_text(report))
        summary.setObjectName("Report")
        summary.setFont(QFont("monospace"))
        summary.setTextInteractionFlags(Qt.TextInteractionFlag.TextSelectableByMouse)
        summary.setWordWrap(False)
        summary.setStyleSheet(
            "background: #0d0f13; color: #cfd3da; border: 1px solid #2a2f39; "
            "border-radius: 8px; padding: 10px;"
        )
        col.addWidget(summary)

        findings = minidump.to_findings(report)
        col.addWidget(self._heading(f"Findings ({len(findings)})"))
        for finding in findings:
            col.addWidget(finding_card(finding))

        if report.stackwalk:  # only when an external stackwalker was available
            col.addWidget(self._heading("minidump_stackwalk output"))
            sw = QTextEdit()
            sw.setObjectName("Report")
            sw.setReadOnly(True)
            sw.setFont(QFont("monospace"))
            sw.setPlainText(report.stackwalk)
            sw.setMinimumHeight(160)
            col.addWidget(sw)

        scroll.setWidget(body)
        root.addWidget(scroll, 1)

        buttons = QHBoxLayout()
        self._explain_btn = QPushButton("Explain with AI")
        self._explain_btn.clicked.connect(self._explain_with_ai)
        from ..core import ai

        self._explain_btn.setVisible(ai.is_configured())  # opt-in only; hidden if not set up
        buttons.addWidget(self._explain_btn)
        buttons.addStretch(1)
        close = QPushButton("Close")
        close.setObjectName("PrimaryButton")
        close.clicked.connect(self.accept)
        buttons.addWidget(close)
        root.addLayout(buttons)

    @staticmethod
    def _heading(text: str) -> QLabel:
        """Return a bold section-heading label with a transparent background."""
        label = QLabel(text)
        label.setStyleSheet("font-weight: 700; background: transparent;")
        return label

    # --- AI explanation (M14, D24) — streamed; runs only on this button press ----------
    def _explain_with_ai(self) -> None:
        """Ask for consent if needed, then stream an AI explanation into a nested dialog.

        For a non-local provider the user must confirm before anything is sent.
        The request runs on a daemon thread while the stream dialog's own event
        loop is active. When that dialog closes, the request is invalidated so
        late signals from its worker are ignored.
        """
        from ..core import ai

        if not ai.is_local():  # cloud provider → explicit consent before sending data
            confirm = QMessageBox.question(
                self,
                "Send to AI provider",
                f"This sends the parsed crash dump to {ai.provider_label()}.\n\nContinue?",
                QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No,
                QMessageBox.StandardButton.No,
            )
            if confirm != QMessageBox.StandardButton.Yes:
                return

        self._explain_btn.setEnabled(False)
        self._request_id += 1
        request_id = self._request_id
        dialog = self._open_stream_dialog()
        threading.Thread(target=self._work_explain, args=(request_id,), daemon=True).start()
        dialog.exec()  # streaming fills the view live via signals during this nested loop
        self._stream_view = self._stream_status = None
        # The worker may still be running; bump the id so nothing it emits from
        # now on can reach a dialog opened by a later click.
        self._request_id += 1
        self._explain_btn.setEnabled(True)

    def _work_explain(self, request_id: int | None = None) -> None:
        """Worker-thread body: stream the explanation and emit tagged signals.

        ``request_id`` identifies the request this worker serves; when omitted,
        the id that is current at call time is used.
        """
        from ..core import ai

        if request_id is None:
            request_id = self._request_id
        text = minidump.to_ai_text(self._report)
        ok, reply = ai.explain_stream(
            text, on_chunk=lambda d: self._chunk.emit((request_id, d))
        )
        self._explained.emit((request_id, (ok, reply)))

    def _route_chunk(self, tagged) -> None:
        """Forward a ``(request_id, delta)`` chunk unless its request is stale."""
        request_id, delta = tagged
        if request_id == self._request_id:
            self._on_chunk(delta)

    def _route_explained(self, tagged) -> None:
        """Forward a ``(request_id, (ok, text))`` result unless its request is stale."""
        request_id, result = tagged
        if request_id == self._request_id:
            self._on_explained(result)

    def _on_chunk(self, delta: str) -> None:
        """Append one streamed token delta to the live view and keep it in sight."""
        if self._stream_view is None:
            return
        self._stream_view.moveCursor(QTextCursor.MoveOperation.End)
        self._stream_view.insertPlainText(delta)  # live plain text as tokens arrive
        self._stream_view.ensureCursorVisible()

    def _on_explained(self, result) -> None:
        """Finalize the stream view and status line from ``result`` = ``(ok, text)``."""
        ok, text = result
        if self._stream_view is not None:
            if ok:
                self._stream_view.setMarkdown(text)  # re-render the finished answer as Markdown
            else:
                self._stream_view.setPlainText(f"AI explanation failed:\n\n{text}")
        if self._stream_status is not None:
            self._stream_status.setText(
                "AI-generated suggestions — verify before acting, especially anything that changes "
                "settings or data." if ok else "The request failed.")

    def _open_stream_dialog(self) -> QDialog:
        """A live dialog the AI streams into; finalized to rendered Markdown when done.

        Also stores the dialog's text view and status label on ``self`` so the
        chunk/result handlers can update them.
        """
        from ..core import ai

        dlg = QDialog(self)
        dlg.setWindowTitle(f"AI explanation — {ai.provider_label()}")
        dlg.resize(620, 520)
        lay = QVBoxLayout(dlg)
        view = QTextEdit()
        view.setObjectName("Report")
        view.setReadOnly(True)
        lay.addWidget(view)
        status = QLabel("Streaming from the model…")
        status.setObjectName("Muted")
        status.setWordWrap(True)
        lay.addWidget(status)
        close = QPushButton("Close")
        close.setObjectName("PrimaryButton")
        close.clicked.connect(dlg.accept)
        lay.addWidget(close, alignment=Qt.AlignmentFlag.AlignRight)
        self._stream_view = view
        self._stream_status = status
        return dlg
|
||||
@@ -1,16 +1,19 @@
|
||||
"""Recording & Logs page (M3 in the GUI): start/stop/status + post-crash report.
|
||||
"""Recordings page (M3 in the GUI): recorder controls + view/report any captured log.
|
||||
|
||||
Drives the same background recorder as the CLI via core.reccontrol, so the GUI and
|
||||
`rigdoctor record …` are interchangeable.
|
||||
Drives the same background recorder as the CLI via core.reccontrol, and surfaces the
|
||||
captured data — the always-on log, the last guided diagnostic, and a preserved hard-crash
|
||||
(which can be analyzed in place). One place to see what was captured and what it means.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
import time
|
||||
|
||||
from PySide6.QtCore import Qt, QTimer, QUrl
|
||||
from PySide6.QtCore import Qt, QTimer, QUrl, Signal
|
||||
from PySide6.QtGui import QDesktopServices, QFont
|
||||
from PySide6.QtWidgets import (
|
||||
QComboBox,
|
||||
QDoubleSpinBox,
|
||||
QFrame,
|
||||
QHBoxLayout,
|
||||
@@ -25,6 +28,7 @@ from .. import config
|
||||
from ..core import reccontrol
|
||||
from ..core.crashlog import summarize
|
||||
from ..render import format_headline, render_summary
|
||||
from .diagnostic_dialog import DiagnosticDialog
|
||||
from .theme import GOOD, MUTED, WARN
|
||||
|
||||
|
||||
@@ -45,31 +49,30 @@ def _fmt_time(value, fmt="%Y-%m-%d %H:%M:%S") -> str:
|
||||
|
||||
|
||||
class RecorderPage(QWidget):
|
||||
_analyzed = Signal(object) # DiagnosticResult from a crash analysis
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self.setObjectName("Page")
|
||||
self._analyzed.connect(self._show_analysis)
|
||||
root = QVBoxLayout(self)
|
||||
root.setContentsMargins(20, 18, 20, 18)
|
||||
root.setSpacing(16)
|
||||
|
||||
title = QLabel("Recording")
|
||||
title = QLabel("Recordings")
|
||||
title.setObjectName("PageTitle")
|
||||
root.addWidget(title)
|
||||
|
||||
# --- Status + controls -------------------------------------------------
|
||||
status_card, status_layout = _panel("Status")
|
||||
|
||||
self._state = QLabel("○ Not recording")
|
||||
self._state.setStyleSheet(f"color: {MUTED}; font-weight: 700; background: transparent;")
|
||||
status_layout.addWidget(self._state)
|
||||
|
||||
self._info = QLabel("")
|
||||
self._info.setObjectName("Muted")
|
||||
status_layout.addWidget(self._info)
|
||||
|
||||
self._latest = QLabel("")
|
||||
status_layout.addWidget(self._latest)
|
||||
|
||||
self._warn = QLabel("")
|
||||
self._warn.setStyleSheet(f"color: {WARN}; font-weight: 600; background: transparent;")
|
||||
self._warn.setVisible(False)
|
||||
@@ -97,19 +100,20 @@ class RecorderPage(QWidget):
|
||||
status_layout.addLayout(controls)
|
||||
root.addWidget(status_card)
|
||||
|
||||
# --- Report ------------------------------------------------------------
|
||||
report_card = QFrame()
|
||||
report_card.setObjectName("Card")
|
||||
report_layout = QVBoxLayout(report_card)
|
||||
report_layout.setContentsMargins(16, 14, 16, 14)
|
||||
report_layout.setSpacing(10)
|
||||
# --- Captured logs -----------------------------------------------------
|
||||
report_card, report_layout = _panel("Captured logs")
|
||||
header = QHBoxLayout()
|
||||
report_title = QLabel("Post-crash report")
|
||||
report_title.setStyleSheet("font-weight: 700; background: transparent;")
|
||||
header.addWidget(report_title)
|
||||
header.addStretch(1)
|
||||
header.addWidget(QLabel("Show:"))
|
||||
self._source = QComboBox()
|
||||
self._source.currentIndexChanged.connect(self._load_report)
|
||||
header.addWidget(self._source, 1)
|
||||
self._analyze_btn = QPushButton("Analyze crash")
|
||||
self._analyze_btn.setObjectName("ActionButton")
|
||||
self._analyze_btn.clicked.connect(self._analyze_crash)
|
||||
self._analyze_btn.setVisible(False)
|
||||
header.addWidget(self._analyze_btn)
|
||||
refresh_btn = QPushButton("Refresh")
|
||||
refresh_btn.clicked.connect(self._load_report)
|
||||
refresh_btn.clicked.connect(self._refresh_sources)
|
||||
header.addWidget(refresh_btn)
|
||||
report_layout.addLayout(header)
|
||||
|
||||
@@ -121,13 +125,12 @@ class RecorderPage(QWidget):
|
||||
report_layout.addWidget(self._report)
|
||||
root.addWidget(report_card, 1)
|
||||
|
||||
# Poll recorder status once a second (reflects CLI-driven sessions too).
|
||||
self._timer = QTimer(self)
|
||||
self._timer.setInterval(1000)
|
||||
self._timer.timeout.connect(self._refresh_status)
|
||||
self._timer.start()
|
||||
self._refresh_status()
|
||||
self._load_report()
|
||||
self._refresh_sources()
|
||||
|
||||
# --- actions ---------------------------------------------------------------
|
||||
def _on_start(self) -> None:
|
||||
@@ -139,12 +142,56 @@ class RecorderPage(QWidget):
|
||||
self._stop_btn.setEnabled(False)
|
||||
reccontrol.stop_background()
|
||||
QTimer.singleShot(600, self._refresh_status)
|
||||
QTimer.singleShot(900, self._load_report)
|
||||
QTimer.singleShot(900, self._refresh_sources)
|
||||
|
||||
def _open_folder(self) -> None:
    """Reveal the log directory in the desktop file manager, creating it first if absent."""
    log_dir = config.LOG_DIR
    log_dir.mkdir(parents=True, exist_ok=True)
    target = QUrl.fromLocalFile(str(log_dir))
    QDesktopServices.openUrl(target)
|
||||
|
||||
# --- captured logs ---------------------------------------------------------
|
||||
def _refresh_sources(self) -> None:
    """Rebuild the log-source combo box, keep the prior selection, and reload the report."""
    from ..core import diagnostic

    combo = self._source
    previous = combo.currentData()

    # Signals stay blocked while the list is rebuilt; the report is reloaded once, explicitly,
    # at the end of this method.
    combo.blockSignals(True)
    combo.clear()
    combo.addItem("Always-on capture", str(config.LOG_FILE))
    optional = (
        ("Last diagnostic", config.DIAG_LOG),
        ("Crash (unanalyzed)", config.DIAG_CRASH),
    )
    for label, path in optional:
        if path.exists():
            combo.addItem(label, str(path))

    # Restore the earlier selection when it is still listed, otherwise select the first entry.
    found = combo.findData(previous) if previous else -1
    combo.setCurrentIndex(max(found, 0))
    combo.blockSignals(False)

    self._analyze_btn.setVisible(diagnostic.pending_crash() is not None)
    self._load_report()
|
||||
|
||||
def _load_report(self) -> None:
    """Summarize the selected log (falling back to ``config.LOG_FILE``) into the report view."""
    selected = self._source.currentData()
    path = selected if selected else str(config.LOG_FILE)
    text = render_summary(summarize(path, last_n=10), log_path=path)
    self._report.setPlainText(text)
|
||||
|
||||
def _analyze_crash(self) -> None:
|
||||
self._analyze_btn.setEnabled(False)
|
||||
self._report.setPlainText("Analyzing the crash (final readings + system logs)…")
|
||||
threading.Thread(target=self._work_analyze, daemon=True).start()
|
||||
|
||||
def _work_analyze(self) -> None:
|
||||
from ..core import diagnostic
|
||||
|
||||
try:
|
||||
result = diagnostic.analyze_crash()
|
||||
except Exception:
|
||||
result = None
|
||||
self._analyzed.emit(result)
|
||||
|
||||
def _show_analysis(self, result) -> None:
|
||||
self._analyze_btn.setEnabled(True)
|
||||
if result is not None:
|
||||
DiagnosticDialog(result, self).exec()
|
||||
self._refresh_sources()
|
||||
|
||||
# --- refresh ---------------------------------------------------------------
|
||||
def _refresh_status(self) -> None:
|
||||
pid = reccontrol.running_pid()
|
||||
@@ -162,8 +209,10 @@ class RecorderPage(QWidget):
|
||||
self._interval.setEnabled(not running)
|
||||
|
||||
if status:
|
||||
game = status.get("game")
|
||||
game_line = f"Game: {game} " if game else ""
|
||||
self._info.setText(
|
||||
f"Samples: {status.get('samples', 0)} "
|
||||
f"{game_line}Samples: {status.get('samples', 0)} "
|
||||
f"Started: {_fmt_time(status.get('started'))} "
|
||||
f"Updated: {_fmt_time(status.get('updated'), '%H:%M:%S')}\n"
|
||||
f"Log: {status.get('log', config.LOG_FILE)}"
|
||||
@@ -179,7 +228,3 @@ class RecorderPage(QWidget):
|
||||
self._info.setText("No recording yet. Press “Start recording”.")
|
||||
self._latest.setText("")
|
||||
self._warn.setVisible(False)
|
||||
|
||||
def _load_report(self) -> None:
    """Render a summary of ``config.LOG_FILE`` (last 10 entries) into the report view."""
    log_file = config.LOG_FILE
    report = summarize(log_file, last_n=10)
    self._report.setPlainText(render_summary(report, log_path=log_file))
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Setup page (M9 in the GUI): show environment + optional components, install missing."""
|
||||
"""Settings page: components/deps, alerts (M8), account access (token), and uninstall."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -8,12 +8,18 @@ from PySide6.QtCore import Qt, QUrl, Signal
|
||||
from PySide6.QtGui import QDesktopServices
|
||||
from PySide6.QtWidgets import (
|
||||
QApplication,
|
||||
QButtonGroup,
|
||||
QCheckBox,
|
||||
QComboBox,
|
||||
QDoubleSpinBox,
|
||||
QFrame,
|
||||
QGridLayout,
|
||||
QHBoxLayout,
|
||||
QLabel,
|
||||
QLineEdit,
|
||||
QMessageBox,
|
||||
QPushButton,
|
||||
QRadioButton,
|
||||
QSizePolicy,
|
||||
QTextEdit,
|
||||
QVBoxLayout,
|
||||
@@ -21,7 +27,7 @@ from PySide6.QtWidgets import (
|
||||
)
|
||||
|
||||
from .. import config
|
||||
from ..core import installer, sysenv, uninstall, updates
|
||||
from ..core import ai, alerts, installer, service, sysenv, uninstall, updates
|
||||
from .theme import GOOD, MUTED, WARN
|
||||
|
||||
|
||||
@@ -49,18 +55,23 @@ _BACKEND_DESC = {
|
||||
class SetupPage(QWidget):
|
||||
_installed = Signal(int, str)
|
||||
_upd_state = Signal(object)
|
||||
_mode_applied = Signal(object) # (mode, ok, message) from a trigger-mode change
|
||||
_ai_tested = Signal(object) # (ok, message) from an AI connectivity test
|
||||
changed = Signal() # alert settings saved — main window re-applies them live
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self.setObjectName("Page")
|
||||
self._installed.connect(self._on_installed)
|
||||
self._upd_state.connect(self._on_upd_state)
|
||||
self._mode_applied.connect(self._on_mode_applied)
|
||||
self._ai_tested.connect(self._on_ai_tested)
|
||||
|
||||
root = QVBoxLayout(self)
|
||||
root.setContentsMargins(20, 18, 20, 18)
|
||||
root.setSpacing(16)
|
||||
|
||||
title = QLabel("Setup")
|
||||
title = QLabel("Settings")
|
||||
title.setObjectName("PageTitle")
|
||||
root.addWidget(title)
|
||||
|
||||
@@ -70,7 +81,7 @@ class SetupPage(QWidget):
|
||||
env_layout.addWidget(self._env)
|
||||
root.addWidget(env_card)
|
||||
|
||||
comp_card, comp_layout = _panel("Optional components")
|
||||
comp_card, comp_layout = _panel("Components & dependencies")
|
||||
self._components = QVBoxLayout()
|
||||
self._components.setSpacing(6)
|
||||
comp_layout.addLayout(self._components)
|
||||
@@ -80,14 +91,155 @@ class SetupPage(QWidget):
|
||||
self._install_btn.clicked.connect(self._install)
|
||||
self._refresh_btn = QPushButton("Re-check")
|
||||
self._refresh_btn.clicked.connect(self._refresh)
|
||||
wizard_btn = QPushButton("Run setup wizard")
|
||||
wizard_btn.clicked.connect(self._run_wizard)
|
||||
controls.addWidget(self._install_btn)
|
||||
controls.addWidget(self._refresh_btn)
|
||||
controls.addWidget(wizard_btn)
|
||||
controls.addStretch(1)
|
||||
comp_layout.addLayout(controls)
|
||||
root.addWidget(comp_card)
|
||||
|
||||
# Update access (M13): token gating updates to Gitea account holders.
|
||||
upd_card, upd_layout = _panel("Update access")
|
||||
# Alerts (M8) — folded in from the old Notifications page.
|
||||
alerts_card, alerts_layout = _panel("Notifications")
|
||||
self._alerts_enabled = QCheckBox("Enable desktop notifications")
|
||||
alerts_layout.addWidget(self._alerts_enabled)
|
||||
grid = QGridLayout()
|
||||
grid.setHorizontalSpacing(12)
|
||||
grid.setColumnStretch(2, 1)
|
||||
self._gpu_alert = self._spin()
|
||||
self._cpu_alert = self._spin()
|
||||
grid.addWidget(QLabel("GPU temperature alert"), 0, 0)
|
||||
grid.addWidget(self._gpu_alert, 0, 1)
|
||||
grid.addWidget(QLabel("CPU temperature alert"), 1, 0)
|
||||
grid.addWidget(self._cpu_alert, 1, 1)
|
||||
alerts_layout.addLayout(grid)
|
||||
alerts_note = QLabel("GPU-lost, critical kernel events (Xid, out-of-memory, disk I/O, PCIe), "
|
||||
"and new-version alerts are included whenever notifications are enabled.")
|
||||
alerts_note.setObjectName("Muted")
|
||||
alerts_note.setWordWrap(True)
|
||||
alerts_layout.addWidget(alerts_note)
|
||||
alerts_buttons = QHBoxLayout()
|
||||
save_alerts = QPushButton("Save")
|
||||
save_alerts.setObjectName("PrimaryButton")
|
||||
save_alerts.clicked.connect(self._save_alerts)
|
||||
test_alerts = QPushButton("Send test")
|
||||
test_alerts.clicked.connect(self._test_alerts)
|
||||
alerts_buttons.addWidget(save_alerts)
|
||||
alerts_buttons.addWidget(test_alerts)
|
||||
alerts_buttons.addStretch(1)
|
||||
self._alerts_status = QLabel("")
|
||||
self._alerts_status.setObjectName("Muted")
|
||||
alerts_buttons.addWidget(self._alerts_status)
|
||||
alerts_layout.addLayout(alerts_buttons)
|
||||
root.addWidget(alerts_card)
|
||||
|
||||
# Recording trigger (M9 / D6): when the crash logger runs.
|
||||
trig_card, trig_layout = _panel("Recording trigger")
|
||||
trig_desc = QLabel(
|
||||
"When the crash logger runs (uses a systemd --user service):\n"
|
||||
"• Manual — you start/stop it yourself.\n"
|
||||
"• Always-on — a background service records continuously.\n"
|
||||
"• Game-launch — auto-records while a Steam game is running."
|
||||
)
|
||||
trig_desc.setObjectName("Muted")
|
||||
trig_desc.setWordWrap(True)
|
||||
trig_layout.addWidget(trig_desc)
|
||||
trig_row = QHBoxLayout()
|
||||
self._trigger = QComboBox()
|
||||
self._trigger.addItems(list(service.MODES))
|
||||
apply_trigger = QPushButton("Apply")
|
||||
apply_trigger.setObjectName("PrimaryButton")
|
||||
apply_trigger.clicked.connect(self._apply_trigger)
|
||||
trig_row.addWidget(self._trigger, 1)
|
||||
trig_row.addWidget(apply_trigger)
|
||||
trig_layout.addLayout(trig_row)
|
||||
self._trigger_status = QLabel("")
|
||||
self._trigger_status.setObjectName("Muted")
|
||||
self._trigger_status.setWordWrap(True)
|
||||
trig_layout.addWidget(self._trigger_status)
|
||||
if not service.available():
|
||||
apply_trigger.setEnabled(False)
|
||||
self._trigger_status.setText("systemd --user isn't available on this system.")
|
||||
root.addWidget(trig_card)
|
||||
|
||||
# AI assistant (M14, D24): explain diagnostics. Strictly opt-in — the model is only
|
||||
# contacted when the user presses "Explain with AI"; this panel just configures it.
|
||||
ai_card, ai_layout = _panel("AI assistant")
|
||||
ai_desc = QLabel(
|
||||
"Optionally let an AI explain your diagnostics in plain language. It runs <b>only</b> "
|
||||
"when you press “Explain with AI” — never automatically. Choose a provider:\n"
|
||||
"• Ollama — a local model on your machine (private, no key; needs Ollama running).\n"
|
||||
"• Claude — Anthropic's API (higher quality; sends findings to Anthropic; needs a key)."
|
||||
)
|
||||
ai_desc.setObjectName("Muted")
|
||||
ai_desc.setWordWrap(True)
|
||||
ai_layout.addWidget(ai_desc)
|
||||
|
||||
prov_row = QHBoxLayout()
|
||||
self._ai_group = QButtonGroup(self)
|
||||
self._ai_ollama = QRadioButton("Ollama (local)")
|
||||
self._ai_claude = QRadioButton("Claude (Anthropic)")
|
||||
self._ai_group.addButton(self._ai_ollama)
|
||||
self._ai_group.addButton(self._ai_claude)
|
||||
self._ai_ollama.toggled.connect(self._on_ai_provider_changed)
|
||||
prov_row.addWidget(self._ai_ollama)
|
||||
prov_row.addWidget(self._ai_claude)
|
||||
prov_row.addStretch(1)
|
||||
ai_layout.addLayout(prov_row)
|
||||
|
||||
self._ai_model = QLineEdit()
|
||||
self._ai_model.setPlaceholderText(
|
||||
f"Model (e.g. {ai.OLLAMA_SUGGESTED_MODEL} for Ollama; blank = Claude default)")
|
||||
ai_layout.addWidget(self._ai_model)
|
||||
self._ai_endpoint = QLineEdit()
|
||||
self._ai_endpoint.setPlaceholderText("Ollama server URL (default http://localhost:11434)")
|
||||
ai_layout.addWidget(self._ai_endpoint)
|
||||
self._ai_key = QLineEdit()
|
||||
self._ai_key.setEchoMode(QLineEdit.EchoMode.Password)
|
||||
self._ai_key.setPlaceholderText("Claude API key (stored in your keyring)")
|
||||
ai_layout.addWidget(self._ai_key)
|
||||
|
||||
ai_btn_row = QHBoxLayout()
|
||||
ai_save = QPushButton("Save")
|
||||
ai_save.setObjectName("PrimaryButton")
|
||||
ai_save.clicked.connect(self._save_ai)
|
||||
self._ai_test_btn = QPushButton("Test")
|
||||
self._ai_test_btn.clicked.connect(self._test_ai)
|
||||
ai_btn_row.addWidget(ai_save)
|
||||
ai_btn_row.addWidget(self._ai_test_btn)
|
||||
ai_btn_row.addStretch(1)
|
||||
ai_layout.addLayout(ai_btn_row)
|
||||
self._ai_status = QLabel("")
|
||||
self._ai_status.setObjectName("Muted")
|
||||
self._ai_status.setWordWrap(True)
|
||||
ai_layout.addWidget(self._ai_status)
|
||||
root.addWidget(ai_card)
|
||||
|
||||
# Logging (M15): opt-in app logging + per-diagnostic storage (enables the Report bundle).
|
||||
log_card, log_layout = _panel("Logging")
|
||||
log_desc = QLabel(
|
||||
"Save application logs and store each diagnostic in its own folder so you can review "
|
||||
"or <b>Report</b> it. Off by default; everything stays on your machine.\n"
|
||||
f"• Diagnostics: {config.DIAGNOSTICS_DIR}\n"
|
||||
f"• Reports: {config.REPORTS_DIR}"
|
||||
)
|
||||
log_desc.setObjectName("Muted")
|
||||
log_desc.setWordWrap(True)
|
||||
log_layout.addWidget(log_desc)
|
||||
self._logging = QCheckBox("Enable logging (application + diagnostics)")
|
||||
self._logging.setChecked(config.load_config().get("logging_enabled", False))
|
||||
self._logging.toggled.connect(self._toggle_logging)
|
||||
log_layout.addWidget(self._logging)
|
||||
root.addWidget(log_card)
|
||||
|
||||
# Account access (M13/M12): one Gitea token gates updates and session sharing.
|
||||
upd_card, upd_layout = _panel("Account access")
|
||||
hint = QLabel("A Gitea access token unlocks updates and session sharing. "
|
||||
"Create it with scopes <b>read:user</b> and <b>read:repository</b>.")
|
||||
hint.setObjectName("Muted")
|
||||
hint.setWordWrap(True)
|
||||
upd_layout.addWidget(hint)
|
||||
self._upd_status = QLabel("")
|
||||
self._upd_status.setObjectName("Muted")
|
||||
self._upd_status.setWordWrap(True)
|
||||
@@ -95,7 +247,7 @@ class SetupPage(QWidget):
|
||||
token_row = QHBoxLayout()
|
||||
self._token_input = QLineEdit()
|
||||
self._token_input.setEchoMode(QLineEdit.EchoMode.Password)
|
||||
self._token_input.setPlaceholderText("Paste a Gitea token (scope: read:repository)")
|
||||
self._token_input.setPlaceholderText("Paste a Gitea token (read:user + read:repository)")
|
||||
save_btn = QPushButton("Save token")
|
||||
save_btn.setObjectName("PrimaryButton")
|
||||
save_btn.clicked.connect(self._save_token)
|
||||
@@ -110,7 +262,7 @@ class SetupPage(QWidget):
|
||||
self._output = QTextEdit()
|
||||
self._output.setObjectName("Report")
|
||||
self._output.setReadOnly(True)
|
||||
self._output.setMinimumHeight(180)
|
||||
self._output.setMinimumHeight(160)
|
||||
self._output.setVisible(False)
|
||||
root.addWidget(self._output)
|
||||
root.addStretch(1)
|
||||
@@ -124,8 +276,128 @@ class SetupPage(QWidget):
|
||||
root.addLayout(danger)
|
||||
|
||||
self._refresh()
|
||||
self._load_alerts()
|
||||
self._trigger.setCurrentText(config.load_config().get("trigger_mode", "manual"))
|
||||
self._load_ai()
|
||||
self._refresh_update_status()
|
||||
|
||||
# --- AI assistant (M14) ---------------------------------------------------
|
||||
def _load_ai(self) -> None:
    """Populate the AI assistant panel from the saved configuration."""
    settings = config.load_config()
    provider = settings.get("ai_provider", "")
    for radio, name in ((self._ai_claude, "claude"), (self._ai_ollama, "ollama")):
        radio.setChecked(provider == name)
    self._ai_model.setText(settings.get("ai_model", ""))
    self._ai_endpoint.setText(settings.get("ai_endpoint", "http://localhost:11434"))
    # The key itself is never shown; only the placeholder says that one is stored.
    if config.load_ai_key():
        self._ai_key.setPlaceholderText("Claude API key saved — type to replace")
    self._on_ai_provider_changed()
|
||||
|
||||
def _ai_provider(self) -> str:
|
||||
if self._ai_claude.isChecked():
|
||||
return "claude"
|
||||
if self._ai_ollama.isChecked():
|
||||
return "ollama"
|
||||
return ""
|
||||
|
||||
def _on_ai_provider_changed(self) -> None:
|
||||
prov = self._ai_provider()
|
||||
self._ai_endpoint.setVisible(prov == "ollama")
|
||||
self._ai_key.setVisible(prov == "claude")
|
||||
self._ai_test_btn.setEnabled(prov != "")
|
||||
if prov == "ollama" and not self._ai_model.text().strip():
|
||||
self._ai_model.setText(ai.OLLAMA_SUGGESTED_MODEL) # suggested default; user can change
|
||||
|
||||
def _save_ai(self) -> None:
    """Persist provider, model and endpoint; pass a newly typed Claude key to the key store."""
    provider = self._ai_provider()
    endpoint = self._ai_endpoint.text().strip()
    config.update_config(
        ai_provider=provider,
        ai_model=self._ai_model.text().strip(),
        ai_endpoint=endpoint if endpoint else "http://localhost:11434",
    )

    typed_key = self._ai_key.text().strip()
    if provider == "claude" and typed_key:
        config.save_ai_key(typed_key)
        # Do not leave the secret sitting in the widget once it has been stored.
        self._ai_key.clear()
        self._ai_key.setPlaceholderText("Claude API key saved — type to replace")

    if provider:
        message = "Saved."
    else:
        message = "Saved — no provider selected (AI stays off)."
    self._ai_status.setText(message)
|
||||
|
||||
def _test_ai(self) -> None:
|
||||
self._save_ai()
|
||||
self._ai_status.setText("Testing… contacting the provider.")
|
||||
self._ai_test_btn.setEnabled(False)
|
||||
threading.Thread(target=self._work_test_ai, daemon=True).start()
|
||||
|
||||
def _work_test_ai(self) -> None:
|
||||
from ..core import ai
|
||||
|
||||
ok, msg = ai.explain("Connectivity test — reply exactly: RigDoctor AI is working.")
|
||||
self._ai_tested.emit((ok, msg))
|
||||
|
||||
def _on_ai_tested(self, result) -> None:
|
||||
ok, msg = result
|
||||
self._ai_test_btn.setEnabled(True)
|
||||
self._ai_status.setText(("✓ " if ok else "✗ ") + (msg[:200] if msg else ""))
|
||||
|
||||
def _toggle_logging(self, on: bool) -> None:
    """Persist the logging opt-in and re-run the application log setup immediately."""
    from ..core import applog

    config.update_config(logging_enabled=on)
    # force=True re-applies the setup now rather than on the next start.
    applog.setup(force=True)
|
||||
|
||||
def _run_wizard(self) -> None:
    """Run the setup wizard modally, then re-sync this page with whatever it changed."""
    from .setup_wizard import SetupWizard

    wizard = SetupWizard(self)
    wizard.exec()
    self._refresh()
    saved_mode = config.load_config().get("trigger_mode", "manual")
    self._trigger.setCurrentText(saved_mode)
|
||||
|
||||
# --- recording trigger (M9) -----------------------------------------------
|
||||
def _apply_trigger(self) -> None:
|
||||
mode = self._trigger.currentText()
|
||||
self._trigger_status.setText(f"Applying “{mode}”… (may take a moment)")
|
||||
threading.Thread(target=self._work_trigger, args=(mode,), daemon=True).start()
|
||||
|
||||
def _work_trigger(self, mode: str) -> None:
|
||||
ok, msg = service.apply_mode(mode)
|
||||
self._mode_applied.emit((mode, ok, msg))
|
||||
|
||||
def _on_mode_applied(self, result) -> None:
|
||||
mode, ok, msg = result
|
||||
if ok:
|
||||
self._trigger_status.setText(f"Recording trigger set to “{mode}”.")
|
||||
else:
|
||||
self._trigger_status.setText(f"“{mode}” saved. {msg}")
|
||||
|
||||
# --- alerts (M8) ----------------------------------------------------------
|
||||
@staticmethod
def _spin() -> QDoubleSpinBox:
    """Create a whole-number temperature spin box: range 40–110, step 1, " °C" suffix."""
    box = QDoubleSpinBox()
    box.setSuffix(" °C")
    box.setRange(40, 110)
    box.setDecimals(0)
    box.setSingleStep(1)
    return box
|
||||
|
||||
def _load_alerts(self) -> None:
    """Fill the notification controls from the saved config (on, GPU 90, CPU 95 by default)."""
    settings = config.load_config()
    enabled = bool(settings.get("alerts_enabled", True))
    gpu_limit = float(settings.get("gpu_temp_alert", 90.0))
    cpu_limit = float(settings.get("cpu_temp_alert", 95.0))
    self._alerts_enabled.setChecked(enabled)
    self._gpu_alert.setValue(gpu_limit)
    self._cpu_alert.setValue(cpu_limit)
|
||||
|
||||
def _save_alerts(self) -> None:
    """Persist the notification settings, emit `changed`, and confirm in the status label."""
    settings = {
        "alerts_enabled": self._alerts_enabled.isChecked(),
        "gpu_temp_alert": self._gpu_alert.value(),
        "cpu_temp_alert": self._cpu_alert.value(),
    }
    config.update_config(**settings)
    self.changed.emit()
    self._alerts_status.setText("Saved.")
|
||||
|
||||
def _test_alerts(self) -> None:
    """Send a test notification and report whether ``alerts.notify`` succeeded."""
    delivered = alerts.notify("RigDoctor", "Test notification — alerts are working.")
    if delivered:
        message = "Test sent."
    else:
        message = "notify-send not found — install libnotify-bin above."
    self._alerts_status.setText(message)
|
||||
|
||||
def _uninstall(self) -> None:
|
||||
box = QMessageBox(self)
|
||||
box.setIcon(QMessageBox.Icon.Warning)
|
||||
|
||||
@@ -0,0 +1,259 @@
|
||||
"""First-run GUI setup wizard (M9): the full graphical installer/setup.
|
||||
|
||||
Bootstrap (Python venv + PySide6) is done by install.sh/.run; this wizard handles the rest
|
||||
graphically — environment summary → pick dependency bundles → install the missing apt packages
|
||||
→ choose the recording trigger → readiness summary. Shown automatically on first launch (until
|
||||
`setup_done`), re-runnable from Settings, and launched by install.sh after a fresh install.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
|
||||
from PySide6.QtCore import Qt, Signal
|
||||
from PySide6.QtWidgets import (
|
||||
QButtonGroup,
|
||||
QCheckBox,
|
||||
QDialog,
|
||||
QHBoxLayout,
|
||||
QLabel,
|
||||
QPushButton,
|
||||
QRadioButton,
|
||||
QStackedWidget,
|
||||
QTextEdit,
|
||||
QVBoxLayout,
|
||||
QWidget,
|
||||
)
|
||||
|
||||
from .. import config
|
||||
from ..core import catalog, installer, service, sysenv
|
||||
|
||||
|
||||
class SetupWizard(QDialog):
|
||||
_installed = Signal(int, str)
|
||||
|
||||
def __init__(self, parent=None) -> None:
    """Build the five-page wizard and its Skip / Back / Next navigation row."""
    super().__init__(parent)
    self.setObjectName("Page")
    self.setWindowTitle("RigDoctor Setup")
    self.resize(620, 560)

    # State that the page builders and slots read must exist before they run.
    self._installing = False
    self._index = 0
    self._bundle_checks: dict[str, QCheckBox] = {}
    self._installed.connect(self._on_installed)

    layout = QVBoxLayout(self)
    layout.setSpacing(14)
    layout.setContentsMargins(22, 20, 22, 16)

    # Insertion order fixes the page indices that _go() switches on:
    # 0 welcome, 1 bundles, 2 install, 3 trigger, 4 finish.
    self._stack = QStackedWidget()
    builders = (
        self._page_welcome,
        self._page_bundles,
        self._page_install,
        self._page_trigger,
        self._page_finish,
    )
    for build in builders:
        self._stack.addWidget(build())
    layout.addWidget(self._stack, 1)

    self._skip_btn = QPushButton("Skip")
    self._back_btn = QPushButton("Back")
    self._next_btn = QPushButton("Next")
    self._next_btn.setObjectName("PrimaryButton")
    self._skip_btn.clicked.connect(self._skip)
    self._back_btn.clicked.connect(lambda: self._go(-1))
    self._next_btn.clicked.connect(lambda: self._go(1))

    nav = QHBoxLayout()
    nav.addWidget(self._skip_btn)
    nav.addStretch(1)
    nav.addWidget(self._back_btn)
    nav.addWidget(self._next_btn)
    layout.addLayout(nav)

    self._update_nav()
|
||||
|
||||
# --- pages -----------------------------------------------------------------
|
||||
def _page(self, title: str, subtitle: str = "") -> tuple[QWidget, QVBoxLayout]:
    """Create a blank wizard page with a title and, if given, a muted word-wrapped subtitle.

    Returns the page widget and its vertical layout so the caller can append more content.
    """
    container = QWidget()
    column = QVBoxLayout(container)
    column.setSpacing(10)
    column.setContentsMargins(0, 0, 0, 0)

    heading = QLabel(title)
    heading.setObjectName("PageTitle")
    column.addWidget(heading)

    if not subtitle:
        return container, column

    blurb = QLabel(subtitle)
    blurb.setObjectName("Muted")
    blurb.setWordWrap(True)
    column.addWidget(blurb)
    return container, column
|
||||
|
||||
def _page_welcome(self) -> QWidget:
    """Build the welcome page: intro text plus the detected distro, package manager and GPU."""
    intro = (
        "Let's set up monitoring and diagnostics for your machine. This takes a minute and "
        "needs no root for the app itself — only installing optional tools may ask for your "
        "password."
    )
    page, column = self._page("Welcome to RigDoctor", intro)

    detected = [
        "Detected:",
        f" • Distro: {sysenv.distro_name()}",
        f" • Package manager: {sysenv.package_manager() or 'none (apt required for extras)'}",
        f" • GPU: {', '.join(sysenv.gpu_vendors()) or 'unknown'}",
    ]
    summary = QLabel("\n".join(detected))
    summary.setObjectName("Muted")
    column.addWidget(summary)
    column.addStretch(1)
    return page
|
||||
|
||||
def _page_bundles(self) -> QWidget:
    """Build the page listing one check box per optional tool bundle.

    Each box names the bundle's components and how many are still missing; bundles with
    something to install start checked. The boxes are stored in ``self._bundle_checks`` keyed
    by bundle name. When the package manager is not apt the boxes are disabled and a note
    explains why.
    """
    page, v = self._page(
        "Choose what to set up",
        "Pick the optional tool bundles to install. Core monitoring, crash capture, and the "
        "health report work without any of these — they just add capability.",
    )
    # Loop-invariant: query the package manager once instead of once per bundle.
    apt_available = sysenv.package_manager() == "apt"
    present = {c.id: ok for c, ok in installer.component_status()}
    for bundle, comps in catalog.by_bundle().items():
        missing = [c for c in comps if not present.get(c.id)]
        names = ", ".join(c.name for c in comps)
        tag = " — all installed ✓" if not missing else f" — {len(missing)} to install"
        cb = QCheckBox(f"{bundle}: {names}{tag}")
        cb.setChecked(bool(missing))  # default-check bundles with something to add
        cb.setEnabled(apt_available)  # selectable even if already installed
        self._bundle_checks[bundle] = cb
        v.addWidget(cb)
    if not apt_available:
        note = QLabel("Only apt is supported for installing tools, so these are read-only here.")
        note.setObjectName("Muted")
        note.setWordWrap(True)
        v.addWidget(note)
    v.addStretch(1)
    return page
|
||||
|
||||
def _page_install(self) -> QWidget:
    """Build the install page: a status label above a read-only output log."""
    page, column = self._page("Install tools", "Installing the selected packages…")

    status = QLabel("")
    status.setObjectName("Muted")
    status.setWordWrap(True)
    self._install_status = status
    column.addWidget(status)

    log_view = QTextEdit()
    log_view.setObjectName("Report")
    log_view.setReadOnly(True)
    self._install_output = log_view
    column.addWidget(log_view, 1)
    return page
|
||||
|
||||
def _page_trigger(self) -> QWidget:
    """Build the page where the user picks when the crash logger runs.

    The radio button matching the saved ``trigger_mode`` starts checked. An unknown or missing
    saved value falls back to "manual", the same default `_chosen_mode` returns when no button
    is checked, so the page always shows what Finish will apply.
    """
    page, v = self._page(
        "Recording trigger",
        "When the crash logger runs. You can change this any time in Settings.",
    )
    self._trigger_group = QButtonGroup(self)
    labels = {
        "manual": "Manual — start/stop recording yourself.",
        "always-on": "Always-on — a background service records continuously.",
        "game-launch": "Game-launch — auto-record while a Steam game runs.",
    }
    # Read the config once rather than once per radio button.
    saved = config.load_config().get("trigger_mode", "manual")
    current = saved if isinstance(saved, str) and saved in labels else "manual"
    for i, (mode, text) in enumerate(labels.items()):
        rb = QRadioButton(text)
        rb.setProperty("mode", mode)  # read back by _chosen_mode()
        rb.setChecked(mode == current)
        self._trigger_group.addButton(rb, i)
        v.addWidget(rb)
    if not service.available():
        note = QLabel("systemd --user isn't available, so always-on / game-launch can't be enabled here.")
        note.setObjectName("Muted")
        note.setWordWrap(True)
        v.addWidget(note)
    v.addStretch(1)
    return page
|
||||
|
||||
def _page_finish(self) -> QWidget:
    """Build the final page; its summary label starts empty and `_fill_summary` sets the text."""
    page, column = self._page("You're all set", "")
    summary = QLabel("")
    summary.setObjectName("Muted")
    summary.setWordWrap(True)
    self._finish_summary = summary
    column.addWidget(summary)
    column.addStretch(1)
    return page
|
||||
|
||||
# --- navigation ------------------------------------------------------------
|
||||
def _go(self, delta: int) -> None:
|
||||
if self._installing:
|
||||
return
|
||||
new = self._index + delta
|
||||
if new < 0:
|
||||
return
|
||||
if new >= self._stack.count(): # past the last page → finish
|
||||
self._finish()
|
||||
return
|
||||
self._index = new
|
||||
self._stack.setCurrentIndex(new)
|
||||
self._update_nav()
|
||||
if new == 2: # entering the install page
|
||||
self._run_install()
|
||||
elif new == 4: # entering the finish page
|
||||
self._fill_summary()
|
||||
|
||||
def _update_nav(self) -> None:
|
||||
self._back_btn.setEnabled(self._index > 0 and not self._installing)
|
||||
last = self._index == self._stack.count() - 1
|
||||
self._next_btn.setText("Finish" if last else "Next")
|
||||
self._skip_btn.setVisible(not last)
|
||||
|
||||
def _selected_components(self):
    """Return the not-yet-present components of every bundle whose check box is ticked."""
    present = {c.id: ok for c, ok in installer.component_status()}
    chosen = []
    for bundle, comps in catalog.by_bundle().items():
        box = self._bundle_checks.get(bundle)
        if not (box and box.isChecked()):
            continue
        chosen.extend(c for c in comps if not present.get(c.id))
    return chosen
|
||||
|
||||
def _run_install(self) -> None:
    """Install the packages needed by the selected components on a worker thread.

    When nothing is missing, only a note is shown. Otherwise navigation is locked
    (``_installing``), the output log is shown, and `_installed` is emitted with
    ``(return_code, output)`` once the installer returns.
    """
    packages = installer.missing_packages(self._selected_components())
    if not packages:
        self._install_status.setText("Nothing to install — your selected tools are already present.")
        self._install_output.setVisible(False)
        return

    def worker() -> None:
        # Always report back. If an installer exception escaped the thread, `_installed`
        # would never fire, `_installing` would stay True and `_go` would ignore every click.
        try:
            rc, out = installer.install_packages(packages)
        except Exception as exc:
            rc, out = 1, f"Installation failed unexpectedly: {type(exc).__name__}: {exc}"
        self._installed.emit(rc, out)

    self._installing = True
    self._update_nav()
    self._next_btn.setEnabled(False)
    self._install_status.setText("Installing… you may be asked for your password.")
    self._install_output.setVisible(True)
    self._install_output.setPlainText(f"Installing: {' '.join(packages)}\n")
    threading.Thread(target=worker, daemon=True).start()
|
||||
|
||||
def _on_installed(self, rc: int, out: str) -> None:
|
||||
self._installing = False
|
||||
self._install_output.setPlainText(out[-4000:])
|
||||
self._install_status.setText("Done." if rc == 0 else "Some packages may not have installed — see the log.")
|
||||
self._next_btn.setEnabled(True)
|
||||
self._update_nav()
|
||||
|
||||
def _fill_summary(self) -> None:
    """Fill the finish page with tool, sensor-source and trigger-mode counts."""
    from ..core.sources import available_sources

    status = installer.component_status()
    installed = len([ok for _c, ok in status if ok])
    lines = (
        f"• Optional tools present: {installed}/{len(status)}",
        f"• Sensor sources detected: {len(available_sources())}",
        f"• Recording trigger: {self._chosen_mode()}",
        "",
        "You can re-run this wizard or change anything from Settings.",
    )
    self._finish_summary.setText("\n".join(lines))
|
||||
|
||||
def _chosen_mode(self) -> str:
|
||||
btn = self._trigger_group.checkedButton()
|
||||
return btn.property("mode") if btn else "manual"
|
||||
|
||||
def _finish(self) -> None:
    """Apply (or just record) the chosen trigger mode, mark setup as done, and accept."""
    mode = self._chosen_mode()
    if not service.available():
        # No service support available: only remember the choice in the config.
        config.update_config(trigger_mode=mode)
    else:
        service.apply_mode(mode)
    config.update_config(setup_done=True)
    self.accept()
|
||||
|
||||
def _skip(self) -> None:
    """Mark setup as done without applying anything, then reject the dialog."""
    done_flag = {"setup_done": True}
    config.update_config(**done_flag)
    self.reject()
|
||||
@@ -0,0 +1,348 @@
|
||||
"""Share page (M12): a shared **terminal** session over the relay.
|
||||
|
||||
The host shares a real PTY running their shell; the guest watches it live and — only if the
|
||||
host ticks "Allow the guest to type" — can run commands (as the host's user). The host reads
|
||||
along and can type too, e.g. a sudo password, which stays local and is never sent to the guest.
|
||||
This is the only share mode (the old read-only stats view was removed). Either terminal can be
|
||||
popped full-screen.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import json
|
||||
|
||||
from PySide6.QtCore import Qt, QSocketNotifier, QUrl
|
||||
from PySide6.QtGui import QKeySequence, QShortcut
|
||||
from PySide6.QtWebSockets import QWebSocket
|
||||
from PySide6.QtWidgets import (
|
||||
QCheckBox,
|
||||
QFrame,
|
||||
QHBoxLayout,
|
||||
QLabel,
|
||||
QLineEdit,
|
||||
QPushButton,
|
||||
QVBoxLayout,
|
||||
QWidget,
|
||||
)
|
||||
|
||||
from ..config import load_config, load_token
|
||||
from ..core.pty_session import PtySession
|
||||
from .terminal_widget import TerminalView
|
||||
|
||||
|
||||
def _relay_url() -> str:
    """Return the configured relay URL (or the built-in default) without trailing slashes."""
    configured = load_config().get("relay_url", "wss://rigdoctor.jesseyvanofferen.com")
    return configured.rstrip("/")
|
||||
|
||||
|
||||
def _b64(data: bytes) -> str:
|
||||
return base64.b64encode(data).decode("ascii")
|
||||
|
||||
|
||||
def _card(title: str) -> tuple[QFrame, QVBoxLayout]:
    """Create a "Card" frame with a bold heading; return the frame and its vertical layout."""
    frame = QFrame()
    frame.setObjectName("Card")

    column = QVBoxLayout(frame)
    column.setSpacing(10)
    column.setContentsMargins(16, 14, 16, 14)

    heading = QLabel(title)
    heading.setStyleSheet("font-weight: 700; background: transparent;")
    column.addWidget(heading)
    return frame, column
|
||||
|
||||
|
||||
class SharePage(QWidget):
|
||||
def __init__(self) -> None:
    """Initialise empty session state and stack the host and guest cards under the title."""
    super().__init__()
    self.setObjectName("Page")

    # Session state: nothing connected, no PTY, guest input off, not in full screen.
    self._host_ws: QWebSocket | None = None
    self._guest_ws: QWebSocket | None = None
    self._pty: PtySession | None = None
    self._pty_notifier: QSocketNotifier | None = None
    self._guest_can_type = False
    self._fs: QWidget | None = None
    self._fs_state = None

    layout = QVBoxLayout(self)
    layout.setSpacing(14)
    layout.setContentsMargins(20, 18, 20, 18)

    heading = QLabel("Share")
    heading.setObjectName("PageTitle")
    layout.addWidget(heading)

    # Host card first, guest card below; both take an equal stretch.
    for build in (self._build_host, self._build_guest):
        layout.addWidget(build(), 1)
|
||||
|
||||
# ------------------------------------------------------------------ host
|
||||
def _build_host(self) -> QFrame:
|
||||
card, v = _card("Host a terminal session")
|
||||
self._host_status = QLabel("Share a live terminal with someone who has an account.")
|
||||
self._host_status.setObjectName("Muted")
|
||||
self._host_status.setWordWrap(True)
|
||||
v.addWidget(self._host_status)
|
||||
|
||||
row = QHBoxLayout()
|
||||
self._start_btn = QPushButton("Start session")
|
||||
self._start_btn.setObjectName("PrimaryButton")
|
||||
self._start_btn.clicked.connect(self._start_host)
|
||||
self._stop_btn = QPushButton("Stop")
|
||||
self._stop_btn.setEnabled(False)
|
||||
self._stop_btn.clicked.connect(self._stop_host)
|
||||
self._code_label = QLabel("")
|
||||
self._code_label.setStyleSheet("font-weight:700; font-size:18px; color:#38bdf8; background:transparent;")
|
||||
self._code_label.setTextInteractionFlags(Qt.TextInteractionFlag.TextSelectableByMouse)
|
||||
self._host_fs_btn = QPushButton("Full screen")
|
||||
self._host_fs_btn.setEnabled(False)
|
||||
self._host_fs_btn.clicked.connect(lambda: self._enter_fullscreen(self._host_term))
|
||||
row.addWidget(self._start_btn)
|
||||
row.addWidget(self._stop_btn)
|
||||
row.addSpacing(12)
|
||||
row.addWidget(self._code_label)
|
||||
row.addStretch(1)
|
||||
row.addWidget(self._host_fs_btn)
|
||||
v.addLayout(row)
|
||||
|
||||
self._allow_input = QCheckBox(
|
||||
"Allow the guest to type — they run commands as your user (off = they only watch)")
|
||||
self._allow_input.setStyleSheet("color:#fb923c; background:transparent;")
|
||||
self._allow_input.toggled.connect(self._send_terminal_state)
|
||||
v.addWidget(self._allow_input)
|
||||
|
||||
self._host_term = TerminalView()
|
||||
self._host_term.keys.connect(lambda b: self._pty.write(b) if self._pty else None)
|
||||
self._host_term.resized.connect(lambda r, c: self._pty.set_size(r, c) if self._pty else None)
|
||||
self._host_term.setVisible(False)
|
||||
v.addWidget(self._host_term, 1)
|
||||
return card
|
||||
|
||||
def _start_host(self) -> None:
|
||||
if not load_token():
|
||||
self._host_status.setText("Set a Gitea access token in Settings → Account access first.")
|
||||
return
|
||||
self._host_status.setText("Connecting to the relay…")
|
||||
self._start_btn.setEnabled(False)
|
||||
self._host_ws = QWebSocket()
|
||||
self._host_ws.connected.connect(lambda: self._host_ws.sendTextMessage(json.dumps({"token": load_token()})))
|
||||
self._host_ws.textMessageReceived.connect(self._host_msg)
|
||||
self._host_ws.disconnected.connect(self._host_closed)
|
||||
self._host_ws.errorOccurred.connect(lambda *_: self._host_status.setText(f"Relay error: {self._host_ws.errorString()}"))
|
||||
self._host_ws.open(QUrl(_relay_url() + "/ws/host"))
|
||||
|
||||
def _host_msg(self, text: str) -> None:
|
||||
try:
|
||||
data = json.loads(text)
|
||||
except ValueError:
|
||||
return
|
||||
if data.get("error"):
|
||||
self._host_status.setText(f"Rejected: {data['error']}")
|
||||
return
|
||||
if "code" in data: # relay handshake → start the terminal immediately
|
||||
self._code_label.setText(data["code"])
|
||||
self._host_status.setText(
|
||||
f"Sharing as {data.get('user', '?')} — give this code to whoever should connect.")
|
||||
self._stop_btn.setEnabled(True)
|
||||
self._start_pty()
|
||||
self._send_terminal_state()
|
||||
return
|
||||
kind = data.get("type")
|
||||
if kind == "req_full": # a guest joined — tell them their typing permission
|
||||
self._send_terminal_state()
|
||||
elif kind == "pty_in" and self._pty and self._allow_input.isChecked():
|
||||
self._pty.write(base64.b64decode(data["data"]))
|
||||
elif kind == "pty_resize" and self._pty and self._allow_input.isChecked():
|
||||
self._pty.set_size(int(data["rows"]), int(data["cols"]))
|
||||
|
||||
def _send_terminal_state(self) -> None:
|
||||
if self._host_ws and self._code_label.text():
|
||||
self._host_ws.sendTextMessage(json.dumps({"type": "terminal", "enabled": self._allow_input.isChecked()}))
|
||||
|
||||
def _start_pty(self) -> None:
|
||||
if self._pty:
|
||||
return
|
||||
rows, cols = self._host_term.grid()
|
||||
self._pty = PtySession(rows=rows, cols=cols)
|
||||
self._pty_notifier = QSocketNotifier(self._pty.master_fd, QSocketNotifier.Type.Read, self)
|
||||
self._pty_notifier.activated.connect(self._on_pty_output)
|
||||
self._host_term.reset()
|
||||
self._host_term.setVisible(True)
|
||||
self._host_fs_btn.setEnabled(True)
|
||||
self._host_term.setFocus()
|
||||
|
||||
def _on_pty_output(self) -> None:
|
||||
if not self._pty:
|
||||
return
|
||||
data = self._pty.read()
|
||||
if not data: # shell exited
|
||||
self._stop_host()
|
||||
return
|
||||
self._host_term.feed(data)
|
||||
if self._host_ws:
|
||||
self._host_ws.sendTextMessage(json.dumps({"type": "pty", "data": _b64(data)}))
|
||||
|
||||
def _stop_pty(self) -> None:
|
||||
if self._pty_notifier:
|
||||
self._pty_notifier.setEnabled(False)
|
||||
self._pty_notifier = None
|
||||
if self._pty:
|
||||
self._pty.close()
|
||||
self._pty = None
|
||||
self._host_term.setVisible(False)
|
||||
self._host_fs_btn.setEnabled(False)
|
||||
|
||||
def _stop_host(self) -> None:
|
||||
self._stop_pty()
|
||||
if self._host_ws:
|
||||
self._host_ws.close()
|
||||
self._host_ws = None
|
||||
self._code_label.setText("")
|
||||
self._stop_btn.setEnabled(False)
|
||||
self._start_btn.setEnabled(True)
|
||||
self._host_status.setText("Stopped sharing.")
|
||||
|
||||
def _host_closed(self) -> None:
|
||||
self._stop_pty()
|
||||
self._start_btn.setEnabled(True)
|
||||
self._stop_btn.setEnabled(False)
|
||||
if self._code_label.text():
|
||||
self._code_label.setText("")
|
||||
self._host_status.setText("Disconnected from the relay.")
|
||||
|
||||
# ----------------------------------------------------------------- guest
|
||||
def _build_guest(self) -> QFrame:
|
||||
card, v = _card("Join a terminal session")
|
||||
row = QHBoxLayout()
|
||||
self._code_input = QLineEdit()
|
||||
self._code_input.setPlaceholderText("Enter share code")
|
||||
self._code_input.setMaxLength(6)
|
||||
self._code_input.setFixedWidth(160)
|
||||
self._join_btn = QPushButton("Join")
|
||||
self._join_btn.setObjectName("PrimaryButton")
|
||||
self._join_btn.clicked.connect(self._join)
|
||||
self._leave_btn = QPushButton("Leave")
|
||||
self._leave_btn.setEnabled(False)
|
||||
self._leave_btn.clicked.connect(self._leave)
|
||||
self._guest_fs_btn = QPushButton("Full screen")
|
||||
self._guest_fs_btn.setEnabled(False)
|
||||
self._guest_fs_btn.clicked.connect(lambda: self._enter_fullscreen(self._guest_term))
|
||||
row.addWidget(self._code_input)
|
||||
row.addWidget(self._join_btn)
|
||||
row.addWidget(self._leave_btn)
|
||||
row.addStretch(1)
|
||||
row.addWidget(self._guest_fs_btn)
|
||||
v.addLayout(row)
|
||||
self._guest_status = QLabel("")
|
||||
self._guest_status.setObjectName("Muted")
|
||||
self._guest_status.setWordWrap(True)
|
||||
v.addWidget(self._guest_status)
|
||||
|
||||
self._guest_term = TerminalView()
|
||||
self._guest_term.keys.connect(self._guest_key)
|
||||
self._guest_term.resized.connect(self._guest_resize)
|
||||
self._guest_term.setVisible(False)
|
||||
v.addWidget(self._guest_term, 1)
|
||||
return card
|
||||
|
||||
def _join(self) -> None:
|
||||
code = self._code_input.text().strip().upper()
|
||||
if not load_token():
|
||||
self._guest_status.setText("Set a Gitea access token in Settings → Account access first.")
|
||||
return
|
||||
if not code:
|
||||
self._guest_status.setText("Enter a share code.")
|
||||
return
|
||||
self._guest_status.setText("Connecting…")
|
||||
self._join_btn.setEnabled(False)
|
||||
self._guest_ws = QWebSocket()
|
||||
self._guest_ws.connected.connect(lambda: self._guest_ws.sendTextMessage(json.dumps({"token": load_token()})))
|
||||
self._guest_ws.textMessageReceived.connect(self._guest_msg)
|
||||
self._guest_ws.disconnected.connect(self._guest_closed)
|
||||
self._guest_ws.errorOccurred.connect(lambda *_: self._guest_status.setText(f"Relay error: {self._guest_ws.errorString()}"))
|
||||
self._guest_ws.open(QUrl(_relay_url() + "/ws/guest/" + code))
|
||||
|
||||
def _guest_msg(self, text: str) -> None:
|
||||
try:
|
||||
data = json.loads(text)
|
||||
except ValueError:
|
||||
return
|
||||
if data.get("error"):
|
||||
self._guest_status.setText(data["error"])
|
||||
return
|
||||
if "joined" in data:
|
||||
self._guest_status.setText(f"Connected to {data.get('host', '?')}'s terminal — watching.")
|
||||
self._leave_btn.setEnabled(True)
|
||||
self._guest_fs_btn.setEnabled(True)
|
||||
self._guest_term.reset()
|
||||
self._guest_term.setVisible(True)
|
||||
self._guest_ws.sendTextMessage(json.dumps({"type": "req_full"}))
|
||||
return
|
||||
kind = data.get("type")
|
||||
if kind == "terminal":
|
||||
self._guest_can_type = bool(data.get("enabled"))
|
||||
self._guest_status.setText(
|
||||
"You can type — your keystrokes run on the host's machine."
|
||||
if self._guest_can_type else "Read-only — watching the host's terminal.")
|
||||
if self._guest_can_type:
|
||||
self._guest_term.setFocus()
|
||||
self._guest_resize(*self._guest_term.grid())
|
||||
elif kind == "pty":
|
||||
self._guest_term.feed(base64.b64decode(data["data"]))
|
||||
|
||||
def _guest_key(self, data: bytes) -> None:
|
||||
if self._guest_ws and self._guest_can_type:
|
||||
self._guest_ws.sendTextMessage(json.dumps({"type": "pty_in", "data": _b64(data)}))
|
||||
|
||||
def _guest_resize(self, rows: int, cols: int) -> None:
|
||||
if self._guest_ws and self._guest_can_type:
|
||||
self._guest_ws.sendTextMessage(json.dumps({"type": "pty_resize", "rows": rows, "cols": cols}))
|
||||
|
||||
def _leave(self) -> None:
|
||||
if self._guest_ws:
|
||||
self._guest_ws.close()
|
||||
self._guest_ws = None
|
||||
self._guest_term.setVisible(False)
|
||||
self._guest_fs_btn.setEnabled(False)
|
||||
self._guest_can_type = False
|
||||
self._leave_btn.setEnabled(False)
|
||||
self._join_btn.setEnabled(True)
|
||||
self._guest_status.setText("Left the session.")
|
||||
|
||||
def _guest_closed(self) -> None:
|
||||
self._join_btn.setEnabled(True)
|
||||
self._leave_btn.setEnabled(False)
|
||||
if self._guest_term.isVisible():
|
||||
self._guest_status.setText("Session ended (host disconnected).")
|
||||
|
||||
# --------------------------------------------------------------- full screen
|
||||
def _enter_fullscreen(self, term: TerminalView) -> None:
|
||||
if self._fs is not None:
|
||||
return
|
||||
parent_layout = term.parentWidget().layout()
|
||||
self._fs_state = (parent_layout, parent_layout.indexOf(term), term)
|
||||
self._fs = QWidget()
|
||||
self._fs.setStyleSheet("background:#0d0f13;")
|
||||
lay = QVBoxLayout(self._fs)
|
||||
lay.setContentsMargins(0, 0, 0, 0)
|
||||
lay.setSpacing(0)
|
||||
hint = QLabel("Esc to exit full screen")
|
||||
hint.setObjectName("Muted")
|
||||
hint.setStyleSheet("padding:4px 10px; background:#15181e;")
|
||||
lay.addWidget(hint)
|
||||
lay.addWidget(term, 1)
|
||||
QShortcut(QKeySequence(Qt.Key.Key_Escape), self._fs, activated=self._leave_fullscreen)
|
||||
self._fs.showFullScreen()
|
||||
term.setFocus()
|
||||
|
||||
def _leave_fullscreen(self) -> None:
|
||||
if self._fs is None:
|
||||
return
|
||||
parent_layout, index, term = self._fs_state
|
||||
parent_layout.insertWidget(index, term)
|
||||
self._fs.close()
|
||||
self._fs = None
|
||||
self._fs_state = None
|
||||
term.setFocus()
|
||||
|
||||
def shutdown(self) -> None:
|
||||
self._stop_pty()
|
||||
for ws in (self._host_ws, self._guest_ws):
|
||||
if ws:
|
||||
ws.close()
|
||||
@@ -0,0 +1,157 @@
|
||||
"""GPU stress + thermal-monitor dialog (GUI front-end for core/stress.py).
|
||||
|
||||
Runs the stress monitor in a background thread, streams a live one-line readout, and shows the
|
||||
rendered result (telemetry stats + verdict) when it finishes. A Stop button ends the run early
|
||||
via a cooperative flag; closing the dialog mid-run stops it too.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
|
||||
from PySide6.QtCore import Qt, Signal
|
||||
from PySide6.QtGui import QFont
|
||||
from PySide6.QtWidgets import (
|
||||
QDialog,
|
||||
QHBoxLayout,
|
||||
QLabel,
|
||||
QLineEdit,
|
||||
QPushButton,
|
||||
QSpinBox,
|
||||
QTextEdit,
|
||||
QVBoxLayout,
|
||||
)
|
||||
|
||||
|
||||
class StressDialog(QDialog):
    """Dialog that runs the stress monitor on a worker thread and shows its result.

    The worker reports through two signals so every widget update happens on the GUI thread:
    ``_tick`` carries the live one-line readout, ``_done`` carries the final result (or the
    exception that ended the run). Stopping is cooperative via a ``threading.Event``.
    """

    _tick = Signal(str)  # live one-line readout (worker thread -> GUI)
    _done = Signal(object)  # stress.StressResult when the run finishes

    def __init__(self, parent=None) -> None:
        super().__init__(parent)
        self._stop = threading.Event()
        self._running = False
        self._tick.connect(self._on_tick)
        self._done.connect(self._on_done)
        self.setWindowTitle("GPU stress + thermal monitor")
        self.resize(640, 460)

        root = QVBoxLayout(self)
        root.setContentsMargins(20, 18, 20, 16)
        root.setSpacing(12)

        intro = QLabel(
            "Run a GPU load and closely watch temps. Reports peak/sustained temps, time spent "
            "hot, throttling, and any GPU fault (Xid / driver freeze) during the run.")
        intro.setWordWrap(True)
        root.addWidget(intro)

        from ..core import stress
        loaders = stress.available_loaders()
        self._mode = QLabel(
            f"Load tool detected: {loaders[0]} — it'll drive the load." if loaders else
            "No GPU load tool installed → MONITOR-ONLY: start this, then launch your game; "
            "it tracks temps while you play. (Or give a command below.)")
        self._mode.setObjectName("Muted")
        self._mode.setWordWrap(True)
        root.addWidget(self._mode)

        form = QHBoxLayout()
        form.addWidget(QLabel("Duration (s):"))
        self._duration = QSpinBox()
        self._duration.setRange(5, 3600)
        self._duration.setValue(120)
        form.addWidget(self._duration)
        form.addSpacing(12)
        form.addWidget(QLabel("Command (optional):"))
        self._command = QLineEdit()
        self._command.setPlaceholderText("e.g. /…/tarkov.sh or gpu-burn 60")
        form.addWidget(self._command, 1)
        root.addLayout(form)

        self._live = QLabel("—")
        self._live.setFont(QFont("monospace"))
        self._live.setStyleSheet("background: #0d0f13; color: #cfd3da; border: 1px solid #2a2f39; "
                                 "border-radius: 8px; padding: 8px;")
        root.addWidget(self._live)

        self._report = QTextEdit()
        self._report.setReadOnly(True)
        self._report.setFont(QFont("monospace"))
        self._report.setVisible(False)
        root.addWidget(self._report, 1)

        buttons = QHBoxLayout()
        buttons.addStretch(1)
        self._stop_btn = QPushButton("Stop")
        self._stop_btn.setEnabled(False)
        self._stop_btn.clicked.connect(self._on_stop)
        buttons.addWidget(self._stop_btn)
        self._start_btn = QPushButton("Start")
        self._start_btn.setObjectName("PrimaryButton")
        self._start_btn.clicked.connect(self._on_start)
        buttons.addWidget(self._start_btn)
        root.addLayout(buttons)

    def _on_start(self) -> None:
        """Start a run on a daemon thread, unless one is already in flight."""
        if self._running:
            return
        self._running = True
        self._stop.clear()
        self._start_btn.setEnabled(False)
        self._stop_btn.setEnabled(True)
        self._report.setVisible(False)
        self._live.setText("starting…")
        duration = float(self._duration.value())
        command_text = self._command.text().strip()
        threading.Thread(target=self._work, args=(duration, command_text), daemon=True).start()

    def _work(self, duration: float, command_text: str) -> None:
        """Worker-thread body: run the stress monitor and always emit ``_done``.

        Everything that can fail — including parsing the user's command line — sits inside
        the ``try`` so a failure is reported through ``_done`` instead of killing the thread
        silently and leaving the dialog stuck in its "running" state.
        """
        try:
            import shlex

            from ..core import stress
            from ..render import format_raw

            # shlex.split raises ValueError on e.g. an unbalanced quote.
            command = shlex.split(command_text) if command_text else None

            def _tick(sample, elapsed) -> None:
                by = {r.key: r for r in sample.readings}
                bits = [f"{elapsed:5.0f}s"]
                for key, tag in (("gpu.temp", "core"), ("gpu.power", "pwr"),
                                 ("gpu.util", "util"), ("gpu.clock.core", "clk"),
                                 ("gpu.temp.memory", "vram")):
                    r = by.get(key)
                    if r is not None and r.value is not None:
                        bits.append(f"{tag} {format_raw(r.value, r.unit)}")
                self._tick.emit(" ".join(bits))

            result = stress.run(duration=duration, interval=0.5, command=command,
                                on_tick=_tick, should_stop=self._stop.is_set)
        except Exception as exc:  # never let a worker crash take down the dialog
            result = exc
        self._done.emit(result)

    def _on_tick(self, text: str) -> None:
        """Show the latest live readout (GUI thread)."""
        self._live.setText(text)

    def _on_done(self, result) -> None:
        """Reset the controls and show either the rendered result or the failure."""
        from ..render import render_stress

        self._running = False
        self._start_btn.setEnabled(True)
        self._stop_btn.setEnabled(False)
        if isinstance(result, Exception):
            self._report.setPlainText(f"Stress run failed: {result}")
        else:
            self._report.setPlainText(render_stress(result))
        self._report.setVisible(True)

    def _on_stop(self) -> None:
        """Ask the worker to stop; the controls reset when ``_done`` arrives."""
        self._stop.set()
        self._stop_btn.setEnabled(False)
        self._live.setText("stopping…")

    def done(self, result) -> None:  # noqa: D401 (Qt override)
        """Stop the run however the dialog is dismissed.

        Esc goes through ``reject()`` → ``done()``, so overriding ``done()`` sets the stop
        flag on that path without depending on ``closeEvent``.
        """
        self._stop.set()
        super().done(result)

    def closeEvent(self, event) -> None:  # stop the run if the dialog is closed mid-flight
        self._stop.set()
        super().closeEvent(event)
|
||||
@@ -0,0 +1,161 @@
|
||||
"""A terminal view: renders PTY output via pyte (with colors) and emits keystrokes (M12).
|
||||
|
||||
Used by both sides of a shared session — the host (mirrors its local PTY, can also type, e.g.
|
||||
a sudo password) and the guest (renders the streamed PTY, sends keystrokes). Renders pyte's
|
||||
per-cell foreground/background/bold/reverse so the host's real shell (e.g. fish) keeps its
|
||||
colors and theming; cursor addressing (vim, top) works via pyte. Scrollback is preserved.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import html as _html
|
||||
|
||||
import pyte
|
||||
from PySide6.QtCore import Qt, Signal
|
||||
from PySide6.QtGui import QFontDatabase, QFontMetrics
|
||||
from PySide6.QtWidgets import QTextEdit
|
||||
|
||||
# ANSI named colors → RGB (a dark, modern palette). pyte also yields 6-hex strings for
|
||||
# 256-color / truecolor, which we pass through, and "default" which maps to the theme.
|
||||
_FG_DEFAULT = "#d6dae0"
|
||||
_BG_DEFAULT = "#0d0f13"
|
||||
_NAMED = {
|
||||
"black": "#2a2f39", "red": "#f87171", "green": "#4ade80", "brown": "#e5c07b",
|
||||
"yellow": "#e5c07b", "blue": "#60a5fa", "magenta": "#c084fc", "cyan": "#38bdf8",
|
||||
"white": "#d6dae0",
|
||||
}
|
||||
_BRIGHT = { # bold brightens the standard 8
|
||||
"black": "#5b626c", "red": "#fca5a5", "green": "#86efac", "brown": "#fde68a",
|
||||
"yellow": "#fde68a", "blue": "#93c5fd", "magenta": "#d8b4fe", "cyan": "#7dd3fc",
|
||||
"white": "#ffffff",
|
||||
}
|
||||
_HISTORY_RENDER = 400 # cap scrollback rows rendered per frame (perf)
|
||||
|
||||
|
||||
def _color(name: str, default: str, bright: bool) -> str:
|
||||
if name == "default":
|
||||
return default
|
||||
table = _BRIGHT if bright else _NAMED
|
||||
if name in table:
|
||||
return table[name]
|
||||
if len(name) == 6: # pyte 256/truecolor as a hex string
|
||||
try:
|
||||
int(name, 16)
|
||||
return "#" + name
|
||||
except ValueError:
|
||||
pass
|
||||
return default
|
||||
|
||||
|
||||
class TerminalView(QTextEdit):
    """Terminal widget: feeds PTY bytes into a pyte screen and renders it as HTML.

    Key presses are never applied to the text document; they are translated to the byte
    sequence a terminal would send and emitted through ``keys``. Whatever appears on screen
    comes from ``feed()``.
    """

    keys = Signal(bytes)  # user keystrokes -> bytes for the PTY
    resized = Signal(int, int)  # rows, cols

    # Qt key code → byte sequence for the non-printable keys. Built once instead of on
    # every key press.
    _SPECIAL_KEYS = {
        Qt.Key.Key_Return.value: b"\r", Qt.Key.Key_Enter.value: b"\r",
        Qt.Key.Key_Backspace.value: b"\x7f", Qt.Key.Key_Tab.value: b"\t",
        Qt.Key.Key_Backtab.value: b"\x1b[Z",  # Shift+Tab (xterm back-tab)
        Qt.Key.Key_Escape.value: b"\x1b",
        Qt.Key.Key_Up.value: b"\x1b[A", Qt.Key.Key_Down.value: b"\x1b[B",
        Qt.Key.Key_Right.value: b"\x1b[C", Qt.Key.Key_Left.value: b"\x1b[D",
        Qt.Key.Key_Home.value: b"\x1b[H", Qt.Key.Key_End.value: b"\x1b[F",
        Qt.Key.Key_Delete.value: b"\x1b[3~",
        Qt.Key.Key_PageUp.value: b"\x1b[5~", Qt.Key.Key_PageDown.value: b"\x1b[6~",
    }

    def __init__(self, rows: int = 24, cols: int = 80):
        super().__init__()
        self.setLineWrapMode(QTextEdit.LineWrapMode.NoWrap)
        self.setFont(QFontDatabase.systemFont(QFontDatabase.SystemFont.FixedFont))
        self.setUndoRedoEnabled(False)
        self.setReadOnly(False)  # we capture keys ourselves; no local editing
        self.setStyleSheet(f"QTextEdit {{ background: {_BG_DEFAULT}; border: none; }}")
        self.setMinimumHeight(320)
        self._rows, self._cols = rows, cols
        self._screen = pyte.HistoryScreen(cols, rows, history=2000, ratio=0.5)
        self._stream = pyte.ByteStream(self._screen)

    def grid(self) -> tuple[int, int]:
        """Return the current terminal size as ``(rows, cols)``."""
        return self._rows, self._cols

    def feed(self, data: bytes) -> None:
        """Feed raw PTY output into the emulator and redraw."""
        self._stream.feed(data)
        self._render()

    def reset(self) -> None:
        """Reset the emulated screen and redraw."""
        self._screen.reset()
        self._render()

    # --- rendering ---------------------------------------------------------------------
    def _span(self, style, text: str) -> str:
        """Return one HTML ``<span>`` for a run of cells sharing *style*.

        *style* is ``(fg_name, bg_name, bold, reverse)`` as built by ``_row_html``.
        """
        fg_name, bg_name, bold, reverse = style
        fg = _color(fg_name, _FG_DEFAULT, bold)
        bg = _color(bg_name, _BG_DEFAULT, False)
        if reverse:
            fg, bg = bg, fg
        # Non-breaking spaces keep runs of blanks (and their background color) from being
        # collapsed by the HTML renderer.
        esc = _html.escape(text, quote=False).replace(" ", "&nbsp;")
        weight = "font-weight:bold;" if bold else ""
        return f'<span style="color:{fg};background:{bg};{weight}">{esc}</span>'

    def _row_html(self, row, cursor_x) -> str:
        """Render one screen row, merging adjacent cells with identical style.

        *cursor_x* is the cursor column when the cursor is on this row, else None.
        """
        out: list[str] = []
        buf: list[str] = []
        cur_style = None
        focused = self.hasFocus()
        for x in range(self._cols):
            ch = row[x]
            reverse = ch.reverse
            if cursor_x is not None and x == cursor_x and focused:
                reverse = not reverse  # block cursor = inverted cell
            style = (ch.fg, ch.bg, ch.bold, reverse)
            if style != cur_style:
                if buf:
                    out.append(self._span(cur_style, "".join(buf)))
                    buf = []
                cur_style = style
            buf.append(ch.data or " ")
        if buf:
            out.append(self._span(cur_style, "".join(buf)))
        return "".join(out)

    def _render(self) -> None:
        """Rebuild the document from scrollback + live screen, keeping the scroll position."""
        bar = self.verticalScrollBar()
        at_bottom = bar.value() >= bar.maximum() - 2
        prev = bar.value()

        history = list(self._screen.history.top)[-_HISTORY_RENDER:]
        lines = [self._row_html(r, None) for r in history]
        cur_y = self._screen.cursor.y
        for y in range(self._rows):
            cursor_x = self._screen.cursor.x if y == cur_y else None
            lines.append(self._row_html(self._screen.buffer[y], cursor_x))
        self.setHtml('<div style="white-space:pre;line-height:100%;">' + "<br>".join(lines) + "</div>")

        # Follow new output only if the user was already at the bottom.
        bar.setValue(bar.maximum() if at_bottom else prev)

    def resizeEvent(self, event):  # noqa: N802 (Qt override)
        """Recompute the character grid from the viewport size and announce changes."""
        super().resizeEvent(event)
        fm = QFontMetrics(self.font())
        cw = max(1, fm.horizontalAdvance("M"))
        ch = max(1, fm.height())
        cols = max(20, self.viewport().width() // cw)
        rows = max(6, self.viewport().height() // ch)
        if (rows, cols) != (self._rows, self._cols):
            self._rows, self._cols = rows, cols
            self._screen.resize(rows, cols)
            self._render()
            self.resized.emit(rows, cols)

    def keyPressEvent(self, event):  # noqa: N802 (Qt override)
        """Translate the key press to terminal bytes and emit it; never edit locally."""
        data = self._translate(event)
        if data:
            self.keys.emit(data)
        event.accept()  # display comes from PTY output, not local editing

    @staticmethod
    def _translate(event) -> bytes:
        """Return the bytes a terminal sends for *event*, or ``b""`` if it maps to nothing."""
        key = event.key()
        mod = event.modifiers()
        k = Qt.Key
        if mod & Qt.KeyboardModifier.ControlModifier and k.Key_A.value <= key <= k.Key_Z.value:
            return bytes([key - k.Key_A.value + 1])  # Ctrl-A..Ctrl-Z
        special = TerminalView._SPECIAL_KEYS.get(key)
        if special is not None:
            return special
        text = event.text()
        return text.encode("utf-8") if text else b""
|
||||
@@ -2,6 +2,10 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
_CHECK = (Path(__file__).parent / "assets" / "check.svg").as_posix()
|
||||
|
||||
# Palette (dark)
|
||||
BG = "#101216"
|
||||
SIDEBAR = "#15181e"
|
||||
@@ -10,6 +14,7 @@ CARD_BORDER = "#2a2f39"
|
||||
TRACK = "#2a2f39"
|
||||
TEXT = "#e6e8eb"
|
||||
MUTED = "#8b929c"
|
||||
INPUT_BG = "#0d0f13" # form-control background (must stay dark — see contrast rule)
|
||||
|
||||
ACCENT = "#38bdf8"
|
||||
COLD = "#7dd3fc" # icey-blue
|
||||
@@ -63,6 +68,8 @@ QMainWindow, #ContentArea, #Page {{ background: {BG}; }}
|
||||
QLabel {{ background: transparent; }}
|
||||
|
||||
#Sidebar {{ background: {SIDEBAR}; border-right: 1px solid {CARD_BORDER}; }}
|
||||
#Footer {{ background: {SIDEBAR}; border-top: 1px solid {CARD_BORDER}; }}
|
||||
#Footer QLabel {{ font-size: 11px; }}
|
||||
#AppTitle {{ font-size: 17px; font-weight: 800; }}
|
||||
#AppSubtitle {{ color: {MUTED}; font-size: 11px; }}
|
||||
|
||||
@@ -72,6 +79,7 @@ QPushButton#NavButton {{
|
||||
}}
|
||||
QPushButton#NavButton:hover {{ background: {CARD}; color: {TEXT}; }}
|
||||
QPushButton#NavButton:checked {{ background: {CARD}; color: #ffffff; font-weight: 600; }}
|
||||
QLabel#NavSection {{ color: {MUTED}; font-size: 10px; font-weight: 800; letter-spacing: 1px; padding: 2px 12px 0; }}
|
||||
|
||||
#Card {{ background: {CARD}; border: 1px solid {CARD_BORDER}; border-radius: 12px; }}
|
||||
QPushButton#CardHeader {{
|
||||
@@ -99,6 +107,15 @@ QPushButton#PrimaryButton {{ background: {ACCENT}; color: #06222e; border: none;
|
||||
QPushButton#PrimaryButton:hover {{ background: #5cc8fb; }}
|
||||
QPushButton#PrimaryButton:disabled {{ background: #27424f; color: #5f7c8a; }}
|
||||
|
||||
/* Inline per-finding action buttons (Install / Apply). Outlined: bright accent text on the
|
||||
dark card so it stays readable regardless of fill painting; fills accent on hover. */
|
||||
QPushButton#ActionButton {{
|
||||
background: transparent; color: {ACCENT}; border: 1px solid {ACCENT};
|
||||
border-radius: 8px; padding: 6px 16px; font-weight: 700; min-height: 18px;
|
||||
}}
|
||||
QPushButton#ActionButton:hover {{ background: {ACCENT}; color: #06222e; }}
|
||||
QPushButton#ActionButton:disabled {{ color: {MUTED}; border-color: {CARD_BORDER}; }}
|
||||
|
||||
QDoubleSpinBox, QSpinBox {{
|
||||
background: #262b34; color: {TEXT}; border: 1px solid {CARD_BORDER};
|
||||
border-radius: 6px; padding: 4px 6px;
|
||||
@@ -120,8 +137,56 @@ QPushButton#LinkButton {{
|
||||
}}
|
||||
QPushButton#LinkButton:hover {{ color: {TEXT}; }}
|
||||
|
||||
QCheckBox {{ spacing: 8px; background: transparent; }}
|
||||
QCheckBox::indicator {{
|
||||
width: 17px; height: 17px; border-radius: 4px;
|
||||
border: 1px solid {MUTED}; background: #262b34;
|
||||
}}
|
||||
QCheckBox::indicator:hover {{ border-color: {ACCENT}; }}
|
||||
QCheckBox::indicator:checked {{
|
||||
background: {ACCENT}; border-color: {ACCENT}; image: url("{_CHECK}");
|
||||
}}
|
||||
QCheckBox::indicator:disabled {{ border-color: #3a414d; background: #1c2026; }}
|
||||
QCheckBox::indicator:checked:disabled {{ background: #2a6175; border-color: #2a6175; }}
|
||||
QCheckBox:disabled {{ color: {MUTED}; }}
|
||||
|
||||
/* Radio buttons — same dark treatment as checkboxes; the selected one gets a clear
|
||||
accent dot (Fusion leaves these unstyled = the selection is invisible on dark). */
|
||||
QRadioButton {{ spacing: 8px; background: transparent; }}
|
||||
QRadioButton::indicator {{
|
||||
width: 17px; height: 17px; border-radius: 9px;
|
||||
border: 1px solid {MUTED}; background: #262b34;
|
||||
}}
|
||||
QRadioButton::indicator:hover {{ border-color: {ACCENT}; }}
|
||||
QRadioButton::indicator:checked {{
|
||||
border: 1px solid {ACCENT};
|
||||
background: qradialgradient(cx:0.5, cy:0.5, radius:0.5, fx:0.5, fy:0.5,
|
||||
stop:0 {ACCENT}, stop:0.5 {ACCENT}, stop:0.55 #262b34, stop:1 #262b34);
|
||||
}}
|
||||
QRadioButton:disabled {{ color: {MUTED}; }}
|
||||
|
||||
/* Dialogs (update prompt, changelog) — match the dark theme so text is readable. */
|
||||
QDialog {{ background: {BG}; }}
|
||||
QMessageBox {{ background: {CARD}; }}
|
||||
QDialog QLabel, QMessageBox QLabel {{ color: {TEXT}; background: transparent; }}
|
||||
|
||||
/* Form controls: keep dark bg + light text (Fusion defaults to light-on-light here). */
|
||||
QLineEdit, QPlainTextEdit, QAbstractSpinBox, QComboBox {{
|
||||
background: {INPUT_BG}; color: {TEXT};
|
||||
border: 1px solid {CARD_BORDER}; border-radius: 6px; padding: 5px 8px;
|
||||
selection-background-color: {ACCENT}; selection-color: #06222e;
|
||||
}}
|
||||
QLineEdit:focus, QPlainTextEdit:focus, QAbstractSpinBox:focus, QComboBox:focus {{
|
||||
border: 1px solid {ACCENT};
|
||||
}}
|
||||
QLineEdit:disabled, QPlainTextEdit:disabled, QAbstractSpinBox:disabled {{ color: {MUTED}; }}
|
||||
|
||||
/* The combo-box drop-down list is a separate popup view — unstyled it renders
|
||||
light-on-light (same Fusion trap as the closed control above). */
|
||||
QComboBox QAbstractItemView {{
|
||||
background: {CARD}; color: {TEXT};
|
||||
border: 1px solid {CARD_BORDER}; outline: 0;
|
||||
selection-background-color: {ACCENT}; selection-color: #06222e;
|
||||
}}
|
||||
QComboBox QAbstractItemView::item {{ padding: 5px 8px; min-height: 22px; }}
|
||||
"""
|
||||
|
||||
@@ -0,0 +1,144 @@
|
||||
"""System-tray applet (M11, D13): live readouts + quick actions over the shared engine.
|
||||
|
||||
A QSystemTrayIcon whose menu shows at-a-glance CPU/GPU temp + memory and a status dot, led
|
||||
by **Run Diagnostic** (the guided session), plus Open dashboard / Start-Stop recording /
|
||||
Snapshot / Quit. It consumes the same sample stream as the dashboard (no extra sampling) and
|
||||
drives the existing MainWindow flows — one engine, another front-end.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from PySide6.QtWidgets import QApplication, QMenu, QSystemTrayIcon
|
||||
|
||||
from ..core import reccontrol
|
||||
|
||||
|
||||
def _gpu_temp(sample):
|
||||
for r in sample.readings:
|
||||
if r.source == "gpu" and r.metric == "temp" and r.label == "" and r.value is not None:
|
||||
return r.value
|
||||
return None
|
||||
|
||||
|
||||
def _cpu_temp(sample):
|
||||
temps = [r for r in sample.readings if r.source == "cpu" and r.metric == "temp" and r.value is not None]
|
||||
for r in temps:
|
||||
low = r.label.lower()
|
||||
if low.startswith("package") or "tctl" in low or "tdie" in low:
|
||||
return r.value
|
||||
return max((r.value for r in temps), default=None)
|
||||
|
||||
|
||||
def _memory(sample):
|
||||
used = total = pct = None
|
||||
for r in sample.readings:
|
||||
if r.source == "memory":
|
||||
if r.metric == "used":
|
||||
used = r.value
|
||||
elif r.metric == "total":
|
||||
total = r.value
|
||||
elif r.metric == "used_pct":
|
||||
pct = r.value
|
||||
return used, total, pct
|
||||
|
||||
|
||||
def _gpu_lost(sample) -> bool:
|
||||
return any(r.source == "gpu" and r.metric == "status" and r.label == "query-timeout"
|
||||
for r in sample.readings)
|
||||
|
||||
|
||||
class TrayIcon(QSystemTrayIcon):
|
||||
def __init__(self, window, icon, gpu_alert: float = 90.0, cpu_alert: float = 95.0) -> None:
    """Create the tray icon and its context menu.

    Args:
        window: Main window; provides ``show_dashboard`` and ``quit_app`` and is used as
            the Qt parent.
        icon: Icon shown in the system tray.
        gpu_alert: GPU temperature at or above which the status line reports "Hot".
        cpu_alert: CPU temperature at or above which the status line reports "Hot".
    """
    super().__init__(icon, window)
    self._window = window
    self._gpu_alert = gpu_alert
    self._cpu_alert = cpu_alert
    self._last = None
    self.setToolTip("RigDoctor")

    menu = QMenu()
    # setContextMenu() does not take ownership of the menu, so hold a reference here;
    # otherwise the menu is only kept alive by this local variable.
    self._menu = menu
    self._status_act = self._readout(menu, "● starting…")
    self._cpu_act = self._readout(menu, "CPU temp: —")
    self._gpu_act = self._readout(menu, "GPU temp: —")
    self._mem_act = self._readout(menu, "Memory: —")
    menu.addSeparator()
    self._diag_menu = menu.addMenu("Run Diagnostic")
    # The submenu is rebuilt lazily each time it is about to open.
    self._diag_menu.aboutToShow.connect(self._rebuild_diag_menu)
    menu.addAction("Open dashboard", self._window.show_dashboard)
    self._rec_act = menu.addAction("Start recording", self._toggle_record)
    menu.addAction("Snapshot (copy)", self._snapshot)
    menu.addSeparator()
    menu.addAction("Quit", self._window.quit_app)
    menu.aboutToShow.connect(self._refresh_actions)
    self.setContextMenu(menu)
    self.activated.connect(self._on_activated)
|
||||
|
||||
@staticmethod
|
||||
def _readout(menu: QMenu, text: str):
|
||||
act = menu.addAction(text)
|
||||
act.setEnabled(False) # display-only line
|
||||
return act
|
||||
|
||||
def _on_activated(self, reason) -> None:
|
||||
if reason in (QSystemTrayIcon.ActivationReason.Trigger,
|
||||
QSystemTrayIcon.ActivationReason.DoubleClick):
|
||||
self._window.show_dashboard()
|
||||
|
||||
def update_sample(self, sample) -> None:
|
||||
self._last = sample
|
||||
cpu, gpu = _cpu_temp(sample), _gpu_temp(sample)
|
||||
used, total, pct = _memory(sample)
|
||||
self._cpu_act.setText(f"CPU temp: {cpu:.0f} °C" if cpu is not None else "CPU temp: —")
|
||||
self._gpu_act.setText(f"GPU temp: {gpu:.0f} °C" if gpu is not None else "GPU temp: —")
|
||||
if used is not None and total is not None:
|
||||
extra = f" ({pct:.0f}%)" if pct is not None else ""
|
||||
self._mem_act.setText(f"Memory: {used:.1f} / {total:.1f} GB{extra}")
|
||||
else:
|
||||
self._mem_act.setText("Memory: —")
|
||||
|
||||
if _gpu_lost(sample):
|
||||
self._status_act.setText("● GPU not responding")
|
||||
elif (gpu is not None and gpu >= self._gpu_alert) or (cpu is not None and cpu >= self._cpu_alert):
|
||||
self._status_act.setText("● Hot — over alert threshold")
|
||||
else:
|
||||
self._status_act.setText("● Normal")
|
||||
|
||||
bits = []
|
||||
if cpu is not None:
|
||||
bits.append(f"CPU {cpu:.0f}°C")
|
||||
if gpu is not None:
|
||||
bits.append(f"GPU {gpu:.0f}°C")
|
||||
self.setToolTip("RigDoctor" + (" — " + " ".join(bits) if bits else ""))
|
||||
|
||||
def _refresh_actions(self) -> None:
|
||||
self._rec_act.setText("Stop recording" if reccontrol.running_pid() else "Start recording")
|
||||
|
||||
def _toggle_record(self) -> None:
|
||||
if reccontrol.running_pid():
|
||||
reccontrol.stop_background()
|
||||
else:
|
||||
reccontrol.start_background()
|
||||
|
||||
def _rebuild_diag_menu(self) -> None:
|
||||
from ..core import steam
|
||||
|
||||
self._diag_menu.clear()
|
||||
games = steam.cached_games()
|
||||
if not games:
|
||||
self._diag_menu.addAction("Open Games to pick a game…",
|
||||
lambda: self._window.show_page("Games"))
|
||||
return
|
||||
for g in games[:20]:
|
||||
self._diag_menu.addAction(
|
||||
g.name,
|
||||
lambda _checked=False, name=g.name, appid=g.appid: self._window.run_diagnostic(name, appid),
|
||||
)
|
||||
|
||||
def _snapshot(self) -> None:
|
||||
if self._last is None:
|
||||
return
|
||||
from ..render import render_snapshot
|
||||
|
||||
QApplication.clipboard().setText(render_snapshot(self._last))
|
||||
self.showMessage("RigDoctor", "Snapshot copied to clipboard.",
|
||||
QSystemTrayIcon.MessageIcon.Information, 4000)
|
||||
@@ -2,9 +2,12 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from PySide6.QtCore import QRectF, Qt
|
||||
from PySide6.QtGui import QColor, QFont, QPainter, QPen
|
||||
from collections import deque
|
||||
|
||||
from PySide6.QtCore import QPointF, QRectF, Qt
|
||||
from PySide6.QtGui import QColor, QFont, QPainter, QPainterPath, QPen
|
||||
from PySide6.QtWidgets import (
|
||||
QComboBox,
|
||||
QFrame,
|
||||
QHBoxLayout,
|
||||
QLabel,
|
||||
@@ -16,7 +19,118 @@ from PySide6.QtWidgets import (
|
||||
|
||||
from ..core.sample import Reading
|
||||
from ..render import format_value
|
||||
from .theme import MUTED, TEXT, TRACK, gauge_color, temp_color
|
||||
from .theme import (
|
||||
ACCENT,
|
||||
CRIT,
|
||||
GOOD,
|
||||
MUTED,
|
||||
TEMP_WARN,
|
||||
TEXT,
|
||||
TRACK,
|
||||
USAGE_WARN,
|
||||
WARN,
|
||||
gauge_color,
|
||||
temp_color,
|
||||
)
|
||||
|
||||
_SEV = {
|
||||
"critical": ("CRITICAL", CRIT),
|
||||
"warning": ("WARNING", WARN),
|
||||
"info": ("INFO", MUTED),
|
||||
"ok": ("OK", GOOD),
|
||||
}
|
||||
|
||||
|
||||
def finding_card(finding, on_install=None, on_apply=None) -> QFrame:
    """Build the card widget for one M4/M6 Finding.

    The card always shows a severity-colored headline, then the detail and the
    suggested fix when the finding has them.

    When ``on_install(component)`` is supplied and the finding names an installable
    catalog component (``finding.action``), an "Install" button is added, turning a
    "tool not installed" finding into one click instead of a copy-pasted apt command.

    When ``on_apply(fix_id, value)`` is supplied and the finding names a runtime
    tunable (``finding.fix``) that has options, a dropdown of those options plus an
    Apply button is added (M6 live fixes — D22).
    """
    sev_text, sev_color = _SEV.get(finding.severity, ("?", MUTED))

    card = QFrame()
    card.setObjectName("Card")
    layout = QVBoxLayout(card)
    layout.setContentsMargins(16, 12, 16, 12)
    layout.setSpacing(4)

    # Headline: severity, category and title in the severity color.
    headline = QLabel(f"{sev_text} · {finding.category}: {finding.title}")
    headline.setStyleSheet(f"color: {sev_color}; font-weight: 700; background: transparent;")
    headline.setWordWrap(True)
    layout.addWidget(headline)

    if finding.detail:
        detail_lbl = QLabel(finding.detail)
        detail_lbl.setObjectName("Muted")
        detail_lbl.setWordWrap(True)
        layout.addWidget(detail_lbl)

    if finding.suggestion:
        hint_lbl = QLabel(f"→ {finding.suggestion}")
        hint_lbl.setStyleSheet(f"color: {ACCENT}; background: transparent;")
        hint_lbl.setWordWrap(True)
        layout.addWidget(hint_lbl)

    # Optional one-click install row (only looked up when a callback was given).
    component = None
    if on_install:
        component = _installable_component(finding)
    if component is not None:
        install_row = QHBoxLayout()
        install_row.addStretch(1)
        install_btn = QPushButton(f"Install {component.name}")
        install_btn.setObjectName("ActionButton")
        install_btn.setCursor(Qt.CursorShape.PointingHandCursor)
        install_btn.clicked.connect(lambda: on_install(component))
        install_row.addWidget(install_btn)
        layout.addLayout(install_row)

    # Optional live-fix row: option dropdown + Apply.
    tunable = None
    if on_apply:
        tunable = _tunable(finding)
    if tunable is None or not tunable.options:
        return card

    fix_row = QHBoxLayout()
    caption = QLabel(f"{tunable.label}:")
    caption.setObjectName("Muted")
    chooser = QComboBox()
    chooser.addItems(tunable.options)
    if tunable.current in tunable.options:
        chooser.setCurrentText(tunable.current)
    chooser.setCursor(Qt.CursorShape.PointingHandCursor)
    apply_btn = QPushButton("Apply")
    apply_btn.setObjectName("ActionButton")
    apply_btn.setCursor(Qt.CursorShape.PointingHandCursor)
    # The selected value is read at click time, not when the card is built.
    apply_btn.clicked.connect(lambda: on_apply(tunable.id, chooser.currentText()))
    fix_row.addWidget(caption)
    fix_row.addWidget(chooser, 1)
    fix_row.addWidget(apply_btn)
    layout.addLayout(fix_row)
    if tunable.note:
        note_lbl = QLabel(tunable.note)
        note_lbl.setObjectName("Muted")
        layout.addWidget(note_lbl)
    return card
|
||||
|
||||
|
||||
def _tunable(finding):
|
||||
"""The runtime tunable a finding can apply, if any."""
|
||||
fix = getattr(finding, "fix", "")
|
||||
if not fix:
|
||||
return None
|
||||
from ..core import fixes
|
||||
|
||||
return fixes.get_tunable(fix)
|
||||
|
||||
|
||||
def _installable_component(finding):
|
||||
"""The catalog component a finding offers to install, if any and if apt is usable."""
|
||||
action = getattr(finding, "action", "")
|
||||
if not action:
|
||||
return None
|
||||
from ..core import catalog, sysenv
|
||||
|
||||
if sysenv.package_manager() != "apt":
|
||||
return None # apt-only (D15) — no one-click install elsewhere
|
||||
return catalog.by_id(action)
|
||||
|
||||
|
||||
class Card(QFrame):
|
||||
@@ -148,6 +262,117 @@ class StatGauge(QWidget):
|
||||
p.end()
|
||||
|
||||
|
||||
class HistoryGraph(QWidget):
    """Trend view of one headline metric.

    Shows the most recent value, the min/max over the retained window and a line
    graph of the history. `kind` selects the color (temp band / usage / accent),
    the same way StatGauge does, so the dashboard stays consistent.
    """

    def __init__(self, title: str, unit: str = "", vmin: float = 0.0, vmax: float = 100.0,
                 kind: str = "accent", history: int = 180) -> None:
        super().__init__()
        self._title = title
        self._unit = unit
        self._min = vmin
        self._max = vmax
        self._kind = kind  # "temp" | "usage" | "accent"
        # Bounded window; None entries mark missing readings.
        self._values: deque[float | None] = deque(maxlen=history)
        self.setMinimumSize(160, 132)

    def add_value(self, value: float | None) -> None:
        """Append a reading (or None for a gap) and schedule a repaint."""
        self._values.append(value)
        self.update()

    def _fmt(self, value: float | None) -> str:
        """Format a value with no decimals; "°C" is shortened to "°", None shows as "—"."""
        if value is None:
            return "—"
        suffix = "°" if self._unit == "°C" else self._unit
        return f"{value:.0f}{suffix}"

    def paintEvent(self, event) -> None:  # noqa: N802 (Qt override)
        painter = QPainter(self)
        painter.setRenderHint(QPainter.RenderHint.Antialiasing)
        width, height = self.width(), self.height()
        margin = 10.0
        known = [v for v in self._values if v is not None]
        # Latest non-missing reading, or None when the window holds no data.
        latest = None
        for v in reversed(self._values):
            if v is not None:
                latest = v
                break
        line_color = QColor(gauge_color(self._kind, latest))
        muted = QColor(MUTED)

        # Title, top-left.
        title_font = QFont()
        title_font.setPointSizeF(10.0)
        title_font.setBold(True)
        painter.setFont(title_font)
        painter.setPen(muted)
        painter.drawText(QRectF(margin, 6, width - 2 * margin, 18),
                         Qt.AlignmentFlag.AlignLeft | Qt.AlignmentFlag.AlignVCenter, self._title)

        # Current value, top-right; muted when nothing has been read yet.
        value_font = QFont()
        value_font.setPointSizeF(21.0)
        value_font.setBold(True)
        painter.setFont(value_font)
        painter.setPen(QColor(MUTED) if latest is None else line_color)
        painter.drawText(QRectF(margin, 2, width - 2 * margin, 28),
                         Qt.AlignmentFlag.AlignRight | Qt.AlignmentFlag.AlignTop, self._fmt(latest))

        # Window min/max under the title.
        if known:
            small_font = QFont()
            small_font.setPointSizeF(8.5)
            painter.setFont(small_font)
            painter.setPen(QColor(MUTED))
            painter.drawText(QRectF(margin, 27, width - 2 * margin, 14), Qt.AlignmentFlag.AlignLeft,
                             f"min {self._fmt(min(known))} max {self._fmt(max(known))}")

        top, bottom = 48.0, height - margin
        left, right = margin, width - margin
        value_span = self._max - self._min
        # Too little room for a graph, or a degenerate value range: stop here.
        if bottom - top < 12 or right - left < 12 or value_span <= 0:
            painter.end()
            return

        def to_y(v: float) -> float:
            # Clamp into [min, max], then map so larger values sit higher up.
            clamped = max(self._min, min(self._max, v))
            return bottom - ((clamped - self._min) / value_span) * (bottom - top)

        # Dashed guide at the warning threshold for temp/usage graphs.
        if self._kind == "temp":
            threshold = TEMP_WARN
        elif self._kind == "usage":
            threshold = USAGE_WARN
        else:
            threshold = None
        if threshold is not None and self._min <= threshold <= self._max:
            guide = QPen(QColor(TRACK))
            guide.setWidthF(1.0)
            guide.setStyle(Qt.PenStyle.DashLine)
            painter.setPen(guide)
            guide_y = to_y(threshold)
            painter.drawLine(QPointF(left, guide_y), QPointF(right, guide_y))

        capacity = self._values.maxlen or 1
        dx = (right - left) / max(1, capacity - 1)
        count = len(self._values)
        # Build the line newest-at-right; break it where readings are missing.
        trace = QPainterPath()
        pen_down = False
        for index, v in enumerate(self._values):
            if v is None:
                pen_down = False
                continue
            px = right - (count - 1 - index) * dx
            py = to_y(v)
            if pen_down:
                trace.lineTo(px, py)
            else:
                trace.moveTo(px, py)
                pen_down = True
        if not trace.isEmpty():
            stroke = QPen(line_color)
            stroke.setWidthF(2.0)
            stroke.setCapStyle(Qt.PenCapStyle.RoundCap)
            stroke.setJoinStyle(Qt.PenJoinStyle.RoundJoin)
            painter.setPen(stroke)
            painter.drawPath(trace)
        painter.end()
|
||||
|
||||
|
||||
class MetricBar(QWidget):
|
||||
"""A label + value with a thin progress bar (for 0–100% metrics)."""
|
||||
|
||||
|
||||
@@ -102,12 +102,12 @@ def _aggregate_peaks(maxima: dict) -> list[tuple[str, str, float, str, float, st
|
||||
_SEV_LABEL = {"critical": "CRITICAL", "warning": "WARNING", "info": "INFO", "ok": "OK"}
|
||||
|
||||
|
||||
def render_health(findings: list) -> str:
|
||||
def render_health(findings: list, title: str = "Health report") -> str:
|
||||
if not findings:
|
||||
return "Health report: no findings."
|
||||
return f"{title}: no findings."
|
||||
crit = sum(1 for f in findings if f.severity == "critical")
|
||||
warn = sum(1 for f in findings if f.severity == "warning")
|
||||
lines = ["Health report", "", f" {crit} critical · {warn} warning · {len(findings)} checks", ""]
|
||||
lines = [title, "", f" {crit} critical · {warn} warning · {len(findings)} checks", ""]
|
||||
for f in findings:
|
||||
lines.append(f"[{_SEV_LABEL.get(f.severity, '?')}] {f.category}: {f.title}")
|
||||
if f.detail:
|
||||
@@ -118,6 +118,32 @@ def render_health(findings: list) -> str:
|
||||
return "\n".join(lines).rstrip()
|
||||
|
||||
|
||||
def render_stress(result) -> str:
    """Render a stress.StressResult as text.

    Sections, in order: load and duration, a min/avg/max table when stats exist,
    time spent at or above each temperature threshold, power peak against the
    limit, throttle reasons, faults, and finally the severity-tagged verdict.
    """
    early = " (stopped early)" if result.aborted else ""
    out = [
        "GPU stress + thermal monitor",
        "",
        f" Load : {result.load}",
        f" Duration : {_fmt_duration(result.duration)} · {result.samples} samples "
        f"@ {result.interval:g}s" + early,
    ]

    if result.stats:
        out.append("")
        out.append(f" {'Metric':<22}{'min':>12}{'avg':>12}{'max':>12}")
        for stat in result.stats:
            unit = stat.unit
            low, mean, high = (format_raw(stat.min, unit), format_raw(stat.avg, unit),
                               format_raw(stat.max, unit))
            out.append(f" {stat.label:<22}{low:>12}{mean:>12}{high:>12}")

    if result.time_above:
        # Thresholds are listed in ascending order.
        dwell = [f"≥{th}°C: {_fmt_duration(secs)}" for th, secs in sorted(result.time_above.items())]
        out.append("")
        out.append(f" Time at temp (core): {' '.join(dwell)}")

    if result.max_power is not None and result.power_limit:
        capped = " — hit the power cap" if result.power_capped else ""
        out.append(f" Power peak: {result.max_power:.0f} W of {result.power_limit:.0f} W limit{capped}")
    if result.throttle_reasons:
        out.append(f" Throttling: {', '.join(result.throttle_reasons)}")
    if result.faults:
        out.append(f" Faults : {'; '.join(result.faults)}")

    out.append("")
    out.append(f"[{_SEV_LABEL.get(result.severity, '?')}] {result.verdict}")
    return "\n".join(out)
|
||||
|
||||
|
||||
def render_summary(summary: Summary, log_path=None) -> str:
|
||||
if summary.samples == 0 and not summary.events:
|
||||
where = f" ({log_path})" if log_path else ""
|
||||
|
||||
@@ -0,0 +1,170 @@
|
||||
"""Live monitor TUI (M2): a curses HWMonitor-style terminal dashboard.
|
||||
|
||||
Shows current / session-min / session-max per sensor, grouped by subsystem, with
|
||||
temperature and utilization color bands. stdlib `curses` only; falls back to a plain
|
||||
full-screen redraw when stdout isn't a TTY (piped/SSH-without-tty). Keys: q quit, r reset
|
||||
the session min/max. The terminal face of the same live data the GUI dashboard graphs.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import curses
|
||||
import sys
|
||||
import time
|
||||
|
||||
from .core.sample import Reading, Sample
|
||||
from .core.sampler import Sampler
|
||||
from .core.sources import available_sources
|
||||
from .render import _GROUP_ORDER, _GROUP_TITLES, format_raw, metric_label, render_snapshot
|
||||
|
||||
# Color-band thresholds (mirror the GUI dashboard so both faces agree).
|
||||
TEMP_COLD, TEMP_WARN, TEMP_CRIT = 50.0, 78.0, 88.0
|
||||
USAGE_WARN, USAGE_CRIT = 85.0, 95.0
|
||||
_USAGE_METRICS = {"util", "used_pct", "mem_util", "load"}
|
||||
|
||||
|
||||
def band(r: Reading) -> str:
    """Classify a reading into a color band: cold | good | warn | crit | normal | na.

    A GPU "status" reading is always "crit" (GPU-lost / query timeout). A reading
    without a value is "na". Temperatures (unit "°C") and "%" usage metrics are
    compared against the module thresholds; anything else is "normal".
    """
    if r.source == "gpu" and r.metric == "status":
        return "crit"

    value = r.value
    if value is None:
        return "na"

    if r.unit == "°C":
        # Highest threshold first, so the first match is the hottest band reached.
        for floor, name in ((TEMP_CRIT, "crit"), (TEMP_WARN, "warn"), (TEMP_COLD, "good")):
            if value >= floor:
                return name
        return "cold"

    if r.unit == "%" and r.metric in _USAGE_METRICS:
        for floor, name in ((USAGE_CRIT, "crit"), (USAGE_WARN, "warn")):
            if value >= floor:
                return name
        return "good"

    return "normal"
|
||||
|
||||
|
||||
def track(stats: dict[str, tuple[float, float]], sample: Sample) -> None:
    """Update the {key: (min, max)} session extremes in place from one sample.

    Readings without a value are ignored; a key seen for the first time starts
    with its value as both the minimum and the maximum.
    """
    for reading in sample.readings:
        value = reading.value
        if value is None:
            continue
        seen = stats.get(reading.key)
        if seen is None:
            stats[reading.key] = (value, value)
        else:
            stats[reading.key] = (min(seen[0], value), max(seen[1], value))
|
||||
|
||||
|
||||
# --- curses front-end -----------------------------------------------------------------
|
||||
|
||||
_BAND_PAIR = {"cold": 1, "good": 2, "warn": 3, "crit": 4}
|
||||
|
||||
|
||||
def _init_colors() -> None:
|
||||
try:
|
||||
curses.start_color()
|
||||
curses.use_default_colors()
|
||||
curses.init_pair(1, curses.COLOR_CYAN, -1)
|
||||
curses.init_pair(2, curses.COLOR_GREEN, -1)
|
||||
curses.init_pair(3, curses.COLOR_YELLOW, -1)
|
||||
curses.init_pair(4, curses.COLOR_RED, -1)
|
||||
except curses.error:
|
||||
pass
|
||||
|
||||
|
||||
def _attr(band_name: str) -> int:
    """Translate a band name into a curses attribute.

    Bands without an entry in ``_BAND_PAIR`` get ``A_NORMAL``; "crit" is its
    color pair combined with bold.
    """
    pair_no = _BAND_PAIR.get(band_name)
    if pair_no:
        colored = curses.color_pair(pair_no)
        if band_name == "crit":
            return colored | curses.A_BOLD
        return colored
    return curses.A_NORMAL
|
||||
|
||||
|
||||
def _draw(stdscr, sample: Sample, stats: dict, interval: float) -> None:
    """Repaint the whole screen: header, then one table per source group.

    Each sensor row shows its current value (band-colored) and the session
    min/max from ``stats``. Drawing stops once the rows run off the bottom.
    """
    stdscr.erase()
    height, width = stdscr.getmaxyx()

    def put(y: int, x: int, text: str, attr: int = curses.A_NORMAL) -> None:
        # Skip off-screen cells and swallow curses errors; text is clipped to the row.
        if not (0 <= y < height and 0 <= x < width):
            return
        try:
            stdscr.addnstr(y, x, text, max(0, width - x - 1), attr)
        except curses.error:
            pass

    put(0, 0, f"RigDoctor — live monitor every {interval:g}s", curses.A_BOLD)
    put(1, 0, "q quit r reset min/max", curses.A_DIM)

    groups = sample.by_source()
    # Known groups in their fixed order first, then the rest as encountered.
    ordered = [k for k in _GROUP_ORDER if k in groups]
    ordered += [k for k in groups if k not in _GROUP_ORDER]

    name_w, col_w = 24, 11
    cur_x = 2 + name_w
    min_x = cur_x + col_w
    max_x = cur_x + 2 * col_w
    row = 3
    for group in ordered:
        if row >= height:
            break
        put(row, 0, _GROUP_TITLES.get(group, group.title()), curses.A_BOLD)
        row += 1
        put(row, 2, f"{'sensor':<{name_w}}{'current':>{col_w}}{'min':>{col_w}}{'max':>{col_w}}", curses.A_DIM)
        row += 1
        for reading in groups[group]:
            if row >= height:
                break
            if reading.metric == "name":  # device identity line
                put(row, 2, str(reading.label), curses.A_DIM)
            else:
                low, high = stats.get(reading.key, (reading.value, reading.value))
                put(row, 2, f"{metric_label(reading):<{name_w}}")
                put(row, cur_x, f"{format_raw(reading.value, reading.unit):>{col_w}}", _attr(band(reading)))
                put(row, min_x, f"{format_raw(low, reading.unit):>{col_w}}", curses.A_DIM)
                put(row, max_x, f"{format_raw(high, reading.unit):>{col_w}}", curses.A_DIM)
            row += 1
        row += 1  # blank line between groups
    stdscr.refresh()
|
||||
|
||||
|
||||
def _loop(stdscr, sampler: Sampler, interval: float) -> None:
    """Run the curses event loop until the user presses q.

    Takes a sample every ``interval`` seconds, folds it into the session min/max
    and redraws on every iteration. ``r`` resets the min/max to the latest sample.

    Args:
        stdscr: The curses window supplied by ``curses.wrapper``.
        sampler: Source of samples (``sampler.sample()``).
        interval: Seconds between samples.
    """
    try:
        curses.curs_set(0)
    except curses.error:
        # Some terminals cannot hide the cursor; that is cosmetic and must not
        # abort the whole TUI (the caller treats curses.error as "no curses").
        pass
    stdscr.nodelay(True)  # getch() returns immediately so sampling keeps running
    _init_colors()
    stats: dict[str, tuple[float, float]] = {}
    latest = sampler.sample()
    track(stats, latest)
    next_sample = time.monotonic() + interval
    while True:
        ch = stdscr.getch()
        if ch in (ord("q"), ord("Q")):
            return
        if ch in (ord("r"), ord("R")):
            # Restart the session extremes from the sample currently on screen.
            stats.clear()
            track(stats, latest)
        now = time.monotonic()
        if now >= next_sample:
            latest = sampler.sample()
            track(stats, latest)
            next_sample = now + interval
        _draw(stdscr, latest, stats, interval)
        time.sleep(0.05)  # keep key handling responsive without busy-spinning
|
||||
|
||||
|
||||
def _run_plain(sampler: Sampler, interval: float) -> int:
    """Curses-free fallback: clear the screen and reprint a snapshot each tick.

    Runs until Ctrl-C, which ends the stream with a final newline. Always
    returns 0.
    """
    banner = f"RigDoctor — live (every {interval:g}s, Ctrl-C to quit)\n"
    try:
        for snapshot in sampler.stream(interval=interval):
            print("\033[2J\033[H", end="")  # ANSI: clear screen, cursor to home
            print(banner)
            print(render_snapshot(snapshot))
            sys.stdout.flush()
    except KeyboardInterrupt:
        print()
    return 0
|
||||
|
||||
|
||||
def run(interval: float, plain: bool = False) -> int:
    """Start the live monitor and return a process exit code.

    Uses the curses dashboard when stdout is a TTY and ``plain`` is false;
    otherwise, or when curses raises an error, falls back to the plain redraw.

    Args:
        interval: Seconds between samples.
        plain: Force the plain (non-curses) output.

    Returns:
        0 once the monitor exits, including when the user quits with Ctrl-C.
    """
    sampler = Sampler(available_sources())
    if plain or not sys.stdout.isatty():
        return _run_plain(sampler, interval)
    try:
        curses.wrapper(_loop, sampler, interval)
    except curses.error:  # terminal can't do curses — degrade gracefully
        return _run_plain(sampler, interval)
    except KeyboardInterrupt:
        # Ctrl-C is an ordinary way to quit, as in plain mode; curses.wrapper has
        # already restored the terminal by the time the exception reaches here.
        pass
    return 0
|
||||
@@ -0,0 +1,164 @@
|
||||
"""Tests for the M14 AI assistant: provider selection, grounding, parsing (no network)."""
|
||||
|
||||
import unittest
|
||||
from unittest import mock
|
||||
|
||||
from rigdoctor.core import ai, ai_knowledge
|
||||
|
||||
|
||||
class KnowledgeTests(unittest.TestCase):
    """``ai_knowledge.relevant`` selects reference facts matching the input text."""

    def test_matches_xid_and_smart(self):
        xid_line = "Kernel: NVRM: Xid 79: GPU has fallen off the bus"
        matched = ai_knowledge.relevant(xid_line)
        self.assertTrue(any("fallen off the bus" in fact for fact in matched))

    def test_matches_smart_pending(self):
        smart_line = "SMART 197 Current_Pending_Sector = 8"
        matched = ai_knowledge.relevant(smart_line)
        self.assertTrue(any("Pending Sector" in fact for fact in matched))

    def test_no_match_returns_empty(self):
        matched = ai_knowledge.relevant("everything is fine")
        self.assertEqual(matched, [])
|
||||
|
||||
|
||||
class ConfigStateTests(unittest.TestCase):
    """``ai.is_configured`` / ``ai.model`` under patched config values."""

    def _cfg(self, **over):
        # Unconfigured defaults with per-test overrides applied on top.
        defaults = {"ai_provider": "", "ai_model": "", "ai_endpoint": "http://localhost:11434"}
        return {**defaults, **over}

    def test_unconfigured_by_default(self):
        empty = self._cfg()
        with mock.patch.object(ai.config, "load_config", return_value=empty):
            self.assertFalse(ai.is_configured())

    def test_ollama_needs_model(self):
        without_model = self._cfg(ai_provider="ollama")
        with mock.patch.object(ai.config, "load_config", return_value=without_model):
            self.assertFalse(ai.is_configured())

        with_model = self._cfg(ai_provider="ollama", ai_model="llama3.1")
        with mock.patch.object(ai.config, "load_config", return_value=with_model):
            self.assertTrue(ai.is_configured())

    def test_claude_needs_key(self):
        # No stored key: not configured.
        with mock.patch.object(ai.config, "load_config", return_value=self._cfg(ai_provider="claude")):
            with mock.patch.object(ai.config, "load_ai_key", return_value=None):
                self.assertFalse(ai.is_configured())

        # Same provider with a key present: configured.
        with mock.patch.object(ai.config, "load_config", return_value=self._cfg(ai_provider="claude")):
            with mock.patch.object(ai.config, "load_ai_key", return_value="sk-ant-x"):
                self.assertTrue(ai.is_configured())

    def test_claude_default_model(self):
        claude_cfg = self._cfg(ai_provider="claude")
        with mock.patch.object(ai.config, "load_config", return_value=claude_cfg):
            self.assertEqual(ai.model(), ai.CLAUDE_DEFAULT_MODEL)
|
||||
|
||||
|
||||
class PromptTests(unittest.TestCase):
    """Prompt assembly: reference facts, formatted findings and the AppID glossary."""

    def test_build_prompt_includes_facts_and_findings(self):
        built = ai.build_prompt("Xid 79: GPU has fallen off the bus")
        for expected in ("Reference facts", "Collected findings", "fallen off the bus"):
            self.assertIn(expected, built)

    def test_format_findings(self):
        class F:
            severity, category, title, detail = "warn", "GPU", "Hot", "92C"

        rendered = ai.format_findings([F()])
        self.assertIn("[WARN] GPU: Hot — 92C", rendered)

    def test_appid_glossary_resolves_known_ids(self):
        from rigdoctor.core import steam

        library = {"2694490": "Path of Exile 2"}
        with mock.patch.object(steam, "appid_names", return_value=library):
            glossary = ai.appid_glossary("Steam log: removed AppID 2694490 ... pid 130544")
        self.assertIn("2694490 = Path of Exile 2", glossary)

    def test_appid_glossary_ignores_unknown_ids(self):
        from rigdoctor.core import steam

        library = {"570": "Dota 2"}
        with mock.patch.object(steam, "appid_names", return_value=library):
            glossary = ai.appid_glossary("pid 130544 used 8192 MiB")
            self.assertEqual(glossary, "")  # not in library

    def test_build_prompt_includes_glossary(self):
        from rigdoctor.core import steam

        library = {"2694490": "Path of Exile 2"}
        with mock.patch.object(steam, "appid_names", return_value=library):
            built = ai.build_prompt("AppID 2694490 launched")
        self.assertIn("Path of Exile 2", built)
|
||||
|
||||
|
||||
class ExplainTests(unittest.TestCase):
    """``ai.explain`` per provider, with config and the HTTP helper patched out."""

    def _cfg(self, **over):
        # Unconfigured defaults with per-test overrides applied on top.
        defaults = {"ai_provider": "", "ai_model": "", "ai_endpoint": "http://localhost:11434"}
        return {**defaults, **over}

    def test_no_provider(self):
        with mock.patch.object(ai.config, "load_config", return_value=self._cfg()):
            ok, msg = ai.explain("x")
        self.assertFalse(ok)
        self.assertIn("No AI provider", msg)

    def test_ollama_parses_response(self):
        ollama_cfg = self._cfg(ai_provider="ollama", ai_model="llama3.1")
        canned = {"response": "It's the PSU."}
        with mock.patch.object(ai.config, "load_config", return_value=ollama_cfg):
            with mock.patch.object(ai, "_post", return_value=canned) as post:
                ok, msg = ai.explain("Xid 79")
        self.assertTrue(ok)
        self.assertEqual(msg, "It's the PSU.")
        # First positional argument of the call must contain the generate endpoint.
        self.assertIn("/api/generate", post.call_args[0][0])

    def test_claude_parses_content_blocks(self):
        canned = {"content": [{"type": "text", "text": "Likely a failing disk."}]}
        with mock.patch.object(ai.config, "load_config", return_value=self._cfg(ai_provider="claude")):
            with mock.patch.object(ai.config, "load_ai_key", return_value="sk-ant-x"):
                with mock.patch.object(ai, "_post", return_value=canned) as post:
                    ok, msg = ai.explain("SMART 197")
        self.assertTrue(ok)
        self.assertEqual(msg, "Likely a failing disk.")
        # Third positional argument of the call is checked as the headers mapping.
        sent_headers = post.call_args[0][2]
        self.assertEqual(sent_headers["anthropic-version"], ai.ANTHROPIC_VERSION)
        self.assertEqual(sent_headers["x-api-key"], "sk-ant-x")
|
||||
|
||||
|
||||
class _FakeResp:
|
||||
"""A context-managed iterable of byte lines, like urlopen() returns."""
|
||||
def __init__(self, lines):
|
||||
self._lines = [l.encode("utf-8") for l in lines]
|
||||
def __enter__(self):
|
||||
return iter(self._lines)
|
||||
def __exit__(self, *a):
|
||||
return False
|
||||
|
||||
|
||||
class StreamTests(unittest.TestCase):
    """``ai.explain_stream`` for both providers, fed canned streaming responses."""

    def _cfg(self, **over):
        # Unconfigured defaults with per-test overrides applied on top.
        defaults = {"ai_provider": "", "ai_model": "", "ai_endpoint": "http://localhost:11434"}
        return {**defaults, **over}

    def test_ollama_stream_accumulates_and_callbacks(self):
        # One JSON object per line; the final line carries done=true and no text.
        wire = [
            '{"response": "It is ", "done": false}',
            '{"response": "the PSU.", "done": false}',
            '{"response": "", "done": true}',
        ]
        received = []
        ollama_cfg = self._cfg(ai_provider="ollama", ai_model="qwen2.5:7b")
        with mock.patch.object(ai.config, "load_config", return_value=ollama_cfg):
            with mock.patch.object(ai, "_stream_request", return_value=_FakeResp(wire)):
                ok, full = ai.explain_stream("Xid 79", on_chunk=received.append)
        self.assertTrue(ok)
        self.assertEqual(full, "It is the PSU.")
        self.assertEqual(received, ["It is ", "the PSU."])

    def test_claude_stream_parses_sse(self):
        # Server-sent events: an event line, two text deltas, then message_stop.
        wire = [
            'event: content_block_delta',
            'data: {"type":"content_block_delta","delta":{"type":"text_delta","text":"Failing "}}',
            'data: {"type":"content_block_delta","delta":{"type":"text_delta","text":"disk."}}',
            'data: {"type":"message_stop"}',
        ]
        received = []
        with mock.patch.object(ai.config, "load_config", return_value=self._cfg(ai_provider="claude")):
            with mock.patch.object(ai.config, "load_ai_key", return_value="sk-ant-x"):
                with mock.patch.object(ai, "_stream_request", return_value=_FakeResp(wire)):
                    ok, full = ai.explain_stream("SMART 197", on_chunk=received.append)
        self.assertTrue(ok)
        self.assertEqual(full, "Failing disk.")
        self.assertEqual(received, ["Failing ", "disk."])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,68 @@
|
||||
"""Tests for the M8 alert monitor (edge-triggered; notify mocked)."""
|
||||
|
||||
import unittest
|
||||
from unittest import mock
|
||||
|
||||
from rigdoctor.core import alerts
|
||||
from rigdoctor.core.sample import Reading, Sample
|
||||
|
||||
|
||||
def _gpu(temp):
    """Build a Sample holding a single GPU temperature reading in °C."""
    reading = Reading("gpu", "temp", temp, "°C")
    return Sample(readings=[reading])
|
||||
|
||||
|
||||
class AlertTests(unittest.TestCase):
    """Threshold alerts fire on the rising edge only; ``alerts.notify`` is mocked."""

    @mock.patch.object(alerts, "notify")
    def test_edge_triggered_no_repeat(self, m):
        monitor = alerts.AlertMonitor(gpu_temp=90.0, cooldown=0.0)
        for temp in (95, 96):  # fires, then still hot — no repeat while active
            monitor.check(_gpu(temp))
        self.assertEqual(m.call_count, 1)
        for temp in (50, 95):  # clears, then hot again — fires
            monitor.check(_gpu(temp))
        self.assertEqual(m.call_count, 2)

    @mock.patch.object(alerts, "notify")
    def test_no_alert_below_threshold(self, m):
        monitor = alerts.AlertMonitor(gpu_temp=90.0)
        monitor.check(_gpu(70))
        m.assert_not_called()

    @mock.patch.object(alerts, "notify")
    def test_gpu_lost(self, m):
        monitor = alerts.AlertMonitor()
        timeout = Reading("gpu", "status", None, "", "query-timeout")
        monitor.check(Sample(readings=[timeout]))
        m.assert_called_once()
|
||||
|
||||
|
||||
class KernelEventAlertTests(unittest.TestCase):
    """Kernel-log scanning: cooldown suppression, empty log, and the scan interval gate."""

    @mock.patch.object(alerts, "notify")
    def test_kernel_event_fires_once_within_cooldown(self, m):
        monitor = alerts.AlertMonitor(cooldown=300.0, event_interval=0.0)
        xid_line = "NVRM: Xid (PCI:0000:01:00): 79, GPU has fallen off the bus"
        with mock.patch("rigdoctor.core.syslogs.kernel_log", return_value=xid_line):
            monitor._last_kernel_scan = 0.0  # force a scan
            monitor._scan_kernel_events()
            monitor._last_kernel_scan = 0.0  # force another scan — cooldown must suppress it
            monitor._scan_kernel_events()
        self.assertEqual(m.call_count, 1)
        self.assertIn("Xid", m.call_args[0][0])

    @mock.patch.object(alerts, "notify")
    def test_no_alert_when_kernel_log_empty(self, m):
        monitor = alerts.AlertMonitor(event_interval=0.0)
        monitor._last_kernel_scan = 0.0
        with mock.patch("rigdoctor.core.syslogs.kernel_log", return_value=""):
            monitor._scan_kernel_events()
        m.assert_not_called()

    @mock.patch.object(alerts, "notify")
    def test_scan_gated_by_interval(self, m):
        monitor = alerts.AlertMonitor(event_interval=9999.0)  # just constructed → not due yet
        with mock.patch("rigdoctor.core.syslogs.kernel_log", return_value="NVRM: Xid 79") as kernel_log:
            monitor._scan_kernel_events()
        kernel_log.assert_not_called()
        m.assert_not_called()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,42 @@
|
||||
"""Tests for config save/load (flat TOML writer)."""
|
||||
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from rigdoctor import config
|
||||
|
||||
|
||||
class ConfigTests(unittest.TestCase):
    """Config persistence against a throwaway directory (CONFIG_FILE / CONFIG_DIR patched)."""

    def test_save_load_round_trip(self):
        with tempfile.TemporaryDirectory() as tmp:
            cfg_dir = Path(tmp)
            cfg_path = cfg_dir / "config.toml"
            with mock.patch.object(config, "CONFIG_FILE", cfg_path), \
                    mock.patch.object(config, "CONFIG_DIR", cfg_dir):
                written = {"alerts_enabled": False, "gpu_temp_alert": 88.0, "update_check_minutes": 5}
                config.save_config(written)
                loaded = config.load_config()
        # A bool, a float and an int each come back with their value intact.
        self.assertIs(loaded["alerts_enabled"], False)
        self.assertEqual(loaded["gpu_temp_alert"], 88.0)
        self.assertEqual(loaded["update_check_minutes"], 5)

    def test_list_value_round_trip(self):
        with tempfile.TemporaryDirectory() as tmp:
            cfg_dir = Path(tmp)
            cfg_path = cfg_dir / "config.toml"
            with mock.patch.object(config, "CONFIG_FILE", cfg_path), \
                    mock.patch.object(config, "CONFIG_DIR", cfg_dir):
                libraries = ["/home/u/.local/share/Steam", "/mnt/games/SteamLibrary"]
                config.update_config(steam_libraries=libraries)
                self.assertEqual(config.load_config()["steam_libraries"], libraries)
                # An empty list must round-trip too.
                config.update_config(steam_libraries=[])
                self.assertEqual(config.load_config()["steam_libraries"], [])

    def test_update_config_merges_and_keeps_defaults(self):
        with tempfile.TemporaryDirectory() as tmp:
            cfg_dir = Path(tmp)
            cfg_path = cfg_dir / "config.toml"
            with mock.patch.object(config, "CONFIG_FILE", cfg_path), \
                    mock.patch.object(config, "CONFIG_DIR", cfg_dir):
                config.update_config(cpu_temp_alert=70.0)
                self.assertEqual(config.load_config()["cpu_temp_alert"], 70.0)
                self.assertEqual(config.load_config()["gpu_temp_alert"], 90.0)  # default preserved
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,85 @@
|
||||
"""Tests for user-added games (M6): add/remove/scan of titles no launcher reports (e.g. SPT)."""
|
||||
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from rigdoctor.core import customgames
|
||||
|
||||
|
||||
class CustomGamesTests(unittest.TestCase):
    """Add/remove/scan behaviour of user-added games, backed by a throwaway JSON file."""

    def setUp(self):
        # Point the module's storage file at a path inside a per-test temp directory.
        self._scratch = tempfile.TemporaryDirectory()
        self._root = Path(self._scratch.name)
        self._store = self._root / "custom-games.json"
        self._redirect = mock.patch.object(customgames.config, "CUSTOM_GAMES_FILE", self._store)
        self._redirect.start()

    def tearDown(self):
        self._redirect.stop()
        self._scratch.cleanup()

    def test_missing_file_scans_empty(self):
        for listing in (customgames.scan, customgames.names):
            self.assertEqual(listing(), [])

    def test_add_then_scan_returns_game(self):
        self.assertTrue(customgames.add("SPT"))
        found = customgames.scan()
        self.assertEqual(len(found), 1)
        only = found[0]
        self.assertEqual(only.name, "SPT")
        self.assertEqual(only.launcher, "custom")
        self.assertTrue(self._store.exists())  # persisted

    def test_add_is_idempotent_case_insensitive(self):
        self.assertTrue(customgames.add("SPT"))
        # "spt" is already present (case-insensitively); " " is blank
        for rejected in ("spt", " "):
            self.assertFalse(customgames.add(rejected))
        self.assertEqual(customgames.names(), ["SPT"])

    def test_remove(self):
        for title in ("SPT", "Minecraft"):
            customgames.add(title)
        self.assertTrue(customgames.remove("spt"))  # case-insensitive
        self.assertEqual(customgames.names(), ["Minecraft"])
        self.assertFalse(customgames.remove("nope"))

    def test_scan_sorted_by_name(self):
        for title in ("Zomboid", "Apex", "SPT"):
            customgames.add(title)
        scanned = [game.name for game in customgames.scan()]
        self.assertEqual(scanned, ["Apex", "SPT", "Zomboid"])

    def test_command_and_logdir_stored_and_resolved(self):
        log_dir = self._root / "logs"
        log_dir.mkdir()
        launcher = self._root / "tarkov.sh"
        launcher.write_text("#!/bin/sh\n")
        self.assertTrue(customgames.add("SPT", command=str(launcher), logdir=str(log_dir)))
        self.assertEqual(customgames.command("SPT"), [str(launcher)])
        self.assertEqual(customgames.log_dir("SPT"), str(log_dir))

    def test_logdir_inferred_from_sibling_logs(self):
        # A command with a sibling logs/ dir (SPT's layout) → logdir auto-detected.
        launcher = self._root / "tarkov.sh"
        launcher.write_text("#!/bin/sh\n")
        sibling_logs = self._root / "logs"
        sibling_logs.mkdir()
        self.assertTrue(customgames.add("SPT", command=str(launcher)))
        self.assertEqual(customgames.log_dir("SPT"), str(sibling_logs))

    def test_no_command_resolves_to_none(self):
        customgames.add("SPT")
        for title in ("SPT", "missing"):
            self.assertIsNone(customgames.command(title))
        self.assertIsNone(customgames.log_dir("SPT"))

    def test_corrupt_file_degrades_to_empty(self):
        self._store.parent.mkdir(parents=True, exist_ok=True)
        self._store.write_text("{not json")
        self.assertEqual(customgames.scan(), [])
        # and a subsequent add still works (overwrites the garbage)
        self.assertTrue(customgames.add("SPT"))
        self.assertEqual(customgames.names(), ["SPT"])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,111 @@
|
||||
"""Tests for the guided diagnostic orchestration (M3+M4 glue)."""
|
||||
|
||||
import tempfile
|
||||
import time
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from rigdoctor.core import diagnostic
|
||||
from rigdoctor.core.crashlog import CrashLogWriter, summarize
|
||||
from rigdoctor.core.health import Finding
|
||||
from rigdoctor.core.sample import Reading, Sample
|
||||
|
||||
|
||||
def _write_log(path: str, game: str) -> None:
    """Write a small synthetic capture log to *path* for the tests below.

    The log holds, in order: a ``session-start`` event, a ``game`` event naming
    *game*, three GPU temperature samples (60, 72 and 81 °C) and a ``gpu-lost``
    event. No ``session-stop`` event is written, so the session is left unterminated.
    """
    w = CrashLogWriter(path)
    try:
        w.write_event("session-start", "interval=1s")
        w.write_event("game", game)
        for temp in (60.0, 72.0, 81.0):
            w.write_sample(Sample(ts=time.time(), readings=[Reading("gpu", "temp", temp, "°C", "")]))
        w.write_event("gpu-lost", "nvidia-smi query timed out")
    finally:
        # Close the writer even if a write raises, so a failing test does not leak it.
        w.close()
|
||||
|
||||
|
||||
class GameRecoveryTests(unittest.TestCase):
    """The game name is recovered from a capture log's ``game`` event, when there is one."""

    def test_game_recovered_from_log_event(self):
        with tempfile.TemporaryDirectory() as scratch:
            capture = str(Path(scratch) / "capture.jsonl")
            _write_log(capture, "Path of Exile 2")
            recovered = diagnostic._game_from_summary(summarize(capture))
            self.assertEqual(recovered, "Path of Exile 2")

    def test_no_game_event_returns_none(self):
        with tempfile.TemporaryDirectory() as scratch:
            capture = str(Path(scratch) / "capture.jsonl")
            # a log holding only the session-start marker, no "game" event
            writer = CrashLogWriter(capture)
            writer.write_event("session-start")
            writer.close()
            recovered = diagnostic._game_from_summary(summarize(capture))
            self.assertIsNone(recovered)
|
||||
|
||||
|
||||
class FinishTests(unittest.TestCase):
    """``diagnostic.finish`` returns the log summary together with the health findings."""

    def test_finish_combines_summary_and_findings(self):
        with tempfile.TemporaryDirectory() as scratch:
            capture = Path(scratch) / "capture.jsonl"
            _write_log(str(capture), "Satisfactory")
            canned = [Finding("warning", "GPU", "NVIDIA Xid 79 ×1", "fell off the bus")]
            # Stub out the live health checks and the background-recorder controls.
            health = mock.patch("rigdoctor.core.health.run_health_checks", return_value=canned)
            no_stop = mock.patch.object(diagnostic.reccontrol, "stop_background", return_value=False)
            no_pid = mock.patch.object(diagnostic.reccontrol, "running_pid", return_value=None)
            with health, no_stop, no_pid:
                outcome = diagnostic.finish(log_path=capture)
            self.assertEqual(outcome.game, "Satisfactory")
            self.assertEqual(outcome.summary.samples, 3)
            self.assertEqual(outcome.findings, canned)
            # peak GPU temp captured in the window, GPU-lost event recorded
            self.assertEqual(outcome.summary.maxima["gpu.temp"][0], 81.0)
            event_kinds = [kind for _ts, kind, _detail in outcome.summary.events]
            self.assertTrue(any(kind == "gpu-lost" for kind in event_kinds))
|
||||
|
||||
|
||||
class CrashDetectionTests(unittest.TestCase):
    """Expectations for ``diagnostic.pending_crash`` / ``acknowledge_crash``.

    Each test writes a diagnostic log into a per-test temp directory, redirects the
    module's log and crash-marker paths there, and fakes the recorder's running PID.
    """

    def setUp(self):
        scratch = tempfile.TemporaryDirectory()
        self.addCleanup(scratch.cleanup)
        self.log = self._diag_log(scratch.name)

    def _diag_log(self, d) -> Path:
        """Return the diagnostic log path inside directory *d*."""
        return Path(d) / "diagnostic.jsonl"

    def _redirect(self, running_pid=None) -> None:
        """Patch the diagnostic paths to the temp log and fake the recorder PID.

        The patches stay active until the end of the test. Cleanups run in reverse
        order, so they are undone before the temp directory is removed.
        """
        patchers = (
            mock.patch.object(diagnostic.config, "DIAG_LOG", self.log),
            mock.patch.object(diagnostic.config, "DIAG_CRASH", self.log.with_suffix(".crash")),
            mock.patch.object(diagnostic.reccontrol, "running_pid", return_value=running_pid),
        )
        for patcher in patchers:
            patcher.start()
            self.addCleanup(patcher.stop)

    def test_unterminated_session_is_a_pending_crash(self):
        _write_log(str(self.log), "Tarkov")  # has session-start + game, no session-stop
        self._redirect()
        info = diagnostic.pending_crash()
        self.assertIsNotNone(info)
        self.assertEqual(info.game, "Tarkov")
        self.assertTrue(info.gpu_lost)  # _write_log writes a gpu-lost event

    def test_clean_stop_is_not_a_crash(self):
        w = CrashLogWriter(str(self.log))
        w.write_event("session-start")
        w.write_event("game", "X")
        w.write_sample(Sample(time.time(), [Reading("gpu", "temp", 60.0, "°C", "")]))
        w.write_event("session-stop", "samples=1")
        w.close()
        self._redirect()
        self.assertIsNone(diagnostic.pending_crash())

    def test_acknowledge_clears_pending_crash(self):
        _write_log(str(self.log), "Tarkov")
        self._redirect()
        self.assertIsNotNone(diagnostic.pending_crash())
        diagnostic.acknowledge_crash()
        self.assertIsNone(diagnostic.pending_crash())

    def test_running_capture_is_not_a_crash(self):
        _write_log(str(self.log), "Tarkov")
        self._redirect(running_pid=4321)
        self.assertIsNone(diagnostic.pending_crash())  # it's in-progress, not crashed
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,104 @@
|
||||
"""Tests for M15 per-diagnostic storage + Report bundles + app logging."""
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
import unittest
|
||||
import zipfile
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from rigdoctor.core import applog, diagstore
|
||||
|
||||
|
||||
@dataclass
class FakeSummary:
    """Minimal stand-in for a capture summary, with fixed defaults for the store tests."""

    start: float = 1.0
    end: float = 2.0
    samples: int = 3
    # default_factory gives every instance its own list rather than a shared one
    events: list = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
class FakeFinding:
    """Minimal stand-in for a health finding; the defaults describe a healthy GPU result."""

    severity: str = "ok"
    category: str = "GPU"
    title: str = "Looks fine"
    detail: str = "no issues"
|
||||
|
||||
|
||||
@dataclass
class FakeResult:
    """Minimal stand-in for a diagnostic result, as passed to ``diagstore.store`` below."""

    game: str = "Path of Exile 2"
    # each instance gets a fresh summary and a fresh one-element findings list
    summary: FakeSummary = field(default_factory=FakeSummary)
    findings: list = field(default_factory=lambda: [FakeFinding()])
    dir: str | None = None
|
||||
|
||||
|
||||
class StoreTests(unittest.TestCase):
    """Per-diagnostic storage: artifact files, recorded AI exchanges and report bundles."""

    def setUp(self):
        # Use a self-cleaning temp directory so each test does not leave one behind
        # (a bare mkdtemp() is never removed).
        scratch = tempfile.TemporaryDirectory()
        self.addCleanup(scratch.cleanup)
        self.tmp = Path(scratch.name)

    def test_disabled_returns_none(self):
        with mock.patch.object(diagstore, "enabled", return_value=False):
            self.assertIsNone(diagstore.store(FakeResult()))

    def test_store_writes_artifacts(self):
        # Every collector is stubbed, so only the storage layout is exercised.
        with mock.patch.object(diagstore, "enabled", return_value=True), \
                mock.patch("rigdoctor.render.render_summary", return_value="SUMMARY-TEXT"), \
                mock.patch("rigdoctor.core.gamelogs.collect", return_value="LOG-TEXT"), \
                mock.patch("rigdoctor.core.syslogs.collect", return_value="SYS-LOG"), \
                mock.patch("rigdoctor.core.inventory.collect", return_value=[]), \
                mock.patch.object(diagstore.config, "DIAGNOSTICS_DIR", self.tmp / "diagnostics"):
            directory = diagstore.store(FakeResult())
        # Fail with a clear message, not an AttributeError, if nothing was stored.
        self.assertIsNotNone(directory)
        self.assertTrue((directory / "result.json").exists())
        self.assertTrue((directory / "report.txt").exists())
        self.assertEqual((directory / "gamelogs.txt").read_text(), "LOG-TEXT")
        self.assertEqual((directory / "syslogs.txt").read_text(), "SYS-LOG")
        self.assertTrue((directory / "inventory.txt").exists())  # inventory included for debugging
        data = json.loads((directory / "result.json").read_text())
        self.assertEqual(data["game"], "Path of Exile 2")
        self.assertEqual(len(data["findings"]), 1)

    def test_record_ai_then_report_includes_ai_and_applog(self):
        diag = self.tmp / "20260522-poe2"
        diag.mkdir()
        diagstore.record_ai(diag, provider="claude", model="claude-opus-4-7",
                            system="SYS", prompt="EXACT DATA SENT", response="THE REPLY")
        ai_files = list((diag / "ai").glob("explain-*.json"))
        self.assertTrue(ai_files)
        record = json.loads(ai_files[0].read_text())
        self.assertEqual(record["model"], "claude-opus-4-7")
        self.assertEqual(record["data_sent_to_model"], "EXACT DATA SENT")
        self.assertEqual(record["model_reply"], "THE REPLY")

        app_log = self.tmp / "app.log"
        app_log.write_text("app log line")
        with mock.patch.object(diagstore.config, "REPORTS_DIR", self.tmp / "reports"), \
                mock.patch.object(diagstore.config, "APP_LOG", app_log):
            out = diagstore.make_report(diag)
        self.assertTrue(out.exists())
        with zipfile.ZipFile(out) as zf:
            names = zf.namelist()
        # the bundle must carry both the app log and the recorded AI exchange
        self.assertTrue(any(n.endswith("app.log") for n in names))
        self.assertTrue(any("/ai/explain-" in n for n in names))
|
||||
|
||||
|
||||
class AppLogTests(unittest.TestCase):
    """``applog.setup`` honours the ``logging_enabled`` config flag."""

    def test_disabled_is_noop(self):
        with mock.patch.object(applog.config, "load_config", return_value={"logging_enabled": False}):
            self.assertFalse(applog.setup(force=True))

    def test_enabled_writes_file(self):
        # Self-cleaning temp dir: a bare mkdtemp() would be left behind after the test.
        scratch = tempfile.TemporaryDirectory()
        self.addCleanup(scratch.cleanup)
        tmp = Path(scratch.name)
        with mock.patch.object(applog.config, "load_config", return_value={"logging_enabled": True}), \
                mock.patch.object(applog.config, "STATE_DIR", tmp), \
                mock.patch.object(applog.config, "APP_LOG", tmp / "app.log"):
            self.assertTrue(applog.setup(force=True))
            applog.get_logger("test").info("hello world")
            applog.setup(force=True)  # cleanup path: re-run detaches/reattaches cleanly
        self.assertTrue((tmp / "app.log").exists())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,67 @@
|
||||
"""Tests for display detection (Mutter D-Bus JSON + xrandr parsers)."""
|
||||
|
||||
import unittest
|
||||
|
||||
from rigdoctor.core import displays
|
||||
|
||||
# Minimal Mutter GetCurrentState (busctl --json) shape: current mode is 60 Hz, panel max 165 Hz.
|
||||
_MUTTER_60 = (
|
||||
'{"type":"x","data":[1,[[["DP-1","SAM","LC34G55T","S"],['
|
||||
'["3440x1440@60",3440,1440,60.0,1.0,[1.0],{"is-current":{"type":"b","data":true}}],'
|
||||
'["3440x1440@165",3440,1440,165.0,1.0,[1.0],{"is-preferred":{"type":"b","data":true}}]'
|
||||
'],{}]],[],{}]}'
|
||||
)
|
||||
_MUTTER_MAX = (
|
||||
'{"type":"x","data":[1,[[["DP-1","SAM","LC34G55T","S"],['
|
||||
'["3440x1440@165",3440,1440,165.0,1.0,[1.0],{"is-current":{"type":"b","data":true}}],'
|
||||
'["3440x1440@60",3440,1440,60.0,1.0,[1.0],{}]'
|
||||
'],{}]],[],{}]}'
|
||||
)
|
||||
|
||||
_XRANDR_60 = """Screen 0: minimum 8 x 8, current 3440 x 1440, maximum 16384 x 16384
|
||||
DP-1 connected primary 3440x1440+0+0 (normal left inverted right x axis y axis) 800mm x 335mm
|
||||
3440x1440 60.00*+ 165.00 100.00
|
||||
2560x1440 165.00 60.00
|
||||
HDMI-1 disconnected (normal left inverted right x axis y axis)
|
||||
"""
|
||||
|
||||
|
||||
class MutterParseTests(unittest.TestCase):
    """Parsing of the Mutter ``GetCurrentState`` JSON fixtures into monitor records."""

    def test_parses_and_flags_higher_refresh(self):
        monitors = displays._parse_mutter(_MUTTER_60)
        self.assertEqual(len(monitors), 1)
        monitor = monitors[0]
        self.assertEqual(monitor.connector, "DP-1")
        self.assertEqual(monitor.name, "Samsung LC34G55T")  # PNP code SAM mapped
        self.assertEqual((monitor.width, monitor.height), (3440, 1440))
        self.assertEqual(round(monitor.refresh), 60)
        self.assertEqual(round(monitor.max_refresh), 165)
        # running at 60 Hz on a panel that offers 165 Hz
        self.assertTrue(monitor.can_go_faster)

    def test_at_max_is_not_flagged(self):
        monitor = displays._parse_mutter(_MUTTER_MAX)[0]
        self.assertEqual(round(monitor.refresh), 165)
        self.assertFalse(monitor.can_go_faster)

    def test_garbage_returns_empty(self):
        for payload in ("not json", "{}"):
            self.assertEqual(displays._parse_mutter(payload), [])
|
||||
|
||||
|
||||
class XrandrParseTests(unittest.TestCase):
    """Parsing of ``xrandr`` text output into monitor records."""

    def test_current_and_max_refresh(self):
        monitors = displays._parse_xrandr(_XRANDR_60)
        self.assertEqual(len(monitors), 1)  # disconnected output ignored
        monitor = monitors[0]
        self.assertEqual(monitor.connector, "DP-1")
        self.assertEqual((monitor.width, monitor.height), (3440, 1440))
        self.assertEqual(round(monitor.refresh), 60)
        self.assertEqual(round(monitor.max_refresh), 165)
        self.assertTrue(monitor.can_go_faster)

    def test_empty_returns_empty(self):
        parsed = displays._parse_xrandr("")
        self.assertEqual(parsed, [])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,99 @@
|
||||
"""Tests for drive health parsing & findings (synthetic smartctl JSON)."""
|
||||
|
||||
import unittest
|
||||
from dataclasses import asdict
|
||||
|
||||
from rigdoctor.core import drives
|
||||
from rigdoctor.core.health import CRITICAL, INFO, OK, WARNING
|
||||
|
||||
_NVME_OK = {
|
||||
"model_name": "Samsung SSD 980 PRO 1TB",
|
||||
"device": {"protocol": "NVMe"},
|
||||
"smart_status": {"passed": True},
|
||||
"temperature": {"current": 41},
|
||||
"power_on_time": {"hours": 1234},
|
||||
"nvme_smart_health_information_log": {
|
||||
"percentage_used": 3, "available_spare": 100, "available_spare_threshold": 10,
|
||||
"media_errors": 0, "data_units_written": 200_000_000, # ~102 TB
|
||||
},
|
||||
}
|
||||
|
||||
_NVME_WORN = {
|
||||
"model_name": "Worn NVMe",
|
||||
"device": {"protocol": "NVMe"},
|
||||
"smart_status": {"passed": True},
|
||||
"nvme_smart_health_information_log": {"percentage_used": 96, "available_spare": 100,
|
||||
"available_spare_threshold": 10},
|
||||
}
|
||||
|
||||
_SATA_FAILING = {
|
||||
"model_name": "Samsung SSD 870 QVO 1TB",
|
||||
"device": {"protocol": "ATA"},
|
||||
"smart_status": {"passed": False},
|
||||
"temperature": {"current": 35},
|
||||
"power_on_time": {"hours": 5000},
|
||||
"ata_smart_attributes": {"table": [
|
||||
{"id": 5, "name": "Reallocated_Sector_Ct", "value": 80, "raw": {"value": 12}},
|
||||
{"id": 177, "name": "Wear_Leveling_Count", "value": 88, "raw": {"value": 300}},
|
||||
{"id": 241, "name": "Total_LBAs_Written", "value": 99, "raw": {"value": 2_000_000_000}},
|
||||
]},
|
||||
}
|
||||
|
||||
|
||||
class ParseTests(unittest.TestCase):
    """``drives.parse`` applied to the synthetic smartctl JSON fixtures."""

    def test_nvme_parse(self):
        drive = drives.parse("/dev/nvme0", _NVME_OK)
        self.assertEqual(drive.kind, "nvme")
        self.assertTrue(drive.passed)
        self.assertEqual(drive.percent_used, 3)
        self.assertEqual(drive.health_pct, 97)  # 100 - percentage_used
        self.assertEqual(drive.power_on_hours, 1234)
        self.assertEqual(drive.temp_c, 41)
        self.assertAlmostEqual(drive.data_written_tb, 102.4, places=1)

    def test_sata_parse(self):
        drive = drives.parse("/dev/sda", _SATA_FAILING)
        self.assertEqual(drive.kind, "sata")
        self.assertFalse(drive.passed)
        self.assertEqual(drive.reallocated, 12)  # raw value
        self.assertEqual(drive.health_pct, 88)  # normalized wear-leveling value
        self.assertAlmostEqual(drive.data_written_tb, 1.02, places=1)

    def test_needs_root_when_no_data(self):
        # no smartctl payload at all
        drive = drives.parse("/dev/sda", None)
        self.assertTrue(drive.needs_root)

    def test_roundtrip_through_dicts(self):
        original = drives.parse("/dev/nvme0", _NVME_OK)
        restored = drives.from_dicts([asdict(original)])
        self.assertEqual(len(restored), 1)
        first = restored[0]
        self.assertEqual(first.model, original.model)
        self.assertEqual(first.health_pct, original.health_pct)
|
||||
|
||||
|
||||
class FindingTests(unittest.TestCase):
    """Severity and wording of the first finding ``drives.to_findings`` yields per fixture."""

    def test_healthy_nvme_is_ok_with_stats(self):
        drive = drives.parse("/dev/nvme0", _NVME_OK)
        finding = drives.to_findings([drive])[0]
        self.assertEqual(finding.severity, OK)
        for fragment in ("97% life left", "1,234 h"):
            self.assertIn(fragment, finding.title)

    def test_failing_sata_is_critical(self):
        drive = drives.parse("/dev/sda", _SATA_FAILING)
        finding = drives.to_findings([drive])[0]
        self.assertEqual(finding.severity, CRITICAL)
        for fragment in ("FAILED", "reallocated sectors"):
            self.assertIn(fragment, finding.detail)

    def test_worn_nvme_is_warning(self):
        drive = drives.parse("/dev/nvme1", _NVME_WORN)
        finding = drives.to_findings([drive])[0]
        self.assertEqual(finding.severity, WARNING)
        self.assertIn("worn", finding.title)

    def test_needs_root_is_info(self):
        drive = drives.parse("/dev/sda", None)
        finding = drives.to_findings([drive])[0]
        self.assertEqual(finding.severity, INFO)
        self.assertIn("needs root", finding.title)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,63 @@
|
||||
"""Tests for M6 runtime tunables (parse, command builders, value validation)."""
|
||||
|
||||
import unittest
|
||||
from unittest import mock
|
||||
|
||||
from rigdoctor.core import fixes
|
||||
from rigdoctor.core.fixes import Tunable
|
||||
|
||||
|
||||
class ParseTests(unittest.TestCase):
    """``fixes._bracketed`` splits an option line and reports the bracketed entry."""

    def test_bracketed(self):
        parsed = fixes._bracketed("always [madvise] never")
        self.assertEqual(parsed, (["always", "madvise", "never"], "madvise"))

    def test_bracketed_none_active(self):
        parsed = fixes._bracketed("a b c")
        self.assertEqual(parsed, (["a", "b", "c"], None))
|
||||
|
||||
|
||||
class CommandBuilderTests(unittest.TestCase):
    """Shape of the command lists built for each runtime tunable."""

    def test_governor_cmd_writes_value_to_sysfs(self):
        argv = fixes._cpu_governor_cmd("performance")
        self.assertEqual(argv[:2], ["/bin/sh", "-c"])
        script = argv[2]
        self.assertIn("performance", script)
        self.assertIn("scaling_governor", script)

    def test_persistence_cmd(self):
        for label, flag in (("Enabled", "1"), ("Disabled", "0")):
            self.assertEqual(fixes._nvidia_persistence_cmd(label), ["nvidia-smi", "-pm", flag])

    def test_swappiness_cmd_targets_procfs(self):
        script = fixes._swappiness_cmd("10")[2]
        self.assertIn("/proc/sys/vm/swappiness", script)

    def test_quoting_is_safe(self):
        # A value that would be dangerous unquoted stays a single quoted token.
        script = fixes._pcie_aspm_cmd("performance; rm -rf /")[2]
        self.assertIn("'performance; rm -rf /'", script)
|
||||
|
||||
|
||||
class ApplyValidationTests(unittest.TestCase):
    """``apply_command`` / ``apply`` reject unknown fix ids and values not on offer."""

    def test_unknown_fix_returns_none(self):
        command = fixes.apply_command("does_not_exist", "x")
        self.assertIsNone(command)

    def test_value_validated_against_live_options(self):
        tunable = Tunable("x", "X", ["a", "b"], "a")
        # registry entry: (reader returning the tunable, command builder)
        entry = (lambda: tunable, lambda value: ["echo", value])
        with mock.patch.dict(fixes._TUNABLES, {"x": entry}, clear=False):
            self.assertEqual(fixes.apply_command("x", "a"), ["echo", "a"])
            self.assertIsNone(fixes.apply_command("x", "not-an-option"))

    def test_apply_unknown_is_error(self):
        status, _output = fixes.apply("nope", "x")
        self.assertEqual(status, 1)
|
||||
|
||||
|
||||
class GameenvWiringTests(unittest.TestCase):
    """Fix ids attached to gameenv findings must exist in the tunables registry."""

    def test_findings_reference_known_fix_ids(self):
        from rigdoctor.core import gameenv

        known = set(fixes._TUNABLES)
        referenced = {finding.fix for finding in gameenv.run_gameenv_checks() if finding.fix}
        # Whatever fixes the live system surfaces, each must be a real tunable id.
        self.assertTrue(referenced.issubset(known))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,73 @@
|
||||
"""Tests for M6 gaming-environment checks (pure evaluators + aggregate smoke test)."""
|
||||
|
||||
import unittest
|
||||
|
||||
from rigdoctor.core import gameenv
|
||||
from rigdoctor.core.health import Finding
|
||||
|
||||
|
||||
class AspmTests(unittest.TestCase):
    """Severity that ``evaluate_aspm`` assigns to each bracketed ASPM policy."""

    def test_powersave_is_warning(self):
        finding = gameenv.evaluate_aspm("[powersave] performance powersupersave\n")
        self.assertEqual(finding.severity, "warning")
        self.assertEqual(finding.category, "PCIe")

    def test_performance_is_ok(self):
        finding = gameenv.evaluate_aspm("[performance] powersave powersupersave")
        self.assertEqual(finding.severity, "ok")

    def test_default_is_info(self):
        finding = gameenv.evaluate_aspm("[default] performance powersave")
        self.assertEqual(finding.severity, "info")

    def test_missing_is_none(self):
        # no input at all, and input without a bracketed entry
        for raw in (None, "no brackets here"):
            self.assertIsNone(gameenv.evaluate_aspm(raw))
|
||||
|
||||
|
||||
class GovernorTests(unittest.TestCase):
    """Severity that ``evaluate_governor`` assigns to a set of CPU governors."""

    def test_performance_only_is_ok(self):
        finding = gameenv.evaluate_governor({"performance"})
        self.assertEqual(finding.severity, "ok")

    def test_powersave_is_warning(self):
        finding = gameenv.evaluate_governor({"powersave"})
        self.assertEqual(finding.severity, "warning")
        self.assertEqual(finding.fix, "cpu_governor")  # offers the live Apply dropdown

    def test_dynamic_is_info(self):
        finding = gameenv.evaluate_governor({"schedutil"})
        self.assertEqual(finding.severity, "info")

    def test_empty_is_none(self):
        finding = gameenv.evaluate_governor(set())
        self.assertIsNone(finding)
|
||||
|
||||
|
||||
class SwappinessTests(unittest.TestCase):
    """Severity that ``evaluate_swappiness`` assigns to a swappiness value."""

    def test_high_is_info_with_suggestion(self):
        finding = gameenv.evaluate_swappiness(60)
        self.assertEqual(finding.severity, "info")
        self.assertEqual(finding.fix, "swappiness")  # offers the live Apply dropdown

    def test_low_is_ok(self):
        finding = gameenv.evaluate_swappiness(10)
        self.assertEqual(finding.severity, "ok")
|
||||
|
||||
|
||||
class ShaderCacheTests(unittest.TestCase):
    """Severity that ``evaluate_shader_cache`` assigns to an environment mapping."""

    def test_disabled_nvidia_is_warning(self):
        environ = {"__GL_SHADER_DISK_CACHE": "0"}
        self.assertEqual(gameenv.evaluate_shader_cache(environ).severity, "warning")

    def test_disabled_mesa_is_warning(self):
        environ = {"MESA_SHADER_CACHE_DISABLE": "true"}
        self.assertEqual(gameenv.evaluate_shader_cache(environ).severity, "warning")

    def test_default_is_ok(self):
        # neither switch is set
        self.assertEqual(gameenv.evaluate_shader_cache({}).severity, "ok")
|
||||
|
||||
|
||||
class AggregateTests(unittest.TestCase):
    """Smoke test of ``run_gameenv_checks`` on the machine running the tests."""

    def test_run_returns_sorted_findings(self):
        results = gameenv.run_gameenv_checks()
        self.assertTrue(all(isinstance(item, Finding) for item in results))
        # rank each severity; anything unrecognised sorts last
        rank = {"critical": 0, "warning": 1, "info": 2, "ok": 3}
        ranks = [rank.get(item.severity, 9) for item in results]
        self.assertEqual(ranks, sorted(ranks))  # worst-first
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,107 @@
|
||||
"""Tests for M14 game/Proton/Steam log collection."""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
import time
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from rigdoctor.core import gamelogs
|
||||
|
||||
|
||||
class TailTests(unittest.TestCase):
    """``gamelogs._tail`` on a long file, a short file and a missing file."""

    def setUp(self):
        # Self-cleaning temp dir: a bare mkdtemp() would be left behind by every test.
        scratch = tempfile.TemporaryDirectory()
        self.addCleanup(scratch.cleanup)
        self.path = Path(scratch.name) / "x.log"

    def test_tail_returns_last_bytes(self):
        self.path.write_text("A" * 100 + "TAIL")
        out = gamelogs._tail(self.path, 4)
        self.assertEqual(out, "TAIL")

    def test_tail_short_file(self):
        # asking for more than the file holds returns the whole file
        self.path.write_text("short")
        self.assertEqual(gamelogs._tail(self.path, 9999), "short")

    def test_tail_missing(self):
        self.assertEqual(gamelogs._tail(Path("/nope/x.log"), 10), "")
|
||||
|
||||
|
||||
class CollectTests(unittest.TestCase):
    """``gamelogs.collect`` with the Proton-log and Steam-console lookups stubbed."""

    def test_collect_includes_proton_and_steam(self):
        # The context manager removes the directory afterwards; mkdtemp() would not.
        with tempfile.TemporaryDirectory() as d:
            tmp = Path(d)
            proton = tmp / "steam-570.log"
            proton.write_text("err: vkd3d device lost")
            console = tmp / "console-linux.txt"
            console.write_text("Game removed AppID 570 ... exit")
            with mock.patch.object(gamelogs, "_proton_logs", return_value=[proton]), \
                    mock.patch.object(gamelogs, "_steam_console", return_value=console):
                out = gamelogs.collect()
        self.assertIn("Proton log", out)
        self.assertIn("vkd3d", out)
        self.assertIn("Steam log", out)
        self.assertIn("exit", out)

    def test_collect_empty_when_none(self):
        with mock.patch.object(gamelogs, "_proton_logs", return_value=[]), \
                mock.patch.object(gamelogs, "_steam_console", return_value=None):
            self.assertEqual(gamelogs.collect(), "")
|
||||
|
||||
|
||||
class CustomGameLogTests(unittest.TestCase):
    """``gamelogs.collect(game=...)`` with a custom game's log directory stubbed."""

    def _scratch_dir(self) -> Path:
        """Return a fresh temp directory that is removed when the test finishes."""
        scratch = tempfile.TemporaryDirectory()
        self.addCleanup(scratch.cleanup)
        return Path(scratch.name)

    def test_collect_includes_custom_game_logs(self):
        tmp = self._scratch_dir()
        (tmp / "tarkov-latest.log").write_text(">>> Tarkov gone. clean exit")
        (tmp / "server-latest.log").write_text("SPT server error: mod failed to load")
        with mock.patch.object(gamelogs, "_proton_logs", return_value=[]), \
                mock.patch.object(gamelogs, "_steam_console", return_value=None), \
                mock.patch("rigdoctor.core.customgames.log_dir", return_value=str(tmp)):
            out = gamelogs.collect(game="SPT")
        self.assertIn("SPT log", out)
        self.assertIn("server-latest.log", out)
        self.assertIn("mod failed to load", out)

    def test_custom_logs_skipped_when_stale(self):
        tmp = self._scratch_dir()
        old = tmp / "tarkov-latest.log"
        old.write_text("an earlier session")
        # Backdate the file by an hour so it predates the one-minute window below.
        old_mtime = time.time() - 3600
        os.utime(old, (old_mtime, old_mtime))
        with mock.patch.object(gamelogs, "_proton_logs", return_value=[]), \
                mock.patch.object(gamelogs, "_steam_console", return_value=None), \
                mock.patch("rigdoctor.core.customgames.log_dir", return_value=str(tmp)):
            self.assertEqual(gamelogs.collect(since=time.time() - 60, game="SPT"), "")

    def test_no_game_means_no_custom_logs(self):
        with mock.patch.object(gamelogs, "_proton_logs", return_value=[]), \
                mock.patch.object(gamelogs, "_steam_console", return_value=None):
            self.assertEqual(gamelogs.collect(), "")  # game=None → custom lookup skipped
|
||||
|
||||
|
||||
class SinceScopingTests(unittest.TestCase):
    """Scoping of collected logs to the window that starts at ``since``."""

    def test_since_filter_keeps_window_only(self):
        text = (
            "[2026-05-22 13:00:00] old session line\n"
            "[2026-05-22 13:00:01] another old line\n"
            "[2026-05-22 14:30:00] new session launch\n"
            "[2026-05-22 14:30:05] new session error\n"
        )
        # Cut-off at 14:00 sits between the 13:00 lines and the 14:30 lines.
        since = time.mktime(time.strptime("2026-05-22 14:00:00", "%Y-%m-%d %H:%M:%S"))
        out = gamelogs._since_filter(text, since)
        self.assertIn("new session launch", out)
        self.assertIn("new session error", out)
        self.assertNotIn("old session", out)

    def test_collect_skips_stale_proton_log(self):
        # The context manager removes the directory afterwards; mkdtemp() would not.
        with tempfile.TemporaryDirectory() as d:
            proton = Path(d) / "steam-9999.log"
            proton.write_text("stale proton output from an earlier game")
            old_mtime = time.time() - 3600
            os.utime(proton, (old_mtime, old_mtime))
            since = time.time() - 60  # session started a minute ago
            with mock.patch.object(gamelogs, "_proton_logs", return_value=[proton]), \
                    mock.patch.object(gamelogs, "_steam_console", return_value=None):
                self.assertEqual(gamelogs.collect(since=since), "")  # stale log excluded
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,76 @@
|
||||
"""GUI smoke tests: construct the real widgets so a startup crash fails the build.
|
||||
|
||||
These run headless (offscreen) and skip cleanly if PySide6 isn't installed (the core/CLI
|
||||
test suite stays Qt-free). Constructing MainWindow is the check that would have caught the
|
||||
0.18.0 bad-import regression that broke launch.
|
||||
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
import unittest
|
||||
|
||||
os.environ.setdefault("QT_QPA_PLATFORM", "offscreen")
|
||||
|
||||
try:
|
||||
from PySide6.QtGui import QIcon
|
||||
from PySide6.QtWidgets import QApplication, QWidget
|
||||
HAVE_QT = True
|
||||
except ImportError:
|
||||
HAVE_QT = False
|
||||
|
||||
|
||||
@unittest.skipUnless(HAVE_QT, "PySide6 not installed")
class GuiSmokeTests(unittest.TestCase):
    """Construct the real GUI widgets so that a crash at construction fails the suite."""

    @classmethod
    def setUpClass(cls):
        # Reuse an existing QApplication if there is one, otherwise create one.
        cls.app = QApplication.instance() or QApplication([])

    def test_main_window_constructs(self):
        from unittest import mock

        from rigdoctor.core import updates
        from rigdoctor.gui import main_window as mw

        # Avoid construction side effects: no pkexec elevation, no network update check.
        no_elevation = mock.patch("rigdoctor.core.elevation.available", return_value=False)
        no_update_check = mock.patch.object(
            updates, "update_state", return_value=(updates.UP_TO_DATE, None, "")
        )
        with no_elevation, no_update_check:
            window = mw.MainWindow()
            try:
                # exactly one nav button per registered page
                self.assertEqual(len(window._nav_buttons), len(mw._PAGES))
                self.assertEqual(set(window._nav_buttons), set(mw._PAGES))
            finally:
                window._worker.stop()

    def test_tray_readouts_update(self):
        from rigdoctor.core.sample import Reading, Sample
        from rigdoctor.gui.tray import TrayIcon

        class StubWindow(QWidget):
            # no-op versions of the window methods defined for the tray to call
            def show_dashboard(self): ...
            def show_page(self, name): ...
            def run_diagnostic(self, name, appid): ...
            def quit_app(self): ...

        readings = [
            Reading("gpu", "temp", 72.0, "°C", ""),
            Reading("cpu", "temp", 65.0, "°C", "Package id 0"),
            Reading("memory", "used", 14.2, "GB"),
            Reading("memory", "total", 31.0, "GB"),
            Reading("memory", "used_pct", 46.0, "%"),
        ]
        tray = TrayIcon(StubWindow(), QIcon())
        tray.update_sample(Sample(time.time(), readings))
        self.assertIn("72", tray._gpu_act.text())
        self.assertIn("65", tray._cpu_act.text())
        self.assertIn("14.2 / 31.0 GB", tray._mem_act.text())
        self.assertEqual(tray._status_act.text(), "● Normal")

    def test_setup_wizard_constructs(self):
        from rigdoctor.gui.setup_wizard import SetupWizard

        wizard = SetupWizard()
        page_count = wizard._stack.count()
        self.assertEqual(page_count, 5)  # welcome/bundles/install/trigger/finish
        self.assertTrue(wizard._bundle_checks)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,8 +1,28 @@
|
||||
"""Tests for the M4 health report's log scanner (synthetic input)."""
|
||||
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from rigdoctor.core.health import CRITICAL, WARNING, run_health_checks, scan_journal_text
|
||||
from rigdoctor.core import displays, health
|
||||
from rigdoctor.core.health import (
|
||||
CRITICAL,
|
||||
INFO,
|
||||
WARNING,
|
||||
check_displays,
|
||||
check_memory_speed,
|
||||
check_nvidia_module,
|
||||
check_pcie_links,
|
||||
run_health_checks,
|
||||
scan_journal_text,
|
||||
)
|
||||
|
||||
# A real no-Xid freeze: the open-module VA-space storm captured on 2026-05-29.
|
||||
# Three kernel-log lines (two NVRM messages plus one nvidia_drm error); none mentions an Xid.
# The literal holds only the log lines themselves - no table or markup characters.
_VASPACE_LOG = """\
NVRM: nvCheckFailedNoLog: Check failed: 0 == (pMapNode->gpuMask & gpuMask) @ gpu_vaspace.c:4547
NVRM: dmaAllocMapping_GM107: can't update VA space for mapping @vaddr=0x4be00000
[drm:nv_drm_gem_alloc_nvkms_memory_ioctl [nvidia_drm]] *ERROR* Failed to allocate NVKMS memory for GEM object
"""
|
||||
|
||||
|
||||
class HealthScanTests(unittest.TestCase):
|
||||
@@ -32,6 +52,28 @@ class HealthScanTests(unittest.TestCase):
|
||||
def test_clean_text_yields_no_findings(self):
    # Ordinary USB / bluetooth chatter must not produce any finding.
    harmless = "usb 1-1: new high-speed USB device\nbluetooth: ok"
    findings = scan_journal_text(harmless)
    self.assertEqual(findings, [])
|
||||
|
||||
def test_vaspace_freeze_detected_without_any_xid(self):
    # The VA-space sample must yield exactly one GPU-category finding.
    gpu_findings = [item for item in scan_journal_text(_VASPACE_LOG) if item.category == "GPU"]
    self.assertEqual(len(gpu_findings), 1)
    finding = gpu_findings[0]
    self.assertEqual(finding.severity, WARNING)
    self.assertIn("VA-space", finding.title)
    # It must NOT be misreported as an Xid finding (the log has no Xid at all).
    self.assertNotIn("Xid", finding.title)
    self.assertIn("open kernel module", finding.detail.lower())
|
||||
|
||||
def test_open_module_finding_when_open_loaded(self):
    # Force the probe to report the open module, then expect a single INFO "Driver" finding.
    probe = mock.patch("rigdoctor.core.health._nvidia_module_is_open", return_value=True)
    with probe:
        findings = check_nvidia_module()
    self.assertEqual(len(findings), 1)
    only = findings[0]
    self.assertEqual(only.severity, INFO)
    self.assertEqual(only.category, "Driver")
|
||||
|
||||
def test_no_module_finding_when_proprietary_or_absent(self):
    # The probe is patched to return False, then None; neither may produce a finding.
    # subTest labels each case, so a failure names the state that broke and the
    # remaining state is still exercised instead of being hidden by the first failure.
    for state in (False, None):
        with self.subTest(state=state):
            with mock.patch("rigdoctor.core.health._nvidia_module_is_open", return_value=state):
                self.assertEqual(check_nvidia_module(), [])
|
||||
|
||||
def test_run_health_checks_returns_findings(self):
|
||||
# Runs against the real system; just assert it returns a sorted list of Findings.
|
||||
findings = run_health_checks()
|
||||
@@ -42,5 +84,70 @@ class HealthScanTests(unittest.TestCase):
|
||||
self.assertEqual(ranks, sorted(ranks))
|
||||
|
||||
|
||||
class PcieLinkCheckTests(unittest.TestCase):
    """``check_pcie_links`` severity for a single patched-in NVMe controller."""

    def _with_link(self, cur_g, cur_w, max_g, max_w):
        """Return two un-started patchers: one fake controller, and its link tuple."""
        # one fake NVMe controller returning the given link tuple
        controllers = mock.patch("rigdoctor.core.inventory.nvme_controllers",
                                 return_value=[("nvme0", Path("/x"))])
        link = mock.patch("rigdoctor.core.inventory.read_link",
                          return_value=(cur_g, cur_w, max_g, max_w))
        return (controllers, link)

    def test_reduced_width_is_a_warning_about_lane_sharing(self):
        patch_ctrls, patch_link = self._with_link(4, "2", 4, "4")  # Gen4 x2 but supports x4
        with patch_ctrls, patch_link:
            results = check_pcie_links()
        self.assertEqual(len(results), 1)
        finding = results[0]
        self.assertEqual(finding.severity, WARNING)
        self.assertIn("lane-sharing", finding.detail)

    def test_reduced_speed_only_is_info(self):
        patch_ctrls, patch_link = self._with_link(3, "4", 4, "4")  # Gen3 x4 but supports Gen4
        with patch_ctrls, patch_link:
            results = check_pcie_links()
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0].severity, INFO)

    def test_full_speed_no_finding(self):
        patch_ctrls, patch_link = self._with_link(4, "4", 4, "4")
        with patch_ctrls, patch_link:
            self.assertEqual(check_pcie_links(), [])
|
||||
|
||||
|
||||
class DisplayCheckTests(unittest.TestCase):
    """``check_displays`` with ``displays.collect`` patched to return one fake monitor."""

    def test_lower_than_max_refresh_is_flagged(self):
        # Last two Monitor arguments are 60.0 and 165.0 here (they differ).
        monitor = displays.Monitor("DP-1", "Samsung LC34G55T", 3440, 1440, 60.0, 165.0)
        fake_collect = mock.patch("rigdoctor.core.displays.collect", return_value=[monitor])
        with fake_collect:
            results = check_displays()
        self.assertEqual(len(results), 1)
        finding = results[0]
        self.assertEqual(finding.severity, INFO)
        self.assertIn("165", finding.title)

    def test_at_max_refresh_no_finding(self):
        # Last two Monitor arguments are both 165.0 here.
        monitor = displays.Monitor("DP-1", "Samsung LC34G55T", 3440, 1440, 165.0, 165.0)
        fake_collect = mock.patch("rigdoctor.core.displays.collect", return_value=[monitor])
        with fake_collect:
            self.assertEqual(check_displays(), [])
|
||||
|
||||
|
||||
class MemorySpeedCheckTests(unittest.TestCase):
    """``check_memory_speed`` against a patched single-module dmidecode result."""

    def _dmi(self, configured, part):
        """Build a one-module dmidecode-style dict; both speed fields get ``configured``."""
        module = {
            "Configured Memory Speed": configured,
            "Speed": configured,
            "Part Number": part,
        }
        return {"memory": [module]}

    def test_flags_unapplied_expo(self):
        dmi = self._dmi("4800 MT/s", "CMK32GX5M2B5600Z36")
        no_privilege = mock.patch("rigdoctor.core.elevation.privileged", return_value=None)
        fake_dmidecode = mock.patch("rigdoctor.core.inventory._dmidecode", return_value=dmi)
        with no_privilege, fake_dmidecode:
            results = check_memory_speed()
        self.assertEqual(len(results), 1)
        finding = results[0]
        self.assertEqual(finding.severity, INFO)
        self.assertIn("5600", finding.title)

    def test_no_flag_at_rated(self):
        dmi = self._dmi("5600 MT/s", "CMK32GX5M2B5600Z36")
        no_privilege = mock.patch("rigdoctor.core.elevation.privileged", return_value=None)
        fake_dmidecode = mock.patch("rigdoctor.core.inventory._dmidecode", return_value=dmi)
        with no_privilege, fake_dmidecode:
            self.assertEqual(check_memory_speed(), [])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
import unittest
|
||||
|
||||
from rigdoctor.core import installer
|
||||
from rigdoctor.core import catalog, installer
|
||||
from rigdoctor.core.catalog import Component
|
||||
from rigdoctor.core.updates import is_newer
|
||||
|
||||
@@ -31,6 +31,13 @@ class InstallerTests(unittest.TestCase):
|
||||
rc, _ = installer.install_packages([])
|
||||
self.assertEqual(rc, 0)
|
||||
|
||||
def test_by_bundle_groups_all_components(self):
    # Flattening every bundle must give back as many entries as catalog.COMPONENTS has.
    bundles = catalog.by_bundle()
    flattened = []
    for members in bundles.values():
        flattened.extend(members)
    self.assertEqual(len(flattened), len(catalog.COMPONENTS))
    for expected_bundle in ("Gaming", "Diagnostics"):
        self.assertIn(expected_bundle, bundles)
|
||||
|
||||
|
||||
class UpdateTests(unittest.TestCase):
|
||||
def test_is_newer(self):
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
"""Tests for the M5 system inventory (render + dict round-trip; collect on real system)."""
|
||||
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from rigdoctor.core import inventory
|
||||
from rigdoctor.core.inventory import Section
|
||||
@@ -26,5 +28,49 @@ class InventoryTests(unittest.TestCase):
|
||||
self.assertIn("- **Model:** Test CPU", md)
|
||||
|
||||
|
||||
class PcieLinkTests(unittest.TestCase):
    """PCIe link helpers in ``inventory``: speed-to-generation mapping and link descriptions."""

    def test_gen_mapping(self):
        self.assertEqual(inventory._gen("16.0 GT/s PCIe"), 4)
        self.assertEqual(inventory._gen("8.0 GT/s PCIe"), 3)
        self.assertIsNone(inventory._gen(""))

    def _fake_dev(self, cur_s, cur_w, max_s, max_w) -> Path:
        """Create a temp directory holding the four link attribute files.

        Returns the directory path. The directory is registered with ``addCleanup`` so it
        is deleted when the test finishes; previously every call left one behind.
        """
        import shutil  # local import: only this helper needs it

        d = Path(tempfile.mkdtemp())
        self.addCleanup(shutil.rmtree, d, ignore_errors=True)
        (d / "current_link_speed").write_text(cur_s)
        (d / "current_link_width").write_text(cur_w)
        (d / "max_link_speed").write_text(max_s)
        (d / "max_link_width").write_text(max_w)
        return d

    def test_link_at_full_speed(self):
        dev = self._fake_dev("16.0 GT/s PCIe", "4", "16.0 GT/s PCIe", "4")
        self.assertEqual(inventory._link_desc(dev), "PCIe Gen4 x4")

    def test_link_downtrained_flags_capability(self):
        dev = self._fake_dev("8.0 GT/s PCIe", "4", "16.0 GT/s PCIe", "4")
        self.assertEqual(inventory._link_desc(dev), "PCIe Gen3 x4 (capable of Gen4 x4)")

    def test_non_nvme_has_no_link(self):
        # "sda" is not an NVMe name, so the expected link description is empty.
        self.assertEqual(inventory._nvme_link("sda"), "")
|
||||
|
||||
|
||||
class MemorySpeedTests(unittest.TestCase):
    """Rated-speed extraction from part numbers and the ``module_speed`` pair."""

    def test_rated_speed_from_part_number(self):
        rated = inventory._rated_from_part
        self.assertEqual(rated("CMK32GX5M2B5600Z36"), 5600)
        self.assertEqual(rated("F5-6000J3038F16G"), 6000)
        self.assertIsNone(rated("NoSpeedHere"))

    def test_detects_unapplied_expo(self):
        # XMP/EXPO off: dmidecode only sees JEDEC 4800; the 5600 is in the part number.
        module = {
            "Configured Memory Speed": "4800 MT/s",
            "Speed": "4800 MT/s",
            "Part Number": "CMK32GX5M2B5600Z36",
        }
        self.assertEqual(inventory.module_speed(module), (4800, 5600))

    def test_at_rated_speed(self):
        module = {
            "Configured Memory Speed": "5600 MT/s",
            "Speed": "5600 MT/s",
            "Part Number": "CMK32GX5M2B5600Z36",
        }
        self.assertEqual(inventory.module_speed(module), (5600, 5600))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
@@ -0,0 +1,67 @@
|
||||
"""Tests for M6 non-Steam game detection (Lutris SQLite + Heroic JSON)."""
|
||||
|
||||
import json
|
||||
import sqlite3
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from rigdoctor.core import launchers
|
||||
|
||||
|
||||
class LutrisTests(unittest.TestCase):
    """``launchers.scan`` against a throwaway Lutris SQLite database."""

    def test_reads_installed_games_only(self):
        rows = [
            (1, "Hades", "hades", 1),
            (2, "Hollow Knight", "hollow-knight", 1),
            (3, "Old Game", "old", 0),
        ]
        with tempfile.TemporaryDirectory() as tmp:
            db = Path(tmp) / "pga.db"
            conn = sqlite3.connect(db)
            conn.execute("CREATE TABLE games (id INTEGER, name TEXT, slug TEXT, installed INTEGER)")
            conn.executemany("INSERT INTO games VALUES (?, ?, ?, ?)", rows)
            conn.commit()
            conn.close()

            # Point Lutris at the fixture and Heroic at a path that doesn't exist.
            use_db = mock.patch.object(launchers, "LUTRIS_DB", db)
            no_heroic = mock.patch.object(launchers, "HEROIC_DIR", Path(tmp) / "nope")
            with use_db, no_heroic:
                games = launchers.scan()
            found = {game.name for game in games}
            self.assertEqual(found, {"Hades", "Hollow Knight"})
            self.assertTrue(all(game.launcher == "lutris" for game in games))

    def test_missing_db_is_empty(self):
        with tempfile.TemporaryDirectory() as tmp:
            missing_db = mock.patch.object(launchers, "LUTRIS_DB", Path(tmp) / "absent.db")
            no_heroic = mock.patch.object(launchers, "HEROIC_DIR", Path(tmp) / "nope")
            with missing_db, no_heroic:
                self.assertEqual(launchers.scan(), [])
|
||||
|
||||
|
||||
class HeroicTests(unittest.TestCase):
    """``launchers.scan`` against fake Heroic ``installed.json`` files (legendary + gog_store)."""

    def test_epic_and_gog(self):
        with tempfile.TemporaryDirectory() as tmp:
            heroic = Path(tmp) / "heroic"
            legendary_dir = heroic / "legendaryConfig" / "legendary"
            gog_dir = heroic / "gog_store"
            legendary_dir.mkdir(parents=True)
            gog_dir.mkdir(parents=True)

            epic_payload = {"abc123": {"title": "Control"}}
            gog_payload = {"installed": [{"appName": "777", "title": "The Witcher 3"}]}
            (legendary_dir / "installed.json").write_text(json.dumps(epic_payload))
            (gog_dir / "installed.json").write_text(json.dumps(gog_payload))

            no_lutris = mock.patch.object(launchers, "LUTRIS_DB", Path(tmp) / "nope.db")
            use_heroic = mock.patch.object(launchers, "HEROIC_DIR", heroic)
            with no_lutris, use_heroic:
                found = {game.name for game in launchers.scan()}
            self.assertEqual(found, {"Control", "The Witcher 3"})

    def test_gog_title_falls_back_to_install_path(self):
        with tempfile.TemporaryDirectory() as tmp:
            heroic = Path(tmp) / "heroic"
            gog_dir = heroic / "gog_store"
            gog_dir.mkdir(parents=True)

            # No "title" key: the expected name is the last component of install_path.
            gog_payload = {"installed": [{"appName": "9", "install_path": "/games/Stardew Valley"}]}
            (gog_dir / "installed.json").write_text(json.dumps(gog_payload))

            no_lutris = mock.patch.object(launchers, "LUTRIS_DB", Path(tmp) / "nope.db")
            use_heroic = mock.patch.object(launchers, "HEROIC_DIR", heroic)
            with no_lutris, use_heroic:
                found = {game.name for game in launchers.scan()}
            self.assertEqual(found, {"Stardew Valley"})
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,163 @@
|
||||
"""Tests for the .dmp minidump parser (M14) — builds a synthetic MDMP, no external tools."""
|
||||
|
||||
import struct
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from rigdoctor.core import minidump
|
||||
|
||||
|
||||
def _synthetic_dump() -> bytes:
    """Assemble a 600-byte MDMP image: header, 3-entry directory, and three streams.

    Absolute offsets: header at 0, directory at 32, SystemInfo at 68, Exception at 96,
    ModuleList at 264, module-name strings from 484. The first module's range contains
    the exception address, so it is the faulting one.
    """
    sysinfo_at, exception_at, modules_at = 68, 96, 264
    image = bytearray(600)
    struct.pack_into("<4sIIIIIQ", image, 0, b"MDMP", 0xA793, 3, 32, 0, 1_700_000_000, 0)

    # Directory entries are 12 bytes each, starting at 32: three uint32 values per stream.
    directory = (
        (7, 28, sysinfo_at),       # SystemInfoStream
        (6, 168, exception_at),    # ExceptionStream
        (4, 220, modules_at),      # ModuleListStream
    )
    for slot, entry in enumerate(directory):
        struct.pack_into("<III", image, 32 + 12 * slot, *entry)

    # SystemInfo: x86-64, 16 CPUs, Windows 10.0.19041 (PlatformId 2 = Win32 NT).
    struct.pack_into("<HHHBBIIIII", image, sysinfo_at, 9, 0, 0, 16, 1, 10, 0, 19041, 2, 0)

    # Exception: access violation (write) at 0x140001234.
    exception_fields = (
        ("<I", 0, 4321),           # ThreadId
        ("<I", 8, 0xC0000005),     # ExceptionCode
        ("<Q", 24, 0x140001234),   # ExceptionAddress
        ("<I", 32, 2),             # NumberParameters
        ("<Q", 40, 1),             # info[0] = write
        ("<Q", 48, 0x0),           # info[1] = faulting address
    )
    for fmt, rel, value in exception_fields:
        struct.pack_into(fmt, image, exception_at + rel, value)

    # ModuleList: a count of 2, then one record per module (base, size, name RVA).
    struct.pack_into("<I", image, modules_at, 2)
    first = modules_at + 4
    second = first + minidump._MODULE_STRIDE
    records = (
        (first, 0x140000000, 0x100000, 484),   # size spans the exception address
        (second, 0x180000000, 0x080000, 522),
    )
    for at, base, size, name_rva in records:
        struct.pack_into("<Q", image, at, base)
        struct.pack_into("<I", image, at + 8, size)
        struct.pack_into("<I", image, at + 20, name_rva)

    # Each name: a uint32 byte length followed by the UTF-16-LE text.
    for rva, text in ((484, "C:\\Games\\game.exe"), (522, "nvwgf2umx.dll")):
        encoded = text.encode("utf-16-le")
        struct.pack_into("<I", image, rva, len(encoded))
        start = rva + 4
        image[start:start + len(encoded)] = encoded
    return bytes(image)
|
||||
|
||||
|
||||
class ParseTests(unittest.TestCase):
    """``minidump.parse`` and its renderers, run against the synthetic dump written in setUp."""

    def setUp(self):
        # delete=False: the file has to outlive the handle so parse() can open it by path.
        handle = tempfile.NamedTemporaryFile(suffix=".dmp", delete=False)
        self._tmp = handle
        handle.write(_synthetic_dump())
        handle.close()
        self.path = handle.name

    def tearDown(self):
        Path(self.path).unlink(missing_ok=True)

    def _parse(self):
        """Parse the fixture with the external stackwalk step turned off."""
        return minidump.parse(self.path, run_stackwalk=False)

    def test_parses_exception_and_faulting_module(self):
        result = self._parse()
        self.assertTrue(result.ok, result.error)
        self.assertEqual(result.exception_code, 0xC0000005)
        for fragment in ("Access violation", "writing 0x0"):
            self.assertIn(fragment, result.crash_reason)
        self.assertEqual(result.faulting_module, "game.exe")  # basename, address inside module0
        self.assertEqual(result.crashing_thread, 4321)

    def test_parses_system_info_and_modules(self):
        result = self._parse()
        self.assertEqual(result.os_name, "Windows 10.0.19041")
        self.assertEqual(result.cpu_arch, "x86-64")
        self.assertEqual(result.cpu_count, 16)
        module_names = [module.name for module in result.modules]
        self.assertEqual(module_names, ["game.exe", "nvwgf2umx.dll"])

    def test_to_text_and_ai_text(self):
        result = self._parse()
        plain = minidump.to_text(result)
        for fragment in ("game.exe", "nvwgf2umx.dll", "Access violation"):
            self.assertIn(fragment, plain)
        for_model = minidump.to_ai_text(result)
        self.assertIn("Proton", for_model)  # Linux/Proton framing for the model
        self.assertIn("Crash reason", for_model)

    def test_to_findings(self):
        first = minidump.to_findings(self._parse())[0]
        self.assertEqual(first.severity, minidump.CRITICAL)
        self.assertIn("game.exe", first.title)

    def test_run_stackwalk_false_skips_external_tool(self):
        result = self._parse()
        self.assertEqual(result.stackwalk, "")
|
||||
|
||||
|
||||
class RobustnessTests(unittest.TestCase):
    """Bad inputs must come back as a failed result (``ok`` false), not an exception."""

    def test_non_minidump_file(self):
        with tempfile.NamedTemporaryFile(suffix=".dmp", delete=False) as handle:
            handle.write(b"not a dump at all")
            bogus = handle.name
        try:
            result = minidump.parse(bogus, run_stackwalk=False)
        finally:
            Path(bogus).unlink(missing_ok=True)
        self.assertFalse(result.ok)
        self.assertIn("signature", result.error)

    def test_missing_file(self):
        result = minidump.parse("/nonexistent/does-not-exist.dmp", run_stackwalk=False)
        self.assertFalse(result.ok)
        self.assertIn("can't read", result.error)

    def test_stackwalk_absent_returns_empty(self):
        # With shutil.which patched to return None, stackwalk must return "".
        tool_missing = mock.patch.object(minidump.shutil, "which", return_value=None)
        with tool_missing:
            self.assertEqual(minidump.stackwalk("/whatever.dmp"), "")
|
||||
|
||||
|
||||
class CliDumpTests(unittest.TestCase):
    """`rigdoctor ai dump <file>` parses then explains via the configured provider."""

    def _args(self, **over):
        """Return an argparse.Namespace for ``ai dump``, with keyword overrides applied."""
        import argparse

        fields = {"ai_cmd": "dump", "file": ""}
        fields.update(over)
        return argparse.Namespace(**fields)

    def test_dump_parses_and_explains(self):
        from rigdoctor.core import ai

        with tempfile.NamedTemporaryFile(suffix=".dmp", delete=False) as handle:
            handle.write(_synthetic_dump())
            dump_path = handle.name
        try:
            configured = mock.patch.object(ai, "is_configured", return_value=True)
            labelled = mock.patch.object(ai, "provider_label", return_value="Claude (test)")
            no_stackwalk = mock.patch.object(minidump, "stackwalk", return_value="")
            fake_explain = mock.patch.object(ai, "explain", return_value=(True, "Likely DXVK."))
            with configured, labelled, no_stackwalk, fake_explain as explain:
                from rigdoctor import cli

                rc = cli.cmd_ai(self._args(file=dump_path))
        finally:
            Path(dump_path).unlink(missing_ok=True)
        self.assertEqual(rc, 0)
        # First positional argument passed to the patched ai.explain.
        sent = explain.call_args[0][0]
        self.assertIn("Proton", sent)  # the Linux/Proton framing reached the model
        self.assertIn("game.exe", sent)

    def test_dump_bad_file_returns_error(self):
        from rigdoctor.core import ai

        configured = mock.patch.object(ai, "is_configured", return_value=True)
        with configured:
            from rigdoctor import cli

            rc = cli.cmd_ai(self._args(file="/nope/missing.dmp"))
        self.assertEqual(rc, 1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,27 @@
|
||||
"""Tests for the host PTY session (M12 Tier 3)."""
|
||||
|
||||
import time
|
||||
import unittest
|
||||
|
||||
from rigdoctor.core.pty_session import PtySession
|
||||
|
||||
|
||||
class PtySessionTests(unittest.TestCase):
    """Round-trip check: write a command to a PtySession and read its output back."""

    def test_runs_command_and_reads_output(self):
        marker = "PTY_MARKER_42"
        session = PtySession(rows=24, cols=80)
        try:
            time.sleep(0.4)
            session.read()  # drain the shell prompt
            session.write(b"echo PTY_MARKER_42\n")
            # Poll every 0.1 s until the marker appears or 3 s have elapsed.
            give_up_at = time.time() + 3
            seen = ""
            while True:
                if time.time() >= give_up_at or marker in seen:
                    break
                time.sleep(0.1)
                seen += session.read().decode(errors="replace")
            self.assertIn(marker, seen)
        finally:
            session.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,58 @@
|
||||
"""Tests for the M9 systemd --user trigger-mode service manager."""
|
||||
|
||||
import unittest
|
||||
from unittest import mock
|
||||
|
||||
from rigdoctor.core import service
|
||||
|
||||
|
||||
class UnitTextTests(unittest.TestCase):
    """The text returned by ``service.unit_text`` must contain the expected fragments."""

    def test_unit_text_has_required_sections(self):
        rendered = service.unit_text("RigDoctor recorder", ["record", "run"])
        required = (
            "[Unit]",
            "[Service]",
            "ExecStart=",
            "record run",
            "WantedBy=default.target",
        )
        for fragment in required:
            self.assertIn(fragment, rendered)
|
||||
|
||||
|
||||
class ApplyModeTests(unittest.TestCase):
    """``service.apply_mode`` with the enable/disable helpers and config writes patched out."""

    def test_unknown_mode_rejected(self):
        ok, message = service.apply_mode("turbo")
        self.assertFalse(ok)
        self.assertIn("Unknown", message)

    def test_no_systemd_saves_mode_but_reports(self):
        no_systemd = mock.patch.object(service, "available", return_value=False)
        config_write = mock.patch.object(service.config, "update_config")
        with no_systemd, config_write as update:
            ok, message = service.apply_mode("always-on")
        self.assertFalse(ok)
        self.assertIn("available", message.lower())
        # The chosen mode is still written to config even though apply_mode reported failure.
        update.assert_called_once_with(trigger_mode="always-on")

    def test_always_on_enables_recorder_disables_watch(self):
        calls = []

        def fake_enable(n):
            calls.append(("enable", n))
            return (0, "")

        def fake_disable(n):
            calls.append(("disable", n))
            return (0, "")

        with mock.patch.object(service, "available", return_value=True), \
                mock.patch.object(service, "install_units"), \
                mock.patch.object(service, "_enable", side_effect=fake_enable), \
                mock.patch.object(service, "_disable", side_effect=fake_disable), \
                mock.patch.object(service.config, "update_config"):
            ok, _ = service.apply_mode("always-on")
        self.assertTrue(ok)
        self.assertIn(("enable", service.RECORDER_UNIT), calls)
        self.assertIn(("disable", service.WATCH_UNIT), calls)

    def test_manual_disables_both(self):
        disabled = []

        def fake_disable(n):
            disabled.append(n)
            return (0, "")

        with mock.patch.object(service, "available", return_value=True), \
                mock.patch.object(service, "install_units"), \
                mock.patch.object(service, "_enable", return_value=(0, "")), \
                mock.patch.object(service, "_disable", side_effect=fake_disable), \
                mock.patch.object(service.config, "update_config"):
            ok, _ = service.apply_mode("manual")
        self.assertTrue(ok)
        self.assertEqual(set(disabled), {service.RECORDER_UNIT, service.WATCH_UNIT})
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,147 @@
|
||||
"""Tests for M6 Steam library & game detection (VDF parse, scan, tool filter, cache diff)."""
|
||||
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from rigdoctor.core import steam
|
||||
|
||||
# str.format template for one appmanifest_<appid>.acf file. Doubled braces are literal
# braces after formatting. The literal must contain only the manifest text: any stray
# markup characters here would be written into every generated manifest.
_GAME_ACF = """"AppState"
{{
\t"appid"\t\t"{appid}"
\t"name"\t\t"{name}"
\t"installdir"\t\t"{installdir}"
\t"SizeOnDisk"\t\t"{size}"
\t"LastUpdated"\t\t"{updated}"
}}
"""

# str.format template for libraryfolders.vdf: one library entry "0" whose path is
# substituted, plus a fixed "apps" table.
_LIBRARYFOLDERS = """"libraryfolders"
{{
\t"0"
\t{{
\t\t"path"\t\t"{path}"
\t\t"label"\t\t"Main"
\t\t"apps"
\t\t{{
\t\t\t"570"\t\t"123"
\t\t}}
\t}}
}}
"""


def _make_library(root: Path, games) -> Path:
    """Write one ``appmanifest_<appid>.acf`` per game under ``root/steamapps``.

    games: list of (appid, name, installdir, size, updated). Returns the library path
    (``root`` itself). The ``steamapps`` directory is created if missing, so the helper
    can be called again on the same root to add games.
    """
    steamapps = root / "steamapps"
    steamapps.mkdir(parents=True, exist_ok=True)
    for appid, name, installdir, size, updated in games:
        manifest = _GAME_ACF.format(
            appid=appid, name=name, installdir=installdir, size=size, updated=updated)
        (steamapps / f"appmanifest_{appid}.acf").write_text(manifest)
    return root
|
||||
|
||||
|
||||
class VdfTests(unittest.TestCase):
    """``steam._parse_vdf`` on generated ACF text and on non-VDF input."""

    def test_parse_nested_and_pairs(self):
        text = _GAME_ACF.format(
            appid="570", name="Dota 2", installdir="dota 2 beta", size="15", updated="1700")
        app_state = steam._parse_vdf(text)["AppState"]
        expected = (("appid", "570"), ("name", "Dota 2"), ("installdir", "dota 2 beta"))
        for key, value in expected:
            self.assertEqual(app_state[key], value)

    def test_parse_handles_quotes_in_names(self):
        # The name carries a backslash-escaped apostrophe; parsing must still keep the name.
        text = _GAME_ACF.format(
            appid="1", name="Baldur\\'s Gate 3", installdir="bg3", size="1", updated="1")
        parsed = steam._parse_vdf(text)
        self.assertIn("Baldur", parsed["AppState"]["name"])

    def test_parse_garbage_returns_empty(self):
        parsed = steam._parse_vdf("not vdf at all")
        self.assertEqual(parsed, {})
|
||||
|
||||
|
||||
class ToolFilterTests(unittest.TestCase):
    """``steam.is_tool`` must accept the tool examples below and reject a real game."""

    def test_known_tool_appid(self):
        verdict = steam.is_tool("228980", "Steamworks Common Redistributables")
        self.assertTrue(verdict)

    def test_proton_name_prefix(self):
        # These appids are made up; the names are what mark the entries as tools.
        self.assertTrue(steam.is_tool("9999999", "Proton 8.0"))
        self.assertTrue(steam.is_tool("9999998", "Steam Linux Runtime 3.0 (sniper)"))

    def test_real_game_is_not_a_tool(self):
        verdict = steam.is_tool("570", "Dota 2")
        self.assertFalse(verdict)
|
||||
|
||||
|
||||
class ScanTests(unittest.TestCase):
    """Library scanning: tool entries are dropped; multi-library results are deduped and sorted."""

    def test_scan_library_filters_tools(self):
        manifests = [
            ("570", "Dota 2", "dota 2 beta", "15000000000", "1700000000"),
            ("228980", "Steamworks Common Redistributables", "Steamworks Shared", "0", "0"),
            ("1493710", "Proton Experimental", "Proton - Experimental", "0", "0"),
        ]
        with tempfile.TemporaryDirectory() as tmp:
            library = _make_library(Path(tmp), manifests)
            games = steam.scan_library(str(library))
            found = {game.name for game in games}
            self.assertEqual(found, {"Dota 2"})
            self.assertEqual(games[0].size_bytes, 15000000000)

    def test_scan_games_dedupes_and_sorts(self):
        zeta = ("10", "Zeta", "zeta", "1", "1")
        alpha = ("20", "Alpha", "alpha", "1", "1")
        with tempfile.TemporaryDirectory() as tmp_a, tempfile.TemporaryDirectory() as tmp_b:
            first = _make_library(Path(tmp_a), [zeta, alpha])
            second = _make_library(Path(tmp_b), [alpha])  # dup appid 20
            games = steam.scan_games([str(first), str(second)])
            ordered = [game.name for game in games]
            self.assertEqual(ordered, ["Alpha", "Zeta"])  # sorted, deduped
|
||||
|
||||
|
||||
class DiscoverTests(unittest.TestCase):
    """``steam.discover_libraries`` with ``steam_roots`` patched to a temp Steam root."""

    def test_discover_reads_libraryfolders(self):
        with tempfile.TemporaryDirectory() as tmp:
            base = Path(tmp)
            root, extra = base / "Steam", base / "Extra"
            for library in (root, extra):
                (library / "steamapps").mkdir(parents=True)
            # The root's libraryfolders.vdf names the extra library as its one entry.
            vdf = root / "steamapps" / "libraryfolders.vdf"
            vdf.write_text(_LIBRARYFOLDERS.format(path=str(extra)))

            only_root = mock.patch.object(steam, "steam_roots", return_value=[root])
            with only_root:
                libraries = steam.discover_libraries()
            found = {library.path for library in libraries}
            self.assertIn(str(root.resolve()), found)   # root itself
            self.assertIn(str(extra.resolve()), found)  # the configured extra library
|
||||
|
||||
|
||||
class CacheDiffTests(unittest.TestCase):
    """New-game detection across rescans, with ``GAMES_FILE`` redirected to a temp file."""

    def _rescan(self, lib, games_file, cfg):
        """Run ``steam.rescan(cfg=cfg)`` while ``steam.GAMES_FILE`` points at ``games_file``.

        ``lib`` is accepted for the callers' convenience but is not used here.
        """
        redirect = mock.patch.object(steam, "GAMES_FILE", games_file)
        with redirect:
            return steam.rescan(cfg=cfg)

    def test_first_scan_has_no_new_then_added_game_is_new(self):
        alpha = ("10", "Alpha", "alpha", "1", "1")
        beta = ("20", "Beta", "beta", "1", "1")
        with tempfile.TemporaryDirectory() as tmp:
            lib = _make_library(Path(tmp) / "lib", [alpha])
            games_file = Path(tmp) / "games.json"
            cfg = {"steam_libraries": [str(lib)]}

            initial = self._rescan(lib, games_file, cfg)
            self.assertEqual(initial.new_appids, [])  # first run flags nothing as new

            # Install a second game; it should be flagged new on the next scan.
            _make_library(lib, [alpha, beta])
            followup = self._rescan(lib, games_file, cfg)
            self.assertEqual(followup.new_appids, ["20"])
            self.assertEqual({game.name for game in followup.games}, {"Alpha", "Beta"})

    def test_acknowledge_clears_new(self):
        alpha = ("10", "Alpha", "alpha", "1", "1")
        beta = ("20", "Beta", "beta", "1", "1")
        with tempfile.TemporaryDirectory() as tmp:
            lib = _make_library(Path(tmp) / "lib", [alpha])
            games_file = Path(tmp) / "games.json"
            cfg = {"steam_libraries": [str(lib)]}
            # Two scans with a game added in between, before acknowledging.
            self._rescan(lib, games_file, cfg)
            _make_library(lib, [alpha, beta])
            self._rescan(lib, games_file, cfg)
            with mock.patch.object(steam, "GAMES_FILE", games_file):
                steam.acknowledge_new()
                self.assertEqual(steam.load_cache()["new_appids"], [])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,77 @@
|
||||
"""Tests for the GPU stress + thermal-monitor analysis (synthetic ticks, no real GPU)."""
|
||||
|
||||
import unittest
|
||||
|
||||
from rigdoctor.core import stress
|
||||
from rigdoctor.core.health import CRITICAL, OK, WARNING
|
||||
|
||||
|
||||
def _tick(temp=None, power=None, throttle=(), capped=False, lost=False, dt=1.0, **extra):
    """Build a ``stress._Tick`` from keyword shorthand.

    ``temp`` and ``power`` are stored under "gpu.temp" / "gpu.power" only when given;
    any other keyword lands in ``values`` under its own name. ``throttle`` is copied
    into a list, and ``capped`` is passed on as ``power_capped``.
    """
    named = (("gpu.temp", temp), ("gpu.power", power))
    readings = {key: val for key, val in named if val is not None}
    readings.update(extra)
    return stress._Tick(
        dt=dt,
        values=readings,
        throttle=list(throttle),
        power_capped=capped,
        lost=lost,
    )
|
||||
|
||||
|
||||
class SummarizeTests(unittest.TestCase):
    """Severity, verdict and statistics from ``stress.summarize`` for hand-built ticks."""

    def test_stable_run_is_ok(self):
        limit = {"gpu.power_limit": 280}
        samples = [_tick(temp=deg, power=200, **limit) for deg in (60, 65, 70, 72)]
        report = stress.summarize(samples, load="monitor-only", interval=1.0, faults=[])
        self.assertEqual(report.severity, OK)
        self.assertEqual(report.peak_temp, 72)
        self.assertEqual(report.max_power, 200)
        self.assertEqual(report.power_limit, 280)
        self.assertFalse(report.throttled)
        self.assertIn("Stable", report.verdict)

    def test_dwell_time_above_thresholds(self):
        # 3 ticks of 2s each at 82/86/92 °C → ≥80 for all 6s, ≥85 for 4s, ≥90 for 2s.
        samples = [_tick(temp=deg, dt=2.0) for deg in (82, 86, 92)]
        report = stress.summarize(samples, load="x", interval=2.0, faults=[])
        for threshold, seconds in ((80, 6.0), (85, 4.0), (90, 2.0)):
            self.assertEqual(report.time_above[threshold], seconds)
        self.assertNotIn(95, report.time_above)  # never reached → omitted

    def test_throttling_is_a_warning(self):
        samples = [_tick(temp=88, throttle=["HW thermal slowdown"])]
        report = stress.summarize(samples, load="x", interval=1.0, faults=[])
        self.assertEqual(report.severity, WARNING)
        self.assertTrue(report.throttled)
        self.assertIn("HW thermal slowdown", report.throttle_reasons)

    def test_high_temp_without_throttle_is_a_warning(self):
        samples = [_tick(temp=93)]
        report = stress.summarize(samples, load="x", interval=1.0, faults=[])
        self.assertEqual(report.severity, WARNING)
        self.assertIn("hot", report.verdict.lower())

    def test_gpu_lost_is_critical(self):
        samples = [_tick(temp=70), _tick(lost=True)]
        report = stress.summarize(samples, load="x", interval=1.0, faults=[])
        self.assertEqual(report.severity, CRITICAL)
        self.assertTrue(report.gpu_lost)

    def test_journal_fault_is_critical(self):
        samples = [_tick(temp=70)]
        report = stress.summarize(samples, load="x", interval=1.0, faults=["NVIDIA Xid 79 ×1"])
        self.assertEqual(report.severity, CRITICAL)
        self.assertIn("Xid 79", report.verdict)

    def test_no_telemetry_is_info(self):
        # A tick with no values at all: severity is the literal "info" and no peak temp.
        samples = [_tick()]
        report = stress.summarize(samples, load="monitor-only", interval=1.0, faults=[])
        self.assertEqual(report.severity, "info")
        self.assertIsNone(report.peak_temp)
|
||||
|
||||
|
||||
class ThrottleDecodeTests(unittest.TestCase):
    """Sanity check of the throttle-reason lookup table."""

    def test_throttle_bits_map_to_reasons(self):
        """Both thermal-slowdown labels appear among the ``_THROTTLE_BITS`` values."""
        # the constants used by _throttle_state decode the NVML active-reasons bitmask
        reasons = stress._THROTTLE_BITS.values()
        for expected in ("HW thermal slowdown", "SW thermal slowdown"):
            self.assertIn(expected, reasons)
|
||||
|
||||
|
||||
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -0,0 +1,114 @@
|
||||
"""Tests for M15 session-scoped system-log collection (kernel + coredumps)."""
|
||||
|
||||
import unittest
|
||||
from unittest import mock
|
||||
|
||||
from rigdoctor.core import syslogs
|
||||
|
||||
|
||||
class KernelLogTests(unittest.TestCase):
    """Tests for ``syslogs.kernel_log`` with the external tool mocked out."""

    def test_passes_since_and_tails(self):
        """Output is cut to its last ``max_bytes`` and the command has ``-k`` / ``--since``."""
        raw = "X" * 100 + "TAILLINE"
        tool_present = mock.patch("shutil.which", return_value="/usr/bin/journalctl")
        canned_run = mock.patch.object(syslogs, "_run", return_value=raw)
        with tool_present, canned_run as run:
            tail = syslogs.kernel_log(since=1_000_000_000, max_bytes=8)
        self.assertEqual(tail, "TAILLINE")
        argv = run.call_args[0][0]
        for flag in ("-k", "--since"):
            self.assertIn(flag, argv)

    def test_missing_tool_returns_empty(self):
        """When ``shutil.which`` finds nothing, the result is an empty string."""
        with mock.patch("shutil.which", return_value=None):
            result = syslogs.kernel_log()
        self.assertEqual(result, "")
|
||||
|
||||
|
||||
class CoredumpTests(unittest.TestCase):
    """Tests for ``syslogs.coredumps`` with the external tool mocked out."""

    def test_empty_when_no_coredumps(self):
        """The "No coredumps found." message is reported as an empty string."""
        tool_present = mock.patch("shutil.which", return_value="/usr/bin/coredumpctl")
        canned_run = mock.patch.object(syslogs, "_run", return_value="No coredumps found.")
        with tool_present, canned_run:
            result = syslogs.coredumps()
        self.assertEqual(result, "")

    def test_returns_list(self):
        """A real listing is passed through, including the crashed executable name."""
        listing = "TIME PID SIG EXE\n... SEGV PathOfExile"
        tool_present = mock.patch("shutil.which", return_value="/usr/bin/coredumpctl")
        canned_run = mock.patch.object(syslogs, "_run", return_value=listing)
        with tool_present, canned_run:
            result = syslogs.coredumps()
        self.assertIn("PathOfExile", result)
|
||||
|
||||
|
||||
class NvidiaTests(unittest.TestCase):
    """Tests for ``syslogs.nvidia_snapshot`` with the external tool mocked out."""

    def test_missing_tool(self):
        """When ``shutil.which`` finds nothing, the snapshot is an empty string."""
        with mock.patch("shutil.which", return_value=None):
            snapshot = syslogs.nvidia_snapshot()
        self.assertEqual(snapshot, "")

    def test_snapshot_head_truncated(self):
        """Oversized output keeps its first ``max_bytes`` characters."""
        oversized = "DRIVER\n" + "x" * 99999
        tool_present = mock.patch("shutil.which", return_value="/usr/bin/nvidia-smi")
        canned_run = mock.patch.object(syslogs, "_run", return_value=oversized)
        with tool_present, canned_run:
            snapshot = syslogs.nvidia_snapshot(max_bytes=10)
        self.assertEqual(snapshot, "DRIVER\nxxx")  # head, not tail
|
||||
|
||||
|
||||
class DisplayTests(unittest.TestCase):
    """Tests for session-type detection and ``syslogs.display_log`` source selection."""

    def test_session_type_env(self):
        """With ``XDG_SESSION_TYPE=wayland`` set, ``_session_type`` returns "wayland"."""
        with mock.patch.dict("os.environ", {"XDG_SESSION_TYPE": "wayland"}):
            self.assertEqual(syslogs._session_type(), "wayland")

    def test_x11_tails_xorg_log(self):
        """On an x11 session the display log carries the Xorg log's content."""
        import tempfile
        from pathlib import Path

        # TemporaryDirectory + addCleanup removes the scratch directory even when
        # the test fails; a bare mkdtemp() would leave one behind per run.
        scratch = tempfile.TemporaryDirectory()
        self.addCleanup(scratch.cleanup)
        log = Path(scratch.name) / "Xorg.0.log"
        log.write_text("(EE) NVIDIA(GPU-0): something failed")
        with mock.patch.object(syslogs, "_session_type", return_value="x11"), \
                mock.patch.object(syslogs, "_xorg_log", return_value=log):
            out = syslogs.display_log()
        self.assertIn("(EE) NVIDIA", out)

    def test_wayland_uses_user_journal(self):
        """On wayland the command given to ``_run`` has ``--user`` and a ``_COMM=`` match."""
        with mock.patch.object(syslogs, "_session_type", return_value="wayland"), \
                mock.patch("shutil.which", return_value="/usr/bin/journalctl"), \
                mock.patch.object(syslogs, "_run", return_value="gnome-shell: GPU error") as run:
            out = syslogs.display_log(since=1_000_000_000)
        self.assertIn("GPU error", out)
        cmd = run.call_args[0][0]
        self.assertIn("--user", cmd)
        self.assertTrue(any(a.startswith("_COMM=") for a in cmd))
|
||||
|
||||
|
||||
class ScanCriticalTests(unittest.TestCase):
    """Tests for ``syslogs.scan_critical`` pattern matching."""

    def test_matches_each_category(self):
        """One sample line per category yields exactly the five expected labels."""
        log_lines = [
            "NVRM: Xid (PCI:0000:01:00): 79, GPU has fallen off the bus",
            "Out of memory: Killed process 1234 (PathOfExile)",
            "mce: [Hardware Error]: CPU 0",
            "pcieport 0000:00:01.0: AER: Corrected error received",
            "blk_update_request: I/O error, dev sda, sector 99",
            "this is a perfectly normal line",
        ]
        found = {label for label, _ in syslogs.scan_critical("\n".join(log_lines))}
        expected = {
            "GPU error (Xid)",
            "Out of memory",
            "CPU machine-check",
            "PCIe error",
            "Disk I/O error",
        }
        self.assertEqual(found, expected)

    def test_clean_log_no_events(self):
        """A log without any critical pattern produces an empty list."""
        benign = "usb 1-2: new high-speed device\nsystemd: started"
        self.assertEqual(syslogs.scan_critical(benign), [])
|
||||
|
||||
|
||||
class CollectTests(unittest.TestCase):
    """Tests for ``syslogs.collect`` with all four section collectors mocked."""

    def test_collect_combines_sections(self):
        """Every non-empty section shows up with its heading and its content."""
        kernel = mock.patch.object(syslogs, "kernel_log", return_value="NVRM: Xid 79")
        dumps = mock.patch.object(syslogs, "coredumps", return_value="game SIGSEGV")
        nvidia = mock.patch.object(syslogs, "nvidia_snapshot", return_value="Driver Version 595")
        display = mock.patch.object(syslogs, "display_log", return_value="(EE) NVIDIA")
        with kernel, dumps, nvidia, display:
            combined = syslogs.collect()
        expected_fragments = (
            "Kernel log", "Xid 79", "Crashed processes", "SIGSEGV",
            "NVIDIA snapshot", "595", "Display server log",
        )
        for fragment in expected_fragments:
            self.assertIn(fragment, combined)

    def test_collect_empty_when_nothing(self):
        """When every section is empty, the combined output is an empty string."""
        kernel = mock.patch.object(syslogs, "kernel_log", return_value="")
        dumps = mock.patch.object(syslogs, "coredumps", return_value="")
        nvidia = mock.patch.object(syslogs, "nvidia_snapshot", return_value="")
        display = mock.patch.object(syslogs, "display_log", return_value="")
        with kernel, dumps, nvidia, display:
            combined = syslogs.collect()
        self.assertEqual(combined, "")
|
||||
|
||||
|
||||
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -0,0 +1,58 @@
|
||||
"""Tests for the M2 live-monitor TUI logic (min/max tracking + color bands)."""
|
||||
|
||||
import unittest
|
||||
|
||||
from rigdoctor import tui
|
||||
from rigdoctor.core.sample import Reading, Sample
|
||||
|
||||
|
||||
def _temp(v):
    """Wrap *v* in a "gpu"/"temp" ``Reading`` with unit "°C" and an empty last field."""
    reading = Reading("gpu", "temp", v, "°C", "")
    return reading
|
||||
|
||||
|
||||
class TrackTests(unittest.TestCase):
    """Tests for ``tui.track`` min/max bookkeeping."""

    def test_tracks_min_and_max(self):
        """After several samples the entry holds ``(lowest, highest)``."""
        seen: dict = {}
        for celsius in (60.0, 80.0, 70.0, 55.0):
            sample = Sample(0.0, [_temp(celsius)])
            tui.track(seen, sample)
        self.assertEqual(seen["gpu.temp"], (55.0, 80.0))

    def test_ignores_none_values(self):
        """A reading whose value is ``None`` leaves the stats untouched."""
        seen: dict = {}
        tui.track(seen, Sample(0.0, [_temp(None)]))
        self.assertEqual(seen, {})

    def test_keys_separate_by_label(self):
        """Readings that differ only in their last field get separate entries."""
        seen: dict = {}
        cores = [
            Reading("cpu", "temp", 50.0, "°C", "Core 0"),
            Reading("cpu", "temp", 70.0, "°C", "Core 1"),
        ]
        tui.track(seen, Sample(0.0, cores))
        self.assertEqual(seen["cpu.temp.Core 0"], (50.0, 50.0))
        self.assertEqual(seen["cpu.temp.Core 1"], (70.0, 70.0))
|
||||
|
||||
|
||||
class BandTests(unittest.TestCase):
    """Tests for ``tui.band`` colour-band classification."""

    def test_temperature_bands(self):
        """Temperatures of 40/60/80/90 fall into cold/good/warn/crit."""
        cases = ((40.0, "cold"), (60.0, "good"), (80.0, "warn"), (90.0, "crit"))
        for celsius, expected in cases:
            self.assertEqual(tui.band(_temp(celsius)), expected)

    def test_usage_bands(self):
        """Utilisation and memory percentages map to good/warn/crit."""
        cases = (
            ("gpu", "util", 50.0, "good"),
            ("gpu", "util", 88.0, "warn"),
            ("memory", "used_pct", 96.0, "crit"),
        )
        for source, metric, percent, expected in cases:
            self.assertEqual(tui.band(Reading(source, metric, percent, "%")), expected)

    def test_non_metric_percentage_is_normal(self):
        """A fan reading at 100 % is banded "normal"."""
        fan = Reading("gpu", "fan", 100.0, "%")
        self.assertEqual(tui.band(fan), "normal")

    def test_gpu_lost_is_crit(self):
        """A gpu "status" reading with "query-timeout" is banded "crit"."""
        status = Reading("gpu", "status", None, "", "query-timeout")
        self.assertEqual(tui.band(status), "crit")

    def test_missing_value_is_na(self):
        """A power reading with no value is banded "na"."""
        power = Reading("gpu", "power", None, "W")
        self.assertEqual(tui.band(power), "na")
|
||||
|
||||
|
||||
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -0,0 +1,64 @@
|
||||
"""Tests for the M13 updater: install detection + routing the update to the right method."""
|
||||
|
||||
import unittest
|
||||
from unittest import mock
|
||||
|
||||
from rigdoctor.core import updates
|
||||
|
||||
|
||||
class InstallKindTests(unittest.TestCase):
    """Tests for ``updates.install_kind`` detection."""

    def setUp(self):
        """Clear the ``install_kind`` cache before the test and again after it."""
        updates.install_kind.cache_clear()
        self.addCleanup(updates.install_kind.cache_clear)

    def test_apt_when_dpkg_owns_the_package(self):
        """If ``_dpkg_owns`` is true the install kind is "apt"."""
        with mock.patch.object(updates, "_dpkg_owns", return_value=True):
            kind = updates.install_kind()
        self.assertEqual(kind, "apt")

    def test_pip_when_running_in_a_venv(self):
        """Not dpkg-owned and ``sys.prefix != sys.base_prefix`` gives "pip"."""
        not_packaged = mock.patch.object(updates, "_dpkg_owns", return_value=False)
        venv_prefix = mock.patch.object(updates.sys, "prefix", "/opt/venv")
        system_base = mock.patch.object(updates.sys, "base_prefix", "/usr")
        with not_packaged, venv_prefix, system_base:
            kind = updates.install_kind()
        self.assertEqual(kind, "pip")
|
||||
|
||||
|
||||
class ApplyUpdateRoutingTests(unittest.TestCase):
    """Tests that ``updates.apply_update`` routes by install kind."""

    def test_apt_returns_guidance_and_never_runs_pip(self):
        """An apt install gets rc 1 plus the apt command, and no subprocess is run."""
        as_apt = mock.patch.object(updates, "install_kind", return_value="apt")
        with as_apt, mock.patch("subprocess.run") as run:
            status, message = updates.apply_update("v9.9.9")
        self.assertEqual(status, 1)
        self.assertIn("apt install --only-upgrade", message)
        run.assert_not_called()

    def test_dev_returns_guidance_and_never_runs_pip(self):
        """A dev checkout is told to ``git pull``, and no subprocess is run."""
        as_dev = mock.patch.object(updates, "install_kind", return_value="dev")
        with as_dev, mock.patch("subprocess.run") as run:
            _rc, message = updates.apply_update("v9.9.9")
        self.assertIn("git pull", message)
        run.assert_not_called()

    def test_pip_install_runs_pip(self):
        """A pip install invokes a command containing "pip" and "install"."""
        finished = mock.Mock(returncode=0, stdout="Successfully installed", stderr="")
        as_pip = mock.patch.object(updates, "install_kind", return_value="pip")
        with_token = mock.patch.object(updates, "load_token", return_value="TOK")
        with as_pip, with_token, mock.patch("subprocess.run", return_value=finished) as run:
            status, _out = updates.apply_update("v1.2.3")
        self.assertEqual(status, 0)
        argv = run.call_args[0][0]
        for word in ("pip", "install"):
            self.assertIn(word, argv)
|
||||
|
||||
|
||||
class UpdateHintTests(unittest.TestCase):
    """Tests for the per-install-kind text from ``updates.update_hint``."""

    def test_apt_hint_names_the_apt_command(self):
        """The apt hint spells out the full upgrade command."""
        hint = updates.update_hint("apt")
        self.assertIn("apt install --only-upgrade rigdoctor", hint)

    def test_dev_hint_says_git_pull(self):
        """The dev hint mentions ``git pull``."""
        hint = updates.update_hint("dev")
        self.assertIn("git pull", hint)
|
||||
|
||||
|
||||
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -0,0 +1,69 @@
|
||||
"""Tests for the M9/D12 game-launch watcher (RunningAppID parse + transitions)."""
|
||||
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from rigdoctor.core import watcher
|
||||
|
||||
# Registry fixture in Valve's VDF text format; the single "%s" placeholder is
# filled with the RunningAppID value by the tests (``_REGISTRY % "570"``).
_REGISTRY = """"Registry"
{
\t"HKCU"
\t{
\t\t"Software"
\t\t{
\t\t\t"Valve"
\t\t\t{
\t\t\t\t"Steam"
\t\t\t\t{
\t\t\t\t\t"RunningAppID"\t\t"%s"
\t\t\t\t}
\t\t\t}
\t\t}
\t}
}
"""
|
||||
|
||||
|
||||
class TransitionTests(unittest.TestCase):
    """Tests for ``watcher.transition`` between two polled app ids."""

    def test_transitions(self):
        """0→id is "start", id→0 is "stop", and an unchanged id is ``None``."""
        for previous, current, expected in ((0, 570, "start"), (570, 0, "stop")):
            self.assertEqual(watcher.transition(previous, current), expected)
        for unchanged in (570, 0):
            self.assertIsNone(watcher.transition(unchanged, unchanged))
|
||||
|
||||
|
||||
class FindKeyTests(unittest.TestCase):
    """Tests for ``watcher._find_key`` lookup in nested dicts."""

    def test_case_insensitive_nested(self):
        """A deeply nested key is found regardless of letter case."""
        tree = {"Registry": {"HKCU": {"steam": {"runningappid": "42"}}}}
        found = watcher._find_key(tree, "RunningAppID")
        self.assertEqual(found, "42")

    def test_missing(self):
        """An absent key yields ``None``."""
        tree = {"a": {"b": "c"}}
        self.assertIsNone(watcher._find_key(tree, "RunningAppID"))
|
||||
|
||||
|
||||
class RunningAppIdTests(unittest.TestCase):
    """Tests for ``watcher.running_appid`` against a temporary registry file."""

    def _with_registry(self, content):
        """Write *content* to a temporary ``registry.vdf`` and return its path.

        The directory is removed when the test finishes, pass or fail; the
        earlier bare ``mkdtemp()`` left one directory behind per call.
        """
        scratch = tempfile.TemporaryDirectory()
        self.addCleanup(scratch.cleanup)
        path = Path(scratch.name) / "registry.vdf"
        path.write_text(content)
        return path

    def test_reads_running_appid(self):
        """A registry with RunningAppID "570" is read back as the integer 570."""
        path = self._with_registry(_REGISTRY % "570")
        with mock.patch.object(watcher, "_registry_path", return_value=path):
            self.assertEqual(watcher.running_appid(), 570)

    def test_zero_when_idle(self):
        """A RunningAppID of "0" is read back as 0."""
        path = self._with_registry(_REGISTRY % "0")
        with mock.patch.object(watcher, "_registry_path", return_value=path):
            self.assertEqual(watcher.running_appid(), 0)

    def test_zero_when_no_registry(self):
        """With no registry path available the result is 0."""
        with mock.patch.object(watcher, "_registry_path", return_value=None):
            self.assertEqual(watcher.running_appid(), 0)
|
||||
|
||||
|
||||
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -0,0 +1,68 @@
|
||||
"""Tests for the D12 Steam-launch wrapper (rigdoctor wrap %command%)."""
|
||||
|
||||
import unittest
|
||||
from unittest import mock
|
||||
|
||||
from rigdoctor.core import wrap
|
||||
from rigdoctor.core.steam import Game
|
||||
|
||||
|
||||
class LaunchOptionTests(unittest.TestCase):
    """Tests for the launch-option string built by ``wrap.launch_option``."""

    def test_format(self):
        """The option ends with ``wrap %command%`` and mentions ``rigdoctor``."""
        option = wrap.launch_option()
        self.assertTrue(option.endswith("wrap %command%"))
        self.assertIn("rigdoctor", option)
|
||||
|
||||
|
||||
class GameNameTests(unittest.TestCase):
    """Tests for ``wrap.game_name_from_env`` resolving a name from ``SteamAppId``."""

    def test_resolves_from_steam_appid(self):
        """An app id present in the cached games resolves to that game's name."""
        dota = Game(appid="570", name="Dota 2", library="/x", installdir="dota")
        steam_env = mock.patch.dict("os.environ", {"SteamAppId": "570"})
        cached = mock.patch("rigdoctor.core.steam.cached_games", return_value=[dota])
        with steam_env, cached:
            name = wrap.game_name_from_env()
        self.assertEqual(name, "Dota 2")

    def test_unknown_appid_falls_back(self):
        """An app id found in neither cache nor scan falls back to "Steam app <id>"."""
        steam_env = mock.patch.dict("os.environ", {"SteamAppId": "999"})
        cached = mock.patch("rigdoctor.core.steam.cached_games", return_value=[])
        scanned = mock.patch("rigdoctor.core.steam.scan_games", return_value=[])
        with steam_env, cached, scanned:
            name = wrap.game_name_from_env()
        self.assertEqual(name, "Steam app 999")

    def test_none_without_steam_env(self):
        """With an empty environment there is no name to report."""
        with mock.patch.dict("os.environ", {}, clear=True):
            name = wrap.game_name_from_env()
        self.assertIsNone(name)
|
||||
|
||||
|
||||
class RunTests(unittest.TestCase):
    """Tests for ``wrap.run``: capture bracketing and exit-code propagation."""

    def test_brackets_capture_and_returns_exit_code(self):
        """With no capture running, one is started and stopped around the command."""
        no_capture = mock.patch("rigdoctor.core.reccontrol.running_pid", return_value=None)
        start_patch = mock.patch("rigdoctor.core.diagnostic.start", return_value=123)
        stop_patch = mock.patch("rigdoctor.core.reccontrol.stop_background")
        empty_env = mock.patch.dict("os.environ", {}, clear=True)
        with no_capture, start_patch as start, stop_patch as stop, empty_env:
            exit_code = wrap.run(["true"])
        self.assertEqual(exit_code, 0)
        start.assert_called_once()
        stop.assert_called_once()

    def test_propagates_game_failure(self):
        """The wrapped command's non-zero exit status is returned unchanged."""
        no_capture = mock.patch("rigdoctor.core.reccontrol.running_pid", return_value=None)
        start_patch = mock.patch("rigdoctor.core.diagnostic.start", return_value=123)
        stop_patch = mock.patch("rigdoctor.core.reccontrol.stop_background")
        empty_env = mock.patch.dict("os.environ", {}, clear=True)
        with no_capture, start_patch, stop_patch, empty_env:
            exit_code = wrap.run(["false"])
        self.assertEqual(exit_code, 1)

    def test_does_not_touch_an_existing_capture(self):
        """If a capture is already running, it is neither started nor stopped."""
        capture_running = mock.patch("rigdoctor.core.reccontrol.running_pid", return_value=999)
        start_patch = mock.patch("rigdoctor.core.diagnostic.start")
        stop_patch = mock.patch("rigdoctor.core.reccontrol.stop_background")
        empty_env = mock.patch.dict("os.environ", {}, clear=True)
        with capture_running, start_patch as start, stop_patch as stop, empty_env:
            exit_code = wrap.run(["true"])
        self.assertEqual(exit_code, 0)
        start.assert_not_called()
        stop.assert_not_called()

    def test_empty_command_is_usage_error(self):
        """An empty argument list returns exit status 2."""
        exit_code = wrap.run([])
        self.assertEqual(exit_code, 2)
|
||||
|
||||
|
||||
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||