feat: gaming environment checks engine (M6) + notification icon — 0.9.0

The evaluate-and-suggest half of M6: a read-only findings report (D9) over
system settings that affect gaming stability/performance, each with the exact
fix command.

- core/gameenv.py: PCIe ASPM, NVIDIA persistence mode, CPU governor (the three
  seed-case contributors to GPU bus-drop / Xid 79), GameMode, MangoHud,
  vm.swappiness, shader disk cache, THP, CPU mitigations, Proton versions.
  Pure evaluate_* helpers split from IO for testing; reuses the M4 Finding model.
- steam.proton_versions(): surfaces installed Proton builds for the report.
- CLI: rigdoctor gameenv (text / --json); render_health() gained a title arg.
- GUI: new Environment page; extracted a shared finding_card widget and switched
  the Health page to it.
- Tests for the pure evaluators + aggregate.

Also fix: desktop notifications now use the RigDoctor icon (installed theme copy
-> bundled asset -> stock fallback) instead of a generic stock icon, matching
the app/dock icon.

Docs (MODULES/ROADMAP) updated; version 0.8.0 -> 0.9.0.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-22 07:53:06 +02:00
parent 0642eb4712
commit 29f4a45df8
15 changed files with 569 additions and 51 deletions
+259
View File
@@ -0,0 +1,259 @@
"""Gaming environment checks (M6): evaluate system settings that affect gaming
stability/performance and suggest the fix command — read-only (D9).
Stdlib-only. Each check degrades gracefully (a missing file/tool yields no finding or an
info finding, never an exception). The pure ``evaluate_*`` helpers are split from the IO
that reads sysfs / runs tools, so they're unit-testable.
Several checks target the seed case directly: an RTX 3070 falling off the PCIe bus under
load (Xid 79). PCIe ASPM power-saving, NVIDIA persistence mode, and a power-saving CPU
governor are the usual contributors to that class of drop-off / stutter.
"""
from __future__ import annotations
import os
import re
import shutil
import subprocess
from pathlib import Path
from .health import INFO, OK, WARNING, Finding
_ORDER = {"critical": 0, WARNING: 1, INFO: 2, OK: 3}
def _read(path: str) -> str | None:
try:
return Path(path).read_text()
except OSError:
return None
# --- PCIe ASPM (seed-case relevant) ---------------------------------------------------
def _active_aspm(policy_text: str) -> str | None:
"""The active ASPM policy is the bracketed token, e.g. '[default] performance ...'."""
m = re.search(r"\[(\w+)\]", policy_text)
return m.group(1) if m else None
def evaluate_aspm(policy_text: str | None) -> Finding | None:
if not policy_text:
return None
active = _active_aspm(policy_text)
if active is None:
return None
if active in ("powersave", "powersupersave"):
return Finding(
WARNING, "PCIe", f"PCIe ASPM is in power-saving mode ({active})",
"Aggressive PCIe Active-State Power Management can cause the GPU to drop off the "
"bus under load (Xid 79) or stutter — the seed-case failure mode.",
"Disable ASPM via the kernel cmdline: add `pcie_aspm=off` (and optionally "
"`pcie_aspm.policy=performance`) in GRUB, then `sudo update-grub` and reboot.",
)
if active == "performance":
return Finding(OK, "PCIe", "PCIe ASPM set to performance", "ASPM power-saving is disabled.")
return Finding(
INFO, "PCIe", f"PCIe ASPM policy: {active}",
"ASPM is left to the kernel/BIOS default.",
"If you see GPU bus-drop events (Xid 79), try `pcie_aspm=off` on the kernel cmdline.",
)
def check_pcie_aspm() -> list[Finding]:
f = evaluate_aspm(_read("/sys/module/pcie_aspm/parameters/policy"))
return [f] if f else []
# --- NVIDIA persistence mode (seed-case relevant) -------------------------------------
def check_gpu_persistence() -> list[Finding]:
if shutil.which("nvidia-smi") is None:
return []
try:
proc = subprocess.run(
["nvidia-smi", "--query-gpu=persistence_mode", "--format=csv,noheader"],
capture_output=True, text=True, timeout=10,
)
except (subprocess.SubprocessError, OSError):
return []
state = proc.stdout.strip().splitlines()[0].strip() if proc.stdout.strip() else ""
if state.lower().startswith("disabled"):
return [Finding(
INFO, "GPU", "NVIDIA persistence mode is off",
"The driver unloads when no client is attached, adding latency on first GPU "
"access and churning state between game launches.",
"Enable it: `sudo nvidia-smi -pm 1` (per-boot), or enable the "
"`nvidia-persistenced` service to make it permanent.",
)]
if state.lower().startswith("enabled"):
return [Finding(OK, "GPU", "NVIDIA persistence mode on", "The driver stays resident.")]
return []
# --- CPU governor ---------------------------------------------------------------------
def evaluate_governor(governors: set[str]) -> Finding | None:
if not governors:
return None
shown = ", ".join(sorted(governors))
if governors == {"performance"}:
return Finding(OK, "CPU", "CPU governor: performance", "CPUs run at full clocks under load.")
if "powersave" in governors:
return Finding(
WARNING, "CPU", f"CPU governor set to power-saving ({shown})",
"A powersave governor caps CPU frequency and can bottleneck frame times.",
"Set performance: `sudo cpupower frequency-set -g performance` "
"(install `linux-tools-common`/`cpupower`), or install GameMode to switch it per-game.",
)
return Finding(
INFO, "CPU", f"CPU governor: {shown}",
"A dynamic governor scales with load; usually fine.",
"For the most consistent frame pacing, `performance` (or GameMode) avoids ramp-up lag.",
)
def check_cpu_governor() -> list[Finding]:
govs: set[str] = set()
for p in Path("/sys/devices/system/cpu").glob("cpu*/cpufreq/scaling_governor"):
text = _read(str(p))
if text and text.strip():
govs.add(text.strip())
f = evaluate_governor(govs)
return [f] if f else []
# --- GameMode / MangoHud --------------------------------------------------------------
def check_gamemode() -> list[Finding]:
if shutil.which("gamemoderun") or shutil.which("gamemoded"):
return [Finding(
OK, "Tools", "Feral GameMode installed",
"GameMode can apply the performance governor and other tweaks while a game runs.",
)]
return [Finding(
INFO, "Tools", "GameMode not installed",
"GameMode auto-applies performance tweaks (governor, scheduling) for the duration of a game.",
"Install it: `sudo apt install gamemode`, then launch games with `gamemoderun %command%` "
"(or use a global Steam launch option).",
)]
def check_mangohud() -> list[Finding]:
if shutil.which("mangohud"):
return [Finding(OK, "Tools", "MangoHud available", "In-game FPS/temps/frametime overlay is installed.")]
return [Finding(
INFO, "Tools", "MangoHud not installed",
"MangoHud overlays live FPS, frame times, and temps in-game — handy for spotting stutter.",
"Install it: `sudo apt install mangohud`, then launch with `mangohud %command%`.",
)]
# --- vm.swappiness --------------------------------------------------------------------
def evaluate_swappiness(value: int) -> Finding:
if value > 10:
return Finding(
INFO, "Memory", f"vm.swappiness is high ({value})",
"A high swappiness lets the kernel swap out memory eagerly, which can cause "
"hitching during gaming on systems with ample RAM.",
"Lower it: `sudo sysctl vm.swappiness=10` (persist in /etc/sysctl.d/99-rigdoctor.conf).",
)
return Finding(OK, "Memory", f"vm.swappiness is {value}", "Swapping is conservative.")
def check_swappiness() -> list[Finding]:
text = _read("/proc/sys/vm/swappiness")
if text is None or not text.strip().isdigit():
return []
return [evaluate_swappiness(int(text.strip()))]
# --- shader cache ---------------------------------------------------------------------
def evaluate_shader_cache(env: dict) -> Finding:
disabled = (
env.get("__GL_SHADER_DISK_CACHE") == "0"
or env.get("MESA_SHADER_CACHE_DISABLE", "").lower() in ("1", "true")
or env.get("MESA_GLSL_CACHE_DISABLE", "").lower() in ("1", "true")
)
if disabled:
return Finding(
WARNING, "GPU", "Shader disk cache is disabled",
"With the shader cache off, shaders recompile every run — a common cause of "
"in-game stutter, especially on first encounters.",
"Unset the disabling variable (e.g. remove `__GL_SHADER_DISK_CACHE=0` / "
"`MESA_SHADER_CACHE_DISABLE`) from your environment / launch options.",
)
return Finding(OK, "GPU", "Shader disk cache enabled", "Compiled shaders are cached between runs (default).")
def check_shader_cache() -> list[Finding]:
return [evaluate_shader_cache(os.environ)]
# --- transparent hugepages / CPU mitigations (only when notable) ----------------------
def check_thp() -> list[Finding]:
text = _read("/sys/kernel/mm/transparent_hugepage/enabled")
if not text:
return []
active = _active_aspm(text) # same '[token]' format
if active == "never":
return [Finding(
INFO, "Memory", "Transparent HugePages disabled (never)",
"Some workloads benefit from THP; 'madvise' lets apps opt in without the downsides of 'always'.",
"Optional: `echo madvise | sudo tee /sys/kernel/mm/transparent_hugepage/enabled`.",
)]
return []
def check_mitigations() -> list[Finding]:
cmdline = _read("/proc/cmdline") or ""
if "mitigations=off" in cmdline:
return [Finding(
INFO, "CPU", "CPU security mitigations are disabled",
"`mitigations=off` recovers some CPU performance at the cost of CPU-vulnerability "
"protections — a deliberate trade-off, noted here for awareness.",
"Remove `mitigations=off` from the kernel cmdline to restore protections.",
)]
return []
# --- Proton versions (informational) --------------------------------------------------
def check_proton() -> list[Finding]:
from . import steam
try:
versions = steam.proton_versions()
except Exception:
versions = []
if not versions:
return []
return [Finding(
INFO, "Tools", f"Proton: {len(versions)} version(s) installed",
", ".join(versions),
"Steam picks the Proton version per game (Properties → Compatibility); "
"Proton Experimental often has the latest fixes.",
)]
# --- aggregate ------------------------------------------------------------------------
def run_gameenv_checks() -> list[Finding]:
"""Run all environment checks, sorted by severity (worst first)."""
findings: list[Finding] = []
findings += check_pcie_aspm()
findings += check_gpu_persistence()
findings += check_cpu_governor()
findings += check_gamemode()
findings += check_mangohud()
findings += check_swappiness()
findings += check_shader_cache()
findings += check_thp()
findings += check_mitigations()
findings += check_proton()
findings.sort(key=lambda f: _ORDER.get(f.severity, 9))
return findings