feat: gaming environment checks engine (M6) + notification icon — 0.9.0
The evaluate-and-suggest half of M6: a read-only findings report (D9) over system settings that affect gaming stability/performance, each with the exact fix command. - core/gameenv.py: PCIe ASPM, NVIDIA persistence mode, CPU governor (the three seed-case contributors to GPU bus-drop / Xid 79), GameMode, MangoHud, vm.swappiness, shader disk cache, THP, CPU mitigations, Proton versions. Pure evaluate_* helpers split from IO for testing; reuses the M4 Finding model. - steam.proton_versions(): surfaces installed Proton builds for the report. - CLI: rigdoctor gameenv (text / --json); render_health() gained a title arg. - GUI: new Environment page; extracted a shared finding_card widget and switched the Health page to it. - Tests for the pure evaluators + aggregate. Also fix: desktop notifications now use the RigDoctor icon (installed theme copy -> bundled asset -> stock fallback) instead of a generic stock icon, matching the app/dock icon. Docs (MODULES/ROADMAP) updated; version 0.8.0 -> 0.9.0. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,259 @@
|
||||
"""Gaming environment checks (M6): evaluate system settings that affect gaming
|
||||
stability/performance and suggest the fix command — read-only (D9).
|
||||
|
||||
Stdlib-only. Each check degrades gracefully (a missing file/tool yields no finding or an
|
||||
info finding, never an exception). The pure ``evaluate_*`` helpers are split from the IO
|
||||
that reads sysfs / runs tools, so they're unit-testable.
|
||||
|
||||
Several checks target the seed case directly: an RTX 3070 falling off the PCIe bus under
|
||||
load (Xid 79). PCIe ASPM power-saving, NVIDIA persistence mode, and a power-saving CPU
|
||||
governor are the usual contributors to that class of drop-off / stutter.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from .health import INFO, OK, WARNING, Finding
|
||||
|
||||
# Sort rank per severity: run_gameenv_checks() orders findings by ascending rank,
# and any severity missing from this map falls back to rank 9 there.
# NOTE(review): "critical" is a bare literal because no CRITICAL constant is
# imported in this module — presumably it matches health's critical severity; confirm.
_ORDER = {"critical": 0, WARNING: 1, INFO: 2, OK: 3}
|
||||
|
||||
|
||||
def _read(path: str) -> str | None:
|
||||
try:
|
||||
return Path(path).read_text()
|
||||
except OSError:
|
||||
return None
|
||||
|
||||
|
||||
# --- PCIe ASPM (seed-case relevant) ---------------------------------------------------
|
||||
|
||||
def _active_aspm(policy_text: str) -> str | None:
|
||||
"""The active ASPM policy is the bracketed token, e.g. '[default] performance ...'."""
|
||||
m = re.search(r"\[(\w+)\]", policy_text)
|
||||
return m.group(1) if m else None
|
||||
|
||||
|
||||
def evaluate_aspm(policy_text: str | None) -> Finding | None:
    """Classify the PCIe ASPM policy string into a finding.

    Args:
        policy_text: Raw policy text with the active choice in brackets, or
            ``None``/empty when it could not be read.

    Returns:
        ``None`` when there is no text or no bracketed policy; an OK finding
        for ``performance``; a WARNING finding for ``powersave`` /
        ``powersupersave``; an INFO finding for any other policy.
    """
    policy = _active_aspm(policy_text) if policy_text else None
    if policy is None:
        return None

    if policy == "performance":
        return Finding(OK, "PCIe", "PCIe ASPM set to performance", "ASPM power-saving is disabled.")

    if policy not in ("powersave", "powersupersave"):
        # Anything else (e.g. the default policy) is reported for information only.
        return Finding(
            INFO,
            "PCIe",
            f"PCIe ASPM policy: {policy}",
            "ASPM is left to the kernel/BIOS default.",
            "If you see GPU bus-drop events (Xid 79), try `pcie_aspm=off` on the kernel cmdline.",
        )

    return Finding(
        WARNING,
        "PCIe",
        f"PCIe ASPM is in power-saving mode ({policy})",
        "Aggressive PCIe Active-State Power Management can cause the GPU to drop off the "
        "bus under load (Xid 79) or stutter — the seed-case failure mode.",
        "Disable ASPM via the kernel cmdline: add `pcie_aspm=off` (and optionally "
        "`pcie_aspm.policy=performance`) in GRUB, then `sudo update-grub` and reboot.",
    )
|
||||
|
||||
|
||||
def check_pcie_aspm() -> list[Finding]:
    """Read the ASPM policy from sysfs and return its finding as a 0/1-item list."""
    finding = evaluate_aspm(_read("/sys/module/pcie_aspm/parameters/policy"))
    if finding:
        return [finding]
    return []
|
||||
|
||||
|
||||
# --- NVIDIA persistence mode (seed-case relevant) -------------------------------------
|
||||
|
||||
def check_gpu_persistence() -> list[Finding]:
    """Query ``nvidia-smi`` for persistence mode and report it.

    Returns an empty list when ``nvidia-smi`` is not on PATH, when running it
    fails or times out (10 s), or when the first output line is neither
    "Enabled..." nor "Disabled..." (case-insensitive). Only the first line of
    output is examined.
    """
    if shutil.which("nvidia-smi") is None:
        return []

    query = ["nvidia-smi", "--query-gpu=persistence_mode", "--format=csv,noheader"]
    try:
        result = subprocess.run(query, capture_output=True, text=True, timeout=10)
    except (subprocess.SubprocessError, OSError):
        return []

    output = result.stdout.strip()
    first_line = output.splitlines()[0].strip() if output else ""
    mode = first_line.lower()

    if mode.startswith("enabled"):
        return [Finding(OK, "GPU", "NVIDIA persistence mode on", "The driver stays resident.")]

    if mode.startswith("disabled"):
        return [
            Finding(
                INFO,
                "GPU",
                "NVIDIA persistence mode is off",
                "The driver unloads when no client is attached, adding latency on first GPU "
                "access and churning state between game launches.",
                "Enable it: `sudo nvidia-smi -pm 1` (per-boot), or enable the "
                "`nvidia-persistenced` service to make it permanent.",
            )
        ]

    return []
|
||||
|
||||
|
||||
# --- CPU governor ---------------------------------------------------------------------
|
||||
|
||||
def evaluate_governor(governors: set[str]) -> Finding | None:
    """Classify the set of active CPU frequency governors.

    Args:
        governors: Distinct governor names collected across CPUs.

    Returns:
        ``None`` for an empty set; an OK finding when every CPU uses
        ``performance``; a WARNING finding when ``powersave`` is among them;
        otherwise an INFO finding listing the governors in sorted order.
    """
    if not governors:
        return None

    if governors == {"performance"}:
        return Finding(OK, "CPU", "CPU governor: performance", "CPUs run at full clocks under load.")

    listing = ", ".join(sorted(governors))

    if "powersave" not in governors:
        return Finding(
            INFO,
            "CPU",
            f"CPU governor: {listing}",
            "A dynamic governor scales with load; usually fine.",
            "For the most consistent frame pacing, `performance` (or GameMode) avoids ramp-up lag.",
        )

    return Finding(
        WARNING,
        "CPU",
        f"CPU governor set to power-saving ({listing})",
        "A powersave governor caps CPU frequency and can bottleneck frame times.",
        "Set performance: `sudo cpupower frequency-set -g performance` "
        "(install `linux-tools-common`/`cpupower`), or install GameMode to switch it per-game.",
    )
|
||||
|
||||
|
||||
def check_cpu_governor() -> list[Finding]:
    """Collect each CPU's scaling governor from sysfs and evaluate the set.

    Unreadable or blank governor files are skipped. Returns the single finding
    from ``evaluate_governor`` in a list, or an empty list when it yields none.
    """
    cpu_root = Path("/sys/devices/system/cpu")
    raw_values = (_read(str(node)) for node in cpu_root.glob("cpu*/cpufreq/scaling_governor"))
    active = {raw.strip() for raw in raw_values if raw and raw.strip()}

    verdict = evaluate_governor(active)
    if verdict:
        return [verdict]
    return []
|
||||
|
||||
|
||||
# --- GameMode / MangoHud --------------------------------------------------------------
|
||||
|
||||
def check_gamemode() -> list[Finding]:
    """Report whether GameMode is installed.

    GameMode counts as installed when either ``gamemoderun`` or ``gamemoded``
    is found on PATH. Always returns exactly one finding: OK when present,
    INFO with install instructions otherwise.
    """
    installed = any(shutil.which(tool) for tool in ("gamemoderun", "gamemoded"))
    if not installed:
        return [
            Finding(
                INFO,
                "Tools",
                "GameMode not installed",
                "GameMode auto-applies performance tweaks (governor, scheduling) for the duration of a game.",
                "Install it: `sudo apt install gamemode`, then launch games with `gamemoderun %command%` "
                "(or use a global Steam launch option).",
            )
        ]

    return [
        Finding(
            OK,
            "Tools",
            "Feral GameMode installed",
            "GameMode can apply the performance governor and other tweaks while a game runs.",
        )
    ]
|
||||
|
||||
|
||||
def check_mangohud() -> list[Finding]:
    """Report whether the ``mangohud`` executable is on PATH.

    Always returns exactly one finding: OK when found, INFO with install
    instructions otherwise.
    """
    if not shutil.which("mangohud"):
        return [
            Finding(
                INFO,
                "Tools",
                "MangoHud not installed",
                "MangoHud overlays live FPS, frame times, and temps in-game — handy for spotting stutter.",
                "Install it: `sudo apt install mangohud`, then launch with `mangohud %command%`.",
            )
        ]

    return [
        Finding(OK, "Tools", "MangoHud available", "In-game FPS/temps/frametime overlay is installed.")
    ]
|
||||
|
||||
|
||||
# --- vm.swappiness --------------------------------------------------------------------
|
||||
|
||||
def evaluate_swappiness(value: int) -> Finding:
    """Classify a ``vm.swappiness`` value.

    Args:
        value: The current swappiness setting.

    Returns:
        An OK finding for values up to and including 10, otherwise an INFO
        finding suggesting it be lowered to 10.
    """
    if value <= 10:
        return Finding(OK, "Memory", f"vm.swappiness is {value}", "Swapping is conservative.")

    return Finding(
        INFO,
        "Memory",
        f"vm.swappiness is high ({value})",
        "A high swappiness lets the kernel swap out memory eagerly, which can cause "
        "hitching during gaming on systems with ample RAM.",
        "Lower it: `sudo sysctl vm.swappiness=10` (persist in /etc/sysctl.d/99-rigdoctor.conf).",
    )
|
||||
|
||||
|
||||
def check_swappiness() -> list[Finding]:
    """Read ``/proc/sys/vm/swappiness`` and evaluate it.

    Returns an empty list when the file is unreadable or its stripped content
    is not all digits; otherwise a one-item list with the evaluation result.
    """
    raw = _read("/proc/sys/vm/swappiness")
    if raw is None:
        return []

    digits = raw.strip()
    if not digits.isdigit():
        return []

    return [evaluate_swappiness(int(digits))]
|
||||
|
||||
|
||||
# --- shader cache ---------------------------------------------------------------------
|
||||
|
||||
def evaluate_shader_cache(env: dict) -> Finding:
    """Decide from environment variables whether the shader disk cache is off.

    The cache is treated as disabled when ``__GL_SHADER_DISK_CACHE`` is exactly
    ``"0"``, or when ``MESA_SHADER_CACHE_DISABLE`` / ``MESA_GLSL_CACHE_DISABLE``
    is ``1`` or ``true`` (case-insensitive).

    Args:
        env: Mapping of environment variable names to values.

    Returns:
        A WARNING finding when the cache is disabled, otherwise an OK finding.
    """
    truthy = ("1", "true")
    mesa_switches = ("MESA_SHADER_CACHE_DISABLE", "MESA_GLSL_CACHE_DISABLE")

    cache_off = env.get("__GL_SHADER_DISK_CACHE") == "0" or any(
        env.get(name, "").lower() in truthy for name in mesa_switches
    )

    if not cache_off:
        return Finding(
            OK, "GPU", "Shader disk cache enabled", "Compiled shaders are cached between runs (default)."
        )

    return Finding(
        WARNING,
        "GPU",
        "Shader disk cache is disabled",
        "With the shader cache off, shaders recompile every run — a common cause of "
        "in-game stutter, especially on first encounters.",
        "Unset the disabling variable (e.g. remove `__GL_SHADER_DISK_CACHE=0` / "
        "`MESA_SHADER_CACHE_DISABLE`) from your environment / launch options.",
    )
|
||||
|
||||
|
||||
def check_shader_cache() -> list[Finding]:
    """Evaluate the shader-cache variables of this process's own environment."""
    verdict = evaluate_shader_cache(os.environ)
    return [verdict]
|
||||
|
||||
|
||||
# --- transparent hugepages / CPU mitigations (only when notable) ----------------------
|
||||
|
||||
def check_thp() -> list[Finding]:
    """Note when Transparent HugePages is set to ``never``.

    Reads ``/sys/kernel/mm/transparent_hugepage/enabled``, whose active choice
    is bracketed in the same way as the ASPM policy, so the same parser is
    reused. Any other mode, or an unreadable/empty file, yields no finding.
    """
    raw = _read("/sys/kernel/mm/transparent_hugepage/enabled")
    mode = _active_aspm(raw) if raw else None
    if mode != "never":
        return []

    return [
        Finding(
            INFO,
            "Memory",
            "Transparent HugePages disabled (never)",
            "Some workloads benefit from THP; 'madvise' lets apps opt in without the downsides of 'always'.",
            "Optional: `echo madvise | sudo tee /sys/kernel/mm/transparent_hugepage/enabled`.",
        )
    ]
|
||||
|
||||
|
||||
def check_mitigations() -> list[Finding]:
    """Note when the kernel was booted with ``mitigations=off``.

    ``/proc/cmdline`` is split into whitespace-separated parameters and only
    the exact ``mitigations`` key is considered. A plain substring search
    would also fire on module-scoped options such as ``i915.mitigations=off``,
    which do not switch off CPU mitigations. If the parameter appears more
    than once, the last occurrence is used (assumed to mirror the kernel,
    where a later value overrides an earlier one).

    Returns:
        A one-item list with an INFO finding when the effective value is
        ``off``; otherwise an empty list (including when the cmdline cannot
        be read).
    """
    cmdline = _read("/proc/cmdline") or ""

    effective = None
    for param in cmdline.split():
        key, _, value = param.partition("=")
        if key == "mitigations":
            effective = value

    if effective != "off":
        return []

    return [
        Finding(
            INFO,
            "CPU",
            "CPU security mitigations are disabled",
            "`mitigations=off` recovers some CPU performance at the cost of CPU-vulnerability "
            "protections — a deliberate trade-off, noted here for awareness.",
            "Remove `mitigations=off` from the kernel cmdline to restore protections.",
        )
    ]
|
||||
|
||||
|
||||
# --- Proton versions (informational) --------------------------------------------------
|
||||
|
||||
def check_proton() -> list[Finding]:
    """List the installed Proton versions as a single informational finding.

    Returns an empty list when ``steam.proton_versions()`` reports nothing or
    raises.
    """
    from . import steam

    try:
        installed = steam.proton_versions()
    except Exception:
        # Best-effort: a failure while listing versions means "nothing to report".
        installed = []

    if installed:
        summary = f"Proton: {len(installed)} version(s) installed"
        return [
            Finding(
                INFO,
                "Tools",
                summary,
                ", ".join(installed),
                "Steam picks the Proton version per game (Properties → Compatibility); "
                "Proton Experimental often has the latest fixes.",
            )
        ]
    return []
|
||||
|
||||
|
||||
# --- aggregate ------------------------------------------------------------------------
|
||||
|
||||
def run_gameenv_checks() -> list[Finding]:
    """Run every environment check and return the findings, worst severity first.

    Checks run in a fixed order; findings of equal severity keep that order
    because the sort is stable. Severities missing from ``_ORDER`` sort last
    (rank 9).
    """
    checks = (
        check_pcie_aspm,
        check_gpu_persistence,
        check_cpu_governor,
        check_gamemode,
        check_mangohud,
        check_swappiness,
        check_shader_cache,
        check_thp,
        check_mitigations,
        check_proton,
    )

    collected: list[Finding] = []
    for check in checks:
        collected.extend(check())

    return sorted(collected, key=lambda item: _ORDER.get(item.severity, 9))
|
||||
Reference in New Issue
Block a user