Files
rigdoctor/src/rigdoctor/core/gameenv.py
T
jessey 9c30c9824e feat(m6): one-click install + apply controls on Environment page — 0.10.0
Make the environment report actionable, not just advisory.

Install (reuses M9 installer):
- Add GameMode, MangoHud, cpupower to the component catalog (so they also show
  on the Setup page); catalog.by_id() lookup.
- "tool not installed" findings (GameMode/MangoHud) get an Install button.

Apply runtime-reversible tunables (D22, realizing the D9 consent-gated milestone):
- core/fixes.py: dropdown of live options + Apply for CPU governor, NVIDIA
  persistence, PCIe ASPM policy, vm.swappiness, THP. One pkexec command each,
  no reboot, reverts on reboot; chosen value validated against live options;
  writes go to sysfs/procfs/nvidia-smi, never GRUB. GRUB/mitigations stay
  suggestion-only.
- Finding gained optional action (install) + fix (apply) ids; shared
  finding_card renders the matching control; Environment page wires both and
  re-checks after a change.

Tests for fixes (parse, command builders, value validation, gameenv wiring).
Docs: D22 added (amends D9); SPEC/MODULES/ROADMAP updated. 0.9.0 -> 0.10.0.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-22 08:05:03 +02:00

272 lines
10 KiB
Python

"""Gaming environment checks (M6): evaluate system settings that affect gaming
stability/performance and suggest the fix command — read-only (D9).
Stdlib-only. Each check degrades gracefully (a missing file/tool yields no finding or an
info finding, never an exception). The pure ``evaluate_*`` helpers are split from the IO
that reads sysfs / runs tools, so they're unit-testable.
Several checks target the seed case directly: an RTX 3070 falling off the PCIe bus under
load (Xid 79). PCIe ASPM power-saving, NVIDIA persistence mode, and a power-saving CPU
governor are the usual contributors to that class of drop-off / stutter.
"""
from __future__ import annotations
import os
import re
import shutil
import subprocess
from pathlib import Path
from .health import INFO, OK, WARNING, Finding
# Sort rank per severity; a lower rank sorts earlier. run_gameenv_checks() uses
# this as its sort key and falls back to 9 for any severity not listed here.
# NOTE(review): "critical" is a bare string because no matching constant is
# imported from .health in this module — confirm it matches the value used there.
_ORDER = {"critical": 0, WARNING: 1, INFO: 2, OK: 3}
def _read(path: str) -> str | None:
    """Return the text content of ``path``, or ``None`` when it cannot be read.

    Any failure to open or read the file (missing file, permission denied,
    path is a directory, ...) and any failure to decode its bytes collapses
    to ``None``, so callers can treat "unreadable" as "no finding" instead of
    handling exceptions themselves.
    """
    try:
        return Path(path).read_text()
    except (OSError, UnicodeDecodeError):
        # UnicodeDecodeError derives from ValueError, not OSError, so it has to
        # be listed explicitly or undecodable bytes would raise to the caller.
        return None
# --- PCIe ASPM (seed-case relevant) ---------------------------------------------------
def _active_aspm(policy_text: str) -> str | None:
    """Extract the bracketed word from a choice list such as '[default] performance ...'.

    Returns the first ``[word]`` token without its brackets, or ``None`` when
    the text contains no bracketed word.
    """
    match = re.search(r"\[(\w+)\]", policy_text)
    if match is None:
        return None
    return match.group(1)
def evaluate_aspm(policy_text: str | None) -> Finding | None:
    """Turn the text of the PCIe ASPM policy file into a finding.

    Returns ``None`` when the text is empty/missing or carries no bracketed
    active policy. Otherwise: a warning for ``powersave``/``powersupersave``,
    an OK finding for ``performance``, and an informational finding for any
    other active policy. Every finding carries the ``pcie_aspm`` fix id.
    """
    active = _active_aspm(policy_text) if policy_text else None
    if active is None:
        return None

    if active == "performance":
        return Finding(
            OK, "PCIe", "PCIe ASPM set to performance", "ASPM power-saving is disabled.",
            fix="pcie_aspm",
        )

    if active not in ("powersave", "powersupersave"):
        # Anything else (e.g. the kernel default) is only worth a note.
        return Finding(
            INFO, "PCIe", f"PCIe ASPM policy: {active}",
            "ASPM is left to the kernel/BIOS default.",
            "If you see GPU bus-drop events (Xid 79), set the policy to performance below.",
            fix="pcie_aspm",
        )

    detail = (
        "Aggressive PCIe Active-State Power Management can cause the GPU to drop off the "
        "bus under load (Xid 79) or stutter — the seed-case failure mode."
    )
    remedy = (
        "Set the policy to performance below (live), or for a permanent change add "
        "`pcie_aspm=off` in GRUB, then `sudo update-grub` and reboot."
    )
    return Finding(
        WARNING, "PCIe", f"PCIe ASPM is in power-saving mode ({active})",
        detail,
        remedy,
        fix="pcie_aspm",
    )
def check_pcie_aspm() -> list[Finding]:
    """Read the live PCIe ASPM policy from sysfs and evaluate it.

    Returns a one-element list, or an empty list when ``evaluate_aspm``
    produces nothing (file unreadable or no active policy found).
    """
    finding = evaluate_aspm(_read("/sys/module/pcie_aspm/parameters/policy"))
    if not finding:
        return []
    return [finding]
# --- NVIDIA persistence mode (seed-case relevant) -------------------------------------
def check_gpu_persistence() -> list[Finding]:
    """Report whether NVIDIA persistence mode is enabled.

    Runs ``nvidia-smi --query-gpu=persistence_mode`` and inspects every
    non-empty output row, so that a multi-GPU machine is only reported as
    "on" when all rows say so. Returns:

    * an informational finding when any row starts with ``Disabled``;
    * an OK finding when there is at least one row and all start with ``Enabled``;
    * an empty list when ``nvidia-smi`` is not on PATH, cannot be run, times
      out, produces undecodable output, or prints anything unrecognised.

    Both findings carry the ``nvidia_persistence`` fix id.
    """
    if shutil.which("nvidia-smi") is None:
        return []
    try:
        proc = subprocess.run(
            ["nvidia-smi", "--query-gpu=persistence_mode", "--format=csv,noheader"],
            capture_output=True, text=True, timeout=10,
        )
    except (subprocess.SubprocessError, OSError, UnicodeDecodeError):
        # UnicodeDecodeError: text=True decodes the output and may fail on odd bytes.
        return []

    # Looking only at the first row would hide a second GPU whose persistence
    # mode is off, so collect every non-empty row.
    states = [row.strip().lower() for row in proc.stdout.splitlines() if row.strip()]

    if any(state.startswith("disabled") for state in states):
        return [Finding(
            INFO, "GPU", "NVIDIA persistence mode is off",
            "The driver unloads when no client is attached, adding latency on first GPU "
            "access and churning state between game launches.",
            "Enable it below (per-boot), or enable the `nvidia-persistenced` service to "
            "make it permanent.",
            fix="nvidia_persistence",
        )]
    if states and all(state.startswith("enabled") for state in states):
        return [Finding(OK, "GPU", "NVIDIA persistence mode on", "The driver stays resident.",
                        fix="nvidia_persistence")]
    # Unrecognised output (e.g. an error message from the tool): say nothing.
    return []
# --- CPU governor ---------------------------------------------------------------------
def evaluate_governor(governors: set[str]) -> Finding | None:
    """Classify the set of CPU frequency governors currently in use.

    Returns ``None`` for an empty set. Otherwise: a warning when ``powersave``
    is among the governors, an OK finding when the only governor is
    ``performance``, and an informational finding for anything else. Every
    finding carries the ``cpu_governor`` fix id.
    """
    if not governors:
        return None

    listing = ", ".join(sorted(governors))

    if "powersave" in governors:
        return Finding(
            WARNING, "CPU", f"CPU governor set to power-saving ({listing})",
            "A powersave governor caps CPU frequency and can bottleneck frame times.",
            "Set it to performance below (or install GameMode to switch it per-game).",
            fix="cpu_governor",
        )

    if governors != {"performance"}:
        return Finding(
            INFO, "CPU", f"CPU governor: {listing}",
            "A dynamic governor scales with load; usually fine.",
            "For the most consistent frame pacing, set performance below (or use GameMode).",
            fix="cpu_governor",
        )

    return Finding(
        OK, "CPU", "CPU governor: performance", "CPUs run at full clocks under load.",
        fix="cpu_governor",
    )
def check_cpu_governor() -> list[Finding]:
    """Collect the scaling governor of every CPU from sysfs and evaluate the set.

    Unreadable or blank governor files are skipped. Returns a one-element
    list, or an empty list when ``evaluate_governor`` produces nothing.
    """
    cpu_root = Path("/sys/devices/system/cpu")
    raw_values = (
        _read(str(node)) for node in cpu_root.glob("cpu*/cpufreq/scaling_governor")
    )
    governors = {raw.strip() for raw in raw_values if raw and raw.strip()}
    finding = evaluate_governor(governors)
    if not finding:
        return []
    return [finding]
# --- GameMode / MangoHud --------------------------------------------------------------
def check_gamemode() -> list[Finding]:
    """Report whether Feral GameMode is installed.

    GameMode counts as installed when either ``gamemoderun`` or ``gamemoded``
    is found on PATH. The "not installed" finding carries the ``gamemode``
    install action id.
    """
    installed = any(shutil.which(tool) for tool in ("gamemoderun", "gamemoded"))
    if not installed:
        return [Finding(
            INFO, "Tools", "GameMode not installed",
            "GameMode auto-applies performance tweaks (governor, scheduling) for the duration of a game.",
            "Install it: `sudo apt install gamemode`, then launch games with `gamemoderun %command%` "
            "(or use a global Steam launch option).",
            action="gamemode",
        )]
    return [Finding(
        OK, "Tools", "Feral GameMode installed",
        "GameMode can apply the performance governor and other tweaks while a game runs.",
    )]
def check_mangohud() -> list[Finding]:
    """Report whether the ``mangohud`` executable is available on PATH.

    The "not installed" finding carries the ``mangohud`` install action id.
    """
    if not shutil.which("mangohud"):
        return [Finding(
            INFO, "Tools", "MangoHud not installed",
            "MangoHud overlays live FPS, frame times, and temps in-game — handy for spotting stutter.",
            "Install it: `sudo apt install mangohud`, then launch with `mangohud %command%`.",
            action="mangohud",
        )]
    return [Finding(
        OK, "Tools", "MangoHud available",
        "In-game FPS/temps/frametime overlay is installed.",
    )]
# --- vm.swappiness --------------------------------------------------------------------
def evaluate_swappiness(value: int) -> Finding:
    """Classify a ``vm.swappiness`` value.

    Values above 10 yield an informational finding suggesting a lower
    setting; anything else yields an OK finding. Both carry the
    ``swappiness`` fix id.
    """
    is_high = value > 10
    if not is_high:
        return Finding(
            OK, "Memory", f"vm.swappiness is {value}", "Swapping is conservative.",
            fix="swappiness",
        )
    return Finding(
        INFO, "Memory", f"vm.swappiness is high ({value})",
        "A high swappiness lets the kernel swap out memory eagerly, which can cause "
        "hitching during gaming on systems with ample RAM.",
        "Lower it below (e.g. 10); applies immediately.",
        fix="swappiness",
    )
def check_swappiness() -> list[Finding]:
    """Read ``/proc/sys/vm/swappiness`` and evaluate it.

    Returns an empty list when the file is unreadable or its stripped
    content is not made up of digits.
    """
    raw = _read("/proc/sys/vm/swappiness")
    if raw is None:
        return []
    digits = raw.strip()
    if not digits.isdigit():
        return []
    return [evaluate_swappiness(int(digits))]
# --- shader cache ---------------------------------------------------------------------
def evaluate_shader_cache(env: dict) -> Finding:
    """Decide from an environment mapping whether the shader disk cache is off.

    The cache is treated as disabled when ``__GL_SHADER_DISK_CACHE`` equals
    ``"0"``, or when ``MESA_SHADER_CACHE_DISABLE`` or
    ``MESA_GLSL_CACHE_DISABLE`` is ``"1"``/``"true"`` (case-insensitive).
    Returns a warning in that case and an OK finding otherwise.
    """
    if env.get("__GL_SHADER_DISK_CACHE") == "0":
        disabled = True
    else:
        # Checked in this order, stopping at the first variable that matches.
        mesa_switches = ("MESA_SHADER_CACHE_DISABLE", "MESA_GLSL_CACHE_DISABLE")
        disabled = any(
            env.get(name, "").lower() in ("1", "true") for name in mesa_switches
        )

    if not disabled:
        return Finding(
            OK, "GPU", "Shader disk cache enabled",
            "Compiled shaders are cached between runs (default).",
        )
    return Finding(
        WARNING, "GPU", "Shader disk cache is disabled",
        "With the shader cache off, shaders recompile every run — a common cause of "
        "in-game stutter, especially on first encounters.",
        "Unset the disabling variable (e.g. remove `__GL_SHADER_DISK_CACHE=0` / "
        "`MESA_SHADER_CACHE_DISABLE`) from your environment / launch options.",
    )
def check_shader_cache() -> list[Finding]:
    """Evaluate the shader-cache variables of this process's own environment."""
    finding = evaluate_shader_cache(os.environ)
    return [finding]
# --- transparent hugepages / CPU mitigations (only when notable) ----------------------
def check_thp() -> list[Finding]:
    """Note when Transparent HugePages is set to ``never``.

    Only the ``never`` setting produces a finding (informational, with the
    ``thp`` fix id); every other state, or an unreadable file, yields an
    empty list.
    """
    raw = _read("/sys/kernel/mm/transparent_hugepage/enabled")
    # The file marks the active choice with the same '[token]' notation as the
    # ASPM policy file, so the ASPM parser is reused here.
    if not raw or _active_aspm(raw) != "never":
        return []
    return [Finding(
        INFO, "Memory", "Transparent HugePages disabled (never)",
        "Some workloads benefit from THP; 'madvise' lets apps opt in without the downsides of 'always'.",
        "Optional: set 'madvise' below; applies immediately.",
        fix="thp",
    )]
def check_mitigations() -> list[Finding]:
    """Note when the running kernel was booted with ``mitigations=off``.

    The kernel command line is split into whitespace-separated options and
    only a stand-alone ``mitigations=`` option is considered, so the text
    appearing inside some other option does not trigger the finding. When the
    option is given more than once, the last occurrence decides, matching the
    order in which the kernel applies repeated parameters.

    Returns a single informational finding, or an empty list when mitigations
    are not switched off or ``/proc/cmdline`` is unreadable.
    """
    cmdline = _read("/proc/cmdline") or ""
    settings = [
        option.partition("=")[2]
        for option in cmdline.split()
        if option.startswith("mitigations=")
    ]
    if settings and settings[-1] == "off":
        return [Finding(
            INFO, "CPU", "CPU security mitigations are disabled",
            "`mitigations=off` recovers some CPU performance at the cost of CPU-vulnerability "
            "protections — a deliberate trade-off, noted here for awareness.",
            "Remove `mitigations=off` from the kernel cmdline to restore protections.",
        )]
    return []
# --- Proton versions (informational) --------------------------------------------------
def check_proton() -> list[Finding]:
    """List the installed Proton versions as an informational finding.

    Versions come from ``steam.proton_versions()``; any exception it raises is
    treated as "no versions". Returns an empty list when there are none.
    """
    # Imported at call time rather than at module import.
    from . import steam

    try:
        installed = steam.proton_versions()
    except Exception:
        # Best-effort: a failing Steam lookup simply means nothing to report.
        return []
    if not installed:
        return []

    count = len(installed)
    return [Finding(
        INFO, "Tools", f"Proton: {count} version(s) installed",
        ", ".join(installed),
        "Steam picks the Proton version per game (Properties → Compatibility); "
        "Proton Experimental often has the latest fixes.",
    )]
# --- aggregate ------------------------------------------------------------------------
def run_gameenv_checks() -> list[Finding]:
    """Run every environment check and return the findings, worst severity first.

    Findings are ranked through ``_ORDER``; a severity missing from that table
    ranks last (9). The sort is stable, so findings of equal severity keep the
    order in which their checks ran.
    """
    checks = (
        check_pcie_aspm,
        check_gpu_persistence,
        check_cpu_governor,
        check_gamemode,
        check_mangohud,
        check_swappiness,
        check_shader_cache,
        check_thp,
        check_mitigations,
        check_proton,
    )
    collected: list[Finding] = []
    for check in checks:
        collected.extend(check())
    return sorted(collected, key=lambda item: _ORDER.get(item.severity, 9))