9c30c9824e
Make the environment report actionable, not just advisory. Install (reuses M9 installer): - Add GameMode, MangoHud, cpupower to the component catalog (so they also show on the Setup page); catalog.by_id() lookup. - "tool not installed" findings (GameMode/MangoHud) get an Install button. Apply runtime-reversible tunables (D22, realizing the D9 consent-gated milestone): - core/fixes.py: dropdown of live options + Apply for CPU governor, NVIDIA persistence, PCIe ASPM policy, vm.swappiness, THP. One pkexec command each, no reboot, reverts on reboot; chosen value validated against live options; writes go to sysfs/procfs/nvidia-smi, never GRUB. GRUB/mitigations stay suggestion-only. - Finding gained optional action (install) + fix (apply) ids; shared finding_card renders the matching control; Environment page wires both and re-checks after a change. Tests for fixes (parse, command builders, value validation, gameenv wiring). Docs: D22 added (amends D9); SPEC/MODULES/ROADMAP updated. 0.9.0 -> 0.10.0. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
272 lines
10 KiB
Python
272 lines
10 KiB
Python
"""Gaming environment checks (M6): evaluate system settings that affect gaming
|
|
stability/performance and suggest the fix command — read-only (D9).
|
|
|
|
Stdlib-only. Each check degrades gracefully (a missing file/tool yields no finding or an
|
|
info finding, never an exception). The pure ``evaluate_*`` helpers are split from the IO
|
|
that reads sysfs / runs tools, so they're unit-testable.
|
|
|
|
Several checks target the seed case directly: an RTX 3070 falling off the PCIe bus under
|
|
load (Xid 79). PCIe ASPM power-saving, NVIDIA persistence mode, and a power-saving CPU
|
|
governor are the usual contributors to that class of drop-off / stutter.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import re
|
|
import shutil
|
|
import subprocess
|
|
from pathlib import Path
|
|
|
|
from .health import INFO, OK, WARNING, Finding
|
|
|
|
# Sort rank for each severity label: a lower rank sorts earlier. Severities that are
# not listed here are ranked by the caller's fallback value.
_ORDER = {
    "critical": 0,
    WARNING: 1,
    INFO: 2,
    OK: 3,
}
|
|
|
|
|
|
def _read(path: str) -> str | None:
    """Return the text stored at *path*, or ``None`` when it cannot be read.

    Args:
        path: Filesystem path of the file to read (typically a sysfs/procfs node).

    Returns:
        The file's full text, or ``None`` if opening, reading, or decoding it failed.
        This helper never raises for an unreadable file, so callers can treat ``None``
        as "no information available".
    """
    try:
        return Path(path).read_text()
    except (OSError, ValueError):
        # OSError: the file is missing, unreadable, or the node rejected the read.
        # ValueError: the bytes could not be decoded (UnicodeDecodeError is a subclass)
        # or the path itself is malformed (e.g. contains an embedded NUL).
        return None
|
|
|
|
|
|
# --- PCIe ASPM (seed-case relevant) ---------------------------------------------------
|
|
|
|
def _active_aspm(policy_text: str) -> str | None:
    """Pull out the first square-bracketed word from a choice string.

    For instance ``'[default] performance powersave'`` gives ``'default'``. When the
    text holds no bracketed word, the result is ``None``.
    """
    match = re.search(r"\[(\w+)\]", policy_text)
    if match is None:
        return None
    return match.group(1)
|
|
|
|
|
|
def evaluate_aspm(policy_text: str | None) -> Finding | None:
    """Turn the raw PCIe ASPM policy text into a finding.

    Returns ``None`` when the text is missing/empty or has no bracketed active policy.
    Otherwise: a warning for ``powersave``/``powersupersave``, an OK finding for
    ``performance``, and an informational finding for any other active policy. Every
    finding carries the ``pcie_aspm`` fix id.
    """
    selected = _active_aspm(policy_text) if policy_text else None
    if selected is None:
        return None

    if selected == "performance":
        return Finding(
            OK, "PCIe", "PCIe ASPM set to performance", "ASPM power-saving is disabled.",
            fix="pcie_aspm",
        )

    if selected in ("powersave", "powersupersave"):
        title = f"PCIe ASPM is in power-saving mode ({selected})"
        explanation = (
            "Aggressive PCIe Active-State Power Management can cause the GPU to drop off the "
            "bus under load (Xid 79) or stutter — the seed-case failure mode."
        )
        remedy = (
            "Set the policy to performance below (live), or for a permanent change add "
            "`pcie_aspm=off` in GRUB, then `sudo update-grub` and reboot."
        )
        return Finding(WARNING, "PCIe", title, explanation, remedy, fix="pcie_aspm")

    # Any other active policy (e.g. the kernel/BIOS default) is only worth a note.
    return Finding(
        INFO,
        "PCIe",
        f"PCIe ASPM policy: {selected}",
        "ASPM is left to the kernel/BIOS default.",
        "If you see GPU bus-drop events (Xid 79), set the policy to performance below.",
        fix="pcie_aspm",
    )
|
|
|
|
|
|
def check_pcie_aspm() -> list[Finding]:
    """Read the kernel's PCIe ASPM policy file and return its finding as a 0/1-item list."""
    policy_text = _read("/sys/module/pcie_aspm/parameters/policy")
    finding = evaluate_aspm(policy_text)
    if not finding:
        return []
    return [finding]
|
|
|
|
|
|
# --- NVIDIA persistence mode (seed-case relevant) -------------------------------------
|
|
|
|
def check_gpu_persistence() -> list[Finding]:
    """Report the NVIDIA persistence mode as printed by ``nvidia-smi``.

    Returns an empty list when ``nvidia-smi`` is not on PATH, cannot be run (or times
    out after 10 seconds), or prints something other than an enabled/disabled state.
    Only the first line of output is inspected.
    """
    if shutil.which("nvidia-smi") is None:
        return []

    query = ["nvidia-smi", "--query-gpu=persistence_mode", "--format=csv,noheader"]
    try:
        result = subprocess.run(query, capture_output=True, text=True, timeout=10)
    except (subprocess.SubprocessError, OSError):
        return []

    output = result.stdout.strip()
    first_line = output.splitlines()[0].strip() if output else ""
    mode = first_line.lower()

    if mode.startswith("enabled"):
        return [
            Finding(
                OK, "GPU", "NVIDIA persistence mode on", "The driver stays resident.",
                fix="nvidia_persistence",
            )
        ]
    if mode.startswith("disabled"):
        explanation = (
            "The driver unloads when no client is attached, adding latency on first GPU "
            "access and churning state between game launches."
        )
        remedy = (
            "Enable it below (per-boot), or enable the `nvidia-persistenced` service to "
            "make it permanent."
        )
        return [
            Finding(
                INFO, "GPU", "NVIDIA persistence mode is off", explanation, remedy,
                fix="nvidia_persistence",
            )
        ]
    return []
|
|
|
|
|
|
# --- CPU governor ---------------------------------------------------------------------
|
|
|
|
def evaluate_governor(governors: set[str]) -> Finding | None:
    """Classify the set of CPU frequency governors currently in use.

    Returns ``None`` for an empty set. Otherwise: a warning if ``powersave`` is among
    them, an OK finding if ``performance`` is the only one, and an informational
    finding for anything else. Every finding carries the ``cpu_governor`` fix id.
    """
    if not governors:
        return None

    listing = ", ".join(sorted(governors))

    if "powersave" in governors:
        return Finding(
            WARNING,
            "CPU",
            f"CPU governor set to power-saving ({listing})",
            "A powersave governor caps CPU frequency and can bottleneck frame times.",
            "Set it to performance below (or install GameMode to switch it per-game).",
            fix="cpu_governor",
        )

    if governors == {"performance"}:
        return Finding(
            OK, "CPU", "CPU governor: performance", "CPUs run at full clocks under load.",
            fix="cpu_governor",
        )

    return Finding(
        INFO,
        "CPU",
        f"CPU governor: {listing}",
        "A dynamic governor scales with load; usually fine.",
        "For the most consistent frame pacing, set performance below (or use GameMode).",
        fix="cpu_governor",
    )
|
|
|
|
|
|
def check_cpu_governor() -> list[Finding]:
    """Collect the distinct governors of all CPUs from sysfs and evaluate them.

    Unreadable or blank ``scaling_governor`` files are ignored. The result is a list
    holding at most one finding.
    """
    cpu_root = Path("/sys/devices/system/cpu")
    raw_values = (_read(str(node)) for node in cpu_root.glob("cpu*/cpufreq/scaling_governor"))
    active = {raw.strip() for raw in raw_values if raw and raw.strip()}

    finding = evaluate_governor(active)
    if not finding:
        return []
    return [finding]
|
|
|
|
|
|
# --- GameMode / MangoHud --------------------------------------------------------------
|
|
|
|
def check_gamemode() -> list[Finding]:
    """Report whether GameMode is installed, judged by its executables being on PATH.

    Either ``gamemoderun`` or ``gamemoded`` counts as installed. The "not installed"
    finding carries the ``gamemode`` action id.
    """
    installed = any(shutil.which(tool) for tool in ("gamemoderun", "gamemoded"))

    if not installed:
        install_hint = (
            "Install it: `sudo apt install gamemode`, then launch games with `gamemoderun %command%` "
            "(or use a global Steam launch option)."
        )
        missing = Finding(
            INFO,
            "Tools",
            "GameMode not installed",
            "GameMode auto-applies performance tweaks (governor, scheduling) for the duration of a game.",
            install_hint,
            action="gamemode",
        )
        return [missing]

    present = Finding(
        OK,
        "Tools",
        "Feral GameMode installed",
        "GameMode can apply the performance governor and other tweaks while a game runs.",
    )
    return [present]
|
|
|
|
|
|
def check_mangohud() -> list[Finding]:
    """Report whether the ``mangohud`` executable is on PATH.

    The "not installed" finding carries the ``mangohud`` action id.
    """
    if not shutil.which("mangohud"):
        missing = Finding(
            INFO,
            "Tools",
            "MangoHud not installed",
            "MangoHud overlays live FPS, frame times, and temps in-game — handy for spotting stutter.",
            "Install it: `sudo apt install mangohud`, then launch with `mangohud %command%`.",
            action="mangohud",
        )
        return [missing]

    present = Finding(
        OK, "Tools", "MangoHud available", "In-game FPS/temps/frametime overlay is installed."
    )
    return [present]
|
|
|
|
|
|
# --- vm.swappiness --------------------------------------------------------------------
|
|
|
|
def evaluate_swappiness(value: int) -> Finding:
    """Classify a ``vm.swappiness`` value.

    Values above 10 produce an informational "high" finding; anything else is OK.
    Both findings carry the ``swappiness`` fix id.
    """
    is_high = value > 10
    if not is_high:
        return Finding(
            OK, "Memory", f"vm.swappiness is {value}", "Swapping is conservative.",
            fix="swappiness",
        )

    explanation = (
        "A high swappiness lets the kernel swap out memory eagerly, which can cause "
        "hitching during gaming on systems with ample RAM."
    )
    return Finding(
        INFO,
        "Memory",
        f"vm.swappiness is high ({value})",
        explanation,
        "Lower it below (e.g. 10); applies immediately.",
        fix="swappiness",
    )
|
|
|
|
|
|
def check_swappiness() -> list[Finding]:
    """Read ``/proc/sys/vm/swappiness`` and evaluate it.

    Returns an empty list when the file is unreadable or its stripped content is not
    made up of digits only.
    """
    raw = _read("/proc/sys/vm/swappiness")
    if raw is None:
        return []

    digits = raw.strip()
    if not digits.isdigit():
        return []

    return [evaluate_swappiness(int(digits))]
|
|
|
|
|
|
# --- shader cache ---------------------------------------------------------------------
|
|
|
|
def evaluate_shader_cache(env: dict) -> Finding:
    """Decide from environment variables whether the shader disk cache is disabled.

    Args:
        env: Mapping of environment variable names to their string values
            (``os.environ`` or a plain dict).

    Returns:
        A warning finding when ``__GL_SHADER_DISK_CACHE`` is exactly ``"0"`` or when
        ``MESA_SHADER_CACHE_DISABLE`` / ``MESA_GLSL_CACHE_DISABLE`` holds a true-like
        value; otherwise an OK finding.
    """
    # Mesa reads its *_CACHE_DISABLE variables as booleans. Besides "1"/"true" it is
    # understood to accept "y", "yes" and "t" (case-insensitively), so those spellings
    # must count as "disabled" too.
    # NOTE(review): confirm the accepted spellings against the Mesa version you target.
    mesa_true_values = ("1", "true", "t", "y", "yes")
    mesa_disable_vars = ("MESA_SHADER_CACHE_DISABLE", "MESA_GLSL_CACHE_DISABLE")

    nvidia_cache_off = env.get("__GL_SHADER_DISK_CACHE") == "0"
    mesa_cache_off = any(
        env.get(name, "").lower() in mesa_true_values for name in mesa_disable_vars
    )

    if nvidia_cache_off or mesa_cache_off:
        return Finding(
            WARNING, "GPU", "Shader disk cache is disabled",
            "With the shader cache off, shaders recompile every run — a common cause of "
            "in-game stutter, especially on first encounters.",
            "Unset the disabling variable (e.g. remove `__GL_SHADER_DISK_CACHE=0` / "
            "`MESA_SHADER_CACHE_DISABLE`) from your environment / launch options.",
        )
    return Finding(
        OK, "GPU", "Shader disk cache enabled",
        "Compiled shaders are cached between runs (default).",
    )
|
|
|
|
|
|
def check_shader_cache() -> list[Finding]:
    """Evaluate the shader-cache variables of the current process environment."""
    finding = evaluate_shader_cache(os.environ)
    return [finding]
|
|
|
|
|
|
# --- transparent hugepages / CPU mitigations (only when notable) ----------------------
|
|
|
|
def check_thp() -> list[Finding]:
    """Emit an informational finding only when Transparent HugePages is set to ``never``.

    Returns an empty list when the sysfs file is unreadable/empty or when any other
    mode is active. The finding carries the ``thp`` fix id.
    """
    raw = _read("/sys/kernel/mm/transparent_hugepage/enabled")
    if not raw:
        return []

    # The file marks the active mode with '[token]', the same layout the ASPM parser reads.
    mode = _active_aspm(raw)
    if mode != "never":
        return []

    note = Finding(
        INFO,
        "Memory",
        "Transparent HugePages disabled (never)",
        "Some workloads benefit from THP; 'madvise' lets apps opt in without the downsides of 'always'.",
        "Optional: set 'madvise' below; applies immediately.",
        fix="thp",
    )
    return [note]
|
|
|
|
|
|
def check_mitigations() -> list[Finding]:
    """Note when ``mitigations=off`` appears in the kernel command line.

    An unreadable ``/proc/cmdline`` is treated as an empty command line, so no finding
    is produced in that case.
    """
    kernel_args = _read("/proc/cmdline") or ""
    if "mitigations=off" not in kernel_args:
        return []

    explanation = (
        "`mitigations=off` recovers some CPU performance at the cost of CPU-vulnerability "
        "protections — a deliberate trade-off, noted here for awareness."
    )
    note = Finding(
        INFO,
        "CPU",
        "CPU security mitigations are disabled",
        explanation,
        "Remove `mitigations=off` from the kernel cmdline to restore protections.",
    )
    return [note]
|
|
|
|
|
|
# --- Proton versions (informational) --------------------------------------------------
|
|
|
|
def check_proton() -> list[Finding]:
    """List the Proton versions reported by the sibling ``steam`` module.

    Any exception raised while querying the versions is treated the same as "none
    found"; in both cases the result is an empty list.
    """
    from . import steam

    try:
        installed = steam.proton_versions()
    except Exception:
        # Best-effort lookup: a failing query simply yields no finding.
        return []
    if not installed:
        return []

    count = len(installed)
    advice = (
        "Steam picks the Proton version per game (Properties → Compatibility); "
        "Proton Experimental often has the latest fixes."
    )
    summary = Finding(
        INFO,
        "Tools",
        f"Proton: {count} version(s) installed",
        ", ".join(installed),
        advice,
    )
    return [summary]
|
|
|
|
|
|
# --- aggregate ------------------------------------------------------------------------
|
|
|
|
def run_gameenv_checks() -> list[Finding]:
    """Run every environment check and return the findings ordered by severity rank.

    Ranks come from ``_ORDER`` (unknown severities rank 9, i.e. last). The sort is
    stable, so findings of equal severity keep the order in which the checks ran.
    """
    # Built inside the function so the check functions are resolved at call time.
    checks = (
        check_pcie_aspm,
        check_gpu_persistence,
        check_cpu_governor,
        check_gamemode,
        check_mangohud,
        check_swappiness,
        check_shader_cache,
        check_thp,
        check_mitigations,
        check_proton,
    )

    collected: list[Finding] = []
    for check in checks:
        collected.extend(check())

    return sorted(collected, key=lambda item: _ORDER.get(item.severity, 9))
|