Compare commits
6 Commits
v0.37.1
...
9fe9a6576f
| Author | SHA1 | Date | |
|---|---|---|---|
| 9fe9a6576f | |||
|
07bc722209
|
|||
| d405bf7caf | |||
|
9bb0f9a684
|
|||
| 4bbc0fa97e | |||
|
a0f8055328
|
@@ -5,6 +5,19 @@ All notable changes to RigDoctor are recorded here. Format follows
|
||||
(`MAJOR.MINOR.PATCH`, pre-1.0). `__version__` and `pyproject.toml` must match the git
|
||||
release tag (so the auto-updater, D18, can compare versions).
|
||||
|
||||
## [0.38.0] - 2026-05-22
|
||||
### Added
|
||||
- **PCIe link in the Inventory.** Each NVMe drive now shows its negotiated PCIe link next to the
|
||||
model — e.g. `Samsung SSD 980 PRO 1TB (931.5G) · PCIe Gen4 x4` — read from sysfs
|
||||
(`current/max_link_speed` + width). If a drive negotiates below its capability (a slower M.2
|
||||
slot, lane-sharing, or a downtrain) it's flagged: `PCIe Gen3 x4 (capable of Gen4 x4)`. So you
|
||||
can confirm a Gen4 SSD is actually in a Gen4 slot. (SATA disks show no PCIe link.)
|
||||
- **System Health flags downtrained NVMe links.** A new check warns when an NVMe drive negotiates
|
||||
fewer PCIe lanes than it supports (almost always motherboard **lane-sharing** — a GPU/second
|
||||
card or another M.2 stealing lanes) and notes speed-only reductions as info (a slower slot or
|
||||
idle ASPM). The GPU is deliberately excluded — NVIDIA drops its PCIe gen/width at idle, so a
|
||||
snapshot would false-alarm.
|
||||
|
||||
## [0.37.1] - 2026-05-22
|
||||
### Fixed
|
||||
- **`rigdoctor update` now uses the right method for how RigDoctor was installed.** It detects
|
||||
|
||||
@@ -29,6 +29,16 @@ freeze are usually lost. RigDoctor pulls it together and keeps the evidence.
|
||||
or share a live **terminal session** for remote help.
|
||||
- **Self-updating** — `apt upgrade`, or the in-app updater.
|
||||
|
||||
## Screenshots
|
||||
|
||||
| Dashboard | Inventory |
|
||||
|---|---|
|
||||
|  |  |
|
||||
|
||||
**Share** — a read-only or interactive terminal session over the relay, for remote help:
|
||||
|
||||

|
||||
|
||||
## Install
|
||||
|
||||
### Debian / Ubuntu — `.deb`
|
||||
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 171 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 141 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 78 KiB |
+1
-1
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "rigdoctor"
|
||||
version = "0.37.1"
|
||||
version = "0.38.0"
|
||||
description = "Modular hardware monitoring & crash diagnostics for Linux gamers."
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
"""RigDoctor — modular hardware monitoring & crash diagnostics for Linux gamers."""
|
||||
|
||||
__version__ = "0.37.1"
|
||||
__version__ = "0.38.0"
|
||||
|
||||
@@ -251,6 +251,38 @@ def check_live_temps() -> list[Finding]:
|
||||
)]
|
||||
|
||||
|
||||
def check_pcie_links() -> list[Finding]:
    """Report NVMe drives whose negotiated PCIe link is below what the drive supports.

    For every controller returned by ``inventory.nvme_controllers()`` the current and maximum
    link are read with ``inventory.read_link()``:

    * a lane count that differs from the maximum yields a WARNING (typically motherboard
      lane-sharing: a GPU/second card or another M.2 taking lanes from the slot);
    * otherwise, a lower generation than the maximum yields an INFO finding (a slower slot,
      or ordinary link power management at idle).

    Controllers whose current or maximum generation could not be read are skipped. GPUs are
    deliberately not inspected: NVIDIA lowers both PCIe gen and width at idle, so a
    point-in-time snapshot would be misleading.
    """
    from . import inventory

    results: list[Finding] = []
    for ctrl, device in inventory.nvme_controllers():
        gen_now, lanes_now, gen_cap, lanes_cap = inventory.read_link(device)
        if not (gen_now and gen_cap):
            # Link speed unreadable on one side — nothing meaningful to compare.
            continue

        if lanes_cap and lanes_now and lanes_now != lanes_cap:
            # Fewer lanes → almost always lane-sharing.
            title = f"{ctrl} linked at x{lanes_now} (supports x{lanes_cap})"
            detail = (
                f"{ctrl} negotiated PCIe Gen{gen_now} x{lanes_now}, but the drive supports "
                f"Gen{gen_cap} x{lanes_cap}. Fewer lanes is usually motherboard lane-sharing — a GPU or a "
                "second card in a PCIe slot, or another populated M.2, can steal lanes from this slot."
            )
            advice = (
                "Check your board manual's lane-sharing table; move the drive to a full-x4 "
                "(often CPU-attached) M.2 slot."
            )
            results.append(Finding(WARNING, "PCIe", title, detail, advice))
            continue

        if gen_now < gen_cap:
            # Full width but a lower generation → slower slot or idle ASPM.
            title = f"{ctrl} linked at Gen{gen_now} (supports Gen{gen_cap})"
            detail = (
                f"{ctrl} negotiated PCIe Gen{gen_now} but supports Gen{gen_cap}. This can be a slower "
                "(chipset or older) M.2 slot, or normal link power management (ASPM) at idle."
            )
            advice = "If you expect full speed, check the slot and the BIOS PCIe/ASPM settings."
            results.append(Finding(INFO, "PCIe", title, detail, advice))

    return results
|
||||
|
||||
|
||||
def run_health_checks(include_journal: bool = True) -> list[Finding]:
|
||||
"""Run all checks and return findings sorted by severity (worst first).
|
||||
|
||||
@@ -273,5 +305,6 @@ def run_health_checks(include_journal: bool = True) -> list[Finding]:
|
||||
else:
|
||||
findings += check_smart()
|
||||
findings += check_live_temps()
|
||||
findings += check_pcie_links()
|
||||
findings.sort(key=lambda f: _ORDER.get(f.severity, 9))
|
||||
return findings
|
||||
|
||||
@@ -9,6 +9,7 @@ from __future__ import annotations
|
||||
import json
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
from dataclasses import dataclass
|
||||
@@ -123,6 +124,64 @@ def _gpu() -> Section:
|
||||
return Section("GPU", [("Device", g) for g in gpus] or [("Device", "unknown")])
|
||||
|
||||
|
||||
# PCIe link speed (GT/s) → generation.
|
||||
_PCIE_GEN = {"2.5": 1, "5": 2, "5.0": 2, "8": 3, "8.0": 3, "16": 4, "16.0": 4, "32": 5, "32.0": 5}
|
||||
|
||||
|
||||
def _gen(speed: str) -> int | None:
|
||||
"""Map a sysfs link speed like '16.0 GT/s PCIe' to its PCIe generation (4)."""
|
||||
tok = speed.strip().split()[0] if speed.strip() else ""
|
||||
return _PCIE_GEN.get(tok)
|
||||
|
||||
|
||||
def read_link(dev: Path) -> tuple[int | None, str, int | None, str]:
    """Read the negotiated and maximum PCIe link of a PCI device directory.

    Returns ``(cur_gen, cur_width, max_gen, max_width)``. Generations are ints (e.g. 4) or
    None when the speed could not be mapped; widths are the raw, stripped sysfs strings
    (e.g. '4'). An attribute file that cannot be read (OSError) is treated as ''.
    """
    def _attr(filename: str) -> str:
        # Best-effort read: a missing or unreadable attribute becomes an empty string.
        try:
            raw = (dev / filename).read_text()
        except OSError:
            return ""
        return raw.strip()

    cur_gen = _gen(_attr("current_link_speed"))
    cur_width = _attr("current_link_width")
    max_gen = _gen(_attr("max_link_speed"))
    max_width = _attr("max_link_width")
    return (cur_gen, cur_width, max_gen, max_width)
|
||||
|
||||
|
||||
def _link_desc(dev: Path) -> str:
    """Describe a PCI device's negotiated PCIe link, noting if it's below its max.

    e.g. 'PCIe Gen4 x4', or 'PCIe Gen3 x4 (capable of Gen4 x4)' when downtrained / in a
    slower slot. Returns '' when the current generation or width cannot be read. If the
    maximum width is unreadable, the capability note names the generation only
    ('capable of Gen4') instead of ending in a dangling 'x'.
    """
    cur_g, cur_w, max_g, max_w = read_link(dev)
    if not cur_g or not cur_w:
        return ""

    desc = f"PCIe Gen{cur_g} x{cur_w}"
    if not max_g:
        # Without a known maximum generation there is nothing to compare against.
        return desc

    slower = cur_g < max_g
    narrower = bool(max_w) and cur_w != max_w
    if slower or narrower:
        capability = f"Gen{max_g} x{max_w}" if max_w else f"Gen{max_g}"
        desc += f" (capable of {capability})"
    return desc
|
||||
|
||||
|
||||
def nvme_controllers() -> list[tuple[str, Path]]:
    """Each NVMe controller as (name, pci-device-dir), e.g. ('nvme0', /sys/.../device).

    Entries of /sys/class/nvme named exactly ``nvme<N>`` are returned in numeric controller
    order (nvme2 before nvme10 — a plain string sort would put nvme10 first). Returns an
    empty list when the directory cannot be listed (OSError).
    """
    base = Path("/sys/class/nvme")
    try:
        names = [p.name for p in base.iterdir() if re.fullmatch(r"nvme\d+", p.name)]
    except OSError:
        return []
    # Order by the numeric suffix so multi-digit controller numbers sort naturally.
    names.sort(key=lambda n: int(n[len("nvme"):]))
    return [(n, base / n / "device") for n in names]
|
||||
|
||||
|
||||
def _nvme_link(block_name: str) -> str:
    """Return the PCIe link description for an NVMe block device, or '' for non-NVMe names.

    The controller name is the leading ``nvme<N>`` of the block device name
    (nvme0n1 → nvme0); its ``device`` directory under /sys/class/nvme is then described.
    """
    found = re.match(r"(nvme\d+)", block_name)
    if found is None:
        return ""
    controller = found.group(1)
    return _link_desc(Path("/sys/class/nvme") / controller / "device")
|
||||
|
||||
|
||||
def _storage() -> Section:
|
||||
items: list[tuple[str, str]] = []
|
||||
# TYPE first so MODEL (which can contain spaces) is the trailing field.
|
||||
@@ -133,7 +192,11 @@ def _storage() -> Section:
|
||||
continue
|
||||
name, size = parts[1], parts[2]
|
||||
model = parts[3] if len(parts) > 3 else ""
|
||||
items.append((name, f"{model} ({size})".strip()))
|
||||
desc = f"{model} ({size})".strip()
|
||||
link = _nvme_link(name) # NVMe PCIe gen/width (e.g. Gen4 x4), flags downtrains
|
||||
if link:
|
||||
desc += f" · {link}"
|
||||
items.append((name, desc))
|
||||
return Section("Storage", items or [("Disks", "unknown")])
|
||||
|
||||
|
||||
|
||||
+40
-1
@@ -1,8 +1,18 @@
|
||||
"""Tests for the M4 health report's log scanner (synthetic input)."""
|
||||
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from rigdoctor.core.health import CRITICAL, WARNING, run_health_checks, scan_journal_text
|
||||
from rigdoctor.core import health
|
||||
from rigdoctor.core.health import (
|
||||
CRITICAL,
|
||||
INFO,
|
||||
WARNING,
|
||||
check_pcie_links,
|
||||
run_health_checks,
|
||||
scan_journal_text,
|
||||
)
|
||||
|
||||
|
||||
class HealthScanTests(unittest.TestCase):
|
||||
@@ -42,5 +52,34 @@ class HealthScanTests(unittest.TestCase):
|
||||
self.assertEqual(ranks, sorted(ranks))
|
||||
|
||||
|
||||
class PcieLinkCheckTests(unittest.TestCase):
    """check_pcie_links() against a single mocked NVMe controller with a chosen link tuple."""

    def _findings_for(self, cur_g, cur_w, max_g, max_w):
        # Run the check with one fake NVMe controller whose link reads as the given tuple.
        fake_ctrls = mock.patch("rigdoctor.core.inventory.nvme_controllers",
                                return_value=[("nvme0", Path("/x"))])
        fake_link = mock.patch("rigdoctor.core.inventory.read_link",
                               return_value=(cur_g, cur_w, max_g, max_w))
        with fake_ctrls, fake_link:
            return check_pcie_links()

    def test_reduced_width_is_a_warning_about_lane_sharing(self):
        findings = self._findings_for(4, "2", 4, "4")  # Gen4 x2 but supports x4
        self.assertEqual(len(findings), 1)
        only = findings[0]
        self.assertEqual(only.severity, WARNING)
        self.assertIn("lane-sharing", only.detail)

    def test_reduced_speed_only_is_info(self):
        findings = self._findings_for(3, "4", 4, "4")  # Gen3 x4 but supports Gen4
        self.assertEqual(len(findings), 1)
        self.assertEqual(findings[0].severity, INFO)

    def test_full_speed_no_finding(self):
        self.assertEqual(self._findings_for(4, "4", 4, "4"), [])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
"""Tests for the M5 system inventory (render + dict round-trip; collect on real system)."""
|
||||
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from rigdoctor.core import inventory
|
||||
from rigdoctor.core.inventory import Section
|
||||
@@ -26,5 +28,31 @@ class InventoryTests(unittest.TestCase):
|
||||
self.assertIn("- **Model:** Test CPU", md)
|
||||
|
||||
|
||||
class PcieLinkTests(unittest.TestCase):
    """PCIe link parsing and description, using fake sysfs device directories."""

    def test_gen_mapping(self):
        self.assertEqual(inventory._gen("16.0 GT/s PCIe"), 4)
        self.assertEqual(inventory._gen("8.0 GT/s PCIe"), 3)
        self.assertIsNone(inventory._gen(""))

    def _fake_dev(self, cur_s, cur_w, max_s, max_w) -> Path:
        # Build a throwaway directory holding the four sysfs link attributes. It is removed
        # when the test finishes, so repeated runs do not leave temp directories behind.
        tmp = tempfile.TemporaryDirectory()
        self.addCleanup(tmp.cleanup)
        d = Path(tmp.name)
        (d / "current_link_speed").write_text(cur_s)
        (d / "current_link_width").write_text(cur_w)
        (d / "max_link_speed").write_text(max_s)
        (d / "max_link_width").write_text(max_w)
        return d

    def test_link_at_full_speed(self):
        dev = self._fake_dev("16.0 GT/s PCIe", "4", "16.0 GT/s PCIe", "4")
        self.assertEqual(inventory._link_desc(dev), "PCIe Gen4 x4")

    def test_link_downtrained_flags_capability(self):
        dev = self._fake_dev("8.0 GT/s PCIe", "4", "16.0 GT/s PCIe", "4")
        self.assertEqual(inventory._link_desc(dev), "PCIe Gen3 x4 (capable of Gen4 x4)")

    def test_link_reduced_width_flags_capability(self):
        dev = self._fake_dev("16.0 GT/s PCIe", "2", "16.0 GT/s PCIe", "4")
        self.assertEqual(inventory._link_desc(dev), "PCIe Gen4 x2 (capable of Gen4 x4)")

    def test_non_nvme_has_no_link(self):
        self.assertEqual(inventory._nvme_link("sda"), "")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
Reference in New Issue
Block a user