Compare commits
19 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| b47006bc22 | |||
| 00394c287c | |||
| 2f6cab72c4 | |||
| 67d4c1cb99 | |||
| e33cc0ef3a | |||
| e3b20089f0 | |||
| 54c0971ac3 | |||
| 9ae2e22b44 | |||
| 89ebb6c61e | |||
| c8f7d66349 | |||
| 6215181d23 | |||
| 09cbc57b8c | |||
| f3021c4ddb | |||
| ca4bc4c64f | |||
| 46ba53631a | |||
| 4e3f6aa94e | |||
| 2e6a981120 | |||
| daf702671e | |||
| ce5f830393 |
@@ -0,0 +1,92 @@
|
||||
name: release
run-name: Release on push to main

# Builds a wheel + sdist and the self-extracting .run installer, then publishes a Gitea
# release v<version> on every push to main, attaching everything found under dist/.
# The version comes from pyproject.toml (kept in lockstep with __version__, D19);
# if a release for that tag already exists, the job is a no-op — so bump the version
# (and CHANGELOG) to cut a new release.
#
# Workflow expressions (${{ … }}) are handed to scripts through `env:` rather than being
# spliced into the script text, so an unexpected value can never change the code that runs.

on:
  push:
    branches: [main]

jobs:
  release:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Build wheel + sdist
        run: |
          python -m pip install --upgrade build
          python -m build

      - name: Build self-extracting installer (.run)
        run: python packaging/make_run.py

      - name: Read version
        id: ver
        run: |
          V=$(python -c "import tomllib; print(tomllib.load(open('pyproject.toml','rb'))['project']['version'])")
          echo "version=$V" >> "$GITHUB_OUTPUT"

      # Writes /tmp/release.json: the request payload for the "create release" API call,
      # with the body taken from this version's CHANGELOG.md section.
      - name: Build release notes
        env:
          VERSION: ${{ steps.ver.outputs.version }}
          TARGET_SHA: ${{ github.sha }}
        run: |
          python - <<'PY'
          import json
          import os
          from pathlib import Path

          version = os.environ["VERSION"]
          tag = f"v{version}"

          # A missing/unreadable changelog is not fatal: fall back to a generic body.
          try:
              changelog = Path("CHANGELOG.md").read_text(encoding="utf-8")
          except OSError as err:
              print(f"CHANGELOG.md not readable ({err}); using the fallback release body")
              changelog = ""

          # Collect the lines between "## [<version>]" and the next "## " heading.
          out, capturing = [], False
          for line in changelog.splitlines():
              if line.startswith("## "):
                  if capturing:
                      break  # reached the next release's heading
                  capturing = line.startswith(f"## [{version}]")
                  continue
              if capturing:
                  out.append(line)

          body = "\n".join(out).strip() or f"Release {tag}."
          payload = {
              "tag_name": tag,
              "target_commitish": os.environ["TARGET_SHA"],
              "name": tag,
              "body": body,
          }
          Path("/tmp/release.json").write_text(json.dumps(payload), encoding="utf-8")
          print(f"release notes: {len(body)} chars")
          PY

      - name: Publish Gitea release
        env:
          TOKEN: ${{ secrets.GITHUB_TOKEN }}
          API: ${{ github.server_url }}/api/v1/repos/${{ github.repository }}
          TAG: v${{ steps.ver.outputs.version }}
        run: |
          set -euo pipefail

          # Only 404 means "no such release yet". Any other non-200 status (bad token,
          # server error, …) must stop the job instead of being mistaken for "missing".
          code=$(curl -sS -o /dev/null -w '%{http_code}' \
            -H "Authorization: token ${TOKEN}" "${API}/releases/tags/${TAG}")
          case "$code" in
            200)
              echo "Release ${TAG} already exists — nothing to do."
              exit 0
              ;;
            404)
              ;;
            *)
              echo "Unexpected HTTP ${code} while checking for release ${TAG}." >&2
              exit 1
              ;;
          esac

          echo "Creating release ${TAG}…"
          # --fail: an HTTP error aborts the pipeline (pipefail) instead of feeding an
          # error document to the JSON parser.
          rid=$(curl --fail -sS -X POST \
            -H "Authorization: token ${TOKEN}" \
            -H "Content-Type: application/json" \
            -d @/tmp/release.json \
            "${API}/releases" | python -c "import sys, json; print(json.load(sys.stdin)['id'])")

          for f in dist/*; do
            echo "Uploading $(basename "$f")…"
            # --fail: a rejected upload fails the job rather than being silently dropped.
            curl --fail -sS -X POST \
              -H "Authorization: token ${TOKEN}" \
              -F "attachment=@${f}" \
              "${API}/releases/${rid}/assets?name=$(basename "$f")" >/dev/null
          done
          echo "Published ${TAG}."
|
||||
+209
@@ -0,0 +1,209 @@
|
||||
# Changelog
|
||||
|
||||
All notable changes to RigDoctor are recorded here. Format follows
|
||||
[Keep a Changelog](https://keepachangelog.com/); versioning is SemVer-style
|
||||
(`MAJOR.MINOR.PATCH`, pre-1.0). `__version__` and `pyproject.toml` must match the git
|
||||
release tag (so the auto-updater, D18, can compare versions).
|
||||
|
||||
## [0.7.2] - 2026-05-21
|
||||
### Changed
|
||||
- Removed the GUI **Inventory** tab — use the CLI `rigdoctor inventory` instead. (Inventory is
|
||||
still collected for the relay guest view, so a remote helper still sees the host's hardware.)
|
||||
### Fixed
|
||||
- Shared terminal caret now sits at the real cursor position (row **and** column) instead of
|
||||
the start of the line.
|
||||
|
||||
## [0.7.1] - 2026-05-21
|
||||
### Fixed
|
||||
- Shared terminal: a guest who joined **after** the host enabled the terminal stayed read-only.
|
||||
The host now re-sends the terminal state when a guest joins, so the terminal is available.
|
||||
- Inventory page no longer jumps back to the top when it refreshes (e.g. when elevated data
|
||||
arrives) — scroll position is preserved and unchanged data isn't re-rendered.
|
||||
- Shared terminal now follows the cursor to the bottom as output arrives (e.g. `ls -la`),
|
||||
instead of staying scrolled up.
|
||||
|
||||
## [0.7.0] - 2026-05-21
|
||||
### Added
|
||||
- **Shared terminal (M12, Tier 3)**: when the host enables it, the session shares a real **PTY**
|
||||
shell — the guest gets an interactive terminal (vim, top, tab-completion, Ctrl-C) running on
|
||||
the host as the host's user. The host **reads along** live and can type too, e.g. a `sudo`
|
||||
password — which stays local and is never sent to the guest. Off by default, host-consented.
|
||||
The guest also pulls the host's inventory on join.
|
||||
### Fixed
|
||||
- **Input contrast**: all form controls (text fields, spin boxes, combo boxes, terminals) now
|
||||
use the dark theme with readable text (Fusion defaulted them to light-on-light).
|
||||
|
||||
## [0.6.0] - 2026-05-21
|
||||
### Added
|
||||
- **Session sharing over the relay (M12)**: a **Share** tab — *Start shared session* (host)
|
||||
hands you a short code and streams a read-only live view; *Enter share code* (guest) joins
|
||||
someone else's session and views their sensors/health/inventory. Both connect outbound over
|
||||
WebSocket to the relay (`relay_url`, default `wss://rigdoctor.jesseyvanofferen.com`), gated
|
||||
by your Gitea access token — no port forwarding. Read-only.
|
||||
|
||||
## [0.5.0] - 2026-05-21
|
||||
### Added
|
||||
- **Session sharing (M12, Tier 2)**: `rigdoctor share serve` starts a **read-only** live view
|
||||
(sensors auto-refresh + health report + inventory) over a local HTTP server, gated by a
|
||||
random share token. Bind to localhost for local testing, or to all interfaces behind a
|
||||
user-chosen tunnel (Tailscale/cloudflared/SSH) for remote help. No actions, no terminal.
|
||||
(Tier 1 export and Tier 3 gated terminal still to come — D16.)
|
||||
|
||||
## [0.4.1] - 2026-05-21
|
||||
### Fixed
|
||||
- Checkbox contrast: a checked checkbox is now a clear accent-filled box with a checkmark
|
||||
(was hard to tell checked from unchecked on the dark theme).
|
||||
|
||||
## [0.4.0] - 2026-05-21
|
||||
### Added
|
||||
- **Alerts (M8)**: desktop notifications (via `notify-send`) for **overheat** (GPU/CPU past a
|
||||
threshold), **GPU-lost** (nvidia-smi timeout), and a **new version available** (fired once
|
||||
per version). Edge-triggered with a cooldown so it doesn't spam. Degrades gracefully if
|
||||
`notify-send` isn't installed.
|
||||
- **Notifications page**: configure alerts (enable/disable, GPU/CPU temperature thresholds)
|
||||
with a "Send test" button; changes apply live and persist to `config.toml`.
|
||||
- **App icon**: ships a RigDoctor icon and shows it in the dock/launcher. The GUI
|
||||
**self-registers** the icon + `.desktop` on launch (and sets the Wayland app-id), so a
|
||||
self-update + relaunch picks it up — no need to re-run the installer.
|
||||
|
||||
## [0.3.2] - 2026-05-21
|
||||
### Changed
|
||||
- Replaced the per-page "Run with admin" buttons with a **single password prompt at launch**
|
||||
(`pkexec`): the GUI collects root-only data (SMART + dmidecode board/BIOS/RAM) once and
|
||||
caches it for the session, so Health and Inventory always show the full picture. Falls back
|
||||
to non-root if cancelled/unavailable; disable via `elevate_on_launch = false`.
|
||||
|
||||
## [0.3.1] - 2026-05-21
|
||||
### Fixed
|
||||
- Changelog/release notes now **render Markdown** instead of showing raw `#`/`**` markup —
|
||||
the in-app changelog uses `QTextEdit.setMarkdown()` and the update prompt renders notes as
|
||||
rich text (closes #1).
|
||||
|
||||
## [0.3.0] - 2026-05-21
|
||||
### Added
|
||||
- **System inventory (M5)**: CPU, GPU (model/driver/VBIOS/VRAM/PCIe), motherboard/BIOS, RAM
|
||||
(total + modules), storage, kernel, and display server. CLI `rigdoctor inventory`
|
||||
(`--json` / `--markdown` / `--output`) and a GUI **Inventory** tab with Copy-as-Markdown,
|
||||
Save, and "Run with admin" (for `dmidecode` board/BIOS/RAM details). Fills the last GUI tab.
|
||||
|
||||
## [0.2.0] - 2026-05-21
|
||||
### Added
|
||||
- **"Check for updates" button** in the sidebar — force an immediate version check instead of
|
||||
waiting for the 30-minute poll.
|
||||
|
||||
## [0.1.1] - 2026-05-21
|
||||
### Fixed
|
||||
- Dialogs (the update prompt and changelog) were light-on-light and unreadable — they now use
|
||||
the dark theme with readable text.
|
||||
|
||||
## [0.1.0] - 2026-05-21
|
||||
_First milestone release — a complete, installable, self-updating RigDoctor: live monitoring,
|
||||
crash capture + health report, desktop GUI, user-local install/uninstall, and updates._
|
||||
### Added
|
||||
- **In-app uninstaller**: "Uninstall RigDoctor" button on the Setup page (and
|
||||
`rigdoctor uninstall [--purge]`) — removes the venv, launchers, and desktop entry, with an
|
||||
option to also wipe settings/token/logs. Runs detached so it can delete its own venv.
|
||||
- **In-app changelog**: a "Changelog" link in the sidebar opens the release history (tags +
|
||||
notes) fetched from the update server.
|
||||
|
||||
## [0.0.10] - 2026-05-21
|
||||
### Added
|
||||
- **"Restart now" button** after a successful in-app update — relaunches RigDoctor for you
|
||||
instead of asking you to restart manually.
|
||||
- **Real release notes**: CI now sets each Gitea release's body from the matching CHANGELOG
|
||||
section (instead of "Automated release for…"), and the updater shows **"What's new"** — a
|
||||
notes dialog before applying (GUI) and in `rigdoctor update` (CLI).
|
||||
### Changed
|
||||
- Setup page / `rigdoctor install`: dropped internal module references (M4, M5, …) from the
|
||||
component descriptions — end users don't need them.
|
||||
- Adopting **Conventional Commits** + **git-cliff** (`cliff.toml`, `packaging/changelog.sh`)
|
||||
to generate CHANGELOG entries from commit history going forward (D20).
|
||||
### Fixed
|
||||
- The self-extracting **`.run` installer** is now built **without makeself** (a pure-Python
|
||||
self-extractor, `packaging/make_run.py`), so it reliably attaches to every release — it was
|
||||
silently skipped before because the CI runner couldn't install makeself.
|
||||
|
||||
## [0.0.8] - 2026-05-21
|
||||
### Added
|
||||
- **Periodic update checks**: the GUI now re-checks for new releases while running (every
|
||||
`update_check_minutes`, default 30; 0 disables), so a newly published version is detected
|
||||
without restarting. After applying an update, re-checks stop until restart.
|
||||
- **"Run with admin" on the Health page**: runs all checks (including root-only SMART) via
|
||||
`pkexec rigdoctor report --json`, so the full report — not just "SMART needs root" — is
|
||||
available from the UI.
|
||||
|
||||
## [0.0.7] - 2026-05-21
|
||||
### Added
|
||||
- **User-local installer** `install.sh` (no root): creates a private venv, links
|
||||
`rigdoctor`/`rigdoctor-gui` into `~/.local/bin`, and adds a desktop entry. Re-run to
|
||||
upgrade; `--uninstall` to remove.
|
||||
- **Self-extracting `.run` installer** via `packaging/make-run.sh` (makeself) — one
|
||||
download-and-run executable bundling the wheel + `install.sh`; built and attached to each
|
||||
release by CI.
|
||||
- **Self-update apply (M13)**: `rigdoctor update` now installs the newer version via
|
||||
authenticated pip (`rigdoctor[gui] @ git+https://oauth2:<token>@…@<tag>`); the GUI sidebar
|
||||
"Update to v…" button applies it and prompts to restart. Token is scrubbed from output.
|
||||
|
||||
## [0.0.6] - 2026-05-21
|
||||
### Added
|
||||
- **Token-gated updates (M13)**: store a Gitea Personal Access Token, **encrypted in the OS
|
||||
keyring** (Secret Service / GNOME Keyring via `secret-tool`) with a 0600-file fallback.
|
||||
`rigdoctor login` / `logout` / `update [--check]`; GUI **Setup → Update access** panel
|
||||
(token field, "Get a token", backend status) and sidebar states (connect / up-to-date /
|
||||
"Update to v…" / access denied). Updates are gated to accounts on the Gitea server (D18).
|
||||
- `libsecret-tools` added to the installer catalog (enables encrypted token storage).
|
||||
### Changed
|
||||
- D18 update mechanism revised from anonymous public HTTP to **authenticated HTTP (token)** —
|
||||
the Gitea instance requires sign-in for all access (anonymous requests are rejected).
|
||||
|
||||
## [0.0.5] - 2026-05-21
|
||||
### Added
|
||||
- **M9 installer (first cut)**: detects distro / package manager / GPU; a catalog of optional
|
||||
components (smartmontools, lm-sensors, dmidecode, pciutils, libnotify) with what each
|
||||
enables; `rigdoctor install [--check] [-y]` installs missing apt packages via pkexec/sudo
|
||||
with consent; GUI **Setup** tab with one-click install. Fixes the "smartmontools missing"
|
||||
gap in the health report.
|
||||
- **Update check (M13, check half)**: on GUI launch the sidebar checks the Gitea releases API
|
||||
and shows "up-to-date", an "Update to v…" button if a newer release exists, or "update check
|
||||
unavailable" if the API can't be reached anonymously.
|
||||
|
||||
## [0.0.4] - 2026-05-21
|
||||
### Added
|
||||
- **M4 health report**: scans kernel logs (NVIDIA Xid incl. 79 "fell off the bus", kernel
|
||||
panic, OOM, MCE, PCIe AER, thermal, amdgpu reset), SMART health, NVIDIA driver/library
|
||||
mismatch, journald persistence, and live temps → prioritized plain-language findings with
|
||||
suggested fixes (read-only, D9).
|
||||
- CLI `rigdoctor report` (text + `--json`).
|
||||
- GUI **Health** tab: runs checks in the background; findings shown as severity-colored cards.
|
||||
- Tests for the journal scanner.
|
||||
|
||||
## [0.0.3] - 2026-05-21
|
||||
### Added
|
||||
- Show the app version (`v<version>`) in the GUI sidebar.
|
||||
|
||||
## [0.0.2] - 2026-05-21
|
||||
### Added
|
||||
- **M3 crash-capture logger**: crash-safe JSONL (`fsync` per sample), size-based rotation,
|
||||
GPU-lost/recovered event markers, atomic status file;
|
||||
`rigdoctor record run|start|stop|status|report` (foreground `run` is the systemd-ready entrypoint).
|
||||
- **GUI Recording/Logs page** (M10): start/stop/interval controls, live status, and the
|
||||
post-crash report — driving the same recorder via shared `core.reccontrol`.
|
||||
- Shared render helpers (`format_raw`, `format_headline`, `render_summary`) used by CLI + GUI.
|
||||
- Tests for the crash log (writer, rotation, reader, summary, recorder).
|
||||
- **Gitea Actions release workflow** (`.gitea/workflows/release.yml`): on push to `main`,
|
||||
builds wheel + sdist and publishes a Gitea release `v<version>` with the artifacts.
|
||||
### Changed
|
||||
- **GUI-first** emphasis (D17): docs reframed; the CLI keeps full parity for headless/SSH.
|
||||
- CPU core temperatures ordered (package, then core 0, 4, 8, …) at the source — fixes the
|
||||
CLI ordering too.
|
||||
- Distribution revised (D8): **user-local self-updating install** is primary, `.deb` optional.
|
||||
### Planned (docs only)
|
||||
- M12 session sharing / remote assist (D16); M13 no-root auto-update from the public repo
|
||||
(D18); versioning/changelog convention (D19).
|
||||
|
||||
## [0.0.1] - 2026-05-21
|
||||
### Added
|
||||
- Initial release: planning docs and decisions (D1–D15); **M1 sensor core** (NVIDIA GPU via
|
||||
nvidia-smi, CPU via hwmon, memory + DDR5 SPD temps, NVMe); CLI (`snapshot`, `monitor`,
|
||||
`sources`); and the **M10 desktop GUI** — dark dashboard with circular gauges and
|
||||
collapsible, temperature-colored cards.
|
||||
@@ -2,9 +2,10 @@
|
||||
|
||||
A **modular diagnostics, monitoring, and health-check toolkit for Linux gamers.**
|
||||
|
||||
> **Status:** 🟢 Phase 1 (MVP) in progress. Foundational decisions are settled and the
|
||||
> **sensor core (M1)** works — `snapshot` / `monitor` read NVIDIA GPU, CPU, memory, and
|
||||
> NVMe live. Crash logger (M3) and health report (M4) are next. See `docs/ROADMAP.md`.
|
||||
> **Status:** 🟢 Phase 1 (MVP) complete. The **sensor core (M1)**, **crash-capture logger
|
||||
> (M3)**, and **health report (M4)** all work — live `snapshot`/`monitor`, crash-safe `record`
|
||||
> with a post-crash report, and `report` to scan logs/SMART/driver for likely causes. A
|
||||
> desktop GUI (M10) ties them together (dashboard, recording, health). See `docs/ROADMAP.md`.
|
||||
|
||||
## Why this exists
|
||||
|
||||
@@ -25,13 +26,14 @@ See `docs/SPEC.md` §1.
|
||||
|
||||
## How you run it
|
||||
|
||||
Three front-ends over one shared engine — pick what fits:
|
||||
- **CLI / headless** — full functionality from the terminal, works over SSH.
|
||||
- **Desktop GUI** — graphical dashboard, log browser, and health-report viewer.
|
||||
- **Tray applet** — a small applet in the top menu bar with quick actions (e.g. start
|
||||
recording) and at-a-glance status.
|
||||
RigDoctor is **GUI-first** — the desktop app is the primary way in — but every feature is
|
||||
also available headless:
|
||||
- **Desktop GUI** — graphical dashboard, recording controls, log browser, reports. The
|
||||
default interface for most users.
|
||||
- **Tray applet** — a small top-menu-bar applet with quick actions and at-a-glance status.
|
||||
- **CLI** — full functionality from the terminal; works over SSH and in scripts.
|
||||
|
||||
The GUI and tray are optional modules; a headless install loses no diagnostic capability.
|
||||
The GUI/tray are optional modules; a headless (CLI-only) install loses no capability.
|
||||
|
||||
## Key decisions (settled)
|
||||
|
||||
@@ -42,7 +44,7 @@ The GUI and tray are optional modules; a headless install loses no diagnostic ca
|
||||
| Primary distro | **Ubuntu** (Debian via apt); others best-effort later |
|
||||
| Primary GPU | **NVIDIA** first; AMD, then Intel later |
|
||||
| MVP | **Sensor core + crash logger + health report** (NVIDIA-only, CLI-first) |
|
||||
| Distribution | **`.deb`** + interactive module installer |
|
||||
| Distribution | **User-local install** (self-updating from the public repo, no root); **`.deb`** optional |
|
||||
| Scope of action | **Read-only + suggestions** (no auto-apply yet) |
|
||||
| Stress tests | **Out of scope** |
|
||||
|
||||
@@ -61,6 +63,21 @@ Full rationale and the still-open questions are in `docs/DECISIONS.md`.
|
||||
| `installer/` | Installer / `.deb` packaging (empty until Phase 4) |
|
||||
| `tests/` | Tests (stdlib `unittest`) |
|
||||
|
||||
## Install (user-local, no root)
|
||||
|
||||
RigDoctor installs into a private venv under `~/.local` — no root, self-updating:
|
||||
|
||||
```bash
|
||||
./install.sh # from a source checkout or the self-extracting .run
|
||||
./install.sh --ref v0.0.6 # install a specific released tag (needs a token)
|
||||
./install.sh --uninstall # remove it
|
||||
```
|
||||
|
||||
This adds `rigdoctor` / `rigdoctor-gui` to `~/.local/bin` and a desktop entry. Each release
|
||||
also ships a one-file **`.run`** installer (download, `chmod +x`, run). Updates are gated to
|
||||
accounts on the Git server (a Personal Access Token); save one via the GUI **Setup → Update
|
||||
access** panel or `rigdoctor login`, then `rigdoctor update` (or the sidebar button).
|
||||
|
||||
## Run it (dev)
|
||||
|
||||
Stdlib-only, no install needed (target is Python ≥ 3.11; tested on 3.14):
|
||||
@@ -73,6 +90,23 @@ PYTHONPATH=src python3 -m rigdoctor sources # list detected sensor sources
|
||||
PYTHONPATH=src python3 -m unittest discover -s tests
|
||||
```
|
||||
|
||||
### Crash-capture logger (M3)
|
||||
|
||||
A crash-safe background logger (JSONL, `fsync` per sample, bounded by rotation) for catching
|
||||
the state right before a freeze:
|
||||
|
||||
```bash
|
||||
rigdoctor record start # start logging in the background
|
||||
rigdoctor record status # is it running? latest readings, sample count
|
||||
rigdoctor record stop # stop it
|
||||
rigdoctor record report # post-crash summary: peaks, events, last samples
|
||||
rigdoctor record run # run in the foreground (the systemd-ready entrypoint)
|
||||
```
|
||||
|
||||
Logs live in `~/.local/share/rigdoctor/logs/`. The logger detects GPU "lost"/hang (nvidia-smi query
|
||||
timeout) and writes an event marker. Trigger modes (always-on / game-launch) and the
|
||||
`systemd --user` service arrive in Phase 4.
|
||||
|
||||
### Desktop GUI (M10)
|
||||
|
||||
The GUI uses PySide6 (Qt) — the only part of RigDoctor that needs a non-stdlib dep:
|
||||
@@ -85,7 +119,8 @@ rigdoctor gui # or: rigdoctor-gui
|
||||
It opens a dark-themed window with sidebar navigation and a **live dashboard** over the
|
||||
same sensor core — circular gauges for the headline metrics plus collapsible per-subsystem
|
||||
cards (GPU/CPU/memory/storage) with temperature-colored values (icy-blue → green → red).
|
||||
The Logs / Health / Inventory sections are placeholders until M3–M5 land.
|
||||
The **Logs** and **Health** sections are full pages (Logs: recording controls + post-crash report;
|
||||
Health: the kernel-log / SMART / driver scan). **Inventory** is a placeholder until M5 lands.
|
||||
|
||||
Without the GUI extra, `pip install -e .` gives just the stdlib-only CLI.
|
||||
|
||||
|
||||
+42
@@ -0,0 +1,42 @@
|
||||
# git-cliff configuration — generate CHANGELOG.md from Conventional Commits (D20).
# Run via packaging/changelog.sh.

[changelog]
# Static preamble written once at the top of CHANGELOG.md. The trailing "\n" leaves a
# blank line between the preamble and the first release heading.
header = """
# Changelog

All notable changes to RigDoctor are recorded here. Format follows
[Keep a Changelog](https://keepachangelog.com/); versioning is SemVer-style
(`MAJOR.MINOR.PATCH`, pre-1.0). `__version__` and `pyproject.toml` must match the git
release tag (so the auto-updater, D18, can compare versions).\n
"""
# Per-release template. Each release gets a "## [<version>] - <date>" heading with
# "### <Group>" subsections — the layout the release workflow relies on when it extracts
# a version's notes (it looks for "## [<version>]" and stops at the next "## " line).
# The only line-ending backslash is after the commit entry, so every commit stays on
# its own line.
body = """
{% if version -%}
## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }}
{% else -%}
## [Unreleased]
{% endif -%}
{% for group, commits in commits | group_by(attribute="group") %}
### {{ group | upper_first }}
{% for commit in commits %}
- {{ commit.message | upper_first }}\
{% endfor %}
{% endfor %}\n
"""
trim = true

[git]
conventional_commits = true
# Keep non-conventional commits; they fall through to the "Other" group below.
filter_unconventional = false
# First matching parser wins, so the release-commit skip must precede the generic
# "^chore" rule.
commit_parsers = [
  { message = "^feat", group = "Added" },
  { message = "^fix", group = "Fixed" },
  { message = "^docs", group = "Documentation" },
  { message = "^perf", group = "Performance" },
  { message = "^refactor", group = "Changed" },
  { message = "^chore\\(release\\)", skip = true },
  { message = "^chore|^build|^ci|^style|^test", group = "Internal" },
  { message = ".*", group = "Other" },
]
tag_pattern = "v[0-9]*"
sort_commits = "oldest"

[bump]
# Pre-1.0 rules (D21): feat -> minor, fix -> patch, breaking -> minor (not major).
features_always_bump_minor = true
breaking_always_bump_major = false
|
||||
@@ -129,8 +129,9 @@ as a single callable so all three front-ends share one implementation.
|
||||
optionally **enable** the `systemd --user` logger service and choose its trigger mode (D6).
|
||||
5. **Verify** each installed module's `probe()` and print a readiness summary.
|
||||
|
||||
Module list/bundling is final (D14). Packaging is `.deb`-first (D8); the wizard layers
|
||||
module selection on top of the package.
|
||||
Module list/bundling is final (D14). Packaging: a **user-local install is primary**
|
||||
(self-updating from the public repo, no root — D8/D18), with an **optional `.deb`** system
|
||||
package; the wizard layers module selection on top of either.
|
||||
|
||||
## 9. GPU vendor abstraction
|
||||
| Capability | NVIDIA (first) | AMD (later) | Intel (later) |
|
||||
|
||||
+117
-11
@@ -1,8 +1,9 @@
|
||||
# RigDoctor — Decisions & Open Questions
|
||||
|
||||
Format: each item is **OPEN** (needs a call) or **DECIDED** (with date + rationale).
|
||||
Decisions D1–D15 were all settled on 2026-05-21; the original open questions are kept below
|
||||
with their resolutions so the reasoning is traceable. No tracked decisions are currently open.
|
||||
Decisions D1–D19 are settled (D1–D15 on 2026-05-21); the original open questions are kept
|
||||
below with their resolutions so the reasoning is traceable. No tracked decisions are
|
||||
currently open.
|
||||
|
||||
## Decided
|
||||
|
||||
@@ -34,9 +35,10 @@ AMD and Intel come later behind the vendor abstraction; nothing should hard-code
|
||||
way that blocks them.
|
||||
|
||||
### D5 — MVP scope — *DECIDED 2026-05-21*
|
||||
**M1 + M3 + M4 (the *Essential* bundle), NVIDIA-only**, CLI-first. This is the first build
|
||||
target — it captures the seed crash and explains the logs before any installer, GUI, tray,
|
||||
or multi-vendor work.
|
||||
**M1 + M3 + M4 (the *Essential* bundle), NVIDIA-only.** This was the first build target — it
|
||||
captures the seed crash and explains the logs before any installer or multi-vendor work.
|
||||
*(The MVP was built CLI-first; per D17 the GUI is now the primary interface going forward —
|
||||
the CLI keeps full parity.)*
|
||||
|
||||
### D6 — Crash-logger trigger model — *DECIDED 2026-05-21*
|
||||
**Let the user choose.** All three modes are supported and selectable (installer + config):
|
||||
@@ -50,10 +52,13 @@ or multi-vendor work.
|
||||
generators. Users who want to reproduce load can run existing tools (gpu-burn, vkmark,
|
||||
stress-ng) themselves alongside the logger.
|
||||
|
||||
### D8 — Distribution / packaging — *DECIDED 2026-05-21*
|
||||
**`.deb` package** as the primary distribution channel (matches the Ubuntu-first focus). The
|
||||
`.deb` declares dependencies per module group; the interactive installer (M9) handles module
|
||||
selection on top. AUR / Flatpak / COPR are possible later, not now.
|
||||
### D8 — Distribution / packaging — *DECIDED 2026-05-21; revised 2026-05-21 (see D18)*
|
||||
**Primary: a user-local install** (pipx/venv or a versioned bundle under `~/.local`, owned by
|
||||
the user) so the app can **self-update from the public Gitea releases with no root** (D18). A
|
||||
**`.deb` remains an optional** system-install channel for users who prefer it (updated via
|
||||
apt). *Why the revision:* the repo is public and we want frictionless, GUI-first self-updates,
|
||||
which a root-owned system package can't apply silently. The interactive installer (M9) layers
|
||||
module selection on top of either channel. AUR / Flatpak / COPR still later, if warranted.
|
||||
|
||||
### D9 — Scope of action (read-only vs apply-fixes) — *DECIDED 2026-05-21*
|
||||
**Read-only + suggestions.** RigDoctor diagnoses, monitors, and **suggests** actions in
|
||||
@@ -118,10 +123,111 @@ build or maintain mappings for other package managers. A thin seam is left in th
|
||||
another package manager *could* be added later, but multi-distro support is **not** a planned
|
||||
deliverable. Revisit only if Ubuntu-only proves too narrow.
|
||||
|
||||
### D16 — Session sharing / remote assist (M12) — *DECIDED 2026-05-21*
|
||||
Build a **session-sharing / remote-assist** capability (new module **M12**) so a user (A)
|
||||
can let a helper (B) inspect their machine. **Full ladder, built in order:**
|
||||
1. **Diagnostic bundle export** — `share export` packages inventory (M5) + recent capture
|
||||
log (M3) + a report into one file A sends to B; B opens it in RigDoctor. One-way, no live
|
||||
connection. Safest; build first.
|
||||
2. **Live read-only view** — a small local server serving the live dashboard + logs
|
||||
read-only, reached over a **user-chosen tunnel** (Tailscale / cloudflared / SSH reverse
|
||||
tunnel — *no RigDoctor-hosted relay*, to keep the no-telemetry promise). Token-gated,
|
||||
short TTL, A approves and can kill instantly. No terminal.
|
||||
3. **Gated interactive terminal** — wrap an existing trusted tool (`tmate`/`sshx`) rather
|
||||
than rolling our own; **read-only link by default**, read-write requires explicit
|
||||
per-session consent. This is a deliberate, consent-gated exception to the read-only stance
|
||||
(D9) — it's full machine access and must be treated as such.
|
||||
|
||||
*Cross-cutting principles:* explicit per-session consent; ephemeral, revocable tokens;
|
||||
clear permission escalation (view ≠ shell); no mandatory central relay; session audit log.
|
||||
*Note:* this adds M12 on top of the "final" list from D14; the catalog is updated accordingly.
|
||||
|
||||
### D17 — GUI-first interface emphasis — *DECIDED 2026-05-21*
|
||||
The **desktop GUI (M10) is the primary, default interface** for end users — it's the more
|
||||
user-friendly way in, and **every capability** (recording, reports, status, …) must be
|
||||
reachable from it. This **supersedes the earlier "CLI-first / terminal-first" framing**
|
||||
(updates D5 and the SPEC wording).
|
||||
- *The CLI is not removed:* it keeps **full functionality** for headless / SSH / server /
|
||||
scripting use, and it's the engine the background daemon runs on.
|
||||
- *No change to layering (D2):* the core, CLI, and daemon stay **stdlib-only** and must run
|
||||
without Qt. "GUI-first" is about emphasis and front-end parity, not dropping headless support.
|
||||
|
||||
### D18 — Auto-update (M13) — *PLANNED 2026-05-21; mechanism revised 2026-05-21*
|
||||
RigDoctor should **check for a newer version on launch and self-update** (new module **M13**).
|
||||
**Mechanism (revised): user-local, no-root self-update over authenticated HTTP (token).**
|
||||
*Why revised:* the Gitea instance requires sign-in for **all** access (repo page,
|
||||
releases feed, raw, API all 303/403 anonymously), so the original "public HTTP" plan can't
|
||||
work. Updates are therefore **gated to people with an account on the Gitea server**, which is
|
||||
desirable — access control is delegated to Gitea.
|
||||
- *Auth:* each user creates a **Personal Access Token** (scope `read:repository`); RigDoctor
|
||||
stores it at `~/.config/rigdoctor/token` (mode 0600) or reads `RIGDOCTOR_TOKEN`. Requests
|
||||
send `Authorization: token <PAT>`. Finer access = repo visibility/collaborators on Gitea.
|
||||
- *Check:* `GET /api/v1/repos/jessey/rigdoctor/releases/latest` with the token; compare tags.
|
||||
- *Apply:* `pip install --upgrade "git+https://oauth2:<token>@…/rigdoctor.git@<tag>"` into the
|
||||
user-local venv, then restart (incl. the daemon). No root.
|
||||
- *States surfaced:* no-token → "connect to update server"; auth error → "access denied";
|
||||
newer → "Update to v…"; else "up-to-date".
|
||||
- *Original (now-superseded) plan was anonymous public HTTP:*
|
||||
- *Install model (D8 revised):* primary install is **user-local** (`~/.local`), so the running
|
||||
app can replace its own files and update with **no apt, no root, no password prompt**.
|
||||
- *Check:* on launch, query the **public Gitea releases API**
|
||||
(`/api/v1/repos/jessey/rigdoctor/releases/latest`) over HTTPS; compare to the running version.
|
||||
- *Apply:* download the new release bundle, **verify checksum/signature**, stage it
|
||||
(e.g. `~/.local/share/rigdoctor/versions/x.y.z`), swap a symlink atomically, then restart
|
||||
(including the `systemd --user` daemon).
|
||||
- *GUI-first (D17):* a non-intrusive "update available" prompt + one-click apply; `rigdoctor
|
||||
update` in the CLI.
|
||||
- *Security:* HTTPS only; verify checksum/signature before swapping; never run unverified code.
|
||||
- *Privacy (no telemetry):* version-check only — no tracking; auto-check is opt-out-able.
|
||||
- *`.deb` users:* the optional `.deb` channel updates via apt instead; auto-update targets the
|
||||
user-local install.
|
||||
- *Caveat (to confirm before building):* the Gitea instance currently **requires sign-in for
|
||||
API calls** (`"Only signed in user is allowed to call APIs."`), so anonymous version checks
|
||||
need the instance/repo set to allow anonymous access — or a separate public version endpoint
|
||||
(e.g. a static file or a mirror).
|
||||
|
||||
### D19 — Versioning & changelog — *DECIDED 2026-05-21*
|
||||
**Track a version number on every change.** SemVer-style `MAJOR.MINOR.PATCH` (pre-1.0: bump
|
||||
PATCH for ordinary changes, MINOR for larger milestones). `__version__`
|
||||
(`src/rigdoctor/__init__.py`) and `pyproject.toml` are the single source of truth and **must match
|
||||
the git release tag** so the auto-updater (D18) can compare versions. Every change updates
|
||||
`CHANGELOG.md` — now generated from **Conventional Commits** via git-cliff (see D20).
|
||||
*Milestone policy (pre-1.0):* **0.0.x** = early development; **0.1.0** = first complete,
|
||||
installable, self-updating release (reached 2026-05-21); **0.x.0** = each later milestone
|
||||
(AMD/Intel, unattended logger auto-start, session sharing…); **1.0.0** = broadly stable
|
||||
(multi-vendor/distro, no major caveats). PATCH (`0.x.PATCH`) for fixes/small changes. *Note:* an early placeholder `0.1.0` was corrected to
|
||||
follow the released **0.0.x** line — first release was **v0.0.1**; current is **0.0.2**.
|
||||
|
||||
### D20 — Automated changelog & release notes — *DECIDED 2026-05-21*
|
||||
**Release notes are generated from our changes, surfaced in the auto-updater.**
|
||||
- *Release body:* CI sets each Gitea release's `body` from the matching `CHANGELOG.md`
|
||||
section (was a hardcoded "Automated release for…"). The updater fetches the release `body`
|
||||
and shows **"What's new"** — a dialog before applying (GUI) and in `rigdoctor update` (CLI).
|
||||
- *Generation:* adopt **Conventional Commits** (`feat:`/`fix:`/`docs:`/`chore:` …) and
|
||||
**git-cliff** (`cliff.toml`, `packaging/changelog.sh`) to generate `CHANGELOG.md` from
|
||||
commit history. Refines D19's "hand-write CHANGELOG" to "generate it from conventional
|
||||
commits"; `__version__`/`pyproject.toml`/tag still the source of truth for the version.
|
||||
- *CI does not auto-commit the changelog* (avoids push loops) — it's regenerated by the dev
|
||||
via the script when cutting a version; CI only reads the section for the release body.
|
||||
|
||||
### D21 — Versioning rules & automation — *DECIDED 2026-05-21*
|
||||
The next version is **determined by the Conventional Commit types** since the last release
|
||||
(D20), so it can be auto-computed instead of guessed:
|
||||
- `fix:` / `perf:` → bump **PATCH**.
|
||||
- `feat:` → bump **MINOR** (pre-1.0: `0.MINOR.0`).
|
||||
- breaking (`feat!:` / `BREAKING CHANGE:`) → pre-1.0: bump **MINOR** (not major); post-1.0: MAJOR.
|
||||
- `docs:` / `chore:` / `refactor:` / `ci:` / `test:` / `style:` alone → **PATCH** (no feature release).
|
||||
- Milestone overrides by hand are allowed (e.g., jumping to `1.0.0`); see the milestone policy in D19.
|
||||
|
||||
*Automation:* `git-cliff --bumped-version` computes the next version from history;
|
||||
`packaging/bump.sh` writes it into `__init__.py` + `pyproject.toml`. Rules live in
|
||||
`cliff.toml [bump]` (pre-1.0: `breaking_always_bump_major = false`).
|
||||
|
||||
## Open
|
||||
|
||||
None currently — all tracked decisions (D1–D15) are resolved. New questions will be added
|
||||
None currently — all tracked decisions (D1–D21) are resolved. New questions will be added
|
||||
here as they arise. Remaining detail to flesh out during build: the tray's supporting-action
|
||||
set (D13 proposed list) and per-module apt package names (filled in as modules land).
|
||||
set (D13), per-module apt package names, M12's tunnel/token specifics, and M13's
|
||||
update mechanism (APT repo vs. self-installed `.deb`).
|
||||
|
||||
+50
-12
@@ -2,22 +2,24 @@
|
||||
|
||||
Status: ⬜ not started · 🟦 designing · 🟨 in progress · ✅ done
|
||||
|
||||
> Final module set (D14). **M7 (stress/repro) was dropped (D7).** M10/M11 are the GUI and
|
||||
> tray modules (D10/D11). GPU scope reads "all (NVIDIA first)" — NVIDIA is implemented first,
|
||||
> others via the vendor abstraction (D4).
|
||||
> Module set per D14, plus **M12 (session sharing, D16)** and **M13 (auto-update, D18)**.
|
||||
> **M7 (stress/repro) was dropped (D7).** M10/M11 are the GUI and tray modules (D10/D11).
|
||||
> GPU scope reads "all (NVIDIA first)" — NVIDIA first, others via the vendor abstraction (D4).
|
||||
|
||||
| ID | Module | Bundle | Key deps | GPU scope | Priority | Status |
|
||||
|----|--------|--------|----------|-----------|----------|--------|
|
||||
| M1 | Sensor core | Essential | none (nvidia-smi, sysfs) | all (NVIDIA first) | P0 | ⬜ |
|
||||
| M3 | Crash-capture logger | Essential | none (opt: smartmontools) | all (NVIDIA first) | P0 | ⬜ |
|
||||
| M4 | Health report (log scan) | Essential | none (opt: smartmontools) | all (NVIDIA first) | P0 | ⬜ |
|
||||
| M3 | Crash-capture logger | Essential | none (opt: smartmontools) | all (NVIDIA first) | P0 | 🟨 |
|
||||
| M4 | Health report (log scan) | Essential | none (opt: smartmontools) | all (NVIDIA first) | P0 | 🟨 |
|
||||
| M2 | Live monitor (TUI) | Monitoring | none (stdlib curses) | all | P1 | ⬜ |
|
||||
| M8 | Alerting | Monitoring | libnotify (opt) | all | P2 | ⬜ |
|
||||
| M5 | System inventory | Diagnostics | none (opt: lm-sensors, dmidecode) | all | P1 | ⬜ |
|
||||
| M8 | Alerting | Monitoring | libnotify (opt) | all | P2 | 🟨 |
|
||||
| M5 | System inventory | Diagnostics | none (opt: lm-sensors, dmidecode) | all | P1 | 🟨 |
|
||||
| M6 | Gaming env checks | Diagnostics | none | all | P2 | ⬜ |
|
||||
| M10 | Desktop GUI | Desktop UI | **python3-pyside6** | all | P2 | 🟨 |
|
||||
| M11 | Tray / menu-bar applet | Desktop UI | **python3-pyside6** (+ AppIndicator on GNOME) | all | P2 | ⬜ |
|
||||
| M9 | Installer | (meta) | none | all | P1 | ⬜ |
|
||||
| M9 | Installer | (meta) | none | all | P1 | 🟨 |
|
||||
| M12 | Session sharing / remote assist | Sharing | none (Tier 3: tmate/sshx) | all | P3 | 🟨 |
|
||||
| M13 | Auto-update | (core) | none (stdlib; user-local file swap) | all | P3 | 🟨 |
|
||||
| ~~M7~~ | ~~Stress / repro~~ | — | — | — | — | ❌ dropped (D7) |
|
||||
|
||||
## Notes per module
|
||||
@@ -26,10 +28,17 @@ Status: ⬜ not started · 🟦 designing · 🟨 in progress · ✅ done
|
||||
- **M3 Crash-capture logger** — the highest-value piece for the seed use case. `fsync` per
|
||||
sample; GPU-lost detection via query timeout; bounded rotation; `systemd --user` service
|
||||
with a **user-selectable trigger mode** (always-on / game-launch / manual — D6).
|
||||
*Implemented (manual trigger):* JSONL log with fsync-per-sample, size-based rotation
|
||||
(`log_max_bytes`/`log_backups`), GPU-lost/recovered event markers, atomic status file, and
|
||||
`rigdoctor record run|start|stop|status|report`. The foreground `run` is the systemd-ready
|
||||
entrypoint; the service unit + always-on/game-launch triggers (D6/D12) land in Phase 4.
|
||||
Also fully driven from the GUI's Recording/Logs page (M10) via shared `core.reccontrol`.
|
||||
- **M4 Health report** — turns scattered logs into a prioritized, plain-language findings
|
||||
list with **suggested** fixes (read-only, D9). Reuses M1 for a live snapshot. Also powers
|
||||
the **guided diagnostic session** (with M3): pick a game → focused capture → scan →
|
||||
findings (see SPEC §4).
|
||||
findings (see SPEC §4). *Implemented:* journalctl scan (Xid/panic/OOM/MCE/AER/thermal/amdgpu),
|
||||
SMART, NVIDIA driver-mismatch, journald-persistence + live-temp checks; `rigdoctor report`
|
||||
(text/JSON) + GUI Health tab. GPU-firmware verification deferred.
|
||||
- **M2 Live monitor** — depends on M1; the terminal "HWMonitor for Linux" face. Stdlib-only.
|
||||
- **M5 / M6 Diagnostics** — inventory export + gaming-env checks; M6 flags risky settings and
|
||||
suggests the fix command but does not apply it (D9).
|
||||
@@ -37,20 +46,49 @@ Status: ⬜ not started · 🟦 designing · 🟨 in progress · ✅ done
|
||||
- **M10 Desktop GUI** — PySide6 graphical front-end over the core engine (dashboard, log
|
||||
browser, report viewer, logger controls). Optional; adds the Qt dependency. *Bootstrapped
|
||||
early (ahead of its Phase 4 slot) at the user's request:* dark-themed window with sidebar
|
||||
nav and a live dashboard (circular gauges + collapsible per-subsystem cards, temperature-
|
||||
colored values); Logs/Health/Inventory are placeholders until M3–M5.
|
||||
nav, a live dashboard (circular gauges + collapsible per-subsystem cards, temperature-
|
||||
colored values), and a **Recording/Logs page** with full M3 controls (start/stop/status +
|
||||
post-crash report). Health/Inventory remain placeholders until M4/M5. GUI-first per D17.
|
||||
- **M11 Tray applet** — `QSystemTrayIcon` menu-bar applet. Dropdown shows live M1 readouts
|
||||
(CPU temp, GPU temp, memory used/total, status dot) and is led by a **Run Diagnostic**
|
||||
action (the guided diagnostic session), plus Open dashboard / Start-Stop recording /
|
||||
Snapshot / Quit (D13). Optional; shares the Qt dependency with M10.
|
||||
- **M9 Installer** — interactive wizard layered on the `.deb` (D8); apt-first dependency
|
||||
resolution; enables the logger service and trigger mode.
|
||||
resolution; enables the logger service and trigger mode. *Implemented (first cut):* distro/
|
||||
package-manager/GPU detection (`core/sysenv`), an optional-component catalog (`core/catalog`),
|
||||
and dependency install via pkexec/sudo — `rigdoctor install [--check] [-y]` + GUI Setup tab.
|
||||
The **user-local app install** is `install.sh` (private venv + `~/.local/bin` launchers +
|
||||
desktop entry, no root; handles the `python3-venv` prerequisite) plus a self-extracting
|
||||
**`.run`** (pure-Python self-extractor, `packaging/make_run.py`, built by CI). *Pending:*
|
||||
config/module selection + `systemd --user`
|
||||
service enable.
|
||||
- **M12 Session sharing / remote assist** (D16) — let a helper inspect a user's machine, in
|
||||
an escalating ladder: (1) **diagnostic bundle export** (inventory + recent log + report,
|
||||
one-way), (2) **live read-only view** over a user-chosen tunnel (Tailscale/cloudflared/SSH,
|
||||
no hosted relay), (3) **gated interactive terminal** wrapping tmate/sshx (read-only by
|
||||
default; read-write only on explicit consent — a deliberate exception to D9). Per-session
|
||||
consent, ephemeral revocable tokens, audit log.
|
||||
- **M13 Auto-update** (D18) — *check + auth implemented:* updates are **gated to Gitea account
|
||||
holders** via a Personal Access Token, stored **encrypted in the OS keyring** (`secret-tool`)
|
||||
with a 0600-file fallback (`config.load_token`/`save_token`/`token_backend`). `core/updates`
|
||||
queries the releases API with the token; CLI `login`/`logout`/`update`; GUI Setup "Update
|
||||
access" panel + sidebar states. The no-root **self-update apply** is implemented:
|
||||
`rigdoctor update` runs an authenticated `pip install --upgrade "rigdoctor[gui] @
|
||||
git+https://oauth2:<token>@…@<tag>"` into the user-local venv (GUI "Update to v…" button +
|
||||
restart prompt; token scrubbed). Installed via the user-local **`install.sh`** /
|
||||
self-extracting **`.run`** (M9).
|
||||
*Original plan:* On launch, check the public Gitea releases API and
|
||||
**self-update a user-local install with no root** (download → verify checksum/signature →
|
||||
atomic symlink swap → restart, incl. the daemon). HTTPS-only, version-check-only (no
|
||||
telemetry), opt-out-able. Surfaced in the GUI; `rigdoctor update` in the CLI. (`.deb` users
|
||||
update via apt instead.)
|
||||
|
||||
## Bundles (final — D14)
|
||||
- **Essential:** M1 + M3 + M4 *(the MVP, NVIDIA-only — D5)*
|
||||
- **Monitoring:** M2 + M8
|
||||
- **Diagnostics:** M5 + M6
|
||||
- **Desktop UI:** M10 + M11 *(adds PySide6)*
|
||||
- **Sharing:** M12 *(session sharing / remote assist — D16)*
|
||||
|
||||
## MVP candidate — *confirmed (D5)*
|
||||
**M1 + M3 + M4 (Essential), NVIDIA-only, CLI-first.** Gives a working tool that captures the
|
||||
|
||||
+23
-9
@@ -11,13 +11,13 @@ Ubuntu + NVIDIA first; `.deb` distribution (see `DECISIONS.md`).
|
||||
- [x] Lock the MVP scope (M1 + M3 + M4, NVIDIA-only)
|
||||
|
||||
## Phase 1 — MVP: capture *this* crash (Essential bundle, NVIDIA-only, CLI)
|
||||
- [ ] M1 sensor core (NVIDIA via nvidia-smi + hwmon for CPU/RAM/NVMe), stdlib-only
|
||||
- [ ] M3 crash-capture logger (CSV, fsync per sample, GPU-lost detection, rotation,
|
||||
`systemd --user` service)
|
||||
- [ ] Manual trigger mode first (`rigdoctor record start/stop`); other modes in Phase 4
|
||||
- [ ] M4 health report (Xid/panic/OOM/MCE/AER/thermal scan + driver-mismatch + snapshot,
|
||||
suggested fixes only — D9)
|
||||
- [ ] `--report` post-crash summary (max temps/power, throttle events, last N samples)
|
||||
- [x] M1 sensor core (NVIDIA via nvidia-smi + hwmon for CPU/RAM/NVMe), stdlib-only
|
||||
- [x] M3 crash-capture logger (JSONL, fsync per sample, GPU-lost detection, size rotation)
|
||||
- [x] Manual trigger mode (`rigdoctor record run/start/stop/status`); `systemd --user`
|
||||
service + other trigger modes in Phase 4 (`run` is already the service entrypoint)
|
||||
- [x] M4 health report (Xid/panic/OOM/MCE/AER/thermal scan + SMART + driver-mismatch +
|
||||
journald-persistence + live temps, suggested fixes only — D9; GPU-firmware verify deferred)
|
||||
- [x] `record report` post-crash summary (peak temps/power per subsystem, events, last N samples)
|
||||
- **Exit criteria:** user can run it during gaming and, after a freeze/black-screen, see the
|
||||
last readings + a plausible cause.
|
||||
|
||||
@@ -39,16 +39,30 @@ Ubuntu + NVIDIA first; `.deb` distribution (see `DECISIONS.md`).
|
||||
- [ ] Logger trigger modes: always-on + game-launch (D12 — wrapper first:
|
||||
`rigdoctor wrap %command%` + global Steam compat-tool; zero-config watcher
|
||||
(Steam RunningAppID + /proc) and GameMode hook follow)
|
||||
- [ ] M9 interactive installer (GPU detection, module menu, apt dependency resolution,
|
||||
service enable + trigger-mode pick)
|
||||
- [~] M9 interactive installer — *done:* distro/GPU detection + optional-dependency install
|
||||
(`rigdoctor install`, GUI Setup tab); **user-local `install.sh` + self-extracting `.run`**
|
||||
(no-root venv install, handles python3-venv prereq, CI-built). *Pending:* module-selection
|
||||
config + `systemd --user` service enable + trigger-mode pick.
|
||||
- [ ] `.deb` packaging (D8) declaring per-bundle deps incl. python3-pyside6 for Desktop UI
|
||||
|
||||
## Phase 5 — Breadth (later)
|
||||
- [ ] AMD GPU support in M1 (Steam Deck / Radeon)
|
||||
- [ ] Intel GPU best-effort
|
||||
- [x] M13 auto-update (D18) — launch-time version check (GUI sidebar) + no-root self-update
|
||||
apply (`rigdoctor update` / sidebar button → authenticated pip upgrade), token-gated.
|
||||
Restart-after-update is manual for now.
|
||||
- [ ] (Later, separate milestone) Optional auto-apply of suggested fixes behind explicit
|
||||
consent — currently out of scope (D9)
|
||||
|
||||
## Phase 6 — Session sharing / remote assist (M12, D16)
|
||||
Escalating ladder, built in order:
|
||||
- [ ] Tier 1: `share export` — diagnostic bundle (inventory + recent log + report); B opens
|
||||
it in RigDoctor. One-way, safest.
|
||||
- [x] Tier 2: live read-only view — `rigdoctor share serve` (stdlib HTTP, token-gated:
|
||||
sensors + health + inventory). Remote = user-chosen tunnel; GUI controls still to add.
|
||||
- [x] Tier 3: host-consented interactive terminal — a real PTY shell shared over the relay
|
||||
(own `pty`, pyte-rendered guest), off by default; host reads along + can type (sudo).
|
||||
|
||||
> **Out of scope:** stress/repro module (D7); multi-distro support and packaging beyond
|
||||
> Ubuntu/apt + `.deb` (D15) — a thin seam is kept but not built out.
|
||||
|
||||
|
||||
+18
-5
@@ -31,8 +31,9 @@ RigDoctor's crash-safe logger is designed to fix exactly that.
|
||||
- Catch and preserve the machine's state in the seconds before a hard freeze.
|
||||
- Make hard-to-investigate gaming faults debuggable: collect scattered signals, correlate
|
||||
them, and explain them.
|
||||
- Offer **three ways to run**: full **CLI / headless** (works over SSH), a **desktop GUI**,
|
||||
and a **system-tray / top-menu-bar applet** with quick actions. (D10/D11)
|
||||
- Be **GUI-first** (D17): the **desktop GUI** is the primary interface, complemented by a
|
||||
**system-tray / top-menu-bar applet** for quick actions — backed by a **full CLI** that
|
||||
keeps complete functionality for headless / SSH / scripting use. (D10/D11/D17)
|
||||
- Be modular: a novice installs a one-click "monitor + capture + report" bundle; a power
|
||||
user installs everything including the GUI, tray, and diagnostics.
|
||||
- Low overhead; safe defaults; no telemetry/phone-home.
|
||||
@@ -135,7 +136,18 @@ rather than adding a new one.
|
||||
Interactive wizard: detect GPU vendor (NVIDIA-first) → present module menu grouped into
|
||||
bundles with descriptions and the exact packages each needs → resolve & install (apt first)
|
||||
→ write config → optionally enable the `systemd --user` logger service and pick its trigger
|
||||
mode. Delivered alongside the `.deb` (D8). Module list/bundling is final per D14.
|
||||
mode. Delivered with the user-local install (and the optional `.deb`) (D8). Module
|
||||
list/bundling is final per D14.
|
||||
|
||||
### M12 — Session sharing / remote assist (D16)
|
||||
Lets a user (A) grant a helper (B) inspection access, as an escalating, consent-driven
|
||||
ladder: (1) **diagnostic bundle export** (inventory + recent capture log + report, one-way);
|
||||
(2) **live read-only view** of the dashboard + logs over a user-chosen tunnel
|
||||
(Tailscale/cloudflared/SSH — no RigDoctor-hosted relay); (3) **gated interactive terminal**
|
||||
wrapping an existing tool (tmate/sshx), read-only by default, read-write only on explicit
|
||||
consent. Per-session consent, ephemeral revocable tokens, permission escalation (view ≠
|
||||
shell), and a session audit log. Tier 3 is a deliberate, consent-gated exception to the
|
||||
read-only stance (D9). Built in Phase 6.
|
||||
|
||||
## 5. Non-functional requirements
|
||||
- **Zero hard deps for the core/CLI/daemon** — Python stdlib + tools already present. **Qt
|
||||
@@ -144,8 +156,9 @@ mode. Delivered alongside the `.deb` (D8). Module list/bundling is final per D14
|
||||
- **Crash-safe logging** — flush + `fsync` per sample; bounded disk usage.
|
||||
- **Low overhead** — default ≤1 Hz sampling; negligible CPU/GPU cost. The always-on daemon
|
||||
is stdlib-only (no Qt loaded) so it stays tiny.
|
||||
- **Headless-equivalent** — every diagnostic capability is reachable from the CLI; the GUI
|
||||
and tray are conveniences over the same engine, never the only way to do something.
|
||||
- **GUI-first, CLI-complete** (D17) — the GUI is the primary interface, but every capability
|
||||
is *also* reachable from the CLI so RigDoctor runs fully headless (SSH/servers). Both
|
||||
front-ends sit over the same engine; neither is the only way to do something.
|
||||
- **Privacy** — local only; inventory export is opt-in and reviewable; no telemetry.
|
||||
- **Portability** — graceful degradation when a sensor/tool is unavailable (N/A, not crash).
|
||||
|
||||
|
||||
Executable
+117
@@ -0,0 +1,117 @@
|
||||
#!/usr/bin/env sh
# RigDoctor user-local installer (no root). Creates a private venv, links the
# `rigdoctor` / `rigdoctor-gui` commands into ~/.local/bin, and adds a desktop
# entry. Installs from a bundled wheel (the .run installer) or from a source
# checkout. Re-run to upgrade; `./install.sh --uninstall` to remove.
#
# Usage: install.sh [--ref <tag>] [--uninstall]
#   --ref <tag>   install that tag from the Gitea repo; needs a token, taken from
#                 $RIGDOCTOR_TOKEN or the `token` file in the rigdoctor config dir
#   --uninstall   remove the venv, launchers, desktop entry and icon
set -eu

APP_NAME=rigdoctor
DATA_HOME="${XDG_DATA_HOME:-$HOME/.local/share}"
VENV="$DATA_HOME/$APP_NAME/venv"
BIN_DIR="$HOME/.local/bin"
DESKTOP_DIR="$DATA_HOME/applications"
DESKTOP_FILE="$DESKTOP_DIR/rigdoctor.desktop"
# Absolute directory holding this script (CDPATH cleared so `cd` prints nothing).
SCRIPT_DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)

# Print an error message on stderr and abort with status 1.
die() {
    echo "$*" >&2
    exit 1
}

# Remove everything this installer created; config and logs are left in place.
uninstall() {
    echo "Removing RigDoctor user-local install…"
    rm -rf "$VENV"
    rm -f "$BIN_DIR/rigdoctor" "$BIN_DIR/rigdoctor-gui" "$DESKTOP_FILE" \
        "$DATA_HOME/icons/hicolor/scalable/apps/rigdoctor.svg"
    echo "Done. (Config and logs under ~/.config/rigdoctor and ~/.local/share/rigdoctor were kept.)"
}

REF=""
while [ $# -gt 0 ]; do
    case "$1" in
        --uninstall) uninstall; exit 0 ;;
        --ref) REF="${2:-}"; [ -n "$REF" ] || die "--ref needs a tag"; shift 2 ;;
        -h|--help) echo "Usage: install.sh [--ref <tag>] [--uninstall]"; exit 0 ;;
        *) die "Unknown option: $1" ;;
    esac
done

PY=python3
command -v "$PY" >/dev/null 2>&1 || die "python3 not found — install Python 3.11+."
"$PY" - <<'EOF' || die "Python 3.11+ is required."
import sys
sys.exit(0 if sys.version_info >= (3, 11) else 1)
EOF

# venv support (ensurepip) is required; install python3-venv if it's missing.
if ! "$PY" -c "import ensurepip" >/dev/null 2>&1; then
    PYVER=$("$PY" -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')")
    PKGS="python3-venv python${PYVER}-venv"
    echo "Python venv support is missing — needs: $PKGS"
    # This is the only step that needs root: prefer pkexec, then sudo.
    if command -v pkexec >/dev/null 2>&1; then ESC=pkexec
    elif command -v sudo >/dev/null 2>&1; then ESC=sudo
    else ESC=""; fi
    if [ -n "$ESC" ] && command -v apt-get >/dev/null 2>&1; then
        echo "Installing $PKGS (you may be prompted for your password)…"
        "$ESC" sh -c "apt-get update && apt-get install -y $PKGS" \
            || die "Failed. Install manually: sudo apt install $PKGS"
    else
        die "Install it manually, then re-run: sudo apt install $PKGS"
    fi
fi

# Where to install from: a specific released tag (--ref), a bundled wheel, or source.
WHEEL=$(ls "$SCRIPT_DIR"/rigdoctor-*.whl 2>/dev/null | head -n1 || true)
if [ -n "$REF" ]; then
    CONF="${XDG_CONFIG_HOME:-$HOME/.config}/rigdoctor/token"
    # The environment variable wins; otherwise fall back to the saved token file.
    TOKEN="${RIGDOCTOR_TOKEN:-$(cat "$CONF" 2>/dev/null || true)}"
    [ -n "$TOKEN" ] || die "--ref needs a token (run 'rigdoctor login' or set RIGDOCTOR_TOKEN)."
    SRC="rigdoctor[gui] @ git+https://oauth2:$TOKEN@git.jesseyvanofferen.com/jessey/rigdoctor.git@$REF"
elif [ -n "$WHEEL" ]; then
    SRC="$WHEEL[gui]"
elif [ -f "$SCRIPT_DIR/pyproject.toml" ]; then
    SRC="$SCRIPT_DIR[gui]"
else
    die "No bundled wheel or source found next to the installer."
fi

echo "Creating venv at $VENV…"
"$PY" -m venv "$VENV"
"$VENV/bin/pip" install --upgrade pip >/dev/null
echo "Installing RigDoctor (pulls in PySide6 — this can take a minute)…"
# --upgrade so that re-running the installer over an existing venv really
# replaces an already-installed rigdoctor instead of reporting it as satisfied.
"$VENV/bin/pip" install --upgrade "$SRC"

mkdir -p "$BIN_DIR"
ln -sf "$VENV/bin/rigdoctor" "$BIN_DIR/rigdoctor"
ln -sf "$VENV/bin/rigdoctor-gui" "$BIN_DIR/rigdoctor-gui"

# Install the app icon (for the dock/launcher); fall back to a stock icon.
ICON_NAME=utilities-system-monitor
ICON_SRC=$("$VENV/bin/python" -c "import os, rigdoctor.gui as g; print(os.path.join(os.path.dirname(g.__file__), 'assets', 'rigdoctor.svg'))" 2>/dev/null || true)
if [ -n "$ICON_SRC" ] && [ -f "$ICON_SRC" ]; then
    ICON_DST="$DATA_HOME/icons/hicolor/scalable/apps/rigdoctor.svg"
    mkdir -p "$(dirname "$ICON_DST")"
    cp "$ICON_SRC" "$ICON_DST"
    ICON_NAME=rigdoctor
    # Best effort: a missing or failing cache refresh must not abort the install.
    command -v gtk-update-icon-cache >/dev/null 2>&1 && gtk-update-icon-cache -qtf "$DATA_HOME/icons/hicolor" 2>/dev/null || true
fi

mkdir -p "$DESKTOP_DIR"
cat > "$DESKTOP_FILE" <<EOF
[Desktop Entry]
Type=Application
Name=RigDoctor
Comment=Hardware monitoring & crash diagnostics for Linux gamers
Exec=$VENV/bin/rigdoctor-gui
Icon=$ICON_NAME
Terminal=false
Categories=System;Monitor;Utility;
StartupWMClass=rigdoctor
EOF
# Best effort as well: the desktop database refresh is optional.
command -v update-desktop-database >/dev/null 2>&1 && update-desktop-database "$DESKTOP_DIR" 2>/dev/null || true

echo
echo "RigDoctor $("$VENV/bin/rigdoctor" --version 2>/dev/null | awk '{print $2}') installed."
echo "  GUI:  rigdoctor-gui   (or find 'RigDoctor' in your app menu)"
echo "  CLI:  rigdoctor --help"
# Warn only when ~/.local/bin is not already on PATH.
case ":$PATH:" in
    *":$BIN_DIR:"*) ;;
    *) echo "  Note: add $BIN_DIR to your PATH (a fresh login usually does this).";;
esac
|
||||
Executable
+25
@@ -0,0 +1,25 @@
|
||||
#!/usr/bin/env sh
# Auto-set the next version from Conventional Commits (git-cliff), per D21.
# Run after committing your feat:/fix: changes; it updates __init__.py + pyproject.toml.
# Then update CHANGELOG.md, commit as `chore(release): vX.Y.Z`, and push (CI tags + releases).
set -eu

# Work from the repository root (the parent of this script's directory).
ROOT=$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)
cd "$ROOT"

command -v git-cliff >/dev/null 2>&1 || { echo "git-cliff not found. Install: pip install git-cliff" >&2; exit 1; }

# Run git-cliff on its own rather than piping it into sed: in a pipeline only the
# last command's status is checked, so a git-cliff failure would go unnoticed.
RAW=$(git-cliff --bumped-version) || { echo "git-cliff --bumped-version failed." >&2; exit 1; }
NEXT=${RAW#v}   # drop a leading "v" (tags are vX.Y.Z, the files store X.Y.Z)
[ -n "$NEXT" ] || { echo "Could not compute the next version." >&2; exit 1; }

python3 - "$NEXT" <<'PY'
import pathlib, re, sys

version = sys.argv[1]

init = pathlib.Path("src/rigdoctor/__init__.py")
proj = pathlib.Path("pyproject.toml")

# Compute both replacements before writing anything, so a file with no matching
# version line aborts the bump instead of leaving the two files out of step.
init_text, init_hits = re.subn(
    r'__version__ = "[^"]+"', f'__version__ = "{version}"', init.read_text(encoding="utf-8")
)
proj_text, proj_hits = re.subn(
    r'(?m)^version = "[^"]+"', f'version = "{version}"', proj.read_text(encoding="utf-8"), count=1
)
if not init_hits:
    sys.exit(f"{init}: no __version__ assignment found; nothing was changed")
if not proj_hits:
    sys.exit(f"{proj}: no version line found; nothing was changed")

init.write_text(init_text, encoding="utf-8")
proj.write_text(proj_text, encoding="utf-8")
PY

echo "Set version to $NEXT."
echo "Next: add a '## [$NEXT]' CHANGELOG section, then commit as 'chore(release): v$NEXT'."
|
||||
Executable
+20
@@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env sh
# Regenerate CHANGELOG.md from Conventional Commits using git-cliff (D20).
# Install once:  pip install git-cliff   (ships prebuilt binaries)
# Usage: packaging/changelog.sh [--tag vX.Y.Z]
set -eu

# Work from the repository root (the parent of this script's directory).
ROOT=$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)
cd "$ROOT"

command -v git-cliff >/dev/null 2>&1 || {
    echo "git-cliff not found. Install it: pip install git-cliff" >&2
    exit 1
}

# Print the usage line on stderr and exit with status 2 (bad invocation).
usage() {
    echo "Usage: packaging/changelog.sh [--tag vX.Y.Z]" >&2
    exit 2
}

case "${1:-}" in
    "")
        git-cliff -o CHANGELOG.md
        ;;
    --tag)
        # A missing tag value used to fall through to an untagged changelog;
        # treat it as a usage error so a typo cannot produce the wrong file.
        [ -n "${2:-}" ] || usage
        git-cliff --tag "$2" -o CHANGELOG.md
        ;;
    *)
        usage
        ;;
esac
echo "Wrote CHANGELOG.md"
|
||||
Executable
+3
@@ -0,0 +1,3 @@
|
||||
#!/usr/bin/env sh
# Thin wrapper: hand all arguments to make_run.py, which builds the self-extracting
# .run installer (no makeself involved).
HERE=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)
exec python3 "$HERE/make_run.py" "$@"
|
||||
@@ -0,0 +1,62 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Build a dependency-free self-extracting .run installer (no makeself).
|
||||
|
||||
Produces dist/rigdoctor-<version>-installer.run: a POSIX shell stub with an appended
|
||||
tar.gz of the wheel + install.sh. Running it extracts to a temp dir and runs install.sh.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import tarfile
|
||||
import tomllib
|
||||
from pathlib import Path
|
||||
|
||||
ROOT = Path(__file__).resolve().parent.parent
|
||||
# Line that separates the shell stub from the appended tar.gz payload.
MARKER = "__RIGDOCTOR_ARCHIVE__"

# POSIX shell prologue of the .run file. It finds the line after MARKER, extracts
# everything from there into a temp dir, and runs the bundled install.sh.
# The temp dir is removed by an EXIT trap, and install.sh's status is captured with
# `|| RET=$?`: under `set -e` a plain failing command would abort the stub before
# any cleanup or status capture could run.
STUB = f"""#!/bin/sh
# RigDoctor self-extracting installer. Extracts the embedded archive and runs install.sh.
set -eu
SKIP=$(awk '/^{MARKER}$/ {{ print NR + 1; exit 0 }}' "$0")
TMP=$(mktemp -d)
trap 'rm -rf "$TMP"' EXIT
trap 'exit 1' HUP INT TERM
tail -n +"$SKIP" "$0" | tar -xz -C "$TMP"
RET=0
sh "$TMP/install.sh" "$@" || RET=$?
exit "$RET"
{MARKER}
"""
|
||||
|
||||
|
||||
def main() -> int:
    """Build dist/rigdoctor-<version>-installer.run from the wheel and install.sh.

    The version is read from pyproject.toml. The wheel is built on demand if it is
    not already in dist/. The output file is STUB followed by a gzip'd tar holding
    the wheel and install.sh, and is marked executable.

    Returns:
        0 on success, 1 if the wheel or install.sh cannot be found.
    """
    # TOML is UTF-8 by definition: read bytes instead of relying on the locale encoding.
    with open(ROOT / "pyproject.toml", "rb") as fh:
        version = tomllib.load(fh)["project"]["version"]
    dist = ROOT / "dist"
    dist.mkdir(exist_ok=True)

    wheel = dist / f"rigdoctor-{version}-py3-none-any.whl"
    if not wheel.exists():
        subprocess.run([sys.executable, "-m", "build", "--wheel"], cwd=ROOT, check=True)
    if not wheel.exists():
        print(f"wheel not found: {wheel}", file=sys.stderr)
        return 1

    install_script = ROOT / "install.sh"
    if not install_script.is_file():
        # Report it like the missing wheel instead of a traceback from tar.add().
        print(f"install script not found: {install_script}", file=sys.stderr)
        return 1

    buf = io.BytesIO()
    with tarfile.open(fileobj=buf, mode="w:gz") as tar:
        tar.add(wheel, arcname=wheel.name)
        tar.add(install_script, arcname="install.sh")

    out = dist / f"rigdoctor-{version}-installer.run"
    with open(out, "wb") as f:
        f.write(STUB.encode())
        f.write(buf.getvalue())
    os.chmod(out, 0o755)
    print(f"Built {out}")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
+5
-2
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "rigdoctor"
|
||||
version = "0.1.0"
|
||||
version = "0.7.2"
|
||||
description = "Modular hardware monitoring & crash diagnostics for Linux gamers."
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
@@ -13,7 +13,7 @@ requires-python = ">=3.11"
|
||||
dependencies = []
|
||||
|
||||
[project.optional-dependencies]
|
||||
gui = ["PySide6"]
|
||||
gui = ["PySide6", "pyte"]
|
||||
|
||||
[project.scripts]
|
||||
rigdoctor = "rigdoctor.cli:main"
|
||||
@@ -21,3 +21,6 @@ rigdoctor-gui = "rigdoctor.gui.app:main"
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
where = ["src"]
|
||||
|
||||
[tool.setuptools.package-data]
|
||||
rigdoctor = ["gui/assets/*.svg"]
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
"""RigDoctor — modular hardware monitoring & crash diagnostics for Linux gamers."""
|
||||
|
||||
__version__ = "0.1.0"
|
||||
__version__ = "0.7.2"
|
||||
|
||||
+338
-9
@@ -4,13 +4,18 @@ from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import signal
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from . import __version__
|
||||
from . import __version__, config
|
||||
from .config import load_config
|
||||
from .core import reccontrol
|
||||
from .core.sampler import Sampler
|
||||
from .core.sources import available_sources
|
||||
from .render import render_snapshot
|
||||
from .render import format_headline, render_snapshot, render_summary
|
||||
|
||||
|
||||
def _sampler() -> Sampler:
|
||||
@@ -64,14 +69,280 @@ def cmd_gui(args) -> int:
|
||||
return gui_main([sys.argv[0]])
|
||||
|
||||
|
||||
def cmd_record(args) -> int:
|
||||
print("`record` (M3 crash-capture logger) is not implemented yet — next on the roadmap.")
|
||||
return 2
|
||||
# --- M3 crash-capture logger ---------------------------------------------------
|
||||
|
||||
def cmd_record_run(args) -> int:
    """Run the crash-capture loop in the foreground until it is told to stop.

    Writes this process's PID to config.PID_FILE, builds a Recorder from the loaded
    config (``args.interval`` / ``args.out`` override the interval and log path),
    and installs SIGTERM/SIGINT handlers that call ``recorder.stop()``.

    Returns:
        0 once the recorder's run loop has returned.
    """
    # Import first, so a broken import cannot leave a PID file behind.
    from .core.recorder import Recorder

    cfg = load_config()
    interval = args.interval or cfg["interval"]
    log_path = Path(args.out) if args.out else config.LOG_FILE
    config.STATE_DIR.mkdir(parents=True, exist_ok=True)
    config.PID_FILE.write_text(str(os.getpid()))

    # Everything after the PID file exists is inside try/finally, so the file is
    # removed even if constructing the Recorder or installing handlers fails.
    try:
        recorder = Recorder(
            interval=interval,
            log_path=log_path,
            max_bytes=cfg["log_max_bytes"],
            backups=cfg["log_backups"],
            status_path=config.STATUS_FILE,
        )

        def _handle(_sig, _frame):
            recorder.stop()

        signal.signal(signal.SIGTERM, _handle)
        signal.signal(signal.SIGINT, _handle)

        print(f"Recording to {log_path} every {interval:g}s — stop with Ctrl-C or `rigdoctor record stop`.")
        recorder.run()
    finally:
        try:
            config.PID_FILE.unlink()
        except OSError:
            pass
    print(f"Stopped after {recorder.samples} samples.")
    return 0
|
||||
|
||||
|
||||
def cmd_record_start(args) -> int:
    """Start the recorder in the background unless one is already running.

    Returns:
        0 if a recorder was already running or the new one is alive one second
        after spawning; 1 if it failed to come up.
    """
    # Look the PID up once: a second lookup could return None if the recorder
    # exits in between, and the message would then read "pid None".
    existing = reccontrol.running_pid()
    if existing:
        print(f"Recorder already running (pid {existing}).")
        return 0
    pid = reccontrol.start_background(args.interval, args.out)
    time.sleep(1.0)  # let it come up
    if pid and reccontrol.pid_alive(pid):
        print(f"Recording started in the background (pid {pid}).")
        print(f" log: {args.out or config.LOG_FILE}")
        print(" status: rigdoctor record status · stop: rigdoctor record stop")
        return 0
    print(f"Recorder failed to start; see {config.SPAWN_LOG}")
    return 1
|
||||
|
||||
|
||||
def cmd_record_stop(args) -> int:
    """Ask the background recorder to stop and wait for it to exit.

    Returns:
        0 if no recorder was running or it exited within about 5 seconds;
        1 if the stop request failed or the process is still alive afterwards.
    """
    pid = reccontrol.running_pid()
    if not pid:
        print("Recorder is not running.")
        return 0
    if not reccontrol.stop_background():
        print(f"Could not stop recorder (pid {pid}).")
        return 1
    for _ in range(50):  # poll every 0.1 s, ~5 s in total
        if not reccontrol.pid_alive(pid):
            break
        time.sleep(0.1)
    else:
        # The wait ran out: check once more rather than claiming success blindly.
        if reccontrol.pid_alive(pid):
            print(f"Recorder (pid {pid}) did not exit within 5 seconds.")
            return 1
    print(f"Recorder stopped (pid {pid}).")
    return 0
|
||||
|
||||
|
||||
def cmd_record_status(args) -> int:
    """Print whether the recorder is running plus the fields of its status record."""
    pid = reccontrol.running_pid()
    status = reccontrol.read_status()

    banner = f"● recording (pid {pid})" if pid else "○ not recording"
    print(banner)
    if not status:
        return 0

    print(f" log: {status.get('log')}")
    print(f" samples: {status.get('samples')}")
    # Optional fields: each line is printed only when its value is present and truthy.
    if status.get("started"):
        started_at = time.localtime(status["started"])
        print(f" started: {time.strftime('%Y-%m-%d %H:%M:%S', started_at)}")
    if status.get("updated"):
        updated_at = time.localtime(status["updated"])
        print(f" updated: {time.strftime('%H:%M:%S', updated_at)}")
    if status.get("gpu_lost"):
        print(" ⚠ a GPU-lost event was recorded this session")
    if status.get("latest"):
        print(f" latest: {format_headline(status['latest'])}")
    return 0
|
||||
|
||||
|
||||
def cmd_record_report(args) -> int:
    """Summarize a capture log (``args.log`` or the default) and print the rendering."""
    from .core.crashlog import summarize

    if args.log:
        log_path = Path(args.log)
    else:
        log_path = config.LOG_FILE
    report = render_summary(summarize(log_path, last_n=args.last), log_path=log_path)
    print(report)
    return 0
|
||||
|
||||
|
||||
def cmd_install(args) -> int:
    """Show optional system components and, unless ``--check``, install the missing ones.

    Returns:
        0 when nothing is missing or only a check was requested; 1 when apt is not
        the package manager or the user declines; otherwise the exit code reported
        by ``installer.install_packages``.
    """
    from .core import installer, sysenv

    print(f"Distro: {sysenv.distro_name()}")
    pm = sysenv.package_manager()
    print(f"Package manager: {pm or 'none (only apt is supported)'}")
    print(f"GPU: {', '.join(sysenv.gpu_vendors()) or 'unknown'}\n")

    status = installer.component_status()
    print("Optional components:")
    for component, present in status:
        if present:
            print(f" [✓] {component.name:<22} — {component.enables}")
        else:
            print(f" [✗] {component.name:<22} — {component.enables}")
            print(f" apt: {' '.join(component.apt)}")

    missing = []
    for component, present in status:
        if not present:
            missing.append(component)
    if not missing:
        print("\nAll optional components are installed. ✔")
        return 0

    packages = installer.missing_packages(missing)
    package_list = " ".join(packages)
    print(f"\nMissing packages: {package_list}")
    if args.check:
        return 0
    if pm != "apt":
        print(f"Automatic install needs apt. Install manually:\n sudo apt install {package_list}")
        return 1

    if not args.yes:
        # Closed stdin (EOF) counts as "no".
        try:
            reply = input(f"\nInstall {len(packages)} package(s) now? [y/N] ").strip().lower()
        except EOFError:
            reply = "n"
        if reply not in ("y", "yes"):
            print("Aborted.")
            return 1

    print("Installing (you may be prompted for your password)…")
    rc, out = installer.install_packages(packages)
    print(out[-2000:])  # only the tail of the installer output
    if rc != 0:
        print(f"\nInstall failed (exit {rc}).")
        return rc
    still = [c.name for c, present in installer.component_status() if not present]
    print("\nStill missing: " + (", ".join(still) if still else "none ✔"))
    return rc
|
||||
|
||||
|
||||
def cmd_login(args) -> int:
    """Save an update token (from ``--token`` or a hidden prompt) and try to verify it.

    Returns:
        1 if no token was supplied or the server rejected it; 0 otherwise, including
        the case where the server could not be reached.
    """
    from getpass import getpass

    from .core import updates

    token = args.token
    if not token:
        print(f"Create a token (scope read:repository) at: {updates.TOKEN_PAGE}")
        # EOF or Ctrl-C at the prompt is treated as "nothing entered".
        try:
            token = getpass("Paste token: ").strip()
        except (EOFError, KeyboardInterrupt):
            token = ""
        if not token:
            print("No token provided.")
            return 1

    config.save_token(token)
    state, tag, _notes = updates.update_state()
    if state == updates.AUTH:
        print("Token saved, but the server rejected it (check scope/permissions).")
        return 1
    verified = state in (updates.UP_TO_DATE, updates.AVAILABLE)
    if verified:
        print(f"Token saved and verified. Latest release: {tag}.")
    else:
        print("Token saved (couldn't reach the server to verify right now).")
    return 0
|
||||
|
||||
|
||||
def cmd_logout(args) -> int:
    """Remove the saved update token and confirm it on stdout; always returns 0."""
    config.clear_token()
    message = "Update token removed."
    print(message)
    return 0
|
||||
|
||||
|
||||
def cmd_update(args) -> int:
    """Check for a newer release and, unless ``--check``, apply it.

    Returns:
        1 when there is no token, the token is rejected, or the server is
        unreachable; 0 when up to date, when only checking, or after a successful
        update; otherwise the exit code from ``updates.apply_update``.
    """
    from .core import updates

    state, tag, notes = updates.update_state()

    # States that stop the command with an error, tested in this order.
    blocking = (
        (updates.NO_TOKEN, f"No update token. Run `rigdoctor login` after creating one at:\n {updates.TOKEN_PAGE}"),
        (updates.AUTH, "The update server rejected your token (check scope/permissions)."),
        (updates.NETWORK, "Couldn't reach the update server."),
    )
    for marker, text in blocking:
        if state == marker:
            print(text)
            return 1

    if state == updates.UP_TO_DATE:
        print(f"Up to date (v{__version__}).")
        return 0

    # Any remaining state is treated as "update available".
    print(f"Update available: {tag} (current v{__version__}).")
    if notes:
        indented = "\n".join(" " + ln for ln in notes.splitlines())
        print("\nWhat's new:\n" + indented + "\n")
    if args.check:
        return 0

    print(f"Installing {tag}…")
    rc, out = updates.apply_update(tag)
    print(out[-2000:])
    if rc != 0:
        print(f"\nUpdate failed (exit {rc}).")
        return rc
    print(f"\nUpdated to {tag}. Restart RigDoctor to use the new version.")
    return 0
|
||||
|
||||
|
||||
def cmd_uninstall(args) -> int:
    """Confirm (unless ``--yes``) and start the uninstall; ``--purge`` widens the scope.

    Returns:
        1 if the user declines, 0 once the uninstall has been started.
    """
    from .core import uninstall as uninstaller

    if args.purge:
        scope = "everything (app + settings, token, and logs)"
    else:
        scope = "the app (settings/logs kept)"

    confirmed = bool(args.yes)
    if not confirmed:
        # Closed stdin (EOF) counts as "no".
        try:
            reply = input(f"Uninstall RigDoctor — remove {scope}? [y/N] ").strip().lower()
        except EOFError:
            reply = "n"
        confirmed = reply in ("y", "yes")
    if not confirmed:
        print("Aborted.")
        return 1

    uninstaller.uninstall(purge=args.purge)
    print("Uninstalling… RigDoctor will be removed momentarily.")
    return 0
|
||||
|
||||
|
||||
def cmd_share_serve(args) -> int:
    """Delegate to ``share.serve`` with the requested host and port; return its result."""
    from .core import share

    exit_code = share.serve(host=args.host, port=args.port)
    return exit_code
|
||||
|
||||
|
||||
def cmd_collect_priv(args) -> int:
    """Internal: emit root-only data (SMART + dmidecode) as JSON, run via pkexec at launch."""
    from dataclasses import asdict

    from .core.health import check_smart
    from .core.inventory import _dmidecode

    # SMART findings are dataclass instances; flatten them so json.dumps accepts them.
    smart_rows = []
    for finding in check_smart():
        smart_rows.append(asdict(finding))
    payload = {"smart": smart_rows, "dmidecode": _dmidecode()}
    print(json.dumps(payload))
    return 0
|
||||
|
||||
|
||||
def cmd_inventory(args) -> int:
    """Collect the system inventory and emit it as text, JSON or Markdown.

    ``--json`` takes precedence over ``--markdown``. With ``-o/--output`` the text
    is written to that file, otherwise it is printed. Always returns 0.
    """
    from .core import inventory

    sections = inventory.collect()
    if args.json:
        text = inventory.render_json(sections)
    elif args.markdown:
        text = inventory.render_markdown(sections)
    else:
        text = inventory.render_text(sections)
    if args.output:
        # Always write UTF-8, so the export does not depend on the locale encoding.
        Path(args.output).write_text(text, encoding="utf-8")
        print(f"Wrote {args.output}")
    else:
        print(text)
    return 0
|
||||
|
||||
|
||||
def cmd_report(args) -> int:
    """Run the health checks and print the findings as text, or JSON with ``--json``.

    The old "not implemented yet" stub (``print`` + ``return 2``) is removed: it sat
    ahead of this body and made it unreachable. Always returns 0.
    """
    from dataclasses import asdict

    from .core.health import run_health_checks
    from .render import render_health

    findings = run_health_checks()
    if args.json:
        # Findings are dataclass instances; convert them for JSON output.
        print(json.dumps([asdict(f) for f in findings], indent=2, ensure_ascii=False))
    else:
        print(render_health(findings))
    return 0
|
||||
|
||||
|
||||
def build_parser() -> argparse.ArgumentParser:
|
||||
@@ -92,8 +363,66 @@ def build_parser() -> argparse.ArgumentParser:
|
||||
|
||||
sub.add_parser("gui", help="launch the desktop GUI (needs PySide6)").set_defaults(func=cmd_gui)
|
||||
sub.add_parser("sources", help="list detected sensor sources").set_defaults(func=cmd_sources)
|
||||
sub.add_parser("record", help="crash-capture logger (coming soon)").set_defaults(func=cmd_record)
|
||||
sub.add_parser("report", help="health report (coming soon)").set_defaults(func=cmd_report)
|
||||
|
||||
inst = sub.add_parser("install", help="set up optional system dependencies (M9)")
|
||||
inst.add_argument("--check", action="store_true", help="report status only; install nothing")
|
||||
inst.add_argument("-y", "--yes", action="store_true", help="install without confirmation")
|
||||
inst.set_defaults(func=cmd_install)
|
||||
|
||||
login = sub.add_parser("login", help="save a Gitea token for updates (M13)")
|
||||
login.add_argument("--token", default=None, help="token (prompted if omitted)")
|
||||
login.set_defaults(func=cmd_login)
|
||||
sub.add_parser("logout", help="remove the saved update token").set_defaults(func=cmd_logout)
|
||||
|
||||
upd = sub.add_parser("update", help="check for / apply a newer version (M13)")
|
||||
upd.add_argument("--check", action="store_true", help="only report, don't apply")
|
||||
upd.set_defaults(func=cmd_update)
|
||||
|
||||
unin = sub.add_parser("uninstall", help="remove the user-local install")
|
||||
unin.add_argument("--purge", action="store_true", help="also remove settings, token, and logs")
|
||||
unin.add_argument("-y", "--yes", action="store_true", help="don't ask for confirmation")
|
||||
unin.set_defaults(func=cmd_uninstall)
|
||||
|
||||
rec = sub.add_parser("record", help="crash-capture logger (M3)")
|
||||
rec_sub = rec.add_subparsers(dest="record_cmd", required=True)
|
||||
|
||||
run_p = rec_sub.add_parser("run", help="run the capture loop in the foreground (systemd-friendly)")
|
||||
run_p.add_argument("-n", "--interval", type=float, default=None, help="sampling interval (s)")
|
||||
run_p.add_argument("-o", "--out", default=None, help="log file path")
|
||||
run_p.set_defaults(func=cmd_record_run)
|
||||
|
||||
start_p = rec_sub.add_parser("start", help="start recording in the background")
|
||||
start_p.add_argument("-n", "--interval", type=float, default=None, help="sampling interval (s)")
|
||||
start_p.add_argument("-o", "--out", default=None, help="log file path")
|
||||
start_p.set_defaults(func=cmd_record_start)
|
||||
|
||||
rec_sub.add_parser("stop", help="stop background recording").set_defaults(func=cmd_record_stop)
|
||||
rec_sub.add_parser("status", help="show recorder status").set_defaults(func=cmd_record_status)
|
||||
|
||||
report_p = rec_sub.add_parser("report", help="summarize the captured log (post-crash)")
|
||||
report_p.add_argument("--last", type=int, default=10, help="recent samples to show")
|
||||
report_p.add_argument("--log", default=None, help="path to a capture log")
|
||||
report_p.set_defaults(func=cmd_record_report)
|
||||
|
||||
rep = sub.add_parser("report", help="health report (M4): scan logs/SMART/driver for issues")
|
||||
rep.add_argument("--json", action="store_true", help="output JSON instead of text")
|
||||
rep.set_defaults(func=cmd_report)
|
||||
|
||||
cp = sub.add_parser("collect-priv", help=argparse.SUPPRESS) # internal: run via pkexec
|
||||
cp.set_defaults(func=cmd_collect_priv)
|
||||
|
||||
share_p = sub.add_parser("share", help="session sharing (M12)")
|
||||
share_sub = share_p.add_subparsers(dest="share_cmd", required=True)
|
||||
serve_p = share_sub.add_parser("serve", help="serve a read-only live view (token-gated)")
|
||||
serve_p.add_argument("--host", default="127.0.0.1", help="bind address (use 0.0.0.0 + a tunnel for remote)")
|
||||
serve_p.add_argument("--port", type=int, default=8765, help="port")
|
||||
serve_p.set_defaults(func=cmd_share_serve)
|
||||
|
||||
inv = sub.add_parser("inventory", help="system inventory (M5): export hardware/OS details")
|
||||
inv.add_argument("--json", action="store_true", help="output JSON")
|
||||
inv.add_argument("--markdown", action="store_true", help="output Markdown (for forum/bug reports)")
|
||||
inv.add_argument("-o", "--output", default=None, help="write to a file instead of stdout")
|
||||
inv.set_defaults(func=cmd_inventory)
|
||||
return p
|
||||
|
||||
|
||||
|
||||
+147
-1
@@ -3,6 +3,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
APP = "rigdoctor"
|
||||
@@ -19,8 +21,128 @@ STATE_DIR = _xdg("XDG_STATE_HOME", ".local/state")
|
||||
LOG_DIR = DATA_DIR / "logs"
|
||||
CONFIG_FILE = CONFIG_DIR / "config.toml"
|
||||
|
||||
# Crash-capture logger (M3)
|
||||
LOG_FILE = LOG_DIR / "capture.jsonl"
|
||||
STATUS_FILE = STATE_DIR / "recorder.json"
|
||||
PID_FILE = STATE_DIR / "recorder.pid"
|
||||
SPAWN_LOG = STATE_DIR / "recorder.out"
|
||||
|
||||
# Update access token (M13) — gates updates to Gitea account holders (D18).
|
||||
# Stored in the OS keyring (Secret Service / GNOME Keyring) via `secret-tool` when
|
||||
# available — encrypted at rest, unlocked with the login session — else a 0600 file.
|
||||
TOKEN_FILE = CONFIG_DIR / "token"
|
||||
_SECRET_ATTRS = ["application", "rigdoctor", "type", "update-token"]
|
||||
|
||||
|
||||
def _secret_tool() -> str | None:
|
||||
return shutil.which("secret-tool")
|
||||
|
||||
|
||||
def keyring_available() -> bool:
    """Report whether the ``secret-tool`` executable was found."""
    tool = _secret_tool()
    return tool is not None
|
||||
|
||||
|
||||
def _keyring_store(token: str) -> bool:
    """Store *token* with ``secret-tool store``; True only if the command exits 0.

    The token is passed on stdin, not as a command-line argument. A missing tool,
    a timeout, or a failure to launch all yield False.
    """
    tool = _secret_tool()
    if not tool:
        return False
    command = [tool, "store", "--label", "RigDoctor update token", *_SECRET_ATTRS]
    try:
        result = subprocess.run(command, input=token, text=True, capture_output=True, timeout=20)
    except (subprocess.SubprocessError, OSError):
        return False
    return result.returncode == 0
|
||||
|
||||
|
||||
def _keyring_lookup() -> str | None:
    """Return the stripped output of ``secret-tool lookup``, or None.

    None is returned when the tool is missing, the command fails or times out,
    or its output is empty after stripping.
    """
    tool = _secret_tool()
    if not tool:
        return None
    try:
        result = subprocess.run(
            [tool, "lookup", *_SECRET_ATTRS], text=True, capture_output=True, timeout=20
        )
    except (subprocess.SubprocessError, OSError):
        return None
    if result.returncode != 0:
        return None
    secret = result.stdout.strip()
    return secret if secret else None
|
||||
|
||||
|
||||
def _keyring_clear() -> None:
    """Run ``secret-tool clear`` for the token attributes; failures are ignored."""
    tool = _secret_tool()
    if tool:
        try:
            subprocess.run([tool, "clear", *_SECRET_ATTRS], capture_output=True, timeout=20)
        except (subprocess.SubprocessError, OSError):
            # Best-effort: nothing to report if the tool cannot be run.
            pass
|
||||
|
||||
|
||||
def load_token() -> str | None:
    """Token from $RIGDOCTOR_TOKEN, then the OS keyring, then the token file.

    Returns None when none of the three yields a non-empty value.
    """
    from_env = (os.environ.get("RIGDOCTOR_TOKEN") or "").strip()
    if from_env:
        return from_env

    stored = _keyring_lookup()
    if stored:
        return stored

    try:
        on_disk = TOKEN_FILE.read_text().strip()
    except OSError:
        return None
    return on_disk or None
|
||||
|
||||
|
||||
def save_token(token: str) -> None:
    """Save to the OS keyring if possible (encrypted); else a 0600 file.

    Args:
        token: The update token; surrounding whitespace is stripped before saving.
    """
    token = token.strip()
    if _keyring_store(token):
        try:  # don't leave a plaintext copy once it's in the keyring
            TOKEN_FILE.unlink()
        except OSError:
            pass
        return
    CONFIG_DIR.mkdir(parents=True, exist_ok=True)
    # Create the file owner-only from the start. write_text() followed by chmod()
    # would leave the secret readable under the process umask in between.
    fd = os.open(TOKEN_FILE, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w", encoding="utf-8") as fh:
        try:
            # The creation mode does not apply to a file that already existed,
            # so tighten it before any secret bytes are written.
            os.fchmod(fh.fileno(), 0o600)
        except OSError:
            pass
        fh.write(token + "\n")
|
||||
|
||||
|
||||
def clear_token() -> None:
    """Remove the token from the keyring and delete the token file, ignoring failures."""
    _keyring_clear()
    try:
        TOKEN_FILE.unlink()
    except OSError:
        # Usually just "no such file": there was nothing to remove.
        return
|
||||
|
||||
|
||||
def token_backend() -> str:
    """Where the active token lives: 'env' | 'keyring' | 'file' | 'none'.

    Uses the same precedence and emptiness rules as ``load_token``: an empty or
    unreadable token file counts as 'none', because ``load_token`` would not
    return a token from it.
    """
    env = os.environ.get("RIGDOCTOR_TOKEN")
    if env and env.strip():
        return "env"
    if _keyring_lookup() is not None:
        return "keyring"
    try:
        if TOKEN_FILE.read_text().strip():
            return "file"
    except OSError:
        pass
    return "none"
|
||||
|
||||
# Built-in configuration defaults. The stale duplicate "interval" entry left over
# from the diff is removed; the value is unchanged.
DEFAULTS: dict = {
    "interval": 1.0,  # sampling interval in seconds (default ≤1 Hz — NFR)
    "log_max_bytes": 20_000_000,  # rotate a log segment past this size
    "log_backups": 10,  # keep this many rotated segments (bounds disk use)
    "update_check_minutes": 30,  # re-check for updates this often while running (0 = off)
    "elevate_on_launch": True,  # GUI asks for the password once at launch (SMART/dmidecode)
    "alerts_enabled": True,  # desktop notifications on overheat / GPU-lost / new version
    "gpu_temp_alert": 90.0,  # °C — alert when GPU reaches this
    "cpu_temp_alert": 95.0,  # °C — alert when CPU reaches this
    "relay_url": "wss://rigdoctor.jesseyvanofferen.com",  # session-sharing relay (M12)
}
|
||||
|
||||
|
||||
@@ -36,3 +158,27 @@ def load_config() -> dict:
|
||||
except Exception:
|
||||
pass
|
||||
return cfg
|
||||
|
||||
|
||||
def _toml_value(value) -> str:
    """Render a scalar as a TOML value.

    Booleans become ``true``/``false``, ints and floats use ``repr()``, and anything
    else is converted with ``str()`` and written as a double-quoted basic string.
    Backslash, double quote and control characters are escaped, so a value with a
    newline or tab still yields a single valid TOML line.
    """
    if isinstance(value, bool):  # checked before int: bool is a subclass of int
        return "true" if value else "false"
    if isinstance(value, (int, float)):
        return repr(value)

    short_escapes = {
        "\\": "\\\\",
        '"': '\\"',
        "\b": "\\b",
        "\t": "\\t",
        "\n": "\\n",
        "\f": "\\f",
        "\r": "\\r",
    }
    pieces = []
    for ch in str(value):
        if ch in short_escapes:
            pieces.append(short_escapes[ch])
        elif ord(ch) < 0x20 or ord(ch) == 0x7F:
            # Remaining control characters are not allowed raw in a basic string.
            pieces.append(f"\\u{ord(ch):04X}")
        else:
            pieces.append(ch)
    return '"' + "".join(pieces) + '"'
|
||||
|
||||
|
||||
def save_config(values: dict) -> None:
    """Write a flat config.toml (stdlib has no TOML writer).

    Args:
        values: Mapping of keys to scalar values; each pair becomes one
            ``key = value`` line rendered by ``_toml_value``.
    """
    CONFIG_DIR.mkdir(parents=True, exist_ok=True)
    lines = ["# RigDoctor config — edit in the app (Notifications) or here."]
    lines += [f"{key} = {_toml_value(value)}" for key, value in values.items()]
    # TOML files must be UTF-8, and the header has a non-ASCII dash, so do not
    # rely on the locale's default encoding.
    CONFIG_FILE.write_text("\n".join(lines) + "\n", encoding="utf-8")
|
||||
|
||||
|
||||
def update_config(**changes) -> dict:
    """Apply keyword overrides to the loaded config, save the result, and return it."""
    merged = load_config()
    for key, value in changes.items():
        merged[key] = value
    save_config(merged)
    return merged
|
||||
|
||||
@@ -0,0 +1,91 @@
|
||||
"""Desktop alerts (M8): notify on overheat / GPU-lost / new version via notify-send.
|
||||
|
||||
Edge-triggered: an alert fires when a condition becomes true (not every sample), and
|
||||
can fire again only after it has cleared and a cooldown has passed — so a hot GPU or a
|
||||
1-Hz sample loop doesn't spam notifications. Degrades to a no-op if notify-send is absent.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
from .sample import Sample
|
||||
|
||||
APP_NAME = "RigDoctor"
|
||||
_ICON = "utilities-system-monitor"
|
||||
|
||||
|
||||
def available() -> bool:
    """Report whether the ``notify-send`` executable is on PATH."""
    located = shutil.which("notify-send")
    return located is not None
|
||||
|
||||
|
||||
def notify(title: str, message: str, urgency: str = "normal") -> bool:
    """Send a desktop notification (best-effort). urgency: low|normal|critical.

    Returns:
        True only if notify-send was found and exited with status 0. False if it
        is missing, could not be launched, timed out, or reported a failure.
    """
    tool = shutil.which("notify-send")
    if tool is None:
        return False
    try:
        proc = subprocess.run(
            [tool, "-a", APP_NAME, "-u", urgency, "-i", _ICON, title, message],
            timeout=10,
            check=False,
        )
    except (subprocess.SubprocessError, OSError):
        return False
    # A non-zero exit means the notification was not delivered.
    return proc.returncode == 0
|
||||
|
||||
|
||||
class AlertMonitor:
    """Evaluate samples and raise edge-triggered desktop alerts.

    Each alert key ("gpu_temp", "cpu_temp", "gpu_lost") fires when its condition
    becomes true and cannot fire again until the condition has cleared and
    ``cooldown`` seconds have passed since the last notification for that key.
    """

    def __init__(self, gpu_temp: float = 90.0, cpu_temp: float = 95.0, cooldown: float = 300.0):
        """Store the thresholds (same unit as the temp readings) and the cooldown in seconds."""
        self.gpu_temp = gpu_temp
        self.cpu_temp = cpu_temp
        self.cooldown = cooldown
        self.enabled = True
        self._active: dict[str, bool] = {}  # key -> condition currently alerting
        self._last: dict[str, float] = {}  # key -> time.monotonic() of the last notification

    def _fire(self, key: str, title: str, message: str, urgency: str = "critical") -> None:
        """Notify for *key* unless it is already active or still cooling down."""
        if self._active.get(key):
            return  # already alerting; wait until it clears
        # Use the monotonic clock for the cooldown: wall-clock time can be stepped
        # (NTP, manual change), which would wrongly suppress or re-fire alerts.
        now = time.monotonic()
        last = self._last.get(key)
        if last is not None and now - last < self.cooldown:
            return
        self._active[key] = True
        self._last[key] = now
        notify(title, message, urgency)

    def _clear(self, key: str) -> None:
        """Mark *key* as no longer alerting, so a later occurrence can fire again."""
        self._active[key] = False

    def check(self, sample: Sample) -> None:
        """Inspect one sample and fire or clear the GPU-temp, CPU-temp and GPU-lost alerts."""
        if not self.enabled:
            return
        # GPU temperature: the first gpu/temp reading with an empty label and a value.
        gpu_t = next(
            (r.value for r in sample.readings
             if r.source == "gpu" and r.metric == "temp" and r.label == "" and r.value is not None),
            None,
        )
        if gpu_t is not None:
            if gpu_t >= self.gpu_temp:
                self._fire("gpu_temp", "GPU overheating", f"GPU at {gpu_t:.0f} °C")
            else:
                self._clear("gpu_temp")

        # CPU temperature: the hottest of all cpu/temp readings.
        cpu_temps = [r.value for r in sample.readings
                     if r.source == "cpu" and r.metric == "temp" and r.value is not None]
        if cpu_temps:
            cpu_t = max(cpu_temps)
            if cpu_t >= self.cpu_temp:
                self._fire("cpu_temp", "CPU overheating", f"CPU at {cpu_t:.0f} °C")
            else:
                self._clear("cpu_temp")

        # GPU lost: signalled by a gpu/status reading labelled "query-timeout".
        lost = any(r.source == "gpu" and r.metric == "status" and r.label == "query-timeout"
                   for r in sample.readings)
        if lost:
            self._fire("gpu_lost", "GPU not responding", "nvidia-smi query timed out — the GPU may have dropped")
        else:
            self._clear("gpu_lost")
|
||||
@@ -0,0 +1,48 @@
|
||||
"""Installable component catalog (M9): optional system tools and what they enable.
|
||||
|
||||
apt-only (D15). Core monitoring (M1/M3/M4) needs no packages — these are optional
|
||||
enrichments the installer can add. Each component is detected by a representative
|
||||
command (present == usable).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Component:
    """One optional system tool in the installer catalog (immutable record)."""

    # Short identifier of the component.
    id: str
    # Human-readable name shown to the user.
    name: str
    # Group the component is listed under.
    bundle: str
    # Capability unlocked when the component is present.
    enables: str
    # apt package name(s) that provide it.
    apt: tuple[str, ...]
    # Command whose presence is used to detect the component.
    command: str
|
||||
|
||||
|
||||
# Catalog of optional components, in display order. Fields are spelled out by
# keyword so each entry reads without referring back to the dataclass.
COMPONENTS: tuple[Component, ...] = (
    Component(
        id="smartmontools",
        name="SMART disk health",
        bundle="Diagnostics",
        enables="Disk health (SMART) in the health report",
        apt=("smartmontools",),
        command="smartctl",
    ),
    Component(
        id="lm-sensors",
        name="lm-sensors",
        bundle="Diagnostics",
        enables="Extra motherboard / voltage sensors",
        apt=("lm-sensors",),
        command="sensors",
    ),
    Component(
        id="dmidecode",
        name="dmidecode",
        bundle="Diagnostics",
        enables="Motherboard / BIOS / RAM details for system inventory",
        apt=("dmidecode",),
        command="dmidecode",
    ),
    Component(
        id="pciutils",
        name="pciutils",
        bundle="Diagnostics",
        enables="PCIe topology + GPU detection (lspci)",
        apt=("pciutils",),
        command="lspci",
    ),
    Component(
        id="libnotify",
        name="Desktop notifications",
        bundle="Monitoring",
        enables="Desktop alert notifications",
        apt=("libnotify-bin",),
        command="notify-send",
    ),
    Component(
        id="libsecret",
        name="Encrypted token storage",
        bundle="Updates",
        enables="Store the update token in the OS keyring, encrypted",
        apt=("libsecret-tools",),
        command="secret-tool",
    ),
)
|
||||
@@ -0,0 +1,177 @@
|
||||
"""Crash-capture log (M3): rotating, fsync-per-sample JSONL writer + reader + summary.
|
||||
|
||||
On-disk format is JSON Lines, one record per line:
|
||||
sample : {"ts": <float>, "readings": [[source, metric, value, unit, label], ...]}
|
||||
event : {"ts": <float>, "event": <str>, "detail": <str>}
|
||||
|
||||
Every line is flushed and fsync'd, so the readings right before a hard lock survive.
|
||||
A torn final line (interrupted mid-write by a crash) is tolerated on read.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from collections import deque
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from .sample import Reading, Sample
|
||||
|
||||
|
||||
class CrashLogWriter:
    """Append samples/events as JSONL, fsync per line, rotate by size.

    Rotation follows logging.handlers.RotatingFileHandler: ``path`` becomes
    ``path.1``, ``path.1`` becomes ``path.2``, and so on up to ``path.<backups>``.
    As there, rotation happens only when both ``max_bytes`` and ``backups`` are
    positive.
    """

    def __init__(self, path, max_bytes: int = 20_000_000, backups: int = 10) -> None:
        """Open *path* for appending, creating parent directories as needed."""
        self.path = Path(path)
        self.max_bytes = int(max_bytes)
        self.backups = int(backups)
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self._fh = open(self.path, "a", encoding="utf-8")

    def _write(self, obj: dict) -> None:
        """Write one compact JSON line, force it to disk, then rotate if oversized."""
        self._fh.write(json.dumps(obj, separators=(",", ":"), ensure_ascii=False))
        self._fh.write("\n")
        self._fh.flush()
        os.fsync(self._fh.fileno())  # survive a hard lock
        # With no backups there is nowhere to rotate to. Skipping the call avoids
        # a pointless close and reopen on every write once the file is oversized.
        if self.max_bytes and self.backups > 0 and self._fh.tell() >= self.max_bytes:
            self._rotate()

    def write_sample(self, sample: Sample) -> None:
        """Append a sample record: its timestamp (ms precision) plus one row per reading."""
        rows = [[r.source, r.metric, r.value, r.unit, r.label] for r in sample.readings]
        self._write({"ts": round(sample.ts, 3), "readings": rows})

    def write_event(self, kind: str, detail: str = "") -> None:
        """Append an event record stamped with the current wall-clock time."""
        self._write({"ts": round(time.time(), 3), "event": kind, "detail": detail})

    def _rotate(self) -> None:
        """Shift base.i -> base.i+1, move the live file to base.1, and reopen it."""
        self._fh.close()
        base = str(self.path)
        try:
            # Highest index first; Path.replace overwrites the destination, so the
            # oldest segment drops off the end.
            for i in range(self.backups - 1, 0, -1):
                src = Path(f"{base}.{i}")
                if src.exists():
                    src.replace(Path(f"{base}.{i + 1}"))
            if self.backups > 0:
                self.path.replace(Path(f"{base}.1"))
        finally:
            # Reopen even if a rename failed, so later writes never hit a closed handle.
            self._fh = open(self.path, "a", encoding="utf-8")

    def close(self) -> None:
        """Close the log file; errors while closing are ignored."""
        try:
            self._fh.close()
        except Exception:
            pass
|
||||
|
||||
|
||||
def _segment_files(path) -> list[Path]:
    """All log segments oldest→newest: base.N … base.1, base.

    Only siblings named ``<base name>.<decimal number>`` count as segments. The
    base file is appended last if it exists. A missing or unreadable parent
    directory yields an empty list.
    """
    base = Path(path)
    prefix = base.name + "."
    # Match names literally rather than with glob(): a log name containing glob
    # metacharacters such as "[" would otherwise match the wrong files or none.
    try:
        siblings = list(base.parent.iterdir())
    except OSError:
        siblings = []
    numbered: list[tuple[int, Path]] = []
    for p in siblings:
        if not p.name.startswith(prefix):
            continue
        suffix = p.name[len(prefix):]
        # isdigit() alone accepts characters like "²" that int() rejects.
        if suffix.isascii() and suffix.isdigit():
            numbered.append((int(suffix), p))
    numbered.sort(reverse=True)  # highest number = oldest
    files = [p for _, p in numbered]
    if base.exists():
        files.append(base)
    return files
|
||||
|
||||
|
||||
def iter_records(path, include_backups: bool = True):
    """Yield parsed records oldest→newest, tolerating a torn final line.

    Files are decoded with ``errors="replace"``: the writer emits raw UTF-8
    (``ensure_ascii=False``), so a write interrupted mid-character would
    otherwise raise ``UnicodeDecodeError`` while iterating instead of being
    skipped. The damaged line then simply fails JSON parsing and is dropped.
    Blank lines, unparseable lines and unreadable files are skipped silently.
    """
    files = _segment_files(path) if include_backups else [Path(path)]
    for f in files:
        try:
            with open(f, encoding="utf-8", errors="replace") as fh:
                for line in fh:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        yield json.loads(line)
                    except ValueError:
                        continue
        except OSError:
            continue
|
||||
|
||||
|
||||
def record_to_sample(rec: dict) -> Sample:
    """Rebuild a ``Sample`` from a logged record (inverse of ``write_sample``'s row layout).

    Each row must unpack to exactly five items: source, metric, value, unit, label.
    A missing ``ts`` becomes ``0.0``; missing ``readings`` becomes an empty list.
    """
    readings = []
    for source, metric, value, unit, label in rec.get("readings", []):
        readings.append(Reading(source, metric, value, unit, label))
    return Sample(ts=rec.get("ts", 0.0), readings=readings)
|
||||
|
||||
|
||||
def headline(sample: Sample) -> dict:
    """Extract the few at-a-glance values used by status/report displays.

    Returns a dict with ``gpu_temp``, ``gpu_util``, ``gpu_power``, ``cpu_temp``
    and ``mem_pct``; any value that has no matching reading is ``None``.
    """

    def find(source: str, metric: str, label: str | None = None):
        # First reading matching source/metric (and label, when one is given).
        for r in sample.readings:
            if r.source == source and r.metric == metric and (label is None or r.label == label):
                return r.value
        return None

    cpu_pkg = None
    cpu_temps = []
    for r in sample.readings:
        if r.source == "cpu" and r.metric == "temp" and r.value is not None:
            cpu_temps.append(r.value)
            # A reading restored from a log may carry a null label; treat it as "".
            low = (r.label or "").lower()
            if cpu_pkg is None and (low.startswith("package") or "tctl" in low or "tdie" in low):
                cpu_pkg = r.value
    if cpu_pkg is None and cpu_temps:
        # No package-level sensor found: fall back to the hottest CPU reading.
        cpu_pkg = max(cpu_temps)

    return {
        "gpu_temp": find("gpu", "temp", ""),
        "gpu_util": find("gpu", "util"),
        "gpu_power": find("gpu", "power"),
        "cpu_temp": cpu_pkg,
        "mem_pct": find("memory", "used_pct"),
    }
|
||||
|
||||
|
||||
@dataclass
class Summary:
    """Aggregate view of a crash log, as built by ``summarize``."""

    # ts of the first / last sample record seen (None when there were none)
    start: float | None
    end: float | None
    # number of sample records
    samples: int
    # reading.key -> (value, unit, ts)
    maxima: dict
    # [(ts, kind, detail), ...]
    events: list
    # [Sample, ...] most recent
    last: list
|
||||
|
||||
|
||||
def summarize(path, last_n: int = 10) -> Summary:
    """Scan every log segment once and fold it into a ``Summary``.

    Event records are collected as ``(ts, kind, detail)``; sample records are
    counted, tracked for the per-key maximum, and the newest ``last_n`` kept.
    Records that are neither events nor samples are ignored.
    """
    first_ts = last_ts = None
    n_samples = 0
    peaks: dict = {}
    events: list = []
    tail: deque = deque(maxlen=last_n)

    for rec in iter_records(path):
        ts = rec.get("ts")
        if "event" in rec:
            events.append((ts, rec.get("event", ""), rec.get("detail", "")))
        elif "readings" in rec:
            n_samples += 1
            if first_ts is None:
                first_ts = ts
            last_ts = ts
            sample = record_to_sample(rec)
            tail.append(sample)
            for reading in sample.readings:
                if reading.value is None:
                    continue
                best = peaks.get(reading.key)
                if best is None or reading.value > best[0]:
                    peaks[reading.key] = (reading.value, reading.unit, ts)

    return Summary(first_ts, last_ts, n_samples, peaks, events, list(tail))
|
||||
@@ -0,0 +1,51 @@
|
||||
"""Session privilege elevation.
|
||||
|
||||
At GUI launch the app asks for the password once (pkexec) and collects the data that
|
||||
needs root — SMART health + dmidecode (board/BIOS/RAM) — caching it for the session so
|
||||
Health and Inventory can always show the full picture without per-action prompts.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
# Session-wide cache of the data gathered by the elevated helper (None until set).
_privileged: dict | None = None


def privileged() -> dict | None:
    """Cached root-collected data ({"smart": [...], "dmidecode": {...}}), or None."""
    return _privileged


def set_privileged(data: dict | None) -> None:
    """Replace the session cache; pass ``None`` to clear it."""
    global _privileged
    _privileged = data
|
||||
|
||||
|
||||
def available() -> bool:
    """True when ``pkexec`` is on PATH and the process is not already root."""
    if os.geteuid() == 0:
        return False
    return shutil.which("pkexec") is not None
|
||||
|
||||
|
||||
def _cli() -> list[str]:
    """Command prefix for re-invoking rigdoctor.

    Uses the ``rigdoctor`` script next to the running interpreter when it
    exists, otherwise falls back to ``<python> -m rigdoctor``.
    """
    bin_dir = os.path.dirname(sys.executable)
    candidate = os.path.join(bin_dir, "rigdoctor")
    if os.path.exists(candidate):
        return [candidate]
    return [sys.executable, "-m", "rigdoctor"]
|
||||
|
||||
|
||||
def collect_via_pkexec(timeout: float = 120.0) -> dict | None:
    """Run one elevated collection (single password prompt). None if unavailable/cancelled.

    The helper's stdout is parsed as JSON; a non-zero exit, empty output,
    a timeout, a spawn failure or invalid JSON all yield ``None``.
    """
    if not available():
        return None
    try:
        proc = subprocess.run(
            ["pkexec", *_cli(), "collect-priv"],
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        succeeded = proc.returncode == 0 and proc.stdout.strip()
        return json.loads(proc.stdout) if succeeded else None
    except (subprocess.SubprocessError, OSError, ValueError):
        return None
|
||||
@@ -0,0 +1,255 @@
|
||||
"""Health report (M4): scan kernel logs + SMART + driver/library state into a
|
||||
prioritized, plain-language findings list with suggested fixes (read-only, D9).
|
||||
|
||||
Stdlib-only. Every check degrades gracefully — a missing tool/permission yields an
|
||||
info finding, never an exception.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
CRITICAL = "critical"
WARNING = "warning"
INFO = "info"
OK = "ok"
# Sort rank per severity: lower sorts first (worst first).
_ORDER = {CRITICAL: 0, WARNING: 1, INFO: 2, OK: 3}


@dataclass
class Finding:
    """One health-report entry: what was found, how bad it is, and what to do."""

    severity: str  # critical | warning | info | ok
    category: str  # GPU, Kernel, Memory, Storage, Thermal, Driver, PCIe, Logs
    title: str
    detail: str = ""
    suggestion: str = ""


# --- NVIDIA Xid knowledge (the seed crash is Xid 79) --------------------------
# Xid code -> (severity, plain-language description).
_XID_INFO: dict[int, tuple[str, str]] = {
    13: (WARNING, "Graphics engine exception (often an app/driver bug or unstable overclock)"),
    31: (WARNING, "GPU memory page fault (usually a driver or application bug)"),
    43: (WARNING, "GPU stopped processing a task (application error)"),
    45: (INFO, "Preemptive channel removal (often a side-effect of another error or a reboot)"),
    48: (CRITICAL, "Double-bit ECC error — VRAM hardware fault"),
    62: (CRITICAL, "Internal microcontroller halt (often follows instability)"),
    79: (CRITICAL, "GPU has fallen off the bus — hardware: power delivery, PCIe link, or thermals"),
    94: (CRITICAL, "Contained ECC error"),
    95: (CRITICAL, "Uncontained ECC error"),
    119: (CRITICAL, "GSP RPC timeout — GPU System Processor hang"),
    120: (CRITICAL, "GSP error — GPU System Processor fault"),
}
# Xid code -> specific advice; codes not listed get a generic "look it up" hint.
_XID_SUGGEST: dict[int, str] = {
    79: "Check PSU/power cables and reseat the GPU/riser; test a lower power limit "
        "(`sudo nvidia-smi -pl <watts>`) and capture a session with `rigdoctor record`.",
    48: "Persistent VRAM ECC errors mean failing memory — RMA the card if it recurs.",
    119: "GSP hangs are often driver-version specific — try a different driver branch.",
    120: "GSP errors are often driver-version specific — try a different driver branch.",
}
# "Xid", an optional "(...)" group, an optional colon, then the numeric code.
_XID_RE = re.compile(r"Xid(?:\s*\([^)]*\))?:?\s*(\d+)")


def _count_oom_kills(lines: list[str]) -> int:
    """Estimate the number of OOM kills from kernel-log lines.

    One kill typically logs several matching lines ("invoked oom-killer",
    "oom-kill:…", "Out of memory: …", "oom_reaper: …"), so counting every
    matching line inflates the figure. Each marker is tallied separately and
    the largest tally is used. The result is non-zero exactly when at least
    one line contains "Out of memory", "oom-kill" or "oom_reaper".
    """
    out_of_memory = reaper = killer = other_kill = 0
    for ln in lines:
        if "Out of memory" in ln:
            out_of_memory += 1
        if "oom_reaper" in ln:
            reaper += 1
        if "oom-killer" in ln:
            killer += 1
        elif "oom-kill" in ln:
            other_kill += 1
    return max(out_of_memory, reaper, killer, other_kill)


def scan_journal_text(text: str) -> list[Finding]:
    """Parse kernel-log text into findings (separated from IO so it's testable).

    Findings are emitted in a fixed order: Xid codes (ascending), OOM kills,
    kernel panic, MCE, PCIe AER, thermal events, amdgpu resets. Empty or
    clean text returns an empty list.
    """
    lines = text.splitlines()
    findings: list[Finding] = []

    # Tally how often each Xid code appears.
    xids: dict[int, int] = {}
    for line in lines:
        if "Xid" in line:
            m = _XID_RE.search(line)
            if m:
                code = int(m.group(1))
                xids[code] = xids.get(code, 0) + 1
    for code in sorted(xids):
        severity, desc = _XID_INFO.get(code, (WARNING, f"NVIDIA GPU error (Xid {code})"))
        suggest = _XID_SUGGEST.get(code, "Look up this Xid code in NVIDIA's Xid error documentation.")
        findings.append(Finding(severity, "GPU", f"NVIDIA Xid {code} ×{xids[code]}", desc, suggest))

    oom = _count_oom_kills(lines)
    if oom:
        findings.append(Finding(
            WARNING, "Memory", f"Out-of-memory kills ×{oom}",
            "The kernel killed processes to reclaim RAM.",
            "Close memory-heavy apps, add zram/swap, or investigate a leak.",
        ))

    if any("Kernel panic" in ln for ln in lines):
        findings.append(Finding(
            CRITICAL, "Kernel", "Kernel panic recorded",
            "The kernel hit an unrecoverable error.",
            "Note the panic message; review recent driver/kernel updates and hardware.",
        ))

    if any("mce:" in ln or "Machine check" in ln or "Hardware Error" in ln for ln in lines):
        findings.append(Finding(
            CRITICAL, "Hardware", "Machine Check Exception (MCE)",
            "The CPU reported a hardware error.",
            "Run memtest86 for RAM, check CPU temps/voltages, and review the MCE detail.",
        ))

    if any("AER:" in ln or "PCIe Bus Error" in ln or ("pcieport" in ln and "error" in ln.lower()) for ln in lines):
        findings.append(Finding(
            WARNING, "PCIe", "PCIe bus errors (AER)",
            "Correctable/uncorrectable PCIe errors were logged.",
            "Reseat the device and check risers/cabling; AER storms can precede a GPU drop.",
        ))

    # The remaining checks are case-insensitive.
    low = [ln.lower() for ln in lines]
    if any(("thermal" in ln and ("critical" in ln or "throttl" in ln)) or "temperature above threshold" in ln for ln in low):
        findings.append(Finding(
            WARNING, "Thermal", "Thermal events logged",
            "The system logged thermal throttling / critical-temperature events.",
            "Improve airflow/cooling and check fan curves; watch live temps on the dashboard.",
        ))

    if any("amdgpu" in ln and "reset" in ln for ln in low):
        findings.append(Finding(
            CRITICAL, "GPU", "AMD GPU reset (amdgpu)",
            "The AMD GPU was reset after a hang.",
            "Check power/thermals/driver; capture a session with `rigdoctor record`.",
        ))

    return findings
|
||||
|
||||
|
||||
def _journalctl(args: list[str]) -> str | None:
    """Run ``journalctl`` with ``args`` and return its stdout, or None on failure.

    None is returned when journalctl is not installed, cannot be spawned,
    times out (25 s), or exits non-zero without producing any output. The
    last case matters because an empty string would otherwise be read by
    callers as "log scanned, nothing found". Output from a non-zero exit is
    still returned when there is any, so partial logs remain usable.
    """
    if shutil.which("journalctl") is None:
        return None
    try:
        proc = subprocess.run(["journalctl", *args], capture_output=True, text=True, timeout=25)
    except (subprocess.SubprocessError, OSError):
        return None
    if proc.returncode != 0 and not proc.stdout.strip():
        return None
    return proc.stdout
|
||||
|
||||
|
||||
def check_journal() -> list[Finding]:
    """Scan the last 7 days of kernel messages.

    Returns a single INFO finding when the journal could not be read, the
    scan findings when there are any, otherwise a single OK finding.
    """
    text = _journalctl(["-k", "--no-pager", "-o", "cat", "--since", "-7 days"])
    if text is None:
        unreadable = Finding(
            INFO, "Logs", "Couldn't read the kernel journal",
            "journalctl is unavailable or not readable.",
            "Ensure systemd/journald is present and your user is in the 'systemd-journal' or 'adm' group.",
        )
        return [unreadable]
    findings = scan_journal_text(text)
    if findings:
        return findings
    all_clear = Finding(
        OK, "Logs", "No notable kernel errors (last 7 days)",
        "No Xid, panic, OOM, MCE, PCIe AER, or thermal events found.",
    )
    return [all_clear]
|
||||
|
||||
|
||||
def check_journal_persistence() -> list[Finding]:
    """Warn when ``/var/log/journal`` is not a directory (no findings otherwise)."""
    if not Path("/var/log/journal").is_dir():
        return [Finding(
            WARNING, "Logs", "journald isn't persistent across reboots",
            "Crash-boot kernel logs are discarded on reboot, so a hard freeze's evidence can vanish.",
            "Enable persistent logging: `sudo mkdir -p /var/log/journal && sudo systemctl restart systemd-journald`",
        )]
    return []
|
||||
|
||||
|
||||
def check_nvidia_driver() -> list[Finding]:
    """Detect an NVIDIA kernel-module / userspace-library version mismatch.

    Runs plain ``nvidia-smi`` and looks for its mismatch message in stdout or
    stderr. Returns no findings when the tool is missing, fails to run, or
    reports no mismatch.
    """
    if shutil.which("nvidia-smi") is None:
        return []
    try:
        proc = subprocess.run(["nvidia-smi"], capture_output=True, text=True, timeout=10)
    except (subprocess.SubprocessError, OSError):
        return []
    output = proc.stdout + proc.stderr
    if "Driver/library version mismatch" not in output:
        return []
    return [Finding(
        CRITICAL, "Driver", "NVIDIA driver/library version mismatch",
        "The loaded kernel module and the userspace NVIDIA libraries differ — GPU monitoring will fail until resolved.",
        "Reboot to load the matching module (or finish the interrupted driver update).",
    )]
|
||||
|
||||
|
||||
def _smart_devices() -> list[str]:
    """Device paths from ``smartctl --scan`` (first token of each ``/dev/…`` line).

    Returns an empty list when smartctl cannot be run or times out.
    """
    try:
        proc = subprocess.run(["smartctl", "--scan"], capture_output=True, text=True, timeout=10)
    except (subprocess.SubprocessError, OSError):
        return []
    stripped = (raw.strip() for raw in proc.stdout.splitlines())
    return [entry.split()[0] for entry in stripped if entry.startswith("/dev/")]
|
||||
|
||||
|
||||
def _classify_smart_output(dev: str, combined: str) -> Finding:
    """Turn the combined stdout+stderr of ``smartctl -H <dev>`` into one finding."""
    # SCSI/SAS devices print "SMART Health Status: <text>" instead of PASSED/FAILED.
    scsi_status = None
    for ln in combined.splitlines():
        if ln.strip().startswith("SMART Health Status:"):
            scsi_status = ln.split(":", 1)[1].strip()
            break
    if "Permission denied" in combined or "requires root" in combined.lower():
        return Finding(INFO, "Storage", f"SMART for {dev} needs root", "", "Run: `sudo rigdoctor report`")
    # PASSED is tested before FAILED on purpose: the attribute table of a
    # passing drive can include a "WHEN_FAILED" column header.
    if "PASSED" in combined or (scsi_status is not None and scsi_status.startswith("OK")):
        return Finding(OK, "Storage", f"SMART OK: {dev}", "Overall-health self-assessment passed.")
    if "FAILED" in combined or "FAILING_NOW" in combined or scsi_status is not None:
        return Finding(CRITICAL, "Storage", f"SMART FAILED: {dev}", "The drive reports failing health.", "Back up now and replace the drive.")
    return Finding(
        INFO, "Storage", f"SMART status unavailable: {dev}",
        "smartctl returned no recognisable health result for this device.",
        f"Inspect it manually: `sudo smartctl -H {dev}`",
    )


def check_smart() -> list[Finding]:
    """Report the SMART overall-health verdict for every drive smartctl lists.

    Degrades to INFO findings when smartmontools is missing, no drive can be
    enumerated, root is needed, or a device gives no recognisable answer
    (including a timeout), so no scanned device is dropped without a trace.
    """
    if shutil.which("smartctl") is None:
        return [Finding(
            INFO, "Storage", "SMART not checked (smartmontools missing)",
            "Disk self-health couldn't be read.",
            "Install it for disk health checks: `sudo apt install smartmontools`",
        )]
    devices = _smart_devices()
    if not devices:
        return [Finding(
            INFO, "Storage", "SMART: couldn't enumerate drives",
            "Reading SMART usually needs root.",
            "Run: `sudo rigdoctor report`",
        )]
    findings: list[Finding] = []
    for dev in devices:
        try:
            proc = subprocess.run(["smartctl", "-H", dev], capture_output=True, text=True, timeout=15)
        except (subprocess.SubprocessError, OSError):
            # Empty output classifies as "status unavailable".
            findings.append(_classify_smart_output(dev, ""))
            continue
        findings.append(_classify_smart_output(dev, proc.stdout + proc.stderr))
    return findings
|
||||
|
||||
|
||||
def check_live_temps() -> list[Finding]:
    """Take one live sample and warn about any °C reading at or above 90.

    Returns at most one finding: its title carries the hottest value and its
    detail lists every hot sensor.
    """
    from .sampler import Sampler
    from .sources import available_sources

    sample = Sampler(available_sources()).sample()
    hot = []
    for r in sample.readings:
        if r.unit == "°C" and r.value is not None and r.value >= 90:
            hot.append((r.source, r.label or r.metric, r.value))
    if not hot:
        return []
    peak = max(hot, key=lambda entry: entry[2])[2]
    parts = [f"{s} {label} {v:.0f}°C" for s, label, v in hot]
    finding = Finding(
        WARNING, "Thermal", f"High temperature right now ({peak:.0f}°C)",
        "; ".join(parts), "Check cooling/airflow and reduce load.",
    )
    return [finding]
|
||||
|
||||
|
||||
def run_health_checks() -> list[Finding]:
    """Run all checks and return findings sorted by severity (worst first).

    SMART needs root; if the session collected it via launch elevation, use that
    instead of re-running smartctl (which would just report "needs root").
    """
    from . import elevation

    findings: list[Finding] = [
        *check_nvidia_driver(),
        *check_journal(),
        *check_journal_persistence(),
    ]
    priv = elevation.privileged()
    cached_smart = priv.get("smart") if priv is not None else None
    if cached_smart is not None:
        # Cached entries are plain dicts; rebuild them as Finding objects.
        findings.extend(Finding(**entry) for entry in cached_smart)
    else:
        findings.extend(check_smart())
    findings.extend(check_live_temps())
    # Unknown severities sort last (rank 9); the sort is stable within a rank.
    findings.sort(key=lambda f: _ORDER.get(f.severity, 9))
    return findings
|
||||
@@ -0,0 +1,58 @@
|
||||
"""Optional-dependency installer (M9): figure out what's missing and install it.
|
||||
|
||||
apt-only (D15). Installs run via pkexec/sudo so a normal user gets a single auth
|
||||
prompt; nothing is installed without an explicit confirmation by the caller.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shlex
|
||||
import shutil
|
||||
import subprocess
|
||||
from collections.abc import Callable
|
||||
|
||||
from . import sysenv
|
||||
from .catalog import COMPONENTS, Component
|
||||
|
||||
|
||||
def component_status(present: Callable[[str], bool] | None = None) -> list[tuple[Component, bool]]:
    """Pair each catalog component with whether it's installed (command present).

    ``present`` is the probe applied to each component's command; it defaults
    to ``sysenv.has_command``.
    """
    probe = present if present else sysenv.has_command
    pairs = []
    for component in COMPONENTS:
        pairs.append((component, probe(component.command)))
    return pairs
|
||||
|
||||
|
||||
def missing_packages(components: list[Component]) -> list[str]:
    """De-duplicated apt package list for the given components, order preserved."""
    # dict keys keep first-seen order, which gives ordered de-duplication.
    ordered = dict.fromkeys(pkg for component in components for pkg in component.apt)
    return list(ordered)
|
||||
|
||||
|
||||
def apt_install_command(packages: list[str]) -> list[str]:
    """Build an `apt-get update && install` command, elevated if we're not root.

    Package names are shell-quoted. When not root, ``pkexec`` is preferred
    over ``sudo``; with neither available the bare command is returned.
    """
    quoted = [shlex.quote(pkg) for pkg in packages]
    inner = "apt-get update && apt-get install -y " + " ".join(quoted)
    cmd = ["/bin/sh", "-c", inner]
    if os.geteuid() == 0:
        return cmd
    for elevator in ("pkexec", "sudo"):
        if shutil.which(elevator):
            return [elevator, *cmd]
    return cmd  # no privilege escalation available — will likely fail, surfaced to the caller
|
||||
|
||||
|
||||
def install_packages(packages: list[str]) -> tuple[int, str]:
    """Install the given packages. Returns (exit_code, combined_output).

    An empty list is a successful no-op. A spawn failure or the 900 s timeout
    is reported as exit code 1 with the exception text.
    """
    if not packages:
        return (0, "Nothing to install.")
    try:
        command = apt_install_command(packages)
        proc = subprocess.run(command, capture_output=True, text=True, timeout=900)
    except (subprocess.SubprocessError, OSError) as exc:
        return (1, str(exc))
    return (proc.returncode, proc.stdout + proc.stderr)
|
||||
@@ -0,0 +1,206 @@
|
||||
"""System inventory (M5): collect hardware/OS details, exportable to Markdown/JSON.
|
||||
|
||||
Stdlib + tools already used elsewhere (nvidia-smi, lspci, lsblk, dmidecode). Every
|
||||
probe degrades gracefully; board/BIOS/RAM-module details need dmidecode as root.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import platform
|
||||
import shutil
|
||||
import subprocess
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from .. import __version__
|
||||
from . import sysenv
|
||||
|
||||
|
||||
@dataclass
class Section:
    """One titled group of inventory rows."""

    title: str
    # (label, value) pairs in display order
    items: list[tuple[str, str]]
|
||||
|
||||
|
||||
def _run(cmd: list[str], timeout: float = 12.0) -> str:
    """Run ``cmd`` and return its stdout; ``""`` on non-zero exit, timeout or spawn failure."""
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    except (subprocess.SubprocessError, OSError):
        return ""
    return proc.stdout if proc.returncode == 0 else ""
|
||||
|
||||
|
||||
def _system() -> Section:
    """OS-level facts: distro, kernel, architecture, hostname, Python and app version."""
    uname = os.uname()
    rows = [
        ("Distro", sysenv.distro_name()),
        ("Kernel", uname.release),
        ("Architecture", uname.machine),
        ("Hostname", uname.nodename),
        ("Python", platform.python_version()),
        ("RigDoctor", __version__),
    ]
    return Section("System", rows)
|
||||
|
||||
|
||||
def _cpu() -> Section:
    """CPU model plus core/thread counts parsed from ``/proc/cpuinfo``.

    Cores are counted as distinct (physical id, core id) pairs and omitted
    when none are listed; threads fall back to ``os.cpu_count()`` and then "?".
    """
    model = "?"
    threads = 0
    package = "0"  # most recent "physical id" seen; pairs with the next "core id"
    cores: set[tuple[str, str]] = set()
    try:
        cpuinfo = Path("/proc/cpuinfo").read_text().splitlines()
        for entry in cpuinfo:
            if entry.startswith("model name") and model == "?":
                model = entry.split(":", 1)[1].strip()
            elif entry.startswith("processor"):
                threads += 1
            elif entry.startswith("physical id"):
                package = entry.split(":", 1)[1].strip()
            elif entry.startswith("core id"):
                cores.add((package, entry.split(":", 1)[1].strip()))
    except OSError:
        pass
    rows = [("Model", model)]
    if cores:
        rows.append(("Cores", str(len(cores))))
    rows.append(("Threads", str(threads or os.cpu_count() or "?")))
    return Section("CPU", rows)
|
||||
|
||||
|
||||
def _firmware(dmi: dict) -> Section:
    """Motherboard and BIOS rows from parsed dmidecode data.

    With neither block present, a single placeholder row explains that
    dmidecode needs root.
    """
    board = dmi.get("baseboard", {})
    bios = dmi.get("bios", {})
    rows: list[tuple[str, str]] = []
    if board:
        maker = board.get("Manufacturer", "")
        product = board.get("Product Name", "")
        rows.append(("Motherboard", f"{maker} {product}".strip()))
    if bios:
        vendor = bios.get("Vendor", "")
        version = bios.get("Version", "")
        rows.append(("BIOS", f"{vendor} {version}".strip()))
        if bios.get("Release Date"):
            rows.append(("BIOS date", bios["Release Date"]))
    if rows:
        return Section("Firmware", rows)
    return Section("Firmware", [("Motherboard / BIOS", "run with admin (dmidecode needs root)")])
|
||||
|
||||
|
||||
def _memory(dmi: dict) -> Section:
    """Total RAM from ``/proc/meminfo`` plus per-module details from dmidecode data.

    Without module data, a hint row is added only when dmidecode is installed.
    """
    rows: list[tuple[str, str]] = []
    try:
        meminfo = Path("/proc/meminfo").read_text().splitlines()
        for entry in meminfo:
            if entry.startswith("MemTotal"):
                # The second field divided by 1024 twice is shown with a "GB" label.
                rows.append(("Total", f"{int(entry.split()[1]) / 1024 / 1024:.1f} GB"))
                break
    except (OSError, ValueError, IndexError):
        pass
    modules = dmi.get("memory", [])
    if modules:
        rows.append(("Modules", str(len(modules))))
        for slot, module in enumerate(modules):
            fields = (module.get("Size"), module.get("Type"), module.get("Speed"), module.get("Part Number"))
            rows.append((f"Slot {slot}", " · ".join(part for part in fields if part)))
    elif shutil.which("dmidecode"):
        rows.append(("Modules", "run with admin for module details"))
    return Section("Memory", rows)
|
||||
|
||||
|
||||
def _gpu() -> Section:
    """GPU details from ``nvidia-smi`` when available, otherwise from ``lspci``.

    Only the first line of nvidia-smi output is used. The lspci fallback
    lists every VGA / 3D controller, or one "unknown" row when there is none.
    """
    if shutil.which("nvidia-smi"):
        query = "name,driver_version,vbios_version,memory.total,pcie.link.gen.max,pcie.link.width.max"
        out = _run(["nvidia-smi", f"--query-gpu={query}", "--format=csv,noheader"])
        first = out.strip().splitlines()[0] if out.strip() else ""
        if first:
            labels = ["Name", "Driver", "VBIOS", "VRAM", "PCIe gen (max)", "PCIe width (max)"]
            values = [col.strip() for col in first.split(",")]
            return Section("GPU", list(zip(labels, values)))
    rows = []
    for entry in _run(["lspci"]).splitlines():
        if "VGA compatible controller" in entry or "3D controller" in entry:
            rows.append(("Device", entry.split(":", 2)[-1].strip()))
    return Section("GPU", rows or [("Device", "unknown")])
|
||||
|
||||
|
||||
def _storage() -> Section:
    """Whole disks from ``lsblk`` as ``name -> "model (size)"``; one "unknown" row if none."""
    rows: list[tuple[str, str]] = []
    # TYPE first so MODEL (which can contain spaces) is the trailing field.
    listing = _run(["lsblk", "-dn", "-o", "TYPE,NAME,SIZE,MODEL"])
    for entry in listing.strip().splitlines():
        fields = entry.split(None, 3)
        is_disk = len(fields) >= 3 and fields[0] == "disk"
        if not is_disk:  # skip loop/zram/rom devices
            continue
        name = fields[1]
        size = fields[2]
        model = fields[3] if len(fields) > 3 else ""
        rows.append((name, f"{model} ({size})".strip()))
    if not rows:
        rows = [("Disks", "unknown")]
    return Section("Storage", rows)
|
||||
|
||||
|
||||
def _display() -> Section:
    """Session type and desktop name, read from the XDG / DESKTOP_SESSION environment."""
    env = os.environ
    session = env.get("XDG_SESSION_TYPE", "unknown")
    desktop = env.get("XDG_CURRENT_DESKTOP") or env.get("DESKTOP_SESSION", "unknown")
    return Section("Display", [("Session", session), ("Desktop", desktop)])
|
||||
|
||||
|
||||
def _dmidecode() -> dict:
    """Parse ``dmidecode`` baseboard/BIOS/memory output into plain dicts.

    Returns ``{}`` when dmidecode is missing or produced no output, otherwise
    ``{"baseboard": {...}, "bios": {...}, "memory": [{...}, ...]}``. Memory
    devices without a size, or marked "No Module Installed", are left out.
    """
    if not shutil.which("dmidecode"):
        return {}
    raw = _run(["dmidecode", "-t", "baseboard", "-t", "bios", "-t", "memory"], timeout=15)
    if not raw.strip():
        return {}
    parsed: dict = {"baseboard": {}, "bios": {}, "memory": []}
    for chunk in raw.split("Handle "):
        rows = chunk.splitlines()
        if len(rows) < 2:
            continue
        # Row 0 is the remainder of the "Handle …" line; row 1 is the structure title.
        heading = rows[1].strip()
        fields: dict[str, str] = {}
        for row in rows[2:]:
            if not (row.startswith("\t") and ":" in row):
                continue
            key, _, value = row.strip().partition(":")
            fields[key.strip()] = value.strip()
        if heading == "Base Board Information":
            parsed["baseboard"] = fields
        elif heading == "BIOS Information":
            parsed["bios"] = fields
        elif heading == "Memory Device" and fields.get("Size") and fields["Size"] != "No Module Installed":
            parsed["memory"].append(fields)
    return parsed
|
||||
|
||||
|
||||
def collect() -> list[Section]:
    """Gather every inventory section in display order.

    dmidecode data cached by session elevation is preferred; otherwise
    dmidecode is run directly.
    """
    from . import elevation

    priv = elevation.privileged()
    if priv and priv.get("dmidecode") is not None:
        dmi = priv["dmidecode"]
    else:
        dmi = _dmidecode()
    return [
        _system(),
        _cpu(),
        _firmware(dmi),
        _memory(dmi),
        _gpu(),
        _storage(),
        _display(),
    ]
|
||||
|
||||
|
||||
def to_dict(sections: list[Section]) -> dict:
    """Convert sections to ``{title: {label: value}}`` without losing rows.

    A section may repeat a label (for example several "Device" rows). A plain
    ``dict(items)`` would keep only the last one, so repeats are stored as
    "Label (2)", "Label (3)", … Sections with unique labels are unchanged.
    """
    result: dict = {}
    for section in sections:
        rows: dict = {}
        for label, value in section.items:
            key, n = label, 1
            while key in rows:
                n += 1
                key = f"{label} ({n})"
            rows[key] = value
        result[section.title] = rows
    return result
|
||||
|
||||
|
||||
def from_dict(data: dict) -> list[Section]:
    """Rebuild sections from a ``{title: {label: value}}`` mapping, preserving its order."""
    sections = []
    for title, rows in data.items():
        sections.append(Section(title, list(rows.items())))
    return sections
|
||||
|
||||
|
||||
def render_markdown(sections: list[Section]) -> str:
    """Render sections as Markdown: an H1 title, one H2 per section, bullet rows."""
    lines = ["# RigDoctor system inventory", ""]
    for section in sections:
        lines.append(f"## {section.title}")
        lines.extend(f"- **{label}:** {value}" for label, value in section.items)
        lines.append("")
    document = "\n".join(lines)
    return document.strip() + "\n"
|
||||
|
||||
|
||||
def render_text(sections: list[Section]) -> str:
    """Render sections as plain text: title line, aligned rows, blank line between sections."""
    rendered = []
    for section in sections:
        body = [section.title]
        for label, value in section.items:
            body.append(f" {label:<18} {value}")
        rendered.append("\n".join(body))
    return "\n\n".join(rendered)
|
||||
|
||||
|
||||
def render_json(sections: list[Section]) -> str:
    """Render sections as indented JSON (non-ASCII characters are kept as-is)."""
    payload = to_dict(sections)
    return json.dumps(payload, indent=2, ensure_ascii=False)
|
||||
@@ -0,0 +1,59 @@
|
||||
"""A pseudo-terminal running the host's shell (M12, Tier 3 — host side).
|
||||
|
||||
Spawns the user's login shell in a real PTY so interactive programs work over a shared
|
||||
session: vim, top, tab-completion, colours, Ctrl-C, and `sudo` (which prompts inside the
|
||||
PTY — the host types that password locally, so it's never sent to the guest). Runs as the
|
||||
host's own user — never elevated. Linux-only (uses `pty`/`termios`).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import fcntl
|
||||
import os
|
||||
import pty
|
||||
import signal
|
||||
import struct
|
||||
import termios
|
||||
|
||||
|
||||
class PtySession:
    """The host's shell running in a PTY, driven through a non-blocking master fd.

    After ``close()`` the session is inert: ``master_fd`` is ``-1``, reads
    return ``b""`` and writes/resizes are ignored.
    """

    def __init__(self, rows: int = 24, cols: int = 80):
        """Fork a child on a new PTY and exec ``$SHELL`` (default ``/bin/bash``) in it."""
        self.pid, self.master_fd = pty.fork()
        if self.pid == 0:  # child: become the shell
            os.environ["TERM"] = "xterm-256color"
            shell = os.environ.get("SHELL", "/bin/bash")
            try:
                os.execvp(shell, [shell])
            finally:
                # Only reached when exec failed; never return into the parent's code.
                os._exit(1)
        os.set_blocking(self.master_fd, False)
        self.set_size(rows, cols)

    def set_size(self, rows: int, cols: int) -> None:
        """Set the terminal window size; values are clamped to 0..65535.

        TIOCSWINSZ takes unsigned 16-bit fields, so an out-of-range value
        would otherwise raise ``struct.error`` from ``struct.pack``.
        """
        if self.master_fd < 0:
            return
        rows = min(max(rows, 0), 0xFFFF)
        cols = min(max(cols, 0), 0xFFFF)
        try:
            fcntl.ioctl(self.master_fd, termios.TIOCSWINSZ, struct.pack("HHHH", rows, cols, 0, 0))
        except OSError:
            pass

    def write(self, data: bytes) -> None:
        """Send input to the shell; errors are ignored (best-effort).

        NOTE(review): the fd is non-blocking, so a very large ``data`` may be
        written only partially; confirm whether callers need a retry.
        """
        if self.master_fd < 0:
            return
        try:
            os.write(self.master_fd, data)
        except OSError:
            pass

    def read(self, size: int = 65536) -> bytes:
        """Return up to ``size`` bytes of pending output, or ``b""`` if none/closed."""
        if self.master_fd < 0:
            return b""
        try:
            return os.read(self.master_fd, size)
        except OSError:  # includes BlockingIOError (nothing to read yet)
            return b""

    def close(self) -> None:
        """Close the PTY, hang up the shell and reap it. Safe to call twice.

        The fd is invalidated first so a later read/write cannot touch an
        unrelated descriptor that reused the number. The child gets SIGHUP and
        about 0.2 s to exit before SIGKILL, then is waited on so that no
        zombie is left behind.
        """
        import time

        fd, self.master_fd = self.master_fd, -1
        if fd < 0:
            return  # already closed
        try:
            os.close(fd)
        except OSError:
            pass
        if self.pid <= 0:
            return  # pid 0 / negative would signal whole process groups
        try:
            os.kill(self.pid, signal.SIGHUP)
            for _ in range(10):
                reaped, _status = os.waitpid(self.pid, os.WNOHANG)
                if reaped:
                    return
                time.sleep(0.02)
            os.kill(self.pid, signal.SIGKILL)
            os.waitpid(self.pid, 0)
        except OSError:  # covers ChildProcessError / ProcessLookupError
            pass
|
||||
@@ -0,0 +1,71 @@
|
||||
"""Background-process control for the crash-capture recorder (shared by CLI + GUI).
|
||||
|
||||
Both front-ends start/stop/inspect the same `systemd`-style detached recorder via the
|
||||
PID and status files, so behaviour is identical however you drive it.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from .. import config
|
||||
|
||||
|
||||
def pid_alive(pid: int) -> bool:
    """True if signal 0 can be delivered to process ``pid``.

    ``pid <= 0`` is always reported as not alive: for ``os.kill`` those values
    address process groups / every process rather than one process, so a
    corrupt PID file containing e.g. ``-1`` must never be treated as a
    running recorder (a later SIGTERM to it would hit every process of the user).
    """
    if pid <= 0:
        return False
    try:
        os.kill(pid, 0)
    except OSError:
        return False
    return True
|
||||
|
||||
|
||||
def running_pid() -> int | None:
    """PID recorded in the PID file if that process is alive, else None.

    A missing, unreadable or non-numeric PID file also yields None.
    """
    try:
        recorded = config.PID_FILE.read_text()
        pid = int(recorded.strip())
    except (OSError, ValueError):
        return None
    if not pid_alive(pid):
        return None
    return pid
|
||||
|
||||
|
||||
def read_status() -> dict | None:
    """Parsed contents of the recorder status file, or None if missing/unreadable/invalid."""
    try:
        raw = config.STATUS_FILE.read_text()
        return json.loads(raw)
    except (OSError, ValueError):
        return None
|
||||
|
||||
|
||||
def start_background(interval: float | None = None, out: str | None = None) -> int | None:
    """Spawn a detached `record run`. Returns the child pid, or None if already running.

    ``interval`` and ``out`` are forwarded as ``--interval`` / ``--out`` only
    when truthy. The child's stdout and stderr are appended to the spawn log.
    """
    if running_pid():
        return None
    config.STATE_DIR.mkdir(parents=True, exist_ok=True)
    cmd = [sys.executable, "-m", "rigdoctor", "record", "run"]
    if interval:
        cmd += ["--interval", str(interval)]
    if out:
        cmd += ["--out", out]
    # The child inherits its own copy of the descriptor, so the parent's handle
    # is closed right after the spawn instead of leaking one per start.
    with open(config.SPAWN_LOG, "a") as out_fh:
        proc = subprocess.Popen(
            cmd,
            stdout=out_fh,
            stderr=subprocess.STDOUT,
            stdin=subprocess.DEVNULL,
            start_new_session=True,  # new session: detached from the caller
        )
    return proc.pid
|
||||
|
||||
|
||||
def stop_background() -> bool:
    """Signal the running recorder to stop. Returns False if it wasn't running.

    Also returns False when delivering SIGTERM fails.
    """
    pid = running_pid()
    if pid:
        try:
            os.kill(pid, signal.SIGTERM)
            return True
        except OSError:
            pass
    return False
|
||||
@@ -0,0 +1,93 @@
|
||||
"""Crash-capture recorder (M3): the sampling loop that writes a crash-safe log.
|
||||
|
||||
Runs in the foreground (so it works as a `systemd --user` ExecStart and under
|
||||
manual `record run`). Stop it by calling stop() — typically from a SIGTERM/SIGINT
|
||||
handler installed by the CLI.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from .crashlog import CrashLogWriter, headline
|
||||
from .sampler import Sampler
|
||||
from .sources import available_sources
|
||||
|
||||
|
||||
class Recorder:
    """Foreground sampling loop that writes samples and events to a crash log.

    Optionally mirrors its state into a small JSON status file so other
    processes can inspect it.
    """

    def __init__(
        self,
        interval: float,
        log_path,
        max_bytes: int = 20_000_000,
        backups: int = 10,
        status_path=None,
        sampler: Sampler | None = None,
    ) -> None:
        """Set up the writer and sampler; nothing is sampled until ``run()``.

        Args:
            interval: target seconds between the starts of consecutive samples.
            log_path: crash-log file, passed to ``CrashLogWriter``.
            max_bytes, backups: rotation settings, passed to ``CrashLogWriter``.
            status_path: where to write the status JSON; ``None`` disables it.
            sampler: sampler to use; defaults to one over ``available_sources()``.
        """
        self.interval = interval
        self.sampler = sampler or Sampler(available_sources())
        self.writer = CrashLogWriter(log_path, max_bytes, backups)
        self.log_path = Path(log_path)
        self.status_path = Path(status_path) if status_path else None
        self.samples = 0
        self._stop = threading.Event()
        self._gpu_lost = False
        self._started = time.time()

    def stop(self) -> None:
        """Ask ``run()`` to finish; it wakes from its inter-sample wait immediately."""
        self._stop.set()

    def run(self) -> None:
        """Sample until ``stop()`` is called, then write the session-stop event.

        Shutdown is layered so that a failure while writing the final event
        (for example a full disk) still closes the writer and marks the
        status file as not running; the original error still propagates.
        """
        self.writer.write_event("session-start", f"interval={self.interval:g}s")
        self._write_status(running=True)
        try:
            while not self._stop.is_set():
                t0 = time.monotonic()
                sample = self.sampler.sample()
                self.writer.write_sample(sample)
                self.samples += 1
                self._detect_gpu_lost(sample)
                self._write_status(running=True, sample=sample)
                # Sleep only for what is left of the interval after the work above.
                self._stop.wait(max(0.0, self.interval - (time.monotonic() - t0)))
        finally:
            try:
                self.writer.write_event("session-stop", f"samples={self.samples}")
            finally:
                self.writer.close()
                self._write_status(running=False)

    def _detect_gpu_lost(self, sample) -> None:
        """Log ``gpu-lost`` / ``gpu-recovered`` once per transition, not once per sample."""
        lost = any(
            r.source == "gpu" and r.metric == "status" and r.label == "query-timeout"
            for r in sample.readings
        )
        if lost and not self._gpu_lost:
            self._gpu_lost = True
            self.writer.write_event("gpu-lost", "nvidia-smi query timed out — GPU may be hung/lost")
        elif not lost and self._gpu_lost:
            self._gpu_lost = False
            self.writer.write_event("gpu-recovered", "GPU responding again")

    def _write_status(self, running: bool, sample=None) -> None:
        """Atomically replace the status file; a no-op without ``status_path``.

        I/O errors are ignored: the status file is advisory and must not stop
        the recording.
        """
        if self.status_path is None:
            return
        data = {
            "running": running,
            "pid": os.getpid(),
            "log": str(self.log_path),
            "started": self._started,
            "samples": self.samples,
            "updated": time.time(),
            "gpu_lost": self._gpu_lost,
        }
        if sample is not None:
            data["latest"] = headline(sample)
        try:
            self.status_path.parent.mkdir(parents=True, exist_ok=True)
            tmp = self.status_path.with_suffix(self.status_path.suffix + ".tmp")
            tmp.write_text(json.dumps(data))
            tmp.replace(self.status_path)  # atomic
        except OSError:
            pass
|
||||
@@ -0,0 +1,194 @@
|
||||
"""Session sharing (M12, Tier 2): a read-only live view over a local HTTP server.
|
||||
|
||||
Serves the live sensor snapshot + health report + inventory, **read-only**, gated by a
|
||||
random share token. Bind to localhost for local testing, or to all interfaces behind a
|
||||
user-chosen tunnel (Tailscale / cloudflared / SSH) for remote help. No actions, no terminal.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import secrets
|
||||
from dataclasses import asdict
|
||||
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
|
||||
from .sampler import Sampler
|
||||
from .sources import available_sources
|
||||
|
||||
_PAGE = """<!doctype html>
|
||||
<html><head><meta charset="utf-8"><title>RigDoctor — shared</title>
|
||||
<style>
|
||||
body{background:#101216;color:#e6e8eb;font-family:system-ui,sans-serif;margin:0;padding:24px}
|
||||
h1{font-size:20px;margin:0 0 4px} h2{font-size:14px;color:#8b929c;margin:18px 0 6px}
|
||||
.card{background:#1b1f26;border:1px solid #2a2f39;border-radius:12px;padding:16px;margin:14px 0}
|
||||
table{width:100%;border-collapse:collapse} td{padding:3px 0;font-size:14px}
|
||||
td.v{text-align:right;font-weight:600} .muted{color:#8b929c}
|
||||
.critical{color:#f87171} .warning{color:#fb923c} .ok{color:#4ade80} .info{color:#8b929c}
|
||||
.badge{display:inline-block;background:#38bdf8;color:#06222e;border-radius:6px;padding:1px 8px;font-size:12px;font-weight:700}
|
||||
</style></head><body>
|
||||
<h1>RigDoctor <span class="badge">read-only share</span></h1>
|
||||
<p class="muted">A live view shared by the machine's owner. You can look, not change anything.</p>
|
||||
<div class="card"><div id="live">loading…</div></div>
|
||||
<div class="card"><h2 style="margin-top:0">Health</h2><div id="health">loading…</div></div>
|
||||
<div class="card"><h2 style="margin-top:0">Inventory</h2><div id="inv">loading…</div></div>
|
||||
<script>
|
||||
const T=new URLSearchParams(location.search).get('t');
|
||||
const j=async p=>(await fetch(p+'?t='+encodeURIComponent(T))).json();
|
||||
const fmt=(v,u)=>v==null?'N/A':(u==='\\u00b0C'?(+v).toFixed(1)+' °C':(u?v+' '+u:v));
|
||||
async function live(){try{const d=await j('/api/snapshot');let h='';
|
||||
for(const[g,items]of Object.entries(d.groups)){h+='<h2>'+g.toUpperCase()+'</h2><table>';
|
||||
for(const it of items)h+='<tr><td class="muted">'+it.name+'</td><td class="v">'+fmt(it.value,it.unit)+'</td></tr>';
|
||||
h+='</table>';}document.getElementById('live').innerHTML=h;}catch(e){}}
|
||||
async function once(){try{const r=await j('/api/report');
|
||||
document.getElementById('health').innerHTML=r.map(f=>'<div><span class="'+f.severity+'">['+f.severity.toUpperCase()+']</span> '+f.category+': '+f.title+'</div>').join('')||'no findings';}catch(e){}
|
||||
try{const inv=await j('/api/inventory');let h='';
|
||||
for(const[s,kv]of Object.entries(inv)){h+='<h2>'+s+'</h2><table>';
|
||||
for(const[k,v]of Object.entries(kv))h+='<tr><td class="muted">'+k+'</td><td class="v">'+v+'</td></tr>';
|
||||
h+='</table>';}document.getElementById('inv').innerHTML=h;}catch(e){}}
|
||||
live();once();setInterval(live,2000);
|
||||
</script></body></html>"""
|
||||
|
||||
|
||||
def _snapshot(sampler: Sampler) -> dict:
|
||||
sample = sampler.sample()
|
||||
groups: dict[str, list] = {}
|
||||
for r in sample.readings:
|
||||
if r.metric == "name":
|
||||
item = {"name": "device", "value": r.label, "unit": ""}
|
||||
else:
|
||||
item = {"name": (r.label + " " + r.metric).strip() if r.label else r.metric,
|
||||
"value": r.value, "unit": r.unit}
|
||||
groups.setdefault(r.source, []).append(item)
|
||||
return {"ts": sample.ts, "groups": groups}
|
||||
|
||||
|
||||
def _report() -> list:
    """Run the health checks and return each finding as a plain dict."""
    # Imported lazily, as in the rest of this module's helpers.
    from .health import run_health_checks

    findings = run_health_checks()
    return list(map(asdict, findings))
|
||||
|
||||
|
||||
def _inventory() -> dict:
    """Collect the system inventory and return it in its dict form."""
    from .inventory import collect, to_dict

    collected = collect()
    return to_dict(collected)
|
||||
|
||||
|
||||
# --- Relay (M12) frames: a host streams these; a guest renders them. -----------------
|
||||
|
||||
def host_full_frame(sampler: Sampler) -> str:
    """Build the initial relay frame as JSON: snapshot, health report and inventory."""
    snapshot = _snapshot(sampler)
    report = _report()
    inventory = _inventory()
    frame = {
        "type": "full",
        "snapshot": snapshot,
        "report": report,
        "inventory": inventory,
    }
    return json.dumps(frame)
|
||||
|
||||
|
||||
def host_snapshot_frame(sampler: Sampler) -> str:
    """Build the recurring relay frame as JSON: only the live snapshot."""
    frame = {"type": "snapshot", "snapshot": _snapshot(sampler)}
    return json.dumps(frame)
|
||||
|
||||
|
||||
def _fmt(value, unit: str) -> str:
|
||||
if value is None:
|
||||
return "N/A"
|
||||
if unit == "°C":
|
||||
try:
|
||||
return f"{float(value):.1f} °C"
|
||||
except (TypeError, ValueError):
|
||||
return str(value)
|
||||
return f"{value} {unit}".strip()
|
||||
|
||||
|
||||
def guest_html(snapshot: dict | None, report: list | None, inventory: dict | None) -> str:
    """Turn a received frame into a read-only HTML fragment for the guest's view.

    Any of the three parts may be missing or empty, in which case that section is
    left out. All text taken from the frame is HTML-escaped before insertion.
    """
    import html as _html

    def esc(x) -> str:
        return _html.escape(str(x))

    severity_colors = {"critical": "#f87171", "warning": "#fb923c", "ok": "#4ade80"}
    chunks = ['<div style="font-family:sans-serif;color:#e6e8eb">']

    # Live readings: one table per source group.
    if snapshot:
        for group, items in snapshot.get("groups", {}).items():
            chunks.append(f'<h3 style="color:#8b929c">{esc(group).upper()}</h3><table width="100%">')
            chunks.extend(
                f'<tr><td style="color:#8b929c">{esc(it.get("name"))}</td>'
                f'<td align="right"><b>{esc(_fmt(it.get("value"), it.get("unit", "")))}</b></td></tr>'
                for it in items
            )
            chunks.append("</table>")

    # Health findings: one coloured line each.
    if report:
        chunks.append('<h3 style="color:#8b929c">HEALTH</h3>')
        for finding in report:
            sev = finding.get("severity", "info")
            tint = severity_colors.get(sev, "#8b929c")
            chunks.append(
                f'<div><span style="color:{tint}">[{esc(sev).upper()}]</span> '
                f'{esc(finding.get("category"))}: {esc(finding.get("title"))}</div>'
            )

    # Inventory: one key/value table per section.
    if inventory:
        chunks.append('<h3 style="color:#8b929c">INVENTORY</h3>')
        for section, kv in inventory.items():
            chunks.append(f'<h4 style="margin:6px 0;color:#8b929c">{esc(section)}</h4><table width="100%">')
            chunks.extend(
                f'<tr><td style="color:#8b929c">{esc(k)}</td><td align="right"><b>{esc(v)}</b></td></tr>'
                for k, v in kv.items()
            )
            chunks.append("</table>")

    chunks.append("</div>")
    return "".join(chunks)
|
||||
|
||||
|
||||
class _Handler(BaseHTTPRequestHandler):
|
||||
def log_message(self, *args): # quiet
|
||||
pass
|
||||
|
||||
def _authed(self, query: dict) -> bool:
|
||||
return secrets.compare_digest(query.get("t", [""])[0], self.server.token)
|
||||
|
||||
def _send(self, code: int, ctype: str, body: bytes) -> None:
|
||||
self.send_response(code)
|
||||
self.send_header("Content-Type", ctype)
|
||||
self.send_header("Content-Length", str(len(body)))
|
||||
self.end_headers()
|
||||
self.wfile.write(body)
|
||||
|
||||
def do_GET(self) -> None: # noqa: N802
|
||||
parsed = urlparse(self.path)
|
||||
if not self._authed(parse_qs(parsed.query)):
|
||||
self._send(403, "text/plain", b"Forbidden: missing or invalid share token")
|
||||
return
|
||||
if parsed.path == "/":
|
||||
self._send(200, "text/html; charset=utf-8", _PAGE.encode())
|
||||
elif parsed.path == "/api/snapshot":
|
||||
self._send(200, "application/json", json.dumps(_snapshot(self.server.sampler)).encode())
|
||||
elif parsed.path == "/api/report":
|
||||
self._send(200, "application/json", json.dumps(_report()).encode())
|
||||
elif parsed.path == "/api/inventory":
|
||||
self._send(200, "application/json", json.dumps(_inventory()).encode())
|
||||
else:
|
||||
self._send(404, "text/plain", b"Not found")
|
||||
|
||||
|
||||
class _Server(ThreadingHTTPServer):
    """Threaded HTTP server that carries the share token and a sampler for `_Handler`."""

    # Request-handling threads are daemons, so they do not keep the process alive.
    daemon_threads = True

    def __init__(self, addr, token: str):
        # addr is the (host, port) pair handed to ThreadingHTTPServer.
        super().__init__(addr, _Handler)
        # Read by _Handler._authed via self.server.token.
        self.token = token
        # One sampler for the server; _Handler reads it for /api/snapshot.
        self.sampler = Sampler(available_sources())
|
||||
|
||||
|
||||
def make_server(host: str = "127.0.0.1", port: int = 0, token: str | None = None) -> tuple[_Server, str]:
    """Create a share server bound to ``(host, port)`` and return ``(server, token)``.

    When no token is supplied (or it is empty), a random URL-safe one is generated.
    """
    if not token:
        token = secrets.token_urlsafe(16)
    server = _Server((host, port), token)
    return server, token
|
||||
|
||||
|
||||
def serve(host: str = "127.0.0.1", port: int = 8765) -> int:
    """Run the share server in the foreground until Ctrl-C.

    Prints the share URL (including the token) on start.

    Args:
        host: Interface to bind.
        port: TCP port to bind; the printed URL uses the port actually bound.

    Returns:
        0 once the server has stopped.
    """
    srv, token = make_server(host, port)
    url = f"http://{host}:{srv.server_address[1]}/?t={token}"
    print(
        f"Sharing a read-only live view at:\n  {url}\n\n"
        "Anyone with this URL (and network access to this host) can VIEW your sensors,\n"
        "health report, and inventory — read-only. For remote help, expose it via a tunnel\n"
        "(Tailscale / cloudflared / `ssh -R`). Press Ctrl-C to stop sharing.",
        flush=True,
    )
    try:
        srv.serve_forever()
    except KeyboardInterrupt:
        print("\nStopped sharing.")
    finally:
        try:
            srv.shutdown()
        finally:
            # shutdown() only stops the serve loop; the listening socket stays open
            # (and the port stays bound) until server_close() is called.
            srv.server_close()
    return 0
|
||||
@@ -0,0 +1,49 @@
|
||||
"""Environment detection for the installer (M9)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
import subprocess
|
||||
|
||||
|
||||
def package_manager() -> str | None:
    """Return 'apt' when an apt front-end is on PATH, else None (only apt is supported, D15)."""
    for tool in ("apt-get", "apt"):
        if shutil.which(tool):
            return "apt"
    return None
|
||||
|
||||
|
||||
def has_command(cmd: str) -> bool:
    """Report whether `cmd` resolves to an executable on PATH."""
    located = shutil.which(cmd)
    return located is not None
|
||||
|
||||
|
||||
def distro_name() -> str:
    """Return the distribution's display name from /etc/os-release.

    Prefers PRETTY_NAME, then NAME, and falls back to "Linux" when the file is
    missing, unreadable, or has neither key.
    """
    data: dict[str, str] = {}
    try:
        # os-release is specified as UTF-8; errors="replace" keeps a stray byte from
        # raising a decode error that the OSError handler below would not catch.
        with open("/etc/os-release", encoding="utf-8", errors="replace") as f:
            for raw in f:
                line = raw.strip()
                if not line or line.startswith("#"):
                    continue  # blank line or comment
                key, sep, value = line.partition("=")
                if not sep:
                    continue  # not an assignment
                value = value.strip()
                # Values may be wrapped in double OR single quotes; drop one matching pair.
                if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                    value = value[1:-1]
                data[key.strip()] = value
    except OSError:
        return "Linux"
    return data.get("PRETTY_NAME") or data.get("NAME") or "Linux"
|
||||
|
||||
|
||||
def gpu_vendors() -> list[str]:
    """Return the GPU vendors detected on this machine, in the order NVIDIA, AMD, Intel.

    NVIDIA is reported when `nvidia-smi` is on PATH. Otherwise vendors come from
    `lspci`, looking only at display-class devices: matching vendor keywords against
    the whole output would report an AMD GPU for any AMD CPU host bridge, or an Intel
    GPU for any Intel NIC or chipset sitting next to someone else's VGA device.
    """
    display_classes = ("vga compatible controller", "3d controller", "display controller")
    found: set[str] = set()
    if shutil.which("nvidia-smi"):
        found.add("NVIDIA")

    out = ""
    if shutil.which("lspci"):
        try:
            out = subprocess.run(["lspci"], capture_output=True, text=True, timeout=10).stdout
        except (subprocess.SubprocessError, OSError):
            out = ""  # best effort: treat a failing lspci as "nothing detected"

    for line in out.lower().splitlines():
        if not any(cls in line for cls in display_classes):
            continue
        if "nvidia" in line:
            found.add("NVIDIA")
        if "amd/ati" in line or "advanced micro devices" in line or "radeon" in line:
            found.add("AMD")
        if "intel" in line:
            found.add("Intel")

    return [vendor for vendor in ("NVIDIA", "AMD", "Intel") if vendor in found]
|
||||
@@ -0,0 +1,42 @@
|
||||
"""Uninstall the user-local RigDoctor install (app files; optionally all data).
|
||||
|
||||
Mirrors `install.sh --uninstall`. The removal runs in a detached shell so it can
|
||||
delete the venv the current process is running from once we exit.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import shlex
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from .. import config
|
||||
from . import reccontrol
|
||||
|
||||
|
||||
def targets(purge: bool = False) -> list[Path]:
    """List the paths an uninstall removes; with `purge`, config/state/data dirs too."""
    local_bin = Path.home() / ".local" / "bin"
    share = config.DATA_DIR.parent  # ~/.local/share
    paths = [
        config.DATA_DIR / "venv",
        local_bin / "rigdoctor",
        local_bin / "rigdoctor-gui",
        share / "applications" / "rigdoctor.desktop",
        share / "icons" / "hicolor" / "scalable" / "apps" / "rigdoctor.svg",
    ]
    if purge:
        paths.extend((config.CONFIG_DIR, config.STATE_DIR, config.DATA_DIR))
    return paths
|
||||
|
||||
|
||||
def uninstall(purge: bool = False) -> None:
    """Stop the background recorder, drop the token when purging, then delete the install.

    Deletion is handed to a detached shell that waits a second first, so it can
    remove the venv this process is running from after we exit.
    """
    reccontrol.stop_background()
    if purge:
        config.clear_token()  # removes keyring entry + any file fallback
    quoted = [shlex.quote(str(path)) for path in targets(purge)]
    script = "sleep 1; rm -rf " + " ".join(quoted)
    subprocess.Popen(["/bin/sh", "-c", script], start_new_session=True)
|
||||
@@ -0,0 +1,119 @@
|
||||
"""Update check (M13): ask the Gitea releases API for the latest version + notes.
|
||||
|
||||
Stdlib-only (urllib). The Gitea instance requires sign-in, so updates are gated to
|
||||
account holders via a Personal Access Token (D18): set $RIGDOCTOR_TOKEN or save one
|
||||
with `rigdoctor login`. Returns the latest tag, its release notes (body), and a clear
|
||||
state for the UI; `apply_update` performs the no-root self-update.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
|
||||
from .. import __version__
|
||||
from ..config import load_token
|
||||
|
||||
# Gitea instance and repository the update check talks to.
GITEA_BASE = "https://git.jesseyvanofferen.com"
REPO = "jessey/rigdoctor"
# API endpoint queried by fetch_latest().
LATEST_API = f"{GITEA_BASE}/api/v1/repos/{REPO}/releases/latest"
# Web pages for the releases list and the token settings (not fetched in this module's visible code).
RELEASES_PAGE = f"{GITEA_BASE}/{REPO}/releases"
TOKEN_PAGE = f"{GITEA_BASE}/user/settings/applications"

# Update states
NO_TOKEN = "no-token"  # no token available from load_token()
AUTH = "auth"  # server answered 401/403
NETWORK = "network"  # any other failure while fetching/parsing
UP_TO_DATE = "up-to-date"  # latest tag is not newer than the running version
AVAILABLE = "available"  # latest tag is newer than the running version
|
||||
|
||||
|
||||
def _parse(version: str) -> tuple[int, ...]:
|
||||
return tuple(int(p) for p in version.lstrip("vV").split(".") if p.isdigit())
|
||||
|
||||
|
||||
def is_newer(latest: str, current: str = __version__) -> bool:
    """Return True when `latest` parses to a higher version than `current`.

    Unparseable input (e.g. a non-string) counts as "not newer".
    """
    try:
        latest_parts = _parse(latest)
        current_parts = _parse(current)
    except (ValueError, AttributeError):
        return False
    return latest_parts > current_parts
|
||||
|
||||
|
||||
def fetch_latest(timeout: float = 5.0) -> tuple[str | None, str, str | None]:
    """Query the latest release and return ``(tag, notes, error)``.

    `error` is NO_TOKEN, AUTH or NETWORK on failure (tag is then None and notes
    empty), or None on success.
    """
    token = load_token()
    if not token:
        return (None, "", NO_TOKEN)

    auth_headers = {"Accept": "application/json", "Authorization": f"token {token}"}
    request = urllib.request.Request(LATEST_API, headers=auth_headers)
    try:
        with urllib.request.urlopen(request, timeout=timeout) as resp:  # noqa: S310 (https)
            payload = json.load(resp)
            tag = payload.get("tag_name") or None
            notes = (payload.get("body") or "").strip()
            return (tag, notes, None)
    except urllib.error.HTTPError as exc:
        # 401/403 mean the token was rejected; every other status is reported as NETWORK.
        failure = AUTH if exc.code in (401, 403) else NETWORK
        return (None, "", failure)
    except Exception:
        return (None, "", NETWORK)
|
||||
|
||||
|
||||
def check_latest(timeout: float = 5.0) -> str | None:
    """Shortcut for the latest tag alone; None when it could not be fetched."""
    return fetch_latest(timeout)[0]
|
||||
|
||||
|
||||
def update_state(timeout: float = 5.0) -> tuple[str, str | None, str]:
    """Return ``(state, tag, notes)`` where state is one of the update-state constants.

    A fetch error is passed through as the state, with no tag and empty notes.
    """
    tag, notes, error = fetch_latest(timeout)
    if error:
        return (error, None, "")
    state = AVAILABLE if tag and is_newer(tag) else UP_TO_DATE
    return (state, tag, notes)
|
||||
|
||||
|
||||
def list_releases(limit: int = 15, timeout: float = 6.0) -> tuple[list[tuple[str, str, str]], str | None]:
    """Fetch recent releases for the in-app changelog.

    Returns ``(rows, error)``: rows are ``(tag, date, notes)`` tuples with the date
    cut to its first 10 characters; error is NO_TOKEN/AUTH/NETWORK or None.
    """
    token = load_token()
    if not token:
        return ([], NO_TOKEN)

    def as_row(release) -> tuple[str, str, str]:
        tag = release.get("tag_name") or "?"
        published = (release.get("published_at") or "")[:10]
        notes = (release.get("body") or "").strip()
        return (tag, published, notes)

    endpoint = f"{GITEA_BASE}/api/v1/repos/{REPO}/releases?limit={limit}"
    auth_headers = {"Accept": "application/json", "Authorization": f"token {token}"}
    request = urllib.request.Request(endpoint, headers=auth_headers)
    try:
        with urllib.request.urlopen(request, timeout=timeout) as resp:  # noqa: S310 (https)
            payload = json.load(resp)
        return ([as_row(release) for release in payload], None)
    except urllib.error.HTTPError as exc:
        failure = AUTH if exc.code in (401, 403) else NETWORK
        return ([], failure)
    except Exception:
        return ([], NETWORK)
|
||||
|
||||
|
||||
def apply_update(tag: str) -> tuple[int, str]:
    """Upgrade the running environment to release `tag` with an authenticated pip install.

    Runs ``pip install --upgrade`` on a ``git+https`` requirement for the repository
    at `tag`, with the saved token embedded as the ``oauth2`` password. Returns
    ``(exit_code, output)``; the token is replaced by ``***`` in the output.
    """
    token = load_token()
    if not token:
        return (1, "No update token configured. Run `rigdoctor login`.")

    def scrub(text: str) -> str:
        return text.replace(token, "***")

    host = GITEA_BASE.split("://", 1)[1]
    requirement = f"rigdoctor[gui] @ git+https://oauth2:{token}@{host}/{REPO}.git@{tag}"
    pip_cmd = [sys.executable, "-m", "pip", "install", "--upgrade", requirement]
    try:
        proc = subprocess.run(pip_cmd, capture_output=True, text=True, timeout=1800)
    except (subprocess.SubprocessError, OSError) as exc:
        return (1, scrub(str(exc)))
    return (proc.returncode, scrub(proc.stdout + proc.stderr))
|
||||
@@ -3,18 +3,28 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from PySide6.QtGui import QIcon
|
||||
from PySide6.QtWidgets import QApplication
|
||||
|
||||
from ..config import load_config
|
||||
from . import desktop
|
||||
from .main_window import MainWindow
|
||||
from .theme import STYLESHEET
|
||||
|
||||
ICON = Path(__file__).parent / "assets" / "rigdoctor.svg"
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
|
||||
desktop.ensure() # self-register icon + .desktop so updates show it without re-installing
|
||||
app = QApplication(argv if argv is not None else sys.argv)
|
||||
app.setApplicationName("RigDoctor")
|
||||
app.setApplicationDisplayName("RigDoctor")
|
||||
# Match the installed rigdoctor.desktop so the dock/launcher shows our icon (Wayland app-id).
|
||||
app.setDesktopFileName("rigdoctor")
|
||||
if ICON.exists():
|
||||
app.setWindowIcon(QIcon(str(ICON)))
|
||||
app.setStyle("Fusion")
|
||||
app.setStyleSheet(STYLESHEET)
|
||||
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 16 16">
|
||||
<path d="M3.5 8.5 L6.5 11.5 L12.5 4.5" fill="none" stroke="#06222e"
|
||||
stroke-width="2.4" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 237 B |
@@ -0,0 +1,8 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="256" height="256" viewBox="0 0 256 256">
|
||||
<rect x="8" y="8" width="240" height="240" rx="52" fill="#15181e"/>
|
||||
<circle cx="128" cy="128" r="84" fill="none" stroke="#2a2f39" stroke-width="14"/>
|
||||
<path d="M128 44 a84 84 0 1 1 -59.4 24.6" fill="none" stroke="#38bdf8"
|
||||
stroke-width="14" stroke-linecap="round"/>
|
||||
<path d="M60 132 H100 L116 96 L140 168 L156 132 H196" fill="none" stroke="#e6e8eb"
|
||||
stroke-width="14" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 533 B |
@@ -0,0 +1,51 @@
|
||||
"""Best-effort desktop integration: install our icon + .desktop so the dock shows it.
|
||||
|
||||
Runs at GUI launch (idempotent), so a self-update + relaunch refreshes the icon without
|
||||
re-running install.sh. No-op for non-installed (dev) runs where the launcher is absent.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from .. import config
|
||||
|
||||
# Icon shipped next to this module; ensure() copies it into the user's icon theme dir.
_ICON_SRC = Path(__file__).parent / "assets" / "rigdoctor.svg"

# Template for the .desktop launcher; {exec} is filled with the rigdoctor-gui path by ensure().
_DESKTOP = """[Desktop Entry]
Type=Application
Name=RigDoctor
Comment=Hardware monitoring & crash diagnostics for Linux gamers
Exec={exec}
Icon=rigdoctor
Terminal=false
Categories=System;Monitor;Utility;
StartupWMClass=rigdoctor
"""
|
||||
|
||||
|
||||
def ensure() -> None:
    """Install or refresh the icon and .desktop entry under the user's share dir.

    Files are rewritten only when missing or different. The .desktop entry is
    skipped when no `rigdoctor-gui` launcher sits next to the running interpreter.
    Filesystem errors are ignored — this is best-effort.
    """
    share_dir = config.DATA_DIR.parent  # ~/.local/share

    # Step 1: icon.
    try:
        if _ICON_SRC.exists():
            icon_target = share_dir / "icons" / "hicolor" / "scalable" / "apps" / "rigdoctor.svg"
            icon_target.parent.mkdir(parents=True, exist_ok=True)
            icon_is_stale = (
                not icon_target.exists()
                or icon_target.read_bytes() != _ICON_SRC.read_bytes()
            )
            if icon_is_stale:
                shutil.copyfile(_ICON_SRC, icon_target)
    except OSError:
        pass

    # Step 2: launcher entry, only for a real install.
    launcher = Path(sys.executable).with_name("rigdoctor-gui")
    if not launcher.exists():  # dev / not a normal install — don't fabricate a .desktop
        return
    try:
        entry_path = share_dir / "applications" / "rigdoctor.desktop"
        wanted = _DESKTOP.format(exec=launcher)
        entry_path.parent.mkdir(parents=True, exist_ok=True)
        entry_is_stale = not entry_path.exists() or entry_path.read_text() != wanted
        if entry_is_stale:
            entry_path.write_text(wanted)
    except OSError:
        pass
|
||||
@@ -0,0 +1,129 @@
|
||||
"""Health page (M4 in the GUI): runs the health checks and shows findings as cards."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
import time
|
||||
|
||||
from PySide6.QtCore import Qt, QTimer, Signal
|
||||
from PySide6.QtWidgets import (
|
||||
QFrame,
|
||||
QHBoxLayout,
|
||||
QLabel,
|
||||
QPushButton,
|
||||
QScrollArea,
|
||||
QVBoxLayout,
|
||||
QWidget,
|
||||
)
|
||||
|
||||
from .theme import ACCENT, CRIT, GOOD, MUTED, WARN
|
||||
|
||||
# Finding severity -> (label shown in the card header, header text colour).
# _finding_widget falls back to ("?", MUTED) for a severity not listed here.
_SEV = {
    "critical": ("CRITICAL", CRIT),
    "warning": ("WARNING", WARN),
    "info": ("INFO", MUTED),
    "ok": ("OK", GOOD),
}
|
||||
|
||||
|
||||
def _finding_widget(finding) -> QFrame:
    """Build a card for one finding: coloured header plus optional detail and suggestion."""
    severity_text, severity_color = _SEV.get(finding.severity, ("?", MUTED))

    card = QFrame()
    card.setObjectName("Card")
    layout = QVBoxLayout(card)
    layout.setContentsMargins(16, 12, 16, 12)
    layout.setSpacing(4)

    header = QLabel(f"{severity_text} · {finding.category}: {finding.title}")
    header.setStyleSheet(f"color: {severity_color}; font-weight: 700; background: transparent;")
    header.setWordWrap(True)
    layout.addWidget(header)

    if finding.detail:
        detail_label = QLabel(finding.detail)
        detail_label.setObjectName("Muted")
        detail_label.setWordWrap(True)
        layout.addWidget(detail_label)

    if finding.suggestion:
        hint_label = QLabel(f"→ {finding.suggestion}")
        hint_label.setStyleSheet(f"color: {ACCENT}; background: transparent;")
        hint_label.setWordWrap(True)
        layout.addWidget(hint_label)

    return card
|
||||
|
||||
|
||||
class HealthPage(QWidget):
    """Health page: runs the health checks on a worker thread and lists the findings.

    The checks run off the GUI thread; the result comes back through the `_result`
    signal, so all widget updates happen in `_render_findings`.
    """

    _result = Signal(object)  # list[Finding], or None when the check failed

    def __init__(self) -> None:
        super().__init__()
        self.setObjectName("Page")
        self._result.connect(self._render_findings)

        root = QVBoxLayout(self)
        root.setContentsMargins(20, 18, 20, 18)
        root.setSpacing(16)

        # Header row: title, status text, run button.
        header = QHBoxLayout()
        title = QLabel("Health")
        title.setObjectName("PageTitle")
        header.addWidget(title)
        header.addStretch(1)
        self._status = QLabel("")
        self._status.setObjectName("Muted")
        header.addWidget(self._status)
        self._run_btn = QPushButton("Run health report")
        self._run_btn.setObjectName("PrimaryButton")
        self._run_btn.clicked.connect(self._run)
        header.addWidget(self._run_btn)
        root.addLayout(header)

        # Scrollable list of finding cards.
        scroll = QScrollArea()
        scroll.setWidgetResizable(True)
        scroll.setFrameShape(QFrame.Shape.NoFrame)
        scroll.setStyleSheet("background: transparent;")
        self._container = QWidget()
        self._list = QVBoxLayout(self._container)
        self._list.setContentsMargins(0, 0, 0, 0)
        self._list.setSpacing(10)
        self._list.setAlignment(Qt.AlignmentFlag.AlignTop)
        scroll.setWidget(self._container)
        root.addWidget(scroll, 1)

        QTimer.singleShot(300, self._run)  # auto-run shortly after the window opens

    def _run(self) -> None:
        """Disable the button and start the checks on a daemon thread."""
        self._run_btn.setEnabled(False)
        self._status.setText("Scanning logs, SMART, and driver…")
        threading.Thread(target=self._work, daemon=True).start()

    def _work(self) -> None:
        """Worker-thread body: run the checks and emit the findings, or None on failure."""
        try:
            # Imported inside the try so that an import failure is also reported as a
            # failed check instead of killing the thread and leaving the button disabled.
            from ..core.health import run_health_checks

            findings = run_health_checks()
        except Exception:
            # None (not []) is the "check failed" marker _render_findings looks for; an
            # empty list would wipe the previous results and read as "0 checks".
            findings = None
        self._result.emit(findings)

    def _render_findings(self, findings) -> None:
        """Replace the cards with `findings` and update the summary line."""
        self._run_btn.setEnabled(True)
        if findings is None:  # collection failed — keep current results
            self._status.setText("check failed")
            return

        # Remove every existing item (cards and the trailing stretch).
        while self._list.count():
            item = self._list.takeAt(0)
            w = item.widget()
            if w is not None:
                w.deleteLater()

        crit = sum(1 for f in findings if f.severity == "critical")
        warn = sum(1 for f in findings if f.severity == "warning")
        self._status.setText(
            f"{crit} critical · {warn} warning · {len(findings)} checks · "
            f"{time.strftime('%H:%M:%S')}"
        )
        for finding in findings:
            self._list.addWidget(_finding_widget(finding))
        self._list.addStretch(1)
|
||||
@@ -2,36 +2,54 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from PySide6.QtCore import Qt
|
||||
import os
|
||||
import sys
|
||||
import threading
|
||||
|
||||
from PySide6.QtCore import Qt, QProcess, QTimer, Signal
|
||||
from PySide6.QtGui import QTextDocument
|
||||
from PySide6.QtWidgets import (
|
||||
QApplication,
|
||||
QButtonGroup,
|
||||
QDialog,
|
||||
QFrame,
|
||||
QHBoxLayout,
|
||||
QLabel,
|
||||
QMainWindow,
|
||||
QMessageBox,
|
||||
QPushButton,
|
||||
QStackedWidget,
|
||||
QTextEdit,
|
||||
QVBoxLayout,
|
||||
QWidget,
|
||||
)
|
||||
|
||||
from .. import __version__
|
||||
from ..config import load_config
|
||||
from ..core import alerts, elevation, updates
|
||||
from .dashboard import Dashboard
|
||||
from .theme import ACCENT, MUTED
|
||||
from .health_page import HealthPage
|
||||
from .notifications_page import NotificationsPage
|
||||
from .recorder_page import RecorderPage
|
||||
from .setup_page import SetupPage
|
||||
from .share_page import SharePage
|
||||
from .theme import ACCENT, GOOD, MUTED
|
||||
from .worker import SamplerWorker
|
||||
|
||||
_NAV_ITEMS = ["Dashboard", "Logs", "Health", "Inventory"]
|
||||
_PLACEHOLDERS = {
|
||||
"Logs": "Captured crash logs will appear here once the logger (M3) lands.",
|
||||
"Health": "The health report (M4) — log scan + plain-language findings — lands here.",
|
||||
"Inventory": "System inventory (M5) — CPU/GPU/board/RAM/drivers — lands here.",
|
||||
}
|
||||
_NAV_ITEMS = ["Dashboard", "Logs", "Health", "Setup", "Notifications", "Share"]
|
||||
|
||||
|
||||
class MainWindow(QMainWindow):
|
||||
_update_checked = Signal(object) # (state, tag, notes)
|
||||
_update_applied = Signal(int) # pip exit code
|
||||
_changelog_ready = Signal(object) # ([(tag, date, notes)], error)
|
||||
_elevated = Signal() # privileged data collected at launch
|
||||
|
||||
def __init__(self, interval: float = 1.0) -> None:
|
||||
super().__init__()
|
||||
self.setWindowTitle("RigDoctor")
|
||||
self.resize(1000, 680)
|
||||
cfg = load_config()
|
||||
|
||||
central = QWidget()
|
||||
self.setCentralWidget(central)
|
||||
@@ -46,9 +64,18 @@ class MainWindow(QMainWindow):
|
||||
content_layout.setContentsMargins(0, 0, 0, 0)
|
||||
self._stack = QStackedWidget()
|
||||
self.dashboard = Dashboard()
|
||||
self._stack.addWidget(self.dashboard)
|
||||
for name in _NAV_ITEMS[1:]:
|
||||
self._stack.addWidget(self._placeholder_page(name, _PLACEHOLDERS[name]))
|
||||
self.recorder_page = RecorderPage()
|
||||
self.health_page = HealthPage()
|
||||
self.setup_page = SetupPage()
|
||||
self.notifications_page = NotificationsPage()
|
||||
self.notifications_page.changed.connect(self._apply_alert_settings)
|
||||
self.share_page = SharePage()
|
||||
self._stack.addWidget(self.dashboard) # 0 Dashboard
|
||||
self._stack.addWidget(self.recorder_page) # 1 Logs
|
||||
self._stack.addWidget(self.health_page) # 2 Health
|
||||
self._stack.addWidget(self.setup_page) # 3 Setup
|
||||
self._stack.addWidget(self.notifications_page) # 4 Notifications
|
||||
self._stack.addWidget(self.share_page) # 5 Share
|
||||
content_layout.addWidget(self._stack)
|
||||
|
||||
layout.addWidget(self._build_sidebar())
|
||||
@@ -56,8 +83,40 @@ class MainWindow(QMainWindow):
|
||||
|
||||
self._worker = SamplerWorker(interval=interval)
|
||||
self._worker.sampled.connect(self.dashboard.update_sample)
|
||||
# Desktop alerts (M8): overheat / GPU-lost from the sample stream, new-version below.
|
||||
# Configurable on the Notifications page; gated by AlertMonitor.enabled.
|
||||
self._notified_update_tag = None
|
||||
self._alert_monitor = alerts.AlertMonitor(
|
||||
gpu_temp=float(cfg.get("gpu_temp_alert", 90.0)),
|
||||
cpu_temp=float(cfg.get("cpu_temp_alert", 95.0)),
|
||||
)
|
||||
self._alert_monitor.enabled = bool(cfg.get("alerts_enabled", True))
|
||||
self._worker.sampled.connect(self._alert_monitor.check)
|
||||
self._worker.start()
|
||||
|
||||
# Ask for the password once at launch and collect root-only data (SMART +
|
||||
# dmidecode); Health/Inventory then always show the full picture (config:
|
||||
# elevate_on_launch). Falls back silently to non-root if cancelled/unavailable.
|
||||
if cfg.get("elevate_on_launch", True) and elevation.available():
|
||||
self._elevated.connect(self._on_elevated)
|
||||
threading.Thread(target=self._collect_privileged, daemon=True).start()
|
||||
|
||||
# Update check (M13): once at launch, then periodically so a newly published
|
||||
# release is detected without restarting (interval from config; 0 disables).
|
||||
self._latest_tag = None
|
||||
self._latest_notes = ""
|
||||
self._applied = False
|
||||
self._update_checked.connect(self._show_update_state)
|
||||
self._update_applied.connect(self._on_update_applied)
|
||||
self._changelog_ready.connect(self._on_changelog)
|
||||
self._start_update_check()
|
||||
minutes = float(cfg.get("update_check_minutes", 30) or 0)
|
||||
if minutes > 0:
|
||||
self._update_timer = QTimer(self)
|
||||
self._update_timer.setInterval(int(minutes * 60_000))
|
||||
self._update_timer.timeout.connect(self._start_update_check)
|
||||
self._update_timer.start()
|
||||
|
||||
def _build_sidebar(self) -> QFrame:
|
||||
bar = QFrame()
|
||||
bar.setObjectName("Sidebar")
|
||||
@@ -89,31 +148,163 @@ class MainWindow(QMainWindow):
|
||||
v.addStretch(1)
|
||||
live = QLabel(f'<span style="color:{ACCENT};">●</span> <span style="color:{MUTED};">Live</span>')
|
||||
v.addWidget(live)
|
||||
version = QLabel(f"v{__version__}")
|
||||
version.setObjectName("Muted")
|
||||
v.addWidget(version)
|
||||
changelog_btn = QPushButton("Changelog")
|
||||
changelog_btn.setObjectName("LinkButton")
|
||||
changelog_btn.setCursor(Qt.CursorShape.PointingHandCursor)
|
||||
changelog_btn.clicked.connect(self._show_changelog)
|
||||
v.addWidget(changelog_btn)
|
||||
check_btn = QPushButton("Check for updates")
|
||||
check_btn.setObjectName("LinkButton")
|
||||
check_btn.setCursor(Qt.CursorShape.PointingHandCursor)
|
||||
check_btn.clicked.connect(self._manual_check)
|
||||
v.addWidget(check_btn)
|
||||
|
||||
# Update state (filled in by the background check).
|
||||
self._update_label = QLabel("checking for updates…")
|
||||
self._update_label.setObjectName("Muted")
|
||||
v.addWidget(self._update_label)
|
||||
self._update_btn = QPushButton()
|
||||
self._update_btn.setObjectName("PrimaryButton")
|
||||
self._update_btn.setCursor(Qt.CursorShape.PointingHandCursor)
|
||||
self._update_btn.clicked.connect(self._apply_update)
|
||||
self._update_btn.setVisible(False)
|
||||
v.addWidget(self._update_btn)
|
||||
self._restart_btn = QPushButton("Restart now")
|
||||
self._restart_btn.setObjectName("PrimaryButton")
|
||||
self._restart_btn.setCursor(Qt.CursorShape.PointingHandCursor)
|
||||
self._restart_btn.clicked.connect(self._restart)
|
||||
self._restart_btn.setVisible(False)
|
||||
v.addWidget(self._restart_btn)
|
||||
return bar
|
||||
|
||||
def _placeholder_page(self, title: str, description: str) -> QWidget:
|
||||
page = QWidget()
|
||||
page.setObjectName("Page")
|
||||
v = QVBoxLayout(page)
|
||||
v.setContentsMargins(20, 18, 20, 18)
|
||||
v.setSpacing(16)
|
||||
head = QLabel(title)
|
||||
head.setObjectName("PageTitle")
|
||||
v.addWidget(head)
|
||||
def _restart(self) -> None:
    """Launch a fresh GUI process detached from this one, then quit this instance."""
    launcher = os.path.join(os.path.dirname(sys.executable), "rigdoctor-gui")
    if not os.path.exists(launcher):
        # dev / not installed next to python: re-run the interpreter with our own argv
        QProcess.startDetached(sys.executable, sys.argv)
    else:
        QProcess.startDetached(launcher)
    QApplication.instance().quit()
|
||||
|
||||
card = QFrame()
|
||||
card.setObjectName("Card")
|
||||
cv = QVBoxLayout(card)
|
||||
cv.setContentsMargins(24, 48, 24, 48)
|
||||
msg = QLabel(description)
|
||||
msg.setObjectName("Muted")
|
||||
msg.setWordWrap(True)
|
||||
msg.setAlignment(Qt.AlignmentFlag.AlignCenter)
|
||||
cv.addWidget(msg)
|
||||
v.addWidget(card)
|
||||
v.addStretch(1)
|
||||
return page
|
||||
def _apply_update(self) -> None:
    """Confirm with the user, then install the latest release on a background thread.

    The result is reported through the ``_update_applied`` signal as an integer return
    code (0 = success), which ``_on_update_applied`` turns into UI state.
    """
    # Snapshot tag and notes BEFORE the modal dialog: the event loop keeps running while
    # the dialog is open, so a periodic update check may overwrite (or clear)
    # self._latest_tag. The release the user confirmed must be the one we install.
    tag = self._latest_tag
    notes = self._latest_notes
    if not tag:
        return

    box = QMessageBox(self)
    box.setWindowTitle(f"Update to {tag}")
    box.setText(f"Update RigDoctor to {tag}?")
    notes_doc = QTextDocument()
    notes_doc.setMarkdown(notes or "_(no release notes)_")
    box.setInformativeText(notes_doc.toHtml())  # render Markdown as rich text (#1)
    box.setStandardButtons(QMessageBox.StandardButton.Ok | QMessageBox.StandardButton.Cancel)
    box.button(QMessageBox.StandardButton.Ok).setText("Update")
    if box.exec() != QMessageBox.StandardButton.Ok:
        return

    self._update_btn.setEnabled(False)
    self._update_label.setText("updating…")

    def _run() -> None:
        # Thread boundary: whatever goes wrong, the UI must hear about it, otherwise the
        # label stays on "updating…" and the button stays disabled for the whole session.
        try:
            rc = updates.apply_update(tag)[0]
        except Exception:  # noqa: BLE001 - reported to the UI as a failed update
            rc = 1
        self._update_applied.emit(rc)

    threading.Thread(target=_run, daemon=True).start()
|
||||
|
||||
def _on_update_applied(self, rc: int) -> None:
    """React to the finished update: offer a restart on success, allow a retry on failure.

    ``rc`` is the installer's return code; 0 means the update was installed.
    """
    if rc != 0:
        self._update_label.setText("update failed")
        self._update_btn.setEnabled(True)
        return

    # Success: remember it so later update checks stay out of the way until restart.
    self._applied = True
    self._update_label.setText("update installed")
    self._update_btn.setVisible(False)
    self._restart_btn.setVisible(True)
    # The timer attribute only exists when periodic checks were enabled.
    if hasattr(self, "_update_timer"):
        self._update_timer.stop()
|
||||
|
||||
def _collect_privileged(self) -> None:
    """Thread target: gather root-only data via pkexec and announce it when available."""
    privileged = elevation.collect_via_pkexec()
    if privileged is None:
        # Nothing collected: stay in non-root mode without signalling.
        return
    elevation.set_privileged(privileged)
    self._elevated.emit()
|
||||
|
||||
def _on_elevated(self) -> None:
    """Refresh the Health page once privileged data has been stored."""
    # Re-run Health now that root-only SMART data is available. (dmidecode is still
    # collected and used by the relay guest view + the CLI `rigdoctor inventory`.)
    health = self.health_page
    health._run()
|
||||
|
||||
def _apply_alert_settings(self) -> None:
    """Re-read the alert settings from the config and push them into the alert monitor."""
    settings = load_config()
    monitor = self._alert_monitor
    monitor.enabled = bool(settings.get("alerts_enabled", True))
    monitor.gpu_temp = float(settings.get("gpu_temp_alert", 90.0))
    monitor.cpu_temp = float(settings.get("cpu_temp_alert", 95.0))
|
||||
|
||||
def _manual_check(self) -> None:
    """Handle the "Check for updates" button; ignored once an update awaits a restart."""
    if not self._applied:
        self._update_label.setText("checking for updates…")
        self._start_update_check()
|
||||
|
||||
def _start_update_check(self) -> None:
    """Run ``_check_updates`` on a daemon thread so the caller is not blocked."""
    checker = threading.Thread(target=self._check_updates, daemon=True)
    checker.start()
|
||||
|
||||
def _show_changelog(self) -> None:
    """Open a non-modal changelog dialog and start fetching the release list.

    The dialog shows "Loading…" until ``_on_changelog`` fills ``self._changelog_view``
    with the fetched releases.
    """
    dialog = QDialog(self)
    # Delete the dialog when it is closed. Without this, every click leaves another
    # hidden dialog parented to the main window for the rest of the session.
    dialog.setAttribute(Qt.WidgetAttribute.WA_DeleteOnClose)
    dialog.setWindowTitle("RigDoctor — Changelog")
    dialog.resize(560, 540)
    layout = QVBoxLayout(dialog)
    view = QTextEdit()
    view.setObjectName("Report")
    view.setReadOnly(True)
    view.setPlainText("Loading…")
    layout.addWidget(view)
    self._changelog_view = view

    def _forget_view(*_args) -> None:
        # The view is destroyed together with its dialog. Drop our reference so a late
        # fetch result is ignored instead of touching a deleted widget — but only if a
        # newer dialog has not already replaced it.
        if getattr(self, "_changelog_view", None) is view:
            self._changelog_view = None

    dialog.destroyed.connect(_forget_view)
    dialog.show()
    threading.Thread(target=self._fetch_changelog, daemon=True).start()
|
||||
|
||||
def _fetch_changelog(self) -> None:
    """Thread target: fetch the release list and hand it over via ``_changelog_ready``."""
    result = updates.list_releases()
    self._changelog_ready.emit(result)
|
||||
|
||||
def _on_changelog(self, result) -> None:
    """Fill the open changelog view with the fetched releases, or with an error hint.

    ``result`` is unpacked as ``(releases, error)``; each release is unpacked as
    ``(tag, date, notes)``.
    """
    target = getattr(self, "_changelog_view", None)
    if target is None:
        return

    releases, error = result
    if error == updates.NO_TOKEN:
        target.setPlainText("Add an update token (Setup → Update access) to load the changelog.")
        return
    if error or not releases:
        target.setPlainText("Couldn't load the changelog from the update server.")
        return

    def _section(tag, date, notes) -> str:
        # One Markdown section per release: heading (with optional date), then the notes.
        heading = f"## {tag}" + (f" — {date}" if date else "")
        return f"{heading}\n\n{notes or '_(no notes)_'}"

    sections = [_section(tag, date, notes) for tag, date, notes in releases]
    target.setMarkdown("\n\n".join(sections))  # render Markdown instead of raw text (#1)
|
||||
|
||||
def _check_updates(self) -> None:
    """Thread target: query the update state and hand it over via ``_update_checked``."""
    state = updates.update_state()
    self._update_checked.emit(state)
|
||||
|
||||
def _show_update_state(self, result) -> None:
    """Reflect the outcome of an update check in the sidebar.

    ``result`` is unpacked as ``(state, tag, notes)``. When a release is available the
    update button is shown and, if alerts are enabled, one desktop notification is sent
    per tag.
    """
    if self._applied:  # an update was applied this session; awaiting restart
        return

    state, tag, notes = result
    self._latest_tag, self._latest_notes = tag, notes
    self._update_btn.setVisible(False)

    # States that only need a fixed sidebar label.
    fixed_labels = {
        updates.NO_TOKEN: "connect to update server",
        updates.AUTH: "update access denied",
        updates.NETWORK: "update check unavailable",
    }
    if state in fixed_labels:
        self._update_label.setText(fixed_labels[state])
        return
    if state != updates.AVAILABLE:  # UP_TO_DATE
        self._update_label.setText("up-to-date")
        return

    self._update_label.setText(f'<span style="color:{GOOD};">{tag} available</span>')
    self._update_btn.setText(f"Update to {tag}")
    self._update_btn.setVisible(True)
    already_notified = tag == self._notified_update_tag
    if self._alert_monitor.enabled and not already_notified:
        self._notified_update_tag = tag  # once per version, not every poll
        alerts.notify("Update available", f"RigDoctor {tag} is available — open RigDoctor to update.")
|
||||
|
||||
def closeEvent(self, event) -> None:  # noqa: N802 (Qt override)
    """Stop the sampler worker and the share page, then let Qt handle the close."""
    worker = self._worker
    worker.stop()
    share_page = self.share_page
    share_page.shutdown()
    super().closeEvent(event)
|
||||
|
||||
@@ -0,0 +1,108 @@
|
||||
"""Notifications page (M8 config): user-configurable alert settings."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from PySide6.QtCore import Qt, Signal
|
||||
from PySide6.QtWidgets import (
|
||||
QCheckBox,
|
||||
QDoubleSpinBox,
|
||||
QFrame,
|
||||
QGridLayout,
|
||||
QHBoxLayout,
|
||||
QLabel,
|
||||
QPushButton,
|
||||
QVBoxLayout,
|
||||
QWidget,
|
||||
)
|
||||
|
||||
from ..config import load_config, update_config
|
||||
from ..core import alerts
|
||||
|
||||
|
||||
class NotificationsPage(QWidget):
    """Settings page for desktop alerts: an on/off switch plus GPU/CPU temperature limits.

    Values are read from and written to the app config; ``changed`` is emitted after
    each save.
    """

    changed = Signal()  # settings saved — main window re-applies them live

    def __init__(self) -> None:
        super().__init__()
        self.setObjectName("Page")

        page = QVBoxLayout(self)
        page.setContentsMargins(20, 18, 20, 18)
        page.setSpacing(16)

        heading = QLabel("Notifications")
        heading.setObjectName("PageTitle")
        page.addWidget(heading)

        page.addWidget(self._build_alerts_card())
        page.addStretch(1)
        self._load()

    def _build_alerts_card(self) -> QFrame:
        """Create the "Alerts" card with the checkbox, thresholds, buttons and status line."""
        frame = QFrame()
        frame.setObjectName("Card")
        column = QVBoxLayout(frame)
        column.setContentsMargins(16, 14, 16, 14)
        column.setSpacing(10)

        caption = QLabel("Alerts")
        caption.setStyleSheet("font-weight: 700; background: transparent;")
        column.addWidget(caption)

        self._enabled = QCheckBox("Enable desktop notifications")
        column.addWidget(self._enabled)

        # Two rows: label in column 0, spin box in column 1; column 2 absorbs the slack.
        thresholds = QGridLayout()
        thresholds.setHorizontalSpacing(12)
        thresholds.setColumnStretch(2, 1)
        self._gpu = self._spin()
        self._cpu = self._spin()
        rows = (
            ("GPU temperature alert", self._gpu),
            ("CPU temperature alert", self._cpu),
        )
        for index, (text, spin) in enumerate(rows):
            thresholds.addWidget(QLabel(text), index, 0)
            thresholds.addWidget(spin, index, 1)
        column.addLayout(thresholds)

        remark = QLabel("GPU-lost and new-version alerts are included whenever notifications are enabled.")
        remark.setObjectName("Muted")
        remark.setWordWrap(True)
        column.addWidget(remark)

        actions = QHBoxLayout()
        save_btn = QPushButton("Save")
        save_btn.setObjectName("PrimaryButton")
        save_btn.clicked.connect(self._save)
        test_btn = QPushButton("Send test")
        test_btn.clicked.connect(self._test)
        actions.addWidget(save_btn)
        actions.addWidget(test_btn)
        actions.addStretch(1)
        column.addLayout(actions)

        self._status = QLabel("")
        self._status.setObjectName("Muted")
        column.addWidget(self._status)
        return frame

    @staticmethod
    def _spin() -> QDoubleSpinBox:
        """Return a whole-degree temperature spin box limited to 40–110 °C."""
        box = QDoubleSpinBox()
        box.setRange(40, 110)
        box.setDecimals(0)
        box.setSingleStep(1)
        box.setSuffix(" °C")
        return box

    def _load(self) -> None:
        """Populate the controls from the saved config (defaults: on, 90 °C GPU, 95 °C CPU)."""
        saved = load_config()
        self._enabled.setChecked(bool(saved.get("alerts_enabled", True)))
        self._gpu.setValue(float(saved.get("gpu_temp_alert", 90.0)))
        self._cpu.setValue(float(saved.get("cpu_temp_alert", 95.0)))

    def _save(self) -> None:
        """Persist the current control values and notify listeners."""
        values = {
            "alerts_enabled": self._enabled.isChecked(),
            "gpu_temp_alert": self._gpu.value(),
            "cpu_temp_alert": self._cpu.value(),
        }
        update_config(**values)
        self.changed.emit()
        self._status.setText("Saved.")

    def _test(self) -> None:
        """Send a test notification and report whether it could be delivered."""
        delivered = alerts.notify("RigDoctor", "Test notification — alerts are working.")
        if delivered:
            self._status.setText("Test notification sent.")
        else:
            self._status.setText("notify-send not found — install libnotify-bin (Setup).")
|
||||
@@ -0,0 +1,185 @@
|
||||
"""Recording & Logs page (M3 in the GUI): start/stop/status + post-crash report.
|
||||
|
||||
Drives the same background recorder as the CLI via core.reccontrol, so the GUI and
|
||||
`rigdoctor record …` are interchangeable.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
|
||||
from PySide6.QtCore import Qt, QTimer, QUrl
|
||||
from PySide6.QtGui import QDesktopServices, QFont
|
||||
from PySide6.QtWidgets import (
|
||||
QDoubleSpinBox,
|
||||
QFrame,
|
||||
QHBoxLayout,
|
||||
QLabel,
|
||||
QPushButton,
|
||||
QTextEdit,
|
||||
QVBoxLayout,
|
||||
QWidget,
|
||||
)
|
||||
|
||||
from .. import config
|
||||
from ..core import reccontrol
|
||||
from ..core.crashlog import summarize
|
||||
from ..render import format_headline, render_summary
|
||||
from .theme import GOOD, MUTED, WARN
|
||||
|
||||
|
||||
def _panel(title: str) -> tuple[QFrame, QVBoxLayout]:
    """Build a titled "Card" frame and return it together with its vertical layout."""
    card = QFrame()
    card.setObjectName("Card")

    body = QVBoxLayout(card)
    body.setContentsMargins(16, 14, 16, 14)
    body.setSpacing(10)

    caption = QLabel(title)
    caption.setStyleSheet("font-weight: 700; background: transparent;")
    body.addWidget(caption)
    return card, body
|
||||
|
||||
|
||||
def _fmt_time(value, fmt="%Y-%m-%d %H:%M:%S") -> str:
    """Format a timestamp as local time using ``fmt``; falsy values render as an em dash."""
    if not value:
        return "—"
    return time.strftime(fmt, time.localtime(value))
|
||||
|
||||
|
||||
class RecorderPage(QWidget):
    """Recording page: start/stop the background recorder, show its status and the report.

    The recorder status is polled once a second through ``core.reccontrol``.
    """

    def __init__(self) -> None:
        super().__init__()
        self.setObjectName("Page")

        page = QVBoxLayout(self)
        page.setContentsMargins(20, 18, 20, 18)
        page.setSpacing(16)

        heading = QLabel("Recording")
        heading.setObjectName("PageTitle")
        page.addWidget(heading)

        page.addWidget(self._build_status_card())
        page.addWidget(self._build_report_card(), 1)

        # Poll recorder status once a second (reflects CLI-driven sessions too).
        self._timer = QTimer(self)
        self._timer.setInterval(1000)
        self._timer.timeout.connect(self._refresh_status)
        self._timer.start()
        self._refresh_status()
        self._load_report()

    # --- construction ----------------------------------------------------------
    def _build_status_card(self) -> QFrame:
        """Create the "Status" card: state line, details, warning and the control row."""
        frame, column = _panel("Status")

        self._state = QLabel("○ Not recording")
        self._state.setStyleSheet(f"color: {MUTED}; font-weight: 700; background: transparent;")
        column.addWidget(self._state)

        self._info = QLabel("")
        self._info.setObjectName("Muted")
        column.addWidget(self._info)

        self._latest = QLabel("")
        column.addWidget(self._latest)

        self._warn = QLabel("")
        self._warn.setStyleSheet(f"color: {WARN}; font-weight: 600; background: transparent;")
        self._warn.setVisible(False)
        column.addWidget(self._warn)

        row = QHBoxLayout()
        row.setSpacing(8)
        row.addWidget(QLabel("Interval (s)"))

        self._interval = QDoubleSpinBox()
        self._interval.setRange(0.1, 10.0)
        self._interval.setSingleStep(0.1)
        self._interval.setValue(float(config.DEFAULTS["interval"]))
        row.addWidget(self._interval)

        self._start_btn = QPushButton("Start recording")
        self._start_btn.setObjectName("PrimaryButton")
        self._start_btn.clicked.connect(self._on_start)
        self._stop_btn = QPushButton("Stop")
        self._stop_btn.clicked.connect(self._on_stop)
        row.addWidget(self._start_btn)
        row.addWidget(self._stop_btn)
        row.addStretch(1)

        open_btn = QPushButton("Open log folder")
        open_btn.clicked.connect(self._open_folder)
        row.addWidget(open_btn)

        column.addLayout(row)
        return frame

    def _build_report_card(self) -> QFrame:
        """Create the "Post-crash report" card with its Refresh button and text view."""
        frame = QFrame()
        frame.setObjectName("Card")
        column = QVBoxLayout(frame)
        column.setContentsMargins(16, 14, 16, 14)
        column.setSpacing(10)

        top = QHBoxLayout()
        caption = QLabel("Post-crash report")
        caption.setStyleSheet("font-weight: 700; background: transparent;")
        top.addWidget(caption)
        top.addStretch(1)
        reload_btn = QPushButton("Refresh")
        reload_btn.clicked.connect(self._load_report)
        top.addWidget(reload_btn)
        column.addLayout(top)

        self._report = QTextEdit()
        self._report.setObjectName("Report")
        self._report.setReadOnly(True)
        self._report.setFont(QFont("monospace", 10))
        self._report.setLineWrapMode(QTextEdit.LineWrapMode.NoWrap)
        column.addWidget(self._report)
        return frame

    # --- actions ---------------------------------------------------------------
    def _on_start(self) -> None:
        """Start the background recorder with the chosen interval; re-poll shortly after."""
        self._start_btn.setEnabled(False)
        seconds = self._interval.value()
        reccontrol.start_background(interval=seconds)
        QTimer.singleShot(600, self._refresh_status)

    def _on_stop(self) -> None:
        """Stop the background recorder, then refresh the status and the report."""
        self._stop_btn.setEnabled(False)
        reccontrol.stop_background()
        QTimer.singleShot(600, self._refresh_status)
        QTimer.singleShot(900, self._load_report)

    def _open_folder(self) -> None:
        """Open the log directory in the desktop file manager, creating it if needed."""
        log_dir = config.LOG_DIR
        log_dir.mkdir(parents=True, exist_ok=True)
        QDesktopServices.openUrl(QUrl.fromLocalFile(str(config.LOG_DIR)))

    # --- refresh ---------------------------------------------------------------
    def _refresh_status(self) -> None:
        """Update the state line, buttons and detail labels from the recorder status."""
        pid = reccontrol.running_pid()
        status = reccontrol.read_status()
        is_running = pid is not None

        if is_running:
            state_text, state_color = f"● Recording (pid {pid})", GOOD
        else:
            state_text, state_color = "○ Not recording", MUTED
        self._state.setText(state_text)
        self._state.setStyleSheet(f"color: {state_color}; font-weight: 700; background: transparent;")

        # Only one of start/stop makes sense at a time; the interval is fixed while running.
        self._start_btn.setEnabled(not is_running)
        self._stop_btn.setEnabled(is_running)
        self._interval.setEnabled(not is_running)

        if not status:
            self._info.setText("No recording yet. Press “Start recording”.")
            self._latest.setText("")
            self._warn.setVisible(False)
            return

        self._info.setText(
            f"Samples: {status.get('samples', 0)} "
            f"Started: {_fmt_time(status.get('started'))} "
            f"Updated: {_fmt_time(status.get('updated'), '%H:%M:%S')}\n"
            f"Log: {status.get('log', config.LOG_FILE)}"
        )
        newest = status.get("latest")
        self._latest.setText(format_headline(newest) if newest else "")

        gpu_lost = bool(status.get("gpu_lost"))
        if gpu_lost:
            self._warn.setText("⚠ A GPU-lost event was recorded this session")
        self._warn.setVisible(gpu_lost)

    def _load_report(self) -> None:
        """Summarize the log file (last 10 entries) and show the rendered report."""
        digest = summarize(config.LOG_FILE, last_n=10)
        rendered = render_summary(digest, log_path=config.LOG_FILE)
        self._report.setPlainText(rendered)
|
||||
@@ -0,0 +1,239 @@
|
||||
"""Setup page (M9 in the GUI): show environment + optional components, install missing."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
|
||||
from PySide6.QtCore import Qt, QUrl, Signal
|
||||
from PySide6.QtGui import QDesktopServices
|
||||
from PySide6.QtWidgets import (
|
||||
QApplication,
|
||||
QFrame,
|
||||
QHBoxLayout,
|
||||
QLabel,
|
||||
QLineEdit,
|
||||
QMessageBox,
|
||||
QPushButton,
|
||||
QSizePolicy,
|
||||
QTextEdit,
|
||||
QVBoxLayout,
|
||||
QWidget,
|
||||
)
|
||||
|
||||
from .. import config
|
||||
from ..core import installer, sysenv, uninstall, updates
|
||||
from .theme import GOOD, MUTED, WARN
|
||||
|
||||
|
||||
def _panel(title: str) -> tuple[QFrame, QVBoxLayout]:
    """Build a titled "Card" frame (expanding width, capped height) and return it with its layout."""
    card = QFrame()
    card.setObjectName("Card")
    card.setSizePolicy(QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Maximum)

    body = QVBoxLayout(card)
    body.setContentsMargins(16, 14, 16, 14)
    body.setSpacing(8)

    caption = QLabel(title)
    caption.setStyleSheet("font-weight: 700; background: transparent;")
    body.addWidget(caption)
    return card, body
|
||||
|
||||
|
||||
# Human-readable description of each token storage backend, keyed by the backend name
# that config.token_backend() reports.
_BACKEND_DESC = dict(
    env="token from $RIGDOCTOR_TOKEN",
    keyring="token stored in the OS keyring (encrypted)",
    file="token stored in a 0600 file — install libsecret-tools to encrypt it",
    none="no token saved",
)
|
||||
|
||||
|
||||
class SetupPage(QWidget):
    """Setup page: environment summary, optional-component installer, token entry, uninstall.

    Blocking work (package install, update-state check) runs on daemon threads; the
    results come back through the private signals below and are handled in
    ``_on_installed`` / ``_on_upd_state``.
    """

    _installed = Signal(int, str)  # (return code, installer output)
    _upd_state = Signal(object)  # (token backend, update_state() result)

    def __init__(self) -> None:
        super().__init__()
        self.setObjectName("Page")
        self._installed.connect(self._on_installed)
        self._upd_state.connect(self._on_upd_state)
        # True while an install thread is running. _refresh() leaves the install button
        # alone during that window so "Re-check" cannot re-enable it mid-install.
        self._installing = False

        root = QVBoxLayout(self)
        root.setContentsMargins(20, 18, 20, 18)
        root.setSpacing(16)

        title = QLabel("Setup")
        title.setObjectName("PageTitle")
        root.addWidget(title)

        env_card, env_layout = _panel("Environment")
        self._env = QLabel("")
        self._env.setObjectName("Muted")
        env_layout.addWidget(self._env)
        root.addWidget(env_card)

        comp_card, comp_layout = _panel("Optional components")
        self._components = QVBoxLayout()
        self._components.setSpacing(6)
        comp_layout.addLayout(self._components)
        controls = QHBoxLayout()
        self._install_btn = QPushButton("Install missing")
        self._install_btn.setObjectName("PrimaryButton")
        self._install_btn.clicked.connect(self._install)
        self._refresh_btn = QPushButton("Re-check")
        self._refresh_btn.clicked.connect(self._refresh)
        controls.addWidget(self._install_btn)
        controls.addWidget(self._refresh_btn)
        controls.addStretch(1)
        comp_layout.addLayout(controls)
        root.addWidget(comp_card)

        # Account access (M13/M12): one Gitea token gates updates and session sharing.
        upd_card, upd_layout = _panel("Account access")
        hint = QLabel("A Gitea access token unlocks updates and session sharing. "
                      "Create it with scopes <b>read:user</b> and <b>read:repository</b>.")
        hint.setObjectName("Muted")
        hint.setWordWrap(True)
        upd_layout.addWidget(hint)
        self._upd_status = QLabel("")
        self._upd_status.setObjectName("Muted")
        self._upd_status.setWordWrap(True)
        upd_layout.addWidget(self._upd_status)
        token_row = QHBoxLayout()
        self._token_input = QLineEdit()
        self._token_input.setEchoMode(QLineEdit.EchoMode.Password)
        self._token_input.setPlaceholderText("Paste a Gitea token (read:user + read:repository)")
        save_btn = QPushButton("Save token")
        save_btn.setObjectName("PrimaryButton")
        save_btn.clicked.connect(self._save_token)
        get_btn = QPushButton("Get a token")
        get_btn.clicked.connect(lambda: QDesktopServices.openUrl(QUrl(updates.TOKEN_PAGE)))
        token_row.addWidget(self._token_input, 1)
        token_row.addWidget(save_btn)
        token_row.addWidget(get_btn)
        upd_layout.addLayout(token_row)
        root.addWidget(upd_card)

        # Installer output; hidden until an install is started.
        self._output = QTextEdit()
        self._output.setObjectName("Report")
        self._output.setReadOnly(True)
        self._output.setMinimumHeight(180)
        self._output.setVisible(False)
        root.addWidget(self._output)
        root.addStretch(1)

        danger = QHBoxLayout()
        danger.addStretch(1)
        uninstall_btn = QPushButton("Uninstall RigDoctor")
        uninstall_btn.setObjectName("DangerButton")
        uninstall_btn.clicked.connect(self._uninstall)
        danger.addWidget(uninstall_btn)
        root.addLayout(danger)

        self._refresh()
        self._refresh_update_status()

    def _uninstall(self) -> None:
        """Ask for confirmation, then uninstall (optionally purging user data) and quit."""
        box = QMessageBox(self)
        box.setIcon(QMessageBox.Icon.Warning)
        box.setWindowTitle("Uninstall RigDoctor")
        box.setText("Uninstall RigDoctor?")
        box.setInformativeText(
            "This removes the app. Choose “Remove all” to also delete your settings, "
            "update token, and captured logs."
        )
        remove_all = box.addButton("Remove all", QMessageBox.ButtonRole.DestructiveRole)
        app_only = box.addButton("Uninstall", QMessageBox.ButtonRole.AcceptRole)
        box.addButton("Cancel", QMessageBox.ButtonRole.RejectRole)
        box.exec()
        clicked = box.clickedButton()
        if clicked is remove_all:
            purge = True
        elif clicked is app_only:
            purge = False
        else:  # Cancel, or the dialog was dismissed
            return
        uninstall.uninstall(purge=purge)
        QMessageBox.information(self, "RigDoctor", "Uninstalling… RigDoctor will close now.")
        QApplication.instance().quit()

    def _refresh(self) -> None:
        """Re-read the environment and component status and rebuild the component list."""
        pkg_manager = sysenv.package_manager()  # queried once, used for text and button
        self._env.setText(
            f"Distro: {sysenv.distro_name()} "
            f"Package manager: {pkg_manager or 'none (apt required)'} "
            f"GPU: {', '.join(sysenv.gpu_vendors()) or 'unknown'}"
        )
        # Drop the rows from the previous refresh before adding the current ones.
        while self._components.count():
            item = self._components.takeAt(0)
            w = item.widget()
            if w is not None:
                w.deleteLater()

        status = installer.component_status()
        for component, present in status:
            mark = "✓" if present else "✗"
            color = GOOD if present else MUTED
            row = QLabel(f"<span style='color:{color}'>[{mark}]</span> "
                         f"<b>{component.name}</b> — {component.enables}")
            row.setTextFormat(Qt.TextFormat.RichText)
            row.setWordWrap(True)
            self._components.addWidget(row)

        self._missing = [c for c, present in status if not present]
        if self._installing:
            # An install is in flight: keep the disabled "Installing…" button as it is.
            # _on_installed() refreshes again once the install has finished.
            return
        self._install_btn.setEnabled(bool(self._missing) and pkg_manager == "apt")
        # Set the label in both directions; otherwise "All installed ✔" would stick after
        # a later re-check finds components missing again.
        self._install_btn.setText("Install missing" if self._missing else "All installed ✔")

    def _install(self) -> None:
        """Start installing the packages of all missing components on a worker thread."""
        packages = installer.missing_packages(self._missing)
        if not packages:
            return
        self._installing = True
        self._install_btn.setEnabled(False)
        self._install_btn.setText("Installing… (may prompt for password)")
        self._output.setVisible(True)
        self._output.setPlainText(f"Installing: {' '.join(packages)}\n")
        threading.Thread(target=self._work, args=(packages,), daemon=True).start()

    def _work(self, packages: list[str]) -> None:
        """Thread target: run the install and report ``(rc, output)`` via ``_installed``."""
        rc, out = installer.install_packages(packages)
        self._installed.emit(rc, out)

    def _on_installed(self, rc: int, out: str) -> None:
        """Show the tail of the installer output and refresh the page state."""
        self._installing = False
        self._output.setPlainText(out[-4000:])  # only the last 4000 characters
        self._refresh()  # also restores the install button label and enabled state
        # If libsecret-tools was just installed, move a file token into the keyring.
        if config.token_backend() == "file" and config.keyring_available():
            token = config.load_token()
            if token:
                config.save_token(token)
        self._refresh_update_status()

    # --- update access (token) ------------------------------------------------
    def _save_token(self) -> None:
        """Store the pasted token, clear the input field and re-check update access."""
        token = self._token_input.text().strip()
        if not token:
            return
        config.save_token(token)
        self._token_input.clear()
        self._refresh_update_status()

    def _refresh_update_status(self) -> None:
        """Show the token backend with a "checking…" note and start an update-state check."""
        self._upd_status.setText(f"{_BACKEND_DESC[config.token_backend()]} · checking…")
        threading.Thread(target=self._check_update, daemon=True).start()

    def _check_update(self) -> None:
        """Thread target: emit ``(token backend, update state)`` via ``_upd_state``."""
        self._upd_state.emit((config.token_backend(), updates.update_state()))

    def _on_upd_state(self, result) -> None:
        """Render the token backend and the update-check outcome in the status label."""
        backend, (state, tag, _notes) = result
        msg = {
            updates.NO_TOKEN: "paste a token below to enable updates",
            updates.AUTH: "token rejected — check its scope/permissions",
            updates.NETWORK: "couldn't reach the update server",
            updates.UP_TO_DATE: f"up to date ({tag})" if tag else "up to date",
            updates.AVAILABLE: f"update available: {tag}",
        }[state]
        color = GOOD if state == updates.AVAILABLE else (WARN if state == updates.AUTH else MUTED)
        self._upd_status.setText(
            f"<span style='color:{MUTED}'>{_BACKEND_DESC[backend]}</span> · "
            f"<span style='color:{color}'>{msg}</span>"
        )
|
||||
@@ -0,0 +1,348 @@
|
||||
"""Share page (M12): host or join a shared session over the relay.
|
||||
|
||||
Guest sees the host's live sensors + health + inventory (read-only). If the host enables it,
|
||||
a full **PTY terminal** is shared: the guest types and the commands run on the host (as the
|
||||
host's user), the host reads along, and the host can type too — e.g. a sudo password, which
|
||||
stays local and is never sent to the guest.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import json
|
||||
|
||||
from PySide6.QtCore import Qt, QSocketNotifier, QTimer, QUrl
|
||||
from PySide6.QtWebSockets import QWebSocket
|
||||
from PySide6.QtWidgets import (
|
||||
QCheckBox,
|
||||
QFrame,
|
||||
QHBoxLayout,
|
||||
QLabel,
|
||||
QLineEdit,
|
||||
QPushButton,
|
||||
QTextEdit,
|
||||
QVBoxLayout,
|
||||
QWidget,
|
||||
)
|
||||
|
||||
from ..config import load_config, load_token
|
||||
from ..core import share
|
||||
from ..core.pty_session import PtySession
|
||||
from ..core.sampler import Sampler
|
||||
from ..core.sources import available_sources
|
||||
from .terminal_widget import TerminalView
|
||||
|
||||
|
||||
def _relay_url() -> str:
    """Return the relay base URL from the config (or the default), without trailing slashes."""
    configured = load_config().get("relay_url", "wss://rigdoctor.jesseyvanofferen.com")
    return configured.rstrip("/")
|
||||
|
||||
|
||||
def _b64(data: bytes) -> str:
    """Encode ``data`` as standard base64 and return it as an ASCII string."""
    encoded = base64.b64encode(data)
    return str(encoded, "ascii")
|
||||
|
||||
|
||||
def _card(title: str) -> tuple[QFrame, QVBoxLayout]:
    """Build a titled "Card" frame and return it together with its vertical layout."""
    frame = QFrame()
    frame.setObjectName("Card")

    column = QVBoxLayout(frame)
    column.setContentsMargins(16, 14, 16, 14)
    column.setSpacing(10)

    caption = QLabel(title)
    caption.setStyleSheet("font-weight: 700; background: transparent;")
    column.addWidget(caption)
    return frame, column
|
||||
|
||||
|
||||
class SharePage(QWidget):
|
||||
def __init__(self) -> None:
    """Set up the session state, the 2-second stream timer and the host/guest cards."""
    super().__init__()
    self.setObjectName("Page")

    # Session state; sockets and the PTY stay None until a session needs them.
    self._sampler = Sampler(available_sources())
    self._host_ws: QWebSocket | None = None
    self._guest_ws: QWebSocket | None = None
    self._pty: PtySession | None = None
    self._pty_notifier: QSocketNotifier | None = None
    self._last_report = None
    self._last_inv = None

    # Created here but not started; each timeout calls _stream().
    self._timer = QTimer(self)
    self._timer.setInterval(2000)
    self._timer.timeout.connect(self._stream)

    page = QVBoxLayout(self)
    page.setContentsMargins(20, 18, 20, 18)
    page.setSpacing(14)
    heading = QLabel("Share")
    heading.setObjectName("PageTitle")
    page.addWidget(heading)
    page.addWidget(self._build_host())
    page.addWidget(self._build_guest(), 1)
|
||||
|
||||
# ------------------------------------------------------------------ host
|
||||
def _build_host(self) -> QFrame:
    """Create the host card: status text, start/stop row, terminal opt-in and terminal view."""
    frame, column = _card("Start a shared session")

    self._host_status = QLabel("Let someone with an account view your machine, read-only.")
    self._host_status.setObjectName("Muted")
    self._host_status.setWordWrap(True)
    column.addWidget(self._host_status)

    # Start / Stop buttons followed by the (selectable) session code.
    self._start_btn = QPushButton("Start shared session")
    self._start_btn.setObjectName("PrimaryButton")
    self._start_btn.clicked.connect(self._start_host)

    self._stop_btn = QPushButton("Stop")
    self._stop_btn.setEnabled(False)
    self._stop_btn.clicked.connect(self._stop_host)

    self._code_label = QLabel("")
    self._code_label.setStyleSheet("font-weight:700; font-size:18px; color:#38bdf8; background:transparent;")
    self._code_label.setTextInteractionFlags(Qt.TextInteractionFlag.TextSelectableByMouse)

    buttons = QHBoxLayout()
    buttons.addWidget(self._start_btn)
    buttons.addWidget(self._stop_btn)
    buttons.addSpacing(12)
    buttons.addWidget(self._code_label)
    buttons.addStretch(1)
    column.addLayout(buttons)

    self._allow_term = QCheckBox("Allow remote terminal — the guest runs commands as your user (you read along; you can type too, e.g. a sudo password)")
    self._allow_term.setStyleSheet("color:#fb923c; background:transparent;")
    self._allow_term.toggled.connect(self._toggle_terminal)
    column.addWidget(self._allow_term)

    # Host-side terminal view: keystrokes and resizes are forwarded to the PTY when
    # one exists, and ignored otherwise.
    self._host_term = TerminalView()
    self._host_term.keys.connect(lambda b: self._pty.write(b) if self._pty else None)
    self._host_term.resized.connect(lambda r, c: self._pty.set_size(r, c) if self._pty else None)
    self._host_term.setVisible(False)
    column.addWidget(self._host_term)
    return frame
|
||||
|
||||
def _start_host(self) -> None:
    """Open the host websocket to the relay and authenticate once connected.

    Does nothing except update the status label when no access token is stored.
    Otherwise disables the start button, creates a new ``QWebSocket`` on
    ``self._host_ws``, wires its signals and opens ``<relay>/ws/host``.
    """
    # Read the token once so the value that passed the check is the value sent.
    token = load_token()
    if not token:
        self._host_status.setText("Set a Gitea access token in Setup → Account access first.")
        return
    self._host_status.setText("Connecting to the relay…")
    self._start_btn.setEnabled(False)

    def send_token() -> None:
        # _stop_host() resets self._host_ws to None; a late signal must not crash.
        ws = self._host_ws
        if ws:
            ws.sendTextMessage(json.dumps({"token": token}))

    def report_error(*_) -> None:
        ws = self._host_ws
        if ws:
            self._host_status.setText(f"Relay error: {ws.errorString()}")

    self._host_ws = QWebSocket()
    self._host_ws.connected.connect(send_token)
    self._host_ws.textMessageReceived.connect(self._host_msg)
    self._host_ws.disconnected.connect(self._host_closed)
    self._host_ws.errorOccurred.connect(report_error)
    self._host_ws.open(QUrl(_relay_url() + "/ws/host"))
|
||||
|
||||
def _host_msg(self, text: str) -> None:
|
||||
try:
|
||||
data = json.loads(text)
|
||||
except ValueError:
|
||||
return
|
||||
if data.get("error"):
|
||||
self._host_status.setText(f"Rejected: {data['error']}")
|
||||
return
|
||||
if "code" in data: # relay handshake
|
||||
self._code_label.setText(data["code"])
|
||||
self._host_status.setText(f"Sharing as {data.get('user', '?')} — give this code to whoever should view your machine.")
|
||||
self._stop_btn.setEnabled(True)
|
||||
self._host_ws.sendTextMessage(share.host_full_frame(self._sampler))
|
||||
self._send_terminal_state()
|
||||
if self._allow_term.isChecked():
|
||||
self._start_pty()
|
||||
self._timer.start()
|
||||
return
|
||||
kind = data.get("type") # frames forwarded from a guest
|
||||
if kind == "req_full":
|
||||
# A guest just joined — send a full frame AND the current terminal state, so a
|
||||
# guest that joins *after* the host enabled the terminal still gets access.
|
||||
self._host_ws.sendTextMessage(share.host_full_frame(self._sampler))
|
||||
self._send_terminal_state()
|
||||
elif kind == "pty_in" and self._pty:
|
||||
self._pty.write(base64.b64decode(data["data"]))
|
||||
elif kind == "pty_resize" and self._pty:
|
||||
self._pty.set_size(int(data["rows"]), int(data["cols"]))
|
||||
|
||||
def _toggle_terminal(self, on: bool) -> None:
|
||||
if on and self._host_ws and self._code_label.text():
|
||||
self._start_pty()
|
||||
elif not on:
|
||||
self._stop_pty()
|
||||
self._send_terminal_state()
|
||||
|
||||
def _send_terminal_state(self) -> None:
|
||||
if self._host_ws and self._code_label.text():
|
||||
self._host_ws.sendTextMessage(json.dumps({"type": "terminal", "enabled": self._allow_term.isChecked()}))
|
||||
|
||||
def _start_pty(self) -> None:
|
||||
if self._pty:
|
||||
return
|
||||
rows, cols = self._host_term.grid()
|
||||
self._pty = PtySession(rows=rows, cols=cols)
|
||||
self._pty_notifier = QSocketNotifier(self._pty.master_fd, QSocketNotifier.Type.Read, self)
|
||||
self._pty_notifier.activated.connect(self._on_pty_output)
|
||||
self._host_term.reset()
|
||||
self._host_term.setVisible(True)
|
||||
|
||||
def _on_pty_output(self) -> None:
|
||||
if not self._pty:
|
||||
return
|
||||
data = self._pty.read()
|
||||
if not data: # shell exited / EOF
|
||||
self._stop_pty()
|
||||
self._send_terminal_state()
|
||||
self._allow_term.setChecked(False)
|
||||
return
|
||||
self._host_term.feed(data)
|
||||
if self._host_ws:
|
||||
self._host_ws.sendTextMessage(json.dumps({"type": "pty", "data": _b64(data)}))
|
||||
|
||||
def _stop_pty(self) -> None:
|
||||
if self._pty_notifier:
|
||||
self._pty_notifier.setEnabled(False)
|
||||
self._pty_notifier = None
|
||||
if self._pty:
|
||||
self._pty.close()
|
||||
self._pty = None
|
||||
self._host_term.setVisible(False)
|
||||
|
||||
def _stream(self) -> None:
|
||||
if self._host_ws:
|
||||
self._host_ws.sendTextMessage(share.host_snapshot_frame(self._sampler))
|
||||
|
||||
def _stop_host(self) -> None:
|
||||
self._timer.stop()
|
||||
self._stop_pty()
|
||||
if self._host_ws:
|
||||
self._host_ws.close()
|
||||
self._host_ws = None
|
||||
self._code_label.setText("")
|
||||
self._stop_btn.setEnabled(False)
|
||||
self._start_btn.setEnabled(True)
|
||||
self._host_status.setText("Stopped sharing.")
|
||||
|
||||
def _host_closed(self) -> None:
|
||||
self._timer.stop()
|
||||
self._stop_pty()
|
||||
self._start_btn.setEnabled(True)
|
||||
self._stop_btn.setEnabled(False)
|
||||
if self._code_label.text():
|
||||
self._code_label.setText("")
|
||||
self._host_status.setText("Disconnected from the relay.")
|
||||
|
||||
# ----------------------------------------------------------------- guest
|
||||
def _build_guest(self) -> QFrame:
    """Build and return the "Join a shared session" card.

    Creates the share-code input, the join/leave buttons, the status label, the
    read-only report view, the terminal hint label and the guest terminal view.
    The report view, hint label and terminal start hidden.
    """
    card, layout = _card("Join a shared session")

    code_input = QLineEdit()
    code_input.setPlaceholderText("Enter share code")
    code_input.setMaxLength(6)
    code_input.setFixedWidth(160)
    self._code_input = code_input

    join = QPushButton("Join")
    join.setObjectName("PrimaryButton")
    join.clicked.connect(self._join)
    self._join_btn = join

    leave = QPushButton("Leave")
    leave.setEnabled(False)
    leave.clicked.connect(self._leave)
    self._leave_btn = leave

    controls = QHBoxLayout()
    for widget in (code_input, join, leave):
        controls.addWidget(widget)
    controls.addStretch(1)
    layout.addLayout(controls)

    status = QLabel("")
    status.setObjectName("Muted")
    self._guest_status = status
    layout.addWidget(status)

    report = QTextEdit()
    report.setObjectName("Report")
    report.setReadOnly(True)
    report.setVisible(False)
    report.setMinimumHeight(200)
    self._view = report
    layout.addWidget(report)

    hint = QLabel("")
    hint.setObjectName("Muted")
    hint.setVisible(False)
    self._term_label = hint
    layout.addWidget(hint)

    term = TerminalView()
    term.keys.connect(self._guest_key)
    term.resized.connect(self._guest_resize)
    term.setVisible(False)
    self._guest_term = term
    layout.addWidget(term)

    return card
|
||||
|
||||
def _join(self) -> None:
    """Open the guest websocket for the entered share code and authenticate.

    The code is stripped and upper-cased. Without a stored token or a code, only
    the status label is updated. Otherwise the join button is disabled, a new
    ``QWebSocket`` is stored on ``self._guest_ws`` and opened at
    ``<relay>/ws/guest/<code>``.
    """
    code = self._code_input.text().strip().upper()
    # Read the token once so the value that passed the check is the value sent.
    token = load_token()
    if not token:
        self._guest_status.setText("Set a Gitea access token in Setup → Account access first.")
        return
    if not code:
        self._guest_status.setText("Enter a share code.")
        return
    self._guest_status.setText("Connecting…")
    self._join_btn.setEnabled(False)

    def send_token() -> None:
        # _leave() resets self._guest_ws to None; a late signal must not crash.
        ws = self._guest_ws
        if ws:
            ws.sendTextMessage(json.dumps({"token": token}))

    def report_error(*_) -> None:
        ws = self._guest_ws
        if ws:
            self._guest_status.setText(f"Relay error: {ws.errorString()}")

    self._guest_ws = QWebSocket()
    self._guest_ws.connected.connect(send_token)
    self._guest_ws.textMessageReceived.connect(self._guest_msg)
    self._guest_ws.disconnected.connect(self._guest_closed)
    self._guest_ws.errorOccurred.connect(report_error)
    self._guest_ws.open(QUrl(_relay_url() + "/ws/guest/" + code))
|
||||
|
||||
def _guest_msg(self, text: str) -> None:
|
||||
try:
|
||||
data = json.loads(text)
|
||||
except ValueError:
|
||||
return
|
||||
if data.get("error"):
|
||||
self._guest_status.setText(data["error"])
|
||||
return
|
||||
if "joined" in data:
|
||||
self._guest_status.setText(f"Viewing {data.get('host', '?')}'s machine — read-only.")
|
||||
self._leave_btn.setEnabled(True)
|
||||
self._view.setVisible(True)
|
||||
self._guest_ws.sendTextMessage(json.dumps({"type": "req_full"}))
|
||||
return
|
||||
kind = data.get("type")
|
||||
if kind in ("full", "snapshot"):
|
||||
if kind == "full":
|
||||
self._last_report = data.get("report")
|
||||
self._last_inv = data.get("inventory")
|
||||
self._view.setHtml(share.guest_html(data.get("snapshot"), self._last_report, self._last_inv))
|
||||
elif kind == "terminal":
|
||||
self._set_terminal_visible(bool(data.get("enabled")))
|
||||
elif kind == "pty":
|
||||
self._guest_term.feed(base64.b64decode(data["data"]))
|
||||
|
||||
def _set_terminal_visible(self, enabled: bool) -> None:
|
||||
self._term_label.setVisible(True)
|
||||
self._term_label.setText("Terminal enabled by host — your keystrokes run on their machine. Click here and type."
|
||||
if enabled else "Terminal not enabled by the host.")
|
||||
self._guest_term.setVisible(enabled)
|
||||
if enabled:
|
||||
self._guest_term.reset()
|
||||
self._guest_resize(*self._guest_term.grid())
|
||||
self._guest_term.setFocus()
|
||||
|
||||
def _guest_key(self, data: bytes) -> None:
|
||||
if self._guest_ws:
|
||||
self._guest_ws.sendTextMessage(json.dumps({"type": "pty_in", "data": _b64(data)}))
|
||||
|
||||
def _guest_resize(self, rows: int, cols: int) -> None:
|
||||
if self._guest_ws:
|
||||
self._guest_ws.sendTextMessage(json.dumps({"type": "pty_resize", "rows": rows, "cols": cols}))
|
||||
|
||||
def _leave(self) -> None:
|
||||
if self._guest_ws:
|
||||
self._guest_ws.close()
|
||||
self._guest_ws = None
|
||||
for w in (self._view, self._term_label, self._guest_term):
|
||||
w.setVisible(False)
|
||||
self._leave_btn.setEnabled(False)
|
||||
self._join_btn.setEnabled(True)
|
||||
self._guest_status.setText("Left the session.")
|
||||
|
||||
def _guest_closed(self) -> None:
|
||||
self._join_btn.setEnabled(True)
|
||||
self._leave_btn.setEnabled(False)
|
||||
if self._view.isVisible():
|
||||
self._guest_status.setText("Session ended (host disconnected).")
|
||||
|
||||
def shutdown(self) -> None:
    """Stop streaming and the PTY, then close whichever websockets exist."""
    self._timer.stop()
    self._stop_pty()
    open_sockets = [ws for ws in (self._host_ws, self._guest_ws) if ws]
    for ws in open_sockets:
        ws.close()
|
||||
@@ -0,0 +1,89 @@
|
||||
"""A minimal terminal view: renders PTY output via pyte and emits keystrokes (M12, Tier 3).
|
||||
|
||||
Used by both sides of a shared session — the host (mirrors its local PTY, can also type, e.g.
|
||||
a sudo password) and the guest (renders the streamed PTY, sends keystrokes). Monochrome for
|
||||
now; cursor addressing / layout (vim, top) work via pyte.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pyte
|
||||
from PySide6.QtCore import Qt, Signal
|
||||
from PySide6.QtGui import QFontDatabase, QFontMetrics, QTextCursor
|
||||
from PySide6.QtWidgets import QPlainTextEdit
|
||||
|
||||
|
||||
class TerminalView(QPlainTextEdit):
    """Plain-text widget that shows a pyte screen and turns key presses into PTY bytes.

    Output is pushed in with ``feed()``; the widget never edits its own text.
    Key presses are translated to byte sequences and emitted on ``keys``; a
    change of the character grid after a widget resize is emitted on ``resized``.
    """

    keys = Signal(bytes)  # user keystrokes -> bytes for the PTY
    resized = Signal(int, int)  # rows, cols

    # Qt key code -> byte sequence sent for keys that event.text() does not cover.
    _SPECIAL_KEYS = {
        Qt.Key.Key_Return.value: b"\r",
        Qt.Key.Key_Enter.value: b"\r",
        Qt.Key.Key_Backspace.value: b"\x7f",
        Qt.Key.Key_Tab.value: b"\t",
        Qt.Key.Key_Escape.value: b"\x1b",
        Qt.Key.Key_Up.value: b"\x1b[A",
        Qt.Key.Key_Down.value: b"\x1b[B",
        Qt.Key.Key_Right.value: b"\x1b[C",
        Qt.Key.Key_Left.value: b"\x1b[D",
        Qt.Key.Key_Home.value: b"\x1b[H",
        Qt.Key.Key_End.value: b"\x1b[F",
        Qt.Key.Key_Delete.value: b"\x1b[3~",
        Qt.Key.Key_PageUp.value: b"\x1b[5~",
        Qt.Key.Key_PageDown.value: b"\x1b[6~",
    }

    def __init__(self, rows: int = 24, cols: int = 80):
        """Create the view with an initial ``rows`` x ``cols`` pyte screen."""
        super().__init__()
        self.setLineWrapMode(QPlainTextEdit.LineWrapMode.NoWrap)
        self.setFont(QFontDatabase.systemFont(QFontDatabase.SystemFont.FixedFont))
        self.setUndoRedoEnabled(False)
        self.setMinimumHeight(260)
        self._rows = rows
        self._cols = cols
        self._screen = pyte.Screen(cols, rows)
        self._stream = pyte.ByteStream(self._screen)

    def grid(self) -> tuple[int, int]:
        """Return the current grid size as ``(rows, cols)``."""
        return self._rows, self._cols

    def feed(self, data: bytes) -> None:
        """Feed PTY output bytes into the screen and redraw."""
        self._stream.feed(data)
        self._render()

    def reset(self) -> None:
        """Reset the pyte screen and redraw."""
        self._screen.reset()
        self._render()

    def _render(self) -> None:
        """Replace the widget text with the screen contents and position the caret."""
        screen = self._screen
        self.setPlainText("\n".join(screen.display))
        # Place the caret at the terminal's actual cursor (row, col) and keep it in view.
        caret = self.textCursor()
        keep = QTextCursor.MoveMode.MoveAnchor
        caret.movePosition(QTextCursor.MoveOperation.Start)
        caret.movePosition(QTextCursor.MoveOperation.Down, keep, screen.cursor.y)
        caret.movePosition(QTextCursor.MoveOperation.Right, keep, screen.cursor.x)
        self.setTextCursor(caret)
        self.ensureCursorVisible()

    def resizeEvent(self, event):  # noqa: N802 (Qt override)
        """Recompute the grid from the viewport size and announce any change."""
        super().resizeEvent(event)
        metrics = QFontMetrics(self.font())
        cell_w = max(1, metrics.horizontalAdvance("M"))
        cell_h = max(1, metrics.height())
        viewport = self.viewport()
        # The grid never shrinks below 6 rows x 20 columns.
        cols = max(20, viewport.width() // cell_w)
        rows = max(6, viewport.height() // cell_h)
        if (rows, cols) == (self._rows, self._cols):
            return
        self._rows, self._cols = rows, cols
        self._screen.resize(rows, cols)
        self._render()
        self.resized.emit(rows, cols)

    def keyPressEvent(self, event):  # noqa: N802 (Qt override)
        """Emit the translated key bytes; the event is always accepted."""
        payload = self._translate(event)
        if payload:
            self.keys.emit(payload)
        event.accept()  # display comes from PTY output, not local editing

    @staticmethod
    def _translate(event) -> bytes:
        """Map a key event to the bytes to send, or ``b""`` when there are none."""
        key = event.key()
        ctrl_held = event.modifiers() & Qt.KeyboardModifier.ControlModifier
        first, last = Qt.Key.Key_A.value, Qt.Key.Key_Z.value
        if ctrl_held and first <= key <= last:
            return bytes([key - first + 1])  # Ctrl-A..Ctrl-Z
        mapped = TerminalView._SPECIAL_KEYS.get(key)
        if mapped is not None:
            return mapped
        typed = event.text()
        return typed.encode("utf-8") if typed else b""
|
||||
@@ -2,6 +2,10 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
_CHECK = (Path(__file__).parent / "assets" / "check.svg").as_posix()
|
||||
|
||||
# Palette (dark)
|
||||
BG = "#101216"
|
||||
SIDEBAR = "#15181e"
|
||||
@@ -10,6 +14,7 @@ CARD_BORDER = "#2a2f39"
|
||||
TRACK = "#2a2f39"
|
||||
TEXT = "#e6e8eb"
|
||||
MUTED = "#8b929c"
|
||||
INPUT_BG = "#0d0f13" # form-control background (must stay dark — see contrast rule)
|
||||
|
||||
ACCENT = "#38bdf8"
|
||||
COLD = "#7dd3fc" # icey-blue
|
||||
@@ -88,4 +93,61 @@ QScrollBar::handle:vertical {{ background: {CARD_BORDER}; border-radius: 5px; mi
|
||||
QScrollBar::handle:vertical:hover {{ background: #3a414d; }}
|
||||
QScrollBar::add-line:vertical, QScrollBar::sub-line:vertical {{ height: 0; }}
|
||||
QScrollBar::add-page:vertical, QScrollBar::sub-page:vertical {{ background: transparent; }}
|
||||
|
||||
QPushButton {{
|
||||
background: #262b34; color: {TEXT}; border: 1px solid {CARD_BORDER};
|
||||
border-radius: 8px; padding: 7px 14px;
|
||||
}}
|
||||
QPushButton:hover {{ background: #2f3540; }}
|
||||
QPushButton:disabled {{ color: #5b626c; background: #1c2026; border-color: #23272f; }}
|
||||
QPushButton#PrimaryButton {{ background: {ACCENT}; color: #06222e; border: none; font-weight: 700; }}
|
||||
QPushButton#PrimaryButton:hover {{ background: #5cc8fb; }}
|
||||
QPushButton#PrimaryButton:disabled {{ background: #27424f; color: #5f7c8a; }}
|
||||
|
||||
QDoubleSpinBox, QSpinBox {{
|
||||
background: #262b34; color: {TEXT}; border: 1px solid {CARD_BORDER};
|
||||
border-radius: 6px; padding: 4px 6px;
|
||||
}}
|
||||
|
||||
QTextEdit#Report {{
|
||||
background: #0d0f13; color: #cfd3da; border: 1px solid {CARD_BORDER}; border-radius: 8px;
|
||||
}}
|
||||
|
||||
QPushButton#DangerButton {{
|
||||
background: transparent; color: {CRIT}; border: 1px solid {CRIT};
|
||||
border-radius: 8px; padding: 7px 14px;
|
||||
}}
|
||||
QPushButton#DangerButton:hover {{ background: {CRIT}; color: #1a0d0d; }}
|
||||
|
||||
QPushButton#LinkButton {{
|
||||
background: transparent; border: none; color: {MUTED};
|
||||
text-align: left; padding: 0; text-decoration: underline;
|
||||
}}
|
||||
QPushButton#LinkButton:hover {{ color: {TEXT}; }}
|
||||
|
||||
QCheckBox {{ spacing: 8px; background: transparent; }}
|
||||
QCheckBox::indicator {{
|
||||
width: 17px; height: 17px; border-radius: 4px;
|
||||
border: 1px solid {MUTED}; background: #262b34;
|
||||
}}
|
||||
QCheckBox::indicator:hover {{ border-color: {ACCENT}; }}
|
||||
QCheckBox::indicator:checked {{
|
||||
background: {ACCENT}; border-color: {ACCENT}; image: url("{_CHECK}");
|
||||
}}
|
||||
|
||||
/* Dialogs (update prompt, changelog) — match the dark theme so text is readable. */
|
||||
QDialog {{ background: {BG}; }}
|
||||
QMessageBox {{ background: {CARD}; }}
|
||||
QDialog QLabel, QMessageBox QLabel {{ color: {TEXT}; background: transparent; }}
|
||||
|
||||
/* Form controls: keep dark bg + light text (Fusion defaults to light-on-light here). */
|
||||
QLineEdit, QPlainTextEdit, QAbstractSpinBox, QComboBox {{
|
||||
background: {INPUT_BG}; color: {TEXT};
|
||||
border: 1px solid {CARD_BORDER}; border-radius: 6px; padding: 5px 8px;
|
||||
selection-background-color: {ACCENT}; selection-color: #06222e;
|
||||
}}
|
||||
QLineEdit:focus, QPlainTextEdit:focus, QAbstractSpinBox:focus, QComboBox:focus {{
|
||||
border: 1px solid {ACCENT};
|
||||
}}
|
||||
QLineEdit:disabled, QPlainTextEdit:disabled, QAbstractSpinBox:disabled {{ color: {MUTED}; }}
|
||||
"""
|
||||
|
||||
+122
-7
@@ -2,21 +2,29 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
|
||||
from .core.crashlog import Summary, headline
|
||||
from .core.sample import Reading, Sample
|
||||
|
||||
_GROUP_ORDER = ["gpu", "cpu", "memory", "storage"]
|
||||
_GROUP_TITLES = {"gpu": "GPU", "cpu": "CPU", "memory": "Memory", "storage": "Storage"}
|
||||
|
||||
|
||||
def format_raw(value: float | None, unit: str) -> str:
    """Format a value + unit for display.

    ``None`` becomes ``"N/A"``; temperatures in °C get one decimal place; all
    other values use the general (``g``) float format, followed by the unit
    when one is given.
    """
    if value is None:
        return "N/A"
    if unit == "°C":
        return f"{value:.1f} °C"
    number = f"{value:g}"
    return f"{number} {unit}" if unit else number
|
||||
|
||||
|
||||
def format_value(r: Reading) -> str:
    """Format a reading's value + unit for display (shared by CLI and GUI).

    Delegates to ``format_raw`` so that readings and raw values are formatted by
    a single implementation.
    """
    return format_raw(r.value, r.unit)
|
||||
|
||||
|
||||
def metric_label(r: Reading) -> str:
|
||||
@@ -41,3 +49,110 @@ def render_snapshot(sample: Sample) -> str:
|
||||
lines = [title] + [_fmt(r) for r in groups[key]]
|
||||
blocks.append("\n".join(lines))
|
||||
return "\n\n".join(blocks)
|
||||
|
||||
|
||||
def format_headline(h: dict) -> str:
    """One-line headline summary from a headline() dict.

    Shows GPU temperature, utilisation and power, then CPU temperature and
    memory percentage. A key that is absent or ``None`` is shown as "—".
    """

    def cell(key: str, unit: str) -> str:
        value = h.get(key)
        return "—" if value is None else format_raw(value, unit)

    gpu_fields = (("gpu_temp", "°C"), ("gpu_util", "%"), ("gpu_power", "W"))
    gpu = " ".join(cell(key, unit) for key, unit in gpu_fields)
    cpu = cell("cpu_temp", "°C")
    mem = cell("mem_pct", "%")
    return f"GPU {gpu} · CPU {cpu} · MEM {mem}"
|
||||
|
||||
|
||||
def _fmt_duration(seconds: float) -> str:
|
||||
seconds = int(seconds)
|
||||
h, rem = divmod(seconds, 3600)
|
||||
m, s = divmod(rem, 60)
|
||||
if h:
|
||||
return f"{h}h {m}m {s}s"
|
||||
if m:
|
||||
return f"{m}m {s}s"
|
||||
return f"{s}s"
|
||||
|
||||
|
||||
# Metrics worth surfacing as session peaks (by metric name within reading.key).
|
||||
_PEAK_METRICS = ("temp", "power", "util", "mem_util", "fan", "used_pct")
|
||||
_SOURCE_ORDER = {"gpu": 0, "cpu": 1, "memory": 2, "storage": 3}
|
||||
|
||||
|
||||
def _aggregate_peaks(maxima: dict) -> list[tuple[str, str, float, str, float, str]]:
|
||||
"""Collapse per-label maxima to the single worst value per (source, metric).
|
||||
|
||||
Returns rows of (source, metric, value, unit, ts, label) in display order.
|
||||
"""
|
||||
agg: dict[tuple[str, str], tuple[float, str, float, str]] = {}
|
||||
for key, (value, unit, ts) in maxima.items():
|
||||
parts = key.split(".")
|
||||
if len(parts) < 2 or parts[1] not in _PEAK_METRICS:
|
||||
continue
|
||||
source, metric = parts[0], parts[1]
|
||||
label = ".".join(parts[2:])
|
||||
current = agg.get((source, metric))
|
||||
if current is None or value > current[0]:
|
||||
agg[(source, metric)] = (value, unit, ts, label)
|
||||
rows = [(s, m, v, u, ts, lbl) for (s, m), (v, u, ts, lbl) in agg.items()]
|
||||
rows.sort(key=lambda r: (_SOURCE_ORDER.get(r[0], 9), r[1]))
|
||||
return rows
|
||||
|
||||
|
||||
_SEV_LABEL = {"critical": "CRITICAL", "warning": "WARNING", "info": "INFO", "ok": "OK"}


def render_health(findings: list) -> str:
    """Render health findings as a plain-text report.

    The header counts critical and warning findings plus the total. Each finding
    then gets a ``[SEVERITY] category: title`` line, followed by its detail and
    suggestion when present, and a blank separator line. An unknown severity is
    labelled ``?``.
    """
    if not findings:
        return "Health report: no findings."
    critical = len([f for f in findings if f.severity == "critical"])
    warnings = len([f for f in findings if f.severity == "warning"])
    out = ["Health report", "", f" {critical} critical · {warnings} warning · {len(findings)} checks", ""]
    for finding in findings:
        label = _SEV_LABEL.get(finding.severity, '?')
        entry = [f"[{label}] {finding.category}: {finding.title}"]
        if finding.detail:
            entry.append(f" {finding.detail}")
        if finding.suggestion:
            entry.append(f" → {finding.suggestion}")
        entry.append("")
        out.extend(entry)
    return "\n".join(out).rstrip()
|
||||
|
||||
|
||||
def render_summary(summary: Summary, log_path=None) -> str:
    """Render a crash-capture ``Summary`` as a plain-text report.

    With no samples and no events, returns a one-line "no data" message instead.
    Otherwise the report lists the capture window (when both ends are known), the
    sample count, the log path (when given), then optional "Events", "Peaks" and
    "Last N samples" sections. Timestamps are shown in local time.
    """
    if summary.samples == 0 and not summary.events:
        where = f" ({log_path})" if log_path else ""
        return f"No capture data found{where}. Start one with: rigdoctor record start"

    def clock(ts, missing: str) -> str:
        # HH:MM:SS in local time, or the placeholder when the timestamp is falsy.
        return time.strftime("%H:%M:%S", time.localtime(ts)) if ts else missing

    out: list[str] = ["Crash-capture report", ""]
    if summary.start and summary.end:
        began = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(summary.start))
        ended = time.strftime("%H:%M:%S", time.localtime(summary.end))
        span = _fmt_duration(summary.end - summary.start)
        out.append(f" Window : {began} → {ended} ({span})")
    out.append(f" Samples : {summary.samples}")
    if log_path:
        out.append(f" Log : {log_path}")

    if summary.events:
        out.extend(["", "Events"])
        for ts, kind, detail in summary.events:
            # Events whose kind contains "lost" get a warning mark.
            mark = " ⚠" if "lost" in kind else " "
            suffix = f" — {detail}" if detail else ""
            out.append(f" {mark} {clock(ts, '--:--:--')} {kind}{suffix}")

    peaks = _aggregate_peaks(summary.maxima)
    if peaks:
        out.extend(["", "Peaks (session maximum)"])
        for source, metric, value, unit, ts, label in peaks:
            name = f"{source} {metric}"
            detail = f" ({label})" if label else ""
            out.append(f" {name:<16} {format_raw(value, unit):>10} at {clock(ts, '')}{detail}")

    if summary.last:
        out.extend(["", f"Last {len(summary.last)} samples (most recent last)"])
        for sample in summary.last:
            out.append(f" {clock(sample.ts, '--:--:--')} {format_headline(headline(sample))}")

    return "\n".join(out)
|
||||
|
||||
@@ -0,0 +1,38 @@
|
||||
"""Tests for the M8 alert monitor (edge-triggered; notify mocked)."""
|
||||
|
||||
import unittest
|
||||
from unittest import mock
|
||||
|
||||
from rigdoctor.core import alerts
|
||||
from rigdoctor.core.sample import Reading, Sample
|
||||
|
||||
|
||||
def _gpu(temp):
    """Build a Sample holding a single GPU temperature reading in °C."""
    reading = Reading("gpu", "temp", temp, "°C")
    return Sample(readings=[reading])
|
||||
|
||||
|
||||
class AlertTests(unittest.TestCase):
    """AlertMonitor behaviour with ``alerts.notify`` replaced by a mock."""

    @mock.patch.object(alerts, "notify")
    def test_edge_triggered_no_repeat(self, notify):
        """An alert fires on crossing the threshold, not while it stays exceeded."""
        monitor = alerts.AlertMonitor(gpu_temp=90.0, cooldown=0.0)
        for temp in (95, 96):  # fires, then still hot — no repeat while active
            monitor.check(_gpu(temp))
        self.assertEqual(notify.call_count, 1)
        for temp in (50, 95):  # clears, then hot again — fires
            monitor.check(_gpu(temp))
        self.assertEqual(notify.call_count, 2)

    @mock.patch.object(alerts, "notify")
    def test_no_alert_below_threshold(self, notify):
        """A reading under the threshold produces no notification."""
        monitor = alerts.AlertMonitor(gpu_temp=90.0)
        monitor.check(_gpu(70))
        notify.assert_not_called()

    @mock.patch.object(alerts, "notify")
    def test_gpu_lost(self, notify):
        """A GPU status reading with a query-timeout notifies exactly once."""
        lost = Sample(readings=[Reading("gpu", "status", None, "", "query-timeout")])
        alerts.AlertMonitor().check(lost)
        notify.assert_called_once()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,32 @@
|
||||
"""Tests for config save/load (flat TOML writer)."""
|
||||
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from rigdoctor import config
|
||||
|
||||
|
||||
class ConfigTests(unittest.TestCase):
    """Config save/load against a temporary directory (module paths patched)."""

    def test_save_load_round_trip(self):
        """Values written by save_config come back unchanged from load_config."""
        written = {"alerts_enabled": False, "gpu_temp_alert": 88.0, "update_check_minutes": 5}
        with tempfile.TemporaryDirectory() as tmp:
            config_file = Path(tmp) / "config.toml"
            with mock.patch.object(config, "CONFIG_FILE", config_file), mock.patch.object(config, "CONFIG_DIR", Path(tmp)):
                config.save_config(written)
                loaded = config.load_config()
                self.assertIs(loaded["alerts_enabled"], False)
                self.assertEqual(loaded["gpu_temp_alert"], 88.0)
                self.assertEqual(loaded["update_check_minutes"], 5)

    def test_update_config_merges_and_keeps_defaults(self):
        """update_config changes the given key and leaves other defaults alone."""
        with tempfile.TemporaryDirectory() as tmp:
            config_file = Path(tmp) / "config.toml"
            with mock.patch.object(config, "CONFIG_FILE", config_file), mock.patch.object(config, "CONFIG_DIR", Path(tmp)):
                config.update_config(cpu_temp_alert=70.0)
                self.assertEqual(config.load_config()["cpu_temp_alert"], 70.0)
                self.assertEqual(config.load_config()["gpu_temp_alert"], 90.0)  # default preserved
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,103 @@
|
||||
"""Tests for the M3 crash-capture log: writer, rotation, reader, summary, recorder."""
|
||||
|
||||
import tempfile
|
||||
import threading
|
||||
import time
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from rigdoctor.core.crashlog import CrashLogWriter, iter_records, summarize
|
||||
from rigdoctor.core.recorder import Recorder
|
||||
from rigdoctor.core.sample import Reading, Sample
|
||||
from rigdoctor.core.sampler import Sampler
|
||||
from rigdoctor.core.sources.base import Source
|
||||
|
||||
|
||||
class _FakeSource(Source):
    """Source stub that always probes true and returns three fixed GPU readings."""

    name = "gpu"

    def __init__(self, temp=50.0):
        self._temp = temp

    def probe(self):
        """Report the fake source as available."""
        return True

    def read(self):
        """Return a name, a temperature (the configured value) and a power reading."""
        name = Reading("gpu", "name", None, "", "Fake GPU")
        temp = Reading("gpu", "temp", self._temp, "°C")
        power = Reading("gpu", "power", 100.0, "W")
        return [name, temp, power]
|
||||
|
||||
|
||||
class CrashLogTests(unittest.TestCase):
    """Crash-capture log: writer, rotation, reader, summary and recorder."""

    def test_write_and_read_roundtrip(self):
        """Events and samples written to the log are read back in order."""
        with tempfile.TemporaryDirectory() as tmp:
            log = Path(tmp) / "capture.jsonl"
            writer = CrashLogWriter(log)
            writer.write_event("session-start")
            writer.write_sample(Sample(ts=1.0, readings=[Reading("gpu", "temp", 60.0, "°C")]))
            writer.write_event("gpu-lost", "timeout")
            writer.close()

            first, second, third = list(iter_records(log))[:3]
            self.assertEqual(first["event"], "session-start")
            self.assertEqual(second["readings"][0], ["gpu", "temp", 60.0, "°C", ""])
            self.assertEqual(third["event"], "gpu-lost")

    def test_rotation_bounds_segments(self):
        """Rotation keeps at most base + backups files and the log stays readable."""
        with tempfile.TemporaryDirectory() as tmp:
            log = Path(tmp) / "capture.jsonl"
            writer = CrashLogWriter(log, max_bytes=200, backups=2)
            for i in range(200):
                value = float(i)
                writer.write_sample(Sample(ts=value, readings=[Reading("gpu", "temp", value, "°C")]))
            writer.close()
            # base + at most `backups` rotated segments
            segments = list(Path(tmp).glob("capture.jsonl*"))
            self.assertLessEqual(len(segments), 3)
            self.assertTrue((Path(tmp) / "capture.jsonl").exists())
            # rotation must not lose readability across segments
            samples = [record for record in iter_records(log) if "readings" in record]
            self.assertGreater(len(samples), 0)

    def test_summary_tracks_peaks_and_events(self):
        """summarize() counts samples and records maxima, events and recent samples."""
        with tempfile.TemporaryDirectory() as tmp:
            log = Path(tmp) / "capture.jsonl"
            writer = CrashLogWriter(log)
            for ts, temp in ((1.0, 60.0), (2.0, 81.0)):
                writer.write_sample(Sample(ts=ts, readings=[Reading("gpu", "temp", temp, "°C")]))
            writer.write_event("gpu-lost", "timeout")
            writer.close()

            summary = summarize(log)
            self.assertEqual(summary.samples, 2)
            self.assertEqual(summary.maxima["gpu.temp"][0], 81.0)
            self.assertEqual(summary.events[0][1], "gpu-lost")
            self.assertEqual(len(summary.last), 2)

    def test_recorder_writes_samples_and_stops(self):
        """A Recorder run in a thread samples, writes status, and stops on request."""
        with tempfile.TemporaryDirectory() as tmp:
            log = Path(tmp) / "capture.jsonl"
            status = Path(tmp) / "status.json"
            recorder = Recorder(
                interval=0.02,
                log_path=log,
                status_path=status,
                sampler=Sampler([_FakeSource()]),
            )
            worker = threading.Thread(target=recorder.run)
            worker.start()
            time.sleep(0.2)
            recorder.stop()
            worker.join(timeout=2)

            self.assertFalse(worker.is_alive())
            self.assertGreater(recorder.samples, 0)
            self.assertTrue(status.exists())
            kinds = [record.get("event") for record in iter_records(log) if "event" in record]
            self.assertIn("session-start", kinds)
            self.assertIn("session-stop", kinds)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,46 @@
|
||||
"""Tests for the M4 health report's log scanner (synthetic input)."""
|
||||
|
||||
import unittest
|
||||
|
||||
from rigdoctor.core.health import CRITICAL, WARNING, run_health_checks, scan_journal_text
|
||||
|
||||
|
||||
class HealthScanTests(unittest.TestCase):
    """Journal-text scanner checks on synthetic log lines."""

    def test_xid_79_is_critical(self):
        """A single Xid 79 line yields one critical GPU finding naming the Xid."""
        line = "NVRM: Xid (PCI:0000:01:00): 79, pid=1234, GPU has fallen off the bus."
        gpu_findings = [f for f in scan_journal_text(line) if f.category == "GPU"]
        self.assertEqual(len(gpu_findings), 1)
        finding = gpu_findings[0]
        self.assertIn("79", finding.title)
        self.assertEqual(finding.severity, CRITICAL)

    def test_xid_count_aggregates(self):
        """Repeated Xid lines collapse into one finding with a ×N count."""
        journal = "\n".join(["NVRM: Xid (PCI:0000:01:00): 79, foo"] * 3)
        gpu_findings = [f for f in scan_journal_text(journal) if f.category == "GPU"]
        self.assertIn("×3", gpu_findings[0].title)

    def test_oom_and_panic_detected(self):
        """OOM kills and kernel panics are reported under Memory and Kernel."""
        journal = "Out of memory: Killed process 999 (game)\nKernel panic - not syncing: x"
        categories = {f.category for f in scan_journal_text(journal)}
        self.assertIn("Memory", categories)
        self.assertIn("Kernel", categories)

    def test_mce_critical(self):
        """A machine-check line produces a critical Hardware finding."""
        findings = scan_journal_text("mce: [Hardware Error]: Machine check events logged")
        matches = [f for f in findings if f.severity == CRITICAL and f.category == "Hardware"]
        self.assertTrue(any(matches))

    def test_clean_text_yields_no_findings(self):
        """Unremarkable log lines produce no findings."""
        journal = "usb 1-1: new high-speed USB device\nbluetooth: ok"
        self.assertEqual(scan_journal_text(journal), [])

    def test_run_health_checks_returns_findings(self):
        """run_health_checks() returns a list ordered from most to least severe."""
        # Runs against the real system; just assert it returns a sorted list of Findings.
        findings = run_health_checks()
        self.assertIsInstance(findings, list)
        rank = {"critical": 0, "warning": 1, "info": 2, "ok": 3}
        ranks = [rank.get(f.severity, 9) for f in findings]
        self.assertEqual(ranks, sorted(ranks))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,46 @@
|
||||
"""Tests for the M9 installer logic and the M13 version comparison."""
|
||||
|
||||
import unittest
|
||||
|
||||
from rigdoctor.core import installer
|
||||
from rigdoctor.core.catalog import Component
|
||||
from rigdoctor.core.updates import is_newer
|
||||
|
||||
|
||||
class InstallerTests(unittest.TestCase):
    """Installer helpers: component status, package lists and the apt command."""

    def test_component_status_uses_presence(self):
        """A component counts as installed when the presence callback accepts its command."""
        def only_smartctl(cmd):
            return cmd == "smartctl"

        installed = {comp.id: ok for comp, ok in installer.component_status(present=only_smartctl)}
        self.assertTrue(installed["smartmontools"])
        self.assertFalse(installed["dmidecode"])

    def test_missing_packages_dedup_preserves_order(self):
        """Packages shared by two components are listed once, in first-seen order."""
        first = Component("a", "A", "B", "x", ("p1", "p2"), "c1")
        second = Component("b", "B", "B", "y", ("p2", "p3"), "c2")
        self.assertEqual(installer.missing_packages([first, second]), ["p1", "p2", "p3"])

    def test_apt_command_includes_packages(self):
        """The apt command mentions every requested package and 'apt-get install'."""
        command = " ".join(installer.apt_install_command(["smartmontools", "dmidecode"]))
        for fragment in ("smartmontools", "dmidecode", "apt-get install"):
            self.assertIn(fragment, command)

    def test_install_nothing_is_noop(self):
        """Installing an empty package list returns exit code 0."""
        exit_code, _ = installer.install_packages([])
        self.assertEqual(exit_code, 0)
|
||||
|
||||
|
||||
class UpdateTests(unittest.TestCase):
    """is_newer(candidate, reference): true only for a strictly greater, parseable candidate."""

    def test_is_newer(self):
        """A higher version is newer; an equal or lower one is not (leading `v` accepted)."""
        reference = "0.0.4"
        self.assertTrue(is_newer("v0.0.5", reference))
        for candidate in ("v0.0.4", "v0.0.3"):
            self.assertFalse(is_newer(candidate, reference))

    def test_is_newer_handles_garbage(self):
        """An unparseable candidate is reported as not newer."""
        verdict = is_newer("not-a-version", "0.0.4")
        self.assertFalse(verdict)
|
||||
|
||||
|
||||
# Script entry point: run this module's tests with the default unittest runner.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -0,0 +1,30 @@
|
||||
"""Tests for the M5 system inventory (render + dict round-trip; collect on real system)."""
|
||||
|
||||
import unittest
|
||||
|
||||
from rigdoctor.core import inventory
|
||||
from rigdoctor.core.inventory import Section
|
||||
|
||||
|
||||
class InventoryTests(unittest.TestCase):
    """System inventory: live collection, dict round-trip, and Markdown rendering."""

    def test_collect_returns_sections(self):
        """collect() on the real machine yields a non-empty result with System and CPU sections."""
        collected = inventory.collect()
        self.assertTrue(collected)
        seen_titles = set()
        for section in collected:
            seen_titles.add(section.title)
        for expected in ("System", "CPU"):
            self.assertIn(expected, seen_titles)

    def test_dict_round_trip(self):
        """to_dict() followed by from_dict() preserves the section title and its items."""
        expected_items = [("Kernel", "7.0.0"), ("Distro", "Ubuntu")]
        # Give the Section its own copy so the expectation cannot be altered through it.
        original = [Section("System", list(expected_items))]
        serialized = inventory.to_dict(original)
        restored = inventory.from_dict(serialized)
        self.assertEqual(restored[0].title, "System")
        self.assertEqual(restored[0].items, expected_items)

    def test_render_markdown(self):
        """Each section renders as a `##` heading with one bold-keyed bullet per item."""
        cpu = Section("CPU", [("Model", "Test CPU")])
        rendered = inventory.render_markdown([cpu])
        for needle in ("## CPU", "- **Model:** Test CPU"):
            self.assertIn(needle, rendered)
|
||||
|
||||
|
||||
# Script entry point: run this module's tests with the default unittest runner.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -0,0 +1,27 @@
|
||||
"""Tests for the host PTY session (M12 Tier 3)."""
|
||||
|
||||
import time
|
||||
import unittest
|
||||
|
||||
from rigdoctor.core.pty_session import PtySession
|
||||
|
||||
|
||||
class PtySessionTests(unittest.TestCase):
    """Smoke test for PtySession: write a command, then poll read() for its output."""

    def test_runs_command_and_reads_output(self):
        """A marker echoed through the session appears in the data read back within 3 s."""
        # Named `session` rather than `pty` so the stdlib `pty` module name is not shadowed.
        session = PtySession(rows=24, cols=80)
        try:
            time.sleep(0.4)
            session.read()  # drain the shell prompt
            session.write(b"echo PTY_MARKER_42\n")
            # NOTE(review): the tty may echo the typed command, which also contains
            # the marker, so this may pass without the command having run. Confirm.

            # Use the monotonic clock for the deadline: a wall-clock adjustment
            # (NTP step, manual change) must not shorten or stretch the budget.
            deadline = time.monotonic() + 3
            buf = ""
            while time.monotonic() < deadline and "PTY_MARKER_42" not in buf:
                time.sleep(0.1)
                buf += session.read().decode(errors="replace")
            self.assertIn("PTY_MARKER_42", buf)
        finally:
            # Always release the session, even when the assertion fails.
            session.close()
|
||||
|
||||
|
||||
# Script entry point: run this module's tests with the default unittest runner.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -0,0 +1,38 @@
|
||||
"""Tests for M12 relay frames + guest HTML rendering (host/guest data shapes)."""
|
||||
|
||||
import json
|
||||
import unittest
|
||||
|
||||
from rigdoctor.core import share
|
||||
from rigdoctor.core.sampler import Sampler
|
||||
from rigdoctor.core.sources import available_sources
|
||||
|
||||
|
||||
class RelayFrameTests(unittest.TestCase):
    """Shape checks for the host relay frames and the guest HTML renderer."""

    def setUp(self):
        """Build a Sampler over whatever sources this machine offers."""
        sources = available_sources()
        self.sampler = Sampler(sources)

    def test_full_frame_shape(self):
        """A full frame is JSON with type `full`, a snapshot, a report list and an inventory dict."""
        payload = share.host_full_frame(self.sampler)
        decoded = json.loads(payload)
        self.assertEqual(decoded["type"], "full")
        self.assertIn("groups", decoded["snapshot"])
        self.assertIsInstance(decoded["report"], list)
        self.assertIsInstance(decoded["inventory"], dict)

    def test_snapshot_frame_shape(self):
        """A snapshot frame is JSON with type `snapshot` and a `groups` key in its snapshot."""
        payload = share.host_snapshot_frame(self.sampler)
        decoded = json.loads(payload)
        self.assertEqual(decoded["type"], "snapshot")
        self.assertIn("groups", decoded["snapshot"])

    def test_guest_html_renders(self):
        """guest_html() includes the reading, the report title and the inventory key in its output."""
        gpu_temp = {"name": "temp", "value": 51.0, "unit": "°C"}
        snapshot = {"groups": {"gpu": [gpu_temp]}}
        report = [{"severity": "ok", "category": "Logs", "title": "No errors"}]
        inventory_data = {"System": {"Kernel": "7.0.0"}}
        page = share.guest_html(snapshot, report, inventory_data)
        for expected in ("51.0 °C", "No errors", "Kernel"):
            self.assertIn(expected, page)
|
||||
|
||||
|
||||
# Script entry point: run this module's tests with the default unittest runner.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -0,0 +1,46 @@
|
||||
"""Tests for M12 Tier 2 share server: token gating + endpoints."""
|
||||
|
||||
import json
|
||||
import threading
|
||||
import unittest
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
|
||||
from rigdoctor.core import share
|
||||
|
||||
|
||||
class ShareServerTests(unittest.TestCase):
    """Token gating and endpoints of the share server, exercised over HTTP on loopback."""

    def setUp(self):
        """Start a server on an OS-chosen port and serve it from a background thread."""
        # Port 0 asks the OS for a free port; the bound port is read back below.
        self.srv, self.token = share.make_server("127.0.0.1", 0)
        self.port = self.srv.server_address[1]
        self.thread = threading.Thread(target=self.srv.serve_forever, daemon=True)
        self.thread.start()

    def tearDown(self):
        """Stop the serve loop, release the listening socket, and reap the thread."""
        self.srv.shutdown()
        # shutdown() only stops serve_forever(); the listening socket stays open
        # until server_close(). Assumes make_server() returns a socketserver-style
        # server (it already exposes serve_forever/shutdown/server_address).
        self.srv.server_close()
        self.thread.join(timeout=5)

    def _url(self, path, token=None):
        """Return the loopback URL for *path*, appending `?t=<token>` when a token is given."""
        q = f"?t={token}" if token else ""
        return f"http://127.0.0.1:{self.port}{path}{q}"

    def _assert_forbidden(self, url):
        """Assert that fetching *url* fails with HTTP 403."""
        with self.assertRaises(urllib.error.HTTPError) as cm:
            urllib.request.urlopen(url, timeout=10)
        error = cm.exception
        # HTTPError doubles as the response object; close it so its socket is released.
        error.close()
        self.assertEqual(error.code, 403)

    def test_requires_token(self):
        """A request without any token is rejected."""
        self._assert_forbidden(self._url("/api/snapshot"))

    def test_bad_token_rejected(self):
        """A request with the wrong token is rejected."""
        self._assert_forbidden(self._url("/api/snapshot", "wrong"))

    def test_snapshot_with_token(self):
        """With the right token, /api/snapshot returns JSON containing `groups`."""
        url = self._url("/api/snapshot", self.token)
        with urllib.request.urlopen(url, timeout=10) as response:
            data = json.load(response)
        self.assertIn("groups", data)

    def test_page_served(self):
        """With the right token, the root page is served and carries the share banner text."""
        url = self._url("/", self.token)
        with urllib.request.urlopen(url, timeout=10) as response:
            body = response.read()
        self.assertIn(b"read-only share", body)
|
||||
|
||||
|
||||
# Script entry point: run this module's tests with the default unittest runner.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -0,0 +1,36 @@
|
||||
"""Tests for update-token storage (file fallback + env override), keyring mocked out."""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from rigdoctor import config
|
||||
|
||||
|
||||
class TokenStorageTests(unittest.TestCase):
    """Update-token storage with `config._secret_tool` patched to return None."""

    def test_file_fallback_roundtrip(self):
        """Save, load and clear go through the token file, which must be mode 0600."""
        with tempfile.TemporaryDirectory() as scratch:
            token_path = Path(scratch) / "token"
            # Patchers take effect only once entered by the `with` below.
            no_secret_tool = mock.patch.object(config, "_secret_tool", return_value=None)
            redirected_file = mock.patch.object(config, "TOKEN_FILE", token_path)
            empty_env = mock.patch.dict(os.environ, {}, clear=True)
            with no_secret_tool, redirected_file, empty_env:
                # Nothing stored yet.
                self.assertIsNone(config.load_token())

                config.save_token("abc123")
                self.assertEqual(config.load_token(), "abc123")
                self.assertEqual(config.token_backend(), "file")
                # Only the owner may read or write the token file.
                permission_bits = token_path.stat().st_mode & 0o777
                self.assertEqual(permission_bits, 0o600)

                config.clear_token()
                self.assertIsNone(config.load_token())
                self.assertEqual(config.token_backend(), "none")

    def test_env_override_wins(self):
        """With RIGDOCTOR_TOKEN set, its value is loaded and the backend reports `env`."""
        environment = {"RIGDOCTOR_TOKEN": "envtok"}
        no_secret_tool = mock.patch.object(config, "_secret_tool", return_value=None)
        with no_secret_tool, mock.patch.dict(os.environ, environment, clear=True):
            self.assertEqual(config.load_token(), "envtok")
            self.assertEqual(config.token_backend(), "env")
|
||||
|
||||
|
||||
# Script entry point: run this module's tests with the default unittest runner.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user