feat: detect a hard-crashed diagnostic + analyze the crash boot — 0.15.0

A focused capture that ends without a clean stop (no session-stop, no live
recorder) is treated as a likely hard freeze.

- core/diagnostic.py: pending_crash() detects the unterminated session;
  acknowledge_crash() dismisses it; analyze_crash() combines the captured window
  (final readings + GPU-lost) with a focused scan of the PREVIOUS (crashed) boot
  + SMART/driver/persistence/temps.
- health.check_previous_boot() scans `journalctl -k -b -1`; run_health_checks
  gained include_journal to avoid double-scanning for the crash path.
- GUI: Games page shows a warning banner on launch for an interrupted diagnostic
  with Analyze crash / Dismiss → results dialog.
- Tests for crash detection / clean-stop / acknowledge / in-progress.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-22 08:52:59 +02:00
parent 8d695227bc
commit 305c88ba09
7 changed files with 231 additions and 6 deletions
+46
View File
@@ -57,5 +57,51 @@ class FinishTests(unittest.TestCase):
self.assertTrue(any(kind == "gpu-lost" for _ts, kind, _d in result.summary.events))
class CrashDetectionTests(unittest.TestCase):
def _diag_log(self, d) -> Path:
return Path(d) / "diagnostic.jsonl"
def test_unterminated_session_is_a_pending_crash(self):
with tempfile.TemporaryDirectory() as d:
log = self._diag_log(d)
_write_log(str(log), "Tarkov") # has session-start + game, no session-stop
with mock.patch.object(diagnostic.config, "DIAG_LOG", log), \
mock.patch.object(diagnostic.reccontrol, "running_pid", return_value=None):
info = diagnostic.pending_crash()
self.assertIsNotNone(info)
self.assertEqual(info.game, "Tarkov")
self.assertTrue(info.gpu_lost) # _write_log writes a gpu-lost event
def test_clean_stop_is_not_a_crash(self):
with tempfile.TemporaryDirectory() as d:
log = self._diag_log(d)
w = CrashLogWriter(str(log))
w.write_event("session-start"); w.write_event("game", "X")
w.write_sample(Sample(time.time(), [Reading("gpu", "temp", 60.0, "°C", "")]))
w.write_event("session-stop", "samples=1")
w.close()
with mock.patch.object(diagnostic.config, "DIAG_LOG", log), \
mock.patch.object(diagnostic.reccontrol, "running_pid", return_value=None):
self.assertIsNone(diagnostic.pending_crash())
def test_acknowledge_clears_pending_crash(self):
with tempfile.TemporaryDirectory() as d:
log = self._diag_log(d)
_write_log(str(log), "Tarkov")
with mock.patch.object(diagnostic.config, "DIAG_LOG", log), \
mock.patch.object(diagnostic.reccontrol, "running_pid", return_value=None):
self.assertIsNotNone(diagnostic.pending_crash())
diagnostic.acknowledge_crash()
self.assertIsNone(diagnostic.pending_crash())
def test_running_capture_is_not_a_crash(self):
with tempfile.TemporaryDirectory() as d:
log = self._diag_log(d)
_write_log(str(log), "Tarkov")
with mock.patch.object(diagnostic.config, "DIAG_LOG", log), \
mock.patch.object(diagnostic.reccontrol, "running_pid", return_value=4321):
self.assertIsNone(diagnostic.pending_crash()) # it's in-progress, not crashed
if __name__ == "__main__":
unittest.main()