Compare commits

..

2 Commits

Author SHA1 Message Date
Mikhail Chusavitin
540a9e39b8 refactor(audit): rename Fan Stress Test → GPU Platform Stress Test
Update all user-facing strings in TUI and ActionResult title.
Internal identifiers (types, functions, file name) unchanged.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-26 09:56:25 +03:00
Mikhail Chusavitin
58510207fa fix(iso): fall back through ROCm 6.4→6.3→6.2 if repo Release file missing
ROCm 6.4 does not yet publish a Release file for Debian Bookworm, causing
the live-build chroot hook to fail with "does not have a Release file".

Try each version in ROCM_CANDIDATES order; skip to the next if apt-get update
fails (repo unavailable). Exit gracefully if none are available.
Also rename the inner 'candidate' variable to 'smi_path' to avoid a collision with the new 'candidate' loop variable.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-26 09:52:17 +03:00
5 changed files with 29 additions and 14 deletions

View File

@@ -505,7 +505,7 @@ func (a *App) RunFanStressTestResult(ctx context.Context, opts platform.FanStres
if err != nil && err != context.Canceled {
body += "\nERROR: " + err.Error()
}
return ActionResult{Title: "Fan Stress Test", Body: body}, err
return ActionResult{Title: "GPU Platform Stress Test", Body: body}, err
}
// formatFanStressResult formats the summary.txt from a fan-stress run, including

View File

@@ -140,7 +140,7 @@ func (m model) updateConfirm(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
pollSATProgress("gpu-amd", since),
)
case actionRunFanStress:
m.busyTitle = "Fan Stress Test"
m.busyTitle = "GPU Platform Stress Test"
m.progressPrefix = "fan-stress"
m.progressSince = time.Now()
m.progressLines = nil

View File

@@ -317,11 +317,11 @@ func renderHealthCheck(m model) string {
if m.hcCursor == hcCurFanStress {
pfx = "> "
}
fmt.Fprintf(&b, "%s[ FAN STRESS TEST [F] ] (thermal cycling, fan lag, throttle check)\n", pfx)
fmt.Fprintf(&b, "%s[ GPU PLATFORM STRESS TEST [F] ] (thermal cycling, fan lag, throttle check)\n", pfx)
}
fmt.Fprintln(&b)
fmt.Fprintln(&b, "─────────────────────────────────────────────────────────────────")
fmt.Fprint(&b, "[↑↓] move [space/enter] toggle [letter] single test [R] run all [F] fan stress [Esc] back")
fmt.Fprint(&b, "[↑↓] move [space/enter] toggle [letter] single test [R] run all [F] gpu stress [Esc] back")
return b.String()
}

View File

@@ -191,7 +191,7 @@ func (m model) confirmBody() (string, string) {
return "AMD GPU test", "Run AMD GPU diagnostic pack (rocm-smi)?"
case actionRunFanStress:
modes := []string{"Quick (2×2min)", "Standard (2×5min)", "Express (2×10min)"}
return "Fan Stress Test", "Two-phase GPU thermal cycling test.\n" +
return "GPU Platform Stress Test", "Two-phase GPU thermal cycling test.\n" +
"Monitors fans, temps, power — detects throttling.\n" +
"Mode: " + modes[m.hcMode] + "\n\nAll NVIDIA GPUs will be stressed."
default:

View File

@@ -5,13 +5,13 @@
set -e
ROCM_VERSION="6.4"
# ROCm versions to try in order (newest first). Fall back if a version's
# Release file is missing from the repo (happens with brand-new releases).
ROCM_CANDIDATES="6.4 6.3 6.2"
ROCM_KEYRING="/etc/apt/keyrings/rocm.gpg"
ROCM_LIST="/etc/apt/sources.list.d/rocm.list"
APT_UPDATED=0
echo "=== AMD ROCm ${ROCM_VERSION}: adding repository ==="
mkdir -p /etc/apt/keyrings
ensure_tool() {
@@ -51,11 +51,26 @@ if ! wget -qO- "https://repo.radeon.com/rocm/rocm.gpg.key" \
exit 0
fi
cat > "${ROCM_LIST}" <<EOF
deb [arch=amd64 signed-by=${ROCM_KEYRING}] https://repo.radeon.com/rocm/apt/${ROCM_VERSION} bookworm main
# Try each ROCm version until apt-get update succeeds (repo has a Release file).
ROCM_VERSION=""
for candidate in ${ROCM_CANDIDATES}; do
cat > "${ROCM_LIST}" <<EOF
deb [arch=amd64 signed-by=${ROCM_KEYRING}] https://repo.radeon.com/rocm/apt/${candidate} bookworm main
EOF
if apt-get update -qq 2>/dev/null; then
ROCM_VERSION="${candidate}"
echo "=== AMD ROCm ${ROCM_VERSION}: repository available ==="
break
fi
echo "WARN: ROCm ${candidate} repository not available for bookworm, trying next..."
rm -f "${ROCM_LIST}"
done
apt-get update -qq
if [ -z "${ROCM_VERSION}" ]; then
echo "WARN: no ROCm apt repository available for bookworm — skipping ROCm install"
rm -f "${ROCM_KEYRING}"
exit 0
fi
# rocm-smi-lib provides the rocm-smi CLI tool for GPU monitoring
if apt-get install -y --no-install-recommends rocm-smi-lib 2>/dev/null; then
@@ -63,9 +78,9 @@ if apt-get install -y --no-install-recommends rocm-smi-lib 2>/dev/null; then
if [ -x /opt/rocm/bin/rocm-smi ]; then
ln -sf /opt/rocm/bin/rocm-smi /usr/local/bin/rocm-smi
else
candidate="$(find /opt -path '*/bin/rocm-smi' -type f 2>/dev/null | sort | tail -1)"
if [ -n "${candidate}" ]; then
ln -sf "${candidate}" /usr/local/bin/rocm-smi
smi_path="$(find /opt -path '*/bin/rocm-smi' -type f 2>/dev/null | sort | tail -1)"
if [ -n "${smi_path}" ]; then
ln -sf "${smi_path}" /usr/local/bin/rocm-smi
fi
fi
rocm-smi --version 2>/dev/null || true