Compare commits
2 Commits
audit/v1.0
...
audit/v1.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
540a9e39b8 | ||
|
|
58510207fa |
@@ -505,7 +505,7 @@ func (a *App) RunFanStressTestResult(ctx context.Context, opts platform.FanStres
|
||||
if err != nil && err != context.Canceled {
|
||||
body += "\nERROR: " + err.Error()
|
||||
}
|
||||
return ActionResult{Title: "Fan Stress Test", Body: body}, err
|
||||
return ActionResult{Title: "GPU Platform Stress Test", Body: body}, err
|
||||
}
|
||||
|
||||
// formatFanStressResult formats the summary.txt from a fan-stress run, including
|
||||
|
||||
@@ -140,7 +140,7 @@ func (m model) updateConfirm(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
|
||||
pollSATProgress("gpu-amd", since),
|
||||
)
|
||||
case actionRunFanStress:
|
||||
m.busyTitle = "Fan Stress Test"
|
||||
m.busyTitle = "GPU Platform Stress Test"
|
||||
m.progressPrefix = "fan-stress"
|
||||
m.progressSince = time.Now()
|
||||
m.progressLines = nil
|
||||
|
||||
@@ -317,11 +317,11 @@ func renderHealthCheck(m model) string {
|
||||
if m.hcCursor == hcCurFanStress {
|
||||
pfx = "> "
|
||||
}
|
||||
fmt.Fprintf(&b, "%s[ FAN STRESS TEST [F] ] (thermal cycling, fan lag, throttle check)\n", pfx)
|
||||
fmt.Fprintf(&b, "%s[ GPU PLATFORM STRESS TEST [F] ] (thermal cycling, fan lag, throttle check)\n", pfx)
|
||||
}
|
||||
|
||||
fmt.Fprintln(&b)
|
||||
fmt.Fprintln(&b, "─────────────────────────────────────────────────────────────────")
|
||||
fmt.Fprint(&b, "[↑↓] move [space/enter] toggle [letter] single test [R] run all [F] fan stress [Esc] back")
|
||||
fmt.Fprint(&b, "[↑↓] move [space/enter] toggle [letter] single test [R] run all [F] gpu stress [Esc] back")
|
||||
return b.String()
|
||||
}
|
||||
|
||||
@@ -191,7 +191,7 @@ func (m model) confirmBody() (string, string) {
|
||||
return "AMD GPU test", "Run AMD GPU diagnostic pack (rocm-smi)?"
|
||||
case actionRunFanStress:
|
||||
modes := []string{"Quick (2×2min)", "Standard (2×5min)", "Express (2×10min)"}
|
||||
return "Fan Stress Test", "Two-phase GPU thermal cycling test.\n" +
|
||||
return "GPU Platform Stress Test", "Two-phase GPU thermal cycling test.\n" +
|
||||
"Monitors fans, temps, power — detects throttling.\n" +
|
||||
"Mode: " + modes[m.hcMode] + "\n\nAll NVIDIA GPUs will be stressed."
|
||||
default:
|
||||
|
||||
@@ -5,13 +5,13 @@
|
||||
|
||||
set -e
|
||||
|
||||
ROCM_VERSION="6.4"
|
||||
# ROCm versions to try in order (newest first). Fall back if a version's
|
||||
# Release file is missing from the repo (happens with brand-new releases).
|
||||
ROCM_CANDIDATES="6.4 6.3 6.2"
|
||||
ROCM_KEYRING="/etc/apt/keyrings/rocm.gpg"
|
||||
ROCM_LIST="/etc/apt/sources.list.d/rocm.list"
|
||||
APT_UPDATED=0
|
||||
|
||||
echo "=== AMD ROCm ${ROCM_VERSION}: adding repository ==="
|
||||
|
||||
mkdir -p /etc/apt/keyrings
|
||||
|
||||
ensure_tool() {
|
||||
@@ -51,11 +51,26 @@ if ! wget -qO- "https://repo.radeon.com/rocm/rocm.gpg.key" \
|
||||
exit 0
|
||||
fi
|
||||
|
||||
cat > "${ROCM_LIST}" <<EOF
|
||||
deb [arch=amd64 signed-by=${ROCM_KEYRING}] https://repo.radeon.com/rocm/apt/${ROCM_VERSION} bookworm main
|
||||
# Try each candidate ROCm version in order until apt-get update succeeds (i.e. that version's repo has a Release file).
|
||||
ROCM_VERSION=""
|
||||
for candidate in ${ROCM_CANDIDATES}; do
|
||||
cat > "${ROCM_LIST}" <<EOF
|
||||
deb [arch=amd64 signed-by=${ROCM_KEYRING}] https://repo.radeon.com/rocm/apt/${candidate} bookworm main
|
||||
EOF
|
||||
if apt-get update -qq 2>/dev/null; then
|
||||
ROCM_VERSION="${candidate}"
|
||||
echo "=== AMD ROCm ${ROCM_VERSION}: repository available ==="
|
||||
break
|
||||
fi
|
||||
echo "WARN: ROCm ${candidate} repository not available for bookworm, trying next..."
|
||||
rm -f "${ROCM_LIST}"
|
||||
done
|
||||
|
||||
apt-get update -qq
|
||||
if [ -z "${ROCM_VERSION}" ]; then
|
||||
echo "WARN: no ROCm apt repository available for bookworm — skipping ROCm install"
|
||||
rm -f "${ROCM_KEYRING}"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# rocm-smi-lib provides the rocm-smi CLI tool for GPU monitoring
|
||||
if apt-get install -y --no-install-recommends rocm-smi-lib 2>/dev/null; then
|
||||
@@ -63,9 +78,9 @@ if apt-get install -y --no-install-recommends rocm-smi-lib 2>/dev/null; then
|
||||
if [ -x /opt/rocm/bin/rocm-smi ]; then
|
||||
ln -sf /opt/rocm/bin/rocm-smi /usr/local/bin/rocm-smi
|
||||
else
|
||||
candidate="$(find /opt -path '*/bin/rocm-smi' -type f 2>/dev/null | sort | tail -1)"
|
||||
if [ -n "${candidate}" ]; then
|
||||
ln -sf "${candidate}" /usr/local/bin/rocm-smi
|
||||
smi_path="$(find /opt -path '*/bin/rocm-smi' -type f 2>/dev/null | sort | tail -1)"
|
||||
if [ -n "${smi_path}" ]; then
|
||||
ln -sf "${smi_path}" /usr/local/bin/rocm-smi
|
||||
fi
|
||||
fi
|
||||
rocm-smi --version 2>/dev/null || true
|
||||
|
||||
Reference in New Issue
Block a user