Compare commits
3 Commits
iso/v1.0.1
...
iso/v1.0.1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bd94b6c792 | ||
|
|
06017eddfd | ||
|
|
0ac7b6a963 |
@@ -3,7 +3,6 @@ package tui
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os/exec"
|
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"bee/audit/internal/platform"
|
"bee/audit/internal/platform"
|
||||||
@@ -102,7 +101,7 @@ func (m model) updateNvidiaSATSetup(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
|
|||||||
return m, nil
|
return m, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// startNvidiaSAT launches the SAT and nvtop.
|
// startNvidiaSAT launches the NVIDIA acceptance pack.
|
||||||
func (m model) startNvidiaSAT() (tea.Model, tea.Cmd) {
|
func (m model) startNvidiaSAT() (tea.Model, tea.Cmd) {
|
||||||
var selectedGPUs []platform.NvidiaGPU
|
var selectedGPUs []platform.NvidiaGPU
|
||||||
for i, sel := range m.nvidiaGPUSel {
|
for i, sel := range m.nvidiaGPUSel {
|
||||||
@@ -142,31 +141,12 @@ func (m model) startNvidiaSAT() (tea.Model, tea.Cmd) {
|
|||||||
return nvidiaSATDoneMsg{title: result.Title, body: result.Body, err: err}
|
return nvidiaSATDoneMsg{title: result.Title, body: result.Body, err: err}
|
||||||
}
|
}
|
||||||
|
|
||||||
nvtopPath, lookErr := exec.LookPath("nvtop")
|
|
||||||
if lookErr != nil {
|
|
||||||
// nvtop not available: just run the SAT, show running screen
|
|
||||||
return m, satCmd
|
return m, satCmd
|
||||||
}
|
}
|
||||||
|
|
||||||
return m, tea.Batch(
|
|
||||||
satCmd,
|
|
||||||
tea.ExecProcess(exec.Command(nvtopPath), func(_ error) tea.Msg {
|
|
||||||
return nvtopClosedMsg{}
|
|
||||||
}),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
// updateNvidiaSATRunning handles keys on the running screen.
|
// updateNvidiaSATRunning handles keys on the running screen.
|
||||||
func (m model) updateNvidiaSATRunning(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
|
func (m model) updateNvidiaSATRunning(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
|
||||||
switch msg.String() {
|
switch msg.String() {
|
||||||
case "o", "O":
|
|
||||||
nvtopPath, err := exec.LookPath("nvtop")
|
|
||||||
if err != nil {
|
|
||||||
return m, nil
|
|
||||||
}
|
|
||||||
return m, tea.ExecProcess(exec.Command(nvtopPath), func(_ error) tea.Msg {
|
|
||||||
return nvtopClosedMsg{}
|
|
||||||
})
|
|
||||||
case "a", "A":
|
case "a", "A":
|
||||||
if m.nvidiaSATCancel != nil {
|
if m.nvidiaSATCancel != nil {
|
||||||
m.nvidiaSATCancel()
|
m.nvidiaSATCancel()
|
||||||
@@ -234,5 +214,5 @@ func renderNvidiaSATSetup(m model) string {
|
|||||||
|
|
||||||
// renderNvidiaSATRunning renders the running screen.
|
// renderNvidiaSATRunning renders the running screen.
|
||||||
func renderNvidiaSATRunning() string {
|
func renderNvidiaSATRunning() string {
|
||||||
return "NVIDIA SAT\n\nTest is running...\n\n[o] Open nvtop [a] Abort test [ctrl+c] quit\n"
|
return "NVIDIA SAT\n\nTest is running...\n\n[a] Abort test [ctrl+c] quit\n"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -32,6 +32,6 @@ lb config noauto \
|
|||||||
--memtest none \
|
--memtest none \
|
||||||
--iso-volume "EASY-BEE" \
|
--iso-volume "EASY-BEE" \
|
||||||
--iso-application "EASY-BEE" \
|
--iso-application "EASY-BEE" \
|
||||||
--bootappend-live "boot=live components console=ttyS0,115200n8 console=ttyS1,115200n8 loglevel=7 systemd.log_target=console systemd.journald.forward_to_console=1 systemd.journald.max_level_console=debug username=bee user-fullname=Bee modprobe.blacklist=nouveau" \
|
--bootappend-live "boot=live components console=tty0 console=ttyS0,115200n8 loglevel=3 username=bee user-fullname=Bee modprobe.blacklist=nouveau" \
|
||||||
--apt-recommends false \
|
--apt-recommends false \
|
||||||
"${@}"
|
"${@}"
|
||||||
|
|||||||
@@ -46,7 +46,8 @@ CACHE_DIR="${DIST_DIR}/nvidia-${NVIDIA_VERSION}-${KVER}"
|
|||||||
CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}"
|
CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}"
|
||||||
DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/nvidia-downloads"
|
DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/nvidia-downloads"
|
||||||
EXTRACT_CACHE_DIR="${CACHE_ROOT}/nvidia-extract"
|
EXTRACT_CACHE_DIR="${CACHE_ROOT}/nvidia-extract"
|
||||||
if [ -d "$CACHE_DIR/modules" ] && [ -f "$CACHE_DIR/bin/nvidia-smi" ]; then
|
if [ -d "$CACHE_DIR/modules" ] && [ -f "$CACHE_DIR/bin/nvidia-smi" ] \
|
||||||
|
&& [ "$(ls "$CACHE_DIR/lib/libnvidia-ptxjitcompiler.so."* 2>/dev/null | wc -l)" -gt 0 ]; then
|
||||||
echo "=== NVIDIA cached, skipping build ==="
|
echo "=== NVIDIA cached, skipping build ==="
|
||||||
echo "cache: $CACHE_DIR"
|
echo "cache: $CACHE_DIR"
|
||||||
echo "modules: $(ls "$CACHE_DIR/modules/"*.ko 2>/dev/null | wc -l) .ko files"
|
echo "modules: $(ls "$CACHE_DIR/modules/"*.ko 2>/dev/null | wc -l) .ko files"
|
||||||
@@ -129,8 +130,10 @@ else
|
|||||||
echo "WARNING: no firmware/ dir found in installer (may be needed for Hopper GPUs)"
|
echo "WARNING: no firmware/ dir found in installer (may be needed for Hopper GPUs)"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Copy ALL userspace library files
|
# Copy ALL userspace library files.
|
||||||
for lib in libnvidia-ml libcuda; do
|
# libnvidia-ptxjitcompiler is required by libcuda for PTX JIT compilation
|
||||||
|
# (cuModuleLoadDataEx with PTX source) — without it CUDA_ERROR_JIT_COMPILER_NOT_FOUND.
|
||||||
|
for lib in libnvidia-ml libcuda libnvidia-ptxjitcompiler; do
|
||||||
count=0
|
count=0
|
||||||
for f in $(find "$EXTRACT_DIR" -maxdepth 1 -name "${lib}.so.*" 2>/dev/null); do
|
for f in $(find "$EXTRACT_DIR" -maxdepth 1 -name "${lib}.so.*" 2>/dev/null); do
|
||||||
cp "$f" "$CACHE_DIR/lib/" && count=$((count+1))
|
cp "$f" "$CACHE_DIR/lib/" && count=$((count+1))
|
||||||
@@ -147,7 +150,7 @@ ko_count=$(ls "$CACHE_DIR/modules/"*.ko 2>/dev/null | wc -l)
|
|||||||
[ "$ko_count" -gt 0 ] || { echo "ERROR: no .ko files built in $CACHE_DIR/modules/"; exit 1; }
|
[ "$ko_count" -gt 0 ] || { echo "ERROR: no .ko files built in $CACHE_DIR/modules/"; exit 1; }
|
||||||
|
|
||||||
# Create soname symlinks: use [0-9][0-9]* to avoid circular symlink (.so.1 has single digit)
|
# Create soname symlinks: use [0-9][0-9]* to avoid circular symlink (.so.1 has single digit)
|
||||||
for lib in libnvidia-ml libcuda; do
|
for lib in libnvidia-ml libcuda libnvidia-ptxjitcompiler; do
|
||||||
versioned=$(ls "$CACHE_DIR/lib/${lib}.so."[0-9][0-9]* 2>/dev/null | head -1)
|
versioned=$(ls "$CACHE_DIR/lib/${lib}.so."[0-9][0-9]* 2>/dev/null | head -1)
|
||||||
[ -n "$versioned" ] || continue
|
[ -n "$versioned" ] || continue
|
||||||
base=$(basename "$versioned")
|
base=$(basename "$versioned")
|
||||||
|
|||||||
@@ -100,4 +100,9 @@ if [ -n "$uvm_major" ]; then
|
|||||||
mknod -m 666 /dev/nvidia-uvm-tools c "$uvm_major" 1 || true
|
mknod -m 666 /dev/nvidia-uvm-tools c "$uvm_major" 1 || true
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Refresh dynamic linker cache so that NVIDIA/NCCL libs injected into /usr/lib/
|
||||||
|
# are visible to dlopen() calls (libcuda, libnvidia-ptxjitcompiler, libnccl, etc.)
|
||||||
|
ldconfig 2>/dev/null || true
|
||||||
|
log "ldconfig refreshed"
|
||||||
|
|
||||||
log "done"
|
log "done"
|
||||||
|
|||||||
Reference in New Issue
Block a user