Compare commits

..

3 Commits

Author SHA1 Message Date
Mikhail Chusavitin
bd94b6c792 fix(iso): add libnvidia-ptxjitcompiler + ldconfig for PTX JIT and NCCL
- build-nvidia-module.sh: copy libnvidia-ptxjitcompiler.so.* alongside
  libcuda/libnvidia-ml — required by cuModuleLoadDataEx for PTX JIT.
  Without it: CUDA_ERROR_JIT_COMPILER_NOT_FOUND at runtime.
  Cache check updated to force rebuild when ptxjitcompiler is missing.
- bee-nvidia-load: run ldconfig after module load so that NVIDIA/NCCL
  libs injected into /usr/lib/ are visible to dlopen() callers.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-26 10:37:27 +03:00
Mikhail Chusavitin
06017eddfd feat(tui): remove nvtop auto-launch from NVIDIA SAT
nvtop is no longer shown during NVIDIA SAT runs.
[o] Open nvtop shortcut also removed from the running screen.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-26 10:29:05 +03:00
Mikhail Chusavitin
0ac7b6a963 fix(iso): restore console=tty0 — VGA screen was black without it
Commit d36e844 dropped console=tty0 and added dual-serial + debug logging.
Without console=tty0 the kernel never initialises the VGA console,
leaving the physical screen permanently blank.

- Restore console=tty0 (VGA) as primary, keep console=ttyS0 for SOL
- Drop console=ttyS1 (redundant second serial port)
- Replace loglevel=7 + journald debug flood with loglevel=3 (errors only)
  so kernel messages don't overwrite the TUI on the local screen
- Remove systemd.log_target/forward_to_console debug params

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-26 10:23:53 +03:00
4 changed files with 16 additions and 28 deletions

View File

@@ -3,7 +3,6 @@ package tui
import (
"context"
"fmt"
"os/exec"
"strings"
"bee/audit/internal/platform"
@@ -102,7 +101,7 @@ func (m model) updateNvidiaSATSetup(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
return m, nil
}
// startNvidiaSAT launches the SAT and nvtop.
// startNvidiaSAT launches the NVIDIA acceptance pack.
func (m model) startNvidiaSAT() (tea.Model, tea.Cmd) {
var selectedGPUs []platform.NvidiaGPU
for i, sel := range m.nvidiaGPUSel {
@@ -142,31 +141,12 @@ func (m model) startNvidiaSAT() (tea.Model, tea.Cmd) {
return nvidiaSATDoneMsg{title: result.Title, body: result.Body, err: err}
}
nvtopPath, lookErr := exec.LookPath("nvtop")
if lookErr != nil {
// nvtop not available: just run the SAT, show running screen
return m, satCmd
}
return m, tea.Batch(
satCmd,
tea.ExecProcess(exec.Command(nvtopPath), func(_ error) tea.Msg {
return nvtopClosedMsg{}
}),
)
return m, satCmd
}
// updateNvidiaSATRunning handles keys on the running screen.
func (m model) updateNvidiaSATRunning(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
switch msg.String() {
case "o", "O":
nvtopPath, err := exec.LookPath("nvtop")
if err != nil {
return m, nil
}
return m, tea.ExecProcess(exec.Command(nvtopPath), func(_ error) tea.Msg {
return nvtopClosedMsg{}
})
case "a", "A":
if m.nvidiaSATCancel != nil {
m.nvidiaSATCancel()
@@ -234,5 +214,5 @@ func renderNvidiaSATSetup(m model) string {
// renderNvidiaSATRunning renders the running screen.
func renderNvidiaSATRunning() string {
return "NVIDIA SAT\n\nTest is running...\n\n[o] Open nvtop [a] Abort test [ctrl+c] quit\n"
return "NVIDIA SAT\n\nTest is running...\n\n[a] Abort test [ctrl+c] quit\n"
}

View File

@@ -32,6 +32,6 @@ lb config noauto \
--memtest none \
--iso-volume "EASY-BEE" \
--iso-application "EASY-BEE" \
--bootappend-live "boot=live components console=ttyS0,115200n8 console=ttyS1,115200n8 loglevel=7 systemd.log_target=console systemd.journald.forward_to_console=1 systemd.journald.max_level_console=debug username=bee user-fullname=Bee modprobe.blacklist=nouveau" \
--bootappend-live "boot=live components console=tty0 console=ttyS0,115200n8 loglevel=3 username=bee user-fullname=Bee modprobe.blacklist=nouveau" \
--apt-recommends false \
"${@}"

View File

@@ -46,7 +46,8 @@ CACHE_DIR="${DIST_DIR}/nvidia-${NVIDIA_VERSION}-${KVER}"
CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}"
DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/nvidia-downloads"
EXTRACT_CACHE_DIR="${CACHE_ROOT}/nvidia-extract"
if [ -d "$CACHE_DIR/modules" ] && [ -f "$CACHE_DIR/bin/nvidia-smi" ]; then
if [ -d "$CACHE_DIR/modules" ] && [ -f "$CACHE_DIR/bin/nvidia-smi" ] \
&& [ "$(ls "$CACHE_DIR/lib/libnvidia-ptxjitcompiler.so."* 2>/dev/null | wc -l)" -gt 0 ]; then
echo "=== NVIDIA cached, skipping build ==="
echo "cache: $CACHE_DIR"
echo "modules: $(ls "$CACHE_DIR/modules/"*.ko 2>/dev/null | wc -l) .ko files"
@@ -129,8 +130,10 @@ else
echo "WARNING: no firmware/ dir found in installer (may be needed for Hopper GPUs)"
fi
# Copy ALL userspace library files
for lib in libnvidia-ml libcuda; do
# Copy ALL userspace library files.
# libnvidia-ptxjitcompiler is required by libcuda for PTX JIT compilation
# (cuModuleLoadDataEx with PTX source) — without it CUDA_ERROR_JIT_COMPILER_NOT_FOUND.
for lib in libnvidia-ml libcuda libnvidia-ptxjitcompiler; do
count=0
for f in $(find "$EXTRACT_DIR" -maxdepth 1 -name "${lib}.so.*" 2>/dev/null); do
cp "$f" "$CACHE_DIR/lib/" && count=$((count+1))
@@ -147,7 +150,7 @@ ko_count=$(ls "$CACHE_DIR/modules/"*.ko 2>/dev/null | wc -l)
[ "$ko_count" -gt 0 ] || { echo "ERROR: no .ko files built in $CACHE_DIR/modules/"; exit 1; }
# Create soname symlinks: use [0-9][0-9]* to avoid circular symlink (.so.1 has single digit)
for lib in libnvidia-ml libcuda; do
for lib in libnvidia-ml libcuda libnvidia-ptxjitcompiler; do
versioned=$(ls "$CACHE_DIR/lib/${lib}.so."[0-9][0-9]* 2>/dev/null | head -1)
[ -n "$versioned" ] || continue
base=$(basename "$versioned")

View File

@@ -100,4 +100,9 @@ if [ -n "$uvm_major" ]; then
mknod -m 666 /dev/nvidia-uvm-tools c "$uvm_major" 1 || true
fi
# Refresh dynamic linker cache so that NVIDIA/NCCL libs injected into /usr/lib/
# are visible to dlopen() calls (libcuda, libnvidia-ptxjitcompiler, libnccl, etc.)
ldconfig 2>/dev/null || true
log "ldconfig refreshed"
log "done"