Compare commits

...

6 Commits
v3.10 ... v3.11

Author SHA1 Message Date
c9ee078622 fix(stress): keep platform burn responsive under load 2026-03-31 22:28:26 +03:00
ea660500c9 chore: commit pending repo changes 2026-03-31 22:17:36 +03:00
d43a9aeec7 fix(iso): restore live-build memtest integration 2026-03-31 22:10:28 +03:00
Mikhail Chusavitin
f5622e351e Fix staged John cleanup for repeated ISO builds 2026-03-31 11:40:52 +03:00
Mikhail Chusavitin
a20806afc8 Fix ISO grub package conflict 2026-03-31 11:38:30 +03:00
Mikhail Chusavitin
4f9b6b3bcd Harden NVIDIA boot logging on live ISO 2026-03-31 11:37:21 +03:00
19 changed files with 290 additions and 179 deletions

View File

@@ -10,9 +10,11 @@ import (
"os"
"os/exec"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
"syscall"
"time"
)
@@ -374,10 +376,17 @@ func buildCPUStressCmd(ctx context.Context) (*exec.Cmd, error) {
return nil, fmt.Errorf("stressapptest not found: %w", err)
}
// Use a very long duration; the context timeout will kill it at the right time.
cmd := exec.CommandContext(ctx, path, "-s", "86400", "-W", "--cc_test")
cmdArgs := []string{"-s", "86400", "-W", "--cc_test"}
if threads := platformStressCPUThreads(); threads > 0 {
cmdArgs = append(cmdArgs, "-m", strconv.Itoa(threads))
}
if mb := platformStressMemoryMB(); mb > 0 {
cmdArgs = append(cmdArgs, "-M", strconv.Itoa(mb))
}
cmd := exec.CommandContext(ctx, path, cmdArgs...)
cmd.Stdout = nil
cmd.Stderr = nil
if err := cmd.Start(); err != nil {
if err := startLowPriorityCmd(cmd, 15); err != nil {
return nil, fmt.Errorf("stressapptest start: %w", err)
}
return cmd, nil
@@ -418,7 +427,7 @@ func buildAMDGPUStressCmd(ctx context.Context) *exec.Cmd {
cmd := exec.CommandContext(ctx, rvsPath, "-c", cfgFile)
cmd.Stdout = nil
cmd.Stderr = nil
_ = cmd.Start()
_ = startLowPriorityCmd(cmd, 10)
return cmd
}
@@ -433,10 +442,50 @@ func buildNvidiaGPUStressCmd(ctx context.Context) *exec.Cmd {
cmd := exec.CommandContext(ctx, path, "--seconds", "86400", "--size-mb", "64")
cmd.Stdout = nil
cmd.Stderr = nil
_ = cmd.Start()
_ = startLowPriorityCmd(cmd, 10)
return cmd
}
// startLowPriorityCmd starts cmd and then asks the kernel to apply the given
// nice value to the freshly started process.
//
// Only a failure of cmd.Start is reported to the caller. The priority change
// is best-effort: its error is intentionally discarded so that a workload
// which started successfully is never reported as failed merely because it
// could not be reniced.
func startLowPriorityCmd(cmd *exec.Cmd, nice int) error {
	err := cmd.Start()
	if err != nil {
		return err
	}
	if proc := cmd.Process; proc != nil {
		// Best-effort renice; the process keeps running either way.
		_ = syscall.Setpriority(syscall.PRIO_PROCESS, proc.Pid, nice)
	}
	return nil
}
// platformStressCPUThreads picks the thread count for the platform CPU stress
// workload.
//
// A positive value obtained from envInt for BEE_PLATFORM_STRESS_THREADS is
// returned unchanged. Otherwise the count is derived from runtime.NumCPU():
// 1 thread on machines with at most 2 CPUs, NumCPU-1 on machines with at most
// 8 CPUs, and NumCPU-2 on anything larger.
func platformStressCPUThreads() int {
	if override := envInt("BEE_PLATFORM_STRESS_THREADS", 0); override > 0 {
		return override
	}
	total := runtime.NumCPU()
	if total <= 2 {
		return 1
	}
	// Leave one CPU free on small hosts and two on larger ones.
	reserved := 2
	if total <= 8 {
		reserved = 1
	}
	return total - reserved
}
// platformStressMemoryMB picks the memory size, in megabytes, for the platform
// stress workload.
//
// A positive value obtained from envInt for BEE_PLATFORM_STRESS_MB is returned
// unchanged. Otherwise the result is 60% of freeMemBytes() converted to MB,
// raised to a floor of 1024. A non-positive freeMemBytes() yields 0.
//
// NOTE(review): the 1024 MB floor can exceed 60% of the reported free memory
// on small hosts — confirm that this is intended.
func platformStressMemoryMB() int {
	if override := envInt("BEE_PLATFORM_STRESS_MB", 0); override > 0 {
		return override
	}

	const (
		bytesPerMB = 1024 * 1024
		floorMB    = 1024
	)

	avail := freeMemBytes()
	if avail <= 0 {
		return 0
	}
	target := int((avail * 60) / 100 / bytesPerMB)
	if target >= floorMB {
		return target
	}
	return floorMB
}
func packPlatformDir(dir, dest string) error {
f, err := os.Create(dest)
if err != nil {

View File

@@ -0,0 +1,34 @@
package platform
import (
"runtime"
"testing"
)
// TestPlatformStressCPUThreadsOverride checks that setting
// BEE_PLATFORM_STRESS_THREADS to 7 makes platformStressCPUThreads return 7.
func TestPlatformStressCPUThreadsOverride(t *testing.T) {
	const want = 7
	t.Setenv("BEE_PLATFORM_STRESS_THREADS", "7")
	got := platformStressCPUThreads()
	if got != want {
		t.Fatalf("platformStressCPUThreads=%d want 7", got)
	}
}
// TestPlatformStressCPUThreadsDefaultLeavesHeadroom checks the result when
// BEE_PLATFORM_STRESS_THREADS is set to the empty string: it must be at least
// 1, no larger than runtime.NumCPU(), and strictly below NumCPU on machines
// with more than two CPUs.
func TestPlatformStressCPUThreadsDefaultLeavesHeadroom(t *testing.T) {
	t.Setenv("BEE_PLATFORM_STRESS_THREADS", "")
	numCPU := runtime.NumCPU()
	got := platformStressCPUThreads()
	// Fatalf stops the test, so only the first violated bound is reported.
	switch {
	case got < 1:
		t.Fatalf("platformStressCPUThreads=%d want >= 1", got)
	case got > numCPU:
		t.Fatalf("platformStressCPUThreads=%d want <= NumCPU=%d", got, numCPU)
	case numCPU > 2 && got >= numCPU:
		t.Fatalf("platformStressCPUThreads=%d want headroom below NumCPU=%d", got, numCPU)
	}
}
// TestPlatformStressMemoryMBOverride checks that setting
// BEE_PLATFORM_STRESS_MB to 8192 makes platformStressMemoryMB return 8192.
func TestPlatformStressMemoryMBOverride(t *testing.T) {
	const want = 8192
	t.Setenv("BEE_PLATFORM_STRESS_MB", "8192")
	got := platformStressMemoryMB()
	if got != want {
		t.Fatalf("platformStressMemoryMB=%d want 8192", got)
	}
}

View File

@@ -667,6 +667,22 @@ func (h *handler) handleAPIInstallStream(w http.ResponseWriter, r *http.Request)
// ── Metrics SSE ───────────────────────────────────────────────────────────────
// handleAPIMetricsLatest writes the value returned by h.latestMetric() as a
// JSON document.
//
// When no sample is available the body is the empty object "{}". If the sample
// cannot be marshalled, the marshalling error is returned with status 500.
func (h *handler) handleAPIMetricsLatest(w http.ResponseWriter, r *http.Request) {
	payload := []byte("{}")
	if sample, ok := h.latestMetric(); ok {
		encoded, err := json.Marshal(sample)
		if err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
		payload = encoded
	}
	w.Header().Set("Content-Type", "application/json")
	// A failed write cannot be reported to the client, so the result is dropped.
	_, _ = w.Write(payload)
}
func (h *handler) handleAPIMetricsStream(w http.ResponseWriter, r *http.Request) {
if !sseStart(w) {
return

View File

@@ -532,16 +532,10 @@ function refreshCharts() {
}
setInterval(refreshCharts, 3000);
const es = new EventSource('/api/metrics/stream');
es.addEventListener('metrics', e => {
const d = JSON.parse(e.data);
// Show/hide Fan RPM card based on data availability
fetch('/api/metrics/latest').then(r => r.json()).then(d => {
const fanCard = document.getElementById('card-server-fans');
if (fanCard) fanCard.style.display = (d.fans && d.fans.length > 0) ? '' : 'none';
});
es.onerror = () => {};
}).catch(() => {});
</script>`
}

View File

@@ -270,6 +270,7 @@ func NewHandler(opts HandlerOptions) http.Handler {
// Metrics — SSE stream of live sensor data + server-side SVG charts + CSV export
mux.HandleFunc("GET /api/metrics/stream", h.handleAPIMetricsStream)
mux.HandleFunc("GET /api/metrics/latest", h.handleAPIMetricsLatest)
mux.HandleFunc("GET /api/metrics/chart/", h.handleMetricsChartSVG)
mux.HandleFunc("GET /api/metrics/export.csv", h.handleAPIMetricsExportCSV)
@@ -1230,13 +1231,6 @@ probe();
func (h *handler) handlePage(w http.ResponseWriter, r *http.Request) {
page := strings.TrimPrefix(r.URL.Path, "/")
if page == "" {
// Serve loading page until audit snapshot exists
if _, err := os.Stat(h.opts.AuditPath); err != nil {
w.Header().Set("Cache-Control", "no-store")
w.Header().Set("Content-Type", "text/html; charset=utf-8")
_, _ = w.Write([]byte(loadingPageHTML))
return
}
page = "dashboard"
}
// Redirect old routes to new names

2
bible

Submodule bible updated: 688b87e98d...456c1f022c

View File

@@ -13,9 +13,10 @@ Use one of:
This applies to:
- `iso/builder/config/package-lists/*.list.chroot`
- Any package referenced in `grub.cfg`, hooks, or overlay scripts (e.g. file paths like `/boot/memtest86+x64.bin`)
- Any package referenced in bootloader configs, hooks, or overlay scripts
## Example of what goes wrong without this
## Memtest rule
`memtest86+` in Debian bookworm installs `/boot/memtest86+x64.bin`, not `/boot/memtest86+.bin`.
Guessing the filename caused a broken GRUB entry that only surfaced at boot time, after a full rebuild.
Prefer live-build's built-in memtest integration over custom hooks or hardcoded
bootloader paths. If you ever need to reference memtest files manually, verify
the exact package file list first for the target Debian release.

View File

@@ -29,7 +29,7 @@ lb config noauto \
--security true \
--linux-flavours "amd64" \
--linux-packages "${LB_LINUX_PACKAGES}" \
--memtest none \
--memtest memtest86+ \
--iso-volume "EASY_BEE_${BEE_GPU_VENDOR_UPPER:-NVIDIA}" \
--iso-application "EASY-BEE-${BEE_GPU_VENDOR_UPPER:-NVIDIA}" \
--bootappend-live "boot=live components video=1920x1080 console=tty0 console=ttyS0,115200n8 loglevel=7 username=bee user-fullname=Bee modprobe.blacklist=nouveau" \

View File

@@ -36,6 +36,7 @@ typedef void *CUstream;
#define MAX_CUBLAS_PROFILES 5
#define MIN_PROFILE_BUDGET_BYTES ((size_t)4u * 1024u * 1024u)
#define MIN_STREAM_BUDGET_BYTES ((size_t)64u * 1024u * 1024u)
#define STRESS_LAUNCH_DEPTH 8
static const char *ptx_source =
".version 6.0\n"
@@ -422,24 +423,31 @@ static int run_ptx_fallback(struct cuda_api *api,
double deadline = start + (double)seconds;
while (now_seconds() < deadline) {
launches_per_wave = 0;
for (int lane = 0; lane < stream_count; lane++) {
unsigned int blocks = (unsigned int)((words[lane] + threads - 1) / threads);
if (!check_rc(api,
"cuLaunchKernel",
api->cuLaunchKernel(kernel,
blocks,
1,
1,
threads,
1,
1,
0,
streams[lane],
params[lane],
NULL))) {
goto fail;
for (int depth = 0; depth < STRESS_LAUNCH_DEPTH && now_seconds() < deadline; depth++) {
int launched_this_batch = 0;
for (int lane = 0; lane < stream_count; lane++) {
unsigned int blocks = (unsigned int)((words[lane] + threads - 1) / threads);
if (!check_rc(api,
"cuLaunchKernel",
api->cuLaunchKernel(kernel,
blocks,
1,
1,
threads,
1,
1,
0,
streams[lane],
params[lane],
NULL))) {
goto fail;
}
launches_per_wave++;
launched_this_batch++;
}
if (launched_this_batch <= 0) {
break;
}
launches_per_wave++;
}
if (launches_per_wave <= 0) {
goto fail;
@@ -460,10 +468,11 @@ static int run_ptx_fallback(struct cuda_api *api,
report->iterations = iterations;
snprintf(report->details,
sizeof(report->details),
"fallback_int32=OK requested_mb=%d actual_mb=%d streams=%d per_stream_mb=%zu iterations=%lu\n",
"fallback_int32=OK requested_mb=%d actual_mb=%d streams=%d queue_depth=%d per_stream_mb=%zu iterations=%lu\n",
size_mb,
report->buffer_mb,
report->stream_count,
STRESS_LAUNCH_DEPTH,
bytes_per_stream[0] / (1024u * 1024u),
iterations);
@@ -1184,10 +1193,11 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
report->buffer_mb = (int)(total_budget / (1024u * 1024u));
append_detail(report->details,
sizeof(report->details),
"requested_mb=%d actual_mb=%d streams=%d mp_count=%d per_worker_mb=%zu\n",
"requested_mb=%d actual_mb=%d streams=%d queue_depth=%d mp_count=%d per_worker_mb=%zu\n",
size_mb,
report->buffer_mb,
report->stream_count,
STRESS_LAUNCH_DEPTH,
mp_count,
per_profile_budget / (1024u * 1024u));
@@ -1239,26 +1249,33 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
double deadline = now_seconds() + (double)seconds;
while (now_seconds() < deadline) {
wave_launches = 0;
for (int i = 0; i < prepared_count; i++) {
if (!prepared[i].ready) {
continue;
}
if (!run_cublas_profile(handle, &cublas, &prepared[i])) {
append_detail(report->details,
sizeof(report->details),
"%s=FAILED runtime\n",
prepared[i].desc.name);
for (int j = 0; j < prepared_count; j++) {
destroy_profile(&cublas, cuda, &prepared[j]);
for (int depth = 0; depth < STRESS_LAUNCH_DEPTH && now_seconds() < deadline; depth++) {
int launched_this_batch = 0;
for (int i = 0; i < prepared_count; i++) {
if (!prepared[i].ready) {
continue;
}
cublas.cublasLtDestroy(handle);
destroy_streams(cuda, streams, stream_count);
cuda->cuCtxDestroy(ctx);
return 0;
if (!run_cublas_profile(handle, &cublas, &prepared[i])) {
append_detail(report->details,
sizeof(report->details),
"%s=FAILED runtime\n",
prepared[i].desc.name);
for (int j = 0; j < prepared_count; j++) {
destroy_profile(&cublas, cuda, &prepared[j]);
}
cublas.cublasLtDestroy(handle);
destroy_streams(cuda, streams, stream_count);
cuda->cuCtxDestroy(ctx);
return 0;
}
prepared[i].iterations++;
report->iterations++;
wave_launches++;
launched_this_batch++;
}
if (launched_this_batch <= 0) {
break;
}
prepared[i].iterations++;
report->iterations++;
wave_launches++;
}
if (wave_launches <= 0) {
break;

View File

@@ -245,13 +245,13 @@ rm -f \
"${OVERLAY_STAGE_DIR}/etc/bee-release" \
"${OVERLAY_STAGE_DIR}/root/.ssh/authorized_keys" \
"${OVERLAY_STAGE_DIR}/usr/local/bin/bee" \
"${OVERLAY_STAGE_DIR}/usr/local/bin/bee-gpu-stress" \
"${OVERLAY_STAGE_DIR}/usr/local/bin/bee-nccl-gpu-stress" \
"${OVERLAY_STAGE_DIR}/usr/local/bin/john" \
"${OVERLAY_STAGE_DIR}/usr/local/lib/bee/bee-gpu-burn-worker" \
"${OVERLAY_STAGE_DIR}/usr/local/lib/bee/john" \
"${OVERLAY_STAGE_DIR}/usr/local/bin/bee-smoketest" \
"${OVERLAY_STAGE_DIR}/usr/local/bin/all_reduce_perf"
rm -rf \
"${OVERLAY_STAGE_DIR}/usr/local/lib/bee/john"
# Remove NVIDIA-specific overlay files for non-nvidia variants
if [ "$BEE_GPU_VENDOR" != "nvidia" ]; then
@@ -304,7 +304,6 @@ if [ "$BEE_GPU_VENDOR" = "nvidia" ] && [ -f "$GPU_BURN_WORKER_BIN" ]; then
chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-gpu-burn" 2>/dev/null || true
chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-john-gpu-stress" 2>/dev/null || true
chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-nccl-gpu-stress" 2>/dev/null || true
ln -sfn bee-gpu-burn "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-gpu-stress"
fi
# --- inject smoketest into overlay so it runs directly on the live CD ---

View File

@@ -29,16 +29,6 @@ menuentry "EASY-BEE (fail-safe)" {
initrd @INITRD_LIVE@
}
if [ "${grub_platform}" = "efi" ]; then
menuentry "Memory Test (memtest86+)" {
chainloader /boot/memtest86+x64.efi
}
else
menuentry "Memory Test (memtest86+)" {
linux16 /boot/memtest86+x64.bin
}
fi
if [ "${grub_platform}" = "efi" ]; then
menuentry "UEFI Firmware Settings" {
fwsetup

View File

@@ -1,76 +0,0 @@
#!/bin/sh
# Copy memtest86+ binaries from chroot /boot into the ISO boot directory
# so GRUB can chainload them directly (they must be on the ISO filesystem,
# not inside the squashfs).
#
# Primary: copy from chroot/boot/ (populated by package postinst).
# Naming fallbacks:
# Debian Bookworm: /boot/memtest86+ — EFI PE64 (no extension)
# /boot/memtest86+.bin — legacy binary
# Upstream/Ubuntu: /boot/memtest86+x64.efi, /boot/memtest86+x64.bin, etc.
# Last resort: extract directly from the cached .deb if postinst didn't place
# the files (happens in chroot environments without grub triggers).
set -e
MEMTEST_FILES="memtest86+x64.bin memtest86+x64.efi memtest86+ia32.bin memtest86+ia32.efi"
# Ensure destination directory exists (absence caused silent copy failures).
mkdir -p binary/boot
echo "memtest: scanning chroot/boot/ for memtest files:"
ls chroot/boot/memtest* 2>/dev/null || echo "memtest: WARNING: no memtest files in chroot/boot/"
# Primary path: copy upstream-named files from chroot/boot/
for f in ${MEMTEST_FILES}; do
src="chroot/boot/${f}"
if [ -f "${src}" ]; then
cp "${src}" "binary/boot/${f}"
echo "memtest: copied ${f} from chroot/boot/"
fi
done
# Debian Bookworm naming fallback: /boot/memtest86+ (no extension) is the EFI binary.
if [ ! -f "binary/boot/memtest86+x64.efi" ] && [ -f "chroot/boot/memtest86+" ]; then
cp "chroot/boot/memtest86+" "binary/boot/memtest86+x64.efi"
echo "memtest: copied /boot/memtest86+ as memtest86+x64.efi (Debian naming)"
fi
if [ ! -f "binary/boot/memtest86+x64.bin" ] && [ -f "chroot/boot/memtest86+.bin" ]; then
cp "chroot/boot/memtest86+.bin" "binary/boot/memtest86+x64.bin"
echo "memtest: copied /boot/memtest86+.bin as memtest86+x64.bin (Debian naming)"
fi
# Last resort: if EFI binary still missing, extract from cached .deb
if [ ! -f "binary/boot/memtest86+x64.efi" ]; then
echo "memtest: EFI binary missing — attempting extraction from .deb cache"
deb=$(find chroot/var/cache/apt/archives/ chroot/var/lib/apt/lists/ \
-name 'memtest86+_*.deb' -o -name 'memtest86+*.deb' 2>/dev/null \
| head -1)
if [ -z "$deb" ]; then
deb=$(find cache/ -name 'memtest86+_*.deb' -o -name 'memtest86+*.deb' 2>/dev/null | head -1)
fi
if [ -n "$deb" ]; then
echo "memtest: extracting from ${deb}"
EXTRACT_DIR="$(mktemp -d)"
dpkg-deb -x "${deb}" "${EXTRACT_DIR}"
echo "memtest: files found in .deb:"
find "${EXTRACT_DIR}/boot" -type f 2>/dev/null || echo " (none in /boot)"
for f in ${MEMTEST_FILES}; do
src="${EXTRACT_DIR}/boot/${f}"
if [ -f "${src}" ]; then
cp "${src}" "binary/boot/${f}"
echo "memtest: extracted ${f} from .deb"
fi
done
# Debian naming fallback inside .deb as well
if [ ! -f "binary/boot/memtest86+x64.efi" ] && [ -f "${EXTRACT_DIR}/boot/memtest86+" ]; then
cp "${EXTRACT_DIR}/boot/memtest86+" "binary/boot/memtest86+x64.efi"
echo "memtest: extracted /boot/memtest86+ as memtest86+x64.efi from .deb"
fi
rm -rf "${EXTRACT_DIR}"
else
echo "memtest: WARNING: no memtest86+ .deb found in cache — memtest will not be available"
fi
fi
echo "memtest: binary/boot/ contents:"
ls binary/boot/memtest* 2>/dev/null || echo " (none)"

View File

@@ -21,14 +21,15 @@ openssh-server
# Disk installer
squashfs-tools
parted
# grub-pc / grub-efi-amd64 provide grub-install + grub2-common (required for chroot install).
# The -bin variants only carry binary modules and do NOT include grub-install itself.
grub-pc
# Keep GRUB install tools without selecting a single active platform package.
# grub-pc and grub-efi-amd64 conflict with each other, but grub2-common
# provides grub-install/update-grub and the *-bin packages provide BIOS/UEFI modules.
grub2-common
grub-pc-bin
grub-efi-amd64
grub-efi-amd64-bin
grub-efi-amd64-signed
shim-signed
efibootmgr
# Filesystem support for USB export targets
exfatprogs
@@ -50,7 +51,6 @@ sudo
zstd
mstflint
memtester
memtest86+
stress-ng
stressapptest

View File

@@ -1,25 +1,9 @@
[Unit]
Description=Bee: schedule startup hardware audit via task queue
# Start AFTER bee-web, not before — bee-web must not wait for audit.
After=bee-web.service
Wants=bee-web.service
Description=Bee: on-demand hardware audit (not started automatically)
[Service]
Type=oneshot
RemainAfterExit=yes
# Wait up to 90s for bee-web to respond on /healthz, then sleep 60s for
# the system to settle (GPU drivers, sensors), then enqueue the audit as
# a background task so it appears in the task list and logs.
ExecStart=/bin/sh -c '\
i=0; \
while [ $i -lt 90 ]; do \
if curl -sf http://localhost/healthz >/dev/null 2>&1; then break; fi; \
sleep 1; i=$((i+1)); \
done; \
sleep 60; \
curl -sf -X POST http://localhost/api/audit/run >/dev/null'
ExecStart=/bin/sh -c 'curl -sf -X POST http://localhost/api/audit/run >/dev/null'
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -12,17 +12,55 @@
set -euo pipefail
# usage prints the bee-install help text to stderr and terminates the script.
# It never returns: every call ends in `exit 1`.
usage() {
# The quoted 'EOF' delimiter keeps the help text literal (no expansion).
cat >&2 <<'EOF'
Usage: bee-install <device> [logfile]
Installs the live system to a local disk (WIPES the target).
device Target block device, e.g. /dev/sda or /dev/nvme0n1
Must be a hard disk or NVMe — NOT a CD-ROM (/dev/sr*)
logfile Optional path for progress log (default: /tmp/bee-install.log)
Examples:
bee-install /dev/sda
bee-install /dev/nvme0n1
bee-install /dev/sdb /tmp/my-install.log
WARNING: ALL DATA ON <device> WILL BE ERASED.
Layout (UEFI): GPT — partition 1: EFI 512MB vfat, partition 2: root ext4
Layout (BIOS): MBR — partition 1: root ext4
EOF
exit 1
}
DEVICE="${1:-}"
LOGFILE="${2:-/tmp/bee-install.log}"
if [ -z "$DEVICE" ]; then
echo "Usage: bee-install <device> [logfile]" >&2
exit 1
if [ -z "$DEVICE" ] || [ "$DEVICE" = "--help" ] || [ "$DEVICE" = "-h" ]; then
usage
fi
if [ ! -b "$DEVICE" ]; then
echo "ERROR: $DEVICE is not a block device" >&2
echo "Run 'lsblk' to list available disks." >&2
exit 1
fi
# Block CD-ROM devices
case "$DEVICE" in
/dev/sr*|/dev/scd*)
echo "ERROR: $DEVICE is a CD-ROM/optical device — cannot install to it." >&2
echo "Run 'lsblk' to find the target disk (e.g. /dev/sda, /dev/nvme0n1)." >&2
exit 1
;;
esac
# Check required tools
for tool in parted mkfs.vfat mkfs.ext4 unsquashfs grub-install update-grub; do
if ! command -v "$tool" >/dev/null 2>&1; then
echo "ERROR: required tool not found: $tool" >&2
exit 1
fi
done
SQUASHFS="/run/live/medium/live/filesystem.squashfs"
if [ ! -f "$SQUASHFS" ]; then

View File

@@ -23,6 +23,62 @@ contains_csv() {
echo ",${haystack}," | grep -q ",${needle},"
}
# show_opencl_diagnostics dumps OpenCL device listings to stderr.
# Output of `clinfo -l` is included only when clinfo is installed; the
# `./john --list=opencl-devices` listing is always attempted. Failures of
# either tool are ignored.
show_opencl_diagnostics() {
    {
        if command -v clinfo >/dev/null 2>&1; then
            echo "-- clinfo -l --"
            clinfo -l || true
        fi
        echo "-- john --list=opencl-devices --"
        ./john --list=opencl-devices || true
    } >&2
}
# ensure_nvidia_uvm makes sure the nvidia_uvm kernel module is loaded.
# Returns 0 when lsmod already lists the module or when insmod of
# /usr/local/lib/nvidia/nvidia-uvm.ko succeeds. Returns 1 when the caller is
# not root, the .ko file is missing, or insmod fails.
# After a successful insmod the /dev/nvidia-uvm and /dev/nvidia-uvm-tools
# character devices are created on a best-effort basis.
ensure_nvidia_uvm() {
    # Nothing to do when the module is already present.
    if lsmod 2>/dev/null | grep -q '^nvidia_uvm '; then
        return 0
    fi

    # Loading a module requires root.
    [ "$(id -u)" = "0" ] || return 1

    ko="/usr/local/lib/nvidia/nvidia-uvm.ko"
    if [ ! -f "${ko}" ]; then
        return 1
    fi
    insmod "${ko}" >/dev/null 2>&1 || return 1

    # Device nodes use the major number the kernel reports for nvidia-uvm.
    uvm_major=$(grep -m1 ' nvidia-uvm$' /proc/devices | awk '{print $1}')
    if [ -n "${uvm_major}" ]; then
        mknod -m 666 /dev/nvidia-uvm c "${uvm_major}" 0 2>/dev/null || true
        mknod -m 666 /dev/nvidia-uvm-tools c "${uvm_major}" 1 2>/dev/null || true
    fi
    return 0
}
# ensure_opencl_ready verifies that `./john --list=opencl-devices` reports at
# least one "Device #" entry. If the first probe finds none, it tries
# ensure_nvidia_uvm and probes once more. On failure it prints the reasons it
# can detect plus the OpenCL diagnostics to stderr and returns 1.
ensure_opencl_ready() {
    # First probe.
    out=$(./john --list=opencl-devices 2>&1 || true)
    echo "${out}" | grep -q "Device #" && return 0

    # Second probe, only if the UVM module could be brought up.
    if ensure_nvidia_uvm; then
        out=$(./john --list=opencl-devices 2>&1 || true)
        echo "${out}" | grep -q "Device #" && return 0
    fi

    echo "OpenCL devices are not available for John." >&2
    if ! lsmod 2>/dev/null | grep -q '^nvidia_uvm '; then
        echo "nvidia_uvm is not loaded." >&2
    fi
    if [ ! -e /dev/nvidia-uvm ]; then
        echo "/dev/nvidia-uvm is missing." >&2
    fi
    show_opencl_diagnostics
    return 1
}
while [ "$#" -gt 0 ]; do
case "$1" in
--seconds|-t) [ "$#" -ge 2 ] || usage; SECONDS="$2"; shift 2 ;;
@@ -76,6 +132,8 @@ echo "john_devices=${JOHN_DEVICES}"
cd "${JOHN_DIR}"
ensure_opencl_ready || exit 1
choose_format() {
if [ -n "${FORMAT}" ]; then
echo "${FORMAT}"

View File

@@ -17,7 +17,7 @@ mkdir -p "$(dirname "$log_file")"
serial_sink() {
local tty="$1"
if [ -w "$tty" ]; then
cat > "$tty"
cat > "$tty" 2>/dev/null || true
else
cat > /dev/null
fi

View File

@@ -59,11 +59,24 @@ load_module() {
return 1
}
# load_host_module loads the module named by $1 with modprobe.
# Returns 0 and logs the module name when modprobe succeeds; returns 1
# silently when it fails.
load_host_module() {
    mod="$1"
    modprobe "$mod" >/dev/null 2>&1 || return 1
    log "host module loaded: $mod"
    return 0
}
case "$nvidia_mode" in
normal|full)
if ! load_module nvidia; then
exit 1
fi
# nvidia-modeset on some server kernels needs ACPI video helper symbols
# exported by the generic "video" module. Best-effort only; compute paths
# remain functional even if display-related modules stay absent.
load_host_module video || true
load_module nvidia-modeset || true
load_module nvidia-uvm || true
;;