Compare commits

...

8 Commits
v4.3 ... v4.5

Author SHA1 Message Date
Mikhail Chusavitin
728270dc8e Unblock bee-web startup and expand support bundle diagnostics 2026-04-04 15:18:43 +03:00
Mikhail Chusavitin
8692f825bc Use plain repo tags for build version 2026-04-03 10:48:51 +03:00
Mikhail Chusavitin
11f52ac710 Fix task log modal scrolling 2026-04-03 10:36:11 +03:00
Mikhail Chusavitin
1cb398fe83 Show tag version at top of sidebar 2026-04-03 10:08:00 +03:00
Mikhail Chusavitin
7a843be6b0 Stabilize DCGM GPU discovery 2026-04-03 09:50:33 +03:00
Mikhail Chusavitin
7f6386dccc Restore USB support bundle export on tools page 2026-04-03 09:48:22 +03:00
Mikhail Chusavitin
eea2591bcc Fix John GPU stress duration semantics 2026-04-03 09:46:16 +03:00
Mikhail Chusavitin
295a19b93a feat(tasks): run all queued tasks in parallel
Tasks are now started simultaneously when multiple are enqueued (e.g.
Run All). The worker drains all pending tasks at once and launches each
in its own goroutine, waiting via WaitGroup. kmsg watcher updated to
use a shared event window with a reference counter across concurrent tasks.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-03 09:15:06 +03:00
17 changed files with 464 additions and 172 deletions

View File

@@ -1,5 +1,7 @@
LISTEN ?= :8080
AUDIT_PATH ?=
VERSION ?= $(shell sh ./scripts/resolve-version.sh)
GO_LDFLAGS := -X main.Version=$(VERSION)
RUN_ARGS := web --listen $(LISTEN)
ifneq ($(AUDIT_PATH),)
@@ -9,10 +11,10 @@ endif
.PHONY: run build test
run:
go run ./cmd/bee $(RUN_ARGS)
go run -ldflags "$(GO_LDFLAGS)" ./cmd/bee $(RUN_ARGS)
build:
go build -o bee ./cmd/bee
go build -ldflags "$(GO_LDFLAGS)" -o bee ./cmd/bee
test:
go test ./...

View File

@@ -7,7 +7,6 @@ import (
"io"
"log/slog"
"os"
"runtime/debug"
"strings"
"bee/audit/internal/app"
@@ -21,30 +20,7 @@ var Version = "dev"
func buildLabel() string {
label := strings.TrimSpace(Version)
if label == "" {
label = "dev"
}
if info, ok := debug.ReadBuildInfo(); ok {
var revision string
var modified bool
for _, setting := range info.Settings {
switch setting.Key {
case "vcs.revision":
revision = setting.Value
case "vcs.modified":
modified = setting.Value == "true"
}
}
if revision != "" {
short := revision
if len(short) > 12 {
short = short[:12]
}
label += " (" + short
if modified {
label += "+"
}
label += ")"
}
return "dev"
}
return label
}

View File

@@ -46,8 +46,6 @@ func TestRunUnknownCommand(t *testing.T) {
}
func TestRunVersion(t *testing.T) {
t.Parallel()
old := Version
Version = "test-version"
t.Cleanup(func() { Version = old })
@@ -62,6 +60,16 @@ func TestRunVersion(t *testing.T) {
}
}
func TestBuildLabelUsesVersionAsIs(t *testing.T) {
old := Version
Version = "1.2.3"
t.Cleanup(func() { Version = old })
if got := buildLabel(); got != "1.2.3" {
t.Fatalf("buildLabel=%q want %q", got, "1.2.3")
}
}
func TestRunExportRequiresTarget(t *testing.T) {
t.Parallel()

View File

@@ -754,6 +754,26 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
}
}
for _, want := range []string{
"/system/ip-link.txt",
"/system/ip-link-stats.txt",
"/system/ethtool-info.txt",
"/system/ethtool-link.txt",
"/system/ethtool-module.txt",
"/system/mstflint-query.txt",
} {
var found bool
for _, name := range names {
if contains(name, want) {
found = true
break
}
}
if !found {
t.Fatalf("support bundle missing %s, names=%v", want, names)
}
}
var foundRaw bool
for _, name := range names {
if contains(name, "/export/bee-sat/memory-run/verbose.log") {

View File

@@ -32,6 +32,8 @@ var supportBundleCommands = []struct {
{name: "system/lspci-nn.txt", cmd: []string{"lspci", "-nn"}},
{name: "system/lspci-vvv.txt", cmd: []string{"lspci", "-vvv"}},
{name: "system/ip-addr.txt", cmd: []string{"ip", "addr"}},
{name: "system/ip-link.txt", cmd: []string{"ip", "-details", "link", "show"}},
{name: "system/ip-link-stats.txt", cmd: []string{"ip", "-s", "link", "show"}},
{name: "system/ip-route.txt", cmd: []string{"ip", "route"}},
{name: "system/mount.txt", cmd: []string{"mount"}},
{name: "system/df-h.txt", cmd: []string{"df", "-h"}},
@@ -47,6 +49,83 @@ for d in /sys/bus/pci/devices/*/; do
printf " %-22s %s\n" "$f" "$(cat "$d/$f" 2>/dev/null)"
done
done
`}},
{name: "system/ethtool-info.txt", cmd: []string{"sh", "-c", `
if ! command -v ethtool >/dev/null 2>&1; then
echo "ethtool not found"
exit 0
fi
found=0
for path in /sys/class/net/*; do
[ -e "$path" ] || continue
iface=$(basename "$path")
[ "$iface" = "lo" ] && continue
found=1
echo "=== $iface ==="
ethtool -i "$iface" 2>&1 || true
echo
done
if [ "$found" -eq 0 ]; then
echo "no interfaces found"
fi
`}},
{name: "system/ethtool-link.txt", cmd: []string{"sh", "-c", `
if ! command -v ethtool >/dev/null 2>&1; then
echo "ethtool not found"
exit 0
fi
found=0
for path in /sys/class/net/*; do
[ -e "$path" ] || continue
iface=$(basename "$path")
[ "$iface" = "lo" ] && continue
found=1
echo "=== $iface ==="
ethtool "$iface" 2>&1 || true
echo
done
if [ "$found" -eq 0 ]; then
echo "no interfaces found"
fi
`}},
{name: "system/ethtool-module.txt", cmd: []string{"sh", "-c", `
if ! command -v ethtool >/dev/null 2>&1; then
echo "ethtool not found"
exit 0
fi
found=0
for path in /sys/class/net/*; do
[ -e "$path" ] || continue
iface=$(basename "$path")
[ "$iface" = "lo" ] && continue
found=1
echo "=== $iface ==="
ethtool -m "$iface" 2>&1 || true
echo
done
if [ "$found" -eq 0 ]; then
echo "no interfaces found"
fi
`}},
{name: "system/mstflint-query.txt", cmd: []string{"sh", "-c", `
if ! command -v mstflint >/dev/null 2>&1; then
echo "mstflint not found"
exit 0
fi
found=0
for path in /sys/bus/pci/devices/*; do
[ -e "$path/vendor" ] || continue
vendor=$(cat "$path/vendor" 2>/dev/null)
[ "$vendor" = "0x15b3" ] || continue
bdf=$(basename "$path")
found=1
echo "=== $bdf ==="
mstflint -d "$bdf" q 2>&1 || true
echo
done
if [ "$found" -eq 0 ]; then
echo "no Mellanox/NVIDIA networking devices found"
fi
`}},
}

View File

@@ -286,7 +286,25 @@ func (s *System) RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (
// gpuIndices: specific GPU indices to test (empty = all GPUs).
// ctx cancellation kills the running job.
func (s *System) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (string, error) {
return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia", nvidiaDCGMJobs(diagLevel, gpuIndices), logFunc)
resolvedGPUIndices, err := resolveDCGMGPUIndices(gpuIndices)
if err != nil {
return "", err
}
return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia", nvidiaDCGMJobs(diagLevel, resolvedGPUIndices), logFunc)
}
func resolveDCGMGPUIndices(gpuIndices []int) ([]int, error) {
if len(gpuIndices) > 0 {
return dedupeSortedIndices(gpuIndices), nil
}
all, err := listNvidiaGPUIndices()
if err != nil {
return nil, err
}
if len(all) == 0 {
return nil, fmt.Errorf("nvidia-smi found no NVIDIA GPUs")
}
return all, nil
}
func (s *System) RunMemoryAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {

View File

@@ -162,6 +162,39 @@ func TestBuildNvidiaStressJobUsesNCCLLoader(t *testing.T) {
}
}
func TestResolveDCGMGPUIndicesUsesDetectedGPUsWhenUnset(t *testing.T) {
t.Parallel()
oldExecCommand := satExecCommand
satExecCommand = func(name string, args ...string) *exec.Cmd {
if name == "nvidia-smi" {
return exec.Command("sh", "-c", "printf '2\n0\n1\n'")
}
return exec.Command(name, args...)
}
t.Cleanup(func() { satExecCommand = oldExecCommand })
got, err := resolveDCGMGPUIndices(nil)
if err != nil {
t.Fatalf("resolveDCGMGPUIndices error: %v", err)
}
if want := "0,1,2"; joinIndexList(got) != want {
t.Fatalf("gpuIndices=%q want %q", joinIndexList(got), want)
}
}
func TestResolveDCGMGPUIndicesKeepsExplicitSelection(t *testing.T) {
t.Parallel()
got, err := resolveDCGMGPUIndices([]int{3, 1, 3})
if err != nil {
t.Fatalf("resolveDCGMGPUIndices error: %v", err)
}
if want := "1,3"; joinIndexList(got) != want {
t.Fatalf("gpuIndices=%q want %q", joinIndexList(got), want)
}
}
func TestNvidiaStressArchivePrefixByLoader(t *testing.T) {
t.Parallel()

View File

@@ -14,17 +14,17 @@ import (
)
// kmsgWatcher reads /dev/kmsg and accumulates hardware error events.
// During an active SAT task window it records matching lines; on task finish
// it writes Warning status records to the component status DB.
// It supports multiple concurrent SAT tasks: a shared event window is open
// while any SAT task is running, and flushed when all tasks complete.
type kmsgWatcher struct {
mu sync.Mutex
activeWindow *kmsgWindow
activeCount int // number of in-flight SAT tasks
window *kmsgWindow
statusDB *app.ComponentStatusDB
}
type kmsgWindow struct {
taskID string
target string
targets []string // SAT targets running concurrently
startedAt time.Time
seen map[kmsgEventKey]bool
events []kmsgEvent
@@ -71,7 +71,7 @@ func (w *kmsgWatcher) run() {
continue
}
w.mu.Lock()
if w.activeWindow != nil {
if w.window != nil {
w.recordEvent(evt)
}
w.mu.Unlock()
@@ -85,41 +85,49 @@ func (w *kmsgWatcher) run() {
// Must be called with w.mu held.
func (w *kmsgWatcher) recordEvent(evt kmsgEvent) {
if len(evt.ids) == 0 {
// Events without a device ID (e.g. MCE) — deduplicate by category.
key := kmsgEventKey{id: "", category: evt.category}
if !w.activeWindow.seen[key] {
w.activeWindow.seen[key] = true
w.activeWindow.events = append(w.activeWindow.events, evt)
if !w.window.seen[key] {
w.window.seen[key] = true
w.window.events = append(w.window.events, evt)
}
return
}
for _, id := range evt.ids {
key := kmsgEventKey{id: id, category: evt.category}
if !w.activeWindow.seen[key] {
w.activeWindow.seen[key] = true
w.activeWindow.events = append(w.activeWindow.events, evt)
if !w.window.seen[key] {
w.window.seen[key] = true
w.window.events = append(w.window.events, evt)
}
}
}
// NotifyTaskStarted opens a new event window for the given SAT task.
// NotifyTaskStarted increments the active task counter and opens a shared event window
// if this is the first task starting.
func (w *kmsgWatcher) NotifyTaskStarted(taskID, target string) {
w.mu.Lock()
defer w.mu.Unlock()
w.activeWindow = &kmsgWindow{
taskID: taskID,
target: target,
startedAt: time.Now(),
seen: make(map[kmsgEventKey]bool),
if w.activeCount == 0 {
w.window = &kmsgWindow{
startedAt: time.Now(),
seen: make(map[kmsgEventKey]bool),
}
}
w.activeCount++
if w.window != nil {
w.window.targets = append(w.window.targets, target)
}
}
// NotifyTaskFinished closes the event window and asynchronously writes status records.
// NotifyTaskFinished decrements the active task counter. When all tasks finish,
// it flushes the accumulated events to the status DB.
func (w *kmsgWatcher) NotifyTaskFinished(taskID string) {
w.mu.Lock()
window := w.activeWindow
if window != nil && window.taskID == taskID {
w.activeWindow = nil
w.activeCount--
var window *kmsgWindow
if w.activeCount <= 0 {
w.activeCount = 0
window = w.window
w.window = nil
}
w.mu.Unlock()
@@ -164,7 +172,7 @@ func (w *kmsgWatcher) flushWindow(window *kmsgWindow) {
}
}
for key, detail := range seen {
detail = "kernel error during " + window.target + " SAT: " + truncate(detail, 120)
detail = "kernel error during SAT (" + strings.Join(window.targets, ",") + "): " + truncate(detail, 120)
w.statusDB.Record(key, source, "Warning", detail)
}
}

View File

@@ -29,6 +29,7 @@ a{color:var(--accent);text-decoration:none}
.sidebar{width:210px;min-height:100vh;background:#1b1c1d;flex-shrink:0;display:flex;flex-direction:column}
.sidebar-logo{padding:18px 16px 12px;font-size:18px;font-weight:700;color:#fff;letter-spacing:-.5px}
.sidebar-logo span{color:rgba(255,255,255,.5);font-weight:400;font-size:12px;display:block;margin-top:2px}
.sidebar-version{padding:0 16px 14px;font-size:11px;color:rgba(255,255,255,.45)}
.nav{flex:1}
.nav-item{display:block;padding:10px 16px;color:rgba(255,255,255,.7);font-size:13px;border-left:3px solid transparent;transition:all .15s}
.nav-item:hover{color:#fff;background:rgba(255,255,255,.08)}
@@ -96,6 +97,10 @@ func layoutNav(active string, buildLabel string) string {
var b strings.Builder
b.WriteString(`<aside class="sidebar">`)
b.WriteString(`<div class="sidebar-logo">bee<span>hardware audit</span></div>`)
if strings.TrimSpace(buildLabel) == "" {
buildLabel = "dev"
}
b.WriteString(`<div class="sidebar-version">Version ` + html.EscapeString(buildLabel) + `</div>`)
b.WriteString(`<nav class="nav">`)
for _, item := range items {
cls := "nav-item"
@@ -110,11 +115,7 @@ func layoutNav(active string, buildLabel string) string {
cls, item.href, item.label))
}
}
if strings.TrimSpace(buildLabel) == "" {
buildLabel = "dev"
}
b.WriteString(`</nav>`)
b.WriteString(`<div style="padding:12px 16px;border-top:1px solid rgba(255,255,255,.08);font-size:11px;color:rgba(255,255,255,.45)">Build ` + html.EscapeString(buildLabel) + `</div>`)
b.WriteString(`</aside>`)
return b.String()
}
@@ -1089,72 +1090,7 @@ func renderExport(exportDir string) string {
</div></div>
</div>
<div class="card" style="margin-top:16px">
<div class="card-head">Export to USB
<button class="btn btn-sm btn-secondary" onclick="usbRefresh()" style="margin-left:auto">&#8635; Refresh</button>
</div>
<div class="card-body">
<p style="font-size:13px;color:var(--muted);margin-bottom:12px">Write audit JSON or support bundle directly to a removable USB drive.</p>
<div id="usb-status" style="font-size:13px;color:var(--muted)">Scanning for USB devices...</div>
<div id="usb-targets" style="margin-top:12px"></div>
<div id="usb-msg" style="margin-top:10px;font-size:13px"></div>
</div>
</div>
<script>
(function(){
function usbRefresh() {
document.getElementById('usb-status').textContent = 'Scanning...';
document.getElementById('usb-targets').innerHTML = '';
document.getElementById('usb-msg').textContent = '';
fetch('/api/export/usb').then(r=>r.json()).then(targets => {
const st = document.getElementById('usb-status');
const ct = document.getElementById('usb-targets');
if (!targets || targets.length === 0) {
st.textContent = 'No removable USB devices found.';
return;
}
st.textContent = targets.length + ' device(s) found:';
ct.innerHTML = '<table><tr><th>Device</th><th>FS</th><th>Size</th><th>Label</th><th>Model</th><th>Actions</th></tr>' +
targets.map(t => {
const dev = t.device || '';
const label = t.label || '';
const model = t.model || '';
return '<tr>' +
'<td style="font-family:monospace">'+dev+'</td>' +
'<td>'+t.fs_type+'</td>' +
'<td>'+t.size+'</td>' +
'<td>'+label+'</td>' +
'<td style="font-size:12px;color:var(--muted)">'+model+'</td>' +
'<td style="white-space:nowrap">' +
'<button class="btn btn-sm btn-primary" onclick="usbExport(\'audit\','+JSON.stringify(t)+')">Audit JSON</button> ' +
'<button class="btn btn-sm btn-secondary" onclick="usbExport(\'bundle\','+JSON.stringify(t)+')">Support Bundle</button>' +
'</td></tr>';
}).join('') + '</table>';
}).catch(e => {
document.getElementById('usb-status').textContent = 'Error: ' + e;
});
}
window.usbExport = function(type, target) {
const msg = document.getElementById('usb-msg');
msg.style.color = 'var(--muted)';
msg.textContent = 'Exporting to ' + (target.device||'') + '...';
fetch('/api/export/usb/'+type, {
method: 'POST',
headers: {'Content-Type':'application/json'},
body: JSON.stringify(target)
}).then(r=>r.json()).then(d => {
if (d.error) { msg.style.color='var(--err,red)'; msg.textContent = 'Error: '+d.error; return; }
msg.style.color = 'var(--ok,green)';
msg.textContent = d.message || 'Done.';
}).catch(e => {
msg.style.color = 'var(--err,red)';
msg.textContent = 'Error: '+e;
});
};
window.usbRefresh = usbRefresh;
usbRefresh();
})();
</script>`
` + renderUSBExportCard()
}
func listExportFiles(exportDir string) ([]string, error) {
@@ -1224,6 +1160,77 @@ window.supportBundleDownload = function() {
</script>`
}
func renderUSBExportCard() string {
return `<div class="card" style="margin-top:16px">
<div class="card-head">Export to USB
<button class="btn btn-sm btn-secondary" onclick="usbRefresh()" style="margin-left:auto">&#8635; Refresh</button>
</div>
<div class="card-body">` + renderUSBExportInline() + `</div>
</div>`
}
func renderUSBExportInline() string {
return `<p style="font-size:13px;color:var(--muted);margin-bottom:12px">Write audit JSON or support bundle directly to a removable USB drive.</p>
<div id="usb-status" style="font-size:13px;color:var(--muted)">Scanning for USB devices...</div>
<div id="usb-targets" style="margin-top:12px"></div>
<div id="usb-msg" style="margin-top:10px;font-size:13px"></div>
<script>
(function(){
function usbRefresh() {
document.getElementById('usb-status').textContent = 'Scanning...';
document.getElementById('usb-targets').innerHTML = '';
document.getElementById('usb-msg').textContent = '';
fetch('/api/export/usb').then(r=>r.json()).then(targets => {
const st = document.getElementById('usb-status');
const ct = document.getElementById('usb-targets');
if (!targets || targets.length === 0) {
st.textContent = 'No removable USB devices found.';
return;
}
st.textContent = targets.length + ' device(s) found:';
ct.innerHTML = '<table><tr><th>Device</th><th>FS</th><th>Size</th><th>Label</th><th>Model</th><th>Actions</th></tr>' +
targets.map(t => {
const dev = t.device || '';
const label = t.label || '';
const model = t.model || '';
return '<tr>' +
'<td style="font-family:monospace">'+dev+'</td>' +
'<td>'+t.fs_type+'</td>' +
'<td>'+t.size+'</td>' +
'<td>'+label+'</td>' +
'<td style="font-size:12px;color:var(--muted)">'+model+'</td>' +
'<td style="white-space:nowrap">' +
'<button class="btn btn-sm btn-primary" onclick="usbExport(\'audit\','+JSON.stringify(t)+')">Audit JSON</button> ' +
'<button class="btn btn-sm btn-secondary" onclick="usbExport(\'bundle\','+JSON.stringify(t)+')">Support Bundle</button>' +
'</td></tr>';
}).join('') + '</table>';
}).catch(e => {
document.getElementById('usb-status').textContent = 'Error: ' + e;
});
}
window.usbExport = function(type, target) {
const msg = document.getElementById('usb-msg');
msg.style.color = 'var(--muted)';
msg.textContent = 'Exporting to ' + (target.device||'') + '...';
fetch('/api/export/usb/'+type, {
method: 'POST',
headers: {'Content-Type':'application/json'},
body: JSON.stringify(target)
}).then(r=>r.json()).then(d => {
if (d.error) { msg.style.color='var(--err,red)'; msg.textContent = 'Error: '+d.error; return; }
msg.style.color = 'var(--ok,green)';
msg.textContent = d.message || 'Done.';
}).catch(e => {
msg.style.color = 'var(--err,red)';
msg.textContent = 'Error: '+e;
});
};
window.usbRefresh = usbRefresh;
usbRefresh();
})();
</script>`
}
// ── Display Resolution ────────────────────────────────────────────────────────
func renderDisplayInline() string {
@@ -1325,6 +1332,10 @@ function installToRAM() {
<div class="card"><div class="card-head">Support Bundle</div><div class="card-body">
<p style="font-size:13px;color:var(--muted);margin-bottom:12px">Downloads a tar.gz archive of all audit files, SAT results, and logs.</p>
` + renderSupportBundleInline() + `
<div style="border-top:1px solid var(--border);margin-top:16px;padding-top:16px">
<div style="font-weight:600;margin-bottom:8px">Export to USB</div>
` + renderUSBExportInline() + `
</div>
</div></div>
<div class="card"><div class="card-head">Tool Check <button class="btn btn-sm btn-secondary" onclick="checkTools()" style="margin-left:auto">&#8635; Check</button></div>
@@ -1578,7 +1589,11 @@ func renderTasks() string {
<div class="card-head" style="padding:14px 18px;font-size:14px">Logs — <span id="task-log-title"></span>
<button class="btn btn-sm btn-secondary" onclick="closeTaskLog()" style="margin-left:auto">&#10005;</button>
</div>
<div class="card-body" style="padding:16px;flex:1;min-height:0"><div id="task-log-terminal" class="terminal" style="height:100%;max-height:none"></div></div>
<div class="card-body" style="padding:16px;flex:1;min-height:0;overflow:hidden">
<div style="height:100%;min-height:0;overflow:auto">
<div id="task-log-terminal" class="terminal" style="margin:0;max-height:none;overflow:visible"></div>
</div>
</div>
</div>
</div>
<script>

View File

@@ -275,9 +275,10 @@ func TestRootRendersDashboard(t *testing.T) {
}
handler := NewHandler(HandlerOptions{
Title: "Bee Hardware Audit",
AuditPath: path,
ExportDir: exportDir,
Title: "Bee Hardware Audit",
BuildLabel: "1.2.3",
AuditPath: path,
ExportDir: exportDir,
})
first := httptest.NewRecorder()
@@ -292,6 +293,11 @@ func TestRootRendersDashboard(t *testing.T) {
if !strings.Contains(first.Body.String(), `/viewer`) {
t.Fatalf("first body missing viewer link: %s", first.Body.String())
}
versionIdx := strings.Index(first.Body.String(), `Version 1.2.3`)
navIdx := strings.Index(first.Body.String(), `href="/"`)
if versionIdx == -1 || navIdx == -1 || versionIdx > navIdx {
t.Fatalf("version should render near top of sidebar before nav links: %s", first.Body.String())
}
if got := first.Header().Get("Cache-Control"); got != "no-store" {
t.Fatalf("first cache-control=%q", got)
}
@@ -395,6 +401,46 @@ func TestToolsPageRendersRestartGPUDriversButton(t *testing.T) {
if !strings.Contains(body, `id="boot-source-text"`) {
t.Fatalf("tools page missing boot source field: %s", body)
}
if !strings.Contains(body, `Export to USB`) {
t.Fatalf("tools page missing export to usb section: %s", body)
}
if !strings.Contains(body, `Support Bundle</button>`) {
t.Fatalf("tools page missing support bundle usb button: %s", body)
}
}
func TestTasksPageRendersScrollableLogModal(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "audit.json")
exportDir := filepath.Join(dir, "export")
if err := os.MkdirAll(exportDir, 0755); err != nil {
t.Fatal(err)
}
if err := os.WriteFile(path, []byte(`{"collected_at":"2026-03-15T00:00:00Z"}`), 0644); err != nil {
t.Fatal(err)
}
handler := NewHandler(HandlerOptions{
Title: "Bee Hardware Audit",
AuditPath: path,
ExportDir: exportDir,
})
rec := httptest.NewRecorder()
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/tasks", nil))
if rec.Code != http.StatusOK {
t.Fatalf("status=%d", rec.Code)
}
body := rec.Body.String()
if !strings.Contains(body, `height:calc(100vh - 32px)`) {
t.Fatalf("tasks page missing bounded log modal height: %s", body)
}
if !strings.Contains(body, `flex:1;min-height:0;overflow:hidden`) {
t.Fatalf("tasks page missing log modal overflow guard: %s", body)
}
if !strings.Contains(body, `height:100%;min-height:0;overflow:auto`) {
t.Fatalf("tasks page missing scrollable log wrapper: %s", body)
}
}
func TestViewerRendersLatestSnapshot(t *testing.T) {

View File

@@ -393,11 +393,13 @@ func (q *taskQueue) worker() {
for {
<-q.trigger
setCPUGovernor("performance")
// Drain all pending tasks and start them in parallel.
q.mu.Lock()
var batch []*Task
for {
q.mu.Lock()
t := q.nextPending()
if t == nil {
q.mu.Unlock()
break
}
now := time.Now()
@@ -406,37 +408,58 @@ func (q *taskQueue) worker() {
t.DoneAt = nil
t.ErrMsg = ""
j := newTaskJobState(t.LogPath)
ctx, cancel := context.WithCancel(context.Background())
j.cancel = cancel
t.job = j
batch = append(batch, t)
}
if len(batch) > 0 {
q.persistLocked()
q.mu.Unlock()
}
q.mu.Unlock()
if q.kmsgWatcher != nil && isSATTarget(t.Target) {
q.kmsgWatcher.NotifyTaskStarted(t.ID, t.Target)
}
var wg sync.WaitGroup
for _, t := range batch {
t := t
j := t.job
taskCtx, taskCancel := context.WithCancel(context.Background())
j.cancel = taskCancel
wg.Add(1)
go func() {
defer wg.Done()
q.runTask(t, j, ctx)
if q.kmsgWatcher != nil {
q.kmsgWatcher.NotifyTaskFinished(t.ID)
}
q.mu.Lock()
now2 := time.Now()
t.DoneAt = &now2
if t.Status == TaskRunning { // not cancelled externally
if j.err != "" {
t.Status = TaskFailed
t.ErrMsg = j.err
} else {
t.Status = TaskDone
if q.kmsgWatcher != nil && isSATTarget(t.Target) {
q.kmsgWatcher.NotifyTaskStarted(t.ID, t.Target)
}
}
q.runTask(t, j, taskCtx)
if q.kmsgWatcher != nil {
q.kmsgWatcher.NotifyTaskFinished(t.ID)
}
q.mu.Lock()
now2 := time.Now()
t.DoneAt = &now2
if t.Status == TaskRunning {
if j.err != "" {
t.Status = TaskFailed
t.ErrMsg = j.err
} else {
t.Status = TaskDone
}
}
q.persistLocked()
q.mu.Unlock()
}()
}
wg.Wait()
if len(batch) > 0 {
q.mu.Lock()
q.prune()
q.persistLocked()
q.mu.Unlock()
}
setCPUGovernor("powersave")
}
}

View File

@@ -0,0 +1,16 @@
#!/bin/sh
set -eu
tag="$(git describe --tags --match 'v[0-9]*' --abbrev=7 --dirty 2>/dev/null || true)"
case "${tag}" in
v*)
printf '%s\n' "${tag#v}"
;;
"")
printf 'dev\n'
;;
*)
printf '%s\n' "${tag}"
;;
esac

View File

@@ -54,15 +54,8 @@ resolve_audit_version() {
return 0
fi
tag="$(git -C "${REPO_ROOT}" describe --tags --match 'audit/v*' --abbrev=7 --dirty 2>/dev/null || true)"
if [ -z "${tag}" ]; then
tag="$(git -C "${REPO_ROOT}" describe --tags --match 'v[0-9]*' --abbrev=7 --dirty 2>/dev/null || true)"
fi
tag="$(git -C "${REPO_ROOT}" describe --tags --match 'v[0-9]*' --abbrev=7 --dirty 2>/dev/null || true)"
case "${tag}" in
audit/v*)
echo "${tag#audit/v}"
return 0
;;
v*)
echo "${tag#v}"
return 0

View File

@@ -1,7 +1,6 @@
[Unit]
Description=Bee: hardware audit
After=bee-preflight.service bee-network.service bee-nvidia.service
Before=bee-web.service
[Service]
Type=oneshot

View File

@@ -1,6 +1,5 @@
[Unit]
Description=Bee: hardware audit web viewer
After=bee-audit.service
[Service]
Type=simple

View File

@@ -1,10 +1,11 @@
#!/bin/sh
set -eu
SECONDS=300
DURATION_SEC=300
DEVICES=""
EXCLUDE=""
FORMAT=""
TEST_SLICE_SECONDS=300
JOHN_DIR="/usr/local/lib/bee/john/run"
JOHN_BIN="${JOHN_DIR}/john"
export OCL_ICD_VENDORS="/etc/OpenCL/vendors"
@@ -116,7 +117,7 @@ ensure_opencl_ready() {
while [ "$#" -gt 0 ]; do
case "$1" in
--seconds|-t) [ "$#" -ge 2 ] || usage; SECONDS="$2"; shift 2 ;;
--seconds|-t) [ "$#" -ge 2 ] || usage; DURATION_SEC="$2"; shift 2 ;;
--devices) [ "$#" -ge 2 ] || usage; DEVICES="$2"; shift 2 ;;
--exclude) [ "$#" -ge 2 ] || usage; EXCLUDE="$2"; shift 2 ;;
--format) [ "$#" -ge 2 ] || usage; FORMAT="$2"; shift 2 ;;
@@ -189,14 +190,51 @@ CHOSEN_FORMAT=$(choose_format) || {
exit 1
}
echo "format=${CHOSEN_FORMAT}"
run_john_loop() {
opencl_id="$1"
deadline="$2"
round=0
while :; do
now=$(date +%s)
remaining=$((deadline - now))
if [ "${remaining}" -le 0 ]; then
break
fi
round=$((round + 1))
slice="${remaining}"
if [ "${slice}" -gt "${TEST_SLICE_SECONDS}" ]; then
slice="${TEST_SLICE_SECONDS}"
fi
echo "device=${opencl_id} round=${round} remaining_sec=${remaining} slice_sec=${slice}"
./john --test="${slice}" --format="${CHOSEN_FORMAT}" --devices="${opencl_id}" || return 1
done
}
PIDS=""
cleanup() {
rc=$?
trap - EXIT INT TERM
for pid in ${PIDS}; do
kill "${pid}" 2>/dev/null || true
done
for pid in ${PIDS}; do
wait "${pid}" 2>/dev/null || true
done
exit "${rc}"
}
trap cleanup EXIT INT TERM
echo "format=${CHOSEN_FORMAT}"
echo "target_seconds=${DURATION_SEC}"
echo "slice_seconds=${TEST_SLICE_SECONDS}"
DEADLINE=$(( $(date +%s) + DURATION_SEC ))
_first=1
for opencl_id in $(echo "${JOHN_DEVICES}" | tr ',' ' '); do
[ "${_first}" = "1" ] || sleep 3
_first=0
./john --test="${SECONDS}" --format="${CHOSEN_FORMAT}" --devices="${opencl_id}" &
PIDS="${PIDS} $!"
run_john_loop "${opencl_id}" "${DEADLINE}" &
pid=$!
PIDS="${PIDS} ${pid}"
done
FAIL=0
for pid in ${PIDS}; do

View File

@@ -128,13 +128,32 @@ ldconfig 2>/dev/null || true
log "ldconfig refreshed"
# Start DCGM host engine so dcgmi can discover GPUs.
# nv-hostengine must run before any dcgmi command — without it, dcgmi reports
# "group is empty" even when GPUs and modules are present.
# Skip if already running (e.g. started by a dcgm systemd service or prior boot).
# nv-hostengine must run after the NVIDIA modules and device nodes are ready.
# If it started too early (for example via systemd before bee-nvidia-load), it can
# keep a stale empty inventory and dcgmi diag later reports no testable entities.
if command -v nv-hostengine >/dev/null 2>&1; then
if pgrep -x nv-hostengine >/dev/null 2>&1; then
log "nv-hostengine already running — skipping"
else
if command -v pkill >/dev/null 2>&1; then
pkill -x nv-hostengine >/dev/null 2>&1 || true
tries=0
while pgrep -x nv-hostengine >/dev/null 2>&1; do
tries=$((tries + 1))
if [ "${tries}" -ge 10 ]; then
log "WARN: nv-hostengine is still running after restart request"
break
fi
sleep 1
done
if pgrep -x nv-hostengine >/dev/null 2>&1; then
log "WARN: keeping existing nv-hostengine process"
else
log "nv-hostengine restarted"
fi
else
log "WARN: pkill not found — cannot refresh nv-hostengine inventory"
fi
fi
if ! pgrep -x nv-hostengine >/dev/null 2>&1; then
nv-hostengine
log "nv-hostengine started"
fi