Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f3c14cd893 | ||
|
|
728270dc8e | ||
|
|
8692f825bc | ||
|
|
11f52ac710 | ||
|
|
1cb398fe83 | ||
|
|
7a843be6b0 | ||
|
|
7f6386dccc | ||
|
|
eea2591bcc | ||
|
|
295a19b93a |
@@ -1,5 +1,7 @@
|
|||||||
LISTEN ?= :8080
|
LISTEN ?= :8080
|
||||||
AUDIT_PATH ?=
|
AUDIT_PATH ?=
|
||||||
|
VERSION ?= $(shell sh ./scripts/resolve-version.sh)
|
||||||
|
GO_LDFLAGS := -X main.Version=$(VERSION)
|
||||||
|
|
||||||
RUN_ARGS := web --listen $(LISTEN)
|
RUN_ARGS := web --listen $(LISTEN)
|
||||||
ifneq ($(AUDIT_PATH),)
|
ifneq ($(AUDIT_PATH),)
|
||||||
@@ -9,10 +11,10 @@ endif
|
|||||||
.PHONY: run build test
|
.PHONY: run build test
|
||||||
|
|
||||||
run:
|
run:
|
||||||
go run ./cmd/bee $(RUN_ARGS)
|
go run -ldflags "$(GO_LDFLAGS)" ./cmd/bee $(RUN_ARGS)
|
||||||
|
|
||||||
build:
|
build:
|
||||||
go build -o bee ./cmd/bee
|
go build -ldflags "$(GO_LDFLAGS)" -o bee ./cmd/bee
|
||||||
|
|
||||||
test:
|
test:
|
||||||
go test ./...
|
go test ./...
|
||||||
|
|||||||
@@ -7,7 +7,6 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"runtime/debug"
|
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"bee/audit/internal/app"
|
"bee/audit/internal/app"
|
||||||
@@ -21,30 +20,7 @@ var Version = "dev"
|
|||||||
func buildLabel() string {
|
func buildLabel() string {
|
||||||
label := strings.TrimSpace(Version)
|
label := strings.TrimSpace(Version)
|
||||||
if label == "" {
|
if label == "" {
|
||||||
label = "dev"
|
return "dev"
|
||||||
}
|
|
||||||
if info, ok := debug.ReadBuildInfo(); ok {
|
|
||||||
var revision string
|
|
||||||
var modified bool
|
|
||||||
for _, setting := range info.Settings {
|
|
||||||
switch setting.Key {
|
|
||||||
case "vcs.revision":
|
|
||||||
revision = setting.Value
|
|
||||||
case "vcs.modified":
|
|
||||||
modified = setting.Value == "true"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if revision != "" {
|
|
||||||
short := revision
|
|
||||||
if len(short) > 12 {
|
|
||||||
short = short[:12]
|
|
||||||
}
|
|
||||||
label += " (" + short
|
|
||||||
if modified {
|
|
||||||
label += "+"
|
|
||||||
}
|
|
||||||
label += ")"
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return label
|
return label
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -46,8 +46,6 @@ func TestRunUnknownCommand(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestRunVersion(t *testing.T) {
|
func TestRunVersion(t *testing.T) {
|
||||||
t.Parallel()
|
|
||||||
|
|
||||||
old := Version
|
old := Version
|
||||||
Version = "test-version"
|
Version = "test-version"
|
||||||
t.Cleanup(func() { Version = old })
|
t.Cleanup(func() { Version = old })
|
||||||
@@ -62,6 +60,16 @@ func TestRunVersion(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestBuildLabelUsesVersionAsIs(t *testing.T) {
|
||||||
|
old := Version
|
||||||
|
Version = "1.2.3"
|
||||||
|
t.Cleanup(func() { Version = old })
|
||||||
|
|
||||||
|
if got := buildLabel(); got != "1.2.3" {
|
||||||
|
t.Fatalf("buildLabel=%q want %q", got, "1.2.3")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestRunExportRequiresTarget(t *testing.T) {
|
func TestRunExportRequiresTarget(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
|
|||||||
@@ -754,6 +754,26 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for _, want := range []string{
|
||||||
|
"/system/ip-link.txt",
|
||||||
|
"/system/ip-link-stats.txt",
|
||||||
|
"/system/ethtool-info.txt",
|
||||||
|
"/system/ethtool-link.txt",
|
||||||
|
"/system/ethtool-module.txt",
|
||||||
|
"/system/mstflint-query.txt",
|
||||||
|
} {
|
||||||
|
var found bool
|
||||||
|
for _, name := range names {
|
||||||
|
if contains(name, want) {
|
||||||
|
found = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !found {
|
||||||
|
t.Fatalf("support bundle missing %s, names=%v", want, names)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
var foundRaw bool
|
var foundRaw bool
|
||||||
for _, name := range names {
|
for _, name := range names {
|
||||||
if contains(name, "/export/bee-sat/memory-run/verbose.log") {
|
if contains(name, "/export/bee-sat/memory-run/verbose.log") {
|
||||||
|
|||||||
@@ -32,6 +32,8 @@ var supportBundleCommands = []struct {
|
|||||||
{name: "system/lspci-nn.txt", cmd: []string{"lspci", "-nn"}},
|
{name: "system/lspci-nn.txt", cmd: []string{"lspci", "-nn"}},
|
||||||
{name: "system/lspci-vvv.txt", cmd: []string{"lspci", "-vvv"}},
|
{name: "system/lspci-vvv.txt", cmd: []string{"lspci", "-vvv"}},
|
||||||
{name: "system/ip-addr.txt", cmd: []string{"ip", "addr"}},
|
{name: "system/ip-addr.txt", cmd: []string{"ip", "addr"}},
|
||||||
|
{name: "system/ip-link.txt", cmd: []string{"ip", "-details", "link", "show"}},
|
||||||
|
{name: "system/ip-link-stats.txt", cmd: []string{"ip", "-s", "link", "show"}},
|
||||||
{name: "system/ip-route.txt", cmd: []string{"ip", "route"}},
|
{name: "system/ip-route.txt", cmd: []string{"ip", "route"}},
|
||||||
{name: "system/mount.txt", cmd: []string{"mount"}},
|
{name: "system/mount.txt", cmd: []string{"mount"}},
|
||||||
{name: "system/df-h.txt", cmd: []string{"df", "-h"}},
|
{name: "system/df-h.txt", cmd: []string{"df", "-h"}},
|
||||||
@@ -47,6 +49,83 @@ for d in /sys/bus/pci/devices/*/; do
|
|||||||
printf " %-22s %s\n" "$f" "$(cat "$d/$f" 2>/dev/null)"
|
printf " %-22s %s\n" "$f" "$(cat "$d/$f" 2>/dev/null)"
|
||||||
done
|
done
|
||||||
done
|
done
|
||||||
|
`}},
|
||||||
|
{name: "system/ethtool-info.txt", cmd: []string{"sh", "-c", `
|
||||||
|
if ! command -v ethtool >/dev/null 2>&1; then
|
||||||
|
echo "ethtool not found"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
found=0
|
||||||
|
for path in /sys/class/net/*; do
|
||||||
|
[ -e "$path" ] || continue
|
||||||
|
iface=$(basename "$path")
|
||||||
|
[ "$iface" = "lo" ] && continue
|
||||||
|
found=1
|
||||||
|
echo "=== $iface ==="
|
||||||
|
ethtool -i "$iface" 2>&1 || true
|
||||||
|
echo
|
||||||
|
done
|
||||||
|
if [ "$found" -eq 0 ]; then
|
||||||
|
echo "no interfaces found"
|
||||||
|
fi
|
||||||
|
`}},
|
||||||
|
{name: "system/ethtool-link.txt", cmd: []string{"sh", "-c", `
|
||||||
|
if ! command -v ethtool >/dev/null 2>&1; then
|
||||||
|
echo "ethtool not found"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
found=0
|
||||||
|
for path in /sys/class/net/*; do
|
||||||
|
[ -e "$path" ] || continue
|
||||||
|
iface=$(basename "$path")
|
||||||
|
[ "$iface" = "lo" ] && continue
|
||||||
|
found=1
|
||||||
|
echo "=== $iface ==="
|
||||||
|
ethtool "$iface" 2>&1 || true
|
||||||
|
echo
|
||||||
|
done
|
||||||
|
if [ "$found" -eq 0 ]; then
|
||||||
|
echo "no interfaces found"
|
||||||
|
fi
|
||||||
|
`}},
|
||||||
|
{name: "system/ethtool-module.txt", cmd: []string{"sh", "-c", `
|
||||||
|
if ! command -v ethtool >/dev/null 2>&1; then
|
||||||
|
echo "ethtool not found"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
found=0
|
||||||
|
for path in /sys/class/net/*; do
|
||||||
|
[ -e "$path" ] || continue
|
||||||
|
iface=$(basename "$path")
|
||||||
|
[ "$iface" = "lo" ] && continue
|
||||||
|
found=1
|
||||||
|
echo "=== $iface ==="
|
||||||
|
ethtool -m "$iface" 2>&1 || true
|
||||||
|
echo
|
||||||
|
done
|
||||||
|
if [ "$found" -eq 0 ]; then
|
||||||
|
echo "no interfaces found"
|
||||||
|
fi
|
||||||
|
`}},
|
||||||
|
{name: "system/mstflint-query.txt", cmd: []string{"sh", "-c", `
|
||||||
|
if ! command -v mstflint >/dev/null 2>&1; then
|
||||||
|
echo "mstflint not found"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
found=0
|
||||||
|
for path in /sys/bus/pci/devices/*; do
|
||||||
|
[ -e "$path/vendor" ] || continue
|
||||||
|
vendor=$(cat "$path/vendor" 2>/dev/null)
|
||||||
|
[ "$vendor" = "0x15b3" ] || continue
|
||||||
|
bdf=$(basename "$path")
|
||||||
|
found=1
|
||||||
|
echo "=== $bdf ==="
|
||||||
|
mstflint -d "$bdf" q 2>&1 || true
|
||||||
|
echo
|
||||||
|
done
|
||||||
|
if [ "$found" -eq 0 ]; then
|
||||||
|
echo "no Mellanox/NVIDIA networking devices found"
|
||||||
|
fi
|
||||||
`}},
|
`}},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -2,18 +2,21 @@ package collector
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bee/audit/internal/schema"
|
"bee/audit/internal/schema"
|
||||||
|
"context"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
const mellanoxVendorID = 0x15b3
|
const mellanoxVendorID = 0x15b3
|
||||||
|
const nicProbeTimeout = 2 * time.Second
|
||||||
|
|
||||||
var (
|
var (
|
||||||
mstflintQuery = func(bdf string) (string, error) {
|
mstflintQuery = func(bdf string) (string, error) {
|
||||||
out, err := exec.Command("mstflint", "-d", bdf, "q").Output()
|
out, err := commandOutputWithTimeout(nicProbeTimeout, "mstflint", "-d", bdf, "q")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
@@ -21,7 +24,7 @@ var (
|
|||||||
}
|
}
|
||||||
|
|
||||||
ethtoolInfoQuery = func(iface string) (string, error) {
|
ethtoolInfoQuery = func(iface string) (string, error) {
|
||||||
out, err := exec.Command("ethtool", "-i", iface).Output()
|
out, err := commandOutputWithTimeout(nicProbeTimeout, "ethtool", "-i", iface)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
@@ -29,6 +32,14 @@ var (
|
|||||||
}
|
}
|
||||||
|
|
||||||
netIfacesByBDF = listNetIfacesByBDF
|
netIfacesByBDF = listNetIfacesByBDF
|
||||||
|
readNetCarrierFile = func(iface string) (string, error) {
|
||||||
|
path := filepath.Join("/sys/class/net", iface, "carrier")
|
||||||
|
raw, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return strings.TrimSpace(string(raw)), nil
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
// enrichPCIeWithMellanox enriches Mellanox/NVIDIA Networking devices with
|
// enrichPCIeWithMellanox enriches Mellanox/NVIDIA Networking devices with
|
||||||
@@ -162,3 +173,17 @@ func listNetIfacesByBDF(bdf string) []string {
|
|||||||
}
|
}
|
||||||
return ifaces
|
return ifaces
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func commandOutputWithTimeout(timeout time.Duration, name string, args ...string) ([]byte, error) {
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
||||||
|
defer cancel()
|
||||||
|
return exec.CommandContext(ctx, name, args...).Output()
|
||||||
|
}
|
||||||
|
|
||||||
|
func interfaceHasCarrier(iface string) bool {
|
||||||
|
raw, err := readNetCarrierFile(iface)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return strings.TrimSpace(raw) == "1"
|
||||||
|
}
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ import (
|
|||||||
|
|
||||||
var (
|
var (
|
||||||
ethtoolModuleQuery = func(iface string) (string, error) {
|
ethtoolModuleQuery = func(iface string) (string, error) {
|
||||||
out, err := raidToolQuery("ethtool", "-m", iface)
|
out, err := commandOutputWithTimeout(nicProbeTimeout, "ethtool", "-m", iface)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
@@ -58,10 +58,12 @@ func enrichPCIeWithNICTelemetry(devs []schema.HardwarePCIeDevice) []schema.Hardw
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if out, err := ethtoolModuleQuery(iface); err == nil {
|
if interfaceHasCarrier(iface) {
|
||||||
if injectSFPDOMTelemetry(&devs[i], out) {
|
if out, err := ethtoolModuleQuery(iface); err == nil {
|
||||||
enriched++
|
if injectSFPDOMTelemetry(&devs[i], out) {
|
||||||
continue
|
enriched++
|
||||||
|
continue
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if len(devs[i].MacAddresses) > 0 || devs[i].Firmware != nil {
|
if len(devs[i].MacAddresses) > 0 || devs[i].Firmware != nil {
|
||||||
|
|||||||
@@ -57,6 +57,7 @@ func TestEnrichPCIeWithNICTelemetryAddsSerialFallback(t *testing.T) {
|
|||||||
origReadMAC := readNetAddressFile
|
origReadMAC := readNetAddressFile
|
||||||
origEth := ethtoolInfoQuery
|
origEth := ethtoolInfoQuery
|
||||||
origModule := ethtoolModuleQuery
|
origModule := ethtoolModuleQuery
|
||||||
|
origCarrier := readNetCarrierFile
|
||||||
t.Cleanup(func() {
|
t.Cleanup(func() {
|
||||||
queryPCILSPCIDetail = origDetail
|
queryPCILSPCIDetail = origDetail
|
||||||
readPCIVPDFile = origVPD
|
readPCIVPDFile = origVPD
|
||||||
@@ -64,6 +65,7 @@ func TestEnrichPCIeWithNICTelemetryAddsSerialFallback(t *testing.T) {
|
|||||||
readNetAddressFile = origReadMAC
|
readNetAddressFile = origReadMAC
|
||||||
ethtoolInfoQuery = origEth
|
ethtoolInfoQuery = origEth
|
||||||
ethtoolModuleQuery = origModule
|
ethtoolModuleQuery = origModule
|
||||||
|
readNetCarrierFile = origCarrier
|
||||||
})
|
})
|
||||||
|
|
||||||
queryPCILSPCIDetail = func(bdf string) (string, error) {
|
queryPCILSPCIDetail = func(bdf string) (string, error) {
|
||||||
@@ -82,6 +84,7 @@ func TestEnrichPCIeWithNICTelemetryAddsSerialFallback(t *testing.T) {
|
|||||||
}
|
}
|
||||||
return "aa:bb:cc:dd:ee:ff", nil
|
return "aa:bb:cc:dd:ee:ff", nil
|
||||||
}
|
}
|
||||||
|
readNetCarrierFile = func(string) (string, error) { return "1", nil }
|
||||||
ethtoolInfoQuery = func(string) (string, error) { return "", fmt.Errorf("skip firmware") }
|
ethtoolInfoQuery = func(string) (string, error) { return "", fmt.Errorf("skip firmware") }
|
||||||
ethtoolModuleQuery = func(string) (string, error) { return "", fmt.Errorf("skip optics") }
|
ethtoolModuleQuery = func(string) (string, error) { return "", fmt.Errorf("skip optics") }
|
||||||
|
|
||||||
@@ -101,6 +104,42 @@ func TestEnrichPCIeWithNICTelemetryAddsSerialFallback(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestEnrichPCIeWithNICTelemetrySkipsModuleQueryWithoutCarrier(t *testing.T) {
|
||||||
|
origIfaces := netIfacesByBDF
|
||||||
|
origReadMAC := readNetAddressFile
|
||||||
|
origEth := ethtoolInfoQuery
|
||||||
|
origModule := ethtoolModuleQuery
|
||||||
|
origCarrier := readNetCarrierFile
|
||||||
|
t.Cleanup(func() {
|
||||||
|
netIfacesByBDF = origIfaces
|
||||||
|
readNetAddressFile = origReadMAC
|
||||||
|
ethtoolInfoQuery = origEth
|
||||||
|
ethtoolModuleQuery = origModule
|
||||||
|
readNetCarrierFile = origCarrier
|
||||||
|
})
|
||||||
|
|
||||||
|
netIfacesByBDF = func(string) []string { return []string{"eth0"} }
|
||||||
|
readNetAddressFile = func(string) (string, error) { return "aa:bb:cc:dd:ee:ff", nil }
|
||||||
|
readNetCarrierFile = func(string) (string, error) { return "0", nil }
|
||||||
|
ethtoolInfoQuery = func(string) (string, error) { return "", fmt.Errorf("skip firmware") }
|
||||||
|
ethtoolModuleQuery = func(string) (string, error) {
|
||||||
|
t.Fatal("ethtool -m should not be called without carrier")
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
|
||||||
|
class := "EthernetController"
|
||||||
|
bdf := "0000:18:00.0"
|
||||||
|
devs := []schema.HardwarePCIeDevice{{
|
||||||
|
DeviceClass: &class,
|
||||||
|
BDF: &bdf,
|
||||||
|
}}
|
||||||
|
|
||||||
|
out := enrichPCIeWithNICTelemetry(devs)
|
||||||
|
if len(out[0].MacAddresses) != 1 || out[0].MacAddresses[0] != "aa:bb:cc:dd:ee:ff" {
|
||||||
|
t.Fatalf("mac_addresses=%v", out[0].MacAddresses)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestDBMValue(t *testing.T) {
|
func TestDBMValue(t *testing.T) {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
in string
|
in string
|
||||||
|
|||||||
@@ -286,7 +286,25 @@ func (s *System) RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (
|
|||||||
// gpuIndices: specific GPU indices to test (empty = all GPUs).
|
// gpuIndices: specific GPU indices to test (empty = all GPUs).
|
||||||
// ctx cancellation kills the running job.
|
// ctx cancellation kills the running job.
|
||||||
func (s *System) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (string, error) {
|
func (s *System) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (string, error) {
|
||||||
return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia", nvidiaDCGMJobs(diagLevel, gpuIndices), logFunc)
|
resolvedGPUIndices, err := resolveDCGMGPUIndices(gpuIndices)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia", nvidiaDCGMJobs(diagLevel, resolvedGPUIndices), logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func resolveDCGMGPUIndices(gpuIndices []int) ([]int, error) {
|
||||||
|
if len(gpuIndices) > 0 {
|
||||||
|
return dedupeSortedIndices(gpuIndices), nil
|
||||||
|
}
|
||||||
|
all, err := listNvidiaGPUIndices()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if len(all) == 0 {
|
||||||
|
return nil, fmt.Errorf("nvidia-smi found no NVIDIA GPUs")
|
||||||
|
}
|
||||||
|
return all, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *System) RunMemoryAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
|
func (s *System) RunMemoryAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
|
||||||
|
|||||||
@@ -162,6 +162,39 @@ func TestBuildNvidiaStressJobUsesNCCLLoader(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestResolveDCGMGPUIndicesUsesDetectedGPUsWhenUnset(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
oldExecCommand := satExecCommand
|
||||||
|
satExecCommand = func(name string, args ...string) *exec.Cmd {
|
||||||
|
if name == "nvidia-smi" {
|
||||||
|
return exec.Command("sh", "-c", "printf '2\n0\n1\n'")
|
||||||
|
}
|
||||||
|
return exec.Command(name, args...)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { satExecCommand = oldExecCommand })
|
||||||
|
|
||||||
|
got, err := resolveDCGMGPUIndices(nil)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("resolveDCGMGPUIndices error: %v", err)
|
||||||
|
}
|
||||||
|
if want := "0,1,2"; joinIndexList(got) != want {
|
||||||
|
t.Fatalf("gpuIndices=%q want %q", joinIndexList(got), want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolveDCGMGPUIndicesKeepsExplicitSelection(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
got, err := resolveDCGMGPUIndices([]int{3, 1, 3})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("resolveDCGMGPUIndices error: %v", err)
|
||||||
|
}
|
||||||
|
if want := "1,3"; joinIndexList(got) != want {
|
||||||
|
t.Fatalf("gpuIndices=%q want %q", joinIndexList(got), want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestNvidiaStressArchivePrefixByLoader(t *testing.T) {
|
func TestNvidiaStressArchivePrefixByLoader(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
|
|||||||
@@ -14,17 +14,17 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
// kmsgWatcher reads /dev/kmsg and accumulates hardware error events.
|
// kmsgWatcher reads /dev/kmsg and accumulates hardware error events.
|
||||||
// During an active SAT task window it records matching lines; on task finish
|
// It supports multiple concurrent SAT tasks: a shared event window is open
|
||||||
// it writes Warning status records to the component status DB.
|
// while any SAT task is running, and flushed when all tasks complete.
|
||||||
type kmsgWatcher struct {
|
type kmsgWatcher struct {
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
activeWindow *kmsgWindow
|
activeCount int // number of in-flight SAT tasks
|
||||||
|
window *kmsgWindow
|
||||||
statusDB *app.ComponentStatusDB
|
statusDB *app.ComponentStatusDB
|
||||||
}
|
}
|
||||||
|
|
||||||
type kmsgWindow struct {
|
type kmsgWindow struct {
|
||||||
taskID string
|
targets []string // SAT targets running concurrently
|
||||||
target string
|
|
||||||
startedAt time.Time
|
startedAt time.Time
|
||||||
seen map[kmsgEventKey]bool
|
seen map[kmsgEventKey]bool
|
||||||
events []kmsgEvent
|
events []kmsgEvent
|
||||||
@@ -71,7 +71,7 @@ func (w *kmsgWatcher) run() {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
w.mu.Lock()
|
w.mu.Lock()
|
||||||
if w.activeWindow != nil {
|
if w.window != nil {
|
||||||
w.recordEvent(evt)
|
w.recordEvent(evt)
|
||||||
}
|
}
|
||||||
w.mu.Unlock()
|
w.mu.Unlock()
|
||||||
@@ -85,41 +85,49 @@ func (w *kmsgWatcher) run() {
|
|||||||
// Must be called with w.mu held.
|
// Must be called with w.mu held.
|
||||||
func (w *kmsgWatcher) recordEvent(evt kmsgEvent) {
|
func (w *kmsgWatcher) recordEvent(evt kmsgEvent) {
|
||||||
if len(evt.ids) == 0 {
|
if len(evt.ids) == 0 {
|
||||||
// Events without a device ID (e.g. MCE) — deduplicate by category.
|
|
||||||
key := kmsgEventKey{id: "", category: evt.category}
|
key := kmsgEventKey{id: "", category: evt.category}
|
||||||
if !w.activeWindow.seen[key] {
|
if !w.window.seen[key] {
|
||||||
w.activeWindow.seen[key] = true
|
w.window.seen[key] = true
|
||||||
w.activeWindow.events = append(w.activeWindow.events, evt)
|
w.window.events = append(w.window.events, evt)
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
for _, id := range evt.ids {
|
for _, id := range evt.ids {
|
||||||
key := kmsgEventKey{id: id, category: evt.category}
|
key := kmsgEventKey{id: id, category: evt.category}
|
||||||
if !w.activeWindow.seen[key] {
|
if !w.window.seen[key] {
|
||||||
w.activeWindow.seen[key] = true
|
w.window.seen[key] = true
|
||||||
w.activeWindow.events = append(w.activeWindow.events, evt)
|
w.window.events = append(w.window.events, evt)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// NotifyTaskStarted opens a new event window for the given SAT task.
|
// NotifyTaskStarted increments the active task counter and opens a shared event window
|
||||||
|
// if this is the first task starting.
|
||||||
func (w *kmsgWatcher) NotifyTaskStarted(taskID, target string) {
|
func (w *kmsgWatcher) NotifyTaskStarted(taskID, target string) {
|
||||||
w.mu.Lock()
|
w.mu.Lock()
|
||||||
defer w.mu.Unlock()
|
defer w.mu.Unlock()
|
||||||
w.activeWindow = &kmsgWindow{
|
if w.activeCount == 0 {
|
||||||
taskID: taskID,
|
w.window = &kmsgWindow{
|
||||||
target: target,
|
startedAt: time.Now(),
|
||||||
startedAt: time.Now(),
|
seen: make(map[kmsgEventKey]bool),
|
||||||
seen: make(map[kmsgEventKey]bool),
|
}
|
||||||
|
}
|
||||||
|
w.activeCount++
|
||||||
|
if w.window != nil {
|
||||||
|
w.window.targets = append(w.window.targets, target)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// NotifyTaskFinished closes the event window and asynchronously writes status records.
|
// NotifyTaskFinished decrements the active task counter. When all tasks finish,
|
||||||
|
// it flushes the accumulated events to the status DB.
|
||||||
func (w *kmsgWatcher) NotifyTaskFinished(taskID string) {
|
func (w *kmsgWatcher) NotifyTaskFinished(taskID string) {
|
||||||
w.mu.Lock()
|
w.mu.Lock()
|
||||||
window := w.activeWindow
|
w.activeCount--
|
||||||
if window != nil && window.taskID == taskID {
|
var window *kmsgWindow
|
||||||
w.activeWindow = nil
|
if w.activeCount <= 0 {
|
||||||
|
w.activeCount = 0
|
||||||
|
window = w.window
|
||||||
|
w.window = nil
|
||||||
}
|
}
|
||||||
w.mu.Unlock()
|
w.mu.Unlock()
|
||||||
|
|
||||||
@@ -164,7 +172,7 @@ func (w *kmsgWatcher) flushWindow(window *kmsgWindow) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
for key, detail := range seen {
|
for key, detail := range seen {
|
||||||
detail = "kernel error during " + window.target + " SAT: " + truncate(detail, 120)
|
detail = "kernel error during SAT (" + strings.Join(window.targets, ",") + "): " + truncate(detail, 120)
|
||||||
w.statusDB.Record(key, source, "Warning", detail)
|
w.statusDB.Record(key, source, "Warning", detail)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ a{color:var(--accent);text-decoration:none}
|
|||||||
.sidebar{width:210px;min-height:100vh;background:#1b1c1d;flex-shrink:0;display:flex;flex-direction:column}
|
.sidebar{width:210px;min-height:100vh;background:#1b1c1d;flex-shrink:0;display:flex;flex-direction:column}
|
||||||
.sidebar-logo{padding:18px 16px 12px;font-size:18px;font-weight:700;color:#fff;letter-spacing:-.5px}
|
.sidebar-logo{padding:18px 16px 12px;font-size:18px;font-weight:700;color:#fff;letter-spacing:-.5px}
|
||||||
.sidebar-logo span{color:rgba(255,255,255,.5);font-weight:400;font-size:12px;display:block;margin-top:2px}
|
.sidebar-logo span{color:rgba(255,255,255,.5);font-weight:400;font-size:12px;display:block;margin-top:2px}
|
||||||
|
.sidebar-version{padding:0 16px 14px;font-size:11px;color:rgba(255,255,255,.45)}
|
||||||
.nav{flex:1}
|
.nav{flex:1}
|
||||||
.nav-item{display:block;padding:10px 16px;color:rgba(255,255,255,.7);font-size:13px;border-left:3px solid transparent;transition:all .15s}
|
.nav-item{display:block;padding:10px 16px;color:rgba(255,255,255,.7);font-size:13px;border-left:3px solid transparent;transition:all .15s}
|
||||||
.nav-item:hover{color:#fff;background:rgba(255,255,255,.08)}
|
.nav-item:hover{color:#fff;background:rgba(255,255,255,.08)}
|
||||||
@@ -96,6 +97,10 @@ func layoutNav(active string, buildLabel string) string {
|
|||||||
var b strings.Builder
|
var b strings.Builder
|
||||||
b.WriteString(`<aside class="sidebar">`)
|
b.WriteString(`<aside class="sidebar">`)
|
||||||
b.WriteString(`<div class="sidebar-logo">bee<span>hardware audit</span></div>`)
|
b.WriteString(`<div class="sidebar-logo">bee<span>hardware audit</span></div>`)
|
||||||
|
if strings.TrimSpace(buildLabel) == "" {
|
||||||
|
buildLabel = "dev"
|
||||||
|
}
|
||||||
|
b.WriteString(`<div class="sidebar-version">Version ` + html.EscapeString(buildLabel) + `</div>`)
|
||||||
b.WriteString(`<nav class="nav">`)
|
b.WriteString(`<nav class="nav">`)
|
||||||
for _, item := range items {
|
for _, item := range items {
|
||||||
cls := "nav-item"
|
cls := "nav-item"
|
||||||
@@ -110,11 +115,7 @@ func layoutNav(active string, buildLabel string) string {
|
|||||||
cls, item.href, item.label))
|
cls, item.href, item.label))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if strings.TrimSpace(buildLabel) == "" {
|
|
||||||
buildLabel = "dev"
|
|
||||||
}
|
|
||||||
b.WriteString(`</nav>`)
|
b.WriteString(`</nav>`)
|
||||||
b.WriteString(`<div style="padding:12px 16px;border-top:1px solid rgba(255,255,255,.08);font-size:11px;color:rgba(255,255,255,.45)">Build ` + html.EscapeString(buildLabel) + `</div>`)
|
|
||||||
b.WriteString(`</aside>`)
|
b.WriteString(`</aside>`)
|
||||||
return b.String()
|
return b.String()
|
||||||
}
|
}
|
||||||
@@ -1089,72 +1090,7 @@ func renderExport(exportDir string) string {
|
|||||||
</div></div>
|
</div></div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="card" style="margin-top:16px">
|
` + renderUSBExportCard()
|
||||||
<div class="card-head">Export to USB
|
|
||||||
<button class="btn btn-sm btn-secondary" onclick="usbRefresh()" style="margin-left:auto">↻ Refresh</button>
|
|
||||||
</div>
|
|
||||||
<div class="card-body">
|
|
||||||
<p style="font-size:13px;color:var(--muted);margin-bottom:12px">Write audit JSON or support bundle directly to a removable USB drive.</p>
|
|
||||||
<div id="usb-status" style="font-size:13px;color:var(--muted)">Scanning for USB devices...</div>
|
|
||||||
<div id="usb-targets" style="margin-top:12px"></div>
|
|
||||||
<div id="usb-msg" style="margin-top:10px;font-size:13px"></div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
(function(){
|
|
||||||
function usbRefresh() {
|
|
||||||
document.getElementById('usb-status').textContent = 'Scanning...';
|
|
||||||
document.getElementById('usb-targets').innerHTML = '';
|
|
||||||
document.getElementById('usb-msg').textContent = '';
|
|
||||||
fetch('/api/export/usb').then(r=>r.json()).then(targets => {
|
|
||||||
const st = document.getElementById('usb-status');
|
|
||||||
const ct = document.getElementById('usb-targets');
|
|
||||||
if (!targets || targets.length === 0) {
|
|
||||||
st.textContent = 'No removable USB devices found.';
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
st.textContent = targets.length + ' device(s) found:';
|
|
||||||
ct.innerHTML = '<table><tr><th>Device</th><th>FS</th><th>Size</th><th>Label</th><th>Model</th><th>Actions</th></tr>' +
|
|
||||||
targets.map(t => {
|
|
||||||
const dev = t.device || '';
|
|
||||||
const label = t.label || '';
|
|
||||||
const model = t.model || '';
|
|
||||||
return '<tr>' +
|
|
||||||
'<td style="font-family:monospace">'+dev+'</td>' +
|
|
||||||
'<td>'+t.fs_type+'</td>' +
|
|
||||||
'<td>'+t.size+'</td>' +
|
|
||||||
'<td>'+label+'</td>' +
|
|
||||||
'<td style="font-size:12px;color:var(--muted)">'+model+'</td>' +
|
|
||||||
'<td style="white-space:nowrap">' +
|
|
||||||
'<button class="btn btn-sm btn-primary" onclick="usbExport(\'audit\','+JSON.stringify(t)+')">Audit JSON</button> ' +
|
|
||||||
'<button class="btn btn-sm btn-secondary" onclick="usbExport(\'bundle\','+JSON.stringify(t)+')">Support Bundle</button>' +
|
|
||||||
'</td></tr>';
|
|
||||||
}).join('') + '</table>';
|
|
||||||
}).catch(e => {
|
|
||||||
document.getElementById('usb-status').textContent = 'Error: ' + e;
|
|
||||||
});
|
|
||||||
}
|
|
||||||
window.usbExport = function(type, target) {
|
|
||||||
const msg = document.getElementById('usb-msg');
|
|
||||||
msg.style.color = 'var(--muted)';
|
|
||||||
msg.textContent = 'Exporting to ' + (target.device||'') + '...';
|
|
||||||
fetch('/api/export/usb/'+type, {
|
|
||||||
method: 'POST',
|
|
||||||
headers: {'Content-Type':'application/json'},
|
|
||||||
body: JSON.stringify(target)
|
|
||||||
}).then(r=>r.json()).then(d => {
|
|
||||||
if (d.error) { msg.style.color='var(--err,red)'; msg.textContent = 'Error: '+d.error; return; }
|
|
||||||
msg.style.color = 'var(--ok,green)';
|
|
||||||
msg.textContent = d.message || 'Done.';
|
|
||||||
}).catch(e => {
|
|
||||||
msg.style.color = 'var(--err,red)';
|
|
||||||
msg.textContent = 'Error: '+e;
|
|
||||||
});
|
|
||||||
};
|
|
||||||
window.usbRefresh = usbRefresh;
|
|
||||||
usbRefresh();
|
|
||||||
})();
|
|
||||||
</script>`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func listExportFiles(exportDir string) ([]string, error) {
|
func listExportFiles(exportDir string) ([]string, error) {
|
||||||
@@ -1224,6 +1160,77 @@ window.supportBundleDownload = function() {
|
|||||||
</script>`
|
</script>`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func renderUSBExportCard() string {
|
||||||
|
return `<div class="card" style="margin-top:16px">
|
||||||
|
<div class="card-head">Export to USB
|
||||||
|
<button class="btn btn-sm btn-secondary" onclick="usbRefresh()" style="margin-left:auto">↻ Refresh</button>
|
||||||
|
</div>
|
||||||
|
<div class="card-body">` + renderUSBExportInline() + `</div>
|
||||||
|
</div>`
|
||||||
|
}
|
||||||
|
|
||||||
|
func renderUSBExportInline() string {
|
||||||
|
return `<p style="font-size:13px;color:var(--muted);margin-bottom:12px">Write audit JSON or support bundle directly to a removable USB drive.</p>
|
||||||
|
<div id="usb-status" style="font-size:13px;color:var(--muted)">Scanning for USB devices...</div>
|
||||||
|
<div id="usb-targets" style="margin-top:12px"></div>
|
||||||
|
<div id="usb-msg" style="margin-top:10px;font-size:13px"></div>
|
||||||
|
<script>
|
||||||
|
(function(){
|
||||||
|
function usbRefresh() {
|
||||||
|
document.getElementById('usb-status').textContent = 'Scanning...';
|
||||||
|
document.getElementById('usb-targets').innerHTML = '';
|
||||||
|
document.getElementById('usb-msg').textContent = '';
|
||||||
|
fetch('/api/export/usb').then(r=>r.json()).then(targets => {
|
||||||
|
const st = document.getElementById('usb-status');
|
||||||
|
const ct = document.getElementById('usb-targets');
|
||||||
|
if (!targets || targets.length === 0) {
|
||||||
|
st.textContent = 'No removable USB devices found.';
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
st.textContent = targets.length + ' device(s) found:';
|
||||||
|
ct.innerHTML = '<table><tr><th>Device</th><th>FS</th><th>Size</th><th>Label</th><th>Model</th><th>Actions</th></tr>' +
|
||||||
|
targets.map(t => {
|
||||||
|
const dev = t.device || '';
|
||||||
|
const label = t.label || '';
|
||||||
|
const model = t.model || '';
|
||||||
|
return '<tr>' +
|
||||||
|
'<td style="font-family:monospace">'+dev+'</td>' +
|
||||||
|
'<td>'+t.fs_type+'</td>' +
|
||||||
|
'<td>'+t.size+'</td>' +
|
||||||
|
'<td>'+label+'</td>' +
|
||||||
|
'<td style="font-size:12px;color:var(--muted)">'+model+'</td>' +
|
||||||
|
'<td style="white-space:nowrap">' +
|
||||||
|
'<button class="btn btn-sm btn-primary" onclick="usbExport(\'audit\','+JSON.stringify(t)+')">Audit JSON</button> ' +
|
||||||
|
'<button class="btn btn-sm btn-secondary" onclick="usbExport(\'bundle\','+JSON.stringify(t)+')">Support Bundle</button>' +
|
||||||
|
'</td></tr>';
|
||||||
|
}).join('') + '</table>';
|
||||||
|
}).catch(e => {
|
||||||
|
document.getElementById('usb-status').textContent = 'Error: ' + e;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
window.usbExport = function(type, target) {
|
||||||
|
const msg = document.getElementById('usb-msg');
|
||||||
|
msg.style.color = 'var(--muted)';
|
||||||
|
msg.textContent = 'Exporting to ' + (target.device||'') + '...';
|
||||||
|
fetch('/api/export/usb/'+type, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: {'Content-Type':'application/json'},
|
||||||
|
body: JSON.stringify(target)
|
||||||
|
}).then(r=>r.json()).then(d => {
|
||||||
|
if (d.error) { msg.style.color='var(--err,red)'; msg.textContent = 'Error: '+d.error; return; }
|
||||||
|
msg.style.color = 'var(--ok,green)';
|
||||||
|
msg.textContent = d.message || 'Done.';
|
||||||
|
}).catch(e => {
|
||||||
|
msg.style.color = 'var(--err,red)';
|
||||||
|
msg.textContent = 'Error: '+e;
|
||||||
|
});
|
||||||
|
};
|
||||||
|
window.usbRefresh = usbRefresh;
|
||||||
|
usbRefresh();
|
||||||
|
})();
|
||||||
|
</script>`
|
||||||
|
}
|
||||||
|
|
||||||
// ── Display Resolution ────────────────────────────────────────────────────────
|
// ── Display Resolution ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
func renderDisplayInline() string {
|
func renderDisplayInline() string {
|
||||||
@@ -1325,6 +1332,10 @@ function installToRAM() {
|
|||||||
<div class="card"><div class="card-head">Support Bundle</div><div class="card-body">
|
<div class="card"><div class="card-head">Support Bundle</div><div class="card-body">
|
||||||
<p style="font-size:13px;color:var(--muted);margin-bottom:12px">Downloads a tar.gz archive of all audit files, SAT results, and logs.</p>
|
<p style="font-size:13px;color:var(--muted);margin-bottom:12px">Downloads a tar.gz archive of all audit files, SAT results, and logs.</p>
|
||||||
` + renderSupportBundleInline() + `
|
` + renderSupportBundleInline() + `
|
||||||
|
<div style="border-top:1px solid var(--border);margin-top:16px;padding-top:16px">
|
||||||
|
<div style="font-weight:600;margin-bottom:8px">Export to USB</div>
|
||||||
|
` + renderUSBExportInline() + `
|
||||||
|
</div>
|
||||||
</div></div>
|
</div></div>
|
||||||
|
|
||||||
<div class="card"><div class="card-head">Tool Check <button class="btn btn-sm btn-secondary" onclick="checkTools()" style="margin-left:auto">↻ Check</button></div>
|
<div class="card"><div class="card-head">Tool Check <button class="btn btn-sm btn-secondary" onclick="checkTools()" style="margin-left:auto">↻ Check</button></div>
|
||||||
@@ -1578,7 +1589,11 @@ func renderTasks() string {
|
|||||||
<div class="card-head" style="padding:14px 18px;font-size:14px">Logs — <span id="task-log-title"></span>
|
<div class="card-head" style="padding:14px 18px;font-size:14px">Logs — <span id="task-log-title"></span>
|
||||||
<button class="btn btn-sm btn-secondary" onclick="closeTaskLog()" style="margin-left:auto">✕</button>
|
<button class="btn btn-sm btn-secondary" onclick="closeTaskLog()" style="margin-left:auto">✕</button>
|
||||||
</div>
|
</div>
|
||||||
<div class="card-body" style="padding:16px;flex:1;min-height:0"><div id="task-log-terminal" class="terminal" style="height:100%;max-height:none"></div></div>
|
<div class="card-body" style="padding:16px;flex:1;min-height:0;overflow:hidden">
|
||||||
|
<div style="height:100%;min-height:0;overflow:auto">
|
||||||
|
<div id="task-log-terminal" class="terminal" style="margin:0;max-height:none;overflow:visible"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<script>
|
<script>
|
||||||
|
|||||||
@@ -275,9 +275,10 @@ func TestRootRendersDashboard(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
handler := NewHandler(HandlerOptions{
|
handler := NewHandler(HandlerOptions{
|
||||||
Title: "Bee Hardware Audit",
|
Title: "Bee Hardware Audit",
|
||||||
AuditPath: path,
|
BuildLabel: "1.2.3",
|
||||||
ExportDir: exportDir,
|
AuditPath: path,
|
||||||
|
ExportDir: exportDir,
|
||||||
})
|
})
|
||||||
|
|
||||||
first := httptest.NewRecorder()
|
first := httptest.NewRecorder()
|
||||||
@@ -292,6 +293,11 @@ func TestRootRendersDashboard(t *testing.T) {
|
|||||||
if !strings.Contains(first.Body.String(), `/viewer`) {
|
if !strings.Contains(first.Body.String(), `/viewer`) {
|
||||||
t.Fatalf("first body missing viewer link: %s", first.Body.String())
|
t.Fatalf("first body missing viewer link: %s", first.Body.String())
|
||||||
}
|
}
|
||||||
|
versionIdx := strings.Index(first.Body.String(), `Version 1.2.3`)
|
||||||
|
navIdx := strings.Index(first.Body.String(), `href="/"`)
|
||||||
|
if versionIdx == -1 || navIdx == -1 || versionIdx > navIdx {
|
||||||
|
t.Fatalf("version should render near top of sidebar before nav links: %s", first.Body.String())
|
||||||
|
}
|
||||||
if got := first.Header().Get("Cache-Control"); got != "no-store" {
|
if got := first.Header().Get("Cache-Control"); got != "no-store" {
|
||||||
t.Fatalf("first cache-control=%q", got)
|
t.Fatalf("first cache-control=%q", got)
|
||||||
}
|
}
|
||||||
@@ -395,6 +401,46 @@ func TestToolsPageRendersRestartGPUDriversButton(t *testing.T) {
|
|||||||
if !strings.Contains(body, `id="boot-source-text"`) {
|
if !strings.Contains(body, `id="boot-source-text"`) {
|
||||||
t.Fatalf("tools page missing boot source field: %s", body)
|
t.Fatalf("tools page missing boot source field: %s", body)
|
||||||
}
|
}
|
||||||
|
if !strings.Contains(body, `Export to USB`) {
|
||||||
|
t.Fatalf("tools page missing export to usb section: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, `Support Bundle</button>`) {
|
||||||
|
t.Fatalf("tools page missing support bundle usb button: %s", body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTasksPageRendersScrollableLogModal(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "audit.json")
|
||||||
|
exportDir := filepath.Join(dir, "export")
|
||||||
|
if err := os.MkdirAll(exportDir, 0755); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(path, []byte(`{"collected_at":"2026-03-15T00:00:00Z"}`), 0644); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
handler := NewHandler(HandlerOptions{
|
||||||
|
Title: "Bee Hardware Audit",
|
||||||
|
AuditPath: path,
|
||||||
|
ExportDir: exportDir,
|
||||||
|
})
|
||||||
|
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/tasks", nil))
|
||||||
|
if rec.Code != http.StatusOK {
|
||||||
|
t.Fatalf("status=%d", rec.Code)
|
||||||
|
}
|
||||||
|
body := rec.Body.String()
|
||||||
|
if !strings.Contains(body, `height:calc(100vh - 32px)`) {
|
||||||
|
t.Fatalf("tasks page missing bounded log modal height: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, `flex:1;min-height:0;overflow:hidden`) {
|
||||||
|
t.Fatalf("tasks page missing log modal overflow guard: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, `height:100%;min-height:0;overflow:auto`) {
|
||||||
|
t.Fatalf("tasks page missing scrollable log wrapper: %s", body)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestViewerRendersLatestSnapshot(t *testing.T) {
|
func TestViewerRendersLatestSnapshot(t *testing.T) {
|
||||||
|
|||||||
@@ -393,11 +393,13 @@ func (q *taskQueue) worker() {
|
|||||||
for {
|
for {
|
||||||
<-q.trigger
|
<-q.trigger
|
||||||
setCPUGovernor("performance")
|
setCPUGovernor("performance")
|
||||||
|
|
||||||
|
// Drain all pending tasks and start them in parallel.
|
||||||
|
q.mu.Lock()
|
||||||
|
var batch []*Task
|
||||||
for {
|
for {
|
||||||
q.mu.Lock()
|
|
||||||
t := q.nextPending()
|
t := q.nextPending()
|
||||||
if t == nil {
|
if t == nil {
|
||||||
q.mu.Unlock()
|
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
now := time.Now()
|
now := time.Now()
|
||||||
@@ -406,37 +408,58 @@ func (q *taskQueue) worker() {
|
|||||||
t.DoneAt = nil
|
t.DoneAt = nil
|
||||||
t.ErrMsg = ""
|
t.ErrMsg = ""
|
||||||
j := newTaskJobState(t.LogPath)
|
j := newTaskJobState(t.LogPath)
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
|
||||||
j.cancel = cancel
|
|
||||||
t.job = j
|
t.job = j
|
||||||
|
batch = append(batch, t)
|
||||||
|
}
|
||||||
|
if len(batch) > 0 {
|
||||||
q.persistLocked()
|
q.persistLocked()
|
||||||
q.mu.Unlock()
|
}
|
||||||
|
q.mu.Unlock()
|
||||||
|
|
||||||
if q.kmsgWatcher != nil && isSATTarget(t.Target) {
|
var wg sync.WaitGroup
|
||||||
q.kmsgWatcher.NotifyTaskStarted(t.ID, t.Target)
|
for _, t := range batch {
|
||||||
}
|
t := t
|
||||||
|
j := t.job
|
||||||
|
taskCtx, taskCancel := context.WithCancel(context.Background())
|
||||||
|
j.cancel = taskCancel
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
|
||||||
q.runTask(t, j, ctx)
|
if q.kmsgWatcher != nil && isSATTarget(t.Target) {
|
||||||
|
q.kmsgWatcher.NotifyTaskStarted(t.ID, t.Target)
|
||||||
if q.kmsgWatcher != nil {
|
|
||||||
q.kmsgWatcher.NotifyTaskFinished(t.ID)
|
|
||||||
}
|
|
||||||
|
|
||||||
q.mu.Lock()
|
|
||||||
now2 := time.Now()
|
|
||||||
t.DoneAt = &now2
|
|
||||||
if t.Status == TaskRunning { // not cancelled externally
|
|
||||||
if j.err != "" {
|
|
||||||
t.Status = TaskFailed
|
|
||||||
t.ErrMsg = j.err
|
|
||||||
} else {
|
|
||||||
t.Status = TaskDone
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
q.runTask(t, j, taskCtx)
|
||||||
|
|
||||||
|
if q.kmsgWatcher != nil {
|
||||||
|
q.kmsgWatcher.NotifyTaskFinished(t.ID)
|
||||||
|
}
|
||||||
|
|
||||||
|
q.mu.Lock()
|
||||||
|
now2 := time.Now()
|
||||||
|
t.DoneAt = &now2
|
||||||
|
if t.Status == TaskRunning {
|
||||||
|
if j.err != "" {
|
||||||
|
t.Status = TaskFailed
|
||||||
|
t.ErrMsg = j.err
|
||||||
|
} else {
|
||||||
|
t.Status = TaskDone
|
||||||
|
}
|
||||||
|
}
|
||||||
|
q.persistLocked()
|
||||||
|
q.mu.Unlock()
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
wg.Wait()
|
||||||
|
|
||||||
|
if len(batch) > 0 {
|
||||||
|
q.mu.Lock()
|
||||||
q.prune()
|
q.prune()
|
||||||
q.persistLocked()
|
q.persistLocked()
|
||||||
q.mu.Unlock()
|
q.mu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
setCPUGovernor("powersave")
|
setCPUGovernor("powersave")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
16
audit/scripts/resolve-version.sh
Executable file
16
audit/scripts/resolve-version.sh
Executable file
@@ -0,0 +1,16 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
set -eu
|
||||||
|
|
||||||
|
tag="$(git describe --tags --match 'v[0-9]*' --abbrev=7 --dirty 2>/dev/null || true)"
|
||||||
|
|
||||||
|
case "${tag}" in
|
||||||
|
v*)
|
||||||
|
printf '%s\n' "${tag#v}"
|
||||||
|
;;
|
||||||
|
"")
|
||||||
|
printf 'dev\n'
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
printf '%s\n' "${tag}"
|
||||||
|
;;
|
||||||
|
esac
|
||||||
@@ -54,15 +54,8 @@ resolve_audit_version() {
|
|||||||
return 0
|
return 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
tag="$(git -C "${REPO_ROOT}" describe --tags --match 'audit/v*' --abbrev=7 --dirty 2>/dev/null || true)"
|
tag="$(git -C "${REPO_ROOT}" describe --tags --match 'v[0-9]*' --abbrev=7 --dirty 2>/dev/null || true)"
|
||||||
if [ -z "${tag}" ]; then
|
|
||||||
tag="$(git -C "${REPO_ROOT}" describe --tags --match 'v[0-9]*' --abbrev=7 --dirty 2>/dev/null || true)"
|
|
||||||
fi
|
|
||||||
case "${tag}" in
|
case "${tag}" in
|
||||||
audit/v*)
|
|
||||||
echo "${tag#audit/v}"
|
|
||||||
return 0
|
|
||||||
;;
|
|
||||||
v*)
|
v*)
|
||||||
echo "${tag#v}"
|
echo "${tag#v}"
|
||||||
return 0
|
return 0
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
[Unit]
|
[Unit]
|
||||||
Description=Bee: hardware audit
|
Description=Bee: hardware audit
|
||||||
After=bee-preflight.service bee-network.service bee-nvidia.service
|
After=bee-preflight.service bee-network.service bee-nvidia.service
|
||||||
Before=bee-web.service
|
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
Type=oneshot
|
Type=oneshot
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
[Unit]
|
[Unit]
|
||||||
Description=Bee: hardware audit web viewer
|
Description=Bee: hardware audit web viewer
|
||||||
After=bee-audit.service
|
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
Type=simple
|
Type=simple
|
||||||
|
|||||||
@@ -1,10 +1,11 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
set -eu
|
set -eu
|
||||||
|
|
||||||
SECONDS=300
|
DURATION_SEC=300
|
||||||
DEVICES=""
|
DEVICES=""
|
||||||
EXCLUDE=""
|
EXCLUDE=""
|
||||||
FORMAT=""
|
FORMAT=""
|
||||||
|
TEST_SLICE_SECONDS=300
|
||||||
JOHN_DIR="/usr/local/lib/bee/john/run"
|
JOHN_DIR="/usr/local/lib/bee/john/run"
|
||||||
JOHN_BIN="${JOHN_DIR}/john"
|
JOHN_BIN="${JOHN_DIR}/john"
|
||||||
export OCL_ICD_VENDORS="/etc/OpenCL/vendors"
|
export OCL_ICD_VENDORS="/etc/OpenCL/vendors"
|
||||||
@@ -116,7 +117,7 @@ ensure_opencl_ready() {
|
|||||||
|
|
||||||
while [ "$#" -gt 0 ]; do
|
while [ "$#" -gt 0 ]; do
|
||||||
case "$1" in
|
case "$1" in
|
||||||
--seconds|-t) [ "$#" -ge 2 ] || usage; SECONDS="$2"; shift 2 ;;
|
--seconds|-t) [ "$#" -ge 2 ] || usage; DURATION_SEC="$2"; shift 2 ;;
|
||||||
--devices) [ "$#" -ge 2 ] || usage; DEVICES="$2"; shift 2 ;;
|
--devices) [ "$#" -ge 2 ] || usage; DEVICES="$2"; shift 2 ;;
|
||||||
--exclude) [ "$#" -ge 2 ] || usage; EXCLUDE="$2"; shift 2 ;;
|
--exclude) [ "$#" -ge 2 ] || usage; EXCLUDE="$2"; shift 2 ;;
|
||||||
--format) [ "$#" -ge 2 ] || usage; FORMAT="$2"; shift 2 ;;
|
--format) [ "$#" -ge 2 ] || usage; FORMAT="$2"; shift 2 ;;
|
||||||
@@ -189,14 +190,51 @@ CHOSEN_FORMAT=$(choose_format) || {
|
|||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
echo "format=${CHOSEN_FORMAT}"
|
run_john_loop() {
|
||||||
|
opencl_id="$1"
|
||||||
|
deadline="$2"
|
||||||
|
round=0
|
||||||
|
while :; do
|
||||||
|
now=$(date +%s)
|
||||||
|
remaining=$((deadline - now))
|
||||||
|
if [ "${remaining}" -le 0 ]; then
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
round=$((round + 1))
|
||||||
|
slice="${remaining}"
|
||||||
|
if [ "${slice}" -gt "${TEST_SLICE_SECONDS}" ]; then
|
||||||
|
slice="${TEST_SLICE_SECONDS}"
|
||||||
|
fi
|
||||||
|
echo "device=${opencl_id} round=${round} remaining_sec=${remaining} slice_sec=${slice}"
|
||||||
|
./john --test="${slice}" --format="${CHOSEN_FORMAT}" --devices="${opencl_id}" || return 1
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
PIDS=""
|
PIDS=""
|
||||||
|
cleanup() {
|
||||||
|
rc=$?
|
||||||
|
trap - EXIT INT TERM
|
||||||
|
for pid in ${PIDS}; do
|
||||||
|
kill "${pid}" 2>/dev/null || true
|
||||||
|
done
|
||||||
|
for pid in ${PIDS}; do
|
||||||
|
wait "${pid}" 2>/dev/null || true
|
||||||
|
done
|
||||||
|
exit "${rc}"
|
||||||
|
}
|
||||||
|
trap cleanup EXIT INT TERM
|
||||||
|
|
||||||
|
echo "format=${CHOSEN_FORMAT}"
|
||||||
|
echo "target_seconds=${DURATION_SEC}"
|
||||||
|
echo "slice_seconds=${TEST_SLICE_SECONDS}"
|
||||||
|
DEADLINE=$(( $(date +%s) + DURATION_SEC ))
|
||||||
_first=1
|
_first=1
|
||||||
for opencl_id in $(echo "${JOHN_DEVICES}" | tr ',' ' '); do
|
for opencl_id in $(echo "${JOHN_DEVICES}" | tr ',' ' '); do
|
||||||
[ "${_first}" = "1" ] || sleep 3
|
[ "${_first}" = "1" ] || sleep 3
|
||||||
_first=0
|
_first=0
|
||||||
./john --test="${SECONDS}" --format="${CHOSEN_FORMAT}" --devices="${opencl_id}" &
|
run_john_loop "${opencl_id}" "${DEADLINE}" &
|
||||||
PIDS="${PIDS} $!"
|
pid=$!
|
||||||
|
PIDS="${PIDS} ${pid}"
|
||||||
done
|
done
|
||||||
FAIL=0
|
FAIL=0
|
||||||
for pid in ${PIDS}; do
|
for pid in ${PIDS}; do
|
||||||
|
|||||||
@@ -128,13 +128,32 @@ ldconfig 2>/dev/null || true
|
|||||||
log "ldconfig refreshed"
|
log "ldconfig refreshed"
|
||||||
|
|
||||||
# Start DCGM host engine so dcgmi can discover GPUs.
|
# Start DCGM host engine so dcgmi can discover GPUs.
|
||||||
# nv-hostengine must run before any dcgmi command — without it, dcgmi reports
|
# nv-hostengine must run after the NVIDIA modules and device nodes are ready.
|
||||||
# "group is empty" even when GPUs and modules are present.
|
# If it started too early (for example via systemd before bee-nvidia-load), it can
|
||||||
# Skip if already running (e.g. started by a dcgm systemd service or prior boot).
|
# keep a stale empty inventory and dcgmi diag later reports no testable entities.
|
||||||
if command -v nv-hostengine >/dev/null 2>&1; then
|
if command -v nv-hostengine >/dev/null 2>&1; then
|
||||||
if pgrep -x nv-hostengine >/dev/null 2>&1; then
|
if pgrep -x nv-hostengine >/dev/null 2>&1; then
|
||||||
log "nv-hostengine already running — skipping"
|
if command -v pkill >/dev/null 2>&1; then
|
||||||
else
|
pkill -x nv-hostengine >/dev/null 2>&1 || true
|
||||||
|
tries=0
|
||||||
|
while pgrep -x nv-hostengine >/dev/null 2>&1; do
|
||||||
|
tries=$((tries + 1))
|
||||||
|
if [ "${tries}" -ge 10 ]; then
|
||||||
|
log "WARN: nv-hostengine is still running after restart request"
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
if pgrep -x nv-hostengine >/dev/null 2>&1; then
|
||||||
|
log "WARN: keeping existing nv-hostengine process"
|
||||||
|
else
|
||||||
|
log "nv-hostengine restarted"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
log "WARN: pkill not found — cannot refresh nv-hostengine inventory"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
if ! pgrep -x nv-hostengine >/dev/null 2>&1; then
|
||||||
nv-hostengine
|
nv-hostengine
|
||||||
log "nv-hostengine started"
|
log "nv-hostengine started"
|
||||||
fi
|
fi
|
||||||
|
|||||||
Reference in New Issue
Block a user