diff --git a/audit/internal/app/app.go b/audit/internal/app/app.go index e93e337..85fbda6 100644 --- a/audit/internal/app/app.go +++ b/audit/internal/app/app.go @@ -139,6 +139,7 @@ type satRunner interface { RunFanStressTest(ctx context.Context, baseDir string, opts platform.FanStressOptions) (string, error) RunPlatformStress(ctx context.Context, baseDir string, opts platform.PlatformStressOptions, logFunc func(string)) (string, error) RunNCCLTests(ctx context.Context, baseDir string, logFunc func(string)) (string, error) + RunHPL(ctx context.Context, baseDir string, opts platform.HPLOptions, logFunc func(string)) (string, *platform.HPLResult, error) } type runtimeChecker interface { @@ -737,6 +738,13 @@ func (a *App) RunNCCLTestsResult(ctx context.Context) (ActionResult, error) { return ActionResult{Title: "NCCL bandwidth test", Body: body}, err } +func (a *App) RunHPL(ctx context.Context, baseDir string, opts platform.HPLOptions, logFunc func(string)) (string, *platform.HPLResult, error) { + if a == nil { + return "", nil, fmt.Errorf("app not configured") + } + return a.sat.RunHPL(ctx, baseDir, opts, logFunc) +} + func (a *App) RunFanStressTestResult(ctx context.Context, opts platform.FanStressOptions) (ActionResult, error) { path, err := a.RunFanStressTest(ctx, "", opts) body := formatFanStressResult(path) diff --git a/audit/internal/app/app_test.go b/audit/internal/app/app_test.go index b809ce9..dea2a5a 100644 --- a/audit/internal/app/app_test.go +++ b/audit/internal/app/app_test.go @@ -282,6 +282,9 @@ func (f fakeSAT) RunPlatformStress(_ context.Context, _ string, _ platform.Platf func (f fakeSAT) RunNCCLTests(_ context.Context, _ string, _ func(string)) (string, error) { return "", nil } +func (f fakeSAT) RunHPL(_ context.Context, _ string, _ platform.HPLOptions, _ func(string)) (string, *platform.HPLResult, error) { + return "", nil, nil +} func TestNetworkStatusFormatsInterfacesAndRoute(t *testing.T) { t.Parallel() diff --git a/audit/internal/platform/hpl.go b/audit/internal/platform/hpl.go new file mode 100644 index 0000000..0ee4143 --- /dev/null +++ b/audit/internal/platform/hpl.go @@ -0,0 +1,142 @@ +package platform + +import ( + "context" + "fmt" + "os" + "os/exec" + "path/filepath" + "strconv" + "strings" + "time" +) + +// HPLOptions configures the HPL (LINPACK) benchmark run. +type HPLOptions struct { + MemFraction float64 // fraction of RAM to use (default 0.80) + NB int // block size (default 256) +} + +// HPLResult holds the parsed result of an HPL run. +type HPLResult struct { + N int // matrix dimension + NB int // block size + P int // process grid rows + Q int // process grid cols + TimeSec float64 // wall time in seconds + GFlops float64 // achieved performance + Residual float64 // backward error residual (from HPL verification line) + Status string // "PASSED" or "FAILED" + RawOutput string // full xhpl output +} + +func applyHPLDefaults(opts *HPLOptions) { + if opts.MemFraction <= 0 || opts.MemFraction > 1 { + opts.MemFraction = 0.80 + } + if opts.NB <= 0 { + opts.NB = 256 + } +} + +// RunHPL runs bee-hpl and returns parsed results plus a tar.gz artifact path. +func (s *System) RunHPL(ctx context.Context, baseDir string, opts HPLOptions, logFunc func(string)) (string, *HPLResult, error) { + applyHPLDefaults(&opts) + + if baseDir == "" { + baseDir = "/var/log/bee-sat" + } + ts := time.Now().UTC().Format("20060102-150405") + runDir := filepath.Join(baseDir, "hpl-"+ts) + if err := os.MkdirAll(runDir, 0755); err != nil { + return "", nil, fmt.Errorf("mkdir %s: %w", runDir, err) + } + + logPath := filepath.Join(runDir, "hpl.log") + + cmd := []string{ + "bee-hpl", + "--mem-fraction", strconv.FormatFloat(opts.MemFraction, 'f', 2, 64), + "--nb", strconv.Itoa(opts.NB), + } + + if logFunc != nil { + logFunc(fmt.Sprintf("HPL: N will be auto-sized to %.0f%% of RAM, NB=%d", opts.MemFraction*100, opts.NB)) + } + + out, err := runSATCommandCtx(ctx, "", "hpl", cmd, nil, logFunc) + _ = os.WriteFile(logPath, out, 0644) + + result := parseHPLOutput(string(out)) + result.RawOutput = string(out) + + if err != nil && err != context.Canceled { + return "", result, fmt.Errorf("bee-hpl failed: %w", err) + } + if err == nil && result.GFlops <= 0 { + return "", result, fmt.Errorf("HPL completed but no Gflops result found in output") + } + + // Write summary + summary := fmt.Sprintf("N=%d NB=%d time=%.2fs gflops=%.3f status=%s\n", + result.N, result.NB, result.TimeSec, result.GFlops, result.Status) + _ = os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary), 0644) + + if logFunc != nil { + logFunc(fmt.Sprintf("HPL result: N=%d NB=%d %.2fs %.3f Gflops %s", + result.N, result.NB, result.TimeSec, result.GFlops, result.Status)) + } + + ts2 := time.Now().UTC().Format("20060102-150405") + archive := filepath.Join(baseDir, "hpl-"+ts2+".tar.gz") + if archErr := createTarGz(archive, runDir); archErr != nil { + return runDir, result, err + } + return archive, result, err +} + +// parseHPLOutput extracts N, NB, time, and Gflops from standard HPL output. +// +// HPL prints a result line of the form: +// +// WR00L2L2 45312 256 1 1 1234.56 5.678e+01 +// T/V N NB P Q Time Gflops +func parseHPLOutput(output string) *HPLResult { + result := &HPLResult{Status: "FAILED"} + for _, line := range strings.Split(output, "\n") { + line = strings.TrimSpace(line) + // Result line starts with WR + if strings.HasPrefix(line, "WR") { + fields := strings.Fields(line) + // WR00L2L2 N NB P Q Time Gflops + if len(fields) >= 7 { + result.N, _ = strconv.Atoi(fields[1]) + result.NB, _ = strconv.Atoi(fields[2]) + result.P, _ = strconv.Atoi(fields[3]) + result.Q, _ = strconv.Atoi(fields[4]) + result.TimeSec, _ = strconv.ParseFloat(fields[5], 64) + result.GFlops, _ = strconv.ParseFloat(fields[6], 64) + } + } + // Verification line: "||Ax-b||_oo/(eps*(||A||_oo*||x||_oo+||b||_oo)*N)= ... PASSED" + if strings.Contains(line, "PASSED") { + result.Status = "PASSED" + fields := strings.Fields(line) + for i, f := range fields { + if f == "PASSED" && i > 0 { + result.Residual, _ = strconv.ParseFloat(fields[i-1], 64) + } + } + } + } + return result +} + +// hplAvailable returns true if bee-hpl and xhpl are present and executable. +func hplAvailable() bool { + if _, err := exec.LookPath("bee-hpl"); err != nil { + return false + } + _, err := os.Stat("/usr/local/lib/bee/xhpl") + return err == nil +} diff --git a/audit/internal/webui/pages.go b/audit/internal/webui/pages.go index 7c9467a..38cabce 100644 --- a/audit/internal/webui/pages.go +++ b/audit/internal/webui/pages.go @@ -1143,6 +1143,16 @@ func renderValidate(opts HandlerOptions) string { `` + `
+` + `
` + + renderSATCard("hpl", "LINPACK (HPL)", "runSAT('hpl')", "", renderValidateCardBody( + ``, + `Standard High Performance LINPACK benchmark. Measures sustained FP64 GFLOPS and memory bandwidth of the CPU subsystem. Uses 80% of available RAM. Pass/fail based on HPL residual check.`, + `xhpl (HPL 2.3, OpenBLAS)`, + `Skipped in Validate mode. Runs in Stress mode only. Runtime scales with RAM — expect 5–30 min.

Only runs in Stress mode. Switch mode above to enable in Run All.

`, + )) + + `
` + + `
+
` + renderSATCard("amd", "AMD GPU", "runAMDValidateSet()", "", renderValidateCardBody( inv.AMD, `Runs the selected AMD checks only. GPU Validate collects inventory; MEM Integrity uses the RVS MEM module; MEM Bandwidth uses rocm-bandwidth-test and the RVS BABEL module.`, @@ -1178,6 +1188,7 @@ function satModeChanged() { {card: 'sat-card-nvidia-pulse', hint: 'sat-pt-mode-hint'}, {card: 'sat-card-nvidia-interconnect', hint: 'sat-ni-mode-hint'}, {card: 'sat-card-nvidia-bandwidth', hint: 'sat-nb-mode-hint'}, + {card: 'sat-card-hpl', hint: 'sat-hpl-mode-hint'}, ].forEach(function(item) { const card = document.getElementById(item.card); if (card) { @@ -1188,7 +1199,7 @@ function satModeChanged() { }); } function satLabels() { - return {nvidia:'Validate GPU', 'nvidia-targeted-stress':'NVIDIA Targeted Stress (dcgmi diag targeted_stress)', 'nvidia-targeted-power':'NVIDIA Targeted Power (dcgmi diag targeted_power)', 'nvidia-pulse':'NVIDIA Pulse Test (dcgmi diag pulse_test)', 'nvidia-interconnect':'NVIDIA Interconnect (NCCL all_reduce_perf)', 'nvidia-bandwidth':'NVIDIA Bandwidth (NVBandwidth)', memory:'Validate Memory', storage:'Validate Storage', cpu:'Validate CPU', amd:'Validate AMD GPU', 'amd-mem':'AMD GPU MEM Integrity', 'amd-bandwidth':'AMD GPU MEM Bandwidth'}; + return {nvidia:'Validate GPU', 'nvidia-targeted-stress':'NVIDIA Targeted Stress (dcgmi diag targeted_stress)', 'nvidia-targeted-power':'NVIDIA Targeted Power (dcgmi diag targeted_power)', 'nvidia-pulse':'NVIDIA PSU Pulse Test (dcgmi diag pulse_test)', 'nvidia-interconnect':'NVIDIA Interconnect (NCCL all_reduce_perf)', 'nvidia-bandwidth':'NVIDIA Bandwidth (NVBandwidth)', hpl:'LINPACK (HPL)', memory:'Validate Memory', storage:'Validate Storage', cpu:'Validate CPU', amd:'Validate AMD GPU', 'amd-mem':'AMD GPU MEM Integrity', 'amd-bandwidth':'AMD GPU MEM Bandwidth'}; } let satNvidiaGPUsPromise = null; function loadSatNvidiaGPUs() { @@ -1437,8 +1448,8 @@ function runAllSAT() { const cycles = Math.max(1, parseInt(document.getElementById('sat-cycles').value)||1); const status = document.getElementById('sat-all-status'); status.textContent = 'Enqueuing...'; - const stressOnlyTargets = ['nvidia-targeted-stress', 'nvidia-targeted-power', 'nvidia-pulse', 'nvidia-interconnect', 'nvidia-bandwidth']; - const baseTargets = ['nvidia','nvidia-targeted-stress','nvidia-targeted-power','nvidia-pulse','nvidia-interconnect','nvidia-bandwidth','memory','storage','cpu'].concat(selectedAMDValidateTargets()); + const stressOnlyTargets = ['nvidia-targeted-stress', 'nvidia-targeted-power', 'nvidia-pulse', 'nvidia-interconnect', 'nvidia-bandwidth', 'hpl']; + const baseTargets = ['nvidia','nvidia-targeted-stress','nvidia-targeted-power','nvidia-pulse','nvidia-interconnect','nvidia-bandwidth','hpl','memory','storage','cpu'].concat(selectedAMDValidateTargets()); const activeTargets = baseTargets.filter(target => { if (stressOnlyTargets.indexOf(target) >= 0 && !satStressMode()) return false; const btn = document.getElementById('sat-btn-' + target); @@ -2082,7 +2093,7 @@ func benchmarkHistoryParallelLabel(serverModel, gpuName string, count int) strin func renderBurn() string { return `
⚠ Warning: Stress tests on this page run hardware at high load. Repeated or prolonged use may reduce hardware lifespan. Use only when necessary.
-
Scope: DCGM diagnostics and ` + "targeted_stress" + ` remain in Validate. Burn exposes official NVIDIA load recipes by test goal plus separate custom stress tools.
+
Scope: DCGM diagnostics (` + "targeted_stress, targeted_power, pulse_test" + `), NCCL, NVBandwidth, and LINPACK remain in Validate → Stress mode. Burn exposes sustained GPU compute load recipes.

Tasks continue in the background — view progress in Tasks.

diff --git a/audit/internal/webui/server_test.go b/audit/internal/webui/server_test.go index b169ff2..20b5a22 100644 --- a/audit/internal/webui/server_test.go +++ b/audit/internal/webui/server_test.go @@ -741,8 +741,8 @@ func TestBurnPageRendersGoalBasedNVIDIACards(t *testing.T) { for _, needle := range []string{ `NVIDIA Max Compute Load`, `dcgmproftester`, - `targeted_stress remain in Validate`, - `NVIDIA Interconnect Test (NCCL all_reduce_perf)`, + `NCCL`, + `Validate → Stress mode`, `id="burn-gpu-list"`, } { if !strings.Contains(body, needle) { diff --git a/audit/internal/webui/tasks.go b/audit/internal/webui/tasks.go index 68c430d..111c20e 100644 --- a/audit/internal/webui/tasks.go +++ b/audit/internal/webui/tasks.go @@ -39,6 +39,7 @@ var taskNames = map[string]string{ "nvidia-interconnect": "NVIDIA Interconnect Test (NCCL all_reduce_perf)", "nvidia-bandwidth": "NVIDIA Bandwidth Test (NVBandwidth)", "nvidia-stress": "NVIDIA GPU Stress", + "hpl": "LINPACK (HPL)", "memory": "Memory SAT", "storage": "Storage SAT", "cpu": "CPU SAT", @@ -739,6 +740,19 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) { dur = resolveBurnPreset(t.params.BurnProfile).DurationSec } archive, err = runSATStressPackCtx(a, ctx, "", dur, j.append) + case "hpl": + if a == nil { + err = fmt.Errorf("app not configured") + break + } + opts := platform.HPLOptions{ + MemFraction: 0.80, + NB: 256, + } + archive, err = func() (string, error) { + path, _, runErr := a.RunHPL(ctx, "", opts, j.append) + return path, runErr + }() case "platform-stress": if a == nil { err = fmt.Errorf("app not configured") diff --git a/iso/builder/VERSIONS b/iso/builder/VERSIONS index aac0fc1..6902df9 100644 --- a/iso/builder/VERSIONS +++ b/iso/builder/VERSIONS @@ -19,5 +19,7 @@ ROCRAND_VERSION=3.2.0.60304-76~22.04 HIP_RUNTIME_AMD_VERSION=6.3.42134.60304-76~22.04 HIPBLASLT_VERSION=0.10.0.60304-76~22.04 COMGR_VERSION=2.8.0.60304-76~22.04 +HPL_VERSION=2.3 +HPL_SHA256=32c5c17d22330e6f2337b681aded51637fb6008d3f0eb7c277b163fadd612830 GO_VERSION=1.24.0 AUDIT_VERSION=1.0.0 diff --git a/iso/builder/build-hpl.sh b/iso/builder/build-hpl.sh new file mode 100755 index 0000000..136b4f2 --- /dev/null +++ b/iso/builder/build-hpl.sh @@ -0,0 +1,244 @@ +#!/bin/sh +# build-hpl.sh — build HPL (High Performance LINPACK) for the bee LiveCD. +# +# Downloads HPL 2.3 from netlib, downloads OpenBLAS runtime from the Debian 12 +# apt repo, and compiles xhpl using a minimal single-process MPI stub so that +# no MPI package is required inside the ISO. +# +# The resulting xhpl binary is a standard HPL binary whose output is compatible +# with the accepted HPL format (WR... Gflops lines). +# +# Output: +# $CACHE_DIR/bin/xhpl +# $CACHE_DIR/lib/libopenblas.so* (runtime, injected into ISO /usr/lib/) + +set -e + +HPL_VERSION="$1" +HPL_SHA256="$2" +DIST_DIR="$3" + +[ -n "$HPL_VERSION" ] || { echo "usage: $0 "; exit 1; } +[ -n "$HPL_SHA256" ] || { echo "usage: $0 "; exit 1; } +[ -n "$DIST_DIR" ] || { echo "usage: $0 "; exit 1; } + +echo "=== HPL ${HPL_VERSION} ===" + +CACHE_DIR="${DIST_DIR}/hpl-${HPL_VERSION}" +CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}" +DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/hpl-downloads" + +if [ -x "${CACHE_DIR}/bin/xhpl" ]; then + echo "=== HPL cached, skipping build ===" + echo "binary: ${CACHE_DIR}/bin/xhpl" + exit 0 +fi + +mkdir -p "${DOWNLOAD_CACHE_DIR}" "${CACHE_DIR}/bin" "${CACHE_DIR}/lib" + +# ── download HPL source ──────────────────────────────────────────────────────── +HPL_TAR="${DOWNLOAD_CACHE_DIR}/hpl-${HPL_VERSION}.tar.gz" +HPL_URL="https://www.netlib.org/benchmark/hpl/hpl-${HPL_VERSION}.tar.gz" + +if [ ! -f "${HPL_TAR}" ]; then + echo "=== downloading HPL ${HPL_VERSION} ===" + wget --show-progress -O "${HPL_TAR}" "${HPL_URL}" +fi + +actual_sha="$(sha256sum "${HPL_TAR}" | awk '{print $1}')" +if [ "${actual_sha}" != "${HPL_SHA256}" ]; then + echo "ERROR: sha256 mismatch for hpl-${HPL_VERSION}.tar.gz" >&2 + echo " expected: ${HPL_SHA256}" >&2 + echo " actual: ${actual_sha}" >&2 + rm -f "${HPL_TAR}" + exit 1 +fi +echo "sha256 OK: hpl-${HPL_VERSION}.tar.gz" + +# ── download OpenBLAS from Debian 12 apt repo ───────────────────────────────── +REPO_BASE="https://deb.debian.org/debian/pool/main/o/openblas" +PACKAGES_GZ="${DOWNLOAD_CACHE_DIR}/Packages.gz" +OPENBLAS_PKG="libopenblas0-openmp" + +echo "=== fetching Debian 12 Packages.gz ===" +wget -q -O "${PACKAGES_GZ}" \ + "https://deb.debian.org/debian/dists/bookworm/main/binary-amd64/Packages.gz" + +lookup_deb() { + pkg="$1" + gzip -dc "${PACKAGES_GZ}" | awk -v pkg="$pkg" ' + /^Package: / { cur=$2 } + /^Filename: / { file=$2 } + /^SHA256: / { sha=$2 } + /^$/ { + if (cur == pkg) { print file " " sha; exit } + cur=""; file=""; sha="" + } + END { + if (cur == pkg) print file " " sha + }' +} + +meta="$(lookup_deb "${OPENBLAS_PKG}")" +[ -n "$meta" ] || { echo "ERROR: ${OPENBLAS_PKG} not found in Packages.gz"; exit 1; } +repo_file="$(printf '%s' "$meta" | awk '{print $1}')" +repo_sha="$(printf '%s' "$meta" | awk '{print $2}')" + +OPENBLAS_DEB="${DOWNLOAD_CACHE_DIR}/$(basename "${repo_file}")" +if [ -f "${OPENBLAS_DEB}" ]; then + actual="$(sha256sum "${OPENBLAS_DEB}" | awk '{print $1}')" + [ "$actual" = "$repo_sha" ] || rm -f "${OPENBLAS_DEB}" +fi +if [ ! -f "${OPENBLAS_DEB}" ]; then + echo "=== downloading ${OPENBLAS_PKG} ===" + wget --show-progress -O "${OPENBLAS_DEB}" "https://deb.debian.org/debian/${repo_file}" + actual="$(sha256sum "${OPENBLAS_DEB}" | awk '{print $1}')" + [ "$actual" = "$repo_sha" ] || { echo "ERROR: sha256 mismatch for ${OPENBLAS_PKG}"; rm -f "${OPENBLAS_DEB}"; exit 1; } +fi + +# extract libopenblas shared libs +TMP_DEB=$(mktemp -d) +trap 'rm -rf "${TMP_DEB}" "${BUILD_TMP:-}"' EXIT INT TERM +( + cd "${TMP_DEB}" + ar x "${OPENBLAS_DEB}" + tar xf data.tar.* +) +find "${TMP_DEB}" \( -name 'libopenblas*.so*' \) \( -type f -o -type l \) \ + -exec cp -a {} "${CACHE_DIR}/lib/" \; +echo "=== OpenBLAS libs: $(ls "${CACHE_DIR}/lib/" | wc -l) files ===" + +# also need libopenblas-dev header for compilation (we only need the .so symlink) +OPENBLAS_SO="$(find "${CACHE_DIR}/lib" -maxdepth 1 -name 'libopenblas.so.*' -type f | sort | head -1)" +[ -n "${OPENBLAS_SO}" ] || { echo "ERROR: libopenblas.so not extracted"; exit 1; } +SONAME="$(basename "${OPENBLAS_SO}")" +ln -sf "${SONAME}" "${CACHE_DIR}/lib/libopenblas.so" 2>/dev/null || true +ln -sf "${SONAME}" "${CACHE_DIR}/lib/libblas.so" 2>/dev/null || true + +# ── build HPL ───────────────────────────────────────────────────────────────── +BUILD_TMP=$(mktemp -d) + +cd "${BUILD_TMP}" +tar xf "${HPL_TAR}" +SRC_DIR="$(find . -maxdepth 1 -type d -name 'hpl-*' | head -1)" +[ -n "${SRC_DIR}" ] || { echo "ERROR: HPL source dir not found"; exit 1; } +cd "${SRC_DIR}" + +# Write a minimal single-process MPI stub so we don't need an MPI package. +# HPL only needs these functions for single-process execution. +cat > "${BUILD_TMP}/mpi_stub.c" <<'MPISTUB' +#include +#include +#include + +typedef int MPI_Comm; +typedef int MPI_Datatype; +typedef int MPI_Op; +typedef int MPI_Status; +typedef int MPI_Request; + +#define MPI_COMM_WORLD 0 +#define MPI_SUCCESS 0 +#define MPI_DOUBLE 6 +#define MPI_INT 5 +#define MPI_SUM 0 +#define MPI_MAX 1 +#define MPI_MIN 2 +#define MPI_BYTE 1 +#define MPI_ANY_SOURCE -1 +#define MPI_ANY_TAG -1 +#define MPI_STATUS_IGNORE ((MPI_Status*)0) + +int MPI_Init(int *argc, char ***argv) { (void)argc; (void)argv; return MPI_SUCCESS; } +int MPI_Finalize(void) { return MPI_SUCCESS; } +int MPI_Comm_rank(MPI_Comm c, int *rank) { (void)c; *rank = 0; return MPI_SUCCESS; } +int MPI_Comm_size(MPI_Comm c, int *size) { (void)c; *size = 1; return MPI_SUCCESS; } +int MPI_Bcast(void *b, int n, MPI_Datatype t, int r, MPI_Comm c) + { (void)b;(void)n;(void)t;(void)r;(void)c; return MPI_SUCCESS; } +int MPI_Reduce(const void *s, void *r, int n, MPI_Datatype t, MPI_Op op, int root, MPI_Comm c) { + (void)op;(void)root;(void)c; + size_t sz = (t==MPI_DOUBLE)?sizeof(double):(t==MPI_INT)?sizeof(int):1; + memcpy(r, s, (size_t)n * sz); + return MPI_SUCCESS; +} +int MPI_Allreduce(const void *s, void *r, int n, MPI_Datatype t, MPI_Op op, MPI_Comm c) + { return MPI_Reduce(s,r,n,t,op,0,c); } +int MPI_Send(const void *b, int n, MPI_Datatype t, int d, int tag, MPI_Comm c) + { (void)b;(void)n;(void)t;(void)d;(void)tag;(void)c; return MPI_SUCCESS; } +int MPI_Recv(void *b, int n, MPI_Datatype t, int s, int tag, MPI_Comm c, MPI_Status *st) + { (void)b;(void)n;(void)t;(void)s;(void)tag;(void)c;(void)st; return MPI_SUCCESS; } +int MPI_Sendrecv(const void *sb, int sn, MPI_Datatype st2, int dest, int stag, + void *rb, int rn, MPI_Datatype rt, int src, int rtag, + MPI_Comm c, MPI_Status *status) + { (void)sb;(void)sn;(void)st2;(void)dest;(void)stag; + (void)rb;(void)rn;(void)rt;(void)src;(void)rtag;(void)c;(void)status; + return MPI_SUCCESS; } +int MPI_Irecv(void *b, int n, MPI_Datatype t, int s, int tag, MPI_Comm c, MPI_Request *req) + { (void)b;(void)n;(void)t;(void)s;(void)tag;(void)c;(void)req; return MPI_SUCCESS; } +int MPI_Wait(MPI_Request *req, MPI_Status *st) + { (void)req;(void)st; return MPI_SUCCESS; } +int MPI_Abort(MPI_Comm c, int code) { (void)c; exit(code); } +double MPI_Wtime(void) { + struct timeval tv; + gettimeofday(&tv, NULL); + return (double)tv.tv_sec + (double)tv.tv_usec * 1e-6; +} +MPISTUB + +# Write Make.bee — HPL makefile configuration +cat > Make.bee <&1 | tail -20 + +XHPL_BIN="bin/bee/xhpl" +[ -x "${XHPL_BIN}" ] || { echo "ERROR: xhpl not found after build"; exit 1; } + +cp "${XHPL_BIN}" "${CACHE_DIR}/bin/xhpl" +chmod +x "${CACHE_DIR}/bin/xhpl" +echo "=== HPL build complete ===" +echo "binary: ${CACHE_DIR}/bin/xhpl" +echo "libs: $(ls "${CACHE_DIR}/lib/")" diff --git a/iso/builder/build.sh b/iso/builder/build.sh index a2160e3..fd0e2bb 100755 --- a/iso/builder/build.sh +++ b/iso/builder/build.sh @@ -1148,6 +1148,19 @@ if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then echo "=== john injected ===" fi +# --- build HPL (CPU LINPACK) — runs on all variants --- +run_step "build HPL ${HPL_VERSION}" "80-hpl" \ + sh "${BUILDER_DIR}/build-hpl.sh" "${HPL_VERSION}" "${HPL_SHA256}" "${DIST_DIR}" + +HPL_CACHE="${DIST_DIR}/hpl-${HPL_VERSION}" +mkdir -p "${OVERLAY_STAGE_DIR}/usr/local/lib/bee" +cp "${HPL_CACHE}/bin/xhpl" "${OVERLAY_STAGE_DIR}/usr/local/lib/bee/xhpl" +chmod +x "${OVERLAY_STAGE_DIR}/usr/local/lib/bee/xhpl" +chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-hpl" 2>/dev/null || true +# Inject OpenBLAS runtime libs needed by xhpl +cp "${HPL_CACHE}/lib/"* "${OVERLAY_STAGE_DIR}/usr/lib/" 2>/dev/null || true +echo "=== HPL injected: xhpl + $(ls "${HPL_CACHE}/lib/" | wc -l) OpenBLAS libs ===" + # --- embed build metadata --- mkdir -p "${OVERLAY_STAGE_DIR}/etc" BUILD_DATE="$(date +%Y-%m-%d)" @@ -1180,6 +1193,7 @@ BUILD_DATE=${BUILD_DATE} GIT_COMMIT=${GIT_COMMIT} DEBIAN_VERSION=${DEBIAN_VERSION} DEBIAN_KERNEL_ABI=${DEBIAN_KERNEL_ABI} +HPL_VERSION=${HPL_VERSION} ${GPU_VERSION_LINE} EOF diff --git a/iso/overlay/usr/local/bin/bee-hpl b/iso/overlay/usr/local/bin/bee-hpl new file mode 100755 index 0000000..3e855cd --- /dev/null +++ b/iso/overlay/usr/local/bin/bee-hpl @@ -0,0 +1,97 @@ +#!/bin/sh +# bee-hpl — run HPL (High Performance LINPACK) with auto-sized problem. +# +# Generates HPL.dat based on available RAM, runs xhpl, and prints standard +# HPL output. The WR... line with Gflops is parsed by the bee audit tool. +# +# Usage: bee-hpl [--mem-fraction 0.80] [--nb 256] [--seconds N] +# +# --mem-fraction fraction of total RAM to use for the matrix (default 0.80) +# --nb block size; 256 is good for modern CPUs (default 256) +# --seconds ignored — HPL runtime is determined by problem size; kept +# for interface compatibility with other bee stress tools + +set -eu + +XHPL="/usr/local/lib/bee/xhpl" +MEM_FRACTION="0.80" +NB=256 + +usage() { + echo "usage: $0 [--mem-fraction 0.80] [--nb 256] [--seconds N]" >&2 + exit 2 +} + +while [ "$#" -gt 0 ]; do + case "$1" in + --mem-fraction) [ "$#" -ge 2 ] || usage; MEM_FRACTION="$2"; shift 2 ;; + --nb) [ "$#" -ge 2 ] || usage; NB="$2"; shift 2 ;; + --seconds) [ "$#" -ge 2 ] || usage; shift 2 ;; # accepted, ignored + *) usage ;; + esac +done + +[ -x "${XHPL}" ] || { echo "ERROR: xhpl not found at ${XHPL}" >&2; exit 1; } + +# Detect total RAM in bytes +TOTAL_KB=$(grep MemTotal /proc/meminfo | awk '{print $2}') +[ -n "${TOTAL_KB}" ] || { echo "ERROR: cannot read MemTotal from /proc/meminfo" >&2; exit 1; } +TOTAL_BYTES=$(( TOTAL_KB * 1024 )) + +# N = floor(sqrt(fraction * total_bytes / 8)) rounded down to multiple of NB +# Use awk for floating-point sqrt +N=$(awk -v total="${TOTAL_BYTES}" -v frac="${MEM_FRACTION}" -v nb="${NB}" ' +BEGIN { + raw = int(sqrt(total * frac / 8.0)) + n = int(raw / nb) * nb + if (n < nb) n = nb + print n +}') + +echo "loader=bee-hpl" +echo "total_ram_mb=$(( TOTAL_KB / 1024 ))" +echo "matrix_n=${N}" +echo "block_nb=${NB}" +echo "mem_fraction=${MEM_FRACTION}" + +# Generate HPL.dat in a temp directory and run from there +RUNDIR=$(mktemp -d) +trap 'rm -rf "${RUNDIR}"' EXIT INT TERM + +cat > "${RUNDIR}/HPL.dat" <= 1) +1 # of panels in recursion +2 NDIVs +1 # of recursive panel fact. +1 RFACTs (0=left, 1=Crout, 2=Right) +1 # of broadcast +1 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) +1 # of lookahead depth +1 DEPTHs (>=0) +2 SWAP (0=bin-exch,1=long,2=mix) +64 swapping threshold +0 L1 in (0=transposed,1=no-transposed) form +0 U in (0=transposed,1=no-transposed) form +1 Equilibration (0=no,1=yes) +8 memory alignment in double (> 0) +DAT + +cd "${RUNDIR}" +echo "---" +"${XHPL}"