Add HPL (LINPACK) benchmark as validate/stress task

HPL 2.3 from netlib compiled against OpenBLAS with a minimal
single-process MPI stub — no MPI package required in the ISO.
The problem size N is chosen at runtime so that the matrix fills 80% of total RAM.

Build:
- VERSIONS: HPL_VERSION=2.3, HPL_SHA256=32c5c17d…
- build-hpl.sh: downloads HPL (from netlib) and OpenBLAS (from the Debian 12
  repo), then compiles xhpl with a self-contained mpi_stub.c
- build.sh: step 80-hpl, injects xhpl + libopenblas into overlay

Runtime:
- bee-hpl: generates HPL.dat (N auto from /proc/meminfo, NB=256,
  P=1 Q=1), runs xhpl, prints standard WR... Gflops output
- platform/hpl.go: RunHPL(), parses WR line → GFlops + PASSED/FAILED
- tasks.go: target "hpl"
- pages.go: LINPACK (HPL) card in validate/stress grid (stress-only)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-08 07:08:18 +03:00
parent b2f8626fee
commit 16e7ae00e7
10 changed files with 541 additions and 6 deletions

View File

@@ -0,0 +1,97 @@
#!/bin/sh
# bee-hpl — run HPL (High Performance LINPACK) with auto-sized problem.
#
# Generates HPL.dat based on available RAM, runs xhpl, and prints standard
# HPL output. The WR... line with Gflops is parsed by the bee audit tool.
#
# Usage: bee-hpl [--mem-fraction 0.80] [--nb 256] [--seconds N]
#
# --mem-fraction fraction of total RAM to use for the matrix (default 0.80)
# --nb block size; 256 is good for modern CPUs (default 256)
# --seconds ignored — HPL runtime is determined by problem size; kept
# for interface compatibility with other bee stress tools
# Strict mode: abort on any unhandled command failure and on expansion of an
# unset variable.
set -o errexit
set -o nounset

# Path of the HPL binary this wrapper runs.
XHPL='/usr/local/lib/bee/xhpl'
# Built-in defaults for the tunables.
MEM_FRACTION='0.80'
NB='256'
# usage — print the command-line synopsis to stderr and exit with status 2.
# Never returns to the caller.
usage() {
  # printf '%s' keeps any backslashes in $0 literal; echo's treatment of them
  # differs between shells.
  printf 'usage: %s [--mem-fraction 0.80] [--nb 256] [--seconds N]\n' "$0" >&2
  exit 2
}
# is_positive_int VALUE — succeed when VALUE consists only of decimal digits
# and contains at least one non-zero digit, i.e. it is an integer > 0.
is_positive_int() {
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
    *[1-9]*) return 0 ;;
  esac
  return 1
}

# is_unit_fraction VALUE — succeed when VALUE is a plain decimal number
# (no sign, no exponent) in the range (0, 1].
is_unit_fraction() {
  awk -v f="$1" 'BEGIN {
    ok = (f ~ /^([0-9]+\.?[0-9]*|\.[0-9]+)$/) && (f + 0 > 0) && (f + 0 <= 1)
    exit !ok
  }'
}

# parse_args ARGS... — consume the command-line options and update
# MEM_FRACTION / NB. Values are validated here so that a bad --nb or
# --mem-fraction is reported as a usage error up front instead of surfacing
# later as an awk division by zero, a nonsensical HPL.dat, or a matrix larger
# than RAM. Any malformed or unknown option ends in usage (exit status 2).
parse_args() {
  while [ "$#" -gt 0 ]; do
    case "$1" in
      --mem-fraction)
        [ "$#" -ge 2 ] || usage
        if ! is_unit_fraction "$2"; then
          echo "ERROR: --mem-fraction must be a number in (0, 1], got '$2'" >&2
          usage
        fi
        MEM_FRACTION="$2"
        shift 2
        ;;
      --nb)
        [ "$#" -ge 2 ] || usage
        if ! is_positive_int "$2"; then
          echo "ERROR: --nb must be a positive integer, got '$2'" >&2
          usage
        fi
        NB="$2"
        shift 2
        ;;
      --seconds)
        # Accepted for interface compatibility; the value is ignored.
        [ "$#" -ge 2 ] || usage
        shift 2
        ;;
      *)
        usage
        ;;
    esac
  done
}

parse_args "$@"
# Bail out early when the HPL binary is missing or not executable.
if [ ! -x "${XHPL}" ]; then
  echo "ERROR: xhpl not found at ${XHPL}" >&2
  exit 1
fi
# mem_total_kb [FILE] — print the MemTotal value (in kB) found in FILE, which
# defaults to /proc/meminfo. Prints nothing when the field is absent; returns
# non-zero when FILE cannot be read. One awk pass with an exact match on the
# field name replaces the former grep | awk pipeline.
mem_total_kb() {
  awk '$1 == "MemTotal:" { print $2; exit }' "${1:-/proc/meminfo}" 2>/dev/null
}

# Detect total RAM in bytes. The value must be purely numeric before it is fed
# to shell arithmetic, otherwise $(( )) would abort with a syntax error rather
# than the diagnostic below.
TOTAL_KB=$(mem_total_kb) || TOTAL_KB=""
case "${TOTAL_KB}" in
  '' | *[!0-9]*)
    echo "ERROR: cannot read MemTotal from /proc/meminfo" >&2
    exit 1
    ;;
esac
TOTAL_BYTES=$(( TOTAL_KB * 1024 ))
# Problem size: floor(sqrt(fraction * total_bytes / 8)), rounded down to a
# multiple of NB and never smaller than a single block. The arithmetic is
# delegated to awk because it needs a floating-point square root.
N=$(
  awk -v bytes="${TOTAL_BYTES}" -v share="${MEM_FRACTION}" -v blk="${NB}" 'BEGIN {
    side = int(sqrt(bytes * share / 8.0))
    aligned = int(side / blk) * blk
    if (aligned < blk) aligned = blk
    print aligned
  }'
)
# Report the chosen run parameters as key=value pairs, one per line.
printf '%s\n' \
  "loader=bee-hpl" \
  "total_ram_mb=$(( TOTAL_KB / 1024 ))" \
  "matrix_n=${N}" \
  "block_nb=${NB}" \
  "mem_fraction=${MEM_FRACTION}"
# Generate HPL.dat in a temp directory and run from there.
RUNDIR=$(mktemp -d)

# cleanup — remove the temp directory. ${RUNDIR:?} aborts the expansion if the
# variable is ever empty, so rm -rf can never be handed an empty path.
cleanup() {
  rm -rf -- "${RUNDIR:?}"
}

# Clean up on every exit path. The signal traps only call exit: that fires the
# EXIT trap exactly once and ends the script with the conventional 128+signal
# status, instead of resuming the script after the handler returns.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
# Write the HPL input file into the temp directory. The here-document
# delimiter is unquoted, so ${N} and ${NB} are expanded by the shell; every
# other line is emitted literally. The file requests one problem size, one
# block size and a single 1 x 1 process grid, with output directed to stdout
# (device 6).
cat > "${RUNDIR}/HPL.dat" <<DAT
HPLinpack benchmark input file
Innovative Computing Laboratory, University of Tennessee
HPL.out output file name (if any)
6 device out (6=stdout, 7=stderr, file)
1 # of problems sizes (N)
${N} Ns
1 # of NBs
${NB} NBs
0 PMAP process mapping (0=Row-,1=Column-major)
1 # of process grids (P x Q)
1 Ps
1 Qs
16.0 threshold
1 # of panel fact
2 PFACTs (0=left, 1=Crout, 2=Right)
1 # of recursive stopping criterium
4 NBMINs (>= 1)
1 # of panels in recursion
2 NDIVs
1 # of recursive panel fact.
1 RFACTs (0=left, 1=Crout, 2=Right)
1 # of broadcast
1 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
1 # of lookahead depth
1 DEPTHs (>=0)
2 SWAP (0=bin-exch,1=long,2=mix)
64 swapping threshold
0 L1 in (0=transposed,1=no-transposed) form
0 U in (0=transposed,1=no-transposed) form
1 Equilibration (0=no,1=yes)
8 memory alignment in double (> 0)
DAT
# Switch into the directory holding HPL.dat, print the separator line, then
# run the benchmark. xhpl is the final command, so its exit status becomes the
# script's exit status.
cd "${RUNDIR}"
printf '%s\n' '---'
"${XHPL}"