HPL 2.3 from netlib compiled against OpenBLAS with a minimal single-process MPI stub — no MPI package required in the ISO. Matrix size is auto-sized to 80% of total RAM at runtime. Build: - VERSIONS: HPL_VERSION=2.3, HPL_SHA256=32c5c17d… - build-hpl.sh: downloads HPL + OpenBLAS from Debian 12 repo, compiles xhpl with a self-contained mpi_stub.c - build.sh: step 80-hpl, injects xhpl + libopenblas into overlay Runtime: - bee-hpl: generates HPL.dat (N auto from /proc/meminfo, NB=256, P=1 Q=1), runs xhpl, prints standard WR... Gflops output - platform/hpl.go: RunHPL(), parses WR line → GFlops + PASSED/FAILED - tasks.go: target "hpl" - pages.go: LINPACK (HPL) card in validate/stress grid (stress-only) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
98 lines
3.1 KiB
Bash
Executable File
98 lines
3.1 KiB
Bash
Executable File
#!/bin/sh
|
|
# bee-hpl — run HPL (High Performance LINPACK) with an auto-sized problem.
#
# Generates HPL.dat sized from total RAM (MemTotal in /proc/meminfo), runs
# xhpl, and prints standard HPL output. The WR... line with Gflops is parsed
# by the bee audit tool.
#
# Usage: bee-hpl [--mem-fraction 0.80] [--nb 256] [--seconds N]
#
#   --mem-fraction  fraction of total RAM to use for the matrix (default 0.80)
#   --nb            block size; 256 is good for modern CPUs (default 256)
#   --seconds       ignored — HPL runtime is determined by problem size; kept
#                   for interface compatibility with other bee stress tools
|
|
|
|
# Abort on any unhandled command failure and on use of an unset variable.
set -e
set -u

# Absolute path of the HPL benchmark binary this wrapper executes.
XHPL='/usr/local/lib/bee/xhpl'
# Defaults; --mem-fraction and --nb replace them during argument parsing.
MEM_FRACTION='0.80'
NB='256'
|
|
|
|
# Print the command-line synopsis on stderr and terminate with status 2.
# Never returns to the caller.
usage() {
  printf '%s\n' "usage: $0 [--mem-fraction 0.80] [--nb 256] [--seconds N]" >&2
  exit 2
}
|
|
|
|
# Succeed only when $1 is an unsigned decimal integer greater than zero.
_bee_hpl_is_positive_int() {
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
  esac
  # Digits only from here on; reject 0, 00, ... because a zero block size
  # makes the "N / NB" rounding further down divide by zero.
  [ "$1" -gt 0 ] 2>/dev/null
}

# Succeed only when $1 is a plain decimal number f with 0 < f <= 1.
_bee_hpl_is_fraction() {
  case "$1" in
    '' | . | *[!0-9.]* | *.*.*) return 1 ;;
  esac
  awk -v f="$1" 'BEGIN { exit !(f + 0 > 0 && f + 0 <= 1) }'
}

# Consume the command line. Every option takes exactly one value; a missing
# value or an unknown option prints the synopsis via usage (exit 2). Values
# are validated as they are read so that a bad --nb / --mem-fraction fails
# here with a clear message instead of producing a bogus problem size later.
while [ "$#" -gt 0 ]; do
  case "$1" in
    --mem-fraction)
      [ "$#" -ge 2 ] || usage
      if ! _bee_hpl_is_fraction "$2"; then
        echo "ERROR: --mem-fraction must be a number in (0, 1], got '$2'" >&2
        exit 2
      fi
      MEM_FRACTION="$2"
      shift 2
      ;;
    --nb)
      [ "$#" -ge 2 ] || usage
      if ! _bee_hpl_is_positive_int "$2"; then
        echo "ERROR: --nb must be a positive integer, got '$2'" >&2
        exit 2
      fi
      NB="$2"
      shift 2
      ;;
    --seconds)
      # Accepted and discarded: kept only for interface compatibility.
      [ "$#" -ge 2 ] || usage
      shift 2
      ;;
    *)
      usage
      ;;
  esac
done
|
|
|
|
# Refuse to continue unless the benchmark binary exists and is executable.
if [ ! -x "${XHPL}" ]; then
  echo "ERROR: xhpl not found at ${XHPL}" >&2
  exit 1
fi
|
|
|
|
# Read the MemTotal value (kB) from /proc/meminfo and convert it to bytes.
# A read failure is folded into the empty-value check below so the caller
# always gets the same error message.
TOTAL_KB=$(awk '/MemTotal/ { print $2 }' /proc/meminfo) || TOTAL_KB=''
if [ -z "${TOTAL_KB}" ]; then
  echo "ERROR: cannot read MemTotal from /proc/meminfo" >&2
  exit 1
fi
TOTAL_BYTES=$(( 1024 * TOTAL_KB ))
|
|
|
|
# Print the HPL problem size N for a memory budget.
#   $1  total RAM in bytes
#   $2  fraction of that RAM the N x N matrix may occupy (8 bytes per element)
#   $3  block size NB
# N = floor(sqrt($2 * $1 / 8)) rounded down to a multiple of NB, never below
# NB. awk is used because the shell has no floating-point sqrt.
# Returns 1 with a message on stderr when any input is not a positive number,
# rather than letting awk abort on a division by zero (NB = 0) or silently
# shrinking the problem to a single block (fraction = 0 or non-numeric).
_bee_hpl_matrix_n() {
  awk -v total="$1" -v frac="$2" -v nb="$3" '
    BEGIN {
      if (total + 0 <= 0 || frac + 0 <= 0 || nb + 0 <= 0) exit 1
      raw = int(sqrt(total * frac / 8.0))
      n = int(raw / nb) * nb
      if (n < nb) n = nb
      print n
    }' || {
    echo "ERROR: cannot size HPL problem (total_bytes='$1' mem_fraction='$2' nb='$3')" >&2
    return 1
  }
}

# A failed sizing makes this assignment fail, which aborts the script under
# the "set -e" in effect at the top of the file.
N=$(_bee_hpl_matrix_n "${TOTAL_BYTES}" "${MEM_FRACTION}" "${NB}")
|
|
|
|
# Emit the run parameters as key=value lines ahead of the benchmark output.
printf '%s\n' \
  "loader=bee-hpl" \
  "total_ram_mb=$(( TOTAL_KB / 1024 ))" \
  "matrix_n=${N}" \
  "block_nb=${NB}" \
  "mem_fraction=${MEM_FRACTION}"
|
|
|
|
# Generate HPL.dat in a private temp directory and run from there.
RUNDIR=$(mktemp -d)

# Remove the scratch directory; harmless when called more than once.
_bee_hpl_cleanup() {
  if [ -n "${RUNDIR:-}" ]; then
    rm -rf -- "${RUNDIR}"
  fi
}

# Cleanup is attached to EXIT only. The signal handlers must terminate the
# script themselves: a handler that merely deletes the directory lets the
# script carry on after SIGINT/SIGTERM with its working directory gone.
# Calling exit from the handler fires the EXIT trap, so the directory is
# still removed, and the conventional 128+signal status is reported.
trap _bee_hpl_cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
|
|
|
|
# Write the HPL input file into the run directory. The here-document
# delimiter is unquoted on purpose so ${N} and ${NB} are expanded; every
# other line is fixed: one problem size, one block size, a 1 x 1 process grid.
{
  cat <<HPL_INPUT
HPLinpack benchmark input file
Innovative Computing Laboratory, University of Tennessee
HPL.out output file name (if any)
6 device out (6=stdout, 7=stderr, file)
1 # of problems sizes (N)
${N} Ns
1 # of NBs
${NB} NBs
0 PMAP process mapping (0=Row-,1=Column-major)
1 # of process grids (P x Q)
1 Ps
1 Qs
16.0 threshold
1 # of panel fact
2 PFACTs (0=left, 1=Crout, 2=Right)
1 # of recursive stopping criterium
4 NBMINs (>= 1)
1 # of panels in recursion
2 NDIVs
1 # of recursive panel fact.
1 RFACTs (0=left, 1=Crout, 2=Right)
1 # of broadcast
1 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
1 # of lookahead depth
1 DEPTHs (>=0)
2 SWAP (0=bin-exch,1=long,2=mix)
64 swapping threshold
0 L1 in (0=transposed,1=no-transposed) form
0 U in (0=transposed,1=no-transposed) form
1 Equilibration (0=no,1=yes)
8 memory alignment in double (> 0)
HPL_INPUT
} > "${RUNDIR}/HPL.dat"
|
|
|
|
# Switch into the run directory that holds the generated HPL.dat.
cd "${RUNDIR}"
# Marker line printed immediately before xhpl starts writing its own output.
printf '%s\n' '---'
# Last command of the script: its exit status becomes the script's status.
"${XHPL}"
|