#!/bin/sh
# bee-hpl — run HPL (High Performance LINPACK) with auto-sized problem.
#
# Generates HPL.dat based on available RAM, runs xhpl, and prints standard
# HPL output. The WR... line with Gflops is parsed by the bee audit tool.
#
# Usage: bee-hpl [--mem-fraction 0.80] [--nb 256] [--seconds N]
#
# --mem-fraction   fraction of total RAM to use for the matrix (default 0.80)
# --nb             block size; 256 is good for modern CPUs (default 256)
# --seconds        ignored — HPL runtime is determined by problem size; kept
#                  for interface compatibility with other bee stress tools

# Exit on any unhandled command failure (-e) and treat expansion of an
# unset variable as an error (-u).
set -e -u

# Path of the HPL binary, followed by the defaults that the
# --mem-fraction and --nb options may override.
XHPL=/usr/local/lib/bee/xhpl
MEM_FRACTION=0.80
NB=256

# usage — write the command-line synopsis to stderr, then terminate the
# script with exit status 2. It never returns to its caller.
usage() {
    >&2 echo "usage: $0 [--mem-fraction 0.80] [--nb 256] [--seconds N]"
    exit 2
}

# Consume the command line two words at a time: every recognised option
# takes exactly one value. Anything else, or an option that is missing its
# value, ends the script through usage().
while [ "$#" -gt 0 ]; do
    # Step 1: reject unknown options and options without a value.
    case "$1" in
        --mem-fraction|--nb|--seconds)
            [ "$#" -ge 2 ] || usage
            ;;
        *)
            usage
            ;;
    esac

    # Step 2: record the value where one is needed.
    case "$1" in
        --mem-fraction) MEM_FRACTION="$2" ;;
        --nb)           NB="$2" ;;
        --seconds)      : ;;  # accepted, ignored
    esac

    shift 2
done

# Refuse to go any further unless the HPL binary exists and is executable.
if [ ! -x "${XHPL}" ]; then
    echo "ERROR: xhpl not found at ${XHPL}" >&2
    exit 1
fi

# read_mem_total_kb [FILE]
#   Print the MemTotal value (in kB) found in FILE, which defaults to
#   /proc/meminfo. Only a line whose first field is exactly "MemTotal:" and
#   whose second field is all digits is accepted. Returns non-zero and
#   prints nothing when FILE cannot be read or holds no such line.
read_mem_total_kb() {
    awk '
    $1 == "MemTotal:" && $2 ~ /^[0-9]+$/ { print $2; found = 1; exit }
    END { exit !found }
    ' "${1:-/proc/meminfo}" 2>/dev/null
}

# Detect total RAM: MemTotal is reported in kB, so convert it to bytes.
# The "|| TOTAL_KB=" fallback keeps set -e from aborting the script before
# the explicit error message below can be printed.
TOTAL_KB=$(read_mem_total_kb /proc/meminfo) || TOTAL_KB=""
[ -n "${TOTAL_KB}" ] || { echo "ERROR: cannot read MemTotal from /proc/meminfo" >&2; exit 1; }
TOTAL_BYTES=$(( TOTAL_KB * 1024 ))

# compute_matrix_n TOTAL_BYTES FRACTION NB
#   Print the HPL problem size
#       N = floor(sqrt(FRACTION * TOTAL_BYTES / 8))
#   rounded down to a multiple of NB, but never smaller than NB itself.
#   The divisor 8 is the size in bytes of one matrix element (HPL works in
#   double precision). awk is used because sh has no floating-point sqrt.
#
#   Arguments are validated first, so that bad input produces a clear
#   message instead of an awk division by zero or a "nan" in HPL.dat:
#     TOTAL_BYTES  non-negative integer
#     FRACTION     plain decimal number, 0 < FRACTION <= 1
#     NB           positive integer
#   Returns 1, with a message on stderr and nothing on stdout, when any
#   argument is rejected.
compute_matrix_n() {
    case "${1-}" in
        ''|*[!0-9]*)
            echo "ERROR: invalid total RAM byte count: '${1-}'" >&2
            return 1
            ;;
    esac

    case "${3-}" in
        ''|*[!0-9]*)
            echo "ERROR: --nb must be a positive integer, got '${3-}'" >&2
            return 1
            ;;
    esac
    # Digits only at this point; this additionally rejects 0 (and values too
    # large for the shell to compare).
    if ! [ "$3" -gt 0 ] 2>/dev/null; then
        echo "ERROR: --nb must be a positive integer, got '$3'" >&2
        return 1
    fi

    # The shell cannot compare fractions, so let awk check syntax and range.
    if ! awk -v frac="${2-}" 'BEGIN {
            ok = (frac ~ /^([0-9]+\.?[0-9]*|\.[0-9]+)$/) && frac + 0 > 0 && frac + 0 <= 1
            exit !ok
        }'; then
        echo "ERROR: --mem-fraction must be a number > 0 and <= 1, got '${2-}'" >&2
        return 1
    fi

    awk -v total="$1" -v frac="$2" -v nb="$3" '
    BEGIN {
        raw = int(sqrt(total * frac / 8.0))
        n   = int(raw / nb) * nb
        if (n < nb) n = nb
        printf "%d\n", n
    }'
}

# A rejected argument makes this substitution fail, which stops the script
# because it runs under set -e.
N=$(compute_matrix_n "${TOTAL_BYTES}" "${MEM_FRACTION}" "${NB}")

# Report the chosen run parameters as key=value lines, one per line, ahead
# of the HPL output itself.
printf '%s\n' \
    "loader=bee-hpl" \
    "total_ram_mb=$(( TOTAL_KB / 1024 ))" \
    "matrix_n=${N}" \
    "block_nb=${NB}" \
    "mem_fraction=${MEM_FRACTION}"

# HPL.dat is generated in a private temp directory and xhpl is run from there.
RUNDIR=$(mktemp -d) || { echo "ERROR: cannot create temporary run directory" >&2; exit 1; }

# cleanup — remove the temp run directory. Installed as the EXIT handler so
# that it runs exactly once, on every exit path.
cleanup() {
    # ${RUNDIR:?} aborts instead of running "rm -rf" on an empty path.
    rm -rf -- "${RUNDIR:?}"
}
trap cleanup EXIT

# A signal handler that does not exit lets the script carry on after the
# signal (possibly with the run directory already gone). Turn each signal
# into a real exit with the conventional 128+signal status; the EXIT handler
# above then performs the cleanup.
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

# Write the HPL input file into the run directory. The DAT delimiter is
# unquoted, so the shell expands ${N} and ${NB} inside the here-document;
# every other line is written literally. The file describes one problem
# size (N), one block size (NB) and a single 1 x 1 process grid, and
# selects device 6 (stdout) for the benchmark output.
cat > "${RUNDIR}/HPL.dat" <<DAT
HPLinpack benchmark input file
Innovative Computing Laboratory, University of Tennessee
HPL.out        output file name (if any)
6              device out (6=stdout, 7=stderr, file)
1              # of problems sizes (N)
${N}           Ns
1              # of NBs
${NB}          NBs
0              PMAP process mapping (0=Row-,1=Column-major)
1              # of process grids (P x Q)
1              Ps
1              Qs
16.0           threshold
1              # of panel fact
2              PFACTs (0=left, 1=Crout, 2=Right)
1              # of recursive stopping criterium
4              NBMINs (>= 1)
1              # of panels in recursion
2              NDIVs
1              # of recursive panel fact.
1              RFACTs (0=left, 1=Crout, 2=Right)
1              # of broadcast
1              BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
1              # of lookahead depth
1              DEPTHs (>=0)
2              SWAP (0=bin-exch,1=long,2=mix)
64             swapping threshold
0              L1 in (0=transposed,1=no-transposed) form
0              U  in (0=transposed,1=no-transposed) form
1              Equilibration (0=no,1=yes)
8              memory alignment in double (> 0)
DAT

# Run xhpl from inside the run directory, where HPL.dat was written.
# A "---" line separates the key=value header from the HPL output. xhpl is
# the last command, so its exit status becomes the script's exit status.
cd "${RUNDIR}"
printf '%s\n' "---"
"${XHPL}"
