#!/bin/sh
# Run the GPU burn worker on a set of NVIDIA GPUs in parallel and report a
# per-GPU result.
#
# Options:
#   --seconds, -t N   passed through to the worker as --seconds (default 5)
#   --size-mb, -m N   passed through to the worker as --size-mb (default 64)
#   --devices LIST    comma-separated GPU indices to run on
#                     (default: every index reported by nvidia-smi)
#   --exclude LIST    comma-separated GPU indices to skip
#
# Output (stdout): "loader=...", "selected_gpus=...", one "starting"/"finished"
# line per GPU, and each worker's captured output prefixed with "[gpu N] ".
#
# Exit status:
#   0    every worker exited successfully
#   1    a worker failed, or setup failed (no worker binary, no GPUs, ...)
#   2    invalid command line
#   130  interrupted by SIGINT  (still-running workers are terminated)
#   143  interrupted by SIGTERM (still-running workers are terminated)
set -eu

# Deliberately NOT named SECONDS: bash and ksh treat SECONDS as a timer that
# keeps counting after assignment, so on systems where /bin/sh is bash the
# value handed to the workers would drift upwards.
DURATION_SEC=5
SIZE_MB=64
DEVICES=""
EXCLUDE=""
WORKER="/usr/local/lib/bee/bee-gpu-burn-worker"

TMP_DIR=""
# Space-terminated PIDs of workers that have been started but not yet reaped.
PENDING_PIDS=""

# Print the usage line to stderr and exit with status 2.
usage() {
  echo "usage: $0 [--seconds N] [--size-mb N] [--devices 0,1] [--exclude 2,3]" >&2
  exit 2
}

# Normalize a comma-separated list: strip whitespace, drop empty entries,
# sort numerically, remove duplicates. Prints the result on stdout.
#   $1 - raw list (may be empty)
normalize_list() {
  printf '%s\n' "${1:-}" \
    | tr ',' '\n' \
    | sed 's/[[:space:]]//g' \
    | awk 'NF' \
    | sort -n \
    | uniq \
    | paste -sd, -
}

# Succeed when $1 is an element of the comma-separated list $2.
# The comparison is literal (no regex or glob interpretation of $1).
contains_csv() {
  case ",${2:-}," in
    *",$1,"*) return 0 ;;
    *) return 1 ;;
  esac
}

# Exit with status 2 unless the normalized list $2 holds only digits and
# commas. This runs before the list is word-split, so glob characters can
# never reach an unquoted expansion.
#   $1 - option name used in the error message
#   $2 - normalized list
require_index_list() {
  case "${2:-}" in
    *[!0-9,]*)
      echo "$1 must be a comma-separated list of GPU indices: ${2}" >&2
      exit 2
      ;;
  esac
}

# EXIT handler: terminate workers that are still pending and remove the
# temporary log directory. Only PIDs that have not been reaped yet are
# signalled, so a recycled PID is never hit.
cleanup() {
  for _pid in ${PENDING_PIDS}; do
    kill "${_pid}" 2>/dev/null || true
  done
  if [ -n "${TMP_DIR}" ]; then
    rm -rf -- "${TMP_DIR}"
  fi
}

while [ "$#" -gt 0 ]; do
  case "$1" in
    --seconds | -t)
      [ "$#" -ge 2 ] || usage
      DURATION_SEC="$2"
      shift 2
      ;;
    --size-mb | -m)
      [ "$#" -ge 2 ] || usage
      SIZE_MB="$2"
      shift 2
      ;;
    --devices)
      [ "$#" -ge 2 ] || usage
      DEVICES="$2"
      shift 2
      ;;
    --exclude)
      [ "$#" -ge 2 ] || usage
      EXCLUDE="$2"
      shift 2
      ;;
    *)
      usage
      ;;
  esac
done

[ -x "${WORKER}" ] || {
  echo "bee-gpu-burn worker not found: ${WORKER}" >&2
  exit 1
}

# stderr is discarded on purpose: a missing or failing nvidia-smi simply
# yields an empty list, which is reported by the check below.
ALL_DEVICES=$(
  nvidia-smi --query-gpu=index --format=csv,noheader,nounits 2>/dev/null \
    | sed 's/[[:space:]]//g' \
    | awk 'NF' \
    | paste -sd, -
)
[ -n "${ALL_DEVICES}" ] || {
  echo "nvidia-smi found no NVIDIA GPUs" >&2
  exit 1
}

DEVICES=$(normalize_list "${DEVICES}")
EXCLUDE=$(normalize_list "${EXCLUDE}")
require_index_list "--devices" "${DEVICES}"
require_index_list "--exclude" "${EXCLUDE}"

# No explicit --devices means "all GPUs".
SELECTED="${DEVICES}"
if [ -z "${SELECTED}" ]; then
  SELECTED="${ALL_DEVICES}"
fi

# Apply the exclusion list.
FINAL=""
for id in $(printf '%s\n' "${SELECTED}" | tr ',' ' '); do
  [ -n "${id}" ] || continue
  if contains_csv "${id}" "${EXCLUDE}"; then
    continue
  fi
  if [ -z "${FINAL}" ]; then
    FINAL="${id}"
  else
    FINAL="${FINAL},${id}"
  fi
done

[ -n "${FINAL}" ] || {
  echo "no NVIDIA GPUs selected after filters" >&2
  exit 1
}

echo "loader=bee-gpu-burn"
echo "selected_gpus=${FINAL}"

# Install the handlers before anything needs cleaning up. INT/TERM only
# exit; the EXIT handler then does the actual cleanup exactly once.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

TMP_DIR=$(mktemp -d)

# Start one worker per GPU. Each entry of WORKERS is "pid:id"; the log path
# is derived from the id so that a TMPDIR containing whitespace cannot break
# the word-split list.
WORKERS=""
for id in $(printf '%s\n' "${FINAL}" | tr ',' ' '); do
  log="${TMP_DIR}/gpu-${id}.log"
  echo "starting gpu ${id}"
  "${WORKER}" --device "${id}" --seconds "${DURATION_SEC}" --size-mb "${SIZE_MB}" >"${log}" 2>&1 &
  pid=$!
  WORKERS="${WORKERS} ${pid}:${id}"
  PENDING_PIDS="${PENDING_PIDS}${pid} "
done

# Reap the workers in start order and replay their logs.
status=0
for spec in ${WORKERS}; do
  pid=${spec%%:*}
  id=${spec#*:}
  log="${TMP_DIR}/gpu-${id}.log"
  if wait "${pid}"; then
    echo "gpu ${id} finished: OK"
  else
    rc=$?
    echo "gpu ${id} finished: FAILED rc=${rc}"
    status=1
  fi
  # Workers are reaped in the order they were started, so the PID just
  # waited for is always the first pending entry.
  PENDING_PIDS=${PENDING_PIDS#"${pid} "}
  sed "s/^/[gpu ${id}] /" "${log}" || true
done

exit "${status}"