#!/bin/sh
# smoketest.sh — run on a live ISO via SSH to verify all critical components.
#
# Usage:
#   ssh root@<ip> 'sh -s' < smoketest.sh
#   or: scp smoketest.sh root@<ip>:/var/tmp/ && ssh root@<ip> sh /var/tmp/smoketest.sh
#
# Exit code: 0 = all required checks passed, 1 = at least one required check failed.

# Running tallies; they are printed in the summary at the end of the run and
# FAIL decides the exit code.
PASS=0
FAIL=0
WARN=0

# Reporting helpers. Each prints one tagged line built from all of its
# arguments (merged into a single string by "$*"); ok/fail/warn additionally
# increment their counter, info does not count.
# printf '%s' is used rather than echo so that backslashes in the reported text
# (for example log lines passed through verbatim) are printed literally:
# echo's treatment of backslash sequences differs between /bin/sh
# implementations.
ok()   { printf '[  OK  ] %s\n' "$*"; PASS=$((PASS+1)); }
fail() { printf '[ FAIL ] %s\n' "$*"; FAIL=$((FAIL+1)); }
warn() { printf '[ WARN ] %s\n' "$*"; WARN=$((WARN+1)); }
info() { printf '[ INFO ] %s\n' "$*"; }

# --- kernel version ---
# Captured once; used by the banner and by the info line below.
KVER=$(uname -r)

# Banner identifying the host, the running kernel and the UTC time of this run.
cat <<EOF
========================================
 bee live ISO smoketest
 host:   $(uname -n)
 kernel: $KVER
 date:   $(date -u)
========================================

EOF
info "kernel: $KVER"

# --- PATH ---
# Every tool in the list is a required check. /usr/local/bin is prepended to
# PATH only for the lookup itself, so tools installed there are found even
# when the calling shell's PATH does not include that directory.
echo "-- PATH & binaries --"
for required_tool in dmidecode smartctl nvme ipmitool lspci audit nvidia-smi; do
    if ! tool_path=$(PATH="/usr/local/bin:$PATH" command -v "$required_tool" 2>/dev/null); then
        fail "$required_tool: NOT FOUND"
        continue
    fi
    ok "$required_tool found: $tool_path"
done

echo ""
echo "-- NVIDIA modules --"
# Directory expected to hold the NVIDIA kernel module (.ko) files.
KO_DIR="/usr/local/lib/nvidia"
if [ -d "$KO_DIR" ]; then
    # Count the modules with a glob instead of parsing `ls | wc -l`.
    ko_count=0
    for ko_file in "$KO_DIR"/*.ko; do
        # An unmatched glob is left as the literal pattern, so only count
        # entries that really exist.
        if [ -e "$ko_file" ]; then
            ko_count=$((ko_count+1))
        fi
    done
    if [ "$ko_count" -gt 0 ]; then
        ok "NVIDIA ko dir exists: $KO_DIR ($ko_count .ko files)"
    else
        # A directory without any module file is not a pass; the loaded-module
        # checks remain the required test, so this is only a warning.
        warn "NVIDIA ko dir exists but contains no .ko files: $KO_DIR"
    fi
else
    fail "NVIDIA ko dir missing: $KO_DIR"
fi

# Each NVIDIA kernel module must show up as the first column of an lsmod line.
# The listing is taken once and matched for every module name.
loaded_modules=$(/sbin/lsmod 2>/dev/null)
for kmod in nvidia nvidia_modeset nvidia_uvm; do
    if ! printf '%s\n' "$loaded_modules" | grep -q "^$kmod "; then
        fail "module NOT loaded: $kmod"
        continue
    fi
    ok "module loaded: $kmod"
done

echo ""
echo "-- nvidia-smi --"
if PATH="/usr/local/bin:$PATH" command -v nvidia-smi >/dev/null 2>&1; then
    # Run nvidia-smi a single time and reuse its listing for both the
    # presence test and the count.
    gpu_list=$(PATH="/usr/local/bin:$PATH" nvidia-smi -L 2>/dev/null)
    # Count only lines that start with "GPU <index>". A bare "GPU" match would
    # also hit error text mentioning a GPU and indented sub-device lines whose
    # UUID contains "GPU".
    gpu_count=$(printf '%s\n' "$gpu_list" | grep -c "^GPU [0-9]")
    if [ "$gpu_count" -gt 0 ]; then
        ok "nvidia-smi: $gpu_count GPU(s) found"
    else
        fail "nvidia-smi: runs but no GPUs detected"
    fi
else
    fail "nvidia-smi: not found in PATH"
fi

echo ""
echo "-- lib symlinks --"
for lib in libnvidia-ml libcuda; do
    lib_path="/usr/lib/${lib}.so.1"
    # -f follows symlinks, so it succeeds only when the path finally resolves
    # to a regular file. Accepting a bare -L as well would report a dangling
    # symlink as present, so that case is reported as its own failure.
    if [ -f "$lib_path" ]; then
        ok "$lib_path exists"
    elif [ -L "$lib_path" ]; then
        fail "$lib_path is a symlink that does not resolve to a file (nvidia-smi will fail)"
    else
        fail "$lib_path MISSING (nvidia-smi will fail)"
    fi
done

echo ""
echo "-- gcompat (glibc compat for nvidia-smi) --"
GLIBC_LOADER=/lib64/ld-linux-x86-64.so.2
# -f follows symlinks, so it is true only when the loader path resolves to a
# regular file; a dangling symlink is reported as a failure instead of passing.
if [ -f "$GLIBC_LOADER" ]; then
    ok "gcompat: $GLIBC_LOADER present"
elif [ -L "$GLIBC_LOADER" ]; then
    fail "gcompat: $GLIBC_LOADER is a symlink that does not resolve to a file — nvidia-smi will fail to exec"
else
    fail "gcompat: $GLIBC_LOADER MISSING — nvidia-smi will fail to exec"
fi

echo ""
echo "-- openrc services --"
# Required services: a non-zero `rc-service <name> status` counts as a failure.
for required_svc in bee-nvidia bee-network; do
    rc-service "$required_svc" status >/dev/null 2>&1 || {
        fail "service NOT running: $required_svc"
        continue
    }
    ok "service running: $required_svc"
done

# Optional services: checked only when their init script is installed, and a
# non-running service is a warning rather than a failure.
for optional_svc in bee-audit-debug dropbear bee-sshsetup; do
    [ -f "/etc/init.d/$optional_svc" ] || continue
    if ! rc-service "$optional_svc" status >/dev/null 2>&1; then
        warn "service not running: $optional_svc (may be one-shot)"
    else
        ok "service running: $optional_svc"
    fi
done

echo ""
echo "-- audit binary --"
AUDIT=/usr/local/bin/audit
if [ ! -x "$AUDIT" ]; then
    fail "audit binary: NOT FOUND at $AUDIT"
else
    ok "audit binary: present"
    # Try the flag form first, then the subcommand form; fall back to the
    # literal "unknown" when both invocations exit non-zero.
    ver=$(
        "$AUDIT" --version 2>/dev/null ||
            "$AUDIT" version 2>/dev/null ||
            echo "unknown"
    )
    info "audit version: $ver"
fi

echo ""
echo "-- audit last run --"
# The audit binary logs via slog to stderr (bee-audit.log); its JSON output goes to bee-audit.json.
# slog format: time=... level=INFO msg="audit output written" path=...
audit_json=/var/log/bee-audit.json
if [ ! -f "$audit_json" ] || [ ! -s "$audit_json" ]; then
    fail "audit: bee-audit.json missing or empty"
else
    ok "audit: bee-audit.json present and non-empty"
    # Human-readable size: first field of `du -sh`.
    json_size=$(du -sh "$audit_json" | cut -f1)
    info "size: $json_size"
fi

if [ -f /var/log/bee-audit.log ]; then
    last_line=$(tail -n 1 /var/log/bee-audit.log)
    info "last log line: $last_line"
    # slog writes: msg="audit output written" on success
    if grep -q "audit output written" /var/log/bee-audit.log 2>/dev/null; then
        ok "audit: completed successfully"
    else
        warn "audit: 'audit output written' not found in log — may have failed"
    fi
    # Check for an nvidia enrichment skip (slog message from the nvidia
    # collector). One extended-regex scan both detects the message and keeps
    # the most recent matching line; alternation is spelled with ERE `|`
    # because `\|` in a basic regex is not portable across grep builds.
    reason=$(grep -E "nvidia.*skipped|enrichment skipped" /var/log/bee-audit.log 2>/dev/null | tail -n 1)
    if [ -n "$reason" ]; then
        fail "audit: nvidia enrichment skipped — $reason"
    else
        ok "audit: nvidia enrichment OK (no skip message)"
    fi
else
    warn "audit: no log found at /var/log/bee-audit.log"
fi

echo ""
echo "-- network --"
# Query the routing table once; the first default-route line is used for both
# the presence test and the gateway extraction.
default_route=$(ip route show default 2>/dev/null | grep "default" | head -n 1)
if [ -n "$default_route" ]; then
    # Report the address that follows the "via" keyword rather than a fixed
    # field position: an on-link default route ("default dev <if> ...") has no
    # gateway, and the third field there is the interface name.
    gw=$(printf '%s\n' "$default_route" |
        awk '{ for (i = 1; i < NF; i++) if ($i == "via") { print $(i + 1); exit } }')
    if [ -n "$gw" ]; then
        ok "default route: $gw"
    else
        # No gateway address to show; print the whole route line instead.
        ok "default route: $default_route"
    fi
else
    fail "no default route"
fi
# Reachability probe: one ICMP echo to 1.1.1.1, invoked with -W3.
if ! ping -c1 -W3 1.1.1.1 >/dev/null 2>&1; then
    fail "internet: unreachable"
else
    ok "internet: reachable (1.1.1.1)"
fi

echo ""
echo "-- /etc/profile.d/bee.sh PATH --"
# Passes when the profile script mentions /usr/local/bin anywhere; a missing or
# unreadable file counts as a failure because grep's errors are discarded.
profile_script=/etc/profile.d/bee.sh
if ! grep -q "/usr/local/bin" "$profile_script" 2>/dev/null; then
    fail "/etc/profile.d/bee.sh does not add /usr/local/bin to PATH"
else
    ok "/etc/profile.d/bee.sh exports /usr/local/bin"
fi

# Final tally of all checks recorded during the run.
cat <<EOF

========================================
 Results: OK=$PASS  FAIL=$FAIL  WARN=$WARN
========================================
EOF
# Exit status: 0 only when no required check failed, 1 otherwise.
if [ "$FAIL" -eq 0 ]; then
    exit 0
fi
exit 1
