diff --git a/iso/overlay/etc/systemd/system/bee-selfheal.timer b/iso/overlay/etc/systemd/system/bee-selfheal.timer index c96a238..2095569 100644 --- a/iso/overlay/etc/systemd/system/bee-selfheal.timer +++ b/iso/overlay/etc/systemd/system/bee-selfheal.timer @@ -3,7 +3,7 @@ Description=Bee: run self-heal checks periodically [Timer] OnBootSec=45sec -OnUnitActiveSec=60sec +OnUnitActiveSec=3min AccuracySec=15sec Unit=bee-selfheal.service diff --git a/iso/overlay/usr/local/bin/bee-selfheal b/iso/overlay/usr/local/bin/bee-selfheal index 0aac92f..a2b1325 100644 --- a/iso/overlay/usr/local/bin/bee-selfheal +++ b/iso/overlay/usr/local/bin/bee-selfheal @@ -8,11 +8,17 @@ EXPORT_DIR="/appdata/bee/export" AUDIT_JSON="${EXPORT_DIR}/bee-audit.json" RUNTIME_JSON="${EXPORT_DIR}/runtime-health.json" LOCK_DIR="/run/bee-selfheal.lock" +EVENTS=0 log() { echo "[${LOG_PREFIX}] $*" } +log_event() { + EVENTS=$((EVENTS + 1)) + log "$*" +} + have_nvidia_gpu() { lspci -Dn 2>/dev/null | awk '$2 ~ /^03(00|02):$/ && $3 ~ /^10de:/ { found=1; exit } END { exit(found ? 0 : 1) }' } @@ -56,24 +62,22 @@ web_healthy() { mkdir -p "${EXPORT_DIR}" /run if ! mkdir "${LOCK_DIR}" 2>/dev/null; then - log "another self-heal run is already active" + log_event "another self-heal run is already active" exit 0 fi trap 'rmdir "${LOCK_DIR}" >/dev/null 2>&1 || true' EXIT -log "start" - if have_nvidia_gpu && [ ! -e /dev/nvidia0 ]; then - log "NVIDIA GPU detected but /dev/nvidia0 is missing" + log_event "NVIDIA GPU detected but /dev/nvidia0 is missing" restart_service bee-nvidia.service || true fi runtime_state="$(artifact_state "${RUNTIME_JSON}")" if [ "${runtime_state}" != "ready" ]; then if [ "${runtime_state}" = "interrupted" ]; then - log "runtime-health.json.tmp exists — interrupted runtime-health write detected" + log_event "runtime-health.json.tmp exists — interrupted runtime-health write detected" else - log "runtime-health.json missing or empty" + log_event "runtime-health.json missing or empty" fi restart_service bee-preflight.service || true fi @@ -81,19 +85,17 @@ fi audit_state="$(artifact_state "${AUDIT_JSON}")" if [ "${audit_state}" != "ready" ]; then if [ "${audit_state}" = "interrupted" ]; then - log "bee-audit.json.tmp exists — interrupted audit write detected" + log_event "bee-audit.json.tmp exists — interrupted audit write detected" else - log "bee-audit.json missing or empty" + log_event "bee-audit.json missing or empty" fi restart_service bee-audit.service || true fi if ! service_active bee-web.service; then - log "bee-web.service is not active" + log_event "bee-web.service is not active" restart_service bee-web.service || true elif ! web_healthy; then - log "bee-web health check failed" + log_event "bee-web health check failed" restart_service bee-web.service || true fi - -log "done"