- audit JSON: IPMI sensor readings (ipmitool sensor) merged into hardware.sensors alongside lm-sensors data - audit JSON: IPMI SEL entries (ipmitool sel list) in hardware.event_logs with source "ipmi-sel" - audit JSON: dmesg error/warning lines in hardware.event_logs with source "dmesg" (filtered by error/warn/AER/Xid/NVRM/ECC/panic patterns) - support bundle: added ipmitool-sensor.txt, ipmitool-sel.txt, ipmitool-sel-time.txt to techdump - saa_dmi.go: fix dmiItemRE to accept SHN with parentheses (e.g. PS(4)LC for PSU fields) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
130 lines
3.2 KiB
Go
130 lines
3.2 KiB
Go
package collector
|
|
|
|
import (
|
|
"bee/audit/internal/schema"
|
|
"log/slog"
|
|
"os/exec"
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// dmesgTimestampRE splits a dmesg line into its bracketed prefix and the
// remaining message text. Capture group 1 is whatever sits between the leading
// "[" and the first "]"; capture group 2 is the rest of the line with leading
// whitespace skipped.
//
// Both dmesg output flavours share this shape:
//
//	dmesg -T: [Thu Jun 18 14:23:45 2026] message
//	dmesg:    [  123.456789] message
var (
	dmesgTimestampRE = regexp.MustCompile(`^\[([^\]]+)\]\s*(.*)$`)
)
|
|
|
|
// dmesgErrorPatterns lists the keywords that mark a kernel log message as an
// error or hardware problem worth capturing. A message is kept when at least
// one pattern matches. All patterns are case-insensitive and anchored on word
// boundaries so that unrelated words (e.g. "failover") do not match.
//
// Common inflections are accepted as well, because the kernel uses them in
// real messages ("detected stalls on CPUs/tasks", "I/O errors", "timed out").
var dmesgErrorPatterns = []*regexp.Regexp{
	regexp.MustCompile(`(?i)\berr(?:ors?)?\b`),             // err, error, errors
	regexp.MustCompile(`(?i)\bfail(?:s|ed|ing|ures?)?\b`),  // fail, fails, failed, failing, failure(s)
	regexp.MustCompile(`(?i)\bfaults?\b`),                  // fault, faults
	regexp.MustCompile(`(?i)\bwarn(?:ings?)?\b`),           // warn, warning, warnings
	regexp.MustCompile(`(?i)\bAER\b`),                      // PCIe Advanced Error Reporting
	regexp.MustCompile(`(?i)\bXid\b`),                      // NVIDIA Xid events
	regexp.MustCompile(`(?i)\bNVRM\b`),                     // NVIDIA kernel driver messages
	regexp.MustCompile(`(?i)\bpanic\b`),
	regexp.MustCompile(`(?i)\bcorrected\b`),
	regexp.MustCompile(`(?i)\buncorrect`),                  // uncorrected, uncorrectable, ...
	regexp.MustCompile(`(?i)\bECC\b`),
	regexp.MustCompile(`(?i)\btime(?:outs?|d out)\b`),      // timeout, timeouts, timed out
	regexp.MustCompile(`(?i)\breset\b`),
	regexp.MustCompile(`(?i)\bdead\b`),
	regexp.MustCompile(`(?i)\b(?:hang|hung)\b`),            // hang, hung
	regexp.MustCompile(`(?i)\bstall(?:s|ed)?\b`),           // stall, stalls, stalled
	regexp.MustCompile(`(?i)\bdisabled\b`),
}
|
|
|
|
// collectDmesgErrors runs `dmesg -T` (or `dmesg` without -T on failure) and
|
|
// returns only lines that match known error/warning patterns.
|
|
func collectDmesgErrors() []schema.HardwareEventLog {
|
|
out, err := exec.Command("dmesg", "-T").Output()
|
|
if err != nil || len(out) == 0 {
|
|
// Fallback: dmesg without human-readable timestamps
|
|
out, err = exec.Command("dmesg").Output()
|
|
if err != nil || len(out) == 0 {
|
|
return nil
|
|
}
|
|
}
|
|
entries := parseDmesgErrors(string(out))
|
|
if len(entries) == 0 {
|
|
return nil
|
|
}
|
|
slog.Info("dmesg: collected error entries", "count", len(entries))
|
|
return entries
|
|
}
|
|
|
|
func parseDmesgErrors(output string) []schema.HardwareEventLog {
|
|
var entries []schema.HardwareEventLog
|
|
collectedAt := time.Now().UTC().Format(time.RFC3339)
|
|
|
|
for _, line := range strings.Split(output, "\n") {
|
|
line = strings.TrimSpace(line)
|
|
if line == "" {
|
|
continue
|
|
}
|
|
|
|
var timestamp, message string
|
|
if m := dmesgTimestampRE.FindStringSubmatch(line); m != nil {
|
|
timestamp = strings.TrimSpace(m[1])
|
|
message = strings.TrimSpace(m[2])
|
|
} else {
|
|
message = line
|
|
}
|
|
|
|
if message == "" {
|
|
continue
|
|
}
|
|
if !matchesAny(message, dmesgErrorPatterns) {
|
|
continue
|
|
}
|
|
|
|
severity := dmesgSeverity(message)
|
|
source := "dmesg"
|
|
|
|
var eventTime *string
|
|
if timestamp != "" {
|
|
t := timestamp
|
|
eventTime = &t
|
|
} else {
|
|
eventTime = &collectedAt
|
|
}
|
|
|
|
entries = append(entries, schema.HardwareEventLog{
|
|
Source: source,
|
|
EventTime: eventTime,
|
|
Severity: &severity,
|
|
Message: message,
|
|
})
|
|
}
|
|
return entries
|
|
}
|
|
|
|
func matchesAny(s string, patterns []*regexp.Regexp) bool {
|
|
for _, p := range patterns {
|
|
if p.MatchString(s) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func dmesgSeverity(msg string) string {
|
|
lower := strings.ToLower(msg)
|
|
switch {
|
|
case strings.Contains(lower, "panic") ||
|
|
strings.Contains(lower, "aer") ||
|
|
strings.Contains(lower, "uncorrect") ||
|
|
strings.Contains(lower, "xid") ||
|
|
strings.Contains(lower, "nvrm"):
|
|
return statusCritical
|
|
case strings.Contains(lower, "error") ||
|
|
strings.Contains(lower, "fault") ||
|
|
strings.Contains(lower, "fail") ||
|
|
strings.Contains(lower, "dead") ||
|
|
strings.Contains(lower, "hang"):
|
|
return statusCritical
|
|
default:
|
|
return statusWarning
|
|
}
|
|
}
|