- Collect hardware event logs (last 7 days) from Systems and Managers/SEL LogServices - Parse AMI raw IPMI dump messages into readable descriptions (Sensor_Type: Event_Type) - Filter out audit/journal/non-hardware log services; only SEL from Managers - MSI ghost GPU filter: exclude processor GPU entries with temperature=0 when host is powered on - Reanimator collected_at uses InventoryData/Status.LastModifiedTime (30-day fallback) - Invalidate Redfish inventory CRC groups before host power-on - Log inventory LastModifiedTime age in collection logs - Drop SecureBoot collection (SecureBootMode, SecureBootDatabases) — not hardware inventory - Add build version to UI footer via template - Add MSI Redfish API reference doc to bible-local/docs/ ADL-032–ADL-035 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
393 lines
12 KiB
Go
393 lines
12 KiB
Go
package collector
|
|
|
|
import (
|
|
"context"
|
|
"log"
|
|
"net/http"
|
|
"strings"
|
|
"time"
|
|
|
|
"git.mchus.pro/mchus/logpile/internal/models"
|
|
)
|
|
|
|
// Limits applied while collecting Redfish LogEntry resources.
const (
	// redfishLogEntriesWindow is how far back from collection time an entry's
	// Created timestamp may lie for the entry to be kept (7 days).
	redfishLogEntriesWindow = time.Hour * 24 * 7

	// redfishLogEntriesMaxTotal is the overall entry budget: once this many
	// entries have been gathered no further log services are queried.
	redfishLogEntriesMaxTotal = 500

	// redfishLogEntriesMaxPerSvc is the maximum number of entries taken from
	// a single log service.
	redfishLogEntriesMaxPerSvc = 200
)
|
|
|
|
// collectRedfishLogEntries fetches hardware event log entries from Systems and Managers LogServices.
|
|
// Only hardware-relevant entries from the last 7 days are returned.
|
|
// For Systems: all log services except audit/journal/security/debug.
|
|
// For Managers: only the IPMI SEL service (Id="SEL") — audit and event logs are excluded.
|
|
func (c *RedfishConnector) collectRedfishLogEntries(ctx context.Context, client *http.Client, req Request, baseURL string, systemPaths, managerPaths []string) []map[string]interface{} {
|
|
cutoff := time.Now().UTC().Add(-redfishLogEntriesWindow)
|
|
seen := make(map[string]struct{})
|
|
var out []map[string]interface{}
|
|
|
|
collectFrom := func(logServicesPath string, filter func(map[string]interface{}) bool) {
|
|
if len(out) >= redfishLogEntriesMaxTotal {
|
|
return
|
|
}
|
|
services, err := c.getCollectionMembers(ctx, client, req, baseURL, logServicesPath)
|
|
if err != nil || len(services) == 0 {
|
|
return
|
|
}
|
|
for _, svc := range services {
|
|
if len(out) >= redfishLogEntriesMaxTotal {
|
|
break
|
|
}
|
|
if !filter(svc) {
|
|
continue
|
|
}
|
|
entriesPath := redfishLogServiceEntriesPath(svc)
|
|
if entriesPath == "" {
|
|
continue
|
|
}
|
|
entries := c.fetchRedfishLogEntriesWithPaging(ctx, client, req, baseURL, entriesPath, cutoff, seen, redfishLogEntriesMaxPerSvc)
|
|
out = append(out, entries...)
|
|
}
|
|
}
|
|
|
|
for _, systemPath := range systemPaths {
|
|
collectFrom(joinPath(systemPath, "/LogServices"), isHardwareLogService)
|
|
}
|
|
// Managers hold the IPMI SEL on AMI/MSI BMCs — include only the "SEL" service.
|
|
for _, managerPath := range managerPaths {
|
|
collectFrom(joinPath(managerPath, "/LogServices"), isManagerSELService)
|
|
}
|
|
|
|
if len(out) > 0 {
|
|
log.Printf("redfish: collected %d hardware log entries (Systems+Managers SEL, window=7d)", len(out))
|
|
}
|
|
return out
|
|
}
|
|
|
|
// fetchRedfishLogEntriesWithPaging fetches entries from a LogEntry collection,
|
|
// following nextLink pages. Stops early when entries older than cutoff are encountered
|
|
// (assumes BMC returns entries newest-first, which is typical).
|
|
func (c *RedfishConnector) fetchRedfishLogEntriesWithPaging(ctx context.Context, client *http.Client, req Request, baseURL, entriesPath string, cutoff time.Time, seen map[string]struct{}, limit int) []map[string]interface{} {
|
|
var out []map[string]interface{}
|
|
nextPath := entriesPath
|
|
|
|
for nextPath != "" && len(out) < limit {
|
|
collection, err := c.getJSON(ctx, client, req, baseURL, nextPath)
|
|
if err != nil {
|
|
break
|
|
}
|
|
|
|
// Handle both linked members (@odata.id only) and inline members (full objects).
|
|
rawMembers, _ := collection["Members"].([]interface{})
|
|
hitOldEntry := false
|
|
|
|
for _, rawMember := range rawMembers {
|
|
if len(out) >= limit {
|
|
break
|
|
}
|
|
memberMap, ok := rawMember.(map[string]interface{})
|
|
if !ok {
|
|
continue
|
|
}
|
|
|
|
var entry map[string]interface{}
|
|
if _, hasCreated := memberMap["Created"]; hasCreated {
|
|
// Inline entry — use directly.
|
|
entry = memberMap
|
|
} else {
|
|
// Linked entry — fetch by path.
|
|
memberPath := normalizeRedfishPath(asString(memberMap["@odata.id"]))
|
|
if memberPath == "" {
|
|
continue
|
|
}
|
|
entry, err = c.getJSON(ctx, client, req, baseURL, memberPath)
|
|
if err != nil || len(entry) == 0 {
|
|
continue
|
|
}
|
|
}
|
|
|
|
// Dedup by entry Id or path.
|
|
entryKey := asString(entry["Id"])
|
|
if entryKey == "" {
|
|
entryKey = asString(entry["@odata.id"])
|
|
}
|
|
if entryKey != "" {
|
|
if _, dup := seen[entryKey]; dup {
|
|
continue
|
|
}
|
|
seen[entryKey] = struct{}{}
|
|
}
|
|
|
|
// Time filter.
|
|
created := parseRedfishEntryTime(asString(entry["Created"]))
|
|
if !created.IsZero() && created.Before(cutoff) {
|
|
hitOldEntry = true
|
|
continue
|
|
}
|
|
|
|
// Hardware relevance filter.
|
|
if !isHardwareLogEntry(entry) {
|
|
continue
|
|
}
|
|
|
|
out = append(out, entry)
|
|
}
|
|
|
|
// Stop paging once we've seen entries older than the window.
|
|
if hitOldEntry {
|
|
break
|
|
}
|
|
nextPath = firstNonEmpty(
|
|
normalizeRedfishPath(asString(collection["Members@odata.nextLink"])),
|
|
normalizeRedfishPath(asString(collection["@odata.nextLink"])),
|
|
)
|
|
}
|
|
return out
|
|
}
|
|
|
|
// isManagerSELService returns true only for the IPMI SEL exposed under Managers.
|
|
// On AMI/MSI BMCs the hardware SEL lives at Managers/{mgr}/LogServices/SEL.
|
|
// All other Manager log services (AuditLog, EventLog, Journal) are excluded.
|
|
func isManagerSELService(svc map[string]interface{}) bool {
|
|
id := strings.ToLower(strings.TrimSpace(asString(svc["Id"])))
|
|
return id == "sel"
|
|
}
|
|
|
|
// isHardwareLogService returns true if the log service looks like a hardware event log
|
|
// (SEL, System Event Log) rather than a BMC audit/journal log.
|
|
func isHardwareLogService(svc map[string]interface{}) bool {
|
|
id := strings.ToLower(strings.TrimSpace(asString(svc["Id"])))
|
|
name := strings.ToLower(strings.TrimSpace(asString(svc["Name"])))
|
|
for _, skip := range []string{"audit", "journal", "bmc", "security", "manager", "debug"} {
|
|
if strings.Contains(id, skip) || strings.Contains(name, skip) {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
// redfishLogServiceEntriesPath returns the Entries collection path for a LogService document.
|
|
func redfishLogServiceEntriesPath(svc map[string]interface{}) string {
|
|
if entriesLink, ok := svc["Entries"].(map[string]interface{}); ok {
|
|
if p := normalizeRedfishPath(asString(entriesLink["@odata.id"])); p != "" {
|
|
return p
|
|
}
|
|
}
|
|
if id := normalizeRedfishPath(asString(svc["@odata.id"])); id != "" {
|
|
return joinPath(id, "/Entries")
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// isHardwareLogEntry returns true if the log entry is hardware-related.
|
|
// Audit, authentication, and session events are excluded.
|
|
func isHardwareLogEntry(entry map[string]interface{}) bool {
|
|
entryType := strings.TrimSpace(asString(entry["EntryType"]))
|
|
if strings.EqualFold(entryType, "Oem") {
|
|
return false
|
|
}
|
|
|
|
msgID := strings.ToLower(strings.TrimSpace(asString(entry["MessageId"])))
|
|
for _, skip := range []string{
|
|
"user", "account", "password", "login", "logon", "session",
|
|
"auth", "certificate", "security", "credential", "privilege",
|
|
} {
|
|
if strings.Contains(msgID, skip) {
|
|
return false
|
|
}
|
|
}
|
|
// Also check the human-readable message for obvious audit patterns.
|
|
msg := strings.ToLower(strings.TrimSpace(asString(entry["Message"])))
|
|
for _, skip := range []string{"logged in", "logged out", "log in", "log out", "sign in", "signed in"} {
|
|
if strings.Contains(msg, skip) {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
// parseRedfishEntryTime parses a Redfish LogEntry timestamp (ISO 8601 / RFC 3339)
// and returns it converted to UTC.
//
// Accepted forms, after trimming surrounding whitespace:
//   - RFC 3339 with a zone designator, with or without fractional seconds
//     (time.Parse accepts a fractional-second field even if the layout omits it);
//   - the same date-time without any zone designator, which is interpreted as UTC.
//
// The zero time.Time is returned for empty or unparseable input, so callers must
// check IsZero before using the result.
func parseRedfishEntryTime(raw string) time.Time {
	raw = strings.TrimSpace(raw)
	if raw == "" {
		return time.Time{}
	}

	layouts := [...]string{
		time.RFC3339,
		// Fallback for timestamps emitted without an offset; time.Parse yields UTC
		// when the layout carries no zone information.
		"2006-01-02T15:04:05",
	}
	for _, layout := range layouts {
		if t, err := time.Parse(layout, raw); err == nil {
			return t.UTC()
		}
	}
	return time.Time{}
}
|
|
|
|
// parseRedfishLogEntries converts raw log entries stored in RawPayloads into models.Event slice.
|
|
// Called during Redfish replay for both live and offline (archive) collections.
|
|
func parseRedfishLogEntries(rawPayloads map[string]any, collectedAt time.Time) []models.Event {
|
|
raw, ok := rawPayloads["redfish_log_entries"]
|
|
if !ok {
|
|
return nil
|
|
}
|
|
|
|
var entries []map[string]interface{}
|
|
switch v := raw.(type) {
|
|
case []map[string]interface{}:
|
|
entries = v
|
|
case []interface{}:
|
|
for _, item := range v {
|
|
if m, ok := item.(map[string]interface{}); ok {
|
|
entries = append(entries, m)
|
|
}
|
|
}
|
|
default:
|
|
return nil
|
|
}
|
|
|
|
if len(entries) == 0 {
|
|
return nil
|
|
}
|
|
|
|
out := make([]models.Event, 0, len(entries))
|
|
for _, entry := range entries {
|
|
ev := redfishLogEntryToEvent(entry, collectedAt)
|
|
if ev == nil {
|
|
continue
|
|
}
|
|
out = append(out, *ev)
|
|
}
|
|
return out
|
|
}
|
|
|
|
// redfishLogEntryToEvent converts a single Redfish LogEntry document to models.Event.
|
|
func redfishLogEntryToEvent(entry map[string]interface{}, collectedAt time.Time) *models.Event {
|
|
// Prefer EventTimestamp (actual hardware event time) over Created (Redfish record creation time).
|
|
ts := parseRedfishEntryTime(asString(entry["EventTimestamp"]))
|
|
if ts.IsZero() {
|
|
ts = parseRedfishEntryTime(asString(entry["Created"]))
|
|
}
|
|
if ts.IsZero() {
|
|
ts = collectedAt
|
|
}
|
|
|
|
severity := redfishLogEntrySeverity(entry)
|
|
sensorType := strings.TrimSpace(asString(entry["SensorType"]))
|
|
messageID := strings.TrimSpace(asString(entry["MessageId"]))
|
|
entryType := strings.TrimSpace(asString(entry["EntryType"]))
|
|
entryCode := strings.TrimSpace(asString(entry["EntryCode"]))
|
|
|
|
// SensorName: prefer "Name", fall back to "SensorNumber" + SensorType.
|
|
sensorName := strings.TrimSpace(asString(entry["Name"]))
|
|
if sensorName == "" {
|
|
num := strings.TrimSpace(asString(entry["SensorNumber"]))
|
|
if num != "" && sensorType != "" {
|
|
sensorName = sensorType + " " + num
|
|
}
|
|
}
|
|
|
|
rawMessage := strings.TrimSpace(asString(entry["Message"]))
|
|
|
|
// AMI/MSI BMCs dump raw IPMI record fields into Message instead of human-readable text.
|
|
// Detect this and build a readable description from structured fields instead.
|
|
description, rawData := redfishDecodeMessage(rawMessage, sensorType, entryCode, entry)
|
|
if description == "" {
|
|
return nil
|
|
}
|
|
|
|
return &models.Event{
|
|
ID: messageID,
|
|
Timestamp: ts,
|
|
Source: "redfish",
|
|
SensorType: sensorType,
|
|
SensorName: sensorName,
|
|
EventType: entryType,
|
|
Severity: severity,
|
|
Description: description,
|
|
RawData: rawData,
|
|
}
|
|
}
|
|
|
|
// redfishDecodeMessage returns a human-readable description and optional raw data.
|
|
// AMI/MSI BMCs dump raw IPMI record fields into Message as "Key : Value, Key : Value, ..."
|
|
// instead of a plain human-readable string. We extract the useful decoded fields from it.
|
|
func redfishDecodeMessage(message, sensorType, entryCode string, entry map[string]interface{}) (description, rawData string) {
|
|
if !isRawIPMIDump(message) {
|
|
description = message
|
|
return
|
|
}
|
|
|
|
rawData = message
|
|
kv := parseIPMIDumpKV(message)
|
|
|
|
// Sensor_Type inside the dump is more specific than the top-level SensorType field.
|
|
if v := kv["Sensor_Type"]; v != "" {
|
|
sensorType = v
|
|
}
|
|
eventType := kv["Event_Type"] // human-readable IPMI event type, e.g. "Legacy OFF State"
|
|
|
|
var parts []string
|
|
if sensorType != "" {
|
|
parts = append(parts, sensorType)
|
|
}
|
|
if eventType != "" {
|
|
parts = append(parts, eventType)
|
|
} else if entryCode != "" {
|
|
parts = append(parts, entryCode)
|
|
}
|
|
description = strings.Join(parts, ": ")
|
|
return
|
|
}
|
|
|
|
// isRawIPMIDump reports whether message is an AMI raw IPMI record dump, recognized
// by the presence of both the "Event_Data_1 :" and "Record_Type :" markers.
func isRawIPMIDump(message string) bool {
	for _, marker := range [...]string{"Event_Data_1 :", "Record_Type :"} {
		if !strings.Contains(message, marker) {
			return false
		}
	}
	return true
}
|
|
|
|
// parseIPMIDumpKV parses the AMI "Key : Value, Key : Value, " format into a map.
// The message is split on commas; each trimmed piece is split at its first " : ".
// Pieces without that separator, or with an empty key or value, are ignored.
// When a key occurs more than once the last value wins. The result is never nil.
func parseIPMIDumpKV(message string) map[string]string {
	fields := make(map[string]string)
	for _, segment := range strings.Split(message, ",") {
		key, value, found := strings.Cut(strings.TrimSpace(segment), " : ")
		if !found {
			continue
		}
		key, value = strings.TrimSpace(key), strings.TrimSpace(value)
		if key == "" || value == "" {
			continue
		}
		fields[key] = value
	}
	return fields
}
|
|
|
|
// redfishLogEntrySeverity maps a Redfish LogEntry to models.Severity.
|
|
// AMI/MSI BMCs often set Severity="OK" on all SEL records regardless of content,
|
|
// so we fall back to inferring severity from SensorType when the explicit field is unhelpful.
|
|
func redfishLogEntrySeverity(entry map[string]interface{}) models.Severity {
|
|
// Newer Redfish uses MessageSeverity; older uses Severity.
|
|
raw := strings.ToLower(firstNonEmpty(
|
|
strings.TrimSpace(asString(entry["MessageSeverity"])),
|
|
strings.TrimSpace(asString(entry["Severity"])),
|
|
))
|
|
switch raw {
|
|
case "critical":
|
|
return models.SeverityCritical
|
|
case "warning":
|
|
return models.SeverityWarning
|
|
case "ok", "informational", "":
|
|
// BMC didn't set a meaningful severity — infer from SensorType.
|
|
return redfishSeverityFromSensorType(strings.TrimSpace(asString(entry["SensorType"])))
|
|
default:
|
|
return models.SeverityInfo
|
|
}
|
|
}
|
|
|
|
// redfishSeverityFromSensorType infers event severity from the IPMI/Redfish SensorType string.
|
|
func redfishSeverityFromSensorType(sensorType string) models.Severity {
|
|
switch strings.ToLower(sensorType) {
|
|
case "critical interrupt", "processor", "memory", "power unit",
|
|
"power supply", "drive slot", "system firmware progress":
|
|
return models.SeverityWarning
|
|
default:
|
|
return models.SeverityInfo
|
|
}
|
|
}
|