Files
logpile/internal/collector/redfish_logentries.go
Mikhail Chusavitin 96e65d8f65 feat: Redfish hardware event log collection + MSI ghost GPU filter + inventory improvements
- Collect hardware event logs (last 7 days) from Systems and Managers/SEL LogServices
- Parse AMI raw IPMI dump messages into readable descriptions (Sensor_Type: Event_Type)
- Filter out audit/journal/non-hardware log services; only SEL from Managers
- MSI ghost GPU filter: exclude processor GPU entries with temperature=0 when host is powered on
- Reanimator collected_at uses InventoryData/Status.LastModifiedTime (30-day fallback)
- Invalidate Redfish inventory CRC groups before host power-on
- Log inventory LastModifiedTime age in collection logs
- Drop SecureBoot collection (SecureBootMode, SecureBootDatabases) — not hardware inventory
- Add build version to UI footer via template
- Add MSI Redfish API reference doc to bible-local/docs/

ADL-032–ADL-035

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-18 23:47:22 +03:00

393 lines
12 KiB
Go

package collector
import (
"context"
"log"
"net/http"
"strings"
"time"
"git.mchus.pro/mchus/logpile/internal/models"
)
// Limits applied while collecting Redfish log entries.
const (
// redfishLogEntriesWindow is the look-back period: the collection cutoff is
// computed as "now minus this duration".
redfishLogEntriesWindow = 7 * 24 * time.Hour
// redfishLogEntriesMaxTotal is the overall entry budget across all log
// services; no further service is visited once it has been reached.
redfishLogEntriesMaxTotal = 500
// redfishLogEntriesMaxPerSvc is the limit handed to the paging fetcher for
// a single log service.
redfishLogEntriesMaxPerSvc = 200
)
// collectRedfishLogEntries fetches hardware event log entries from the
// LogServices collections of the given Systems and Managers.
//
// Systems are visited first and every log service accepted by
// isHardwareLogService is read. For Managers only the service accepted by
// isManagerSELService (Id "SEL") is read, because on AMI/MSI BMCs the IPMI SEL
// is exposed there. Entries older than redfishLogEntriesWindow are dropped by
// fetchRedfishLogEntriesWithPaging.
//
// At most redfishLogEntriesMaxPerSvc entries are taken from a single service
// and never more than redfishLogEntriesMaxTotal overall. Collection is
// best-effort: a LogServices collection that cannot be listed, or a service
// without a usable Entries path, is skipped silently. The result is nil when
// nothing was collected.
func (c *RedfishConnector) collectRedfishLogEntries(ctx context.Context, client *http.Client, req Request, baseURL string, systemPaths, managerPaths []string) []map[string]interface{} {
	cutoff := time.Now().UTC().Add(-redfishLogEntriesWindow)
	// seen is shared by every service; the paging fetcher uses it to skip
	// entries whose Id (or @odata.id) was already returned.
	seen := make(map[string]struct{})
	var out []map[string]interface{}

	collectFrom := func(logServicesPath string, filter func(map[string]interface{}) bool) {
		if len(out) >= redfishLogEntriesMaxTotal {
			return
		}
		services, err := c.getCollectionMembers(ctx, client, req, baseURL, logServicesPath)
		if err != nil || len(services) == 0 {
			return
		}
		for _, svc := range services {
			// Remaining global budget; stop as soon as it is exhausted.
			remaining := redfishLogEntriesMaxTotal - len(out)
			if remaining <= 0 {
				break
			}
			if !filter(svc) {
				continue
			}
			entriesPath := redfishLogServiceEntriesPath(svc)
			if entriesPath == "" {
				continue
			}
			// Clamp the per-service limit to the remaining budget so the
			// overall cap cannot be overshot by the last service read.
			limit := redfishLogEntriesMaxPerSvc
			if remaining < limit {
				limit = remaining
			}
			entries := c.fetchRedfishLogEntriesWithPaging(ctx, client, req, baseURL, entriesPath, cutoff, seen, limit)
			out = append(out, entries...)
		}
	}

	for _, systemPath := range systemPaths {
		collectFrom(joinPath(systemPath, "/LogServices"), isHardwareLogService)
	}
	// Managers hold the IPMI SEL on AMI/MSI BMCs — include only the "SEL" service.
	for _, managerPath := range managerPaths {
		collectFrom(joinPath(managerPath, "/LogServices"), isManagerSELService)
	}

	if len(out) > 0 {
		log.Printf("redfish: collected %d hardware log entries (Systems+Managers SEL, window=7d)", len(out))
	}
	return out
}
// fetchRedfishLogEntriesWithPaging reads a LogEntry collection starting at
// entriesPath and follows "Members@odata.nextLink" / "@odata.nextLink" pages.
//
// For every member it:
//   - uses the member directly when it carries a "Created" field (inline
//     entry), otherwise fetches the document behind its "@odata.id";
//   - skips entries whose key (Id, falling back to @odata.id) is already in
//     seen, and records new keys there;
//   - drops entries created before cutoff and entries rejected by
//     isHardwareLogEntry.
//
// Paging stops when limit entries were gathered, a page fails to load, the
// context is cancelled, a page contained an entry older than cutoff (this
// assumes the BMC returns entries newest-first), there is no next link, or the
// next link points at a page that was already read. Errors are not reported;
// whatever was gathered so far is returned.
func (c *RedfishConnector) fetchRedfishLogEntriesWithPaging(ctx context.Context, client *http.Client, req Request, baseURL, entriesPath string, cutoff time.Time, seen map[string]struct{}, limit int) []map[string]interface{} {
	var out []map[string]interface{}
	// visited guards against a nextLink that points back at an already-read
	// page; with every entry deduplicated that would otherwise never end.
	visited := make(map[string]struct{})
	nextPath := entriesPath
	for nextPath != "" && len(out) < limit {
		if ctx.Err() != nil {
			break
		}
		if _, again := visited[nextPath]; again {
			break
		}
		visited[nextPath] = struct{}{}

		collection, err := c.getJSON(ctx, client, req, baseURL, nextPath)
		if err != nil {
			break
		}
		// Handle both linked members (@odata.id only) and inline members (full objects).
		rawMembers, _ := collection["Members"].([]interface{})
		hitOldEntry := false
		for _, rawMember := range rawMembers {
			// Once the context is done every linked fetch would fail; stop
			// instead of issuing one doomed request per member.
			if len(out) >= limit || ctx.Err() != nil {
				break
			}
			memberMap, ok := rawMember.(map[string]interface{})
			if !ok {
				continue
			}
			var entry map[string]interface{}
			if _, hasCreated := memberMap["Created"]; hasCreated {
				// Inline entry — use directly.
				entry = memberMap
			} else {
				// Linked entry — fetch by path.
				memberPath := normalizeRedfishPath(asString(memberMap["@odata.id"]))
				if memberPath == "" {
					continue
				}
				entry, err = c.getJSON(ctx, client, req, baseURL, memberPath)
				if err != nil || len(entry) == 0 {
					continue
				}
			}
			// Dedup by entry Id or path.
			// NOTE(review): seen is shared by the caller across services and
			// keyed by Id first; if different log services reuse the same Ids,
			// later entries are dropped — confirm this is intended.
			entryKey := asString(entry["Id"])
			if entryKey == "" {
				entryKey = asString(entry["@odata.id"])
			}
			if entryKey != "" {
				if _, dup := seen[entryKey]; dup {
					continue
				}
				seen[entryKey] = struct{}{}
			}
			// Time filter. Entries without a parseable Created are kept.
			created := parseRedfishEntryTime(asString(entry["Created"]))
			if !created.IsZero() && created.Before(cutoff) {
				hitOldEntry = true
				continue
			}
			// Hardware relevance filter.
			if !isHardwareLogEntry(entry) {
				continue
			}
			out = append(out, entry)
		}
		// Stop paging once we've seen entries older than the window.
		if hitOldEntry {
			break
		}
		nextPath = firstNonEmpty(
			normalizeRedfishPath(asString(collection["Members@odata.nextLink"])),
			normalizeRedfishPath(asString(collection["@odata.nextLink"])),
		)
	}
	return out
}
// isManagerSELService reports whether a Manager log service is the IPMI SEL,
// i.e. its "Id" equals "sel" after trimming whitespace and lower-casing.
// Any other Manager log service is rejected.
func isManagerSELService(svc map[string]interface{}) bool {
	serviceID := strings.TrimSpace(asString(svc["Id"]))
	return strings.ToLower(serviceID) == "sel"
}
// isHardwareLogService reports whether a log service should be read for
// hardware events. It is a deny-list: the service is rejected when its
// lower-cased, trimmed "Id" or "Name" contains one of "audit", "journal",
// "bmc", "security", "manager" or "debug"; everything else is accepted.
func isHardwareLogService(svc map[string]interface{}) bool {
	normalized := func(key string) string {
		return strings.ToLower(strings.TrimSpace(asString(svc[key])))
	}
	serviceID := normalized("Id")
	serviceName := normalized("Name")

	denied := [...]string{"audit", "journal", "bmc", "security", "manager", "debug"}
	for i := range denied {
		if strings.Contains(serviceID, denied[i]) {
			return false
		}
		if strings.Contains(serviceName, denied[i]) {
			return false
		}
	}
	return true
}
// redfishLogServiceEntriesPath resolves the Entries collection path of a
// LogService document. The explicit "Entries"."@odata.id" link wins; without
// it the path is built as the service's own "@odata.id" + "/Entries". An empty
// string is returned when neither is available.
func redfishLogServiceEntriesPath(svc map[string]interface{}) string {
	entriesLink, isObject := svc["Entries"].(map[string]interface{})
	if isObject {
		explicit := normalizeRedfishPath(asString(entriesLink["@odata.id"]))
		if explicit != "" {
			return explicit
		}
	}

	servicePath := normalizeRedfishPath(asString(svc["@odata.id"]))
	if servicePath == "" {
		return ""
	}
	return joinPath(servicePath, "/Entries")
}
// isHardwareLogEntry reports whether a log entry should be kept as a hardware
// event. An entry is rejected when:
//   - its EntryType is "Oem" (case-insensitive),
//   - its lower-cased MessageId contains an account/authentication related
//     token, or
//   - its lower-cased Message contains an obvious login/logout phrase.
func isHardwareLogEntry(entry map[string]interface{}) bool {
	if strings.EqualFold(strings.TrimSpace(asString(entry["EntryType"])), "Oem") {
		return false
	}

	containsAny := func(text string, tokens []string) bool {
		for _, token := range tokens {
			if strings.Contains(text, token) {
				return true
			}
		}
		return false
	}

	auditIDTokens := []string{
		"user", "account", "password", "login", "logon", "session",
		"auth", "certificate", "security", "credential", "privilege",
	}
	messageID := strings.ToLower(strings.TrimSpace(asString(entry["MessageId"])))
	if containsAny(messageID, auditIDTokens) {
		return false
	}

	// The human-readable text is checked as well for obvious audit patterns.
	auditPhrases := []string{"logged in", "logged out", "log in", "log out", "sign in", "signed in"}
	message := strings.ToLower(strings.TrimSpace(asString(entry["Message"])))
	return !containsAny(message, auditPhrases)
}
// parseRedfishEntryTime parses a Redfish LogEntry timestamp in RFC 3339 form
// and returns it converted to UTC. Surrounding whitespace is ignored. The zero
// time.Time is returned for empty or unparseable input.
//
// A single time.RFC3339 parse is sufficient: time.Parse also accepts a
// fractional-seconds field in the input even though the layout has none.
func parseRedfishEntryTime(raw string) time.Time {
	trimmed := strings.TrimSpace(raw)
	if trimmed == "" {
		return time.Time{}
	}
	parsed, err := time.Parse(time.RFC3339, trimmed)
	if err != nil {
		return time.Time{}
	}
	return parsed.UTC()
}
// parseRedfishLogEntries turns the raw log entries stored under the
// "redfish_log_entries" key of rawPayloads into a slice of models.Event.
//
// The payload may be a []map[string]interface{} or a []interface{} whose
// non-map items are ignored. nil is returned when the key is absent, the
// payload has another type, or it holds no entries. Entries that
// redfishLogEntryToEvent cannot convert are skipped. collectedAt is passed
// through as the fallback timestamp.
func parseRedfishLogEntries(rawPayloads map[string]any, collectedAt time.Time) []models.Event {
	payload, present := rawPayloads["redfish_log_entries"]
	if !present {
		return nil
	}

	var docs []map[string]interface{}
	if typed, ok := payload.([]map[string]interface{}); ok {
		docs = typed
	} else if generic, ok := payload.([]interface{}); ok {
		for i := range generic {
			doc, isMap := generic[i].(map[string]interface{})
			if !isMap {
				continue
			}
			docs = append(docs, doc)
		}
	} else {
		return nil
	}
	if len(docs) == 0 {
		return nil
	}

	events := make([]models.Event, 0, len(docs))
	for _, doc := range docs {
		if converted := redfishLogEntryToEvent(doc, collectedAt); converted != nil {
			events = append(events, *converted)
		}
	}
	return events
}
// redfishLogEntryToEvent converts one Redfish LogEntry document into a
// models.Event with Source "redfish".
//
// Timestamp: "EventTimestamp" is tried first, then "Created", and finally
// collectedAt when neither parses. SensorName is the entry's "Name", or
// "<SensorType> <SensorNumber>" when Name is empty and both parts are present.
// Description and RawData come from redfishDecodeMessage; nil is returned when
// the resulting description is empty.
func redfishLogEntryToEvent(entry map[string]interface{}, collectedAt time.Time) *models.Event {
	field := func(key string) string {
		return strings.TrimSpace(asString(entry[key]))
	}

	// EventTimestamp is preferred over Created (the record creation time).
	when := collectedAt
	for _, key := range [...]string{"EventTimestamp", "Created"} {
		if parsed := parseRedfishEntryTime(asString(entry[key])); !parsed.IsZero() {
			when = parsed
			break
		}
	}

	level := redfishLogEntrySeverity(entry)
	sensorType := field("SensorType")
	messageID := field("MessageId")
	entryType := field("EntryType")
	entryCode := field("EntryCode")

	sensorName := field("Name")
	if sensorName == "" {
		if number := field("SensorNumber"); number != "" && sensorType != "" {
			sensorName = sensorType + " " + number
		}
	}

	// Raw IPMI dumps in Message are replaced by a readable description built
	// from structured fields; the dump itself is kept as raw data.
	description, rawData := redfishDecodeMessage(field("Message"), sensorType, entryCode, entry)
	if description == "" {
		return nil
	}

	event := models.Event{
		ID:          messageID,
		Timestamp:   when,
		Source:      "redfish",
		SensorType:  sensorType,
		SensorName:  sensorName,
		EventType:   entryType,
		Severity:    level,
		Description: description,
		RawData:     rawData,
	}
	return &event
}
// redfishDecodeMessage returns the description to show for a log entry and,
// for raw IPMI dumps, the original message as rawData.
//
// A message that isRawIPMIDump does not recognise is returned unchanged with
// empty rawData. For a dump ("Key : Value, Key : Value, ...") the description
// is "<sensor type>: <detail>", where the sensor type is the dump's
// Sensor_Type (falling back to the sensorType argument) and the detail is the
// dump's Event_Type (falling back to entryCode). Missing parts are omitted, so
// the description may be empty. The entry argument is currently unused.
func redfishDecodeMessage(message, sensorType, entryCode string, entry map[string]interface{}) (description, rawData string) {
	if !isRawIPMIDump(message) {
		return message, ""
	}

	fields := parseIPMIDumpKV(message)
	// The dump's own Sensor_Type takes precedence over the caller's value.
	if specific := fields["Sensor_Type"]; specific != "" {
		sensorType = specific
	}
	// Event_Type is the readable IPMI event type, e.g. "Legacy OFF State".
	detail := fields["Event_Type"]
	if detail == "" {
		detail = entryCode
	}

	parts := make([]string, 0, 2)
	for _, piece := range [...]string{sensorType, detail} {
		if piece != "" {
			parts = append(parts, piece)
		}
	}
	return strings.Join(parts, ": "), message
}
// isRawIPMIDump reports whether message looks like an AMI raw IPMI record
// dump, i.e. it contains both the "Event_Data_1 :" and "Record_Type :" markers.
func isRawIPMIDump(message string) bool {
	for _, marker := range [...]string{"Event_Data_1 :", "Record_Type :"} {
		if !strings.Contains(message, marker) {
			return false
		}
	}
	return true
}
// parseIPMIDumpKV parses the AMI "Key : Value, Key : Value, " dump format into
// a map. The message is split on commas; each trimmed segment is cut at its
// first " : ". Segments without that separator, or with an empty key or value
// after trimming, are ignored. A repeated key keeps its last value. The
// returned map is never nil.
func parseIPMIDumpKV(message string) map[string]string {
	fields := make(map[string]string)
	for _, segment := range strings.Split(message, ",") {
		key, value, found := strings.Cut(strings.TrimSpace(segment), " : ")
		if !found {
			continue
		}
		key, value = strings.TrimSpace(key), strings.TrimSpace(value)
		if key == "" || value == "" {
			continue
		}
		fields[key] = value
	}
	return fields
}
// redfishLogEntrySeverity maps a Redfish LogEntry to a models.Severity.
//
// The explicit level is taken from "MessageSeverity" and "Severity" via
// firstNonEmpty (MessageSeverity is passed first), compared case-insensitively:
// "critical" and "warning" map directly. For "ok", "informational" or no value
// at all the severity is inferred from "SensorType" instead, because AMI/MSI
// BMCs often report "OK" on every SEL record. Any other value maps to
// models.SeverityInfo.
func redfishLogEntrySeverity(entry map[string]interface{}) models.Severity {
	messageSeverity := strings.TrimSpace(asString(entry["MessageSeverity"]))
	legacySeverity := strings.TrimSpace(asString(entry["Severity"]))
	level := strings.ToLower(firstNonEmpty(messageSeverity, legacySeverity))

	if level == "critical" {
		return models.SeverityCritical
	}
	if level == "warning" {
		return models.SeverityWarning
	}
	if level == "ok" || level == "informational" || level == "" {
		// No meaningful explicit severity — fall back to the sensor type.
		sensorType := strings.TrimSpace(asString(entry["SensorType"]))
		return redfishSeverityFromSensorType(sensorType)
	}
	return models.SeverityInfo
}
// redfishSeverityFromSensorType infers a severity from a SensorType string.
// The comparison is case-insensitive and exact (no trimming): a fixed set of
// sensor types yields models.SeverityWarning, everything else
// models.SeverityInfo.
func redfishSeverityFromSensorType(sensorType string) models.Severity {
	lowered := strings.ToLower(sensorType)
	warningTypes := [...]string{
		"critical interrupt", "processor", "memory", "power unit",
		"power supply", "drive slot", "system firmware progress",
	}
	for i := range warningTypes {
		if lowered == warningTypes[i] {
			return models.SeverityWarning
		}
	}
	return models.SeverityInfo
}