collector/redfish: improve GPU SN/model fallback and warnings

This commit is contained in:
2026-02-28 12:52:22 +03:00
parent ddab93a5ee
commit 9aadf2f1e9
3 changed files with 436 additions and 11 deletions

View File

@@ -4,6 +4,7 @@ import (
"fmt"
"sort"
"strings"
"time"
"git.mchus.pro/mchus/logpile/internal/models"
)
@@ -35,6 +36,7 @@ func ReplayRedfishFromRawPayloads(rawPayloads map[string]any, emit ProgressFn) (
chassisPaths := r.discoverMemberPaths("/redfish/v1/Chassis", "/redfish/v1/Chassis/1")
managerPaths := r.discoverMemberPaths("/redfish/v1/Managers", "/redfish/v1/Managers/1")
primarySystem := firstPathOrDefault(systemPaths, "/redfish/v1/Systems/1")
primaryChassis := firstPathOrDefault(chassisPaths, "/redfish/v1/Chassis/1")
primaryManager := firstPathOrDefault(managerPaths, "/redfish/v1/Managers/1")
if emit != nil {
@@ -44,8 +46,15 @@ func ReplayRedfishFromRawPayloads(rawPayloads map[string]any, emit ProgressFn) (
if err != nil {
return nil, fmt.Errorf("system info: %w", err)
}
chassisDoc, _ := r.getJSON(primaryChassis)
biosDoc, _ := r.getJSON(joinPath(primarySystem, "/Bios"))
secureBootDoc, _ := r.getJSON(joinPath(primarySystem, "/SecureBoot"))
systemFRUDoc, _ := r.getJSON(joinPath(primarySystem, "/Oem/Public/FRU"))
chassisFRUDoc, _ := r.getJSON(joinPath(primaryChassis, "/Oem/Public/FRU"))
fruDoc := systemFRUDoc
if len(fruDoc) == 0 {
fruDoc = chassisFRUDoc
}
if emit != nil {
emit(Progress{Status: "running", Progress: 55, Message: "Redfish snapshot: replay CPU/RAM/Storage..."})
@@ -71,7 +80,7 @@ func ReplayRedfishFromRawPayloads(rawPayloads map[string]any, emit ProgressFn) (
Sensors: make([]models.SensorReading, 0),
RawPayloads: cloneRawPayloads(rawPayloads),
Hardware: &models.HardwareConfig{
BoardInfo: parseBoardInfo(systemDoc),
BoardInfo: parseBoardInfoWithFallback(systemDoc, chassisDoc, fruDoc),
CPUs: parseCPUs(processors),
Memory: parseMemory(memory),
Storage: storageDevices,
@@ -83,9 +92,72 @@ func ReplayRedfishFromRawPayloads(rawPayloads map[string]any, emit ProgressFn) (
Firmware: parseFirmware(systemDoc, biosDoc, managerDoc, secureBootDoc, networkProtocolDoc),
},
}
appendMissingServerModelWarning(result, systemDoc, joinPath(primarySystem, "/Oem/Public/FRU"), joinPath(primaryChassis, "/Oem/Public/FRU"))
return result, nil
}
// appendMissingServerModelWarning appends a "Collection Warning" event to
// result.Events when the parsed board info carries no product name.
//
// It does nothing when result or result.Hardware is nil, or when
// BoardInfo.ProductName is non-blank after trimming. Otherwise the event's
// RawData lists, joined by "; ", every reason that could be established:
//   - systemDoc["Model"] is non-blank but normalizeRedfishIdentityField
//     reduces it to an empty string (reported as a placeholder value);
//   - a fetch error is recorded in result.RawPayloads for systemFRUPath;
//   - a fetch error is recorded in result.RawPayloads for chassisFRUPath.
//
// When none of these apply, a single generic reason is used instead.
func appendMissingServerModelWarning(result *models.AnalysisResult, systemDoc map[string]interface{}, systemFRUPath, chassisFRUPath string) {
	if result == nil || result.Hardware == nil {
		return
	}
	if productName := strings.TrimSpace(result.Hardware.BoardInfo.ProductName); productName != "" {
		return
	}

	details := make([]string, 0, 3)

	// A model string that is present but normalizes to nothing is worth
	// reporting verbatim so the operator can see what the BMC returned.
	if model := strings.TrimSpace(asString(systemDoc["Model"])); model != "" {
		if normalizeRedfishIdentityField(model) == "" {
			details = append(details, fmt.Sprintf("system model is placeholder: %q", model))
		}
	}

	// Report the FRU endpoints in a fixed order: system first, then chassis.
	fetchErrs := redfishFetchErrorsFromRawPayloads(result.RawPayloads)
	for _, fruPath := range []string{systemFRUPath, chassisFRUPath} {
		msg := fetchErrs[normalizeRedfishPath(fruPath)]
		if strings.TrimSpace(msg) == "" {
			continue
		}
		details = append(details, fmt.Sprintf("%s unavailable: %s", fruPath, msg))
	}

	if len(details) == 0 {
		details = append(details, "no non-placeholder ProductName/Model found in collected Redfish documents")
	}

	warning := models.Event{
		Timestamp:   time.Now(),
		Source:      "Redfish",
		EventType:   "Collection Warning",
		Severity:    models.SeverityWarning,
		Description: "Server model is missing in collected Redfish data",
		RawData:     strings.Join(details, "; "),
	}
	result.Events = append(result.Events, warning)
}
// redfishFetchErrorsFromRawPayloads extracts the list stored under the
// "redfish_fetch_errors" key of rawPayloads and converts it with
// redfishFetchErrorListToMap.
//
// The stored value may be a []map[string]interface{} or a []interface{};
// in the latter case elements that are not map[string]interface{} are
// skipped. If the key is absent (including a nil or empty rawPayloads) or
// the value has any other type, an empty, non-nil map is returned.
func redfishFetchErrorsFromRawPayloads(rawPayloads map[string]any) map[string]string {
	// Looking up a key in a nil or empty map is safe and reports it as absent.
	raw, found := rawPayloads["redfish_fetch_errors"]
	if !found {
		return map[string]string{}
	}

	var entries []map[string]interface{}
	switch typed := raw.(type) {
	case []map[string]interface{}:
		entries = typed
	case []interface{}:
		// Generic slice form: keep only the elements that are maps.
		entries = make([]map[string]interface{}, 0, len(typed))
		for _, item := range typed {
			if entry, isMap := item.(map[string]interface{}); isMap {
				entries = append(entries, entry)
			}
		}
	default:
		return map[string]string{}
	}
	return redfishFetchErrorListToMap(entries)
}
// redfishSnapshotReader wraps a decoded Redfish snapshot so that the replay
// helpers in this file (for example collectPSUs and collectGPUs) can be
// written as methods on it.
type redfishSnapshotReader struct {
	// tree is the snapshot content.
	// NOTE(review): presumably keyed by Redfish resource path with the decoded
	// JSON document as the value; confirm against the code that builds it.
	tree map[string]any
}
@@ -479,13 +551,15 @@ func (r redfishSnapshotReader) collectPSUs(chassisPaths []string) []models.PSU {
}
func (r redfishSnapshotReader) collectGPUs(systemPaths, chassisPaths []string) []models.GPU {
collections := make([]string, 0, len(systemPaths)*2+len(chassisPaths))
collections := make([]string, 0, len(systemPaths)*3+len(chassisPaths)*2)
for _, systemPath := range systemPaths {
collections = append(collections, joinPath(systemPath, "/PCIeDevices"))
collections = append(collections, joinPath(systemPath, "/Accelerators"))
collections = append(collections, joinPath(systemPath, "/GraphicsControllers"))
}
for _, chassisPath := range chassisPaths {
collections = append(collections, joinPath(chassisPath, "/PCIeDevices"))
collections = append(collections, joinPath(chassisPath, "/Accelerators"))
}
var out []models.GPU
seen := make(map[string]struct{})
@@ -502,7 +576,7 @@ func (r redfishSnapshotReader) collectGPUs(systemPaths, chassisPaths []string) [
}
gpu := parseGPU(doc, functionDocs, idx)
idx++
key := firstNonEmpty(gpu.SerialNumber, gpu.BDF, gpu.Slot+"|"+gpu.Model)
key := gpuDedupKey(gpu)
if key == "" {
continue
}