collector/redfish: improve GPU SN/model fallback and warnings
This commit is contained in:
@@ -4,6 +4,7 @@ import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
@@ -35,6 +36,7 @@ func ReplayRedfishFromRawPayloads(rawPayloads map[string]any, emit ProgressFn) (
|
||||
chassisPaths := r.discoverMemberPaths("/redfish/v1/Chassis", "/redfish/v1/Chassis/1")
|
||||
managerPaths := r.discoverMemberPaths("/redfish/v1/Managers", "/redfish/v1/Managers/1")
|
||||
primarySystem := firstPathOrDefault(systemPaths, "/redfish/v1/Systems/1")
|
||||
primaryChassis := firstPathOrDefault(chassisPaths, "/redfish/v1/Chassis/1")
|
||||
primaryManager := firstPathOrDefault(managerPaths, "/redfish/v1/Managers/1")
|
||||
|
||||
if emit != nil {
|
||||
@@ -44,8 +46,15 @@ func ReplayRedfishFromRawPayloads(rawPayloads map[string]any, emit ProgressFn) (
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("system info: %w", err)
|
||||
}
|
||||
chassisDoc, _ := r.getJSON(primaryChassis)
|
||||
biosDoc, _ := r.getJSON(joinPath(primarySystem, "/Bios"))
|
||||
secureBootDoc, _ := r.getJSON(joinPath(primarySystem, "/SecureBoot"))
|
||||
systemFRUDoc, _ := r.getJSON(joinPath(primarySystem, "/Oem/Public/FRU"))
|
||||
chassisFRUDoc, _ := r.getJSON(joinPath(primaryChassis, "/Oem/Public/FRU"))
|
||||
fruDoc := systemFRUDoc
|
||||
if len(fruDoc) == 0 {
|
||||
fruDoc = chassisFRUDoc
|
||||
}
|
||||
|
||||
if emit != nil {
|
||||
emit(Progress{Status: "running", Progress: 55, Message: "Redfish snapshot: replay CPU/RAM/Storage..."})
|
||||
@@ -71,7 +80,7 @@ func ReplayRedfishFromRawPayloads(rawPayloads map[string]any, emit ProgressFn) (
|
||||
Sensors: make([]models.SensorReading, 0),
|
||||
RawPayloads: cloneRawPayloads(rawPayloads),
|
||||
Hardware: &models.HardwareConfig{
|
||||
BoardInfo: parseBoardInfo(systemDoc),
|
||||
BoardInfo: parseBoardInfoWithFallback(systemDoc, chassisDoc, fruDoc),
|
||||
CPUs: parseCPUs(processors),
|
||||
Memory: parseMemory(memory),
|
||||
Storage: storageDevices,
|
||||
@@ -83,9 +92,72 @@ func ReplayRedfishFromRawPayloads(rawPayloads map[string]any, emit ProgressFn) (
|
||||
Firmware: parseFirmware(systemDoc, biosDoc, managerDoc, secureBootDoc, networkProtocolDoc),
|
||||
},
|
||||
}
|
||||
appendMissingServerModelWarning(result, systemDoc, joinPath(primarySystem, "/Oem/Public/FRU"), joinPath(primaryChassis, "/Oem/Public/FRU"))
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func appendMissingServerModelWarning(result *models.AnalysisResult, systemDoc map[string]interface{}, systemFRUPath, chassisFRUPath string) {
|
||||
if result == nil || result.Hardware == nil {
|
||||
return
|
||||
}
|
||||
if strings.TrimSpace(result.Hardware.BoardInfo.ProductName) != "" {
|
||||
return
|
||||
}
|
||||
|
||||
reasons := make([]string, 0, 3)
|
||||
systemModelRaw := strings.TrimSpace(asString(systemDoc["Model"]))
|
||||
if systemModelRaw != "" && normalizeRedfishIdentityField(systemModelRaw) == "" {
|
||||
reasons = append(reasons, fmt.Sprintf("system model is placeholder: %q", systemModelRaw))
|
||||
}
|
||||
|
||||
errs := redfishFetchErrorsFromRawPayloads(result.RawPayloads)
|
||||
if msg := errs[normalizeRedfishPath(systemFRUPath)]; strings.TrimSpace(msg) != "" {
|
||||
reasons = append(reasons, fmt.Sprintf("%s unavailable: %s", systemFRUPath, msg))
|
||||
}
|
||||
if msg := errs[normalizeRedfishPath(chassisFRUPath)]; strings.TrimSpace(msg) != "" {
|
||||
reasons = append(reasons, fmt.Sprintf("%s unavailable: %s", chassisFRUPath, msg))
|
||||
}
|
||||
if len(reasons) == 0 {
|
||||
reasons = append(reasons, "no non-placeholder ProductName/Model found in collected Redfish documents")
|
||||
}
|
||||
|
||||
result.Events = append(result.Events, models.Event{
|
||||
Timestamp: time.Now(),
|
||||
Source: "Redfish",
|
||||
EventType: "Collection Warning",
|
||||
Severity: models.SeverityWarning,
|
||||
Description: "Server model is missing in collected Redfish data",
|
||||
RawData: strings.Join(reasons, "; "),
|
||||
})
|
||||
}
|
||||
|
||||
func redfishFetchErrorsFromRawPayloads(rawPayloads map[string]any) map[string]string {
|
||||
out := make(map[string]string)
|
||||
if len(rawPayloads) == 0 {
|
||||
return out
|
||||
}
|
||||
raw, ok := rawPayloads["redfish_fetch_errors"]
|
||||
if !ok {
|
||||
return out
|
||||
}
|
||||
switch list := raw.(type) {
|
||||
case []map[string]interface{}:
|
||||
return redfishFetchErrorListToMap(list)
|
||||
case []interface{}:
|
||||
normalized := make([]map[string]interface{}, 0, len(list))
|
||||
for _, item := range list {
|
||||
m, ok := item.(map[string]interface{})
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
normalized = append(normalized, m)
|
||||
}
|
||||
return redfishFetchErrorListToMap(normalized)
|
||||
default:
|
||||
return out
|
||||
}
|
||||
}
|
||||
|
||||
// redfishSnapshotReader is the value receiver for the snapshot collection
// methods in this file (for example collectPSUs and collectGPUs).
// NOTE(review): tree presumably holds previously captured Redfish documents
// keyed by resource path — confirm against the constructor, which is not
// visible here.
type redfishSnapshotReader struct {
|
||||
tree map[string]interface{}
|
||||
}
|
||||
@@ -479,13 +551,15 @@ func (r redfishSnapshotReader) collectPSUs(chassisPaths []string) []models.PSU {
|
||||
}
|
||||
|
||||
func (r redfishSnapshotReader) collectGPUs(systemPaths, chassisPaths []string) []models.GPU {
|
||||
collections := make([]string, 0, len(systemPaths)*2+len(chassisPaths))
|
||||
collections := make([]string, 0, len(systemPaths)*3+len(chassisPaths)*2)
|
||||
for _, systemPath := range systemPaths {
|
||||
collections = append(collections, joinPath(systemPath, "/PCIeDevices"))
|
||||
collections = append(collections, joinPath(systemPath, "/Accelerators"))
|
||||
collections = append(collections, joinPath(systemPath, "/GraphicsControllers"))
|
||||
}
|
||||
for _, chassisPath := range chassisPaths {
|
||||
collections = append(collections, joinPath(chassisPath, "/PCIeDevices"))
|
||||
collections = append(collections, joinPath(chassisPath, "/Accelerators"))
|
||||
}
|
||||
var out []models.GPU
|
||||
seen := make(map[string]struct{})
|
||||
@@ -502,7 +576,7 @@ func (r redfishSnapshotReader) collectGPUs(systemPaths, chassisPaths []string) [
|
||||
}
|
||||
gpu := parseGPU(doc, functionDocs, idx)
|
||||
idx++
|
||||
key := firstNonEmpty(gpu.SerialNumber, gpu.BDF, gpu.Slot+"|"+gpu.Model)
|
||||
key := gpuDedupKey(gpu)
|
||||
if key == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user