From 6c19a58b2400fccc3d70dcaecdaf1023d9a93998 Mon Sep 17 00:00:00 2001 From: Michael Chus Date: Sat, 28 Feb 2026 12:59:57 +0300 Subject: [PATCH] collector/redfish: expand endpoint coverage and timestamp collect logs --- internal/collector/redfish.go | 49 ++++ internal/collector/redfish_replay.go | 336 ++++++++++++++++++++++++++- internal/server/job_manager.go | 17 +- 3 files changed, 396 insertions(+), 6 deletions(-) diff --git a/internal/collector/redfish.go b/internal/collector/redfish.go index 96b1f7c..f26c0f5 100644 --- a/internal/collector/redfish.go +++ b/internal/collector/redfish.go @@ -974,6 +974,12 @@ func redfishCriticalEndpoints(systemPaths, chassisPaths, managerPaths []string) add(joinPath(p, "/Power")) add(joinPath(p, "/Thermal")) add(joinPath(p, "/Sensors")) + add(joinPath(p, "/HealthSummary")) + add(joinPath(p, "/ThresholdSensors")) + add(joinPath(p, "/DiscreteSensors")) + add(joinPath(p, "/Boards")) + add(joinPath(p, "/Backplanes")) + add(joinPath(p, "/Assembly")) add(joinPath(p, "/NetworkAdapters")) add(joinPath(p, "/PCIeDevices")) add(joinPath(p, "/Accelerators")) @@ -1796,6 +1802,9 @@ func parseGPU(doc map[string]interface{}, functionDocs []map[string]interface{}, if bdf := asString(doc["BDF"]); bdf != "" { gpu.BDF = bdf } + if gpu.BDF == "" { + gpu.BDF = buildBDFfromOemPublic(doc) + } if gpu.VendorID == 0 { gpu.VendorID = asHexOrInt(doc["VendorId"]) } @@ -1850,6 +1859,9 @@ func parsePCIeDevice(doc map[string]interface{}, functionDocs []map[string]inter VendorID: asHexOrInt(doc["VendorId"]), DeviceID: asHexOrInt(doc["DeviceId"]), } + if strings.TrimSpace(dev.BDF) == "" { + dev.BDF = buildBDFfromOemPublic(doc) + } for _, fn := range functionDocs { if dev.BDF == "" { @@ -1969,6 +1981,36 @@ func isGenericPCIeClassLabel(v string) bool { } } +func buildBDFfromOemPublic(doc map[string]interface{}) string { + if len(doc) == 0 { + return "" + } + oem, ok := doc["Oem"].(map[string]interface{}) + if !ok { + return "" + } + public, ok := 
oem["Public"].(map[string]interface{}) + if !ok { + return "" + } + + bus := asHexOrInt(public["BusNumber"]) + dev := asHexOrInt(public["DeviceNumber"]) + fn := asHexOrInt(public["FunctionNumber"]) + if bus < 0 || dev < 0 || fn < 0 { + return "" + } + segment := asHexOrInt(public["Segment"]) + if segment < 0 { + segment = 0 + } + // Reject only the all-zero bus/device/function triple (treated as "not reported"); any single non-zero field is enough to build a BDF. + if bus == 0 && dev == 0 && fn == 0 { + return "" + } + return fmt.Sprintf("%04x:%02x:%02x.%x", segment, bus, dev, fn) +} + func normalizeRedfishIdentityField(v string) string { v = strings.TrimSpace(v) if v == "" { @@ -2514,6 +2556,7 @@ func redfishSnapshotPrioritySeeds(systemPaths, chassisPaths, managerPaths []stri add(joinPath(p, "/PCIeDevices")) add(joinPath(p, "/PCIeFunctions")) add(joinPath(p, "/Accelerators")) + add(joinPath(p, "/GraphicsControllers")) add(joinPath(p, "/Storage")) add(joinPath(p, "/SimpleStorage")) add(joinPath(p, "/Storage/IntelVROC")) @@ -2524,6 +2567,12 @@ func redfishSnapshotPrioritySeeds(systemPaths, chassisPaths, managerPaths []stri add(p) add(joinPath(p, "/Oem/Public/FRU")) add(joinPath(p, "/Sensors")) + add(joinPath(p, "/HealthSummary")) + add(joinPath(p, "/ThresholdSensors")) + add(joinPath(p, "/DiscreteSensors")) + add(joinPath(p, "/Boards")) + add(joinPath(p, "/Backplanes")) + add(joinPath(p, "/Assembly")) add(joinPath(p, "/Thermal")) add(joinPath(p, "/EnvironmentMetrics")) add(joinPath(p, "/PCIeDevices")) diff --git a/internal/collector/redfish_replay.go b/internal/collector/redfish_replay.go index a68e61f..57792b4 100644 --- a/internal/collector/redfish_replay.go +++ b/internal/collector/redfish_replay.go @@ -1,6 +1,7 @@ package collector import ( + "encoding/json" "fmt" "sort" "strings" @@ -55,6 +56,7 @@ func ReplayRedfishFromRawPayloads(rawPayloads map[string]any, emit ProgressFn) ( if len(fruDoc) == 0 { fruDoc = chassisFRUDoc } + boardFallbackDocs := r.collectBoardFallbackDocs(chassisPaths) if emit != nil { 
emit(Progress{Status: "running", Progress: 55, Message: "Redfish snapshot: replay CPU/RAM/Storage..."}) @@ -71,16 +73,24 @@ func ReplayRedfishFromRawPayloads(rawPayloads map[string]any, emit ProgressFn) ( pcieDevices := r.collectPCIeDevices(systemPaths, chassisPaths) gpus := r.collectGPUs(systemPaths, chassisPaths) nics := r.collectNICs(chassisPaths) + r.enrichNICsFromNetworkInterfaces(&nics, systemPaths) + thresholdSensors := r.collectThresholdSensors(chassisPaths) + discreteEvents := r.collectDiscreteSensorEvents(chassisPaths) + healthEvents := r.collectHealthSummaryEvents(chassisPaths) managerDoc, _ := r.getJSON(primaryManager) networkProtocolDoc, _ := r.getJSON(joinPath(primaryManager, "/NetworkProtocol")) + firmware := parseFirmware(systemDoc, biosDoc, managerDoc, secureBootDoc, networkProtocolDoc) + firmware = dedupeFirmwareInfo(append(firmware, r.collectFirmwareInventory()...)) + boardInfo := parseBoardInfoWithFallback(systemDoc, chassisDoc, fruDoc) + applyBoardInfoFallbackFromDocs(&boardInfo, boardFallbackDocs) result := &models.AnalysisResult{ - Events: make([]models.Event, 0), + Events: append(append(make([]models.Event, 0, len(discreteEvents)+len(healthEvents)+1), healthEvents...), discreteEvents...), FRU: make([]models.FRUInfo, 0), - Sensors: make([]models.SensorReading, 0), + Sensors: thresholdSensors, RawPayloads: cloneRawPayloads(rawPayloads), Hardware: &models.HardwareConfig{ - BoardInfo: parseBoardInfoWithFallback(systemDoc, chassisDoc, fruDoc), + BoardInfo: boardInfo, CPUs: parseCPUs(processors), Memory: parseMemory(memory), Storage: storageDevices, @@ -89,7 +99,7 @@ func ReplayRedfishFromRawPayloads(rawPayloads map[string]any, emit ProgressFn) ( GPUs: gpus, PowerSupply: psus, NetworkAdapters: nics, - Firmware: parseFirmware(systemDoc, biosDoc, managerDoc, secureBootDoc, networkProtocolDoc), + Firmware: firmware, }, } appendMissingServerModelWarning(result, systemDoc, joinPath(primarySystem, "/Oem/Public/FRU"), joinPath(primaryChassis, 
"/Oem/Public/FRU")) @@ -158,6 +168,324 @@ func redfishFetchErrorsFromRawPayloads(rawPayloads map[string]any) map[string]st } } +func (r redfishSnapshotReader) collectFirmwareInventory() []models.FirmwareInfo { + docs, err := r.getCollectionMembers("/redfish/v1/UpdateService/FirmwareInventory") + if err != nil || len(docs) == 0 { + return nil + } + out := make([]models.FirmwareInfo, 0, len(docs)) + for _, doc := range docs { + version := firstNonEmpty( + asString(doc["Version"]), + asString(doc["FirmwareVersion"]), + asString(doc["SoftwareVersion"]), + ) + if strings.TrimSpace(version) == "" { + continue + } + name := firstNonEmpty( + asString(doc["DeviceName"]), + asString(doc["Name"]), + asString(doc["Id"]), + ) + if strings.TrimSpace(name) == "" { + continue + } + out = append(out, models.FirmwareInfo{DeviceName: name, Version: version}) + } + return out +} + +func dedupeFirmwareInfo(items []models.FirmwareInfo) []models.FirmwareInfo { + seen := make(map[string]struct{}, len(items)) + out := make([]models.FirmwareInfo, 0, len(items)) + for _, fw := range items { + name := strings.TrimSpace(fw.DeviceName) + ver := strings.TrimSpace(fw.Version) + if name == "" || ver == "" { + continue + } + key := strings.ToLower(name + "|" + ver) + if _, ok := seen[key]; ok { + continue + } + seen[key] = struct{}{} + out = append(out, models.FirmwareInfo{DeviceName: name, Version: ver}) + } + return out +} + +func (r redfishSnapshotReader) collectThresholdSensors(chassisPaths []string) []models.SensorReading { + out := make([]models.SensorReading, 0) + seen := make(map[string]struct{}) + for _, chassisPath := range chassisPaths { + docs, err := r.getCollectionMembers(joinPath(chassisPath, "/ThresholdSensors")) + if err != nil || len(docs) == 0 { + continue + } + for _, doc := range docs { + sensor, ok := parseThresholdSensor(doc) + if !ok { + continue + } + key := strings.ToLower(strings.TrimSpace(sensor.Name)) + if key == "" { + key = 
strings.ToLower(strings.TrimSpace(sensor.Type) + "|" + strings.TrimSpace(sensor.RawValue)) + } + if _, exists := seen[key]; exists { + continue + } + seen[key] = struct{}{} + out = append(out, sensor) + } + } + return out +} + +func parseThresholdSensor(doc map[string]interface{}) (models.SensorReading, bool) { + if len(doc) == 0 { + return models.SensorReading{}, false + } + name := firstNonEmpty(asString(doc["Name"]), asString(doc["Id"])) + status := mapStatus(doc["Status"]) + if status == "" { + status = firstNonEmpty(asString(doc["Health"]), asString(doc["State"])) + } + reading := 0.0 + unit := "" + rawValue := "" + switch { + case asString(doc["ReadingCelsius"]) != "": + reading = asFloat(doc["ReadingCelsius"]) + unit = "C" + rawValue = asString(doc["ReadingCelsius"]) + case asString(doc["ReadingVolts"]) != "": + reading = asFloat(doc["ReadingVolts"]) + unit = "V" + rawValue = asString(doc["ReadingVolts"]) + case asString(doc["ReadingAmps"]) != "": + reading = asFloat(doc["ReadingAmps"]) + unit = "A" + rawValue = asString(doc["ReadingAmps"]) + case asString(doc["ReadingWatts"]) != "": + reading = asFloat(doc["ReadingWatts"]) + unit = "W" + rawValue = asString(doc["ReadingWatts"]) + case asString(doc["Reading"]) != "": + reading = asFloat(doc["Reading"]) + unit = asString(doc["ReadingUnits"]) + rawValue = asString(doc["Reading"]) + } + + if name == "" && rawValue == "" && status == "" { + return models.SensorReading{}, false + } + return models.SensorReading{ + Name: firstNonEmpty(name, "threshold-sensor"), + Type: firstNonEmpty(asString(doc["ReadingType"]), asString(doc["SensorType"]), "threshold"), + Value: reading, + Unit: unit, + RawValue: rawValue, + Status: status, + }, true +} + +func (r redfishSnapshotReader) collectDiscreteSensorEvents(chassisPaths []string) []models.Event { + out := make([]models.Event, 0) + for _, chassisPath := range chassisPaths { + docs, err := r.getCollectionMembers(joinPath(chassisPath, "/DiscreteSensors")) + if err != nil || 
len(docs) == 0 { + continue + } + for _, doc := range docs { + name := firstNonEmpty(asString(doc["Name"]), asString(doc["Id"])) + status := mapStatus(doc["Status"]) + if status == "" { + status = firstNonEmpty(asString(doc["Health"]), asString(doc["State"])) + } + if name == "" || status == "" { + continue + } + normalized := strings.ToLower(strings.TrimSpace(status)) + if normalized == "ok" || normalized == "enabled" || normalized == "normal" || normalized == "present" { + continue + } + out = append(out, models.Event{ + Timestamp: time.Now(), + Source: "Redfish", + SensorName: name, + EventType: "Discrete Sensor Status", + Severity: models.SeverityWarning, + Description: fmt.Sprintf("%s reports %s", name, status), + RawData: firstNonEmpty(asString(doc["Description"]), status), + }) + } + } + return out +} + +func (r redfishSnapshotReader) collectHealthSummaryEvents(chassisPaths []string) []models.Event { + out := make([]models.Event, 0) + for _, chassisPath := range chassisPaths { + doc, err := r.getJSON(joinPath(chassisPath, "/HealthSummary")) + if err != nil || len(doc) == 0 { + continue + } + health := firstNonEmpty( + mapStatus(doc["Status"]), + asString(doc["Health"]), + asString(doc["HealthRollup"]), + findFirstNormalizedStringByKeys(doc, "Health", "HealthRollup", "OverallHealth"), + ) + if health == "" { + continue + } + if strings.EqualFold(health, "OK") || strings.EqualFold(health, "Normal") { + continue + } + raw, _ := json.Marshal(doc) + out = append(out, models.Event{ + Timestamp: time.Now(), + Source: "Redfish", + EventType: "Health Summary", + Severity: models.SeverityWarning, + Description: fmt.Sprintf("Chassis health summary reports %s", health), + RawData: string(raw), + }) + } + return out +} + +func (r redfishSnapshotReader) enrichNICsFromNetworkInterfaces(nics *[]models.NetworkAdapter, systemPaths []string) { + if nics == nil { + return + } + bySlot := make(map[string]int, len(*nics)) + for i, nic := range *nics { + 
bySlot[strings.ToLower(strings.TrimSpace(nic.Slot))] = i + } + + for _, systemPath := range systemPaths { + ifaces, err := r.getCollectionMembers(joinPath(systemPath, "/NetworkInterfaces")) + if err != nil || len(ifaces) == 0 { + continue + } + for _, iface := range ifaces { + slot := firstNonEmpty(asString(iface["Id"]), asString(iface["Name"])) + if strings.TrimSpace(slot) == "" { + continue + } + idx, ok := bySlot[strings.ToLower(strings.TrimSpace(slot))] + if !ok { + *nics = append(*nics, models.NetworkAdapter{ + Slot: slot, + Present: true, + Model: firstNonEmpty(asString(iface["Model"]), asString(iface["Name"])), + Status: mapStatus(iface["Status"]), + }) + idx = len(*nics) - 1 + bySlot[strings.ToLower(strings.TrimSpace(slot))] = idx + } + + portsPath := redfishLinkedPath(iface, "NetworkPorts") + if portsPath == "" { + continue + } + portDocs, err := r.getCollectionMembers(portsPath) + if err != nil || len(portDocs) == 0 { + continue + } + macs := append([]string{}, (*nics)[idx].MACAddresses...) + for _, p := range portDocs { + macs = append(macs, collectNetworkPortMACs(p)...) 
+ } + (*nics)[idx].MACAddresses = dedupeStrings(macs) + if (*nics)[idx].PortCount == 0 { + (*nics)[idx].PortCount = len(portDocs) + } + } + } +} + +func collectNetworkPortMACs(doc map[string]interface{}) []string { + if len(doc) == 0 { + return nil + } + out := make([]string, 0, 4) + if list, ok := doc["AssociatedNetworkAddresses"].([]interface{}); ok { + for _, item := range list { + if s := strings.TrimSpace(asString(item)); s != "" { + out = append(out, s) + } + } + } + for _, key := range []string{"MACAddress", "PermanentMACAddress", "CurrentMACAddress"} { + if s := strings.TrimSpace(asString(doc[key])); s != "" { + out = append(out, s) + } + } + return out +} + +func dedupeStrings(items []string) []string { + seen := make(map[string]struct{}, len(items)) + out := make([]string, 0, len(items)) + for _, item := range items { + s := strings.TrimSpace(item) + if s == "" { + continue + } + key := strings.ToLower(s) + if _, ok := seen[key]; ok { + continue + } + seen[key] = struct{}{} + out = append(out, s) + } + return out +} + +func (r redfishSnapshotReader) collectBoardFallbackDocs(chassisPaths []string) []map[string]interface{} { + out := make([]map[string]interface{}, 0) + for _, chassisPath := range chassisPaths { + for _, suffix := range []string{"/Boards", "/Backplanes", "/Assembly"} { + path := joinPath(chassisPath, suffix) + if docs, err := r.getCollectionMembers(path); err == nil && len(docs) > 0 { + out = append(out, docs...) 
+ continue + } + if doc, err := r.getJSON(path); err == nil && len(doc) > 0 { + out = append(out, doc) + } + } + } + return out +} + +func applyBoardInfoFallbackFromDocs(board *models.BoardInfo, docs []map[string]interface{}) { + if board == nil || len(docs) == 0 { + return + } + for _, doc := range docs { + candidate := parseBoardInfoFromFRUDoc(doc) + if board.Manufacturer == "" { + board.Manufacturer = candidate.Manufacturer + } + if board.ProductName == "" { + board.ProductName = candidate.ProductName + } + if board.SerialNumber == "" { + board.SerialNumber = candidate.SerialNumber + } + if board.PartNumber == "" { + board.PartNumber = candidate.PartNumber + } + if board.Manufacturer != "" && board.ProductName != "" && board.SerialNumber != "" && board.PartNumber != "" { + return + } + } +} + type redfishSnapshotReader struct { tree map[string]interface{} } diff --git a/internal/server/job_manager.go b/internal/server/job_manager.go index 42228b1..484d04f 100644 --- a/internal/server/job_manager.go +++ b/internal/server/job_manager.go @@ -2,6 +2,7 @@ package server import ( "context" + "fmt" "sync" "time" ) @@ -23,7 +24,7 @@ func (m *JobManager) CreateJob(req CollectRequest) *Job { ID: generateJobID(), Status: CollectStatusQueued, Progress: 0, - Logs: []string{"Задача поставлена в очередь"}, + Logs: []string{formatCollectLogLine(now, "Задача поставлена в очередь")}, CreatedAt: now, UpdatedAt: now, RequestMeta: CollectRequestMeta{ @@ -65,7 +66,7 @@ func (m *JobManager) CancelJob(id string) (*Job, bool) { job.Status = CollectStatusCanceled job.Error = "" job.UpdatedAt = time.Now().UTC() - job.Logs = append(job.Logs, "Сбор отменен пользователем") + job.Logs = append(job.Logs, formatCollectLogLine(job.UpdatedAt, "Сбор отменен пользователем")) } cancelFn := job.cancel @@ -120,6 +121,7 @@ func (m *JobManager) AppendJobLog(id, message string) (*Job, bool) { job.Logs = append(job.Logs, message) job.UpdatedAt = time.Now().UTC() + job.Logs[len(job.Logs)-1] = 
formatCollectLogLine(job.UpdatedAt, message) cloned := cloneJob(job) m.mu.Unlock() @@ -157,6 +159,17 @@ func normalizeProgress(progress int) int { return progress } +func formatCollectLogLine(ts time.Time, message string) string { + msg := message + if msg == "" { + msg = "-" + } + if ts.IsZero() { + ts = time.Now().UTC() + } + return fmt.Sprintf("%s %s", ts.Format(time.RFC3339Nano), msg) +} + func cloneJob(job *Job) *Job { if job == nil { return nil