From 612058ed16dd33a1c4b796c0408758e35d68b166 Mon Sep 17 00:00:00 2001 From: Michael Chus Date: Sat, 28 Feb 2026 17:56:04 +0300 Subject: [PATCH] redfish: optimize snapshot/plan-b crawl and add timing diagnostics --- internal/collector/redfish.go | 1320 +++++++++++++++++++++++--- internal/collector/redfish_replay.go | 32 +- internal/collector/redfish_test.go | 261 ++++- 3 files changed, 1473 insertions(+), 140 deletions(-) diff --git a/internal/collector/redfish.go b/internal/collector/redfish.go index cff0032..207a53a 100644 --- a/internal/collector/redfish.go +++ b/internal/collector/redfish.go @@ -63,28 +63,31 @@ func (c *RedfishConnector) debugSnapshotf(format string, args ...interface{}) { } func (c *RedfishConnector) Collect(ctx context.Context, req Request, emit ProgressFn) (*models.AnalysisResult, error) { + collectStart := time.Now() baseURL, err := c.baseURL(req) if err != nil { return nil, err } - client := c.httpClient(req) + snapshotClient := c.httpClientWithTimeout(req, redfishSnapshotRequestTimeout()) + prefetchClient := c.httpClientWithTimeout(req, redfishPrefetchRequestTimeout()) + criticalClient := c.httpClientWithTimeout(req, redfishCriticalRequestTimeout()) if emit != nil { emit(Progress{Status: "running", Progress: 10, Message: "Redfish: подключение к BMC..."}) } - if _, err := c.getJSON(ctx, client, req, baseURL, "/redfish/v1"); err != nil { + if _, err := c.getJSON(ctx, snapshotClient, req, baseURL, "/redfish/v1"); err != nil { return nil, fmt.Errorf("redfish service root: %w", err) } - systemPaths := c.discoverMemberPaths(ctx, client, req, baseURL, "/redfish/v1/Systems", "/redfish/v1/Systems/1") - chassisPaths := c.discoverMemberPaths(ctx, client, req, baseURL, "/redfish/v1/Chassis", "/redfish/v1/Chassis/1") - managerPaths := c.discoverMemberPaths(ctx, client, req, baseURL, "/redfish/v1/Managers", "/redfish/v1/Managers/1") + systemPaths := c.discoverMemberPaths(ctx, snapshotClient, req, baseURL, "/redfish/v1/Systems", "/redfish/v1/Systems/1") + chassisPaths := c.discoverMemberPaths(ctx, snapshotClient, req, baseURL, "/redfish/v1/Chassis", "/redfish/v1/Chassis/1") + managerPaths := c.discoverMemberPaths(ctx, snapshotClient, req, baseURL, "/redfish/v1/Managers", "/redfish/v1/Managers/1") criticalPaths := redfishCriticalEndpoints(systemPaths, chassisPaths, managerPaths) - criticalClient := c.httpClientWithTimeout(req, redfishCriticalRequestTimeout()) if emit != nil { emit(Progress{Status: "running", Progress: 30, Message: "Redfish: чтение структуры Redfish..."}) } + prefetchedCritical := c.prefetchCriticalRedfishDocs(ctx, prefetchClient, req, baseURL, criticalPaths, emit) if emit != nil { emit(Progress{Status: "running", Progress: 55, Message: "Redfish: подготовка snapshot..."}) @@ -92,9 +95,29 @@ func (c *RedfishConnector) Collect(ctx context.Context, req Request, emit Progre emit(Progress{Status: "running", Progress: 90, Message: "Redfish: сбор расширенного snapshot..."}) } c.debugSnapshotf("snapshot crawl start host=%s port=%d", req.Host, req.Port) - rawTree, fetchErrors := c.collectRawRedfishTree(ctx, client, req, baseURL, redfishSnapshotPrioritySeeds(systemPaths, chassisPaths, managerPaths), emit) + rawTree, fetchErrors := c.collectRawRedfishTree(ctx, snapshotClient, req, baseURL, redfishSnapshotPrioritySeeds(systemPaths, chassisPaths, managerPaths), emit) + if len(prefetchedCritical) > 0 { + reused := 0 + for p, doc := range prefetchedCritical { + if _, exists := rawTree[p]; exists { + continue + } + rawTree[p] = doc + reused++ + } + if emit != nil && reused > 0 { + emit(Progress{ + Status: "running", + Progress: 96, + Message: fmt.Sprintf("Redfish: prefetch использован для %d документов", reused), + }) + } + } c.debugSnapshotf("snapshot crawl done docs=%d", len(rawTree)) fetchErrMap := redfishFetchErrorListToMap(fetchErrors) + for p := range prefetchedCritical { + delete(fetchErrMap, p) + } if recoveredN := c.recoverCriticalRedfishDocsPlanB(ctx, criticalClient, req, baseURL, criticalPaths, rawTree, fetchErrMap, emit); recoveredN > 0 { c.debugSnapshotf("critical plan-b recovered docs=%d", recoveredN) } @@ -114,7 +137,124 @@ func (c *RedfishConnector) Collect(ctx context.Context, req Request, emit Progre rawPayloads["redfish_fetch_errors"] = redfishFetchErrorMapToList(fetchErrMap) } // Unified tunnel: live collection and raw import go through the same analyzer over redfish_tree. - return ReplayRedfishFromRawPayloads(rawPayloads, nil) + result, err := ReplayRedfishFromRawPayloads(rawPayloads, nil) + if err != nil { + return nil, err + } + totalElapsed := time.Since(collectStart).Round(time.Second) + log.Printf("redfish-collect: completed in %s (docs=%d, fetch_errors=%d)", totalElapsed, len(rawTree), len(fetchErrMap)) + if emit != nil { + emit(Progress{ + Status: "running", + Progress: 100, + Message: fmt.Sprintf("Redfish: сбор завершен за %s", totalElapsed), + }) + } + return result, nil +} + +func (c *RedfishConnector) prefetchCriticalRedfishDocs( + ctx context.Context, + client *http.Client, + req Request, + baseURL string, + criticalPaths []string, + emit ProgressFn, +) map[string]interface{} { + if !redfishPrefetchEnabled() || len(criticalPaths) == 0 { + return nil + } + + targets := redfishPrefetchTargets(criticalPaths) + if len(targets) == 0 { + return nil + } + if emit != nil { + emit(Progress{ + Status: "running", + Progress: 35, + Message: fmt.Sprintf("Redfish: prefetch критичных endpoint (%d)...", len(targets)), + }) + } + + out := make(map[string]interface{}, len(targets)) + seen := make(map[string]struct{}, len(targets)) + var mu sync.Mutex + + addDoc := func(path string, doc map[string]interface{}) { + path = normalizeRedfishPath(path) + if path == "" || len(doc) == 0 { + return + } + mu.Lock() + if _, exists := seen[path]; !exists { + seen[path] = struct{}{} + out[path] = doc + } + mu.Unlock() + } + + workerN := redfishPrefetchWorkers() + jobs := make(chan string, len(targets)) + var wg sync.WaitGroup + for i := 0; i < workerN; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for p := range jobs { + doc, err := c.getJSONWithRetry(ctx, client, req, baseURL, p, redfishPrefetchRetryAttempts(), redfishPrefetchRetryBackoff()) + if err != nil { + continue + } + addDoc(p, doc) + memberPaths := redfishCollectionMemberRefs(doc) + if len(memberPaths) == 0 { + continue + } + if maxMembers := redfishPrefetchMemberRecoveryMax(); maxMembers > 0 && len(memberPaths) > maxMembers { + memberPaths = memberPaths[:maxMembers] + } + for _, memberPath := range memberPaths { + memberPath = normalizeRedfishPath(memberPath) + if memberPath == "" { + continue + } + mu.Lock() + _, exists := seen[memberPath] + mu.Unlock() + if exists { + continue + } + memberDoc, err := c.getJSONWithRetry(ctx, client, req, baseURL, memberPath, redfishPrefetchMemberRetryAttempts(), redfishPrefetchRetryBackoff()) + if err != nil { + continue + } + addDoc(memberPath, memberDoc) + } + } + }() + } + + for _, p := range targets { + select { + case jobs <- p: + case <-ctx.Done(): + close(jobs) + wg.Wait() + return out + } + } + close(jobs) + wg.Wait() + + if emit != nil { + emit(Progress{ + Status: "running", + Progress: 40, + Message: fmt.Sprintf("Redfish: prefetch завершен (targets=%d, docs=%d)", len(targets), len(out)), + }) + } + return out } func (c *RedfishConnector) httpClient(req Request) *http.Client { @@ -311,7 +451,6 @@ func (c *RedfishConnector) collectStorageVolumes(ctx context.Context, client *ht func (c *RedfishConnector) collectNICs(ctx context.Context, client *http.Client, req Request, baseURL string, chassisPaths []string) []models.NetworkAdapter { var nics []models.NetworkAdapter - seen := make(map[string]struct{}) for _, chassisPath := range chassisPaths { adapterDocs, err := c.getCollectionMembers(ctx, client, req, baseURL, joinPath(chassisPath, "/NetworkAdapters")) if err != nil { @@ -327,23 +466,15 @@ func (c *RedfishConnector) collectNICs(ctx context.Context, client *http.Client, functionDocs := c.getLinkedPCIeFunctions(ctx, client, req, baseURL, pcieDoc) enrichNICFromPCIe(&nic, pcieDoc, functionDocs) } - key := firstNonEmpty(nic.SerialNumber, nic.Slot+"|"+nic.Model) - if key == "" { - continue - } - if _, ok := seen[key]; ok { - continue - } - seen[key] = struct{}{} nics = append(nics, nic) } } - return nics + return dedupeNetworkAdapters(nics) } func (c *RedfishConnector) collectPSUs(ctx context.Context, client *http.Client, req Request, baseURL string, chassisPaths []string) []models.PSU { var out []models.PSU - seen := make(map[string]struct{}) + seen := make(map[string]int) idx := 1 for _, chassisPath := range chassisPaths { // Redfish 2022+/X14+ commonly uses PowerSubsystem as the primary source. @@ -370,18 +501,27 @@ func (c *RedfishConnector) collectPSUs(ctx context.Context, client *http.Client, return out } -func appendPSU(out *[]models.PSU, seen map[string]struct{}, psu models.PSU, currentIdx int) int { +func appendPSU(out *[]models.PSU, seen map[string]int, psu models.PSU, currentIdx int) int { nextIdx := currentIdx + 1 - key := firstNonEmpty(psu.SerialNumber, psu.Slot+"|"+psu.Model) - if key == "" { + keys := psuIdentityKeys(psu) + if len(keys) == 0 { return nextIdx } - if _, ok := seen[key]; ok { - return nextIdx + for _, key := range keys { + if idx, ok := seen[key]; ok { + (*out)[idx] = mergePSUEntries((*out)[idx], psu) + for _, mergedKey := range psuIdentityKeys((*out)[idx]) { + seen[mergedKey] = idx + } + return nextIdx + } + } + idx := len(*out) + for _, key := range keys { + seen[key] = idx } - seen[key] = struct{}{} *out = append(*out, psu) - return len(*out) + 1 + return nextIdx } func (c *RedfishConnector) collectKnownStorageMembers(ctx context.Context, client *http.Client, req Request, baseURL, systemPath string, relativeCollections []string) []map[string]interface{} { @@ -461,7 +601,6 @@ func (c *RedfishConnector) collectPCIeDevices(ctx context.Context, client *http. } var out []models.PCIeDevice - seen := make(map[string]struct{}) for _, collectionPath := range collections { memberDocs, err := c.getCollectionMembers(ctx, client, req, baseURL, collectionPath) if err != nil || len(memberDocs) == 0 { @@ -471,14 +610,6 @@ func (c *RedfishConnector) collectPCIeDevices(ctx context.Context, client *http. for _, doc := range memberDocs { functionDocs := c.getLinkedPCIeFunctions(ctx, client, req, baseURL, doc) dev := parsePCIeDevice(doc, functionDocs) - key := pcieDeviceDedupKey(dev) - if key == "" { - continue - } - if _, ok := seen[key]; ok { - continue - } - seen[key] = struct{}{} out = append(out, dev) } } @@ -491,19 +622,11 @@ func (c *RedfishConnector) collectPCIeDevices(ctx context.Context, client *http. } for idx, fn := range functionDocs { dev := parsePCIeFunction(fn, idx+1) - key := pcieDeviceDedupKey(dev) - if key == "" { - continue - } - if _, ok := seen[key]; ok { - continue - } - seen[key] = struct{}{} out = append(out, dev) } } - return out + return dedupePCIeDevices(out) } func (c *RedfishConnector) discoverMemberPaths(ctx context.Context, client *http.Client, req Request, baseURL, collectionPath, fallbackPath string) []string { @@ -535,13 +658,14 @@ func (c *RedfishConnector) discoverMemberPaths(ctx context.Context, client *http func (c *RedfishConnector) collectRawRedfishTree(ctx context.Context, client *http.Client, req Request, baseURL string, seedPaths []string, emit ProgressFn) (map[string]interface{}, []map[string]interface{}) { maxDocuments := redfishSnapshotMaxDocuments() - const workers = 6 + workers := redfishSnapshotWorkers() const heartbeatInterval = 5 * time.Second crawlStart := time.Now() memoryClient := c.httpClientWithTimeout(req, redfishSnapshotMemoryRequestTimeout()) memoryGate := make(chan struct{}, redfishSnapshotMemoryConcurrency()) branchLimiter := newRedfishSnapshotBranchLimiter(redfishSnapshotBranchConcurrency()) branchRetryPause := redfishSnapshotBranchRequeueBackoff() + timings := newRedfishPathTimingCollector(4) out := make(map[string]interface{}, maxDocuments) fetchErrors := make(map[string]string) @@ -655,6 +779,7 @@ func (c *RedfishConnector) collectRawRedfishTree(ctx context.Context, client *ht lastPath.Store(current) c.debugSnapshotf("worker=%d fetch start path=%s queue_len=%d", workerID, current, len(jobs)) + fetchStart := time.Now() doc, err := func() (map[string]interface{}, error) { defer branchLimiter.release(current) if !isRedfishMemoryMemberPath(current) { @@ -676,6 +801,7 @@ func (c *RedfishConnector) collectRawRedfishTree(ctx context.Context, client *ht redfishSnapshotMemoryRetryBackoff(), ) }() + timings.Observe(current, time.Since(fetchStart), err != nil) if err == nil { mu.Lock() out[current] = doc @@ -824,6 +950,18 @@ func (c *RedfishConnector) collectRawRedfishTree(ctx context.Context, client *ht sort.Slice(errorList, func(i, j int) bool { return asString(errorList[i]["path"]) < asString(errorList[j]["path"]) }) + if summary := timings.Summary(12); summary != "" { + log.Printf("redfish-snapshot-timing: %s", summary) + } + if emit != nil { + if summary := timings.Summary(3); summary != "" { + emit(Progress{ + Status: "running", + Progress: 98, + Message: fmt.Sprintf("Redfish snapshot: топ веток по времени: %s", summary), + }) + } + } return out, errorList } @@ -979,6 +1117,7 @@ func directNumericProbePlan(collectionPath string) (maxItems, startIndex, missBu func shouldPostProbeCollectionPath(path string) bool { path = normalizeRedfishPath(path) + sensorProbeEnabled := redfishSnapshotSensorPostProbeEnabled() // Restrict expensive post-probe to collections that historically recover // missing inventory/telemetry on partially implemented BMCs. switch { @@ -987,8 +1126,9 @@ func shouldPostProbeCollectionPath(path string) bool { strings.HasSuffix(path, "/DiscreteSensors"), strings.HasSuffix(path, "/Temperatures"), strings.HasSuffix(path, "/Fans"), - strings.HasSuffix(path, "/Voltages"), - strings.HasSuffix(path, "/PowerSupplies"), + strings.HasSuffix(path, "/Voltages"): + return sensorProbeEnabled + case strings.HasSuffix(path, "/PowerSupplies"), strings.HasSuffix(path, "/EthernetInterfaces"), strings.HasSuffix(path, "/NetworkPorts"), strings.HasSuffix(path, "/Ports"), @@ -1069,13 +1209,12 @@ func redfishCriticalEndpoints(systemPaths, chassisPaths, managerPaths []string) add(joinPath(p, "/SecureBoot")) add(joinPath(p, "/Oem/Public")) add(joinPath(p, "/Oem/Public/FRU")) - add(joinPath(p, "/Oem/Public/ThermalConfig")) - add(joinPath(p, "/ThermalConfig")) add(joinPath(p, "/Processors")) add(joinPath(p, "/Memory")) add(joinPath(p, "/Storage")) add(joinPath(p, "/SimpleStorage")) add(joinPath(p, "/PCIeDevices")) + add(joinPath(p, "/PCIeFunctions")) add(joinPath(p, "/Accelerators")) add(joinPath(p, "/GraphicsControllers")) add(joinPath(p, "/EthernetInterfaces")) @@ -1085,17 +1224,7 @@ func redfishCriticalEndpoints(systemPaths, chassisPaths, managerPaths []string) add(p) add(joinPath(p, "/Oem/Public")) add(joinPath(p, "/Oem/Public/FRU")) - add(joinPath(p, "/Oem/Public/ThermalConfig")) - add(joinPath(p, "/ThermalConfig")) add(joinPath(p, "/Power")) - add(joinPath(p, "/Thermal")) - add(joinPath(p, "/Sensors")) - add(joinPath(p, "/HealthSummary")) - add(joinPath(p, "/ThresholdSensors")) - add(joinPath(p, "/DiscreteSensors")) - add(joinPath(p, "/Boards")) - add(joinPath(p, "/Backplanes")) - add(joinPath(p, "/Assembly")) add(joinPath(p, "/NetworkAdapters")) add(joinPath(p, "/PCIeDevices")) add(joinPath(p, "/Accelerators")) @@ -1150,6 +1279,179 @@ func isRetryableRedfishFetchError(err error) bool { return false } +func redfishSnapshotRequestTimeout() time.Duration { + if v := strings.TrimSpace(os.Getenv("LOGPILE_REDFISH_SNAPSHOT_TIMEOUT")); v != "" { + if d, err := time.ParseDuration(v); err == nil && d > 0 { + return d + } + } + return 12 * time.Second +} + +func redfishSnapshotWorkers() int { + if v := strings.TrimSpace(os.Getenv("LOGPILE_REDFISH_SNAPSHOT_WORKERS")); v != "" { + if n, err := strconv.Atoi(v); err == nil && n >= 1 && n <= 16 { + return n + } + } + return 4 +} + +func redfishPrefetchEnabled() bool { + if v := strings.TrimSpace(os.Getenv("LOGPILE_REDFISH_PREFETCH_ENABLED")); v != "" { + switch strings.ToLower(v) { + case "0", "false", "off", "no": + return false + default: + return true + } + } + return true +} + +func redfishPrefetchRequestTimeout() time.Duration { + if v := strings.TrimSpace(os.Getenv("LOGPILE_REDFISH_PREFETCH_TIMEOUT")); v != "" { + if d, err := time.ParseDuration(v); err == nil && d > 0 { + return d + } + } + return 20 * time.Second +} + +func redfishPrefetchRetryAttempts() int { + if v := strings.TrimSpace(os.Getenv("LOGPILE_REDFISH_PREFETCH_RETRIES")); v != "" { + if n, err := strconv.Atoi(v); err == nil && n >= 1 && n <= 8 { + return n + } + } + return 2 +} + +func redfishPrefetchMemberRetryAttempts() int { + if v := strings.TrimSpace(os.Getenv("LOGPILE_REDFISH_PREFETCH_MEMBER_RETRIES")); v != "" { + if n, err := strconv.Atoi(v); err == nil && n >= 1 && n <= 6 { + return n + } + } + return 1 +} + +func redfishPrefetchMemberRecoveryMax() int { + if v := strings.TrimSpace(os.Getenv("LOGPILE_REDFISH_PREFETCH_MEMBER_RECOVERY_MAX")); v != "" { + if n, err := strconv.Atoi(v); err == nil && n >= 1 && n <= 512 { + return n + } + } + return 48 +} + +func redfishPrefetchRetryBackoff() time.Duration { + if v := strings.TrimSpace(os.Getenv("LOGPILE_REDFISH_PREFETCH_BACKOFF")); v != "" { + if d, err := time.ParseDuration(v); err == nil && d >= 0 { + return d + } + } + return 900 * time.Millisecond +} + +func redfishPrefetchWorkers() int { + if v := strings.TrimSpace(os.Getenv("LOGPILE_REDFISH_PREFETCH_WORKERS")); v != "" { + if n, err := strconv.Atoi(v); err == nil && n >= 1 && n <= 8 { + return n + } + } + return 2 +} + +func redfishSnapshotSensorPostProbeEnabled() bool { + if v := strings.TrimSpace(os.Getenv("LOGPILE_REDFISH_SENSOR_POSTPROBE")); v != "" { + switch strings.ToLower(v) { + case "1", "true", "on", "yes": + return true + default: + return false + } + } + return false +} + +func redfishPrefetchTargets(criticalPaths []string) []string { + if len(criticalPaths) == 0 { + return nil + } + out := make([]string, 0, len(criticalPaths)) + seen := make(map[string]struct{}, len(criticalPaths)) + for _, p := range criticalPaths { + p = normalizeRedfishPath(p) + if p == "" || !shouldPrefetchCriticalPath(p) { + continue + } + if _, ok := seen[p]; ok { + continue + } + seen[p] = struct{}{} + out = append(out, p) + } + return out +} + +func shouldPrefetchCriticalPath(p string) bool { + p = normalizeRedfishPath(p) + if p == "" { + return false + } + for _, noisy := range []string{ + "/Fabrics", + "/Backplanes", + "/Boards", + "/Assembly", + "/Sensors", + "/ThresholdSensors", + "/DiscreteSensors", + "/ThermalConfig", + "/ThermalSubsystem", + "/EnvironmentMetrics", + "/Certificates", + "/LogServices", + } { + if strings.Contains(p, noisy) { + return false + } + } + for _, suffix := range []string{ + "/Bios", + "/SecureBoot", + "/Processors", + "/Memory", + "/Storage", + "/SimpleStorage", + "/PCIeDevices", + "/PCIeFunctions", + "/Accelerators", + "/GraphicsControllers", + "/EthernetInterfaces", + "/NetworkInterfaces", + "/NetworkAdapters", + "/Drives", + "/Power", + "/PowerSubsystem/PowerSupplies", + "/NetworkProtocol", + "/Oem/Public", + "/Oem/Public/FRU", + } { + if strings.HasSuffix(p, suffix) { + return true + } + } + switch p { + case "/redfish/v1/UpdateService", "/redfish/v1/UpdateService/FirmwareInventory": + return true + } + parts := strings.Split(strings.Trim(p, "/"), "/") + return len(parts) == 4 && parts[0] == "redfish" && parts[1] == "v1" && + (parts[2] == "Systems" || parts[2] == "Chassis" || parts[2] == "Managers") +} + func redfishCriticalRequestTimeout() time.Duration { if v := strings.TrimSpace(os.Getenv("LOGPILE_REDFISH_CRITICAL_TIMEOUT")); v != "" { if d, err := time.ParseDuration(v); err == nil && d > 0 { @@ -1679,6 +1981,7 @@ func (c *RedfishConnector) collectCriticalCollectionMembersSequential( func (c *RedfishConnector) recoverCriticalRedfishDocsPlanB(ctx context.Context, client *http.Client, req Request, baseURL string, criticalPaths []string, rawTree map[string]interface{}, fetchErrs map[string]string, emit ProgressFn) int { planBStart := time.Now() + timings := newRedfishPathTimingCollector(4) var targets []string seenTargets := make(map[string]struct{}) addTarget := func(path string) { @@ -1769,7 +2072,9 @@ func (c *RedfishConnector) recoverCriticalRedfishDocsPlanB(ctx context.Context, return recovered } } + reqStart := time.Now() doc, err := c.getJSONWithRetry(ctx, client, req, baseURL, p, redfishCriticalPlanBAttempts(), redfishCriticalRetryBackoff()) + timings.Observe(p, time.Since(reqStart), err != nil) if err == nil { rawTree[p] = doc delete(fetchErrs, p) @@ -1817,12 +2122,22 @@ func (c *RedfishConnector) recoverCriticalRedfishDocsPlanB(ctx context.Context, } } if emit != nil { + if summary := timings.Summary(3); summary != "" { + emit(Progress{ + Status: "running", + Progress: 97, + Message: fmt.Sprintf("Redfish: plan-B топ веток по времени: %s", summary), + }) + } emit(Progress{ Status: "running", Progress: 97, Message: fmt.Sprintf("Redfish: plan-B завершен за %s (targets=%d, recovered=%d)", time.Since(planBStart).Round(time.Second), len(targets), recovered), }) } + if summary := timings.Summary(12); summary != "" { + log.Printf("redfish-planb-timing: %s", summary) + } return recovered } @@ -2469,42 +2784,64 @@ func dropModelOnlyGPUPlaceholders(items []models.GPU) []models.GPU { // Merge serial from generic GraphicsControllers placeholders (slot ~= model) // into concrete PCIe rows (with BDF) when mapping is unambiguous. mergedPlaceholder := make(map[int]struct{}) + usedConcrete := make(map[int]struct{}) + unresolvedByGroup := make(map[string][]int) for i := range items { serial := normalizeRedfishIdentityField(items[i].SerialNumber) if serial == "" || strings.TrimSpace(items[i].BDF) != "" || !isModelOnlyGPUPlaceholder(items[i]) { continue } + candidates := matchingConcreteGPUIndexes(items, i, usedConcrete) candidate := -1 - model := strings.TrimSpace(items[i].Model) - mfr := strings.TrimSpace(items[i].Manufacturer) + if len(candidates) == 1 { + candidate = candidates[0] + } + + if candidate >= 0 { + mergeGPUPlaceholderIntoConcrete(&items[candidate], items[i]) + usedConcrete[candidate] = struct{}{} + mergedPlaceholder[i] = struct{}{} + continue + } + group := gpuModelVendorKey(items[i]) + if group == "" { + continue + } + unresolvedByGroup[group] = append(unresolvedByGroup[group], i) + } + + // Fallback mapping by order for ambiguous groups (e.g. same model x8). + for group, placeholders := range unresolvedByGroup { + donors := make([]int, 0, len(placeholders)) for j := range items { - if i == j { + if _, used := usedConcrete[j]; used { continue } - if !strings.EqualFold(strings.TrimSpace(items[j].Model), model) { + if !isConcreteGPUDonor(items[j]) { continue } - otherMfr := strings.TrimSpace(items[j].Manufacturer) - if mfr != "" && otherMfr != "" && !strings.EqualFold(mfr, otherMfr) { - continue - } - if strings.TrimSpace(items[j].BDF) == "" || isModelOnlyGPUPlaceholder(items[j]) { + if gpuModelVendorKey(items[j]) != group { continue } if normalizeRedfishIdentityField(items[j].SerialNumber) != "" { continue } - if candidate != -1 { - candidate = -2 - break - } - candidate = j + donors = append(donors, j) } - - if candidate >= 0 { - items[candidate].SerialNumber = serial - mergedPlaceholder[i] = struct{}{} + limit := len(placeholders) + if len(donors) < limit { + limit = len(donors) + } + for k := 0; k < limit; k++ { + pi := placeholders[k] + di := donors[k] + if normalizeRedfishIdentityField(items[pi].SerialNumber) == "" { + continue + } + mergeGPUPlaceholderIntoConcrete(&items[di], items[pi]) + usedConcrete[di] = struct{}{} + mergedPlaceholder[pi] = struct{}{} } } @@ -2545,6 +2882,74 @@ func isModelOnlyGPUPlaceholder(gpu models.GPU) bool { return strings.EqualFold(slot, model) || strings.HasPrefix(strings.ToUpper(slot), "GPU") } +func isConcreteGPUDonor(gpu models.GPU) bool { + if strings.TrimSpace(gpu.BDF) == "" { + return false + } + return !isModelOnlyGPUPlaceholder(gpu) +} + +func gpuModelVendorKey(gpu models.GPU) string { + model := strings.ToLower(strings.TrimSpace(gpu.Model)) + if model == "" { + return "" + } + mfr := strings.ToLower(strings.TrimSpace(gpu.Manufacturer)) + return model + "|" + mfr +} + +func matchingConcreteGPUIndexes(items []models.GPU, placeholderIdx int, usedConcrete map[int]struct{}) []int { + out := make([]int, 0, 2) + ph := items[placeholderIdx] + for j := range items { + if j == placeholderIdx { + continue + } + if _, used := usedConcrete[j]; used { + continue + } + if !isConcreteGPUDonor(items[j]) { + continue + } + if !strings.EqualFold(strings.TrimSpace(items[j].Model), strings.TrimSpace(ph.Model)) { + continue + } + otherMfr := strings.TrimSpace(items[j].Manufacturer) + phMfr := strings.TrimSpace(ph.Manufacturer) + if phMfr != "" && otherMfr != "" && !strings.EqualFold(phMfr, otherMfr) { + continue + } + if normalizeRedfishIdentityField(items[j].SerialNumber) != "" { + continue + } + out = append(out, j) + } + return out +} + +func mergeGPUPlaceholderIntoConcrete(concrete *models.GPU, placeholder models.GPU) { + if concrete == nil { + return + } + if normalizeRedfishIdentityField(concrete.SerialNumber) == "" { + if serial := normalizeRedfishIdentityField(placeholder.SerialNumber); serial != "" { + concrete.SerialNumber = serial + } + } + if strings.TrimSpace(concrete.UUID) == "" && strings.TrimSpace(placeholder.UUID) != "" { + concrete.UUID = placeholder.UUID + } + if strings.TrimSpace(concrete.PartNumber) == "" && strings.TrimSpace(placeholder.PartNumber) != "" { + concrete.PartNumber = placeholder.PartNumber + } + if strings.TrimSpace(concrete.Firmware) == "" && strings.TrimSpace(placeholder.Firmware) != "" { + concrete.Firmware = placeholder.Firmware + } + if strings.TrimSpace(concrete.Status) == "" && strings.TrimSpace(placeholder.Status) != "" { + concrete.Status = placeholder.Status + } +} + func hasMergedPlaceholderIndex(indexes map[int]struct{}, idx int) bool { _, ok := indexes[idx] return ok @@ -2650,25 +3055,651 @@ func dedupeStorage(items []models.Storage) []models.Storage { if len(items) <= 1 { return items } + // Pass 1: drop exact duplicates by identity and keep the richer variant. + out := dedupeStorageByIdentityPreferRich(items) + if len(out) <= 1 { + return out + } + // Pass 2: replace placeholder slots with rich drive data (slot is preserved). + merged, consumedDonors := mergeStoragePlaceholders(out) + if len(consumedDonors) > 0 { + compacted := make([]models.Storage, 0, len(merged)-len(consumedDonors)) + for i, item := range merged { + if _, consumed := consumedDonors[i]; consumed { + continue + } + compacted = append(compacted, item) + } + out = compacted + } else { + out = merged + } + // Pass 3: final identity dedupe after placeholder merge. + return dedupeStorageByIdentityPreferRich(out) +} + +func dedupeStorageByIdentityPreferRich(items []models.Storage) []models.Storage { + if len(items) == 0 { + return nil + } out := make([]models.Storage, 0, len(items)) - seen := make(map[string]struct{}, len(items)) + seen := make(map[string]int, len(items)) for _, item := range items { - key := firstNonEmpty( - normalizeRedfishIdentityField(item.SerialNumber), - strings.TrimSpace(item.Slot)+"|"+strings.TrimSpace(item.Model), - ) + key := storageIdentityKey(item) if key == "" { continue } - if _, ok := seen[key]; ok { + if idx, ok := seen[key]; ok { + out[idx] = richerStorageEntry(out[idx], item) continue } - seen[key] = struct{}{} + seen[key] = len(out) out = append(out, item) } return out } +func storageIdentityKey(item models.Storage) string { + if serial := normalizeRedfishIdentityField(item.SerialNumber); serial != "" { + return "sn:" + serial + } + slot := strings.TrimSpace(item.Slot) + model := strings.TrimSpace(item.Model) + if slot == "" && model == "" { + return "" + } + return "slotmodel:" + slot + "|" + model +} + +func richerStorageEntry(a, b models.Storage) models.Storage { + if storageRichnessScore(b) > storageRichnessScore(a) { + return b + } + return a +} + +func storageRichnessScore(item models.Storage) int { + score := 0 + if normalizeRedfishIdentityField(item.SerialNumber) != "" { + score += 100 + } + if item.SizeGB > 0 { + score += 40 + } + if normalizedStorageModel(item) != "" { + score += 20 + } + if normalizeRedfishIdentityField(item.Manufacturer) != "" { + score += 10 + } + if normalizeRedfishIdentityField(item.Firmware) != "" { + score += 8 + } + if strings.TrimSpace(item.Interface) != "" { + score += 5 + } + if strings.TrimSpace(item.Description) != "" { + score += 3 + } + if item.Present { + score++ + } + return score +} + +func normalizedStorageModel(item models.Storage) string { + model := normalizeRedfishIdentityField(item.Model) + if model == "" { + return "" + } + slot := strings.TrimSpace(item.Slot) + if slot != "" && strings.EqualFold(model, slot) { + return "" + } + return model +} + +func isStoragePlaceholder(item models.Storage) bool { + if normalizeRedfishIdentityField(item.SerialNumber) != "" { + return false + } + if item.SizeGB > 0 { + return false + } + if normalizedStorageModel(item) != "" { + return false + } + if normalizeRedfishIdentityField(item.Manufacturer) != "" { + return false + } + if normalizeRedfishIdentityField(item.Firmware) != "" { + return false + } + if strings.TrimSpace(item.Description) != "" { + return false + } + return true +} + +func isRichStorageDonor(item models.Storage) bool { + if isStoragePlaceholder(item) { + return false + } + return normalizeRedfishIdentityField(item.SerialNumber) != "" || + item.SizeGB > 0 || + normalizedStorageModel(item) != "" || + normalizeRedfishIdentityField(item.Manufacturer) != "" || + normalizeRedfishIdentityField(item.Firmware) != "" +} + +func mergeStoragePlaceholders(items []models.Storage) ([]models.Storage, map[int]struct{}) { + if len(items) <= 1 { + return items, nil + } + out := make([]models.Storage, len(items)) + copy(out, items) + + placeholderIdx := make([]int, 0, len(out)) + donorIdx := make([]int, 0, len(out)) + for i, item := range out { + if isStoragePlaceholder(item) { + placeholderIdx = append(placeholderIdx, i) + continue + } + if isRichStorageDonor(item) { + donorIdx = append(donorIdx, i) + } + } + if len(placeholderIdx) == 0 || len(donorIdx) == 0 { + return out, nil + } + + consumed := make(map[int]struct{}, len(donorIdx)) + for _, pi := range placeholderIdx { + di := findStorageDonorIndex(out, donorIdx, consumed, out[pi].Type) + if di < 0 { + continue + } + out[pi] = mergeStorageIntoPlaceholder(out[pi], out[di]) + consumed[di] = struct{}{} + } + if len(consumed) == 0 { + return out, nil + } + return out, consumed +} + +func findStorageDonorIndex(items []models.Storage, donors []int, consumed map[int]struct{}, placeholderType string) int { + placeholderType = strings.TrimSpace(strings.ToUpper(placeholderType)) + if placeholderType != "" { + for _, idx := range donors { + if _, used := consumed[idx]; used { + continue + } + if strings.TrimSpace(strings.ToUpper(items[idx].Type)) == placeholderType { + return idx + } + } + } + for _, idx := range donors { + if _, used := consumed[idx]; !used { + return idx + } + } + return -1 +} + +func mergeStorageIntoPlaceholder(placeholder, donor models.Storage) models.Storage { + out := placeholder + if strings.TrimSpace(out.Type) == "" { + out.Type = donor.Type + } + if normalizedStorageModel(out) == "" && normalizedStorageModel(donor) != "" { + out.Model = donor.Model + } + if out.SizeGB <= 0 && donor.SizeGB > 0 { + out.SizeGB = donor.SizeGB + } + if normalizeRedfishIdentityField(out.SerialNumber) == "" && normalizeRedfishIdentityField(donor.SerialNumber) != "" { + out.SerialNumber = donor.SerialNumber + } + if normalizeRedfishIdentityField(out.Manufacturer) == "" && normalizeRedfishIdentityField(donor.Manufacturer) != "" { + out.Manufacturer = donor.Manufacturer + } + if normalizeRedfishIdentityField(out.Firmware) == "" && normalizeRedfishIdentityField(donor.Firmware) != "" { + out.Firmware = donor.Firmware + } + if strings.TrimSpace(out.Interface) == "" && strings.TrimSpace(donor.Interface) != "" { + out.Interface = donor.Interface + } + if strings.TrimSpace(out.Location) == "" && strings.TrimSpace(donor.Location) != "" { + out.Location = donor.Location + } + if out.BackplaneID == 0 && donor.BackplaneID != 0 { + out.BackplaneID = donor.BackplaneID + } + if strings.TrimSpace(out.Status) == "" && strings.TrimSpace(donor.Status) != "" { + out.Status = donor.Status + } + if strings.TrimSpace(out.Description) == "" && strings.TrimSpace(donor.Description) != "" { + out.Description = donor.Description + } + if !out.Present { + out.Present = donor.Present + } + return out +} + +func dedupeNetworkAdapters(items []models.NetworkAdapter) []models.NetworkAdapter { + if len(items) <= 1 { + return items + } + out := make([]models.NetworkAdapter, 0, len(items)) + bySerial := make(map[string]int, len(items)) + bySlotModel := make(map[string]int, len(items)) + bySlot := make(map[string]int, len(items)) + + for _, item := range items { + serialKey := normalizeRedfishIdentityField(item.SerialNumber) + slotModelKey := networkAdapterSlotModelKey(item) + slotKey := strings.TrimSpace(item.Slot) + idx := -1 + if serialKey != "" { + if existing, ok := bySerial[serialKey]; ok { + idx = existing + } + } + if idx < 0 && slotModelKey != "" { + if existing, ok := bySlotModel[slotModelKey]; ok { + idx = existing + } + } + if idx < 0 && slotKey != "" { + if existing, ok := bySlot[slotKey]; ok { + idx = existing + } + } + + if idx >= 0 { + out[idx] = mergeNetworkAdapterEntries(out[idx], item) + } else { + idx = len(out) + out = append(out, item) + } + + merged := out[idx] + if serial := normalizeRedfishIdentityField(merged.SerialNumber); serial != "" { + bySerial[serial] = idx + } + if slotModel := networkAdapterSlotModelKey(merged); slotModel != "" { + bySlotModel[slotModel] = idx + } + if slot := strings.TrimSpace(merged.Slot); slot != "" { + bySlot[slot] = idx + } + } + return out +} + +func networkAdapterSlotModelKey(nic models.NetworkAdapter) string { + slot := strings.TrimSpace(nic.Slot) + model := normalizeNetworkAdapterModel(nic) + if slot == "" && model == "" { + return "" + } + return slot + "|" + model +} + +func normalizeNetworkAdapterModel(nic models.NetworkAdapter) string { + model := normalizeRedfishIdentityField(nic.Model) + if model == "" { + return "" + } + slot := strings.TrimSpace(nic.Slot) + if slot != "" && strings.EqualFold(slot, model) { + return "" + } + return model +} + +func networkAdapterRichnessScore(nic models.NetworkAdapter) int { + score := 0 + if normalizeRedfishIdentityField(nic.SerialNumber) != "" { + score += 80 + } + if normalizeNetworkAdapterModel(nic) != "" { + score += 20 + } + if normalizeRedfishIdentityField(nic.Vendor) != "" { + score += 10 + } + if normalizeRedfishIdentityField(nic.Firmware) != "" { + score += 8 + } + if normalizeRedfishIdentityField(nic.PartNumber) != "" { + score += 6 + } + if nic.VendorID > 0 { + score += 5 + } + if nic.DeviceID > 0 { + score += 5 + } + if nic.PortCount > 0 { + score += 4 + } + if len(nic.MACAddresses) > 0 { + score += 4 + } + if strings.TrimSpace(nic.Location) != "" { + score += 2 + } + if nic.Present { + score++ + } + return score +} + +func mergeNetworkAdapterEntries(a, b models.NetworkAdapter) models.NetworkAdapter { + base, donor := a, b + if networkAdapterRichnessScore(donor) > networkAdapterRichnessScore(base) { + base, donor = donor, base + } + out := base + if strings.TrimSpace(out.Slot) == "" && strings.TrimSpace(donor.Slot) != "" { + out.Slot = donor.Slot + } + if strings.TrimSpace(out.Location) == "" && strings.TrimSpace(donor.Location) != "" { + out.Location = donor.Location + } + if normalizeNetworkAdapterModel(out) == "" && normalizeNetworkAdapterModel(donor) != "" { + out.Model = donor.Model + } + if strings.TrimSpace(out.Description) == "" && strings.TrimSpace(donor.Description) != "" { + out.Description = donor.Description + } + if normalizeRedfishIdentityField(out.Vendor) == "" && normalizeRedfishIdentityField(donor.Vendor) != "" { + out.Vendor = donor.Vendor + } + if out.VendorID == 0 && donor.VendorID != 0 { + out.VendorID = donor.VendorID + } + if out.DeviceID == 0 && donor.DeviceID != 0 { + out.DeviceID = donor.DeviceID + } + if normalizeRedfishIdentityField(out.SerialNumber) == "" && normalizeRedfishIdentityField(donor.SerialNumber) != "" { + out.SerialNumber = donor.SerialNumber + } + if normalizeRedfishIdentityField(out.PartNumber) == "" && normalizeRedfishIdentityField(donor.PartNumber) != "" { + out.PartNumber = donor.PartNumber + } + if normalizeRedfishIdentityField(out.Firmware) == "" && normalizeRedfishIdentityField(donor.Firmware) != "" { + out.Firmware = donor.Firmware + } + if out.PortCount == 0 && donor.PortCount > 0 { + out.PortCount = donor.PortCount + } + if strings.TrimSpace(out.PortType) == "" && strings.TrimSpace(donor.PortType) != "" { + out.PortType = donor.PortType + } + if strings.TrimSpace(out.Status) == "" && strings.TrimSpace(donor.Status) != "" { + out.Status = donor.Status + } + out.Present = out.Present || donor.Present + if len(donor.MACAddresses) > 0 { + out.MACAddresses = dedupeStrings(append(append([]string{}, out.MACAddresses...), donor.MACAddresses...)) + } + return out +} + +func dedupePCIeDevices(items []models.PCIeDevice) []models.PCIeDevice { + if len(items) <= 1 { + return items + } + out := make([]models.PCIeDevice, 0, len(items)) + byPrimary := make(map[string]int, len(items)) + byLoose := make(map[string]int, len(items)) + + for _, item := range items { + primaryKey := pcieDeviceDedupKey(item) + looseKey := pcieDeviceLooseKey(item) + idx := -1 + if primaryKey != "" { + if existing, ok := byPrimary[primaryKey]; ok { + idx = existing + } + } + if idx < 0 && looseKey != "" { + if existing, ok := byLoose[looseKey]; ok { + idx = existing + } + } + + if idx >= 0 { + out[idx] = mergePCIeDeviceEntries(out[idx], item) + } else { + idx = len(out) + out = append(out, item) + } + + merged := out[idx] + if k := pcieDeviceDedupKey(merged); k != "" { + byPrimary[k] = idx + } + if k := pcieDeviceLooseKey(merged); k != "" { + byLoose[k] = idx + } + } + return out +} + +func pcieDeviceLooseKey(dev models.PCIeDevice) string { + return firstNonEmpty( + strings.TrimSpace(dev.Slot)+"|"+strings.TrimSpace(dev.PartNumber)+"|"+strings.TrimSpace(dev.DeviceClass), + strings.TrimSpace(dev.Slot)+"|"+strings.TrimSpace(dev.DeviceClass), + strings.TrimSpace(dev.PartNumber)+"|"+strings.TrimSpace(dev.DeviceClass), + strings.TrimSpace(dev.Description)+"|"+strings.TrimSpace(dev.DeviceClass), + ) +} + +func pcieDeviceRichnessScore(dev models.PCIeDevice) int { + score := 0 + if bdf := strings.TrimSpace(dev.BDF); looksLikeCanonicalBDF(bdf) { + score += 120 + } + if normalizeRedfishIdentityField(dev.SerialNumber) != "" { + score += 80 + } + if normalizeRedfishIdentityField(dev.PartNumber) != "" { + score += 20 + } + if normalizeRedfishIdentityField(dev.Manufacturer) != "" { + score += 10 + } + if dev.VendorID > 0 { + score += 8 + } + if dev.DeviceID > 0 { + score += 8 + } + if !isGenericPCIeClassLabel(dev.DeviceClass) { + score += 8 + } + if dev.LinkWidth > 0 || dev.MaxLinkWidth > 0 { + score += 6 + } + if strings.TrimSpace(dev.LinkSpeed) != "" || strings.TrimSpace(dev.MaxLinkSpeed) != "" { + score += 6 + } + if strings.TrimSpace(dev.Description) != "" { + score += 3 + } + if strings.TrimSpace(dev.Slot) != "" { + score += 2 + } + return score +} + +func mergePCIeDeviceEntries(a, b models.PCIeDevice) models.PCIeDevice { + base, donor := a, b + if pcieDeviceRichnessScore(donor) > pcieDeviceRichnessScore(base) { + base, donor = donor, base + } + out := base + if strings.TrimSpace(out.Slot) == "" && strings.TrimSpace(donor.Slot) != "" { + out.Slot = donor.Slot + } + if strings.TrimSpace(out.Description) == "" && strings.TrimSpace(donor.Description) != "" { + out.Description = donor.Description + } + if out.VendorID == 0 && donor.VendorID != 0 { + out.VendorID = donor.VendorID + } + if out.DeviceID == 0 && donor.DeviceID != 0 { + out.DeviceID = donor.DeviceID + } + if strings.TrimSpace(out.BDF) == "" && strings.TrimSpace(donor.BDF) != "" { + out.BDF = donor.BDF + } + if isGenericPCIeClassLabel(out.DeviceClass) && !isGenericPCIeClassLabel(donor.DeviceClass) { + out.DeviceClass = donor.DeviceClass + } + if normalizeRedfishIdentityField(out.Manufacturer) == "" && normalizeRedfishIdentityField(donor.Manufacturer) != "" { + out.Manufacturer = donor.Manufacturer + } + if out.LinkWidth == 0 && donor.LinkWidth > 0 { + out.LinkWidth = donor.LinkWidth + } + if strings.TrimSpace(out.LinkSpeed) == "" && strings.TrimSpace(donor.LinkSpeed) != "" { + out.LinkSpeed = donor.LinkSpeed + } + if out.MaxLinkWidth == 0 && donor.MaxLinkWidth > 0 { + out.MaxLinkWidth = donor.MaxLinkWidth + } + if strings.TrimSpace(out.MaxLinkSpeed) == "" && strings.TrimSpace(donor.MaxLinkSpeed) != "" { + out.MaxLinkSpeed = donor.MaxLinkSpeed + } + if normalizeRedfishIdentityField(out.PartNumber) == "" && normalizeRedfishIdentityField(donor.PartNumber) != "" { + out.PartNumber = donor.PartNumber + } + if normalizeRedfishIdentityField(out.SerialNumber) == "" && normalizeRedfishIdentityField(donor.SerialNumber) != "" { + out.SerialNumber = donor.SerialNumber + } + if strings.TrimSpace(out.Status) == "" && strings.TrimSpace(donor.Status) != "" { + out.Status = donor.Status + } + if len(donor.MACAddresses) > 0 { + out.MACAddresses = dedupeStrings(append(append([]string{}, out.MACAddresses...), donor.MACAddresses...)) + } + return out +} + +func psuIdentityKeys(psu models.PSU) []string { + keys := make([]string, 0, 3) + if serial := normalizeRedfishIdentityField(psu.SerialNumber); serial != "" { + keys = append(keys, "sn:"+serial) + } + slot := strings.TrimSpace(psu.Slot) + model := strings.TrimSpace(psu.Model) + if slot != "" && model != "" { + keys = append(keys, "slotmodel:"+slot+"|"+model) + } + if slot != "" { + keys = append(keys, "slot:"+slot) + } + if len(keys) == 0 && model != "" { + keys = append(keys, "model:"+model) + } + return keys +} + +func psuRichnessScore(psu models.PSU) int { + score := 0 + if normalizeRedfishIdentityField(psu.SerialNumber) != "" { + score += 100 + } + if normalizeRedfishIdentityField(psu.Model) != "" { + score += 20 + } + if psu.WattageW > 0 { + score += 20 + } + if normalizeRedfishIdentityField(psu.Vendor) != "" { + score += 8 + } + if normalizeRedfishIdentityField(psu.PartNumber) != "" { + score += 8 + } + if normalizeRedfishIdentityField(psu.Firmware) != "" { + score += 8 + } + if psu.InputPowerW > 0 || psu.OutputPowerW > 0 { + score += 6 + } + if psu.InputVoltage > 0 { + score += 4 + } + if psu.Present { + score++ + } + return score +} + +func mergePSUEntries(a, b models.PSU) models.PSU { + base, donor := a, b + if psuRichnessScore(donor) > psuRichnessScore(base) { + base, donor = donor, base + } + out := base + if strings.TrimSpace(out.Slot) == "" && strings.TrimSpace(donor.Slot) != "" { + out.Slot = donor.Slot + } + out.Present = out.Present || donor.Present + if normalizeRedfishIdentityField(out.Model) == "" && normalizeRedfishIdentityField(donor.Model) != "" { + out.Model = donor.Model + } + if strings.TrimSpace(out.Description) == "" && strings.TrimSpace(donor.Description) != "" { + out.Description = donor.Description + } + if normalizeRedfishIdentityField(out.Vendor) == "" && normalizeRedfishIdentityField(donor.Vendor) != "" { + out.Vendor = donor.Vendor + } + if out.WattageW == 0 && donor.WattageW > 0 { + out.WattageW = donor.WattageW + } + if normalizeRedfishIdentityField(out.SerialNumber) == "" && normalizeRedfishIdentityField(donor.SerialNumber) != "" { + out.SerialNumber = donor.SerialNumber + } + if normalizeRedfishIdentityField(out.PartNumber) == "" && normalizeRedfishIdentityField(donor.PartNumber) != "" { + out.PartNumber = donor.PartNumber + } + if normalizeRedfishIdentityField(out.Firmware) == "" && normalizeRedfishIdentityField(donor.Firmware) != "" { + out.Firmware = donor.Firmware + } + if strings.TrimSpace(out.Status) == "" && strings.TrimSpace(donor.Status) != "" { + out.Status = donor.Status + } + if strings.TrimSpace(out.InputType) == "" && strings.TrimSpace(donor.InputType) != "" { + out.InputType = donor.InputType + } + if out.InputPowerW == 0 && donor.InputPowerW > 0 { + out.InputPowerW = donor.InputPowerW + } + if out.OutputPowerW == 0 && donor.OutputPowerW > 0 { + out.OutputPowerW = donor.OutputPowerW + } + if out.InputVoltage == 0 && donor.InputVoltage > 0 { + out.InputVoltage = donor.InputVoltage + } + if out.OutputVoltage == 0 && donor.OutputVoltage > 0 { + out.OutputVoltage = donor.OutputVoltage + } + if out.TemperatureC == 0 && donor.TemperatureC > 0 { + out.TemperatureC = donor.TemperatureC + } + return out +} + func dedupeStorageVolumes(items []models.StorageVolume) []models.StorageVolume { seen := make(map[string]struct{}, len(items)) out := make([]models.StorageVolume, 0, len(items)) @@ -3054,6 +4085,101 @@ func redfishLocationLabel(v interface{}) string { } } +type redfishPathTiming struct { + Path string + Duration time.Duration + Requests int + Errors int +} + +type redfishPathTimingCollector struct { + depth int + mu sync.Mutex + byKey map[string]redfishPathTiming +} + +func newRedfishPathTimingCollector(depth int) *redfishPathTimingCollector { + if depth < 1 { + depth = 1 + } + return &redfishPathTimingCollector{ + depth: depth, + byKey: make(map[string]redfishPathTiming), + } +} + +func (c *redfishPathTimingCollector) Observe(path string, d time.Duration, failed bool) { + if c == nil { + return + } + key := redfishBranchPathForTiming(path, c.depth) + if key == "" { + return + } + c.mu.Lock() + item := c.byKey[key] + item.Path = key + item.Duration += d + item.Requests++ + if failed { + item.Errors++ + } + c.byKey[key] = item + c.mu.Unlock() +} + +func (c *redfishPathTimingCollector) Summary(limit int) string { + if c == nil || limit == 0 { + return "" + } + c.mu.Lock() + items := make([]redfishPathTiming, 0, len(c.byKey)) + for _, item := range c.byKey { + items = append(items, item) + } + c.mu.Unlock() + if len(items) == 0 { + return "" + } + sort.Slice(items, func(i, j int) bool { + if items[i].Duration == items[j].Duration { + if items[i].Requests == items[j].Requests { + return items[i].Path < items[j].Path + } + return items[i].Requests > items[j].Requests + } + return items[i].Duration > items[j].Duration + }) + if limit < 0 || limit > len(items) { + limit = len(items) + } + parts := make([]string, 0, limit) + for i := 0; i < limit; i++ { + item := items[i] + parts = append(parts, fmt.Sprintf("%s=%s(req=%d,err=%d)", item.Path, item.Duration.Round(time.Millisecond), item.Requests, item.Errors)) + } + return strings.Join(parts, "; ") +} + +func redfishBranchPathForTiming(path string, depth int) string { + normalized := normalizeRedfishPath(path) + if normalized == "" { + return "" + } + parts := strings.Split(strings.Trim(normalized, "/"), "/") + if len(parts) < 2 || parts[0] != "redfish" || parts[1] != "v1" { + return normalized + } + if depth < 1 { + depth = 1 + } + maxParts := 2 + depth + if len(parts) > maxParts { + parts = parts[:maxParts] + } + return "/" + strings.Join(parts, "/") +} + func compactProgressPath(p string) string { const maxLen = 72 if len(p) <= maxLen { @@ -3095,7 +4221,6 @@ func redfishSnapshotPrioritySeeds(systemPaths, chassisPaths, managerPaths []stri add("/redfish/v1/UpdateService") add("/redfish/v1/UpdateService/FirmwareInventory") - add("/redfish/v1/Fabrics") for _, p := range systemPaths { add(p) @@ -3103,14 +4228,11 @@ func redfishSnapshotPrioritySeeds(systemPaths, chassisPaths, managerPaths []stri add(joinPath(p, "/SecureBoot")) add(joinPath(p, "/Oem/Public")) add(joinPath(p, "/Oem/Public/FRU")) - add(joinPath(p, "/Oem/Public/ThermalConfig")) - add(joinPath(p, "/ThermalConfig")) add(joinPath(p, "/Processors")) add(joinPath(p, "/Memory")) add(joinPath(p, "/EthernetInterfaces")) add(joinPath(p, "/NetworkInterfaces")) add(joinPath(p, "/BootOptions")) - add(joinPath(p, "/Certificates")) add(joinPath(p, "/PCIeDevices")) add(joinPath(p, "/PCIeFunctions")) add(joinPath(p, "/Accelerators")) @@ -3125,37 +4247,15 @@ func redfishSnapshotPrioritySeeds(systemPaths, chassisPaths, managerPaths []stri add(p) add(joinPath(p, "/Oem/Public")) add(joinPath(p, "/Oem/Public/FRU")) - add(joinPath(p, "/Oem/Public/ThermalConfig")) - add(joinPath(p, "/ThermalConfig")) - add(joinPath(p, "/Sensors")) - add(joinPath(p, "/HealthSummary")) - add(joinPath(p, "/ThresholdSensors")) - add(joinPath(p, "/DiscreteSensors")) - add(joinPath(p, "/Boards")) - add(joinPath(p, "/Backplanes")) - add(joinPath(p, "/Assembly")) - add(joinPath(p, "/Thermal")) - add(joinPath(p, "/EnvironmentMetrics")) add(joinPath(p, "/PCIeDevices")) add(joinPath(p, "/PCIeSlots")) add(joinPath(p, "/NetworkAdapters")) add(joinPath(p, "/Drives")) - add(joinPath(p, "/Temperatures")) - add(joinPath(p, "/Fans")) - add(joinPath(p, "/Voltages")) - add(joinPath(p, "/PowerSubsystem")) - add(joinPath(p, "/PowerSubsystem/PowerSupplies")) - add(joinPath(p, "/PowerSubsystem/Voltages")) - add(joinPath(p, "/ThermalSubsystem")) - add(joinPath(p, "/ThermalSubsystem/Fans")) - add(joinPath(p, "/ThermalSubsystem/Temperatures")) add(joinPath(p, "/Power")) } for _, p := range managerPaths { add(p) add(joinPath(p, "/EthernetInterfaces")) - add(joinPath(p, "/NetworkProtocol/HTTPS/Certificates")) - add(joinPath(p, "/LogServices")) add(joinPath(p, "/NetworkProtocol")) } return out diff --git a/internal/collector/redfish_replay.go b/internal/collector/redfish_replay.go index b1414c6..d6df873 100644 --- a/internal/collector/redfish_replay.go +++ b/internal/collector/redfish_replay.go @@ -1105,7 +1105,6 @@ func (r redfishSnapshotReader) probeDirectDiskBayChildren(drivesCollectionPath s func (r redfishSnapshotReader) collectNICs(chassisPaths []string) []models.NetworkAdapter { var nics []models.NetworkAdapter - seen := make(map[string]struct{}) for _, chassisPath := range chassisPaths { adapterDocs, err := r.getCollectionMembers(joinPath(chassisPath, "/NetworkAdapters")) if err != nil { @@ -1121,23 +1120,15 @@ func (r redfishSnapshotReader) collectNICs(chassisPaths []string) []models.Netwo functionDocs := r.getLinkedPCIeFunctions(pcieDoc) enrichNICFromPCIe(&nic, pcieDoc, functionDocs) } - key := firstNonEmpty(nic.SerialNumber, nic.Slot+"|"+nic.Model) - if key == "" { - continue - } - if _, ok := seen[key]; ok { - continue - } - seen[key] = struct{}{} nics = append(nics, nic) } } - return nics + return dedupeNetworkAdapters(nics) } func (r redfishSnapshotReader) collectPSUs(chassisPaths []string) []models.PSU { var out []models.PSU - seen := make(map[string]struct{}) + seen := make(map[string]int) idx := 1 for _, chassisPath := range chassisPaths { if memberDocs, err := r.getCollectionMembers(joinPath(chassisPath, "/PowerSubsystem/PowerSupplies")); err == nil && len(memberDocs) > 0 { @@ -1213,7 +1204,6 @@ func (r redfishSnapshotReader) collectPCIeDevices(systemPaths, chassisPaths []st collections = append(collections, joinPath(chassisPath, "/PCIeDevices")) } var out []models.PCIeDevice - seen := make(map[string]struct{}) for _, collectionPath := range collections { memberDocs, err := r.getCollectionMembers(collectionPath) if err != nil || len(memberDocs) == 0 { @@ -1222,14 +1212,6 @@ func (r redfishSnapshotReader) collectPCIeDevices(systemPaths, chassisPaths []st for _, doc := range memberDocs { functionDocs := r.getLinkedPCIeFunctions(doc) dev := parsePCIeDevice(doc, functionDocs) - key := pcieDeviceDedupKey(dev) - if key == "" { - continue - } - if _, ok := seen[key]; ok { - continue - } - seen[key] = struct{}{} out = append(out, dev) } } @@ -1240,18 +1222,10 @@ func (r redfishSnapshotReader) collectPCIeDevices(systemPaths, chassisPaths []st } for idx, fn := range functionDocs { dev := parsePCIeFunction(fn, idx+1) - key := pcieDeviceDedupKey(dev) - if key == "" { - continue - } - if _, ok := seen[key]; ok { - continue - } - seen[key] = struct{}{} out = append(out, dev) } } - return out + return dedupePCIeDevices(out) } func stringsTrimTrailingSlash(s string) string { diff --git a/internal/collector/redfish_test.go b/internal/collector/redfish_test.go index 20dbeb8..e297ed0 100644 --- a/internal/collector/redfish_test.go +++ b/internal/collector/redfish_test.go @@ -3,6 +3,7 @@ package collector import ( "context" "encoding/json" + "fmt" "net/http" "net/http/httptest" "strings" @@ -1257,6 +1258,124 @@ func TestDedupeStorage_IgnoresPlaceholderSerial(t *testing.T) { } } +func TestDedupeStorage_MergesPlaceholderSlotsWithRichDrivesByOrder(t *testing.T) { + in := []models.Storage{ + {Slot: "PCIe8_RAID_Disk_1:0", Type: "SSD", Model: "SOLIDIGM SSDSC2K", SizeGB: 1787, SerialNumber: "S1", Present: true}, + {Slot: "PCIe8_RAID_Disk_1:1", Type: "SSD", Model: "SOLIDIGM SSDSC2K", SizeGB: 1787, SerialNumber: "S2", Present: true}, + {Slot: "PCIe8_RAID_Disk_1:2", Type: "SSD", Model: "SOLIDIGM SSDSC2K", SizeGB: 1787, SerialNumber: "S3", Present: true}, + {Slot: "OB01", Type: "NVMe", Model: "N/A", SerialNumber: "N/A", Present: true}, + {Slot: "OB02", Type: "NVMe", Model: "N/A", SerialNumber: "N/A", Present: true}, + {Slot: "OB03", Type: "NVMe", Model: "N/A", SerialNumber: "N/A", Present: true}, + {Slot: "OB04", Type: "NVMe", Model: "N/A", SerialNumber: "N/A", Present: true}, + {Slot: "FP00HDD00", Type: "NVMe", Model: "INTEL SSDPE2KE032T8", SizeGB: 2980, SerialNumber: "N1", Present: true}, + {Slot: "FP00HDD02", Type: "NVMe", Model: "INTEL SSDPE2KE032T8", SizeGB: 2980, SerialNumber: "N2", Present: true}, + {Slot: "FP00HDD04", Type: "NVMe", Model: "INTEL SSDPE2KE032T8", SizeGB: 2980, SerialNumber: "N3", Present: true}, + {Slot: "FP00HDD06", Type: "NVMe", Model: "INTEL SSDPE2KE032T8", SizeGB: 2980, SerialNumber: "N4", Present: true}, + } + + out := dedupeStorage(in) + if len(out) != 7 { + t.Fatalf("expected 7 rows after placeholder merge, got %d", len(out)) + } + + bySlot := make(map[string]models.Storage, len(out)) + for _, d := range out { + bySlot[d.Slot] = d + if strings.HasPrefix(d.Slot, "FP00HDD") { + t.Fatalf("expected FP donor slot %q to be absorbed by placeholder slot", d.Slot) + } + } + if bySlot["OB01"].SerialNumber != "N1" || bySlot["OB02"].SerialNumber != "N2" || bySlot["OB03"].SerialNumber != "N3" || bySlot["OB04"].SerialNumber != "N4" { + t.Fatalf("expected OB slots to be enriched in order, got OB01=%q OB02=%q OB03=%q OB04=%q", + bySlot["OB01"].SerialNumber, bySlot["OB02"].SerialNumber, bySlot["OB03"].SerialNumber, bySlot["OB04"].SerialNumber) + } + if bySlot["OB01"].Model != "INTEL SSDPE2KE032T8" || bySlot["OB01"].SizeGB != 2980 { + t.Fatalf("expected OB01 to inherit rich model/size, got model=%q size=%d", bySlot["OB01"].Model, bySlot["OB01"].SizeGB) + } +} + +func TestDedupeNetworkAdapters_MergesBySlotAndKeepsRicherData(t *testing.T) { + in := []models.NetworkAdapter{ + { + Slot: "NIC-A", + Model: "N/A", + Vendor: "", + Present: true, + }, + { + Slot: "NIC-A", + Model: "ConnectX-7", + Vendor: "NVIDIA", + SerialNumber: "NICSN001", + Firmware: "28.41.2020", + PortCount: 2, + MACAddresses: []string{"00:11:22:33:44:55"}, + Present: true, + }, + } + + out := dedupeNetworkAdapters(in) + if len(out) != 1 { + t.Fatalf("expected merged single NIC row, got %d", len(out)) + } + if out[0].SerialNumber != "NICSN001" || out[0].Model != "ConnectX-7" || out[0].Vendor != "NVIDIA" { + t.Fatalf("expected richer NIC fields preserved, got %+v", out[0]) + } +} + +func TestDedupePCIeDevices_MergesByLooseKeyAndKeepsBDF(t *testing.T) { + in := []models.PCIeDevice{ + { + Slot: "PCIe Slot 3", + DeviceClass: "Network Controller", + PartNumber: "MCX75310AAS-NEAT", + }, + { + Slot: "PCIe Slot 3", + DeviceClass: "Network Controller", + PartNumber: "MCX75310AAS-NEAT", + BDF: "0000:af:00.0", + VendorID: 0x15b3, + DeviceID: 0x1021, + SerialNumber: "MT000123", + }, + } + + out := dedupePCIeDevices(in) + if len(out) != 1 { + t.Fatalf("expected merged single PCIe row, got %d", len(out)) + } + if out[0].BDF != "0000:af:00.0" || out[0].SerialNumber != "MT000123" || out[0].VendorID == 0 || out[0].DeviceID == 0 { + t.Fatalf("expected richer PCIe fields preserved, got %+v", out[0]) + } +} + +func TestAppendPSU_MergesRicherDuplicate(t *testing.T) { + var out []models.PSU + seen := make(map[string]int) + idx := 1 + idx = appendPSU(&out, seen, models.PSU{ + Slot: "PSU1", + Model: "N/A", + Present: true, + }, idx) + _ = appendPSU(&out, seen, models.PSU{ + Slot: "PSU1", + Model: "DLG2700BW54C31", + SerialNumber: "DGPLV2515025L", + WattageW: 2700, + Firmware: "00.01.04", + Present: true, + }, idx) + + if len(out) != 1 { + t.Fatalf("expected PSU duplicate merge, got %d rows", len(out)) + } + if out[0].SerialNumber != "DGPLV2515025L" || out[0].WattageW != 2700 || out[0].Model != "DLG2700BW54C31" { + t.Fatalf("expected richer PSU fields preserved, got %+v", out[0]) + } +} + func TestReplayCollectGPUs_DropsModelOnlyPlaceholderWhenConcreteDiscoveredLater(t *testing.T) { r := redfishSnapshotReader{tree: map[string]interface{}{ "/redfish/v1/Systems/1/GraphicsControllers": map[string]interface{}{ @@ -1335,6 +1454,76 @@ func TestReplayCollectGPUs_MergesGraphicsSerialIntoConcretePCIeGPU(t *testing.T) } } +func TestReplayCollectGPUs_MergesAmbiguousSameModelByOrder(t *testing.T) { + tree := map[string]interface{}{ + "/redfish/v1/Systems/1/GraphicsControllers": map[string]interface{}{ + "Members": []interface{}{}, + }, + "/redfish/v1/Chassis/1/PCIeDevices": map[string]interface{}{ + "Members": []interface{}{}, + }, + } + + pcieIDs := []int{4, 8, 12, 14, 20, 23, 26, 30} + serials := []string{ + "1654425002361", + "1654425004310", + "1654425004204", + "1654225097289", + "1654225095717", + "1654425002114", + "1654425002714", + "1654425002991", + } + for i := 0; i < len(pcieIDs); i++ { + gpuPath := fmt.Sprintf("/redfish/v1/Systems/1/GraphicsControllers/GPU%d", i+1) + pciePath := fmt.Sprintf("/redfish/v1/Chassis/1/PCIeDevices/%d", pcieIDs[i]) + tree["/redfish/v1/Systems/1/GraphicsControllers"].(map[string]interface{})["Members"] = + append(tree["/redfish/v1/Systems/1/GraphicsControllers"].(map[string]interface{})["Members"].([]interface{}), map[string]interface{}{"@odata.id": gpuPath}) + tree["/redfish/v1/Chassis/1/PCIeDevices"].(map[string]interface{})["Members"] = + append(tree["/redfish/v1/Chassis/1/PCIeDevices"].(map[string]interface{})["Members"].([]interface{}), map[string]interface{}{"@odata.id": pciePath}) + + tree[gpuPath] = map[string]interface{}{ + "Id": fmt.Sprintf("GPU%d", i+1), + "Name": "H200-SXM5-141G", + "Model": "H200-SXM5-141G", + "Manufacturer": "NVIDIA", + "SerialNumber": serials[i], + } + tree[pciePath] = map[string]interface{}{ + "Id": fmt.Sprintf("%d", pcieIDs[i]), + "Name": fmt.Sprintf("PCIeCard%d", pcieIDs[i]), + "Model": "H200-SXM5-141G", + "Manufacturer": "NVIDIA", + "BDF": fmt.Sprintf("0000:%02x:00.0", i+1), + } + } + + r := redfishSnapshotReader{tree: tree} + got := r.collectGPUs([]string{"/redfish/v1/Systems/1"}, []string{"/redfish/v1/Chassis/1"}) + if len(got) != len(pcieIDs) { + t.Fatalf("expected %d merged GPUs, got %d", len(pcieIDs), len(got)) + } + + bySlot := make(map[string]models.GPU, len(got)) + for _, gpu := range got { + bySlot[gpu.Slot] = gpu + if strings.EqualFold(strings.TrimSpace(gpu.Slot), strings.TrimSpace(gpu.Model)) { + t.Fatalf("expected model-only placeholder to be dropped, got slot=%q", gpu.Slot) + } + } + for i, id := range pcieIDs { + slot := fmt.Sprintf("PCIeCard%d", id) + gpu, ok := bySlot[slot] + if !ok { + t.Fatalf("expected concrete slot %q in output", slot) + } + if gpu.SerialNumber != serials[i] { + t.Fatalf("expected slot %s serial %s, got %s", slot, serials[i], gpu.SerialNumber) + } + } +} + func TestShouldCrawlPath_MemorySubresourcesAreSkipped(t *testing.T) { if !shouldCrawlPath("/redfish/v1/Systems/1/Memory/CPU0_C0D0") { t.Fatalf("expected direct DIMM resource to be crawlable") @@ -1393,8 +1582,12 @@ func TestRedfishSnapshotBranchKey(t *testing.T) { } func TestShouldPostProbeCollectionPath(t *testing.T) { + if shouldPostProbeCollectionPath("/redfish/v1/Chassis/1/Sensors") { + t.Fatalf("expected sensors collection to be skipped by default") + } + t.Setenv("LOGPILE_REDFISH_SENSOR_POSTPROBE", "1") if !shouldPostProbeCollectionPath("/redfish/v1/Chassis/1/Sensors") { - t.Fatalf("expected sensors collection to be post-probed") + t.Fatalf("expected sensors collection to be post-probed when enabled") } if !shouldPostProbeCollectionPath("/redfish/v1/Systems/1/Storage/RAID/Drives") { t.Fatalf("expected drives collection to be post-probed") @@ -1406,3 +1599,69 @@ func TestShouldPostProbeCollectionPath(t *testing.T) { t.Fatalf("expected assembly member resource to be skipped from post-probe") } } + +func TestRedfishSnapshotPrioritySeeds_DefaultSkipsNoisyBranches(t *testing.T) { + seeds := redfishSnapshotPrioritySeeds( + []string{"/redfish/v1/Systems/1"}, + []string{"/redfish/v1/Chassis/1"}, + []string{"/redfish/v1/Managers/1"}, + ) + joined := strings.Join(seeds, "\n") + for _, noisy := range []string{ + "/redfish/v1/Fabrics", + "/redfish/v1/Chassis/1/Backplanes", + "/redfish/v1/Chassis/1/Boards", + "/redfish/v1/Chassis/1/Sensors", + "/redfish/v1/Managers/1/LogServices", + } { + if strings.Contains(joined, noisy) { + t.Fatalf("unexpected noisy seed %q", noisy) + } + } + for _, wanted := range []string{ + "/redfish/v1/Systems/1/Memory", + "/redfish/v1/Systems/1/PCIeDevices", + "/redfish/v1/Chassis/1/Drives", + "/redfish/v1/Chassis/1/NetworkAdapters", + "/redfish/v1/Managers/1/NetworkProtocol", + } { + if !strings.Contains(joined, wanted) { + t.Fatalf("expected seed %q", wanted) + } + } +} + +func TestRedfishPrefetchTargets_FilterNoisyBranches(t *testing.T) { + critical := []string{ + "/redfish/v1/Systems/1", + "/redfish/v1/Systems/1/Memory", + "/redfish/v1/Systems/1/Oem/Public/FRU", + "/redfish/v1/Chassis/1/Drives", + "/redfish/v1/Chassis/1/Backplanes", + "/redfish/v1/Chassis/1/Sensors", + "/redfish/v1/Managers/1/LogServices", + "/redfish/v1/Managers/1/NetworkProtocol", + } + got := redfishPrefetchTargets(critical) + joined := strings.Join(got, "\n") + for _, wanted := range []string{ + "/redfish/v1/Systems/1", + "/redfish/v1/Systems/1/Memory", + "/redfish/v1/Systems/1/Oem/Public/FRU", + "/redfish/v1/Chassis/1/Drives", + "/redfish/v1/Managers/1/NetworkProtocol", + } { + if !strings.Contains(joined, wanted) { + t.Fatalf("expected prefetch target %q", wanted) + } + } + for _, noisy := range []string{ + "/redfish/v1/Chassis/1/Backplanes", + "/redfish/v1/Chassis/1/Sensors", + "/redfish/v1/Managers/1/LogServices", + } { + if strings.Contains(joined, noisy) { + t.Fatalf("unexpected noisy prefetch target %q", noisy) + } + } +}