Improve Redfish collection progress reporting and make hardware dedup/serial parsing more robust

This commit is contained in:
2026-02-28 16:07:42 +03:00
parent 8dbbec3610
commit 9a30705c9a
9 changed files with 871 additions and 60 deletions

View File

@@ -436,7 +436,7 @@ func (c *RedfishConnector) collectGPUs(ctx context.Context, client *http.Client,
continue
}
key := gpuDedupKey(gpu)
key := gpuDocDedupKey(doc, gpu)
if key == "" {
continue
}
@@ -1281,25 +1281,20 @@ func (c *RedfishConnector) getCollectionMembers(ctx context.Context, client *htt
return nil, err
}
refs, ok := collection["Members"].([]interface{})
if !ok || len(refs) == 0 {
memberPaths := redfishCollectionMemberRefs(collection)
if len(memberPaths) == 0 {
return []map[string]interface{}{}, nil
}
out := make([]map[string]interface{}, 0, len(refs))
for _, refAny := range refs {
ref, ok := refAny.(map[string]interface{})
if !ok {
continue
}
memberPath := asString(ref["@odata.id"])
if memberPath == "" {
continue
}
out := make([]map[string]interface{}, 0, len(memberPaths))
for _, memberPath := range memberPaths {
memberDoc, err := c.getJSON(ctx, client, req, baseURL, memberPath)
if err != nil {
continue
}
if strings.TrimSpace(asString(memberDoc["@odata.id"])) == "" {
memberDoc["@odata.id"] = normalizeRedfishPath(memberPath)
}
out = append(out, memberDoc)
}
return out, nil
@@ -1387,20 +1382,12 @@ func (c *RedfishConnector) getJSONWithRetry(ctx context.Context, client *http.Cl
}
func (c *RedfishConnector) collectCriticalCollectionMembersSequential(ctx context.Context, client *http.Client, req Request, baseURL, collectionPath string, collectionDoc map[string]interface{}) (map[string]interface{}, bool) {
refs, ok := collectionDoc["Members"].([]interface{})
if !ok || len(refs) == 0 {
memberPaths := redfishCollectionMemberRefs(collectionDoc)
if len(memberPaths) == 0 {
return nil, false
}
out := make(map[string]interface{})
for _, refAny := range refs {
ref, ok := refAny.(map[string]interface{})
if !ok {
continue
}
memberPath := normalizeRedfishPath(asString(ref["@odata.id"]))
if memberPath == "" {
continue
}
for _, memberPath := range memberPaths {
doc, err := c.getJSONWithRetry(ctx, client, req, baseURL, memberPath, redfishCriticalRetryAttempts(), redfishCriticalRetryBackoff())
if err != nil {
continue
@@ -1412,6 +1399,19 @@ func (c *RedfishConnector) collectCriticalCollectionMembersSequential(ctx contex
func (c *RedfishConnector) recoverCriticalRedfishDocsPlanB(ctx context.Context, client *http.Client, req Request, baseURL string, criticalPaths []string, rawTree map[string]interface{}, fetchErrs map[string]string, emit ProgressFn) int {
var targets []string
seenTargets := make(map[string]struct{})
addTarget := func(path string) {
path = normalizeRedfishPath(path)
if path == "" {
return
}
if _, ok := seenTargets[path]; ok {
return
}
seenTargets[path] = struct{}{}
targets = append(targets, path)
}
for _, p := range criticalPaths {
p = normalizeRedfishPath(p)
if p == "" {
@@ -1424,7 +1424,35 @@ func (c *RedfishConnector) recoverCriticalRedfishDocsPlanB(ctx context.Context,
if hasErr && !isRetryableRedfishFetchError(fmt.Errorf("%s", errMsg)) {
continue
}
targets = append(targets, p)
addTarget(p)
}
// If a critical collection document was fetched, but some of its members
// failed during the initial crawl (common for /Drives on partially loaded BMCs),
// retry those member resources in plan-B too.
for _, p := range criticalPaths {
p = normalizeRedfishPath(p)
if p == "" {
continue
}
docAny, ok := rawTree[p]
if !ok {
continue
}
doc, ok := docAny.(map[string]interface{})
if !ok {
continue
}
for _, memberPath := range redfishCollectionMemberRefs(doc) {
if _, exists := rawTree[memberPath]; exists {
continue
}
errMsg, hasErr := fetchErrs[memberPath]
if hasErr && !isRetryableRedfishFetchError(fmt.Errorf("%s", errMsg)) {
continue
}
addTarget(memberPath)
}
}
if len(targets) == 0 {
return 0
@@ -1608,7 +1636,7 @@ func parseCPUs(docs []map[string]interface{}) []models.CPU {
Threads: asInt(doc["TotalThreads"]),
FrequencyMHz: asInt(doc["OperatingSpeedMHz"]),
MaxFreqMHz: asInt(doc["MaxSpeedMHz"]),
SerialNumber: asString(doc["SerialNumber"]),
SerialNumber: findFirstNormalizedStringByKeys(doc, "SerialNumber"),
})
}
return cpus
@@ -1638,7 +1666,7 @@ func parseMemory(docs []map[string]interface{}) []models.MemoryDIMM {
MaxSpeedMHz: asInt(doc["MaxSpeedMHz"]),
CurrentSpeedMHz: asInt(doc["OperatingSpeedMhz"]),
Manufacturer: asString(doc["Manufacturer"]),
SerialNumber: asString(doc["SerialNumber"]),
SerialNumber: findFirstNormalizedStringByKeys(doc, "SerialNumber"),
PartNumber: asString(doc["PartNumber"]),
Status: mapStatus(doc["Status"]),
})
@@ -1665,7 +1693,7 @@ func parseDrive(doc map[string]interface{}) models.Storage {
Type: storageType,
Model: firstNonEmpty(asString(doc["Model"]), asString(doc["Name"])),
SizeGB: sizeGB,
SerialNumber: asString(doc["SerialNumber"]),
SerialNumber: findFirstNormalizedStringByKeys(doc, "SerialNumber"),
Manufacturer: asString(doc["Manufacturer"]),
Firmware: asString(doc["Revision"]),
Interface: asString(doc["Protocol"]),
@@ -1737,7 +1765,7 @@ func parseNIC(doc map[string]interface{}) models.NetworkAdapter {
Vendor: strings.TrimSpace(vendor),
VendorID: vendorID,
DeviceID: deviceID,
SerialNumber: asString(doc["SerialNumber"]),
SerialNumber: findFirstNormalizedStringByKeys(doc, "SerialNumber"),
PartNumber: asString(doc["PartNumber"]),
Firmware: firmware,
PortCount: portCount,
@@ -1828,7 +1856,7 @@ func parsePSU(doc map[string]interface{}, idx int) models.PSU {
Model: firstNonEmpty(asString(doc["Model"]), asString(doc["Name"])),
Vendor: asString(doc["Manufacturer"]),
WattageW: asInt(doc["PowerCapacityWatts"]),
SerialNumber: asString(doc["SerialNumber"]),
SerialNumber: findFirstNormalizedStringByKeys(doc, "SerialNumber"),
PartNumber: asString(doc["PartNumber"]),
Firmware: asString(doc["FirmwareVersion"]),
Status: status,
@@ -1856,7 +1884,7 @@ func parseGPU(doc map[string]interface{}, functionDocs []map[string]interface{},
Location: firstNonEmpty(redfishLocationLabel(doc["Location"]), redfishLocationLabel(doc["PhysicalLocation"])),
Model: firstNonEmpty(asString(doc["Model"]), asString(doc["Name"])),
Manufacturer: asString(doc["Manufacturer"]),
SerialNumber: strings.TrimSpace(asString(doc["SerialNumber"])),
SerialNumber: findFirstNormalizedStringByKeys(doc, "SerialNumber"),
PartNumber: asString(doc["PartNumber"]),
Firmware: asString(doc["FirmwareVersion"]),
Status: mapStatus(doc["Status"]),
@@ -1918,7 +1946,7 @@ func parsePCIeDevice(doc map[string]interface{}, functionDocs []map[string]inter
DeviceClass: asString(doc["DeviceType"]),
Manufacturer: asString(doc["Manufacturer"]),
PartNumber: asString(doc["PartNumber"]),
SerialNumber: asString(doc["SerialNumber"]),
SerialNumber: findFirstNormalizedStringByKeys(doc, "SerialNumber"),
VendorID: asHexOrInt(doc["VendorId"]),
DeviceID: asHexOrInt(doc["DeviceId"]),
}
@@ -1988,7 +2016,7 @@ func parsePCIeFunction(doc map[string]interface{}, idx int) models.PCIeDevice {
DeviceID: asHexOrInt(doc["DeviceId"]),
DeviceClass: firstNonEmpty(asString(doc["DeviceClass"]), asString(doc["ClassCode"]), "PCIe device"),
Manufacturer: asString(doc["Manufacturer"]),
SerialNumber: asString(doc["SerialNumber"]),
SerialNumber: findFirstNormalizedStringByKeys(doc, "SerialNumber"),
LinkWidth: asInt(doc["CurrentLinkWidth"]),
LinkSpeed: firstNonEmpty(asString(doc["CurrentLinkSpeedGTs"]), asString(doc["CurrentLinkSpeed"])),
MaxLinkWidth: asInt(doc["MaxLinkWidth"]),
@@ -2097,6 +2125,13 @@ func gpuDedupKey(gpu models.GPU) string {
return firstNonEmpty(strings.TrimSpace(gpu.Slot)+"|"+strings.TrimSpace(gpu.Model), strings.TrimSpace(gpu.Slot))
}
// gpuDocDedupKey returns the deduplication key for a GPU parsed from doc.
//
// When doc carries an "@odata.id" that is still non-empty after
// normalizeRedfishPath, the key is "path:" followed by that path. Otherwise
// the key falls back to whatever gpuDedupKey derives from the parsed GPU.
func gpuDocDedupKey(doc map[string]interface{}, gpu models.GPU) string {
	resourcePath := normalizeRedfishPath(asString(doc["@odata.id"]))
	if resourcePath == "" {
		// No usable resource path: key on the parsed GPU fields instead.
		return gpuDedupKey(gpu)
	}
	return "path:" + resourcePath
}
func shouldSkipGenericGPUDuplicate(existing []models.GPU, candidate models.GPU) bool {
if len(existing) == 0 {
return false
@@ -2137,6 +2172,48 @@ func dropModelOnlyGPUPlaceholders(items []models.GPU) []models.GPU {
return items
}
// Merge serial from generic GraphicsControllers placeholders (slot ~= model)
// into concrete PCIe rows (with BDF) when mapping is unambiguous.
mergedPlaceholder := make(map[int]struct{})
for i := range items {
serial := normalizeRedfishIdentityField(items[i].SerialNumber)
if serial == "" || strings.TrimSpace(items[i].BDF) != "" || !isModelOnlyGPUPlaceholder(items[i]) {
continue
}
candidate := -1
model := strings.TrimSpace(items[i].Model)
mfr := strings.TrimSpace(items[i].Manufacturer)
for j := range items {
if i == j {
continue
}
if !strings.EqualFold(strings.TrimSpace(items[j].Model), model) {
continue
}
otherMfr := strings.TrimSpace(items[j].Manufacturer)
if mfr != "" && otherMfr != "" && !strings.EqualFold(mfr, otherMfr) {
continue
}
if strings.TrimSpace(items[j].BDF) == "" || isModelOnlyGPUPlaceholder(items[j]) {
continue
}
if normalizeRedfishIdentityField(items[j].SerialNumber) != "" {
continue
}
if candidate != -1 {
candidate = -2
break
}
candidate = j
}
if candidate >= 0 {
items[candidate].SerialNumber = serial
mergedPlaceholder[i] = struct{}{}
}
}
concreteByModel := make(map[string]struct{}, len(items))
for _, gpu := range items {
modelKey := strings.ToLower(strings.TrimSpace(gpu.Model))
@@ -2152,14 +2229,12 @@ func dropModelOnlyGPUPlaceholders(items []models.GPU) []models.GPU {
}
out := make([]models.GPU, 0, len(items))
for _, gpu := range items {
for i, gpu := range items {
modelKey := strings.ToLower(strings.TrimSpace(gpu.Model))
slot := strings.TrimSpace(gpu.Slot)
if _, hasConcrete := concreteByModel[modelKey]; hasConcrete &&
normalizeRedfishIdentityField(gpu.SerialNumber) == "" &&
strings.TrimSpace(gpu.BDF) == "" &&
(strings.EqualFold(slot, strings.TrimSpace(gpu.Model)) ||
strings.HasPrefix(strings.ToUpper(slot), "GPU")) {
isModelOnlyGPUPlaceholder(gpu) &&
(normalizeRedfishIdentityField(gpu.SerialNumber) == "" || hasMergedPlaceholderIndex(mergedPlaceholder, i)) {
continue
}
out = append(out, gpu)
@@ -2167,6 +2242,20 @@ func dropModelOnlyGPUPlaceholders(items []models.GPU) []models.GPU {
return out
}
// isModelOnlyGPUPlaceholder reports whether gpu looks like a placeholder row
// identified only by its model.
//
// Slot and Model are compared after trimming surrounding whitespace. The
// result is false when either is empty. Otherwise it is true when the slot
// equals the model (case-insensitively) or when the slot starts with "GPU"
// (case-insensitively).
func isModelOnlyGPUPlaceholder(gpu models.GPU) bool {
	slotLabel, modelName := strings.TrimSpace(gpu.Slot), strings.TrimSpace(gpu.Model)
	switch {
	case slotLabel == "", modelName == "":
		return false
	case strings.EqualFold(slotLabel, modelName):
		return true
	default:
		return strings.HasPrefix(strings.ToUpper(slotLabel), "GPU")
	}
}
// hasMergedPlaceholderIndex reports whether idx is present as a key in the
// indexes set. A nil or empty set never contains any index.
func hasMergedPlaceholderIndex(indexes map[int]struct{}, idx int) bool {
	_, merged := indexes[idx]
	return merged
}
func looksLikeGPU(doc map[string]interface{}, functionDocs []map[string]interface{}) bool {
deviceType := strings.ToLower(asString(doc["DeviceType"]))
if strings.Contains(deviceType, "gpu") || strings.Contains(deviceType, "graphics") || strings.Contains(deviceType, "accelerator") {
@@ -2537,6 +2626,42 @@ func normalizeRedfishPath(raw string) string {
return raw
}
// redfishCollectionMemberRefs lists the member resource paths referenced by a
// collection document.
//
// It reads the "@odata.id" of every object entry in collection["Members"] and
// then in collection["Oem"]["Public"]["Members"], passes each through
// normalizeRedfishPath, and returns the non-empty results in first-seen order
// with duplicates removed. Entries that are not JSON objects, and member
// fields that are not arrays, are ignored. The result is nil when the
// collection is empty or yields no paths.
func redfishCollectionMemberRefs(collection map[string]interface{}) []string {
	if len(collection) == 0 {
		return nil
	}

	// Candidate member arrays, in lookup order: the standard one first, then
	// the Oem.Public variant when that nesting is present.
	sources := []interface{}{collection["Members"]}
	if oem, ok := collection["Oem"].(map[string]interface{}); ok {
		if public, ok := oem["Public"].(map[string]interface{}); ok {
			sources = append(sources, public["Members"])
		}
	}

	var paths []string
	visited := make(map[string]struct{})
	for _, raw := range sources {
		// A failed assertion leaves entries nil, so the inner loop is a no-op.
		entries, _ := raw.([]interface{})
		for _, entry := range entries {
			ref, isObject := entry.(map[string]interface{})
			if !isObject {
				continue
			}
			path := normalizeRedfishPath(asString(ref["@odata.id"]))
			if path == "" {
				continue
			}
			if _, dup := visited[path]; dup {
				continue
			}
			visited[path] = struct{}{}
			paths = append(paths, path)
		}
	}
	return paths
}
func extractODataIDs(v interface{}) []string {
var refs []string
var walk func(any)