collector/redfish: fix server model fallback and GPU/NVMe regressions

This commit is contained in:
2026-02-28 14:50:02 +03:00
parent a2c9e9a57f
commit f35cabac48
3 changed files with 146 additions and 5 deletions

View File

@@ -460,7 +460,7 @@ func (c *RedfishConnector) collectGPUs(ctx context.Context, client *http.Client,
}
}
return out
return dropModelOnlyGPUPlaceholders(out)
}
func (c *RedfishConnector) collectPCIeDevices(ctx context.Context, client *http.Client, req Request, baseURL string, systemPaths, chassisPaths []string) []models.PCIeDevice {
@@ -2070,7 +2070,9 @@ func shouldSkipGenericGPUDuplicate(existing []models.GPU, candidate models.GPU)
if !strings.EqualFold(strings.TrimSpace(gpu.Model), model) {
continue
}
if !strings.EqualFold(strings.TrimSpace(gpu.Manufacturer), strings.TrimSpace(candidate.Manufacturer)) {
existingMfr := strings.TrimSpace(gpu.Manufacturer)
candidateMfr := strings.TrimSpace(candidate.Manufacturer)
if existingMfr != "" && candidateMfr != "" && !strings.EqualFold(existingMfr, candidateMfr) {
continue
}
if normalizeRedfishIdentityField(gpu.SerialNumber) != "" || strings.TrimSpace(gpu.BDF) != "" {
@@ -2080,6 +2082,41 @@ func shouldSkipGenericGPUDuplicate(existing []models.GPU, candidate models.GPU)
return false
}
func dropModelOnlyGPUPlaceholders(items []models.GPU) []models.GPU {
if len(items) < 2 {
return items
}
concreteByModel := make(map[string]struct{}, len(items))
for _, gpu := range items {
modelKey := strings.ToLower(strings.TrimSpace(gpu.Model))
if modelKey == "" {
continue
}
if normalizeRedfishIdentityField(gpu.SerialNumber) != "" || strings.TrimSpace(gpu.BDF) != "" {
concreteByModel[modelKey] = struct{}{}
}
}
if len(concreteByModel) == 0 {
return items
}
out := make([]models.GPU, 0, len(items))
for _, gpu := range items {
modelKey := strings.ToLower(strings.TrimSpace(gpu.Model))
slot := strings.TrimSpace(gpu.Slot)
if _, hasConcrete := concreteByModel[modelKey]; hasConcrete &&
normalizeRedfishIdentityField(gpu.SerialNumber) == "" &&
strings.TrimSpace(gpu.BDF) == "" &&
(strings.EqualFold(slot, strings.TrimSpace(gpu.Model)) ||
strings.HasPrefix(strings.ToUpper(slot), "GPU")) {
continue
}
out = append(out, gpu)
}
return out
}
func looksLikeGPU(doc map[string]interface{}, functionDocs []map[string]interface{}) bool {
deviceType := strings.ToLower(asString(doc["DeviceType"]))
if strings.Contains(deviceType, "gpu") || strings.Contains(deviceType, "graphics") || strings.Contains(deviceType, "accelerator") {
@@ -2183,7 +2220,10 @@ func dedupeStorage(items []models.Storage) []models.Storage {
out := make([]models.Storage, 0, len(items))
seen := make(map[string]struct{}, len(items))
for _, item := range items {
key := firstNonEmpty(item.SerialNumber, item.Slot+"|"+item.Model)
key := firstNonEmpty(
normalizeRedfishIdentityField(item.SerialNumber),
strings.TrimSpace(item.Slot)+"|"+strings.TrimSpace(item.Model),
)
if key == "" {
continue
}

View File

@@ -448,7 +448,7 @@ func dedupeStrings(items []string) []string {
func (r redfishSnapshotReader) collectBoardFallbackDocs(chassisPaths []string) []map[string]interface{} {
out := make([]map[string]interface{}, 0)
for _, chassisPath := range chassisPaths {
for _, suffix := range []string{"/Boards", "/Backplanes", "/Assembly"} {
for _, suffix := range []string{"/Boards", "/Backplanes"} {
path := joinPath(chassisPath, suffix)
if docs, err := r.getCollectionMembers(path); err == nil && len(docs) > 0 {
out = append(out, docs...)
@@ -468,6 +468,9 @@ func applyBoardInfoFallbackFromDocs(board *models.BoardInfo, docs []map[string]i
}
for _, doc := range docs {
candidate := parseBoardInfoFromFRUDoc(doc)
if !isLikelyServerProductName(candidate.ProductName) {
continue
}
if board.Manufacturer == "" {
board.Manufacturer = candidate.Manufacturer
}
@@ -486,6 +489,27 @@ func applyBoardInfoFallbackFromDocs(board *models.BoardInfo, docs []map[string]i
}
}
func isLikelyServerProductName(v string) bool {
v = strings.TrimSpace(v)
if v == "" {
return false
}
n := strings.ToUpper(v)
if strings.Contains(n, "NULL") {
return false
}
componentTokens := []string{
"DIMM", "DDR", "NVME", "SSD", "HDD", "GPU", "NIC", "RAID",
"PSU", "FAN", "BACKPLANE", "FRU",
}
for _, token := range componentTokens {
if strings.Contains(n, strings.ToUpper(token)) {
return false
}
}
return true
}
type redfishSnapshotReader struct {
tree map[string]interface{}
}
@@ -918,7 +942,7 @@ func (r redfishSnapshotReader) collectGPUs(systemPaths, chassisPaths []string) [
out = append(out, gpu)
}
}
return out
return dropModelOnlyGPUPlaceholders(out)
}
func (r redfishSnapshotReader) collectPCIeDevices(systemPaths, chassisPaths []string) []models.PCIeDevice {

View File

@@ -6,6 +6,8 @@ import (
"net/http"
"net/http/httptest"
"testing"
"git.mchus.pro/mchus/logpile/internal/models"
)
func TestRedfishConnectorCollect(t *testing.T) {
@@ -691,3 +693,78 @@ func TestReplayCollectGPUs_SkipsModelOnlyDuplicateFromGraphicsControllers(t *tes
}
}
}
func TestApplyBoardInfoFallbackFromDocs_SkipsComponentProductNames(t *testing.T) {
board := models.BoardInfo{
SerialNumber: "23E100051",
}
docs := []map[string]interface{}{
{
"Model": "DDR5 DIMM",
"Manufacturer": "DELTA",
"SerialNumber": "802C1A2507D284B001",
},
{
"ProductName": "NF5688M7",
"Manufacturer": "Inspur",
"PartNumber": "YZMB-00001",
},
}
applyBoardInfoFallbackFromDocs(&board, docs)
if board.ProductName != "NF5688M7" {
t.Fatalf("expected server model from fallback docs, got %q", board.ProductName)
}
if board.Manufacturer != "Inspur" {
t.Fatalf("expected manufacturer from server fallback doc, got %q", board.Manufacturer)
}
}
func TestDedupeStorage_IgnoresPlaceholderSerial(t *testing.T) {
in := []models.Storage{
{Slot: "OB01", Model: "N/A", SerialNumber: "N/A"},
{Slot: "OB02", Model: "N/A", SerialNumber: "N/A"},
{Slot: "OB03", Model: "N/A", SerialNumber: "N/A"},
{Slot: "OB04", Model: "N/A", SerialNumber: "N/A"},
}
out := dedupeStorage(in)
if len(out) != 4 {
t.Fatalf("expected all placeholder-serial NVMe drives to be kept by slot key, got %d", len(out))
}
}
func TestReplayCollectGPUs_DropsModelOnlyPlaceholderWhenConcreteDiscoveredLater(t *testing.T) {
r := redfishSnapshotReader{tree: map[string]interface{}{
"/redfish/v1/Systems/1/GraphicsControllers": map[string]interface{}{
"Members": []interface{}{
map[string]interface{}{"@odata.id": "/redfish/v1/Systems/1/GraphicsControllers/GPU0"},
},
},
"/redfish/v1/Systems/1/GraphicsControllers/GPU0": map[string]interface{}{
"Id": "GPU0",
"Name": "H200-SXM5-141G",
"Model": "H200-SXM5-141G",
},
"/redfish/v1/Chassis/1/PCIeDevices": map[string]interface{}{
"Members": []interface{}{
map[string]interface{}{"@odata.id": "/redfish/v1/Chassis/1/PCIeDevices/4"},
},
},
"/redfish/v1/Chassis/1/PCIeDevices/4": map[string]interface{}{
"Id": "4",
"Name": "PCIeCard4",
"Model": "H200-SXM5-141G",
"Manufacturer": "NVIDIA",
"BDF": "0000:0f:00.0",
},
}}
got := r.collectGPUs([]string{"/redfish/v1/Systems/1"}, []string{"/redfish/v1/Chassis/1"})
if len(got) != 1 {
t.Fatalf("expected generic graphics placeholder to be dropped, got %d GPUs", len(got))
}
if got[0].Slot != "PCIeCard4" {
t.Fatalf("expected concrete PCIe GPU to remain, got slot=%q", got[0].Slot)
}
}