redfish: optimize snapshot/plan-b crawl and add timing diagnostics

This commit is contained in:
2026-02-28 17:56:04 +03:00
parent e0146adfff
commit 612058ed16
3 changed files with 1473 additions and 140 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -1105,7 +1105,6 @@ func (r redfishSnapshotReader) probeDirectDiskBayChildren(drivesCollectionPath s
func (r redfishSnapshotReader) collectNICs(chassisPaths []string) []models.NetworkAdapter { func (r redfishSnapshotReader) collectNICs(chassisPaths []string) []models.NetworkAdapter {
var nics []models.NetworkAdapter var nics []models.NetworkAdapter
seen := make(map[string]struct{})
for _, chassisPath := range chassisPaths { for _, chassisPath := range chassisPaths {
adapterDocs, err := r.getCollectionMembers(joinPath(chassisPath, "/NetworkAdapters")) adapterDocs, err := r.getCollectionMembers(joinPath(chassisPath, "/NetworkAdapters"))
if err != nil { if err != nil {
@@ -1121,23 +1120,15 @@ func (r redfishSnapshotReader) collectNICs(chassisPaths []string) []models.Netwo
functionDocs := r.getLinkedPCIeFunctions(pcieDoc) functionDocs := r.getLinkedPCIeFunctions(pcieDoc)
enrichNICFromPCIe(&nic, pcieDoc, functionDocs) enrichNICFromPCIe(&nic, pcieDoc, functionDocs)
} }
key := firstNonEmpty(nic.SerialNumber, nic.Slot+"|"+nic.Model)
if key == "" {
continue
}
if _, ok := seen[key]; ok {
continue
}
seen[key] = struct{}{}
nics = append(nics, nic) nics = append(nics, nic)
} }
} }
return nics return dedupeNetworkAdapters(nics)
} }
func (r redfishSnapshotReader) collectPSUs(chassisPaths []string) []models.PSU { func (r redfishSnapshotReader) collectPSUs(chassisPaths []string) []models.PSU {
var out []models.PSU var out []models.PSU
seen := make(map[string]struct{}) seen := make(map[string]int)
idx := 1 idx := 1
for _, chassisPath := range chassisPaths { for _, chassisPath := range chassisPaths {
if memberDocs, err := r.getCollectionMembers(joinPath(chassisPath, "/PowerSubsystem/PowerSupplies")); err == nil && len(memberDocs) > 0 { if memberDocs, err := r.getCollectionMembers(joinPath(chassisPath, "/PowerSubsystem/PowerSupplies")); err == nil && len(memberDocs) > 0 {
@@ -1213,7 +1204,6 @@ func (r redfishSnapshotReader) collectPCIeDevices(systemPaths, chassisPaths []st
collections = append(collections, joinPath(chassisPath, "/PCIeDevices")) collections = append(collections, joinPath(chassisPath, "/PCIeDevices"))
} }
var out []models.PCIeDevice var out []models.PCIeDevice
seen := make(map[string]struct{})
for _, collectionPath := range collections { for _, collectionPath := range collections {
memberDocs, err := r.getCollectionMembers(collectionPath) memberDocs, err := r.getCollectionMembers(collectionPath)
if err != nil || len(memberDocs) == 0 { if err != nil || len(memberDocs) == 0 {
@@ -1222,14 +1212,6 @@ func (r redfishSnapshotReader) collectPCIeDevices(systemPaths, chassisPaths []st
for _, doc := range memberDocs { for _, doc := range memberDocs {
functionDocs := r.getLinkedPCIeFunctions(doc) functionDocs := r.getLinkedPCIeFunctions(doc)
dev := parsePCIeDevice(doc, functionDocs) dev := parsePCIeDevice(doc, functionDocs)
key := pcieDeviceDedupKey(dev)
if key == "" {
continue
}
if _, ok := seen[key]; ok {
continue
}
seen[key] = struct{}{}
out = append(out, dev) out = append(out, dev)
} }
} }
@@ -1240,18 +1222,10 @@ func (r redfishSnapshotReader) collectPCIeDevices(systemPaths, chassisPaths []st
} }
for idx, fn := range functionDocs { for idx, fn := range functionDocs {
dev := parsePCIeFunction(fn, idx+1) dev := parsePCIeFunction(fn, idx+1)
key := pcieDeviceDedupKey(dev)
if key == "" {
continue
}
if _, ok := seen[key]; ok {
continue
}
seen[key] = struct{}{}
out = append(out, dev) out = append(out, dev)
} }
} }
return out return dedupePCIeDevices(out)
} }
func stringsTrimTrailingSlash(s string) string { func stringsTrimTrailingSlash(s string) string {

View File

@@ -3,6 +3,7 @@ package collector
import ( import (
"context" "context"
"encoding/json" "encoding/json"
"fmt"
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
"strings" "strings"
@@ -1257,6 +1258,124 @@ func TestDedupeStorage_IgnoresPlaceholderSerial(t *testing.T) {
} }
} }
// TestDedupeStorage_MergesPlaceholderSlotsWithRichDrivesByOrder passes
// dedupeStorage three RAID SSD rows, four OB* NVMe rows whose model and serial
// are "N/A", and four fully described FP00HDD* NVMe rows. It requires seven
// rows back, no remaining FP00HDD* slot, and OB01..OB04 carrying serials
// N1..N4 in that order, with OB01 also taking over the donor model and size.
func TestDedupeStorage_MergesPlaceholderSlotsWithRichDrivesByOrder(t *testing.T) {
	drives := []models.Storage{
		{Slot: "PCIe8_RAID_Disk_1:0", Type: "SSD", Model: "SOLIDIGM SSDSC2K", SizeGB: 1787, SerialNumber: "S1", Present: true},
		{Slot: "PCIe8_RAID_Disk_1:1", Type: "SSD", Model: "SOLIDIGM SSDSC2K", SizeGB: 1787, SerialNumber: "S2", Present: true},
		{Slot: "PCIe8_RAID_Disk_1:2", Type: "SSD", Model: "SOLIDIGM SSDSC2K", SizeGB: 1787, SerialNumber: "S3", Present: true},
		{Slot: "OB01", Type: "NVMe", Model: "N/A", SerialNumber: "N/A", Present: true},
		{Slot: "OB02", Type: "NVMe", Model: "N/A", SerialNumber: "N/A", Present: true},
		{Slot: "OB03", Type: "NVMe", Model: "N/A", SerialNumber: "N/A", Present: true},
		{Slot: "OB04", Type: "NVMe", Model: "N/A", SerialNumber: "N/A", Present: true},
		{Slot: "FP00HDD00", Type: "NVMe", Model: "INTEL SSDPE2KE032T8", SizeGB: 2980, SerialNumber: "N1", Present: true},
		{Slot: "FP00HDD02", Type: "NVMe", Model: "INTEL SSDPE2KE032T8", SizeGB: 2980, SerialNumber: "N2", Present: true},
		{Slot: "FP00HDD04", Type: "NVMe", Model: "INTEL SSDPE2KE032T8", SizeGB: 2980, SerialNumber: "N3", Present: true},
		{Slot: "FP00HDD06", Type: "NVMe", Model: "INTEL SSDPE2KE032T8", SizeGB: 2980, SerialNumber: "N4", Present: true},
	}

	merged := dedupeStorage(drives)
	if got := len(merged); got != 7 {
		t.Fatalf("expected 7 rows after placeholder merge, got %d", got)
	}

	// Index the result by slot; any surviving donor slot is a failure.
	index := make(map[string]models.Storage, len(merged))
	for _, drive := range merged {
		if strings.HasPrefix(drive.Slot, "FP00HDD") {
			t.Fatalf("expected FP donor slot %q to be absorbed by placeholder slot", drive.Slot)
		}
		index[drive.Slot] = drive
	}

	ob01, ob02, ob03, ob04 := index["OB01"], index["OB02"], index["OB03"], index["OB04"]
	serialsInOrder := ob01.SerialNumber == "N1" &&
		ob02.SerialNumber == "N2" &&
		ob03.SerialNumber == "N3" &&
		ob04.SerialNumber == "N4"
	if !serialsInOrder {
		t.Fatalf("expected OB slots to be enriched in order, got OB01=%q OB02=%q OB03=%q OB04=%q",
			ob01.SerialNumber, ob02.SerialNumber, ob03.SerialNumber, ob04.SerialNumber)
	}

	if !(ob01.Model == "INTEL SSDPE2KE032T8" && ob01.SizeGB == 2980) {
		t.Fatalf("expected OB01 to inherit rich model/size, got model=%q size=%d", ob01.Model, ob01.SizeGB)
	}
}
// TestDedupeNetworkAdapters_MergesBySlotAndKeepsRicherData passes two rows for
// slot "NIC-A" to dedupeNetworkAdapters: a placeholder (model "N/A", no vendor)
// followed by a fully described adapter. It requires a single row back that
// carries the detailed serial number, model and vendor.
func TestDedupeNetworkAdapters_MergesBySlotAndKeepsRicherData(t *testing.T) {
	placeholder := models.NetworkAdapter{
		Slot:    "NIC-A",
		Model:   "N/A",
		Vendor:  "",
		Present: true,
	}
	detailed := models.NetworkAdapter{
		Slot:         "NIC-A",
		Model:        "ConnectX-7",
		Vendor:       "NVIDIA",
		SerialNumber: "NICSN001",
		Firmware:     "28.41.2020",
		PortCount:    2,
		MACAddresses: []string{"00:11:22:33:44:55"},
		Present:      true,
	}

	merged := dedupeNetworkAdapters([]models.NetworkAdapter{placeholder, detailed})
	if len(merged) != 1 {
		t.Fatalf("expected merged single NIC row, got %d", len(merged))
	}

	nic := merged[0]
	richKept := nic.SerialNumber == "NICSN001" && nic.Model == "ConnectX-7" && nic.Vendor == "NVIDIA"
	if !richKept {
		t.Fatalf("expected richer NIC fields preserved, got %+v", nic)
	}
}
// TestDedupePCIeDevices_MergesByLooseKeyAndKeepsBDF passes dedupePCIeDevices
// two rows sharing slot, device class and part number, where only the second
// row has a BDF, vendor/device IDs and a serial number. It requires one row
// back that retains the BDF, the serial number and non-zero IDs.
func TestDedupePCIeDevices_MergesByLooseKeyAndKeepsBDF(t *testing.T) {
	sparse := models.PCIeDevice{
		Slot:        "PCIe Slot 3",
		DeviceClass: "Network Controller",
		PartNumber:  "MCX75310AAS-NEAT",
	}
	detailed := models.PCIeDevice{
		Slot:         "PCIe Slot 3",
		DeviceClass:  "Network Controller",
		PartNumber:   "MCX75310AAS-NEAT",
		BDF:          "0000:af:00.0",
		VendorID:     0x15b3,
		DeviceID:     0x1021,
		SerialNumber: "MT000123",
	}

	merged := dedupePCIeDevices([]models.PCIeDevice{sparse, detailed})
	if len(merged) != 1 {
		t.Fatalf("expected merged single PCIe row, got %d", len(merged))
	}

	dev := merged[0]
	richKept := dev.BDF == "0000:af:00.0" &&
		dev.SerialNumber == "MT000123" &&
		dev.VendorID != 0 &&
		dev.DeviceID != 0
	if !richKept {
		t.Fatalf("expected richer PCIe fields preserved, got %+v", dev)
	}
}
// TestAppendPSU_MergesRicherDuplicate calls appendPSU twice for slot "PSU1":
// first with a placeholder (model "N/A"), then with a fully described supply,
// feeding the index returned by the first call into the second. It requires a
// single row that carries the detailed serial number, wattage and model.
func TestAppendPSU_MergesRicherDuplicate(t *testing.T) {
	placeholder := models.PSU{
		Slot:    "PSU1",
		Model:   "N/A",
		Present: true,
	}
	detailed := models.PSU{
		Slot:         "PSU1",
		Model:        "DLG2700BW54C31",
		SerialNumber: "DGPLV2515025L",
		WattageW:     2700,
		Firmware:     "00.01.04",
		Present:      true,
	}

	var psus []models.PSU
	seen := map[string]int{}

	next := appendPSU(&psus, seen, placeholder, 1)
	// The index returned by the second call is not needed by this test.
	_ = appendPSU(&psus, seen, detailed, next)

	if len(psus) != 1 {
		t.Fatalf("expected PSU duplicate merge, got %d rows", len(psus))
	}

	psu := psus[0]
	richKept := psu.SerialNumber == "DGPLV2515025L" && psu.WattageW == 2700 && psu.Model == "DLG2700BW54C31"
	if !richKept {
		t.Fatalf("expected richer PSU fields preserved, got %+v", psu)
	}
}
func TestReplayCollectGPUs_DropsModelOnlyPlaceholderWhenConcreteDiscoveredLater(t *testing.T) { func TestReplayCollectGPUs_DropsModelOnlyPlaceholderWhenConcreteDiscoveredLater(t *testing.T) {
r := redfishSnapshotReader{tree: map[string]interface{}{ r := redfishSnapshotReader{tree: map[string]interface{}{
"/redfish/v1/Systems/1/GraphicsControllers": map[string]interface{}{ "/redfish/v1/Systems/1/GraphicsControllers": map[string]interface{}{
@@ -1335,6 +1454,76 @@ func TestReplayCollectGPUs_MergesGraphicsSerialIntoConcretePCIeGPU(t *testing.T)
} }
} }
// TestReplayCollectGPUs_MergesAmbiguousSameModelByOrder builds a snapshot tree
// with eight GraphicsControllers entries (GPU1..GPU8, each with a serial
// number) and eight PCIeDevices entries (PCIeCard<id>, each with a BDF), all
// sharing one model string. It requires collectGPUs to return exactly eight
// GPUs, none whose slot equals its model, and each PCIeCard<id> slot carrying
// the serial of the graphics controller at the same position.
func TestReplayCollectGPUs_MergesAmbiguousSameModelByOrder(t *testing.T) {
	pcieIDs := []int{4, 8, 12, 14, 20, 23, 26, 30}
	serials := []string{
		"1654425002361",
		"1654425004310",
		"1654425004204",
		"1654225097289",
		"1654225095717",
		"1654425002114",
		"1654425002714",
		"1654425002991",
	}

	// Collect the member links first, then attach both collections in one go.
	tree := make(map[string]interface{}, 2+2*len(pcieIDs))
	gpuMembers := make([]interface{}, 0, len(pcieIDs))
	pcieMembers := make([]interface{}, 0, len(pcieIDs))
	for i, id := range pcieIDs {
		gpuPath := fmt.Sprintf("/redfish/v1/Systems/1/GraphicsControllers/GPU%d", i+1)
		pciePath := fmt.Sprintf("/redfish/v1/Chassis/1/PCIeDevices/%d", id)

		gpuMembers = append(gpuMembers, map[string]interface{}{"@odata.id": gpuPath})
		pcieMembers = append(pcieMembers, map[string]interface{}{"@odata.id": pciePath})

		tree[gpuPath] = map[string]interface{}{
			"Id":           fmt.Sprintf("GPU%d", i+1),
			"Name":         "H200-SXM5-141G",
			"Model":        "H200-SXM5-141G",
			"Manufacturer": "NVIDIA",
			"SerialNumber": serials[i],
		}
		tree[pciePath] = map[string]interface{}{
			"Id":           fmt.Sprintf("%d", id),
			"Name":         fmt.Sprintf("PCIeCard%d", id),
			"Model":        "H200-SXM5-141G",
			"Manufacturer": "NVIDIA",
			"BDF":          fmt.Sprintf("0000:%02x:00.0", i+1),
		}
	}
	tree["/redfish/v1/Systems/1/GraphicsControllers"] = map[string]interface{}{
		"Members": gpuMembers,
	}
	tree["/redfish/v1/Chassis/1/PCIeDevices"] = map[string]interface{}{
		"Members": pcieMembers,
	}

	reader := redfishSnapshotReader{tree: tree}
	gpus := reader.collectGPUs([]string{"/redfish/v1/Systems/1"}, []string{"/redfish/v1/Chassis/1"})
	if len(gpus) != len(pcieIDs) {
		t.Fatalf("expected %d merged GPUs, got %d", len(pcieIDs), len(gpus))
	}

	index := make(map[string]models.GPU, len(gpus))
	for _, g := range gpus {
		// A slot that merely repeats the model name is a leftover placeholder.
		if strings.EqualFold(strings.TrimSpace(g.Slot), strings.TrimSpace(g.Model)) {
			t.Fatalf("expected model-only placeholder to be dropped, got slot=%q", g.Slot)
		}
		index[g.Slot] = g
	}

	for i, id := range pcieIDs {
		slot := fmt.Sprintf("PCIeCard%d", id)
		g, found := index[slot]
		if !found {
			t.Fatalf("expected concrete slot %q in output", slot)
		}
		if want := serials[i]; g.SerialNumber != want {
			t.Fatalf("expected slot %s serial %s, got %s", slot, want, g.SerialNumber)
		}
	}
}
func TestShouldCrawlPath_MemorySubresourcesAreSkipped(t *testing.T) { func TestShouldCrawlPath_MemorySubresourcesAreSkipped(t *testing.T) {
if !shouldCrawlPath("/redfish/v1/Systems/1/Memory/CPU0_C0D0") { if !shouldCrawlPath("/redfish/v1/Systems/1/Memory/CPU0_C0D0") {
t.Fatalf("expected direct DIMM resource to be crawlable") t.Fatalf("expected direct DIMM resource to be crawlable")
@@ -1393,8 +1582,12 @@ func TestRedfishSnapshotBranchKey(t *testing.T) {
} }
func TestShouldPostProbeCollectionPath(t *testing.T) { func TestShouldPostProbeCollectionPath(t *testing.T) {
if shouldPostProbeCollectionPath("/redfish/v1/Chassis/1/Sensors") {
t.Fatalf("expected sensors collection to be skipped by default")
}
t.Setenv("LOGPILE_REDFISH_SENSOR_POSTPROBE", "1")
if !shouldPostProbeCollectionPath("/redfish/v1/Chassis/1/Sensors") { if !shouldPostProbeCollectionPath("/redfish/v1/Chassis/1/Sensors") {
t.Fatalf("expected sensors collection to be post-probed") t.Fatalf("expected sensors collection to be post-probed when enabled")
} }
if !shouldPostProbeCollectionPath("/redfish/v1/Systems/1/Storage/RAID/Drives") { if !shouldPostProbeCollectionPath("/redfish/v1/Systems/1/Storage/RAID/Drives") {
t.Fatalf("expected drives collection to be post-probed") t.Fatalf("expected drives collection to be post-probed")
@@ -1406,3 +1599,69 @@ func TestShouldPostProbeCollectionPath(t *testing.T) {
t.Fatalf("expected assembly member resource to be skipped from post-probe") t.Fatalf("expected assembly member resource to be skipped from post-probe")
} }
} }
// TestRedfishSnapshotPrioritySeeds_DefaultSkipsNoisyBranches calls
// redfishSnapshotPrioritySeeds for one system, one chassis and one manager
// path. It requires that the newline-joined seeds contain none of the
// Fabrics/Backplanes/Boards/Sensors/LogServices paths and do contain the
// Memory, PCIeDevices, Drives, NetworkAdapters and NetworkProtocol paths.
func TestRedfishSnapshotPrioritySeeds_DefaultSkipsNoisyBranches(t *testing.T) {
	systems := []string{"/redfish/v1/Systems/1"}
	chassis := []string{"/redfish/v1/Chassis/1"}
	managers := []string{"/redfish/v1/Managers/1"}

	// Substring matching against the joined list mirrors how the seeds are checked.
	joined := strings.Join(redfishSnapshotPrioritySeeds(systems, chassis, managers), "\n")
	present := func(path string) bool { return strings.Contains(joined, path) }

	unwanted := []string{
		"/redfish/v1/Fabrics",
		"/redfish/v1/Chassis/1/Backplanes",
		"/redfish/v1/Chassis/1/Boards",
		"/redfish/v1/Chassis/1/Sensors",
		"/redfish/v1/Managers/1/LogServices",
	}
	for _, path := range unwanted {
		if present(path) {
			t.Fatalf("unexpected noisy seed %q", path)
		}
	}

	required := []string{
		"/redfish/v1/Systems/1/Memory",
		"/redfish/v1/Systems/1/PCIeDevices",
		"/redfish/v1/Chassis/1/Drives",
		"/redfish/v1/Chassis/1/NetworkAdapters",
		"/redfish/v1/Managers/1/NetworkProtocol",
	}
	for _, path := range required {
		if !present(path) {
			t.Fatalf("expected seed %q", path)
		}
	}
}
// TestRedfishPrefetchTargets_FilterNoisyBranches passes redfishPrefetchTargets
// a list mixing inventory paths with Backplanes, Sensors and LogServices
// paths. It requires that the newline-joined result keep the system, Memory,
// FRU, Drives and NetworkProtocol paths and contain none of the other three.
func TestRedfishPrefetchTargets_FilterNoisyBranches(t *testing.T) {
	input := []string{
		"/redfish/v1/Systems/1",
		"/redfish/v1/Systems/1/Memory",
		"/redfish/v1/Systems/1/Oem/Public/FRU",
		"/redfish/v1/Chassis/1/Drives",
		"/redfish/v1/Chassis/1/Backplanes",
		"/redfish/v1/Chassis/1/Sensors",
		"/redfish/v1/Managers/1/LogServices",
		"/redfish/v1/Managers/1/NetworkProtocol",
	}

	joined := strings.Join(redfishPrefetchTargets(input), "\n")
	present := func(path string) bool { return strings.Contains(joined, path) }

	required := []string{
		"/redfish/v1/Systems/1",
		"/redfish/v1/Systems/1/Memory",
		"/redfish/v1/Systems/1/Oem/Public/FRU",
		"/redfish/v1/Chassis/1/Drives",
		"/redfish/v1/Managers/1/NetworkProtocol",
	}
	for _, path := range required {
		if !present(path) {
			t.Fatalf("expected prefetch target %q", path)
		}
	}

	unwanted := []string{
		"/redfish/v1/Chassis/1/Backplanes",
		"/redfish/v1/Chassis/1/Sensors",
		"/redfish/v1/Managers/1/LogServices",
	}
	for _, path := range unwanted {
		if present(path) {
			t.Fatalf("unexpected noisy prefetch target %q", path)
		}
	}
}