Improve Redfish recovery flow and raw export timing diagnostics

This commit is contained in:
2026-02-28 16:55:58 +03:00
parent 9a30705c9a
commit e0146adfff
10 changed files with 1437 additions and 58 deletions

View File

@@ -5,6 +5,7 @@ import (
"encoding/json"
"net/http"
"net/http/httptest"
"strings"
"testing"
"git.mchus.pro/mchus/logpile/internal/models"
@@ -303,6 +304,210 @@ func TestReplayRedfishFromRawPayloads_FallbackCollectionMembersByPrefix(t *testi
}
}
// TestReplayRedfishFromRawPayloads_ParsesInlineThresholdAndDiscreteSensors
// replays a snapshot in which the chassis ThresholdSensors and DiscreteSensors
// documents carry their entries inline under a "Sensors" array. It expects the
// Inlet_Temp reading to show up among the replayed sensors and PSU_Redundant to
// produce a "Discrete Sensor Status" event.
func TestReplayRedfishFromRawPayloads_ParsesInlineThresholdAndDiscreteSensors(t *testing.T) {
	// Local aliases keep the fixture compact. They are plain aliases, so the
	// values built below are ordinary map[string]interface{} / []interface{}.
	type doc = map[string]any
	type list = []any

	// ref builds a Redfish link object; collection builds a one-member collection.
	ref := func(path string) doc { return doc{"@odata.id": path} }
	collection := func(member string) doc { return doc{"Members": list{ref(member)}} }

	tree := doc{
		"/redfish/v1": doc{
			"Systems":  ref("/redfish/v1/Systems"),
			"Chassis":  ref("/redfish/v1/Chassis"),
			"Managers": ref("/redfish/v1/Managers"),
		},
		"/redfish/v1/Systems": collection("/redfish/v1/Systems/1"),
		"/redfish/v1/Systems/1": doc{
			"Id":           "1",
			"Manufacturer": "Inspur",
			"Model":        "NF5688M7",
			"SerialNumber": "23E100051",
		},
		"/redfish/v1/Chassis": collection("/redfish/v1/Chassis/1"),
		"/redfish/v1/Chassis/1": doc{
			"Id": "1",
		},
		"/redfish/v1/Managers": collection("/redfish/v1/Managers/1"),
		"/redfish/v1/Managers/1": doc{
			"Id": "1",
		},
		"/redfish/v1/Chassis/1/ThresholdSensors": doc{
			"Sensors": list{
				doc{
					"Name":         "Inlet_Temp",
					"Reading":      16,
					"ReadingUnits": "deg_c",
					"State":        "Enabled",
				},
			},
		},
		"/redfish/v1/Chassis/1/DiscreteSensors": doc{
			"Sensors": list{
				doc{
					"Name":  "PSU_Redundant",
					"State": "Disabled",
				},
			},
		},
	}

	got, err := ReplayRedfishFromRawPayloads(doc{"redfish_tree": tree}, nil)
	if err != nil {
		t.Fatalf("replay failed: %v", err)
	}
	if len(got.Sensors) == 0 {
		t.Fatalf("expected sensors from inline ThresholdSensors")
	}

	// The threshold sensor must be reported by name.
	sensorSeen := false
	for _, sensor := range got.Sensors {
		if sensor.Name != "Inlet_Temp" {
			continue
		}
		sensorSeen = true
		break
	}
	if !sensorSeen {
		t.Fatalf("expected Inlet_Temp sensor in replay output")
	}

	// The discrete sensor must be reported as an event, not a reading.
	eventSeen := false
	for _, event := range got.Events {
		if event.EventType != "Discrete Sensor Status" || event.SensorName != "PSU_Redundant" {
			continue
		}
		eventSeen = true
		break
	}
	if !eventSeen {
		t.Fatalf("expected discrete sensor warning event from inline DiscreteSensors")
	}
}
// TestReplayRedfishFromRawPayloads_CollectsThermalAndPowerSensors replays a
// snapshot whose chassis exposes Thermal (Fans, Temperatures) and Power
// (Oem.Public totals, PowerControl, PowerSupplies) documents and checks that
// the replay output contains a sensor for each expected name.
//
// Every missing sensor is reported, in a fixed order, so a failing run lists
// the full set of gaps rather than one arbitrary entry.
func TestReplayRedfishFromRawPayloads_CollectsThermalAndPowerSensors(t *testing.T) {
	// Plain aliases: the fixture values remain map[string]interface{} and
	// []interface{}.
	type doc = map[string]any
	type list = []any

	ref := func(path string) doc { return doc{"@odata.id": path} }
	collection := func(member string) doc { return doc{"Members": list{ref(member)}} }
	// healthy returns a fresh Status object on each call so no two fixture
	// entries share the same map.
	healthy := func() doc { return doc{"Health": "OK", "State": "Enabled"} }

	raw := map[string]any{
		"redfish_tree": doc{
			"/redfish/v1": doc{
				"Systems":  ref("/redfish/v1/Systems"),
				"Chassis":  ref("/redfish/v1/Chassis"),
				"Managers": ref("/redfish/v1/Managers"),
			},
			"/redfish/v1/Systems": collection("/redfish/v1/Systems/1"),
			"/redfish/v1/Systems/1": doc{
				"Id":           "1",
				"Manufacturer": "Inspur",
				"Model":        "NF5688M7",
				"SerialNumber": "23E100051",
			},
			"/redfish/v1/Chassis": collection("/redfish/v1/Chassis/1"),
			"/redfish/v1/Chassis/1": doc{
				"Id": "1",
			},
			"/redfish/v1/Managers": collection("/redfish/v1/Managers/1"),
			"/redfish/v1/Managers/1": doc{
				"Id": "1",
			},
			"/redfish/v1/Chassis/1/Thermal": doc{
				"Fans": list{
					doc{
						"Name":         "FAN0_F_Speed",
						"Reading":      9279,
						"ReadingUnits": "RPM",
						"Status":       healthy(),
					},
				},
				"Temperatures": list{
					doc{
						"Name":           "CPU0_Temp",
						"ReadingCelsius": 44,
						"Status":         healthy(),
					},
				},
			},
			"/redfish/v1/Chassis/1/Power": doc{
				"Oem": doc{
					"Public": doc{
						"TotalPower":              1836,
						"CurrentCPUPowerWatts":    304,
						"CurrentMemoryPowerWatts": 75,
						"CurrentFANPowerWatts":    180,
					},
				},
				"PowerControl": list{
					doc{
						"Name":               "System Power Control 1",
						"PowerConsumedWatts": 1836,
						"Status":             healthy(),
					},
				},
				"PowerSupplies": list{
					doc{
						"Name":                 "Power Supply 1",
						"PowerInputWatts":      180,
						"LastPowerOutputWatts": 155,
						"LineInputVoltage":     223.25,
						"Status":               healthy(),
					},
				},
			},
		},
	}

	got, err := ReplayRedfishFromRawPayloads(raw, nil)
	if err != nil {
		t.Fatalf("replay failed: %v", err)
	}
	if len(got.Sensors) == 0 {
		t.Fatalf("expected non-empty sensors")
	}

	// Index the produced sensor names once, then walk the expectations in a
	// fixed order. Ranging over a map and aborting on the first miss would make
	// the reported name depend on map iteration order.
	seen := make(map[string]bool, len(got.Sensors))
	for _, s := range got.Sensors {
		seen[s.Name] = true
	}
	wantNames := []string{
		"FAN0_F_Speed",
		"CPU0_Temp",
		"Total_Power",
		"System Power Control 1_Consumed",
		"Power Supply 1_InputPower",
	}
	for _, name := range wantNames {
		if !seen[name] {
			t.Errorf("expected sensor %q in replay output", name)
		}
	}
}
func TestEnrichNICFromPCIeFunctions(t *testing.T) {
nic := parseNIC(map[string]interface{}{
"Id": "1",
@@ -502,6 +707,79 @@ func TestRecoverCriticalRedfishDocsPlanB_RetriesMembersFromExistingCollection(t
}
}
// TestRecoverCriticalRedfishDocsPlanB_RetriesMembersFromSystemMemoryCollection
// starts from a snapshot that already holds the Systems/1/Memory collection but
// is missing its single DIMM member, for which a timeout was recorded as a
// fetch error. A local test server serves that DIMM document. The test expects
// plan-B recovery to report at least one recovered document, store the DIMM in
// the raw tree, and drop the corresponding fetch error.
func TestRecoverCriticalRedfishDocsPlanB_RetriesMembersFromSystemMemoryCollection(t *testing.T) {
	// Recovery tuning knobs; presumably these zero out waits and cap retries so
	// the test finishes quickly — confirm against the connector implementation.
	for _, kv := range [][2]string{
		{"LOGPILE_REDFISH_CRITICAL_COOLDOWN", "0s"},
		{"LOGPILE_REDFISH_CRITICAL_SLOW_GAP", "0s"},
		{"LOGPILE_REDFISH_CRITICAL_PLANB_RETRIES", "1"},
		{"LOGPILE_REDFISH_CRITICAL_RETRIES", "1"},
		{"LOGPILE_REDFISH_CRITICAL_BACKOFF", "0s"},
	} {
		t.Setenv(kv[0], kv[1])
	}

	const (
		systemPath = "/redfish/v1/Systems/1"
		memoryPath = "/redfish/v1/Systems/1/Memory"
		dimmPath   = "/redfish/v1/Systems/1/Memory/CPU1_C1D1"
	)

	// The only document the fake BMC serves: the DIMM absent from the snapshot.
	dimmDoc := map[string]any{
		"Id":               "CPU1_C1D1",
		"Name":             "CPU1_C1D1",
		"DeviceLocator":    "CPU1_C1D1",
		"CapacityMiB":      65536,
		"MemoryDeviceType": "DDR5",
		"Status":           map[string]any{"State": "Enabled", "Health": "OK"},
		"SerialNumber":     "DIMM-SN-001",
		"PartNumber":       "DIMM-PN-001",
	}
	serveDIMM := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		// Best effort: an encode failure would surface as a failed recovery below.
		_ = json.NewEncoder(w).Encode(dimmDoc)
	}

	mux := http.NewServeMux()
	mux.HandleFunc(dimmPath, serveDIMM)
	ts := httptest.NewServer(mux)
	defer ts.Close()

	// Snapshot state before recovery: the collection is known, the member is not.
	rawTree := map[string]any{
		memoryPath: map[string]any{
			"Members": []any{
				map[string]any{"@odata.id": dimmPath},
			},
		},
	}
	fetchErrs := map[string]string{
		dimmPath: `Get "https://example/redfish/v1/Systems/1/Memory/CPU1_C1D1": context deadline exceeded (Client.Timeout exceeded while awaiting headers)`,
	}

	// Precondition: the Memory collection must count as a critical endpoint,
	// otherwise the recovery call below would not be exercising this path.
	criticalPaths := redfishCriticalEndpoints([]string{systemPath}, nil, nil)
	memoryIsCritical := false
	for _, candidate := range criticalPaths {
		if candidate != memoryPath {
			continue
		}
		memoryIsCritical = true
		break
	}
	if !memoryIsCritical {
		t.Fatalf("expected critical endpoints to include %s", memoryPath)
	}

	connector := NewRedfishConnector()
	recovered := connector.recoverCriticalRedfishDocsPlanB(
		context.Background(),
		ts.Client(),
		Request{},
		ts.URL,
		criticalPaths,
		rawTree,
		fetchErrs,
		nil,
	)

	if recovered == 0 {
		t.Fatalf("expected plan-B to recover at least one DIMM document")
	}
	if _, present := rawTree[dimmPath]; !present {
		t.Fatalf("expected recovered DIMM doc for %s", dimmPath)
	}
	if _, stillFailed := fetchErrs[dimmPath]; stillFailed {
		t.Fatalf("expected DIMM fetch error for %s to be cleared", dimmPath)
	}
}
func TestReplayCollectStorage_ProbesSupermicroNVMeDiskBayWhenCollectionEmpty(t *testing.T) {
r := redfishSnapshotReader{tree: map[string]interface{}{
"/redfish/v1/Systems": map[string]interface{}{
@@ -828,6 +1106,70 @@ func TestReplayRedfishFromRawPayloads_AddsMissingServerModelWarning(t *testing.T
}
}
// TestReplayRedfishFromRawPayloads_AddsDriveFetchWarning replays a snapshot
// that carries a recorded fetch error for a chassis drive document and expects
// the output to contain a Redfish "Collection Warning" event whose description
// mentions that drive documents were unavailable (matched case-insensitively).
func TestReplayRedfishFromRawPayloads_AddsDriveFetchWarning(t *testing.T) {
	// Plain aliases: the fixture values remain map[string]interface{} and
	// []interface{}.
	type doc = map[string]any
	type list = []any

	ref := func(path string) doc { return doc{"@odata.id": path} }
	collection := func(member string) doc { return doc{"Members": list{ref(member)}} }

	raw := map[string]any{
		"redfish_tree": doc{
			"/redfish/v1": doc{
				"Systems":  ref("/redfish/v1/Systems"),
				"Chassis":  ref("/redfish/v1/Chassis"),
				"Managers": ref("/redfish/v1/Managers"),
			},
			"/redfish/v1/Systems": collection("/redfish/v1/Systems/1"),
			"/redfish/v1/Systems/1": doc{
				"Manufacturer": "Inspur",
				"Model":        "NF5688M7",
				"SerialNumber": "23E100051",
			},
			"/redfish/v1/Chassis": collection("/redfish/v1/Chassis/1"),
			"/redfish/v1/Chassis/1": doc{
				"Id":           "1",
				"Manufacturer": "Inspur",
				"Model":        "NF5688M7",
			},
			"/redfish/v1/Managers": collection("/redfish/v1/Managers/1"),
			"/redfish/v1/Managers/1": doc{
				"Id": "1",
			},
		},
		// One drive document that timed out during collection.
		"redfish_fetch_errors": []map[string]any{
			{
				"path":  "/redfish/v1/Chassis/1/Drives/FP00HDD00",
				"error": `Get "...": context deadline exceeded (Client.Timeout exceeded while awaiting headers)`,
			},
		},
	}

	got, err := ReplayRedfishFromRawPayloads(raw, nil)
	if err != nil {
		t.Fatalf("replay failed: %v", err)
	}

	warned := false
	for _, event := range got.Events {
		if event.Source != "Redfish" {
			continue
		}
		if event.EventType != "Collection Warning" {
			continue
		}
		description := strings.ToLower(event.Description)
		if !strings.Contains(description, "drive documents were unavailable") {
			continue
		}
		warned = true
		break
	}
	if !warned {
		t.Fatalf("expected collection warning event for drive fetch errors")
	}
}
func TestReplayCollectGPUs_SkipsModelOnlyDuplicateFromGraphicsControllers(t *testing.T) {
r := redfishSnapshotReader{tree: map[string]interface{}{
"/redfish/v1/Systems/1/PCIeDevices": map[string]interface{}{
@@ -1008,6 +1350,48 @@ func TestShouldCrawlPath_MemorySubresourcesAreSkipped(t *testing.T) {
}
}
// TestIsRedfishMemoryMemberPath checks isRedfishMemoryMemberPath against a
// table of paths: the Memory collection itself, a direct DIMM member, nested
// sub-resources of a DIMM, and a look-alike path under Chassis. Only the
// direct member under Systems/<id>/Memory is expected to match.
//
// Rows are reported with t.Errorf so one failing row does not hide the rest.
func TestIsRedfishMemoryMemberPath(t *testing.T) {
	tests := []struct {
		path string
		want bool
	}{
		{path: "/redfish/v1/Systems/1/Memory", want: false},
		{path: "/redfish/v1/Systems/1/Memory/CPU0_C0D0", want: true},
		{path: "/redfish/v1/Systems/1/Memory/CPU0_C0D0/Assembly", want: false},
		{path: "/redfish/v1/Systems/1/Memory/CPU0_C0D0/MemoryMetrics", want: false},
		{path: "/redfish/v1/Chassis/1/Memory/CPU0_C0D0", want: false},
	}
	for _, tc := range tests {
		if got := isRedfishMemoryMemberPath(tc.path); got != tc.want {
			t.Errorf("isRedfishMemoryMemberPath(%q) = %v, want %v", tc.path, got, tc.want)
		}
	}
}
// TestRedfishSnapshotBranchKey checks redfishSnapshotBranchKey against a table
// of paths. Per the expectations below, the empty path and the service root
// map to "", paths up to the first collection under a resource map to
// themselves, and collection members map to their parent collection.
//
// Rows are reported with t.Errorf so one failing row does not hide the rest.
func TestRedfishSnapshotBranchKey(t *testing.T) {
	tests := []struct {
		path string
		want string
	}{
		{path: "", want: ""},
		{path: "/redfish/v1", want: ""},
		{path: "/redfish/v1/Systems", want: "/redfish/v1/Systems"},
		{path: "/redfish/v1/Systems/1", want: "/redfish/v1/Systems/1"},
		{path: "/redfish/v1/Systems/1/Memory", want: "/redfish/v1/Systems/1/Memory"},
		{path: "/redfish/v1/Systems/1/Memory/CPU0_C0D0", want: "/redfish/v1/Systems/1/Memory"},
		{path: "/redfish/v1/Systems/1/PCIeDevices/GPU1", want: "/redfish/v1/Systems/1/PCIeDevices"},
		{path: "/redfish/v1/Chassis/1/Sensors/1", want: "/redfish/v1/Chassis/1/Sensors"},
		{path: "/redfish/v1/UpdateService/FirmwareInventory/BIOS", want: "/redfish/v1/UpdateService/FirmwareInventory"},
	}
	for _, tc := range tests {
		if got := redfishSnapshotBranchKey(tc.path); got != tc.want {
			t.Errorf("redfishSnapshotBranchKey(%q) = %q, want %q", tc.path, got, tc.want)
		}
	}
}
func TestShouldPostProbeCollectionPath(t *testing.T) {
if !shouldPostProbeCollectionPath("/redfish/v1/Chassis/1/Sensors") {
t.Fatalf("expected sensors collection to be post-probed")