fix(redfish): gate hgx diagnostic plan-b by debug toggle
This commit is contained in:
@@ -496,7 +496,6 @@ func (c *RedfishConnector) Collect(ctx context.Context, req Request, emit Progre
|
||||
return result, nil
|
||||
}
|
||||
|
||||
|
||||
// collectDebugPayloads fetches vendor-specific diagnostic endpoints on a best-effort basis.
|
||||
// Results are stored in rawPayloads["redfish_debug_payloads"] and exported with the bundle.
|
||||
// Enabled only when Request.DebugPayloads is true.
|
||||
@@ -511,7 +510,6 @@ func (c *RedfishConnector) collectDebugPayloads(ctx context.Context, client *htt
|
||||
return out
|
||||
}
|
||||
|
||||
|
||||
func firstNonEmptyPath(paths []string, fallback string) string {
|
||||
for _, p := range paths {
|
||||
if strings.TrimSpace(p) != "" {
|
||||
@@ -543,7 +541,6 @@ func redfishSystemPowerState(systemDoc map[string]interface{}) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
|
||||
func (c *RedfishConnector) postJSON(ctx context.Context, client *http.Client, req Request, baseURL, resourcePath string, payload map[string]any) error {
|
||||
body, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
@@ -2299,7 +2296,6 @@ func redfishCriticalSlowGap() time.Duration {
|
||||
return 1200 * time.Millisecond
|
||||
}
|
||||
|
||||
|
||||
func redfishSnapshotMemoryRequestTimeout() time.Duration {
|
||||
if v := strings.TrimSpace(os.Getenv("LOGPILE_REDFISH_MEMORY_TIMEOUT")); v != "" {
|
||||
if d, err := time.ParseDuration(v); err == nil && d > 0 {
|
||||
@@ -2878,11 +2874,16 @@ func (c *RedfishConnector) recoverCriticalRedfishDocsPlanB(ctx context.Context,
|
||||
timings := newRedfishPathTimingCollector(4)
|
||||
var targets []string
|
||||
seenTargets := make(map[string]struct{})
|
||||
skippedDiagnosticTargets := 0
|
||||
addTarget := func(path string) {
|
||||
path = normalizeRedfishPath(path)
|
||||
if path == "" {
|
||||
return
|
||||
}
|
||||
if !shouldIncludeCriticalPlanBPath(req, path) {
|
||||
skippedDiagnosticTargets++
|
||||
return
|
||||
}
|
||||
if _, ok := seenTargets[path]; ok {
|
||||
return
|
||||
}
|
||||
@@ -2968,6 +2969,13 @@ func (c *RedfishConnector) recoverCriticalRedfishDocsPlanB(ctx context.Context,
|
||||
return 0
|
||||
}
|
||||
if emit != nil {
|
||||
if skippedDiagnosticTargets > 0 {
|
||||
emit(Progress{
|
||||
Status: "running",
|
||||
Progress: 97,
|
||||
Message: fmt.Sprintf("Redfish: расширенная диагностика выключена, пропущено %d тяжелых diagnostic endpoint", skippedDiagnosticTargets),
|
||||
})
|
||||
}
|
||||
totalETA := redfishCriticalCooldown() + estimatePlanBETA(len(targets))
|
||||
emit(Progress{
|
||||
Status: "running",
|
||||
@@ -3073,6 +3081,39 @@ func (c *RedfishConnector) recoverCriticalRedfishDocsPlanB(ctx context.Context,
|
||||
return recovered
|
||||
}
|
||||
|
||||
func shouldIncludeCriticalPlanBPath(req Request, path string) bool {
|
||||
if req.DebugPayloads {
|
||||
return true
|
||||
}
|
||||
return !isExtendedDiagnosticCriticalPlanBPath(path)
|
||||
}
|
||||
|
||||
func isExtendedDiagnosticCriticalPlanBPath(path string) bool {
|
||||
path = normalizeRedfishPath(path)
|
||||
if path == "" {
|
||||
return false
|
||||
}
|
||||
parts := strings.Split(strings.Trim(path, "/"), "/")
|
||||
if len(parts) < 5 || parts[0] != "redfish" || parts[1] != "v1" || parts[2] != "Chassis" {
|
||||
return false
|
||||
}
|
||||
if !strings.HasPrefix(parts[3], "HGX_") {
|
||||
return false
|
||||
}
|
||||
for _, suffix := range []string{
|
||||
"/Accelerators",
|
||||
"/Assembly",
|
||||
"/Drives",
|
||||
"/NetworkAdapters",
|
||||
"/PCIeDevices",
|
||||
} {
|
||||
if strings.HasSuffix(path, suffix) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (c *RedfishConnector) recoverProfilePlanBDocs(ctx context.Context, client *http.Client, req Request, baseURL string, plan redfishprofile.AcquisitionPlan, rawTree map[string]interface{}, emit ProgressFn) int {
|
||||
if len(plan.PlanBPaths) == 0 || plan.Mode == redfishprofile.ModeFallback || !plan.Tuning.RecoveryPolicy.EnableProfilePlanB {
|
||||
return 0
|
||||
|
||||
57
internal/collector/redfish_planb_test.go
Normal file
57
internal/collector/redfish_planb_test.go
Normal file
@@ -0,0 +1,57 @@
|
||||
package collector
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestShouldIncludeCriticalPlanBPath(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
req Request
|
||||
path string
|
||||
want bool
|
||||
}{
|
||||
{
|
||||
name: "skip hgx erot pcie without extended diagnostics",
|
||||
req: Request{},
|
||||
path: "/redfish/v1/Chassis/HGX_ERoT_NVSwitch_0/PCIeDevices",
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "skip hgx chassis assembly without extended diagnostics",
|
||||
req: Request{},
|
||||
path: "/redfish/v1/Chassis/HGX_Chassis_0/Assembly",
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "keep standard chassis inventory without extended diagnostics",
|
||||
req: Request{},
|
||||
path: "/redfish/v1/Chassis/1/PCIeDevices",
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "keep nvme storage backplane drives without extended diagnostics",
|
||||
req: Request{},
|
||||
path: "/redfish/v1/Chassis/NVMeSSD.0.Group.0.StorageBackplane/Drives",
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "keep system processors without extended diagnostics",
|
||||
req: Request{},
|
||||
path: "/redfish/v1/Systems/HGX_Baseboard_0/Processors",
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "include hgx erot pcie when extended diagnostics enabled",
|
||||
req: Request{DebugPayloads: true},
|
||||
path: "/redfish/v1/Chassis/HGX_ERoT_NVSwitch_0/PCIeDevices",
|
||||
want: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := shouldIncludeCriticalPlanBPath(tt.req, tt.path); got != tt.want {
|
||||
t.Fatalf("shouldIncludeCriticalPlanBPath(%q) = %v, want %v", tt.path, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user