From b04877549a80bcba1c34422d4bdd0db52ff98c8e Mon Sep 17 00:00:00 2001
From: Michael Chus
Date: Mon, 13 Apr 2026 19:29:04 +0300
Subject: [PATCH] feat(collector): add Lenovo XCC profile to skip noisy snapshot paths
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Lenovo ThinkSystem SR650 V3 (and similar XCC-based servers) caused
collection runs of 23+ minutes because the BMC exposes two large,
high-error-rate subtrees in the snapshot BFS:

- Chassis/1/Sensors: 315 individual sensor members, 282/315 failing,
  ~3.7s per request → ~19 minutes wasted. These documents are never
  read by any LOGPile parser (thermal/power data comes from the
  aggregate Chassis/*/Thermal and Chassis/*/Power endpoints).
- Chassis/1/Oem/Lenovo: 75 requests (LEDs×47, Slots×26, etc.), 68/75
  failing → 8+ minutes wasted on non-inventory data.

Add a Lenovo profile (matched on a "Lenovo" system/chassis manufacturer
or OEM namespace, or an XClarity/XCC service-root product) that sets
SnapshotExcludeContains to block individual sensor documents and
non-inventory Lenovo OEM subtrees from the snapshot BFS queue. The
profile also sets rate policy thresholds and an ETA baseline
appropriate for XCC BMC latency (p95 often 3-5s).

Add SnapshotExcludeContains []string to AcquisitionTuning and check it
in the snapshot enqueue closure in redfish.go.

Co-Authored-By: Claude Sonnet 4.6 --- internal/collector/redfish.go | 10 ++- .../collector/redfishprofile/matcher_test.go | 41 ++++++++++++ .../redfishprofile/profile_lenovo.go | 65 +++++++++++++++++++ .../redfishprofile/profiles_common.go | 5 ++ internal/collector/redfishprofile/types.go | 21 +++--- 5 files changed, 130 insertions(+), 12 deletions(-) create mode 100644 internal/collector/redfishprofile/profile_lenovo.go diff --git a/internal/collector/redfish.go b/internal/collector/redfish.go index 03d438c..0361ca6 100644 --- a/internal/collector/redfish.go +++ b/internal/collector/redfish.go @@ -345,8 +345,9 @@ func (c *RedfishConnector) Collect(ctx context.Context, req Request, emit Progre "manager_critical_suffixes": acquisitionPlan.ScopedPaths.ManagerCriticalSuffixes, }, "tuning": map[string]any{ - "snapshot_max_documents": acquisitionPlan.Tuning.SnapshotMaxDocuments, - "snapshot_workers": acquisitionPlan.Tuning.SnapshotWorkers, + "snapshot_max_documents": acquisitionPlan.Tuning.SnapshotMaxDocuments, + "snapshot_workers": acquisitionPlan.Tuning.SnapshotWorkers, + "snapshot_exclude_contains": acquisitionPlan.Tuning.SnapshotExcludeContains, "prefetch_workers": acquisitionPlan.Tuning.PrefetchWorkers, "prefetch_enabled": boolPointerValue(acquisitionPlan.Tuning.PrefetchEnabled), "nvme_post_probe": boolPointerValue(acquisitionPlan.Tuning.NVMePostProbeEnabled), @@ -1343,6 +1344,11 @@ func (c *RedfishConnector) collectRawRedfishTree(ctx context.Context, client *ht if !shouldCrawlPath(path) { return } + for _, pattern := range tuning.SnapshotExcludeContains { + if pattern != "" && strings.Contains(path, pattern) { + return + } + } mu.Lock() if len(seen) >= maxDocuments { mu.Unlock() diff --git a/internal/collector/redfishprofile/matcher_test.go b/internal/collector/redfishprofile/matcher_test.go index e515f1a..3224a1e 100644 --- a/internal/collector/redfishprofile/matcher_test.go +++ b/internal/collector/redfishprofile/matcher_test.go @@ -326,6 +326,47 @@ func 
TestBuildAnalysisDirectives_SupermicroEnablesStorageRecovery(t *testing.T) } } +func TestMatchProfiles_LenovoXCCSelectsMatchedModeAndExcludesSensors(t *testing.T) { + match := MatchProfiles(MatchSignals{ + SystemManufacturer: "Lenovo", + ChassisManufacturer: "Lenovo", + OEMNamespaces: []string{"Lenovo"}, + }) + if match.Mode != ModeMatched { + t.Fatalf("expected matched mode, got %q", match.Mode) + } + found := false + for _, profile := range match.Profiles { + if profile.Name() == "lenovo" { + found = true + break + } + } + if !found { + t.Fatal("expected lenovo profile to be selected") + } + + // Verify the acquisition plan excludes noisy Lenovo-specific snapshot paths. + plan := BuildAcquisitionPlan(MatchSignals{ + SystemManufacturer: "Lenovo", + ChassisManufacturer: "Lenovo", + OEMNamespaces: []string{"Lenovo"}, + }) + wantExcluded := []string{"/Sensors/", "/Oem/Lenovo/LEDs/", "/Oem/Lenovo/Slots/"} + for _, want := range wantExcluded { + found := false + for _, ex := range plan.Tuning.SnapshotExcludeContains { + if ex == want { + found = true + break + } + } + if !found { + t.Errorf("expected SnapshotExcludeContains to include %q, got %v", want, plan.Tuning.SnapshotExcludeContains) + } + } +} + func TestMatchProfiles_OrderingIsDeterministic(t *testing.T) { signals := MatchSignals{ SystemManufacturer: "Micro-Star International Co., Ltd.", diff --git a/internal/collector/redfishprofile/profile_lenovo.go b/internal/collector/redfishprofile/profile_lenovo.go new file mode 100644 index 0000000..77fe8c1 --- /dev/null +++ b/internal/collector/redfishprofile/profile_lenovo.go @@ -0,0 +1,65 @@ +package redfishprofile + +func lenovoProfile() Profile { + return staticProfile{ + name: "lenovo", + priority: 20, + safeForFallback: true, + matchFn: func(s MatchSignals) int { + score := 0 + if containsFold(s.SystemManufacturer, "lenovo") || + containsFold(s.ChassisManufacturer, "lenovo") { + score += 80 + } + for _, ns := range s.OEMNamespaces { + if containsFold(ns, "lenovo") 
{ + score += 30 + break + } + } + // Lenovo XClarity Controller (XCC) is the BMC product line. + if containsFold(s.ServiceRootProduct, "xclarity") || + containsFold(s.ServiceRootProduct, "xcc") { + score += 30 + } + return min(score, 100) + }, + extendAcquisition: func(plan *AcquisitionPlan, _ MatchSignals) { + // Lenovo XCC BMC exposes Chassis/1/Sensors with hundreds of individual + // sensor member documents (e.g. Chassis/1/Sensors/101L1). These are + // not used by any LOGPile parser — thermal/power data is read from + // the aggregate Chassis/*/Thermal and Chassis/*/Power endpoints. On + // a real server they largely return errors, wasting many minutes. + // Lenovo OEM subtrees under Oem/Lenovo/LEDs and Oem/Lenovo/Slots also + // enumerate dozens of individual documents not relevant to inventory. + ensureSnapshotExcludeContains(plan, + "/Sensors/", // individual sensor docs (Chassis/1/Sensors/NNN) + "/Oem/Lenovo/LEDs/", // individual LED status entries (~47 per server) + "/Oem/Lenovo/Slots/", // individual slot detail entries (~26 per server) + "/Oem/Lenovo/Metrics/", // operational metrics, not inventory + "/Oem/Lenovo/History", // historical telemetry + "/Oem/Lenovo/ScheduledPower", // power scheduling config + "/Oem/Lenovo/BootSettings/BootOrder", // individual boot order lists + "/PortForwardingMap/", // network port forwarding config + ) + // Lenovo XCC BMC is typically slow (p95 latency often 3-5s even under + // normal load). Set rate thresholds that don't over-throttle on the + // first few requests, and give the ETA estimator a realistic baseline. 
+ ensureRatePolicy(plan, AcquisitionRatePolicy{ + TargetP95LatencyMS: 2000, + ThrottleP95LatencyMS: 4000, + MinSnapshotWorkers: 2, + MinPrefetchWorkers: 1, + DisablePrefetchOnErrors: true, + }) + ensureETABaseline(plan, AcquisitionETABaseline{ + DiscoverySeconds: 15, + SnapshotSeconds: 120, + PrefetchSeconds: 30, + CriticalPlanBSeconds: 40, + ProfilePlanBSeconds: 20, + }) + addPlanNote(plan, "lenovo xcc acquisition extensions enabled: noisy sensor/oem paths excluded from snapshot") + }, + } +} diff --git a/internal/collector/redfishprofile/profiles_common.go b/internal/collector/redfishprofile/profiles_common.go index 154931d..ac7efc8 100644 --- a/internal/collector/redfishprofile/profiles_common.go +++ b/internal/collector/redfishprofile/profiles_common.go @@ -56,6 +56,7 @@ func BuiltinProfiles() []Profile { supermicroProfile(), dellProfile(), hpeProfile(), + lenovoProfile(), inspurGroupOEMPlatformsProfile(), hgxProfile(), xfusionProfile(), @@ -226,6 +227,10 @@ func ensurePrefetchPolicy(plan *AcquisitionPlan, policy AcquisitionPrefetchPolic addPlanPaths(&plan.Tuning.PrefetchPolicy.ExcludeContains, policy.ExcludeContains...) } +func ensureSnapshotExcludeContains(plan *AcquisitionPlan, patterns ...string) { + addPlanPaths(&plan.Tuning.SnapshotExcludeContains, patterns...) 
+} + func min(a, b int) int { if a < b { return a diff --git a/internal/collector/redfishprofile/types.go b/internal/collector/redfishprofile/types.go index 1a538db..b9f39b3 100644 --- a/internal/collector/redfishprofile/types.go +++ b/internal/collector/redfishprofile/types.go @@ -53,16 +53,17 @@ type AcquisitionScopedPathPolicy struct { } type AcquisitionTuning struct { - SnapshotMaxDocuments int - SnapshotWorkers int - PrefetchEnabled *bool - PrefetchWorkers int - NVMePostProbeEnabled *bool - RatePolicy AcquisitionRatePolicy - ETABaseline AcquisitionETABaseline - PostProbePolicy AcquisitionPostProbePolicy - RecoveryPolicy AcquisitionRecoveryPolicy - PrefetchPolicy AcquisitionPrefetchPolicy + SnapshotMaxDocuments int + SnapshotWorkers int + SnapshotExcludeContains []string + PrefetchEnabled *bool + PrefetchWorkers int + NVMePostProbeEnabled *bool + RatePolicy AcquisitionRatePolicy + ETABaseline AcquisitionETABaseline + PostProbePolicy AcquisitionPostProbePolicy + RecoveryPolicy AcquisitionRecoveryPolicy + PrefetchPolicy AcquisitionPrefetchPolicy } type AcquisitionRatePolicy struct {