Raise Redfish snapshot crawl limit and prioritize PCIe paths

This commit is contained in:
Mikhail Chusavitin
2026-02-24 17:41:37 +03:00
parent ce30f943df
commit 6f66a8b2a1

View File

@@ -113,7 +113,7 @@ func (c *RedfishConnector) Collect(ctx context.Context, req Request, emit Progre
emit(Progress{Status: "running", Progress: 90, Message: "Redfish: сбор расширенного snapshot..."})
}
c.debugSnapshotf("snapshot crawl start host=%s port=%d", req.Host, req.Port)
rawTree := c.collectRawRedfishTree(ctx, client, req, baseURL, emit)
rawTree := c.collectRawRedfishTree(ctx, client, req, baseURL, redfishSnapshotPrioritySeeds(systemPaths, chassisPaths, managerPaths), emit)
c.debugSnapshotf("snapshot crawl done docs=%d", len(rawTree))
result := &models.AnalysisResult{
@@ -449,8 +449,8 @@ func (c *RedfishConnector) discoverMemberPaths(ctx context.Context, client *http
return nil
}
func (c *RedfishConnector) collectRawRedfishTree(ctx context.Context, client *http.Client, req Request, baseURL string, emit ProgressFn) map[string]interface{} {
const maxDocuments = 1200
func (c *RedfishConnector) collectRawRedfishTree(ctx context.Context, client *http.Client, req Request, baseURL string, seedPaths []string, emit ProgressFn) map[string]interface{} {
maxDocuments := redfishSnapshotMaxDocuments()
const workers = 6
const heartbeatInterval = 5 * time.Second
@@ -488,6 +488,9 @@ func (c *RedfishConnector) collectRawRedfishTree(ctx context.Context, client *ht
}
enqueue("/redfish/v1")
for _, seed := range seedPaths {
enqueue(seed)
}
c.debugSnapshotf("snapshot queue initialized workers=%d max_documents=%d", workers, maxDocuments)
stopHeartbeat := make(chan struct{})
if emit != nil {
@@ -1500,6 +1503,58 @@ func compactProgressPath(p string) string {
return "..." + p[len(p)-maxLen+3:]
}
// redfishSnapshotMaxDocuments returns the document cap used by the Redfish
// snapshot crawl.
//
// The cap can be overridden with the LOGPILE_REDFISH_SNAPSHOT_MAX_DOCS
// environment variable. An empty, whitespace-only, or non-integer value is
// ignored and the default is used; an integer outside the allowed range is
// clamped to the nearest bound.
func redfishSnapshotMaxDocuments() int {
	const (
		// defaultDocs is deliberately generous so that vendor-specific
		// PCIe/GPU subtrees on HGX-class machines are captured, while still
		// fitting the memory budget of a typical developer workstation.
		defaultDocs = 100000
		// floorDocs and ceilingDocs bound any value supplied via the
		// environment.
		floorDocs   = 1200
		ceilingDocs = 500000
	)

	raw := strings.TrimSpace(os.Getenv("LOGPILE_REDFISH_SNAPSHOT_MAX_DOCS"))
	if raw == "" {
		return defaultDocs
	}

	parsed, err := strconv.Atoi(raw)
	if err != nil {
		// An unparsable override is treated the same as no override.
		return defaultDocs
	}

	switch {
	case parsed < floorDocs:
		return floorDocs
	case parsed > ceilingDocs:
		return ceilingDocs
	default:
		return parsed
	}
}
// redfishSnapshotPrioritySeeds builds the ordered list of paths handed to the
// snapshot crawl as additional seeds.
//
// The result starts with three fixed service-root children (UpdateService,
// its FirmwareInventory, and Fabrics), followed by the selected sub-resources
// of every system path, then of every chassis path, then of every manager
// path. Each candidate is passed through normalizeRedfishPath, and candidates
// whose normalized form is empty are dropped. No de-duplication is done here.
func redfishSnapshotPrioritySeeds(systemPaths, chassisPaths, managerPaths []string) []string {
	var seeds []string
	push := func(candidate string) {
		normalized := normalizeRedfishPath(candidate)
		if normalized == "" {
			return
		}
		seeds = append(seeds, normalized)
	}

	// Fixed, host-independent entry points.
	for _, fixed := range []string{
		"/redfish/v1/UpdateService",
		"/redfish/v1/UpdateService/FirmwareInventory",
		"/redfish/v1/Fabrics",
	} {
		push(fixed)
	}

	// Sub-resources to seed for each discovered parent path. Group order and
	// suffix order determine the order of the returned slice.
	groups := []struct {
		parents  []string
		suffixes []string
	}{
		{systemPaths, []string{"/PCIeDevices", "/PCIeFunctions", "/Accelerators"}},
		{chassisPaths, []string{"/PCIeDevices", "/PCIeSlots", "/NetworkAdapters", "/Power"}},
		{managerPaths, []string{"/NetworkProtocol"}},
	}
	for _, group := range groups {
		for _, parent := range group.parents {
			for _, suffix := range group.suffixes {
				push(joinPath(parent, suffix))
			}
		}
	}
	return seeds
}
func shouldReportSnapshotFetchError(err error) bool {
if err == nil {
return false