Harden Redfish collection for slow BMC endpoints
This commit is contained in:
@@ -80,6 +80,9 @@ func (c *RedfishConnector) Collect(ctx context.Context, req Request, emit Progre
|
|||||||
systemPaths := c.discoverMemberPaths(ctx, client, req, baseURL, "/redfish/v1/Systems", "/redfish/v1/Systems/1")
|
systemPaths := c.discoverMemberPaths(ctx, client, req, baseURL, "/redfish/v1/Systems", "/redfish/v1/Systems/1")
|
||||||
chassisPaths := c.discoverMemberPaths(ctx, client, req, baseURL, "/redfish/v1/Chassis", "/redfish/v1/Chassis/1")
|
chassisPaths := c.discoverMemberPaths(ctx, client, req, baseURL, "/redfish/v1/Chassis", "/redfish/v1/Chassis/1")
|
||||||
managerPaths := c.discoverMemberPaths(ctx, client, req, baseURL, "/redfish/v1/Managers", "/redfish/v1/Managers/1")
|
managerPaths := c.discoverMemberPaths(ctx, client, req, baseURL, "/redfish/v1/Managers", "/redfish/v1/Managers/1")
|
||||||
|
criticalPaths := redfishCriticalEndpoints(systemPaths, chassisPaths, managerPaths)
|
||||||
|
criticalClient := c.httpClientWithTimeout(req, redfishCriticalRequestTimeout())
|
||||||
|
criticalWarmDocs, criticalWarmErrs := c.collectCriticalRedfishDocsSequential(ctx, criticalClient, req, baseURL, criticalPaths)
|
||||||
|
|
||||||
if emit != nil {
|
if emit != nil {
|
||||||
emit(Progress{Status: "running", Progress: 30, Message: "Redfish: чтение структуры Redfish..."})
|
emit(Progress{Status: "running", Progress: 30, Message: "Redfish: чтение структуры Redfish..."})
|
||||||
@@ -90,27 +93,48 @@ func (c *RedfishConnector) Collect(ctx context.Context, req Request, emit Progre
|
|||||||
c.debugSnapshotf("snapshot crawl start host=%s port=%d", req.Host, req.Port)
|
c.debugSnapshotf("snapshot crawl start host=%s port=%d", req.Host, req.Port)
|
||||||
rawTree, fetchErrors := c.collectRawRedfishTree(ctx, client, req, baseURL, redfishSnapshotPrioritySeeds(systemPaths, chassisPaths, managerPaths), emit)
|
rawTree, fetchErrors := c.collectRawRedfishTree(ctx, client, req, baseURL, redfishSnapshotPrioritySeeds(systemPaths, chassisPaths, managerPaths), emit)
|
||||||
c.debugSnapshotf("snapshot crawl done docs=%d", len(rawTree))
|
c.debugSnapshotf("snapshot crawl done docs=%d", len(rawTree))
|
||||||
|
for p, doc := range criticalWarmDocs {
|
||||||
|
if _, ok := rawTree[p]; !ok {
|
||||||
|
rawTree[p] = doc
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fetchErrMap := redfishFetchErrorListToMap(fetchErrors)
|
||||||
|
for p, msg := range criticalWarmErrs {
|
||||||
|
if _, ok := rawTree[p]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, exists := fetchErrMap[p]; !exists {
|
||||||
|
fetchErrMap[p] = msg
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if recoveredN := c.recoverCriticalRedfishDocsPlanB(ctx, criticalClient, req, baseURL, criticalPaths, rawTree, fetchErrMap, emit); recoveredN > 0 {
|
||||||
|
c.debugSnapshotf("critical plan-b recovered docs=%d", recoveredN)
|
||||||
|
}
|
||||||
if emit != nil {
|
if emit != nil {
|
||||||
emit(Progress{Status: "running", Progress: 99, Message: "Redfish: анализ raw snapshot..."})
|
emit(Progress{Status: "running", Progress: 99, Message: "Redfish: анализ raw snapshot..."})
|
||||||
}
|
}
|
||||||
rawPayloads := map[string]any{
|
rawPayloads := map[string]any{
|
||||||
"redfish_tree": rawTree,
|
"redfish_tree": rawTree,
|
||||||
}
|
}
|
||||||
if len(fetchErrors) > 0 {
|
if len(fetchErrMap) > 0 {
|
||||||
rawPayloads["redfish_fetch_errors"] = fetchErrors
|
rawPayloads["redfish_fetch_errors"] = redfishFetchErrorMapToList(fetchErrMap)
|
||||||
}
|
}
|
||||||
// Unified tunnel: live collection and raw import go through the same analyzer over redfish_tree.
|
// Unified tunnel: live collection and raw import go through the same analyzer over redfish_tree.
|
||||||
return ReplayRedfishFromRawPayloads(rawPayloads, nil)
|
return ReplayRedfishFromRawPayloads(rawPayloads, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
// httpClient returns an *http.Client for req that uses the connector's
// configured timeout (c.timeout).
func (c *RedfishConnector) httpClient(req Request) *http.Client {
|
func (c *RedfishConnector) httpClient(req Request) *http.Client {
|
||||||
|
return c.httpClientWithTimeout(req, c.timeout)
|
||||||
|
}
|
||||||
|
|
||||||
|
// httpClientWithTimeout builds an *http.Client on a fresh http.Transport and
// sets the supplied timeout as the client's Timeout. Certificate verification
// is skipped only when req.TLSMode is "insecure".
func (c *RedfishConnector) httpClientWithTimeout(req Request, timeout time.Duration) *http.Client {
|
||||||
transport := &http.Transport{}
|
transport := &http.Transport{}
|
||||||
if req.TLSMode == "insecure" {
|
if req.TLSMode == "insecure" {
|
||||||
transport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true} //nolint:gosec
|
transport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true} //nolint:gosec
|
||||||
}
|
}
|
||||||
return &http.Client{
|
return &http.Client{
|
||||||
Transport: transport,
|
Transport: transport,
|
||||||
Timeout: c.timeout,
|
Timeout: timeout,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -448,7 +472,7 @@ func (c *RedfishConnector) collectPCIeDevices(ctx context.Context, client *http.
|
|||||||
for _, doc := range memberDocs {
|
for _, doc := range memberDocs {
|
||||||
functionDocs := c.getLinkedPCIeFunctions(ctx, client, req, baseURL, doc)
|
functionDocs := c.getLinkedPCIeFunctions(ctx, client, req, baseURL, doc)
|
||||||
dev := parsePCIeDevice(doc, functionDocs)
|
dev := parsePCIeDevice(doc, functionDocs)
|
||||||
key := firstNonEmpty(dev.BDF, dev.SerialNumber, dev.Slot+"|"+dev.DeviceClass)
|
key := pcieDeviceDedupKey(dev)
|
||||||
if key == "" {
|
if key == "" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@@ -468,7 +492,7 @@ func (c *RedfishConnector) collectPCIeDevices(ctx context.Context, client *http.
|
|||||||
}
|
}
|
||||||
for idx, fn := range functionDocs {
|
for idx, fn := range functionDocs {
|
||||||
dev := parsePCIeFunction(fn, idx+1)
|
dev := parsePCIeFunction(fn, idx+1)
|
||||||
key := firstNonEmpty(dev.BDF, dev.SerialNumber, dev.Slot+"|"+dev.DeviceClass)
|
key := pcieDeviceDedupKey(dev)
|
||||||
if key == "" {
|
if key == "" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@@ -775,6 +799,40 @@ func (c *RedfishConnector) probeDirectRedfishCollectionChildren(ctx context.Cont
|
|||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *RedfishConnector) probeDirectRedfishCollectionChildrenSlow(ctx context.Context, client *http.Client, req Request, baseURL, collectionPath string) map[string]map[string]interface{} {
|
||||||
|
normalized := normalizeRedfishPath(collectionPath)
|
||||||
|
maxItems, startIndex, missBudget := directNumericProbePlan(normalized)
|
||||||
|
if maxItems <= 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
out := make(map[string]map[string]interface{})
|
||||||
|
consecutiveMisses := 0
|
||||||
|
for i := startIndex; i <= maxItems; i++ {
|
||||||
|
if len(out) > 0 || i > startIndex {
|
||||||
|
select {
|
||||||
|
case <-time.After(redfishCriticalSlowGap()):
|
||||||
|
case <-ctx.Done():
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
}
|
||||||
|
path := fmt.Sprintf("%s/%d", normalized, i)
|
||||||
|
doc, err := c.getJSONWithRetry(ctx, client, req, baseURL, path, redfishCriticalPlanBAttempts(), redfishCriticalRetryBackoff())
|
||||||
|
if err != nil {
|
||||||
|
consecutiveMisses++
|
||||||
|
if consecutiveMisses >= missBudget {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
consecutiveMisses = 0
|
||||||
|
if !looksLikeRedfishResource(doc) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
out[normalizeRedfishPath(path)] = doc
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
func directNumericProbePlan(collectionPath string) (maxItems, startIndex, missBudget int) {
|
func directNumericProbePlan(collectionPath string) (maxItems, startIndex, missBudget int) {
|
||||||
switch {
|
switch {
|
||||||
case strings.HasSuffix(collectionPath, "/Systems"):
|
case strings.HasSuffix(collectionPath, "/Systems"):
|
||||||
@@ -848,6 +906,169 @@ func looksLikeRedfishResource(doc map[string]interface{}) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func shouldSlowProbeCriticalCollection(p string) bool {
|
||||||
|
p = normalizeRedfishPath(p)
|
||||||
|
for _, suffix := range []string{
|
||||||
|
"/Processors",
|
||||||
|
"/Memory",
|
||||||
|
"/Storage",
|
||||||
|
"/Drives",
|
||||||
|
"/Volumes",
|
||||||
|
"/PCIeDevices",
|
||||||
|
"/PCIeFunctions",
|
||||||
|
"/NetworkAdapters",
|
||||||
|
"/EthernetInterfaces",
|
||||||
|
"/NetworkInterfaces",
|
||||||
|
"/Sensors",
|
||||||
|
"/Fans",
|
||||||
|
"/Temperatures",
|
||||||
|
"/Voltages",
|
||||||
|
} {
|
||||||
|
if strings.HasSuffix(p, suffix) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func redfishCriticalEndpoints(systemPaths, chassisPaths, managerPaths []string) []string {
|
||||||
|
var out []string
|
||||||
|
seen := make(map[string]struct{})
|
||||||
|
add := func(p string) {
|
||||||
|
p = normalizeRedfishPath(p)
|
||||||
|
if p == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if _, ok := seen[p]; ok {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
seen[p] = struct{}{}
|
||||||
|
out = append(out, p)
|
||||||
|
}
|
||||||
|
for _, p := range systemPaths {
|
||||||
|
add(p)
|
||||||
|
add(joinPath(p, "/Bios"))
|
||||||
|
add(joinPath(p, "/SecureBoot"))
|
||||||
|
add(joinPath(p, "/Processors"))
|
||||||
|
add(joinPath(p, "/Memory"))
|
||||||
|
add(joinPath(p, "/Storage"))
|
||||||
|
add(joinPath(p, "/SimpleStorage"))
|
||||||
|
add(joinPath(p, "/PCIeDevices"))
|
||||||
|
add(joinPath(p, "/EthernetInterfaces"))
|
||||||
|
add(joinPath(p, "/NetworkInterfaces"))
|
||||||
|
}
|
||||||
|
for _, p := range chassisPaths {
|
||||||
|
add(p)
|
||||||
|
add(joinPath(p, "/Power"))
|
||||||
|
add(joinPath(p, "/Thermal"))
|
||||||
|
add(joinPath(p, "/Sensors"))
|
||||||
|
add(joinPath(p, "/NetworkAdapters"))
|
||||||
|
add(joinPath(p, "/PCIeDevices"))
|
||||||
|
add(joinPath(p, "/Drives"))
|
||||||
|
}
|
||||||
|
for _, p := range managerPaths {
|
||||||
|
add(p)
|
||||||
|
add(joinPath(p, "/NetworkProtocol"))
|
||||||
|
}
|
||||||
|
add("/redfish/v1/UpdateService")
|
||||||
|
add("/redfish/v1/UpdateService/FirmwareInventory")
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
func redfishFetchErrorListToMap(list []map[string]interface{}) map[string]string {
|
||||||
|
out := make(map[string]string, len(list))
|
||||||
|
for _, item := range list {
|
||||||
|
p := normalizeRedfishPath(asString(item["path"]))
|
||||||
|
if p == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
out[p] = asString(item["error"])
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// redfishFetchErrorMapToList converts a path -> error-text map into a list of
// {"path": ..., "error": ...} entries sorted by path, so the result is
// deterministic despite Go's random map iteration order.
//
// It returns nil for an empty or nil map.
func redfishFetchErrorMapToList(m map[string]string) []map[string]interface{} {
	if len(m) == 0 {
		return nil
	}

	// Sort the string keys directly rather than sorting the finished entries
	// and re-reading each "path" value through interface{} on every comparison.
	paths := make([]string, 0, len(m))
	for p := range m {
		paths = append(paths, p)
	}
	sort.Strings(paths)

	out := make([]map[string]interface{}, 0, len(paths))
	for _, p := range paths {
		out = append(out, map[string]interface{}{"path": p, "error": m[p]})
	}
	return out
}
|
||||||
|
|
||||||
|
// isRetryableRedfishFetchError reports whether err looks like a transient
// failure worth retrying. The decision is made purely on the lower-cased error
// text, so it also works for errors rebuilt from a stored message:
//   - the text contains a timeout, deadline, connection-reset or
//     unexpected-EOF marker, or
//   - the text starts with "status 500 ", "status 502 ", "status 503 " or
//     "status 504 ".
//
// A nil error is never retryable.
func isRetryableRedfishFetchError(err error) bool {
	if err == nil {
		return false
	}
	text := strings.ToLower(err.Error())

	for _, marker := range []string{"timeout", "deadline exceeded", "connection reset", "unexpected eof"} {
		if strings.Contains(text, marker) {
			return true
		}
	}
	for _, prefix := range []string{"status 500 ", "status 502 ", "status 503 ", "status 504 "} {
		if strings.HasPrefix(text, prefix) {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
|
// redfishEnvDuration reads a time.Duration override from the environment
// variable name. The value is whitespace-trimmed and parsed with
// time.ParseDuration. fallback is returned when the variable is unset or
// blank, fails to parse, is negative, or is zero while allowZero is false.
func redfishEnvDuration(name string, fallback time.Duration, allowZero bool) time.Duration {
	raw := strings.TrimSpace(os.Getenv(name))
	if raw == "" {
		return fallback
	}
	d, err := time.ParseDuration(raw)
	if err != nil || d < 0 || (d == 0 && !allowZero) {
		return fallback
	}
	return d
}

// redfishEnvIntInRange reads an integer override from the environment variable
// name. fallback is returned when the variable is unset or blank, is not a
// base-10 integer, or lies outside the inclusive range [lo, hi].
func redfishEnvIntInRange(name string, fallback, lo, hi int) int {
	raw := strings.TrimSpace(os.Getenv(name))
	if raw == "" {
		return fallback
	}
	n, err := strconv.Atoi(raw)
	if err != nil || n < lo || n > hi {
		return fallback
	}
	return n
}

// redfishCriticalRequestTimeout returns the per-request timeout for critical
// endpoints. Override with LOGPILE_REDFISH_CRITICAL_TIMEOUT (must be > 0);
// the default is 45s.
func redfishCriticalRequestTimeout() time.Duration {
	return redfishEnvDuration("LOGPILE_REDFISH_CRITICAL_TIMEOUT", 45*time.Second, false)
}

// redfishCriticalRetryAttempts returns how many attempts the first pass over
// critical endpoints makes per request. Override with
// LOGPILE_REDFISH_CRITICAL_RETRIES (1..10); the default is 3.
func redfishCriticalRetryAttempts() int {
	return redfishEnvIntInRange("LOGPILE_REDFISH_CRITICAL_RETRIES", 3, 1, 10)
}

// redfishCriticalPlanBAttempts returns how many attempts the plan-B recovery
// pass makes per request. Override with
// LOGPILE_REDFISH_CRITICAL_PLANB_RETRIES (1..10); the default is 3.
func redfishCriticalPlanBAttempts() int {
	return redfishEnvIntInRange("LOGPILE_REDFISH_CRITICAL_PLANB_RETRIES", 3, 1, 10)
}

// redfishCriticalRetryBackoff returns the base delay between retry attempts.
// Override with LOGPILE_REDFISH_CRITICAL_BACKOFF (>= 0; zero disables the
// delay); the default is 1.5s.
func redfishCriticalRetryBackoff() time.Duration {
	return redfishEnvDuration("LOGPILE_REDFISH_CRITICAL_BACKOFF", 1500*time.Millisecond, true)
}

// redfishCriticalCooldown returns the pause taken before the plan-B recovery
// pass starts. Override with LOGPILE_REDFISH_CRITICAL_COOLDOWN (>= 0); the
// default is 4s.
func redfishCriticalCooldown() time.Duration {
	return redfishEnvDuration("LOGPILE_REDFISH_CRITICAL_COOLDOWN", 4*time.Second, true)
}

// redfishCriticalSlowGap returns the pause between consecutive slow requests.
// Override with LOGPILE_REDFISH_CRITICAL_SLOW_GAP (>= 0); the default is 1.2s.
func redfishCriticalSlowGap() time.Duration {
	return redfishEnvDuration("LOGPILE_REDFISH_CRITICAL_SLOW_GAP", 1200*time.Millisecond, true)
}
|
||||||
|
|
||||||
func redfishLinkRefs(doc map[string]interface{}, topKey, nestedKey string) []string {
|
func redfishLinkRefs(doc map[string]interface{}, topKey, nestedKey string) []string {
|
||||||
top, ok := doc[topKey].(map[string]interface{})
|
top, ok := doc[topKey].(map[string]interface{})
|
||||||
if !ok {
|
if !ok {
|
||||||
@@ -870,6 +1091,36 @@ func redfishLinkRefs(doc map[string]interface{}, topKey, nestedKey string) []str
|
|||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func pcieDeviceDedupKey(dev models.PCIeDevice) string {
|
||||||
|
if bdf := strings.TrimSpace(dev.BDF); looksLikeCanonicalBDF(bdf) {
|
||||||
|
return strings.ToLower(bdf)
|
||||||
|
}
|
||||||
|
if s := strings.TrimSpace(dev.SerialNumber); s != "" {
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
return firstNonEmpty(
|
||||||
|
strings.TrimSpace(dev.Slot)+"|"+strings.TrimSpace(dev.PartNumber)+"|"+strings.TrimSpace(dev.DeviceClass),
|
||||||
|
strings.TrimSpace(dev.Slot)+"|"+strings.TrimSpace(dev.DeviceClass),
|
||||||
|
strings.TrimSpace(dev.PartNumber)+"|"+strings.TrimSpace(dev.DeviceClass),
|
||||||
|
strings.TrimSpace(dev.Description)+"|"+strings.TrimSpace(dev.DeviceClass),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// looksLikeCanonicalBDF is a loose shape check for a PCI bus:device.function
// address. After trimming and lower-casing, the value qualifies when it
// contains a "." and exactly one or two ":" separators, e.g. "65:00.0" or
// "0000:65:00.0". The individual fields are not validated.
func looksLikeCanonicalBDF(bdf string) bool {
	candidate := strings.TrimSpace(strings.ToLower(bdf))
	if candidate == "" || !strings.Contains(candidate, ".") {
		return false
	}
	switch strings.Count(candidate, ":") {
	case 1, 2:
		return true
	default:
		return false
	}
}
|
||||||
|
|
||||||
func shouldCrawlPath(path string) bool {
|
func shouldCrawlPath(path string) bool {
|
||||||
if path == "" {
|
if path == "" {
|
||||||
return false
|
return false
|
||||||
@@ -1013,6 +1264,163 @@ func (c *RedfishConnector) getJSON(ctx context.Context, client *http.Client, req
|
|||||||
return doc, nil
|
return doc, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *RedfishConnector) getJSONWithRetry(ctx context.Context, client *http.Client, req Request, baseURL, requestPath string, attempts int, backoff time.Duration) (map[string]interface{}, error) {
|
||||||
|
if attempts < 1 {
|
||||||
|
attempts = 1
|
||||||
|
}
|
||||||
|
var lastErr error
|
||||||
|
for i := 0; i < attempts; i++ {
|
||||||
|
doc, err := c.getJSON(ctx, client, req, baseURL, requestPath)
|
||||||
|
if err == nil {
|
||||||
|
return doc, nil
|
||||||
|
}
|
||||||
|
lastErr = err
|
||||||
|
if i == attempts-1 || !isRetryableRedfishFetchError(err) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if backoff > 0 {
|
||||||
|
select {
|
||||||
|
case <-time.After(backoff * time.Duration(i+1)):
|
||||||
|
case <-ctx.Done():
|
||||||
|
return nil, ctx.Err()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil, lastErr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *RedfishConnector) collectCriticalRedfishDocsSequential(ctx context.Context, client *http.Client, req Request, baseURL string, paths []string) (map[string]interface{}, map[string]string) {
|
||||||
|
docs := make(map[string]interface{})
|
||||||
|
errs := make(map[string]string)
|
||||||
|
for _, p := range paths {
|
||||||
|
doc, err := c.getJSONWithRetry(ctx, client, req, baseURL, p, redfishCriticalRetryAttempts(), redfishCriticalRetryBackoff())
|
||||||
|
if err != nil {
|
||||||
|
errs[p] = err.Error()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
docs[p] = doc
|
||||||
|
// For critical collections, eagerly fetch members sequentially with the same slow policy.
|
||||||
|
if members, ok := c.collectCriticalCollectionMembersSequential(ctx, client, req, baseURL, p, doc); ok {
|
||||||
|
for mp, md := range members {
|
||||||
|
docs[mp] = md
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return docs, errs
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *RedfishConnector) collectCriticalCollectionMembersSequential(ctx context.Context, client *http.Client, req Request, baseURL, collectionPath string, collectionDoc map[string]interface{}) (map[string]interface{}, bool) {
|
||||||
|
refs, ok := collectionDoc["Members"].([]interface{})
|
||||||
|
if !ok || len(refs) == 0 {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
out := make(map[string]interface{})
|
||||||
|
for _, refAny := range refs {
|
||||||
|
ref, ok := refAny.(map[string]interface{})
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
memberPath := normalizeRedfishPath(asString(ref["@odata.id"]))
|
||||||
|
if memberPath == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
doc, err := c.getJSONWithRetry(ctx, client, req, baseURL, memberPath, redfishCriticalRetryAttempts(), redfishCriticalRetryBackoff())
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
out[memberPath] = doc
|
||||||
|
}
|
||||||
|
return out, true
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *RedfishConnector) recoverCriticalRedfishDocsPlanB(ctx context.Context, client *http.Client, req Request, baseURL string, criticalPaths []string, rawTree map[string]interface{}, fetchErrs map[string]string, emit ProgressFn) int {
|
||||||
|
var targets []string
|
||||||
|
for _, p := range criticalPaths {
|
||||||
|
p = normalizeRedfishPath(p)
|
||||||
|
if p == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, ok := rawTree[p]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
errMsg, hasErr := fetchErrs[p]
|
||||||
|
if !hasErr || !isRetryableRedfishFetchError(fmt.Errorf("%s", errMsg)) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
targets = append(targets, p)
|
||||||
|
}
|
||||||
|
if len(targets) == 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
if emit != nil {
|
||||||
|
emit(Progress{Status: "running", Progress: 97, Message: "Redfish: cooldown перед повторным добором критичных endpoint..."})
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case <-time.After(redfishCriticalCooldown()):
|
||||||
|
case <-ctx.Done():
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
recovered := 0
|
||||||
|
for i, p := range targets {
|
||||||
|
if emit != nil {
|
||||||
|
emit(Progress{
|
||||||
|
Status: "running",
|
||||||
|
Progress: 97,
|
||||||
|
Message: fmt.Sprintf("Redfish: plan-B (%d/%d) %s", i+1, len(targets), compactProgressPath(p)),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
if i > 0 {
|
||||||
|
select {
|
||||||
|
case <-time.After(redfishCriticalSlowGap()):
|
||||||
|
case <-ctx.Done():
|
||||||
|
return recovered
|
||||||
|
}
|
||||||
|
}
|
||||||
|
doc, err := c.getJSONWithRetry(ctx, client, req, baseURL, p, redfishCriticalPlanBAttempts(), redfishCriticalRetryBackoff())
|
||||||
|
if err == nil {
|
||||||
|
rawTree[p] = doc
|
||||||
|
delete(fetchErrs, p)
|
||||||
|
recovered++
|
||||||
|
if members, ok := c.collectCriticalCollectionMembersSequential(ctx, client, req, baseURL, p, doc); ok {
|
||||||
|
for mp, md := range members {
|
||||||
|
if _, exists := rawTree[mp]; !exists {
|
||||||
|
rawTree[mp] = md
|
||||||
|
recovered++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if shouldSlowProbeCriticalCollection(p) {
|
||||||
|
if children := c.probeDirectRedfishCollectionChildrenSlow(ctx, client, req, baseURL, p); len(children) > 0 {
|
||||||
|
for cp, cd := range children {
|
||||||
|
if _, exists := rawTree[cp]; exists {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
rawTree[cp] = cd
|
||||||
|
recovered++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
fetchErrs[p] = err.Error()
|
||||||
|
// If collection endpoint times out, still try direct child probing for common numeric paths.
|
||||||
|
if shouldSlowProbeCriticalCollection(p) {
|
||||||
|
if children := c.probeDirectRedfishCollectionChildrenSlow(ctx, client, req, baseURL, p); len(children) > 0 {
|
||||||
|
for cp, cd := range children {
|
||||||
|
if _, exists := rawTree[cp]; exists {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
rawTree[cp] = cd
|
||||||
|
recovered++
|
||||||
|
}
|
||||||
|
delete(fetchErrs, p)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return recovered
|
||||||
|
}
|
||||||
|
|
||||||
func parseBoardInfo(system map[string]interface{}) models.BoardInfo {
|
func parseBoardInfo(system map[string]interface{}) models.BoardInfo {
|
||||||
return models.BoardInfo{
|
return models.BoardInfo{
|
||||||
Manufacturer: asString(system["Manufacturer"]),
|
Manufacturer: asString(system["Manufacturer"]),
|
||||||
|
|||||||
@@ -534,7 +534,7 @@ func (r redfishSnapshotReader) collectPCIeDevices(systemPaths, chassisPaths []st
|
|||||||
for _, doc := range memberDocs {
|
for _, doc := range memberDocs {
|
||||||
functionDocs := r.getLinkedPCIeFunctions(doc)
|
functionDocs := r.getLinkedPCIeFunctions(doc)
|
||||||
dev := parsePCIeDevice(doc, functionDocs)
|
dev := parsePCIeDevice(doc, functionDocs)
|
||||||
key := firstNonEmpty(dev.BDF, dev.SerialNumber, dev.Slot+"|"+dev.DeviceClass)
|
key := pcieDeviceDedupKey(dev)
|
||||||
if key == "" {
|
if key == "" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@@ -552,7 +552,7 @@ func (r redfishSnapshotReader) collectPCIeDevices(systemPaths, chassisPaths []st
|
|||||||
}
|
}
|
||||||
for idx, fn := range functionDocs {
|
for idx, fn := range functionDocs {
|
||||||
dev := parsePCIeFunction(fn, idx+1)
|
dev := parsePCIeFunction(fn, idx+1)
|
||||||
key := firstNonEmpty(dev.BDF, dev.SerialNumber, dev.Slot+"|"+dev.DeviceClass)
|
key := pcieDeviceDedupKey(dev)
|
||||||
if key == "" {
|
if key == "" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user