Improve Redfish raw replay recovery and GUI diagnostics
This commit is contained in:
@@ -23,8 +23,8 @@ import (
|
||||
)
|
||||
|
||||
// RedfishConnector is the receiver for the Redfish collection methods in this
// package (Collect, httpClient, collectRawRedfishTree, ...).
type RedfishConnector struct {
	// timeout is a duration setting for the connector.
	// NOTE(review): presumably the HTTP request timeout; confirm against httpClient.
	timeout time.Duration
	// debug is a boolean debug switch.
	// NOTE(review): its consumers are not visible in this view.
	debug bool
	// debugSnapshot is a boolean switch related to snapshot debugging.
	// NOTE(review): presumably gates debugSnapshotf output; confirm.
	debugSnapshot bool
}
|
||||
|
||||
@@ -88,15 +88,19 @@ func (c *RedfishConnector) Collect(ctx context.Context, req Request, emit Progre
|
||||
emit(Progress{Status: "running", Progress: 90, Message: "Redfish: сбор расширенного snapshot..."})
|
||||
}
|
||||
c.debugSnapshotf("snapshot crawl start host=%s port=%d", req.Host, req.Port)
|
||||
rawTree := c.collectRawRedfishTree(ctx, client, req, baseURL, redfishSnapshotPrioritySeeds(systemPaths, chassisPaths, managerPaths), emit)
|
||||
rawTree, fetchErrors := c.collectRawRedfishTree(ctx, client, req, baseURL, redfishSnapshotPrioritySeeds(systemPaths, chassisPaths, managerPaths), emit)
|
||||
c.debugSnapshotf("snapshot crawl done docs=%d", len(rawTree))
|
||||
if emit != nil {
|
||||
emit(Progress{Status: "running", Progress: 99, Message: "Redfish: анализ raw snapshot..."})
|
||||
}
|
||||
// Unified tunnel: live collection and raw import go through the same analyzer over redfish_tree.
|
||||
return ReplayRedfishFromRawPayloads(map[string]any{
|
||||
rawPayloads := map[string]any{
|
||||
"redfish_tree": rawTree,
|
||||
}, nil)
|
||||
}
|
||||
if len(fetchErrors) > 0 {
|
||||
rawPayloads["redfish_fetch_errors"] = fetchErrors
|
||||
}
|
||||
// Unified tunnel: live collection and raw import go through the same analyzer over redfish_tree.
|
||||
return ReplayRedfishFromRawPayloads(rawPayloads, nil)
|
||||
}
|
||||
|
||||
func (c *RedfishConnector) httpClient(req Request) *http.Client {
|
||||
@@ -444,7 +448,7 @@ func (c *RedfishConnector) collectPCIeDevices(ctx context.Context, client *http.
|
||||
for _, doc := range memberDocs {
|
||||
functionDocs := c.getLinkedPCIeFunctions(ctx, client, req, baseURL, doc)
|
||||
dev := parsePCIeDevice(doc, functionDocs)
|
||||
key := firstNonEmpty(dev.SerialNumber, dev.BDF, dev.Slot+"|"+dev.DeviceClass)
|
||||
key := firstNonEmpty(dev.BDF, dev.SerialNumber, dev.Slot+"|"+dev.DeviceClass)
|
||||
if key == "" {
|
||||
continue
|
||||
}
|
||||
@@ -506,12 +510,13 @@ func (c *RedfishConnector) discoverMemberPaths(ctx context.Context, client *http
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *RedfishConnector) collectRawRedfishTree(ctx context.Context, client *http.Client, req Request, baseURL string, seedPaths []string, emit ProgressFn) map[string]interface{} {
|
||||
func (c *RedfishConnector) collectRawRedfishTree(ctx context.Context, client *http.Client, req Request, baseURL string, seedPaths []string, emit ProgressFn) (map[string]interface{}, []map[string]interface{}) {
|
||||
maxDocuments := redfishSnapshotMaxDocuments()
|
||||
const workers = 6
|
||||
const heartbeatInterval = 5 * time.Second
|
||||
|
||||
out := make(map[string]interface{}, maxDocuments)
|
||||
fetchErrors := make(map[string]string)
|
||||
seen := make(map[string]struct{}, maxDocuments)
|
||||
rootCounts := make(map[string]int)
|
||||
var mu sync.Mutex
|
||||
@@ -602,15 +607,20 @@ func (c *RedfishConnector) collectRawRedfishTree(ctx context.Context, client *ht
|
||||
enqueue(ref)
|
||||
}
|
||||
}
|
||||
n := atomic.AddInt32(&processed, 1)
|
||||
if err != nil {
|
||||
c.debugSnapshotf("worker=%d fetch error path=%s err=%v", workerID, current, err)
|
||||
if emit != nil && shouldReportSnapshotFetchError(err) {
|
||||
emit(Progress{
|
||||
Status: "running",
|
||||
Progress: 92 + int(minInt32(n/200, 6)),
|
||||
Message: fmt.Sprintf("Redfish snapshot: ошибка на %s", compactProgressPath(current)),
|
||||
})
|
||||
n := atomic.AddInt32(&processed, 1)
|
||||
if err != nil {
|
||||
mu.Lock()
|
||||
if _, ok := fetchErrors[current]; !ok {
|
||||
fetchErrors[current] = err.Error()
|
||||
}
|
||||
mu.Unlock()
|
||||
c.debugSnapshotf("worker=%d fetch error path=%s err=%v", workerID, current, err)
|
||||
if emit != nil && shouldReportSnapshotFetchError(err) {
|
||||
emit(Progress{
|
||||
Status: "running",
|
||||
Progress: 92 + int(minInt32(n/200, 6)),
|
||||
Message: fmt.Sprintf("Redfish snapshot: ошибка на %s", compactProgressPath(current)),
|
||||
})
|
||||
}
|
||||
}
|
||||
if emit != nil && n%40 == 0 {
|
||||
@@ -677,7 +687,18 @@ func (c *RedfishConnector) collectRawRedfishTree(ctx context.Context, client *ht
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
errorList := make([]map[string]interface{}, 0, len(fetchErrors))
|
||||
for p, msg := range fetchErrors {
|
||||
errorList = append(errorList, map[string]interface{}{
|
||||
"path": p,
|
||||
"error": msg,
|
||||
})
|
||||
}
|
||||
sort.Slice(errorList, func(i, j int) bool {
|
||||
return asString(errorList[i]["path"]) < asString(errorList[j]["path"])
|
||||
})
|
||||
|
||||
return out, errorList
|
||||
}
|
||||
|
||||
func (c *RedfishConnector) probeSupermicroNVMeDiskBays(ctx context.Context, client *http.Client, req Request, baseURL, backplanePath string) []map[string]interface{} {
|
||||
|
||||
@@ -2,6 +2,8 @@ package collector
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
@@ -64,12 +66,10 @@ func ReplayRedfishFromRawPayloads(rawPayloads map[string]any, emit ProgressFn) (
|
||||
networkProtocolDoc, _ := r.getJSON(joinPath(primaryManager, "/NetworkProtocol"))
|
||||
|
||||
result := &models.AnalysisResult{
|
||||
Events: make([]models.Event, 0),
|
||||
FRU: make([]models.FRUInfo, 0),
|
||||
Sensors: make([]models.SensorReading, 0),
|
||||
RawPayloads: map[string]any{
|
||||
"redfish_tree": tree,
|
||||
},
|
||||
Events: make([]models.Event, 0),
|
||||
FRU: make([]models.FRUInfo, 0),
|
||||
Sensors: make([]models.SensorReading, 0),
|
||||
RawPayloads: cloneRawPayloads(rawPayloads),
|
||||
Hardware: &models.HardwareConfig{
|
||||
BoardInfo: parseBoardInfo(systemDoc),
|
||||
CPUs: parseCPUs(processors),
|
||||
@@ -115,11 +115,11 @@ func (r redfishSnapshotReader) getJSON(requestPath string) (map[string]interface
|
||||
func (r redfishSnapshotReader) getCollectionMembers(collectionPath string) ([]map[string]interface{}, error) {
|
||||
collection, err := r.getJSON(collectionPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return r.fallbackCollectionMembers(collectionPath, err)
|
||||
}
|
||||
refs, ok := collection["Members"].([]interface{})
|
||||
if !ok || len(refs) == 0 {
|
||||
return []map[string]interface{}{}, nil
|
||||
return r.fallbackCollectionMembers(collectionPath, nil)
|
||||
}
|
||||
out := make([]map[string]interface{}, 0, len(refs))
|
||||
for _, refAny := range refs {
|
||||
@@ -137,9 +137,61 @@ func (r redfishSnapshotReader) getCollectionMembers(collectionPath string) ([]ma
|
||||
}
|
||||
out = append(out, doc)
|
||||
}
|
||||
if len(out) == 0 {
|
||||
return r.fallbackCollectionMembers(collectionPath, nil)
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (r redfishSnapshotReader) fallbackCollectionMembers(collectionPath string, originalErr error) ([]map[string]interface{}, error) {
|
||||
prefix := strings.TrimSuffix(normalizeRedfishPath(collectionPath), "/") + "/"
|
||||
if prefix == "/" {
|
||||
if originalErr != nil {
|
||||
return nil, originalErr
|
||||
}
|
||||
return []map[string]interface{}{}, nil
|
||||
}
|
||||
paths := make([]string, 0)
|
||||
for key := range r.tree {
|
||||
p := normalizeRedfishPath(key)
|
||||
if !strings.HasPrefix(p, prefix) {
|
||||
continue
|
||||
}
|
||||
rest := strings.TrimPrefix(p, prefix)
|
||||
if rest == "" || strings.Contains(rest, "/") {
|
||||
continue
|
||||
}
|
||||
paths = append(paths, p)
|
||||
}
|
||||
if len(paths) == 0 {
|
||||
if originalErr != nil {
|
||||
return nil, originalErr
|
||||
}
|
||||
return []map[string]interface{}{}, nil
|
||||
}
|
||||
sort.Strings(paths)
|
||||
out := make([]map[string]interface{}, 0, len(paths))
|
||||
for _, p := range paths {
|
||||
doc, err := r.getJSON(p)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
out = append(out, doc)
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// cloneRawPayloads returns a shallow copy of src: the top-level map is new, but
// the values are shared with src. A nil or empty src yields nil.
func cloneRawPayloads(src map[string]any) map[string]any {
	size := len(src)
	if size == 0 {
		return nil
	}
	copied := make(map[string]any, size)
	for key := range src {
		copied[key] = src[key]
	}
	return copied
}
|
||||
|
||||
func (r redfishSnapshotReader) discoverMemberPaths(collectionPath, fallbackPath string) []string {
|
||||
collection, err := r.getJSON(collectionPath)
|
||||
if err == nil {
|
||||
@@ -482,7 +534,7 @@ func (r redfishSnapshotReader) collectPCIeDevices(systemPaths, chassisPaths []st
|
||||
for _, doc := range memberDocs {
|
||||
functionDocs := r.getLinkedPCIeFunctions(doc)
|
||||
dev := parsePCIeDevice(doc, functionDocs)
|
||||
key := firstNonEmpty(dev.SerialNumber, dev.BDF, dev.Slot+"|"+dev.DeviceClass)
|
||||
key := firstNonEmpty(dev.BDF, dev.SerialNumber, dev.Slot+"|"+dev.DeviceClass)
|
||||
if key == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -44,9 +44,9 @@ func TestRedfishConnectorCollect(t *testing.T) {
|
||||
},
|
||||
})
|
||||
register("/redfish/v1/Systems/1/Memory/DIMM1", map[string]interface{}{
|
||||
"Name": "DIMM A1",
|
||||
"CapacityMiB": 32768,
|
||||
"MemoryDeviceType": "DDR5",
|
||||
"Name": "DIMM A1",
|
||||
"CapacityMiB": 32768,
|
||||
"MemoryDeviceType": "DDR5",
|
||||
"OperatingSpeedMhz": 4800,
|
||||
"Status": map[string]interface{}{
|
||||
"Health": "OK",
|
||||
@@ -91,14 +91,14 @@ func TestRedfishConnectorCollect(t *testing.T) {
|
||||
},
|
||||
})
|
||||
register("/redfish/v1/Systems/1/PCIeFunctions/GPU1F0", map[string]interface{}{
|
||||
"FunctionId": "0000:65:00.0",
|
||||
"VendorId": "0x10DE",
|
||||
"DeviceId": "0x2331",
|
||||
"ClassCode": "0x030200",
|
||||
"CurrentLinkWidth": 16,
|
||||
"CurrentLinkSpeed": "16.0 GT/s",
|
||||
"MaxLinkWidth": 16,
|
||||
"MaxLinkSpeed": "16.0 GT/s",
|
||||
"FunctionId": "0000:65:00.0",
|
||||
"VendorId": "0x10DE",
|
||||
"DeviceId": "0x2331",
|
||||
"ClassCode": "0x030200",
|
||||
"CurrentLinkWidth": 16,
|
||||
"CurrentLinkSpeed": "16.0 GT/s",
|
||||
"MaxLinkWidth": 16,
|
||||
"MaxLinkSpeed": "16.0 GT/s",
|
||||
})
|
||||
register("/redfish/v1/Chassis/1/NetworkAdapters", map[string]interface{}{
|
||||
"Members": []map[string]string{
|
||||
@@ -239,6 +239,68 @@ func TestParsePCIeDeviceSlot_EmptyMapFallsBackToID(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestReplayRedfishFromRawPayloads_FallbackCollectionMembersByPrefix(t *testing.T) {
|
||||
raw := map[string]any{
|
||||
"redfish_tree": map[string]interface{}{
|
||||
"/redfish/v1": map[string]interface{}{
|
||||
"Systems": map[string]interface{}{"@odata.id": "/redfish/v1/Systems"},
|
||||
"Chassis": map[string]interface{}{"@odata.id": "/redfish/v1/Chassis"},
|
||||
"Managers": map[string]interface{}{"@odata.id": "/redfish/v1/Managers"},
|
||||
},
|
||||
"/redfish/v1/Systems": map[string]interface{}{
|
||||
"Members": []interface{}{
|
||||
map[string]interface{}{"@odata.id": "/redfish/v1/Systems/1"},
|
||||
},
|
||||
},
|
||||
"/redfish/v1/Systems/1": map[string]interface{}{
|
||||
"Manufacturer": "Supermicro",
|
||||
"Model": "SYS-TEST",
|
||||
"SerialNumber": "SYS123",
|
||||
},
|
||||
// Intentionally missing /redfish/v1/Systems/1/Processors collection.
|
||||
"/redfish/v1/Systems/1/Processors/CPU1": map[string]interface{}{
|
||||
"Id": "CPU1",
|
||||
"Model": "Xeon Gold",
|
||||
"TotalCores": 32,
|
||||
"TotalThreads": 64,
|
||||
},
|
||||
"/redfish/v1/Chassis": map[string]interface{}{
|
||||
"Members": []interface{}{
|
||||
map[string]interface{}{"@odata.id": "/redfish/v1/Chassis/1"},
|
||||
},
|
||||
},
|
||||
"/redfish/v1/Chassis/1": map[string]interface{}{
|
||||
"Id": "1",
|
||||
},
|
||||
"/redfish/v1/Managers": map[string]interface{}{
|
||||
"Members": []interface{}{
|
||||
map[string]interface{}{"@odata.id": "/redfish/v1/Managers/1"},
|
||||
},
|
||||
},
|
||||
"/redfish/v1/Managers/1": map[string]interface{}{
|
||||
"Id": "1",
|
||||
},
|
||||
},
|
||||
"redfish_fetch_errors": []map[string]interface{}{
|
||||
{"path": "/redfish/v1/Systems/1/Processors", "error": "status 500"},
|
||||
},
|
||||
}
|
||||
|
||||
got, err := ReplayRedfishFromRawPayloads(raw, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("replay failed: %v", err)
|
||||
}
|
||||
if got.Hardware == nil {
|
||||
t.Fatalf("expected hardware")
|
||||
}
|
||||
if len(got.Hardware.CPUs) != 1 {
|
||||
t.Fatalf("expected one CPU via prefix fallback, got %d", len(got.Hardware.CPUs))
|
||||
}
|
||||
if _, ok := got.RawPayloads["redfish_fetch_errors"]; !ok {
|
||||
t.Fatalf("expected raw payloads to preserve redfish_fetch_errors")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnrichNICFromPCIeFunctions(t *testing.T) {
|
||||
nic := parseNIC(map[string]interface{}{
|
||||
"Id": "1",
|
||||
@@ -333,7 +395,7 @@ func TestReplayCollectStorage_ProbesSupermicroNVMeDiskBayWhenCollectionEmpty(t *
|
||||
},
|
||||
"/redfish/v1/Chassis/NVMeSSD.0.Group.0.StorageBackplane/Drives": map[string]interface{}{
|
||||
"Members@odata.count": 0,
|
||||
"Members": []interface{}{},
|
||||
"Members": []interface{}{},
|
||||
},
|
||||
"/redfish/v1/Chassis/NVMeSSD.0.Group.0.StorageBackplane/Drives/Disk.Bay.0": map[string]interface{}{
|
||||
"Id": "Disk.Bay.0",
|
||||
|
||||
@@ -319,6 +319,11 @@ func (s *Server) handleGetConfig(w http.ResponseWriter, r *http.Request) {
|
||||
"target_host": result.TargetHost,
|
||||
"collected_at": result.CollectedAt,
|
||||
}
|
||||
if result.RawPayloads != nil {
|
||||
if fetchErrors, ok := result.RawPayloads["redfish_fetch_errors"]; ok {
|
||||
response["redfish_fetch_errors"] = fetchErrors
|
||||
}
|
||||
}
|
||||
|
||||
if result.Hardware == nil {
|
||||
response["hardware"] = map[string]interface{}{}
|
||||
|
||||
Reference in New Issue
Block a user