feat: redesign collection UI + add StopHostAfterCollect + TCP ping pre-probe
- Single "Подключиться" button flow: probe first, then show collect options
- Power management checkboxes: power on before / stop after collect
- Modal confirmation when enabling shutdown on already-powered-on host
- StopHostAfterCollect flag: host shuts down only when explicitly requested
- TCP ping (10 attempts, min 3 successes) before Redfish probe
- Debug payloads checkbox (Oem/Ami/Inventory/Crc, off by default)
- Remove platform_config BIOS settings collection (unreliable on AMI)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -159,14 +159,11 @@ func (c *RedfishConnector) Collect(ctx context.Context, req Request, emit Progre
|
||||
|
||||
systemPaths := c.discoverMemberPaths(discoveryCtx, snapshotClient, req, baseURL, "/redfish/v1/Systems", "/redfish/v1/Systems/1")
|
||||
primarySystem := firstNonEmptyPath(systemPaths, "/redfish/v1/Systems/1")
|
||||
poweredOnByCollector := false
|
||||
if primarySystem != "" {
|
||||
if on, changed := c.ensureHostPowerForCollection(ctx, snapshotClient, req, baseURL, primarySystem, emit); on {
|
||||
poweredOnByCollector = changed
|
||||
}
|
||||
c.ensureHostPowerForCollection(ctx, snapshotClient, req, baseURL, primarySystem, emit)
|
||||
}
|
||||
defer func() {
|
||||
if !poweredOnByCollector || primarySystem == "" {
|
||||
if primarySystem == "" || !req.StopHostAfterCollect {
|
||||
return
|
||||
}
|
||||
shutdownCtx, cancel := context.WithTimeout(context.Background(), 45*time.Second)
|
||||
@@ -313,6 +310,10 @@ func (c *RedfishConnector) Collect(ctx context.Context, req Request, emit Progre
|
||||
}
|
||||
// Collect hardware event logs separately (not part of tree-walk to avoid bloat).
|
||||
rawLogEntries := c.collectRedfishLogEntries(withRedfishTelemetryPhase(ctx, "log_entries"), snapshotClient, req, baseURL, systemPaths, managerPaths)
|
||||
var debugPayloads map[string]any
|
||||
if req.DebugPayloads {
|
||||
debugPayloads = c.collectDebugPayloads(ctx, snapshotClient, req, baseURL, systemPaths)
|
||||
}
|
||||
rawPayloads := map[string]any{
|
||||
"redfish_tree": rawTree,
|
||||
"redfish_profiles": map[string]any{
|
||||
@@ -418,6 +419,9 @@ func (c *RedfishConnector) Collect(ctx context.Context, req Request, emit Progre
|
||||
if len(rawLogEntries) > 0 {
|
||||
rawPayloads["redfish_log_entries"] = rawLogEntries
|
||||
}
|
||||
if len(debugPayloads) > 0 {
|
||||
rawPayloads["redfish_debug_payloads"] = debugPayloads
|
||||
}
|
||||
// Unified tunnel: live collection and raw import go through the same analyzer over redfish_tree.
|
||||
result, err := ReplayRedfishFromRawPayloads(rawPayloads, nil)
|
||||
if err != nil {
|
||||
@@ -618,6 +622,20 @@ func (c *RedfishConnector) restoreHostPowerAfterCollection(ctx context.Context,
|
||||
}
|
||||
}
|
||||
|
||||
// collectDebugPayloads fetches vendor-specific diagnostic endpoints on a best-effort basis.
|
||||
// Results are stored in rawPayloads["redfish_debug_payloads"] and exported with the bundle.
|
||||
// Enabled only when Request.DebugPayloads is true.
|
||||
func (c *RedfishConnector) collectDebugPayloads(ctx context.Context, client *http.Client, req Request, baseURL string, systemPaths []string) map[string]any {
|
||||
out := map[string]any{}
|
||||
for _, systemPath := range systemPaths {
|
||||
// AMI/MSI: inventory CRC groups — reveals which groups are supported by this BMC.
|
||||
if doc, err := c.getJSON(ctx, client, req, baseURL, joinPath(systemPath, "/Oem/Ami/Inventory/Crc")); err == nil {
|
||||
out[joinPath(systemPath, "/Oem/Ami/Inventory/Crc")] = doc
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// invalidateRedfishInventory POSTs to the AMI/MSI InventoryCrc endpoint to zero out
|
||||
// all known CRC groups before a host power-on. This causes the BMC to accept fresh
|
||||
// inventory from the host after boot, preventing stale inventory (ghost GPUs, wrong
|
||||
@@ -630,8 +648,6 @@ func (c *RedfishConnector) invalidateRedfishInventory(ctx context.Context, clien
|
||||
{"CPU": 0},
|
||||
{"DIMM": 0},
|
||||
{"PCIE": 0},
|
||||
{"CERTIFICATES": 0},
|
||||
{"SECUREBOOT": 0},
|
||||
},
|
||||
}
|
||||
if err := c.postJSON(ctx, client, req, baseURL, crcPath, body); err != nil {
|
||||
@@ -5609,6 +5625,7 @@ func parseFirmware(system, bios, manager, networkProtocol map[string]interface{}
|
||||
return out
|
||||
}
|
||||
|
||||
|
||||
func mapStatus(statusAny interface{}) string {
|
||||
if statusAny == nil {
|
||||
return ""
|
||||
|
||||
@@ -123,7 +123,7 @@ func ReplayRedfishFromRawPayloads(rawPayloads map[string]any, emit ProgressFn) (
|
||||
PowerSupply: psus,
|
||||
NetworkAdapters: nics,
|
||||
Firmware: firmware,
|
||||
},
|
||||
},
|
||||
}
|
||||
match := profileMatch
|
||||
for _, profile := range match.Profiles {
|
||||
@@ -277,6 +277,7 @@ func redfishFetchErrorsFromRawPayloads(rawPayloads map[string]any) map[string]st
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
func buildDriveFetchWarningEvents(rawPayloads map[string]any) []models.Event {
|
||||
errs := redfishFetchErrorsFromRawPayloads(rawPayloads)
|
||||
if len(errs) == 0 {
|
||||
|
||||
@@ -15,7 +15,9 @@ type Request struct {
|
||||
Password string
|
||||
Token string
|
||||
TLSMode string
|
||||
PowerOnIfHostOff bool
|
||||
PowerOnIfHostOff bool
|
||||
StopHostAfterCollect bool
|
||||
DebugPayloads bool
|
||||
}
|
||||
|
||||
type Progress struct {
|
||||
|
||||
@@ -43,13 +43,13 @@ func ConvertToReanimator(result *models.AnalysisResult) (*ReanimatorExport, erro
|
||||
TargetHost: targetHost,
|
||||
CollectedAt: collectedAt,
|
||||
Hardware: ReanimatorHardware{
|
||||
Board: convertBoard(result.Hardware.BoardInfo),
|
||||
Firmware: dedupeFirmware(convertFirmware(result.Hardware.Firmware)),
|
||||
CPUs: dedupeCPUs(convertCPUsFromDevices(devices, collectedAt, result.Hardware.BoardInfo.SerialNumber, buildCPUMicrocodeBySocket(result.Hardware.Firmware))),
|
||||
Memory: dedupeMemory(convertMemoryFromDevices(devices, collectedAt)),
|
||||
Storage: dedupeStorage(convertStorageFromDevices(devices, collectedAt)),
|
||||
PCIeDevices: dedupePCIe(convertPCIeFromDevices(devices, collectedAt)),
|
||||
PowerSupplies: dedupePSUs(convertPSUsFromDevices(devices, collectedAt)),
|
||||
Board: convertBoard(result.Hardware.BoardInfo),
|
||||
Firmware: dedupeFirmware(convertFirmware(result.Hardware.Firmware)),
|
||||
CPUs: dedupeCPUs(convertCPUsFromDevices(devices, collectedAt, result.Hardware.BoardInfo.SerialNumber, buildCPUMicrocodeBySocket(result.Hardware.Firmware))),
|
||||
Memory: dedupeMemory(convertMemoryFromDevices(devices, collectedAt)),
|
||||
Storage: dedupeStorage(convertStorageFromDevices(devices, collectedAt)),
|
||||
PCIeDevices: dedupePCIe(convertPCIeFromDevices(devices, collectedAt)),
|
||||
PowerSupplies: dedupePSUs(convertPSUsFromDevices(devices, collectedAt)),
|
||||
Sensors: convertSensors(result.Sensors),
|
||||
EventLogs: convertEventLogs(result.Events, collectedAt),
|
||||
},
|
||||
|
||||
@@ -19,7 +19,9 @@ type CollectRequest struct {
|
||||
Password string `json:"password,omitempty"`
|
||||
Token string `json:"token,omitempty"`
|
||||
TLSMode string `json:"tls_mode"`
|
||||
PowerOnIfHostOff bool `json:"power_on_if_host_off,omitempty"`
|
||||
PowerOnIfHostOff bool `json:"power_on_if_host_off,omitempty"`
|
||||
StopHostAfterCollect bool `json:"stop_host_after_collect,omitempty"`
|
||||
DebugPayloads bool `json:"debug_payloads,omitempty"`
|
||||
}
|
||||
|
||||
type CollectProbeResponse struct {
|
||||
|
||||
@@ -10,8 +10,10 @@ import (
|
||||
"fmt"
|
||||
"html/template"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"sync/atomic"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"sort"
|
||||
@@ -1574,6 +1576,32 @@ func (s *Server) handleCollectStart(w http.ResponseWriter, r *http.Request) {
|
||||
_ = json.NewEncoder(w).Encode(job.toJobResponse("Collection job accepted"))
|
||||
}
|
||||
|
||||
// pingHost launches `total` concurrent TCP dials to host:port (2 s timeout
// each) and reports whether at least `need` of them connected. On failure the
// second return value carries a human-readable diagnostic; on success it is
// empty.
func pingHost(host string, port int, total, need int) (bool, string) {
	addr := fmt.Sprintf("%s:%d", host, port)
	// Each probe goroutine reports its outcome on this buffered channel, so
	// every sender completes without blocking even after we stop reading.
	results := make(chan bool, total)
	for attempt := 0; attempt < total; attempt++ {
		go func() {
			conn, err := net.DialTimeout("tcp", addr, 2*time.Second)
			if err != nil {
				results <- false
				return
			}
			conn.Close()
			results <- true
		}()
	}
	succeeded := 0
	for attempt := 0; attempt < total; attempt++ {
		if <-results {
			succeeded++
		}
	}
	if succeeded >= need {
		return true, ""
	}
	return false, fmt.Sprintf("Хост недоступен: только %d из %d попыток подключения к %s прошли успешно (требуется минимум %d)", succeeded, total, addr, need)
}
|
||||
|
||||
func (s *Server) handleCollectProbe(w http.ResponseWriter, r *http.Request) {
|
||||
var req CollectRequest
|
||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||
@@ -1595,6 +1623,11 @@ func (s *Server) handleCollectProbe(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
if ok, msg := pingHost(req.Host, req.Port, 10, 3); !ok {
|
||||
jsonError(w, msg, http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(r.Context(), 20*time.Second)
|
||||
defer cancel()
|
||||
|
||||
@@ -1967,7 +2000,9 @@ func toCollectorRequest(req CollectRequest) collector.Request {
|
||||
Password: req.Password,
|
||||
Token: req.Token,
|
||||
TLSMode: req.TLSMode,
|
||||
PowerOnIfHostOff: req.PowerOnIfHostOff,
|
||||
PowerOnIfHostOff: req.PowerOnIfHostOff,
|
||||
StopHostAfterCollect: req.StopHostAfterCollect,
|
||||
DebugPayloads: req.DebugPayloads,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user