feat(collect): remove power-on/off, add skip-hung for Redfish collection

Remove power-on and power-off functionality from the Redfish collector;
keep host power-state detection and show a warning in the UI when the
host is powered off before collection starts.

Add a "Пропустить зависшие" (skip hung) button that lets the user abort
stuck Redfish collection phases without losing already-collected data.
Introduces a two-level context model in Collect(): the outer job context
covers the full lifecycle including replay; an inner collectCtx covers
snapshot, prefetch, and plan-B phases only. Closing the skipCh cancels
collectCtx immediately — aborts all in-flight HTTP requests and exits
plan-B loops — then replay runs on whatever rawTree was collected.

Signal path: UI → POST /api/collect/{id}/skip → JobManager.SkipJob()
→ close(skipCh) → goroutine in Collect() → cancelCollect().

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikhail Chusavitin
2026-04-10 13:12:38 +03:00
parent 7e9af89c46
commit 9df13327aa
14 changed files with 240 additions and 771 deletions

View File

@@ -24,6 +24,7 @@ func newCollectTestServer() (*Server, *httptest.Server) {
mux.HandleFunc("POST /api/collect", s.handleCollectStart)
mux.HandleFunc("GET /api/collect/{id}", s.handleCollectStatus)
mux.HandleFunc("POST /api/collect/{id}/cancel", s.handleCollectCancel)
mux.HandleFunc("POST /api/collect/{id}/skip", s.handleCollectSkip)
return s, httptest.NewServer(mux)
}
@@ -65,9 +66,6 @@ func TestCollectProbe(t *testing.T) {
if payload.HostPowerState != "Off" {
t.Fatalf("expected host power state Off, got %q", payload.HostPowerState)
}
if !payload.PowerControlAvailable {
t.Fatalf("expected power control to be available")
}
}
func TestCollectLifecycleToTerminal(t *testing.T) {

View File

@@ -26,12 +26,11 @@ func (c *mockConnector) Probe(ctx context.Context, req collector.Request) (*coll
hostPoweredOn = false
}
return &collector.ProbeResult{
Reachable: true,
Protocol: c.protocol,
HostPowerState: map[bool]string{true: "On", false: "Off"}[hostPoweredOn],
HostPoweredOn: hostPoweredOn,
PowerControlAvailable: true,
SystemPath: "/redfish/v1/Systems/1",
Reachable: true,
Protocol: c.protocol,
HostPowerState: map[bool]string{true: "On", false: "Off"}[hostPoweredOn],
HostPoweredOn: hostPoweredOn,
SystemPath: "/redfish/v1/Systems/1",
}, nil
}

View File

@@ -19,18 +19,15 @@ type CollectRequest struct {
Password string `json:"password,omitempty"`
Token string `json:"token,omitempty"`
TLSMode string `json:"tls_mode"`
PowerOnIfHostOff bool `json:"power_on_if_host_off,omitempty"`
StopHostAfterCollect bool `json:"stop_host_after_collect,omitempty"`
DebugPayloads bool `json:"debug_payloads,omitempty"`
DebugPayloads bool `json:"debug_payloads,omitempty"`
}
type CollectProbeResponse struct {
Reachable bool `json:"reachable"`
Protocol string `json:"protocol,omitempty"`
HostPowerState string `json:"host_power_state,omitempty"`
HostPoweredOn bool `json:"host_powered_on"`
PowerControlAvailable bool `json:"power_control_available"`
Message string `json:"message,omitempty"`
HostPowerState string `json:"host_power_state,omitempty"`
HostPoweredOn bool `json:"host_powered_on"`
Message string `json:"message,omitempty"`
}
type CollectJobResponse struct {
@@ -78,7 +75,8 @@ type Job struct {
CreatedAt time.Time
UpdatedAt time.Time
RequestMeta CollectRequestMeta
cancel func()
cancel func()
skipFn func()
}
type CollectModuleStatus struct {

View File

@@ -18,6 +18,7 @@ import (
"sort"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
@@ -1674,34 +1675,28 @@ func (s *Server) handleCollectProbe(w http.ResponseWriter, r *http.Request) {
message := "Связь с BMC установлена"
if result != nil {
switch {
case !result.HostPoweredOn && result.PowerControlAvailable:
message = "Связь с BMC установлена, host выключен. Можно включить перед сбором."
case !result.HostPoweredOn:
message = "Связь с BMC установлена, host выключен."
default:
message = "Связь с BMC установлена, host включен."
if result.HostPoweredOn {
message = "Связь с BMC установлена, host включён."
} else {
message = "Связь с BMC установлена, host выключен. Данные инвентаря могут быть неполными."
}
}
hostPowerState := ""
hostPoweredOn := false
powerControlAvailable := false
reachable := false
if result != nil {
reachable = result.Reachable
hostPowerState = strings.TrimSpace(result.HostPowerState)
hostPoweredOn = result.HostPoweredOn
powerControlAvailable = result.PowerControlAvailable
}
jsonResponse(w, CollectProbeResponse{
Reachable: reachable,
Protocol: req.Protocol,
HostPowerState: hostPowerState,
HostPoweredOn: hostPoweredOn,
PowerControlAvailable: powerControlAvailable,
Message: message,
Reachable: reachable,
Protocol: req.Protocol,
HostPowerState: hostPowerState,
HostPoweredOn: hostPoweredOn,
Message: message,
})
}
@@ -1737,6 +1732,22 @@ func (s *Server) handleCollectCancel(w http.ResponseWriter, r *http.Request) {
jsonResponse(w, job.toStatusResponse())
}
func (s *Server) handleCollectSkip(w http.ResponseWriter, r *http.Request) {
jobID := strings.TrimSpace(r.PathValue("id"))
if !isValidCollectJobID(jobID) {
jsonError(w, "Invalid collect job id", http.StatusBadRequest)
return
}
job, ok := s.jobManager.SkipJob(jobID)
if !ok {
jsonError(w, "Collect job not found", http.StatusNotFound)
return
}
jsonResponse(w, job.toStatusResponse())
}
func (s *Server) startCollectionJob(jobID string, req CollectRequest) {
ctx, cancel := context.WithCancel(context.Background())
if attached := s.jobManager.AttachJobCancel(jobID, cancel); !attached {
@@ -1744,6 +1755,11 @@ func (s *Server) startCollectionJob(jobID string, req CollectRequest) {
return
}
skipCh := make(chan struct{})
var skipOnce sync.Once
skipFn := func() { skipOnce.Do(func() { close(skipCh) }) }
s.jobManager.AttachJobSkip(jobID, skipFn)
go func() {
connector, ok := s.getCollector(req.Protocol)
if !ok {
@@ -1811,7 +1827,9 @@ func (s *Server) startCollectionJob(jobID string, req CollectRequest) {
}
}
result, err := connector.Collect(ctx, toCollectorRequest(req), emitProgress)
collectorReq := toCollectorRequest(req)
collectorReq.SkipHungCh = skipCh
result, err := connector.Collect(ctx, collectorReq, emitProgress)
if err != nil {
if ctx.Err() != nil {
return
@@ -2035,9 +2053,7 @@ func toCollectorRequest(req CollectRequest) collector.Request {
Password: req.Password,
Token: req.Token,
TLSMode: req.TLSMode,
PowerOnIfHostOff: req.PowerOnIfHostOff,
StopHostAfterCollect: req.StopHostAfterCollect,
DebugPayloads: req.DebugPayloads,
DebugPayloads: req.DebugPayloads,
}
}

View File

@@ -175,6 +175,43 @@ func (m *JobManager) UpdateJobDebugInfo(id string, info *CollectDebugInfo) (*Job
return cloned, true
}
func (m *JobManager) AttachJobSkip(id string, skipFn func()) bool {
m.mu.Lock()
defer m.mu.Unlock()
job, ok := m.jobs[id]
if !ok || job == nil || isTerminalCollectStatus(job.Status) {
return false
}
job.skipFn = skipFn
return true
}
func (m *JobManager) SkipJob(id string) (*Job, bool) {
m.mu.Lock()
job, ok := m.jobs[id]
if !ok || job == nil {
m.mu.Unlock()
return nil, false
}
if isTerminalCollectStatus(job.Status) {
cloned := cloneJob(job)
m.mu.Unlock()
return cloned, true
}
skipFn := job.skipFn
job.skipFn = nil
job.UpdatedAt = time.Now().UTC()
job.Logs = append(job.Logs, formatCollectLogLine(job.UpdatedAt, "Пропуск зависших запросов по команде пользователя"))
cloned := cloneJob(job)
m.mu.Unlock()
if skipFn != nil {
skipFn()
}
return cloned, true
}
func (m *JobManager) AttachJobCancel(id string, cancelFn context.CancelFunc) bool {
m.mu.Lock()
defer m.mu.Unlock()
@@ -229,5 +266,6 @@ func cloneJob(job *Job) *Job {
cloned.CurrentPhase = job.CurrentPhase
cloned.ETASeconds = job.ETASeconds
cloned.cancel = nil
cloned.skipFn = nil
return &cloned
}

View File

@@ -99,6 +99,7 @@ func (s *Server) setupRoutes() {
s.mux.HandleFunc("POST /api/collect/probe", s.handleCollectProbe)
s.mux.HandleFunc("GET /api/collect/{id}", s.handleCollectStatus)
s.mux.HandleFunc("POST /api/collect/{id}/cancel", s.handleCollectCancel)
s.mux.HandleFunc("POST /api/collect/{id}/skip", s.handleCollectSkip)
}
func (s *Server) Run() error {

View File

@@ -24,6 +24,7 @@ func newFlowTestServer() (*Server, *httptest.Server) {
mux.HandleFunc("POST /api/collect", s.handleCollectStart)
mux.HandleFunc("GET /api/collect/{id}", s.handleCollectStatus)
mux.HandleFunc("POST /api/collect/{id}/cancel", s.handleCollectCancel)
mux.HandleFunc("POST /api/collect/{id}/skip", s.handleCollectSkip)
return s, httptest.NewServer(mux)
}