Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| f1621efee4 | |||
| 4461249cc3 | |||
| e609fbbc26 | |||
| cc2b49ea41 |
@@ -21,10 +21,11 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
satExecCommand = exec.Command
|
||||
satLookPath = exec.LookPath
|
||||
satGlob = filepath.Glob
|
||||
satStat = os.Stat
|
||||
satExecCommand = exec.Command
|
||||
satLookPath = exec.LookPath
|
||||
satGlob = filepath.Glob
|
||||
satStat = os.Stat
|
||||
satFreeMemBytes = freeMemBytes
|
||||
|
||||
rocmSMIExecutableGlobs = []string{
|
||||
"/opt/rocm/bin/rocm-smi",
|
||||
@@ -262,6 +263,9 @@ func (s *System) ListNvidiaGPUs() ([]NvidiaGPU, error) {
|
||||
MemoryMB: memMB,
|
||||
})
|
||||
}
|
||||
sort.Slice(gpus, func(i, j int) bool {
|
||||
return gpus[i].Index < gpus[j].Index
|
||||
})
|
||||
return gpus, nil
|
||||
}
|
||||
|
||||
@@ -404,6 +408,25 @@ func resolveDCGMGPUIndices(gpuIndices []int) ([]int, error) {
|
||||
return all, nil
|
||||
}
|
||||
|
||||
func memoryStressSizeArg() string {
|
||||
if mb := envInt("BEE_VM_STRESS_SIZE_MB", 0); mb > 0 {
|
||||
return fmt.Sprintf("%dM", mb)
|
||||
}
|
||||
availBytes := satFreeMemBytes()
|
||||
if availBytes <= 0 {
|
||||
return "80%"
|
||||
}
|
||||
availMB := availBytes / (1024 * 1024)
|
||||
targetMB := (availMB * 2) / 3
|
||||
if targetMB >= 256 {
|
||||
targetMB = (targetMB / 256) * 256
|
||||
}
|
||||
if targetMB <= 0 {
|
||||
return "80%"
|
||||
}
|
||||
return fmt.Sprintf("%dM", targetMB)
|
||||
}
|
||||
|
||||
func (s *System) RunMemoryAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
|
||||
sizeMB := envInt("BEE_MEMTESTER_SIZE_MB", 128)
|
||||
passes := envInt("BEE_MEMTESTER_PASSES", 1)
|
||||
@@ -419,11 +442,9 @@ func (s *System) RunMemoryStressPack(ctx context.Context, baseDir string, durati
|
||||
if seconds <= 0 {
|
||||
seconds = envInt("BEE_VM_STRESS_SECONDS", 300)
|
||||
}
|
||||
// Use 80% of RAM by default; override with BEE_VM_STRESS_SIZE_MB.
|
||||
sizeArg := "80%"
|
||||
if mb := envInt("BEE_VM_STRESS_SIZE_MB", 0); mb > 0 {
|
||||
sizeArg = fmt.Sprintf("%dM", mb)
|
||||
}
|
||||
// Base the default on current MemAvailable and keep headroom for the OS and
|
||||
// concurrent stressors so mixed burn runs do not trip the OOM killer.
|
||||
sizeArg := memoryStressSizeArg()
|
||||
return runAcceptancePackCtx(ctx, baseDir, "memory-stress", []satJob{
|
||||
{name: "01-free-before.log", cmd: []string{"free", "-h"}},
|
||||
{name: "02-stress-ng-vm.log", cmd: []string{
|
||||
|
||||
@@ -276,6 +276,37 @@ func TestEnvIntFallback(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryStressSizeArgUsesAvailableMemory(t *testing.T) {
|
||||
oldFreeMemBytes := satFreeMemBytes
|
||||
satFreeMemBytes = func() int64 { return 96 * 1024 * 1024 * 1024 }
|
||||
t.Cleanup(func() { satFreeMemBytes = oldFreeMemBytes })
|
||||
|
||||
if got := memoryStressSizeArg(); got != "65536M" {
|
||||
t.Fatalf("sizeArg=%q want 65536M", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryStressSizeArgRespectsOverride(t *testing.T) {
|
||||
oldFreeMemBytes := satFreeMemBytes
|
||||
satFreeMemBytes = func() int64 { return 96 * 1024 * 1024 * 1024 }
|
||||
t.Cleanup(func() { satFreeMemBytes = oldFreeMemBytes })
|
||||
t.Setenv("BEE_VM_STRESS_SIZE_MB", "4096")
|
||||
|
||||
if got := memoryStressSizeArg(); got != "4096M" {
|
||||
t.Fatalf("sizeArg=%q want 4096M", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryStressSizeArgFallsBackWhenFreeMemoryUnknown(t *testing.T) {
|
||||
oldFreeMemBytes := satFreeMemBytes
|
||||
satFreeMemBytes = func() int64 { return 0 }
|
||||
t.Cleanup(func() { satFreeMemBytes = oldFreeMemBytes })
|
||||
|
||||
if got := memoryStressSizeArg(); got != "80%" {
|
||||
t.Fatalf("sizeArg=%q want 80%%", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClassifySATResult(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"bee/audit/internal/platform"
|
||||
@@ -52,6 +53,12 @@ var metricChartPalette = []string{
|
||||
"#ffbe5c",
|
||||
}
|
||||
|
||||
var gpuLabelCache struct {
|
||||
mu sync.Mutex
|
||||
loadedAt time.Time
|
||||
byIndex map[int]string
|
||||
}
|
||||
|
||||
func renderMetricChartSVG(title string, labels []string, times []time.Time, datasets [][]float64, names []string, yMin, yMax *float64, canvasHeight int, timeline []chartTimelineSegment) ([]byte, error) {
|
||||
pointCount := len(labels)
|
||||
if len(times) > pointCount {
|
||||
@@ -76,15 +83,7 @@ func renderMetricChartSVG(title string, labels []string, times []time.Time, data
|
||||
}
|
||||
}
|
||||
|
||||
mn, avg, mx := globalStats(datasets)
|
||||
if mx > 0 {
|
||||
title = fmt.Sprintf("%s ↓%s ~%s ↑%s",
|
||||
title,
|
||||
chartLegendNumber(mn),
|
||||
chartLegendNumber(avg),
|
||||
chartLegendNumber(mx),
|
||||
)
|
||||
}
|
||||
statsLabel := chartStatsLabel(datasets)
|
||||
|
||||
legendItems := []metricChartSeries{}
|
||||
for i, name := range names {
|
||||
@@ -106,7 +105,7 @@ func renderMetricChartSVG(title string, labels []string, times []time.Time, data
|
||||
|
||||
var b strings.Builder
|
||||
writeSVGOpen(&b, layout.Width, layout.Height)
|
||||
writeChartFrame(&b, title, layout.Width, layout.Height)
|
||||
writeChartFrame(&b, title, statsLabel, layout.Width, layout.Height)
|
||||
writeTimelineIdleSpans(&b, layout, start, end, timeline)
|
||||
writeVerticalGrid(&b, layout, times, pointCount, 8)
|
||||
writeHorizontalGrid(&b, layout, scale)
|
||||
@@ -126,21 +125,19 @@ func renderGPUOverviewChartSVG(idx int, samples []platform.LiveMetricSample, tim
|
||||
temp := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.TempC })
|
||||
power := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.PowerW })
|
||||
coreClock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.ClockMHz })
|
||||
memClock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.MemClockMHz })
|
||||
if temp == nil && power == nil && coreClock == nil && memClock == nil {
|
||||
if temp == nil && power == nil && coreClock == nil {
|
||||
return nil, false, nil
|
||||
}
|
||||
labels := sampleTimeLabels(samples)
|
||||
times := sampleTimes(samples)
|
||||
svg, err := drawGPUOverviewChartSVG(
|
||||
fmt.Sprintf("GPU %d Overview", idx),
|
||||
gpuDisplayLabel(idx)+" Overview",
|
||||
labels,
|
||||
times,
|
||||
[]metricChartSeries{
|
||||
{Name: "Temp C", Values: coalesceDataset(temp, len(labels)), Color: "#f05a5a", AxisTitle: "Temp C"},
|
||||
{Name: "Power W", Values: coalesceDataset(power, len(labels)), Color: "#ffb357", AxisTitle: "Power W"},
|
||||
{Name: "Core Clock MHz", Values: coalesceDataset(coreClock, len(labels)), Color: "#73bf69", AxisTitle: "Core MHz"},
|
||||
{Name: "Memory Clock MHz", Values: coalesceDataset(memClock, len(labels)), Color: "#5794f2", AxisTitle: "Memory MHz"},
|
||||
},
|
||||
timeline,
|
||||
)
|
||||
@@ -151,8 +148,8 @@ func renderGPUOverviewChartSVG(idx int, samples []platform.LiveMetricSample, tim
|
||||
}
|
||||
|
||||
func drawGPUOverviewChartSVG(title string, labels []string, times []time.Time, series []metricChartSeries, timeline []chartTimelineSegment) ([]byte, error) {
|
||||
if len(series) != 4 {
|
||||
return nil, fmt.Errorf("gpu overview requires 4 series, got %d", len(series))
|
||||
if len(series) != 3 {
|
||||
return nil, fmt.Errorf("gpu overview requires 3 series, got %d", len(series))
|
||||
}
|
||||
const (
|
||||
width = 1400
|
||||
@@ -166,7 +163,6 @@ func drawGPUOverviewChartSVG(title string, labels []string, times []time.Time, s
|
||||
leftOuterAxis = 72
|
||||
leftInnerAxis = 132
|
||||
rightInnerAxis = 1268
|
||||
rightOuterAxis = 1328
|
||||
)
|
||||
layout := chartLayout{
|
||||
Width: width,
|
||||
@@ -176,7 +172,7 @@ func drawGPUOverviewChartSVG(title string, labels []string, times []time.Time, s
|
||||
PlotTop: plotTop,
|
||||
PlotBottom: plotBottom,
|
||||
}
|
||||
axisX := []int{leftOuterAxis, leftInnerAxis, rightInnerAxis, rightOuterAxis}
|
||||
axisX := []int{leftOuterAxis, leftInnerAxis, rightInnerAxis}
|
||||
pointCount := len(labels)
|
||||
if len(times) > pointCount {
|
||||
pointCount = len(times)
|
||||
@@ -214,7 +210,7 @@ func drawGPUOverviewChartSVG(title string, labels []string, times []time.Time, s
|
||||
|
||||
var b strings.Builder
|
||||
writeSVGOpen(&b, width, height)
|
||||
writeChartFrame(&b, title, width, height)
|
||||
writeChartFrame(&b, title, "", width, height)
|
||||
writeTimelineIdleSpans(&b, layout, start, end, timeline)
|
||||
writeVerticalGrid(&b, layout, times, pointCount, 8)
|
||||
writeHorizontalGrid(&b, layout, scales[0])
|
||||
@@ -457,10 +453,14 @@ func writeSVGClose(b *strings.Builder) {
|
||||
b.WriteString("</svg>\n")
|
||||
}
|
||||
|
||||
func writeChartFrame(b *strings.Builder, title string, width, height int) {
|
||||
func writeChartFrame(b *strings.Builder, title, subtitle string, width, height int) {
|
||||
fmt.Fprintf(b, `<rect width="%d" height="%d" rx="10" ry="10" fill="#ffffff" stroke="#d7e0ea"/>`+"\n", width, height)
|
||||
fmt.Fprintf(b, `<text x="%d" y="30" text-anchor="middle" font-family="sans-serif" font-size="16" font-weight="700" fill="#1f2937">%s</text>`+"\n",
|
||||
width/2, sanitizeChartText(title))
|
||||
if strings.TrimSpace(subtitle) != "" {
|
||||
fmt.Fprintf(b, `<text x="%d" y="50" text-anchor="middle" font-family="sans-serif" font-size="12" font-weight="600" fill="#64748b">%s</text>`+"\n",
|
||||
width/2, sanitizeChartText(subtitle))
|
||||
}
|
||||
}
|
||||
|
||||
func writePlotBorder(b *strings.Builder, layout chartLayout) {
|
||||
@@ -545,7 +545,21 @@ func writeSeriesPolyline(b *strings.Builder, layout chartLayout, times []time.Ti
|
||||
x := chartXForTime(chartPointTime(times, 0), start, end, layout.PlotLeft, layout.PlotRight)
|
||||
y := chartYForValue(values[0], scale, layout.PlotTop, layout.PlotBottom)
|
||||
fmt.Fprintf(b, `<circle cx="%.1f" cy="%.1f" r="3.5" fill="%s"/>`+"\n", x, y, color)
|
||||
return
|
||||
}
|
||||
peakIdx := 0
|
||||
peakValue := values[0]
|
||||
for idx, value := range values[1:] {
|
||||
if value >= peakValue {
|
||||
peakIdx = idx + 1
|
||||
peakValue = value
|
||||
}
|
||||
}
|
||||
x := chartXForTime(chartPointTime(times, peakIdx), start, end, layout.PlotLeft, layout.PlotRight)
|
||||
y := chartYForValue(peakValue, scale, layout.PlotTop, layout.PlotBottom)
|
||||
fmt.Fprintf(b, `<circle cx="%.1f" cy="%.1f" r="4.2" fill="%s" stroke="#ffffff" stroke-width="1.6"/>`+"\n", x, y, color)
|
||||
fmt.Fprintf(b, `<path d="M %.1f %.1f L %.1f %.1f L %.1f %.1f Z" fill="%s" opacity="0.9"/>`+"\n",
|
||||
x, y-10, x-5, y-18, x+5, y-18, color)
|
||||
}
|
||||
|
||||
func writeLegend(b *strings.Builder, layout chartLayout, series []metricChartSeries) {
|
||||
@@ -711,3 +725,49 @@ func valueClamp(value float64, scale chartScale) float64 {
|
||||
}
|
||||
return value
|
||||
}
|
||||
|
||||
func chartStatsLabel(datasets [][]float64) string {
|
||||
mn, avg, mx := globalStats(datasets)
|
||||
if mx <= 0 && avg <= 0 && mn <= 0 {
|
||||
return ""
|
||||
}
|
||||
return fmt.Sprintf("min %s avg %s max %s",
|
||||
chartLegendNumber(mn),
|
||||
chartLegendNumber(avg),
|
||||
chartLegendNumber(mx),
|
||||
)
|
||||
}
|
||||
|
||||
func gpuDisplayLabel(idx int) string {
|
||||
if name := gpuModelNameByIndex(idx); name != "" {
|
||||
return fmt.Sprintf("GPU %d — %s", idx, name)
|
||||
}
|
||||
return fmt.Sprintf("GPU %d", idx)
|
||||
}
|
||||
|
||||
func gpuModelNameByIndex(idx int) string {
|
||||
now := time.Now()
|
||||
gpuLabelCache.mu.Lock()
|
||||
if now.Sub(gpuLabelCache.loadedAt) > 30*time.Second || gpuLabelCache.byIndex == nil {
|
||||
gpuLabelCache.loadedAt = now
|
||||
gpuLabelCache.byIndex = loadGPUModelNames()
|
||||
}
|
||||
name := strings.TrimSpace(gpuLabelCache.byIndex[idx])
|
||||
gpuLabelCache.mu.Unlock()
|
||||
return name
|
||||
}
|
||||
|
||||
func loadGPUModelNames() map[int]string {
|
||||
out := map[int]string{}
|
||||
gpus, err := platform.New().ListNvidiaGPUs()
|
||||
if err != nil {
|
||||
return out
|
||||
}
|
||||
for _, gpu := range gpus {
|
||||
name := strings.TrimSpace(gpu.Name)
|
||||
if name != "" {
|
||||
out[gpu.Index] = name
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
@@ -9,13 +9,14 @@ import (
|
||||
|
||||
// jobState holds the output lines and completion status of an async job.
|
||||
type jobState struct {
|
||||
lines []string
|
||||
done bool
|
||||
err string
|
||||
mu sync.Mutex
|
||||
subs []chan string
|
||||
cancel func() // optional cancel function; nil if job is not cancellable
|
||||
logPath string
|
||||
lines []string
|
||||
done bool
|
||||
err string
|
||||
mu sync.Mutex
|
||||
subs []chan string
|
||||
cancel func() // optional cancel function; nil if job is not cancellable
|
||||
logPath string
|
||||
serialPrefix string
|
||||
}
|
||||
|
||||
// abort cancels the job if it has a cancel function and is not yet done.
|
||||
@@ -36,6 +37,9 @@ func (j *jobState) append(line string) {
|
||||
if j.logPath != "" {
|
||||
appendJobLog(j.logPath, line)
|
||||
}
|
||||
if j.serialPrefix != "" {
|
||||
taskSerialWriteLine(j.serialPrefix + line)
|
||||
}
|
||||
for _, ch := range j.subs {
|
||||
select {
|
||||
case ch <- line:
|
||||
@@ -107,8 +111,11 @@ func (m *jobManager) get(id string) (*jobState, bool) {
|
||||
return j, ok
|
||||
}
|
||||
|
||||
func newTaskJobState(logPath string) *jobState {
|
||||
func newTaskJobState(logPath string, serialPrefix ...string) *jobState {
|
||||
j := &jobState{logPath: logPath}
|
||||
if len(serialPrefix) > 0 {
|
||||
j.serialPrefix = serialPrefix[0]
|
||||
}
|
||||
if logPath == "" {
|
||||
return j
|
||||
}
|
||||
|
||||
@@ -22,6 +22,13 @@ type MetricsDB struct {
|
||||
db *sql.DB
|
||||
}
|
||||
|
||||
func (m *MetricsDB) Close() error {
|
||||
if m == nil || m.db == nil {
|
||||
return nil
|
||||
}
|
||||
return m.db.Close()
|
||||
}
|
||||
|
||||
// openMetricsDB opens (or creates) the metrics database at the given path.
|
||||
func openMetricsDB(path string) (*MetricsDB, error) {
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
|
||||
@@ -164,6 +171,23 @@ func (m *MetricsDB) LoadAll() ([]platform.LiveMetricSample, error) {
|
||||
return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics ORDER BY ts`, nil)
|
||||
}
|
||||
|
||||
// LoadBetween returns samples in chronological order within the given time window.
|
||||
func (m *MetricsDB) LoadBetween(start, end time.Time) ([]platform.LiveMetricSample, error) {
|
||||
if m == nil {
|
||||
return nil, nil
|
||||
}
|
||||
if start.IsZero() || end.IsZero() {
|
||||
return nil, nil
|
||||
}
|
||||
if end.Before(start) {
|
||||
start, end = end, start
|
||||
}
|
||||
return m.loadSamples(
|
||||
`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics WHERE ts>=? AND ts<=? ORDER BY ts`,
|
||||
start.Unix(), end.Unix(),
|
||||
)
|
||||
}
|
||||
|
||||
// loadSamples reconstructs LiveMetricSample rows from the normalized tables.
|
||||
func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetricSample, error) {
|
||||
rows, err := m.db.Query(query, args...)
|
||||
@@ -364,9 +388,6 @@ func (m *MetricsDB) ExportCSV(w io.Writer) error {
|
||||
return cw.Error()
|
||||
}
|
||||
|
||||
// Close closes the database.
|
||||
func (m *MetricsDB) Close() { _ = m.db.Close() }
|
||||
|
||||
func nullFloat(v float64) sql.NullFloat64 {
|
||||
return sql.NullFloat64{Float64: v, Valid: true}
|
||||
}
|
||||
|
||||
@@ -143,3 +143,32 @@ CREATE TABLE temp_metrics (
|
||||
t.Fatalf("MemClockMHz=%v want 2600", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMetricsDBLoadBetweenFiltersWindow(t *testing.T) {
|
||||
db, err := openMetricsDB(filepath.Join(t.TempDir(), "metrics.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("openMetricsDB: %v", err)
|
||||
}
|
||||
defer db.Close()
|
||||
|
||||
base := time.Unix(1_700_000_000, 0).UTC()
|
||||
for i := 0; i < 5; i++ {
|
||||
if err := db.Write(platform.LiveMetricSample{
|
||||
Timestamp: base.Add(time.Duration(i) * time.Minute),
|
||||
CPULoadPct: float64(i),
|
||||
}); err != nil {
|
||||
t.Fatalf("Write(%d): %v", i, err)
|
||||
}
|
||||
}
|
||||
|
||||
got, err := db.LoadBetween(base.Add(1*time.Minute), base.Add(3*time.Minute))
|
||||
if err != nil {
|
||||
t.Fatalf("LoadBetween: %v", err)
|
||||
}
|
||||
if len(got) != 3 {
|
||||
t.Fatalf("LoadBetween len=%d want 3", len(got))
|
||||
}
|
||||
if !got[0].Timestamp.Equal(base.Add(1*time.Minute)) || !got[2].Timestamp.Equal(base.Add(3*time.Minute)) {
|
||||
t.Fatalf("window=%v..%v", got[0].Timestamp, got[2].Timestamp)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -834,12 +834,6 @@ func renderMetrics() string {
|
||||
<img id="chart-gpu-all-clock" data-chart-refresh="1" src="/api/metrics/chart/gpu-all-clock.svg" style="width:100%;display:block;border-radius:6px" alt="GPU core clock">
|
||||
</div>
|
||||
</div>
|
||||
<div class="card" style="margin-bottom:16px">
|
||||
<div class="card-head">GPU — Memory Clock</div>
|
||||
<div class="card-body" style="padding:8px">
|
||||
<img id="chart-gpu-all-memclock" data-chart-refresh="1" src="/api/metrics/chart/gpu-all-memclock.svg" style="width:100%;display:block;border-radius:6px" alt="GPU memory clock">
|
||||
</div>
|
||||
</div>
|
||||
<div class="card" style="margin-bottom:16px">
|
||||
<div class="card-head">GPU — Power</div>
|
||||
<div class="card-body" style="padding:8px">
|
||||
@@ -860,6 +854,35 @@ func renderMetrics() string {
|
||||
<script>
|
||||
let gpuChartKey = '';
|
||||
const gpuChartModeStorageKey = 'bee.metrics.gpuChartMode';
|
||||
let metricsNvidiaGPUsPromise = null;
|
||||
|
||||
function loadMetricsNvidiaGPUs() {
|
||||
if (!metricsNvidiaGPUsPromise) {
|
||||
metricsNvidiaGPUsPromise = fetch('/api/gpu/nvidia')
|
||||
.then(function(r) {
|
||||
if (!r.ok) throw new Error('Failed to load NVIDIA GPUs.');
|
||||
return r.json();
|
||||
})
|
||||
.then(function(list) { return Array.isArray(list) ? list : []; })
|
||||
.catch(function() { return []; });
|
||||
}
|
||||
return metricsNvidiaGPUsPromise;
|
||||
}
|
||||
|
||||
function metricsGPUNameMap(list) {
|
||||
const out = {};
|
||||
(list || []).forEach(function(gpu) {
|
||||
const idx = Number(gpu.index);
|
||||
if (!Number.isFinite(idx) || !gpu.name) return;
|
||||
out[idx] = gpu.name;
|
||||
});
|
||||
return out;
|
||||
}
|
||||
|
||||
function metricsGPUDisplayLabel(idx, names) {
|
||||
const name = names && names[idx];
|
||||
return name ? ('GPU ' + idx + ' — ' + name) : ('GPU ' + idx);
|
||||
}
|
||||
|
||||
function loadGPUChartModePreference() {
|
||||
try {
|
||||
@@ -909,14 +932,15 @@ function gpuIndices(rows) {
|
||||
return out.sort(function(a, b) { return a - b; });
|
||||
}
|
||||
|
||||
function renderGPUOverviewCards(indices) {
|
||||
function renderGPUOverviewCards(indices, names) {
|
||||
const host = document.getElementById('gpu-metrics-by-gpu');
|
||||
if (!host) return;
|
||||
host.innerHTML = indices.map(function(idx) {
|
||||
const label = metricsGPUDisplayLabel(idx, names);
|
||||
return '<div class="card" style="margin-bottom:16px">' +
|
||||
'<div class="card-head">GPU ' + idx + ' — Overview</div>' +
|
||||
'<div class="card-head">' + label + ' — Overview</div>' +
|
||||
'<div class="card-body" style="padding:8px">' +
|
||||
'<img id="chart-gpu-' + idx + '-overview" data-chart-refresh="1" src="/api/metrics/chart/gpu/' + idx + '-overview.svg" style="width:100%;display:block;border-radius:6px" alt="GPU ' + idx + ' overview">' +
|
||||
'<img id="chart-gpu-' + idx + '-overview" data-chart-refresh="1" src="/api/metrics/chart/gpu/' + idx + '-overview.svg" style="width:100%;display:block;border-radius:6px" alt="' + label + ' overview">' +
|
||||
'</div></div>';
|
||||
}).join('');
|
||||
}
|
||||
@@ -936,18 +960,21 @@ function syncMetricsLayout(d) {
|
||||
const section = document.getElementById('gpu-metrics-section');
|
||||
const summary = document.getElementById('gpu-metrics-summary');
|
||||
const indices = gpuIndices(d.gpus);
|
||||
if (section) section.style.display = indices.length > 0 ? '' : 'none';
|
||||
if (summary) {
|
||||
summary.textContent = indices.length > 0
|
||||
? ('Detected GPUs: ' + indices.map(function(idx) { return 'GPU ' + idx; }).join(', '))
|
||||
: 'No GPUs detected in live metrics.';
|
||||
}
|
||||
const nextKey = indices.join(',');
|
||||
if (nextKey !== gpuChartKey) {
|
||||
renderGPUOverviewCards(indices);
|
||||
gpuChartKey = nextKey;
|
||||
}
|
||||
applyGPUChartMode();
|
||||
loadMetricsNvidiaGPUs().then(function(gpus) {
|
||||
const names = metricsGPUNameMap(gpus);
|
||||
if (section) section.style.display = indices.length > 0 ? '' : 'none';
|
||||
if (summary) {
|
||||
summary.textContent = indices.length > 0
|
||||
? ('Detected GPUs: ' + indices.map(function(idx) { return metricsGPUDisplayLabel(idx, names); }).join(', '))
|
||||
: 'No GPUs detected in live metrics.';
|
||||
}
|
||||
const nextKey = indices.join(',') + '|' + indices.map(function(idx) { return names[idx] || ''; }).join(',');
|
||||
if (nextKey !== gpuChartKey) {
|
||||
renderGPUOverviewCards(indices, names);
|
||||
gpuChartKey = nextKey;
|
||||
}
|
||||
applyGPUChartMode();
|
||||
});
|
||||
}
|
||||
|
||||
function loadMetricsLayout() {
|
||||
@@ -1029,17 +1056,17 @@ func renderValidate(opts HandlerOptions) string {
|
||||
`</div>
|
||||
<div style="height:1px;background:var(--border);margin:16px 0"></div>
|
||||
<div class="grid3">
|
||||
` + renderSATCard("nvidia", "NVIDIA GPU", "runSAT('nvidia')", "", renderValidateCardBody(
|
||||
` + renderSATCard("nvidia", "NVIDIA GPU", "runNvidiaValidateSet('nvidia')", "", renderValidateCardBody(
|
||||
inv.NVIDIA,
|
||||
`Runs NVIDIA diagnostics and board inventory checks.`,
|
||||
`<code>nvidia-smi</code>, <code>dmidecode</code>, <code>dcgmi diag</code>`,
|
||||
`Diag level is taken from Validate Profile.`,
|
||||
`Runs one GPU at a time. Diag level is taken from Validate Profile.`,
|
||||
)) +
|
||||
renderSATCard("nvidia-targeted-stress", "NVIDIA GPU Targeted Stress", "runSAT('nvidia-targeted-stress')", "", renderValidateCardBody(
|
||||
renderSATCard("nvidia-targeted-stress", "NVIDIA GPU Targeted Stress", "runNvidiaValidateSet('nvidia-targeted-stress')", "", renderValidateCardBody(
|
||||
inv.NVIDIA,
|
||||
`Runs a controlled NVIDIA DCGM load in Validate to check stability under moderate stress.`,
|
||||
`<code>dcgmi diag targeted_stress</code>`,
|
||||
`Uses the fixed DCGM targeted stress recipe.`,
|
||||
`Runs one GPU at a time with the fixed DCGM targeted stress recipe.`,
|
||||
)) +
|
||||
`</div>
|
||||
<div class="grid3" style="margin-top:16px">
|
||||
@@ -1077,17 +1104,37 @@ function satCPUDurationFromDiagLevel() {
|
||||
function satLabels() {
|
||||
return {nvidia:'Validate GPU', 'nvidia-targeted-stress':'NVIDIA Targeted Stress Validate (dcgmi diag targeted_stress)', memory:'Validate Memory', storage:'Validate Storage', cpu:'Validate CPU', amd:'Validate AMD GPU', 'amd-mem':'AMD GPU MEM Integrity', 'amd-bandwidth':'AMD GPU MEM Bandwidth'};
|
||||
}
|
||||
function satRequestBody(target) {
|
||||
let satNvidiaGPUsPromise = null;
|
||||
function loadSatNvidiaGPUs() {
|
||||
if (!satNvidiaGPUsPromise) {
|
||||
satNvidiaGPUsPromise = fetch('/api/gpu/nvidia')
|
||||
.then(r => {
|
||||
if (!r.ok) throw new Error('Failed to load NVIDIA GPUs.');
|
||||
return r.json();
|
||||
})
|
||||
.then(list => Array.isArray(list) ? list : []);
|
||||
}
|
||||
return satNvidiaGPUsPromise;
|
||||
}
|
||||
function satGPUDisplayName(gpu) {
|
||||
const idx = (gpu && Number.isFinite(Number(gpu.index))) ? Number(gpu.index) : 0;
|
||||
const name = gpu && gpu.name ? gpu.name : ('GPU ' + idx);
|
||||
return 'GPU ' + idx + ' — ' + name;
|
||||
}
|
||||
function satRequestBody(target, overrides) {
|
||||
const body = {};
|
||||
const labels = satLabels();
|
||||
body.display_name = labels[target] || ('Validate ' + target);
|
||||
if (target === 'nvidia') body.diag_level = satDiagLevel();
|
||||
if (target === 'nvidia-targeted-stress') body.duration = 300;
|
||||
if (target === 'cpu') body.duration = satCPUDurationFromDiagLevel();
|
||||
if (overrides) {
|
||||
Object.keys(overrides).forEach(key => { body[key] = overrides[key]; });
|
||||
}
|
||||
return body;
|
||||
}
|
||||
function enqueueSATTarget(target) {
|
||||
return fetch('/api/sat/'+target+'/run', {method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify(satRequestBody(target))})
|
||||
function enqueueSATTarget(target, overrides) {
|
||||
return fetch('/api/sat/'+target+'/run', {method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify(satRequestBody(target, overrides))})
|
||||
.then(r => r.json());
|
||||
}
|
||||
function selectedAMDValidateTargets() {
|
||||
@@ -1101,12 +1148,15 @@ function selectedAMDValidateTargets() {
|
||||
return targets;
|
||||
}
|
||||
function runSAT(target) {
|
||||
return runSATWithOverrides(target, null);
|
||||
}
|
||||
function runSATWithOverrides(target, overrides) {
|
||||
if (satES) { satES.close(); satES = null; }
|
||||
document.getElementById('sat-output').style.display='block';
|
||||
document.getElementById('sat-title').textContent = '— ' + target;
|
||||
const term = document.getElementById('sat-terminal');
|
||||
term.textContent = 'Enqueuing ' + target + ' test...\n';
|
||||
return enqueueSATTarget(target)
|
||||
return enqueueSATTarget(target, overrides)
|
||||
.then(d => {
|
||||
term.textContent += 'Task ' + d.task_id + ' queued. Streaming log...\n';
|
||||
satES = new EventSource('/api/tasks/'+d.task_id+'/stream');
|
||||
@@ -1114,6 +1164,55 @@ function runSAT(target) {
|
||||
satES.addEventListener('done', e => { satES.close(); satES=null; term.textContent += (e.data ? '\nERROR: '+e.data : '\nCompleted.')+'\n'; });
|
||||
});
|
||||
}
|
||||
function expandSATTarget(target) {
|
||||
if (target !== 'nvidia' && target !== 'nvidia-targeted-stress') {
|
||||
return Promise.resolve([{target: target}]);
|
||||
}
|
||||
return loadSatNvidiaGPUs().then(gpus => gpus.map(gpu => ({
|
||||
target: target,
|
||||
overrides: {
|
||||
gpu_indices: [Number(gpu.index)],
|
||||
display_name: (satLabels()[target] || ('Validate ' + target)) + ' (' + satGPUDisplayName(gpu) + ')'
|
||||
},
|
||||
label: satGPUDisplayName(gpu)
|
||||
})));
|
||||
}
|
||||
function runNvidiaValidateSet(target) {
|
||||
return loadSatNvidiaGPUs().then(gpus => {
|
||||
if (!gpus.length) return;
|
||||
if (gpus.length === 1) {
|
||||
const gpu = gpus[0];
|
||||
return runSATWithOverrides(target, {
|
||||
gpu_indices: [Number(gpu.index)],
|
||||
display_name: (satLabels()[target] || ('Validate ' + target)) + ' (' + satGPUDisplayName(gpu) + ')'
|
||||
});
|
||||
}
|
||||
if (satES) { satES.close(); satES = null; }
|
||||
document.getElementById('sat-output').style.display='block';
|
||||
document.getElementById('sat-title').textContent = '— ' + target;
|
||||
const term = document.getElementById('sat-terminal');
|
||||
term.textContent = 'Enqueuing ' + target + ' tests one GPU at a time...\n';
|
||||
const labelBase = satLabels()[target] || ('Validate ' + target);
|
||||
const enqueueNext = (idx) => {
|
||||
if (idx >= gpus.length) return;
|
||||
const gpu = gpus[idx];
|
||||
const gpuLabel = satGPUDisplayName(gpu);
|
||||
enqueueSATTarget(target, {
|
||||
gpu_indices: [Number(gpu.index)],
|
||||
display_name: labelBase + ' (' + gpuLabel + ')'
|
||||
}).then(d => {
|
||||
term.textContent += 'Task ' + d.task_id + ' queued for ' + gpuLabel + '.\n';
|
||||
if (idx === gpus.length - 1) {
|
||||
satES = new EventSource('/api/tasks/' + d.task_id + '/stream');
|
||||
satES.onmessage = e => { term.textContent += e.data+'\n'; term.scrollTop=term.scrollHeight; };
|
||||
satES.addEventListener('done', e => { satES.close(); satES=null; term.textContent += (e.data ? '\nERROR: '+e.data : '\nCompleted.')+'\n'; });
|
||||
}
|
||||
enqueueNext(idx + 1);
|
||||
});
|
||||
};
|
||||
enqueueNext(0);
|
||||
});
|
||||
}
|
||||
function runAMDValidateSet() {
|
||||
const targets = selectedAMDValidateTargets();
|
||||
if (!targets.length) return;
|
||||
@@ -1142,25 +1241,38 @@ function runAMDValidateSet() {
|
||||
}
|
||||
function runAllSAT() {
|
||||
const cycles = Math.max(1, parseInt(document.getElementById('sat-cycles').value)||1);
|
||||
const targets = ['nvidia','nvidia-targeted-stress','memory','storage','cpu'].concat(selectedAMDValidateTargets());
|
||||
const total = targets.length * cycles;
|
||||
let enqueued = 0;
|
||||
const status = document.getElementById('sat-all-status');
|
||||
status.textContent = 'Enqueuing...';
|
||||
const enqueueNext = (cycle, idx) => {
|
||||
if (cycle >= cycles) { status.textContent = 'Enqueued '+total+' tasks.'; return; }
|
||||
if (idx >= targets.length) { enqueueNext(cycle+1, 0); return; }
|
||||
const target = targets[idx];
|
||||
const baseTargets = ['nvidia','nvidia-targeted-stress','memory','storage','cpu'].concat(selectedAMDValidateTargets());
|
||||
const activeTargets = baseTargets.filter(target => {
|
||||
const btn = document.getElementById('sat-btn-' + target);
|
||||
if (btn && btn.disabled) { enqueueNext(cycle, idx+1); return; }
|
||||
enqueueSATTarget(target)
|
||||
.then(()=>{
|
||||
enqueued++;
|
||||
status.textContent = 'Enqueued '+enqueued+'/'+total+'...';
|
||||
enqueueNext(cycle, idx+1);
|
||||
});
|
||||
};
|
||||
enqueueNext(0, 0);
|
||||
return !(btn && btn.disabled);
|
||||
});
|
||||
Promise.all(activeTargets.map(expandSATTarget)).then(groups => {
|
||||
const expanded = [];
|
||||
for (let cycle = 0; cycle < cycles; cycle++) {
|
||||
groups.forEach(group => group.forEach(item => expanded.push(item)));
|
||||
}
|
||||
const total = expanded.length;
|
||||
let enqueued = 0;
|
||||
if (!total) {
|
||||
status.textContent = 'No tasks selected.';
|
||||
return;
|
||||
}
|
||||
const enqueueNext = (idx) => {
|
||||
if (idx >= expanded.length) { status.textContent = 'Enqueued ' + total + ' tasks.'; return; }
|
||||
const item = expanded[idx];
|
||||
enqueueSATTarget(item.target, item.overrides)
|
||||
.then(() => {
|
||||
enqueued++;
|
||||
status.textContent = 'Enqueued ' + enqueued + '/' + total + '...';
|
||||
enqueueNext(idx + 1);
|
||||
});
|
||||
};
|
||||
enqueueNext(0);
|
||||
}).catch(err => {
|
||||
status.textContent = 'Error: ' + err.message;
|
||||
});
|
||||
}
|
||||
</script>
|
||||
<script>
|
||||
@@ -2179,29 +2291,57 @@ function usbRefresh() {
|
||||
'<td>'+label+'</td>' +
|
||||
'<td style="font-size:12px;color:var(--muted)">'+model+'</td>' +
|
||||
'<td style="white-space:nowrap">' +
|
||||
'<button class="btn btn-sm btn-primary" onclick="usbExport(\'audit\','+JSON.stringify(t)+')">Audit JSON</button> ' +
|
||||
'<button class="btn btn-sm btn-secondary" onclick="usbExport(\'bundle\','+JSON.stringify(t)+')">Support Bundle</button>' +
|
||||
'<button class="btn btn-sm btn-primary" onclick="usbExport(\'audit\','+JSON.stringify(t)+',this)">Audit JSON</button> ' +
|
||||
'<button class="btn btn-sm btn-secondary" onclick="usbExport(\'bundle\','+JSON.stringify(t)+',this)">Support Bundle</button>' +
|
||||
'<div class="usb-row-msg" style="margin-top:6px;font-size:12px;color:var(--muted)"></div>' +
|
||||
'</td></tr>';
|
||||
}).join('') + '</table>';
|
||||
}).catch(e => {
|
||||
document.getElementById('usb-status').textContent = 'Error: ' + e;
|
||||
});
|
||||
}
|
||||
window.usbExport = function(type, target) {
|
||||
window.usbExport = function(type, target, btn) {
|
||||
const msg = document.getElementById('usb-msg');
|
||||
const row = btn ? btn.closest('td') : null;
|
||||
const rowMsg = row ? row.querySelector('.usb-row-msg') : null;
|
||||
const originalText = btn ? btn.textContent : '';
|
||||
if (btn) {
|
||||
btn.disabled = true;
|
||||
btn.textContent = 'Exporting...';
|
||||
}
|
||||
if (rowMsg) {
|
||||
rowMsg.style.color = 'var(--muted)';
|
||||
rowMsg.textContent = 'Working...';
|
||||
}
|
||||
msg.style.color = 'var(--muted)';
|
||||
msg.textContent = 'Exporting to ' + (target.device||'') + '...';
|
||||
msg.textContent = 'Exporting ' + (type === 'bundle' ? 'support bundle' : 'audit JSON') + ' to ' + (target.device||'') + '...';
|
||||
fetch('/api/export/usb/'+type, {
|
||||
method: 'POST',
|
||||
headers: {'Content-Type':'application/json'},
|
||||
body: JSON.stringify(target)
|
||||
}).then(r=>r.json()).then(d => {
|
||||
if (d.error) { msg.style.color='var(--err,red)'; msg.textContent = 'Error: '+d.error; return; }
|
||||
}).then(async r => {
|
||||
const d = await r.json();
|
||||
if (!r.ok) throw new Error(d.error || ('HTTP ' + r.status));
|
||||
return d;
|
||||
}).then(d => {
|
||||
msg.style.color = 'var(--ok,green)';
|
||||
msg.textContent = d.message || 'Done.';
|
||||
if (rowMsg) {
|
||||
rowMsg.style.color = 'var(--ok,green)';
|
||||
rowMsg.textContent = d.message || 'Done.';
|
||||
}
|
||||
}).catch(e => {
|
||||
msg.style.color = 'var(--err,red)';
|
||||
msg.textContent = 'Error: '+e;
|
||||
if (rowMsg) {
|
||||
rowMsg.style.color = 'var(--err,red)';
|
||||
rowMsg.textContent = 'Error: ' + e;
|
||||
}
|
||||
}).finally(() => {
|
||||
if (btn) {
|
||||
btn.disabled = false;
|
||||
btn.textContent = originalText;
|
||||
}
|
||||
});
|
||||
};
|
||||
window.usbRefresh = usbRefresh;
|
||||
@@ -2558,30 +2698,16 @@ func renderTasks() string {
|
||||
<button class="btn btn-danger btn-sm" onclick="cancelAll()">Cancel All</button>
|
||||
<button class="btn btn-sm" style="background:#b45309;color:#fff" onclick="killWorkers()" title="Send SIGKILL to all running test processes (bee-gpu-burn, stress-ng, stressapptest, memtester)">Kill Workers</button>
|
||||
<span id="kill-toast" style="font-size:12px;color:var(--muted);display:none"></span>
|
||||
<span style="font-size:12px;color:var(--muted)">Tasks run one at a time. Logs persist after navigation.</span>
|
||||
<span style="font-size:12px;color:var(--muted)">Open a task to view its saved logs and charts.</span>
|
||||
</div>
|
||||
<div class="card">
|
||||
<div id="tasks-table"><p style="color:var(--muted);font-size:13px;padding:16px">Loading...</p></div>
|
||||
</div>
|
||||
<div id="task-log-overlay" style="display:none;position:fixed;inset:0;background:rgba(0,0,0,.58);z-index:120;align-items:center;justify-content:center;padding:16px">
|
||||
<div style="background:#fff;border-radius:6px;box-shadow:0 24px 60px rgba(0,0,0,.35);width:calc(100vw - 32px);max-width:1600px;height:calc(100vh - 32px);display:flex;flex-direction:column;overflow:hidden;position:relative">
|
||||
<div class="card-head" style="padding:14px 18px;font-size:14px">Logs — <span id="task-log-title"></span>
|
||||
<button class="btn btn-sm btn-secondary" onclick="closeTaskLog()" style="margin-left:auto">✕</button>
|
||||
</div>
|
||||
<div class="card-body" style="padding:16px;flex:1;min-height:0;overflow:hidden">
|
||||
<div style="height:100%;min-height:0;overflow:auto">
|
||||
<div id="task-log-terminal" class="terminal" style="margin:0;max-height:none;overflow:visible"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
var _taskLogES = null;
|
||||
var _taskRefreshTimer = null;
|
||||
var _tasksAll = [];
|
||||
var _taskPage = 1;
|
||||
var _taskPageSize = 50;
|
||||
var _taskLogID = '';
|
||||
|
||||
function loadTasks() {
|
||||
fetch('/api/tasks').then(r=>r.json()).then(tasks => {
|
||||
@@ -2589,7 +2715,6 @@ function loadTasks() {
|
||||
if (_tasksAll.length === 0) {
|
||||
_taskPage = 1;
|
||||
document.getElementById('tasks-table').innerHTML = '<p style="color:var(--muted);font-size:13px;padding:16px">No tasks.</p>';
|
||||
syncTaskLogFromHash();
|
||||
return;
|
||||
}
|
||||
const totalPages = Math.max(1, Math.ceil(_tasksAll.length / _taskPageSize));
|
||||
@@ -2601,7 +2726,7 @@ function loadTasks() {
|
||||
const dur = t.elapsed_sec ? formatDurSec(t.elapsed_sec) : '';
|
||||
const statusClass = {running:'badge-ok',pending:'badge-unknown',done:'badge-ok',failed:'badge-err',cancelled:'badge-unknown'}[t.status]||'badge-unknown';
|
||||
const statusLabel = {running:'▶ running',pending:'pending',done:'✓ done',failed:'✗ failed',cancelled:'cancelled'}[t.status]||t.status;
|
||||
let actions = '<button class="btn btn-sm btn-secondary" onclick="viewLog(\''+t.id+'\',\''+escHtml(t.name)+'\')">Logs</button>';
|
||||
let actions = '<a class="btn btn-sm btn-secondary" href="/tasks/'+encodeURIComponent(t.id)+'">Open</a>';
|
||||
if (t.status === 'running' || t.status === 'pending') {
|
||||
actions += ' <button class="btn btn-sm btn-danger" onclick="cancelTask(\''+t.id+'\')">Cancel</button>';
|
||||
}
|
||||
@@ -2609,7 +2734,7 @@ function loadTasks() {
|
||||
actions += ' <button class="btn btn-sm btn-secondary" onclick="setPriority(\''+t.id+'\',1)" title="Increase priority">⇧</button>';
|
||||
actions += ' <button class="btn btn-sm btn-secondary" onclick="setPriority(\''+t.id+'\',-1)" title="Decrease priority">⇩</button>';
|
||||
}
|
||||
return '<tr><td>'+escHtml(t.name)+'</td>' +
|
||||
return '<tr><td><a href="/tasks/'+encodeURIComponent(t.id)+'">'+escHtml(t.name)+'</a></td>' +
|
||||
'<td><span class="badge '+statusClass+'">'+statusLabel+'</span></td>' +
|
||||
'<td style="font-size:12px;color:var(--muted)">'+fmtTime(t.created_at)+'</td>' +
|
||||
'<td style="font-size:12px;color:var(--muted)">'+dur+'</td>' +
|
||||
@@ -2629,7 +2754,6 @@ function loadTasks() {
|
||||
'</div>';
|
||||
document.getElementById('tasks-table').innerHTML =
|
||||
'<table><tr><th>Name</th><th>Status</th><th>Created</th><th>Duration</th><th>Priority</th><th>Actions</th></tr>'+rows+'</table>' + pager;
|
||||
syncTaskLogFromHash();
|
||||
});
|
||||
}
|
||||
|
||||
@@ -2672,58 +2796,6 @@ function setPriority(id, delta) {
|
||||
fetch('/api/tasks/'+id+'/priority',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({delta:delta})})
|
||||
.then(()=>loadTasks());
|
||||
}
|
||||
function resetTaskLog(term, text) {
|
||||
term.textContent = text ? text + '\n' : '';
|
||||
if (text) term.dataset.placeholder = '1';
|
||||
else delete term.dataset.placeholder;
|
||||
}
|
||||
function prependTaskLogLine(term, line) {
|
||||
if (term.dataset.placeholder === '1') {
|
||||
term.textContent = '';
|
||||
delete term.dataset.placeholder;
|
||||
}
|
||||
term.prepend(document.createTextNode(line + '\n'));
|
||||
term.scrollTop = 0;
|
||||
}
|
||||
function viewLog(id, name) {
|
||||
if (_taskLogES) { _taskLogES.close(); _taskLogES = null; }
|
||||
_taskLogID = id;
|
||||
window.location.hash = id;
|
||||
document.getElementById('task-log-overlay').style.display = 'flex';
|
||||
document.getElementById('task-log-title').textContent = name;
|
||||
const term = document.getElementById('task-log-terminal');
|
||||
resetTaskLog(term, 'Connecting...');
|
||||
_taskLogES = new EventSource('/api/tasks/'+id+'/stream');
|
||||
_taskLogES.onopen = () => {
|
||||
if (term.dataset.placeholder === '1') resetTaskLog(term, 'Connected. Waiting for output...');
|
||||
};
|
||||
_taskLogES.onmessage = e => { prependTaskLogLine(term, e.data); };
|
||||
_taskLogES.addEventListener('done', e => {
|
||||
_taskLogES.close(); _taskLogES=null;
|
||||
prependTaskLogLine(term, e.data ? 'ERROR: '+e.data : 'Done.');
|
||||
});
|
||||
}
|
||||
function syncTaskLogFromHash() {
|
||||
const id = (window.location.hash || '').replace(/^#/, '');
|
||||
if (!id || id === _taskLogID) return;
|
||||
const task = _tasksAll.find(t => t.id === id);
|
||||
if (!task) return;
|
||||
viewLog(task.id, task.name || task.id);
|
||||
}
|
||||
function closeTaskLog() {
|
||||
if (_taskLogES) { _taskLogES.close(); _taskLogES=null; }
|
||||
_taskLogID = '';
|
||||
if (window.location.hash) history.replaceState(null, '', '/tasks');
|
||||
document.getElementById('task-log-overlay').style.display='none';
|
||||
}
|
||||
|
||||
document.getElementById('task-log-overlay').addEventListener('click', function(e) {
|
||||
if (e.target === this) closeTaskLog();
|
||||
});
|
||||
window.addEventListener('hashchange', syncTaskLogFromHash);
|
||||
window.addEventListener('keydown', function(e) {
|
||||
if (e.key === 'Escape' && document.getElementById('task-log-overlay').style.display !== 'none') closeTaskLog();
|
||||
});
|
||||
|
||||
loadTasks();
|
||||
_taskRefreshTimer = setInterval(loadTasks, 2000);
|
||||
|
||||
41
audit/internal/webui/serial_console.go
Normal file
41
audit/internal/webui/serial_console.go
Normal file
@@ -0,0 +1,41 @@
|
||||
package webui
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
var taskSerialWriteLine = writeTaskSerialLine
|
||||
|
||||
func writeTaskSerialLine(line string) {
|
||||
line = strings.TrimSpace(line)
|
||||
if line == "" {
|
||||
return
|
||||
}
|
||||
payload := fmt.Sprintf("%s %s\n", time.Now().UTC().Format("2006-01-02 15:04:05Z"), line)
|
||||
for _, path := range []string{"/dev/ttyS0", "/dev/ttyS1", "/dev/console"} {
|
||||
f, err := os.OpenFile(path, os.O_WRONLY|os.O_APPEND, 0)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
_, _ = f.WriteString(payload)
|
||||
_ = f.Close()
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func taskSerialPrefix(t *Task) string {
|
||||
if t == nil {
|
||||
return "[task] "
|
||||
}
|
||||
return fmt.Sprintf("[task %s %s] ", t.ID, t.Name)
|
||||
}
|
||||
|
||||
func taskSerialEvent(t *Task, event string) {
|
||||
if t == nil {
|
||||
return
|
||||
}
|
||||
taskSerialWriteLine(fmt.Sprintf("%s%s", taskSerialPrefix(t), strings.TrimSpace(event)))
|
||||
}
|
||||
@@ -265,6 +265,7 @@ func NewHandler(opts HandlerOptions) http.Handler {
|
||||
mux.HandleFunc("POST /api/tasks/{id}/cancel", h.handleAPITasksCancel)
|
||||
mux.HandleFunc("POST /api/tasks/{id}/priority", h.handleAPITasksPriority)
|
||||
mux.HandleFunc("GET /api/tasks/{id}/stream", h.handleAPITasksStream)
|
||||
mux.HandleFunc("GET /tasks/{id}", h.handleTaskPage)
|
||||
|
||||
// Services
|
||||
mux.HandleFunc("GET /api/services", h.handleAPIServicesList)
|
||||
@@ -703,7 +704,7 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
|
||||
}
|
||||
switch sub {
|
||||
case "load":
|
||||
title = fmt.Sprintf("GPU %d Load", idx)
|
||||
title = gpuDisplayLabel(idx) + " Load"
|
||||
util := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.UsagePct })
|
||||
mem := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.MemUsagePct })
|
||||
if util == nil && mem == nil {
|
||||
@@ -714,7 +715,7 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
|
||||
yMin = floatPtr(0)
|
||||
yMax = floatPtr(100)
|
||||
case "temp":
|
||||
title = fmt.Sprintf("GPU %d Temperature", idx)
|
||||
title = gpuDisplayLabel(idx) + " Temperature"
|
||||
temp := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.TempC })
|
||||
if temp == nil {
|
||||
return nil, nil, nil, "", nil, nil, false
|
||||
@@ -724,7 +725,7 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
|
||||
yMin = floatPtr(0)
|
||||
yMax = autoMax120(temp)
|
||||
case "clock":
|
||||
title = fmt.Sprintf("GPU %d Core Clock", idx)
|
||||
title = gpuDisplayLabel(idx) + " Core Clock"
|
||||
clock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.ClockMHz })
|
||||
if clock == nil {
|
||||
return nil, nil, nil, "", nil, nil, false
|
||||
@@ -733,7 +734,7 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
|
||||
names = []string{"Core Clock MHz"}
|
||||
yMin, yMax = autoBounds120(clock)
|
||||
case "memclock":
|
||||
title = fmt.Sprintf("GPU %d Memory Clock", idx)
|
||||
title = gpuDisplayLabel(idx) + " Memory Clock"
|
||||
clock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.MemClockMHz })
|
||||
if clock == nil {
|
||||
return nil, nil, nil, "", nil, nil, false
|
||||
@@ -742,7 +743,7 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
|
||||
names = []string{"Memory Clock MHz"}
|
||||
yMin, yMax = autoBounds120(clock)
|
||||
default:
|
||||
title = fmt.Sprintf("GPU %d Power", idx)
|
||||
title = gpuDisplayLabel(idx) + " Power"
|
||||
power := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.PowerW })
|
||||
if power == nil {
|
||||
return nil, nil, nil, "", nil, nil, false
|
||||
@@ -871,7 +872,7 @@ func gpuDatasets(samples []platform.LiveMetricSample, pick func(platform.GPUMetr
|
||||
continue
|
||||
}
|
||||
datasets = append(datasets, ds)
|
||||
names = append(names, fmt.Sprintf("GPU %d", idx))
|
||||
names = append(names, gpuDisplayLabel(idx))
|
||||
}
|
||||
return datasets, names
|
||||
}
|
||||
|
||||
@@ -184,15 +184,15 @@ func TestChartDataFromSamplesIncludesGPUClockCharts(t *testing.T) {
|
||||
{
|
||||
Timestamp: time.Now().Add(-2 * time.Minute),
|
||||
GPUs: []platform.GPUMetricRow{
|
||||
{GPUIndex: 0, ClockMHz: 1400, MemClockMHz: 2600},
|
||||
{GPUIndex: 3, ClockMHz: 1500, MemClockMHz: 2800},
|
||||
{GPUIndex: 0, ClockMHz: 1400},
|
||||
{GPUIndex: 3, ClockMHz: 1500},
|
||||
},
|
||||
},
|
||||
{
|
||||
Timestamp: time.Now().Add(-1 * time.Minute),
|
||||
GPUs: []platform.GPUMetricRow{
|
||||
{GPUIndex: 0, ClockMHz: 1410, MemClockMHz: 2610},
|
||||
{GPUIndex: 3, ClockMHz: 1510, MemClockMHz: 2810},
|
||||
{GPUIndex: 0, ClockMHz: 1410},
|
||||
{GPUIndex: 3, ClockMHz: 1510},
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -210,20 +210,6 @@ func TestChartDataFromSamplesIncludesGPUClockCharts(t *testing.T) {
|
||||
if got := datasets[1][1]; got != 1510 {
|
||||
t.Fatalf("GPU 3 core clock=%v want 1510", got)
|
||||
}
|
||||
|
||||
datasets, names, _, title, _, _, ok = chartDataFromSamples("gpu-all-memclock", samples)
|
||||
if !ok {
|
||||
t.Fatal("gpu-all-memclock returned ok=false")
|
||||
}
|
||||
if title != "GPU Memory Clock" {
|
||||
t.Fatalf("title=%q", title)
|
||||
}
|
||||
if len(names) != 2 || names[0] != "GPU 0" || names[1] != "GPU 3" {
|
||||
t.Fatalf("names=%v", names)
|
||||
}
|
||||
if got := datasets[0][0]; got != 2600 {
|
||||
t.Fatalf("GPU 0 memory clock=%v want 2600", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizePowerSeriesHoldsLastPositive(t *testing.T) {
|
||||
@@ -256,10 +242,10 @@ func TestRenderMetricsUsesBufferedChartRefresh(t *testing.T) {
|
||||
if !strings.Contains(body, `/api/metrics/chart/gpu-all-clock.svg`) {
|
||||
t.Fatalf("metrics page should include GPU core clock chart: %s", body)
|
||||
}
|
||||
if !strings.Contains(body, `/api/metrics/chart/gpu-all-memclock.svg`) {
|
||||
t.Fatalf("metrics page should include GPU memory clock chart: %s", body)
|
||||
if strings.Contains(body, `/api/metrics/chart/gpu-all-memclock.svg`) {
|
||||
t.Fatalf("metrics page should not include GPU memory clock chart: %s", body)
|
||||
}
|
||||
if !strings.Contains(body, `renderGPUOverviewCards(indices)`) {
|
||||
if !strings.Contains(body, `renderGPUOverviewCards(indices, names)`) {
|
||||
t.Fatalf("metrics page should build per-GPU chart cards dynamically: %s", body)
|
||||
}
|
||||
}
|
||||
@@ -585,7 +571,7 @@ func TestAuditPageRendersViewerFrameAndActions(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestTasksPageRendersLogModalAndPaginationControls(t *testing.T) {
|
||||
func TestTasksPageRendersOpenLinksAndPaginationControls(t *testing.T) {
|
||||
handler := NewHandler(HandlerOptions{})
|
||||
rec := httptest.NewRecorder()
|
||||
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/tasks", nil))
|
||||
@@ -593,8 +579,8 @@ func TestTasksPageRendersLogModalAndPaginationControls(t *testing.T) {
|
||||
t.Fatalf("status=%d", rec.Code)
|
||||
}
|
||||
body := rec.Body.String()
|
||||
if !strings.Contains(body, `id="task-log-overlay"`) {
|
||||
t.Fatalf("tasks page missing log modal overlay: %s", body)
|
||||
if !strings.Contains(body, `Open a task to view its saved logs and charts.`) {
|
||||
t.Fatalf("tasks page missing task report hint: %s", body)
|
||||
}
|
||||
if !strings.Contains(body, `_taskPageSize = 50`) {
|
||||
t.Fatalf("tasks page missing pagination size config: %s", body)
|
||||
@@ -691,37 +677,49 @@ func TestBurnPageRendersGoalBasedNVIDIACards(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestTasksPageRendersScrollableLogModal(t *testing.T) {
|
||||
func TestTaskDetailPageRendersSavedReport(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "audit.json")
|
||||
exportDir := filepath.Join(dir, "export")
|
||||
if err := os.MkdirAll(exportDir, 0755); err != nil {
|
||||
reportDir := filepath.Join(exportDir, "tasks", "task-1_cpu_sat_done")
|
||||
if err := os.MkdirAll(reportDir, 0755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := os.WriteFile(path, []byte(`{"collected_at":"2026-03-15T00:00:00Z"}`), 0644); err != nil {
|
||||
reportPath := filepath.Join(reportDir, "report.html")
|
||||
if err := os.WriteFile(reportPath, []byte(`<div class="card"><div class="card-head">Task Report</div><div class="card-body">saved report</div></div>`), 0644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
handler := NewHandler(HandlerOptions{
|
||||
Title: "Bee Hardware Audit",
|
||||
AuditPath: path,
|
||||
ExportDir: exportDir,
|
||||
globalQueue.mu.Lock()
|
||||
origTasks := globalQueue.tasks
|
||||
globalQueue.tasks = []*Task{{
|
||||
ID: "task-1",
|
||||
Name: "CPU SAT",
|
||||
Target: "cpu",
|
||||
Status: TaskDone,
|
||||
CreatedAt: time.Now(),
|
||||
ArtifactsDir: reportDir,
|
||||
ReportHTMLPath: reportPath,
|
||||
}}
|
||||
globalQueue.mu.Unlock()
|
||||
t.Cleanup(func() {
|
||||
globalQueue.mu.Lock()
|
||||
globalQueue.tasks = origTasks
|
||||
globalQueue.mu.Unlock()
|
||||
})
|
||||
|
||||
handler := NewHandler(HandlerOptions{Title: "Bee Hardware Audit", ExportDir: exportDir})
|
||||
|
||||
rec := httptest.NewRecorder()
|
||||
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/tasks", nil))
|
||||
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/tasks/task-1", nil))
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("status=%d", rec.Code)
|
||||
}
|
||||
body := rec.Body.String()
|
||||
if !strings.Contains(body, `height:calc(100vh - 32px)`) {
|
||||
t.Fatalf("tasks page missing bounded log modal height: %s", body)
|
||||
if !strings.Contains(body, `saved report`) {
|
||||
t.Fatalf("task detail page missing saved report: %s", body)
|
||||
}
|
||||
if !strings.Contains(body, `flex:1;min-height:0;overflow:hidden`) {
|
||||
t.Fatalf("tasks page missing log modal overflow guard: %s", body)
|
||||
}
|
||||
if !strings.Contains(body, `height:100%;min-height:0;overflow:auto`) {
|
||||
t.Fatalf("tasks page missing scrollable log wrapper: %s", body)
|
||||
if !strings.Contains(body, `Back to Tasks`) {
|
||||
t.Fatalf("task detail page missing back link: %s", body)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
85
audit/internal/webui/task_page.go
Normal file
85
audit/internal/webui/task_page.go
Normal file
@@ -0,0 +1,85 @@
|
||||
package webui
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"html"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func (h *handler) handleTaskPage(w http.ResponseWriter, r *http.Request) {
|
||||
id := r.PathValue("id")
|
||||
task, ok := globalQueue.findByID(id)
|
||||
if !ok {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
snapshot := *task
|
||||
body := renderTaskDetailPage(h.opts, snapshot)
|
||||
w.Header().Set("Cache-Control", "no-store")
|
||||
w.Header().Set("Content-Type", "text/html; charset=utf-8")
|
||||
_, _ = w.Write([]byte(body))
|
||||
}
|
||||
|
||||
func renderTaskDetailPage(opts HandlerOptions, task Task) string {
|
||||
title := task.Name
|
||||
if strings.TrimSpace(title) == "" {
|
||||
title = task.ID
|
||||
}
|
||||
var body strings.Builder
|
||||
body.WriteString(`<div style="display:flex;align-items:center;gap:12px;margin-bottom:16px;flex-wrap:wrap">`)
|
||||
body.WriteString(`<a class="btn btn-secondary btn-sm" href="/tasks">Back to Tasks</a>`)
|
||||
body.WriteString(`<span style="font-size:12px;color:var(--muted)">Artifacts are saved in the task folder under <code>./tasks</code>.</span>`)
|
||||
body.WriteString(`</div>`)
|
||||
|
||||
if report := loadTaskReportFragment(task); report != "" {
|
||||
body.WriteString(report)
|
||||
} else {
|
||||
body.WriteString(`<div class="card"><div class="card-head">Task Summary</div><div class="card-body">`)
|
||||
body.WriteString(`<div style="font-size:18px;font-weight:700">` + html.EscapeString(title) + `</div>`)
|
||||
body.WriteString(`<div style="margin-top:8px">` + renderTaskStatusBadge(task.Status) + `</div>`)
|
||||
if strings.TrimSpace(task.ErrMsg) != "" {
|
||||
body.WriteString(`<div style="margin-top:8px;color:var(--crit-fg)">` + html.EscapeString(task.ErrMsg) + `</div>`)
|
||||
}
|
||||
body.WriteString(`</div></div>`)
|
||||
}
|
||||
|
||||
if task.Status == TaskRunning || task.Status == TaskPending {
|
||||
body.WriteString(`<div class="card"><div class="card-head">Live Logs</div><div class="card-body">`)
|
||||
body.WriteString(`<div id="task-live-log" class="terminal" style="max-height:none;white-space:pre-wrap">Connecting...</div>`)
|
||||
body.WriteString(`</div></div>`)
|
||||
body.WriteString(`<script>
|
||||
var _taskDetailES = new EventSource('/api/tasks/` + html.EscapeString(task.ID) + `/stream');
|
||||
var _taskDetailTerm = document.getElementById('task-live-log');
|
||||
_taskDetailES.onopen = function(){ _taskDetailTerm.textContent = ''; };
|
||||
_taskDetailES.onmessage = function(e){ _taskDetailTerm.textContent += e.data + "\n"; _taskDetailTerm.scrollTop = _taskDetailTerm.scrollHeight; };
|
||||
_taskDetailES.addEventListener('done', function(){ _taskDetailES.close(); setTimeout(function(){ window.location.reload(); }, 1000); });
|
||||
_taskDetailES.onerror = function(){ _taskDetailES.close(); };
|
||||
</script>`)
|
||||
}
|
||||
|
||||
return layoutHead(opts.Title+" — "+title) +
|
||||
layoutNav("tasks", opts.BuildLabel) +
|
||||
`<div class="main"><div class="topbar"><h1>` + html.EscapeString(title) + `</h1></div><div class="content">` +
|
||||
body.String() +
|
||||
`</div></div></body></html>`
|
||||
}
|
||||
|
||||
func loadTaskReportFragment(task Task) string {
|
||||
if strings.TrimSpace(task.ReportHTMLPath) == "" {
|
||||
return ""
|
||||
}
|
||||
data, err := os.ReadFile(task.ReportHTMLPath)
|
||||
if err != nil || len(data) == 0 {
|
||||
return ""
|
||||
}
|
||||
return string(data)
|
||||
}
|
||||
|
||||
func taskArtifactDownloadLink(task Task, absPath string) string {
|
||||
if strings.TrimSpace(absPath) == "" {
|
||||
return ""
|
||||
}
|
||||
return fmt.Sprintf(`/export/file?path=%s`, absPath)
|
||||
}
|
||||
286
audit/internal/webui/task_report.go
Normal file
286
audit/internal/webui/task_report.go
Normal file
@@ -0,0 +1,286 @@
|
||||
package webui
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"html"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"bee/audit/internal/platform"
|
||||
)
|
||||
|
||||
var taskReportMetricsDBPath = metricsDBPath
|
||||
|
||||
type taskReport struct {
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Target string `json:"target"`
|
||||
Status string `json:"status"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
StartedAt *time.Time `json:"started_at,omitempty"`
|
||||
DoneAt *time.Time `json:"done_at,omitempty"`
|
||||
DurationSec int `json:"duration_sec,omitempty"`
|
||||
Error string `json:"error,omitempty"`
|
||||
LogFile string `json:"log_file,omitempty"`
|
||||
Charts []taskReportChart `json:"charts,omitempty"`
|
||||
GeneratedAt time.Time `json:"generated_at"`
|
||||
}
|
||||
|
||||
type taskReportChart struct {
|
||||
Title string `json:"title"`
|
||||
File string `json:"file"`
|
||||
}
|
||||
|
||||
type taskChartSpec struct {
|
||||
Path string
|
||||
File string
|
||||
}
|
||||
|
||||
var taskDashboardChartSpecs = []taskChartSpec{
|
||||
{Path: "server-load", File: "server-load.svg"},
|
||||
{Path: "server-temp-cpu", File: "server-temp-cpu.svg"},
|
||||
{Path: "server-temp-ambient", File: "server-temp-ambient.svg"},
|
||||
{Path: "server-power", File: "server-power.svg"},
|
||||
{Path: "server-fans", File: "server-fans.svg"},
|
||||
{Path: "gpu-all-load", File: "gpu-all-load.svg"},
|
||||
{Path: "gpu-all-memload", File: "gpu-all-memload.svg"},
|
||||
{Path: "gpu-all-clock", File: "gpu-all-clock.svg"},
|
||||
{Path: "gpu-all-power", File: "gpu-all-power.svg"},
|
||||
{Path: "gpu-all-temp", File: "gpu-all-temp.svg"},
|
||||
}
|
||||
|
||||
func writeTaskReportArtifacts(t *Task) error {
|
||||
if t == nil {
|
||||
return nil
|
||||
}
|
||||
ensureTaskReportPaths(t)
|
||||
if strings.TrimSpace(t.ArtifactsDir) == "" {
|
||||
return nil
|
||||
}
|
||||
if err := os.MkdirAll(t.ArtifactsDir, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
start, end := taskTimeWindow(t)
|
||||
samples, _ := loadTaskMetricSamples(start, end)
|
||||
charts, inlineCharts := writeTaskCharts(t.ArtifactsDir, start, end, samples)
|
||||
|
||||
logText := ""
|
||||
if data, err := os.ReadFile(t.LogPath); err == nil {
|
||||
logText = string(data)
|
||||
}
|
||||
|
||||
report := taskReport{
|
||||
ID: t.ID,
|
||||
Name: t.Name,
|
||||
Target: t.Target,
|
||||
Status: t.Status,
|
||||
CreatedAt: t.CreatedAt,
|
||||
StartedAt: t.StartedAt,
|
||||
DoneAt: t.DoneAt,
|
||||
DurationSec: taskElapsedSec(t, reportDoneTime(t)),
|
||||
Error: t.ErrMsg,
|
||||
LogFile: filepath.Base(t.LogPath),
|
||||
Charts: charts,
|
||||
GeneratedAt: time.Now().UTC(),
|
||||
}
|
||||
if err := writeJSONFile(t.ReportJSONPath, report); err != nil {
|
||||
return err
|
||||
}
|
||||
return os.WriteFile(t.ReportHTMLPath, []byte(renderTaskReportFragment(report, inlineCharts, logText)), 0644)
|
||||
}
|
||||
|
||||
func reportDoneTime(t *Task) time.Time {
|
||||
if t != nil && t.DoneAt != nil && !t.DoneAt.IsZero() {
|
||||
return *t.DoneAt
|
||||
}
|
||||
return time.Now()
|
||||
}
|
||||
|
||||
func taskTimeWindow(t *Task) (time.Time, time.Time) {
|
||||
if t == nil {
|
||||
now := time.Now().UTC()
|
||||
return now, now
|
||||
}
|
||||
start := t.CreatedAt.UTC()
|
||||
if t.StartedAt != nil && !t.StartedAt.IsZero() {
|
||||
start = t.StartedAt.UTC()
|
||||
}
|
||||
end := time.Now().UTC()
|
||||
if t.DoneAt != nil && !t.DoneAt.IsZero() {
|
||||
end = t.DoneAt.UTC()
|
||||
}
|
||||
if end.Before(start) {
|
||||
end = start
|
||||
}
|
||||
return start, end
|
||||
}
|
||||
|
||||
func loadTaskMetricSamples(start, end time.Time) ([]platform.LiveMetricSample, error) {
|
||||
db, err := openMetricsDB(taskReportMetricsDBPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer db.Close()
|
||||
return db.LoadBetween(start, end)
|
||||
}
|
||||
|
||||
func writeTaskCharts(dir string, start, end time.Time, samples []platform.LiveMetricSample) ([]taskReportChart, map[string]string) {
|
||||
if len(samples) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
timeline := []chartTimelineSegment{{Start: start, End: end, Active: true}}
|
||||
var charts []taskReportChart
|
||||
inline := make(map[string]string)
|
||||
for _, spec := range taskDashboardChartSpecs {
|
||||
title, svg, ok := renderTaskChartSVG(spec.Path, samples, timeline)
|
||||
if !ok || len(svg) == 0 {
|
||||
continue
|
||||
}
|
||||
path := filepath.Join(dir, spec.File)
|
||||
if err := os.WriteFile(path, svg, 0644); err != nil {
|
||||
continue
|
||||
}
|
||||
charts = append(charts, taskReportChart{Title: title, File: spec.File})
|
||||
inline[spec.File] = string(svg)
|
||||
}
|
||||
|
||||
for _, idx := range taskGPUIndices(samples) {
|
||||
file := fmt.Sprintf("gpu-%d-overview.svg", idx)
|
||||
svg, ok, err := renderGPUOverviewChartSVG(idx, samples, timeline)
|
||||
if err != nil || !ok || len(svg) == 0 {
|
||||
continue
|
||||
}
|
||||
path := filepath.Join(dir, file)
|
||||
if err := os.WriteFile(path, svg, 0644); err != nil {
|
||||
continue
|
||||
}
|
||||
charts = append(charts, taskReportChart{Title: gpuDisplayLabel(idx) + " Overview", File: file})
|
||||
inline[file] = string(svg)
|
||||
}
|
||||
return charts, inline
|
||||
}
|
||||
|
||||
func renderTaskChartSVG(path string, samples []platform.LiveMetricSample, timeline []chartTimelineSegment) (string, []byte, bool) {
|
||||
datasets, names, labels, title, yMin, yMax, ok := chartDataFromSamples(path, samples)
|
||||
if !ok {
|
||||
return "", nil, false
|
||||
}
|
||||
buf, err := renderMetricChartSVG(
|
||||
title,
|
||||
labels,
|
||||
sampleTimes(samples),
|
||||
datasets,
|
||||
names,
|
||||
yMin,
|
||||
yMax,
|
||||
chartCanvasHeightForPath(path, len(names)),
|
||||
timeline,
|
||||
)
|
||||
if err != nil {
|
||||
return "", nil, false
|
||||
}
|
||||
return title, buf, true
|
||||
}
|
||||
|
||||
func taskGPUIndices(samples []platform.LiveMetricSample) []int {
|
||||
seen := map[int]bool{}
|
||||
var out []int
|
||||
for _, s := range samples {
|
||||
for _, g := range s.GPUs {
|
||||
if seen[g.GPUIndex] {
|
||||
continue
|
||||
}
|
||||
seen[g.GPUIndex] = true
|
||||
out = append(out, g.GPUIndex)
|
||||
}
|
||||
}
|
||||
sort.Ints(out)
|
||||
return out
|
||||
}
|
||||
|
||||
func writeJSONFile(path string, v any) error {
|
||||
data, err := json.MarshalIndent(v, "", " ")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return os.WriteFile(path, data, 0644)
|
||||
}
|
||||
|
||||
func renderTaskReportFragment(report taskReport, charts map[string]string, logText string) string {
|
||||
var b strings.Builder
|
||||
b.WriteString(`<div class="card"><div class="card-head">Task Report</div><div class="card-body">`)
|
||||
b.WriteString(`<div class="grid2">`)
|
||||
b.WriteString(`<div><div style="font-size:12px;color:var(--muted);margin-bottom:6px">Task</div><div style="font-size:16px;font-weight:700">` + html.EscapeString(report.Name) + `</div>`)
|
||||
b.WriteString(`<div style="font-size:13px;color:var(--muted)">` + html.EscapeString(report.Target) + `</div></div>`)
|
||||
b.WriteString(`<div><div style="font-size:12px;color:var(--muted);margin-bottom:6px">Status</div><div>` + renderTaskStatusBadge(report.Status) + `</div>`)
|
||||
if strings.TrimSpace(report.Error) != "" {
|
||||
b.WriteString(`<div style="margin-top:8px;font-size:13px;color:var(--crit-fg)">` + html.EscapeString(report.Error) + `</div>`)
|
||||
}
|
||||
b.WriteString(`</div></div>`)
|
||||
b.WriteString(`<div style="margin-top:14px;font-size:13px;color:var(--muted)">`)
|
||||
b.WriteString(`Started: ` + formatTaskTime(report.StartedAt, report.CreatedAt) + ` | Finished: ` + formatTaskTime(report.DoneAt, time.Time{}) + ` | Duration: ` + formatTaskDuration(report.DurationSec))
|
||||
b.WriteString(`</div></div></div>`)
|
||||
|
||||
if len(report.Charts) > 0 {
|
||||
b.WriteString(`<div class="grid2">`)
|
||||
for _, chart := range report.Charts {
|
||||
b.WriteString(`<div class="card"><div class="card-head">` + html.EscapeString(chart.Title) + `</div><div class="card-body" style="padding:12px">`)
|
||||
b.WriteString(charts[chart.File])
|
||||
b.WriteString(`</div></div>`)
|
||||
}
|
||||
b.WriteString(`</div>`)
|
||||
} else {
|
||||
b.WriteString(`<div class="alert alert-info">No metric samples were captured during this task window.</div>`)
|
||||
}
|
||||
|
||||
b.WriteString(`<div class="card"><div class="card-head">Logs</div><div class="card-body">`)
|
||||
b.WriteString(`<div class="terminal" style="max-height:none;white-space:pre-wrap">` + html.EscapeString(strings.TrimSpace(logText)) + `</div>`)
|
||||
b.WriteString(`</div></div>`)
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func renderTaskStatusBadge(status string) string {
|
||||
className := map[string]string{
|
||||
TaskRunning: "badge-ok",
|
||||
TaskPending: "badge-unknown",
|
||||
TaskDone: "badge-ok",
|
||||
TaskFailed: "badge-err",
|
||||
TaskCancelled: "badge-unknown",
|
||||
}[status]
|
||||
if className == "" {
|
||||
className = "badge-unknown"
|
||||
}
|
||||
label := strings.TrimSpace(status)
|
||||
if label == "" {
|
||||
label = "unknown"
|
||||
}
|
||||
return `<span class="badge ` + className + `">` + html.EscapeString(label) + `</span>`
|
||||
}
|
||||
|
||||
func formatTaskTime(ts *time.Time, fallback time.Time) string {
|
||||
if ts != nil && !ts.IsZero() {
|
||||
return ts.Local().Format("2006-01-02 15:04:05")
|
||||
}
|
||||
if !fallback.IsZero() {
|
||||
return fallback.Local().Format("2006-01-02 15:04:05")
|
||||
}
|
||||
return "n/a"
|
||||
}
|
||||
|
||||
// formatTaskDuration renders a duration given in whole seconds as a compact
// human-readable string: "45s", "2m 05s", or "1h 02m 05s". Non-positive
// values yield "n/a".
func formatTaskDuration(sec int) string {
	switch {
	case sec <= 0:
		return "n/a"
	case sec < 60:
		return fmt.Sprintf("%ds", sec)
	case sec < 3600:
		return fmt.Sprintf("%dm %02ds", sec/60, sec%60)
	default:
		return fmt.Sprintf("%dh %02dm %02ds", sec/3600, (sec%3600)/60, sec%60)
	}
}
|
||||
@@ -92,17 +92,20 @@ func taskDisplayName(target, profile, loader string) string {
|
||||
|
||||
// Task represents one unit of work in the queue.
|
||||
type Task struct {
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Target string `json:"target"`
|
||||
Priority int `json:"priority"`
|
||||
Status string `json:"status"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
StartedAt *time.Time `json:"started_at,omitempty"`
|
||||
DoneAt *time.Time `json:"done_at,omitempty"`
|
||||
ElapsedSec int `json:"elapsed_sec,omitempty"`
|
||||
ErrMsg string `json:"error,omitempty"`
|
||||
LogPath string `json:"log_path,omitempty"`
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Target string `json:"target"`
|
||||
Priority int `json:"priority"`
|
||||
Status string `json:"status"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
StartedAt *time.Time `json:"started_at,omitempty"`
|
||||
DoneAt *time.Time `json:"done_at,omitempty"`
|
||||
ElapsedSec int `json:"elapsed_sec,omitempty"`
|
||||
ErrMsg string `json:"error,omitempty"`
|
||||
LogPath string `json:"log_path,omitempty"`
|
||||
ArtifactsDir string `json:"artifacts_dir,omitempty"`
|
||||
ReportJSONPath string `json:"report_json_path,omitempty"`
|
||||
ReportHTMLPath string `json:"report_html_path,omitempty"`
|
||||
|
||||
// runtime fields (not serialised)
|
||||
job *jobState
|
||||
@@ -126,17 +129,20 @@ type taskParams struct {
|
||||
}
|
||||
|
||||
type persistedTask struct {
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Target string `json:"target"`
|
||||
Priority int `json:"priority"`
|
||||
Status string `json:"status"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
StartedAt *time.Time `json:"started_at,omitempty"`
|
||||
DoneAt *time.Time `json:"done_at,omitempty"`
|
||||
ErrMsg string `json:"error,omitempty"`
|
||||
LogPath string `json:"log_path,omitempty"`
|
||||
Params taskParams `json:"params,omitempty"`
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Target string `json:"target"`
|
||||
Priority int `json:"priority"`
|
||||
Status string `json:"status"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
StartedAt *time.Time `json:"started_at,omitempty"`
|
||||
DoneAt *time.Time `json:"done_at,omitempty"`
|
||||
ErrMsg string `json:"error,omitempty"`
|
||||
LogPath string `json:"log_path,omitempty"`
|
||||
ArtifactsDir string `json:"artifacts_dir,omitempty"`
|
||||
ReportJSONPath string `json:"report_json_path,omitempty"`
|
||||
ReportHTMLPath string `json:"report_html_path,omitempty"`
|
||||
Params taskParams `json:"params,omitempty"`
|
||||
}
|
||||
|
||||
type burnPreset struct {
|
||||
@@ -252,6 +258,7 @@ func (q *taskQueue) enqueue(t *Task) {
|
||||
q.prune()
|
||||
q.persistLocked()
|
||||
q.mu.Unlock()
|
||||
taskSerialEvent(t, "queued")
|
||||
select {
|
||||
case q.trigger <- struct{}{}:
|
||||
default:
|
||||
@@ -429,7 +436,7 @@ func (q *taskQueue) worker() {
|
||||
t.StartedAt = &now
|
||||
t.DoneAt = nil
|
||||
t.ErrMsg = ""
|
||||
j := newTaskJobState(t.LogPath)
|
||||
j := newTaskJobState(t.LogPath, taskSerialPrefix(t))
|
||||
t.job = j
|
||||
batch = append(batch, t)
|
||||
}
|
||||
@@ -496,8 +503,6 @@ func (q *taskQueue) executeTask(t *Task, j *jobState, ctx context.Context) {
|
||||
|
||||
func (q *taskQueue) finalizeTaskRun(t *Task, j *jobState) {
|
||||
q.mu.Lock()
|
||||
defer q.mu.Unlock()
|
||||
|
||||
now := time.Now()
|
||||
t.DoneAt = &now
|
||||
if t.Status == TaskRunning {
|
||||
@@ -509,7 +514,18 @@ func (q *taskQueue) finalizeTaskRun(t *Task, j *jobState) {
|
||||
t.ErrMsg = ""
|
||||
}
|
||||
}
|
||||
q.finalizeTaskArtifactPathsLocked(t)
|
||||
q.persistLocked()
|
||||
q.mu.Unlock()
|
||||
|
||||
if err := writeTaskReportArtifacts(t); err != nil {
|
||||
appendJobLog(t.LogPath, "WARN: task report generation failed: "+err.Error())
|
||||
}
|
||||
if t.ErrMsg != "" {
|
||||
taskSerialEvent(t, "finished with status="+t.Status+" error="+t.ErrMsg)
|
||||
return
|
||||
}
|
||||
taskSerialEvent(t, "finished with status="+t.Status)
|
||||
}
|
||||
|
||||
// setCPUGovernor writes the given governor to all CPU scaling_governor sysfs files.
|
||||
@@ -848,6 +864,7 @@ func (h *handler) handleAPITasksCancel(w http.ResponseWriter, r *http.Request) {
|
||||
now := time.Now()
|
||||
t.DoneAt = &now
|
||||
globalQueue.persistLocked()
|
||||
taskSerialEvent(t, "finished with status="+t.Status)
|
||||
writeJSON(w, map[string]string{"status": "cancelled"})
|
||||
case TaskRunning:
|
||||
if t.job != nil {
|
||||
@@ -857,6 +874,7 @@ func (h *handler) handleAPITasksCancel(w http.ResponseWriter, r *http.Request) {
|
||||
now := time.Now()
|
||||
t.DoneAt = &now
|
||||
globalQueue.persistLocked()
|
||||
taskSerialEvent(t, "finished with status="+t.Status)
|
||||
writeJSON(w, map[string]string{"status": "cancelled"})
|
||||
default:
|
||||
writeError(w, http.StatusConflict, "task is not running or pending")
|
||||
@@ -897,6 +915,7 @@ func (h *handler) handleAPITasksCancelAll(w http.ResponseWriter, _ *http.Request
|
||||
case TaskPending:
|
||||
t.Status = TaskCancelled
|
||||
t.DoneAt = &now
|
||||
taskSerialEvent(t, "finished with status="+t.Status)
|
||||
n++
|
||||
case TaskRunning:
|
||||
if t.job != nil {
|
||||
@@ -904,6 +923,7 @@ func (h *handler) handleAPITasksCancelAll(w http.ResponseWriter, _ *http.Request
|
||||
}
|
||||
t.Status = TaskCancelled
|
||||
t.DoneAt = &now
|
||||
taskSerialEvent(t, "finished with status="+t.Status)
|
||||
n++
|
||||
}
|
||||
}
|
||||
@@ -922,6 +942,7 @@ func (h *handler) handleAPITasksKillWorkers(w http.ResponseWriter, _ *http.Reque
|
||||
case TaskPending:
|
||||
t.Status = TaskCancelled
|
||||
t.DoneAt = &now
|
||||
taskSerialEvent(t, "finished with status="+t.Status)
|
||||
cancelled++
|
||||
case TaskRunning:
|
||||
if t.job != nil {
|
||||
@@ -929,6 +950,7 @@ func (h *handler) handleAPITasksKillWorkers(w http.ResponseWriter, _ *http.Reque
|
||||
}
|
||||
t.Status = TaskCancelled
|
||||
t.DoneAt = &now
|
||||
taskSerialEvent(t, "finished with status="+t.Status)
|
||||
cancelled++
|
||||
}
|
||||
}
|
||||
@@ -992,10 +1014,10 @@ func (h *handler) handleAPITasksStream(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
func (q *taskQueue) assignTaskLogPathLocked(t *Task) {
|
||||
if t.LogPath != "" || q.logsDir == "" || t.ID == "" {
|
||||
if q.logsDir == "" || t.ID == "" {
|
||||
return
|
||||
}
|
||||
t.LogPath = filepath.Join(q.logsDir, t.ID+".log")
|
||||
q.ensureTaskArtifactPathsLocked(t)
|
||||
}
|
||||
|
||||
func (q *taskQueue) loadLocked() {
|
||||
@@ -1012,17 +1034,20 @@ func (q *taskQueue) loadLocked() {
|
||||
}
|
||||
for _, pt := range persisted {
|
||||
t := &Task{
|
||||
ID: pt.ID,
|
||||
Name: pt.Name,
|
||||
Target: pt.Target,
|
||||
Priority: pt.Priority,
|
||||
Status: pt.Status,
|
||||
CreatedAt: pt.CreatedAt,
|
||||
StartedAt: pt.StartedAt,
|
||||
DoneAt: pt.DoneAt,
|
||||
ErrMsg: pt.ErrMsg,
|
||||
LogPath: pt.LogPath,
|
||||
params: pt.Params,
|
||||
ID: pt.ID,
|
||||
Name: pt.Name,
|
||||
Target: pt.Target,
|
||||
Priority: pt.Priority,
|
||||
Status: pt.Status,
|
||||
CreatedAt: pt.CreatedAt,
|
||||
StartedAt: pt.StartedAt,
|
||||
DoneAt: pt.DoneAt,
|
||||
ErrMsg: pt.ErrMsg,
|
||||
LogPath: pt.LogPath,
|
||||
ArtifactsDir: pt.ArtifactsDir,
|
||||
ReportJSONPath: pt.ReportJSONPath,
|
||||
ReportHTMLPath: pt.ReportHTMLPath,
|
||||
params: pt.Params,
|
||||
}
|
||||
q.assignTaskLogPathLocked(t)
|
||||
if t.Status == TaskRunning {
|
||||
@@ -1053,17 +1078,20 @@ func (q *taskQueue) persistLocked() {
|
||||
state := make([]persistedTask, 0, len(q.tasks))
|
||||
for _, t := range q.tasks {
|
||||
state = append(state, persistedTask{
|
||||
ID: t.ID,
|
||||
Name: t.Name,
|
||||
Target: t.Target,
|
||||
Priority: t.Priority,
|
||||
Status: t.Status,
|
||||
CreatedAt: t.CreatedAt,
|
||||
StartedAt: t.StartedAt,
|
||||
DoneAt: t.DoneAt,
|
||||
ErrMsg: t.ErrMsg,
|
||||
LogPath: t.LogPath,
|
||||
Params: t.params,
|
||||
ID: t.ID,
|
||||
Name: t.Name,
|
||||
Target: t.Target,
|
||||
Priority: t.Priority,
|
||||
Status: t.Status,
|
||||
CreatedAt: t.CreatedAt,
|
||||
StartedAt: t.StartedAt,
|
||||
DoneAt: t.DoneAt,
|
||||
ErrMsg: t.ErrMsg,
|
||||
LogPath: t.LogPath,
|
||||
ArtifactsDir: t.ArtifactsDir,
|
||||
ReportJSONPath: t.ReportJSONPath,
|
||||
ReportHTMLPath: t.ReportHTMLPath,
|
||||
Params: t.params,
|
||||
})
|
||||
}
|
||||
data, err := json.MarshalIndent(state, "", " ")
|
||||
@@ -1094,3 +1122,88 @@ func taskElapsedSec(t *Task, now time.Time) int {
|
||||
}
|
||||
return int(end.Sub(start).Round(time.Second) / time.Second)
|
||||
}
|
||||
|
||||
func taskFolderStatus(status string) string {
|
||||
status = strings.TrimSpace(strings.ToLower(status))
|
||||
switch status {
|
||||
case TaskRunning, TaskDone, TaskFailed, TaskCancelled:
|
||||
return status
|
||||
default:
|
||||
return TaskPending
|
||||
}
|
||||
}
|
||||
|
||||
// sanitizeTaskFolderPart turns an arbitrary string into a filesystem-friendly
// slug: the input is lowercased, runs of anything other than ASCII lowercase
// letters or digits collapse to a single '-', and leading/trailing dashes are
// dropped. If nothing usable remains, the fallback "task" is returned.
func sanitizeTaskFolderPart(s string) string {
	trimmed := strings.TrimSpace(strings.ToLower(s))
	if trimmed == "" {
		return "task"
	}
	var sb strings.Builder
	pendingDash := false
	for _, r := range trimmed {
		keep := (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9')
		if !keep {
			// Remember the separator; only emit it between kept runs.
			pendingDash = true
			continue
		}
		if pendingDash && sb.Len() > 0 {
			sb.WriteByte('-')
		}
		sb.WriteRune(r)
		pendingDash = false
	}
	out := sb.String()
	if out == "" {
		return "task"
	}
	return out
}
|
||||
|
||||
func taskArtifactsDir(root string, t *Task, status string) string {
|
||||
if strings.TrimSpace(root) == "" || t == nil {
|
||||
return ""
|
||||
}
|
||||
return filepath.Join(root, fmt.Sprintf("%s_%s_%s", t.ID, sanitizeTaskFolderPart(t.Name), taskFolderStatus(status)))
|
||||
}
|
||||
|
||||
func ensureTaskReportPaths(t *Task) {
|
||||
if t == nil || strings.TrimSpace(t.ArtifactsDir) == "" {
|
||||
return
|
||||
}
|
||||
if t.LogPath == "" || filepath.Base(t.LogPath) == "task.log" {
|
||||
t.LogPath = filepath.Join(t.ArtifactsDir, "task.log")
|
||||
}
|
||||
t.ReportJSONPath = filepath.Join(t.ArtifactsDir, "report.json")
|
||||
t.ReportHTMLPath = filepath.Join(t.ArtifactsDir, "report.html")
|
||||
}
|
||||
|
||||
func (q *taskQueue) ensureTaskArtifactPathsLocked(t *Task) {
|
||||
if t == nil || strings.TrimSpace(q.logsDir) == "" || strings.TrimSpace(t.ID) == "" {
|
||||
return
|
||||
}
|
||||
if strings.TrimSpace(t.ArtifactsDir) == "" {
|
||||
t.ArtifactsDir = taskArtifactsDir(q.logsDir, t, t.Status)
|
||||
}
|
||||
if t.ArtifactsDir != "" {
|
||||
_ = os.MkdirAll(t.ArtifactsDir, 0755)
|
||||
}
|
||||
ensureTaskReportPaths(t)
|
||||
}
|
||||
|
||||
func (q *taskQueue) finalizeTaskArtifactPathsLocked(t *Task) {
|
||||
if t == nil || strings.TrimSpace(q.logsDir) == "" || strings.TrimSpace(t.ID) == "" {
|
||||
return
|
||||
}
|
||||
q.ensureTaskArtifactPathsLocked(t)
|
||||
dstDir := taskArtifactsDir(q.logsDir, t, t.Status)
|
||||
if dstDir == "" {
|
||||
return
|
||||
}
|
||||
if t.ArtifactsDir != "" && t.ArtifactsDir != dstDir {
|
||||
if _, err := os.Stat(dstDir); err != nil {
|
||||
_ = os.Rename(t.ArtifactsDir, dstDir)
|
||||
}
|
||||
t.ArtifactsDir = dstDir
|
||||
}
|
||||
ensureTaskReportPaths(t)
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package webui
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
@@ -12,6 +13,7 @@ import (
|
||||
"time"
|
||||
|
||||
"bee/audit/internal/app"
|
||||
"bee/audit/internal/platform"
|
||||
)
|
||||
|
||||
func TestTaskQueuePersistsAndRecoversPendingTasks(t *testing.T) {
|
||||
@@ -248,6 +250,124 @@ func TestHandleAPITasksStreamPendingTaskStartsSSEImmediately(t *testing.T) {
|
||||
t.Fatalf("stream did not emit queued status promptly, body=%q", rec.Body.String())
|
||||
}
|
||||
|
||||
// TestFinalizeTaskRunCreatesReportFolderAndArtifacts verifies that finishing
// a running task (1) flips its status to done, (2) renames its artifacts
// folder to carry a "_done" suffix, and (3) writes report.json/report.html
// whose contents reflect the task and include at least one chart built from
// recorded metric samples.
func TestFinalizeTaskRunCreatesReportFolderAndArtifacts(t *testing.T) {
	dir := t.TempDir()
	metricsPath := filepath.Join(dir, "metrics.db")
	// Point report generation at a throwaway metrics DB for this test only.
	prevMetricsPath := taskReportMetricsDBPath
	taskReportMetricsDBPath = metricsPath
	t.Cleanup(func() { taskReportMetricsDBPath = prevMetricsPath })

	db, err := openMetricsDB(metricsPath)
	if err != nil {
		t.Fatalf("openMetricsDB: %v", err)
	}
	// Record one sample 45s in the past so it lands inside the task's
	// run window (the task below started 90s ago).
	base := time.Now().UTC().Add(-45 * time.Second)
	if err := db.Write(platform.LiveMetricSample{
		Timestamp:  base,
		CPULoadPct: 42,
		MemLoadPct: 35,
		PowerW:     510,
	}); err != nil {
		t.Fatalf("Write: %v", err)
	}
	_ = db.Close() // close errors are irrelevant to what this test asserts

	q := &taskQueue{
		statePath: filepath.Join(dir, "tasks-state.json"),
		logsDir:   filepath.Join(dir, "tasks"),
		trigger:   make(chan struct{}, 1),
	}
	if err := os.MkdirAll(q.logsDir, 0755); err != nil {
		t.Fatal(err)
	}

	// A task that has been running for 90 seconds.
	started := time.Now().UTC().Add(-90 * time.Second)
	task := &Task{
		ID:        "task-1",
		Name:      "CPU SAT",
		Target:    "cpu",
		Status:    TaskRunning,
		CreatedAt: started.Add(-10 * time.Second),
		StartedAt: &started,
	}
	q.assignTaskLogPathLocked(task)
	appendJobLog(task.LogPath, "line-1")

	job := newTaskJobState(task.LogPath)
	job.finish("") // empty error: the task should finalize as done
	q.finalizeTaskRun(task, job)

	if task.Status != TaskDone {
		t.Fatalf("status=%q want %q", task.Status, TaskDone)
	}
	// The artifacts folder must have been renamed with the terminal-status suffix.
	if !strings.Contains(filepath.Base(task.ArtifactsDir), "_done") {
		t.Fatalf("artifacts dir=%q", task.ArtifactsDir)
	}
	// Both report artifacts must exist on disk.
	if _, err := os.Stat(task.ReportJSONPath); err != nil {
		t.Fatalf("report json: %v", err)
	}
	if _, err := os.Stat(task.ReportHTMLPath); err != nil {
		t.Fatalf("report html: %v", err)
	}
	// The JSON report must round-trip and describe this task.
	var report taskReport
	data, err := os.ReadFile(task.ReportJSONPath)
	if err != nil {
		t.Fatalf("ReadFile(report.json): %v", err)
	}
	if err := json.Unmarshal(data, &report); err != nil {
		t.Fatalf("Unmarshal(report.json): %v", err)
	}
	if report.ID != task.ID || report.Status != TaskDone {
		t.Fatalf("report=%+v", report)
	}
	// The metric sample written above should have produced chart data.
	if len(report.Charts) == 0 {
		t.Fatalf("expected charts in report, got none")
	}
}
|
||||
|
||||
func TestTaskLifecycleMirrorsToSerialConsole(t *testing.T) {
|
||||
var lines []string
|
||||
prev := taskSerialWriteLine
|
||||
taskSerialWriteLine = func(line string) { lines = append(lines, line) }
|
||||
t.Cleanup(func() { taskSerialWriteLine = prev })
|
||||
|
||||
dir := t.TempDir()
|
||||
q := &taskQueue{
|
||||
statePath: filepath.Join(dir, "tasks-state.json"),
|
||||
logsDir: filepath.Join(dir, "tasks"),
|
||||
trigger: make(chan struct{}, 1),
|
||||
}
|
||||
task := &Task{
|
||||
ID: "task-serial-1",
|
||||
Name: "CPU SAT",
|
||||
Target: "cpu",
|
||||
Status: TaskPending,
|
||||
CreatedAt: time.Now().UTC(),
|
||||
}
|
||||
|
||||
q.enqueue(task)
|
||||
started := time.Now().UTC()
|
||||
task.Status = TaskRunning
|
||||
task.StartedAt = &started
|
||||
job := newTaskJobState(task.LogPath, taskSerialPrefix(task))
|
||||
job.append("Starting CPU SAT...")
|
||||
job.append("CPU stress duration: 60s")
|
||||
job.finish("")
|
||||
q.finalizeTaskRun(task, job)
|
||||
|
||||
joined := strings.Join(lines, "\n")
|
||||
for _, needle := range []string{
|
||||
"queued",
|
||||
"Starting CPU SAT...",
|
||||
"CPU stress duration: 60s",
|
||||
"finished with status=done",
|
||||
} {
|
||||
if !strings.Contains(joined, needle) {
|
||||
t.Fatalf("serial mirror missing %q in %q", needle, joined)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveBurnPreset(t *testing.T) {
|
||||
tests := []struct {
|
||||
profile string
|
||||
|
||||
Reference in New Issue
Block a user