Compare commits

..

3 Commits
v5.1 ... v5.2

16 changed files with 1425 additions and 447 deletions

View File

@@ -274,9 +274,6 @@ func normalizeNvidiaBenchmarkOptionsForBenchmark(opts NvidiaBenchmarkOptions) Nv
}
opts.GPUIndices = dedupeSortedIndices(opts.GPUIndices)
opts.ExcludeGPUIndices = dedupeSortedIndices(opts.ExcludeGPUIndices)
if !opts.RunNCCL {
opts.RunNCCL = true
}
return opts
}

View File

@@ -41,6 +41,21 @@ func TestResolveBenchmarkProfile(t *testing.T) {
}
}
func TestNormalizeNvidiaBenchmarkOptionsPreservesRunNCCLChoice(t *testing.T) {
t.Parallel()
opts := normalizeNvidiaBenchmarkOptionsForBenchmark(NvidiaBenchmarkOptions{
Profile: "stability",
RunNCCL: false,
})
if opts.Profile != NvidiaBenchmarkProfileStability {
t.Fatalf("profile=%q want %q", opts.Profile, NvidiaBenchmarkProfileStability)
}
if opts.RunNCCL {
t.Fatalf("RunNCCL should stay false when explicitly disabled")
}
}
func TestParseBenchmarkBurnLog(t *testing.T) {
t.Parallel()

View File

@@ -120,10 +120,45 @@ func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) erro
log(fmt.Sprintf("Warning: rebind /run/live/medium failed: %v", err))
}
log("Verifying live medium now served from RAM...")
status := s.LiveBootSource()
if err := verifyInstallToRAMStatus(status); err != nil {
return err
}
log(fmt.Sprintf("Verification passed: live medium now served from %s.", describeLiveBootSource(status)))
log("Done. Installation media can be safely disconnected.")
return nil
}
func verifyInstallToRAMStatus(status LiveBootSource) error {
if status.InRAM {
return nil
}
return fmt.Errorf("install to RAM verification failed: live medium still mounted from %s", describeLiveBootSource(status))
}
func describeLiveBootSource(status LiveBootSource) string {
source := strings.TrimSpace(status.Device)
if source == "" {
source = strings.TrimSpace(status.Source)
}
if source == "" {
source = "unknown source"
}
switch strings.TrimSpace(status.Kind) {
case "ram":
return "RAM"
case "usb":
return "USB (" + source + ")"
case "cdrom":
return "CD-ROM (" + source + ")"
case "disk":
return "disk (" + source + ")"
default:
return source
}
}
func copyFileLarge(ctx context.Context, src, dst string, logFunc func(string)) error {
in, err := os.Open(src)
if err != nil {

View File

@@ -3,6 +3,8 @@ package platform
import "testing"
func TestInferLiveBootKind(t *testing.T) {
t.Parallel()
tests := []struct {
name string
fsType string
@@ -18,6 +20,7 @@ func TestInferLiveBootKind(t *testing.T) {
{name: "unknown", source: "overlay", want: "unknown"},
}
for _, tc := range tests {
tc := tc
t.Run(tc.name, func(t *testing.T) {
got := inferLiveBootKind(tc.fsType, tc.source, tc.deviceType, tc.transport)
if got != tc.want {
@@ -26,3 +29,29 @@ func TestInferLiveBootKind(t *testing.T) {
})
}
}
func TestVerifyInstallToRAMStatus(t *testing.T) {
t.Parallel()
if err := verifyInstallToRAMStatus(LiveBootSource{InRAM: true, Kind: "ram", Source: "tmpfs"}); err != nil {
t.Fatalf("expected success for RAM-backed status, got %v", err)
}
err := verifyInstallToRAMStatus(LiveBootSource{InRAM: false, Kind: "usb", Device: "/dev/sdb1"})
if err == nil {
t.Fatal("expected verification failure when media is still on USB")
}
if got := err.Error(); got != "install to RAM verification failed: live medium still mounted from USB (/dev/sdb1)" {
t.Fatalf("error=%q", got)
}
}
func TestDescribeLiveBootSource(t *testing.T) {
t.Parallel()
if got := describeLiveBootSource(LiveBootSource{InRAM: true, Kind: "ram"}); got != "RAM" {
t.Fatalf("got %q want RAM", got)
}
if got := describeLiveBootSource(LiveBootSource{Kind: "unknown", Source: "/run/live/medium"}); got != "/run/live/medium" {
t.Fatalf("got %q want /run/live/medium", got)
}
}

View File

@@ -12,11 +12,11 @@ import (
"os"
"os/exec"
"path/filepath"
"syscall"
"sort"
"strconv"
"strings"
"sync"
"syscall"
"time"
)
@@ -76,15 +76,15 @@ func streamExecOutput(cmd *exec.Cmd, logFunc func(string)) ([]byte, error) {
// NvidiaGPU holds basic GPU info from nvidia-smi.
type NvidiaGPU struct {
Index int
Name string
MemoryMB int
Index int `json:"index"`
Name string `json:"name"`
MemoryMB int `json:"memory_mb"`
}
// AMDGPUInfo holds basic info about an AMD GPU from rocm-smi.
type AMDGPUInfo struct {
Index int
Name string
Index int `json:"index"`
Name string `json:"name"`
}
// DetectGPUVendor returns "nvidia" if /dev/nvidia0 exists, "amd" if /dev/kfd exists, or "" otherwise.

View File

@@ -10,17 +10,30 @@ import (
func (s *System) ListBeeServices() ([]string, error) {
seen := map[string]bool{}
var out []string
for _, pattern := range []string{"/etc/systemd/system/bee-*.service", "/lib/systemd/system/bee-*.service"} {
for _, pattern := range []string{
"/etc/systemd/system/bee-*.service",
"/lib/systemd/system/bee-*.service",
"/etc/systemd/system/bee-*.timer",
"/lib/systemd/system/bee-*.timer",
} {
matches, err := filepath.Glob(pattern)
if err != nil {
return nil, err
}
for _, match := range matches {
name := strings.TrimSuffix(filepath.Base(match), ".service")
base := filepath.Base(match)
name := base
if strings.HasSuffix(base, ".service") {
name = strings.TrimSuffix(base, ".service")
}
// Skip template units (e.g. bee-journal-mirror@) — they have no instances to query.
if strings.HasSuffix(name, "@") {
continue
}
// bee-selfheal is timer-managed; showing the oneshot service as inactive is misleading.
if name == "bee-selfheal" && strings.HasSuffix(base, ".service") {
continue
}
if !seen[name] {
seen[name] = true
out = append(out, name)

View File

@@ -44,12 +44,12 @@ type StaticIPv4Config struct {
}
type RemovableTarget struct {
Device string
FSType string
Size string
Label string
Model string
Mountpoint string
Device string `json:"device"`
FSType string `json:"fs_type"`
Size string `json:"size"`
Label string `json:"label"`
Model string `json:"model"`
Mountpoint string `json:"mountpoint"`
}
type ToolStatus struct {

View File

@@ -0,0 +1,31 @@
package platform
import (
"encoding/json"
"strings"
"testing"
)
func TestRemovableTargetJSONUsesFrontendFieldNames(t *testing.T) {
t.Parallel()
data, err := json.Marshal(RemovableTarget{
Device: "/dev/sdb1",
FSType: "exfat",
Size: "1.8T",
Label: "USB",
Model: "Flash",
})
if err != nil {
t.Fatalf("marshal: %v", err)
}
raw := string(data)
for _, key := range []string{`"device"`, `"fs_type"`, `"size"`, `"label"`, `"model"`} {
if !strings.Contains(raw, key) {
t.Fatalf("json missing key %s: %s", key, raw)
}
}
if strings.Contains(raw, `"Device"`) || strings.Contains(raw, `"FSType"`) {
t.Fatalf("json still contains Go field names: %s", raw)
}
}

View File

@@ -232,6 +232,54 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
}
}
func (h *handler) handleAPIBenchmarkNvidiaRun(w http.ResponseWriter, r *http.Request) {
if h.opts.App == nil {
writeError(w, http.StatusServiceUnavailable, "app not configured")
return
}
var body struct {
Profile string `json:"profile"`
SizeMB int `json:"size_mb"`
GPUIndices []int `json:"gpu_indices"`
ExcludeGPUIndices []int `json:"exclude_gpu_indices"`
RunNCCL *bool `json:"run_nccl"`
DisplayName string `json:"display_name"`
}
if r.Body != nil {
if err := json.NewDecoder(r.Body).Decode(&body); err != nil && !errors.Is(err, io.EOF) {
writeError(w, http.StatusBadRequest, "invalid request body")
return
}
}
runNCCL := true
if body.RunNCCL != nil {
runNCCL = *body.RunNCCL
}
t := &Task{
ID: newJobID("benchmark-nvidia"),
Name: taskDisplayName("nvidia-benchmark", "", ""),
Target: "nvidia-benchmark",
Priority: 15,
Status: TaskPending,
CreatedAt: time.Now(),
params: taskParams{
GPUIndices: body.GPUIndices,
ExcludeGPUIndices: body.ExcludeGPUIndices,
SizeMB: body.SizeMB,
BenchmarkProfile: body.Profile,
RunNCCL: runNCCL,
DisplayName: body.DisplayName,
},
}
if strings.TrimSpace(body.DisplayName) != "" {
t.Name = body.DisplayName
}
globalQueue.enqueue(t)
writeJSON(w, map[string]string{"task_id": t.ID, "job_id": t.ID})
}
func (h *handler) handleAPISATStream(w http.ResponseWriter, r *http.Request) {
id := r.URL.Query().Get("job_id")
if id == "" {
@@ -491,6 +539,22 @@ func (h *handler) handleAPIExportUSBBundle(w http.ResponseWriter, r *http.Reques
// ── GPU presence ──────────────────────────────────────────────────────────────
func (h *handler) handleAPIGNVIDIAGPUs(w http.ResponseWriter, _ *http.Request) {
if h.opts.App == nil {
writeError(w, http.StatusServiceUnavailable, "app not configured")
return
}
gpus, err := h.opts.App.ListNvidiaGPUs()
if err != nil {
writeError(w, http.StatusInternalServerError, err.Error())
return
}
if gpus == nil {
gpus = []platform.NvidiaGPU{}
}
writeJSON(w, gpus)
}
func (h *handler) handleAPIGPUPresence(w http.ResponseWriter, r *http.Request) {
if h.opts.App == nil {
writeError(w, http.StatusServiceUnavailable, "app not configured")
@@ -516,8 +580,10 @@ func (h *handler) handleAPIGPUTools(w http.ResponseWriter, _ *http.Request) {
_, amdErr := os.Stat("/dev/kfd")
nvidiaUp := nvidiaErr == nil
amdUp := amdErr == nil
_, dcgmErr := exec.LookPath("dcgmi")
writeJSON(w, []toolEntry{
{ID: "bee-gpu-burn", Available: nvidiaUp, Vendor: "nvidia"},
{ID: "dcgm", Available: nvidiaUp && dcgmErr == nil, Vendor: "nvidia"},
{ID: "john", Available: nvidiaUp, Vendor: "nvidia"},
{ID: "nccl", Available: nvidiaUp, Vendor: "nvidia"},
{ID: "rvs", Available: amdUp, Vendor: "amd"},

View File

@@ -64,6 +64,42 @@ func TestHandleAPISATRunDecodesBodyWithoutContentLength(t *testing.T) {
}
}
func TestHandleAPIBenchmarkNvidiaRunQueuesSelectedGPUs(t *testing.T) {
globalQueue.mu.Lock()
originalTasks := globalQueue.tasks
globalQueue.tasks = nil
globalQueue.mu.Unlock()
t.Cleanup(func() {
globalQueue.mu.Lock()
globalQueue.tasks = originalTasks
globalQueue.mu.Unlock()
})
h := &handler{opts: HandlerOptions{App: &app.App{}}}
req := httptest.NewRequest("POST", "/api/benchmark/nvidia/run", strings.NewReader(`{"profile":"standard","gpu_indices":[1,3],"run_nccl":false}`))
rec := httptest.NewRecorder()
h.handleAPIBenchmarkNvidiaRun(rec, req)
if rec.Code != 200 {
t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
}
globalQueue.mu.Lock()
defer globalQueue.mu.Unlock()
if len(globalQueue.tasks) != 1 {
t.Fatalf("tasks=%d want 1", len(globalQueue.tasks))
}
task := globalQueue.tasks[0]
if task.Target != "nvidia-benchmark" {
t.Fatalf("target=%q want nvidia-benchmark", task.Target)
}
if got := task.params.GPUIndices; len(got) != 2 || got[0] != 1 || got[1] != 3 {
t.Fatalf("gpu indices=%v want [1 3]", got)
}
if task.params.RunNCCL {
t.Fatal("RunNCCL should reflect explicit false from request")
}
}
func TestPushFanRingsTracksByNameAndCarriesForwardMissingSamples(t *testing.T) {
h := &handler{}

View File

@@ -0,0 +1,713 @@
package webui
import (
"fmt"
"math"
"sort"
"strconv"
"strings"
"time"
"bee/audit/internal/platform"
)
type chartTimelineSegment struct {
Start time.Time
End time.Time
Active bool
}
type chartScale struct {
Min float64
Max float64
Ticks []float64
}
type chartLayout struct {
Width int
Height int
PlotLeft int
PlotRight int
PlotTop int
PlotBottom int
}
type metricChartSeries struct {
Name string
AxisTitle string
Color string
Values []float64
}
var metricChartPalette = []string{
"#5794f2",
"#73bf69",
"#f2cc0c",
"#ff9830",
"#f2495c",
"#b877d9",
"#56d2f7",
"#8ab8ff",
"#9adf8f",
"#ffbe5c",
}
func renderMetricChartSVG(title string, labels []string, times []time.Time, datasets [][]float64, names []string, yMin, yMax *float64, canvasHeight int, timeline []chartTimelineSegment) ([]byte, error) {
pointCount := len(labels)
if len(times) > pointCount {
pointCount = len(times)
}
if pointCount == 0 {
pointCount = 1
labels = []string{""}
times = []time.Time{time.Time{}}
}
if len(labels) < pointCount {
padded := make([]string, pointCount)
copy(padded, labels)
labels = padded
}
if len(times) < pointCount {
times = synthesizeChartTimes(times, pointCount)
}
for i := range datasets {
if len(datasets[i]) == 0 {
datasets[i] = make([]float64, pointCount)
}
}
mn, avg, mx := globalStats(datasets)
if mx > 0 {
title = fmt.Sprintf("%s ↓%s ~%s ↑%s",
title,
chartLegendNumber(mn),
chartLegendNumber(avg),
chartLegendNumber(mx),
)
}
legendItems := []metricChartSeries{}
for i, name := range names {
color := metricChartPalette[i%len(metricChartPalette)]
values := make([]float64, pointCount)
if i < len(datasets) {
copy(values, coalesceDataset(datasets[i], pointCount))
}
legendItems = append(legendItems, metricChartSeries{
Name: name,
Color: color,
Values: values,
})
}
scale := singleAxisChartScale(datasets, yMin, yMax)
layout := singleAxisChartLayout(canvasHeight, len(legendItems))
start, end := chartTimeBounds(times)
var b strings.Builder
writeSVGOpen(&b, layout.Width, layout.Height)
writeChartFrame(&b, title, layout.Width, layout.Height)
writeTimelineIdleSpans(&b, layout, start, end, timeline)
writeVerticalGrid(&b, layout, times, pointCount, 8)
writeHorizontalGrid(&b, layout, scale)
writeTimelineBoundaries(&b, layout, start, end, timeline)
writePlotBorder(&b, layout)
writeSingleAxisY(&b, layout, scale)
writeXAxisLabels(&b, layout, times, labels, start, end, 8)
for _, item := range legendItems {
writeSeriesPolyline(&b, layout, times, start, end, item.Values, scale, item.Color)
}
writeLegend(&b, layout, legendItems)
writeSVGClose(&b)
return []byte(b.String()), nil
}
func renderGPUOverviewChartSVG(idx int, samples []platform.LiveMetricSample, timeline []chartTimelineSegment) ([]byte, bool, error) {
temp := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.TempC })
power := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.PowerW })
coreClock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.ClockMHz })
memClock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.MemClockMHz })
if temp == nil && power == nil && coreClock == nil && memClock == nil {
return nil, false, nil
}
labels := sampleTimeLabels(samples)
times := sampleTimes(samples)
svg, err := drawGPUOverviewChartSVG(
fmt.Sprintf("GPU %d Overview", idx),
labels,
times,
[]metricChartSeries{
{Name: "Temp C", Values: coalesceDataset(temp, len(labels)), Color: "#f05a5a", AxisTitle: "Temp C"},
{Name: "Power W", Values: coalesceDataset(power, len(labels)), Color: "#ffb357", AxisTitle: "Power W"},
{Name: "Core Clock MHz", Values: coalesceDataset(coreClock, len(labels)), Color: "#73bf69", AxisTitle: "Core MHz"},
{Name: "Memory Clock MHz", Values: coalesceDataset(memClock, len(labels)), Color: "#5794f2", AxisTitle: "Memory MHz"},
},
timeline,
)
if err != nil {
return nil, false, err
}
return svg, true, nil
}
func drawGPUOverviewChartSVG(title string, labels []string, times []time.Time, series []metricChartSeries, timeline []chartTimelineSegment) ([]byte, error) {
if len(series) != 4 {
return nil, fmt.Errorf("gpu overview requires 4 series, got %d", len(series))
}
const (
width = 1400
height = 840
plotLeft = 180
plotRight = 1220
plotTop = 96
plotBottom = 660
)
const (
leftOuterAxis = 72
leftInnerAxis = 132
rightInnerAxis = 1268
rightOuterAxis = 1328
)
layout := chartLayout{
Width: width,
Height: height,
PlotLeft: plotLeft,
PlotRight: plotRight,
PlotTop: plotTop,
PlotBottom: plotBottom,
}
axisX := []int{leftOuterAxis, leftInnerAxis, rightInnerAxis, rightOuterAxis}
pointCount := len(labels)
if len(times) > pointCount {
pointCount = len(times)
}
if pointCount == 0 {
pointCount = 1
labels = []string{""}
times = []time.Time{time.Time{}}
}
if len(labels) < pointCount {
padded := make([]string, pointCount)
copy(padded, labels)
labels = padded
}
if len(times) < pointCount {
times = synthesizeChartTimes(times, pointCount)
}
for i := range series {
if len(series[i].Values) == 0 {
series[i].Values = make([]float64, pointCount)
}
}
scales := make([]chartScale, len(series))
for i := range series {
min, max := chartSeriesBounds(series[i].Values)
ticks := chartNiceTicks(min, max, 8)
scales[i] = chartScale{
Min: ticks[0],
Max: ticks[len(ticks)-1],
Ticks: ticks,
}
}
start, end := chartTimeBounds(times)
var b strings.Builder
writeSVGOpen(&b, width, height)
writeChartFrame(&b, title, width, height)
writeTimelineIdleSpans(&b, layout, start, end, timeline)
writeVerticalGrid(&b, layout, times, pointCount, 8)
writeHorizontalGrid(&b, layout, scales[0])
writeTimelineBoundaries(&b, layout, start, end, timeline)
writePlotBorder(&b, layout)
for i, axisLineX := range axisX {
fmt.Fprintf(&b, `<line x1="%d" y1="%d" x2="%d" y2="%d" stroke="%s" stroke-width="1"/>`+"\n",
axisLineX, layout.PlotTop, axisLineX, layout.PlotBottom, series[i].Color)
fmt.Fprintf(&b, `<text x="%d" y="%d" text-anchor="middle" font-family="sans-serif" font-size="11" font-weight="700" fill="%s">%s</text>`+"\n",
axisLineX, 64, series[i].Color, sanitizeChartText(series[i].AxisTitle))
for _, tick := range scales[i].Ticks {
y := chartYForValue(valueClamp(tick, scales[i]), scales[i], layout.PlotTop, layout.PlotBottom)
label := sanitizeChartText(chartYAxisNumber(tick))
if i < 2 {
fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f" stroke="%s" stroke-width="1"/>`+"\n",
axisLineX, y, axisLineX+6, y, series[i].Color)
fmt.Fprintf(&b, `<text x="%d" y="%.1f" text-anchor="end" dy="4" font-family="sans-serif" font-size="10" fill="%s">%s</text>`+"\n",
axisLineX-8, y, series[i].Color, label)
continue
}
fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f" stroke="%s" stroke-width="1"/>`+"\n",
axisLineX, y, axisLineX-6, y, series[i].Color)
fmt.Fprintf(&b, `<text x="%d" y="%.1f" text-anchor="start" dy="4" font-family="sans-serif" font-size="10" fill="%s">%s</text>`+"\n",
axisLineX+8, y, series[i].Color, label)
}
}
writeXAxisLabels(&b, layout, times, labels, start, end, 8)
for i := range series {
writeSeriesPolyline(&b, layout, times, start, end, series[i].Values, scales[i], series[i].Color)
}
writeLegend(&b, layout, series)
writeSVGClose(&b)
return []byte(b.String()), nil
}
func metricsTimelineSegments(samples []platform.LiveMetricSample, now time.Time) []chartTimelineSegment {
if len(samples) == 0 {
return nil
}
times := sampleTimes(samples)
start, end := chartTimeBounds(times)
if start.IsZero() || end.IsZero() {
return nil
}
return chartTimelineSegmentsForRange(start, end, now, snapshotTaskHistory())
}
func snapshotTaskHistory() []Task {
globalQueue.mu.Lock()
defer globalQueue.mu.Unlock()
out := make([]Task, len(globalQueue.tasks))
for i, t := range globalQueue.tasks {
out[i] = *t
}
return out
}
func chartTimelineSegmentsForRange(start, end, now time.Time, tasks []Task) []chartTimelineSegment {
if start.IsZero() || end.IsZero() {
return nil
}
if end.Before(start) {
start, end = end, start
}
type interval struct {
start time.Time
end time.Time
}
active := make([]interval, 0, len(tasks))
for _, task := range tasks {
if task.StartedAt == nil {
continue
}
intervalStart := task.StartedAt.UTC()
intervalEnd := now.UTC()
if task.DoneAt != nil {
intervalEnd = task.DoneAt.UTC()
}
if !intervalEnd.After(intervalStart) {
continue
}
if intervalEnd.Before(start) || intervalStart.After(end) {
continue
}
if intervalStart.Before(start) {
intervalStart = start
}
if intervalEnd.After(end) {
intervalEnd = end
}
active = append(active, interval{start: intervalStart, end: intervalEnd})
}
sort.Slice(active, func(i, j int) bool {
if active[i].start.Equal(active[j].start) {
return active[i].end.Before(active[j].end)
}
return active[i].start.Before(active[j].start)
})
merged := make([]interval, 0, len(active))
for _, span := range active {
if len(merged) == 0 {
merged = append(merged, span)
continue
}
last := &merged[len(merged)-1]
if !span.start.After(last.end) {
if span.end.After(last.end) {
last.end = span.end
}
continue
}
merged = append(merged, span)
}
segments := make([]chartTimelineSegment, 0, len(merged)*2+1)
cursor := start
for _, span := range merged {
if span.start.After(cursor) {
segments = append(segments, chartTimelineSegment{Start: cursor, End: span.start, Active: false})
}
segments = append(segments, chartTimelineSegment{Start: span.start, End: span.end, Active: true})
cursor = span.end
}
if cursor.Before(end) {
segments = append(segments, chartTimelineSegment{Start: cursor, End: end, Active: false})
}
if len(segments) == 0 {
segments = append(segments, chartTimelineSegment{Start: start, End: end, Active: false})
}
return segments
}
func sampleTimes(samples []platform.LiveMetricSample) []time.Time {
times := make([]time.Time, 0, len(samples))
for _, sample := range samples {
times = append(times, sample.Timestamp)
}
return times
}
func singleAxisChartScale(datasets [][]float64, yMin, yMax *float64) chartScale {
min, max := 0.0, 1.0
if yMin != nil && yMax != nil {
min, max = *yMin, *yMax
} else {
min, max = chartSeriesBounds(flattenDatasets(datasets))
if yMin != nil {
min = *yMin
}
if yMax != nil {
max = *yMax
}
}
ticks := chartNiceTicks(min, max, 8)
return chartScale{Min: ticks[0], Max: ticks[len(ticks)-1], Ticks: ticks}
}
func flattenDatasets(datasets [][]float64) []float64 {
total := 0
for _, ds := range datasets {
total += len(ds)
}
out := make([]float64, 0, total)
for _, ds := range datasets {
out = append(out, ds...)
}
return out
}
func singleAxisChartLayout(canvasHeight int, seriesCount int) chartLayout {
legendRows := 0
if chartLegendVisible(seriesCount) && seriesCount > 0 {
cols := 4
if seriesCount < cols {
cols = seriesCount
}
legendRows = (seriesCount + cols - 1) / cols
}
legendHeight := 0
if legendRows > 0 {
legendHeight = legendRows*24 + 24
}
return chartLayout{
Width: 1400,
Height: canvasHeight,
PlotLeft: 96,
PlotRight: 1352,
PlotTop: 72,
PlotBottom: canvasHeight - 60 - legendHeight,
}
}
func chartTimeBounds(times []time.Time) (time.Time, time.Time) {
if len(times) == 0 {
return time.Time{}, time.Time{}
}
start := times[0].UTC()
end := start
for _, ts := range times[1:] {
t := ts.UTC()
if t.Before(start) {
start = t
}
if t.After(end) {
end = t
}
}
return start, end
}
func synthesizeChartTimes(times []time.Time, count int) []time.Time {
if count <= 0 {
return nil
}
if len(times) == count {
return times
}
if len(times) == 1 {
out := make([]time.Time, count)
for i := range out {
out[i] = times[0].Add(time.Duration(i) * time.Minute)
}
return out
}
base := time.Now().UTC().Add(-time.Duration(count-1) * time.Minute)
out := make([]time.Time, count)
for i := range out {
out[i] = base.Add(time.Duration(i) * time.Minute)
}
return out
}
func writeSVGOpen(b *strings.Builder, width, height int) {
fmt.Fprintf(b, `<svg xmlns="http://www.w3.org/2000/svg" width="%d" height="%d" viewBox="0 0 %d %d">`+"\n", width, height, width, height)
}
func writeSVGClose(b *strings.Builder) {
b.WriteString("</svg>\n")
}
func writeChartFrame(b *strings.Builder, title string, width, height int) {
fmt.Fprintf(b, `<rect width="%d" height="%d" rx="10" ry="10" fill="#ffffff" stroke="#d7e0ea"/>`+"\n", width, height)
fmt.Fprintf(b, `<text x="%d" y="30" text-anchor="middle" font-family="sans-serif" font-size="16" font-weight="700" fill="#1f2937">%s</text>`+"\n",
width/2, sanitizeChartText(title))
}
func writePlotBorder(b *strings.Builder, layout chartLayout) {
fmt.Fprintf(b, `<rect x="%d" y="%d" width="%d" height="%d" fill="none" stroke="#cbd5e1" stroke-width="1"/>`+"\n",
layout.PlotLeft, layout.PlotTop, layout.PlotRight-layout.PlotLeft, layout.PlotBottom-layout.PlotTop)
}
func writeHorizontalGrid(b *strings.Builder, layout chartLayout, scale chartScale) {
b.WriteString(`<g stroke="#e2e8f0" stroke-width="1">` + "\n")
for _, tick := range scale.Ticks {
y := chartYForValue(tick, scale, layout.PlotTop, layout.PlotBottom)
fmt.Fprintf(b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f"/>`+"\n",
layout.PlotLeft, y, layout.PlotRight, y)
}
b.WriteString(`</g>` + "\n")
}
func writeVerticalGrid(b *strings.Builder, layout chartLayout, times []time.Time, pointCount, target int) {
if pointCount <= 0 {
return
}
start, end := chartTimeBounds(times)
b.WriteString(`<g stroke="#edf2f7" stroke-width="1">` + "\n")
for _, idx := range gpuChartLabelIndices(pointCount, target) {
ts := chartPointTime(times, idx)
x := chartXForTime(ts, start, end, layout.PlotLeft, layout.PlotRight)
fmt.Fprintf(b, `<line x1="%.1f" y1="%d" x2="%.1f" y2="%d"/>`+"\n",
x, layout.PlotTop, x, layout.PlotBottom)
}
b.WriteString(`</g>` + "\n")
}
func writeSingleAxisY(b *strings.Builder, layout chartLayout, scale chartScale) {
fmt.Fprintf(b, `<line x1="%d" y1="%d" x2="%d" y2="%d" stroke="#64748b" stroke-width="1"/>`+"\n",
layout.PlotLeft, layout.PlotTop, layout.PlotLeft, layout.PlotBottom)
for _, tick := range scale.Ticks {
y := chartYForValue(tick, scale, layout.PlotTop, layout.PlotBottom)
fmt.Fprintf(b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f" stroke="#64748b" stroke-width="1"/>`+"\n",
layout.PlotLeft, y, layout.PlotLeft-6, y)
fmt.Fprintf(b, `<text x="%d" y="%.1f" text-anchor="end" dy="4" font-family="sans-serif" font-size="10" fill="#475569">%s</text>`+"\n",
layout.PlotLeft-10, y, sanitizeChartText(chartYAxisNumber(tick)))
}
}
func writeXAxisLabels(b *strings.Builder, layout chartLayout, times []time.Time, labels []string, start, end time.Time, target int) {
pointCount := len(labels)
if len(times) > pointCount {
pointCount = len(times)
}
b.WriteString(`<g font-family="sans-serif" font-size="11" fill="#64748b" text-anchor="middle">` + "\n")
for _, idx := range gpuChartLabelIndices(pointCount, target) {
x := chartXForTime(chartPointTime(times, idx), start, end, layout.PlotLeft, layout.PlotRight)
label := ""
if idx < len(labels) {
label = labels[idx]
}
fmt.Fprintf(b, `<text x="%.1f" y="%d">%s</text>`+"\n", x, layout.PlotBottom+28, sanitizeChartText(label))
}
b.WriteString(`</g>` + "\n")
fmt.Fprintf(b, `<text x="%d" y="%d" text-anchor="middle" font-family="sans-serif" font-size="12" fill="#64748b">Time</text>`+"\n",
(layout.PlotLeft+layout.PlotRight)/2, layout.PlotBottom+48)
}
func writeSeriesPolyline(b *strings.Builder, layout chartLayout, times []time.Time, start, end time.Time, values []float64, scale chartScale, color string) {
if len(values) == 0 {
return
}
var points strings.Builder
for idx, value := range values {
if idx > 0 {
points.WriteByte(' ')
}
x := chartXForTime(chartPointTime(times, idx), start, end, layout.PlotLeft, layout.PlotRight)
y := chartYForValue(value, scale, layout.PlotTop, layout.PlotBottom)
points.WriteString(strconv.FormatFloat(x, 'f', 1, 64))
points.WriteByte(',')
points.WriteString(strconv.FormatFloat(y, 'f', 1, 64))
}
fmt.Fprintf(b, `<polyline points="%s" fill="none" stroke="%s" stroke-width="2.2" stroke-linejoin="round" stroke-linecap="round"/>`+"\n",
points.String(), color)
if len(values) == 1 {
x := chartXForTime(chartPointTime(times, 0), start, end, layout.PlotLeft, layout.PlotRight)
y := chartYForValue(values[0], scale, layout.PlotTop, layout.PlotBottom)
fmt.Fprintf(b, `<circle cx="%.1f" cy="%.1f" r="3.5" fill="%s"/>`+"\n", x, y, color)
}
}
func writeLegend(b *strings.Builder, layout chartLayout, series []metricChartSeries) {
if !chartLegendVisible(len(series)) || len(series) == 0 {
return
}
cols := 4
if len(series) < cols {
cols = len(series)
}
cellWidth := float64(layout.PlotRight-layout.PlotLeft) / float64(cols)
baseY := layout.PlotBottom + 74
for i, item := range series {
row := i / cols
col := i % cols
x := float64(layout.PlotLeft) + cellWidth*float64(col) + 8
y := float64(baseY + row*24)
fmt.Fprintf(b, `<line x1="%.1f" y1="%.1f" x2="%.1f" y2="%.1f" stroke="%s" stroke-width="3"/>`+"\n",
x, y, x+28, y, item.Color)
fmt.Fprintf(b, `<text x="%.1f" y="%.1f" font-family="sans-serif" font-size="12" fill="#1f2937">%s</text>`+"\n",
x+38, y+4, sanitizeChartText(item.Name))
}
}
func writeTimelineIdleSpans(b *strings.Builder, layout chartLayout, start, end time.Time, segments []chartTimelineSegment) {
if len(segments) == 0 {
return
}
b.WriteString(`<g data-role="timeline-overlay">` + "\n")
for _, segment := range segments {
if segment.Active || !segment.End.After(segment.Start) {
continue
}
x0 := chartXForTime(segment.Start, start, end, layout.PlotLeft, layout.PlotRight)
x1 := chartXForTime(segment.End, start, end, layout.PlotLeft, layout.PlotRight)
fmt.Fprintf(b, `<rect x="%.1f" y="%d" width="%.1f" height="%d" fill="#475569" opacity="0.10"/>`+"\n",
x0, layout.PlotTop, math.Max(1, x1-x0), layout.PlotBottom-layout.PlotTop)
}
b.WriteString(`</g>` + "\n")
}
func writeTimelineBoundaries(b *strings.Builder, layout chartLayout, start, end time.Time, segments []chartTimelineSegment) {
if len(segments) == 0 {
return
}
seen := map[int]bool{}
b.WriteString(`<g data-role="timeline-boundaries" stroke="#94a3b8" stroke-width="1.2">` + "\n")
for i, segment := range segments {
if i > 0 {
x := int(math.Round(chartXForTime(segment.Start, start, end, layout.PlotLeft, layout.PlotRight)))
if !seen[x] {
seen[x] = true
fmt.Fprintf(b, `<line x1="%d" y1="%d" x2="%d" y2="%d"/>`+"\n", x, layout.PlotTop, x, layout.PlotBottom)
}
}
if i < len(segments)-1 {
x := int(math.Round(chartXForTime(segment.End, start, end, layout.PlotLeft, layout.PlotRight)))
if !seen[x] {
seen[x] = true
fmt.Fprintf(b, `<line x1="%d" y1="%d" x2="%d" y2="%d"/>`+"\n", x, layout.PlotTop, x, layout.PlotBottom)
}
}
}
b.WriteString(`</g>` + "\n")
}
func chartXForTime(ts, start, end time.Time, left, right int) float64 {
if !end.After(start) {
return float64(left+right) / 2
}
if ts.Before(start) {
ts = start
}
if ts.After(end) {
ts = end
}
ratio := float64(ts.Sub(start)) / float64(end.Sub(start))
return float64(left) + ratio*float64(right-left)
}
func chartPointTime(times []time.Time, idx int) time.Time {
if idx >= 0 && idx < len(times) && !times[idx].IsZero() {
return times[idx].UTC()
}
if len(times) > 0 && !times[0].IsZero() {
return times[0].UTC().Add(time.Duration(idx) * time.Minute)
}
return time.Now().UTC().Add(time.Duration(idx) * time.Minute)
}
func chartYForValue(value float64, scale chartScale, plotTop, plotBottom int) float64 {
if scale.Max <= scale.Min {
return float64(plotTop+plotBottom) / 2
}
return float64(plotBottom) - (value-scale.Min)/(scale.Max-scale.Min)*float64(plotBottom-plotTop)
}
func chartSeriesBounds(values []float64) (float64, float64) {
if len(values) == 0 {
return 0, 1
}
min, max := values[0], values[0]
for _, value := range values[1:] {
if value < min {
min = value
}
if value > max {
max = value
}
}
if min == max {
if max == 0 {
return 0, 1
}
pad := math.Abs(max) * 0.1
if pad == 0 {
pad = 1
}
min -= pad
max += pad
}
if min > 0 {
pad := (max - min) * 0.2
if pad == 0 {
pad = max * 0.1
}
min -= pad
if min < 0 {
min = 0
}
max += pad
}
return min, max
}
func chartNiceTicks(min, max float64, target int) []float64 {
if min == max {
max = min + 1
}
span := max - min
step := math.Pow(10, math.Floor(math.Log10(span/float64(target))))
for _, factor := range []float64{1, 2, 5, 10} {
if span/(factor*step) <= float64(target)*1.5 {
step = factor * step
break
}
}
low := math.Floor(min/step) * step
high := math.Ceil(max/step) * step
var ticks []float64
for value := low; value <= high+step*0.001; value += step {
ticks = append(ticks, math.Round(value*1e9)/1e9)
}
return ticks
}
func valueClamp(value float64, scale chartScale) float64 {
if value < scale.Min {
return scale.Min
}
if value > scale.Max {
return scale.Max
}
return value
}

View File

@@ -232,7 +232,7 @@ func truncate(s string, max int) string {
// isSATTarget returns true for task targets that run hardware acceptance tests.
func isSATTarget(target string) bool {
switch target {
case "nvidia", "nvidia-stress", "memory", "memory-stress", "storage",
case "nvidia", "nvidia-benchmark", "nvidia-stress", "memory", "memory-stress", "storage",
"cpu", "sat-stress", "amd", "amd-mem", "amd-bandwidth", "amd-stress",
"platform-stress":
return true

View File

@@ -91,6 +91,7 @@ func layoutNav(active string, buildLabel string) string {
{"audit", "Audit", "/audit", ""},
{"validate", "Validate", "/validate", ""},
{"burn", "Burn", "/burn", ""},
{"benchmark", "Benchmark", "/benchmark", ""},
{"tasks", "Tasks", "/tasks", ""},
{"tools", "Tools", "/tools", ""},
}
@@ -140,6 +141,10 @@ func renderPage(page string, opts HandlerOptions) string {
pageID = "burn"
title = "Burn"
body = renderBurn()
case "benchmark":
pageID = "benchmark"
title = "Benchmark"
body = renderBenchmark()
case "tasks":
pageID = "tasks"
title = "Tasks"
@@ -553,6 +558,21 @@ func renderMetrics() string {
<script>
let gpuChartKey = '';
const gpuChartModeStorageKey = 'bee.metrics.gpuChartMode';
function loadGPUChartModePreference() {
try {
return sessionStorage.getItem(gpuChartModeStorageKey) === 'per-gpu';
} catch (_) {
return false;
}
}
function saveGPUChartModePreference(perGPU) {
try {
sessionStorage.setItem(gpuChartModeStorageKey, perGPU ? 'per-gpu' : 'per-metric');
} catch (_) {}
}
function refreshChartImage(el) {
if (!el || el.dataset.loading === '1') return;
@@ -633,10 +653,19 @@ function loadMetricsLayout() {
fetch('/api/metrics/latest').then(function(r) { return r.json(); }).then(syncMetricsLayout).catch(function() {});
}
document.getElementById('gpu-chart-toggle').addEventListener('change', function() {
applyGPUChartMode();
refreshCharts();
});
const gpuChartToggle = document.getElementById('gpu-chart-toggle');
if (gpuChartToggle) {
gpuChartToggle.checked = loadGPUChartModePreference();
}
applyGPUChartMode();
if (gpuChartToggle) {
gpuChartToggle.addEventListener('change', function() {
saveGPUChartModePreference(!!gpuChartToggle.checked);
applyGPUChartMode();
refreshCharts();
});
}
loadMetricsLayout();
setInterval(refreshCharts, 3000);
@@ -757,6 +786,193 @@ func renderSATCard(id, label, extra string) string {
label, extra, id, id)
}
// ── Benchmark ─────────────────────────────────────────────────────────────────
func renderBenchmark() string {
return `<p style="color:var(--muted);font-size:13px;margin-bottom:16px">Benchmark runs generate a human-readable TXT report and machine-readable result bundle. Tasks continue in the background — view progress in <a href="/tasks">Tasks</a>.</p>
<div class="grid2">
<div class="card">
<div class="card-head">NVIDIA Benchmark</div>
<div class="card-body">
<div class="form-row">
<label>Profile</label>
<select id="benchmark-profile">
<option value="standard" selected>Standard — about 15 minutes</option>
<option value="stability">Stability — 1 to 2 hours</option>
<option value="overnight">Overnight — 8 hours</option>
</select>
</div>
<div class="form-row">
<label>GPU Selection</label>
<div style="display:flex;gap:8px;flex-wrap:wrap;margin-bottom:8px">
<button class="btn btn-sm btn-secondary" type="button" onclick="benchmarkSelectAll()">Select All</button>
<button class="btn btn-sm btn-secondary" type="button" onclick="benchmarkSelectNone()">Clear</button>
</div>
<div id="benchmark-gpu-list" style="border:1px solid var(--border);border-radius:4px;padding:12px;min-height:88px">
<p style="color:var(--muted);font-size:13px">Loading NVIDIA GPUs...</p>
</div>
</div>
<label class="benchmark-cb-row">
<input type="checkbox" id="benchmark-run-nccl" checked>
<span>Run multi-GPU interconnect step (NCCL) only on the selected GPUs</span>
</label>
<p id="benchmark-selection-note" style="font-size:12px;color:var(--muted);margin:10px 0 14px">Select one GPU for single-card benchmarking or several GPUs for a constrained multi-GPU run.</p>
<button id="benchmark-run-btn" class="btn btn-primary" onclick="runNvidiaBenchmark()" disabled>&#9654; Run Benchmark</button>
<span id="benchmark-run-status" style="margin-left:10px;font-size:12px;color:var(--muted)"></span>
</div>
</div>
<div class="card">
<div class="card-head">Method</div>
<div class="card-body">
<p style="font-size:13px;color:var(--muted);margin-bottom:10px">Each benchmark run performs warmup, sustained compute, telemetry capture, cooldown, and optional NCCL interconnect checks.</p>
<table>
<tr><th>Profile</th><th>Purpose</th></tr>
<tr><td>Standard</td><td>Fast, repeatable performance check for server-to-server comparison.</td></tr>
<tr><td>Stability</td><td>Longer run for thermal drift, power caps, and clock instability.</td></tr>
<tr><td>Overnight</td><td>Extended verification of long-run stability and late throttling.</td></tr>
</table>
</div>
</div>
</div>
<div id="benchmark-output" style="display:none;margin-top:16px" class="card">
<div class="card-head">Benchmark Output <span id="benchmark-title"></span></div>
<div class="card-body"><div id="benchmark-terminal" class="terminal"></div></div>
</div>
<style>
.benchmark-cb-row { display:flex; align-items:flex-start; gap:8px; cursor:pointer; font-size:13px; }
.benchmark-cb-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
.benchmark-gpu-row { display:flex; align-items:flex-start; gap:8px; padding:6px 0; cursor:pointer; font-size:13px; }
.benchmark-gpu-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
</style>
<script>
let benchmarkES = null;
function benchmarkSelectedGPUIndices() {
return Array.from(document.querySelectorAll('.benchmark-gpu-checkbox'))
.filter(function(el) { return el.checked && !el.disabled; })
.map(function(el) { return parseInt(el.value, 10); })
.filter(function(v) { return !Number.isNaN(v); })
.sort(function(a, b) { return a - b; });
}
function benchmarkUpdateSelectionNote() {
const selected = benchmarkSelectedGPUIndices();
const btn = document.getElementById('benchmark-run-btn');
const note = document.getElementById('benchmark-selection-note');
const nccl = document.getElementById('benchmark-run-nccl');
if (!selected.length) {
btn.disabled = true;
note.textContent = 'Select at least one NVIDIA GPU to run the benchmark.';
return;
}
btn.disabled = false;
note.textContent = 'Selected GPUs: ' + selected.join(', ') + '.';
if (nccl && nccl.checked && selected.length < 2) {
note.textContent += ' NCCL will be skipped because fewer than 2 GPUs are selected.';
} else if (nccl && nccl.checked) {
note.textContent += ' NCCL interconnect will use only these GPUs.';
}
}
function benchmarkRenderGPUList(gpus) {
const root = document.getElementById('benchmark-gpu-list');
if (!gpus || !gpus.length) {
root.innerHTML = '<p style="color:var(--muted);font-size:13px">No NVIDIA GPUs detected.</p>';
benchmarkUpdateSelectionNote();
return;
}
root.innerHTML = gpus.map(function(gpu) {
const mem = gpu.memory_mb > 0 ? ' · ' + gpu.memory_mb + ' MiB' : '';
return '<label class="benchmark-gpu-row">'
+ '<input class="benchmark-gpu-checkbox" type="checkbox" value="' + gpu.index + '" checked onchange="benchmarkUpdateSelectionNote()">'
+ '<span><strong>GPU ' + gpu.index + '</strong> — ' + gpu.name + mem + '</span>'
+ '</label>';
}).join('');
benchmarkUpdateSelectionNote();
}
function benchmarkLoadGPUs() {
const status = document.getElementById('benchmark-run-status');
status.textContent = '';
fetch('/api/gpu/nvidia').then(function(r) {
return r.json().then(function(body) {
if (!r.ok) throw new Error(body.error || ('HTTP ' + r.status));
return body;
});
}).then(function(gpus) {
benchmarkRenderGPUList(gpus);
}).catch(function(err) {
document.getElementById('benchmark-gpu-list').innerHTML = '<p style="color:var(--crit-fg);font-size:13px">Error: ' + err.message + '</p>';
benchmarkUpdateSelectionNote();
});
}
function benchmarkSelectAll() {
document.querySelectorAll('.benchmark-gpu-checkbox').forEach(function(el) { el.checked = true; });
benchmarkUpdateSelectionNote();
}
function benchmarkSelectNone() {
document.querySelectorAll('.benchmark-gpu-checkbox').forEach(function(el) { el.checked = false; });
benchmarkUpdateSelectionNote();
}
function runNvidiaBenchmark() {
const selected = benchmarkSelectedGPUIndices();
const status = document.getElementById('benchmark-run-status');
if (!selected.length) {
status.textContent = 'Select at least one GPU.';
return;
}
if (benchmarkES) { benchmarkES.close(); benchmarkES = null; }
const body = {
profile: document.getElementById('benchmark-profile').value || 'standard',
gpu_indices: selected,
run_nccl: !!document.getElementById('benchmark-run-nccl').checked,
display_name: 'NVIDIA Benchmark'
};
document.getElementById('benchmark-output').style.display = 'block';
document.getElementById('benchmark-title').textContent = '— ' + body.profile + ' [' + selected.join(', ') + ']';
const term = document.getElementById('benchmark-terminal');
term.textContent = 'Enqueuing benchmark for GPUs ' + selected.join(', ') + '...\n';
status.textContent = 'Queueing...';
fetch('/api/benchmark/nvidia/run', {
method: 'POST',
headers: {'Content-Type':'application/json'},
body: JSON.stringify(body)
}).then(function(r) {
return r.json().then(function(payload) {
if (!r.ok) throw new Error(payload.error || ('HTTP ' + r.status));
return payload;
});
}).then(function(d) {
status.textContent = 'Task ' + d.task_id + ' queued.';
term.textContent += 'Task ' + d.task_id + ' queued. Streaming log...\n';
benchmarkES = new EventSource('/api/tasks/' + d.task_id + '/stream');
benchmarkES.onmessage = function(e) { term.textContent += e.data + '\n'; term.scrollTop = term.scrollHeight; };
benchmarkES.addEventListener('done', function(e) {
benchmarkES.close();
benchmarkES = null;
term.textContent += (e.data ? '\nERROR: ' + e.data : '\nCompleted.') + '\n';
term.scrollTop = term.scrollHeight;
status.textContent = e.data ? 'Failed.' : 'Completed.';
});
}).catch(function(err) {
status.textContent = 'Error.';
term.textContent += 'ERROR: ' + err.message + '\n';
});
}
document.getElementById('benchmark-run-nccl').addEventListener('change', benchmarkUpdateSelectionNote);
benchmarkLoadGPUs();
</script>`
}
// ── Burn ──────────────────────────────────────────────────────────────────────
func renderBurn() string {
@@ -781,11 +997,12 @@ func renderBurn() string {
<div class="card">
<div class="card-head">GPU Stress</div>
<div class="card-body">
<p style="font-size:12px;color:var(--muted);margin:0 0 10px">Tests run on all GPUs in the system. Availability determined by driver status.</p>
<p style="font-size:12px;color:var(--muted);margin:0 0 10px">NVIDIA tools run on all discovered GPUs. DCGM is the official NVIDIA diagnostic path. NCCL exercises multi-GPU fabric and is not a full compute burn.</p>
<div id="gpu-tools-list">
<label class="cb-row"><input type="checkbox" id="burn-gpu-bee" value="bee-gpu-burn" disabled><span>bee-gpu-burn <span class="cb-note" id="note-bee"></span></span></label>
<label class="cb-row"><input type="checkbox" id="burn-gpu-dcgm" value="dcgm" disabled><span>DCGM Diagnostics (Official NVIDIA) <span class="cb-note" id="note-dcgm"></span></span></label>
<label class="cb-row"><input type="checkbox" id="burn-gpu-john" value="john" disabled><span>John the Ripper (OpenCL) <span class="cb-note" id="note-john"></span></span></label>
<label class="cb-row"><input type="checkbox" id="burn-gpu-nccl" value="nccl" disabled><span>NCCL all_reduce_perf <span class="cb-note" id="note-nccl"></span></span></label>
<label class="cb-row"><input type="checkbox" id="burn-gpu-nccl" value="nccl" disabled><span>NCCL all_reduce_perf (Interconnect) <span class="cb-note" id="note-nccl"></span></span></label>
<label class="cb-row"><input type="checkbox" id="burn-gpu-rvs" value="rvs" disabled><span>RVS GST (AMD) <span class="cb-note" id="note-rvs"></span></span></label>
</div>
<button class="btn btn-primary" style="margin-top:10px" onclick="runGPUStress()">&#9654; Run GPU Stress</button>
@@ -857,17 +1074,18 @@ function streamTask(taskId, label) {
}
function runGPUStress() {
const ids = ['burn-gpu-bee','burn-gpu-john','burn-gpu-nccl','burn-gpu-rvs'];
const loaderMap = {'burn-gpu-bee':'builtin','burn-gpu-john':'john','burn-gpu-nccl':'nccl','burn-gpu-rvs':'rvs'};
const targetMap = {'burn-gpu-bee':'nvidia-stress','burn-gpu-john':'nvidia-stress','burn-gpu-nccl':'nvidia-stress','burn-gpu-rvs':'amd-stress'};
let last = null;
ids.filter(id => {
const el = document.getElementById(id);
const tasks = [
{id:'burn-gpu-bee', target:'nvidia-stress', label:'bee-gpu-burn', extra:{loader:'builtin'}},
{id:'burn-gpu-dcgm', target:'nvidia', label:'DCGM Diagnostics (Official NVIDIA)', extra:{display_name:'NVIDIA DCGM Diagnostics (Official)'}},
{id:'burn-gpu-john', target:'nvidia-stress', label:'John GPU Stress', extra:{loader:'john'}},
{id:'burn-gpu-nccl', target:'nvidia-stress', label:'NCCL Interconnect Stress', extra:{loader:'nccl', display_name:'NCCL Interconnect Stress'}},
{id:'burn-gpu-rvs', target:'amd-stress', label:'RVS GST', extra:{}},
];
tasks.filter(t => {
const el = document.getElementById(t.id);
return el && el.checked && !el.disabled;
}).forEach(id => {
const target = targetMap[id];
const extra = target === 'nvidia-stress' ? {loader: loaderMap[id]} : {};
enqueueTask(target, extra).then(d => { last = d; streamTask(d.task_id, target + ' / ' + loaderMap[id]); });
}).forEach(t => {
enqueueTask(t.target, t.extra).then(d => { streamTask(d.task_id, t.label); });
});
}
@@ -904,13 +1122,15 @@ function runAll() {
const done = () => { count++; status.textContent = count + ' tasks queued.'; };
// GPU tests
const gpuIds = ['burn-gpu-bee','burn-gpu-john','burn-gpu-nccl','burn-gpu-rvs'];
const loaderMap = {'burn-gpu-bee':'builtin','burn-gpu-john':'john','burn-gpu-nccl':'nccl','burn-gpu-rvs':'rvs'};
const gpuTargetMap = {'burn-gpu-bee':'nvidia-stress','burn-gpu-john':'nvidia-stress','burn-gpu-nccl':'nvidia-stress','burn-gpu-rvs':'amd-stress'};
gpuIds.filter(id => { const el = document.getElementById(id); return el && el.checked && !el.disabled; }).forEach(id => {
const target = gpuTargetMap[id];
const extra = target === 'nvidia-stress' ? {loader: loaderMap[id]} : {};
enqueueTask(target, extra).then(d => { streamTask(d.task_id, target); done(); });
const gpuTasks = [
{id:'burn-gpu-bee', target:'nvidia-stress', label:'bee-gpu-burn', extra:{loader:'builtin'}},
{id:'burn-gpu-dcgm', target:'nvidia', label:'DCGM Diagnostics (Official NVIDIA)', extra:{display_name:'NVIDIA DCGM Diagnostics (Official)'}},
{id:'burn-gpu-john', target:'nvidia-stress', label:'John GPU Stress', extra:{loader:'john'}},
{id:'burn-gpu-nccl', target:'nvidia-stress', label:'NCCL Interconnect Stress', extra:{loader:'nccl', display_name:'NCCL Interconnect Stress'}},
{id:'burn-gpu-rvs', target:'amd-stress', label:'RVS GST', extra:{}},
];
gpuTasks.filter(t => { const el = document.getElementById(t.id); return el && el.checked && !el.disabled; }).forEach(t => {
enqueueTask(t.target, t.extra).then(d => { streamTask(d.task_id, t.label); done(); });
});
// Compute tests
@@ -931,17 +1151,19 @@ function runAll() {
// Load GPU tool availability
fetch('/api/gpu/tools').then(r => r.json()).then(tools => {
const nvidiaMap = {'bee-gpu-burn':'burn-gpu-bee','john':'burn-gpu-john','nccl':'burn-gpu-nccl','rvs':'burn-gpu-rvs'};
const noteMap = {'bee-gpu-burn':'note-bee','john':'note-john','nccl':'note-nccl','rvs':'note-rvs'};
const nvidiaMap = {'bee-gpu-burn':'burn-gpu-bee','dcgm':'burn-gpu-dcgm','john':'burn-gpu-john','nccl':'burn-gpu-nccl','rvs':'burn-gpu-rvs'};
const noteMap = {'bee-gpu-burn':'note-bee','dcgm':'note-dcgm','john':'note-john','nccl':'note-nccl','rvs':'note-rvs'};
tools.forEach(t => {
const cb = document.getElementById(nvidiaMap[t.id]);
const note = document.getElementById(noteMap[t.id]);
if (!cb) return;
if (t.available) {
cb.disabled = false;
if (t.id === 'bee-gpu-burn') cb.checked = true;
if (t.id === 'bee-gpu-burn' || t.id === 'dcgm') cb.checked = true;
} else {
const reason = t.vendor === 'nvidia' ? 'NVIDIA driver not running' : 'AMD driver not running';
let reason = t.vendor === 'nvidia' ? 'NVIDIA driver not running' : 'AMD driver not running';
if (t.id === 'dcgm' && t.vendor === 'nvidia') reason = 'dcgmi not available or NVIDIA driver not running';
if (t.id === 'nccl' && t.vendor === 'nvidia') reason = 'NCCL interconnect tool unavailable or NVIDIA driver not running';
if (note) note.textContent = '— ' + reason;
}
});
@@ -1101,7 +1323,8 @@ func renderNetwork() string {
// ── Services ──────────────────────────────────────────────────────────────────
func renderServicesInline() string {
return `<div style="display:flex;justify-content:flex-end;gap:8px;flex-wrap:wrap;margin-bottom:8px"><button class="btn btn-sm btn-secondary" onclick="restartGPUDrivers()">Restart GPU Drivers</button><button class="btn btn-sm btn-secondary" onclick="loadServices()">&#8635; Refresh</button></div>
return `<p style="font-size:13px;color:var(--muted);margin-bottom:10px">` + html.EscapeString(`bee-selfheal.timer is expected to be active; the oneshot bee-selfheal.service itself is not shown as a long-running service.`) + `</p>
<div style="display:flex;justify-content:flex-end;gap:8px;flex-wrap:wrap;margin-bottom:8px"><button class="btn btn-sm btn-secondary" onclick="restartGPUDrivers()">Restart GPU Drivers</button><button class="btn btn-sm btn-secondary" onclick="loadServices()">&#8635; Refresh</button></div>
<div id="svc-table"><p style="color:var(--muted);font-size:13px">Loading...</p></div>
<div id="svc-out" style="display:none;margin-top:8px" class="card">
<div class="card-head">Output</div>
@@ -1127,7 +1350,7 @@ function loadServices() {
'</td></tr>';
}).join('');
document.getElementById('svc-table').innerHTML =
'<table><tr><th>Service</th><th>Status</th><th>Actions</th></tr>'+rows+'</table>';
'<table><tr><th>Unit</th><th>Status</th><th>Actions</th></tr>'+rows+'</table>';
});
}
function toggleBody(id) {

View File

@@ -8,7 +8,6 @@ import (
"html"
"io"
"log/slog"
"math"
"mime"
"net"
"net/http"
@@ -16,7 +15,6 @@ import (
"path/filepath"
"runtime/debug"
"sort"
"strconv"
"strings"
"sync"
"time"
@@ -24,7 +22,6 @@ import (
"bee/audit/internal/app"
"bee/audit/internal/platform"
"bee/audit/internal/runtimeenv"
gocharts "github.com/go-analyze/charts"
"reanimator/chart/viewer"
"reanimator/chart/web"
)
@@ -253,6 +250,7 @@ func NewHandler(opts HandlerOptions) http.Handler {
mux.HandleFunc("POST /api/sat/platform-stress/run", h.handleAPISATRun("platform-stress"))
mux.HandleFunc("GET /api/sat/stream", h.handleAPISATStream)
mux.HandleFunc("POST /api/sat/abort", h.handleAPISATAbort)
mux.HandleFunc("POST /api/benchmark/nvidia/run", h.handleAPIBenchmarkNvidiaRun)
// Tasks
mux.HandleFunc("GET /api/tasks", h.handleAPITasksList)
@@ -289,6 +287,7 @@ func NewHandler(opts HandlerOptions) http.Handler {
// GPU presence / tools
mux.HandleFunc("GET /api/gpu/presence", h.handleAPIGPUPresence)
mux.HandleFunc("GET /api/gpu/nvidia", h.handleAPIGNVIDIAGPUs)
mux.HandleFunc("GET /api/gpu/tools", h.handleAPIGPUTools)
// System
@@ -555,13 +554,14 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
http.Error(w, "metrics database not available", http.StatusServiceUnavailable)
return
}
samples, err := h.metricsDB.LoadAll()
if err != nil || len(samples) == 0 {
http.Error(w, "metrics history unavailable", http.StatusServiceUnavailable)
return
}
timeline := metricsTimelineSegments(samples, time.Now())
if idx, sub, ok := parseGPUChartPath(path); ok && sub == "overview" {
samples, err := h.metricsDB.LoadAll()
if err != nil || len(samples) == 0 {
http.Error(w, "metrics history unavailable", http.StatusServiceUnavailable)
return
}
buf, ok, err := renderGPUOverviewChartSVG(idx, samples)
buf, ok, err := renderGPUOverviewChartSVG(idx, samples, timeline)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
@@ -575,13 +575,23 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
_, _ = w.Write(buf)
return
}
datasets, names, labels, title, yMin, yMax, ok := h.chartDataFromDB(path)
datasets, names, labels, title, yMin, yMax, ok := chartDataFromSamples(path, samples)
if !ok {
http.Error(w, "metrics history unavailable", http.StatusServiceUnavailable)
return
}
buf, err := renderChartSVG(title, datasets, names, labels, yMin, yMax)
buf, err := renderMetricChartSVG(
title,
labels,
sampleTimes(samples),
datasets,
names,
yMin,
yMax,
chartCanvasHeightForPath(path, len(names)),
timeline,
)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
@@ -591,14 +601,6 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
_, _ = w.Write(buf)
}
func (h *handler) chartDataFromDB(path string) ([][]float64, []string, []string, string, *float64, *float64, bool) {
samples, err := h.metricsDB.LoadAll()
if err != nil || len(samples) == 0 {
return nil, nil, nil, "", nil, nil, false
}
return chartDataFromSamples(path, samples)
}
func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][]float64, []string, []string, string, *float64, *float64, bool) {
var datasets [][]float64
var names []string
@@ -996,247 +998,6 @@ func autoBounds120(datasets ...[]float64) (*float64, *float64) {
return floatPtr(low), floatPtr(high)
}
func renderGPUOverviewChartSVG(idx int, samples []platform.LiveMetricSample) ([]byte, bool, error) {
temp := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.TempC })
power := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.PowerW })
coreClock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.ClockMHz })
memClock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.MemClockMHz })
if temp == nil && power == nil && coreClock == nil && memClock == nil {
return nil, false, nil
}
labels := sampleTimeLabels(samples)
svg, err := drawGPUOverviewChartSVG(
fmt.Sprintf("GPU %d Overview", idx),
labels,
[]gpuOverviewSeries{
{Name: "Temp C", Values: coalesceDataset(temp, len(samples)), Color: "#f05a5a", AxisTitle: "Temp C"},
{Name: "Power W", Values: coalesceDataset(power, len(samples)), Color: "#ffb357", AxisTitle: "Power W"},
{Name: "Core Clock MHz", Values: coalesceDataset(coreClock, len(samples)), Color: "#73bf69", AxisTitle: "Core MHz"},
{Name: "Memory Clock MHz", Values: coalesceDataset(memClock, len(samples)), Color: "#5794f2", AxisTitle: "Memory MHz"},
},
)
if err != nil {
return nil, false, err
}
return svg, true, nil
}
type gpuOverviewSeries struct {
Name string
AxisTitle string
Color string
Values []float64
}
func drawGPUOverviewChartSVG(title string, labels []string, series []gpuOverviewSeries) ([]byte, error) {
if len(series) != 4 {
return nil, fmt.Errorf("gpu overview requires 4 series, got %d", len(series))
}
const (
width = 1400
height = 420
plotLeft = 180
plotRight = 1220
plotTop = 74
plotBottom = 292
)
const (
leftOuterAxis = 72
leftInnerAxis = 132
rightInnerAxis = 1268
rightOuterAxis = 1328
)
axisX := []int{leftOuterAxis, leftInnerAxis, rightInnerAxis, rightOuterAxis}
plotWidth := plotRight - plotLeft
plotHeight := plotBottom - plotTop
pointCount := len(labels)
if pointCount == 0 {
pointCount = 1
labels = []string{""}
}
for i := range series {
if len(series[i].Values) == 0 {
series[i].Values = make([]float64, pointCount)
}
}
type axisScale struct {
Min float64
Max float64
Ticks []float64
}
scales := make([]axisScale, len(series))
for i := range series {
min, max := gpuChartSeriesBounds(series[i].Values)
ticks := gpuChartNiceTicks(min, max, 8)
scales[i] = axisScale{
Min: ticks[0],
Max: ticks[len(ticks)-1],
Ticks: ticks,
}
}
xFor := func(index int) float64 {
if pointCount <= 1 {
return float64(plotLeft + plotWidth/2)
}
return float64(plotLeft) + float64(index)*float64(plotWidth)/float64(pointCount-1)
}
yFor := func(value float64, scale axisScale) float64 {
if scale.Max <= scale.Min {
return float64(plotTop + plotHeight/2)
}
return float64(plotBottom) - (value-scale.Min)/(scale.Max-scale.Min)*float64(plotHeight)
}
var b strings.Builder
b.WriteString(fmt.Sprintf(`<svg xmlns="http://www.w3.org/2000/svg" width="%d" height="%d" viewBox="0 0 %d %d">`, width, height, width, height))
b.WriteString("\n")
b.WriteString(`<rect width="100%" height="100%" rx="10" ry="10" fill="#111217" stroke="#2f3440"/>` + "\n")
b.WriteString(`<text x="700" y="28" text-anchor="middle" font-family="sans-serif" font-size="16" font-weight="700" fill="#f5f7fa">` + sanitizeChartText(title) + `</text>` + "\n")
b.WriteString(`<g stroke="#2f3440" stroke-width="1">` + "\n")
for _, tick := range scales[0].Ticks {
y := yFor(tick, scales[0])
fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f"/>`+"\n", plotLeft, y, plotRight, y)
}
for _, idx := range gpuChartLabelIndices(pointCount, 8) {
x := xFor(idx)
fmt.Fprintf(&b, `<line x1="%.1f" y1="%d" x2="%.1f" y2="%d"/>`+"\n", x, plotTop, x, plotBottom)
}
b.WriteString("</g>\n")
fmt.Fprintf(&b, `<rect x="%d" y="%d" width="%d" height="%d" fill="none" stroke="#454c5c" stroke-width="1"/>`+"\n",
plotLeft, plotTop, plotWidth, plotHeight)
for i, axisLineX := range axisX {
fmt.Fprintf(&b, `<line x1="%d" y1="%d" x2="%d" y2="%d" stroke="%s" stroke-width="1"/>`+"\n",
axisLineX, plotTop, axisLineX, plotBottom, series[i].Color)
fmt.Fprintf(&b, `<text x="%d" y="%d" text-anchor="middle" font-family="sans-serif" font-size="11" font-weight="700" fill="%s">%s</text>`+"\n",
axisLineX, 52, series[i].Color, sanitizeChartText(series[i].AxisTitle))
for _, tick := range scales[i].Ticks {
y := yFor(tick, scales[i])
label := sanitizeChartText(gpuChartFormatTick(tick))
if i < 2 {
fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f" stroke="%s" stroke-width="1"/>`+"\n",
axisLineX, y, axisLineX+6, y, series[i].Color)
fmt.Fprintf(&b, `<text x="%d" y="%.1f" text-anchor="end" dy="4" font-family="sans-serif" font-size="10" fill="%s">%s</text>`+"\n",
axisLineX-8, y, series[i].Color, label)
continue
}
fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f" stroke="%s" stroke-width="1"/>`+"\n",
axisLineX, y, axisLineX-6, y, series[i].Color)
fmt.Fprintf(&b, `<text x="%d" y="%.1f" text-anchor="start" dy="4" font-family="sans-serif" font-size="10" fill="%s">%s</text>`+"\n",
axisLineX+8, y, series[i].Color, label)
}
}
b.WriteString(`<g font-family="sans-serif" font-size="11" fill="#c8d0d8" text-anchor="middle">` + "\n")
for _, idx := range gpuChartLabelIndices(pointCount, 8) {
x := xFor(idx)
fmt.Fprintf(&b, `<text x="%.1f" y="%d">%s</text>`+"\n", x, plotBottom+22, sanitizeChartText(labels[idx]))
}
b.WriteString(`</g>` + "\n")
b.WriteString(`<text x="700" y="338" text-anchor="middle" font-family="sans-serif" font-size="12" fill="#c8d0d8">Time</text>` + "\n")
for i := range series {
var points strings.Builder
for j, value := range series[i].Values {
if j > 0 {
points.WriteByte(' ')
}
points.WriteString(strconv.FormatFloat(xFor(j), 'f', 1, 64))
points.WriteByte(',')
points.WriteString(strconv.FormatFloat(yFor(value, scales[i]), 'f', 1, 64))
}
fmt.Fprintf(&b, `<polyline points="%s" fill="none" stroke="%s" stroke-width="2"/>`+"\n",
points.String(), series[i].Color)
if len(series[i].Values) == 1 {
fmt.Fprintf(&b, `<circle cx="%.1f" cy="%.1f" r="3" fill="%s"/>`+"\n",
xFor(0), yFor(series[i].Values[0], scales[i]), series[i].Color)
}
}
const legendY = 372
legendX := []int{190, 470, 790, 1090}
for i := range series {
fmt.Fprintf(&b, `<line x1="%d" y1="%d" x2="%d" y2="%d" stroke="%s" stroke-width="3"/>`+"\n",
legendX[i], legendY, legendX[i]+28, legendY, series[i].Color)
fmt.Fprintf(&b, `<text x="%d" y="%d" font-family="sans-serif" font-size="12" fill="#f5f7fa">%s</text>`+"\n",
legendX[i]+38, legendY+4, sanitizeChartText(series[i].Name))
}
b.WriteString("</svg>\n")
return []byte(b.String()), nil
}
func gpuChartSeriesBounds(values []float64) (float64, float64) {
if len(values) == 0 {
return 0, 1
}
min, max := values[0], values[0]
for _, value := range values[1:] {
if value < min {
min = value
}
if value > max {
max = value
}
}
if min == max {
if max == 0 {
return 0, 1
}
pad := math.Abs(max) * 0.1
if pad == 0 {
pad = 1
}
min -= pad
max += pad
}
if min > 0 {
pad := (max - min) * 0.2
if pad == 0 {
pad = max * 0.1
}
min -= pad
if min < 0 {
min = 0
}
max += pad
}
return min, max
}
func gpuChartNiceTicks(min, max float64, target int) []float64 {
if min == max {
max = min + 1
}
span := max - min
step := math.Pow(10, math.Floor(math.Log10(span/float64(target))))
for _, factor := range []float64{1, 2, 5, 10} {
if span/(factor*step) <= float64(target)*1.5 {
step = factor * step
break
}
}
low := math.Floor(min/step) * step
high := math.Ceil(max/step) * step
var ticks []float64
for value := low; value <= high+step*0.001; value += step {
ticks = append(ticks, math.Round(value*1e9)/1e9)
}
return ticks
}
func gpuChartFormatTick(value float64) string {
if value == math.Trunc(value) {
return strconv.Itoa(int(value))
}
return strconv.FormatFloat(value, 'f', 1, 64)
}
func gpuChartLabelIndices(total, target int) []int {
if total <= 0 {
return nil
@@ -1258,64 +1019,16 @@ func gpuChartLabelIndices(total, target int) []int {
return indices
}
// renderChartSVG renders a line chart SVG with a fixed Y-axis range.
func renderChartSVG(title string, datasets [][]float64, names []string, labels []string, yMin, yMax *float64) ([]byte, error) {
n := len(labels)
if n == 0 {
n = 1
labels = []string{""}
func chartCanvasHeightForPath(path string, seriesCount int) int {
height := chartCanvasHeight(seriesCount)
if isGPUChartPath(path) {
return height * 2
}
for i := range datasets {
if len(datasets[i]) == 0 {
datasets[i] = make([]float64, n)
}
}
// Append global min/avg/max to title.
mn, avg, mx := globalStats(datasets)
if mx > 0 {
title = fmt.Sprintf("%s ↓%s ~%s ↑%s",
title,
chartLegendNumber(mn),
chartLegendNumber(avg),
chartLegendNumber(mx),
)
}
title = sanitizeChartText(title)
names = sanitizeChartTexts(names)
sparse := sanitizeChartTexts(sparseLabels(labels, 6))
return height
}
opt := gocharts.NewLineChartOptionWithData(datasets)
opt.Title = gocharts.TitleOption{Text: title}
opt.XAxis.Labels = sparse
opt.Legend = gocharts.LegendOption{SeriesNames: names}
if chartLegendVisible(len(names)) {
opt.Legend.Offset = gocharts.OffsetStr{Top: gocharts.PositionBottom}
opt.Legend.OverlayChart = gocharts.Ptr(false)
} else {
opt.Legend.Show = gocharts.Ptr(false)
}
opt.Symbol = gocharts.SymbolNone
// Right padding: reserve space for the MarkLine label (library recommendation).
opt.Padding = gocharts.NewBox(20, 20, 80, 20)
if yMin != nil || yMax != nil {
opt.YAxis = []gocharts.YAxisOption{chartYAxisOption(yMin, yMax)}
}
// Add a single peak mark line on the series that holds the global maximum.
peakIdx, _ := globalPeakSeries(datasets)
if peakIdx >= 0 && peakIdx < len(opt.SeriesList) {
opt.SeriesList[peakIdx].MarkLine = gocharts.NewMarkLine(gocharts.SeriesMarkTypeMax)
}
p := gocharts.NewPainter(gocharts.PainterOptions{
OutputFormat: gocharts.ChartOutputSVG,
Width: 1400,
Height: chartCanvasHeight(len(names)),
}, gocharts.PainterThemeOption(gocharts.GetTheme("grafana")))
if err := p.LineChart(opt); err != nil {
return nil, err
}
return p.Bytes()
func isGPUChartPath(path string) bool {
return strings.HasPrefix(path, "gpu-all-") || strings.HasPrefix(path, "gpu/")
}
func chartLegendVisible(seriesCount int) bool {
@@ -1329,30 +1042,6 @@ func chartCanvasHeight(seriesCount int) int {
return 288
}
func chartYAxisOption(yMin, yMax *float64) gocharts.YAxisOption {
return gocharts.YAxisOption{
Min: yMin,
Max: yMax,
LabelCount: 11,
ValueFormatter: chartYAxisNumber,
}
}
// globalPeakSeries returns the index of the series containing the global maximum
// value across all datasets, and that maximum value.
func globalPeakSeries(datasets [][]float64) (idx int, peak float64) {
idx = -1
for i, ds := range datasets {
for _, v := range ds {
if v > peak {
peak = v
idx = i
}
}
}
return idx, peak
}
// globalStats returns min, average, and max across all values in all datasets.
func globalStats(datasets [][]float64) (mn, avg, mx float64) {
var sum float64
@@ -1392,21 +1081,6 @@ func sanitizeChartText(s string) string {
}, s))
}
func sanitizeChartTexts(in []string) []string {
out := make([]string, len(in))
for i, s := range in {
out[i] = sanitizeChartText(s)
}
return out
}
func safeIdx(s []float64, i int) float64 {
if i < len(s) {
return s[i]
}
return 0
}
func snapshotNamedRings(rings []*namedMetricsRing) ([][]float64, []string, []string) {
var datasets [][]float64
var names []string
@@ -1493,20 +1167,6 @@ func chartYAxisNumber(v float64) string {
return out
}
func sparseLabels(labels []string, n int) []string {
out := make([]string, len(labels))
step := len(labels) / n
if step < 1 {
step = 1
}
for i, l := range labels {
if i%step == 0 {
out[i] = l
}
}
return out
}
func (h *handler) handleAPIMetricsExportCSV(w http.ResponseWriter, r *http.Request) {
if h.metricsDB == nil {
http.Error(w, "metrics database not available", http.StatusServiceUnavailable)

View File

@@ -304,6 +304,124 @@ func TestChartCanvasHeight(t *testing.T) {
}
}
func TestChartTimelineSegmentsForRangeMergesActiveSpansAndIdleGaps(t *testing.T) {
start := time.Date(2026, 4, 5, 12, 0, 0, 0, time.UTC)
end := start.Add(10 * time.Minute)
taskWindow := func(offsetStart, offsetEnd time.Duration) Task {
s := start.Add(offsetStart)
e := start.Add(offsetEnd)
return Task{
Name: "task",
Status: TaskDone,
StartedAt: &s,
DoneAt: &e,
}
}
segments := chartTimelineSegmentsForRange(start, end, end, []Task{
taskWindow(1*time.Minute, 3*time.Minute),
taskWindow(2*time.Minute, 5*time.Minute),
taskWindow(7*time.Minute, 8*time.Minute),
})
if len(segments) != 5 {
t.Fatalf("segments=%d want 5: %#v", len(segments), segments)
}
wantActive := []bool{false, true, false, true, false}
wantMinutes := [][2]int{{0, 1}, {1, 5}, {5, 7}, {7, 8}, {8, 10}}
for i, segment := range segments {
if segment.Active != wantActive[i] {
t.Fatalf("segment[%d].Active=%v want %v", i, segment.Active, wantActive[i])
}
if got := int(segment.Start.Sub(start).Minutes()); got != wantMinutes[i][0] {
t.Fatalf("segment[%d] start=%d want %d", i, got, wantMinutes[i][0])
}
if got := int(segment.End.Sub(start).Minutes()); got != wantMinutes[i][1] {
t.Fatalf("segment[%d] end=%d want %d", i, got, wantMinutes[i][1])
}
}
}
func TestRenderMetricChartSVGIncludesTimelineOverlay(t *testing.T) {
start := time.Date(2026, 4, 5, 12, 0, 0, 0, time.UTC)
labels := []string{"12:00", "12:01", "12:02"}
times := []time.Time{start, start.Add(time.Minute), start.Add(2 * time.Minute)}
svg, err := renderMetricChartSVG(
"System Power",
labels,
times,
[][]float64{{300, 320, 310}},
[]string{"Power W"},
floatPtr(0),
floatPtr(400),
360,
[]chartTimelineSegment{
{Start: start, End: start.Add(time.Minute), Active: false},
{Start: start.Add(time.Minute), End: start.Add(2 * time.Minute), Active: true},
},
)
if err != nil {
t.Fatal(err)
}
body := string(svg)
if !strings.Contains(body, `data-role="timeline-overlay"`) {
t.Fatalf("svg missing timeline overlay: %s", body)
}
if !strings.Contains(body, `opacity="0.10"`) {
t.Fatalf("svg missing idle overlay opacity: %s", body)
}
if !strings.Contains(body, `System Power`) {
t.Fatalf("svg missing chart title: %s", body)
}
}
func TestHandleMetricsChartSVGRendersCustomSVG(t *testing.T) {
dir := t.TempDir()
db, err := openMetricsDB(filepath.Join(dir, "metrics.db"))
if err != nil {
t.Fatal(err)
}
t.Cleanup(func() { _ = db.db.Close() })
start := time.Date(2026, 4, 5, 12, 0, 0, 0, time.UTC)
for i, sample := range []platform.LiveMetricSample{
{Timestamp: start, PowerW: 300},
{Timestamp: start.Add(time.Minute), PowerW: 320},
{Timestamp: start.Add(2 * time.Minute), PowerW: 310},
} {
if err := db.Write(sample); err != nil {
t.Fatalf("write sample %d: %v", i, err)
}
}
globalQueue.mu.Lock()
prevTasks := globalQueue.tasks
s := start.Add(30 * time.Second)
e := start.Add(90 * time.Second)
globalQueue.tasks = []*Task{{Name: "Burn", Status: TaskDone, StartedAt: &s, DoneAt: &e}}
globalQueue.mu.Unlock()
t.Cleanup(func() {
globalQueue.mu.Lock()
globalQueue.tasks = prevTasks
globalQueue.mu.Unlock()
})
h := &handler{opts: HandlerOptions{ExportDir: dir}, metricsDB: db}
rec := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodGet, "/api/metrics/chart/server-power.svg", nil)
h.handleMetricsChartSVG(rec, req)
if rec.Code != http.StatusOK {
t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
}
body := rec.Body.String()
if !strings.Contains(body, `data-role="timeline-overlay"`) {
t.Fatalf("custom svg response missing timeline overlay: %s", body)
}
if !strings.Contains(body, `stroke-linecap="round"`) {
t.Fatalf("custom svg response missing custom polyline styling: %s", body)
}
}
func TestNormalizeFanSeriesHoldsLastPositive(t *testing.T) {
got := normalizeFanSeries([]float64{4200, 0, 0, 4300, 0})
want := []float64{4200, 4200, 4200, 4300, 4300}
@@ -317,21 +435,6 @@ func TestNormalizeFanSeriesHoldsLastPositive(t *testing.T) {
}
}
func TestChartYAxisOption(t *testing.T) {
min := floatPtr(0)
max := floatPtr(100)
opt := chartYAxisOption(min, max)
if opt.Min != min || opt.Max != max {
t.Fatalf("chartYAxisOption min/max mismatch: %#v", opt)
}
if opt.LabelCount != 11 {
t.Fatalf("chartYAxisOption labelCount=%d want 11", opt.LabelCount)
}
if got := opt.ValueFormatter(1000); got != "1к" {
t.Fatalf("chartYAxisOption formatter(1000)=%q want 1к", got)
}
}
func TestSnapshotFanRingsUsesTimelineLabels(t *testing.T) {
r1 := newMetricsRing(4)
r2 := newMetricsRing(4)
@@ -514,6 +617,47 @@ func TestToolsPageRendersRestartGPUDriversButton(t *testing.T) {
}
}
func TestBenchmarkPageRendersGPUSelectionControls(t *testing.T) {
handler := NewHandler(HandlerOptions{})
rec := httptest.NewRecorder()
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/benchmark", nil))
if rec.Code != http.StatusOK {
t.Fatalf("status=%d", rec.Code)
}
body := rec.Body.String()
for _, needle := range []string{
`href="/benchmark"`,
`id="benchmark-gpu-list"`,
`/api/gpu/nvidia`,
`/api/benchmark/nvidia/run`,
`benchmark-run-nccl`,
} {
if !strings.Contains(body, needle) {
t.Fatalf("benchmark page missing %q: %s", needle, body)
}
}
}
func TestBurnPageRendersOfficialNVIDIADCGMAndNCCLInterconnectLabel(t *testing.T) {
handler := NewHandler(HandlerOptions{})
rec := httptest.NewRecorder()
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/burn", nil))
if rec.Code != http.StatusOK {
t.Fatalf("status=%d", rec.Code)
}
body := rec.Body.String()
for _, needle := range []string{
`DCGM Diagnostics (Official NVIDIA)`,
`NCCL all_reduce_perf (Interconnect)`,
`DCGM is the official NVIDIA diagnostic path`,
`burn-gpu-dcgm`,
} {
if !strings.Contains(body, needle) {
t.Fatalf("burn page missing %q: %s", needle, body)
}
}
}
func TestTasksPageRendersScrollableLogModal(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "audit.json")

View File

@@ -30,22 +30,23 @@ const (
// taskNames maps target → human-readable name for validate (SAT) runs.
var taskNames = map[string]string{
"nvidia": "NVIDIA SAT",
"nvidia-stress": "NVIDIA GPU Stress",
"memory": "Memory SAT",
"storage": "Storage SAT",
"cpu": "CPU SAT",
"amd": "AMD GPU SAT",
"amd-mem": "AMD GPU MEM Integrity",
"amd-bandwidth": "AMD GPU MEM Bandwidth",
"amd-stress": "AMD GPU Burn-in",
"memory-stress": "Memory Burn-in",
"sat-stress": "SAT Stress (stressapptest)",
"platform-stress": "Platform Thermal Cycling",
"audit": "Audit",
"support-bundle": "Support Bundle",
"install": "Install to Disk",
"install-to-ram": "Install to RAM",
"nvidia": "NVIDIA SAT",
"nvidia-benchmark": "NVIDIA Benchmark",
"nvidia-stress": "NVIDIA GPU Stress",
"memory": "Memory SAT",
"storage": "Storage SAT",
"cpu": "CPU SAT",
"amd": "AMD GPU SAT",
"amd-mem": "AMD GPU MEM Integrity",
"amd-bandwidth": "AMD GPU MEM Bandwidth",
"amd-stress": "AMD GPU Burn-in",
"memory-stress": "Memory Burn-in",
"sat-stress": "SAT Stress (stressapptest)",
"platform-stress": "Platform Thermal Cycling",
"audit": "Audit",
"support-bundle": "Support Bundle",
"install": "Install to Disk",
"install-to-ram": "Install to RAM",
}
// burnNames maps target → human-readable name when a burn profile is set.
@@ -108,8 +109,11 @@ type taskParams struct {
DiagLevel int `json:"diag_level,omitempty"`
GPUIndices []int `json:"gpu_indices,omitempty"`
ExcludeGPUIndices []int `json:"exclude_gpu_indices,omitempty"`
SizeMB int `json:"size_mb,omitempty"`
Loader string `json:"loader,omitempty"`
BurnProfile string `json:"burn_profile,omitempty"`
BenchmarkProfile string `json:"benchmark_profile,omitempty"`
RunNCCL bool `json:"run_nccl,omitempty"`
DisplayName string `json:"display_name,omitempty"`
Device string `json:"device,omitempty"` // for install
PlatformComponents []string `json:"platform_components,omitempty"`
@@ -547,6 +551,18 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
} else {
archive, err = a.RunNvidiaAcceptancePack("", j.append)
}
case "nvidia-benchmark":
if a == nil {
err = fmt.Errorf("app not configured")
break
}
archive, err = a.RunNvidiaBenchmarkCtx(ctx, "", platform.NvidiaBenchmarkOptions{
Profile: t.params.BenchmarkProfile,
SizeMB: t.params.SizeMB,
GPUIndices: t.params.GPUIndices,
ExcludeGPUIndices: t.params.ExcludeGPUIndices,
RunNCCL: t.params.RunNCCL,
}, j.append)
case "nvidia-stress":
if a == nil {
err = fmt.Errorf("app not configured")