Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 25af2df23a | |||
| 20abff7f90 | |||
| a14ec8631c |
@@ -274,9 +274,6 @@ func normalizeNvidiaBenchmarkOptionsForBenchmark(opts NvidiaBenchmarkOptions) Nv
|
|||||||
}
|
}
|
||||||
opts.GPUIndices = dedupeSortedIndices(opts.GPUIndices)
|
opts.GPUIndices = dedupeSortedIndices(opts.GPUIndices)
|
||||||
opts.ExcludeGPUIndices = dedupeSortedIndices(opts.ExcludeGPUIndices)
|
opts.ExcludeGPUIndices = dedupeSortedIndices(opts.ExcludeGPUIndices)
|
||||||
if !opts.RunNCCL {
|
|
||||||
opts.RunNCCL = true
|
|
||||||
}
|
|
||||||
return opts
|
return opts
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -41,6 +41,21 @@ func TestResolveBenchmarkProfile(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestNormalizeNvidiaBenchmarkOptionsPreservesRunNCCLChoice(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
opts := normalizeNvidiaBenchmarkOptionsForBenchmark(NvidiaBenchmarkOptions{
|
||||||
|
Profile: "stability",
|
||||||
|
RunNCCL: false,
|
||||||
|
})
|
||||||
|
if opts.Profile != NvidiaBenchmarkProfileStability {
|
||||||
|
t.Fatalf("profile=%q want %q", opts.Profile, NvidiaBenchmarkProfileStability)
|
||||||
|
}
|
||||||
|
if opts.RunNCCL {
|
||||||
|
t.Fatalf("RunNCCL should stay false when explicitly disabled")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestParseBenchmarkBurnLog(t *testing.T) {
|
func TestParseBenchmarkBurnLog(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
|
|||||||
@@ -120,10 +120,45 @@ func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) erro
|
|||||||
log(fmt.Sprintf("Warning: rebind /run/live/medium failed: %v", err))
|
log(fmt.Sprintf("Warning: rebind /run/live/medium failed: %v", err))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log("Verifying live medium now served from RAM...")
|
||||||
|
status := s.LiveBootSource()
|
||||||
|
if err := verifyInstallToRAMStatus(status); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
log(fmt.Sprintf("Verification passed: live medium now served from %s.", describeLiveBootSource(status)))
|
||||||
log("Done. Installation media can be safely disconnected.")
|
log("Done. Installation media can be safely disconnected.")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func verifyInstallToRAMStatus(status LiveBootSource) error {
|
||||||
|
if status.InRAM {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return fmt.Errorf("install to RAM verification failed: live medium still mounted from %s", describeLiveBootSource(status))
|
||||||
|
}
|
||||||
|
|
||||||
|
func describeLiveBootSource(status LiveBootSource) string {
|
||||||
|
source := strings.TrimSpace(status.Device)
|
||||||
|
if source == "" {
|
||||||
|
source = strings.TrimSpace(status.Source)
|
||||||
|
}
|
||||||
|
if source == "" {
|
||||||
|
source = "unknown source"
|
||||||
|
}
|
||||||
|
switch strings.TrimSpace(status.Kind) {
|
||||||
|
case "ram":
|
||||||
|
return "RAM"
|
||||||
|
case "usb":
|
||||||
|
return "USB (" + source + ")"
|
||||||
|
case "cdrom":
|
||||||
|
return "CD-ROM (" + source + ")"
|
||||||
|
case "disk":
|
||||||
|
return "disk (" + source + ")"
|
||||||
|
default:
|
||||||
|
return source
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func copyFileLarge(ctx context.Context, src, dst string, logFunc func(string)) error {
|
func copyFileLarge(ctx context.Context, src, dst string, logFunc func(string)) error {
|
||||||
in, err := os.Open(src)
|
in, err := os.Open(src)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -3,6 +3,8 @@ package platform
|
|||||||
import "testing"
|
import "testing"
|
||||||
|
|
||||||
func TestInferLiveBootKind(t *testing.T) {
|
func TestInferLiveBootKind(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
fsType string
|
fsType string
|
||||||
@@ -18,6 +20,7 @@ func TestInferLiveBootKind(t *testing.T) {
|
|||||||
{name: "unknown", source: "overlay", want: "unknown"},
|
{name: "unknown", source: "overlay", want: "unknown"},
|
||||||
}
|
}
|
||||||
for _, tc := range tests {
|
for _, tc := range tests {
|
||||||
|
tc := tc
|
||||||
t.Run(tc.name, func(t *testing.T) {
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
got := inferLiveBootKind(tc.fsType, tc.source, tc.deviceType, tc.transport)
|
got := inferLiveBootKind(tc.fsType, tc.source, tc.deviceType, tc.transport)
|
||||||
if got != tc.want {
|
if got != tc.want {
|
||||||
@@ -26,3 +29,29 @@ func TestInferLiveBootKind(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestVerifyInstallToRAMStatus(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
if err := verifyInstallToRAMStatus(LiveBootSource{InRAM: true, Kind: "ram", Source: "tmpfs"}); err != nil {
|
||||||
|
t.Fatalf("expected success for RAM-backed status, got %v", err)
|
||||||
|
}
|
||||||
|
err := verifyInstallToRAMStatus(LiveBootSource{InRAM: false, Kind: "usb", Device: "/dev/sdb1"})
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected verification failure when media is still on USB")
|
||||||
|
}
|
||||||
|
if got := err.Error(); got != "install to RAM verification failed: live medium still mounted from USB (/dev/sdb1)" {
|
||||||
|
t.Fatalf("error=%q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDescribeLiveBootSource(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
if got := describeLiveBootSource(LiveBootSource{InRAM: true, Kind: "ram"}); got != "RAM" {
|
||||||
|
t.Fatalf("got %q want RAM", got)
|
||||||
|
}
|
||||||
|
if got := describeLiveBootSource(LiveBootSource{Kind: "unknown", Source: "/run/live/medium"}); got != "/run/live/medium" {
|
||||||
|
t.Fatalf("got %q want /run/live/medium", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -12,11 +12,11 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"syscall"
|
|
||||||
"sort"
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -76,15 +76,15 @@ func streamExecOutput(cmd *exec.Cmd, logFunc func(string)) ([]byte, error) {
|
|||||||
|
|
||||||
// NvidiaGPU holds basic GPU info from nvidia-smi.
|
// NvidiaGPU holds basic GPU info from nvidia-smi.
|
||||||
type NvidiaGPU struct {
|
type NvidiaGPU struct {
|
||||||
Index int
|
Index int `json:"index"`
|
||||||
Name string
|
Name string `json:"name"`
|
||||||
MemoryMB int
|
MemoryMB int `json:"memory_mb"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// AMDGPUInfo holds basic info about an AMD GPU from rocm-smi.
|
// AMDGPUInfo holds basic info about an AMD GPU from rocm-smi.
|
||||||
type AMDGPUInfo struct {
|
type AMDGPUInfo struct {
|
||||||
Index int
|
Index int `json:"index"`
|
||||||
Name string
|
Name string `json:"name"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// DetectGPUVendor returns "nvidia" if /dev/nvidia0 exists, "amd" if /dev/kfd exists, or "" otherwise.
|
// DetectGPUVendor returns "nvidia" if /dev/nvidia0 exists, "amd" if /dev/kfd exists, or "" otherwise.
|
||||||
|
|||||||
@@ -10,17 +10,30 @@ import (
|
|||||||
func (s *System) ListBeeServices() ([]string, error) {
|
func (s *System) ListBeeServices() ([]string, error) {
|
||||||
seen := map[string]bool{}
|
seen := map[string]bool{}
|
||||||
var out []string
|
var out []string
|
||||||
for _, pattern := range []string{"/etc/systemd/system/bee-*.service", "/lib/systemd/system/bee-*.service"} {
|
for _, pattern := range []string{
|
||||||
|
"/etc/systemd/system/bee-*.service",
|
||||||
|
"/lib/systemd/system/bee-*.service",
|
||||||
|
"/etc/systemd/system/bee-*.timer",
|
||||||
|
"/lib/systemd/system/bee-*.timer",
|
||||||
|
} {
|
||||||
matches, err := filepath.Glob(pattern)
|
matches, err := filepath.Glob(pattern)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
for _, match := range matches {
|
for _, match := range matches {
|
||||||
name := strings.TrimSuffix(filepath.Base(match), ".service")
|
base := filepath.Base(match)
|
||||||
|
name := base
|
||||||
|
if strings.HasSuffix(base, ".service") {
|
||||||
|
name = strings.TrimSuffix(base, ".service")
|
||||||
|
}
|
||||||
// Skip template units (e.g. bee-journal-mirror@) — they have no instances to query.
|
// Skip template units (e.g. bee-journal-mirror@) — they have no instances to query.
|
||||||
if strings.HasSuffix(name, "@") {
|
if strings.HasSuffix(name, "@") {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
// bee-selfheal is timer-managed; showing the oneshot service as inactive is misleading.
|
||||||
|
if name == "bee-selfheal" && strings.HasSuffix(base, ".service") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
if !seen[name] {
|
if !seen[name] {
|
||||||
seen[name] = true
|
seen[name] = true
|
||||||
out = append(out, name)
|
out = append(out, name)
|
||||||
|
|||||||
@@ -44,12 +44,12 @@ type StaticIPv4Config struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type RemovableTarget struct {
|
type RemovableTarget struct {
|
||||||
Device string
|
Device string `json:"device"`
|
||||||
FSType string
|
FSType string `json:"fs_type"`
|
||||||
Size string
|
Size string `json:"size"`
|
||||||
Label string
|
Label string `json:"label"`
|
||||||
Model string
|
Model string `json:"model"`
|
||||||
Mountpoint string
|
Mountpoint string `json:"mountpoint"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type ToolStatus struct {
|
type ToolStatus struct {
|
||||||
|
|||||||
31
audit/internal/platform/types_test.go
Normal file
31
audit/internal/platform/types_test.go
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
package platform
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestRemovableTargetJSONUsesFrontendFieldNames(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
data, err := json.Marshal(RemovableTarget{
|
||||||
|
Device: "/dev/sdb1",
|
||||||
|
FSType: "exfat",
|
||||||
|
Size: "1.8T",
|
||||||
|
Label: "USB",
|
||||||
|
Model: "Flash",
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("marshal: %v", err)
|
||||||
|
}
|
||||||
|
raw := string(data)
|
||||||
|
for _, key := range []string{`"device"`, `"fs_type"`, `"size"`, `"label"`, `"model"`} {
|
||||||
|
if !strings.Contains(raw, key) {
|
||||||
|
t.Fatalf("json missing key %s: %s", key, raw)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if strings.Contains(raw, `"Device"`) || strings.Contains(raw, `"FSType"`) {
|
||||||
|
t.Fatalf("json still contains Go field names: %s", raw)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -232,6 +232,54 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (h *handler) handleAPIBenchmarkNvidiaRun(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if h.opts.App == nil {
|
||||||
|
writeError(w, http.StatusServiceUnavailable, "app not configured")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var body struct {
|
||||||
|
Profile string `json:"profile"`
|
||||||
|
SizeMB int `json:"size_mb"`
|
||||||
|
GPUIndices []int `json:"gpu_indices"`
|
||||||
|
ExcludeGPUIndices []int `json:"exclude_gpu_indices"`
|
||||||
|
RunNCCL *bool `json:"run_nccl"`
|
||||||
|
DisplayName string `json:"display_name"`
|
||||||
|
}
|
||||||
|
if r.Body != nil {
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&body); err != nil && !errors.Is(err, io.EOF) {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid request body")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
runNCCL := true
|
||||||
|
if body.RunNCCL != nil {
|
||||||
|
runNCCL = *body.RunNCCL
|
||||||
|
}
|
||||||
|
t := &Task{
|
||||||
|
ID: newJobID("benchmark-nvidia"),
|
||||||
|
Name: taskDisplayName("nvidia-benchmark", "", ""),
|
||||||
|
Target: "nvidia-benchmark",
|
||||||
|
Priority: 15,
|
||||||
|
Status: TaskPending,
|
||||||
|
CreatedAt: time.Now(),
|
||||||
|
params: taskParams{
|
||||||
|
GPUIndices: body.GPUIndices,
|
||||||
|
ExcludeGPUIndices: body.ExcludeGPUIndices,
|
||||||
|
SizeMB: body.SizeMB,
|
||||||
|
BenchmarkProfile: body.Profile,
|
||||||
|
RunNCCL: runNCCL,
|
||||||
|
DisplayName: body.DisplayName,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(body.DisplayName) != "" {
|
||||||
|
t.Name = body.DisplayName
|
||||||
|
}
|
||||||
|
globalQueue.enqueue(t)
|
||||||
|
writeJSON(w, map[string]string{"task_id": t.ID, "job_id": t.ID})
|
||||||
|
}
|
||||||
|
|
||||||
func (h *handler) handleAPISATStream(w http.ResponseWriter, r *http.Request) {
|
func (h *handler) handleAPISATStream(w http.ResponseWriter, r *http.Request) {
|
||||||
id := r.URL.Query().Get("job_id")
|
id := r.URL.Query().Get("job_id")
|
||||||
if id == "" {
|
if id == "" {
|
||||||
@@ -491,6 +539,22 @@ func (h *handler) handleAPIExportUSBBundle(w http.ResponseWriter, r *http.Reques
|
|||||||
|
|
||||||
// ── GPU presence ──────────────────────────────────────────────────────────────
|
// ── GPU presence ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
func (h *handler) handleAPIGNVIDIAGPUs(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
if h.opts.App == nil {
|
||||||
|
writeError(w, http.StatusServiceUnavailable, "app not configured")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
gpus, err := h.opts.App.ListNvidiaGPUs()
|
||||||
|
if err != nil {
|
||||||
|
writeError(w, http.StatusInternalServerError, err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if gpus == nil {
|
||||||
|
gpus = []platform.NvidiaGPU{}
|
||||||
|
}
|
||||||
|
writeJSON(w, gpus)
|
||||||
|
}
|
||||||
|
|
||||||
func (h *handler) handleAPIGPUPresence(w http.ResponseWriter, r *http.Request) {
|
func (h *handler) handleAPIGPUPresence(w http.ResponseWriter, r *http.Request) {
|
||||||
if h.opts.App == nil {
|
if h.opts.App == nil {
|
||||||
writeError(w, http.StatusServiceUnavailable, "app not configured")
|
writeError(w, http.StatusServiceUnavailable, "app not configured")
|
||||||
@@ -516,8 +580,10 @@ func (h *handler) handleAPIGPUTools(w http.ResponseWriter, _ *http.Request) {
|
|||||||
_, amdErr := os.Stat("/dev/kfd")
|
_, amdErr := os.Stat("/dev/kfd")
|
||||||
nvidiaUp := nvidiaErr == nil
|
nvidiaUp := nvidiaErr == nil
|
||||||
amdUp := amdErr == nil
|
amdUp := amdErr == nil
|
||||||
|
_, dcgmErr := exec.LookPath("dcgmi")
|
||||||
writeJSON(w, []toolEntry{
|
writeJSON(w, []toolEntry{
|
||||||
{ID: "bee-gpu-burn", Available: nvidiaUp, Vendor: "nvidia"},
|
{ID: "bee-gpu-burn", Available: nvidiaUp, Vendor: "nvidia"},
|
||||||
|
{ID: "dcgm", Available: nvidiaUp && dcgmErr == nil, Vendor: "nvidia"},
|
||||||
{ID: "john", Available: nvidiaUp, Vendor: "nvidia"},
|
{ID: "john", Available: nvidiaUp, Vendor: "nvidia"},
|
||||||
{ID: "nccl", Available: nvidiaUp, Vendor: "nvidia"},
|
{ID: "nccl", Available: nvidiaUp, Vendor: "nvidia"},
|
||||||
{ID: "rvs", Available: amdUp, Vendor: "amd"},
|
{ID: "rvs", Available: amdUp, Vendor: "amd"},
|
||||||
|
|||||||
@@ -64,6 +64,42 @@ func TestHandleAPISATRunDecodesBodyWithoutContentLength(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestHandleAPIBenchmarkNvidiaRunQueuesSelectedGPUs(t *testing.T) {
|
||||||
|
globalQueue.mu.Lock()
|
||||||
|
originalTasks := globalQueue.tasks
|
||||||
|
globalQueue.tasks = nil
|
||||||
|
globalQueue.mu.Unlock()
|
||||||
|
t.Cleanup(func() {
|
||||||
|
globalQueue.mu.Lock()
|
||||||
|
globalQueue.tasks = originalTasks
|
||||||
|
globalQueue.mu.Unlock()
|
||||||
|
})
|
||||||
|
|
||||||
|
h := &handler{opts: HandlerOptions{App: &app.App{}}}
|
||||||
|
req := httptest.NewRequest("POST", "/api/benchmark/nvidia/run", strings.NewReader(`{"profile":"standard","gpu_indices":[1,3],"run_nccl":false}`))
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
|
||||||
|
h.handleAPIBenchmarkNvidiaRun(rec, req)
|
||||||
|
|
||||||
|
if rec.Code != 200 {
|
||||||
|
t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
|
||||||
|
}
|
||||||
|
globalQueue.mu.Lock()
|
||||||
|
defer globalQueue.mu.Unlock()
|
||||||
|
if len(globalQueue.tasks) != 1 {
|
||||||
|
t.Fatalf("tasks=%d want 1", len(globalQueue.tasks))
|
||||||
|
}
|
||||||
|
task := globalQueue.tasks[0]
|
||||||
|
if task.Target != "nvidia-benchmark" {
|
||||||
|
t.Fatalf("target=%q want nvidia-benchmark", task.Target)
|
||||||
|
}
|
||||||
|
if got := task.params.GPUIndices; len(got) != 2 || got[0] != 1 || got[1] != 3 {
|
||||||
|
t.Fatalf("gpu indices=%v want [1 3]", got)
|
||||||
|
}
|
||||||
|
if task.params.RunNCCL {
|
||||||
|
t.Fatal("RunNCCL should reflect explicit false from request")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestPushFanRingsTracksByNameAndCarriesForwardMissingSamples(t *testing.T) {
|
func TestPushFanRingsTracksByNameAndCarriesForwardMissingSamples(t *testing.T) {
|
||||||
h := &handler{}
|
h := &handler{}
|
||||||
|
|||||||
713
audit/internal/webui/charts_svg.go
Normal file
713
audit/internal/webui/charts_svg.go
Normal file
@@ -0,0 +1,713 @@
|
|||||||
|
package webui
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"sort"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"bee/audit/internal/platform"
|
||||||
|
)
|
||||||
|
|
||||||
|
// chartTimelineSegment is one contiguous span of a chart's time range.
// Active is true for spans covered by a task run and false for the gaps
// between them.
type chartTimelineSegment struct {
	// Start and End bound the span.
	Start, End time.Time

	// Active marks the span as covered by at least one task run.
	Active bool
}
|
||||||
|
|
||||||
|
// chartScale describes one value axis: its bounds and the tick values
// placed along it.
type chartScale struct {
	// Min and Max are the lower and upper bounds of the axis.
	Min, Max float64

	// Ticks lists the tick values for the axis.
	Ticks []float64
}
|
||||||
|
|
||||||
|
// chartLayout holds the integer geometry of a chart: the overall canvas size
// and the four edges of the plot area.
type chartLayout struct {
	// Width and Height are the size of the whole SVG canvas.
	Width, Height int

	// PlotLeft, PlotRight, PlotTop and PlotBottom are the coordinates of the
	// plot area's edges within the canvas.
	PlotLeft, PlotRight, PlotTop, PlotBottom int
}
|
||||||
|
|
||||||
|
// metricChartSeries is one plotted line of a metric chart.
type metricChartSeries struct {
	// Name is the series name, AxisTitle the heading of its own axis (used
	// by the multi-axis GPU overview chart) and Color its stroke colour.
	Name, AxisTitle, Color string

	// Values holds the series values, one per chart point.
	Values []float64
}
|
||||||
|
|
||||||
|
// metricChartPalette is the ordered list of series colours for single-axis
// metric charts. Series take colours by index and wrap around when there are
// more series than entries.
var metricChartPalette = []string{
	"#5794f2", "#73bf69", "#f2cc0c", "#ff9830", "#f2495c",
	"#b877d9", "#56d2f7", "#8ab8ff", "#9adf8f", "#ffbe5c",
}
|
||||||
|
|
||||||
|
func renderMetricChartSVG(title string, labels []string, times []time.Time, datasets [][]float64, names []string, yMin, yMax *float64, canvasHeight int, timeline []chartTimelineSegment) ([]byte, error) {
|
||||||
|
pointCount := len(labels)
|
||||||
|
if len(times) > pointCount {
|
||||||
|
pointCount = len(times)
|
||||||
|
}
|
||||||
|
if pointCount == 0 {
|
||||||
|
pointCount = 1
|
||||||
|
labels = []string{""}
|
||||||
|
times = []time.Time{time.Time{}}
|
||||||
|
}
|
||||||
|
if len(labels) < pointCount {
|
||||||
|
padded := make([]string, pointCount)
|
||||||
|
copy(padded, labels)
|
||||||
|
labels = padded
|
||||||
|
}
|
||||||
|
if len(times) < pointCount {
|
||||||
|
times = synthesizeChartTimes(times, pointCount)
|
||||||
|
}
|
||||||
|
for i := range datasets {
|
||||||
|
if len(datasets[i]) == 0 {
|
||||||
|
datasets[i] = make([]float64, pointCount)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
mn, avg, mx := globalStats(datasets)
|
||||||
|
if mx > 0 {
|
||||||
|
title = fmt.Sprintf("%s ↓%s ~%s ↑%s",
|
||||||
|
title,
|
||||||
|
chartLegendNumber(mn),
|
||||||
|
chartLegendNumber(avg),
|
||||||
|
chartLegendNumber(mx),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
legendItems := []metricChartSeries{}
|
||||||
|
for i, name := range names {
|
||||||
|
color := metricChartPalette[i%len(metricChartPalette)]
|
||||||
|
values := make([]float64, pointCount)
|
||||||
|
if i < len(datasets) {
|
||||||
|
copy(values, coalesceDataset(datasets[i], pointCount))
|
||||||
|
}
|
||||||
|
legendItems = append(legendItems, metricChartSeries{
|
||||||
|
Name: name,
|
||||||
|
Color: color,
|
||||||
|
Values: values,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
scale := singleAxisChartScale(datasets, yMin, yMax)
|
||||||
|
layout := singleAxisChartLayout(canvasHeight, len(legendItems))
|
||||||
|
start, end := chartTimeBounds(times)
|
||||||
|
|
||||||
|
var b strings.Builder
|
||||||
|
writeSVGOpen(&b, layout.Width, layout.Height)
|
||||||
|
writeChartFrame(&b, title, layout.Width, layout.Height)
|
||||||
|
writeTimelineIdleSpans(&b, layout, start, end, timeline)
|
||||||
|
writeVerticalGrid(&b, layout, times, pointCount, 8)
|
||||||
|
writeHorizontalGrid(&b, layout, scale)
|
||||||
|
writeTimelineBoundaries(&b, layout, start, end, timeline)
|
||||||
|
writePlotBorder(&b, layout)
|
||||||
|
writeSingleAxisY(&b, layout, scale)
|
||||||
|
writeXAxisLabels(&b, layout, times, labels, start, end, 8)
|
||||||
|
for _, item := range legendItems {
|
||||||
|
writeSeriesPolyline(&b, layout, times, start, end, item.Values, scale, item.Color)
|
||||||
|
}
|
||||||
|
writeLegend(&b, layout, legendItems)
|
||||||
|
writeSVGClose(&b)
|
||||||
|
return []byte(b.String()), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func renderGPUOverviewChartSVG(idx int, samples []platform.LiveMetricSample, timeline []chartTimelineSegment) ([]byte, bool, error) {
|
||||||
|
temp := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.TempC })
|
||||||
|
power := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.PowerW })
|
||||||
|
coreClock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.ClockMHz })
|
||||||
|
memClock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.MemClockMHz })
|
||||||
|
if temp == nil && power == nil && coreClock == nil && memClock == nil {
|
||||||
|
return nil, false, nil
|
||||||
|
}
|
||||||
|
labels := sampleTimeLabels(samples)
|
||||||
|
times := sampleTimes(samples)
|
||||||
|
svg, err := drawGPUOverviewChartSVG(
|
||||||
|
fmt.Sprintf("GPU %d Overview", idx),
|
||||||
|
labels,
|
||||||
|
times,
|
||||||
|
[]metricChartSeries{
|
||||||
|
{Name: "Temp C", Values: coalesceDataset(temp, len(labels)), Color: "#f05a5a", AxisTitle: "Temp C"},
|
||||||
|
{Name: "Power W", Values: coalesceDataset(power, len(labels)), Color: "#ffb357", AxisTitle: "Power W"},
|
||||||
|
{Name: "Core Clock MHz", Values: coalesceDataset(coreClock, len(labels)), Color: "#73bf69", AxisTitle: "Core MHz"},
|
||||||
|
{Name: "Memory Clock MHz", Values: coalesceDataset(memClock, len(labels)), Color: "#5794f2", AxisTitle: "Memory MHz"},
|
||||||
|
},
|
||||||
|
timeline,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return nil, false, err
|
||||||
|
}
|
||||||
|
return svg, true, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func drawGPUOverviewChartSVG(title string, labels []string, times []time.Time, series []metricChartSeries, timeline []chartTimelineSegment) ([]byte, error) {
|
||||||
|
if len(series) != 4 {
|
||||||
|
return nil, fmt.Errorf("gpu overview requires 4 series, got %d", len(series))
|
||||||
|
}
|
||||||
|
const (
|
||||||
|
width = 1400
|
||||||
|
height = 840
|
||||||
|
plotLeft = 180
|
||||||
|
plotRight = 1220
|
||||||
|
plotTop = 96
|
||||||
|
plotBottom = 660
|
||||||
|
)
|
||||||
|
const (
|
||||||
|
leftOuterAxis = 72
|
||||||
|
leftInnerAxis = 132
|
||||||
|
rightInnerAxis = 1268
|
||||||
|
rightOuterAxis = 1328
|
||||||
|
)
|
||||||
|
layout := chartLayout{
|
||||||
|
Width: width,
|
||||||
|
Height: height,
|
||||||
|
PlotLeft: plotLeft,
|
||||||
|
PlotRight: plotRight,
|
||||||
|
PlotTop: plotTop,
|
||||||
|
PlotBottom: plotBottom,
|
||||||
|
}
|
||||||
|
axisX := []int{leftOuterAxis, leftInnerAxis, rightInnerAxis, rightOuterAxis}
|
||||||
|
pointCount := len(labels)
|
||||||
|
if len(times) > pointCount {
|
||||||
|
pointCount = len(times)
|
||||||
|
}
|
||||||
|
if pointCount == 0 {
|
||||||
|
pointCount = 1
|
||||||
|
labels = []string{""}
|
||||||
|
times = []time.Time{time.Time{}}
|
||||||
|
}
|
||||||
|
if len(labels) < pointCount {
|
||||||
|
padded := make([]string, pointCount)
|
||||||
|
copy(padded, labels)
|
||||||
|
labels = padded
|
||||||
|
}
|
||||||
|
if len(times) < pointCount {
|
||||||
|
times = synthesizeChartTimes(times, pointCount)
|
||||||
|
}
|
||||||
|
for i := range series {
|
||||||
|
if len(series[i].Values) == 0 {
|
||||||
|
series[i].Values = make([]float64, pointCount)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
scales := make([]chartScale, len(series))
|
||||||
|
for i := range series {
|
||||||
|
min, max := chartSeriesBounds(series[i].Values)
|
||||||
|
ticks := chartNiceTicks(min, max, 8)
|
||||||
|
scales[i] = chartScale{
|
||||||
|
Min: ticks[0],
|
||||||
|
Max: ticks[len(ticks)-1],
|
||||||
|
Ticks: ticks,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
start, end := chartTimeBounds(times)
|
||||||
|
|
||||||
|
var b strings.Builder
|
||||||
|
writeSVGOpen(&b, width, height)
|
||||||
|
writeChartFrame(&b, title, width, height)
|
||||||
|
writeTimelineIdleSpans(&b, layout, start, end, timeline)
|
||||||
|
writeVerticalGrid(&b, layout, times, pointCount, 8)
|
||||||
|
writeHorizontalGrid(&b, layout, scales[0])
|
||||||
|
writeTimelineBoundaries(&b, layout, start, end, timeline)
|
||||||
|
writePlotBorder(&b, layout)
|
||||||
|
|
||||||
|
for i, axisLineX := range axisX {
|
||||||
|
fmt.Fprintf(&b, `<line x1="%d" y1="%d" x2="%d" y2="%d" stroke="%s" stroke-width="1"/>`+"\n",
|
||||||
|
axisLineX, layout.PlotTop, axisLineX, layout.PlotBottom, series[i].Color)
|
||||||
|
fmt.Fprintf(&b, `<text x="%d" y="%d" text-anchor="middle" font-family="sans-serif" font-size="11" font-weight="700" fill="%s">%s</text>`+"\n",
|
||||||
|
axisLineX, 64, series[i].Color, sanitizeChartText(series[i].AxisTitle))
|
||||||
|
for _, tick := range scales[i].Ticks {
|
||||||
|
y := chartYForValue(valueClamp(tick, scales[i]), scales[i], layout.PlotTop, layout.PlotBottom)
|
||||||
|
label := sanitizeChartText(chartYAxisNumber(tick))
|
||||||
|
if i < 2 {
|
||||||
|
fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f" stroke="%s" stroke-width="1"/>`+"\n",
|
||||||
|
axisLineX, y, axisLineX+6, y, series[i].Color)
|
||||||
|
fmt.Fprintf(&b, `<text x="%d" y="%.1f" text-anchor="end" dy="4" font-family="sans-serif" font-size="10" fill="%s">%s</text>`+"\n",
|
||||||
|
axisLineX-8, y, series[i].Color, label)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f" stroke="%s" stroke-width="1"/>`+"\n",
|
||||||
|
axisLineX, y, axisLineX-6, y, series[i].Color)
|
||||||
|
fmt.Fprintf(&b, `<text x="%d" y="%.1f" text-anchor="start" dy="4" font-family="sans-serif" font-size="10" fill="%s">%s</text>`+"\n",
|
||||||
|
axisLineX+8, y, series[i].Color, label)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
writeXAxisLabels(&b, layout, times, labels, start, end, 8)
|
||||||
|
for i := range series {
|
||||||
|
writeSeriesPolyline(&b, layout, times, start, end, series[i].Values, scales[i], series[i].Color)
|
||||||
|
}
|
||||||
|
writeLegend(&b, layout, series)
|
||||||
|
writeSVGClose(&b)
|
||||||
|
return []byte(b.String()), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func metricsTimelineSegments(samples []platform.LiveMetricSample, now time.Time) []chartTimelineSegment {
|
||||||
|
if len(samples) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
times := sampleTimes(samples)
|
||||||
|
start, end := chartTimeBounds(times)
|
||||||
|
if start.IsZero() || end.IsZero() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return chartTimelineSegmentsForRange(start, end, now, snapshotTaskHistory())
|
||||||
|
}
|
||||||
|
|
||||||
|
func snapshotTaskHistory() []Task {
|
||||||
|
globalQueue.mu.Lock()
|
||||||
|
defer globalQueue.mu.Unlock()
|
||||||
|
out := make([]Task, len(globalQueue.tasks))
|
||||||
|
for i, t := range globalQueue.tasks {
|
||||||
|
out[i] = *t
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
func chartTimelineSegmentsForRange(start, end, now time.Time, tasks []Task) []chartTimelineSegment {
|
||||||
|
if start.IsZero() || end.IsZero() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if end.Before(start) {
|
||||||
|
start, end = end, start
|
||||||
|
}
|
||||||
|
type interval struct {
|
||||||
|
start time.Time
|
||||||
|
end time.Time
|
||||||
|
}
|
||||||
|
active := make([]interval, 0, len(tasks))
|
||||||
|
for _, task := range tasks {
|
||||||
|
if task.StartedAt == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
intervalStart := task.StartedAt.UTC()
|
||||||
|
intervalEnd := now.UTC()
|
||||||
|
if task.DoneAt != nil {
|
||||||
|
intervalEnd = task.DoneAt.UTC()
|
||||||
|
}
|
||||||
|
if !intervalEnd.After(intervalStart) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if intervalEnd.Before(start) || intervalStart.After(end) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if intervalStart.Before(start) {
|
||||||
|
intervalStart = start
|
||||||
|
}
|
||||||
|
if intervalEnd.After(end) {
|
||||||
|
intervalEnd = end
|
||||||
|
}
|
||||||
|
active = append(active, interval{start: intervalStart, end: intervalEnd})
|
||||||
|
}
|
||||||
|
sort.Slice(active, func(i, j int) bool {
|
||||||
|
if active[i].start.Equal(active[j].start) {
|
||||||
|
return active[i].end.Before(active[j].end)
|
||||||
|
}
|
||||||
|
return active[i].start.Before(active[j].start)
|
||||||
|
})
|
||||||
|
merged := make([]interval, 0, len(active))
|
||||||
|
for _, span := range active {
|
||||||
|
if len(merged) == 0 {
|
||||||
|
merged = append(merged, span)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
last := &merged[len(merged)-1]
|
||||||
|
if !span.start.After(last.end) {
|
||||||
|
if span.end.After(last.end) {
|
||||||
|
last.end = span.end
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
merged = append(merged, span)
|
||||||
|
}
|
||||||
|
|
||||||
|
segments := make([]chartTimelineSegment, 0, len(merged)*2+1)
|
||||||
|
cursor := start
|
||||||
|
for _, span := range merged {
|
||||||
|
if span.start.After(cursor) {
|
||||||
|
segments = append(segments, chartTimelineSegment{Start: cursor, End: span.start, Active: false})
|
||||||
|
}
|
||||||
|
segments = append(segments, chartTimelineSegment{Start: span.start, End: span.end, Active: true})
|
||||||
|
cursor = span.end
|
||||||
|
}
|
||||||
|
if cursor.Before(end) {
|
||||||
|
segments = append(segments, chartTimelineSegment{Start: cursor, End: end, Active: false})
|
||||||
|
}
|
||||||
|
if len(segments) == 0 {
|
||||||
|
segments = append(segments, chartTimelineSegment{Start: start, End: end, Active: false})
|
||||||
|
}
|
||||||
|
return segments
|
||||||
|
}
|
||||||
|
|
||||||
|
func sampleTimes(samples []platform.LiveMetricSample) []time.Time {
|
||||||
|
times := make([]time.Time, 0, len(samples))
|
||||||
|
for _, sample := range samples {
|
||||||
|
times = append(times, sample.Timestamp)
|
||||||
|
}
|
||||||
|
return times
|
||||||
|
}
|
||||||
|
|
||||||
|
func singleAxisChartScale(datasets [][]float64, yMin, yMax *float64) chartScale {
|
||||||
|
min, max := 0.0, 1.0
|
||||||
|
if yMin != nil && yMax != nil {
|
||||||
|
min, max = *yMin, *yMax
|
||||||
|
} else {
|
||||||
|
min, max = chartSeriesBounds(flattenDatasets(datasets))
|
||||||
|
if yMin != nil {
|
||||||
|
min = *yMin
|
||||||
|
}
|
||||||
|
if yMax != nil {
|
||||||
|
max = *yMax
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ticks := chartNiceTicks(min, max, 8)
|
||||||
|
return chartScale{Min: ticks[0], Max: ticks[len(ticks)-1], Ticks: ticks}
|
||||||
|
}
|
||||||
|
|
||||||
|
// flattenDatasets concatenates all datasets, in order, into one new slice.
// The result is non-nil even when there are no values.
func flattenDatasets(datasets [][]float64) []float64 {
	size := 0
	for i := range datasets {
		size += len(datasets[i])
	}

	flat := make([]float64, size)
	offset := 0
	for i := range datasets {
		offset += copy(flat[offset:], datasets[i])
	}
	return flat
}
|
||||||
|
|
||||||
|
func singleAxisChartLayout(canvasHeight int, seriesCount int) chartLayout {
|
||||||
|
legendRows := 0
|
||||||
|
if chartLegendVisible(seriesCount) && seriesCount > 0 {
|
||||||
|
cols := 4
|
||||||
|
if seriesCount < cols {
|
||||||
|
cols = seriesCount
|
||||||
|
}
|
||||||
|
legendRows = (seriesCount + cols - 1) / cols
|
||||||
|
}
|
||||||
|
legendHeight := 0
|
||||||
|
if legendRows > 0 {
|
||||||
|
legendHeight = legendRows*24 + 24
|
||||||
|
}
|
||||||
|
return chartLayout{
|
||||||
|
Width: 1400,
|
||||||
|
Height: canvasHeight,
|
||||||
|
PlotLeft: 96,
|
||||||
|
PlotRight: 1352,
|
||||||
|
PlotTop: 72,
|
||||||
|
PlotBottom: canvasHeight - 60 - legendHeight,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// chartTimeBounds returns the earliest and latest timestamps in times, both
// converted to UTC. An empty input yields two zero time values.
func chartTimeBounds(times []time.Time) (time.Time, time.Time) {
	if len(times) == 0 {
		return time.Time{}, time.Time{}
	}
	earliest := times[0].UTC()
	latest := earliest
	for i := 1; i < len(times); i++ {
		current := times[i].UTC()
		// earliest never exceeds latest, so at most one case can apply.
		switch {
		case current.Before(earliest):
			earliest = current
		case current.After(latest):
			latest = current
		}
	}
	return earliest, latest
}
|
||||||
|
|
||||||
|
// synthesizeChartTimes returns a timestamp for each of count chart points.
//
//   - count <= 0 yields nil.
//   - If times already has count entries it is returned unchanged.
//   - If times has exactly one entry, points are spaced one minute apart
//     starting at that entry.
//   - Otherwise points are spaced one minute apart and end at the current
//     UTC time.
func synthesizeChartTimes(times []time.Time, count int) []time.Time {
	switch {
	case count <= 0:
		return nil
	case len(times) == count:
		return times
	}

	var origin time.Time
	if len(times) == 1 {
		origin = times[0]
	} else {
		origin = time.Now().UTC().Add(-time.Duration(count-1) * time.Minute)
	}

	series := make([]time.Time, 0, count)
	for step := 0; step < count; step++ {
		series = append(series, origin.Add(time.Duration(step)*time.Minute))
	}
	return series
}
|
||||||
|
|
||||||
|
// writeSVGOpen writes the opening <svg> tag with the given pixel size and a
// viewBox covering the same area, followed by a newline.
func writeSVGOpen(b *strings.Builder, width, height int) {
	// Indexed verbs reuse width/height for both the size and the viewBox.
	fmt.Fprintf(b, `<svg xmlns="http://www.w3.org/2000/svg" width="%[1]d" height="%[2]d" viewBox="0 0 %[1]d %[2]d">`+"\n", width, height)
}
|
||||||
|
|
||||||
|
// writeSVGClose writes the closing </svg> tag followed by a newline.
func writeSVGClose(b *strings.Builder) {
	b.WriteString("</svg>")
	b.WriteByte('\n')
}
|
||||||
|
|
||||||
|
func writeChartFrame(b *strings.Builder, title string, width, height int) {
|
||||||
|
fmt.Fprintf(b, `<rect width="%d" height="%d" rx="10" ry="10" fill="#ffffff" stroke="#d7e0ea"/>`+"\n", width, height)
|
||||||
|
fmt.Fprintf(b, `<text x="%d" y="30" text-anchor="middle" font-family="sans-serif" font-size="16" font-weight="700" fill="#1f2937">%s</text>`+"\n",
|
||||||
|
width/2, sanitizeChartText(title))
|
||||||
|
}
|
||||||
|
|
||||||
|
func writePlotBorder(b *strings.Builder, layout chartLayout) {
|
||||||
|
fmt.Fprintf(b, `<rect x="%d" y="%d" width="%d" height="%d" fill="none" stroke="#cbd5e1" stroke-width="1"/>`+"\n",
|
||||||
|
layout.PlotLeft, layout.PlotTop, layout.PlotRight-layout.PlotLeft, layout.PlotBottom-layout.PlotTop)
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeHorizontalGrid(b *strings.Builder, layout chartLayout, scale chartScale) {
|
||||||
|
b.WriteString(`<g stroke="#e2e8f0" stroke-width="1">` + "\n")
|
||||||
|
for _, tick := range scale.Ticks {
|
||||||
|
y := chartYForValue(tick, scale, layout.PlotTop, layout.PlotBottom)
|
||||||
|
fmt.Fprintf(b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f"/>`+"\n",
|
||||||
|
layout.PlotLeft, y, layout.PlotRight, y)
|
||||||
|
}
|
||||||
|
b.WriteString(`</g>` + "\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeVerticalGrid(b *strings.Builder, layout chartLayout, times []time.Time, pointCount, target int) {
|
||||||
|
if pointCount <= 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
start, end := chartTimeBounds(times)
|
||||||
|
b.WriteString(`<g stroke="#edf2f7" stroke-width="1">` + "\n")
|
||||||
|
for _, idx := range gpuChartLabelIndices(pointCount, target) {
|
||||||
|
ts := chartPointTime(times, idx)
|
||||||
|
x := chartXForTime(ts, start, end, layout.PlotLeft, layout.PlotRight)
|
||||||
|
fmt.Fprintf(b, `<line x1="%.1f" y1="%d" x2="%.1f" y2="%d"/>`+"\n",
|
||||||
|
x, layout.PlotTop, x, layout.PlotBottom)
|
||||||
|
}
|
||||||
|
b.WriteString(`</g>` + "\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeSingleAxisY(b *strings.Builder, layout chartLayout, scale chartScale) {
|
||||||
|
fmt.Fprintf(b, `<line x1="%d" y1="%d" x2="%d" y2="%d" stroke="#64748b" stroke-width="1"/>`+"\n",
|
||||||
|
layout.PlotLeft, layout.PlotTop, layout.PlotLeft, layout.PlotBottom)
|
||||||
|
for _, tick := range scale.Ticks {
|
||||||
|
y := chartYForValue(tick, scale, layout.PlotTop, layout.PlotBottom)
|
||||||
|
fmt.Fprintf(b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f" stroke="#64748b" stroke-width="1"/>`+"\n",
|
||||||
|
layout.PlotLeft, y, layout.PlotLeft-6, y)
|
||||||
|
fmt.Fprintf(b, `<text x="%d" y="%.1f" text-anchor="end" dy="4" font-family="sans-serif" font-size="10" fill="#475569">%s</text>`+"\n",
|
||||||
|
layout.PlotLeft-10, y, sanitizeChartText(chartYAxisNumber(tick)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeXAxisLabels(b *strings.Builder, layout chartLayout, times []time.Time, labels []string, start, end time.Time, target int) {
|
||||||
|
pointCount := len(labels)
|
||||||
|
if len(times) > pointCount {
|
||||||
|
pointCount = len(times)
|
||||||
|
}
|
||||||
|
b.WriteString(`<g font-family="sans-serif" font-size="11" fill="#64748b" text-anchor="middle">` + "\n")
|
||||||
|
for _, idx := range gpuChartLabelIndices(pointCount, target) {
|
||||||
|
x := chartXForTime(chartPointTime(times, idx), start, end, layout.PlotLeft, layout.PlotRight)
|
||||||
|
label := ""
|
||||||
|
if idx < len(labels) {
|
||||||
|
label = labels[idx]
|
||||||
|
}
|
||||||
|
fmt.Fprintf(b, `<text x="%.1f" y="%d">%s</text>`+"\n", x, layout.PlotBottom+28, sanitizeChartText(label))
|
||||||
|
}
|
||||||
|
b.WriteString(`</g>` + "\n")
|
||||||
|
fmt.Fprintf(b, `<text x="%d" y="%d" text-anchor="middle" font-family="sans-serif" font-size="12" fill="#64748b">Time</text>`+"\n",
|
||||||
|
(layout.PlotLeft+layout.PlotRight)/2, layout.PlotBottom+48)
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeSeriesPolyline(b *strings.Builder, layout chartLayout, times []time.Time, start, end time.Time, values []float64, scale chartScale, color string) {
|
||||||
|
if len(values) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var points strings.Builder
|
||||||
|
for idx, value := range values {
|
||||||
|
if idx > 0 {
|
||||||
|
points.WriteByte(' ')
|
||||||
|
}
|
||||||
|
x := chartXForTime(chartPointTime(times, idx), start, end, layout.PlotLeft, layout.PlotRight)
|
||||||
|
y := chartYForValue(value, scale, layout.PlotTop, layout.PlotBottom)
|
||||||
|
points.WriteString(strconv.FormatFloat(x, 'f', 1, 64))
|
||||||
|
points.WriteByte(',')
|
||||||
|
points.WriteString(strconv.FormatFloat(y, 'f', 1, 64))
|
||||||
|
}
|
||||||
|
fmt.Fprintf(b, `<polyline points="%s" fill="none" stroke="%s" stroke-width="2.2" stroke-linejoin="round" stroke-linecap="round"/>`+"\n",
|
||||||
|
points.String(), color)
|
||||||
|
if len(values) == 1 {
|
||||||
|
x := chartXForTime(chartPointTime(times, 0), start, end, layout.PlotLeft, layout.PlotRight)
|
||||||
|
y := chartYForValue(values[0], scale, layout.PlotTop, layout.PlotBottom)
|
||||||
|
fmt.Fprintf(b, `<circle cx="%.1f" cy="%.1f" r="3.5" fill="%s"/>`+"\n", x, y, color)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeLegend(b *strings.Builder, layout chartLayout, series []metricChartSeries) {
|
||||||
|
if !chartLegendVisible(len(series)) || len(series) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
cols := 4
|
||||||
|
if len(series) < cols {
|
||||||
|
cols = len(series)
|
||||||
|
}
|
||||||
|
cellWidth := float64(layout.PlotRight-layout.PlotLeft) / float64(cols)
|
||||||
|
baseY := layout.PlotBottom + 74
|
||||||
|
for i, item := range series {
|
||||||
|
row := i / cols
|
||||||
|
col := i % cols
|
||||||
|
x := float64(layout.PlotLeft) + cellWidth*float64(col) + 8
|
||||||
|
y := float64(baseY + row*24)
|
||||||
|
fmt.Fprintf(b, `<line x1="%.1f" y1="%.1f" x2="%.1f" y2="%.1f" stroke="%s" stroke-width="3"/>`+"\n",
|
||||||
|
x, y, x+28, y, item.Color)
|
||||||
|
fmt.Fprintf(b, `<text x="%.1f" y="%.1f" font-family="sans-serif" font-size="12" fill="#1f2937">%s</text>`+"\n",
|
||||||
|
x+38, y+4, sanitizeChartText(item.Name))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeTimelineIdleSpans(b *strings.Builder, layout chartLayout, start, end time.Time, segments []chartTimelineSegment) {
|
||||||
|
if len(segments) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
b.WriteString(`<g data-role="timeline-overlay">` + "\n")
|
||||||
|
for _, segment := range segments {
|
||||||
|
if segment.Active || !segment.End.After(segment.Start) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
x0 := chartXForTime(segment.Start, start, end, layout.PlotLeft, layout.PlotRight)
|
||||||
|
x1 := chartXForTime(segment.End, start, end, layout.PlotLeft, layout.PlotRight)
|
||||||
|
fmt.Fprintf(b, `<rect x="%.1f" y="%d" width="%.1f" height="%d" fill="#475569" opacity="0.10"/>`+"\n",
|
||||||
|
x0, layout.PlotTop, math.Max(1, x1-x0), layout.PlotBottom-layout.PlotTop)
|
||||||
|
}
|
||||||
|
b.WriteString(`</g>` + "\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeTimelineBoundaries(b *strings.Builder, layout chartLayout, start, end time.Time, segments []chartTimelineSegment) {
|
||||||
|
if len(segments) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
seen := map[int]bool{}
|
||||||
|
b.WriteString(`<g data-role="timeline-boundaries" stroke="#94a3b8" stroke-width="1.2">` + "\n")
|
||||||
|
for i, segment := range segments {
|
||||||
|
if i > 0 {
|
||||||
|
x := int(math.Round(chartXForTime(segment.Start, start, end, layout.PlotLeft, layout.PlotRight)))
|
||||||
|
if !seen[x] {
|
||||||
|
seen[x] = true
|
||||||
|
fmt.Fprintf(b, `<line x1="%d" y1="%d" x2="%d" y2="%d"/>`+"\n", x, layout.PlotTop, x, layout.PlotBottom)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if i < len(segments)-1 {
|
||||||
|
x := int(math.Round(chartXForTime(segment.End, start, end, layout.PlotLeft, layout.PlotRight)))
|
||||||
|
if !seen[x] {
|
||||||
|
seen[x] = true
|
||||||
|
fmt.Fprintf(b, `<line x1="%d" y1="%d" x2="%d" y2="%d"/>`+"\n", x, layout.PlotTop, x, layout.PlotBottom)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
b.WriteString(`</g>` + "\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
// chartXForTime maps ts linearly from the time range [start, end] onto the
// pixel range [left, right]. Times outside the range are clamped to its
// edges. When end is not after start the horizontal midpoint is returned.
func chartXForTime(ts, start, end time.Time, left, right int) float64 {
	if !end.After(start) {
		return float64(left+right) / 2
	}
	clamped := ts
	switch {
	case ts.Before(start):
		clamped = start
	case ts.After(end):
		clamped = end
	}
	elapsed := float64(clamped.Sub(start))
	total := float64(end.Sub(start))
	return float64(left) + elapsed/total*float64(right-left)
}
|
||||||
|
|
||||||
|
// chartPointTime returns the UTC time to use for the chart point at idx.
//
// A non-zero times[idx] is used when idx is in range. Otherwise the time is
// synthesized as idx minutes after times[0] (when that exists and is
// non-zero), or idx minutes after the current time as a last resort.
func chartPointTime(times []time.Time, idx int) time.Time {
	inRange := idx >= 0 && idx < len(times)
	if inRange && !times[idx].IsZero() {
		return times[idx].UTC()
	}
	offset := time.Duration(idx) * time.Minute
	if len(times) == 0 || times[0].IsZero() {
		return time.Now().UTC().Add(offset)
	}
	return times[0].UTC().Add(offset)
}
|
||||||
|
|
||||||
|
func chartYForValue(value float64, scale chartScale, plotTop, plotBottom int) float64 {
|
||||||
|
if scale.Max <= scale.Min {
|
||||||
|
return float64(plotTop+plotBottom) / 2
|
||||||
|
}
|
||||||
|
return float64(plotBottom) - (value-scale.Min)/(scale.Max-scale.Min)*float64(plotBottom-plotTop)
|
||||||
|
}
|
||||||
|
|
||||||
|
// chartSeriesBounds returns a padded (lower, upper) range for plotting
// values.
//
//   - No values, or all values equal to zero: (0, 1).
//   - All values equal and non-zero: the value -/+ 10% of its magnitude.
//   - If the resulting lower bound is positive, both bounds are widened by
//     20% of the span and the lower bound is floored at zero.
func chartSeriesBounds(values []float64) (float64, float64) {
	if len(values) == 0 {
		return 0, 1
	}

	lo, hi := values[0], values[0]
	for i := 1; i < len(values); i++ {
		v := values[i]
		if v < lo {
			lo = v
		}
		if v > hi {
			hi = v
		}
	}

	// Flat series: open up a window around the single value.
	if lo == hi {
		if hi == 0 {
			return 0, 1
		}
		margin := math.Abs(hi) * 0.1
		if margin == 0 {
			margin = 1
		}
		lo -= margin
		hi += margin
	}

	// Strictly positive series: add headroom but never dip below zero.
	if lo > 0 {
		margin := (hi - lo) * 0.2
		if margin == 0 {
			margin = hi * 0.1
		}
		lo -= margin
		if lo < 0 {
			lo = 0
		}
		hi += margin
	}
	return lo, hi
}
|
||||||
|
|
||||||
|
// chartNiceTicks returns evenly spaced axis ticks that cover [min, max],
// aiming for roughly target ticks. The step is 1, 2, 5 or 10 times a power
// of ten, the first tick is at or below min and the last at or above max.
// Tick values are rounded to nine decimals to hide floating-point noise.
//
// The function always returns at least two ticks:
//   - NaN or infinite bounds are replaced by the range 0..1;
//   - inverted bounds are swapped;
//   - equal bounds are widened to min..min+1;
//   - target values below 1 are treated as 1;
//   - if no usable step can be derived (for example the span is below the
//     floating-point resolution of the bounds), the two bounds themselves
//     are returned.
func chartNiceTicks(min, max float64, target int) []float64 {
	if math.IsNaN(min) || math.IsNaN(max) || math.IsInf(min, 0) || math.IsInf(max, 0) {
		min, max = 0, 1
	}
	if min > max {
		min, max = max, min
	}
	if min == max {
		max = min + 1
	}
	if target < 1 {
		target = 1
	}

	fallback := []float64{min, max}
	span := max - min
	if !(span > 0) || math.IsInf(span, 0) {
		return fallback
	}

	step := math.Pow(10, math.Floor(math.Log10(span/float64(target))))
	for _, factor := range []float64{1, 2, 5, 10} {
		if span/(factor*step) <= float64(target)*1.5 {
			step = factor * step
			break
		}
	}
	if !(step > 0) || math.IsInf(step, 0) {
		return fallback
	}

	low := math.Floor(min/step) * step
	high := math.Ceil(max/step) * step

	// Generate ticks by index rather than by repeated addition: this avoids
	// accumulated rounding drift and cannot loop forever when step is too
	// small to change low.
	count := int(math.Round((high - low) / step))
	if count < 1 || count > 4*target+4 {
		return fallback
	}
	ticks := make([]float64, 0, count+1)
	for i := 0; i <= count; i++ {
		value := low + float64(i)*step
		ticks = append(ticks, math.Round(value*1e9)/1e9)
	}
	return ticks
}
|
||||||
|
|
||||||
|
func valueClamp(value float64, scale chartScale) float64 {
|
||||||
|
if value < scale.Min {
|
||||||
|
return scale.Min
|
||||||
|
}
|
||||||
|
if value > scale.Max {
|
||||||
|
return scale.Max
|
||||||
|
}
|
||||||
|
return value
|
||||||
|
}
|
||||||
@@ -232,7 +232,7 @@ func truncate(s string, max int) string {
|
|||||||
// isSATTarget returns true for task targets that run hardware acceptance tests.
|
// isSATTarget returns true for task targets that run hardware acceptance tests.
|
||||||
func isSATTarget(target string) bool {
|
func isSATTarget(target string) bool {
|
||||||
switch target {
|
switch target {
|
||||||
case "nvidia", "nvidia-stress", "memory", "memory-stress", "storage",
|
case "nvidia", "nvidia-benchmark", "nvidia-stress", "memory", "memory-stress", "storage",
|
||||||
"cpu", "sat-stress", "amd", "amd-mem", "amd-bandwidth", "amd-stress",
|
"cpu", "sat-stress", "amd", "amd-mem", "amd-bandwidth", "amd-stress",
|
||||||
"platform-stress":
|
"platform-stress":
|
||||||
return true
|
return true
|
||||||
|
|||||||
@@ -91,6 +91,7 @@ func layoutNav(active string, buildLabel string) string {
|
|||||||
{"audit", "Audit", "/audit", ""},
|
{"audit", "Audit", "/audit", ""},
|
||||||
{"validate", "Validate", "/validate", ""},
|
{"validate", "Validate", "/validate", ""},
|
||||||
{"burn", "Burn", "/burn", ""},
|
{"burn", "Burn", "/burn", ""},
|
||||||
|
{"benchmark", "Benchmark", "/benchmark", ""},
|
||||||
{"tasks", "Tasks", "/tasks", ""},
|
{"tasks", "Tasks", "/tasks", ""},
|
||||||
{"tools", "Tools", "/tools", ""},
|
{"tools", "Tools", "/tools", ""},
|
||||||
}
|
}
|
||||||
@@ -140,6 +141,10 @@ func renderPage(page string, opts HandlerOptions) string {
|
|||||||
pageID = "burn"
|
pageID = "burn"
|
||||||
title = "Burn"
|
title = "Burn"
|
||||||
body = renderBurn()
|
body = renderBurn()
|
||||||
|
case "benchmark":
|
||||||
|
pageID = "benchmark"
|
||||||
|
title = "Benchmark"
|
||||||
|
body = renderBenchmark()
|
||||||
case "tasks":
|
case "tasks":
|
||||||
pageID = "tasks"
|
pageID = "tasks"
|
||||||
title = "Tasks"
|
title = "Tasks"
|
||||||
@@ -553,6 +558,21 @@ func renderMetrics() string {
|
|||||||
|
|
||||||
<script>
|
<script>
|
||||||
let gpuChartKey = '';
|
let gpuChartKey = '';
|
||||||
|
const gpuChartModeStorageKey = 'bee.metrics.gpuChartMode';
|
||||||
|
|
||||||
|
function loadGPUChartModePreference() {
|
||||||
|
try {
|
||||||
|
return sessionStorage.getItem(gpuChartModeStorageKey) === 'per-gpu';
|
||||||
|
} catch (_) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function saveGPUChartModePreference(perGPU) {
|
||||||
|
try {
|
||||||
|
sessionStorage.setItem(gpuChartModeStorageKey, perGPU ? 'per-gpu' : 'per-metric');
|
||||||
|
} catch (_) {}
|
||||||
|
}
|
||||||
|
|
||||||
function refreshChartImage(el) {
|
function refreshChartImage(el) {
|
||||||
if (!el || el.dataset.loading === '1') return;
|
if (!el || el.dataset.loading === '1') return;
|
||||||
@@ -633,10 +653,19 @@ function loadMetricsLayout() {
|
|||||||
fetch('/api/metrics/latest').then(function(r) { return r.json(); }).then(syncMetricsLayout).catch(function() {});
|
fetch('/api/metrics/latest').then(function(r) { return r.json(); }).then(syncMetricsLayout).catch(function() {});
|
||||||
}
|
}
|
||||||
|
|
||||||
document.getElementById('gpu-chart-toggle').addEventListener('change', function() {
|
const gpuChartToggle = document.getElementById('gpu-chart-toggle');
|
||||||
applyGPUChartMode();
|
if (gpuChartToggle) {
|
||||||
refreshCharts();
|
gpuChartToggle.checked = loadGPUChartModePreference();
|
||||||
});
|
}
|
||||||
|
applyGPUChartMode();
|
||||||
|
|
||||||
|
if (gpuChartToggle) {
|
||||||
|
gpuChartToggle.addEventListener('change', function() {
|
||||||
|
saveGPUChartModePreference(!!gpuChartToggle.checked);
|
||||||
|
applyGPUChartMode();
|
||||||
|
refreshCharts();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
loadMetricsLayout();
|
loadMetricsLayout();
|
||||||
setInterval(refreshCharts, 3000);
|
setInterval(refreshCharts, 3000);
|
||||||
@@ -757,6 +786,193 @@ func renderSATCard(id, label, extra string) string {
|
|||||||
label, extra, id, id)
|
label, extra, id, id)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Benchmark ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// renderBenchmark returns the HTML body of the Benchmark page as one static
// string: markup, inline CSS and inline JavaScript.
//
// The markup contains an "NVIDIA Benchmark" card (profile selector, GPU
// checkbox list, NCCL checkbox, run button and status line), a "Method" card
// describing the three profiles, and an initially hidden output terminal.
//
// The embedded script:
//   - loads the GPU list from GET /api/gpu/nvidia and renders one checkbox per
//     GPU, all checked by default;
//   - keeps the run button disabled while no GPU is selected and updates the
//     selection note, including whether the NCCL step will be skipped
//     (fewer than two GPUs selected);
//   - on run, POSTs {profile, gpu_indices, run_nccl, display_name} as JSON to
//     /api/benchmark/nvidia/run and streams the returned task's log from
//     /api/tasks/<task_id>/stream through an EventSource until a "done"
//     event arrives.
//
// NOTE(review): gpu.name and err.message are concatenated into innerHTML
// without escaping; confirm both values are trusted or escape them.
func renderBenchmark() string {
	return `<p style="color:var(--muted);font-size:13px;margin-bottom:16px">Benchmark runs generate a human-readable TXT report and machine-readable result bundle. Tasks continue in the background — view progress in <a href="/tasks">Tasks</a>.</p>

<div class="grid2">
<div class="card">
<div class="card-head">NVIDIA Benchmark</div>
<div class="card-body">
<div class="form-row">
<label>Profile</label>
<select id="benchmark-profile">
<option value="standard" selected>Standard — about 15 minutes</option>
<option value="stability">Stability — 1 to 2 hours</option>
<option value="overnight">Overnight — 8 hours</option>
</select>
</div>
<div class="form-row">
<label>GPU Selection</label>
<div style="display:flex;gap:8px;flex-wrap:wrap;margin-bottom:8px">
<button class="btn btn-sm btn-secondary" type="button" onclick="benchmarkSelectAll()">Select All</button>
<button class="btn btn-sm btn-secondary" type="button" onclick="benchmarkSelectNone()">Clear</button>
</div>
<div id="benchmark-gpu-list" style="border:1px solid var(--border);border-radius:4px;padding:12px;min-height:88px">
<p style="color:var(--muted);font-size:13px">Loading NVIDIA GPUs...</p>
</div>
</div>
<label class="benchmark-cb-row">
<input type="checkbox" id="benchmark-run-nccl" checked>
<span>Run multi-GPU interconnect step (NCCL) only on the selected GPUs</span>
</label>
<p id="benchmark-selection-note" style="font-size:12px;color:var(--muted);margin:10px 0 14px">Select one GPU for single-card benchmarking or several GPUs for a constrained multi-GPU run.</p>
<button id="benchmark-run-btn" class="btn btn-primary" onclick="runNvidiaBenchmark()" disabled>▶ Run Benchmark</button>
<span id="benchmark-run-status" style="margin-left:10px;font-size:12px;color:var(--muted)"></span>
</div>
</div>

<div class="card">
<div class="card-head">Method</div>
<div class="card-body">
<p style="font-size:13px;color:var(--muted);margin-bottom:10px">Each benchmark run performs warmup, sustained compute, telemetry capture, cooldown, and optional NCCL interconnect checks.</p>
<table>
<tr><th>Profile</th><th>Purpose</th></tr>
<tr><td>Standard</td><td>Fast, repeatable performance check for server-to-server comparison.</td></tr>
<tr><td>Stability</td><td>Longer run for thermal drift, power caps, and clock instability.</td></tr>
<tr><td>Overnight</td><td>Extended verification of long-run stability and late throttling.</td></tr>
</table>
</div>
</div>
</div>

<div id="benchmark-output" style="display:none;margin-top:16px" class="card">
<div class="card-head">Benchmark Output <span id="benchmark-title"></span></div>
<div class="card-body"><div id="benchmark-terminal" class="terminal"></div></div>
</div>

<style>
.benchmark-cb-row { display:flex; align-items:flex-start; gap:8px; cursor:pointer; font-size:13px; }
.benchmark-cb-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
.benchmark-gpu-row { display:flex; align-items:flex-start; gap:8px; padding:6px 0; cursor:pointer; font-size:13px; }
.benchmark-gpu-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
</style>

<script>
let benchmarkES = null;

function benchmarkSelectedGPUIndices() {
return Array.from(document.querySelectorAll('.benchmark-gpu-checkbox'))
.filter(function(el) { return el.checked && !el.disabled; })
.map(function(el) { return parseInt(el.value, 10); })
.filter(function(v) { return !Number.isNaN(v); })
.sort(function(a, b) { return a - b; });
}

function benchmarkUpdateSelectionNote() {
const selected = benchmarkSelectedGPUIndices();
const btn = document.getElementById('benchmark-run-btn');
const note = document.getElementById('benchmark-selection-note');
const nccl = document.getElementById('benchmark-run-nccl');
if (!selected.length) {
btn.disabled = true;
note.textContent = 'Select at least one NVIDIA GPU to run the benchmark.';
return;
}
btn.disabled = false;
note.textContent = 'Selected GPUs: ' + selected.join(', ') + '.';
if (nccl && nccl.checked && selected.length < 2) {
note.textContent += ' NCCL will be skipped because fewer than 2 GPUs are selected.';
} else if (nccl && nccl.checked) {
note.textContent += ' NCCL interconnect will use only these GPUs.';
}
}

function benchmarkRenderGPUList(gpus) {
const root = document.getElementById('benchmark-gpu-list');
if (!gpus || !gpus.length) {
root.innerHTML = '<p style="color:var(--muted);font-size:13px">No NVIDIA GPUs detected.</p>';
benchmarkUpdateSelectionNote();
return;
}
root.innerHTML = gpus.map(function(gpu) {
const mem = gpu.memory_mb > 0 ? ' · ' + gpu.memory_mb + ' MiB' : '';
return '<label class="benchmark-gpu-row">'
+ '<input class="benchmark-gpu-checkbox" type="checkbox" value="' + gpu.index + '" checked onchange="benchmarkUpdateSelectionNote()">'
+ '<span><strong>GPU ' + gpu.index + '</strong> — ' + gpu.name + mem + '</span>'
+ '</label>';
}).join('');
benchmarkUpdateSelectionNote();
}

function benchmarkLoadGPUs() {
const status = document.getElementById('benchmark-run-status');
status.textContent = '';
fetch('/api/gpu/nvidia').then(function(r) {
return r.json().then(function(body) {
if (!r.ok) throw new Error(body.error || ('HTTP ' + r.status));
return body;
});
}).then(function(gpus) {
benchmarkRenderGPUList(gpus);
}).catch(function(err) {
document.getElementById('benchmark-gpu-list').innerHTML = '<p style="color:var(--crit-fg);font-size:13px">Error: ' + err.message + '</p>';
benchmarkUpdateSelectionNote();
});
}

function benchmarkSelectAll() {
document.querySelectorAll('.benchmark-gpu-checkbox').forEach(function(el) { el.checked = true; });
benchmarkUpdateSelectionNote();
}

function benchmarkSelectNone() {
document.querySelectorAll('.benchmark-gpu-checkbox').forEach(function(el) { el.checked = false; });
benchmarkUpdateSelectionNote();
}

function runNvidiaBenchmark() {
const selected = benchmarkSelectedGPUIndices();
const status = document.getElementById('benchmark-run-status');
if (!selected.length) {
status.textContent = 'Select at least one GPU.';
return;
}
if (benchmarkES) { benchmarkES.close(); benchmarkES = null; }
const body = {
profile: document.getElementById('benchmark-profile').value || 'standard',
gpu_indices: selected,
run_nccl: !!document.getElementById('benchmark-run-nccl').checked,
display_name: 'NVIDIA Benchmark'
};
document.getElementById('benchmark-output').style.display = 'block';
document.getElementById('benchmark-title').textContent = '— ' + body.profile + ' [' + selected.join(', ') + ']';
const term = document.getElementById('benchmark-terminal');
term.textContent = 'Enqueuing benchmark for GPUs ' + selected.join(', ') + '...\n';
status.textContent = 'Queueing...';
fetch('/api/benchmark/nvidia/run', {
method: 'POST',
headers: {'Content-Type':'application/json'},
body: JSON.stringify(body)
}).then(function(r) {
return r.json().then(function(payload) {
if (!r.ok) throw new Error(payload.error || ('HTTP ' + r.status));
return payload;
});
}).then(function(d) {
status.textContent = 'Task ' + d.task_id + ' queued.';
term.textContent += 'Task ' + d.task_id + ' queued. Streaming log...\n';
benchmarkES = new EventSource('/api/tasks/' + d.task_id + '/stream');
benchmarkES.onmessage = function(e) { term.textContent += e.data + '\n'; term.scrollTop = term.scrollHeight; };
benchmarkES.addEventListener('done', function(e) {
benchmarkES.close();
benchmarkES = null;
term.textContent += (e.data ? '\nERROR: ' + e.data : '\nCompleted.') + '\n';
term.scrollTop = term.scrollHeight;
status.textContent = e.data ? 'Failed.' : 'Completed.';
});
}).catch(function(err) {
status.textContent = 'Error.';
term.textContent += 'ERROR: ' + err.message + '\n';
});
}

document.getElementById('benchmark-run-nccl').addEventListener('change', benchmarkUpdateSelectionNote);
benchmarkLoadGPUs();
</script>`
}
|
||||||
|
|
||||||
// ── Burn ──────────────────────────────────────────────────────────────────────
|
// ── Burn ──────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
func renderBurn() string {
|
func renderBurn() string {
|
||||||
@@ -781,11 +997,12 @@ func renderBurn() string {
|
|||||||
<div class="card">
|
<div class="card">
|
||||||
<div class="card-head">GPU Stress</div>
|
<div class="card-head">GPU Stress</div>
|
||||||
<div class="card-body">
|
<div class="card-body">
|
||||||
<p style="font-size:12px;color:var(--muted);margin:0 0 10px">Tests run on all GPUs in the system. Availability determined by driver status.</p>
|
<p style="font-size:12px;color:var(--muted);margin:0 0 10px">NVIDIA tools run on all discovered GPUs. DCGM is the official NVIDIA diagnostic path. NCCL exercises multi-GPU fabric and is not a full compute burn.</p>
|
||||||
<div id="gpu-tools-list">
|
<div id="gpu-tools-list">
|
||||||
<label class="cb-row"><input type="checkbox" id="burn-gpu-bee" value="bee-gpu-burn" disabled><span>bee-gpu-burn <span class="cb-note" id="note-bee"></span></span></label>
|
<label class="cb-row"><input type="checkbox" id="burn-gpu-bee" value="bee-gpu-burn" disabled><span>bee-gpu-burn <span class="cb-note" id="note-bee"></span></span></label>
|
||||||
|
<label class="cb-row"><input type="checkbox" id="burn-gpu-dcgm" value="dcgm" disabled><span>DCGM Diagnostics (Official NVIDIA) <span class="cb-note" id="note-dcgm"></span></span></label>
|
||||||
<label class="cb-row"><input type="checkbox" id="burn-gpu-john" value="john" disabled><span>John the Ripper (OpenCL) <span class="cb-note" id="note-john"></span></span></label>
|
<label class="cb-row"><input type="checkbox" id="burn-gpu-john" value="john" disabled><span>John the Ripper (OpenCL) <span class="cb-note" id="note-john"></span></span></label>
|
||||||
<label class="cb-row"><input type="checkbox" id="burn-gpu-nccl" value="nccl" disabled><span>NCCL all_reduce_perf <span class="cb-note" id="note-nccl"></span></span></label>
|
<label class="cb-row"><input type="checkbox" id="burn-gpu-nccl" value="nccl" disabled><span>NCCL all_reduce_perf (Interconnect) <span class="cb-note" id="note-nccl"></span></span></label>
|
||||||
<label class="cb-row"><input type="checkbox" id="burn-gpu-rvs" value="rvs" disabled><span>RVS GST (AMD) <span class="cb-note" id="note-rvs"></span></span></label>
|
<label class="cb-row"><input type="checkbox" id="burn-gpu-rvs" value="rvs" disabled><span>RVS GST (AMD) <span class="cb-note" id="note-rvs"></span></span></label>
|
||||||
</div>
|
</div>
|
||||||
<button class="btn btn-primary" style="margin-top:10px" onclick="runGPUStress()">▶ Run GPU Stress</button>
|
<button class="btn btn-primary" style="margin-top:10px" onclick="runGPUStress()">▶ Run GPU Stress</button>
|
||||||
@@ -857,17 +1074,18 @@ function streamTask(taskId, label) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function runGPUStress() {
|
function runGPUStress() {
|
||||||
const ids = ['burn-gpu-bee','burn-gpu-john','burn-gpu-nccl','burn-gpu-rvs'];
|
const tasks = [
|
||||||
const loaderMap = {'burn-gpu-bee':'builtin','burn-gpu-john':'john','burn-gpu-nccl':'nccl','burn-gpu-rvs':'rvs'};
|
{id:'burn-gpu-bee', target:'nvidia-stress', label:'bee-gpu-burn', extra:{loader:'builtin'}},
|
||||||
const targetMap = {'burn-gpu-bee':'nvidia-stress','burn-gpu-john':'nvidia-stress','burn-gpu-nccl':'nvidia-stress','burn-gpu-rvs':'amd-stress'};
|
{id:'burn-gpu-dcgm', target:'nvidia', label:'DCGM Diagnostics (Official NVIDIA)', extra:{display_name:'NVIDIA DCGM Diagnostics (Official)'}},
|
||||||
let last = null;
|
{id:'burn-gpu-john', target:'nvidia-stress', label:'John GPU Stress', extra:{loader:'john'}},
|
||||||
ids.filter(id => {
|
{id:'burn-gpu-nccl', target:'nvidia-stress', label:'NCCL Interconnect Stress', extra:{loader:'nccl', display_name:'NCCL Interconnect Stress'}},
|
||||||
const el = document.getElementById(id);
|
{id:'burn-gpu-rvs', target:'amd-stress', label:'RVS GST', extra:{}},
|
||||||
|
];
|
||||||
|
tasks.filter(t => {
|
||||||
|
const el = document.getElementById(t.id);
|
||||||
return el && el.checked && !el.disabled;
|
return el && el.checked && !el.disabled;
|
||||||
}).forEach(id => {
|
}).forEach(t => {
|
||||||
const target = targetMap[id];
|
enqueueTask(t.target, t.extra).then(d => { streamTask(d.task_id, t.label); });
|
||||||
const extra = target === 'nvidia-stress' ? {loader: loaderMap[id]} : {};
|
|
||||||
enqueueTask(target, extra).then(d => { last = d; streamTask(d.task_id, target + ' / ' + loaderMap[id]); });
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -904,13 +1122,15 @@ function runAll() {
|
|||||||
const done = () => { count++; status.textContent = count + ' tasks queued.'; };
|
const done = () => { count++; status.textContent = count + ' tasks queued.'; };
|
||||||
|
|
||||||
// GPU tests
|
// GPU tests
|
||||||
const gpuIds = ['burn-gpu-bee','burn-gpu-john','burn-gpu-nccl','burn-gpu-rvs'];
|
const gpuTasks = [
|
||||||
const loaderMap = {'burn-gpu-bee':'builtin','burn-gpu-john':'john','burn-gpu-nccl':'nccl','burn-gpu-rvs':'rvs'};
|
{id:'burn-gpu-bee', target:'nvidia-stress', label:'bee-gpu-burn', extra:{loader:'builtin'}},
|
||||||
const gpuTargetMap = {'burn-gpu-bee':'nvidia-stress','burn-gpu-john':'nvidia-stress','burn-gpu-nccl':'nvidia-stress','burn-gpu-rvs':'amd-stress'};
|
{id:'burn-gpu-dcgm', target:'nvidia', label:'DCGM Diagnostics (Official NVIDIA)', extra:{display_name:'NVIDIA DCGM Diagnostics (Official)'}},
|
||||||
gpuIds.filter(id => { const el = document.getElementById(id); return el && el.checked && !el.disabled; }).forEach(id => {
|
{id:'burn-gpu-john', target:'nvidia-stress', label:'John GPU Stress', extra:{loader:'john'}},
|
||||||
const target = gpuTargetMap[id];
|
{id:'burn-gpu-nccl', target:'nvidia-stress', label:'NCCL Interconnect Stress', extra:{loader:'nccl', display_name:'NCCL Interconnect Stress'}},
|
||||||
const extra = target === 'nvidia-stress' ? {loader: loaderMap[id]} : {};
|
{id:'burn-gpu-rvs', target:'amd-stress', label:'RVS GST', extra:{}},
|
||||||
enqueueTask(target, extra).then(d => { streamTask(d.task_id, target); done(); });
|
];
|
||||||
|
gpuTasks.filter(t => { const el = document.getElementById(t.id); return el && el.checked && !el.disabled; }).forEach(t => {
|
||||||
|
enqueueTask(t.target, t.extra).then(d => { streamTask(d.task_id, t.label); done(); });
|
||||||
});
|
});
|
||||||
|
|
||||||
// Compute tests
|
// Compute tests
|
||||||
@@ -931,17 +1151,19 @@ function runAll() {
|
|||||||
|
|
||||||
// Load GPU tool availability
|
// Load GPU tool availability
|
||||||
fetch('/api/gpu/tools').then(r => r.json()).then(tools => {
|
fetch('/api/gpu/tools').then(r => r.json()).then(tools => {
|
||||||
const nvidiaMap = {'bee-gpu-burn':'burn-gpu-bee','john':'burn-gpu-john','nccl':'burn-gpu-nccl','rvs':'burn-gpu-rvs'};
|
const nvidiaMap = {'bee-gpu-burn':'burn-gpu-bee','dcgm':'burn-gpu-dcgm','john':'burn-gpu-john','nccl':'burn-gpu-nccl','rvs':'burn-gpu-rvs'};
|
||||||
const noteMap = {'bee-gpu-burn':'note-bee','john':'note-john','nccl':'note-nccl','rvs':'note-rvs'};
|
const noteMap = {'bee-gpu-burn':'note-bee','dcgm':'note-dcgm','john':'note-john','nccl':'note-nccl','rvs':'note-rvs'};
|
||||||
tools.forEach(t => {
|
tools.forEach(t => {
|
||||||
const cb = document.getElementById(nvidiaMap[t.id]);
|
const cb = document.getElementById(nvidiaMap[t.id]);
|
||||||
const note = document.getElementById(noteMap[t.id]);
|
const note = document.getElementById(noteMap[t.id]);
|
||||||
if (!cb) return;
|
if (!cb) return;
|
||||||
if (t.available) {
|
if (t.available) {
|
||||||
cb.disabled = false;
|
cb.disabled = false;
|
||||||
if (t.id === 'bee-gpu-burn') cb.checked = true;
|
if (t.id === 'bee-gpu-burn' || t.id === 'dcgm') cb.checked = true;
|
||||||
} else {
|
} else {
|
||||||
const reason = t.vendor === 'nvidia' ? 'NVIDIA driver not running' : 'AMD driver not running';
|
let reason = t.vendor === 'nvidia' ? 'NVIDIA driver not running' : 'AMD driver not running';
|
||||||
|
if (t.id === 'dcgm' && t.vendor === 'nvidia') reason = 'dcgmi not available or NVIDIA driver not running';
|
||||||
|
if (t.id === 'nccl' && t.vendor === 'nvidia') reason = 'NCCL interconnect tool unavailable or NVIDIA driver not running';
|
||||||
if (note) note.textContent = '— ' + reason;
|
if (note) note.textContent = '— ' + reason;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
@@ -1101,7 +1323,8 @@ func renderNetwork() string {
|
|||||||
// ── Services ──────────────────────────────────────────────────────────────────
|
// ── Services ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
func renderServicesInline() string {
|
func renderServicesInline() string {
|
||||||
return `<div style="display:flex;justify-content:flex-end;gap:8px;flex-wrap:wrap;margin-bottom:8px"><button class="btn btn-sm btn-secondary" onclick="restartGPUDrivers()">Restart GPU Drivers</button><button class="btn btn-sm btn-secondary" onclick="loadServices()">↻ Refresh</button></div>
|
return `<p style="font-size:13px;color:var(--muted);margin-bottom:10px">` + html.EscapeString(`bee-selfheal.timer is expected to be active; the oneshot bee-selfheal.service itself is not shown as a long-running service.`) + `</p>
|
||||||
|
<div style="display:flex;justify-content:flex-end;gap:8px;flex-wrap:wrap;margin-bottom:8px"><button class="btn btn-sm btn-secondary" onclick="restartGPUDrivers()">Restart GPU Drivers</button><button class="btn btn-sm btn-secondary" onclick="loadServices()">↻ Refresh</button></div>
|
||||||
<div id="svc-table"><p style="color:var(--muted);font-size:13px">Loading...</p></div>
|
<div id="svc-table"><p style="color:var(--muted);font-size:13px">Loading...</p></div>
|
||||||
<div id="svc-out" style="display:none;margin-top:8px" class="card">
|
<div id="svc-out" style="display:none;margin-top:8px" class="card">
|
||||||
<div class="card-head">Output</div>
|
<div class="card-head">Output</div>
|
||||||
@@ -1127,7 +1350,7 @@ function loadServices() {
|
|||||||
'</td></tr>';
|
'</td></tr>';
|
||||||
}).join('');
|
}).join('');
|
||||||
document.getElementById('svc-table').innerHTML =
|
document.getElementById('svc-table').innerHTML =
|
||||||
'<table><tr><th>Service</th><th>Status</th><th>Actions</th></tr>'+rows+'</table>';
|
'<table><tr><th>Unit</th><th>Status</th><th>Actions</th></tr>'+rows+'</table>';
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
function toggleBody(id) {
|
function toggleBody(id) {
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ import (
|
|||||||
"html"
|
"html"
|
||||||
"io"
|
"io"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"math"
|
|
||||||
"mime"
|
"mime"
|
||||||
"net"
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
@@ -16,7 +15,6 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime/debug"
|
"runtime/debug"
|
||||||
"sort"
|
"sort"
|
||||||
"strconv"
|
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
@@ -24,7 +22,6 @@ import (
|
|||||||
"bee/audit/internal/app"
|
"bee/audit/internal/app"
|
||||||
"bee/audit/internal/platform"
|
"bee/audit/internal/platform"
|
||||||
"bee/audit/internal/runtimeenv"
|
"bee/audit/internal/runtimeenv"
|
||||||
gocharts "github.com/go-analyze/charts"
|
|
||||||
"reanimator/chart/viewer"
|
"reanimator/chart/viewer"
|
||||||
"reanimator/chart/web"
|
"reanimator/chart/web"
|
||||||
)
|
)
|
||||||
@@ -253,6 +250,7 @@ func NewHandler(opts HandlerOptions) http.Handler {
|
|||||||
mux.HandleFunc("POST /api/sat/platform-stress/run", h.handleAPISATRun("platform-stress"))
|
mux.HandleFunc("POST /api/sat/platform-stress/run", h.handleAPISATRun("platform-stress"))
|
||||||
mux.HandleFunc("GET /api/sat/stream", h.handleAPISATStream)
|
mux.HandleFunc("GET /api/sat/stream", h.handleAPISATStream)
|
||||||
mux.HandleFunc("POST /api/sat/abort", h.handleAPISATAbort)
|
mux.HandleFunc("POST /api/sat/abort", h.handleAPISATAbort)
|
||||||
|
mux.HandleFunc("POST /api/benchmark/nvidia/run", h.handleAPIBenchmarkNvidiaRun)
|
||||||
|
|
||||||
// Tasks
|
// Tasks
|
||||||
mux.HandleFunc("GET /api/tasks", h.handleAPITasksList)
|
mux.HandleFunc("GET /api/tasks", h.handleAPITasksList)
|
||||||
@@ -289,6 +287,7 @@ func NewHandler(opts HandlerOptions) http.Handler {
|
|||||||
|
|
||||||
// GPU presence / tools
|
// GPU presence / tools
|
||||||
mux.HandleFunc("GET /api/gpu/presence", h.handleAPIGPUPresence)
|
mux.HandleFunc("GET /api/gpu/presence", h.handleAPIGPUPresence)
|
||||||
|
mux.HandleFunc("GET /api/gpu/nvidia", h.handleAPIGNVIDIAGPUs)
|
||||||
mux.HandleFunc("GET /api/gpu/tools", h.handleAPIGPUTools)
|
mux.HandleFunc("GET /api/gpu/tools", h.handleAPIGPUTools)
|
||||||
|
|
||||||
// System
|
// System
|
||||||
@@ -555,13 +554,14 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
|
|||||||
http.Error(w, "metrics database not available", http.StatusServiceUnavailable)
|
http.Error(w, "metrics database not available", http.StatusServiceUnavailable)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
samples, err := h.metricsDB.LoadAll()
|
||||||
|
if err != nil || len(samples) == 0 {
|
||||||
|
http.Error(w, "metrics history unavailable", http.StatusServiceUnavailable)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
timeline := metricsTimelineSegments(samples, time.Now())
|
||||||
if idx, sub, ok := parseGPUChartPath(path); ok && sub == "overview" {
|
if idx, sub, ok := parseGPUChartPath(path); ok && sub == "overview" {
|
||||||
samples, err := h.metricsDB.LoadAll()
|
buf, ok, err := renderGPUOverviewChartSVG(idx, samples, timeline)
|
||||||
if err != nil || len(samples) == 0 {
|
|
||||||
http.Error(w, "metrics history unavailable", http.StatusServiceUnavailable)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
buf, ok, err := renderGPUOverviewChartSVG(idx, samples)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||||
return
|
return
|
||||||
@@ -575,13 +575,23 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
|
|||||||
_, _ = w.Write(buf)
|
_, _ = w.Write(buf)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
datasets, names, labels, title, yMin, yMax, ok := h.chartDataFromDB(path)
|
datasets, names, labels, title, yMin, yMax, ok := chartDataFromSamples(path, samples)
|
||||||
if !ok {
|
if !ok {
|
||||||
http.Error(w, "metrics history unavailable", http.StatusServiceUnavailable)
|
http.Error(w, "metrics history unavailable", http.StatusServiceUnavailable)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
buf, err := renderChartSVG(title, datasets, names, labels, yMin, yMax)
|
buf, err := renderMetricChartSVG(
|
||||||
|
title,
|
||||||
|
labels,
|
||||||
|
sampleTimes(samples),
|
||||||
|
datasets,
|
||||||
|
names,
|
||||||
|
yMin,
|
||||||
|
yMax,
|
||||||
|
chartCanvasHeightForPath(path, len(names)),
|
||||||
|
timeline,
|
||||||
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||||
return
|
return
|
||||||
@@ -591,14 +601,6 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
|
|||||||
_, _ = w.Write(buf)
|
_, _ = w.Write(buf)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *handler) chartDataFromDB(path string) ([][]float64, []string, []string, string, *float64, *float64, bool) {
|
|
||||||
samples, err := h.metricsDB.LoadAll()
|
|
||||||
if err != nil || len(samples) == 0 {
|
|
||||||
return nil, nil, nil, "", nil, nil, false
|
|
||||||
}
|
|
||||||
return chartDataFromSamples(path, samples)
|
|
||||||
}
|
|
||||||
|
|
||||||
func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][]float64, []string, []string, string, *float64, *float64, bool) {
|
func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][]float64, []string, []string, string, *float64, *float64, bool) {
|
||||||
var datasets [][]float64
|
var datasets [][]float64
|
||||||
var names []string
|
var names []string
|
||||||
@@ -996,247 +998,6 @@ func autoBounds120(datasets ...[]float64) (*float64, *float64) {
|
|||||||
return floatPtr(low), floatPtr(high)
|
return floatPtr(low), floatPtr(high)
|
||||||
}
|
}
|
||||||
|
|
||||||
func renderGPUOverviewChartSVG(idx int, samples []platform.LiveMetricSample) ([]byte, bool, error) {
|
|
||||||
temp := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.TempC })
|
|
||||||
power := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.PowerW })
|
|
||||||
coreClock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.ClockMHz })
|
|
||||||
memClock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.MemClockMHz })
|
|
||||||
if temp == nil && power == nil && coreClock == nil && memClock == nil {
|
|
||||||
return nil, false, nil
|
|
||||||
}
|
|
||||||
labels := sampleTimeLabels(samples)
|
|
||||||
svg, err := drawGPUOverviewChartSVG(
|
|
||||||
fmt.Sprintf("GPU %d Overview", idx),
|
|
||||||
labels,
|
|
||||||
[]gpuOverviewSeries{
|
|
||||||
{Name: "Temp C", Values: coalesceDataset(temp, len(samples)), Color: "#f05a5a", AxisTitle: "Temp C"},
|
|
||||||
{Name: "Power W", Values: coalesceDataset(power, len(samples)), Color: "#ffb357", AxisTitle: "Power W"},
|
|
||||||
{Name: "Core Clock MHz", Values: coalesceDataset(coreClock, len(samples)), Color: "#73bf69", AxisTitle: "Core MHz"},
|
|
||||||
{Name: "Memory Clock MHz", Values: coalesceDataset(memClock, len(samples)), Color: "#5794f2", AxisTitle: "Memory MHz"},
|
|
||||||
},
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
return nil, false, err
|
|
||||||
}
|
|
||||||
return svg, true, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
type gpuOverviewSeries struct {
|
|
||||||
Name string
|
|
||||||
AxisTitle string
|
|
||||||
Color string
|
|
||||||
Values []float64
|
|
||||||
}
|
|
||||||
|
|
||||||
func drawGPUOverviewChartSVG(title string, labels []string, series []gpuOverviewSeries) ([]byte, error) {
|
|
||||||
if len(series) != 4 {
|
|
||||||
return nil, fmt.Errorf("gpu overview requires 4 series, got %d", len(series))
|
|
||||||
}
|
|
||||||
const (
|
|
||||||
width = 1400
|
|
||||||
height = 420
|
|
||||||
plotLeft = 180
|
|
||||||
plotRight = 1220
|
|
||||||
plotTop = 74
|
|
||||||
plotBottom = 292
|
|
||||||
)
|
|
||||||
const (
|
|
||||||
leftOuterAxis = 72
|
|
||||||
leftInnerAxis = 132
|
|
||||||
rightInnerAxis = 1268
|
|
||||||
rightOuterAxis = 1328
|
|
||||||
)
|
|
||||||
axisX := []int{leftOuterAxis, leftInnerAxis, rightInnerAxis, rightOuterAxis}
|
|
||||||
plotWidth := plotRight - plotLeft
|
|
||||||
plotHeight := plotBottom - plotTop
|
|
||||||
|
|
||||||
pointCount := len(labels)
|
|
||||||
if pointCount == 0 {
|
|
||||||
pointCount = 1
|
|
||||||
labels = []string{""}
|
|
||||||
}
|
|
||||||
for i := range series {
|
|
||||||
if len(series[i].Values) == 0 {
|
|
||||||
series[i].Values = make([]float64, pointCount)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
type axisScale struct {
|
|
||||||
Min float64
|
|
||||||
Max float64
|
|
||||||
Ticks []float64
|
|
||||||
}
|
|
||||||
scales := make([]axisScale, len(series))
|
|
||||||
for i := range series {
|
|
||||||
min, max := gpuChartSeriesBounds(series[i].Values)
|
|
||||||
ticks := gpuChartNiceTicks(min, max, 8)
|
|
||||||
scales[i] = axisScale{
|
|
||||||
Min: ticks[0],
|
|
||||||
Max: ticks[len(ticks)-1],
|
|
||||||
Ticks: ticks,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
xFor := func(index int) float64 {
|
|
||||||
if pointCount <= 1 {
|
|
||||||
return float64(plotLeft + plotWidth/2)
|
|
||||||
}
|
|
||||||
return float64(plotLeft) + float64(index)*float64(plotWidth)/float64(pointCount-1)
|
|
||||||
}
|
|
||||||
yFor := func(value float64, scale axisScale) float64 {
|
|
||||||
if scale.Max <= scale.Min {
|
|
||||||
return float64(plotTop + plotHeight/2)
|
|
||||||
}
|
|
||||||
return float64(plotBottom) - (value-scale.Min)/(scale.Max-scale.Min)*float64(plotHeight)
|
|
||||||
}
|
|
||||||
|
|
||||||
var b strings.Builder
|
|
||||||
b.WriteString(fmt.Sprintf(`<svg xmlns="http://www.w3.org/2000/svg" width="%d" height="%d" viewBox="0 0 %d %d">`, width, height, width, height))
|
|
||||||
b.WriteString("\n")
|
|
||||||
b.WriteString(`<rect width="100%" height="100%" rx="10" ry="10" fill="#111217" stroke="#2f3440"/>` + "\n")
|
|
||||||
b.WriteString(`<text x="700" y="28" text-anchor="middle" font-family="sans-serif" font-size="16" font-weight="700" fill="#f5f7fa">` + sanitizeChartText(title) + `</text>` + "\n")
|
|
||||||
|
|
||||||
b.WriteString(`<g stroke="#2f3440" stroke-width="1">` + "\n")
|
|
||||||
for _, tick := range scales[0].Ticks {
|
|
||||||
y := yFor(tick, scales[0])
|
|
||||||
fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f"/>`+"\n", plotLeft, y, plotRight, y)
|
|
||||||
}
|
|
||||||
for _, idx := range gpuChartLabelIndices(pointCount, 8) {
|
|
||||||
x := xFor(idx)
|
|
||||||
fmt.Fprintf(&b, `<line x1="%.1f" y1="%d" x2="%.1f" y2="%d"/>`+"\n", x, plotTop, x, plotBottom)
|
|
||||||
}
|
|
||||||
b.WriteString("</g>\n")
|
|
||||||
|
|
||||||
fmt.Fprintf(&b, `<rect x="%d" y="%d" width="%d" height="%d" fill="none" stroke="#454c5c" stroke-width="1"/>`+"\n",
|
|
||||||
plotLeft, plotTop, plotWidth, plotHeight)
|
|
||||||
|
|
||||||
for i, axisLineX := range axisX {
|
|
||||||
fmt.Fprintf(&b, `<line x1="%d" y1="%d" x2="%d" y2="%d" stroke="%s" stroke-width="1"/>`+"\n",
|
|
||||||
axisLineX, plotTop, axisLineX, plotBottom, series[i].Color)
|
|
||||||
fmt.Fprintf(&b, `<text x="%d" y="%d" text-anchor="middle" font-family="sans-serif" font-size="11" font-weight="700" fill="%s">%s</text>`+"\n",
|
|
||||||
axisLineX, 52, series[i].Color, sanitizeChartText(series[i].AxisTitle))
|
|
||||||
for _, tick := range scales[i].Ticks {
|
|
||||||
y := yFor(tick, scales[i])
|
|
||||||
label := sanitizeChartText(gpuChartFormatTick(tick))
|
|
||||||
if i < 2 {
|
|
||||||
fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f" stroke="%s" stroke-width="1"/>`+"\n",
|
|
||||||
axisLineX, y, axisLineX+6, y, series[i].Color)
|
|
||||||
fmt.Fprintf(&b, `<text x="%d" y="%.1f" text-anchor="end" dy="4" font-family="sans-serif" font-size="10" fill="%s">%s</text>`+"\n",
|
|
||||||
axisLineX-8, y, series[i].Color, label)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f" stroke="%s" stroke-width="1"/>`+"\n",
|
|
||||||
axisLineX, y, axisLineX-6, y, series[i].Color)
|
|
||||||
fmt.Fprintf(&b, `<text x="%d" y="%.1f" text-anchor="start" dy="4" font-family="sans-serif" font-size="10" fill="%s">%s</text>`+"\n",
|
|
||||||
axisLineX+8, y, series[i].Color, label)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
b.WriteString(`<g font-family="sans-serif" font-size="11" fill="#c8d0d8" text-anchor="middle">` + "\n")
|
|
||||||
for _, idx := range gpuChartLabelIndices(pointCount, 8) {
|
|
||||||
x := xFor(idx)
|
|
||||||
fmt.Fprintf(&b, `<text x="%.1f" y="%d">%s</text>`+"\n", x, plotBottom+22, sanitizeChartText(labels[idx]))
|
|
||||||
}
|
|
||||||
b.WriteString(`</g>` + "\n")
|
|
||||||
b.WriteString(`<text x="700" y="338" text-anchor="middle" font-family="sans-serif" font-size="12" fill="#c8d0d8">Time</text>` + "\n")
|
|
||||||
|
|
||||||
for i := range series {
|
|
||||||
var points strings.Builder
|
|
||||||
for j, value := range series[i].Values {
|
|
||||||
if j > 0 {
|
|
||||||
points.WriteByte(' ')
|
|
||||||
}
|
|
||||||
points.WriteString(strconv.FormatFloat(xFor(j), 'f', 1, 64))
|
|
||||||
points.WriteByte(',')
|
|
||||||
points.WriteString(strconv.FormatFloat(yFor(value, scales[i]), 'f', 1, 64))
|
|
||||||
}
|
|
||||||
fmt.Fprintf(&b, `<polyline points="%s" fill="none" stroke="%s" stroke-width="2"/>`+"\n",
|
|
||||||
points.String(), series[i].Color)
|
|
||||||
if len(series[i].Values) == 1 {
|
|
||||||
fmt.Fprintf(&b, `<circle cx="%.1f" cy="%.1f" r="3" fill="%s"/>`+"\n",
|
|
||||||
xFor(0), yFor(series[i].Values[0], scales[i]), series[i].Color)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const legendY = 372
|
|
||||||
legendX := []int{190, 470, 790, 1090}
|
|
||||||
for i := range series {
|
|
||||||
fmt.Fprintf(&b, `<line x1="%d" y1="%d" x2="%d" y2="%d" stroke="%s" stroke-width="3"/>`+"\n",
|
|
||||||
legendX[i], legendY, legendX[i]+28, legendY, series[i].Color)
|
|
||||||
fmt.Fprintf(&b, `<text x="%d" y="%d" font-family="sans-serif" font-size="12" fill="#f5f7fa">%s</text>`+"\n",
|
|
||||||
legendX[i]+38, legendY+4, sanitizeChartText(series[i].Name))
|
|
||||||
}
|
|
||||||
|
|
||||||
b.WriteString("</svg>\n")
|
|
||||||
return []byte(b.String()), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func gpuChartSeriesBounds(values []float64) (float64, float64) {
|
|
||||||
if len(values) == 0 {
|
|
||||||
return 0, 1
|
|
||||||
}
|
|
||||||
min, max := values[0], values[0]
|
|
||||||
for _, value := range values[1:] {
|
|
||||||
if value < min {
|
|
||||||
min = value
|
|
||||||
}
|
|
||||||
if value > max {
|
|
||||||
max = value
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if min == max {
|
|
||||||
if max == 0 {
|
|
||||||
return 0, 1
|
|
||||||
}
|
|
||||||
pad := math.Abs(max) * 0.1
|
|
||||||
if pad == 0 {
|
|
||||||
pad = 1
|
|
||||||
}
|
|
||||||
min -= pad
|
|
||||||
max += pad
|
|
||||||
}
|
|
||||||
if min > 0 {
|
|
||||||
pad := (max - min) * 0.2
|
|
||||||
if pad == 0 {
|
|
||||||
pad = max * 0.1
|
|
||||||
}
|
|
||||||
min -= pad
|
|
||||||
if min < 0 {
|
|
||||||
min = 0
|
|
||||||
}
|
|
||||||
max += pad
|
|
||||||
}
|
|
||||||
return min, max
|
|
||||||
}
|
|
||||||
|
|
||||||
func gpuChartNiceTicks(min, max float64, target int) []float64 {
|
|
||||||
if min == max {
|
|
||||||
max = min + 1
|
|
||||||
}
|
|
||||||
span := max - min
|
|
||||||
step := math.Pow(10, math.Floor(math.Log10(span/float64(target))))
|
|
||||||
for _, factor := range []float64{1, 2, 5, 10} {
|
|
||||||
if span/(factor*step) <= float64(target)*1.5 {
|
|
||||||
step = factor * step
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
low := math.Floor(min/step) * step
|
|
||||||
high := math.Ceil(max/step) * step
|
|
||||||
var ticks []float64
|
|
||||||
for value := low; value <= high+step*0.001; value += step {
|
|
||||||
ticks = append(ticks, math.Round(value*1e9)/1e9)
|
|
||||||
}
|
|
||||||
return ticks
|
|
||||||
}
|
|
||||||
|
|
||||||
func gpuChartFormatTick(value float64) string {
|
|
||||||
if value == math.Trunc(value) {
|
|
||||||
return strconv.Itoa(int(value))
|
|
||||||
}
|
|
||||||
return strconv.FormatFloat(value, 'f', 1, 64)
|
|
||||||
}
|
|
||||||
|
|
||||||
func gpuChartLabelIndices(total, target int) []int {
|
func gpuChartLabelIndices(total, target int) []int {
|
||||||
if total <= 0 {
|
if total <= 0 {
|
||||||
return nil
|
return nil
|
||||||
@@ -1258,64 +1019,16 @@ func gpuChartLabelIndices(total, target int) []int {
|
|||||||
return indices
|
return indices
|
||||||
}
|
}
|
||||||
|
|
||||||
// renderChartSVG renders a line chart SVG with a fixed Y-axis range.
|
func chartCanvasHeightForPath(path string, seriesCount int) int {
|
||||||
func renderChartSVG(title string, datasets [][]float64, names []string, labels []string, yMin, yMax *float64) ([]byte, error) {
|
height := chartCanvasHeight(seriesCount)
|
||||||
n := len(labels)
|
if isGPUChartPath(path) {
|
||||||
if n == 0 {
|
return height * 2
|
||||||
n = 1
|
|
||||||
labels = []string{""}
|
|
||||||
}
|
}
|
||||||
for i := range datasets {
|
return height
|
||||||
if len(datasets[i]) == 0 {
|
}
|
||||||
datasets[i] = make([]float64, n)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Append global min/avg/max to title.
|
|
||||||
mn, avg, mx := globalStats(datasets)
|
|
||||||
if mx > 0 {
|
|
||||||
title = fmt.Sprintf("%s ↓%s ~%s ↑%s",
|
|
||||||
title,
|
|
||||||
chartLegendNumber(mn),
|
|
||||||
chartLegendNumber(avg),
|
|
||||||
chartLegendNumber(mx),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
title = sanitizeChartText(title)
|
|
||||||
names = sanitizeChartTexts(names)
|
|
||||||
sparse := sanitizeChartTexts(sparseLabels(labels, 6))
|
|
||||||
|
|
||||||
opt := gocharts.NewLineChartOptionWithData(datasets)
|
func isGPUChartPath(path string) bool {
|
||||||
opt.Title = gocharts.TitleOption{Text: title}
|
return strings.HasPrefix(path, "gpu-all-") || strings.HasPrefix(path, "gpu/")
|
||||||
opt.XAxis.Labels = sparse
|
|
||||||
opt.Legend = gocharts.LegendOption{SeriesNames: names}
|
|
||||||
if chartLegendVisible(len(names)) {
|
|
||||||
opt.Legend.Offset = gocharts.OffsetStr{Top: gocharts.PositionBottom}
|
|
||||||
opt.Legend.OverlayChart = gocharts.Ptr(false)
|
|
||||||
} else {
|
|
||||||
opt.Legend.Show = gocharts.Ptr(false)
|
|
||||||
}
|
|
||||||
opt.Symbol = gocharts.SymbolNone
|
|
||||||
// Right padding: reserve space for the MarkLine label (library recommendation).
|
|
||||||
opt.Padding = gocharts.NewBox(20, 20, 80, 20)
|
|
||||||
if yMin != nil || yMax != nil {
|
|
||||||
opt.YAxis = []gocharts.YAxisOption{chartYAxisOption(yMin, yMax)}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add a single peak mark line on the series that holds the global maximum.
|
|
||||||
peakIdx, _ := globalPeakSeries(datasets)
|
|
||||||
if peakIdx >= 0 && peakIdx < len(opt.SeriesList) {
|
|
||||||
opt.SeriesList[peakIdx].MarkLine = gocharts.NewMarkLine(gocharts.SeriesMarkTypeMax)
|
|
||||||
}
|
|
||||||
|
|
||||||
p := gocharts.NewPainter(gocharts.PainterOptions{
|
|
||||||
OutputFormat: gocharts.ChartOutputSVG,
|
|
||||||
Width: 1400,
|
|
||||||
Height: chartCanvasHeight(len(names)),
|
|
||||||
}, gocharts.PainterThemeOption(gocharts.GetTheme("grafana")))
|
|
||||||
if err := p.LineChart(opt); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return p.Bytes()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func chartLegendVisible(seriesCount int) bool {
|
func chartLegendVisible(seriesCount int) bool {
|
||||||
@@ -1329,30 +1042,6 @@ func chartCanvasHeight(seriesCount int) int {
|
|||||||
return 288
|
return 288
|
||||||
}
|
}
|
||||||
|
|
||||||
func chartYAxisOption(yMin, yMax *float64) gocharts.YAxisOption {
|
|
||||||
return gocharts.YAxisOption{
|
|
||||||
Min: yMin,
|
|
||||||
Max: yMax,
|
|
||||||
LabelCount: 11,
|
|
||||||
ValueFormatter: chartYAxisNumber,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// globalPeakSeries returns the index of the series containing the global maximum
|
|
||||||
// value across all datasets, and that maximum value.
|
|
||||||
func globalPeakSeries(datasets [][]float64) (idx int, peak float64) {
|
|
||||||
idx = -1
|
|
||||||
for i, ds := range datasets {
|
|
||||||
for _, v := range ds {
|
|
||||||
if v > peak {
|
|
||||||
peak = v
|
|
||||||
idx = i
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return idx, peak
|
|
||||||
}
|
|
||||||
|
|
||||||
// globalStats returns min, average, and max across all values in all datasets.
|
// globalStats returns min, average, and max across all values in all datasets.
|
||||||
func globalStats(datasets [][]float64) (mn, avg, mx float64) {
|
func globalStats(datasets [][]float64) (mn, avg, mx float64) {
|
||||||
var sum float64
|
var sum float64
|
||||||
@@ -1392,21 +1081,6 @@ func sanitizeChartText(s string) string {
|
|||||||
}, s))
|
}, s))
|
||||||
}
|
}
|
||||||
|
|
||||||
func sanitizeChartTexts(in []string) []string {
|
|
||||||
out := make([]string, len(in))
|
|
||||||
for i, s := range in {
|
|
||||||
out[i] = sanitizeChartText(s)
|
|
||||||
}
|
|
||||||
return out
|
|
||||||
}
|
|
||||||
|
|
||||||
func safeIdx(s []float64, i int) float64 {
|
|
||||||
if i < len(s) {
|
|
||||||
return s[i]
|
|
||||||
}
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
func snapshotNamedRings(rings []*namedMetricsRing) ([][]float64, []string, []string) {
|
func snapshotNamedRings(rings []*namedMetricsRing) ([][]float64, []string, []string) {
|
||||||
var datasets [][]float64
|
var datasets [][]float64
|
||||||
var names []string
|
var names []string
|
||||||
@@ -1493,20 +1167,6 @@ func chartYAxisNumber(v float64) string {
|
|||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
func sparseLabels(labels []string, n int) []string {
|
|
||||||
out := make([]string, len(labels))
|
|
||||||
step := len(labels) / n
|
|
||||||
if step < 1 {
|
|
||||||
step = 1
|
|
||||||
}
|
|
||||||
for i, l := range labels {
|
|
||||||
if i%step == 0 {
|
|
||||||
out[i] = l
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return out
|
|
||||||
}
|
|
||||||
|
|
||||||
func (h *handler) handleAPIMetricsExportCSV(w http.ResponseWriter, r *http.Request) {
|
func (h *handler) handleAPIMetricsExportCSV(w http.ResponseWriter, r *http.Request) {
|
||||||
if h.metricsDB == nil {
|
if h.metricsDB == nil {
|
||||||
http.Error(w, "metrics database not available", http.StatusServiceUnavailable)
|
http.Error(w, "metrics database not available", http.StatusServiceUnavailable)
|
||||||
|
|||||||
@@ -304,6 +304,124 @@ func TestChartCanvasHeight(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestChartTimelineSegmentsForRangeMergesActiveSpansAndIdleGaps(t *testing.T) {
|
||||||
|
start := time.Date(2026, 4, 5, 12, 0, 0, 0, time.UTC)
|
||||||
|
end := start.Add(10 * time.Minute)
|
||||||
|
taskWindow := func(offsetStart, offsetEnd time.Duration) Task {
|
||||||
|
s := start.Add(offsetStart)
|
||||||
|
e := start.Add(offsetEnd)
|
||||||
|
return Task{
|
||||||
|
Name: "task",
|
||||||
|
Status: TaskDone,
|
||||||
|
StartedAt: &s,
|
||||||
|
DoneAt: &e,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
segments := chartTimelineSegmentsForRange(start, end, end, []Task{
|
||||||
|
taskWindow(1*time.Minute, 3*time.Minute),
|
||||||
|
taskWindow(2*time.Minute, 5*time.Minute),
|
||||||
|
taskWindow(7*time.Minute, 8*time.Minute),
|
||||||
|
})
|
||||||
|
if len(segments) != 5 {
|
||||||
|
t.Fatalf("segments=%d want 5: %#v", len(segments), segments)
|
||||||
|
}
|
||||||
|
wantActive := []bool{false, true, false, true, false}
|
||||||
|
wantMinutes := [][2]int{{0, 1}, {1, 5}, {5, 7}, {7, 8}, {8, 10}}
|
||||||
|
for i, segment := range segments {
|
||||||
|
if segment.Active != wantActive[i] {
|
||||||
|
t.Fatalf("segment[%d].Active=%v want %v", i, segment.Active, wantActive[i])
|
||||||
|
}
|
||||||
|
if got := int(segment.Start.Sub(start).Minutes()); got != wantMinutes[i][0] {
|
||||||
|
t.Fatalf("segment[%d] start=%d want %d", i, got, wantMinutes[i][0])
|
||||||
|
}
|
||||||
|
if got := int(segment.End.Sub(start).Minutes()); got != wantMinutes[i][1] {
|
||||||
|
t.Fatalf("segment[%d] end=%d want %d", i, got, wantMinutes[i][1])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRenderMetricChartSVGIncludesTimelineOverlay(t *testing.T) {
|
||||||
|
start := time.Date(2026, 4, 5, 12, 0, 0, 0, time.UTC)
|
||||||
|
labels := []string{"12:00", "12:01", "12:02"}
|
||||||
|
times := []time.Time{start, start.Add(time.Minute), start.Add(2 * time.Minute)}
|
||||||
|
svg, err := renderMetricChartSVG(
|
||||||
|
"System Power",
|
||||||
|
labels,
|
||||||
|
times,
|
||||||
|
[][]float64{{300, 320, 310}},
|
||||||
|
[]string{"Power W"},
|
||||||
|
floatPtr(0),
|
||||||
|
floatPtr(400),
|
||||||
|
360,
|
||||||
|
[]chartTimelineSegment{
|
||||||
|
{Start: start, End: start.Add(time.Minute), Active: false},
|
||||||
|
{Start: start.Add(time.Minute), End: start.Add(2 * time.Minute), Active: true},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
body := string(svg)
|
||||||
|
if !strings.Contains(body, `data-role="timeline-overlay"`) {
|
||||||
|
t.Fatalf("svg missing timeline overlay: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, `opacity="0.10"`) {
|
||||||
|
t.Fatalf("svg missing idle overlay opacity: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, `System Power`) {
|
||||||
|
t.Fatalf("svg missing chart title: %s", body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHandleMetricsChartSVGRendersCustomSVG(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
db, err := openMetricsDB(filepath.Join(dir, "metrics.db"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { _ = db.db.Close() })
|
||||||
|
|
||||||
|
start := time.Date(2026, 4, 5, 12, 0, 0, 0, time.UTC)
|
||||||
|
for i, sample := range []platform.LiveMetricSample{
|
||||||
|
{Timestamp: start, PowerW: 300},
|
||||||
|
{Timestamp: start.Add(time.Minute), PowerW: 320},
|
||||||
|
{Timestamp: start.Add(2 * time.Minute), PowerW: 310},
|
||||||
|
} {
|
||||||
|
if err := db.Write(sample); err != nil {
|
||||||
|
t.Fatalf("write sample %d: %v", i, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
globalQueue.mu.Lock()
|
||||||
|
prevTasks := globalQueue.tasks
|
||||||
|
s := start.Add(30 * time.Second)
|
||||||
|
e := start.Add(90 * time.Second)
|
||||||
|
globalQueue.tasks = []*Task{{Name: "Burn", Status: TaskDone, StartedAt: &s, DoneAt: &e}}
|
||||||
|
globalQueue.mu.Unlock()
|
||||||
|
t.Cleanup(func() {
|
||||||
|
globalQueue.mu.Lock()
|
||||||
|
globalQueue.tasks = prevTasks
|
||||||
|
globalQueue.mu.Unlock()
|
||||||
|
})
|
||||||
|
|
||||||
|
h := &handler{opts: HandlerOptions{ExportDir: dir}, metricsDB: db}
|
||||||
|
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/api/metrics/chart/server-power.svg", nil)
|
||||||
|
h.handleMetricsChartSVG(rec, req)
|
||||||
|
|
||||||
|
if rec.Code != http.StatusOK {
|
||||||
|
t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
|
||||||
|
}
|
||||||
|
body := rec.Body.String()
|
||||||
|
if !strings.Contains(body, `data-role="timeline-overlay"`) {
|
||||||
|
t.Fatalf("custom svg response missing timeline overlay: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, `stroke-linecap="round"`) {
|
||||||
|
t.Fatalf("custom svg response missing custom polyline styling: %s", body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestNormalizeFanSeriesHoldsLastPositive(t *testing.T) {
|
func TestNormalizeFanSeriesHoldsLastPositive(t *testing.T) {
|
||||||
got := normalizeFanSeries([]float64{4200, 0, 0, 4300, 0})
|
got := normalizeFanSeries([]float64{4200, 0, 0, 4300, 0})
|
||||||
want := []float64{4200, 4200, 4200, 4300, 4300}
|
want := []float64{4200, 4200, 4200, 4300, 4300}
|
||||||
@@ -317,21 +435,6 @@ func TestNormalizeFanSeriesHoldsLastPositive(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestChartYAxisOption(t *testing.T) {
|
|
||||||
min := floatPtr(0)
|
|
||||||
max := floatPtr(100)
|
|
||||||
opt := chartYAxisOption(min, max)
|
|
||||||
if opt.Min != min || opt.Max != max {
|
|
||||||
t.Fatalf("chartYAxisOption min/max mismatch: %#v", opt)
|
|
||||||
}
|
|
||||||
if opt.LabelCount != 11 {
|
|
||||||
t.Fatalf("chartYAxisOption labelCount=%d want 11", opt.LabelCount)
|
|
||||||
}
|
|
||||||
if got := opt.ValueFormatter(1000); got != "1к" {
|
|
||||||
t.Fatalf("chartYAxisOption formatter(1000)=%q want 1к", got)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestSnapshotFanRingsUsesTimelineLabels(t *testing.T) {
|
func TestSnapshotFanRingsUsesTimelineLabels(t *testing.T) {
|
||||||
r1 := newMetricsRing(4)
|
r1 := newMetricsRing(4)
|
||||||
r2 := newMetricsRing(4)
|
r2 := newMetricsRing(4)
|
||||||
@@ -514,6 +617,47 @@ func TestToolsPageRendersRestartGPUDriversButton(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestBenchmarkPageRendersGPUSelectionControls(t *testing.T) {
|
||||||
|
handler := NewHandler(HandlerOptions{})
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/benchmark", nil))
|
||||||
|
if rec.Code != http.StatusOK {
|
||||||
|
t.Fatalf("status=%d", rec.Code)
|
||||||
|
}
|
||||||
|
body := rec.Body.String()
|
||||||
|
for _, needle := range []string{
|
||||||
|
`href="/benchmark"`,
|
||||||
|
`id="benchmark-gpu-list"`,
|
||||||
|
`/api/gpu/nvidia`,
|
||||||
|
`/api/benchmark/nvidia/run`,
|
||||||
|
`benchmark-run-nccl`,
|
||||||
|
} {
|
||||||
|
if !strings.Contains(body, needle) {
|
||||||
|
t.Fatalf("benchmark page missing %q: %s", needle, body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBurnPageRendersOfficialNVIDIADCGMAndNCCLInterconnectLabel(t *testing.T) {
|
||||||
|
handler := NewHandler(HandlerOptions{})
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/burn", nil))
|
||||||
|
if rec.Code != http.StatusOK {
|
||||||
|
t.Fatalf("status=%d", rec.Code)
|
||||||
|
}
|
||||||
|
body := rec.Body.String()
|
||||||
|
for _, needle := range []string{
|
||||||
|
`DCGM Diagnostics (Official NVIDIA)`,
|
||||||
|
`NCCL all_reduce_perf (Interconnect)`,
|
||||||
|
`DCGM is the official NVIDIA diagnostic path`,
|
||||||
|
`burn-gpu-dcgm`,
|
||||||
|
} {
|
||||||
|
if !strings.Contains(body, needle) {
|
||||||
|
t.Fatalf("burn page missing %q: %s", needle, body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestTasksPageRendersScrollableLogModal(t *testing.T) {
|
func TestTasksPageRendersScrollableLogModal(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
path := filepath.Join(dir, "audit.json")
|
path := filepath.Join(dir, "audit.json")
|
||||||
|
|||||||
@@ -30,22 +30,23 @@ const (
|
|||||||
|
|
||||||
// NOTE(review): this span is a two-column diff rendering, so every line of the
// declaration shows up twice: the pre-change copy first, the post-change copy
// second. The only content difference between the two copies is the
// "nvidia-benchmark" entry, present in the post-change copy only.
// Keys are task target identifiers; values are the display names shown for them.
// taskNames maps target → human-readable name for validate (SAT) runs.
|
// taskNames maps target → human-readable name for validate (SAT) runs.
|
||||||
var taskNames = map[string]string{
|
var taskNames = map[string]string{
|
||||||
"nvidia": "NVIDIA SAT",
|
"nvidia": "NVIDIA SAT",
|
||||||
"nvidia-stress": "NVIDIA GPU Stress",
|
"nvidia-benchmark": "NVIDIA Benchmark",
|
||||||
"memory": "Memory SAT",
|
"nvidia-stress": "NVIDIA GPU Stress",
|
||||||
"storage": "Storage SAT",
|
"memory": "Memory SAT",
|
||||||
"cpu": "CPU SAT",
|
"storage": "Storage SAT",
|
||||||
"amd": "AMD GPU SAT",
|
"cpu": "CPU SAT",
|
||||||
"amd-mem": "AMD GPU MEM Integrity",
|
"amd": "AMD GPU SAT",
|
||||||
"amd-bandwidth": "AMD GPU MEM Bandwidth",
|
"amd-mem": "AMD GPU MEM Integrity",
|
||||||
"amd-stress": "AMD GPU Burn-in",
|
"amd-bandwidth": "AMD GPU MEM Bandwidth",
|
||||||
"memory-stress": "Memory Burn-in",
|
"amd-stress": "AMD GPU Burn-in",
|
||||||
"sat-stress": "SAT Stress (stressapptest)",
|
"memory-stress": "Memory Burn-in",
|
||||||
"platform-stress": "Platform Thermal Cycling",
|
"sat-stress": "SAT Stress (stressapptest)",
|
||||||
"audit": "Audit",
|
"platform-stress": "Platform Thermal Cycling",
|
||||||
"support-bundle": "Support Bundle",
|
"audit": "Audit",
|
||||||
"install": "Install to Disk",
|
"support-bundle": "Support Bundle",
|
||||||
"install-to-ram": "Install to RAM",
|
"install": "Install to Disk",
|
||||||
|
"install-to-ram": "Install to RAM",
|
||||||
}
|
}
|
||||||
|
|
||||||
// burnNames maps target → human-readable name when a burn profile is set.
|
// burnNames maps target → human-readable name when a burn profile is set.
|
||||||
@@ -108,8 +109,11 @@ type taskParams struct {
|
|||||||
DiagLevel int `json:"diag_level,omitempty"`
|
DiagLevel int `json:"diag_level,omitempty"`
|
||||||
GPUIndices []int `json:"gpu_indices,omitempty"`
|
GPUIndices []int `json:"gpu_indices,omitempty"`
|
||||||
ExcludeGPUIndices []int `json:"exclude_gpu_indices,omitempty"`
|
ExcludeGPUIndices []int `json:"exclude_gpu_indices,omitempty"`
|
||||||
|
SizeMB int `json:"size_mb,omitempty"`
|
||||||
Loader string `json:"loader,omitempty"`
|
Loader string `json:"loader,omitempty"`
|
||||||
BurnProfile string `json:"burn_profile,omitempty"`
|
BurnProfile string `json:"burn_profile,omitempty"`
|
||||||
|
BenchmarkProfile string `json:"benchmark_profile,omitempty"`
|
||||||
|
RunNCCL bool `json:"run_nccl,omitempty"`
|
||||||
DisplayName string `json:"display_name,omitempty"`
|
DisplayName string `json:"display_name,omitempty"`
|
||||||
Device string `json:"device,omitempty"` // for install
|
Device string `json:"device,omitempty"` // for install
|
||||||
PlatformComponents []string `json:"platform_components,omitempty"`
|
PlatformComponents []string `json:"platform_components,omitempty"`
|
||||||
@@ -547,6 +551,18 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
|
|||||||
} else {
|
} else {
|
||||||
archive, err = a.RunNvidiaAcceptancePack("", j.append)
|
archive, err = a.RunNvidiaAcceptancePack("", j.append)
|
||||||
}
|
}
|
||||||
|
case "nvidia-benchmark":
|
||||||
|
if a == nil {
|
||||||
|
err = fmt.Errorf("app not configured")
|
||||||
|
break
|
||||||
|
}
|
||||||
|
archive, err = a.RunNvidiaBenchmarkCtx(ctx, "", platform.NvidiaBenchmarkOptions{
|
||||||
|
Profile: t.params.BenchmarkProfile,
|
||||||
|
SizeMB: t.params.SizeMB,
|
||||||
|
GPUIndices: t.params.GPUIndices,
|
||||||
|
ExcludeGPUIndices: t.params.ExcludeGPUIndices,
|
||||||
|
RunNCCL: t.params.RunNCCL,
|
||||||
|
}, j.append)
|
||||||
case "nvidia-stress":
|
case "nvidia-stress":
|
||||||
if a == nil {
|
if a == nil {
|
||||||
err = fmt.Errorf("app not configured")
|
err = fmt.Errorf("app not configured")
|
||||||
|
|||||||
Reference in New Issue
Block a user