diff --git a/audit/internal/app/app.go b/audit/internal/app/app.go
index e898189..8d46181 100644
--- a/audit/internal/app/app.go
+++ b/audit/internal/app/app.go
@@ -114,6 +114,8 @@ type satRunner interface {
DetectGPUVendor() string
ListAMDGPUs() ([]platform.AMDGPUInfo, error)
RunAMDAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
+ RunAMDMemIntegrityPack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
+ RunAMDMemBandwidthPack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
RunAMDStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
RunMemoryStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
RunSATStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
@@ -577,6 +579,20 @@ func (a *App) RunAMDAcceptancePackResult(baseDir string) (ActionResult, error) {
return ActionResult{Title: "AMD GPU SAT", Body: satResultBody(path)}, err
}
+// RunAMDMemIntegrityPackCtx runs the AMD RVS MEM integrity pack via the
+// satRunner, defaulting baseDir to DefaultSATBaseDir when blank.
+func (a *App) RunAMDMemIntegrityPackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+ if strings.TrimSpace(baseDir) == "" {
+ baseDir = DefaultSATBaseDir
+ }
+ return a.sat.RunAMDMemIntegrityPack(ctx, baseDir, logFunc)
+}
+
+// RunAMDMemBandwidthPackCtx runs the AMD memory/interconnect bandwidth pack
+// via the satRunner, defaulting baseDir to DefaultSATBaseDir when blank.
+func (a *App) RunAMDMemBandwidthPackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+ if strings.TrimSpace(baseDir) == "" {
+ baseDir = DefaultSATBaseDir
+ }
+ return a.sat.RunAMDMemBandwidthPack(ctx, baseDir, logFunc)
+}
+
func (a *App) RunMemoryStressPack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
return a.RunMemoryStressPackCtx(context.Background(), baseDir, durationSec, logFunc)
}
diff --git a/audit/internal/app/app_test.go b/audit/internal/app/app_test.go
index fcd88c4..93d1b7f 100644
--- a/audit/internal/app/app_test.go
+++ b/audit/internal/app/app_test.go
@@ -181,6 +181,14 @@ func (f fakeSAT) RunAMDAcceptancePack(_ context.Context, baseDir string, _ func(
return "", nil
}
+// RunAMDMemIntegrityPack is a no-op stub satisfying the satRunner interface.
+func (f fakeSAT) RunAMDMemIntegrityPack(_ context.Context, _ string, _ func(string)) (string, error) {
+ return "", nil
+}
+
+// RunAMDMemBandwidthPack is a no-op stub satisfying the satRunner interface.
+func (f fakeSAT) RunAMDMemBandwidthPack(_ context.Context, _ string, _ func(string)) (string, error) {
+ return "", nil
+}
+
func (f fakeSAT) RunAMDStressPack(_ context.Context, _ string, _ int, _ func(string)) (string, error) {
return "", nil
}
diff --git a/audit/internal/platform/sat.go b/audit/internal/platform/sat.go
index b7f3d2d..041b402 100644
--- a/audit/internal/platform/sat.go
+++ b/audit/internal/platform/sat.go
@@ -136,6 +136,54 @@ func (s *System) RunAMDAcceptancePack(ctx context.Context, baseDir string, logFu
}, logFunc)
}
+// RunAMDMemIntegrityPack runs the official RVS MEM module as a validate-style memory integrity test.
+// It snapshots rocm-smi before and after the rvs run and archives the logs
+// under baseDir via runAcceptancePackCtx, returning the archive path.
+func (s *System) RunAMDMemIntegrityPack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+ if err := ensureAMDRuntimeReady(); err != nil {
+ return "", err
+ }
+ cfgFile := "/tmp/bee-amd-mem.conf"
+ cfg := `actions:
+- name: mem_integrity
+ device: all
+ module: mem
+ parallel: true
+ duration: 60000
+ copy_matrix: false
+ target_stress: 90
+ matrix_size: 8640
+`
+ // The config drives the rvs job below; silently ignoring a write failure
+ // would run rvs against a missing or stale file, so surface the error.
+ if err := os.WriteFile(cfgFile, []byte(cfg), 0644); err != nil {
+ return "", err
+ }
+ return runAcceptancePackCtx(ctx, baseDir, "gpu-amd-mem", []satJob{
+ {name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
+ {name: "02-rvs-mem.log", cmd: []string{"rvs", "-c", cfgFile}},
+ {name: "03-rocm-smi-after.log", cmd: []string{"rocm-smi", "--showtemp", "--showpower", "--showmemuse", "--csv"}},
+ }, logFunc)
+}
+
+// RunAMDMemBandwidthPack runs AMD's memory/interconnect bandwidth-oriented tools.
+// It runs rocm-bandwidth-test plus the RVS BABEL module, with rocm-smi
+// snapshots before and after, archiving the logs via runAcceptancePackCtx.
+func (s *System) RunAMDMemBandwidthPack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+ if err := ensureAMDRuntimeReady(); err != nil {
+ return "", err
+ }
+ cfgFile := "/tmp/bee-amd-babel.conf"
+ cfg := `actions:
+- name: babel_mem_bw
+ device: all
+ module: babel
+ parallel: true
+ copy_matrix: true
+ target_stress: 90
+ matrix_size: 134217728
+`
+ // The config drives the rvs job below; silently ignoring a write failure
+ // would run rvs against a missing or stale file, so surface the error.
+ if err := os.WriteFile(cfgFile, []byte(cfg), 0644); err != nil {
+ return "", err
+ }
+ return runAcceptancePackCtx(ctx, baseDir, "gpu-amd-bandwidth", []satJob{
+ {name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
+ {name: "02-rocm-bandwidth-test.log", cmd: []string{"rocm-bandwidth-test"}},
+ {name: "03-rvs-babel.log", cmd: []string{"rvs", "-c", cfgFile}},
+ {name: "04-rocm-smi-after.log", cmd: []string{"rocm-smi", "--showtemp", "--showpower", "--showmemuse", "--csv"}},
+ }, logFunc)
+}
+
// RunAMDStressPack runs an AMD GPU burn-in pack.
// Missing tools are reported as UNSUPPORTED, consistent with the existing SAT pattern.
func (s *System) RunAMDStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
@@ -161,7 +209,7 @@ func amdStressRVSConfig(seconds int) string {
module: gst
parallel: true
duration: %d
- copy_matrix: true
+ copy_matrix: false
target_stress: 90
matrix_size_a: 8640
matrix_size_b: 8640
diff --git a/audit/internal/platform/sat_test.go b/audit/internal/platform/sat_test.go
index fa3fed3..96b1552 100644
--- a/audit/internal/platform/sat_test.go
+++ b/audit/internal/platform/sat_test.go
@@ -39,15 +39,26 @@ func TestRunNvidiaAcceptancePackIncludesGPUStress(t *testing.T) {
}
}
-func TestAMDStressConfigEnablesVRAMTraffic(t *testing.T) {
+func TestAMDStressConfigUsesSingleGSTAction(t *testing.T) {
t.Parallel()
cfg := amdStressRVSConfig(123)
- if !strings.Contains(cfg, "copy_matrix: true") {
- t.Fatalf("config missing VRAM copy path:\n%s", cfg)
+ if !strings.Contains(cfg, "module: gst") {
+ t.Fatalf("config missing gst module:\n%s", cfg)
}
- if !strings.Contains(cfg, "duration: 123000") {
- t.Fatalf("config missing millisecond duration:\n%s", cfg)
+ if strings.Contains(cfg, "module: mem") {
+ t.Fatalf("config should not include mem module:\n%s", cfg)
+ }
+ if !strings.Contains(cfg, "copy_matrix: false") {
+ t.Fatalf("config should use copy_matrix=false:\n%s", cfg)
+ }
+ if strings.Count(cfg, "duration: 123000") != 1 {
+ t.Fatalf("config should apply duration once:\n%s", cfg)
+ }
+ for _, field := range []string{"matrix_size_a: 8640", "matrix_size_b: 8640", "matrix_size_c: 8640"} {
+ if !strings.Contains(cfg, field) {
+ t.Fatalf("config missing %s:\n%s", field, cfg)
+ }
}
}
diff --git a/audit/internal/webui/api.go b/audit/internal/webui/api.go
index 7b295c7..a88aabc 100644
--- a/audit/internal/webui/api.go
+++ b/audit/internal/webui/api.go
@@ -599,10 +599,9 @@ func (h *handler) handleAPIMetricsStream(w http.ResponseWriter, r *http.Request)
case <-r.Context().Done():
return
case <-ticker.C:
- sample := platform.SampleLiveMetrics()
- h.feedRings(sample)
- if h.metricsDB != nil {
- _ = h.metricsDB.Write(sample)
+ sample, ok := h.latestMetric()
+ if !ok {
+ continue
}
b, err := json.Marshal(sample)
if err != nil {
diff --git a/audit/internal/webui/metricsdb.go b/audit/internal/webui/metricsdb.go
index 90e4b37..704ffb2 100644
--- a/audit/internal/webui/metricsdb.go
+++ b/audit/internal/webui/metricsdb.go
@@ -3,7 +3,6 @@ package webui
import (
"database/sql"
"encoding/csv"
- "fmt"
"io"
"strconv"
"time"
@@ -13,7 +12,6 @@ import (
)
const metricsDBPath = "/appdata/bee/metrics.db"
-const metricsKeepDuration = 24 * time.Hour
// MetricsDB persists live metric samples to SQLite.
type MetricsDB struct {
@@ -116,11 +114,18 @@ func (m *MetricsDB) Write(s platform.LiveMetricSample) error {
}
// LoadRecent returns up to n samples in chronological order (oldest first).
-// It reconstructs LiveMetricSample from the normalized tables.
func (m *MetricsDB) LoadRecent(n int) ([]platform.LiveMetricSample, error) {
- rows, err := m.db.Query(
- `SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics ORDER BY ts DESC LIMIT ?`, n,
- )
+ return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics ORDER BY ts DESC LIMIT ?`, n)
+}
+
+// LoadAll returns all persisted samples in chronological order (oldest first).
+func (m *MetricsDB) LoadAll() ([]platform.LiveMetricSample, error) {
+ // The query has no placeholders, so pass no variadic args. The previous
+ // version passed a literal nil, which reached db.Query as one argument
+ // and made the driver fail with "sql: expected 0 arguments, got 1".
+ return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics ORDER BY ts`)
+}
+
+// loadSamples reconstructs LiveMetricSample rows from the normalized tables.
+func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetricSample, error) {
+ rows, err := m.db.Query(query, args...)
if err != nil {
return nil, err
}
@@ -257,14 +262,6 @@ func (m *MetricsDB) LoadRecent(n int) ([]platform.LiveMetricSample, error) {
return samples, nil
}
-// Prune deletes samples older than keepDuration.
-func (m *MetricsDB) Prune(keepDuration time.Duration) {
- cutoff := time.Now().Add(-keepDuration).Unix()
- for _, table := range []string{"sys_metrics", "gpu_metrics", "fan_metrics", "temp_metrics"} {
- _, _ = m.db.Exec(fmt.Sprintf("DELETE FROM %s WHERE ts < ?", table), cutoff)
- }
-}
-
// ExportCSV writes all sys+gpu data as CSV to w.
func (m *MetricsDB) ExportCSV(w io.Writer) error {
rows, err := m.db.Query(`
diff --git a/audit/internal/webui/pages.go b/audit/internal/webui/pages.go
index 05c41f2..00378e8 100644
--- a/audit/internal/webui/pages.go
+++ b/audit/internal/webui/pages.go
@@ -494,7 +494,11 @@ func renderValidate() string {
renderSATCard("memory", "Memory", "") +
renderSATCard("storage", "Storage", "") +
renderSATCard("cpu", "CPU", `
`) +
- renderSATCard("amd", "AMD GPU", "") +
+ renderSATCard("amd", "AMD GPU", `
+
+
+
+Additional AMD memory diagnostics: RVS MEM for integrity and BABEL + rocm-bandwidth-test for memory/interconnect bandwidth.
`) +
`
Test Output
@@ -505,7 +509,7 @@ let satES = null;
function runSAT(target) {
if (satES) { satES.close(); satES = null; }
const body = {};
- const labels = {nvidia:'Validate GPU', memory:'Validate Memory', storage:'Validate Storage', cpu:'Validate CPU', amd:'Validate AMD GPU'};
+ const labels = {nvidia:'Validate GPU', memory:'Validate Memory', storage:'Validate Storage', cpu:'Validate CPU', amd:'Validate AMD GPU', 'amd-mem':'AMD GPU MEM Integrity', 'amd-bandwidth':'AMD GPU MEM Bandwidth'};
body.display_name = labels[target] || ('Validate ' + target);
if (target === 'nvidia') body.diag_level = parseInt(document.getElementById('sat-nvidia-level').value)||1;
if (target === 'cpu') body.duration = parseInt(document.getElementById('sat-cpu-dur').value)||60;
@@ -524,7 +528,7 @@ function runSAT(target) {
}
function runAllSAT() {
const cycles = Math.max(1, parseInt(document.getElementById('sat-cycles').value)||1);
- const targets = ['nvidia','memory','storage','cpu','amd'];
+ const targets = ['nvidia','memory','storage','cpu','amd','amd-mem','amd-bandwidth'];
const total = targets.length * cycles;
let enqueued = 0;
const status = document.getElementById('sat-all-status');
@@ -536,7 +540,7 @@ function runAllSAT() {
const btn = document.getElementById('sat-btn-' + target);
if (btn && btn.disabled) { enqueueNext(cycle, idx+1); return; }
const body = {};
- const labels = {nvidia:'Validate GPU', memory:'Validate Memory', storage:'Validate Storage', cpu:'Validate CPU', amd:'Validate AMD GPU'};
+ const labels = {nvidia:'Validate GPU', memory:'Validate Memory', storage:'Validate Storage', cpu:'Validate CPU', amd:'Validate AMD GPU', 'amd-mem':'AMD GPU MEM Integrity', 'amd-bandwidth':'AMD GPU MEM Bandwidth'};
body.display_name = labels[target] || ('Validate ' + target);
if (target === 'nvidia') body.diag_level = parseInt(document.getElementById('sat-nvidia-level').value)||1;
if (target === 'cpu') body.duration = parseInt(document.getElementById('sat-cpu-dur').value)||60;
@@ -554,6 +558,8 @@ function runAllSAT() {
fetch('/api/gpu/presence').then(r=>r.json()).then(gp => {
if (!gp.nvidia) disableSATCard('nvidia', 'No NVIDIA GPU detected');
if (!gp.amd) disableSATCard('amd', 'No AMD GPU detected');
+ if (!gp.amd) disableSATCard('amd-mem', 'No AMD GPU detected');
+ if (!gp.amd) disableSATCard('amd-bandwidth', 'No AMD GPU detected');
});
function disableSATCard(id, reason) {
const btn = document.getElementById('sat-btn-' + id);
diff --git a/audit/internal/webui/server.go b/audit/internal/webui/server.go
index 5c94cbc..9d894cc 100644
--- a/audit/internal/webui/server.go
+++ b/audit/internal/webui/server.go
@@ -132,6 +132,8 @@ type handler struct {
// per-GPU rings (index = GPU index)
gpuRings []*gpuRings
ringsMu sync.Mutex
+ latestMu sync.RWMutex
+ latest *platform.LiveMetricSample
// metrics persistence (nil if DB unavailable)
metricsDB *MetricsDB
// install job (at most one at a time)
@@ -164,13 +166,16 @@ func NewHandler(opts HandlerOptions) http.Handler {
// Open metrics DB and pre-fill ring buffers from history.
if db, err := openMetricsDB(metricsDBPath); err == nil {
h.metricsDB = db
- db.Prune(metricsKeepDuration)
if samples, err := db.LoadRecent(120); err == nil {
for _, s := range samples {
h.feedRings(s)
}
+ if len(samples) > 0 {
+ h.setLatestMetric(samples[len(samples)-1])
+ }
}
}
+ h.startMetricsCollector()
globalQueue.startWorker(&opts)
mux := http.NewServeMux()
@@ -198,6 +203,8 @@ func NewHandler(opts HandlerOptions) http.Handler {
mux.HandleFunc("POST /api/sat/storage/run", h.handleAPISATRun("storage"))
mux.HandleFunc("POST /api/sat/cpu/run", h.handleAPISATRun("cpu"))
mux.HandleFunc("POST /api/sat/amd/run", h.handleAPISATRun("amd"))
+ mux.HandleFunc("POST /api/sat/amd-mem/run", h.handleAPISATRun("amd-mem"))
+ mux.HandleFunc("POST /api/sat/amd-bandwidth/run", h.handleAPISATRun("amd-bandwidth"))
mux.HandleFunc("POST /api/sat/amd-stress/run", h.handleAPISATRun("amd-stress"))
mux.HandleFunc("POST /api/sat/memory-stress/run", h.handleAPISATRun("memory-stress"))
mux.HandleFunc("POST /api/sat/sat-stress/run", h.handleAPISATRun("sat-stress"))
@@ -260,6 +267,37 @@ func NewHandler(opts HandlerOptions) http.Handler {
return mux
}
+// startMetricsCollector launches a background goroutine that samples live
+// metrics once per second, feeds the in-memory ring buffers, records the
+// latest sample for stream consumers, and best-effort persists to the DB.
+// NOTE(review): the goroutine has no stop channel and runs for the process
+// lifetime — confirm that matches the handler's intended lifecycle.
+func (h *handler) startMetricsCollector() {
+ go func() {
+ ticker := time.NewTicker(1 * time.Second)
+ defer ticker.Stop()
+ for range ticker.C {
+ sample := platform.SampleLiveMetrics()
+ h.feedRings(sample)
+ h.setLatestMetric(sample)
+ if h.metricsDB != nil {
+ // Persistence is best-effort; a DB error must not stop sampling.
+ _ = h.metricsDB.Write(sample)
+ }
+ }
+ }()
+}
+
+// setLatestMetric stores a copy of sample as the most recent reading,
+// guarded by latestMu so concurrent readers never observe a partial write.
+func (h *handler) setLatestMetric(sample platform.LiveMetricSample) {
+ h.latestMu.Lock()
+ defer h.latestMu.Unlock()
+ // Copy before taking the address so the caller cannot mutate h.latest.
+ cp := sample
+ h.latest = &cp
+}
+
+// latestMetric returns the most recent sample and whether one exists yet
+// (false until the collector has produced its first sample).
+func (h *handler) latestMetric() (platform.LiveMetricSample, bool) {
+ h.latestMu.RLock()
+ defer h.latestMu.RUnlock()
+ if h.latest == nil {
+ return platform.LiveMetricSample{}, false
+ }
+ return *h.latest, true
+}
+
// ListenAndServe starts the HTTP server.
func ListenAndServe(addr string, opts HandlerOptions) error {
return http.ListenAndServe(addr, NewHandler(opts))
@@ -387,6 +425,20 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
path := strings.TrimPrefix(r.URL.Path, "/api/metrics/chart/")
path = strings.TrimSuffix(path, ".svg")
+ if h.metricsDB != nil {
+ if datasets, names, labels, title, yMin, yMax, ok := h.chartDataFromDB(path); ok {
+ buf, err := renderChartSVG(title, datasets, names, labels, yMin, yMax)
+ if err != nil {
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ return
+ }
+ w.Header().Set("Content-Type", "image/svg+xml")
+ w.Header().Set("Cache-Control", "no-store")
+ _, _ = w.Write(buf)
+ return
+ }
+ }
+
var datasets [][]float64
var names []string
var labels []string
@@ -601,6 +653,268 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
_, _ = w.Write(buf)
}
+// chartDataFromDB loads the full persisted history and converts it into
+// chart inputs for path. ok=false (DB error, or no samples yet) tells the
+// caller to fall back to the in-memory ring buffers.
+func (h *handler) chartDataFromDB(path string) ([][]float64, []string, []string, string, *float64, *float64, bool) {
+ samples, err := h.metricsDB.LoadAll()
+ if err != nil || len(samples) == 0 {
+ return nil, nil, nil, "", nil, nil, false
+ }
+ return chartDataFromSamples(path, samples)
+}
+
+// chartDataFromSamples converts persisted samples into chart inputs for the
+// given chart path: datasets, series names, x-axis labels, title, and
+// optional fixed y-axis bounds. ok=false means the path is unknown or the
+// requested per-GPU data is absent from the samples.
+func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][]float64, []string, []string, string, *float64, *float64, bool) {
+ var datasets [][]float64
+ var names []string
+ var title string
+ var yMin, yMax *float64
+ labels := sampleTimeLabels(samples)
+
+ switch {
+ case path == "server-load":
+ title = "CPU / Memory Load"
+ cpu := make([]float64, len(samples))
+ mem := make([]float64, len(samples))
+ for i, s := range samples {
+ cpu[i] = s.CPULoadPct
+ mem[i] = s.MemLoadPct
+ }
+ datasets = [][]float64{cpu, mem}
+ names = []string{"CPU Load %", "Mem Load %"}
+ yMin = floatPtr(0)
+ yMax = floatPtr(100)
+
+ case path == "server-temp", path == "server-temp-cpu":
+ title = "CPU Temperature"
+ datasets, names = namedTempDatasets(samples, "cpu")
+ yMin = floatPtr(0)
+ yMax = autoMax120(datasets...)
+
+ case path == "server-temp-gpu":
+ title = "GPU Temperature"
+ datasets, names = gpuDatasets(samples, func(g platform.GPUMetricRow) float64 { return g.TempC })
+ yMin = floatPtr(0)
+ yMax = autoMax120(datasets...)
+
+ case path == "server-temp-ambient":
+ title = "Ambient / Other Sensors"
+ datasets, names = namedTempDatasets(samples, "ambient")
+ yMin = floatPtr(0)
+ yMax = autoMax120(datasets...)
+
+ case path == "server-power":
+ title = "System Power"
+ power := make([]float64, len(samples))
+ for i, s := range samples {
+ power[i] = s.PowerW
+ }
+ datasets = [][]float64{power}
+ names = []string{"Power W"}
+ yMin, yMax = autoBounds120(power)
+
+ case path == "server-fans":
+ title = "Fan RPM"
+ datasets, names = namedFanDatasets(samples)
+ yMin, yMax = autoBounds120(datasets...)
+
+ case path == "gpu-all-load":
+ title = "GPU Compute Load"
+ datasets, names = gpuDatasets(samples, func(g platform.GPUMetricRow) float64 { return g.UsagePct })
+ yMin = floatPtr(0)
+ yMax = floatPtr(100)
+
+ case path == "gpu-all-memload":
+ title = "GPU Memory Load"
+ datasets, names = gpuDatasets(samples, func(g platform.GPUMetricRow) float64 { return g.MemUsagePct })
+ yMin = floatPtr(0)
+ yMax = floatPtr(100)
+
+ case path == "gpu-all-power":
+ title = "GPU Power"
+ datasets, names = gpuDatasets(samples, func(g platform.GPUMetricRow) float64 { return g.PowerW })
+ yMin, yMax = autoBounds120(datasets...)
+
+ case path == "gpu-all-temp":
+ title = "GPU Temperature"
+ datasets, names = gpuDatasets(samples, func(g platform.GPUMetricRow) float64 { return g.TempC })
+ yMin = floatPtr(0)
+ yMax = autoMax120(datasets...)
+
+ case strings.HasPrefix(path, "gpu/"):
+ // Per-GPU paths look like "gpu/<idx>-<sub>", e.g. "gpu/0-load".
+ rest := strings.TrimPrefix(path, "gpu/")
+ sub := ""
+ if i := strings.LastIndex(rest, "-"); i > 0 {
+ sub = rest[i+1:]
+ rest = rest[:i]
+ }
+ idx := 0
+ // NOTE(review): Sscanf error is ignored; an unparseable index silently
+ // falls back to GPU 0 — confirm that is the intended behavior.
+ fmt.Sscanf(rest, "%d", &idx)
+ switch sub {
+ case "load":
+ title = fmt.Sprintf("GPU %d Load", idx)
+ util := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.UsagePct })
+ mem := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.MemUsagePct })
+ if util == nil && mem == nil {
+ return nil, nil, nil, "", nil, nil, false
+ }
+ datasets = [][]float64{coalesceDataset(util, len(samples)), coalesceDataset(mem, len(samples))}
+ names = []string{"Load %", "Mem %"}
+ yMin = floatPtr(0)
+ yMax = floatPtr(100)
+ case "temp":
+ title = fmt.Sprintf("GPU %d Temperature", idx)
+ temp := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.TempC })
+ if temp == nil {
+ return nil, nil, nil, "", nil, nil, false
+ }
+ datasets = [][]float64{temp}
+ names = []string{"Temp °C"}
+ yMin = floatPtr(0)
+ yMax = autoMax120(temp)
+ default:
+ // Any other (or missing) suffix is treated as the power chart.
+ title = fmt.Sprintf("GPU %d Power", idx)
+ power := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.PowerW })
+ if power == nil {
+ return nil, nil, nil, "", nil, nil, false
+ }
+ datasets = [][]float64{power}
+ names = []string{"Power W"}
+ yMin, yMax = autoBounds120(power)
+ }
+
+ default:
+ return nil, nil, nil, "", nil, nil, false
+ }
+
+ return datasets, names, labels, title, yMin, yMax, len(datasets) > 0
+}
+
+// sampleTimeLabels formats each sample's local timestamp as an x-axis label:
+// "15:04" when all samples fall on the same day, "01-02 15:04" otherwise.
+func sampleTimeLabels(samples []platform.LiveMetricSample) []string {
+ labels := make([]string, len(samples))
+ if len(samples) == 0 {
+ return labels
+ }
+ sameDay := true
+ first := samples[0].Timestamp.Local()
+ for _, s := range samples {
+ ts := s.Timestamp.Local()
+ if ts.Year() != first.Year() || ts.YearDay() != first.YearDay() {
+ sameDay = false
+ break
+ }
+ }
+ for i, s := range samples {
+ ts := s.Timestamp.Local()
+ if sameDay {
+ labels[i] = ts.Format("15:04")
+ } else {
+ labels[i] = ts.Format("01-02 15:04")
+ }
+ }
+ return labels
+}
+
+// namedTempDatasets builds one series per distinct sensor name within group,
+// in first-seen order. Samples that lack a reading for a sensor contribute 0
+// for that position.
+func namedTempDatasets(samples []platform.LiveMetricSample, group string) ([][]float64, []string) {
+ seen := map[string]bool{}
+ var names []string
+ for _, s := range samples {
+ for _, t := range s.Temps {
+ if t.Group == group && !seen[t.Name] {
+ seen[t.Name] = true
+ names = append(names, t.Name)
+ }
+ }
+ }
+ datasets := make([][]float64, 0, len(names))
+ for _, name := range names {
+ ds := make([]float64, len(samples))
+ for i, s := range samples {
+ for _, t := range s.Temps {
+ if t.Group == group && t.Name == name {
+ ds[i] = t.Celsius
+ break
+ }
+ }
+ }
+ datasets = append(datasets, ds)
+ }
+ return datasets, names
+}
+
+// namedFanDatasets builds one RPM series per distinct fan name, in first-seen
+// order. Samples that lack a reading for a fan contribute 0 for that position.
+func namedFanDatasets(samples []platform.LiveMetricSample) ([][]float64, []string) {
+ seen := map[string]bool{}
+ var names []string
+ for _, s := range samples {
+ for _, f := range s.Fans {
+ if !seen[f.Name] {
+ seen[f.Name] = true
+ names = append(names, f.Name)
+ }
+ }
+ }
+ datasets := make([][]float64, 0, len(names))
+ for _, name := range names {
+ ds := make([]float64, len(samples))
+ for i, s := range samples {
+ for _, f := range s.Fans {
+ if f.Name == name {
+ ds[i] = f.RPM
+ break
+ }
+ }
+ }
+ datasets = append(datasets, ds)
+ }
+ return datasets, names
+}
+
+// gpuDatasets builds one series per distinct GPU index (first-seen order),
+// extracting the charted value via pick, and names each series "GPU <idx>".
+func gpuDatasets(samples []platform.LiveMetricSample, pick func(platform.GPUMetricRow) float64) ([][]float64, []string) {
+ seen := map[int]bool{}
+ var indices []int
+ for _, s := range samples {
+ for _, g := range s.GPUs {
+ if !seen[g.GPUIndex] {
+ seen[g.GPUIndex] = true
+ indices = append(indices, g.GPUIndex)
+ }
+ }
+ }
+ datasets := make([][]float64, 0, len(indices))
+ names := make([]string, 0, len(indices))
+ for _, idx := range indices {
+ ds := gpuDatasetByIndex(samples, idx, pick)
+ if ds == nil {
+ continue
+ }
+ datasets = append(datasets, ds)
+ names = append(names, fmt.Sprintf("GPU %d", idx))
+ }
+ return datasets, names
+}
+
+// gpuDatasetByIndex extracts one value per sample for GPU idx via pick.
+// It returns nil if idx never appears in any sample; samples that lack a row
+// for idx contribute 0 at that position.
+func gpuDatasetByIndex(samples []platform.LiveMetricSample, idx int, pick func(platform.GPUMetricRow) float64) []float64 {
+ found := false
+ ds := make([]float64, len(samples))
+ for i, s := range samples {
+ for _, g := range s.GPUs {
+ if g.GPUIndex == idx {
+ ds[i] = pick(g)
+ found = true
+ break
+ }
+ }
+ }
+ if !found {
+ return nil
+ }
+ return ds
+}
+
+// coalesceDataset returns ds unchanged, or a zero-filled slice of length n
+// when ds is nil, so chart series always align with the sample count.
+func coalesceDataset(ds []float64, n int) []float64 {
+ if ds != nil {
+ return ds
+ }
+ return make([]float64, n)
+}
+
// floatPtr returns a pointer to a float64 value.
func floatPtr(v float64) *float64 { return &v }
@@ -621,6 +935,47 @@ func autoMax120(datasets ...[]float64) *float64 {
return &v
}
+// autoBounds120 computes padded y-axis bounds across the given series: 20% of
+// the value span on each side, with the lower bound clamped at 0. It returns
+// (nil, nil) when there is no data, and (0, nil) when every value is <= 0 so
+// the renderer falls back to an automatic upper bound.
+func autoBounds120(datasets ...[]float64) (*float64, *float64) {
+ min := 0.0
+ max := 0.0
+ first := true
+ for _, ds := range datasets {
+ for _, v := range ds {
+ if first {
+ min, max = v, v
+ first = false
+ continue
+ }
+ if v < min {
+ min = v
+ }
+ if v > max {
+ max = v
+ }
+ }
+ }
+ if first {
+ return nil, nil
+ }
+ if max <= 0 {
+ return floatPtr(0), nil
+ }
+ span := max - min
+ if span <= 0 {
+ // Flat series: synthesize a span so the padding below is non-zero.
+ span = max * 0.1
+ if span <= 0 {
+ span = 1
+ }
+ }
+ pad := span * 0.2
+ low := min - pad
+ if low < 0 {
+ low = 0
+ }
+ high := max + pad
+ return floatPtr(low), floatPtr(high)
+}
+
// renderChartSVG renders a line chart SVG with a fixed Y-axis range.
func renderChartSVG(title string, datasets [][]float64, names []string, labels []string, yMin, yMax *float64) ([]byte, error) {
n := len(labels)
diff --git a/audit/internal/webui/server_test.go b/audit/internal/webui/server_test.go
index 2d2eaa6..6544799 100644
--- a/audit/internal/webui/server_test.go
+++ b/audit/internal/webui/server_test.go
@@ -7,6 +7,9 @@ import (
"path/filepath"
"strings"
"testing"
+ "time"
+
+ "bee/audit/internal/platform"
)
func TestChartLegendNumber(t *testing.T) {
@@ -31,6 +34,61 @@ func TestChartLegendNumber(t *testing.T) {
}
}
+// TestChartDataFromSamplesUsesFullHistory verifies that the gpu-all-power
+// chart is built from every provided sample (shape, labels, and endpoint
+// values), rather than a truncated window.
+func TestChartDataFromSamplesUsesFullHistory(t *testing.T) {
+ samples := []platform.LiveMetricSample{
+ {
+ Timestamp: time.Now().Add(-3 * time.Minute),
+ CPULoadPct: 10,
+ MemLoadPct: 20,
+ PowerW: 300,
+ GPUs: []platform.GPUMetricRow{
+ {GPUIndex: 0, UsagePct: 90, MemUsagePct: 5, PowerW: 120, TempC: 50},
+ },
+ },
+ {
+ Timestamp: time.Now().Add(-2 * time.Minute),
+ CPULoadPct: 30,
+ MemLoadPct: 40,
+ PowerW: 320,
+ GPUs: []platform.GPUMetricRow{
+ {GPUIndex: 0, UsagePct: 95, MemUsagePct: 7, PowerW: 125, TempC: 51},
+ },
+ },
+ {
+ Timestamp: time.Now().Add(-1 * time.Minute),
+ CPULoadPct: 50,
+ MemLoadPct: 60,
+ PowerW: 340,
+ GPUs: []platform.GPUMetricRow{
+ {GPUIndex: 0, UsagePct: 97, MemUsagePct: 9, PowerW: 130, TempC: 52},
+ },
+ },
+ }
+
+ datasets, names, labels, title, _, _, ok := chartDataFromSamples("gpu-all-power", samples)
+ if !ok {
+ t.Fatal("chartDataFromSamples returned ok=false")
+ }
+ if title != "GPU Power" {
+ t.Fatalf("title=%q", title)
+ }
+ if len(names) != 1 || names[0] != "GPU 0" {
+ t.Fatalf("names=%v", names)
+ }
+ if len(labels) != len(samples) {
+ t.Fatalf("labels len=%d want %d", len(labels), len(samples))
+ }
+ if len(datasets) != 1 || len(datasets[0]) != len(samples) {
+ t.Fatalf("datasets shape=%v", datasets)
+ }
+ if got := datasets[0][0]; got != 120 {
+ t.Fatalf("datasets[0][0]=%v want 120", got)
+ }
+ if got := datasets[0][2]; got != 130 {
+ t.Fatalf("datasets[0][2]=%v want 130", got)
+ }
+}
+
func TestRootRendersDashboard(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "audit.json")
diff --git a/audit/internal/webui/tasks.go b/audit/internal/webui/tasks.go
index 6d1a277..5e0e1dd 100644
--- a/audit/internal/webui/tasks.go
+++ b/audit/internal/webui/tasks.go
@@ -30,6 +30,8 @@ var taskNames = map[string]string{
"storage": "Storage SAT",
"cpu": "CPU SAT",
"amd": "AMD GPU SAT",
+ "amd-mem": "AMD GPU MEM Integrity",
+ "amd-bandwidth": "AMD GPU MEM Bandwidth",
"amd-stress": "AMD GPU Burn-in",
"memory-stress": "Memory Burn-in",
"sat-stress": "SAT Stress (stressapptest)",
@@ -124,6 +126,12 @@ var (
runAMDAcceptancePackCtx = func(a *app.App, ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
return a.RunAMDAcceptancePackCtx(ctx, baseDir, logFunc)
}
+ // Test seams: indirections so tests can stub the AMD MEM pack runners.
+ runAMDMemIntegrityPackCtx = func(a *app.App, ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+ return a.RunAMDMemIntegrityPackCtx(ctx, baseDir, logFunc)
+ }
+ runAMDMemBandwidthPackCtx = func(a *app.App, ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+ return a.RunAMDMemBandwidthPackCtx(ctx, baseDir, logFunc)
+ }
runAMDStressPackCtx = func(a *app.App, ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
return a.RunAMDStressPackCtx(ctx, baseDir, durationSec, logFunc)
}
@@ -380,6 +388,10 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
archive, err = runCPUAcceptancePackCtx(a, ctx, "", dur, j.append)
case "amd":
archive, err = runAMDAcceptancePackCtx(a, ctx, "", j.append)
+ case "amd-mem":
+ archive, err = runAMDMemIntegrityPackCtx(a, ctx, "", j.append)
+ case "amd-bandwidth":
+ archive, err = runAMDMemBandwidthPackCtx(a, ctx, "", j.append)
case "amd-stress":
dur := t.params.Duration
if t.params.BurnProfile != "" && dur <= 0 {