feat(metrics): persist history in sqlite and add AMD memory validate tests

2026-03-29 12:28:06 +03:00
parent 98f0cf0d52
commit e15bcc91c5
10 changed files with 539 additions and 29 deletions
--- a/audit/internal/app/app.go
+++ b/audit/internal/app/app.go
@@ -114,6 +114,8 @@ type satRunner interface {
 	DetectGPUVendor() string
 	ListAMDGPUs() ([]platform.AMDGPUInfo, error)
 	RunAMDAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
+	RunAMDMemIntegrityPack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
+	RunAMDMemBandwidthPack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
 	RunAMDStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
 	RunMemoryStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
 	RunSATStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
@@ -577,6 +579,20 @@ func (a *App) RunAMDAcceptancePackResult(baseDir string) (ActionResult, error) {
 	return ActionResult{Title: "AMD GPU SAT", Body: satResultBody(path)}, err
 }

+// RunAMDMemIntegrityPackCtx runs the AMD memory integrity pack through the
+// configured SAT runner. A blank (empty or whitespace-only) baseDir is replaced
+// with DefaultSATBaseDir; the runner's result is returned unchanged.
+func (a *App) RunAMDMemIntegrityPackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+	dir := baseDir
+	if len(strings.TrimSpace(dir)) == 0 {
+		dir = DefaultSATBaseDir
+	}
+	return a.sat.RunAMDMemIntegrityPack(ctx, dir, logFunc)
+}
+
+// RunAMDMemBandwidthPackCtx runs the AMD memory bandwidth pack through the
+// configured SAT runner. A blank (empty or whitespace-only) baseDir is replaced
+// with DefaultSATBaseDir; the runner's result is returned unchanged.
+func (a *App) RunAMDMemBandwidthPackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+	dir := baseDir
+	if len(strings.TrimSpace(dir)) == 0 {
+		dir = DefaultSATBaseDir
+	}
+	return a.sat.RunAMDMemBandwidthPack(ctx, dir, logFunc)
+}
+
 func (a *App) RunMemoryStressPack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
 	return a.RunMemoryStressPackCtx(context.Background(), baseDir, durationSec, logFunc)
 }
--- a/audit/internal/app/app_test.go
+++ b/audit/internal/app/app_test.go
@@ -181,6 +181,14 @@ func (f fakeSAT) RunAMDAcceptancePack(_ context.Context, baseDir string, _ func(
 	return "", nil
 }

+// RunAMDMemIntegrityPack is a test stub: it ignores every argument and returns
+// an empty string with a nil error.
+func (f fakeSAT) RunAMDMemIntegrityPack(_ context.Context, _ string, _ func(string)) (string, error) {
+	return "", nil
+}
+
+// RunAMDMemBandwidthPack is a test stub: it ignores every argument and returns
+// an empty string with a nil error.
+func (f fakeSAT) RunAMDMemBandwidthPack(_ context.Context, _ string, _ func(string)) (string, error) {
+	return "", nil
+}
+
 func (f fakeSAT) RunAMDStressPack(_ context.Context, _ string, _ int, _ func(string)) (string, error) {
 	return "", nil
 }
--- a/audit/internal/platform/sat.go
+++ b/audit/internal/platform/sat.go
@@ -136,6 +136,54 @@ func (s *System) RunAMDAcceptancePack(ctx context.Context, baseDir string, logFu
 	}, logFunc)
 }

+// RunAMDMemIntegrityPack runs the official RVS MEM module as a validate-style memory integrity test.
+//
+// The RVS action config is written to a fixed path under /tmp before the jobs
+// are started. A failure to write that file is returned to the caller, because
+// the rvs job would otherwise run against a missing or stale config.
+//
+// NOTE(review): the config keys below (copy_matrix, target_stress, matrix_size)
+// mirror the gst config used elsewhere in this file; confirm they are accepted
+// by the RVS mem module.
+func (s *System) RunAMDMemIntegrityPack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+	if err := ensureAMDRuntimeReady(); err != nil {
+		return "", err
+	}
+	cfgFile := "/tmp/bee-amd-mem.conf"
+	cfg := `actions:
+- name: mem_integrity
+  device: all
+  module: mem
+  parallel: true
+  duration: 60000
+  copy_matrix: false
+  target_stress: 90
+  matrix_size: 8640
+`
+	// os.WriteFile reports the operation and path in its error, so it is
+	// returned as-is.
+	if err := os.WriteFile(cfgFile, []byte(cfg), 0644); err != nil {
+		return "", err
+	}
+	return runAcceptancePackCtx(ctx, baseDir, "gpu-amd-mem", []satJob{
+		{name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
+		{name: "02-rvs-mem.log", cmd: []string{"rvs", "-c", cfgFile}},
+		{name: "03-rocm-smi-after.log", cmd: []string{"rocm-smi", "--showtemp", "--showpower", "--showmemuse", "--csv"}},
+	}, logFunc)
+}
+
+// RunAMDMemBandwidthPack runs AMD's memory/interconnect bandwidth-oriented tools.
+//
+// The pack runs rocm-bandwidth-test followed by the RVS babel module. The RVS
+// action config is written to a fixed path under /tmp before the jobs are
+// started; a failure to write that file is returned to the caller, because the
+// rvs job would otherwise run against a missing or stale config.
+//
+// NOTE(review): the config keys below (copy_matrix, target_stress, matrix_size)
+// mirror the gst config used elsewhere in this file; confirm they are accepted
+// by the RVS babel module.
+func (s *System) RunAMDMemBandwidthPack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+	if err := ensureAMDRuntimeReady(); err != nil {
+		return "", err
+	}
+	cfgFile := "/tmp/bee-amd-babel.conf"
+	cfg := `actions:
+- name: babel_mem_bw
+  device: all
+  module: babel
+  parallel: true
+  copy_matrix: true
+  target_stress: 90
+  matrix_size: 134217728
+`
+	// os.WriteFile reports the operation and path in its error, so it is
+	// returned as-is.
+	if err := os.WriteFile(cfgFile, []byte(cfg), 0644); err != nil {
+		return "", err
+	}
+	return runAcceptancePackCtx(ctx, baseDir, "gpu-amd-bandwidth", []satJob{
+		{name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
+		{name: "02-rocm-bandwidth-test.log", cmd: []string{"rocm-bandwidth-test"}},
+		{name: "03-rvs-babel.log", cmd: []string{"rvs", "-c", cfgFile}},
+		{name: "04-rocm-smi-after.log", cmd: []string{"rocm-smi", "--showtemp", "--showpower", "--showmemuse", "--csv"}},
+	}, logFunc)
+}
+
 // RunAMDStressPack runs an AMD GPU burn-in pack.
 // Missing tools are reported as UNSUPPORTED, consistent with the existing SAT pattern.
 func (s *System) RunAMDStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
@@ -161,7 +209,7 @@ func amdStressRVSConfig(seconds int) string {
  module: gst
  parallel: true
  duration: %d
-  copy_matrix: true
+  copy_matrix: false
  target_stress: 90
  matrix_size_a: 8640
  matrix_size_b: 8640
--- a/audit/internal/platform/sat_test.go
+++ b/audit/internal/platform/sat_test.go
@@ -39,15 +39,26 @@ func TestRunNvidiaAcceptancePackIncludesGPUStress(t *testing.T) {
 	}
 }

-func TestAMDStressConfigEnablesVRAMTraffic(t *testing.T) {
+func TestAMDStressConfigUsesSingleGSTAction(t *testing.T) {
 	t.Parallel()

 	cfg := amdStressRVSConfig(123)
-	if !strings.Contains(cfg, "copy_matrix: true") {
-		t.Fatalf("config missing VRAM copy path:\n%s", cfg)
+	if !strings.Contains(cfg, "module: gst") {
+		t.Fatalf("config missing gst module:\n%s", cfg)
 	}
-	if !strings.Contains(cfg, "duration: 123000") {
-		t.Fatalf("config missing millisecond duration:\n%s", cfg)
+	if strings.Contains(cfg, "module: mem") {
+		t.Fatalf("config should not include mem module:\n%s", cfg)
+	}
+	if !strings.Contains(cfg, "copy_matrix: false") {
+		t.Fatalf("config should use copy_matrix=false:\n%s", cfg)
+	}
+	if strings.Count(cfg, "duration: 123000") != 1 {
+		t.Fatalf("config should apply duration once:\n%s", cfg)
+	}
+	for _, field := range []string{"matrix_size_a: 8640", "matrix_size_b: 8640", "matrix_size_c: 8640"} {
+		if !strings.Contains(cfg, field) {
+			t.Fatalf("config missing %s:\n%s", field, cfg)
+		}
 	}
 }

--- a/audit/internal/webui/api.go
+++ b/audit/internal/webui/api.go
@@ -599,10 +599,9 @@ func (h *handler) handleAPIMetricsStream(w http.ResponseWriter, r *http.Request)
 		case <-r.Context().Done():
 			return
 		case <-ticker.C:
-			sample := platform.SampleLiveMetrics()
-			h.feedRings(sample)
-			if h.metricsDB != nil {
-				_ = h.metricsDB.Write(sample)
+			sample, ok := h.latestMetric()
+			if !ok {
+				continue
 			}
 			b, err := json.Marshal(sample)
 			if err != nil {
--- a/audit/internal/webui/metricsdb.go
+++ b/audit/internal/webui/metricsdb.go
@@ -3,7 +3,6 @@ package webui
 import (
 	"database/sql"
 	"encoding/csv"
-	"fmt"
 	"io"
 	"strconv"
 	"time"
@@ -13,7 +12,6 @@ import (
 )

 const metricsDBPath = "/appdata/bee/metrics.db"
-const metricsKeepDuration = 24 * time.Hour

 // MetricsDB persists live metric samples to SQLite.
 type MetricsDB struct {
@@ -116,11 +114,18 @@ func (m *MetricsDB) Write(s platform.LiveMetricSample) error {
 }

 // LoadRecent returns up to n samples in chronological order (oldest first).
-// It reconstructs LiveMetricSample from the normalized tables.
 func (m *MetricsDB) LoadRecent(n int) ([]platform.LiveMetricSample, error) {
-	rows, err := m.db.Query(
-		`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics ORDER BY ts DESC LIMIT ?`, n,
-	)
+	return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics ORDER BY ts DESC LIMIT ?`, n)
+}
+
+// LoadAll returns all persisted samples in chronological order (oldest first).
+//
+// NOTE(review): the final ordering also depends on how loadSamples
+// post-processes the rows it reads; confirm it does not reverse an ascending
+// result.
+func (m *MetricsDB) LoadAll() ([]platform.LiveMetricSample, error) {
+	// The query has no placeholders, so no bind arguments are supplied. A
+	// literal nil here would be forwarded as one (NULL) bind argument.
+	return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics ORDER BY ts`)
+}
+
+// loadSamples reconstructs LiveMetricSample rows from the normalized tables.
+func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetricSample, error) {
+	rows, err := m.db.Query(query, args...)
 	if err != nil {
 		return nil, err
 	}
@@ -257,14 +262,6 @@ func (m *MetricsDB) LoadRecent(n int) ([]platform.LiveMetricSample, error) {
 	return samples, nil
 }

-// Prune deletes samples older than keepDuration.
-func (m *MetricsDB) Prune(keepDuration time.Duration) {
-	cutoff := time.Now().Add(-keepDuration).Unix()
-	for _, table := range []string{"sys_metrics", "gpu_metrics", "fan_metrics", "temp_metrics"} {
-		_, _ = m.db.Exec(fmt.Sprintf("DELETE FROM %s WHERE ts < ?", table), cutoff)
-	}
-}
-
 // ExportCSV writes all sys+gpu data as CSV to w.
 func (m *MetricsDB) ExportCSV(w io.Writer) error {
 	rows, err := m.db.Query(`
--- a/audit/internal/webui/pages.go
+++ b/audit/internal/webui/pages.go
@@ -494,7 +494,11 @@ func renderValidate() string {
 		renderSATCard("memory", "Memory", "") +
 		renderSATCard("storage", "Storage", "") +
 		renderSATCard("cpu", "CPU", `<div class="form-row"><label>Duration (seconds)</label><input type="number" id="sat-cpu-dur" value="60" min="10"></div>`) +
-		renderSATCard("amd", "AMD GPU", "") +
+		renderSATCard("amd", "AMD GPU", `<div style="display:flex;gap:8px;flex-wrap:wrap;margin-bottom:8px">
+<button id="sat-btn-amd-mem" class="btn" type="button" onclick="runSAT('amd-mem')">MEM Integrity</button>
+<button id="sat-btn-amd-bandwidth" class="btn" type="button" onclick="runSAT('amd-bandwidth')">MEM Bandwidth</button>
+</div>
+<p style="color:var(--muted);font-size:12px;margin:0">Additional AMD memory diagnostics: RVS MEM for integrity and BABEL + rocm-bandwidth-test for memory/interconnect bandwidth.</p>`) +
 		`</div>
 <div id="sat-output" style="display:none;margin-top:16px" class="card">
  <div class="card-head">Test Output <span id="sat-title"></span></div>
@@ -505,7 +509,7 @@ let satES = null;
 function runSAT(target) {
  if (satES) { satES.close(); satES = null; }
  const body = {};
-  const labels = {nvidia:'Validate GPU', memory:'Validate Memory', storage:'Validate Storage', cpu:'Validate CPU', amd:'Validate AMD GPU'};
+  const labels = {nvidia:'Validate GPU', memory:'Validate Memory', storage:'Validate Storage', cpu:'Validate CPU', amd:'Validate AMD GPU', 'amd-mem':'AMD GPU MEM Integrity', 'amd-bandwidth':'AMD GPU MEM Bandwidth'};
  body.display_name = labels[target] || ('Validate ' + target);
  if (target === 'nvidia') body.diag_level = parseInt(document.getElementById('sat-nvidia-level').value)||1;
  if (target === 'cpu') body.duration = parseInt(document.getElementById('sat-cpu-dur').value)||60;
@@ -524,7 +528,7 @@ function runSAT(target) {
 }
 function runAllSAT() {
  const cycles = Math.max(1, parseInt(document.getElementById('sat-cycles').value)||1);
-  const targets = ['nvidia','memory','storage','cpu','amd'];
+  const targets = ['nvidia','memory','storage','cpu','amd','amd-mem','amd-bandwidth'];
  const total = targets.length * cycles;
  let enqueued = 0;
  const status = document.getElementById('sat-all-status');
@@ -536,7 +540,7 @@ function runAllSAT() {
    const btn = document.getElementById('sat-btn-' + target);
    if (btn && btn.disabled) { enqueueNext(cycle, idx+1); return; }
    const body = {};
-    const labels = {nvidia:'Validate GPU', memory:'Validate Memory', storage:'Validate Storage', cpu:'Validate CPU', amd:'Validate AMD GPU'};
+    const labels = {nvidia:'Validate GPU', memory:'Validate Memory', storage:'Validate Storage', cpu:'Validate CPU', amd:'Validate AMD GPU', 'amd-mem':'AMD GPU MEM Integrity', 'amd-bandwidth':'AMD GPU MEM Bandwidth'};
    body.display_name = labels[target] || ('Validate ' + target);
    if (target === 'nvidia') body.diag_level = parseInt(document.getElementById('sat-nvidia-level').value)||1;
    if (target === 'cpu') body.duration = parseInt(document.getElementById('sat-cpu-dur').value)||60;
@@ -554,6 +558,8 @@ function runAllSAT() {
 fetch('/api/gpu/presence').then(r=>r.json()).then(gp => {
    if (!gp.nvidia) disableSATCard('nvidia', 'No NVIDIA GPU detected');
    if (!gp.amd) disableSATCard('amd', 'No AMD GPU detected');
+    if (!gp.amd) disableSATCard('amd-mem', 'No AMD GPU detected');
+    if (!gp.amd) disableSATCard('amd-bandwidth', 'No AMD GPU detected');
 });
 function disableSATCard(id, reason) {
    const btn = document.getElementById('sat-btn-' + id);
--- a/audit/internal/webui/server.go
+++ b/audit/internal/webui/server.go
@@ -132,6 +132,8 @@ type handler struct {
 	// per-GPU rings (index = GPU index)
 	gpuRings []*gpuRings
 	ringsMu  sync.Mutex
+	latestMu sync.RWMutex
+	latest   *platform.LiveMetricSample
 	// metrics persistence (nil if DB unavailable)
 	metricsDB *MetricsDB
 	// install job (at most one at a time)
@@ -164,13 +166,16 @@ func NewHandler(opts HandlerOptions) http.Handler {
 	// Open metrics DB and pre-fill ring buffers from history.
 	if db, err := openMetricsDB(metricsDBPath); err == nil {
 		h.metricsDB = db
-		db.Prune(metricsKeepDuration)
 		if samples, err := db.LoadRecent(120); err == nil {
 			for _, s := range samples {
 				h.feedRings(s)
 			}
+			if len(samples) > 0 {
+				h.setLatestMetric(samples[len(samples)-1])
+			}
 		}
 	}
+	h.startMetricsCollector()

 	globalQueue.startWorker(&opts)
 	mux := http.NewServeMux()
@@ -198,6 +203,8 @@ func NewHandler(opts HandlerOptions) http.Handler {
 	mux.HandleFunc("POST /api/sat/storage/run", h.handleAPISATRun("storage"))
 	mux.HandleFunc("POST /api/sat/cpu/run", h.handleAPISATRun("cpu"))
 	mux.HandleFunc("POST /api/sat/amd/run", h.handleAPISATRun("amd"))
+	mux.HandleFunc("POST /api/sat/amd-mem/run", h.handleAPISATRun("amd-mem"))
+	mux.HandleFunc("POST /api/sat/amd-bandwidth/run", h.handleAPISATRun("amd-bandwidth"))
 	mux.HandleFunc("POST /api/sat/amd-stress/run", h.handleAPISATRun("amd-stress"))
 	mux.HandleFunc("POST /api/sat/memory-stress/run", h.handleAPISATRun("memory-stress"))
 	mux.HandleFunc("POST /api/sat/sat-stress/run", h.handleAPISATRun("sat-stress"))
@@ -260,6 +267,37 @@ func NewHandler(opts HandlerOptions) http.Handler {
 	return mux
 }

+// startMetricsCollector launches a background goroutine that takes one live
+// metrics sample per second. Each sample is fed to the ring buffers, published
+// as the latest metric and, when a metrics DB is open, written to it.
+//
+// Nothing in this function stops the goroutine once it has been started.
+func (h *handler) startMetricsCollector() {
+	collect := func() {
+		s := platform.SampleLiveMetrics()
+		h.feedRings(s)
+		h.setLatestMetric(s)
+		if h.metricsDB == nil {
+			return
+		}
+		// Persistence is best-effort: a failed write does not stop sampling.
+		_ = h.metricsDB.Write(s)
+	}
+	go func() {
+		tick := time.NewTicker(time.Second)
+		defer tick.Stop()
+		for {
+			<-tick.C
+			collect()
+		}
+	}()
+}
+
+// setLatestMetric stores a copy of the sample value as the most recent metric,
+// holding latestMu for writing while the pointer is swapped.
+func (h *handler) setLatestMetric(sample platform.LiveMetricSample) {
+	stored := new(platform.LiveMetricSample)
+	*stored = sample
+
+	h.latestMu.Lock()
+	h.latest = stored
+	h.latestMu.Unlock()
+}
+
+// latestMetric returns the most recently stored sample by value. The boolean
+// is false, and the sample is the zero value, when nothing has been stored yet.
+func (h *handler) latestMetric() (platform.LiveMetricSample, bool) {
+	h.latestMu.RLock()
+	defer h.latestMu.RUnlock()
+	if cur := h.latest; cur != nil {
+		return *cur, true
+	}
+	return platform.LiveMetricSample{}, false
+}
+
 // ListenAndServe starts the HTTP server.
 func ListenAndServe(addr string, opts HandlerOptions) error {
 	return http.ListenAndServe(addr, NewHandler(opts))
@@ -387,6 +425,20 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
 	path := strings.TrimPrefix(r.URL.Path, "/api/metrics/chart/")
 	path = strings.TrimSuffix(path, ".svg")

+	if h.metricsDB != nil {
+		if datasets, names, labels, title, yMin, yMax, ok := h.chartDataFromDB(path); ok {
+			buf, err := renderChartSVG(title, datasets, names, labels, yMin, yMax)
+			if err != nil {
+				http.Error(w, err.Error(), http.StatusInternalServerError)
+				return
+			}
+			w.Header().Set("Content-Type", "image/svg+xml")
+			w.Header().Set("Cache-Control", "no-store")
+			_, _ = w.Write(buf)
+			return
+		}
+	}
+
 	var datasets [][]float64
 	var names []string
 	var labels []string
@@ -601,6 +653,268 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
 	_, _ = w.Write(buf)
 }

+// chartDataFromDB builds chart inputs for path from the samples returned by
+// metricsDB.LoadAll. It reports ok=false when loading fails, when no samples
+// are stored, or when chartDataFromSamples cannot build a chart for path.
+// LoadAll is called on every invocation.
+func (h *handler) chartDataFromDB(path string) ([][]float64, []string, []string, string, *float64, *float64, bool) {
+	history, loadErr := h.metricsDB.LoadAll()
+	if loadErr == nil && len(history) > 0 {
+		return chartDataFromSamples(path, history)
+	}
+	return nil, nil, nil, "", nil, nil, false
+}
+
+// chartDataFromSamples converts samples into the inputs of one chart, selected
+// by path (for example "server-load", "gpu-all-power" or "gpu/0-temp").
+//
+// It returns the data series, their names, one x-axis label per sample, the
+// chart title and optional Y-axis bounds. The final boolean is false when path
+// is not recognized, when a per-GPU path does not carry a numeric GPU index,
+// when the requested GPU has no rows in samples, or when no series could be
+// built.
+func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][]float64, []string, []string, string, *float64, *float64, bool) {
+	var datasets [][]float64
+	var names []string
+	var title string
+	var yMin, yMax *float64
+	labels := sampleTimeLabels(samples)
+
+	switch {
+	case path == "server-load":
+		title = "CPU / Memory Load"
+		cpu := make([]float64, len(samples))
+		mem := make([]float64, len(samples))
+		for i, s := range samples {
+			cpu[i] = s.CPULoadPct
+			mem[i] = s.MemLoadPct
+		}
+		datasets = [][]float64{cpu, mem}
+		names = []string{"CPU Load %", "Mem Load %"}
+		yMin = floatPtr(0)
+		yMax = floatPtr(100)
+
+	case path == "server-temp", path == "server-temp-cpu":
+		title = "CPU Temperature"
+		datasets, names = namedTempDatasets(samples, "cpu")
+		yMin = floatPtr(0)
+		yMax = autoMax120(datasets...)
+
+	case path == "server-temp-gpu":
+		title = "GPU Temperature"
+		datasets, names = gpuDatasets(samples, func(g platform.GPUMetricRow) float64 { return g.TempC })
+		yMin = floatPtr(0)
+		yMax = autoMax120(datasets...)
+
+	case path == "server-temp-ambient":
+		title = "Ambient / Other Sensors"
+		datasets, names = namedTempDatasets(samples, "ambient")
+		yMin = floatPtr(0)
+		yMax = autoMax120(datasets...)
+
+	case path == "server-power":
+		title = "System Power"
+		power := make([]float64, len(samples))
+		for i, s := range samples {
+			power[i] = s.PowerW
+		}
+		datasets = [][]float64{power}
+		names = []string{"Power W"}
+		yMin, yMax = autoBounds120(power)
+
+	case path == "server-fans":
+		title = "Fan RPM"
+		datasets, names = namedFanDatasets(samples)
+		yMin, yMax = autoBounds120(datasets...)
+
+	case path == "gpu-all-load":
+		title = "GPU Compute Load"
+		datasets, names = gpuDatasets(samples, func(g platform.GPUMetricRow) float64 { return g.UsagePct })
+		yMin = floatPtr(0)
+		yMax = floatPtr(100)
+
+	case path == "gpu-all-memload":
+		title = "GPU Memory Load"
+		datasets, names = gpuDatasets(samples, func(g platform.GPUMetricRow) float64 { return g.MemUsagePct })
+		yMin = floatPtr(0)
+		yMax = floatPtr(100)
+
+	case path == "gpu-all-power":
+		title = "GPU Power"
+		datasets, names = gpuDatasets(samples, func(g platform.GPUMetricRow) float64 { return g.PowerW })
+		yMin, yMax = autoBounds120(datasets...)
+
+	case path == "gpu-all-temp":
+		title = "GPU Temperature"
+		datasets, names = gpuDatasets(samples, func(g platform.GPUMetricRow) float64 { return g.TempC })
+		yMin = floatPtr(0)
+		yMax = autoMax120(datasets...)
+
+	case strings.HasPrefix(path, "gpu/"):
+		// Per-GPU paths look like "gpu/<index>-<kind>"; the kind is whatever
+		// follows the last dash, and an unknown or missing kind means power.
+		rest := strings.TrimPrefix(path, "gpu/")
+		sub := ""
+		if i := strings.LastIndex(rest, "-"); i > 0 {
+			sub = rest[i+1:]
+			rest = rest[:i]
+		}
+		idx := 0
+		// Reject a non-numeric index instead of silently charting GPU 0.
+		if _, err := fmt.Sscanf(rest, "%d", &idx); err != nil {
+			return nil, nil, nil, "", nil, nil, false
+		}
+		switch sub {
+		case "load":
+			title = fmt.Sprintf("GPU %d Load", idx)
+			util := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.UsagePct })
+			mem := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.MemUsagePct })
+			if util == nil && mem == nil {
+				return nil, nil, nil, "", nil, nil, false
+			}
+			datasets = [][]float64{coalesceDataset(util, len(samples)), coalesceDataset(mem, len(samples))}
+			names = []string{"Load %", "Mem %"}
+			yMin = floatPtr(0)
+			yMax = floatPtr(100)
+		case "temp":
+			title = fmt.Sprintf("GPU %d Temperature", idx)
+			temp := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.TempC })
+			if temp == nil {
+				return nil, nil, nil, "", nil, nil, false
+			}
+			datasets = [][]float64{temp}
+			names = []string{"Temp °C"}
+			yMin = floatPtr(0)
+			yMax = autoMax120(temp)
+		default:
+			title = fmt.Sprintf("GPU %d Power", idx)
+			power := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.PowerW })
+			if power == nil {
+				return nil, nil, nil, "", nil, nil, false
+			}
+			datasets = [][]float64{power}
+			names = []string{"Power W"}
+			yMin, yMax = autoBounds120(power)
+		}
+
+	default:
+		return nil, nil, nil, "", nil, nil, false
+	}
+
+	return datasets, names, labels, title, yMin, yMax, len(datasets) > 0
+}
+
+// sampleTimeLabels returns one local-time label per sample. When every sample
+// falls on the same local calendar day as the first one, labels are "15:04";
+// otherwise each label also carries the month and day ("01-02 15:04").
+func sampleTimeLabels(samples []platform.LiveMetricSample) []string {
+	out := make([]string, len(samples))
+	if len(samples) == 0 {
+		return out
+	}
+	ref := samples[0].Timestamp.Local()
+	refYear, refDay := ref.Year(), ref.YearDay()
+	layout := "15:04"
+	for i := range samples {
+		at := samples[i].Timestamp.Local()
+		if at.Year() != refYear || at.YearDay() != refDay {
+			layout = "01-02 15:04"
+			break
+		}
+	}
+	for i := range samples {
+		out[i] = samples[i].Timestamp.Local().Format(layout)
+	}
+	return out
+}
+
+// namedTempDatasets builds one temperature series per sensor name found in the
+// given group. Names are listed in first-seen order. Each series has one value
+// per sample, taken from the first matching sensor entry of that sample; a
+// sample without a matching entry leaves a 0 at its position.
+func namedTempDatasets(samples []platform.LiveMetricSample, group string) ([][]float64, []string) {
+	var names []string
+	known := make(map[string]bool)
+	for i := range samples {
+		for _, t := range samples[i].Temps {
+			if t.Group != group || known[t.Name] {
+				continue
+			}
+			known[t.Name] = true
+			names = append(names, t.Name)
+		}
+	}
+
+	datasets := make([][]float64, len(names))
+	for k, name := range names {
+		series := make([]float64, len(samples))
+		for i := range samples {
+			for _, t := range samples[i].Temps {
+				if t.Group == group && t.Name == name {
+					series[i] = t.Celsius
+					break
+				}
+			}
+		}
+		datasets[k] = series
+	}
+	return datasets, names
+}
+
+// namedFanDatasets builds one RPM series per fan name found in samples. Names
+// are listed in first-seen order. Each series has one value per sample, taken
+// from the first fan entry with that name; a sample without such an entry
+// leaves a 0 at its position.
+func namedFanDatasets(samples []platform.LiveMetricSample) ([][]float64, []string) {
+	var names []string
+	known := make(map[string]bool)
+	for i := range samples {
+		for _, f := range samples[i].Fans {
+			if known[f.Name] {
+				continue
+			}
+			known[f.Name] = true
+			names = append(names, f.Name)
+		}
+	}
+
+	datasets := make([][]float64, len(names))
+	for k, name := range names {
+		series := make([]float64, len(samples))
+		for i := range samples {
+			for _, f := range samples[i].Fans {
+				if f.Name == name {
+					series[i] = f.RPM
+					break
+				}
+			}
+		}
+		datasets[k] = series
+	}
+	return datasets, names
+}
+
+// gpuDatasets builds one series per GPU index found in samples, using pick to
+// extract the charted value from each GPU row. GPUs appear in first-seen order
+// and are named "GPU <index>".
+func gpuDatasets(samples []platform.LiveMetricSample, pick func(platform.GPUMetricRow) float64) ([][]float64, []string) {
+	var order []int
+	known := make(map[int]bool)
+	for i := range samples {
+		for _, g := range samples[i].GPUs {
+			if known[g.GPUIndex] {
+				continue
+			}
+			known[g.GPUIndex] = true
+			order = append(order, g.GPUIndex)
+		}
+	}
+
+	datasets := make([][]float64, 0, len(order))
+	names := make([]string, 0, len(order))
+	for _, gpu := range order {
+		if series := gpuDatasetByIndex(samples, gpu, pick); series != nil {
+			datasets = append(datasets, series)
+			names = append(names, fmt.Sprintf("GPU %d", gpu))
+		}
+	}
+	return datasets, names
+}
+
+// gpuDatasetByIndex returns one value per sample for the GPU with index idx,
+// extracted with pick from the first matching row of each sample. Samples
+// without that GPU leave a 0 at their position. The result is nil when no
+// sample contains the GPU at all.
+func gpuDatasetByIndex(samples []platform.LiveMetricSample, idx int, pick func(platform.GPUMetricRow) float64) []float64 {
+	series := make([]float64, len(samples))
+	hits := 0
+	for i := range samples {
+		for _, g := range samples[i].GPUs {
+			if g.GPUIndex != idx {
+				continue
+			}
+			series[i] = pick(g)
+			hits++
+			break
+		}
+	}
+	if hits == 0 {
+		return nil
+	}
+	return series
+}
+
+// coalesceDataset returns ds unchanged when it is non-nil, and a zero-filled
+// slice of length n when ds is nil.
+func coalesceDataset(ds []float64, n int) []float64 {
+	if ds == nil {
+		return make([]float64, n)
+	}
+	return ds
+}
+
 // floatPtr returns a pointer to a float64 value.
 func floatPtr(v float64) *float64 { return &v }

@@ -621,6 +935,47 @@ func autoMax120(datasets ...[]float64) *float64 {
 	return &v
 }

+// autoBounds120 derives padded Y-axis bounds from every value in datasets.
+//
+// It returns (nil, nil) when there are no values, and (0, nil) when the
+// largest value is not positive. Otherwise the bounds are the observed range
+// widened by 20% of its span on each side, with the lower bound clamped at 0.
+// A flat series uses 10% of its value as the span.
+func autoBounds120(datasets ...[]float64) (*float64, *float64) {
+	var lo, hi float64
+	seen := false
+	for _, series := range datasets {
+		for _, v := range series {
+			if !seen {
+				lo, hi, seen = v, v, true
+				continue
+			}
+			if v < lo {
+				lo = v
+			}
+			if v > hi {
+				hi = v
+			}
+		}
+	}
+	switch {
+	case !seen:
+		return nil, nil
+	case hi <= 0:
+		return floatPtr(0), nil
+	}
+
+	span := hi - lo
+	if span <= 0 {
+		span = hi * 0.1
+		if span <= 0 {
+			span = 1
+		}
+	}
+	pad := span * 0.2
+	floor := lo - pad
+	if floor < 0 {
+		floor = 0
+	}
+	return floatPtr(floor), floatPtr(hi + pad)
+}
+
 // renderChartSVG renders a line chart SVG with a fixed Y-axis range.
 func renderChartSVG(title string, datasets [][]float64, names []string, labels []string, yMin, yMax *float64) ([]byte, error) {
 	n := len(labels)
--- a/audit/internal/webui/server_test.go
+++ b/audit/internal/webui/server_test.go
@@ -7,6 +7,9 @@ import (
 	"path/filepath"
 	"strings"
 	"testing"
+	"time"
+
+	"bee/audit/internal/platform"
 )

 func TestChartLegendNumber(t *testing.T) {
@@ -31,6 +34,61 @@ func TestChartLegendNumber(t *testing.T) {
 	}
 }

+// TestChartDataFromSamplesUsesFullHistory checks that the "gpu-all-power"
+// chart built from three samples keeps one point per sample, in input order,
+// for the single GPU present.
+func TestChartDataFromSamplesUsesFullHistory(t *testing.T) {
+	rows := []struct {
+		ago                              time.Duration
+		cpu, mem, power                  float64
+		gpuUse, gpuMem, gpuPower, gpuTmp float64
+	}{
+		{3 * time.Minute, 10, 20, 300, 90, 5, 120, 50},
+		{2 * time.Minute, 30, 40, 320, 95, 7, 125, 51},
+		{1 * time.Minute, 50, 60, 340, 97, 9, 130, 52},
+	}
+	samples := make([]platform.LiveMetricSample, 0, len(rows))
+	for _, r := range rows {
+		samples = append(samples, platform.LiveMetricSample{
+			Timestamp:  time.Now().Add(-r.ago),
+			CPULoadPct: r.cpu,
+			MemLoadPct: r.mem,
+			PowerW:     r.power,
+			GPUs: []platform.GPUMetricRow{
+				{GPUIndex: 0, UsagePct: r.gpuUse, MemUsagePct: r.gpuMem, PowerW: r.gpuPower, TempC: r.gpuTmp},
+			},
+		})
+	}
+
+	datasets, names, labels, title, _, _, ok := chartDataFromSamples("gpu-all-power", samples)
+	if !ok {
+		t.Fatal("chartDataFromSamples returned ok=false")
+	}
+	if title != "GPU Power" {
+		t.Fatalf("title=%q", title)
+	}
+	if len(names) != 1 || names[0] != "GPU 0" {
+		t.Fatalf("names=%v", names)
+	}
+	if len(labels) != len(samples) {
+		t.Fatalf("labels len=%d want %d", len(labels), len(samples))
+	}
+	if len(datasets) != 1 || len(datasets[0]) != len(samples) {
+		t.Fatalf("datasets shape=%v", datasets)
+	}
+	// The first and last points must match the oldest and newest samples.
+	if got := datasets[0][0]; got != 120 {
+		t.Fatalf("datasets[0][0]=%v want 120", got)
+	}
+	if got := datasets[0][2]; got != 130 {
+		t.Fatalf("datasets[0][2]=%v want 130", got)
+	}
+}
+
 func TestRootRendersDashboard(t *testing.T) {
 	dir := t.TempDir()
 	path := filepath.Join(dir, "audit.json")
--- a/audit/internal/webui/tasks.go
+++ b/audit/internal/webui/tasks.go
@@ -30,6 +30,8 @@ var taskNames = map[string]string{
 	"storage":        "Storage SAT",
 	"cpu":            "CPU SAT",
 	"amd":            "AMD GPU SAT",
+	"amd-mem":        "AMD GPU MEM Integrity",
+	"amd-bandwidth":  "AMD GPU MEM Bandwidth",
 	"amd-stress":     "AMD GPU Burn-in",
 	"memory-stress":  "Memory Burn-in",
 	"sat-stress":     "SAT Stress (stressapptest)",
@@ -124,6 +126,12 @@ var (
 	runAMDAcceptancePackCtx = func(a *app.App, ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
 		return a.RunAMDAcceptancePackCtx(ctx, baseDir, logFunc)
 	}
+	runAMDMemIntegrityPackCtx = func(a *app.App, ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+		return a.RunAMDMemIntegrityPackCtx(ctx, baseDir, logFunc)
+	}
+	runAMDMemBandwidthPackCtx = func(a *app.App, ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+		return a.RunAMDMemBandwidthPackCtx(ctx, baseDir, logFunc)
+	}
 	runAMDStressPackCtx = func(a *app.App, ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
 		return a.RunAMDStressPackCtx(ctx, baseDir, durationSec, logFunc)
 	}
@@ -380,6 +388,10 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 		archive, err = runCPUAcceptancePackCtx(a, ctx, "", dur, j.append)
 	case "amd":
 		archive, err = runAMDAcceptancePackCtx(a, ctx, "", j.append)
+	case "amd-mem":
+		archive, err = runAMDMemIntegrityPackCtx(a, ctx, "", j.append)
+	case "amd-bandwidth":
+		archive, err = runAMDMemBandwidthPackCtx(a, ctx, "", j.append)
 	case "amd-stress":
 		dur := t.params.Duration
 		if t.params.BurnProfile != "" && dur <= 0 {