Fix GPU model propagation, export filenames, PSU/service status, and chart perf
- nvidia.go: add Name field to nvidiaGPUInfo, include model name in nvidia-smi query, set dev.Model in enrichPCIeWithNVIDIAData - pages.go: fix duplicate GPU count in validate card summary (4 GPU: 4 x … → 4 x … GPU); fix PSU UNKNOWN fallback from hw.PowerSupplies; treat activating/deactivating/reloading service states as OK in Runtime Health - support_bundle.go: use "150405" time format (no colons) for exFAT compat - sat.go / benchmark.go / platform_stress.go / sat_fan_stress.go: remove .tar.gz archive creation from export dirs — export packs everything itself - charts_svg.go: add min-max downsampling (1400 pt cap) for SVG chart perf - benchmark_report.go / sat.go: normalize GPU fallback to "Unknown GPU" Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -213,7 +213,7 @@ func BuildSupportBundle(exportDir string) (string, error) {
|
|||||||
|
|
||||||
now := time.Now().UTC()
|
now := time.Now().UTC()
|
||||||
date := now.Format("2006-01-02")
|
date := now.Format("2006-01-02")
|
||||||
tod := now.Format("15:04:05")
|
tod := now.Format("150405")
|
||||||
ver := bundleVersion()
|
ver := bundleVersion()
|
||||||
model := serverModelForBundle()
|
model := serverModelForBundle()
|
||||||
sn := serverSerialForBundle()
|
sn := serverSerialForBundle()
|
||||||
|
|||||||
@@ -335,11 +335,7 @@ func (s *System) RunNvidiaBenchmark(ctx context.Context, baseDir string, opts Nv
|
|||||||
return "", fmt.Errorf("write summary.txt: %w", err)
|
return "", fmt.Errorf("write summary.txt: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
archive := filepath.Join(baseDir, "gpu-benchmark-"+ts+".tar.gz")
|
return runDir, nil
|
||||||
if err := createTarGz(archive, runDir); err != nil {
|
|
||||||
return "", fmt.Errorf("pack benchmark archive: %w", err)
|
|
||||||
}
|
|
||||||
return archive, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func normalizeNvidiaBenchmarkOptionsForBenchmark(opts NvidiaBenchmarkOptions) NvidiaBenchmarkOptions {
|
func normalizeNvidiaBenchmarkOptionsForBenchmark(opts NvidiaBenchmarkOptions) NvidiaBenchmarkOptions {
|
||||||
|
|||||||
@@ -90,7 +90,7 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benc
|
|||||||
for _, gpu := range result.GPUs {
|
for _, gpu := range result.GPUs {
|
||||||
name := strings.TrimSpace(gpu.Name)
|
name := strings.TrimSpace(gpu.Name)
|
||||||
if name == "" {
|
if name == "" {
|
||||||
name = "Unknown"
|
name = "Unknown GPU"
|
||||||
}
|
}
|
||||||
interconnect := "-"
|
interconnect := "-"
|
||||||
if gpu.Scores.InterconnectScore > 0 {
|
if gpu.Scores.InterconnectScore > 0 {
|
||||||
|
|||||||
@@ -161,13 +161,7 @@ func (s *System) RunPlatformStress(
|
|||||||
}
|
}
|
||||||
_ = os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary), 0644)
|
_ = os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary), 0644)
|
||||||
|
|
||||||
// Pack tar.gz
|
return runDir, nil
|
||||||
archivePath := filepath.Join(baseDir, "platform-stress-"+stamp+".tar.gz")
|
|
||||||
if err := packPlatformDir(runDir, archivePath); err != nil {
|
|
||||||
return "", fmt.Errorf("pack archive: %w", err)
|
|
||||||
}
|
|
||||||
_ = os.RemoveAll(runDir)
|
|
||||||
return archivePath, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// collectPhase samples live metrics every second until ctx is done.
|
// collectPhase samples live metrics every second until ctx is done.
|
||||||
|
|||||||
@@ -662,11 +662,7 @@ func (s *System) RunStorageAcceptancePack(ctx context.Context, baseDir string, e
|
|||||||
if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary.String()), 0644); err != nil {
|
if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary.String()), 0644); err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
archive := filepath.Join(baseDir, "storage-"+ts+".tar.gz")
|
return runDir, nil
|
||||||
if err := createTarGz(archive, runDir); err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
return archive, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type satJob struct {
|
type satJob struct {
|
||||||
@@ -852,11 +848,7 @@ func runAcceptancePackCtx(ctx context.Context, baseDir, prefix string, jobs []sa
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
archive := filepath.Join(baseDir, prefix+"-"+ts+".tar.gz")
|
return runDir, nil
|
||||||
if err := createTarGz(archive, runDir); err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
return archive, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func updateNvidiaGPUStatus(perGPU map[int]*nvidiaGPUStatusFile, idx int, status, jobName, detail string) {
|
func updateNvidiaGPUStatus(perGPU map[int]*nvidiaGPUStatusFile, idx int, status, jobName, detail string) {
|
||||||
@@ -919,7 +911,7 @@ func writeNvidiaGPUStatusFiles(runDir, overall string, perGPU map[int]*nvidiaGPU
|
|||||||
entry.Health = "UNKNOWN"
|
entry.Health = "UNKNOWN"
|
||||||
}
|
}
|
||||||
if entry.Name == "" {
|
if entry.Name == "" {
|
||||||
entry.Name = "unknown"
|
entry.Name = "Unknown GPU"
|
||||||
}
|
}
|
||||||
var body strings.Builder
|
var body strings.Builder
|
||||||
fmt.Fprintf(&body, "gpu_index=%d\n", entry.Index)
|
fmt.Fprintf(&body, "gpu_index=%d\n", entry.Index)
|
||||||
|
|||||||
@@ -223,11 +223,7 @@ func (s *System) RunFanStressTest(ctx context.Context, baseDir string, opts FanS
|
|||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
archive := filepath.Join(baseDir, "fan-stress-"+ts+".tar.gz")
|
return runDir, nil
|
||||||
if err := createTarGz(archive, runDir); err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
return archive, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func applyFanStressDefaults(opts *FanStressOptions) {
|
func applyFanStressDefaults(opts *FanStressOptions) {
|
||||||
|
|||||||
@@ -83,6 +83,10 @@ func renderMetricChartSVG(title string, labels []string, times []time.Time, data
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Downsample to at most ~1400 points (one per pixel) before building SVG.
|
||||||
|
times, datasets = downsampleTimeSeries(times, datasets, 1400)
|
||||||
|
pointCount = len(times)
|
||||||
|
|
||||||
statsLabel := chartStatsLabel(datasets)
|
statsLabel := chartStatsLabel(datasets)
|
||||||
|
|
||||||
legendItems := []metricChartSeries{}
|
legendItems := []metricChartSeries{}
|
||||||
@@ -196,6 +200,19 @@ func drawGPUOverviewChartSVG(title string, labels []string, times []time.Time, s
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Downsample to at most ~1400 points before building SVG.
|
||||||
|
{
|
||||||
|
datasets := make([][]float64, len(series))
|
||||||
|
for i := range series {
|
||||||
|
datasets[i] = series[i].Values
|
||||||
|
}
|
||||||
|
times, datasets = downsampleTimeSeries(times, datasets, 1400)
|
||||||
|
pointCount = len(times)
|
||||||
|
for i := range series {
|
||||||
|
series[i].Values = datasets[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
scales := make([]chartScale, len(series))
|
scales := make([]chartScale, len(series))
|
||||||
for i := range series {
|
for i := range series {
|
||||||
min, max := chartSeriesBounds(series[i].Values)
|
min, max := chartSeriesBounds(series[i].Values)
|
||||||
@@ -626,6 +643,87 @@ func writeTimelineBoundaries(b *strings.Builder, layout chartLayout, start, end
|
|||||||
b.WriteString(`</g>` + "\n")
|
b.WriteString(`</g>` + "\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// downsampleTimeSeries reduces a time series to at most maxPts points using
// min-max bucketing, so short spikes and dips survive the reduction.
//
// The samples are split into maxPts/2 (at least one) contiguous buckets. The
// first dataset whose length equals len(times) acts as the reference series:
// every bucket keeps the indices of the reference's minimum and maximum, in
// chronological order. When no dataset has that length, the first and last
// index of each bucket are kept instead. All datasets of length len(times)
// are sampled at the same indices so the series stay aligned; datasets of any
// other length are passed through untouched.
//
// If maxPts <= 0 or len(times) <= maxPts the inputs are returned unchanged
// (same slices, no copy). Otherwise freshly allocated slices are returned and
// the inputs are not modified.
func downsampleTimeSeries(times []time.Time, datasets [][]float64, maxPts int) ([]time.Time, [][]float64) {
	n := len(times)
	if maxPts <= 0 || n <= maxPts {
		return times, datasets
	}

	// Every bucket may contribute two points (its min and its max), so only
	// half as many buckets as requested points are used.
	buckets := maxPts / 2
	if buckets < 1 {
		buckets = 1
	}

	// Use the first dataset that has the same length as times as the reference
	// for deciding which two indices to keep per bucket.
	var ref []float64
	for _, ds := range datasets {
		if len(ds) == n {
			ref = ds
			break
		}
	}

	selected := make([]int, 0, 2*buckets)
	bucketSize := float64(n) / float64(buckets)
	for b := 0; b < buckets; b++ {
		lo := int(math.Round(float64(b) * bucketSize))
		hi := int(math.Round(float64(b+1) * bucketSize))
		if hi > n {
			hi = n
		}
		if lo >= hi {
			continue
		}

		if ref == nil {
			// No usable reference series: keep the bucket's endpoints.
			selected = append(selected, lo)
			if hi-1 != lo {
				selected = append(selected, hi-1)
			}
			continue
		}

		minIdx, maxIdx := lo, lo
		for i := lo + 1; i < hi; i++ {
			if ref[i] < ref[minIdx] {
				minIdx = i
			}
			if ref[i] > ref[maxIdx] {
				maxIdx = i
			}
		}

		// Emit the two extremes in chronological order so the output indices
		// stay strictly increasing.
		first, second := minIdx, maxIdx
		if first > second {
			first, second = second, first
		}
		selected = append(selected, first)
		if second != first {
			selected = append(selected, second)
		}
	}

	// With maxPts == 1 the single bucket still yields both a min and a max;
	// trim so the documented upper bound holds for every maxPts. For
	// maxPts >= 2 the selection never exceeds 2*buckets <= maxPts, so this is
	// a no-op.
	if len(selected) > maxPts {
		selected = selected[:maxPts]
	}

	outTimes := make([]time.Time, len(selected))
	for i, idx := range selected {
		outTimes[i] = times[idx]
	}

	outDatasets := make([][]float64, len(datasets))
	for d, ds := range datasets {
		if len(ds) != n {
			// Not aligned with times; cannot be resampled by index.
			outDatasets[d] = ds
			continue
		}
		out := make([]float64, len(selected))
		for i, idx := range selected {
			out[i] = ds[idx]
		}
		outDatasets[d] = out
	}
	return outTimes, outDatasets
}
|
||||||
|
|
||||||
func chartXForTime(ts, start, end time.Time, left, right int) float64 {
|
func chartXForTime(ts, start, end time.Time, left, right int) float64 {
|
||||||
if !end.After(start) {
|
if !end.After(start) {
|
||||||
return float64(left+right) / 2
|
return float64(left+right) / 2
|
||||||
|
|||||||
@@ -349,6 +349,9 @@ func renderHardwareSummaryCard(opts HandlerOptions) string {
|
|||||||
writeRow("GPU", hwDescribeGPU(hw), runtimeStatusBadge(gpuRow.Status))
|
writeRow("GPU", hwDescribeGPU(hw), runtimeStatusBadge(gpuRow.Status))
|
||||||
|
|
||||||
psuRow := aggregateComponentStatus("PSU", records, nil, []string{"psu:"})
|
psuRow := aggregateComponentStatus("PSU", records, nil, []string{"psu:"})
|
||||||
|
if psuRow.Status == "UNKNOWN" && len(hw.PowerSupplies) > 0 {
|
||||||
|
psuRow.Status = hwPSUStatus(hw.PowerSupplies)
|
||||||
|
}
|
||||||
writeRow("PSU", hwDescribePSU(hw), runtimeStatusBadge(psuRow.Status))
|
writeRow("PSU", hwDescribePSU(hw), runtimeStatusBadge(psuRow.Status))
|
||||||
|
|
||||||
if nicDesc := hwDescribeNIC(hw); nicDesc != "" {
|
if nicDesc := hwDescribeNIC(hw); nicDesc != "" {
|
||||||
@@ -506,6 +509,31 @@ func hwDescribeGPU(hw schema.HardwareSnapshot) string {
|
|||||||
return strings.Join(parts, ", ")
|
return strings.Join(parts, ", ")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// hwPSUStatus returns "OK", "CRITICAL", "WARNING", or "UNKNOWN" based on
|
||||||
|
// PSU statuses from the audit snapshot. Used as fallback when component-status.json
|
||||||
|
// has no psu: records yet (e.g. first boot before audit writes them).
|
||||||
|
func hwPSUStatus(psus []schema.HardwarePowerSupply) string {
|
||||||
|
worst := "UNKNOWN"
|
||||||
|
for _, psu := range psus {
|
||||||
|
if psu.Status == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
switch strings.ToUpper(strings.TrimSpace(*psu.Status)) {
|
||||||
|
case "CRITICAL":
|
||||||
|
return "CRITICAL"
|
||||||
|
case "WARNING":
|
||||||
|
if worst != "CRITICAL" {
|
||||||
|
worst = "WARNING"
|
||||||
|
}
|
||||||
|
case "OK":
|
||||||
|
if worst == "UNKNOWN" {
|
||||||
|
worst = "OK"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return worst
|
||||||
|
}
|
||||||
|
|
||||||
// hwDescribePSU returns a summary like "2× 1600 W" or "2× PSU".
|
// hwDescribePSU returns a summary like "2× 1600 W" or "2× PSU".
|
||||||
func hwDescribePSU(hw schema.HardwareSnapshot) string {
|
func hwDescribePSU(hw schema.HardwareSnapshot) string {
|
||||||
n := len(hw.PowerSupplies)
|
n := len(hw.PowerSupplies)
|
||||||
@@ -742,7 +770,13 @@ func buildRuntimeServicesRow(health schema.RuntimeHealth) runtimeHealthRow {
|
|||||||
nonActive := make([]string, 0)
|
nonActive := make([]string, 0)
|
||||||
for _, svc := range health.Services {
|
for _, svc := range health.Services {
|
||||||
state := strings.TrimSpace(strings.ToLower(svc.Status))
|
state := strings.TrimSpace(strings.ToLower(svc.Status))
|
||||||
if state != "active" {
|
// "activating" and "deactivating" are transient states for oneshot services
|
||||||
|
// (RemainAfterExit=yes) — the service is running normally, not failed.
|
||||||
|
// Only "failed" and "inactive" (after services should be running) are problems.
|
||||||
|
switch state {
|
||||||
|
case "active", "activating", "deactivating", "reloading":
|
||||||
|
// OK — service is running or transitioning normally
|
||||||
|
default:
|
||||||
nonActive = append(nonActive, svc.Name+"="+svc.Status)
|
nonActive = append(nonActive, svc.Name+"="+svc.Status)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1777,6 +1811,11 @@ func formatValidateDeviceSummary(total int, models map[string]int, unit string)
|
|||||||
if total != 1 {
|
if total != 1 {
|
||||||
label += "s"
|
label += "s"
|
||||||
}
|
}
|
||||||
|
// If there is only one model the leading count duplicates the per-model
|
||||||
|
// count already in parts (e.g. "4 GPU: 4 x RTX …" → "4 x RTX …").
|
||||||
|
if len(parts) == 1 {
|
||||||
|
return parts[0] + " " + label
|
||||||
|
}
|
||||||
return fmt.Sprintf("%d %s: %s", total, label, strings.Join(parts, ", "))
|
return fmt.Sprintf("%d %s: %s", total, label, strings.Join(parts, ", "))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
117
bible-local/docs/gpu-model-propagation.md
Normal file
117
bible-local/docs/gpu-model-propagation.md
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
# GPU Model Name Propagation
|
||||||
|
|
||||||
|
How GPU model names are detected, stored, and displayed throughout the project.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Detection Sources
|
||||||
|
|
||||||
|
There are **three separate pipelines** for GPU model names — they use different structs and don't share state.
|
||||||
|
|
||||||
|
### Pipeline A — Live / SAT (nvidia-smi query at runtime)
|
||||||
|
|
||||||
|
**File:** `audit/internal/platform/sat.go`
|
||||||
|
|
||||||
|
- `ListNvidiaGPUs()` → `NvidiaGPU.Name` (field: `name`, from `nvidia-smi --query-gpu=index,name,...`)
|
||||||
|
- `ListNvidiaGPUStatuses()` → `NvidiaGPUStatus.Name`
|
||||||
|
- Used by: GPU selection UI, live metrics labels, burn/stress test logic
|
||||||
|
|
||||||
|
### Pipeline B — Benchmark results
|
||||||
|
|
||||||
|
**File:** `audit/internal/platform/benchmark.go`, line 124
|
||||||
|
|
||||||
|
- `queryBenchmarkGPUInfo(selected)` → `benchmarkGPUInfo.Name`
|
||||||
|
- Stored in `BenchmarkGPUResult.Name` (`json:"name,omitempty"`)
|
||||||
|
- Used by: benchmark history table, benchmark report
|
||||||
|
|
||||||
|
### Pipeline C — Hardware audit JSON (PCIe schema)
|
||||||
|
|
||||||
|
**File:** `audit/internal/schema/hardware.go`
|
||||||
|
|
||||||
|
- `HardwarePCIeDevice.Model *string` (field name is **Model**, not Name)
|
||||||
|
- For AMD GPUs: populated by `audit/internal/collector/amdgpu.go` from `info.Product`
|
||||||
|
- For NVIDIA GPUs: **NOT populated** by `audit/internal/collector/nvidia.go` — the NVIDIA enricher sets telemetry/status but skips the Model field
|
||||||
|
- Used by: hardware summary page (`hwDescribeGPU` in `pages.go:487`)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Key Inconsistency: NVIDIA PCIe Model is Never Set
|
||||||
|
|
||||||
|
`audit/internal/collector/nvidia.go` — `enrichPCIeWithNVIDIAData()` enriches NVIDIA PCIe devices with telemetry and status but does **not** populate `HardwarePCIeDevice.Model`.
|
||||||
|
|
||||||
|
This means:
|
||||||
|
- Hardware summary page shows "Unknown GPU" for all NVIDIA devices (falls back at `pages.go:486`)
|
||||||
|
- AMD GPUs do have their model populated
|
||||||
|
|
||||||
|
The fix would be: copy `gpu.Name` from the SAT pipeline into `dev.Model` inside `enrichPCIeWithNVIDIAData`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Benchmark History "Unknown GPU" Issue
|
||||||
|
|
||||||
|
**Symptom:** Benchmark history table shows "GPU #N — Unknown GPU" columns instead of real GPU model names.
|
||||||
|
|
||||||
|
**Root cause:** `BenchmarkGPUResult.Name` has tag `json:"name,omitempty"`. If `queryBenchmarkGPUInfo()` fails (warns at `benchmark.go:126`) or returns empty names, the Name field is never set and is omitted from JSON. Loaded results have empty Name → falls back to "Unknown GPU" at `pages.go:2226, 2237`.
|
||||||
|
|
||||||
|
This happens for:
|
||||||
|
- Older result files saved before the `Name` field was added
|
||||||
|
- Runs where nvidia-smi query failed before the benchmark started
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Fallback Strings — Current State
|
||||||
|
|
||||||
|
| Location | File | Fallback string |
|
||||||
|
|---|---|---|
|
||||||
|
| Hardware summary (PCIe) | `pages.go:486` | `"Unknown GPU"` |
|
||||||
|
| Benchmark report summary | `benchmark_report.go:43` | `"Unknown GPU"` |
|
||||||
|
| Benchmark report scorecard | `benchmark_report.go:93` | `"Unknown"` ← inconsistent |
|
||||||
|
| Benchmark report detail | `benchmark_report.go:122` | `"Unknown GPU"` |
|
||||||
|
| Benchmark history per-GPU col | `pages.go:2226` | `"Unknown GPU"` |
|
||||||
|
| Benchmark history parallel col | `pages.go:2237` | `"Unknown GPU"` |
|
||||||
|
| SAT status file write | `sat.go:922` | `"unknown"` ← lowercase, inconsistent |
|
||||||
|
| GPU selection API | `api.go:163` | `"GPU N"` (no "Unknown") |
|
||||||
|
|
||||||
|
**Rule:** all UI fallbacks should use `"Unknown GPU"`. The two outliers are `benchmark_report.go:93` (`"Unknown"`) and `sat.go:922` (`"unknown"`).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## GPU Selection UI
|
||||||
|
|
||||||
|
**File:** `audit/internal/webui/pages.go`
|
||||||
|
|
||||||
|
- Source: `GET /api/gpus` → `api.go` → `ListNvidiaGPUs()` → live nvidia-smi
|
||||||
|
- Render: `'GPU ' + gpu.index + ' — ' + gpu.name + ' · ' + mem`
|
||||||
|
- Fallback: `gpu.name || 'GPU ' + idx` (JS, line ~1432)
|
||||||
|
|
||||||
|
This always shows the correct model because it queries nvidia-smi live. It is **not** connected to benchmark result data.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Data Flow Summary
|
||||||
|
|
||||||
|
```
|
||||||
|
nvidia-smi (live)
|
||||||
|
└─ ListNvidiaGPUs() → NvidiaGPU.Name
|
||||||
|
├─ GPU selection UI (always correct)
|
||||||
|
├─ Live metrics labels (charts_svg.go)
|
||||||
|
└─ SAT/burn status file (sat.go)
|
||||||
|
|
||||||
|
nvidia-smi (at benchmark start)
|
||||||
|
└─ queryBenchmarkGPUInfo() → benchmarkGPUInfo.Name
|
||||||
|
└─ BenchmarkGPUResult.Name (json:"name,omitempty")
|
||||||
|
├─ Benchmark report
|
||||||
|
└─ Benchmark history table columns
|
||||||
|
|
||||||
|
nvidia-smi / lspci (audit collection)
|
||||||
|
└─ HardwarePCIeDevice.Model (NVIDIA: NOT populated; AMD: populated)
|
||||||
|
└─ Hardware summary page hwDescribeGPU()
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## What Needs Fixing
|
||||||
|
|
||||||
|
1. **NVIDIA PCIe Model** — `enrichPCIeWithNVIDIAData()` should set `dev.Model = &gpu.Name`
|
||||||
|
2. **Fallback consistency** — `benchmark_report.go:93` should say `"Unknown GPU"` not `"Unknown"`; `sat.go:922` should say `"Unknown GPU"` not `"unknown"`
|
||||||
|
3. **Old benchmark JSONs** — no fix possible for already-saved results with missing names (display-only issue)
|
||||||
Reference in New Issue
Block a user