commit d7f10f8f0891f495d83e0419ae01789ef71ba61b Author: Michael Chus Date: Mon Apr 13 12:00:47 2026 +0300 2026-04-13 benchmarks diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7ba63ab --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +# Allow only result.json and README.md everywhere; block everything else. +* +!*/ +!.gitignore +!README.md +!**/result.json diff --git a/README.md b/README.md new file mode 100644 index 0000000..119fbc4 --- /dev/null +++ b/README.md @@ -0,0 +1,127 @@ +# pub-beebench + +Публичный репозиторий с результатами GPU-бенчмарков. Содержимое автоматически +публикуется на сайте через [beebench-publisher](https://github.com/reanimator/beebench). + +--- + +## Структура репозитория + +Каждый запуск бенчмарка — отдельная **директория** в корне репозитория или +архив `.tar.gz`. Вложенность произвольная: `result.json` может лежать как +непосредственно в директории бандла, так и в поддиректории. + +``` +pub-beebench/ + dell-r760-h100-2026-04-12/ + result.json + 00-nvidia-smi-q.log # опционально — используется для определения + gpu-0-steady.log # модели GPU, если поле name пустое в result.json + supermicro-x13-a100-2026-03-01/ + logs/ + gpu-0-steady.log + result.json + xe9680-h100x8-2026-02-15.tar.gz +``` + +**Правила именования директории/архива** — произвольные, на публикацию не +влияют. Рекомендуется использовать читаемые имена с датой: +`<сервер>-<gpu>-<дата>`. + +--- + +## Формат result.json + +Обязательные поля отмечены `*`.
+ +```jsonc +{ + "benchmark_version": "v1.0.0", + "generated_at": "2026-04-12T10:30:00Z", // * ISO 8601 UTC + "server_model": "Dell PowerEdge R760", // * используется как заголовок страницы сервера + "hostname": "r760-lab-01", // отображается, если server_model не задан + "benchmark_profile": "standard", // произвольная строка: standard / overnight / … + "overall_status": "OK", // OK | WARN | FAIL + "selected_gpu_indices": [0], // индексы GPU, участвовавших в тесте + "findings": ["Stable benchmark run"], // список наблюдений + "warnings": [], + "gpus": [ // * минимум один элемент + { + "index": 0, // * порядковый номер GPU + "name": "NVIDIA H100 PCIe 80GB", // * модель; если пустая — берётся из лог-файлов + "status": "OK", // OK | WARN | FAIL + "bus_id": "0000:17:00.0", + "vbios": "96.00.5E.00.02", + "compute_capability": "9.0", + "multiprocessor_count": 114, + "steady": { + "duration_sec": 90, + "avg_temp_c": 73.2, + "p95_temp_c": 75.0, + "avg_power_w": 311.5, + "p95_power_w": 321.0, + "avg_graphics_clock_mhz": 1618, + "p95_graphics_clock_mhz": 1634, + "avg_memory_clock_mhz": 1593, + "p95_memory_clock_mhz": 1593, + "avg_usage_pct": 98.4, + "avg_mem_usage_pct": 92.1 + }, + "scores": { // * все поля обязательны; compute_score > 0 + "compute_score": 932.4, + "power_sustain_score": 901.2, + "thermal_sustain_score": 889.3, + "stability_score": 944.0, + "composite_score": 926.7 + }, + "degradation_reasons": [], + "notes": ["Healthy run"] + } + ] +} +``` + +### Обязательные условия для публикации + +- `generated_at` — должно быть заполнено и являться валидной датой ISO 8601 +- `gpus` — минимум один элемент +- `compute_score` хотя бы у одного GPU должен быть больше `0`; запуски, где все + оценки равны нулю, считаются неудавшимися и отклоняются + +--- + +## Несколько GPU в одном запуске + +Если сервер тестировался с несколькими GPU одновременно, все они перечисляются +в массиве `gpus`. Каждая карта получает отдельную запись с собственными +метриками и оценками.
+ +```jsonc +"gpus": [ + { "index": 0, "name": "NVIDIA H100 PCIe 80GB", ... }, + { "index": 1, "name": "NVIDIA H100 PCIe 80GB", ... } +] +``` + +--- + +## Дополнительные файлы в бандле + +Если поле `name` у GPU пустое, publisher попытается определить модель из +следующих файлов (ищутся в той же директории, что и `result.json`): + +| Файл | Источник | +|---|---| +| `00-nvidia-smi-q.log` | вывод `nvidia-smi -q` | +| `gpu-0-warmup.log` | лог прогрева | +| `gpu-0-steady.log` | лог steady-фазы | + +Остальные файлы (`.html`, `.csv`, `.txt`) игнорируются, но могут присутствовать. + +--- + +## Удаление результата + +Чтобы убрать результат с сайта — удалите соответствующую директорию или архив +из репозитория и сделайте коммит. Сайт обновится при следующей синхронизации +(по умолчанию раз в сутки). diff --git a/gpu-benchmark-20260413-040542/result.json b/gpu-benchmark-20260413-040542/result.json new file mode 100644 index 0000000..1833570 --- /dev/null +++ b/gpu-benchmark-20260413-040542/result.json @@ -0,0 +1,190 @@ +{ + "benchmark_version": "1", + "generated_at": "2026-04-13T04:05:42.675718581Z", + "hostname": "debian", + "server_model": "G5500 V7", + "benchmark_profile": "standard", + "parallel_gpus": true, + "ramp_step": 1, + "ramp_total": 8, + "ramp_run_id": "ramp-20260413-035309", + "overall_status": "OK", + "selected_gpu_indices": [ + 0 + ], + "findings": [ + "All 1 GPU(s) passed the benchmark.", + "GPU 0 held clocks without observable throttle counters during steady state.", + "Server power delta 1188 W exceeds GPU-reported sum 514 W by 131%. Other components (CPU, NVMe, networking) may be drawing substantial power under GPU load."
+ ], + "normalization": { + "status": "full", + "gpus": [ + { + "index": 0, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + } + ] + }, + "host_config": { + "cpu_model": "INTEL(R) XEON(R) GOLD 6530", + "cpu_sockets": 2, + "cpu_cores": 64, + "cpu_threads": 128, + "mem_total_gib": 125.36544799804688 + }, + "cpu_load": { + "avg_pct": 1.1, + "max_pct": 4.5, + "p95_pct": 4, + "samples": 71, + "status": "ok" + }, + "gpus": [ + { + "index": 0, + "uuid": "GPU-8281bc14-c076-e306-75b1-e91596be45e7", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:4B:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.28, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 15.070173316, + "samples": 15, + "avg_temp_c": 57, + "p95_temp_c": 62.599999999999994, + "avg_power_w": 119.25133333333333, + "p95_power_w": 207.5479999999996, + "avg_graphics_clock_mhz": 2418, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0.6189673096400233, + "power_cv_pct": 74.77759859420686, + "temp_cv_pct": 6.077371254627639, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 518.070872587, + "samples": 518, + "avg_temp_c": 71.9073359073359, + "p95_temp_c": 72, + "avg_power_w": 514.3837065637066, + "p95_power_w": 520.4545, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 99.54826254826254, + "avg_mem_usage_pct": 38.46138996138996, + "clock_cv_pct": 0, + "power_cv_pct": 
5.442205008113535, + "temp_cv_pct": 0.9844059147322263, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.076189386, + "samples": 113, + "avg_temp_c": 49.92920353982301, + "p95_temp_c": 56, + "avg_power_w": 89.37265486725663, + "p95_power_w": 94.67, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 2.53783367168321, + "temp_cv_pct": 4.882111393317778, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.95333333333333, + "thermal_sustain_score": 100, + "stability_score": 89.11558998377294, + "interconnect_score": 0, + "composite_score": 545.2083180519243 + } + } + ], + "server_power": { + "available": true, + "idle_w": 993.3333333333334, + "loaded_w": 2181.4117647058824, + "delta_w": 1188.078431372549, + "gpu_reported_sum_w": 514.3837065637066, + "reporting_ratio": 
2.3097124115952243 + } +} \ No newline at end of file diff --git a/gpu-benchmark-20260413-042023/result.json b/gpu-benchmark-20260413-042023/result.json new file mode 100644 index 0000000..84455cd --- /dev/null +++ b/gpu-benchmark-20260413-042023/result.json @@ -0,0 +1,339 @@ +{ + "benchmark_version": "1", + "generated_at": "2026-04-13T04:20:23.254080304Z", + "hostname": "debian", + "server_model": "G5500 V7", + "benchmark_profile": "standard", + "parallel_gpus": true, + "ramp_step": 2, + "ramp_total": 8, + "ramp_run_id": "ramp-20260413-035309", + "overall_status": "OK", + "selected_gpu_indices": [ + 0, + 1 + ], + "findings": [ + "All 2 GPU(s) passed the benchmark.", + "GPU 0 showed unstable clocks/power over the benchmark window.", + "GPU 1 showed unstable clocks/power over the benchmark window.", + "Server power delta 1672 W exceeds GPU-reported sum 1036 W by 61%. Other components (CPU, NVMe, networking) may be drawing substantial power under GPU load." + ], + "normalization": { + "status": "full", + "gpus": [ + { + "index": 0, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 1, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + } + ] + }, + "host_config": { + "cpu_model": "INTEL(R) XEON(R) GOLD 6530", + "cpu_sockets": 2, + "cpu_cores": 64, + "cpu_threads": 128, + "mem_total_gib": 125.36544799804688 + }, + "cpu_load": { + "avg_pct": 1.5, + "max_pct": 1.6, + "p95_pct": 1.6, + "samples": 71, + "status": "ok" + }, + "gpus": [ + { + "index": 0, + "uuid": "GPU-8281bc14-c076-e306-75b1-e91596be45e7", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:4B:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + 
"power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.22, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 15.057418789, + "samples": 15, + "avg_temp_c": 59.53333333333333, + "p95_temp_c": 65.6, + "avg_power_w": 118.17133333333335, + "p95_power_w": 195.23499999999967, + "avg_graphics_clock_mhz": 2419.5333333333333, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0.3814546146932904, + "power_cv_pct": 64.74043823099278, + "temp_cv_pct": 6.098655679368466, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 520.069532944, + "samples": 520, + "avg_temp_c": 71.74807692307692, + "p95_temp_c": 72, + "avg_power_w": 511.9014230769234, + "p95_power_w": 520.072, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 99.03846153846153, + "avg_mem_usage_pct": 38.41538461538462, + "clock_cv_pct": 0, + "power_cv_pct": 7.630331977379506, + "temp_cv_pct": 1.6849782906634136, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 121.009027315, + "samples": 114, + "avg_temp_c": 49.79824561403509, + "p95_temp_c": 54.349999999999994, + "avg_power_w": 89.54026315789477, + "p95_power_w": 93.49049999999998, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 1.7838271138649684, + "temp_cv_pct": 3.883484530370241, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + 
"name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.96333333333332, + "thermal_sustain_score": 100, + "stability_score": 84.73933604524099, + "interconnect_score": 0, + "composite_score": 539.0845072169755 + }, + "degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 1, + "uuid": "GPU-a66d6d68-9870-dfe0-1823-b38a141c21ae", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:4C:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.225, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 15.057418789, + "samples": 15, + "avg_temp_c": 56.333333333333336, + "p95_temp_c": 61.599999999999994, + "avg_power_w": 113.68933333333335, + "p95_power_w": 194.66699999999966, + "avg_graphics_clock_mhz": 2419.5333333333333, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 
0.3814546146932904, + "power_cv_pct": 71.92406919974427, + "temp_cv_pct": 5.675540264275408, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 520.069532944, + "samples": 520, + "avg_temp_c": 74.13461538461539, + "p95_temp_c": 75, + "avg_power_w": 523.9403653846153, + "p95_power_w": 532.881, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 98.86923076923077, + "avg_mem_usage_pct": 39.32884615384615, + "clock_cv_pct": 0, + "power_cv_pct": 8.668121242011786, + "temp_cv_pct": 1.9390814166506207, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 121.009027315, + "samples": 114, + "avg_temp_c": 51.63157894736842, + "p95_temp_c": 56, + "avg_power_w": 88.05026315789462, + "p95_power_w": 92.084, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 1.8363510924174664, + "temp_cv_pct": 3.5292319871148523, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 
+ }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.96249999999999, + "thermal_sustain_score": 100, + "stability_score": 82.66375751597643, + "interconnect_score": 0, + "composite_score": 536.175405738999 + }, + "degradation_reasons": [ + "variance_too_high" + ] + } + ], + "server_power": { + "available": true, + "idle_w": 1028, + "loaded_w": 2699.883495145631, + "delta_w": 1671.883495145631, + "gpu_reported_sum_w": 1035.8417884615387, + "reporting_ratio": 1.6140336427522965 + } +} \ No newline at end of file diff --git a/gpu-benchmark-20260413-043509/result.json b/gpu-benchmark-20260413-043509/result.json new file mode 100644 index 0000000..e64eee2 --- /dev/null +++ b/gpu-benchmark-20260413-043509/result.json @@ -0,0 +1,485 @@ +{ + "benchmark_version": "1", + "generated_at": "2026-04-13T04:35:09.488332038Z", + "hostname": "debian", + "server_model": "G5500 V7", + "benchmark_profile": "standard", + "parallel_gpus": true, + "ramp_step": 3, + "ramp_total": 8, + "ramp_run_id": "ramp-20260413-035309", + "overall_status": "OK", + "selected_gpu_indices": [ + 0, + 1, + 2 + ], + "findings": [ + "All 3 GPU(s) passed the benchmark.", + "GPU 0 showed unstable clocks/power over the benchmark window.", + "GPU 1 showed unstable clocks/power over the benchmark window.", + "GPU 2 showed unstable clocks/power over the benchmark window.", + "Server power delta 2188 W exceeds GPU-reported sum 1565 W by 40%. Other components (CPU, NVMe, networking) may be drawing substantial power under GPU load." 
+ ], + "normalization": { + "status": "full", + "gpus": [ + { + "index": 0, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 1, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 2, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + } + ] + }, + "host_config": { + "cpu_model": "INTEL(R) XEON(R) GOLD 6530", + "cpu_sockets": 2, + "cpu_cores": 64, + "cpu_threads": 128, + "mem_total_gib": 125.36544799804688 + }, + "cpu_load": { + "avg_pct": 1.9, + "max_pct": 2.5, + "p95_pct": 2.5, + "samples": 82, + "status": "ok" + }, + "gpus": [ + { + "index": 0, + "uuid": "GPU-8281bc14-c076-e306-75b1-e91596be45e7", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:4B:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.255, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 15.033289155, + "samples": 15, + "avg_temp_c": 59.2, + "p95_temp_c": 65.3, + "avg_power_w": 118.2433333333333, + "p95_power_w": 195.2919999999997, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 64.53847869928777, + "temp_cv_pct": 5.890403977757667, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 522.069202793, + "samples": 522, 
+ "avg_temp_c": 71.73946360153256, + "p95_temp_c": 72, + "avg_power_w": 511.1453831417621, + "p95_power_w": 520.4285, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 98.83141762452107, + "avg_mem_usage_pct": 38.42145593869732, + "clock_cv_pct": 0, + "power_cv_pct": 8.76270864901095, + "temp_cv_pct": 2.1245135467552823, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.942911251, + "samples": 114, + "avg_temp_c": 49.43859649122807, + "p95_temp_c": 54, + "avg_power_w": 89.03017543859652, + "p95_power_w": 92.58, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 1.7468159750921186, + "temp_cv_pct": 3.6835591717577603, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 
99.9575, + "thermal_sustain_score": 100, + "stability_score": 82.47458270197811, + "interconnect_score": 0, + "composite_score": 535.9061211105268 + }, + "degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 1, + "uuid": "GPU-a66d6d68-9870-dfe0-1823-b38a141c21ae", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:4C:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.165, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 15.033289155, + "samples": 15, + "avg_temp_c": 61, + "p95_temp_c": 66.6, + "avg_power_w": 112.94333333333337, + "p95_power_w": 177.84699999999975, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 57.05117016518504, + "temp_cv_pct": 5.678855106783204, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 522.069202793, + "samples": 522, + "avg_temp_c": 73.69731800766283, + "p95_temp_c": 74, + "avg_power_w": 521.9090804597699, + "p95_power_w": 532.379, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 98.46934865900383, + "avg_mem_usage_pct": 38.99233716475096, + "clock_cv_pct": 0, + "power_cv_pct": 9.915628616003222, + "temp_cv_pct": 2.3155813129932827, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.942911251, + "samples": 114, + "avg_temp_c": 50.75438596491228, + "p95_temp_c": 55, + "avg_power_w": 87.372894736842, + "p95_power_w": 90.94749999999999, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 
12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 1.7330150264667308, + "temp_cv_pct": 3.4872476446986864, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.97250000000001, + "thermal_sustain_score": 100, + "stability_score": 80.16874276799356, + "interconnect_score": 0, + "composite_score": 532.6876780928648 + }, + "degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 2, + "uuid": "GPU-82b32f5b-5fca-9674-a845-cfd5da365d09", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:4E:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.23, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": 
{ + "duration_sec": 15.033289155, + "samples": 15, + "avg_temp_c": 53.733333333333334, + "p95_temp_c": 59.3, + "avg_power_w": 114.68533333333335, + "p95_power_w": 191.34599999999966, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 67.3613557450656, + "temp_cv_pct": 5.864169874985842, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 522.069202793, + "samples": 522, + "avg_temp_c": 73.5823754789272, + "p95_temp_c": 74, + "avg_power_w": 531.7479693486594, + "p95_power_w": 542.509, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 98.47892720306514, + "avg_mem_usage_pct": 38.74521072796935, + "clock_cv_pct": 0, + "power_cv_pct": 9.887326796340815, + "temp_cv_pct": 2.4110385580810907, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.942911251, + "samples": 114, + "avg_temp_c": 49.57017543859649, + "p95_temp_c": 54.349999999999994, + "avg_power_w": 90.77263157894727, + "p95_power_w": 94.967, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 2.017263598743592, + "temp_cv_pct": 4.05207099778575, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 
8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.96166666666666, + "thermal_sustain_score": 100, + "stability_score": 80.22534640731837, + "interconnect_score": 0, + "composite_score": 532.7578857315244 + }, + "degradation_reasons": [ + "variance_too_high" + ] + } + ], + "server_power": { + "available": true, + "idle_w": 1036, + "loaded_w": 3223.5728155339807, + "delta_w": 2187.5728155339807, + "gpu_reported_sum_w": 1564.8024329501914, + "reporting_ratio": 1.397986588894582 + } +} \ No newline at end of file diff --git a/gpu-benchmark-20260413-045001/result.json b/gpu-benchmark-20260413-045001/result.json new file mode 100644 index 0000000..3b8e5c3 --- /dev/null +++ b/gpu-benchmark-20260413-045001/result.json @@ -0,0 +1,631 @@ +{ + "benchmark_version": "1", + "generated_at": "2026-04-13T04:50:01.238832218Z", + "hostname": "debian", + "server_model": "G5500 V7", + "benchmark_profile": "standard", + "parallel_gpus": true, + "ramp_step": 4, + "ramp_total": 8, + "ramp_run_id": "ramp-20260413-035309", + "overall_status": "OK", + "selected_gpu_indices": [ + 0, + 1, + 2, + 3 + ], + "findings": [ + "All 4 GPU(s) passed the benchmark.", + "GPU 0 showed unstable clocks/power over the benchmark window.", + "GPU 1 showed unstable clocks/power over the benchmark window.", + "GPU 2 showed unstable clocks/power over the benchmark window.", + "GPU 3 showed unstable clocks/power over the benchmark window.", + "Server power delta 2690 W exceeds GPU-reported sum 2085 W by 29%. 
Other components (CPU, NVMe, networking) may be drawing substantial power under GPU load." + ], + "normalization": { + "status": "full", + "gpus": [ + { + "index": 0, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 1, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 2, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 3, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + } + ] + }, + "host_config": { + "cpu_model": "INTEL(R) XEON(R) GOLD 6530", + "cpu_sockets": 2, + "cpu_cores": 64, + "cpu_threads": 128, + "mem_total_gib": 125.36544799804688 + }, + "cpu_load": { + "avg_pct": 3.1, + "max_pct": 3.3, + "p95_pct": 3.2, + "samples": 70, + "status": "ok" + }, + "gpus": [ + { + "index": 0, + "uuid": "GPU-8281bc14-c076-e306-75b1-e91596be45e7", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:4B:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.255, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 15.036223085, + "samples": 15, + "avg_temp_c": 58.666666666666664, + "p95_temp_c": 64.3, + "avg_power_w": 102.08999999999999, + "p95_power_w": 124.32599999999992, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 
2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 18.77753269993211, + "temp_cv_pct": 5.9915574211200315, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 525.067614256, + "samples": 525, + "avg_temp_c": 71.64, + "p95_temp_c": 72, + "avg_power_w": 508.3619999999994, + "p95_power_w": 520.1840000000001, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 98.29523809523809, + "avg_mem_usage_pct": 38.43047619047619, + "clock_cv_pct": 0, + "power_cv_pct": 10.748058849985153, + "temp_cv_pct": 2.789715042814912, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.741717307, + "samples": 114, + "avg_temp_c": 49.62280701754386, + "p95_temp_c": 53, + "avg_power_w": 89.43131578947367, + "p95_power_w": 92.2615, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 1.4118633273458687, + "temp_cv_pct": 2.9165260445929433, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": 
true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.9575, + "thermal_sustain_score": 100, + "stability_score": 78.50388230002969, + "interconnect_score": 0, + "composite_score": 530.3421839833501 + }, + "degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 1, + "uuid": "GPU-a66d6d68-9870-dfe0-1823-b38a141c21ae", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:4C:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.185, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 15.036223085, + "samples": 15, + "avg_temp_c": 60.333333333333336, + "p95_temp_c": 66.3, + "avg_power_w": 104.13399999999999, + "p95_power_w": 135.44299999999987, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 28.279218723013607, + "temp_cv_pct": 5.634275705627386, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 525.067614256, + "samples": 525, + "avg_temp_c": 74.02285714285715, + "p95_temp_c": 75, + "avg_power_w": 520.1055999999996, + "p95_power_w": 532.95, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 97.92761904761905, + "avg_mem_usage_pct": 38.944761904761904, + "clock_cv_pct": 0, + "power_cv_pct": 11.619346871453436, + "temp_cv_pct": 
2.9818204514658326, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.741717307, + "samples": 114, + "avg_temp_c": 51.53508771929825, + "p95_temp_c": 55, + "avg_power_w": 88.07684210526322, + "p95_power_w": 90.61, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 1.3530652386403696, + "temp_cv_pct": 2.546658308018548, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.96916666666668, + "thermal_sustain_score": 100, + "stability_score": 76.76130625709312, + "interconnect_score": 0, + "composite_score": 527.9102110302674 + }, + "degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 2, + "uuid": "GPU-82b32f5b-5fca-9674-a845-cfd5da365d09", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:4E:00.0", + "vbios": 
"98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.255, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 15.036223085, + "samples": 15, + "avg_temp_c": 58.46666666666667, + "p95_temp_c": 64.3, + "avg_power_w": 104.23466666666667, + "p95_power_w": 128.58599999999993, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 20.35320302333862, + "temp_cv_pct": 5.986045495282411, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 525.067614256, + "samples": 525, + "avg_temp_c": 74.3047619047619, + "p95_temp_c": 75, + "avg_power_w": 530.595066666667, + "p95_power_w": 544.2040000000001, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 97.9047619047619, + "avg_mem_usage_pct": 38.54666666666667, + "clock_cv_pct": 0, + "power_cv_pct": 11.755075516052898, + "temp_cv_pct": 3.1451081246290715, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.741717307, + "samples": 114, + "avg_temp_c": 50.473684210526315, + "p95_temp_c": 54, + "avg_power_w": 91.29131578947388, + "p95_power_w": 94.4975, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 1.5813669058676583, + "temp_cv_pct": 2.9944744585943432, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + 
"hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.9575, + "thermal_sustain_score": 100, + "stability_score": 76.4898489678942, + "interconnect_score": 0, + "composite_score": 527.520023231423 + }, + "degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 3, + "uuid": "GPU-c88562a9-54cd-c3cf-c3fc-ef6c68ce5228", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:4F:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.265, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 15.036223085, + "samples": 15, + "avg_temp_c": 55.86666666666667, + "p95_temp_c": 61.3, + "avg_power_w": 95.958, + "p95_power_w": 116.39399999999993, + "avg_graphics_clock_mhz": 2425.2, + "p95_graphics_clock_mhz": 2430, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + 
"clock_cv_pct": 0.16160249004012395, + "power_cv_pct": 18.28206144786019, + "temp_cv_pct": 5.540900604412363, + "clock_drift_pct": 0.21971985718209908 + }, + "steady": { + "duration_sec": 525.067614256, + "samples": 525, + "avg_temp_c": 75.29142857142857, + "p95_temp_c": 76, + "avg_power_w": 525.9955047619045, + "p95_power_w": 539.4580000000001, + "avg_graphics_clock_mhz": 2422.0761904761903, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 97.94285714285714, + "avg_mem_usage_pct": 38.499047619047616, + "clock_cv_pct": 0.03207964525416391, + "power_cv_pct": 11.564343959222406, + "temp_cv_pct": 3.0543353777078432, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.741717307, + "samples": 114, + "avg_temp_c": 52.39473684210526, + "p95_temp_c": 55.349999999999994, + "avg_power_w": 88.51675438596483, + "p95_power_w": 91.02149999999999, + "avg_graphics_clock_mhz": 2422.2105263157896, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0.05286830237971597, + "power_cv_pct": 1.3571261625836293, + "temp_cv_pct": 2.5104636139944287, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + 
"supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.95583333333333, + "thermal_sustain_score": 100, + "stability_score": 76.74299350053853, + "interconnect_score": 0, + "composite_score": 527.8733403252803 + }, + "degradation_reasons": [ + "variance_too_high" + ] + } + ], + "server_power": { + "available": true, + "idle_w": 1048, + "loaded_w": 3737.5384615384614, + "delta_w": 2689.5384615384614, + "gpu_reported_sum_w": 2085.0581714285704, + "reporting_ratio": 1.2899105158757913 + } +} \ No newline at end of file diff --git a/gpu-benchmark-20260413-050458/result.json b/gpu-benchmark-20260413-050458/result.json new file mode 100644 index 0000000..aa41f05 --- /dev/null +++ b/gpu-benchmark-20260413-050458/result.json @@ -0,0 +1,776 @@ +{ + "benchmark_version": "1", + "generated_at": "2026-04-13T05:04:58.909534574Z", + "hostname": "debian", + "server_model": "G5500 V7", + "benchmark_profile": "standard", + "parallel_gpus": true, + "ramp_step": 5, + "ramp_total": 8, + "ramp_run_id": "ramp-20260413-035309", + "overall_status": "OK", + "selected_gpu_indices": [ + 0, + 1, + 2, + 3, + 4 + ], + "findings": [ + "All 5 GPU(s) passed the benchmark.", + "GPU 0 showed unstable clocks/power over the benchmark window.", + "GPU 1 showed unstable clocks/power over the benchmark window.", + "GPU 2 showed unstable clocks/power over the benchmark window.", + "GPU 3 showed unstable clocks/power over the benchmark window.", + "GPU 4 showed unstable clocks/power over the benchmark window." 
+ ], + "normalization": { + "status": "full", + "gpus": [ + { + "index": 0, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 1, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 2, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 3, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 4, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + } + ] + }, + "host_config": { + "cpu_model": "INTEL(R) XEON(R) GOLD 6530", + "cpu_sockets": 2, + "cpu_cores": 64, + "cpu_threads": 128, + "mem_total_gib": 125.36544799804688 + }, + "cpu_load": { + "avg_pct": 3.7, + "max_pct": 3.9, + "p95_pct": 3.9, + "samples": 70, + "status": "ok" + }, + "gpus": [ + { + "index": 0, + "uuid": "GPU-8281bc14-c076-e306-75b1-e91596be45e7", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:4B:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.2645, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 15.003491683, + "samples": 15, + "avg_temp_c": 58.8, + "p95_temp_c": 64.3, + "avg_power_w": 97.76533333333332, + 
"p95_power_w": 104.70400000000001, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 3.7803015697327367, + "temp_cv_pct": 5.798962291403367, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 527.066986904, + "samples": 527, + "avg_temp_c": 71.57495256166983, + "p95_temp_c": 73, + "avg_power_w": 507.0497533206832, + "p95_power_w": 520.9, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 97.74952561669829, + "avg_mem_usage_pct": 37.86337760910816, + "clock_cv_pct": 0, + "power_cv_pct": 12.012757746133422, + "temp_cv_pct": 3.2586034768067687, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.648768617, + "samples": 114, + "avg_temp_c": 49.56140350877193, + "p95_temp_c": 52, + "avg_power_w": 89.30096491228069, + "p95_power_w": 91.444, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 1.1038088274923465, + "temp_cv_pct": 2.4499053413094867, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": 
false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.95591666666667, + "thermal_sustain_score": 100, + "stability_score": 75.97448450773315, + "interconnect_score": 0, + "composite_score": 526.796538479771 + }, + "degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 1, + "uuid": "GPU-a66d6d68-9870-dfe0-1823-b38a141c21ae", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:4C:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.249, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 15.003491683, + "samples": 15, + "avg_temp_c": 60.93333333333333, + "p95_temp_c": 66.3, + "avg_power_w": 96.53799999999998, + "p95_power_w": 102.759, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 3.5395358534781804, + "temp_cv_pct": 5.441938781376757, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 527.066986904, + "samples": 527, + "avg_temp_c": 74.3965844402277, + "p95_temp_c": 75, + "avg_power_w": 518.9630170777989, + "p95_power_w": 533.52, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 97.55977229601518, + "avg_mem_usage_pct": 
38.588235294117645, + "clock_cv_pct": 0, + "power_cv_pct": 12.701708089948635, + "temp_cv_pct": 3.4531352747066717, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.648768617, + "samples": 114, + "avg_temp_c": 51.8421052631579, + "p95_temp_c": 54, + "avg_power_w": 88.07710526315786, + "p95_power_w": 90.59700000000001, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 1.2064879357740999, + "temp_cv_pct": 2.115803591518185, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.9585, + "thermal_sustain_score": 100, + "stability_score": 74.59658382010272, + "interconnect_score": 0, + "composite_score": 524.8679294396218 + }, + "degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 2, + "uuid": "GPU-82b32f5b-5fca-9674-a845-cfd5da365d09", + "name": "NVIDIA 
RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:4E:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.2090000000001, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 15.003491683, + "samples": 15, + "avg_temp_c": 58.86666666666667, + "p95_temp_c": 64.3, + "avg_power_w": 101.12600000000002, + "p95_power_w": 109.22299999999998, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 4.773882905476738, + "temp_cv_pct": 5.646636857222156, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 527.066986904, + "samples": 527, + "avg_temp_c": 74.14800759013282, + "p95_temp_c": 75, + "avg_power_w": 528.8367552182166, + "p95_power_w": 544.247, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 97.5370018975332, + "avg_mem_usage_pct": 38.415559772296014, + "clock_cv_pct": 0, + "power_cv_pct": 12.766621799252786, + "temp_cv_pct": 3.59396235562364, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.648768617, + "samples": 114, + "avg_temp_c": 50.526315789473685, + "p95_temp_c": 53.349999999999994, + "avg_power_w": 91.43912280701771, + "p95_power_w": 94.1305, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 1.2921924053692504, + "temp_cv_pct": 2.44810135237115, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + 
"sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.96516666666666, + "thermal_sustain_score": 100, + "stability_score": 74.46675640149442, + "interconnect_score": 0, + "composite_score": 524.6916139851322 + }, + "degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 3, + "uuid": "GPU-c88562a9-54cd-c3cf-c3fc-ef6c68ce5228", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:4F:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.269, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 15.003491683, + "samples": 15, + "avg_temp_c": 60.93333333333333, + "p95_temp_c": 66.3, + "avg_power_w": 96.50600000000001, + "p95_power_w": 104.157, + "avg_graphics_clock_mhz": 2425.2, + "p95_graphics_clock_mhz": 2430, + 
"avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0.16160249004012395, + "power_cv_pct": 3.9969044026420764, + "temp_cv_pct": 5.441938781376757, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 527.066986904, + "samples": 527, + "avg_temp_c": 75.23908918406072, + "p95_temp_c": 76, + "avg_power_w": 524.9754079696397, + "p95_power_w": 540.1080000000001, + "avg_graphics_clock_mhz": 2422.0607210626185, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 97.66223908918406, + "avg_mem_usage_pct": 38.6584440227704, + "clock_cv_pct": 0.02866653390270464, + "power_cv_pct": 12.770181756684392, + "temp_cv_pct": 3.444650082421283, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.648768617, + "samples": 114, + "avg_temp_c": 52.44736842105263, + "p95_temp_c": 55, + "avg_power_w": 88.59570175438584, + "p95_power_w": 90.821, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 1.1068190849003288, + "temp_cv_pct": 2.1442860694231825, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": 
"fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.95516666666667, + "thermal_sustain_score": 100, + "stability_score": 74.3449703510204, + "interconnect_score": 0, + "composite_score": 524.5125540011004 + }, + "degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 4, + "uuid": "GPU-19d64ace-f127-d9a8-1999-ffe37453b930", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:CB:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.3885, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 15.003491683, + "samples": 15, + "avg_temp_c": 51.13333333333333, + "p95_temp_c": 56.3, + "avg_power_w": 88.92800000000003, + "p95_power_w": 94.11599999999999, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 3.1242071347047644, + "temp_cv_pct": 6.0538131766591405, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 527.066986904, + "samples": 527, + "avg_temp_c": 68.54459203036053, + "p95_temp_c": 69, + "avg_power_w": 510.0916698292221, + "p95_power_w": 523.961, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 97.91271347248576, + "avg_mem_usage_pct": 37.77988614800759, + "clock_cv_pct": 0, + 
"power_cv_pct": 12.20243417529326, + "temp_cv_pct": 3.207085724241198, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.648768617, + "samples": 114, + "avg_temp_c": 46.578947368421055, + "p95_temp_c": 50, + "avg_power_w": 85.92385964912292, + "p95_power_w": 88.47, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 1.1661917565791895, + "temp_cv_pct": 2.8844929564663992, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.93525, + "thermal_sustain_score": 100, + "stability_score": 75.59513164941347, + "interconnect_score": 0, + "composite_score": 526.2475954590352 + }, + "degradation_reasons": [ + "variance_too_high" + ] + } + ], + "server_power": { + "available": true, + "idle_w": 1181.3333333333333, + "loaded_w": 4268.7692307692305, + "delta_w": 3087.4358974358975, + 
"gpu_reported_sum_w": 2589.9166034155605, + "reporting_ratio": 1.1920985769828312 + } +} \ No newline at end of file diff --git a/gpu-benchmark-20260413-052002/result.json b/gpu-benchmark-20260413-052002/result.json new file mode 100644 index 0000000..5ba822c --- /dev/null +++ b/gpu-benchmark-20260413-052002/result.json @@ -0,0 +1,922 @@ +{ + "benchmark_version": "1", + "generated_at": "2026-04-13T05:20:02.176595191Z", + "hostname": "debian", + "server_model": "G5500 V7", + "benchmark_profile": "standard", + "parallel_gpus": true, + "ramp_step": 6, + "ramp_total": 8, + "ramp_run_id": "ramp-20260413-035309", + "overall_status": "OK", + "selected_gpu_indices": [ + 0, + 1, + 2, + 3, + 4, + 5 + ], + "findings": [ + "All 6 GPU(s) passed the benchmark.", + "GPU 0 showed unstable clocks/power over the benchmark window.", + "GPU 1 showed unstable clocks/power over the benchmark window.", + "GPU 2 showed unstable clocks/power over the benchmark window.", + "GPU 3 showed unstable clocks/power over the benchmark window.", + "GPU 4 showed unstable clocks/power over the benchmark window.", + "GPU 5 showed unstable clocks/power over the benchmark window." 
+ ], + "normalization": { + "status": "full", + "gpus": [ + { + "index": 0, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 1, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 2, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 3, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 4, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 5, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + } + ] + }, + "host_config": { + "cpu_model": "INTEL(R) XEON(R) GOLD 6530", + "cpu_sockets": 2, + "cpu_cores": 64, + "cpu_threads": 128, + "mem_total_gib": 125.36544799804688 + }, + "cpu_load": { + "avg_pct": 3.8, + "max_pct": 4.9, + "p95_pct": 4.9, + "samples": 85, + "status": "ok" + }, + "gpus": [ + { + "index": 0, + "uuid": "GPU-8281bc14-c076-e306-75b1-e91596be45e7", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:4B:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.3, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 
2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 16.053059923, + "samples": 16, + "avg_temp_c": 58.625, + "p95_temp_c": 64.25, + "avg_power_w": 97.95062499999999, + "p95_power_w": 104.6425, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 3.5538544062758373, + "temp_cv_pct": 5.843151217929519, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 529.066805487, + "samples": 529, + "avg_temp_c": 73.03024574669188, + "p95_temp_c": 74, + "avg_power_w": 516.1044234404536, + "p95_power_w": 532.3919999999999, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 97.3742911153119, + "avg_mem_usage_pct": 38.68620037807183, + "clock_cv_pct": 0, + "power_cv_pct": 13.26954724794044, + "temp_cv_pct": 3.837795991456247, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.562160642, + "samples": 114, + "avg_temp_c": 49.473684210526315, + "p95_temp_c": 52.349999999999994, + "avg_power_w": 89.25877192982453, + "p95_power_w": 91.7115, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 1.123523691008624, + "temp_cv_pct": 2.2750375790560877, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": 
"fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.95, + "thermal_sustain_score": 100, + "stability_score": 73.46090550411913, + "interconnect_score": 0, + "composite_score": 523.2694157811586 + }, + "degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 1, + "uuid": "GPU-a66d6d68-9870-dfe0-1823-b38a141c21ae", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:4C:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.204, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 16.053059923, + "samples": 16, + "avg_temp_c": 60.875, + "p95_temp_c": 66.25, + "avg_power_w": 96.51062499999996, + "p95_power_w": 102.8975, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 3.6149766986050422, + "temp_cv_pct": 5.287225129192498, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 529.066805487, + "samples": 529, + "avg_temp_c": 74.37240075614366, + "p95_temp_c": 75, + "avg_power_w": 517.2094328922498, + "p95_power_w": 533.688, + 
"avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 97.19281663516068, + "avg_mem_usage_pct": 37.65217391304348, + "clock_cv_pct": 0, + "power_cv_pct": 13.84280954041754, + "temp_cv_pct": 3.6879251320846564, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.562160642, + "samples": 114, + "avg_temp_c": 51.3421052631579, + "p95_temp_c": 54, + "avg_power_w": 88.02447368421053, + "p95_power_w": 90.24149999999999, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 1.1123685256067952, + "temp_cv_pct": 1.8392656383518242, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.96600000000001, + "thermal_sustain_score": 100, + "stability_score": 72.31438091916492, + 
"interconnect_score": 0, + "composite_score": 521.6763021566902 + }, + "degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 2, + "uuid": "GPU-82b32f5b-5fca-9674-a845-cfd5da365d09", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:4E:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.238, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 16.053059923, + "samples": 16, + "avg_temp_c": 58.625, + "p95_temp_c": 64.25, + "avg_power_w": 99.44687500000002, + "p95_power_w": 107.8875, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 4.262486998727176, + "temp_cv_pct": 5.811945914899365, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 529.066805487, + "samples": 529, + "avg_temp_c": 74.24385633270322, + "p95_temp_c": 75, + "avg_power_w": 527.5048582230622, + "p95_power_w": 544.214, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 97.19092627599244, + "avg_mem_usage_pct": 37.54820415879017, + "clock_cv_pct": 0, + "power_cv_pct": 13.666807767183627, + "temp_cv_pct": 3.857745887762385, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.562160642, + "samples": 114, + "avg_temp_c": 50.37719298245614, + "p95_temp_c": 53, + "avg_power_w": 92.15017543859648, + "p95_power_w": 94.5575, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + 
"clock_cv_pct": 0, + "power_cv_pct": 1.1369648747132337, + "temp_cv_pct": 1.991810636420684, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.96033333333332, + "thermal_sustain_score": 100, + "stability_score": 72.66638446563275, + "interconnect_score": 0, + "composite_score": 522.1647822782088 + }, + "degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 3, + "uuid": "GPU-c88562a9-54cd-c3cf-c3fc-ef6c68ce5228", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:4F:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.284, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 16.053059923, + "samples": 16, + "avg_temp_c": 60.8125, + 
"p95_temp_c": 66.25, + "avg_power_w": 96.78375000000001, + "p95_power_w": 103.835, + "avg_graphics_clock_mhz": 2424.5, + "p95_graphics_clock_mhz": 2430, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0.1529428436192135, + "power_cv_pct": 4.0949357562590665, + "temp_cv_pct": 5.3036241921647145, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 529.066805487, + "samples": 529, + "avg_temp_c": 75.4366729678639, + "p95_temp_c": 76, + "avg_power_w": 523.9496975425328, + "p95_power_w": 540.62, + "avg_graphics_clock_mhz": 2422.0151228733457, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 97.35349716446125, + "avg_mem_usage_pct": 38.52741020793951, + "clock_cv_pct": 0.014347440279947565, + "power_cv_pct": 13.778878893679746, + "temp_cv_pct": 3.609529116252534, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.562160642, + "samples": 114, + "avg_temp_c": 52.333333333333336, + "p95_temp_c": 54.349999999999994, + "avg_power_w": 88.46842105263153, + "p95_power_w": 90.41649999999998, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 1.007904824255818, + "temp_cv_pct": 1.8015459393287852, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, 
+ "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.95266666666667, + "thermal_sustain_score": 100, + "stability_score": 72.38485245152071, + "interconnect_score": 0, + "composite_score": 521.7638402842446 + }, + "degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 4, + "uuid": "GPU-19d64ace-f127-d9a8-1999-ffe37453b930", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:CB:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.32, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 16.053059923, + "samples": 16, + "avg_temp_c": 54.875, + "p95_temp_c": 60.25, + "avg_power_w": 92.22937500000002, + "p95_power_w": 98.205, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 3.382386785564523, + "temp_cv_pct": 5.9006076751656575, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 529.066805487, + "samples": 529, + "avg_temp_c": 69.15879017013232, + "p95_temp_c": 70, + "avg_power_w": 509.86896030245765, + "p95_power_w": 525.116, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + 
"avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 97.53308128544424, + "avg_mem_usage_pct": 37.90359168241966, + "clock_cv_pct": 0, + "power_cv_pct": 13.0988749354967, + "temp_cv_pct": 3.6747327643850913, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.562160642, + "samples": 114, + "avg_temp_c": 46.5, + "p95_temp_c": 49.349999999999994, + "avg_power_w": 85.81798245614041, + "p95_power_w": 87.93700000000001, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 0.9711049131181364, + "temp_cv_pct": 2.503564953095039, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.94666666666666, + "thermal_sustain_score": 100, + "stability_score": 73.80225012900661, + "interconnect_score": 0, + "composite_score": 523.7449218546949 + }, + 
"degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 5, + "uuid": "GPU-8d5d3b52-6221-c572-e4dc-3eb34699d5a4", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:CC:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.272, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 16.053059923, + "samples": 16, + "avg_temp_c": 53.375, + "p95_temp_c": 58.25, + "avg_power_w": 93.08749999999998, + "p95_power_w": 98.725, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 3.571040763944285, + "temp_cv_pct": 5.416877520477929, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 529.066805487, + "samples": 529, + "avg_temp_c": 71.84499054820417, + "p95_temp_c": 73, + "avg_power_w": 518.6966351606807, + "p95_power_w": 536.0600000000001, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 97.17958412098298, + "avg_mem_usage_pct": 38.36862003780718, + "clock_cv_pct": 0, + "power_cv_pct": 13.736936451172705, + "temp_cv_pct": 3.995613888278051, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.562160642, + "samples": 114, + "avg_temp_c": 49.01754385964912, + "p95_temp_c": 51, + "avg_power_w": 88.78377192982451, + "p95_power_w": 90.8905, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 0.9946861228149281, + 
"temp_cv_pct": 1.9293961305574223, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.95466666666665, + "thermal_sustain_score": 100, + "stability_score": 72.5261270976546, + "interconnect_score": 0, + "composite_score": 521.9634826377489 + }, + "degradation_reasons": [ + "variance_too_high" + ] + } + ], + "server_power": { + "available": true, + "idle_w": 1228, + "loaded_w": 4799.076923076923, + "delta_w": 3571.076923076923, + "gpu_reported_sum_w": 3113.334007561437, + "reporting_ratio": 1.1470266005522547 + } +} \ No newline at end of file diff --git a/gpu-benchmark-20260413-053511/result.json b/gpu-benchmark-20260413-053511/result.json new file mode 100644 index 0000000..f232c1d --- /dev/null +++ b/gpu-benchmark-20260413-053511/result.json @@ -0,0 +1,1068 @@ +{ + "benchmark_version": "1", + "generated_at": "2026-04-13T05:35:11.571022479Z", + "hostname": "debian", + "server_model": "G5500 V7", + "benchmark_profile": "standard", + "parallel_gpus": 
true, + "ramp_step": 7, + "ramp_total": 8, + "ramp_run_id": "ramp-20260413-035309", + "overall_status": "OK", + "selected_gpu_indices": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6 + ], + "findings": [ + "All 7 GPU(s) passed the benchmark.", + "GPU 0 showed unstable clocks/power over the benchmark window.", + "GPU 1 showed unstable clocks/power over the benchmark window.", + "GPU 2 showed unstable clocks/power over the benchmark window.", + "GPU 3 showed unstable clocks/power over the benchmark window.", + "GPU 4 showed unstable clocks/power over the benchmark window.", + "GPU 5 showed unstable clocks/power over the benchmark window.", + "GPU 6 showed unstable clocks/power over the benchmark window." + ], + "normalization": { + "status": "full", + "gpus": [ + { + "index": 0, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 1, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 2, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 3, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 4, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 5, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 6, + "persistence_mode": "applied", + 
"gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + } + ] + }, + "host_config": { + "cpu_model": "INTEL(R) XEON(R) GOLD 6530", + "cpu_sockets": 2, + "cpu_cores": 64, + "cpu_threads": 128, + "mem_total_gib": 125.36544799804688 + }, + "cpu_load": { + "avg_pct": 4.4, + "max_pct": 5.7, + "p95_pct": 5.6, + "samples": 84, + "status": "ok" + }, + "gpus": [ + { + "index": 0, + "uuid": "GPU-8281bc14-c076-e306-75b1-e91596be45e7", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:4B:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.314, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 16.04678938, + "samples": 16, + "avg_temp_c": 58.1875, + "p95_temp_c": 64, + "avg_power_w": 97.07624999999999, + "p95_power_w": 104.1875, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 3.8062564565253245, + "temp_cv_pct": 5.641910198353984, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 531.065115947, + "samples": 531, + "avg_temp_c": 72.64783427495291, + "p95_temp_c": 74, + "avg_power_w": 510.3100188323918, + "p95_power_w": 532.09, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 96.99435028248588, + "avg_mem_usage_pct": 38.71939736346516, + "clock_cv_pct": 0, + "power_cv_pct": 14.24939934825413, + "temp_cv_pct": 4.027206872308068, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.372856967, + "samples": 
114, + "avg_temp_c": 50.12280701754386, + "p95_temp_c": 52, + "avg_power_w": 89.38657894736835, + "p95_power_w": 91.3775, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 0.9290319187872187, + "temp_cv_pct": 1.8524492295973676, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.94766666666668, + "thermal_sustain_score": 100, + "stability_score": 71.50120130349174, + "interconnect_score": 0, + "composite_score": 520.5214218839666 + }, + "degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 1, + "uuid": "GPU-a66d6d68-9870-dfe0-1823-b38a141c21ae", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:4C:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + 
"default_power_limit_w": 600, + "calibrated_peak_power_w": 600.258, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 16.04678938, + "samples": 16, + "avg_temp_c": 60.5625, + "p95_temp_c": 66.25, + "avg_power_w": 95.88437499999998, + "p95_power_w": 102.965, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 3.962008587086827, + "temp_cv_pct": 5.748666450858805, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 531.065115947, + "samples": 531, + "avg_temp_c": 74.55743879472693, + "p95_temp_c": 76, + "avg_power_w": 516.4025988700567, + "p95_power_w": 534.14, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 96.92467043314501, + "avg_mem_usage_pct": 38.4030131826742, + "clock_cv_pct": 0, + "power_cv_pct": 14.717500783253215, + "temp_cv_pct": 3.946380183758861, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.372856967, + "samples": 114, + "avg_temp_c": 51.75438596491228, + "p95_temp_c": 54, + "avg_power_w": 88.00070175438614, + "p95_power_w": 90.0505, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 0.9245185916668872, + "temp_cv_pct": 1.7274837025411365, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + 
"n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.957, + "thermal_sustain_score": 100, + "stability_score": 70.56499843349357, + "interconnect_score": 0, + "composite_score": 519.2174162086708 + }, + "degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 2, + "uuid": "GPU-82b32f5b-5fca-9674-a845-cfd5da365d09", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:4E:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.2040000000001, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 16.04678938, + "samples": 16, + "avg_temp_c": 58.4375, + "p95_temp_c": 64.25, + "avg_power_w": 98.92562500000001, + "p95_power_w": 106.3175, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 3.941158294591091, + "temp_cv_pct": 6.079346092458744, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 531.065115947, + 
"samples": 531, + "avg_temp_c": 74.05461393596987, + "p95_temp_c": 75, + "avg_power_w": 525.6578342749533, + "p95_power_w": 544.575, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 96.76459510357816, + "avg_mem_usage_pct": 38.220338983050844, + "clock_cv_pct": 0, + "power_cv_pct": 14.662926314872996, + "temp_cv_pct": 4.151712016818929, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.372856967, + "samples": 114, + "avg_temp_c": 50.40350877192982, + "p95_temp_c": 53, + "avg_power_w": 91.19526315789464, + "p95_power_w": 93.51750000000001, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 1.0123803885891556, + "temp_cv_pct": 1.9964678593386713, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 
560.4993138688001, + "power_sustain_score": 99.966, + "thermal_sustain_score": 100, + "stability_score": 70.674147370254, + "interconnect_score": 0, + "composite_score": 519.3779277098173 + }, + "degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 3, + "uuid": "GPU-c88562a9-54cd-c3cf-c3fc-ef6c68ce5228", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:4F:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.318, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 16.04678938, + "samples": 16, + "avg_temp_c": 60.5625, + "p95_temp_c": 66.25, + "avg_power_w": 95.94187500000001, + "p95_power_w": 102.9875, + "avg_graphics_clock_mhz": 2426, + "p95_graphics_clock_mhz": 2430, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0.16488046166529266, + "power_cv_pct": 3.9902501502275056, + "temp_cv_pct": 5.748666450858805, + "clock_drift_pct": 0.08237232289950577 + }, + "steady": { + "duration_sec": 531.065115947, + "samples": 531, + "avg_temp_c": 75.23728813559322, + "p95_temp_c": 76, + "avg_power_w": 521.9880979284374, + "p95_power_w": 540.245, + "avg_graphics_clock_mhz": 2422.030131826742, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 97.00376647834275, + "avg_mem_usage_pct": 38.22975517890772, + "clock_cv_pct": 0.020232929020700553, + "power_cv_pct": 14.608025160871934, + "temp_cv_pct": 3.9179887726622487, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.372856967, + "samples": 114, + "avg_temp_c": 52.29824561403509, + "p95_temp_c": 54, + "avg_power_w": 88.52728070175428, + "p95_power_w": 
90.384, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 0.8770139832436826, + "temp_cv_pct": 1.5597890556944174, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.947, + "thermal_sustain_score": 100, + "stability_score": 70.70301796217333, + "interconnect_score": 0, + "composite_score": 519.4024083467765 + }, + "degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 4, + "uuid": "GPU-19d64ace-f127-d9a8-1999-ffe37453b930", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:CB:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.3, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 
12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 16.04678938, + "samples": 16, + "avg_temp_c": 54.9375, + "p95_temp_c": 60.25, + "avg_power_w": 92.15750000000004, + "p95_power_w": 98.3625, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 3.352429847224058, + "temp_cv_pct": 5.879603895072832, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 531.065115947, + "samples": 531, + "avg_temp_c": 69.18455743879473, + "p95_temp_c": 70, + "avg_power_w": 508.81698681732604, + "p95_power_w": 525.855, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 97.14124293785311, + "avg_mem_usage_pct": 38.2090395480226, + "clock_cv_pct": 0, + "power_cv_pct": 14.121257794447253, + "temp_cv_pct": 4.066965153480223, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.372856967, + "samples": 114, + "avg_temp_c": 47.06140350877193, + "p95_temp_c": 49, + "avg_power_w": 85.78964912280702, + "p95_power_w": 87.43, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 0.8815489349736993, + "temp_cv_pct": 2.0351040458762837, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + 
"category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.95, + "thermal_sustain_score": 100, + "stability_score": 71.7574844111055, + "interconnect_score": 0, + "composite_score": 520.8824998964992 + }, + "degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 5, + "uuid": "GPU-8d5d3b52-6221-c572-e4dc-3eb34699d5a4", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:CC:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.264, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 16.04678938, + "samples": 16, + "avg_temp_c": 57.5, + "p95_temp_c": 63.25, + "avg_power_w": 96.57249999999999, + "p95_power_w": 103.7375, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 3.902165660400851, + "temp_cv_pct": 6.086956521739131, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 531.065115947, + "samples": 531, + "avg_temp_c": 71.94161958568738, + "p95_temp_c": 73, + "avg_power_w": 516.3799811676082, + "p95_power_w": 536.86, + 
"avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 96.8060263653484, + "avg_mem_usage_pct": 38.133709981167605, + "clock_cv_pct": 0, + "power_cv_pct": 14.786168761467863, + "temp_cv_pct": 4.231629191129241, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.372856967, + "samples": 114, + "avg_temp_c": 49.280701754385966, + "p95_temp_c": 51, + "avg_power_w": 88.84307017543861, + "p95_power_w": 90.644, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 0.942977011916492, + "temp_cv_pct": 1.5788847890022546, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.956, + "thermal_sustain_score": 100, + "stability_score": 70.42766247706427, + "interconnect_score": 0, + 
"composite_score": 519.0241336863296 + }, + "degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 6, + "uuid": "GPU-a23c67ec-a8e2-7ebf-2593-79d73ec889a6", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:CE:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.25, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 16.04678938, + "samples": 16, + "avg_temp_c": 51.5, + "p95_temp_c": 57, + "avg_power_w": 92.36124999999998, + "p95_power_w": 97.38, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 2.950204809082421, + "temp_cv_pct": 5.905594689609921, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 531.065115947, + "samples": 531, + "avg_temp_c": 70.20338983050847, + "p95_temp_c": 71, + "avg_power_w": 509.8354237288135, + "p95_power_w": 527.455, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 96.84934086629002, + "avg_mem_usage_pct": 38.18079096045198, + "clock_cv_pct": 0, + "power_cv_pct": 14.468544117030715, + "temp_cv_pct": 4.251349242502187, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.372856967, + "samples": 114, + "avg_temp_c": 47.35087719298246, + "p95_temp_c": 49.349999999999994, + "avg_power_w": 87.85578947368428, + "p95_power_w": 89.6515, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + 
"power_cv_pct": 0.983368735998178, + "temp_cv_pct": 1.977622251957387, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.95833333333333, + "thermal_sustain_score": 100, + "stability_score": 71.06291176593857, + "interconnect_score": 0, + "composite_score": 519.9162374103023 + }, + "degradation_reasons": [ + "variance_too_high" + ] + } + ], + "server_power": { + "available": true, + "idle_w": 1164, + "loaded_w": 5288.114285714286, + "delta_w": 4124.114285714286, + "gpu_reported_sum_w": 3609.3909416195866, + "reporting_ratio": 1.1426067035741314 + } +} \ No newline at end of file diff --git a/gpu-benchmark-20260413-055026/result.json b/gpu-benchmark-20260413-055026/result.json new file mode 100644 index 0000000..2101463 --- /dev/null +++ b/gpu-benchmark-20260413-055026/result.json @@ -0,0 +1,1234 @@ +{ + "benchmark_version": "1", + "generated_at": "2026-04-13T05:50:26.607897085Z", + "hostname": "debian", + "server_model": "G5500 V7", + 
"benchmark_profile": "standard", + "parallel_gpus": true, + "ramp_step": 8, + "ramp_total": 8, + "ramp_run_id": "ramp-20260413-035309", + "overall_status": "OK", + "selected_gpu_indices": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7 + ], + "findings": [ + "All 8 GPU(s) passed the benchmark.", + "GPU 0 showed unstable clocks/power over the benchmark window.", + "GPU 1 showed unstable clocks/power over the benchmark window.", + "GPU 2 showed unstable clocks/power over the benchmark window.", + "GPU 3 showed unstable clocks/power over the benchmark window.", + "GPU 4 showed unstable clocks/power over the benchmark window.", + "GPU 5 showed unstable clocks/power over the benchmark window.", + "GPU 6 showed unstable clocks/power over the benchmark window.", + "GPU 7 showed unstable clocks/power over the benchmark window.", + "Multi-GPU all_reduce max bus bandwidth: 3.9 GB/s." + ], + "normalization": { + "status": "full", + "gpus": [ + { + "index": 0, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 1, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 2, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 3, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 4, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 5, + "persistence_mode": "applied", + 
"gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 6, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + }, + { + "index": 7, + "persistence_mode": "applied", + "gpu_clock_lock_mhz": 2430, + "gpu_clock_lock_status": "applied", + "memory_clock_lock_mhz": 12481, + "memory_clock_lock_status": "applied" + } + ] + }, + "host_config": { + "cpu_model": "INTEL(R) XEON(R) GOLD 6530", + "cpu_sockets": 2, + "cpu_cores": 64, + "cpu_threads": 128, + "mem_total_gib": 125.36544799804688 + }, + "cpu_load": { + "avg_pct": 4.9, + "max_pct": 6.9, + "p95_pct": 6.6, + "samples": 91, + "status": "ok" + }, + "gpus": [ + { + "index": 0, + "uuid": "GPU-8281bc14-c076-e306-75b1-e91596be45e7", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:4B:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.284, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 16.034820522, + "samples": 16, + "avg_temp_c": 58.5625, + "p95_temp_c": 64.25, + "avg_power_w": 97.34562499999998, + "p95_power_w": 104.3425, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 3.8153775648467336, + "temp_cv_pct": 5.852307308796241, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 534.065632494, + "samples": 534, + "avg_temp_c": 72.97940074906367, + "p95_temp_c": 74, + "avg_power_w": 512.3819850187268, + "p95_power_w": 
533.2760000000001, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 96.45692883895131, + "avg_mem_usage_pct": 38.39138576779026, + "clock_cv_pct": 0, + "power_cv_pct": 15.592428960695315, + "temp_cv_pct": 4.647413618844314, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.311993894, + "samples": 114, + "avg_temp_c": 50.19298245614035, + "p95_temp_c": 52, + "avg_power_w": 89.36491228070172, + "p95_power_w": 91.181, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 0.8317003174992245, + "temp_cv_pct": 1.1765139936880011, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.95266666666667, + "thermal_sustain_score": 100, + "stability_score": 68.81514207860937, + 
"interconnect_score": 3.92, + "composite_score": 572.8117211343068 + }, + "degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 1, + "uuid": "GPU-a66d6d68-9870-dfe0-1823-b38a141c21ae", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:4C:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.224, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 16.034820522, + "samples": 16, + "avg_temp_c": 60.625, + "p95_temp_c": 66.25, + "avg_power_w": 96.18749999999999, + "p95_power_w": 102.9575, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 3.6190678467089095, + "temp_cv_pct": 5.650387466410194, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 534.065632494, + "samples": 534, + "avg_temp_c": 74.33707865168539, + "p95_temp_c": 76, + "avg_power_w": 513.9588576779026, + "p95_power_w": 534.706, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 96.09550561797752, + "avg_mem_usage_pct": 37.97752808988764, + "clock_cv_pct": 0, + "power_cv_pct": 16.032037069751265, + "temp_cv_pct": 4.56941886281622, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.311993894, + "samples": 114, + "avg_temp_c": 51.333333333333336, + "p95_temp_c": 53, + "avg_power_w": 88.73859649122802, + "p95_power_w": 90.269, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + 
"clock_cv_pct": 0, + "power_cv_pct": 0.8602569271127033, + "temp_cv_pct": 1.4288820888835887, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.96266666666666, + "thermal_sustain_score": 100, + "stability_score": 67.93592586049746, + "interconnect_score": 3.92, + "composite_score": 571.5881284065297 + }, + "degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 2, + "uuid": "GPU-82b32f5b-5fca-9674-a845-cfd5da365d09", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:4E:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.1980000000001, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 16.034820522, + "samples": 16, + "avg_temp_c": 58.5625, 
+ "p95_temp_c": 64.25, + "avg_power_w": 99.20875000000002, + "p95_power_w": 105.82, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 3.747848083593493, + "temp_cv_pct": 5.944992306170952, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 534.065632494, + "samples": 534, + "avg_temp_c": 74.38389513108615, + "p95_temp_c": 76, + "avg_power_w": 524.240243445693, + "p95_power_w": 545.7495, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 96.23970037453184, + "avg_mem_usage_pct": 38.12546816479401, + "clock_cv_pct": 0, + "power_cv_pct": 15.99414543851988, + "temp_cv_pct": 4.860573845224712, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.311993894, + "samples": 114, + "avg_temp_c": 50.333333333333336, + "p95_temp_c": 52, + "avg_power_w": 91.16175438596487, + "p95_power_w": 92.95, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 0.9826901628083612, + "temp_cv_pct": 1.5934652423370474, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": 
"fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.96699999999998, + "thermal_sustain_score": 100, + "stability_score": 68.01170912296024, + "interconnect_score": 3.92, + "composite_score": 571.6979628186027 + }, + "degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 3, + "uuid": "GPU-c88562a9-54cd-c3cf-c3fc-ef6c68ce5228", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:4F:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.3299999999999, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 16.034820522, + "samples": 16, + "avg_temp_c": 60.625, + "p95_temp_c": 66.25, + "avg_power_w": 96.16687500000002, + "p95_power_w": 103.22749999999999, + "avg_graphics_clock_mhz": 2426, + "p95_graphics_clock_mhz": 2430, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0.16488046166529266, + "power_cv_pct": 3.782241291247955, + "temp_cv_pct": 5.650387466410194, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 534.065632494, + "samples": 534, + "avg_temp_c": 75.62921348314607, + "p95_temp_c": 77, + "avg_power_w": 520.5265355805242, + "p95_power_w": 541.687, + "avg_graphics_clock_mhz": 2422.0898876404494, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + 
"p95_memory_clock_mhz": 12481, + "avg_usage_pct": 96.28651685393258, + "avg_mem_usage_pct": 37.99250936329588, + "clock_cv_pct": 0.03481377033352888, + "power_cv_pct": 15.961220894100176, + "temp_cv_pct": 4.648694191504124, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.311993894, + "samples": 114, + "avg_temp_c": 52.271929824561404, + "p95_temp_c": 54, + "avg_power_w": 88.49894736842091, + "p95_power_w": 89.7775, + "avg_graphics_clock_mhz": 2422.1403508771928, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0.04336205341685846, + "power_cv_pct": 0.8186267515664026, + "temp_cv_pct": 1.3476366177488701, + "clock_drift_pct": 0.011795234725168349 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.94500000000001, + "thermal_sustain_score": 100, + "stability_score": 67.93830313046553, + "interconnect_score": 3.92, + "composite_score": 
571.5766063201771 + }, + "degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 4, + "uuid": "GPU-19d64ace-f127-d9a8-1999-ffe37453b930", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:CB:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.34, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 16.034820522, + "samples": 16, + "avg_temp_c": 55, + "p95_temp_c": 61, + "avg_power_w": 92.33500000000002, + "p95_power_w": 98.7525, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 3.492341021406794, + "temp_cv_pct": 6.064392756421061, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 534.065632494, + "samples": 534, + "avg_temp_c": 69.26779026217228, + "p95_temp_c": 71, + "avg_power_w": 506.3082958801495, + "p95_power_w": 526.117, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 96.45318352059925, + "avg_mem_usage_pct": 37.60299625468165, + "clock_cv_pct": 0, + "power_cv_pct": 15.54102422434703, + "temp_cv_pct": 4.641054486371343, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.311993894, + "samples": 114, + "avg_temp_c": 46.921052631578945, + "p95_temp_c": 49, + "avg_power_w": 86.75456140350886, + "p95_power_w": 88.41, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 0.7715872996388503, + 
"temp_cv_pct": 1.8107227998901132, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.94333333333333, + "thermal_sustain_score": 100, + "stability_score": 68.91795155130595, + "interconnect_score": 3.92, + "composite_score": 572.9479357411767 + }, + "degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 5, + "uuid": "GPU-8d5d3b52-6221-c572-e4dc-3eb34699d5a4", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:CC:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.188, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 16.034820522, + "samples": 16, + "avg_temp_c": 57.75, + "p95_temp_c": 63.25, + "avg_power_w": 97.051875, + "p95_power_w": 
104.14, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 3.9002521641511976, + "temp_cv_pct": 5.79181305638946, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 534.065632494, + "samples": 534, + "avg_temp_c": 72.08988764044943, + "p95_temp_c": 74, + "avg_power_w": 514.1286891385772, + "p95_power_w": 537.069, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 96.2940074906367, + "avg_mem_usage_pct": 37.838951310861425, + "clock_cv_pct": 0, + "power_cv_pct": 15.864665152673057, + "temp_cv_pct": 4.671288336933857, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.311993894, + "samples": 114, + "avg_temp_c": 49.58771929824562, + "p95_temp_c": 51, + "avg_power_w": 89.05771929824559, + "p95_power_w": 91.04, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 0.8327686073487044, + "temp_cv_pct": 1.4826689027277737, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + 
}, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.96866666666666, + "thermal_sustain_score": 100, + "stability_score": 68.27066969465389, + "interconnect_score": 3.92, + "composite_score": 572.0622321237707 + }, + "degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 6, + "uuid": "GPU-a23c67ec-a8e2-7ebf-2593-79d73ec889a6", + "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition", + "bus_id": "00000000:CE:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.2180000000001, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 16.034820522, + "samples": 16, + "avg_temp_c": 55.6875, + "p95_temp_c": 61.25, + "avg_power_w": 95.11187500000003, + "p95_power_w": 102.2825, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 3.814204476943954, + "temp_cv_pct": 6.013660302859176, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 534.065632494, + "samples": 534, + "avg_temp_c": 70.8314606741573, + "p95_temp_c": 72, + "avg_power_w": 508.88904494382024, + "p95_power_w": 529.0815, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 96.2621722846442, + "avg_mem_usage_pct": 37.840823970037455, + 
"clock_cv_pct": 0, + "power_cv_pct": 15.79881616328304, + "temp_cv_pct": 4.810144110611115, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.311993894, + "samples": 114, + "avg_temp_c": 48.03508771929825, + "p95_temp_c": 50, + "avg_power_w": 88.44701754385963, + "p95_power_w": 89.97, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 0.7855280810040005, + "temp_cv_pct": 1.6756858651503985, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.96366666666665, + "thermal_sustain_score": 100, + "stability_score": 68.40236767343391, + "interconnect_score": 3.92, + "composite_score": 572.242569945777 + }, + "degradation_reasons": [ + "variance_too_high" + ] + }, + { + "index": 7, + "uuid": "GPU-f9d537b7-86a1-407c-c137-3bc30fb8c2e8", + "name": "NVIDIA RTX PRO 6000 Blackwell 
Server Edition", + "bus_id": "00000000:CF:00.0", + "vbios": "98.02.67.00.0A", + "compute_capability": "12.0", + "backend": "cublasLt", + "status": "OK", + "power_limit_w": 600, + "default_power_limit_w": 600, + "calibrated_peak_power_w": 600.354, + "max_graphics_clock_mhz": 2430, + "max_memory_clock_mhz": 12481, + "locked_graphics_clock_mhz": 2430, + "locked_memory_clock_mhz": 12481, + "baseline": { + "duration_sec": 16.034820522, + "samples": 16, + "avg_temp_c": 54.1875, + "p95_temp_c": 59.25, + "avg_power_w": 90.46875000000001, + "p95_power_w": 95.4, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 3.169928895921586, + "temp_cv_pct": 5.544736028476085, + "clock_drift_pct": 0 + }, + "steady": { + "duration_sec": 534.065632494, + "samples": 534, + "avg_temp_c": 72.82771535580524, + "p95_temp_c": 74, + "avg_power_w": 508.06207865168585, + "p95_power_w": 527.48, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 96.44194756554307, + "avg_mem_usage_pct": 37.62734082397004, + "clock_cv_pct": 0, + "power_cv_pct": 15.469757701038384, + "temp_cv_pct": 4.561873839006074, + "clock_drift_pct": 0 + }, + "cooldown": { + "duration_sec": 120.311993894, + "samples": 114, + "avg_temp_c": 50.28947368421053, + "p95_temp_c": 52, + "avg_power_w": 86.8166666666667, + "p95_power_w": 88.6715, + "avg_graphics_clock_mhz": 2422, + "p95_graphics_clock_mhz": 2422, + "avg_memory_clock_mhz": 12481, + "p95_memory_clock_mhz": 12481, + "avg_usage_pct": 0, + "avg_mem_usage_pct": 0, + "clock_cv_pct": 0, + "power_cv_pct": 0.956688279977931, + "temp_cv_pct": 1.4834649538474145, + "clock_drift_pct": 0 + }, + "throttle_counters": { + "sw_power_cap_us": 0, + "sw_thermal_slowdown_us": 0, + "sync_boost_us": 0, + "hw_thermal_slowdown_us": 
0, + "hw_power_brake_slowdown_us": 0 + }, + "precision_results": [ + { + "name": "fp16_tensor", + "category": "fp16_bf16", + "supported": true, + "lanes": 16, + "m": 11904, + "n": 11904, + "k": 11904, + "iterations": 27840, + "teraops_per_sec": 195.675631386624 + }, + { + "name": "fp32_tf32", + "category": "fp32_tf32", + "supported": true, + "lanes": 16, + "m": 8320, + "n": 8320, + "k": 8320, + "iterations": 27840, + "teraops_per_sec": 66.807922688 + }, + { + "name": "fp64", + "category": "fp64", + "supported": false, + "notes": "unsupported" + }, + { + "name": "fp8_e4m3", + "category": "fp8", + "supported": true, + "lanes": 16, + "m": 13696, + "n": 13696, + "k": 13696, + "iterations": 27840, + "teraops_per_sec": 298.015759794176 + }, + { + "name": "fp8_e5m2", + "category": "fp8", + "supported": false, + "notes": "unsupported" + } + ], + "scores": { + "compute_score": 560.4993138688001, + "power_sustain_score": 99.94099999999999, + "thermal_sustain_score": 100, + "stability_score": 69.06048459792324, + "interconnect_score": 3.92, + "composite_score": 573.1456981806597 + }, + "degradation_reasons": [ + "variance_too_high" + ] + } + ], + "interconnect": { + "status": "OK", + "attempted": true, + "supported": true, + "selected_gpu_indices": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7 + ], + "avg_algbw_gbps": 2.2100000000000004, + "max_algbw_gbps": 2.24, + "avg_busbw_gbps": 3.8699999999999997, + "max_busbw_gbps": 3.92 + }, + "server_power": { + "available": true, + "idle_w": 1305.3333333333333, + "loaded_w": 5845.942857142857, + "delta_w": 4540.609523809524, + "gpu_reported_sum_w": 4108.495730337079, + "reporting_ratio": 1.1051756705700635 + } +} \ No newline at end of file