Files
pub-beebench/perf/perf-20260416-080820/result.json
Mikhail Chusavitin b4990a85b3 one more pass add
2026-04-16 13:04:34 +03:00

1206 lines
37 KiB
JSON

{
"benchmark_version": "2",
"generated_at": "2026-04-16T08:08:20.63072288Z",
"hostname": "debian",
"server_model": "MLT-S06",
"benchmark_profile": "standard",
"parallel_gpus": true,
"ramp_total": 4,
"ramp_run_id": "ramp-20260416-072956",
"overall_status": "FAILED",
"selected_gpu_indices": [
0,
1,
2,
3
],
"findings": [
"0 of 4 GPU(s) passed the benchmark.",
"[POWER] GPU 0: power cap throttle 100.0% of steady state — server is not delivering full TDP to the GPU.",
"[THERMAL] GPU 0: thermal throttle 100.0% of steady state.",
"GPU 0 average SM clock stayed below the requested lock target.",
"GPU 0 had incomplete precision coverage: fp64:FAILED, fp4:FAILED.",
"[POWER] GPU 1: power cap throttle 100.0% of steady state — server is not delivering full TDP to the GPU.",
"[THERMAL] GPU 1: thermal throttle 100.0% of steady state.",
"GPU 1 average SM clock stayed below the requested lock target.",
"GPU 1 had incomplete precision coverage: fp64:FAILED, fp4:FAILED.",
"[POWER] GPU 2: power cap throttle 100.0% of steady state — server is not delivering full TDP to the GPU.",
"[THERMAL] GPU 2: thermal throttle 100.0% of steady state.",
"GPU 2 average SM clock stayed below the requested lock target.",
"GPU 2 had incomplete precision coverage: fp64:FAILED, fp4:FAILED.",
"[POWER] GPU 3: power cap throttle 100.0% of steady state — server is not delivering full TDP to the GPU.",
"[THERMAL] GPU 3: thermal throttle 100.0% of steady state.",
"GPU 3 average SM clock stayed below the requested lock target.",
"GPU 3 had incomplete precision coverage: fp64:FAILED, fp4:FAILED.",
"Multi-GPU all_reduce max bus bandwidth: 8.5 GB/s."
],
"normalization": {
"status": "full",
"gpus": [
{
"index": 0,
"persistence_mode": "applied",
"gpu_clock_lock_mhz": 1980,
"gpu_clock_lock_status": "applied",
"memory_clock_lock_mhz": 2619,
"memory_clock_lock_status": "applied"
},
{
"index": 1,
"persistence_mode": "applied",
"gpu_clock_lock_mhz": 1980,
"gpu_clock_lock_status": "applied",
"memory_clock_lock_mhz": 2619,
"memory_clock_lock_status": "applied"
},
{
"index": 2,
"persistence_mode": "applied",
"gpu_clock_lock_mhz": 1980,
"gpu_clock_lock_status": "applied",
"memory_clock_lock_mhz": 2619,
"memory_clock_lock_status": "applied"
},
{
"index": 3,
"persistence_mode": "applied",
"gpu_clock_lock_mhz": 1980,
"gpu_clock_lock_status": "applied",
"memory_clock_lock_mhz": 2619,
"memory_clock_lock_status": "applied"
}
]
},
"host_config": {
"cpu_model": "Intel(R) Xeon(R) Gold 6430",
"cpu_sockets": 2,
"cpu_cores": 64,
"cpu_threads": 128,
"mem_total_gib": 62.53376007080078
},
"cpu_load": {
"avg_pct": 2.3,
"max_pct": 3.5,
"p95_pct": 3.2,
"samples": 45,
"status": "ok"
},
"cooling": {
"available": true,
"avg_fan_rpm": 6523.529411764706,
"notes": [
"fan duty cycle unavailable on this host; RPM-only fan telemetry was collected"
]
},
"gpus": [
{
"index": 0,
"uuid": "GPU-a87b1588-4e92-bd7b-74be-9db000808a64",
"name": "NVIDIA H100 80GB HBM3",
"bus_id": "00000000:49:00.0",
"vbios": "96.00.74.00.01",
"status": "PARTIAL",
"power_limit_w": 700,
"default_power_limit_w": 700,
"max_graphics_clock_mhz": 1980,
"max_memory_clock_mhz": 2619,
"locked_graphics_clock_mhz": 1980,
"locked_memory_clock_mhz": 2619,
"baseline": {
"duration_sec": 18.67612165,
"samples": 1,
"avg_temp_c": 41,
"p95_temp_c": 41,
"avg_power_w": 138.02,
"p95_power_w": 138.02,
"avg_graphics_clock_mhz": 1830,
"p95_graphics_clock_mhz": 1830,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 0,
"avg_mem_usage_pct": 0,
"clock_cv_pct": 0,
"power_cv_pct": 0,
"temp_cv_pct": 0,
"clock_drift_pct": 0
},
"steady": {
"duration_sec": 0,
"samples": 0,
"avg_temp_c": 0,
"p95_temp_c": 0,
"avg_power_w": 0,
"p95_power_w": 0,
"avg_graphics_clock_mhz": 0,
"p95_graphics_clock_mhz": 0,
"avg_memory_clock_mhz": 0,
"p95_memory_clock_mhz": 0,
"avg_usage_pct": 0,
"avg_mem_usage_pct": 0,
"clock_cv_pct": 0,
"power_cv_pct": 0,
"temp_cv_pct": 0,
"clock_drift_pct": 0
},
"precision_steady": [
{
"precision": "int8",
"status": "OK",
"steady": {
"duration_sec": 53.666271418,
"samples": 2,
"avg_temp_c": 51.5,
"p95_temp_c": 59.15,
"avg_power_w": 448.345,
"p95_power_w": 674.3395,
"avg_graphics_clock_mhz": 1522.5,
"p95_graphics_clock_mhz": 1799.25,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 50.5,
"avg_mem_usage_pct": 43.5,
"clock_cv_pct": 20.19704433497537,
"power_cv_pct": 56.00709275223321,
"temp_cv_pct": 16.50485436893204,
"clock_drift_pct": 0
},
"teraops_per_sec": 1151.669844836352,
"weighted_teraops_per_sec": 287.917461209088,
"ecc": {
"corrected": 0,
"uncorrected": 0
}
},
{
"precision": "fp8",
"status": "OK",
"steady": {
"duration_sec": 94.749526187,
"samples": 2,
"avg_temp_c": 68,
"p95_temp_c": 69.8,
"avg_power_w": 699.245,
"p95_power_w": 699.3575,
"avg_graphics_clock_mhz": 1162.5,
"p95_graphics_clock_mhz": 1196.25,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 100,
"avg_mem_usage_pct": 75.5,
"clock_cv_pct": 3.225806451612903,
"power_cv_pct": 0.017876423857160224,
"temp_cv_pct": 2.941176470588235,
"clock_drift_pct": 0
},
"teraops_per_sec": 1418.4876727074816,
"weighted_teraops_per_sec": 354.6219181768704,
"ecc": {
"corrected": 0,
"uncorrected": 0
}
},
{
"precision": "fp16",
"status": "OK",
"steady": {
"duration_sec": 160.278286598,
"samples": 3,
"avg_temp_c": 75.66666666666667,
"p95_temp_c": 77.8,
"avg_power_w": 699.0766666666667,
"p95_power_w": 699.643,
"avg_graphics_clock_mhz": 1140,
"p95_graphics_clock_mhz": 1140,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 100,
"avg_mem_usage_pct": 66.33333333333333,
"clock_cv_pct": 0,
"power_cv_pct": 0.07192548770429025,
"temp_cv_pct": 2.715600882365188,
"clock_drift_pct": 0
},
"teraops_per_sec": 605.7576061293909,
"weighted_teraops_per_sec": 302.87880306469543,
"ecc": {
"corrected": 0,
"uncorrected": 0
}
},
{
"precision": "fp32",
"status": "OK",
"steady": {
"duration_sec": 234.413351493,
"samples": 3,
"avg_temp_c": 79,
"p95_temp_c": 79.9,
"avg_power_w": 698.8633333333333,
"p95_power_w": 698.938,
"avg_graphics_clock_mhz": 1205,
"p95_graphics_clock_mhz": 1239,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 100,
"avg_mem_usage_pct": 100,
"clock_cv_pct": 2.3472424271752614,
"power_cv_pct": 0.008846382454843477,
"temp_cv_pct": 1.0335399758578812,
"clock_drift_pct": 0
},
"teraops_per_sec": 329.0028834816,
"weighted_teraops_per_sec": 329.0028834816,
"ecc": {
"corrected": 0,
"uncorrected": 0
}
},
{
"precision": "fp64",
"status": "FAILED",
"steady": {
"duration_sec": 281.842154315,
"samples": 2,
"avg_temp_c": 81.5,
"p95_temp_c": 81.95,
"avg_power_w": 698.085,
"p95_power_w": 698.2245,
"avg_graphics_clock_mhz": 1170,
"p95_graphics_clock_mhz": 1197,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 100,
"avg_mem_usage_pct": 89.5,
"clock_cv_pct": 2.564102564102564,
"power_cv_pct": 0.022203599848160258,
"temp_cv_pct": 0.6134969325153374,
"clock_drift_pct": 0
},
"ecc": {
"corrected": 0,
"uncorrected": 0
},
"notes": "precision phase failed"
},
{
"precision": "fp4",
"status": "FAILED",
"steady": {
"duration_sec": 323.470255195,
"samples": 2,
"avg_temp_c": 80.5,
"p95_temp_c": 82.75,
"avg_power_w": 481.03499999999997,
"p95_power_w": 677.4465,
"avg_graphics_clock_mhz": 1507.5,
"p95_graphics_clock_mhz": 1797.75,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 50,
"avg_mem_usage_pct": 37,
"clock_cv_pct": 21.393034825870647,
"power_cv_pct": 45.36780067978422,
"temp_cv_pct": 3.1055900621118013,
"clock_drift_pct": 0
},
"ecc": {
"corrected": 0,
"uncorrected": 0
},
"notes": "precision phase failed"
}
],
"precision_failures": [
"fp64:FAILED",
"fp4:FAILED"
],
"cooldown": {
"duration_sec": 0,
"samples": 0,
"avg_temp_c": 0,
"p95_temp_c": 0,
"avg_power_w": 0,
"p95_power_w": 0,
"avg_graphics_clock_mhz": 0,
"p95_graphics_clock_mhz": 0,
"avg_memory_clock_mhz": 0,
"p95_memory_clock_mhz": 0,
"avg_usage_pct": 0,
"avg_mem_usage_pct": 0,
"clock_cv_pct": 0,
"power_cv_pct": 0,
"temp_cv_pct": 0,
"clock_drift_pct": 0
},
"throttle_counters": {
"sw_power_cap_us": 200018,
"sw_thermal_slowdown_us": 200019,
"sync_boost_us": 0,
"hw_thermal_slowdown_us": 0,
"hw_power_brake_slowdown_us": 0
},
"ecc": {
"corrected": 0,
"uncorrected": 0
},
"scores": {
"compute_score": 1274.421065932254,
"synthetic_score": 1274.421065932254,
"power_sustain_score": 66.60624071973443,
"thermal_sustain_score": 100,
"stability_score": 0,
"thermal_throttle_pct": 100,
"power_cap_throttle_pct": 100,
"temp_headroom_c": 0,
"interconnect_score": 8.54,
"server_quality_score": 49.98187221592033,
"composite_score": 1274.421065932254
},
"degradation_reasons": [
"power_capped",
"thermal_limited",
"low_sm_clock_vs_target"
],
"notes": [
"parallel warmup failed: exit status 1"
]
},
{
"index": 1,
"uuid": "GPU-30b320aa-c18f-6b25-d0ed-24aeb14f1fd3",
"name": "NVIDIA H100 80GB HBM3",
"bus_id": "00000000:5A:00.0",
"vbios": "96.00.74.00.01",
"status": "PARTIAL",
"power_limit_w": 700,
"default_power_limit_w": 700,
"max_graphics_clock_mhz": 1980,
"max_memory_clock_mhz": 2619,
"locked_graphics_clock_mhz": 1980,
"locked_memory_clock_mhz": 2619,
"baseline": {
"duration_sec": 18.67612165,
"samples": 1,
"avg_temp_c": 44,
"p95_temp_c": 44,
"avg_power_w": 139.47,
"p95_power_w": 139.47,
"avg_graphics_clock_mhz": 1830,
"p95_graphics_clock_mhz": 1830,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 0,
"avg_mem_usage_pct": 0,
"clock_cv_pct": 0,
"power_cv_pct": 0,
"temp_cv_pct": 0,
"clock_drift_pct": 0
},
"steady": {
"duration_sec": 0,
"samples": 0,
"avg_temp_c": 0,
"p95_temp_c": 0,
"avg_power_w": 0,
"p95_power_w": 0,
"avg_graphics_clock_mhz": 0,
"p95_graphics_clock_mhz": 0,
"avg_memory_clock_mhz": 0,
"p95_memory_clock_mhz": 0,
"avg_usage_pct": 0,
"avg_mem_usage_pct": 0,
"clock_cv_pct": 0,
"power_cv_pct": 0,
"temp_cv_pct": 0,
"clock_drift_pct": 0
},
"precision_steady": [
{
"precision": "int8",
"status": "OK",
"steady": {
"duration_sec": 53.666271418,
"samples": 2,
"avg_temp_c": 54,
"p95_temp_c": 61.2,
"avg_power_w": 449.475,
"p95_power_w": 673.8045,
"avg_graphics_clock_mhz": 1507.5,
"p95_graphics_clock_mhz": 1797.75,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 50,
"avg_mem_usage_pct": 42.5,
"clock_cv_pct": 21.393034825870647,
"power_cv_pct": 55.454697146671116,
"temp_cv_pct": 14.814814814814813,
"clock_drift_pct": 0
},
"teraops_per_sec": 1149.1842264662016,
"weighted_teraops_per_sec": 287.2960566165504,
"ecc": {
"corrected": 0,
"uncorrected": 0
}
},
{
"precision": "fp8",
"status": "OK",
"steady": {
"duration_sec": 94.749526187,
"samples": 2,
"avg_temp_c": 70,
"p95_temp_c": 71.8,
"avg_power_w": 699.095,
"p95_power_w": 699.3605,
"avg_graphics_clock_mhz": 1162.5,
"p95_graphics_clock_mhz": 1169.25,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 100,
"avg_mem_usage_pct": 75.5,
"clock_cv_pct": 0.6451612903225806,
"power_cv_pct": 0.04219741236885057,
"temp_cv_pct": 2.857142857142857,
"clock_drift_pct": 0
},
"teraops_per_sec": 1417.5994524991488,
"weighted_teraops_per_sec": 354.3998631247872,
"ecc": {
"corrected": 0,
"uncorrected": 0
}
},
{
"precision": "fp16",
"status": "OK",
"steady": {
"duration_sec": 160.278286598,
"samples": 3,
"avg_temp_c": 77,
"p95_temp_c": 78.8,
"avg_power_w": 698.4633333333333,
"p95_power_w": 698.548,
"avg_graphics_clock_mhz": 1105,
"p95_graphics_clock_mhz": 1110,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 100,
"avg_mem_usage_pct": 62,
"clock_cv_pct": 0.6399156390828484,
"power_cv_pct": 0.010213341935880632,
"temp_cv_pct": 2.1207703400720157,
"clock_drift_pct": 0
},
"teraops_per_sec": 605.7576061293909,
"weighted_teraops_per_sec": 302.87880306469543,
"ecc": {
"corrected": 0,
"uncorrected": 0
}
},
{
"precision": "fp32",
"status": "OK",
"steady": {
"duration_sec": 234.413351493,
"samples": 3,
"avg_temp_c": 80,
"p95_temp_c": 80.9,
"avg_power_w": 699.0533333333333,
"p95_power_w": 699.09,
"avg_graphics_clock_mhz": 1140,
"p95_graphics_clock_mhz": 1140,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 100,
"avg_mem_usage_pct": 100,
"clock_cv_pct": 0,
"power_cv_pct": 0.007417817041191886,
"temp_cv_pct": 1.0206207261596576,
"clock_drift_pct": 0
},
"teraops_per_sec": 328.3913539584,
"weighted_teraops_per_sec": 328.3913539584,
"ecc": {
"corrected": 0,
"uncorrected": 0
}
},
{
"precision": "fp64",
"status": "FAILED",
"steady": {
"duration_sec": 281.842154315,
"samples": 2,
"avg_temp_c": 81.5,
"p95_temp_c": 81.95,
"avg_power_w": 680.315,
"p95_power_w": 697.4375,
"avg_graphics_clock_mhz": 1117.5,
"p95_graphics_clock_mhz": 1151.25,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 100,
"avg_mem_usage_pct": 88.5,
"clock_cv_pct": 3.3557046979865772,
"power_cv_pct": 2.796498680758183,
"temp_cv_pct": 0.6134969325153374,
"clock_drift_pct": 0
},
"ecc": {
"corrected": 0,
"uncorrected": 0
},
"notes": "precision phase failed"
},
{
"precision": "fp4",
"status": "FAILED",
"steady": {
"duration_sec": 323.470255195,
"samples": 2,
"avg_temp_c": 83.5,
"p95_temp_c": 83.95,
"avg_power_w": 699.105,
"p95_power_w": 699.5145,
"avg_graphics_clock_mhz": 1147.5,
"p95_graphics_clock_mhz": 1208.25,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 100,
"avg_mem_usage_pct": 61,
"clock_cv_pct": 5.88235294117647,
"power_cv_pct": 0.06508321353730613,
"temp_cv_pct": 0.5988023952095809,
"clock_drift_pct": 0
},
"ecc": {
"corrected": 0,
"uncorrected": 0
},
"notes": "precision phase failed"
}
],
"precision_failures": [
"fp64:FAILED",
"fp4:FAILED"
],
"cooldown": {
"duration_sec": 0,
"samples": 0,
"avg_temp_c": 0,
"p95_temp_c": 0,
"avg_power_w": 0,
"p95_power_w": 0,
"avg_graphics_clock_mhz": 0,
"p95_graphics_clock_mhz": 0,
"avg_memory_clock_mhz": 0,
"p95_memory_clock_mhz": 0,
"avg_usage_pct": 0,
"avg_mem_usage_pct": 0,
"clock_cv_pct": 0,
"power_cv_pct": 0,
"temp_cv_pct": 0,
"clock_drift_pct": 0
},
"throttle_counters": {
"sw_power_cap_us": 100036,
"sw_thermal_slowdown_us": 100036,
"sync_boost_us": 0,
"hw_thermal_slowdown_us": 0,
"hw_power_brake_slowdown_us": 0
},
"ecc": {
"corrected": 0,
"uncorrected": 0
},
"scores": {
"compute_score": 1272.966076764433,
"synthetic_score": 1272.966076764433,
"power_sustain_score": 81.87262810051264,
"thermal_sustain_score": 100,
"stability_score": 0,
"thermal_throttle_pct": 100,
"power_cap_throttle_pct": 100,
"temp_headroom_c": 0,
"interconnect_score": 8.54,
"server_quality_score": 54.56178843015379,
"composite_score": 1272.966076764433
},
"degradation_reasons": [
"power_capped",
"thermal_limited",
"low_sm_clock_vs_target"
],
"notes": [
"parallel warmup failed: exit status 1"
]
},
{
"index": 2,
"uuid": "GPU-0182c11c-0c2c-aafd-0ada-113b64147ee6",
"name": "NVIDIA H100 80GB HBM3",
"bus_id": "00000000:C8:00.0",
"vbios": "96.00.74.00.01",
"status": "PARTIAL",
"power_limit_w": 700,
"default_power_limit_w": 700,
"max_graphics_clock_mhz": 1980,
"max_memory_clock_mhz": 2619,
"locked_graphics_clock_mhz": 1980,
"locked_memory_clock_mhz": 2619,
"baseline": {
"duration_sec": 18.67612165,
"samples": 1,
"avg_temp_c": 43,
"p95_temp_c": 43,
"avg_power_w": 122.08,
"p95_power_w": 122.08,
"avg_graphics_clock_mhz": 1830,
"p95_graphics_clock_mhz": 1830,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 0,
"avg_mem_usage_pct": 0,
"clock_cv_pct": 0,
"power_cv_pct": 0,
"temp_cv_pct": 0,
"clock_drift_pct": 0
},
"steady": {
"duration_sec": 0,
"samples": 0,
"avg_temp_c": 0,
"p95_temp_c": 0,
"avg_power_w": 0,
"p95_power_w": 0,
"avg_graphics_clock_mhz": 0,
"p95_graphics_clock_mhz": 0,
"avg_memory_clock_mhz": 0,
"p95_memory_clock_mhz": 0,
"avg_usage_pct": 0,
"avg_mem_usage_pct": 0,
"clock_cv_pct": 0,
"power_cv_pct": 0,
"temp_cv_pct": 0,
"clock_drift_pct": 0
},
"precision_steady": [
{
"precision": "int8",
"status": "OK",
"steady": {
"duration_sec": 53.666271418,
"samples": 2,
"avg_temp_c": 53,
"p95_temp_c": 60.2,
"avg_power_w": 447.94,
"p95_power_w": 672.9129999999999,
"avg_graphics_clock_mhz": 1522.5,
"p95_graphics_clock_mhz": 1799.25,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 50,
"avg_mem_usage_pct": 43,
"clock_cv_pct": 20.19704433497537,
"power_cv_pct": 55.804348796713846,
"temp_cv_pct": 15.09433962264151,
"clock_drift_pct": 0
},
"teraops_per_sec": 1149.59849619456,
"weighted_teraops_per_sec": 287.39962404864,
"ecc": {
"corrected": 0,
"uncorrected": 0
}
},
{
"precision": "fp8",
"status": "OK",
"steady": {
"duration_sec": 94.749526187,
"samples": 2,
"avg_temp_c": 69,
"p95_temp_c": 70.8,
"avg_power_w": 699.225,
"p95_power_w": 699.3285000000001,
"avg_graphics_clock_mhz": 1140,
"p95_graphics_clock_mhz": 1180.5,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 100,
"avg_mem_usage_pct": 73.5,
"clock_cv_pct": 3.9473684210526314,
"power_cv_pct": 0.016446780363975698,
"temp_cv_pct": 2.898550724637681,
"clock_drift_pct": 0
},
"teraops_per_sec": 1417.5994524991488,
"weighted_teraops_per_sec": 354.3998631247872,
"ecc": {
"corrected": 0,
"uncorrected": 0
}
},
{
"precision": "fp16",
"status": "OK",
"steady": {
"duration_sec": 160.278286598,
"samples": 3,
"avg_temp_c": 75.33333333333333,
"p95_temp_c": 76.9,
"avg_power_w": 699.0933333333334,
"p95_power_w": 699.518,
"avg_graphics_clock_mhz": 1115,
"p95_graphics_clock_mhz": 1123.5,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 100,
"avg_mem_usage_pct": 64,
"clock_cv_pct": 0.6341764853691009,
"power_cv_pct": 0.050873430990042545,
"temp_cv_pct": 2.2562033245985775,
"clock_drift_pct": 0
},
"teraops_per_sec": 605.7576061293909,
"weighted_teraops_per_sec": 302.87880306469543,
"ecc": {
"corrected": 0,
"uncorrected": 0
}
},
{
"precision": "fp32",
"status": "OK",
"steady": {
"duration_sec": 234.413351493,
"samples": 3,
"avg_temp_c": 78.66666666666667,
"p95_temp_c": 79.9,
"avg_power_w": 698.96,
"p95_power_w": 699.181,
"avg_graphics_clock_mhz": 1150,
"p95_graphics_clock_mhz": 1167,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 100,
"avg_mem_usage_pct": 100,
"clock_cv_pct": 1.2297509238026914,
"power_cv_pct": 0.027170690803947822,
"temp_cv_pct": 1.5854480452431954,
"clock_drift_pct": 0
},
"teraops_per_sec": 328.7990403072,
"weighted_teraops_per_sec": 328.7990403072,
"ecc": {
"corrected": 0,
"uncorrected": 0
}
},
{
"precision": "fp64",
"status": "FAILED",
"steady": {
"duration_sec": 281.842154315,
"samples": 2,
"avg_temp_c": 81,
"p95_temp_c": 81.9,
"avg_power_w": 697.7,
"p95_power_w": 698.861,
"avg_graphics_clock_mhz": 1170,
"p95_graphics_clock_mhz": 1210.5,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 100,
"avg_mem_usage_pct": 85,
"clock_cv_pct": 3.8461538461538463,
"power_cv_pct": 0.18489322058191493,
"temp_cv_pct": 1.2345679012345678,
"clock_drift_pct": 0
},
"ecc": {
"corrected": 0,
"uncorrected": 0
},
"notes": "precision phase failed"
},
{
"precision": "fp4",
"status": "FAILED",
"steady": {
"duration_sec": 323.470255195,
"samples": 2,
"avg_temp_c": 82.5,
"p95_temp_c": 82.95,
"avg_power_w": 697.71,
"p95_power_w": 698.268,
"avg_graphics_clock_mhz": 1192.5,
"p95_graphics_clock_mhz": 1212.75,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 100,
"avg_mem_usage_pct": 64.5,
"clock_cv_pct": 1.8867924528301887,
"power_cv_pct": 0.08886213469779773,
"temp_cv_pct": 0.6060606060606061,
"clock_drift_pct": 0
},
"ecc": {
"corrected": 0,
"uncorrected": 0
},
"notes": "precision phase failed"
}
],
"precision_failures": [
"fp64:FAILED",
"fp4:FAILED"
],
"cooldown": {
"duration_sec": 0,
"samples": 0,
"avg_temp_c": 0,
"p95_temp_c": 0,
"avg_power_w": 0,
"p95_power_w": 0,
"avg_graphics_clock_mhz": 0,
"p95_graphics_clock_mhz": 0,
"avg_memory_clock_mhz": 0,
"p95_memory_clock_mhz": 0,
"avg_usage_pct": 0,
"avg_mem_usage_pct": 0,
"clock_cv_pct": 0,
"power_cv_pct": 0,
"temp_cv_pct": 0,
"clock_drift_pct": 0
},
"throttle_counters": {
"sw_power_cap_us": 99976,
"sw_thermal_slowdown_us": 99976,
"sync_boost_us": 0,
"hw_thermal_slowdown_us": 0,
"hw_power_brake_slowdown_us": 0
},
"ecc": {
"corrected": 0,
"uncorrected": 0
},
"scores": {
"compute_score": 1273.4773305453227,
"synthetic_score": 1273.4773305453227,
"power_sustain_score": 83.14921020461449,
"thermal_sustain_score": 100,
"stability_score": 0,
"thermal_throttle_pct": 100,
"power_cap_throttle_pct": 100,
"temp_headroom_c": 0,
"interconnect_score": 8.54,
"server_quality_score": 54.944763061384336,
"composite_score": 1273.4773305453227
},
"degradation_reasons": [
"power_capped",
"thermal_limited",
"low_sm_clock_vs_target"
],
"notes": [
"parallel warmup failed: exit status 1"
]
},
{
"index": 3,
"uuid": "GPU-9ee0af22-3dda-6f5c-1a13-35c63f324216",
"name": "NVIDIA H100 80GB HBM3",
"bus_id": "00000000:D8:00.0",
"vbios": "96.00.74.00.01",
"status": "PARTIAL",
"power_limit_w": 700,
"default_power_limit_w": 700,
"max_graphics_clock_mhz": 1980,
"max_memory_clock_mhz": 2619,
"locked_graphics_clock_mhz": 1980,
"locked_memory_clock_mhz": 2619,
"baseline": {
"duration_sec": 18.67612165,
"samples": 1,
"avg_temp_c": 41,
"p95_temp_c": 41,
"avg_power_w": 98.14,
"p95_power_w": 98.14,
"avg_graphics_clock_mhz": 1830,
"p95_graphics_clock_mhz": 1830,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 0,
"avg_mem_usage_pct": 0,
"clock_cv_pct": 0,
"power_cv_pct": 0,
"temp_cv_pct": 0,
"clock_drift_pct": 0
},
"steady": {
"duration_sec": 0,
"samples": 0,
"avg_temp_c": 0,
"p95_temp_c": 0,
"avg_power_w": 0,
"p95_power_w": 0,
"avg_graphics_clock_mhz": 0,
"p95_graphics_clock_mhz": 0,
"avg_memory_clock_mhz": 0,
"p95_memory_clock_mhz": 0,
"avg_usage_pct": 0,
"avg_mem_usage_pct": 0,
"clock_cv_pct": 0,
"power_cv_pct": 0,
"temp_cv_pct": 0,
"clock_drift_pct": 0
},
"precision_steady": [
{
"precision": "int8",
"status": "OK",
"steady": {
"duration_sec": 53.666271418,
"samples": 2,
"avg_temp_c": 51,
"p95_temp_c": 58.2,
"avg_power_w": 445.47,
"p95_power_w": 672.252,
"avg_graphics_clock_mhz": 1522.5,
"p95_graphics_clock_mhz": 1799.25,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 50,
"avg_mem_usage_pct": 42,
"clock_cv_pct": 20.19704433497537,
"power_cv_pct": 56.56497631714819,
"temp_cv_pct": 15.686274509803921,
"clock_drift_pct": 0
},
"teraops_per_sec": 1149.59849619456,
"weighted_teraops_per_sec": 287.39962404864,
"ecc": {
"corrected": 0,
"uncorrected": 0
}
},
{
"precision": "fp8",
"status": "OK",
"steady": {
"duration_sec": 94.749526187,
"samples": 2,
"avg_temp_c": 67,
"p95_temp_c": 68.8,
"avg_power_w": 698.51,
"p95_power_w": 699.3470000000001,
"avg_graphics_clock_mhz": 1162.5,
"p95_graphics_clock_mhz": 1196.25,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 100,
"avg_mem_usage_pct": 73.5,
"clock_cv_pct": 3.225806451612903,
"power_cv_pct": 0.13314054201085265,
"temp_cv_pct": 2.9850746268656714,
"clock_drift_pct": 0
},
"teraops_per_sec": 1417.5994524991488,
"weighted_teraops_per_sec": 354.3998631247872,
"ecc": {
"corrected": 0,
"uncorrected": 0
}
},
{
"precision": "fp16",
"status": "OK",
"steady": {
"duration_sec": 160.278286598,
"samples": 3,
"avg_temp_c": 73.66666666666667,
"p95_temp_c": 74.9,
"avg_power_w": 699.1333333333332,
"p95_power_w": 699.4029999999999,
"avg_graphics_clock_mhz": 1140,
"p95_graphics_clock_mhz": 1153.5,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 100,
"avg_mem_usage_pct": 62,
"clock_cv_pct": 1.07433760648385,
"power_cv_pct": 0.03630428114961699,
"temp_cv_pct": 1.6930576410741816,
"clock_drift_pct": 0
},
"teraops_per_sec": 605.7576061293909,
"weighted_teraops_per_sec": 302.87880306469543,
"ecc": {
"corrected": 0,
"uncorrected": 0
}
},
{
"precision": "fp32",
"status": "OK",
"steady": {
"duration_sec": 234.413351493,
"samples": 3,
"avg_temp_c": 76.33333333333333,
"p95_temp_c": 77,
"avg_power_w": 699.0499999999998,
"p95_power_w": 699.372,
"avg_graphics_clock_mhz": 1185,
"p95_graphics_clock_mhz": 1198.5,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 100,
"avg_mem_usage_pct": 100,
"clock_cv_pct": 1.0335399758578812,
"power_cv_pct": 0.04093033637817591,
"temp_cv_pct": 1.2351210151730088,
"clock_drift_pct": 0
},
"teraops_per_sec": 328.7990403072,
"weighted_teraops_per_sec": 328.7990403072,
"ecc": {
"corrected": 0,
"uncorrected": 0
}
},
{
"precision": "fp64",
"status": "FAILED",
"steady": {
"duration_sec": 281.842154315,
"samples": 2,
"avg_temp_c": 78.5,
"p95_temp_c": 78.95,
"avg_power_w": 697.9100000000001,
"p95_power_w": 698.63,
"avg_graphics_clock_mhz": 1155,
"p95_graphics_clock_mhz": 1168.5,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 100,
"avg_mem_usage_pct": 86.5,
"clock_cv_pct": 1.2987012987012987,
"power_cv_pct": 0.11462796062529715,
"temp_cv_pct": 0.6369426751592357,
"clock_drift_pct": 0
},
"ecc": {
"corrected": 0,
"uncorrected": 0
},
"notes": "precision phase failed"
},
{
"precision": "fp4",
"status": "FAILED",
"steady": {
"duration_sec": 323.470255195,
"samples": 2,
"avg_temp_c": 80,
"p95_temp_c": 80,
"avg_power_w": 696.935,
"p95_power_w": 697.1645000000001,
"avg_graphics_clock_mhz": 1185,
"p95_graphics_clock_mhz": 1198.5,
"avg_memory_clock_mhz": 2619,
"p95_memory_clock_mhz": 2619,
"avg_usage_pct": 100,
"avg_mem_usage_pct": 70.5,
"clock_cv_pct": 1.2658227848101267,
"power_cv_pct": 0.036588778006564786,
"temp_cv_pct": 0,
"clock_drift_pct": 0
},
"ecc": {
"corrected": 0,
"uncorrected": 0
},
"notes": "precision phase failed"
}
],
"precision_failures": [
"fp64:FAILED",
"fp4:FAILED"
],
"cooldown": {
"duration_sec": 0,
"samples": 0,
"avg_temp_c": 0,
"p95_temp_c": 0,
"avg_power_w": 0,
"p95_power_w": 0,
"avg_graphics_clock_mhz": 0,
"p95_graphics_clock_mhz": 0,
"avg_memory_clock_mhz": 0,
"p95_memory_clock_mhz": 0,
"avg_usage_pct": 0,
"avg_mem_usage_pct": 0,
"clock_cv_pct": 0,
"power_cv_pct": 0,
"temp_cv_pct": 0,
"clock_drift_pct": 0
},
"throttle_counters": {
"sw_power_cap_us": 199978,
"sw_thermal_slowdown_us": 199978,
"sync_boost_us": 0,
"hw_thermal_slowdown_us": 0,
"hw_power_brake_slowdown_us": 0
},
"ecc": {
"corrected": 0,
"uncorrected": 0
},
"scores": {
"compute_score": 1273.4773305453227,
"synthetic_score": 1273.4773305453227,
"power_sustain_score": 83.15253738424808,
"thermal_sustain_score": 100,
"stability_score": 0,
"thermal_throttle_pct": 100,
"power_cap_throttle_pct": 100,
"temp_headroom_c": 0,
"interconnect_score": 8.54,
"server_quality_score": 54.94576121527442,
"composite_score": 1273.4773305453227
},
"degradation_reasons": [
"power_capped",
"thermal_limited",
"low_sm_clock_vs_target"
],
"notes": [
"parallel warmup failed: exit status 1"
]
}
],
"interconnect": {
"status": "OK",
"attempted": true,
"supported": true,
"selected_gpu_indices": [
0,
1,
2,
3
],
"avg_algbw_gbps": 5.6375,
"max_algbw_gbps": 5.69,
"avg_busbw_gbps": 8.4575,
"max_busbw_gbps": 8.54
},
"server_power": {
"available": false,
"notes": [
"IPMI power reading unavailable; server-side power characterization skipped"
]
}
}