Fix combined GPU burn profile capacity for FP4

2026-04-14 00:00:40 +03:00
parent bf182daa89
commit 0fb8f2777f
1 changed file with 4 additions and 3 deletions
--- a/iso/builder/bee-gpu-stress.c
+++ b/iso/builder/bee-gpu-stress.c
@@ -33,7 +33,6 @@ typedef void *CUstream;
 #define CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR 75
 #define CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR 76
 #define MAX_STRESS_STREAMS 16
-#define MAX_CUBLAS_PROFILES 5
 #define MIN_PROFILE_BUDGET_BYTES ((size_t)4u * 1024u * 1024u)
 #define MIN_STREAM_BUDGET_BYTES ((size_t)64u * 1024u * 1024u)

@@ -689,6 +688,8 @@ static const struct profile_desc k_profiles[] = {
 #endif
 };

+#define PROFILE_COUNT ((int)(sizeof(k_profiles) / sizeof(k_profiles[0])))
+
 static int load_cublaslt(struct cublaslt_api *api) {
    memset(api, 0, sizeof(*api));
    api->lib = dlopen("libcublasLt.so.13", RTLD_NOW | RTLD_LOCAL);
@@ -1124,7 +1125,7 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
                               const char *precision_filter,
                               struct stress_report *report) {
    struct cublaslt_api cublas;
-    struct prepared_profile prepared[MAX_STRESS_STREAMS * MAX_CUBLAS_PROFILES];
+    struct prepared_profile prepared[MAX_STRESS_STREAMS * PROFILE_COUNT];
    cublasLtHandle_t handle = NULL;
    CUcontext ctx = NULL;
    CUstream streams[MAX_STRESS_STREAMS] = {0};
@@ -1134,7 +1135,7 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
    int active = 0;
    int mp_count = 0;
    int stream_count = 1;
-    int profile_count = (int)(sizeof(k_profiles) / sizeof(k_profiles[0]));
+    int profile_count = PROFILE_COUNT;
    int prepared_count = 0;
    size_t requested_budget = 0;
    size_t total_budget = 0;