Fix combined GPU burn profile capacity for FP4

This commit is contained in:
2026-04-14 00:00:40 +03:00
parent bf182daa89
commit 0fb8f2777f

View File

@@ -33,7 +33,6 @@ typedef void *CUstream;
#define CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR 75
#define CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR 76
#define MAX_STRESS_STREAMS 16
#define MAX_CUBLAS_PROFILES 5
#define MIN_PROFILE_BUDGET_BYTES ((size_t)4u * 1024u * 1024u)
#define MIN_STREAM_BUDGET_BYTES ((size_t)64u * 1024u * 1024u)
@@ -689,6 +688,8 @@ static const struct profile_desc k_profiles[] = {
#endif
};
#define PROFILE_COUNT ((int)(sizeof(k_profiles) / sizeof(k_profiles[0])))
static int load_cublaslt(struct cublaslt_api *api) {
memset(api, 0, sizeof(*api));
api->lib = dlopen("libcublasLt.so.13", RTLD_NOW | RTLD_LOCAL);
@@ -1124,7 +1125,7 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
const char *precision_filter,
struct stress_report *report) {
struct cublaslt_api cublas;
struct prepared_profile prepared[MAX_STRESS_STREAMS * MAX_CUBLAS_PROFILES];
struct prepared_profile prepared[MAX_STRESS_STREAMS * PROFILE_COUNT];
cublasLtHandle_t handle = NULL;
CUcontext ctx = NULL;
CUstream streams[MAX_STRESS_STREAMS] = {0};
@@ -1134,7 +1135,7 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
int active = 0;
int mp_count = 0;
int stream_count = 1;
int profile_count = (int)(sizeof(k_profiles) / sizeof(k_profiles[0]));
int profile_count = PROFILE_COUNT;
int prepared_count = 0;
size_t requested_budget = 0;
size_t total_budget = 0;