refactor(webui): redesign Burn tab and fix gpu-burn memory defaults
- Burn tab: replace 6 flat cards with 3 grouped cards (GPU Stress, Compute Stress, Platform Thermal Cycling) + global Burn Profile
- Run All button at top enqueues all enabled tests across all cards
- GPU Stress: tool checkboxes enabled/disabled via new /api/gpu/tools endpoint based on driver status (/dev/nvidia0, /dev/kfd)
- Compute Stress: checkboxes for cpu/memory-stress/stressapptest
- Platform Thermal Cycling: component checkboxes (cpu/nvidia/amd) with platform_components param wired through to PlatformStressOptions
- bee-gpu-burn: default size-mb changed from 64 to 0 (auto); script now queries nvidia-smi memory.total per GPU and uses 95% of it
- platform_stress: removed hardcoded --size-mb 64; respects Components field to selectively run CPU and/or GPU load goroutines

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -106,9 +106,10 @@ type taskParams struct {
|
||||
GPUIndices []int `json:"gpu_indices,omitempty"`
|
||||
ExcludeGPUIndices []int `json:"exclude_gpu_indices,omitempty"`
|
||||
Loader string `json:"loader,omitempty"`
|
||||
BurnProfile string `json:"burn_profile,omitempty"`
|
||||
DisplayName string `json:"display_name,omitempty"`
|
||||
Device string `json:"device,omitempty"` // for install
|
||||
BurnProfile string `json:"burn_profile,omitempty"`
|
||||
DisplayName string `json:"display_name,omitempty"`
|
||||
Device string `json:"device,omitempty"` // for install
|
||||
PlatformComponents []string `json:"platform_components,omitempty"`
|
||||
}
|
||||
|
||||
type persistedTask struct {
|
||||
@@ -550,6 +551,7 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
|
||||
break
|
||||
}
|
||||
opts := resolvePlatformStressPreset(t.params.BurnProfile)
|
||||
opts.Components = t.params.PlatformComponents
|
||||
archive, err = a.RunPlatformStress(ctx, "", opts, j.append)
|
||||
case "audit":
|
||||
if a == nil {
|
||||
|
||||
Reference in New Issue
Block a user