WIP: checkpoint current tree
This commit is contained in:
@@ -232,6 +232,54 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
|
||||
}
|
||||
}
|
||||
|
||||
func (h *handler) handleAPIBenchmarkNvidiaRun(w http.ResponseWriter, r *http.Request) {
|
||||
if h.opts.App == nil {
|
||||
writeError(w, http.StatusServiceUnavailable, "app not configured")
|
||||
return
|
||||
}
|
||||
|
||||
var body struct {
|
||||
Profile string `json:"profile"`
|
||||
SizeMB int `json:"size_mb"`
|
||||
GPUIndices []int `json:"gpu_indices"`
|
||||
ExcludeGPUIndices []int `json:"exclude_gpu_indices"`
|
||||
RunNCCL *bool `json:"run_nccl"`
|
||||
DisplayName string `json:"display_name"`
|
||||
}
|
||||
if r.Body != nil {
|
||||
if err := json.NewDecoder(r.Body).Decode(&body); err != nil && !errors.Is(err, io.EOF) {
|
||||
writeError(w, http.StatusBadRequest, "invalid request body")
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
runNCCL := true
|
||||
if body.RunNCCL != nil {
|
||||
runNCCL = *body.RunNCCL
|
||||
}
|
||||
t := &Task{
|
||||
ID: newJobID("benchmark-nvidia"),
|
||||
Name: taskDisplayName("nvidia-benchmark", "", ""),
|
||||
Target: "nvidia-benchmark",
|
||||
Priority: 15,
|
||||
Status: TaskPending,
|
||||
CreatedAt: time.Now(),
|
||||
params: taskParams{
|
||||
GPUIndices: body.GPUIndices,
|
||||
ExcludeGPUIndices: body.ExcludeGPUIndices,
|
||||
SizeMB: body.SizeMB,
|
||||
BenchmarkProfile: body.Profile,
|
||||
RunNCCL: runNCCL,
|
||||
DisplayName: body.DisplayName,
|
||||
},
|
||||
}
|
||||
if strings.TrimSpace(body.DisplayName) != "" {
|
||||
t.Name = body.DisplayName
|
||||
}
|
||||
globalQueue.enqueue(t)
|
||||
writeJSON(w, map[string]string{"task_id": t.ID, "job_id": t.ID})
|
||||
}
|
||||
|
||||
func (h *handler) handleAPISATStream(w http.ResponseWriter, r *http.Request) {
|
||||
id := r.URL.Query().Get("job_id")
|
||||
if id == "" {
|
||||
@@ -491,6 +539,22 @@ func (h *handler) handleAPIExportUSBBundle(w http.ResponseWriter, r *http.Reques
|
||||
|
||||
// ── GPU presence ──────────────────────────────────────────────────────────────
|
||||
|
||||
func (h *handler) handleAPIGNVIDIAGPUs(w http.ResponseWriter, _ *http.Request) {
|
||||
if h.opts.App == nil {
|
||||
writeError(w, http.StatusServiceUnavailable, "app not configured")
|
||||
return
|
||||
}
|
||||
gpus, err := h.opts.App.ListNvidiaGPUs()
|
||||
if err != nil {
|
||||
writeError(w, http.StatusInternalServerError, err.Error())
|
||||
return
|
||||
}
|
||||
if gpus == nil {
|
||||
gpus = []platform.NvidiaGPU{}
|
||||
}
|
||||
writeJSON(w, gpus)
|
||||
}
|
||||
|
||||
func (h *handler) handleAPIGPUPresence(w http.ResponseWriter, r *http.Request) {
|
||||
if h.opts.App == nil {
|
||||
writeError(w, http.StatusServiceUnavailable, "app not configured")
|
||||
@@ -516,8 +580,10 @@ func (h *handler) handleAPIGPUTools(w http.ResponseWriter, _ *http.Request) {
|
||||
_, amdErr := os.Stat("/dev/kfd")
|
||||
nvidiaUp := nvidiaErr == nil
|
||||
amdUp := amdErr == nil
|
||||
_, dcgmErr := exec.LookPath("dcgmi")
|
||||
writeJSON(w, []toolEntry{
|
||||
{ID: "bee-gpu-burn", Available: nvidiaUp, Vendor: "nvidia"},
|
||||
{ID: "dcgm", Available: nvidiaUp && dcgmErr == nil, Vendor: "nvidia"},
|
||||
{ID: "john", Available: nvidiaUp, Vendor: "nvidia"},
|
||||
{ID: "nccl", Available: nvidiaUp, Vendor: "nvidia"},
|
||||
{ID: "rvs", Available: amdUp, Vendor: "amd"},
|
||||
|
||||
Reference in New Issue
Block a user