From abaeaea13f2adbeb36102b3f8e147760123259bd Mon Sep 17 00:00:00 2001 From: Mikhail Chusavitin Date: Thu, 2 Jul 2026 19:18:28 +0300 Subject: [PATCH] add Confidential Computing readiness check + collect nvidia-smi conf-compute -q New read-only "Check" step reports whether this server can run NVIDIA Confidential Computing: CPU TEE support (Intel TDX / AMD SEV-SNP, via dmesg and kvm_amd sysfs params) and GPU firmware CC capability (via `nvidia-smi conf-compute -q`). Also collect that command's output into the techdump export bundle. Co-Authored-By: Claude Sonnet 5 --- audit/internal/app/app.go | 1 + audit/internal/app/app_packs.go | 16 ++ audit/internal/app/app_test.go | 4 + .../internal/platform/confidential_compute.go | 248 ++++++++++++++++++ audit/internal/platform/techdump.go | 1 + audit/internal/webui/api.go | 2 +- audit/internal/webui/page_validate.go | 10 +- audit/internal/webui/server.go | 1 + audit/internal/webui/task_runner.go | 6 + audit/internal/webui/tasks.go | 10 + 10 files changed, 296 insertions(+), 3 deletions(-) create mode 100644 audit/internal/platform/confidential_compute.go diff --git a/audit/internal/app/app.go b/audit/internal/app/app.go index c9b09ee..e470812 100644 --- a/audit/internal/app/app.go +++ b/audit/internal/app/app.go @@ -134,6 +134,7 @@ type satRunner interface { ResetNvidiaGPU(index int) (string, error) RunMemoryAcceptancePack(ctx context.Context, baseDir string, sizeMB, passes int, logFunc func(string)) (string, error) RunStorageAcceptancePack(ctx context.Context, baseDir string, extended bool, logFunc func(string)) (string, error) + RunConfidentialComputingCheckPack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) RunCPUAcceptancePack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) ListNvidiaGPUs() ([]platform.NvidiaGPU, error) DetectGPUVendor() string diff --git a/audit/internal/app/app_packs.go b/audit/internal/app/app_packs.go index 59ffc4c..0d255f3 100644 --- a/audit/internal/app/app_packs.go +++ b/audit/internal/app/app_packs.go @@ -206,6 +206,22 @@ func (a *App) RunStorageAcceptancePackResult(baseDir string) (ActionResult, erro return ActionResult{Title: "Storage SAT", Body: satResultBody(path)}, err } +func (a *App) RunConfidentialComputingCheckPackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) { + if strings.TrimSpace(baseDir) == "" { + baseDir = DefaultSATBaseDir + } + return a.sat.RunConfidentialComputingCheckPack(ctx, baseDir, logFunc) +} + +func (a *App) RunConfidentialComputingCheckPack(baseDir string, logFunc func(string)) (string, error) { + return a.RunConfidentialComputingCheckPackCtx(context.Background(), baseDir, logFunc) +} + +func (a *App) RunConfidentialComputingCheckPackResult(baseDir string) (ActionResult, error) { + path, err := a.RunConfidentialComputingCheckPack(baseDir, nil) + return ActionResult{Title: "Confidential Computing Check", Body: satResultBody(path)}, err +} + func (a *App) DetectGPUVendor() string { return a.sat.DetectGPUVendor() } diff --git a/audit/internal/app/app_test.go b/audit/internal/app/app_test.go index 850cc19..6ec3869 100644 --- a/audit/internal/app/app_test.go +++ b/audit/internal/app/app_test.go @@ -243,6 +243,10 @@ func (f fakeSAT) RunStorageAcceptancePack(_ context.Context, baseDir string, _ b return f.runStorageFn(baseDir) } +func (f fakeSAT) RunConfidentialComputingCheckPack(_ context.Context, baseDir string, _ func(string)) (string, error) { + return "", nil +} + func (f fakeSAT) RunCPUAcceptancePack(_ context.Context, baseDir string, durationSec int, _ func(string)) (string, error) { if f.runCPUFn != nil { return f.runCPUFn(baseDir, durationSec) diff --git a/audit/internal/platform/confidential_compute.go b/audit/internal/platform/confidential_compute.go new file mode 100644 index 0000000..95551b9 --- /dev/null +++ b/audit/internal/platform/confidential_compute.go @@ -0,0 +1,248 @@ +package platform + +import ( + "bytes" + "context" + "fmt" + "os" + "path/filepath" + "strings" + "time" +) + +// ConfidentialComputingStatus summarizes whether this server can run NVIDIA +// Confidential Computing: CPU-side TEE support (Intel TDX / AMD SEV-SNP) and +// GPU firmware CC capability, as reported by `nvidia-smi conf-compute -q`. +type ConfidentialComputingStatus struct { + CollectedAt time.Time `json:"collected_at"` + + // GPU-reported fields, parsed from `nvidia-smi conf-compute -q`. + NvidiaSMIAvailable bool `json:"nvidia_smi_available"` + CCState string `json:"cc_state,omitempty"` // ON / OFF + MultiGPUMode string `json:"multi_gpu_mode,omitempty"` // Protected PCIe / ... + CPUCCCapability string `json:"cpu_cc_capability,omitempty"` // e.g. "INTEL TDX", "AMD SEV-SNP", "NONE" + GPUCCCapability string `json:"gpu_cc_capability,omitempty"` // e.g. "CC Capable", "Not Capable" + CCGPUsReadyState string `json:"cc_gpus_ready_state,omitempty"` // Ready / Not Ready + + // Host-side evidence that the CPU's TEE is actually active in the running + // kernel (BIOS + kernel cmdline + firmware), independent of what the GPU + // driver reports. Used as a fallback when the NVIDIA driver isn't loaded. + HostAMDSEVSupported bool `json:"host_amd_sev_supported"` + HostAMDSEVESSupported bool `json:"host_amd_sev_es_supported"` + HostAMDSEVSNPActive bool `json:"host_amd_sev_snp_active"` + HostIntelTDXActive bool `json:"host_intel_tdx_active"` + + // GPUCanRunCC is true when the GPU firmware reports CC-capable. + GPUCanRunCC bool `json:"gpu_can_run_cc"` + // CPUCanRunCC is true when either the GPU driver or the host kernel + // reports an active/available CPU TEE (SEV-SNP or TDX). + CPUCanRunCC bool `json:"cpu_can_run_cc"` + // Ready is true when both the CPU and the GPU support Confidential + // Computing, regardless of whether CC mode is currently enabled. + Ready bool `json:"ready"` + + Notes []string `json:"notes,omitempty"` +} + +// RunConfidentialComputingCheckPack runs a read-only check of whether this +// server can run NVIDIA Confidential Computing: it queries the GPU driver +// (`nvidia-smi conf-compute -q`) and inspects host kernel/dmesg evidence of +// AMD SEV-SNP / Intel TDX support. It changes nothing on the system. +func (s *System) RunConfidentialComputingCheckPack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) { + if ctx == nil { + ctx = context.Background() + } + if baseDir == "" { + baseDir = "/var/log/bee-sat" + } + ts := time.Now().UTC().Format("20060102-150405") + runDir := filepath.Join(baseDir, "confidential-computing-"+ts) + if err := os.MkdirAll(runDir, 0755); err != nil { + return "", err + } + verboseLog := filepath.Join(runDir, "verbose.log") + + status := ConfidentialComputingStatus{CollectedAt: time.Now().UTC()} + + // GPU firmware / driver state. + ccOut, ccErr := runSATCommandCtx(ctx, verboseLog, "nvidia-smi-conf-compute-q", []string{"nvidia-smi", "conf-compute", "-q"}, nil, logFunc) + _ = os.WriteFile(filepath.Join(runDir, "01-nvidia-smi-conf-compute-q.log"), ccOut, 0644) + if ccErr == nil { + status.NvidiaSMIAvailable = true + fields := parseConfComputeFields(ccOut) + status.CCState = fields["CC State"] + status.MultiGPUMode = fields["Multi-GPU Mode"] + status.CPUCCCapability = fields["CPU CC Capabilities"] + status.GPUCCCapability = fields["GPU CC Capabilities"] + status.CCGPUsReadyState = fields["CC GPUs Ready State"] + } else { + status.Notes = append(status.Notes, "nvidia-smi conf-compute -q unavailable (no NVIDIA driver, or GPU not present): "+firstLine(string(ccOut))) + } + + // Host kernel evidence, independent of the GPU driver. + dmesgOut, _ := runSATCommandCtx(ctx, verboseLog, "dmesg", []string{"dmesg"}, nil, nil) + ccDmesgLines := filterConfComputeDmesgLines(dmesgOut) + _ = os.WriteFile(filepath.Join(runDir, "02-dmesg-cc-relevant.log"), []byte(strings.Join(ccDmesgLines, "\n")+"\n"), 0644) + + lowerDmesg := strings.ToLower(strings.Join(ccDmesgLines, "\n")) + status.HostAMDSEVSNPActive = strings.Contains(lowerDmesg, "sev-snp enabled") + status.HostIntelTDXActive = strings.Contains(lowerDmesg, "tdx module") && strings.Contains(lowerDmesg, "module initialized") || + strings.Contains(lowerDmesg, "virt/tdx: module initialized") + + for i, path := range []string{ + "/sys/module/kvm_amd/parameters/sev", + "/sys/module/kvm_amd/parameters/sev_es", + "/sys/module/kvm_amd/parameters/sev_snp", + } { + name := fmt.Sprintf("sysfs-%s", filepath.Base(path)) + out, err := runSATCommandCtx(ctx, verboseLog, name, []string{"cat", path}, nil, nil) + _ = os.WriteFile(filepath.Join(runDir, fmt.Sprintf("03-%02d-%s.log", i+1, name)), out, 0644) + if err != nil { + continue + } + val := strings.TrimSpace(string(out)) + switch filepath.Base(path) { + case "sev": + status.HostAMDSEVSupported = strings.EqualFold(val, "Y") + case "sev_es": + status.HostAMDSEVESSupported = strings.EqualFold(val, "Y") + case "sev_snp": + if strings.EqualFold(val, "Y") { + status.HostAMDSEVSNPActive = true + } + } + } + + status.GPUCanRunCC = strings.EqualFold(strings.TrimSpace(status.GPUCCCapability), "CC Capable") + cpuCapReported := strings.TrimSpace(status.CPUCCCapability) + status.CPUCanRunCC = status.HostAMDSEVSNPActive || status.HostIntelTDXActive || + (cpuCapReported != "" && !strings.EqualFold(cpuCapReported, "NONE")) + status.Ready = status.CPUCanRunCC && status.GPUCanRunCC + + if !status.NvidiaSMIAvailable { + status.Notes = append(status.Notes, "GPU CC capability unknown — install the NVIDIA driver to query it with `nvidia-smi conf-compute -q`.") + } + + summary := renderConfidentialComputingSummary(status) + if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary), 0644); err != nil { + return "", err + } + report := renderConfidentialComputingReport(status) + if err := os.WriteFile(filepath.Join(runDir, "confidential-computing-report.txt"), []byte(report), 0644); err != nil { + return "", err + } + + return runDir, nil +} + +// parseConfComputeFields parses the indented "Key : Value" block emitted by +// `nvidia-smi conf-compute -q`, e.g.: +// +// CC State : OFF +// Multi-GPU Mode : Protected PCIe +// CPU CC Capabilities : INTEL TDX +// GPU CC Capabilities : CC Capable +// CC GPUs Ready State : Not Ready +func parseConfComputeFields(out []byte) map[string]string { + fields := map[string]string{} + for _, line := range strings.Split(string(out), "\n") { + idx := strings.Index(line, ":") + if idx < 0 { + continue + } + key := strings.TrimSpace(line[:idx]) + val := strings.TrimSpace(line[idx+1:]) + if key == "" || val == "" { + continue + } + fields[key] = val + } + return fields +} + +// filterConfComputeDmesgLines returns the dmesg lines relevant to CPU +// Confidential Computing support (AMD SEV/SEV-ES/SEV-SNP, Intel TDX). +func filterConfComputeDmesgLines(dmesgOut []byte) []string { + var lines []string + scanner := bytes.Split(dmesgOut, []byte("\n")) + for _, raw := range scanner { + lower := strings.ToLower(string(raw)) + if strings.Contains(lower, "sev") || strings.Contains(lower, "tdx") { + lines = append(lines, string(raw)) + } + } + return lines +} + +func renderConfidentialComputingSummary(status ConfidentialComputingStatus) string { + var b strings.Builder + fmt.Fprintf(&b, "run_at_utc=%s\n", status.CollectedAt.Format(time.RFC3339)) + fmt.Fprintf(&b, "nvidia_smi_available=%t\n", status.NvidiaSMIAvailable) + fmt.Fprintf(&b, "cc_state=%s\n", status.CCState) + fmt.Fprintf(&b, "multi_gpu_mode=%s\n", status.MultiGPUMode) + fmt.Fprintf(&b, "cpu_cc_capability=%s\n", status.CPUCCCapability) + fmt.Fprintf(&b, "gpu_cc_capability=%s\n", status.GPUCCCapability) + fmt.Fprintf(&b, "cc_gpus_ready_state=%s\n", status.CCGPUsReadyState) + fmt.Fprintf(&b, "host_amd_sev_supported=%t\n", status.HostAMDSEVSupported) + fmt.Fprintf(&b, "host_amd_sev_es_supported=%t\n", status.HostAMDSEVESSupported) + fmt.Fprintf(&b, "host_amd_sev_snp_active=%t\n", status.HostAMDSEVSNPActive) + fmt.Fprintf(&b, "host_intel_tdx_active=%t\n", status.HostIntelTDXActive) + fmt.Fprintf(&b, "cpu_can_run_cc=%t\n", status.CPUCanRunCC) + fmt.Fprintf(&b, "gpu_can_run_cc=%t\n", status.GPUCanRunCC) + fmt.Fprintf(&b, "ready=%t\n", status.Ready) + if status.Ready { + fmt.Fprintln(&b, "overall_status=OK") + } else { + fmt.Fprintln(&b, "overall_status=NOT_READY") + } + return b.String() +} + +func renderConfidentialComputingReport(status ConfidentialComputingStatus) string { + var b strings.Builder + line := strings.Repeat("=", 80) + b.WriteString(line + "\n") + b.WriteString("Confidential Computing Readiness\n") + b.WriteString(line + "\n\n") + + verdict := "NOT READY" + if status.Ready { + verdict = "READY" + } + fmt.Fprintf(&b, "Verdict: %s\n\n", verdict) + + b.WriteString("-- CPU ----------------------------------------------------------------------\n") + fmt.Fprintf(&b, " Reported by GPU driver : %s\n", nonEmptyOr(status.CPUCCCapability, "unknown")) + fmt.Fprintf(&b, " AMD SEV supported : %t\n", status.HostAMDSEVSupported) + fmt.Fprintf(&b, " AMD SEV-ES supported : %t\n", status.HostAMDSEVESSupported) + fmt.Fprintf(&b, " AMD SEV-SNP active : %t\n", status.HostAMDSEVSNPActive) + fmt.Fprintf(&b, " Intel TDX active : %t\n", status.HostIntelTDXActive) + fmt.Fprintf(&b, " Can run CC : %t\n\n", status.CPUCanRunCC) + + b.WriteString("-- GPU ----------------------------------------------------------------------\n") + fmt.Fprintf(&b, " nvidia-smi available : %t\n", status.NvidiaSMIAvailable) + fmt.Fprintf(&b, " GPU CC Capabilities : %s\n", nonEmptyOr(status.GPUCCCapability, "unknown")) + fmt.Fprintf(&b, " CC State (current) : %s\n", nonEmptyOr(status.CCState, "unknown")) + fmt.Fprintf(&b, " Multi-GPU Mode : %s\n", nonEmptyOr(status.MultiGPUMode, "unknown")) + fmt.Fprintf(&b, " CC GPUs Ready State : %s\n", nonEmptyOr(status.CCGPUsReadyState, "unknown")) + fmt.Fprintf(&b, " Can run CC : %t\n\n", status.GPUCanRunCC) + + if len(status.Notes) > 0 { + b.WriteString("-- Notes ----------------------------------------------------------------------\n") + for _, n := range status.Notes { + fmt.Fprintf(&b, " - %s\n", n) + } + b.WriteString("\n") + } + + fmt.Fprintf(&b, "Collected : %s\n", status.CollectedAt.Format("2006-01-02 15:04:05 UTC")) + b.WriteString(line + "\n") + return b.String() +} + +func nonEmptyOr(v, fallback string) string { + if strings.TrimSpace(v) == "" { + return fallback + } + return v +} diff --git a/audit/internal/platform/techdump.go b/audit/internal/platform/techdump.go index 7cfe457..298be1a 100644 --- a/audit/internal/platform/techdump.go +++ b/audit/internal/platform/techdump.go @@ -38,6 +38,7 @@ var techDumpNvidiaCommands = []struct { }{ {Name: "nvidia-smi", Args: []string{"-q"}, File: "nvidia-smi-q.txt"}, {Name: "nvidia-smi", Args: []string{"--query-gpu=index,pci.bus_id,serial,vbios_version,temperature.gpu,power.draw,ecc.errors.uncorrected.aggregate.total,ecc.errors.corrected.aggregate.total,clocks_throttle_reasons.hw_slowdown", "--format=csv,noheader,nounits"}, File: "nvidia-smi-query.csv"}, + {Name: "nvidia-smi", Args: []string{"conf-compute", "-q"}, File: "nvidia-smi-conf-compute-q.txt"}, } type lsblkDumpRoot struct { diff --git a/audit/internal/webui/api.go b/audit/internal/webui/api.go index 81a4a05..19672d2 100644 --- a/audit/internal/webui/api.go +++ b/audit/internal/webui/api.go @@ -135,7 +135,7 @@ func defaultTaskPriority(target string, params taskParams) int { return taskPriorityBurn case "nvidia", "nvidia-targeted-stress", "nvidia-targeted-power", "nvidia-pulse", "nvidia-interconnect", "nvidia-bandwidth", "memory", "storage", "cpu", - "amd", "amd-mem", "amd-bandwidth": + "amd", "amd-mem", "amd-bandwidth", "confidential-computing": if params.StressMode { return taskPriorityValidateStress } diff --git a/audit/internal/webui/page_validate.go b/audit/internal/webui/page_validate.go index 226232f..1423e4b 100644 --- a/audit/internal/webui/page_validate.go +++ b/audit/internal/webui/page_validate.go @@ -676,6 +676,12 @@ func renderCheck(opts HandlerOptions) string { `lsblk; NVMe: nvme id-ctrl, nvme smart-log; SATA/SAS: smartctl -H -A`, `Seconds — instantaneous device query, no wear counters incremented.`, )) + + renderSATCard("confidential-computing", "Confidential Computing", "runSAT('confidential-computing')", "", renderValidateCardBody( + inv.NVIDIA, + `Checks whether this server can run NVIDIA Confidential Computing: CPU TEE support (Intel TDX / AMD SEV-SNP) and GPU firmware CC capability. Read-only — changes nothing.`, + `nvidia-smi conf-compute -q, dmesg, /sys/module/kvm_amd/parameters/*`, + `Seconds — read-only query only.`, + )) + `
@@ -737,7 +743,7 @@ func renderCheck(opts HandlerOptions) string {