Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fc7fe0b08e | ||
|
|
3cf75a541a | ||
|
|
1f750d3edd | ||
|
|
b2b0444131 |
@@ -145,14 +145,23 @@ func New(platform *platform.System) *App {
|
|||||||
// ApplySATOverlay parses a raw audit JSON, overlays the latest SAT results,
|
// ApplySATOverlay parses a raw audit JSON, overlays the latest SAT results,
|
||||||
// and returns the updated JSON. Used by the web UI to serve always-fresh status.
|
// and returns the updated JSON. Used by the web UI to serve always-fresh status.
|
||||||
func ApplySATOverlay(auditJSON []byte) ([]byte, error) {
|
func ApplySATOverlay(auditJSON []byte) ([]byte, error) {
|
||||||
var snap schema.HardwareIngestRequest
|
snap, err := readAuditSnapshot(auditJSON)
|
||||||
if err := json.Unmarshal(auditJSON, &snap); err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
applyLatestSATStatuses(&snap.Hardware, DefaultSATBaseDir)
|
applyLatestSATStatuses(&snap.Hardware, DefaultSATBaseDir)
|
||||||
return json.MarshalIndent(snap, "", " ")
|
return json.MarshalIndent(snap, "", " ")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func readAuditSnapshot(auditJSON []byte) (schema.HardwareIngestRequest, error) {
|
||||||
|
var snap schema.HardwareIngestRequest
|
||||||
|
if err := json.Unmarshal(auditJSON, &snap); err != nil {
|
||||||
|
return schema.HardwareIngestRequest{}, err
|
||||||
|
}
|
||||||
|
collector.NormalizeSnapshot(&snap.Hardware, snap.CollectedAt)
|
||||||
|
return snap, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (a *App) RunAudit(runtimeMode runtimeenv.Mode, output string) (string, error) {
|
func (a *App) RunAudit(runtimeMode runtimeenv.Mode, output string) (string, error) {
|
||||||
if runtimeMode == runtimeenv.ModeLiveCD {
|
if runtimeMode == runtimeenv.ModeLiveCD {
|
||||||
if err := a.runtime.CaptureTechnicalDump(DefaultTechDumpDir); err != nil {
|
if err := a.runtime.CaptureTechnicalDump(DefaultTechDumpDir); err != nil {
|
||||||
@@ -276,6 +285,9 @@ func (a *App) ExportLatestAudit(target platform.RemovableTarget) (string, error)
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
if normalized, normErr := ApplySATOverlay(data); normErr == nil {
|
||||||
|
data = normalized
|
||||||
|
}
|
||||||
if err := os.WriteFile(tmpPath, data, 0644); err != nil {
|
if err := os.WriteFile(tmpPath, data, 0644); err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
@@ -733,6 +745,7 @@ func (a *App) HealthSummaryResult() ActionResult {
|
|||||||
if err := json.Unmarshal(raw, &snapshot); err != nil {
|
if err := json.Unmarshal(raw, &snapshot); err != nil {
|
||||||
return ActionResult{Title: "Health summary", Body: "Audit JSON is unreadable."}
|
return ActionResult{Title: "Health summary", Body: "Audit JSON is unreadable."}
|
||||||
}
|
}
|
||||||
|
collector.NormalizeSnapshot(&snapshot.Hardware, snapshot.CollectedAt)
|
||||||
|
|
||||||
summary := collector.BuildHealthSummary(snapshot.Hardware)
|
summary := collector.BuildHealthSummary(snapshot.Hardware)
|
||||||
var body strings.Builder
|
var body strings.Builder
|
||||||
@@ -767,6 +780,7 @@ func (a *App) MainBanner() string {
|
|||||||
if err := json.Unmarshal(raw, &snapshot); err != nil {
|
if err := json.Unmarshal(raw, &snapshot); err != nil {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
collector.NormalizeSnapshot(&snapshot.Hardware, snapshot.CollectedAt)
|
||||||
|
|
||||||
var lines []string
|
var lines []string
|
||||||
if system := formatSystemLine(snapshot.Hardware.Board); system != "" {
|
if system := formatSystemLine(snapshot.Hardware.Board); system != "" {
|
||||||
|
|||||||
@@ -660,13 +660,50 @@ func TestHealthSummaryResultIncludesCompactSATSummary(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestApplySATOverlayFiltersIgnoredLegacyDevices(t *testing.T) {
|
||||||
|
tmp := t.TempDir()
|
||||||
|
oldSATBaseDir := DefaultSATBaseDir
|
||||||
|
DefaultSATBaseDir = filepath.Join(tmp, "sat")
|
||||||
|
t.Cleanup(func() { DefaultSATBaseDir = oldSATBaseDir })
|
||||||
|
|
||||||
|
raw := `{
|
||||||
|
"collected_at": "2026-03-15T10:00:00Z",
|
||||||
|
"hardware": {
|
||||||
|
"board": {"serial_number": "SRV123"},
|
||||||
|
"storage": [
|
||||||
|
{"model": "Virtual HDisk0", "serial_number": "AAAABBBBCCCC3"},
|
||||||
|
{"model": "PASCARI", "serial_number": "DISK1", "status": "OK"}
|
||||||
|
],
|
||||||
|
"pcie_devices": [
|
||||||
|
{"device_class": "Co-processor", "model": "402xx Series QAT", "status": "OK"},
|
||||||
|
{"device_class": "VideoController", "model": "NVIDIA H100", "status": "OK"}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}`
|
||||||
|
|
||||||
|
got, err := ApplySATOverlay([]byte(raw))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ApplySATOverlay error: %v", err)
|
||||||
|
}
|
||||||
|
text := string(got)
|
||||||
|
if contains(text, "Virtual HDisk0") {
|
||||||
|
t.Fatalf("overlaid audit should drop virtual hdisk:\n%s", text)
|
||||||
|
}
|
||||||
|
if contains(text, "\"device_class\": \"Co-processor\"") {
|
||||||
|
t.Fatalf("overlaid audit should drop co-processors:\n%s", text)
|
||||||
|
}
|
||||||
|
if !contains(text, "PASCARI") || !contains(text, "NVIDIA H100") {
|
||||||
|
t.Fatalf("overlaid audit should keep real devices:\n%s", text)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
|
func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
|
||||||
tmp := t.TempDir()
|
tmp := t.TempDir()
|
||||||
exportDir := filepath.Join(tmp, "export")
|
exportDir := filepath.Join(tmp, "export")
|
||||||
if err := os.MkdirAll(filepath.Join(exportDir, "bee-sat", "memory-run"), 0755); err != nil {
|
if err := os.MkdirAll(filepath.Join(exportDir, "bee-sat", "memory-run"), 0755); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if err := os.WriteFile(filepath.Join(exportDir, "bee-audit.json"), []byte(`{"ok":true}`), 0644); err != nil {
|
if err := os.WriteFile(filepath.Join(exportDir, "bee-audit.json"), []byte(`{"collected_at":"2026-03-15T10:00:00Z","hardware":{"board":{"serial_number":"SRV123"},"storage":[{"model":"Virtual HDisk0","serial_number":"AAAABBBBCCCC3"},{"model":"PASCARI","serial_number":"DISK1"}],"pcie_devices":[{"device_class":"Co-processor","model":"402xx Series QAT"},{"device_class":"VideoController","model":"NVIDIA H100"}]}}`), 0644); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if err := os.WriteFile(filepath.Join(exportDir, "bee-sat", "memory-run", "verbose.log"), []byte("sat verbose"), 0644); err != nil {
|
if err := os.WriteFile(filepath.Join(exportDir, "bee-sat", "memory-run", "verbose.log"), []byte("sat verbose"), 0644); err != nil {
|
||||||
@@ -698,6 +735,7 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
|
|||||||
|
|
||||||
tr := tar.NewReader(gzr)
|
tr := tar.NewReader(gzr)
|
||||||
var names []string
|
var names []string
|
||||||
|
var auditJSON string
|
||||||
for {
|
for {
|
||||||
hdr, err := tr.Next()
|
hdr, err := tr.Next()
|
||||||
if errors.Is(err, io.EOF) {
|
if errors.Is(err, io.EOF) {
|
||||||
@@ -707,6 +745,13 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
|
|||||||
t.Fatalf("read tar entry: %v", err)
|
t.Fatalf("read tar entry: %v", err)
|
||||||
}
|
}
|
||||||
names = append(names, hdr.Name)
|
names = append(names, hdr.Name)
|
||||||
|
if contains(hdr.Name, "/export/bee-audit.json") {
|
||||||
|
body, err := io.ReadAll(tr)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read audit entry: %v", err)
|
||||||
|
}
|
||||||
|
auditJSON = string(body)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var foundRaw bool
|
var foundRaw bool
|
||||||
@@ -721,6 +766,12 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
|
|||||||
if !foundRaw {
|
if !foundRaw {
|
||||||
t.Fatalf("support bundle missing raw SAT log, names=%v", names)
|
t.Fatalf("support bundle missing raw SAT log, names=%v", names)
|
||||||
}
|
}
|
||||||
|
if contains(auditJSON, "Virtual HDisk0") || contains(auditJSON, "\"device_class\": \"Co-processor\"") {
|
||||||
|
t.Fatalf("support bundle should normalize ignored devices:\n%s", auditJSON)
|
||||||
|
}
|
||||||
|
if !contains(auditJSON, "PASCARI") || !contains(auditJSON, "NVIDIA H100") {
|
||||||
|
t.Fatalf("support bundle should keep real devices:\n%s", auditJSON)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestMainBanner(t *testing.T) {
|
func TestMainBanner(t *testing.T) {
|
||||||
@@ -734,6 +785,10 @@ func TestMainBanner(t *testing.T) {
|
|||||||
product := "PowerEdge R760"
|
product := "PowerEdge R760"
|
||||||
cpuModel := "Intel Xeon Gold 6430"
|
cpuModel := "Intel Xeon Gold 6430"
|
||||||
memoryType := "DDR5"
|
memoryType := "DDR5"
|
||||||
|
memorySerialA := "DIMM-A"
|
||||||
|
memorySerialB := "DIMM-B"
|
||||||
|
storageSerialA := "DISK-A"
|
||||||
|
storageSerialB := "DISK-B"
|
||||||
gpuClass := "VideoController"
|
gpuClass := "VideoController"
|
||||||
gpuModel := "NVIDIA H100"
|
gpuModel := "NVIDIA H100"
|
||||||
|
|
||||||
@@ -749,12 +804,12 @@ func TestMainBanner(t *testing.T) {
|
|||||||
{Model: &cpuModel},
|
{Model: &cpuModel},
|
||||||
},
|
},
|
||||||
Memory: []schema.HardwareMemory{
|
Memory: []schema.HardwareMemory{
|
||||||
{Present: &trueValue, SizeMB: intPtr(524288), Type: &memoryType},
|
{Present: &trueValue, SizeMB: intPtr(524288), Type: &memoryType, SerialNumber: &memorySerialA},
|
||||||
{Present: &trueValue, SizeMB: intPtr(524288), Type: &memoryType},
|
{Present: &trueValue, SizeMB: intPtr(524288), Type: &memoryType, SerialNumber: &memorySerialB},
|
||||||
},
|
},
|
||||||
Storage: []schema.HardwareStorage{
|
Storage: []schema.HardwareStorage{
|
||||||
{Present: &trueValue, SizeGB: intPtr(3840)},
|
{Present: &trueValue, SizeGB: intPtr(3840), SerialNumber: &storageSerialA},
|
||||||
{Present: &trueValue, SizeGB: intPtr(3840)},
|
{Present: &trueValue, SizeGB: intPtr(3840), SerialNumber: &storageSerialB},
|
||||||
},
|
},
|
||||||
PCIeDevices: []schema.HardwarePCIeDevice{
|
PCIeDevices: []schema.HardwarePCIeDevice{
|
||||||
{DeviceClass: &gpuClass, Model: &gpuModel},
|
{DeviceClass: &gpuClass, Model: &gpuModel},
|
||||||
|
|||||||
@@ -247,7 +247,7 @@ func copyDirContents(srcDir, dstDir string) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func copyExportDirForSupportBundle(srcDir, dstDir string) error {
|
func copyExportDirForSupportBundle(srcDir, dstDir string) error {
|
||||||
return copyDirContentsFiltered(srcDir, dstDir, func(rel string, info os.FileInfo) bool {
|
if err := copyDirContentsFiltered(srcDir, dstDir, func(rel string, info os.FileInfo) bool {
|
||||||
cleanRel := filepath.ToSlash(strings.TrimPrefix(filepath.Clean(rel), "./"))
|
cleanRel := filepath.ToSlash(strings.TrimPrefix(filepath.Clean(rel), "./"))
|
||||||
if cleanRel == "" {
|
if cleanRel == "" {
|
||||||
return true
|
return true
|
||||||
@@ -259,7 +259,25 @@ func copyExportDirForSupportBundle(srcDir, dstDir string) error {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
})
|
}); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return normalizeSupportBundleAuditJSON(filepath.Join(dstDir, "bee-audit.json"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func normalizeSupportBundleAuditJSON(path string) error {
|
||||||
|
data, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
normalized, err := ApplySATOverlay(data)
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return os.WriteFile(path, normalized, 0644)
|
||||||
}
|
}
|
||||||
|
|
||||||
func copyDirContentsFiltered(srcDir, dstDir string, keep func(rel string, info os.FileInfo) bool) error {
|
func copyDirContentsFiltered(srcDir, dstDir string, keep func(rel string, info os.FileInfo) bool) error {
|
||||||
|
|||||||
@@ -1,10 +1,18 @@
|
|||||||
package collector
|
package collector
|
||||||
|
|
||||||
import "bee/audit/internal/schema"
|
import (
|
||||||
|
"bee/audit/internal/schema"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
func NormalizeSnapshot(snap *schema.HardwareSnapshot, collectedAt string) {
|
||||||
|
finalizeSnapshot(snap, collectedAt)
|
||||||
|
}
|
||||||
|
|
||||||
func finalizeSnapshot(snap *schema.HardwareSnapshot, collectedAt string) {
|
func finalizeSnapshot(snap *schema.HardwareSnapshot, collectedAt string) {
|
||||||
snap.Memory = filterMemory(snap.Memory)
|
snap.Memory = filterMemory(snap.Memory)
|
||||||
snap.Storage = filterStorage(snap.Storage)
|
snap.Storage = filterStorage(snap.Storage)
|
||||||
|
snap.PCIeDevices = filterPCIe(snap.PCIeDevices)
|
||||||
snap.PowerSupplies = filterPSUs(snap.PowerSupplies)
|
snap.PowerSupplies = filterPSUs(snap.PowerSupplies)
|
||||||
|
|
||||||
setComponentStatusMetadata(snap, collectedAt)
|
setComponentStatusMetadata(snap, collectedAt)
|
||||||
@@ -33,11 +41,25 @@ func filterStorage(disks []schema.HardwareStorage) []schema.HardwareStorage {
|
|||||||
if disk.SerialNumber == nil || *disk.SerialNumber == "" {
|
if disk.SerialNumber == nil || *disk.SerialNumber == "" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if disk.Model != nil && isVirtualHDiskModel(*disk.Model) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
out = append(out, disk)
|
out = append(out, disk)
|
||||||
}
|
}
|
||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func filterPCIe(devs []schema.HardwarePCIeDevice) []schema.HardwarePCIeDevice {
|
||||||
|
out := make([]schema.HardwarePCIeDevice, 0, len(devs))
|
||||||
|
for _, dev := range devs {
|
||||||
|
if dev.DeviceClass != nil && strings.Contains(strings.ToLower(strings.TrimSpace(*dev.DeviceClass)), "co-processor") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
out = append(out, dev)
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
func filterPSUs(psus []schema.HardwarePowerSupply) []schema.HardwarePowerSupply {
|
func filterPSUs(psus []schema.HardwarePowerSupply) []schema.HardwarePowerSupply {
|
||||||
out := make([]schema.HardwarePowerSupply, 0, len(psus))
|
out := make([]schema.HardwarePowerSupply, 0, len(psus))
|
||||||
for _, psu := range psus {
|
for _, psu := range psus {
|
||||||
|
|||||||
@@ -10,6 +10,10 @@ func TestFinalizeSnapshotFiltersComponentsWithoutRequiredSerials(t *testing.T) {
|
|||||||
present := true
|
present := true
|
||||||
status := statusOK
|
status := statusOK
|
||||||
serial := "SN-1"
|
serial := "SN-1"
|
||||||
|
virtualModel := "Virtual HDisk1"
|
||||||
|
realModel := "PASCARI"
|
||||||
|
coProcessorClass := "Co-processor"
|
||||||
|
gpuClass := "VideoController"
|
||||||
|
|
||||||
snap := schema.HardwareSnapshot{
|
snap := schema.HardwareSnapshot{
|
||||||
Memory: []schema.HardwareMemory{
|
Memory: []schema.HardwareMemory{
|
||||||
@@ -17,9 +21,15 @@ func TestFinalizeSnapshotFiltersComponentsWithoutRequiredSerials(t *testing.T) {
|
|||||||
{Present: &present, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
{Present: &present, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
||||||
},
|
},
|
||||||
Storage: []schema.HardwareStorage{
|
Storage: []schema.HardwareStorage{
|
||||||
|
{Model: &virtualModel, SerialNumber: &serial, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
||||||
{SerialNumber: &serial, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
{SerialNumber: &serial, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
||||||
|
{Model: &realModel, SerialNumber: &serial, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
||||||
{HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
{HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
||||||
},
|
},
|
||||||
|
PCIeDevices: []schema.HardwarePCIeDevice{
|
||||||
|
{DeviceClass: &coProcessorClass, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
||||||
|
{DeviceClass: &gpuClass, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
||||||
|
},
|
||||||
PowerSupplies: []schema.HardwarePowerSupply{
|
PowerSupplies: []schema.HardwarePowerSupply{
|
||||||
{SerialNumber: &serial, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
{SerialNumber: &serial, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
||||||
{HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
{HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
||||||
@@ -31,9 +41,12 @@ func TestFinalizeSnapshotFiltersComponentsWithoutRequiredSerials(t *testing.T) {
|
|||||||
if len(snap.Memory) != 1 || snap.Memory[0].StatusCheckedAt == nil || *snap.Memory[0].StatusCheckedAt != collectedAt {
|
if len(snap.Memory) != 1 || snap.Memory[0].StatusCheckedAt == nil || *snap.Memory[0].StatusCheckedAt != collectedAt {
|
||||||
t.Fatalf("memory finalize mismatch: %+v", snap.Memory)
|
t.Fatalf("memory finalize mismatch: %+v", snap.Memory)
|
||||||
}
|
}
|
||||||
if len(snap.Storage) != 1 || snap.Storage[0].StatusCheckedAt == nil || *snap.Storage[0].StatusCheckedAt != collectedAt {
|
if len(snap.Storage) != 2 || snap.Storage[0].StatusCheckedAt == nil || *snap.Storage[0].StatusCheckedAt != collectedAt {
|
||||||
t.Fatalf("storage finalize mismatch: %+v", snap.Storage)
|
t.Fatalf("storage finalize mismatch: %+v", snap.Storage)
|
||||||
}
|
}
|
||||||
|
if len(snap.PCIeDevices) != 1 || snap.PCIeDevices[0].DeviceClass == nil || *snap.PCIeDevices[0].DeviceClass != gpuClass {
|
||||||
|
t.Fatalf("pcie finalize mismatch: %+v", snap.PCIeDevices)
|
||||||
|
}
|
||||||
if len(snap.PowerSupplies) != 1 || snap.PowerSupplies[0].StatusCheckedAt == nil || *snap.PowerSupplies[0].StatusCheckedAt != collectedAt {
|
if len(snap.PowerSupplies) != 1 || snap.PowerSupplies[0].StatusCheckedAt == nil || *snap.PowerSupplies[0].StatusCheckedAt != collectedAt {
|
||||||
t.Fatalf("psu finalize mismatch: %+v", snap.PowerSupplies)
|
t.Fatalf("psu finalize mismatch: %+v", snap.PowerSupplies)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -59,6 +59,7 @@ func shouldIncludePCIeDevice(class, vendor, device string) bool {
|
|||||||
"host bridge",
|
"host bridge",
|
||||||
"isa bridge",
|
"isa bridge",
|
||||||
"pci bridge",
|
"pci bridge",
|
||||||
|
"co-processor",
|
||||||
"performance counter",
|
"performance counter",
|
||||||
"performance counters",
|
"performance counters",
|
||||||
"ram memory",
|
"ram memory",
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ func TestShouldIncludePCIeDevice(t *testing.T) {
|
|||||||
{name: "audio", class: "Audio device", want: false},
|
{name: "audio", class: "Audio device", want: false},
|
||||||
{name: "host bridge", class: "Host bridge", want: false},
|
{name: "host bridge", class: "Host bridge", want: false},
|
||||||
{name: "pci bridge", class: "PCI bridge", want: false},
|
{name: "pci bridge", class: "PCI bridge", want: false},
|
||||||
|
{name: "co-processor", class: "Co-processor", want: false},
|
||||||
{name: "smbus", class: "SMBus", want: false},
|
{name: "smbus", class: "SMBus", want: false},
|
||||||
{name: "perf", class: "Performance counters", want: false},
|
{name: "perf", class: "Performance counters", want: false},
|
||||||
{name: "non essential instrumentation", class: "Non-Essential Instrumentation", want: false},
|
{name: "non essential instrumentation", class: "Non-Essential Instrumentation", want: false},
|
||||||
@@ -76,6 +77,20 @@ func TestParseLspci_filtersAMDChipsetNoise(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseLspci_filtersCoProcessors(t *testing.T) {
|
||||||
|
input := "" +
|
||||||
|
"Slot:\t0000:01:00.0\nClass:\tCo-processor\nVendor:\tIntel Corporation\nDevice:\t402xx Series QAT\n\n" +
|
||||||
|
"Slot:\t0000:65:00.0\nClass:\tVGA compatible controller\nVendor:\tNVIDIA Corporation\nDevice:\tH100\n\n"
|
||||||
|
|
||||||
|
devs := parseLspci(input)
|
||||||
|
if len(devs) != 1 {
|
||||||
|
t.Fatalf("expected 1 remaining device, got %d", len(devs))
|
||||||
|
}
|
||||||
|
if devs[0].Model == nil || *devs[0].Model != "H100" {
|
||||||
|
t.Fatalf("unexpected remaining device: %+v", devs[0])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestPCIeJSONUsesSlotNotBDF(t *testing.T) {
|
func TestPCIeJSONUsesSlotNotBDF(t *testing.T) {
|
||||||
input := "Slot:\t0000:65:00.0\nClass:\tVGA compatible controller\nVendor:\tNVIDIA Corporation\nDevice:\tH100\n\n"
|
input := "Slot:\t0000:65:00.0\nClass:\tVGA compatible controller\nVendor:\tNVIDIA Corporation\nDevice:\tH100\n\n"
|
||||||
|
|
||||||
|
|||||||
@@ -91,7 +91,11 @@ func discoverStorageDevices() []lsblkDevice {
|
|||||||
// These have zero reported size, a generic fake serial, and a model name that
|
// These have zero reported size, a generic fake serial, and a model name that
|
||||||
// starts with "Virtual HDisk".
|
// starts with "Virtual HDisk".
|
||||||
func isVirtualBMCDisk(dev lsblkDevice) bool {
|
func isVirtualBMCDisk(dev lsblkDevice) bool {
|
||||||
model := strings.ToLower(strings.TrimSpace(dev.Model))
|
return isVirtualHDiskModel(dev.Model)
|
||||||
|
}
|
||||||
|
|
||||||
|
// isVirtualHDiskModel reports whether model, once surrounding whitespace is
// trimmed and the text is lowercased, begins with "virtual hdisk".
func isVirtualHDiskModel(model string) bool {
	const virtualPrefix = "virtual hdisk"
	normalized := strings.TrimSpace(model)
	normalized = strings.ToLower(normalized)
	return strings.HasPrefix(normalized, virtualPrefix)
}
|
||||||
|
|
||||||
|
|||||||
64
audit/internal/platform/kill_workers.go
Normal file
64
audit/internal/platform/kill_workers.go
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
package platform
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"syscall"
|
||||||
|
)
|
||||||
|
|
||||||
|
// workerPatterns are substrings matched against the executable path (the
// first argument in /proc/<pid>/cmdline) to identify bee test worker
// processes that should be killed by KillTestWorkers.
var workerPatterns = []string{
	"bee-gpu-burn",
	"stress-ng",
	"stressapptest",
	"memtester",
}

// KilledProcess describes a process that was sent SIGKILL.
type KilledProcess struct {
	PID  int    `json:"pid"`
	Name string `json:"name"`
}

// KillTestWorkers scans /proc for running test worker processes and sends
// SIGKILL to each one found. It returns the processes the signal was
// successfully sent to. Processes that could not be read or signalled
// (e.g. already exited) are silently skipped and not reported. If /proc
// cannot be listed the result is nil.
func KillTestWorkers() []KilledProcess {
	return killWorkersIn("/proc", syscall.Kill)
}

// killWorkersIn walks the numeric sub-directories of procDir, reads each
// <pid>/cmdline file, and calls kill with SIGKILL for every process whose
// executable matches workerPatterns. Only processes for which kill returned
// nil are included in the result.
func killWorkersIn(procDir string, kill func(pid int, sig syscall.Signal) error) []KilledProcess {
	entries, err := os.ReadDir(procDir)
	if err != nil {
		return nil
	}

	self := os.Getpid()
	var killed []KilledProcess
	for _, e := range entries {
		if !e.IsDir() {
			continue
		}
		pid, err := strconv.Atoi(e.Name())
		if err != nil {
			continue
		}
		// kill(2) treats pid <= 0 as "a whole process group / every
		// process", and signalling ourselves would abort the scan.
		if pid <= 0 || pid == self {
			continue
		}
		cmdline, err := os.ReadFile(fmt.Sprintf("%s/%d/cmdline", procDir, pid))
		if err != nil {
			continue
		}
		name, ok := workerName(cmdline)
		if !ok {
			continue
		}
		if err := kill(pid, syscall.SIGKILL); err != nil {
			// Not signalled (already gone, not permitted, ...): do not
			// report it as killed.
			continue
		}
		killed = append(killed, KilledProcess{PID: pid, Name: name})
	}
	return killed
}

// workerName extracts the executable from raw /proc/<pid>/cmdline contents
// and reports whether it matches one of workerPatterns. On a match it returns
// the executable's base name (the text after the last '/').
func workerName(cmdline []byte) (string, bool) {
	// /proc/*/cmdline uses NUL bytes as argument separators; a space is
	// treated as a separator as well, so only the first word is considered.
	exe := string(cmdline)
	if end := strings.IndexAny(exe, "\x00 "); end >= 0 {
		exe = exe[:end]
	}
	exe = strings.TrimSpace(exe)
	if exe == "" {
		return "", false
	}

	base := exe
	if idx := strings.LastIndexByte(exe, '/'); idx >= 0 {
		base = exe[idx+1:]
	}
	for _, pat := range workerPatterns {
		// base is a suffix of exe, so checking exe covers both.
		if strings.Contains(exe, pat) {
			return base, true
		}
	}
	return "", false
}
|
||||||
@@ -428,26 +428,6 @@ func (h *handler) handleAPIExportList(w http.ResponseWriter, r *http.Request) {
|
|||||||
writeJSON(w, entries)
|
writeJSON(w, entries)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *handler) handleAPIExportBundle(w http.ResponseWriter, r *http.Request) {
|
|
||||||
if globalQueue.hasActiveTarget("support-bundle") {
|
|
||||||
writeError(w, http.StatusConflict, "support bundle task is already pending or running")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
t := &Task{
|
|
||||||
ID: newJobID("support-bundle"),
|
|
||||||
Name: "Support Bundle",
|
|
||||||
Target: "support-bundle",
|
|
||||||
Status: TaskPending,
|
|
||||||
CreatedAt: time.Now(),
|
|
||||||
}
|
|
||||||
globalQueue.enqueue(t)
|
|
||||||
writeJSON(w, map[string]string{
|
|
||||||
"status": "queued",
|
|
||||||
"task_id": t.ID,
|
|
||||||
"job_id": t.ID,
|
|
||||||
"url": "/export/support.tar.gz",
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func (h *handler) handleAPIExportUSBTargets(w http.ResponseWriter, _ *http.Request) {
|
func (h *handler) handleAPIExportUSBTargets(w http.ResponseWriter, _ *http.Request) {
|
||||||
if h.opts.App == nil {
|
if h.opts.App == nil {
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
package webui
|
package webui
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
@@ -65,42 +64,6 @@ func TestHandleAPISATRunDecodesBodyWithoutContentLength(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestHandleAPIExportBundleQueuesTask(t *testing.T) {
|
|
||||||
globalQueue.mu.Lock()
|
|
||||||
originalTasks := globalQueue.tasks
|
|
||||||
globalQueue.tasks = nil
|
|
||||||
globalQueue.mu.Unlock()
|
|
||||||
t.Cleanup(func() {
|
|
||||||
globalQueue.mu.Lock()
|
|
||||||
globalQueue.tasks = originalTasks
|
|
||||||
globalQueue.mu.Unlock()
|
|
||||||
})
|
|
||||||
|
|
||||||
h := &handler{opts: HandlerOptions{ExportDir: t.TempDir()}}
|
|
||||||
req := httptest.NewRequest("POST", "/api/export/bundle", nil)
|
|
||||||
rec := httptest.NewRecorder()
|
|
||||||
|
|
||||||
h.handleAPIExportBundle(rec, req)
|
|
||||||
|
|
||||||
if rec.Code != 200 {
|
|
||||||
t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
|
|
||||||
}
|
|
||||||
var body map[string]string
|
|
||||||
if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil {
|
|
||||||
t.Fatalf("decode response: %v", err)
|
|
||||||
}
|
|
||||||
if body["task_id"] == "" {
|
|
||||||
t.Fatalf("missing task_id in response: %v", body)
|
|
||||||
}
|
|
||||||
globalQueue.mu.Lock()
|
|
||||||
defer globalQueue.mu.Unlock()
|
|
||||||
if len(globalQueue.tasks) != 1 {
|
|
||||||
t.Fatalf("tasks=%d want 1", len(globalQueue.tasks))
|
|
||||||
}
|
|
||||||
if got := globalQueue.tasks[0].Target; got != "support-bundle" {
|
|
||||||
t.Fatalf("target=%q want support-bundle", got)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestPushFanRingsTracksByNameAndCarriesForwardMissingSamples(t *testing.T) {
|
func TestPushFanRingsTracksByNameAndCarriesForwardMissingSamples(t *testing.T) {
|
||||||
h := &handler{}
|
h := &handler{}
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -217,7 +218,9 @@ func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetri
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Collect unique GPU indices and fan names from loaded data (preserve order)
|
// Collect unique GPU indices and fan/temp names from loaded data.
|
||||||
|
// Sort each list so that sample reconstruction is deterministic regardless
|
||||||
|
// of Go's non-deterministic map iteration order.
|
||||||
seenGPU := map[int]bool{}
|
seenGPU := map[int]bool{}
|
||||||
var gpuIndices []int
|
var gpuIndices []int
|
||||||
for k := range gpuData {
|
for k := range gpuData {
|
||||||
@@ -226,6 +229,8 @@ func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetri
|
|||||||
gpuIndices = append(gpuIndices, k.idx)
|
gpuIndices = append(gpuIndices, k.idx)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
sort.Ints(gpuIndices)
|
||||||
|
|
||||||
seenFan := map[string]bool{}
|
seenFan := map[string]bool{}
|
||||||
var fanNames []string
|
var fanNames []string
|
||||||
for k := range fanData {
|
for k := range fanData {
|
||||||
@@ -234,6 +239,8 @@ func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetri
|
|||||||
fanNames = append(fanNames, k.name)
|
fanNames = append(fanNames, k.name)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
sort.Strings(fanNames)
|
||||||
|
|
||||||
seenTemp := map[string]bool{}
|
seenTemp := map[string]bool{}
|
||||||
var tempNames []string
|
var tempNames []string
|
||||||
for k := range tempData {
|
for k := range tempData {
|
||||||
@@ -242,6 +249,7 @@ func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetri
|
|||||||
tempNames = append(tempNames, k.name)
|
tempNames = append(tempNames, k.name)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
sort.Strings(tempNames)
|
||||||
|
|
||||||
samples := make([]platform.LiveMetricSample, len(sysRows))
|
samples := make([]platform.LiveMetricSample, len(sysRows))
|
||||||
for i, r := range sysRows {
|
for i, r := range sysRows {
|
||||||
|
|||||||
@@ -1178,73 +1178,46 @@ func listExportFiles(exportDir string) ([]string, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func renderSupportBundleInline() string {
|
func renderSupportBundleInline() string {
|
||||||
return `<button id="support-bundle-btn" class="btn btn-primary" onclick="supportBundleBuild()">Build Support Bundle</button>
|
return `<button id="support-bundle-btn" class="btn btn-primary" onclick="supportBundleDownload()">↓ Download Support Bundle</button>
|
||||||
<a id="support-bundle-download" class="btn btn-secondary" href="/export/support.tar.gz" style="display:none">↓ Download Support Bundle</a>
|
<div id="support-bundle-status" style="margin-top:10px;font-size:13px;color:var(--muted)"></div>
|
||||||
<div id="support-bundle-status" style="margin-top:12px;font-size:13px;color:var(--muted)">No support bundle built in this session.</div>
|
|
||||||
<div id="support-bundle-log" class="terminal" style="display:none;margin-top:12px;max-height:260px"></div>
|
|
||||||
<script>
|
<script>
|
||||||
(function(){
|
window.supportBundleDownload = function() {
|
||||||
var _supportBundleES = null;
|
|
||||||
window.supportBundleBuild = function() {
|
|
||||||
var btn = document.getElementById('support-bundle-btn');
|
var btn = document.getElementById('support-bundle-btn');
|
||||||
var status = document.getElementById('support-bundle-status');
|
var status = document.getElementById('support-bundle-status');
|
||||||
var log = document.getElementById('support-bundle-log');
|
|
||||||
var download = document.getElementById('support-bundle-download');
|
|
||||||
if (_supportBundleES) {
|
|
||||||
_supportBundleES.close();
|
|
||||||
_supportBundleES = null;
|
|
||||||
}
|
|
||||||
btn.disabled = true;
|
btn.disabled = true;
|
||||||
btn.textContent = 'Building...';
|
btn.textContent = 'Building...';
|
||||||
status.textContent = 'Queueing support bundle task...';
|
status.textContent = 'Collecting logs and export data\u2026';
|
||||||
status.style.color = 'var(--muted)';
|
status.style.color = 'var(--muted)';
|
||||||
log.style.display = '';
|
var filename = 'bee-support.tar.gz';
|
||||||
log.textContent = '';
|
fetch('/export/support.tar.gz')
|
||||||
download.style.display = 'none';
|
.then(function(r) {
|
||||||
|
if (!r.ok) throw new Error('HTTP ' + r.status);
|
||||||
fetch('/api/export/bundle', {method:'POST'}).then(function(r){
|
var cd = r.headers.get('Content-Disposition') || '';
|
||||||
return r.json().then(function(j){
|
var m = cd.match(/filename="?([^";]+)"?/);
|
||||||
if (!r.ok) throw new Error(j.error || r.statusText);
|
if (m) filename = m[1];
|
||||||
return j;
|
return r.blob();
|
||||||
});
|
})
|
||||||
}).then(function(data){
|
.then(function(blob) {
|
||||||
if (!data.task_id) throw new Error('missing task id');
|
var url = URL.createObjectURL(blob);
|
||||||
status.textContent = 'Building support bundle...';
|
var a = document.createElement('a');
|
||||||
_supportBundleES = new EventSource('/api/tasks/' + data.task_id + '/stream');
|
a.href = url;
|
||||||
_supportBundleES.onmessage = function(e) {
|
a.download = filename;
|
||||||
log.textContent += e.data + '\n';
|
document.body.appendChild(a);
|
||||||
log.scrollTop = log.scrollHeight;
|
a.click();
|
||||||
};
|
document.body.removeChild(a);
|
||||||
_supportBundleES.addEventListener('done', function(e) {
|
URL.revokeObjectURL(url);
|
||||||
_supportBundleES.close();
|
status.textContent = 'Download started.';
|
||||||
_supportBundleES = null;
|
|
||||||
btn.disabled = false;
|
|
||||||
btn.textContent = 'Build Support Bundle';
|
|
||||||
if (e.data) {
|
|
||||||
status.textContent = 'Error: ' + e.data;
|
|
||||||
status.style.color = 'var(--crit-fg)';
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
status.textContent = 'Support bundle ready.';
|
|
||||||
status.style.color = 'var(--ok-fg)';
|
status.style.color = 'var(--ok-fg)';
|
||||||
download.style.display = '';
|
})
|
||||||
});
|
.catch(function(e) {
|
||||||
_supportBundleES.onerror = function() {
|
status.textContent = 'Error: ' + e.message;
|
||||||
if (_supportBundleES) _supportBundleES.close();
|
|
||||||
_supportBundleES = null;
|
|
||||||
btn.disabled = false;
|
|
||||||
btn.textContent = 'Build Support Bundle';
|
|
||||||
status.textContent = 'Support bundle stream disconnected.';
|
|
||||||
status.style.color = 'var(--crit-fg)';
|
status.style.color = 'var(--crit-fg)';
|
||||||
};
|
})
|
||||||
}).catch(function(e){
|
.finally(function() {
|
||||||
btn.disabled = false;
|
btn.disabled = false;
|
||||||
btn.textContent = 'Build Support Bundle';
|
btn.textContent = '\u2195 Download Support Bundle';
|
||||||
status.textContent = 'Error: ' + e;
|
});
|
||||||
status.style.color = 'var(--crit-fg)';
|
|
||||||
});
|
|
||||||
};
|
};
|
||||||
})();
|
|
||||||
</script>`
|
</script>`
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1577,8 +1550,10 @@ func renderInstall() string {
|
|||||||
// ── Tasks ─────────────────────────────────────────────────────────────────────
|
// ── Tasks ─────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
func renderTasks() string {
|
func renderTasks() string {
|
||||||
return `<div style="display:flex;align-items:center;gap:12px;margin-bottom:16px">
|
return `<div style="display:flex;align-items:center;gap:12px;margin-bottom:16px;flex-wrap:wrap">
|
||||||
<button class="btn btn-danger btn-sm" onclick="cancelAll()">Cancel All</button>
|
<button class="btn btn-danger btn-sm" onclick="cancelAll()">Cancel All</button>
|
||||||
|
<button class="btn btn-sm" style="background:#b45309;color:#fff" onclick="killWorkers()" title="Send SIGKILL to all running test processes (bee-gpu-burn, stress-ng, stressapptest, memtester)">Kill Workers</button>
|
||||||
|
<span id="kill-toast" style="font-size:12px;color:var(--muted);display:none"></span>
|
||||||
<span style="font-size:12px;color:var(--muted)">Tasks run one at a time. Logs persist after navigation.</span>
|
<span style="font-size:12px;color:var(--muted)">Tasks run one at a time. Logs persist after navigation.</span>
|
||||||
</div>
|
</div>
|
||||||
<div class="card">
|
<div class="card">
|
||||||
@@ -1639,6 +1614,21 @@ function cancelTask(id) {
|
|||||||
function cancelAll() {
|
function cancelAll() {
|
||||||
fetch('/api/tasks/cancel-all',{method:'POST'}).then(()=>loadTasks());
|
fetch('/api/tasks/cancel-all',{method:'POST'}).then(()=>loadTasks());
|
||||||
}
|
}
|
||||||
|
function killWorkers() {
|
||||||
|
if (!confirm('Send SIGKILL to all running test workers (bee-gpu-burn, stress-ng, stressapptest, memtester)?\n\nThis will also cancel all queued and running tasks.')) return;
|
||||||
|
fetch('/api/tasks/kill-workers',{method:'POST'})
|
||||||
|
.then(r=>r.json())
|
||||||
|
.then(d=>{
|
||||||
|
loadTasks();
|
||||||
|
var toast = document.getElementById('kill-toast');
|
||||||
|
var parts = [];
|
||||||
|
if (d.cancelled > 0) parts.push(d.cancelled+' task'+(d.cancelled===1?'':'s')+' cancelled');
|
||||||
|
if (d.killed > 0) parts.push(d.killed+' process'+(d.killed===1?'':'es')+' killed');
|
||||||
|
toast.textContent = parts.length ? parts.join(', ')+'.' : 'No processes found.';
|
||||||
|
toast.style.display = '';
|
||||||
|
setTimeout(()=>{ toast.style.display='none'; }, 5000);
|
||||||
|
});
|
||||||
|
}
|
||||||
function setPriority(id, delta) {
|
function setPriority(id, delta) {
|
||||||
fetch('/api/tasks/'+id+'/priority',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({delta:delta})})
|
fetch('/api/tasks/'+id+'/priority',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({delta:delta})})
|
||||||
.then(()=>loadTasks());
|
.then(()=>loadTasks());
|
||||||
|
|||||||
@@ -128,7 +128,11 @@ type namedMetricsRing struct {
|
|||||||
Ring *metricsRing
|
Ring *metricsRing
|
||||||
}
|
}
|
||||||
|
|
||||||
const metricsChartWindow = 120
|
// metricsChartWindow is the number of samples kept in the live ring buffer.
|
||||||
|
// At metricsCollectInterval = 5 s this covers 30 minutes of live history.
|
||||||
|
const metricsChartWindow = 360
|
||||||
|
|
||||||
|
var metricsCollectInterval = 5 * time.Second
|
||||||
|
|
||||||
// pendingNetChange tracks a network state change awaiting confirmation.
|
// pendingNetChange tracks a network state change awaiting confirmation.
|
||||||
type pendingNetChange struct {
|
type pendingNetChange struct {
|
||||||
@@ -238,6 +242,7 @@ func NewHandler(opts HandlerOptions) http.Handler {
|
|||||||
// Tasks
|
// Tasks
|
||||||
mux.HandleFunc("GET /api/tasks", h.handleAPITasksList)
|
mux.HandleFunc("GET /api/tasks", h.handleAPITasksList)
|
||||||
mux.HandleFunc("POST /api/tasks/cancel-all", h.handleAPITasksCancelAll)
|
mux.HandleFunc("POST /api/tasks/cancel-all", h.handleAPITasksCancelAll)
|
||||||
|
mux.HandleFunc("POST /api/tasks/kill-workers", h.handleAPITasksKillWorkers)
|
||||||
mux.HandleFunc("POST /api/tasks/{id}/cancel", h.handleAPITasksCancel)
|
mux.HandleFunc("POST /api/tasks/{id}/cancel", h.handleAPITasksCancel)
|
||||||
mux.HandleFunc("POST /api/tasks/{id}/priority", h.handleAPITasksPriority)
|
mux.HandleFunc("POST /api/tasks/{id}/priority", h.handleAPITasksPriority)
|
||||||
mux.HandleFunc("GET /api/tasks/{id}/stream", h.handleAPITasksStream)
|
mux.HandleFunc("GET /api/tasks/{id}/stream", h.handleAPITasksStream)
|
||||||
@@ -256,7 +261,6 @@ func NewHandler(opts HandlerOptions) http.Handler {
|
|||||||
|
|
||||||
// Export
|
// Export
|
||||||
mux.HandleFunc("GET /api/export/list", h.handleAPIExportList)
|
mux.HandleFunc("GET /api/export/list", h.handleAPIExportList)
|
||||||
mux.HandleFunc("POST /api/export/bundle", h.handleAPIExportBundle)
|
|
||||||
mux.HandleFunc("GET /api/export/usb", h.handleAPIExportUSBTargets)
|
mux.HandleFunc("GET /api/export/usb", h.handleAPIExportUSBTargets)
|
||||||
mux.HandleFunc("POST /api/export/usb/audit", h.handleAPIExportUSBAudit)
|
mux.HandleFunc("POST /api/export/usb/audit", h.handleAPIExportUSBAudit)
|
||||||
mux.HandleFunc("POST /api/export/usb/bundle", h.handleAPIExportUSBBundle)
|
mux.HandleFunc("POST /api/export/usb/bundle", h.handleAPIExportUSBBundle)
|
||||||
@@ -301,7 +305,7 @@ func NewHandler(opts HandlerOptions) http.Handler {
|
|||||||
|
|
||||||
func (h *handler) startMetricsCollector() {
|
func (h *handler) startMetricsCollector() {
|
||||||
go func() {
|
go func() {
|
||||||
ticker := time.NewTicker(1 * time.Second)
|
ticker := time.NewTicker(metricsCollectInterval)
|
||||||
defer ticker.Stop()
|
defer ticker.Stop()
|
||||||
for range ticker.C {
|
for range ticker.C {
|
||||||
sample := platform.SampleLiveMetrics()
|
sample := platform.SampleLiveMetrics()
|
||||||
@@ -381,15 +385,12 @@ func (h *handler) handleRuntimeHealthJSON(w http.ResponseWriter, r *http.Request
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (h *handler) handleSupportBundleDownload(w http.ResponseWriter, r *http.Request) {
|
func (h *handler) handleSupportBundleDownload(w http.ResponseWriter, r *http.Request) {
|
||||||
archive, err := app.LatestSupportBundlePath()
|
archive, err := app.BuildSupportBundle(h.opts.ExportDir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if errors.Is(err, os.ErrNotExist) {
|
http.Error(w, fmt.Sprintf("build support bundle: %v", err), http.StatusInternalServerError)
|
||||||
http.Error(w, "support bundle not built yet", http.StatusNotFound)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
http.Error(w, fmt.Sprintf("locate support bundle: %v", err), http.StatusInternalServerError)
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
defer os.Remove(archive)
|
||||||
w.Header().Set("Cache-Control", "no-store")
|
w.Header().Set("Cache-Control", "no-store")
|
||||||
w.Header().Set("Content-Type", "application/gzip")
|
w.Header().Set("Content-Type", "application/gzip")
|
||||||
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%q", filepath.Base(archive)))
|
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%q", filepath.Base(archive)))
|
||||||
@@ -1059,9 +1060,16 @@ func chartYAxisNumber(v float64) string {
|
|||||||
v = -v
|
v = -v
|
||||||
}
|
}
|
||||||
var out string
|
var out string
|
||||||
if v >= 1000 {
|
switch {
|
||||||
|
case v >= 10000:
|
||||||
out = fmt.Sprintf("%dк", int((v+500)/1000))
|
out = fmt.Sprintf("%dк", int((v+500)/1000))
|
||||||
} else {
|
case v >= 1000:
|
||||||
|
// Use one decimal place so ticks like 1400, 1600, 1800 read as
|
||||||
|
// "1,4к", "1,6к", "1,8к" instead of the ambiguous "1к"/"2к".
|
||||||
|
s := fmt.Sprintf("%.1f", v/1000)
|
||||||
|
s = strings.TrimRight(strings.TrimRight(s, "0"), ".")
|
||||||
|
out = strings.ReplaceAll(s, ".", ",") + "к"
|
||||||
|
default:
|
||||||
out = fmt.Sprintf("%.0f", v)
|
out = fmt.Sprintf("%.0f", v)
|
||||||
}
|
}
|
||||||
if neg {
|
if neg {
|
||||||
|
|||||||
@@ -175,10 +175,13 @@ func TestChartYAxisNumber(t *testing.T) {
|
|||||||
}{
|
}{
|
||||||
{in: 999, want: "999"},
|
{in: 999, want: "999"},
|
||||||
{in: 1000, want: "1к"},
|
{in: 1000, want: "1к"},
|
||||||
{in: 1370, want: "1к"},
|
{in: 1370, want: "1,4к"},
|
||||||
{in: 1500, want: "2к"},
|
{in: 1500, want: "1,5к"},
|
||||||
|
{in: 1700, want: "1,7к"},
|
||||||
|
{in: 2000, want: "2к"},
|
||||||
|
{in: 9999, want: "10к"},
|
||||||
{in: 10200, want: "10к"},
|
{in: 10200, want: "10к"},
|
||||||
{in: -1499, want: "-1к"},
|
{in: -1500, want: "-1,5к"},
|
||||||
}
|
}
|
||||||
for _, tc := range tests {
|
for _, tc := range tests {
|
||||||
if got := chartYAxisNumber(tc.in); got != tc.want {
|
if got := chartYAxisNumber(tc.in); got != tc.want {
|
||||||
|
|||||||
@@ -716,6 +716,38 @@ func (h *handler) handleAPITasksCancelAll(w http.ResponseWriter, _ *http.Request
|
|||||||
writeJSON(w, map[string]int{"cancelled": n})
|
writeJSON(w, map[string]int{"cancelled": n})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (h *handler) handleAPITasksKillWorkers(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
// Cancel all queued/running tasks in the queue first.
|
||||||
|
globalQueue.mu.Lock()
|
||||||
|
now := time.Now()
|
||||||
|
cancelled := 0
|
||||||
|
for _, t := range globalQueue.tasks {
|
||||||
|
switch t.Status {
|
||||||
|
case TaskPending:
|
||||||
|
t.Status = TaskCancelled
|
||||||
|
t.DoneAt = &now
|
||||||
|
cancelled++
|
||||||
|
case TaskRunning:
|
||||||
|
if t.job != nil {
|
||||||
|
t.job.abort()
|
||||||
|
}
|
||||||
|
t.Status = TaskCancelled
|
||||||
|
t.DoneAt = &now
|
||||||
|
cancelled++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
globalQueue.persistLocked()
|
||||||
|
globalQueue.mu.Unlock()
|
||||||
|
|
||||||
|
// Kill orphaned test worker processes at the OS level.
|
||||||
|
killed := platform.KillTestWorkers()
|
||||||
|
writeJSON(w, map[string]any{
|
||||||
|
"cancelled": cancelled,
|
||||||
|
"killed": len(killed),
|
||||||
|
"processes": killed,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
func (h *handler) handleAPITasksStream(w http.ResponseWriter, r *http.Request) {
|
func (h *handler) handleAPITasksStream(w http.ResponseWriter, r *http.Request) {
|
||||||
id := r.PathValue("id")
|
id := r.PathValue("id")
|
||||||
// Wait up to 5s for the task to get a job (it may be pending)
|
// Wait up to 5s for the task to get a job (it may be pending)
|
||||||
@@ -769,8 +801,17 @@ func (q *taskQueue) loadLocked() {
|
|||||||
params: pt.Params,
|
params: pt.Params,
|
||||||
}
|
}
|
||||||
q.assignTaskLogPathLocked(t)
|
q.assignTaskLogPathLocked(t)
|
||||||
if t.Status == TaskPending || t.Status == TaskRunning {
|
if t.Status == TaskRunning {
|
||||||
t.Status = TaskPending
|
// The task was interrupted by a bee-web restart. Child processes
|
||||||
|
// (e.g. bee-gpu-burn-worker) survive the restart in their own
|
||||||
|
// process groups and cannot be cancelled retroactively. Mark the
|
||||||
|
// task as failed so the user can decide whether to re-run it
|
||||||
|
// rather than blindly re-launching duplicate workers.
|
||||||
|
now := time.Now()
|
||||||
|
t.Status = TaskFailed
|
||||||
|
t.DoneAt = &now
|
||||||
|
t.ErrMsg = "interrupted by bee-web restart"
|
||||||
|
} else if t.Status == TaskPending {
|
||||||
t.StartedAt = nil
|
t.StartedAt = nil
|
||||||
t.DoneAt = nil
|
t.DoneAt = nil
|
||||||
t.ErrMsg = ""
|
t.ErrMsg = ""
|
||||||
|
|||||||
@@ -24,21 +24,34 @@ func TestTaskQueuePersistsAndRecoversPendingTasks(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
started := time.Now().Add(-time.Minute)
|
started := time.Now().Add(-time.Minute)
|
||||||
task := &Task{
|
|
||||||
ID: "task-1",
|
// A task that was pending (not yet started) must be re-queued on restart.
|
||||||
|
pendingTask := &Task{
|
||||||
|
ID: "task-pending",
|
||||||
Name: "Memory Burn-in",
|
Name: "Memory Burn-in",
|
||||||
Target: "memory-stress",
|
Target: "memory-stress",
|
||||||
Priority: 2,
|
Priority: 2,
|
||||||
Status: TaskRunning,
|
Status: TaskPending,
|
||||||
CreatedAt: time.Now().Add(-2 * time.Minute),
|
CreatedAt: time.Now().Add(-2 * time.Minute),
|
||||||
StartedAt: &started,
|
params: taskParams{Duration: 300, BurnProfile: "smoke"},
|
||||||
params: taskParams{
|
}
|
||||||
Duration: 300,
|
// A task that was running when bee-web crashed must NOT be re-queued —
|
||||||
BurnProfile: "smoke",
|
// its child processes (e.g. gpu-burn-worker) survive the restart in
|
||||||
},
|
// their own process groups and can't be cancelled retroactively.
|
||||||
|
runningTask := &Task{
|
||||||
|
ID: "task-running",
|
||||||
|
Name: "NVIDIA GPU Stress",
|
||||||
|
Target: "nvidia-stress",
|
||||||
|
Priority: 1,
|
||||||
|
Status: TaskRunning,
|
||||||
|
CreatedAt: time.Now().Add(-3 * time.Minute),
|
||||||
|
StartedAt: &started,
|
||||||
|
params: taskParams{Duration: 86400},
|
||||||
|
}
|
||||||
|
for _, task := range []*Task{pendingTask, runningTask} {
|
||||||
|
q.tasks = append(q.tasks, task)
|
||||||
|
q.assignTaskLogPathLocked(task)
|
||||||
}
|
}
|
||||||
q.tasks = append(q.tasks, task)
|
|
||||||
q.assignTaskLogPathLocked(task)
|
|
||||||
q.persistLocked()
|
q.persistLocked()
|
||||||
|
|
||||||
recovered := &taskQueue{
|
recovered := &taskQueue{
|
||||||
@@ -48,21 +61,47 @@ func TestTaskQueuePersistsAndRecoversPendingTasks(t *testing.T) {
|
|||||||
}
|
}
|
||||||
recovered.loadLocked()
|
recovered.loadLocked()
|
||||||
|
|
||||||
if len(recovered.tasks) != 1 {
|
if len(recovered.tasks) != 2 {
|
||||||
t.Fatalf("tasks=%d want 1", len(recovered.tasks))
|
t.Fatalf("tasks=%d want 2", len(recovered.tasks))
|
||||||
}
|
}
|
||||||
got := recovered.tasks[0]
|
|
||||||
if got.Status != TaskPending {
|
byID := map[string]*Task{}
|
||||||
t.Fatalf("status=%q want %q", got.Status, TaskPending)
|
for i := range recovered.tasks {
|
||||||
|
byID[recovered.tasks[i].ID] = recovered.tasks[i]
|
||||||
}
|
}
|
||||||
if got.StartedAt != nil {
|
|
||||||
t.Fatalf("started_at=%v want nil for recovered pending task", got.StartedAt)
|
// Pending task must be re-queued as pending with params intact.
|
||||||
|
p := byID["task-pending"]
|
||||||
|
if p == nil {
|
||||||
|
t.Fatal("task-pending not found")
|
||||||
}
|
}
|
||||||
if got.params.Duration != 300 || got.params.BurnProfile != "smoke" {
|
if p.Status != TaskPending {
|
||||||
t.Fatalf("params=%+v", got.params)
|
t.Fatalf("pending task: status=%q want %q", p.Status, TaskPending)
|
||||||
}
|
}
|
||||||
if got.LogPath == "" {
|
if p.StartedAt != nil {
|
||||||
t.Fatal("expected log path")
|
t.Fatalf("pending task: started_at=%v want nil", p.StartedAt)
|
||||||
|
}
|
||||||
|
if p.params.Duration != 300 || p.params.BurnProfile != "smoke" {
|
||||||
|
t.Fatalf("pending task: params=%+v", p.params)
|
||||||
|
}
|
||||||
|
if p.LogPath == "" {
|
||||||
|
t.Fatal("pending task: expected log path")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Running task must be marked failed, not re-queued, to prevent
|
||||||
|
// launching duplicate workers (e.g. a second set of gpu-burn-workers).
|
||||||
|
r := byID["task-running"]
|
||||||
|
if r == nil {
|
||||||
|
t.Fatal("task-running not found")
|
||||||
|
}
|
||||||
|
if r.Status != TaskFailed {
|
||||||
|
t.Fatalf("running task: status=%q want %q", r.Status, TaskFailed)
|
||||||
|
}
|
||||||
|
if r.ErrMsg == "" {
|
||||||
|
t.Fatal("running task: expected non-empty error message")
|
||||||
|
}
|
||||||
|
if r.DoneAt == nil {
|
||||||
|
t.Fatal("running task: expected done_at to be set")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -591,9 +591,12 @@ recover_iso_memtest() {
|
|||||||
AUDIT_VERSION_EFFECTIVE="$(resolve_audit_version)"
|
AUDIT_VERSION_EFFECTIVE="$(resolve_audit_version)"
|
||||||
ISO_VERSION_EFFECTIVE="$(resolve_iso_version)"
|
ISO_VERSION_EFFECTIVE="$(resolve_iso_version)"
|
||||||
ISO_BASENAME="easy-bee-${BEE_GPU_VENDOR}-v${ISO_VERSION_EFFECTIVE}-amd64"
|
ISO_BASENAME="easy-bee-${BEE_GPU_VENDOR}-v${ISO_VERSION_EFFECTIVE}-amd64"
|
||||||
LOG_DIR="${DIST_DIR}/${ISO_BASENAME}.logs"
|
# Versioned output directory: dist/easy-bee-v4.1/ — all final artefacts live here.
|
||||||
LOG_ARCHIVE="${DIST_DIR}/${ISO_BASENAME}.logs.tar.gz"
|
OUT_DIR="${DIST_DIR}/easy-bee-v${ISO_VERSION_EFFECTIVE}"
|
||||||
ISO_OUT="${DIST_DIR}/${ISO_BASENAME}.iso"
|
mkdir -p "${OUT_DIR}"
|
||||||
|
LOG_DIR="${OUT_DIR}/${ISO_BASENAME}.logs"
|
||||||
|
LOG_ARCHIVE="${OUT_DIR}/${ISO_BASENAME}.logs.tar.gz"
|
||||||
|
ISO_OUT="${OUT_DIR}/${ISO_BASENAME}.iso"
|
||||||
LOG_OUT="${LOG_DIR}/build.log"
|
LOG_OUT="${LOG_DIR}/build.log"
|
||||||
|
|
||||||
cleanup_build_log() {
|
cleanup_build_log() {
|
||||||
@@ -616,7 +619,8 @@ cleanup_build_log() {
|
|||||||
|
|
||||||
if [ -n "${LOG_DIR:-}" ] && [ -d "${LOG_DIR}" ] && command -v tar >/dev/null 2>&1; then
|
if [ -n "${LOG_DIR:-}" ] && [ -d "${LOG_DIR}" ] && command -v tar >/dev/null 2>&1; then
|
||||||
rm -f "${LOG_ARCHIVE}"
|
rm -f "${LOG_ARCHIVE}"
|
||||||
tar -czf "${LOG_ARCHIVE}" -C "${DIST_DIR}" "$(basename "${LOG_DIR}")" 2>/dev/null || true
|
tar -czf "${LOG_ARCHIVE}" -C "$(dirname "${LOG_DIR}")" "$(basename "${LOG_DIR}")" 2>/dev/null || true
|
||||||
|
rm -rf "${LOG_DIR}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
exit "${status}"
|
exit "${status}"
|
||||||
|
|||||||
Reference in New Issue
Block a user