package app import ( "encoding/json" "os" "path/filepath" "strings" "sync" "time" ) // ComponentStatusDB is a persistent, append-only store of hardware component health records. // Records are keyed by component identity strings (e.g. "pcie:0000:c8:00.0", "storage:nvme0n1"). // Once a component is marked Warning or Critical, subsequent OK entries do not downgrade it — // the component stays at the highest observed severity until explicitly reset. type ComponentStatusDB struct { path string mu sync.Mutex records map[string]*ComponentStatusRecord } // ComponentStatusRecord holds the current and historical health of one hardware component. type ComponentStatusRecord struct { ComponentKey string `json:"component_key"` Status string `json:"status"` // "OK", "Warning", "Critical", "Unknown" LastCheckedAt time.Time `json:"last_checked_at"` LastChangedAt time.Time `json:"last_changed_at"` ErrorSummary string `json:"error_summary,omitempty"` History []ComponentStatusEntry `json:"history"` } // ComponentStatusEntry is one observation written to a component's history. type ComponentStatusEntry struct { At time.Time `json:"at"` Status string `json:"status"` Source string `json:"source"` // e.g. "sat:nvidia", "sat:memory", "watchdog:kmsg" Detail string `json:"detail,omitempty"` } // OpenComponentStatusDB opens (or creates) the JSON status DB at path. func OpenComponentStatusDB(path string) (*ComponentStatusDB, error) { db := &ComponentStatusDB{ path: path, records: make(map[string]*ComponentStatusRecord), } if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { return nil, err } data, err := os.ReadFile(path) if err != nil && !os.IsNotExist(err) { return nil, err } if len(data) > 0 { var records []ComponentStatusRecord if err := json.Unmarshal(data, &records); err == nil { for i := range records { db.records[records[i].ComponentKey] = &records[i] } } } return db, nil } // Record writes one observation for the given component key. // source is a short label like "sat:nvidia" or "watchdog:kmsg". // status is "OK", "Warning", "Critical", or "Unknown". // OK never downgrades an existing Warning or Critical status. func (db *ComponentStatusDB) Record(key, source, status, detail string) { if db == nil || strings.TrimSpace(key) == "" { return } db.mu.Lock() defer db.mu.Unlock() now := time.Now().UTC() rec, exists := db.records[key] if !exists { rec = &ComponentStatusRecord{ComponentKey: key} db.records[key] = rec } rec.LastCheckedAt = now entry := ComponentStatusEntry{At: now, Status: status, Source: source, Detail: detail} rec.History = append(rec.History, entry) // Status merge: OK never downgrades Warning/Critical. newSev := componentSeverity(status) curSev := componentSeverity(rec.Status) if newSev > curSev { rec.Status = status rec.LastChangedAt = now rec.ErrorSummary = detail } else if rec.Status == "" { rec.Status = status rec.LastChangedAt = now } _ = db.saveLocked() } // Get returns the current record for a component key. func (db *ComponentStatusDB) Get(key string) (ComponentStatusRecord, bool) { if db == nil { return ComponentStatusRecord{}, false } db.mu.Lock() defer db.mu.Unlock() r, ok := db.records[key] if !ok { return ComponentStatusRecord{}, false } return *r, true } // All returns a snapshot of all records. func (db *ComponentStatusDB) All() []ComponentStatusRecord { if db == nil { return nil } db.mu.Lock() defer db.mu.Unlock() out := make([]ComponentStatusRecord, 0, len(db.records)) for _, r := range db.records { out = append(out, *r) } return out } func (db *ComponentStatusDB) saveLocked() error { records := make([]ComponentStatusRecord, 0, len(db.records)) for _, r := range db.records { records = append(records, *r) } data, err := json.MarshalIndent(records, "", " ") if err != nil { return err } return os.WriteFile(db.path, data, 0644) } // componentSeverity returns a numeric severity so higher values win. func componentSeverity(status string) int { switch strings.TrimSpace(status) { case "Critical": return 3 case "Warning": return 2 case "OK": return 1 default: return 0 } } // ApplySATResultToDB reads a SAT summary.txt from the run directory next to archivePath // and writes component status records to db for the given SAT target. // archivePath may be either a bare .tar.gz path or "Archive written to /path/foo.tar.gz". func ApplySATResultToDB(db *ComponentStatusDB, target, archivePath string) { if db == nil || strings.TrimSpace(archivePath) == "" { return } archivePath = extractArchivePath(archivePath) if archivePath == "" { return } runDir := strings.TrimSuffix(archivePath, ".tar.gz") data, err := os.ReadFile(filepath.Join(runDir, "summary.txt")) if err != nil { return } kv := parseSATKV(string(data)) overall := strings.ToUpper(strings.TrimSpace(kv["overall_status"])) if overall == "" { return } source := "sat:" + target dbStatus := satStatusToDBStatus(overall) // Map SAT target to component keys. switch target { case "nvidia", "amd", "nvidia-stress", "amd-stress", "amd-mem", "amd-bandwidth": db.Record("pcie:gpu:"+target, source, dbStatus, target+" SAT: "+overall) case "memory", "memory-stress", "sat-stress": db.Record("memory:all", source, dbStatus, target+" SAT: "+overall) case "cpu", "platform-stress": db.Record("cpu:all", source, dbStatus, target+" SAT: "+overall) case "storage": // Try to record per-device if available in summary. recordedAny := false for key, val := range kv { if !strings.HasSuffix(key, "_status") || key == "overall_status" { continue } base := strings.TrimSuffix(key, "_status") idx := strings.Index(base, "_") if idx <= 0 { continue } devName := base[:idx] devStatus := satStatusToDBStatus(strings.ToUpper(strings.TrimSpace(val))) db.Record("storage:"+devName, source, devStatus, "storage SAT: "+val) recordedAny = true } if !recordedAny { db.Record("storage:all", source, dbStatus, "storage SAT: "+overall) } } } func satStatusToDBStatus(overall string) string { switch overall { case "OK": return "OK" case "FAILED": return "Warning" case "PARTIAL", "UNSUPPORTED": return "Unknown" default: return "Unknown" } } // ExtractArchivePath extracts a bare .tar.gz path from a string that may be // "Archive written to /path/foo.tar.gz" or already a bare path. func ExtractArchivePath(s string) string { return extractArchivePath(s) } // ReadSATOverallStatus reads the overall_status value from the summary.txt // file located in the run directory alongside archivePath. // Returns "" if the file cannot be read. func ReadSATOverallStatus(archivePath string) string { if strings.TrimSpace(archivePath) == "" { return "" } runDir := strings.TrimSuffix(archivePath, ".tar.gz") data, err := os.ReadFile(filepath.Join(runDir, "summary.txt")) if err != nil { return "" } kv := parseSATKV(string(data)) return strings.ToUpper(strings.TrimSpace(kv["overall_status"])) } func extractArchivePath(s string) string { s = strings.TrimSpace(s) if strings.HasSuffix(s, ".tar.gz") { parts := strings.Fields(s) if len(parts) > 0 { return parts[len(parts)-1] } } return s } func parseSATKV(raw string) map[string]string { kv := make(map[string]string) for _, line := range strings.Split(raw, "\n") { k, v, ok := strings.Cut(strings.TrimSpace(line), "=") if ok { kv[strings.TrimSpace(k)] = strings.TrimSpace(v) } } return kv }