204 lines
5.6 KiB
Go
204 lines
5.6 KiB
Go
package platform
|
|
|
|
import (
|
|
"archive/tar"
|
|
"compress/gzip"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"sort"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
func (s *System) RunNvidiaAcceptancePack(baseDir string) (string, error) {
|
|
return runAcceptancePack(baseDir, "gpu-nvidia", nvidiaSATJobs())
|
|
}
|
|
|
|
func (s *System) RunMemoryAcceptancePack(baseDir string) (string, error) {
|
|
return runAcceptancePack(baseDir, "memory", []satJob{
|
|
{name: "01-free-before.log", cmd: []string{"free", "-h"}},
|
|
{name: "02-memtester.log", cmd: []string{"memtester", "128M", "1"}},
|
|
{name: "03-free-after.log", cmd: []string{"free", "-h"}},
|
|
})
|
|
}
|
|
|
|
func (s *System) RunStorageAcceptancePack(baseDir string) (string, error) {
|
|
if baseDir == "" {
|
|
baseDir = "/var/log/bee-sat"
|
|
}
|
|
ts := time.Now().UTC().Format("20060102-150405")
|
|
runDir := filepath.Join(baseDir, "storage-"+ts)
|
|
if err := os.MkdirAll(runDir, 0755); err != nil {
|
|
return "", err
|
|
}
|
|
|
|
devices, err := listStorageDevices()
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
sort.Strings(devices)
|
|
|
|
var summary strings.Builder
|
|
fmt.Fprintf(&summary, "run_at_utc=%s\n", time.Now().UTC().Format(time.RFC3339))
|
|
if len(devices) == 0 {
|
|
fmt.Fprintln(&summary, "devices=0")
|
|
} else {
|
|
fmt.Fprintf(&summary, "devices=%d\n", len(devices))
|
|
}
|
|
|
|
for index, devPath := range devices {
|
|
prefix := fmt.Sprintf("%02d-%s", index+1, filepath.Base(devPath))
|
|
commands := storageSATCommands(devPath)
|
|
for cmdIndex, job := range commands {
|
|
name := fmt.Sprintf("%s-%02d-%s.log", prefix, cmdIndex+1, job.name)
|
|
out, err := exec.Command(job.cmd[0], job.cmd[1:]...).CombinedOutput()
|
|
if writeErr := os.WriteFile(filepath.Join(runDir, name), out, 0644); writeErr != nil {
|
|
return "", writeErr
|
|
}
|
|
rc := 0
|
|
if err != nil {
|
|
rc = 1
|
|
}
|
|
fmt.Fprintf(&summary, "%s_%s_rc=%d\n", filepath.Base(devPath), strings.ReplaceAll(job.name, "-", "_"), rc)
|
|
}
|
|
}
|
|
|
|
if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary.String()), 0644); err != nil {
|
|
return "", err
|
|
}
|
|
archive := filepath.Join(baseDir, "storage-"+ts+".tar.gz")
|
|
if err := createTarGz(archive, runDir); err != nil {
|
|
return "", err
|
|
}
|
|
return archive, nil
|
|
}
|
|
|
|
// satJob describes one external command of an acceptance pack.
type satJob struct {
	// name identifies the job; the pack runners use it to build the log
	// file name and the summary key.
	name string
	// cmd is the argument vector: cmd[0] is the executable and the
	// remaining elements are its arguments.
	cmd []string
}
|
|
|
|
func nvidiaSATJobs() []satJob {
|
|
return []satJob{
|
|
{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
|
|
{name: "02-dmidecode-baseboard.log", cmd: []string{"dmidecode", "-t", "baseboard"}},
|
|
{name: "03-dmidecode-system.log", cmd: []string{"dmidecode", "-t", "system"}},
|
|
{name: "04-nvidia-bug-report.log", cmd: []string{"nvidia-bug-report.sh", "--output", "{{run_dir}}/nvidia-bug-report.log"}},
|
|
{name: "05-bee-gpu-stress.log", cmd: []string{"bee-gpu-stress", "--seconds", "5", "--size-mb", "64"}},
|
|
}
|
|
}
|
|
|
|
func runAcceptancePack(baseDir, prefix string, jobs []satJob) (string, error) {
|
|
if baseDir == "" {
|
|
baseDir = "/var/log/bee-sat"
|
|
}
|
|
ts := time.Now().UTC().Format("20060102-150405")
|
|
runDir := filepath.Join(baseDir, prefix+"-"+ts)
|
|
if err := os.MkdirAll(runDir, 0755); err != nil {
|
|
return "", err
|
|
}
|
|
|
|
var summary strings.Builder
|
|
fmt.Fprintf(&summary, "run_at_utc=%s\n", time.Now().UTC().Format(time.RFC3339))
|
|
for _, job := range jobs {
|
|
cmd := make([]string, 0, len(job.cmd))
|
|
for _, arg := range job.cmd {
|
|
cmd = append(cmd, strings.ReplaceAll(arg, "{{run_dir}}", runDir))
|
|
}
|
|
out, err := exec.Command(cmd[0], cmd[1:]...).CombinedOutput()
|
|
if writeErr := os.WriteFile(filepath.Join(runDir, job.name), out, 0644); writeErr != nil {
|
|
return "", writeErr
|
|
}
|
|
rc := 0
|
|
if err != nil {
|
|
rc = 1
|
|
}
|
|
fmt.Fprintf(&summary, "%s_rc=%d\n", strings.TrimSuffix(strings.TrimPrefix(job.name, "0"), ".log"), rc)
|
|
}
|
|
if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary.String()), 0644); err != nil {
|
|
return "", err
|
|
}
|
|
|
|
archive := filepath.Join(baseDir, prefix+"-"+ts+".tar.gz")
|
|
if err := createTarGz(archive, runDir); err != nil {
|
|
return "", err
|
|
}
|
|
return archive, nil
|
|
}
|
|
|
|
// listStorageDevices asks lsblk ("lsblk -dn -o NAME,TYPE") for block devices
// and returns a "/dev/<name>" path for every row whose TYPE column is "disk".
//
// Rows that do not consist of exactly two whitespace-separated fields are
// ignored. The slice is nil when no disk is found. If lsblk cannot be run or
// exits with an error, that error is returned and the slice is nil.
func listStorageDevices() ([]string, error) {
	raw, err := exec.Command("lsblk", "-dn", "-o", "NAME,TYPE").Output()
	if err != nil {
		return nil, err
	}

	var disks []string
	rows := strings.Split(strings.TrimSpace(string(raw)), "\n")
	for i := range rows {
		// Fields discards leading and trailing whitespace on its own.
		cols := strings.Fields(rows[i])
		if len(cols) == 2 && cols[1] == "disk" {
			disks = append(disks, "/dev/"+cols[0])
		}
	}
	return disks, nil
}
|
|
|
|
func storageSATCommands(devPath string) []satJob {
|
|
if strings.Contains(filepath.Base(devPath), "nvme") {
|
|
return []satJob{
|
|
{name: "nvme-id-ctrl", cmd: []string{"nvme", "id-ctrl", devPath, "-o", "json"}},
|
|
{name: "nvme-smart-log", cmd: []string{"nvme", "smart-log", devPath, "-o", "json"}},
|
|
{name: "nvme-device-self-test", cmd: []string{"nvme", "device-self-test", devPath, "--start", "1"}},
|
|
}
|
|
}
|
|
return []satJob{
|
|
{name: "smartctl-health", cmd: []string{"smartctl", "-H", "-A", devPath}},
|
|
{name: "smartctl-self-test-short", cmd: []string{"smartctl", "-t", "short", devPath}},
|
|
}
|
|
}
|
|
|
|
// createTarGz writes every file found under srcDir into a new gzip-compressed
// tar archive at dst.
//
// Entry names are relative to the parent of srcDir, so each entry is prefixed
// with srcDir's own base name. Directories are not stored as separate
// entries; only the files inside them are.
//
// The first error encountered is returned. This includes errors from closing
// the tar writer, the gzip writer and the destination file: those calls flush
// the archive footer and buffered data, so ignoring them could report a
// truncated archive as success.
func createTarGz(dst, srcDir string) (err error) {
	file, err := os.Create(dst)
	if err != nil {
		return err
	}
	// keepFirst stores a Close error unless an earlier error is already set.
	keepFirst := func(closeErr error) {
		if err == nil {
			err = closeErr
		}
	}
	// Deferred calls run in reverse order: tar, then gzip, then the file.
	defer func() { keepFirst(file.Close()) }()

	gz := gzip.NewWriter(file)
	defer func() { keepFirst(gz.Close()) }()

	tw := tar.NewWriter(gz)
	defer func() { keepFirst(tw.Close()) }()

	base := filepath.Dir(srcDir)
	return filepath.Walk(srcDir, func(path string, info os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if info.IsDir() {
			return nil
		}

		header, err := tar.FileInfoHeader(info, "")
		if err != nil {
			return err
		}
		// FileInfoHeader only fills in the base name; store the path
		// relative to srcDir's parent instead.
		rel, err := filepath.Rel(base, path)
		if err != nil {
			return err
		}
		header.Name = rel
		if err := tw.WriteHeader(header); err != nil {
			return err
		}

		src, err := os.Open(path)
		if err != nil {
			return err
		}
		// Runs when this callback returns, i.e. once per archived file.
		defer src.Close()

		_, err = io.Copy(tw, src)
		return err
	})
}
|