mirror of
https://github.com/Dvorinka/beszel.git
synced 2026-06-04 21:32:57 +00:00
Initial commit: Beszel fork with Domain Locker integration
This commit is contained in:
@@ -0,0 +1,785 @@
|
||||
package systems
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"math/rand"
|
||||
"net"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/internal/common"
|
||||
"github.com/henrygd/beszel/internal/hub/transport"
|
||||
"github.com/henrygd/beszel/internal/hub/utils"
|
||||
"github.com/henrygd/beszel/internal/hub/ws"
|
||||
|
||||
"github.com/henrygd/beszel/internal/entities/container"
|
||||
"github.com/henrygd/beszel/internal/entities/smart"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
"github.com/henrygd/beszel/internal/entities/systemd"
|
||||
|
||||
"github.com/henrygd/beszel"
|
||||
|
||||
"github.com/blang/semver"
|
||||
"github.com/fxamacker/cbor/v2"
|
||||
"github.com/lxzan/gws"
|
||||
"github.com/pocketbase/dbx"
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
"golang.org/x/crypto/ssh"
|
||||
)
|
||||
|
||||
type System struct {
|
||||
Id string `db:"id"`
|
||||
Host string `db:"host"`
|
||||
Port string `db:"port"`
|
||||
Status string `db:"status"`
|
||||
manager *SystemManager // Manager that this system belongs to
|
||||
client *ssh.Client // SSH client for fetching data
|
||||
sshTransport *transport.SSHTransport // SSH transport for requests
|
||||
data *system.CombinedData // system data from agent
|
||||
ctx context.Context // Context for stopping the updater
|
||||
cancel context.CancelFunc // Stops and removes system from updater
|
||||
WsConn *ws.WsConn // Handler for agent WebSocket connection
|
||||
agentVersion semver.Version // Agent version
|
||||
updateTicker *time.Ticker // Ticker for updating the system
|
||||
detailsFetched atomic.Bool // True if static system details have been fetched and saved
|
||||
smartFetching atomic.Bool // True if SMART devices are currently being fetched
|
||||
smartInterval time.Duration // Interval for periodic SMART data updates
|
||||
}
|
||||
|
||||
func (sm *SystemManager) NewSystem(systemId string) *System {
|
||||
system := &System{
|
||||
Id: systemId,
|
||||
data: &system.CombinedData{},
|
||||
}
|
||||
system.ctx, system.cancel = system.getContext()
|
||||
return system
|
||||
}
|
||||
|
||||
// StartUpdater starts the system updater.
|
||||
// It first fetches the data from the agent then updates the records.
|
||||
// If the data is not found or the system is down, it sets the system down.
|
||||
func (sys *System) StartUpdater() {
|
||||
// Channel that can be used to set the system down. Currently only used to
|
||||
// allow a short delay for reconnection after websocket connection is closed.
|
||||
var downChan chan struct{}
|
||||
|
||||
// Add random jitter to first WebSocket connection to prevent
|
||||
// clustering if all agents are started at the same time.
|
||||
// SSH connections during hub startup are already staggered.
|
||||
var jitter <-chan time.Time
|
||||
if sys.WsConn != nil {
|
||||
jitter = getJitter()
|
||||
// use the websocket connection's down channel to set the system down
|
||||
downChan = sys.WsConn.DownChan
|
||||
} else {
|
||||
// if the system does not have a websocket connection, wait before updating
|
||||
// to allow the agent to connect via websocket (makes sure fingerprint is set).
|
||||
time.Sleep(11 * time.Second)
|
||||
}
|
||||
|
||||
// update immediately if system is not paused (only for ws connections)
|
||||
// we'll wait a minute before connecting via SSH to prioritize ws connections
|
||||
if sys.Status != paused && sys.ctx.Err() == nil {
|
||||
if err := sys.update(); err != nil {
|
||||
_ = sys.setDown(err)
|
||||
}
|
||||
}
|
||||
|
||||
sys.updateTicker = time.NewTicker(time.Duration(interval) * time.Millisecond)
|
||||
// Go 1.23+ will automatically stop the ticker when the system is garbage collected, however we seem to need this or testing/synctest will block even if calling runtime.GC()
|
||||
defer sys.updateTicker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-sys.ctx.Done():
|
||||
return
|
||||
case <-sys.updateTicker.C:
|
||||
if err := sys.update(); err != nil {
|
||||
_ = sys.setDown(err)
|
||||
}
|
||||
case <-downChan:
|
||||
sys.WsConn = nil
|
||||
downChan = nil
|
||||
_ = sys.setDown(nil)
|
||||
case <-jitter:
|
||||
sys.updateTicker.Reset(time.Duration(interval) * time.Millisecond)
|
||||
if err := sys.update(); err != nil {
|
||||
_ = sys.setDown(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// update updates the system data and records.
|
||||
func (sys *System) update() error {
|
||||
if sys.Status == paused {
|
||||
sys.handlePaused()
|
||||
return nil
|
||||
}
|
||||
options := common.DataRequestOptions{
|
||||
CacheTimeMs: uint16(interval),
|
||||
}
|
||||
// fetch system details if not already fetched
|
||||
if !sys.detailsFetched.Load() {
|
||||
options.IncludeDetails = true
|
||||
}
|
||||
|
||||
data, err := sys.fetchDataFromAgent(options)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// ensure deprecated fields from older agents are migrated to current fields
|
||||
migrateDeprecatedFields(data, !sys.detailsFetched.Load())
|
||||
|
||||
// create system records
|
||||
_, err = sys.createRecords(data)
|
||||
|
||||
// if details were included and fetched successfully, mark details as fetched and update smart interval if set by agent
|
||||
if err == nil && data.Details != nil {
|
||||
sys.detailsFetched.Store(true)
|
||||
// update smart interval if it's set on the agent side
|
||||
if data.Details.SmartInterval > 0 {
|
||||
sys.smartInterval = data.Details.SmartInterval
|
||||
sys.manager.hub.Logger().Info("SMART interval updated from agent details", "system", sys.Id, "interval", sys.smartInterval.String())
|
||||
// make sure we reset expiration of lastFetch to remain as long as the new smart interval
|
||||
// to prevent premature expiration leading to new fetch if interval is different.
|
||||
sys.manager.smartFetchMap.UpdateExpiration(sys.Id, sys.smartInterval+time.Minute)
|
||||
}
|
||||
}
|
||||
|
||||
// Fetch and save SMART devices when system first comes online or at intervals
|
||||
if backgroundSmartFetchEnabled() && sys.detailsFetched.Load() {
|
||||
if sys.smartInterval <= 0 {
|
||||
sys.smartInterval = time.Hour
|
||||
}
|
||||
if sys.shouldFetchSmart() && sys.smartFetching.CompareAndSwap(false, true) {
|
||||
sys.manager.hub.Logger().Info("SMART fetch", "system", sys.Id, "interval", sys.smartInterval.String())
|
||||
go func() {
|
||||
defer sys.smartFetching.Store(false)
|
||||
_ = sys.FetchAndSaveSmartDevices()
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func (sys *System) handlePaused() {
|
||||
if sys.WsConn == nil {
|
||||
// if the system is paused and there's no websocket connection, remove the system
|
||||
_ = sys.manager.RemoveSystem(sys.Id)
|
||||
} else {
|
||||
// Send a ping to the agent to keep the connection alive if the system is paused
|
||||
if err := sys.WsConn.Ping(); err != nil {
|
||||
sys.manager.hub.Logger().Warn("Failed to ping agent", "system", sys.Id, "err", err)
|
||||
_ = sys.manager.RemoveSystem(sys.Id)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// createRecords updates the system record and adds system_stats and container_stats records
|
||||
func (sys *System) createRecords(data *system.CombinedData) (*core.Record, error) {
|
||||
systemRecord, err := sys.getRecord(sys.manager.hub)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
hub := sys.manager.hub
|
||||
err = hub.RunInTransaction(func(txApp core.App) error {
|
||||
// add system_stats record
|
||||
systemStatsCollection, err := txApp.FindCachedCollectionByNameOrId("system_stats")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
systemStatsRecord := core.NewRecord(systemStatsCollection)
|
||||
systemStatsRecord.Set("system", systemRecord.Id)
|
||||
systemStatsRecord.Set("stats", data.Stats)
|
||||
systemStatsRecord.Set("type", "1m")
|
||||
if err := txApp.SaveNoValidate(systemStatsRecord); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// add containers and container_stats records
|
||||
if len(data.Containers) > 0 {
|
||||
if data.Containers[0].Id != "" {
|
||||
if err := createContainerRecords(txApp, data.Containers, sys.Id); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
containerStatsCollection, err := txApp.FindCachedCollectionByNameOrId("container_stats")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
containerStatsRecord := core.NewRecord(containerStatsCollection)
|
||||
containerStatsRecord.Set("system", systemRecord.Id)
|
||||
containerStatsRecord.Set("stats", data.Containers)
|
||||
containerStatsRecord.Set("type", "1m")
|
||||
if err := txApp.SaveNoValidate(containerStatsRecord); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// add new systemd_stats record
|
||||
if len(data.SystemdServices) > 0 {
|
||||
if err := createSystemdStatsRecords(txApp, data.SystemdServices, sys.Id); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// add system details record
|
||||
if data.Details != nil {
|
||||
if err := createSystemDetailsRecord(txApp, data.Details, sys.Id); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// update system record (do this last because it triggers alerts and we need above records to be inserted first)
|
||||
systemRecord.Set("status", up)
|
||||
systemRecord.Set("info", data.Info)
|
||||
if err := txApp.SaveNoValidate(systemRecord); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
return systemRecord, err
|
||||
}
|
||||
|
||||
func createSystemDetailsRecord(app core.App, data *system.Details, systemId string) error {
|
||||
collectionName := "system_details"
|
||||
params := dbx.Params{
|
||||
"id": systemId,
|
||||
"system": systemId,
|
||||
"hostname": data.Hostname,
|
||||
"kernel": data.Kernel,
|
||||
"cores": data.Cores,
|
||||
"threads": data.Threads,
|
||||
"cpu": data.CpuModel,
|
||||
"os": data.Os,
|
||||
"os_name": data.OsName,
|
||||
"arch": data.Arch,
|
||||
"memory": data.MemoryTotal,
|
||||
"podman": data.Podman,
|
||||
"updated": time.Now().UTC(),
|
||||
}
|
||||
result, err := app.DB().Update(collectionName, params, dbx.HashExp{"id": systemId}).Execute()
|
||||
rowsAffected, _ := result.RowsAffected()
|
||||
if err != nil || rowsAffected == 0 {
|
||||
_, err = app.DB().Insert(collectionName, params).Execute()
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func createSystemdStatsRecords(app core.App, data []*systemd.Service, systemId string) error {
|
||||
if len(data) == 0 {
|
||||
return nil
|
||||
}
|
||||
// shared params for all records
|
||||
params := dbx.Params{
|
||||
"system": systemId,
|
||||
"updated": time.Now().UTC().UnixMilli(),
|
||||
}
|
||||
|
||||
valueStrings := make([]string, 0, len(data))
|
||||
for i, service := range data {
|
||||
suffix := fmt.Sprintf("%d", i)
|
||||
valueStrings = append(valueStrings, fmt.Sprintf("({:id%[1]s}, {:system}, {:name%[1]s}, {:state%[1]s}, {:sub%[1]s}, {:cpu%[1]s}, {:cpuPeak%[1]s}, {:memory%[1]s}, {:memPeak%[1]s}, {:updated})", suffix))
|
||||
params["id"+suffix] = makeStableHashId(systemId, service.Name)
|
||||
params["name"+suffix] = service.Name
|
||||
params["state"+suffix] = service.State
|
||||
params["sub"+suffix] = service.Sub
|
||||
params["cpu"+suffix] = service.Cpu
|
||||
params["cpuPeak"+suffix] = service.CpuPeak
|
||||
params["memory"+suffix] = service.Mem
|
||||
params["memPeak"+suffix] = service.MemPeak
|
||||
}
|
||||
queryString := fmt.Sprintf(
|
||||
"INSERT INTO systemd_services (id, system, name, state, sub, cpu, cpuPeak, memory, memPeak, updated) VALUES %s ON CONFLICT(id) DO UPDATE SET system = excluded.system, name = excluded.name, state = excluded.state, sub = excluded.sub, cpu = excluded.cpu, cpuPeak = excluded.cpuPeak, memory = excluded.memory, memPeak = excluded.memPeak, updated = excluded.updated",
|
||||
strings.Join(valueStrings, ","),
|
||||
)
|
||||
_, err := app.DB().NewQuery(queryString).Bind(params).Execute()
|
||||
return err
|
||||
}
|
||||
|
||||
// createContainerRecords creates container records
|
||||
func createContainerRecords(app core.App, data []*container.Stats, systemId string) error {
|
||||
if len(data) == 0 {
|
||||
return nil
|
||||
}
|
||||
// shared params for all records
|
||||
params := dbx.Params{
|
||||
"system": systemId,
|
||||
"updated": time.Now().UTC().UnixMilli(),
|
||||
}
|
||||
valueStrings := make([]string, 0, len(data))
|
||||
for i, container := range data {
|
||||
suffix := fmt.Sprintf("%d", i)
|
||||
valueStrings = append(valueStrings, fmt.Sprintf("({:id%[1]s}, {:system}, {:name%[1]s}, {:image%[1]s}, {:ports%[1]s}, {:status%[1]s}, {:health%[1]s}, {:cpu%[1]s}, {:memory%[1]s}, {:net%[1]s}, {:updated})", suffix))
|
||||
params["id"+suffix] = container.Id
|
||||
params["name"+suffix] = container.Name
|
||||
params["image"+suffix] = container.Image
|
||||
params["ports"+suffix] = container.Ports
|
||||
params["status"+suffix] = container.Status
|
||||
params["health"+suffix] = container.Health
|
||||
params["cpu"+suffix] = container.Cpu
|
||||
params["memory"+suffix] = container.Mem
|
||||
netBytes := container.Bandwidth[0] + container.Bandwidth[1]
|
||||
if netBytes == 0 {
|
||||
netBytes = uint64((container.NetworkSent + container.NetworkRecv) * 1024 * 1024)
|
||||
}
|
||||
params["net"+suffix] = netBytes
|
||||
}
|
||||
queryString := fmt.Sprintf(
|
||||
"INSERT INTO containers (id, system, name, image, ports, status, health, cpu, memory, net, updated) VALUES %s ON CONFLICT(id) DO UPDATE SET system = excluded.system, name = excluded.name, image = excluded.image, ports = excluded.ports, status = excluded.status, health = excluded.health, cpu = excluded.cpu, memory = excluded.memory, net = excluded.net, updated = excluded.updated",
|
||||
strings.Join(valueStrings, ","),
|
||||
)
|
||||
_, err := app.DB().NewQuery(queryString).Bind(params).Execute()
|
||||
return err
|
||||
}
|
||||
|
||||
// getRecord retrieves the system record from the database.
|
||||
// If the record is not found, it removes the system from the manager.
|
||||
func (sys *System) getRecord(app core.App) (*core.Record, error) {
|
||||
record, err := app.FindRecordById("systems", sys.Id)
|
||||
if err != nil || record == nil {
|
||||
_ = sys.manager.RemoveSystem(sys.Id)
|
||||
return nil, err
|
||||
}
|
||||
return record, nil
|
||||
}
|
||||
|
||||
// HasUser checks if the given user is in the system's users list.
|
||||
// Returns true if SHARE_ALL_SYSTEMS is enabled (any authenticated user can access any system).
|
||||
func (sys *System) HasUser(app core.App, user *core.Record) bool {
|
||||
if user == nil {
|
||||
return false
|
||||
}
|
||||
if v, _ := utils.GetEnv("SHARE_ALL_SYSTEMS"); v == "true" {
|
||||
return true
|
||||
}
|
||||
var recordData = struct {
|
||||
Users string
|
||||
}{}
|
||||
err := app.DB().NewQuery("SELECT users FROM systems WHERE id={:id}").
|
||||
Bind(dbx.Params{"id": sys.Id}).
|
||||
One(&recordData)
|
||||
if err != nil || recordData.Users == "" {
|
||||
return false
|
||||
}
|
||||
return strings.Contains(recordData.Users, user.Id)
|
||||
}
|
||||
|
||||
// setDown marks a system as down in the database.
|
||||
// It takes the original error that caused the system to go down and returns any error
|
||||
// encountered during the process of updating the system status.
|
||||
func (sys *System) setDown(originalError error) error {
|
||||
if sys.Status == down || sys.Status == paused {
|
||||
return nil
|
||||
}
|
||||
record, err := sys.getRecord(sys.manager.hub)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if originalError != nil {
|
||||
sys.manager.hub.Logger().Error("System down", "system", record.GetString("name"), "err", originalError)
|
||||
}
|
||||
record.Set("status", down)
|
||||
return sys.manager.hub.SaveNoValidate(record)
|
||||
}
|
||||
|
||||
func (sys *System) getContext() (context.Context, context.CancelFunc) {
|
||||
if sys.ctx == nil {
|
||||
sys.ctx, sys.cancel = context.WithCancel(context.Background())
|
||||
}
|
||||
return sys.ctx, sys.cancel
|
||||
}
|
||||
|
||||
// request sends a request to the agent, trying WebSocket first, then SSH.
|
||||
// This is the unified request method that uses the transport abstraction.
|
||||
func (sys *System) request(ctx context.Context, action common.WebSocketAction, req any, dest any) error {
|
||||
// Try WebSocket first
|
||||
if sys.WsConn != nil && sys.WsConn.IsConnected() {
|
||||
wsTransport := transport.NewWebSocketTransport(sys.WsConn)
|
||||
if err := wsTransport.Request(ctx, action, req, dest); err == nil {
|
||||
return nil
|
||||
} else if !shouldFallbackToSSH(err) {
|
||||
return err
|
||||
} else if shouldCloseWebSocket(err) {
|
||||
sys.closeWebSocketConnection()
|
||||
}
|
||||
}
|
||||
|
||||
// Fall back to SSH if WebSocket fails
|
||||
if err := sys.ensureSSHTransport(); err != nil {
|
||||
return err
|
||||
}
|
||||
err := sys.sshTransport.RequestWithRetry(ctx, action, req, dest, 1)
|
||||
// Keep legacy SSH client/version fields in sync for other code paths.
|
||||
if sys.sshTransport != nil {
|
||||
sys.client = sys.sshTransport.GetClient()
|
||||
sys.agentVersion = sys.sshTransport.GetAgentVersion()
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func shouldFallbackToSSH(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
|
||||
return true
|
||||
}
|
||||
if errors.Is(err, gws.ErrConnClosed) {
|
||||
return true
|
||||
}
|
||||
return errors.Is(err, transport.ErrWebSocketNotConnected)
|
||||
}
|
||||
|
||||
func shouldCloseWebSocket(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
return errors.Is(err, gws.ErrConnClosed) || errors.Is(err, transport.ErrWebSocketNotConnected)
|
||||
}
|
||||
|
||||
// ensureSSHTransport ensures the SSH transport is initialized and connected.
|
||||
func (sys *System) ensureSSHTransport() error {
|
||||
if sys.sshTransport == nil {
|
||||
if sys.manager.sshConfig == nil {
|
||||
if err := sys.manager.createSSHClientConfig(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
sys.sshTransport = transport.NewSSHTransport(transport.SSHTransportConfig{
|
||||
Host: sys.Host,
|
||||
Port: sys.Port,
|
||||
Config: sys.manager.sshConfig,
|
||||
Timeout: 4 * time.Second,
|
||||
})
|
||||
}
|
||||
// Sync client state with transport
|
||||
if sys.client != nil {
|
||||
sys.sshTransport.SetClient(sys.client)
|
||||
sys.sshTransport.SetAgentVersion(sys.agentVersion)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// fetchDataFromAgent attempts to fetch data from the agent, prioritizing WebSocket if available.
|
||||
func (sys *System) fetchDataFromAgent(options common.DataRequestOptions) (*system.CombinedData, error) {
|
||||
if sys.data == nil {
|
||||
sys.data = &system.CombinedData{}
|
||||
}
|
||||
|
||||
if sys.WsConn != nil && sys.WsConn.IsConnected() {
|
||||
wsData, err := sys.fetchDataViaWebSocket(options)
|
||||
if err == nil {
|
||||
return wsData, nil
|
||||
}
|
||||
// close the WebSocket connection if error and try SSH
|
||||
sys.closeWebSocketConnection()
|
||||
}
|
||||
|
||||
sshData, err := sys.fetchDataViaSSH(options)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return sshData, nil
|
||||
}
|
||||
|
||||
func (sys *System) fetchDataViaWebSocket(options common.DataRequestOptions) (*system.CombinedData, error) {
|
||||
if sys.WsConn == nil || !sys.WsConn.IsConnected() {
|
||||
return nil, errors.New("no websocket connection")
|
||||
}
|
||||
wsTransport := transport.NewWebSocketTransport(sys.WsConn)
|
||||
err := wsTransport.Request(context.Background(), common.GetData, options, sys.data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return sys.data, nil
|
||||
}
|
||||
|
||||
// FetchContainerInfoFromAgent fetches container info from the agent
|
||||
func (sys *System) FetchContainerInfoFromAgent(containerID string) (string, error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
var result string
|
||||
err := sys.request(ctx, common.GetContainerInfo, common.ContainerInfoRequest{ContainerID: containerID}, &result)
|
||||
return result, err
|
||||
}
|
||||
|
||||
// FetchContainerLogsFromAgent fetches container logs from the agent
|
||||
func (sys *System) FetchContainerLogsFromAgent(containerID string) (string, error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
var result string
|
||||
err := sys.request(ctx, common.GetContainerLogs, common.ContainerLogsRequest{ContainerID: containerID}, &result)
|
||||
return result, err
|
||||
}
|
||||
|
||||
// FetchSystemdInfoFromAgent fetches detailed systemd service information from the agent
|
||||
func (sys *System) FetchSystemdInfoFromAgent(serviceName string) (systemd.ServiceDetails, error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
var result systemd.ServiceDetails
|
||||
err := sys.request(ctx, common.GetSystemdInfo, common.SystemdInfoRequest{ServiceName: serviceName}, &result)
|
||||
return result, err
|
||||
}
|
||||
|
||||
// FetchSmartDataFromAgent fetches SMART data from the agent
|
||||
func (sys *System) FetchSmartDataFromAgent() (map[string]smart.SmartData, error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
|
||||
defer cancel()
|
||||
var result map[string]smart.SmartData
|
||||
err := sys.request(ctx, common.GetSmartData, nil, &result)
|
||||
return result, err
|
||||
}
|
||||
|
||||
func makeStableHashId(strings ...string) string {
|
||||
hash := fnv.New32a()
|
||||
for _, str := range strings {
|
||||
hash.Write([]byte(str))
|
||||
}
|
||||
return fmt.Sprintf("%x", hash.Sum32())
|
||||
}
|
||||
|
||||
// fetchDataViaSSH handles fetching data using SSH.
|
||||
// This function encapsulates the original SSH logic.
|
||||
// It updates sys.data directly upon successful fetch.
|
||||
func (sys *System) fetchDataViaSSH(options common.DataRequestOptions) (*system.CombinedData, error) {
|
||||
err := sys.runSSHOperation(4*time.Second, 1, func(session *ssh.Session) (bool, error) {
|
||||
stdout, err := session.StdoutPipe()
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
stdin, stdinErr := session.StdinPipe()
|
||||
if err := session.Shell(); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
*sys.data = system.CombinedData{}
|
||||
|
||||
if sys.agentVersion.GTE(beszel.MinVersionAgentResponse) && stdinErr == nil {
|
||||
req := common.HubRequest[any]{Action: common.GetData, Data: options}
|
||||
_ = cbor.NewEncoder(stdin).Encode(req)
|
||||
_ = stdin.Close()
|
||||
|
||||
var resp common.AgentResponse
|
||||
if decErr := cbor.NewDecoder(stdout).Decode(&resp); decErr == nil && resp.SystemData != nil {
|
||||
*sys.data = *resp.SystemData
|
||||
if err := session.Wait(); err != nil {
|
||||
return false, err
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
|
||||
var decodeErr error
|
||||
if sys.agentVersion.GTE(beszel.MinVersionCbor) {
|
||||
decodeErr = cbor.NewDecoder(stdout).Decode(sys.data)
|
||||
} else {
|
||||
decodeErr = json.NewDecoder(stdout).Decode(sys.data)
|
||||
}
|
||||
|
||||
if decodeErr != nil {
|
||||
return true, decodeErr
|
||||
}
|
||||
|
||||
if err := session.Wait(); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
return false, nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return sys.data, nil
|
||||
}
|
||||
|
||||
// runSSHOperation establishes an SSH session and executes the provided operation.
|
||||
// The operation can request a retry by returning true as the first return value.
|
||||
func (sys *System) runSSHOperation(timeout time.Duration, retries int, operation func(*ssh.Session) (bool, error)) error {
|
||||
for attempt := 0; attempt <= retries; attempt++ {
|
||||
if sys.client == nil || sys.Status == down {
|
||||
if err := sys.createSSHClient(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
session, err := sys.createSessionWithTimeout(timeout)
|
||||
if err != nil {
|
||||
if attempt >= retries {
|
||||
return err
|
||||
}
|
||||
sys.manager.hub.Logger().Warn("Session closed. Retrying...", "host", sys.Host, "port", sys.Port, "err", err)
|
||||
sys.closeSSHConnection()
|
||||
continue
|
||||
}
|
||||
|
||||
retry, opErr := func() (bool, error) {
|
||||
defer session.Close()
|
||||
return operation(session)
|
||||
}()
|
||||
|
||||
if opErr == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if retry {
|
||||
sys.closeSSHConnection()
|
||||
if attempt < retries {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
return opErr
|
||||
}
|
||||
|
||||
return fmt.Errorf("ssh operation failed")
|
||||
}
|
||||
|
||||
// createSSHClient creates a new SSH client for the system
|
||||
func (s *System) createSSHClient() error {
|
||||
if s.manager.sshConfig == nil {
|
||||
if err := s.manager.createSSHClientConfig(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
network := "tcp"
|
||||
host := s.Host
|
||||
if strings.HasPrefix(host, "/") {
|
||||
network = "unix"
|
||||
} else {
|
||||
host = net.JoinHostPort(host, s.Port)
|
||||
}
|
||||
var err error
|
||||
s.client, err = ssh.Dial(network, host, s.manager.sshConfig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
s.agentVersion, _ = extractAgentVersion(string(s.client.Conn.ServerVersion()))
|
||||
s.manager.resetFailedSmartFetchState(s.Id)
|
||||
return nil
|
||||
}
|
||||
|
||||
// createSessionWithTimeout creates a new SSH session with a timeout to avoid hanging
|
||||
// in case of network issues
|
||||
func (sys *System) createSessionWithTimeout(timeout time.Duration) (*ssh.Session, error) {
|
||||
if sys.client == nil {
|
||||
return nil, fmt.Errorf("client not initialized")
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(sys.ctx, timeout)
|
||||
defer cancel()
|
||||
|
||||
sessionChan := make(chan *ssh.Session, 1)
|
||||
errChan := make(chan error, 1)
|
||||
|
||||
go func() {
|
||||
if session, err := sys.client.NewSession(); err != nil {
|
||||
errChan <- err
|
||||
} else {
|
||||
sessionChan <- session
|
||||
}
|
||||
}()
|
||||
|
||||
select {
|
||||
case session := <-sessionChan:
|
||||
return session, nil
|
||||
case err := <-errChan:
|
||||
return nil, err
|
||||
case <-ctx.Done():
|
||||
return nil, fmt.Errorf("timeout")
|
||||
}
|
||||
}
|
||||
|
||||
// closeSSHConnection closes the SSH connection but keeps the system in the manager
|
||||
func (sys *System) closeSSHConnection() {
|
||||
if sys.sshTransport != nil {
|
||||
sys.sshTransport.Close()
|
||||
}
|
||||
if sys.client != nil {
|
||||
sys.client.Close()
|
||||
sys.client = nil
|
||||
}
|
||||
}
|
||||
|
||||
// closeWebSocketConnection closes the WebSocket connection but keeps the system in the manager
|
||||
// to allow updating via SSH. It will be removed if the WS connection is re-established.
|
||||
// The system will be set as down a few seconds later if the connection is not re-established.
|
||||
func (sys *System) closeWebSocketConnection() {
|
||||
if sys.WsConn != nil {
|
||||
sys.WsConn.Close(nil)
|
||||
}
|
||||
}
|
||||
|
||||
// extractAgentVersion extracts the beszel version from SSH server version string
|
||||
func extractAgentVersion(versionString string) (semver.Version, error) {
|
||||
_, after, _ := strings.Cut(versionString, "_")
|
||||
return semver.Parse(after)
|
||||
}
|
||||
|
||||
// getJitter returns a channel that will be triggered after a random delay
|
||||
// between 51% and 95% of the interval.
|
||||
// This is used to stagger the initial WebSocket connections to prevent clustering.
|
||||
func getJitter() <-chan time.Time {
|
||||
minPercent := 51
|
||||
maxPercent := 95
|
||||
jitterRange := maxPercent - minPercent
|
||||
msDelay := (interval * minPercent / 100) + rand.Intn(interval*jitterRange/100)
|
||||
return time.After(time.Duration(msDelay) * time.Millisecond)
|
||||
}
|
||||
|
||||
// migrateDeprecatedFields moves values from deprecated fields to their new locations if the new
|
||||
// fields are not already populated. Deprecated fields and refs may be removed at least 30 days
|
||||
// and one minor version release after the release that includes the migration.
|
||||
//
|
||||
// This is run when processing incoming system data from agents, which may be on older versions.
|
||||
func migrateDeprecatedFields(cd *system.CombinedData, createDetails bool) {
|
||||
// migration added 0.19.0
|
||||
if cd.Stats.Bandwidth[0] == 0 && cd.Stats.Bandwidth[1] == 0 {
|
||||
cd.Stats.Bandwidth[0] = uint64(cd.Stats.NetworkSent * 1024 * 1024)
|
||||
cd.Stats.Bandwidth[1] = uint64(cd.Stats.NetworkRecv * 1024 * 1024)
|
||||
cd.Stats.NetworkSent, cd.Stats.NetworkRecv = 0, 0
|
||||
}
|
||||
// migration added 0.19.0
|
||||
if cd.Info.BandwidthBytes == 0 {
|
||||
cd.Info.BandwidthBytes = uint64(cd.Info.Bandwidth * 1024 * 1024)
|
||||
cd.Info.Bandwidth = 0
|
||||
}
|
||||
// migration added 0.19.0
|
||||
if cd.Stats.DiskIO[0] == 0 && cd.Stats.DiskIO[1] == 0 {
|
||||
cd.Stats.DiskIO[0] = uint64(cd.Stats.DiskReadPs * 1024 * 1024)
|
||||
cd.Stats.DiskIO[1] = uint64(cd.Stats.DiskWritePs * 1024 * 1024)
|
||||
cd.Stats.DiskReadPs, cd.Stats.DiskWritePs = 0, 0
|
||||
}
|
||||
// migration added 0.19.0 - Move deprecated Info fields to Details struct
|
||||
if cd.Details == nil && cd.Info.Hostname != "" {
|
||||
if createDetails {
|
||||
cd.Details = &system.Details{
|
||||
Hostname: cd.Info.Hostname,
|
||||
Kernel: cd.Info.KernelVersion,
|
||||
Cores: cd.Info.Cores,
|
||||
Threads: cd.Info.Threads,
|
||||
CpuModel: cd.Info.CpuModel,
|
||||
Podman: cd.Info.Podman,
|
||||
Os: cd.Info.Os,
|
||||
MemoryTotal: uint64(cd.Stats.Mem * 1024 * 1024 * 1024),
|
||||
}
|
||||
}
|
||||
// zero the deprecated fields to prevent saving them in systems.info DB json payload
|
||||
cd.Info.Hostname = ""
|
||||
cd.Info.KernelVersion = ""
|
||||
cd.Info.Cores = 0
|
||||
cd.Info.CpuModel = ""
|
||||
cd.Info.Podman = false
|
||||
cd.Info.Os = 0
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,374 @@
|
||||
package systems
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/internal/hub/ws"
|
||||
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
"github.com/henrygd/beszel/internal/hub/expirymap"
|
||||
|
||||
"github.com/henrygd/beszel/internal/common"
|
||||
|
||||
"github.com/henrygd/beszel"
|
||||
|
||||
"github.com/blang/semver"
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
"github.com/pocketbase/pocketbase/tools/store"
|
||||
"golang.org/x/crypto/ssh"
|
||||
)
|
||||
|
||||
// System status constants
|
||||
const (
|
||||
up string = "up" // System is online and responding
|
||||
down string = "down" // System is offline or not responding
|
||||
paused string = "paused" // System monitoring is paused
|
||||
pending string = "pending" // System is waiting on initial connection result
|
||||
|
||||
// interval is the default update interval in milliseconds (60 seconds)
|
||||
interval int = 60_000
|
||||
// interval int = 10_000 // Debug interval for faster updates
|
||||
|
||||
// sessionTimeout is the maximum time to wait for SSH connections
|
||||
sessionTimeout = 4 * time.Second
|
||||
)
|
||||
|
||||
// errSystemExists is returned when attempting to add a system that already exists
|
||||
var errSystemExists = errors.New("system exists")
|
||||
|
||||
// SystemManager manages a collection of monitored systems and their connections.
|
||||
// It handles system lifecycle, status updates, and maintains both SSH and WebSocket connections.
|
||||
type SystemManager struct {
|
||||
hub hubLike // Hub interface for database and alert operations
|
||||
systems *store.Store[string, *System] // Thread-safe store of active systems
|
||||
sshConfig *ssh.ClientConfig // SSH client configuration for system connections
|
||||
smartFetchMap *expirymap.ExpiryMap[smartFetchState] // Stores last SMART fetch time/result; TTL is only for cleanup
|
||||
}
|
||||
|
||||
// hubLike defines the interface requirements for the hub dependency.
|
||||
// It extends core.App with system-specific functionality.
|
||||
type hubLike interface {
|
||||
core.App
|
||||
GetSSHKey(dataDir string) (ssh.Signer, error)
|
||||
HandleSystemAlerts(systemRecord *core.Record, data *system.CombinedData) error
|
||||
HandleStatusAlerts(status string, systemRecord *core.Record) error
|
||||
CancelPendingStatusAlerts(systemID string)
|
||||
}
|
||||
|
||||
// NewSystemManager creates a new SystemManager instance with the provided hub.
|
||||
// The hub must implement the hubLike interface to provide database and alert functionality.
|
||||
func NewSystemManager(hub hubLike) *SystemManager {
|
||||
return &SystemManager{
|
||||
systems: store.New(map[string]*System{}),
|
||||
hub: hub,
|
||||
smartFetchMap: expirymap.New[smartFetchState](time.Hour),
|
||||
}
|
||||
}
|
||||
|
||||
// GetSystem returns a system by ID from the store
|
||||
func (sm *SystemManager) GetSystem(systemID string) (*System, error) {
|
||||
sys, ok := sm.systems.GetOk(systemID)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("system not found")
|
||||
}
|
||||
return sys, nil
|
||||
}
|
||||
|
||||
// Initialize sets up the system manager by binding event hooks and starting existing systems.
|
||||
// It configures SSH client settings and begins monitoring all non-paused systems from the database.
|
||||
// Systems are started with staggered delays to prevent overwhelming the hub during startup.
|
||||
func (sm *SystemManager) Initialize() error {
|
||||
sm.bindEventHooks()
|
||||
|
||||
// Initialize SSH client configuration
|
||||
err := sm.createSSHClientConfig()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Load existing systems from database (excluding paused ones)
|
||||
var systems []*System
|
||||
err = sm.hub.DB().NewQuery("SELECT id, host, port, status FROM systems WHERE status != 'paused'").All(&systems)
|
||||
if err != nil || len(systems) == 0 {
|
||||
return err
|
||||
}
|
||||
|
||||
// Start systems in background with staggered timing
|
||||
go func() {
|
||||
// Calculate staggered delay between system starts (max 2 seconds per system)
|
||||
delta := interval / max(1, len(systems))
|
||||
delta = min(delta, 2_000)
|
||||
sleepTime := time.Duration(delta) * time.Millisecond
|
||||
|
||||
for _, system := range systems {
|
||||
time.Sleep(sleepTime)
|
||||
_ = sm.AddSystem(system)
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
}
|
||||
|
||||
// bindEventHooks registers event handlers for system and fingerprint record changes.
|
||||
// These hooks ensure the system manager stays synchronized with database changes.
|
||||
func (sm *SystemManager) bindEventHooks() {
|
||||
sm.hub.OnRecordCreate("systems").BindFunc(sm.onRecordCreate)
|
||||
sm.hub.OnRecordAfterCreateSuccess("systems").BindFunc(sm.onRecordAfterCreateSuccess)
|
||||
sm.hub.OnRecordUpdate("systems").BindFunc(sm.onRecordUpdate)
|
||||
sm.hub.OnRecordAfterUpdateSuccess("systems").BindFunc(sm.onRecordAfterUpdateSuccess)
|
||||
sm.hub.OnRecordAfterDeleteSuccess("systems").BindFunc(sm.onRecordAfterDeleteSuccess)
|
||||
sm.hub.OnRecordAfterUpdateSuccess("fingerprints").BindFunc(sm.onTokenRotated)
|
||||
sm.hub.OnRealtimeSubscribeRequest().BindFunc(sm.onRealtimeSubscribeRequest)
|
||||
sm.hub.OnRealtimeConnectRequest().BindFunc(sm.onRealtimeConnectRequest)
|
||||
}
|
||||
|
||||
// onTokenRotated handles fingerprint token rotation events.
|
||||
// When a system's authentication token is rotated, any existing WebSocket connection
|
||||
// must be closed to force re-authentication with the new token.
|
||||
func (sm *SystemManager) onTokenRotated(e *core.RecordEvent) error {
|
||||
systemID := e.Record.GetString("system")
|
||||
system, ok := sm.systems.GetOk(systemID)
|
||||
if !ok {
|
||||
return e.Next()
|
||||
}
|
||||
// No need to close connection if not connected via websocket
|
||||
if system.WsConn == nil {
|
||||
return e.Next()
|
||||
}
|
||||
system.setDown(nil)
|
||||
sm.RemoveSystem(systemID)
|
||||
return e.Next()
|
||||
}
|
||||
|
||||
// onRecordCreate is called before a new system record is committed to the database.
|
||||
// It initializes the record with default values: empty info and pending status.
|
||||
func (sm *SystemManager) onRecordCreate(e *core.RecordEvent) error {
|
||||
e.Record.Set("info", system.Info{})
|
||||
e.Record.Set("status", pending)
|
||||
return e.Next()
|
||||
}
|
||||
|
||||
// onRecordAfterCreateSuccess is called after a new system record is successfully created.
|
||||
// It adds the new system to the manager to begin monitoring.
|
||||
func (sm *SystemManager) onRecordAfterCreateSuccess(e *core.RecordEvent) error {
|
||||
if err := sm.AddRecord(e.Record, nil); err != nil {
|
||||
e.App.Logger().Error("Error adding record", "err", err)
|
||||
}
|
||||
return e.Next()
|
||||
}
|
||||
|
||||
// onRecordUpdate is called before a system record is updated in the database.
|
||||
// It clears system info when the status is changed to paused.
|
||||
func (sm *SystemManager) onRecordUpdate(e *core.RecordEvent) error {
|
||||
if e.Record.GetString("status") == paused {
|
||||
e.Record.Set("info", system.Info{})
|
||||
}
|
||||
return e.Next()
|
||||
}
|
||||
|
||||
// onRecordAfterUpdateSuccess handles system record updates after they're committed to the database.
|
||||
// It manages system lifecycle based on status changes and triggers appropriate alerts.
|
||||
// Status transitions are handled as follows:
|
||||
// - paused: Closes SSH connection and deactivates alerts
|
||||
// - pending: Starts monitoring (reuses WebSocket if available)
|
||||
// - up: Triggers system alerts
|
||||
// - down: Triggers status change alerts
|
||||
func (sm *SystemManager) onRecordAfterUpdateSuccess(e *core.RecordEvent) error {
|
||||
newStatus := e.Record.GetString("status")
|
||||
prevStatus := pending
|
||||
system, ok := sm.systems.GetOk(e.Record.Id)
|
||||
if ok {
|
||||
prevStatus = system.Status
|
||||
system.Status = newStatus
|
||||
}
|
||||
|
||||
switch newStatus {
|
||||
case paused:
|
||||
if ok {
|
||||
// Pause monitoring but keep system in manager for potential resume
|
||||
system.closeSSHConnection()
|
||||
}
|
||||
_ = deactivateAlerts(e.App, e.Record.Id)
|
||||
sm.hub.CancelPendingStatusAlerts(e.Record.Id)
|
||||
return e.Next()
|
||||
case pending:
|
||||
// Resume monitoring, preferring existing WebSocket connection
|
||||
if ok && system.WsConn != nil {
|
||||
go system.update()
|
||||
return e.Next()
|
||||
}
|
||||
// Start new monitoring session
|
||||
if err := sm.AddRecord(e.Record, nil); err != nil {
|
||||
e.App.Logger().Error("Error adding record", "err", err)
|
||||
}
|
||||
_ = deactivateAlerts(e.App, e.Record.Id)
|
||||
return e.Next()
|
||||
}
|
||||
|
||||
// Handle systems not in manager
|
||||
if !ok {
|
||||
return sm.AddRecord(e.Record, nil)
|
||||
}
|
||||
|
||||
// Trigger system alerts when system comes online
|
||||
if newStatus == up {
|
||||
if err := sm.hub.HandleSystemAlerts(e.Record, system.data); err != nil {
|
||||
e.App.Logger().Error("Error handling system alerts", "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Trigger status change alerts for up/down transitions
|
||||
if (newStatus == down && prevStatus == up) || (newStatus == up && prevStatus == down) {
|
||||
if err := sm.hub.HandleStatusAlerts(newStatus, e.Record); err != nil {
|
||||
e.App.Logger().Error("Error handling status alerts", "err", err)
|
||||
}
|
||||
}
|
||||
return e.Next()
|
||||
}
|
||||
|
||||
// onRecordAfterDeleteSuccess is called after a system record is successfully deleted.
|
||||
// It removes the system from the manager and cleans up all associated resources.
|
||||
func (sm *SystemManager) onRecordAfterDeleteSuccess(e *core.RecordEvent) error {
|
||||
sm.RemoveSystem(e.Record.Id)
|
||||
return e.Next()
|
||||
}
|
||||
|
||||
// AddSystem adds a system to the manager and starts monitoring it.
|
||||
// It validates required fields, initializes the system context, and starts the update goroutine.
|
||||
// Returns error if a system with the same ID already exists.
|
||||
func (sm *SystemManager) AddSystem(sys *System) error {
|
||||
if sm.systems.Has(sys.Id) {
|
||||
return errSystemExists
|
||||
}
|
||||
if sys.Id == "" || sys.Host == "" {
|
||||
return errors.New("system missing required fields")
|
||||
}
|
||||
|
||||
// Initialize system for monitoring
|
||||
sys.manager = sm
|
||||
sys.ctx, sys.cancel = sys.getContext()
|
||||
sys.data = &system.CombinedData{}
|
||||
sm.systems.Set(sys.Id, sys)
|
||||
|
||||
// Start monitoring in background
|
||||
go sys.StartUpdater()
|
||||
return nil
|
||||
}
|
||||
|
||||
// RemoveSystem removes a system from the manager and cleans up all associated resources.
|
||||
// It cancels the system's context, closes all connections, and removes it from the store.
|
||||
// Returns an error if the system is not found.
|
||||
func (sm *SystemManager) RemoveSystem(systemID string) error {
|
||||
system, ok := sm.systems.GetOk(systemID)
|
||||
if !ok {
|
||||
return errors.New("system not found")
|
||||
}
|
||||
|
||||
// Stop the update goroutine
|
||||
if system.cancel != nil {
|
||||
system.cancel()
|
||||
}
|
||||
|
||||
// Clean up all connections
|
||||
system.closeSSHConnection()
|
||||
system.closeWebSocketConnection()
|
||||
sm.systems.Remove(systemID)
|
||||
return nil
|
||||
}
|
||||
|
||||
// AddRecord creates a System instance from a database record and adds it to the manager.
|
||||
// If a system with the same ID already exists, it's removed first to ensure clean state.
|
||||
// If no system instance is provided, a new one is created.
|
||||
// This method is typically called when systems are created or their status changes to pending.
|
||||
func (sm *SystemManager) AddRecord(record *core.Record, system *System) (err error) {
|
||||
// Remove existing system to ensure clean state
|
||||
if sm.systems.Has(record.Id) {
|
||||
_ = sm.RemoveSystem(record.Id)
|
||||
}
|
||||
|
||||
// Create new system if none provided
|
||||
if system == nil {
|
||||
system = sm.NewSystem(record.Id)
|
||||
}
|
||||
|
||||
// Populate system from record
|
||||
system.Status = record.GetString("status")
|
||||
system.Host = record.GetString("host")
|
||||
system.Port = record.GetString("port")
|
||||
|
||||
return sm.AddSystem(system)
|
||||
}
|
||||
|
||||
// AddWebSocketSystem creates and adds a system with an established WebSocket connection.
|
||||
// This method is called when an agent connects via WebSocket with valid authentication.
|
||||
// The system is immediately added to monitoring with the provided connection and version info.
|
||||
func (sm *SystemManager) AddWebSocketSystem(systemId string, agentVersion semver.Version, wsConn *ws.WsConn) error {
|
||||
systemRecord, err := sm.hub.FindRecordById("systems", systemId)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
sm.resetFailedSmartFetchState(systemId)
|
||||
|
||||
system := sm.NewSystem(systemId)
|
||||
system.WsConn = wsConn
|
||||
system.agentVersion = agentVersion
|
||||
|
||||
if err := sm.AddRecord(systemRecord, system); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// resetFailedSmartFetchState clears only failed SMART cooldown entries so a fresh
|
||||
// agent reconnect retries SMART discovery immediately after configuration changes.
|
||||
func (sm *SystemManager) resetFailedSmartFetchState(systemID string) {
|
||||
state, ok := sm.smartFetchMap.GetOk(systemID)
|
||||
if ok && !state.Successful {
|
||||
sm.smartFetchMap.Remove(systemID)
|
||||
}
|
||||
}
|
||||
|
||||
// createSSHClientConfig initializes the SSH client configuration for connecting to an agent's server
|
||||
func (sm *SystemManager) createSSHClientConfig() error {
|
||||
privateKey, err := sm.hub.GetSSHKey("")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
sm.sshConfig = &ssh.ClientConfig{
|
||||
User: "u",
|
||||
Auth: []ssh.AuthMethod{
|
||||
ssh.PublicKeys(privateKey),
|
||||
},
|
||||
Config: ssh.Config{
|
||||
Ciphers: common.DefaultCiphers,
|
||||
KeyExchanges: common.DefaultKeyExchanges,
|
||||
MACs: common.DefaultMACs,
|
||||
},
|
||||
HostKeyCallback: ssh.InsecureIgnoreHostKey(),
|
||||
ClientVersion: fmt.Sprintf("SSH-2.0-%s_%s", beszel.AppName, beszel.Version),
|
||||
Timeout: sessionTimeout,
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// deactivateAlerts finds all triggered alerts for a system and sets them to inactive.
|
||||
// This is called when a system is paused or goes offline to prevent continued alerts.
|
||||
func deactivateAlerts(app core.App, systemID string) error {
|
||||
// Note: Direct SQL updates don't trigger SSE, so we use the PocketBase API
|
||||
// _, err := app.DB().NewQuery(fmt.Sprintf("UPDATE alerts SET triggered = false WHERE system = '%s'", systemID)).Execute()
|
||||
|
||||
alerts, err := app.FindRecordsByFilter("alerts", fmt.Sprintf("system = '%s' && triggered = 1", systemID), "", -1, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, alert := range alerts {
|
||||
alert.Set("triggered", false)
|
||||
if err := app.SaveNoValidate(alert); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,176 @@
|
||||
package systems
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/internal/common"
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
"github.com/pocketbase/pocketbase/tools/subscriptions"
|
||||
)
|
||||
|
||||
type subscriptionInfo struct {
|
||||
subscription string
|
||||
connectedClients uint8
|
||||
}
|
||||
|
||||
var (
|
||||
activeSubscriptions = make(map[string]*subscriptionInfo)
|
||||
workerRunning bool
|
||||
tickerStopChan chan struct{}
|
||||
realtimeMutex sync.Mutex
|
||||
)
|
||||
|
||||
// onRealtimeConnectRequest handles client connection events for realtime subscriptions.
|
||||
// It cleans up existing subscriptions when a client connects.
|
||||
func (sm *SystemManager) onRealtimeConnectRequest(e *core.RealtimeConnectRequestEvent) error {
|
||||
// after e.Next() is the client disconnection
|
||||
e.Next()
|
||||
subscriptions := e.Client.Subscriptions()
|
||||
for k := range subscriptions {
|
||||
sm.removeRealtimeSubscription(k, subscriptions[k])
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// onRealtimeSubscribeRequest handles client subscription events for realtime metrics.
|
||||
// It tracks new subscriptions and unsubscriptions to manage the realtime worker lifecycle.
|
||||
func (sm *SystemManager) onRealtimeSubscribeRequest(e *core.RealtimeSubscribeRequestEvent) error {
|
||||
oldSubs := e.Client.Subscriptions()
|
||||
// after e.Next() is the result of the subscribe request
|
||||
err := e.Next()
|
||||
newSubs := e.Client.Subscriptions()
|
||||
|
||||
// handle new subscriptions
|
||||
for k, options := range newSubs {
|
||||
if _, ok := oldSubs[k]; !ok {
|
||||
if strings.HasPrefix(k, "rt_metrics") {
|
||||
systemId := options.Query["system"]
|
||||
if _, ok := activeSubscriptions[systemId]; !ok {
|
||||
activeSubscriptions[systemId] = &subscriptionInfo{
|
||||
subscription: k,
|
||||
}
|
||||
}
|
||||
activeSubscriptions[systemId].connectedClients += 1
|
||||
sm.onRealtimeSubscriptionAdded()
|
||||
}
|
||||
}
|
||||
}
|
||||
// handle unsubscriptions
|
||||
for k := range oldSubs {
|
||||
if _, ok := newSubs[k]; !ok {
|
||||
sm.removeRealtimeSubscription(k, oldSubs[k])
|
||||
}
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// onRealtimeSubscriptionAdded initializes or starts the realtime worker when the first subscription is added.
|
||||
// It ensures only one worker runs at a time.
|
||||
func (sm *SystemManager) onRealtimeSubscriptionAdded() {
|
||||
realtimeMutex.Lock()
|
||||
defer realtimeMutex.Unlock()
|
||||
|
||||
// Start the worker if it's not already running
|
||||
if !workerRunning {
|
||||
workerRunning = true
|
||||
// Create a new stop channel for this worker instance
|
||||
tickerStopChan = make(chan struct{})
|
||||
go sm.startRealtimeWorker()
|
||||
}
|
||||
}
|
||||
|
||||
// checkSubscriptions stops the realtime worker when there are no active subscriptions.
|
||||
// This prevents unnecessary resource usage when no clients are listening for realtime data.
|
||||
func (sm *SystemManager) checkSubscriptions() {
|
||||
if !workerRunning || len(activeSubscriptions) > 0 {
|
||||
return
|
||||
}
|
||||
|
||||
realtimeMutex.Lock()
|
||||
defer realtimeMutex.Unlock()
|
||||
|
||||
// Signal the worker to stop
|
||||
if tickerStopChan != nil {
|
||||
select {
|
||||
case tickerStopChan <- struct{}{}:
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
// Mark worker as stopped (will be reset when next subscription comes in)
|
||||
workerRunning = false
|
||||
}
|
||||
|
||||
// removeRealtimeSubscription removes a realtime subscription and checks if the worker should be stopped.
|
||||
// It only processes subscriptions with the "rt_metrics" prefix and triggers cleanup when subscriptions are removed.
|
||||
func (sm *SystemManager) removeRealtimeSubscription(subscription string, options subscriptions.SubscriptionOptions) {
|
||||
if strings.HasPrefix(subscription, "rt_metrics") {
|
||||
systemId := options.Query["system"]
|
||||
if info, ok := activeSubscriptions[systemId]; ok {
|
||||
info.connectedClients -= 1
|
||||
if info.connectedClients <= 0 {
|
||||
delete(activeSubscriptions, systemId)
|
||||
}
|
||||
}
|
||||
sm.checkSubscriptions()
|
||||
}
|
||||
}
|
||||
|
||||
// startRealtimeWorker runs the main loop for fetching realtime data from agents.
|
||||
// It continuously fetches system data and broadcasts it to subscribed clients via WebSocket.
|
||||
func (sm *SystemManager) startRealtimeWorker() {
|
||||
sm.fetchRealtimeDataAndNotify()
|
||||
tick := time.Tick(1 * time.Second)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-tickerStopChan:
|
||||
return
|
||||
case <-tick:
|
||||
if len(activeSubscriptions) == 0 {
|
||||
return
|
||||
}
|
||||
sm.fetchRealtimeDataAndNotify()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// fetchRealtimeDataAndNotify fetches realtime data for all active subscriptions and notifies the clients.
|
||||
func (sm *SystemManager) fetchRealtimeDataAndNotify() {
|
||||
for systemId, info := range activeSubscriptions {
|
||||
system, err := sm.GetSystem(systemId)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
go func() {
|
||||
data, err := system.fetchDataFromAgent(common.DataRequestOptions{CacheTimeMs: 1000})
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
bytes, err := json.Marshal(data)
|
||||
if err == nil {
|
||||
notify(sm.hub, info.subscription, bytes)
|
||||
}
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
||||
// notify broadcasts realtime data to all clients subscribed to a specific subscription.
|
||||
// It iterates through all connected clients and sends the data only to those with matching subscriptions.
|
||||
func notify(app core.App, subscription string, data []byte) error {
|
||||
message := subscriptions.Message{
|
||||
Name: subscription,
|
||||
Data: data,
|
||||
}
|
||||
for _, client := range app.SubscriptionsBroker().Clients() {
|
||||
if !client.HasSubscription(subscription) {
|
||||
continue
|
||||
}
|
||||
client.Send(message)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,135 @@
|
||||
package systems
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"errors"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/internal/entities/smart"
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
)
|
||||
|
||||
type smartFetchState struct {
|
||||
LastAttempt int64
|
||||
Successful bool
|
||||
}
|
||||
|
||||
// FetchAndSaveSmartDevices fetches SMART data from the agent and saves it to the database
|
||||
func (sys *System) FetchAndSaveSmartDevices() error {
|
||||
smartData, err := sys.FetchSmartDataFromAgent()
|
||||
if err != nil {
|
||||
sys.recordSmartFetchResult(err, 0)
|
||||
return err
|
||||
}
|
||||
err = sys.saveSmartDevices(smartData)
|
||||
sys.recordSmartFetchResult(err, len(smartData))
|
||||
return err
|
||||
}
|
||||
|
||||
// recordSmartFetchResult stores a cooldown entry for the SMART interval and marks
|
||||
// whether the last fetch produced any devices, so failed setup can retry on reconnect.
|
||||
func (sys *System) recordSmartFetchResult(err error, deviceCount int) {
|
||||
if sys.manager == nil {
|
||||
return
|
||||
}
|
||||
interval := sys.smartFetchInterval()
|
||||
success := err == nil && deviceCount > 0
|
||||
if sys.manager.hub != nil {
|
||||
sys.manager.hub.Logger().Info("SMART fetch result", "system", sys.Id, "success", success, "devices", deviceCount, "interval", interval.String(), "err", err)
|
||||
}
|
||||
sys.manager.smartFetchMap.Set(sys.Id, smartFetchState{LastAttempt: time.Now().UnixMilli(), Successful: success}, interval+time.Minute)
|
||||
}
|
||||
|
||||
// shouldFetchSmart returns true when there is no active SMART cooldown entry for this system.
|
||||
func (sys *System) shouldFetchSmart() bool {
|
||||
if sys.manager == nil {
|
||||
return true
|
||||
}
|
||||
state, ok := sys.manager.smartFetchMap.GetOk(sys.Id)
|
||||
if !ok {
|
||||
return true
|
||||
}
|
||||
return !time.UnixMilli(state.LastAttempt).Add(sys.smartFetchInterval()).After(time.Now())
|
||||
}
|
||||
|
||||
// smartFetchInterval returns the agent-provided SMART interval or the default when unset.
|
||||
func (sys *System) smartFetchInterval() time.Duration {
|
||||
if sys.smartInterval > 0 {
|
||||
return sys.smartInterval
|
||||
}
|
||||
return time.Hour
|
||||
}
|
||||
|
||||
// saveSmartDevices saves SMART device data to the smart_devices collection
|
||||
func (sys *System) saveSmartDevices(smartData map[string]smart.SmartData) error {
|
||||
if len(smartData) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
hub := sys.manager.hub
|
||||
collection, err := hub.FindCachedCollectionByNameOrId("smart_devices")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for deviceKey, device := range smartData {
|
||||
if err := sys.upsertSmartDeviceRecord(collection, deviceKey, device); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (sys *System) upsertSmartDeviceRecord(collection *core.Collection, deviceKey string, device smart.SmartData) error {
|
||||
hub := sys.manager.hub
|
||||
recordID := makeStableHashId(sys.Id, deviceKey)
|
||||
|
||||
record, err := hub.FindRecordById(collection, recordID)
|
||||
if err != nil {
|
||||
if !errors.Is(err, sql.ErrNoRows) {
|
||||
return err
|
||||
}
|
||||
record = core.NewRecord(collection)
|
||||
record.Set("id", recordID)
|
||||
}
|
||||
|
||||
name := device.DiskName
|
||||
if name == "" {
|
||||
name = deviceKey
|
||||
}
|
||||
|
||||
powerOnHours, powerCycles := extractPowerMetrics(device.Attributes)
|
||||
record.Set("system", sys.Id)
|
||||
record.Set("name", name)
|
||||
record.Set("model", device.ModelName)
|
||||
record.Set("state", device.SmartStatus)
|
||||
record.Set("capacity", device.Capacity)
|
||||
record.Set("temp", device.Temperature)
|
||||
record.Set("firmware", device.FirmwareVersion)
|
||||
record.Set("serial", device.SerialNumber)
|
||||
record.Set("type", device.DiskType)
|
||||
record.Set("hours", powerOnHours)
|
||||
record.Set("cycles", powerCycles)
|
||||
record.Set("attributes", device.Attributes)
|
||||
|
||||
return hub.SaveNoValidate(record)
|
||||
}
|
||||
|
||||
// extractPowerMetrics extracts power on hours and power cycles from SMART attributes
|
||||
func extractPowerMetrics(attributes []*smart.SmartAttribute) (powerOnHours, powerCycles uint64) {
|
||||
for _, attr := range attributes {
|
||||
nameLower := strings.ToLower(attr.Name)
|
||||
if powerOnHours == 0 && (strings.Contains(nameLower, "poweronhours") || strings.Contains(nameLower, "power_on_hours")) {
|
||||
powerOnHours = attr.RawValue
|
||||
}
|
||||
if powerCycles == 0 && ((strings.Contains(nameLower, "power") && strings.Contains(nameLower, "cycle")) || strings.Contains(nameLower, "startstopcycles")) {
|
||||
powerCycles = attr.RawValue
|
||||
}
|
||||
if powerOnHours > 0 && powerCycles > 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -0,0 +1,94 @@
|
||||
//go:build testing
|
||||
|
||||
package systems
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/internal/hub/expirymap"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestRecordSmartFetchResult(t *testing.T) {
|
||||
sm := &SystemManager{smartFetchMap: expirymap.New[smartFetchState](time.Hour)}
|
||||
t.Cleanup(sm.smartFetchMap.StopCleaner)
|
||||
|
||||
sys := &System{
|
||||
Id: "system-1",
|
||||
manager: sm,
|
||||
smartInterval: time.Hour,
|
||||
}
|
||||
|
||||
// Successful fetch with devices
|
||||
sys.recordSmartFetchResult(nil, 5)
|
||||
state, ok := sm.smartFetchMap.GetOk(sys.Id)
|
||||
assert.True(t, ok, "expected smart fetch result to be stored")
|
||||
assert.True(t, state.Successful, "expected successful fetch state to be recorded")
|
||||
|
||||
// Failed fetch
|
||||
sys.recordSmartFetchResult(errors.New("failed"), 0)
|
||||
state, ok = sm.smartFetchMap.GetOk(sys.Id)
|
||||
assert.True(t, ok, "expected failed smart fetch state to be stored")
|
||||
assert.False(t, state.Successful, "expected failed smart fetch state to be marked unsuccessful")
|
||||
|
||||
// Successful fetch but no devices
|
||||
sys.recordSmartFetchResult(nil, 0)
|
||||
state, ok = sm.smartFetchMap.GetOk(sys.Id)
|
||||
assert.True(t, ok, "expected fetch with zero devices to be stored")
|
||||
assert.False(t, state.Successful, "expected fetch with zero devices to be marked unsuccessful")
|
||||
}
|
||||
|
||||
func TestShouldFetchSmart(t *testing.T) {
|
||||
sm := &SystemManager{smartFetchMap: expirymap.New[smartFetchState](time.Hour)}
|
||||
t.Cleanup(sm.smartFetchMap.StopCleaner)
|
||||
|
||||
sys := &System{
|
||||
Id: "system-1",
|
||||
manager: sm,
|
||||
smartInterval: time.Hour,
|
||||
}
|
||||
|
||||
assert.True(t, sys.shouldFetchSmart(), "expected initial smart fetch to be allowed")
|
||||
|
||||
sys.recordSmartFetchResult(errors.New("failed"), 0)
|
||||
assert.False(t, sys.shouldFetchSmart(), "expected smart fetch to be blocked while interval entry exists")
|
||||
|
||||
sm.smartFetchMap.Remove(sys.Id)
|
||||
assert.True(t, sys.shouldFetchSmart(), "expected smart fetch to be allowed after interval entry is cleared")
|
||||
}
|
||||
|
||||
func TestShouldFetchSmart_IgnoresExtendedTTLWhenFetchIsDue(t *testing.T) {
|
||||
sm := &SystemManager{smartFetchMap: expirymap.New[smartFetchState](time.Hour)}
|
||||
t.Cleanup(sm.smartFetchMap.StopCleaner)
|
||||
|
||||
sys := &System{
|
||||
Id: "system-1",
|
||||
manager: sm,
|
||||
smartInterval: time.Hour,
|
||||
}
|
||||
|
||||
sm.smartFetchMap.Set(sys.Id, smartFetchState{
|
||||
LastAttempt: time.Now().Add(-2 * time.Hour).UnixMilli(),
|
||||
Successful: true,
|
||||
}, 10*time.Minute)
|
||||
sm.smartFetchMap.UpdateExpiration(sys.Id, 3*time.Hour)
|
||||
|
||||
assert.True(t, sys.shouldFetchSmart(), "expected fetch time to take precedence over updated TTL")
|
||||
}
|
||||
|
||||
func TestResetFailedSmartFetchState(t *testing.T) {
|
||||
sm := &SystemManager{smartFetchMap: expirymap.New[smartFetchState](time.Hour)}
|
||||
t.Cleanup(sm.smartFetchMap.StopCleaner)
|
||||
|
||||
sm.smartFetchMap.Set("system-1", smartFetchState{LastAttempt: time.Now().UnixMilli(), Successful: false}, time.Hour)
|
||||
sm.resetFailedSmartFetchState("system-1")
|
||||
_, ok := sm.smartFetchMap.GetOk("system-1")
|
||||
assert.False(t, ok, "expected failed smart fetch state to be cleared on reconnect")
|
||||
|
||||
sm.smartFetchMap.Set("system-1", smartFetchState{LastAttempt: time.Now().UnixMilli(), Successful: true}, time.Hour)
|
||||
sm.resetFailedSmartFetchState("system-1")
|
||||
_, ok = sm.smartFetchMap.GetOk("system-1")
|
||||
assert.True(t, ok, "expected successful smart fetch state to be preserved")
|
||||
}
|
||||
@@ -0,0 +1,75 @@
|
||||
//go:build testing
|
||||
|
||||
package systems
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestGetSystemdServiceId(t *testing.T) {
|
||||
t.Run("deterministic output", func(t *testing.T) {
|
||||
systemId := "sys-123"
|
||||
serviceName := "nginx.service"
|
||||
|
||||
// Call multiple times and ensure same result
|
||||
id1 := makeStableHashId(systemId, serviceName)
|
||||
id2 := makeStableHashId(systemId, serviceName)
|
||||
id3 := makeStableHashId(systemId, serviceName)
|
||||
|
||||
assert.Equal(t, id1, id2)
|
||||
assert.Equal(t, id2, id3)
|
||||
assert.NotEmpty(t, id1)
|
||||
})
|
||||
|
||||
t.Run("different inputs produce different ids", func(t *testing.T) {
|
||||
systemId1 := "sys-123"
|
||||
systemId2 := "sys-456"
|
||||
serviceName1 := "nginx.service"
|
||||
serviceName2 := "apache.service"
|
||||
|
||||
id1 := makeStableHashId(systemId1, serviceName1)
|
||||
id2 := makeStableHashId(systemId2, serviceName1)
|
||||
id3 := makeStableHashId(systemId1, serviceName2)
|
||||
id4 := makeStableHashId(systemId2, serviceName2)
|
||||
|
||||
// All IDs should be different
|
||||
assert.NotEqual(t, id1, id2)
|
||||
assert.NotEqual(t, id1, id3)
|
||||
assert.NotEqual(t, id1, id4)
|
||||
assert.NotEqual(t, id2, id3)
|
||||
assert.NotEqual(t, id2, id4)
|
||||
assert.NotEqual(t, id3, id4)
|
||||
})
|
||||
|
||||
t.Run("consistent length", func(t *testing.T) {
|
||||
testCases := []struct {
|
||||
systemId string
|
||||
serviceName string
|
||||
}{
|
||||
{"short", "short.service"},
|
||||
{"very-long-system-id-that-might-be-used-in-practice", "very-long-service-name.service"},
|
||||
{"", "empty-system.service"},
|
||||
{"empty-service", ""},
|
||||
{"", ""},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
id := makeStableHashId(tc.systemId, tc.serviceName)
|
||||
// FNV-32 produces 8 hex characters
|
||||
assert.Len(t, id, 8, "ID should be 8 characters for systemId='%s', serviceName='%s'", tc.systemId, tc.serviceName)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("hexadecimal output", func(t *testing.T) {
|
||||
id := makeStableHashId("test-system", "test-service")
|
||||
assert.NotEmpty(t, id)
|
||||
|
||||
// Should only contain hexadecimal characters
|
||||
for _, char := range id {
|
||||
assert.True(t, (char >= '0' && char <= '9') || (char >= 'a' && char <= 'f'),
|
||||
"ID should only contain hexadecimal characters, got: %s", id)
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -0,0 +1,159 @@
|
||||
//go:build testing
|
||||
|
||||
package systems
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
)
|
||||
|
||||
func TestCombinedData_MigrateDeprecatedFields(t *testing.T) {
|
||||
t.Run("Migrate NetworkSent and NetworkRecv to Bandwidth", func(t *testing.T) {
|
||||
cd := &system.CombinedData{
|
||||
Stats: system.Stats{
|
||||
NetworkSent: 1.5, // 1.5 MB
|
||||
NetworkRecv: 2.5, // 2.5 MB
|
||||
},
|
||||
}
|
||||
migrateDeprecatedFields(cd, true)
|
||||
|
||||
expectedSent := uint64(1.5 * 1024 * 1024)
|
||||
expectedRecv := uint64(2.5 * 1024 * 1024)
|
||||
|
||||
if cd.Stats.Bandwidth[0] != expectedSent {
|
||||
t.Errorf("expected Bandwidth[0] %d, got %d", expectedSent, cd.Stats.Bandwidth[0])
|
||||
}
|
||||
if cd.Stats.Bandwidth[1] != expectedRecv {
|
||||
t.Errorf("expected Bandwidth[1] %d, got %d", expectedRecv, cd.Stats.Bandwidth[1])
|
||||
}
|
||||
if cd.Stats.NetworkSent != 0 || cd.Stats.NetworkRecv != 0 {
|
||||
t.Errorf("expected NetworkSent and NetworkRecv to be reset, got %f, %f", cd.Stats.NetworkSent, cd.Stats.NetworkRecv)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("Migrate Info.Bandwidth to Info.BandwidthBytes", func(t *testing.T) {
|
||||
cd := &system.CombinedData{
|
||||
Info: system.Info{
|
||||
Bandwidth: 10.0, // 10 MB
|
||||
},
|
||||
}
|
||||
migrateDeprecatedFields(cd, true)
|
||||
|
||||
expected := uint64(10 * 1024 * 1024)
|
||||
if cd.Info.BandwidthBytes != expected {
|
||||
t.Errorf("expected BandwidthBytes %d, got %d", expected, cd.Info.BandwidthBytes)
|
||||
}
|
||||
if cd.Info.Bandwidth != 0 {
|
||||
t.Errorf("expected Info.Bandwidth to be reset, got %f", cd.Info.Bandwidth)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("Migrate DiskReadPs and DiskWritePs to DiskIO", func(t *testing.T) {
|
||||
cd := &system.CombinedData{
|
||||
Stats: system.Stats{
|
||||
DiskReadPs: 3.0, // 3 MB
|
||||
DiskWritePs: 4.0, // 4 MB
|
||||
},
|
||||
}
|
||||
migrateDeprecatedFields(cd, true)
|
||||
|
||||
expectedRead := uint64(3 * 1024 * 1024)
|
||||
expectedWrite := uint64(4 * 1024 * 1024)
|
||||
|
||||
if cd.Stats.DiskIO[0] != expectedRead {
|
||||
t.Errorf("expected DiskIO[0] %d, got %d", expectedRead, cd.Stats.DiskIO[0])
|
||||
}
|
||||
if cd.Stats.DiskIO[1] != expectedWrite {
|
||||
t.Errorf("expected DiskIO[1] %d, got %d", expectedWrite, cd.Stats.DiskIO[1])
|
||||
}
|
||||
if cd.Stats.DiskReadPs != 0 || cd.Stats.DiskWritePs != 0 {
|
||||
t.Errorf("expected DiskReadPs and DiskWritePs to be reset, got %f, %f", cd.Stats.DiskReadPs, cd.Stats.DiskWritePs)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("Migrate Info fields to Details struct", func(t *testing.T) {
|
||||
cd := &system.CombinedData{
|
||||
Stats: system.Stats{
|
||||
Mem: 16.0, // 16 GB
|
||||
},
|
||||
Info: system.Info{
|
||||
Hostname: "test-host",
|
||||
KernelVersion: "6.8.0",
|
||||
Cores: 8,
|
||||
Threads: 16,
|
||||
CpuModel: "Intel i7",
|
||||
Podman: true,
|
||||
Os: system.Linux,
|
||||
},
|
||||
}
|
||||
migrateDeprecatedFields(cd, true)
|
||||
|
||||
if cd.Details == nil {
|
||||
t.Fatal("expected Details struct to be created")
|
||||
}
|
||||
if cd.Details.Hostname != "test-host" {
|
||||
t.Errorf("expected Hostname 'test-host', got '%s'", cd.Details.Hostname)
|
||||
}
|
||||
if cd.Details.Kernel != "6.8.0" {
|
||||
t.Errorf("expected Kernel '6.8.0', got '%s'", cd.Details.Kernel)
|
||||
}
|
||||
if cd.Details.Cores != 8 {
|
||||
t.Errorf("expected Cores 8, got %d", cd.Details.Cores)
|
||||
}
|
||||
if cd.Details.Threads != 16 {
|
||||
t.Errorf("expected Threads 16, got %d", cd.Details.Threads)
|
||||
}
|
||||
if cd.Details.CpuModel != "Intel i7" {
|
||||
t.Errorf("expected CpuModel 'Intel i7', got '%s'", cd.Details.CpuModel)
|
||||
}
|
||||
if cd.Details.Podman != true {
|
||||
t.Errorf("expected Podman true, got %v", cd.Details.Podman)
|
||||
}
|
||||
if cd.Details.Os != system.Linux {
|
||||
t.Errorf("expected Os Linux, got %d", cd.Details.Os)
|
||||
}
|
||||
expectedMem := uint64(16 * 1024 * 1024 * 1024)
|
||||
if cd.Details.MemoryTotal != expectedMem {
|
||||
t.Errorf("expected MemoryTotal %d, got %d", expectedMem, cd.Details.MemoryTotal)
|
||||
}
|
||||
|
||||
if cd.Info.Hostname != "" || cd.Info.KernelVersion != "" || cd.Info.Cores != 0 || cd.Info.CpuModel != "" || cd.Info.Podman != false || cd.Info.Os != 0 {
|
||||
t.Errorf("expected Info fields to be reset, got %+v", cd.Info)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("Do not migrate if Details already exists", func(t *testing.T) {
|
||||
cd := &system.CombinedData{
|
||||
Details: &system.Details{Hostname: "existing-host"},
|
||||
Info: system.Info{
|
||||
Hostname: "deprecated-host",
|
||||
},
|
||||
}
|
||||
migrateDeprecatedFields(cd, true)
|
||||
|
||||
if cd.Details.Hostname != "existing-host" {
|
||||
t.Errorf("expected Hostname 'existing-host', got '%s'", cd.Details.Hostname)
|
||||
}
|
||||
if cd.Info.Hostname != "deprecated-host" {
|
||||
t.Errorf("expected Info.Hostname to remain 'deprecated-host', got '%s'", cd.Info.Hostname)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("Do not create details if migrateDetails is false", func(t *testing.T) {
|
||||
cd := &system.CombinedData{
|
||||
Info: system.Info{
|
||||
Hostname: "deprecated-host",
|
||||
},
|
||||
}
|
||||
migrateDeprecatedFields(cd, false)
|
||||
|
||||
if cd.Details != nil {
|
||||
t.Fatal("expected Details struct to not be created")
|
||||
}
|
||||
|
||||
if cd.Info.Hostname != "" {
|
||||
t.Errorf("expected Info.Hostname to be reset, got '%s'", cd.Info.Hostname)
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
//go:build !testing
|
||||
|
||||
package systems
|
||||
|
||||
// Background SMART fetching is enabled in production but disabled for tests (systems_test_helpers.go).
|
||||
//
|
||||
// The hub integration tests create/replace systems and clean up the test apps quickly.
|
||||
// Background SMART fetching can outlive teardown and crash in PocketBase internals (nil DB).
|
||||
func backgroundSmartFetchEnabled() bool { return true }
|
||||
@@ -0,0 +1,480 @@
|
||||
//go:build testing
|
||||
|
||||
package systems_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
"testing"
|
||||
"testing/synctest"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/internal/entities/container"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
"github.com/henrygd/beszel/internal/hub/systems"
|
||||
"github.com/henrygd/beszel/internal/tests"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestSystemManagerNew(t *testing.T) {
|
||||
hub, err := tests.NewTestHub(t.TempDir())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer hub.Cleanup()
|
||||
sm := hub.GetSystemManager()
|
||||
|
||||
user, err := tests.CreateUser(hub, "test@test.com", "testtesttest")
|
||||
require.NoError(t, err)
|
||||
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
sm.Initialize()
|
||||
|
||||
record, err := tests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "it-was-coney-island",
|
||||
"host": "the-playground-of-the-world",
|
||||
"port": "33914",
|
||||
"users": []string{user.Id},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
assert.Equal(t, "pending", record.GetString("status"), "System status should be 'pending'")
|
||||
assert.Equal(t, "pending", sm.GetSystemStatusFromStore(record.Id), "System status should be 'pending'")
|
||||
|
||||
// Verify the system host and port
|
||||
host, port := sm.GetSystemHostPort(record.Id)
|
||||
assert.Equal(t, record.GetString("host"), host, "System host should match")
|
||||
assert.Equal(t, record.GetString("port"), port, "System port should match")
|
||||
|
||||
time.Sleep(13 * time.Second)
|
||||
synctest.Wait()
|
||||
|
||||
assert.Equal(t, "pending", record.Fresh().GetString("status"), "System status should be 'pending'")
|
||||
// Verify the system was added by checking if it exists
|
||||
assert.True(t, sm.HasSystem(record.Id), "System should exist in the store")
|
||||
|
||||
time.Sleep(10 * time.Second)
|
||||
synctest.Wait()
|
||||
|
||||
// system should be set to down after 15 seconds (no websocket connection)
|
||||
assert.Equal(t, "down", sm.GetSystemStatusFromStore(record.Id), "System status should be 'down'")
|
||||
// make sure the system is down in the db
|
||||
record, err = hub.FindRecordById("systems", record.Id)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "down", record.GetString("status"), "System status should be 'down'")
|
||||
|
||||
assert.Equal(t, 1, sm.GetSystemCount(), "System count should be 1")
|
||||
|
||||
err = sm.RemoveSystem(record.Id)
|
||||
assert.NoError(t, err)
|
||||
|
||||
assert.Equal(t, 0, sm.GetSystemCount(), "System count should be 0")
|
||||
assert.False(t, sm.HasSystem(record.Id), "System should not exist in the store after removal")
|
||||
|
||||
// let's also make sure a system is removed from the store when the record is deleted
|
||||
record, err = tests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "there-was-no-place-like-it",
|
||||
"host": "in-the-whole-world",
|
||||
"port": "33914",
|
||||
"users": []string{user.Id},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
assert.True(t, sm.HasSystem(record.Id), "System should exist in the store after creation")
|
||||
|
||||
time.Sleep(8 * time.Second)
|
||||
synctest.Wait()
|
||||
assert.Equal(t, "pending", sm.GetSystemStatusFromStore(record.Id), "System status should be 'pending'")
|
||||
|
||||
sm.SetSystemStatusInDB(record.Id, "up")
|
||||
time.Sleep(time.Second)
|
||||
synctest.Wait()
|
||||
assert.Equal(t, "up", sm.GetSystemStatusFromStore(record.Id), "System status should be 'up'")
|
||||
|
||||
// make sure the system switches to down after 11 seconds
|
||||
sm.RemoveSystem(record.Id)
|
||||
sm.AddRecord(record, nil)
|
||||
assert.Equal(t, "pending", sm.GetSystemStatusFromStore(record.Id), "System status should be 'pending'")
|
||||
time.Sleep(12 * time.Second)
|
||||
synctest.Wait()
|
||||
assert.Equal(t, "down", sm.GetSystemStatusFromStore(record.Id), "System status should be 'down'")
|
||||
|
||||
// sm.SetSystemStatusInDB(record.Id, "paused")
|
||||
// time.Sleep(time.Second)
|
||||
// synctest.Wait()
|
||||
// assert.Equal(t, "paused", sm.GetSystemStatusFromStore(record.Id), "System status should be 'paused'")
|
||||
|
||||
// delete the record
|
||||
err = hub.Delete(record)
|
||||
require.NoError(t, err)
|
||||
assert.False(t, sm.HasSystem(record.Id), "System should not exist in the store after deletion")
|
||||
})
|
||||
|
||||
testOld(t, hub)
|
||||
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
time.Sleep(time.Second)
|
||||
synctest.Wait()
|
||||
|
||||
for _, systemId := range sm.GetAllSystemIDs() {
|
||||
err = sm.RemoveSystem(systemId)
|
||||
require.NoError(t, err)
|
||||
assert.False(t, sm.HasSystem(systemId), "System should not exist in the store after deletion")
|
||||
}
|
||||
|
||||
assert.Equal(t, 0, sm.GetSystemCount(), "System count should be 0")
|
||||
|
||||
// TODO: test with websocket client
|
||||
})
|
||||
}
|
||||
|
||||
func testOld(t *testing.T, hub *tests.TestHub) {
|
||||
user, err := tests.CreateUser(hub, "test@testy.com", "testtesttest")
|
||||
require.NoError(t, err)
|
||||
|
||||
sm := hub.GetSystemManager()
|
||||
assert.NotNil(t, sm)
|
||||
|
||||
// error expected when creating a user with a duplicate email
|
||||
_, err = tests.CreateUser(hub, "test@test.com", "testtesttest")
|
||||
require.Error(t, err)
|
||||
|
||||
// Test collection existence. todo: move to hub package tests
|
||||
t.Run("CollectionExistence", func(t *testing.T) {
|
||||
// Verify that required collections exist
|
||||
systems, err := hub.FindCachedCollectionByNameOrId("systems")
|
||||
require.NoError(t, err)
|
||||
assert.NotNil(t, systems)
|
||||
|
||||
systemStats, err := hub.FindCachedCollectionByNameOrId("system_stats")
|
||||
require.NoError(t, err)
|
||||
assert.NotNil(t, systemStats)
|
||||
|
||||
containerStats, err := hub.FindCachedCollectionByNameOrId("container_stats")
|
||||
require.NoError(t, err)
|
||||
assert.NotNil(t, containerStats)
|
||||
})
|
||||
|
||||
t.Run("RemoveSystem", func(t *testing.T) {
|
||||
// Get the count before adding the system
|
||||
countBefore := sm.GetSystemCount()
|
||||
|
||||
// Create a test system record
|
||||
record, err := tests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "i-even-got-lost-at-coney-island",
|
||||
"host": "but-they-found-me",
|
||||
"port": "33914",
|
||||
"users": []string{user.Id},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify the system count increased
|
||||
countAfterAdd := sm.GetSystemCount()
|
||||
assert.Equal(t, countBefore+1, countAfterAdd, "System count should increase after adding a system via event hook")
|
||||
|
||||
// Verify the system exists
|
||||
assert.True(t, sm.HasSystem(record.Id), "System should exist in the store")
|
||||
|
||||
// Remove the system
|
||||
err = sm.RemoveSystem(record.Id)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Check that the system count decreased
|
||||
countAfterRemove := sm.GetSystemCount()
|
||||
assert.Equal(t, countAfterAdd-1, countAfterRemove, "System count should decrease after removing a system")
|
||||
|
||||
// Verify the system no longer exists
|
||||
assert.False(t, sm.HasSystem(record.Id), "System should not exist in the store after removal")
|
||||
|
||||
// Verify the system is not in the list of all system IDs
|
||||
ids := sm.GetAllSystemIDs()
|
||||
assert.NotContains(t, ids, record.Id, "System ID should not be in the list of all system IDs after removal")
|
||||
|
||||
// Verify the system status is empty
|
||||
status := sm.GetSystemStatusFromStore(record.Id)
|
||||
assert.Equal(t, "", status, "System status should be empty after removal")
|
||||
|
||||
// Try to remove it again - should return an error since it's already removed
|
||||
err = sm.RemoveSystem(record.Id)
|
||||
assert.Error(t, err)
|
||||
})
|
||||
|
||||
t.Run("NewRecordPending", func(t *testing.T) {
|
||||
// Create a test system
|
||||
record, err := tests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "and-you-know",
|
||||
"host": "i-feel-very-bad",
|
||||
"port": "33914",
|
||||
"users": []string{user.Id},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Add the record to the system manager
|
||||
err = sm.AddRecord(record, nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Test filtering records by status - should be "pending" now
|
||||
filter := "status = 'pending'"
|
||||
pendingSystems, err := hub.FindRecordsByFilter("systems", filter, "-created", 0, 0, nil)
|
||||
require.NoError(t, err)
|
||||
assert.GreaterOrEqual(t, len(pendingSystems), 1)
|
||||
})
|
||||
|
||||
t.Run("SystemStatusUpdate", func(t *testing.T) {
|
||||
// Create a test system record
|
||||
record, err := tests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "we-used-to-sleep-on-the-beach",
|
||||
"host": "sleep-overnight-here",
|
||||
"port": "33914",
|
||||
"users": []string{user.Id},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Add the record to the system manager
|
||||
err = sm.AddRecord(record, nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Test status changes
|
||||
initialStatus := sm.GetSystemStatusFromStore(record.Id)
|
||||
|
||||
// Set a new status
|
||||
sm.SetSystemStatusInDB(record.Id, "up")
|
||||
|
||||
// Verify status was updated
|
||||
newStatus := sm.GetSystemStatusFromStore(record.Id)
|
||||
assert.Equal(t, "up", newStatus, "System status should be updated to 'up'")
|
||||
assert.NotEqual(t, initialStatus, newStatus, "Status should have changed")
|
||||
|
||||
// Verify the database was updated
|
||||
updatedRecord, err := hub.FindRecordById("systems", record.Id)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "up", updatedRecord.Get("status"), "Database status should match")
|
||||
})
|
||||
|
||||
t.Run("HandleSystemData", func(t *testing.T) {
|
||||
// Create a test system record
|
||||
record, err := tests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "things-changed-you-know",
|
||||
"host": "they-dont-sleep-anymore-on-the-beach",
|
||||
"port": "33914",
|
||||
"users": []string{user.Id},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create test system data
|
||||
testData := &system.CombinedData{
|
||||
Details: &system.Details{
|
||||
Hostname: "data-test.example.com",
|
||||
Kernel: "5.15.0-generic",
|
||||
Cores: 4,
|
||||
Threads: 8,
|
||||
CpuModel: "Test CPU",
|
||||
},
|
||||
Info: system.Info{
|
||||
Uptime: 3600,
|
||||
Cpu: 25.5,
|
||||
MemPct: 40.2,
|
||||
DiskPct: 60.0,
|
||||
Bandwidth: 100.0,
|
||||
AgentVersion: "1.0.0",
|
||||
},
|
||||
Stats: system.Stats{
|
||||
Cpu: 25.5,
|
||||
Mem: 16384.0,
|
||||
MemUsed: 6553.6,
|
||||
MemPct: 40.0,
|
||||
DiskTotal: 1024000.0,
|
||||
DiskUsed: 614400.0,
|
||||
DiskPct: 60.0,
|
||||
NetworkSent: 1024.0,
|
||||
NetworkRecv: 2048.0,
|
||||
},
|
||||
Containers: []*container.Stats{},
|
||||
}
|
||||
|
||||
// Test handling system data. todo: move to hub/alerts package tests
|
||||
err = hub.HandleSystemAlerts(record, testData)
|
||||
assert.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("ErrorHandling", func(t *testing.T) {
|
||||
// Try to add a non-existent record
|
||||
nonExistentId := "non_existent_id"
|
||||
err := sm.RemoveSystem(nonExistentId)
|
||||
assert.Error(t, err)
|
||||
|
||||
// Try to add a system with invalid host
|
||||
system := &systems.System{
|
||||
Host: "",
|
||||
}
|
||||
err = sm.AddSystem(system)
|
||||
assert.Error(t, err)
|
||||
})
|
||||
|
||||
t.Run("ConcurrentOperations", func(t *testing.T) {
|
||||
// Create a test system
|
||||
record, err := tests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "jfkjahkfajs",
|
||||
"host": "localhost",
|
||||
"port": "33914",
|
||||
"users": []string{user.Id},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Run concurrent operations
|
||||
const goroutines = 5
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(goroutines)
|
||||
|
||||
for i := range goroutines {
|
||||
go func(i int) {
|
||||
defer wg.Done()
|
||||
|
||||
// Alternate between different operations
|
||||
switch i % 3 {
|
||||
case 0:
|
||||
status := fmt.Sprintf("status-%d", i)
|
||||
sm.SetSystemStatusInDB(record.Id, status)
|
||||
case 1:
|
||||
_ = sm.GetSystemStatusFromStore(record.Id)
|
||||
case 2:
|
||||
_, _ = sm.GetSystemHostPort(record.Id)
|
||||
}
|
||||
}(i)
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
|
||||
// Verify system still exists and is in a valid state
|
||||
assert.True(t, sm.HasSystem(record.Id), "System should still exist after concurrent operations")
|
||||
status := sm.GetSystemStatusFromStore(record.Id)
|
||||
assert.NotEmpty(t, status, "System should have a status after concurrent operations")
|
||||
})
|
||||
|
||||
t.Run("ContextCancellation", func(t *testing.T) {
|
||||
// Create a test system record
|
||||
record, err := tests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "lkhsdfsjf",
|
||||
"host": "localhost",
|
||||
"port": "33914",
|
||||
"users": []string{user.Id},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify the system exists in the store
|
||||
assert.True(t, sm.HasSystem(record.Id), "System should exist in the store")
|
||||
|
||||
// Store the original context and cancel function
|
||||
originalCtx, originalCancel, err := sm.GetSystemContextFromStore(record.Id)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Ensure the context is not nil
|
||||
assert.NotNil(t, originalCtx, "System context should not be nil")
|
||||
assert.NotNil(t, originalCancel, "System cancel function should not be nil")
|
||||
|
||||
// Cancel the context
|
||||
originalCancel()
|
||||
|
||||
// Wait a short time for cancellation to propagate
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
|
||||
// Verify the context is done
|
||||
select {
|
||||
case <-originalCtx.Done():
|
||||
// Context was properly cancelled
|
||||
default:
|
||||
t.Fatal("Context was not cancelled")
|
||||
}
|
||||
|
||||
// Verify the system is still in the store (cancellation shouldn't remove it)
|
||||
assert.True(t, sm.HasSystem(record.Id), "System should still exist after context cancellation")
|
||||
|
||||
// Explicitly remove the system
|
||||
err = sm.RemoveSystem(record.Id)
|
||||
assert.NoError(t, err, "RemoveSystem should succeed")
|
||||
|
||||
// Verify the system is removed
|
||||
assert.False(t, sm.HasSystem(record.Id), "System should be removed after RemoveSystem")
|
||||
|
||||
// Try to remove it again - should return an error
|
||||
err = sm.RemoveSystem(record.Id)
|
||||
assert.Error(t, err, "RemoveSystem should fail for non-existent system")
|
||||
|
||||
// Add the system back
|
||||
err = sm.AddRecord(record, nil)
|
||||
require.NoError(t, err, "AddRecord should succeed")
|
||||
|
||||
// Verify the system is back in the store
|
||||
assert.True(t, sm.HasSystem(record.Id), "System should exist after re-adding")
|
||||
|
||||
// Verify a new context was created
|
||||
newCtx, newCancel, err := sm.GetSystemContextFromStore(record.Id)
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, newCtx, "New system context should not be nil")
|
||||
assert.NotNil(t, newCancel, "New system cancel function should not be nil")
|
||||
assert.NotEqual(t, originalCtx, newCtx, "New context should be different from original")
|
||||
|
||||
// Clean up
|
||||
err = sm.RemoveSystem(record.Id)
|
||||
assert.NoError(t, err)
|
||||
})
|
||||
}
|
||||
|
||||
func TestHasUser(t *testing.T) {
|
||||
hub, err := tests.NewTestHub(t.TempDir())
|
||||
require.NoError(t, err)
|
||||
defer hub.Cleanup()
|
||||
|
||||
sm := hub.GetSystemManager()
|
||||
err = sm.Initialize()
|
||||
require.NoError(t, err)
|
||||
|
||||
user1, err := tests.CreateUser(hub, "user1@test.com", "password123")
|
||||
require.NoError(t, err)
|
||||
user2, err := tests.CreateUser(hub, "user2@test.com", "password123")
|
||||
require.NoError(t, err)
|
||||
|
||||
systemRecord, err := tests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "has-user-test",
|
||||
"host": "127.0.0.1",
|
||||
"port": "33914",
|
||||
"users": []string{user1.Id},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
sys, err := sm.GetSystemFromStore(systemRecord.Id)
|
||||
require.NoError(t, err)
|
||||
|
||||
t.Run("user in list returns true", func(t *testing.T) {
|
||||
assert.True(t, sys.HasUser(hub, user1))
|
||||
})
|
||||
|
||||
t.Run("user not in list returns false", func(t *testing.T) {
|
||||
assert.False(t, sys.HasUser(hub, user2))
|
||||
})
|
||||
|
||||
t.Run("unknown user ID returns false", func(t *testing.T) {
|
||||
assert.False(t, sys.HasUser(hub, nil))
|
||||
})
|
||||
|
||||
t.Run("SHARE_ALL_SYSTEMS=true grants access to non-member", func(t *testing.T) {
|
||||
t.Setenv("SHARE_ALL_SYSTEMS", "true")
|
||||
assert.True(t, sys.HasUser(hub, user2))
|
||||
})
|
||||
|
||||
t.Run("BESZEL_HUB_SHARE_ALL_SYSTEMS=true grants access to non-member", func(t *testing.T) {
|
||||
t.Setenv("BESZEL_HUB_SHARE_ALL_SYSTEMS", "true")
|
||||
assert.True(t, sys.HasUser(hub, user2))
|
||||
})
|
||||
|
||||
t.Run("additional user works", func(t *testing.T) {
|
||||
assert.False(t, sys.HasUser(hub, user2))
|
||||
systemRecord.Set("users", []string{user1.Id, user2.Id})
|
||||
err = hub.Save(systemRecord)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, sys.HasUser(hub, user1))
|
||||
assert.True(t, sys.HasUser(hub, user2))
|
||||
})
|
||||
}
|
||||
@@ -0,0 +1,127 @@
|
||||
//go:build testing
|
||||
|
||||
package systems
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
entities "github.com/henrygd/beszel/internal/entities/system"
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
)
|
||||
|
||||
// The hub integration tests create/replace systems and cleanup the test apps quickly.
|
||||
// Background SMART fetching can outlive teardown and crash in PocketBase internals (nil DB).
|
||||
//
|
||||
// We keep the explicit SMART refresh endpoint / method available, but disable
|
||||
// the automatic background fetch during tests.
|
||||
func backgroundSmartFetchEnabled() bool { return false }
|
||||
|
||||
// TESTING ONLY: GetSystemCount returns the number of systems in the store
|
||||
func (sm *SystemManager) GetSystemCount() int {
|
||||
return sm.systems.Length()
|
||||
}
|
||||
|
||||
// TESTING ONLY: HasSystem checks if a system with the given ID exists in the store
|
||||
func (sm *SystemManager) HasSystem(systemID string) bool {
|
||||
return sm.systems.Has(systemID)
|
||||
}
|
||||
|
||||
// TESTING ONLY: GetSystemStatusFromStore returns the status of a system with the given ID
|
||||
// Returns an empty string if the system doesn't exist
|
||||
func (sm *SystemManager) GetSystemStatusFromStore(systemID string) string {
|
||||
sys, ok := sm.systems.GetOk(systemID)
|
||||
if !ok {
|
||||
return ""
|
||||
}
|
||||
return sys.Status
|
||||
}
|
||||
|
||||
// TESTING ONLY: GetSystemContextFromStore returns the context and cancel function for a system
|
||||
func (sm *SystemManager) GetSystemContextFromStore(systemID string) (context.Context, context.CancelFunc, error) {
|
||||
sys, ok := sm.systems.GetOk(systemID)
|
||||
if !ok {
|
||||
return nil, nil, fmt.Errorf("no system")
|
||||
}
|
||||
return sys.ctx, sys.cancel, nil
|
||||
}
|
||||
|
||||
// TESTING ONLY: GetSystemFromStore returns a store from the system
|
||||
func (sm *SystemManager) GetSystemFromStore(systemID string) (*System, error) {
|
||||
sys, ok := sm.systems.GetOk(systemID)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("no system")
|
||||
}
|
||||
return sys, nil
|
||||
}
|
||||
|
||||
// TESTING ONLY: GetAllSystemIDs returns a slice of all system IDs in the store
|
||||
func (sm *SystemManager) GetAllSystemIDs() []string {
|
||||
data := sm.systems.GetAll()
|
||||
ids := make([]string, 0, len(data))
|
||||
for id := range data {
|
||||
ids = append(ids, id)
|
||||
}
|
||||
return ids
|
||||
}
|
||||
|
||||
// TESTING ONLY: GetSystemData returns the combined data for a system with the given ID
|
||||
// Returns nil if the system doesn't exist
|
||||
// This method is intended for testing
|
||||
func (sm *SystemManager) GetSystemData(systemID string) *entities.CombinedData {
|
||||
sys, ok := sm.systems.GetOk(systemID)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
return sys.data
|
||||
}
|
||||
|
||||
// TESTING ONLY: GetSystemHostPort returns the host and port for a system with the given ID
|
||||
// Returns empty strings if the system doesn't exist
|
||||
func (sm *SystemManager) GetSystemHostPort(systemID string) (string, string) {
|
||||
sys, ok := sm.systems.GetOk(systemID)
|
||||
if !ok {
|
||||
return "", ""
|
||||
}
|
||||
return sys.Host, sys.Port
|
||||
}
|
||||
|
||||
// TESTING ONLY: SetSystemStatusInDB sets the status of a system directly and updates the database record
|
||||
// This is intended for testing
|
||||
// Returns false if the system doesn't exist
|
||||
func (sm *SystemManager) SetSystemStatusInDB(systemID string, status string) bool {
|
||||
if !sm.HasSystem(systemID) {
|
||||
return false
|
||||
}
|
||||
|
||||
// Update the database record
|
||||
record, err := sm.hub.FindRecordById("systems", systemID)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
record.Set("status", status)
|
||||
err = sm.hub.Save(record)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// TESTING ONLY: RemoveAllSystems removes all systems from the store
|
||||
func (sm *SystemManager) RemoveAllSystems() {
|
||||
for _, system := range sm.systems.GetAll() {
|
||||
sm.RemoveSystem(system.Id)
|
||||
}
|
||||
sm.smartFetchMap.StopCleaner()
|
||||
}
|
||||
|
||||
func (s *System) StopUpdater() {
|
||||
s.cancel()
|
||||
}
|
||||
|
||||
func (s *System) CreateRecords(data *entities.CombinedData) (*core.Record, error) {
|
||||
s.data = data
|
||||
return s.createRecords(data)
|
||||
}
|
||||
Reference in New Issue
Block a user