Initial commit: Beszel fork with Domain Locker integration

This commit is contained in:
Tomas Dvorak
2026-04-21 15:39:43 +02:00
commit 363d708e91
440 changed files with 160889 additions and 0 deletions
+785
View File
@@ -0,0 +1,785 @@
package systems
import (
"context"
"encoding/json"
"errors"
"fmt"
"hash/fnv"
"math/rand"
"net"
"strings"
"sync/atomic"
"time"
"github.com/henrygd/beszel/internal/common"
"github.com/henrygd/beszel/internal/hub/transport"
"github.com/henrygd/beszel/internal/hub/utils"
"github.com/henrygd/beszel/internal/hub/ws"
"github.com/henrygd/beszel/internal/entities/container"
"github.com/henrygd/beszel/internal/entities/smart"
"github.com/henrygd/beszel/internal/entities/system"
"github.com/henrygd/beszel/internal/entities/systemd"
"github.com/henrygd/beszel"
"github.com/blang/semver"
"github.com/fxamacker/cbor/v2"
"github.com/lxzan/gws"
"github.com/pocketbase/dbx"
"github.com/pocketbase/pocketbase/core"
"golang.org/x/crypto/ssh"
)
type System struct {
Id string `db:"id"`
Host string `db:"host"`
Port string `db:"port"`
Status string `db:"status"`
manager *SystemManager // Manager that this system belongs to
client *ssh.Client // SSH client for fetching data
sshTransport *transport.SSHTransport // SSH transport for requests
data *system.CombinedData // system data from agent
ctx context.Context // Context for stopping the updater
cancel context.CancelFunc // Stops and removes system from updater
WsConn *ws.WsConn // Handler for agent WebSocket connection
agentVersion semver.Version // Agent version
updateTicker *time.Ticker // Ticker for updating the system
detailsFetched atomic.Bool // True if static system details have been fetched and saved
smartFetching atomic.Bool // True if SMART devices are currently being fetched
smartInterval time.Duration // Interval for periodic SMART data updates
}
func (sm *SystemManager) NewSystem(systemId string) *System {
system := &System{
Id: systemId,
data: &system.CombinedData{},
}
system.ctx, system.cancel = system.getContext()
return system
}
// StartUpdater starts the system updater.
// It first fetches the data from the agent then updates the records.
// If the data is not found or the system is down, it sets the system down.
func (sys *System) StartUpdater() {
// Channel that can be used to set the system down. Currently only used to
// allow a short delay for reconnection after websocket connection is closed.
var downChan chan struct{}
// Add random jitter to first WebSocket connection to prevent
// clustering if all agents are started at the same time.
// SSH connections during hub startup are already staggered.
var jitter <-chan time.Time
if sys.WsConn != nil {
jitter = getJitter()
// use the websocket connection's down channel to set the system down
downChan = sys.WsConn.DownChan
} else {
// if the system does not have a websocket connection, wait before updating
// to allow the agent to connect via websocket (makes sure fingerprint is set).
time.Sleep(11 * time.Second)
}
// update immediately if system is not paused (only for ws connections)
// we'll wait a minute before connecting via SSH to prioritize ws connections
if sys.Status != paused && sys.ctx.Err() == nil {
if err := sys.update(); err != nil {
_ = sys.setDown(err)
}
}
sys.updateTicker = time.NewTicker(time.Duration(interval) * time.Millisecond)
// Go 1.23+ will automatically stop the ticker when the system is garbage collected, however we seem to need this or testing/synctest will block even if calling runtime.GC()
defer sys.updateTicker.Stop()
for {
select {
case <-sys.ctx.Done():
return
case <-sys.updateTicker.C:
if err := sys.update(); err != nil {
_ = sys.setDown(err)
}
case <-downChan:
sys.WsConn = nil
downChan = nil
_ = sys.setDown(nil)
case <-jitter:
sys.updateTicker.Reset(time.Duration(interval) * time.Millisecond)
if err := sys.update(); err != nil {
_ = sys.setDown(err)
}
}
}
}
// update updates the system data and records.
func (sys *System) update() error {
if sys.Status == paused {
sys.handlePaused()
return nil
}
options := common.DataRequestOptions{
CacheTimeMs: uint16(interval),
}
// fetch system details if not already fetched
if !sys.detailsFetched.Load() {
options.IncludeDetails = true
}
data, err := sys.fetchDataFromAgent(options)
if err != nil {
return err
}
// ensure deprecated fields from older agents are migrated to current fields
migrateDeprecatedFields(data, !sys.detailsFetched.Load())
// create system records
_, err = sys.createRecords(data)
// if details were included and fetched successfully, mark details as fetched and update smart interval if set by agent
if err == nil && data.Details != nil {
sys.detailsFetched.Store(true)
// update smart interval if it's set on the agent side
if data.Details.SmartInterval > 0 {
sys.smartInterval = data.Details.SmartInterval
sys.manager.hub.Logger().Info("SMART interval updated from agent details", "system", sys.Id, "interval", sys.smartInterval.String())
// make sure we reset expiration of lastFetch to remain as long as the new smart interval
// to prevent premature expiration leading to new fetch if interval is different.
sys.manager.smartFetchMap.UpdateExpiration(sys.Id, sys.smartInterval+time.Minute)
}
}
// Fetch and save SMART devices when system first comes online or at intervals
if backgroundSmartFetchEnabled() && sys.detailsFetched.Load() {
if sys.smartInterval <= 0 {
sys.smartInterval = time.Hour
}
if sys.shouldFetchSmart() && sys.smartFetching.CompareAndSwap(false, true) {
sys.manager.hub.Logger().Info("SMART fetch", "system", sys.Id, "interval", sys.smartInterval.String())
go func() {
defer sys.smartFetching.Store(false)
_ = sys.FetchAndSaveSmartDevices()
}()
}
}
return err
}
func (sys *System) handlePaused() {
if sys.WsConn == nil {
// if the system is paused and there's no websocket connection, remove the system
_ = sys.manager.RemoveSystem(sys.Id)
} else {
// Send a ping to the agent to keep the connection alive if the system is paused
if err := sys.WsConn.Ping(); err != nil {
sys.manager.hub.Logger().Warn("Failed to ping agent", "system", sys.Id, "err", err)
_ = sys.manager.RemoveSystem(sys.Id)
}
}
}
// createRecords updates the system record and adds system_stats and container_stats records
func (sys *System) createRecords(data *system.CombinedData) (*core.Record, error) {
systemRecord, err := sys.getRecord(sys.manager.hub)
if err != nil {
return nil, err
}
hub := sys.manager.hub
err = hub.RunInTransaction(func(txApp core.App) error {
// add system_stats record
systemStatsCollection, err := txApp.FindCachedCollectionByNameOrId("system_stats")
if err != nil {
return err
}
systemStatsRecord := core.NewRecord(systemStatsCollection)
systemStatsRecord.Set("system", systemRecord.Id)
systemStatsRecord.Set("stats", data.Stats)
systemStatsRecord.Set("type", "1m")
if err := txApp.SaveNoValidate(systemStatsRecord); err != nil {
return err
}
// add containers and container_stats records
if len(data.Containers) > 0 {
if data.Containers[0].Id != "" {
if err := createContainerRecords(txApp, data.Containers, sys.Id); err != nil {
return err
}
}
containerStatsCollection, err := txApp.FindCachedCollectionByNameOrId("container_stats")
if err != nil {
return err
}
containerStatsRecord := core.NewRecord(containerStatsCollection)
containerStatsRecord.Set("system", systemRecord.Id)
containerStatsRecord.Set("stats", data.Containers)
containerStatsRecord.Set("type", "1m")
if err := txApp.SaveNoValidate(containerStatsRecord); err != nil {
return err
}
}
// add new systemd_stats record
if len(data.SystemdServices) > 0 {
if err := createSystemdStatsRecords(txApp, data.SystemdServices, sys.Id); err != nil {
return err
}
}
// add system details record
if data.Details != nil {
if err := createSystemDetailsRecord(txApp, data.Details, sys.Id); err != nil {
return err
}
}
// update system record (do this last because it triggers alerts and we need above records to be inserted first)
systemRecord.Set("status", up)
systemRecord.Set("info", data.Info)
if err := txApp.SaveNoValidate(systemRecord); err != nil {
return err
}
return nil
})
return systemRecord, err
}
func createSystemDetailsRecord(app core.App, data *system.Details, systemId string) error {
collectionName := "system_details"
params := dbx.Params{
"id": systemId,
"system": systemId,
"hostname": data.Hostname,
"kernel": data.Kernel,
"cores": data.Cores,
"threads": data.Threads,
"cpu": data.CpuModel,
"os": data.Os,
"os_name": data.OsName,
"arch": data.Arch,
"memory": data.MemoryTotal,
"podman": data.Podman,
"updated": time.Now().UTC(),
}
result, err := app.DB().Update(collectionName, params, dbx.HashExp{"id": systemId}).Execute()
rowsAffected, _ := result.RowsAffected()
if err != nil || rowsAffected == 0 {
_, err = app.DB().Insert(collectionName, params).Execute()
}
return err
}
func createSystemdStatsRecords(app core.App, data []*systemd.Service, systemId string) error {
if len(data) == 0 {
return nil
}
// shared params for all records
params := dbx.Params{
"system": systemId,
"updated": time.Now().UTC().UnixMilli(),
}
valueStrings := make([]string, 0, len(data))
for i, service := range data {
suffix := fmt.Sprintf("%d", i)
valueStrings = append(valueStrings, fmt.Sprintf("({:id%[1]s}, {:system}, {:name%[1]s}, {:state%[1]s}, {:sub%[1]s}, {:cpu%[1]s}, {:cpuPeak%[1]s}, {:memory%[1]s}, {:memPeak%[1]s}, {:updated})", suffix))
params["id"+suffix] = makeStableHashId(systemId, service.Name)
params["name"+suffix] = service.Name
params["state"+suffix] = service.State
params["sub"+suffix] = service.Sub
params["cpu"+suffix] = service.Cpu
params["cpuPeak"+suffix] = service.CpuPeak
params["memory"+suffix] = service.Mem
params["memPeak"+suffix] = service.MemPeak
}
queryString := fmt.Sprintf(
"INSERT INTO systemd_services (id, system, name, state, sub, cpu, cpuPeak, memory, memPeak, updated) VALUES %s ON CONFLICT(id) DO UPDATE SET system = excluded.system, name = excluded.name, state = excluded.state, sub = excluded.sub, cpu = excluded.cpu, cpuPeak = excluded.cpuPeak, memory = excluded.memory, memPeak = excluded.memPeak, updated = excluded.updated",
strings.Join(valueStrings, ","),
)
_, err := app.DB().NewQuery(queryString).Bind(params).Execute()
return err
}
// createContainerRecords creates container records
func createContainerRecords(app core.App, data []*container.Stats, systemId string) error {
if len(data) == 0 {
return nil
}
// shared params for all records
params := dbx.Params{
"system": systemId,
"updated": time.Now().UTC().UnixMilli(),
}
valueStrings := make([]string, 0, len(data))
for i, container := range data {
suffix := fmt.Sprintf("%d", i)
valueStrings = append(valueStrings, fmt.Sprintf("({:id%[1]s}, {:system}, {:name%[1]s}, {:image%[1]s}, {:ports%[1]s}, {:status%[1]s}, {:health%[1]s}, {:cpu%[1]s}, {:memory%[1]s}, {:net%[1]s}, {:updated})", suffix))
params["id"+suffix] = container.Id
params["name"+suffix] = container.Name
params["image"+suffix] = container.Image
params["ports"+suffix] = container.Ports
params["status"+suffix] = container.Status
params["health"+suffix] = container.Health
params["cpu"+suffix] = container.Cpu
params["memory"+suffix] = container.Mem
netBytes := container.Bandwidth[0] + container.Bandwidth[1]
if netBytes == 0 {
netBytes = uint64((container.NetworkSent + container.NetworkRecv) * 1024 * 1024)
}
params["net"+suffix] = netBytes
}
queryString := fmt.Sprintf(
"INSERT INTO containers (id, system, name, image, ports, status, health, cpu, memory, net, updated) VALUES %s ON CONFLICT(id) DO UPDATE SET system = excluded.system, name = excluded.name, image = excluded.image, ports = excluded.ports, status = excluded.status, health = excluded.health, cpu = excluded.cpu, memory = excluded.memory, net = excluded.net, updated = excluded.updated",
strings.Join(valueStrings, ","),
)
_, err := app.DB().NewQuery(queryString).Bind(params).Execute()
return err
}
// getRecord retrieves the system record from the database.
// If the record is not found, it removes the system from the manager.
func (sys *System) getRecord(app core.App) (*core.Record, error) {
record, err := app.FindRecordById("systems", sys.Id)
if err != nil || record == nil {
_ = sys.manager.RemoveSystem(sys.Id)
return nil, err
}
return record, nil
}
// HasUser checks if the given user is in the system's users list.
// Returns true if SHARE_ALL_SYSTEMS is enabled (any authenticated user can access any system).
func (sys *System) HasUser(app core.App, user *core.Record) bool {
if user == nil {
return false
}
if v, _ := utils.GetEnv("SHARE_ALL_SYSTEMS"); v == "true" {
return true
}
var recordData = struct {
Users string
}{}
err := app.DB().NewQuery("SELECT users FROM systems WHERE id={:id}").
Bind(dbx.Params{"id": sys.Id}).
One(&recordData)
if err != nil || recordData.Users == "" {
return false
}
return strings.Contains(recordData.Users, user.Id)
}
// setDown marks a system as down in the database.
// It takes the original error that caused the system to go down and returns any error
// encountered during the process of updating the system status.
func (sys *System) setDown(originalError error) error {
if sys.Status == down || sys.Status == paused {
return nil
}
record, err := sys.getRecord(sys.manager.hub)
if err != nil {
return err
}
if originalError != nil {
sys.manager.hub.Logger().Error("System down", "system", record.GetString("name"), "err", originalError)
}
record.Set("status", down)
return sys.manager.hub.SaveNoValidate(record)
}
func (sys *System) getContext() (context.Context, context.CancelFunc) {
if sys.ctx == nil {
sys.ctx, sys.cancel = context.WithCancel(context.Background())
}
return sys.ctx, sys.cancel
}
// request sends a request to the agent, trying WebSocket first, then SSH.
// This is the unified request method that uses the transport abstraction.
func (sys *System) request(ctx context.Context, action common.WebSocketAction, req any, dest any) error {
// Try WebSocket first
if sys.WsConn != nil && sys.WsConn.IsConnected() {
wsTransport := transport.NewWebSocketTransport(sys.WsConn)
if err := wsTransport.Request(ctx, action, req, dest); err == nil {
return nil
} else if !shouldFallbackToSSH(err) {
return err
} else if shouldCloseWebSocket(err) {
sys.closeWebSocketConnection()
}
}
// Fall back to SSH if WebSocket fails
if err := sys.ensureSSHTransport(); err != nil {
return err
}
err := sys.sshTransport.RequestWithRetry(ctx, action, req, dest, 1)
// Keep legacy SSH client/version fields in sync for other code paths.
if sys.sshTransport != nil {
sys.client = sys.sshTransport.GetClient()
sys.agentVersion = sys.sshTransport.GetAgentVersion()
}
return err
}
func shouldFallbackToSSH(err error) bool {
if err == nil {
return false
}
if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
return true
}
if errors.Is(err, gws.ErrConnClosed) {
return true
}
return errors.Is(err, transport.ErrWebSocketNotConnected)
}
func shouldCloseWebSocket(err error) bool {
if err == nil {
return false
}
return errors.Is(err, gws.ErrConnClosed) || errors.Is(err, transport.ErrWebSocketNotConnected)
}
// ensureSSHTransport ensures the SSH transport is initialized and connected.
func (sys *System) ensureSSHTransport() error {
if sys.sshTransport == nil {
if sys.manager.sshConfig == nil {
if err := sys.manager.createSSHClientConfig(); err != nil {
return err
}
}
sys.sshTransport = transport.NewSSHTransport(transport.SSHTransportConfig{
Host: sys.Host,
Port: sys.Port,
Config: sys.manager.sshConfig,
Timeout: 4 * time.Second,
})
}
// Sync client state with transport
if sys.client != nil {
sys.sshTransport.SetClient(sys.client)
sys.sshTransport.SetAgentVersion(sys.agentVersion)
}
return nil
}
// fetchDataFromAgent attempts to fetch data from the agent, prioritizing WebSocket if available.
func (sys *System) fetchDataFromAgent(options common.DataRequestOptions) (*system.CombinedData, error) {
if sys.data == nil {
sys.data = &system.CombinedData{}
}
if sys.WsConn != nil && sys.WsConn.IsConnected() {
wsData, err := sys.fetchDataViaWebSocket(options)
if err == nil {
return wsData, nil
}
// close the WebSocket connection if error and try SSH
sys.closeWebSocketConnection()
}
sshData, err := sys.fetchDataViaSSH(options)
if err != nil {
return nil, err
}
return sshData, nil
}
func (sys *System) fetchDataViaWebSocket(options common.DataRequestOptions) (*system.CombinedData, error) {
if sys.WsConn == nil || !sys.WsConn.IsConnected() {
return nil, errors.New("no websocket connection")
}
wsTransport := transport.NewWebSocketTransport(sys.WsConn)
err := wsTransport.Request(context.Background(), common.GetData, options, sys.data)
if err != nil {
return nil, err
}
return sys.data, nil
}
// FetchContainerInfoFromAgent fetches container info from the agent
func (sys *System) FetchContainerInfoFromAgent(containerID string) (string, error) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
var result string
err := sys.request(ctx, common.GetContainerInfo, common.ContainerInfoRequest{ContainerID: containerID}, &result)
return result, err
}
// FetchContainerLogsFromAgent fetches container logs from the agent
func (sys *System) FetchContainerLogsFromAgent(containerID string) (string, error) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
var result string
err := sys.request(ctx, common.GetContainerLogs, common.ContainerLogsRequest{ContainerID: containerID}, &result)
return result, err
}
// FetchSystemdInfoFromAgent fetches detailed systemd service information from the agent
func (sys *System) FetchSystemdInfoFromAgent(serviceName string) (systemd.ServiceDetails, error) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
var result systemd.ServiceDetails
err := sys.request(ctx, common.GetSystemdInfo, common.SystemdInfoRequest{ServiceName: serviceName}, &result)
return result, err
}
// FetchSmartDataFromAgent fetches SMART data from the agent
func (sys *System) FetchSmartDataFromAgent() (map[string]smart.SmartData, error) {
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
defer cancel()
var result map[string]smart.SmartData
err := sys.request(ctx, common.GetSmartData, nil, &result)
return result, err
}
func makeStableHashId(strings ...string) string {
hash := fnv.New32a()
for _, str := range strings {
hash.Write([]byte(str))
}
return fmt.Sprintf("%x", hash.Sum32())
}
// fetchDataViaSSH handles fetching data using SSH.
// This function encapsulates the original SSH logic.
// It updates sys.data directly upon successful fetch.
func (sys *System) fetchDataViaSSH(options common.DataRequestOptions) (*system.CombinedData, error) {
err := sys.runSSHOperation(4*time.Second, 1, func(session *ssh.Session) (bool, error) {
stdout, err := session.StdoutPipe()
if err != nil {
return false, err
}
stdin, stdinErr := session.StdinPipe()
if err := session.Shell(); err != nil {
return false, err
}
*sys.data = system.CombinedData{}
if sys.agentVersion.GTE(beszel.MinVersionAgentResponse) && stdinErr == nil {
req := common.HubRequest[any]{Action: common.GetData, Data: options}
_ = cbor.NewEncoder(stdin).Encode(req)
_ = stdin.Close()
var resp common.AgentResponse
if decErr := cbor.NewDecoder(stdout).Decode(&resp); decErr == nil && resp.SystemData != nil {
*sys.data = *resp.SystemData
if err := session.Wait(); err != nil {
return false, err
}
return false, nil
}
}
var decodeErr error
if sys.agentVersion.GTE(beszel.MinVersionCbor) {
decodeErr = cbor.NewDecoder(stdout).Decode(sys.data)
} else {
decodeErr = json.NewDecoder(stdout).Decode(sys.data)
}
if decodeErr != nil {
return true, decodeErr
}
if err := session.Wait(); err != nil {
return false, err
}
return false, nil
})
if err != nil {
return nil, err
}
return sys.data, nil
}
// runSSHOperation establishes an SSH session and executes the provided operation.
// The operation can request a retry by returning true as the first return value.
func (sys *System) runSSHOperation(timeout time.Duration, retries int, operation func(*ssh.Session) (bool, error)) error {
for attempt := 0; attempt <= retries; attempt++ {
if sys.client == nil || sys.Status == down {
if err := sys.createSSHClient(); err != nil {
return err
}
}
session, err := sys.createSessionWithTimeout(timeout)
if err != nil {
if attempt >= retries {
return err
}
sys.manager.hub.Logger().Warn("Session closed. Retrying...", "host", sys.Host, "port", sys.Port, "err", err)
sys.closeSSHConnection()
continue
}
retry, opErr := func() (bool, error) {
defer session.Close()
return operation(session)
}()
if opErr == nil {
return nil
}
if retry {
sys.closeSSHConnection()
if attempt < retries {
continue
}
}
return opErr
}
return fmt.Errorf("ssh operation failed")
}
// createSSHClient creates a new SSH client for the system
func (s *System) createSSHClient() error {
if s.manager.sshConfig == nil {
if err := s.manager.createSSHClientConfig(); err != nil {
return err
}
}
network := "tcp"
host := s.Host
if strings.HasPrefix(host, "/") {
network = "unix"
} else {
host = net.JoinHostPort(host, s.Port)
}
var err error
s.client, err = ssh.Dial(network, host, s.manager.sshConfig)
if err != nil {
return err
}
s.agentVersion, _ = extractAgentVersion(string(s.client.Conn.ServerVersion()))
s.manager.resetFailedSmartFetchState(s.Id)
return nil
}
// createSessionWithTimeout creates a new SSH session with a timeout to avoid hanging
// in case of network issues
func (sys *System) createSessionWithTimeout(timeout time.Duration) (*ssh.Session, error) {
if sys.client == nil {
return nil, fmt.Errorf("client not initialized")
}
ctx, cancel := context.WithTimeout(sys.ctx, timeout)
defer cancel()
sessionChan := make(chan *ssh.Session, 1)
errChan := make(chan error, 1)
go func() {
if session, err := sys.client.NewSession(); err != nil {
errChan <- err
} else {
sessionChan <- session
}
}()
select {
case session := <-sessionChan:
return session, nil
case err := <-errChan:
return nil, err
case <-ctx.Done():
return nil, fmt.Errorf("timeout")
}
}
// closeSSHConnection closes the SSH connection but keeps the system in the manager
func (sys *System) closeSSHConnection() {
if sys.sshTransport != nil {
sys.sshTransport.Close()
}
if sys.client != nil {
sys.client.Close()
sys.client = nil
}
}
// closeWebSocketConnection closes the WebSocket connection but keeps the system in the manager
// to allow updating via SSH. It will be removed if the WS connection is re-established.
// The system will be set as down a few seconds later if the connection is not re-established.
func (sys *System) closeWebSocketConnection() {
if sys.WsConn != nil {
sys.WsConn.Close(nil)
}
}
// extractAgentVersion extracts the beszel version from SSH server version string
func extractAgentVersion(versionString string) (semver.Version, error) {
_, after, _ := strings.Cut(versionString, "_")
return semver.Parse(after)
}
// getJitter returns a channel that will be triggered after a random delay
// between 51% and 95% of the interval.
// This is used to stagger the initial WebSocket connections to prevent clustering.
func getJitter() <-chan time.Time {
minPercent := 51
maxPercent := 95
jitterRange := maxPercent - minPercent
msDelay := (interval * minPercent / 100) + rand.Intn(interval*jitterRange/100)
return time.After(time.Duration(msDelay) * time.Millisecond)
}
// migrateDeprecatedFields moves values from deprecated fields to their new locations if the new
// fields are not already populated. Deprecated fields and refs may be removed at least 30 days
// and one minor version release after the release that includes the migration.
//
// This is run when processing incoming system data from agents, which may be on older versions.
func migrateDeprecatedFields(cd *system.CombinedData, createDetails bool) {
// migration added 0.19.0
if cd.Stats.Bandwidth[0] == 0 && cd.Stats.Bandwidth[1] == 0 {
cd.Stats.Bandwidth[0] = uint64(cd.Stats.NetworkSent * 1024 * 1024)
cd.Stats.Bandwidth[1] = uint64(cd.Stats.NetworkRecv * 1024 * 1024)
cd.Stats.NetworkSent, cd.Stats.NetworkRecv = 0, 0
}
// migration added 0.19.0
if cd.Info.BandwidthBytes == 0 {
cd.Info.BandwidthBytes = uint64(cd.Info.Bandwidth * 1024 * 1024)
cd.Info.Bandwidth = 0
}
// migration added 0.19.0
if cd.Stats.DiskIO[0] == 0 && cd.Stats.DiskIO[1] == 0 {
cd.Stats.DiskIO[0] = uint64(cd.Stats.DiskReadPs * 1024 * 1024)
cd.Stats.DiskIO[1] = uint64(cd.Stats.DiskWritePs * 1024 * 1024)
cd.Stats.DiskReadPs, cd.Stats.DiskWritePs = 0, 0
}
// migration added 0.19.0 - Move deprecated Info fields to Details struct
if cd.Details == nil && cd.Info.Hostname != "" {
if createDetails {
cd.Details = &system.Details{
Hostname: cd.Info.Hostname,
Kernel: cd.Info.KernelVersion,
Cores: cd.Info.Cores,
Threads: cd.Info.Threads,
CpuModel: cd.Info.CpuModel,
Podman: cd.Info.Podman,
Os: cd.Info.Os,
MemoryTotal: uint64(cd.Stats.Mem * 1024 * 1024 * 1024),
}
}
// zero the deprecated fields to prevent saving them in systems.info DB json payload
cd.Info.Hostname = ""
cd.Info.KernelVersion = ""
cd.Info.Cores = 0
cd.Info.CpuModel = ""
cd.Info.Podman = false
cd.Info.Os = 0
}
}
+374
View File
@@ -0,0 +1,374 @@
package systems
import (
"errors"
"fmt"
"time"
"github.com/henrygd/beszel/internal/hub/ws"
"github.com/henrygd/beszel/internal/entities/system"
"github.com/henrygd/beszel/internal/hub/expirymap"
"github.com/henrygd/beszel/internal/common"
"github.com/henrygd/beszel"
"github.com/blang/semver"
"github.com/pocketbase/pocketbase/core"
"github.com/pocketbase/pocketbase/tools/store"
"golang.org/x/crypto/ssh"
)
// System status constants
const (
up string = "up" // System is online and responding
down string = "down" // System is offline or not responding
paused string = "paused" // System monitoring is paused
pending string = "pending" // System is waiting on initial connection result
// interval is the default update interval in milliseconds (60 seconds)
interval int = 60_000
// interval int = 10_000 // Debug interval for faster updates
// sessionTimeout is the maximum time to wait for SSH connections
sessionTimeout = 4 * time.Second
)
// errSystemExists is returned when attempting to add a system that already exists
var errSystemExists = errors.New("system exists")
// SystemManager manages a collection of monitored systems and their connections.
// It handles system lifecycle, status updates, and maintains both SSH and WebSocket connections.
type SystemManager struct {
hub hubLike // Hub interface for database and alert operations
systems *store.Store[string, *System] // Thread-safe store of active systems
sshConfig *ssh.ClientConfig // SSH client configuration for system connections
smartFetchMap *expirymap.ExpiryMap[smartFetchState] // Stores last SMART fetch time/result; TTL is only for cleanup
}
// hubLike defines the interface requirements for the hub dependency.
// It extends core.App with system-specific functionality.
type hubLike interface {
core.App
GetSSHKey(dataDir string) (ssh.Signer, error)
HandleSystemAlerts(systemRecord *core.Record, data *system.CombinedData) error
HandleStatusAlerts(status string, systemRecord *core.Record) error
CancelPendingStatusAlerts(systemID string)
}
// NewSystemManager creates a new SystemManager instance with the provided hub.
// The hub must implement the hubLike interface to provide database and alert functionality.
func NewSystemManager(hub hubLike) *SystemManager {
return &SystemManager{
systems: store.New(map[string]*System{}),
hub: hub,
smartFetchMap: expirymap.New[smartFetchState](time.Hour),
}
}
// GetSystem returns a system by ID from the store
func (sm *SystemManager) GetSystem(systemID string) (*System, error) {
sys, ok := sm.systems.GetOk(systemID)
if !ok {
return nil, fmt.Errorf("system not found")
}
return sys, nil
}
// Initialize sets up the system manager by binding event hooks and starting existing systems.
// It configures SSH client settings and begins monitoring all non-paused systems from the database.
// Systems are started with staggered delays to prevent overwhelming the hub during startup.
func (sm *SystemManager) Initialize() error {
sm.bindEventHooks()
// Initialize SSH client configuration
err := sm.createSSHClientConfig()
if err != nil {
return err
}
// Load existing systems from database (excluding paused ones)
var systems []*System
err = sm.hub.DB().NewQuery("SELECT id, host, port, status FROM systems WHERE status != 'paused'").All(&systems)
if err != nil || len(systems) == 0 {
return err
}
// Start systems in background with staggered timing
go func() {
// Calculate staggered delay between system starts (max 2 seconds per system)
delta := interval / max(1, len(systems))
delta = min(delta, 2_000)
sleepTime := time.Duration(delta) * time.Millisecond
for _, system := range systems {
time.Sleep(sleepTime)
_ = sm.AddSystem(system)
}
}()
return nil
}
// bindEventHooks registers event handlers for system and fingerprint record changes.
// These hooks ensure the system manager stays synchronized with database changes.
func (sm *SystemManager) bindEventHooks() {
sm.hub.OnRecordCreate("systems").BindFunc(sm.onRecordCreate)
sm.hub.OnRecordAfterCreateSuccess("systems").BindFunc(sm.onRecordAfterCreateSuccess)
sm.hub.OnRecordUpdate("systems").BindFunc(sm.onRecordUpdate)
sm.hub.OnRecordAfterUpdateSuccess("systems").BindFunc(sm.onRecordAfterUpdateSuccess)
sm.hub.OnRecordAfterDeleteSuccess("systems").BindFunc(sm.onRecordAfterDeleteSuccess)
sm.hub.OnRecordAfterUpdateSuccess("fingerprints").BindFunc(sm.onTokenRotated)
sm.hub.OnRealtimeSubscribeRequest().BindFunc(sm.onRealtimeSubscribeRequest)
sm.hub.OnRealtimeConnectRequest().BindFunc(sm.onRealtimeConnectRequest)
}
// onTokenRotated handles fingerprint token rotation events.
// When a system's authentication token is rotated, any existing WebSocket connection
// must be closed to force re-authentication with the new token.
func (sm *SystemManager) onTokenRotated(e *core.RecordEvent) error {
systemID := e.Record.GetString("system")
system, ok := sm.systems.GetOk(systemID)
if !ok {
return e.Next()
}
// No need to close connection if not connected via websocket
if system.WsConn == nil {
return e.Next()
}
system.setDown(nil)
sm.RemoveSystem(systemID)
return e.Next()
}
// onRecordCreate is called before a new system record is committed to the database.
// It initializes the record with default values: empty info and pending status.
func (sm *SystemManager) onRecordCreate(e *core.RecordEvent) error {
e.Record.Set("info", system.Info{})
e.Record.Set("status", pending)
return e.Next()
}
// onRecordAfterCreateSuccess is called after a new system record is successfully created.
// It adds the new system to the manager to begin monitoring.
func (sm *SystemManager) onRecordAfterCreateSuccess(e *core.RecordEvent) error {
if err := sm.AddRecord(e.Record, nil); err != nil {
e.App.Logger().Error("Error adding record", "err", err)
}
return e.Next()
}
// onRecordUpdate is called before a system record is updated in the database.
// It clears system info when the status is changed to paused.
func (sm *SystemManager) onRecordUpdate(e *core.RecordEvent) error {
if e.Record.GetString("status") == paused {
e.Record.Set("info", system.Info{})
}
return e.Next()
}
// onRecordAfterUpdateSuccess handles system record updates after they're committed to the database.
// It manages system lifecycle based on status changes and triggers appropriate alerts.
// Status transitions are handled as follows:
// - paused: Closes SSH connection and deactivates alerts
// - pending: Starts monitoring (reuses WebSocket if available)
// - up: Triggers system alerts
// - down: Triggers status change alerts
func (sm *SystemManager) onRecordAfterUpdateSuccess(e *core.RecordEvent) error {
newStatus := e.Record.GetString("status")
prevStatus := pending
system, ok := sm.systems.GetOk(e.Record.Id)
if ok {
prevStatus = system.Status
system.Status = newStatus
}
switch newStatus {
case paused:
if ok {
// Pause monitoring but keep system in manager for potential resume
system.closeSSHConnection()
}
_ = deactivateAlerts(e.App, e.Record.Id)
sm.hub.CancelPendingStatusAlerts(e.Record.Id)
return e.Next()
case pending:
// Resume monitoring, preferring existing WebSocket connection
if ok && system.WsConn != nil {
go system.update()
return e.Next()
}
// Start new monitoring session
if err := sm.AddRecord(e.Record, nil); err != nil {
e.App.Logger().Error("Error adding record", "err", err)
}
_ = deactivateAlerts(e.App, e.Record.Id)
return e.Next()
}
// Handle systems not in manager
if !ok {
return sm.AddRecord(e.Record, nil)
}
// Trigger system alerts when system comes online
if newStatus == up {
if err := sm.hub.HandleSystemAlerts(e.Record, system.data); err != nil {
e.App.Logger().Error("Error handling system alerts", "err", err)
}
}
// Trigger status change alerts for up/down transitions
if (newStatus == down && prevStatus == up) || (newStatus == up && prevStatus == down) {
if err := sm.hub.HandleStatusAlerts(newStatus, e.Record); err != nil {
e.App.Logger().Error("Error handling status alerts", "err", err)
}
}
return e.Next()
}
// onRecordAfterDeleteSuccess is called after a system record is successfully deleted.
// It removes the system from the manager and cleans up all associated resources.
func (sm *SystemManager) onRecordAfterDeleteSuccess(e *core.RecordEvent) error {
sm.RemoveSystem(e.Record.Id)
return e.Next()
}
// AddSystem adds a system to the manager and starts monitoring it.
// It validates required fields, initializes the system context, and starts the update goroutine.
// Returns error if a system with the same ID already exists.
func (sm *SystemManager) AddSystem(sys *System) error {
if sm.systems.Has(sys.Id) {
return errSystemExists
}
if sys.Id == "" || sys.Host == "" {
return errors.New("system missing required fields")
}
// Initialize system for monitoring
sys.manager = sm
sys.ctx, sys.cancel = sys.getContext()
sys.data = &system.CombinedData{}
sm.systems.Set(sys.Id, sys)
// Start monitoring in background
go sys.StartUpdater()
return nil
}
// RemoveSystem removes a system from the manager and cleans up all associated resources.
// It cancels the system's context, closes all connections, and removes it from the store.
// Returns an error if the system is not found.
func (sm *SystemManager) RemoveSystem(systemID string) error {
system, ok := sm.systems.GetOk(systemID)
if !ok {
return errors.New("system not found")
}
// Stop the update goroutine
if system.cancel != nil {
system.cancel()
}
// Clean up all connections
system.closeSSHConnection()
system.closeWebSocketConnection()
sm.systems.Remove(systemID)
return nil
}
// AddRecord creates a System instance from a database record and adds it to the manager.
// If a system with the same ID already exists, it's removed first to ensure clean state.
// If no system instance is provided, a new one is created.
// This method is typically called when systems are created or their status changes to pending.
func (sm *SystemManager) AddRecord(record *core.Record, system *System) (err error) {
// Remove existing system to ensure clean state
if sm.systems.Has(record.Id) {
_ = sm.RemoveSystem(record.Id)
}
// Create new system if none provided
if system == nil {
system = sm.NewSystem(record.Id)
}
// Populate system from record
system.Status = record.GetString("status")
system.Host = record.GetString("host")
system.Port = record.GetString("port")
return sm.AddSystem(system)
}
// AddWebSocketSystem creates and adds a system with an established WebSocket connection.
// This method is called when an agent connects via WebSocket with valid authentication.
// The system is immediately added to monitoring with the provided connection and version info.
func (sm *SystemManager) AddWebSocketSystem(systemId string, agentVersion semver.Version, wsConn *ws.WsConn) error {
systemRecord, err := sm.hub.FindRecordById("systems", systemId)
if err != nil {
return err
}
sm.resetFailedSmartFetchState(systemId)
system := sm.NewSystem(systemId)
system.WsConn = wsConn
system.agentVersion = agentVersion
if err := sm.AddRecord(systemRecord, system); err != nil {
return err
}
return nil
}
// resetFailedSmartFetchState clears only failed SMART cooldown entries so a fresh
// agent reconnect retries SMART discovery immediately after configuration changes.
func (sm *SystemManager) resetFailedSmartFetchState(systemID string) {
state, ok := sm.smartFetchMap.GetOk(systemID)
if ok && !state.Successful {
sm.smartFetchMap.Remove(systemID)
}
}
// createSSHClientConfig initializes the SSH client configuration for connecting to an agent's server
func (sm *SystemManager) createSSHClientConfig() error {
privateKey, err := sm.hub.GetSSHKey("")
if err != nil {
return err
}
sm.sshConfig = &ssh.ClientConfig{
User: "u",
Auth: []ssh.AuthMethod{
ssh.PublicKeys(privateKey),
},
Config: ssh.Config{
Ciphers: common.DefaultCiphers,
KeyExchanges: common.DefaultKeyExchanges,
MACs: common.DefaultMACs,
},
HostKeyCallback: ssh.InsecureIgnoreHostKey(),
ClientVersion: fmt.Sprintf("SSH-2.0-%s_%s", beszel.AppName, beszel.Version),
Timeout: sessionTimeout,
}
return nil
}
// deactivateAlerts finds all triggered alerts for a system and sets them to inactive.
// This is called when a system is paused or goes offline to prevent continued alerts.
func deactivateAlerts(app core.App, systemID string) error {
// Note: Direct SQL updates don't trigger SSE, so we use the PocketBase API
// _, err := app.DB().NewQuery(fmt.Sprintf("UPDATE alerts SET triggered = false WHERE system = '%s'", systemID)).Execute()
alerts, err := app.FindRecordsByFilter("alerts", fmt.Sprintf("system = '%s' && triggered = 1", systemID), "", -1, 0)
if err != nil {
return err
}
for _, alert := range alerts {
alert.Set("triggered", false)
if err := app.SaveNoValidate(alert); err != nil {
return err
}
}
return nil
}
+176
View File
@@ -0,0 +1,176 @@
package systems
import (
"encoding/json"
"strings"
"sync"
"time"
"github.com/henrygd/beszel/internal/common"
"github.com/pocketbase/pocketbase/core"
"github.com/pocketbase/pocketbase/tools/subscriptions"
)
type subscriptionInfo struct {
subscription string
connectedClients uint8
}
var (
activeSubscriptions = make(map[string]*subscriptionInfo)
workerRunning bool
tickerStopChan chan struct{}
realtimeMutex sync.Mutex
)
// onRealtimeConnectRequest handles client connection events for realtime subscriptions.
// It cleans up existing subscriptions when a client connects.
func (sm *SystemManager) onRealtimeConnectRequest(e *core.RealtimeConnectRequestEvent) error {
// after e.Next() is the client disconnection
e.Next()
subscriptions := e.Client.Subscriptions()
for k := range subscriptions {
sm.removeRealtimeSubscription(k, subscriptions[k])
}
return nil
}
// onRealtimeSubscribeRequest handles client subscription events for realtime metrics.
// It tracks new subscriptions and unsubscriptions to manage the realtime worker lifecycle.
func (sm *SystemManager) onRealtimeSubscribeRequest(e *core.RealtimeSubscribeRequestEvent) error {
oldSubs := e.Client.Subscriptions()
// after e.Next() is the result of the subscribe request
err := e.Next()
newSubs := e.Client.Subscriptions()
// handle new subscriptions
for k, options := range newSubs {
if _, ok := oldSubs[k]; !ok {
if strings.HasPrefix(k, "rt_metrics") {
systemId := options.Query["system"]
if _, ok := activeSubscriptions[systemId]; !ok {
activeSubscriptions[systemId] = &subscriptionInfo{
subscription: k,
}
}
activeSubscriptions[systemId].connectedClients += 1
sm.onRealtimeSubscriptionAdded()
}
}
}
// handle unsubscriptions
for k := range oldSubs {
if _, ok := newSubs[k]; !ok {
sm.removeRealtimeSubscription(k, oldSubs[k])
}
}
return err
}
// onRealtimeSubscriptionAdded initializes or starts the realtime worker when the first subscription is added.
// It ensures only one worker runs at a time.
func (sm *SystemManager) onRealtimeSubscriptionAdded() {
realtimeMutex.Lock()
defer realtimeMutex.Unlock()
// Start the worker if it's not already running
if !workerRunning {
workerRunning = true
// Create a new stop channel for this worker instance
tickerStopChan = make(chan struct{})
go sm.startRealtimeWorker()
}
}
// checkSubscriptions stops the realtime worker when there are no active subscriptions.
// This prevents unnecessary resource usage when no clients are listening for realtime data.
func (sm *SystemManager) checkSubscriptions() {
if !workerRunning || len(activeSubscriptions) > 0 {
return
}
realtimeMutex.Lock()
defer realtimeMutex.Unlock()
// Signal the worker to stop
if tickerStopChan != nil {
select {
case tickerStopChan <- struct{}{}:
default:
}
}
// Mark worker as stopped (will be reset when next subscription comes in)
workerRunning = false
}
// removeRealtimeSubscription removes a realtime subscription and checks if the worker should be stopped.
// It only processes subscriptions with the "rt_metrics" prefix and triggers cleanup when subscriptions are removed.
func (sm *SystemManager) removeRealtimeSubscription(subscription string, options subscriptions.SubscriptionOptions) {
if strings.HasPrefix(subscription, "rt_metrics") {
systemId := options.Query["system"]
if info, ok := activeSubscriptions[systemId]; ok {
info.connectedClients -= 1
if info.connectedClients <= 0 {
delete(activeSubscriptions, systemId)
}
}
sm.checkSubscriptions()
}
}
// startRealtimeWorker runs the main loop for fetching realtime data from agents.
// It continuously fetches system data and broadcasts it to subscribed clients via WebSocket.
func (sm *SystemManager) startRealtimeWorker() {
sm.fetchRealtimeDataAndNotify()
tick := time.Tick(1 * time.Second)
for {
select {
case <-tickerStopChan:
return
case <-tick:
if len(activeSubscriptions) == 0 {
return
}
sm.fetchRealtimeDataAndNotify()
}
}
}
// fetchRealtimeDataAndNotify fetches realtime data for all active subscriptions and notifies the clients.
func (sm *SystemManager) fetchRealtimeDataAndNotify() {
for systemId, info := range activeSubscriptions {
system, err := sm.GetSystem(systemId)
if err != nil {
continue
}
go func() {
data, err := system.fetchDataFromAgent(common.DataRequestOptions{CacheTimeMs: 1000})
if err != nil {
return
}
bytes, err := json.Marshal(data)
if err == nil {
notify(sm.hub, info.subscription, bytes)
}
}()
}
}
// notify broadcasts realtime data to all clients subscribed to a specific subscription.
// It iterates through all connected clients and sends the data only to those with matching subscriptions.
func notify(app core.App, subscription string, data []byte) error {
message := subscriptions.Message{
Name: subscription,
Data: data,
}
for _, client := range app.SubscriptionsBroker().Clients() {
if !client.HasSubscription(subscription) {
continue
}
client.Send(message)
}
return nil
}
+135
View File
@@ -0,0 +1,135 @@
package systems
import (
"database/sql"
"errors"
"strings"
"time"
"github.com/henrygd/beszel/internal/entities/smart"
"github.com/pocketbase/pocketbase/core"
)
type smartFetchState struct {
LastAttempt int64
Successful bool
}
// FetchAndSaveSmartDevices fetches SMART data from the agent and saves it to the database
func (sys *System) FetchAndSaveSmartDevices() error {
smartData, err := sys.FetchSmartDataFromAgent()
if err != nil {
sys.recordSmartFetchResult(err, 0)
return err
}
err = sys.saveSmartDevices(smartData)
sys.recordSmartFetchResult(err, len(smartData))
return err
}
// recordSmartFetchResult stores a cooldown entry for the SMART interval and marks
// whether the last fetch produced any devices, so failed setup can retry on reconnect.
func (sys *System) recordSmartFetchResult(err error, deviceCount int) {
if sys.manager == nil {
return
}
interval := sys.smartFetchInterval()
success := err == nil && deviceCount > 0
if sys.manager.hub != nil {
sys.manager.hub.Logger().Info("SMART fetch result", "system", sys.Id, "success", success, "devices", deviceCount, "interval", interval.String(), "err", err)
}
sys.manager.smartFetchMap.Set(sys.Id, smartFetchState{LastAttempt: time.Now().UnixMilli(), Successful: success}, interval+time.Minute)
}
// shouldFetchSmart returns true when there is no active SMART cooldown entry for this system.
func (sys *System) shouldFetchSmart() bool {
if sys.manager == nil {
return true
}
state, ok := sys.manager.smartFetchMap.GetOk(sys.Id)
if !ok {
return true
}
return !time.UnixMilli(state.LastAttempt).Add(sys.smartFetchInterval()).After(time.Now())
}
// smartFetchInterval returns the agent-provided SMART interval or the default when unset.
func (sys *System) smartFetchInterval() time.Duration {
if sys.smartInterval > 0 {
return sys.smartInterval
}
return time.Hour
}
// saveSmartDevices saves SMART device data to the smart_devices collection
func (sys *System) saveSmartDevices(smartData map[string]smart.SmartData) error {
if len(smartData) == 0 {
return nil
}
hub := sys.manager.hub
collection, err := hub.FindCachedCollectionByNameOrId("smart_devices")
if err != nil {
return err
}
for deviceKey, device := range smartData {
if err := sys.upsertSmartDeviceRecord(collection, deviceKey, device); err != nil {
return err
}
}
return nil
}
func (sys *System) upsertSmartDeviceRecord(collection *core.Collection, deviceKey string, device smart.SmartData) error {
hub := sys.manager.hub
recordID := makeStableHashId(sys.Id, deviceKey)
record, err := hub.FindRecordById(collection, recordID)
if err != nil {
if !errors.Is(err, sql.ErrNoRows) {
return err
}
record = core.NewRecord(collection)
record.Set("id", recordID)
}
name := device.DiskName
if name == "" {
name = deviceKey
}
powerOnHours, powerCycles := extractPowerMetrics(device.Attributes)
record.Set("system", sys.Id)
record.Set("name", name)
record.Set("model", device.ModelName)
record.Set("state", device.SmartStatus)
record.Set("capacity", device.Capacity)
record.Set("temp", device.Temperature)
record.Set("firmware", device.FirmwareVersion)
record.Set("serial", device.SerialNumber)
record.Set("type", device.DiskType)
record.Set("hours", powerOnHours)
record.Set("cycles", powerCycles)
record.Set("attributes", device.Attributes)
return hub.SaveNoValidate(record)
}
// extractPowerMetrics extracts power on hours and power cycles from SMART attributes
func extractPowerMetrics(attributes []*smart.SmartAttribute) (powerOnHours, powerCycles uint64) {
for _, attr := range attributes {
nameLower := strings.ToLower(attr.Name)
if powerOnHours == 0 && (strings.Contains(nameLower, "poweronhours") || strings.Contains(nameLower, "power_on_hours")) {
powerOnHours = attr.RawValue
}
if powerCycles == 0 && ((strings.Contains(nameLower, "power") && strings.Contains(nameLower, "cycle")) || strings.Contains(nameLower, "startstopcycles")) {
powerCycles = attr.RawValue
}
if powerOnHours > 0 && powerCycles > 0 {
break
}
}
return
}
+94
View File
@@ -0,0 +1,94 @@
//go:build testing
package systems
import (
"errors"
"testing"
"time"
"github.com/henrygd/beszel/internal/hub/expirymap"
"github.com/stretchr/testify/assert"
)
func TestRecordSmartFetchResult(t *testing.T) {
sm := &SystemManager{smartFetchMap: expirymap.New[smartFetchState](time.Hour)}
t.Cleanup(sm.smartFetchMap.StopCleaner)
sys := &System{
Id: "system-1",
manager: sm,
smartInterval: time.Hour,
}
// Successful fetch with devices
sys.recordSmartFetchResult(nil, 5)
state, ok := sm.smartFetchMap.GetOk(sys.Id)
assert.True(t, ok, "expected smart fetch result to be stored")
assert.True(t, state.Successful, "expected successful fetch state to be recorded")
// Failed fetch
sys.recordSmartFetchResult(errors.New("failed"), 0)
state, ok = sm.smartFetchMap.GetOk(sys.Id)
assert.True(t, ok, "expected failed smart fetch state to be stored")
assert.False(t, state.Successful, "expected failed smart fetch state to be marked unsuccessful")
// Successful fetch but no devices
sys.recordSmartFetchResult(nil, 0)
state, ok = sm.smartFetchMap.GetOk(sys.Id)
assert.True(t, ok, "expected fetch with zero devices to be stored")
assert.False(t, state.Successful, "expected fetch with zero devices to be marked unsuccessful")
}
func TestShouldFetchSmart(t *testing.T) {
sm := &SystemManager{smartFetchMap: expirymap.New[smartFetchState](time.Hour)}
t.Cleanup(sm.smartFetchMap.StopCleaner)
sys := &System{
Id: "system-1",
manager: sm,
smartInterval: time.Hour,
}
assert.True(t, sys.shouldFetchSmart(), "expected initial smart fetch to be allowed")
sys.recordSmartFetchResult(errors.New("failed"), 0)
assert.False(t, sys.shouldFetchSmart(), "expected smart fetch to be blocked while interval entry exists")
sm.smartFetchMap.Remove(sys.Id)
assert.True(t, sys.shouldFetchSmart(), "expected smart fetch to be allowed after interval entry is cleared")
}
func TestShouldFetchSmart_IgnoresExtendedTTLWhenFetchIsDue(t *testing.T) {
sm := &SystemManager{smartFetchMap: expirymap.New[smartFetchState](time.Hour)}
t.Cleanup(sm.smartFetchMap.StopCleaner)
sys := &System{
Id: "system-1",
manager: sm,
smartInterval: time.Hour,
}
sm.smartFetchMap.Set(sys.Id, smartFetchState{
LastAttempt: time.Now().Add(-2 * time.Hour).UnixMilli(),
Successful: true,
}, 10*time.Minute)
sm.smartFetchMap.UpdateExpiration(sys.Id, 3*time.Hour)
assert.True(t, sys.shouldFetchSmart(), "expected fetch time to take precedence over updated TTL")
}
func TestResetFailedSmartFetchState(t *testing.T) {
sm := &SystemManager{smartFetchMap: expirymap.New[smartFetchState](time.Hour)}
t.Cleanup(sm.smartFetchMap.StopCleaner)
sm.smartFetchMap.Set("system-1", smartFetchState{LastAttempt: time.Now().UnixMilli(), Successful: false}, time.Hour)
sm.resetFailedSmartFetchState("system-1")
_, ok := sm.smartFetchMap.GetOk("system-1")
assert.False(t, ok, "expected failed smart fetch state to be cleared on reconnect")
sm.smartFetchMap.Set("system-1", smartFetchState{LastAttempt: time.Now().UnixMilli(), Successful: true}, time.Hour)
sm.resetFailedSmartFetchState("system-1")
_, ok = sm.smartFetchMap.GetOk("system-1")
assert.True(t, ok, "expected successful smart fetch state to be preserved")
}
@@ -0,0 +1,75 @@
//go:build testing
package systems
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestGetSystemdServiceId(t *testing.T) {
t.Run("deterministic output", func(t *testing.T) {
systemId := "sys-123"
serviceName := "nginx.service"
// Call multiple times and ensure same result
id1 := makeStableHashId(systemId, serviceName)
id2 := makeStableHashId(systemId, serviceName)
id3 := makeStableHashId(systemId, serviceName)
assert.Equal(t, id1, id2)
assert.Equal(t, id2, id3)
assert.NotEmpty(t, id1)
})
t.Run("different inputs produce different ids", func(t *testing.T) {
systemId1 := "sys-123"
systemId2 := "sys-456"
serviceName1 := "nginx.service"
serviceName2 := "apache.service"
id1 := makeStableHashId(systemId1, serviceName1)
id2 := makeStableHashId(systemId2, serviceName1)
id3 := makeStableHashId(systemId1, serviceName2)
id4 := makeStableHashId(systemId2, serviceName2)
// All IDs should be different
assert.NotEqual(t, id1, id2)
assert.NotEqual(t, id1, id3)
assert.NotEqual(t, id1, id4)
assert.NotEqual(t, id2, id3)
assert.NotEqual(t, id2, id4)
assert.NotEqual(t, id3, id4)
})
t.Run("consistent length", func(t *testing.T) {
testCases := []struct {
systemId string
serviceName string
}{
{"short", "short.service"},
{"very-long-system-id-that-might-be-used-in-practice", "very-long-service-name.service"},
{"", "empty-system.service"},
{"empty-service", ""},
{"", ""},
}
for _, tc := range testCases {
id := makeStableHashId(tc.systemId, tc.serviceName)
// FNV-32 produces 8 hex characters
assert.Len(t, id, 8, "ID should be 8 characters for systemId='%s', serviceName='%s'", tc.systemId, tc.serviceName)
}
})
t.Run("hexadecimal output", func(t *testing.T) {
id := makeStableHashId("test-system", "test-service")
assert.NotEmpty(t, id)
// Should only contain hexadecimal characters
for _, char := range id {
assert.True(t, (char >= '0' && char <= '9') || (char >= 'a' && char <= 'f'),
"ID should only contain hexadecimal characters, got: %s", id)
}
})
}
+159
View File
@@ -0,0 +1,159 @@
//go:build testing
package systems
import (
"testing"
"github.com/henrygd/beszel/internal/entities/system"
)
func TestCombinedData_MigrateDeprecatedFields(t *testing.T) {
t.Run("Migrate NetworkSent and NetworkRecv to Bandwidth", func(t *testing.T) {
cd := &system.CombinedData{
Stats: system.Stats{
NetworkSent: 1.5, // 1.5 MB
NetworkRecv: 2.5, // 2.5 MB
},
}
migrateDeprecatedFields(cd, true)
expectedSent := uint64(1.5 * 1024 * 1024)
expectedRecv := uint64(2.5 * 1024 * 1024)
if cd.Stats.Bandwidth[0] != expectedSent {
t.Errorf("expected Bandwidth[0] %d, got %d", expectedSent, cd.Stats.Bandwidth[0])
}
if cd.Stats.Bandwidth[1] != expectedRecv {
t.Errorf("expected Bandwidth[1] %d, got %d", expectedRecv, cd.Stats.Bandwidth[1])
}
if cd.Stats.NetworkSent != 0 || cd.Stats.NetworkRecv != 0 {
t.Errorf("expected NetworkSent and NetworkRecv to be reset, got %f, %f", cd.Stats.NetworkSent, cd.Stats.NetworkRecv)
}
})
t.Run("Migrate Info.Bandwidth to Info.BandwidthBytes", func(t *testing.T) {
cd := &system.CombinedData{
Info: system.Info{
Bandwidth: 10.0, // 10 MB
},
}
migrateDeprecatedFields(cd, true)
expected := uint64(10 * 1024 * 1024)
if cd.Info.BandwidthBytes != expected {
t.Errorf("expected BandwidthBytes %d, got %d", expected, cd.Info.BandwidthBytes)
}
if cd.Info.Bandwidth != 0 {
t.Errorf("expected Info.Bandwidth to be reset, got %f", cd.Info.Bandwidth)
}
})
t.Run("Migrate DiskReadPs and DiskWritePs to DiskIO", func(t *testing.T) {
cd := &system.CombinedData{
Stats: system.Stats{
DiskReadPs: 3.0, // 3 MB
DiskWritePs: 4.0, // 4 MB
},
}
migrateDeprecatedFields(cd, true)
expectedRead := uint64(3 * 1024 * 1024)
expectedWrite := uint64(4 * 1024 * 1024)
if cd.Stats.DiskIO[0] != expectedRead {
t.Errorf("expected DiskIO[0] %d, got %d", expectedRead, cd.Stats.DiskIO[0])
}
if cd.Stats.DiskIO[1] != expectedWrite {
t.Errorf("expected DiskIO[1] %d, got %d", expectedWrite, cd.Stats.DiskIO[1])
}
if cd.Stats.DiskReadPs != 0 || cd.Stats.DiskWritePs != 0 {
t.Errorf("expected DiskReadPs and DiskWritePs to be reset, got %f, %f", cd.Stats.DiskReadPs, cd.Stats.DiskWritePs)
}
})
t.Run("Migrate Info fields to Details struct", func(t *testing.T) {
cd := &system.CombinedData{
Stats: system.Stats{
Mem: 16.0, // 16 GB
},
Info: system.Info{
Hostname: "test-host",
KernelVersion: "6.8.0",
Cores: 8,
Threads: 16,
CpuModel: "Intel i7",
Podman: true,
Os: system.Linux,
},
}
migrateDeprecatedFields(cd, true)
if cd.Details == nil {
t.Fatal("expected Details struct to be created")
}
if cd.Details.Hostname != "test-host" {
t.Errorf("expected Hostname 'test-host', got '%s'", cd.Details.Hostname)
}
if cd.Details.Kernel != "6.8.0" {
t.Errorf("expected Kernel '6.8.0', got '%s'", cd.Details.Kernel)
}
if cd.Details.Cores != 8 {
t.Errorf("expected Cores 8, got %d", cd.Details.Cores)
}
if cd.Details.Threads != 16 {
t.Errorf("expected Threads 16, got %d", cd.Details.Threads)
}
if cd.Details.CpuModel != "Intel i7" {
t.Errorf("expected CpuModel 'Intel i7', got '%s'", cd.Details.CpuModel)
}
if cd.Details.Podman != true {
t.Errorf("expected Podman true, got %v", cd.Details.Podman)
}
if cd.Details.Os != system.Linux {
t.Errorf("expected Os Linux, got %d", cd.Details.Os)
}
expectedMem := uint64(16 * 1024 * 1024 * 1024)
if cd.Details.MemoryTotal != expectedMem {
t.Errorf("expected MemoryTotal %d, got %d", expectedMem, cd.Details.MemoryTotal)
}
if cd.Info.Hostname != "" || cd.Info.KernelVersion != "" || cd.Info.Cores != 0 || cd.Info.CpuModel != "" || cd.Info.Podman != false || cd.Info.Os != 0 {
t.Errorf("expected Info fields to be reset, got %+v", cd.Info)
}
})
t.Run("Do not migrate if Details already exists", func(t *testing.T) {
cd := &system.CombinedData{
Details: &system.Details{Hostname: "existing-host"},
Info: system.Info{
Hostname: "deprecated-host",
},
}
migrateDeprecatedFields(cd, true)
if cd.Details.Hostname != "existing-host" {
t.Errorf("expected Hostname 'existing-host', got '%s'", cd.Details.Hostname)
}
if cd.Info.Hostname != "deprecated-host" {
t.Errorf("expected Info.Hostname to remain 'deprecated-host', got '%s'", cd.Info.Hostname)
}
})
t.Run("Do not create details if migrateDetails is false", func(t *testing.T) {
cd := &system.CombinedData{
Info: system.Info{
Hostname: "deprecated-host",
},
}
migrateDeprecatedFields(cd, false)
if cd.Details != nil {
t.Fatal("expected Details struct to not be created")
}
if cd.Info.Hostname != "" {
t.Errorf("expected Info.Hostname to be reset, got '%s'", cd.Info.Hostname)
}
})
}
@@ -0,0 +1,9 @@
//go:build !testing
package systems
// Background SMART fetching is enabled in production but disabled for tests (systems_test_helpers.go).
//
// The hub integration tests create/replace systems and clean up the test apps quickly.
// Background SMART fetching can outlive teardown and crash in PocketBase internals (nil DB).
func backgroundSmartFetchEnabled() bool { return true }
+480
View File
@@ -0,0 +1,480 @@
//go:build testing
package systems_test
import (
"fmt"
"sync"
"testing"
"testing/synctest"
"time"
"github.com/henrygd/beszel/internal/entities/container"
"github.com/henrygd/beszel/internal/entities/system"
"github.com/henrygd/beszel/internal/hub/systems"
"github.com/henrygd/beszel/internal/tests"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestSystemManagerNew(t *testing.T) {
hub, err := tests.NewTestHub(t.TempDir())
if err != nil {
t.Fatal(err)
}
defer hub.Cleanup()
sm := hub.GetSystemManager()
user, err := tests.CreateUser(hub, "test@test.com", "testtesttest")
require.NoError(t, err)
synctest.Test(t, func(t *testing.T) {
sm.Initialize()
record, err := tests.CreateRecord(hub, "systems", map[string]any{
"name": "it-was-coney-island",
"host": "the-playground-of-the-world",
"port": "33914",
"users": []string{user.Id},
})
require.NoError(t, err)
assert.Equal(t, "pending", record.GetString("status"), "System status should be 'pending'")
assert.Equal(t, "pending", sm.GetSystemStatusFromStore(record.Id), "System status should be 'pending'")
// Verify the system host and port
host, port := sm.GetSystemHostPort(record.Id)
assert.Equal(t, record.GetString("host"), host, "System host should match")
assert.Equal(t, record.GetString("port"), port, "System port should match")
time.Sleep(13 * time.Second)
synctest.Wait()
assert.Equal(t, "pending", record.Fresh().GetString("status"), "System status should be 'pending'")
// Verify the system was added by checking if it exists
assert.True(t, sm.HasSystem(record.Id), "System should exist in the store")
time.Sleep(10 * time.Second)
synctest.Wait()
// system should be set to down after 15 seconds (no websocket connection)
assert.Equal(t, "down", sm.GetSystemStatusFromStore(record.Id), "System status should be 'down'")
// make sure the system is down in the db
record, err = hub.FindRecordById("systems", record.Id)
require.NoError(t, err)
assert.Equal(t, "down", record.GetString("status"), "System status should be 'down'")
assert.Equal(t, 1, sm.GetSystemCount(), "System count should be 1")
err = sm.RemoveSystem(record.Id)
assert.NoError(t, err)
assert.Equal(t, 0, sm.GetSystemCount(), "System count should be 0")
assert.False(t, sm.HasSystem(record.Id), "System should not exist in the store after removal")
// let's also make sure a system is removed from the store when the record is deleted
record, err = tests.CreateRecord(hub, "systems", map[string]any{
"name": "there-was-no-place-like-it",
"host": "in-the-whole-world",
"port": "33914",
"users": []string{user.Id},
})
require.NoError(t, err)
assert.True(t, sm.HasSystem(record.Id), "System should exist in the store after creation")
time.Sleep(8 * time.Second)
synctest.Wait()
assert.Equal(t, "pending", sm.GetSystemStatusFromStore(record.Id), "System status should be 'pending'")
sm.SetSystemStatusInDB(record.Id, "up")
time.Sleep(time.Second)
synctest.Wait()
assert.Equal(t, "up", sm.GetSystemStatusFromStore(record.Id), "System status should be 'up'")
// make sure the system switches to down after 11 seconds
sm.RemoveSystem(record.Id)
sm.AddRecord(record, nil)
assert.Equal(t, "pending", sm.GetSystemStatusFromStore(record.Id), "System status should be 'pending'")
time.Sleep(12 * time.Second)
synctest.Wait()
assert.Equal(t, "down", sm.GetSystemStatusFromStore(record.Id), "System status should be 'down'")
// sm.SetSystemStatusInDB(record.Id, "paused")
// time.Sleep(time.Second)
// synctest.Wait()
// assert.Equal(t, "paused", sm.GetSystemStatusFromStore(record.Id), "System status should be 'paused'")
// delete the record
err = hub.Delete(record)
require.NoError(t, err)
assert.False(t, sm.HasSystem(record.Id), "System should not exist in the store after deletion")
})
testOld(t, hub)
synctest.Test(t, func(t *testing.T) {
time.Sleep(time.Second)
synctest.Wait()
for _, systemId := range sm.GetAllSystemIDs() {
err = sm.RemoveSystem(systemId)
require.NoError(t, err)
assert.False(t, sm.HasSystem(systemId), "System should not exist in the store after deletion")
}
assert.Equal(t, 0, sm.GetSystemCount(), "System count should be 0")
// TODO: test with websocket client
})
}
func testOld(t *testing.T, hub *tests.TestHub) {
user, err := tests.CreateUser(hub, "test@testy.com", "testtesttest")
require.NoError(t, err)
sm := hub.GetSystemManager()
assert.NotNil(t, sm)
// error expected when creating a user with a duplicate email
_, err = tests.CreateUser(hub, "test@test.com", "testtesttest")
require.Error(t, err)
// Test collection existence. todo: move to hub package tests
t.Run("CollectionExistence", func(t *testing.T) {
// Verify that required collections exist
systems, err := hub.FindCachedCollectionByNameOrId("systems")
require.NoError(t, err)
assert.NotNil(t, systems)
systemStats, err := hub.FindCachedCollectionByNameOrId("system_stats")
require.NoError(t, err)
assert.NotNil(t, systemStats)
containerStats, err := hub.FindCachedCollectionByNameOrId("container_stats")
require.NoError(t, err)
assert.NotNil(t, containerStats)
})
t.Run("RemoveSystem", func(t *testing.T) {
// Get the count before adding the system
countBefore := sm.GetSystemCount()
// Create a test system record
record, err := tests.CreateRecord(hub, "systems", map[string]any{
"name": "i-even-got-lost-at-coney-island",
"host": "but-they-found-me",
"port": "33914",
"users": []string{user.Id},
})
require.NoError(t, err)
// Verify the system count increased
countAfterAdd := sm.GetSystemCount()
assert.Equal(t, countBefore+1, countAfterAdd, "System count should increase after adding a system via event hook")
// Verify the system exists
assert.True(t, sm.HasSystem(record.Id), "System should exist in the store")
// Remove the system
err = sm.RemoveSystem(record.Id)
assert.NoError(t, err)
// Check that the system count decreased
countAfterRemove := sm.GetSystemCount()
assert.Equal(t, countAfterAdd-1, countAfterRemove, "System count should decrease after removing a system")
// Verify the system no longer exists
assert.False(t, sm.HasSystem(record.Id), "System should not exist in the store after removal")
// Verify the system is not in the list of all system IDs
ids := sm.GetAllSystemIDs()
assert.NotContains(t, ids, record.Id, "System ID should not be in the list of all system IDs after removal")
// Verify the system status is empty
status := sm.GetSystemStatusFromStore(record.Id)
assert.Equal(t, "", status, "System status should be empty after removal")
// Try to remove it again - should return an error since it's already removed
err = sm.RemoveSystem(record.Id)
assert.Error(t, err)
})
t.Run("NewRecordPending", func(t *testing.T) {
// Create a test system
record, err := tests.CreateRecord(hub, "systems", map[string]any{
"name": "and-you-know",
"host": "i-feel-very-bad",
"port": "33914",
"users": []string{user.Id},
})
require.NoError(t, err)
// Add the record to the system manager
err = sm.AddRecord(record, nil)
require.NoError(t, err)
// Test filtering records by status - should be "pending" now
filter := "status = 'pending'"
pendingSystems, err := hub.FindRecordsByFilter("systems", filter, "-created", 0, 0, nil)
require.NoError(t, err)
assert.GreaterOrEqual(t, len(pendingSystems), 1)
})
t.Run("SystemStatusUpdate", func(t *testing.T) {
// Create a test system record
record, err := tests.CreateRecord(hub, "systems", map[string]any{
"name": "we-used-to-sleep-on-the-beach",
"host": "sleep-overnight-here",
"port": "33914",
"users": []string{user.Id},
})
require.NoError(t, err)
// Add the record to the system manager
err = sm.AddRecord(record, nil)
require.NoError(t, err)
// Test status changes
initialStatus := sm.GetSystemStatusFromStore(record.Id)
// Set a new status
sm.SetSystemStatusInDB(record.Id, "up")
// Verify status was updated
newStatus := sm.GetSystemStatusFromStore(record.Id)
assert.Equal(t, "up", newStatus, "System status should be updated to 'up'")
assert.NotEqual(t, initialStatus, newStatus, "Status should have changed")
// Verify the database was updated
updatedRecord, err := hub.FindRecordById("systems", record.Id)
require.NoError(t, err)
assert.Equal(t, "up", updatedRecord.Get("status"), "Database status should match")
})
t.Run("HandleSystemData", func(t *testing.T) {
// Create a test system record
record, err := tests.CreateRecord(hub, "systems", map[string]any{
"name": "things-changed-you-know",
"host": "they-dont-sleep-anymore-on-the-beach",
"port": "33914",
"users": []string{user.Id},
})
require.NoError(t, err)
// Create test system data
testData := &system.CombinedData{
Details: &system.Details{
Hostname: "data-test.example.com",
Kernel: "5.15.0-generic",
Cores: 4,
Threads: 8,
CpuModel: "Test CPU",
},
Info: system.Info{
Uptime: 3600,
Cpu: 25.5,
MemPct: 40.2,
DiskPct: 60.0,
Bandwidth: 100.0,
AgentVersion: "1.0.0",
},
Stats: system.Stats{
Cpu: 25.5,
Mem: 16384.0,
MemUsed: 6553.6,
MemPct: 40.0,
DiskTotal: 1024000.0,
DiskUsed: 614400.0,
DiskPct: 60.0,
NetworkSent: 1024.0,
NetworkRecv: 2048.0,
},
Containers: []*container.Stats{},
}
// Test handling system data. todo: move to hub/alerts package tests
err = hub.HandleSystemAlerts(record, testData)
assert.NoError(t, err)
})
t.Run("ErrorHandling", func(t *testing.T) {
// Try to add a non-existent record
nonExistentId := "non_existent_id"
err := sm.RemoveSystem(nonExistentId)
assert.Error(t, err)
// Try to add a system with invalid host
system := &systems.System{
Host: "",
}
err = sm.AddSystem(system)
assert.Error(t, err)
})
t.Run("ConcurrentOperations", func(t *testing.T) {
// Create a test system
record, err := tests.CreateRecord(hub, "systems", map[string]any{
"name": "jfkjahkfajs",
"host": "localhost",
"port": "33914",
"users": []string{user.Id},
})
require.NoError(t, err)
// Run concurrent operations
const goroutines = 5
var wg sync.WaitGroup
wg.Add(goroutines)
for i := range goroutines {
go func(i int) {
defer wg.Done()
// Alternate between different operations
switch i % 3 {
case 0:
status := fmt.Sprintf("status-%d", i)
sm.SetSystemStatusInDB(record.Id, status)
case 1:
_ = sm.GetSystemStatusFromStore(record.Id)
case 2:
_, _ = sm.GetSystemHostPort(record.Id)
}
}(i)
}
wg.Wait()
// Verify system still exists and is in a valid state
assert.True(t, sm.HasSystem(record.Id), "System should still exist after concurrent operations")
status := sm.GetSystemStatusFromStore(record.Id)
assert.NotEmpty(t, status, "System should have a status after concurrent operations")
})
t.Run("ContextCancellation", func(t *testing.T) {
// Create a test system record
record, err := tests.CreateRecord(hub, "systems", map[string]any{
"name": "lkhsdfsjf",
"host": "localhost",
"port": "33914",
"users": []string{user.Id},
})
require.NoError(t, err)
// Verify the system exists in the store
assert.True(t, sm.HasSystem(record.Id), "System should exist in the store")
// Store the original context and cancel function
originalCtx, originalCancel, err := sm.GetSystemContextFromStore(record.Id)
assert.NoError(t, err)
// Ensure the context is not nil
assert.NotNil(t, originalCtx, "System context should not be nil")
assert.NotNil(t, originalCancel, "System cancel function should not be nil")
// Cancel the context
originalCancel()
// Wait a short time for cancellation to propagate
time.Sleep(10 * time.Millisecond)
// Verify the context is done
select {
case <-originalCtx.Done():
// Context was properly cancelled
default:
t.Fatal("Context was not cancelled")
}
// Verify the system is still in the store (cancellation shouldn't remove it)
assert.True(t, sm.HasSystem(record.Id), "System should still exist after context cancellation")
// Explicitly remove the system
err = sm.RemoveSystem(record.Id)
assert.NoError(t, err, "RemoveSystem should succeed")
// Verify the system is removed
assert.False(t, sm.HasSystem(record.Id), "System should be removed after RemoveSystem")
// Try to remove it again - should return an error
err = sm.RemoveSystem(record.Id)
assert.Error(t, err, "RemoveSystem should fail for non-existent system")
// Add the system back
err = sm.AddRecord(record, nil)
require.NoError(t, err, "AddRecord should succeed")
// Verify the system is back in the store
assert.True(t, sm.HasSystem(record.Id), "System should exist after re-adding")
// Verify a new context was created
newCtx, newCancel, err := sm.GetSystemContextFromStore(record.Id)
assert.NoError(t, err)
assert.NotNil(t, newCtx, "New system context should not be nil")
assert.NotNil(t, newCancel, "New system cancel function should not be nil")
assert.NotEqual(t, originalCtx, newCtx, "New context should be different from original")
// Clean up
err = sm.RemoveSystem(record.Id)
assert.NoError(t, err)
})
}
func TestHasUser(t *testing.T) {
hub, err := tests.NewTestHub(t.TempDir())
require.NoError(t, err)
defer hub.Cleanup()
sm := hub.GetSystemManager()
err = sm.Initialize()
require.NoError(t, err)
user1, err := tests.CreateUser(hub, "user1@test.com", "password123")
require.NoError(t, err)
user2, err := tests.CreateUser(hub, "user2@test.com", "password123")
require.NoError(t, err)
systemRecord, err := tests.CreateRecord(hub, "systems", map[string]any{
"name": "has-user-test",
"host": "127.0.0.1",
"port": "33914",
"users": []string{user1.Id},
})
require.NoError(t, err)
sys, err := sm.GetSystemFromStore(systemRecord.Id)
require.NoError(t, err)
t.Run("user in list returns true", func(t *testing.T) {
assert.True(t, sys.HasUser(hub, user1))
})
t.Run("user not in list returns false", func(t *testing.T) {
assert.False(t, sys.HasUser(hub, user2))
})
t.Run("unknown user ID returns false", func(t *testing.T) {
assert.False(t, sys.HasUser(hub, nil))
})
t.Run("SHARE_ALL_SYSTEMS=true grants access to non-member", func(t *testing.T) {
t.Setenv("SHARE_ALL_SYSTEMS", "true")
assert.True(t, sys.HasUser(hub, user2))
})
t.Run("BESZEL_HUB_SHARE_ALL_SYSTEMS=true grants access to non-member", func(t *testing.T) {
t.Setenv("BESZEL_HUB_SHARE_ALL_SYSTEMS", "true")
assert.True(t, sys.HasUser(hub, user2))
})
t.Run("additional user works", func(t *testing.T) {
assert.False(t, sys.HasUser(hub, user2))
systemRecord.Set("users", []string{user1.Id, user2.Id})
err = hub.Save(systemRecord)
require.NoError(t, err)
assert.True(t, sys.HasUser(hub, user1))
assert.True(t, sys.HasUser(hub, user2))
})
}
@@ -0,0 +1,127 @@
//go:build testing
package systems
import (
"context"
"fmt"
entities "github.com/henrygd/beszel/internal/entities/system"
"github.com/pocketbase/pocketbase/core"
)
// The hub integration tests create/replace systems and cleanup the test apps quickly.
// Background SMART fetching can outlive teardown and crash in PocketBase internals (nil DB).
//
// We keep the explicit SMART refresh endpoint / method available, but disable
// the automatic background fetch during tests.
func backgroundSmartFetchEnabled() bool { return false }
// TESTING ONLY: GetSystemCount returns the number of systems in the store
func (sm *SystemManager) GetSystemCount() int {
return sm.systems.Length()
}
// TESTING ONLY: HasSystem checks if a system with the given ID exists in the store
func (sm *SystemManager) HasSystem(systemID string) bool {
return sm.systems.Has(systemID)
}
// TESTING ONLY: GetSystemStatusFromStore returns the status of a system with the given ID
// Returns an empty string if the system doesn't exist
func (sm *SystemManager) GetSystemStatusFromStore(systemID string) string {
sys, ok := sm.systems.GetOk(systemID)
if !ok {
return ""
}
return sys.Status
}
// TESTING ONLY: GetSystemContextFromStore returns the context and cancel function for a system
func (sm *SystemManager) GetSystemContextFromStore(systemID string) (context.Context, context.CancelFunc, error) {
sys, ok := sm.systems.GetOk(systemID)
if !ok {
return nil, nil, fmt.Errorf("no system")
}
return sys.ctx, sys.cancel, nil
}
// TESTING ONLY: GetSystemFromStore returns a store from the system
func (sm *SystemManager) GetSystemFromStore(systemID string) (*System, error) {
sys, ok := sm.systems.GetOk(systemID)
if !ok {
return nil, fmt.Errorf("no system")
}
return sys, nil
}
// TESTING ONLY: GetAllSystemIDs returns a slice of all system IDs in the store
func (sm *SystemManager) GetAllSystemIDs() []string {
data := sm.systems.GetAll()
ids := make([]string, 0, len(data))
for id := range data {
ids = append(ids, id)
}
return ids
}
// TESTING ONLY: GetSystemData returns the combined data for a system with the given ID
// Returns nil if the system doesn't exist
// This method is intended for testing
func (sm *SystemManager) GetSystemData(systemID string) *entities.CombinedData {
sys, ok := sm.systems.GetOk(systemID)
if !ok {
return nil
}
return sys.data
}
// TESTING ONLY: GetSystemHostPort returns the host and port for a system with the given ID
// Returns empty strings if the system doesn't exist
func (sm *SystemManager) GetSystemHostPort(systemID string) (string, string) {
sys, ok := sm.systems.GetOk(systemID)
if !ok {
return "", ""
}
return sys.Host, sys.Port
}
// TESTING ONLY: SetSystemStatusInDB sets the status of a system directly and updates the database record
// This is intended for testing
// Returns false if the system doesn't exist
func (sm *SystemManager) SetSystemStatusInDB(systemID string, status string) bool {
if !sm.HasSystem(systemID) {
return false
}
// Update the database record
record, err := sm.hub.FindRecordById("systems", systemID)
if err != nil {
return false
}
record.Set("status", status)
err = sm.hub.Save(record)
if err != nil {
return false
}
return true
}
// TESTING ONLY: RemoveAllSystems removes all systems from the store
func (sm *SystemManager) RemoveAllSystems() {
for _, system := range sm.systems.GetAll() {
sm.RemoveSystem(system.Id)
}
sm.smartFetchMap.StopCleaner()
}
func (s *System) StopUpdater() {
s.cancel()
}
func (s *System) CreateRecords(data *entities.CombinedData) (*core.Record, error) {
s.data = data
return s.createRecords(data)
}