mirror of
https://github.com/Dvorinka/beszel.git
synced 2026-06-03 21:02:56 +00:00
Initial commit: Beszel fork with Domain Locker integration
This commit is contained in:
@@ -0,0 +1,313 @@
|
||||
// Package alerts handles alert management and delivery.
|
||||
package alerts
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/mail"
|
||||
"net/url"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/nicholas-fedor/shoutrrr"
|
||||
"github.com/pocketbase/dbx"
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
"github.com/pocketbase/pocketbase/tools/mailer"
|
||||
)
|
||||
|
||||
type hubLike interface {
|
||||
core.App
|
||||
MakeLink(parts ...string) string
|
||||
}
|
||||
|
||||
type AlertManager struct {
|
||||
hub hubLike
|
||||
stopOnce sync.Once
|
||||
pendingAlerts sync.Map
|
||||
alertsCache *AlertsCache
|
||||
}
|
||||
|
||||
type AlertMessageData struct {
|
||||
UserID string
|
||||
SystemID string
|
||||
Title string
|
||||
Message string
|
||||
Link string
|
||||
LinkText string
|
||||
}
|
||||
|
||||
type UserNotificationSettings struct {
|
||||
Emails []string `json:"emails"`
|
||||
Webhooks []string `json:"webhooks"`
|
||||
}
|
||||
|
||||
type SystemAlertFsStats struct {
|
||||
DiskTotal float64 `json:"d"`
|
||||
DiskUsed float64 `json:"du"`
|
||||
}
|
||||
|
||||
// Values pulled from system_stats.stats that are relevant to alerts.
|
||||
type SystemAlertStats struct {
|
||||
Cpu float64 `json:"cpu"`
|
||||
Mem float64 `json:"mp"`
|
||||
Disk float64 `json:"dp"`
|
||||
Bandwidth [2]uint64 `json:"b"`
|
||||
GPU map[string]SystemAlertGPUData `json:"g"`
|
||||
Temperatures map[string]float32 `json:"t"`
|
||||
LoadAvg [3]float64 `json:"la"`
|
||||
Battery [2]uint8 `json:"bat"`
|
||||
ExtraFs map[string]SystemAlertFsStats `json:"efs"`
|
||||
}
|
||||
|
||||
type SystemAlertGPUData struct {
|
||||
Usage float64 `json:"u"`
|
||||
}
|
||||
|
||||
type SystemAlertData struct {
|
||||
systemRecord *core.Record
|
||||
alertData CachedAlertData
|
||||
name string
|
||||
unit string
|
||||
val float64
|
||||
threshold float64
|
||||
triggered bool
|
||||
time time.Time
|
||||
count uint8
|
||||
min uint8
|
||||
mapSums map[string]float32
|
||||
descriptor string // override descriptor in notification body (for temp sensor, disk partition, etc)
|
||||
}
|
||||
|
||||
// notification services that support title param
|
||||
var supportsTitle = map[string]struct{}{
|
||||
"bark": {},
|
||||
"discord": {},
|
||||
"gotify": {},
|
||||
"ifttt": {},
|
||||
"join": {},
|
||||
"lark": {},
|
||||
"ntfy": {},
|
||||
"opsgenie": {},
|
||||
"pushbullet": {},
|
||||
"pushover": {},
|
||||
"slack": {},
|
||||
"teams": {},
|
||||
"telegram": {},
|
||||
"zulip": {},
|
||||
}
|
||||
|
||||
// NewAlertManager creates a new AlertManager instance.
|
||||
func NewAlertManager(app hubLike) *AlertManager {
|
||||
am := &AlertManager{
|
||||
hub: app,
|
||||
alertsCache: NewAlertsCache(app),
|
||||
}
|
||||
am.bindEvents()
|
||||
return am
|
||||
}
|
||||
|
||||
// Bind events to the alerts collection lifecycle
|
||||
func (am *AlertManager) bindEvents() {
|
||||
am.hub.OnRecordAfterUpdateSuccess("alerts").BindFunc(updateHistoryOnAlertUpdate)
|
||||
am.hub.OnRecordAfterDeleteSuccess("alerts").BindFunc(resolveHistoryOnAlertDelete)
|
||||
am.hub.OnRecordAfterUpdateSuccess("smart_devices").BindFunc(am.handleSmartDeviceAlert)
|
||||
|
||||
am.hub.OnServe().BindFunc(func(e *core.ServeEvent) error {
|
||||
// Populate all alerts into cache on startup
|
||||
_ = am.alertsCache.PopulateFromDB(true)
|
||||
|
||||
if err := resolveStatusAlerts(e.App); err != nil {
|
||||
e.App.Logger().Error("Failed to resolve stale status alerts", "err", err)
|
||||
}
|
||||
if err := am.restorePendingStatusAlerts(); err != nil {
|
||||
e.App.Logger().Error("Failed to restore pending status alerts", "err", err)
|
||||
}
|
||||
return e.Next()
|
||||
})
|
||||
}
|
||||
|
||||
// IsNotificationSilenced checks if a notification should be silenced based on configured quiet hours
|
||||
func (am *AlertManager) IsNotificationSilenced(userID, systemID string) bool {
|
||||
// Query for quiet hours windows that match this user and system
|
||||
// Include both global windows (system is null/empty) and system-specific windows
|
||||
var filter string
|
||||
var params dbx.Params
|
||||
|
||||
if systemID == "" {
|
||||
// If no systemID provided, only check global windows
|
||||
filter = "user={:user} AND system=''"
|
||||
params = dbx.Params{"user": userID}
|
||||
} else {
|
||||
// Check both global and system-specific windows
|
||||
filter = "user={:user} AND (system='' OR system={:system})"
|
||||
params = dbx.Params{
|
||||
"user": userID,
|
||||
"system": systemID,
|
||||
}
|
||||
}
|
||||
|
||||
quietHourWindows, err := am.hub.FindAllRecords("quiet_hours", dbx.NewExp(filter, params))
|
||||
if err != nil || len(quietHourWindows) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
now := time.Now().UTC()
|
||||
|
||||
for _, window := range quietHourWindows {
|
||||
windowType := window.GetString("type")
|
||||
start := window.GetDateTime("start").Time()
|
||||
end := window.GetDateTime("end").Time()
|
||||
|
||||
if windowType == "daily" {
|
||||
// For daily recurring windows, extract just the time portion and compare
|
||||
// The start/end are stored as full datetime but we only care about HH:MM
|
||||
startHour, startMin, _ := start.Clock()
|
||||
endHour, endMin, _ := end.Clock()
|
||||
nowHour, nowMin, _ := now.Clock()
|
||||
|
||||
// Convert to minutes since midnight for easier comparison
|
||||
startMinutes := startHour*60 + startMin
|
||||
endMinutes := endHour*60 + endMin
|
||||
nowMinutes := nowHour*60 + nowMin
|
||||
|
||||
// Handle case where window crosses midnight
|
||||
if endMinutes < startMinutes {
|
||||
// Window crosses midnight (e.g., 23:00 - 01:00)
|
||||
if nowMinutes >= startMinutes || nowMinutes < endMinutes {
|
||||
return true
|
||||
}
|
||||
} else {
|
||||
// Normal case (e.g., 09:00 - 17:00)
|
||||
if nowMinutes >= startMinutes && nowMinutes < endMinutes {
|
||||
return true
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// One-time window: check if current time is within the date range
|
||||
if (now.After(start) || now.Equal(start)) && now.Before(end) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// SendAlert sends an alert to the user
|
||||
func (am *AlertManager) SendAlert(data AlertMessageData) error {
|
||||
// Check if alert is silenced
|
||||
if am.IsNotificationSilenced(data.UserID, data.SystemID) {
|
||||
am.hub.Logger().Info("Notification silenced", "user", data.UserID, "system", data.SystemID, "title", data.Title)
|
||||
return nil
|
||||
}
|
||||
|
||||
// get user settings
|
||||
record, err := am.hub.FindFirstRecordByFilter(
|
||||
"user_settings", "user={:user}",
|
||||
dbx.Params{"user": data.UserID},
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// unmarshal user settings
|
||||
userAlertSettings := UserNotificationSettings{
|
||||
Emails: []string{},
|
||||
Webhooks: []string{},
|
||||
}
|
||||
if err := record.UnmarshalJSONField("settings", &userAlertSettings); err != nil {
|
||||
am.hub.Logger().Error("Failed to unmarshal user settings", "err", err)
|
||||
}
|
||||
// send alerts via webhooks
|
||||
for _, webhook := range userAlertSettings.Webhooks {
|
||||
if err := am.SendShoutrrrAlert(webhook, data.Title, data.Message, data.Link, data.LinkText); err != nil {
|
||||
am.hub.Logger().Error("Failed to send shoutrrr alert", "err", err)
|
||||
}
|
||||
}
|
||||
// send alerts via email
|
||||
if len(userAlertSettings.Emails) == 0 {
|
||||
return nil
|
||||
}
|
||||
addresses := []mail.Address{}
|
||||
for _, email := range userAlertSettings.Emails {
|
||||
addresses = append(addresses, mail.Address{Address: email})
|
||||
}
|
||||
message := mailer.Message{
|
||||
To: addresses,
|
||||
Subject: data.Title,
|
||||
Text: data.Message + fmt.Sprintf("\n\n%s", data.Link),
|
||||
From: mail.Address{
|
||||
Address: am.hub.Settings().Meta.SenderAddress,
|
||||
Name: am.hub.Settings().Meta.SenderName,
|
||||
},
|
||||
}
|
||||
err = am.hub.NewMailClient().Send(&message)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
am.hub.Logger().Info("Sent email alert", "to", message.To, "subj", message.Subject)
|
||||
return nil
|
||||
}
|
||||
|
||||
// SendShoutrrrAlert sends an alert via a Shoutrrr URL
|
||||
func (am *AlertManager) SendShoutrrrAlert(notificationUrl, title, message, link, linkText string) error {
|
||||
// Parse the URL
|
||||
parsedURL, err := url.Parse(notificationUrl)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error parsing URL: %v", err)
|
||||
}
|
||||
scheme := parsedURL.Scheme
|
||||
queryParams := parsedURL.Query()
|
||||
|
||||
// Add title
|
||||
if _, ok := supportsTitle[scheme]; ok {
|
||||
queryParams.Add("title", title)
|
||||
} else if scheme == "mattermost" {
|
||||
// use markdown title for mattermost
|
||||
message = "##### " + title + "\n\n" + message
|
||||
} else if scheme == "generic" && queryParams.Has("template") {
|
||||
// add title as property if using generic with template json
|
||||
titleKey := queryParams.Get("titlekey")
|
||||
if titleKey == "" {
|
||||
titleKey = "title"
|
||||
}
|
||||
queryParams.Add("$"+titleKey, title)
|
||||
} else {
|
||||
// otherwise just add title to message
|
||||
message = title + "\n\n" + message
|
||||
}
|
||||
|
||||
// Add link
|
||||
switch scheme {
|
||||
case "ntfy":
|
||||
queryParams.Add("Actions", fmt.Sprintf("view, %s, %s", linkText, link))
|
||||
case "lark":
|
||||
queryParams.Add("link", link)
|
||||
case "bark":
|
||||
queryParams.Add("url", link)
|
||||
default:
|
||||
message += "\n\n" + link
|
||||
}
|
||||
|
||||
// Encode the modified query parameters back into the URL
|
||||
parsedURL.RawQuery = queryParams.Encode()
|
||||
// log.Println("URL after modification:", parsedURL.String())
|
||||
|
||||
err = shoutrrr.Send(parsedURL.String(), message)
|
||||
|
||||
if err == nil {
|
||||
am.hub.Logger().Info("Sent shoutrrr alert", "title", title)
|
||||
} else {
|
||||
am.hub.Logger().Error("Error sending shoutrrr alert", "err", err)
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// setAlertTriggered updates the "triggered" status of an alert record in the database
|
||||
func (am *AlertManager) setAlertTriggered(alert CachedAlertData, triggered bool) error {
|
||||
alertRecord, err := am.hub.FindRecordById("alerts", alert.Id)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
alertRecord.Set("triggered", triggered)
|
||||
return am.hub.Save(alertRecord)
|
||||
}
|
||||
@@ -0,0 +1,192 @@
|
||||
package alerts
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"errors"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/pocketbase/dbx"
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
)
|
||||
|
||||
// UpsertUserAlerts handles API request to create or update alerts for a user
|
||||
// across multiple systems (POST /api/beszel/user-alerts)
|
||||
func UpsertUserAlerts(e *core.RequestEvent) error {
|
||||
userID := e.Auth.Id
|
||||
|
||||
reqData := struct {
|
||||
Min uint8 `json:"min"`
|
||||
Value float64 `json:"value"`
|
||||
Name string `json:"name"`
|
||||
Systems []string `json:"systems"`
|
||||
Overwrite bool `json:"overwrite"`
|
||||
}{}
|
||||
err := e.BindBody(&reqData)
|
||||
if err != nil || userID == "" || reqData.Name == "" || len(reqData.Systems) == 0 {
|
||||
return e.BadRequestError("Bad data", err)
|
||||
}
|
||||
|
||||
alertsCollection, err := e.App.FindCachedCollectionByNameOrId("alerts")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = e.App.RunInTransaction(func(txApp core.App) error {
|
||||
for _, systemId := range reqData.Systems {
|
||||
// find existing matching alert
|
||||
alertRecord, err := txApp.FindFirstRecordByFilter(alertsCollection,
|
||||
"system={:system} && name={:name} && user={:user}",
|
||||
dbx.Params{"system": systemId, "name": reqData.Name, "user": userID})
|
||||
|
||||
if err != nil && !errors.Is(err, sql.ErrNoRows) {
|
||||
return err
|
||||
}
|
||||
|
||||
// skip if alert already exists and overwrite is not set
|
||||
if !reqData.Overwrite && alertRecord != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// create new alert if it doesn't exist
|
||||
if alertRecord == nil {
|
||||
alertRecord = core.NewRecord(alertsCollection)
|
||||
alertRecord.Set("user", userID)
|
||||
alertRecord.Set("system", systemId)
|
||||
alertRecord.Set("name", reqData.Name)
|
||||
}
|
||||
|
||||
alertRecord.Set("value", reqData.Value)
|
||||
alertRecord.Set("min", reqData.Min)
|
||||
|
||||
if err := txApp.SaveNoValidate(alertRecord); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return e.JSON(http.StatusOK, map[string]any{"success": true})
|
||||
}
|
||||
|
||||
// DeleteUserAlerts handles API request to delete alerts for a user across multiple systems
|
||||
// (DELETE /api/beszel/user-alerts)
|
||||
func DeleteUserAlerts(e *core.RequestEvent) error {
|
||||
userID := e.Auth.Id
|
||||
|
||||
reqData := struct {
|
||||
AlertName string `json:"name"`
|
||||
Systems []string `json:"systems"`
|
||||
}{}
|
||||
err := e.BindBody(&reqData)
|
||||
if err != nil || userID == "" || reqData.AlertName == "" || len(reqData.Systems) == 0 {
|
||||
return e.BadRequestError("Bad data", err)
|
||||
}
|
||||
|
||||
var numDeleted uint16
|
||||
|
||||
err = e.App.RunInTransaction(func(txApp core.App) error {
|
||||
for _, systemId := range reqData.Systems {
|
||||
// Find existing alert to delete
|
||||
alertRecord, err := txApp.FindFirstRecordByFilter("alerts",
|
||||
"system={:system} && name={:name} && user={:user}",
|
||||
dbx.Params{"system": systemId, "name": reqData.AlertName, "user": userID})
|
||||
|
||||
if err != nil {
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
// alert doesn't exist, continue to next system
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
if err := txApp.Delete(alertRecord); err != nil {
|
||||
return err
|
||||
}
|
||||
numDeleted++
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return e.JSON(http.StatusOK, map[string]any{"success": true, "count": numDeleted})
|
||||
}
|
||||
|
||||
// SendTestNotification handles API request to send a test notification to a specified Shoutrrr URL
|
||||
func (am *AlertManager) SendTestNotification(e *core.RequestEvent) error {
|
||||
var data struct {
|
||||
URL string `json:"url"`
|
||||
}
|
||||
err := e.BindBody(&data)
|
||||
if err != nil || data.URL == "" {
|
||||
return e.BadRequestError("URL is required", err)
|
||||
}
|
||||
// Only allow admins to send test notifications to internal URLs
|
||||
if !e.Auth.IsSuperuser() && e.Auth.GetString("role") != "admin" {
|
||||
internalURL, err := isInternalURL(data.URL)
|
||||
if err != nil {
|
||||
return e.BadRequestError(err.Error(), nil)
|
||||
}
|
||||
if internalURL {
|
||||
return e.ForbiddenError("Only admins can send to internal destinations", nil)
|
||||
}
|
||||
}
|
||||
err = am.SendShoutrrrAlert(data.URL, "Test Alert", "This is a notification from Beszel.", am.hub.Settings().Meta.AppURL, "View Beszel")
|
||||
if err != nil {
|
||||
return e.JSON(200, map[string]string{"err": err.Error()})
|
||||
}
|
||||
return e.JSON(200, map[string]bool{"err": false})
|
||||
}
|
||||
|
||||
// isInternalURL checks if the given shoutrrr URL points to an internal destination (localhost or private IP)
|
||||
func isInternalURL(rawURL string) (bool, error) {
|
||||
parsedURL, err := url.Parse(rawURL)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
host := parsedURL.Hostname()
|
||||
if host == "" {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
if strings.EqualFold(host, "localhost") {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
if ip := net.ParseIP(host); ip != nil {
|
||||
return isInternalIP(ip), nil
|
||||
}
|
||||
|
||||
// Some Shoutrrr URLs use the host position for service identifiers rather than a
|
||||
// network hostname (for example, discord://token@webhookid). Restrict DNS lookups
|
||||
// to names that look like actual hostnames so valid service URLs keep working.
|
||||
if !strings.Contains(host, ".") {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
ips, err := net.LookupIP(host)
|
||||
if err != nil {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
if slices.ContainsFunc(ips, isInternalIP) {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func isInternalIP(ip net.IP) bool {
|
||||
return ip.IsPrivate() || ip.IsLoopback() || ip.IsUnspecified()
|
||||
}
|
||||
@@ -0,0 +1,501 @@
|
||||
//go:build testing
|
||||
|
||||
package alerts_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/henrygd/beszel/internal/alerts"
|
||||
beszelTests "github.com/henrygd/beszel/internal/tests"
|
||||
pbTests "github.com/pocketbase/pocketbase/tests"
|
||||
|
||||
"github.com/pocketbase/dbx"
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
// marshal to json and return an io.Reader (for use in ApiScenario.Body)
|
||||
func jsonReader(v any) io.Reader {
|
||||
data, err := json.Marshal(v)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return bytes.NewReader(data)
|
||||
}
|
||||
|
||||
func TestIsInternalURL(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
url string
|
||||
internal bool
|
||||
}{
|
||||
{name: "loopback ipv4", url: "generic://127.0.0.1", internal: true},
|
||||
{name: "localhost hostname", url: "generic://localhost", internal: true},
|
||||
{name: "localhost hostname", url: "generic+http://localhost/api/v1/postStuff", internal: true},
|
||||
{name: "localhost hostname", url: "generic+http://127.0.0.1:8080/api/v1/postStuff", internal: true},
|
||||
{name: "localhost hostname", url: "generic+https://beszel.dev/api/v1/postStuff", internal: false},
|
||||
{name: "public ipv4", url: "generic://8.8.8.8", internal: false},
|
||||
{name: "token style service url", url: "discord://abc123@123456789", internal: false},
|
||||
{name: "single label service url", url: "slack://token@team/channel", internal: false},
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.name, func(t *testing.T) {
|
||||
internal, err := alerts.IsInternalURL(testCase.url)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, testCase.internal, internal)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestUserAlertsApi(t *testing.T) {
|
||||
hub, _ := beszelTests.NewTestHub(t.TempDir())
|
||||
defer hub.Cleanup()
|
||||
|
||||
hub.StartHub()
|
||||
|
||||
user1, _ := beszelTests.CreateUser(hub, "alertstest@example.com", "password")
|
||||
user1Token, _ := user1.NewAuthToken()
|
||||
|
||||
user2, _ := beszelTests.CreateUser(hub, "alertstest2@example.com", "password")
|
||||
user2Token, _ := user2.NewAuthToken()
|
||||
|
||||
system1, _ := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "system1",
|
||||
"users": []string{user1.Id},
|
||||
"host": "127.0.0.1",
|
||||
})
|
||||
|
||||
system2, _ := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "system2",
|
||||
"users": []string{user1.Id, user2.Id},
|
||||
"host": "127.0.0.2",
|
||||
})
|
||||
|
||||
userRecords, _ := hub.CountRecords("users")
|
||||
assert.EqualValues(t, 2, userRecords, "all users should be created")
|
||||
|
||||
systemRecords, _ := hub.CountRecords("systems")
|
||||
assert.EqualValues(t, 2, systemRecords, "all systems should be created")
|
||||
|
||||
testAppFactory := func(t testing.TB) *pbTests.TestApp {
|
||||
return hub.TestApp
|
||||
}
|
||||
|
||||
scenarios := []beszelTests.ApiScenario{
|
||||
// {
|
||||
// Name: "GET not implemented - returns index",
|
||||
// Method: http.MethodGet,
|
||||
// URL: "/api/beszel/user-alerts",
|
||||
// ExpectedStatus: 200,
|
||||
// ExpectedContent: []string{"<html ", "globalThis.BESZEL"},
|
||||
// TestAppFactory: testAppFactory,
|
||||
// },
|
||||
{
|
||||
Name: "POST no auth",
|
||||
Method: http.MethodPost,
|
||||
URL: "/api/beszel/user-alerts",
|
||||
ExpectedStatus: 401,
|
||||
ExpectedContent: []string{"requires valid"},
|
||||
TestAppFactory: testAppFactory,
|
||||
},
|
||||
{
|
||||
Name: "POST no body",
|
||||
Method: http.MethodPost,
|
||||
URL: "/api/beszel/user-alerts",
|
||||
Headers: map[string]string{
|
||||
"Authorization": user1Token,
|
||||
},
|
||||
ExpectedStatus: 400,
|
||||
ExpectedContent: []string{"Bad data"},
|
||||
TestAppFactory: testAppFactory,
|
||||
},
|
||||
{
|
||||
Name: "POST bad data",
|
||||
Method: http.MethodPost,
|
||||
URL: "/api/beszel/user-alerts",
|
||||
Headers: map[string]string{
|
||||
"Authorization": user1Token,
|
||||
},
|
||||
ExpectedStatus: 400,
|
||||
ExpectedContent: []string{"Bad data"},
|
||||
TestAppFactory: testAppFactory,
|
||||
Body: jsonReader(map[string]any{
|
||||
"invalidField": "this should cause validation error",
|
||||
"threshold": "not a number",
|
||||
}),
|
||||
},
|
||||
{
|
||||
Name: "POST malformed JSON",
|
||||
Method: http.MethodPost,
|
||||
URL: "/api/beszel/user-alerts",
|
||||
Headers: map[string]string{
|
||||
"Authorization": user1Token,
|
||||
},
|
||||
ExpectedStatus: 400,
|
||||
ExpectedContent: []string{"Bad data"},
|
||||
TestAppFactory: testAppFactory,
|
||||
Body: strings.NewReader(`{"alertType": "cpu", "threshold": 80, "enabled": true,}`),
|
||||
},
|
||||
{
|
||||
Name: "POST valid alert data multiple systems",
|
||||
Method: http.MethodPost,
|
||||
URL: "/api/beszel/user-alerts",
|
||||
Headers: map[string]string{
|
||||
"Authorization": user1Token,
|
||||
},
|
||||
ExpectedStatus: 200,
|
||||
ExpectedContent: []string{"\"success\":true"},
|
||||
TestAppFactory: testAppFactory,
|
||||
Body: jsonReader(map[string]any{
|
||||
"name": "CPU",
|
||||
"value": 69,
|
||||
"min": 9,
|
||||
"systems": []string{system1.Id, system2.Id},
|
||||
"overwrite": false,
|
||||
}),
|
||||
AfterTestFunc: func(t testing.TB, app *pbTests.TestApp, res *http.Response) {
|
||||
// check total alerts
|
||||
alerts, _ := app.CountRecords("alerts")
|
||||
assert.EqualValues(t, 2, alerts, "should have 2 alerts")
|
||||
// check alert has correct values
|
||||
matchingAlerts, _ := app.CountRecords("alerts", dbx.HashExp{"name": "CPU", "user": user1.Id, "system": system1.Id, "value": 69, "min": 9})
|
||||
assert.EqualValues(t, 1, matchingAlerts, "should have 1 alert")
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "POST valid alert data single system",
|
||||
Method: http.MethodPost,
|
||||
URL: "/api/beszel/user-alerts",
|
||||
Headers: map[string]string{
|
||||
"Authorization": user1Token,
|
||||
},
|
||||
ExpectedStatus: 200,
|
||||
ExpectedContent: []string{"\"success\":true"},
|
||||
TestAppFactory: testAppFactory,
|
||||
Body: jsonReader(map[string]any{
|
||||
"name": "Memory",
|
||||
"systems": []string{system1.Id},
|
||||
"value": 90,
|
||||
"min": 10,
|
||||
}),
|
||||
AfterTestFunc: func(t testing.TB, app *pbTests.TestApp, res *http.Response) {
|
||||
user1Alerts, _ := app.CountRecords("alerts", dbx.HashExp{"user": user1.Id})
|
||||
assert.EqualValues(t, 3, user1Alerts, "should have 3 alerts")
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "Overwrite: false, should not overwrite existing alert",
|
||||
Method: http.MethodPost,
|
||||
URL: "/api/beszel/user-alerts",
|
||||
Headers: map[string]string{
|
||||
"Authorization": user1Token,
|
||||
},
|
||||
ExpectedStatus: 200,
|
||||
ExpectedContent: []string{"\"success\":true"},
|
||||
TestAppFactory: testAppFactory,
|
||||
Body: jsonReader(map[string]any{
|
||||
"name": "CPU",
|
||||
"value": 45,
|
||||
"min": 5,
|
||||
"systems": []string{system1.Id},
|
||||
"overwrite": false,
|
||||
}),
|
||||
BeforeTestFunc: func(t testing.TB, app *pbTests.TestApp, e *core.ServeEvent) {
|
||||
beszelTests.ClearCollection(t, app, "alerts")
|
||||
beszelTests.CreateRecord(app, "alerts", map[string]any{
|
||||
"name": "CPU",
|
||||
"system": system1.Id,
|
||||
"user": user1.Id,
|
||||
"value": 80,
|
||||
"min": 10,
|
||||
})
|
||||
},
|
||||
AfterTestFunc: func(t testing.TB, app *pbTests.TestApp, res *http.Response) {
|
||||
alerts, _ := app.CountRecords("alerts")
|
||||
assert.EqualValues(t, 1, alerts, "should have 1 alert")
|
||||
alert, _ := app.FindFirstRecordByFilter("alerts", "name = 'CPU' && user = {:user}", dbx.Params{"user": user1.Id})
|
||||
assert.EqualValues(t, 80, alert.Get("value"), "should have 80 as value")
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "Overwrite: true, should overwrite existing alert",
|
||||
Method: http.MethodPost,
|
||||
URL: "/api/beszel/user-alerts",
|
||||
Headers: map[string]string{
|
||||
"Authorization": user2Token,
|
||||
},
|
||||
ExpectedStatus: 200,
|
||||
ExpectedContent: []string{"\"success\":true"},
|
||||
TestAppFactory: testAppFactory,
|
||||
Body: jsonReader(map[string]any{
|
||||
"name": "CPU",
|
||||
"value": 45,
|
||||
"min": 5,
|
||||
"systems": []string{system2.Id},
|
||||
"overwrite": true,
|
||||
}),
|
||||
BeforeTestFunc: func(t testing.TB, app *pbTests.TestApp, e *core.ServeEvent) {
|
||||
beszelTests.ClearCollection(t, app, "alerts")
|
||||
beszelTests.CreateRecord(app, "alerts", map[string]any{
|
||||
"name": "CPU",
|
||||
"system": system2.Id,
|
||||
"user": user2.Id,
|
||||
"value": 80,
|
||||
"min": 10,
|
||||
})
|
||||
},
|
||||
AfterTestFunc: func(t testing.TB, app *pbTests.TestApp, res *http.Response) {
|
||||
alerts, _ := app.CountRecords("alerts")
|
||||
assert.EqualValues(t, 1, alerts, "should have 1 alert")
|
||||
alert, _ := app.FindFirstRecordByFilter("alerts", "name = 'CPU' && user = {:user}", dbx.Params{"user": user2.Id})
|
||||
assert.EqualValues(t, 45, alert.Get("value"), "should have 45 as value")
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "DELETE no auth",
|
||||
Method: http.MethodDelete,
|
||||
URL: "/api/beszel/user-alerts",
|
||||
ExpectedStatus: 401,
|
||||
ExpectedContent: []string{"requires valid"},
|
||||
TestAppFactory: testAppFactory,
|
||||
Body: jsonReader(map[string]any{
|
||||
"name": "CPU",
|
||||
"systems": []string{system1.Id},
|
||||
}),
|
||||
BeforeTestFunc: func(t testing.TB, app *pbTests.TestApp, e *core.ServeEvent) {
|
||||
beszelTests.ClearCollection(t, app, "alerts")
|
||||
beszelTests.CreateRecord(app, "alerts", map[string]any{
|
||||
"name": "CPU",
|
||||
"system": system1.Id,
|
||||
"user": user1.Id,
|
||||
"value": 80,
|
||||
"min": 10,
|
||||
})
|
||||
},
|
||||
AfterTestFunc: func(t testing.TB, app *pbTests.TestApp, res *http.Response) {
|
||||
alerts, _ := app.CountRecords("alerts")
|
||||
assert.EqualValues(t, 1, alerts, "should have 1 alert")
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "DELETE alert",
|
||||
Method: http.MethodDelete,
|
||||
URL: "/api/beszel/user-alerts",
|
||||
Headers: map[string]string{
|
||||
"Authorization": user1Token,
|
||||
},
|
||||
ExpectedStatus: 200,
|
||||
ExpectedContent: []string{"\"count\":1", "\"success\":true"},
|
||||
TestAppFactory: testAppFactory,
|
||||
Body: jsonReader(map[string]any{
|
||||
"name": "CPU",
|
||||
"systems": []string{system1.Id},
|
||||
}),
|
||||
BeforeTestFunc: func(t testing.TB, app *pbTests.TestApp, e *core.ServeEvent) {
|
||||
beszelTests.ClearCollection(t, app, "alerts")
|
||||
beszelTests.CreateRecord(app, "alerts", map[string]any{
|
||||
"name": "CPU",
|
||||
"system": system1.Id,
|
||||
"user": user1.Id,
|
||||
"value": 80,
|
||||
"min": 10,
|
||||
})
|
||||
},
|
||||
AfterTestFunc: func(t testing.TB, app *pbTests.TestApp, res *http.Response) {
|
||||
alerts, _ := app.CountRecords("alerts")
|
||||
assert.Zero(t, alerts, "should have 0 alerts")
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "DELETE alert multiple systems",
|
||||
Method: http.MethodDelete,
|
||||
URL: "/api/beszel/user-alerts",
|
||||
Headers: map[string]string{
|
||||
"Authorization": user1Token,
|
||||
},
|
||||
ExpectedStatus: 200,
|
||||
ExpectedContent: []string{"\"count\":2", "\"success\":true"},
|
||||
TestAppFactory: testAppFactory,
|
||||
Body: jsonReader(map[string]any{
|
||||
"name": "Memory",
|
||||
"systems": []string{system1.Id, system2.Id},
|
||||
}),
|
||||
BeforeTestFunc: func(t testing.TB, app *pbTests.TestApp, e *core.ServeEvent) {
|
||||
beszelTests.ClearCollection(t, app, "alerts")
|
||||
for _, systemId := range []string{system1.Id, system2.Id} {
|
||||
_, err := beszelTests.CreateRecord(app, "alerts", map[string]any{
|
||||
"name": "Memory",
|
||||
"system": systemId,
|
||||
"user": user1.Id,
|
||||
"value": 90,
|
||||
"min": 10,
|
||||
})
|
||||
assert.NoError(t, err, "should create alert")
|
||||
}
|
||||
alerts, _ := app.CountRecords("alerts")
|
||||
assert.EqualValues(t, 2, alerts, "should have 2 alerts")
|
||||
},
|
||||
AfterTestFunc: func(t testing.TB, app *pbTests.TestApp, res *http.Response) {
|
||||
alerts, _ := app.CountRecords("alerts")
|
||||
assert.Zero(t, alerts, "should have 0 alerts")
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "User 2 should not be able to delete alert of user 1",
|
||||
Method: http.MethodDelete,
|
||||
URL: "/api/beszel/user-alerts",
|
||||
Headers: map[string]string{
|
||||
"Authorization": user2Token,
|
||||
},
|
||||
ExpectedStatus: 200,
|
||||
ExpectedContent: []string{"\"count\":1", "\"success\":true"},
|
||||
TestAppFactory: testAppFactory,
|
||||
Body: jsonReader(map[string]any{
|
||||
"name": "CPU",
|
||||
"systems": []string{system2.Id},
|
||||
}),
|
||||
BeforeTestFunc: func(t testing.TB, app *pbTests.TestApp, e *core.ServeEvent) {
|
||||
beszelTests.ClearCollection(t, app, "alerts")
|
||||
for _, user := range []string{user1.Id, user2.Id} {
|
||||
beszelTests.CreateRecord(app, "alerts", map[string]any{
|
||||
"name": "CPU",
|
||||
"system": system2.Id,
|
||||
"user": user,
|
||||
"value": 80,
|
||||
"min": 10,
|
||||
})
|
||||
}
|
||||
alerts, _ := app.CountRecords("alerts")
|
||||
assert.EqualValues(t, 2, alerts, "should have 2 alerts")
|
||||
user1AlertCount, _ := app.CountRecords("alerts", dbx.HashExp{"user": user1.Id})
|
||||
assert.EqualValues(t, 1, user1AlertCount, "should have 1 alert")
|
||||
user2AlertCount, _ := app.CountRecords("alerts", dbx.HashExp{"user": user2.Id})
|
||||
assert.EqualValues(t, 1, user2AlertCount, "should have 1 alert")
|
||||
},
|
||||
AfterTestFunc: func(t testing.TB, app *pbTests.TestApp, res *http.Response) {
|
||||
user1AlertCount, _ := app.CountRecords("alerts", dbx.HashExp{"user": user1.Id})
|
||||
assert.EqualValues(t, 1, user1AlertCount, "should have 1 alert")
|
||||
user2AlertCount, _ := app.CountRecords("alerts", dbx.HashExp{"user": user2.Id})
|
||||
assert.Zero(t, user2AlertCount, "should have 0 alerts")
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, scenario := range scenarios {
|
||||
scenario.Test(t)
|
||||
}
|
||||
}
|
||||
func TestSendTestNotification(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
userToken, err := user.NewAuthToken()
|
||||
|
||||
adminUser, err := beszelTests.CreateUserWithRole(hub, "admin@example.com", "password123", "admin")
|
||||
assert.NoError(t, err, "Failed to create admin user")
|
||||
adminUserToken, err := adminUser.NewAuthToken()
|
||||
|
||||
superuser, err := beszelTests.CreateSuperuser(hub, "superuser@example.com", "password123")
|
||||
assert.NoError(t, err, "Failed to create superuser")
|
||||
superuserToken, err := superuser.NewAuthToken()
|
||||
assert.NoError(t, err, "Failed to create superuser auth token")
|
||||
|
||||
testAppFactory := func(t testing.TB) *pbTests.TestApp {
|
||||
return hub.TestApp
|
||||
}
|
||||
|
||||
scenarios := []beszelTests.ApiScenario{
|
||||
{
|
||||
Name: "POST /test-notification - no auth should fail",
|
||||
Method: http.MethodPost,
|
||||
URL: "/api/beszel/test-notification",
|
||||
ExpectedStatus: 401,
|
||||
ExpectedContent: []string{"requires valid"},
|
||||
TestAppFactory: testAppFactory,
|
||||
Body: jsonReader(map[string]any{
|
||||
"url": "generic://127.0.0.1",
|
||||
}),
|
||||
},
|
||||
{
|
||||
Name: "POST /test-notification - with external auth should succeed",
|
||||
Method: http.MethodPost,
|
||||
URL: "/api/beszel/test-notification",
|
||||
TestAppFactory: testAppFactory,
|
||||
Headers: map[string]string{
|
||||
"Authorization": userToken,
|
||||
},
|
||||
Body: jsonReader(map[string]any{
|
||||
"url": "generic://8.8.8.8",
|
||||
}),
|
||||
ExpectedStatus: 200,
|
||||
ExpectedContent: []string{"\"err\":"},
|
||||
},
|
||||
{
|
||||
Name: "POST /test-notification - local url with user auth should fail",
|
||||
Method: http.MethodPost,
|
||||
URL: "/api/beszel/test-notification",
|
||||
TestAppFactory: testAppFactory,
|
||||
Headers: map[string]string{
|
||||
"Authorization": userToken,
|
||||
},
|
||||
Body: jsonReader(map[string]any{
|
||||
"url": "generic://localhost:8010",
|
||||
}),
|
||||
ExpectedStatus: 403,
|
||||
ExpectedContent: []string{"Only admins"},
|
||||
},
|
||||
{
|
||||
Name: "POST /test-notification - internal url with user auth should fail",
|
||||
Method: http.MethodPost,
|
||||
URL: "/api/beszel/test-notification",
|
||||
TestAppFactory: testAppFactory,
|
||||
Headers: map[string]string{
|
||||
"Authorization": userToken,
|
||||
},
|
||||
Body: jsonReader(map[string]any{
|
||||
"url": "generic+http://192.168.0.5",
|
||||
}),
|
||||
ExpectedStatus: 403,
|
||||
ExpectedContent: []string{"Only admins"},
|
||||
},
|
||||
{
|
||||
Name: "POST /test-notification - internal url with admin auth should succeed",
|
||||
Method: http.MethodPost,
|
||||
URL: "/api/beszel/test-notification",
|
||||
TestAppFactory: testAppFactory,
|
||||
Headers: map[string]string{
|
||||
"Authorization": adminUserToken,
|
||||
},
|
||||
Body: jsonReader(map[string]any{
|
||||
"url": "generic://127.0.0.1",
|
||||
}),
|
||||
ExpectedStatus: 200,
|
||||
ExpectedContent: []string{"\"err\":"},
|
||||
},
|
||||
{
|
||||
Name: "POST /test-notification - internal url with superuser auth should succeed",
|
||||
Method: http.MethodPost,
|
||||
URL: "/api/beszel/test-notification",
|
||||
TestAppFactory: testAppFactory,
|
||||
Headers: map[string]string{
|
||||
"Authorization": superuserToken,
|
||||
},
|
||||
Body: jsonReader(map[string]any{
|
||||
"url": "generic://127.0.0.1",
|
||||
}),
|
||||
ExpectedStatus: 200,
|
||||
ExpectedContent: []string{"\"err\":"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, scenario := range scenarios {
|
||||
scenario.Test(t)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,386 @@
|
||||
//go:build testing
|
||||
|
||||
package alerts_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
beszelTests "github.com/henrygd/beszel/internal/tests"
|
||||
|
||||
"github.com/pocketbase/dbx"
|
||||
"github.com/pocketbase/pocketbase/tools/types"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TestBatteryAlertLogic tests that battery alerts trigger when value drops BELOW threshold
|
||||
// (opposite of other alerts like CPU, Memory, etc. which trigger when exceeding threshold)
|
||||
func TestBatteryAlertLogic(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create a system
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
require.NoError(t, err)
|
||||
systemRecord := systems[0]
|
||||
|
||||
// Create a battery alert with threshold of 20% and min of 1 minute (immediate trigger)
|
||||
batteryAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Battery",
|
||||
"system": systemRecord.Id,
|
||||
"user": user.Id,
|
||||
"value": 20, // threshold: 20%
|
||||
"min": 1, // 1 minute (immediate trigger for testing)
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify alert is not triggered initially
|
||||
assert.False(t, batteryAlert.GetBool("triggered"), "Alert should not be triggered initially")
|
||||
|
||||
// Create system stats with battery at 50% (above threshold - should NOT trigger)
|
||||
statsHigh := system.Stats{
|
||||
Cpu: 10,
|
||||
MemPct: 30,
|
||||
DiskPct: 40,
|
||||
Battery: [2]uint8{50, 1}, // 50% battery, discharging
|
||||
}
|
||||
statsHighJSON, _ := json.Marshal(statsHigh)
|
||||
_, err = beszelTests.CreateRecord(hub, "system_stats", map[string]any{
|
||||
"system": systemRecord.Id,
|
||||
"type": "1m",
|
||||
"stats": string(statsHighJSON),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create CombinedData for the alert handler
|
||||
combinedDataHigh := &system.CombinedData{
|
||||
Stats: statsHigh,
|
||||
Info: system.Info{
|
||||
AgentVersion: "0.12.0",
|
||||
Cpu: 10,
|
||||
MemPct: 30,
|
||||
DiskPct: 40,
|
||||
},
|
||||
}
|
||||
|
||||
// Simulate system update time
|
||||
systemRecord.Set("updated", time.Now().UTC())
|
||||
err = hub.SaveNoValidate(systemRecord)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Handle system alerts with high battery
|
||||
am := hub.GetAlertManager()
|
||||
err = am.HandleSystemAlerts(systemRecord, combinedDataHigh)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify alert is still NOT triggered (battery 50% is above threshold 20%)
|
||||
batteryAlert, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": batteryAlert.Id})
|
||||
require.NoError(t, err)
|
||||
assert.False(t, batteryAlert.GetBool("triggered"), "Alert should NOT be triggered when battery (50%%) is above threshold (20%%)")
|
||||
|
||||
// Now create stats with battery at 15% (below threshold - should trigger)
|
||||
statsLow := system.Stats{
|
||||
Cpu: 10,
|
||||
MemPct: 30,
|
||||
DiskPct: 40,
|
||||
Battery: [2]uint8{15, 1}, // 15% battery, discharging
|
||||
}
|
||||
statsLowJSON, _ := json.Marshal(statsLow)
|
||||
_, err = beszelTests.CreateRecord(hub, "system_stats", map[string]any{
|
||||
"system": systemRecord.Id,
|
||||
"type": "1m",
|
||||
"stats": string(statsLowJSON),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
combinedDataLow := &system.CombinedData{
|
||||
Stats: statsLow,
|
||||
Info: system.Info{
|
||||
AgentVersion: "0.12.0",
|
||||
Cpu: 10,
|
||||
MemPct: 30,
|
||||
DiskPct: 40,
|
||||
},
|
||||
}
|
||||
|
||||
// Update system timestamp
|
||||
systemRecord.Set("updated", time.Now().UTC())
|
||||
err = hub.SaveNoValidate(systemRecord)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Handle system alerts with low battery
|
||||
err = am.HandleSystemAlerts(systemRecord, combinedDataLow)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Wait for the alert to be processed
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
|
||||
// Verify alert IS triggered (battery 15% is below threshold 20%)
|
||||
batteryAlert, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": batteryAlert.Id})
|
||||
require.NoError(t, err)
|
||||
assert.True(t, batteryAlert.GetBool("triggered"), "Alert SHOULD be triggered when battery (15%%) drops below threshold (20%%)")
|
||||
|
||||
// Now test resolution: battery goes back above threshold
|
||||
statsRecovered := system.Stats{
|
||||
Cpu: 10,
|
||||
MemPct: 30,
|
||||
DiskPct: 40,
|
||||
Battery: [2]uint8{25, 1}, // 25% battery, discharging
|
||||
}
|
||||
statsRecoveredJSON, _ := json.Marshal(statsRecovered)
|
||||
_, err = beszelTests.CreateRecord(hub, "system_stats", map[string]any{
|
||||
"system": systemRecord.Id,
|
||||
"type": "1m",
|
||||
"stats": string(statsRecoveredJSON),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
combinedDataRecovered := &system.CombinedData{
|
||||
Stats: statsRecovered,
|
||||
Info: system.Info{
|
||||
AgentVersion: "0.12.0",
|
||||
Cpu: 10,
|
||||
MemPct: 30,
|
||||
DiskPct: 40,
|
||||
},
|
||||
}
|
||||
|
||||
// Update system timestamp
|
||||
systemRecord.Set("updated", time.Now().UTC())
|
||||
err = hub.SaveNoValidate(systemRecord)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Handle system alerts with recovered battery
|
||||
err = am.HandleSystemAlerts(systemRecord, combinedDataRecovered)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Wait for the alert to be processed
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
|
||||
// Verify alert is now resolved (battery 25% is above threshold 20%)
|
||||
batteryAlert, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": batteryAlert.Id})
|
||||
require.NoError(t, err)
|
||||
assert.False(t, batteryAlert.GetBool("triggered"), "Alert should be resolved when battery (25%%) goes above threshold (20%%)")
|
||||
}
|
||||
|
||||
// TestBatteryAlertNoBattery verifies that systems without battery data don't trigger alerts
|
||||
func TestBatteryAlertNoBattery(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create a system
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
require.NoError(t, err)
|
||||
systemRecord := systems[0]
|
||||
|
||||
// Create a battery alert
|
||||
batteryAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Battery",
|
||||
"system": systemRecord.Id,
|
||||
"user": user.Id,
|
||||
"value": 20,
|
||||
"min": 1,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create stats with NO battery data (Battery[0] = 0)
|
||||
statsNoBattery := system.Stats{
|
||||
Cpu: 10,
|
||||
MemPct: 30,
|
||||
DiskPct: 40,
|
||||
Battery: [2]uint8{0, 0}, // No battery
|
||||
}
|
||||
|
||||
combinedData := &system.CombinedData{
|
||||
Stats: statsNoBattery,
|
||||
Info: system.Info{
|
||||
AgentVersion: "0.12.0",
|
||||
Cpu: 10,
|
||||
MemPct: 30,
|
||||
DiskPct: 40,
|
||||
},
|
||||
}
|
||||
|
||||
// Simulate system update time
|
||||
systemRecord.Set("updated", time.Now().UTC())
|
||||
err = hub.SaveNoValidate(systemRecord)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Handle system alerts
|
||||
am := hub.GetAlertManager()
|
||||
err = am.HandleSystemAlerts(systemRecord, combinedData)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Wait a moment for processing
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
|
||||
// Verify alert is NOT triggered (no battery data should skip the alert)
|
||||
batteryAlert, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": batteryAlert.Id})
|
||||
require.NoError(t, err)
|
||||
assert.False(t, batteryAlert.GetBool("triggered"), "Alert should NOT be triggered when system has no battery")
|
||||
}
|
||||
|
||||
// TestBatteryAlertAveragedSamples tests battery alerts with min > 1 (averaging multiple samples)
|
||||
// This ensures the inverted threshold logic works correctly across averaged time windows
|
||||
func TestBatteryAlertAveragedSamples(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create a system
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
require.NoError(t, err)
|
||||
systemRecord := systems[0]
|
||||
|
||||
// Create a battery alert with threshold of 25% and min of 2 minutes (requires averaging)
|
||||
batteryAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Battery",
|
||||
"system": systemRecord.Id,
|
||||
"user": user.Id,
|
||||
"value": 25, // threshold: 25%
|
||||
"min": 2, // 2 minutes - requires averaging
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify alert is not triggered initially
|
||||
assert.False(t, batteryAlert.GetBool("triggered"), "Alert should not be triggered initially")
|
||||
|
||||
am := hub.GetAlertManager()
|
||||
now := time.Now().UTC()
|
||||
|
||||
// Create system_stats records with low battery (below threshold)
|
||||
// The alert has min=2 minutes, so alert.time = now - 2 minutes
|
||||
// For the alert to be valid, alert.time must be AFTER the oldest record's created time
|
||||
// So we need records older than (now - 2 min), plus records within the window
|
||||
// Records at: now-3min (oldest, before window), now-90s, now-60s, now-30s
|
||||
recordTimes := []time.Duration{
|
||||
-180 * time.Second, // 3 min ago - this makes the oldest record before alert.time
|
||||
-90 * time.Second,
|
||||
-60 * time.Second,
|
||||
-30 * time.Second,
|
||||
}
|
||||
|
||||
for _, offset := range recordTimes {
|
||||
statsLow := system.Stats{
|
||||
Cpu: 10,
|
||||
MemPct: 30,
|
||||
DiskPct: 40,
|
||||
Battery: [2]uint8{15, 1}, // 15% battery (below 25% threshold)
|
||||
}
|
||||
statsLowJSON, _ := json.Marshal(statsLow)
|
||||
|
||||
recordTime := now.Add(offset)
|
||||
record, err := beszelTests.CreateRecord(hub, "system_stats", map[string]any{
|
||||
"system": systemRecord.Id,
|
||||
"type": "1m",
|
||||
"stats": string(statsLowJSON),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
// Update created time to simulate historical records - use SetRaw with formatted string
|
||||
record.SetRaw("created", recordTime.Format(types.DefaultDateLayout))
|
||||
err = hub.SaveNoValidate(record)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
// Create combined data with low battery
|
||||
combinedDataLow := &system.CombinedData{
|
||||
Stats: system.Stats{
|
||||
Cpu: 10,
|
||||
MemPct: 30,
|
||||
DiskPct: 40,
|
||||
Battery: [2]uint8{15, 1},
|
||||
},
|
||||
Info: system.Info{
|
||||
AgentVersion: "0.12.0",
|
||||
Cpu: 10,
|
||||
MemPct: 30,
|
||||
DiskPct: 40,
|
||||
},
|
||||
}
|
||||
|
||||
// Update system timestamp
|
||||
systemRecord.Set("updated", now)
|
||||
err = hub.SaveNoValidate(systemRecord)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Handle system alerts - should trigger because average battery is below threshold
|
||||
err = am.HandleSystemAlerts(systemRecord, combinedDataLow)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Wait for alert processing
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
|
||||
// Verify alert IS triggered (average battery 15% is below threshold 25%)
|
||||
batteryAlert, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": batteryAlert.Id})
|
||||
require.NoError(t, err)
|
||||
assert.True(t, batteryAlert.GetBool("triggered"),
|
||||
"Alert SHOULD be triggered when average battery (15%%) is below threshold (25%%) over min period")
|
||||
|
||||
// Now add records with high battery to test resolution
|
||||
// Use a new time window 2 minutes later
|
||||
newNow := now.Add(2 * time.Minute)
|
||||
// Records need to span before the alert time window (newNow - 2 min)
|
||||
recordTimesHigh := []time.Duration{
|
||||
-180 * time.Second, // 3 min before newNow - makes oldest record before alert.time
|
||||
-90 * time.Second,
|
||||
-60 * time.Second,
|
||||
-30 * time.Second,
|
||||
}
|
||||
|
||||
for _, offset := range recordTimesHigh {
|
||||
statsHigh := system.Stats{
|
||||
Cpu: 10,
|
||||
MemPct: 30,
|
||||
DiskPct: 40,
|
||||
Battery: [2]uint8{50, 1}, // 50% battery (above 25% threshold)
|
||||
}
|
||||
statsHighJSON, _ := json.Marshal(statsHigh)
|
||||
|
||||
recordTime := newNow.Add(offset)
|
||||
record, err := beszelTests.CreateRecord(hub, "system_stats", map[string]any{
|
||||
"system": systemRecord.Id,
|
||||
"type": "1m",
|
||||
"stats": string(statsHighJSON),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
record.SetRaw("created", recordTime.Format(types.DefaultDateLayout))
|
||||
err = hub.SaveNoValidate(record)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
// Create combined data with high battery
|
||||
combinedDataHigh := &system.CombinedData{
|
||||
Stats: system.Stats{
|
||||
Cpu: 10,
|
||||
MemPct: 30,
|
||||
DiskPct: 40,
|
||||
Battery: [2]uint8{50, 1},
|
||||
},
|
||||
Info: system.Info{
|
||||
AgentVersion: "0.12.0",
|
||||
Cpu: 10,
|
||||
MemPct: 30,
|
||||
DiskPct: 40,
|
||||
},
|
||||
}
|
||||
|
||||
// Update system timestamp to the new time window
|
||||
systemRecord.Set("updated", newNow)
|
||||
err = hub.SaveNoValidate(systemRecord)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Handle system alerts - should resolve because average battery is now above threshold
|
||||
err = am.HandleSystemAlerts(systemRecord, combinedDataHigh)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Wait for alert processing
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
|
||||
// Verify alert is resolved (average battery 50% is above threshold 25%)
|
||||
batteryAlert, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": batteryAlert.Id})
|
||||
require.NoError(t, err)
|
||||
assert.False(t, batteryAlert.GetBool("triggered"),
|
||||
"Alert should be resolved when average battery (50%%) is above threshold (25%%) over min period")
|
||||
}
|
||||
@@ -0,0 +1,177 @@
|
||||
package alerts
|
||||
|
||||
import (
|
||||
"github.com/pocketbase/dbx"
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
"github.com/pocketbase/pocketbase/tools/store"
|
||||
)
|
||||
|
||||
// CachedAlertData represents the relevant fields of an alert record for status checking and updates.
|
||||
type CachedAlertData struct {
|
||||
Id string
|
||||
SystemID string
|
||||
UserID string
|
||||
Name string
|
||||
Value float64
|
||||
Triggered bool
|
||||
Min uint8
|
||||
// Created types.DateTime
|
||||
}
|
||||
|
||||
func (a *CachedAlertData) PopulateFromRecord(record *core.Record) {
|
||||
a.Id = record.Id
|
||||
a.SystemID = record.GetString("system")
|
||||
a.UserID = record.GetString("user")
|
||||
a.Name = record.GetString("name")
|
||||
a.Value = record.GetFloat("value")
|
||||
a.Triggered = record.GetBool("triggered")
|
||||
a.Min = uint8(record.GetInt("min"))
|
||||
// a.Created = record.GetDateTime("created")
|
||||
}
|
||||
|
||||
// AlertsCache provides an in-memory cache for system alerts.
|
||||
type AlertsCache struct {
|
||||
app core.App
|
||||
store *store.Store[string, *store.Store[string, CachedAlertData]]
|
||||
populated bool
|
||||
}
|
||||
|
||||
// NewAlertsCache creates a new instance of SystemAlertsCache.
|
||||
func NewAlertsCache(app core.App) *AlertsCache {
|
||||
c := AlertsCache{
|
||||
app: app,
|
||||
store: store.New(map[string]*store.Store[string, CachedAlertData]{}),
|
||||
}
|
||||
return c.bindEvents()
|
||||
}
|
||||
|
||||
// bindEvents sets up event listeners to keep the cache in sync with database changes.
|
||||
func (c *AlertsCache) bindEvents() *AlertsCache {
|
||||
c.app.OnRecordAfterUpdateSuccess("alerts").BindFunc(func(e *core.RecordEvent) error {
|
||||
// c.Delete(e.Record.Original()) // this would be needed if the system field on an existing alert was changed, however we don't currently allow that in the UI so we'll leave it commented out
|
||||
c.Update(e.Record)
|
||||
return e.Next()
|
||||
})
|
||||
c.app.OnRecordAfterDeleteSuccess("alerts").BindFunc(func(e *core.RecordEvent) error {
|
||||
c.Delete(e.Record)
|
||||
return e.Next()
|
||||
})
|
||||
c.app.OnRecordAfterCreateSuccess("alerts").BindFunc(func(e *core.RecordEvent) error {
|
||||
c.Update(e.Record)
|
||||
return e.Next()
|
||||
})
|
||||
return c
|
||||
}
|
||||
|
||||
// PopulateFromDB clears current entries and loads all alerts from the database into the cache.
|
||||
func (c *AlertsCache) PopulateFromDB(force bool) error {
|
||||
if !force && c.populated {
|
||||
return nil
|
||||
}
|
||||
records, err := c.app.FindAllRecords("alerts")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
c.store.RemoveAll()
|
||||
for _, record := range records {
|
||||
c.Update(record)
|
||||
}
|
||||
c.populated = true
|
||||
return nil
|
||||
}
|
||||
|
||||
// Update adds or updates an alert record in the cache.
|
||||
func (c *AlertsCache) Update(record *core.Record) {
|
||||
systemID := record.GetString("system")
|
||||
if systemID == "" {
|
||||
return
|
||||
}
|
||||
systemStore, ok := c.store.GetOk(systemID)
|
||||
if !ok {
|
||||
systemStore = store.New(map[string]CachedAlertData{})
|
||||
c.store.Set(systemID, systemStore)
|
||||
}
|
||||
var ca CachedAlertData
|
||||
ca.PopulateFromRecord(record)
|
||||
systemStore.Set(record.Id, ca)
|
||||
}
|
||||
|
||||
// Delete removes an alert record from the cache.
|
||||
func (c *AlertsCache) Delete(record *core.Record) {
|
||||
systemID := record.GetString("system")
|
||||
if systemID == "" {
|
||||
return
|
||||
}
|
||||
if systemStore, ok := c.store.GetOk(systemID); ok {
|
||||
systemStore.Remove(record.Id)
|
||||
}
|
||||
}
|
||||
|
||||
// GetSystemAlerts returns all alerts for the specified system, lazy-loading if necessary.
|
||||
func (c *AlertsCache) GetSystemAlerts(systemID string) []CachedAlertData {
|
||||
systemStore, ok := c.store.GetOk(systemID)
|
||||
if !ok {
|
||||
// Populate cache for this system
|
||||
records, err := c.app.FindAllRecords("alerts", dbx.NewExp("system={:system}", dbx.Params{"system": systemID}))
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
systemStore = store.New(map[string]CachedAlertData{})
|
||||
for _, record := range records {
|
||||
var ca CachedAlertData
|
||||
ca.PopulateFromRecord(record)
|
||||
systemStore.Set(record.Id, ca)
|
||||
}
|
||||
c.store.Set(systemID, systemStore)
|
||||
}
|
||||
all := systemStore.GetAll()
|
||||
alerts := make([]CachedAlertData, 0, len(all))
|
||||
for _, alert := range all {
|
||||
alerts = append(alerts, alert)
|
||||
}
|
||||
return alerts
|
||||
}
|
||||
|
||||
// GetAlert returns a specific alert by its ID from the cache.
|
||||
func (c *AlertsCache) GetAlert(systemID, alertID string) (CachedAlertData, bool) {
|
||||
if systemStore, ok := c.store.GetOk(systemID); ok {
|
||||
return systemStore.GetOk(alertID)
|
||||
}
|
||||
return CachedAlertData{}, false
|
||||
}
|
||||
|
||||
// GetAlertsByName returns all alerts of a specific type for the specified system.
|
||||
func (c *AlertsCache) GetAlertsByName(systemID, alertName string) []CachedAlertData {
|
||||
allAlerts := c.GetSystemAlerts(systemID)
|
||||
var alerts []CachedAlertData
|
||||
for _, record := range allAlerts {
|
||||
if record.Name == alertName {
|
||||
alerts = append(alerts, record)
|
||||
}
|
||||
}
|
||||
return alerts
|
||||
}
|
||||
|
||||
// GetAlertsExcludingNames returns all alerts for the specified system excluding the given types.
|
||||
func (c *AlertsCache) GetAlertsExcludingNames(systemID string, excludedNames ...string) []CachedAlertData {
|
||||
excludeMap := make(map[string]struct{})
|
||||
for _, name := range excludedNames {
|
||||
excludeMap[name] = struct{}{}
|
||||
}
|
||||
allAlerts := c.GetSystemAlerts(systemID)
|
||||
var alerts []CachedAlertData
|
||||
for _, record := range allAlerts {
|
||||
if _, excluded := excludeMap[record.Name]; !excluded {
|
||||
alerts = append(alerts, record)
|
||||
}
|
||||
}
|
||||
return alerts
|
||||
}
|
||||
|
||||
// Refresh returns the latest cached copy for an alert snapshot if it still exists.
|
||||
func (c *AlertsCache) Refresh(alert CachedAlertData) (CachedAlertData, bool) {
|
||||
if alert.Id == "" {
|
||||
return CachedAlertData{}, false
|
||||
}
|
||||
return c.GetAlert(alert.SystemID, alert.Id)
|
||||
}
|
||||
@@ -0,0 +1,215 @@
|
||||
//go:build testing
|
||||
|
||||
package alerts_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/henrygd/beszel/internal/alerts"
|
||||
beszelTests "github.com/henrygd/beszel/internal/tests"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestSystemAlertsCachePopulateAndFilter(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
systems, err := beszelTests.CreateSystems(hub, 2, user.Id, "up")
|
||||
require.NoError(t, err)
|
||||
system1 := systems[0]
|
||||
system2 := systems[1]
|
||||
|
||||
statusAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": system1.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
cpuAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "CPU",
|
||||
"system": system1.Id,
|
||||
"user": user.Id,
|
||||
"value": 80,
|
||||
"min": 1,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
memoryAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Memory",
|
||||
"system": system2.Id,
|
||||
"user": user.Id,
|
||||
"value": 90,
|
||||
"min": 1,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
cache := alerts.NewAlertsCache(hub)
|
||||
cache.PopulateFromDB(false)
|
||||
|
||||
statusAlerts := cache.GetAlertsByName(system1.Id, "Status")
|
||||
require.Len(t, statusAlerts, 1)
|
||||
assert.Equal(t, statusAlert.Id, statusAlerts[0].Id)
|
||||
|
||||
nonStatusAlerts := cache.GetAlertsExcludingNames(system1.Id, "Status")
|
||||
require.Len(t, nonStatusAlerts, 1)
|
||||
assert.Equal(t, cpuAlert.Id, nonStatusAlerts[0].Id)
|
||||
|
||||
system2Alerts := cache.GetSystemAlerts(system2.Id)
|
||||
require.Len(t, system2Alerts, 1)
|
||||
assert.Equal(t, memoryAlert.Id, system2Alerts[0].Id)
|
||||
}
|
||||
|
||||
func TestSystemAlertsCacheLazyLoadUpdateAndDelete(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
require.NoError(t, err)
|
||||
systemRecord := systems[0]
|
||||
|
||||
statusAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": systemRecord.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
cache := alerts.NewAlertsCache(hub)
|
||||
require.Len(t, cache.GetSystemAlerts(systemRecord.Id), 1, "first lookup should lazy-load alerts for the system")
|
||||
|
||||
cpuAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "CPU",
|
||||
"system": systemRecord.Id,
|
||||
"user": user.Id,
|
||||
"value": 80,
|
||||
"min": 1,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
cache.Update(cpuAlert)
|
||||
|
||||
nonStatusAlerts := cache.GetAlertsExcludingNames(systemRecord.Id, "Status")
|
||||
require.Len(t, nonStatusAlerts, 1)
|
||||
assert.Equal(t, cpuAlert.Id, nonStatusAlerts[0].Id)
|
||||
|
||||
cache.Delete(statusAlert)
|
||||
assert.Empty(t, cache.GetAlertsByName(systemRecord.Id, "Status"), "deleted alerts should be removed from the in-memory cache")
|
||||
}
|
||||
|
||||
func TestSystemAlertsCacheRefreshReturnsLatestCopy(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
require.NoError(t, err)
|
||||
system := systems[0]
|
||||
|
||||
alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": system.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
"triggered": false,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
cache := alerts.NewAlertsCache(hub)
|
||||
snapshot := cache.GetSystemAlerts(system.Id)[0]
|
||||
assert.False(t, snapshot.Triggered)
|
||||
|
||||
alert.Set("triggered", true)
|
||||
require.NoError(t, hub.Save(alert))
|
||||
|
||||
refreshed, ok := cache.Refresh(snapshot)
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, snapshot.Id, refreshed.Id)
|
||||
assert.True(t, refreshed.Triggered, "refresh should return the updated cached value rather than the stale snapshot")
|
||||
|
||||
require.NoError(t, hub.Delete(alert))
|
||||
_, ok = cache.Refresh(snapshot)
|
||||
assert.False(t, ok, "refresh should report false when the cached alert no longer exists")
|
||||
}
|
||||
|
||||
func TestAlertManagerCacheLifecycle(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
require.NoError(t, err)
|
||||
system := systems[0]
|
||||
|
||||
// Create an alert
|
||||
alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "CPU",
|
||||
"system": system.Id,
|
||||
"user": user.Id,
|
||||
"value": 80,
|
||||
"min": 1,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
am := hub.AlertManager
|
||||
cache := am.GetSystemAlertsCache()
|
||||
|
||||
// Verify it's in cache (it should be since CreateRecord triggers the event)
|
||||
assert.Len(t, cache.GetSystemAlerts(system.Id), 1)
|
||||
assert.Equal(t, alert.Id, cache.GetSystemAlerts(system.Id)[0].Id)
|
||||
assert.EqualValues(t, 80, cache.GetSystemAlerts(system.Id)[0].Value)
|
||||
|
||||
// Update the alert through PocketBase to trigger events
|
||||
alert.Set("value", 85)
|
||||
require.NoError(t, hub.Save(alert))
|
||||
|
||||
// Check if updated value is reflected (or at least that it's still there)
|
||||
cachedAlerts := cache.GetSystemAlerts(system.Id)
|
||||
assert.Len(t, cachedAlerts, 1)
|
||||
assert.EqualValues(t, 85, cachedAlerts[0].Value)
|
||||
|
||||
// Delete the alert through PocketBase to trigger events
|
||||
require.NoError(t, hub.Delete(alert))
|
||||
|
||||
// Verify it's removed from cache
|
||||
assert.Empty(t, cache.GetSystemAlerts(system.Id), "alert should be removed from cache after PocketBase delete")
|
||||
}
|
||||
|
||||
// func TestAlertManagerCacheMovesAlertToNewSystemOnUpdate(t *testing.T) {
|
||||
// hub, user := beszelTests.GetHubWithUser(t)
|
||||
// defer hub.Cleanup()
|
||||
|
||||
// systems, err := beszelTests.CreateSystems(hub, 2, user.Id, "up")
|
||||
// require.NoError(t, err)
|
||||
// system1 := systems[0]
|
||||
// system2 := systems[1]
|
||||
|
||||
// alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
// "name": "CPU",
|
||||
// "system": system1.Id,
|
||||
// "user": user.Id,
|
||||
// "value": 80,
|
||||
// "min": 1,
|
||||
// })
|
||||
// require.NoError(t, err)
|
||||
|
||||
// am := hub.AlertManager
|
||||
// cache := am.GetSystemAlertsCache()
|
||||
|
||||
// // Initially in system1 cache
|
||||
// assert.Len(t, cache.Get(system1.Id), 1)
|
||||
// assert.Empty(t, cache.Get(system2.Id))
|
||||
|
||||
// // Move alert to system2
|
||||
// alert.Set("system", system2.Id)
|
||||
// require.NoError(t, hub.Save(alert))
|
||||
|
||||
// // DEBUG: print if it is found
|
||||
// // fmt.Printf("system1 alerts after update: %v\n", cache.Get(system1.Id))
|
||||
|
||||
// // Should be removed from system1 and present in system2
|
||||
// assert.Empty(t, cache.GetType(system1.Id, "CPU"), "updated alerts should be evicted from the previous system cache")
|
||||
// require.Len(t, cache.Get(system2.Id), 1)
|
||||
// assert.Equal(t, alert.Id, cache.Get(system2.Id)[0].Id)
|
||||
// }
|
||||
@@ -0,0 +1,155 @@
|
||||
//go:build testing
|
||||
|
||||
package alerts_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
beszelTests "github.com/henrygd/beszel/internal/tests"
|
||||
|
||||
"github.com/pocketbase/dbx"
|
||||
"github.com/pocketbase/pocketbase/tools/types"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TestDiskAlertExtraFsMultiMinute tests that multi-minute disk alerts correctly use
|
||||
// historical per-minute values for extra (non-root) filesystems, not the current live snapshot.
|
||||
func TestDiskAlertExtraFsMultiMinute(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
require.NoError(t, err)
|
||||
systemRecord := systems[0]
|
||||
|
||||
// Disk alert: threshold 80%, min=2 (requires historical averaging)
|
||||
diskAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Disk",
|
||||
"system": systemRecord.Id,
|
||||
"user": user.Id,
|
||||
"value": 80, // threshold: 80%
|
||||
"min": 2, // 2 minutes - requires historical averaging
|
||||
})
|
||||
require.NoError(t, err)
|
||||
assert.False(t, diskAlert.GetBool("triggered"), "Alert should not be triggered initially")
|
||||
|
||||
am := hub.GetAlertManager()
|
||||
now := time.Now().UTC()
|
||||
|
||||
extraFsHigh := map[string]*system.FsStats{
|
||||
"/mnt/data": {DiskTotal: 1000, DiskUsed: 920}, // 92% - above threshold
|
||||
}
|
||||
|
||||
// Insert 4 historical records spread over 3 minutes (same pattern as battery tests).
|
||||
// The oldest record must predate (now - 2min) so the alert time window is valid.
|
||||
recordTimes := []time.Duration{
|
||||
-180 * time.Second, // 3 min ago - anchors oldest record before alert.time
|
||||
-90 * time.Second,
|
||||
-60 * time.Second,
|
||||
-30 * time.Second,
|
||||
}
|
||||
|
||||
for _, offset := range recordTimes {
|
||||
stats := system.Stats{
|
||||
DiskPct: 30, // root disk at 30% - below threshold
|
||||
ExtraFs: extraFsHigh,
|
||||
}
|
||||
statsJSON, _ := json.Marshal(stats)
|
||||
|
||||
recordTime := now.Add(offset)
|
||||
record, err := beszelTests.CreateRecord(hub, "system_stats", map[string]any{
|
||||
"system": systemRecord.Id,
|
||||
"type": "1m",
|
||||
"stats": string(statsJSON),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
record.SetRaw("created", recordTime.Format(types.DefaultDateLayout))
|
||||
err = hub.SaveNoValidate(record)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
combinedDataHigh := &system.CombinedData{
|
||||
Stats: system.Stats{
|
||||
DiskPct: 30,
|
||||
ExtraFs: extraFsHigh,
|
||||
},
|
||||
Info: system.Info{
|
||||
DiskPct: 30,
|
||||
},
|
||||
}
|
||||
|
||||
systemRecord.Set("updated", now)
|
||||
err = hub.SaveNoValidate(systemRecord)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = am.HandleSystemAlerts(systemRecord, combinedDataHigh)
|
||||
require.NoError(t, err)
|
||||
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
|
||||
diskAlert, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": diskAlert.Id})
|
||||
require.NoError(t, err)
|
||||
assert.True(t, diskAlert.GetBool("triggered"),
|
||||
"Alert SHOULD be triggered when extra disk average (92%%) exceeds threshold (80%%)")
|
||||
|
||||
// --- Resolution: extra disk drops to 50%, alert should resolve ---
|
||||
|
||||
extraFsLow := map[string]*system.FsStats{
|
||||
"/mnt/data": {DiskTotal: 1000, DiskUsed: 500}, // 50% - below threshold
|
||||
}
|
||||
|
||||
newNow := now.Add(2 * time.Minute)
|
||||
recordTimesLow := []time.Duration{
|
||||
-180 * time.Second,
|
||||
-90 * time.Second,
|
||||
-60 * time.Second,
|
||||
-30 * time.Second,
|
||||
}
|
||||
|
||||
for _, offset := range recordTimesLow {
|
||||
stats := system.Stats{
|
||||
DiskPct: 30,
|
||||
ExtraFs: extraFsLow,
|
||||
}
|
||||
statsJSON, _ := json.Marshal(stats)
|
||||
|
||||
recordTime := newNow.Add(offset)
|
||||
record, err := beszelTests.CreateRecord(hub, "system_stats", map[string]any{
|
||||
"system": systemRecord.Id,
|
||||
"type": "1m",
|
||||
"stats": string(statsJSON),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
record.SetRaw("created", recordTime.Format(types.DefaultDateLayout))
|
||||
err = hub.SaveNoValidate(record)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
combinedDataLow := &system.CombinedData{
|
||||
Stats: system.Stats{
|
||||
DiskPct: 30,
|
||||
ExtraFs: extraFsLow,
|
||||
},
|
||||
Info: system.Info{
|
||||
DiskPct: 30,
|
||||
},
|
||||
}
|
||||
|
||||
systemRecord.Set("updated", newNow)
|
||||
err = hub.SaveNoValidate(systemRecord)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = am.HandleSystemAlerts(systemRecord, combinedDataLow)
|
||||
require.NoError(t, err)
|
||||
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
|
||||
diskAlert, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": diskAlert.Id})
|
||||
require.NoError(t, err)
|
||||
assert.False(t, diskAlert.GetBool("triggered"),
|
||||
"Alert should be resolved when extra disk average (50%%) drops below threshold (80%%)")
|
||||
}
|
||||
@@ -0,0 +1,74 @@
|
||||
package alerts
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/pocketbase/dbx"
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
)
|
||||
|
||||
// On triggered alert record delete, set matching alert history record to resolved
|
||||
func resolveHistoryOnAlertDelete(e *core.RecordEvent) error {
|
||||
if !e.Record.GetBool("triggered") {
|
||||
return e.Next()
|
||||
}
|
||||
_ = resolveAlertHistoryRecord(e.App, e.Record.Id)
|
||||
return e.Next()
|
||||
}
|
||||
|
||||
// On alert record update, update alert history record
|
||||
func updateHistoryOnAlertUpdate(e *core.RecordEvent) error {
|
||||
original := e.Record.Original()
|
||||
new := e.Record
|
||||
|
||||
originalTriggered := original.GetBool("triggered")
|
||||
newTriggered := new.GetBool("triggered")
|
||||
|
||||
// no need to update alert history if triggered state has not changed
|
||||
if originalTriggered == newTriggered {
|
||||
return e.Next()
|
||||
}
|
||||
|
||||
// if new state is triggered, create new alert history record
|
||||
if newTriggered {
|
||||
_, _ = createAlertHistoryRecord(e.App, new)
|
||||
return e.Next()
|
||||
}
|
||||
|
||||
// if new state is not triggered, check for matching alert history record and set it to resolved
|
||||
_ = resolveAlertHistoryRecord(e.App, new.Id)
|
||||
return e.Next()
|
||||
}
|
||||
|
||||
// resolveAlertHistoryRecord sets the resolved field to the current time
|
||||
func resolveAlertHistoryRecord(app core.App, alertRecordID string) error {
|
||||
alertHistoryRecord, err := app.FindFirstRecordByFilter("alerts_history", "alert_id={:alert_id} && resolved=null", dbx.Params{"alert_id": alertRecordID})
|
||||
if err != nil || alertHistoryRecord == nil {
|
||||
return err
|
||||
}
|
||||
alertHistoryRecord.Set("resolved", time.Now().UTC())
|
||||
err = app.Save(alertHistoryRecord)
|
||||
if err != nil {
|
||||
app.Logger().Error("Failed to resolve alert history", "err", err)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// createAlertHistoryRecord creates a new alert history record
|
||||
func createAlertHistoryRecord(app core.App, alertRecord *core.Record) (alertHistoryRecord *core.Record, err error) {
|
||||
alertHistoryCollection, err := app.FindCachedCollectionByNameOrId("alerts_history")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
alertHistoryRecord = core.NewRecord(alertHistoryCollection)
|
||||
alertHistoryRecord.Set("alert_id", alertRecord.Id)
|
||||
alertHistoryRecord.Set("user", alertRecord.GetString("user"))
|
||||
alertHistoryRecord.Set("system", alertRecord.GetString("system"))
|
||||
alertHistoryRecord.Set("name", alertRecord.GetString("name"))
|
||||
alertHistoryRecord.Set("value", alertRecord.GetFloat("value"))
|
||||
err = app.Save(alertHistoryRecord)
|
||||
if err != nil {
|
||||
app.Logger().Error("Failed to save alert history", "err", err)
|
||||
}
|
||||
return alertHistoryRecord, err
|
||||
}
|
||||
@@ -0,0 +1,425 @@
|
||||
//go:build testing
|
||||
|
||||
package alerts_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"testing/synctest"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/internal/alerts"
|
||||
beszelTests "github.com/henrygd/beszel/internal/tests"
|
||||
|
||||
"github.com/pocketbase/dbx"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestAlertSilencedOneTime(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create a system
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
assert.NoError(t, err)
|
||||
system := systems[0]
|
||||
|
||||
// Create an alert
|
||||
alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "CPU",
|
||||
"system": system.Id,
|
||||
"user": user.Id,
|
||||
"value": 80,
|
||||
"min": 1,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Create a one-time quiet hours window (current time - 1 hour to current time + 1 hour)
|
||||
now := time.Now().UTC()
|
||||
startTime := now.Add(-1 * time.Hour)
|
||||
endTime := now.Add(1 * time.Hour)
|
||||
|
||||
_, err = beszelTests.CreateRecord(hub, "quiet_hours", map[string]any{
|
||||
"user": user.Id,
|
||||
"system": system.Id,
|
||||
"type": "one-time",
|
||||
"start": startTime,
|
||||
"end": endTime,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Get alert manager
|
||||
am := alerts.NewAlertManager(hub)
|
||||
defer am.Stop()
|
||||
|
||||
// Test that alert is silenced
|
||||
silenced := am.IsNotificationSilenced(user.Id, system.Id)
|
||||
assert.True(t, silenced, "Alert should be silenced during active one-time window")
|
||||
|
||||
// Create a window that has already ended
|
||||
pastStart := now.Add(-3 * time.Hour)
|
||||
pastEnd := now.Add(-2 * time.Hour)
|
||||
|
||||
_, err = beszelTests.CreateRecord(hub, "quiet_hours", map[string]any{
|
||||
"user": user.Id,
|
||||
"system": system.Id,
|
||||
"type": "one-time",
|
||||
"start": pastStart,
|
||||
"end": pastEnd,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Should still be silenced because of the first window
|
||||
silenced = am.IsNotificationSilenced(user.Id, system.Id)
|
||||
assert.True(t, silenced, "Alert should still be silenced (past window doesn't affect active window)")
|
||||
|
||||
// Clear all windows and create a future window
|
||||
_, err = hub.DB().NewQuery("DELETE FROM quiet_hours").Execute()
|
||||
assert.NoError(t, err)
|
||||
|
||||
futureStart := now.Add(2 * time.Hour)
|
||||
futureEnd := now.Add(3 * time.Hour)
|
||||
|
||||
_, err = beszelTests.CreateRecord(hub, "quiet_hours", map[string]any{
|
||||
"user": user.Id,
|
||||
"system": system.Id,
|
||||
"type": "one-time",
|
||||
"start": futureStart,
|
||||
"end": futureEnd,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Alert should NOT be silenced (window hasn't started yet)
|
||||
silenced = am.IsNotificationSilenced(user.Id, system.Id)
|
||||
assert.False(t, silenced, "Alert should not be silenced (window hasn't started)")
|
||||
|
||||
_ = alert
|
||||
}
|
||||
|
||||
func TestAlertSilencedDaily(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create a system
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
assert.NoError(t, err)
|
||||
system := systems[0]
|
||||
|
||||
// Get alert manager
|
||||
am := alerts.NewAlertManager(hub)
|
||||
defer am.Stop()
|
||||
|
||||
// Get current hour and create a window that includes current time
|
||||
now := time.Now().UTC()
|
||||
currentHour := now.Hour()
|
||||
currentMin := now.Minute()
|
||||
|
||||
// Create a window from 1 hour ago to 1 hour from now
|
||||
startHour := (currentHour - 1 + 24) % 24
|
||||
endHour := (currentHour + 1) % 24
|
||||
|
||||
// Create times with just the hours/minutes we want (date doesn't matter for daily)
|
||||
startTime := time.Date(2000, 1, 1, startHour, currentMin, 0, 0, time.UTC)
|
||||
endTime := time.Date(2000, 1, 1, endHour, currentMin, 0, 0, time.UTC)
|
||||
|
||||
_, err = beszelTests.CreateRecord(hub, "quiet_hours", map[string]any{
|
||||
"user": user.Id,
|
||||
"system": system.Id,
|
||||
"type": "daily",
|
||||
"start": startTime,
|
||||
"end": endTime,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Alert should be silenced (current time is within the daily window)
|
||||
silenced := am.IsNotificationSilenced(user.Id, system.Id)
|
||||
assert.True(t, silenced, "Alert should be silenced during active daily window")
|
||||
|
||||
// Clear windows and create one that doesn't include current time
|
||||
_, err = hub.DB().NewQuery("DELETE FROM quiet_hours").Execute()
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Create a window from 6-12 hours from now
|
||||
futureStartHour := (currentHour + 6) % 24
|
||||
futureEndHour := (currentHour + 12) % 24
|
||||
|
||||
startTime = time.Date(2000, 1, 1, futureStartHour, 0, 0, 0, time.UTC)
|
||||
endTime = time.Date(2000, 1, 1, futureEndHour, 0, 0, 0, time.UTC)
|
||||
|
||||
_, err = beszelTests.CreateRecord(hub, "quiet_hours", map[string]any{
|
||||
"user": user.Id,
|
||||
"system": system.Id,
|
||||
"type": "daily",
|
||||
"start": startTime,
|
||||
"end": endTime,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Alert should NOT be silenced
|
||||
silenced = am.IsNotificationSilenced(user.Id, system.Id)
|
||||
assert.False(t, silenced, "Alert should not be silenced (outside daily window)")
|
||||
}
|
||||
|
||||
func TestAlertSilencedDailyMidnightCrossing(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create a system
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
assert.NoError(t, err)
|
||||
system := systems[0]
|
||||
|
||||
// Get alert manager
|
||||
am := alerts.NewAlertManager(hub)
|
||||
defer am.Stop()
|
||||
|
||||
// Create a window that crosses midnight: 22:00 - 02:00
|
||||
startTime := time.Date(2000, 1, 1, 22, 0, 0, 0, time.UTC)
|
||||
endTime := time.Date(2000, 1, 1, 2, 0, 0, 0, time.UTC)
|
||||
|
||||
_, err = beszelTests.CreateRecord(hub, "quiet_hours", map[string]any{
|
||||
"user": user.Id,
|
||||
"system": system.Id,
|
||||
"type": "daily",
|
||||
"start": startTime,
|
||||
"end": endTime,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Test with a time at 23:00 (should be silenced)
|
||||
// We can't control the actual current time, but we can verify the logic
|
||||
// by checking if the window was created correctly
|
||||
windows, err := hub.FindAllRecords("quiet_hours", dbx.HashExp{
|
||||
"user": user.Id,
|
||||
"system": system.Id,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
assert.Len(t, windows, 1, "Should have created 1 window")
|
||||
|
||||
window := windows[0]
|
||||
assert.Equal(t, "daily", window.GetString("type"))
|
||||
assert.Equal(t, 22, window.GetDateTime("start").Time().Hour())
|
||||
assert.Equal(t, 2, window.GetDateTime("end").Time().Hour())
|
||||
}
|
||||
|
||||
func TestAlertSilencedGlobal(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create multiple systems
|
||||
systems, err := beszelTests.CreateSystems(hub, 3, user.Id, "up")
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Get alert manager
|
||||
am := alerts.NewAlertManager(hub)
|
||||
defer am.Stop()
|
||||
|
||||
// Create a global quiet hours window (no system specified)
|
||||
now := time.Now().UTC()
|
||||
startTime := now.Add(-1 * time.Hour)
|
||||
endTime := now.Add(1 * time.Hour)
|
||||
|
||||
_, err = beszelTests.CreateRecord(hub, "quiet_hours", map[string]any{
|
||||
"user": user.Id,
|
||||
"type": "one-time",
|
||||
"start": startTime,
|
||||
"end": endTime,
|
||||
// system field is empty/null for global windows
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// All systems should be silenced
|
||||
for _, system := range systems {
|
||||
silenced := am.IsNotificationSilenced(user.Id, system.Id)
|
||||
assert.True(t, silenced, "Alert should be silenced for system %s (global window)", system.Id)
|
||||
}
|
||||
|
||||
// Even with a systemID that doesn't exist, should be silenced
|
||||
silenced := am.IsNotificationSilenced(user.Id, "nonexistent-system")
|
||||
assert.True(t, silenced, "Alert should be silenced for any system (global window)")
|
||||
}
|
||||
|
||||
func TestAlertSilencedSystemSpecific(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create multiple systems
|
||||
systems, err := beszelTests.CreateSystems(hub, 2, user.Id, "up")
|
||||
assert.NoError(t, err)
|
||||
system1 := systems[0]
|
||||
system2 := systems[1]
|
||||
|
||||
// Get alert manager
|
||||
am := alerts.NewAlertManager(hub)
|
||||
defer am.Stop()
|
||||
|
||||
// Create a system-specific quiet hours window for system1 only
|
||||
now := time.Now().UTC()
|
||||
startTime := now.Add(-1 * time.Hour)
|
||||
endTime := now.Add(1 * time.Hour)
|
||||
|
||||
_, err = beszelTests.CreateRecord(hub, "quiet_hours", map[string]any{
|
||||
"user": user.Id,
|
||||
"system": system1.Id,
|
||||
"type": "one-time",
|
||||
"start": startTime,
|
||||
"end": endTime,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// System1 should be silenced
|
||||
silenced := am.IsNotificationSilenced(user.Id, system1.Id)
|
||||
assert.True(t, silenced, "Alert should be silenced for system1")
|
||||
|
||||
// System2 should NOT be silenced
|
||||
silenced = am.IsNotificationSilenced(user.Id, system2.Id)
|
||||
assert.False(t, silenced, "Alert should not be silenced for system2")
|
||||
}
|
||||
|
||||
func TestAlertSilencedMultiUser(t *testing.T) {
|
||||
hub, _ := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create two users
|
||||
user1, err := beszelTests.CreateUser(hub, "user1@example.com", "password")
|
||||
assert.NoError(t, err)
|
||||
|
||||
user2, err := beszelTests.CreateUser(hub, "user2@example.com", "password")
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Create a system accessible to both users
|
||||
system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "shared-system",
|
||||
"users": []string{user1.Id, user2.Id},
|
||||
"host": "127.0.0.1",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Get alert manager
|
||||
am := alerts.NewAlertManager(hub)
|
||||
defer am.Stop()
|
||||
|
||||
// Create a quiet hours window for user1 only
|
||||
now := time.Now().UTC()
|
||||
startTime := now.Add(-1 * time.Hour)
|
||||
endTime := now.Add(1 * time.Hour)
|
||||
|
||||
_, err = beszelTests.CreateRecord(hub, "quiet_hours", map[string]any{
|
||||
"user": user1.Id,
|
||||
"system": system.Id,
|
||||
"type": "one-time",
|
||||
"start": startTime,
|
||||
"end": endTime,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// User1 should be silenced
|
||||
silenced := am.IsNotificationSilenced(user1.Id, system.Id)
|
||||
assert.True(t, silenced, "Alert should be silenced for user1")
|
||||
|
||||
// User2 should NOT be silenced
|
||||
silenced = am.IsNotificationSilenced(user2.Id, system.Id)
|
||||
assert.False(t, silenced, "Alert should not be silenced for user2")
|
||||
}
|
||||
|
||||
func TestAlertSilencedWithActualAlert(t *testing.T) {
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create a system
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
assert.NoError(t, err)
|
||||
system := systems[0]
|
||||
|
||||
// Create a status alert
|
||||
_, err = beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": system.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Create user settings with email
|
||||
userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", dbx.Params{"user": user.Id})
|
||||
if err != nil || userSettings == nil {
|
||||
userSettings, err = beszelTests.CreateRecord(hub, "user_settings", map[string]any{
|
||||
"user": user.Id,
|
||||
"settings": map[string]any{
|
||||
"emails": []string{"test@example.com"},
|
||||
},
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
|
||||
// Create a quiet hours window
|
||||
now := time.Now().UTC()
|
||||
startTime := now.Add(-1 * time.Hour)
|
||||
endTime := now.Add(1 * time.Hour)
|
||||
|
||||
_, err = beszelTests.CreateRecord(hub, "quiet_hours", map[string]any{
|
||||
"user": user.Id,
|
||||
"system": system.Id,
|
||||
"type": "one-time",
|
||||
"start": startTime,
|
||||
"end": endTime,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Get initial email count
|
||||
initialEmailCount := hub.TestMailer.TotalSend()
|
||||
|
||||
// Trigger an alert by setting system to down
|
||||
system.Set("status", "down")
|
||||
err = hub.SaveNoValidate(system)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Wait for the alert to be processed (1 minute + buffer)
|
||||
time.Sleep(time.Second * 75)
|
||||
synctest.Wait()
|
||||
|
||||
// Check that no email was sent (because alert is silenced)
|
||||
finalEmailCount := hub.TestMailer.TotalSend()
|
||||
assert.Equal(t, initialEmailCount, finalEmailCount, "No emails should be sent when alert is silenced")
|
||||
|
||||
// Clear quiet hours windows
|
||||
_, err = hub.DB().NewQuery("DELETE FROM quiet_hours").Execute()
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Reset system to up, then down again
|
||||
system.Set("status", "up")
|
||||
err = hub.SaveNoValidate(system)
|
||||
assert.NoError(t, err)
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
|
||||
system.Set("status", "down")
|
||||
err = hub.SaveNoValidate(system)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Wait for the alert to be processed
|
||||
time.Sleep(time.Second * 75)
|
||||
synctest.Wait()
|
||||
|
||||
// Now an email should be sent
|
||||
newEmailCount := hub.TestMailer.TotalSend()
|
||||
assert.Greater(t, newEmailCount, finalEmailCount, "Email should be sent when not silenced")
|
||||
})
|
||||
}
|
||||
|
||||
func TestAlertSilencedNoWindows(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create a system
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
assert.NoError(t, err)
|
||||
system := systems[0]
|
||||
|
||||
// Get alert manager
|
||||
am := alerts.NewAlertManager(hub)
|
||||
defer am.Stop()
|
||||
|
||||
// Without any quiet hours windows, alert should NOT be silenced
|
||||
silenced := am.IsNotificationSilenced(user.Id, system.Id)
|
||||
assert.False(t, silenced, "Alert should not be silenced when no windows exist")
|
||||
}
|
||||
@@ -0,0 +1,107 @@
|
||||
package alerts
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
)
|
||||
|
||||
// handleSmartDeviceAlert sends alerts when a SMART device state worsens into WARNING/FAILED.
|
||||
// This is automatic and does not require user opt-in.
|
||||
func (am *AlertManager) handleSmartDeviceAlert(e *core.RecordEvent) error {
|
||||
oldState := e.Record.Original().GetString("state")
|
||||
newState := e.Record.GetString("state")
|
||||
|
||||
if !shouldSendSmartDeviceAlert(oldState, newState) {
|
||||
return e.Next()
|
||||
}
|
||||
|
||||
systemID := e.Record.GetString("system")
|
||||
if systemID == "" {
|
||||
return e.Next()
|
||||
}
|
||||
|
||||
// Fetch the system record to get the name and users
|
||||
systemRecord, err := e.App.FindRecordById("systems", systemID)
|
||||
if err != nil {
|
||||
e.App.Logger().Error("Failed to find system for SMART alert", "err", err, "systemID", systemID)
|
||||
return e.Next()
|
||||
}
|
||||
|
||||
systemName := systemRecord.GetString("name")
|
||||
deviceName := e.Record.GetString("name")
|
||||
model := e.Record.GetString("model")
|
||||
statusLabel := smartStateLabel(newState)
|
||||
|
||||
// Build alert message
|
||||
title := fmt.Sprintf("SMART %s on %s: %s %s", statusLabel, systemName, deviceName, smartStateEmoji(newState))
|
||||
var message string
|
||||
if model != "" {
|
||||
message = fmt.Sprintf("Disk %s (%s) SMART status changed to %s", deviceName, model, newState)
|
||||
} else {
|
||||
message = fmt.Sprintf("Disk %s SMART status changed to %s", deviceName, newState)
|
||||
}
|
||||
|
||||
// Get users associated with the system
|
||||
userIDs := systemRecord.GetStringSlice("users")
|
||||
if len(userIDs) == 0 {
|
||||
return e.Next()
|
||||
}
|
||||
|
||||
// Send alert to each user
|
||||
for _, userID := range userIDs {
|
||||
if err := am.SendAlert(AlertMessageData{
|
||||
UserID: userID,
|
||||
SystemID: systemID,
|
||||
Title: title,
|
||||
Message: message,
|
||||
Link: am.hub.MakeLink("system", systemID),
|
||||
LinkText: "View " + systemName,
|
||||
}); err != nil {
|
||||
e.App.Logger().Error("Failed to send SMART alert", "err", err, "userID", userID)
|
||||
}
|
||||
}
|
||||
|
||||
return e.Next()
|
||||
}
|
||||
|
||||
func shouldSendSmartDeviceAlert(oldState, newState string) bool {
|
||||
oldSeverity := smartStateSeverity(oldState)
|
||||
newSeverity := smartStateSeverity(newState)
|
||||
|
||||
// Ignore unknown states and recoveries; only alert on worsening transitions
|
||||
// from known-good/degraded states into WARNING/FAILED.
|
||||
return oldSeverity >= 1 && newSeverity > oldSeverity
|
||||
}
|
||||
|
||||
func smartStateSeverity(state string) int {
|
||||
switch state {
|
||||
case "PASSED":
|
||||
return 1
|
||||
case "WARNING":
|
||||
return 2
|
||||
case "FAILED":
|
||||
return 3
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
func smartStateEmoji(state string) string {
|
||||
switch state {
|
||||
case "WARNING":
|
||||
return "\U0001F7E0"
|
||||
default:
|
||||
return "\U0001F534"
|
||||
}
|
||||
}
|
||||
|
||||
func smartStateLabel(state string) string {
|
||||
switch state {
|
||||
case "FAILED":
|
||||
return "failure"
|
||||
default:
|
||||
return strings.ToLower(state)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,264 @@
|
||||
//go:build testing
|
||||
|
||||
package alerts_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
beszelTests "github.com/henrygd/beszel/internal/tests"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestSmartDeviceAlert(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create a system for the user
|
||||
system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "test-system",
|
||||
"users": []string{user.Id},
|
||||
"host": "127.0.0.1",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Create a smart_device with state PASSED
|
||||
smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
|
||||
"system": system.Id,
|
||||
"name": "/dev/sda",
|
||||
"model": "Samsung SSD 970 EVO",
|
||||
"state": "PASSED",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Verify no emails sent initially
|
||||
assert.Zero(t, hub.TestMailer.TotalSend(), "should have 0 emails sent initially")
|
||||
|
||||
// Re-fetch the record so PocketBase can properly track original values
|
||||
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Update the smart device state to FAILED
|
||||
smartDevice.Set("state", "FAILED")
|
||||
err = hub.Save(smartDevice)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Wait for the alert to be processed
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
// Verify that an email was sent
|
||||
assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 email sent after state changed to FAILED")
|
||||
|
||||
// Check the email content
|
||||
lastMessage := hub.TestMailer.LastMessage()
|
||||
assert.Contains(t, lastMessage.Subject, "SMART failure on test-system")
|
||||
assert.Contains(t, lastMessage.Subject, "/dev/sda")
|
||||
assert.Contains(t, lastMessage.Text, "Samsung SSD 970 EVO")
|
||||
assert.Contains(t, lastMessage.Text, "FAILED")
|
||||
}
|
||||
|
||||
func TestSmartDeviceAlertPassedToWarning(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "test-system",
|
||||
"users": []string{user.Id},
|
||||
"host": "127.0.0.1",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
|
||||
"system": system.Id,
|
||||
"name": "/dev/mmcblk0",
|
||||
"model": "eMMC",
|
||||
"state": "PASSED",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
|
||||
assert.NoError(t, err)
|
||||
|
||||
smartDevice.Set("state", "WARNING")
|
||||
err = hub.Save(smartDevice)
|
||||
assert.NoError(t, err)
|
||||
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 email sent after state changed to WARNING")
|
||||
lastMessage := hub.TestMailer.LastMessage()
|
||||
assert.Contains(t, lastMessage.Subject, "SMART warning on test-system")
|
||||
assert.Contains(t, lastMessage.Text, "WARNING")
|
||||
}
|
||||
|
||||
func TestSmartDeviceAlertWarningToFailed(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "test-system",
|
||||
"users": []string{user.Id},
|
||||
"host": "127.0.0.1",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
|
||||
"system": system.Id,
|
||||
"name": "/dev/mmcblk0",
|
||||
"model": "eMMC",
|
||||
"state": "WARNING",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
|
||||
assert.NoError(t, err)
|
||||
|
||||
smartDevice.Set("state", "FAILED")
|
||||
err = hub.Save(smartDevice)
|
||||
assert.NoError(t, err)
|
||||
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 email sent after state changed from WARNING to FAILED")
|
||||
lastMessage := hub.TestMailer.LastMessage()
|
||||
assert.Contains(t, lastMessage.Subject, "SMART failure on test-system")
|
||||
assert.Contains(t, lastMessage.Text, "FAILED")
|
||||
}
|
||||
|
||||
func TestSmartDeviceAlertNoAlertOnNonPassedToFailed(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create a system for the user
|
||||
system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "test-system",
|
||||
"users": []string{user.Id},
|
||||
"host": "127.0.0.1",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Create a smart_device with state UNKNOWN
|
||||
smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
|
||||
"system": system.Id,
|
||||
"name": "/dev/sda",
|
||||
"model": "Samsung SSD 970 EVO",
|
||||
"state": "UNKNOWN",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Re-fetch the record so PocketBase can properly track original values
|
||||
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Update the state from UNKNOWN to FAILED - should NOT trigger alert.
|
||||
// We only alert from known healthy/degraded states.
|
||||
smartDevice.Set("state", "FAILED")
|
||||
err = hub.Save(smartDevice)
|
||||
assert.NoError(t, err)
|
||||
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
// Verify no email was sent (only PASSED -> FAILED triggers alert)
|
||||
assert.Zero(t, hub.TestMailer.TotalSend(), "should have 0 emails when changing from UNKNOWN to FAILED")
|
||||
|
||||
// Re-fetch the record again
|
||||
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Update state from FAILED to PASSED - should NOT trigger alert
|
||||
smartDevice.Set("state", "PASSED")
|
||||
err = hub.Save(smartDevice)
|
||||
assert.NoError(t, err)
|
||||
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
// Verify no email was sent
|
||||
assert.Zero(t, hub.TestMailer.TotalSend(), "should have 0 emails when changing from FAILED to PASSED")
|
||||
}
|
||||
|
||||
func TestSmartDeviceAlertMultipleUsers(t *testing.T) {
|
||||
hub, user1 := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create a second user
|
||||
user2, err := beszelTests.CreateUser(hub, "test2@example.com", "password")
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Create user settings for the second user
|
||||
_, err = beszelTests.CreateRecord(hub, "user_settings", map[string]any{
|
||||
"user": user2.Id,
|
||||
"settings": `{"emails":["test2@example.com"],"webhooks":[]}`,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Create a system with both users
|
||||
system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "shared-system",
|
||||
"users": []string{user1.Id, user2.Id},
|
||||
"host": "127.0.0.1",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Create a smart_device with state PASSED
|
||||
smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
|
||||
"system": system.Id,
|
||||
"name": "/dev/nvme0n1",
|
||||
"model": "WD Black SN850",
|
||||
"state": "PASSED",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Re-fetch the record so PocketBase can properly track original values
|
||||
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Update the smart device state to FAILED
|
||||
smartDevice.Set("state", "FAILED")
|
||||
err = hub.Save(smartDevice)
|
||||
assert.NoError(t, err)
|
||||
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
// Verify that two emails were sent (one for each user)
|
||||
assert.EqualValues(t, 2, hub.TestMailer.TotalSend(), "should have 2 emails sent for 2 users")
|
||||
}
|
||||
|
||||
func TestSmartDeviceAlertWithoutModel(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create a system for the user
|
||||
system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "test-system",
|
||||
"users": []string{user.Id},
|
||||
"host": "127.0.0.1",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Create a smart_device with state PASSED but no model
|
||||
smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
|
||||
"system": system.Id,
|
||||
"name": "/dev/sdb",
|
||||
"state": "PASSED",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Re-fetch the record so PocketBase can properly track original values
|
||||
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Update the smart device state to FAILED
|
||||
smartDevice.Set("state", "FAILED")
|
||||
err = hub.Save(smartDevice)
|
||||
assert.NoError(t, err)
|
||||
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
// Verify that an email was sent
|
||||
assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 email sent")
|
||||
|
||||
// Check that the email doesn't have empty parentheses for missing model
|
||||
lastMessage := hub.TestMailer.LastMessage()
|
||||
assert.NotContains(t, lastMessage.Text, "()", "should not have empty parentheses for missing model")
|
||||
assert.Contains(t, lastMessage.Text, "/dev/sdb")
|
||||
}
|
||||
@@ -0,0 +1,239 @@
|
||||
package alerts
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
)
|
||||
|
||||
type alertInfo struct {
|
||||
systemName string
|
||||
alertData CachedAlertData
|
||||
expireTime time.Time
|
||||
timer *time.Timer
|
||||
}
|
||||
|
||||
// Stop cancels all pending status alert timers.
|
||||
func (am *AlertManager) Stop() {
|
||||
am.stopOnce.Do(func() {
|
||||
am.pendingAlerts.Range(func(key, value any) bool {
|
||||
info := value.(*alertInfo)
|
||||
if info.timer != nil {
|
||||
info.timer.Stop()
|
||||
}
|
||||
am.pendingAlerts.Delete(key)
|
||||
return true
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
// HandleStatusAlerts manages the logic when system status changes.
|
||||
func (am *AlertManager) HandleStatusAlerts(newStatus string, systemRecord *core.Record) error {
|
||||
if newStatus != "up" && newStatus != "down" {
|
||||
return nil
|
||||
}
|
||||
|
||||
alerts := am.alertsCache.GetAlertsByName(systemRecord.Id, "Status")
|
||||
if len(alerts) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
systemName := systemRecord.GetString("name")
|
||||
if newStatus == "down" {
|
||||
am.handleSystemDown(systemName, alerts)
|
||||
} else {
|
||||
am.handleSystemUp(systemName, alerts)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// handleSystemDown manages the logic when a system status changes to "down". It schedules pending alerts for each alert record.
|
||||
func (am *AlertManager) handleSystemDown(systemName string, alerts []CachedAlertData) {
|
||||
for _, alertData := range alerts {
|
||||
min := max(1, int(alertData.Min))
|
||||
am.schedulePendingStatusAlert(systemName, alertData, time.Duration(min)*time.Minute)
|
||||
}
|
||||
}
|
||||
|
||||
// schedulePendingStatusAlert sets up a timer to send a "down" alert after the specified delay if the system is still down.
|
||||
// It returns true if the alert was scheduled, or false if an alert was already pending for the given alert record.
|
||||
func (am *AlertManager) schedulePendingStatusAlert(systemName string, alertData CachedAlertData, delay time.Duration) bool {
|
||||
alert := &alertInfo{
|
||||
systemName: systemName,
|
||||
alertData: alertData,
|
||||
expireTime: time.Now().Add(delay),
|
||||
}
|
||||
|
||||
storedAlert, loaded := am.pendingAlerts.LoadOrStore(alertData.Id, alert)
|
||||
if loaded {
|
||||
return false
|
||||
}
|
||||
|
||||
stored := storedAlert.(*alertInfo)
|
||||
stored.timer = time.AfterFunc(time.Until(stored.expireTime), func() {
|
||||
am.processPendingAlert(alertData.Id)
|
||||
})
|
||||
return true
|
||||
}
|
||||
|
||||
// handleSystemUp manages the logic when a system status changes to "up".
|
||||
// It cancels any pending alerts and sends "up" alerts.
|
||||
func (am *AlertManager) handleSystemUp(systemName string, alerts []CachedAlertData) {
|
||||
for _, alertData := range alerts {
|
||||
// If alert exists for record, delete and continue (down alert not sent)
|
||||
if am.cancelPendingAlert(alertData.Id) {
|
||||
continue
|
||||
}
|
||||
if !alertData.Triggered {
|
||||
continue
|
||||
}
|
||||
if err := am.sendStatusAlert("up", systemName, alertData); err != nil {
|
||||
am.hub.Logger().Error("Failed to send alert", "err", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// cancelPendingAlert stops the timer and removes the pending alert for the given alert ID. Returns true if a pending alert was found and cancelled.
|
||||
func (am *AlertManager) cancelPendingAlert(alertID string) bool {
|
||||
value, loaded := am.pendingAlerts.LoadAndDelete(alertID)
|
||||
if !loaded {
|
||||
return false
|
||||
}
|
||||
|
||||
info := value.(*alertInfo)
|
||||
if info.timer != nil {
|
||||
info.timer.Stop()
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// CancelPendingStatusAlerts cancels all pending status alert timers for a given system.
|
||||
// This is called when a system is paused to prevent delayed alerts from firing.
|
||||
func (am *AlertManager) CancelPendingStatusAlerts(systemID string) {
|
||||
am.pendingAlerts.Range(func(key, value any) bool {
|
||||
info := value.(*alertInfo)
|
||||
if info.alertData.SystemID == systemID {
|
||||
am.cancelPendingAlert(key.(string))
|
||||
}
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
// processPendingAlert sends a "down" alert if the pending alert has expired and the system is still down.
|
||||
func (am *AlertManager) processPendingAlert(alertID string) {
|
||||
value, loaded := am.pendingAlerts.LoadAndDelete(alertID)
|
||||
if !loaded {
|
||||
return
|
||||
}
|
||||
|
||||
info := value.(*alertInfo)
|
||||
refreshedAlertData, ok := am.alertsCache.Refresh(info.alertData)
|
||||
if !ok || refreshedAlertData.Triggered {
|
||||
return
|
||||
}
|
||||
if err := am.sendStatusAlert("down", info.systemName, refreshedAlertData); err != nil {
|
||||
am.hub.Logger().Error("Failed to send alert", "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
// sendStatusAlert sends a status alert ("up" or "down") to the users associated with the alert records.
|
||||
func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, alertData CachedAlertData) error {
|
||||
// Update trigger state for alert record before sending alert
|
||||
triggered := alertStatus == "down"
|
||||
if err := am.setAlertTriggered(alertData, triggered); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var emoji string
|
||||
if alertStatus == "up" {
|
||||
emoji = "\u2705" // Green checkmark emoji
|
||||
} else {
|
||||
emoji = "\U0001F534" // Red alert emoji
|
||||
}
|
||||
|
||||
title := fmt.Sprintf("Connection to %s is %s %v", systemName, alertStatus, emoji)
|
||||
message := strings.TrimSuffix(title, emoji)
|
||||
|
||||
// Get system ID for the link
|
||||
systemID := alertData.SystemID
|
||||
|
||||
return am.SendAlert(AlertMessageData{
|
||||
UserID: alertData.UserID,
|
||||
SystemID: systemID,
|
||||
Title: title,
|
||||
Message: message,
|
||||
Link: am.hub.MakeLink("system", systemID),
|
||||
LinkText: "View " + systemName,
|
||||
})
|
||||
}
|
||||
|
||||
// resolveStatusAlerts resolves any triggered status alerts that weren't resolved
|
||||
// when system came up (https://github.com/henrygd/beszel/issues/1052).
|
||||
func resolveStatusAlerts(app core.App) error {
|
||||
db := app.DB()
|
||||
// Find all active status alerts where the system is actually up
|
||||
var alertIds []string
|
||||
err := db.NewQuery(`
|
||||
SELECT a.id
|
||||
FROM alerts a
|
||||
JOIN systems s ON a.system = s.id
|
||||
WHERE a.name = 'Status'
|
||||
AND a.triggered = true
|
||||
AND s.status = 'up'
|
||||
`).Column(&alertIds)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// resolve all matching alert records
|
||||
for _, alertId := range alertIds {
|
||||
alert, err := app.FindRecordById("alerts", alertId)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
alert.Set("triggered", false)
|
||||
err = app.Save(alert)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// restorePendingStatusAlerts re-queues untriggered status alerts for systems that
|
||||
// are still down after a hub restart. This rebuilds the lost in-memory timer state.
|
||||
func (am *AlertManager) restorePendingStatusAlerts() error {
|
||||
type pendingStatusAlert struct {
|
||||
AlertID string `db:"alert_id"`
|
||||
SystemID string `db:"system_id"`
|
||||
SystemName string `db:"system_name"`
|
||||
}
|
||||
|
||||
var pending []pendingStatusAlert
|
||||
err := am.hub.DB().NewQuery(`
|
||||
SELECT a.id AS alert_id, a.system AS system_id, s.name AS system_name
|
||||
FROM alerts a
|
||||
JOIN systems s ON a.system = s.id
|
||||
WHERE a.name = 'Status'
|
||||
AND a.triggered = false
|
||||
AND s.status = 'down'
|
||||
`).All(&pending)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Make sure cache is populated before trying to restore pending alerts
|
||||
_ = am.alertsCache.PopulateFromDB(false)
|
||||
|
||||
for _, item := range pending {
|
||||
alertData, ok := am.alertsCache.GetAlert(item.SystemID, item.AlertID)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
min := max(1, int(alertData.Min))
|
||||
am.schedulePendingStatusAlert(item.SystemName, alertData, time.Duration(min)*time.Minute)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,359 @@
|
||||
package alerts
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
|
||||
"github.com/pocketbase/dbx"
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
"github.com/pocketbase/pocketbase/tools/types"
|
||||
)
|
||||
|
||||
func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *system.CombinedData) error {
|
||||
alerts := am.alertsCache.GetAlertsExcludingNames(systemRecord.Id, "Status")
|
||||
if len(alerts) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var validAlerts []SystemAlertData
|
||||
now := systemRecord.GetDateTime("updated").Time().UTC()
|
||||
oldestTime := now
|
||||
|
||||
for _, alertData := range alerts {
|
||||
name := alertData.Name
|
||||
var val float64
|
||||
unit := "%"
|
||||
|
||||
switch name {
|
||||
case "CPU":
|
||||
val = data.Info.Cpu
|
||||
case "Memory":
|
||||
val = data.Info.MemPct
|
||||
case "Bandwidth":
|
||||
val = float64(data.Info.BandwidthBytes) / (1024 * 1024)
|
||||
unit = " MB/s"
|
||||
case "Disk":
|
||||
maxUsedPct := data.Info.DiskPct
|
||||
for _, fs := range data.Stats.ExtraFs {
|
||||
usedPct := fs.DiskUsed / fs.DiskTotal * 100
|
||||
if usedPct > maxUsedPct {
|
||||
maxUsedPct = usedPct
|
||||
}
|
||||
}
|
||||
val = maxUsedPct
|
||||
case "Temperature":
|
||||
if data.Info.DashboardTemp < 1 {
|
||||
continue
|
||||
}
|
||||
val = data.Info.DashboardTemp
|
||||
unit = "°C"
|
||||
case "LoadAvg1":
|
||||
val = data.Info.LoadAvg[0]
|
||||
unit = ""
|
||||
case "LoadAvg5":
|
||||
val = data.Info.LoadAvg[1]
|
||||
unit = ""
|
||||
case "LoadAvg15":
|
||||
val = data.Info.LoadAvg[2]
|
||||
unit = ""
|
||||
case "GPU":
|
||||
val = data.Info.GpuPct
|
||||
case "Battery":
|
||||
if data.Stats.Battery[0] == 0 {
|
||||
continue
|
||||
}
|
||||
val = float64(data.Stats.Battery[0])
|
||||
}
|
||||
|
||||
triggered := alertData.Triggered
|
||||
threshold := alertData.Value
|
||||
|
||||
// Battery alert has inverted logic: trigger when value is BELOW threshold
|
||||
lowAlert := isLowAlert(name)
|
||||
|
||||
// CONTINUE
|
||||
// For normal alerts: IF not triggered and curValue <= threshold, OR triggered and curValue > threshold
|
||||
// For low alerts (Battery): IF not triggered and curValue >= threshold, OR triggered and curValue < threshold
|
||||
if lowAlert {
|
||||
if (!triggered && val >= threshold) || (triggered && val < threshold) {
|
||||
continue
|
||||
}
|
||||
} else {
|
||||
if (!triggered && val <= threshold) || (triggered && val > threshold) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
min := max(1, alertData.Min)
|
||||
|
||||
alert := SystemAlertData{
|
||||
systemRecord: systemRecord,
|
||||
alertData: alertData,
|
||||
name: name,
|
||||
unit: unit,
|
||||
val: val,
|
||||
threshold: threshold,
|
||||
triggered: triggered,
|
||||
min: min,
|
||||
}
|
||||
|
||||
// send alert immediately if min is 1 - no need to sum up values.
|
||||
if min == 1 {
|
||||
if lowAlert {
|
||||
alert.triggered = val < threshold
|
||||
} else {
|
||||
alert.triggered = val > threshold
|
||||
}
|
||||
go am.sendSystemAlert(alert)
|
||||
continue
|
||||
}
|
||||
|
||||
alert.time = now.Add(-time.Duration(min) * time.Minute)
|
||||
if alert.time.Before(oldestTime) {
|
||||
oldestTime = alert.time
|
||||
}
|
||||
|
||||
validAlerts = append(validAlerts, alert)
|
||||
}
|
||||
|
||||
systemStats := []struct {
|
||||
Stats []byte `db:"stats"`
|
||||
Created types.DateTime `db:"created"`
|
||||
}{}
|
||||
|
||||
err := am.hub.DB().
|
||||
Select("stats", "created").
|
||||
From("system_stats").
|
||||
Where(dbx.NewExp(
|
||||
"system={:system} AND type='1m' AND created > {:created}",
|
||||
dbx.Params{
|
||||
"system": systemRecord.Id,
|
||||
// subtract some time to give us a bit of buffer
|
||||
"created": oldestTime.Add(-time.Second * 90),
|
||||
},
|
||||
)).
|
||||
OrderBy("created").
|
||||
All(&systemStats)
|
||||
if err != nil || len(systemStats) == 0 {
|
||||
return err
|
||||
}
|
||||
|
||||
// get oldest record creation time from first record in the slice
|
||||
oldestRecordTime := systemStats[0].Created.Time()
|
||||
// log.Println("oldestRecordTime", oldestRecordTime.String())
|
||||
|
||||
// Filter validAlerts to keep only those with time newer than oldestRecord
|
||||
filteredAlerts := make([]SystemAlertData, 0, len(validAlerts))
|
||||
for _, alert := range validAlerts {
|
||||
if alert.time.After(oldestRecordTime) {
|
||||
filteredAlerts = append(filteredAlerts, alert)
|
||||
}
|
||||
}
|
||||
validAlerts = filteredAlerts
|
||||
|
||||
if len(validAlerts) == 0 {
|
||||
// log.Println("no valid alerts found")
|
||||
return nil
|
||||
}
|
||||
|
||||
var stats SystemAlertStats
|
||||
|
||||
// we can skip the latest systemStats record since it's the current value
|
||||
for i := range systemStats {
|
||||
stat := systemStats[i]
|
||||
// subtract 10 seconds to give a small time buffer
|
||||
systemStatsCreation := stat.Created.Time().Add(-time.Second * 10)
|
||||
if err := json.Unmarshal(stat.Stats, &stats); err != nil {
|
||||
return err
|
||||
}
|
||||
// log.Println("stats", stats)
|
||||
for j := range validAlerts {
|
||||
alert := &validAlerts[j]
|
||||
// reset alert val on first iteration
|
||||
if i == 0 {
|
||||
alert.val = 0
|
||||
}
|
||||
// continue if system_stats is older than alert time range
|
||||
if systemStatsCreation.Before(alert.time) {
|
||||
continue
|
||||
}
|
||||
// add to alert value
|
||||
switch alert.name {
|
||||
case "CPU":
|
||||
alert.val += stats.Cpu
|
||||
case "Memory":
|
||||
alert.val += stats.Mem
|
||||
case "Bandwidth":
|
||||
alert.val += float64(stats.Bandwidth[0]+stats.Bandwidth[1]) / (1024 * 1024)
|
||||
case "Disk":
|
||||
if alert.mapSums == nil {
|
||||
alert.mapSums = make(map[string]float32, len(stats.ExtraFs)+1)
|
||||
}
|
||||
// add root disk
|
||||
if _, ok := alert.mapSums["root"]; !ok {
|
||||
alert.mapSums["root"] = 0.0
|
||||
}
|
||||
alert.mapSums["root"] += float32(stats.Disk)
|
||||
// add extra disks from historical record
|
||||
for key, fs := range stats.ExtraFs {
|
||||
if fs.DiskTotal > 0 {
|
||||
if _, ok := alert.mapSums[key]; !ok {
|
||||
alert.mapSums[key] = 0.0
|
||||
}
|
||||
alert.mapSums[key] += float32(fs.DiskUsed / fs.DiskTotal * 100)
|
||||
}
|
||||
}
|
||||
case "Temperature":
|
||||
if alert.mapSums == nil {
|
||||
alert.mapSums = make(map[string]float32, len(stats.Temperatures))
|
||||
}
|
||||
for key, temp := range stats.Temperatures {
|
||||
if _, ok := alert.mapSums[key]; !ok {
|
||||
alert.mapSums[key] = float32(0)
|
||||
}
|
||||
alert.mapSums[key] += temp
|
||||
}
|
||||
case "LoadAvg1":
|
||||
alert.val += stats.LoadAvg[0]
|
||||
case "LoadAvg5":
|
||||
alert.val += stats.LoadAvg[1]
|
||||
case "LoadAvg15":
|
||||
alert.val += stats.LoadAvg[2]
|
||||
case "GPU":
|
||||
if len(stats.GPU) == 0 {
|
||||
continue
|
||||
}
|
||||
maxUsage := 0.0
|
||||
for _, gpu := range stats.GPU {
|
||||
if gpu.Usage > maxUsage {
|
||||
maxUsage = gpu.Usage
|
||||
}
|
||||
}
|
||||
alert.val += maxUsage
|
||||
case "Battery":
|
||||
alert.val += float64(stats.Battery[0])
|
||||
default:
|
||||
continue
|
||||
}
|
||||
alert.count++
|
||||
}
|
||||
}
|
||||
// sum up vals for each alert
|
||||
for _, alert := range validAlerts {
|
||||
switch alert.name {
|
||||
case "Disk":
|
||||
maxPct := float32(0)
|
||||
for key, value := range alert.mapSums {
|
||||
sumPct := float32(value)
|
||||
if sumPct > maxPct {
|
||||
maxPct = sumPct
|
||||
alert.descriptor = fmt.Sprintf("Usage of %s", key)
|
||||
}
|
||||
}
|
||||
alert.val = float64(maxPct / float32(alert.count))
|
||||
case "Temperature":
|
||||
maxTemp := float32(0)
|
||||
for key, value := range alert.mapSums {
|
||||
sumTemp := float32(value) / float32(alert.count)
|
||||
if sumTemp > maxTemp {
|
||||
maxTemp = sumTemp
|
||||
alert.descriptor = fmt.Sprintf("Highest sensor %s", key)
|
||||
}
|
||||
}
|
||||
alert.val = float64(maxTemp)
|
||||
default:
|
||||
alert.val = alert.val / float64(alert.count)
|
||||
}
|
||||
minCount := float32(alert.min) / 1.2
|
||||
// log.Println("alert", alert.name, "val", alert.val, "threshold", alert.threshold, "triggered", alert.triggered)
|
||||
// log.Printf("%s: val %f | count %d | min-count %f | threshold %f\n", alert.name, alert.val, alert.count, minCount, alert.threshold)
|
||||
// pass through alert if count is greater than or equal to minCount
|
||||
if float32(alert.count) >= minCount {
|
||||
// Battery alert has inverted logic: trigger when value is BELOW threshold
|
||||
lowAlert := isLowAlert(alert.name)
|
||||
if lowAlert {
|
||||
if !alert.triggered && alert.val < alert.threshold {
|
||||
alert.triggered = true
|
||||
go am.sendSystemAlert(alert)
|
||||
} else if alert.triggered && alert.val >= alert.threshold {
|
||||
alert.triggered = false
|
||||
go am.sendSystemAlert(alert)
|
||||
}
|
||||
} else {
|
||||
if !alert.triggered && alert.val > alert.threshold {
|
||||
alert.triggered = true
|
||||
go am.sendSystemAlert(alert)
|
||||
} else if alert.triggered && alert.val <= alert.threshold {
|
||||
alert.triggered = false
|
||||
go am.sendSystemAlert(alert)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (am *AlertManager) sendSystemAlert(alert SystemAlertData) {
|
||||
// log.Printf("Sending alert %s: val %f | count %d | threshold %f\n", alert.name, alert.val, alert.count, alert.threshold)
|
||||
systemName := alert.systemRecord.GetString("name")
|
||||
|
||||
// change Disk to Disk usage
|
||||
if alert.name == "Disk" {
|
||||
alert.name += " usage"
|
||||
}
|
||||
// format LoadAvg5 and LoadAvg15
|
||||
if after, ok := strings.CutPrefix(alert.name, "LoadAvg"); ok {
|
||||
alert.name = after + "m Load"
|
||||
}
|
||||
|
||||
// make title alert name lowercase if not CPU or GPU
|
||||
titleAlertName := alert.name
|
||||
if titleAlertName != "CPU" && titleAlertName != "GPU" {
|
||||
titleAlertName = strings.ToLower(titleAlertName)
|
||||
}
|
||||
|
||||
var subject string
|
||||
lowAlert := isLowAlert(alert.name)
|
||||
if alert.triggered {
|
||||
if lowAlert {
|
||||
subject = fmt.Sprintf("%s %s below threshold", systemName, titleAlertName)
|
||||
} else {
|
||||
subject = fmt.Sprintf("%s %s above threshold", systemName, titleAlertName)
|
||||
}
|
||||
} else {
|
||||
if lowAlert {
|
||||
subject = fmt.Sprintf("%s %s above threshold", systemName, titleAlertName)
|
||||
} else {
|
||||
subject = fmt.Sprintf("%s %s below threshold", systemName, titleAlertName)
|
||||
}
|
||||
}
|
||||
minutesLabel := "minute"
|
||||
if alert.min > 1 {
|
||||
minutesLabel += "s"
|
||||
}
|
||||
if alert.descriptor == "" {
|
||||
alert.descriptor = alert.name
|
||||
}
|
||||
body := fmt.Sprintf("%s averaged %.2f%s for the previous %v %s.", alert.descriptor, alert.val, alert.unit, alert.min, minutesLabel)
|
||||
|
||||
if err := am.setAlertTriggered(alert.alertData, alert.triggered); err != nil {
|
||||
// app.Logger().Error("failed to save alert record", "err", err)
|
||||
return
|
||||
}
|
||||
am.SendAlert(AlertMessageData{
|
||||
UserID: alert.alertData.UserID,
|
||||
SystemID: alert.systemRecord.Id,
|
||||
Title: subject,
|
||||
Message: body,
|
||||
Link: am.hub.MakeLink("system", alert.systemRecord.Id),
|
||||
LinkText: "View " + systemName,
|
||||
})
|
||||
}
|
||||
|
||||
func isLowAlert(name string) bool {
|
||||
return name == "Battery"
|
||||
}
|
||||
@@ -0,0 +1,218 @@
|
||||
//go:build testing
|
||||
|
||||
package alerts_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"testing/synctest"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
beszelTests "github.com/henrygd/beszel/internal/tests"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
type systemAlertValueSetter[T any] func(info *system.Info, stats *system.Stats, value T)
|
||||
|
||||
type systemAlertTestFixture struct {
|
||||
hub *beszelTests.TestHub
|
||||
alertID string
|
||||
submit func(*system.CombinedData) error
|
||||
}
|
||||
|
||||
func createCombinedData[T any](value T, setValue systemAlertValueSetter[T]) *system.CombinedData {
|
||||
var data system.CombinedData
|
||||
setValue(&data.Info, &data.Stats, value)
|
||||
return &data
|
||||
}
|
||||
|
||||
func newSystemAlertTestFixture(t *testing.T, alertName string, min int, threshold float64) *systemAlertTestFixture {
|
||||
t.Helper()
|
||||
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
require.NoError(t, err)
|
||||
systemRecord := systems[0]
|
||||
|
||||
sysManagerSystem, err := hub.GetSystemManager().GetSystemFromStore(systemRecord.Id)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, sysManagerSystem)
|
||||
sysManagerSystem.StopUpdater()
|
||||
|
||||
userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
|
||||
require.NoError(t, err)
|
||||
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
|
||||
require.NoError(t, hub.Save(userSettings))
|
||||
|
||||
alertRecord, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": alertName,
|
||||
"system": systemRecord.Id,
|
||||
"user": user.Id,
|
||||
"min": min,
|
||||
"value": threshold,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
assert.False(t, alertRecord.GetBool("triggered"), "Alert should not be triggered initially")
|
||||
|
||||
alertsCache := hub.GetAlertManager().GetSystemAlertsCache()
|
||||
cachedAlerts := alertsCache.GetAlertsExcludingNames(systemRecord.Id, "Status")
|
||||
assert.Len(t, cachedAlerts, 1, "Alert should be in cache")
|
||||
|
||||
return &systemAlertTestFixture{
|
||||
hub: hub,
|
||||
alertID: alertRecord.Id,
|
||||
submit: func(data *system.CombinedData) error {
|
||||
_, err := sysManagerSystem.CreateRecords(data)
|
||||
return err
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (fixture *systemAlertTestFixture) cleanup() {
|
||||
fixture.hub.Cleanup()
|
||||
}
|
||||
|
||||
func submitValue[T any](fixture *systemAlertTestFixture, t *testing.T, value T, setValue systemAlertValueSetter[T]) {
|
||||
t.Helper()
|
||||
require.NoError(t, fixture.submit(createCombinedData(value, setValue)))
|
||||
}
|
||||
|
||||
func (fixture *systemAlertTestFixture) assertTriggered(t *testing.T, triggered bool, message string) {
|
||||
t.Helper()
|
||||
|
||||
alertRecord, err := fixture.hub.FindRecordById("alerts", fixture.alertID)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, triggered, alertRecord.GetBool("triggered"), message)
|
||||
}
|
||||
|
||||
func waitForSystemAlert(d time.Duration) {
|
||||
time.Sleep(d)
|
||||
synctest.Wait()
|
||||
}
|
||||
|
||||
func testOneMinuteSystemAlert[T any](t *testing.T, alertName string, threshold float64, setValue systemAlertValueSetter[T], triggerValue, resolveValue T) {
|
||||
t.Helper()
|
||||
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
fixture := newSystemAlertTestFixture(t, alertName, 1, threshold)
|
||||
defer fixture.cleanup()
|
||||
|
||||
submitValue(fixture, t, triggerValue, setValue)
|
||||
waitForSystemAlert(time.Second)
|
||||
|
||||
fixture.assertTriggered(t, true, "Alert should be triggered")
|
||||
assert.Equal(t, 1, fixture.hub.TestMailer.TotalSend(), "An email should have been sent")
|
||||
|
||||
submitValue(fixture, t, resolveValue, setValue)
|
||||
waitForSystemAlert(time.Second)
|
||||
|
||||
fixture.assertTriggered(t, false, "Alert should be untriggered")
|
||||
assert.Equal(t, 2, fixture.hub.TestMailer.TotalSend(), "A second email should have been sent for untriggering the alert")
|
||||
|
||||
waitForSystemAlert(time.Minute)
|
||||
})
|
||||
}
|
||||
|
||||
func testMultiMinuteSystemAlert[T any](t *testing.T, alertName string, threshold float64, min int, setValue systemAlertValueSetter[T], baselineValue, triggerValue, resolveValue T) {
|
||||
t.Helper()
|
||||
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
fixture := newSystemAlertTestFixture(t, alertName, min, threshold)
|
||||
defer fixture.cleanup()
|
||||
|
||||
submitValue(fixture, t, baselineValue, setValue)
|
||||
waitForSystemAlert(time.Minute + time.Second)
|
||||
fixture.assertTriggered(t, false, "Alert should not be triggered yet")
|
||||
|
||||
submitValue(fixture, t, triggerValue, setValue)
|
||||
waitForSystemAlert(time.Minute)
|
||||
fixture.assertTriggered(t, false, "Alert should not be triggered until the history window is full")
|
||||
|
||||
submitValue(fixture, t, triggerValue, setValue)
|
||||
waitForSystemAlert(time.Second)
|
||||
fixture.assertTriggered(t, true, "Alert should be triggered")
|
||||
assert.Equal(t, 1, fixture.hub.TestMailer.TotalSend(), "An email should have been sent")
|
||||
|
||||
submitValue(fixture, t, resolveValue, setValue)
|
||||
waitForSystemAlert(time.Second)
|
||||
fixture.assertTriggered(t, false, "Alert should be untriggered")
|
||||
assert.Equal(t, 2, fixture.hub.TestMailer.TotalSend(), "A second email should have been sent for untriggering the alert")
|
||||
})
|
||||
}
|
||||
|
||||
func setCPUAlertValue(info *system.Info, stats *system.Stats, value float64) {
|
||||
info.Cpu = value
|
||||
stats.Cpu = value
|
||||
}
|
||||
|
||||
func setMemoryAlertValue(info *system.Info, stats *system.Stats, value float64) {
|
||||
info.MemPct = value
|
||||
stats.MemPct = value
|
||||
}
|
||||
|
||||
func setDiskAlertValue(info *system.Info, stats *system.Stats, value float64) {
|
||||
info.DiskPct = value
|
||||
stats.DiskPct = value
|
||||
}
|
||||
|
||||
func setBandwidthAlertValue(info *system.Info, stats *system.Stats, value [2]uint64) {
|
||||
info.BandwidthBytes = value[0] + value[1]
|
||||
stats.Bandwidth = value
|
||||
}
|
||||
|
||||
func megabytesToBytes(mb uint64) uint64 {
|
||||
return mb * 1024 * 1024
|
||||
}
|
||||
|
||||
func setGPUAlertValue(info *system.Info, stats *system.Stats, value float64) {
|
||||
info.GpuPct = value
|
||||
stats.GPUData = map[string]system.GPUData{
|
||||
"GPU0": {Usage: value},
|
||||
}
|
||||
}
|
||||
|
||||
func setTemperatureAlertValue(info *system.Info, stats *system.Stats, value float64) {
|
||||
info.DashboardTemp = value
|
||||
stats.Temperatures = map[string]float64{
|
||||
"Temp0": value,
|
||||
}
|
||||
}
|
||||
|
||||
func setLoadAvgAlertValue(info *system.Info, stats *system.Stats, value [3]float64) {
|
||||
info.LoadAvg = value
|
||||
stats.LoadAvg = value
|
||||
}
|
||||
|
||||
func setBatteryAlertValue(info *system.Info, stats *system.Stats, value [2]uint8) {
|
||||
info.Battery = value
|
||||
stats.Battery = value
|
||||
}
|
||||
|
||||
func TestSystemAlertsOneMin(t *testing.T) {
|
||||
testOneMinuteSystemAlert(t, "CPU", 50, setCPUAlertValue, 51, 49)
|
||||
testOneMinuteSystemAlert(t, "Memory", 50, setMemoryAlertValue, 51, 49)
|
||||
testOneMinuteSystemAlert(t, "Disk", 50, setDiskAlertValue, 51, 49)
|
||||
testOneMinuteSystemAlert(t, "Bandwidth", 50, setBandwidthAlertValue, [2]uint64{megabytesToBytes(26), megabytesToBytes(25)}, [2]uint64{megabytesToBytes(25), megabytesToBytes(24)})
|
||||
testOneMinuteSystemAlert(t, "GPU", 50, setGPUAlertValue, 51, 49)
|
||||
testOneMinuteSystemAlert(t, "Temperature", 70, setTemperatureAlertValue, 71, 69)
|
||||
testOneMinuteSystemAlert(t, "LoadAvg1", 4, setLoadAvgAlertValue, [3]float64{4.1, 0, 0}, [3]float64{3.9, 0, 0})
|
||||
testOneMinuteSystemAlert(t, "LoadAvg5", 4, setLoadAvgAlertValue, [3]float64{0, 4.1, 0}, [3]float64{0, 3.9, 0})
|
||||
testOneMinuteSystemAlert(t, "LoadAvg15", 4, setLoadAvgAlertValue, [3]float64{0, 0, 4.1}, [3]float64{0, 0, 3.9})
|
||||
testOneMinuteSystemAlert(t, "Battery", 20, setBatteryAlertValue, [2]uint8{19, 0}, [2]uint8{21, 0})
|
||||
}
|
||||
|
||||
func TestSystemAlertsTwoMin(t *testing.T) {
|
||||
testMultiMinuteSystemAlert(t, "CPU", 50, 2, setCPUAlertValue, 10, 51, 48)
|
||||
testMultiMinuteSystemAlert(t, "Memory", 50, 2, setMemoryAlertValue, 10, 51, 48)
|
||||
testMultiMinuteSystemAlert(t, "Disk", 50, 2, setDiskAlertValue, 10, 51, 48)
|
||||
testMultiMinuteSystemAlert(t, "Bandwidth", 50, 2, setBandwidthAlertValue, [2]uint64{megabytesToBytes(10), megabytesToBytes(10)}, [2]uint64{megabytesToBytes(26), megabytesToBytes(25)}, [2]uint64{megabytesToBytes(10), megabytesToBytes(10)})
|
||||
testMultiMinuteSystemAlert(t, "GPU", 50, 2, setGPUAlertValue, 10, 51, 48)
|
||||
testMultiMinuteSystemAlert(t, "Temperature", 70, 2, setTemperatureAlertValue, 10, 71, 67)
|
||||
testMultiMinuteSystemAlert(t, "LoadAvg1", 4, 2, setLoadAvgAlertValue, [3]float64{0, 0, 0}, [3]float64{4.1, 0, 0}, [3]float64{3.5, 0, 0})
|
||||
testMultiMinuteSystemAlert(t, "LoadAvg5", 4, 2, setLoadAvgAlertValue, [3]float64{0, 2, 0}, [3]float64{0, 4.1, 0}, [3]float64{0, 3.5, 0})
|
||||
testMultiMinuteSystemAlert(t, "LoadAvg15", 4, 2, setLoadAvgAlertValue, [3]float64{0, 0, 2}, [3]float64{0, 0, 4.1}, [3]float64{0, 0, 3.5})
|
||||
testMultiMinuteSystemAlert(t, "Battery", 20, 2, setBatteryAlertValue, [2]uint8{21, 0}, [2]uint8{19, 0}, [2]uint8{25, 1})
|
||||
}
|
||||
@@ -0,0 +1,187 @@
|
||||
//go:build testing
|
||||
|
||||
package alerts_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"testing/synctest"
|
||||
"time"
|
||||
|
||||
beszelTests "github.com/henrygd/beszel/internal/tests"
|
||||
|
||||
"github.com/henrygd/beszel/internal/alerts"
|
||||
"github.com/pocketbase/dbx"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestAlertsHistory(t *testing.T) {
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create systems and alerts
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
assert.NoError(t, err)
|
||||
system := systems[0]
|
||||
|
||||
alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": system.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Initially, no alert history records should exist
|
||||
initialHistoryCount, err := hub.CountRecords("alerts_history", nil)
|
||||
assert.NoError(t, err)
|
||||
assert.Zero(t, initialHistoryCount, "Should have 0 alert history records initially")
|
||||
|
||||
// Set system to up initially
|
||||
system.Set("status", "up")
|
||||
err = hub.SaveNoValidate(system)
|
||||
assert.NoError(t, err)
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
|
||||
// Set system to down to trigger alert
|
||||
system.Set("status", "down")
|
||||
err = hub.SaveNoValidate(system)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Wait for alert to trigger (after the downtime delay)
|
||||
// With 1 minute delay, we need to wait at least 1 minute + some buffer
|
||||
time.Sleep(time.Second * 75)
|
||||
|
||||
// Check that alert is triggered
|
||||
triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true, "id": alert.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 1, triggeredCount, "Alert should be triggered")
|
||||
|
||||
// Check that alert history record was created
|
||||
historyCount, err := hub.CountRecords("alerts_history", dbx.HashExp{"alert_id": alert.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 1, historyCount, "Should have 1 alert history record for triggered alert")
|
||||
|
||||
// Get the alert history record and verify it's not resolved immediately
|
||||
historyRecord, err := hub.FindFirstRecordByFilter("alerts_history", "alert_id={:alert_id}", dbx.Params{"alert_id": alert.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, historyRecord, "Alert history record should exist")
|
||||
assert.Equal(t, alert.Id, historyRecord.GetString("alert_id"), "Alert history should reference correct alert")
|
||||
assert.Equal(t, system.Id, historyRecord.GetString("system"), "Alert history should reference correct system")
|
||||
assert.Equal(t, "Status", historyRecord.GetString("name"), "Alert history should have correct name")
|
||||
|
||||
// The alert history might be resolved immediately in some cases, so let's check the alert's triggered status
|
||||
alertRecord, err := hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alert.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, alertRecord.GetBool("triggered"), "Alert should still be triggered when checking history")
|
||||
|
||||
// Now resolve the alert by setting system back to up
|
||||
system.Set("status", "up")
|
||||
err = hub.SaveNoValidate(system)
|
||||
assert.NoError(t, err)
|
||||
time.Sleep(200 * time.Millisecond)
|
||||
|
||||
// Check that alert is no longer triggered
|
||||
triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true, "id": alert.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.Zero(t, triggeredCount, "Alert should not be triggered after system is back up")
|
||||
|
||||
// Check that alert history record is now resolved
|
||||
historyRecord, err = hub.FindFirstRecordByFilter("alerts_history", "alert_id={:alert_id}", dbx.Params{"alert_id": alert.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, historyRecord, "Alert history record should still exist")
|
||||
assert.NotNil(t, historyRecord.Get("resolved"), "Alert history should be resolved")
|
||||
|
||||
// Test deleting a triggered alert resolves its history
|
||||
// Create another system and alert
|
||||
systems2, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
assert.NoError(t, err)
|
||||
system2 := systems2[0]
|
||||
system2.Set("name", "test-system-2") // Rename for clarity
|
||||
err = hub.SaveNoValidate(system2)
|
||||
assert.NoError(t, err)
|
||||
|
||||
alert2, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": system2.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Set system2 to down to trigger alert
|
||||
system2.Set("status", "down")
|
||||
err = hub.SaveNoValidate(system2)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Wait for alert to trigger
|
||||
time.Sleep(time.Second * 75)
|
||||
|
||||
// Verify alert is triggered and history record exists
|
||||
triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true, "id": alert2.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 1, triggeredCount, "Second alert should be triggered")
|
||||
|
||||
historyCount, err = hub.CountRecords("alerts_history", dbx.HashExp{"alert_id": alert2.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 1, historyCount, "Should have 1 alert history record for second alert")
|
||||
|
||||
// Delete the triggered alert
|
||||
err = hub.Delete(alert2)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Check that alert history record is resolved after deletion
|
||||
historyRecord2, err := hub.FindFirstRecordByFilter("alerts_history", "alert_id={:alert_id}", dbx.Params{"alert_id": alert2.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, historyRecord2, "Alert history record should still exist after alert deletion")
|
||||
assert.NotNil(t, historyRecord2.Get("resolved"), "Alert history should be resolved after alert deletion")
|
||||
|
||||
// Verify total history count is correct (2 records total)
|
||||
totalHistoryCount, err := hub.CountRecords("alerts_history", nil)
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 2, totalHistoryCount, "Should have 2 total alert history records")
|
||||
})
|
||||
}
|
||||
|
||||
func TestSetAlertTriggered(t *testing.T) {
|
||||
hub, _ := beszelTests.NewTestHub(t.TempDir())
|
||||
defer hub.Cleanup()
|
||||
|
||||
hub.StartHub()
|
||||
|
||||
user, _ := beszelTests.CreateUser(hub, "test@example.com", "password")
|
||||
system, _ := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "test-system",
|
||||
"users": []string{user.Id},
|
||||
"host": "127.0.0.1",
|
||||
})
|
||||
|
||||
alertRecord, _ := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "CPU",
|
||||
"system": system.Id,
|
||||
"user": user.Id,
|
||||
"value": 80,
|
||||
"triggered": false,
|
||||
})
|
||||
|
||||
am := alerts.NewAlertManager(hub)
|
||||
|
||||
var alert alerts.CachedAlertData
|
||||
alert.PopulateFromRecord(alertRecord)
|
||||
|
||||
// Test triggering the alert
|
||||
err := am.SetAlertTriggered(alert, true)
|
||||
assert.NoError(t, err)
|
||||
|
||||
updatedRecord, err := hub.FindRecordById("alerts", alert.Id)
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, updatedRecord.GetBool("triggered"))
|
||||
|
||||
// Test un-triggering the alert
|
||||
err = am.SetAlertTriggered(alert, false)
|
||||
assert.NoError(t, err)
|
||||
|
||||
updatedRecord, err = hub.FindRecordById("alerts", alert.Id)
|
||||
assert.NoError(t, err)
|
||||
assert.False(t, updatedRecord.GetBool("triggered"))
|
||||
}
|
||||
@@ -0,0 +1,101 @@
|
||||
//go:build testing
|
||||
|
||||
package alerts
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
)
|
||||
|
||||
func NewTestAlertManagerWithoutWorker(app hubLike) *AlertManager {
|
||||
return &AlertManager{
|
||||
hub: app,
|
||||
alertsCache: NewAlertsCache(app),
|
||||
}
|
||||
}
|
||||
|
||||
// GetSystemAlertsCache returns the internal system alerts cache.
|
||||
func (am *AlertManager) GetSystemAlertsCache() *AlertsCache {
|
||||
return am.alertsCache
|
||||
}
|
||||
|
||||
func (am *AlertManager) GetAlertManager() *AlertManager {
|
||||
return am
|
||||
}
|
||||
|
||||
func (am *AlertManager) GetPendingAlerts() *sync.Map {
|
||||
return &am.pendingAlerts
|
||||
}
|
||||
|
||||
func (am *AlertManager) GetPendingAlertsCount() int {
|
||||
count := 0
|
||||
am.pendingAlerts.Range(func(key, value any) bool {
|
||||
count++
|
||||
return true
|
||||
})
|
||||
return count
|
||||
}
|
||||
|
||||
// ProcessPendingAlerts manually processes all expired alerts (for testing)
|
||||
func (am *AlertManager) ProcessPendingAlerts() ([]CachedAlertData, error) {
|
||||
now := time.Now()
|
||||
var lastErr error
|
||||
var processedAlerts []CachedAlertData
|
||||
am.pendingAlerts.Range(func(key, value any) bool {
|
||||
info := value.(*alertInfo)
|
||||
if now.After(info.expireTime) {
|
||||
if info.timer != nil {
|
||||
info.timer.Stop()
|
||||
}
|
||||
am.processPendingAlert(key.(string))
|
||||
processedAlerts = append(processedAlerts, info.alertData)
|
||||
}
|
||||
return true
|
||||
})
|
||||
return processedAlerts, lastErr
|
||||
}
|
||||
|
||||
// ForceExpirePendingAlerts sets all pending alerts to expire immediately (for testing)
|
||||
func (am *AlertManager) ForceExpirePendingAlerts() {
|
||||
now := time.Now()
|
||||
am.pendingAlerts.Range(func(key, value any) bool {
|
||||
info := value.(*alertInfo)
|
||||
info.expireTime = now.Add(-time.Second) // Set to 1 second ago
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
func (am *AlertManager) ResetPendingAlertTimer(alertID string, delay time.Duration) bool {
|
||||
value, loaded := am.pendingAlerts.Load(alertID)
|
||||
if !loaded {
|
||||
return false
|
||||
}
|
||||
|
||||
info := value.(*alertInfo)
|
||||
if info.timer != nil {
|
||||
info.timer.Stop()
|
||||
}
|
||||
info.expireTime = time.Now().Add(delay)
|
||||
info.timer = time.AfterFunc(delay, func() {
|
||||
am.processPendingAlert(alertID)
|
||||
})
|
||||
return true
|
||||
}
|
||||
|
||||
func ResolveStatusAlerts(app core.App) error {
|
||||
return resolveStatusAlerts(app)
|
||||
}
|
||||
|
||||
func (am *AlertManager) RestorePendingStatusAlerts() error {
|
||||
return am.restorePendingStatusAlerts()
|
||||
}
|
||||
|
||||
func (am *AlertManager) SetAlertTriggered(alert CachedAlertData, triggered bool) error {
|
||||
return am.setAlertTriggered(alert, triggered)
|
||||
}
|
||||
|
||||
func IsInternalURL(rawURL string) (bool, error) {
|
||||
return isInternalURL(rawURL)
|
||||
}
|
||||
Reference in New Issue
Block a user