Files
Containr/app/backend/internal/scaling/autoscaler.go
T
2026-04-10 12:02:36 +02:00

884 lines
25 KiB
Go

package scaling
import (
"context"
"fmt"
"log"
"math"
"sync"
"time"
"containr/internal/deployment"
"containr/internal/metrics"
)
// AutoScaler manages automatic scaling of services.
//
// It keeps a scaling policy and a scaling state per service, tracks which
// replica instance was placed on which node, and stores a bounded log of
// scale events. The methods in this file take mu around most accesses to the
// maps, the event slice and the enabled flag.
type AutoScaler struct {
// scheduler supplies node lists and receives node updates when replicas are added or removed.
scheduler *deployment.Scheduler
// metricsCollector is queried for per-service metrics on every evaluation.
metricsCollector *metrics.MetricsCollector
// policies maps service ID to the policy registered through SetScalingPolicy.
policies map[string]*ScalingPolicy
// services maps service ID to its mutable scaling state.
services map[string]*ServiceScalingState
servicePlacements map[string]map[string]string // service_id -> instance_id -> node_id
// replicaSequence holds the last sequence number used for a service's replica instance IDs.
replicaSequence map[string]int64
// events is the scale event log, oldest first, trimmed to maxStoredEvents entries.
events []ScaleEvent
mu sync.RWMutex
// checkInterval is the ticker period used by Start (30s from NewAutoScaler).
checkInterval time.Duration
// cooldownPeriod is set to 5 minutes by NewAutoScaler; nothing in the visible
// code reads it (cooldowns come from each ScalingPolicy instead).
cooldownPeriod time.Duration
// maxStoredEvents caps len(events); a value <= 0 disables trimming.
maxStoredEvents int
// enabled gates the periodic check in Start; toggled by Enable and Disable.
enabled bool
}
// ScalingPolicy defines how a service should scale.
//
// SetScalingPolicy fills in defaults for zero-valued fields: 3m scale-up
// cooldown, 5m scale-down cooldown, a step of 1 in both directions,
// MinReplicas 1 and MaxReplicas 10.
type ScalingPolicy struct {
ServiceID string `json:"service_id"`
// MinReplicas and MaxReplicas bound the replica count chosen by the autoscaler
// and accepted by ManualScale.
MinReplicas int `json:"min_replicas"`
MaxReplicas int `json:"max_replicas"`
// A TargetCPU <= 0 disables CPU-based scaling in calculateDesiredReplicas.
TargetCPU float64 `json:"target_cpu"` // Target CPU utilization percentage
// A TargetMemory <= 0 disables memory-based scaling.
// NOTE(review): getServiceMetrics reports "memory" in GB and the scaling
// reason prints this value as GB, so this appears to be a GB target rather
// than a percentage; confirm the intended unit.
TargetMemory float64 `json:"target_memory"` // Target memory utilization percentage
// ScaleUpCooldown and ScaleDownCooldown are how long further scaling in the
// same direction is suppressed after an automatic scale action.
ScaleUpCooldown time.Duration `json:"scale_up_cooldown"`
ScaleDownCooldown time.Duration `json:"scale_down_cooldown"`
ScaleUpStep int `json:"scale_up_step"` // Maximum replicas added per scale-up; values <= 0 are treated as 1
ScaleDownStep int `json:"scale_down_step"` // Maximum replicas removed per scale-down; values <= 0 are treated as 1
// Metrics and Thresholds are stored but not read anywhere in the visible code.
Metrics []string `json:"metrics"` // Which metrics to consider
Thresholds map[string]float64 `json:"thresholds"` // Custom thresholds for metrics
// Enabled must be true for the periodic check to evaluate this service.
Enabled bool `json:"enabled"`
// CostOptimization is optional; nil means no cost constraints.
CostOptimization *CostOptimization `json:"cost_optimization"`
}
// CostOptimization defines cost-related scaling parameters.
type CostOptimization struct {
// MaxCostPerHour is the hourly budget compared against the estimate from
// estimateScalingCost, which also clamps its result to this value when it is > 0.
MaxCostPerHour float64 `json:"max_cost_per_hour"`
// PreferEfficiency and IdleTimeout are not read anywhere in the visible code.
PreferEfficiency bool `json:"prefer_efficiency"`
IdleTimeout time.Duration `json:"idle_timeout"`
}
// ServiceScalingState tracks the current scaling state of a service.
type ServiceScalingState struct {
ServiceID string
// CurrentReplicas is the replica count the autoscaler believes is running;
// it starts at the policy's MinReplicas.
CurrentReplicas int
// DesiredReplicas is set together with CurrentReplicas on every scale action.
DesiredReplicas int
// LastScaleAction is the time of the most recent automatic or manual scale.
LastScaleAction time.Time
LastScaleDirection string // "scale_up" or "scale_down"; empty until the first scale action
// ScaleUpCooldown and ScaleDownCooldown are deadlines: scaling in that
// direction is suppressed while time.Now() is before the stored instant.
ScaleUpCooldown time.Time
ScaleDownCooldown time.Time
// MetricsHistory is initialized empty by SetScalingPolicy; the visible code
// never appends to it.
MetricsHistory []MetricsSnapshot
// Policy is the policy currently attached to the service.
Policy *ScalingPolicy
}
// MetricsSnapshot captures metrics at a point in time.
// It is the element type of ServiceScalingState.MetricsHistory.
// NOTE(review): units of the numeric fields are not established by the
// visible code; confirm against whatever populates the history.
type MetricsSnapshot struct {
Timestamp time.Time
CPU float64
Memory float64
Requests float64
Errors float64
}
// ScaleEvent represents a scaling action that was applied, either by the
// periodic autoscaler or through ManualScale.
type ScaleEvent struct {
ServiceID string `json:"service_id"`
Action string `json:"action"` // "scale_up"/"scale_down" (automatic) or "manual_scale_up"/"manual_scale_down"/"manual_scale_noop"
FromReplicas int `json:"from_replicas"`
ToReplicas int `json:"to_replicas"`
Reason string `json:"reason"`
Timestamp time.Time `json:"timestamp"`
// Metrics holds the metric values behind the decision; for manual events it
// only contains "requested_replicas".
Metrics map[string]float64 `json:"metrics"`
// CostImpact is the estimated hourly cost at ToReplicas.
CostImpact float64 `json:"cost_impact"`
}
// ScalingDecision contains the decision made by the autoscaler for one
// service during one evaluation.
type ScalingDecision struct {
// ShouldScale is true only when a scale action should be executed now.
ShouldScale bool `json:"should_scale"`
// Action is "scale_up" or "scale_down"; left empty when ShouldScale is false.
Action string `json:"action"`
CurrentReplicas int `json:"current_replicas"`
DesiredReplicas int `json:"desired_replicas"`
// Reason is a human-readable explanation, including why scaling was skipped.
Reason string `json:"reason"`
// Metrics are the values the decision was based on; nil when the evaluation
// stopped before metrics were fetched.
Metrics map[string]float64 `json:"metrics"`
CostEstimate float64 `json:"cost_estimate"`
}
// NewAutoScaler builds an enabled AutoScaler wired to the given scheduler and
// metrics collector. It checks every 30 seconds, uses a 5 minute cooldown
// period and keeps at most 200 scale events.
func NewAutoScaler(scheduler *deployment.Scheduler, metricsCollector *metrics.MetricsCollector) *AutoScaler {
	as := new(AutoScaler)

	// Collaborators.
	as.scheduler = scheduler
	as.metricsCollector = metricsCollector

	// Empty bookkeeping structures.
	as.policies = map[string]*ScalingPolicy{}
	as.services = map[string]*ServiceScalingState{}
	as.servicePlacements = map[string]map[string]string{}
	as.replicaSequence = map[string]int64{}
	as.events = make([]ScaleEvent, 0, 200)

	// Defaults.
	as.checkInterval = 30 * time.Second
	as.cooldownPeriod = 5 * time.Minute
	as.maxStoredEvents = 200
	as.enabled = true

	return as
}
// Start runs the auto-scaling loop until ctx is cancelled.
//
// Every checkInterval it evaluates all services, provided the scaler is
// enabled at that moment. Errors from a check are logged and do not stop the
// loop. Start blocks; it always returns ctx.Err() once ctx is done.
func (as *AutoScaler) Start(ctx context.Context) error {
	ticker := time.NewTicker(as.checkInterval)
	defer ticker.Stop()

	log.Printf("AutoScaler started with check interval: %v", as.checkInterval)

	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-ticker.C:
			// Read the flag through IsEnabled so the access is synchronized
			// with Enable/Disable, which write it while holding the mutex.
			if !as.IsEnabled() {
				continue
			}
			if err := as.checkAndScale(ctx); err != nil {
				log.Printf("Error during auto-scaling check: %v", err)
			}
		}
	}
}
// checkAndScale runs one evaluation pass: every service whose policy is
// present and enabled is evaluated and, if the decision says so, scaled.
// Per-service failures are logged and skipped; the function itself always
// returns nil.
func (as *AutoScaler) checkAndScale(ctx context.Context) error {
	// Collect the eligible states under the read lock, then release it before
	// doing the (potentially slow) evaluation and scaling work.
	as.mu.RLock()
	candidates := make([]*ServiceScalingState, 0, len(as.services))
	for _, svc := range as.services {
		if svc.Policy == nil || !svc.Policy.Enabled {
			continue
		}
		candidates = append(candidates, svc)
	}
	as.mu.RUnlock()

	for _, svc := range candidates {
		decision, err := as.evaluateScaling(ctx, svc)
		switch {
		case err != nil:
			log.Printf("Error evaluating scaling for service %s: %v", svc.ServiceID, err)
		case decision.ShouldScale:
			if execErr := as.executeScaling(ctx, svc, decision); execErr != nil {
				log.Printf("Error executing scaling for service %s: %v", svc.ServiceID, execErr)
			}
		}
	}
	return nil
}
// evaluateScaling determines whether a service needs to scale right now.
//
// The steps are: honour cooldowns, fetch current metrics, compute the desired
// replica count, clamp it to the policy's [MinReplicas, MaxReplicas], limit
// the change to the policy's step size, and finally apply the cost budget.
//
// It returns a decision with ShouldScale=false (and an explanatory Reason)
// whenever scaling is skipped, and an error only when the state has no policy
// or metrics could not be retrieved.
func (as *AutoScaler) evaluateScaling(ctx context.Context, state *ServiceScalingState) (*ScalingDecision, error) {
	policy := state.Policy
	if policy == nil {
		return nil, fmt.Errorf("no scaling policy set for service: %s", state.ServiceID)
	}

	now := time.Now()
	current := state.CurrentReplicas

	// hold builds a "do not scale" decision for the given target and reason.
	hold := func(desired int, reason string, observed map[string]float64) *ScalingDecision {
		return &ScalingDecision{
			ShouldScale:     false,
			CurrentReplicas: current,
			DesiredReplicas: desired,
			Reason:          reason,
			Metrics:         observed,
		}
	}

	// When both directions are cooling down nothing can happen, so skip the
	// metrics fetch entirely.
	if now.Before(state.ScaleUpCooldown) && now.Before(state.ScaleDownCooldown) {
		return hold(current, "In cooldown period", nil), nil
	}

	observed, err := as.getServiceMetrics(ctx, state.ServiceID)
	if err != nil {
		return nil, fmt.Errorf("failed to get service metrics: %w", err)
	}

	// Desired replicas from metrics, clamped to the policy bounds.
	desired := as.calculateDesiredReplicas(state, observed, policy)
	if desired < policy.MinReplicas {
		desired = policy.MinReplicas
	}
	if desired > policy.MaxReplicas {
		desired = policy.MaxReplicas
	}

	if desired == current {
		return hold(desired, "No scaling needed", observed), nil
	}

	// Direction-specific cooldown.
	action := "scale_down"
	if desired > current {
		action = "scale_up"
	}
	switch {
	case action == "scale_up" && now.Before(state.ScaleUpCooldown):
		return hold(desired, "Scale up cooldown active", observed), nil
	case action == "scale_down" && now.Before(state.ScaleDownCooldown):
		return hold(desired, "Scale down cooldown active", observed), nil
	}

	// Limit the change to one step per evaluation; a non-positive step in the
	// policy is treated as 1.
	if action == "scale_up" {
		step := policy.ScaleUpStep
		if step <= 0 {
			step = 1
		}
		if desired-current > step {
			desired = current + step
		}
	} else {
		step := policy.ScaleDownStep
		if step <= 0 {
			step = 1
		}
		if current-desired > step {
			desired = current - step
		}
	}

	costEstimate := as.estimateScalingCost(state, desired)

	// The budget only gates scale-up, and only when a positive budget is
	// configured: an unset (zero) MaxCostPerHour must not block all scaling,
	// and removing replicas never increases cost.
	// NOTE(review): estimateScalingCost clamps its result to MaxCostPerHour,
	// so with that clamp in place the estimate can never exceed the budget;
	// confirm whether the clamp or this gate reflects the intended behaviour.
	if co := policy.CostOptimization; co != nil && co.MaxCostPerHour > 0 &&
		action == "scale_up" && costEstimate > co.MaxCostPerHour {
		blocked := hold(
			current,
			fmt.Sprintf("Cost estimate %.2f exceeds maximum %.2f", costEstimate, co.MaxCostPerHour),
			observed,
		)
		blocked.CostEstimate = costEstimate
		return blocked, nil
	}

	return &ScalingDecision{
		ShouldScale:     true,
		Action:          action,
		CurrentReplicas: current,
		DesiredReplicas: desired,
		Reason:          as.generateScalingReason(state, observed, desired),
		Metrics:         observed,
		CostEstimate:    costEstimate,
	}, nil
}
// calculateDesiredReplicas derives a replica count from the supplied metrics.
//
// For CPU and memory the ratio usage/target is computed; a ratio above 1.2
// proposes ceil(current*ratio) replicas and a ratio below 0.8 proposes
// floor(current*ratio). A target <= 0 disables that metric. Request rate
// proposes one replica per 100 requests/second, and an error rate above 5%
// proposes one extra replica. Scale-up proposals win over scale-down ones.
//
// The result is not clamped to the policy bounds; callers do that.
func (as *AutoScaler) calculateDesiredReplicas(state *ServiceScalingState, metrics map[string]float64, policy *ScalingPolicy) int {
	const (
		scaleUpRatio       = 1.2   // usage more than 20% above target
		scaleDownRatio     = 0.8   // usage more than 20% below target
		requestsPerReplica = 100.0 // assumed per-replica capacity in requests/second
		maxErrorRate       = 0.05  // 5% error rate
	)

	currentReplicas := state.CurrentReplicas
	desiredReplicas := currentReplicas

	// CPU-based scaling.
	if cpuUsage, ok := metrics["cpu"]; ok && policy.TargetCPU > 0 {
		cpuRatio := cpuUsage / policy.TargetCPU
		switch {
		case cpuRatio > scaleUpRatio:
			desiredReplicas = int(math.Ceil(float64(currentReplicas) * cpuRatio))
		case cpuRatio < scaleDownRatio:
			desiredReplicas = int(math.Floor(float64(currentReplicas) * cpuRatio))
		}
	}

	// Memory-based scaling.
	if memoryUsage, ok := metrics["memory"]; ok && policy.TargetMemory > 0 {
		memoryRatio := memoryUsage / policy.TargetMemory
		switch {
		case memoryRatio > scaleUpRatio:
			memDesired := int(math.Ceil(float64(currentReplicas) * memoryRatio))
			if memDesired > desiredReplicas {
				desiredReplicas = memDesired
			}
		// Compare the ratio (not the raw usage) against the threshold, and
		// never let low memory cancel a scale-up that CPU asked for.
		case memoryRatio < scaleDownRatio && desiredReplicas <= currentReplicas:
			memDesired := int(math.Floor(float64(currentReplicas) * memoryRatio))
			if memDesired < desiredReplicas {
				desiredReplicas = memDesired
			}
		}
	}

	// Request rate scaling: simple capacity heuristic, can only raise the count.
	if requestRate, ok := metrics["requests_per_second"]; ok {
		requestDesired := int(math.Ceil(requestRate / requestsPerReplica))
		if requestDesired > desiredReplicas {
			desiredReplicas = requestDesired
		}
	}

	// Error rate scaling: add one replica when the error rate is high.
	if errorRate, ok := metrics["error_rate"]; ok && errorRate > maxErrorRate {
		errorDesired := currentReplicas + 1
		if errorDesired > desiredReplicas {
			desiredReplicas = errorDesired
		}
	}

	return desiredReplicas
}
// getServiceMetrics returns the current metrics for a service as a map with
// the keys "cpu", "memory", "requests_per_second" and "error_rate".
//
// CPU and memory are averaged across the service's instances (memory is
// converted from bytes to GB); request throughput and error rate are taken
// from the service-level counters. If the collector returns an error, or the
// service has no instances, an empty map is returned with a nil error so the
// caller treats it as "no data" rather than a failure.
func (as *AutoScaler) getServiceMetrics(ctx context.Context, serviceID string) (map[string]float64, error) {
	serviceMetrics, err := as.metricsCollector.GetServiceMetrics(serviceID)
	if err != nil {
		// Deliberate best-effort: missing metrics must not abort evaluation.
		return make(map[string]float64), nil
	}

	result := make(map[string]float64)

	instanceCount := len(serviceMetrics.Instances)
	if instanceCount == 0 {
		return result, nil
	}

	// Only per-instance values are accumulated in the loop.
	var totalCPU, totalMemory float64
	for _, instance := range serviceMetrics.Instances {
		totalCPU += instance.CPU
		totalMemory += float64(instance.Memory)
	}

	const bytesPerGB = 1024 * 1024 * 1024
	result["cpu"] = totalCPU / float64(instanceCount)
	result["memory"] = totalMemory / float64(instanceCount) / bytesPerGB

	// Throughput and error totals are already service-wide; use them once
	// instead of once per instance, which would inflate both values by the
	// instance count.
	result["requests_per_second"] = serviceMetrics.Requests.Throughput
	if serviceMetrics.Requests.Total > 0 {
		result["error_rate"] = float64(serviceMetrics.Errors.Total) / float64(serviceMetrics.Requests.Total)
	} else {
		result["error_rate"] = 0
	}

	return result, nil
}
// executeScaling applies a positive scaling decision: it asks the scheduler
// to reach the desired replica count, then updates the service state, arms
// the cooldown for the direction that was taken and records a scale event.
// The state is left untouched when the scheduler call fails.
func (as *AutoScaler) executeScaling(ctx context.Context, state *ServiceScalingState, decision *ScalingDecision) error {
	var (
		id     = state.ServiceID
		before = state.CurrentReplicas
		after  = decision.DesiredReplicas
	)

	log.Printf("Executing scaling for service %s: %d -> %d replicas (%s)",
		id, before, after, decision.Reason)

	if err := as.scaleServiceOnScheduler(ctx, id, after); err != nil {
		return fmt.Errorf("failed to apply scheduler scaling plan: %w", err)
	}

	// Commit the new replica count and arm the matching cooldown.
	as.mu.Lock()
	state.CurrentReplicas = after
	state.DesiredReplicas = after
	state.LastScaleAction = time.Now()
	state.LastScaleDirection = decision.Action
	switch decision.Action {
	case "scale_up":
		state.ScaleUpCooldown = time.Now().Add(state.Policy.ScaleUpCooldown)
	default:
		state.ScaleDownCooldown = time.Now().Add(state.Policy.ScaleDownCooldown)
	}
	as.mu.Unlock()

	// Keep an audit trail of what was done and why.
	recorded := ScaleEvent{
		ServiceID:    id,
		Action:       decision.Action,
		FromReplicas: before,
		ToReplicas:   after,
		Reason:       decision.Reason,
		Timestamp:    time.Now(),
		Metrics:      decision.Metrics,
		CostImpact:   decision.CostEstimate,
	}
	as.recordScaleEvent(recorded)
	log.Printf("Scaling event: %+v", &recorded)

	return nil
}
// ManualScale performs an immediate, manual scaling action for a service.
//
// replicas must be at least 1 and, when the service has a policy, within the
// policy's [MinReplicas, MaxReplicas]. An empty reason is replaced with a
// default text. The scheduler is updated first; the in-memory state is only
// changed once that succeeded, so a scheduler failure leaves the recorded
// replica count untouched. On success the recorded scale event is returned.
//
// Cooldowns are neither checked nor armed by a manual scale.
func (as *AutoScaler) ManualScale(ctx context.Context, serviceID string, replicas int, reason string) (*ScaleEvent, error) {
	if replicas < 1 {
		return nil, fmt.Errorf("replicas must be at least 1")
	}
	if reason == "" {
		reason = "manual scaling request"
	}

	// Validate against the current state under the lock.
	as.mu.Lock()
	state, exists := as.services[serviceID]
	if !exists {
		as.mu.Unlock()
		return nil, fmt.Errorf("no scaling state found for service: %s", serviceID)
	}
	if policy := state.Policy; policy != nil {
		if replicas < policy.MinReplicas || replicas > policy.MaxReplicas {
			as.mu.Unlock()
			return nil, fmt.Errorf(
				"requested replicas %d outside policy bounds [%d, %d]",
				replicas,
				policy.MinReplicas,
				policy.MaxReplicas,
			)
		}
	}
	fromReplicas := state.CurrentReplicas
	as.mu.Unlock()

	// Classify the request relative to the replica count seen above.
	action := "manual_scale_noop"
	direction := ""
	switch {
	case replicas > fromReplicas:
		action = "manual_scale_up"
		direction = "scale_up"
	case replicas < fromReplicas:
		action = "manual_scale_down"
		direction = "scale_down"
	}

	// Apply on the scheduler before touching the state (the lock is not held
	// here because the scheduler helper takes it itself).
	if err := as.scaleServiceOnScheduler(ctx, serviceID, replicas); err != nil {
		return nil, fmt.Errorf("manual scale failed on scheduler: %w", err)
	}

	now := time.Now()
	as.mu.Lock()
	state.CurrentReplicas = replicas
	state.DesiredReplicas = replicas
	state.LastScaleAction = now
	if direction != "" {
		// A no-op keeps whatever direction was recorded before.
		state.LastScaleDirection = direction
	}
	as.mu.Unlock()

	event := ScaleEvent{
		ServiceID:    serviceID,
		Action:       action,
		FromReplicas: fromReplicas,
		ToReplicas:   replicas,
		Reason:       reason,
		Timestamp:    now,
		Metrics: map[string]float64{
			"requested_replicas": float64(replicas),
		},
		CostImpact: as.estimateServiceCost(serviceID, replicas),
	}
	as.recordScaleEvent(event)

	// Return a copy so the caller does not share the stored value's address.
	result := event
	return &result, nil
}
// GetScalingEvents returns recent scale events across services, newest first.
// A limit <= 0, or one larger than the number of stored events, returns all
// of them. The returned events are copies and safe for the caller to modify.
func (as *AutoScaler) GetScalingEvents(limit int) []ScaleEvent {
	as.mu.RLock()
	defer as.mu.RUnlock()

	total := len(as.events)
	if limit <= 0 || limit > total {
		limit = total
	}

	// Events are stored oldest first, so read backwards from the end.
	newestFirst := make([]ScaleEvent, 0, limit)
	for n := 0; n < limit; n++ {
		newestFirst = append(newestFirst, cloneScaleEvent(as.events[total-1-n]))
	}
	return newestFirst
}
// GetServiceScalingHistory returns recent scale events for a service, newest
// first. A limit <= 0, or one larger than the number of stored events, means
// "no limit". The returned events are copies and safe for the caller to
// modify.
func (as *AutoScaler) GetServiceScalingHistory(serviceID string, limit int) []ScaleEvent {
	as.mu.RLock()
	defer as.mu.RUnlock()

	// Clamp the limit so a very large caller-supplied value cannot drive the
	// allocation below (same rule as GetScalingEvents).
	total := len(as.events)
	if limit <= 0 || limit > total {
		limit = total
	}

	result := make([]ScaleEvent, 0, limit)
	for i := total - 1; i >= 0 && len(result) < limit; i-- {
		if as.events[i].ServiceID == serviceID {
			result = append(result, cloneScaleEvent(as.events[i]))
		}
	}
	return result
}
// estimateServiceCost estimates the hourly cost of running the given number
// of replicas for a service. Services without scaling state are priced at a
// flat 0.01 per replica; known services use estimateScalingCost.
func (as *AutoScaler) estimateServiceCost(serviceID string, replicas int) float64 {
	as.mu.RLock()
	svc := as.services[serviceID]
	as.mu.RUnlock()

	if svc != nil {
		return as.estimateScalingCost(svc, replicas)
	}
	return float64(replicas) * 0.01
}
// recordScaleEvent appends an event to the log and, when maxStoredEvents is
// positive, drops the oldest entries so at most that many remain.
func (as *AutoScaler) recordScaleEvent(event ScaleEvent) {
	as.mu.Lock()
	defer as.mu.Unlock()

	as.events = append(as.events, event)

	if as.maxStoredEvents <= 0 {
		// No retention limit configured.
		return
	}
	if overflow := len(as.events) - as.maxStoredEvents; overflow > 0 {
		as.events = as.events[overflow:]
	}
}
// cloneScaleEvent returns a copy of event whose Metrics map is independent of
// the original. A nil Metrics map stays nil.
func cloneScaleEvent(event ScaleEvent) ScaleEvent {
	// event is already a by-value copy; only the map needs duplicating.
	if event.Metrics != nil {
		duplicate := make(map[string]float64, len(event.Metrics))
		for name, reading := range event.Metrics {
			duplicate[name] = reading
		}
		event.Metrics = duplicate
	}
	return event
}
// generateScalingReason creates a human-readable reason for a scaling
// decision that moves the service from state.CurrentReplicas to
// desiredReplicas.
//
// It lists every metric that is outside its 20% tolerance band (for metrics
// whose target is > 0) plus the request rate when present. The direction in
// the text is derived from the replica counts of this decision. When no
// metric contributes, a generic message is returned.
func (as *AutoScaler) generateScalingReason(state *ServiceScalingState, metrics map[string]float64, desiredReplicas int) string {
	policy := state.Policy
	var reasons []string

	// A target of 0 means the metric is not used for scaling, so it must not
	// show up as a reason either.
	if cpuUsage, ok := metrics["cpu"]; ok && policy.TargetCPU > 0 {
		switch {
		case cpuUsage > policy.TargetCPU*1.2:
			reasons = append(reasons, fmt.Sprintf("CPU usage %.1f%% above target %.1f%%", cpuUsage, policy.TargetCPU))
		case cpuUsage < policy.TargetCPU*0.8:
			reasons = append(reasons, fmt.Sprintf("CPU usage %.1f%% below target %.1f%%", cpuUsage, policy.TargetCPU))
		}
	}

	if memoryUsage, ok := metrics["memory"]; ok && policy.TargetMemory > 0 {
		switch {
		case memoryUsage > policy.TargetMemory*1.2:
			reasons = append(reasons, fmt.Sprintf("Memory usage %.1fGB above target %.1fGB", memoryUsage, policy.TargetMemory))
		case memoryUsage < policy.TargetMemory*0.8:
			reasons = append(reasons, fmt.Sprintf("Memory usage %.1fGB below target %.1fGB", memoryUsage, policy.TargetMemory))
		}
	}

	if requestRate, ok := metrics["requests_per_second"]; ok {
		reasons = append(reasons, fmt.Sprintf("Request rate %.0f/s requires %d replicas", requestRate, desiredReplicas))
	}

	if len(reasons) == 0 {
		return "Automatic scaling based on metrics"
	}

	// Use the direction of this decision, not the previously recorded one
	// (which is empty before the first scale and may point the other way).
	direction := "down"
	if desiredReplicas > state.CurrentReplicas {
		direction = "up"
	}
	return fmt.Sprintf("Scale %s: %v", direction, reasons)
}
// estimateScalingCost estimates the hourly cost (USD/hour) of running the
// given number of replicas for a service.
//
// The per-replica price is a base of 0.01 plus components derived from the
// policy's CPU and memory targets. When the policy carries a positive
// MaxCostPerHour the result is clamped to that budget. A non-positive replica
// count costs 0. A state without a policy is priced at the base rate only.
func (as *AutoScaler) estimateScalingCost(state *ServiceScalingState, replicas int) float64 {
	if replicas <= 0 {
		return 0
	}

	const baseReplicaCost = 0.01

	// Callers such as ManualScale tolerate a state without a policy; price it
	// like an unknown service instead of dereferencing nil.
	if state == nil || state.Policy == nil {
		return float64(replicas) * baseReplicaCost
	}
	policy := state.Policy

	// Basic resource-informed model; negative targets contribute nothing.
	cpuComponent := math.Max(0, policy.TargetCPU) / 100.0 * 0.02
	memoryComponent := math.Max(0, policy.TargetMemory) * 0.005
	perReplica := baseReplicaCost + cpuComponent + memoryComponent
	total := float64(replicas) * perReplica

	// Clamp to the configured budget, if any.
	if co := policy.CostOptimization; co != nil && co.MaxCostPerHour > 0 && total > co.MaxCostPerHour {
		return co.MaxCostPerHour
	}
	return total
}
// scaleServiceOnScheduler brings the number of tracked replica instances of a
// service to targetReplicas by adding instances to, or removing them from,
// scheduler nodes.
//
// The current count is the number of placements tracked for the service, not
// the CurrentReplicas field of its scaling state. It returns an error for a
// negative target, a missing scheduler, a cancelled context, or when a node
// update fails. If an update fails part-way, the placements changed so far
// are still recorded so tracking keeps matching what was done on the nodes.
func (as *AutoScaler) scaleServiceOnScheduler(ctx context.Context, serviceID string, targetReplicas int) error {
	if targetReplicas < 0 {
		return fmt.Errorf("target replicas cannot be negative")
	}
	if as.scheduler == nil {
		return fmt.Errorf("scheduler not initialized")
	}
	select {
	case <-ctx.Done():
		return ctx.Err()
	default:
	}

	// Work on a private copy of the placements so the lock is not held while
	// talking to the scheduler.
	as.mu.Lock()
	tracked, exists := as.servicePlacements[serviceID]
	if !exists {
		tracked = make(map[string]string)
		as.servicePlacements[serviceID] = tracked
	}
	placement := make(map[string]string, len(tracked))
	for instanceID, nodeID := range tracked {
		placement[instanceID] = nodeID
	}
	as.mu.Unlock()

	currentReplicas := len(placement)
	if currentReplicas == targetReplicas {
		return nil
	}

	// Publish whatever was achieved on every exit path, including errors:
	// replicas already added or removed must stay tracked.
	defer func() {
		as.mu.Lock()
		as.servicePlacements[serviceID] = placement
		as.mu.Unlock()
	}()

	if currentReplicas < targetReplicas {
		for i := currentReplicas; i < targetReplicas; i++ {
			node, err := as.pickNodeForReplica()
			if err != nil {
				return err
			}
			instanceID := as.nextReplicaInstanceID(serviceID)
			if err := as.addReplicaToNode(node, instanceID); err != nil {
				return err
			}
			placement[instanceID] = node.ID
		}
		return nil
	}

	// Scale down: which instances go is determined by map iteration order.
	removeCount := currentReplicas - targetReplicas
	removed := 0
	for instanceID, nodeID := range placement {
		if removed >= removeCount {
			break
		}
		if err := as.removeReplicaFromNode(nodeID, instanceID); err != nil {
			return err
		}
		delete(placement, instanceID)
		removed++
	}
	return nil
}
// pickNodeForReplica chooses the ready node that currently hosts the fewest
// containers; the earliest such node wins a tie. It fails when the scheduler
// reports no ready nodes.
func (as *AutoScaler) pickNodeForReplica() (*deployment.Node, error) {
	ready := as.scheduler.GetReadyNodes()
	if len(ready) == 0 {
		return nil, fmt.Errorf("no ready nodes available for scaling")
	}

	// Track the index of the least-loaded node seen so far.
	best := 0
	for i := 1; i < len(ready); i++ {
		if len(ready[i].Containers) < len(ready[best].Containers) {
			best = i
		}
	}
	return ready[best], nil
}
// nextReplicaInstanceID increments the per-service sequence counter and
// returns a new instance ID of the form "<serviceID>-replica-<n>".
func (as *AutoScaler) nextReplicaInstanceID(serviceID string) string {
	as.mu.Lock()
	seq := as.replicaSequence[serviceID] + 1
	as.replicaSequence[serviceID] = seq
	as.mu.Unlock()

	return fmt.Sprintf("%s-replica-%d", serviceID, seq)
}
// addReplicaToNode registers instanceID on a copy of node and submits the
// copy to the scheduler. The copy's usage is raised by 2 CPU points (capped
// at 100) and 128 MiB of memory. The node passed in is not modified.
func (as *AutoScaler) addReplicaToNode(node *deployment.Node, instanceID string) error {
	if node == nil {
		return fmt.Errorf("node is nil")
	}

	// Build a fresh container list so the original slice is never aliased.
	containers := make([]string, 0, len(node.Containers)+1)
	containers = append(containers, node.Containers...)
	containers = append(containers, instanceID)

	next := *node
	next.Containers = containers
	next.Usage.CPU = math.Min(100, next.Usage.CPU+2.0)
	next.Usage.Memory += 128 * 1024 * 1024

	return as.scheduler.UpdateNode(&next)
}
// removeReplicaFromNode removes instanceID from the node with the given ID
// and submits the updated copy to the scheduler. The copy's usage is lowered
// by 2 CPU points and 128 MiB of memory, never going below zero. It fails
// when the scheduler does not know the node.
func (as *AutoScaler) removeReplicaFromNode(nodeID, instanceID string) error {
	const (
		cpuPerReplica    = 2.0
		memoryPerReplica = 128 * 1024 * 1024
	)

	for _, candidate := range as.scheduler.GetNodes() {
		if candidate.ID == nodeID {
			next := *candidate
			next.Containers = removeString(candidate.Containers, instanceID)

			switch {
			case next.Usage.CPU >= cpuPerReplica:
				next.Usage.CPU -= cpuPerReplica
			default:
				next.Usage.CPU = 0
			}

			switch {
			case next.Usage.Memory >= memoryPerReplica:
				next.Usage.Memory -= memoryPerReplica
			default:
				next.Usage.Memory = 0
			}

			return as.scheduler.UpdateNode(&next)
		}
	}

	return fmt.Errorf("node %s not found while removing instance %s", nodeID, instanceID)
}
// removeString returns the elements of items that differ from target, in
// their original order. An empty or nil input is returned unchanged;
// otherwise the result is a new slice and items is left untouched.
func removeString(items []string, target string) []string {
	if len(items) == 0 {
		return items
	}

	kept := make([]string, 0, len(items))
	for i := range items {
		if items[i] != target {
			kept = append(kept, items[i])
		}
	}
	return kept
}
// SetScalingPolicy sets or updates the scaling policy for policy.ServiceID.
//
// Zero-valued fields receive defaults, written into the passed policy:
// ScaleUpCooldown 3m, ScaleDownCooldown 5m, ScaleUpStep 1, ScaleDownStep 1,
// MinReplicas 1, MaxReplicas 10. The first policy for a service also creates
// its scaling state, starting at MinReplicas; later calls only swap the
// policy on the existing state.
//
// It returns an error, and stores nothing, when policy is nil or when
// MinReplicas is greater than MaxReplicas after defaults were applied.
func (as *AutoScaler) SetScalingPolicy(policy *ScalingPolicy) error {
	if policy == nil {
		return fmt.Errorf("scaling policy is nil")
	}

	as.mu.Lock()
	defer as.mu.Unlock()

	// Set default values if not specified.
	if policy.ScaleUpCooldown == 0 {
		policy.ScaleUpCooldown = 3 * time.Minute
	}
	if policy.ScaleDownCooldown == 0 {
		policy.ScaleDownCooldown = 5 * time.Minute
	}
	if policy.ScaleUpStep == 0 {
		policy.ScaleUpStep = 1
	}
	if policy.ScaleDownStep == 0 {
		policy.ScaleDownStep = 1
	}
	if policy.MinReplicas == 0 {
		policy.MinReplicas = 1
	}
	if policy.MaxReplicas == 0 {
		policy.MaxReplicas = 10
	}

	// Inverted bounds would make the clamp in the evaluation pick a value
	// below MinReplicas, so reject them up front.
	if policy.MinReplicas > policy.MaxReplicas {
		return fmt.Errorf(
			"min replicas %d greater than max replicas %d for service: %s",
			policy.MinReplicas,
			policy.MaxReplicas,
			policy.ServiceID,
		)
	}

	as.policies[policy.ServiceID] = policy

	// Initialize service state if it does not exist yet.
	if state, exists := as.services[policy.ServiceID]; exists {
		state.Policy = policy
	} else {
		as.services[policy.ServiceID] = &ServiceScalingState{
			ServiceID:       policy.ServiceID,
			CurrentReplicas: policy.MinReplicas,
			DesiredReplicas: policy.MinReplicas,
			Policy:          policy,
			MetricsHistory:  make([]MetricsSnapshot, 0),
		}
	}

	return nil
}
// GetScalingPolicy looks up the scaling policy registered for a service.
// The returned pointer is the stored policy itself, not a copy. An error is
// returned when the service has no policy.
func (as *AutoScaler) GetScalingPolicy(serviceID string) (*ScalingPolicy, error) {
	as.mu.RLock()
	defer as.mu.RUnlock()

	if found, ok := as.policies[serviceID]; ok {
		return found, nil
	}
	return nil, fmt.Errorf("no scaling policy found for service: %s", serviceID)
}
// GetServiceState looks up the scaling state tracked for a service.
// The returned pointer is the live state, not a copy. An error is returned
// when the service has no state.
func (as *AutoScaler) GetServiceState(serviceID string) (*ServiceScalingState, error) {
	as.mu.RLock()
	defer as.mu.RUnlock()

	if found, ok := as.services[serviceID]; ok {
		return found, nil
	}
	return nil, fmt.Errorf("no scaling state found for service: %s", serviceID)
}
// GetAllServiceStates returns a new map from service ID to scaling state.
// The map itself is a copy, but its values point at the live states.
func (as *AutoScaler) GetAllServiceStates() map[string]*ServiceScalingState {
	as.mu.RLock()
	defer as.mu.RUnlock()

	snapshot := make(map[string]*ServiceScalingState, len(as.services))
	for serviceID := range as.services {
		snapshot[serviceID] = as.services[serviceID]
	}
	return snapshot
}
// Enable turns the auto-scaler on, so the periodic check in Start evaluates
// services again.
func (as *AutoScaler) Enable() {
	as.mu.Lock()
	as.enabled = true
	as.mu.Unlock()
}
// Disable turns the auto-scaler off; the periodic check in Start is skipped
// while it is disabled.
func (as *AutoScaler) Disable() {
	as.mu.Lock()
	as.enabled = false
	as.mu.Unlock()
}
// IsEnabled reports whether the auto-scaler is currently enabled.
func (as *AutoScaler) IsEnabled() bool {
	as.mu.RLock()
	on := as.enabled
	as.mu.RUnlock()
	return on
}
// GetScalingSummary returns aggregate figures about the autoscaler: number of
// services (total and with an enabled policy), the sum of current replicas,
// how many services scaled up or down within the last hour, the number of
// stored events, the enabled flag and the check interval as a string.
func (as *AutoScaler) GetScalingSummary() map[string]interface{} {
	as.mu.RLock()
	defer as.mu.RUnlock()

	var enabledServices, totalReplicas, scalingUp, scalingDown int

	for _, svc := range as.services {
		if p := svc.Policy; p != nil && p.Enabled {
			enabledServices++
		}
		totalReplicas += svc.CurrentReplicas

		// Only scale actions from the last hour count as recent activity.
		switch svc.LastScaleDirection {
		case "scale_up":
			if time.Since(svc.LastScaleAction) < time.Hour {
				scalingUp++
			}
		case "scale_down":
			if time.Since(svc.LastScaleAction) < time.Hour {
				scalingDown++
			}
		}
	}

	summary := make(map[string]interface{}, 8)
	summary["total_services"] = len(as.services)
	summary["enabled_services"] = enabledServices
	summary["total_replicas"] = totalReplicas
	summary["scaling_up"] = scalingUp
	summary["scaling_down"] = scalingDown
	summary["total_events"] = len(as.events)
	summary["enabled"] = as.enabled
	summary["check_interval"] = as.checkInterval.String()
	return summary
}