mirror of
https://github.com/Dvorinka/Containr.git
synced 2026-06-03 20:12:58 +00:00
overhaul
This commit is contained in:
@@ -0,0 +1,490 @@
|
||||
package deployment
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"containr/internal/build"
|
||||
"containr/internal/docker"
|
||||
"containr/internal/types"
|
||||
|
||||
"github.com/docker/docker/api/types/mount"
|
||||
"github.com/docker/docker/api/types/network"
|
||||
"github.com/docker/go-connections/nat"
|
||||
)
|
||||
|
||||
type DeploymentEngine struct {
|
||||
buildManager *build.BuildManager
|
||||
dockerClient *docker.Client
|
||||
scheduler *Scheduler
|
||||
deployments map[string]*Deployment
|
||||
deploymentLog chan *DeploymentEvent
|
||||
}
|
||||
|
||||
type Deployment struct {
|
||||
ID string `json:"id"`
|
||||
ProjectID string `json:"project_id"`
|
||||
ServiceID string `json:"service_id"`
|
||||
Status string `json:"status"`
|
||||
ImageName string `json:"image_name"`
|
||||
ImageTag string `json:"image_tag"`
|
||||
Environment string `json:"environment"`
|
||||
Replicas int `json:"replicas"`
|
||||
Config ServiceConfig `json:"config"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
StartedAt *time.Time `json:"started_at,omitempty"`
|
||||
CompletedAt *time.Time `json:"completed_at,omitempty"`
|
||||
Containers []ContainerInfo `json:"containers"`
|
||||
BuildLog string `json:"build_log"`
|
||||
DeployLog string `json:"deploy_log"`
|
||||
Error string `json:"error,omitempty"`
|
||||
Metadata map[string]string `json:"metadata"`
|
||||
}
|
||||
|
||||
type ServiceConfig struct {
|
||||
Name string `json:"name"`
|
||||
Image string `json:"image"`
|
||||
Command []string `json:"command,omitempty"`
|
||||
Environment map[string]string `json:"environment,omitempty"`
|
||||
Labels map[string]string `json:"labels,omitempty"`
|
||||
RestartPolicy string `json:"restart_policy"`
|
||||
PortMappings []PortMapping `json:"port_mappings,omitempty"`
|
||||
VolumeMounts []VolumeMount `json:"volume_mounts,omitempty"`
|
||||
Networks []string `json:"networks,omitempty"`
|
||||
Resources ResourceLimits `json:"resources,omitempty"`
|
||||
HealthCheck *HealthCheck `json:"health_check,omitempty"`
|
||||
Replicas int `json:"replicas"`
|
||||
}
|
||||
|
||||
// PortMapping requests that a container port be published on the host.
type PortMapping struct {
	// ContainerPort is the port inside the container.
	ContainerPort int32 `json:"container_port"`

	// HostPort is the port on the host; zero is omitted from JSON.
	HostPort int32 `json:"host_port,omitempty"`

	// Protocol is the transport protocol name, for example "tcp".
	Protocol string `json:"protocol"`

	// HostIP optionally restricts the binding to one host address.
	HostIP string `json:"host_ip,omitempty"`
}
|
||||
|
||||
// VolumeMount describes one filesystem mount to attach to a container.
type VolumeMount struct {
	// Type is the mount kind; it is converted to a Docker mount type as-is.
	Type string `json:"type"`

	// Source is what gets mounted and Destination is where it appears inside
	// the container.
	Source      string `json:"source"`
	Destination string `json:"destination"`

	// ReadOnly mounts the volume without write access.
	ReadOnly bool `json:"read_only,omitempty"`
}
|
||||
|
||||
// ResourceLimits caps the memory and CPU a container may use. Zero values are
// omitted from JSON.
type ResourceLimits struct {
	// MemoryBytes is the memory limit in bytes.
	MemoryBytes int64 `json:"memory_bytes,omitempty"`

	// CPU settings.
	CPUQuota  int64 `json:"cpu_quota,omitempty"`
	CPUPeriod int64 `json:"cpu_period,omitempty"`
	CPUShares int64 `json:"cpu_shares,omitempty"`
}
|
||||
|
||||
// HealthCheck configures how a container's health is probed. The duration
// fields are encoded in JSON as integer nanoseconds.
type HealthCheck struct {
	// Test is the probe command and its arguments.
	Test []string `json:"test"`

	// Timing of the probe.
	Interval time.Duration `json:"interval"`
	Timeout  time.Duration `json:"timeout"`

	// Retries is the retry count for the probe.
	Retries int `json:"retries"`

	StartPeriod time.Duration `json:"start_period"`
}
|
||||
|
||||
type ContainerInfo struct {
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Status string `json:"status"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
StartedAt time.Time `json:"started_at"`
|
||||
Ports []PortInfo `json:"ports,omitempty"`
|
||||
Resources ResourceUsage `json:"resources"`
|
||||
Health *HealthStatus `json:"health,omitempty"`
|
||||
}
|
||||
|
||||
// PortInfo reports one port of a container and, if set, the host port it is
// bound to.
type PortInfo struct {
	ContainerPort int32 `json:"container_port"`

	// HostPort is omitted from JSON when zero.
	HostPort int32 `json:"host_port,omitempty"`

	HostIP   string `json:"host_ip"`
	Protocol string `json:"protocol"`
}
|
||||
|
||||
// ResourceUsage is a snapshot of the resources consumed by a container.
type ResourceUsage struct {
	CPUPercent float64 `json:"cpu_percent"`

	// Memory figures.
	MemoryUsage int64 `json:"memory_usage"`
	MemoryLimit int64 `json:"memory_limit"`

	// Network counters (received / transmitted).
	NetworkRx int64 `json:"network_rx"`
	NetworkTx int64 `json:"network_tx"`
}
|
||||
|
||||
// HealthStatus is the latest health-probe result known for a container.
type HealthStatus struct {
	Status string `json:"status"`

	// FailingStreak counts failed probes.
	FailingStreak int `json:"failing_streak"`

	// LastCheck is when the probe last ran.
	LastCheck time.Time `json:"last_check"`
}
|
||||
|
||||
type DeploymentEvent struct {
|
||||
Type string `json:"type"`
|
||||
Deployment *Deployment `json:"deployment"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
Message string `json:"message"`
|
||||
}
|
||||
|
||||
type DeploymentRequest struct {
|
||||
ProjectID string `json:"project_id"`
|
||||
ServiceID string `json:"service_id"`
|
||||
Environment string `json:"environment"`
|
||||
Config ServiceConfig `json:"config"`
|
||||
BuildConfig *BuildConfig `json:"build_config,omitempty"`
|
||||
Trigger TriggerConfig `json:"trigger"`
|
||||
}
|
||||
|
||||
// BuildConfig carries the settings that are forwarded to the build manager
// when an image is built for a deployment.
type BuildConfig struct {
	// Where the image comes from.
	BuildType     string `json:"build_type"`
	SourcePath    string `json:"source_path"`
	PrebuiltImage string `json:"prebuilt_image"`

	// Commands and environment passed to the build.
	BuildCommand string            `json:"build_command"`
	StartCommand string            `json:"start_command"`
	Environment  map[string]string `json:"environment"`

	// Source revision; also copied into the deployment metadata.
	Branch string `json:"branch"`
	Commit string `json:"commit"`
}
|
||||
|
||||
// TriggerConfig records what caused a deployment to be requested.
type TriggerConfig struct {
	// Type is the kind of trigger: webhook, manual, api or scheduled.
	Type string `json:"type"`

	// Source identifies where the trigger came from.
	Source string `json:"source"`

	// User is the user who triggered the deployment.
	User string `json:"user"`

	// Data holds trigger-specific key/value pairs.
	Data map[string]string `json:"data"`

	// Timestamp is when the trigger occurred.
	Timestamp time.Time `json:"timestamp"`
}
|
||||
|
||||
func NewDeploymentEngine(buildManager *build.BuildManager, dockerClient *docker.Client) *DeploymentEngine {
|
||||
return &DeploymentEngine{
|
||||
buildManager: buildManager,
|
||||
dockerClient: dockerClient,
|
||||
scheduler: NewScheduler(),
|
||||
deployments: make(map[string]*Deployment),
|
||||
deploymentLog: make(chan *DeploymentEvent, 1000),
|
||||
}
|
||||
}
|
||||
|
||||
// Deploy starts a new deployment
|
||||
func (de *DeploymentEngine) Deploy(ctx context.Context, req *DeploymentRequest) (*Deployment, error) {
|
||||
deployment := &Deployment{
|
||||
ID: generateDeploymentID(),
|
||||
ProjectID: req.ProjectID,
|
||||
ServiceID: req.ServiceID,
|
||||
Status: "pending",
|
||||
Environment: req.Environment,
|
||||
Config: req.Config,
|
||||
CreatedAt: time.Now(),
|
||||
Metadata: map[string]string{
|
||||
"trigger_type": req.Trigger.Type,
|
||||
"trigger_source": req.Trigger.Source,
|
||||
"branch": req.BuildConfig.Branch,
|
||||
"commit": req.BuildConfig.Commit,
|
||||
},
|
||||
}
|
||||
|
||||
// Store deployment
|
||||
de.deployments[deployment.ID] = deployment
|
||||
|
||||
// Log deployment start
|
||||
de.logEvent(&DeploymentEvent{
|
||||
Type: "deployment_started",
|
||||
Deployment: deployment,
|
||||
Timestamp: time.Now(),
|
||||
Message: fmt.Sprintf("Deployment started for service %s", req.ServiceID),
|
||||
})
|
||||
|
||||
// Start deployment in background
|
||||
go de.executeDeployment(ctx, deployment, req)
|
||||
|
||||
return deployment, nil
|
||||
}
|
||||
|
||||
// executeDeployment executes the deployment process
|
||||
func (de *DeploymentEngine) executeDeployment(ctx context.Context, deployment *Deployment, req *DeploymentRequest) {
|
||||
deployment.Status = "building"
|
||||
deployment.StartedAt = &[]time.Time{time.Now()}[0]
|
||||
|
||||
de.logEvent(&DeploymentEvent{
|
||||
Type: "build_started",
|
||||
Deployment: deployment,
|
||||
Timestamp: time.Now(),
|
||||
Message: "Build process started",
|
||||
})
|
||||
|
||||
// Step 1: Build the image
|
||||
imageName, err := de.buildImage(ctx, deployment, req.BuildConfig)
|
||||
if err != nil {
|
||||
deployment.Status = "failed"
|
||||
deployment.Error = fmt.Sprintf("Build failed: %v", err)
|
||||
deployment.CompletedAt = &[]time.Time{time.Now()}[0]
|
||||
|
||||
de.logEvent(&DeploymentEvent{
|
||||
Type: "build_failed",
|
||||
Deployment: deployment,
|
||||
Timestamp: time.Now(),
|
||||
Message: deployment.Error,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
deployment.ImageName = imageName
|
||||
deployment.Status = "deploying"
|
||||
|
||||
de.logEvent(&DeploymentEvent{
|
||||
Type: "build_completed",
|
||||
Deployment: deployment,
|
||||
Timestamp: time.Now(),
|
||||
Message: fmt.Sprintf("Build completed successfully: %s", imageName),
|
||||
})
|
||||
|
||||
// Step 2: Deploy the service
|
||||
err = de.deployService(ctx, deployment)
|
||||
if err != nil {
|
||||
deployment.Status = "failed"
|
||||
deployment.Error = fmt.Sprintf("Deployment failed: %v", err)
|
||||
deployment.CompletedAt = &[]time.Time{time.Now()}[0]
|
||||
|
||||
de.logEvent(&DeploymentEvent{
|
||||
Type: "deployment_failed",
|
||||
Deployment: deployment,
|
||||
Timestamp: time.Now(),
|
||||
Message: deployment.Error,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
deployment.Status = "running"
|
||||
deployment.CompletedAt = &[]time.Time{time.Now()}[0]
|
||||
|
||||
de.logEvent(&DeploymentEvent{
|
||||
Type: "deployment_completed",
|
||||
Deployment: deployment,
|
||||
Timestamp: time.Now(),
|
||||
Message: "Deployment completed successfully",
|
||||
})
|
||||
}
|
||||
|
||||
// buildImage builds the container image
|
||||
func (de *DeploymentEngine) buildImage(ctx context.Context, deployment *Deployment, buildConfig *BuildConfig) (string, error) {
|
||||
if buildConfig == nil {
|
||||
return "", fmt.Errorf("build config is required")
|
||||
}
|
||||
|
||||
buildReq := &types.BuildRequest{
|
||||
BuildType: buildConfig.BuildType,
|
||||
SourcePath: buildConfig.SourcePath,
|
||||
PrebuiltImage: buildConfig.PrebuiltImage,
|
||||
ImageName: fmt.Sprintf("containr-%s-%s", deployment.ServiceID, deployment.Environment),
|
||||
ImageTag: deployment.ID,
|
||||
BuildCommand: buildConfig.BuildCommand,
|
||||
StartCommand: buildConfig.StartCommand,
|
||||
Environment: buildConfig.Environment,
|
||||
ProjectID: deployment.ProjectID,
|
||||
ServiceID: deployment.ServiceID,
|
||||
DeploymentID: deployment.ID,
|
||||
TriggeredBy: "deployment_engine",
|
||||
Branch: buildConfig.Branch,
|
||||
Commit: buildConfig.Commit,
|
||||
}
|
||||
|
||||
response, err := de.buildManager.Build(ctx, buildReq)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
deployment.BuildLog = response.BuildLog
|
||||
|
||||
return response.ImageName, nil
|
||||
}
|
||||
|
||||
// deployService deploys the service using the built image
|
||||
func (de *DeploymentEngine) deployService(ctx context.Context, deployment *Deployment) error {
|
||||
// Convert service config to Docker container config
|
||||
containerConfig := &docker.ContainerConfig{
|
||||
Name: fmt.Sprintf("containr-%s-%s", deployment.ServiceID, deployment.ID),
|
||||
Image: deployment.ImageName,
|
||||
Cmd: deployment.Config.Command,
|
||||
Labels: deployment.Config.Labels,
|
||||
Networks: make(map[string]*network.EndpointSettings),
|
||||
}
|
||||
|
||||
// Set environment variables
|
||||
for k, v := range deployment.Config.Environment {
|
||||
containerConfig.Env = append(containerConfig.Env, fmt.Sprintf("%s=%s", k, v))
|
||||
}
|
||||
|
||||
// Set restart policy
|
||||
containerConfig.RestartPolicy = deployment.Config.RestartPolicy
|
||||
|
||||
// Configure port mappings
|
||||
portBindings := make(nat.PortMap)
|
||||
for _, pm := range deployment.Config.PortMappings {
|
||||
port := nat.Port(fmt.Sprintf("%d/%s", pm.ContainerPort, pm.Protocol))
|
||||
if pm.HostPort > 0 {
|
||||
portBindings[port] = []nat.PortBinding{
|
||||
{
|
||||
HostIP: pm.HostIP,
|
||||
HostPort: fmt.Sprintf("%d", pm.HostPort),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
containerConfig.PortBindings = portBindings
|
||||
|
||||
// Configure resource limits
|
||||
if deployment.Config.Resources.MemoryBytes > 0 {
|
||||
containerConfig.Memory = deployment.Config.Resources.MemoryBytes
|
||||
}
|
||||
if deployment.Config.Resources.CPUQuota > 0 {
|
||||
containerConfig.NanoCPUs = deployment.Config.Resources.CPUQuota
|
||||
}
|
||||
|
||||
// Configure volume mounts
|
||||
for _, vm := range deployment.Config.VolumeMounts {
|
||||
mount := mount.Mount{
|
||||
Type: mount.Type(vm.Type),
|
||||
Source: vm.Source,
|
||||
Target: vm.Destination,
|
||||
ReadOnly: vm.ReadOnly,
|
||||
}
|
||||
containerConfig.Mounts = append(containerConfig.Mounts, mount)
|
||||
}
|
||||
|
||||
// Create containers based on replica count
|
||||
deployment.Containers = make([]ContainerInfo, deployment.Config.Replicas)
|
||||
for i := 0; i < deployment.Config.Replicas; i++ {
|
||||
containerName := fmt.Sprintf("%s-%d", containerConfig.Name, i)
|
||||
|
||||
// Create container
|
||||
containerID, err := de.dockerClient.CreateContainer(ctx, *containerConfig)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create container %d: %w", i, err)
|
||||
}
|
||||
|
||||
// Start container
|
||||
err = de.dockerClient.StartContainer(ctx, containerID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to start container %d: %w", i, err)
|
||||
}
|
||||
|
||||
// Get container info
|
||||
_, err = de.dockerClient.GetContainer(ctx, containerID)
|
||||
if err != nil {
|
||||
log.Printf("Failed to get container info for %s: %v", containerID, err)
|
||||
}
|
||||
|
||||
deployment.Containers[i] = ContainerInfo{
|
||||
ID: containerID,
|
||||
Name: containerName,
|
||||
Status: "running",
|
||||
CreatedAt: time.Now(),
|
||||
StartedAt: time.Now(),
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetDeployment gets a deployment by ID
|
||||
func (de *DeploymentEngine) GetDeployment(id string) (*Deployment, error) {
|
||||
deployment, exists := de.deployments[id]
|
||||
if !exists {
|
||||
return nil, fmt.Errorf("deployment not found: %s", id)
|
||||
}
|
||||
return deployment, nil
|
||||
}
|
||||
|
||||
// ListDeployments lists all deployments
|
||||
func (de *DeploymentEngine) ListDeployments(projectID, serviceID string) ([]*Deployment, error) {
|
||||
var deployments []*Deployment
|
||||
|
||||
for _, deployment := range de.deployments {
|
||||
if projectID != "" && deployment.ProjectID != projectID {
|
||||
continue
|
||||
}
|
||||
if serviceID != "" && deployment.ServiceID != serviceID {
|
||||
continue
|
||||
}
|
||||
deployments = append(deployments, deployment)
|
||||
}
|
||||
|
||||
return deployments, nil
|
||||
}
|
||||
|
||||
// CancelDeployment cancels a running deployment
|
||||
func (de *DeploymentEngine) CancelDeployment(ctx context.Context, id string) error {
|
||||
deployment, exists := de.deployments[id]
|
||||
if !exists {
|
||||
return fmt.Errorf("deployment not found: %s", id)
|
||||
}
|
||||
|
||||
if deployment.Status == "completed" || deployment.Status == "failed" {
|
||||
return fmt.Errorf("cannot cancel completed deployment: %s", id)
|
||||
}
|
||||
|
||||
// Stop all containers
|
||||
for _, container := range deployment.Containers {
|
||||
err := de.dockerClient.StopContainer(ctx, container.ID, nil)
|
||||
if err != nil {
|
||||
log.Printf("Failed to stop container %s: %v", container.ID, err)
|
||||
}
|
||||
}
|
||||
|
||||
deployment.Status = "cancelled"
|
||||
deployment.CompletedAt = &[]time.Time{time.Now()}[0]
|
||||
|
||||
de.logEvent(&DeploymentEvent{
|
||||
Type: "deployment_cancelled",
|
||||
Deployment: deployment,
|
||||
Timestamp: time.Now(),
|
||||
Message: "Deployment was cancelled",
|
||||
})
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetDeploymentLogs gets the logs for a deployment
|
||||
func (de *DeploymentEngine) GetDeploymentLogs(ctx context.Context, id string) (string, error) {
|
||||
deployment, exists := de.deployments[id]
|
||||
if !exists {
|
||||
return "", fmt.Errorf("deployment not found: %s", id)
|
||||
}
|
||||
|
||||
logs := deployment.BuildLog
|
||||
logs += "\n" + deployment.DeployLog
|
||||
|
||||
// Add container logs
|
||||
for _, container := range deployment.Containers {
|
||||
containerLogs, err := de.dockerClient.GetContainerLogs(ctx, container.ID, docker.LogOptions{
|
||||
Stdout: true,
|
||||
Stderr: true,
|
||||
})
|
||||
if err != nil {
|
||||
log.Printf("Failed to get logs for container %s: %v", container.ID, err)
|
||||
continue
|
||||
}
|
||||
logs += fmt.Sprintf("\n=== Container %s Logs ===\n%s", container.Name, containerLogs)
|
||||
}
|
||||
|
||||
return logs, nil
|
||||
}
|
||||
|
||||
// WatchDeploymentEvents returns a channel of deployment events
|
||||
func (de *DeploymentEngine) WatchDeploymentEvents() <-chan *DeploymentEvent {
|
||||
return de.deploymentLog
|
||||
}
|
||||
|
||||
// logEvent logs a deployment event
|
||||
func (de *DeploymentEngine) logEvent(event *DeploymentEvent) {
|
||||
select {
|
||||
case de.deploymentLog <- event:
|
||||
default:
|
||||
// Channel is full, drop the event
|
||||
log.Printf("Deployment event channel is full, dropping event: %s", event.Type)
|
||||
}
|
||||
}
|
||||
|
||||
// generateDeploymentID returns an ID of the form "deploy-<n>", where n is the
// current Unix time in nanoseconds. Two calls that observe the same clock
// reading return the same ID.
func generateDeploymentID() string {
	nanos := time.Now().UnixNano()
	return fmt.Sprintf("deploy-%d", nanos)
}
|
||||
@@ -0,0 +1,718 @@
|
||||
package deployment
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
type HistoryManager struct {
|
||||
storagePath string
|
||||
mu sync.RWMutex
|
||||
deployments map[string]*DeploymentRecord
|
||||
}
|
||||
|
||||
type DeploymentRecord struct {
|
||||
ID string `json:"id"`
|
||||
ProjectID string `json:"project_id"`
|
||||
ServiceID string `json:"service_id"`
|
||||
Environment string `json:"environment"`
|
||||
Status string `json:"status"`
|
||||
ImageName string `json:"image_name"`
|
||||
ImageTag string `json:"image_tag"`
|
||||
Config ServiceConfig `json:"config"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
StartedAt *time.Time `json:"started_at,omitempty"`
|
||||
CompletedAt *time.Time `json:"completed_at,omitempty"`
|
||||
Duration time.Duration `json:"duration"`
|
||||
Containers []ContainerRecord `json:"containers"`
|
||||
BuildLog string `json:"build_log"`
|
||||
DeployLog string `json:"deploy_log"`
|
||||
Error string `json:"error,omitempty"`
|
||||
Metadata map[string]string `json:"metadata"`
|
||||
Trigger TriggerRecord `json:"trigger"`
|
||||
RollbackFrom *string `json:"rollback_from,omitempty"`
|
||||
Rollbacks []string `json:"rollbacks"`
|
||||
Tags []string `json:"tags"`
|
||||
Annotations map[string]interface{} `json:"annotations"`
|
||||
}
|
||||
|
||||
type ContainerRecord struct {
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Status string `json:"status"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
StartedAt time.Time `json:"started_at"`
|
||||
StoppedAt *time.Time `json:"stopped_at,omitempty"`
|
||||
Ports []PortRecord `json:"ports,omitempty"`
|
||||
Resources ResourceRecord `json:"resources"`
|
||||
Health *HealthRecord `json:"health,omitempty"`
|
||||
ExitCode *int `json:"exit_code,omitempty"`
|
||||
ErrorMessage string `json:"error_message,omitempty"`
|
||||
}
|
||||
|
||||
// PortRecord is the history form of a container port and its host binding.
type PortRecord struct {
	ContainerPort int32 `json:"container_port"`

	// HostPort is omitted from JSON when zero.
	HostPort int32 `json:"host_port,omitempty"`

	HostIP   string `json:"host_ip"`
	Protocol string `json:"protocol"`
}
|
||||
|
||||
// ResourceRecord is the history form of a container's resource usage.
type ResourceRecord struct {
	CPUPercent float64 `json:"cpu_percent"`

	// Memory figures.
	MemoryUsage int64 `json:"memory_usage"`
	MemoryLimit int64 `json:"memory_limit"`

	// Network counters (received / transmitted).
	NetworkRx int64 `json:"network_rx"`
	NetworkTx int64 `json:"network_tx"`

	// Process counts.
	PidsCurrent uint64 `json:"pids_current"`
	PidsLimit   uint64 `json:"pids_limit"`
}
|
||||
|
||||
// HealthRecord is the history form of a container's health status.
type HealthRecord struct {
	Status string `json:"status"`

	// FailingStreak counts failed probes.
	FailingStreak int `json:"failing_streak"`

	// LastCheck is when the probe last ran.
	LastCheck time.Time `json:"last_check"`

	// Output is optional and omitted from JSON when empty.
	Output string `json:"output,omitempty"`
}
|
||||
|
||||
// TriggerRecord is the history form of what caused a deployment.
type TriggerRecord struct {
	// Type is the kind of trigger: webhook, manual, api or scheduled.
	Type string `json:"type"`

	// Source identifies where the trigger came from.
	Source string `json:"source"`

	// User is the user who triggered the deployment.
	User string `json:"user"`

	// Data holds trigger-specific key/value pairs.
	Data map[string]string `json:"data"`

	// Timestamp is when the trigger occurred.
	Timestamp time.Time `json:"timestamp"`
}
|
||||
|
||||
// DeploymentFilter selects, orders and pages deployment records. Empty string
// fields and zero times match everything.
type DeploymentFilter struct {
	// Exact-match criteria.
	ProjectID   string `json:"project_id,omitempty"`
	ServiceID   string `json:"service_id,omitempty"`
	Environment string `json:"environment,omitempty"`
	Status      string `json:"status,omitempty"`
	TriggerType string `json:"trigger_type,omitempty"`
	User        string `json:"user,omitempty"`

	// From and To bound the record's creation time. Both are struct values,
	// so omitempty does not drop them from the JSON output.
	From time.Time `json:"from,omitempty"`
	To   time.Time `json:"to,omitempty"`

	// Tags matches records that carry at least one of the listed tags.
	Tags []string `json:"tags,omitempty"`

	// Paging.
	Limit  int `json:"limit,omitempty"`
	Offset int `json:"offset,omitempty"`

	// SortBy is one of created_at, started_at, completed_at, duration.
	SortBy string `json:"sort_by,omitempty"`

	// SortOrder is asc or desc.
	SortOrder string `json:"sort_order,omitempty"`
}
|
||||
|
||||
type DeploymentStats struct {
|
||||
TotalDeployments int `json:"total_deployments"`
|
||||
SuccessfulDeployments int `json:"successful_deployments"`
|
||||
FailedDeployments int `json:"failed_deployments"`
|
||||
AverageDuration time.Duration `json:"average_duration"`
|
||||
DeploymentsByStatus map[string]int `json:"deployments_by_status"`
|
||||
DeploymentsByEnv map[string]int `json:"deployments_by_env"`
|
||||
DeploymentsByDay map[string]int `json:"deployments_by_day"`
|
||||
RecentActivity []DeploymentRecord `json:"recent_activity"`
|
||||
TopServices []ServiceDeploymentStats `json:"top_services"`
|
||||
TopUsers []UserDeploymentStats `json:"top_users"`
|
||||
}
|
||||
|
||||
// ServiceDeploymentStats summarizes the deployments of a single service.
type ServiceDeploymentStats struct {
	// Which service.
	ServiceID   string `json:"service_id"`
	ServiceName string `json:"service_name"`

	// Counters and the rate derived from them.
	DeploymentCount int     `json:"deployment_count"`
	SuccessCount    int     `json:"success_count"`
	FailureCount    int     `json:"failure_count"`
	SuccessRate     float64 `json:"success_rate"`

	// Timing.
	AverageDuration time.Duration `json:"average_duration"`
	LastDeployment  time.Time     `json:"last_deployment"`
}
|
||||
|
||||
// UserDeploymentStats summarizes the deployments triggered by a single user.
type UserDeploymentStats struct {
	User string `json:"user"`

	// Counters and the rate derived from them.
	DeploymentCount int     `json:"deployment_count"`
	SuccessCount    int     `json:"success_count"`
	FailureCount    int     `json:"failure_count"`
	SuccessRate     float64 `json:"success_rate"`

	// Timing.
	AverageDuration time.Duration `json:"average_duration"`
	LastDeployment  time.Time     `json:"last_deployment"`
}
|
||||
|
||||
func NewHistoryManager(storagePath string) *HistoryManager {
|
||||
return &HistoryManager{
|
||||
storagePath: storagePath,
|
||||
deployments: make(map[string]*DeploymentRecord),
|
||||
}
|
||||
}
|
||||
|
||||
// RecordDeployment records a deployment in history
|
||||
func (hm *HistoryManager) RecordDeployment(deployment *Deployment) error {
|
||||
hm.mu.Lock()
|
||||
defer hm.mu.Unlock()
|
||||
|
||||
record := hm.convertToRecord(deployment)
|
||||
hm.deployments[record.ID] = record
|
||||
|
||||
// Save to storage
|
||||
return hm.saveDeployment(record)
|
||||
}
|
||||
|
||||
// GetDeployment gets a deployment record by ID
|
||||
func (hm *HistoryManager) GetDeployment(id string) (*DeploymentRecord, error) {
|
||||
hm.mu.RLock()
|
||||
defer hm.mu.RUnlock()
|
||||
|
||||
record, exists := hm.deployments[id]
|
||||
if !exists {
|
||||
return nil, fmt.Errorf("deployment not found: %s", id)
|
||||
}
|
||||
|
||||
return record, nil
|
||||
}
|
||||
|
||||
// ListDeployments lists deployments with filtering
|
||||
func (hm *HistoryManager) ListDeployments(filter DeploymentFilter) ([]*DeploymentRecord, error) {
|
||||
hm.mu.RLock()
|
||||
defer hm.mu.RUnlock()
|
||||
|
||||
var deployments []*DeploymentRecord
|
||||
|
||||
for _, record := range hm.deployments {
|
||||
if hm.matchesFilter(record, filter) {
|
||||
deployments = append(deployments, record)
|
||||
}
|
||||
}
|
||||
|
||||
// Sort deployments
|
||||
hm.sortDeployments(deployments, filter.SortBy, filter.SortOrder)
|
||||
|
||||
// Apply pagination
|
||||
if filter.Limit > 0 {
|
||||
start := filter.Offset
|
||||
if start >= len(deployments) {
|
||||
return []*DeploymentRecord{}, nil
|
||||
}
|
||||
end := start + filter.Limit
|
||||
if end > len(deployments) {
|
||||
end = len(deployments)
|
||||
}
|
||||
deployments = deployments[start:end]
|
||||
}
|
||||
|
||||
return deployments, nil
|
||||
}
|
||||
|
||||
// RollbackDeployment creates a rollback deployment
|
||||
func (hm *HistoryManager) RollbackDeployment(ctx context.Context, deploymentID, reason string, userID string) (*DeploymentRecord, error) {
|
||||
hm.mu.RLock()
|
||||
originalDeployment, exists := hm.deployments[deploymentID]
|
||||
hm.mu.RUnlock()
|
||||
|
||||
if !exists {
|
||||
return nil, fmt.Errorf("deployment not found: %s", deploymentID)
|
||||
}
|
||||
|
||||
// Create rollback deployment record
|
||||
rollbackRecord := &DeploymentRecord{
|
||||
ID: generateDeploymentID(),
|
||||
ProjectID: originalDeployment.ProjectID,
|
||||
ServiceID: originalDeployment.ServiceID,
|
||||
Environment: originalDeployment.Environment,
|
||||
Status: "pending",
|
||||
ImageName: originalDeployment.ImageName,
|
||||
ImageTag: originalDeployment.ImageTag,
|
||||
Config: originalDeployment.Config,
|
||||
CreatedAt: time.Now(),
|
||||
Metadata: map[string]string{
|
||||
"rollback_from": deploymentID,
|
||||
"rollback_reason": reason,
|
||||
},
|
||||
Trigger: TriggerRecord{
|
||||
Type: "rollback",
|
||||
Source: "deployment_history",
|
||||
User: userID,
|
||||
Data: map[string]string{
|
||||
"original_deployment": deploymentID,
|
||||
"reason": reason,
|
||||
},
|
||||
Timestamp: time.Now(),
|
||||
},
|
||||
RollbackFrom: &deploymentID,
|
||||
Tags: append(originalDeployment.Tags, "rollback"),
|
||||
}
|
||||
|
||||
// Record the rollback
|
||||
err := hm.RecordDeployment(&Deployment{
|
||||
ID: rollbackRecord.ID,
|
||||
ProjectID: rollbackRecord.ProjectID,
|
||||
ServiceID: rollbackRecord.ServiceID,
|
||||
Environment: rollbackRecord.Environment,
|
||||
Status: rollbackRecord.Status,
|
||||
ImageName: rollbackRecord.ImageName,
|
||||
ImageTag: rollbackRecord.ImageTag,
|
||||
Config: rollbackRecord.Config,
|
||||
CreatedAt: rollbackRecord.CreatedAt,
|
||||
Metadata: rollbackRecord.Metadata,
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to record rollback: %w", err)
|
||||
}
|
||||
|
||||
// Update original deployment to track rollbacks
|
||||
hm.mu.Lock()
|
||||
if original, exists := hm.deployments[deploymentID]; exists {
|
||||
original.Rollbacks = append(original.Rollbacks, rollbackRecord.ID)
|
||||
hm.saveDeployment(original)
|
||||
}
|
||||
hm.mu.Unlock()
|
||||
|
||||
return rollbackRecord, nil
|
||||
}
|
||||
|
||||
// GetDeploymentHistory gets the deployment history for a service
|
||||
func (hm *HistoryManager) GetDeploymentHistory(serviceID, environment string, limit int) ([]*DeploymentRecord, error) {
|
||||
filter := DeploymentFilter{
|
||||
ServiceID: serviceID,
|
||||
Environment: environment,
|
||||
Limit: limit,
|
||||
SortBy: "created_at",
|
||||
SortOrder: "desc",
|
||||
}
|
||||
|
||||
return hm.ListDeployments(filter)
|
||||
}
|
||||
|
||||
// GetDeploymentStats gets deployment statistics
|
||||
func (hm *HistoryManager) GetDeploymentStats(projectID string) (*DeploymentStats, error) {
|
||||
hm.mu.RLock()
|
||||
defer hm.mu.RUnlock()
|
||||
|
||||
stats := &DeploymentStats{
|
||||
DeploymentsByStatus: make(map[string]int),
|
||||
DeploymentsByEnv: make(map[string]int),
|
||||
DeploymentsByDay: make(map[string]int),
|
||||
}
|
||||
|
||||
var totalDuration time.Duration
|
||||
var successfulDeployments int
|
||||
|
||||
for _, record := range hm.deployments {
|
||||
if projectID != "" && record.ProjectID != projectID {
|
||||
continue
|
||||
}
|
||||
|
||||
stats.TotalDeployments++
|
||||
|
||||
// Count by status
|
||||
stats.DeploymentsByStatus[record.Status]++
|
||||
|
||||
// Count by environment
|
||||
stats.DeploymentsByEnv[record.Environment]++
|
||||
|
||||
// Count by day
|
||||
day := record.CreatedAt.Format("2006-01-02")
|
||||
stats.DeploymentsByDay[day]++
|
||||
|
||||
// Calculate success metrics
|
||||
if record.Status == "running" || record.Status == "completed" {
|
||||
successfulDeployments++
|
||||
stats.SuccessfulDeployments++
|
||||
} else if record.Status == "failed" {
|
||||
stats.FailedDeployments++
|
||||
}
|
||||
|
||||
// Calculate duration
|
||||
if record.Duration > 0 {
|
||||
totalDuration += record.Duration
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate average duration
|
||||
if stats.TotalDeployments > 0 {
|
||||
stats.AverageDuration = totalDuration / time.Duration(stats.TotalDeployments)
|
||||
}
|
||||
|
||||
// Get recent activity
|
||||
stats.RecentActivity = hm.getRecentActivity(projectID, 10)
|
||||
|
||||
// Get top services and users
|
||||
stats.TopServices = hm.getTopServices(projectID, 5)
|
||||
stats.TopUsers = hm.getTopUsers(projectID, 5)
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// DeleteDeployment removes a deployment from history
|
||||
func (hm *HistoryManager) DeleteDeployment(id string) error {
|
||||
hm.mu.Lock()
|
||||
defer hm.mu.Unlock()
|
||||
|
||||
if _, exists := hm.deployments[id]; !exists {
|
||||
return fmt.Errorf("deployment not found: %s", id)
|
||||
}
|
||||
|
||||
delete(hm.deployments, id)
|
||||
|
||||
// Remove from storage
|
||||
return hm.deleteDeploymentFile(id)
|
||||
}
|
||||
|
||||
// convertToRecord converts a Deployment to DeploymentRecord
|
||||
func (hm *HistoryManager) convertToRecord(deployment *Deployment) *DeploymentRecord {
|
||||
record := &DeploymentRecord{
|
||||
ID: deployment.ID,
|
||||
ProjectID: deployment.ProjectID,
|
||||
ServiceID: deployment.ServiceID,
|
||||
Environment: deployment.Environment,
|
||||
Status: deployment.Status,
|
||||
ImageName: deployment.ImageName,
|
||||
ImageTag: deployment.ImageTag,
|
||||
Config: deployment.Config,
|
||||
CreatedAt: deployment.CreatedAt,
|
||||
StartedAt: deployment.StartedAt,
|
||||
CompletedAt: deployment.CompletedAt,
|
||||
BuildLog: deployment.BuildLog,
|
||||
DeployLog: deployment.DeployLog,
|
||||
Error: deployment.Error,
|
||||
Metadata: deployment.Metadata,
|
||||
Tags: []string{},
|
||||
Annotations: make(map[string]interface{}),
|
||||
}
|
||||
|
||||
// Calculate duration
|
||||
if deployment.StartedAt != nil && deployment.CompletedAt != nil {
|
||||
record.Duration = deployment.CompletedAt.Sub(*deployment.StartedAt)
|
||||
}
|
||||
|
||||
// Convert containers
|
||||
for _, container := range deployment.Containers {
|
||||
containerRecord := ContainerRecord{
|
||||
ID: container.ID,
|
||||
Name: container.Name,
|
||||
Status: container.Status,
|
||||
CreatedAt: container.CreatedAt,
|
||||
StartedAt: container.StartedAt,
|
||||
Resources: ResourceRecord{
|
||||
CPUPercent: container.Resources.CPUPercent,
|
||||
MemoryUsage: container.Resources.MemoryUsage,
|
||||
MemoryLimit: container.Resources.MemoryLimit,
|
||||
NetworkRx: container.Resources.NetworkRx,
|
||||
NetworkTx: container.Resources.NetworkTx,
|
||||
},
|
||||
}
|
||||
|
||||
if container.Health != nil {
|
||||
containerRecord.Health = &HealthRecord{
|
||||
Status: container.Health.Status,
|
||||
FailingStreak: container.Health.FailingStreak,
|
||||
LastCheck: container.Health.LastCheck,
|
||||
}
|
||||
}
|
||||
|
||||
record.Containers = append(record.Containers, containerRecord)
|
||||
}
|
||||
|
||||
return record
|
||||
}
|
||||
|
||||
// matchesFilter checks if a deployment record matches the filter
|
||||
func (hm *HistoryManager) matchesFilter(record *DeploymentRecord, filter DeploymentFilter) bool {
|
||||
if filter.ProjectID != "" && record.ProjectID != filter.ProjectID {
|
||||
return false
|
||||
}
|
||||
|
||||
if filter.ServiceID != "" && record.ServiceID != filter.ServiceID {
|
||||
return false
|
||||
}
|
||||
|
||||
if filter.Environment != "" && record.Environment != filter.Environment {
|
||||
return false
|
||||
}
|
||||
|
||||
if filter.Status != "" && record.Status != filter.Status {
|
||||
return false
|
||||
}
|
||||
|
||||
if filter.TriggerType != "" && record.Trigger.Type != filter.TriggerType {
|
||||
return false
|
||||
}
|
||||
|
||||
if filter.User != "" && record.Trigger.User != filter.User {
|
||||
return false
|
||||
}
|
||||
|
||||
if !filter.From.IsZero() && record.CreatedAt.Before(filter.From) {
|
||||
return false
|
||||
}
|
||||
|
||||
if !filter.To.IsZero() && record.CreatedAt.After(filter.To) {
|
||||
return false
|
||||
}
|
||||
|
||||
if len(filter.Tags) > 0 {
|
||||
hasTag := false
|
||||
for _, tag := range filter.Tags {
|
||||
for _, recordTag := range record.Tags {
|
||||
if recordTag == tag {
|
||||
hasTag = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if hasTag {
|
||||
break
|
||||
}
|
||||
}
|
||||
if !hasTag {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// sortDeployments sorts deployments based on the specified criteria
|
||||
func (hm *HistoryManager) sortDeployments(deployments []*DeploymentRecord, sortBy, sortOrder string) {
|
||||
if sortBy == "" {
|
||||
sortBy = "created_at"
|
||||
}
|
||||
if sortOrder == "" {
|
||||
sortOrder = "desc"
|
||||
}
|
||||
|
||||
sort.Slice(deployments, func(i, j int) bool {
|
||||
var less bool
|
||||
|
||||
switch sortBy {
|
||||
case "created_at":
|
||||
less = deployments[i].CreatedAt.Before(deployments[j].CreatedAt)
|
||||
case "started_at":
|
||||
if deployments[i].StartedAt == nil {
|
||||
less = true
|
||||
} else if deployments[j].StartedAt == nil {
|
||||
less = false
|
||||
} else {
|
||||
less = deployments[i].StartedAt.Before(*deployments[j].StartedAt)
|
||||
}
|
||||
case "completed_at":
|
||||
if deployments[i].CompletedAt == nil {
|
||||
less = true
|
||||
} else if deployments[j].CompletedAt == nil {
|
||||
less = false
|
||||
} else {
|
||||
less = deployments[i].CompletedAt.Before(*deployments[j].CompletedAt)
|
||||
}
|
||||
case "duration":
|
||||
less = deployments[i].Duration < deployments[j].Duration
|
||||
default:
|
||||
less = deployments[i].ID < deployments[j].ID
|
||||
}
|
||||
|
||||
if sortOrder == "desc" {
|
||||
return !less
|
||||
}
|
||||
return less
|
||||
})
|
||||
}
|
||||
|
||||
// getRecentActivity gets recent deployment activity
|
||||
func (hm *HistoryManager) getRecentActivity(projectID string, limit int) []DeploymentRecord {
|
||||
var deployments []DeploymentRecord
|
||||
|
||||
for _, record := range hm.deployments {
|
||||
if projectID != "" && record.ProjectID != projectID {
|
||||
continue
|
||||
}
|
||||
deployments = append(deployments, *record)
|
||||
}
|
||||
|
||||
// Sort by created_at desc
|
||||
sort.Slice(deployments, func(i, j int) bool {
|
||||
return deployments[i].CreatedAt.After(deployments[j].CreatedAt)
|
||||
})
|
||||
|
||||
if len(deployments) > limit {
|
||||
deployments = deployments[:limit]
|
||||
}
|
||||
|
||||
return deployments
|
||||
}
|
||||
|
||||
// getTopServices gets top services by deployment count
|
||||
func (hm *HistoryManager) getTopServices(projectID string, limit int) []ServiceDeploymentStats {
|
||||
serviceStats := make(map[string]*ServiceDeploymentStats)
|
||||
|
||||
for _, record := range hm.deployments {
|
||||
if projectID != "" && record.ProjectID != projectID {
|
||||
continue
|
||||
}
|
||||
|
||||
stats, exists := serviceStats[record.ServiceID]
|
||||
if !exists {
|
||||
stats = &ServiceDeploymentStats{
|
||||
ServiceID: record.ServiceID,
|
||||
}
|
||||
serviceStats[record.ServiceID] = stats
|
||||
}
|
||||
|
||||
stats.DeploymentCount++
|
||||
stats.LastDeployment = record.CreatedAt
|
||||
|
||||
if record.Status == "running" || record.Status == "completed" {
|
||||
stats.SuccessCount++
|
||||
} else if record.Status == "failed" {
|
||||
stats.FailureCount++
|
||||
}
|
||||
|
||||
if record.Duration > 0 {
|
||||
// Simple moving average for duration
|
||||
if stats.AverageDuration == 0 {
|
||||
stats.AverageDuration = record.Duration
|
||||
} else {
|
||||
stats.AverageDuration = (stats.AverageDuration + record.Duration) / 2
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate success rates
|
||||
for _, stats := range serviceStats {
|
||||
if stats.DeploymentCount > 0 {
|
||||
stats.SuccessRate = float64(stats.SuccessCount) / float64(stats.DeploymentCount) * 100
|
||||
}
|
||||
}
|
||||
|
||||
// Convert to slice and sort
|
||||
var topServices []ServiceDeploymentStats
|
||||
for _, stats := range serviceStats {
|
||||
topServices = append(topServices, *stats)
|
||||
}
|
||||
|
||||
sort.Slice(topServices, func(i, j int) bool {
|
||||
return topServices[i].DeploymentCount > topServices[j].DeploymentCount
|
||||
})
|
||||
|
||||
if len(topServices) > limit {
|
||||
topServices = topServices[:limit]
|
||||
}
|
||||
|
||||
return topServices
|
||||
}
|
||||
|
||||
// getTopUsers gets top users by deployment count
|
||||
func (hm *HistoryManager) getTopUsers(projectID string, limit int) []UserDeploymentStats {
|
||||
userStats := make(map[string]*UserDeploymentStats)
|
||||
|
||||
for _, record := range hm.deployments {
|
||||
if projectID != "" && record.ProjectID != projectID {
|
||||
continue
|
||||
}
|
||||
|
||||
user := record.Trigger.User
|
||||
if user == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
stats, exists := userStats[user]
|
||||
if !exists {
|
||||
stats = &UserDeploymentStats{
|
||||
User: user,
|
||||
}
|
||||
userStats[user] = stats
|
||||
}
|
||||
|
||||
stats.DeploymentCount++
|
||||
stats.LastDeployment = record.CreatedAt
|
||||
|
||||
if record.Status == "running" || record.Status == "completed" {
|
||||
stats.SuccessCount++
|
||||
} else if record.Status == "failed" {
|
||||
stats.FailureCount++
|
||||
}
|
||||
|
||||
if record.Duration > 0 {
|
||||
if stats.AverageDuration == 0 {
|
||||
stats.AverageDuration = record.Duration
|
||||
} else {
|
||||
stats.AverageDuration = (stats.AverageDuration + record.Duration) / 2
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate success rates
|
||||
for _, stats := range userStats {
|
||||
if stats.DeploymentCount > 0 {
|
||||
stats.SuccessRate = float64(stats.SuccessCount) / float64(stats.DeploymentCount) * 100
|
||||
}
|
||||
}
|
||||
|
||||
// Convert to slice and sort
|
||||
var topUsers []UserDeploymentStats
|
||||
for _, stats := range userStats {
|
||||
topUsers = append(topUsers, *stats)
|
||||
}
|
||||
|
||||
sort.Slice(topUsers, func(i, j int) bool {
|
||||
return topUsers[i].DeploymentCount > topUsers[j].DeploymentCount
|
||||
})
|
||||
|
||||
if len(topUsers) > limit {
|
||||
topUsers = topUsers[:limit]
|
||||
}
|
||||
|
||||
return topUsers
|
||||
}
|
||||
|
||||
// saveDeployment saves a deployment record to storage
|
||||
func (hm *HistoryManager) saveDeployment(record *DeploymentRecord) error {
|
||||
if err := os.MkdirAll(hm.storagePath, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
filename := filepath.Join(hm.storagePath, record.ID+".json")
|
||||
data, err := json.MarshalIndent(record, "", " ")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return os.WriteFile(filename, data, 0644)
|
||||
}
|
||||
|
||||
// deleteDeploymentFile removes a deployment file from storage
|
||||
func (hm *HistoryManager) deleteDeploymentFile(id string) error {
|
||||
filename := filepath.Join(hm.storagePath, id+".json")
|
||||
return os.Remove(filename)
|
||||
}
|
||||
|
||||
// loadDeployments loads all deployments from storage
|
||||
func (hm *HistoryManager) loadDeployments() error {
|
||||
if _, err := os.Stat(hm.storagePath); os.IsNotExist(err) {
|
||||
return nil // Storage doesn't exist yet
|
||||
}
|
||||
|
||||
files, err := os.ReadDir(hm.storagePath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, file := range files {
|
||||
if file.IsDir() || !strings.HasSuffix(file.Name(), ".json") {
|
||||
continue
|
||||
}
|
||||
|
||||
filename := filepath.Join(hm.storagePath, file.Name())
|
||||
data, err := os.ReadFile(filename)
|
||||
if err != nil {
|
||||
continue // Skip files that can't be read
|
||||
}
|
||||
|
||||
var record DeploymentRecord
|
||||
if err := json.Unmarshal(data, &record); err != nil {
|
||||
continue // Skip invalid files
|
||||
}
|
||||
|
||||
hm.deployments[record.ID] = &record
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,379 @@
|
||||
package deployment
|
||||
|
||||
import (
	"context"
	"fmt"
	"math/rand"
	"sort"
	"sync"
	"time"

	"containr/internal/docker"
)
|
||||
|
||||
type Scheduler struct {
|
||||
nodes map[string]*Node
|
||||
mu sync.RWMutex
|
||||
dockerClient *docker.Client
|
||||
schedulingAlg SchedulingAlgorithm
|
||||
}
|
||||
|
||||
type Node struct {
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Address string `json:"address"`
|
||||
Status string `json:"status"`
|
||||
Capacity ResourceCapacity `json:"capacity"`
|
||||
Usage NodeResourceUsage `json:"usage"`
|
||||
Labels map[string]string `json:"labels"`
|
||||
LastHeartbeat time.Time `json:"last_heartbeat"`
|
||||
Containers []string `json:"containers"`
|
||||
}
|
||||
|
||||
// ResourceCapacity is a quantity of resources: either what a node offers in
// total or what a container requires.
type ResourceCapacity struct {
	// CPU is the CPU quantity. The original note reads "CPU cores in
	// nanoseconds".
	// NOTE(review): presumably nano-CPU units — confirm.
	CPU int64 `json:"cpu"`
	// Memory is the amount of memory in bytes.
	Memory int64 `json:"memory"`
	// Storage is the amount of storage in bytes.
	Storage int64 `json:"storage"`
	// Network is the network bandwidth in bytes per second.
	Network int64 `json:"network"`
}
|
||||
|
||||
// NodeResourceUsage is the amount of resources currently consumed on a node.
type NodeResourceUsage struct {
	// CPU is the CPU usage as a percentage (0-100) of the node's capacity.
	CPU float64 `json:"cpu"`
	// Memory is the memory in use, in bytes.
	Memory int64 `json:"memory"`
	// Storage is the storage in use, in bytes.
	Storage int64 `json:"storage"`
	// Network is the network throughput in use, in bytes per second.
	Network int64 `json:"network"`
}
|
||||
|
||||
// SchedulingAlgorithm names a node-selection strategy.
type SchedulingAlgorithm string

// Strategies understood by the scheduler.
const (
	// SchedulingAlgorithmRoundRobin selects the round-robin strategy.
	SchedulingAlgorithmRoundRobin SchedulingAlgorithm = "round_robin"
	// SchedulingAlgorithmLeastLoaded selects the node with the best load score.
	SchedulingAlgorithmLeastLoaded SchedulingAlgorithm = "least_loaded"
	// SchedulingAlgorithmBestFit selects the node with the best fit score.
	SchedulingAlgorithmBestFit SchedulingAlgorithm = "best_fit"
	// SchedulingAlgorithmRandom selects the random strategy.
	SchedulingAlgorithmRandom SchedulingAlgorithm = "random"
)
|
||||
|
||||
type SchedulingDecision struct {
|
||||
NodeID string `json:"node_id"`
|
||||
Reason string `json:"reason"`
|
||||
Score float64 `json:"score"`
|
||||
Alternatives []NodeScore `json:"alternatives"`
|
||||
}
|
||||
|
||||
// NodeScore is the score one candidate node received during scheduling.
type NodeScore struct {
	// NodeID is the ID of the scored node.
	NodeID string `json:"node_id"`
	// Score is the computed score; higher is better.
	Score float64 `json:"score"`
	// Reason names the kind of score that was computed.
	Reason string `json:"reason"`
}
|
||||
|
||||
func NewScheduler() *Scheduler {
|
||||
return &Scheduler{
|
||||
nodes: make(map[string]*Node),
|
||||
schedulingAlg: SchedulingAlgorithmLeastLoaded,
|
||||
}
|
||||
}
|
||||
|
||||
// RegisterNode registers a new node in the scheduler
|
||||
func (s *Scheduler) RegisterNode(node *Node) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
if _, exists := s.nodes[node.ID]; exists {
|
||||
return fmt.Errorf("node already registered: %s", node.ID)
|
||||
}
|
||||
|
||||
node.Status = "ready"
|
||||
node.LastHeartbeat = time.Now()
|
||||
s.nodes[node.ID] = node
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// UnregisterNode removes a node from the scheduler
|
||||
func (s *Scheduler) UnregisterNode(nodeID string) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
if _, exists := s.nodes[nodeID]; !exists {
|
||||
return fmt.Errorf("node not found: %s", nodeID)
|
||||
}
|
||||
|
||||
delete(s.nodes, nodeID)
|
||||
return nil
|
||||
}
|
||||
|
||||
// UpdateNode updates node information
|
||||
func (s *Scheduler) UpdateNode(node *Node) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
if _, exists := s.nodes[node.ID]; !exists {
|
||||
return fmt.Errorf("node not found: %s", node.ID)
|
||||
}
|
||||
|
||||
node.LastHeartbeat = time.Now()
|
||||
s.nodes[node.ID] = node
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetNodes returns all registered nodes
|
||||
func (s *Scheduler) GetNodes() []*Node {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
|
||||
nodes := make([]*Node, 0, len(s.nodes))
|
||||
for _, node := range s.nodes {
|
||||
nodes = append(nodes, node)
|
||||
}
|
||||
|
||||
return nodes
|
||||
}
|
||||
|
||||
// GetReadyNodes returns only nodes that are ready for scheduling
|
||||
func (s *Scheduler) GetReadyNodes() []*Node {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
|
||||
nodes := make([]*Node, 0, len(s.nodes))
|
||||
for _, node := range s.nodes {
|
||||
if node.Status == "ready" && s.isNodeHealthy(node) {
|
||||
nodes = append(nodes, node)
|
||||
}
|
||||
}
|
||||
|
||||
return nodes
|
||||
}
|
||||
|
||||
// ScheduleContainer schedules a container to run on the best available node
|
||||
func (s *Scheduler) ScheduleContainer(ctx context.Context, requirements ResourceCapacity) (*SchedulingDecision, error) {
|
||||
readyNodes := s.GetReadyNodes()
|
||||
if len(readyNodes) == 0 {
|
||||
return nil, fmt.Errorf("no ready nodes available")
|
||||
}
|
||||
|
||||
var decision *SchedulingDecision
|
||||
|
||||
switch s.schedulingAlg {
|
||||
case SchedulingAlgorithmRoundRobin:
|
||||
decision = s.scheduleRoundRobin(readyNodes, requirements)
|
||||
case SchedulingAlgorithmLeastLoaded:
|
||||
decision = s.scheduleLeastLoaded(readyNodes, requirements)
|
||||
case SchedulingAlgorithmBestFit:
|
||||
decision = s.scheduleBestFit(readyNodes, requirements)
|
||||
case SchedulingAlgorithmRandom:
|
||||
decision = s.scheduleRandom(readyNodes, requirements)
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown scheduling algorithm: %s", s.schedulingAlg)
|
||||
}
|
||||
|
||||
if decision == nil {
|
||||
return nil, fmt.Errorf("failed to schedule container")
|
||||
}
|
||||
|
||||
return decision, nil
|
||||
}
|
||||
|
||||
// scheduleRoundRobin schedules containers in a round-robin fashion
|
||||
func (s *Scheduler) scheduleRoundRobin(nodes []*Node, requirements ResourceCapacity) *SchedulingDecision {
|
||||
// Find the node with the fewest containers
|
||||
var selectedNode *Node
|
||||
minContainers := int(^uint(0) >> 1) // Max int
|
||||
|
||||
for _, node := range nodes {
|
||||
if len(node.Containers) < minContainers && s.canFitRequirements(node, requirements) {
|
||||
selectedNode = node
|
||||
minContainers = len(node.Containers)
|
||||
}
|
||||
}
|
||||
|
||||
if selectedNode == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return &SchedulingDecision{
|
||||
NodeID: selectedNode.ID,
|
||||
Reason: "Round-robin scheduling",
|
||||
Score: 1.0,
|
||||
}
|
||||
}
|
||||
|
||||
// scheduleLeastLoaded schedules containers on the least loaded node
|
||||
func (s *Scheduler) scheduleLeastLoaded(nodes []*Node, requirements ResourceCapacity) *SchedulingDecision {
|
||||
var scores []NodeScore
|
||||
|
||||
for _, node := range nodes {
|
||||
if !s.canFitRequirements(node, requirements) {
|
||||
continue
|
||||
}
|
||||
|
||||
score := s.calculateLoadScore(node)
|
||||
scores = append(scores, NodeScore{
|
||||
NodeID: node.ID,
|
||||
Score: score,
|
||||
Reason: "Load-based score",
|
||||
})
|
||||
}
|
||||
|
||||
if len(scores) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Sort by score (highest first)
|
||||
sort.Slice(scores, func(i, j int) bool {
|
||||
return scores[i].Score > scores[j].Score
|
||||
})
|
||||
|
||||
selected := scores[0]
|
||||
|
||||
return &SchedulingDecision{
|
||||
NodeID: selected.NodeID,
|
||||
Reason: selected.Reason,
|
||||
Score: selected.Score,
|
||||
Alternatives: scores[1:],
|
||||
}
|
||||
}
|
||||
|
||||
// scheduleBestFit schedules containers on the node with the best resource fit
|
||||
func (s *Scheduler) scheduleBestFit(nodes []*Node, requirements ResourceCapacity) *SchedulingDecision {
|
||||
var scores []NodeScore
|
||||
|
||||
for _, node := range nodes {
|
||||
if !s.canFitRequirements(node, requirements) {
|
||||
continue
|
||||
}
|
||||
|
||||
score := s.calculateFitScore(node, requirements)
|
||||
scores = append(scores, NodeScore{
|
||||
NodeID: node.ID,
|
||||
Score: score,
|
||||
Reason: "Best-fit score",
|
||||
})
|
||||
}
|
||||
|
||||
if len(scores) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Sort by score (highest first)
|
||||
sort.Slice(scores, func(i, j int) bool {
|
||||
return scores[i].Score > scores[j].Score
|
||||
})
|
||||
|
||||
selected := scores[0]
|
||||
|
||||
return &SchedulingDecision{
|
||||
NodeID: selected.NodeID,
|
||||
Reason: selected.Reason,
|
||||
Score: selected.Score,
|
||||
Alternatives: scores[1:],
|
||||
}
|
||||
}
|
||||
|
||||
// scheduleRandom schedules containers on a random available node
|
||||
func (s *Scheduler) scheduleRandom(nodes []*Node, requirements ResourceCapacity) *SchedulingDecision {
|
||||
var availableNodes []*Node
|
||||
|
||||
for _, node := range nodes {
|
||||
if s.canFitRequirements(node, requirements) {
|
||||
availableNodes = append(availableNodes, node)
|
||||
}
|
||||
}
|
||||
|
||||
if len(availableNodes) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Simple random selection (in production, use proper random)
|
||||
selectedNode := availableNodes[0] // For simplicity, just pick the first one
|
||||
|
||||
return &SchedulingDecision{
|
||||
NodeID: selectedNode.ID,
|
||||
Reason: "Random selection",
|
||||
Score: 1.0,
|
||||
}
|
||||
}
|
||||
|
||||
// canFitRequirements checks if a node can accommodate the resource requirements
|
||||
func (s *Scheduler) canFitRequirements(node *Node, requirements ResourceCapacity) bool {
|
||||
availableCPU := node.Capacity.CPU - int64(node.Usage.CPU*float64(node.Capacity.CPU)/100)
|
||||
availableMemory := node.Capacity.Memory - node.Usage.Memory
|
||||
|
||||
return availableCPU >= requirements.CPU && availableMemory >= requirements.Memory
|
||||
}
|
||||
|
||||
// calculateLoadScore calculates a score based on node load
|
||||
func (s *Scheduler) calculateLoadScore(node *Node) float64 {
|
||||
// Lower load = higher score
|
||||
cpuLoad := node.Usage.CPU / 100.0
|
||||
memoryLoad := float64(node.Usage.Memory) / float64(node.Capacity.Memory)
|
||||
containerLoad := float64(len(node.Containers)) / 10.0 // Assume max 10 containers
|
||||
|
||||
// Combined load score (0-1, where 0 is no load and 1 is full load)
|
||||
combinedLoad := (cpuLoad + memoryLoad + containerLoad) / 3.0
|
||||
|
||||
// Convert to score where higher is better (1 - load)
|
||||
return 1.0 - combinedLoad
|
||||
}
|
||||
|
||||
// calculateFitScore calculates how well the requirements fit the node
|
||||
func (s *Scheduler) calculateFitScore(node *Node, requirements ResourceCapacity) float64 {
|
||||
availableCPU := node.Capacity.CPU - int64(node.Usage.CPU*float64(node.Capacity.CPU)/100)
|
||||
availableMemory := node.Capacity.Memory - node.Usage.Memory
|
||||
|
||||
// Calculate utilization after placing this container
|
||||
newCPUUtilization := float64(node.Capacity.CPU-availableCPU+requirements.CPU) / float64(node.Capacity.CPU)
|
||||
newMemoryUtilization := float64(node.Capacity.Memory-availableMemory+requirements.Memory) / float64(node.Capacity.Memory)
|
||||
|
||||
// Prefer moderate utilization (not too low, not too high)
|
||||
cpuScore := 1.0 - abs(newCPUUtilization-0.7)
|
||||
memoryScore := 1.0 - abs(newMemoryUtilization-0.7)
|
||||
|
||||
return (cpuScore + memoryScore) / 2.0
|
||||
}
|
||||
|
||||
// isNodeHealthy checks if a node is healthy based on heartbeat
|
||||
func (s *Scheduler) isNodeHealthy(node *Node) bool {
|
||||
return time.Since(node.LastHeartbeat) < 30*time.Second
|
||||
}
|
||||
|
||||
// abs returns the magnitude of x. Non-negative inputs are returned unchanged;
// negative inputs are negated.
func abs(x float64) float64 {
	if x >= 0 {
		return x
	}
	return -x
}
|
||||
|
||||
// SetSchedulingAlgorithm sets the scheduling algorithm
|
||||
func (s *Scheduler) SetSchedulingAlgorithm(alg SchedulingAlgorithm) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.schedulingAlg = alg
|
||||
}
|
||||
|
||||
// GetNodeStats returns statistics about nodes
|
||||
func (s *Scheduler) GetNodeStats() map[string]interface{} {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
|
||||
totalNodes := len(s.nodes)
|
||||
readyNodes := 0
|
||||
unhealthyNodes := 0
|
||||
|
||||
for _, node := range s.nodes {
|
||||
if node.Status == "ready" {
|
||||
if s.isNodeHealthy(node) {
|
||||
readyNodes++
|
||||
} else {
|
||||
unhealthyNodes++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return map[string]interface{}{
|
||||
"total_nodes": totalNodes,
|
||||
"ready_nodes": readyNodes,
|
||||
"unhealthy_nodes": unhealthyNodes,
|
||||
"scheduling_alg": string(s.schedulingAlg),
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user