feat: initial implementation of container management platform

This commit is contained in:
Tomas Dvorak
2026-02-16 10:18:05 +01:00
commit ffa5489dc1
167 changed files with 55910 additions and 0 deletions
+360
View File
@@ -0,0 +1,360 @@
package networking
import (
"context"
"fmt"
"net"
"strings"
"sync"
"time"
"github.com/miekg/dns"
)
// DNSServer provides internal DNS resolution for services
type DNSServer struct {
server *dns.Server
serviceDiscovery *ServiceDiscovery
domain string
addresses []string
mu sync.RWMutex
}
// DNSConfig holds DNS server configuration
type DNSConfig struct {
Domain string `json:"domain"`
Addresses []string `json:"addresses"`
Port int `json:"port"`
Upstream []string `json:"upstream"`
}
// NewDNSServer creates a new DNS server
func NewDNSServer(config DNSConfig, serviceDiscovery *ServiceDiscovery) *DNSServer {
return &DNSServer{
domain: config.Domain,
addresses: config.Addresses,
serviceDiscovery: serviceDiscovery,
}
}
// Start starts the DNS server
func (d *DNSServer) Start(ctx context.Context) error {
d.mu.Lock()
defer d.mu.Unlock()
// Create DNS handler
handler := dns.NewServeMux()
handler.HandleFunc(d.domain, d.handleServiceRequest)
handler.HandleFunc("in-addr.arpa.", d.handleReverseRequest)
// Create server
d.server = &dns.Server{
Addr: ":53",
Net: "udp",
Handler: handler,
}
// Start server in goroutine
go func() {
if err := d.server.ListenAndServe(); err != nil {
fmt.Printf("DNS server error: %v\n", err)
}
}()
return nil
}
// Stop stops the DNS server
func (d *DNSServer) Stop() error {
d.mu.Lock()
defer d.mu.Unlock()
if d.server != nil {
return d.server.Shutdown()
}
return nil
}
// handleServiceRequest handles DNS requests for services
func (d *DNSServer) handleServiceRequest(w dns.ResponseWriter, r *dns.Msg) {
msg := new(dns.Msg)
msg.SetReply(r)
msg.Authoritative = true
for _, question := range r.Question {
if question.Qtype == dns.TypeA {
// Extract service name from query
serviceName := d.extractServiceName(question.Name)
if serviceName == "" {
continue
}
// Resolve service
ips, err := d.serviceDiscovery.ResolveService(context.Background(), serviceName, "")
if err != nil {
continue
}
// Create A records
for _, ip := range ips {
rr, err := dns.NewRR(fmt.Sprintf("%s 30 IN A %s", question.Name, ip))
if err == nil {
msg.Answer = append(msg.Answer, rr)
}
}
} else if question.Qtype == dns.TypeSRV {
// Handle SRV requests for service discovery
serviceName := d.extractServiceName(question.Name)
if serviceName == "" {
continue
}
// Get service instances
instances, err := d.serviceDiscovery.DiscoverService(context.Background(), serviceName, "")
if err != nil {
continue
}
// Create SRV records
for _, instance := range instances {
target := fmt.Sprintf("%s.%s", instance.ServiceName, d.domain)
srv := fmt.Sprintf("%s 30 IN SRV 10 5 %d %s", question.Name, instance.Port, target)
rr, err := dns.NewRR(srv)
if err == nil {
msg.Answer = append(msg.Answer, rr)
}
}
}
}
w.WriteMsg(msg)
}
// handleReverseRequest handles reverse DNS lookups
func (d *DNSServer) handleReverseRequest(w dns.ResponseWriter, r *dns.Msg) {
msg := new(dns.Msg)
msg.SetReply(r)
msg.Authoritative = true
for _, question := range r.Question {
if question.Qtype == dns.TypePTR {
// Extract IP from reverse query
ip := d.extractIPFromReverse(question.Name)
if ip == "" {
continue
}
// Find service by IP
var serviceName string
for _, instance := range d.serviceDiscovery.services {
if instance.IPAddress == ip {
serviceName = instance.ServiceName
break
}
}
if serviceName != "" {
ptr := fmt.Sprintf("%s 30 IN PTR %s.%s", question.Name, serviceName, d.domain)
rr, err := dns.NewRR(ptr)
if err == nil {
msg.Answer = append(msg.Answer, rr)
}
}
}
}
w.WriteMsg(msg)
}
// extractServiceName extracts service name from DNS query
func (d *DNSServer) extractServiceName(query string) string {
// Remove domain suffix
if strings.HasSuffix(query, d.domain) {
name := strings.TrimSuffix(query, d.domain)
name = strings.Trim(name, ".")
return name
}
return ""
}
// extractIPFromReverse extracts IP from reverse DNS query
func (d *DNSServer) extractIPFromReverse(reverse string) string {
// Handle IPv4 reverse lookup
if strings.HasSuffix(reverse, "in-addr.arpa.") {
parts := strings.Split(reverse, ".")
if len(parts) >= 4 {
// Reverse the first 4 parts to get IP
ip := fmt.Sprintf("%s.%s.%s.%s", parts[3], parts[2], parts[1], parts[0])
return ip
}
}
return ""
}
// DNSClient provides DNS resolution utilities
type DNSClient struct {
servers []string
timeout time.Duration
}
// NewDNSClient creates a new DNS client
func NewDNSClient(servers []string) *DNSClient {
return &DNSClient{
servers: servers,
timeout: 5 * time.Second,
}
}
// ResolveService resolves a service name using DNS
func (c *DNSClient) ResolveService(serviceName, domain string) ([]string, error) {
fqdn := fmt.Sprintf("%s.%s", serviceName, domain)
// Create DNS message
msg := new(dns.Msg)
msg.SetQuestion(dns.Fqdn(fqdn), dns.TypeA)
msg.RecursionDesired = true
// Try each DNS server
for _, server := range c.servers {
client := &dns.Client{Timeout: c.timeout}
response, _, err := client.Exchange(msg, server+":53")
if err != nil {
continue
}
if len(response.Answer) > 0 {
var ips []string
for _, answer := range response.Answer {
if a, ok := answer.(*dns.A); ok {
ips = append(ips, a.A.String())
}
}
return ips, nil
}
}
return nil, fmt.Errorf("failed to resolve service: %s", serviceName)
}
// ResolveSRV resolves SRV records for a service
func (c *DNSClient) ResolveSRV(serviceName, domain string) ([]*SRVRecord, error) {
fqdn := fmt.Sprintf("_%s._tcp.%s", serviceName, domain)
// Create DNS message
msg := new(dns.Msg)
msg.SetQuestion(dns.Fqdn(fqdn), dns.TypeSRV)
msg.RecursionDesired = true
// Try each DNS server
for _, server := range c.servers {
client := &dns.Client{Timeout: c.timeout}
response, _, err := client.Exchange(msg, server+":53")
if err != nil {
continue
}
if len(response.Answer) > 0 {
var records []*SRVRecord
for _, answer := range response.Answer {
if srv, ok := answer.(*dns.SRV); ok {
record := &SRVRecord{
Priority: srv.Priority,
Weight: srv.Weight,
Port: srv.Port,
Target: srv.Target,
}
records = append(records, record)
}
}
return records, nil
}
}
return nil, fmt.Errorf("failed to resolve SRV record for service: %s", serviceName)
}
// SRVRecord represents an SRV record
type SRVRecord struct {
Priority uint16
Weight uint16
Port uint16
Target string
}
// NetworkUtils provides network utility functions
type NetworkUtils struct{}
// NewNetworkUtils creates a new network utils instance
func NewNetworkUtils() *NetworkUtils {
return &NetworkUtils{}
}
// GetLocalIP returns the local IP address
func (nu *NetworkUtils) GetLocalIP() (string, error) {
addrs, err := net.InterfaceAddrs()
if err != nil {
return "", err
}
for _, addr := range addrs {
if ipnet, ok := addr.(*net.IPNet); ok && !ipnet.IP.IsLoopback() {
if ipnet.IP.To4() != nil {
return ipnet.IP.String(), nil
}
}
}
return "", fmt.Errorf("no local IP address found")
}
// IsPortOpen checks if a port is open on a host
func (nu *NetworkUtils) IsPortOpen(host string, port int, timeout time.Duration) bool {
address := fmt.Sprintf("%s:%d", host, port)
conn, err := net.DialTimeout("tcp", address, timeout)
if err != nil {
return false
}
conn.Close()
return true
}
// WaitForPort waits for a port to become available
func (nu *NetworkUtils) WaitForPort(host string, port int, timeout time.Duration) error {
start := time.Now()
for time.Since(start) < timeout {
if nu.IsPortOpen(host, port, 1*time.Second) {
return nil
}
time.Sleep(1 * time.Second)
}
return fmt.Errorf("port %d on %s did not become available within %v", port, host, timeout)
}
// GenerateSubnet generates a subnet for a project
func (nu *NetworkUtils) GenerateSubnet(projectID string) string {
// Simple hash-based subnet generation
hash := 0
for _, c := range projectID {
hash = hash*31 + int(c)
}
if hash < 0 {
hash = -hash
}
// Generate 10.x.y.0/24 subnet
octet2 := (hash % 254) + 1
octet3 := ((hash / 254) % 254) + 1
return fmt.Sprintf("10.%d.%d.0/24", octet2, octet3)
}
// GetAvailablePort finds an available port in a range
func (nu *NetworkUtils) GetAvailablePort(start, end int) (int, error) {
for port := start; port <= end; port++ {
listener, err := net.Listen("tcp", fmt.Sprintf(":%d", port))
if err == nil {
listener.Close()
return port, nil
}
}
return 0, fmt.Errorf("no available ports in range %d-%d", start, end)
}
+446
View File
@@ -0,0 +1,446 @@
package networking
import (
"context"
"fmt"
"net"
"sync"
"time"
"containr/internal/deployment"
)
// ServiceDiscovery handles service registration, discovery, and DNS resolution
type ServiceDiscovery struct {
services map[string]*ServiceInstance
instances map[string][]*ServiceInstance
mu sync.RWMutex
scheduler *deployment.Scheduler
dnsDomain string
loadBalancer *LoadBalancer
}
// ServiceInstance represents a running instance of a service
type ServiceInstance struct {
ID string `json:"id"`
ServiceID string `json:"service_id"`
ServiceName string `json:"service_name"`
ProjectID string `json:"project_id"`
NodeID string `json:"node_id"`
IPAddress string `json:"ip_address"`
Port int `json:"port"`
Status string `json:"status"`
Health HealthStatus `json:"health"`
Labels map[string]string `json:"labels"`
Metadata map[string]string `json:"metadata"`
CreatedAt time.Time `json:"created_at"`
LastSeen time.Time `json:"last_seen"`
}
// HealthStatus represents the health status of a service instance
type HealthStatus struct {
Status string `json:"status"` // healthy, unhealthy, unknown
LastCheck time.Time `json:"last_check"`
CheckCount int `json:"check_count"`
FailureCount int `json:"failure_count"`
Message string `json:"message"`
}
// ServiceRegistry represents the service registry
type ServiceRegistry struct {
Name string `json:"name"`
Namespace string `json:"namespace"`
Instances []*ServiceInstance `json:"instances"`
Selector map[string]string `json:"selector"`
Ports []ServicePort `json:"ports"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
// ServicePort represents a service port configuration
type ServicePort struct {
Name string `json:"name"`
Port int `json:"port"`
TargetPort int `json:"target_port"`
Protocol string `json:"protocol"`
}
// LoadBalancer handles load balancing across service instances
type LoadBalancer struct {
strategy LoadBalancingStrategy
mu sync.RWMutex
}
type LoadBalancingStrategy string
const (
StrategyRoundRobin LoadBalancingStrategy = "round_robin"
StrategyLeastConnections LoadBalancingStrategy = "least_connections"
StrategyIPHash LoadBalancingStrategy = "ip_hash"
StrategyRandom LoadBalancingStrategy = "random"
)
// DNSRecord represents a DNS record for a service
type DNSRecord struct {
Name string `json:"name"`
Type string `json:"type"` // A, SRV, CNAME
TTL int `json:"ttl"`
Records []string `json:"records"` // IP addresses or hostnames
Priority int `json:"priority"` // For SRV records
Weight int `json:"weight"` // For SRV records
Port int `json:"port"` // For SRV records
}
// NewServiceDiscovery creates a new service discovery instance
func NewServiceDiscovery(scheduler *deployment.Scheduler, dnsDomain string) *ServiceDiscovery {
return &ServiceDiscovery{
services: make(map[string]*ServiceInstance),
instances: make(map[string][]*ServiceInstance),
scheduler: scheduler,
dnsDomain: dnsDomain,
loadBalancer: NewLoadBalancer(StrategyRoundRobin),
}
}
// RegisterService registers a new service instance
func (sd *ServiceDiscovery) RegisterService(ctx context.Context, instance *ServiceInstance) error {
sd.mu.Lock()
defer sd.mu.Unlock()
// Validate instance
if instance.ServiceName == "" || instance.IPAddress == "" {
return fmt.Errorf("service name and IP address are required")
}
// Set defaults
if instance.Status == "" {
instance.Status = "starting"
}
if instance.Health.Status == "" {
instance.Health.Status = "unknown"
}
instance.CreatedAt = time.Now()
instance.LastSeen = time.Now()
// Store instance
sd.services[instance.ID] = instance
// Add to service instances map
serviceKey := sd.getServiceKey(instance.ServiceName, instance.ProjectID)
sd.instances[serviceKey] = append(sd.instances[serviceKey], instance)
// Start health checking
go sd.startHealthCheck(instance)
return nil
}
// UnregisterService removes a service instance
func (sd *ServiceDiscovery) UnregisterService(ctx context.Context, instanceID string) error {
sd.mu.Lock()
defer sd.mu.Unlock()
instance, exists := sd.services[instanceID]
if !exists {
return fmt.Errorf("service instance not found: %s", instanceID)
}
// Remove from services map
delete(sd.services, instanceID)
// Remove from instances map
serviceKey := sd.getServiceKey(instance.ServiceName, instance.ProjectID)
instances := sd.instances[serviceKey]
for i, inst := range instances {
if inst.ID == instanceID {
sd.instances[serviceKey] = append(instances[:i], instances[i+1:]...)
break
}
}
return nil
}
// DiscoverService finds service instances by name and project
func (sd *ServiceDiscovery) DiscoverService(ctx context.Context, serviceName, projectID string) ([]*ServiceInstance, error) {
sd.mu.RLock()
defer sd.mu.RUnlock()
serviceKey := sd.getServiceKey(serviceName, projectID)
instances, exists := sd.instances[serviceKey]
if !exists {
return nil, fmt.Errorf("service not found: %s", serviceName)
}
// Filter healthy instances only
var healthyInstances []*ServiceInstance
for _, instance := range instances {
if instance.Health.Status == "healthy" && instance.Status == "running" {
healthyInstances = append(healthyInstances, instance)
}
}
if len(healthyInstances) == 0 {
return nil, fmt.Errorf("no healthy instances found for service: %s", serviceName)
}
return healthyInstances, nil
}
// GetServiceEndpoints returns all endpoints for a service
func (sd *ServiceDiscovery) GetServiceEndpoints(ctx context.Context, serviceName, projectID string) ([]string, error) {
instances, err := sd.DiscoverService(ctx, serviceName, projectID)
if err != nil {
return nil, err
}
var endpoints []string
for _, instance := range instances {
endpoint := fmt.Sprintf("%s:%d", instance.IPAddress, instance.Port)
endpoints = append(endpoints, endpoint)
}
return endpoints, nil
}
// ResolveService resolves a service name to IP addresses (DNS-like functionality)
func (sd *ServiceDiscovery) ResolveService(ctx context.Context, serviceName, projectID string) ([]string, error) {
instances, err := sd.DiscoverService(ctx, serviceName, projectID)
if err != nil {
return nil, err
}
var ips []string
for _, instance := range instances {
ips = append(ips, instance.IPAddress)
}
return ips, nil
}
// GetDNSRecords generates DNS records for all services
func (sd *ServiceDiscovery) GetDNSRecords(ctx context.Context) ([]*DNSRecord, error) {
sd.mu.RLock()
defer sd.mu.RUnlock()
var records []*DNSRecord
// Group instances by service
serviceGroups := make(map[string][]*ServiceInstance)
for _, instance := range sd.services {
if instance.Health.Status == "healthy" && instance.Status == "running" {
serviceGroups[instance.ServiceName] = append(serviceGroups[instance.ServiceName], instance)
}
}
// Create A records for each service
for serviceName, instances := range serviceGroups {
var ips []string
for _, instance := range instances {
ips = append(ips, instance.IPAddress)
}
if len(ips) > 0 {
// Create A record
fqdn := fmt.Sprintf("%s.%s", serviceName, sd.dnsDomain)
record := &DNSRecord{
Name: fqdn,
Type: "A",
TTL: 30,
Records: ips,
}
records = append(records, record)
// Create SRV record for services with ports
if len(instances) > 0 && instances[0].Port > 0 {
srvRecord := &DNSRecord{
Name: fmt.Sprintf("_%s._tcp.%s", serviceName, sd.dnsDomain),
Type: "SRV",
TTL: 30,
Port: instances[0].Port,
Records: []string{fqdn},
Priority: 10,
Weight: 5,
}
records = append(records, srvRecord)
}
}
}
return records, nil
}
// UpdateServiceHealth updates the health status of a service instance
func (sd *ServiceDiscovery) UpdateServiceHealth(ctx context.Context, instanceID string, health HealthStatus) error {
sd.mu.Lock()
defer sd.mu.Unlock()
instance, exists := sd.services[instanceID]
if !exists {
return fmt.Errorf("service instance not found: %s", instanceID)
}
instance.Health = health
instance.LastSeen = time.Now()
return nil
}
// GetServiceStats returns statistics about services
func (sd *ServiceDiscovery) GetServiceStats(ctx context.Context) map[string]interface{} {
sd.mu.RLock()
defer sd.mu.RUnlock()
totalServices := len(sd.instances)
healthyInstances := 0
unhealthyInstances := 0
for _, instance := range sd.services {
if instance.Health.Status == "healthy" {
healthyInstances++
} else if instance.Health.Status == "unhealthy" {
unhealthyInstances++
}
}
return map[string]interface{}{
"total_services": totalServices,
"healthy_instances": healthyInstances,
"unhealthy_instances": unhealthyInstances,
"dns_domain": sd.dnsDomain,
"load_balancer": string(sd.loadBalancer.strategy),
}
}
// getServiceKey creates a unique key for a service
func (sd *ServiceDiscovery) getServiceKey(serviceName, projectID string) string {
return fmt.Sprintf("%s:%s", projectID, serviceName)
}
// startHealthCheck starts periodic health checking for a service instance
func (sd *ServiceDiscovery) startHealthCheck(instance *ServiceInstance) {
ticker := time.NewTicker(30 * time.Second)
defer ticker.Stop()
for {
select {
case <-ticker.C:
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
healthy := sd.checkInstanceHealth(ctx, instance)
health := HealthStatus{
LastCheck: time.Now(),
}
if healthy {
health.Status = "healthy"
health.CheckCount++
health.FailureCount = 0
health.Message = "Health check passed"
} else {
health.Status = "unhealthy"
health.CheckCount++
health.FailureCount++
health.Message = "Health check failed"
}
sd.UpdateServiceHealth(ctx, instance.ID, health)
cancel()
}
}
}
// checkInstanceHealth performs a health check on a service instance
func (sd *ServiceDiscovery) checkInstanceHealth(ctx context.Context, instance *ServiceInstance) bool {
// Simple TCP connection check
if instance.Port > 0 {
address := fmt.Sprintf("%s:%d", instance.IPAddress, instance.Port)
conn, err := net.DialTimeout("tcp", address, 5*time.Second)
if err != nil {
return false
}
conn.Close()
return true
}
// If no port specified, assume healthy
return true
}
// NewLoadBalancer creates a new load balancer
func NewLoadBalancer(strategy LoadBalancingStrategy) *LoadBalancer {
return &LoadBalancer{
strategy: strategy,
}
}
// SelectInstance selects an instance using the configured load balancing strategy
func (lb *LoadBalancer) SelectInstance(instances []*ServiceInstance, clientIP string) *ServiceInstance {
lb.mu.RLock()
defer lb.mu.RUnlock()
if len(instances) == 0 {
return nil
}
switch lb.strategy {
case StrategyRoundRobin:
return lb.roundRobinSelect(instances)
case StrategyLeastConnections:
return lb.leastConnectionsSelect(instances)
case StrategyIPHash:
return lb.ipHashSelect(instances, clientIP)
case StrategyRandom:
return lb.randomSelect(instances)
default:
return instances[0]
}
}
// roundRobinSelect implements round-robin load balancing
func (lb *LoadBalancer) roundRobinSelect(instances []*ServiceInstance) *ServiceInstance {
// Simple implementation - in production, maintain round-robin state
return instances[0]
}
// leastConnectionsSelect selects instance with least connections
func (lb *LoadBalancer) leastConnectionsSelect(instances []*ServiceInstance) *ServiceInstance {
var selected *ServiceInstance
minConnections := int(^uint(0) >> 1) // Max int
for _, instance := range instances {
// In a real implementation, track actual connections
connections := 0 // Placeholder
if connections < minConnections {
selected = instance
minConnections = connections
}
}
return selected
}
// ipHashSelect selects instance based on client IP hash
func (lb *LoadBalancer) ipHashSelect(instances []*ServiceInstance, clientIP string) *ServiceInstance {
if clientIP == "" {
return instances[0]
}
hash := 0
for _, c := range clientIP {
hash = hash*31 + int(c)
}
if hash < 0 {
hash = -hash
}
index := hash % len(instances)
return instances[index]
}
// randomSelect selects a random instance
func (lb *LoadBalancer) randomSelect(instances []*ServiceInstance) *ServiceInstance {
// Simple implementation - in production, use proper random
return instances[0]
}