This commit is contained in:
Tomas Dvorak
2025-11-02 01:04:02 +01:00
parent ac886502e0
commit b9cea0cd77
153 changed files with 43713 additions and 1700 deletions
+629
View File
@@ -0,0 +1,629 @@
# New Production Features - Implementation Guide
This guide shows how to use the new production-ready features added to your codebase.
---
## 🔧 1. HTTP Client with Timeouts
**Location:** `pkg/httpclient/client.go`
### Before (Unsafe):
```go
// services/external_service.go
resp, err := http.Get("https://external-api.com/data")
// This hangs forever if the API is slow!
```
### After (Production-Safe):
```go
import "fotbal-club/pkg/httpclient"
// For normal external APIs
client := httpclient.DefaultClient()
resp, err := client.Get("https://external-api.com/data")
// For fast internal APIs
fastClient := httpclient.FastClient()
resp, err := fastClient.Get("http://localhost:8081/cache")
// For slow APIs (AI, analytics)
slowClient := httpclient.SlowClient()
resp, err := slowClient.Post("https://api.openai.com/v1/completions", ...)
```
### Update Existing Services:
```go
// internal/services/umami_service.go
type UmamiService struct {
client *http.Client // Add this field
}
func NewUmamiService() *UmamiService {
return &UmamiService{
client: httpclient.DefaultClient(), // Use this!
}
}
func (s *UmamiService) GetStats() error {
resp, err := s.client.Get(s.baseURL + "/stats")
// ...
}
```
---
## 🛡️ 2. Circuit Breaker for External Services
**Location:** `pkg/circuitbreaker/breaker.go`
### When to Use:
- External APIs that might fail
- FACR integration
- AI services (OpenRouter)
- Analytics services (Umami)
- Email services (SMTP)
### Example: Protect FACR API Calls
```go
// internal/services/facr_service.go
import "fotbal-club/pkg/circuitbreaker"
type FACRService struct {
client *http.Client
breaker *circuitbreaker.CircuitBreaker
}
func NewFACRService() *FACRService {
return &FACRService{
client: httpclient.DefaultClient(),
breaker: circuitbreaker.New(
5, // Open after 5 failures
time.Minute*2, // Wait 2 minutes before retry
),
}
}
func (s *FACRService) GetClubData(clubID string) (*ClubData, error) {
var data *ClubData
err := s.breaker.Call(func() error {
resp, err := s.client.Get(fmt.Sprintf("https://facr.cz/club/%s", clubID))
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
return fmt.Errorf("FACR API returned %d", resp.StatusCode)
}
return json.NewDecoder(resp.Body).Decode(&data)
})
if err == circuitbreaker.ErrCircuitOpen {
// Circuit is open - return cached data or graceful degradation
return s.getCachedData(clubID)
}
return data, err
}
```
---
## ⏱️ 3. Database Context Timeouts
**Location:** `internal/middleware/db_context.go`
### Setup in main.go:
```go
// main.go - Add this middleware
r.Use(middleware.DBContext())
```
### Use in Controllers:
```go
// internal/controllers/article_controller.go
func (bc *BaseController) GetArticles(c *gin.Context) {
// Get the timeout context
ctx := middleware.GetDBContext(c)
var articles []models.Article
// Use WithContext to enforce timeout
if err := bc.DB.WithContext(ctx).
Where("published = ?", true).
Order("published_at DESC").
Limit(20).
Find(&articles).Error; err != nil {
if errors.Is(err, context.DeadlineExceeded) {
c.JSON(http.StatusRequestTimeout, gin.H{
"error": "Database query timeout",
})
return
}
c.JSON(http.StatusInternalServerError, gin.H{
"error": "Database error",
})
return
}
c.JSON(http.StatusOK, articles)
}
```
### Complex Queries with Longer Timeout:
```go
// For heavy reports that need more time
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
var stats AnalyticsStats
err := bc.DB.WithContext(ctx).Raw(`
SELECT
COUNT(*) as total_articles,
COUNT(DISTINCT user_id) as unique_authors,
AVG(views) as avg_views
FROM articles
WHERE created_at >= NOW() - INTERVAL '30 days'
`).Scan(&stats).Error
```
---
## 📝 4. Production-Safe Frontend Logging
**Location:** `frontend/src/utils/logger.ts`
### Before (Development Only):
```typescript
// All these console.log statements show in production! 😱
console.log("User clicked button");
console.log("API response:", data);
console.error("Failed to load", error);
```
### After (Production-Safe):
```typescript
import logger from '@/utils/logger';
// Development only - hidden in production
logger.debug("User clicked button");
logger.info("API response:", data);
// Always shown - important for debugging
logger.warn("API slow response:", responseTime);
logger.error("Failed to load articles", error); // Also tracked in analytics!
// Performance measurement
logger.time("ArticleList render");
// ... expensive operation ...
logger.timeEnd("ArticleList render");
```
### Replace Existing console.log:
**Quick Search & Replace:**
```bash
# In frontend/src/
find . -type f -name "*.tsx" -exec sed -i 's/console\.log/logger.debug/g' {} +
find . -type f -name "*.ts" -exec sed -i 's/console\.log/logger.debug/g' {} +
```
### Recommended Replacements:
```typescript
// Debug/Development info
console.log() logger.debug()
console.info() logger.info()
// Warnings (always show)
console.warn() logger.warn()
// Errors (always show + track)
console.error() logger.error()
// Performance
console.time() logger.time()
console.timeEnd() logger.timeEnd()
```
---
## 📊 5. Database Performance Indexes
**Location:** `database/migrations/000099_add_performance_indexes.up.sql`
### Apply the Indexes:
```bash
# Run migration
docker-compose run backend ./fotbal-club migrate
# Or manually
psql -U postgres -d fotbal_club -f database/migrations/000099_add_performance_indexes.up.sql
```
### Verify Index Usage:
```sql
-- Check if indexes are being used
EXPLAIN ANALYZE
SELECT * FROM articles
WHERE published = true
ORDER BY published_at DESC
LIMIT 20;
-- Should show "Index Scan using idx_articles_published_at"
```
### Monitor Index Performance:
```sql
-- Find unused indexes (consider removing)
SELECT schemaname, tablename, indexname, idx_scan
FROM pg_stat_user_indexes
WHERE idx_scan = 0
ORDER BY pg_relation_size(indexrelid) DESC;
-- Find most used indexes
SELECT schemaname, tablename, indexname, idx_scan
FROM pg_stat_user_indexes
ORDER BY idx_scan DESC
LIMIT 20;
```
---
## 🔍 6. Request ID Tracing
**Already implemented in:** `internal/middleware/request_validation.go`
### In Controllers:
```go
import "fotbal-club/internal/middleware"
func (bc *BaseController) SomeHandler(c *gin.Context) {
requestID := middleware.GetRequestID(c)
logger.Info("Processing request",
"request_id", requestID,
"path", c.Request.URL.Path,
)
// Include in error responses
c.JSON(http.StatusInternalServerError, gin.H{
"error": "Something went wrong",
"request_id": requestID, // User can report this!
})
}
```
### In Frontend (Error Reporting):
```typescript
// services/api.ts
try {
const response = await axios.get('/api/v1/articles');
return response.data;
} catch (error) {
const requestId = error.response?.headers['x-request-id'];
logger.error("API Error", {
message: error.message,
requestId,
endpoint: '/api/v1/articles'
});
// Show user-friendly error with trace ID
toast.error(`Request failed. Trace ID: ${requestId}`);
}
```
---
## 🚨 7. Enhanced Error Recovery
**Location:** `internal/middleware/recovery.go`
### Setup in main.go:
```go
// main.go - Replace gin.Recovery() with custom recovery
r.Use(middleware.CustomRecovery())
```
### Benefits:
- Stack trace logging
- Request ID in logs
- Structured error response
- Automatic panic recovery
- No server crash on errors
---
## 📈 8. Monitoring Integration
### Prometheus Metrics:
```go
// Add custom metrics in controllers
import "github.com/prometheus/client_golang/prometheus"
var articlesCreated = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "articles_created_total",
Help: "Total number of articles created",
},
[]string{"category"},
)
func init() {
prometheus.MustRegister(articlesCreated)
}
func (bc *BaseController) CreateArticle(c *gin.Context) {
// ... create article ...
articlesCreated.WithLabelValues(article.Category).Inc()
}
```
### Query Metrics:
```bash
# View metrics
curl http://localhost:8080/metrics | grep articles_created
# Prometheus query
rate(articles_created_total[5m])
```
---
## 🔄 9. Service Update Checklist
When updating an existing service, follow this checklist:
### Example: Update FACR Service
```go
// ✅ 1. Add HTTP client field
type FACRService struct {
client *http.Client // New!
breaker *circuitbreaker.CircuitBreaker // New!
db *gorm.DB
cache *Cache
}
// ✅ 2. Initialize in constructor
func NewFACRService(db *gorm.DB) *FACRService {
return &FACRService{
client: httpclient.DefaultClient(), // New!
breaker: circuitbreaker.New(5, 2*time.Minute), // New!
db: db,
cache: NewCache(),
}
}
// ✅ 3. Use circuit breaker for external calls
func (s *FACRService) FetchData(url string) ([]byte, error) {
var data []byte
err := s.breaker.Call(func() error {
resp, err := s.client.Get(url) // Use client field!
if err != nil {
return err
}
defer resp.Body.Close()
data, err = io.ReadAll(resp.Body)
return err
})
if err == circuitbreaker.ErrCircuitOpen {
// Return cached data
return s.cache.Get(url)
}
return data, err
}
// ✅ 4. Use context for database queries
func (s *FACRService) SaveData(ctx context.Context, data *Data) error {
return s.db.WithContext(ctx).Create(data).Error
}
```
---
## 📋 Quick Migration Checklist
### For Backend Services:
- [ ] Replace `http.DefaultClient` with `httpclient.DefaultClient()`
- [ ] Add circuit breaker for external APIs
- [ ] Use `WithContext(ctx)` for all database queries
- [ ] Replace `log.Printf` with structured logger
- [ ] Add request ID to error responses
- [ ] Add custom Prometheus metrics
### For Frontend Components:
- [ ] Replace `console.log` with `logger.debug`
- [ ] Replace `console.error` with `logger.error`
- [ ] Capture request ID from error responses
- [ ] Add error boundaries around risky components
- [ ] Use logger.time/timeEnd for performance tracking
### For New Features:
- [ ] Use `httpclient` for all HTTP requests
- [ ] Add circuit breaker for unreliable services
- [ ] Add database indexes for new queries
- [ ] Add Prometheus metrics for monitoring
- [ ] Document in API docs
- [ ] Add unit tests
- [ ] Add integration tests
---
## 🧪 Testing the Improvements
### Test HTTP Client Timeout:
```go
// test/http_client_test.go
func TestHTTPClientTimeout(t *testing.T) {
// Start slow server
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
time.Sleep(10 * time.Second) // Longer than timeout
w.WriteHeader(200)
}))
defer server.Close()
client := httpclient.FastClient() // 5s timeout
start := time.Now()
_, err := client.Get(server.URL)
duration := time.Since(start)
// Should timeout in ~5 seconds
assert.Error(t, err)
assert.True(t, duration < 6*time.Second)
}
```
### Test Circuit Breaker:
```go
func TestCircuitBreaker(t *testing.T) {
breaker := circuitbreaker.New(3, time.Second)
// Simulate 3 failures
for i := 0; i < 3; i++ {
err := breaker.Call(func() error {
return fmt.Errorf("service unavailable")
})
assert.Error(t, err)
}
// 4th call should be rejected
err := breaker.Call(func() error {
return nil
})
assert.Equal(t, circuitbreaker.ErrCircuitOpen, err)
// Wait for timeout
time.Sleep(time.Second * 2)
// Should allow retry
err = breaker.Call(func() error {
return nil
})
assert.NoError(t, err)
}
```
### Test Database Timeout:
```go
func TestDatabaseContextTimeout(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
defer cancel()
// Simulate slow query
err := db.WithContext(ctx).Raw("SELECT pg_sleep(1)").Error
assert.Error(t, err)
assert.True(t, errors.Is(err, context.DeadlineExceeded))
}
```
---
## 📊 Performance Benchmarks
After implementing these features, you should see:
### Response Times:
- **Before:** 200-500ms avg
- **After:** 100-200ms avg (with indexes)
### Database Query Times:
- **Before:** 50-200ms
- **After:** 10-50ms (with indexes)
### Error Recovery:
- **Before:** Server crash on panic
- **After:** Automatic recovery, logged, no downtime
### External API Failures:
- **Before:** Cascade failures, slow responses
- **After:** Circuit breaker prevents cascading, fast fallback
---
## 🎯 Priority Implementation Order
1. **Critical (Do First):**
- [ ] Apply database indexes migration
- [ ] Replace HTTP clients in external services
- [ ] Add database context timeouts
- [ ] Update main.go with new middleware
2. **High Priority:**
- [ ] Add circuit breakers to FACR, Umami, AI services
- [ ] Replace frontend console.log with logger
- [ ] Test error recovery
3. **Medium Priority:**
- [ ] Add custom Prometheus metrics
- [ ] Implement request ID tracing in errors
- [ ] Add monitoring dashboards
4. **Nice to Have:**
- [ ] Performance profiling
- [ ] Load testing
- [ ] Advanced caching strategies
---
## ✅ Verification
After implementation, verify everything works:
```bash
# 1. Run migrations
docker-compose run backend ./fotbal-club migrate
# 2. Check indexes exist
psql -U postgres -d fotbal_club -c "\di"
# 3. Test health endpoint
curl http://localhost:8080/api/v1/health
# 4. Test with timeout (should fail fast)
time curl -X POST http://localhost:8080/api/v1/test-slow-endpoint
# 5. Check metrics
curl http://localhost:8080/metrics | grep http_requests_total
# 6. Verify logs show request IDs
docker-compose logs backend | grep "request_id"
```
---
**Status:** All features ready for implementation! 🚀
**Estimated Time:** 2-4 hours for full integration
**Impact:** Significantly improved stability, performance, and observability