This commit is contained in:
Tomas Dvorak
2026-03-20 16:17:39 +01:00
parent dc3b7e22ee
commit a9a89bed7c
3 changed files with 35 additions and 5 deletions
+32
View File
@@ -92,8 +92,19 @@ var (
embeddedScraplingHelperOnce sync.Once
embeddedScraplingHelperFile string
embeddedScraplingHelperErr error
// Simple in-memory cache for fetched pages
pageCache = make(map[string]*cacheEntry)
pageCacheMu sync.RWMutex
)
// cacheEntry is one value in pageCache: a fetched page body together with
// the time at which it was stored.
type cacheEntry struct {
// body is the page content passed to cachePage; a cache hit returns this
// slice as-is, without copying.
body []byte
// timestamp is when the entry was stored (time.Now() in cachePage). The
// lookup compares its age against cacheTTL to decide whether the entry is
// still usable.
timestamp time.Time
}
// cacheTTL is how long a cached page counts as fresh. The lookup in
// fetchPageWithFallbackOptions only serves entries younger than this;
// older entries are ignored and the page is fetched again.
const cacheTTL = 5 * time.Minute
// NewCloudflareClient creates a new Cloudflare Browser Rendering API client
func NewCloudflareClient() *CloudflareClient {
accountID := strings.TrimSpace(os.Getenv("CLOUDFLARE_ACCOUNT_ID"))
@@ -592,8 +603,20 @@ func fetchPageWithFallback(url string) ([]byte, error) {
// fetchPageWithFallbackOptions returns a fresh cached copy of the page when one exists; otherwise it tries Go HTTP first, then wget, then Scrapling, then Cloudflare Browser Rendering.
func fetchPageWithFallbackOptions(url string, opts fetchOptions) ([]byte, error) {
// Check cache first
pageCacheMu.RLock()
if entry, ok := pageCache[url]; ok {
if time.Since(entry.timestamp) < cacheTTL {
pageCacheMu.RUnlock()
log.Printf("Cache hit for %s", url)
return entry.body, nil
}
}
pageCacheMu.RUnlock()
body, err := fetchPageDirect(url, opts)
if err == nil {
cachePage(url, body)
return body, nil
}
log.Printf("Direct request failed for %s: %v", url, err)
@@ -601,6 +624,7 @@ func fetchPageWithFallbackOptions(url string, opts fetchOptions) ([]byte, error)
body, err = fetchPageWithWget(url, opts)
if err == nil {
log.Printf("Successfully retrieved content via wget for %s", url)
cachePage(url, body)
return body, nil
}
log.Printf("wget fallback failed for %s: %v", url, err)
@@ -608,6 +632,7 @@ func fetchPageWithFallbackOptions(url string, opts fetchOptions) ([]byte, error)
body, err = fetchPageWithScrapling(url, opts)
if err == nil {
log.Printf("Successfully retrieved content via Scrapling for %s", url)
cachePage(url, body)
return body, nil
}
log.Printf("Scrapling fallback failed for %s: %v", url, err)
@@ -629,6 +654,7 @@ func fetchPageWithFallbackOptions(url string, opts fetchOptions) ([]byte, error)
return nil, fmt.Errorf("Cloudflare crawl returned a challenge page")
}
log.Printf("Successfully retrieved content via Cloudflare crawl for %s", url)
cachePage(url, body)
return body, nil
}
@@ -639,6 +665,12 @@ func fetchPageWithFallbackOptions(url string, opts fetchOptions) ([]byte, error)
return nil, fmt.Errorf("go scraping failed, wget failed, Scrapling failed, and Cloudflare client is not available")
}
// cachePage stores body in the in-memory page cache under url, stamped with
// the current time, replacing any previous entry for the same url.
//
// Each call also removes every entry whose age has reached cacheTTL. Without
// this sweep an entry is only ever overwritten when the same url is fetched
// again, so pages requested once would stay in the map for the lifetime of
// the process.
//
// body is stored as-is (it is not copied), so the caller must not modify the
// slice after handing it over.
func cachePage(url string, body []byte) {
	now := time.Now()

	pageCacheMu.Lock()
	defer pageCacheMu.Unlock()

	// Evict stale entries. The comparison mirrors the lookup side, which
	// treats an entry as fresh only while its age is strictly below cacheTTL.
	// Deleting from a map while ranging over it is permitted in Go.
	for key, entry := range pageCache {
		if now.Sub(entry.timestamp) >= cacheTTL {
			delete(pageCache, key)
		}
	}

	pageCache[url] = &cacheEntry{body: body, timestamp: now}
}
// parseCompetitionMatchesFromFotbal scrapes matches from the public fotbal.cz
// competition page (e.g., https://www.fotbal.cz/souteze/turnaje/table/{id}).
// It filters to only include matches involving the given clubName if provided.