chore: cleanup temporary build artifacts and patches

Remove obsolete Dockerfile patches, scraper binaries, and test-execution scripts to clean up the repository:
- Delete Dockerfile.patch
- Remove the facr-scraper and facr-scraper-new binaries
- Remove the test_exec script
feat(scraper): implement CloakBrowser support and enhance request stealth
2026-06-03 20:12:57 +00:00 · 2026-05-17 18:10:21 +02:00 · 2026-05-17 17:52:52 +02:00
8 changed files with 602 additions and 23 deletions
@@ -31,6 +31,10 @@ RUN pip install --no-cache-dir -r requirements-scrapling.txt
 # Install Playwright browsers with deps in one layer
 RUN playwright install chromium --with-deps

+# Pre-download CloakBrowser patched Chromium binary so it doesn't
+# download at runtime (saves ~10-20s per cold-start request).
+RUN python -m cloakbrowser install
+
 # Fix Python symlinks
 RUN ln -sf /usr/local/bin/python /opt/scrapling/bin/python \
    && ln -sf /usr/local/bin/python /opt/scrapling/bin/python3
@@ -50,6 +54,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
    libxkbcommon0 libatspi2.0-0 libx11-6 libxcomposite1 libxdamage1 \
    libxext6 libxfixes3 libxrandr2 libgbm1 libcairo2 libpango-1.0-0 \
    libasound2 \
+    fonts-liberation fonts-noto-color-emoji fonts-noto-core \
+    fontconfig locales \
    && rm -rf /var/lib/apt/lists/*

 # Create non-root user
@@ -64,10 +70,14 @@ COPY --from=python-builder /opt/scrapling /opt/scrapling
 # Copy Playwright browser cache
 COPY --from=python-builder /root/.cache/ms-playwright /home/scraper/.cache/ms-playwright

-# Copy scrapling script
-COPY scripts/scrapling_fetch.py /opt/scrapling/scripts/scrapling_fetch.py
+# Copy CloakBrowser patched Chromium binary cache
+COPY --from=python-builder /root/.cloakbrowser /home/scraper/.cloakbrowser

-# Create cache directory for Playwright
+# Copy scrapling and cloakbrowser scripts
+COPY scripts/scrapling_fetch.py /opt/scrapling/scripts/scrapling_fetch.py
+COPY scripts/cloakbrowser_fetch.py /opt/scrapling/scripts/cloakbrowser_fetch.py
+
+# Create cache directory and fix permissions
 RUN mkdir -p /home/scraper/.cache && chown -R scraper:scraper /home/scraper /opt/scrapling

 USER scraper
@@ -0,0 +1,2 @@
+FROM facr-scraper:cloakbrowser
+COPY facr-scraper /usr/local/bin/facr-scraper
@@ -11,6 +11,10 @@ services:
      - CLOUDFLARE_API_TOKEN=${CLOUDFLARE_API_TOKEN}
      - SCRAPLING_PYTHON_BIN=/opt/scrapling/bin/python
      - SCRAPLING_SCRIPT=/opt/scrapling/scripts/scrapling_fetch.py
+      - CLOAKBROWSER_PYTHON_BIN=/opt/scrapling/bin/python
+      - CLOAKBROWSER_SCRIPT=/opt/scrapling/scripts/cloakbrowser_fetch.py
+      - CLOAKBROWSER_TIMEZONE=${CLOAKBROWSER_TIMEZONE:-Europe/Prague}
+      - CLOAKBROWSER_LOCALE=${CLOAKBROWSER_LOCALE:-cs-CZ}
      - DEBUG_SAVE_HTML=${DEBUG_SAVE_HTML:-}
    restart: unless-stopped
    deploy:
@@ -9,6 +9,7 @@ import (
 	"io"
 	"log"
 	"net/http"
+	"net/http/cookiejar"
 	neturl "net/url"
 	"os"
 	"os/exec"
@@ -22,6 +23,7 @@ import (

 	"github.com/PuerkitoBio/goquery"
 	"github.com/gorilla/mux"
+	"golang.org/x/net/publicsuffix"
 )

 type Competition struct {
@@ -56,6 +58,10 @@ var (
 	// Simple in-memory cache for fetched pages
 	pageCache   = make(map[string]*cacheEntry)
 	pageCacheMu sync.RWMutex
+
+	// Club response cache for expensive multi-fetch endpoints
+	clubCache   = make(map[string]*clubCacheEntry)
+	clubCacheMu sync.RWMutex
 )

 type cacheEntry struct {
@@ -63,7 +69,13 @@ type cacheEntry struct {
 	timestamp time.Time
 }

+// clubCacheEntry is one cached club response: the encoded bytes a handler
+// produced and the time they were stored.
+type clubCacheEntry struct {
+	data      []byte    // encoded response body, written back verbatim on a cache hit
+	timestamp time.Time // when the entry was stored; compared against clubCacheTTL
+}
+
 const cacheTTL = 15 * time.Minute
+// clubCacheTTL is how long a clubCache entry is served before the handlers
+// rebuild and replace it.
+const clubCacheTTL = 30 * time.Minute

 // domainBreakers is a per-domain circuit breaker map so failures on one site
 // don't block Scrapling for unrelated sites.
@@ -76,6 +88,60 @@ var domainBreakers struct {
 // and resource exhaustion.
 var scraplingSem = newSemaphore(2)

+// domainReqSem limits concurrent requests to the same domain to avoid
+// triggering Cloudflare rate-limiting. Semaphores are created on demand by
+// getDomainReqSem.
+var domainReqSem struct {
+	mu   sync.RWMutex          // guards sems
+	sems map[string]*semaphore // keyed by URL host; nil until the first getDomainReqSem call
+}
+
+// getDomainReqSem returns the request semaphore registered for domain,
+// creating it with newSemaphore(1) on first use. The lookup runs under the
+// read lock; creation takes the write lock and checks the map again so that
+// concurrent callers for the same domain end up sharing one semaphore.
+func getDomainReqSem(domain string) *semaphore {
+	domainReqSem.mu.RLock()
+	existing, found := domainReqSem.sems[domain]
+	domainReqSem.mu.RUnlock()
+	if found {
+		return existing
+	}
+
+	domainReqSem.mu.Lock()
+	defer domainReqSem.mu.Unlock()
+	if domainReqSem.sems == nil {
+		// First caller ever: the map is allocated lazily.
+		domainReqSem.sems = make(map[string]*semaphore)
+	}
+	// Another goroutine may have registered the domain between the two locks.
+	if existing, found = domainReqSem.sems[domain]; found {
+		return existing
+	}
+	created := newSemaphore(1)
+	domainReqSem.sems[domain] = created
+	return created
+}
+
+// sharedHTTPClient is the one HTTP client reused for direct requests. It has
+// a 15-second timeout and a cookie jar, so cookies (including any Cloudflare
+// clearance) survive across requests. Its redirect policy stops after 10 hops
+// and re-applies a fixed set of browser headers from the previous hop.
+// If the jar cannot be created, a plain client with only the timeout is used.
+var sharedHTTPClient = func() *http.Client {
+	client := &http.Client{Timeout: 15 * time.Second}
+
+	jar, err := cookiejar.New(&cookiejar.Options{PublicSuffixList: publicsuffix.List})
+	if err != nil {
+		log.Printf("failed to create cookie jar, falling back to default client: %v", err)
+		return client
+	}
+	client.Jar = jar
+
+	// Headers copied from the previous request onto each redirect request.
+	forwarded := []string{
+		"User-Agent", "Accept", "Accept-Language", "Referer",
+		"Sec-Ch-Ua", "Sec-Ch-Ua-Mobile", "Sec-Ch-Ua-Platform",
+		"Sec-Fetch-Dest", "Sec-Fetch-Mode", "Sec-Fetch-Site",
+		"Upgrade-Insecure-Requests",
+	}
+	client.CheckRedirect = func(req *http.Request, via []*http.Request) error {
+		if len(via) >= 10 {
+			return fmt.Errorf("stopped after 10 redirects")
+		}
+		previous := via[len(via)-1].Header
+		for _, name := range forwarded {
+			if value := previous.Get(name); value != "" {
+				req.Header.Set(name, value)
+			}
+		}
+		return nil
+	}
+	return client
+}()
+
 type circuitBreaker struct {
 	failures  int32
 	lastFail  time.Time
@@ -163,8 +229,21 @@ func newBrowserRequest(url string, opts fetchOptions) (*http.Request, error) {
 	req.Header.Set("User-Agent", browserUserAgent)
 	req.Header.Set("Accept", browserAccept)
 	req.Header.Set("Accept-Language", browserAcceptLanguage)
+	req.Header.Set("Accept-Encoding", "gzip, deflate, br")
+	req.Header.Set("Connection", "keep-alive")
+	req.Header.Set("Upgrade-Insecure-Requests", "1")
+	req.Header.Set("Sec-Ch-Ua", `"Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"`)
+	req.Header.Set("Sec-Ch-Ua-Mobile", "?0")
+	req.Header.Set("Sec-Ch-Ua-Platform", `"Windows"`)
+	req.Header.Set("Sec-Fetch-Dest", "document")
+	req.Header.Set("Sec-Fetch-Mode", "navigate")
+	req.Header.Set("Sec-Fetch-Site", "none")
+	req.Header.Set("Sec-Fetch-User", "?1")
+	req.Header.Set("DNT", "1")
+	req.Header.Set("Cache-Control", "max-age=0")
 	if opts.Referer != "" {
 		req.Header.Set("Referer", opts.Referer)
+		req.Header.Set("Sec-Fetch-Site", "same-origin")
 	}

 	return req, nil
@@ -209,14 +288,25 @@ func compactErrorText(s string) string {
 }

 func fetchPageDirectOnce(ctx context.Context, url string, opts fetchOptions) ([]byte, error) {
+	parsed, err := neturl.Parse(url)
+	if err != nil {
+		return nil, fmt.Errorf("invalid URL: %w", err)
+	}
+
+	// Serialize requests per domain to avoid triggering rate limits.
+	sem := getDomainReqSem(parsed.Host)
+	if err := sem.Acquire(ctx); err != nil {
+		return nil, err
+	}
+	defer sem.Release()
+
 	req, err := newBrowserRequest(url, opts)
 	if err != nil {
 		return nil, err
 	}
 	req = req.WithContext(ctx)

-	client := &http.Client{Timeout: 15 * time.Second}
-	resp, err := client.Do(req)
+	resp, err := sharedHTTPClient.Do(req)
 	if err != nil {
 		return nil, fmt.Errorf("direct request failed: %w", err)
 	}
@@ -463,6 +553,129 @@ func findScraplingPython() string {
 	)
 }

+// findCloakBrowserPython picks the Python interpreter used to run the
+// CloakBrowser helper script. Candidates are handed to firstExecutable in this
+// order: $CLOAKBROWSER_PYTHON_BIN, then bin/python3 and bin/python inside
+// .venv-scrapling and .venv under the working directory, the same four paths
+// under the executable's directory, and finally bare "python3" and "python".
+// The result is whatever firstExecutable returns (callers treat "" as
+// "no interpreter found").
+func findCloakBrowserPython() string {
+	cwd, _ := os.Getwd()
+
+	exeDir := ""
+	if exePath, _ := os.Executable(); exePath != "" {
+		exeDir = filepath.Dir(exePath)
+	}
+
+	candidates := []string{os.Getenv("CLOAKBROWSER_PYTHON_BIN")}
+	for _, root := range []string{cwd, exeDir} {
+		for _, venv := range []string{".venv-scrapling", ".venv"} {
+			for _, bin := range []string{"python3", "python"} {
+				candidates = append(candidates, filepath.Join(root, venv, "bin", bin))
+			}
+		}
+	}
+	candidates = append(candidates, "python3", "python")
+
+	return firstExecutable(candidates...)
+}
+
+func findCloakBrowserScript() (string, error) {
+	cwd, _ := os.Getwd()
+	candidates := []string{
+		os.Getenv("CLOAKBROWSER_SCRIPT"),
+		filepath.Join(cwd, "scripts", "cloakbrowser_fetch.py"),
+		filepath.Join(cwd, "cloakbrowser_fetch.py"),
+		"/opt/scrapling/scripts/cloakbrowser_fetch.py",
+	}
+	exePath, _ := os.Executable()
+	if exePath != "" {
+		exeDir := filepath.Dir(exePath)
+		candidates = append(candidates,
+			filepath.Join(exeDir, "scripts", "cloakbrowser_fetch.py"),
+			filepath.Join(exeDir, "cloakbrowser_fetch.py"),
+		)
+	}
+	for _, p := range candidates {
+		if p != "" {
+			if _, err := os.Stat(p); err == nil {
+				return p, nil
+			}
+		}
+	}
+	return "", fmt.Errorf("cloakbrowser_fetch.py not found")
+}
+
+// fetchPageWithCloakBrowser uses the CloakBrowser patched Chromium to fetch
+// pages that are blocked by Cloudflare. It is ~3x faster than Scrapling for
+// fotbal.cz because it passes bot detection without triggering challenge loops.
+//
+// The page is fetched by running the cloakbrowser_fetch.py helper as a
+// subprocess and reading the HTML from its stdout. The call is skipped (with
+// an error) when the URL does not parse, the per-domain circuit breaker is
+// open, or no Python interpreter / helper script can be found.
+//
+// It returns the page body on success. An empty body, a Cloudflare challenge
+// page, or a subprocess failure yields an error and records a failure on the
+// domain's circuit breaker (subprocess failures only when the context had not
+// expired or been cancelled).
+func fetchPageWithCloakBrowser(ctx context.Context, url string, opts fetchOptions) ([]byte, error) {
+	parsedURL, err := neturl.Parse(url)
+	if err != nil {
+		return nil, fmt.Errorf("CloakBrowser skipped: invalid URL: %w", err)
+	}
+	domain := parsedURL.Host
+
+	// Do not launch a browser for a domain that has been failing recently.
+	if getDomainBreaker(domain).IsOpen() {
+		return nil, fmt.Errorf("CloakBrowser skipped: circuit breaker is open for %s", domain)
+	}
+
+	pythonBin := findCloakBrowserPython()
+	if pythonBin == "" {
+		return nil, fmt.Errorf("CloakBrowser skipped: no Python runtime found")
+	}
+
+	helperScript, err := findCloakBrowserScript()
+	if err != nil {
+		return nil, fmt.Errorf("CloakBrowser skipped: %w", err)
+	}
+
+	// Acquire global Scrapling semaphore to limit concurrent Chromium launches
+	// (the same semaphore the Scrapling fallback uses, so both share one budget).
+	if err := scraplingSem.Acquire(ctx); err != nil {
+		return nil, fmt.Errorf("CloakBrowser skipped: %w", err)
+	}
+	defer scraplingSem.Release()
+
+	// The timeout starts only after the semaphore is held, so time spent
+	// waiting for a slot does not eat into the subprocess budget.
+	ctx, cancel := context.WithTimeout(ctx, 45*time.Second)
+	defer cancel()
+
+	// Helper invocation: <script> <url> [referer].
+	args := []string{helperScript, url}
+	if opts.Referer != "" {
+		args = append(args, opts.Referer)
+	}
+
+	cmd := exec.CommandContext(ctx, pythonBin, args...)
+	var stdout bytes.Buffer
+	var stderr bytes.Buffer
+	cmd.Stdout = &stdout
+	cmd.Stderr = &stderr
+	// Run the helper in its own session so that it and the processes it spawns
+	// can be signalled together through the negative PID below.
+	cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
+
+	if err := cmd.Run(); err != nil {
+		if cmd.Process != nil {
+			// Kill the whole process group, not just the Python process.
+			syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
+		}
+		// Prefer the helper's stderr; fall back to the exec error text.
+		details := compactErrorText(stderr.String())
+		if details == "" {
+			details = compactErrorText(err.Error())
+		}
+		// A timeout or cancellation is not counted against the domain.
+		if ctx.Err() == nil {
+			getDomainBreaker(domain).RecordFailure()
+		}
+		return nil, fmt.Errorf("CloakBrowser request failed: %s", details)
+	}
+
+	body := stdout.Bytes()
+	if len(body) == 0 {
+		getDomainBreaker(domain).RecordFailure()
+		return nil, fmt.Errorf("CloakBrowser returned an empty body")
+	}
+	// A zero exit status can still carry the challenge page instead of content.
+	if looksLikeCloudflareBlock(body) {
+		getDomainBreaker(domain).RecordFailure()
+		return nil, fmt.Errorf("CloakBrowser returned a Cloudflare challenge page")
+	}
+
+	getDomainBreaker(domain).RecordSuccess()
+	return body, nil
+}
+
 func fetchPageWithScrapling(ctx context.Context, url string, opts fetchOptions) ([]byte, error) {
 	parsedURL, err := neturl.Parse(url)
 	if err != nil {
@@ -490,10 +703,10 @@ func fetchPageWithScrapling(ctx context.Context, url string, opts fetchOptions)
 	}
 	defer scraplingSem.Release()

-	ctx, cancel := context.WithTimeout(ctx, 90*time.Second)
+	ctx, cancel := context.WithTimeout(ctx, 120*time.Second)
 	defer cancel()

-	args := []string{helperScript, "--url", url, "--timeout-ms", "60000", "--wait-ms", "500"}
+	args := []string{helperScript, "--url", url, "--timeout-ms", "90000", "--wait-ms", "500"}
 	if opts.Referer != "" {
 		args = append(args, "--referer", opts.Referer)
 	}
@@ -540,9 +753,9 @@ func fetchPageWithFallback(ctx context.Context, url string) ([]byte, error) {
 	return fetchPageWithFallbackOptions(ctx, url, fetchOptions{})
 }

-// fetchPageWithFallback tries Go HTTP first, then curl/wget, then Scrapling.
-// When direct HTTP returns a Cloudflare block, curl/wget are skipped since they
-// will just return the same challenge page and waste ~20 seconds.
+// fetchPageWithFallback tries Go HTTP first, then curl/wget, then CloakBrowser,
+// then Scrapling. When direct HTTP returns a Cloudflare block, curl/wget are
+// skipped since they will just return the same challenge page and waste ~20s.
 func fetchPageWithFallbackOptions(ctx context.Context, url string, opts fetchOptions) ([]byte, error) {
 	if err := ctx.Err(); err != nil {
 		return nil, err
@@ -559,6 +772,7 @@ func fetchPageWithFallbackOptions(ctx context.Context, url string, opts fetchOpt
 	}
 	pageCacheMu.RUnlock()

+	// Try direct HTTP first
 	body, err := fetchPageDirect(ctx, url, opts)
 	if err == nil {
 		cachePage(url, body)
@@ -567,10 +781,10 @@ func fetchPageWithFallbackOptions(ctx context.Context, url string, opts fetchOpt
 	log.Printf("Direct request failed for %s: %v", url, err)

 	// If direct HTTP returned a Cloudflare block, skip curl/wget time-wasters
-	// and go straight to Scrapling which can solve the challenge.
+	// and go straight to CloakBrowser which can solve the challenge silently.
 	if strings.Contains(err.Error(), "403") || strings.Contains(err.Error(), "Cloudflare") {
 		log.Printf("Skipping curl/wget fallbacks for %s: direct HTTP hit Cloudflare wall", url)
-		goto scraplingFallback
+		goto cloakBrowserFallback
 	}

 	body, err = fetchPageWithCurl(ctx, url, opts)
@@ -589,7 +803,15 @@ func fetchPageWithFallbackOptions(ctx context.Context, url string, opts fetchOpt
 	}
 	log.Printf("wget fallback failed for %s: %v", url, err)

-scraplingFallback:
+cloakBrowserFallback:
+	body, err = fetchPageWithCloakBrowser(ctx, url, opts)
+	if err == nil {
+		log.Printf("Successfully retrieved content via CloakBrowser for %s", url)
+		cachePage(url, body)
+		return body, nil
+	}
+	log.Printf("CloakBrowser fallback failed for %s: %v", url, err)
+
 	body, err = fetchPageWithScrapling(ctx, url, opts)
 	if err == nil {
 		log.Printf("Successfully retrieved content via Scrapling for %s", url)
@@ -601,6 +823,12 @@ scraplingFallback:
 	return nil, fmt.Errorf("all fetch methods failed for %s: %w", url, err)
 }

+// detachedContext builds a context that is derived from context.Background()
+// rather than from a request, bounded only by the given timeout. Handler
+// goroutines use it so their work is not aborted when r.Context() is
+// cancelled. The caller must invoke the returned cancel function.
+func detachedContext(timeout time.Duration) (context.Context, context.CancelFunc) {
+	ctx, cancel := context.WithTimeout(context.Background(), timeout)
+	return ctx, cancel
+}
+
 func cachePage(url string, body []byte) {
 	pageCacheMu.Lock()
 	pageCache[url] = &cacheEntry{body: body, timestamp: time.Now()}
@@ -783,12 +1011,25 @@ func parseCompetitionMatchesFromFotbal(ctx context.Context, pageURL, clubType, c

 // parseCompetitionMatchesFromIS scrapes matches from the IS portal as fallback.
 func parseCompetitionMatchesFromIS(ctx context.Context, detailURL, clubType, clubName, clubID string) []Match {
-	req, err := http.NewRequestWithContext(ctx, "GET", detailURL, nil)
+	parsed, err := neturl.Parse(detailURL)
+	if err != nil {
+		log.Printf("IS matches invalid URL %s: %v", detailURL, err)
+		return nil
+	}
+	sem := getDomainReqSem(parsed.Host)
+	if err := sem.Acquire(ctx); err != nil {
+		log.Printf("IS matches domain semaphore error for %s: %v", detailURL, err)
+		return nil
+	}
+	defer sem.Release()
+
+	req, err := newBrowserRequest(detailURL, fetchOptions{})
 	if err != nil {
 		log.Printf("IS matches request error for %s: %v", detailURL, err)
 		return nil
 	}
-	resp, err := http.DefaultClient.Do(req)
+	req = req.WithContext(ctx)
+	resp, err := sharedHTTPClient.Do(req)
 	if err != nil {
 		log.Printf("IS matches fetch error for %s: %v", detailURL, err)
 		return nil
@@ -1097,7 +1338,7 @@ func getLogoBySearch(name string) string {
 	if v, ok := logoCache[key]; ok {
 		return v
 	}
-	client := &http.Client{Timeout: 5 * time.Second}
+	client := &http.Client{Timeout: 60 * time.Second}
 	// Prefer simplified last-word token (e.g., "krnov") to improve hit rate for logos
 	query := simplifyClubQuery(name)
 	if query == "" {
@@ -1334,6 +1575,20 @@ func getClubTables(w http.ResponseWriter, r *http.Request) {
 		return
 	}

+	// Check club response cache
+	cacheKey := "table:" + clubType + ":" + clubID
+	clubCacheMu.RLock()
+	if entry, ok := clubCache[cacheKey]; ok {
+		if time.Since(entry.timestamp) < clubCacheTTL {
+			clubCacheMu.RUnlock()
+			w.Header().Set("Content-Type", "application/json")
+			w.Write(entry.data)
+			log.Printf("Club cache hit for %s", cacheKey)
+			return
+		}
+	}
+	clubCacheMu.RUnlock()
+
 	// Validate club type
 	var baseURL string
 	var sportParam string
@@ -1414,9 +1669,12 @@ func getClubTables(w http.ResponseWriter, r *http.Request) {
 			}
 			defer sem.Release()

+			ctx, cancel := detachedContext(30 * time.Second)
+			defer cancel()
+
 			comp := &competitions[idx]
 			tableURL := fmt.Sprintf("https://is.fotbal.cz/public/souteze/tabulky-souteze.aspx?req=%s&sport=%s", comp.ID, sportParam)
-			req, err := http.NewRequestWithContext(r.Context(), "GET", tableURL, nil)
+			req, err := http.NewRequestWithContext(ctx, "GET", tableURL, nil)
 			if err != nil {
 				log.Printf("error creating request for competition table %s: %v", comp.ID, err)
 				return
@@ -1518,8 +1776,18 @@ func getClubTables(w http.ResponseWriter, r *http.Request) {
 		Competitions:   competitions,
 	}

+	var buf bytes.Buffer
+	if err := json.NewEncoder(&buf).Encode(clubInfo); err != nil {
+		http.Error(w, fmt.Sprintf("JSON encode error: %v", err), http.StatusInternalServerError)
+		return
+	}
+	data := buf.Bytes()
+	clubCacheMu.Lock()
+	clubCache[cacheKey] = &clubCacheEntry{data: data, timestamp: time.Now()}
+	clubCacheMu.Unlock()
+
 	w.Header().Set("Content-Type", "application/json")
-	json.NewEncoder(w).Encode(clubInfo)
+	w.Write(data)
 }

 // getClubInfo returns club info with competitions and matches
@@ -1531,6 +1799,21 @@ func getClubInfo(w http.ResponseWriter, r *http.Request) {
 		http.Error(w, "Club ID is required", http.StatusBadRequest)
 		return
 	}
+
+	// Check club response cache
+	cacheKey := "info:" + clubType + ":" + clubID
+	clubCacheMu.RLock()
+	if entry, ok := clubCache[cacheKey]; ok {
+		if time.Since(entry.timestamp) < clubCacheTTL {
+			clubCacheMu.RUnlock()
+			w.Header().Set("Content-Type", "application/json")
+			w.Write(entry.data)
+			log.Printf("Club cache hit for %s", cacheKey)
+			return
+		}
+	}
+	clubCacheMu.RUnlock()
+
 	var baseURL, sportParam string
 	switch clubType {
 	case "football":
@@ -1613,13 +1896,16 @@ func getClubInfo(w http.ResponseWriter, r *http.Request) {
 			}
 			defer sem.Release()

+			ctx, cancel := detachedContext(120 * time.Second)
+			defer cancel()
+
 			comp := &competitions[idx]
 			matchesLink := comp.MatchesLink
 			// 1) Try parsing from the public fotbal.cz competition page (matches_link)
-			matches := parseCompetitionMatchesFromFotbal(r.Context(), matchesLink, clubType, clubName, clubID)
+			matches := parseCompetitionMatchesFromFotbal(ctx, matchesLink, clubType, clubName, clubID)
 			// Always try IS as well and prefer it if it provides at least as many matches
 			detailURL := fmt.Sprintf("https://is.fotbal.cz/public/souteze/detail-souteze.aspx?req=%s&sport=%s", comp.ID, sportParam)
-			isMatches := parseCompetitionMatchesFromIS(r.Context(), detailURL, clubType, clubName, clubID)
+			isMatches := parseCompetitionMatchesFromIS(ctx, detailURL, clubType, clubName, clubID)
 			// Prefer IS whenever it yields any results, as IS often contains alias team names
 			if len(isMatches) > 0 {
 				matches = isMatches
@@ -1643,8 +1929,18 @@ func getClubInfo(w http.ResponseWriter, r *http.Request) {
 		Competitions:   competitions,
 	}

+	var buf bytes.Buffer
+	if err := json.NewEncoder(&buf).Encode(clubInfo); err != nil {
+		http.Error(w, fmt.Sprintf("JSON encode error: %v", err), http.StatusInternalServerError)
+		return
+	}
+	data := buf.Bytes()
+	clubCacheMu.Lock()
+	clubCache[cacheKey] = &clubCacheEntry{data: data, timestamp: time.Now()}
+	clubCacheMu.Unlock()
+
 	w.Header().Set("Content-Type", "application/json")
-	json.NewEncoder(w).Encode(clubInfo)
+	w.Write(data)
 }

 func main() {
@@ -1663,7 +1959,7 @@ func main() {
 		Addr:           addr,
 		Handler:        r,
 		ReadTimeout:    30 * time.Second,
-		WriteTimeout:   5 * time.Minute,
+		WriteTimeout:   10 * time.Minute,
 		IdleTimeout:    120 * time.Second,
 		MaxHeaderBytes: 1 << 20,
 	}
@@ -1 +1,2 @@
 scrapling[fetchers]==0.4.1
+cloakbrowser==0.3.28
@@ -0,0 +1,240 @@
+#!/usr/bin/env python3
+"""
+Benchmark script comparing fetch methods:
+1. Direct urllib (lightweight HTTP)
+2. Scrapling (StealthyFetcher -> Chromium via patchright)
+3. CloakBrowser (patched Chromium with stealth)
+
+Usage:
+    .venv-scrapling/bin/python scripts/benchmark_fetch.py [--url URL] [--iterations N]
+"""
+
+import argparse
+import gc
+import os
+import resource
+import sys
+import time
+import urllib.request
+import ssl
+from pathlib import Path
+
+# Add venv site-packages to path if needed
+venv = Path(__file__).parent.parent / ".venv-scrapling"
+if venv.exists():
+    import site
+    site.addsitedir(str(venv / "lib" / "python3.13" / "site-packages"))
+
+BROWSER_UA = (
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0 Safari/537.36"
+)
+
+CF_SIGNS = [
+    b"<title>just a moment...</title>",
+    b"attention required!",
+    b"enable javascript and cookies to continue",
+    b"checking if the site connection is secure",
+    b"cf-browser-verification",
+    b"/cdn-cgi/challenge-platform/",
+]
+
+
+def looks_like_cloudflare_block(body: bytes) -> bool:
+    if not body:
+        return False
+    low = body.lower()
+    # Must contain an actual challenge title, not just CDN references
+    hard_signals = [
+        b"<title>just a moment...</title>",
+        b"attention required!",
+        b"enable javascript and cookies to continue",
+        b"checking if the site connection is secure",
+    ]
+    for sig in hard_signals:
+        if sig in low:
+            return True
+    # Secondary: challenge platform JS + challenge token
+    if b"/cdn-cgi/challenge-platform/" in low and (
+        b"window._cf_chl_opt" in low or b"__cf_chl_rt_tk" in low
+    ):
+        return True
+    return False
+
+
+def get_memory_mb() -> float:
+    """Return current process RSS memory in MB."""
+    usage = resource.getrusage(resource.RUSAGE_SELF)
+    return usage.ru_maxrss / 1024.0  # KB -> MB on Linux
+
+
+def direct_fetch(url: str, referer: str = "", timeout: float = 15.0) -> bytes:
+    req = urllib.request.Request(
+        url,
+        headers={
+            "User-Agent": BROWSER_UA,
+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+            "Accept-Language": "cs-CZ,cs;q=0.9,en;q=0.8",
+            "Accept-Encoding": "identity",
+            "Connection": "keep-alive",
+            **({"Referer": referer} if referer else {}),
+        },
+    )
+    ctx = ssl.create_default_context()
+    ctx.check_hostname = False
+    ctx.verify_mode = ssl.CERT_NONE
+    with urllib.request.urlopen(req, timeout=timeout, context=ctx) as resp:
+        body = resp.read()
+    if looks_like_cloudflare_block(body):
+        raise RuntimeError("Cloudflare block detected")
+    return body
+
+
+def scrapling_fetch(url: str, referer: str = "", timeout_ms: int = 90000, wait_ms: int = 500) -> bytes:
+    from scrapling.fetchers import StealthyFetcher
+
+    extra_headers = {}
+    if referer:
+        extra_headers["Referer"] = referer
+
+    fetch_kwargs = {
+        "headless": True,
+        "network_idle": False,
+        "google_search": False,
+        "solve_cloudflare": True,
+        "timeout": timeout_ms,
+        "wait": wait_ms,
+    }
+    if extra_headers:
+        fetch_kwargs["extra_headers"] = extra_headers
+
+    response = StealthyFetcher.fetch(url, **fetch_kwargs)
+
+    status = getattr(response, "status", None)
+    if isinstance(status, int) and status >= 400:
+        raise RuntimeError(f"HTTP {status}")
+
+    body = getattr(response, "body", None)
+    if isinstance(body, (bytes, bytearray)):
+        return bytes(body)
+    if isinstance(body, str):
+        return body.encode("utf-8")
+    text = getattr(response, "text", None)
+    if isinstance(text, str):
+        return text.encode("utf-8")
+    return str(response).encode("utf-8")
+
+
+def cloakbrowser_fetch(url: str, referer: str = "", timeout_ms: int = 90000) -> bytes:
+    from cloakbrowser import launch_context
+
+    ctx = launch_context(headless=True)
+    page = ctx.new_page()
+
+    try:
+        extra_headers = {}
+        if referer:
+            extra_headers["Referer"] = referer
+
+        if extra_headers:
+            page.set_extra_http_headers(extra_headers)
+
+        page.goto(url, timeout=timeout_ms, wait_until="networkidle")
+        html = page.content()
+        body = html.encode("utf-8")
+
+        if looks_like_cloudflare_block(body):
+            raise RuntimeError("Cloudflare block detected")
+        return body
+    finally:
+        ctx.close()
+
+
+def benchmark_method(name: str, fn, url: str, referer: str, iterations: int = 1):
+    """Run a fetch method and return timing + metadata."""
+    gc.collect()
+    results = []
+
+    for i in range(iterations):
+        print(f"  [{name}] iteration {i + 1}/{iterations}...", flush=True)
+        mem_before = get_memory_mb()
+        start = time.monotonic()
+        error = None
+        body = b""
+        try:
+            body = fn(url, referer)
+            if not body:
+                error = "empty body"
+        except Exception as exc:
+            error = str(exc)
+        elapsed = time.monotonic() - start
+        mem_after = get_memory_mb()
+
+        results.append({
+            "iteration": i + 1,
+            "elapsed_sec": elapsed,
+            "success": error is None,
+            "error": error,
+            "body_size": len(body),
+            "mem_before_mb": mem_before,
+            "mem_after_mb": mem_after,
+            "mem_delta_mb": mem_after - mem_before,
+        })
+
+        if error:
+            print(f"    FAILED: {error}", flush=True)
+        else:
+            print(f"    OK in {elapsed:.2f}s, {len(body)} bytes, mem +{mem_after - mem_before:.1f}MB", flush=True)
+
+    return results
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--url", default="https://www.fotbal.cz/souteze/club/club/7eacd9f0-bfa0-4928-a9b6-936140168f58")
+    parser.add_argument("--search-url", default="https://www.fotbal.cz/club/hledej?q=fotbalovy+klub+krnov")
+    parser.add_argument("--iterations", type=int, default=1)
+    parser.add_argument("--methods", default="all", help="Comma-separated: direct,scrapling,cloakbrowser,all")
+    args = parser.parse_args()
+
+    methods = [m.strip().lower() for m in args.methods.split(",")]
+    test_all = "all" in methods
+
+    print("=" * 70)
+    print("FACR Scraper Fetch Benchmark")
+    print("=" * 70)
+    print(f"Python: {sys.version}")
+    print(f"Iterations per method: {args.iterations}")
+    print()
+
+    urls = [
+        ("Club page", args.url),
+        ("Search page", args.search_url),
+    ]
+
+    for label, url in urls:
+        print(f"\n{'=' * 70}")
+        print(f"Testing: {label}")
+        print(f"URL: {url}")
+        print("=" * 70)
+
+        if test_all or "direct" in methods:
+            print("\n--- Direct HTTP (urllib) ---")
+            benchmark_method("direct", direct_fetch, url, "", args.iterations)
+
+        if test_all or "cloakbrowser" in methods:
+            print("\n--- CloakBrowser ---")
+            benchmark_method("cloakbrowser", cloakbrowser_fetch, url, "", args.iterations)
+
+        if test_all or "scrapling" in methods:
+            print("\n--- Scrapling ---")
+            benchmark_method("scrapling", scrapling_fetch, url, "", args.iterations)
+
+    print("\n" + "=" * 70)
+    print("Benchmark complete.")
+    print("=" * 70)
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
@@ -0,0 +1,24 @@
+import sys, os, time
+from cloakbrowser import launch_context
+
+url = sys.argv[1]
+tz = os.environ.get('CLOAKBROWSER_TIMEZONE', 'Europe/Prague')
+lc = os.environ.get('CLOAKBROWSER_LOCALE', 'cs-CZ')
+
+ctx = launch_context(
+    headless=True,
+    timezone=tz,
+    locale=lc,
+    args=['--no-sandbox', '--disable-dev-shm-usage']
+)
+page = ctx.new_page()
+
+# Note: we intentionally do NOT set a custom Referer here.
+# A self-referring Referer (e.g. /club/hledej -> /club/hledej) triggers
+# Cloudflare's bot detection even with CloakBrowser's stealth patches.
+
+try:
+    page.goto(url, timeout=30000, wait_until='networkidle')
+    print(page.content(), end='')
+finally:
+    ctx.close()
@@ -82,9 +82,11 @@ def scrapling_fetch(url: str, referer: str = "", timeout_ms: int = 30000, wait_m
    if referer:
        extra_headers["Referer"] = referer

+    # Increase challenge-solving timeout; network_idle can interfere with
+    # ongoing Cloudflare polling so we disable it.
    fetch_kwargs = {
        "headless": True,
-        "network_idle": True,
+        "network_idle": False,
        "google_search": False,
        "solve_cloudflare": True,
        "timeout": timeout_ms,