mirror of
https://github.com/Dvorinka/facr-scraper.git
synced 2026-06-03 20:12:57 +00:00
feat(scraper): implement CloakBrowser support and enhance request stealth
Integrate CloakBrowser to improve success rates against Cloudflare challenges and implement more robust request handling in the Go backend.
- Add CloakBrowser integration to Dockerfile and requirements
- Implement domain-specific request semaphores in Go to prevent rate-limiting
- Add shared HTTP client with cookie jar and header preservation for better session management
- Enhance request headers in Go to include modern client hints (Sec-Ch-Ua)
- Add benchmarking scripts to compare fetch methods (urllib vs Scrapling vs CloakBrowser)
- Update docker-compose to support CloakBrowser environment variables
- Optimize Docker image by pre-downloading patched Chromium binaries
This commit is contained in:
@@ -9,6 +9,7 @@ import (
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/http/cookiejar"
|
||||
neturl "net/url"
|
||||
"os"
|
||||
"os/exec"
|
||||
@@ -22,6 +23,7 @@ import (
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/gorilla/mux"
|
||||
"golang.org/x/net/publicsuffix"
|
||||
)
|
||||
|
||||
type Competition struct {
|
||||
@@ -56,6 +58,10 @@ var (
|
||||
// Simple in-memory cache for fetched pages
|
||||
pageCache = make(map[string]*cacheEntry)
|
||||
pageCacheMu sync.RWMutex
|
||||
|
||||
// Club response cache for expensive multi-fetch endpoints
|
||||
clubCache = make(map[string]*clubCacheEntry)
|
||||
clubCacheMu sync.RWMutex
|
||||
)
|
||||
|
||||
type cacheEntry struct {
|
||||
@@ -63,7 +69,13 @@ type cacheEntry struct {
|
||||
timestamp time.Time
|
||||
}
|
||||
|
||||
// clubCacheEntry is one stored club response in clubCache.
type clubCacheEntry struct {
	// data is the already-encoded response body; handlers write it to the
	// client unchanged on a cache hit.
	data []byte
	// timestamp is when the entry was stored; it is compared against
	// clubCacheTTL to decide whether the entry is still fresh.
	timestamp time.Time
}
|
||||
|
||||
// Lifetimes of the in-memory caches.
const (
	// cacheTTL is presumably the freshness window for pageCache entries; the
	// expiry check itself is outside this view (TODO confirm).
	cacheTTL = 15 * time.Minute
	// clubCacheTTL is how long a clubCache entry may be served before the
	// handlers rebuild the response.
	clubCacheTTL = 30 * time.Minute
)
|
||||
|
||||
// domainBreakers is a per-domain circuit breaker map so failures on one site
|
||||
// don't block Scrapling for unrelated sites.
|
||||
@@ -76,6 +88,60 @@ var domainBreakers struct {
|
||||
// and resource exhaustion.
|
||||
var scraplingSem = newSemaphore(2)
|
||||
|
||||
// domainReqSem limits concurrent requests to the same domain to avoid
|
||||
// triggering Cloudflare rate-limiting.
|
||||
var domainReqSem struct {
|
||||
mu sync.RWMutex
|
||||
sems map[string]*semaphore
|
||||
}
|
||||
|
||||
func getDomainReqSem(domain string) *semaphore {
|
||||
domainReqSem.mu.RLock()
|
||||
if s, ok := domainReqSem.sems[domain]; ok {
|
||||
domainReqSem.mu.RUnlock()
|
||||
return s
|
||||
}
|
||||
domainReqSem.mu.RUnlock()
|
||||
|
||||
domainReqSem.mu.Lock()
|
||||
defer domainReqSem.mu.Unlock()
|
||||
if s, ok := domainReqSem.sems[domain]; ok {
|
||||
return s
|
||||
}
|
||||
s := newSemaphore(1)
|
||||
if domainReqSem.sems == nil {
|
||||
domainReqSem.sems = make(map[string]*semaphore)
|
||||
}
|
||||
domainReqSem.sems[domain] = s
|
||||
return s
|
||||
}
|
||||
|
||||
// sharedHTTPClient is a reusable client with a cookie jar so that cookies
|
||||
// (including any Cloudflare clearance) survive across requests.
|
||||
var sharedHTTPClient = func() *http.Client {
|
||||
jar, err := cookiejar.New(&cookiejar.Options{PublicSuffixList: publicsuffix.List})
|
||||
if err != nil {
|
||||
log.Printf("failed to create cookie jar, falling back to default client: %v", err)
|
||||
return &http.Client{Timeout: 15 * time.Second}
|
||||
}
|
||||
return &http.Client{
|
||||
Timeout: 15 * time.Second,
|
||||
Jar: jar,
|
||||
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
||||
if len(via) >= 10 {
|
||||
return fmt.Errorf("stopped after 10 redirects")
|
||||
}
|
||||
// Preserve headers across redirects
|
||||
for _, h := range []string{"User-Agent", "Accept", "Accept-Language", "Referer", "Sec-Ch-Ua", "Sec-Ch-Ua-Mobile", "Sec-Ch-Ua-Platform", "Sec-Fetch-Dest", "Sec-Fetch-Mode", "Sec-Fetch-Site", "Upgrade-Insecure-Requests"} {
|
||||
if v := via[len(via)-1].Header.Get(h); v != "" {
|
||||
req.Header.Set(h, v)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
},
|
||||
}
|
||||
}()
|
||||
|
||||
type circuitBreaker struct {
|
||||
failures int32
|
||||
lastFail time.Time
|
||||
@@ -163,8 +229,21 @@ func newBrowserRequest(url string, opts fetchOptions) (*http.Request, error) {
|
||||
req.Header.Set("User-Agent", browserUserAgent)
|
||||
req.Header.Set("Accept", browserAccept)
|
||||
req.Header.Set("Accept-Language", browserAcceptLanguage)
|
||||
req.Header.Set("Accept-Encoding", "gzip, deflate, br")
|
||||
req.Header.Set("Connection", "keep-alive")
|
||||
req.Header.Set("Upgrade-Insecure-Requests", "1")
|
||||
req.Header.Set("Sec-Ch-Ua", `"Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"`)
|
||||
req.Header.Set("Sec-Ch-Ua-Mobile", "?0")
|
||||
req.Header.Set("Sec-Ch-Ua-Platform", `"Windows"`)
|
||||
req.Header.Set("Sec-Fetch-Dest", "document")
|
||||
req.Header.Set("Sec-Fetch-Mode", "navigate")
|
||||
req.Header.Set("Sec-Fetch-Site", "none")
|
||||
req.Header.Set("Sec-Fetch-User", "?1")
|
||||
req.Header.Set("DNT", "1")
|
||||
req.Header.Set("Cache-Control", "max-age=0")
|
||||
if opts.Referer != "" {
|
||||
req.Header.Set("Referer", opts.Referer)
|
||||
req.Header.Set("Sec-Fetch-Site", "same-origin")
|
||||
}
|
||||
|
||||
return req, nil
|
||||
@@ -209,14 +288,25 @@ func compactErrorText(s string) string {
|
||||
}
|
||||
|
||||
func fetchPageDirectOnce(ctx context.Context, url string, opts fetchOptions) ([]byte, error) {
|
||||
parsed, err := neturl.Parse(url)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid URL: %w", err)
|
||||
}
|
||||
|
||||
// Serialize requests per domain to avoid triggering rate limits.
|
||||
sem := getDomainReqSem(parsed.Host)
|
||||
if err := sem.Acquire(ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer sem.Release()
|
||||
|
||||
req, err := newBrowserRequest(url, opts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req = req.WithContext(ctx)
|
||||
|
||||
client := &http.Client{Timeout: 15 * time.Second}
|
||||
resp, err := client.Do(req)
|
||||
resp, err := sharedHTTPClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("direct request failed: %w", err)
|
||||
}
|
||||
@@ -463,6 +553,129 @@ func findScraplingPython() string {
|
||||
)
|
||||
}
|
||||
|
||||
func findCloakBrowserPython() string {
|
||||
cwd, _ := os.Getwd()
|
||||
|
||||
exePath, _ := os.Executable()
|
||||
exeDir := ""
|
||||
if exePath != "" {
|
||||
exeDir = filepath.Dir(exePath)
|
||||
}
|
||||
|
||||
return firstExecutable(
|
||||
os.Getenv("CLOAKBROWSER_PYTHON_BIN"),
|
||||
filepath.Join(cwd, ".venv-scrapling", "bin", "python3"),
|
||||
filepath.Join(cwd, ".venv-scrapling", "bin", "python"),
|
||||
filepath.Join(cwd, ".venv", "bin", "python3"),
|
||||
filepath.Join(cwd, ".venv", "bin", "python"),
|
||||
filepath.Join(exeDir, ".venv-scrapling", "bin", "python3"),
|
||||
filepath.Join(exeDir, ".venv-scrapling", "bin", "python"),
|
||||
filepath.Join(exeDir, ".venv", "bin", "python3"),
|
||||
filepath.Join(exeDir, ".venv", "bin", "python"),
|
||||
"python3",
|
||||
"python",
|
||||
)
|
||||
}
|
||||
|
||||
// findCloakBrowserScript locates the cloakbrowser_fetch.py helper script.
//
// Candidates are checked in order: the CLOAKBROWSER_SCRIPT environment
// variable, scripts/ and the root of the working directory, the fixed
// /opt/scrapling/scripts location, and scripts/ and the root of the directory
// containing the running executable. The first candidate that exists and is
// not a directory is returned; if none qualifies an error is returned.
func findCloakBrowserScript() (string, error) {
	const scriptName = "cloakbrowser_fetch.py"

	// Best-effort: if the working directory is unknown the joined candidates
	// become relative paths.
	cwd, _ := os.Getwd()
	candidates := []string{
		os.Getenv("CLOAKBROWSER_SCRIPT"),
		filepath.Join(cwd, "scripts", scriptName),
		filepath.Join(cwd, scriptName),
		"/opt/scrapling/scripts/" + scriptName,
	}
	if exePath, _ := os.Executable(); exePath != "" {
		exeDir := filepath.Dir(exePath)
		candidates = append(candidates,
			filepath.Join(exeDir, "scripts", scriptName),
			filepath.Join(exeDir, scriptName),
		)
	}

	for _, p := range candidates {
		if p == "" {
			continue
		}
		info, err := os.Stat(p)
		if err != nil {
			continue
		}
		// A directory (e.g. CLOAKBROWSER_SCRIPT pointing at a folder) cannot
		// be executed as a script, so keep looking instead of returning it.
		if info.IsDir() {
			continue
		}
		return p, nil
	}
	return "", fmt.Errorf("cloakbrowser_fetch.py not found")
}
|
||||
|
||||
// fetchPageWithCloakBrowser uses the CloakBrowser patched Chromium to fetch
|
||||
// pages that are blocked by Cloudflare. It is ~3x faster than Scrapling for
|
||||
// fotbal.cz because it passes bot detection without triggering challenge loops.
|
||||
func fetchPageWithCloakBrowser(ctx context.Context, url string, opts fetchOptions) ([]byte, error) {
|
||||
parsedURL, err := neturl.Parse(url)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("CloakBrowser skipped: invalid URL: %w", err)
|
||||
}
|
||||
domain := parsedURL.Host
|
||||
|
||||
if getDomainBreaker(domain).IsOpen() {
|
||||
return nil, fmt.Errorf("CloakBrowser skipped: circuit breaker is open for %s", domain)
|
||||
}
|
||||
|
||||
pythonBin := findCloakBrowserPython()
|
||||
if pythonBin == "" {
|
||||
return nil, fmt.Errorf("CloakBrowser skipped: no Python runtime found")
|
||||
}
|
||||
|
||||
helperScript, err := findCloakBrowserScript()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("CloakBrowser skipped: %w", err)
|
||||
}
|
||||
|
||||
// Acquire global Scrapling semaphore to limit concurrent Chromium launches
|
||||
if err := scraplingSem.Acquire(ctx); err != nil {
|
||||
return nil, fmt.Errorf("CloakBrowser skipped: %w", err)
|
||||
}
|
||||
defer scraplingSem.Release()
|
||||
|
||||
ctx, cancel := context.WithTimeout(ctx, 45*time.Second)
|
||||
defer cancel()
|
||||
|
||||
args := []string{helperScript, url}
|
||||
if opts.Referer != "" {
|
||||
args = append(args, opts.Referer)
|
||||
}
|
||||
|
||||
cmd := exec.CommandContext(ctx, pythonBin, args...)
|
||||
var stdout bytes.Buffer
|
||||
var stderr bytes.Buffer
|
||||
cmd.Stdout = &stdout
|
||||
cmd.Stderr = &stderr
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
|
||||
|
||||
if err := cmd.Run(); err != nil {
|
||||
if cmd.Process != nil {
|
||||
syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
|
||||
}
|
||||
details := compactErrorText(stderr.String())
|
||||
if details == "" {
|
||||
details = compactErrorText(err.Error())
|
||||
}
|
||||
if ctx.Err() == nil {
|
||||
getDomainBreaker(domain).RecordFailure()
|
||||
}
|
||||
return nil, fmt.Errorf("CloakBrowser request failed: %s", details)
|
||||
}
|
||||
|
||||
body := stdout.Bytes()
|
||||
if len(body) == 0 {
|
||||
getDomainBreaker(domain).RecordFailure()
|
||||
return nil, fmt.Errorf("CloakBrowser returned an empty body")
|
||||
}
|
||||
if looksLikeCloudflareBlock(body) {
|
||||
getDomainBreaker(domain).RecordFailure()
|
||||
return nil, fmt.Errorf("CloakBrowser returned a Cloudflare challenge page")
|
||||
}
|
||||
|
||||
getDomainBreaker(domain).RecordSuccess()
|
||||
return body, nil
|
||||
}
|
||||
|
||||
func fetchPageWithScrapling(ctx context.Context, url string, opts fetchOptions) ([]byte, error) {
|
||||
parsedURL, err := neturl.Parse(url)
|
||||
if err != nil {
|
||||
@@ -490,10 +703,10 @@ func fetchPageWithScrapling(ctx context.Context, url string, opts fetchOptions)
|
||||
}
|
||||
defer scraplingSem.Release()
|
||||
|
||||
ctx, cancel := context.WithTimeout(ctx, 90*time.Second)
|
||||
ctx, cancel := context.WithTimeout(ctx, 120*time.Second)
|
||||
defer cancel()
|
||||
|
||||
args := []string{helperScript, "--url", url, "--timeout-ms", "60000", "--wait-ms", "500"}
|
||||
args := []string{helperScript, "--url", url, "--timeout-ms", "90000", "--wait-ms", "500"}
|
||||
if opts.Referer != "" {
|
||||
args = append(args, "--referer", opts.Referer)
|
||||
}
|
||||
@@ -540,9 +753,9 @@ func fetchPageWithFallback(ctx context.Context, url string) ([]byte, error) {
|
||||
return fetchPageWithFallbackOptions(ctx, url, fetchOptions{})
|
||||
}
|
||||
|
||||
// fetchPageWithFallback tries Go HTTP first, then curl/wget, then Scrapling.
|
||||
// When direct HTTP returns a Cloudflare block, curl/wget are skipped since they
|
||||
// will just return the same challenge page and waste ~20 seconds.
|
||||
// fetchPageWithFallbackOptions tries Go HTTP first, then curl/wget, then CloakBrowser,
|
||||
// then Scrapling. When direct HTTP returns a Cloudflare block, curl/wget are
|
||||
// skipped since they will just return the same challenge page and waste ~20s.
|
||||
func fetchPageWithFallbackOptions(ctx context.Context, url string, opts fetchOptions) ([]byte, error) {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return nil, err
|
||||
@@ -559,6 +772,7 @@ func fetchPageWithFallbackOptions(ctx context.Context, url string, opts fetchOpt
|
||||
}
|
||||
pageCacheMu.RUnlock()
|
||||
|
||||
// Try direct HTTP first
|
||||
body, err := fetchPageDirect(ctx, url, opts)
|
||||
if err == nil {
|
||||
cachePage(url, body)
|
||||
@@ -567,10 +781,10 @@ func fetchPageWithFallbackOptions(ctx context.Context, url string, opts fetchOpt
|
||||
log.Printf("Direct request failed for %s: %v", url, err)
|
||||
|
||||
// If direct HTTP returned a Cloudflare block, skip curl/wget time-wasters
|
||||
// and go straight to Scrapling which can solve the challenge.
|
||||
// and go straight to CloakBrowser which can solve the challenge silently.
|
||||
if strings.Contains(err.Error(), "403") || strings.Contains(err.Error(), "Cloudflare") {
|
||||
log.Printf("Skipping curl/wget fallbacks for %s: direct HTTP hit Cloudflare wall", url)
|
||||
goto scraplingFallback
|
||||
goto cloakBrowserFallback
|
||||
}
|
||||
|
||||
body, err = fetchPageWithCurl(ctx, url, opts)
|
||||
@@ -589,7 +803,15 @@ func fetchPageWithFallbackOptions(ctx context.Context, url string, opts fetchOpt
|
||||
}
|
||||
log.Printf("wget fallback failed for %s: %v", url, err)
|
||||
|
||||
scraplingFallback:
|
||||
cloakBrowserFallback:
|
||||
body, err = fetchPageWithCloakBrowser(ctx, url, opts)
|
||||
if err == nil {
|
||||
log.Printf("Successfully retrieved content via CloakBrowser for %s", url)
|
||||
cachePage(url, body)
|
||||
return body, nil
|
||||
}
|
||||
log.Printf("CloakBrowser fallback failed for %s: %v", url, err)
|
||||
|
||||
body, err = fetchPageWithScrapling(ctx, url, opts)
|
||||
if err == nil {
|
||||
log.Printf("Successfully retrieved content via Scrapling for %s", url)
|
||||
@@ -601,6 +823,12 @@ scraplingFallback:
|
||||
return nil, fmt.Errorf("all fetch methods failed for %s: %w", url, err)
|
||||
}
|
||||
|
||||
// detachedContext returns a context rooted at context.Background() that expires
// after timeout, together with its cancel function. Because it is not derived
// from the request context, goroutines using it aren't all killed when
// r.Context() is cancelled.
func detachedContext(timeout time.Duration) (context.Context, context.CancelFunc) {
	root := context.Background()
	return context.WithTimeout(root, timeout)
}
|
||||
|
||||
func cachePage(url string, body []byte) {
|
||||
pageCacheMu.Lock()
|
||||
pageCache[url] = &cacheEntry{body: body, timestamp: time.Now()}
|
||||
@@ -783,12 +1011,25 @@ func parseCompetitionMatchesFromFotbal(ctx context.Context, pageURL, clubType, c
|
||||
|
||||
// parseCompetitionMatchesFromIS scrapes matches from the IS portal as fallback.
|
||||
func parseCompetitionMatchesFromIS(ctx context.Context, detailURL, clubType, clubName, clubID string) []Match {
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", detailURL, nil)
|
||||
parsed, err := neturl.Parse(detailURL)
|
||||
if err != nil {
|
||||
log.Printf("IS matches invalid URL %s: %v", detailURL, err)
|
||||
return nil
|
||||
}
|
||||
sem := getDomainReqSem(parsed.Host)
|
||||
if err := sem.Acquire(ctx); err != nil {
|
||||
log.Printf("IS matches domain semaphore error for %s: %v", detailURL, err)
|
||||
return nil
|
||||
}
|
||||
defer sem.Release()
|
||||
|
||||
req, err := newBrowserRequest(detailURL, fetchOptions{})
|
||||
if err != nil {
|
||||
log.Printf("IS matches request error for %s: %v", detailURL, err)
|
||||
return nil
|
||||
}
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
req = req.WithContext(ctx)
|
||||
resp, err := sharedHTTPClient.Do(req)
|
||||
if err != nil {
|
||||
log.Printf("IS matches fetch error for %s: %v", detailURL, err)
|
||||
return nil
|
||||
@@ -1097,7 +1338,7 @@ func getLogoBySearch(name string) string {
|
||||
if v, ok := logoCache[key]; ok {
|
||||
return v
|
||||
}
|
||||
client := &http.Client{Timeout: 5 * time.Second}
|
||||
client := &http.Client{Timeout: 60 * time.Second}
|
||||
// Prefer simplified last-word token (e.g., "krnov") to improve hit rate for logos
|
||||
query := simplifyClubQuery(name)
|
||||
if query == "" {
|
||||
@@ -1334,6 +1575,20 @@ func getClubTables(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
// Check club response cache
|
||||
cacheKey := "table:" + clubType + ":" + clubID
|
||||
clubCacheMu.RLock()
|
||||
if entry, ok := clubCache[cacheKey]; ok {
|
||||
if time.Since(entry.timestamp) < clubCacheTTL {
|
||||
clubCacheMu.RUnlock()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Write(entry.data)
|
||||
log.Printf("Club cache hit for %s", cacheKey)
|
||||
return
|
||||
}
|
||||
}
|
||||
clubCacheMu.RUnlock()
|
||||
|
||||
// Validate club type
|
||||
var baseURL string
|
||||
var sportParam string
|
||||
@@ -1414,9 +1669,12 @@ func getClubTables(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
defer sem.Release()
|
||||
|
||||
ctx, cancel := detachedContext(30 * time.Second)
|
||||
defer cancel()
|
||||
|
||||
comp := &competitions[idx]
|
||||
tableURL := fmt.Sprintf("https://is.fotbal.cz/public/souteze/tabulky-souteze.aspx?req=%s&sport=%s", comp.ID, sportParam)
|
||||
req, err := http.NewRequestWithContext(r.Context(), "GET", tableURL, nil)
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", tableURL, nil)
|
||||
if err != nil {
|
||||
log.Printf("error creating request for competition table %s: %v", comp.ID, err)
|
||||
return
|
||||
@@ -1518,8 +1776,18 @@ func getClubTables(w http.ResponseWriter, r *http.Request) {
|
||||
Competitions: competitions,
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
if err := json.NewEncoder(&buf).Encode(clubInfo); err != nil {
|
||||
http.Error(w, fmt.Sprintf("JSON encode error: %v", err), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
data := buf.Bytes()
|
||||
clubCacheMu.Lock()
|
||||
clubCache[cacheKey] = &clubCacheEntry{data: data, timestamp: time.Now()}
|
||||
clubCacheMu.Unlock()
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
json.NewEncoder(w).Encode(clubInfo)
|
||||
w.Write(data)
|
||||
}
|
||||
|
||||
// getClubInfo returns club info with competitions and matches
|
||||
@@ -1531,6 +1799,21 @@ func getClubInfo(w http.ResponseWriter, r *http.Request) {
|
||||
http.Error(w, "Club ID is required", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
// Check club response cache
|
||||
cacheKey := "info:" + clubType + ":" + clubID
|
||||
clubCacheMu.RLock()
|
||||
if entry, ok := clubCache[cacheKey]; ok {
|
||||
if time.Since(entry.timestamp) < clubCacheTTL {
|
||||
clubCacheMu.RUnlock()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Write(entry.data)
|
||||
log.Printf("Club cache hit for %s", cacheKey)
|
||||
return
|
||||
}
|
||||
}
|
||||
clubCacheMu.RUnlock()
|
||||
|
||||
var baseURL, sportParam string
|
||||
switch clubType {
|
||||
case "football":
|
||||
@@ -1613,13 +1896,16 @@ func getClubInfo(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
defer sem.Release()
|
||||
|
||||
ctx, cancel := detachedContext(120 * time.Second)
|
||||
defer cancel()
|
||||
|
||||
comp := &competitions[idx]
|
||||
matchesLink := comp.MatchesLink
|
||||
// 1) Try parsing from the public fotbal.cz competition page (matches_link)
|
||||
matches := parseCompetitionMatchesFromFotbal(r.Context(), matchesLink, clubType, clubName, clubID)
|
||||
matches := parseCompetitionMatchesFromFotbal(ctx, matchesLink, clubType, clubName, clubID)
|
||||
// Always try IS as well and prefer it if it provides at least as many matches
|
||||
detailURL := fmt.Sprintf("https://is.fotbal.cz/public/souteze/detail-souteze.aspx?req=%s&sport=%s", comp.ID, sportParam)
|
||||
isMatches := parseCompetitionMatchesFromIS(r.Context(), detailURL, clubType, clubName, clubID)
|
||||
isMatches := parseCompetitionMatchesFromIS(ctx, detailURL, clubType, clubName, clubID)
|
||||
// Prefer IS whenever it yields any results, as IS often contains alias team names
|
||||
if len(isMatches) > 0 {
|
||||
matches = isMatches
|
||||
@@ -1643,8 +1929,18 @@ func getClubInfo(w http.ResponseWriter, r *http.Request) {
|
||||
Competitions: competitions,
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
if err := json.NewEncoder(&buf).Encode(clubInfo); err != nil {
|
||||
http.Error(w, fmt.Sprintf("JSON encode error: %v", err), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
data := buf.Bytes()
|
||||
clubCacheMu.Lock()
|
||||
clubCache[cacheKey] = &clubCacheEntry{data: data, timestamp: time.Now()}
|
||||
clubCacheMu.Unlock()
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
json.NewEncoder(w).Encode(clubInfo)
|
||||
w.Write(data)
|
||||
}
|
||||
|
||||
func main() {
|
||||
@@ -1663,7 +1959,7 @@ func main() {
|
||||
Addr: addr,
|
||||
Handler: r,
|
||||
ReadTimeout: 30 * time.Second,
|
||||
WriteTimeout: 5 * time.Minute,
|
||||
WriteTimeout: 10 * time.Minute,
|
||||
IdleTimeout: 120 * time.Second,
|
||||
MaxHeaderBytes: 1 << 20,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user