package main import ( "bytes" "context" _ "embed" "encoding/json" "fmt" "io" "log" "net/http" neturl "net/url" "os" "os/exec" "path/filepath" "regexp" "strings" "sync" "time" "github.com/PuerkitoBio/goquery" "github.com/gorilla/mux" ) type Competition struct { ID string `json:"id"` Code string `json:"code"` Name string `json:"name"` TeamCount string `json:"team_count"` MatchesLink string `json:"matches_link"` Matches []Match `json:"matches,omitempty"` Table *CompetitionTable `json:"table,omitempty"` } // Cloudflare Browser Rendering API structures type CloudflareCrawlRequest struct { URL string `json:"url"` Limit int `json:"limit,omitempty"` Depth int `json:"depth,omitempty"` Formats []string `json:"formats,omitempty"` Render bool `json:"render,omitempty"` Source string `json:"source,omitempty"` Options map[string]interface{} `json:"options,omitempty"` } type CloudflareCrawlResponse struct { Success bool `json:"success"` Result string `json:"result"` // job ID } type CloudflareCrawlJob struct { ID string `json:"id"` Status string `json:"status"` BrowserSecondsUsed float64 `json:"browserSecondsUsed"` Total int `json:"total"` Finished int `json:"finished"` Records []CloudflareCrawlRecord `json:"records"` Cursor string `json:"cursor,omitempty"` } type CloudflareCrawlRecord struct { URL string `json:"url"` Status string `json:"status"` Markdown string `json:"markdown,omitempty"` HTML string `json:"html,omitempty"` JSON interface{} `json:"json,omitempty"` Metadata map[string]interface{} `json:"metadata"` } type CloudflareClient struct { AccountID string APIToken string BaseURL string Client *http.Client } type fetchOptions struct { Referer string } const ( browserUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0 Safari/537.36" browserAccept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8" browserAcceptLanguage = "cs-CZ,cs;q=0.9,en;q=0.8" scraplingHelperPath = "scripts/scrapling_fetch.py" ) //go:embed scripts/scrapling_fetch.py var embeddedScraplingHelper string var ( embeddedScraplingHelperOnce sync.Once embeddedScraplingHelperFile string embeddedScraplingHelperErr error ) // NewCloudflareClient creates a new Cloudflare Browser Rendering API client func NewCloudflareClient() *CloudflareClient { accountID := strings.TrimSpace(os.Getenv("CLOUDFLARE_ACCOUNT_ID")) apiToken := strings.TrimSpace(os.Getenv("CLOUDFLARE_API_TOKEN")) if accountID == "" || apiToken == "" { return nil } return &CloudflareClient{ AccountID: accountID, APIToken: apiToken, BaseURL: "https://api.cloudflare.com/client/v4", Client: &http.Client{ Timeout: 30 * time.Second, }, } } // StartCrawl initiates a crawl job func (c *CloudflareClient) StartCrawl(ctx context.Context, req CloudflareCrawlRequest) (string, error) { if c == nil { return "", fmt.Errorf("Cloudflare client not initialized") } // Set defaults if req.Limit == 0 { req.Limit = 10 } if req.Depth == 0 { req.Depth = 1 } if len(req.Formats) == 0 { req.Formats = []string{"html", "markdown"} } if req.Source == "" { req.Source = "all" } // Restrict to specific URL patterns for fotbal.cz to avoid crawling unrelated content if req.Options == nil { req.Options = make(map[string]interface{}) } // Only crawl URLs from the same domain and specific paths includePatterns := []string{ "https://www.fotbal.cz/**", } excludePatterns := []string{ "**/api/**", "**/static/**", "**/media/**", } req.Options["includePatterns"] = includePatterns req.Options["excludePatterns"] = excludePatterns req.Options["includeExternalLinks"] = false req.Options["includeSubdomains"] = false body, err := json.Marshal(req) if err != nil { return "", fmt.Errorf("failed to marshal request: %w", err) } url := fmt.Sprintf("%s/accounts/%s/browser-rendering/crawl", c.BaseURL, c.AccountID) httpReq, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(body)) if err != nil { return "", fmt.Errorf("failed to create request: %w", err) } httpReq.Header.Set("Authorization", "Bearer "+c.APIToken) httpReq.Header.Set("Content-Type", "application/json") resp, err := c.Client.Do(httpReq) if err != nil { return "", fmt.Errorf("failed to send request: %w", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { body, _ := io.ReadAll(resp.Body) return "", fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body)) } var crawlResp CloudflareCrawlResponse if err := json.NewDecoder(resp.Body).Decode(&crawlResp); err != nil { return "", fmt.Errorf("failed to decode response: %w", err) } if !crawlResp.Success { return "", fmt.Errorf("API returned unsuccessful response") } return crawlResp.Result, nil } // GetCrawlResults retrieves the results of a crawl job func (c *CloudflareClient) GetCrawlResults(ctx context.Context, jobID string, limit int) (*CloudflareCrawlJob, error) { if c == nil { return nil, fmt.Errorf("Cloudflare client not initialized") } url := fmt.Sprintf("%s/accounts/%s/browser-rendering/crawl/%s", c.BaseURL, c.AccountID, jobID) if limit > 0 { url += fmt.Sprintf("?limit=%d", limit) } httpReq, err := http.NewRequestWithContext(ctx, "GET", url, nil) if err != nil { return nil, fmt.Errorf("failed to create request: %w", err) } httpReq.Header.Set("Authorization", "Bearer "+c.APIToken) resp, err := c.Client.Do(httpReq) if err != nil { return nil, fmt.Errorf("failed to send request: %w", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { body, _ := io.ReadAll(resp.Body) return nil, fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body)) } var result struct { Success bool `json:"success"` Result CloudflareCrawlJob `json:"result"` } if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { return nil, fmt.Errorf("failed to decode response: %w", err) } if !result.Success { return nil, fmt.Errorf("API returned unsuccessful response") } return &result.Result, nil } // WaitForCrawlCompletion waits for a crawl job to complete and returns the results func (c *CloudflareClient) WaitForCrawlCompletion(ctx context.Context, jobID string, maxAttempts int, delay time.Duration) (*CloudflareCrawlJob, error) { if c == nil { return nil, fmt.Errorf("Cloudflare client not initialized") } for i := 0; i < maxAttempts; i++ { job, err := c.GetCrawlResults(ctx, jobID, 1) // Use limit=1 for status checks if err != nil { return nil, err } if job.Status != "running" { // Get full results fullJob, err := c.GetCrawlResults(ctx, jobID, 0) // No limit for full results if err != nil { return nil, err } return fullJob, nil } select { case <-ctx.Done(): return nil, ctx.Err() case <-time.After(delay): continue } } return nil, fmt.Errorf("crawl job did not complete within timeout") } // CrawlURL performs a complete crawl operation for a single URL func (c *CloudflareClient) CrawlURL(ctx context.Context, url string) (*CloudflareCrawlJob, error) { if c == nil { return nil, fmt.Errorf("Cloudflare client not initialized") } req := CloudflareCrawlRequest{ URL: url, Limit: 1, // Only crawl the specific URL Depth: 0, // Don't follow links Formats: []string{"html", "markdown"}, Render: true, Source: "links", // Only crawl the specific URL, not sitemaps } jobID, err := c.StartCrawl(ctx, req) if err != nil { return nil, fmt.Errorf("failed to start crawl: %w", err) } // Wait for completion with reasonable timeout ctx, cancel := context.WithTimeout(ctx, 2*time.Minute) defer cancel() job, err := c.WaitForCrawlCompletion(ctx, jobID, 24, 5*time.Second) if err != nil { return nil, fmt.Errorf("failed to wait for crawl completion: %w", err) } return job, nil } func newBrowserRequest(url string, opts fetchOptions) (*http.Request, error) { req, err := http.NewRequest("GET", url, nil) if err != nil { return nil, fmt.Errorf("failed to create request: %w", err) } req.Header.Set("User-Agent", browserUserAgent) req.Header.Set("Accept", browserAccept) req.Header.Set("Accept-Language", browserAcceptLanguage) if opts.Referer != "" { req.Header.Set("Referer", opts.Referer) } return req, nil } func looksLikeCloudflareBlock(body []byte) bool { if len(body) == 0 { return false } lower := strings.ToLower(string(body)) hardSignals := []string{ "just a moment...", "attention required!", "enable javascript and cookies to continue", "checking if the site connection is secure", "cf-browser-verification", } for _, signal := range hardSignals { if strings.Contains(lower, signal) { return true } } if strings.Contains(lower, "/cdn-cgi/challenge-platform/") && (strings.Contains(lower, "window._cf_chl_opt") || strings.Contains(lower, "__cf_chl_rt_tk") || strings.Contains(lower, "cf_chl_seq_")) { return true } return false } func compactErrorText(s string) string { s = strings.Join(strings.Fields(strings.TrimSpace(s)), " ") if len(s) > 220 { return s[:217] + "..." } return s } func fetchPageDirect(url string, opts fetchOptions) ([]byte, error) { req, err := newBrowserRequest(url, opts) if err != nil { return nil, err } client := &http.Client{Timeout: 15 * time.Second} resp, err := client.Do(req) if err != nil { return nil, fmt.Errorf("direct request failed: %w", err) } defer resp.Body.Close() body, err := io.ReadAll(resp.Body) if err != nil { return nil, fmt.Errorf("failed to read response body: %w", err) } if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("direct request returned HTTP %d", resp.StatusCode) } if looksLikeCloudflareBlock(body) { return nil, fmt.Errorf("direct request returned a Cloudflare challenge page") } return body, nil } func fetchPageWithWget(url string, opts fetchOptions) ([]byte, error) { if _, err := exec.LookPath("wget"); err != nil { return nil, fmt.Errorf("wget not available: %w", err) } ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) defer cancel() args := []string{ "--quiet", "--tries=1", "--timeout=15", "--max-redirect=10", "--output-document=-", "--user-agent=" + browserUserAgent, "--header=Accept: " + browserAccept, "--header=Accept-Language: " + browserAcceptLanguage, } if opts.Referer != "" { args = append(args, "--header=Referer: "+opts.Referer) } args = append(args, url) cmd := exec.CommandContext(ctx, "wget", args...) var stdout bytes.Buffer var stderr bytes.Buffer cmd.Stdout = &stdout cmd.Stderr = &stderr if err := cmd.Run(); err != nil { details := compactErrorText(stderr.String()) if details == "" { details = compactErrorText(err.Error()) } return nil, fmt.Errorf("wget request failed: %s", details) } body := stdout.Bytes() if len(body) == 0 { return nil, fmt.Errorf("wget returned an empty body") } if looksLikeCloudflareBlock(body) { return nil, fmt.Errorf("wget returned a Cloudflare challenge page") } return body, nil } func firstExistingFile(paths ...string) string { for _, path := range paths { path = strings.TrimSpace(path) if path == "" { continue } if info, err := os.Stat(path); err == nil && !info.IsDir() { return path } } return "" } func firstExecutable(paths ...string) string { for _, path := range paths { path = strings.TrimSpace(path) if path == "" { continue } if strings.ContainsRune(path, os.PathSeparator) { if info, err := os.Stat(path); err == nil && !info.IsDir() { return path } continue } if resolved, err := exec.LookPath(path); err == nil { return resolved } } return "" } func ensureEmbeddedScraplingHelper() (string, error) { embeddedScraplingHelperOnce.Do(func() { if strings.TrimSpace(embeddedScraplingHelper) == "" { embeddedScraplingHelperErr = fmt.Errorf("embedded Scrapling helper is empty") return } file, err := os.CreateTemp("", "facr-scrapling-*.py") if err != nil { embeddedScraplingHelperErr = fmt.Errorf("create embedded Scrapling helper: %w", err) return } defer file.Close() if _, err := file.WriteString(embeddedScraplingHelper); err != nil { embeddedScraplingHelperErr = fmt.Errorf("write embedded Scrapling helper: %w", err) return } if err := file.Chmod(0600); err != nil { embeddedScraplingHelperErr = fmt.Errorf("chmod embedded Scrapling helper: %w", err) return } embeddedScraplingHelperFile = file.Name() }) if embeddedScraplingHelperErr != nil { return "", embeddedScraplingHelperErr } if embeddedScraplingHelperFile == "" { return "", fmt.Errorf("embedded Scrapling helper path is empty") } return embeddedScraplingHelperFile, nil } func findScraplingHelperScript() (string, error) { cwd, _ := os.Getwd() exePath, _ := os.Executable() exeDir := "" if exePath != "" { exeDir = filepath.Dir(exePath) } if path := firstExistingFile( os.Getenv("SCRAPLING_SCRIPT"), filepath.Join(cwd, scraplingHelperPath), filepath.Join(exeDir, scraplingHelperPath), ); path != "" { return path, nil } return ensureEmbeddedScraplingHelper() } func findScraplingPython() string { cwd, _ := os.Getwd() exePath, _ := os.Executable() exeDir := "" if exePath != "" { exeDir = filepath.Dir(exePath) } return firstExecutable( os.Getenv("SCRAPLING_PYTHON_BIN"), filepath.Join(cwd, ".venv-scrapling", "bin", "python3"), filepath.Join(cwd, ".venv-scrapling", "bin", "python"), filepath.Join(cwd, ".venv", "bin", "python3"), filepath.Join(cwd, ".venv", "bin", "python"), filepath.Join(exeDir, ".venv-scrapling", "bin", "python3"), filepath.Join(exeDir, ".venv-scrapling", "bin", "python"), filepath.Join(exeDir, ".venv", "bin", "python3"), filepath.Join(exeDir, ".venv", "bin", "python"), "python3", "python", ) } func fetchPageWithScrapling(url string, opts fetchOptions) ([]byte, error) { pythonBin := findScraplingPython() if pythonBin == "" { return nil, fmt.Errorf("Scrapling skipped: no Python runtime found") } helperScript, err := findScraplingHelperScript() if err != nil { return nil, fmt.Errorf("Scrapling skipped: %w", err) } ctx, cancel := context.WithTimeout(context.Background(), 90*time.Second) defer cancel() args := []string{helperScript, "--url", url} if opts.Referer != "" { args = append(args, "--referer", opts.Referer) } cmd := exec.CommandContext(ctx, pythonBin, args...) var stdout bytes.Buffer var stderr bytes.Buffer cmd.Stdout = &stdout cmd.Stderr = &stderr if err := cmd.Run(); err != nil { details := compactErrorText(stderr.String()) if details == "" { details = compactErrorText(err.Error()) } return nil, fmt.Errorf("Scrapling request failed: %s", details) } body := stdout.Bytes() if len(body) == 0 { return nil, fmt.Errorf("Scrapling returned an empty body") } if looksLikeCloudflareBlock(body) { return nil, fmt.Errorf("Scrapling returned a Cloudflare challenge page") } return body, nil } func fetchPageWithFallback(url string) ([]byte, error) { return fetchPageWithFallbackOptions(url, fetchOptions{}) } // fetchPageWithFallback tries Go HTTP first, then wget, then Scrapling, then Cloudflare Browser Rendering. func fetchPageWithFallbackOptions(url string, opts fetchOptions) ([]byte, error) { body, err := fetchPageDirect(url, opts) if err == nil { return body, nil } log.Printf("Direct request failed for %s: %v", url, err) body, err = fetchPageWithWget(url, opts) if err == nil { log.Printf("Successfully retrieved content via wget for %s", url) return body, nil } log.Printf("wget fallback failed for %s: %v", url, err) body, err = fetchPageWithScrapling(url, opts) if err == nil { log.Printf("Successfully retrieved content via Scrapling for %s", url) return body, nil } log.Printf("Scrapling fallback failed for %s: %v", url, err) if cfClient := NewCloudflareClient(); cfClient != nil { log.Printf("Attempting Cloudflare crawl fallback for %s", url) ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) defer cancel() job, err := cfClient.CrawlURL(ctx, url) if err != nil { log.Printf("Cloudflare crawl failed for %s: %v", url, err) return nil, fmt.Errorf("go scraping failed, wget failed, Scrapling failed, and Cloudflare crawl failed: %w", err) } if len(job.Records) > 0 && job.Records[0].Status == "completed" { body := []byte(job.Records[0].HTML) if looksLikeCloudflareBlock(body) { return nil, fmt.Errorf("Cloudflare crawl returned a challenge page") } log.Printf("Successfully retrieved content via Cloudflare crawl for %s", url) return body, nil } log.Printf("Cloudflare crawl returned no completed records for %s", url) return nil, fmt.Errorf("Cloudflare crawl returned no completed records") } return nil, fmt.Errorf("go scraping failed, wget failed, Scrapling failed, and Cloudflare client is not available") } // parseCompetitionMatchesFromFotbal scrapes matches from the public fotbal.cz // competition page (e.g., https://www.fotbal.cz/souteze/turnaje/table/{id}). // It filters to only include matches involving the given clubName if provided. func parseCompetitionMatchesFromFotbal(pageURL, clubType, clubName, clubID string) []Match { pageURL = strings.TrimSpace(pageURL) if pageURL == "" { return nil } body, err := fetchPageWithFallback(pageURL) if err != nil { log.Printf("fotbal.cz matches fetch failed for %s: %v", pageURL, err) return nil } // If we still don't have body content, return nil if len(body) == 0 { log.Printf("No content available for %s", pageURL) return nil } // Debug: save full HTML if env toggled if os.Getenv("DEBUG_SAVE_HTML") != "" { // derive a friendly filename from last URL path segment comp := pageURL if i := strings.LastIndex(comp, "/"); i >= 0 && i+1 < len(comp) { comp = comp[i+1:] } fname := fmt.Sprintf("fotbal_comp_%s.html", comp) if err := os.WriteFile(fname, body, 0644); err != nil { log.Printf("failed writing debug HTML %s: %v", fname, err) } else { log.Printf("saved debug HTML: %s", fname) } } doc, err := goquery.NewDocumentFromReader(bytes.NewReader(body)) if err != nil { log.Printf("fotbal.cz matches parse error for %s: %v", pageURL, err) return nil } var matches []Match // Sections per round doc.Find("section.js-matchRoundSection li.MatchRound").Each(func(_ int, li *goquery.Selection) { a := li.Find("a.MatchRound-match").First() if a.Length() == 0 { return } // Teams teamNames := []string{} li.Find("a.MatchRound-match ul li span.H7").Each(func(_ int, s *goquery.Selection) { t := strings.TrimSpace(s.Text()) if t != "" { teamNames = append(teamNames, t) } }) if len(teamNames) < 2 { return } home := teamNames[0] away := teamNames[1] // Try to extract team IDs from img URLs if present imgIDs := []string{} li.Find("a.MatchRound-match img").Each(func(_ int, img *goquery.Selection) { src := strings.TrimSpace(img.AttrOr("src", "")) if src == "" { return } if id := extractUUIDFromHref(src); id != "" { imgIDs = append(imgIDs, id) } }) homeID, awayID := "", "" if len(imgIDs) >= 1 { homeID = imgIDs[0] } if len(imgIDs) >= 2 { awayID = imgIDs[1] } // Score score := strings.TrimSpace(a.Find("strong.H4").First().Text()) if re := regexp.MustCompile(`\s*([0-9]+)\s*:\s*([0-9]+)\s*`); re != nil { if m := re.FindStringSubmatch(score); len(m) == 3 { score = fmt.Sprintf("%s:%s", m[1], m[2]) } } // Meta: date, match id in meta list and link dateText := "" li.Find(".MatchRound-meta p").Each(func(_ int, p *goquery.Selection) { label := strings.TrimSpace(p.Find("strong").First().Text()) txt := strings.TrimSpace(p.Text()) if strings.HasPrefix(strings.ToLower(label), "datum") { // Remove label from text dateText = strings.TrimSpace(strings.ReplaceAll(txt, label+":", "")) } }) // Venue from details, if available venue := "" li.Find(".js-matchRoundDetails li p").Each(func(_ int, p *goquery.Selection) { label := strings.TrimSpace(p.Find("strong").First().Text()) txt := strings.TrimSpace(p.Text()) if strings.HasPrefix(strings.ToLower(label), "hřiště") || strings.HasPrefix(strings.ToLower(label), "hriste") { venue = strings.TrimSpace(strings.ReplaceAll(txt, label+":", "")) } }) // Match ID from the anchor href matchID := extractUUIDFromHref(a.AttrOr("href", "")) reportURL := "" if matchID != "" { if strings.EqualFold(clubType, "futsal") { reportURL = fmt.Sprintf("https://www.fotbal.cz/futsal/zapasy/futsal/%s", matchID) } else { reportURL = fmt.Sprintf("https://www.fotbal.cz/souteze/zapasy/zapas/%s", matchID) } } // Filter by club involvement: prefer UUID match, fallback to name matching including simplified token if clubName != "" || clubID != "" { involved := false // If we could extract team UUIDs, match by ID first (robust against aliases) if clubID != "" && (strings.EqualFold(homeID, clubID) || strings.EqualFold(awayID, clubID)) { involved = true } else if clubName != "" { // Fallback to fuzzy full-name matching involved = strings.EqualFold(home, clubName) || strings.EqualFold(away, clubName) || containsFold(clubName, home) || containsFold(clubName, away) || containsFold(home, clubName) || containsFold(away, clubName) // As a last resort, try matching a simplified token (e.g., city) of the club name if !involved { token := simplifyClubQuery(clubName) if token != "" && (containsFold(home, token) || containsFold(away, token)) { involved = true } } } if !involved { return } } // Backfill IDs for current club if missing if homeID == "" { if strings.EqualFold(home, clubName) || containsFold(home, clubName) || containsFold(clubName, home) { homeID = clubID } else { token := simplifyClubQuery(clubName) if token != "" && containsFold(home, token) { homeID = clubID } } } if awayID == "" { if strings.EqualFold(away, clubName) || containsFold(away, clubName) || containsFold(clubName, away) { awayID = clubID } else { token := simplifyClubQuery(clubName) if token != "" && containsFold(away, token) { awayID = clubID } } } homeLogo := getLogo(home, homeID) awayLogo := getLogo(away, awayID) matches = append(matches, Match{ DateTime: dateText, Home: home, HomeID: homeID, HomeLogoURL: homeLogo, Away: away, AwayID: awayID, AwayLogoURL: awayLogo, Score: score, Venue: venue, MatchID: matchID, ReportURL: reportURL, FACRLink: reportURL, }) }) return matches } // parseCompetitionMatchesFromIS scrapes matches from the IS portal as fallback. func parseCompetitionMatchesFromIS(detailURL, clubType, clubName, clubID string) []Match { resp, err := http.Get(detailURL) if err != nil { log.Printf("IS matches fetch error for %s: %v", detailURL, err) return nil } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { log.Printf("IS matches non-200 for %s: %d", detailURL, resp.StatusCode) return nil } // Read body so we can optionally save and then parse from memory body, err := io.ReadAll(resp.Body) if err != nil { log.Printf("IS matches read error for %s: %v", detailURL, err) return nil } if os.Getenv("DEBUG_SAVE_HTML") != "" { // name the file using the req (competition id) if present fname := "is_detail.html" if u, err := neturl.Parse(detailURL); err == nil { req := u.Query().Get("req") sport := u.Query().Get("sport") if req != "" { fname = fmt.Sprintf("is_comp_%s_%s.html", req, sport) } } if err := os.WriteFile(fname, body, 0644); err != nil { log.Printf("failed writing debug IS HTML %s: %v", fname, err) } else { log.Printf("saved debug IS HTML: %s", fname) } } docDetail, err := goquery.NewDocumentFromReader(bytes.NewReader(body)) if err != nil { log.Printf("IS matches parse error for %s: %v", detailURL, err) return nil } var matches []Match totalRows := 0 keptRows := 0 docDetail.Find("table.soutez-zapasy tr").Each(func(_ int, s *goquery.Selection) { if s.Find("th").Length() > 0 { return } tds := s.Find("td") if tds.Length() < 5 { return } totalRows++ getText := func(sel *goquery.Selection) string { return strings.TrimSpace(sel.Text()) } dt := getText(tds.Eq(0)) rawHome := getText(tds.Eq(1)) if idx := strings.Index(rawHome, "("); idx >= 0 { rawHome = strings.TrimSpace(rawHome[:idx]) } rawAway := getText(tds.Eq(2)) if idx := strings.Index(rawAway, "("); idx >= 0 { rawAway = strings.TrimSpace(rawAway[:idx]) } homeID := extractUUIDFromHref(tds.Eq(1).Find("a").First().AttrOr("href", "")) awayID := extractUUIDFromHref(tds.Eq(2).Find("a").First().AttrOr("href", "")) rawScore := getText(tds.Eq(3)) score := "" if re := regexp.MustCompile(`(\d+)\s*:\s*(\d+)`); re != nil { if m := re.FindStringSubmatch(rawScore); len(m) == 3 { score = fmt.Sprintf("%s:%s", m[1], m[2]) } } venue := "" if tds.Length() > 4 { venue = getText(tds.Eq(4)) } var reportURL, matchID string var isReportHref, isDelegHref string // Use the last column for links to be robust to optional columns tds.Eq(tds.Length() - 1).Find("a").Each(func(_ int, a *goquery.Selection) { href := strings.TrimSpace(a.AttrOr("href", "")) if href == "" { return } if u, err := neturl.Parse(href); err == nil { if id := u.Query().Get("zapas"); id != "" { matchID = id } } // Capture specific IS links if strings.Contains(href, "zapis-o-utkani-report.aspx") { isReportHref = resolveISURL(href) } if strings.Contains(href, "zapas-delegace-report.aspx") { isDelegHref = resolveISURL(href) } }) if matchID != "" { if strings.EqualFold(clubType, "futsal") { reportURL = fmt.Sprintf("https://www.fotbal.cz/futsal/zapasy/futsal/%s", matchID) } else { reportURL = fmt.Sprintf("https://www.fotbal.cz/souteze/zapasy/zapas/%s", matchID) } } // Canonical fotbal.cz link facrLink := reportURL // Filter by club involvement: prefer UUID match, fallback to name matching with simplified token if clubName != "" || clubID != "" { involved := false if clubID != "" && (strings.EqualFold(homeID, clubID) || strings.EqualFold(awayID, clubID)) { involved = true } else if clubName != "" { involved = strings.EqualFold(rawHome, clubName) || strings.EqualFold(rawAway, clubName) || containsFold(clubName, rawHome) || containsFold(clubName, rawAway) || containsFold(rawHome, clubName) || containsFold(rawAway, clubName) if !involved { token := simplifyClubQuery(clubName) if token != "" && (containsFold(rawHome, token) || containsFold(rawAway, token)) { involved = true } } } if !involved { return } } keptRows++ if homeID == "" { if strings.EqualFold(rawHome, clubName) || containsFold(rawHome, clubName) || containsFold(clubName, rawHome) { homeID = clubID } else { token := simplifyClubQuery(clubName) if token != "" && containsFold(rawHome, token) { homeID = clubID } } } if awayID == "" { if strings.EqualFold(rawAway, clubName) || containsFold(rawAway, clubName) || containsFold(clubName, rawAway) { awayID = clubID } else { token := simplifyClubQuery(clubName) if token != "" && containsFold(rawAway, token) { awayID = clubID } } } homeLogo := getLogo(rawHome, homeID) awayLogo := getLogo(rawAway, awayID) matches = append(matches, Match{DateTime: dt, Home: rawHome, HomeID: homeID, HomeLogoURL: homeLogo, Away: rawAway, AwayID: awayID, AwayLogoURL: awayLogo, Score: score, Venue: venue, MatchID: matchID, ReportURL: func() string { if isReportHref != "" { return isReportHref } return reportURL }(), FACRLink: facrLink, DelegationURL: isDelegHref}) }) if os.Getenv("DEBUG_SAVE_HTML") != "" { log.Printf("IS parse summary for %s: total rows=%d, kept=%d", detailURL, totalRows, keptRows) } return matches } var logoCache = map[string]string{} type logoAPISearchResult struct { ID string `json:"id"` Name string `json:"name"` LogoURL string `json:"logo_url"` HasLocalLogo bool `json:"has_local_logo"` } type searchAPIResult struct { Results []struct { Name string `json:"name"` LogoURL string `json:"logo_url"` } `json:"results"` } // a simplified search token like "krnov" to improve chances of finding a logo. func simplifyClubQuery(name string) string { s := strings.TrimSpace(name) if s == "" { return "" } parts := strings.Fields(s) if len(parts) == 0 { return "" } // Walk from the end to find a meaningful token (avoid legal suffixes like "z.s.") stop := map[string]struct{}{ "z.s.": {}, "z.s": {}, "zs": {}, "zapsany": {}, "zapsaný": {}, "spolek": {}, "o.s.": {}, "o.s": {}, "os": {}, "a.s.": {}, "a.s": {}, "as": {}, "s.r.o.": {}, "s.r.o": {}, "sro": {}, } for i := len(parts) - 1; i >= 0; i-- { tok := parts[i] tok = strings.Trim(tok, ",.;:-()[]{}\"'`“”’") lt := strings.ToLower(tok) if _, banned := stop[lt]; banned { continue } // prefer tokens with letters and length >= 3 letters := regexp.MustCompile(`[a-zA-Zá-žÁ-Ž]`).MatchString if len([]rune(lt)) >= 3 && letters(lt) { return lt } } // Fallback to last token sanitized last := strings.Trim(parts[len(parts)-1], ",.;:-()[]{}\"'`“”’") return strings.ToLower(last) } func getLogoFromLogoAPI(teamName string, teamID string) string { base := strings.TrimSpace(os.Getenv("LOGOAPI_BASE_URL")) if base == "" { base = "https://logoapi.sportcreative.eu" } base = strings.TrimRight(base, "/") name := strings.TrimSpace(teamName) if name == "" { return "" } cacheKey := "logoapi|" + strings.ToLower(name) if v, ok := logoCache[cacheKey]; ok { return v } client := &http.Client{Timeout: 5 * time.Second} doSearch := func(q string) ([]logoAPISearchResult, bool) { q = strings.TrimSpace(q) if q == "" { return nil, false } u := fmt.Sprintf("%s/clubs/search-with-logos?q=%s", base, neturl.QueryEscape(q)) resp, err := client.Get(u) if err != nil { return nil, false } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { io.Copy(io.Discard, resp.Body) return nil, false } var payload []logoAPISearchResult if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil { // Non-JSON or invalid response – treat as no result return nil, false } return payload, true } // Search strictly by full club name; if it yields nothing usable, let caller fall back to FACR. payload, ok := doSearch(name) if !ok || len(payload) == 0 { logoCache[cacheKey] = "" return "" } // Normalize names for comparison (case-insensitive, strip common legal/sport suffixes). normalize := func(s string) string { s = strings.ToLower(strings.TrimSpace(s)) if s == "" { return s } parts := strings.Fields(s) stop := map[string]struct{}{ "fotbal": {}, "futsal": {}, "z.s.": {}, "z.s": {}, "zs": {}, "o.s.": {}, "o.s": {}, "os": {}, "a.s.": {}, "a.s": {}, "as": {}, "s.r.o.": {}, "s.r.o": {}, "sro": {}, } end := len(parts) for end > 0 { if _, banned := stop[parts[end-1]]; banned { end-- continue } break } if end != len(parts) { parts = parts[:end] } return strings.Join(parts, " ") } want := normalize(name) var withLogo []logoAPISearchResult for _, r := range payload { if r.HasLocalLogo { withLogo = append(withLogo, r) } } if len(withLogo) == 0 { logoCache[cacheKey] = "" return "" } // Only accept a logo when the normalized club name matches; avoid arbitrary first-result picks. for _, r := range withLogo { if normalize(r.Name) == want { logoCache[cacheKey] = r.LogoURL return r.LogoURL } } // No strong match – treat as "no logo" so upstream can fall back to FACR assets. logoCache[cacheKey] = "" return "" } func getLogoBySearch(name string) string { key := strings.ToLower(strings.TrimSpace(name)) if key == "" { return "" } if v, ok := logoCache[key]; ok { return v } client := &http.Client{Timeout: 5 * time.Second} // Prefer simplified last-word token (e.g., "krnov") to improve hit rate for logos query := simplifyClubQuery(name) if query == "" { query = name } doSearch := func(q string) (searchAPIResult, bool) { url := fmt.Sprintf("http://localhost:8686/club/search?q=%s", neturl.QueryEscape(q)) resp, err := client.Get(url) if err != nil { return searchAPIResult{}, false } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { io.Copy(io.Discard, resp.Body) return searchAPIResult{}, false } var payload searchAPIResult if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil { return searchAPIResult{}, false } return payload, true } payload, ok := doSearch(query) if !ok || len(payload.Results) == 0 { // Fallback to full name if simplified token yields nothing payload, ok = doSearch(name) if !ok { return "" } } // pick best match: exact (case-insensitive), then contains, else first best := "" for _, r := range payload.Results { if strings.EqualFold(strings.TrimSpace(r.Name), strings.TrimSpace(name)) { best = r.LogoURL break } } if best == "" { for _, r := range payload.Results { rname := strings.ToLower(r.Name) if strings.Contains(rname, key) || strings.Contains(key, rname) { best = r.LogoURL break } } } if best == "" && len(payload.Results) > 0 { best = payload.Results[0].LogoURL } logoCache[key] = best return best } func getLogo(teamName string, teamID string) string { placeholder := "https://www.fotbal.cz/dist/img/logo-club-empty.svg" name := strings.ToLower(strings.TrimSpace(teamName)) if name == "" || strings.Contains(name, "volno") || strings.Contains(name, "volný los") || strings.Contains(name, "volny los") || strings.Contains(name, "bye") { return placeholder } if logo := getLogoFromLogoAPI(teamName, teamID); logo != "" { return logo } // If we have a team ID, construct the official logo URL directly. // This avoids wrong matches for duplicate names (e.g., multiple "Ořechov"). if tid := strings.TrimSpace(teamID); tid != "" { return fmt.Sprintf("https://is1.fotbal.cz/media/kluby/%s/%s_crop.jpg", tid, tid) } // Otherwise, try the local FACR-based search endpoint by name. if logo := getLogoBySearch(teamName); logo != "" { return logo } // No ID and no search hit -> placeholder return placeholder } // CompetitionTable holds standings sections; currently only Overall is used type CompetitionTable struct { Overall []TableRow `json:"overall"` } // ClubInfo is the response for club info and tables endpoints type ClubInfo struct { Name string `json:"name"` ClubID string `json:"club_id"` ClubType string `json:"club_type"` ClubInternalID string `json:"club_internal_id,omitempty"` URL string `json:"url,omitempty"` LogoURL string `json:"logo_url,omitempty"` Address string `json:"address,omitempty"` Category string `json:"category,omitempty"` Competitions []Competition `json:"competitions"` } // SearchResult represents one club from fotbal.cz search type SearchResult struct { Name string `json:"name"` ClubID string `json:"club_id"` ClubType string `json:"club_type"` // football or futsal URL string `json:"url"` LogoURL string `json:"logo_url"` Category string `json:"category,omitempty"` Address string `json:"address,omitempty"` } // getClubSearch queries fotbal.cz club search and returns results with logo func getClubSearch(w http.ResponseWriter, r *http.Request) { q := strings.TrimSpace(r.URL.Query().Get("q")) if q == "" { http.Error(w, "query parameter 'q' is required", http.StatusBadRequest) return } // Build search URL vals := neturl.Values{} vals.Set("q", q) searchURL := "https://www.fotbal.cz/club/hledej?" + vals.Encode() fetchSearchPage := func(url string) ([]byte, error) { return fetchPageWithFallbackOptions(url, fetchOptions{ Referer: "https://www.fotbal.cz/club/hledej", }) } // Try direct HTTP request first body, err := fetchSearchPage(searchURL) if err != nil { log.Printf("Direct search request failed for %s: %v", searchURL, err) // Retry with quoted query for short tokens searchURL2 := searchURL tokens := strings.Fields(q) for _, t := range tokens { if len([]rune(t)) <= 2 { vals2 := neturl.Values{} vals2.Set("q", "\""+q+"\"") searchURL2 = "https://www.fotbal.cz/club/hledej?" + vals2.Encode() break } } body, err = fetchSearchPage(searchURL2) if err != nil { log.Printf("Retried search request failed for %s: %v", searchURL2, err) // Return empty results instead of error w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(map[string]any{ "query": q, "count": 0, "results": []SearchResult{}, }) return } } doc, err := goquery.NewDocumentFromReader(bytes.NewReader(body)) if err != nil { http.Error(w, fmt.Sprintf("Error parsing HTML: %v", err), http.StatusInternalServerError) return } var results []SearchResult // The page lists clubs in section "Výsledky hledání" as li.ListItemSplit doc.Find("li.ListItemSplit").Each(func(_ int, li *goquery.Selection) { a := li.Find("a.Link--inverted").First() href, _ := a.Attr("href") if href == "" { return } name := strings.TrimSpace(a.Find("span.H7").First().Text()) if name == "" { // fallback to link text name = strings.TrimSpace(a.Text()) } img := a.Find("img").First() logoURL, _ := img.Attr("src") // Category category := strings.TrimSpace(li.Find(".ClubCategories .BadgeCategory").First().Text()) // Address address := strings.TrimSpace(li.Find(".ClubAddress p").First().Text()) // Infer club type from href clubType := "football" if strings.Contains(strings.ToLower(href), "/futsal/") { clubType = "futsal" } // Extract club ID from last path segment // e.g., https://www.fotbal.cz/futsal/club/club/{uuid} parts := strings.Split(strings.TrimRight(href, "/"), "/") clubID := "" if len(parts) > 0 { clubID = parts[len(parts)-1] } // Normalize URL (ensure absolute) if !strings.HasPrefix(href, "http://") && !strings.HasPrefix(href, "https://") { href = "https://www.fotbal.cz" + href } // Prefer logoapi / local logo when available if l := strings.TrimSpace(getLogo(name, clubID)); l != "" { logoURL = l } results = append(results, SearchResult{ Name: name, ClubID: clubID, ClubType: clubType, URL: href, LogoURL: logoURL, Category: category, Address: address, }) }) w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(map[string]any{ "query": q, "count": len(results), "results": results, }) } // getClubTables returns club info with competition standings tables (no matches) func getClubTables(w http.ResponseWriter, r *http.Request) { vars := mux.Vars(r) clubID := vars["id"] clubType := vars["type"] if clubID == "" { http.Error(w, "Club ID is required", http.StatusBadRequest) return } // Validate club type var baseURL string var sportParam string switch clubType { case "football": baseURL = "https://www.fotbal.cz/souteze/club/club" sportParam = "fotbal" case "futsal": baseURL = "https://www.fotbal.cz/futsal/club/club" sportParam = "futsal" default: http.Error(w, "Invalid club type. Use 'football' or 'futsal'.", http.StatusBadRequest) return } url := fmt.Sprintf("%s/%s", baseURL, clubID) body, err := fetchPageWithFallback(url) if err != nil { http.Error(w, fmt.Sprintf("Error fetching club data: %v", err), http.StatusInternalServerError) return } doc, err := goquery.NewDocumentFromReader(bytes.NewReader(body)) if err != nil { http.Error(w, fmt.Sprintf("Error parsing HTML: %v", err), http.StatusInternalServerError) return } // Extract club internal ID clubInternalID := "" doc.Find("section").Each(func(i int, s *goquery.Selection) { headerText := s.Find("h3 span").First().Text() if strings.TrimSpace(headerText) == "ID klubu" { clubInternalID = strings.TrimSpace(s.Find("ul li").First().Text()) } }) // Extract competitions var competitions []Competition doc.Find("table.Table tbody tr").Each(func(i int, s *goquery.Selection) { code := strings.TrimSpace(s.Find("td:first-child").Text()) nameLink := s.Find("td:nth-child(2) a") name := strings.TrimSpace(nameLink.Text()) teamCount := strings.TrimSpace(s.Find("td:nth-child(3)").Text()) // Extract competition ID from the link parts := strings.Split(nameLink.AttrOr("href", ""), "/") compID := "" if len(parts) >= 2 { compID = parts[len(parts)-1] } // Build public table link depending on clubType tableLink := "" if strings.EqualFold(clubType, "futsal") { tableLink = fmt.Sprintf("https://www.fotbal.cz/futsal/futsal/table/%s", compID) } else { tableLink = fmt.Sprintf("https://www.fotbal.cz/souteze/turnaje/table/%s", compID) } competitions = append(competitions, Competition{ ID: compID, Code: code, Name: name, TeamCount: teamCount, MatchesLink: tableLink, }) }) // For each competition, fetch the standings tables from is.fotbal.cz for i := range competitions { comp := &competitions[i] tableURL := fmt.Sprintf("https://is.fotbal.cz/public/souteze/tabulky-souteze.aspx?req=%s&sport=%s", comp.ID, sportParam) resp, err := http.Get(tableURL) if err != nil { log.Printf("error fetching competition table for %s: %v", comp.ID, err) continue } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { log.Printf("non-200 response for %s: %d", comp.ID, resp.StatusCode) continue } docTable, err := goquery.NewDocumentFromReader(resp.Body) if err != nil { log.Printf("error parsing table HTML for %s: %v", comp.ID, err) continue } // Parse section: Tabulka celková (only overall) var overall []TableRow parseSection := func(headerText string) []TableRow { var rows []TableRow // Find the h3 with matching text, then the following .list.tabulky table docTable.Find("h3").EachWithBreak(func(_ int, h3 *goquery.Selection) bool { if strings.EqualFold(strings.TrimSpace(h3.Text()), headerText) { list := h3.NextAllFiltered("div.list.tabulky").First() if list.Length() == 0 { return false } table := list.Find("table.vysledky-tabulky tbody") table.Find("tr").Each(func(_ int, tr *goquery.Selection) { // skip header rows containing th if tr.Find("th").Length() > 0 { return } tds := tr.Find("td") if tds.Length() < 8 { return } get := func(i int) string { return strings.TrimSpace(tds.Eq(i).Text()) } rank := get(0) team := get(1) teamID := extractUUIDFromHref(tds.Eq(1).Find("a").First().AttrOr("href", "")) played := get(2) wins := get(3) draws := get(4) losses := get(5) scoreRaw := get(6) // normalize score like "5 : 0" -> "5:0" score := scoreRaw if re := regexp.MustCompile(`\s*([0-9]+)\s*:\s*([0-9]+)\s*`); re != nil { if m := re.FindStringSubmatch(scoreRaw); len(m) == 3 { score = fmt.Sprintf("%s:%s", m[1], m[2]) } } points := get(7) rows = append(rows, TableRow{ Rank: rank, Team: team, TeamID: teamID, TeamLogoURL: getLogo(team, teamID), Played: played, Wins: wins, Draws: draws, Losses: losses, Score: score, Points: points, }) }) return false } return true }) return rows } overall = parseSection("Tabulka celková") comp.Table = &CompetitionTable{Overall: overall} } clubName := strings.TrimSpace(doc.Find("h1.H4 span").First().Text()) clubURL := strings.TrimSpace(doc.Find("h1.H4 a").First().AttrOr("href", "")) logoURL := strings.TrimSpace(doc.Find("img.Logo").First().AttrOr("src", "")) if l := strings.TrimSpace(getLogo(clubName, clubID)); l != "" { logoURL = l } category := strings.TrimSpace(doc.Find("section").First().Find("h3 span").First().Text()) address := strings.TrimSpace(doc.Find("section").First().Find("ul li").First().Text()) clubInfo := ClubInfo{ Name: clubName, ClubID: clubID, ClubType: clubType, ClubInternalID: clubInternalID, URL: clubURL, LogoURL: logoURL, Address: address, Category: category, Competitions: competitions, } w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(clubInfo) } // getClubInfo returns club info with competitions and matches func getClubInfo(w http.ResponseWriter, r *http.Request) { vars := mux.Vars(r) clubID := vars["id"] clubType := vars["type"] if clubID == "" { http.Error(w, "Club ID is required", http.StatusBadRequest) return } var baseURL, sportParam string switch clubType { case "football": baseURL = "https://www.fotbal.cz/souteze/club/club" sportParam = "fotbal" case "futsal": baseURL = "https://www.fotbal.cz/futsal/club/club" sportParam = "futsal" default: http.Error(w, "Invalid club type. Use 'football' or 'futsal'.", http.StatusBadRequest) return } url := fmt.Sprintf("%s/%s", baseURL, clubID) body, err := fetchPageWithFallback(url) if err != nil { http.Error(w, fmt.Sprintf("Error fetching club data: %v", err), http.StatusInternalServerError) return } doc, err := goquery.NewDocumentFromReader(bytes.NewReader(body)) if err != nil { http.Error(w, fmt.Sprintf("Error parsing HTML: %v", err), http.StatusInternalServerError) return } clubName := strings.TrimSpace(doc.Find("h1.H4 span").First().Text()) // Basic club metadata clubURL := fmt.Sprintf("%s/%s", baseURL, clubID) logoURL := getLogo(clubName, clubID) if logoURL == "" { logoURL = fmt.Sprintf("https://is1.fotbal.cz/media/kluby/%s/%s_crop.jpg", clubID, clubID) } category := "Fotbal" if strings.EqualFold(clubType, "futsal") { category = "Futsal" } // Internal ID clubInternalID := "" doc.Find("section").Each(func(_ int, s *goquery.Selection) { if strings.TrimSpace(s.Find("h3 span").First().Text()) == "ID klubu" { clubInternalID = strings.TrimSpace(s.Find("ul li").First().Text()) } }) // Address (best-effort) address := strings.TrimSpace(doc.Find(".ClubAddress p").First().Text()) // Competitions list var competitions []Competition doc.Find("table.Table tbody tr").Each(func(_ int, tr *goquery.Selection) { code := strings.TrimSpace(tr.Find("td:first-child").Text()) nameLink := tr.Find("td:nth-child(2) a") name := strings.TrimSpace(nameLink.Text()) teamCount := strings.TrimSpace(tr.Find("td:nth-child(3)").Text()) parts := strings.Split(strings.TrimSpace(nameLink.AttrOr("href", "")), "/") compID := "" if len(parts) >= 2 { compID = parts[len(parts)-1] } // Public table URL for convenience tableLink := "" if strings.EqualFold(clubType, "futsal") { tableLink = fmt.Sprintf("https://www.fotbal.cz/futsal/futsal/table/%s", compID) } else { tableLink = fmt.Sprintf("https://www.fotbal.cz/souteze/turnaje/table/%s", compID) } competitions = append(competitions, Competition{ID: compID, Code: code, Name: name, TeamCount: teamCount, MatchesLink: tableLink}) }) // For each competition, fetch matches for i := range competitions { comp := &competitions[i] matchesLink := comp.MatchesLink // 1) Try parsing from the public fotbal.cz competition page (matches_link) matches := parseCompetitionMatchesFromFotbal(matchesLink, clubType, clubName, clubID) // Always try IS as well and prefer it if it provides at least as many matches detailURL := fmt.Sprintf("https://is.fotbal.cz/public/souteze/detail-souteze.aspx?req=%s&sport=%s", comp.ID, sportParam) isMatches := parseCompetitionMatchesFromIS(detailURL, clubType, clubName, clubID) // Prefer IS whenever it yields any results, as IS often contains alias team names if len(isMatches) > 0 { matches = isMatches } comp.Matches = matches } clubInfo := ClubInfo{ Name: clubName, ClubID: clubID, ClubType: clubType, ClubInternalID: clubInternalID, URL: clubURL, LogoURL: logoURL, Address: address, Category: category, Competitions: competitions, } w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(clubInfo) } func main() { r := mux.NewRouter() r.HandleFunc("/club/{type}/{id}", getClubInfo).Methods("GET") r.HandleFunc("/club/{type}/{id}/table", getClubTables).Methods("GET") r.HandleFunc("/club/search", getClubSearch).Methods("GET") r.HandleFunc("/club/{id:[0-9a-fA-F-]+}", func(w http.ResponseWriter, r *http.Request) { vars := mux.Vars(r) http.Redirect(w, r, "/club/football/"+vars["id"], http.StatusMovedPermanently) }).Methods("GET") r.HandleFunc("/", docsHandler) port := ":8686" fmt.Printf("Server running on http://localhost%s\n", port) log.Fatal(http.ListenAndServe(port, r)) } // docsHandler serves a simple HTML API documentation at the root endpoint. func docsHandler(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "text/html; charset=utf-8") io.WriteString(w, ` FACR Scraper API Docs

FACR Scraper API

Status: ok — server is running.

Search Clubs

GET /club/search?q=QUERY

Find clubs on fotbal.cz. Supports football and futsal clubs.

Example: /club/search?q=Sparta

Response shape
{
  "query": "Sparta",
  "count": 2,
  "results": [
    {
      "name": "AC Sparta Praha",
      "club_id": "",
      "club_type": "football",
      "url": "https://www.fotbal.cz/...",
      "logo_url": "https://.../logo.png",
      "category": "Muži",
      "address": "..."
    }
  ]
}

Club Info + Matches

GET /club/{type}/{id}

Example: /club/football/{id}

Response shape
{
  "name": "AC Sparta Praha",
  "club_id": "00000000-0000-0000-0000-000000000000",
  "club_type": "football",
  "club_internal_id": "123456",
  "url": "https://www.fotbal.cz/...",
  "logo_url": "https://is1.fotbal.cz/media/kluby/.../logo.jpg",
  "address": "Milady Horákové 98, 160 00 Praha 6",
  "category": "Muži A",
  "competitions": [
    {
      "id": "12345",
      "code": "1. LIGA",
      "name": "Fortuna Liga",
      "team_count": "16",
      "matches_link": "https://www.fotbal.cz/...",
      "matches": [
        {
          "date_time": "12.08.2023 18:00",
          "home": "AC Sparta Praha",
          "home_id": "00000000-0000-0000-0000-000000000000",
          "home_logo_url": "https://.../sparta.png",
          "away": "SK Slavia Praha",
          "away_id": "11111111-1111-1111-1111-111111111111",
          "away_logo_url": "https://.../slavia.png",
          "score": "2:1",
          "venue": "Stadion Letná",
          "match_id": "match12345",
          "report_url": "https://www.fotbal.cz/..."
        }
      ]
    }
  ]
}

Club Tables (Standings)

GET /club/{type}/{id}/table

Returns standings (overall table) for each competition of the club.

Example: /club/football/{id}/table

Response shape
{
  "name": "AC Sparta Praha",
  "club_id": "00000000-0000-0000-0000-000000000000",
  "club_type": "football",
  "club_internal_id": "123456",
  "url": "https://www.fotbal.cz/...",
  "logo_url": "https://is1.fotbal.cz/media/kluby/.../logo.jpg",
  "competitions": [
    {
      "id": "12345",
      "code": "1. LIGA",
      "name": "Fortuna Liga",
      "team_count": "16",
      "matches_link": "https://www.fotbal.cz/...",
      "table": {
        "overall": [
          {
            "rank": "1",
            "team": "AC Sparta Praha",
            "team_id": "00000000-0000-0000-0000-000000000000",
            "team_logo_url": "https://.../sparta.png",
            "played": "10",
            "wins": "8",
            "draws": "2",
            "losses": "0",
            "score": "25:5",
            "points": "26"
          },
          {
            "rank": "2",
            "team": "SK Slavia Praha",
            "team_id": "11111111-1111-1111-1111-111111111111",
            "team_logo_url": "https://.../slavia.png",
            "played": "10",
            "wins": "7",
            "draws": "2",
            "losses": "1",
            "score": "20:8",
            "points": "23"
          }
        ]
      }
    }
  ]
}

Shortcuts

GET /club/{id} → redirects to /club/football/{id}

`) } func containsFold(s, substr string) bool { s = strings.ToLower(strings.TrimSpace(s)) substr = strings.ToLower(strings.TrimSpace(substr)) if substr == "" { return false } return strings.Contains(s, substr) } // extractUUIDFromHref finds the first UUID-like token in an href and returns it. func extractUUIDFromHref(href string) string { href = strings.TrimSpace(href) if href == "" { return "" } re := regexp.MustCompile(`[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}`) if m := re.FindString(href); m != "" { return m } // Fallback: some links may end with ID after slash; take last path token if it looks like hex+hyphenated parts := strings.Split(href, "/") if len(parts) > 0 { cand := parts[len(parts)-1] if re.MatchString(cand) { return cand } } return "" } type Match struct { DateTime string `json:"date_time"` Home string `json:"home"` HomeID string `json:"home_id,omitempty"` HomeLogoURL string `json:"home_logo_url,omitempty"` Away string `json:"away"` AwayID string `json:"away_id,omitempty"` AwayLogoURL string `json:"away_logo_url,omitempty"` Score string `json:"score"` Venue string `json:"venue"` Note string `json:"note,omitempty"` MatchID string `json:"match_id"` ReportURL string `json:"report_url,omitempty"` FACRLink string `json:"facr_link,omitempty"` DelegationURL string `json:"delegation_url,omitempty"` } // TableRow represents one row in a standings table type TableRow struct { Rank string `json:"rank"` Team string `json:"team"` TeamID string `json:"team_id,omitempty"` TeamLogoURL string `json:"team_logo_url,omitempty"` Played string `json:"played"` Wins string `json:"wins"` Draws string `json:"draws"` Losses string `json:"losses"` Score string `json:"score"` Points string `json:"points"` } // resolveISURL makes relative IS links absolute against https://is.fotbal.cz/public/ func resolveISURL(href string) string { href = strings.TrimSpace(href) if strings.HasPrefix(href, "http://") || strings.HasPrefix(href, "https://") { if u, err := neturl.Parse(href); err == nil { u.Scheme = "https" u.Host = "is.fotbal.cz" if !strings.HasPrefix(u.Path, "/public/") { if strings.HasPrefix(u.Path, "/zapasy/") { u.Path = "/public" + u.Path } } q := u.Query() q.Del("discipline") u.RawQuery = q.Encode() return u.String() } return href } href = strings.TrimPrefix(href, "./") for strings.HasPrefix(href, "../") { href = strings.TrimPrefix(href, "../") } if strings.HasPrefix(href, "/") { href = strings.TrimPrefix(href, "/") } path := "/public/" + href u := neturl.URL{Scheme: "https", Host: "is.fotbal.cz", Path: path} return u.String() }