package main import ( "bytes" "crypto/tls" "encoding/json" "fmt" "io" "log" "net/http" "net/smtp" neturl "net/url" "os" "regexp" "strings" "sync" "time" "github.com/PuerkitoBio/goquery" "github.com/gorilla/mux" ) type Competition struct { ID string `json:"id"` Code string `json:"code"` Name string `json:"name"` TeamCount string `json:"team_count"` MatchesLink string `json:"matches_link"` Matches []Match `json:"matches,omitempty"` Table *CompetitionTable `json:"table,omitempty"` } // parseCompetitionMatchesFromFotbal scrapes matches from the public fotbal.cz // competition page (e.g., https://www.fotbal.cz/souteze/turnaje/table/{id}). // It filters to only include matches involving the given clubName if provided. func parseCompetitionMatchesFromFotbal(pageURL, clubType, clubName, clubID string) []Match { pageURL = strings.TrimSpace(pageURL) if pageURL == "" { return nil } // Request with browser-like headers; some fotbal.cz pages 404 without them req, _ := http.NewRequest("GET", pageURL, nil) req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0 Safari/537.36") req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8") req.Header.Set("Accept-Language", "cs-CZ,cs;q=0.9,en;q=0.8") client := &http.Client{Timeout: 15 * time.Second} resp, err := client.Do(req) if err != nil { log.Printf("fotbal.cz matches fetch error for %s: %v", pageURL, err) return nil } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { log.Printf("fotbal.cz matches non-200 for %s: %d", pageURL, resp.StatusCode) return nil } // Read body to optionally save and to allow multiple reads body, err := io.ReadAll(resp.Body) if err != nil { log.Printf("fotbal.cz matches read error for %s: %v", pageURL, err) return nil } // Debug: save full HTML if env toggled if os.Getenv("DEBUG_SAVE_HTML") != "" { // derive a friendly filename from last URL path segment comp := pageURL if i := strings.LastIndex(comp, "/"); i >= 0 && i+1 < len(comp) { comp = comp[i+1:] } fname := fmt.Sprintf("fotbal_comp_%s.html", comp) if err := os.WriteFile(fname, body, 0644); err != nil { log.Printf("failed writing debug HTML %s: %v", fname, err) } else { log.Printf("saved debug HTML: %s", fname) } } doc, err := goquery.NewDocumentFromReader(bytes.NewReader(body)) if err != nil { log.Printf("fotbal.cz matches parse error for %s: %v", pageURL, err) return nil } var matches []Match // Sections per round doc.Find("section.js-matchRoundSection li.MatchRound").Each(func(_ int, li *goquery.Selection) { a := li.Find("a.MatchRound-match").First() if a.Length() == 0 { return } // Teams teamNames := []string{} li.Find("a.MatchRound-match ul li span.H7").Each(func(_ int, s *goquery.Selection) { t := strings.TrimSpace(s.Text()) if t != "" { teamNames = append(teamNames, t) } }) if len(teamNames) < 2 { return } home := teamNames[0] away := teamNames[1] // Try to extract team IDs from img URLs if present imgIDs := []string{} li.Find("a.MatchRound-match img").Each(func(_ int, img *goquery.Selection) { src := strings.TrimSpace(img.AttrOr("src", "")) if src == "" { return } if id := extractUUIDFromHref(src); id != "" { imgIDs = append(imgIDs, id) } }) homeID, awayID := "", "" if len(imgIDs) >= 1 { homeID = imgIDs[0] } if len(imgIDs) >= 2 { awayID = imgIDs[1] } // Score score := strings.TrimSpace(a.Find("strong.H4").First().Text()) if re := regexp.MustCompile(`\s*([0-9]+)\s*:\s*([0-9]+)\s*`); re != nil { if m := re.FindStringSubmatch(score); len(m) == 3 { score = fmt.Sprintf("%s:%s", m[1], m[2]) } } // Meta: date, match id in meta list and link dateText := "" li.Find(".MatchRound-meta p").Each(func(_ int, p *goquery.Selection) { label := strings.TrimSpace(p.Find("strong").First().Text()) txt := strings.TrimSpace(p.Text()) if strings.HasPrefix(strings.ToLower(label), "datum") { // Remove label from text dateText = strings.TrimSpace(strings.ReplaceAll(txt, label+":", "")) } }) // Venue from details, if available venue := "" li.Find(".js-matchRoundDetails li p").Each(func(_ int, p *goquery.Selection) { label := strings.TrimSpace(p.Find("strong").First().Text()) txt := strings.TrimSpace(p.Text()) if strings.HasPrefix(strings.ToLower(label), "hřiště") || strings.HasPrefix(strings.ToLower(label), "hriste") { venue = strings.TrimSpace(strings.ReplaceAll(txt, label+":", "")) } }) // Match ID from the anchor href matchID := extractUUIDFromHref(a.AttrOr("href", "")) reportURL := "" if matchID != "" { if strings.EqualFold(clubType, "futsal") { reportURL = fmt.Sprintf("https://www.fotbal.cz/futsal/zapasy/futsal/%s", matchID) } else { reportURL = fmt.Sprintf("https://www.fotbal.cz/souteze/zapasy/zapas/%s", matchID) } } // Filter by club involvement: prefer UUID match, fallback to name matching including simplified token if clubName != "" || clubID != "" { involved := false // If we could extract team UUIDs, match by ID first (robust against aliases) if clubID != "" && (strings.EqualFold(homeID, clubID) || strings.EqualFold(awayID, clubID)) { involved = true } else if clubName != "" { // Fallback to fuzzy full-name matching involved = strings.EqualFold(home, clubName) || strings.EqualFold(away, clubName) || containsFold(clubName, home) || containsFold(clubName, away) || containsFold(home, clubName) || containsFold(away, clubName) // As a last resort, try matching a simplified token (e.g., city) of the club name if !involved { token := simplifyClubQuery(clubName) if token != "" && (containsFold(home, token) || containsFold(away, token)) { involved = true } } } if !involved { return } } // Backfill IDs for current club if missing if homeID == "" { if strings.EqualFold(home, clubName) || containsFold(home, clubName) || containsFold(clubName, home) { homeID = clubID } else { token := simplifyClubQuery(clubName) if token != "" && containsFold(home, token) { homeID = clubID } } } if awayID == "" { if strings.EqualFold(away, clubName) || containsFold(away, clubName) || containsFold(clubName, away) { awayID = clubID } else { token := simplifyClubQuery(clubName) if token != "" && containsFold(away, token) { awayID = clubID } } } homeLogo := getLogo(home, homeID) awayLogo := getLogo(away, awayID) matches = append(matches, Match{ DateTime: dateText, Home: home, HomeID: homeID, HomeLogoURL: homeLogo, Away: away, AwayID: awayID, AwayLogoURL: awayLogo, Score: score, Venue: venue, MatchID: matchID, ReportURL: reportURL, FACRLink: reportURL, }) }) return matches } // parseCompetitionMatchesFromIS scrapes matches from the IS portal as fallback. func parseCompetitionMatchesFromIS(detailURL, clubType, clubName, clubID string) []Match { resp, err := http.Get(detailURL) if err != nil { log.Printf("IS matches fetch error for %s: %v", detailURL, err) return nil } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { log.Printf("IS matches non-200 for %s: %d", detailURL, resp.StatusCode) return nil } // Read body so we can optionally save and then parse from memory body, err := io.ReadAll(resp.Body) if err != nil { log.Printf("IS matches read error for %s: %v", detailURL, err) return nil } if os.Getenv("DEBUG_SAVE_HTML") != "" { // name the file using the req (competition id) if present fname := "is_detail.html" if u, err := neturl.Parse(detailURL); err == nil { req := u.Query().Get("req") sport := u.Query().Get("sport") if req != "" { fname = fmt.Sprintf("is_comp_%s_%s.html", req, sport) } } if err := os.WriteFile(fname, body, 0644); err != nil { log.Printf("failed writing debug IS HTML %s: %v", fname, err) } else { log.Printf("saved debug IS HTML: %s", fname) } } docDetail, err := goquery.NewDocumentFromReader(bytes.NewReader(body)) if err != nil { log.Printf("IS matches parse error for %s: %v", detailURL, err) return nil } var matches []Match totalRows := 0 keptRows := 0 docDetail.Find("table.soutez-zapasy tr").Each(func(_ int, s *goquery.Selection) { if s.Find("th").Length() > 0 { return } tds := s.Find("td") if tds.Length() < 5 { return } totalRows++ getText := func(sel *goquery.Selection) string { return strings.TrimSpace(sel.Text()) } dt := getText(tds.Eq(0)) rawHome := getText(tds.Eq(1)) if idx := strings.Index(rawHome, "("); idx >= 0 { rawHome = strings.TrimSpace(rawHome[:idx]) } rawAway := getText(tds.Eq(2)) if idx := strings.Index(rawAway, "("); idx >= 0 { rawAway = strings.TrimSpace(rawAway[:idx]) } homeID := extractUUIDFromHref(tds.Eq(1).Find("a").First().AttrOr("href", "")) awayID := extractUUIDFromHref(tds.Eq(2).Find("a").First().AttrOr("href", "")) rawScore := getText(tds.Eq(3)) score := "" if re := regexp.MustCompile(`(\d+)\s*:\s*(\d+)`); re != nil { if m := re.FindStringSubmatch(rawScore); len(m) == 3 { score = fmt.Sprintf("%s:%s", m[1], m[2]) } } venue := "" if tds.Length() > 4 { venue = getText(tds.Eq(4)) } var reportURL, matchID string var isReportHref, isDelegHref string // Use the last column for links to be robust to optional columns tds.Eq(tds.Length() - 1).Find("a").Each(func(_ int, a *goquery.Selection) { href := strings.TrimSpace(a.AttrOr("href", "")) if href == "" { return } if u, err := neturl.Parse(href); err == nil { if id := u.Query().Get("zapas"); id != "" { matchID = id } } // Capture specific IS links if strings.Contains(href, "zapis-o-utkani-report.aspx") { isReportHref = resolveISURL(href) } if strings.Contains(href, "zapas-delegace-report.aspx") { isDelegHref = resolveISURL(href) } }) if matchID != "" { if strings.EqualFold(clubType, "futsal") { reportURL = fmt.Sprintf("https://www.fotbal.cz/futsal/zapasy/futsal/%s", matchID) } else { reportURL = fmt.Sprintf("https://www.fotbal.cz/souteze/zapasy/zapas/%s", matchID) } } // Canonical fotbal.cz link facrLink := reportURL // Filter by club involvement: prefer UUID match, fallback to name matching with simplified token if clubName != "" || clubID != "" { involved := false if clubID != "" && (strings.EqualFold(homeID, clubID) || strings.EqualFold(awayID, clubID)) { involved = true } else if clubName != "" { involved = strings.EqualFold(rawHome, clubName) || strings.EqualFold(rawAway, clubName) || containsFold(clubName, rawHome) || containsFold(clubName, rawAway) || containsFold(rawHome, clubName) || containsFold(rawAway, clubName) if !involved { token := simplifyClubQuery(clubName) if token != "" && (containsFold(rawHome, token) || containsFold(rawAway, token)) { involved = true } } } if !involved { return } } keptRows++ if homeID == "" { if strings.EqualFold(rawHome, clubName) || containsFold(rawHome, clubName) || containsFold(clubName, rawHome) { homeID = clubID } else { token := simplifyClubQuery(clubName) if token != "" && containsFold(rawHome, token) { homeID = clubID } } } if awayID == "" { if strings.EqualFold(rawAway, clubName) || containsFold(rawAway, clubName) || containsFold(clubName, rawAway) { awayID = clubID } else { token := simplifyClubQuery(clubName) if token != "" && containsFold(rawAway, token) { awayID = clubID } } } homeLogo := getLogo(rawHome, homeID) awayLogo := getLogo(rawAway, awayID) matches = append(matches, Match{DateTime: dt, Home: rawHome, HomeID: homeID, HomeLogoURL: homeLogo, Away: rawAway, AwayID: awayID, AwayLogoURL: awayLogo, Score: score, Venue: venue, MatchID: matchID, ReportURL: func() string { if isReportHref != "" { return isReportHref } return reportURL }(), FACRLink: facrLink, DelegationURL: isDelegHref}) }) if os.Getenv("DEBUG_SAVE_HTML") != "" { log.Printf("IS parse summary for %s: total rows=%d, kept=%d", detailURL, totalRows, keptRows) } return matches } var logoCache = map[string]string{} var missingLogoNotified = map[string]bool{} var missingLogoMu sync.Mutex type logoAPISearchResult struct { ID string `json:"id"` Name string `json:"name"` LogoURL string `json:"logo_url"` HasLocalLogo bool `json:"has_local_logo"` } type searchAPIResult struct { Results []struct { Name string `json:"name"` LogoURL string `json:"logo_url"` } `json:"results"` } // a simplified search token like "krnov" to improve chances of finding a logo. func simplifyClubQuery(name string) string { s := strings.TrimSpace(name) if s == "" { return "" } parts := strings.Fields(s) if len(parts) == 0 { return "" } // Walk from the end to find a meaningful token (avoid legal suffixes like "z.s.") stop := map[string]struct{}{ "z.s.": {}, "z.s": {}, "zs": {}, "zapsany": {}, "zapsaný": {}, "spolek": {}, "o.s.": {}, "o.s": {}, "os": {}, "a.s.": {}, "a.s": {}, "as": {}, "s.r.o.": {}, "s.r.o": {}, "sro": {}, } for i := len(parts) - 1; i >= 0; i-- { tok := parts[i] tok = strings.Trim(tok, ",.;:-()[]{}\"'`“”’") lt := strings.ToLower(tok) if _, banned := stop[lt]; banned { continue } // prefer tokens with letters and length >= 3 letters := regexp.MustCompile(`[a-zA-Zá-žÁ-Ž]`).MatchString if len([]rune(lt)) >= 3 && letters(lt) { return lt } } // Fallback to last token sanitized last := strings.Trim(parts[len(parts)-1], ",.;:-()[]{}\"'`“”’") return strings.ToLower(last) } func getLogoFromLogoAPI(teamName string, teamID string) string { base := strings.TrimSpace(os.Getenv("LOGOAPI_BASE_URL")) if base == "" { return "" } base = strings.TrimRight(base, "/") name := strings.TrimSpace(teamName) id := strings.TrimSpace(teamID) if name == "" && id == "" { return "" } cacheKey := "logoapi|" + strings.ToLower(name) if id != "" { cacheKey += "|" + strings.ToLower(id) } if v, ok := logoCache[cacheKey]; ok { return v } client := &http.Client{Timeout: 5 * time.Second} doSearch := func(q string) ([]logoAPISearchResult, bool) { q = strings.TrimSpace(q) if q == "" { return nil, false } u := fmt.Sprintf("%s/clubs/search-with-logos?q=%s", base, neturl.QueryEscape(q)) resp, err := client.Get(u) if err != nil { return nil, false } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { io.Copy(io.Discard, resp.Body) return nil, false } var payload []logoAPISearchResult if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil { return nil, false } return payload, true } var candidates []logoAPISearchResult if id != "" { if payload, ok := doSearch(id); ok && len(payload) > 0 { candidates = payload } } if len(candidates) == 0 && name != "" { q := simplifyClubQuery(name) if q == "" { q = name } if payload, ok := doSearch(q); ok && len(payload) > 0 { candidates = payload } } if len(candidates) == 0 { logoCache[cacheKey] = "" return "" } var withLogo []logoAPISearchResult for _, r := range candidates { if r.HasLocalLogo { withLogo = append(withLogo, r) } } if len(withLogo) == 0 { logoCache[cacheKey] = "" return "" } var best string if id != "" { for _, r := range withLogo { if strings.EqualFold(strings.TrimSpace(r.ID), id) { best = r.LogoURL break } } } if best == "" && name != "" { for _, r := range withLogo { if strings.EqualFold(strings.TrimSpace(r.Name), name) { best = r.LogoURL break } } } if best == "" { best = withLogo[0].LogoURL } logoCache[cacheKey] = best return best } func getLogoBySearch(name string) string { key := strings.ToLower(strings.TrimSpace(name)) if key == "" { return "" } if v, ok := logoCache[key]; ok { return v } client := &http.Client{Timeout: 5 * time.Second} // Prefer simplified last-word token (e.g., "krnov") to improve hit rate for logos query := simplifyClubQuery(name) if query == "" { query = name } doSearch := func(q string) (searchAPIResult, bool) { url := fmt.Sprintf("http://localhost:8686/club/search?q=%s", neturl.QueryEscape(q)) resp, err := client.Get(url) if err != nil { return searchAPIResult{}, false } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { io.Copy(io.Discard, resp.Body) return searchAPIResult{}, false } var payload searchAPIResult if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil { return searchAPIResult{}, false } return payload, true } payload, ok := doSearch(query) if !ok || len(payload.Results) == 0 { // Fallback to full name if simplified token yields nothing payload, ok = doSearch(name) if !ok { return "" } } // pick best match: exact (case-insensitive), then contains, else first best := "" for _, r := range payload.Results { if strings.EqualFold(strings.TrimSpace(r.Name), strings.TrimSpace(name)) { best = r.LogoURL break } } if best == "" { for _, r := range payload.Results { rname := strings.ToLower(r.Name) if strings.Contains(rname, key) || strings.Contains(key, rname) { best = r.LogoURL break } } } if best == "" && len(payload.Results) > 0 { best = payload.Results[0].LogoURL } logoCache[key] = best return best } func notifyMissingLogo(teamName string, teamID string) { name := strings.ToLower(strings.TrimSpace(teamName)) id := strings.ToLower(strings.TrimSpace(teamID)) if name == "" && id == "" { return } key := name if id != "" { if key != "" { key += "|" } key += id } missingLogoMu.Lock() if missingLogoNotified[key] { missingLogoMu.Unlock() return } missingLogoNotified[key] = true missingLogoMu.Unlock() if err := sendMissingLogoEmail(teamName, teamID); err != nil { log.Printf("error sending missing logo email for %s (%s): %v", teamName, teamID, err) } } func sendMissingLogoEmail(teamName string, teamID string) error { host := strings.TrimSpace(os.Getenv("SMTP_HOST")) if host == "" { host = "smtp.purelymail.com" } port := strings.TrimSpace(os.Getenv("SMTP_PORT")) if port == "" { port = "465" } user := strings.TrimSpace(os.Getenv("SMTP_USER")) pass := os.Getenv("SMTP_PASS") if user == "" || pass == "" { return nil } to := strings.TrimSpace(os.Getenv("MISSING_LOGO_NOTIFY_TO")) if to == "" { to = "info@tdvorak.dev" } addr := host + ":" + port subject := "Missing local logo for club" body := fmt.Sprintf("A club logo is missing in logoapi.\n\nName: %s\nID: %s\nTime: %s\n", teamName, teamID, time.Now().Format(time.RFC3339)) var msg bytes.Buffer msg.WriteString("From: " + user + "\r\n") msg.WriteString("To: " + to + "\r\n") msg.WriteString("Subject: " + subject + "\r\n") msg.WriteString("MIME-Version: 1.0\r\n") msg.WriteString("Content-Type: text/plain; charset=utf-8\r\n") msg.WriteString("\r\n") msg.WriteString(body) tlsConfig := &tls.Config{ServerName: host} conn, err := tls.Dial("tcp", addr, tlsConfig) if err != nil { return err } defer conn.Close() c, err := smtp.NewClient(conn, host) if err != nil { return err } defer c.Close() if err := c.Auth(smtp.PlainAuth("", user, pass, host)); err != nil { return err } if err := c.Mail(user); err != nil { return err } if err := c.Rcpt(to); err != nil { return err } w, err := c.Data() if err != nil { return err } if _, err := w.Write(msg.Bytes()); err != nil { return err } if err := w.Close(); err != nil { return err } return c.Quit() } func getLogo(teamName string, teamID string) string { placeholder := "https://www.fotbal.cz/dist/img/logo-club-empty.svg" name := strings.ToLower(strings.TrimSpace(teamName)) if name == "" || strings.Contains(name, "volno") || strings.Contains(name, "volný los") || strings.Contains(name, "volny los") || strings.Contains(name, "bye") { return placeholder } if logo := getLogoFromLogoAPI(teamName, teamID); logo != "" { return logo } notifyMissingLogo(teamName, teamID) // If we have a team ID, construct the official logo URL directly. // This avoids wrong matches for duplicate names (e.g., multiple "Ořechov"). if tid := strings.TrimSpace(teamID); tid != "" { return fmt.Sprintf("https://is1.fotbal.cz/media/kluby/%s/%s_crop.jpg", tid, tid) } // Otherwise, try the local FACR-based search endpoint by name. if logo := getLogoBySearch(teamName); logo != "" { return logo } // No ID and no search hit -> placeholder return placeholder } // CompetitionTable holds standings sections; currently only Overall is used type CompetitionTable struct { Overall []TableRow `json:"overall"` } // ClubInfo is the response for club info and tables endpoints type ClubInfo struct { Name string `json:"name"` ClubID string `json:"club_id"` ClubType string `json:"club_type"` ClubInternalID string `json:"club_internal_id,omitempty"` URL string `json:"url,omitempty"` LogoURL string `json:"logo_url,omitempty"` Address string `json:"address,omitempty"` Category string `json:"category,omitempty"` Competitions []Competition `json:"competitions"` } // SearchResult represents one club from fotbal.cz search type SearchResult struct { Name string `json:"name"` ClubID string `json:"club_id"` ClubType string `json:"club_type"` // football or futsal URL string `json:"url"` LogoURL string `json:"logo_url"` Category string `json:"category,omitempty"` Address string `json:"address,omitempty"` } // getClubSearch queries fotbal.cz club search and returns results with logo func getClubSearch(w http.ResponseWriter, r *http.Request) { q := strings.TrimSpace(r.URL.Query().Get("q")) if q == "" { http.Error(w, "query parameter 'q' is required", http.StatusBadRequest) return } // Build search URL vals := neturl.Values{} vals.Set("q", q) searchURL := "https://www.fotbal.cz/club/hledej?" + vals.Encode() req, err := http.NewRequest("GET", searchURL, nil) if err != nil { http.Error(w, fmt.Sprintf("Error creating request: %v", err), http.StatusInternalServerError) return } // Set headers to mimic a browser; fotbal.cz may 404 otherwise req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0 Safari/537.36") req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8") req.Header.Set("Accept-Language", "cs-CZ,cs;q=0.9,en;q=0.8") req.Header.Set("Referer", "https://www.fotbal.cz/club/hledej") client := &http.Client{} resp, err := client.Do(req) if err != nil { http.Error(w, fmt.Sprintf("Error fetching search page: %v", err), http.StatusInternalServerError) return } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { // Retry once. If query has very short tokens, try quoting the whole query. resp.Body.Close() searchURL2 := searchURL tokens := strings.Fields(q) for _, t := range tokens { if len([]rune(t)) <= 2 { vals2 := neturl.Values{} vals2.Set("q", "\""+q+"\"") searchURL2 = "https://www.fotbal.cz/club/hledej?" + vals2.Encode() break } } req2, _ := http.NewRequest("GET", searchURL2, nil) req2.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0 Safari/537.36") req2.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") req2.Header.Set("Accept-Language", "en-US,en;q=0.9") resp2, err2 := client.Do(req2) if err2 != nil { http.Error(w, fmt.Sprintf("Error fetching (retry): %v", err2), http.StatusBadGateway) return } defer resp2.Body.Close() if resp2.StatusCode != http.StatusOK { // Treat as no results instead of surfacing error to client w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(map[string]any{ "query": q, "count": 0, "results": []SearchResult{}, }) return } // replace resp with resp2 for downstream parsing resp = resp2 } doc, err := goquery.NewDocumentFromReader(resp.Body) if err != nil { http.Error(w, fmt.Sprintf("Error parsing HTML: %v", err), http.StatusInternalServerError) return } var results []SearchResult // The page lists clubs in section "Výsledky hledání" as li.ListItemSplit doc.Find("li.ListItemSplit").Each(func(_ int, li *goquery.Selection) { a := li.Find("a.Link--inverted").First() href, _ := a.Attr("href") if href == "" { return } name := strings.TrimSpace(a.Find("span.H7").First().Text()) if name == "" { // fallback to link text name = strings.TrimSpace(a.Text()) } img := a.Find("img").First() logoURL, _ := img.Attr("src") // Category category := strings.TrimSpace(li.Find(".ClubCategories .BadgeCategory").First().Text()) // Address address := strings.TrimSpace(li.Find(".ClubAddress p").First().Text()) // Infer club type from href clubType := "football" if strings.Contains(strings.ToLower(href), "/futsal/") { clubType = "futsal" } // Extract club ID from last path segment // e.g., https://www.fotbal.cz/futsal/club/club/{uuid} parts := strings.Split(strings.TrimRight(href, "/"), "/") clubID := "" if len(parts) > 0 { clubID = parts[len(parts)-1] } // Normalize URL (ensure absolute) if !strings.HasPrefix(href, "http://") && !strings.HasPrefix(href, "https://") { href = "https://www.fotbal.cz" + href } results = append(results, SearchResult{ Name: name, ClubID: clubID, ClubType: clubType, URL: href, LogoURL: logoURL, Category: category, Address: address, }) }) w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(map[string]any{ "query": q, "count": len(results), "results": results, }) } // getClubTables returns club info with competition standings tables (no matches) func getClubTables(w http.ResponseWriter, r *http.Request) { vars := mux.Vars(r) clubID := vars["id"] clubType := vars["type"] if clubID == "" { http.Error(w, "Club ID is required", http.StatusBadRequest) return } // Validate club type var baseURL string var sportParam string switch clubType { case "football": baseURL = "https://www.fotbal.cz/souteze/club/club" sportParam = "fotbal" case "futsal": baseURL = "https://www.fotbal.cz/futsal/club/club" sportParam = "futsal" default: http.Error(w, "Invalid club type. Use 'football' or 'futsal'.", http.StatusBadRequest) return } url := fmt.Sprintf("%s/%s", baseURL, clubID) resp, err := http.Get(url) if err != nil { http.Error(w, fmt.Sprintf("Error fetching club data: %v", err), http.StatusInternalServerError) return } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { http.Error(w, fmt.Sprintf("Error: received status code %d", resp.StatusCode), resp.StatusCode) return } doc, err := goquery.NewDocumentFromReader(resp.Body) if err != nil { http.Error(w, fmt.Sprintf("Error parsing HTML: %v", err), http.StatusInternalServerError) return } // Extract club internal ID clubInternalID := "" doc.Find("section").Each(func(i int, s *goquery.Selection) { headerText := s.Find("h3 span").First().Text() if strings.TrimSpace(headerText) == "ID klubu" { clubInternalID = strings.TrimSpace(s.Find("ul li").First().Text()) } }) // Extract competitions var competitions []Competition doc.Find("table.Table tbody tr").Each(func(i int, s *goquery.Selection) { code := strings.TrimSpace(s.Find("td:first-child").Text()) nameLink := s.Find("td:nth-child(2) a") name := strings.TrimSpace(nameLink.Text()) teamCount := strings.TrimSpace(s.Find("td:nth-child(3)").Text()) // Extract competition ID from the link parts := strings.Split(nameLink.AttrOr("href", ""), "/") compID := "" if len(parts) >= 2 { compID = parts[len(parts)-1] } // Build public table link depending on clubType tableLink := "" if strings.EqualFold(clubType, "futsal") { tableLink = fmt.Sprintf("https://www.fotbal.cz/futsal/futsal/table/%s", compID) } else { tableLink = fmt.Sprintf("https://www.fotbal.cz/souteze/turnaje/table/%s", compID) } competitions = append(competitions, Competition{ ID: compID, Code: code, Name: name, TeamCount: teamCount, MatchesLink: tableLink, }) }) // For each competition, fetch the standings tables from is.fotbal.cz for i := range competitions { comp := &competitions[i] tableURL := fmt.Sprintf("https://is.fotbal.cz/public/souteze/tabulky-souteze.aspx?req=%s&sport=%s", comp.ID, sportParam) resp, err := http.Get(tableURL) if err != nil { log.Printf("error fetching competition table for %s: %v", comp.ID, err) continue } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { log.Printf("non-200 response for %s: %d", comp.ID, resp.StatusCode) continue } docTable, err := goquery.NewDocumentFromReader(resp.Body) if err != nil { log.Printf("error parsing table HTML for %s: %v", comp.ID, err) continue } // Parse section: Tabulka celková (only overall) var overall []TableRow parseSection := func(headerText string) []TableRow { var rows []TableRow // Find the h3 with matching text, then the following .list.tabulky table docTable.Find("h3").EachWithBreak(func(_ int, h3 *goquery.Selection) bool { if strings.EqualFold(strings.TrimSpace(h3.Text()), headerText) { list := h3.NextAllFiltered("div.list.tabulky").First() if list.Length() == 0 { return false } table := list.Find("table.vysledky-tabulky tbody") table.Find("tr").Each(func(_ int, tr *goquery.Selection) { // skip header rows containing th if tr.Find("th").Length() > 0 { return } tds := tr.Find("td") if tds.Length() < 8 { return } get := func(i int) string { return strings.TrimSpace(tds.Eq(i).Text()) } rank := get(0) team := get(1) teamID := extractUUIDFromHref(tds.Eq(1).Find("a").First().AttrOr("href", "")) played := get(2) wins := get(3) draws := get(4) losses := get(5) scoreRaw := get(6) // normalize score like "5 : 0" -> "5:0" score := scoreRaw if re := regexp.MustCompile(`\s*([0-9]+)\s*:\s*([0-9]+)\s*`); re != nil { if m := re.FindStringSubmatch(scoreRaw); len(m) == 3 { score = fmt.Sprintf("%s:%s", m[1], m[2]) } } points := get(7) rows = append(rows, TableRow{ Rank: rank, Team: team, TeamID: teamID, TeamLogoURL: getLogo(team, teamID), Played: played, Wins: wins, Draws: draws, Losses: losses, Score: score, Points: points, }) }) return false } return true }) return rows } overall = parseSection("Tabulka celková") comp.Table = &CompetitionTable{Overall: overall} } clubName := strings.TrimSpace(doc.Find("h1.H4 span").First().Text()) clubURL := strings.TrimSpace(doc.Find("h1.H4 a").First().AttrOr("href", "")) logoURL := strings.TrimSpace(doc.Find("img.Logo").First().AttrOr("src", "")) category := strings.TrimSpace(doc.Find("section").First().Find("h3 span").First().Text()) address := strings.TrimSpace(doc.Find("section").First().Find("ul li").First().Text()) clubInfo := ClubInfo{ Name: clubName, ClubID: clubID, ClubType: clubType, ClubInternalID: clubInternalID, URL: clubURL, LogoURL: logoURL, Address: address, Category: category, Competitions: competitions, } w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(clubInfo) } // getClubInfo returns club info with competitions and matches func getClubInfo(w http.ResponseWriter, r *http.Request) { vars := mux.Vars(r) clubID := vars["id"] clubType := vars["type"] if clubID == "" { http.Error(w, "Club ID is required", http.StatusBadRequest) return } var baseURL, sportParam string switch clubType { case "football": baseURL = "https://www.fotbal.cz/souteze/club/club" sportParam = "fotbal" case "futsal": baseURL = "https://www.fotbal.cz/futsal/club/club" sportParam = "futsal" default: http.Error(w, "Invalid club type. Use 'football' or 'futsal'.", http.StatusBadRequest) return } url := fmt.Sprintf("%s/%s", baseURL, clubID) resp, err := http.Get(url) if err != nil { http.Error(w, fmt.Sprintf("Error fetching club data: %v", err), http.StatusInternalServerError) return } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { http.Error(w, fmt.Sprintf("Error: received status code %d", resp.StatusCode), resp.StatusCode) return } doc, err := goquery.NewDocumentFromReader(resp.Body) if err != nil { http.Error(w, fmt.Sprintf("Error parsing HTML: %v", err), http.StatusInternalServerError) return } clubName := strings.TrimSpace(doc.Find("h1.H4 span").First().Text()) // Basic club metadata clubURL := fmt.Sprintf("%s/%s", baseURL, clubID) logoURL := fmt.Sprintf("https://is1.fotbal.cz/media/kluby/%s/%s_crop.jpg", clubID, clubID) category := "Fotbal" if strings.EqualFold(clubType, "futsal") { category = "Futsal" } // Internal ID clubInternalID := "" doc.Find("section").Each(func(_ int, s *goquery.Selection) { if strings.TrimSpace(s.Find("h3 span").First().Text()) == "ID klubu" { clubInternalID = strings.TrimSpace(s.Find("ul li").First().Text()) } }) // Address (best-effort) address := strings.TrimSpace(doc.Find(".ClubAddress p").First().Text()) // Competitions list var competitions []Competition doc.Find("table.Table tbody tr").Each(func(_ int, tr *goquery.Selection) { code := strings.TrimSpace(tr.Find("td:first-child").Text()) nameLink := tr.Find("td:nth-child(2) a") name := strings.TrimSpace(nameLink.Text()) teamCount := strings.TrimSpace(tr.Find("td:nth-child(3)").Text()) parts := strings.Split(strings.TrimSpace(nameLink.AttrOr("href", "")), "/") compID := "" if len(parts) >= 2 { compID = parts[len(parts)-1] } // Public table URL for convenience tableLink := "" if strings.EqualFold(clubType, "futsal") { tableLink = fmt.Sprintf("https://www.fotbal.cz/futsal/futsal/table/%s", compID) } else { tableLink = fmt.Sprintf("https://www.fotbal.cz/souteze/turnaje/table/%s", compID) } competitions = append(competitions, Competition{ID: compID, Code: code, Name: name, TeamCount: teamCount, MatchesLink: tableLink}) }) // For each competition, fetch matches for i := range competitions { comp := &competitions[i] matchesLink := comp.MatchesLink // 1) Try parsing from the public fotbal.cz competition page (matches_link) matches := parseCompetitionMatchesFromFotbal(matchesLink, clubType, clubName, clubID) // Always try IS as well and prefer it if it provides at least as many matches detailURL := fmt.Sprintf("https://is.fotbal.cz/public/souteze/detail-souteze.aspx?req=%s&sport=%s", comp.ID, sportParam) isMatches := parseCompetitionMatchesFromIS(detailURL, clubType, clubName, clubID) // Prefer IS whenever it yields any results, as IS often contains alias team names if len(isMatches) > 0 { matches = isMatches } comp.Matches = matches } clubInfo := ClubInfo{ Name: clubName, ClubID: clubID, ClubType: clubType, ClubInternalID: clubInternalID, URL: clubURL, LogoURL: logoURL, Address: address, Category: category, Competitions: competitions, } w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(clubInfo) } func main() { r := mux.NewRouter() r.HandleFunc("/club/{type}/{id}", getClubInfo).Methods("GET") r.HandleFunc("/club/{type}/{id}/table", getClubTables).Methods("GET") r.HandleFunc("/club/search", getClubSearch).Methods("GET") r.HandleFunc("/club/{id:[0-9a-fA-F-]+}", func(w http.ResponseWriter, r *http.Request) { vars := mux.Vars(r) http.Redirect(w, r, "/club/football/"+vars["id"], http.StatusMovedPermanently) }).Methods("GET") r.HandleFunc("/", docsHandler) port := ":8686" fmt.Printf("Server running on http://localhost%s\n", port) log.Fatal(http.ListenAndServe(port, r)) } // docsHandler serves a simple HTML API documentation at the root endpoint. func docsHandler(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "text/html; charset=utf-8") io.WriteString(w, ` FACR Scraper API Docs

FACR Scraper API

Status: ok — server is running.

Search Clubs

GET /club/search?q=QUERY

Find clubs on fotbal.cz. Supports football and futsal clubs.

Example: /club/search?q=Sparta

Response shape
{
  "query": "Sparta",
  "count": 2,
  "results": [
    {
      "name": "AC Sparta Praha",
      "club_id": "",
      "club_type": "football",
      "url": "https://www.fotbal.cz/...",
      "logo_url": "https://.../logo.png",
      "category": "Muži",
      "address": "..."
    }
  ]
}

Club Info + Matches

GET /club/{type}/{id}

Example: /club/football/{id}

Response shape
{
  "name": "AC Sparta Praha",
  "club_id": "00000000-0000-0000-0000-000000000000",
  "club_type": "football",
  "club_internal_id": "123456",
  "url": "https://www.fotbal.cz/...",
  "logo_url": "https://is1.fotbal.cz/media/kluby/.../logo.jpg",
  "address": "Milady Horákové 98, 160 00 Praha 6",
  "category": "Muži A",
  "competitions": [
    {
      "id": "12345",
      "code": "1. LIGA",
      "name": "Fortuna Liga",
      "team_count": "16",
      "matches_link": "https://www.fotbal.cz/...",
      "matches": [
        {
          "date_time": "12.08.2023 18:00",
          "home": "AC Sparta Praha",
          "home_id": "00000000-0000-0000-0000-000000000000",
          "home_logo_url": "https://.../sparta.png",
          "away": "SK Slavia Praha",
          "away_id": "11111111-1111-1111-1111-111111111111",
          "away_logo_url": "https://.../slavia.png",
          "score": "2:1",
          "venue": "Stadion Letná",
          "match_id": "match12345",
          "report_url": "https://www.fotbal.cz/..."
        }
      ]
    }
  ]
}

Club Tables (Standings)

GET /club/{type}/{id}/table

Returns standings (overall table) for each competition of the club.

Example: /club/football/{id}/table

Response shape
{
  "name": "AC Sparta Praha",
  "club_id": "00000000-0000-0000-0000-000000000000",
  "club_type": "football",
  "club_internal_id": "123456",
  "url": "https://www.fotbal.cz/...",
  "logo_url": "https://is1.fotbal.cz/media/kluby/.../logo.jpg",
  "competitions": [
    {
      "id": "12345",
      "code": "1. LIGA",
      "name": "Fortuna Liga",
      "team_count": "16",
      "matches_link": "https://www.fotbal.cz/...",
      "table": {
        "overall": [
          {
            "rank": "1",
            "team": "AC Sparta Praha",
            "team_id": "00000000-0000-0000-0000-000000000000",
            "team_logo_url": "https://.../sparta.png",
            "played": "10",
            "wins": "8",
            "draws": "2",
            "losses": "0",
            "score": "25:5",
            "points": "26"
          },
          {
            "rank": "2",
            "team": "SK Slavia Praha",
            "team_id": "11111111-1111-1111-1111-111111111111",
            "team_logo_url": "https://.../slavia.png",
            "played": "10",
            "wins": "7",
            "draws": "2",
            "losses": "1",
            "score": "20:8",
            "points": "23"
          }
        ]
      }
    }
  ]
}

Shortcuts

GET /club/{id} → redirects to /club/football/{id}

`) } func containsFold(s, substr string) bool { s = strings.ToLower(strings.TrimSpace(s)) substr = strings.ToLower(strings.TrimSpace(substr)) if substr == "" { return false } return strings.Contains(s, substr) } // extractUUIDFromHref finds the first UUID-like token in an href and returns it. func extractUUIDFromHref(href string) string { href = strings.TrimSpace(href) if href == "" { return "" } re := regexp.MustCompile(`[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}`) if m := re.FindString(href); m != "" { return m } // Fallback: some links may end with ID after slash; take last path token if it looks like hex+hyphenated parts := strings.Split(href, "/") if len(parts) > 0 { cand := parts[len(parts)-1] if re.MatchString(cand) { return cand } } return "" } type Match struct { DateTime string `json:"date_time"` Home string `json:"home"` HomeID string `json:"home_id,omitempty"` HomeLogoURL string `json:"home_logo_url,omitempty"` Away string `json:"away"` AwayID string `json:"away_id,omitempty"` AwayLogoURL string `json:"away_logo_url,omitempty"` Score string `json:"score"` Venue string `json:"venue"` Note string `json:"note,omitempty"` MatchID string `json:"match_id"` ReportURL string `json:"report_url,omitempty"` FACRLink string `json:"facr_link,omitempty"` DelegationURL string `json:"delegation_url,omitempty"` } // TableRow represents one row in a standings table type TableRow struct { Rank string `json:"rank"` Team string `json:"team"` TeamID string `json:"team_id,omitempty"` TeamLogoURL string `json:"team_logo_url,omitempty"` Played string `json:"played"` Wins string `json:"wins"` Draws string `json:"draws"` Losses string `json:"losses"` Score string `json:"score"` Points string `json:"points"` } // resolveISURL makes relative IS links absolute against https://is.fotbal.cz/public/ func resolveISURL(href string) string { href = strings.TrimSpace(href) if strings.HasPrefix(href, "http://") || strings.HasPrefix(href, "https://") { if u, err := neturl.Parse(href); err == nil { u.Scheme = "https" u.Host = "is.fotbal.cz" if !strings.HasPrefix(u.Path, "/public/") { if strings.HasPrefix(u.Path, "/zapasy/") { u.Path = "/public" + u.Path } } q := u.Query() q.Del("discipline") u.RawQuery = q.Encode() return u.String() } return href } href = strings.TrimPrefix(href, "./") for strings.HasPrefix(href, "../") { href = strings.TrimPrefix(href, "../") } if strings.HasPrefix(href, "/") { href = strings.TrimPrefix(href, "/") } path := "/public/" + href u := neturl.URL{Scheme: "https", Host: "is.fotbal.cz", Path: path} return u.String() }