This commit is contained in:
Tomas Dvorak
2025-12-01 10:29:42 +01:00
parent 8ea7df2410
commit 4ebbf1155c
3 changed files with 700 additions and 2279 deletions
+6
View File
@@ -0,0 +1,6 @@
export LOGOAPI_BASE_URL="http://localhost:8080" # or your real logoapi base URL
export SMTP_HOST="smtp.purelymail.com"
export SMTP_PORT="465"
export SMTP_USER="info@tdvorak.dev"
export SMTP_PASS="%8s3Yad*!b3*t"
export MISSING_LOGO_NOTIFY_TO="info@tdvorak.dev"
-1821
View File
File diff suppressed because it is too large Load Diff
+258 -22
View File
@@ -2,15 +2,18 @@ package main
import ( import (
"bytes" "bytes"
"crypto/tls"
"encoding/json" "encoding/json"
"fmt" "fmt"
"io" "io"
"log" "log"
"net/http" "net/http"
"net/smtp"
neturl "net/url" neturl "net/url"
"os" "os"
"regexp" "regexp"
"strings" "strings"
"sync"
"time" "time"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
@@ -101,14 +104,20 @@ func parseCompetitionMatchesFromFotbal(pageURL, clubType, clubName, clubID strin
imgIDs := []string{} imgIDs := []string{}
li.Find("a.MatchRound-match img").Each(func(_ int, img *goquery.Selection) { li.Find("a.MatchRound-match img").Each(func(_ int, img *goquery.Selection) {
src := strings.TrimSpace(img.AttrOr("src", "")) src := strings.TrimSpace(img.AttrOr("src", ""))
if src == "" { return } if src == "" {
return
}
if id := extractUUIDFromHref(src); id != "" { if id := extractUUIDFromHref(src); id != "" {
imgIDs = append(imgIDs, id) imgIDs = append(imgIDs, id)
} }
}) })
homeID, awayID := "", "" homeID, awayID := "", ""
if len(imgIDs) >= 1 { homeID = imgIDs[0] } if len(imgIDs) >= 1 {
if len(imgIDs) >= 2 { awayID = imgIDs[1] } homeID = imgIDs[0]
}
if len(imgIDs) >= 2 {
awayID = imgIDs[1]
}
// Score // Score
score := strings.TrimSpace(a.Find("strong.H4").First().Text()) score := strings.TrimSpace(a.Find("strong.H4").First().Text())
if re := regexp.MustCompile(`\s*([0-9]+)\s*:\s*([0-9]+)\s*`); re != nil { if re := regexp.MustCompile(`\s*([0-9]+)\s*:\s*([0-9]+)\s*`); re != nil {
@@ -164,7 +173,9 @@ func parseCompetitionMatchesFromFotbal(pageURL, clubType, clubName, clubID strin
} }
} }
} }
if !involved { return } if !involved {
return
}
} }
// Backfill IDs for current club if missing // Backfill IDs for current club if missing
if homeID == "" { if homeID == "" {
@@ -246,33 +257,49 @@ func parseCompetitionMatchesFromIS(detailURL, clubType, clubName, clubID string)
totalRows := 0 totalRows := 0
keptRows := 0 keptRows := 0
docDetail.Find("table.soutez-zapasy tr").Each(func(_ int, s *goquery.Selection) { docDetail.Find("table.soutez-zapasy tr").Each(func(_ int, s *goquery.Selection) {
if s.Find("th").Length() > 0 { return } if s.Find("th").Length() > 0 {
return
}
tds := s.Find("td") tds := s.Find("td")
if tds.Length() < 5 { return } if tds.Length() < 5 {
return
}
totalRows++ totalRows++
getText := func(sel *goquery.Selection) string { return strings.TrimSpace(sel.Text()) } getText := func(sel *goquery.Selection) string { return strings.TrimSpace(sel.Text()) }
dt := getText(tds.Eq(0)) dt := getText(tds.Eq(0))
rawHome := getText(tds.Eq(1)) rawHome := getText(tds.Eq(1))
if idx := strings.Index(rawHome, "("); idx >= 0 { rawHome = strings.TrimSpace(rawHome[:idx]) } if idx := strings.Index(rawHome, "("); idx >= 0 {
rawHome = strings.TrimSpace(rawHome[:idx])
}
rawAway := getText(tds.Eq(2)) rawAway := getText(tds.Eq(2))
if idx := strings.Index(rawAway, "("); idx >= 0 { rawAway = strings.TrimSpace(rawAway[:idx]) } if idx := strings.Index(rawAway, "("); idx >= 0 {
rawAway = strings.TrimSpace(rawAway[:idx])
}
homeID := extractUUIDFromHref(tds.Eq(1).Find("a").First().AttrOr("href", "")) homeID := extractUUIDFromHref(tds.Eq(1).Find("a").First().AttrOr("href", ""))
awayID := extractUUIDFromHref(tds.Eq(2).Find("a").First().AttrOr("href", "")) awayID := extractUUIDFromHref(tds.Eq(2).Find("a").First().AttrOr("href", ""))
rawScore := getText(tds.Eq(3)) rawScore := getText(tds.Eq(3))
score := "" score := ""
if re := regexp.MustCompile(`(\d+)\s*:\s*(\d+)`); re != nil { if re := regexp.MustCompile(`(\d+)\s*:\s*(\d+)`); re != nil {
if m := re.FindStringSubmatch(rawScore); len(m) == 3 { score = fmt.Sprintf("%s:%s", m[1], m[2]) } if m := re.FindStringSubmatch(rawScore); len(m) == 3 {
score = fmt.Sprintf("%s:%s", m[1], m[2])
}
} }
venue := "" venue := ""
if tds.Length() > 4 { venue = getText(tds.Eq(4)) } if tds.Length() > 4 {
venue = getText(tds.Eq(4))
}
var reportURL, matchID string var reportURL, matchID string
var isReportHref, isDelegHref string var isReportHref, isDelegHref string
// Use the last column for links to be robust to optional columns // Use the last column for links to be robust to optional columns
tds.Eq(tds.Length()-1).Find("a").Each(func(_ int, a *goquery.Selection) { tds.Eq(tds.Length() - 1).Find("a").Each(func(_ int, a *goquery.Selection) {
href := strings.TrimSpace(a.AttrOr("href", "")) href := strings.TrimSpace(a.AttrOr("href", ""))
if href == "" { return } if href == "" {
return
}
if u, err := neturl.Parse(href); err == nil { if u, err := neturl.Parse(href); err == nil {
if id := u.Query().Get("zapas"); id != "" { matchID = id } if id := u.Query().Get("zapas"); id != "" {
matchID = id
}
} }
// Capture specific IS links // Capture specific IS links
if strings.Contains(href, "zapis-o-utkani-report.aspx") { if strings.Contains(href, "zapis-o-utkani-report.aspx") {
@@ -307,32 +334,59 @@ func parseCompetitionMatchesFromIS(detailURL, clubType, clubName, clubID string)
} }
} }
} }
if !involved { return } if !involved {
return
}
} }
keptRows++ keptRows++
if homeID == "" { if homeID == "" {
if strings.EqualFold(rawHome, clubName) || containsFold(rawHome, clubName) || containsFold(clubName, rawHome) { homeID = clubID } else { if strings.EqualFold(rawHome, clubName) || containsFold(rawHome, clubName) || containsFold(clubName, rawHome) {
homeID = clubID
} else {
token := simplifyClubQuery(clubName) token := simplifyClubQuery(clubName)
if token != "" && containsFold(rawHome, token) { homeID = clubID } if token != "" && containsFold(rawHome, token) {
homeID = clubID
}
} }
} }
if awayID == "" { if awayID == "" {
if strings.EqualFold(rawAway, clubName) || containsFold(rawAway, clubName) || containsFold(clubName, rawAway) { awayID = clubID } else { if strings.EqualFold(rawAway, clubName) || containsFold(rawAway, clubName) || containsFold(clubName, rawAway) {
awayID = clubID
} else {
token := simplifyClubQuery(clubName) token := simplifyClubQuery(clubName)
if token != "" && containsFold(rawAway, token) { awayID = clubID } if token != "" && containsFold(rawAway, token) {
awayID = clubID
}
} }
} }
homeLogo := getLogo(rawHome, homeID) homeLogo := getLogo(rawHome, homeID)
awayLogo := getLogo(rawAway, awayID) awayLogo := getLogo(rawAway, awayID)
matches = append(matches, Match{DateTime: dt, Home: rawHome, HomeID: homeID, HomeLogoURL: homeLogo, Away: rawAway, AwayID: awayID, AwayLogoURL: awayLogo, Score: score, Venue: venue, MatchID: matchID, ReportURL: func() string { if isReportHref != "" { return isReportHref }; return reportURL }(), FACRLink: facrLink, DelegationURL: isDelegHref}) matches = append(matches, Match{DateTime: dt, Home: rawHome, HomeID: homeID, HomeLogoURL: homeLogo, Away: rawAway, AwayID: awayID, AwayLogoURL: awayLogo, Score: score, Venue: venue, MatchID: matchID, ReportURL: func() string {
if isReportHref != "" {
return isReportHref
}
return reportURL
}(), FACRLink: facrLink, DelegationURL: isDelegHref})
}) })
if os.Getenv("DEBUG_SAVE_HTML") != "" { if os.Getenv("DEBUG_SAVE_HTML") != "" {
log.Printf("IS parse summary for %s: total rows=%d, kept=%d", detailURL, totalRows, keptRows) log.Printf("IS parse summary for %s: total rows=%d, kept=%d", detailURL, totalRows, keptRows)
} }
return matches return matches
} }
var logoCache = map[string]string{} var logoCache = map[string]string{}
var missingLogoNotified = map[string]bool{}
var missingLogoMu sync.Mutex
// logoAPISearchResult is one element of the JSON array that
// getLogoFromLogoAPI decodes from the logoapi
// "/clubs/search-with-logos" endpoint.
type logoAPISearchResult struct {
// ID is compared case-insensitively with the requested team ID when
// picking the best candidate.
ID string `json:"id"`
// Name is compared case-insensitively with the requested team name when
// no candidate matches by ID.
Name string `json:"name"`
// LogoURL is the value handed back to the caller for the chosen candidate.
LogoURL string `json:"logo_url"`
// HasLocalLogo gates candidates: entries where it is false are ignored by
// getLogoFromLogoAPI.
HasLocalLogo bool `json:"has_local_logo"`
}
type searchAPIResult struct { type searchAPIResult struct {
Results []struct { Results []struct {
Name string `json:"name"` Name string `json:"name"`
@@ -374,6 +428,99 @@ func simplifyClubQuery(name string) string {
return strings.ToLower(last) return strings.ToLower(last)
} }
// getLogoFromLogoAPI resolves a club logo URL through the logoapi service whose
// base URL is read from the LOGOAPI_BASE_URL environment variable.
//
// The service is searched by teamID first (when non-blank). If that yields no
// results it is searched by the simplified club name from simplifyClubQuery,
// falling back to the full trimmed name when the simplified form is empty.
// Only results flagged has_local_logo are considered; among those an exact
// case-insensitive ID match wins, then an exact case-insensitive name match,
// and otherwise the first remaining result is used.
//
// It returns "" when LOGOAPI_BASE_URL is unset, when both inputs are blank, or
// when no usable result is found.
//
// Outcomes are memoised in logoCache, but only when every HTTP lookup made
// during the call completed cleanly. A transport error, a non-200 status or an
// undecodable body means "could not ask", not "does not exist", so such calls
// are left uncached and retried on the next request instead of disabling the
// lookup for the rest of the process lifetime.
//
// NOTE(review): logoCache is a plain map accessed here without a lock —
// confirm that callers never invoke this function from multiple goroutines.
func getLogoFromLogoAPI(teamName string, teamID string) string {
	base := strings.TrimSpace(os.Getenv("LOGOAPI_BASE_URL"))
	if base == "" {
		return ""
	}
	base = strings.TrimRight(base, "/")

	name := strings.TrimSpace(teamName)
	id := strings.TrimSpace(teamID)
	if name == "" && id == "" {
		return ""
	}

	cacheKey := "logoapi|" + strings.ToLower(name)
	if id != "" {
		cacheKey += "|" + strings.ToLower(id)
	}
	if v, ok := logoCache[cacheKey]; ok {
		return v
	}

	client := &http.Client{Timeout: 5 * time.Second}

	// lookupFailed records that at least one request did not produce a
	// trustworthy answer; see remember below.
	lookupFailed := false
	doSearch := func(q string) []logoAPISearchResult {
		q = strings.TrimSpace(q)
		if q == "" {
			return nil
		}
		u := fmt.Sprintf("%s/clubs/search-with-logos?q=%s", base, neturl.QueryEscape(q))
		resp, err := client.Get(u)
		if err != nil {
			lookupFailed = true
			return nil
		}
		defer resp.Body.Close()
		if resp.StatusCode != http.StatusOK {
			// Best-effort drain so the connection can be reused; a failure
			// here changes nothing about the outcome.
			_, _ = io.Copy(io.Discard, resp.Body)
			lookupFailed = true
			return nil
		}
		var payload []logoAPISearchResult
		if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
			lookupFailed = true
			return nil
		}
		return payload
	}

	// remember memoises the outcome only when it reflects a real answer from
	// the service, and returns it unchanged either way.
	remember := func(logo string) string {
		if !lookupFailed {
			logoCache[cacheKey] = logo
		}
		return logo
	}

	var candidates []logoAPISearchResult
	if id != "" {
		candidates = doSearch(id)
	}
	if len(candidates) == 0 && name != "" {
		q := simplifyClubQuery(name)
		if q == "" {
			q = name
		}
		candidates = doSearch(q)
	}
	if len(candidates) == 0 {
		return remember("")
	}

	var withLogo []logoAPISearchResult
	for _, r := range candidates {
		if r.HasLocalLogo {
			withLogo = append(withLogo, r)
		}
	}
	if len(withLogo) == 0 {
		return remember("")
	}

	var best string
	if id != "" {
		for _, r := range withLogo {
			if strings.EqualFold(strings.TrimSpace(r.ID), id) {
				best = r.LogoURL
				break
			}
		}
	}
	if best == "" && name != "" {
		for _, r := range withLogo {
			if strings.EqualFold(strings.TrimSpace(r.Name), name) {
				best = r.LogoURL
				break
			}
		}
	}
	if best == "" {
		best = withLogo[0].LogoURL
	}
	return remember(best)
}
func getLogoBySearch(name string) string { func getLogoBySearch(name string) string {
key := strings.ToLower(strings.TrimSpace(name)) key := strings.ToLower(strings.TrimSpace(name))
if key == "" { if key == "" {
@@ -388,7 +535,6 @@ func getLogoBySearch(name string) string {
if query == "" { if query == "" {
query = name query = name
} }
doSearch := func(q string) (searchAPIResult, bool) { doSearch := func(q string) (searchAPIResult, bool) {
url := fmt.Sprintf("http://localhost:8080/club/search?q=%s", neturl.QueryEscape(q)) url := fmt.Sprintf("http://localhost:8080/club/search?q=%s", neturl.QueryEscape(q))
resp, err := client.Get(url) resp, err := client.Get(url)
@@ -406,7 +552,6 @@ func getLogoBySearch(name string) string {
} }
return payload, true return payload, true
} }
payload, ok := doSearch(query) payload, ok := doSearch(query)
if !ok || len(payload.Results) == 0 { if !ok || len(payload.Results) == 0 {
// Fallback to full name if simplified token yields nothing // Fallback to full name if simplified token yields nothing
@@ -439,18 +584,109 @@ func getLogoBySearch(name string) string {
return best return best
} }
// notifyMissingLogo sends a "missing logo" e-mail for the given club at most
// once per distinct (name, ID) pair for the lifetime of the process.
//
// The de-duplication key is built from the lower-cased, trimmed name and ID,
// joined with "|" when both are present. If both are blank nothing happens.
// The pair is marked as notified before the e-mail is attempted, so a failed
// send is logged but not retried.
func notifyMissingLogo(teamName string, teamID string) {
	// Collect the non-empty normalised parts; their join is the dedup key.
	parts := make([]string, 0, 2)
	for _, raw := range []string{teamName, teamID} {
		if norm := strings.ToLower(strings.TrimSpace(raw)); norm != "" {
			parts = append(parts, norm)
		}
	}
	if len(parts) == 0 {
		return
	}
	dedupKey := strings.Join(parts, "|")

	// Check-and-mark under the lock so only one caller proceeds to send.
	firstTime := func() bool {
		missingLogoMu.Lock()
		defer missingLogoMu.Unlock()
		if missingLogoNotified[dedupKey] {
			return false
		}
		missingLogoNotified[dedupKey] = true
		return true
	}()
	if !firstTime {
		return
	}

	err := sendMissingLogoEmail(teamName, teamID)
	if err == nil {
		return
	}
	log.Printf("error sending missing logo email for %s (%s): %v", teamName, teamID, err)
}
// buildMissingLogoMessage renders the complete mail (headers, blank line,
// body) that reports a club without a local logo.
func buildMissingLogoMessage(from, to, teamName, teamID string, now time.Time) []byte {
	var msg bytes.Buffer
	msg.WriteString("From: " + from + "\r\n")
	msg.WriteString("To: " + to + "\r\n")
	msg.WriteString("Subject: Missing local logo for club\r\n")
	// RFC 5322 makes the origination date mandatory; do not rely on the
	// server to add it.
	msg.WriteString("Date: " + now.Format(time.RFC1123Z) + "\r\n")
	msg.WriteString("MIME-Version: 1.0\r\n")
	msg.WriteString("Content-Type: text/plain; charset=utf-8\r\n")
	// Club names may contain non-ASCII characters, so the body is 8-bit.
	msg.WriteString("Content-Transfer-Encoding: 8bit\r\n")
	msg.WriteString("\r\n")
	fmt.Fprintf(&msg, "A club logo is missing in logoapi.\n\nName: %s\nID: %s\nTime: %s\n", teamName, teamID, now.Format(time.RFC3339))
	return msg.Bytes()
}

// sendMissingLogoEmail mails a "missing logo" notice for the given club over
// implicit-TLS SMTP.
//
// Configuration comes from the environment:
//   - SMTP_HOST (default "smtp.purelymail.com")
//   - SMTP_PORT (default "465")
//   - SMTP_USER and SMTP_PASS: the credentials; SMTP_USER is also the sender
//   - MISSING_LOGO_NOTIFY_TO (default "info@tdvorak.dev")
//
// Notifications are optional: when SMTP_USER or SMTP_PASS is empty the
// function does nothing and returns nil. Otherwise it returns the first error
// met while connecting, authenticating or transmitting, wrapped with the step
// that failed.
func sendMissingLogoEmail(teamName string, teamID string) error {
	// Upper bound for the whole SMTP conversation once the connection is up.
	const smtpTimeout = 30 * time.Second

	host := strings.TrimSpace(os.Getenv("SMTP_HOST"))
	if host == "" {
		host = "smtp.purelymail.com"
	}
	port := strings.TrimSpace(os.Getenv("SMTP_PORT"))
	if port == "" {
		port = "465"
	}
	user := strings.TrimSpace(os.Getenv("SMTP_USER"))
	pass := os.Getenv("SMTP_PASS")
	if user == "" || pass == "" {
		return nil
	}
	to := strings.TrimSpace(os.Getenv("MISSING_LOGO_NOTIFY_TO"))
	if to == "" {
		to = "info@tdvorak.dev"
	}

	msg := buildMissingLogoMessage(user, to, teamName, teamID, time.Now())

	addr := host + ":" + port
	// NOTE: tls.Dial itself has no timeout; only the conversation after the
	// handshake is bounded by the deadline set below.
	conn, err := tls.Dial("tcp", addr, &tls.Config{ServerName: host})
	if err != nil {
		return fmt.Errorf("connecting to SMTP server %s: %w", addr, err)
	}
	defer conn.Close()
	// Without a deadline a stalled server would block the caller forever.
	if err := conn.SetDeadline(time.Now().Add(smtpTimeout)); err != nil {
		return fmt.Errorf("setting SMTP deadline: %w", err)
	}

	c, err := smtp.NewClient(conn, host)
	if err != nil {
		return fmt.Errorf("starting SMTP session with %s: %w", addr, err)
	}
	defer c.Close()

	if err := c.Auth(smtp.PlainAuth("", user, pass, host)); err != nil {
		return fmt.Errorf("authenticating as %s: %w", user, err)
	}
	if err := c.Mail(user); err != nil {
		return fmt.Errorf("setting sender %s: %w", user, err)
	}
	if err := c.Rcpt(to); err != nil {
		return fmt.Errorf("setting recipient %s: %w", to, err)
	}
	w, err := c.Data()
	if err != nil {
		return fmt.Errorf("opening message data: %w", err)
	}
	if _, err := w.Write(msg); err != nil {
		return fmt.Errorf("writing message: %w", err)
	}
	if err := w.Close(); err != nil {
		return fmt.Errorf("finishing message: %w", err)
	}
	if err := c.Quit(); err != nil {
		return fmt.Errorf("closing SMTP session: %w", err)
	}
	return nil
}
func getLogo(teamName string, teamID string) string { func getLogo(teamName string, teamID string) string {
placeholder := "https://www.fotbal.cz/dist/img/logo-club-empty.svg" placeholder := "https://www.fotbal.cz/dist/img/logo-club-empty.svg"
name := strings.ToLower(strings.TrimSpace(teamName)) name := strings.ToLower(strings.TrimSpace(teamName))
if name == "" || strings.Contains(name, "volno") || strings.Contains(name, "volný los") || strings.Contains(name, "volny los") || strings.Contains(name, "bye") { if name == "" || strings.Contains(name, "volno") || strings.Contains(name, "volný los") || strings.Contains(name, "volny los") || strings.Contains(name, "bye") {
return placeholder return placeholder
} }
if logo := getLogoFromLogoAPI(teamName, teamID); logo != "" {
return logo
}
notifyMissingLogo(teamName, teamID)
// If we have a team ID, construct the official logo URL directly. // If we have a team ID, construct the official logo URL directly.
// This avoids wrong matches for duplicate names (e.g., multiple "Ořechov"). // This avoids wrong matches for duplicate names (e.g., multiple "Ořechov").
if tid := strings.TrimSpace(teamID); tid != "" { if tid := strings.TrimSpace(teamID); tid != "" {
return fmt.Sprintf("https://is1.fotbal.cz/media/kluby/%s/%s_crop.jpg", tid, tid) return fmt.Sprintf("https://is1.fotbal.cz/media/kluby/%s/%s_crop.jpg", tid, tid)
} }
// Otherwise, try the local search endpoint by name. // Otherwise, try the local FACR-based search endpoint by name.
if logo := getLogoBySearch(teamName); logo != "" { if logo := getLogoBySearch(teamName); logo != "" {
return logo return logo
} }