This commit is contained in:
Tomáš Dvořák
2025-08-26 08:07:56 +02:00
parent 770c970e49
commit 0bbf432b9a
2 changed files with 1938 additions and 107 deletions
+1821
View File
File diff suppressed because it is too large Load Diff
+42 -32
View File
@@ -145,7 +145,7 @@ func parseCompetitionMatchesFromFotbal(pageURL, clubType, clubName, clubID strin
reportURL = fmt.Sprintf("https://www.fotbal.cz/souteze/zapasy/zapas/%s", matchID) reportURL = fmt.Sprintf("https://www.fotbal.cz/souteze/zapasy/zapas/%s", matchID)
} }
} }
// Filter by club involvement: prefer UUID match, fallback to name matching including last-word token // Filter by club involvement: prefer UUID match, fallback to name matching including simplified token
if clubName != "" || clubID != "" { if clubName != "" || clubID != "" {
involved := false involved := false
// If we could extract team UUIDs, match by ID first (robust against aliases) // If we could extract team UUIDs, match by ID first (robust against aliases)
@@ -156,19 +156,14 @@ func parseCompetitionMatchesFromFotbal(pageURL, clubType, clubName, clubID strin
involved = strings.EqualFold(home, clubName) || strings.EqualFold(away, clubName) || involved = strings.EqualFold(home, clubName) || strings.EqualFold(away, clubName) ||
containsFold(clubName, home) || containsFold(clubName, away) || containsFold(clubName, home) || containsFold(clubName, away) ||
containsFold(home, clubName) || containsFold(away, clubName) containsFold(home, clubName) || containsFold(away, clubName)
// As a last resort, try matching the last word (e.g., city) token of the club name // As a last resort, try matching a simplified token (e.g., city) of the club name
if !involved { if !involved {
parts := strings.Fields(strings.TrimSpace(clubName)) token := simplifyClubQuery(clubName)
if len(parts) > 0 { if token != "" && (containsFold(home, token) || containsFold(away, token)) {
last := parts[len(parts)-1]
if last != "" {
if containsFold(home, last) || containsFold(away, last) {
involved = true involved = true
} }
} }
} }
}
}
if !involved { return } if !involved { return }
} }
// Backfill IDs for current club if missing // Backfill IDs for current club if missing
@@ -176,28 +171,22 @@ func parseCompetitionMatchesFromFotbal(pageURL, clubType, clubName, clubID strin
if strings.EqualFold(home, clubName) || containsFold(home, clubName) || containsFold(clubName, home) { if strings.EqualFold(home, clubName) || containsFold(home, clubName) || containsFold(clubName, home) {
homeID = clubID homeID = clubID
} else { } else {
parts := strings.Fields(strings.TrimSpace(clubName)) token := simplifyClubQuery(clubName)
if len(parts) > 0 { if token != "" && containsFold(home, token) {
last := parts[len(parts)-1]
if last != "" && containsFold(home, last) {
homeID = clubID homeID = clubID
} }
} }
} }
}
if awayID == "" { if awayID == "" {
if strings.EqualFold(away, clubName) || containsFold(away, clubName) || containsFold(clubName, away) { if strings.EqualFold(away, clubName) || containsFold(away, clubName) || containsFold(clubName, away) {
awayID = clubID awayID = clubID
} else { } else {
parts := strings.Fields(strings.TrimSpace(clubName)) token := simplifyClubQuery(clubName)
if len(parts) > 0 { if token != "" && containsFold(away, token) {
last := parts[len(parts)-1]
if last != "" && containsFold(away, last) {
awayID = clubID awayID = clubID
} }
} }
} }
}
homeLogo := getLogo(home, homeID) homeLogo := getLogo(home, homeID)
awayLogo := getLogo(away, awayID) awayLogo := getLogo(away, awayID)
matches = append(matches, Match{ matches = append(matches, Match{
@@ -299,7 +288,7 @@ func parseCompetitionMatchesFromIS(detailURL, clubType, clubName, clubID string)
reportURL = fmt.Sprintf("https://www.fotbal.cz/souteze/zapasy/zapas/%s", matchID) reportURL = fmt.Sprintf("https://www.fotbal.cz/souteze/zapasy/zapas/%s", matchID)
} }
} }
// Filter by club involvement: prefer UUID match, fallback to name matching // Filter by club involvement: prefer UUID match, fallback to name matching with simplified token
if clubName != "" || clubID != "" { if clubName != "" || clubID != "" {
involved := false involved := false
if clubID != "" && (strings.EqualFold(homeID, clubID) || strings.EqualFold(awayID, clubID)) { if clubID != "" && (strings.EqualFold(homeID, clubID) || strings.EqualFold(awayID, clubID)) {
@@ -309,23 +298,26 @@ func parseCompetitionMatchesFromIS(detailURL, clubType, clubName, clubID string)
containsFold(clubName, rawHome) || containsFold(clubName, rawAway) || containsFold(clubName, rawHome) || containsFold(clubName, rawAway) ||
containsFold(rawHome, clubName) || containsFold(rawAway, clubName) containsFold(rawHome, clubName) || containsFold(rawAway, clubName)
if !involved { if !involved {
parts := strings.Fields(strings.TrimSpace(clubName)) token := simplifyClubQuery(clubName)
if len(parts) > 0 { if token != "" && (containsFold(rawHome, token) || containsFold(rawAway, token)) {
last := parts[len(parts)-1]
if last != "" && (containsFold(rawHome, last) || containsFold(rawAway, last)) {
involved = true involved = true
} }
} }
} }
}
if !involved { return } if !involved { return }
} }
keptRows++ keptRows++
if homeID == "" { if homeID == "" {
if strings.EqualFold(rawHome, clubName) || containsFold(rawHome, clubName) || containsFold(clubName, rawHome) { homeID = clubID } if strings.EqualFold(rawHome, clubName) || containsFold(rawHome, clubName) || containsFold(clubName, rawHome) { homeID = clubID } else {
token := simplifyClubQuery(clubName)
if token != "" && containsFold(rawHome, token) { homeID = clubID }
}
} }
if awayID == "" { if awayID == "" {
if strings.EqualFold(rawAway, clubName) || containsFold(rawAway, clubName) || containsFold(clubName, rawAway) { awayID = clubID } if strings.EqualFold(rawAway, clubName) || containsFold(rawAway, clubName) || containsFold(clubName, rawAway) { awayID = clubID } else {
token := simplifyClubQuery(clubName)
if token != "" && containsFold(rawAway, token) { awayID = clubID }
}
} }
homeLogo := getLogo(rawHome, homeID) homeLogo := getLogo(rawHome, homeID)
awayLogo := getLogo(rawAway, awayID) awayLogo := getLogo(rawAway, awayID)
@@ -345,7 +337,6 @@ type searchAPIResult struct {
} `json:"results"` } `json:"results"`
} }
// simplifyClubQuery takes a full club name like "FK Kofola Krnov" and returns
// a simplified search token like "krnov" to improve chances of finding a logo. // a simplified search token like "krnov" to improve chances of finding a logo.
func simplifyClubQuery(name string) string { func simplifyClubQuery(name string) string {
s := strings.TrimSpace(name) s := strings.TrimSpace(name)
@@ -356,9 +347,27 @@ func simplifyClubQuery(name string) string {
if len(parts) == 0 { if len(parts) == 0 {
return "" return ""
} }
// Use the last word (often the city), strip simple punctuation, lowercased // Walk from the end to find a meaningful token (avoid legal suffixes like "z.s.")
last := parts[len(parts)-1] stop := map[string]struct{}{
last = strings.Trim(last, ",.;:-()[]{}\"'`“”’") "z.s.": {}, "z.s": {}, "zs": {}, "zapsany": {}, "zapsaný": {}, "spolek": {},
"o.s.": {}, "o.s": {}, "os": {}, "a.s.": {}, "a.s": {}, "as": {},
"s.r.o.": {}, "s.r.o": {}, "sro": {},
}
for i := len(parts) - 1; i >= 0; i-- {
tok := parts[i]
tok = strings.Trim(tok, ",.;:-()[]{}\"'`“”’")
lt := strings.ToLower(tok)
if _, banned := stop[lt]; banned {
continue
}
// prefer tokens with letters and length >= 3
letters := regexp.MustCompile(`[a-zA-Zá-žÁ-Ž]`).MatchString
if len([]rune(lt)) >= 3 && letters(lt) {
return lt
}
}
// Fallback to last token sanitized
last := strings.Trim(parts[len(parts)-1], ",.;:-()[]{}\"'`“”’")
return strings.ToLower(last) return strings.ToLower(last)
} }
@@ -413,7 +422,8 @@ func getLogoBySearch(name string) string {
} }
if best == "" { if best == "" {
for _, r := range payload.Results { for _, r := range payload.Results {
if strings.Contains(strings.ToLower(r.Name), key) || strings.Contains(key, strings.ToLower(r.Name)) { rname := strings.ToLower(r.Name)
if strings.Contains(rname, key) || strings.Contains(key, rname) {
best = r.LogoURL best = r.LogoURL
break break
} }