This commit is contained in:
Tomáš Dvořák
2025-08-26 08:07:56 +02:00
parent 770c970e49
commit 0bbf432b9a
2 changed files with 1938 additions and 107 deletions
+1821
View File
File diff suppressed because it is too large Load Diff
+117 -107
View File
@@ -145,7 +145,7 @@ func parseCompetitionMatchesFromFotbal(pageURL, clubType, clubName, clubID strin
reportURL = fmt.Sprintf("https://www.fotbal.cz/souteze/zapasy/zapas/%s", matchID) reportURL = fmt.Sprintf("https://www.fotbal.cz/souteze/zapasy/zapas/%s", matchID)
} }
} }
// Filter by club involvement: prefer UUID match, fallback to name matching including last-word token // Filter by club involvement: prefer UUID match, fallback to name matching including simplified token
if clubName != "" || clubID != "" { if clubName != "" || clubID != "" {
involved := false involved := false
// If we could extract team UUIDs, match by ID first (robust against aliases) // If we could extract team UUIDs, match by ID first (robust against aliases)
@@ -156,16 +156,11 @@ func parseCompetitionMatchesFromFotbal(pageURL, clubType, clubName, clubID strin
involved = strings.EqualFold(home, clubName) || strings.EqualFold(away, clubName) || involved = strings.EqualFold(home, clubName) || strings.EqualFold(away, clubName) ||
containsFold(clubName, home) || containsFold(clubName, away) || containsFold(clubName, home) || containsFold(clubName, away) ||
containsFold(home, clubName) || containsFold(away, clubName) containsFold(home, clubName) || containsFold(away, clubName)
// As a last resort, try matching the last word (e.g., city) token of the club name // As a last resort, try matching a simplified token (e.g., city) of the club name
if !involved { if !involved {
parts := strings.Fields(strings.TrimSpace(clubName)) token := simplifyClubQuery(clubName)
if len(parts) > 0 { if token != "" && (containsFold(home, token) || containsFold(away, token)) {
last := parts[len(parts)-1] involved = true
if last != "" {
if containsFold(home, last) || containsFold(away, last) {
involved = true
}
}
} }
} }
} }
@@ -176,12 +171,9 @@ func parseCompetitionMatchesFromFotbal(pageURL, clubType, clubName, clubID strin
if strings.EqualFold(home, clubName) || containsFold(home, clubName) || containsFold(clubName, home) { if strings.EqualFold(home, clubName) || containsFold(home, clubName) || containsFold(clubName, home) {
homeID = clubID homeID = clubID
} else { } else {
parts := strings.Fields(strings.TrimSpace(clubName)) token := simplifyClubQuery(clubName)
if len(parts) > 0 { if token != "" && containsFold(home, token) {
last := parts[len(parts)-1] homeID = clubID
if last != "" && containsFold(home, last) {
homeID = clubID
}
} }
} }
} }
@@ -189,12 +181,9 @@ func parseCompetitionMatchesFromFotbal(pageURL, clubType, clubName, clubID strin
if strings.EqualFold(away, clubName) || containsFold(away, clubName) || containsFold(clubName, away) { if strings.EqualFold(away, clubName) || containsFold(away, clubName) || containsFold(clubName, away) {
awayID = clubID awayID = clubID
} else { } else {
parts := strings.Fields(strings.TrimSpace(clubName)) token := simplifyClubQuery(clubName)
if len(parts) > 0 { if token != "" && containsFold(away, token) {
last := parts[len(parts)-1] awayID = clubID
if last != "" && containsFold(away, last) {
awayID = clubID
}
} }
} }
} }
@@ -299,7 +288,7 @@ func parseCompetitionMatchesFromIS(detailURL, clubType, clubName, clubID string)
reportURL = fmt.Sprintf("https://www.fotbal.cz/souteze/zapasy/zapas/%s", matchID) reportURL = fmt.Sprintf("https://www.fotbal.cz/souteze/zapasy/zapas/%s", matchID)
} }
} }
// Filter by club involvement: prefer UUID match, fallback to name matching // Filter by club involvement: prefer UUID match, fallback to name matching with simplified token
if clubName != "" || clubID != "" { if clubName != "" || clubID != "" {
involved := false involved := false
if clubID != "" && (strings.EqualFold(homeID, clubID) || strings.EqualFold(awayID, clubID)) { if clubID != "" && (strings.EqualFold(homeID, clubID) || strings.EqualFold(awayID, clubID)) {
@@ -309,12 +298,9 @@ func parseCompetitionMatchesFromIS(detailURL, clubType, clubName, clubID string)
containsFold(clubName, rawHome) || containsFold(clubName, rawAway) || containsFold(clubName, rawHome) || containsFold(clubName, rawAway) ||
containsFold(rawHome, clubName) || containsFold(rawAway, clubName) containsFold(rawHome, clubName) || containsFold(rawAway, clubName)
if !involved { if !involved {
parts := strings.Fields(strings.TrimSpace(clubName)) token := simplifyClubQuery(clubName)
if len(parts) > 0 { if token != "" && (containsFold(rawHome, token) || containsFold(rawAway, token)) {
last := parts[len(parts)-1] involved = true
if last != "" && (containsFold(rawHome, last) || containsFold(rawAway, last)) {
involved = true
}
} }
} }
} }
@@ -322,10 +308,16 @@ func parseCompetitionMatchesFromIS(detailURL, clubType, clubName, clubID string)
} }
keptRows++ keptRows++
if homeID == "" { if homeID == "" {
if strings.EqualFold(rawHome, clubName) || containsFold(rawHome, clubName) || containsFold(clubName, rawHome) { homeID = clubID } if strings.EqualFold(rawHome, clubName) || containsFold(rawHome, clubName) || containsFold(clubName, rawHome) { homeID = clubID } else {
token := simplifyClubQuery(clubName)
if token != "" && containsFold(rawHome, token) { homeID = clubID }
}
} }
if awayID == "" { if awayID == "" {
if strings.EqualFold(rawAway, clubName) || containsFold(rawAway, clubName) || containsFold(clubName, rawAway) { awayID = clubID } if strings.EqualFold(rawAway, clubName) || containsFold(rawAway, clubName) || containsFold(clubName, rawAway) { awayID = clubID } else {
token := simplifyClubQuery(clubName)
if token != "" && containsFold(rawAway, token) { awayID = clubID }
}
} }
homeLogo := getLogo(rawHome, homeID) homeLogo := getLogo(rawHome, homeID)
awayLogo := getLogo(rawAway, awayID) awayLogo := getLogo(rawAway, awayID)
@@ -339,91 +331,109 @@ func parseCompetitionMatchesFromIS(detailURL, clubType, clubName, clubID string)
// logoCache memoizes logo URLs resolved by getLogoBySearch. Keys are the
// lowercased, whitespace-trimmed club name; values are the chosen logo URL
// (possibly empty when a search returned no results).
// NOTE(review): no locking is visible around this map — confirm it is only
// touched from a single goroutine.
var logoCache = map[string]string{} var logoCache = map[string]string{}
// searchAPIResult is the shape decoded from the club search endpoint's JSON
// response: a "results" array whose entries carry a club "name" and "logo_url".
type searchAPIResult struct { type searchAPIResult struct {
Results []struct { Results []struct {
Name string `json:"name"` Name string `json:"name"`
LogoURL string `json:"logo_url"` LogoURL string `json:"logo_url"`
} `json:"results"` } `json:"results"`
} }
// clubTokenCutset lists the punctuation stripped from both ends of a club-name
// token before it is compared or returned.
const clubTokenCutset = ",.;:-()[]{}\"'`“”’"

// clubTokenLetterRe matches any Latin or Czech-range letter. It is compiled
// once at package scope instead of on every loop iteration.
var clubTokenLetterRe = regexp.MustCompile(`[a-zA-Zá-žÁ-Ž]`)

// clubLegalSuffixes holds lowercased legal-form tokens (e.g. "z.s.", "a.s.",
// "s.r.o.") that must never be used as the simplified club token. Tokens are
// looked up after trimming, so the variants without the trailing dot are the
// ones that actually match; the dotted forms are kept for completeness.
var clubLegalSuffixes = map[string]struct{}{
	"z.s.": {}, "z.s": {}, "zs": {}, "zapsany": {}, "zapsaný": {}, "spolek": {},
	"o.s.": {}, "o.s": {}, "os": {}, "a.s.": {}, "a.s": {}, "as": {},
	"s.r.o.": {}, "s.r.o": {}, "sro": {},
}

// simplifyClubQuery takes a full club name like "FK Kofola Krnov" and returns
// a simplified, lowercased token like "krnov". The token is used both as a
// logo search query and as a last-resort substring for matching team names.
//
// Tokens are scanned from the end of the name. Legal-form suffixes are skipped,
// and the first token that is at least three runes long and contains a letter
// is returned. If no token qualifies, the last non-empty token that is not a
// legal suffix is returned instead, so a suffix such as "z.s" is never handed
// back to callers that would substring-match it against unrelated clubs.
// An empty string is returned when nothing usable remains.
func simplifyClubQuery(name string) string {
	parts := strings.Fields(name)
	fallback := ""
	for i := len(parts) - 1; i >= 0; i-- {
		tok := strings.ToLower(strings.Trim(parts[i], clubTokenCutset))
		if tok == "" {
			continue
		}
		if _, banned := clubLegalSuffixes[tok]; banned {
			continue
		}
		// Prefer tokens with letters and length >= 3 (counted in runes, not bytes).
		if len([]rune(tok)) >= 3 && clubTokenLetterRe.MatchString(tok) {
			return tok
		}
		// Remember the right-most short token in case nothing better exists.
		if fallback == "" {
			fallback = tok
		}
	}
	return fallback
}
func getLogoBySearch(name string) string { func getLogoBySearch(name string) string {
key := strings.ToLower(strings.TrimSpace(name)) key := strings.ToLower(strings.TrimSpace(name))
if key == "" { if key == "" {
return "" return ""
} }
if v, ok := logoCache[key]; ok { if v, ok := logoCache[key]; ok {
return v return v
} }
client := &http.Client{Timeout: 5 * time.Second} client := &http.Client{Timeout: 5 * time.Second}
// Prefer simplified last-word token (e.g., "krnov") to improve hit rate for logos // Prefer simplified last-word token (e.g., "krnov") to improve hit rate for logos
query := simplifyClubQuery(name) query := simplifyClubQuery(name)
if query == "" { if query == "" {
query = name query = name
} }
doSearch := func(q string) (searchAPIResult, bool) { doSearch := func(q string) (searchAPIResult, bool) {
url := fmt.Sprintf("http://localhost:8080/club/search?q=%s", neturl.QueryEscape(q)) url := fmt.Sprintf("http://localhost:8080/club/search?q=%s", neturl.QueryEscape(q))
resp, err := client.Get(url) resp, err := client.Get(url)
if err != nil { if err != nil {
return searchAPIResult{}, false return searchAPIResult{}, false
} }
defer resp.Body.Close() defer resp.Body.Close()
if resp.StatusCode != http.StatusOK { if resp.StatusCode != http.StatusOK {
io.Copy(io.Discard, resp.Body) io.Copy(io.Discard, resp.Body)
return searchAPIResult{}, false return searchAPIResult{}, false
} }
var payload searchAPIResult var payload searchAPIResult
if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil { if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
return searchAPIResult{}, false return searchAPIResult{}, false
} }
return payload, true return payload, true
} }
payload, ok := doSearch(query) payload, ok := doSearch(query)
if !ok || len(payload.Results) == 0 { if !ok || len(payload.Results) == 0 {
// Fallback to full name if simplified token yields nothing // Fallback to full name if simplified token yields nothing
payload, ok = doSearch(name) payload, ok = doSearch(name)
if !ok { if !ok {
return "" return ""
} }
} }
// pick best match: exact (case-insensitive), then contains, else first // pick best match: exact (case-insensitive), then contains, else first
best := "" best := ""
for _, r := range payload.Results { for _, r := range payload.Results {
if strings.EqualFold(strings.TrimSpace(r.Name), strings.TrimSpace(name)) { if strings.EqualFold(strings.TrimSpace(r.Name), strings.TrimSpace(name)) {
best = r.LogoURL best = r.LogoURL
break break
} }
} }
if best == "" { if best == "" {
for _, r := range payload.Results { for _, r := range payload.Results {
if strings.Contains(strings.ToLower(r.Name), key) || strings.Contains(key, strings.ToLower(r.Name)) { rname := strings.ToLower(r.Name)
best = r.LogoURL if strings.Contains(rname, key) || strings.Contains(key, rname) {
break best = r.LogoURL
} break
} }
} }
if best == "" && len(payload.Results) > 0 { }
best = payload.Results[0].LogoURL if best == "" && len(payload.Results) > 0 {
} best = payload.Results[0].LogoURL
logoCache[key] = best }
return best logoCache[key] = best
return best
} }
func getLogo(teamName string, teamID string) string { func getLogo(teamName string, teamID string) string {