mirror of
https://github.com/Dvorinka/facr-scraper.git
synced 2026-06-03 20:12:57 +00:00
dd
This commit is contained in:
@@ -145,7 +145,7 @@ func parseCompetitionMatchesFromFotbal(pageURL, clubType, clubName, clubID strin
|
||||
reportURL = fmt.Sprintf("https://www.fotbal.cz/souteze/zapasy/zapas/%s", matchID)
|
||||
}
|
||||
}
|
||||
// Filter by club involvement: prefer UUID match, fallback to name matching including last-word token
|
||||
// Filter by club involvement: prefer UUID match, fallback to name matching including simplified token
|
||||
if clubName != "" || clubID != "" {
|
||||
involved := false
|
||||
// If we could extract team UUIDs, match by ID first (robust against aliases)
|
||||
@@ -156,19 +156,14 @@ func parseCompetitionMatchesFromFotbal(pageURL, clubType, clubName, clubID strin
|
||||
involved = strings.EqualFold(home, clubName) || strings.EqualFold(away, clubName) ||
|
||||
containsFold(clubName, home) || containsFold(clubName, away) ||
|
||||
containsFold(home, clubName) || containsFold(away, clubName)
|
||||
// As a last resort, try matching the last word (e.g., city) token of the club name
|
||||
// As a last resort, try matching a simplified token (e.g., city) of the club name
|
||||
if !involved {
|
||||
parts := strings.Fields(strings.TrimSpace(clubName))
|
||||
if len(parts) > 0 {
|
||||
last := parts[len(parts)-1]
|
||||
if last != "" {
|
||||
if containsFold(home, last) || containsFold(away, last) {
|
||||
token := simplifyClubQuery(clubName)
|
||||
if token != "" && (containsFold(home, token) || containsFold(away, token)) {
|
||||
involved = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if !involved { return }
|
||||
}
|
||||
// Backfill IDs for current club if missing
|
||||
@@ -176,28 +171,22 @@ func parseCompetitionMatchesFromFotbal(pageURL, clubType, clubName, clubID strin
|
||||
if strings.EqualFold(home, clubName) || containsFold(home, clubName) || containsFold(clubName, home) {
|
||||
homeID = clubID
|
||||
} else {
|
||||
parts := strings.Fields(strings.TrimSpace(clubName))
|
||||
if len(parts) > 0 {
|
||||
last := parts[len(parts)-1]
|
||||
if last != "" && containsFold(home, last) {
|
||||
token := simplifyClubQuery(clubName)
|
||||
if token != "" && containsFold(home, token) {
|
||||
homeID = clubID
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if awayID == "" {
|
||||
if strings.EqualFold(away, clubName) || containsFold(away, clubName) || containsFold(clubName, away) {
|
||||
awayID = clubID
|
||||
} else {
|
||||
parts := strings.Fields(strings.TrimSpace(clubName))
|
||||
if len(parts) > 0 {
|
||||
last := parts[len(parts)-1]
|
||||
if last != "" && containsFold(away, last) {
|
||||
token := simplifyClubQuery(clubName)
|
||||
if token != "" && containsFold(away, token) {
|
||||
awayID = clubID
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
homeLogo := getLogo(home, homeID)
|
||||
awayLogo := getLogo(away, awayID)
|
||||
matches = append(matches, Match{
|
||||
@@ -299,7 +288,7 @@ func parseCompetitionMatchesFromIS(detailURL, clubType, clubName, clubID string)
|
||||
reportURL = fmt.Sprintf("https://www.fotbal.cz/souteze/zapasy/zapas/%s", matchID)
|
||||
}
|
||||
}
|
||||
// Filter by club involvement: prefer UUID match, fallback to name matching
|
||||
// Filter by club involvement: prefer UUID match, fallback to name matching with simplified token
|
||||
if clubName != "" || clubID != "" {
|
||||
involved := false
|
||||
if clubID != "" && (strings.EqualFold(homeID, clubID) || strings.EqualFold(awayID, clubID)) {
|
||||
@@ -309,23 +298,26 @@ func parseCompetitionMatchesFromIS(detailURL, clubType, clubName, clubID string)
|
||||
containsFold(clubName, rawHome) || containsFold(clubName, rawAway) ||
|
||||
containsFold(rawHome, clubName) || containsFold(rawAway, clubName)
|
||||
if !involved {
|
||||
parts := strings.Fields(strings.TrimSpace(clubName))
|
||||
if len(parts) > 0 {
|
||||
last := parts[len(parts)-1]
|
||||
if last != "" && (containsFold(rawHome, last) || containsFold(rawAway, last)) {
|
||||
token := simplifyClubQuery(clubName)
|
||||
if token != "" && (containsFold(rawHome, token) || containsFold(rawAway, token)) {
|
||||
involved = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if !involved { return }
|
||||
}
|
||||
keptRows++
|
||||
if homeID == "" {
|
||||
if strings.EqualFold(rawHome, clubName) || containsFold(rawHome, clubName) || containsFold(clubName, rawHome) { homeID = clubID }
|
||||
if strings.EqualFold(rawHome, clubName) || containsFold(rawHome, clubName) || containsFold(clubName, rawHome) { homeID = clubID } else {
|
||||
token := simplifyClubQuery(clubName)
|
||||
if token != "" && containsFold(rawHome, token) { homeID = clubID }
|
||||
}
|
||||
}
|
||||
if awayID == "" {
|
||||
if strings.EqualFold(rawAway, clubName) || containsFold(rawAway, clubName) || containsFold(clubName, rawAway) { awayID = clubID }
|
||||
if strings.EqualFold(rawAway, clubName) || containsFold(rawAway, clubName) || containsFold(clubName, rawAway) { awayID = clubID } else {
|
||||
token := simplifyClubQuery(clubName)
|
||||
if token != "" && containsFold(rawAway, token) { awayID = clubID }
|
||||
}
|
||||
}
|
||||
homeLogo := getLogo(rawHome, homeID)
|
||||
awayLogo := getLogo(rawAway, awayID)
|
||||
@@ -345,7 +337,6 @@ type searchAPIResult struct {
|
||||
} `json:"results"`
|
||||
}
|
||||
|
||||
// simplifyClubQuery takes a full club name like "FK Kofola Krnov" and returns
|
||||
// a simplified search token like "krnov" to improve chances of finding a logo.
|
||||
func simplifyClubQuery(name string) string {
|
||||
s := strings.TrimSpace(name)
|
||||
@@ -356,9 +347,27 @@ func simplifyClubQuery(name string) string {
|
||||
if len(parts) == 0 {
|
||||
return ""
|
||||
}
|
||||
// Use the last word (often the city), strip simple punctuation, lowercased
|
||||
last := parts[len(parts)-1]
|
||||
last = strings.Trim(last, ",.;:-()[]{}\"'`“”’")
|
||||
// Walk from the end to find a meaningful token (avoid legal suffixes like "z.s.")
|
||||
stop := map[string]struct{}{
|
||||
"z.s.": {}, "z.s": {}, "zs": {}, "zapsany": {}, "zapsaný": {}, "spolek": {},
|
||||
"o.s.": {}, "o.s": {}, "os": {}, "a.s.": {}, "a.s": {}, "as": {},
|
||||
"s.r.o.": {}, "s.r.o": {}, "sro": {},
|
||||
}
|
||||
for i := len(parts) - 1; i >= 0; i-- {
|
||||
tok := parts[i]
|
||||
tok = strings.Trim(tok, ",.;:-()[]{}\"'`“”’")
|
||||
lt := strings.ToLower(tok)
|
||||
if _, banned := stop[lt]; banned {
|
||||
continue
|
||||
}
|
||||
// prefer tokens with letters and length >= 3
|
||||
letters := regexp.MustCompile(`[a-zA-Zá-žÁ-Ž]`).MatchString
|
||||
if len([]rune(lt)) >= 3 && letters(lt) {
|
||||
return lt
|
||||
}
|
||||
}
|
||||
// Fallback to last token sanitized
|
||||
last := strings.Trim(parts[len(parts)-1], ",.;:-()[]{}\"'`“”’")
|
||||
return strings.ToLower(last)
|
||||
}
|
||||
|
||||
@@ -413,7 +422,8 @@ func getLogoBySearch(name string) string {
|
||||
}
|
||||
if best == "" {
|
||||
for _, r := range payload.Results {
|
||||
if strings.Contains(strings.ToLower(r.Name), key) || strings.Contains(key, strings.ToLower(r.Name)) {
|
||||
rname := strings.ToLower(r.Name)
|
||||
if strings.Contains(rname, key) || strings.Contains(key, rname) {
|
||||
best = r.LogoURL
|
||||
break
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user