mirror of
https://github.com/Dvorinka/facr-scraper.git
synced 2026-06-03 20:12:57 +00:00
dd
This commit is contained in:
@@ -145,7 +145,7 @@ func parseCompetitionMatchesFromFotbal(pageURL, clubType, clubName, clubID strin
|
|||||||
reportURL = fmt.Sprintf("https://www.fotbal.cz/souteze/zapasy/zapas/%s", matchID)
|
reportURL = fmt.Sprintf("https://www.fotbal.cz/souteze/zapasy/zapas/%s", matchID)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Filter by club involvement: prefer UUID match, fallback to name matching including last-word token
|
// Filter by club involvement: prefer UUID match, fallback to name matching including simplified token
|
||||||
if clubName != "" || clubID != "" {
|
if clubName != "" || clubID != "" {
|
||||||
involved := false
|
involved := false
|
||||||
// If we could extract team UUIDs, match by ID first (robust against aliases)
|
// If we could extract team UUIDs, match by ID first (robust against aliases)
|
||||||
@@ -156,19 +156,14 @@ func parseCompetitionMatchesFromFotbal(pageURL, clubType, clubName, clubID strin
|
|||||||
involved = strings.EqualFold(home, clubName) || strings.EqualFold(away, clubName) ||
|
involved = strings.EqualFold(home, clubName) || strings.EqualFold(away, clubName) ||
|
||||||
containsFold(clubName, home) || containsFold(clubName, away) ||
|
containsFold(clubName, home) || containsFold(clubName, away) ||
|
||||||
containsFold(home, clubName) || containsFold(away, clubName)
|
containsFold(home, clubName) || containsFold(away, clubName)
|
||||||
// As a last resort, try matching the last word (e.g., city) token of the club name
|
// As a last resort, try matching a simplified token (e.g., city) of the club name
|
||||||
if !involved {
|
if !involved {
|
||||||
parts := strings.Fields(strings.TrimSpace(clubName))
|
token := simplifyClubQuery(clubName)
|
||||||
if len(parts) > 0 {
|
if token != "" && (containsFold(home, token) || containsFold(away, token)) {
|
||||||
last := parts[len(parts)-1]
|
|
||||||
if last != "" {
|
|
||||||
if containsFold(home, last) || containsFold(away, last) {
|
|
||||||
involved = true
|
involved = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
if !involved { return }
|
if !involved { return }
|
||||||
}
|
}
|
||||||
// Backfill IDs for current club if missing
|
// Backfill IDs for current club if missing
|
||||||
@@ -176,28 +171,22 @@ func parseCompetitionMatchesFromFotbal(pageURL, clubType, clubName, clubID strin
|
|||||||
if strings.EqualFold(home, clubName) || containsFold(home, clubName) || containsFold(clubName, home) {
|
if strings.EqualFold(home, clubName) || containsFold(home, clubName) || containsFold(clubName, home) {
|
||||||
homeID = clubID
|
homeID = clubID
|
||||||
} else {
|
} else {
|
||||||
parts := strings.Fields(strings.TrimSpace(clubName))
|
token := simplifyClubQuery(clubName)
|
||||||
if len(parts) > 0 {
|
if token != "" && containsFold(home, token) {
|
||||||
last := parts[len(parts)-1]
|
|
||||||
if last != "" && containsFold(home, last) {
|
|
||||||
homeID = clubID
|
homeID = clubID
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
if awayID == "" {
|
if awayID == "" {
|
||||||
if strings.EqualFold(away, clubName) || containsFold(away, clubName) || containsFold(clubName, away) {
|
if strings.EqualFold(away, clubName) || containsFold(away, clubName) || containsFold(clubName, away) {
|
||||||
awayID = clubID
|
awayID = clubID
|
||||||
} else {
|
} else {
|
||||||
parts := strings.Fields(strings.TrimSpace(clubName))
|
token := simplifyClubQuery(clubName)
|
||||||
if len(parts) > 0 {
|
if token != "" && containsFold(away, token) {
|
||||||
last := parts[len(parts)-1]
|
|
||||||
if last != "" && containsFold(away, last) {
|
|
||||||
awayID = clubID
|
awayID = clubID
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
homeLogo := getLogo(home, homeID)
|
homeLogo := getLogo(home, homeID)
|
||||||
awayLogo := getLogo(away, awayID)
|
awayLogo := getLogo(away, awayID)
|
||||||
matches = append(matches, Match{
|
matches = append(matches, Match{
|
||||||
@@ -299,7 +288,7 @@ func parseCompetitionMatchesFromIS(detailURL, clubType, clubName, clubID string)
|
|||||||
reportURL = fmt.Sprintf("https://www.fotbal.cz/souteze/zapasy/zapas/%s", matchID)
|
reportURL = fmt.Sprintf("https://www.fotbal.cz/souteze/zapasy/zapas/%s", matchID)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Filter by club involvement: prefer UUID match, fallback to name matching
|
// Filter by club involvement: prefer UUID match, fallback to name matching with simplified token
|
||||||
if clubName != "" || clubID != "" {
|
if clubName != "" || clubID != "" {
|
||||||
involved := false
|
involved := false
|
||||||
if clubID != "" && (strings.EqualFold(homeID, clubID) || strings.EqualFold(awayID, clubID)) {
|
if clubID != "" && (strings.EqualFold(homeID, clubID) || strings.EqualFold(awayID, clubID)) {
|
||||||
@@ -309,23 +298,26 @@ func parseCompetitionMatchesFromIS(detailURL, clubType, clubName, clubID string)
|
|||||||
containsFold(clubName, rawHome) || containsFold(clubName, rawAway) ||
|
containsFold(clubName, rawHome) || containsFold(clubName, rawAway) ||
|
||||||
containsFold(rawHome, clubName) || containsFold(rawAway, clubName)
|
containsFold(rawHome, clubName) || containsFold(rawAway, clubName)
|
||||||
if !involved {
|
if !involved {
|
||||||
parts := strings.Fields(strings.TrimSpace(clubName))
|
token := simplifyClubQuery(clubName)
|
||||||
if len(parts) > 0 {
|
if token != "" && (containsFold(rawHome, token) || containsFold(rawAway, token)) {
|
||||||
last := parts[len(parts)-1]
|
|
||||||
if last != "" && (containsFold(rawHome, last) || containsFold(rawAway, last)) {
|
|
||||||
involved = true
|
involved = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
if !involved { return }
|
if !involved { return }
|
||||||
}
|
}
|
||||||
keptRows++
|
keptRows++
|
||||||
if homeID == "" {
|
if homeID == "" {
|
||||||
if strings.EqualFold(rawHome, clubName) || containsFold(rawHome, clubName) || containsFold(clubName, rawHome) { homeID = clubID }
|
if strings.EqualFold(rawHome, clubName) || containsFold(rawHome, clubName) || containsFold(clubName, rawHome) { homeID = clubID } else {
|
||||||
|
token := simplifyClubQuery(clubName)
|
||||||
|
if token != "" && containsFold(rawHome, token) { homeID = clubID }
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if awayID == "" {
|
if awayID == "" {
|
||||||
if strings.EqualFold(rawAway, clubName) || containsFold(rawAway, clubName) || containsFold(clubName, rawAway) { awayID = clubID }
|
if strings.EqualFold(rawAway, clubName) || containsFold(rawAway, clubName) || containsFold(clubName, rawAway) { awayID = clubID } else {
|
||||||
|
token := simplifyClubQuery(clubName)
|
||||||
|
if token != "" && containsFold(rawAway, token) { awayID = clubID }
|
||||||
|
}
|
||||||
}
|
}
|
||||||
homeLogo := getLogo(rawHome, homeID)
|
homeLogo := getLogo(rawHome, homeID)
|
||||||
awayLogo := getLogo(rawAway, awayID)
|
awayLogo := getLogo(rawAway, awayID)
|
||||||
@@ -345,7 +337,6 @@ type searchAPIResult struct {
|
|||||||
} `json:"results"`
|
} `json:"results"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// simplifyClubQuery takes a full club name like "FK Kofola Krnov" and returns
|
|
||||||
// a simplified search token like "krnov" to improve chances of finding a logo.
|
// a simplified search token like "krnov" to improve chances of finding a logo.
|
||||||
func simplifyClubQuery(name string) string {
|
func simplifyClubQuery(name string) string {
|
||||||
s := strings.TrimSpace(name)
|
s := strings.TrimSpace(name)
|
||||||
@@ -356,9 +347,27 @@ func simplifyClubQuery(name string) string {
|
|||||||
if len(parts) == 0 {
|
if len(parts) == 0 {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
// Use the last word (often the city), strip simple punctuation, lowercased
|
// Walk from the end to find a meaningful token (avoid legal suffixes like "z.s.")
|
||||||
last := parts[len(parts)-1]
|
stop := map[string]struct{}{
|
||||||
last = strings.Trim(last, ",.;:-()[]{}\"'`“”’")
|
"z.s.": {}, "z.s": {}, "zs": {}, "zapsany": {}, "zapsaný": {}, "spolek": {},
|
||||||
|
"o.s.": {}, "o.s": {}, "os": {}, "a.s.": {}, "a.s": {}, "as": {},
|
||||||
|
"s.r.o.": {}, "s.r.o": {}, "sro": {},
|
||||||
|
}
|
||||||
|
for i := len(parts) - 1; i >= 0; i-- {
|
||||||
|
tok := parts[i]
|
||||||
|
tok = strings.Trim(tok, ",.;:-()[]{}\"'`“”’")
|
||||||
|
lt := strings.ToLower(tok)
|
||||||
|
if _, banned := stop[lt]; banned {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// prefer tokens with letters and length >= 3
|
||||||
|
letters := regexp.MustCompile(`[a-zA-Zá-žÁ-Ž]`).MatchString
|
||||||
|
if len([]rune(lt)) >= 3 && letters(lt) {
|
||||||
|
return lt
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Fallback to last token sanitized
|
||||||
|
last := strings.Trim(parts[len(parts)-1], ",.;:-()[]{}\"'`“”’")
|
||||||
return strings.ToLower(last)
|
return strings.ToLower(last)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -413,7 +422,8 @@ func getLogoBySearch(name string) string {
|
|||||||
}
|
}
|
||||||
if best == "" {
|
if best == "" {
|
||||||
for _, r := range payload.Results {
|
for _, r := range payload.Results {
|
||||||
if strings.Contains(strings.ToLower(r.Name), key) || strings.Contains(key, strings.ToLower(r.Name)) {
|
rname := strings.ToLower(r.Name)
|
||||||
|
if strings.Contains(rname, key) || strings.Contains(key, rname) {
|
||||||
best = r.LogoURL
|
best = r.LogoURL
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user