package services

import (
	"regexp"
	"strings"
)

// Patterns used by EvaluateSpamScore. They are compiled once at package
// scope so the scoring function does not pay compilation cost on every call.
var (
	// spamLetterRe matches one Latin letter, including Czech diacritics.
	spamLetterRe = regexp.MustCompile(`[A-Za-zÁáÉéĚěÍíÓóÚúŮůÝýŽžŠšČčŘřŤťŇňĎď]`)
	// spamVowelRe matches one vowel (y counts as a vowel), including accented forms.
	spamVowelRe = regexp.MustCompile(`[AaEeIiOoUuYyÁáÉéĚěÍíÓóÚúŮůÝý]`)
	// spamLinkRe matches the scheme prefix of an http(s) URL.
	spamLinkRe = regexp.MustCompile(`https?://`)
)

// spamRuleWeights maps each rule name to its contribution to the final score.
// It is read-only after initialization.
var spamRuleWeights = map[string]float64{
	"empty":           1.0,
	"too_short":       0.4,
	"repeated_chars":  0.3,
	"low_vowel_ratio": 0.3,
	"too_many_links":  0.5,
	"all_caps":        0.2,
}

// Thresholds for the individual heuristics.
const (
	spamMinLength     = 6    // fewer runes than this triggers "too_short"
	spamRepeatRun     = 5    // identical characters in a row that trigger "repeated_chars"
	spamMinLetters    = 8    // minimum letters before the vowel ratio is evaluated
	spamMinVowelRatio = 0.18 // vowel/letter ratio below this triggers "low_vowel_ratio"
	spamMaxLinks      = 3    // this many links or more triggers "too_many_links"
	spamMinCapsLength = 8    // minimum runes before "all_caps" is evaluated
)

// EvaluateSpamScore applies simple heuristics to s and reports how spammy it
// looks.
//
// It returns a score in the range 0..1 (higher means more likely spam) and the
// names of the rules that fired, in evaluation order. Leading and trailing
// whitespace is ignored. Empty input yields a score of 1.0 with the single
// rule "empty". Otherwise the score is the sum of the weights of the
// triggered rules, capped at 1.0; when no rule fires the returned slice is
// nil.
//
// Possible rules: "empty", "too_short", "repeated_chars", "low_vowel_ratio",
// "too_many_links", "all_caps".
func EvaluateSpamScore(s string) (float64, []string) {
	content := strings.TrimSpace(s)
	if content == "" {
		return 1.0, []string{"empty"}
	}

	var rules []string
	runeCount := len([]rune(content))

	// Too short to carry meaningful content.
	if runeCount < spamMinLength {
		rules = append(rules, "too_short")
	}

	// Excessive repeated characters like "aaaaa" or "!!!!!". Go's regexp
	// package (RE2) has no backreferences, so the run is detected by scanning.
	if hasRepeatedRun(content, spamRepeatRun) {
		rules = append(rules, "repeated_chars")
	}

	// A very low vowel ratio suggests gibberish in Czech/English Latin text.
	// Skipped for inputs with few letters, where the ratio is not meaningful.
	letters := len(spamLetterRe.FindAllStringIndex(content, -1))
	if letters >= spamMinLetters {
		vowels := len(spamVowelRe.FindAllStringIndex(content, -1))
		if float64(vowels)/float64(letters) < spamMinVowelRatio {
			rules = append(rules, "low_vowel_ratio")
		}
	}

	// Too many links.
	if len(spamLinkRe.FindAllStringIndex(content, -1)) >= spamMaxLinks {
		rules = append(rules, "too_many_links")
	}

	// All-caps shouting. The ToLower comparison ensures the text contains at
	// least one cased letter, so digit- or punctuation-only input is not
	// reported as shouting. Length is measured in runes for consistency with
	// the "too_short" rule.
	if runeCount >= spamMinCapsLength &&
		content == strings.ToUpper(content) &&
		content != strings.ToLower(content) {
		rules = append(rules, "all_caps")
	}

	// The score is the sum of the triggered rule weights, capped at 1.0.
	score := 0.0
	for _, r := range rules {
		score += spamRuleWeights[r]
	}
	if score > 1.0 {
		score = 1.0
	}
	return score, rules
}

// hasRepeatedRun reports whether s contains the same ASCII letter or one of
// '!', '?', '.' at least minRun times in a row. Matching is case-sensitive.
func hasRepeatedRun(s string, minRun int) bool {
	var prev rune
	run := 0
	for _, r := range s {
		if !isRepeatCandidate(r) {
			prev, run = 0, 0
			continue
		}
		if r == prev {
			run++
		} else {
			prev, run = r, 1
		}
		if run >= minRun {
			return true
		}
	}
	return false
}

// isRepeatCandidate reports whether r is a character whose repetition counts
// toward the "repeated_chars" rule.
func isRepeatCandidate(r rune) bool {
	switch {
	case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z':
		return true
	case r == '!', r == '?', r == '.':
		return true
	}
	return false
}