mirror of
https://github.com/Dvorinka/MyClubServer.git
synced 2026-06-04 18:52:56 +00:00
dev day #79
This commit is contained in:
@@ -0,0 +1,55 @@
|
||||
package services
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Precompiled patterns used by EvaluateSpamScore. They are compiled once at
// package initialization instead of on every call.
var (
	// spamLetterRe matches a single ASCII or Czech letter.
	spamLetterRe = regexp.MustCompile(`[A-Za-zÁáÉéĚěÍíÓóÚúŮůÝýŽžŠšČčŘřŤťŇňĎď]`)
	// spamVowelRe matches a single ASCII or Czech vowel (y counts as a vowel).
	spamVowelRe = regexp.MustCompile(`[AaEeIiOoUuYyÁáÉéĚěÍíÓóÚúŮůÝý]`)
	// spamLinkRe matches the scheme prefix of an http(s) URL.
	spamLinkRe = regexp.MustCompile(`https?://`)
)

// spamRuleWeights maps each rule name to its contribution to the spam score.
var spamRuleWeights = map[string]float64{
	"empty":           1.0,
	"too_short":       0.4,
	"repeated_chars":  0.3,
	"low_vowel_ratio": 0.3,
	"too_many_links":  0.5,
	"all_caps":        0.2,
}

// hasSpamCharRun reports whether s contains the same character five or more
// times in a row, considering only ASCII letters and the characters '!', '?'
// and '.'. The comparison is case-sensitive.
//
// Go's regexp package (RE2) has no backreferences, so a pattern such as
// `(.)\1{4,}` cannot be compiled; the run is detected by scanning instead.
func hasSpamCharRun(s string) bool {
	var prev rune
	run := 0 // length of the current run of prev; 0 when prev is not tracked
	for _, r := range s {
		if run > 0 && r == prev {
			run++
			if run >= 5 {
				return true
			}
			continue
		}
		prev = r
		switch {
		case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r == '!', r == '?', r == '.':
			run = 1
		default:
			run = 0
		}
	}
	return false
}

// EvaluateSpamScore applies simple heuristics to judge how spammy s looks.
//
// The input is trimmed of surrounding whitespace first. It returns a score in
// the range 0..1 (the sum of the weights of the triggered rules, capped at 1)
// and the names of the triggered rules in evaluation order. Input that is
// empty after trimming yields a score of 1 and the single rule "empty". When
// no rule triggers, the score is 0 and the returned slice is nil.
//
// Rules: "too_short" (fewer than 6 characters), "repeated_chars" (a run of 5+
// identical characters), "low_vowel_ratio" (at least 8 letters of which fewer
// than 18% are vowels), "too_many_links" (3+ http/https URLs) and "all_caps"
// (8+ characters, at least one letter, and no lowercase letters).
func EvaluateSpamScore(s string) (float64, []string) {
	content := strings.TrimSpace(s)
	if content == "" {
		return 1.0, []string{"empty"}
	}

	var rules []string
	// Length checks count runes, not bytes, so that Czech diacritics (two
	// bytes each in UTF-8) are not counted twice.
	runeCount := len([]rune(content))

	// Too short
	if runeCount < 6 {
		rules = append(rules, "too_short")
	}

	// Excessive repeated characters like 'aaaaaa' or '!!!!!'
	if hasSpamCharRun(content) {
		rules = append(rules, "repeated_chars")
	}

	// Low vowel ratio suggests gibberish in Czech/English latin text.
	// Only evaluated when there are enough letters for the ratio to be meaningful.
	if letters := len(spamLetterRe.FindAllStringIndex(content, -1)); letters >= 8 {
		vowels := len(spamVowelRe.FindAllStringIndex(content, -1))
		if float64(vowels)/float64(letters) < 0.18 {
			rules = append(rules, "low_vowel_ratio")
		}
	}

	// Too many links
	if len(spamLinkRe.FindAllStringIndex(content, -1)) >= 3 {
		rules = append(rules, "too_many_links")
	}

	// All-caps shouting. The ToLower comparison ensures the text contains at
	// least one cased letter, so digit- or punctuation-only input is not flagged.
	if runeCount >= 8 && content == strings.ToUpper(content) && content != strings.ToLower(content) {
		rules = append(rules, "all_caps")
	}

	// Compute score by rules weight, capped at 1.
	score := 0.0
	for _, r := range rules {
		score += spamRuleWeights[r]
	}
	if score > 1.0 {
		score = 1.0
	}
	return score, rules
}
|
||||
Reference in New Issue
Block a user