mirror of
https://github.com/Dvorinka/MyClubServer.git
synced 2026-06-04 18:52:56 +00:00
dev day #79
This commit is contained in:
@@ -0,0 +1,197 @@
|
||||
package services
|
||||
|
||||
import (
	"regexp"
	"sort"
	"strings"
	"unicode"
	"unicode/utf8"
)
|
||||
|
||||
// badWordMap is a compact list of Czech and English bad words with
// family-friendly replacements.
//
// This is a lightweight, non-exhaustive list intended for community sites.
// init turns every key into a case-insensitive pattern; a key that contains a
// space is treated as a phrase with flexible spacing.
//
// NOTE(review): the key "nasr**" is escaped by init and therefore only matches
// literal asterisks, and the value "nepříjemn" (for "zasran") looks like a stem
// rather than a complete word — confirm both are intended.
var badWordMap = map[string]string{
	// Czech
	"kráva":     "osobo",
	"debil":     "nezdvořák",
	"idiot":     "nešika",
	"blbec":     "popleta",
	"pitomec":   "nezbeda",
	"trouba":    "popleta",
	"sprostý":   "nevhodný",
	"sráč":      "strašpytel",
	"čůrák":     "šibal",
	"kokot":     "popleta",
	"kretén":    "nešika",
	"hovno":     "ťuťo",
	"nasrat":    "naštvat",
	"nasr**":    "naštv**",
	"prdel":     "zadek",
	"píča":      "potížistka",
	"piča":      "potížistka",
	"zmrd":      "nezbeda",
	"sračka":    "nepěknost",
	"sračky":    "nepěknosti",
	"posrat":    "pokazit",
	"posranej":  "zkalený",
	"šukat":     "láskovat",
	"mrdat":     "lumpačit",
	"mrdka":     "neplecha",
	"kurva":     "mrška",
	"zasran":    "nepříjemn",
	"do prdele": "sakryš",
	"čubka":     "neposedná",
	"svině":     "nezdárná",

	// English
	"shit":     "shoot",
	"fuck":     "flip",
	"fucking":  "flipping",
	"asshole":  "meanie",
	"bitch":    "rascal",
	"bastard":  "rascal",
	"dick":     "goof",
	"dickhead": "goof",
	"cock":     "goof",
	"pussy":    "rascal",
	"cunt":     "rascal",
	"crap":     "crud",
	"damn":     "darn",
}
|
||||
|
||||
// compiledReplacement pairs a compiled pattern with the text that is
// substituted for each of its matches.
type compiledReplacement struct {
	re          *regexp.Regexp
	replacement string
}

// Pattern tables. Both are filled by init.
var (
	// compiledRepls lists the replacement patterns in the order
	// FilterBadWords applies them.
	compiledRepls []compiledReplacement

	// sensitiveRegexps lists the patterns ContainsSensitiveWords checks.
	sensitiveRegexps []*regexp.Regexp
)
|
||||
|
||||
func init() {
|
||||
// Build compiled replacements from explicit words/phrases
|
||||
for w, rep := range badWordMap {
|
||||
var pat string
|
||||
if strings.Contains(w, " ") {
|
||||
// phrase: allow flexible spacing
|
||||
pat = "(?i)\\b" + strings.ReplaceAll(regexp.QuoteMeta(w), " ", "\\s+") + "\\b"
|
||||
} else {
|
||||
pat = "(?i)\\b" + regexp.QuoteMeta(w) + "[a-zá-ž0-9]*\\b"
|
||||
}
|
||||
compiledRepls = append(compiledRepls, compiledReplacement{ re: regexp.MustCompile(pat), replacement: rep })
|
||||
}
|
||||
|
||||
// Add Czech stems with diacritic + leet tolerant patterns
|
||||
czStems := []struct{ stem, rep string }{
|
||||
{"kurv", "mrška"}, {"píc", "potížistka"}, {"pic", "potížistka"}, {"mrd", "lumpačit"}, {"šuk", "láskovat"}, {"srač", "nepěknost"}, {"hovn", "ťuťo"}, {"zmrd", "nezbeda"}, {"čubk", "neposedná"}, {"svin", "nezdárná"}, {"kokot", "popleta"}, {"čur", "šibal"}, {"cur", "šibal"},
|
||||
{"debil", "nezdvořák"}, {"idiot", "nešika"}, {"kretén", "nešika"}, {"blbec", "popleta"}, {"prdel", "zadek"},
|
||||
}
|
||||
for _, it := range czStems {
|
||||
pat := "(?i)\\b" + diacriticLeetPattern(it.stem) + "[a-zá-ž0-9]*\\b"
|
||||
compiledRepls = append(compiledRepls, compiledReplacement{ re: regexp.MustCompile(pat), replacement: it.rep })
|
||||
}
|
||||
|
||||
// English stems (simple suffix handling)
|
||||
en := []struct{ rawPattern, rep string }{
|
||||
{`(?i)\bshit(ty|head|s|ting)?\b`, "shoot"},
|
||||
{`(?i)\bfuck(ing|er|ers|ed|s)?\b`, "flip"},
|
||||
{`(?i)\bass(hole|hat|es)?\b`, "meanie"},
|
||||
{`(?i)\bbitch(es|y)?\b`, "rascal"},
|
||||
{`(?i)\bbastard(s)?\b`, "rascal"},
|
||||
{`(?i)\bdick(head|s)?\b`, "goof"},
|
||||
{`(?i)\bcock(s|ing)?\b`, "goof"},
|
||||
{`(?i)\bpussy\b`, "rascal"},
|
||||
{`(?i)\bcunt(s)?\b`, "rascal"},
|
||||
{`(?i)\bcrap(py|s)?\b`, "crud"},
|
||||
{`(?i)\bdamn(ed|s|ing)?\b`, "darn"},
|
||||
}
|
||||
for _, e := range en {
|
||||
compiledRepls = append(compiledRepls, compiledReplacement{ re: regexp.MustCompile(e.rawPattern), replacement: e.rep })
|
||||
}
|
||||
|
||||
// Sensitive stems (trigger moderation)
|
||||
sensStems := []string{"kurv", "píc", "pic", "mrd", "šuk", "čur", "cur", "kokot", "cunt", "fuck"}
|
||||
for _, s := range sensStems {
|
||||
// Czech stems get diacritic+leet tolerant pattern; English raw
|
||||
var re *regexp.Regexp
|
||||
if isASCII(s) {
|
||||
re = regexp.MustCompile("(?i)\\b" + regexp.QuoteMeta(s) + "[a-z0-9]*\\b")
|
||||
} else {
|
||||
re = regexp.MustCompile("(?i)\\b" + diacriticLeetPattern(s) + "[a-zá-ž0-9]*\\b")
|
||||
}
|
||||
sensitiveRegexps = append(sensitiveRegexps, re)
|
||||
}
|
||||
}
|
||||
|
||||
// FilterBadWords replaces bad words with friendlier counterparts while preserving approximate case.
|
||||
func FilterBadWords(s string) (string, bool) {
|
||||
if strings.TrimSpace(s) == "" { return s, false }
|
||||
out := s
|
||||
replaced := false
|
||||
for _, cr := range compiledRepls {
|
||||
out2 := cr.re.ReplaceAllStringFunc(out, func(m string) string {
|
||||
replaced = true
|
||||
// preserve basic case style
|
||||
if isTitle(m) { return title(cr.replacement) }
|
||||
if isUpper(m) { return strings.ToUpper(cr.replacement) }
|
||||
return cr.replacement
|
||||
})
|
||||
out = out2
|
||||
}
|
||||
return out, replaced
|
||||
}
|
||||
|
||||
// ContainsSensitiveWords returns true and the matched words if content contains strong/explicit terms.
|
||||
func ContainsSensitiveWords(s string) (bool, []string) {
|
||||
if strings.TrimSpace(s) == "" { return false, nil }
|
||||
found := []string{}
|
||||
for _, re := range sensitiveRegexps {
|
||||
if loc := re.FindStringIndex(s); loc != nil {
|
||||
found = append(found, s[loc[0]:loc[1]])
|
||||
}
|
||||
}
|
||||
if len(found) == 0 { return false, nil }
|
||||
return true, found
|
||||
}
|
||||
|
||||
// isUpper reports whether upper-casing s leaves it unchanged. Strings with
// nothing to upper-case (empty, digits only) therefore count as upper case.
func isUpper(s string) bool {
	upper := strings.ToUpper(s)
	return upper == s
}
|
||||
// isTitle reports whether s is in title case: its first character is
// unchanged by upper-casing and the rest is unchanged by lower-casing.
// The empty string is not title case.
//
// The split is made on runes, not bytes, so words that start with a
// multi-byte letter such as "Čůrák" are recognised.
func isTitle(s string) bool {
	rs := []rune(s)
	if len(rs) == 0 {
		return false
	}
	head, tail := string(rs[0]), string(rs[1:])
	return strings.ToUpper(head) == head && strings.ToLower(tail) == tail
}
|
||||
// title returns s with its first character upper-cased and the rest left
// as is. The empty string is returned unchanged.
//
// The first character is taken as a whole rune; slicing the first byte would
// split multi-byte letters and corrupt replacements such as "šibal" or "ťuťo".
func title(s string) string {
	rs := []rune(s)
	if len(rs) == 0 {
		return s
	}
	return strings.ToUpper(string(rs[0])) + string(rs[1:])
}
|
||||
|
||||
// Helpers for Czech diacritics + simple leetspeak
|
||||
func diacriticLeetPattern(stem string) string {
|
||||
var b strings.Builder
|
||||
for _, r := range stem {
|
||||
b.WriteString(expandRune(r))
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// expandRune returns the regular-expression fragment used to match r
// tolerantly.
//
// For the ASCII letters listed below (either case) it returns a character
// class holding the lower-case letter plus its Czech diacritic variants and
// simple leetspeak look-alikes; 'i' and 'l' share one class. Any other rune is
// returned escaped with regexp.QuoteMeta so that it matches itself literally.
func expandRune(r rune) string {
	switch r {
	case 'a', 'A':
		return "[aá@4]"
	case 'e', 'E':
		return "[eéě3]"
	case 'i', 'I', 'l', 'L':
		return "[iíl1!]"
	case 'o', 'O':
		return "[oó0]"
	case 'u', 'U':
		return "[uúů]"
	case 'y', 'Y':
		return "[yý]"
	case 'c', 'C':
		return "[cč]"
	case 's', 'S':
		return "[sš5]"
	case 'z', 'Z':
		return "[zž2]"
	case 'r', 'R':
		return "[rř]"
	case 't', 'T':
		return "[tť7]"
	case 'n', 'N':
		return "[nň]"
	case 'd', 'D':
		return "[dď]"
	case 'p', 'P':
		return "[p]"
	case 'k', 'K':
		return "[k]"
	case 'm', 'M':
		return "[m]"
	case 'v', 'V':
		return "[v]"
	case 'h', 'H':
		return "[h]"
	case 'g', 'G':
		return "[g]"
	default:
		// Escape everything else so regexp metacharacters stay literal.
		return regexp.QuoteMeta(string(r))
	}
}
|
||||
|
||||
// isASCII reports whether every byte of s is below 128, i.e. s contains only
// ASCII characters. The empty string is ASCII.
func isASCII(s string) bool {
	for i := 0; i < len(s); i++ {
		if s[i] >= 128 {
			return false
		}
	}
	return true
}
|
||||
Reference in New Issue
Block a user