package services import ( "regexp" "strings" ) // A compact list of Czech and English bad words with family-friendly replacements. // Note: This is a lightweight, non-exhaustive list intended for community sites. var badWordMap = map[string]string{ // Czech "kráva": "osobo", "debil": "nezdvořák", "idiot": "nešika", "blbec": "popleta", "pitomec": "nezbeda", "trouba": "popleta", "sprostý": "nevhodný", "sráč": "strašpytel", "čůrák": "šibal", "kokot": "popleta", "kretén": "nešika", "hovno": "ťuťo", "nasrat": "naštvat", "nasr**": "naštv**", "prdel": "zadek", "píča": "potížistka", "piča": "potížistka", "zmrd": "nezbeda", "sračka": "nepěknost", "sračky": "nepěknosti", "posrat": "pokazit", "posranej": "zkalený", "šukat": "láskovat", "mrdat": "lumpačit", "mrdka": "neplecha", "kurva": "mrška", "zasran": "nepříjemn", "do prdele": "sakryš", "čubka": "neposedná", "svině": "nezdárná", // English "shit": "shoot", "fuck": "flip", "fucking": "flipping", "asshole": "meanie", "bitch": "rascal", "bastard": "rascal", "dick": "goof", "dickhead": "goof", "cock": "goof", "pussy": "rascal", "cunt": "rascal", "crap": "crud", "damn": "darn", } // Compiled replacement patterns and sensitive patterns type compiledReplacement struct { re *regexp.Regexp replacement string } var compiledRepls []compiledReplacement var sensitiveRegexps []*regexp.Regexp func init() { // Build compiled replacements from explicit words/phrases for w, rep := range badWordMap { var pat string if strings.Contains(w, " ") { // phrase: allow flexible spacing pat = "(?i)\\b" + strings.ReplaceAll(regexp.QuoteMeta(w), " ", "\\s+") + "\\b" } else { pat = "(?i)\\b" + regexp.QuoteMeta(w) + "[a-zá-ž0-9]*\\b" } compiledRepls = append(compiledRepls, compiledReplacement{ re: regexp.MustCompile(pat), replacement: rep }) } // Add Czech stems with diacritic + leet tolerant patterns czStems := []struct{ stem, rep string }{ {"kurv", "mrška"}, {"píc", "potížistka"}, {"pic", "potížistka"}, {"mrd", "lumpačit"}, {"šuk", "láskovat"}, {"srač", "nepěknost"}, {"hovn", "ťuťo"}, {"zmrd", "nezbeda"}, {"čubk", "neposedná"}, {"svin", "nezdárná"}, {"kokot", "popleta"}, {"čur", "šibal"}, {"cur", "šibal"}, {"debil", "nezdvořák"}, {"idiot", "nešika"}, {"kretén", "nešika"}, {"blbec", "popleta"}, {"prdel", "zadek"}, } for _, it := range czStems { pat := "(?i)\\b" + diacriticLeetPattern(it.stem) + "[a-zá-ž0-9]*\\b" compiledRepls = append(compiledRepls, compiledReplacement{ re: regexp.MustCompile(pat), replacement: it.rep }) } // English stems (simple suffix handling) en := []struct{ rawPattern, rep string }{ {`(?i)\bshit(ty|head|s|ting)?\b`, "shoot"}, {`(?i)\bfuck(ing|er|ers|ed|s)?\b`, "flip"}, {`(?i)\bass(hole|hat|es)?\b`, "meanie"}, {`(?i)\bbitch(es|y)?\b`, "rascal"}, {`(?i)\bbastard(s)?\b`, "rascal"}, {`(?i)\bdick(head|s)?\b`, "goof"}, {`(?i)\bcock(s|ing)?\b`, "goof"}, {`(?i)\bpussy\b`, "rascal"}, {`(?i)\bcunt(s)?\b`, "rascal"}, {`(?i)\bcrap(py|s)?\b`, "crud"}, {`(?i)\bdamn(ed|s|ing)?\b`, "darn"}, } for _, e := range en { compiledRepls = append(compiledRepls, compiledReplacement{ re: regexp.MustCompile(e.rawPattern), replacement: e.rep }) } // Sensitive stems (trigger moderation) sensStems := []string{"kurv", "píc", "pic", "mrd", "šuk", "čur", "cur", "kokot", "cunt", "fuck"} for _, s := range sensStems { // Czech stems get diacritic+leet tolerant pattern; English raw var re *regexp.Regexp if isASCII(s) { re = regexp.MustCompile("(?i)\\b" + regexp.QuoteMeta(s) + "[a-z0-9]*\\b") } else { re = regexp.MustCompile("(?i)\\b" + diacriticLeetPattern(s) + "[a-zá-ž0-9]*\\b") } sensitiveRegexps = append(sensitiveRegexps, re) } } // FilterBadWords replaces bad words with friendlier counterparts while preserving approximate case. func FilterBadWords(s string) (string, bool) { if strings.TrimSpace(s) == "" { return s, false } out := s replaced := false for _, cr := range compiledRepls { out2 := cr.re.ReplaceAllStringFunc(out, func(m string) string { replaced = true // preserve basic case style if isTitle(m) { return title(cr.replacement) } if isUpper(m) { return strings.ToUpper(cr.replacement) } return cr.replacement }) out = out2 } return out, replaced } // ContainsSensitiveWords returns true and the matched words if content contains strong/explicit terms. func ContainsSensitiveWords(s string) (bool, []string) { if strings.TrimSpace(s) == "" { return false, nil } found := []string{} for _, re := range sensitiveRegexps { if loc := re.FindStringIndex(s); loc != nil { found = append(found, s[loc[0]:loc[1]]) } } if len(found) == 0 { return false, nil } return true, found } func isUpper(s string) bool { return s == strings.ToUpper(s) } func isTitle(s string) bool { return len(s) > 0 && strings.ToUpper(s[:1]) == s[:1] && strings.ToLower(s[1:]) == s[1:] } func title(s string) string { if len(s)==0 {return s}; return strings.ToUpper(s[:1]) + s[1:] } // Helpers for Czech diacritics + simple leetspeak func diacriticLeetPattern(stem string) string { var b strings.Builder for _, r := range stem { b.WriteString(expandRune(r)) } return b.String() } func expandRune(r rune) string { switch r { case 'a', 'A': return "[aá@4]" case 'e', 'E': return "[eéě3]" case 'i', 'I', 'l', 'L': return "[iíl1!]" case 'o', 'O': return "[oó0]" case 'u', 'U': return "[uúů]" case 'y', 'Y': return "[yý]" case 'c', 'C': return "[cč]" case 's', 'S': return "[sš5]" case 'z', 'Z': return "[zž2]" case 'r', 'R': return "[rř]" case 't', 'T': return "[tť7]" case 'n', 'N': return "[nň]" case 'd', 'D': return "[dď]" case 'p', 'P': return "[p]" case 'k', 'K': return "[k]" case 'm', 'M': return "[m]" case 'v', 'V': return "[v]" case 'h', 'H': return "[h]" case 'g', 'G': return "[g]" default: // escape everything else return regexp.QuoteMeta(string(r)) } } func isASCII(s string) bool { for i := 0; i < len(s); i++ { if s[i] >= 128 { return false } } return true }