first commit

This commit is contained in:
Tomas Dvorak
2026-04-13 17:46:58 +02:00
commit 6e8fedf534
234 changed files with 53808 additions and 0 deletions
+42
View File
@@ -0,0 +1,42 @@
import xxhash
from unidecode import unidecode
def create_hash(*args: str, decode=False, limit=10) -> str:
    """
    Build a stable hexadecimal identifier from one or more strings.

    Every argument is lowercased, stripped of surrounding whitespace and has
    its spaces removed. Non-alphanumeric characters are then dropped, unless
    doing so would leave an empty string, in which case the lowercased,
    space-free text is used as-is so that symbol-only inputs still contribute
    to the result. The normalised pieces are concatenated in argument order,
    optionally passed through ``unidecode``, UTF-8 encoded and hashed with
    ``xxhash.xxh3_64``.

    Example use case:
    - Giving differently-cased spellings of the same artist one ID,
      e.g. "Juice WRLD" and "Juice Wrld" produce the same hash.

    :param args: The strings to hash, in order.
    :param decode: Whether to run the normalised text through ``unidecode``
        before hashing.
    :param limit: Accepted for backward compatibility but currently ignored;
        the complete xxh3_64 hex digest is always returned.
    :return: The hex digest of the normalised, concatenated input.
    """
    pieces = []
    for raw in args:
        squashed = raw.lower().strip().replace(" ", "")
        alnum_only = "".join(filter(str.isalnum, squashed))
        # Fall back to the unfiltered text when filtering removed everything,
        # otherwise inputs made only of symbols would all hash identically.
        pieces.append(alnum_only or squashed)

    normalised = "".join(pieces)
    if decode:
        normalised = unidecode(normalised)

    return xxhash.xxh3_64(normalised.encode("utf-8")).hexdigest()