Files
swingmusic-extended/swingmusic/lib/artistlib.py
T
cwilvx 86fabcd5e3 modularize src
+ merge main.py and manage.py
+ move start logic to swingmusic/__main__.py
+ add a run.py on the project root
2025-05-25 20:35:54 +03:00

188 lines
6.1 KiB
Python

import os
import time
import random
import urllib
import requests
from io import BytesIO
from pathlib import Path
from concurrent.futures import ProcessPoolExecutor
from PIL import Image, PngImagePlugin, UnidentifiedImageError
from requests.exceptions import ConnectionError as RequestConnectionError
from requests.exceptions import ReadTimeout
from swingmusic import settings
from swingmusic.models.artist import Artist
from swingmusic.store.artists import ArtistStore
# from swingmusic.db.libdata import ArtistTable
# from swingmusic.store import artists as artist_store
# from swingmusic.store.tracks import TrackStore
from swingmusic.utils.hashing import create_hash
from swingmusic.utils.progressbar import tqdm
LARGE_ENOUGH_NUMBER = 100
PngImagePlugin.MAX_TEXT_CHUNK = LARGE_ENOUGH_NUMBER * (1024**2)
# https://stackoverflow.com/a/61466412
def get_artist_image_link(artist: str):
"""
Returns an artist image url.
"""
response: requests.Response | None = None
def make_request():
query = urllib.parse.quote(artist) # type: ignore
url = f"https://api.deezer.com/search/artist?q={query}"
user_agents = [
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Safari/605.1.15",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0",
"Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1",
]
headers = {
"User-Agent": random.choice(user_agents),
"Accept": "application/json, text/plain, */*",
"Accept-Language": "en-US,en;q=0.9",
"Referer": "https://www.deezer.com/",
"Origin": "https://www.deezer.com",
}
return requests.get(url, headers=headers, timeout=30)
for attempt in range(5):
try:
response = make_request()
try:
data = response.json()
except requests.exceptions.JSONDecodeError:
return None
for res in data["data"]:
res_hash = create_hash(res["name"], decode=True)
artist_hash = create_hash(artist, decode=True)
if res_hash == artist_hash:
return str(res["picture_big"])
return None
except (RequestConnectionError, ReadTimeout, IndexError, KeyError):
if attempt == 4:
print("Failed to get artist image link ")
if attempt <= 4:
time.sleep(10)
else:
return None
# except (IndexError, KeyError):
# print(f"Encountered index/key error in attempt {attempt}")
# if response is not None:
# print(response.headers)
# return None
# TODO: Move network calls to utils/network.py
class DownloadImage:
def __init__(self, url: str, name: str) -> None:
img = self.download(url)
if img is None:
return
sm_path = Path(settings.Paths.get_sm_artist_img_path()) / name
lg_path = Path(settings.Paths.get_lg_artist_img_path()) / name
md_path = Path(settings.Paths.get_md_artist_img_path()) / name
entries = [
(lg_path, None), # save in the original size
(sm_path, settings.Defaults.SM_ARTIST_IMG_SIZE),
(md_path, settings.Defaults.MD_ARTIST_IMG_SIZE),
]
self.save_img(img, entries)
@staticmethod
def download(url: str) -> Image.Image | None:
"""
Downloads the image from the url.
Retries after 10 seconds on a connection error.
"""
for attempt in range(2):
try:
response = requests.get(url, timeout=10)
return Image.open(BytesIO(response.content))
except (RequestConnectionError, requests.Timeout, ReadTimeout):
if attempt == 0:
time.sleep(10)
else:
return None
except UnidentifiedImageError:
return None
@staticmethod
def save_img(img: Image.Image, entries: list[tuple[Path, int | None]]):
"""
Saves the image to the destinations.
"""
ratio = img.width / img.height
for entry in entries:
path, size = entry
if size is None:
img.save(path, format="webp")
continue
img.resize((size, int(size / ratio)), Image.LANCZOS).save(
path, format="webp"
)
class CheckArtistImages:
def __init__(self):
# read all files in the artist image folder
path = settings.Paths.get_sm_artist_img_path()
processed = set(i.replace(".webp", "") for i in os.listdir(path))
unprocessed = [
a for a in ArtistStore.get_flat_list() if a.artisthash not in processed
]
# Use number of CPU cores minus 1 to leave one core free for system processes
num_workers = max(1, os.cpu_count() // 2)
with ProcessPoolExecutor(max_workers=num_workers) as executor:
res = list(
tqdm(
executor.map(self.download_image, unprocessed),
total=len(unprocessed),
desc="Downloading missing artist images",
)
)
list(res)
@staticmethod
def download_image(artist: Artist):
"""
Checks if an artist image exists and downloads it if not.
:param artist: The artist name
"""
img_path = (
Path(settings.Paths.get_sm_artist_img_path()) / f"{artist.artisthash}.webp"
)
if img_path.exists():
return
url = get_artist_image_link(artist.name)
if url is not None:
return DownloadImage(url, name=f"{artist.artisthash}.webp")