Rewrite populate.py to minimize database and disk reads

+ change process name in manage.py
+ update the settings table schema to store show_albums_as_singles as an integer
+ enable periodic scans
+ misc
This commit is contained in:
mungai-njoroge
2023-09-11 11:37:36 +03:00
parent e9284de91f
commit bacf68248b
10 changed files with 143 additions and 19 deletions
+3 -1
View File
@@ -228,6 +228,8 @@ def get_artist(artisthash: str):
except ValueError:
year = 0
decade = None
if year:
decade = math.floor(year / 10) * 10
decade = str(decade)[2:] + "s"
@@ -273,8 +275,8 @@ def get_artist_albums(artisthash: str):
eps = [a for a in all_albums if a.is_EP]
def remove_EPs_and_singles(albums_: list[Album]):
albums_ = [a for a in albums_ if not a.is_EP]
albums_ = [a for a in albums_ if not a.is_single]
albums_ = [a for a in albums_ if not a.is_EP]
return albums_
albums = filter(lambda a: artisthash in a.albumartists_hashes, all_albums)
+1 -1
View File
@@ -32,7 +32,7 @@ CREATE TABLE IF NOT EXISTS settings (
clean_album_title integer NOT NULL DEFAULT 1,
remove_remaster integer NOT NULL DEFAULT 1,
merge_albums integer NOT NULL DEFAULT 0,
show_albums_as_singles NOT NULL DEFAULT 0
show_albums_as_singles integer NOT NULL DEFAULT 0
);
CREATE TABLE IF NOT EXISTS lastfm_similar_artists (
+13 -3
View File
@@ -1,3 +1,4 @@
import os
import urllib
from concurrent.futures import ThreadPoolExecutor
from io import BytesIO
@@ -79,15 +80,24 @@ class CheckArtistImages:
global CHECK_ARTIST_IMAGES_KEY
CHECK_ARTIST_IMAGES_KEY = instance_key
key_artist_map = (
(instance_key, artist) for artist in artist_store.ArtistStore.artists
# read all files in the artist image folder
path = settings.Paths.get_artist_img_sm_path()
processed = "".join(os.listdir(path)).replace("webp", "")
# filter out artists that already have an image
artists = filter(
lambda a: a.artisthash not in processed, artist_store.ArtistStore.artists
)
artists = list(artists)
# process the rest
key_artist_map = ((instance_key, artist) for artist in artists)
with ThreadPoolExecutor(max_workers=4) as executor:
res = list(
tqdm(
executor.map(self.download_image, key_artist_map),
total=len(artist_store.ArtistStore.artists),
total=len(artists),
desc="Downloading missing artist images",
)
)
+28 -6
View File
@@ -101,9 +101,7 @@ class Populate:
"Internet connection lost. Downloading artist images stopped."
)
else:
log.warning(
f"No internet connection. Downloading artist images stopped!"
)
log.warning(f"No internet connection. Downloading artist images stopped!")
# Re-process the new artist images.
if tried_to_download_new_images:
@@ -247,13 +245,29 @@ class ProcessTrackThumbnails:
"""
def __init__(self, instance_key: str) -> None:
key_album_map = ((instance_key, album) for album in AlbumStore.albums)
"""
Filters out albums that already have thumbnails and
extracts the thumbnail for the other albums.
"""
path = settings.Paths.get_sm_thumb_path()
# read all the files in the thumbnail directory
processed = "".join(os.listdir(path)).replace("webp", "")
# filter out albums that already have thumbnails
albums = filter(
lambda album: album.albumhash not in processed, AlbumStore.albums
)
albums = list(albums)
# process the rest
key_album_map = ((instance_key, album) for album in albums)
with ThreadPoolExecutor(max_workers=CPU_COUNT) as executor:
results = list(
tqdm(
executor.map(get_image, key_album_map),
total=len(AlbumStore.albums),
total=len(albums),
desc="Extracting track images",
)
)
@@ -291,7 +305,15 @@ class FetchSimilarArtistsLastFM:
"""
def __init__(self, instance_key: str) -> None:
artists = ArtistStore.artists
# read all artists from db
processed = lastfmdb.get_all()
processed = ".".join(a.artisthash for a in processed)
# filter out artists that already have similar artists
artists = filter(lambda a: a.artisthash not in processed, ArtistStore.artists)
artists = list(artists)
# process the rest
key_artist_map = ((instance_key, artist) for artist in artists)
with ThreadPoolExecutor(max_workers=CPU_COUNT) as executor:
+1 -1
View File
@@ -297,7 +297,7 @@ class UpdateAppSettingsTable(Migration):
clean_album_title integer NOT NULL DEFAULT 1,
remove_remaster integer NOT NULL DEFAULT 1,
merge_albums integer NOT NULL DEFAULT 0,
show_albums_as_singles NOT NULL DEFAULT 0
show_albums_as_singles integer NOT NULL DEFAULT 0
);
"""
+1 -1
View File
@@ -28,7 +28,7 @@ local audio files. Like a cooler Spotify ... but bring your own music.
Usage: swingmusic [options]
{tabulate(help_args_list, headers=["Option", "Short", "Description"], tablefmt="rounded_grid", maxcolwidths=[None, None, 44])}
{tabulate(help_args_list, headers=["Option", "Short", "Description"], tablefmt="markdown", maxcolwidths=[None, None, 44])}
"""
"80s, 90s, the noughties and today"
+3 -5
View File
@@ -168,7 +168,7 @@ class SessionVars:
CLEAN_ALBUM_TITLE = True
REMOVE_REMASTER_FROM_TRACK = True
DO_PERIODIC_SCANS = False
DO_PERIODIC_SCANS = True
PERIODIC_SCAN_INTERVAL = 600 # 10 minutes
"""
The interval between periodic scans in seconds.
@@ -176,7 +176,7 @@ class SessionVars:
MERGE_ALBUM_VERSIONS = False
ARTIST_SEPARATORS = set()
SHOW_ALBUMS_AS_SINGLES = True
SHOW_ALBUMS_AS_SINGLES = False
# TODO: Find a way to eliminate this class without breaking typings
@@ -189,9 +189,7 @@ class SessionVarKeys:
PERIODIC_SCAN_INTERVAL = "PERIODIC_SCAN_INTERVAL"
MERGE_ALBUM_VERSIONS = "MERGE_ALBUM_VERSIONS"
ARTIST_SEPARATORS = "ARTIST_SEPARATORS"
SHOW_ALBUMS_AS_SINGLES = (
"SHOW_ALBUMS_AS_SINGLES"
)
SHOW_ALBUMS_AS_SINGLES = "SHOW_ALBUMS_AS_SINGLES"
def get_flag(key: SessionVarKeys) -> bool: