start: rewrite the database layer using a freaking ORM

+ start ditching in-mem stores
+ move main db table to a new name
+ experiments!
This commit is contained in:
cwilvx
2024-06-24 00:26:47 +03:00
parent c3472a865a
commit c42ec4dcde
27 changed files with 1399 additions and 397 deletions
+45 -30
View File
@@ -8,6 +8,7 @@ from app.settings import SUPPORTED_FILES
from app.utils.wintools import win_replace_slash
from app.store.tracks import TrackStore
from app.db import TrackTable as TrackDB
def create_folder(path: str, trackcount=0, foldercount=0) -> Folder:
@@ -37,44 +38,52 @@ def get_first_child_from_path(root: str, maybe_child: str):
return os.path.join(root, first)
def get_folders(paths: list[str]):
"""
Filters out folders that don't have any tracks and
returns a list of folder objects.
"""
count_dict = {
"tracks": {path: 0 for path in paths},
# folders are immediate children of the root folder
"folders": {path: set() for path in paths},
}
for track in TrackStore.tracks:
for path in paths:
# a child path should be longer than the root path
if len(track.folder) >= len(path) and track.folder.startswith(path):
count_dict["tracks"][path] += 1
# counting subfolders
p = get_first_child_from_path(path, track.folder)
if p:
count_dict["folders"][path].add(p)
folders = [
{
"path": path,
"trackcount": count_dict["tracks"][path],
"foldercount": len(count_dict["folders"][path]),
}
for path in paths
]
folders = TrackDB.count_tracks_containing_paths(paths)
return [
create_folder(f["path"], f["trackcount"], f["foldercount"])
create_folder(f["path"], f["trackcount"], foldercount=0)
for f in folders
if f["trackcount"] > 0
]
# count_dict = {
# "tracks": {path: 0 for path in paths},
# # folders are immediate children of the root folder
# "folders": {path: set() for path in paths},
# }
# for track in TrackStore.tracks:
# for path in paths:
# # a child path should be longer than the root path
# if len(track.folder) >= len(path) and track.folder.startswith(path):
# count_dict["tracks"][path] += 1
# # counting subfolders
# p = get_first_child_from_path(path, track.folder)
# if p:
# count_dict["folders"][path].add(p)
# folders = [
# {
# "path": path,
# "trackcount": count_dict["tracks"][path],
# "foldercount": len(count_dict["folders"][path]),
# }
# for path in paths
# ]
# return [
# create_folder(f["path"], f["trackcount"], f["foldercount"])
# for f in folders
# if f["trackcount"] > 0
# ]
class GetFilesAndDirs:
@@ -131,7 +140,13 @@ class GetFilesAndDirs:
files_.sort(key=lambda f: f["time"])
files = [f["path"] for f in files_]
tracks = TrackStore.get_tracks_by_filepaths(files)
tracks = []
if files:
tracks = TrackDB.get_tracks_by_filepaths(files)
print("printing files")
print(tracks)
# tracks = TrackStore.get_tracks_by_filepaths(files)
folders = []
if not self.tracks_only:
@@ -145,7 +160,7 @@ class GetFilesAndDirs:
return {
"path": path,
"tracks": serialize_tracks(tracks),
"tracks": tracks,
"folders": folders,
}
+36 -34
View File
@@ -7,6 +7,7 @@ from requests import ConnectionError as RequestConnectionError
from requests import ReadTimeout
from app import settings
from app.db import TrackTable
from app.db.sqlite.favorite import SQLiteFavoriteMethods as favdb
from app.db.sqlite.lastfm.similar_artists import SQLiteLastFMSimilarArtists as lastfmdb
from app.db.sqlite.settings import SettingsSQLMethods as sdb
@@ -121,14 +122,14 @@ class Populate:
return
@staticmethod
def remove_modified(tracks: Generator[Track, None, None]):
def remove_modified(tracks: Generator[TrackTable, None, None]):
"""
Removes tracks from the database that have been modified
since they were added to the database.
"""
unmodified_paths = set()
modified_tracks: list[Track] = []
modified_tracks: list[TrackTable] = []
modified_paths = set()
for track in tracks:
@@ -151,18 +152,6 @@ class Populate:
@staticmethod
def tag_untagged(untagged: set[str], key: str):
log.info("Found %s new tracks", len(untagged))
tagged_tracks: deque[dict] = deque()
tagged_count = 0
favs = favdb.get_fav_tracks()
records = dict()
for fav in favs:
r = records.setdefault(fav[1], set())
r.add(fav[4])
for file in tqdm(untagged, desc="Reading files"):
if POPULATE_KEY != key:
log.warning("'Populate.tag_untagged': Populate key changed")
@@ -171,36 +160,49 @@ class Populate:
tags = get_tags(file)
if tags is not None:
tagged_tracks.append(tags)
track = Track(**tags)
TrackTable.insert_one(tags)
track.fav_userids = list(records.get(track.trackhash, set()))
# log.info("Found %s new tracks", len(untagged))
# # tagged_tracks: deque[dict] = deque()
# # tagged_count = 0
TrackStore.add_track(track)
# favs = favdb.get_fav_tracks()
# records = dict()
if not AlbumStore.album_exists(track.albumhash):
AlbumStore.add_album(AlbumStore.create_album(track))
# for fav in favs:
# r = records.setdefault(fav[1], set())
# r.add(fav[4])
for artist in track.artists:
if not ArtistStore.artist_exists(artist.artisthash):
ArtistStore.add_artist(Artist(artist.name))
# tagged_tracks.append(tags)
# track = Track(**tags)
for artist in track.albumartists:
if not ArtistStore.artist_exists(artist.artisthash):
ArtistStore.add_artist(Artist(artist.name))
# track.fav_userids = list(records.get(track.trackhash, set()))
tagged_count += 1
else:
log.warning("Could not read file: %s", file)
# TrackStore.add_track(track)
if len(tagged_tracks) > 0:
log.info("Adding %s tracks to database", len(tagged_tracks))
insert_many_tracks(tagged_tracks)
# if not AlbumStore.album_exists(track.albumhash):
# AlbumStore.add_album(AlbumStore.create_album(track))
log.info("Added %s/%s tracks", tagged_count, len(untagged))
# for artist in track.artists:
# if not ArtistStore.artist_exists(artist.artisthash):
# ArtistStore.add_artist(Artist(artist.name))
# for artist in track.albumartists:
# if not ArtistStore.artist_exists(artist.artisthash):
# ArtistStore.add_artist(Artist(artist.name))
# tagged_count += 1
# else:
# log.warning("Could not read file: %s", file)
# if len(tagged_tracks) > 0:
# log.info("Adding %s tracks to database", len(tagged_tracks))
# insert_many_tracks(tagged_tracks)
# log.info("Added %s/%s tracks", tagged_count, len(untagged))
@staticmethod
def extract_thumb_with_overwrite(tracks: list[Track]):
def extract_thumb_with_overwrite(tracks: list[TrackTable]):
"""
Extracts the thumbnail from a list of filepaths,
overwriting the existing thumbnail if it exists,
+1 -1
View File
@@ -195,7 +195,7 @@ class TopResults:
except AttributeError:
item.duration = 0
item.check_is_single(tracks)
item.is_single(tracks)
if not item.is_single:
item.check_type()
+154
View File
@@ -0,0 +1,154 @@
from pprint import pprint
from app.db import AlbumTable, ArtistTable, TrackTable
from app.lib.taglib import get_tags
from app.utils.filesystem import run_fast_scandir
from app.utils.parsers import get_base_album_title
from app.utils.progressbar import tqdm
class IndexTracks:
    """
    Scans one or more directories for files, reads the tags of each
    file and inserts every successfully tagged file into the track table.
    """

    # Scan root used when the caller does not supply any directories.
    DEFAULT_DIRS = ("/home/cwilvx/Music",)

    def __init__(self, dirs_to_scan: "list[str] | None" = None) -> None:
        """
        Collects the files under each directory and indexes them.

        :param dirs_to_scan: directories to scan. When omitted (or empty),
            ``DEFAULT_DIRS`` is used, which matches the previous
            hard-coded behaviour.
        """
        if not dirs_to_scan:
            dirs_to_scan = self.DEFAULT_DIRS

        files: set[str] = set()

        for _dir in dirs_to_scan:
            # run_fast_scandir returns a sequence whose second item holds
            # the files that were found; the first item is not used here.
            files.update(run_fast_scandir(_dir, full=True)[1])

        self.tag_untagged(files)
        # unmodified, modified_tracks = self.remove_modified(tracks)
        # untagged = files - unmodified

    def tag_untagged(self, files: set[str]):
        """
        Reads the tags of every file and inserts them into the track table.

        Files for which ``get_tags`` returns ``None`` are skipped.
        """
        for file in tqdm(files, desc="Reading files"):
            # if POPULATE_KEY != key:
            #     log.warning("'Populate.tag_untagged': Populate key changed")
            #     return

            tags = get_tags(file)

            if tags is not None:
                TrackTable.insert_one(tags)
class IndexAlbums:
    """
    Groups every track in the track table by ``albumhash``, builds one
    album record per group and passes the records to
    ``AlbumTable.insert_many``.
    """

    def __init__(self) -> None:
        albums = dict()
        all_tracks: list[TrackTable] = TrackTable.get_all()

        for track in all_tracks:
            if track.albumhash not in albums:
                # First track seen for this album: seed the record.
                # "dates" and "created_dates" are scratch lists that are
                # reduced to a single value and removed further down.
                albums[track.albumhash] = {
                    "albumartists": track.albumartists,
                    "albumhash": track.albumhash,
                    "base_title": None,
                    "color": None,
                    "created_date": None,
                    "date": None,
                    "duration": track.duration,
                    "genres": [*track.genre] if track.genre else [],
                    "og_title": track.og_album,
                    "title": track.album,
                    "trackcount": 1,
                    "dates": [track.date],
                    "created_dates": [track.last_mod],
                }
                continue

            album = albums[track.albumhash]
            album["trackcount"] += 1
            album["duration"] += track.duration
            album["dates"].append(track.date)
            album["created_dates"].append(track.last_mod)

            if track.genre:
                # track.genre is a collection of genres (it is unpacked
                # with * when the record is seeded), so it is merged item
                # by item. append() would nest the whole collection as a
                # single entry and defeat the de-duplication below.
                album["genres"].extend(track.genre)

        for album in albums.values():
            # The album takes the earliest date / modification time
            # found among its tracks.
            album["date"] = min(album["dates"])
            album["created_date"] = min(album["created_dates"])

            # Order-preserving de-duplication. A set is not used because
            # the genre items may be unhashable (e.g. dicts).
            genres = []
            for genre in album["genres"]:
                if genre not in genres:
                    genres.append(genre)

            album["genres"] = genres
            album["base_title"], _ = get_base_album_title(album["og_title"])

            # drop the scratch lists before the records are inserted
            del album["dates"]
            del album["created_dates"]

        pprint(albums)
        AlbumTable.insert_many(list(albums.values()))
class IndexArtists:
    """
    Builds one artist record for every artist that appears in a track's
    ``artists`` or ``albumartists`` and passes the records to
    ``ArtistTable.insert_many``.
    """

    def __init__(self) -> None:
        all_tracks: list[TrackTable] = TrackTable.get_all()
        artists = dict()

        for track in all_tracks:
            # Work on a copy: appending album artists to track.artists
            # itself would mutate the track object that was just loaded.
            this_artists = list(track.artists)

            for a in track.albumartists:
                if a not in this_artists:
                    this_artists.append(a)

            for artist in this_artists:
                artisthash = artist["artisthash"]

                if artisthash not in artists:
                    # First appearance of this artist: seed the record.
                    # "albums", "tracks", "dates" and "created_dates" are
                    # scratch collections that are reduced and removed
                    # further down.
                    artists[artisthash] = {
                        "albumcount": None,
                        "albums": {track.albumhash},
                        "artisthash": artisthash,
                        "created_dates": [track.last_mod],
                        "dates": [track.date],
                        "date": None,
                        "duration": track.duration,
                        "genres": [*track.genre] if track.genre else [],
                        "name": artist["name"],
                        "trackcount": None,
                        "tracks": {track.trackhash},
                    }
                    continue

                entry = artists[artisthash]
                entry["duration"] += track.duration
                entry["albums"].add(track.albumhash)
                entry["tracks"].add(track.trackhash)
                entry["dates"].append(track.date)
                entry["created_dates"].append(track.last_mod)

                if track.genre:
                    # track.genre is a collection of genres (it is
                    # unpacked with * when the record is seeded), so it
                    # is merged item by item. append() would nest the
                    # whole collection as a single entry.
                    entry["genres"].extend(track.genre)

        for artist in artists.values():
            artist["albumcount"] = len(artist["albums"])
            artist["trackcount"] = len(artist["tracks"])
            # The artist takes the earliest date / modification time
            # found among their tracks.
            artist["date"] = min(artist["dates"])
            artist["created_date"] = min(artist["created_dates"])

            # Order-preserving de-duplication. A set is not used because
            # the genre items may be unhashable (e.g. dicts).
            genres = []
            for genre in artist["genres"]:
                if genre not in genres:
                    genres.append(genre)

            artist["genres"] = genres

            # drop the scratch collections before the records are inserted
            del artist["tracks"]
            del artist["albums"]
            del artist["dates"]
            del artist["created_dates"]

        pprint(artists)
        ArtistTable.insert_many(list(artists.values()))
class IndexEverything:
    """
    Currently a no-op: constructing it does nothing.

    The calls to the individual indexers are commented out below.
    """

    def __init__(self) -> None:
        # IndexTracks()
        # IndexAlbums()
        # IndexArtists()
        return None
+100 -4
View File
@@ -8,9 +8,16 @@ import pendulum
from PIL import Image, UnidentifiedImageError
from tinytag import TinyTag
from app.config import UserConfig
from app.settings import Defaults, Paths
from app.utils.hashing import create_hash
from app.utils.parsers import split_artists
from app.utils.parsers import (
clean_title,
get_base_title_and_versions,
parse_feat_from_title,
remove_prod,
split_artists,
)
from app.utils.wintools import win_replace_slash
@@ -206,9 +213,7 @@ def get_tags(filepath: str):
except KeyError:
tags.copyright = None
tags.albumhash = create_hash(tags.album, tags.albumartist)
tags.trackhash = create_hash(tags.artist, tags.album, tags.title)
tags.image = f"{tags.albumhash}.webp"
# tags.image = f"{tags.albumhash}.webp"
tags.folder = win_replace_slash(os.path.dirname(filepath))
tags.date = parse_date(tags.year) or int(last_mod)
@@ -218,9 +223,100 @@ def get_tags(filepath: str):
tags.artists = tags.artist
tags.albumartists = tags.albumartist
split_artist = split_artists(tags.artist)
split_albumartists = split_artists(tags.albumartist)
new_title = tags.title
# TODO: Figure out which is the best spot to create these hashes
# create albumhash using og_album
tags.albumhash = create_hash(tags.album or "", tags.albumartist)
config = UserConfig()
# extract featured artists
if config.extractFeaturedArtists:
feat, new_title = parse_feat_from_title(tags.title)
original_lower = "-".join([create_hash(a) for a in split_artist])
split_artist.extend(a for a in feat if create_hash(a) not in original_lower)
# if no albumartist, assign to the first artist
if not tags.albumartist:
tags.albumartist = split_artist[:1]
# create json objects for artists and albumartists
tags.artists = [
{
"artisthash": create_hash(a, decode=True),
"name": a,
}
for a in split_artist
]
tags.albumartists = [
{
"artisthash": create_hash(a, decode=True),
"name": a,
}
for a in split_albumartists
]
# remove prod by
if config.removeProdBy:
new_title = remove_prod(new_title)
# if track is a single, ie.
# if og_title == album, rename album to new_title
if tags.title == tags.album:
tags.album = new_title
# remove remaster from track title
if config.removeRemasterInfo:
new_title = clean_title(new_title)
# save final title
tags.og_title = tags.title
tags.title = new_title
tags.og_album = tags.album
# clean album title
if config.cleanAlbumTitle:
tags.album, _ = get_base_title_and_versions(tags.album, get_versions=False)
# merge album versions
if config.mergeAlbums:
tags.albumhash = create_hash(
tags.album, *(a["name"] for a in tags.albumartists)
)
# process genres
if tags.genre:
tags.genre = tags.genre.lower()
# separators = {"/", ";", "&"}
separators = set(config.genreSeparators)
contains_rnb = "r&b" in tags.genre
contains_rock = "rock & roll" in tags.genre
if contains_rnb:
tags.genre = tags.genre.replace("r&b", "RnB")
if contains_rock:
tags.genre = tags.genre.replace("rock & roll", "rock")
for s in separators:
tags.genre = tags.genre.replace(s, ",")
tags.genre = tags.genre.split(",")
tags.genre = [
{"name": g.strip(), "genrehash": create_hash(g.strip())} for g in tags.genre
]
# sub underscore with space
tags.title = tags.title.replace("_", " ")
tags.album = tags.album.replace("_", " ")
tags.trackhash = create_hash(
*[a["name"] for a in tags.artists], tags.album, tags.title
)
tags = tags.__dict__