Rewrite remove_duplicates to efficiently support removing duplicates in album tracks

+ remove flags added to client settings page
+ misc
This commit is contained in:
mungai-njoroge
2023-08-29 20:04:30 +03:00
parent 26e36ba36f
commit 13475b0630
16 changed files with 118 additions and 95 deletions
+2 -2
View File
@@ -1,14 +1,14 @@
import re
from app.enums.album_versions import AlbumVersionEnum
from app.settings import get_flag, ParserFlags
from app.settings import SessionVarKeys, get_flag
def split_artists(src: str):
"""
Splits a string of artists into a list of artists.
"""
separators: set = get_flag(ParserFlags.ARTIST_SEPARATORS)
separators: set = get_flag(SessionVarKeys.ARTIST_SEPARATORS)
separators = separators.union({","})
for sep in separators:
+31 -4
View File
@@ -2,21 +2,48 @@ from collections import defaultdict
from operator import attrgetter
from app.models import Track
from app.utils.hashing import create_hash
def remove_duplicates(tracks: list[Track]) -> list[Track]:
def remove_duplicates(tracks: list[Track], is_album_tracks=False) -> list[Track]:
"""
Remove duplicates from a list of Track objects based on the trackhash attribute.
Retain objects with the highest bitrate.
"""
hash_to_tracks = defaultdict(list)
tracks_dict = defaultdict(list)
# if is_album_tracks, sort by disc and track number
if is_album_tracks:
for t in tracks:
# _pos is used for sorting tracks by disc and track number
t._pos = int(f"{t.disc}{str(t.track).zfill(3)}")
# _ati is used to remove duplicates when merging album versions
t._ati = f"{t._pos}{create_hash(t.title)}"
# create groups of tracks with the same _ati
for track in tracks:
tracks_dict[track._ati].append(track)
tracks = []
# pick the track with max bitrate for each group
for track_group in tracks_dict.values():
max_bitrate_track = max(track_group, key=attrgetter("bitrate"))
tracks.append(max_bitrate_track)
return sorted(tracks, key=lambda t: t._pos)
# else, sort by trackhash
for track in tracks:
hash_to_tracks[track.trackhash].append(track)
# create groups of tracks with the same trackhash
tracks_dict[track.trackhash].append(track)
tracks = []
for track_group in hash_to_tracks.values():
# pick the track with max bitrate for each trackhash group
for track_group in tracks_dict.values():
max_bitrate_track = max(track_group, key=attrgetter("bitrate"))
tracks.append(max_bitrate_track)