fix: slow folder track count

+ etc
This commit is contained in:
cwilvx
2024-06-27 00:02:08 +03:00
parent 54a1b85d8b
commit 1a66194c6c
13 changed files with 183 additions and 95 deletions
+125 -25
View File
@@ -1,7 +1,11 @@
from concurrent.futures import ThreadPoolExecutor
import json
import os
from pathlib import Path
from pprint import pprint
from typing import Any, Optional
from memory_profiler import profile
from sqlalchemy import (
JSON,
Boolean,
@@ -27,32 +31,83 @@ from app.models import Album as AlbumModel
from app.models import Artist as ArtistModel
from app.utils.remove_duplicates import remove_duplicates
fullpath = "/home/cwilvx/temp/swingmusic/swing.db"
engine = create_engine(f"sqlite+pysqlite:///{fullpath}", echo=False)
engine = create_engine(
f"sqlite+pysqlite:///{fullpath}",
echo=False,
max_overflow=0,
pool_size=5,
)
if not os.path.exists(fullpath):
os.makedirs(Path(fullpath).parent)
connection = engine.connect()
all_filepaths = list()
def todict(track: Any):
return track._asdict()
def getIndexOfFirstMatch(strings: list[str], prefix: str):
"""
Find the index of the first path that starts with the given path.
Uses a binary search algorithm to find the index.
"""
left = 0
right = len(strings) - 1
while left <= right:
mid = (left + right) // 2
if strings[mid].startswith(prefix):
if mid == 0 or not strings[mid - 1].startswith(prefix):
return mid
right = mid - 1
elif strings[mid] < prefix:
left = mid + 1
else:
right = mid - 1
return -1
def todicts(tracks: list[Any]):
return [todict(track) for track in tracks]
def countFilepathsInDir(dirpath: str):
"""
Return all the filepaths in a directory.
"""
global all_filepaths
index = getIndexOfFirstMatch(all_filepaths, dirpath)
if index == -1:
return 0
paths: list[str] = []
for path in all_filepaths[index:]:
if path.startswith(dirpath):
paths.append(path)
else:
break
return len(paths)
class DbManager:
def __init__(self, commit: bool = False):
self.commit = commit
self.engine = create_engine(f"sqlite+pysqlite:///{fullpath}", echo=True)
self.conn = self.engine.connect()
# self.engine = create_engine(f"sqlite+pysqlite:///{fullpath}", echo=True)
# self.conn = self.engine.connect()
# pass
def __enter__(self):
return self.conn.execution_options(preserve_rowcount=True)
# return self.conn.execution_options(preserve_rowcount=True)
return connection
def __exit__(self, exc_type, exc_val, exc_tb):
if self.commit:
self.conn.commit()
self.conn.close()
connection.commit()
# self.conn.close()
class Base(MappedAsDataclass, DeclarativeBase):
@@ -98,8 +153,13 @@ class ArtistTable(Base):
@classmethod
def get_all(cls, start: int, limit: int):
with DbManager() as conn:
result = conn.execute(select(cls).offset(start).limit(limit))
return albums_to_dataclasses(result.fetchall())
if start == 0:
result = conn.execute(select(cls))
else:
result = conn.execute(select(cls).offset(start).limit(limit))
all = result.fetchall()
return artists_to_dataclasses(all), len(all)
@classmethod
def get_artist_by_hash(cls, artisthash: str):
@@ -149,8 +209,14 @@ class AlbumTable(Base):
@classmethod
def get_all(cls, start: int, limit: int):
with DbManager() as conn:
result = conn.execute(select(AlbumTable).offset(start).limit(limit))
return albums_to_dataclasses(result.fetchall())
if start == 0:
result = conn.execute(select(AlbumTable))
else:
result = conn.execute(select(AlbumTable).offset(start).limit(limit))
all = result.fetchall()
return albums_to_dataclasses(all)[:limit], len(all)
@classmethod
def get_albums_by_artisthashes(cls, artisthashes: list[dict[str, str]]):
@@ -164,7 +230,6 @@ class AlbumTable(Base):
)
albums.extend(albums_to_dataclasses(result.fetchall()))
print(albums)
return albums
@classmethod
@@ -198,7 +263,7 @@ class TrackTable(Base):
date: Mapped[int] = mapped_column(Integer())
disc: Mapped[int] = mapped_column(Integer())
duration: Mapped[int] = mapped_column(Integer())
filepath: Mapped[str] = mapped_column(String(), unique=True)
filepath: Mapped[str] = mapped_column(String(), index=True, unique=True)
folder: Mapped[str] = mapped_column(String(), index=True)
genre: Mapped[Optional[list[dict[str, str]]]] = mapped_column(JSON())
last_mod: Mapped[float] = mapped_column(Integer())
@@ -211,23 +276,21 @@ class TrackTable(Base):
@classmethod
def get_tracks_by_filepaths(cls, filepaths: list[str]):
print(filepaths[0])
with DbManager() as conn:
result = conn.execute(
select(TrackTable).where(TrackTable.filepath.in_(filepaths))
)
return [dict(r) for r in result.mappings().fetchall()]
return tracks_to_dataclasses(result.fetchall())
@classmethod
def count_tracks_containing_paths(cls, paths: list[str]):
results: list[dict[str, int | str]] = []
with DbManager() as conn:
for path in paths:
result = conn.execute(
select(TrackTable).where(TrackTable.filepath.contains(path))
)
results.append({"path": path, "trackcount": result.all().__len__()})
with ThreadPoolExecutor() as executor:
res = executor.map(countFilepathsInDir, paths)
results = [
{"path": path, "trackcount": count} for path, count in zip(paths, res)
]
return results
@@ -272,6 +335,43 @@ class TrackTable(Base):
)
return tracks_to_dataclasses(result.fetchall())
@classmethod
def get_tracks_in_path(cls, path: str):
with DbManager() as conn:
result = conn.execute(
select(TrackTable)
.where(TrackTable.filepath.contains(path))
.order_by(TrackTable.last_mod)
)
return tracks_to_dataclasses(result.fetchall())
all_tracks = TrackTable.get_all()
for track in all_tracks:
all_filepaths.append(track.filepath)
all_filepaths.sort()
# print("files in path: ",getFilepathsInDir("/home/cwilvx/Music/").__len__())
# SECTION: Userdata database
class UserTable(Base):
__tablename__ = "user"
id: Mapped[int] = mapped_column(primary_key=True)
username: Mapped[str] = mapped_column(String(), unique=True)
firstname: Mapped[Optional[str]] = mapped_column(String())
lastname: Mapped[Optional[str]] = mapped_column(String())
password: Mapped[str] = mapped_column(String())
email: Mapped[Optional[str]] = mapped_column(String())
image: Mapped[Optional[str]] = mapped_column(String())
roles: Mapped[list[str]] = mapped_column(JSON(), default_factory=lambda: ["user"])
extra: Mapped[Optional[dict[str, Any]]] = mapped_column(
JSON(), default_factory=dict
)
# SECTION: HELPER FUNCTIONS