fix: slow folder track count

+ etc
2026-07-29 15:13:46 +00:00 · 2024-06-27 00:02:08 +03:00
parent 54a1b85d8b
commit 1a66194c6c
13 changed files with 183 additions and 95 deletions
@@ -1,7 +1,11 @@
+from concurrent.futures import ThreadPoolExecutor
 import json
+import os
+from pathlib import Path
 from pprint import pprint
 from typing import Any, Optional

+from memory_profiler import profile
 from sqlalchemy import (
    JSON,
    Boolean,
@@ -27,32 +31,83 @@ from app.models import Album as AlbumModel
 from app.models import Artist as ArtistModel
 from app.utils.remove_duplicates import remove_duplicates

-
 fullpath = "/home/cwilvx/temp/swingmusic/swing.db"
-engine = create_engine(f"sqlite+pysqlite:///{fullpath}", echo=False)
+engine = create_engine(
+    f"sqlite+pysqlite:///{fullpath}",
+    echo=False,
+    max_overflow=0,
+    pool_size=5,
+)
+
+if not os.path.exists(fullpath):
+    os.makedirs(Path(fullpath).parent)
+
+connection = engine.connect()
+all_filepaths = list()


-def todict(track: Any):
-    return track._asdict()
+def getIndexOfFirstMatch(strings: list[str], prefix: str):
+    """
+    Find the index of the first path that starts with the given path.
+
+    Uses a binary search algorithm to find the index.
+    """
+
+    left = 0
+    right = len(strings) - 1
+
+    while left <= right:
+        mid = (left + right) // 2
+
+        if strings[mid].startswith(prefix):
+            if mid == 0 or not strings[mid - 1].startswith(prefix):
+                return mid
+            right = mid - 1
+        elif strings[mid] < prefix:
+            left = mid + 1
+        else:
+            right = mid - 1
+
+    return -1


-def todicts(tracks: list[Any]):
-    return [todict(track) for track in tracks]
+def countFilepathsInDir(dirpath: str):
+    """
+    Return all the filepaths in a directory.
+    """
+    global all_filepaths
+    index = getIndexOfFirstMatch(all_filepaths, dirpath)
+
+    if index == -1:
+        return 0
+
+    paths: list[str] = []
+
+    for path in all_filepaths[index:]:
+        if path.startswith(dirpath):
+            paths.append(path)
+        else:
+            break
+
+    return len(paths)


 class DbManager:
    def __init__(self, commit: bool = False):
        self.commit = commit
-        self.engine = create_engine(f"sqlite+pysqlite:///{fullpath}", echo=True)
-        self.conn = self.engine.connect()
+        # self.engine = create_engine(f"sqlite+pysqlite:///{fullpath}", echo=True)
+        # self.conn = self.engine.connect()
+        # pass

    def __enter__(self):
-        return self.conn.execution_options(preserve_rowcount=True)
+        # return self.conn.execution_options(preserve_rowcount=True)
+        return connection

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.commit:
-            self.conn.commit()
-        self.conn.close()
+            connection.commit()
+
+        # self.conn.close()


 class Base(MappedAsDataclass, DeclarativeBase):
@@ -98,8 +153,13 @@ class ArtistTable(Base):
    @classmethod
    def get_all(cls, start: int, limit: int):
        with DbManager() as conn:
-            result = conn.execute(select(cls).offset(start).limit(limit))
-            return albums_to_dataclasses(result.fetchall())
+            if start == 0:
+                result = conn.execute(select(cls))
+            else:
+                result = conn.execute(select(cls).offset(start).limit(limit))
+
+            all = result.fetchall()
+            return artists_to_dataclasses(all), len(all)

    @classmethod
    def get_artist_by_hash(cls, artisthash: str):
@@ -149,8 +209,14 @@ class AlbumTable(Base):
    @classmethod
    def get_all(cls, start: int, limit: int):
        with DbManager() as conn:
-            result = conn.execute(select(AlbumTable).offset(start).limit(limit))
-            return albums_to_dataclasses(result.fetchall())
+            if start == 0:
+                result = conn.execute(select(AlbumTable))
+            else:
+                result = conn.execute(select(AlbumTable).offset(start).limit(limit))
+
+            all = result.fetchall()
+
+            return albums_to_dataclasses(all)[:limit], len(all)

    @classmethod
    def get_albums_by_artisthashes(cls, artisthashes: list[dict[str, str]]):
@@ -164,7 +230,6 @@ class AlbumTable(Base):
                )
                albums.extend(albums_to_dataclasses(result.fetchall()))

-            print(albums)
            return albums

    @classmethod
@@ -198,7 +263,7 @@ class TrackTable(Base):
    date: Mapped[int] = mapped_column(Integer())
    disc: Mapped[int] = mapped_column(Integer())
    duration: Mapped[int] = mapped_column(Integer())
-    filepath: Mapped[str] = mapped_column(String(), unique=True)
+    filepath: Mapped[str] = mapped_column(String(), index=True, unique=True)
    folder: Mapped[str] = mapped_column(String(), index=True)
    genre: Mapped[Optional[list[dict[str, str]]]] = mapped_column(JSON())
    last_mod: Mapped[float] = mapped_column(Integer())
@@ -211,23 +276,21 @@ class TrackTable(Base):

    @classmethod
    def get_tracks_by_filepaths(cls, filepaths: list[str]):
-        print(filepaths[0])
        with DbManager() as conn:
            result = conn.execute(
                select(TrackTable).where(TrackTable.filepath.in_(filepaths))
            )
-            return [dict(r) for r in result.mappings().fetchall()]
+            return tracks_to_dataclasses(result.fetchall())

    @classmethod
    def count_tracks_containing_paths(cls, paths: list[str]):
        results: list[dict[str, int | str]] = []

-        with DbManager() as conn:
-            for path in paths:
-                result = conn.execute(
-                    select(TrackTable).where(TrackTable.filepath.contains(path))
-                )
-                results.append({"path": path, "trackcount": result.all().__len__()})
+        with ThreadPoolExecutor() as executor:
+            res = executor.map(countFilepathsInDir, paths)
+            results = [
+                {"path": path, "trackcount": count} for path, count in zip(paths, res)
+            ]

        return results

@@ -272,6 +335,43 @@ class TrackTable(Base):
            )
            return tracks_to_dataclasses(result.fetchall())

+    @classmethod
+    def get_tracks_in_path(cls, path: str):
+        with DbManager() as conn:
+            result = conn.execute(
+                select(TrackTable)
+                .where(TrackTable.filepath.contains(path))
+                .order_by(TrackTable.last_mod)
+            )
+            return tracks_to_dataclasses(result.fetchall())
+
+
+all_tracks = TrackTable.get_all()
+
+for track in all_tracks:
+    all_filepaths.append(track.filepath)
+
+all_filepaths.sort()
+
+# print("files in path: ",getFilepathsInDir("/home/cwilvx/Music/").__len__())
+
+
+# SECTION: Userdata database
+class UserTable(Base):
+    __tablename__ = "user"
+
+    id: Mapped[int] = mapped_column(primary_key=True)
+    username: Mapped[str] = mapped_column(String(), unique=True)
+    firstname: Mapped[Optional[str]] = mapped_column(String())
+    lastname: Mapped[Optional[str]] = mapped_column(String())
+    password: Mapped[str] = mapped_column(String())
+    email: Mapped[Optional[str]] = mapped_column(String())
+    image: Mapped[Optional[str]] = mapped_column(String())
+    roles: Mapped[list[str]] = mapped_column(JSON(), default_factory=lambda: ["user"])
+    extra: Mapped[Optional[dict[str, Any]]] = mapped_column(
+        JSON(), default_factory=dict
+    )
+

 # SECTION: HELPER FUNCTIONS