fix: slow folder track count

+ etc
This commit is contained in:
cwilvx
2024-06-27 00:02:08 +03:00
parent 54a1b85d8b
commit 1a66194c6c
13 changed files with 183 additions and 95 deletions
+1 -1
View File
@@ -64,7 +64,7 @@ def create_api():
app = OpenAPI(__name__, info=api_info, doc_prefix="/docs")
# JWT CONFIGS
app.config["JWT_SECRET_KEY"] = UserConfig().userId
app.config["JWT_SECRET_KEY"] = UserConfig().serverId
app.config["JWT_TOKEN_LOCATION"] = ["cookies", "headers"]
app.config["JWT_COOKIE_CSRF_PROTECT"] = False
app.config["JWT_SESSION_COOKIE"] = False
+3 -5
View File
@@ -10,12 +10,13 @@ from pydantic import BaseModel, Field
from flask_openapi3 import Tag
from flask_openapi3 import APIBlueprint
from showinfm import show_in_file_manager
from memory_profiler import profile
from app import settings
from app.db import TrackTable
from app.db.sqlite.settings import SettingsSQLMethods as db
from app.lib.folderslib import GetFilesAndDirs, get_folders
from app.serializers.track import serialize_track
from app.store.tracks import TrackStore as store
from app.utils.wintools import is_windows, win_replace_slash
tag = Tag(name="Folders", description="Get folders and tracks in a directory")
@@ -66,9 +67,7 @@ def get_folder_tree(body: FolderTree):
else:
req_dir = "/" + req_dir if not req_dir.startswith("/") else req_dir
print('stuff!')
res = GetFilesAndDirs(req_dir, tracks_only=tracks_only)()
print(res['folders'])
res["folders"] = sorted(res["folders"], key=lambda i: i.name)
return res
@@ -183,8 +182,7 @@ def get_tracks_in_path(query: GetTracksInPathQuery):
Used when adding tracks to the queue.
"""
tracks = store.get_tracks_in_path(query.path)
tracks = sorted(tracks, key=lambda i: i.last_mod)
tracks = TrackTable.get_tracks_in_path(query.path)
tracks = (serialize_track(t) for t in tracks if Path(t.filepath).exists())
return {
+6 -5
View File
@@ -61,11 +61,11 @@ def get_all_items(path: GetAllItemsPath, query: GetAllItemsQuery):
is_artists = path.itemtype == "artists"
if is_albums:
items = AlbumTable.get_all(query.start, query.limit)
items, total = AlbumTable.get_all(query.start, query.limit)
elif is_artists:
items = ArtistTable.get_all(query.start, query.limit)
items, total = ArtistTable.get_all(query.start, query.limit)
print(items)
# print(items)
start = query.start
limit = query.limit
@@ -93,6 +93,7 @@ def get_all_items(path: GetAllItemsPath, query: GetAllItemsQuery):
for item in items:
item_dict = serialize_album(item) if is_albums else serialize_artist(item)
print(item_dict)
if sort_is_date:
item_dict["help_text"] = item.date
@@ -117,9 +118,9 @@ def get_all_items(path: GetAllItemsPath, query: GetAllItemsQuery):
if sort_is_artist_albumcount:
item_dict["help_text"] = (
f"{format_number(item['albumcount'])} album{'' if item['albumcount'] == 1 else 's'}"
f"{format_number(item.albumcount)} album{'' if item.albumcount == 1 else 's'}"
)
album_list.append(item_dict)
return {"items": album_list, "total": len(sorted_items)}
return {"items": album_list, "total": total}
+3 -3
View File
@@ -14,13 +14,13 @@ class UserConfig:
# auth stuff
# NOTE: Don't expose the serverId via the API
userId: str = ""
serverId: str = ""
usersOnLogin: bool = True
# lists
rootDirs: list[str] = field(default_factory=list)
excludeDirs: list[str] = field(default_factory=list)
artistSeparators: set[str] = field(default_factory=list)
artistSeparators: set[str] = field(default_factory=set)
genreSeparators: set[str] = field(default_factory=lambda: {"/", ";", "&"})
# tracks
@@ -80,7 +80,7 @@ class UserConfig:
settings = {k: v for k, v in settings.items() if not k.startswith("_")}
with open(self._config_path, "w") as f:
json.dump(settings, f, indent=4)
json.dump(settings, f, indent=4, default=list)
def __setattr__(self, key: str, value: Any) -> None:
"""
+123 -23
View File
@@ -1,7 +1,11 @@
from concurrent.futures import ThreadPoolExecutor
import json
import os
from pathlib import Path
from pprint import pprint
from typing import Any, Optional
from memory_profiler import profile
from sqlalchemy import (
JSON,
Boolean,
@@ -27,32 +31,83 @@ from app.models import Album as AlbumModel
from app.models import Artist as ArtistModel
from app.utils.remove_duplicates import remove_duplicates
fullpath = "/home/cwilvx/temp/swingmusic/swing.db"
engine = create_engine(f"sqlite+pysqlite:///{fullpath}", echo=False)
engine = create_engine(
f"sqlite+pysqlite:///{fullpath}",
echo=False,
max_overflow=0,
pool_size=5,
)
# Make sure the directory that will hold the SQLite file exists.
# exist_ok=True keeps this from raising FileExistsError when the directory is
# already there but the database file has not been created yet.
os.makedirs(Path(fullpath).parent, exist_ok=True)
connection = engine.connect()
all_filepaths = list()
def todict(track: Any):
    """
    Convert a single row-like object into a dict by calling its `_asdict()` method.
    """
    as_dict = track._asdict()
    return as_dict
def getIndexOfFirstMatch(strings: list[str], prefix: str):
    """
    Find the index of the first string that starts with the given prefix.

    `strings` must be sorted in ascending order. In a sorted list every string
    that starts with `prefix` compares >= `prefix`, and all such strings sit
    next to each other, so the leftmost insertion point of `prefix` is the only
    position where the first match can be.

    Returns the index of the first match, or -1 when no string starts with
    `prefix` (including when `strings` is empty).
    """
    from bisect import bisect_left

    index = bisect_left(strings, prefix)

    if index < len(strings) and strings[index].startswith(prefix):
        return index

    return -1
def todicts(tracks: list[Any]):
    """
    Convert every item of `tracks` with `todict` and return the results as a list.
    """
    return list(map(todict, tracks))
def countFilepathsInDir(dirpath: str):
    """
    Count the entries of the module-level `all_filepaths` list that start
    with `dirpath`.

    `all_filepaths` is expected to be sorted, so all matching entries form one
    contiguous run. The start of the run is located with
    `getIndexOfFirstMatch` and the run is then walked in place, without
    copying the list.

    Returns 0 when no entry starts with `dirpath`.

    NOTE(review): this is a plain string-prefix match, so "/music/rock" also
    counts files under "/music/rock2" unless callers pass a trailing path
    separator. Confirm against the callers.
    """
    paths = all_filepaths
    start = getIndexOfFirstMatch(paths, dirpath)

    if start == -1:
        return 0

    # Advance to the first entry past the run of matches.
    end = start
    total = len(paths)
    while end < total and paths[end].startswith(dirpath):
        end += 1

    return end - start
class DbManager:
def __init__(self, commit: bool = False):
self.commit = commit
self.engine = create_engine(f"sqlite+pysqlite:///{fullpath}", echo=True)
self.conn = self.engine.connect()
# self.engine = create_engine(f"sqlite+pysqlite:///{fullpath}", echo=True)
# self.conn = self.engine.connect()
# pass
def __enter__(self):
return self.conn.execution_options(preserve_rowcount=True)
# return self.conn.execution_options(preserve_rowcount=True)
return connection
def __exit__(self, exc_type, exc_val, exc_tb):
if self.commit:
self.conn.commit()
self.conn.close()
connection.commit()
# self.conn.close()
class Base(MappedAsDataclass, DeclarativeBase):
@@ -98,8 +153,13 @@ class ArtistTable(Base):
@classmethod
def get_all(cls, start: int, limit: int):
with DbManager() as conn:
if start == 0:
result = conn.execute(select(cls))
else:
result = conn.execute(select(cls).offset(start).limit(limit))
return albums_to_dataclasses(result.fetchall())
all = result.fetchall()
return artists_to_dataclasses(all), len(all)
@classmethod
def get_artist_by_hash(cls, artisthash: str):
@@ -149,8 +209,14 @@ class AlbumTable(Base):
@classmethod
def get_all(cls, start: int, limit: int):
with DbManager() as conn:
if start == 0:
result = conn.execute(select(AlbumTable))
else:
result = conn.execute(select(AlbumTable).offset(start).limit(limit))
return albums_to_dataclasses(result.fetchall())
all = result.fetchall()
return albums_to_dataclasses(all)[:limit], len(all)
@classmethod
def get_albums_by_artisthashes(cls, artisthashes: list[dict[str, str]]):
@@ -164,7 +230,6 @@ class AlbumTable(Base):
)
albums.extend(albums_to_dataclasses(result.fetchall()))
print(albums)
return albums
@classmethod
@@ -198,7 +263,7 @@ class TrackTable(Base):
date: Mapped[int] = mapped_column(Integer())
disc: Mapped[int] = mapped_column(Integer())
duration: Mapped[int] = mapped_column(Integer())
filepath: Mapped[str] = mapped_column(String(), unique=True)
filepath: Mapped[str] = mapped_column(String(), index=True, unique=True)
folder: Mapped[str] = mapped_column(String(), index=True)
genre: Mapped[Optional[list[dict[str, str]]]] = mapped_column(JSON())
last_mod: Mapped[float] = mapped_column(Integer())
@@ -211,23 +276,21 @@ class TrackTable(Base):
@classmethod
def get_tracks_by_filepaths(cls, filepaths: list[str]):
print(filepaths[0])
with DbManager() as conn:
result = conn.execute(
select(TrackTable).where(TrackTable.filepath.in_(filepaths))
)
return [dict(r) for r in result.mappings().fetchall()]
return tracks_to_dataclasses(result.fetchall())
@classmethod
def count_tracks_containing_paths(cls, paths: list[str]):
results: list[dict[str, int | str]] = []
with DbManager() as conn:
for path in paths:
result = conn.execute(
select(TrackTable).where(TrackTable.filepath.contains(path))
)
results.append({"path": path, "trackcount": result.all().__len__()})
with ThreadPoolExecutor() as executor:
res = executor.map(countFilepathsInDir, paths)
results = [
{"path": path, "trackcount": count} for path, count in zip(paths, res)
]
return results
@@ -272,6 +335,43 @@ class TrackTable(Base):
)
return tracks_to_dataclasses(result.fetchall())
@classmethod
def get_tracks_in_path(cls, path: str):
    """
    Return the tracks whose filepath starts with `path`, ordered by `last_mod`.

    The match is a prefix match, so only files located under `path` are
    returned, not files that merely contain `path` somewhere in their
    filepath. `autoescape=True` makes `%` and `_` characters in `path`
    match literally instead of acting as LIKE wildcards.

    The rows are converted with `tracks_to_dataclasses` before returning.
    """
    with DbManager() as conn:
        result = conn.execute(
            select(TrackTable)
            .where(TrackTable.filepath.startswith(path, autoescape=True))
            .order_by(TrackTable.last_mod)
        )
        return tracks_to_dataclasses(result.fetchall())
# Fill the module-level path list from the tracks table and keep it sorted:
# getIndexOfFirstMatch binary-searches this list, which requires sorted input.
all_tracks = TrackTable.get_all()
all_filepaths.extend(track.filepath for track in all_tracks)
all_filepaths.sort()
# print("files in path: ",getFilepathsInDir("/home/cwilvx/Music/").__len__())
# SECTION: Userdata database
class UserTable(Base):
    """
    Declarative model mapped to the `user` table.
    """

    __tablename__ = "user"

    # Primary key of the row.
    id: Mapped[int] = mapped_column(primary_key=True)
    # Declared unique, so two rows cannot share a username.
    username: Mapped[str] = mapped_column(String(), unique=True)
    firstname: Mapped[Optional[str]] = mapped_column(String())
    lastname: Mapped[Optional[str]] = mapped_column(String())
    # NOTE(review): presumably stores a password hash rather than plain text;
    # confirm against the code that writes this column.
    password: Mapped[str] = mapped_column(String())
    email: Mapped[Optional[str]] = mapped_column(String())
    image: Mapped[Optional[str]] = mapped_column(String())
    # Stored as JSON; a new instance gets its own ["user"] list by default.
    roles: Mapped[list[str]] = mapped_column(JSON(), default_factory=lambda: ["user"])
    # Stored as JSON; a new instance gets its own empty dict by default.
    extra: Mapped[Optional[dict[str, Any]]] = mapped_column(
        JSON(), default_factory=dict
    )
# SECTION: HELPER FUNCTIONS
+2 -40
View File
@@ -2,12 +2,11 @@ import os
from pathlib import Path
from app.logger import log
from app.models import Folder, Track
from app.models import Folder
from app.serializers.track import serialize_tracks
from app.settings import SUPPORTED_FILES
from app.utils.wintools import win_replace_slash
from app.store.tracks import TrackStore
from app.db import TrackTable as TrackDB
@@ -51,39 +50,6 @@ def get_folders(paths: list[str]):
for f in folders
if f["trackcount"] > 0
]
# count_dict = {
# "tracks": {path: 0 for path in paths},
# # folders are immediate children of the root folder
# "folders": {path: set() for path in paths},
# }
# for track in TrackStore.tracks:
# for path in paths:
# # a child path should be longer than the root path
# if len(track.folder) >= len(path) and track.folder.startswith(path):
# count_dict["tracks"][path] += 1
# # counting subfolders
# p = get_first_child_from_path(path, track.folder)
# if p:
# count_dict["folders"][path].add(p)
# folders = [
# {
# "path": path,
# "trackcount": count_dict["tracks"][path],
# "foldercount": len(count_dict["folders"][path]),
# }
# for path in paths
# ]
# return [
# create_folder(f["path"], f["trackcount"], f["foldercount"])
# for f in folders
# if f["trackcount"] > 0
# ]
class GetFilesAndDirs:
@@ -143,10 +109,6 @@ class GetFilesAndDirs:
tracks = []
if files:
tracks = TrackDB.get_tracks_by_filepaths(files)
print("printing files")
print(tracks)
# tracks = TrackStore.get_tracks_by_filepaths(files)
folders = []
if not self.tracks_only:
@@ -160,7 +122,7 @@ class GetFilesAndDirs:
return {
"path": path,
"tracks": tracks,
"tracks": serialize_tracks(tracks),
"folders": folders,
}
+12 -7
View File
@@ -45,6 +45,8 @@ class Populate:
"""
def __init__(self, instance_key: str) -> None:
return
global POPULATE_KEY
POPULATE_KEY = instance_key
@@ -152,15 +154,18 @@ class Populate:
@staticmethod
def tag_untagged(untagged: set[str], key: str):
for file in tqdm(untagged, desc="Reading files"):
if POPULATE_KEY != key:
log.warning("'Populate.tag_untagged': Populate key changed")
return
pass
# for file in tqdm(untagged, desc="Reading files"):
# if POPULATE_KEY != key:
# log.warning("'Populate.tag_untagged': Populate key changed")
# return
tags = get_tags(file)
# tags = get_tags(file)
if tags is not None:
TrackTable.insert_one(tags)
# if tags is not None:
# TrackTable.insert_one(tags)
# =============================================
# log.info("Found %s new tracks", len(untagged))
# # tagged_tracks: deque[dict] = deque()
+6 -3
View File
@@ -30,6 +30,7 @@ class IndexTracks:
if tags is not None:
TrackTable.insert_one(tags)
del tags
class IndexAlbums:
def __init__(self) -> None:
@@ -84,6 +85,7 @@ class IndexAlbums:
pprint(albums)
AlbumTable.insert_many(list(albums.values()))
del albums
class IndexArtists:
@@ -146,10 +148,11 @@ class IndexArtists:
pprint(artists)
ArtistTable.insert_many(list(artists.values()))
del artists
class IndexEverything:
def __init__(self) -> None:
# IndexTracks()
# IndexAlbums()
# IndexArtists()
IndexTracks()
IndexAlbums()
IndexArtists()
pass
+2 -2
View File
@@ -23,8 +23,8 @@ def run_setup():
config = UserConfig()
config.setup_config_file()
if not config.userId:
config.userId = str(uuid.uuid4())
if not config.serverId:
config.serverId = str(uuid.uuid4())
setup_sqlite()
run_migrations()
+1 -1
View File
@@ -16,7 +16,7 @@ def hash_password(password: str) -> str:
"""
return hashlib.pbkdf2_hmac(
"sha256", password.encode("utf-8"), UserConfig().userId.encode("utf-8"), 100000
"sha256", password.encode("utf-8"), UserConfig().serverId.encode("utf-8"), 100000
).hex()
+6 -2
View File
@@ -45,8 +45,12 @@ mimetypes.add_type("image/gif", ".gif")
mimetypes.add_type("font/woff", ".woff")
mimetypes.add_type("application/manifest+json", ".webmanifest")
werkzeug = logging.getLogger("werkzeug")
werkzeug.setLevel(logging.ERROR)
logging.disable(logging.CRITICAL)
# werkzeug = logging.getLogger("werkzeug")
# werkzeug.setLevel(logging.ERROR)
# # logging.basicConfig()
# logging.getLogger("sqlalchemy.engine").setLevel(logging.ERROR)
# Background tasks
Generated
+15 -1
View File
@@ -1120,6 +1120,20 @@ files = [
{file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"},
]
[[package]]
name = "memory-profiler"
version = "0.61.0"
description = "A module for monitoring memory usage of a python program"
optional = false
python-versions = ">=3.5"
files = [
{file = "memory_profiler-0.61.0-py3-none-any.whl", hash = "sha256:400348e61031e3942ad4d4109d18753b2fb08c2f6fb8290671c5513a34182d84"},
{file = "memory_profiler-0.61.0.tar.gz", hash = "sha256:4e5b73d7864a1d1292fb76a03e82a3e78ef934d06828a698d9dada76da2067b0"},
]
[package.dependencies]
psutil = "*"
[[package]]
name = "msgpack"
version = "1.0.7"
@@ -2602,4 +2616,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.12"
content-hash = "333baa055ac4a32ed914fb46025a48559575806dafba7db5aac97a3878ade23c"
content-hash = "9c7ba20671a6a3b59dbb120e3e56ded7e4dfcbf2de14418bdef41059233cdcb1"
+1
View File
@@ -27,6 +27,7 @@ pendulum = "^3.0.0"
flask-openapi3 = "^3.0.2"
flask-jwt-extended = "^4.6.0"
sqlalchemy = "^2.0.31"
memory-profiler = "^0.61.0"
[tool.poetry.dev-dependencies]
pylint = "^2.15.5"