Files
SpotifyRecAlg/swingmusic/services/download_jobs.py
T
Tomas Dvorak 6e8fedf534 first commit
2026-04-13 17:46:58 +02:00

580 lines
19 KiB
Python

from __future__ import annotations
import logging
import os
import re
import threading
import time
from typing import Any
from sqlalchemy import select
from swingmusic.config import UserConfig
# DragonflyDB integration for fast job queue operations
from swingmusic.db.dragonfly_extended_client import get_job_queue_service
from swingmusic.db.engine import DbEngine
from swingmusic.db.libdata import TrackTable
from swingmusic.db.production import (
DownloadJobTable,
LibraryFileTable,
LyricsStatusTable,
UserRootDirOwnershipTable,
)
from swingmusic.db.userdata import UserTable
from swingmusic.lib.index import run_index_pipeline
from swingmusic.services.download_provider_adapters import fallback_download_adapter
from swingmusic.services.library_projection import (
mark_track_available,
mark_track_failed,
mark_track_queued,
)
from swingmusic.services.lyrics_backfill import backfill_lyrics_async
from swingmusic.services.spotiflac_worker import spotiflac_worker
from swingmusic.utils.hashing import create_hash
# Module-level logger, named after this module via ``__name__``.
log = logging.getLogger(__name__)
def _sanitize_filename(value: str) -> str:
    """Reduce arbitrary text to a string usable as a single path component.

    Every character that is not a word character, whitespace, ``-`` or ``.``
    is dropped, whitespace runs collapse to one space, and the result is
    capped at 120 characters. Leading and trailing dots and spaces are
    trimmed so that values such as ``"."`` or ``".."`` cannot survive and
    resolve to the current or parent directory once joined into a path.

    Args:
        value: Raw text (for example a track title or a username).

    Returns:
        The cleaned name, or ``download-<unix timestamp>`` when nothing
        usable is left after cleaning.
    """
    filename = re.sub(r"[^\w\s\-.]", "", value, flags=re.UNICODE)
    filename = re.sub(r"\s+", " ", filename).strip(". ")
    # Trim again after the cut: truncation can expose a trailing space or dot.
    filename = filename[:120].rstrip(". ")
    return filename or f"download-{int(time.time())}"
def _quality_to_codec_and_bitrate(
    quality: str, codec_hint: str | None = None
) -> tuple[str, int]:
    """Map a quality label to a ``(codec, bitrate)`` pair.

    The label is compared case-insensitively; an empty or ``None`` label is
    treated as ``"high"`` and any unrecognised label falls back to the
    128 preset. A truthy ``codec_hint`` replaces the preset's codec but
    never changes the bitrate.
    """
    presets = {
        "lossless": ("flac", 1411),
        "high": ("mp3", 320),
        "medium": ("mp3", 192),
    }
    level = (quality or "high").lower()
    preset_codec, preset_bitrate = presets.get(level, ("mp3", 128))
    return (codec_hint or preset_codec, preset_bitrate)
def _resolve_primary_root_dir() -> str:
    """Return the first configured root directory.

    Falls back to ``~/Music`` when no root directory is configured or when
    the first entry is the ``"$home"`` placeholder.
    """
    configured = UserConfig().rootDirs
    # Treat "nothing configured" the same as the "$home" placeholder.
    first = configured[0] if configured else "$home"
    if first == "$home":
        return os.path.join(os.path.expanduser("~"), "Music")
    return first
def _resolve_download_root_for_user(userid: int | None = None) -> str:
    """Pick the root directory under which a user's downloads are stored.

    Resolution order:
      1. No ``userid``: the primary root directory.
      2. The user's first owned root (``"$home"`` expands to ``~/Music``).
      3. Otherwise ``<primary root>/SwingMusic Users/<username>`` is created,
         registered as owned by the user, and returned.
    """
    if userid is None:
        return _resolve_primary_root_dir()

    owned = UserRootDirOwnershipTable.get_paths(userid)
    if owned:
        first_owned = owned[0]
        if first_owned != "$home":
            return first_owned
        return os.path.join(os.path.expanduser("~"), "Music")

    base_root = _resolve_primary_root_dir()
    account = UserTable.get_by_id(userid)
    if account and account.username:
        folder_name = _sanitize_filename(account.username)
    else:
        folder_name = f"user-{userid}"

    # Isolate user downloads by default while keeping paths under configured roots.
    personal_root = os.path.join(base_root, "SwingMusic Users", folder_name)
    os.makedirs(personal_root, exist_ok=True)
    UserRootDirOwnershipTable.assign_paths(userid, [personal_root])
    return personal_root
def _resolve_download_dir(
    target_path: str | None = None, userid: int | None = None
) -> str:
    """Return the directory a download should be written to, creating it.

    With a ``target_path`` the parent directory of that path is used; when
    the path has no directory component the path itself is used as the
    directory. Without a ``target_path`` the result is the
    ``SwingMusic Downloads`` folder under the user's download root.
    """
    if target_path:
        chosen = os.path.dirname(target_path) or target_path
    else:
        chosen = os.path.join(
            _resolve_download_root_for_user(userid), "SwingMusic Downloads"
        )
    os.makedirs(chosen, exist_ok=True)
    return chosen
def _compute_trackhash(
    title: str | None,
    artist: str | None,
    album: str | None,
    fallback: str | None = None,
) -> str | None:
    """Hash a track from its metadata, or return ``fallback``.

    A hash is only computed when both ``title`` and ``artist`` are truthy;
    a missing ``album`` is hashed as the empty string. In every other case
    ``fallback`` is returned unchanged.
    """
    if not (title and artist):
        return fallback
    return create_hash(title, album or "", artist)
def _refresh_user_projection_for_download_path(
*,
userid: int,
path: str,
source: str,
) -> int:
"""
Re-indexes library metadata and marks tracks in the downloaded path as
available for the requesting user.
Returns number of projected tracks.
"""
if not path:
return 0
scope_path = path
if not os.path.isdir(scope_path):
scope_path = os.path.dirname(scope_path) or scope_path
if not scope_path or not os.path.exists(scope_path):
return 0
run_index_pipeline()
projected = 0
for track in TrackTable.get_tracks_in_path(scope_path):
if not track.filepath or not os.path.exists(track.filepath):
continue
mark_track_available(
track.trackhash,
filepath=track.filepath,
bitrate=int(track.bitrate or 0),
userid=userid,
source=source,
)
projected += 1
return projected
class DownloadJobManager:
    """Queue download jobs in the database and run them on a worker thread.

    Jobs are persisted through ``DownloadJobTable``. A single daemon thread
    (started with :meth:`start`) polls for queued jobs and processes them one
    at a time, trying ``spotiflac_worker`` first and then
    ``fallback_download_adapter``.
    """

    # Seconds the worker waits between polls when idle or after an error.
    _POLL_INTERVAL = 0.6

    def __init__(self) -> None:
        """Create an idle manager; no thread runs until :meth:`start`."""
        self._thread: threading.Thread | None = None
        self._stop = threading.Event()

    def start(self) -> None:
        """Start the worker thread unless one is already alive."""
        if self._thread and self._thread.is_alive():
            return
        self._stop.clear()
        self._thread = threading.Thread(
            target=self._worker_loop, name="download-job-worker", daemon=True
        )
        self._thread.start()

    def stop(self) -> None:
        """Signal the worker to stop and wait up to 2 seconds for it to exit."""
        self._stop.set()
        if self._thread and self._thread.is_alive():
            self._thread.join(timeout=2)

    def enqueue(
        self,
        *,
        userid: int,
        source_url: str | None,
        source: str,
        quality: str,
        codec: str | None = None,
        trackhash: str | None = None,
        title: str | None = None,
        artist: str | None = None,
        album: str | None = None,
        item_type: str = "track",
        target_path: str | None = None,
        payload: dict[str, Any] | None = None,
    ) -> int:
        """Persist a new download job and return its id.

        The track hash is recomputed from ``title``/``artist``/``album`` when
        both title and artist are given, otherwise ``trackhash`` is used as
        is. When a hash is known the track is marked as queued for the user.
        When ``codec`` is omitted the stored codec is derived from
        ``quality``. The job is additionally mirrored to the DragonflyDB
        queue on a best-effort basis; failures there are only logged.
        """
        resolved_trackhash = _compute_trackhash(title, artist, album, trackhash)
        # Derive the default from quality rather than hard-coding "mp3":
        # a stored "mp3" would later be passed as the codec hint and prevent
        # lossless jobs from ever defaulting to flac.
        default_codec, _ = _quality_to_codec_and_bitrate(quality)
        job_id = DownloadJobTable.enqueue(
            {
                "userid": userid,
                "source_url": source_url,
                "source": source,
                "provider": source,
                "quality": quality,
                "codec": codec or default_codec,
                "trackhash": resolved_trackhash,
                "title": title,
                "artist": artist,
                "album": album,
                "item_type": item_type,
                "target_path": target_path,
                "payload": payload or {},
            }
        )
        if resolved_trackhash:
            mark_track_queued(
                resolved_trackhash,
                job_id=job_id,
                source_url=source_url,
                userid=userid,
            )
        # Also enqueue to DragonflyDB for fast queue access and monitoring.
        # The whole interaction is inside the try: the job already exists in
        # the database, so a cache failure must not fail the request.
        try:
            job_queue = get_job_queue_service()
            if job_queue.cache.client.is_available():
                job_queue.enqueue_job(
                    "downloads",
                    {
                        "job_id": job_id,
                        "userid": userid,
                        "source": source,
                        "trackhash": resolved_trackhash,
                        "title": title,
                        "artist": artist,
                        "item_type": item_type,
                        "queued_at": int(time.time()),
                    },
                )
                log.debug("Enqueued job %s to DragonflyDB queue", job_id)
        except Exception as e:
            log.debug("Failed to enqueue to DragonflyDB: %s", e)
        return job_id

    def list_jobs(self, userid: int, limit: int = 200) -> list[dict[str, Any]]:
        """Return up to ``limit`` serialized jobs of a user, newest first."""
        with DbEngine.manager() as conn:
            result = conn.execute(
                select(DownloadJobTable)
                .where(DownloadJobTable.userid == userid)
                .order_by(DownloadJobTable.created_at.desc())
                .limit(limit)
            )
            jobs = list(result.scalars())
        return [self.serialize_job(job) for job in jobs]

    def get_job(self, job_id: int, userid: int | None = None) -> dict[str, Any] | None:
        """Return one serialized job, or ``None`` if missing or owned by another user.

        The ownership check is skipped when ``userid`` is ``None``.
        """
        job = DownloadJobTable.get_by_id(job_id)
        if not job:
            return None
        if userid is not None and job.userid != userid:
            return None
        return self.serialize_job(job)

    def cancel(self, job_id: int, userid: int) -> bool:
        """Cancel a job that is not finished yet.

        Returns ``False`` when the job does not exist, belongs to another
        user, or is already completed, failed or cancelled.
        """
        job = DownloadJobTable.get_by_id(job_id)
        if not job or job.userid != userid:
            return False
        if job.state in {"completed", "failed", "cancelled"}:
            return False
        DownloadJobTable.update_job(
            job_id,
            {
                "state": "cancelled",
                "error": "Cancelled by user",
                "finished_at": int(time.time()),
            },
        )
        if job.trackhash:
            mark_track_failed(
                job.trackhash, error="Cancelled by user", job_id=job_id, userid=userid
            )
        return True

    def retry(self, job_id: int, userid: int) -> bool:
        """Re-queue a failed or cancelled job and bump its retry counter.

        Returns ``False`` when the job does not exist, belongs to another
        user, or is in any other state.
        """
        job = DownloadJobTable.get_by_id(job_id)
        if not job or job.userid != userid:
            return False
        if job.state not in {"failed", "cancelled"}:
            return False
        DownloadJobTable.update_job(
            job_id,
            {
                "state": "queued",
                "progress": 0.0,
                "error": None,
                "started_at": None,
                "finished_at": None,
                "retry_count": int(job.retry_count or 0) + 1,
            },
        )
        if job.trackhash:
            mark_track_queued(
                job.trackhash, job_id=job_id, source_url=job.source_url, userid=userid
            )
        return True

    def clear_queue(self, userid: int) -> int:
        """Cancel all queued/downloading jobs of a user; return how many were cancelled."""
        jobs = DownloadJobTable.list_for_user(userid, states={"queued", "downloading"})
        cancelled = 0
        for job in jobs:
            if self.cancel(job.id, userid):
                cancelled += 1
        return cancelled

    def clear_history(self, userid: int) -> int:
        """Delete a user's completed, failed and cancelled jobs.

        Returns whatever ``DownloadJobTable.delete_for_user`` returns.
        """
        return DownloadJobTable.delete_for_user(
            userid,
            states={"completed", "failed", "cancelled"},
        )

    @staticmethod
    def serialize_job(job: Any) -> dict[str, Any]:
        """Convert a job row into a plain dict for API responses.

        ``status`` duplicates ``state`` and ``service`` duplicates ``source``.
        ``progress`` is rounded to two decimals and, like ``retry_count``,
        defaults to zero when unset.
        """
        return {
            "id": job.id,
            "state": job.state,
            "status": job.state,
            "source": job.source,
            "service": job.source,
            "provider": job.provider,
            "source_url": job.source_url,
            "quality": job.quality,
            "codec": job.codec,
            "target_path": job.target_path,
            "error": job.error,
            "progress": round(float(job.progress or 0.0), 2),
            "trackhash": job.trackhash,
            "title": job.title,
            "artist": job.artist,
            "album": job.album,
            "item_type": job.item_type,
            "created_at": job.created_at,
            "updated_at": job.updated_at,
            "started_at": job.started_at,
            "finished_at": job.finished_at,
            "retry_count": int(job.retry_count or 0),
        }

    def _worker_loop(self) -> None:
        """Poll for queued jobs and process them until :meth:`stop` is called.

        Any exception escaping a poll or a job is logged and the loop keeps
        running, so one bad iteration cannot silently end all downloads.
        """
        while not self._stop.is_set():
            try:
                job = DownloadJobTable.get_queued_job()
                if job:
                    self._process_job(job)
                    continue
            except Exception:
                log.exception("Download worker iteration failed")
            # Idle or error back-off; waiting on the event (rather than
            # sleeping) lets stop() wake the worker immediately.
            self._stop.wait(self._POLL_INTERVAL)

    def _process_job(self, job: Any) -> None:
        """Run a single job to completion or failure.

        Steps: mark the job as downloading, reuse an already registered file
        for the same track hash if one exists on disk, otherwise download via
        the primary provider and then the fallback, project the result into
        the user's library, mark the job completed and schedule a lyrics
        backfill. Any exception inside the main body marks the job as failed.
        """
        now = int(time.time())
        DownloadJobTable.update_job(
            job.id,
            {
                "state": "downloading",
                "started_at": now,
                "progress": 1.0,
                "error": None,
            },
        )
        trackhash = _compute_trackhash(job.title, job.artist, job.album, job.trackhash)
        try:
            # Job might have been cancelled by user while running.
            # NOTE(review): the "downloading" write above has already run, so
            # this only catches a cancel that lands between that write and
            # this read; the state is not re-checked later on. Confirm
            # whether that is intended.
            current = DownloadJobTable.get_by_id(job.id)
            if not current or current.state == "cancelled":
                return
            # Dedupe/import-aware reuse: if file already exists in the media registry,
            # re-link it to this user instead of downloading again.
            if trackhash:
                existing_file = LibraryFileTable.get_by_trackhash(trackhash)
                if (
                    existing_file
                    and existing_file.filepath
                    and os.path.exists(existing_file.filepath)
                ):
                    mark_track_available(
                        trackhash,
                        filepath=existing_file.filepath,
                        bitrate=int(existing_file.bitrate or 0),
                        userid=job.userid,
                        source="registry_reuse",
                    )
                    DownloadJobTable.update_job(
                        job.id,
                        {
                            "state": "completed",
                            "progress": 100.0,
                            "target_path": existing_file.filepath,
                            "trackhash": trackhash,
                            "codec": existing_file.codec or job.codec,
                            "finished_at": int(time.time()),
                        },
                    )
                    return
            DownloadJobTable.update_job(job.id, {"progress": 11.0})
            codec, bitrate = _quality_to_codec_and_bitrate(job.quality, job.codec)
            extension = codec.lower() if codec else "mp3"
            safe_title = _sanitize_filename(
                job.title or job.trackhash or f"job-{job.id}"
            )
            directory = _resolve_download_dir(job.target_path, userid=job.userid)
            target_path = job.target_path or os.path.join(
                directory, f"{safe_title}.{extension}"
            )
            os.makedirs(os.path.dirname(target_path), exist_ok=True)
            DownloadJobTable.update_job(job.id, {"progress": 23.0})
            # Collect per-provider errors so the final message names each failure.
            provider_errors: list[str] = []
            result = None
            try:
                result = spotiflac_worker.download(
                    source_url=job.source_url or "",
                    output_dir=directory,
                    codec=codec,
                    quality=job.quality,
                    item_type=job.item_type,
                    # Only single-track jobs get an explicit target file.
                    target_path=target_path if job.item_type == "track" else None,
                )
            except Exception as primary_error:
                provider_errors.append(f"spotiflac: {primary_error}")
            # The fallback runs both when the primary raised and when it
            # returned no result.
            if result is None and fallback_download_adapter.is_available():
                try:
                    result = fallback_download_adapter.download(
                        source_url=job.source_url or "",
                        output_dir=directory,
                        codec=codec,
                        quality=job.quality,
                        item_type=job.item_type,
                        target_path=target_path if job.item_type == "track" else None,
                    )
                except Exception as fallback_error:
                    provider_errors.append(
                        f"{fallback_download_adapter.name}: {fallback_error}"
                    )
            if result is None:
                error_message = (
                    "; ".join(provider_errors) or "No download provider succeeded"
                )
                raise RuntimeError(error_message)
            DownloadJobTable.update_job(job.id, {"progress": 92.0})
            final_path = result.file_path
            final_codec = result.codec or codec
            final_bitrate = int(result.bitrate or bitrate)
            if trackhash and final_path and os.path.exists(final_path):
                mark_track_available(
                    trackhash,
                    filepath=final_path,
                    bitrate=final_bitrate,
                    userid=job.userid,
                    source=result.provider or job.source,
                )
            # Non-track jobs (album/artist/playlist) must project downloaded files
            # to the requesting user's library before final completion.
            if job.item_type != "track":
                try:
                    DownloadJobTable.update_job(job.id, {"progress": 96.0})
                    projected = _refresh_user_projection_for_download_path(
                        userid=job.userid,
                        path=final_path or directory,
                        source=result.provider or job.source,
                    )
                    DownloadJobTable.update_job(
                        job.id,
                        {
                            "progress": 99.0,
                            "payload": {
                                **(job.payload or {}),
                                "projected_tracks": projected,
                            },
                        },
                    )
                except Exception as projection_error:
                    # Keep the download successful, but expose projection warning
                    # so UI can surface retries/rescan actions.
                    log.exception("Failed to refresh projection for job %s", job.id)
                    DownloadJobTable.update_job(
                        job.id,
                        {
                            "payload": {
                                **(job.payload or {}),
                                "projection_error": str(projection_error),
                            },
                        },
                    )
            DownloadJobTable.update_job(
                job.id,
                {
                    "state": "completed",
                    "progress": 100.0,
                    "target_path": final_path,
                    "trackhash": trackhash,
                    "codec": final_codec,
                    "finished_at": int(time.time()),
                },
            )
            if trackhash and final_path and os.path.exists(final_path):
                LyricsStatusTable.upsert(
                    trackhash=trackhash,
                    filepath=final_path,
                    status="pending",
                    source="download",
                    has_embedded=False,
                    # A sidecar .lrc next to the audio file counts as existing lyrics.
                    has_lrc=os.path.exists(os.path.splitext(final_path)[0] + ".lrc"),
                    last_error=None,
                    extra={"job_id": job.id, "provider": result.provider},
                )
                backfill_lyrics_async(
                    filepath=final_path,
                    title=job.title,
                    artist=job.artist,
                    album=job.album,
                    trackhash=trackhash,
                )
        except Exception as error:
            message = str(error)
            DownloadJobTable.update_job(
                job.id,
                {
                    "state": "failed",
                    "error": message,
                    "finished_at": int(time.time()),
                },
            )
            if trackhash:
                mark_track_failed(
                    trackhash, error=message, job_id=job.id, userid=job.userid
                )
                LyricsStatusTable.upsert(
                    trackhash=trackhash,
                    filepath=job.target_path,
                    status="failed",
                    source="download",
                    last_error=message,
                    extra={"job_id": job.id},
                    increment_attempt=True,
                )
# Global process-wide manager used by API wrappers.
# Constructing it does not start the worker thread; call
# ``download_job_manager.start()`` to begin processing queued jobs.
download_job_manager = DownloadJobManager()