mirror of
https://github.com/Dvorinka/SpotifyRecAlg.git
synced 2026-06-04 20:43:04 +00:00
first commit
This commit is contained in:
@@ -0,0 +1,468 @@
|
||||
"""
|
||||
Enhanced Directory Scanner for SwingMusic
|
||||
Handles multiple music directories with parallel scanning, permission validation, and error handling
|
||||
"""
|
||||
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from watchdog.events import FileSystemEventHandler
|
||||
from watchdog.observers import Observer
|
||||
|
||||
from swingmusic import logger
|
||||
|
||||
|
||||
@dataclass
|
||||
class ScanResult:
|
||||
"""Result of directory scanning operation"""
|
||||
|
||||
directory: str
|
||||
success: bool
|
||||
files_found: int
|
||||
folders_found: int
|
||||
errors: list[str]
|
||||
scan_time: float
|
||||
permissions_ok: bool
|
||||
|
||||
|
||||
@dataclass
|
||||
class FileInfo:
|
||||
"""Information about a scanned file"""
|
||||
|
||||
path: str
|
||||
size: int
|
||||
modified_time: float
|
||||
is_audio: bool
|
||||
extension: str
|
||||
|
||||
|
||||
@dataclass
|
||||
class DirectoryStats:
|
||||
"""Statistics for a scanned directory"""
|
||||
|
||||
total_files: int
|
||||
audio_files: int
|
||||
total_size: int
|
||||
last_scan_time: float
|
||||
scan_duration: float
|
||||
errors: list[str]
|
||||
|
||||
|
||||
class PermissionValidator:
|
||||
"""Validates directory permissions for scanning"""
|
||||
|
||||
@staticmethod
|
||||
async def validate_directory(directory: str) -> tuple[bool, list[str]]:
|
||||
"""Validate if directory can be accessed and scanned"""
|
||||
errors = []
|
||||
|
||||
try:
|
||||
path = Path(directory)
|
||||
|
||||
# Check if directory exists
|
||||
if not path.exists():
|
||||
errors.append(f"Directory does not exist: {directory}")
|
||||
return False, errors
|
||||
|
||||
# Check if it's actually a directory
|
||||
if not path.is_dir():
|
||||
errors.append(f"Path is not a directory: {directory}")
|
||||
return False, errors
|
||||
|
||||
# Check read permissions
|
||||
if not os.access(directory, os.R_OK):
|
||||
errors.append(f"No read permission for directory: {directory}")
|
||||
return False, errors
|
||||
|
||||
# Check execute permissions (needed for directory traversal)
|
||||
if not os.access(directory, os.X_OK):
|
||||
errors.append(f"No execute permission for directory: {directory}")
|
||||
return False, errors
|
||||
|
||||
# Try to list directory contents
|
||||
try:
|
||||
list(path.iterdir())
|
||||
except PermissionError as e:
|
||||
errors.append(f"Cannot list directory contents: {directory} - {str(e)}")
|
||||
return False, errors
|
||||
|
||||
# Check a subdirectory to ensure traversal works
|
||||
try:
|
||||
subdirs = [p for p in path.iterdir() if p.is_dir()]
|
||||
if subdirs:
|
||||
test_subdir = subdirs[0]
|
||||
if os.access(test_subdir, os.R_OK | os.X_OK):
|
||||
return True, errors
|
||||
else:
|
||||
errors.append(f"Cannot access subdirectories in: {directory}")
|
||||
return False, errors
|
||||
except Exception as e:
|
||||
errors.append(
|
||||
f"Error checking subdirectory access: {directory} - {str(e)}"
|
||||
)
|
||||
return False, errors
|
||||
|
||||
return True, errors
|
||||
|
||||
except Exception as e:
|
||||
errors.append(
|
||||
f"Unexpected error validating directory {directory}: {str(e)}"
|
||||
)
|
||||
return False, errors
|
||||
|
||||
|
||||
class ParallelScanner:
|
||||
"""Parallel directory scanner with performance optimization"""
|
||||
|
||||
def __init__(self, max_workers: int = 4):
|
||||
self.max_workers = max_workers
|
||||
self.audio_extensions = {
|
||||
".flac",
|
||||
".mp3",
|
||||
".wav",
|
||||
".aac",
|
||||
".m4a",
|
||||
".ogg",
|
||||
".wma",
|
||||
".alac",
|
||||
".aiff",
|
||||
".aif",
|
||||
".dsd",
|
||||
".dsf",
|
||||
".dff",
|
||||
}
|
||||
|
||||
async def scan_with_progress(
|
||||
self, directory: str, progress_callback=None
|
||||
) -> ScanResult:
|
||||
"""Scan directory with progress reporting"""
|
||||
start_time = time.time()
|
||||
errors = []
|
||||
files_found = 0
|
||||
folders_found = 0
|
||||
|
||||
try:
|
||||
path = Path(directory)
|
||||
|
||||
# Use ThreadPoolExecutor for parallel file processing
|
||||
with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
|
||||
# Collect all files and directories
|
||||
all_items = list(path.rglob("*"))
|
||||
total_items = len(all_items)
|
||||
|
||||
# Process items in batches
|
||||
batch_size = 100
|
||||
processed = 0
|
||||
|
||||
for i in range(0, total_items, batch_size):
|
||||
batch = all_items[i : i + batch_size]
|
||||
|
||||
# Process batch in parallel
|
||||
futures = []
|
||||
for item in batch:
|
||||
future = executor.submit(self._process_item, item)
|
||||
futures.append((future, item))
|
||||
|
||||
# Collect results
|
||||
for future, item in futures:
|
||||
try:
|
||||
is_audio, is_dir = future.result(timeout=5)
|
||||
if is_dir:
|
||||
folders_found += 1
|
||||
elif is_audio:
|
||||
files_found += 1
|
||||
except Exception as e:
|
||||
errors.append(f"Error processing {item}: {str(e)}")
|
||||
|
||||
processed += len(batch)
|
||||
|
||||
# Report progress
|
||||
if progress_callback:
|
||||
progress = (processed / total_items) * 100
|
||||
progress_callback(directory, progress, processed, total_items)
|
||||
|
||||
scan_time = time.time() - start_time
|
||||
|
||||
return ScanResult(
|
||||
directory=directory,
|
||||
success=len(errors) == 0,
|
||||
files_found=files_found,
|
||||
folders_found=folders_found,
|
||||
errors=errors,
|
||||
scan_time=scan_time,
|
||||
permissions_ok=True,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
scan_time = time.time() - start_time
|
||||
errors.append(f"Failed to scan directory {directory}: {str(e)}")
|
||||
|
||||
return ScanResult(
|
||||
directory=directory,
|
||||
success=False,
|
||||
files_found=0,
|
||||
folders_found=0,
|
||||
errors=errors,
|
||||
scan_time=scan_time,
|
||||
permissions_ok=False,
|
||||
)
|
||||
|
||||
def _process_item(self, item: Path) -> tuple[bool, bool]:
|
||||
"""Process a single file or directory"""
|
||||
try:
|
||||
if item.is_dir():
|
||||
return False, True
|
||||
elif item.is_file():
|
||||
is_audio = item.suffix.lower() in self.audio_extensions
|
||||
return is_audio, False
|
||||
else:
|
||||
return False, False
|
||||
except Exception:
|
||||
return False, False
|
||||
|
||||
|
||||
class DirectoryCache:
|
||||
"""Caches directory scan results to improve performance"""
|
||||
|
||||
def __init__(self, cache_ttl: int = 3600): # 1 hour TTL
|
||||
self.cache = {}
|
||||
self.cache_ttl = cache_ttl
|
||||
|
||||
def get(self, directory: str) -> DirectoryStats | None:
|
||||
"""Get cached directory stats"""
|
||||
cached = self.cache.get(directory)
|
||||
if cached and (time.time() - cached.last_scan_time) < self.cache_ttl:
|
||||
return cached
|
||||
return None
|
||||
|
||||
def set(self, directory: str, stats: DirectoryStats):
|
||||
"""Cache directory stats"""
|
||||
self.cache[directory] = stats
|
||||
|
||||
def invalidate(self, directory: str):
|
||||
"""Invalidate cache for specific directory"""
|
||||
self.cache.pop(directory, None)
|
||||
|
||||
def clear(self):
|
||||
"""Clear all cache"""
|
||||
self.cache.clear()
|
||||
|
||||
|
||||
class DirectoryWatcher(FileSystemEventHandler):
|
||||
"""Watches directory changes for automatic rescanning"""
|
||||
|
||||
def __init__(self, directory: str, callback):
|
||||
self.directory = directory
|
||||
self.callback = callback
|
||||
self.debounce_timer = None
|
||||
self.debounce_delay = 5 # 5 seconds debounce
|
||||
|
||||
def on_created(self, event):
|
||||
"""Handle file/directory creation"""
|
||||
if not event.is_directory:
|
||||
self._schedule_rescan()
|
||||
|
||||
def on_deleted(self, event):
|
||||
"""Handle file/directory deletion"""
|
||||
self._schedule_rescan()
|
||||
|
||||
def on_moved(self, event):
|
||||
"""Handle file/directory moves"""
|
||||
self._schedule_rescan()
|
||||
|
||||
def _schedule_rescan(self):
|
||||
"""Schedule a rescan with debouncing"""
|
||||
if self.debounce_timer:
|
||||
self.debounce_timer.cancel()
|
||||
|
||||
self.debounce_timer = threading.Timer(self.debounce_delay, self._trigger_rescan)
|
||||
self.debounce_timer.start()
|
||||
|
||||
def _trigger_rescan(self):
|
||||
"""Trigger the rescan callback"""
|
||||
try:
|
||||
self.callback(self.directory)
|
||||
except Exception as e:
|
||||
logger.error(f"Error in directory watcher callback: {e}")
|
||||
|
||||
|
||||
class EnhancedDirectoryScanner:
|
||||
"""Enhanced directory scanner with multiple improvements"""
|
||||
|
||||
def __init__(self, max_workers: int = 4):
|
||||
self.permission_validator = PermissionValidator()
|
||||
self.parallel_scanner = ParallelScanner(max_workers)
|
||||
self.cache = DirectoryCache()
|
||||
self.watchers = {} # directory -> observer
|
||||
self.scan_history = {}
|
||||
|
||||
async def scan_multiple_directories(
|
||||
self, directories: list[str], progress_callback=None
|
||||
) -> dict[str, ScanResult]:
|
||||
"""Efficiently scan multiple music directories in parallel"""
|
||||
logger.info(f"Starting scan of {len(directories)} directories")
|
||||
|
||||
# Validate permissions first
|
||||
validation_tasks = []
|
||||
for directory in directories:
|
||||
task = self.permission_validator.validate_directory(directory)
|
||||
validation_tasks.append((directory, task))
|
||||
|
||||
# Collect validation results
|
||||
valid_directories = []
|
||||
validation_results = {}
|
||||
|
||||
for directory, task in validation_tasks:
|
||||
permissions_ok, errors = await task
|
||||
validation_results[directory] = (permissions_ok, errors)
|
||||
|
||||
if permissions_ok:
|
||||
valid_directories.append(directory)
|
||||
else:
|
||||
logger.error(f"Directory validation failed for {directory}: {errors}")
|
||||
|
||||
# Scan valid directories in parallel
|
||||
scan_tasks = []
|
||||
for directory in valid_directories:
|
||||
task = self.parallel_scanner.scan_with_progress(
|
||||
directory, progress_callback
|
||||
)
|
||||
scan_tasks.append((directory, task))
|
||||
|
||||
# Collect scan results
|
||||
results = {}
|
||||
for directory, task in scan_tasks:
|
||||
result = await task
|
||||
results[directory] = result
|
||||
|
||||
# Cache successful results
|
||||
if result.success:
|
||||
stats = DirectoryStats(
|
||||
total_files=result.files_found + result.folders_found,
|
||||
audio_files=result.files_found,
|
||||
total_size=0, # Would need additional calculation
|
||||
last_scan_time=time.time(),
|
||||
scan_duration=result.scan_time,
|
||||
errors=result.errors,
|
||||
)
|
||||
self.cache.set(directory, stats)
|
||||
|
||||
# Store in history
|
||||
self.scan_history[directory] = {"last_scan": time.time(), "result": result}
|
||||
|
||||
# Add validation failures to results
|
||||
for directory, (permissions_ok, errors) in validation_results.items():
|
||||
if not permissions_ok:
|
||||
results[directory] = ScanResult(
|
||||
directory=directory,
|
||||
success=False,
|
||||
files_found=0,
|
||||
folders_found=0,
|
||||
errors=errors,
|
||||
scan_time=0,
|
||||
permissions_ok=False,
|
||||
)
|
||||
|
||||
logger.info(f"Completed scan of {len(results)} directories")
|
||||
return results
|
||||
|
||||
async def scan_directory_async(
|
||||
self, directory: str, progress_callback=None
|
||||
) -> ScanResult:
|
||||
"""Async directory scanning with progress tracking"""
|
||||
# Check cache first
|
||||
cached_stats = self.cache.get(directory)
|
||||
if cached_stats:
|
||||
logger.info(f"Using cached results for {directory}")
|
||||
return ScanResult(
|
||||
directory=directory,
|
||||
success=True,
|
||||
files_found=cached_stats.audio_files,
|
||||
folders_found=cached_stats.total_files - cached_stats.audio_files,
|
||||
errors=cached_stats.errors,
|
||||
scan_time=cached_stats.scan_duration,
|
||||
permissions_ok=True,
|
||||
)
|
||||
|
||||
# Validate permissions
|
||||
permissions_ok, errors = await self.permission_validator.validate_directory(
|
||||
directory
|
||||
)
|
||||
if not permissions_ok:
|
||||
return ScanResult(
|
||||
directory=directory,
|
||||
success=False,
|
||||
files_found=0,
|
||||
folders_found=0,
|
||||
errors=errors,
|
||||
scan_time=0,
|
||||
permissions_ok=False,
|
||||
)
|
||||
|
||||
# Perform scan
|
||||
result = await self.parallel_scanner.scan_with_progress(
|
||||
directory, progress_callback
|
||||
)
|
||||
|
||||
# Cache successful results
|
||||
if result.success:
|
||||
stats = DirectoryStats(
|
||||
total_files=result.files_found + result.folders_found,
|
||||
audio_files=result.files_found,
|
||||
total_size=0,
|
||||
last_scan_time=time.time(),
|
||||
scan_duration=result.scan_time,
|
||||
errors=result.errors,
|
||||
)
|
||||
self.cache.set(directory, stats)
|
||||
|
||||
return result
|
||||
|
||||
def start_watching(self, directory: str, callback):
|
||||
"""Start watching a directory for changes"""
|
||||
if directory in self.watchers:
|
||||
return # Already watching
|
||||
|
||||
try:
|
||||
observer = Observer()
|
||||
handler = DirectoryWatcher(directory, callback)
|
||||
observer.schedule(handler, directory, recursive=True)
|
||||
observer.start()
|
||||
self.watchers[directory] = observer
|
||||
logger.info(f"Started watching directory: {directory}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to start watching {directory}: {e}")
|
||||
|
||||
def stop_watching(self, directory: str):
|
||||
"""Stop watching a directory"""
|
||||
if directory in self.watchers:
|
||||
observer = self.watchers.pop(directory)
|
||||
observer.stop()
|
||||
observer.join()
|
||||
logger.info(f"Stopped watching directory: {directory}")
|
||||
|
||||
def stop_all_watching(self):
|
||||
"""Stop watching all directories"""
|
||||
for directory in list(self.watchers.keys()):
|
||||
self.stop_watching(directory)
|
||||
|
||||
def get_scan_stats(self) -> dict[str, Any]:
|
||||
"""Get scanning statistics"""
|
||||
return {
|
||||
"cached_directories": len(self.cache.cache),
|
||||
"watched_directories": len(self.watchers),
|
||||
"scan_history": len(self.scan_history),
|
||||
"last_scans": {
|
||||
directory: history["last_scan"]
|
||||
for directory, history in self.scan_history.items()
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
# Global enhanced directory scanner instance
|
||||
enhanced_directory_scanner = EnhancedDirectoryScanner()
|
||||
Reference in New Issue
Block a user