first commit

This commit is contained in:
Tomas Dvorak
2026-04-13 17:46:58 +02:00
commit 6e8fedf534
234 changed files with 53808 additions and 0 deletions
@@ -0,0 +1,468 @@
"""
Enhanced Directory Scanner for SwingMusic
Handles multiple music directories with parallel scanning, permission validation, and error handling
"""
import os
import threading
import time
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer
from swingmusic import logger
@dataclass
class ScanResult:
"""Result of directory scanning operation"""
directory: str
success: bool
files_found: int
folders_found: int
errors: list[str]
scan_time: float
permissions_ok: bool
@dataclass
class FileInfo:
"""Information about a scanned file"""
path: str
size: int
modified_time: float
is_audio: bool
extension: str
@dataclass
class DirectoryStats:
"""Statistics for a scanned directory"""
total_files: int
audio_files: int
total_size: int
last_scan_time: float
scan_duration: float
errors: list[str]
class PermissionValidator:
"""Validates directory permissions for scanning"""
@staticmethod
async def validate_directory(directory: str) -> tuple[bool, list[str]]:
"""Validate if directory can be accessed and scanned"""
errors = []
try:
path = Path(directory)
# Check if directory exists
if not path.exists():
errors.append(f"Directory does not exist: {directory}")
return False, errors
# Check if it's actually a directory
if not path.is_dir():
errors.append(f"Path is not a directory: {directory}")
return False, errors
# Check read permissions
if not os.access(directory, os.R_OK):
errors.append(f"No read permission for directory: {directory}")
return False, errors
# Check execute permissions (needed for directory traversal)
if not os.access(directory, os.X_OK):
errors.append(f"No execute permission for directory: {directory}")
return False, errors
# Try to list directory contents
try:
list(path.iterdir())
except PermissionError as e:
errors.append(f"Cannot list directory contents: {directory} - {str(e)}")
return False, errors
# Check a subdirectory to ensure traversal works
try:
subdirs = [p for p in path.iterdir() if p.is_dir()]
if subdirs:
test_subdir = subdirs[0]
if os.access(test_subdir, os.R_OK | os.X_OK):
return True, errors
else:
errors.append(f"Cannot access subdirectories in: {directory}")
return False, errors
except Exception as e:
errors.append(
f"Error checking subdirectory access: {directory} - {str(e)}"
)
return False, errors
return True, errors
except Exception as e:
errors.append(
f"Unexpected error validating directory {directory}: {str(e)}"
)
return False, errors
class ParallelScanner:
"""Parallel directory scanner with performance optimization"""
def __init__(self, max_workers: int = 4):
self.max_workers = max_workers
self.audio_extensions = {
".flac",
".mp3",
".wav",
".aac",
".m4a",
".ogg",
".wma",
".alac",
".aiff",
".aif",
".dsd",
".dsf",
".dff",
}
async def scan_with_progress(
self, directory: str, progress_callback=None
) -> ScanResult:
"""Scan directory with progress reporting"""
start_time = time.time()
errors = []
files_found = 0
folders_found = 0
try:
path = Path(directory)
# Use ThreadPoolExecutor for parallel file processing
with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
# Collect all files and directories
all_items = list(path.rglob("*"))
total_items = len(all_items)
# Process items in batches
batch_size = 100
processed = 0
for i in range(0, total_items, batch_size):
batch = all_items[i : i + batch_size]
# Process batch in parallel
futures = []
for item in batch:
future = executor.submit(self._process_item, item)
futures.append((future, item))
# Collect results
for future, item in futures:
try:
is_audio, is_dir = future.result(timeout=5)
if is_dir:
folders_found += 1
elif is_audio:
files_found += 1
except Exception as e:
errors.append(f"Error processing {item}: {str(e)}")
processed += len(batch)
# Report progress
if progress_callback:
progress = (processed / total_items) * 100
progress_callback(directory, progress, processed, total_items)
scan_time = time.time() - start_time
return ScanResult(
directory=directory,
success=len(errors) == 0,
files_found=files_found,
folders_found=folders_found,
errors=errors,
scan_time=scan_time,
permissions_ok=True,
)
except Exception as e:
scan_time = time.time() - start_time
errors.append(f"Failed to scan directory {directory}: {str(e)}")
return ScanResult(
directory=directory,
success=False,
files_found=0,
folders_found=0,
errors=errors,
scan_time=scan_time,
permissions_ok=False,
)
def _process_item(self, item: Path) -> tuple[bool, bool]:
"""Process a single file or directory"""
try:
if item.is_dir():
return False, True
elif item.is_file():
is_audio = item.suffix.lower() in self.audio_extensions
return is_audio, False
else:
return False, False
except Exception:
return False, False
class DirectoryCache:
"""Caches directory scan results to improve performance"""
def __init__(self, cache_ttl: int = 3600): # 1 hour TTL
self.cache = {}
self.cache_ttl = cache_ttl
def get(self, directory: str) -> DirectoryStats | None:
"""Get cached directory stats"""
cached = self.cache.get(directory)
if cached and (time.time() - cached.last_scan_time) < self.cache_ttl:
return cached
return None
def set(self, directory: str, stats: DirectoryStats):
"""Cache directory stats"""
self.cache[directory] = stats
def invalidate(self, directory: str):
"""Invalidate cache for specific directory"""
self.cache.pop(directory, None)
def clear(self):
"""Clear all cache"""
self.cache.clear()
class DirectoryWatcher(FileSystemEventHandler):
"""Watches directory changes for automatic rescanning"""
def __init__(self, directory: str, callback):
self.directory = directory
self.callback = callback
self.debounce_timer = None
self.debounce_delay = 5 # 5 seconds debounce
def on_created(self, event):
"""Handle file/directory creation"""
if not event.is_directory:
self._schedule_rescan()
def on_deleted(self, event):
"""Handle file/directory deletion"""
self._schedule_rescan()
def on_moved(self, event):
"""Handle file/directory moves"""
self._schedule_rescan()
def _schedule_rescan(self):
"""Schedule a rescan with debouncing"""
if self.debounce_timer:
self.debounce_timer.cancel()
self.debounce_timer = threading.Timer(self.debounce_delay, self._trigger_rescan)
self.debounce_timer.start()
def _trigger_rescan(self):
"""Trigger the rescan callback"""
try:
self.callback(self.directory)
except Exception as e:
logger.error(f"Error in directory watcher callback: {e}")
class EnhancedDirectoryScanner:
"""Enhanced directory scanner with multiple improvements"""
def __init__(self, max_workers: int = 4):
self.permission_validator = PermissionValidator()
self.parallel_scanner = ParallelScanner(max_workers)
self.cache = DirectoryCache()
self.watchers = {} # directory -> observer
self.scan_history = {}
async def scan_multiple_directories(
self, directories: list[str], progress_callback=None
) -> dict[str, ScanResult]:
"""Efficiently scan multiple music directories in parallel"""
logger.info(f"Starting scan of {len(directories)} directories")
# Validate permissions first
validation_tasks = []
for directory in directories:
task = self.permission_validator.validate_directory(directory)
validation_tasks.append((directory, task))
# Collect validation results
valid_directories = []
validation_results = {}
for directory, task in validation_tasks:
permissions_ok, errors = await task
validation_results[directory] = (permissions_ok, errors)
if permissions_ok:
valid_directories.append(directory)
else:
logger.error(f"Directory validation failed for {directory}: {errors}")
# Scan valid directories in parallel
scan_tasks = []
for directory in valid_directories:
task = self.parallel_scanner.scan_with_progress(
directory, progress_callback
)
scan_tasks.append((directory, task))
# Collect scan results
results = {}
for directory, task in scan_tasks:
result = await task
results[directory] = result
# Cache successful results
if result.success:
stats = DirectoryStats(
total_files=result.files_found + result.folders_found,
audio_files=result.files_found,
total_size=0, # Would need additional calculation
last_scan_time=time.time(),
scan_duration=result.scan_time,
errors=result.errors,
)
self.cache.set(directory, stats)
# Store in history
self.scan_history[directory] = {"last_scan": time.time(), "result": result}
# Add validation failures to results
for directory, (permissions_ok, errors) in validation_results.items():
if not permissions_ok:
results[directory] = ScanResult(
directory=directory,
success=False,
files_found=0,
folders_found=0,
errors=errors,
scan_time=0,
permissions_ok=False,
)
logger.info(f"Completed scan of {len(results)} directories")
return results
async def scan_directory_async(
self, directory: str, progress_callback=None
) -> ScanResult:
"""Async directory scanning with progress tracking"""
# Check cache first
cached_stats = self.cache.get(directory)
if cached_stats:
logger.info(f"Using cached results for {directory}")
return ScanResult(
directory=directory,
success=True,
files_found=cached_stats.audio_files,
folders_found=cached_stats.total_files - cached_stats.audio_files,
errors=cached_stats.errors,
scan_time=cached_stats.scan_duration,
permissions_ok=True,
)
# Validate permissions
permissions_ok, errors = await self.permission_validator.validate_directory(
directory
)
if not permissions_ok:
return ScanResult(
directory=directory,
success=False,
files_found=0,
folders_found=0,
errors=errors,
scan_time=0,
permissions_ok=False,
)
# Perform scan
result = await self.parallel_scanner.scan_with_progress(
directory, progress_callback
)
# Cache successful results
if result.success:
stats = DirectoryStats(
total_files=result.files_found + result.folders_found,
audio_files=result.files_found,
total_size=0,
last_scan_time=time.time(),
scan_duration=result.scan_time,
errors=result.errors,
)
self.cache.set(directory, stats)
return result
def start_watching(self, directory: str, callback):
"""Start watching a directory for changes"""
if directory in self.watchers:
return # Already watching
try:
observer = Observer()
handler = DirectoryWatcher(directory, callback)
observer.schedule(handler, directory, recursive=True)
observer.start()
self.watchers[directory] = observer
logger.info(f"Started watching directory: {directory}")
except Exception as e:
logger.error(f"Failed to start watching {directory}: {e}")
def stop_watching(self, directory: str):
"""Stop watching a directory"""
if directory in self.watchers:
observer = self.watchers.pop(directory)
observer.stop()
observer.join()
logger.info(f"Stopped watching directory: {directory}")
def stop_all_watching(self):
"""Stop watching all directories"""
for directory in list(self.watchers.keys()):
self.stop_watching(directory)
def get_scan_stats(self) -> dict[str, Any]:
"""Get scanning statistics"""
return {
"cached_directories": len(self.cache.cache),
"watched_directories": len(self.watchers),
"scan_history": len(self.scan_history),
"last_scans": {
directory: history["last_scan"]
for directory, history in self.scan_history.items()
},
}
# Global enhanced directory scanner instance
enhanced_directory_scanner = EnhancedDirectoryScanner()