# swingmusic-extended/services/real_time_audio.py

# swingmusic/services/real_time_audio.py
import numpy as np
import librosa
import sounddevice as sd
from typing import Dict, List, Callable, Optional, Tuple
from dataclasses import dataclass
from threading import Thread, Event
import queue
import logging
from scipy import signal
from scipy.io import wavfile

logger = logging.getLogger(__name__)

@dataclass
class AudioConfig:
    """Audio processing configuration.

    A single instance is shared by the processor, the analysis helpers and
    the stream manager defined in this module.
    """
    # Sampling rate; passed as ``sr`` to librosa and ``samplerate`` to sounddevice.
    sample_rate: int = 44100
    # The processor allocates input/output buffers of ``buffer_size * 4`` rows.
    buffer_size: int = 1024
    # Channel count of the streams and column count of the processor buffers.
    channels: int = 2
    # Sample dtype handed to the sounddevice streams.
    dtype: str = 'float32'
    # Passed as ``blocksize`` to the sounddevice streams.
    block_size: int = 512
    # Hop size passed to librosa beat tracking and used to convert a sample
    # count into a frame index when computing the beat phase.
    hop_length: int = 256

@dataclass
class AudioFeatures:
    """Real-time audio features computed for one audio chunk.

    The scalar fields are averages over the analysis frames of the chunk;
    ``mfcc`` and ``chroma`` keep the per-frame matrices returned by librosa.
    """
    # sqrt(mean(x ** 2)) of the mono signal.
    rms_energy: float
    # Mean of librosa.feature.zero_crossing_rate.
    zero_crossing_rate: float
    # Mean of librosa.feature.spectral_centroid.
    spectral_centroid: float
    # Mean of librosa.feature.spectral_bandwidth.
    spectral_bandwidth: float
    # Mean of librosa.feature.spectral_rolloff.
    spectral_rolloff: float
    # librosa.feature.mfcc output with n_mfcc=13 (zeros((13, 1)) on failure).
    mfcc: np.ndarray
    # librosa.feature.chroma_stft output (zeros((12, 1)) on failure).
    chroma: np.ndarray
    # Tempo reported by librosa.beat.beat_track (120.0 on failure).
    tempo: float
    # Fractional position inside the current beat, in [0, 1).
    beat_phase: float
    # Mean of ``chroma`` along axis 1 (zeros(12) on failure).
    key_strength: np.ndarray

@dataclass
class AudioEvent:
    """Audio event delivered to the callbacks registered on the processor."""
    # Wall-clock time (``time.time()``) at which the event was created.
    timestamp: float
    # Features extracted from ``audio_data``.
    features: AudioFeatures
    # The audio chunk the features were computed from.
    audio_data: np.ndarray
    # Event kind; the processor in this module only emits 'audio_features'.
    event_type: str

class RealTimeAudioProcessor:
    """Real-time audio processing engine for DJ features.

    Chunks passed to :meth:`process_audio_chunk` are queued, converted into
    :class:`AudioFeatures` on a worker thread and then handed to the
    registered callbacks on a second thread.

    Both queues are bounded by ``max_queue_size``. When a consumer falls
    behind, new items are dropped with a warning instead of piling up in
    memory.
    """

    def __init__(self, config: Optional["AudioConfig"] = None,
                 max_queue_size: int = 64):
        """Create an idle processor; call :meth:`start_processing` to run it.

        Args:
            config: Audio settings. A default ``AudioConfig`` is used when
                omitted.
            max_queue_size: Capacity of the chunk queue and of the event
                queue. Zero or a negative value makes them unbounded
                (``queue.Queue`` semantics).
        """
        self.config = config or AudioConfig()
        self.is_running = False
        # Bounded, so a non-blocking put can actually raise queue.Full.
        self.audio_queue = queue.Queue(maxsize=max_queue_size)
        self.feature_queue = queue.Queue(maxsize=max_queue_size)
        self.event_callbacks: List[Callable] = []

        # Audio processing components
        self.beat_tracker = BeatTracker(self.config)
        self.key_detector = KeyDetector(self.config)
        self.effects_processor = EffectsProcessor(self.config)

        # Threading
        self.processing_thread = None
        self.callback_thread = None
        self.stop_event = Event()

        # Audio buffers
        self.input_buffer = np.zeros((self.config.buffer_size * 4, self.config.channels))
        self.output_buffer = np.zeros((self.config.buffer_size * 4, self.config.channels))
        self.buffer_index = 0

    def add_event_callback(self, callback: Callable[["AudioEvent"], None]):
        """Register ``callback`` to receive every ``AudioEvent``."""
        self.event_callbacks.append(callback)

    def remove_event_callback(self, callback: Callable[["AudioEvent"], None]):
        """Unregister ``callback``; unknown callbacks are ignored."""
        if callback in self.event_callbacks:
            self.event_callbacks.remove(callback)

    def start_processing(self):
        """Start the feature-extraction and callback threads.

        Logs a warning and returns if processing is already running.
        """
        if self.is_running:
            logger.warning("Audio processing already running")
            return

        self.is_running = True
        self.stop_event.clear()

        # Daemon threads, so a forgotten stop_processing() cannot keep the
        # interpreter alive.
        self.processing_thread = Thread(target=self._processing_loop, daemon=True)
        self.callback_thread = Thread(target=self._callback_loop, daemon=True)

        self.processing_thread.start()
        self.callback_thread.start()

        logger.info("Real-time audio processing started")

    def stop_processing(self):
        """Signal both worker threads to stop and wait up to 1 s for each."""
        if not self.is_running:
            return

        self.is_running = False
        self.stop_event.set()

        # Wait for threads to finish
        if self.processing_thread:
            self.processing_thread.join(timeout=1.0)
        if self.callback_thread:
            self.callback_thread.join(timeout=1.0)

        logger.info("Real-time audio processing stopped")

    def process_audio_chunk(self, audio_data: np.ndarray):
        """Queue ``audio_data`` for feature extraction without blocking.

        The chunk is ignored while the processor is stopped and dropped (with
        a warning) when the chunk queue is full. The array is queued as-is,
        without copying, so a caller that reuses its buffer must pass a copy.
        """
        if not self.is_running:
            return

        try:
            self.audio_queue.put(audio_data, block=False)
        except queue.Full:
            logger.warning("Audio queue full, dropping chunk")

    def _processing_loop(self):
        """Worker loop: turn queued chunks into ``AudioEvent`` objects."""
        while self.is_running and not self.stop_event.is_set():
            try:
                # Short timeout so the stop flags are re-checked regularly.
                audio_data = self.audio_queue.get(timeout=0.1)
            except queue.Empty:
                continue

            try:
                features = self._extract_features(audio_data)
                event = AudioEvent(
                    timestamp=self._get_timestamp(),
                    features=features,
                    audio_data=audio_data,
                    event_type='audio_features'
                )
                self.feature_queue.put(event, block=False)
            except queue.Full:
                # Callbacks are slower than extraction; dropping this event
                # keeps the backlog bounded.
                logger.warning("Feature queue full, dropping event")
            except Exception as e:
                logger.error(f"Audio processing error: {e}")

    def _callback_loop(self):
        """Worker loop: deliver queued events to the registered callbacks."""
        while self.is_running and not self.stop_event.is_set():
            try:
                # Short timeout so the stop flags are re-checked regularly.
                event = self.feature_queue.get(timeout=0.1)
            except queue.Empty:
                continue

            # Iterate over a snapshot: a callback may register or unregister
            # callbacks (including itself) while the event is dispatched.
            for callback in list(self.event_callbacks):
                try:
                    callback(event)
                except Exception as e:
                    logger.error(f"Callback error: {e}")

    @staticmethod
    def _to_mono(audio_data: np.ndarray) -> np.ndarray:
        """Return a 1-D signal: channel mean for multi-channel input."""
        samples = np.asarray(audio_data)
        if samples.ndim > 1 and samples.shape[1] > 1:
            return np.mean(samples, axis=1)
        # Covers both (frames, 1) and already 1-D mono input.
        return samples.flatten()

    def _extract_features(self, audio_data: np.ndarray) -> "AudioFeatures":
        """Compute the ``AudioFeatures`` of one chunk.

        Args:
            audio_data: ``(frames, channels)`` or 1-D mono samples.

        Returns:
            The extracted features, or neutral defaults (zeros, 120 BPM) when
            extraction fails; the failure is logged.
        """
        try:
            audio_mono = self._to_mono(audio_data)
            sample_rate = self.config.sample_rate

            # Basic features
            rms_energy = np.sqrt(np.mean(audio_mono ** 2))
            zero_crossing_rate = librosa.feature.zero_crossing_rate(audio_mono)[0]

            # Spectral features
            spectral_centroids = librosa.feature.spectral_centroid(
                y=audio_mono, sr=sample_rate
            )[0]
            spectral_bandwidth = librosa.feature.spectral_bandwidth(
                y=audio_mono, sr=sample_rate
            )[0]
            spectral_rolloff = librosa.feature.spectral_rolloff(
                y=audio_mono, sr=sample_rate
            )[0]

            # MFCC
            mfcc = librosa.feature.mfcc(y=audio_mono, sr=sample_rate, n_mfcc=13)

            # Chroma
            chroma = librosa.feature.chroma_stft(y=audio_mono, sr=sample_rate)

            # Tempo and beat tracking
            tempo, beats = librosa.beat.beat_track(
                y=audio_mono, sr=sample_rate, hop_length=self.config.hop_length
            )
            # Depending on the librosa version the tempo is a scalar or a
            # one-element array; normalise it to a plain float.
            tempo_bpm = float(np.atleast_1d(tempo)[0])
            beat_phase = self._calculate_beat_phase(beats, len(audio_mono), tempo_bpm)

            # Key strength
            key_strength = np.mean(chroma, axis=1)

            return AudioFeatures(
                rms_energy=float(rms_energy),
                zero_crossing_rate=float(np.mean(zero_crossing_rate)),
                spectral_centroid=float(np.mean(spectral_centroids)),
                spectral_bandwidth=float(np.mean(spectral_bandwidth)),
                spectral_rolloff=float(np.mean(spectral_rolloff)),
                mfcc=mfcc,
                chroma=chroma,
                tempo=tempo_bpm,
                beat_phase=float(beat_phase),
                key_strength=key_strength
            )

        except Exception as e:
            logger.error(f"Feature extraction error: {e}")
            # Return default features
            return AudioFeatures(
                rms_energy=0.0, zero_crossing_rate=0.0, spectral_centroid=0.0,
                spectral_bandwidth=0.0, spectral_rolloff=0.0, mfcc=np.zeros((13, 1)),
                chroma=np.zeros((12, 1)), tempo=120.0, beat_phase=0.0,
                key_strength=np.zeros(12)
            )

    def _calculate_beat_phase(self, beats: np.ndarray, audio_length: int,
                              tempo: float = 120.0) -> float:
        """Return the position inside the current beat as a fraction in [0, 1).

        Args:
            beats: Beat positions as frame indices.
            audio_length: Length of the analysed signal in samples.
            tempo: Tempo in BPM that defines the beat duration. Values that
                are not positive fall back to 120 BPM.

        Returns:
            ``0.0`` when no beat lies before the end of the signal, otherwise
            the elapsed fraction of a beat since the most recent one.
        """
        beats = np.asarray(beats)
        if len(beats) == 0:
            return 0.0

        # Find the most recent beat
        current_frame = audio_length // self.config.hop_length
        recent_beats = beats[beats < current_frame]
        if len(recent_beats) == 0:
            return 0.0

        # ``not tempo > 0`` also rejects NaN.
        if not tempo > 0:
            tempo = 120.0
        beat_duration = 60.0 / tempo

        frames_since_beat = current_frame - recent_beats[-1]
        time_since_beat = frames_since_beat * self.config.hop_length / self.config.sample_rate

        return float((time_since_beat % beat_duration) / beat_duration)

    def _get_timestamp(self) -> float:
        """Return the current wall-clock time in seconds since the epoch."""
        import time
        return time.time()

    def apply_real_time_effect(self, audio_data: np.ndarray, effect_type: str,
                            params: Dict) -> np.ndarray:
        """Apply ``effect_type`` to ``audio_data`` via the effects processor."""
        return self.effects_processor.process(audio_data, effect_type, params)

class BeatTracker:
    """Real-time beat tracking with a short tempo history for smoothing."""

    def __init__(self, config: AudioConfig):
        """Store the configuration and start with an empty tempo history."""
        self.config = config
        self.tempo_history = []
        self.max_history = 10

    def track_beat(self, audio_data: np.ndarray) -> Tuple[float, np.ndarray]:
        """Estimate tempo and beat positions for one audio chunk.

        Args:
            audio_data: ``(frames, channels)`` or 1-D mono samples.

        Returns:
            ``(tempo, beats)``: the median of the last ``max_history`` tempo
            estimates and the beat positions reported by librosa for this
            chunk. On failure the error is logged and ``(120.0, empty array)``
            is returned.
        """
        try:
            # Convert to mono; 1-D input is already mono.
            samples = np.asarray(audio_data)
            if samples.ndim > 1 and samples.shape[1] > 1:
                audio_mono = np.mean(samples, axis=1)
            else:
                audio_mono = samples.flatten()

            # Track tempo and beats
            tempo, beats = librosa.beat.beat_track(
                y=audio_mono, sr=self.config.sample_rate, hop_length=self.config.hop_length
            )
            # Depending on the librosa version the tempo is a scalar or a
            # one-element array; keep plain floats in the history.
            tempo = float(np.atleast_1d(tempo)[0])

            # Update tempo history
            self.tempo_history.append(tempo)
            if len(self.tempo_history) > self.max_history:
                self.tempo_history.pop(0)

            # The median damps single-chunk outliers.
            return float(np.median(self.tempo_history)), beats

        except Exception as e:
            logger.error(f"Beat tracking error: {e}")
            return 120.0, np.array([])

class KeyDetector:
    """Real-time key detection with a short history for smoothing."""

    def __init__(self, config: AudioConfig):
        """Store the configuration and start with an empty key history."""
        self.config = config
        self.key_history = []
        self.max_history = 5

    def detect_key(self, audio_data: np.ndarray) -> Tuple[str, float]:
        """Estimate the key of one audio chunk.

        The pitch class with the highest mean chroma energy is taken as the
        key of the chunk. The result is then smoothed over the last
        ``max_history`` chunks.

        Args:
            audio_data: ``(frames, channels)`` or 1-D mono samples.

        Returns:
            ``(key, confidence)``: the key that occurs most often in the
            history (the earliest one wins a tie) and the mean confidence of
            its occurrences. Confidence is the winning pitch class's share of
            the total chroma energy. On failure the error is logged and
            ``('C', 0.0)`` is returned.
        """
        try:
            # Convert to mono; 1-D input is already mono.
            samples = np.asarray(audio_data)
            if samples.ndim > 1 and samples.shape[1] > 1:
                audio_mono = np.mean(samples, axis=1)
            else:
                audio_mono = samples.flatten()

            # Extract chroma
            chroma = librosa.feature.chroma_stft(
                y=audio_mono, sr=self.config.sample_rate
            )

            # Average chroma
            chroma_mean = np.mean(chroma, axis=1)

            # Simple key detection (would need more sophisticated implementation)
            key_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
            detected_key = key_names[int(np.argmax(chroma_mean))]

            # Calculate confidence
            total_energy = float(np.sum(chroma_mean))
            confidence = float(np.max(chroma_mean)) / total_energy if total_energy > 0 else 0.0

            # Update history
            self.key_history.append((detected_key, confidence))
            if len(self.key_history) > self.max_history:
                self.key_history.pop(0)

            # Most frequent key. Taking max() over the ordered list instead
            # of a set makes ties deterministic.
            keys = [key for key, _ in self.key_history]
            most_common_key = max(keys, key=keys.count)
            # Bind the key inside the comprehension: loop variables of the
            # comprehension above are not visible here.
            avg_confidence = float(np.mean(
                [conf for key, conf in self.key_history if key == most_common_key]
            ))
            return most_common_key, avg_confidence

        except Exception as e:
            logger.error(f"Key detection error: {e}")
            return 'C', 0.0

class EffectsProcessor:
    """Real-time audio effects processor.

    Effects are stateless: each call sees only the chunk it is given, so echo
    tails and gain envelopes do not carry over to the next chunk. Audio is
    expected as ``(frames, channels)``; the filter and the compressor also
    accept 1-D mono arrays.
    """

    def __init__(self, config: "AudioConfig"):
        """Store the audio configuration (only ``sample_rate`` is read)."""
        self.config = config

    def process(self, audio_data: np.ndarray, effect_type: str, params: Dict) -> np.ndarray:
        """Apply the effect named by ``effect_type``.

        Args:
            audio_data: Samples to process.
            effect_type: One of 'reverb', 'delay', 'filter', 'eq',
                'compressor' or 'distortion'.
            params: Effect parameters; missing keys use each effect's
                defaults and ``None`` is treated as an empty dict.

        Returns:
            The processed audio. The input is returned unchanged when the
            effect name is unknown or the effect raises (the error is logged).
        """
        try:
            handler = {
                'reverb': self._apply_reverb,
                'delay': self._apply_delay,
                'filter': self._apply_filter,
                'eq': self._apply_eq,
                'compressor': self._apply_compressor,
                'distortion': self._apply_distortion,
            }.get(effect_type)
            if handler is None:
                return audio_data
            return handler(audio_data, params or {})

        except Exception as e:
            logger.error(f"Effect processing error: {e}")
            return audio_data

    def _delay_in_samples(self, delay_time: float) -> int:
        """Convert a delay in seconds to a whole number of samples."""
        return int(delay_time * self.config.sample_rate)

    def _apply_reverb(self, audio_data: np.ndarray, params: Dict) -> np.ndarray:
        """Mix the signal with one delayed, attenuated copy of itself.

        Params: 'delay_time' (s, default 0.03), 'decay' (default 0.5),
        'mix' (wet share, default 0.3). The input is returned unchanged when
        the delay is shorter than one sample or not shorter than the chunk.
        """
        delay_time = params.get('delay_time', 0.03)
        decay = params.get('decay', 0.5)
        mix = params.get('mix', 0.3)

        delay_samples = self._delay_in_samples(delay_time)
        frames = len(audio_data)

        # A zero-sample delay would make audio_data[:-0] an empty slice.
        if not 0 < delay_samples < frames:
            return audio_data

        # Create delayed version
        delayed = np.zeros_like(audio_data)
        delayed[delay_samples:] = audio_data[:frames - delay_samples] * decay

        # Mix with original
        return audio_data * (1 - mix) + delayed * mix

    def _apply_delay(self, audio_data: np.ndarray, params: Dict) -> np.ndarray:
        """Add an echo with feedback.

        Params: 'delay_time' (s, default 0.25), 'feedback' (default 0.4),
        'mix' (wet share, default 0.3). The input is returned unchanged when
        the delay is shorter than one sample or not shorter than the chunk.
        """
        delay_time = params.get('delay_time', 0.25)
        feedback = params.get('feedback', 0.4)
        mix = params.get('mix', 0.3)

        delay_samples = self._delay_in_samples(delay_time)
        frames = len(audio_data)

        if not 0 < delay_samples < frames:
            return audio_data

        # Create delayed signal with feedback
        delayed = np.zeros_like(audio_data)
        delayed[delay_samples:] = audio_data[:frames - delay_samples]

        # Feedback: every sample receives the finished sample one delay
        # earlier. A block of ``delay_samples`` samples only reads from the
        # previous block, so whole blocks can be updated at once.
        for start in range(delay_samples, frames, delay_samples):
            stop = min(start + delay_samples, frames)
            delayed[start:stop] += delayed[start - delay_samples:stop - delay_samples] * feedback

        # Mix with original
        return audio_data * (1 - mix) + delayed * mix

    def _apply_filter(self, audio_data: np.ndarray, params: Dict) -> np.ndarray:
        """Apply a zero-phase Butterworth filter along the frame axis.

        Params: 'type' ('lowpass', 'highpass' or 'bandpass'; default
        'lowpass'), 'cutoff' (Hz, default 1000), 'order' (default 4) and, for
        'bandpass', 'low'/'high' (Hz, defaults 500/2000). Unknown types
        return the input unchanged.
        """
        filter_type = params.get('type', 'lowpass')
        cutoff = params.get('cutoff', 1000)
        order = params.get('order', 4)

        # Critical frequencies are given to butter() as fractions of Nyquist.
        nyquist = self.config.sample_rate / 2

        if filter_type == 'lowpass':
            b, a = signal.butter(order, cutoff / nyquist, btype='low')
        elif filter_type == 'highpass':
            b, a = signal.butter(order, cutoff / nyquist, btype='high')
        elif filter_type == 'bandpass':
            low = params.get('low', 500) / nyquist
            high = params.get('high', 2000) / nyquist
            b, a = signal.butter(order, [low, high], btype='band')
        else:
            return audio_data

        # axis=0 filters every channel of a (frames, channels) array in one
        # call and also handles 1-D mono input.
        samples = np.asarray(audio_data)
        filtered = signal.filtfilt(b, a, samples, axis=0)

        # Keep the dtype of the input.
        return filtered.astype(samples.dtype, copy=False)

    def _apply_eq(self, audio_data: np.ndarray, params: Dict) -> np.ndarray:
        """Apply the average of three band gains as one broadband gain.

        Params: 'low_gain', 'mid_gain', 'high_gain' in dB (default 0 each).
        This is a simplification: no band splitting is performed, so the
        three gains are not frequency-selective.
        """
        low_gain = params.get('low_gain', 0)  # dB
        mid_gain = params.get('mid_gain', 0)  # dB
        high_gain = params.get('high_gain', 0)  # dB

        # Convert dB to linear
        low_gain_lin = 10 ** (low_gain / 20)
        mid_gain_lin = 10 ** (mid_gain / 20)
        high_gain_lin = 10 ** (high_gain / 20)

        # Apply gains (simplified - real EQ would use filters)
        result = audio_data.copy()
        result *= (low_gain_lin + mid_gain_lin + high_gain_lin) / 3

        return result

    def _apply_compressor(self, audio_data: np.ndarray, params: Dict) -> np.ndarray:
        """Reduce the level of samples whose magnitude exceeds a threshold.

        Params: 'threshold' (linear amplitude, default 0.7), 'ratio'
        (default 4), 'attack' and 'release' (s, defaults 0.003 and 0.1).
        """
        threshold = params.get('threshold', 0.7)
        ratio = params.get('ratio', 4)
        attack = params.get('attack', 0.003)
        release = params.get('release', 0.1)

        result = np.asarray(audio_data).copy()
        # View the samples as columns so mono 1-D input shares the same loop;
        # writes through the view land in ``result``.
        columns = result if result.ndim > 1 else result[:, np.newaxis]

        for ch in range(columns.shape[1]):
            # Calculate envelope
            envelope = np.abs(columns[:, ch])

            # Gain is 1 below the threshold. The reduced gain is computed
            # only for samples above it, which avoids dividing by a zero
            # envelope on silent samples.
            gain_reduction = np.ones(envelope.shape, dtype=float)
            over = (envelope > threshold) & (envelope > 0)
            gain_reduction[over] = (
                1 - (envelope[over] - threshold) * (1 - 1 / ratio) / envelope[over]
            )

            # Smooth gain reduction
            gain_reduction = self._smooth_gain(gain_reduction, attack, release)

            # Apply gain reduction
            columns[:, ch] *= gain_reduction

        return result

    def _apply_distortion(self, audio_data: np.ndarray, params: Dict) -> np.ndarray:
        """Apply tanh soft clipping.

        Params: 'drive' (pre-gain, default 5), 'mix' (wet share, default 0.5).
        """
        drive = params.get('drive', 5)
        mix = params.get('mix', 0.5)

        # Apply distortion
        distorted = np.tanh(audio_data * drive)

        # Mix with original
        return audio_data * (1 - mix) + distorted * mix

    def _smoothing_coeff(self, seconds: float) -> float:
        """Return the one-pole coefficient for a time constant in seconds.

        A time constant that is not positive yields 0, i.e. no smoothing.
        """
        span = seconds * self.config.sample_rate
        if span <= 0:
            return 0.0
        return np.exp(-1.0 / span)

    def _smooth_gain(self, gain_reduction: np.ndarray, attack: float, release: float) -> np.ndarray:
        """Smooth a gain curve with separate attack and release constants.

        Falling gain (more reduction) follows the attack constant, rising
        gain the release constant. Empty input yields an empty result.
        """
        smoothed = np.zeros_like(gain_reduction)
        if len(gain_reduction) == 0:
            return smoothed
        smoothed[0] = gain_reduction[0]

        attack_coeff = self._smoothing_coeff(attack)
        release_coeff = self._smoothing_coeff(release)

        # Each value depends on the previous smoothed value, so this stays a
        # sequential loop.
        for i in range(1, len(gain_reduction)):
            if gain_reduction[i] < smoothed[i-1]:
                coeff = attack_coeff
            else:
                coeff = release_coeff
            smoothed[i] = coeff * smoothed[i-1] + (1 - coeff) * gain_reduction[i]

        return smoothed

class AudioStreamManager:
    """Manage the sounddevice input and output streams of a processor."""

    def __init__(self, processor: "RealTimeAudioProcessor"):
        """Bind the manager to ``processor``; no stream is opened yet."""
        self.processor = processor
        self.input_stream = None
        self.output_stream = None

    def _stream_options(self, device_id: Optional[int], callback: Callable) -> Dict:
        """Build the keyword arguments shared by both stream types."""
        config = self.processor.config
        return dict(
            samplerate=config.sample_rate,
            channels=config.channels,
            dtype=config.dtype,
            blocksize=config.block_size,
            device=device_id,
            callback=callback,
        )

    def start_input_stream(self, device_id: Optional[int] = None):
        """Open and start the input stream.

        Args:
            device_id: sounddevice device to use; ``None`` is passed through
                to sounddevice.

        Raises:
            Exception: Whatever sounddevice raises while opening or starting
                the stream; the error is logged and re-raised.
        """
        if self.input_stream is not None:
            # Replacing the reference would leak the stream that is running.
            logger.warning("Audio input stream already running")
            return

        stream = None
        try:
            stream = sd.InputStream(**self._stream_options(device_id, self._input_callback))
            stream.start()
        except Exception as e:
            # Do not leave a half-opened stream behind.
            if stream is not None:
                stream.close()
            logger.error(f"Failed to start input stream: {e}")
            raise

        self.input_stream = stream
        logger.info("Audio input stream started")

    def stop_input_stream(self):
        """Stop and close the input stream; a no-op when none is open."""
        stream, self.input_stream = self.input_stream, None
        if stream is None:
            return
        try:
            stream.stop()
        finally:
            # Release the device even if stop() fails.
            stream.close()
        logger.info("Audio input stream stopped")

    def start_output_stream(self, device_id: Optional[int] = None):
        """Open and start the output stream.

        Args:
            device_id: sounddevice device to use; ``None`` is passed through
                to sounddevice.

        Raises:
            Exception: Whatever sounddevice raises while opening or starting
                the stream; the error is logged and re-raised.
        """
        if self.output_stream is not None:
            # Replacing the reference would leak the stream that is running.
            logger.warning("Audio output stream already running")
            return

        stream = None
        try:
            stream = sd.OutputStream(**self._stream_options(device_id, self._output_callback))
            stream.start()
        except Exception as e:
            # Do not leave a half-opened stream behind.
            if stream is not None:
                stream.close()
            logger.error(f"Failed to start output stream: {e}")
            raise

        self.output_stream = stream
        logger.info("Audio output stream started")

    def stop_output_stream(self):
        """Stop and close the output stream; a no-op when none is open."""
        stream, self.output_stream = self.output_stream, None
        if stream is None:
            return
        try:
            stream.stop()
        finally:
            # Release the device even if stop() fails.
            stream.close()
        logger.info("Audio output stream stopped")

    def _input_callback(self, indata, frames, time, status):
        """Forward one captured block to the processor.

        The signature is the one sounddevice uses for stream callbacks.
        """
        if status:
            logger.warning(f"Input stream status: {status}")

        # The processor queues the array for another thread, while the
        # capture buffer is only valid during this callback (the sounddevice
        # examples copy it for the same reason), so hand over a copy.
        self.processor.process_audio_chunk(indata.copy())

    def _output_callback(self, outdata, frames, time, status):
        """Fill one playback block; currently always silence.

        The signature is the one sounddevice uses for stream callbacks.
        """
        if status:
            logger.warning(f"Output stream status: {status}")

        # Generate output (would need audio source)
        outdata.fill(0)  # Silence for now