mirror of
https://github.com/Dvorinka/SpotifyRecAlg.git
synced 2026-06-04 04:23:02 +00:00
first commit
This commit is contained in:
@@ -0,0 +1,454 @@
|
||||
"""
|
||||
Analytics Event Queue using DragonflyDB.
|
||||
|
||||
Provides a high-performance event queue for analytics events using DragonflyDB
|
||||
lists. Events are queued for batch processing, reducing database load and
|
||||
enabling real-time analytics aggregation.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
from swingmusic.db.dragonfly_client import get_dragonfly_client
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Maximum queue size before forced flush
|
||||
MAX_QUEUE_SIZE = 10000
|
||||
|
||||
# Event expiry (7 days)
|
||||
EVENT_TTL = 7 * 24 * 60 * 60
|
||||
|
||||
|
||||
class EventType(StrEnum):
|
||||
"""Types of analytics events."""
|
||||
|
||||
# Playback events
|
||||
TRACK_PLAY = "track_play"
|
||||
TRACK_SKIP = "track_skip"
|
||||
TRACK_COMPLETE = "track_complete"
|
||||
ALBUM_PLAY = "album_play"
|
||||
ARTIST_PLAY = "artist_play"
|
||||
|
||||
# User interaction events
|
||||
FAVORITE_ADD = "favorite_add"
|
||||
FAVORITE_REMOVE = "favorite_remove"
|
||||
PLAYLIST_CREATE = "playlist_create"
|
||||
PLAYLIST_ADD_TRACK = "playlist_add_track"
|
||||
PLAYLIST_REMOVE_TRACK = "playlist_remove_track"
|
||||
|
||||
# Search events
|
||||
SEARCH_QUERY = "search_query"
|
||||
SEARCH_RESULT_CLICK = "search_result_click"
|
||||
|
||||
# Download events
|
||||
DOWNLOAD_START = "download_start"
|
||||
DOWNLOAD_COMPLETE = "download_complete"
|
||||
DOWNLOAD_FAIL = "download_fail"
|
||||
|
||||
# Library events
|
||||
LIBRARY_SCAN = "library_scan"
|
||||
LIBRARY_UPDATE = "library_update"
|
||||
|
||||
# Session events
|
||||
SESSION_START = "session_start"
|
||||
SESSION_END = "session_end"
|
||||
|
||||
|
||||
@dataclass
|
||||
class AnalyticsEvent:
|
||||
"""Represents a single analytics event."""
|
||||
|
||||
event_type: EventType
|
||||
timestamp: int
|
||||
userid: int
|
||||
data: dict[str, Any]
|
||||
event_id: str = field(default_factory=lambda: uuid.uuid4().hex)
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"event_id": self.event_id,
|
||||
"event_type": self.event_type.value,
|
||||
"timestamp": self.timestamp,
|
||||
"userid": self.userid,
|
||||
"data": self.data,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict[str, Any]) -> "AnalyticsEvent":
|
||||
return cls(
|
||||
event_id=data["event_id"],
|
||||
event_type=EventType(data["event_type"]),
|
||||
timestamp=data["timestamp"],
|
||||
userid=data["userid"],
|
||||
data=data["data"],
|
||||
)
|
||||
|
||||
|
||||
class AnalyticsEventQueue:
|
||||
"""
|
||||
Manages analytics events using DragonflyDB lists.
|
||||
|
||||
Events are pushed to a Redis list and can be processed in batches
|
||||
by a background worker. This decouples event collection from
|
||||
event processing, improving application responsiveness.
|
||||
"""
|
||||
|
||||
def __init__(self, max_queue_size: int = MAX_QUEUE_SIZE):
|
||||
self.max_queue_size = max_queue_size
|
||||
self._client = None
|
||||
|
||||
@property
|
||||
def client(self):
|
||||
if self._client is None:
|
||||
self._client = get_dragonfly_client()
|
||||
return self._client
|
||||
|
||||
def _get_queue_key(self) -> str:
|
||||
"""Get the main event queue key."""
|
||||
return "analytics:events:queue"
|
||||
|
||||
def _get_processing_key(self) -> str:
|
||||
"""Get the processing list key (for reliable queue pattern)."""
|
||||
return "analytics:events:processing"
|
||||
|
||||
def _get_stats_key(self, event_type: EventType) -> str:
|
||||
"""Get the key for event type statistics."""
|
||||
return f"analytics:stats:{event_type.value}"
|
||||
|
||||
def _get_hourly_key(self, event_type: EventType, hour: int) -> str:
|
||||
"""Get the key for hourly event counts."""
|
||||
return f"analytics:hourly:{event_type.value}:{hour}"
|
||||
|
||||
def enqueue(self, event: AnalyticsEvent) -> bool:
|
||||
"""
|
||||
Add an event to the queue.
|
||||
|
||||
Args:
|
||||
event: The analytics event to enqueue
|
||||
|
||||
Returns:
|
||||
True if successful, False otherwise
|
||||
"""
|
||||
if not self.client.is_available():
|
||||
return False
|
||||
|
||||
try:
|
||||
queue_key = self._get_queue_key()
|
||||
|
||||
# Push event to queue (RPUSH for FIFO processing)
|
||||
self.client.client.rpush(queue_key, json.dumps(event.to_dict()))
|
||||
|
||||
# Increment event type counter
|
||||
stats_key = self._get_stats_key(event.event_type)
|
||||
self.client.client.incr(stats_key)
|
||||
|
||||
# Increment hourly counter
|
||||
hour = event.timestamp // 3600
|
||||
hourly_key = self._get_hourly_key(event.event_type, hour)
|
||||
self.client.client.incr(hourly_key)
|
||||
self.client.client.expire(hourly_key, EVENT_TTL)
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to enqueue analytics event: {e}")
|
||||
return False
|
||||
|
||||
def enqueue_batch(self, events: list[AnalyticsEvent]) -> int:
|
||||
"""
|
||||
Add multiple events to the queue atomically.
|
||||
|
||||
Returns:
|
||||
Number of events successfully enqueued
|
||||
"""
|
||||
if not self.client.is_available():
|
||||
return 0
|
||||
|
||||
try:
|
||||
queue_key = self._get_queue_key()
|
||||
pipe = self.client.client.pipeline()
|
||||
|
||||
for event in events:
|
||||
pipe.rpush(queue_key, json.dumps(event.to_dict()))
|
||||
|
||||
# Increment counters
|
||||
stats_key = self._get_stats_key(event.event_type)
|
||||
pipe.incr(stats_key)
|
||||
|
||||
hour = event.timestamp // 3600
|
||||
hourly_key = self._get_hourly_key(event.event_type, hour)
|
||||
pipe.incr(hourly_key)
|
||||
pipe.expire(hourly_key, EVENT_TTL)
|
||||
|
||||
pipe.execute()
|
||||
return len(events)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to enqueue batch: {e}")
|
||||
return 0
|
||||
|
||||
def dequeue(self, batch_size: int = 100) -> list[AnalyticsEvent]:
|
||||
"""
|
||||
Get a batch of events from the queue for processing.
|
||||
|
||||
Uses the reliable queue pattern: events are moved to a processing
|
||||
list before being returned. They should be acknowledged after
|
||||
successful processing.
|
||||
|
||||
Args:
|
||||
batch_size: Maximum number of events to dequeue
|
||||
|
||||
Returns:
|
||||
List of analytics events
|
||||
"""
|
||||
if not self.client.is_available():
|
||||
return []
|
||||
|
||||
try:
|
||||
queue_key = self._get_queue_key()
|
||||
processing_key = self._get_processing_key()
|
||||
|
||||
events = []
|
||||
|
||||
for _ in range(batch_size):
|
||||
# RPOPLPUSH: atomically pop from queue and push to processing
|
||||
result = self.client.client.rpoplpush(queue_key, processing_key)
|
||||
|
||||
if not result:
|
||||
break
|
||||
|
||||
try:
|
||||
event = AnalyticsEvent.from_dict(json.loads(result))
|
||||
events.append(event)
|
||||
except json.JSONDecodeError:
|
||||
# Invalid event, remove from processing
|
||||
self.client.client.lrem(processing_key, 1, result)
|
||||
continue
|
||||
|
||||
return events
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to dequeue events: {e}")
|
||||
return []
|
||||
|
||||
def acknowledge(self, event_ids: list[str]) -> int:
|
||||
"""
|
||||
Acknowledge processed events, removing them from the processing list.
|
||||
|
||||
Args:
|
||||
event_ids: List of event IDs to acknowledge
|
||||
|
||||
Returns:
|
||||
Number of events acknowledged
|
||||
"""
|
||||
if not self.client.is_available():
|
||||
return 0
|
||||
|
||||
try:
|
||||
processing_key = self._get_processing_key()
|
||||
|
||||
# Get all events in processing list
|
||||
all_events = self.client.client.lrange(processing_key, 0, -1)
|
||||
|
||||
acknowledged = 0
|
||||
for event_json in all_events:
|
||||
try:
|
||||
event_data = json.loads(event_json)
|
||||
if event_data.get("event_id") in event_ids:
|
||||
self.client.client.lrem(processing_key, 1, event_json)
|
||||
acknowledged += 1
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
|
||||
return acknowledged
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to acknowledge events: {e}")
|
||||
return 0
|
||||
|
||||
def requeue_unprocessed(self, timeout_seconds: int = 300) -> int:
|
||||
"""
|
||||
Re-queue events that have been in processing for too long.
|
||||
|
||||
This handles the case where a worker crashes while processing events.
|
||||
|
||||
Args:
|
||||
timeout_seconds: Time after which processing events are considered stale
|
||||
|
||||
Returns:
|
||||
Number of events re-queued
|
||||
"""
|
||||
if not self.client.is_available():
|
||||
return 0
|
||||
|
||||
try:
|
||||
queue_key = self._get_queue_key()
|
||||
processing_key = self._get_processing_key()
|
||||
|
||||
# Move all processing events back to queue
|
||||
# In production, you'd check timestamps for timeout
|
||||
requeued = 0
|
||||
while True:
|
||||
result = self.client.client.rpoplpush(processing_key, queue_key)
|
||||
if not result:
|
||||
break
|
||||
requeued += 1
|
||||
|
||||
return requeued
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to requeue unprocessed events: {e}")
|
||||
return 0
|
||||
|
||||
def get_queue_size(self) -> int:
|
||||
"""Get the number of events in the queue."""
|
||||
if not self.client.is_available():
|
||||
return 0
|
||||
|
||||
try:
|
||||
return self.client.client.llen(self._get_queue_key())
|
||||
except Exception:
|
||||
return 0
|
||||
|
||||
def get_event_count(self, event_type: EventType) -> int:
|
||||
"""Get the total count for an event type."""
|
||||
if not self.client.is_available():
|
||||
return 0
|
||||
|
||||
try:
|
||||
key = self._get_stats_key(event_type)
|
||||
value = self.client.get(key)
|
||||
return int(value) if value else 0
|
||||
except Exception:
|
||||
return 0
|
||||
|
||||
def get_hourly_counts(
|
||||
self, event_type: EventType, start_hour: int, end_hour: int
|
||||
) -> dict[int, int]:
|
||||
"""
|
||||
Get hourly event counts for a time range.
|
||||
|
||||
Args:
|
||||
event_type: The event type to query
|
||||
start_hour: Start hour (Unix timestamp / 3600)
|
||||
end_hour: End hour (Unix timestamp / 3600)
|
||||
|
||||
Returns:
|
||||
Dict mapping hour to event count
|
||||
"""
|
||||
if not self.client.is_available():
|
||||
return {}
|
||||
|
||||
try:
|
||||
counts = {}
|
||||
for hour in range(start_hour, end_hour + 1):
|
||||
key = self._get_hourly_key(event_type, hour)
|
||||
value = self.client.get(key)
|
||||
counts[hour] = int(value) if value else 0
|
||||
|
||||
return counts
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get hourly counts: {e}")
|
||||
return {}
|
||||
|
||||
def clear_queue(self) -> bool:
|
||||
"""Clear all events from the queue."""
|
||||
if not self.client.is_available():
|
||||
return False
|
||||
|
||||
try:
|
||||
self.client.client.delete(self._get_queue_key())
|
||||
self.client.client.delete(self._get_processing_key())
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
# Helper functions for common events
|
||||
def track_played(
|
||||
userid: int,
|
||||
trackhash: str,
|
||||
duration: int,
|
||||
source: str,
|
||||
) -> AnalyticsEvent:
|
||||
"""Create a track play event."""
|
||||
return AnalyticsEvent(
|
||||
event_type=EventType.TRACK_PLAY,
|
||||
timestamp=int(time.time()),
|
||||
userid=userid,
|
||||
data={
|
||||
"trackhash": trackhash,
|
||||
"duration": duration,
|
||||
"source": source,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
def track_skipped(
|
||||
userid: int,
|
||||
trackhash: str,
|
||||
position: int,
|
||||
) -> AnalyticsEvent:
|
||||
"""Create a track skip event."""
|
||||
return AnalyticsEvent(
|
||||
event_type=EventType.TRACK_SKIP,
|
||||
timestamp=int(time.time()),
|
||||
userid=userid,
|
||||
data={
|
||||
"trackhash": trackhash,
|
||||
"position": position,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
def favorite_toggled(
|
||||
userid: int,
|
||||
item_type: str,
|
||||
itemhash: str,
|
||||
added: bool,
|
||||
) -> AnalyticsEvent:
|
||||
"""Create a favorite toggle event."""
|
||||
return AnalyticsEvent(
|
||||
event_type=EventType.FAVORITE_ADD if added else EventType.FAVORITE_REMOVE,
|
||||
timestamp=int(time.time()),
|
||||
userid=userid,
|
||||
data={
|
||||
"item_type": item_type,
|
||||
"itemhash": itemhash,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
def search_performed(
|
||||
userid: int,
|
||||
query: str,
|
||||
results_count: int,
|
||||
filters: dict[str, Any] | None = None,
|
||||
) -> AnalyticsEvent:
|
||||
"""Create a search event."""
|
||||
return AnalyticsEvent(
|
||||
event_type=EventType.SEARCH_QUERY,
|
||||
timestamp=int(time.time()),
|
||||
userid=userid,
|
||||
data={
|
||||
"query": query,
|
||||
"results_count": results_count,
|
||||
"filters": filters or {},
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
# Global instance
|
||||
analytics_queue = AnalyticsEventQueue()
|
||||
|
||||
|
||||
def get_analytics_queue() -> AnalyticsEventQueue:
|
||||
"""Get the global analytics queue instance."""
|
||||
return analytics_queue
|
||||
Reference in New Issue
Block a user