mirror of
https://github.com/Dvorinka/SpotifyRecAlg.git
synced 2026-06-03 20:13:03 +00:00
385 lines
15 KiB
Python
385 lines
15 KiB
Python
"""
|
|
Universal Music URL Parser for SwingMusic
|
|
Supports multiple music streaming services for universal downloading
|
|
"""
|
|
|
|
import re
|
|
from dataclasses import dataclass
|
|
from enum import Enum
|
|
from typing import Any
|
|
|
|
|
|
class MusicService(Enum):
|
|
SPOTIFY = "spotify"
|
|
TIDAL = "tidal"
|
|
APPLE_MUSIC = "apple_music"
|
|
YOUTUBE_MUSIC = "youtube_music"
|
|
YOUTUBE = "youtube"
|
|
SOUNDCLOUD = "soundcloud"
|
|
DEEZER = "deezer"
|
|
BANDCAMP = "bandcamp"
|
|
MUSICBRAINZ = "musicbrainz"
|
|
DISCOGS = "discogs"
|
|
|
|
|
|
@dataclass
|
|
class ParsedURL:
|
|
"""Represents a parsed music service URL"""
|
|
|
|
service: MusicService
|
|
url: str
|
|
item_type: str # track, album, playlist, artist, etc.
|
|
id: str
|
|
metadata: dict[str, Any] = None
|
|
|
|
|
|
class UniversalMusicURLParser:
|
|
"""Universal parser for music service URLs"""
|
|
|
|
def __init__(self):
|
|
self.patterns = {
|
|
MusicService.SPOTIFY: [
|
|
r"https://open\.spotify\.com/(track|album|playlist|artist|user)/([a-zA-Z0-9]+)",
|
|
r"https://spotify\.link/([a-zA-Z0-9]+)", # Short links
|
|
],
|
|
MusicService.TIDAL: [
|
|
r"https://tidal\.com/(browse|track|album|playlist|artist)/(\d+)",
|
|
r"https://tidal\.com/browse/(album|track|playlist|artist)/(\d+)",
|
|
r"https://listen\.tidal\.com/(browse|track|album|playlist|artist)/(\d+)",
|
|
],
|
|
MusicService.APPLE_MUSIC: [
|
|
r"https://music\.apple\.com/([a-z]{2})/song/([^/]+)/(\d+)",
|
|
r"https://music\.apple\.com/([a-z]{2})/album/(.*?)/(\d+)",
|
|
r"https://music\.apple\.com/([a-z]{2})/playlist/(.*?)/pl\.(.+)",
|
|
r"https://music\.apple\.com/([a-z]{2})/artist/(.*?)/(\d+)",
|
|
],
|
|
MusicService.YOUTUBE_MUSIC: [
|
|
r"https://music\.youtube\.com/(watch|playlist|channel)(\?[^#]*)",
|
|
r"https://youtube\.com/music/(watch|playlist|channel)(\?[^#]*)",
|
|
],
|
|
MusicService.YOUTUBE: [
|
|
r"https://www\.youtube\.com/watch\?v=([a-zA-Z0-9_-]+)",
|
|
r"https://youtu\.be/([a-zA-Z0-9_-]+)",
|
|
r"https://www\.youtube\.com/playlist\?list=([a-zA-Z0-9_-]+)",
|
|
r"https://www\.youtube\.com/channel/([a-zA-Z0-9_-]+)",
|
|
r"https://www\.youtube\.com/c/([a-zA-Z0-9_-]+)",
|
|
],
|
|
MusicService.SOUNDCLOUD: [
|
|
r"https://soundcloud\.com/([^/]+)/([^/]+)",
|
|
r"https://soundcloud\.com/([^/]+)/sets/([^/]+)",
|
|
],
|
|
MusicService.DEEZER: [
|
|
r"https://www\.deezer\.com/(en|fr|de|es|it|pt|nl|ru|ja)/(track|album|playlist|artist)/(\d+)",
|
|
r"https://deezer\.page\.link/(track|album|playlist|artist)/(\d+)",
|
|
r"https://link\.deezer\.com/s/([a-zA-Z0-9_-]+)",
|
|
],
|
|
MusicService.BANDCAMP: [
|
|
r"https://([a-zA-Z0-9-]+)\.bandcamp\.com/(track|album)/(.+)",
|
|
r"https://bandcamp\.com/search\?q=(.+)",
|
|
],
|
|
MusicService.MUSICBRAINZ: [
|
|
r"https://musicbrainz\.org/(recording|release|release-group|artist)/([a-f0-9-]+)",
|
|
r"https://musicbrainz\.org/doc/([a-f0-9-]+)", # API docs
|
|
r"https://musicbrainz\.org/artist/([a-f0-9-]+)", # Direct artist links
|
|
r"https://musicbrainz\.org/release-group/([a-f0-9-]+)", # Release groups
|
|
r"https://musicbrainz\.org/label/([a-f0-9-]+)", # Record labels
|
|
r"https://musicbrainz\.org/search\?query=([^&]+)", # Search queries
|
|
],
|
|
MusicService.DISCOGS: [
|
|
r"https://www\.discogs\.com/(release|master|artist)/(\d+)",
|
|
],
|
|
}
|
|
|
|
def parse_url(self, url: str) -> ParsedURL | None:
|
|
"""
|
|
Parse a music service URL and extract service, type, and ID
|
|
|
|
Args:
|
|
url: The URL to parse
|
|
|
|
Returns:
|
|
ParsedURL object if successful, None otherwise
|
|
"""
|
|
if not url or not isinstance(url, str):
|
|
return None
|
|
|
|
url = url.strip()
|
|
|
|
# Try each service pattern
|
|
for service, patterns in self.patterns.items():
|
|
for pattern in patterns:
|
|
match = re.match(pattern, url, re.IGNORECASE)
|
|
if match:
|
|
return self._extract_service_info(service, match, url)
|
|
|
|
return None
|
|
|
|
def _extract_service_info(
|
|
self, service: MusicService, match: re.Match, url: str
|
|
) -> ParsedURL:
|
|
"""Extract service-specific information from regex match"""
|
|
groups = match.groups()
|
|
|
|
if service == MusicService.SPOTIFY:
|
|
if len(groups) == 2:
|
|
item_type, item_id = groups
|
|
return ParsedURL(service, url, item_type, item_id)
|
|
elif len(groups) == 1: # Short link
|
|
# Would need to resolve short link
|
|
return ParsedURL(service, url, "short", groups[0])
|
|
|
|
elif service == MusicService.TIDAL:
|
|
item_type, item_id = groups
|
|
return ParsedURL(service, url, item_type, item_id)
|
|
|
|
elif service == MusicService.APPLE_MUSIC:
|
|
if len(groups) >= 2:
|
|
item_type = self._map_apple_music_type(groups[0])
|
|
item_id = groups[-1] # Last group is usually the ID
|
|
return ParsedURL(
|
|
service,
|
|
url,
|
|
item_type,
|
|
item_id,
|
|
{
|
|
"region": groups[0] if len(groups) > 2 else "us",
|
|
"name": groups[1] if len(groups) > 2 else "",
|
|
},
|
|
)
|
|
|
|
elif service == MusicService.YOUTUBE_MUSIC:
|
|
item_type = self._extract_youtube_type(groups[0], groups[1])
|
|
item_id = self._extract_youtube_id(groups[1])
|
|
return ParsedURL(service, url, item_type, item_id)
|
|
|
|
elif service == MusicService.YOUTUBE:
|
|
if "watch" in url:
|
|
video_id = self._extract_youtube_id(url)
|
|
return ParsedURL(service, url, "video", video_id)
|
|
elif "playlist" in url:
|
|
playlist_id = self._extract_youtube_playlist_id(url)
|
|
return ParsedURL(service, url, "playlist", playlist_id)
|
|
elif "channel" in url or "/c/" in url:
|
|
channel_id = self._extract_youtube_channel_id(url)
|
|
return ParsedURL(service, url, "channel", channel_id)
|
|
|
|
elif service == MusicService.SOUNDCLOUD:
|
|
if len(groups) == 2:
|
|
if groups[1] == "sets":
|
|
item_type = "playlist"
|
|
else:
|
|
item_type = "track" if groups[1] else "artist"
|
|
item_id = f"{groups[0]}/{groups[1]}"
|
|
return ParsedURL(service, url, item_type, item_id)
|
|
|
|
elif service == MusicService.DEEZER:
|
|
if len(groups) == 2:
|
|
item_type, item_id = groups
|
|
else:
|
|
# Short link format: link.deezer.com/s/ID
|
|
item_type = "track" # Default to track for short links
|
|
item_id = groups[0] if groups else ""
|
|
return ParsedURL(service, url, item_type, item_id)
|
|
|
|
elif service == MusicService.BANDCAMP:
|
|
if len(groups) == 3:
|
|
item_type, item_name = groups[1], groups[2]
|
|
item_id = f"{groups[0]}/{item_type}/{item_name}"
|
|
return ParsedURL(service, url, item_type, item_id)
|
|
|
|
elif service == MusicService.MUSICBRAINZ:
|
|
if len(groups) == 2:
|
|
item_type, item_id = groups
|
|
elif len(groups) == 1:
|
|
# Handle special cases like doc/, artist/, etc.
|
|
url.split("/")[-2] if "/" in url else ""
|
|
if "doc/" in url:
|
|
item_type = "doc"
|
|
elif "artist/" in url:
|
|
item_type = "artist"
|
|
elif "label/" in url:
|
|
item_type = "label"
|
|
elif "search" in url:
|
|
item_type = "search"
|
|
# Extract query from search URL
|
|
query_match = re.search(r"query=([^&]+)", url)
|
|
item_id = query_match.group(1) if query_match else groups[0]
|
|
else:
|
|
item_type = groups[0] if groups else "unknown"
|
|
item_id = groups[0] if groups else ""
|
|
return ParsedURL(service, url, item_type, item_id)
|
|
|
|
elif service == MusicService.DISCOGS:
|
|
item_type, item_id = groups
|
|
return ParsedURL(service, url, item_type, item_id)
|
|
|
|
return ParsedURL(service, url, "unknown", "")
|
|
|
|
def _map_apple_music_type(self, type_str: str) -> str:
|
|
"""Map Apple Music URL types to standard types"""
|
|
mapping = {
|
|
"album": "album",
|
|
"playlist": "playlist",
|
|
"artist": "artist",
|
|
"song": "song",
|
|
}
|
|
return mapping.get(type_str, "unknown")
|
|
|
|
def _extract_youtube_type(self, path: str, query: str) -> str:
|
|
"""Extract YouTube content type from URL"""
|
|
if "watch" in path or "v=" in query:
|
|
return "watch"
|
|
elif "playlist" in path or "list=" in query:
|
|
return "playlist"
|
|
elif "channel" in path:
|
|
return "channel"
|
|
return "unknown"
|
|
|
|
def _extract_youtube_id(self, url: str) -> str:
|
|
"""Extract YouTube video or channel ID from URL"""
|
|
# Video ID
|
|
video_match = re.search(r"[?&]v=([a-zA-Z0-9_-]+)", url)
|
|
if video_match:
|
|
return video_match.group(1)
|
|
|
|
# Short URL
|
|
short_match = re.search(r"youtu\.be/([a-zA-Z0-9_-]+)", url)
|
|
if short_match:
|
|
return short_match.group(1)
|
|
|
|
# Channel ID
|
|
channel_match = re.search(r"channel/([a-zA-Z0-9_-]+)", url)
|
|
if channel_match:
|
|
return channel_match.group(1)
|
|
|
|
# Custom channel
|
|
custom_match = re.search(r"/c/([a-zA-Z0-9_-]+)", url)
|
|
if custom_match:
|
|
return custom_match.group(1)
|
|
|
|
return ""
|
|
|
|
def _extract_youtube_playlist_id(self, url: str) -> str:
|
|
"""Extract YouTube playlist ID from URL"""
|
|
match = re.search(r"[?&]list=([a-zA-Z0-9_-]+)", url)
|
|
return match.group(1) if match else ""
|
|
|
|
def _extract_youtube_channel_id(self, url: str) -> str:
|
|
"""Extract YouTube channel ID from URL"""
|
|
# Handle both /channel/ and /c/ formats
|
|
channel_match = re.search(r"/(channel|c)/([a-zA-Z0-9_-]+)", url)
|
|
return channel_match.group(2) if channel_match else ""
|
|
|
|
def get_supported_services(self) -> list[dict[str, Any]]:
|
|
"""Get list of supported services with their info"""
|
|
return [
|
|
{
|
|
"id": MusicService.SPOTIFY.value,
|
|
"name": "Spotify",
|
|
"url_patterns": self.patterns[MusicService.SPOTIFY],
|
|
"supported_types": ["track", "album", "playlist", "artist"],
|
|
"features": ["metadata", "download", "playlist"],
|
|
},
|
|
{
|
|
"id": MusicService.TIDAL.value,
|
|
"name": "Tidal",
|
|
"url_patterns": self.patterns[MusicService.TIDAL],
|
|
"supported_types": ["track", "album", "playlist", "artist"],
|
|
"features": ["metadata", "download", "playlist"],
|
|
},
|
|
{
|
|
"id": MusicService.APPLE_MUSIC.value,
|
|
"name": "Apple Music",
|
|
"url_patterns": self.patterns[MusicService.APPLE_MUSIC],
|
|
"supported_types": ["track", "album", "playlist", "artist"],
|
|
"features": ["metadata", "download", "playlist"],
|
|
},
|
|
{
|
|
"id": MusicService.YOUTUBE_MUSIC.value,
|
|
"name": "YouTube Music",
|
|
"url_patterns": self.patterns[MusicService.YOUTUBE_MUSIC],
|
|
"supported_types": ["video", "playlist", "channel"],
|
|
"features": ["metadata", "download"],
|
|
},
|
|
{
|
|
"id": MusicService.YOUTUBE.value,
|
|
"name": "YouTube",
|
|
"url_patterns": self.patterns[MusicService.YOUTUBE],
|
|
"supported_types": ["video", "playlist", "channel"],
|
|
"features": ["metadata", "download"],
|
|
},
|
|
{
|
|
"id": MusicService.SOUNDCLOUD.value,
|
|
"name": "SoundCloud",
|
|
"url_patterns": self.patterns[MusicService.SOUNDCLOUD],
|
|
"supported_types": ["track", "playlist", "artist"],
|
|
"features": ["metadata", "download"],
|
|
},
|
|
{
|
|
"id": MusicService.DEEZER.value,
|
|
"name": "Deezer",
|
|
"url_patterns": self.patterns[MusicService.DEEZER],
|
|
"supported_types": ["track", "album", "playlist", "artist"],
|
|
"features": ["metadata", "download", "playlist"],
|
|
},
|
|
{
|
|
"id": MusicService.BANDCAMP.value,
|
|
"name": "Bandcamp",
|
|
"url_patterns": self.patterns[MusicService.BANDCAMP],
|
|
"supported_types": ["track", "album"],
|
|
"features": ["metadata", "download"],
|
|
},
|
|
{
|
|
"id": MusicService.MUSICBRAINZ.value,
|
|
"name": "MusicBrainz",
|
|
"url_patterns": self.patterns[MusicService.MUSICBRAINZ],
|
|
"supported_types": ["recording", "release", "artist"],
|
|
"features": ["metadata"],
|
|
},
|
|
{
|
|
"id": MusicService.DISCOGS.value,
|
|
"name": "Discogs",
|
|
"url_patterns": self.patterns[MusicService.DISCOGS],
|
|
"supported_types": ["release", "artist"],
|
|
"features": ["metadata"],
|
|
},
|
|
]
|
|
|
|
def validate_url(self, url: str) -> bool:
|
|
"""Validate if URL is from a supported service"""
|
|
return self.parse_url(url) is not None
|
|
|
|
def get_service_from_url(self, url: str) -> MusicService | None:
|
|
"""Get service type from URL without full parsing"""
|
|
if not url:
|
|
return None
|
|
|
|
url_lower = url.lower()
|
|
|
|
if "spotify.com" in url_lower or "spotify.link" in url_lower:
|
|
return MusicService.SPOTIFY
|
|
elif "tidal.com" in url_lower or "listen.tidal.com" in url_lower:
|
|
return MusicService.TIDAL
|
|
elif "music.apple.com" in url_lower:
|
|
return MusicService.APPLE_MUSIC
|
|
elif "music.youtube.com" in url_lower:
|
|
return MusicService.YOUTUBE_MUSIC
|
|
elif "youtube.com" in url_lower or "youtu.be" in url_lower:
|
|
return MusicService.YOUTUBE
|
|
elif "soundcloud.com" in url_lower:
|
|
return MusicService.SOUNDCLOUD
|
|
elif "deezer.com" in url_lower or "deezer.page.link" in url_lower:
|
|
return MusicService.DEEZER
|
|
elif "bandcamp.com" in url_lower:
|
|
return MusicService.BANDCAMP
|
|
elif "musicbrainz.org" in url_lower:
|
|
return MusicService.MUSICBRAINZ
|
|
elif "discogs.com" in url_lower:
|
|
return MusicService.DISCOGS
|
|
|
|
return None
|
|
|
|
|
|
# Global instance
|
|
universal_url_parser = UniversalMusicURLParser()
|