first commit

This commit is contained in:
Tomas Dvorak
2026-04-13 17:46:58 +02:00
commit 6e8fedf534
234 changed files with 53808 additions and 0 deletions
+384
View File
@@ -0,0 +1,384 @@
"""
Universal Music URL Parser for SwingMusic
Supports multiple music streaming services for universal downloading
"""
import re
from dataclasses import dataclass
from enum import Enum
from typing import Any
class MusicService(Enum):
SPOTIFY = "spotify"
TIDAL = "tidal"
APPLE_MUSIC = "apple_music"
YOUTUBE_MUSIC = "youtube_music"
YOUTUBE = "youtube"
SOUNDCLOUD = "soundcloud"
DEEZER = "deezer"
BANDCAMP = "bandcamp"
MUSICBRAINZ = "musicbrainz"
DISCOGS = "discogs"
@dataclass
class ParsedURL:
"""Represents a parsed music service URL"""
service: MusicService
url: str
item_type: str # track, album, playlist, artist, etc.
id: str
metadata: dict[str, Any] = None
class UniversalMusicURLParser:
"""Universal parser for music service URLs"""
def __init__(self):
self.patterns = {
MusicService.SPOTIFY: [
r"https://open\.spotify\.com/(track|album|playlist|artist|user)/([a-zA-Z0-9]+)",
r"https://spotify\.link/([a-zA-Z0-9]+)", # Short links
],
MusicService.TIDAL: [
r"https://tidal\.com/(browse|track|album|playlist|artist)/(\d+)",
r"https://tidal\.com/browse/(album|track|playlist|artist)/(\d+)",
r"https://listen\.tidal\.com/(browse|track|album|playlist|artist)/(\d+)",
],
MusicService.APPLE_MUSIC: [
r"https://music\.apple\.com/([a-z]{2})/song/([^/]+)/(\d+)",
r"https://music\.apple\.com/([a-z]{2})/album/(.*?)/(\d+)",
r"https://music\.apple\.com/([a-z]{2})/playlist/(.*?)/pl\.(.+)",
r"https://music\.apple\.com/([a-z]{2})/artist/(.*?)/(\d+)",
],
MusicService.YOUTUBE_MUSIC: [
r"https://music\.youtube\.com/(watch|playlist|channel)(\?[^#]*)",
r"https://youtube\.com/music/(watch|playlist|channel)(\?[^#]*)",
],
MusicService.YOUTUBE: [
r"https://www\.youtube\.com/watch\?v=([a-zA-Z0-9_-]+)",
r"https://youtu\.be/([a-zA-Z0-9_-]+)",
r"https://www\.youtube\.com/playlist\?list=([a-zA-Z0-9_-]+)",
r"https://www\.youtube\.com/channel/([a-zA-Z0-9_-]+)",
r"https://www\.youtube\.com/c/([a-zA-Z0-9_-]+)",
],
MusicService.SOUNDCLOUD: [
r"https://soundcloud\.com/([^/]+)/([^/]+)",
r"https://soundcloud\.com/([^/]+)/sets/([^/]+)",
],
MusicService.DEEZER: [
r"https://www\.deezer\.com/(en|fr|de|es|it|pt|nl|ru|ja)/(track|album|playlist|artist)/(\d+)",
r"https://deezer\.page\.link/(track|album|playlist|artist)/(\d+)",
r"https://link\.deezer\.com/s/([a-zA-Z0-9_-]+)",
],
MusicService.BANDCAMP: [
r"https://([a-zA-Z0-9-]+)\.bandcamp\.com/(track|album)/(.+)",
r"https://bandcamp\.com/search\?q=(.+)",
],
MusicService.MUSICBRAINZ: [
r"https://musicbrainz\.org/(recording|release|release-group|artist)/([a-f0-9-]+)",
r"https://musicbrainz\.org/doc/([a-f0-9-]+)", # API docs
r"https://musicbrainz\.org/artist/([a-f0-9-]+)", # Direct artist links
r"https://musicbrainz\.org/release-group/([a-f0-9-]+)", # Release groups
r"https://musicbrainz\.org/label/([a-f0-9-]+)", # Record labels
r"https://musicbrainz\.org/search\?query=([^&]+)", # Search queries
],
MusicService.DISCOGS: [
r"https://www\.discogs\.com/(release|master|artist)/(\d+)",
],
}
def parse_url(self, url: str) -> ParsedURL | None:
"""
Parse a music service URL and extract service, type, and ID
Args:
url: The URL to parse
Returns:
ParsedURL object if successful, None otherwise
"""
if not url or not isinstance(url, str):
return None
url = url.strip()
# Try each service pattern
for service, patterns in self.patterns.items():
for pattern in patterns:
match = re.match(pattern, url, re.IGNORECASE)
if match:
return self._extract_service_info(service, match, url)
return None
def _extract_service_info(
self, service: MusicService, match: re.Match, url: str
) -> ParsedURL:
"""Extract service-specific information from regex match"""
groups = match.groups()
if service == MusicService.SPOTIFY:
if len(groups) == 2:
item_type, item_id = groups
return ParsedURL(service, url, item_type, item_id)
elif len(groups) == 1: # Short link
# Would need to resolve short link
return ParsedURL(service, url, "short", groups[0])
elif service == MusicService.TIDAL:
item_type, item_id = groups
return ParsedURL(service, url, item_type, item_id)
elif service == MusicService.APPLE_MUSIC:
if len(groups) >= 2:
item_type = self._map_apple_music_type(groups[0])
item_id = groups[-1] # Last group is usually the ID
return ParsedURL(
service,
url,
item_type,
item_id,
{
"region": groups[0] if len(groups) > 2 else "us",
"name": groups[1] if len(groups) > 2 else "",
},
)
elif service == MusicService.YOUTUBE_MUSIC:
item_type = self._extract_youtube_type(groups[0], groups[1])
item_id = self._extract_youtube_id(groups[1])
return ParsedURL(service, url, item_type, item_id)
elif service == MusicService.YOUTUBE:
if "watch" in url:
video_id = self._extract_youtube_id(url)
return ParsedURL(service, url, "video", video_id)
elif "playlist" in url:
playlist_id = self._extract_youtube_playlist_id(url)
return ParsedURL(service, url, "playlist", playlist_id)
elif "channel" in url or "/c/" in url:
channel_id = self._extract_youtube_channel_id(url)
return ParsedURL(service, url, "channel", channel_id)
elif service == MusicService.SOUNDCLOUD:
if len(groups) == 2:
if groups[1] == "sets":
item_type = "playlist"
else:
item_type = "track" if groups[1] else "artist"
item_id = f"{groups[0]}/{groups[1]}"
return ParsedURL(service, url, item_type, item_id)
elif service == MusicService.DEEZER:
if len(groups) == 2:
item_type, item_id = groups
else:
# Short link format: link.deezer.com/s/ID
item_type = "track" # Default to track for short links
item_id = groups[0] if groups else ""
return ParsedURL(service, url, item_type, item_id)
elif service == MusicService.BANDCAMP:
if len(groups) == 3:
item_type, item_name = groups[1], groups[2]
item_id = f"{groups[0]}/{item_type}/{item_name}"
return ParsedURL(service, url, item_type, item_id)
elif service == MusicService.MUSICBRAINZ:
if len(groups) == 2:
item_type, item_id = groups
elif len(groups) == 1:
# Handle special cases like doc/, artist/, etc.
url.split("/")[-2] if "/" in url else ""
if "doc/" in url:
item_type = "doc"
elif "artist/" in url:
item_type = "artist"
elif "label/" in url:
item_type = "label"
elif "search" in url:
item_type = "search"
# Extract query from search URL
query_match = re.search(r"query=([^&]+)", url)
item_id = query_match.group(1) if query_match else groups[0]
else:
item_type = groups[0] if groups else "unknown"
item_id = groups[0] if groups else ""
return ParsedURL(service, url, item_type, item_id)
elif service == MusicService.DISCOGS:
item_type, item_id = groups
return ParsedURL(service, url, item_type, item_id)
return ParsedURL(service, url, "unknown", "")
def _map_apple_music_type(self, type_str: str) -> str:
"""Map Apple Music URL types to standard types"""
mapping = {
"album": "album",
"playlist": "playlist",
"artist": "artist",
"song": "song",
}
return mapping.get(type_str, "unknown")
def _extract_youtube_type(self, path: str, query: str) -> str:
"""Extract YouTube content type from URL"""
if "watch" in path or "v=" in query:
return "watch"
elif "playlist" in path or "list=" in query:
return "playlist"
elif "channel" in path:
return "channel"
return "unknown"
def _extract_youtube_id(self, url: str) -> str:
"""Extract YouTube video or channel ID from URL"""
# Video ID
video_match = re.search(r"[?&]v=([a-zA-Z0-9_-]+)", url)
if video_match:
return video_match.group(1)
# Short URL
short_match = re.search(r"youtu\.be/([a-zA-Z0-9_-]+)", url)
if short_match:
return short_match.group(1)
# Channel ID
channel_match = re.search(r"channel/([a-zA-Z0-9_-]+)", url)
if channel_match:
return channel_match.group(1)
# Custom channel
custom_match = re.search(r"/c/([a-zA-Z0-9_-]+)", url)
if custom_match:
return custom_match.group(1)
return ""
def _extract_youtube_playlist_id(self, url: str) -> str:
"""Extract YouTube playlist ID from URL"""
match = re.search(r"[?&]list=([a-zA-Z0-9_-]+)", url)
return match.group(1) if match else ""
def _extract_youtube_channel_id(self, url: str) -> str:
"""Extract YouTube channel ID from URL"""
# Handle both /channel/ and /c/ formats
channel_match = re.search(r"/(channel|c)/([a-zA-Z0-9_-]+)", url)
return channel_match.group(2) if channel_match else ""
def get_supported_services(self) -> list[dict[str, Any]]:
"""Get list of supported services with their info"""
return [
{
"id": MusicService.SPOTIFY.value,
"name": "Spotify",
"url_patterns": self.patterns[MusicService.SPOTIFY],
"supported_types": ["track", "album", "playlist", "artist"],
"features": ["metadata", "download", "playlist"],
},
{
"id": MusicService.TIDAL.value,
"name": "Tidal",
"url_patterns": self.patterns[MusicService.TIDAL],
"supported_types": ["track", "album", "playlist", "artist"],
"features": ["metadata", "download", "playlist"],
},
{
"id": MusicService.APPLE_MUSIC.value,
"name": "Apple Music",
"url_patterns": self.patterns[MusicService.APPLE_MUSIC],
"supported_types": ["track", "album", "playlist", "artist"],
"features": ["metadata", "download", "playlist"],
},
{
"id": MusicService.YOUTUBE_MUSIC.value,
"name": "YouTube Music",
"url_patterns": self.patterns[MusicService.YOUTUBE_MUSIC],
"supported_types": ["video", "playlist", "channel"],
"features": ["metadata", "download"],
},
{
"id": MusicService.YOUTUBE.value,
"name": "YouTube",
"url_patterns": self.patterns[MusicService.YOUTUBE],
"supported_types": ["video", "playlist", "channel"],
"features": ["metadata", "download"],
},
{
"id": MusicService.SOUNDCLOUD.value,
"name": "SoundCloud",
"url_patterns": self.patterns[MusicService.SOUNDCLOUD],
"supported_types": ["track", "playlist", "artist"],
"features": ["metadata", "download"],
},
{
"id": MusicService.DEEZER.value,
"name": "Deezer",
"url_patterns": self.patterns[MusicService.DEEZER],
"supported_types": ["track", "album", "playlist", "artist"],
"features": ["metadata", "download", "playlist"],
},
{
"id": MusicService.BANDCAMP.value,
"name": "Bandcamp",
"url_patterns": self.patterns[MusicService.BANDCAMP],
"supported_types": ["track", "album"],
"features": ["metadata", "download"],
},
{
"id": MusicService.MUSICBRAINZ.value,
"name": "MusicBrainz",
"url_patterns": self.patterns[MusicService.MUSICBRAINZ],
"supported_types": ["recording", "release", "artist"],
"features": ["metadata"],
},
{
"id": MusicService.DISCOGS.value,
"name": "Discogs",
"url_patterns": self.patterns[MusicService.DISCOGS],
"supported_types": ["release", "artist"],
"features": ["metadata"],
},
]
def validate_url(self, url: str) -> bool:
"""Validate if URL is from a supported service"""
return self.parse_url(url) is not None
def get_service_from_url(self, url: str) -> MusicService | None:
"""Get service type from URL without full parsing"""
if not url:
return None
url_lower = url.lower()
if "spotify.com" in url_lower or "spotify.link" in url_lower:
return MusicService.SPOTIFY
elif "tidal.com" in url_lower or "listen.tidal.com" in url_lower:
return MusicService.TIDAL
elif "music.apple.com" in url_lower:
return MusicService.APPLE_MUSIC
elif "music.youtube.com" in url_lower:
return MusicService.YOUTUBE_MUSIC
elif "youtube.com" in url_lower or "youtu.be" in url_lower:
return MusicService.YOUTUBE
elif "soundcloud.com" in url_lower:
return MusicService.SOUNDCLOUD
elif "deezer.com" in url_lower or "deezer.page.link" in url_lower:
return MusicService.DEEZER
elif "bandcamp.com" in url_lower:
return MusicService.BANDCAMP
elif "musicbrainz.org" in url_lower:
return MusicService.MUSICBRAINZ
elif "discogs.com" in url_lower:
return MusicService.DISCOGS
return None
# Global instance
universal_url_parser = UniversalMusicURLParser()