Files
swingmusic-extended/src/swingmusic/services/universal_url_parser.py
T
Tomas Dvorak 38f1981283 Move backend files to root level for cleaner GitHub display
- Move all backend files from swingmusic/ to root level
- Backend files now display directly on GitHub repository page
- Keep client applications as submodules (swingmusic-android, swingmusic-desktop, swingmusic-webclient)
- Update README to reflect new structure (no cd swingmusic needed)
- Cleaner, more professional GitHub repository layout

Files moved to root:
- src/ (main source code)
- pyproject.toml, requirements.txt, run.py
- swingmusic.spec, uv.lock, version.txt
- services/

Result: GitHub shows backend files directly while maintaining organized structure
2026-03-17 22:37:49 +01:00

376 lines
15 KiB
Python

"""
Universal Music URL Parser for SwingMusic
Supports multiple music streaming services for universal downloading
"""
import re
from enum import Enum
from typing import Dict, List, Optional, Tuple, Any
from dataclasses import dataclass
class MusicService(Enum):
SPOTIFY = "spotify"
TIDAL = "tidal"
APPLE_MUSIC = "apple_music"
YOUTUBE_MUSIC = "youtube_music"
YOUTUBE = "youtube"
SOUNDCLOUD = "soundcloud"
DEEZER = "deezer"
BANDCAMP = "bandcamp"
MUSICBRAINZ = "musicbrainz"
DISCOGS = "discogs"
@dataclass
class ParsedURL:
"""Represents a parsed music service URL"""
service: MusicService
url: str
item_type: str # track, album, playlist, artist, etc.
id: str
metadata: Dict[str, Any] = None
class UniversalMusicURLParser:
"""Universal parser for music service URLs"""
def __init__(self):
self.patterns = {
MusicService.SPOTIFY: [
r'https://open\.spotify\.com/(track|album|playlist|artist|user)/([a-zA-Z0-9]+)',
r'https://spotify\.link/([a-zA-Z0-9]+)', # Short links
],
MusicService.TIDAL: [
r'https://tidal\.com/(browse|track|album|playlist|artist)/(\d+)',
r'https://tidal\.com/browse/(album|track|playlist|artist)/(\d+)',
r'https://listen\.tidal\.com/(browse|track|album|playlist|artist)/(\d+)',
],
MusicService.APPLE_MUSIC: [
r'https://music\.apple\.com/([a-z]{2})/song/([^/]+)/(\d+)',
r'https://music\.apple\.com/([a-z]{2})/album/(.*?)/(\d+)',
r'https://music\.apple\.com/([a-z]{2})/playlist/(.*?)/pl\.(.+)',
r'https://music\.apple\.com/([a-z]{2})/artist/(.*?)/(\d+)',
],
MusicService.YOUTUBE_MUSIC: [
r'https://music\.youtube\.com/(watch|playlist|channel)(\?[^#]*)',
r'https://youtube\.com/music/(watch|playlist|channel)(\?[^#]*)',
],
MusicService.YOUTUBE: [
r'https://www\.youtube\.com/watch\?v=([a-zA-Z0-9_-]+)',
r'https://youtu\.be/([a-zA-Z0-9_-]+)',
r'https://www\.youtube\.com/playlist\?list=([a-zA-Z0-9_-]+)',
r'https://www\.youtube\.com/channel/([a-zA-Z0-9_-]+)',
r'https://www\.youtube\.com/c/([a-zA-Z0-9_-]+)',
],
MusicService.SOUNDCLOUD: [
r'https://soundcloud\.com/([^/]+)/([^/]+)',
r'https://soundcloud\.com/([^/]+)/sets/([^/]+)',
],
MusicService.DEEZER: [
r'https://www\.deezer\.com/(en|fr|de|es|it|pt|nl|ru|ja)/(track|album|playlist|artist)/(\d+)',
r'https://deezer\.page\.link/(track|album|playlist|artist)/(\d+)',
r'https://link\.deezer\.com/s/([a-zA-Z0-9_-]+)',
],
MusicService.BANDCAMP: [
r'https://([a-zA-Z0-9-]+)\.bandcamp\.com/(track|album)/(.+)',
r'https://bandcamp\.com/search\?q=(.+)',
],
MusicService.MUSICBRAINZ: [
r'https://musicbrainz\.org/(recording|release|release-group|artist)/([a-f0-9-]+)',
r'https://musicbrainz\.org/doc/([a-f0-9-]+)', # API docs
r'https://musicbrainz\.org/artist/([a-f0-9-]+)', # Direct artist links
r'https://musicbrainz\.org/release-group/([a-f0-9-]+)', # Release groups
r'https://musicbrainz\.org/label/([a-f0-9-]+)', # Record labels
r'https://musicbrainz\.org/search\?query=([^&]+)', # Search queries
],
MusicService.DISCOGS: [
r'https://www\.discogs\.com/(release|master|artist)/(\d+)',
]
}
def parse_url(self, url: str) -> Optional[ParsedURL]:
"""
Parse a music service URL and extract service, type, and ID
Args:
url: The URL to parse
Returns:
ParsedURL object if successful, None otherwise
"""
if not url or not isinstance(url, str):
return None
url = url.strip()
# Try each service pattern
for service, patterns in self.patterns.items():
for pattern in patterns:
match = re.match(pattern, url, re.IGNORECASE)
if match:
return self._extract_service_info(service, match, url)
return None
def _extract_service_info(self, service: MusicService, match: re.Match, url: str) -> ParsedURL:
"""Extract service-specific information from regex match"""
groups = match.groups()
if service == MusicService.SPOTIFY:
if len(groups) == 2:
item_type, item_id = groups
return ParsedURL(service, url, item_type, item_id)
elif len(groups) == 1: # Short link
# Would need to resolve short link
return ParsedURL(service, url, 'short', groups[0])
elif service == MusicService.TIDAL:
item_type, item_id = groups
return ParsedURL(service, url, item_type, item_id)
elif service == MusicService.APPLE_MUSIC:
if len(groups) >= 2:
item_type = self._map_apple_music_type(groups[0])
item_id = groups[-1] # Last group is usually the ID
return ParsedURL(service, url, item_type, item_id, {
'region': groups[0] if len(groups) > 2 else 'us',
'name': groups[1] if len(groups) > 2 else ''
})
elif service == MusicService.YOUTUBE_MUSIC:
item_type = self._extract_youtube_type(groups[0], groups[1])
item_id = self._extract_youtube_id(groups[1])
return ParsedURL(service, url, item_type, item_id)
elif service == MusicService.YOUTUBE:
if 'watch' in url:
video_id = self._extract_youtube_id(url)
return ParsedURL(service, url, 'video', video_id)
elif 'playlist' in url:
playlist_id = self._extract_youtube_playlist_id(url)
return ParsedURL(service, url, 'playlist', playlist_id)
elif 'channel' in url or '/c/' in url:
channel_id = self._extract_youtube_channel_id(url)
return ParsedURL(service, url, 'channel', channel_id)
elif service == MusicService.SOUNDCLOUD:
if len(groups) == 2:
if groups[1] == 'sets':
item_type = 'playlist'
else:
item_type = 'track' if groups[1] else 'artist'
item_id = f"{groups[0]}/{groups[1]}"
return ParsedURL(service, url, item_type, item_id)
elif service == MusicService.DEEZER:
if len(groups) == 2:
item_type, item_id = groups
else:
# Short link format: link.deezer.com/s/ID
item_type = 'track' # Default to track for short links
item_id = groups[0] if groups else ''
return ParsedURL(service, url, item_type, item_id)
elif service == MusicService.BANDCAMP:
if len(groups) == 3:
item_type, item_name = groups[1], groups[2]
item_id = f"{groups[0]}/{item_type}/{item_name}"
return ParsedURL(service, url, item_type, item_id)
elif service == MusicService.MUSICBRAINZ:
if len(groups) == 2:
item_type, item_id = groups
elif len(groups) == 1:
# Handle special cases like doc/, artist/, etc.
url_path = url.split('/')[-2] if '/' in url else ''
if 'doc/' in url:
item_type = 'doc'
elif 'artist/' in url:
item_type = 'artist'
elif 'label/' in url:
item_type = 'label'
elif 'search' in url:
item_type = 'search'
# Extract query from search URL
query_match = re.search(r'query=([^&]+)', url)
item_id = query_match.group(1) if query_match else groups[0]
else:
item_type = groups[0] if groups else 'unknown'
item_id = groups[0] if groups else ''
return ParsedURL(service, url, item_type, item_id)
elif service == MusicService.DISCOGS:
item_type, item_id = groups
return ParsedURL(service, url, item_type, item_id)
return ParsedURL(service, url, 'unknown', '')
def _map_apple_music_type(self, type_str: str) -> str:
"""Map Apple Music URL types to standard types"""
mapping = {
'album': 'album',
'playlist': 'playlist',
'artist': 'artist',
'song': 'song'
}
return mapping.get(type_str, 'unknown')
def _extract_youtube_type(self, path: str, query: str) -> str:
"""Extract YouTube content type from URL"""
if 'watch' in path or 'v=' in query:
return 'watch'
elif 'playlist' in path or 'list=' in query:
return 'playlist'
elif 'channel' in path:
return 'channel'
return 'unknown'
def _extract_youtube_id(self, url: str) -> str:
"""Extract YouTube video or channel ID from URL"""
# Video ID
video_match = re.search(r'[?&]v=([a-zA-Z0-9_-]+)', url)
if video_match:
return video_match.group(1)
# Short URL
short_match = re.search(r'youtu\.be/([a-zA-Z0-9_-]+)', url)
if short_match:
return short_match.group(1)
# Channel ID
channel_match = re.search(r'channel/([a-zA-Z0-9_-]+)', url)
if channel_match:
return channel_match.group(1)
# Custom channel
custom_match = re.search(r'/c/([a-zA-Z0-9_-]+)', url)
if custom_match:
return custom_match.group(1)
return ''
def _extract_youtube_playlist_id(self, url: str) -> str:
"""Extract YouTube playlist ID from URL"""
match = re.search(r'[?&]list=([a-zA-Z0-9_-]+)', url)
return match.group(1) if match else ''
def _extract_youtube_channel_id(self, url: str) -> str:
"""Extract YouTube channel ID from URL"""
# Handle both /channel/ and /c/ formats
channel_match = re.search(r'/(channel|c)/([a-zA-Z0-9_-]+)', url)
return channel_match.group(2) if channel_match else ''
def get_supported_services(self) -> List[Dict[str, Any]]:
"""Get list of supported services with their info"""
return [
{
'id': MusicService.SPOTIFY.value,
'name': 'Spotify',
'url_patterns': self.patterns[MusicService.SPOTIFY],
'supported_types': ['track', 'album', 'playlist', 'artist'],
'features': ['metadata', 'download', 'playlist']
},
{
'id': MusicService.TIDAL.value,
'name': 'Tidal',
'url_patterns': self.patterns[MusicService.TIDAL],
'supported_types': ['track', 'album', 'playlist', 'artist'],
'features': ['metadata', 'download', 'playlist']
},
{
'id': MusicService.APPLE_MUSIC.value,
'name': 'Apple Music',
'url_patterns': self.patterns[MusicService.APPLE_MUSIC],
'supported_types': ['track', 'album', 'playlist', 'artist'],
'features': ['metadata', 'download', 'playlist']
},
{
'id': MusicService.YOUTUBE_MUSIC.value,
'name': 'YouTube Music',
'url_patterns': self.patterns[MusicService.YOUTUBE_MUSIC],
'supported_types': ['video', 'playlist', 'channel'],
'features': ['metadata', 'download']
},
{
'id': MusicService.YOUTUBE.value,
'name': 'YouTube',
'url_patterns': self.patterns[MusicService.YOUTUBE],
'supported_types': ['video', 'playlist', 'channel'],
'features': ['metadata', 'download']
},
{
'id': MusicService.SOUNDCLOUD.value,
'name': 'SoundCloud',
'url_patterns': self.patterns[MusicService.SOUNDCLOUD],
'supported_types': ['track', 'playlist', 'artist'],
'features': ['metadata', 'download']
},
{
'id': MusicService.DEEZER.value,
'name': 'Deezer',
'url_patterns': self.patterns[MusicService.DEEZER],
'supported_types': ['track', 'album', 'playlist', 'artist'],
'features': ['metadata', 'download', 'playlist']
},
{
'id': MusicService.BANDCAMP.value,
'name': 'Bandcamp',
'url_patterns': self.patterns[MusicService.BANDCAMP],
'supported_types': ['track', 'album'],
'features': ['metadata', 'download']
},
{
'id': MusicService.MUSICBRAINZ.value,
'name': 'MusicBrainz',
'url_patterns': self.patterns[MusicService.MUSICBRAINZ],
'supported_types': ['recording', 'release', 'artist'],
'features': ['metadata']
},
{
'id': MusicService.DISCOGS.value,
'name': 'Discogs',
'url_patterns': self.patterns[MusicService.DISCOGS],
'supported_types': ['release', 'artist'],
'features': ['metadata']
}
]
def validate_url(self, url: str) -> bool:
"""Validate if URL is from a supported service"""
return self.parse_url(url) is not None
def get_service_from_url(self, url: str) -> Optional[MusicService]:
"""Get service type from URL without full parsing"""
if not url:
return None
url_lower = url.lower()
if 'spotify.com' in url_lower or 'spotify.link' in url_lower:
return MusicService.SPOTIFY
elif 'tidal.com' in url_lower or 'listen.tidal.com' in url_lower:
return MusicService.TIDAL
elif 'music.apple.com' in url_lower:
return MusicService.APPLE_MUSIC
elif 'music.youtube.com' in url_lower:
return MusicService.YOUTUBE_MUSIC
elif 'youtube.com' in url_lower or 'youtu.be' in url_lower:
return MusicService.YOUTUBE
elif 'soundcloud.com' in url_lower:
return MusicService.SOUNDCLOUD
elif 'deezer.com' in url_lower or 'deezer.page.link' in url_lower:
return MusicService.DEEZER
elif 'bandcamp.com' in url_lower:
return MusicService.BANDCAMP
elif 'musicbrainz.org' in url_lower:
return MusicService.MUSICBRAINZ
elif 'discogs.com' in url_lower:
return MusicService.DISCOGS
return None
# Global instance
universal_url_parser = UniversalMusicURLParser()