mirror of
https://github.com/Dvorinka/Trackeep.git
synced 2026-06-03 20:12:58 +00:00
517 lines
14 KiB
Go
517 lines
14 KiB
Go
package services
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
)
|
|
|
|
// YouTubeIntegratedService groups the YouTube scraping features of this
// package (video search, channel video listing and single-video lookup)
// behind one shared HTTP client.
type YouTubeIntegratedService struct {
	// httpClient issues every outbound request made by the service's methods.
	httpClient *http.Client
}
|
|
|
|
// NewYouTubeIntegratedService creates a new integrated YouTube service
|
|
func NewYouTubeIntegratedService() *YouTubeIntegratedService {
|
|
return &YouTubeIntegratedService{
|
|
httpClient: &http.Client{
|
|
Timeout: 30 * time.Second,
|
|
},
|
|
}
|
|
}
|
|
|
|
// SearchVideosIntegrated performs YouTube video search
|
|
func (y *YouTubeIntegratedService) SearchVideosIntegrated(query string, limit int) ([]YouTubeSearchVideo, error) {
|
|
url := fmt.Sprintf(
|
|
"https://www.youtube.com/results?search_query=%s",
|
|
url.QueryEscape(query),
|
|
)
|
|
|
|
req, err := http.NewRequest("GET", url, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36")
|
|
|
|
resp, err := y.httpClient.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
body, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
html := string(body)
|
|
videoRe := regexp.MustCompile(`"videoRenderer":{"videoId":"([^"]{11})"`)
|
|
|
|
results := []YouTubeSearchVideo{}
|
|
seen := map[string]bool{}
|
|
|
|
videoMatches := videoRe.FindAllStringSubmatchIndex(html, -1)
|
|
|
|
for _, match := range videoMatches {
|
|
if len(results) >= limit {
|
|
break
|
|
}
|
|
|
|
if len(match) < 4 {
|
|
continue
|
|
}
|
|
|
|
videoID := html[match[2]:match[3]]
|
|
if _, ok := seen[videoID]; ok {
|
|
continue
|
|
}
|
|
seen[videoID] = true
|
|
|
|
// Extract title and channel from surrounding context
|
|
start := match[0]
|
|
if start-2000 > 0 {
|
|
start = start - 2000
|
|
}
|
|
end := match[1] + 2000
|
|
if end > len(html) {
|
|
end = len(html)
|
|
}
|
|
snippet := html[start:end]
|
|
|
|
title := ""
|
|
channel := ""
|
|
|
|
if m := regexp.MustCompile(`"title":\{"runs":\[\{"text":"([^"]+)"`).FindStringSubmatch(snippet); len(m) >= 2 {
|
|
title = unescapeYT(m[1])
|
|
} else if m := regexp.MustCompile(`"title":\{"simpleText":"([^"]+)"`).FindStringSubmatch(snippet); len(m) >= 2 {
|
|
title = unescapeYT(m[1])
|
|
}
|
|
|
|
if m := regexp.MustCompile(`"longBylineText":\{"runs":\[\{"text":"([^"]+)"`).FindStringSubmatch(snippet); len(m) >= 2 {
|
|
channel = unescapeYT(m[1])
|
|
}
|
|
|
|
if title == "" {
|
|
title = "Video " + videoID
|
|
}
|
|
|
|
results = append(results, YouTubeSearchVideo{
|
|
VideoID: videoID,
|
|
Title: title,
|
|
ChannelName: channel,
|
|
Thumbnail: fmt.Sprintf("https://img.youtube.com/vi/%s/maxresdefault.jpg", videoID),
|
|
})
|
|
}
|
|
|
|
return results, nil
|
|
}
|
|
|
|
// ChannelVideosResponse represents the response for channel videos scraping
|
|
type ChannelVideosResponse struct {
|
|
Channel string `json:"channel"`
|
|
ChannelURL string `json:"channel_url"`
|
|
SubscribersText string `json:"subscribers_text"`
|
|
Subscribers int64 `json:"subscribers"`
|
|
Videos []VideoItem `json:"videos"`
|
|
}
|
|
|
|
// GetChannelVideosIntegrated fetches channel videos directly
|
|
func (y *YouTubeIntegratedService) GetChannelVideosIntegrated(channelInput string) (ChannelVideosResponse, error) {
|
|
handle, channelURL := normalizeChannelInput(channelInput)
|
|
|
|
req, err := http.NewRequest("GET", channelURL, nil)
|
|
if err != nil {
|
|
return ChannelVideosResponse{}, err
|
|
}
|
|
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36")
|
|
req.Header.Set("Accept-Language", "en-US,en;q=0.9")
|
|
|
|
resp, err := y.httpClient.Do(req)
|
|
if err != nil {
|
|
return ChannelVideosResponse{}, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
return ChannelVideosResponse{}, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
|
}
|
|
|
|
body, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return ChannelVideosResponse{}, err
|
|
}
|
|
html := string(body)
|
|
|
|
// Extract video IDs and metadata
|
|
vidRe := regexp.MustCompile(`"videoRenderer":\{[^}]*?"videoId":"([a-zA-Z0-9_-]{11})"`)
|
|
matches := vidRe.FindAllStringSubmatchIndex(html, -1)
|
|
seen := make(map[string]struct{})
|
|
var videos []VideoItem
|
|
|
|
for _, idx := range matches {
|
|
if len(idx) < 4 {
|
|
continue
|
|
}
|
|
|
|
videoID := html[idx[2]:idx[3]]
|
|
if _, ok := seen[videoID]; ok {
|
|
continue
|
|
}
|
|
seen[videoID] = struct{}{}
|
|
|
|
start := idx[0]
|
|
if start-2000 > 0 {
|
|
start = start - 2000
|
|
}
|
|
end := idx[1] + 8000
|
|
if end > len(html) {
|
|
end = len(html)
|
|
}
|
|
snippet := html[start:end]
|
|
|
|
vi := VideoItem{VideoID: videoID}
|
|
vi.ThumbnailURL = fmt.Sprintf("https://img.youtube.com/vi/%s/maxresdefault.jpg", videoID)
|
|
|
|
// Extract metadata
|
|
if m := regexp.MustCompile(`"title":\{"runs":\[\{"text":"([^"]+)"`).FindStringSubmatch(snippet); len(m) >= 2 {
|
|
vi.Title = unescapeYT(m[1])
|
|
} else if m := regexp.MustCompile(`"title":\{"simpleText":"([^"]+)"`).FindStringSubmatch(snippet); len(m) >= 2 {
|
|
vi.Title = unescapeYT(m[1])
|
|
}
|
|
|
|
if m := regexp.MustCompile(`"lengthText":\{[^}]*"simpleText":"([^"]+)"`).FindStringSubmatch(snippet); len(m) >= 2 {
|
|
vi.Length = m[1]
|
|
}
|
|
|
|
if m := regexp.MustCompile(`"publishedTimeText":\{"simpleText":"([^"]+)"`).FindStringSubmatch(snippet); len(m) >= 2 {
|
|
vi.PublishedText = m[1]
|
|
vi.PublishedDate = parseRelativeToISO(m[1])
|
|
}
|
|
|
|
if m := regexp.MustCompile(`"viewCountText":\{"simpleText":"([^"]+)"`).FindStringSubmatch(snippet); len(m) >= 2 {
|
|
vi.ViewsText = m[1]
|
|
vi.Views = parseCountText(m[1])
|
|
}
|
|
|
|
videos = append(videos, vi)
|
|
}
|
|
|
|
// Extract channel info
|
|
channelDisplay := handle
|
|
if m := regexp.MustCompile(`"canonicalBaseUrl":"\\/(@[^\"]+)"`).FindStringSubmatch(html); len(m) >= 2 {
|
|
channelDisplay = m[1]
|
|
}
|
|
|
|
subText := ""
|
|
if m := regexp.MustCompile(`"subscriberCountText":\{"simpleText":"([^"]+)"`).FindStringSubmatch(html); len(m) >= 2 {
|
|
subText = m[1]
|
|
}
|
|
subs := parseCountText(subText)
|
|
|
|
return ChannelVideosResponse{
|
|
Channel: channelDisplay,
|
|
ChannelURL: channelURL,
|
|
SubscribersText: subText,
|
|
Subscribers: subs,
|
|
Videos: videos,
|
|
}, nil
|
|
}
|
|
|
|
// IntegratedVideoInfo is the outcome of a single-video lookup. Failures are
// reported in-band: Success is false and Error holds the reason.
type IntegratedVideoInfo struct {
	// VideoID is the ID extracted from the requested URL.
	VideoID string `json:"video_id"`
	// Title is the scraped page title.
	Title string `json:"title"`
	// Channel is the scraped channel name; it may be empty.
	Channel string `json:"channel"`
	// Thumbnail is the thumbnail image URL for the video.
	Thumbnail string `json:"thumbnail_url"`
	// Success tells whether the lookup produced usable data.
	Success bool `json:"success"`
	// Error describes the failure; it is omitted from JSON when empty.
	Error string `json:"error,omitempty"`
}
|
|
|
|
// GetVideoDetailsIntegrated scrapes individual video details
|
|
func (y *YouTubeIntegratedService) GetVideoDetailsIntegrated(videoURL string) (IntegratedVideoInfo, error) {
|
|
videoID := extractVideoID(videoURL)
|
|
if videoID == "" {
|
|
return IntegratedVideoInfo{
|
|
Success: false,
|
|
Error: "Invalid YouTube URL",
|
|
}, nil
|
|
}
|
|
|
|
url := fmt.Sprintf("https://www.youtube.com/watch?v=%s", videoID)
|
|
req, err := http.NewRequest("GET", url, nil)
|
|
if err != nil {
|
|
return IntegratedVideoInfo{
|
|
Success: false,
|
|
Error: fmt.Sprintf("Failed to create request: %v", err),
|
|
}, nil
|
|
}
|
|
|
|
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
|
|
|
|
resp, err := y.httpClient.Do(req)
|
|
if err != nil {
|
|
return IntegratedVideoInfo{
|
|
Success: false,
|
|
Error: fmt.Sprintf("Failed to fetch page: %v", err),
|
|
}, nil
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != 200 {
|
|
return IntegratedVideoInfo{
|
|
Success: false,
|
|
Error: fmt.Sprintf("HTTP %d", resp.StatusCode),
|
|
}, nil
|
|
}
|
|
|
|
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
|
if err != nil {
|
|
return IntegratedVideoInfo{
|
|
Success: false,
|
|
Error: fmt.Sprintf("Failed to parse HTML: %v", err),
|
|
}, nil
|
|
}
|
|
|
|
title := ""
|
|
channel := ""
|
|
|
|
doc.Find("title").Each(func(i int, s *goquery.Selection) {
|
|
if title == "" {
|
|
title = s.Text()
|
|
title = strings.TrimSuffix(title, " - YouTube")
|
|
}
|
|
})
|
|
|
|
doc.Find("a.yt-simple-endpoint.style-scope.yt-formatted-string").Each(func(i int, s *goquery.Selection) {
|
|
if channel == "" && strings.Contains(s.AttrOr("href", ""), "/@") {
|
|
channel = s.Text()
|
|
}
|
|
})
|
|
|
|
if title == "" {
|
|
title = "Video " + videoID
|
|
}
|
|
|
|
return IntegratedVideoInfo{
|
|
VideoID: videoID,
|
|
Title: title,
|
|
Channel: channel,
|
|
Thumbnail: fmt.Sprintf("https://img.youtube.com/vi/%s/maxresdefault.jpg", videoID),
|
|
Success: true,
|
|
}, nil
|
|
}
|
|
|
|
// Helper functions
|
|
|
|
// normalizeChannelInput turns a user-supplied channel reference into a
// display handle and the URL of the page to scrape.
//
// Accepted inputs:
//   - a bare handle, with or without the leading "@";
//   - a YouTube URL, with or without scheme ("youtube.com/...", "www...."),
//     including m.youtube.com, pointing at /@handle, /channel/<id>,
//     /c/<name>, /user/<name> or a legacy /<name> path.
//
// The returned url points at the channel's /videos tab unless the input
// already names a /videos, /shorts or /streams tab, in which case the input
// URL is used as given. Any query string or fragment following a handle is
// ignored when the URL is rebuilt. URLs that cannot be interpreted are
// returned unchanged, with handle set to the "@"-prefixed input.
//
// For /channel/<id> URLs handle is the raw channel ID; for every other form
// it starts with "@".
func normalizeChannelInput(input string) (handle string, url string) {
	in := strings.TrimSpace(input)
	lower := strings.ToLower(in)

	schemeless := strings.HasPrefix(lower, "www.") || strings.HasPrefix(lower, "youtube.com/")
	isURL := schemeless || strings.HasPrefix(lower, "http://") || strings.HasPrefix(lower, "https://")

	if !isURL {
		// Anything that is not a URL is treated as a handle.
		handle = in
		if !strings.HasPrefix(handle, "@") {
			handle = "@" + handle
		}
		return handle, fmt.Sprintf("https://www.youtube.com/%s/videos", handle)
	}

	if schemeless {
		in = "https://" + strings.TrimPrefix(in, "www.")
		lowered := strings.ToLower(in)
		if !strings.HasPrefix(lowered, "https://youtube.com/") && !strings.HasPrefix(lowered, "https://www.youtube.com/") {
			in = "https://www." + strings.TrimPrefix(in, "https://")
		}
	}
	in = strings.ReplaceAll(in, "m.youtube.com", "www.youtube.com")

	// The handle and path segments stop at '?' and '#' so tracking parameters
	// and fragments never end up inside the handle or the rebuilt URL.
	reHandle := regexp.MustCompile(`https?://(www\.)?youtube\.com/(@[^/?#]+)`)
	reLegacy := regexp.MustCompile(`https?://(www\.)?youtube\.com/(channel|c|user)/([^/?#]+)`)
	rePath := regexp.MustCompile(`https?://(www\.)?youtube\.com/([^/?#]+)`)

	// base is the channel root for URL forms that are not addressed by
	// handle; those must keep their /channel/, /c/ or /user/ prefix.
	base := ""
	if m := reHandle.FindStringSubmatch(in); m != nil {
		handle = m[2]
	} else if m := reLegacy.FindStringSubmatch(in); m != nil {
		base = "https://www.youtube.com/" + m[2] + "/" + m[3]
		if m[2] == "channel" {
			handle = m[3]
		} else {
			handle = "@" + m[3]
		}
	} else if m := rePath.FindStringSubmatch(in); m != nil {
		handle = m[2]
		if !strings.HasPrefix(handle, "@") {
			handle = "@" + handle
		}
	}

	lowered := strings.ToLower(in)
	namesTab := strings.Contains(lowered, "/videos") ||
		strings.Contains(lowered, "/shorts") ||
		strings.Contains(lowered, "/streams")

	switch {
	case namesTab:
		url = in
	case base != "":
		url = base + "/videos"
	case handle == "":
		url = in
	default:
		url = fmt.Sprintf("https://www.youtube.com/%s/videos", handle)
	}

	if handle == "" {
		handle = in
		if !strings.HasPrefix(handle, "@") {
			handle = "@" + handle
		}
	}
	return handle, url
}
|
|
|
|
// unescapeYT decodes the JSON string escapes found in text captured from
// YouTube's embedded page data.
//
// It handles \/, \\, \", the control escapes \b \f \n \r \t, and \uXXXX
// sequences including UTF-16 surrogate pairs. Anything it does not recognise
// (an unknown escape, a truncated \u sequence, a trailing backslash) is
// copied through unchanged, so malformed input is never dropped.
func unescapeYT(s string) string {
	if strings.IndexByte(s, '\\') < 0 {
		return s // fast path: nothing to decode
	}

	var b strings.Builder
	b.Grow(len(s))
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c != '\\' || i+1 == len(s) {
			b.WriteByte(c)
			continue
		}

		i++ // step onto the character that selects the escape
		switch s[i] {
		case '/', '\\', '"':
			b.WriteByte(s[i])
		case 'b':
			b.WriteByte('\b')
		case 'f':
			b.WriteByte('\f')
		case 'n':
			b.WriteByte('\n')
		case 'r':
			b.WriteByte('\r')
		case 't':
			b.WriteByte('\t')
		case 'u':
			r, used := decodeYTUnicodeEscape(s[i+1:])
			if used == 0 {
				// Not a valid \uXXXX sequence; keep it verbatim.
				b.WriteString(`\u`)
				continue
			}
			b.WriteRune(r)
			i += used
		default:
			b.WriteByte('\\')
			b.WriteByte(s[i])
		}
	}
	return b.String()
}

// decodeYTUnicodeEscape decodes the hex digits that follow a "\u" marker.
// rest is the text immediately after the marker. It returns the decoded rune
// and the number of bytes of rest consumed: 4 for a single escape, 10 when a
// high surrogate is followed by a matching "\uXXXX" low surrogate, and 0 when
// rest does not start with four hex digits.
func decodeYTUnicodeEscape(rest string) (rune, int) {
	if len(rest) < 4 {
		return 0, 0
	}
	hi, err := strconv.ParseUint(rest[:4], 16, 16)
	if err != nil {
		return 0, 0
	}

	r := rune(hi)
	if r >= 0xD800 && r <= 0xDBFF && len(rest) >= 10 && rest[4] == '\\' && rest[5] == 'u' {
		if lo, err := strconv.ParseUint(rest[6:10], 16, 16); err == nil && lo >= 0xDC00 && lo <= 0xDFFF {
			combined := 0x10000 + ((r - 0xD800) << 10) + (rune(lo) - 0xDC00)
			return combined, 10
		}
	}
	return r, 4
}
|
|
|
|
// ytRelativeAgeRe matches relative ages such as "3 days ago" or "1 year ago"
// in lower-cased text. It is compiled once because parseRelativeToISO runs
// for every scraped video.
var ytRelativeAgeRe = regexp.MustCompile(`(\d+)[\s-]*(second|minute|hour|day|week|month|year)s?\s+ago`)

// parseRelativeToISO converts a relative age ("2 weeks ago", "Streamed 5
// hours ago", case-insensitive) into a calendar date formatted as
// "2006-01-02", computed from the current local time.
//
// It returns "" when rel contains no recognisable age or when the number is
// too large to represent.
func parseRelativeToISO(rel string) string {
	m := ytRelativeAgeRe.FindStringSubmatch(strings.ToLower(rel))
	if m == nil {
		return ""
	}

	n, err := strconv.Atoi(m[1])
	if err != nil {
		// Only a range error is possible here (the pattern guarantees
		// digits); a date derived from the clamped value would be garbage.
		return ""
	}

	now := time.Now()
	var then time.Time
	switch m[2] {
	case "second":
		then = now.Add(-time.Duration(n) * time.Second)
	case "minute":
		then = now.Add(-time.Duration(n) * time.Minute)
	case "hour":
		then = now.Add(-time.Duration(n) * time.Hour)
	case "day":
		then = now.AddDate(0, 0, -n)
	case "week":
		then = now.AddDate(0, 0, -7*n)
	case "month":
		then = now.AddDate(0, -n, 0)
	case "year":
		then = now.AddDate(-n, 0, 0)
	default:
		return ""
	}
	return then.Format("2006-01-02")
}
|
|
|
|
// ytCountTextRe matches the first number in lower-cased count text, with an
// optional decimal part and an optional k/m/b magnitude suffix directly
// attached to it. It is compiled once because parseCountText runs for every
// scraped video.
var ytCountTextRe = regexp.MustCompile(`([0-9]+(?:\.[0-9]+)?)([kmb])?`)

// parseCountText converts a human-readable count such as "1,234,567 views",
// "15K views" or "1.2M subscribers" into a number.
//
// Thousands separators (commas) are removed before parsing, so grouped
// numbers are read in full rather than stopping at the first comma. A k, m or
// b suffix (any case) scales the value by 10^3, 10^6 or 10^9 and the scaled
// result is rounded to the nearest integer, which absorbs binary
// floating-point error. Without a suffix any fractional part is truncated.
// Text with no digits, or a number that cannot be parsed, yields 0.
func parseCountText(s string) int64 {
	t := strings.ToLower(strings.TrimSpace(s))
	t = strings.ReplaceAll(t, ",", "")

	m := ytCountTextRe.FindStringSubmatch(t)
	if m == nil {
		return 0
	}

	f, err := strconv.ParseFloat(m[1], 64)
	if err != nil {
		return 0
	}

	switch m[2] {
	case "k":
		f *= 1_000
	case "m":
		f *= 1_000_000
	case "b":
		f *= 1_000_000_000
	default:
		return int64(f)
	}
	// Round rather than truncate: the product can land just below the
	// intended integer.
	return int64(f + 0.5)
}
|
|
|
|
// extractVideoID pulls the video ID out of a YouTube URL.
//
// Recognised forms are youtu.be/<id>, youtube.com/watch?...v=<id>,
// youtube.com/embed/<id>, youtube.com/shorts/<id> and youtube.com/live/<id>.
// The ID ends at the first '?', '&', '#' or '/', so query strings, fragments
// and trailing path segments are never part of the result. It returns "" when
// the URL matches none of these forms.
func extractVideoID(url string) string {
	if i := strings.Index(url, "youtu.be/"); i >= 0 {
		return cutVideoIDSegment(url[i+len("youtu.be/"):])
	}

	if strings.Contains(url, "youtube.com/watch") {
		return watchURLVideoID(url)
	}

	for _, marker := range []string{"youtube.com/embed/", "youtube.com/shorts/", "youtube.com/live/"} {
		if i := strings.Index(url, marker); i >= 0 {
			return cutVideoIDSegment(url[i+len(marker):])
		}
	}
	return ""
}

// watchURLVideoID returns the value of the "v" query parameter of a watch
// URL. The parameter name is matched exactly, so a parameter whose name
// merely ends in "v" is not mistaken for it. When no exact parameter exists
// it falls back to the text after the first "v=" anywhere in the URL, which
// keeps loosely formatted links working.
func watchURLVideoID(rawURL string) string {
	if q := strings.Index(rawURL, "?"); q >= 0 {
		query := rawURL[q+1:]
		if h := strings.Index(query, "#"); h >= 0 {
			query = query[:h]
		}
		for _, pair := range strings.Split(query, "&") {
			if strings.HasPrefix(pair, "v=") {
				return cutVideoIDSegment(pair[len("v="):])
			}
		}
	}

	if i := strings.Index(rawURL, "v="); i >= 0 {
		return cutVideoIDSegment(rawURL[i+len("v="):])
	}
	return ""
}

// cutVideoIDSegment returns s up to, but not including, the first URL
// delimiter ('?', '&', '#' or '/').
func cutVideoIDSegment(s string) string {
	if end := strings.IndexAny(s, "?&#/"); end >= 0 {
		return s[:end]
	}
	return s
}
|
|
|
|
// Global integrated service instance
|
|
var integratedYouTubeService = NewYouTubeIntegratedService()
|
|
|
|
// Integrated service functions for backward compatibility
|
|
func SearchYouTubeVideosIntegrated(query string, maxResults int) (*YouTubeSearchResponse, error) {
|
|
// Always use real YouTube search - no more demo mode mock data
|
|
if maxResults <= 0 || maxResults > 9 {
|
|
maxResults = 9
|
|
}
|
|
|
|
videos, err := integratedYouTubeService.SearchVideosIntegrated(query, maxResults)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var ytVideos []YouTubeVideo
|
|
for _, video := range videos {
|
|
ytVideo := YouTubeVideo{
|
|
ID: video.VideoID,
|
|
Title: video.Title,
|
|
Thumbnail: video.Thumbnail,
|
|
ViewCount: 0,
|
|
PublishedAt: "",
|
|
ChannelTitle: video.ChannelName,
|
|
}
|
|
ytVideos = append(ytVideos, ytVideo)
|
|
}
|
|
|
|
return &YouTubeSearchResponse{
|
|
Videos: ytVideos,
|
|
TotalResults: len(ytVideos),
|
|
}, nil
|
|
}
|
|
|
|
func GetYouTubeChannelVideosIntegrated(channelID string, maxResults int) (*YouTubeSearchResponse, error) {
|
|
// Always use real YouTube channel service - no more demo mode mock data
|
|
response, err := integratedYouTubeService.GetChannelVideosIntegrated(channelID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var videos []YouTubeVideo
|
|
for _, video := range response.Videos {
|
|
ytVideo := YouTubeVideo{
|
|
ID: video.VideoID,
|
|
Title: video.Title,
|
|
Thumbnail: video.ThumbnailURL,
|
|
Duration: video.Length,
|
|
ViewCount: video.Views,
|
|
PublishedAt: video.PublishedDate,
|
|
ChannelTitle: response.Channel,
|
|
}
|
|
videos = append(videos, ytVideo)
|
|
}
|
|
|
|
if len(videos) > maxResults {
|
|
videos = videos[:maxResults]
|
|
}
|
|
|
|
return &YouTubeSearchResponse{
|
|
Videos: videos,
|
|
TotalResults: len(videos),
|
|
}, nil
|
|
}
|