mirror of
https://github.com/Dvorinka/Trackeep.git
synced 2026-06-03 20:12:58 +00:00
229 lines
5.5 KiB
Go
229 lines
5.5 KiB
Go
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"log"
|
|
"net/http"
|
|
"os"
|
|
"strings"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
"github.com/gin-gonic/gin"
|
|
)
|
|
|
|
// VideoInfo represents the extracted video information
|
|
type VideoInfo struct {
|
|
VideoID string `json:"video_id"`
|
|
Title string `json:"title"`
|
|
Channel string `json:"channel"`
|
|
Thumbnail string `json:"thumbnail_url"`
|
|
Success bool `json:"success"`
|
|
Error string `json:"error,omitempty"`
|
|
}
|
|
|
|
// ExtractVideoID extracts video ID from YouTube URL
|
|
func ExtractVideoID(url string) string {
|
|
// Handle various YouTube URL formats
|
|
if strings.Contains(url, "youtu.be/") {
|
|
// https://youtu.be/VIDEO_ID
|
|
parts := strings.Split(url, "youtu.be/")
|
|
if len(parts) > 1 {
|
|
return strings.Split(parts[1], "?")[0]
|
|
}
|
|
} else if strings.Contains(url, "youtube.com/watch") {
|
|
// https://www.youtube.com/watch?v=VIDEO_ID
|
|
parts := strings.Split(url, "v=")
|
|
if len(parts) > 1 {
|
|
return strings.Split(parts[1], "&")[0]
|
|
}
|
|
} else if strings.Contains(url, "youtube.com/embed/") {
|
|
// https://www.youtube.com/embed/VIDEO_ID
|
|
parts := strings.Split(url, "embed/")
|
|
if len(parts) > 1 {
|
|
return strings.Split(parts[1], "?")[0]
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// ScrapeVideoInfo scrapes video information from YouTube URL
|
|
func ScrapeVideoInfo(url string) VideoInfo {
|
|
videoID := ExtractVideoID(url)
|
|
fmt.Printf("Extracted video ID: '%s' from URL: %s\n", videoID, url)
|
|
|
|
if videoID == "" {
|
|
return VideoInfo{
|
|
Success: false,
|
|
Error: "Invalid YouTube URL",
|
|
}
|
|
}
|
|
|
|
// Create HTTP client with user agent
|
|
client := &http.Client{}
|
|
req, err := http.NewRequest("GET", url, nil)
|
|
if err != nil {
|
|
return VideoInfo{
|
|
Success: false,
|
|
Error: fmt.Sprintf("Failed to create request: %v", err),
|
|
}
|
|
}
|
|
|
|
// Set user agent to mimic browser
|
|
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
|
|
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
return VideoInfo{
|
|
Success: false,
|
|
Error: fmt.Sprintf("Failed to fetch page: %v", err),
|
|
}
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != 200 {
|
|
return VideoInfo{
|
|
Success: false,
|
|
Error: fmt.Sprintf("HTTP %d: %s", resp.StatusCode, resp.Status),
|
|
}
|
|
}
|
|
|
|
// Parse HTML
|
|
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
|
if err != nil {
|
|
return VideoInfo{
|
|
Success: false,
|
|
Error: fmt.Sprintf("Failed to parse HTML: %v", err),
|
|
}
|
|
}
|
|
|
|
// Extract title
|
|
title := ""
|
|
doc.Find("h1.ytd-watch-metadata yt-formatted-string").Each(func(i int, s *goquery.Selection) {
|
|
title = strings.TrimSpace(s.Text())
|
|
})
|
|
|
|
// Fallback for title extraction - try multiple selectors
|
|
if title == "" {
|
|
doc.Find("h1").Each(func(i int, s *goquery.Selection) {
|
|
text := strings.TrimSpace(s.Text())
|
|
if text != "" && !strings.Contains(text, "YouTube") {
|
|
title = text
|
|
}
|
|
})
|
|
}
|
|
|
|
// Another fallback - try title tag
|
|
if title == "" {
|
|
doc.Find("title").Each(func(i int, s *goquery.Selection) {
|
|
text := strings.TrimSpace(s.Text())
|
|
if strings.Contains(text, " - YouTube") {
|
|
title = strings.Replace(text, " - YouTube", "", 1)
|
|
} else if !strings.Contains(text, "YouTube") {
|
|
title = text
|
|
}
|
|
})
|
|
}
|
|
|
|
// Extract channel name
|
|
channel := ""
|
|
doc.Find("ytd-video-owner-renderer yt-formatted-string a").Each(func(i int, s *goquery.Selection) {
|
|
channel = strings.TrimSpace(s.Text())
|
|
})
|
|
|
|
// Fallback for channel extraction - try multiple selectors
|
|
if channel == "" {
|
|
doc.Find("ytd-channel-name yt-formatted-string a").Each(func(i int, s *goquery.Selection) {
|
|
channel = strings.TrimSpace(s.Text())
|
|
})
|
|
}
|
|
|
|
// Another fallback - try channel link
|
|
if channel == "" {
|
|
doc.Find("a[href*='/@']").Each(func(i int, s *goquery.Selection) {
|
|
text := strings.TrimSpace(s.Text())
|
|
if text != "" && !strings.Contains(text, " ") {
|
|
channel = text
|
|
}
|
|
})
|
|
}
|
|
|
|
// Generate thumbnail URL
|
|
thumbnail := fmt.Sprintf("https://i.ytimg.com/vi/%s/hqdefault.jpg", videoID)
|
|
|
|
if title == "" || channel == "" {
|
|
return VideoInfo{
|
|
Success: false,
|
|
Error: "Could not extract title or channel",
|
|
}
|
|
}
|
|
|
|
return VideoInfo{
|
|
VideoID: videoID,
|
|
Title: title,
|
|
Channel: channel,
|
|
Thumbnail: thumbnail,
|
|
Success: true,
|
|
}
|
|
}
|
|
|
|
// VideoHandler handles video scraping requests
|
|
func VideoHandler(c *gin.Context) {
|
|
var request struct {
|
|
URL string `json:"url" binding:"required"`
|
|
}
|
|
|
|
if err := c.ShouldBindJSON(&request); err != nil {
|
|
fmt.Printf("Error binding request: %v\n", err)
|
|
c.JSON(http.StatusBadRequest, gin.H{
|
|
"success": false,
|
|
"error": "Invalid request format",
|
|
})
|
|
return
|
|
}
|
|
|
|
fmt.Printf("Received request for URL: %s\n", request.URL)
|
|
|
|
info := ScrapeVideoInfo(request.URL)
|
|
c.JSON(http.StatusOK, info)
|
|
}
|
|
|
|
func main() {
|
|
port := "7858"
|
|
if p := os.Getenv("PORT"); p != "" {
|
|
port = p
|
|
}
|
|
|
|
r := gin.Default()
|
|
|
|
// Enable CORS for all origins
|
|
r.Use(func(c *gin.Context) {
|
|
c.Header("Access-Control-Allow-Origin", "*")
|
|
c.Header("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS")
|
|
c.Header("Access-Control-Allow-Headers", "Content-Type, Authorization")
|
|
|
|
if c.Request.Method == "OPTIONS" {
|
|
c.AbortWithStatus(204)
|
|
return
|
|
}
|
|
|
|
c.Next()
|
|
})
|
|
|
|
// Routes
|
|
r.POST("/video", VideoHandler)
|
|
r.GET("/health", func(c *gin.Context) {
|
|
c.JSON(http.StatusOK, gin.H{"status": "healthy"})
|
|
})
|
|
r.GET("/test", func(c *gin.Context) {
|
|
testURL := "https://www.youtube.com/watch?v=WKXh4Z6SYMs"
|
|
videoID := ExtractVideoID(testURL)
|
|
c.JSON(http.StatusOK, gin.H{
|
|
"url": testURL,
|
|
"video_id": videoID,
|
|
})
|
|
})
|
|
|
|
log.Printf("YouTube video scraper starting on port %s", port)
|
|
log.Fatal(r.Run(":" + port))
|
|
}
|