mirror of
https://github.com/Dvorinka/Trackeep.git
synced 2026-06-04 20:42:59 +00:00
first test
This commit is contained in:
@@ -0,0 +1,228 @@
|
||||
package main
|
||||
|
||||
import (
	"fmt"
	"log"
	"net/http"
	"os"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
	"github.com/gin-gonic/gin"
)
|
||||
|
||||
// VideoInfo is the JSON result of a scrape. On success the descriptive
// fields are filled in and Success is true; on failure Success is false and
// Error carries a human-readable reason.
type VideoInfo struct {
	// VideoID is the ID extracted from the requested URL.
	VideoID string `json:"video_id"`
	// Title is the video title scraped from the page.
	Title string `json:"title"`
	// Channel is the channel name scraped from the page.
	Channel string `json:"channel"`
	// Thumbnail is the thumbnail image URL derived from VideoID.
	Thumbnail string `json:"thumbnail_url"`
	// Success reports whether the scrape produced a usable result.
	Success bool `json:"success"`
	// Error describes the failure; it is omitted from the JSON when empty.
	Error string `json:"error,omitempty"`
}
|
||||
|
||||
// ExtractVideoID returns the video ID embedded in a YouTube URL, or "" when
// the URL matches none of the supported forms:
//
//	https://youtu.be/VIDEO_ID
//	https://www.youtube.com/watch?v=VIDEO_ID
//	https://www.youtube.com/embed/VIDEO_ID
//
// Matching is substring-based, so the scheme and any "www."/"m." prefix are
// irrelevant. Any "#fragment" is discarded first. For the path-based forms
// the ID ends at the next '/', '?' or '&'. For the watch form the ID is the
// value of the query parameter named exactly "v", wherever it appears in the
// query string.
func ExtractVideoID(url string) string {
	// A fragment (e.g. "#t=30") is never part of the ID.
	if i := strings.IndexByte(url, '#'); i >= 0 {
		url = url[:i]
	}

	// segmentAfter returns the text that follows the first occurrence of
	// marker, truncated at the first path or query delimiter. Callers only
	// invoke it after confirming that marker is present.
	segmentAfter := func(marker string) string {
		rest := url[strings.Index(url, marker)+len(marker):]
		if end := strings.IndexAny(rest, "/?&"); end >= 0 {
			rest = rest[:end]
		}
		return rest
	}

	switch {
	case strings.Contains(url, "youtu.be/"):
		return segmentAfter("youtu.be/")

	case strings.Contains(url, "youtube.com/watch"):
		q := strings.IndexByte(url, '?')
		if q < 0 {
			return ""
		}
		// Look at whole key=value pairs so that parameters whose name merely
		// ends in "v" (e.g. "rev=2") are not mistaken for the video ID.
		for _, pair := range strings.Split(url[q+1:], "&") {
			if strings.HasPrefix(pair, "v=") {
				return pair[len("v="):]
			}
		}

	case strings.Contains(url, "youtube.com/embed/"):
		return segmentAfter("youtube.com/embed/")
	}

	return ""
}
|
||||
|
||||
// ScrapeVideoInfo scrapes video information from YouTube URL
|
||||
func ScrapeVideoInfo(url string) VideoInfo {
|
||||
videoID := ExtractVideoID(url)
|
||||
fmt.Printf("Extracted video ID: '%s' from URL: %s\n", videoID, url)
|
||||
|
||||
if videoID == "" {
|
||||
return VideoInfo{
|
||||
Success: false,
|
||||
Error: "Invalid YouTube URL",
|
||||
}
|
||||
}
|
||||
|
||||
// Create HTTP client with user agent
|
||||
client := &http.Client{}
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
return VideoInfo{
|
||||
Success: false,
|
||||
Error: fmt.Sprintf("Failed to create request: %v", err),
|
||||
}
|
||||
}
|
||||
|
||||
// Set user agent to mimic browser
|
||||
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return VideoInfo{
|
||||
Success: false,
|
||||
Error: fmt.Sprintf("Failed to fetch page: %v", err),
|
||||
}
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != 200 {
|
||||
return VideoInfo{
|
||||
Success: false,
|
||||
Error: fmt.Sprintf("HTTP %d: %s", resp.StatusCode, resp.Status),
|
||||
}
|
||||
}
|
||||
|
||||
// Parse HTML
|
||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||
if err != nil {
|
||||
return VideoInfo{
|
||||
Success: false,
|
||||
Error: fmt.Sprintf("Failed to parse HTML: %v", err),
|
||||
}
|
||||
}
|
||||
|
||||
// Extract title
|
||||
title := ""
|
||||
doc.Find("h1.ytd-watch-metadata yt-formatted-string").Each(func(i int, s *goquery.Selection) {
|
||||
title = strings.TrimSpace(s.Text())
|
||||
})
|
||||
|
||||
// Fallback for title extraction - try multiple selectors
|
||||
if title == "" {
|
||||
doc.Find("h1").Each(func(i int, s *goquery.Selection) {
|
||||
text := strings.TrimSpace(s.Text())
|
||||
if text != "" && !strings.Contains(text, "YouTube") {
|
||||
title = text
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Another fallback - try title tag
|
||||
if title == "" {
|
||||
doc.Find("title").Each(func(i int, s *goquery.Selection) {
|
||||
text := strings.TrimSpace(s.Text())
|
||||
if strings.Contains(text, " - YouTube") {
|
||||
title = strings.Replace(text, " - YouTube", "", 1)
|
||||
} else if !strings.Contains(text, "YouTube") {
|
||||
title = text
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Extract channel name
|
||||
channel := ""
|
||||
doc.Find("ytd-video-owner-renderer yt-formatted-string a").Each(func(i int, s *goquery.Selection) {
|
||||
channel = strings.TrimSpace(s.Text())
|
||||
})
|
||||
|
||||
// Fallback for channel extraction - try multiple selectors
|
||||
if channel == "" {
|
||||
doc.Find("ytd-channel-name yt-formatted-string a").Each(func(i int, s *goquery.Selection) {
|
||||
channel = strings.TrimSpace(s.Text())
|
||||
})
|
||||
}
|
||||
|
||||
// Another fallback - try channel link
|
||||
if channel == "" {
|
||||
doc.Find("a[href*='/@']").Each(func(i int, s *goquery.Selection) {
|
||||
text := strings.TrimSpace(s.Text())
|
||||
if text != "" && !strings.Contains(text, " ") {
|
||||
channel = text
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Generate thumbnail URL
|
||||
thumbnail := fmt.Sprintf("https://i.ytimg.com/vi/%s/hqdefault.jpg", videoID)
|
||||
|
||||
if title == "" || channel == "" {
|
||||
return VideoInfo{
|
||||
Success: false,
|
||||
Error: "Could not extract title or channel",
|
||||
}
|
||||
}
|
||||
|
||||
return VideoInfo{
|
||||
VideoID: videoID,
|
||||
Title: title,
|
||||
Channel: channel,
|
||||
Thumbnail: thumbnail,
|
||||
Success: true,
|
||||
}
|
||||
}
|
||||
|
||||
// VideoHandler handles video scraping requests
|
||||
func VideoHandler(c *gin.Context) {
|
||||
var request struct {
|
||||
URL string `json:"url" binding:"required"`
|
||||
}
|
||||
|
||||
if err := c.ShouldBindJSON(&request); err != nil {
|
||||
fmt.Printf("Error binding request: %v\n", err)
|
||||
c.JSON(http.StatusBadRequest, gin.H{
|
||||
"success": false,
|
||||
"error": "Invalid request format",
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Printf("Received request for URL: %s\n", request.URL)
|
||||
|
||||
info := ScrapeVideoInfo(request.URL)
|
||||
c.JSON(http.StatusOK, info)
|
||||
}
|
||||
|
||||
func main() {
|
||||
port := "7858"
|
||||
if p := os.Getenv("PORT"); p != "" {
|
||||
port = p
|
||||
}
|
||||
|
||||
r := gin.Default()
|
||||
|
||||
// Enable CORS for all origins
|
||||
r.Use(func(c *gin.Context) {
|
||||
c.Header("Access-Control-Allow-Origin", "*")
|
||||
c.Header("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS")
|
||||
c.Header("Access-Control-Allow-Headers", "Content-Type, Authorization")
|
||||
|
||||
if c.Request.Method == "OPTIONS" {
|
||||
c.AbortWithStatus(204)
|
||||
return
|
||||
}
|
||||
|
||||
c.Next()
|
||||
})
|
||||
|
||||
// Routes
|
||||
r.POST("/video", VideoHandler)
|
||||
r.GET("/health", func(c *gin.Context) {
|
||||
c.JSON(http.StatusOK, gin.H{"status": "healthy"})
|
||||
})
|
||||
r.GET("/test", func(c *gin.Context) {
|
||||
testURL := "https://www.youtube.com/watch?v=WKXh4Z6SYMs"
|
||||
videoID := ExtractVideoID(testURL)
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"url": testURL,
|
||||
"video_id": videoID,
|
||||
})
|
||||
})
|
||||
|
||||
log.Printf("YouTube video scraper starting on port %s", port)
|
||||
log.Fatal(r.Run(":" + port))
|
||||
}
|
||||
Reference in New Issue
Block a user