// This program is a manual smoke test for the devour scraper package. It
// scrapes a handful of live documentation sites over HTTP, prints a short
// summary of what came back for each one, and then lists which source types
// have a scraper available.
package main

import (
	"context"
	"fmt"
	"time"
	"unicode/utf8"

	"github.com/yourorg/devour/internal/scraper"
)

const (
	// sourceTimeout bounds the Scrape and DetectChanges calls made for one
	// source. Each source gets its own deadline so that a single slow site
	// cannot use up the time budget of the sources tested after it.
	sourceTimeout = 60 * time.Second

	// contentPreviewLen is the maximum number of content bytes printed for
	// the first scraped document.
	contentPreviewLen = 100

	// hashPreviewLen is the maximum number of hash bytes printed.
	hashPreviewLen = 16
)

// main runs the live scraping checks and then prints the scraper inventory.
func main() {
	fmt.Println("=== Devour Real HTTP Scraping Test ===")
	fmt.Println()

	config := &scraper.Config{
		UserAgent:   "Devour/1.0 (Documentation Scraper)",
		Timeout:     30 * time.Second,
		RetryCount:  3,
		RetryDelay:  1 * time.Second,
		Concurrency: 10,
	}

	sources := []struct {
		name string
		st   scraper.SourceType
		url  string
	}{
		{"Go stdlib net/http", scraper.SourceTypeGoDocs, "https://pkg.go.dev/net/http"},
		{"Spring AI MCP", scraper.SourceTypeSpringDocs, "https://docs.spring.io/spring-ai/reference/api/mcp/mcp-overview.html"},
		{"React Hooks", scraper.SourceTypeReactDocs, "https://react.dev/reference/react"},
		{"Vue Composition API", scraper.SourceTypeVueDocs, "https://vuejs.org/api/"},
		{"Cloudflare Docs", scraper.SourceTypeCloudflareDocs, "https://developers.cloudflare.com/"},
	}

	for _, src := range sources {
		testSource(config, src.name, src.st, src.url)
	}

	listScrapers(config)

	fmt.Println()
	fmt.Println("=== Test Complete ===")
}

// testSource scrapes one source and prints the outcome: the document count, a
// preview of the first document, and the result of change detection. Failures
// are reported on stdout and end the check for this source only.
func testSource(config *scraper.Config, name string, st scraper.SourceType, url string) {
	fmt.Printf("=== Testing: %s ===\n", name)

	s := scraper.NewScraper(st, config)
	if s == nil {
		fmt.Printf(" ✗ Scraper not available for type: %s\n\n", st)
		return
	}

	// A fresh deadline per source; the deferred cancel runs when this
	// function returns, i.e. once per source rather than at program exit.
	ctx, cancel := context.WithTimeout(context.Background(), sourceTimeout)
	defer cancel()

	source := &scraper.Source{
		Name: name,
		Type: st,
		URL:  url,
	}

	fmt.Printf(" Fetching: %s\n", url)
	docs, err := s.Scrape(ctx, source)
	if err != nil {
		fmt.Printf(" ✗ Error: %v\n\n", err)
		return
	}

	fmt.Printf(" ✓ Scraped %d documents\n", len(docs))
	if len(docs) > 0 {
		first := docs[0]
		fmt.Printf(" First document:\n")
		fmt.Printf(" Title: %s\n", first.Title)
		fmt.Printf(" Type: %s\n", first.Type)
		if len(first.Content) > contentPreviewLen {
			// Back the cut up to a rune boundary so the preview never ends
			// in a partial UTF-8 sequence. The search is limited to
			// utf8.UTFMax bytes, which is enough for any valid encoding.
			cut := contentPreviewLen
			for cut > contentPreviewLen-utf8.UTFMax && !utf8.RuneStart(first.Content[cut]) {
				cut--
			}
			fmt.Printf(" Content preview: %s...\n", first.Content[:cut])
		} else {
			fmt.Printf(" Content: %s\n", first.Content)
		}
	}

	// NOTE(review): the empty third argument is presumably the previously
	// stored hash, meaning "no prior state" — confirm against DetectChanges.
	changed, hash, err := s.DetectChanges(ctx, source, "")
	if err != nil {
		fmt.Printf(" ✗ Change detection error: %v\n", err)
	} else {
		// Truncate only when the hash is long enough; slicing a shorter
		// hash to hashPreviewLen would panic.
		hashPreview := hash
		if len(hash) > hashPreviewLen {
			hashPreview = hash[:hashPreviewLen] + "..."
		}
		fmt.Printf(" ✓ Change detection: changed=%v, hash=%s\n", changed, hashPreview)
	}
	fmt.Println()
}

// listScrapers prints every known source type together with whether
// scraper.NewScraper returns a scraper for it.
func listScrapers(config *scraper.Config) {
	fmt.Println("=== All Source Types ===")
	fmt.Println()
	fmt.Println("Available scrapers:")

	allTypes := []scraper.SourceType{
		scraper.SourceTypeWeb,
		scraper.SourceTypeGitHub,
		scraper.SourceTypeOpenAPI,
		scraper.SourceTypeLocal,
		scraper.SourceTypeGoDocs,
		scraper.SourceTypeRustDocs,
		scraper.SourceTypePythonDocs,
		scraper.SourceTypeJavaDocs,
		scraper.SourceTypeSpringDocs,
		scraper.SourceTypeTSDocs,
		scraper.SourceTypeReactDocs,
		scraper.SourceTypeVueDocs,
		scraper.SourceTypeNuxtDocs,
		scraper.SourceTypeMCPDocs,
		scraper.SourceTypeDockerDocs,
		scraper.SourceTypeCloudflareDocs,
		scraper.SourceTypeAstroDocs,
	}

	for _, st := range allTypes {
		if s := scraper.NewScraper(st, config); s != nil {
			fmt.Printf(" ✓ %s\n", st)
		} else {
			fmt.Printf(" ✗ %s (not implemented)\n", st)
		}
	}
}