Files
Devour/cmd/realtest/main.go
T
Tomas Dvorak 898a3c303f update
2026-02-24 10:33:59 +01:00

122 lines
3.0 KiB
Go

package main
import (
"context"
"fmt"
"time"
"github.com/yourorg/devour/internal/scraper"
_ "github.com/yourorg/devour/internal/scraper/external"
)
// main runs a manual smoke test of the scrapers against live documentation
// sites and prints the results to standard output.
//
// For each entry in a fixed list of sources it asks scraper.NewScraper for a
// scraper of that type, scrapes the URL, prints a short summary of the first
// returned document, and then calls DetectChanges with an empty string as the
// last argument (presumably "no previous hash" — confirm against the scraper
// package). A failure for one source is printed and the loop moves on to the
// next source. Finally it lists a fixed set of source types and reports, for
// each, whether NewScraper returns a non-nil scraper.
func main() {
	// Display limits, both counted in bytes.
	const (
		previewLen    = 100 // maximum length of the content preview
		hashPrefixLen = 16  // maximum length of the printed hash prefix
	)

	fmt.Println("=== Devour Real HTTP Scraping Test ===")
	fmt.Println()

	config := &scraper.Config{
		UserAgent:   "Devour/1.0 (Documentation Scraper)",
		Timeout:     30 * time.Second,
		RetryCount:  3,
		RetryDelay:  1 * time.Second,
		Concurrency: 10,
	}

	// One context is shared by every Scrape and DetectChanges call below, so
	// the 60-second deadline bounds the whole run rather than each source.
	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
	defer cancel()

	sources := []struct {
		name string
		st   scraper.SourceType
		url  string
	}{
		{"Go stdlib net/http", scraper.SourceTypeGoDocs, "https://pkg.go.dev/net/http"},
		{"Spring AI MCP", scraper.SourceTypeSpringDocs, "https://docs.spring.io/spring-ai/reference/api/mcp/mcp-overview.html"},
		{"React Hooks", scraper.SourceTypeReactDocs, "https://react.dev/reference/react"},
		{"Vue Composition API", scraper.SourceTypeVueDocs, "https://vuejs.org/api/"},
		{"Cloudflare Docs", scraper.SourceTypeCloudflareDocs, "https://developers.cloudflare.com/"},
	}

	for _, src := range sources {
		fmt.Printf("=== Testing: %s ===\n", src.name)

		s := scraper.NewScraper(src.st, config)
		if s == nil {
			fmt.Printf(" ✗ Scraper not available for type: %s\n\n", src.st)
			continue
		}

		source := &scraper.Source{
			Name: src.name,
			Type: src.st,
			URL:  src.url,
		}

		fmt.Printf(" Fetching: %s\n", src.url)
		docs, err := s.Scrape(ctx, source)
		if err != nil {
			fmt.Printf(" ✗ Error: %v\n\n", err)
			continue
		}
		fmt.Printf(" ✓ Scraped %d documents\n", len(docs))

		if len(docs) > 0 {
			first := docs[0]
			fmt.Printf(" First document:\n")
			fmt.Printf(" Title: %s\n", first.Title)
			fmt.Printf(" Type: %s\n", first.Type)
			if len(first.Content) > previewLen {
				// Cut at the last character boundary at or before previewLen
				// bytes so a multi-byte UTF-8 character is never split in the
				// preview. Ranging over a string yields the byte offset at
				// which each character starts; for pure ASCII content the cut
				// lands exactly on previewLen.
				cut := 0
				for i := range first.Content {
					if i > previewLen {
						break
					}
					cut = i
				}
				fmt.Printf(" Content preview: %s...\n", first.Content[:cut])
			} else {
				fmt.Printf(" Content: %s\n", first.Content)
			}
		}

		changed, hash, err := s.DetectChanges(ctx, source, "")
		if err != nil {
			fmt.Printf(" ✗ Change detection error: %v\n", err)
		} else {
			// Abbreviate long hashes, but never slice past the end of a short
			// or empty one: an unconditional hash[:16] panics in that case.
			shown := hash
			if len(shown) > hashPrefixLen {
				shown = shown[:hashPrefixLen] + "..."
			}
			fmt.Printf(" ✓ Change detection: changed=%v, hash=%s\n", changed, shown)
		}
		fmt.Println()
	}

	fmt.Println("=== All Source Types ===")
	fmt.Println()
	fmt.Println("Available scrapers:")

	allTypes := []scraper.SourceType{
		scraper.SourceTypeWeb,
		scraper.SourceTypeGitHub,
		scraper.SourceTypeOpenAPI,
		scraper.SourceTypeLocal,
		scraper.SourceTypeLocalSearch,
		scraper.SourceTypeGoDocs,
		scraper.SourceTypeRustDocs,
		scraper.SourceTypePythonDocs,
		scraper.SourceTypeJavaDocs,
		scraper.SourceTypeSpringDocs,
		scraper.SourceTypeTSDocs,
		scraper.SourceTypeReactDocs,
		scraper.SourceTypeVueDocs,
		scraper.SourceTypeNuxtDocs,
		scraper.SourceTypeMCPDocs,
		scraper.SourceTypeDockerDocs,
		scraper.SourceTypeCloudflareDocs,
		scraper.SourceTypeAstroDocs,
	}

	// A nil result from NewScraper is reported as "not implemented".
	for _, st := range allTypes {
		s := scraper.NewScraper(st, config)
		if s != nil {
			fmt.Printf(" ✓ %s\n", st)
		} else {
			fmt.Printf(" %s (not implemented)\n", st)
		}
	}

	fmt.Println()
	fmt.Println("=== Test Complete ===")
}