mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-03 20:13:03 +00:00
first commit
This commit is contained in:
@@ -0,0 +1,119 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/yourorg/devour/internal/scraper"
|
||||
)
|
||||
|
||||
func main() {
|
||||
fmt.Println("=== Devour Real HTTP Scraping Test ===")
|
||||
fmt.Println()
|
||||
|
||||
config := &scraper.Config{
|
||||
UserAgent: "Devour/1.0 (Documentation Scraper)",
|
||||
Timeout: 30 * time.Second,
|
||||
RetryCount: 3,
|
||||
RetryDelay: 1 * time.Second,
|
||||
Concurrency: 10,
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
|
||||
defer cancel()
|
||||
|
||||
sources := []struct {
|
||||
name string
|
||||
st scraper.SourceType
|
||||
url string
|
||||
}{
|
||||
{"Go stdlib net/http", scraper.SourceTypeGoDocs, "https://pkg.go.dev/net/http"},
|
||||
{"Spring AI MCP", scraper.SourceTypeSpringDocs, "https://docs.spring.io/spring-ai/reference/api/mcp/mcp-overview.html"},
|
||||
{"React Hooks", scraper.SourceTypeReactDocs, "https://react.dev/reference/react"},
|
||||
{"Vue Composition API", scraper.SourceTypeVueDocs, "https://vuejs.org/api/"},
|
||||
{"Cloudflare Docs", scraper.SourceTypeCloudflareDocs, "https://developers.cloudflare.com/"},
|
||||
}
|
||||
|
||||
for _, src := range sources {
|
||||
fmt.Printf("=== Testing: %s ===\n", src.name)
|
||||
|
||||
s := scraper.NewScraper(src.st, config)
|
||||
if s == nil {
|
||||
fmt.Printf(" ✗ Scraper not available for type: %s\n\n", src.st)
|
||||
continue
|
||||
}
|
||||
|
||||
source := &scraper.Source{
|
||||
Name: src.name,
|
||||
Type: src.st,
|
||||
URL: src.url,
|
||||
}
|
||||
|
||||
fmt.Printf(" Fetching: %s\n", src.url)
|
||||
|
||||
docs, err := s.Scrape(ctx, source)
|
||||
if err != nil {
|
||||
fmt.Printf(" ✗ Error: %v\n\n", err)
|
||||
continue
|
||||
}
|
||||
|
||||
fmt.Printf(" ✓ Scraped %d documents\n", len(docs))
|
||||
|
||||
if len(docs) > 0 {
|
||||
first := docs[0]
|
||||
fmt.Printf(" First document:\n")
|
||||
fmt.Printf(" Title: %s\n", first.Title)
|
||||
fmt.Printf(" Type: %s\n", first.Type)
|
||||
if len(first.Content) > 100 {
|
||||
fmt.Printf(" Content preview: %s...\n", first.Content[:100])
|
||||
} else {
|
||||
fmt.Printf(" Content: %s\n", first.Content)
|
||||
}
|
||||
}
|
||||
|
||||
changed, hash, err := s.DetectChanges(ctx, source, "")
|
||||
if err != nil {
|
||||
fmt.Printf(" ✗ Change detection error: %v\n", err)
|
||||
} else {
|
||||
fmt.Printf(" ✓ Change detection: changed=%v, hash=%s\n", changed, hash[:16]+"...")
|
||||
}
|
||||
|
||||
fmt.Println()
|
||||
}
|
||||
|
||||
fmt.Println("=== All Source Types ===")
|
||||
fmt.Println()
|
||||
fmt.Println("Available scrapers:")
|
||||
allTypes := []scraper.SourceType{
|
||||
scraper.SourceTypeWeb,
|
||||
scraper.SourceTypeGitHub,
|
||||
scraper.SourceTypeOpenAPI,
|
||||
scraper.SourceTypeLocal,
|
||||
scraper.SourceTypeGoDocs,
|
||||
scraper.SourceTypeRustDocs,
|
||||
scraper.SourceTypePythonDocs,
|
||||
scraper.SourceTypeJavaDocs,
|
||||
scraper.SourceTypeSpringDocs,
|
||||
scraper.SourceTypeTSDocs,
|
||||
scraper.SourceTypeReactDocs,
|
||||
scraper.SourceTypeVueDocs,
|
||||
scraper.SourceTypeNuxtDocs,
|
||||
scraper.SourceTypeMCPDocs,
|
||||
scraper.SourceTypeDockerDocs,
|
||||
scraper.SourceTypeCloudflareDocs,
|
||||
scraper.SourceTypeAstroDocs,
|
||||
}
|
||||
|
||||
for _, st := range allTypes {
|
||||
s := scraper.NewScraper(st, config)
|
||||
if s != nil {
|
||||
fmt.Printf(" ✓ %s\n", st)
|
||||
} else {
|
||||
fmt.Printf(" ✗ %s (not implemented)\n", st)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Println()
|
||||
fmt.Println("=== Test Complete ===")
|
||||
}
|
||||
Reference in New Issue
Block a user