Files
Devour/examples/demo_scrapers.go
Tomas Dvorak 55885a0e8f first commit
2026-02-22 10:42:17 +01:00

197 lines
5.6 KiB
Go

package main
import (
"context"
"fmt"
"time"
"github.com/yourorg/devour/internal/scraper"
"github.com/yourorg/devour/pkg/godocs"
"github.com/yourorg/devour/pkg/javadocs"
"github.com/yourorg/devour/pkg/nuxtdocs"
"github.com/yourorg/devour/pkg/pythondocs"
"github.com/yourorg/devour/pkg/reactdocs"
"github.com/yourorg/devour/pkg/rustdocs"
"github.com/yourorg/devour/pkg/springdocs"
"github.com/yourorg/devour/pkg/tsdocs"
"github.com/yourorg/devour/pkg/vuedocs"
)
// main runs the Devour scrapers demo. In order, it prints the supported
// source types, constructs each documentation parser and reports its concrete
// type, asks the scraper factory for one scraper per source type, walks
// through a net/http example source, and prints a sample devour.yaml.
// No scrape is actually invoked here; the demo only constructs objects and
// prints what a real run would do.
func main() {
	// exampleConfig is the sample devour.yaml printed at the end of the demo.
	// The nesting is significant: every source is one list item under
	// "sources", and its type and url must be indented as keys of that same
	// item for the document to be valid YAML.
	const exampleConfig = `sources:
  - name: go-stdlib
    type: godocs
    url: https://pkg.go.dev/net/http
  - name: rust-stdlib
    type: rustdocs
    url: https://doc.rust-lang.org/std/
  - name: python-stdlib
    type: pythondocs
    url: https://docs.python.org/3/library/
  - name: java-se
    type: javadocs
    url: https://docs.oracle.com/en/java/javase/17/docs/api/
  - name: spring-boot
    type: springdocs
    url: https://docs.spring.io/spring-boot/docs/current/reference/html/
  - name: typescript
    type: tsdocs
    url: https://www.typescriptlang.org/docs/
  - name: react
    type: reactdocs
    url: https://react.dev/reference/react
  - name: vue
    type: vuedocs
    url: https://vuejs.org/api/
  - name: nuxt
    type: nuxtdocs
    url: https://nuxt.com/docs/api/`

	fmt.Println("=== Devour Documentation Scrapers Demo ===")
	fmt.Println()

	// One shared configuration is handed to every scraper the factory builds.
	config := &scraper.Config{
		UserAgent:   "Devour/1.0",
		Timeout:     30 * time.Second,
		RetryCount:  3,
		RetryDelay:  1 * time.Second,
		Concurrency: 10,
	}

	fmt.Println("Available Source Types:")
	for _, line := range []string{
		" - godocs (Go packages from pkg.go.dev)",
		" - rustdocs (Rust crates from docs.rs)",
		" - pythondocs (Python modules from docs.python.org)",
		" - javadocs (Java packages from docs.oracle.com)",
		" - springdocs (Spring Boot from docs.spring.io)",
		" - tsdocs (TypeScript from typescriptlang.org)",
		" - reactdocs (React from react.dev)",
		" - vuedocs (Vue.js from vuejs.org)",
		" - nuxtdocs (Nuxt from nuxt.com)",
	} {
		fmt.Println(line)
	}
	fmt.Println()

	fmt.Println("=== Parsing Example Documentation ===")
	fmt.Println()

	// The parser packages each return their own type, so the constructors are
	// wrapped to a common signature. Wrapping (rather than calling them while
	// building the table) keeps every constructor running after its own
	// heading has been printed.
	parsers := []struct {
		label     string     // product name used in the section heading
		site      string     // host whose HTML pages the parser targets
		construct func() any // builds the parser on demand
	}{
		{"Go", "pkg.go.dev", func() any { return godocs.NewParser() }},
		{"Rust", "docs.rs", func() any { return rustdocs.NewParser() }},
		{"Python", "docs.python.org", func() any { return pythondocs.NewParser() }},
		{"Java", "docs.oracle.com", func() any { return javadocs.NewParser() }},
		{"Spring Boot", "docs.spring.io", func() any { return springdocs.NewParser() }},
		{"TypeScript", "typescriptlang.org", func() any { return tsdocs.NewParser() }},
		{"React", "react.dev", func() any { return reactdocs.NewParser() }},
		{"Vue.js", "vuejs.org", func() any { return vuedocs.NewParser() }},
		{"Nuxt", "nuxt.com", func() any { return nuxtdocs.NewParser() }},
	}
	for i, p := range parsers {
		fmt.Printf("%d. %s Documentation Parser:\n", i+1, p.label)
		fmt.Printf(" Parser initialized: %T\n", p.construct())
		fmt.Printf(" Can parse: %s HTML pages\n", p.site)
		fmt.Println()
	}

	fmt.Println("=== Scraper Factory Demo ===")
	fmt.Println()

	sourceTypes := []scraper.SourceType{
		scraper.SourceTypeGoDocs,
		scraper.SourceTypeRustDocs,
		scraper.SourceTypePythonDocs,
		scraper.SourceTypeJavaDocs,
		scraper.SourceTypeSpringDocs,
		scraper.SourceTypeTSDocs,
		scraper.SourceTypeReactDocs,
		scraper.SourceTypeVueDocs,
		scraper.SourceTypeNuxtDocs,
	}
	for _, st := range sourceTypes {
		// The factory result is only checked against nil; the scraper itself
		// is not used by the demo.
		if s := scraper.NewScraper(st, config); s != nil {
			fmt.Printf("✓ Created scraper for: %s\n", st)
		} else {
			fmt.Printf("✗ Failed to create scraper for: %s\n", st)
		}
	}
	fmt.Println()

	fmt.Println("=== Real-World Scenario: Building a Go App ===")
	fmt.Println()
	fmt.Println("Scenario: You're building a Go HTTP server and want reference docs.")
	fmt.Println()

	if goScraper := scraper.NewScraper(scraper.SourceTypeGoDocs, config); goScraper != nil {
		fmt.Println("1. Created Go docs scraper")

		source := &scraper.Source{
			Name: "net/http",
			Type: scraper.SourceTypeGoDocs,
			URL:  "https://pkg.go.dev/net/http",
		}
		fmt.Printf("2. Source configured: %s\n", source.Name)
		fmt.Printf(" URL: %s\n", source.URL)

		// Placeholder for the context a real scrape call would presumably
		// receive (the scrape API is not exercised here). It is discarded on
		// purpose because the demo stops short of the network request.
		ctx := context.Background()
		_ = ctx

		fmt.Println("3. Ready to scrape (would make network request)")
		fmt.Println(" In production, this would:")
		fmt.Println(" - Fetch the HTML from pkg.go.dev/net/http")
		fmt.Println(" - Parse all functions, types, and methods")
		fmt.Println(" - Convert to structured Document objects")
		fmt.Println(" - Index for semantic search")
	}
	fmt.Println()

	fmt.Println("=== Example Configuration (devour.yaml) ===")
	fmt.Println()
	fmt.Println(exampleConfig)
	fmt.Println()
	fmt.Println("=== Demo Complete ===")
}