mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-03 20:13:03 +00:00
198 lines
5.7 KiB
Go
198 lines
5.7 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"time"
|
|
|
|
"github.com/yourorg/devour/internal/scraper"
|
|
_ "github.com/yourorg/devour/internal/scraper/external"
|
|
"github.com/yourorg/devour/pkg/godocs"
|
|
"github.com/yourorg/devour/pkg/javadocs"
|
|
"github.com/yourorg/devour/pkg/nuxtdocs"
|
|
"github.com/yourorg/devour/pkg/pythondocs"
|
|
"github.com/yourorg/devour/pkg/reactdocs"
|
|
"github.com/yourorg/devour/pkg/rustdocs"
|
|
"github.com/yourorg/devour/pkg/springdocs"
|
|
"github.com/yourorg/devour/pkg/tsdocs"
|
|
"github.com/yourorg/devour/pkg/vuedocs"
|
|
)
|
|
|
|
// main runs a console walkthrough of the Devour documentation scrapers.
// It prints the supported source types, constructs one parser per
// documentation package and prints its type, asks the scraper factory for a
// scraper of every source type, sketches a net/http scraping scenario, and
// finally prints a sample devour.yaml. No scrape call is made here; the
// scenario section only prints what a real run would do.
//
// NOTE(review): the lone `|` lines and the missing indentation in this copy
// look like artifacts of the web mirror this text was extracted from, and
// whitespace inside string literals may have been collapsed as well - confirm
// against the upstream file before relying on exact output.
func main() {
|
|
fmt.Println("=== Devour Documentation Scrapers Demo ===")
|
|
fmt.Println()
|
|
|
|
// Shared scraper settings; the same config is passed to every
// scraper.NewScraper call below.
config := &scraper.Config{
|
|
UserAgent: "Devour/1.0",
|
|
Timeout: 30 * time.Second,
|
|
RetryCount: 3,
|
|
RetryDelay: 1 * time.Second,
|
|
Concurrency: 10,
|
|
}
|
|
|
|
// Static listing of the source types; these lines are plain text and are
// not derived from the sourceTypes slice further down.
fmt.Println("Available Source Types:")
|
|
fmt.Println(" - godocs (Go packages from pkg.go.dev)")
|
|
fmt.Println(" - rustdocs (Rust crates from docs.rs)")
|
|
fmt.Println(" - pythondocs (Python modules from docs.python.org)")
|
|
fmt.Println(" - javadocs (Java packages from docs.oracle.com)")
|
|
fmt.Println(" - springdocs (Spring Boot from docs.spring.io)")
|
|
fmt.Println(" - tsdocs (TypeScript from typescriptlang.org)")
|
|
fmt.Println(" - reactdocs (React from react.dev)")
|
|
fmt.Println(" - vuedocs (Vue.js from vuejs.org)")
|
|
fmt.Println(" - nuxtdocs (Nuxt from nuxt.com)")
|
|
fmt.Println()
|
|
|
|
// Parser section: each of the nine stanzas below calls the package's
// NewParser constructor and prints the dynamic type of the result with %T.
// The parsers are not given any input to parse.
fmt.Println("=== Parsing Example Documentation ===")
|
|
fmt.Println()
|
|
|
|
fmt.Println("1. Go Documentation Parser:")
|
|
goParser := godocs.NewParser()
|
|
fmt.Printf(" Parser initialized: %T\n", goParser)
|
|
fmt.Println(" Can parse: pkg.go.dev HTML pages")
|
|
fmt.Println()
|
|
|
|
fmt.Println("2. Rust Documentation Parser:")
|
|
rustParser := rustdocs.NewParser()
|
|
fmt.Printf(" Parser initialized: %T\n", rustParser)
|
|
fmt.Println(" Can parse: docs.rs HTML pages")
|
|
fmt.Println()
|
|
|
|
fmt.Println("3. Python Documentation Parser:")
|
|
pyParser := pythondocs.NewParser()
|
|
fmt.Printf(" Parser initialized: %T\n", pyParser)
|
|
fmt.Println(" Can parse: docs.python.org HTML pages")
|
|
fmt.Println()
|
|
|
|
fmt.Println("4. Java Documentation Parser:")
|
|
javaParser := javadocs.NewParser()
|
|
fmt.Printf(" Parser initialized: %T\n", javaParser)
|
|
fmt.Println(" Can parse: docs.oracle.com HTML pages")
|
|
fmt.Println()
|
|
|
|
fmt.Println("5. Spring Boot Documentation Parser:")
|
|
springParser := springdocs.NewParser()
|
|
fmt.Printf(" Parser initialized: %T\n", springParser)
|
|
fmt.Println(" Can parse: docs.spring.io HTML pages")
|
|
fmt.Println()
|
|
|
|
fmt.Println("6. TypeScript Documentation Parser:")
|
|
tsParser := tsdocs.NewParser()
|
|
fmt.Printf(" Parser initialized: %T\n", tsParser)
|
|
fmt.Println(" Can parse: typescriptlang.org HTML pages")
|
|
fmt.Println()
|
|
|
|
fmt.Println("7. React Documentation Parser:")
|
|
reactParser := reactdocs.NewParser()
|
|
fmt.Printf(" Parser initialized: %T\n", reactParser)
|
|
fmt.Println(" Can parse: react.dev HTML pages")
|
|
fmt.Println()
|
|
|
|
fmt.Println("8. Vue.js Documentation Parser:")
|
|
vueParser := vuedocs.NewParser()
|
|
fmt.Printf(" Parser initialized: %T\n", vueParser)
|
|
fmt.Println(" Can parse: vuejs.org HTML pages")
|
|
fmt.Println()
|
|
|
|
fmt.Println("9. Nuxt Documentation Parser:")
|
|
nuxtParser := nuxtdocs.NewParser()
|
|
fmt.Printf(" Parser initialized: %T\n", nuxtParser)
|
|
fmt.Println(" Can parse: nuxt.com HTML pages")
|
|
fmt.Println()
|
|
|
|
fmt.Println("=== Scraper Factory Demo ===")
|
|
fmt.Println()
|
|
|
|
// Source types handed to the factory, in the same order as the listing
// printed above.
sourceTypes := []scraper.SourceType{
|
|
scraper.SourceTypeGoDocs,
|
|
scraper.SourceTypeRustDocs,
|
|
scraper.SourceTypePythonDocs,
|
|
scraper.SourceTypeJavaDocs,
|
|
scraper.SourceTypeSpringDocs,
|
|
scraper.SourceTypeTSDocs,
|
|
scraper.SourceTypeReactDocs,
|
|
scraper.SourceTypeVueDocs,
|
|
scraper.SourceTypeNuxtDocs,
|
|
}
|
|
|
|
// Ask the factory for a scraper of each type; a nil result is reported as
// a failure. The returned scraper is only nil-checked, never invoked.
for _, st := range sourceTypes {
|
|
s := scraper.NewScraper(st, config)
|
|
if s != nil {
|
|
fmt.Printf("✓ Created scraper for: %s\n", st)
|
|
} else {
|
|
fmt.Printf("✗ Failed to create scraper for: %s\n", st)
|
|
}
|
|
}
|
|
fmt.Println()
|
|
|
|
fmt.Println("=== Real-World Scenario: Building a Go App ===")
|
|
fmt.Println()
|
|
fmt.Println("Scenario: You're building a Go HTTP server and want reference docs.")
|
|
fmt.Println()
|
|
|
|
// The scenario steps are printed only when the factory returns a non-nil
// Go docs scraper; when it returns nil this section is skipped without any
// message. goScraper itself is not used beyond the nil check.
goScraper := scraper.NewScraper(scraper.SourceTypeGoDocs, config)
|
|
if goScraper != nil {
|
|
fmt.Println("1. Created Go docs scraper")
|
|
|
|
// Describes the net/http package page; only Name and URL are read back,
// for the two Printf calls that follow.
source := &scraper.Source{
|
|
Name: "net/http",
|
|
Type: scraper.SourceTypeGoDocs,
|
|
URL: "https://pkg.go.dev/net/http",
|
|
}
|
|
fmt.Printf("2. Source configured: %s\n", source.Name)
|
|
fmt.Printf(" URL: %s\n", source.URL)
|
|
|
|
// ctx is created here but never passed to anything; the lines below only
// describe what a real scrape would do.
ctx := context.Background()
|
|
fmt.Println("3. Ready to scrape (would make network request)")
|
|
fmt.Println(" In production, this would:")
|
|
fmt.Println(" - Fetch the HTML from pkg.go.dev/net/http")
|
|
fmt.Println(" - Parse all functions, types, and methods")
|
|
fmt.Println(" - Convert to structured Document objects")
|
|
fmt.Println(" - Index for semantic search")
|
|
|
|
// Blank assignment marks ctx as used so the otherwise-unused variable does
// not cause a compile error.
_ = ctx
|
|
}
|
|
fmt.Println()
|
|
|
|
fmt.Println("=== Example Configuration (devour.yaml) ===")
|
|
fmt.Println()
|
|
// The sample configuration is a single raw string literal printed verbatim;
// it contains one entry (name, type, url) for each of the nine source types.
fmt.Println(`sources:
|
|
- name: go-stdlib
|
|
type: godocs
|
|
url: https://pkg.go.dev/net/http
|
|
|
|
- name: rust-stdlib
|
|
type: rustdocs
|
|
url: https://doc.rust-lang.org/std/
|
|
|
|
- name: python-stdlib
|
|
type: pythondocs
|
|
url: https://docs.python.org/3/library/
|
|
|
|
- name: java-se
|
|
type: javadocs
|
|
url: https://docs.oracle.com/en/java/javase/17/docs/api/
|
|
|
|
- name: spring-boot
|
|
type: springdocs
|
|
url: https://docs.spring.io/spring-boot/docs/current/reference/html/
|
|
|
|
- name: typescript
|
|
type: tsdocs
|
|
url: https://www.typescriptlang.org/docs/
|
|
|
|
- name: react
|
|
type: reactdocs
|
|
url: https://react.dev/reference/react
|
|
|
|
- name: vue
|
|
type: vuedocs
|
|
url: https://vuejs.org/api/
|
|
|
|
- name: nuxt
|
|
type: nuxtdocs
|
|
url: https://nuxt.com/docs/api/`)
|
|
|
|
fmt.Println()
|
|
fmt.Println("=== Demo Complete ===")
|
|
}
|