mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-03 20:13:03 +00:00
first commit
This commit is contained in:
@@ -0,0 +1,196 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/yourorg/devour/internal/scraper"
|
||||
"github.com/yourorg/devour/pkg/godocs"
|
||||
"github.com/yourorg/devour/pkg/javadocs"
|
||||
"github.com/yourorg/devour/pkg/nuxtdocs"
|
||||
"github.com/yourorg/devour/pkg/pythondocs"
|
||||
"github.com/yourorg/devour/pkg/reactdocs"
|
||||
"github.com/yourorg/devour/pkg/rustdocs"
|
||||
"github.com/yourorg/devour/pkg/springdocs"
|
||||
"github.com/yourorg/devour/pkg/tsdocs"
|
||||
"github.com/yourorg/devour/pkg/vuedocs"
|
||||
)
|
||||
|
||||
func main() {
|
||||
fmt.Println("=== Devour Documentation Scrapers Demo ===")
|
||||
fmt.Println()
|
||||
|
||||
config := &scraper.Config{
|
||||
UserAgent: "Devour/1.0",
|
||||
Timeout: 30 * time.Second,
|
||||
RetryCount: 3,
|
||||
RetryDelay: 1 * time.Second,
|
||||
Concurrency: 10,
|
||||
}
|
||||
|
||||
fmt.Println("Available Source Types:")
|
||||
fmt.Println(" - godocs (Go packages from pkg.go.dev)")
|
||||
fmt.Println(" - rustdocs (Rust crates from docs.rs)")
|
||||
fmt.Println(" - pythondocs (Python modules from docs.python.org)")
|
||||
fmt.Println(" - javadocs (Java packages from docs.oracle.com)")
|
||||
fmt.Println(" - springdocs (Spring Boot from docs.spring.io)")
|
||||
fmt.Println(" - tsdocs (TypeScript from typescriptlang.org)")
|
||||
fmt.Println(" - reactdocs (React from react.dev)")
|
||||
fmt.Println(" - vuedocs (Vue.js from vuejs.org)")
|
||||
fmt.Println(" - nuxtdocs (Nuxt from nuxt.com)")
|
||||
fmt.Println()
|
||||
|
||||
fmt.Println("=== Parsing Example Documentation ===")
|
||||
fmt.Println()
|
||||
|
||||
fmt.Println("1. Go Documentation Parser:")
|
||||
goParser := godocs.NewParser()
|
||||
fmt.Printf(" Parser initialized: %T\n", goParser)
|
||||
fmt.Println(" Can parse: pkg.go.dev HTML pages")
|
||||
fmt.Println()
|
||||
|
||||
fmt.Println("2. Rust Documentation Parser:")
|
||||
rustParser := rustdocs.NewParser()
|
||||
fmt.Printf(" Parser initialized: %T\n", rustParser)
|
||||
fmt.Println(" Can parse: docs.rs HTML pages")
|
||||
fmt.Println()
|
||||
|
||||
fmt.Println("3. Python Documentation Parser:")
|
||||
pyParser := pythondocs.NewParser()
|
||||
fmt.Printf(" Parser initialized: %T\n", pyParser)
|
||||
fmt.Println(" Can parse: docs.python.org HTML pages")
|
||||
fmt.Println()
|
||||
|
||||
fmt.Println("4. Java Documentation Parser:")
|
||||
javaParser := javadocs.NewParser()
|
||||
fmt.Printf(" Parser initialized: %T\n", javaParser)
|
||||
fmt.Println(" Can parse: docs.oracle.com HTML pages")
|
||||
fmt.Println()
|
||||
|
||||
fmt.Println("5. Spring Boot Documentation Parser:")
|
||||
springParser := springdocs.NewParser()
|
||||
fmt.Printf(" Parser initialized: %T\n", springParser)
|
||||
fmt.Println(" Can parse: docs.spring.io HTML pages")
|
||||
fmt.Println()
|
||||
|
||||
fmt.Println("6. TypeScript Documentation Parser:")
|
||||
tsParser := tsdocs.NewParser()
|
||||
fmt.Printf(" Parser initialized: %T\n", tsParser)
|
||||
fmt.Println(" Can parse: typescriptlang.org HTML pages")
|
||||
fmt.Println()
|
||||
|
||||
fmt.Println("7. React Documentation Parser:")
|
||||
reactParser := reactdocs.NewParser()
|
||||
fmt.Printf(" Parser initialized: %T\n", reactParser)
|
||||
fmt.Println(" Can parse: react.dev HTML pages")
|
||||
fmt.Println()
|
||||
|
||||
fmt.Println("8. Vue.js Documentation Parser:")
|
||||
vueParser := vuedocs.NewParser()
|
||||
fmt.Printf(" Parser initialized: %T\n", vueParser)
|
||||
fmt.Println(" Can parse: vuejs.org HTML pages")
|
||||
fmt.Println()
|
||||
|
||||
fmt.Println("9. Nuxt Documentation Parser:")
|
||||
nuxtParser := nuxtdocs.NewParser()
|
||||
fmt.Printf(" Parser initialized: %T\n", nuxtParser)
|
||||
fmt.Println(" Can parse: nuxt.com HTML pages")
|
||||
fmt.Println()
|
||||
|
||||
fmt.Println("=== Scraper Factory Demo ===")
|
||||
fmt.Println()
|
||||
|
||||
sourceTypes := []scraper.SourceType{
|
||||
scraper.SourceTypeGoDocs,
|
||||
scraper.SourceTypeRustDocs,
|
||||
scraper.SourceTypePythonDocs,
|
||||
scraper.SourceTypeJavaDocs,
|
||||
scraper.SourceTypeSpringDocs,
|
||||
scraper.SourceTypeTSDocs,
|
||||
scraper.SourceTypeReactDocs,
|
||||
scraper.SourceTypeVueDocs,
|
||||
scraper.SourceTypeNuxtDocs,
|
||||
}
|
||||
|
||||
for _, st := range sourceTypes {
|
||||
s := scraper.NewScraper(st, config)
|
||||
if s != nil {
|
||||
fmt.Printf("✓ Created scraper for: %s\n", st)
|
||||
} else {
|
||||
fmt.Printf("✗ Failed to create scraper for: %s\n", st)
|
||||
}
|
||||
}
|
||||
fmt.Println()
|
||||
|
||||
fmt.Println("=== Real-World Scenario: Building a Go App ===")
|
||||
fmt.Println()
|
||||
fmt.Println("Scenario: You're building a Go HTTP server and want reference docs.")
|
||||
fmt.Println()
|
||||
|
||||
goScraper := scraper.NewScraper(scraper.SourceTypeGoDocs, config)
|
||||
if goScraper != nil {
|
||||
fmt.Println("1. Created Go docs scraper")
|
||||
|
||||
source := &scraper.Source{
|
||||
Name: "net/http",
|
||||
Type: scraper.SourceTypeGoDocs,
|
||||
URL: "https://pkg.go.dev/net/http",
|
||||
}
|
||||
fmt.Printf("2. Source configured: %s\n", source.Name)
|
||||
fmt.Printf(" URL: %s\n", source.URL)
|
||||
|
||||
ctx := context.Background()
|
||||
fmt.Println("3. Ready to scrape (would make network request)")
|
||||
fmt.Println(" In production, this would:")
|
||||
fmt.Println(" - Fetch the HTML from pkg.go.dev/net/http")
|
||||
fmt.Println(" - Parse all functions, types, and methods")
|
||||
fmt.Println(" - Convert to structured Document objects")
|
||||
fmt.Println(" - Index for semantic search")
|
||||
|
||||
_ = ctx
|
||||
}
|
||||
fmt.Println()
|
||||
|
||||
fmt.Println("=== Example Configuration (devour.yaml) ===")
|
||||
fmt.Println()
|
||||
fmt.Println(`sources:
|
||||
- name: go-stdlib
|
||||
type: godocs
|
||||
url: https://pkg.go.dev/net/http
|
||||
|
||||
- name: rust-stdlib
|
||||
type: rustdocs
|
||||
url: https://doc.rust-lang.org/std/
|
||||
|
||||
- name: python-stdlib
|
||||
type: pythondocs
|
||||
url: https://docs.python.org/3/library/
|
||||
|
||||
- name: java-se
|
||||
type: javadocs
|
||||
url: https://docs.oracle.com/en/java/javase/17/docs/api/
|
||||
|
||||
- name: spring-boot
|
||||
type: springdocs
|
||||
url: https://docs.spring.io/spring-boot/docs/current/reference/html/
|
||||
|
||||
- name: typescript
|
||||
type: tsdocs
|
||||
url: https://www.typescriptlang.org/docs/
|
||||
|
||||
- name: react
|
||||
type: reactdocs
|
||||
url: https://react.dev/reference/react
|
||||
|
||||
- name: vue
|
||||
type: vuedocs
|
||||
url: https://vuejs.org/api/
|
||||
|
||||
- name: nuxt
|
||||
type: nuxtdocs
|
||||
url: https://nuxt.com/docs/api/`)
|
||||
|
||||
fmt.Println()
|
||||
fmt.Println("=== Demo Complete ===")
|
||||
}
|
||||
Reference in New Issue
Block a user